"""Tests for `core.socketio_cors` — the resolver, rejection predictor, and dedup logger that gate Socket.IO WebSocket origins. These pin the security-relevant behavior: - The resolver returns ``None`` (engineio's same-origin default — also the secure default) for anything other than an explicit allow-list or the wildcard. CRITICAL: the resolver must NEVER return ``[]`` — in engineio that means "disable CORS handling" which is identical to the ``'*'`` wildcard from a security standpoint (engineio/server.py:202: ``if cors_allowed_origins != []``). And it must never silently turn into ``'*'`` from a misshapen config value. - The rejection predictor must mirror engineio's same-origin check exactly so the warning we log is accurate. This includes accepting matches against ``X-Forwarded-Host`` since engineio honors that automatically when ``cors_allowed_origins`` is ``None``. - The dedup logger must emit each unique origin only once so a malicious site repeatedly hammering the WS endpoint can't spam logs. Pure unit tests — no Flask, no engineio, no network. Just the logic. """ import threading from typing import Any, List import pytest from core.socketio_cors import ( RejectionLogger, log_startup_status, resolve_cors_origins, will_reject, ) # ── helpers ─────────────────────────────────────────────────────────────── class _FakeConfig: """Minimal config_manager stub that returns one canned value for the `security.cors_origins` key. Anything else returns the default.""" def __init__(self, value: Any): self._value = value def get(self, key: str, default: Any = None) -> Any: if key == 'security.cors_origins': return self._value return default class _CapturingLogger: """Stand-in logger that records every warning/info call so tests can assert what was emitted (and how many times).""" def __init__(self): self.warnings: List[str] = [] self.infos: List[str] = [] def warning(self, msg: str) -> None: self.warnings.append(msg) def info(self, msg: str) -> None: self.infos.append(msg) # ── resolve_cors_origins ────────────────────────────────────────────────── @pytest.mark.parametrize("value, expected", [ # Unset / empty / whitespace / bogus types → None (engineio same-origin default) (None, None), ('', None), (' ', None), ('\n\n', None), (',,,', None), (12345, None), # numeric — invalid type ({'a': 1}, None), # dict — invalid type ([], None), # explicit empty list ([' ', ''], None), # list of all-empty strings # Wildcard ('*', '*'), (' * ', '*'), (['*'], '*'), (['https://x.com', '*'], '*'), # wildcard in a list still wins # Single origin ('https://x.com', ['https://x.com']), (['https://x.com'], ['https://x.com']), # Multiple origins, comma-separated ('https://x.com, http://y.com', ['https://x.com', 'http://y.com']), # Multiple origins, newline-separated (textarea input) ('https://x.com\nhttp://y.com', ['https://x.com', 'http://y.com']), # Mixed separators + extra commas / whitespace get cleaned ('https://x.com,, http://y.com,\n http://z.com', ['https://x.com', 'http://y.com', 'http://z.com']), # List with mixed types (bytes-like → str coerce) (['https://x.com', ' ', 'http://y.com'], ['https://x.com', 'http://y.com']), ]) def test_resolve_cors_origins_normalizes_input(value, expected): assert resolve_cors_origins(_FakeConfig(value)) == expected def test_resolve_cors_origins_handles_missing_config_manager(): """Defensive: if config_manager is None (e.g., very early init), the resolver must fall back to the secure default rather than crashing.""" assert resolve_cors_origins(None) is None def test_resolve_cors_origins_never_returns_empty_list(): """SECURITY CRITICAL: ``cors_allowed_origins=[]`` in engineio means "disable CORS handling entirely" — identical security to ``'*'`` (engineio/server.py:202). The resolver must return ``None`` for the secure default, never ``[]``, regardless of what the user typed.""" edge_cases = [None, '', ' ', '\n\n', ',,,', 12345, 3.14, {'a': 1}, object(), True, False, [], [' '], ['', ' '], (' ',)] for value in edge_cases: result = resolve_cors_origins(_FakeConfig(value)) assert result != [], ( f"resolve_cors_origins({value!r}) returned [] — that disables " f"engineio's CORS check entirely, allowing all origins. Must be None." ) def test_resolve_cors_origins_never_silently_returns_wildcard_for_garbage(): """Security-critical: a misshapen config value must NEVER turn into `'*'` by accident. Anything we can't parse falls back to same-origin.""" for bogus in [12345, 3.14, {'a': 1}, object(), True, False]: assert resolve_cors_origins(_FakeConfig(bogus)) is None, ( f"resolve_cors_origins({bogus!r}) returned a non-None value — " f"bogus inputs must default to same-origin only" ) # ── will_reject ─────────────────────────────────────────────────────────── @pytest.mark.parametrize("allowed, origin, host, scheme, expected_reject", [ # Same-origin (Origin's full {scheme}://{host} matches request) — allow (None, 'http://localhost:8888', 'localhost:8888', 'http', False), (None, 'http://192.168.1.5:8888', '192.168.1.5:8888', 'http', False), (None, 'https://soulsync.foo', 'soulsync.foo', 'https', False), # Cross-origin with default allow-list — reject (None, 'https://x.com', 'localhost:8888', 'http', True), (None, 'https://soulsync.foo', 'localhost:8888', 'http', True), # reverse proxy NOT forwarding Host # Scheme mismatch — engineio rejects, so do we (None, 'https://soulsync.foo', 'soulsync.foo', 'http', True), # Wildcard short-circuit — allow ('*', 'https://x.com', 'localhost:8888', 'http', False), ('*', 'https://anything.evil', 'localhost:8888', 'http', False), # Origin in allow-list — allow (['https://x.com'], 'https://x.com', 'localhost:8888', 'http', False), (['https://soulsync.foo'], 'https://soulsync.foo', 'localhost:8888', 'http', False), # Cross-origin not in allow-list — reject (['https://x.com'], 'https://y.com', 'localhost:8888', 'http', True), # Same-origin still works even when allow-list has other entries (['https://x.com'], 'http://localhost:8888', 'localhost:8888', 'http', False), ]) def test_will_reject_predicts_engineio_decision(allowed, origin, host, scheme, expected_reject): assert will_reject(allowed, origin, host, request_scheme=scheme) is expected_reject def test_will_reject_with_empty_host_only_uses_allowlist(): """If the request somehow has no Host header (shouldn't happen but be safe), same-origin can't be checked — fall through to allow-list only.""" assert will_reject(None, 'https://x.com', '', request_scheme='https') is True assert will_reject(['https://x.com'], 'https://x.com', '', request_scheme='https') is False assert will_reject('*', 'https://x.com', '', request_scheme='https') is False def test_will_reject_honors_x_forwarded_host(): """Engineio honors X-Forwarded-Host automatically when cors_allowed_origins is None (engineio/base_server.py:_cors_allowed_origins). Our predictor must mirror that — otherwise reverse-proxy users with proper proxy headers would trigger spurious "rejected" log lines.""" # Same-origin via X-Forwarded-Host (typical TLS-terminating reverse proxy) assert will_reject(None, 'https://soulsync.foo', 'internal:8888', request_scheme='http', forwarded_host='soulsync.foo', forwarded_proto='https') is False # X-Forwarded-Host with comma list (proxy chain) — first entry wins assert will_reject(None, 'https://soulsync.foo', 'internal:8888', request_scheme='http', forwarded_host='soulsync.foo, edge.proxy', forwarded_proto='https') is False # X-Forwarded-Host doesn't match either — still reject assert will_reject(None, 'https://attacker.com', 'internal:8888', request_scheme='http', forwarded_host='soulsync.foo', forwarded_proto='https') is True # X-Forwarded-Host empty — falls back to Host check (the unset case) assert will_reject(None, 'https://soulsync.foo', 'soulsync.foo', request_scheme='https', forwarded_host='') is False def test_will_reject_compares_full_scheme_when_known(): """When the caller provides scheme info, engineio compares full {scheme}://{host} strings. A TLS-terminating proxy can leave the backend seeing http while the browser's Origin is https — engineio rejects, our predictor must too (otherwise we miss logging it).""" # Backend sees http, browser sent https → engineio rejects → we predict reject assert will_reject(None, 'https://soulsync.foo', 'soulsync.foo', request_scheme='http') is True # Backend sees http, browser sent http → match → allow assert will_reject(None, 'http://soulsync.foo', 'soulsync.foo', request_scheme='http') is False # X-Forwarded-Proto says the public request was https → match origin's https assert will_reject(None, 'https://soulsync.foo', 'internal:8888', request_scheme='http', forwarded_host='soulsync.foo', forwarded_proto='https') is False # X-Forwarded-Proto says https but Origin is http → mismatch → reject assert will_reject(None, 'http://soulsync.foo', 'internal:8888', request_scheme='http', forwarded_host='soulsync.foo', forwarded_proto='https') is True # Comma-separated X-Forwarded-Proto (proxy chain) — first wins, like engineio assert will_reject(None, 'https://soulsync.foo', 'internal:8888', request_scheme='http', forwarded_host='soulsync.foo', forwarded_proto='https, http') is False def test_will_reject_allows_missing_origin_matching_engineio(): """Engineio (server.py:207: ``if origin:``) skips CORS validation entirely when no Origin header is sent — non-browser clients (curl, server-to-server) are intentionally permitted. Our predictor must match that or we'd log spurious "rejected" warnings for legitimate non-browser traffic. Must also not raise on None input.""" # Wildcard permits missing origin — and so does the default policy # (matches engineio's actual behavior). assert will_reject('*', None, 'localhost:8888') is False assert will_reject('*', '', 'localhost:8888') is False assert will_reject(None, None, 'localhost:8888') is False assert will_reject(None, '', 'localhost:8888') is False assert will_reject(['https://x.com'], None, 'localhost:8888') is False def test_will_reject_honors_forwarded_proto_alone(): """Engineio adds the forwarded candidate when EITHER X-Forwarded-Proto OR X-Forwarded-Host is present (it falls back to HTTP_HOST for the missing one). Our predictor must mirror that — otherwise a misconfig sending only X-Forwarded-Proto would look like a rejection in our log even though engineio actually allows it.""" # forwarded_proto alone: backend host stands in for forwarded_host assert will_reject(None, 'https://localhost:8888', 'localhost:8888', request_scheme='http', forwarded_proto='https') is False # forwarded_proto alone but origin's host doesn't match the backend host assert will_reject(None, 'https://attacker.com', 'localhost:8888', request_scheme='http', forwarded_proto='https') is True # ── RejectionLogger ─────────────────────────────────────────────────────── def test_rejection_logger_emits_once_per_unique_origin(): log = _CapturingLogger() rl = RejectionLogger(log) # Same origin three times — only one warning for _ in range(3): rl.maybe_log(None, 'https://attacker.com', 'localhost:8888') assert len(log.warnings) == 1 assert 'attacker.com' in log.warnings[0] # Different origin — separate warning rl.maybe_log(None, 'https://other.evil', 'localhost:8888') assert len(log.warnings) == 2 assert 'other.evil' in log.warnings[1] def test_rejection_logger_silent_when_request_would_be_allowed(): log = _CapturingLogger() rl = RejectionLogger(log) # Same-origin — no warning rl.maybe_log(None, 'http://localhost:8888', 'localhost:8888') # Wildcard — no warning rl.maybe_log('*', 'https://x.com', 'localhost:8888') # In allow-list — no warning rl.maybe_log(['https://x.com'], 'https://x.com', 'localhost:8888') # Same-origin via X-Forwarded-Host (with proxy scheme info) — no warning rl.maybe_log(None, 'https://soulsync.foo', 'internal:8888', request_scheme='http', forwarded_host='soulsync.foo', forwarded_proto='https') assert log.warnings == [] def test_rejection_logger_silent_when_no_origin_header(): """Non-browser clients (curl, server-to-server) don't send Origin — they should not trigger the warning.""" log = _CapturingLogger() rl = RejectionLogger(log) rl.maybe_log(None, None, 'localhost:8888') rl.maybe_log(None, '', 'localhost:8888') assert log.warnings == [] def test_rejection_logger_warning_message_points_user_to_settings(): """The warning is the ONLY signal users get when their reverse proxy setup is broken. It must name the origin AND tell them where to fix it.""" log = _CapturingLogger() rl = RejectionLogger(log) rl.maybe_log(None, 'https://soulsync.example.com', 'internal-host:8888') assert len(log.warnings) == 1 msg = log.warnings[0] assert 'soulsync.example.com' in msg, "warning must include the rejected origin" assert 'internal-host:8888' in msg, "warning must include the request Host so users can debug proxy config" assert 'Settings' in msg, "warning must point users to Settings" assert 'Allowed' in msg, "warning must name the field they need to edit" def test_rejection_logger_dedup_is_threadsafe(): """Two threads racing on the same novel origin must result in exactly one warning, not two. Locks the dedup set internally.""" log = _CapturingLogger() rl = RejectionLogger(log) barrier = threading.Barrier(8) def hammer(): barrier.wait() for _ in range(50): rl.maybe_log(None, 'https://race.test', 'localhost:8888') threads = [threading.Thread(target=hammer) for _ in range(8)] for t in threads: t.start() for t in threads: t.join() assert len(log.warnings) == 1 def test_rejection_logger_reset_for_tests_clears_dedup(): log = _CapturingLogger() rl = RejectionLogger(log) rl.maybe_log(None, 'https://x.com', 'localhost:8888') assert len(log.warnings) == 1 rl.reset_for_tests() rl.maybe_log(None, 'https://x.com', 'localhost:8888') assert len(log.warnings) == 2 # logged again after reset def test_rejection_logger_caps_dedup_set_at_configured_limit(): """A hostile actor opening connections from many distinct fake origins would otherwise grow the dedup set unbounded. After the cap is hit, further rejections are silently dropped (after one overflow notice).""" log = _CapturingLogger() rl = RejectionLogger(log, dedup_cap=5) # Fill the cap for i in range(5): rl.maybe_log(None, f'https://fake{i}.com', 'localhost:8888') assert len(log.warnings) == 5 # Next unique origin → overflow notice, NOT a per-origin warning rl.maybe_log(None, 'https://fake5.com', 'localhost:8888') assert len(log.warnings) == 6 assert 'cap' in log.warnings[5].lower() or 'suppress' in log.warnings[5].lower() # Further unique origins → silently dropped (overflow notice already emitted) for i in range(6, 20): rl.maybe_log(None, f'https://fake{i}.com', 'localhost:8888') assert len(log.warnings) == 6 # unchanged # After reset, cap restarts rl.reset_for_tests() rl.maybe_log(None, 'https://fake0.com', 'localhost:8888') assert len(log.warnings) == 7 def test_rejection_logger_default_cap_is_reasonable(): """The default cap should be high enough that legitimate-but-unusual setups (e.g., a power user with a dozen reverse-proxy domains rotating) don't hit the overflow notice during normal use.""" assert RejectionLogger.DEFAULT_DEDUP_CAP >= 50, ( "default dedup cap should fit normal usage" ) # ── log_startup_status ──────────────────────────────────────────────────── def test_startup_status_warns_on_wildcard(): """The wildcard is a security risk — startup must log a warning that points users to the settings page, not just an info line.""" log = _CapturingLogger() log_startup_status('*', log) assert len(log.warnings) == 1 assert "'*'" in log.warnings[0] assert 'Settings' in log.warnings[0] assert log.infos == [] def test_startup_status_info_logs_nonempty_allowlist(): """Non-empty allow-list → info, so users can confirm their config actually took effect.""" log = _CapturingLogger() log_startup_status(['https://x.com', 'https://y.com'], log) assert log.warnings == [] assert len(log.infos) == 1 assert 'https://x.com' in log.infos[0] def test_startup_status_silent_on_default_same_origin(): """None (default) → no log. Same-origin-only is the default; nothing noteworthy to announce on every startup.""" log = _CapturingLogger() log_startup_status(None, log) assert log.warnings == [] assert log.infos == []