You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
SoulSync/tests/test_socketio_cors.py

443 lines
19 KiB

"""Tests for `core.socketio_cors` — the resolver, rejection predictor,
and dedup logger that gate Socket.IO WebSocket origins.
These pin the security-relevant behavior:
- The resolver returns ``None`` (engineio's same-origin default — also
the secure default) for anything other than an explicit allow-list or
the wildcard. CRITICAL: the resolver must NEVER return ``[]`` — in
engineio that means "disable CORS handling" which is identical to the
``'*'`` wildcard from a security standpoint (engineio/server.py:202:
``if cors_allowed_origins != []``). And it must never silently turn
into ``'*'`` from a misshapen config value.
- The rejection predictor must mirror engineio's same-origin check
exactly so the warning we log is accurate. This includes accepting
matches against ``X-Forwarded-Host`` since engineio honors that
automatically when ``cors_allowed_origins`` is ``None``.
- The dedup logger must emit each unique origin only once so a malicious
site repeatedly hammering the WS endpoint can't spam logs.
Pure unit tests — no Flask, no engineio, no network. Just the logic.
"""
import threading
from typing import Any, List
import pytest
from core.socketio_cors import (
RejectionLogger,
log_startup_status,
resolve_cors_origins,
will_reject,
)
# ── helpers ───────────────────────────────────────────────────────────────
class _FakeConfig:
"""Minimal config_manager stub that returns one canned value for the
`security.cors_origins` key. Anything else returns the default."""
def __init__(self, value: Any):
self._value = value
def get(self, key: str, default: Any = None) -> Any:
if key == 'security.cors_origins':
return self._value
return default
class _CapturingLogger:
"""Stand-in logger that records every warning/info call so tests can
assert what was emitted (and how many times)."""
def __init__(self):
self.warnings: List[str] = []
self.infos: List[str] = []
def warning(self, msg: str) -> None:
self.warnings.append(msg)
def info(self, msg: str) -> None:
self.infos.append(msg)
# ── resolve_cors_origins ──────────────────────────────────────────────────
# (raw config value, expected resolver output) pairs for the test below.
_RESOLVE_CASES = [
    # Nothing usable configured (unset, blank, separators only, wrong
    # type, empty collections) → None, the same-origin default.
    (None, None),
    ('', None),
    (' ', None),
    ('\n\n', None),
    (',,,', None),
    (12345, None),      # int is not a supported type
    ({'a': 1}, None),   # dict is not a supported type
    ([], None),         # list with no entries
    ([' ', ''], None),  # list whose entries are all blank
    # Wildcard, bare / padded / inside a list.
    ('*', '*'),
    (' * ', '*'),
    (['*'], '*'),
    (['https://x.com', '*'], '*'),  # a wildcard anywhere in the list wins
    # One origin, given as a string or as a one-element list.
    ('https://x.com', ['https://x.com']),
    (['https://x.com'], ['https://x.com']),
    # Several origins separated by commas.
    ('https://x.com, http://y.com', ['https://x.com', 'http://y.com']),
    # Several origins separated by newlines (textarea-style input).
    ('https://x.com\nhttp://y.com', ['https://x.com', 'http://y.com']),
    # Both separators at once, with stray commas and padding to strip.
    ('https://x.com,, http://y.com,\n http://z.com', ['https://x.com', 'http://y.com', 'http://z.com']),
    # A blank entry in the middle of a list is dropped.
    (['https://x.com', ' ', 'http://y.com'], ['https://x.com', 'http://y.com']),
]


@pytest.mark.parametrize(("value", "expected"), _RESOLVE_CASES)
def test_resolve_cors_origins_normalizes_input(value, expected):
    """Each raw config value must resolve to exactly the expected output."""
    resolved = resolve_cors_origins(_FakeConfig(value))
    assert resolved == expected
def test_resolve_cors_origins_handles_missing_config_manager():
    """With no config manager at all (``None``, e.g. very early init) the
    resolver must hand back the secure default instead of raising."""
    resolved = resolve_cors_origins(None)
    assert resolved is None
def test_resolve_cors_origins_never_returns_empty_list():
    """SECURITY CRITICAL: the resolver must not produce ``[]`` for any input.

    Per the engineio reference quoted in this module's docstring
    (engineio/server.py:202), ``cors_allowed_origins=[]`` switches CORS
    handling off — as permissive as ``'*'``. The secure default is
    ``None``, whatever the user typed.
    """
    unset_or_blank = [None, '', ' ', '\n\n', ',,,']
    wrong_types = [12345, 3.14, {'a': 1}, object(), True, False]
    empty_collections = [[], [' '], ['', ' '], (' ',)]

    for raw in unset_or_blank + wrong_types + empty_collections:
        outcome = resolve_cors_origins(_FakeConfig(raw))
        assert outcome != [], (
            f"resolve_cors_origins({raw!r}) returned [] — that disables "
            f"engineio's CORS check entirely, allowing all origins. Must be None."
        )
def test_resolve_cors_origins_never_silently_returns_wildcard_for_garbage():
    """Security-critical: an unparseable config value must resolve to
    ``None`` (same-origin only) and never degrade into `'*'`."""
    garbage_values = (12345, 3.14, {'a': 1}, object(), True, False)
    for junk in garbage_values:
        resolved = resolve_cors_origins(_FakeConfig(junk))
        assert resolved is None, (
            f"resolve_cors_origins({junk!r}) returned a non-None value — "
            f"bogus inputs must default to same-origin only"
        )
# ── will_reject ───────────────────────────────────────────────────────────
# (allowed, origin, host, scheme, expected_reject) rows for the test below.
_WILL_REJECT_CASES = [
    # Origin equals {scheme}://{host} of the request → same-origin, allowed.
    (None, 'http://localhost:8888', 'localhost:8888', 'http', False),
    (None, 'http://192.168.1.5:8888', '192.168.1.5:8888', 'http', False),
    (None, 'https://soulsync.foo', 'soulsync.foo', 'https', False),
    # Foreign origin under the default policy → rejected.
    (None, 'https://x.com', 'localhost:8888', 'http', True),
    # Reverse proxy that does NOT forward Host → rejected.
    (None, 'https://soulsync.foo', 'localhost:8888', 'http', True),
    # Host matches but scheme differs → rejected, as engineio does.
    (None, 'https://soulsync.foo', 'soulsync.foo', 'http', True),
    # Wildcard policy short-circuits → allowed.
    ('*', 'https://x.com', 'localhost:8888', 'http', False),
    ('*', 'https://anything.evil', 'localhost:8888', 'http', False),
    # Origin listed in the allow-list → allowed.
    (['https://x.com'], 'https://x.com', 'localhost:8888', 'http', False),
    (['https://soulsync.foo'], 'https://soulsync.foo', 'localhost:8888', 'http', False),
    # Foreign origin missing from the allow-list → rejected.
    (['https://x.com'], 'https://y.com', 'localhost:8888', 'http', True),
    # Same-origin stays allowed even when the allow-list names other origins.
    (['https://x.com'], 'http://localhost:8888', 'localhost:8888', 'http', False),
]


@pytest.mark.parametrize(
    ("allowed", "origin", "host", "scheme", "expected_reject"),
    _WILL_REJECT_CASES,
)
def test_will_reject_predicts_engineio_decision(allowed, origin, host, scheme, expected_reject):
    """The predictor's verdict must be exactly the expected boolean."""
    predicted = will_reject(allowed, origin, host, request_scheme=scheme)
    assert predicted is expected_reject
def test_will_reject_with_empty_host_only_uses_allowlist():
    """With an empty Host header (shouldn't happen, but be safe) there is
    nothing to compare for same-origin, so only the allow-list decides."""
    verdict_by_policy = [
        (None, True),                # default policy: nothing can match
        (['https://x.com'], False),  # origin is explicitly listed
        ('*', False),                # wildcard admits everything
    ]
    for allowed, expected in verdict_by_policy:
        assert will_reject(allowed, 'https://x.com', '', request_scheme='https') is expected
def test_will_reject_honors_x_forwarded_host():
    """The predictor must take ``X-Forwarded-Host`` into account.

    Engineio honors that header automatically when cors_allowed_origins
    is None (engineio/base_server.py:_cors_allowed_origins). Without the
    same handling here, reverse-proxy users with correct proxy headers
    would see spurious "rejected" log lines.
    """
    # Headers as seen behind a typical TLS-terminating reverse proxy.
    proxied = {
        'request_scheme': 'http',
        'forwarded_host': 'soulsync.foo',
        'forwarded_proto': 'https',
    }

    # Origin matches the forwarded host → same-origin.
    assert will_reject(None, 'https://soulsync.foo', 'internal:8888', **proxied) is False

    # Comma-separated proxy chain: the first entry is the one that counts.
    chained = dict(proxied, forwarded_host='soulsync.foo, edge.proxy')
    assert will_reject(None, 'https://soulsync.foo', 'internal:8888', **chained) is False

    # Origin matches neither Host nor the forwarded host → still rejected.
    assert will_reject(None, 'https://attacker.com', 'internal:8888', **proxied) is True

    # Empty forwarded host behaves like an unset one: plain Host check.
    unset_forwarded = will_reject(
        None, 'https://soulsync.foo', 'soulsync.foo',
        request_scheme='https',
        forwarded_host='',
    )
    assert unset_forwarded is False
def test_will_reject_compares_full_scheme_when_known():
    """Given scheme info, the comparison is on full {scheme}://{host}.

    A TLS-terminating proxy can leave the backend on http while the
    browser's Origin says https. Engineio rejects that, so the predictor
    must as well (otherwise the rejection never gets logged).
    """
    def direct(origin):
        # Request reaching the backend over plain http, no forwarded headers.
        return will_reject(None, origin, 'soulsync.foo', request_scheme='http')

    def via_proxy(origin, proto):
        # Request relayed by a proxy that sets both forwarded headers.
        return will_reject(None, origin, 'internal:8888',
                           request_scheme='http',
                           forwarded_host='soulsync.foo',
                           forwarded_proto=proto)

    # Backend on http, browser on https → mismatch → reject.
    assert direct('https://soulsync.foo') is True
    # Backend on http, browser on http → match → allow.
    assert direct('http://soulsync.foo') is False
    # Forwarded proto says the public side was https, and so does Origin.
    assert via_proxy('https://soulsync.foo', 'https') is False
    # Forwarded proto says https but Origin is http → mismatch → reject.
    assert via_proxy('http://soulsync.foo', 'https') is True
    # Comma-separated forwarded proto (proxy chain): first entry wins.
    assert via_proxy('https://soulsync.foo', 'https, http') is False
def test_will_reject_allows_missing_origin_matching_engineio():
    """A request without an Origin header is never predicted as rejected.

    Engineio (server.py:207: ``if origin:``) skips CORS validation when
    no Origin is sent, so non-browser clients (curl, server-to-server)
    get through. The predictor must agree, or it would log spurious
    "rejected" warnings for legitimate traffic. It must also cope with
    ``None`` as the origin without raising.
    """
    # (policy, origin) — wildcard, default policy and an explicit
    # allow-list, each with a missing or empty Origin.
    policy_and_origin = [
        ('*', None),
        ('*', ''),
        (None, None),
        (None, ''),
        (['https://x.com'], None),
    ]
    for allowed, origin in policy_and_origin:
        assert will_reject(allowed, origin, 'localhost:8888') is False
def test_will_reject_honors_forwarded_proto_alone():
    """``X-Forwarded-Proto`` on its own must still form a forwarded candidate.

    Engineio builds the forwarded candidate when EITHER X-Forwarded-Proto
    OR X-Forwarded-Host is present, using HTTP_HOST for whichever one is
    missing. The predictor must do the same — otherwise a setup sending
    only X-Forwarded-Proto would be logged as rejected although engineio
    lets it through.
    """
    def proto_only(origin):
        # Only the proto header is forwarded; Host stays the backend's own.
        return will_reject(None, origin, 'localhost:8888',
                           request_scheme='http',
                           forwarded_proto='https')

    # The backend host stands in for the absent forwarded host → match.
    assert proto_only('https://localhost:8888') is False
    # Origin's host differs from the backend host → rejected.
    assert proto_only('https://attacker.com') is True
# ── RejectionLogger ───────────────────────────────────────────────────────
def test_rejection_logger_emits_once_per_unique_origin():
    """Repeats of one rejected origin produce a single warning; a new
    origin produces its own."""
    sink = _CapturingLogger()
    dedup = RejectionLogger(sink)

    # Three identical rejections collapse into one warning.
    for _attempt in range(3):
        dedup.maybe_log(None, 'https://attacker.com', 'localhost:8888')
    assert len(sink.warnings) == 1
    assert 'attacker.com' in sink.warnings[0]

    # A previously unseen origin gets a warning of its own.
    dedup.maybe_log(None, 'https://other.evil', 'localhost:8888')
    assert len(sink.warnings) == 2
    assert 'other.evil' in sink.warnings[1]
def test_rejection_logger_silent_when_request_would_be_allowed():
    """No warning may be recorded for requests that would be allowed."""
    sink = _CapturingLogger()
    dedup = RejectionLogger(sink)

    # (positional args, keyword args) for each allowed scenario.
    allowed_requests = [
        # Same-origin.
        ((None, 'http://localhost:8888', 'localhost:8888'), {}),
        # Wildcard policy.
        (('*', 'https://x.com', 'localhost:8888'), {}),
        # Origin present in the allow-list.
        ((['https://x.com'], 'https://x.com', 'localhost:8888'), {}),
        # Same-origin via X-Forwarded-Host, with proxy scheme info.
        ((None, 'https://soulsync.foo', 'internal:8888'),
         {'request_scheme': 'http',
          'forwarded_host': 'soulsync.foo',
          'forwarded_proto': 'https'}),
    ]
    for positional, keywords in allowed_requests:
        dedup.maybe_log(*positional, **keywords)

    assert sink.warnings == []
def test_rejection_logger_silent_when_no_origin_header():
    """Requests without an Origin header — as sent by non-browser clients
    such as curl or server-to-server calls — must not produce a warning."""
    sink = _CapturingLogger()
    dedup = RejectionLogger(sink)
    for absent_origin in (None, ''):
        dedup.maybe_log(None, absent_origin, 'localhost:8888')
    assert sink.warnings == []
def test_rejection_logger_warning_message_points_user_to_settings():
    """The warning is the ONLY hint users get that their reverse-proxy
    setup is broken, so it has to name the origin AND say where to fix it."""
    sink = _CapturingLogger()
    dedup = RejectionLogger(sink)
    dedup.maybe_log(None, 'https://soulsync.example.com', 'internal-host:8888')

    assert len(sink.warnings) == 1
    message = sink.warnings[0]

    # (substring that must appear, failure explanation)
    required_fragments = [
        ('soulsync.example.com', "warning must include the rejected origin"),
        ('internal-host:8888', "warning must include the request Host so users can debug proxy config"),
        ('Settings', "warning must point users to Settings"),
        ('Allowed', "warning must name the field they need to edit"),
    ]
    for fragment, reason in required_fragments:
        assert fragment in message, reason
def test_rejection_logger_dedup_is_threadsafe():
    """Threads racing on one novel origin must yield exactly one warning.

    Eight workers are released together by a barrier and each reports
    the same rejected origin fifty times.
    """
    sink = _CapturingLogger()
    dedup = RejectionLogger(sink)
    worker_count = 8
    start_gate = threading.Barrier(worker_count)

    def worker():
        # Hold until every worker is ready so the first calls collide.
        start_gate.wait()
        for _ in range(50):
            dedup.maybe_log(None, 'https://race.test', 'localhost:8888')

    workers = []
    for _ in range(worker_count):
        workers.append(threading.Thread(target=worker))
    for thread in workers:
        thread.start()
    for thread in workers:
        thread.join()

    assert len(sink.warnings) == 1
def test_rejection_logger_reset_for_tests_clears_dedup():
    """After ``reset_for_tests()`` an already-seen origin is logged again."""
    sink = _CapturingLogger()
    dedup = RejectionLogger(sink)

    def reject_once():
        dedup.maybe_log(None, 'https://x.com', 'localhost:8888')

    reject_once()
    assert len(sink.warnings) == 1

    dedup.reset_for_tests()
    reject_once()
    # The same origin produced a second warning once the state was reset.
    assert len(sink.warnings) == 2
def test_rejection_logger_caps_dedup_set_at_configured_limit():
    """Per-origin warnings stop once ``dedup_cap`` distinct origins were seen.

    A hostile client cycling through many fake origins must not make the
    logger grow without bound: at the cap one overflow notice is
    emitted, and every later new origin is dropped silently.
    """
    sink = _CapturingLogger()
    capped = RejectionLogger(sink, dedup_cap=5)

    # Use up the whole cap with five distinct origins.
    for idx in range(5):
        capped.maybe_log(None, f'https://fake{idx}.com', 'localhost:8888')
    assert len(sink.warnings) == 5

    # The sixth distinct origin yields the overflow notice, not a
    # per-origin warning.
    capped.maybe_log(None, 'https://fake5.com', 'localhost:8888')
    assert len(sink.warnings) == 6
    overflow_notice = sink.warnings[5].lower()
    assert 'cap' in overflow_notice or 'suppress' in overflow_notice

    # More distinct origins add nothing: the notice was already emitted.
    for idx in range(6, 20):
        capped.maybe_log(None, f'https://fake{idx}.com', 'localhost:8888')
    assert len(sink.warnings) == 6

    # Resetting starts the cap over, so logging works again.
    capped.reset_for_tests()
    capped.maybe_log(None, 'https://fake0.com', 'localhost:8888')
    assert len(sink.warnings) == 7
def test_rejection_logger_default_cap_is_reasonable():
    """The default cap must be at least 50, so legitimate-but-unusual
    setups (e.g., a power user rotating a dozen reverse-proxy domains)
    stay clear of the overflow notice in normal use."""
    default_cap = RejectionLogger.DEFAULT_DEDUP_CAP
    assert default_cap >= 50, "default dedup cap should fit normal usage"
# ── log_startup_status ────────────────────────────────────────────────────
def test_startup_status_warns_on_wildcard():
    """A wildcard policy is a security risk: startup must emit a warning
    that points users at the settings page, not a mere info line."""
    sink = _CapturingLogger()
    log_startup_status('*', sink)

    assert len(sink.warnings) == 1
    warning_text = sink.warnings[0]
    assert "'*'" in warning_text
    assert 'Settings' in warning_text
    assert sink.infos == []
def test_startup_status_info_logs_nonempty_allowlist():
    """A non-empty allow-list is reported at info level so users can
    confirm their configuration actually took effect."""
    sink = _CapturingLogger()
    configured = ['https://x.com', 'https://y.com']
    log_startup_status(configured, sink)

    assert sink.warnings == []
    assert len(sink.infos) == 1
    assert 'https://x.com' in sink.infos[0]
def test_startup_status_silent_on_default_same_origin():
    """With ``None`` (the same-origin-only default) startup logs nothing:
    there is nothing noteworthy to announce on every boot."""
    sink = _CapturingLogger()
    log_startup_status(None, sink)
    recorded = (sink.warnings, sink.infos)
    assert recorded[0] == []
    assert recorded[1] == []