From 440c3624f36dffc24c33df0d2b6d45d6f498e2aa Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Wed, 20 May 2026 20:43:35 -0700 Subject: [PATCH] refactor(staging): inject batch-field accessor instead of importing runtime_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per code review: the album-bundle provenance override added in an earlier commit reached into ``core.runtime_state.download_batches`` directly from inside the staging matcher. Sibling modules shouldn't import each other's globals — the existing StagingDeps pattern is the canonical way to inject everything else this helper needs. - core/downloads/staging.py: new optional ``get_batch_field`` callable on ``StagingDeps`` (defaults to None for backward compat with any caller that doesn't know about it yet). The inline ``from core.runtime_state import download_batches`` is gone; the helper now calls ``deps.get_batch_field(batch_id, 'album_bundle_source')`` and falls back to 'staging' when None is returned. Accessor exceptions are swallowed with a debug log so a deleted batch mid-process can't break the staging match. - web_server.py: ``_build_staging_deps`` injects a small ``_staging_get_batch_field`` helper that wraps the tasks_lock + download_batches dict access. Centralises the lock semantics in one place — the staging module no longer needs to know about the lock or the dict. - tests/test_staging_album_provenance.py: 5 new tests covering the full matrix — torrent override applied, usenet override applied, no override falls back to 'staging', missing accessor (default None) falls back to 'staging', accessor raising falls back to 'staging'. Each test seeds + cleans a synthetic task in runtime_state so the test doesn't bleed state across the suite. --- core/downloads/staging.py | 22 ++- tests/test_staging_album_provenance.py | 198 +++++++++++++++++++++++++ web_server.py | 12 ++ 3 files changed, 224 insertions(+), 8 deletions(-) create mode 100644 tests/test_staging_album_provenance.py diff --git a/core/downloads/staging.py b/core/downloads/staging.py index a8dda6c3..dca686ef 100644 --- a/core/downloads/staging.py +++ b/core/downloads/staging.py @@ -58,6 +58,12 @@ class StagingDeps: get_staging_file_cache: Callable[[str], list] docker_resolve_path: Callable[[str], str] post_process_matched_download_with_verification: Callable + # Optional batch-field accessor. Returns ``download_batches[batch_id].get(field)`` + # when the runtime state is available, ``None`` otherwise. Injected so + # this module doesn't have to import from runtime_state directly — + # keeps the dep surface explicit and the function unit-testable + # without a live batch dict. + get_batch_field: Callable[[str, str], Any] = None # type: ignore[assignment] def try_staging_match(task_id, batch_id, track, deps: StagingDeps): @@ -144,15 +150,15 @@ def try_staging_match(task_id, batch_id, track, deps: StagingDeps): # Mark task as completed with staging context. # If the batch was populated by the torrent / usenet album-bundle # flow, prefer that provenance label over generic 'staging' so the - # download history reflects the real source. + # download history reflects the real source. The accessor is + # injected via StagingDeps so this module doesn't reach into + # runtime_state directly (see deps.get_batch_field docstring). _provenance_override = None - try: - from core.runtime_state import download_batches as _db - _batch = _db.get(batch_id) if batch_id else None - if isinstance(_batch, dict): - _provenance_override = _batch.get('album_bundle_source') - except Exception: - _provenance_override = None + if batch_id and deps.get_batch_field is not None: + try: + _provenance_override = deps.get_batch_field(batch_id, 'album_bundle_source') + except Exception as _exc: + logger.debug("get_batch_field failed: %s", _exc) _provenance_username = _provenance_override or 'staging' with tasks_lock: if task_id in download_tasks: diff --git a/tests/test_staging_album_provenance.py b/tests/test_staging_album_provenance.py new file mode 100644 index 00000000..fc62a411 --- /dev/null +++ b/tests/test_staging_album_provenance.py @@ -0,0 +1,198 @@ +"""Tests for the album-bundle provenance override in +``core/downloads/staging.py``. + +Verifies that when ``StagingDeps.get_batch_field`` returns a source +override (i.e. the batch was populated by the torrent / usenet +album-bundle flow), the staging matcher records that source on the +task instead of the generic 'staging' username. Provenance recording +downstream uses ``task['username']`` to set ``source_service`` on +the persisted download row — so this is the single point that +controls whether the history modal shows 'Torrent' / 'Usenet' vs +'Staging' / 'Soulseek'. + +Mocks the rest of StagingDeps so the test doesn't touch the +filesystem, AcoustID, or post-processing. +""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock + +from core.downloads.staging import StagingDeps, try_staging_match +from core.runtime_state import download_tasks, tasks_lock + + +def _make_deps(staging_file, transfer_dir, batch_field_value=None): + """Build a StagingDeps with mocked collaborators. ``batch_field_value`` + is what get_batch_field returns for ``album_bundle_source`` — None + means no override (generic staging match), 'torrent' / 'usenet' + means the album-bundle flow seeded the staging folder.""" + me = MagicMock() + me.normalize_string.side_effect = lambda s: (s or '').lower().strip() + config = MagicMock() + config.get.return_value = transfer_dir + return StagingDeps( + config_manager=config, + matching_engine=me, + get_staging_file_cache=lambda _b: [staging_file], + docker_resolve_path=lambda p: p, + post_process_matched_download_with_verification=lambda *a, **kw: None, + get_batch_field=(lambda _b, _f: batch_field_value) if batch_field_value is not None else (lambda _b, _f: None), + ) + + +def _seed_task(task_id: str, track_name: str, track_artist: str) -> None: + """Register a task in the runtime_state dict so try_staging_match + has something to mark complete.""" + with tasks_lock: + download_tasks[task_id] = { + 'status': 'searching', + 'track_info': { + 'name': track_name, + 'artists': [{'name': track_artist}], + '_is_explicit_album_download': False, + }, + } + + +def _cleanup_task(task_id: str) -> None: + with tasks_lock: + download_tasks.pop(task_id, None) + + +def test_staging_match_uses_torrent_override_when_present(tmp_path) -> None: + src = tmp_path / 'staging' / 'gnx_track_01.flac' + src.parent.mkdir() + src.write_bytes(b'fLaC') + transfer = tmp_path / 'transfer' + deps = _make_deps( + staging_file={'full_path': str(src), 'title': 'Luther', 'artist': 'Kendrick Lamar'}, + transfer_dir=str(transfer), + batch_field_value='torrent', + ) + track = SimpleNamespace(name='Luther', artists=['Kendrick Lamar']) + task_id = 'test_task_torrent_override' + _seed_task(task_id, 'Luther', 'Kendrick Lamar') + try: + ok = try_staging_match(task_id, 'batch_x', track, deps) + assert ok is True + with tasks_lock: + row = download_tasks[task_id] + assert row['username'] == 'torrent', \ + f"Expected provenance override 'torrent', got {row['username']!r}" + assert row['staging_match'] is True + finally: + _cleanup_task(task_id) + + +def test_staging_match_uses_usenet_override_when_present(tmp_path) -> None: + src = tmp_path / 'staging' / 'gnx_track_01.flac' + src.parent.mkdir() + src.write_bytes(b'fLaC') + transfer = tmp_path / 'transfer' + deps = _make_deps( + staging_file={'full_path': str(src), 'title': 'Luther', 'artist': 'Kendrick Lamar'}, + transfer_dir=str(transfer), + batch_field_value='usenet', + ) + track = SimpleNamespace(name='Luther', artists=['Kendrick Lamar']) + task_id = 'test_task_usenet_override' + _seed_task(task_id, 'Luther', 'Kendrick Lamar') + try: + try_staging_match(task_id, 'batch_x', track, deps) + with tasks_lock: + assert download_tasks[task_id]['username'] == 'usenet' + finally: + _cleanup_task(task_id) + + +def test_staging_match_falls_back_to_staging_without_override(tmp_path) -> None: + """When no batch override is present (manual file drop, or + batch has no album_bundle_source field), the staging matcher + uses the historical 'staging' username.""" + src = tmp_path / 'staging' / 'gnx_track_01.flac' + src.parent.mkdir() + src.write_bytes(b'fLaC') + transfer = tmp_path / 'transfer' + deps = _make_deps( + staging_file={'full_path': str(src), 'title': 'Luther', 'artist': 'Kendrick Lamar'}, + transfer_dir=str(transfer), + batch_field_value=None, + ) + track = SimpleNamespace(name='Luther', artists=['Kendrick Lamar']) + task_id = 'test_task_no_override' + _seed_task(task_id, 'Luther', 'Kendrick Lamar') + try: + try_staging_match(task_id, 'batch_x', track, deps) + with tasks_lock: + assert download_tasks[task_id]['username'] == 'staging' + finally: + _cleanup_task(task_id) + + +def test_staging_match_handles_missing_batch_field_callable(tmp_path) -> None: + """Backward compat: callers that build StagingDeps without + supplying ``get_batch_field`` (it defaults to None) still + work — staging matcher falls back to the 'staging' username.""" + src = tmp_path / 'staging' / 'gnx_track_01.flac' + src.parent.mkdir() + src.write_bytes(b'fLaC') + transfer = tmp_path / 'transfer' + + me = MagicMock() + me.normalize_string.side_effect = lambda s: (s or '').lower().strip() + config = MagicMock() + config.get.return_value = str(transfer) + deps = StagingDeps( + config_manager=config, + matching_engine=me, + get_staging_file_cache=lambda _b: [{'full_path': str(src), 'title': 'Luther', 'artist': 'Kendrick Lamar'}], + docker_resolve_path=lambda p: p, + post_process_matched_download_with_verification=lambda *a, **kw: None, + # get_batch_field omitted — defaults to None + ) + track = SimpleNamespace(name='Luther', artists=['Kendrick Lamar']) + task_id = 'test_task_no_accessor' + _seed_task(task_id, 'Luther', 'Kendrick Lamar') + try: + try_staging_match(task_id, 'batch_x', track, deps) + with tasks_lock: + assert download_tasks[task_id]['username'] == 'staging' + finally: + _cleanup_task(task_id) + + +def test_staging_match_swallows_accessor_exception(tmp_path) -> None: + """If the injected accessor raises (e.g. the batch was deleted + mid-process), the staging matcher should fall back to 'staging' + rather than failing the whole match.""" + src = tmp_path / 'staging' / 'gnx_track_01.flac' + src.parent.mkdir() + src.write_bytes(b'fLaC') + transfer = tmp_path / 'transfer' + + def _boom(_b, _f): + raise RuntimeError("batch went away") + + me = MagicMock() + me.normalize_string.side_effect = lambda s: (s or '').lower().strip() + config = MagicMock() + config.get.return_value = str(transfer) + deps = StagingDeps( + config_manager=config, + matching_engine=me, + get_staging_file_cache=lambda _b: [{'full_path': str(src), 'title': 'Luther', 'artist': 'Kendrick Lamar'}], + docker_resolve_path=lambda p: p, + post_process_matched_download_with_verification=lambda *a, **kw: None, + get_batch_field=_boom, + ) + track = SimpleNamespace(name='Luther', artists=['Kendrick Lamar']) + task_id = 'test_task_accessor_raises' + _seed_task(task_id, 'Luther', 'Kendrick Lamar') + try: + try_staging_match(task_id, 'batch_x', track, deps) + with tasks_lock: + assert download_tasks[task_id]['username'] == 'staging' + finally: + _cleanup_task(task_id) diff --git a/web_server.py b/web_server.py index ddf3ebe4..2b151d1f 100644 --- a/web_server.py +++ b/web_server.py @@ -16500,6 +16500,17 @@ def _get_staging_file_cache(batch_id): from core.downloads import staging as _downloads_staging +def _staging_get_batch_field(batch_id, field): + """Accessor injected into StagingDeps so the staging-match helper + can read an album-bundle provenance override off the batch state + without importing ``download_batches`` directly.""" + with tasks_lock: + row = download_batches.get(batch_id) + if isinstance(row, dict): + return row.get(field) + return None + + def _build_staging_deps(): """Build the StagingDeps bundle from web_server.py globals on each call.""" return _downloads_staging.StagingDeps( @@ -16508,6 +16519,7 @@ def _build_staging_deps(): get_staging_file_cache=_get_staging_file_cache, docker_resolve_path=docker_resolve_path, post_process_matched_download_with_verification=_post_process_matched_download_with_verification, + get_batch_field=_staging_get_batch_field, )