refactor(staging): inject batch-field accessor instead of importing runtime_state

Per code review: the album-bundle provenance override added in an
earlier commit reached into ``core.runtime_state.download_batches``
directly from inside the staging matcher. Sibling modules
shouldn't import each other's globals — the existing StagingDeps
pattern is the canonical way to inject everything else this helper
needs.

- core/downloads/staging.py: new optional ``get_batch_field``
  callable on ``StagingDeps`` (defaults to None for backward compat
  with any caller that doesn't know about it yet). The inline
  ``from core.runtime_state import download_batches`` is gone; the
  helper now calls ``deps.get_batch_field(batch_id,
  'album_bundle_source')`` and falls back to 'staging' when None
  is returned. Accessor exceptions are swallowed with a debug log
  so a deleted batch mid-process can't break the staging match.
- web_server.py: ``_build_staging_deps`` injects a small
  ``_staging_get_batch_field`` helper that wraps the tasks_lock +
  download_batches dict access. Centralises the lock semantics in
  one place — the staging module no longer needs to know about
  the lock or the dict.
- tests/test_staging_album_provenance.py: 5 new tests covering the
  full matrix — torrent override applied, usenet override applied,
  no override falls back to 'staging', missing accessor (default
  None) falls back to 'staging', accessor raising falls back to
  'staging'. Each test seeds + cleans a synthetic task in
  runtime_state so the test doesn't bleed state across the suite.
pull/671/head
Broque Thomas 4 days ago
parent ad59bf05a1
commit 440c3624f3

@ -58,6 +58,12 @@ class StagingDeps:
get_staging_file_cache: Callable[[str], list]
docker_resolve_path: Callable[[str], str]
post_process_matched_download_with_verification: Callable
# Optional batch-field accessor. Returns ``download_batches[batch_id].get(field)``
# when the runtime state is available, ``None`` otherwise. Injected so
# this module doesn't have to import from runtime_state directly —
# keeps the dep surface explicit and the function unit-testable
# without a live batch dict.
get_batch_field: Callable[[str, str], Any] = None # type: ignore[assignment]
def try_staging_match(task_id, batch_id, track, deps: StagingDeps):
@ -144,15 +150,15 @@ def try_staging_match(task_id, batch_id, track, deps: StagingDeps):
# Mark task as completed with staging context.
# If the batch was populated by the torrent / usenet album-bundle
# flow, prefer that provenance label over generic 'staging' so the
# download history reflects the real source.
# download history reflects the real source. The accessor is
# injected via StagingDeps so this module doesn't reach into
# runtime_state directly (see deps.get_batch_field docstring).
_provenance_override = None
try:
from core.runtime_state import download_batches as _db
_batch = _db.get(batch_id) if batch_id else None
if isinstance(_batch, dict):
_provenance_override = _batch.get('album_bundle_source')
except Exception:
_provenance_override = None
if batch_id and deps.get_batch_field is not None:
try:
_provenance_override = deps.get_batch_field(batch_id, 'album_bundle_source')
except Exception as _exc:
logger.debug("get_batch_field failed: %s", _exc)
_provenance_username = _provenance_override or 'staging'
with tasks_lock:
if task_id in download_tasks:

@ -0,0 +1,198 @@
"""Tests for the album-bundle provenance override in
``core/downloads/staging.py``.
Verifies that when ``StagingDeps.get_batch_field`` returns a source
override (i.e. the batch was populated by the torrent / usenet
album-bundle flow), the staging matcher records that source on the
task instead of the generic 'staging' username. Provenance recording
downstream uses ``task['username']`` to set ``source_service`` on
the persisted download row so this is the single point that
controls whether the history modal shows 'Torrent' / 'Usenet' vs
'Staging' / 'Soulseek'.
Mocks the rest of StagingDeps so the test doesn't touch the
filesystem, AcoustID, or post-processing.
"""
from __future__ import annotations
from types import SimpleNamespace
from unittest.mock import MagicMock
from core.downloads.staging import StagingDeps, try_staging_match
from core.runtime_state import download_tasks, tasks_lock
def _make_deps(staging_file, transfer_dir, batch_field_value=None):
"""Build a StagingDeps with mocked collaborators. ``batch_field_value``
is what get_batch_field returns for ``album_bundle_source`` None
means no override (generic staging match), 'torrent' / 'usenet'
means the album-bundle flow seeded the staging folder."""
me = MagicMock()
me.normalize_string.side_effect = lambda s: (s or '').lower().strip()
config = MagicMock()
config.get.return_value = transfer_dir
return StagingDeps(
config_manager=config,
matching_engine=me,
get_staging_file_cache=lambda _b: [staging_file],
docker_resolve_path=lambda p: p,
post_process_matched_download_with_verification=lambda *a, **kw: None,
get_batch_field=(lambda _b, _f: batch_field_value) if batch_field_value is not None else (lambda _b, _f: None),
)
def _seed_task(task_id: str, track_name: str, track_artist: str) -> None:
"""Register a task in the runtime_state dict so try_staging_match
has something to mark complete."""
with tasks_lock:
download_tasks[task_id] = {
'status': 'searching',
'track_info': {
'name': track_name,
'artists': [{'name': track_artist}],
'_is_explicit_album_download': False,
},
}
def _cleanup_task(task_id: str) -> None:
with tasks_lock:
download_tasks.pop(task_id, None)
def test_staging_match_uses_torrent_override_when_present(tmp_path) -> None:
src = tmp_path / 'staging' / 'gnx_track_01.flac'
src.parent.mkdir()
src.write_bytes(b'fLaC')
transfer = tmp_path / 'transfer'
deps = _make_deps(
staging_file={'full_path': str(src), 'title': 'Luther', 'artist': 'Kendrick Lamar'},
transfer_dir=str(transfer),
batch_field_value='torrent',
)
track = SimpleNamespace(name='Luther', artists=['Kendrick Lamar'])
task_id = 'test_task_torrent_override'
_seed_task(task_id, 'Luther', 'Kendrick Lamar')
try:
ok = try_staging_match(task_id, 'batch_x', track, deps)
assert ok is True
with tasks_lock:
row = download_tasks[task_id]
assert row['username'] == 'torrent', \
f"Expected provenance override 'torrent', got {row['username']!r}"
assert row['staging_match'] is True
finally:
_cleanup_task(task_id)
def test_staging_match_uses_usenet_override_when_present(tmp_path) -> None:
src = tmp_path / 'staging' / 'gnx_track_01.flac'
src.parent.mkdir()
src.write_bytes(b'fLaC')
transfer = tmp_path / 'transfer'
deps = _make_deps(
staging_file={'full_path': str(src), 'title': 'Luther', 'artist': 'Kendrick Lamar'},
transfer_dir=str(transfer),
batch_field_value='usenet',
)
track = SimpleNamespace(name='Luther', artists=['Kendrick Lamar'])
task_id = 'test_task_usenet_override'
_seed_task(task_id, 'Luther', 'Kendrick Lamar')
try:
try_staging_match(task_id, 'batch_x', track, deps)
with tasks_lock:
assert download_tasks[task_id]['username'] == 'usenet'
finally:
_cleanup_task(task_id)
def test_staging_match_falls_back_to_staging_without_override(tmp_path) -> None:
"""When no batch override is present (manual file drop, or
batch has no album_bundle_source field), the staging matcher
uses the historical 'staging' username."""
src = tmp_path / 'staging' / 'gnx_track_01.flac'
src.parent.mkdir()
src.write_bytes(b'fLaC')
transfer = tmp_path / 'transfer'
deps = _make_deps(
staging_file={'full_path': str(src), 'title': 'Luther', 'artist': 'Kendrick Lamar'},
transfer_dir=str(transfer),
batch_field_value=None,
)
track = SimpleNamespace(name='Luther', artists=['Kendrick Lamar'])
task_id = 'test_task_no_override'
_seed_task(task_id, 'Luther', 'Kendrick Lamar')
try:
try_staging_match(task_id, 'batch_x', track, deps)
with tasks_lock:
assert download_tasks[task_id]['username'] == 'staging'
finally:
_cleanup_task(task_id)
def test_staging_match_handles_missing_batch_field_callable(tmp_path) -> None:
"""Backward compat: callers that build StagingDeps without
supplying ``get_batch_field`` (it defaults to None) still
work staging matcher falls back to the 'staging' username."""
src = tmp_path / 'staging' / 'gnx_track_01.flac'
src.parent.mkdir()
src.write_bytes(b'fLaC')
transfer = tmp_path / 'transfer'
me = MagicMock()
me.normalize_string.side_effect = lambda s: (s or '').lower().strip()
config = MagicMock()
config.get.return_value = str(transfer)
deps = StagingDeps(
config_manager=config,
matching_engine=me,
get_staging_file_cache=lambda _b: [{'full_path': str(src), 'title': 'Luther', 'artist': 'Kendrick Lamar'}],
docker_resolve_path=lambda p: p,
post_process_matched_download_with_verification=lambda *a, **kw: None,
# get_batch_field omitted — defaults to None
)
track = SimpleNamespace(name='Luther', artists=['Kendrick Lamar'])
task_id = 'test_task_no_accessor'
_seed_task(task_id, 'Luther', 'Kendrick Lamar')
try:
try_staging_match(task_id, 'batch_x', track, deps)
with tasks_lock:
assert download_tasks[task_id]['username'] == 'staging'
finally:
_cleanup_task(task_id)
def test_staging_match_swallows_accessor_exception(tmp_path) -> None:
"""If the injected accessor raises (e.g. the batch was deleted
mid-process), the staging matcher should fall back to 'staging'
rather than failing the whole match."""
src = tmp_path / 'staging' / 'gnx_track_01.flac'
src.parent.mkdir()
src.write_bytes(b'fLaC')
transfer = tmp_path / 'transfer'
def _boom(_b, _f):
raise RuntimeError("batch went away")
me = MagicMock()
me.normalize_string.side_effect = lambda s: (s or '').lower().strip()
config = MagicMock()
config.get.return_value = str(transfer)
deps = StagingDeps(
config_manager=config,
matching_engine=me,
get_staging_file_cache=lambda _b: [{'full_path': str(src), 'title': 'Luther', 'artist': 'Kendrick Lamar'}],
docker_resolve_path=lambda p: p,
post_process_matched_download_with_verification=lambda *a, **kw: None,
get_batch_field=_boom,
)
track = SimpleNamespace(name='Luther', artists=['Kendrick Lamar'])
task_id = 'test_task_accessor_raises'
_seed_task(task_id, 'Luther', 'Kendrick Lamar')
try:
try_staging_match(task_id, 'batch_x', track, deps)
with tasks_lock:
assert download_tasks[task_id]['username'] == 'staging'
finally:
_cleanup_task(task_id)

@ -16500,6 +16500,17 @@ def _get_staging_file_cache(batch_id):
from core.downloads import staging as _downloads_staging
def _staging_get_batch_field(batch_id, field):
"""Accessor injected into StagingDeps so the staging-match helper
can read an album-bundle provenance override off the batch state
without importing ``download_batches`` directly."""
with tasks_lock:
row = download_batches.get(batch_id)
if isinstance(row, dict):
return row.get(field)
return None
def _build_staging_deps():
"""Build the StagingDeps bundle from web_server.py globals on each call."""
return _downloads_staging.StagingDeps(
@ -16508,6 +16519,7 @@ def _build_staging_deps():
get_staging_file_cache=_get_staging_file_cache,
docker_resolve_path=docker_resolve_path,
post_process_matched_download_with_verification=_post_process_matched_download_with_verification,
get_batch_field=_staging_get_batch_field,
)

Loading…
Cancel
Save