From b8384beef9a274cb3b536e7d1180f6c319110b7d Mon Sep 17 00:00:00 2001 From: Tyler Richardson-LaPlume <170156756+IamGroot60@users.noreply.github.com> Date: Fri, 29 May 2026 00:49:02 -0400 Subject: [PATCH] =?UTF-8?q?Fix:=20Usenet=20bundle=20stuck=20at=2099%/100%?= =?UTF-8?q?=20=E2=80=94=20SAB=20reports=20post-processing=20in=20History?= =?UTF-8?q?=20as=20non-terminal=20(#721)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The earlier #721 fix tolerated a ~10s "completed but no save_path" window, but the real production stall sits upstream of that: SABnzbd removes a finished download from the queue and runs par2 verify / repair / unpack *in History*, exposing the live stage in the slot `status` ('Verifying' / 'Repairing' / 'Extracting' / 'Moving' / ...) with `storage` empty until the final move. `_parse_history_slot` mapped EVERY non-'Failed' status to 'completed', so a still-extracting 1.7 GB FLAC album looked "completed with no save_path" the instant download hit 100%. The poll burned its completed-no-path budget mid-PP and bailed, freezing the UI on the last download emit (the stuck-at-99%/100% signature). SAB then finished fine — which is why the job shows Completed in History but SoulSync never staged it. Root fix - `_parse_history_slot` routes `status` through `_map_state`, so PP stages stay NON-terminal: the poll keeps waiting (as 'downloading') for as long as post-processing takes and only a real 'Completed' flips to terminal success. `save_path` is trusted only on true completion (mid-PP path fields may point at the incomplete dir). Supporting / defensive - `UsenetStatus.incomplete_path`: surfaced separately from save_path (SAB `incomplete_path`) and used by the poll loops as a LAST RESORT after the completed-no-path window, to recover the case where `storage` never lands but the files are physically on disk. - `poll_album_download`: dedicated, configurable completed-no-path window (~120s via `download_source.album_bundle_completed_no_path_seconds`) decoupled from the ~10s transient-miss window; incomplete_path fallback; a 30s heartbeat log so the previously-silent poll loop is diagnosable. - `usenet.py` `_download_thread`: per-track parity — it was erroring immediately on the first completed-no-path read. - `album_bundle_dispatch.py` / `status.py` / `monitor.py`: use the project `get_logger` so download-flow logs land in app.log under the `soulsync.*` namespace (they were console-only before, which hid the `[Album Bundle] flow failed` line during triage). Tests - PP-history state mapping; end-to-end Hunky Dory PP regression (download -> Verifying/Extracting in History past both budgets -> Completed+storage -> success); completed-no-path window + incomplete_path fallback; per-track thread parity. ruff + compileall + pytest all green (the only local failures are environmental: missing tzdata + local tools/ffmpeg.exe, neither present on CI). Co-Authored-By: Claude Opus 4.8 (1M context) --- core/download_plugins/album_bundle.py | 122 ++++++++++++++-- core/download_plugins/usenet.py | 47 ++++++- core/downloads/album_bundle_dispatch.py | 11 +- core/downloads/monitor.py | 5 +- core/downloads/status.py | 5 +- core/usenet_clients/base.py | 9 ++ core/usenet_clients/sabnzbd.py | 63 +++++++-- tests/test_album_bundle.py | 101 ++++++++++++++ tests/test_torrent_usenet_plugins.py | 96 +++++++++++++ tests/test_usenet_client_adapters.py | 177 ++++++++++++++++++++++++ 10 files changed, 606 insertions(+), 30 deletions(-) diff --git a/core/download_plugins/album_bundle.py b/core/download_plugins/album_bundle.py index 746022a0..6abe6328 100644 --- a/core/download_plugins/album_bundle.py +++ b/core/download_plugins/album_bundle.py @@ -202,6 +202,34 @@ def get_transient_miss_threshold() -> int: return DEFAULT_TRANSIENT_MISS_THRESHOLD +# How long to keep polling after the client reports terminal success +# but hasn't yet exposed a final save_path. Distinct from the +# transient-miss threshold because the two model different things: +# a transient miss is "the job vanished — fail fast (~10s) so a deleted +# job doesn't hang"; a completed-no-path read is "the download SUCCEEDED +# and the files are on disk — SAB just hasn't finished writing the +# ``storage`` field." The #706 fix reused the 5-poll (~10s) miss window +# here, but #721's own report shows SAB can take 2+ minutes (or, on some +# versions, never expose ``storage`` at all) — so a 10s window false-fails +# a download that actually completed. Expressed in SECONDS (converted to +# a poll count against the live interval) so it's interval-independent. +# Override via ``download_source.album_bundle_completed_no_path_seconds``. +DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS = 120.0 + + +def get_completed_no_path_window_seconds() -> float: + """Return the completed-but-no-save_path tolerance window (seconds).""" + raw = config_manager.get('download_source.album_bundle_completed_no_path_seconds', + DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS) + try: + value = float(raw) + if value > 0: + return value + except (TypeError, ValueError): + pass + return DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS + + class TransientMissCounter: """Bounded retry counter for adapter status reads. @@ -237,6 +265,7 @@ def poll_album_download( failed_states: frozenset = frozenset(['failed']), is_shutdown: Optional[Callable[[], bool]] = None, transient_miss_threshold: int = DEFAULT_TRANSIENT_MISS_THRESHOLD, + completed_no_path_threshold: Optional[int] = None, poll_interval: Optional[float] = None, timeout: Optional[float] = None, sleep: Callable[[float], None] = time.sleep, @@ -270,16 +299,28 @@ def poll_album_download( 'error' → poll infinite-looped until the 6-hour timeout. - ``transient_miss_threshold`` is the number of consecutive None / 'error' reads tolerated before declaring the job gone. Sized for - the SAB queue→history gap window. - - Returns the adapter's reported save_path on terminal success, or - ``None`` on any failure (timeout / disappeared / explicit failed - / shutdown). On every failure path emits ``'failed'`` once with an - ``error`` field describing why. + the SAB queue→history gap window (~10s) — a vanished job should + fail fast. + - ``completed_no_path_threshold`` is a SEPARATE, longer window for + the "client says complete but no save_path yet" case. The download + already succeeded, so this defaults to ~120s (configurable via + ``download_source.album_bundle_completed_no_path_seconds``) instead + of reusing the 10s miss window — #721 showed SAB can take 2+ minutes + to write ``storage``. When the window is exhausted the loop falls + back to the adapter's ``incomplete_path`` (the on-disk in-progress + dir) if present, and only emits terminal ``failed`` when there's no + path of any kind to scan. + + Returns the adapter's reported save_path (or, as a last resort, its + ``incomplete_path``) on terminal success, or ``None`` on any failure + (timeout / disappeared / explicit failed / shutdown). On every + failure path emits ``'failed'`` once with an ``error`` field + describing why. """ interval = poll_interval if poll_interval is not None else get_poll_interval() deadline = monotonic() + (timeout if timeout is not None else get_poll_timeout()) last_save_path: Optional[str] = None + last_incomplete_path: Optional[str] = None misses = TransientMissCounter(transient_miss_threshold) # Separate counter for "client reports terminal-success state but no # save_path field has landed yet." SAB History flips ``status`` to @@ -290,9 +331,19 @@ def poll_album_download( # seconds because ``storage`` isn't populated yet. Pre-fix the # poll returned ``None`` on the first such read, the bundle # plugin marked the batch failed, but the UI still displayed the - # last ``downloading`` progress emit. Now we retry up to the - # same threshold so SAB has a window to write the path. - completed_no_path_misses = TransientMissCounter(transient_miss_threshold) + # last ``downloading`` progress emit. + # + # This window is intentionally LONGER than the transient-miss window: + # the download already SUCCEEDED, so being patient here is cheap and + # correct, whereas the original 5-poll (~10s) reuse false-failed real + # completions (#721 reported SAB taking 2+ minutes). Default ~120s, + # converted from seconds to a poll count against the live interval. + if completed_no_path_threshold is None: + completed_no_path_threshold = max( + transient_miss_threshold, + int(get_completed_no_path_window_seconds() / max(interval, 0.001)) or 1, + ) + completed_no_path_misses = TransientMissCounter(completed_no_path_threshold) def _fail(reason: str) -> None: try: @@ -300,6 +351,16 @@ def poll_album_download( except Exception as cb_exc: logger.debug("%s terminal emit failed: %s", log_prefix, cb_exc) + # Heartbeat so the otherwise-silent download loop is diagnosable. + # The loop emits progress to the UI on every poll but logs nothing + # during normal operation — which made the #721 "stuck at N%" reports + # impossible to triage from logs alone (we couldn't tell if the poll + # was alive, what state SAB returned, or whether it had wedged). Log + # the raw adapter read at most once per heartbeat interval. + HEARTBEAT_SECONDS = 30.0 + last_heartbeat = monotonic() + poll_count = 0 + while monotonic() < deadline: if is_shutdown and is_shutdown(): # Shutdown is a clean exit — don't paint failure on the UI; @@ -312,6 +373,21 @@ def poll_album_download( logger.warning("%s Poll error: %s", log_prefix, e) status = None + poll_count += 1 + now = monotonic() + if now - last_heartbeat >= HEARTBEAT_SECONDS: + last_heartbeat = now + if status is None: + logger.info("%s '%s' poll #%d: client returned no status (miss %d/%d)", + log_prefix, title, poll_count, misses.misses, misses.threshold) + else: + logger.info( + "%s '%s' poll #%d: state=%r progress=%.2f save_path=%r", + log_prefix, title, poll_count, + getattr(status, 'state', None), getattr(status, 'progress', 0.0) or 0.0, + getattr(status, 'save_path', None), + ) + if status is None: if misses.record_miss(): logger.error( @@ -334,6 +410,12 @@ def poll_album_download( speed=status.download_speed) if status.save_path: last_save_path = status.save_path + # Remember the in-progress dir too — never used on a normal + # completion, only as the last-resort fallback below when the + # final save_path provably never lands. + incomplete_path = getattr(status, 'incomplete_path', None) + if incomplete_path: + last_incomplete_path = incomplete_path if status.state in complete_states: if last_save_path: @@ -341,11 +423,27 @@ def poll_album_download( return last_save_path # Terminal-success state but no save_path landed yet. # SAB History flips ``Completed`` a few seconds before - # ``storage`` is populated — give the adapter a few more - # polls before declaring this a hard failure. Without this + # ``storage`` is populated — give the adapter a generous + # window before declaring this a hard failure. Without this # tolerance, every TAR / unrar-bearing usenet release # would race the path-write window and randomly fail. if completed_no_path_misses.record_miss(): + # Last resort before failing: SAB finished and the files + # are physically on disk (#721), but the final ``storage`` + # field never landed. Fall back to the in-progress dir so + # the bundle can still scan + stage the audio, rather than + # leaving the user stuck with a completed-in-SAB download + # that SoulSync never imports. + if last_incomplete_path: + logger.warning( + "%s '%s' completed on the client but never exposed a final " + "save_path after %d polls — falling back to the in-progress " + "path %r as a last resort. If staging fails, the SAB job " + "likely needs its post-process move to finish first.", + log_prefix, title, completed_no_path_misses.misses, + last_incomplete_path, + ) + return last_incomplete_path logger.error( "%s '%s' reported terminal success but no save_path landed " "after %d consecutive polls — bundle cannot stage. Adapter " @@ -419,9 +517,11 @@ __all__ = [ "DEFAULT_POLL_INTERVAL_SECONDS", "DEFAULT_POLL_TIMEOUT_SECONDS", "DEFAULT_TRANSIENT_MISS_THRESHOLD", + "DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS", "TransientMissCounter", "atomic_copy_to_staging", "copy_audio_files_atomically", + "get_completed_no_path_window_seconds", "get_poll_interval", "get_poll_timeout", "get_transient_miss_threshold", diff --git a/core/download_plugins/usenet.py b/core/download_plugins/usenet.py index 8988fec4..75302c50 100644 --- a/core/download_plugins/usenet.py +++ b/core/download_plugins/usenet.py @@ -24,6 +24,7 @@ from core.archive_pipeline import collect_audio_after_extraction from core.download_plugins.album_bundle import ( TransientMissCounter, copy_audio_files_atomically, + get_completed_no_path_window_seconds, pick_best_album_release, poll_album_download, ) @@ -229,11 +230,22 @@ class UsenetDownloadPlugin(DownloadSourcePlugin): deadline = time.monotonic() + _POLL_TIMEOUT_SECONDS last_save_path: Optional[str] = None + last_incomplete_path: Optional[str] = None # Tolerate transient None / unmapped 'error' reads — SAB # removes a job from the queue before adding it to history, # and on busy servers that gap spans several polls. See # ``album_bundle.TransientMissCounter`` for the shared rule. misses = TransientMissCounter() + # Separate, LONGER window for "SAB says completed but hasn't + # written the final save_path yet" — the per-track sibling of the + # bundle fix (#721). Without this the thread called + # ``_finalize_download(None)`` on the first Completed-no-path read + # and errored a download that actually succeeded in SAB. Default + # ~120s, converted to a poll count against the live interval. + completed_no_path_misses = TransientMissCounter( + max(misses.threshold, + int(get_completed_no_path_window_seconds() / max(_POLL_INTERVAL_SECONDS, 0.001)) or 1) + ) while time.monotonic() < deadline: if self.shutdown_check and self.shutdown_check(): return @@ -267,10 +279,41 @@ class UsenetDownloadPlugin(DownloadSourcePlugin): row['error'] = status.error if status.save_path: last_save_path = status.save_path + incomplete_path = getattr(status, 'incomplete_path', None) + if incomplete_path: + last_incomplete_path = incomplete_path if status.state in _COMPLETE_STATES: - self._finalize_download(download_id, last_save_path) - return + if last_save_path: + self._finalize_download(download_id, last_save_path) + return + # Completed but no final save_path yet — SAB flips + # History to 'Completed' before writing ``storage``. + # Wait out the (longer) completed-no-path window rather + # than erroring a download that actually succeeded. + if completed_no_path_misses.record_miss(): + if last_incomplete_path: + logger.warning( + "Usenet %s: '%s' completed but no final save_path after " + "%d polls — falling back to in-progress path %r", + download_id[:8], job_id, completed_no_path_misses.misses, + last_incomplete_path, + ) + self._finalize_download(download_id, last_incomplete_path) + return + self._mark_error( + download_id, + "Usenet job completed but client never reported a save_path", + ) + return + logger.info( + "Usenet %s: '%s' completed on client but save_path not yet set — " + "retrying (poll %d/%d)", + download_id[:8], job_id, + completed_no_path_misses.misses, completed_no_path_misses.threshold, + ) + time.sleep(_POLL_INTERVAL_SECONDS) + continue if status.state == 'failed': self._mark_error(download_id, status.error or "Usenet client reported failure") return diff --git a/core/downloads/album_bundle_dispatch.py b/core/downloads/album_bundle_dispatch.py index 734d35a9..634c0e95 100644 --- a/core/downloads/album_bundle_dispatch.py +++ b/core/downloads/album_bundle_dispatch.py @@ -31,11 +31,18 @@ testable without touching live runtime state. from __future__ import annotations -import logging from pathlib import Path from typing import Any, Callable, Optional, Protocol -logger = logging.getLogger(__name__) +from utils.logging_config import get_logger + +# Use the project logger factory so these lines land in app.log under the +# ``soulsync.*`` namespace the file handler captures. Plain +# ``logging.getLogger(__name__)`` logs to the console only (the file +# handler is attached to the ``soulsync`` logger), which is why +# ``[Album Bundle] flow failed`` showed up in the terminal but never in +# app.log during the #721 triage. +logger = get_logger("downloads.album_bundle_dispatch") class BatchStateAccess(Protocol): diff --git a/core/downloads/monitor.py b/core/downloads/monitor.py index c4b645f5..4bcb0bc3 100644 --- a/core/downloads/monitor.py +++ b/core/downloads/monitor.py @@ -5,7 +5,6 @@ The class body is byte-identical to the original. Module-level globals helpers and orchestrator handles. ``IS_SHUTTING_DOWN`` is a module-level flag mirrored from web_server's own flag in ``_shutdown_runtime_components``. """ -import logging import threading import time @@ -18,8 +17,10 @@ from core.runtime_state import ( tasks_lock, ) from utils.async_helpers import run_async +from utils.logging_config import get_logger -logger = logging.getLogger(__name__) +# Project logger factory so these lines reach app.log (soulsync.* namespace). +logger = get_logger("downloads.monitor") # Mirrored from web_server.IS_SHUTTING_DOWN via _shutdown_runtime_components. IS_SHUTTING_DOWN = False diff --git a/core/downloads/status.py b/core/downloads/status.py index 2d2073db..010581b9 100644 --- a/core/downloads/status.py +++ b/core/downloads/status.py @@ -20,7 +20,6 @@ are passed via `StatusDeps` so the module is web_server-import-free. from __future__ import annotations -import logging import threading import time from dataclasses import dataclass @@ -32,8 +31,10 @@ from core.runtime_state import ( download_tasks, tasks_lock, ) +from utils.logging_config import get_logger -logger = logging.getLogger(__name__) +# Project logger factory so these lines reach app.log (soulsync.* namespace). +logger = get_logger("downloads.status") def _schedule_completion_callback(deps, batch_id: str, task_id: str, success: bool) -> None: diff --git a/core/usenet_clients/base.py b/core/usenet_clients/base.py index f77b3d78..12b25578 100644 --- a/core/usenet_clients/base.py +++ b/core/usenet_clients/base.py @@ -39,6 +39,15 @@ class UsenetStatus: download_speed: int # bytes/sec eta: Optional[int] = None # seconds, None if unknown save_path: Optional[str] = None + # In-progress / pre-move directory (SAB ``incomplete_path``). Kept + # SEPARATE from ``save_path`` on purpose: it points at the staging + # dir SAB uses BEFORE its post-process move, so it must never be + # treated as the final path on a normal completion. The poll loops + # only fall back to it as a LAST RESORT — after waiting the full + # completed-but-no-save_path window — to recover the (#721) case + # where SAB finished, the files are physically on disk, but the + # final ``storage`` field never lands. See ``poll_album_download``. + incomplete_path: Optional[str] = None category: Optional[str] = None files: Optional[List[str]] = None error: Optional[str] = None diff --git a/core/usenet_clients/sabnzbd.py b/core/usenet_clients/sabnzbd.py index e6c30a9f..daf55d65 100644 --- a/core/usenet_clients/sabnzbd.py +++ b/core/usenet_clients/sabnzbd.py @@ -232,19 +232,54 @@ class SABnzbdAdapter: ) def _parse_history_slot(self, slot: dict) -> UsenetStatus: - # History entries are post-download — progress is 1.0 unless failed. - sab_state = (slot.get('status') or '').lower() - is_failed = sab_state == 'failed' - save_path = self._extract_history_save_path(slot) + # History entries cover BOTH finished jobs AND jobs that are still + # POST-PROCESSING. SAB removes a job from the queue the moment the + # download finishes and runs par2 verify / repair / unpack / move + # while the job sits in History — exposing the live stage in the + # ``status`` field ('Verifying' / 'Repairing' / 'Extracting' / + # 'Moving' / 'Running' / ...). Only 'Completed' means truly done; + # 'Failed' / 'Deleted' mean failure. + # + # The old logic mapped EVERY non-'failed' status to 'completed'. + # That made the poll treat a still-extracting 1.7 GB FLAC album + # (status 'Extracting', ``storage`` not written yet) as "completed + # but no save_path" and burn the completed-no-path window mid-PP — + # exactly the #721 stuck-at-99% signature in production where the + # path IS shared. Route the status through the same queue-state map + # so PP stages stay NON-terminal: the poll keeps waiting (as + # 'downloading') for as long as post-processing takes, and only a + # real 'Completed' flips it to terminal success. + mapped = _map_state(slot.get('status') or '') + is_failed = mapped == 'failed' + is_completed = mapped == 'completed' + # Only trust the final path on a TRUE completion. Mid-PP the path + # fields may be empty or still point at the incomplete dir; the + # completed-no-path retry handles the brief gap between the + # 'Completed' flip and ``storage`` landing in the same response. + save_path = self._extract_history_save_path(slot) if is_completed else None + # ``incomplete_path`` is surfaced separately (NOT as save_path) so + # the poll loops can fall back to it only as a last resort once + # the final ``storage`` field has provably failed to land — see + # ``UsenetStatus.incomplete_path`` and the #721 fallback in + # ``poll_album_download``. Whitespace-only values are treated as + # absent, same as the save_path chain. + incomplete_path = slot.get('incomplete_path') + if not (isinstance(incomplete_path, str) and incomplete_path.strip()): + incomplete_path = None + bytes_total = int(slot.get('bytes') or 0) return UsenetStatus( id=str(slot.get('nzo_id') or ''), name=slot.get('name') or '', - state='failed' if is_failed else 'completed', + state=mapped, + # Download itself is finished for ANY History slot (in-PP or + # done), so report full download progress — don't snap the UI + # back to 0% while SAB verifies/unpacks. Failed = 0. progress=0.0 if is_failed else 1.0, - size=int(slot.get('bytes') or 0), - downloaded=int(slot.get('bytes') or 0) if not is_failed else 0, + size=bytes_total, + downloaded=0 if is_failed else bytes_total, download_speed=0, save_path=save_path, + incomplete_path=incomplete_path, category=slot.get('category'), error=slot.get('fail_message') if is_failed else None, ) @@ -279,10 +314,16 @@ class SABnzbdAdapter: value = slot.get(key) if value and isinstance(value, str) and value.strip(): return value - # Loud diagnostic when the bundle poll is about to fail on this: - # users on SAB versions / forks with novel field names need to - # see this in the logs so we can grow ``_HISTORY_SAVE_PATH_KEYS``. - logger.debug( + # Loud diagnostic when the bundle poll is about to wait/fail on + # this: users on SAB versions / forks with novel field names need + # to see this in the logs so we can grow ``_HISTORY_SAVE_PATH_KEYS``. + # INFO (not DEBUG) on purpose — a completed History slot with no + # resolvable path is the #721 stuck-bundle signature, and dumping + # the actual slot keys here is what lets us add the missing field + # without a debug-logging round-trip. It only fires for completed + # slots that lack every known path field, so it self-limits the + # moment ``storage`` lands. + logger.info( "[SAB] History slot for nzo_id=%s has no save_path in any " "of the known fields %r — slot keys: %r", slot.get('nzo_id'), diff --git a/tests/test_album_bundle.py b/tests/test_album_bundle.py index a628b68b..57790286 100644 --- a/tests/test_album_bundle.py +++ b/tests/test_album_bundle.py @@ -22,10 +22,12 @@ import pytest from core.download_plugins.album_bundle import ( ALBUM_PICK_MAX_BYTES, ALBUM_PICK_MIN_BYTES, + DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS, DEFAULT_POLL_INTERVAL_SECONDS, DEFAULT_POLL_TIMEOUT_SECONDS, atomic_copy_to_staging, copy_audio_files_atomically, + get_completed_no_path_window_seconds, get_poll_interval, get_poll_timeout, pick_best_album_release, @@ -301,6 +303,28 @@ def test_get_poll_timeout_falls_back_on_garbage() -> None: assert get_poll_timeout() == DEFAULT_POLL_TIMEOUT_SECONDS +def test_get_completed_no_path_window_uses_default_when_unset() -> None: + with patch('core.download_plugins.album_bundle.config_manager') as cm: + cm.get.return_value = DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS + assert get_completed_no_path_window_seconds() == DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS + + +def test_get_completed_no_path_window_honours_override() -> None: + """Users whose SAB is slow to write ``storage`` (large box sets, + slow disks) can widen the tolerance without touching code.""" + with patch('core.download_plugins.album_bundle.config_manager') as cm: + cm.get.return_value = 300 + assert get_completed_no_path_window_seconds() == 300.0 + + +def test_get_completed_no_path_window_falls_back_on_garbage() -> None: + with patch('core.download_plugins.album_bundle.config_manager') as cm: + cm.get.return_value = '' + assert get_completed_no_path_window_seconds() == DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS + cm.get.return_value = 0 + assert get_completed_no_path_window_seconds() == DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS + + # --------------------------------------------------------------------------- # poll_album_download — lifted poll loop for both torrent + usenet plugins. # --------------------------------------------------------------------------- @@ -395,6 +419,7 @@ class _Status: downloaded: int = 0 download_speed: int = 0 error: Optional[str] = None + incomplete_path: Optional[str] = None class _ScriptedClock: @@ -657,6 +682,82 @@ def test_poll_gives_up_when_completed_with_no_save_path_persists() -> None: assert 'save_path' in err or 'success but never' in err +def test_poll_completed_no_path_window_is_longer_than_miss_window() -> None: + """#721 follow-up: the completed-but-no-save_path window must be + DECOUPLED from (and far longer than) the transient-miss window. SAB + can take 2+ minutes to write ``storage``; the old code reused the + 5-poll (~10s) miss window here and false-failed real completions. + With a small miss threshold but the default long no-path window, a + download that takes 8 completed-no-path polls before ``storage`` + lands must still succeed.""" + clock = _ScriptedClock() + emit, calls = _make_emit_recorder() + sequence = iter( + [_Status(state='completed', save_path=None, progress=1.0)] * 8 + + [_Status(state='completed', save_path='/dl/late', progress=1.0)] + ) + result = poll_album_download( + get_status=lambda: next(sequence), + title='Slow SAB', + emit=emit, + complete_states=frozenset(['completed']), + transient_miss_threshold=3, # vanished-job window stays short + # completed_no_path_threshold left to default (~120s / interval). + sleep=clock.sleep, monotonic=clock.monotonic, + poll_interval=2.0, timeout=600.0, + ) + assert result == '/dl/late' + assert 'failed' not in [c[0] for c in calls] + + +def test_poll_falls_back_to_incomplete_path_after_window_exhausted() -> None: + """When SAB reports the job completed but the final save_path NEVER + lands (some SAB versions / no post-process move), the files are + still physically on disk in the in-progress dir. Rather than failing + a download that actually succeeded, the poll falls back to the + adapter's ``incomplete_path`` as a last resort once the window is + exhausted — no terminal 'failed' emit.""" + clock = _ScriptedClock() + emit, calls = _make_emit_recorder() + result = poll_album_download( + get_status=lambda: _Status( + state='completed', save_path=None, + incomplete_path='/sab/incomplete/album', progress=1.0, + ), + title='No Storage Field', + emit=emit, + complete_states=frozenset(['completed']), + completed_no_path_threshold=3, + sleep=clock.sleep, monotonic=clock.monotonic, + poll_interval=2.0, timeout=600.0, + ) + assert result == '/sab/incomplete/album' + assert 'failed' not in [c[0] for c in calls] + + +def test_poll_fails_when_no_path_and_no_incomplete_path() -> None: + """Last resort only fires when there's actually a path to scan. + With neither a final save_path nor an incomplete_path, the poll + still fails loudly after the window so the UI doesn't freeze.""" + clock = _ScriptedClock() + emit, calls = _make_emit_recorder() + result = poll_album_download( + get_status=lambda: _Status(state='completed', save_path=None, + incomplete_path=None, progress=1.0), + title='Truly Pathless', + emit=emit, + complete_states=frozenset(['completed']), + completed_no_path_threshold=3, + sleep=clock.sleep, monotonic=clock.monotonic, + poll_interval=2.0, timeout=600.0, + ) + assert result is None + failed_calls = [c for c in calls if c[0] == 'failed'] + assert len(failed_calls) == 1 + err = failed_calls[0][1].get('error', '').lower() + assert 'save_path' in err or 'success but never' in err + + def test_poll_uses_save_path_from_earlier_downloading_emit_if_completed_lacks_one() -> None: """Sticky save_path: when an earlier ``downloading`` status carried a non-empty ``save_path`` (qBit shows the target dir mid-download), diff --git a/tests/test_torrent_usenet_plugins.py b/tests/test_torrent_usenet_plugins.py index 1dd9f75b..8b53c6f9 100644 --- a/tests/test_torrent_usenet_plugins.py +++ b/tests/test_torrent_usenet_plugins.py @@ -361,6 +361,102 @@ def test_usenet_finalize_picks_first_audio_file(tmp_path: Path) -> None: assert plugin.active_downloads['u-1']['file_path'].endswith('track1.flac') +class _FakeClock: + """Deterministic monotonic + sleep so the per-track poll loop runs + in microseconds and never actually blocks.""" + + def __init__(self) -> None: + self.now = 0.0 + + def monotonic(self) -> float: + return self.now + + def sleep(self, seconds: float) -> None: + self.now += seconds + + +def _drive_download_thread(plugin, statuses, *, window_seconds=10.0): + """Run ``_download_thread`` end-to-end against a scripted adapter. + + ``statuses`` is the sequence of ``UsenetStatus`` reads the poll loop + will see (one per poll). Returns the finished active_downloads row.""" + download_id = 'u-poll' + plugin.active_downloads[download_id] = { + 'id': download_id, 'filename': 'x', 'username': 'usenet', + 'display_name': 'X', 'state': 'Initializing', 'progress': 0.0, + 'size': 0, 'transferred': 0, 'speed': 0, 'file_path': None, + 'audio_files': [], 'job_id': None, 'error': None, + } + adapter = MagicMock() + adapter.is_configured.return_value = True + adapter.add_nzb.return_value = 'job1' + adapter.get_status.side_effect = list(statuses) + clock = _FakeClock() + with patch('core.download_plugins.usenet.get_active_usenet_adapter', return_value=adapter), \ + patch('core.download_plugins.usenet.run_async', side_effect=lambda x: x), \ + patch('core.download_plugins.usenet.get_completed_no_path_window_seconds', + return_value=window_seconds), \ + patch('core.download_plugins.usenet.time', clock), \ + patch('core.download_plugins.usenet.collect_audio_after_extraction', + return_value=[Path('/done/track1.flac')]): + plugin._download_thread(download_id, 'http://x/y.nzb') + return plugin.active_downloads[download_id] + + +def test_usenet_thread_waits_out_completed_no_path_then_finalizes(tmp_path: Path) -> None: + """Per-track sibling of the #721 bundle fix. SAB flips History to + 'completed' before writing ``storage`` — the thread must NOT error + on the first such read. It waits out the completed-no-path window; + when the path lands it finalizes as Succeeded.""" + plugin = UsenetDownloadPlugin() + statuses = [ + UsenetStatus(id='job1', name='A', state='downloading', progress=0.6, + size=100, downloaded=60, download_speed=10), + UsenetStatus(id='job1', name='A', state='completed', progress=1.0, + size=100, downloaded=100, download_speed=0, save_path=None), + UsenetStatus(id='job1', name='A', state='completed', progress=1.0, + size=100, downloaded=100, download_speed=0, save_path=None), + UsenetStatus(id='job1', name='A', state='completed', progress=1.0, + size=100, downloaded=100, download_speed=0, + save_path='/done/album'), + ] + row = _drive_download_thread(plugin, statuses) + assert row['state'] == 'Completed, Succeeded' + assert row['progress'] == 100.0 + assert row['file_path'] == str(Path('/done/track1.flac')) + + +def test_usenet_thread_falls_back_to_incomplete_path_when_storage_never_lands() -> None: + """If ``storage`` never lands but SAB exposed an ``incomplete_path`` + (files physically on disk), the thread recovers via the in-progress + dir as a last resort rather than erroring a completed download.""" + plugin = UsenetDownloadPlugin() + completed_no_path = UsenetStatus( + id='job1', name='A', state='completed', progress=1.0, + size=100, downloaded=100, download_speed=0, + save_path=None, incomplete_path='/sab/incomplete/A', + ) + # Window of 10s / 2s interval = 5 polls, floored at the miss + # threshold; supply plenty so the fallback fires. + row = _drive_download_thread(plugin, [completed_no_path] * 12) + assert row['state'] == 'Completed, Succeeded' + assert row['audio_files'] == [str(Path('/done/track1.flac'))] + + +def test_usenet_thread_errors_when_completed_with_no_path_at_all() -> None: + """No final save_path AND no incomplete_path → there's nothing to + scan, so the thread errors (rather than spinning or finalizing a + phantom path).""" + plugin = UsenetDownloadPlugin() + completed_no_path = UsenetStatus( + id='job1', name='A', state='completed', progress=1.0, + size=100, downloaded=100, download_speed=0, save_path=None, + ) + row = _drive_download_thread(plugin, [completed_no_path] * 12) + assert row['state'] == 'Completed, Errored' + assert 'save_path' in (row['error'] or '').lower() + + def test_usenet_is_configured_requires_both_sides() -> None: plugin = UsenetDownloadPlugin() fake_adapter = MagicMock() diff --git a/tests/test_usenet_client_adapters.py b/tests/test_usenet_client_adapters.py index 8e1c6d0b..1445a36c 100644 --- a/tests/test_usenet_client_adapters.py +++ b/tests/test_usenet_client_adapters.py @@ -254,6 +254,103 @@ def test_sab_history_save_path_ignores_incomplete_path() -> None: assert slot.save_path is None +def test_sab_history_post_processing_states_are_not_terminal() -> None: + """#721 production root cause: SAB keeps a job in History while it + post-processes (verify / repair / unpack / move), exposing the live + stage in ``status``. Those must map to NON-terminal states so the + poll keeps waiting — NOT to 'completed', which would make the poll + treat a still-extracting album as "completed but no save_path" and + bail mid-PP (the stuck-at-99% bug). Only a real 'Completed' is + terminal success.""" + adapter = _sab_with_config() + for sab_status, expected in [ + ('Verifying', 'verifying'), + ('Repairing', 'repairing'), + ('Extracting', 'extracting'), + ('Moving', 'extracting'), + ('Running', 'extracting'), + ('Queued', 'queued'), + ]: + slot = adapter._parse_history_slot({ + 'nzo_id': 'pp', 'name': 'PP', 'status': sab_status, + 'bytes': 1000, + # ``storage`` not written yet — PP still in flight. + }) + assert slot.state == expected, f'{sab_status} -> {slot.state}, want {expected}' + # Non-terminal: not 'completed', not 'failed'. + assert slot.state not in ('completed', 'failed') + # Download is done, so progress is full, but no final path yet. + assert slot.progress == 1.0 + assert slot.save_path is None + + +def test_sab_history_completed_still_resolves_save_path() -> None: + """The true-completion path is unchanged: status 'Completed' with a + populated ``storage`` resolves to terminal success + final path.""" + adapter = _sab_with_config() + slot = adapter._parse_history_slot({ + 'nzo_id': 'done', 'name': 'D', 'status': 'Completed', + 'bytes': 1000, 'storage': '/data/downloads/music/Album', + }) + assert slot.state == 'completed' + assert slot.progress == 1.0 + assert slot.save_path == '/data/downloads/music/Album' + + +def test_sab_history_post_processing_ignores_storage_until_completed() -> None: + """Even if SAB has written a (possibly incomplete-dir) path field + while still post-processing, the adapter must NOT expose it as + save_path until the slot flips to 'Completed' — otherwise the bundle + could stage from a half-unpacked directory.""" + adapter = _sab_with_config() + slot = adapter._parse_history_slot({ + 'nzo_id': 'pp2', 'name': 'PP2', 'status': 'Extracting', + 'bytes': 1000, 'storage': '/data/downloads/music/Album', + }) + assert slot.state == 'extracting' + assert slot.save_path is None + + +def test_sab_history_surfaces_incomplete_path_separately_from_save_path() -> None: + """``incomplete_path`` must be exposed on its OWN field, never folded + into ``save_path``. The poll loops fall back to it only as a last + resort after the completed-no-path window is exhausted (#721) — using + it as save_path would bypass that window and point staging at the + pre-move dir on every normal completion.""" + adapter = _sab_with_config() + slot = adapter._parse_history_slot({ + 'nzo_id': 'gap2', 'name': 'No Storage', 'status': 'Completed', + 'bytes': 0, + # storage / path / download_path / dirname all absent — the + # #721 gap window — but the in-progress dir is known. + 'incomplete_path': '/sab/incomplete/No Storage', + }) + assert slot.save_path is None + assert slot.incomplete_path == '/sab/incomplete/No Storage' + + +def test_sab_history_incomplete_path_ignores_whitespace_only() -> None: + adapter = _sab_with_config() + slot = adapter._parse_history_slot({ + 'nzo_id': 'gap3', 'name': 'WS', 'status': 'Completed', + 'bytes': 0, 'incomplete_path': ' ', + }) + assert slot.incomplete_path is None + + +def test_sab_history_prefers_storage_and_still_carries_incomplete_path() -> None: + """When ``storage`` IS present, save_path resolves to it normally, + and incomplete_path is carried alongside (harmless — the poll only + consults it when save_path never lands).""" + adapter = _sab_with_config() + slot = adapter._parse_history_slot({ + 'nzo_id': 'both', 'name': 'B', 'status': 'Completed', + 'bytes': 0, 'storage': '/final', 'incomplete_path': '/inc', + }) + assert slot.save_path == '/final' + assert slot.incomplete_path == '/inc' + + def test_sab_add_nzb_via_url_returns_first_nzo_id() -> None: adapter = _sab_with_config() with patch('core.usenet_clients.sabnzbd.http_requests.get', @@ -425,6 +522,86 @@ def test_sab_poll_recovers_after_queue_to_history_handoff_gap() -> None: assert 'failed' not in [e[0] for e in emits] +def test_sab_poll_waits_through_history_post_processing_then_completes() -> None: + """Integration regression for the #721 PRODUCTION root cause + (David Bowie - Hunky Dory, 1.7 GB FLAC, stuck at 99%). + + SAB moves a finished download into History and runs par2 verify + + unpack THERE, exposing the live stage in ``status`` while ``storage`` + stays empty until the final move. For a 1.7 GB FLAC album that + post-processing window is far longer than the ~10s completed-no-path + budget. Pre-fix the adapter mapped 'Extracting' → 'completed', so the + poll saw "completed but no save_path" the instant download finished, + burned its budget mid-PP, and bailed — freezing the UI on the last + 'downloading' emit (99%). SAB then finished fine, which is why the + job shows Completed in History but SoulSync never staged it. + + Post-fix the in-PP History slots map to NON-terminal states, so the + poll just keeps waiting (as 'downloading') until SAB flips the slot + to 'Completed' with a real ``storage`` path — no matter how long PP + takes — then returns it.""" + from core.download_plugins.album_bundle import poll_album_download + + adapter = _sab_with_config() + downloading = _mock_response(200, {'queue': {'slots': [ + {'nzo_id': 'hd', 'filename': 'Hunky Dory', 'status': 'Downloading', + 'percentage': '99', 'mb': '1700', 'mbleft': '17', 'timeleft': '0:00:05'}, + ]}}) + empty_queue = _mock_response(200, {'queue': {'slots': []}}) + # Long post-processing window, reported via HISTORY (download already + # left the queue). storage NOT yet written. + pp_verify = _mock_response(200, {'history': {'slots': [ + {'nzo_id': 'hd', 'name': 'Hunky Dory', 'status': 'Verifying', 'bytes': 1_700_000_000}, + ]}}) + pp_extract = _mock_response(200, {'history': {'slots': [ + {'nzo_id': 'hd', 'name': 'Hunky Dory', 'status': 'Extracting', 'bytes': 1_700_000_000}, + ]}}) + done = _mock_response(200, {'history': {'slots': [ + {'nzo_id': 'hd', 'name': 'Hunky Dory', 'status': 'Completed', + 'bytes': 1_700_000_000, + 'storage': '/data/downloads/music/David.Bowie-Hunky.Dory'}, + ]}}) + + # Each poll = queue call + (if empty) history call. + poll_results = [ + downloading, # poll 1: queue has it @99% (1 call) + empty_queue, pp_verify, # poll 2: PP verifying in history + empty_queue, pp_verify, # poll 3: still verifying + empty_queue, pp_extract, # poll 4: extracting + empty_queue, pp_extract, # poll 5: still extracting + empty_queue, pp_extract, # poll 6: still extracting (past old 5-poll budget) + empty_queue, pp_extract, # poll 7 + empty_queue, done, # poll 8: completed + storage + ] + + class _Clock: + def __init__(self): self.now = 0.0 + def monotonic(self): return self.now + def sleep(self, s): self.now += s + + clock = _Clock() + emits: list = [] + with patch('core.usenet_clients.sabnzbd.http_requests.get', + side_effect=poll_results): + result = poll_album_download( + get_status=lambda: adapter._get_status_sync('hd'), + title='David Bowie - Hunky Dory', + emit=lambda state, **kw: emits.append((state, kw)), + complete_states=frozenset(['completed']), + failed_states=frozenset(['failed']), + transient_miss_threshold=5, + # Short no-path budget proves PP is NOT consuming it anymore. + completed_no_path_threshold=2, + sleep=clock.sleep, monotonic=clock.monotonic, + poll_interval=2.0, timeout=600.0, + ) + + assert result == '/data/downloads/music/David.Bowie-Hunky.Dory' + # Never failed despite PP outlasting both the miss budget AND the + # (deliberately tiny) completed-no-path budget — PP is non-terminal. + assert 'failed' not in [e[0] for e in emits] + + # --------------------------------------------------------------------------- # NZBGet # ---------------------------------------------------------------------------