You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
SoulSync/core/download_plugins/album_bundle.py

753 lines
32 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""Shared helpers for the album-bundle download flow.
The torrent and usenet download plugins both implement a
``download_album_to_staging`` method that searches Prowlarr for a
whole release, hands it to the active downloader, walks the
resulting audio files, and copies them into the staging folder. The
two implementations share the same release-picker heuristic and the
same staging-path collision logic.
Pulled out of ``core/download_plugins/torrent.py`` so the usenet
plugin doesn't have to import private helpers from a sibling
plugin (Cin's "no leaky module boundaries" standard).
Also exposes ``atomic_copy_to_staging`` — the audio file is copied
to a ``.tmp.<random>`` sidecar first and atomically renamed onto its
final extension. The Auto-Import worker filters by audio extension
so the in-flight ``.tmp`` file is never picked up mid-copy, closing
the race between the album-bundle copy loop and Auto-Import's
folder scan.
"""
from __future__ import annotations
import re
import shutil
import time
import unicodedata
import uuid
from pathlib import Path
from typing import Any, Callable, Iterable, Optional
from config.settings import config_manager
from utils.logging_config import get_logger
logger = get_logger("download_plugins.album_bundle")
# Minimum album-title relevance a Prowlarr candidate must clear to be eligible
# for an album-bundle download (#730). Prowlarr returns broad fuzzy matches — a
# "Heroes" search also returns other Bowie albums — so without this gate the
# most-popular result wins regardless of whether it's the right album. Below
# this floor we refuse the bundle and let the caller fall back to per-track.
# Compared with ``>=`` against the score from ``album_title_relevance``.
_ALBUM_TITLE_RELEVANCE_FLOOR = 0.6
# Album-pick size floor / ceiling. Single-track torrents (~10 MB)
# are rejected when bigger candidates exist; anything past 3 GB is
# treated as suspicious (multi-disc box-set + scans + extras).
# Both bounds are inclusive and expressed in bytes. The window is only
# enforced when at least one candidate falls inside it — see
# ``pick_best_album_release``.
ALBUM_PICK_MIN_BYTES = 40 * 1024 * 1024  # 40 MiB
ALBUM_PICK_MAX_BYTES = 3 * 1024 * 1024 * 1024  # 3 GiB
# Quality-score weights for the album-pick heuristic. Mirrors the
# tier order in ``core/imports/file_ops.py``'s ``quality_tiers`` —
# higher number = preferred. Qualities missing from this map score 0
# in ``quality_score``.
_QUALITY_SCORE = {'flac': 4, 'ogg': 3, 'aac': 2, 'mp3': 1}
# Default poll cadence + timeout for the album-download poll loop.
# Both are overridable through config so users with slow trackers
# / large box-sets can extend the deadline without editing code.
DEFAULT_POLL_INTERVAL_SECONDS = 2.0
DEFAULT_POLL_TIMEOUT_SECONDS = 6 * 60 * 60  # 6 hours
def get_poll_interval() -> float:
    """Return the sleep between two status polls, in seconds.

    Reads ``download_source.album_bundle_poll_interval_seconds`` from the
    config. A value that cannot be converted to ``float`` or that is not
    strictly positive falls back to ``DEFAULT_POLL_INTERVAL_SECONDS``.
    """
    configured = config_manager.get(
        'download_source.album_bundle_poll_interval_seconds',
        DEFAULT_POLL_INTERVAL_SECONDS,
    )
    try:
        seconds = float(configured)
    except (TypeError, ValueError):
        # Non-numeric config value (None, list, garbage string, ...).
        return DEFAULT_POLL_INTERVAL_SECONDS
    return seconds if seconds > 0 else DEFAULT_POLL_INTERVAL_SECONDS
def get_poll_timeout() -> float:
    """Return the overall deadline for one album-bundle download, in seconds.

    Reads ``download_source.album_bundle_timeout_seconds`` from the config.
    A value that cannot be converted to ``float`` or that is not strictly
    positive falls back to ``DEFAULT_POLL_TIMEOUT_SECONDS``.
    """
    configured = config_manager.get(
        'download_source.album_bundle_timeout_seconds',
        DEFAULT_POLL_TIMEOUT_SECONDS,
    )
    try:
        seconds = float(configured)
    except (TypeError, ValueError):
        # Non-numeric config value (None, list, garbage string, ...).
        return DEFAULT_POLL_TIMEOUT_SECONDS
    return seconds if seconds > 0 else DEFAULT_POLL_TIMEOUT_SECONDS
def quality_score(title: str, quality_guess) -> int:
    """Turn the quality inferred from a release title into a sortable int.

    ``quality_guess`` is the plugin-supplied callable that maps a title to a
    quality string ('flac' / 'mp3' / ...). It is injected rather than
    imported so this module never has to import either plugin (which would
    risk a circular import). A falsy guess, or a quality that has no entry
    in ``_QUALITY_SCORE``, scores 0.
    """
    guessed = quality_guess(title)
    lookup_key = guessed or ''
    return _QUALITY_SCORE.get(lookup_key, 0)
def _normalize_release_text(text: str) -> str:
    """Canonicalise a title for comparison: lowercase, accent-folded, no punctuation.

    The text is NFKD-decomposed and its combining marks are dropped, so an
    accented letter folds onto its base letter (Björk -> bjork) instead of
    being split apart ('bj rk'). Every run of characters outside ``a-z0-9``
    then becomes a single space and the result is trimmed. Falsy input
    yields the empty string.
    """
    if not text:
        return ""
    base_chars = [
        ch for ch in unicodedata.normalize("NFKD", text)
        if unicodedata.combining(ch) == 0
    ]
    folded = "".join(base_chars).lower()
    # Punctuation becomes a separator so "heroes" lines up with
    # "heroes:" / "heroes -"; whitespace runs are squeezed afterwards.
    spaced = re.sub(r"[^a-z0-9]+", " ", folded)
    return re.sub(r"\s+", " ", spaced).strip()
# Edition / format / qualifier words that appear in stored album names or
# release titles but say nothing about WHICH album it is. Stripped before
# scoring so "Currents" matches "Currents (Deluxe)" and "Heroes" matches
# "Heroes (2017 Remaster)" — the #730 fix must not reject the RIGHT album just
# because the DB name carries an edition suffix the torrent title lacks.
# Looked up token-by-token in ``_significant_words``, so every entry is a
# single lowercase word.
_ALBUM_NOISE_WORDS = frozenset({
    "deluxe", "edition", "remaster", "remastered", "remasters", "remix",
    "expanded", "anniversary", "bonus", "version", "explicit", "clean",
    "reissue", "special", "limited", "collectors", "collector", "the",
    "ep", "lp", "album", "single", "disc", "cd", "vol", "volume",
    "flac", "mp3", "aac", "ogg", "wav", "alac", "m4a", "320", "256", "192",
    "web", "vinyl", "hi", "res", "hires", "24bit", "16bit", "original",
    "soundtrack", "ost",
})
def _significant_words(normalized: str) -> list:
    """Return the tokens of ``normalized`` that actually identify an album.

    Pure-digit tokens (years, bitrates) and edition/format noise words are
    discarded. If that would discard everything — e.g. an album literally
    named '1989' or 'Deluxe' — the unfiltered tokens are returned instead.
    """
    tokens = normalized.split()
    kept = []
    for token in tokens:
        if token.isdigit() or token in _ALBUM_NOISE_WORDS:
            continue
        kept.append(token)
    return kept if kept else tokens
def album_title_relevance(candidate_title: str, album_name: str) -> float:
    """Score how well a release title matches the requested album (0.0–1.0).

    The score is the fraction of the album's SIGNIFICANT words (edition /
    format / year noise removed) that occur as whole tokens in the candidate
    title. Matching is per token, never per substring: "Heroes" does NOT
    match "Superheroes", and an album sharing no significant word scores 0,
    while "Currents" still matches "Currents (Deluxe)" and "Heroes" matches
    the "2017 Remaster".

    Returns 1.0 when the album name normalises to nothing (there is nothing
    to gate on, which preserves the old behaviour for callers that pass no
    title) and 0.0 when the candidate title normalises to nothing.
    """
    wanted = _normalize_release_text(album_name)
    if not wanted:
        return 1.0
    offered = _normalize_release_text(candidate_title)
    if not offered:
        return 0.0

    album_words = _significant_words(wanted)
    if not album_words:
        return 1.0

    offered_tokens = set(offered.split())
    hits = [word for word in album_words if word in offered_tokens]
    score = len(hits) / len(album_words)

    # Full-phrase bonus (idea from contributor PR #731): an intact core
    # phrase in the title means high confidence even for long multi-word
    # names. The match is anchored on token boundaries on purpose — a raw
    # `phrase in title` test would let "heroes" hit "superheroes" and bring
    # back the wrong-album bug #730 fixes.
    # NOTE(review): an intact token-anchored phrase appears to imply every
    # word already matched above, so this looks like a safety net only.
    phrase = " ".join(album_words)
    if phrase and re.search(rf"(?:^| ){re.escape(phrase)}(?: |$)", offered):
        score = max(score, 0.9)
    return score
def pick_best_album_release(candidates, quality_guess,
                            album_name: str = "") -> Optional[object]:
    """Choose the single best torrent / NZB for an album-bundle download.

    Selection steps, in priority order:

    0. Album-TITLE relevance gate (#730). Only applied when ``album_name``
       is given: candidates scoring below ``_ALBUM_TITLE_RELEVANCE_FLOOR``
       are dropped, because Prowlarr returns broad fuzzy matches and the
       most popular hit may be a different album. If NOTHING clears the
       floor the function returns None so the caller can fall back to
       per-track instead of downloading a confident mismatch.
    1. Album-ish size (40 MB – 3 GB). When at least one candidate is inside
       the window only those are considered; otherwise every remaining
       candidate stays in play.
    2. More seeders beat fewer (dead torrents = dead downloads). When
       ``seeders`` is None, ``grabs`` is used as the popularity proxy
       (usenet releases).
    3. Higher quality as inferred from the title
       (FLAC > OGG > AAC > MP3, per ``_QUALITY_SCORE``).
    4. Larger size as the final tiebreaker (often = higher bitrate).

    Returns the winning candidate, or None when there is nothing to pick.
    """
    if not candidates:
        return None

    # 0. Relevance gate — skipped without an album name, since relevance
    # cannot be judged against nothing (old behaviour).
    if album_name:
        on_topic = []
        for cand in candidates:
            relevance = album_title_relevance(cand.title or "", album_name)
            if relevance >= _ALBUM_TITLE_RELEVANCE_FLOOR:
                on_topic.append(cand)
        if not on_topic:
            logger.warning(
                "[Album Bundle] No candidate cleared the title-relevance floor "
                "for '%s' (%d candidates rejected as wrong album) — refusing the "
                "bundle so the caller falls back to per-track.",
                album_name, len(candidates),
            )
            return None
        candidates = on_topic

    # 1. Prefer releases inside the size window, but never end up empty-handed
    # just because every candidate is out of range.
    in_window = [
        cand for cand in candidates
        if ALBUM_PICK_MIN_BYTES <= (cand.size or 0) <= ALBUM_PICK_MAX_BYTES
    ]
    pool = in_window or list(candidates)
    if not pool:
        return None

    def _rank(cand) -> tuple:
        # 2-4. Tuple ordering encodes popularity, then quality, then size.
        if cand.seeders is not None:
            popularity = cand.seeders
        else:
            popularity = cand.grabs or 0
        quality = quality_score(cand.title or '', quality_guess)
        return (popularity, quality, cand.size or 0)

    return max(pool, key=_rank)
def unique_staging_path(staging_dir: Path, src: Path) -> Path:
    """Pick a destination inside ``staging_dir`` that does not exist yet.

    The first choice is ``src``'s own file name. On collision ``_1``,
    ``_2``, ... up to ``_999`` are inserted before the extension and the
    first free name wins. If every numbered name is taken as well, the
    plain (colliding) path is returned so the caller overwrites — better
    than looping forever or crashing.
    """
    plain = staging_dir / src.name
    if not plain.exists():
        return plain
    stem, ext = plain.stem, plain.suffix
    numbered = (staging_dir / f"{stem}_{n}{ext}" for n in range(1, 1000))
    return next((path for path in numbered if not path.exists()), plain)
def atomic_copy_to_staging(src: Path, dest: Path) -> bool:
    """Copy ``src`` to ``dest`` without exposing a partial file to
    folder scanners.

    The Auto-Import worker filters by audio extension when scanning
    Staging — see ``AUDIO_EXTENSIONS`` in ``core/auto_import_worker.py``.
    The in-flight copy is therefore written to a ``<dest>.tmp.<random>``
    sidecar in the same directory as ``dest`` and only moved onto the final
    name with ``Path.replace`` once the copy has finished. That move is a
    same-directory rename, so Auto-Import either sees the file at its final
    name (complete) or doesn't see it at all (in flight). An existing
    ``dest`` is overwritten.

    Returns:
        True once the file is in place. The function never returns False.

    Raises:
        Exception: whatever ``shutil.copy2`` or ``Path.replace`` raised.
            The sidecar is removed on a best-effort basis first. Callers
            (e.g. ``copy_audio_files_atomically``) catch and log the
            failure, so nothing is logged here beyond cleanup problems.
    """
    tmp = dest.with_name(f"{dest.name}.tmp.{uuid.uuid4().hex[:8]}")
    try:
        shutil.copy2(src, tmp)
        tmp.replace(dest)
    except Exception:
        # Best-effort cleanup of the partial / orphaned sidecar. If unlink
        # fails (locked, permissions) we leave it — Auto-Import ignores it
        # anyway because of the .tmp extension.
        try:
            tmp.unlink(missing_ok=True)
        except Exception as cleanup_exc:
            logger.debug("album_bundle tmp cleanup failed: %s", cleanup_exc)
        raise
    return True
# Number of consecutive None-status reads tolerated before treating the
# job as gone. Sized for the SAB queue→history transition window: SAB
# removes the slot from the queue before adding it to history, and on a
# busy server (par2 verify + unrar) that window can be several poll
# intervals. At the default 2s interval, 5 retries = ~10s of tolerance
# before we give up and emit a terminal failure. Override via
# ``download_source.album_bundle_transient_miss_threshold`` for users
# whose servers need more headroom (very large multi-disc box sets,
# slow disks, etc.).
# Unit: polls, not seconds — the wall-clock tolerance scales with the
# configured poll interval.
DEFAULT_TRANSIENT_MISS_THRESHOLD = 5
def get_transient_miss_threshold() -> int:
    """Return how many consecutive missed reads a poll loop tolerates.

    Reads ``download_source.album_bundle_transient_miss_threshold`` from the
    config. A value that cannot be converted to ``int`` or that is not
    strictly positive falls back to ``DEFAULT_TRANSIENT_MISS_THRESHOLD``.
    """
    configured = config_manager.get(
        'download_source.album_bundle_transient_miss_threshold',
        DEFAULT_TRANSIENT_MISS_THRESHOLD,
    )
    try:
        polls = int(configured)
    except (TypeError, ValueError):
        # Non-numeric config value (None, list, garbage string, ...).
        return DEFAULT_TRANSIENT_MISS_THRESHOLD
    return polls if polls > 0 else DEFAULT_TRANSIENT_MISS_THRESHOLD
# How long to keep polling after the client reports terminal success
# but hasn't yet exposed a final save_path. Distinct from the
# transient-miss threshold because the two model different things:
# a transient miss is "the job vanished — fail fast (~10s) so a deleted
# job doesn't hang"; a completed-no-path read is "the download SUCCEEDED
# and the files are on disk — SAB just hasn't finished writing the
# ``storage`` field." The #706 fix reused the 5-poll (~10s) miss window
# here, but #721's own report shows SAB can take 2+ minutes (or, on some
# versions, never expose ``storage`` at all) — so a 10s window false-fails
# a download that actually completed. Expressed in SECONDS (converted to
# a poll count against the live interval) so it's interval-independent.
# Override via ``download_source.album_bundle_completed_no_path_seconds``.
# The seconds-to-polls conversion happens in ``poll_album_download``.
DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS = 120.0
def get_completed_no_path_window_seconds() -> float:
    """Return the completed-but-no-save_path tolerance window, in seconds.

    Reads ``download_source.album_bundle_completed_no_path_seconds`` from
    the config. A value that cannot be converted to ``float`` or that is not
    strictly positive falls back to
    ``DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS``.
    """
    configured = config_manager.get(
        'download_source.album_bundle_completed_no_path_seconds',
        DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS,
    )
    try:
        seconds = float(configured)
    except (TypeError, ValueError):
        # Non-numeric config value (None, list, garbage string, ...).
        return DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS
    return seconds if seconds > 0 else DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS
class TransientMissCounter:
    """Bounded counter of consecutive failed adapter status reads.

    The album-bundle poll (``poll_album_download``) and the per-track
    download threads in ``usenet.py`` / ``torrent.py`` all need the same
    rule: tolerate N consecutive missing or unmapped reads, then declare
    the job gone. Keeping that rule in one small class makes it
    unit-testable in isolation and avoids hand-copied inline counters
    drifting apart.
    """

    def __init__(self, threshold: Optional[int] = None) -> None:
        # Fall back to the configured threshold only when the caller did
        # not supply one.
        if threshold is None:
            threshold = get_transient_miss_threshold()
        self.threshold = threshold
        self.misses = 0

    def record_miss(self) -> bool:
        """Count one more miss; return True once the threshold is reached
        (the caller should give up)."""
        self.misses += 1
        exhausted = self.misses >= self.threshold
        return exhausted

    def reset(self) -> None:
        """Forget all recorded misses after a successful read."""
        self.misses = 0
def poll_album_download(
    *,
    get_status: Callable[[], Optional[Any]],
    title: str,
    emit: Callable[..., None],
    complete_states: frozenset,
    failed_states: frozenset = frozenset(['failed']),
    is_shutdown: Optional[Callable[[], bool]] = None,
    transient_miss_threshold: int = DEFAULT_TRANSIENT_MISS_THRESHOLD,
    completed_no_path_threshold: Optional[int] = None,
    poll_interval: Optional[float] = None,
    timeout: Optional[float] = None,
    sleep: Callable[[float], None] = time.sleep,
    monotonic: Callable[[], float] = time.monotonic,
    log_prefix: str = '[album_bundle]',
) -> Optional[str]:
    """Drive the per-poll status loop for an album-bundle download.

    Lifted out of ``UsenetDownloadPlugin._poll_album_download`` and the
    sibling torrent method so the loop is testable in isolation and so
    both plugins share the same exit semantics.

    Contract:

    - ``get_status()`` returns the adapter status object for the bound
      job, ``None`` when the client doesn't know about the job
      currently (transient or terminal — disambiguated by retry count).
      An exception raised by ``get_status`` is logged and handled like a
      ``None`` read.
    - ``emit(state, **fields)`` is the plugin's progress callback —
      this function calls it with ``state='downloading'`` on EVERY poll
      that yields a status object (before the state is inspected, so this
      includes terminal and unmapped reads) and calls it once more with
      ``state='failed'`` before returning ``None`` on a failure path,
      so the UI doesn't freeze on the last 'downloading' emit. The one
      exception is shutdown, which returns ``None`` without emitting.
    - ``complete_states`` is the adapter's terminal-success set
      ('completed' alone for usenet; 'seeding' + 'completed' for
      torrent because seeding-but-files-on-disk also counts).
    - ``failed_states`` is the explicit-failure set. The adapter-level
      'error' (unmapped state default) is intentionally NOT in here —
      that's treated as a transient miss because a real SAB / NZBGet
      / qBit never returns a literal 'error' state on a healthy job;
      it's only our default fallback for unknown queue strings. Real
      example: SAB's 'Pp' post-processing state was unmapped → became
      'error' → poll infinite-looped until the 6-hour timeout.
    - ``is_shutdown``, when given, is checked at the top of every
      iteration; a truthy result ends the loop immediately.
    - ``transient_miss_threshold`` is the number of consecutive None /
      'error' reads tolerated before declaring the job gone. Sized for
      the SAB queue→history gap window (~10s) — a vanished job should
      fail fast. The default is the module constant, not the config
      value; callers that want the configured value must pass it.
    - ``completed_no_path_threshold`` is a SEPARATE, longer window for
      the "client says complete but no save_path yet" case. The download
      already succeeded, so this defaults to ~120s (configurable via
      ``download_source.album_bundle_completed_no_path_seconds``) instead
      of reusing the 10s miss window — #721 showed SAB can take 2+ minutes
      to write ``storage``. When the window is exhausted the loop falls
      back to the adapter's ``incomplete_path`` (the on-disk in-progress
      dir) if present, and only emits terminal ``failed`` when there's no
      path of any kind to scan.
    - ``poll_interval`` / ``timeout`` default to ``get_poll_interval()`` /
      ``get_poll_timeout()`` when ``None``.
    - ``sleep`` / ``monotonic`` are injectable clock hooks (default
      ``time.sleep`` / ``time.monotonic``).

    Returns the most recent non-empty save_path the adapter reported (or,
    as a last resort, its ``incomplete_path``) on terminal success, or
    ``None`` on any failure (timeout / disappeared / explicit failed /
    shutdown). Every failure path except shutdown emits ``'failed'`` once
    with an ``error`` field describing why.
    """
    interval = poll_interval if poll_interval is not None else get_poll_interval()
    deadline = monotonic() + (timeout if timeout is not None else get_poll_timeout())
    last_save_path: Optional[str] = None
    last_incomplete_path: Optional[str] = None
    misses = TransientMissCounter(transient_miss_threshold)
    # Separate counter for "client reports terminal-success state but no
    # save_path field has landed yet." SAB History flips ``status`` to
    # 'Completed' a few seconds before its post-processing pipeline
    # writes the final ``storage`` field — see issue #721 (Forty Licks
    # stuck at 61%): SAB shows Completed in the UI, but
    # ``_parse_history_slot`` returns ``save_path=None`` for those few
    # seconds because ``storage`` isn't populated yet. Pre-fix the
    # poll returned ``None`` on the first such read, the bundle
    # plugin marked the batch failed, but the UI still displayed the
    # last ``downloading`` progress emit.
    #
    # This window is intentionally LONGER than the transient-miss window:
    # the download already SUCCEEDED, so being patient here is cheap and
    # correct, whereas the original 5-poll (~10s) reuse false-failed real
    # completions (#721 reported SAB taking 2+ minutes). Default ~120s,
    # converted from seconds to a poll count against the live interval.
    if completed_no_path_threshold is None:
        # Never shorter than the transient-miss window, and at least one
        # poll; the 0.001 clamp guards the division against a zero interval.
        completed_no_path_threshold = max(
            transient_miss_threshold,
            int(get_completed_no_path_window_seconds() / max(interval, 0.001)) or 1,
        )
    completed_no_path_misses = TransientMissCounter(completed_no_path_threshold)

    # Terminal 'failed' emit. A broken callback must not mask the real
    # failure, so its exception is only logged at debug level.
    def _fail(reason: str) -> None:
        try:
            emit('failed', release=title, error=reason)
        except Exception as cb_exc:
            logger.debug("%s terminal emit failed: %s", log_prefix, cb_exc)

    # Heartbeat so the otherwise-silent download loop is diagnosable.
    # The loop emits progress to the UI on every poll but logs nothing
    # during normal operation — which made the #721 "stuck at N%" reports
    # impossible to triage from logs alone (we couldn't tell if the poll
    # was alive, what state SAB returned, or whether it had wedged). Log
    # the raw adapter read at most once per heartbeat interval.
    HEARTBEAT_SECONDS = 30.0
    last_heartbeat = monotonic()
    poll_count = 0
    while monotonic() < deadline:
        if is_shutdown and is_shutdown():
            # Shutdown is a clean exit — don't paint failure on the UI;
            # the app is going away anyway.
            return None
        try:
            status = get_status()
        except Exception as e:
            # An adapter exception is handled exactly like "no status".
            logger.warning("%s Poll error: %s", log_prefix, e)
            status = None
        poll_count += 1
        now = monotonic()
        if now - last_heartbeat >= HEARTBEAT_SECONDS:
            last_heartbeat = now
            if status is None:
                # Logged before this read is counted, so the miss figure
                # excludes the current poll.
                logger.info("%s '%s' poll #%d: client returned no status (miss %d/%d)",
                            log_prefix, title, poll_count, misses.misses, misses.threshold)
            else:
                logger.info(
                    "%s '%s' poll #%d: state=%r progress=%.2f save_path=%r",
                    log_prefix, title, poll_count,
                    getattr(status, 'state', None), getattr(status, 'progress', 0.0) or 0.0,
                    getattr(status, 'save_path', None),
                )
        if status is None:
            if misses.record_miss():
                logger.error(
                    "%s '%s' missing from client for %d consecutive polls — giving up",
                    log_prefix, title, misses.misses,
                )
                _fail('Disappeared from client (no status after retries)')
                return None
            sleep(interval)
            continue
        # Reset the miss counter only when the adapter returned a state
        # we actually recognise. The default-fallback 'error' is treated
        # as a continuing transient miss below, so it must NOT reset
        # here — otherwise a persistently-unmapped state loops forever.
        if status.state != 'error':
            misses.reset()
        emit('downloading', progress=status.progress, downloaded=status.downloaded,
             speed=status.download_speed)
        # Keep the latest non-empty save_path across polls; a later read
        # without one does not clear it.
        if status.save_path:
            last_save_path = status.save_path
        # Remember the in-progress dir too — never used on a normal
        # completion, only as the last-resort fallback below when the
        # final save_path provably never lands.
        incomplete_path = getattr(status, 'incomplete_path', None)
        if incomplete_path:
            last_incomplete_path = incomplete_path
        if status.state in complete_states:
            if last_save_path:
                completed_no_path_misses.reset()
                return last_save_path
            # Terminal-success state but no save_path landed yet.
            # SAB History flips ``Completed`` a few seconds before
            # ``storage`` is populated — give the adapter a generous
            # window before declaring this a hard failure. Without this
            # tolerance, every TAR / unrar-bearing usenet release
            # would race the path-write window and randomly fail.
            if completed_no_path_misses.record_miss():
                # Last resort before failing: SAB finished and the files
                # are physically on disk (#721), but the final ``storage``
                # field never landed. Fall back to the in-progress dir so
                # the bundle can still scan + stage the audio, rather than
                # leaving the user stuck with a completed-in-SAB download
                # that SoulSync never imports.
                if last_incomplete_path:
                    logger.warning(
                        "%s '%s' completed on the client but never exposed a final "
                        "save_path after %d polls — falling back to the in-progress "
                        "path %r as a last resort. If staging fails, the SAB job "
                        "likely needs its post-process move to finish first.",
                        log_prefix, title, completed_no_path_misses.misses,
                        last_incomplete_path,
                    )
                    return last_incomplete_path
                logger.error(
                    "%s '%s' reported terminal success but no save_path landed "
                    "after %d consecutive polls — bundle cannot stage. Adapter "
                    "may need new history-slot fallback fields (storage / path "
                    "/ download_path / dirname). Last status: state=%r progress=%r",
                    log_prefix, title, completed_no_path_misses.misses,
                    status.state, status.progress,
                )
                _fail('Client reported success but never provided a save_path')
                return None
            logger.info(
                "%s '%s' is %s on the client but save_path not yet set — "
                "retrying (poll %d/%d)",
                log_prefix, title, status.state,
                completed_no_path_misses.misses, completed_no_path_misses.threshold,
            )
            sleep(interval)
            continue
        if status.state in failed_states:
            # Prefer the adapter's own error text when it supplies one.
            error = getattr(status, 'error', None) or 'Client reported failure'
            logger.error("%s '%s' failed: %s", log_prefix, title, error)
            _fail(error)
            return None
        if status.state == 'error':
            # Unmapped adapter state — see contract docstring. Warn so
            # we hear about new states the adapter map needs to grow
            # without breaking the user's download. The miss counter
            # was intentionally NOT reset above for this branch.
            logger.warning(
                "%s '%s' returned unmapped state — treating as transient",
                log_prefix, title,
            )
            if misses.record_miss():
                _fail('Client returned unmapped state repeatedly')
                return None
        sleep(interval)
    # The while condition went false: the overall deadline has passed.
    logger.error("%s '%s' timed out", log_prefix, title)
    _fail('Download timed out')
    return None
def _candidate_download_roots(config_get: Callable[..., Any]) -> list:
    """List the directories where THIS process can read finished downloads.

    Feeds the basename fallback of ``resolve_reported_save_path``.

    The order is significant: the most specific usenet / torrent roots come
    first, followed by the general Soulseek download / transfer dirs, which
    in the standard shared-volume arr setup are bind-mounted onto the very
    directory the usenet client writes its completed downloads into.
    Relative values (e.g. ``./downloads``) are returned as configured; they
    resolve against the process CWD — the container's ``/app`` — which is
    where those mounts live.

    Unset / falsy settings are skipped, every value is converted with
    ``str``, and duplicates are dropped while keeping first-seen order.
    """
    config_keys = (
        'download_source.usenet_download_path',
        'usenet_client.completed_path',
        'usenet_client.download_path',
        'download_source.torrent_download_path',
        'soulseek.download_path',
        'soulseek.transfer_path',
    )
    configured = (config_get(key, None) for key in config_keys)
    as_text = [str(value) for value in configured if value]
    # dict keys keep insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(as_text))
def resolve_reported_save_path(
    reported_path: Optional[str],
    config_get: Optional[Callable[..., Any]] = None,
) -> Optional[str]:
    """Map a downloader-reported save_path onto one THIS process can read.

    Usenet / torrent clients report paths as seen from inside THEIR OWN
    container (e.g. SAB hands back ``/data/downloads/music/<album>``), while
    SoulSync often mounts the same files elsewhere
    (``/app/downloads/<album>``). Handing the client's path straight to the
    audio walker then produces "No audio files found" although the files
    are physically present — the classic arr-stack remote-path mismatch.

    Resolution order:

    1. The reported path verbatim, if it is a readable directory here
       (deployments that mirror the client's mount paths).
    2. Explicit prefix mappings from ``download_source.usenet_path_mappings``
       — a list of ``{"from": "...", "to": "..."}`` entries (Sonarr/Radarr
       style remote path mapping) for non-shared / oddly-mounted layouts.
       Malformed entries are skipped; a mapping only wins if the mapped
       directory exists.
    3. Basename fallback: a same-named folder under a known SoulSync
       download root. Zero-config for the standard shared-volume setup —
       the album folder shows up under SoulSync's own ``./downloads``
       mount with the same name the client reported.

    ``config_get`` defaults to ``config_manager.get``.

    Returns the best resolved path, or ``reported_path`` unchanged when
    nothing better is found (so the caller's existing "no audio" error
    still surfaces, with both paths logged). A falsy ``reported_path`` is
    returned as-is.
    """
    if not reported_path:
        return reported_path
    lookup = config_manager.get if config_get is None else config_get

    def _readable_dir(path_like) -> bool:
        try:
            return Path(path_like).is_dir()
        except OSError:
            return False

    # 1. Mounts already line up — nothing to translate.
    if _readable_dir(reported_path):
        return reported_path

    # Compare with forward slashes only, so Windows-style paths map too.
    posix_style = str(reported_path).replace('\\', '/')

    # 2. Explicit prefix mappings (remote-path-mapping escape hatch).
    mappings = lookup('download_source.usenet_path_mappings', None) or []
    if isinstance(mappings, (list, tuple)):
        for entry in mappings:
            if not isinstance(entry, dict):
                continue
            remote = str(entry.get('from') or '').replace('\\', '/').rstrip('/')
            local = str(entry.get('to') or '')
            if not (remote and local):
                continue
            # Match the prefix on a path-segment boundary only.
            if posix_style != remote and not posix_style.startswith(remote + '/'):
                continue
            tail = posix_style[len(remote):].lstrip('/')
            mapped = str(Path(local) / tail) if tail else local
            if _readable_dir(mapped):
                return mapped

    # 3. Basename fallback under known download roots — covers the standard
    # shared-volume layout with zero configuration.
    leaf = Path(posix_style).name
    if leaf:
        for root in _candidate_download_roots(lookup):
            hit = Path(root) / leaf
            if _readable_dir(hit):
                return str(hit)
    return reported_path
def copy_audio_files_atomically(
    sources: Iterable[Path], staging_dir: Path,
) -> list:
    """Stage every source file into ``staging_dir`` via atomic copies.

    ``staging_dir`` is created if needed. For each source a non-colliding
    destination is chosen with ``unique_staging_path`` and the file is
    copied with ``atomic_copy_to_staging``. A file that fails to copy is
    logged and skipped, so the result may be partial — the caller decides
    what to do about that.

    Returns the final destination paths (as strings) of the files that
    were staged successfully, in input order.
    """
    staging_dir.mkdir(parents=True, exist_ok=True)
    staged: list = []
    for source in sources:
        target = unique_staging_path(staging_dir, source)
        try:
            atomic_copy_to_staging(source, target)
        except Exception as e:
            logger.warning("[album_bundle] Failed to stage %s -> %s: %s", source, target, e)
        else:
            staged.append(str(target))
    return staged
# Public API of this module, i.e. what ``from
# core.download_plugins.album_bundle import *`` exposes.
# ``album_title_relevance`` is listed alongside ``pick_best_album_release``
# because it is a public, documented helper.
# ``time`` is re-exported so callers don't have to remember which module
# owns what: plugins can ``from core.download_plugins.album_bundle import
# time`` if they want to, avoiding a second std-lib import line for a
# single use.
__all__ = [
    "ALBUM_PICK_MIN_BYTES",
    "ALBUM_PICK_MAX_BYTES",
    "DEFAULT_POLL_INTERVAL_SECONDS",
    "DEFAULT_POLL_TIMEOUT_SECONDS",
    "DEFAULT_TRANSIENT_MISS_THRESHOLD",
    "DEFAULT_COMPLETED_NO_PATH_WINDOW_SECONDS",
    "TransientMissCounter",
    "album_title_relevance",
    "atomic_copy_to_staging",
    "copy_audio_files_atomically",
    "get_completed_no_path_window_seconds",
    "get_poll_interval",
    "get_poll_timeout",
    "get_transient_miss_threshold",
    "resolve_reported_save_path",
    "pick_best_album_release",
    "poll_album_download",
    "quality_score",
    "time",
    "unique_staging_path",
]