From 85ba93f16fc2f16638f0acde037f68b2f0e9b14e Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Tue, 26 May 2026 07:12:49 -0700 Subject: [PATCH] Fix album-bundle staging match + wishlist provenance (#700, #698) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause (#700): the Soulseek album-bundle path downloads whole releases into a private staging dir, then per-track workers claim those files via the staging-match shortcut. When slskd files arrived without ID3 tags (common for FLAC rips), the staging cache fell back to the filename stem as the title — and stems shaped like "Artist - Album - 03 - Title" could not clear the 0.80 title- similarity threshold against the clean Spotify track name. Every track in the album went not_found, the batch ended "failed" in the Downloads UI with an empty queue, and the bundle-downloaded files just sat unused in staging. Fix: in _staging_title_variants, add a trailing-title variant by extracting the segments after a bare track-number block (e.g. "03") between " - " delimiters. Conservative — only fires when a clear digit segment is present, so real song titles with dashes like "Hold Me - Live" are left intact. Generated as an additional variant alongside the existing raw/compacted/feat-stripped/bonus-stripped forms, so behavior on already-matching files is unchanged. Downstream (#698): the album-bundle staging miss pushed every failed track to the wishlist labelled as a playlist track, and a couple of fallback paths in ensure_wishlist_track_format and the slskd-result reconstruction hardcoded album_type='single' / total_tracks=1 on the stored album dict. On wishlist requeue the path builder saw album_type='single' and routed the download through single_path, dumping the file in the Singles tree even though it belonged to an album. (Running Reorganize would fix it because the DB album linkage was still correct, but the file landed in the wrong place first.) Fixes: - new resolve_wishlist_source_type_for_batch() returns 'album' for is_album_download batches; wishlist_failed.py now calls it instead of hardcoding 'playlist' - build_wishlist_source_context() threads album_context / artist_context / is_album_download from the batch into the wishlist row so future requeue logic has authoritative routing data - the non-dict-album fallback in ensure_wishlist_track_format and the slskd-result reconstruction default album_type='album' (and total_tracks=0 = unknown) instead of lying with 'single'/1; the existing setdefault chain handles dict-shaped album data unchanged Tests: - 2 staging-match tests pin the new tail-extraction behavior against a realistic untagged slskd stem, plus a negative test that confirms a dash-in-title without a digit segment still does NOT extract a variant - 2 payload tests pin the album_type='album' default for both fallback paths - 4 processing tests pin resolve_wishlist_source_type_for_batch() and the album-context threading in build_wishlist_source_context() 3974 pass; no behavioural change on already-working flows. --- core/downloads/staging.py | 28 +++++++++- core/downloads/wishlist_failed.py | 10 ++-- core/wishlist/payloads.py | 4 +- core/wishlist/processing.py | 26 ++++++++- tests/downloads/test_downloads_staging.py | 68 +++++++++++++++++++++++ tests/wishlist/test_payloads.py | 38 +++++++++++++ tests/wishlist/test_processing.py | 48 ++++++++++++++++ webui/static/helper.js | 2 + 8 files changed, 215 insertions(+), 9 deletions(-) diff --git a/core/downloads/staging.py b/core/downloads/staging.py index d1d4e3c3..bd115cc6 100644 --- a/core/downloads/staging.py +++ b/core/downloads/staging.py @@ -78,6 +78,30 @@ def _extract_explicit_track_number(filename: str) -> int: return 0 +def _extract_release_filename_title(stem: str) -> str: + """Extract the trailing title segment from a release-style filename stem. + + Slskd album bundles often arrive untagged with stems like + 'Artist - Album - 03 - Title' or '03 - Title'. The full stem is too + noisy to fuzzy-match against a clean Spotify title, so when we + detect a bare track-number segment between ' - ' delimiters we + return the trailing segments as an extra match candidate. + + Returns '' when no clear track-number signal is present so we don't + accidentally extract tails from real song titles that legitimately + contain ' - ' (e.g. 'Hold Me - Live'). + """ + if not stem: + return '' + parts = [p.strip() for p in stem.split(' - ') if p.strip()] + if len(parts) < 2: + return '' + for i, part in enumerate(parts): + if re.fullmatch(r'\d{1,3}', part) and i < len(parts) - 1: + return ' - '.join(parts[i + 1:]).strip() + return '' + + def _staging_title_variants(title: Any, normalize: Callable[[str], str]) -> list[str]: """Return conservative title variants for release-file matching. @@ -112,8 +136,10 @@ def _staging_title_variants(title: Any, normalize: Callable[[str], str]) -> list flags=re.IGNORECASE, ) + release_tail = _extract_release_filename_title(compacted_separators) + variants: list[str] = [] - for candidate in (raw, compacted_separators, without_feat, without_bonus): + for candidate in (raw, compacted_separators, without_feat, without_bonus, release_tail): normalized = normalize(candidate) if normalized and normalized not in variants: variants.append(normalized) diff --git a/core/downloads/wishlist_failed.py b/core/downloads/wishlist_failed.py index 1b1f30bb..d99750da 100644 --- a/core/downloads/wishlist_failed.py +++ b/core/downloads/wishlist_failed.py @@ -19,6 +19,7 @@ from core.wishlist.processing import ( build_wishlist_source_context as _build_wishlist_source_context, recover_uncaptured_failed_tracks as _recover_uncaptured_failed_tracks, remove_completed_tracks_from_wishlist as _remove_completed_tracks_from_wishlist, + resolve_wishlist_source_type_for_batch as _resolve_wishlist_source_type_for_batch, ) from core.wishlist.resolution import ( check_and_remove_from_wishlist as _check_and_remove_from_wishlist, @@ -107,10 +108,11 @@ def _process_failed_tracks_to_wishlist_exact(batch_id): if permanently_failed_tracks: try: wishlist_service = get_wishlist_service() - + # Create source_context identical to sync.py source_context = _build_wishlist_source_context(batch) - + batch_source_type = _resolve_wishlist_source_type_for_batch(batch) + # Process each failed track (matching sync.py's loop) with safety limit max_failed_tracks = min(len(permanently_failed_tracks), 50) # Safety limit wing_it_skipped = 0 @@ -129,10 +131,10 @@ def _process_failed_tracks_to_wishlist_exact(batch_id): continue logger.error(f"[Wishlist Processing] Adding track {i+1}/{max_failed_tracks}: {track_name}") - + success = wishlist_service.add_failed_track_from_modal( track_info=failed_track_info, - source_type='playlist', + source_type=batch_source_type, source_context=source_context, profile_id=batch.get('profile_id', 1) ) diff --git a/core/wishlist/payloads.py b/core/wishlist/payloads.py index df0098b8..50f3a0e7 100644 --- a/core/wishlist/payloads.py +++ b/core/wishlist/payloads.py @@ -106,8 +106,6 @@ def ensure_wishlist_track_format(track_info): else: album = { 'name': str(album_data) if album_data else track_info.get('name', 'Unknown Album'), - 'album_type': 'single', - 'total_tracks': 1, 'release_date': '', } album.setdefault('images', []) @@ -351,7 +349,7 @@ def extract_wishlist_track_from_modal_info(track_info: Dict[str, Any]) -> Option "id": f"reconstructed_{hash(f'{slskd_result.artist}_{slskd_result.title}')}", "name": getattr(slskd_result, "title", "Unknown Track"), "artists": [{"name": getattr(slskd_result, "artist", "Unknown Artist")}], - "album": {"name": album_name, "images": [], "album_type": "single", "total_tracks": 1}, + "album": {"name": album_name, "images": [], "album_type": "album", "total_tracks": 0}, "duration_ms": 0, "reconstructed": True, } diff --git a/core/wishlist/processing.py b/core/wishlist/processing.py index 3a8cbf9d..c3de917e 100644 --- a/core/wishlist/processing.py +++ b/core/wishlist/processing.py @@ -148,15 +148,39 @@ def recover_uncaptured_failed_tracks( return recovered_count +def resolve_wishlist_source_type_for_batch(batch: Dict[str, Any]) -> str: + """Pick the wishlist ``source_type`` for failed tracks coming out of a + download batch. + + Album-context batches must produce ``'album'`` provenance — the legacy + hardcoded ``'playlist'`` mislabels every album-batch failure, breaks + the wishlist UI's source filter, and makes ``by_source_type`` stats + look like all wishlist work came from playlists. + """ + return 'album' if batch.get('is_album_download') else 'playlist' + + def build_wishlist_source_context(batch: Dict[str, Any], current_time: datetime | None = None) -> Dict[str, Any]: """Build the source_context payload used when adding failed tracks back to the wishlist.""" current_time = current_time or datetime.now() - return { + context = { 'playlist_name': batch.get('playlist_name', 'Unknown Playlist'), 'playlist_id': batch.get('playlist_id', None), 'added_from': 'webui_modal', 'timestamp': current_time.isoformat(), } + # Preserve album-batch provenance so wishlist requeue has a real signal + # for album-vs-single routing instead of relying on per-track album dicts + # that may have been mangled by reconstruction fallbacks. + if batch.get('is_album_download'): + context['is_album_download'] = True + album_ctx = batch.get('album_context') + if isinstance(album_ctx, dict): + context['album_context'] = album_ctx + artist_ctx = batch.get('artist_context') + if isinstance(artist_ctx, dict): + context['artist_context'] = artist_ctx + return context def finalize_auto_wishlist_completion( diff --git a/tests/downloads/test_downloads_staging.py b/tests/downloads/test_downloads_staging.py index 097c3323..a400f351 100644 --- a/tests/downloads/test_downloads_staging.py +++ b/tests/downloads/test_downloads_staging.py @@ -495,6 +495,74 @@ def test_staging_title_match_keeps_wrong_versions_separate(tmp_path): assert 'staging_t_wrong_version' not in matched_downloads_context +def test_staging_title_match_handles_untagged_release_filename(tmp_path): + """Album-bundle slskd downloads often arrive without ID3 tags. + + When that happens the staging cache falls back to the file stem + for the title (e.g. 'Kendrick Lamar - GNX - 03 - Reincarnated'). + The full stem is too noisy to fuzzy-match against the clean + Spotify title at the 0.80 threshold, so the variant generator + pulls out the trailing-title segment when a track-number block + is present between ' - ' delimiters. + """ + src_file = tmp_path / 'staging' / 'Kendrick Lamar - GNX - 03 - Reincarnated.flac' + src_file.parent.mkdir() + src_file.touch() + + deps = _build_deps( + transfer_path=str(tmp_path / 'transfer'), + staging_files=[ + { + 'full_path': str(src_file), + 'title': 'Kendrick Lamar - GNX - 03 - Reincarnated', + 'artist': 'Kendrick Lamar', + 'track_number': 3, + }, + ], + ) + _seed_task('t_untagged', track_info={ + '_is_explicit_album_download': True, + '_explicit_album_context': {'id': 'alb', 'name': 'GNX'}, + '_explicit_artist_context': {'id': 'art', 'name': 'Kendrick Lamar'}, + }) + + result = ds.try_staging_match( + 't_untagged', 'b1', + _Track(name='Reincarnated', artists=['Kendrick Lamar']), + deps, + ) + + assert result is True + assert matched_downloads_context['staging_t_untagged']['track_info']['track_number'] == 3 + + +def test_staging_title_match_keeps_dash_titles_intact(tmp_path): + """The trailing-title variant must not fire when there's no track-number + segment — otherwise a legit title like 'Hold Me - Live' would generate + a 'Live' variant and false-match unrelated 'Live' stems on disk. + """ + src_file = tmp_path / 'staging' / 'Live.flac' + src_file.parent.mkdir() + src_file.touch() + + deps = _build_deps( + transfer_path=str(tmp_path / 'transfer'), + staging_files=[ + {'full_path': str(src_file), 'title': 'Live', 'artist': 'Other Artist'}, + ], + ) + _seed_task('t_dash') + + result = ds.try_staging_match( + 't_dash', 'b1', + _Track(name='Hold Me - Live', artists=['Some Artist']), + deps, + ) + + assert result is False + assert 'staging_t_dash' not in matched_downloads_context + + def test_fallback_context_synthesizes_from_track(tmp_path): """Without explicit context, synthesizes spotify_artist/album from the track.""" src_file = tmp_path / 'staging' / 'Hello.flac' diff --git a/tests/wishlist/test_payloads.py b/tests/wishlist/test_payloads.py index a5c77376..b8cbd499 100644 --- a/tests/wishlist/test_payloads.py +++ b/tests/wishlist/test_payloads.py @@ -106,6 +106,44 @@ def test_extract_spotify_track_from_modal_info_reconstructs_from_slskd_result(): assert out["album"]["name"] == "Album Three" +def test_ensure_wishlist_track_format_defaults_non_dict_album_to_album_type(): + """When ``album`` arrives as a non-dict (legacy/reconstruction path) we + must not stamp ``album_type='single'`` — that lies about the origin + and routes the wishlist requeue through the single_path template + instead of album_path, dumping album tracks into the Singles tree. + Default to 'album' / total_tracks=0 (unknown) so downstream code can + fall through to the real release-type detection logic.""" + track = { + "name": "Song", + "artist": "Artist One", + "album": "Album From Legacy String", + } + + out = payloads.ensure_wishlist_track_format(track) + + assert out["album"]["name"] == "Album From Legacy String" + assert out["album"]["album_type"] == "album" + assert out["album"]["total_tracks"] == 0 + + +def test_extract_spotify_track_from_modal_info_slskd_reconstruction_defaults_to_album(): + """Slskd-result reconstruction is a last-resort path; defaulting to + ``album_type='single'`` corrupted the requeue routing for album + batches. Same fix as ensure_wishlist_track_format: default 'album'.""" + track_info = { + "slskd_result": SimpleNamespace( + title="Song Three", + artist="Artist Three", + album="Album Three", + ) + } + + out = payloads.extract_spotify_track_from_modal_info(track_info) + + assert out["album"]["album_type"] == "album" + assert out["album"]["total_tracks"] == 0 + + def test_extract_wishlist_track_from_modal_info_uses_track_data_key(): track_info = { "track_data": { diff --git a/tests/wishlist/test_processing.py b/tests/wishlist/test_processing.py index 709b3cc6..0b2f9080 100644 --- a/tests/wishlist/test_processing.py +++ b/tests/wishlist/test_processing.py @@ -108,6 +108,53 @@ def test_add_cancelled_tracks_to_failed_tracks_builds_entries(): assert failed[0]["failure_reason"] == "Download cancelled" +def test_resolve_wishlist_source_type_for_batch_returns_album_for_album_batch(): + assert processing.resolve_wishlist_source_type_for_batch({"is_album_download": True}) == "album" + + +def test_resolve_wishlist_source_type_for_batch_returns_playlist_otherwise(): + assert processing.resolve_wishlist_source_type_for_batch({}) == "playlist" + assert processing.resolve_wishlist_source_type_for_batch({"is_album_download": False}) == "playlist" + + +def test_build_wishlist_source_context_minimal_batch_skips_album_provenance(): + """Non-album batches must not carry album_context (would mislead the + requeue logic into routing single-track sync failures through + album_path).""" + batch = { + "playlist_name": "My Mix", + "playlist_id": "pl-99", + } + + context = processing.build_wishlist_source_context(batch) + + assert context["playlist_name"] == "My Mix" + assert context["playlist_id"] == "pl-99" + assert context["added_from"] == "webui_modal" + assert "is_album_download" not in context + assert "album_context" not in context + assert "artist_context" not in context + + +def test_build_wishlist_source_context_preserves_album_context_for_album_batches(): + """Album batches must carry album_context/artist_context through to the + wishlist row so a later requeue has authoritative routing data instead + of having to reconstruct it from per-track album dicts.""" + batch = { + "playlist_name": "Album: GNX", + "playlist_id": "library_redownload_abc", + "is_album_download": True, + "album_context": {"id": "alb-1", "name": "GNX", "total_tracks": 12}, + "artist_context": {"id": "art-1", "name": "Kendrick Lamar"}, + } + + context = processing.build_wishlist_source_context(batch) + + assert context["is_album_download"] is True + assert context["album_context"] == {"id": "alb-1", "name": "GNX", "total_tracks": 12} + assert context["artist_context"] == {"id": "art-1", "name": "Kendrick Lamar"} + + def test_recover_uncaptured_failed_tracks_builds_entries(): batch = {"queue": ["a"]} download_tasks = { @@ -364,3 +411,4 @@ def test_finalize_auto_wishlist_completion_with_no_tracks_added_still_resets_sta ) ] assert db.connection.committed is True + diff --git a/webui/static/helper.js b/webui/static/helper.js index 69faa24a..f4e07249 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3415,6 +3415,8 @@ function closeHelperSearch() { const WHATS_NEW = { '2.6.2': [ { date: 'May 24, 2026 — 2.6.2 release' }, + { title: 'Fix: whole-album downloads ending up "failed" with empty queues', desc: 'the Soulseek album-bundle path routes whole-album downloads through a private staging dir, then per-track workers claim the staged files. when slskd files arrived without ID3 tags, the staging cache fell back to filename stems like "Artist - Album - 03 - Title" — too noisy to clear the title-similarity threshold against the clean Spotify title, so every track went not_found and the batch ended failed even with all files on disk. staging match now pulls the trailing-title segment when a bare track-number is present between " - " delimiters, so slskd filename patterns match cleanly. closes #700.' }, + { title: 'Fix: album tracks getting requeued as singles by the wishlist', desc: 'downstream of the album-bundle fix above. failed album-batch tracks were going to the wishlist with `source_type=\'playlist\'` hardcoded, and a couple of fallback paths were stamping `album_type=\'single\'` on the stored album dict. on requeue the path builder saw single → routed to the Singles tree even though the track belonged to an album (running Reorganize would patch it because the DB still knew). album batches now carry `source_type=\'album\'`, source-context preserves `album_context` / `artist_context`, and the non-dict-album + slskd-reconstruction fallbacks default to `album` instead of lying with `single`. closes #698.' }, { title: 'Fix: Redownload Album button on the enhanced artist view was dead', desc: 'the Redownload button on the enhanced artist-page album row was throwing a silent ReferenceError on click — no popup, no toast, no log line, button just did nothing. the underlying function was lost when script.js got split into 17 domain modules and nothing caught it for a while. restored. closes #699.' }, { title: 'iTunes / Apple Music link import', desc: 'new iTunes Link tab on the Sync page, between Deezer Link and YouTube. paste an Apple Music album, track, or playlist URL and SoulSync pulls the tracklist, runs it through the same discovery → sync → download flow as the other link tabs. handles the new Apple Music SPA token shape — token gets scraped from the JS bundle on first use and cached for 6 hours, with a 401 retry that refetches if Apple rotates mid-session.' }, { title: 'Auto-Sync: bulk schedule by source + custom interval columns', desc: 'each source group in the sidebar gets a Bulk button — opens a popover that lets you schedule every playlist in that group at a chosen interval in one click (1h / 2h / 4h / 8h / 12h / 16h / 24h / 48h / 72h / weekly) or "Custom interval…" for an arbitrary number of hours. also includes "Unschedule all" to clear a source\'s schedules. on the board itself, a schedule with a non-standard interval (e.g. 6h or 36h, created via Automations page or the custom prompt) now renders as its own dashed-border column instead of disappearing because it didn\'t match a hardcoded bucket.' },