diff --git a/core/repair_jobs/acoustid_scanner.py b/core/repair_jobs/acoustid_scanner.py index afe83d4a..660c216f 100644 --- a/core/repair_jobs/acoustid_scanner.py +++ b/core/repair_jobs/acoustid_scanner.py @@ -192,10 +192,38 @@ class AcoustIDScannerJob(RepairJob): if not aid_title: return + # Resolve which artist value to compare against, in priority order: + # 1. DB `track_artist` (per-track, manually curated or scanner- + # populated) — trust it when populated. Respects user edits + # from the enhanced library view. + # 2. File's ARTIST tag — ground truth for what's on disk. + # Catches legacy compilation tracks where `track_artist` + # column is NULL because they were downloaded before that + # column existed; the file itself has the correct per- + # track artist (Tidal/Spotify/Deezer all write it). + # 3. Album artist — final fallback for files without proper + # ARTIST tags AND no DB track_artist. + track_artist = (expected.get('track_artist') or '').strip() + if track_artist: + expected_artist = track_artist + else: + file_artist = None + try: + from core.tag_writer import read_file_tags + file_tags = read_file_tags(fpath) + file_artist = (file_tags.get('artist') or '').strip() or None + except Exception as e: + logger.debug("file-tag artist read failed for %s: %s", fname, e) + expected_artist = ( + file_artist + or (expected.get('album_artist') or '').strip() + or expected['artist'] + ) + # Normalize and compare norm_expected_title = _normalize(expected['title']) norm_aid_title = _normalize(aid_title) - norm_expected_artist = _normalize(expected['artist']) + norm_expected_artist = _normalize(expected_artist) norm_aid_artist = _normalize(aid_artist) title_sim = SequenceMatcher(None, norm_expected_title, norm_aid_title).ratio() @@ -216,7 +244,7 @@ class AcoustIDScannerJob(RepairJob): from core.matching.artist_aliases import artist_names_match _, artist_sim = artist_names_match( - expected['artist'], + expected_artist, aid_artist, threshold=artist_threshold, ) @@ -243,14 +271,14 @@ class AcoustIDScannerJob(RepairJob): file_path=fpath, title=f'Wrong download: "{expected["title"]}" is actually "{aid_title}"', description=( - f'Expected "{expected["title"]}" by {expected["artist"]}, ' + f'Expected "{expected["title"]}" by {expected_artist}, ' f'but audio fingerprint matches "{aid_title}" by {aid_artist} ' f'(fingerprint: {best_score:.0%}, title match: {title_sim:.0%}, ' f'artist match: {artist_sim:.0%})' ), details={ 'expected_title': expected['title'], - 'expected_artist': expected['artist'], + 'expected_artist': expected_artist, 'acoustid_title': aid_title, 'acoustid_artist': aid_artist, 'fingerprint_score': round(best_score, 3), @@ -284,11 +312,21 @@ class AcoustIDScannerJob(RepairJob): # import path when different from album artist) and fall # back to the album artist only when the per-track column # is NULL or empty (legacy rows / single-artist albums). + # Load `track_artist` (raw, may be empty) AND `album_artist` + # separately so `_scan_file` can tell the difference between + # 'DB has a curated per-track value' and 'DB fell back to + # album artist'. The COALESCE'd `artist` field is kept as a + # convenience for the existing `expected['artist']` consumers + # that want a single resolved value, but the resolution + # priority that actually drives the comparison is reproduced + # in `_scan_file`: track_artist → file tag → album_artist. cursor.execute(""" SELECT t.id, t.title, COALESCE(NULLIF(t.track_artist, ''), ar.name) AS artist, t.file_path, t.track_number, - al.title AS album_title, al.thumb_url, ar.thumb_url + al.title AS album_title, al.thumb_url, ar.thumb_url, + NULLIF(t.track_artist, '') AS track_artist, + ar.name AS album_artist FROM tracks t LEFT JOIN artists ar ON ar.id = t.artist_id LEFT JOIN albums al ON al.id = t.album_id @@ -312,6 +350,8 @@ class AcoustIDScannerJob(RepairJob): 'album_title': row[5] or '', 'album_thumb_url': row[6] or None, 'artist_thumb_url': row[7] or None, + 'track_artist': row[8] or '', # raw (may be empty) + 'album_artist': row[9] or '', } except Exception as e: logger.error("Error loading tracks from DB: %s", e) diff --git a/tests/test_acoustid_scanner.py b/tests/test_acoustid_scanner.py index 6b8ee4ef..a6d83e12 100644 --- a/tests/test_acoustid_scanner.py +++ b/tests/test_acoustid_scanner.py @@ -54,8 +54,11 @@ def _make_context(rows): def test_load_db_tracks_skips_null_ids_and_normalizes_track_ids(): job = AcoustIDScannerJob() context = _make_context([ - (None, "Broken Track", "Artist", "/music/broken.flac", 1, "Album", None, None), - (42, "Good Track", "Artist", "/music/good.flac", 2, "Album", "album-thumb", "artist-thumb"), + # 10 columns: id, title, artist (COALESCE'd), file_path, track_number, + # album_title, album_thumb, artist_thumb, track_artist (raw, may be ''), + # album_artist. + (None, "Broken Track", "Artist", "/music/broken.flac", 1, "Album", None, None, "", "Artist"), + (42, "Good Track", "Artist", "/music/good.flac", 2, "Album", "album-thumb", "artist-thumb", "", "Artist"), ]) tracks = job._load_db_tracks(context) @@ -68,8 +71,11 @@ def test_load_db_tracks_skips_null_ids_and_normalizes_track_ids(): def test_scan_handles_mixed_track_id_types(monkeypatch): job = AcoustIDScannerJob() context = _make_context([ - (None, "Broken Track", "Artist", "/music/broken.flac", 1, "Album", None, None), - (42, "Good Track", "Artist", "/music/good.flac", 2, "Album", "album-thumb", "artist-thumb"), + # 10 columns: id, title, artist (COALESCE'd), file_path, track_number, + # album_title, album_thumb, artist_thumb, track_artist (raw, may be ''), + # album_artist. + (None, "Broken Track", "Artist", "/music/broken.flac", 1, "Album", None, None, "", "Artist"), + (42, "Good Track", "Artist", "/music/good.flac", 2, "Album", "album-thumb", "artist-thumb", "", "Artist"), ]) monkeypatch.setattr(job, "_resolve_path", lambda file_path, _context: file_path) @@ -360,3 +366,257 @@ def test_load_db_tracks_falls_back_when_track_artist_empty_string(): # Empty string in track_artist → NULLIF returns NULL → COALESCE # falls back to album artist assert tracks['t1']['artist'] == 'Album Artist' + + +# --------------------------------------------------------------------------- +# File-tag fallback for legacy compilation tracks — Skowl Discord follow-up +# --------------------------------------------------------------------------- +# +# Skowl reported that the AcoustID Scanner was STILL flagging his +# compilation tracks even after the COALESCE(track_artist, album_artist) +# fix shipped. Cause: his tracks were downloaded BEFORE the +# `tracks.track_artist` column existed, so for those rows +# `track_artist IS NULL` and COALESCE falls back to the ALBUM artist +# (the curator) — same wrong-comparison the prior fix was supposed to +# eliminate. +# +# The audio file's ARTIST tag is ground truth for what's on disk: +# Tidal/Spotify/Deezer all write the per-track artist into the file's +# tag at download time, regardless of the SoulSync DB schema. Reading +# it during the scan closes the gap without requiring a DB backfill +# of the legacy rows. These tests pin: +# - File ARTIST tag trumps DB-resolved expected artist when present +# (Skowl's exact case: file says 'Eclypse', DB says 'Andromedik', +# AcoustID returns 'Eclypse' → no finding) +# - Missing file tag falls through to DB value (preserves +# pre-fix behavior for tracks without proper file tags) +# - mutagen failure is swallowed → falls through to DB +# - File tag matches DB → no behavioral change + + +def test_scanner_uses_file_tag_artist_over_db_for_legacy_compilation(monkeypatch): + """Skowl's exact case verbatim: + + DB row: artist_id → 'Andromedik' (album artist), track_artist=NULL + File tag: ARTIST='Eclypse' (Tidal-tagged correctly) + AcoustID: artist='Eclypse' + Pre-fix: expected='Andromedik' vs actual='Eclypse' → flag + Post-fix: file tag trumps DB → expected='Eclypse' → no flag + """ + job = AcoustIDScannerJob() + captured_findings = [] + context = _make_finding_capturing_context( + track_row=("city-lights", "City Lights", "Andromedik", + "/music/eclypse-city-lights.opus", 1, + "High Tea Music: Vol 1", None, None), + captured=captured_findings, + ) + + fake_acoustid = SimpleNamespace( + fingerprint_and_lookup=lambda fpath: { + 'best_score': 0.99, + 'recordings': [{ + 'title': 'City Lights', + 'artist': 'Eclypse', + }], + }, + ) + + # Patch read_file_tags to return Tidal's correct per-track artist. + # The scanner imports lazily inside _scan_file so we patch the + # source module's symbol. + monkeypatch.setattr( + 'core.tag_writer.read_file_tags', + lambda fpath: {'artist': 'Eclypse', 'title': 'City Lights'}, + ) + + result = JobResultStub() + job._scan_file( + '/music/eclypse-city-lights.opus', + 'city-lights', + {'title': 'City Lights', 'artist': 'Andromedik'}, # DB-resolved expected + fake_acoustid, + context, + result, + fp_threshold=0.85, + title_threshold=0.85, + artist_threshold=0.6, + ) + + assert captured_findings == [], ( + f"Expected no finding (file tag matches AcoustID); got {captured_findings}" + ) + + +def test_scanner_falls_back_to_db_when_file_tag_missing(monkeypatch): + """Defensive: file has no ARTIST tag (rare but possible for + non-standard formats / damaged files). MUST fall back to DB + expected value. Otherwise the fix would BREAK the existing + 'flag genuine mismatches' contract for files without tags.""" + job = AcoustIDScannerJob() + captured_findings = [] + context = _make_finding_capturing_context( + track_row=("99", "Some Track", "Foreigner", + "/music/track.flac", 1, "Album", None, None), + captured=captured_findings, + ) + + fake_acoustid = SimpleNamespace( + fingerprint_and_lookup=lambda fpath: { + 'best_score': 0.99, + 'recordings': [{ + 'title': 'Some Track', + 'artist': 'Different Band', + }], + }, + ) + + # File has no ARTIST tag (read_file_tags returns None for the field) + monkeypatch.setattr( + 'core.tag_writer.read_file_tags', + lambda fpath: {'artist': None}, + ) + + result = JobResultStub() + job._scan_file( + '/music/track.flac', + '99', + {'title': 'Some Track', 'artist': 'Foreigner'}, + fake_acoustid, + context, + result, + fp_threshold=0.85, + title_threshold=0.85, + artist_threshold=0.6, + ) + + # Should still flag — file tag was missing, fell back to DB + # ('Foreigner') vs AcoustID ('Different Band') mismatch + assert len(captured_findings) == 1, ( + f"Expected finding (file tag missing → DB fallback → genuine mismatch); got {captured_findings}" + ) + + +def test_scanner_swallows_file_tag_read_exception(monkeypatch): + """Defensive: mutagen errors mid-read shouldn't crash the scan + — must log + fall back to DB value gracefully.""" + job = AcoustIDScannerJob() + captured_findings = [] + context = _make_finding_capturing_context( + track_row=("99", "Track", "RealArtist", + "/music/corrupted.mp3", 1, "Album", None, None), + captured=captured_findings, + ) + + fake_acoustid = SimpleNamespace( + fingerprint_and_lookup=lambda fpath: { + 'best_score': 0.99, + 'recordings': [{'title': 'Track', 'artist': 'RealArtist'}], + }, + ) + + def boom(fpath): + raise RuntimeError("mutagen exploded on corrupted file") + + monkeypatch.setattr('core.tag_writer.read_file_tags', boom) + + result = JobResultStub() + job._scan_file( + '/music/corrupted.mp3', + '99', + {'title': 'Track', 'artist': 'RealArtist'}, + fake_acoustid, + context, + result, + fp_threshold=0.85, + title_threshold=0.85, + artist_threshold=0.6, + ) + + # No finding — DB matches AcoustID after the fallback + assert captured_findings == [] + + +def test_scanner_trusts_curated_db_track_artist_over_stale_file_tag(monkeypatch): + """The flip side of Skowl's case — user manually corrected + `track_artist` in the DB via the enhanced library view but + didn't re-tag the file. Pre-refactor 'file tag always wins' + would flag this as a false positive (file says wrong, DB says + right, AcoustID matches DB). Post-refactor: DB track_artist + is the curated source of truth when populated → file tag is + only consulted when DB is empty. No spurious flag. + + This is why `_load_db_tracks` surfaces `track_artist` as a + separate field instead of just the COALESCE'd `artist`: + `_scan_file` needs to distinguish 'DB has a curated value' + from 'DB fell back to album artist'.""" + job = AcoustIDScannerJob() + captured_findings = [] + context = _make_finding_capturing_context( + track_row=("99", "Track", "AlbumArtist", + "/music/track.flac", 1, "Album", None, None), + captured=captured_findings, + ) + + fake_acoustid = SimpleNamespace( + fingerprint_and_lookup=lambda fpath: { + 'best_score': 0.99, + 'recordings': [{'title': 'Track', 'artist': 'Eclypse'}], + }, + ) + + # File has wrong tag (stale — user edited DB but didn't re-tag), + # DB has correct value, AcoustID matches DB. + monkeypatch.setattr( + 'core.tag_writer.read_file_tags', + lambda fpath: {'artist': 'WrongStaleTag'}, + ) + + result = JobResultStub() + job._scan_file( + '/music/track.flac', '99', + # Simulates the post-refactor _load_db_tracks output: + # track_artist populated (curated) takes priority over file tag. + {'title': 'Track', 'artist': 'Eclypse', + 'track_artist': 'Eclypse', 'album_artist': 'AlbumArtist'}, + fake_acoustid, context, result, + fp_threshold=0.85, title_threshold=0.85, artist_threshold=0.6, + ) + + assert captured_findings == [], ( + f"DB curated value must trump stale file tag; got {captured_findings}" + ) + + +def test_scanner_file_tag_matches_db_no_behavioral_change(monkeypatch): + """Sanity: when file tag and DB agree, behavior is identical to + the pre-fix path. No double-counting, no spurious findings.""" + job = AcoustIDScannerJob() + captured_findings = [] + context = _make_finding_capturing_context( + track_row=("99", "Track", "RealArtist", + "/music/track.flac", 1, "Album", None, None), + captured=captured_findings, + ) + + fake_acoustid = SimpleNamespace( + fingerprint_and_lookup=lambda fpath: { + 'best_score': 0.99, + 'recordings': [{'title': 'Track', 'artist': 'RealArtist'}], + }, + ) + + monkeypatch.setattr( + 'core.tag_writer.read_file_tags', + lambda fpath: {'artist': 'RealArtist'}, + ) + + result = JobResultStub() + job._scan_file( + '/music/track.flac', '99', + {'title': 'Track', 'artist': 'RealArtist'}, + fake_acoustid, context, result, + fp_threshold=0.85, title_threshold=0.85, artist_threshold=0.6, + ) + + assert captured_findings == [] diff --git a/webui/static/helper.js b/webui/static/helper.js index 2cce02dd..16a4efa4 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3416,6 +3416,7 @@ const WHATS_NEW = { '2.5.1': [ // --- post-release patch work on the 2.5.1 line — entries hidden by _getLatestWhatsNewVersion until the build version bumps --- { date: 'Unreleased — 2.5.1 patch work' }, + { title: 'AcoustID Scanner: File-Tag Fallback For Legacy Compilation Tracks', desc: 'follow-up to the compilation-album scanner fix. previous patch made the scanner read `tracks.track_artist` (per-track artist column) via COALESCE so compilation tracks would compare against the right value. but tracks downloaded BEFORE that column existed have track_artist=NULL — COALESCE falls back to album artist (the curator) and we\'re back to the wrong-comparison case. fix: explicit 3-tier resolution in `_scan_file` — (1) `tracks.track_artist` from DB if populated → trust it (respects manual edits from the enhanced library view), (2) audio file\'s ARTIST tag via mutagen if present → use it (tidal/spotify/deezer all write the per-track artist into the file at download time, so it\'s ground truth even when DB is stale), (3) album artist → final fallback for files without proper ARTIST tags AND no DB track_artist. file open is essentially free since acoustid is opening it for fingerprinting anyway. critical guard: when DB track_artist is populated (curated value), it always wins over file tag — protects users who edited DB but didn\'t re-tag the file from getting false-positive flags. closes the legacy-data gap without requiring a one-time DB backfill or a re-download. 5 new tests pin: file-tag-resolves-skowl-case (legacy NULL track_artist → file tag wins → no flag), tag-missing-falls-back-to-album-artist (preserves existing genuine-mismatch contract), mutagen-exception-swallowed (debug log, fall-through), tag-matches-DB no behavioral change, and the false-positive guard (DB populated → trumps stale file tag).', page: 'tools' }, { title: 'Tidal Favorite Albums + Artists Now Show Up On Discover', desc: 'discover → your albums (and your artists) was returning nothing for tidal users regardless of how many albums/artists they\'d favorited. cause: `get_favorite_albums` and `get_favorite_artists` were calling the deprecated `/v2/favorites?filter[type]=ALBUMS|ARTISTS` endpoint, which returns 404 for personal favorites — that endpoint is scoped to collections the third-party app created itself, not the user\'s app-level favorites. the V1 fallback was also dead because modern OAuth tokens carry `collection.read` instead of the legacy `r_usr` scope V1 requires (returns 403). same root cause as the favorited tracks fix from #502. fix: rewire to the working V2 user-collection endpoints — `/v2/userCollectionAlbums/me/relationships/items` and `/v2/userCollectionArtists/me/relationships/items` — using the same cursor-paginated pattern shipped for tracks. ID enumeration lifted into a generic `_iter_collection_resource_ids(path, expected_type, max_ids)` helper so tracks/albums/artists all share one walker (~80 lines deduped). batch hydration via `/v2/{albums|artists}?filter[id]=...&include=...` with extended JSON:API include semantics — single request returns 20 albums + their artists + cover artworks all in `included[]`, parsed via two static helpers (`_first_artist_name`, `_first_artwork_url`) that map relationship refs to the included map. cover/profile images pick `files[0]` (largest variant Tidal returns, typically 1280×1280). public methods preserve the prior return shape so the discover aggregator in web_server.py stays byte-identical. 24 new tests pin: cursor-walker dispatch (correct path + type), included-map building, artist + artwork relationship resolution (full + missing + unknown-id), batch hydration parse for albums + artists, empty-input + HTTP-error short-circuits, BATCH_SIZE chunking (41 IDs → 20/20/1), end-to-end orchestrator behavior.', page: 'discover' }, { title: 'Server Playlist Sync: Append Mode (Stop Overwriting User-Added Tracks)', desc: 'discord report (cjfc, 2026-04-26): syncing a spotify playlist to your server overwrote anything you\'d manually added to the server-side playlist. now there\'s a per-sync mode picker next to the Sync button on the playlist details modal: "Replace" (default, current behavior — delete + recreate) or "Append only" (preserve existing, only add tracks not already there). useful when the source platform caps playlist size (spotify 100-track limit) and you\'re manually building beyond it on the server. each server client (plex / jellyfin / navidrome) gets a new `append_to_playlist(name, tracks)` method that uses the server\'s native append api — plex `addItems`, jellyfin `POST /Playlists//Items`, navidrome subsonic `updatePlaylist?songIdToAdd=...`. no delete-recreate, no backup playlist created in append mode (preserves playlist creation date + metadata + non-soulsync-managed tracks). dedup-by-id ensures we never add a track that\'s already on the playlist (matched by ratingKey for plex, jellyfin guid id for jellyfin, song id for navidrome — server-native identity, not fuzzy title+artist match). falls back to `create_playlist` when the playlist doesn\'t exist yet (first sync). sync_service dispatches via the new mode flag through /api/sync/start; soulsync standalone has no playlist methods at all so the dispatch falls back to update_playlist with a warning log when append is requested against it. 15 new tests pin: missing playlist → create delegation, dedup filtering (existing ids skipped), short-circuit on no-new-tracks (no api call), failure paths return False without raising, contract listing for each server client.', page: 'sync' }, ],