diff --git a/core/metadata/discography_filters.py b/core/metadata/discography_filters.py index c49eeca4..4e1e0ecd 100644 --- a/core/metadata/discography_filters.py +++ b/core/metadata/discography_filters.py @@ -130,8 +130,53 @@ def load_global_content_filter_settings(config_manager: Any) -> Dict[str, Any]: } +def track_already_owned( + db: Any, + track_name: str, + requested_artist: str, + album_name: str, + server_source: Optional[str], + confidence_threshold: float = 0.7, +) -> bool: + """Return True if the track is already in the user's library. + + Discord report (Skowl): clicking "Download Discography" twice on + the same artist re-queued every track instead of skipping the + half already on disk. Trace: the endpoint added each track to the + wishlist via ``db.add_to_wishlist``, which only dedups against the + wishlist itself — once a wishlist track downloads it leaves the + wishlist, so the second discography click re-inserted everything. + + The discography backfill repair job already runs the same check + via ``db.check_track_exists`` — this helper centralises the + contract so the user-facing endpoint matches that behavior. + + `check_track_exists` is name+artist+album based, format-agnostic. + Skowl's "Blasphemy mode" library (FLAC converted to MP3 then + original deleted) matches just fine — track_name + artist + album + don't change with format. + + Returns False on any exception so a transient DB hiccup doesn't + silently nuke a discography fetch — a redundant wishlist add is + much cheaper to recover from than a missed track. + """ + if not requested_artist or not track_name: + return False + try: + match, confidence = db.check_track_exists( + track_name, requested_artist, + confidence_threshold=confidence_threshold, + server_source=server_source, + album=album_name or None, + ) + except Exception: + return False + return bool(match) and confidence >= confidence_threshold + + __all__ = [ 'track_artist_matches', 'content_type_skip_reason', 'load_global_content_filter_settings', + 'track_already_owned', ] diff --git a/tests/metadata/test_discography_filters.py b/tests/metadata/test_discography_filters.py index 572e8cbb..400e0885 100644 --- a/tests/metadata/test_discography_filters.py +++ b/tests/metadata/test_discography_filters.py @@ -25,6 +25,7 @@ import pytest from core.metadata.discography_filters import ( content_type_skip_reason, load_global_content_filter_settings, + track_already_owned, track_artist_matches, ) @@ -215,3 +216,107 @@ class TestLoadGlobalSettings: assert result['include_remixes'] is False assert result['include_acoustic'] is True assert result['include_instrumentals'] is False + + +# --------------------------------------------------------------------------- +# track_already_owned +# --------------------------------------------------------------------------- + + +class _FakeDB: + """Minimal stub for the parts of MusicDatabase the helper touches.""" + + def __init__(self, response): + self._response = response + self.calls = [] + + def check_track_exists(self, title, artist, **kwargs): + self.calls.append({'title': title, 'artist': artist, **kwargs}) + if isinstance(self._response, Exception): + raise self._response + return self._response + + +class TestTrackAlreadyOwned: + def test_returns_true_when_match_clears_threshold(self): + """Skowl's case: the second discography click finds the track + already in library at confidence ≥ 0.7 → skip.""" + db = _FakeDB((object(), 0.85)) + assert track_already_owned( + db, 'Hotline Bling', 'Drake', 'Views', 'plex', + ) is True + + def test_returns_false_when_no_match(self): + """Library doesn't have it → return False so the caller queues + the track. (None track returned with confidence 0.0.)""" + db = _FakeDB((None, 0.0)) + assert track_already_owned( + db, 'New Song', 'Drake', 'New Album', 'plex', + ) is False + + def test_returns_false_when_match_below_threshold(self): + """A weak fuzzy match shouldn't count — better to over-queue + than to silently drop a real missing track. Mirrors the + backfill repair job's `if db_track and confidence >= 0.7` guard.""" + db = _FakeDB((object(), 0.5)) # below default 0.7 + assert track_already_owned( + db, 'Sort Of Like Hotline Bling', 'Drake', 'Views', 'plex', + ) is False + + def test_passes_album_to_check(self): + """Album param is what enables album-aware matching for + multi-artist albums in `check_track_exists`. Pin it gets through.""" + db = _FakeDB((object(), 0.9)) + track_already_owned(db, 'Track', 'Artist', 'Album X', 'plex') + assert db.calls[0]['album'] == 'Album X' + + def test_passes_server_source_to_check(self): + """Active media server scopes the lookup so the skip check + only fires on tracks the user can actually see in their + library through their currently-active server.""" + db = _FakeDB((object(), 0.9)) + track_already_owned(db, 'Track', 'Artist', 'Album', 'navidrome') + assert db.calls[0]['server_source'] == 'navidrome' + + def test_empty_album_passed_as_none(self): + """Empty-string album becomes None so check_track_exists's + album-aware fallback doesn't try to match against ''.""" + db = _FakeDB((None, 0.0)) + track_already_owned(db, 'Track', 'Artist', '', 'plex') + assert db.calls[0]['album'] is None + + def test_missing_track_or_artist_returns_false_without_calling_db(self): + """Don't fire a DB call when we have nothing to match against — + defensive AND avoids polluting query logs with empty lookups.""" + db = _FakeDB((object(), 0.9)) + assert track_already_owned(db, '', 'Artist', 'Album', 'plex') is False + assert track_already_owned(db, 'Track', '', 'Album', 'plex') is False + assert track_already_owned(db, '', '', 'Album', 'plex') is False + assert db.calls == [], "DB must not be called when track or artist is empty" + + def test_db_exception_returns_false(self): + """If the DB call raises (lock contention, schema mismatch, + whatever), treat as 'not owned' and let the caller queue. + A redundant wishlist add is much cheaper to recover from + than a missed track.""" + db = _FakeDB(RuntimeError('db locked')) + assert track_already_owned(db, 'Track', 'Artist', 'Album', 'plex') is False + + def test_custom_confidence_threshold_honored(self): + """Caller can tighten or loosen the threshold. 0.95 means only + very-high-confidence matches count as owned.""" + db = _FakeDB((object(), 0.8)) + # Default threshold (0.7): match counts + assert track_already_owned(db, 'Track', 'Artist', 'Album', 'plex') is True + # Tighter threshold (0.95): same match doesn't count + assert track_already_owned( + db, 'Track', 'Artist', 'Album', 'plex', + confidence_threshold=0.95, + ) is False + + def test_none_server_source_passes_through(self): + """When the caller can't determine the active server, pass + None — `check_track_exists` falls back to a cross-server search.""" + db = _FakeDB((None, 0.0)) + track_already_owned(db, 'Track', 'Artist', 'Album', None) + assert db.calls[0]['server_source'] is None diff --git a/web_server.py b/web_server.py index 9aefc252..790266af 100644 --- a/web_server.py +++ b/web_server.py @@ -9273,6 +9273,7 @@ def download_discography(artist_id): from core.metadata.discography_filters import ( content_type_skip_reason, load_global_content_filter_settings, + track_already_owned, track_artist_matches, ) db = MusicDatabase() @@ -9282,14 +9283,24 @@ def download_discography(artist_id): # mid-stream and the four bool reads aren't worth re-running per # track. content_settings = load_global_content_filter_settings(config_manager) + # Library-ownership check uses the active media server so the + # match is scoped to the same source whose tracks the user can + # actually see in their library. None falls through to a + # cross-server search inside check_track_exists. + active_server = None + try: + active_server = config_manager.get_active_media_server() + except Exception as e: + logger.debug("active media server lookup failed: %s", e) total_added = 0 total_skipped = 0 total_skipped_artist = 0 total_skipped_filter = 0 + total_skipped_owned = 0 def generate_ndjson(): - nonlocal total_added, total_skipped, total_skipped_artist, total_skipped_filter + nonlocal total_added, total_skipped, total_skipped_artist, total_skipped_filter, total_skipped_owned for entry in album_entries: album_id = entry['id'] @@ -9339,6 +9350,7 @@ def download_discography(artist_id): skipped = 0 skipped_artist = 0 skipped_filter = 0 + skipped_owned = 0 for track in tracks: track_name = track.get('name', '') @@ -9364,6 +9376,16 @@ def download_discography(artist_id): skipped_filter += 1 continue + # Skowl (Discord): clicking Download Discography + # twice re-queued every track because add_to_wishlist + # only dedups against the wishlist, not the library. + # Same library-ownership check the discography + # backfill repair job uses. Format-agnostic so + # Blasphemy mode (FLAC→MP3) doesn't false-miss. + if track_already_owned(db, track_name, hint_artist, album_name, active_server): + skipped_owned += 1 + continue + spotify_track_data = { 'id': track_id, 'name': track_name, @@ -9412,10 +9434,12 @@ def download_discography(artist_id): total_skipped += skipped total_skipped_artist += skipped_artist total_skipped_filter += skipped_filter + total_skipped_owned += skipped_owned logger.warning( f"[Discography] {album_name} ({resolved_source}): {added} added, " f"{skipped} skipped (wishlist), {skipped_artist} skipped (artist mismatch), " - f"{skipped_filter} skipped (content filter)" + f"{skipped_filter} skipped (content filter), " + f"{skipped_owned} skipped (already in library)" ) yield json.dumps({ "album_id": album_id, @@ -9425,6 +9449,7 @@ def download_discography(artist_id): "tracks_skipped": skipped, "tracks_skipped_artist": skipped_artist, "tracks_skipped_filter": skipped_filter, + "tracks_skipped_owned": skipped_owned, "tracks_total": len(tracks), "source": resolved_source, }) + '\n' @@ -9440,7 +9465,8 @@ def download_discography(artist_id): logger.warning( f"[Discography] Complete for {artist_name}: {total_added} tracks added, " f"{total_skipped} skipped (wishlist), {total_skipped_artist} skipped (artist mismatch), " - f"{total_skipped_filter} skipped (content filter) across {len(album_entries)} albums" + f"{total_skipped_filter} skipped (content filter), " + f"{total_skipped_owned} skipped (already in library) across {len(album_entries)} albums" ) yield json.dumps({ "status": "complete", @@ -9448,6 +9474,7 @@ def download_discography(artist_id): "total_skipped": total_skipped, "total_skipped_artist": total_skipped_artist, "total_skipped_filter": total_skipped_filter, + "total_skipped_owned": total_skipped_owned, "total_albums": len(album_entries), }) + '\n' diff --git a/webui/static/helper.js b/webui/static/helper.js index f9bc4b2a..3b648e5b 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3416,6 +3416,7 @@ const WHATS_NEW = { '2.5.1': [ // --- post-release patch work on the 2.5.1 line — entries hidden by _getLatestWhatsNewVersion until the build version bumps --- { date: 'Unreleased — 2.5.1 patch work' }, + { title: 'Download Discography: Skips Tracks Already In Your Library', desc: 'discord report (skowl): clicking download discography on the same artist twice re-queued every track instead of skipping the half already on disk. trace: the endpoint added each track via `add_to_wishlist`, which dedups against the wishlist itself but never checks the library — once a downloaded track leaves the wishlist the next click re-inserts it. fix: same library-ownership check the discography backfill repair job already runs (`db.check_track_exists` at confidence ≥ 0.7). format-agnostic — name + artist + album, no extension comparison — so blasphemy mode (flac → mp3 with original deleted) doesn\'t false-miss. exception during the check returns "not owned" so a transient db hiccup doesn\'t silently nuke the discography fetch (a redundant wishlist add is cheap, a missed track isn\'t). per-album response carries a new `tracks_skipped_owned` counter alongside the artist / content / wishlist skips. 10 new tests at the helper boundary.', page: 'discover' }, { title: 'Download Discography: No More Cross-Artist Tracks Or Unwanted Remixes', desc: 'issue #559: download discography pulled in tracks from compilations / appears-on albums where the artist was only featured on one or two tracks — every other track on those albums got added too. also ignored your watchlist "include remixes / live / acoustic / instrumental" settings, so one-off discography downloads kept stuffing your wishlist with remix ladders. fix: per-track filter at the endpoint. drops tracks where the requested artist isn\'t named in the track\'s artists list (keeps features, drops unrelated compilation entries). honors `watchlist.global_include_*` settings the same way the discography backfill repair job already does. per-album response carries new skip counts so the ui can show how much got filtered. 21 new tests at the helper boundary.', page: 'discover' }, { title: 'Album Completeness: "Could Not Determine Album Folder" Error Now Tells You What To Fix', desc: 'github issue #558 (gabistek, navidrome on docker / arch host): clicking auto-fill or fix selected on the album completeness findings page returned a flat "could not determine album folder from existing tracks" error with no diagnostic. trace: the path resolver in `core/library/path_resolver.py` probes transfer + download + `library.music_paths` config + plex api library locations to map db-recorded paths to actual files on disk. for plex users the api auto-discovers the mount paths (per #476). navidrome\'s subsonic api doesn\'t expose filesystem paths at all (only folder names via `getMusicFolders`), and navidrome\'s native rest api on top of that doesn\'t expose them either — there is no api signal we can probe. so for navidrome users in docker, if the path navidrome reports (`/music/artist/album/track.flac`) doesn\'t exist as-is in the soulsync container view AND the user hasn\'t manually configured settings → library → music paths, the resolver returns none and the fix workflow bailed silently. fix: lifted the resolver into a diagnostic-aware variant (`resolve_library_file_path_with_diagnostic` returning a `(resolved, ResolveAttempt)` tuple) that records what was tried — raw-path-existed, base-dirs-probed, whether config_manager / plex_client were wired up. repair_worker uses the diagnostic to render a multi-part error: names the active media server, shows one sample db-recorded path the album\'s tracks have, lists every base directory the resolver actually probed, and points at settings → library → music paths as the actionable fix. user can now read the error and know exactly what to mount or configure. no auto-probing of common docker conventions — too speculative, could resolve to wrong dirs on the suffix-walk if conventional paths happen to contain a partial collision. backwards compatible: legacy `resolve_library_file_path` kept as a thin wrapper that drops the attempt, every existing call site unchanged. 12 new tests pin: tuple shape, raw-path short-circuit attempt fields, base-dirs listed even on walk failure, had-flags reflect caller inputs, error renders active server name + sample path + base dirs, distinguishes empty-base-dirs vs tried-and-failed cases, settings hint always present, defensive against none attempt + missing sample + missing config_manager.', page: 'tools' }, { title: 'Import History: Clear History Button Now Clears Stuck "Processing" Rows', desc: 'noticed on the import page: clear history left zombie rows behind that all showed "⧗ processing" status from 2-9 days ago. trace: `_record_in_progress` inserts a `status=\'processing\'` row up-front so the ui can render the in-flight import while it runs, then `_finalize_result` updates it to `completed`/`failed` when the import finishes. when the server is restarted mid-import (or the worker crashes), the row never gets finalized — stays at `processing` forever. the clear-history endpoint\'s sql `DELETE ... WHERE status IN (\'completed\', \'approved\', \'failed\', \'needs_identification\', \'rejected\')` didn\'t include `processing`, so those zombies survived every click. fix: add `processing` to the delete list, but guard against nuking actually-live imports by intersecting against `_snapshot_active()` — any folder hash currently registered in the worker\'s in-memory `_active_imports` map is excluded from the delete. `pending_review` deliberately left out so user still has to approve/reject those explicitly. one endpoint touched (`/api/auto-import/clear-completed` in web_server.py). no worker changes. zombie-row pile gets swept on next click, new imports still record + update normally.', page: 'import' },