Skip already-owned tracks during download discography

- new track_already_owned helper wraps db.check_track_exists at
  the same confidence threshold the discography backfill repair job
  uses (0.7) — name+artist+album, format-agnostic so blasphemy-mode
  libraries (flac → mp3 + delete original) match correctly
- endpoint runs the check after the artist + content-type filters and
  before add_to_wishlist, so a second discography click on the same
  artist no longer re-queues every track that already downloaded
- per-album response carries a new tracks_skipped_owned counter
  alongside the existing artist/content/wishlist skip categories

Discord report (Skowl).
pull/564/head
Broque Thomas 1 week ago
parent fb1795cf6c
commit 4892baf8d4

@ -130,8 +130,53 @@ def load_global_content_filter_settings(config_manager: Any) -> Dict[str, Any]:
}
def track_already_owned(
db: Any,
track_name: str,
requested_artist: str,
album_name: str,
server_source: Optional[str],
confidence_threshold: float = 0.7,
) -> bool:
"""Return True if the track is already in the user's library.
Discord report (Skowl): clicking "Download Discography" twice on
the same artist re-queued every track instead of skipping the
half already on disk. Trace: the endpoint added each track to the
wishlist via ``db.add_to_wishlist``, which only dedups against the
wishlist itself once a wishlist track downloads it leaves the
wishlist, so the second discography click re-inserted everything.
The discography backfill repair job already runs the same check
via ``db.check_track_exists`` this helper centralises the
contract so the user-facing endpoint matches that behavior.
`check_track_exists` is name+artist+album based, format-agnostic.
Skowl's "Blasphemy mode" library (FLAC converted to MP3 then
original deleted) matches just fine track_name + artist + album
don't change with format.
Returns False on any exception so a transient DB hiccup doesn't
silently nuke a discography fetch a redundant wishlist add is
much cheaper to recover from than a missed track.
"""
if not requested_artist or not track_name:
return False
try:
match, confidence = db.check_track_exists(
track_name, requested_artist,
confidence_threshold=confidence_threshold,
server_source=server_source,
album=album_name or None,
)
except Exception:
return False
return bool(match) and confidence >= confidence_threshold
__all__ = [
'track_artist_matches',
'content_type_skip_reason',
'load_global_content_filter_settings',
'track_already_owned',
]

@ -25,6 +25,7 @@ import pytest
from core.metadata.discography_filters import (
content_type_skip_reason,
load_global_content_filter_settings,
track_already_owned,
track_artist_matches,
)
@ -215,3 +216,107 @@ class TestLoadGlobalSettings:
assert result['include_remixes'] is False
assert result['include_acoustic'] is True
assert result['include_instrumentals'] is False
# ---------------------------------------------------------------------------
# track_already_owned
# ---------------------------------------------------------------------------
class _FakeDB:
"""Minimal stub for the parts of MusicDatabase the helper touches."""
def __init__(self, response):
self._response = response
self.calls = []
def check_track_exists(self, title, artist, **kwargs):
self.calls.append({'title': title, 'artist': artist, **kwargs})
if isinstance(self._response, Exception):
raise self._response
return self._response
class TestTrackAlreadyOwned:
def test_returns_true_when_match_clears_threshold(self):
"""Skowl's case: the second discography click finds the track
already in library at confidence 0.7 skip."""
db = _FakeDB((object(), 0.85))
assert track_already_owned(
db, 'Hotline Bling', 'Drake', 'Views', 'plex',
) is True
def test_returns_false_when_no_match(self):
"""Library doesn't have it → return False so the caller queues
the track. (None track returned with confidence 0.0.)"""
db = _FakeDB((None, 0.0))
assert track_already_owned(
db, 'New Song', 'Drake', 'New Album', 'plex',
) is False
def test_returns_false_when_match_below_threshold(self):
"""A weak fuzzy match shouldn't count — better to over-queue
than to silently drop a real missing track. Mirrors the
backfill repair job's `if db_track and confidence >= 0.7` guard."""
db = _FakeDB((object(), 0.5)) # below default 0.7
assert track_already_owned(
db, 'Sort Of Like Hotline Bling', 'Drake', 'Views', 'plex',
) is False
def test_passes_album_to_check(self):
"""Album param is what enables album-aware matching for
multi-artist albums in `check_track_exists`. Pin it gets through."""
db = _FakeDB((object(), 0.9))
track_already_owned(db, 'Track', 'Artist', 'Album X', 'plex')
assert db.calls[0]['album'] == 'Album X'
def test_passes_server_source_to_check(self):
"""Active media server scopes the lookup so the skip check
only fires on tracks the user can actually see in their
library through their currently-active server."""
db = _FakeDB((object(), 0.9))
track_already_owned(db, 'Track', 'Artist', 'Album', 'navidrome')
assert db.calls[0]['server_source'] == 'navidrome'
def test_empty_album_passed_as_none(self):
"""Empty-string album becomes None so check_track_exists's
album-aware fallback doesn't try to match against ''."""
db = _FakeDB((None, 0.0))
track_already_owned(db, 'Track', 'Artist', '', 'plex')
assert db.calls[0]['album'] is None
def test_missing_track_or_artist_returns_false_without_calling_db(self):
"""Don't fire a DB call when we have nothing to match against —
defensive AND avoids polluting query logs with empty lookups."""
db = _FakeDB((object(), 0.9))
assert track_already_owned(db, '', 'Artist', 'Album', 'plex') is False
assert track_already_owned(db, 'Track', '', 'Album', 'plex') is False
assert track_already_owned(db, '', '', 'Album', 'plex') is False
assert db.calls == [], "DB must not be called when track or artist is empty"
def test_db_exception_returns_false(self):
"""If the DB call raises (lock contention, schema mismatch,
whatever), treat as 'not owned' and let the caller queue.
A redundant wishlist add is much cheaper to recover from
than a missed track."""
db = _FakeDB(RuntimeError('db locked'))
assert track_already_owned(db, 'Track', 'Artist', 'Album', 'plex') is False
def test_custom_confidence_threshold_honored(self):
"""Caller can tighten or loosen the threshold. 0.95 means only
very-high-confidence matches count as owned."""
db = _FakeDB((object(), 0.8))
# Default threshold (0.7): match counts
assert track_already_owned(db, 'Track', 'Artist', 'Album', 'plex') is True
# Tighter threshold (0.95): same match doesn't count
assert track_already_owned(
db, 'Track', 'Artist', 'Album', 'plex',
confidence_threshold=0.95,
) is False
def test_none_server_source_passes_through(self):
"""When the caller can't determine the active server, pass
None `check_track_exists` falls back to a cross-server search."""
db = _FakeDB((None, 0.0))
track_already_owned(db, 'Track', 'Artist', 'Album', None)
assert db.calls[0]['server_source'] is None

@ -9273,6 +9273,7 @@ def download_discography(artist_id):
from core.metadata.discography_filters import (
content_type_skip_reason,
load_global_content_filter_settings,
track_already_owned,
track_artist_matches,
)
db = MusicDatabase()
@ -9282,14 +9283,24 @@ def download_discography(artist_id):
# mid-stream and the four bool reads aren't worth re-running per
# track.
content_settings = load_global_content_filter_settings(config_manager)
# Library-ownership check uses the active media server so the
# match is scoped to the same source whose tracks the user can
# actually see in their library. None falls through to a
# cross-server search inside check_track_exists.
active_server = None
try:
active_server = config_manager.get_active_media_server()
except Exception as e:
logger.debug("active media server lookup failed: %s", e)
total_added = 0
total_skipped = 0
total_skipped_artist = 0
total_skipped_filter = 0
total_skipped_owned = 0
def generate_ndjson():
nonlocal total_added, total_skipped, total_skipped_artist, total_skipped_filter
nonlocal total_added, total_skipped, total_skipped_artist, total_skipped_filter, total_skipped_owned
for entry in album_entries:
album_id = entry['id']
@ -9339,6 +9350,7 @@ def download_discography(artist_id):
skipped = 0
skipped_artist = 0
skipped_filter = 0
skipped_owned = 0
for track in tracks:
track_name = track.get('name', '')
@ -9364,6 +9376,16 @@ def download_discography(artist_id):
skipped_filter += 1
continue
# Skowl (Discord): clicking Download Discography
# twice re-queued every track because add_to_wishlist
# only dedups against the wishlist, not the library.
# Same library-ownership check the discography
# backfill repair job uses. Format-agnostic so
# Blasphemy mode (FLAC→MP3) doesn't false-miss.
if track_already_owned(db, track_name, hint_artist, album_name, active_server):
skipped_owned += 1
continue
spotify_track_data = {
'id': track_id,
'name': track_name,
@ -9412,10 +9434,12 @@ def download_discography(artist_id):
total_skipped += skipped
total_skipped_artist += skipped_artist
total_skipped_filter += skipped_filter
total_skipped_owned += skipped_owned
logger.warning(
f"[Discography] {album_name} ({resolved_source}): {added} added, "
f"{skipped} skipped (wishlist), {skipped_artist} skipped (artist mismatch), "
f"{skipped_filter} skipped (content filter)"
f"{skipped_filter} skipped (content filter), "
f"{skipped_owned} skipped (already in library)"
)
yield json.dumps({
"album_id": album_id,
@ -9425,6 +9449,7 @@ def download_discography(artist_id):
"tracks_skipped": skipped,
"tracks_skipped_artist": skipped_artist,
"tracks_skipped_filter": skipped_filter,
"tracks_skipped_owned": skipped_owned,
"tracks_total": len(tracks),
"source": resolved_source,
}) + '\n'
@ -9440,7 +9465,8 @@ def download_discography(artist_id):
logger.warning(
f"[Discography] Complete for {artist_name}: {total_added} tracks added, "
f"{total_skipped} skipped (wishlist), {total_skipped_artist} skipped (artist mismatch), "
f"{total_skipped_filter} skipped (content filter) across {len(album_entries)} albums"
f"{total_skipped_filter} skipped (content filter), "
f"{total_skipped_owned} skipped (already in library) across {len(album_entries)} albums"
)
yield json.dumps({
"status": "complete",
@ -9448,6 +9474,7 @@ def download_discography(artist_id):
"total_skipped": total_skipped,
"total_skipped_artist": total_skipped_artist,
"total_skipped_filter": total_skipped_filter,
"total_skipped_owned": total_skipped_owned,
"total_albums": len(album_entries),
}) + '\n'

@ -3416,6 +3416,7 @@ const WHATS_NEW = {
'2.5.1': [
// --- post-release patch work on the 2.5.1 line — entries hidden by _getLatestWhatsNewVersion until the build version bumps ---
{ date: 'Unreleased — 2.5.1 patch work' },
{ title: 'Download Discography: Skips Tracks Already In Your Library', desc: 'discord report (skowl): clicking download discography on the same artist twice re-queued every track instead of skipping the half already on disk. trace: the endpoint added each track via `add_to_wishlist`, which dedups against the wishlist itself but never checks the library — once a downloaded track leaves the wishlist the next click re-inserts it. fix: same library-ownership check the discography backfill repair job already runs (`db.check_track_exists` at confidence ≥ 0.7). format-agnostic — name + artist + album, no extension comparison — so blasphemy mode (flac → mp3 with original deleted) doesn\'t false-miss. exception during the check returns "not owned" so a transient db hiccup doesn\'t silently nuke the discography fetch (a redundant wishlist add is cheap, a missed track isn\'t). per-album response carries a new `tracks_skipped_owned` counter alongside the artist / content / wishlist skips. 10 new tests at the helper boundary.', page: 'discover' },
{ title: 'Download Discography: No More Cross-Artist Tracks Or Unwanted Remixes', desc: 'issue #559: download discography pulled in tracks from compilations / appears-on albums where the artist was only featured on one or two tracks — every other track on those albums got added too. also ignored your watchlist "include remixes / live / acoustic / instrumental" settings, so one-off discography downloads kept stuffing your wishlist with remix ladders. fix: per-track filter at the endpoint. drops tracks where the requested artist isn\'t named in the track\'s artists list (keeps features, drops unrelated compilation entries). honors `watchlist.global_include_*` settings the same way the discography backfill repair job already does. per-album response carries new skip counts so the ui can show how much got filtered. 21 new tests at the helper boundary.', page: 'discover' },
{ title: 'Album Completeness: "Could Not Determine Album Folder" Error Now Tells You What To Fix', desc: 'github issue #558 (gabistek, navidrome on docker / arch host): clicking auto-fill or fix selected on the album completeness findings page returned a flat "could not determine album folder from existing tracks" error with no diagnostic. trace: the path resolver in `core/library/path_resolver.py` probes transfer + download + `library.music_paths` config + plex api library locations to map db-recorded paths to actual files on disk. for plex users the api auto-discovers the mount paths (per #476). navidrome\'s subsonic api doesn\'t expose filesystem paths at all (only folder names via `getMusicFolders`), and navidrome\'s native rest api on top of that doesn\'t expose them either — there is no api signal we can probe. so for navidrome users in docker, if the path navidrome reports (`/music/artist/album/track.flac`) doesn\'t exist as-is in the soulsync container view AND the user hasn\'t manually configured settings → library → music paths, the resolver returns none and the fix workflow bailed silently. fix: lifted the resolver into a diagnostic-aware variant (`resolve_library_file_path_with_diagnostic` returning a `(resolved, ResolveAttempt)` tuple) that records what was tried — raw-path-existed, base-dirs-probed, whether config_manager / plex_client were wired up. repair_worker uses the diagnostic to render a multi-part error: names the active media server, shows one sample db-recorded path the album\'s tracks have, lists every base directory the resolver actually probed, and points at settings → library → music paths as the actionable fix. user can now read the error and know exactly what to mount or configure. no auto-probing of common docker conventions — too speculative, could resolve to wrong dirs on the suffix-walk if conventional paths happen to contain a partial collision. backwards compatible: legacy `resolve_library_file_path` kept as a thin wrapper that drops the attempt, every existing call site unchanged. 12 new tests pin: tuple shape, raw-path short-circuit attempt fields, base-dirs listed even on walk failure, had-flags reflect caller inputs, error renders active server name + sample path + base dirs, distinguishes empty-base-dirs vs tried-and-failed cases, settings hint always present, defensive against none attempt + missing sample + missing config_manager.', page: 'tools' },
{ title: 'Import History: Clear History Button Now Clears Stuck "Processing" Rows', desc: 'noticed on the import page: clear history left zombie rows behind that all showed "⧗ processing" status from 2-9 days ago. trace: `_record_in_progress` inserts a `status=\'processing\'` row up-front so the ui can render the in-flight import while it runs, then `_finalize_result` updates it to `completed`/`failed` when the import finishes. when the server is restarted mid-import (or the worker crashes), the row never gets finalized — stays at `processing` forever. the clear-history endpoint\'s sql `DELETE ... WHERE status IN (\'completed\', \'approved\', \'failed\', \'needs_identification\', \'rejected\')` didn\'t include `processing`, so those zombies survived every click. fix: add `processing` to the delete list, but guard against nuking actually-live imports by intersecting against `_snapshot_active()` — any folder hash currently registered in the worker\'s in-memory `_active_imports` map is excluded from the delete. `pending_review` deliberately left out so user still has to approve/reject those explicitly. one endpoint touched (`/api/auto-import/clear-completed` in web_server.py). no worker changes. zombie-row pile gets swept on next click, new imports still record + update normally.', page: 'import' },

Loading…
Cancel
Save