diff --git a/core/repair_jobs/quality_upgrade.py b/core/repair_jobs/quality_upgrade.py index 8f98f0fa..f0ad8c53 100644 --- a/core/repair_jobs/quality_upgrade.py +++ b/core/repair_jobs/quality_upgrade.py @@ -64,6 +64,20 @@ _PROFILE_KEY_RANK = { 'mp3_192': RANK_192, } +# Per-source file-tag key holding that source's own track ID (written by enrichment). +_SOURCE_TRACK_ID_TAG = { + 'spotify': 'spotify_track_id', + 'deezer': 'deezer_track_id', + 'itunes': 'itunes_track_id', + 'audiodb': 'audiodb_track_id', + 'musicbrainz': 'musicbrainz_releasetrackid', + 'tidal': 'tidal_track_id', +} + +# Reject a fuzzy candidate whose length differs from ours by more than this (ms) — +# catches wrong versions (live/edit/remix) that share a title. Exact tiers skip it. +_DURATION_TOLERANCE_MS = 5000 + def _normalize_kbps(bitrate: Optional[int]) -> Optional[int]: """Library bitrate may be stored in bps (e.g. 320000) or kbps (320). @@ -159,24 +173,68 @@ def _norm_isrc(value: Any) -> str: return str(value).upper().replace('-', '').replace(' ', '').strip() -def _read_track_isrc(file_path: str) -> str: - """Read the ISRC the enrichment pipeline embedded in the file's tags. +def _read_file_ids(file_path: str) -> Dict[str, str]: + """Read the identifiers enrichment embedded in the file's tags. Enrichment matches every track to the metadata sources and writes the IDs - (ISRC, per-source track IDs) into the file — so an already-enriched track - carries its exact identity. Returns '' when unreadable / not enriched.""" + (ISRC + per-source track IDs) into the file — so an already-enriched track + carries its exact identity. 
Returns a dict with a normalized ``isrc`` plus any + per-source track-ID tags present (keyed by tag name); empty dict when unreadable / not enriched.""" resolved = resolve_library_file_path(file_path) if file_path else None if not resolved and file_path and os.path.isfile(file_path): resolved = file_path if not resolved: - return '' + return {} try: info = read_embedded_tags(resolved) except Exception: - return '' + return {} if not info or not info.get('available'): - return '' - return _norm_isrc((info.get('tags') or {}).get('isrc')) + return {} + tags = info.get('tags') or {} + out: Dict[str, str] = {} + isrc = _norm_isrc(tags.get('isrc')) + if isrc: + out['isrc'] = isrc + for tag_key in set(_SOURCE_TRACK_ID_TAG.values()): + val = tags.get(tag_key) + if val: + out[tag_key] = str(val) + return out + + +def _duration_ok(want_ms: Any, got_ms: Any, tolerance_ms: int = _DURATION_TOLERANCE_MS) -> bool: + """Wrong-version guard: True when the candidate's length is within tolerance of + ours — or when either length is unknown (never reject on missing data).""" + try: + w, g = int(want_ms or 0), int(got_ms or 0) + except (TypeError, ValueError): + return True + if w <= 0 or g <= 0: + return True + return abs(w - g) <= tolerance_ms + + +def _match_via_track_id(file_ids: Dict[str, str], + source_priority: List[str]) -> Tuple[Optional[Any], Optional[str]]: + """Most-direct path: enrichment already wrote this track's per-source IDs into + the file. If we have the active source's own track ID, fetch that exact track by + ID — no search at all. 
Returns (track, source) or (None, None).""" + for source in source_priority: + tag_key = _SOURCE_TRACK_ID_TAG.get(source) + track_id = file_ids.get(tag_key) if tag_key else None + if not track_id: + continue + client = get_client_for_source(source) + if not client or not hasattr(client, 'get_track_details'): + continue + try: + track = client.get_track_details(str(track_id)) + except Exception: + track = None + if track: + return track, source + return None, None def _candidate_isrc(cand: Any) -> str: @@ -217,13 +275,16 @@ def _match_via_isrc(isrc: str, source_priority: List[str]) -> Tuple[Optional[Any # Column order for the _load_tracks SELECT — rows come back as dicts keyed by these. _TRACK_COLS = ( - 'id', 'title', 'file_path', 'bitrate', 'artist_name', 'album_title', 'album_id', - 'track_number', 'spotify_album_id', 'itunes_album_id', 'deezer_id', + 'id', 'title', 'file_path', 'bitrate', 'duration', 'artist_name', 'album_title', + 'album_id', 'track_number', 'spotify_album_id', 'itunes_album_id', 'deezer_id', 'musicbrainz_release_id', 'audiodb_id', ) # Human-readable note per match tier (search uses a confidence % instead). -_MATCH_NOTE = {'isrc': 'exact ISRC match', 'album': 'matched within album'} +_MATCH_NOTE = { + 'track_id': 'exact track ID', 'isrc': 'exact ISRC match', + 'album': 'matched within album', +} # Per-source column holding that source's album ID on the albums table. 
_SOURCE_ALBUM_ID_COL = { @@ -240,9 +301,11 @@ def _norm_title(value: Any) -> str: return ''.join(ch for ch in str(value or '').lower() if ch.isalnum()) -def _find_track_in_album(items: Any, title: str, track_number: Any, engine: Any) -> Optional[Any]: +def _find_track_in_album(items: Any, title: str, track_number: Any, engine: Any, + want_duration_ms: Any = None) -> Optional[Any]: """Pick the track in an album's tracklist that matches ours — exact normalized - title first (track_number breaks ties), then a high-similarity fuzzy fallback.""" + title first (track_number then duration break ties), then a high-similarity + fuzzy fallback that respects the duration guard.""" want = _norm_title(title) exact = [] best, best_score = None, 0.0 @@ -252,6 +315,8 @@ def _find_track_in_album(items: Any, title: str, track_number: Any, engine: Any) exact.append(it) continue if engine and it_name: + if not _duration_ok(want_duration_ms, _extract_lookup_value(it, 'duration_ms', 'duration')): + continue score = engine.similarity_score( engine.normalize_string(title), engine.normalize_string(it_name)) if score > best_score and score >= 0.85: @@ -261,13 +326,17 @@ def _find_track_in_album(items: Any, title: str, track_number: Any, engine: Any) for it in exact: if _extract_lookup_value(it, 'track_number') == track_number: return it + # Multiple same-title cuts (e.g. album + live): prefer the closest length. + if want_duration_ms and len(exact) > 1: + exact.sort(key=lambda it: abs(int(want_duration_ms) - int( + _extract_lookup_value(it, 'duration_ms', 'duration', default=0) or 0))) return exact[0] return best def _match_via_album(engine: Any, source_priority: List[str], artist: str, album_title: str, - title: str, track_number: Any, - stored_album_ids: Dict[str, str]) -> Tuple[Optional[Any], Optional[str]]: + title: str, track_number: Any, stored_album_ids: Dict[str, str], + want_duration_ms: Any = None) -> Tuple[Optional[Any], Optional[str]]: """Structured artist → album → track match. 
For each source: use the album's stored source ID if we already have it (enriched album), else find the album by searching ``artist album``; then pull that album's tracklist and locate our @@ -305,7 +374,7 @@ def _match_via_album(engine: Any, source_priority: List[str], artist: str, album except Exception: resp = None items = resp.get('items') if isinstance(resp, dict) else None - match = _find_track_in_album(items, title, track_number, engine) + match = _find_track_in_album(items, title, track_number, engine, want_duration_ms) if match is None: continue # The album tracklist's tracks usually omit the album object — attach it so @@ -319,7 +388,8 @@ def _match_via_album(engine: Any, source_priority: List[str], artist: str, album def _find_best_match(engine: Any, source_priority: List[str], title: str, artist: str, - album: str, min_confidence: float) -> Tuple[Optional[Any], float, Optional[str], bool]: + album: str, min_confidence: float, + want_duration_ms: Any = None) -> Tuple[Optional[Any], float, Optional[str], bool]: """Search the configured metadata sources for the best replacement match. Returns (best_track, confidence, source, attempted_any_provider).""" temp_track = type('TempTrack', (), {'name': title, 'artists': [artist], 'album': album})() @@ -336,6 +406,10 @@ def _find_best_match(engine: Any, source_priority: List[str], title: str, artist matches = _search_tracks_for_source(source, query, limit=5, client=client) time.sleep(0.5) # be gentle on metadata APIs for cand in matches or []: + # Wrong-version guard: a candidate whose length is way off is a + # different cut (live/edit/remix) — reject before it can win. 
+ if not _duration_ok(want_duration_ms, _extract_lookup_value(cand, 'duration_ms', 'duration')): + continue cand_artists = _track_artist_names(cand) artist_conf = max( (engine.similarity_score(engine.normalize_string(artist), @@ -369,12 +443,14 @@ class QualityUpgradeJob(RepairJob): "track against your Quality Profile using BOTH the file format and its " 'bitrate — so a 128 kbps MP3 is no longer treated the same as a 320 kbps ' 'one, and enabling MP3-320/256 in your profile actually counts.\n\n' - 'For every track below your preferred quality, it finds a better version and ' - 'creates a finding. If the track was enriched, it uses the ISRC embedded in ' - 'the file to resolve the EXACT track (and its album) — no guessing; otherwise ' - 'it falls back to a name/artist search with a confidence score. Nothing is ' - 'queued automatically: applying a finding adds that matched track — with its ' - 'album context — to the wishlist, the same as any other download.\n\n' + 'For every track below your preferred quality it resolves the exact better ' + 'version using the most precise identity available, in order: the source ' + "track ID enrichment wrote into the file → the file's ISRC → the album's " + 'tracklist (by stored album ID or album search) → a name/artist search. The ' + 'fuzzy steps also reject candidates whose length is off (wrong live/edit cut). ' + 'It skips tracks it already proposed, so re-runs are cheap. 
Nothing is queued ' + 'automatically: applying a finding adds that matched track — with its album ' + 'context — to the wishlist, the same as any other download.\n\n' 'Settings:\n' '- Scope: "watchlist" (watchlisted artists only) or "all" (whole library)\n' '- Min confidence: minimum match confidence (0-1) to surface a finding\n\n' @@ -404,8 +480,8 @@ class QualityUpgradeJob(RepairJob): conn = db._get_connection() try: base = ( - "SELECT t.id, t.title, t.file_path, t.bitrate, a.name AS artist_name, " - "al.title AS album_title, t.album_id, t.track_number, " + "SELECT t.id, t.title, t.file_path, t.bitrate, t.duration, " + "a.name AS artist_name, al.title AS album_title, t.album_id, t.track_number, " "al.spotify_album_id, al.itunes_album_id, al.deezer_id, " "al.musicbrainz_release_id, al.audiodb_id " "FROM tracks t " @@ -428,6 +504,21 @@ class QualityUpgradeJob(RepairJob): finally: conn.close() + def _load_existing_finding_ids(self, db: Any) -> set: + """Track IDs that already have a finding for this job (any status). Lets a + re-run skip tracks we've already proposed/dismissed without re-hitting the + metadata API — pending stays deduped, and a dismissed track stays dismissed.""" + conn = db._get_connection() + try: + rows = conn.execute( + "SELECT entity_id FROM repair_findings WHERE job_id = ? AND entity_type = 'track'", + (self.job_id,)).fetchall() + return {str(r[0]) for r in rows if r and r[0] is not None} + except Exception: + return set() + finally: + conn.close() + def estimate_scope(self, context: JobContext) -> int: try: return len(self._load_tracks(context.db, self._get_settings(context)['scope'])) @@ -459,6 +550,10 @@ class QualityUpgradeJob(RepairJob): if context.report_progress: context.report_progress(phase=f'Checking quality on {total} tracks...', total=total) + # Tracks we've already proposed/dismissed — skip them so a re-run doesn't + # re-resolve the same tracks against the metadata API. 
+ already_found = self._load_existing_finding_ids(db) + # Metadata source for matching — resolved lazily so we only fail if we # actually find a low-quality track that needs a match. engine = None @@ -474,6 +569,7 @@ class QualityUpgradeJob(RepairJob): title = row['title'] file_path = row['file_path'] bitrate = row['bitrate'] + duration_ms = row.get('duration') artist_name = row['artist_name'] album_title = row['album_title'] album_id = row['album_id'] @@ -483,6 +579,10 @@ class QualityUpgradeJob(RepairJob): } result.scanned += 1 + if str(track_id) in already_found: + result.findings_skipped_dedup += 1 + continue + if meets_preferred_quality(file_path, bitrate, quality_profile): result.skipped += 1 if context.update_progress and (i + 1) % 25 == 0: @@ -510,26 +610,39 @@ class QualityUpgradeJob(RepairJob): log_line=f'Low quality ({current_label}): {artist_name} - {title}', log_type='info') + # Read the identifiers enrichment embedded in the file once (ISRC + + # per-source track IDs), used by the two most-exact tiers below. + file_ids = _read_file_ids(file_path) + # Tiered match, best identity first, loosest last: - # 1. ISRC embedded in the file tags (enriched track) → EXACT track. - # 2. Album → track: use the album's stored source ID if we have it - # (enriched album), else find the album by search, then locate our - # track in its tracklist. Pins the right album even when the track - # itself isn't enriched. (artist → album → track) + # 0. The active source's OWN track ID, embedded in the file by + # enrichment → fetch that exact track by ID. No search at all. + # 1. ISRC (also in the tags) → exact track on any source. + # 2. Album → track: stored album source ID if we have it (enriched + # album), else find the album by search, then locate our track in + # its tracklist. Pins the right album even when the track itself + # isn't enriched. (artist → album → track) # 3. Plain artist+title search with similarity scoring. 
(artist → track) + # The fuzzy tiers (2-3) also apply a duration guard to reject wrong cuts. best, source, conf, attempted = None, None, 0.0, False - matched_via = 'isrc' - best, source = _match_via_isrc(_read_track_isrc(file_path), source_priority) + matched_via = 'track_id' + best, source = _match_via_track_id(file_ids, source_priority) if best: conf, attempted = 1.0, True + if not best: + matched_via = 'isrc' + best, source = _match_via_isrc(file_ids.get('isrc', ''), source_priority) + if best: + conf, attempted = 1.0, True + if not best: matched_via = 'album' try: best, source = _match_via_album( engine, source_priority, artist_name or '', album_title or '', - title, track_number, stored_album_ids) + title, track_number, stored_album_ids, duration_ms) except Exception as e: logger.debug("[Quality Upgrade] Album match error for %s - %s: %s", artist_name, title, e) best = None @@ -540,7 +653,8 @@ class QualityUpgradeJob(RepairJob): matched_via = 'search' try: best, conf, source, attempted = _find_best_match( - engine, source_priority, title, artist_name or '', album_title or '', min_conf) + engine, source_priority, title, artist_name or '', album_title or '', + min_conf, duration_ms) except Exception as e: logger.debug("[Quality Upgrade] Match error for %s - %s: %s", artist_name, title, e) result.errors += 1 diff --git a/tests/repair_jobs/test_quality_upgrade.py b/tests/repair_jobs/test_quality_upgrade.py index f4d8ab4c..d1ff35fa 100644 --- a/tests/repair_jobs/test_quality_upgrade.py +++ b/tests/repair_jobs/test_quality_upgrade.py @@ -96,13 +96,20 @@ def meets(path, bitrate, profile): # --- scan produces a finding (seam) ---------------------------------------- class _FakeConn: - def __init__(self, rows): + def __init__(self, rows, finding_ids=()): self._rows = rows + self._finding_ids = list(finding_ids) + self._sql = '' - def execute(self, *a, **k): + def execute(self, sql='', *a, **k): + self._sql = sql or '' return self def fetchall(self): + # The 
existing-findings query reads repair_findings; everything else is the + # track load. + if 'repair_findings' in self._sql: + return [(fid,) for fid in self._finding_ids] return self._rows def close(self): @@ -110,15 +117,16 @@ class _FakeConn: class _FakeDB: - def __init__(self, rows, profile): + def __init__(self, rows, profile, finding_ids=()): self._rows = rows self._profile = profile + self._finding_ids = finding_ids def get_quality_profile(self): return self._profile def _get_connection(self): - return _FakeConn(self._rows) + return _FakeConn(self._rows, self._finding_ids) def get_watchlist_artists(self, profile_id=1): return [types.SimpleNamespace(artist_name='Artist A')] @@ -135,12 +143,13 @@ def _ctx(db, findings): ) -def test_scan_creates_finding_for_low_quality_track(monkeypatch): - # One 128 kbps MP3 (below the balanced floor) for Artist A. - rows = [(1, 'Song One', '/music/a.mp3', 128, 'Artist A', 'Album X', 10)] - db = _FakeDB(rows, BALANCED) +def _row(track_id=1, title='Song One', path='/music/a.mp3', bitrate=128, duration=180000, + artist='Artist A', album='Album X', album_id=10, track_number=6): + """A track row in _TRACK_COLS order (album source-id columns default to None).""" + return (track_id, title, path, bitrate, duration, artist, album, album_id, track_number) + - # Stub the metadata side so the test stays offline. +def _stub_engine(monkeypatch): monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify') monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify']) monkeypatch.setattr( @@ -151,10 +160,16 @@ def test_scan_creates_finding_for_low_quality_track(monkeypatch): normalize_string=lambda s: s, ), ) + + +def test_scan_creates_finding_for_low_quality_track(monkeypatch): + db = _FakeDB([_row(bitrate=128)], BALANCED) + _stub_engine(monkeypatch) fake_match = {'id': 'sp1', 'name': 'Song One', 'artists': ['Artist A'], 'album': {'name': 'Album X', 'images': []}} - # No ISRC / album hit → exercise the search tier. 
- monkeypatch.setattr(qu, '_read_track_isrc', lambda fp: '') + # No track-id / ISRC / album hit → exercise the search tier. + monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {}) + monkeypatch.setattr(qu, '_match_via_track_id', lambda *a, **k: (None, None)) monkeypatch.setattr(qu, '_match_via_album', lambda *a, **k: (None, None)) monkeypatch.setattr(qu, '_find_best_match', lambda *a, **k: (fake_match, 0.95, 'spotify', True)) @@ -162,9 +177,7 @@ def test_scan_creates_finding_for_low_quality_track(monkeypatch): monkeypatch.setattr(qu, '_track_name', lambda t: 'Song One') findings = [] - job = qu.QualityUpgradeJob() - # default scope 'watchlist'; config_manager None → defaults used - result = job.scan(_ctx(db, findings)) + result = qu.QualityUpgradeJob().scan(_ctx(db, findings)) assert result.findings_created == 1 assert len(findings) == 1 @@ -177,6 +190,63 @@ def test_scan_creates_finding_for_low_quality_track(monkeypatch): assert f['details']['provider'] == 'spotify' +def test_match_via_track_id_fetches_exact_by_id(monkeypatch): + """Most-direct tier: a per-source track ID in the tags → get_track_details by ID.""" + track = {'id': 'sp9', 'name': 'Song One', 'album': {'name': 'Album X'}} + client = types.SimpleNamespace(get_track_details=lambda tid: track if tid == 'sp9' else None) + monkeypatch.setattr(qu, 'get_client_for_source', lambda src: client) + best, source = qu._match_via_track_id({'spotify_track_id': 'sp9'}, ['spotify']) + assert best['id'] == 'sp9' + assert source == 'spotify' + assert qu._match_via_track_id({}, ['spotify']) == (None, None) # no ID → nothing + + +def test_duration_ok_guard(): + assert qu._duration_ok(180000, 181000) is True # within 5s + assert qu._duration_ok(180000, 200000) is False # 20s off — wrong cut + assert qu._duration_ok(None, 200000) is True # unknown → lenient + assert qu._duration_ok(180000, 0) is True # unknown → lenient + + +def test_scan_prefers_track_id_tier(monkeypatch): + """The source's own track ID (from file 
tags) wins over every other tier.""" + db = _FakeDB([_row()], BALANCED) + _stub_engine(monkeypatch) + monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {'spotify_track_id': 'sp9', 'isrc': 'X'}) + fake = {'id': 'sp9', 'name': 'Song One', 'album': {'name': 'Album X'}} + monkeypatch.setattr(qu, '_match_via_track_id', lambda ids, sp: (fake, 'spotify')) + monkeypatch.setattr(qu, '_normalize_track_match', lambda t, s: dict(fake)) + monkeypatch.setattr(qu, '_track_name', lambda t: 'Song One') + + def _boom(*a, **k): + raise AssertionError("no lower tier should run when the track-ID tier matches") + monkeypatch.setattr(qu, '_match_via_isrc', _boom) + monkeypatch.setattr(qu, '_match_via_album', _boom) + monkeypatch.setattr(qu, '_find_best_match', _boom) + + findings = [] + result = qu.QualityUpgradeJob().scan(_ctx(db, findings)) + assert result.findings_created == 1 + assert findings[0]['details']['matched_via'] == 'track_id' + + +def test_scan_skips_already_proposed_tracks(monkeypatch): + """A re-run must not re-resolve a track that already has a finding.""" + db = _FakeDB([_row(track_id=1)], BALANCED, finding_ids=['1']) + monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify') + monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify']) + + def _boom(*a, **k): + raise AssertionError("no matching for an already-proposed track") + monkeypatch.setattr(qu, '_match_via_track_id', _boom) + monkeypatch.setattr(qu, '_find_best_match', _boom) + + findings = [] + result = qu.QualityUpgradeJob().scan(_ctx(db, findings)) + assert findings == [] + assert result.findings_skipped_dedup == 1 + + def test_match_via_isrc_accepts_exact_match(monkeypatch): """The guard accepts only a candidate whose own ISRC equals ours (dash/case insensitive), so it survives a source returning unrelated hits first.""" @@ -201,14 +271,12 @@ def test_match_via_isrc_rejects_all_mismatches(monkeypatch): def test_scan_prefers_isrc_exact_match_over_fuzzy(monkeypatch): - """When the file 
carries an ISRC and it resolves, use the exact match and do - NOT run the fuzzy search at all.""" - rows = [(1, 'Song One', '/music/a.mp3', 128, 'Artist A', 'Album X', 10)] - db = _FakeDB(rows, BALANCED) - monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify') - monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify']) - monkeypatch.setattr('core.matching_engine.MusicMatchingEngine', lambda: types.SimpleNamespace()) - monkeypatch.setattr(qu, '_read_track_isrc', lambda fp: 'USRC17607839') + """No track-ID, but the file carries an ISRC that resolves → use the exact match + and do NOT run the album/search tiers.""" + db = _FakeDB([_row()], BALANCED) + _stub_engine(monkeypatch) + monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {'isrc': 'USRC17607839'}) + monkeypatch.setattr(qu, '_match_via_track_id', lambda *a, **k: (None, None)) fake = {'id': 'sp1', 'name': 'Song One', 'artists': ['Artist A'], 'album': {'name': 'Album X'}} monkeypatch.setattr(qu, '_match_via_isrc', lambda isrc, sp: (fake, 'spotify')) monkeypatch.setattr(qu, '_normalize_track_match', lambda t, s: dict(fake)) @@ -225,15 +293,13 @@ def test_scan_prefers_isrc_exact_match_over_fuzzy(monkeypatch): assert findings[0]['details']['match_confidence'] == 1.0 -def test_scan_falls_back_to_search_without_isrc(monkeypatch): - """No usable ISRC → fall back to fuzzy search.""" - rows = [(1, 'Song One', '/music/a.mp3', 128, 'Artist A', 'Album X', 10)] - db = _FakeDB(rows, BALANCED) - monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify') - monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify']) - monkeypatch.setattr('core.matching_engine.MusicMatchingEngine', lambda: types.SimpleNamespace()) - monkeypatch.setattr(qu, '_read_track_isrc', lambda fp: '') # un-enriched - monkeypatch.setattr(qu, '_match_via_album', lambda *a, **k: (None, None)) # no album hit +def test_scan_falls_back_to_search_without_ids(monkeypatch): + """No track-ID / ISRC / album hit → fall back to 
fuzzy search.""" + db = _FakeDB([_row()], BALANCED) + _stub_engine(monkeypatch) + monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {}) # un-enriched + monkeypatch.setattr(qu, '_match_via_track_id', lambda *a, **k: (None, None)) + monkeypatch.setattr(qu, '_match_via_album', lambda *a, **k: (None, None)) fake = {'id': 'sp1', 'name': 'Song One', 'artists': ['Artist A'], 'album': {'name': 'Album X'}} monkeypatch.setattr(qu, '_find_best_match', lambda *a, **k: (fake, 0.88, 'spotify', True)) monkeypatch.setattr(qu, '_normalize_track_match', lambda t, s: dict(fake)) @@ -245,15 +311,13 @@ def test_scan_falls_back_to_search_without_isrc(monkeypatch): assert findings[0]['details']['matched_via'] == 'search' -def test_scan_uses_album_tier_when_no_isrc(monkeypatch): - """No ISRC, but the album→track lookup resolves it → matched_via 'album', - and the fuzzy search is never reached.""" - rows = [(1, 'Song One', '/music/a.mp3', 128, 'Artist A', 'Album X', 10)] - db = _FakeDB(rows, BALANCED) - monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify') - monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify']) - monkeypatch.setattr('core.matching_engine.MusicMatchingEngine', lambda: types.SimpleNamespace()) - monkeypatch.setattr(qu, '_read_track_isrc', lambda fp: '') +def test_scan_uses_album_tier_when_no_ids(monkeypatch): + """No track-ID / ISRC, but the album→track lookup resolves it → matched_via + 'album', and the fuzzy search is never reached.""" + db = _FakeDB([_row()], BALANCED) + _stub_engine(monkeypatch) + monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {}) + monkeypatch.setattr(qu, '_match_via_track_id', lambda *a, **k: (None, None)) fake = {'id': 'sp1', 'name': 'Song One', 'artists': ['Artist A'], 'album': {'name': 'Album X'}} monkeypatch.setattr(qu, '_match_via_album', lambda *a, **k: (fake, 'spotify')) monkeypatch.setattr(qu, '_normalize_track_match', lambda t, s: dict(fake)) @@ -283,8 +347,7 @@ def 
test_find_track_in_album_exact_title_with_track_number(monkeypatch): def test_scan_skips_tracks_meeting_quality(monkeypatch): # A 320 kbps MP3 meets the balanced profile → no finding, no metadata calls. - rows = [(2, 'Good Song', '/music/b.mp3', 320, 'Artist A', 'Album Y', 11)] - db = _FakeDB(rows, BALANCED) + db = _FakeDB([_row(track_id=2, title='Good Song', bitrate=320)], BALANCED) def _boom(*a, **k): # must never be called for an acceptable track raise AssertionError("matching should not run for an acceptable track")