Quality Upgrade: best-in-class matching (direct track-ID tier, dedup-skip, duration guard)

Four refinements on top of the tiered matcher:

1. Direct source track-ID tier (new top tier): enrichment writes each source's own track ID into the file tags (spotify_track_id / deezer_track_id / itunes_track_id / ...). If the file carries the active source's track ID, we fetch that exact track by ID via get_track_details, with no search at all. The tiers are now, in order: track-ID, then ISRC, then album→track, then artist+title. _read_file_ids reads the ISRC and all per-source IDs in a single tag read.
2. Skip already-proposed tracks: a re-run loads the entity_ids of the job's existing findings and skips those tracks before any API call (pending findings stay deduped, dismissed ones stay dismissed), so re-runs are cheap.
3. Wrong-version guard: the fuzzy tiers (album search + track search) reject a candidate whose length differs from ours by more than 5 s (a live/edit/remix cut with the same title). _load_tracks now selects t.duration; the exact tiers (track-ID / ISRC / stored-album-ID) skip the guard.
4. Tighter album matching: same-title cuts within an album are disambiguated by closest duration when track_number doesn't decide it.

Findings record matched_via = track_id | isrc | album | search. 30 repair tests pass (added track-ID tier, duration guard, dedup-skip, and unit coverage).
5 days ago · 030d9bf9ff
parent 777781db6a
commit 030d9bf9ff
2 changed files with 253 additions and 76 deletions
--- a/core/repair_jobs/quality_upgrade.py
+++ b/core/repair_jobs/quality_upgrade.py
@@ -64,6 +64,20 @@ _PROFILE_KEY_RANK = {
    'mp3_192': RANK_192,
 }

+# Per-source file-tag key holding that source's own track ID (written by enrichment).
+_SOURCE_TRACK_ID_TAG = {
+    'spotify': 'spotify_track_id',
+    'deezer': 'deezer_track_id',
+    'itunes': 'itunes_track_id',
+    'audiodb': 'audiodb_track_id',
+    'musicbrainz': 'musicbrainz_releasetrackid',
+    'tidal': 'tidal_track_id',
+}
+
+# Reject a fuzzy candidate whose length differs from ours by more than this (ms) —
+# catches wrong versions (live/edit/remix) that share a title. Exact tiers skip it.
+_DURATION_TOLERANCE_MS = 5000
+

 def _normalize_kbps(bitrate: Optional[int]) -> Optional[int]:
    """Library bitrate may be stored in bps (e.g. 320000) or kbps (320).
@@ -159,24 +173,68 @@ def _norm_isrc(value: Any) -> str:
    return str(value).upper().replace('-', '').replace(' ', '').strip()


-def _read_track_isrc(file_path: str) -> str:
-    """Read the ISRC the enrichment pipeline embedded in the file's tags.
+def _read_file_ids(file_path: str) -> Dict[str, str]:
+    """Read the identifiers enrichment embedded in the file's tags.

    Enrichment matches every track to the metadata sources and writes the IDs
-    (ISRC, per-source track IDs) into the file — so an already-enriched track
-    carries its exact identity. Returns '' when unreadable / not enriched."""
+    (ISRC + per-source track IDs) into the file — so an already-enriched track
+    carries its exact identity. Returns a dict with a normalized ``isrc`` plus any
+    ``<source>_track_id`` tags present; empty dict when unreadable / not enriched."""
    resolved = resolve_library_file_path(file_path) if file_path else None
    if not resolved and file_path and os.path.isfile(file_path):
        resolved = file_path
    if not resolved:
-        return ''
+        return {}
    try:
        info = read_embedded_tags(resolved)
    except Exception:
-        return ''
+        return {}
    if not info or not info.get('available'):
-        return ''
-    return _norm_isrc((info.get('tags') or {}).get('isrc'))
+        return {}
+    tags = info.get('tags') or {}
+    out: Dict[str, str] = {}
+    isrc = _norm_isrc(tags.get('isrc'))
+    if isrc:
+        out['isrc'] = isrc
+    for tag_key in set(_SOURCE_TRACK_ID_TAG.values()):
+        val = tags.get(tag_key)
+        if val:
+            out[tag_key] = str(val)
+    return out
+
+
+def _duration_ok(want_ms: Any, got_ms: Any, tolerance_ms: int = _DURATION_TOLERANCE_MS) -> bool:
+    """Wrong-version guard: True when the candidate's length is within tolerance of
+    ours — or when either length is unknown (never reject on missing data)."""
+    try:
+        w, g = int(want_ms or 0), int(got_ms or 0)
+    except (TypeError, ValueError):
+        return True
+    if w <= 0 or g <= 0:
+        return True
+    return abs(w - g) <= tolerance_ms
+
+
+def _match_via_track_id(file_ids: Dict[str, str],
+                        source_priority: List[str]) -> Tuple[Optional[Any], Optional[str]]:
+    """Most-direct path: enrichment already wrote this track's per-source IDs into
+    the file. If we have the active source's own track ID, fetch that exact track by
+    ID — no search at all. Returns (track, source) or (None, None)."""
+    for source in source_priority:
+        tag_key = _SOURCE_TRACK_ID_TAG.get(source)
+        track_id = file_ids.get(tag_key) if tag_key else None
+        if not track_id:
+            continue
+        client = get_client_for_source(source)
+        if not client or not hasattr(client, 'get_track_details'):
+            continue
+        try:
+            track = client.get_track_details(str(track_id))
+        except Exception:
+            track = None
+        if track:
+            return track, source
+    return None, None


 def _candidate_isrc(cand: Any) -> str:
@@ -217,13 +275,16 @@ def _match_via_isrc(isrc: str, source_priority: List[str]) -> Tuple[Optional[Any

 # Column order for the _load_tracks SELECT — rows come back as dicts keyed by these.
 _TRACK_COLS = (
-    'id', 'title', 'file_path', 'bitrate', 'artist_name', 'album_title', 'album_id',
-    'track_number', 'spotify_album_id', 'itunes_album_id', 'deezer_id',
+    'id', 'title', 'file_path', 'bitrate', 'duration', 'artist_name', 'album_title',
+    'album_id', 'track_number', 'spotify_album_id', 'itunes_album_id', 'deezer_id',
    'musicbrainz_release_id', 'audiodb_id',
 )

 # Human-readable note per match tier (search uses a confidence % instead).
-_MATCH_NOTE = {'isrc': 'exact ISRC match', 'album': 'matched within album'}
+_MATCH_NOTE = {
+    'track_id': 'exact track ID', 'isrc': 'exact ISRC match',
+    'album': 'matched within album',
+}

 # Per-source column holding that source's album ID on the albums table.
 _SOURCE_ALBUM_ID_COL = {
@@ -240,9 +301,11 @@ def _norm_title(value: Any) -> str:
    return ''.join(ch for ch in str(value or '').lower() if ch.isalnum())


-def _find_track_in_album(items: Any, title: str, track_number: Any, engine: Any) -> Optional[Any]:
+def _find_track_in_album(items: Any, title: str, track_number: Any, engine: Any,
+                         want_duration_ms: Any = None) -> Optional[Any]:
    """Pick the track in an album's tracklist that matches ours — exact normalized
-    title first (track_number breaks ties), then a high-similarity fuzzy fallback."""
+    title first (track_number then duration break ties), then a high-similarity
+    fuzzy fallback that respects the duration guard."""
    want = _norm_title(title)
    exact = []
    best, best_score = None, 0.0
@@ -252,6 +315,8 @@ def _find_track_in_album(items: Any, title: str, track_number: Any, engine: Any)
            exact.append(it)
            continue
        if engine and it_name:
+            if not _duration_ok(want_duration_ms, _extract_lookup_value(it, 'duration_ms', 'duration')):
+                continue
            score = engine.similarity_score(
                engine.normalize_string(title), engine.normalize_string(it_name))
            if score > best_score and score >= 0.85:
@@ -261,13 +326,17 @@ def _find_track_in_album(items: Any, title: str, track_number: Any, engine: Any)
            for it in exact:
                if _extract_lookup_value(it, 'track_number') == track_number:
                    return it
+        # Multiple same-title cuts (e.g. album + live): prefer the closest length.
+        if want_duration_ms and len(exact) > 1:
+            exact.sort(key=lambda it: abs(int(want_duration_ms) - int(
+                _extract_lookup_value(it, 'duration_ms', 'duration', default=0) or 0)))
        return exact[0]
    return best


 def _match_via_album(engine: Any, source_priority: List[str], artist: str, album_title: str,
-                     title: str, track_number: Any,
-                     stored_album_ids: Dict[str, str]) -> Tuple[Optional[Any], Optional[str]]:
+                     title: str, track_number: Any, stored_album_ids: Dict[str, str],
+                     want_duration_ms: Any = None) -> Tuple[Optional[Any], Optional[str]]:
    """Structured artist → album → track match. For each source: use the album's
    stored source ID if we already have it (enriched album), else find the album
    by searching ``artist album``; then pull that album's tracklist and locate our
@@ -305,7 +374,7 @@ def _match_via_album(engine: Any, source_priority: List[str], artist: str, album
        except Exception:
            resp = None
        items = resp.get('items') if isinstance(resp, dict) else None
-        match = _find_track_in_album(items, title, track_number, engine)
+        match = _find_track_in_album(items, title, track_number, engine, want_duration_ms)
        if match is None:
            continue
        # The album tracklist's tracks usually omit the album object — attach it so
@@ -319,7 +388,8 @@ def _match_via_album(engine: Any, source_priority: List[str], artist: str, album


 def _find_best_match(engine: Any, source_priority: List[str], title: str, artist: str,
-                     album: str, min_confidence: float) -> Tuple[Optional[Any], float, Optional[str], bool]:
+                     album: str, min_confidence: float,
+                     want_duration_ms: Any = None) -> Tuple[Optional[Any], float, Optional[str], bool]:
    """Search the configured metadata sources for the best replacement match.
    Returns (best_track, confidence, source, attempted_any_provider)."""
    temp_track = type('TempTrack', (), {'name': title, 'artists': [artist], 'album': album})()
@@ -336,6 +406,10 @@ def _find_best_match(engine: Any, source_priority: List[str], title: str, artist
            matches = _search_tracks_for_source(source, query, limit=5, client=client)
            time.sleep(0.5)  # be gentle on metadata APIs
            for cand in matches or []:
+                # Wrong-version guard: a candidate whose length is way off is a
+                # different cut (live/edit/remix) — reject before it can win.
+                if not _duration_ok(want_duration_ms, _extract_lookup_value(cand, 'duration_ms', 'duration')):
+                    continue
                cand_artists = _track_artist_names(cand)
                artist_conf = max(
                    (engine.similarity_score(engine.normalize_string(artist),
@@ -369,12 +443,14 @@ class QualityUpgradeJob(RepairJob):
        "track against your Quality Profile using BOTH the file format and its "
        'bitrate — so a 128 kbps MP3 is no longer treated the same as a 320 kbps '
        'one, and enabling MP3-320/256 in your profile actually counts.\n\n'
-        'For every track below your preferred quality, it finds a better version and '
-        'creates a finding. If the track was enriched, it uses the ISRC embedded in '
-        'the file to resolve the EXACT track (and its album) — no guessing; otherwise '
-        'it falls back to a name/artist search with a confidence score. Nothing is '
-        'queued automatically: applying a finding adds that matched track — with its '
-        'album context — to the wishlist, the same as any other download.\n\n'
+        'For every track below your preferred quality it resolves the exact better '
+        'version using the most precise identity available, in order: the source '
+        "track ID enrichment wrote into the file → the file's ISRC → the album's "
+        'tracklist (by stored album ID or album search) → a name/artist search. The '
+        'fuzzy steps also reject candidates whose length is off (wrong live/edit cut). '
+        'It skips tracks it already proposed, so re-runs are cheap. Nothing is queued '
+        'automatically: applying a finding adds that matched track — with its album '
+        'context — to the wishlist, the same as any other download.\n\n'
        'Settings:\n'
        '- Scope: "watchlist" (watchlisted artists only) or "all" (whole library)\n'
        '- Min confidence: minimum match confidence (0-1) to surface a finding\n\n'
@@ -404,8 +480,8 @@ class QualityUpgradeJob(RepairJob):
        conn = db._get_connection()
        try:
            base = (
-                "SELECT t.id, t.title, t.file_path, t.bitrate, a.name AS artist_name, "
-                "al.title AS album_title, t.album_id, t.track_number, "
+                "SELECT t.id, t.title, t.file_path, t.bitrate, t.duration, "
+                "a.name AS artist_name, al.title AS album_title, t.album_id, t.track_number, "
                "al.spotify_album_id, al.itunes_album_id, al.deezer_id, "
                "al.musicbrainz_release_id, al.audiodb_id "
                "FROM tracks t "
@@ -428,6 +504,21 @@ class QualityUpgradeJob(RepairJob):
        finally:
            conn.close()

+    def _load_existing_finding_ids(self, db: Any) -> set:
+        """Track IDs that already have a finding for this job (any status). Lets a
+        re-run skip tracks we've already proposed/dismissed without re-hitting the
+        metadata API — pending stays deduped, and a dismissed track stays dismissed."""
+        conn = db._get_connection()
+        try:
+            rows = conn.execute(
+                "SELECT entity_id FROM repair_findings WHERE job_id = ? AND entity_type = 'track'",
+                (self.job_id,)).fetchall()
+            return {str(r[0]) for r in rows if r and r[0] is not None}
+        except Exception:
+            return set()
+        finally:
+            conn.close()
+
    def estimate_scope(self, context: JobContext) -> int:
        try:
            return len(self._load_tracks(context.db, self._get_settings(context)['scope']))
@@ -459,6 +550,10 @@ class QualityUpgradeJob(RepairJob):
        if context.report_progress:
            context.report_progress(phase=f'Checking quality on {total} tracks...', total=total)

+        # Tracks we've already proposed/dismissed — skip them so a re-run doesn't
+        # re-resolve the same tracks against the metadata API.
+        already_found = self._load_existing_finding_ids(db)
+
        # Metadata source for matching — resolved lazily so we only fail if we
        # actually find a low-quality track that needs a match.
        engine = None
@@ -474,6 +569,7 @@ class QualityUpgradeJob(RepairJob):
            title = row['title']
            file_path = row['file_path']
            bitrate = row['bitrate']
+            duration_ms = row.get('duration')
            artist_name = row['artist_name']
            album_title = row['album_title']
            album_id = row['album_id']
@@ -483,6 +579,10 @@ class QualityUpgradeJob(RepairJob):
            }
            result.scanned += 1

+            if str(track_id) in already_found:
+                result.findings_skipped_dedup += 1
+                continue
+
            if meets_preferred_quality(file_path, bitrate, quality_profile):
                result.skipped += 1
                if context.update_progress and (i + 1) % 25 == 0:
@@ -510,26 +610,39 @@ class QualityUpgradeJob(RepairJob):
                    log_line=f'Low quality ({current_label}): {artist_name} - {title}',
                    log_type='info')

+            # Read the identifiers enrichment embedded in the file once (ISRC +
+            # per-source track IDs), used by the two most-exact tiers below.
+            file_ids = _read_file_ids(file_path)
+
            # Tiered match, best identity first, loosest last:
-            #   1. ISRC embedded in the file tags (enriched track) → EXACT track.
-            #   2. Album → track: use the album's stored source ID if we have it
-            #      (enriched album), else find the album by search, then locate our
-            #      track in its tracklist. Pins the right album even when the track
-            #      itself isn't enriched. (artist → album → track)
+            #   0. The active source's OWN track ID, embedded in the file by
+            #      enrichment → fetch that exact track by ID. No search at all.
+            #   1. ISRC (also in the tags) → exact track on any source.
+            #   2. Album → track: stored album source ID if we have it (enriched
+            #      album), else find the album by search, then locate our track in
+            #      its tracklist. Pins the right album even when the track itself
+            #      isn't enriched. (artist → album → track)
            #   3. Plain artist+title search with similarity scoring. (artist → track)
+            # The fuzzy tiers (2-3) also apply a duration guard to reject wrong cuts.
            best, source, conf, attempted = None, None, 0.0, False

-            matched_via = 'isrc'
-            best, source = _match_via_isrc(_read_track_isrc(file_path), source_priority)
+            matched_via = 'track_id'
+            best, source = _match_via_track_id(file_ids, source_priority)
            if best:
                conf, attempted = 1.0, True

+            if not best:
+                matched_via = 'isrc'
+                best, source = _match_via_isrc(file_ids.get('isrc', ''), source_priority)
+                if best:
+                    conf, attempted = 1.0, True
+
            if not best:
                matched_via = 'album'
                try:
                    best, source = _match_via_album(
                        engine, source_priority, artist_name or '', album_title or '',
-                        title, track_number, stored_album_ids)
+                        title, track_number, stored_album_ids, duration_ms)
                except Exception as e:
                    logger.debug("[Quality Upgrade] Album match error for %s - %s: %s", artist_name, title, e)
                    best = None
@@ -540,7 +653,8 @@ class QualityUpgradeJob(RepairJob):
                matched_via = 'search'
                try:
                    best, conf, source, attempted = _find_best_match(
-                        engine, source_priority, title, artist_name or '', album_title or '', min_conf)
+                        engine, source_priority, title, artist_name or '', album_title or '',
+                        min_conf, duration_ms)
                except Exception as e:
                    logger.debug("[Quality Upgrade] Match error for %s - %s: %s", artist_name, title, e)
                    result.errors += 1
--- a/tests/repair_jobs/test_quality_upgrade.py
+++ b/tests/repair_jobs/test_quality_upgrade.py
@@ -96,13 +96,20 @@ def meets(path, bitrate, profile):
 # --- scan produces a finding (seam) ----------------------------------------

 class _FakeConn:
-    def __init__(self, rows):
+    def __init__(self, rows, finding_ids=()):
        self._rows = rows
+        self._finding_ids = list(finding_ids)
+        self._sql = ''

-    def execute(self, *a, **k):
+    def execute(self, sql='', *a, **k):
+        self._sql = sql or ''
        return self

    def fetchall(self):
+        # The existing-findings query reads repair_findings; everything else is the
+        # track load.
+        if 'repair_findings' in self._sql:
+            return [(fid,) for fid in self._finding_ids]
        return self._rows

    def close(self):
@@ -110,15 +117,16 @@ class _FakeConn:


 class _FakeDB:
-    def __init__(self, rows, profile):
+    def __init__(self, rows, profile, finding_ids=()):
        self._rows = rows
        self._profile = profile
+        self._finding_ids = finding_ids

    def get_quality_profile(self):
        return self._profile

    def _get_connection(self):
-        return _FakeConn(self._rows)
+        return _FakeConn(self._rows, self._finding_ids)

    def get_watchlist_artists(self, profile_id=1):
        return [types.SimpleNamespace(artist_name='Artist A')]
@@ -135,12 +143,13 @@ def _ctx(db, findings):
    )


-def test_scan_creates_finding_for_low_quality_track(monkeypatch):
-    # One 128 kbps MP3 (below the balanced floor) for Artist A.
-    rows = [(1, 'Song One', '/music/a.mp3', 128, 'Artist A', 'Album X', 10)]
-    db = _FakeDB(rows, BALANCED)
+def _row(track_id=1, title='Song One', path='/music/a.mp3', bitrate=128, duration=180000,
+         artist='Artist A', album='Album X', album_id=10, track_number=6):
+    """A track row in _TRACK_COLS order (album source-id columns default to None)."""
+    return (track_id, title, path, bitrate, duration, artist, album, album_id, track_number)
+

-    # Stub the metadata side so the test stays offline.
+def _stub_engine(monkeypatch):
    monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify')
    monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify'])
    monkeypatch.setattr(
@@ -151,10 +160,16 @@ def test_scan_creates_finding_for_low_quality_track(monkeypatch):
            normalize_string=lambda s: s,
        ),
    )
+
+
+def test_scan_creates_finding_for_low_quality_track(monkeypatch):
+    db = _FakeDB([_row(bitrate=128)], BALANCED)
+    _stub_engine(monkeypatch)
    fake_match = {'id': 'sp1', 'name': 'Song One', 'artists': ['Artist A'],
                  'album': {'name': 'Album X', 'images': []}}
-    # No ISRC / album hit → exercise the search tier.
-    monkeypatch.setattr(qu, '_read_track_isrc', lambda fp: '')
+    # No track-id / ISRC / album hit → exercise the search tier.
+    monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {})
+    monkeypatch.setattr(qu, '_match_via_track_id', lambda *a, **k: (None, None))
    monkeypatch.setattr(qu, '_match_via_album', lambda *a, **k: (None, None))
    monkeypatch.setattr(qu, '_find_best_match',
                        lambda *a, **k: (fake_match, 0.95, 'spotify', True))
@@ -162,9 +177,7 @@ def test_scan_creates_finding_for_low_quality_track(monkeypatch):
    monkeypatch.setattr(qu, '_track_name', lambda t: 'Song One')

    findings = []
-    job = qu.QualityUpgradeJob()
-    # default scope 'watchlist'; config_manager None → defaults used
-    result = job.scan(_ctx(db, findings))
+    result = qu.QualityUpgradeJob().scan(_ctx(db, findings))

    assert result.findings_created == 1
    assert len(findings) == 1
@@ -177,6 +190,63 @@ def test_scan_creates_finding_for_low_quality_track(monkeypatch):
    assert f['details']['provider'] == 'spotify'


+def test_match_via_track_id_fetches_exact_by_id(monkeypatch):
+    """Most-direct tier: a per-source track ID in the tags → get_track_details by ID."""
+    track = {'id': 'sp9', 'name': 'Song One', 'album': {'name': 'Album X'}}
+    client = types.SimpleNamespace(get_track_details=lambda tid: track if tid == 'sp9' else None)
+    monkeypatch.setattr(qu, 'get_client_for_source', lambda src: client)
+    best, source = qu._match_via_track_id({'spotify_track_id': 'sp9'}, ['spotify'])
+    assert best['id'] == 'sp9'
+    assert source == 'spotify'
+    assert qu._match_via_track_id({}, ['spotify']) == (None, None)  # no ID → nothing
+
+
+def test_duration_ok_guard():
+    assert qu._duration_ok(180000, 181000) is True      # within 5s
+    assert qu._duration_ok(180000, 200000) is False     # 20s off — wrong cut
+    assert qu._duration_ok(None, 200000) is True         # unknown → lenient
+    assert qu._duration_ok(180000, 0) is True            # unknown → lenient
+
+
+def test_scan_prefers_track_id_tier(monkeypatch):
+    """The source's own track ID (from file tags) wins over every other tier."""
+    db = _FakeDB([_row()], BALANCED)
+    _stub_engine(monkeypatch)
+    monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {'spotify_track_id': 'sp9', 'isrc': 'X'})
+    fake = {'id': 'sp9', 'name': 'Song One', 'album': {'name': 'Album X'}}
+    monkeypatch.setattr(qu, '_match_via_track_id', lambda ids, sp: (fake, 'spotify'))
+    monkeypatch.setattr(qu, '_normalize_track_match', lambda t, s: dict(fake))
+    monkeypatch.setattr(qu, '_track_name', lambda t: 'Song One')
+
+    def _boom(*a, **k):
+        raise AssertionError("no lower tier should run when the track-ID tier matches")
+    monkeypatch.setattr(qu, '_match_via_isrc', _boom)
+    monkeypatch.setattr(qu, '_match_via_album', _boom)
+    monkeypatch.setattr(qu, '_find_best_match', _boom)
+
+    findings = []
+    result = qu.QualityUpgradeJob().scan(_ctx(db, findings))
+    assert result.findings_created == 1
+    assert findings[0]['details']['matched_via'] == 'track_id'
+
+
+def test_scan_skips_already_proposed_tracks(monkeypatch):
+    """A re-run must not re-resolve a track that already has a finding."""
+    db = _FakeDB([_row(track_id=1)], BALANCED, finding_ids=['1'])
+    monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify')
+    monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify'])
+
+    def _boom(*a, **k):
+        raise AssertionError("no matching for an already-proposed track")
+    monkeypatch.setattr(qu, '_match_via_track_id', _boom)
+    monkeypatch.setattr(qu, '_find_best_match', _boom)
+
+    findings = []
+    result = qu.QualityUpgradeJob().scan(_ctx(db, findings))
+    assert findings == []
+    assert result.findings_skipped_dedup == 1
+
+
 def test_match_via_isrc_accepts_exact_match(monkeypatch):
    """The guard accepts only a candidate whose own ISRC equals ours (dash/case
    insensitive), so it survives a source returning unrelated hits first."""
@@ -201,14 +271,12 @@ def test_match_via_isrc_rejects_all_mismatches(monkeypatch):


 def test_scan_prefers_isrc_exact_match_over_fuzzy(monkeypatch):
-    """When the file carries an ISRC and it resolves, use the exact match and do
-    NOT run the fuzzy search at all."""
-    rows = [(1, 'Song One', '/music/a.mp3', 128, 'Artist A', 'Album X', 10)]
-    db = _FakeDB(rows, BALANCED)
-    monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify')
-    monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify'])
-    monkeypatch.setattr('core.matching_engine.MusicMatchingEngine', lambda: types.SimpleNamespace())
-    monkeypatch.setattr(qu, '_read_track_isrc', lambda fp: 'USRC17607839')
+    """No track-ID, but the file carries an ISRC that resolves → use the exact match
+    and do NOT run the album/search tiers."""
+    db = _FakeDB([_row()], BALANCED)
+    _stub_engine(monkeypatch)
+    monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {'isrc': 'USRC17607839'})
+    monkeypatch.setattr(qu, '_match_via_track_id', lambda *a, **k: (None, None))
    fake = {'id': 'sp1', 'name': 'Song One', 'artists': ['Artist A'], 'album': {'name': 'Album X'}}
    monkeypatch.setattr(qu, '_match_via_isrc', lambda isrc, sp: (fake, 'spotify'))
    monkeypatch.setattr(qu, '_normalize_track_match', lambda t, s: dict(fake))
@@ -225,15 +293,13 @@ def test_scan_prefers_isrc_exact_match_over_fuzzy(monkeypatch):
    assert findings[0]['details']['match_confidence'] == 1.0


-def test_scan_falls_back_to_search_without_isrc(monkeypatch):
-    """No usable ISRC → fall back to fuzzy search."""
-    rows = [(1, 'Song One', '/music/a.mp3', 128, 'Artist A', 'Album X', 10)]
-    db = _FakeDB(rows, BALANCED)
-    monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify')
-    monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify'])
-    monkeypatch.setattr('core.matching_engine.MusicMatchingEngine', lambda: types.SimpleNamespace())
-    monkeypatch.setattr(qu, '_read_track_isrc', lambda fp: '')  # un-enriched
-    monkeypatch.setattr(qu, '_match_via_album', lambda *a, **k: (None, None))  # no album hit
+def test_scan_falls_back_to_search_without_ids(monkeypatch):
+    """No track-ID / ISRC / album hit → fall back to fuzzy search."""
+    db = _FakeDB([_row()], BALANCED)
+    _stub_engine(monkeypatch)
+    monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {})  # un-enriched
+    monkeypatch.setattr(qu, '_match_via_track_id', lambda *a, **k: (None, None))
+    monkeypatch.setattr(qu, '_match_via_album', lambda *a, **k: (None, None))
    fake = {'id': 'sp1', 'name': 'Song One', 'artists': ['Artist A'], 'album': {'name': 'Album X'}}
    monkeypatch.setattr(qu, '_find_best_match', lambda *a, **k: (fake, 0.88, 'spotify', True))
    monkeypatch.setattr(qu, '_normalize_track_match', lambda t, s: dict(fake))
@@ -245,15 +311,13 @@ def test_scan_falls_back_to_search_without_isrc(monkeypatch):
    assert findings[0]['details']['matched_via'] == 'search'


-def test_scan_uses_album_tier_when_no_isrc(monkeypatch):
-    """No ISRC, but the album→track lookup resolves it → matched_via 'album',
-    and the fuzzy search is never reached."""
-    rows = [(1, 'Song One', '/music/a.mp3', 128, 'Artist A', 'Album X', 10)]
-    db = _FakeDB(rows, BALANCED)
-    monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify')
-    monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify'])
-    monkeypatch.setattr('core.matching_engine.MusicMatchingEngine', lambda: types.SimpleNamespace())
-    monkeypatch.setattr(qu, '_read_track_isrc', lambda fp: '')
+def test_scan_uses_album_tier_when_no_ids(monkeypatch):
+    """No track-ID / ISRC, but the album→track lookup resolves it → matched_via
+    'album', and the fuzzy search is never reached."""
+    db = _FakeDB([_row()], BALANCED)
+    _stub_engine(monkeypatch)
+    monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {})
+    monkeypatch.setattr(qu, '_match_via_track_id', lambda *a, **k: (None, None))
    fake = {'id': 'sp1', 'name': 'Song One', 'artists': ['Artist A'], 'album': {'name': 'Album X'}}
    monkeypatch.setattr(qu, '_match_via_album', lambda *a, **k: (fake, 'spotify'))
    monkeypatch.setattr(qu, '_normalize_track_match', lambda t, s: dict(fake))
@@ -283,8 +347,7 @@ def test_find_track_in_album_exact_title_with_track_number(monkeypatch):

 def test_scan_skips_tracks_meeting_quality(monkeypatch):
    # A 320 kbps MP3 meets the balanced profile → no finding, no metadata calls.
-    rows = [(2, 'Good Song', '/music/b.mp3', 320, 'Artist A', 'Album Y', 11)]
-    db = _FakeDB(rows, BALANCED)
+    db = _FakeDB([_row(track_id=2, title='Good Song', bitrate=320)], BALANCED)

    def _boom(*a, **k):  # must never be called for an acceptable track
        raise AssertionError("matching should not run for an acceptable track")