Quality Upgrade: best-in-class matching (direct track-ID tier, dedup-skip, duration guard)

Four refinements on top of the tiered matcher:

1. Direct source track-ID tier (new top tier): enrichment writes each source's own
   track ID into the file tags (spotify_track_id/deezer_track_id/itunes_track_id/...).
   If we have the active source's track ID, fetch that exact track by ID via
   get_track_details — zero search. Tiers are now: track-ID -> ISRC -> album->track
   -> artist+title. _read_file_ids reads ISRC + all per-source IDs in one tag read.

2. Skip already-proposed tracks: a re-run loads existing finding entity_ids for the
   job and skips those tracks before any API call (pending stays deduped, dismissed
   stays dismissed) — re-runs are cheap.

3. Wrong-version guard: the fuzzy tiers (album-search + track search) reject a
   candidate whose length differs from ours by >5s (live/edit/remix with same title).
   _load_tracks now selects t.duration; exact tiers (track-ID/ISRC/stored-album-ID)
   skip the guard.

4. Tighter album matching: same-title cuts in an album are disambiguated by closest
   duration when track_number doesn't decide it.

Findings record matched_via = track_id | isrc | album | search. 30 repair tests pass
(added track-ID tier, duration guard, dedup-skip, and unit coverage).
pull/869/head
BoulderBadgeDad 5 days ago
parent 777781db6a
commit 030d9bf9ff

@ -64,6 +64,20 @@ _PROFILE_KEY_RANK = {
'mp3_192': RANK_192,
}
# Per-source file-tag key holding that source's own track ID (written by enrichment).
_SOURCE_TRACK_ID_TAG = {
'spotify': 'spotify_track_id',
'deezer': 'deezer_track_id',
'itunes': 'itunes_track_id',
'audiodb': 'audiodb_track_id',
'musicbrainz': 'musicbrainz_releasetrackid',
'tidal': 'tidal_track_id',
}
# Reject a fuzzy candidate whose length differs from ours by more than this (ms) —
# catches wrong versions (live/edit/remix) that share a title. Exact tiers skip it.
_DURATION_TOLERANCE_MS = 5000
def _normalize_kbps(bitrate: Optional[int]) -> Optional[int]:
"""Library bitrate may be stored in bps (e.g. 320000) or kbps (320).
@ -159,24 +173,68 @@ def _norm_isrc(value: Any) -> str:
return str(value).upper().replace('-', '').replace(' ', '').strip()
def _read_track_isrc(file_path: str) -> str:
"""Read the ISRC the enrichment pipeline embedded in the file's tags.
def _read_file_ids(file_path: str) -> Dict[str, str]:
"""Read the identifiers enrichment embedded in the file's tags.
Enrichment matches every track to the metadata sources and writes the IDs
(ISRC, per-source track IDs) into the file so an already-enriched track
carries its exact identity. Returns '' when unreadable / not enriched."""
(ISRC + per-source track IDs) into the file so an already-enriched track
carries its exact identity. Returns a dict with a normalized ``isrc`` plus any
``<source>_track_id`` tags present; empty dict when unreadable / not enriched."""
resolved = resolve_library_file_path(file_path) if file_path else None
if not resolved and file_path and os.path.isfile(file_path):
resolved = file_path
if not resolved:
return ''
return {}
try:
info = read_embedded_tags(resolved)
except Exception:
return ''
return {}
if not info or not info.get('available'):
return ''
return _norm_isrc((info.get('tags') or {}).get('isrc'))
return {}
tags = info.get('tags') or {}
out: Dict[str, str] = {}
isrc = _norm_isrc(tags.get('isrc'))
if isrc:
out['isrc'] = isrc
for tag_key in set(_SOURCE_TRACK_ID_TAG.values()):
val = tags.get(tag_key)
if val:
out[tag_key] = str(val)
return out
def _duration_ok(want_ms: Any, got_ms: Any, tolerance_ms: int = _DURATION_TOLERANCE_MS) -> bool:
"""Wrong-version guard: True when the candidate's length is within tolerance of
ours or when either length is unknown (never reject on missing data)."""
try:
w, g = int(want_ms or 0), int(got_ms or 0)
except (TypeError, ValueError):
return True
if w <= 0 or g <= 0:
return True
return abs(w - g) <= tolerance_ms
def _match_via_track_id(file_ids: Dict[str, str],
source_priority: List[str]) -> Tuple[Optional[Any], Optional[str]]:
"""Most-direct path: enrichment already wrote this track's per-source IDs into
the file. If we have the active source's own track ID, fetch that exact track by
ID no search at all. Returns (track, source) or (None, None)."""
for source in source_priority:
tag_key = _SOURCE_TRACK_ID_TAG.get(source)
track_id = file_ids.get(tag_key) if tag_key else None
if not track_id:
continue
client = get_client_for_source(source)
if not client or not hasattr(client, 'get_track_details'):
continue
try:
track = client.get_track_details(str(track_id))
except Exception:
track = None
if track:
return track, source
return None, None
def _candidate_isrc(cand: Any) -> str:
@ -217,13 +275,16 @@ def _match_via_isrc(isrc: str, source_priority: List[str]) -> Tuple[Optional[Any
# Column order for the _load_tracks SELECT — rows come back as dicts keyed by these.
_TRACK_COLS = (
'id', 'title', 'file_path', 'bitrate', 'artist_name', 'album_title', 'album_id',
'track_number', 'spotify_album_id', 'itunes_album_id', 'deezer_id',
'id', 'title', 'file_path', 'bitrate', 'duration', 'artist_name', 'album_title',
'album_id', 'track_number', 'spotify_album_id', 'itunes_album_id', 'deezer_id',
'musicbrainz_release_id', 'audiodb_id',
)
# Human-readable note per match tier (search uses a confidence % instead).
_MATCH_NOTE = {'isrc': 'exact ISRC match', 'album': 'matched within album'}
_MATCH_NOTE = {
'track_id': 'exact track ID', 'isrc': 'exact ISRC match',
'album': 'matched within album',
}
# Per-source column holding that source's album ID on the albums table.
_SOURCE_ALBUM_ID_COL = {
@ -240,9 +301,11 @@ def _norm_title(value: Any) -> str:
return ''.join(ch for ch in str(value or '').lower() if ch.isalnum())
def _find_track_in_album(items: Any, title: str, track_number: Any, engine: Any) -> Optional[Any]:
def _find_track_in_album(items: Any, title: str, track_number: Any, engine: Any,
want_duration_ms: Any = None) -> Optional[Any]:
"""Pick the track in an album's tracklist that matches ours — exact normalized
title first (track_number breaks ties), then a high-similarity fuzzy fallback."""
title first (track_number then duration break ties), then a high-similarity
fuzzy fallback that respects the duration guard."""
want = _norm_title(title)
exact = []
best, best_score = None, 0.0
@ -252,6 +315,8 @@ def _find_track_in_album(items: Any, title: str, track_number: Any, engine: Any)
exact.append(it)
continue
if engine and it_name:
if not _duration_ok(want_duration_ms, _extract_lookup_value(it, 'duration_ms', 'duration')):
continue
score = engine.similarity_score(
engine.normalize_string(title), engine.normalize_string(it_name))
if score > best_score and score >= 0.85:
@ -261,13 +326,17 @@ def _find_track_in_album(items: Any, title: str, track_number: Any, engine: Any)
for it in exact:
if _extract_lookup_value(it, 'track_number') == track_number:
return it
# Multiple same-title cuts (e.g. album + live): prefer the closest length.
if want_duration_ms and len(exact) > 1:
exact.sort(key=lambda it: abs(int(want_duration_ms) - int(
_extract_lookup_value(it, 'duration_ms', 'duration', default=0) or 0)))
return exact[0]
return best
def _match_via_album(engine: Any, source_priority: List[str], artist: str, album_title: str,
title: str, track_number: Any,
stored_album_ids: Dict[str, str]) -> Tuple[Optional[Any], Optional[str]]:
title: str, track_number: Any, stored_album_ids: Dict[str, str],
want_duration_ms: Any = None) -> Tuple[Optional[Any], Optional[str]]:
"""Structured artist → album → track match. For each source: use the album's
stored source ID if we already have it (enriched album), else find the album
by searching ``artist album``; then pull that album's tracklist and locate our
@ -305,7 +374,7 @@ def _match_via_album(engine: Any, source_priority: List[str], artist: str, album
except Exception:
resp = None
items = resp.get('items') if isinstance(resp, dict) else None
match = _find_track_in_album(items, title, track_number, engine)
match = _find_track_in_album(items, title, track_number, engine, want_duration_ms)
if match is None:
continue
# The album tracklist's tracks usually omit the album object — attach it so
@ -319,7 +388,8 @@ def _match_via_album(engine: Any, source_priority: List[str], artist: str, album
def _find_best_match(engine: Any, source_priority: List[str], title: str, artist: str,
album: str, min_confidence: float) -> Tuple[Optional[Any], float, Optional[str], bool]:
album: str, min_confidence: float,
want_duration_ms: Any = None) -> Tuple[Optional[Any], float, Optional[str], bool]:
"""Search the configured metadata sources for the best replacement match.
Returns (best_track, confidence, source, attempted_any_provider)."""
temp_track = type('TempTrack', (), {'name': title, 'artists': [artist], 'album': album})()
@ -336,6 +406,10 @@ def _find_best_match(engine: Any, source_priority: List[str], title: str, artist
matches = _search_tracks_for_source(source, query, limit=5, client=client)
time.sleep(0.5) # be gentle on metadata APIs
for cand in matches or []:
# Wrong-version guard: a candidate whose length is way off is a
# different cut (live/edit/remix) — reject before it can win.
if not _duration_ok(want_duration_ms, _extract_lookup_value(cand, 'duration_ms', 'duration')):
continue
cand_artists = _track_artist_names(cand)
artist_conf = max(
(engine.similarity_score(engine.normalize_string(artist),
@ -369,12 +443,14 @@ class QualityUpgradeJob(RepairJob):
"track against your Quality Profile using BOTH the file format and its "
'bitrate — so a 128 kbps MP3 is no longer treated the same as a 320 kbps '
'one, and enabling MP3-320/256 in your profile actually counts.\n\n'
'For every track below your preferred quality, it finds a better version and '
'creates a finding. If the track was enriched, it uses the ISRC embedded in '
'the file to resolve the EXACT track (and its album) — no guessing; otherwise '
'it falls back to a name/artist search with a confidence score. Nothing is '
'queued automatically: applying a finding adds that matched track — with its '
'album context — to the wishlist, the same as any other download.\n\n'
'For every track below your preferred quality it resolves the exact better '
'version using the most precise identity available, in order: the source '
"track ID enrichment wrote into the file → the file's ISRC → the album's "
'tracklist (by stored album ID or album search) → a name/artist search. The '
'fuzzy steps also reject candidates whose length is off (wrong live/edit cut). '
'It skips tracks it already proposed, so re-runs are cheap. Nothing is queued '
'automatically: applying a finding adds that matched track — with its album '
'context — to the wishlist, the same as any other download.\n\n'
'Settings:\n'
'- Scope: "watchlist" (watchlisted artists only) or "all" (whole library)\n'
'- Min confidence: minimum match confidence (0-1) to surface a finding\n\n'
@ -404,8 +480,8 @@ class QualityUpgradeJob(RepairJob):
conn = db._get_connection()
try:
base = (
"SELECT t.id, t.title, t.file_path, t.bitrate, a.name AS artist_name, "
"al.title AS album_title, t.album_id, t.track_number, "
"SELECT t.id, t.title, t.file_path, t.bitrate, t.duration, "
"a.name AS artist_name, al.title AS album_title, t.album_id, t.track_number, "
"al.spotify_album_id, al.itunes_album_id, al.deezer_id, "
"al.musicbrainz_release_id, al.audiodb_id "
"FROM tracks t "
@ -428,6 +504,21 @@ class QualityUpgradeJob(RepairJob):
finally:
conn.close()
def _load_existing_finding_ids(self, db: Any) -> set:
"""Track IDs that already have a finding for this job (any status). Lets a
re-run skip tracks we've already proposed/dismissed without re-hitting the
metadata API pending stays deduped, and a dismissed track stays dismissed."""
conn = db._get_connection()
try:
rows = conn.execute(
"SELECT entity_id FROM repair_findings WHERE job_id = ? AND entity_type = 'track'",
(self.job_id,)).fetchall()
return {str(r[0]) for r in rows if r and r[0] is not None}
except Exception:
return set()
finally:
conn.close()
def estimate_scope(self, context: JobContext) -> int:
try:
return len(self._load_tracks(context.db, self._get_settings(context)['scope']))
@ -459,6 +550,10 @@ class QualityUpgradeJob(RepairJob):
if context.report_progress:
context.report_progress(phase=f'Checking quality on {total} tracks...', total=total)
# Tracks we've already proposed/dismissed — skip them so a re-run doesn't
# re-resolve the same tracks against the metadata API.
already_found = self._load_existing_finding_ids(db)
# Metadata source for matching — resolved lazily so we only fail if we
# actually find a low-quality track that needs a match.
engine = None
@ -474,6 +569,7 @@ class QualityUpgradeJob(RepairJob):
title = row['title']
file_path = row['file_path']
bitrate = row['bitrate']
duration_ms = row.get('duration')
artist_name = row['artist_name']
album_title = row['album_title']
album_id = row['album_id']
@ -483,6 +579,10 @@ class QualityUpgradeJob(RepairJob):
}
result.scanned += 1
if str(track_id) in already_found:
result.findings_skipped_dedup += 1
continue
if meets_preferred_quality(file_path, bitrate, quality_profile):
result.skipped += 1
if context.update_progress and (i + 1) % 25 == 0:
@ -510,26 +610,39 @@ class QualityUpgradeJob(RepairJob):
log_line=f'Low quality ({current_label}): {artist_name} - {title}',
log_type='info')
# Read the identifiers enrichment embedded in the file once (ISRC +
# per-source track IDs), used by the two most-exact tiers below.
file_ids = _read_file_ids(file_path)
# Tiered match, best identity first, loosest last:
# 1. ISRC embedded in the file tags (enriched track) → EXACT track.
# 2. Album → track: use the album's stored source ID if we have it
# (enriched album), else find the album by search, then locate our
# track in its tracklist. Pins the right album even when the track
# itself isn't enriched. (artist → album → track)
# 0. The active source's OWN track ID, embedded in the file by
# enrichment → fetch that exact track by ID. No search at all.
# 1. ISRC (also in the tags) → exact track on any source.
# 2. Album → track: stored album source ID if we have it (enriched
# album), else find the album by search, then locate our track in
# its tracklist. Pins the right album even when the track itself
# isn't enriched. (artist → album → track)
# 3. Plain artist+title search with similarity scoring. (artist → track)
# The fuzzy tiers (2-3) also apply a duration guard to reject wrong cuts.
best, source, conf, attempted = None, None, 0.0, False
matched_via = 'isrc'
best, source = _match_via_isrc(_read_track_isrc(file_path), source_priority)
matched_via = 'track_id'
best, source = _match_via_track_id(file_ids, source_priority)
if best:
conf, attempted = 1.0, True
if not best:
matched_via = 'isrc'
best, source = _match_via_isrc(file_ids.get('isrc', ''), source_priority)
if best:
conf, attempted = 1.0, True
if not best:
matched_via = 'album'
try:
best, source = _match_via_album(
engine, source_priority, artist_name or '', album_title or '',
title, track_number, stored_album_ids)
title, track_number, stored_album_ids, duration_ms)
except Exception as e:
logger.debug("[Quality Upgrade] Album match error for %s - %s: %s", artist_name, title, e)
best = None
@ -540,7 +653,8 @@ class QualityUpgradeJob(RepairJob):
matched_via = 'search'
try:
best, conf, source, attempted = _find_best_match(
engine, source_priority, title, artist_name or '', album_title or '', min_conf)
engine, source_priority, title, artist_name or '', album_title or '',
min_conf, duration_ms)
except Exception as e:
logger.debug("[Quality Upgrade] Match error for %s - %s: %s", artist_name, title, e)
result.errors += 1

@ -96,13 +96,20 @@ def meets(path, bitrate, profile):
# --- scan produces a finding (seam) ----------------------------------------
class _FakeConn:
def __init__(self, rows):
def __init__(self, rows, finding_ids=()):
self._rows = rows
self._finding_ids = list(finding_ids)
self._sql = ''
def execute(self, *a, **k):
def execute(self, sql='', *a, **k):
self._sql = sql or ''
return self
def fetchall(self):
# The existing-findings query reads repair_findings; everything else is the
# track load.
if 'repair_findings' in self._sql:
return [(fid,) for fid in self._finding_ids]
return self._rows
def close(self):
@ -110,15 +117,16 @@ class _FakeConn:
class _FakeDB:
def __init__(self, rows, profile):
def __init__(self, rows, profile, finding_ids=()):
self._rows = rows
self._profile = profile
self._finding_ids = finding_ids
def get_quality_profile(self):
return self._profile
def _get_connection(self):
return _FakeConn(self._rows)
return _FakeConn(self._rows, self._finding_ids)
def get_watchlist_artists(self, profile_id=1):
return [types.SimpleNamespace(artist_name='Artist A')]
@ -135,12 +143,13 @@ def _ctx(db, findings):
)
def test_scan_creates_finding_for_low_quality_track(monkeypatch):
# One 128 kbps MP3 (below the balanced floor) for Artist A.
rows = [(1, 'Song One', '/music/a.mp3', 128, 'Artist A', 'Album X', 10)]
db = _FakeDB(rows, BALANCED)
def _row(track_id=1, title='Song One', path='/music/a.mp3', bitrate=128, duration=180000,
artist='Artist A', album='Album X', album_id=10, track_number=6):
"""A track row in _TRACK_COLS order (album source-id columns default to None)."""
return (track_id, title, path, bitrate, duration, artist, album, album_id, track_number)
# Stub the metadata side so the test stays offline.
def _stub_engine(monkeypatch):
monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify')
monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify'])
monkeypatch.setattr(
@ -151,10 +160,16 @@ def test_scan_creates_finding_for_low_quality_track(monkeypatch):
normalize_string=lambda s: s,
),
)
def test_scan_creates_finding_for_low_quality_track(monkeypatch):
db = _FakeDB([_row(bitrate=128)], BALANCED)
_stub_engine(monkeypatch)
fake_match = {'id': 'sp1', 'name': 'Song One', 'artists': ['Artist A'],
'album': {'name': 'Album X', 'images': []}}
# No ISRC / album hit → exercise the search tier.
monkeypatch.setattr(qu, '_read_track_isrc', lambda fp: '')
# No track-id / ISRC / album hit → exercise the search tier.
monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {})
monkeypatch.setattr(qu, '_match_via_track_id', lambda *a, **k: (None, None))
monkeypatch.setattr(qu, '_match_via_album', lambda *a, **k: (None, None))
monkeypatch.setattr(qu, '_find_best_match',
lambda *a, **k: (fake_match, 0.95, 'spotify', True))
@ -162,9 +177,7 @@ def test_scan_creates_finding_for_low_quality_track(monkeypatch):
monkeypatch.setattr(qu, '_track_name', lambda t: 'Song One')
findings = []
job = qu.QualityUpgradeJob()
# default scope 'watchlist'; config_manager None → defaults used
result = job.scan(_ctx(db, findings))
result = qu.QualityUpgradeJob().scan(_ctx(db, findings))
assert result.findings_created == 1
assert len(findings) == 1
@ -177,6 +190,63 @@ def test_scan_creates_finding_for_low_quality_track(monkeypatch):
assert f['details']['provider'] == 'spotify'
def test_match_via_track_id_fetches_exact_by_id(monkeypatch):
"""Most-direct tier: a per-source track ID in the tags → get_track_details by ID."""
track = {'id': 'sp9', 'name': 'Song One', 'album': {'name': 'Album X'}}
client = types.SimpleNamespace(get_track_details=lambda tid: track if tid == 'sp9' else None)
monkeypatch.setattr(qu, 'get_client_for_source', lambda src: client)
best, source = qu._match_via_track_id({'spotify_track_id': 'sp9'}, ['spotify'])
assert best['id'] == 'sp9'
assert source == 'spotify'
assert qu._match_via_track_id({}, ['spotify']) == (None, None) # no ID → nothing
def test_duration_ok_guard():
assert qu._duration_ok(180000, 181000) is True # within 5s
assert qu._duration_ok(180000, 200000) is False # 20s off — wrong cut
assert qu._duration_ok(None, 200000) is True # unknown → lenient
assert qu._duration_ok(180000, 0) is True # unknown → lenient
def test_scan_prefers_track_id_tier(monkeypatch):
"""The source's own track ID (from file tags) wins over every other tier."""
db = _FakeDB([_row()], BALANCED)
_stub_engine(monkeypatch)
monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {'spotify_track_id': 'sp9', 'isrc': 'X'})
fake = {'id': 'sp9', 'name': 'Song One', 'album': {'name': 'Album X'}}
monkeypatch.setattr(qu, '_match_via_track_id', lambda ids, sp: (fake, 'spotify'))
monkeypatch.setattr(qu, '_normalize_track_match', lambda t, s: dict(fake))
monkeypatch.setattr(qu, '_track_name', lambda t: 'Song One')
def _boom(*a, **k):
raise AssertionError("no lower tier should run when the track-ID tier matches")
monkeypatch.setattr(qu, '_match_via_isrc', _boom)
monkeypatch.setattr(qu, '_match_via_album', _boom)
monkeypatch.setattr(qu, '_find_best_match', _boom)
findings = []
result = qu.QualityUpgradeJob().scan(_ctx(db, findings))
assert result.findings_created == 1
assert findings[0]['details']['matched_via'] == 'track_id'
def test_scan_skips_already_proposed_tracks(monkeypatch):
"""A re-run must not re-resolve a track that already has a finding."""
db = _FakeDB([_row(track_id=1)], BALANCED, finding_ids=['1'])
monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify')
monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify'])
def _boom(*a, **k):
raise AssertionError("no matching for an already-proposed track")
monkeypatch.setattr(qu, '_match_via_track_id', _boom)
monkeypatch.setattr(qu, '_find_best_match', _boom)
findings = []
result = qu.QualityUpgradeJob().scan(_ctx(db, findings))
assert findings == []
assert result.findings_skipped_dedup == 1
def test_match_via_isrc_accepts_exact_match(monkeypatch):
"""The guard accepts only a candidate whose own ISRC equals ours (dash/case
insensitive), so it survives a source returning unrelated hits first."""
@ -201,14 +271,12 @@ def test_match_via_isrc_rejects_all_mismatches(monkeypatch):
def test_scan_prefers_isrc_exact_match_over_fuzzy(monkeypatch):
"""When the file carries an ISRC and it resolves, use the exact match and do
NOT run the fuzzy search at all."""
rows = [(1, 'Song One', '/music/a.mp3', 128, 'Artist A', 'Album X', 10)]
db = _FakeDB(rows, BALANCED)
monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify')
monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify'])
monkeypatch.setattr('core.matching_engine.MusicMatchingEngine', lambda: types.SimpleNamespace())
monkeypatch.setattr(qu, '_read_track_isrc', lambda fp: 'USRC17607839')
"""No track-ID, but the file carries an ISRC that resolves → use the exact match
and do NOT run the album/search tiers."""
db = _FakeDB([_row()], BALANCED)
_stub_engine(monkeypatch)
monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {'isrc': 'USRC17607839'})
monkeypatch.setattr(qu, '_match_via_track_id', lambda *a, **k: (None, None))
fake = {'id': 'sp1', 'name': 'Song One', 'artists': ['Artist A'], 'album': {'name': 'Album X'}}
monkeypatch.setattr(qu, '_match_via_isrc', lambda isrc, sp: (fake, 'spotify'))
monkeypatch.setattr(qu, '_normalize_track_match', lambda t, s: dict(fake))
@ -225,15 +293,13 @@ def test_scan_prefers_isrc_exact_match_over_fuzzy(monkeypatch):
assert findings[0]['details']['match_confidence'] == 1.0
def test_scan_falls_back_to_search_without_isrc(monkeypatch):
"""No usable ISRC → fall back to fuzzy search."""
rows = [(1, 'Song One', '/music/a.mp3', 128, 'Artist A', 'Album X', 10)]
db = _FakeDB(rows, BALANCED)
monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify')
monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify'])
monkeypatch.setattr('core.matching_engine.MusicMatchingEngine', lambda: types.SimpleNamespace())
monkeypatch.setattr(qu, '_read_track_isrc', lambda fp: '') # un-enriched
monkeypatch.setattr(qu, '_match_via_album', lambda *a, **k: (None, None)) # no album hit
def test_scan_falls_back_to_search_without_ids(monkeypatch):
"""No track-ID / ISRC / album hit → fall back to fuzzy search."""
db = _FakeDB([_row()], BALANCED)
_stub_engine(monkeypatch)
monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {}) # un-enriched
monkeypatch.setattr(qu, '_match_via_track_id', lambda *a, **k: (None, None))
monkeypatch.setattr(qu, '_match_via_album', lambda *a, **k: (None, None))
fake = {'id': 'sp1', 'name': 'Song One', 'artists': ['Artist A'], 'album': {'name': 'Album X'}}
monkeypatch.setattr(qu, '_find_best_match', lambda *a, **k: (fake, 0.88, 'spotify', True))
monkeypatch.setattr(qu, '_normalize_track_match', lambda t, s: dict(fake))
@ -245,15 +311,13 @@ def test_scan_falls_back_to_search_without_isrc(monkeypatch):
assert findings[0]['details']['matched_via'] == 'search'
def test_scan_uses_album_tier_when_no_isrc(monkeypatch):
"""No ISRC, but the album→track lookup resolves it → matched_via 'album',
and the fuzzy search is never reached."""
rows = [(1, 'Song One', '/music/a.mp3', 128, 'Artist A', 'Album X', 10)]
db = _FakeDB(rows, BALANCED)
monkeypatch.setattr(qu, 'get_primary_source', lambda: 'spotify')
monkeypatch.setattr(qu, 'get_source_priority', lambda src: ['spotify'])
monkeypatch.setattr('core.matching_engine.MusicMatchingEngine', lambda: types.SimpleNamespace())
monkeypatch.setattr(qu, '_read_track_isrc', lambda fp: '')
def test_scan_uses_album_tier_when_no_ids(monkeypatch):
"""No track-ID / ISRC, but the album→track lookup resolves it → matched_via
'album', and the fuzzy search is never reached."""
db = _FakeDB([_row()], BALANCED)
_stub_engine(monkeypatch)
monkeypatch.setattr(qu, '_read_file_ids', lambda fp: {})
monkeypatch.setattr(qu, '_match_via_track_id', lambda *a, **k: (None, None))
fake = {'id': 'sp1', 'name': 'Song One', 'artists': ['Artist A'], 'album': {'name': 'Album X'}}
monkeypatch.setattr(qu, '_match_via_album', lambda *a, **k: (fake, 'spotify'))
monkeypatch.setattr(qu, '_normalize_track_match', lambda t, s: dict(fake))
@ -283,8 +347,7 @@ def test_find_track_in_album_exact_title_with_track_number(monkeypatch):
def test_scan_skips_tracks_meeting_quality(monkeypatch):
# A 320 kbps MP3 meets the balanced profile → no finding, no metadata calls.
rows = [(2, 'Good Song', '/music/b.mp3', 320, 'Artist A', 'Album Y', 11)]
db = _FakeDB(rows, BALANCED)
db = _FakeDB([_row(track_id=2, title='Good Song', bitrate=320)], BALANCED)
def _boom(*a, **k): # must never be called for an acceptable track
raise AssertionError("matching should not run for an acceptable track")

Loading…
Cancel
Save