You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
SoulSync/tests/test_acoustid_scanner.py

228 lines
7.2 KiB

from types import SimpleNamespace
from core.repair_jobs.acoustid_scanner import AcoustIDScannerJob
class _FakeCursor:
def __init__(self, rows):
self._rows = rows
self.executed = []
def execute(self, query, params=None):
self.executed.append((query, params))
return self
def fetchall(self):
return self._rows
def fetchone(self):
return None
class _FakeConnection:
def __init__(self, rows):
self._cursor = _FakeCursor(rows)
def cursor(self):
return self._cursor
def close(self):
pass
def _make_context(rows):
    """Build a stub job context whose database yields ``rows``.

    The returned ``SimpleNamespace`` carries:

    * ``db._get_connection`` - returns the same ``_FakeConnection``
      (backed by ``rows``) on every call.
    * ``config_manager`` - ``get`` echoes back the supplied default,
      ``set`` is a no-op.
    * ``create_finding`` - ``None``.
    * progress / stop / pause / sleep callbacks - no-ops; the
      stop-style ones return ``False``.
    """
    conn = _FakeConnection(rows)
    config_manager = SimpleNamespace(
        get=lambda key, default=None: default,
        set=lambda *args, **kwargs: None,
    )
    db = SimpleNamespace(_get_connection=lambda: conn)
    return SimpleNamespace(
        db=db,
        transfer_folder="/music",
        config_manager=config_manager,
        acoustid_client=object(),
        create_finding=None,
        report_progress=lambda **kwargs: None,
        update_progress=lambda *args, **kwargs: None,
        check_stop=lambda: False,
        wait_if_paused=lambda: False,
        sleep_or_stop=lambda *args, **kwargs: False,
    )
def test_load_db_tracks_skips_null_ids_and_normalizes_track_ids():
    """A row whose first column is ``None`` is dropped; the remaining
    row is keyed by the string form of its id."""
    job = AcoustIDScannerJob()
    context = _make_context([
        (None, "Broken Track", "Artist", "/music/broken.flac", 1, "Album", None, None),
        (42, "Good Track", "Artist", "/music/good.flac", 2, "Album", "album-thumb", "artist-thumb"),
    ])

    tracks = job._load_db_tracks(context)

    # Integer id 42 must surface as the string key "42".
    assert list(tracks.keys()) == ["42"]
    assert tracks["42"]["title"] == "Good Track"
    assert tracks["42"]["artist"] == "Artist"
def test_scan_handles_mixed_track_id_types(monkeypatch):
    """``scan`` visits only the row with a usable id and passes that id
    to ``_scan_file`` as a string."""
    job = AcoustIDScannerJob()
    context = _make_context([
        (None, "Broken Track", "Artist", "/music/broken.flac", 1, "Album", None, None),
        (42, "Good Track", "Artist", "/music/good.flac", 2, "Album", "album-thumb", "artist-thumb"),
    ])

    # Hand the stored path straight back instead of resolving it.
    monkeypatch.setattr(job, "_resolve_path", lambda file_path, _context: file_path)

    scanned_track_ids = []

    def fake_scan_file(fpath, track_id, expected, acoustid_client, context, result,
                       fp_threshold, title_threshold, artist_threshold):
        # Only record which ids reach the per-file scan.
        scanned_track_ids.append(track_id)

    monkeypatch.setattr(job, "_scan_file", fake_scan_file)

    result = job.scan(context)

    assert result.scanned == 1
    assert scanned_track_ids == ["42"]
# ---------------------------------------------------------------------------
# Multi-value artist credit — Foxxify Discord report
# ---------------------------------------------------------------------------
#
# AcoustID returns the FULL artist credit while the library DB
# carries only the primary artist. Pre-fix raw SequenceMatcher
# scored 43% — below the 0.6 threshold — and the scanner created a
# Wrong Song finding even though the audio was correct. Post-fix the
# scanner routes through `artist_names_match` which splits the credit
# and finds the primary artist at 100%, suppressing the false flag.
def _make_finding_capturing_context(track_row, captured):
    """Context that captures any create_finding calls into the
    `captured` list. Tests assert against this list to verify whether
    the scanner created a finding (false positive) or correctly
    skipped (multi-value match resolved).

    ``track_row`` becomes the only row the fake database returns.
    Each ``create_finding(**kwargs)`` call appends its ``kwargs`` dict
    to ``captured`` and returns ``True``.
    """
    conn = _FakeConnection([track_row])
    config_manager = SimpleNamespace(
        get=lambda key, default=None: default,
        set=lambda *args, **kwargs: None,
    )
    db = SimpleNamespace(_get_connection=lambda: conn)

    def fake_create_finding(**kwargs):
        captured.append(kwargs)
        return True

    return SimpleNamespace(
        db=db,
        transfer_folder="/music",
        config_manager=config_manager,
        acoustid_client=object(),
        create_finding=fake_create_finding,
        report_progress=lambda **kwargs: None,
        update_progress=lambda *args, **kwargs: None,
        check_stop=lambda: False,
        wait_if_paused=lambda: False,
        sleep_or_stop=lambda *args, **kwargs: False,
    )
def test_scanner_no_finding_when_primary_artist_in_acoustid_credit():
    """Reporter's exact case verbatim:

    Library DB: title='Tea Parties With Dale Earnhardt' artist='Okayracer'
    AcoustID:   title='Tea Parties With Dale Earnhardt'
                artist='Okayracer, aldrch & poptropicaslutz!'

    Pre-fix:  artist_sim=43% → Wrong Song finding
    Post-fix: 'Okayracer' found in credit → 100% → no finding
    """
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("69241726", "Tea Parties With Dale Earnhardt", "Okayracer",
                   "/music/track.opus", 1, "Album", None, None),
        captured=captured_findings,
    )

    # Canned lookup result: titles match, artist is the full credit.
    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [{
                'title': 'Tea Parties With Dale Earnhardt',
                'artist': 'Okayracer, aldrch & poptropicaslutz!',
            }],
        },
    )

    result = JobResultStub()
    job._scan_file(
        '/music/track.opus',
        '69241726',
        {'title': 'Tea Parties With Dale Earnhardt', 'artist': 'Okayracer'},
        fake_acoustid,
        context,
        result,
        fp_threshold=0.85,
        title_threshold=0.85,
        artist_threshold=0.6,
    )

    assert captured_findings == [], (
        f"Expected no finding (primary artist in credit); got {captured_findings}"
    )
def test_scanner_still_flags_genuine_artist_mismatch():
    """Sanity: multi-value path doesn't suppress legitimate
    mismatches. If expected artist is NOT in the credit at all,
    finding still fires."""
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("99", "Some Track", "Foreigner",
                   "/music/track.flac", 1, "Album", None, None),
        captured=captured_findings,
    )

    # Canned lookup result: title matches, but the expected artist
    # appears nowhere in the returned credit.
    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [{
                'title': 'Some Track',
                'artist': 'Different Band, Other Person & Random Featuring',
            }],
        },
    )

    result = JobResultStub()
    job._scan_file(
        '/music/track.flac',
        '99',
        {'title': 'Some Track', 'artist': 'Foreigner'},
        fake_acoustid,
        context,
        result,
        fp_threshold=0.85,
        title_threshold=0.85,
        artist_threshold=0.6,
    )

    assert len(captured_findings) == 1, (
        f"Expected a finding for genuine mismatch; got {len(captured_findings)}"
    )
    assert captured_findings[0]['finding_type'] == 'acoustid_mismatch'
class JobResultStub:
    """Minimal JobResult-like stub for the scanner integration tests
    in this module. The real JobResult tracks scanned/skipped/findings_created
    counters via attribute assignment — same shape works here."""

    # Class-level zero defaults. An in-place update on an instance
    # (e.g. ``result.scanned += 1``) rebinds the name on that instance
    # only, so separate stubs never share counts.
    findings_created = 0
    findings_skipped_dedup = 0
    errors = 0
    scanned = 0
    skipped = 0