mirror of https://github.com/Nezreka/SoulSync.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
782 lines
27 KiB
782 lines
27 KiB
from types import SimpleNamespace
|
|
|
|
from core.repair_jobs.acoustid_scanner import AcoustIDScannerJob
|
|
|
|
|
|
class _FakeCursor:
    """Minimal stand-in for a DB cursor that serves canned rows.

    No SQL is ever run: ``execute`` only records its arguments in
    ``executed`` so a test can inspect what was issued.
    """

    def __init__(self, rows):
        # Rows handed back verbatim by fetchall().
        self._rows = rows
        # (query, params) tuples, in call order.
        self.executed = []

    def execute(self, query, params=None):
        """Record the call and return the cursor itself so calls can chain."""
        self.executed.append((query, params))
        return self

    def fetchall(self):
        """Return the canned rows, whatever was executed."""
        return self._rows

    def fetchone(self):
        """Always report that there is no row."""
        return None
|
|
|
|
|
|
class _FakeConnection:
    """Fake DB connection that always hands out one shared ``_FakeCursor``."""

    def __init__(self, rows):
        # A single cursor instance, so every caller sees the same canned
        # rows and the same `executed` log.
        self._cursor = _FakeCursor(rows)

    def cursor(self):
        """Return the shared cursor (the same object on every call)."""
        return self._cursor

    def close(self):
        """No-op: there is no real resource to release."""
        pass
|
|
|
|
|
|
def _make_context(rows):
    """Build a fake job context whose DB connection serves ``rows``.

    The config manager's ``get`` returns the supplied default for every key,
    ``create_finding`` is ``None``, and the progress / stop / pause hooks are
    inert lambdas that never request a stop.
    """
    conn = _FakeConnection(rows)
    config_manager = SimpleNamespace(
        get=lambda key, default=None: default,
        set=lambda *args, **kwargs: None,
    )
    db = SimpleNamespace(_get_connection=lambda: conn)
    return SimpleNamespace(
        db=db,
        transfer_folder="/music",
        config_manager=config_manager,
        acoustid_client=object(),
        create_finding=None,
        report_progress=lambda **kwargs: None,
        update_progress=lambda *args, **kwargs: None,
        check_stop=lambda: False,
        wait_if_paused=lambda: False,
        sleep_or_stop=lambda *args, **kwargs: False,
    )
|
|
|
|
|
|
def test_load_db_tracks_skips_null_ids_and_normalizes_track_ids():
    """A row with a NULL id is dropped and the integer id 42 is keyed as "42"."""
    job = AcoustIDScannerJob()
    context = _make_context([
        # 11 columns: id, title, artist (COALESCE'd), file_path, track_number,
        # album_title, album_thumb, artist_thumb, track_artist (raw, may be ''),
        # album_artist, duration_ms (issue #587 — duration guard).
        (None, "Broken Track", "Artist", "/music/broken.flac", 1, "Album", None, None, "", "Artist", 180000),
        (42, "Good Track", "Artist", "/music/good.flac", 2, "Album", "album-thumb", "artist-thumb", "", "Artist", 240000),
    ])

    tracks = job._load_db_tracks(context)

    assert list(tracks.keys()) == ["42"]
    assert tracks["42"]["title"] == "Good Track"
    assert tracks["42"]["artist"] == "Artist"
    assert tracks["42"]["duration_ms"] == 240000
|
|
|
|
|
|
def test_scan_handles_mixed_track_id_types(monkeypatch):
    """scan() skips the NULL-id row and passes the other id on as the string "42"."""
    job = AcoustIDScannerJob()
    context = _make_context([
        # 11 columns: id, title, artist (COALESCE'd), file_path, track_number,
        # album_title, album_thumb, artist_thumb, track_artist (raw, may be ''),
        # album_artist, duration_ms.
        (None, "Broken Track", "Artist", "/music/broken.flac", 1, "Album", None, None, "", "Artist", 180000),
        (42, "Good Track", "Artist", "/music/good.flac", 2, "Album", "album-thumb", "artist-thumb", "", "Artist", 240000),
    ])

    # Path resolution is replaced with an identity function for this test.
    monkeypatch.setattr(job, "_resolve_path", lambda file_path, _context: file_path)

    scanned_track_ids = []

    def fake_scan_file(fpath, track_id, expected, acoustid_client, context, result,
                       fp_threshold, title_threshold, artist_threshold):
        # Only record which ids reach the per-file scan; no fingerprinting.
        scanned_track_ids.append(track_id)

    monkeypatch.setattr(job, "_scan_file", fake_scan_file)

    result = job.scan(context)

    assert result.scanned == 1
    assert scanned_track_ids == ["42"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Multi-value artist credit — Foxxify Discord report
# ---------------------------------------------------------------------------
#
# AcoustID returns the FULL artist credit while the library DB
# carries only the primary artist. Pre-fix raw SequenceMatcher
# scored 43% — below the 0.6 threshold — and the scanner created a
# Wrong Song finding even though the audio was correct. Post-fix the
# scanner routes through `artist_names_match` which splits the credit
# and finds the primary artist at 100%, suppressing the false flag.
|
|
|
|
|
|
def _make_finding_capturing_context(track_row, captured):
    """Context that captures any create_finding calls into the
    `captured` list. Tests assert against this list to verify whether
    the scanner created a finding (false positive) or correctly
    skipped (multi-value match resolved)."""

    def fake_create_finding(**kwargs):
        # Keep the keyword arguments for later assertions and report success.
        captured.append(kwargs)
        return True

    # Reuse the shared fake context: it differs from what this helper needs
    # only in `create_finding`, so override that one attribute rather than
    # repeating every field.
    context = _make_context([track_row])
    context.create_finding = fake_create_finding
    return context
|
|
|
|
|
|
def test_scanner_no_finding_when_primary_artist_in_acoustid_credit():
    """Reporter's exact case verbatim:

        Library DB: title='Tea Parties With Dale Earnhardt' artist='Okayracer'
        AcoustID:   title='Tea Parties With Dale Earnhardt'
                    artist='Okayracer, aldrch & poptropicaslutz!'
        Pre-fix:  artist_sim=43% → Wrong Song finding
        Post-fix: 'Okayracer' found in credit → 100% → no finding
    """
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("69241726", "Tea Parties With Dale Earnhardt", "Okayracer",
                   "/music/track.opus", 1, "Album", None, None),
        captured=captured_findings,
    )

    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [{
                'title': 'Tea Parties With Dale Earnhardt',
                'artist': 'Okayracer, aldrch & poptropicaslutz!',
            }],
        },
    )

    result = JobResultStub()
    job._scan_file(
        '/music/track.opus',
        '69241726',
        {'title': 'Tea Parties With Dale Earnhardt', 'artist': 'Okayracer'},
        fake_acoustid,
        context,
        result,
        fp_threshold=0.85,
        title_threshold=0.85,
        artist_threshold=0.6,
    )

    assert captured_findings == [], (
        f"Expected no finding (primary artist in credit); got {captured_findings}"
    )
|
|
|
|
|
|
def test_scanner_still_flags_genuine_artist_mismatch():
    """Sanity: multi-value path doesn't suppress legitimate
    mismatches. If expected artist is NOT in the credit at all,
    finding still fires."""
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("99", "Some Track", "Foreigner",
                   "/music/track.flac", 1, "Album", None, None),
        captured=captured_findings,
    )

    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [{
                'title': 'Some Track',
                'artist': 'Different Band, Other Person & Random Featuring',
            }],
        },
    )

    result = JobResultStub()
    job._scan_file(
        '/music/track.flac',
        '99',
        {'title': 'Some Track', 'artist': 'Foreigner'},
        fake_acoustid,
        context,
        result,
        fp_threshold=0.85,
        title_threshold=0.85,
        artist_threshold=0.6,
    )

    assert len(captured_findings) == 1, (
        f"Expected a finding for genuine mismatch; got {len(captured_findings)}"
    )
    assert captured_findings[0]['finding_type'] == 'acoustid_mismatch'
|
|
|
|
|
|
class JobResultStub:
    """Minimal JobResult-like stub for the scanner integration tests
    in this module. The real JobResult tracks scanned/skipped/findings_created
    counters via attribute assignment — same shape works here."""

    # Class-level zero defaults: assigning or `+=` on an instance creates an
    # instance attribute, so counters never leak between stub instances.
    findings_created = 0
    findings_skipped_dedup = 0
    errors = 0
    scanned = 0
    skipped = 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Compilation albums — Skowl Discord report
# ---------------------------------------------------------------------------
#
# Compilation albums (e.g. "High Tea Music: Vol 1") have different
# artists per track but `tracks.artist_id` points at the ALBUM artist
# (curator / label name applied to every row). The scanner used to
# compare AcoustID's per-track artist against the album artist →
# 12% sim → Wrong Song flag on every track. The `tracks.track_artist`
# column already holds the correct per-track artist for these cases
# (populated by every server-scan + auto-import path) — scanner just
# wasn't reading it. Post-fix `_load_db_tracks` prefers track_artist
# via `COALESCE(NULLIF(t.track_artist, ''), ar.name)`.
|
|
|
|
|
|
def _make_real_db_context(tmp_path):
    """Create a REAL SQLite DB and return a handle the scanner can query.

    The scanner's multi-table JOIN has to run against an actual schema;
    SimpleNamespace fakes can't simulate the JOIN.

    Despite the function name, the return value is not a full job context.
    It is a small DB object whose ``_get_connection()`` opens a fresh
    connection (``sqlite3.Row`` row factory) to ``tmp_path / "test.db"``.
    Callers wrap it in their own context object.

    Args:
        tmp_path: ``pathlib.Path`` of an existing directory for the DB file.
    """
    import sqlite3

    db_path = tmp_path / "test.db"
    conn = sqlite3.connect(str(db_path))
    try:
        conn.executescript("""
            CREATE TABLE artists (
                id TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                thumb_url TEXT
            );
            CREATE TABLE albums (
                id TEXT PRIMARY KEY,
                title TEXT NOT NULL,
                thumb_url TEXT
            );
            CREATE TABLE tracks (
                id TEXT PRIMARY KEY,
                title TEXT,
                artist_id TEXT,
                album_id TEXT,
                file_path TEXT,
                track_number INTEGER,
                track_artist TEXT,
                duration INTEGER
            );
        """)
        conn.commit()
    finally:
        # Release the setup connection even if schema creation fails.
        conn.close()

    class _RealDB:
        def _get_connection(self):
            # A new connection per call; whoever asks for it closes it.
            c = sqlite3.connect(str(db_path))
            c.row_factory = sqlite3.Row
            return c

    return _RealDB()
|
|
|
|
|
|
def test_load_db_tracks_prefers_track_artist_for_compilation():
    """Reporter's exact case (Skowl) — compilation album where
    every track has a different artist credited via track_artist
    column, while artist_id points at the album-level curator."""
    import pathlib
    import shutil
    import tempfile

    tmp = pathlib.Path(tempfile.mkdtemp())
    try:
        db = _make_real_db_context(tmp)

        conn = db._get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute("INSERT INTO artists (id, name) VALUES ('andro', 'Andromedik')")
            cursor.execute(
                "INSERT INTO albums (id, title) VALUES ('hightea', 'High Tea Music: Vol 1')"
            )
            cursor.execute(
                "INSERT INTO tracks (id, title, artist_id, album_id, file_path, track_artist) "
                "VALUES (?, ?, ?, ?, ?, ?)",
                ('city-lights', 'City Lights', 'andro', 'hightea',
                 '/music/citylights.mp3', 'Eclypse'),
            )
            cursor.execute(
                "INSERT INTO tracks (id, title, artist_id, album_id, file_path, track_artist) "
                "VALUES (?, ?, ?, ?, ?, ?)",
                ('invasion', 'Invasion', 'andro', 'hightea',
                 '/music/invasion.mp3', None),  # NULL track_artist falls back
            )
            conn.commit()
        finally:
            conn.close()

        job = AcoustIDScannerJob()
        context = SimpleNamespace(
            db=db,
            config_manager=SimpleNamespace(get=lambda *a, **k: None),
        )
        tracks = job._load_db_tracks(context)
    finally:
        # mkdtemp() never cleans up after itself; remove the scratch
        # directory even when the test fails. Errors are ignored so a
        # still-open DB file cannot mask the real result.
        shutil.rmtree(tmp, ignore_errors=True)

    # Track with track_artist populated → Eclypse (per-track), NOT
    # Andromedik (album-artist via artist_id).
    assert tracks['city-lights']['artist'] == 'Eclypse', (
        f"Compilation track must use track_artist; got {tracks['city-lights']['artist']!r}"
    )
    # Track with NULL track_artist → falls back to album artist
    # via COALESCE. Backward compat for legacy rows + single-artist
    # albums where track_artist isn't populated.
    assert tracks['invasion']['artist'] == 'Andromedik'
|
|
|
|
|
|
def test_load_db_tracks_falls_back_when_track_artist_empty_string():
    """Defensive: NULLIF treats empty string as NULL too. Some
    legacy rows might have stored '' instead of NULL."""
    import pathlib
    import shutil
    import tempfile

    tmp = pathlib.Path(tempfile.mkdtemp())
    try:
        db = _make_real_db_context(tmp)

        conn = db._get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute("INSERT INTO artists (id, name) VALUES ('a', 'Album Artist')")
            cursor.execute("INSERT INTO albums (id, title) VALUES ('alb', 'Album')")
            cursor.execute(
                "INSERT INTO tracks (id, title, artist_id, album_id, file_path, track_artist) "
                "VALUES (?, ?, ?, ?, ?, ?)",
                ('t1', 'T1', 'a', 'alb', '/music/t1.mp3', ''),  # empty string
            )
            conn.commit()
        finally:
            conn.close()

        job = AcoustIDScannerJob()
        context = SimpleNamespace(
            db=db,
            config_manager=SimpleNamespace(get=lambda *a, **k: None),
        )
        tracks = job._load_db_tracks(context)
    finally:
        # mkdtemp() never cleans up after itself; remove the scratch
        # directory whether or not the test passed.
        shutil.rmtree(tmp, ignore_errors=True)

    # Empty string in track_artist → NULLIF returns NULL → COALESCE
    # falls back to album artist
    assert tracks['t1']['artist'] == 'Album Artist'
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# File-tag fallback for legacy compilation tracks — Skowl Discord follow-up
# ---------------------------------------------------------------------------
#
# Skowl reported that the AcoustID Scanner was STILL flagging his
# compilation tracks even after the COALESCE(track_artist, album_artist)
# fix shipped. Cause: his tracks were downloaded BEFORE the
# `tracks.track_artist` column existed, so for those rows
# `track_artist IS NULL` and COALESCE falls back to the ALBUM artist
# (the curator) — same wrong-comparison the prior fix was supposed to
# eliminate.
#
# The audio file's ARTIST tag is ground truth for what's on disk:
# Tidal/Spotify/Deezer all write the per-track artist into the file's
# tag at download time, regardless of the SoulSync DB schema. Reading
# it during the scan closes the gap without requiring a DB backfill
# of the legacy rows. These tests pin:
#   - File ARTIST tag trumps DB-resolved expected artist when present
#     (Skowl's exact case: file says 'Eclypse', DB says 'Andromedik',
#     AcoustID returns 'Eclypse' → no finding)
#   - Missing file tag falls through to DB value (preserves
#     pre-fix behavior for tracks without proper file tags)
#   - mutagen failure is swallowed → falls through to DB
#   - File tag matches DB → no behavioral change
|
|
|
|
|
|
def test_scanner_uses_file_tag_artist_over_db_for_legacy_compilation(monkeypatch):
    """Skowl's exact case verbatim:

        DB row:   artist_id → 'Andromedik' (album artist), track_artist=NULL
        File tag: ARTIST='Eclypse' (Tidal-tagged correctly)
        AcoustID: artist='Eclypse'
        Pre-fix:  expected='Andromedik' vs actual='Eclypse' → flag
        Post-fix: file tag trumps DB → expected='Eclypse' → no flag
    """
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("city-lights", "City Lights", "Andromedik",
                   "/music/eclypse-city-lights.opus", 1,
                   "High Tea Music: Vol 1", None, None),
        captured=captured_findings,
    )

    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [{
                'title': 'City Lights',
                'artist': 'Eclypse',
            }],
        },
    )

    # Patch read_file_tags to return Tidal's correct per-track artist.
    # The scanner imports lazily inside _scan_file so we patch the
    # source module's symbol.
    monkeypatch.setattr(
        'core.tag_writer.read_file_tags',
        lambda fpath: {'artist': 'Eclypse', 'title': 'City Lights'},
    )

    result = JobResultStub()
    job._scan_file(
        '/music/eclypse-city-lights.opus',
        'city-lights',
        {'title': 'City Lights', 'artist': 'Andromedik'},  # DB-resolved expected
        fake_acoustid,
        context,
        result,
        fp_threshold=0.85,
        title_threshold=0.85,
        artist_threshold=0.6,
    )

    assert captured_findings == [], (
        f"Expected no finding (file tag matches AcoustID); got {captured_findings}"
    )
|
|
|
|
|
|
def test_scanner_falls_back_to_db_when_file_tag_missing(monkeypatch):
    """Defensive: file has no ARTIST tag (rare but possible for
    non-standard formats / damaged files). MUST fall back to DB
    expected value. Otherwise the fix would BREAK the existing
    'flag genuine mismatches' contract for files without tags."""
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("99", "Some Track", "Foreigner",
                   "/music/track.flac", 1, "Album", None, None),
        captured=captured_findings,
    )

    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [{
                'title': 'Some Track',
                'artist': 'Different Band',
            }],
        },
    )

    # File has no ARTIST tag (read_file_tags returns None for the field)
    monkeypatch.setattr(
        'core.tag_writer.read_file_tags',
        lambda fpath: {'artist': None},
    )

    result = JobResultStub()
    job._scan_file(
        '/music/track.flac',
        '99',
        {'title': 'Some Track', 'artist': 'Foreigner'},
        fake_acoustid,
        context,
        result,
        fp_threshold=0.85,
        title_threshold=0.85,
        artist_threshold=0.6,
    )

    # Should still flag — file tag was missing, fell back to DB
    # ('Foreigner') vs AcoustID ('Different Band') mismatch
    assert len(captured_findings) == 1, (
        f"Expected finding (file tag missing → DB fallback → genuine mismatch); got {captured_findings}"
    )
|
|
|
|
|
|
def test_scanner_swallows_file_tag_read_exception(monkeypatch):
    """Defensive: mutagen errors mid-read shouldn't crash the scan
    — must log + fall back to DB value gracefully."""
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("99", "Track", "RealArtist",
                   "/music/corrupted.mp3", 1, "Album", None, None),
        captured=captured_findings,
    )

    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [{'title': 'Track', 'artist': 'RealArtist'}],
        },
    )

    def boom(fpath):
        # Simulates the tag reader blowing up on an unreadable file.
        raise RuntimeError("mutagen exploded on corrupted file")

    monkeypatch.setattr('core.tag_writer.read_file_tags', boom)

    result = JobResultStub()
    job._scan_file(
        '/music/corrupted.mp3',
        '99',
        {'title': 'Track', 'artist': 'RealArtist'},
        fake_acoustid,
        context,
        result,
        fp_threshold=0.85,
        title_threshold=0.85,
        artist_threshold=0.6,
    )

    # No finding — DB matches AcoustID after the fallback
    assert captured_findings == []
|
|
|
|
|
|
def test_scanner_trusts_curated_db_track_artist_over_stale_file_tag(monkeypatch):
    """The flip side of Skowl's case — user manually corrected
    `track_artist` in the DB via the enhanced library view but
    didn't re-tag the file. Pre-refactor 'file tag always wins'
    would flag this as a false positive (file says wrong, DB says
    right, AcoustID matches DB). Post-refactor: DB track_artist
    is the curated source of truth when populated → file tag is
    only consulted when DB is empty. No spurious flag.

    This is why `_load_db_tracks` surfaces `track_artist` as a
    separate field instead of just the COALESCE'd `artist`:
    `_scan_file` needs to distinguish 'DB has a curated value'
    from 'DB fell back to album artist'."""
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("99", "Track", "AlbumArtist",
                   "/music/track.flac", 1, "Album", None, None),
        captured=captured_findings,
    )

    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [{'title': 'Track', 'artist': 'Eclypse'}],
        },
    )

    # File has wrong tag (stale — user edited DB but didn't re-tag),
    # DB has correct value, AcoustID matches DB.
    monkeypatch.setattr(
        'core.tag_writer.read_file_tags',
        lambda fpath: {'artist': 'WrongStaleTag'},
    )

    result = JobResultStub()
    job._scan_file(
        '/music/track.flac', '99',
        # Simulates the post-refactor _load_db_tracks output:
        # track_artist populated (curated) takes priority over file tag.
        {'title': 'Track', 'artist': 'Eclypse',
         'track_artist': 'Eclypse', 'album_artist': 'AlbumArtist'},
        fake_acoustid, context, result,
        fp_threshold=0.85, title_threshold=0.85, artist_threshold=0.6,
    )

    assert captured_findings == [], (
        f"DB curated value must trump stale file tag; got {captured_findings}"
    )
|
|
|
|
|
|
def test_scanner_file_tag_matches_db_no_behavioral_change(monkeypatch):
    """Sanity: when file tag and DB agree, behavior is identical to
    the pre-fix path. No double-counting, no spurious findings."""
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("99", "Track", "RealArtist",
                   "/music/track.flac", 1, "Album", None, None),
        captured=captured_findings,
    )

    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [{'title': 'Track', 'artist': 'RealArtist'}],
        },
    )

    # File tag, DB value and AcoustID result all name the same artist.
    monkeypatch.setattr(
        'core.tag_writer.read_file_tags',
        lambda fpath: {'artist': 'RealArtist'},
    )

    result = JobResultStub()
    job._scan_file(
        '/music/track.flac', '99',
        {'title': 'Track', 'artist': 'RealArtist'},
        fake_acoustid, context, result,
        fp_threshold=0.85, title_threshold=0.85, artist_threshold=0.6,
    )

    assert captured_findings == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Issue #587 — multi-candidate scan + duration guard (Foxxify report)
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_scanner_no_finding_when_lower_ranked_candidate_matches():
    """Foxxify case 2 — AcoustID returns multiple recordings per
    fingerprint; the top match is the wrong-credited recording but a
    lower-ranked candidate matches expected metadata exactly. Scanner
    should iterate ALL candidates and suppress the finding.

    Repro: file is "Nana" by Geoxor, AcoustID top match is "Nana" by
    Edward Vesala Trio (different recording sharing similar
    fingerprint), AcoustID's second candidate is the actual Geoxor
    track. Pre-fix scanner only saw [0] → flagged. Post-fix sees [1]
    → no flag."""
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("nana", "Nana", "Geoxor",
                   "/music/nana.opus", 6, "Stardust", None, None),
        captured=captured_findings,
    )

    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.97,
            'recordings': [
                # AcoustID's top match — wrong artist for our file
                {'title': 'Nana', 'artist': 'Edward Vesala Trio'},
                # Lower-ranked candidate — actually matches our expected
                {'title': 'Nana', 'artist': 'Geoxor'},
            ],
        },
    )

    result = JobResultStub()
    job._scan_file(
        '/music/nana.opus', 'nana',
        {'title': 'Nana', 'artist': 'Geoxor'},
        fake_acoustid, context, result,
        fp_threshold=0.85, title_threshold=0.85, artist_threshold=0.6,
    )

    assert captured_findings == [], (
        f"Expected no finding (lower-ranked candidate matches); got {captured_findings}"
    )
|
|
|
|
|
|
def test_scanner_still_flags_when_no_candidate_matches():
    """Confirm the multi-candidate check doesn't accidentally suppress
    legitimate mismatches — if NO candidate matches expected metadata,
    the finding still fires."""
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("99", "Expected Title", "Expected Artist",
                   "/music/track.flac", 1, "Album", None, None),
        captured=captured_findings,
    )

    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [
                {'title': 'Wrong Track', 'artist': 'Wrong Artist A'},
                {'title': 'Different Wrong', 'artist': 'Wrong Artist B'},
            ],
        },
    )

    result = JobResultStub()
    job._scan_file(
        '/music/track.flac', '99',
        {'title': 'Expected Title', 'artist': 'Expected Artist'},
        fake_acoustid, context, result,
        fp_threshold=0.85, title_threshold=0.85, artist_threshold=0.6,
    )

    assert len(captured_findings) == 1
|
|
|
|
|
|
def test_scanner_skips_finding_on_strong_duration_mismatch():
    """Foxxify case 3 — 17-minute mashup edit fingerprints to a 5-minute
    late-70s Japanese hiphop track. Fingerprint matched a sample/intro
    section but the recordings are clearly different (drastic length
    difference). Scanner should skip the finding rather than recommend
    retag of a totally different track length."""
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("mashup", "Some Mashup Edit", "Mashup Artist",
                   "/music/mashup.opus", 1, "Mashups", None, None),
        captured=captured_findings,
    )

    # AcoustID matched a 5-minute Japanese hiphop track via fingerprint
    # hash collision. Expected file is 17 minutes — duration guard
    # should kick in.
    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.98,
            'recordings': [
                {'title': 'Different Song', 'artist': 'Different Artist',
                 'duration': 300},  # 5 min — way off from our 17 min file
            ],
        },
    )

    result = JobResultStub()
    # 17 minutes = 1020 sec = 1020000 ms
    job._scan_file(
        '/music/mashup.opus', 'mashup',
        {'title': 'Some Mashup Edit', 'artist': 'Mashup Artist', 'duration_ms': 1020000},
        fake_acoustid, context, result,
        fp_threshold=0.85, title_threshold=0.85, artist_threshold=0.6,
    )

    assert captured_findings == [], (
        f"Expected no finding (duration mismatch suggests collision); got {captured_findings}"
    )
|
|
|
|
|
|
def test_scanner_still_flags_when_duration_matches():
    """Confirm the duration guard only kicks in for STRONG mismatches —
    similar-length wrong song still gets flagged."""
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("99", "Expected", "Artist",
                   "/music/track.flac", 1, "Album", None, None),
        captured=captured_findings,
    )

    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [
                {'title': 'Wrong Song', 'artist': 'Wrong Artist',
                 'duration': 180},  # 3 min, matches expected
            ],
        },
    )

    result = JobResultStub()
    # 3-minute file with 3-minute candidate — same length, but title +
    # artist clearly mismatch → finding should still fire
    job._scan_file(
        '/music/track.flac', '99',
        {'title': 'Expected', 'artist': 'Artist', 'duration_ms': 180000},
        fake_acoustid, context, result,
        fp_threshold=0.85, title_threshold=0.85, artist_threshold=0.6,
    )

    assert len(captured_findings) == 1
|