SoulSync/tests/test_acoustid_scanner.py

from types import SimpleNamespace

from core.repair_jobs.acoustid_scanner import AcoustIDScannerJob


class _FakeCursor:
    def __init__(self, rows):
        self._rows = rows
        self.executed = []

    def execute(self, query, params=None):
        self.executed.append((query, params))
        return self

    def fetchall(self):
        return self._rows

    def fetchone(self):
        return None


class _FakeConnection:
    def __init__(self, rows):
        self._cursor = _FakeCursor(rows)

    def cursor(self):
        return self._cursor

    def close(self):
        pass


def _make_context(rows):
    conn = _FakeConnection(rows)
    config_manager = SimpleNamespace(
        get=lambda key, default=None: default,
        set=lambda *args, **kwargs: None,
    )
    db = SimpleNamespace(_get_connection=lambda: conn)
    return SimpleNamespace(
        db=db,
        transfer_folder="/music",
        config_manager=config_manager,
        acoustid_client=object(),
        create_finding=None,
        report_progress=lambda **kwargs: None,
        update_progress=lambda *args, **kwargs: None,
        check_stop=lambda: False,
        wait_if_paused=lambda: False,
        sleep_or_stop=lambda *args, **kwargs: False,
    )


def test_load_db_tracks_skips_null_ids_and_normalizes_track_ids():
    job = AcoustIDScannerJob()
    context = _make_context([
        (None, "Broken Track", "Artist", "/music/broken.flac", 1, "Album", None, None),
        (42, "Good Track", "Artist", "/music/good.flac", 2, "Album", "album-thumb", "artist-thumb"),
    ])

    tracks = job._load_db_tracks(context)

    assert list(tracks.keys()) == ["42"]
    assert tracks["42"]["title"] == "Good Track"
    assert tracks["42"]["artist"] == "Artist"


def test_scan_handles_mixed_track_id_types(monkeypatch):
    job = AcoustIDScannerJob()
    context = _make_context([
        (None, "Broken Track", "Artist", "/music/broken.flac", 1, "Album", None, None),
        (42, "Good Track", "Artist", "/music/good.flac", 2, "Album", "album-thumb", "artist-thumb"),
    ])

    monkeypatch.setattr(job, "_resolve_path", lambda file_path, _context: file_path)

    scanned_track_ids = []

    def fake_scan_file(fpath, track_id, expected, acoustid_client, context, result,
                       fp_threshold, title_threshold, artist_threshold):
        scanned_track_ids.append(track_id)

    monkeypatch.setattr(job, "_scan_file", fake_scan_file)

    result = job.scan(context)

    assert result.scanned == 1
    assert scanned_track_ids == ["42"]


# ---------------------------------------------------------------------------
# Multi-value artist credit — Foxxify Discord report
# ---------------------------------------------------------------------------
#
# AcoustID returns the FULL artist credit while the library DB
# carries only the primary artist. Pre-fix raw SequenceMatcher
# scored 43% — below the 0.6 threshold — and the scanner created a
# Wrong Song finding even though the audio was correct. Post-fix the
# scanner routes through `artist_names_match` which splits the credit
# and finds the primary artist at 100%, suppressing the false flag.


def _make_finding_capturing_context(track_row, captured):
    """Context that captures any create_finding calls into the
    `captured` list. Tests assert against this list to verify whether
    the scanner created a finding (false positive) or correctly
    skipped (multi-value match resolved)."""
    conn = _FakeConnection([track_row])
    config_manager = SimpleNamespace(
        get=lambda key, default=None: default,
        set=lambda *args, **kwargs: None,
    )
    db = SimpleNamespace(_get_connection=lambda: conn)

    def fake_create_finding(**kwargs):
        captured.append(kwargs)
        return True

    return SimpleNamespace(
        db=db,
        transfer_folder="/music",
        config_manager=config_manager,
        acoustid_client=object(),
        create_finding=fake_create_finding,
        report_progress=lambda **kwargs: None,
        update_progress=lambda *args, **kwargs: None,
        check_stop=lambda: False,
        wait_if_paused=lambda: False,
        sleep_or_stop=lambda *args, **kwargs: False,
    )


def test_scanner_no_finding_when_primary_artist_in_acoustid_credit():
    """Reporter's exact case verbatim:

        Library DB:   title='Tea Parties With Dale Earnhardt' artist='Okayracer'
        AcoustID:     title='Tea Parties With Dale Earnhardt'
                      artist='Okayracer, aldrch & poptropicaslutz!'
        Pre-fix:      artist_sim=43% → Wrong Song finding
        Post-fix:     'Okayracer' found in credit → 100% → no finding
    """
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("69241726", "Tea Parties With Dale Earnhardt", "Okayracer",
                   "/music/track.opus", 1, "Album", None, None),
        captured=captured_findings,
    )

    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [{
                'title': 'Tea Parties With Dale Earnhardt',
                'artist': 'Okayracer, aldrch & poptropicaslutz!',
            }],
        },
    )

    result = JobResultStub()
    job._scan_file(
        '/music/track.opus',
        '69241726',
        {'title': 'Tea Parties With Dale Earnhardt', 'artist': 'Okayracer'},
        fake_acoustid,
        context,
        result,
        fp_threshold=0.85,
        title_threshold=0.85,
        artist_threshold=0.6,
    )

    assert captured_findings == [], (
        f"Expected no finding (primary artist in credit); got {captured_findings}"
    )


def test_scanner_still_flags_genuine_artist_mismatch():
    """Sanity: multi-value path doesn't suppress legitimate
    mismatches. If expected artist is NOT in the credit at all,
    finding still fires."""
    job = AcoustIDScannerJob()
    captured_findings = []
    context = _make_finding_capturing_context(
        track_row=("99", "Some Track", "Foreigner",
                   "/music/track.flac", 1, "Album", None, None),
        captured=captured_findings,
    )

    fake_acoustid = SimpleNamespace(
        fingerprint_and_lookup=lambda fpath: {
            'best_score': 0.99,
            'recordings': [{
                'title': 'Some Track',
                'artist': 'Different Band, Other Person & Random Featuring',
            }],
        },
    )

    result = JobResultStub()
    job._scan_file(
        '/music/track.flac',
        '99',
        {'title': 'Some Track', 'artist': 'Foreigner'},
        fake_acoustid,
        context,
        result,
        fp_threshold=0.85,
        title_threshold=0.85,
        artist_threshold=0.6,
    )

    assert len(captured_findings) == 1, (
        f"Expected a finding for genuine mismatch; got {len(captured_findings)}"
    )
    assert captured_findings[0]['finding_type'] == 'acoustid_mismatch'


class JobResultStub:
    """Minimal JobResult-like stub for the scanner integration tests
    above. The real JobResult tracks scanned/skipped/findings_created
    counters via attribute assignment — same shape works here."""
    findings_created = 0
    findings_skipped_dedup = 0
    errors = 0
    scanned = 0
    skipped = 0