You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
SoulSync/tests/test_library_track_identity.py

413 lines
16 KiB

"""Tests for the provider-neutral external-ID match helper.
Discord-reported (CAL): the watchlist scanner re-downloaded a track
already on disk because the library DB had stale album metadata. The
album fuzzy match correctly reported that the names didn't match, so
the scanner declared the track missing. The track's stable external IDs
(Spotify ID, Deezer ID, MusicBrainz recording ID, ISRC, etc.) were
available on both sides but never consulted.

These tests pin the new ID-extraction helper + the library SELECT so
the regression doesn't return.
"""
from __future__ import annotations
import sqlite3
from typing import Any, Dict
import pytest
from core.library.track_identity import (
EXTERNAL_ID_COLUMNS,
extract_external_ids,
find_library_track_by_external_id,
)
# ---------------------------------------------------------------------------
# extract_external_ids
# ---------------------------------------------------------------------------
class TestExtractExternalIdsFromDirectFields:
    """IDs stored under dedicated keys on the track are returned under
    their canonical ID names."""

    def test_spotify_track_with_spotify_id_field(self):
        payload = dict(spotify_id='sp1', name='Hello')
        expected = {'spotify_id': 'sp1'}
        assert extract_external_ids(payload) == expected

    def test_track_with_alias_spotify_track_id(self):
        payload = dict(spotify_track_id='sp1', name='Hello')
        expected = {'spotify_id': 'sp1'}
        assert extract_external_ids(payload) == expected

    def test_track_with_uppercase_tag_name(self):
        payload = dict(SPOTIFY_TRACK_ID='sp1', name='Hello')
        expected = {'spotify_id': 'sp1'}
        assert extract_external_ids(payload) == expected

    def test_itunes_via_trackId_alias(self):
        # Integer input on purpose: the expected value is the string form.
        payload = dict(trackId=12345, name='Hello')
        expected = {'itunes_id': '12345'}
        assert extract_external_ids(payload) == expected

    def test_deezer_via_provider_native_id(self):
        payload = dict(id='dz1', provider='deezer', name='Hello')
        expected = {'deezer_id': 'dz1'}
        assert extract_external_ids(payload) == expected

    def test_isrc_extracted(self):
        payload = dict(isrc='USRC17607839', name='Hello')
        expected = {'isrc': 'USRC17607839'}
        assert extract_external_ids(payload) == expected

    def test_musicbrainz_recording_id_extracted(self):
        payload = dict(musicbrainz_recording_id='mb-uuid-1', name='Hello')
        expected = {'mbid': 'mb-uuid-1'}
        assert extract_external_ids(payload) == expected

    def test_audiodb_id_via_idTrack_alias(self):
        payload = dict(idTrack='adb1', name='Hello')
        expected = {'audiodb_id': 'adb1'}
        assert extract_external_ids(payload) == expected

    def test_soul_id_extracted(self):
        payload = dict(soul_id='soul-abc', name='Hello')
        expected = {'soul_id': 'soul-abc'}
        assert extract_external_ids(payload) == expected
class TestExtractExternalIdsFromProviderField:
    """A provider marker on the track tells the extractor which source
    its native ``id`` field belongs to."""

    def test_provider_spotify_with_native_id(self):
        payload = dict(provider='spotify', id='sp1', name='Hello')
        expected = {'spotify_id': 'sp1'}
        assert extract_external_ids(payload) == expected

    def test_provider_itunes_with_native_id(self):
        payload = dict(provider='itunes', id='it1', name='Hello')
        expected = {'itunes_id': 'it1'}
        assert extract_external_ids(payload) == expected

    def test_provider_tidal_with_native_id(self):
        payload = dict(provider='tidal', id='td1', name='Hello')
        expected = {'tidal_id': 'td1'}
        assert extract_external_ids(payload) == expected

    def test_provider_qobuz_with_native_id(self):
        payload = dict(provider='qobuz', id='qb1', name='Hello')
        expected = {'qobuz_id': 'qb1'}
        assert extract_external_ids(payload) == expected

    def test_provider_musicbrainz_with_native_id(self):
        payload = dict(provider='musicbrainz', id='mb-uuid', name='Hello')
        expected = {'mbid': 'mb-uuid'}
        assert extract_external_ids(payload) == expected

    def test_provider_hydrabase_with_native_id(self):
        payload = dict(provider='hydrabase', id='hyd1', name='Hello')
        expected = {'soul_id': 'hyd1'}
        assert extract_external_ids(payload) == expected

    def test_source_field_treated_same_as_provider(self):
        payload = dict(source='deezer', id='dz1', name='Hello')
        expected = {'deezer_id': 'dz1'}
        assert extract_external_ids(payload) == expected

    def test_underscore_source_field_treated_same_as_provider(self):
        """Normalized tracks from the Deezer / Discogs / Hydrabase
        clients are tagged with ``_source`` (leading underscore). That
        key has to count as the provider marker; if it didn't, watchlist
        scans against those sources would silently skip the ID match
        and drop down to fuzzy matching."""
        payload = dict(_source='deezer', id='dz1', name='Hello')
        expected = {'deezer_id': 'dz1'}
        assert extract_external_ids(payload) == expected

    def test_underscore_source_hydrabase(self):
        payload = dict(_source='hydrabase', id='hyd-soul-1', name='Hello')
        expected = {'soul_id': 'hyd-soul-1'}
        assert extract_external_ids(payload) == expected

    def test_native_id_without_provider_is_ignored(self):
        """A bare 'id' with no provider marker is ambiguous, so nothing
        is extracted from it."""
        payload = dict(id='unknown', name='Hello')
        assert extract_external_ids(payload) == {}

    def test_source_hint_disambiguates_when_track_has_no_provider(self):
        """Raw Spotify and iTunes API responses carry no provider /
        source / _source field. The watchlist scanner supplies its
        configured primary source as a hint, which lets the extractor
        map the track's native ``id`` to the right ID name."""
        payload = dict(id='sp1', name='Hello')  # No provider field
        hint_to_id_name = (
            ('spotify', 'spotify_id'),
            ('itunes', 'itunes_id'),
            ('deezer', 'deezer_id'),
        )
        for hint, id_name in hint_to_id_name:
            assert extract_external_ids(payload, source_hint=hint) == {id_name: 'sp1'}

    def test_source_hint_does_not_override_track_provider(self):
        """An explicit provider on the track beats the hint — a guard
        against the hint being wrong."""
        payload = dict(_source='deezer', id='dz1', name='Hello')
        expected = {'deezer_id': 'dz1'}
        assert extract_external_ids(payload, source_hint='spotify') == expected

    def test_source_hint_none_is_ignored(self):
        payload = dict(id='unknown', name='Hello')
        assert extract_external_ids(payload, source_hint=None) == {}
class TestExtractExternalIdsMixedAndDefensive:
    """Tracks carrying several IDs at once, plus degenerate inputs."""

    def test_track_with_multiple_provider_specific_fields(self):
        expected = {
            'spotify_id': 'sp1',
            'itunes_id': 'it1',
            'isrc': 'USRC17607839',
        }
        # Same three IDs plus a non-ID field that must be left out.
        payload = dict(
            spotify_id='sp1',
            itunes_id='it1',
            isrc='USRC17607839',
            name='Hello',
        )
        assert extract_external_ids(payload) == expected

    def test_direct_field_takes_precedence_over_provider_native_id(self):
        """With both 'spotify_id' and provider/'id' present, the direct
        field is the one that is kept (it is collected first)."""
        payload = dict(
            spotify_id='direct-sp',
            provider='spotify',
            id='native-sp',
        )
        expected = {'spotify_id': 'direct-sp'}
        assert extract_external_ids(payload) == expected

    def test_object_style_track_supported(self):
        # Attribute-based track object instead of a dict.
        class _Track:
            def __init__(self):
                self.spotify_id = 'sp1'
                self.isrc = 'USRC17607839'
                self.name = 'Hello'

        instance = _Track()
        expected = {
            'spotify_id': 'sp1',
            'isrc': 'USRC17607839',
        }
        assert extract_external_ids(instance) == expected

    def test_empty_strings_treated_as_missing(self):
        # Empty string, whitespace-only string and None all count as absent.
        payload = dict(spotify_id='', itunes_id=' ', isrc=None)
        assert extract_external_ids(payload) == {}

    def test_no_ids_returns_empty_dict(self):
        payload = dict(name='Hello', duration_ms=1000)
        assert extract_external_ids(payload) == {}

    def test_none_track_returns_empty_dict(self):
        nothing = None
        assert extract_external_ids(nothing) == {}

    def test_numeric_ids_coerced_to_string(self):
        payload = dict(spotify_id=12345, name='Hello')
        expected = {'spotify_id': '12345'}
        assert extract_external_ids(payload) == expected
# ---------------------------------------------------------------------------
# find_library_track_by_external_id
# ---------------------------------------------------------------------------
class _FakeDatabase:
"""Minimal DB stand-in exposing ``_get_connection()`` like MusicDatabase."""
def __init__(self):
self._conn = sqlite3.connect(':memory:')
self._conn.row_factory = sqlite3.Row
# Schema mirrors the columns the helper reads — only the ones we
# actually use need to exist.
self._conn.execute("""
CREATE TABLE tracks (
id INTEGER PRIMARY KEY,
title TEXT,
spotify_track_id TEXT,
itunes_track_id TEXT,
deezer_id TEXT,
tidal_id TEXT,
qobuz_id TEXT,
musicbrainz_recording_id TEXT,
audiodb_id TEXT,
soul_id TEXT,
isrc TEXT,
server_source TEXT
)
""")
self._conn.commit()
def _get_connection(self):
# Mirror MusicDatabase's pattern: caller closes the returned
# connection. Use a thin wrapper that no-ops close so the in-
# memory DB isn't dropped between calls.
class _NoCloseConn:
def __init__(_self, real):
_self._real = real
def __getattr__(_self, name):
return getattr(_self._real, name)
def close(_self):
pass
return _NoCloseConn(self._conn)
def insert(self, **kwargs):
cols = ', '.join(kwargs.keys())
placeholders = ', '.join('?' * len(kwargs))
self._conn.execute(
f"INSERT INTO tracks ({cols}) VALUES ({placeholders})",
list(kwargs.values()),
)
self._conn.commit()
@pytest.fixture
def db():
    """Yield a fresh in-memory ``_FakeDatabase`` and close it at teardown.

    The underlying sqlite3 connection is closed explicitly once the test
    finishes; leaving it to garbage collection leaks the handle and
    emits a ResourceWarning on Python 3.13+.
    """
    database = _FakeDatabase()
    try:
        yield database
    finally:
        # _get_connection() hands out no-op-close proxies, so the real
        # connection has to be closed through the attribute directly.
        database._conn.close()
class TestFindLibraryTrackBySpotifyId:
    """Lookups keyed on the Spotify ID (``spotify_track_id`` column)."""

    def test_match_by_spotify_id(self, db):
        db.insert(title='Hello', spotify_track_id='sp1')
        result = find_library_track_by_external_id(db, external_ids={'spotify_id': 'sp1'})
        assert result is not None
        assert result['title'] == 'Hello'
        assert result['spotify_track_id'] == 'sp1'

    def test_no_match_returns_none(self, db):
        db.insert(title='Hello', spotify_track_id='sp-other')
        result = find_library_track_by_external_id(db, external_ids={'spotify_id': 'sp1'})
        assert result is None

    def test_null_column_is_skipped(self, db):
        """A library row with NULL spotify_track_id must NOT match an
        empty/missing source ID, nor a concrete one."""
        db.insert(title='NoIDs')  # all IDs NULL
        # Empty external_ids → no match
        assert find_library_track_by_external_id(db, external_ids={}) is None
        # Also look up a real ID value, so the NULL row is checked
        # against an actual lookup and not only against an empty ID set.
        assert find_library_track_by_external_id(
            db, external_ids={'spotify_id': 'sp1'},
        ) is None
class TestFindLibraryTrackProviderNeutral:
    """Each supported ID type can locate a library row on its own."""

    def test_match_by_itunes_id_when_spotify_missing(self, db):
        db.insert(title='Hello', itunes_track_id='it1')
        wanted = {'itunes_id': 'it1'}
        row = find_library_track_by_external_id(db, external_ids=wanted)
        assert row is not None
        assert row['itunes_track_id'] == 'it1'

    def test_match_by_deezer_id(self, db):
        db.insert(title='Hello', deezer_id='dz1')
        wanted = {'deezer_id': 'dz1'}
        row = find_library_track_by_external_id(db, external_ids=wanted)
        assert row is not None

    def test_match_by_tidal_id(self, db):
        db.insert(title='Hello', tidal_id='td1')
        wanted = {'tidal_id': 'td1'}
        row = find_library_track_by_external_id(db, external_ids=wanted)
        assert row is not None

    def test_match_by_qobuz_id(self, db):
        db.insert(title='Hello', qobuz_id='qb1')
        wanted = {'qobuz_id': 'qb1'}
        row = find_library_track_by_external_id(db, external_ids=wanted)
        assert row is not None

    def test_match_by_musicbrainz_recording_id(self, db):
        db.insert(title='Hello', musicbrainz_recording_id='mb-uuid')
        wanted = {'mbid': 'mb-uuid'}
        row = find_library_track_by_external_id(db, external_ids=wanted)
        assert row is not None

    def test_match_by_isrc_across_providers(self, db):
        """ISRC acts as the cross-source identity: a library track that
        was imported from Deezer can still be matched by a Spotify scan
        when both sides carry the same ISRC."""
        db.insert(title='Hello', deezer_id='dz1', isrc='USRC17607839')
        # The source side knows a Spotify ID + ISRC; the library row
        # only knows a Deezer ID + ISRC. Only the ISRC is shared.
        wanted = {'spotify_id': 'sp-different', 'isrc': 'USRC17607839'}
        row = find_library_track_by_external_id(db, external_ids=wanted)
        assert row is not None
        assert row['isrc'] == 'USRC17607839'

    def test_match_by_soul_id(self, db):
        db.insert(title='Hello', soul_id='hyd-soul-1')
        wanted = {'soul_id': 'hyd-soul-1'}
        row = find_library_track_by_external_id(db, external_ids=wanted)
        assert row is not None
class TestFindLibraryTrackOrSemantics:
    """The supplied IDs are alternatives: one hit is sufficient, zero
    hits means no result."""

    def test_any_one_matching_id_is_enough(self, db):
        db.insert(title='Hello', spotify_track_id='sp1')
        # Only the Spotify ID agrees with the stored row.
        wanted = dict(spotify_id='sp1', itunes_id='wrong', deezer_id='wrong')
        row = find_library_track_by_external_id(db, external_ids=wanted)
        assert row is not None

    def test_no_matching_id_returns_none(self, db):
        db.insert(title='Hello', spotify_track_id='sp1', itunes_track_id='it1')
        wanted = dict(deezer_id='dz-other')
        row = find_library_track_by_external_id(db, external_ids=wanted)
        assert row is None

    def test_empty_external_ids_returns_none(self, db):
        db.insert(title='Hello', spotify_track_id='sp1')
        row = find_library_track_by_external_id(db, external_ids={})
        assert row is None
class TestFindLibraryTrackServerSourceFilter:
    """Behaviour of the optional ``server_source`` argument."""

    def test_server_source_match(self, db):
        db.insert(title='Hello', spotify_track_id='sp1', server_source='plex')
        wanted = {'spotify_id': 'sp1'}
        row = find_library_track_by_external_id(
            db, external_ids=wanted, server_source='plex',
        )
        assert row is not None

    def test_server_source_mismatch_with_filter(self, db):
        db.insert(title='Hello', spotify_track_id='sp1', server_source='jellyfin')
        wanted = {'spotify_id': 'sp1'}
        row = find_library_track_by_external_id(
            db, external_ids=wanted, server_source='plex',
        )
        # The row belongs to jellyfin and the filter asks for plex.
        assert row is None

    def test_null_server_source_passes_filter(self, db):
        """Rows written by older versions may have NULL server_source;
        they should still be found when a filter is given (defensive)."""
        db.insert(title='Hello', spotify_track_id='sp1', server_source=None)
        wanted = {'spotify_id': 'sp1'}
        row = find_library_track_by_external_id(
            db, external_ids=wanted, server_source='plex',
        )
        assert row is not None

    def test_no_filter_matches_any_server_source(self, db):
        db.insert(title='Hello', spotify_track_id='sp1', server_source='jellyfin')
        wanted = {'spotify_id': 'sp1'}
        row = find_library_track_by_external_id(
            db, external_ids=wanted, server_source=None,
        )
        assert row is not None
# ---------------------------------------------------------------------------
# EXTERNAL_ID_COLUMNS map sanity
# ---------------------------------------------------------------------------
class TestExternalIdColumnsMap:
    """Sanity checks on the ID-name → column mapping."""

    def test_every_known_id_name_has_a_column(self):
        """Any new ID name that extract_external_ids starts returning
        also needs a column entry in the map; without one,
        find_library_track_by_external_id silently ignores it."""
        # Sample of ID names extract_external_ids can return; keep in sync.
        known_id_names = set()
        known_id_names.update(
            ('spotify_id', 'itunes_id', 'deezer_id', 'tidal_id', 'qobuz_id'),
        )
        known_id_names.update(('mbid', 'audiodb_id', 'soul_id', 'isrc'))
        mapped_names = set(EXTERNAL_ID_COLUMNS.keys())
        assert mapped_names == known_id_names

    def test_column_names_are_unique(self):
        cols = list(EXTERNAL_ID_COLUMNS.values())
        distinct = set(cols)
        assert len(cols) == len(distinct), (
            f"Duplicate column targets in EXTERNAL_ID_COLUMNS: {cols}"
        )