mirror of https://github.com/Nezreka/SoulSync.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
413 lines
16 KiB
413 lines
16 KiB
"""Tests for the provider-neutral external-ID match helper.
|
|
|
|
Discord-reported (CAL): the watchlist scanner re-downloaded a track
already on disk because the library DB had stale album metadata. The
album fuzzy match correctly said the names didn't match, and the scanner
declared the track missing. The track's stable external IDs (Spotify
ID, Deezer ID, MusicBrainz recording ID, ISRC, etc.) were available on
both sides but never consulted.

These tests pin the new ID-extraction helper + the library SELECT so
the regression doesn't return.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import sqlite3
|
|
from typing import Any, Dict
|
|
|
|
import pytest
|
|
|
|
from core.library.track_identity import (
|
|
EXTERNAL_ID_COLUMNS,
|
|
extract_external_ids,
|
|
find_library_track_by_external_id,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# extract_external_ids
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestExtractExternalIdsFromDirectFields:
    """Provider-specific fields (and their aliases) map to canonical ID names."""

    def test_spotify_track_with_spotify_id_field(self):
        found = extract_external_ids({'spotify_id': 'sp1', 'name': 'Hello'})
        assert found == {'spotify_id': 'sp1'}

    def test_track_with_alias_spotify_track_id(self):
        found = extract_external_ids({'spotify_track_id': 'sp1', 'name': 'Hello'})
        assert found == {'spotify_id': 'sp1'}

    def test_track_with_uppercase_tag_name(self):
        # Upper-case tag-style key must resolve like the lower-case alias.
        found = extract_external_ids({'SPOTIFY_TRACK_ID': 'sp1', 'name': 'Hello'})
        assert found == {'spotify_id': 'sp1'}

    def test_itunes_via_trackId_alias(self):
        # Integer input; the expected ID is its string form.
        found = extract_external_ids({'trackId': 12345, 'name': 'Hello'})
        assert found == {'itunes_id': '12345'}

    def test_deezer_via_provider_native_id(self):
        found = extract_external_ids(
            {'id': 'dz1', 'provider': 'deezer', 'name': 'Hello'},
        )
        assert found == {'deezer_id': 'dz1'}

    def test_isrc_extracted(self):
        found = extract_external_ids({'isrc': 'USRC17607839', 'name': 'Hello'})
        assert found == {'isrc': 'USRC17607839'}

    def test_musicbrainz_recording_id_extracted(self):
        found = extract_external_ids(
            {'musicbrainz_recording_id': 'mb-uuid-1', 'name': 'Hello'},
        )
        assert found == {'mbid': 'mb-uuid-1'}

    def test_audiodb_id_via_idTrack_alias(self):
        found = extract_external_ids({'idTrack': 'adb1', 'name': 'Hello'})
        assert found == {'audiodb_id': 'adb1'}

    def test_soul_id_extracted(self):
        found = extract_external_ids({'soul_id': 'soul-abc', 'name': 'Hello'})
        assert found == {'soul_id': 'soul-abc'}
|
|
|
|
|
|
class TestExtractExternalIdsFromProviderField:
    """A provider-like field tells the extractor which ID a bare ``id`` is."""

    def test_provider_spotify_with_native_id(self):
        found = extract_external_ids(
            {'provider': 'spotify', 'id': 'sp1', 'name': 'Hello'},
        )
        assert found == {'spotify_id': 'sp1'}

    def test_provider_itunes_with_native_id(self):
        found = extract_external_ids(
            {'provider': 'itunes', 'id': 'it1', 'name': 'Hello'},
        )
        assert found == {'itunes_id': 'it1'}

    def test_provider_tidal_with_native_id(self):
        found = extract_external_ids(
            {'provider': 'tidal', 'id': 'td1', 'name': 'Hello'},
        )
        assert found == {'tidal_id': 'td1'}

    def test_provider_qobuz_with_native_id(self):
        found = extract_external_ids(
            {'provider': 'qobuz', 'id': 'qb1', 'name': 'Hello'},
        )
        assert found == {'qobuz_id': 'qb1'}

    def test_provider_musicbrainz_with_native_id(self):
        found = extract_external_ids(
            {'provider': 'musicbrainz', 'id': 'mb-uuid', 'name': 'Hello'},
        )
        assert found == {'mbid': 'mb-uuid'}

    def test_provider_hydrabase_with_native_id(self):
        found = extract_external_ids(
            {'provider': 'hydrabase', 'id': 'hyd1', 'name': 'Hello'},
        )
        assert found == {'soul_id': 'hyd1'}

    def test_source_field_treated_same_as_provider(self):
        found = extract_external_ids(
            {'source': 'deezer', 'id': 'dz1', 'name': 'Hello'},
        )
        assert found == {'deezer_id': 'dz1'}

    def test_underscore_source_field_treated_same_as_provider(self):
        """Normalized tracks from the Deezer / Discogs / Hydrabase clients
        carry ``_source`` (with a leading underscore). It has to count as
        the provider disambiguator; if it doesn't, watchlist scans against
        those sources quietly lose the ID match and drop to fuzzy."""
        found = extract_external_ids(
            {'_source': 'deezer', 'id': 'dz1', 'name': 'Hello'},
        )
        assert found == {'deezer_id': 'dz1'}

    def test_underscore_source_hydrabase(self):
        found = extract_external_ids(
            {'_source': 'hydrabase', 'id': 'hyd-soul-1', 'name': 'Hello'},
        )
        assert found == {'soul_id': 'hyd-soul-1'}

    def test_native_id_without_provider_is_ignored(self):
        """A bare 'id' with no provider field is ambiguous, so it is dropped."""
        found = extract_external_ids({'id': 'unknown', 'name': 'Hello'})
        assert found == {}

    def test_source_hint_disambiguates_when_track_has_no_provider(self):
        """Raw Spotify and iTunes API payloads have no provider / source /
        _source field. The watchlist scanner supplies its configured
        primary source as a hint so the native ``id`` can still be mapped."""
        track = {'id': 'sp1', 'name': 'Hello'}  # No provider field
        hint_to_key = (
            ('spotify', 'spotify_id'),
            ('itunes', 'itunes_id'),
            ('deezer', 'deezer_id'),
        )
        # Same dict is reused for every hint, in the original call order.
        for hint, id_name in hint_to_key:
            assert extract_external_ids(track, source_hint=hint) == {id_name: 'sp1'}

    def test_source_hint_does_not_override_track_provider(self):
        """An explicit provider on the track beats the hint, which guards
        against a wrong hint."""
        found = extract_external_ids(
            {'_source': 'deezer', 'id': 'dz1', 'name': 'Hello'},
            source_hint='spotify',
        )
        assert found == {'deezer_id': 'dz1'}

    def test_source_hint_none_is_ignored(self):
        found = extract_external_ids(
            {'id': 'unknown', 'name': 'Hello'}, source_hint=None,
        )
        assert found == {}
|
|
|
|
|
|
class TestExtractExternalIdsMixedAndDefensive:
    """Combined inputs plus defensive cases (empty, None, object, numeric)."""

    def test_track_with_multiple_provider_specific_fields(self):
        expected = {
            'spotify_id': 'sp1',
            'itunes_id': 'it1',
            'isrc': 'USRC17607839',
        }
        found = extract_external_ids({
            'spotify_id': 'sp1',
            'itunes_id': 'it1',
            'isrc': 'USRC17607839',
            'name': 'Hello',
        })
        assert found == expected

    def test_direct_field_takes_precedence_over_provider_native_id(self):
        """When 'spotify_id' and provider/'id' are both present, the direct
        field is the one that ends up in the result."""
        found = extract_external_ids({
            'spotify_id': 'direct-sp',
            'provider': 'spotify',
            'id': 'native-sp',
        })
        assert found == {'spotify_id': 'direct-sp'}

    def test_object_style_track_supported(self):
        class _TrackObject:
            # Attribute-bearing stand-in for a non-dict track.
            def __init__(self):
                self.spotify_id = 'sp1'
                self.isrc = 'USRC17607839'
                self.name = 'Hello'

        found = extract_external_ids(_TrackObject())
        assert found == {
            'spotify_id': 'sp1',
            'isrc': 'USRC17607839',
        }

    def test_empty_strings_treated_as_missing(self):
        blank_values = {'spotify_id': '', 'itunes_id': '   ', 'isrc': None}
        assert extract_external_ids(blank_values) == {}

    def test_no_ids_returns_empty_dict(self):
        found = extract_external_ids({'name': 'Hello', 'duration_ms': 1000})
        assert found == {}

    def test_none_track_returns_empty_dict(self):
        found = extract_external_ids(None)
        assert found == {}

    def test_numeric_ids_coerced_to_string(self):
        found = extract_external_ids({'spotify_id': 12345, 'name': 'Hello'})
        assert found == {'spotify_id': '12345'}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# find_library_track_by_external_id
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class _FakeDatabase:
    """Minimal DB stand-in exposing ``_get_connection()`` like MusicDatabase.

    Holds one shared in-memory SQLite database with a single ``tracks``
    table. Rows are returned as ``sqlite3.Row`` so tests can index them
    by column name.
    """

    class _NoCloseConn:
        """Connection proxy whose ``close()`` does nothing.

        Every other attribute is forwarded to the wrapped connection.
        """

        def __init__(self, real):
            self._real = real

        def __getattr__(self, name):
            # Only reached for attributes not defined on the proxy itself.
            return getattr(self._real, name)

        def close(self):
            """Ignore the close so the shared in-memory DB is not dropped."""

    def __init__(self):
        self._conn = sqlite3.connect(':memory:')
        self._conn.row_factory = sqlite3.Row
        # Schema mirrors the columns the helper reads — only the ones we
        # actually use need to exist.
        self._conn.execute("""
            CREATE TABLE tracks (
                id INTEGER PRIMARY KEY,
                title TEXT,
                spotify_track_id TEXT,
                itunes_track_id TEXT,
                deezer_id TEXT,
                tidal_id TEXT,
                qobuz_id TEXT,
                musicbrainz_recording_id TEXT,
                audiodb_id TEXT,
                soul_id TEXT,
                isrc TEXT,
                server_source TEXT
            )
        """)
        self._conn.commit()

    def _get_connection(self):
        """Return a proxy around the shared connection.

        Mirrors MusicDatabase's pattern where the caller closes the
        returned connection; the proxy turns that close into a no-op so
        the in-memory DB isn't dropped between calls.
        """
        return self._NoCloseConn(self._conn)

    def insert(self, **kwargs):
        """Insert one row into ``tracks`` and commit.

        Args:
            **kwargs: column name -> value. Column names are interpolated
                into the SQL text, so they must be trusted identifiers
                (test code only); values are bound as parameters.
                With no columns given, an all-NULL row is inserted.
        """
        if not kwargs:
            # "INSERT INTO tracks () VALUES ()" is a syntax error in SQLite.
            self._conn.execute("INSERT INTO tracks DEFAULT VALUES")
        else:
            cols = ', '.join(kwargs)
            placeholders = ', '.join(['?'] * len(kwargs))
            self._conn.execute(
                f"INSERT INTO tracks ({cols}) VALUES ({placeholders})",
                list(kwargs.values()),
            )
        self._conn.commit()
|
|
|
|
|
|
@pytest.fixture
def db():
    """Provide a fresh ``_FakeDatabase`` per test and close it afterwards.

    The connection is closed explicitly on teardown so the in-memory
    SQLite handle is not left for the garbage collector to clean up.
    """
    database = _FakeDatabase()
    try:
        yield database
    finally:
        # Close the real connection, not the no-op proxy from _get_connection().
        database._conn.close()
|
|
|
|
|
|
class TestFindLibraryTrackBySpotifyId:
    """Lookups keyed by ``spotify_id`` against the ``spotify_track_id`` column."""

    def test_match_by_spotify_id(self, db):
        db.insert(title='Hello', spotify_track_id='sp1')
        result = find_library_track_by_external_id(db, external_ids={'spotify_id': 'sp1'})
        assert result is not None
        assert result['title'] == 'Hello'
        assert result['spotify_track_id'] == 'sp1'

    def test_no_match_returns_none(self, db):
        db.insert(title='Hello', spotify_track_id='sp-other')
        result = find_library_track_by_external_id(db, external_ids={'spotify_id': 'sp1'})
        assert result is None

    def test_null_column_is_skipped(self, db):
        """A library row whose ID columns are all NULL must never match,
        whether the lookup carries a concrete source ID or none at all."""
        db.insert(title='NoIDs')  # all IDs NULL
        # Empty external_ids → no match
        assert find_library_track_by_external_id(db, external_ids={}) is None
        # Concrete ID against a NULL column → still no match. This is the
        # case the test name describes; the empty lookup alone never
        # touches the NULL column.
        assert find_library_track_by_external_id(
            db, external_ids={'spotify_id': 'sp1'},
        ) is None
|
|
|
|
|
|
class TestFindLibraryTrackProviderNeutral:
    """Each supported external ID can locate a library row on its own."""

    def test_match_by_itunes_id_when_spotify_missing(self, db):
        db.insert(title='Hello', itunes_track_id='it1')
        lookup = {'itunes_id': 'it1'}
        row = find_library_track_by_external_id(db, external_ids=lookup)
        assert row is not None
        assert row['itunes_track_id'] == 'it1'

    def test_match_by_deezer_id(self, db):
        db.insert(title='Hello', deezer_id='dz1')
        lookup = {'deezer_id': 'dz1'}
        row = find_library_track_by_external_id(db, external_ids=lookup)
        assert row is not None

    def test_match_by_tidal_id(self, db):
        db.insert(title='Hello', tidal_id='td1')
        lookup = {'tidal_id': 'td1'}
        row = find_library_track_by_external_id(db, external_ids=lookup)
        assert row is not None

    def test_match_by_qobuz_id(self, db):
        db.insert(title='Hello', qobuz_id='qb1')
        lookup = {'qobuz_id': 'qb1'}
        row = find_library_track_by_external_id(db, external_ids=lookup)
        assert row is not None

    def test_match_by_musicbrainz_recording_id(self, db):
        db.insert(title='Hello', musicbrainz_recording_id='mb-uuid')
        lookup = {'mbid': 'mb-uuid'}
        row = find_library_track_by_external_id(db, external_ids=lookup)
        assert row is not None

    def test_match_by_isrc_across_providers(self, db):
        """ISRC acts as the cross-source identity: a track imported from
        Deezer can be found during a Spotify scan when both sides carry
        the same ISRC."""
        db.insert(title='Hello', deezer_id='dz1', isrc='USRC17607839')
        # The source side knows a Spotify ID + ISRC, the library side only
        # Deezer + ISRC, so the ISRC is the only shared key.
        lookup = {'spotify_id': 'sp-different', 'isrc': 'USRC17607839'}
        row = find_library_track_by_external_id(db, external_ids=lookup)
        assert row is not None
        assert row['isrc'] == 'USRC17607839'

    def test_match_by_soul_id(self, db):
        db.insert(title='Hello', soul_id='hyd-soul-1')
        lookup = {'soul_id': 'hyd-soul-1'}
        row = find_library_track_by_external_id(db, external_ids=lookup)
        assert row is not None
|
|
|
|
|
|
class TestFindLibraryTrackOrSemantics:
    """Supplied IDs are alternatives: one hit suffices, zero hits is a miss."""

    def test_any_one_matching_id_is_enough(self, db):
        db.insert(title='Hello', spotify_track_id='sp1')
        # Only the Spotify ID lines up with the stored row.
        lookup = {
            'spotify_id': 'sp1',
            'itunes_id': 'wrong',
            'deezer_id': 'wrong',
        }
        row = find_library_track_by_external_id(db, external_ids=lookup)
        assert row is not None

    def test_no_matching_id_returns_none(self, db):
        db.insert(title='Hello', spotify_track_id='sp1', itunes_track_id='it1')
        lookup = {'deezer_id': 'dz-other'}
        row = find_library_track_by_external_id(db, external_ids=lookup)
        assert row is None

    def test_empty_external_ids_returns_none(self, db):
        db.insert(title='Hello', spotify_track_id='sp1')
        row = find_library_track_by_external_id(db, external_ids={})
        assert row is None
|
|
|
|
|
|
class TestFindLibraryTrackServerSourceFilter:
    """Behaviour of the optional ``server_source`` argument."""

    def test_server_source_match(self, db):
        db.insert(title='Hello', spotify_track_id='sp1', server_source='plex')
        row = find_library_track_by_external_id(
            db,
            external_ids={'spotify_id': 'sp1'},
            server_source='plex',
        )
        assert row is not None

    def test_server_source_mismatch_with_filter(self, db):
        db.insert(title='Hello', spotify_track_id='sp1', server_source='jellyfin')
        row = find_library_track_by_external_id(
            db,
            external_ids={'spotify_id': 'sp1'},
            server_source='plex',
        )
        # The row belongs to jellyfin, so a plex filter must exclude it.
        assert row is None

    def test_null_server_source_passes_filter(self, db):
        """Rows from older libraries can have a NULL server_source; they
        are still expected to match while a filter is active (defensive)."""
        db.insert(title='Hello', spotify_track_id='sp1', server_source=None)
        row = find_library_track_by_external_id(
            db,
            external_ids={'spotify_id': 'sp1'},
            server_source='plex',
        )
        assert row is not None

    def test_no_filter_matches_any_server_source(self, db):
        db.insert(title='Hello', spotify_track_id='sp1', server_source='jellyfin')
        row = find_library_track_by_external_id(
            db,
            external_ids={'spotify_id': 'sp1'},
            server_source=None,
        )
        assert row is not None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# EXTERNAL_ID_COLUMNS map sanity
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestExternalIdColumnsMap:
    """Sanity checks on the ``EXTERNAL_ID_COLUMNS`` mapping."""

    def test_every_known_id_name_has_a_column(self):
        """Whenever extract_external_ids gains a new ID name, this map
        needs a matching column entry; without one,
        find_library_track_by_external_id would silently skip that ID."""
        # ID names extract_external_ids is expected to return; keep in sync.
        known_id_names = {
            'spotify_id',
            'itunes_id',
            'deezer_id',
            'tidal_id',
            'qobuz_id',
            'mbid',
            'audiodb_id',
            'soul_id',
            'isrc',
        }
        mapped_names = set(EXTERNAL_ID_COLUMNS.keys())
        assert mapped_names == known_id_names

    def test_column_names_are_unique(self):
        cols = list(EXTERNAL_ID_COLUMNS.values())
        distinct = set(cols)
        assert len(cols) == len(distinct), \
            f"Duplicate column targets in EXTERNAL_ID_COLUMNS: {cols}"
|