mirror of https://github.com/Nezreka/SoulSync.git
Reported case (CAL): a track already on disk got re-downloaded by the
watchlist scanner on every scan. Library DB had stale album metadata
for the file (track tagged on album "Left Alone") while the metadata
source reported it on a different album ("NPC" single). The
title+artist+album fuzzy block correctly said the album names didn't
match and declared the track missing — but the file's stable external
IDs (Spotify ID, ISRC, etc.) unambiguously identified it as the same
recording.
The earlier compilation-album fix (PR #461) handled qualifier drift
("OST" vs "Music From The Motion Picture"). This case is two
genuinely different album names referring to the same song.
Fix: provider-neutral external-ID short-circuit before the fuzzy
block in `is_track_missing_from_library`. Pulls every recognized ID
off the source track (Spotify / iTunes / Deezer / Tidal / Qobuz /
MusicBrainz / AudioDB / Hydrabase / ISRC), runs a single SELECT
against the indexed external-ID columns on the `tracks` table, and
treats any hit as "track exists in library — don't re-download".
If no IDs are available (older imports without enrichment, library
scans that didn't populate external IDs), falls through to the
existing fuzzy logic so the safety net stays intact.
New `core/library/track_identity.py` module with two helpers:
- `extract_external_ids(track)`: handles dict and object-style track
shapes, direct-field aliases (spotify_id / spotify_track_id /
SPOTIFY_TRACK_ID), and provider-disambiguated native `id` fields
(when track has `provider='deezer'` and `id='X'`, treats X as a
Deezer ID).
- `find_library_track_by_external_id(db, external_ids,
server_source)`: builds an OR of indexed column matches with
IS NOT NULL guards, optional server_source filter that also
passes legacy NULL rows, single-row LIMIT.
ISRC bridges across providers — a library track imported via Deezer
can be matched against a Spotify scan when both sides carry the
same ISRC.
43 regression tests in `tests/test_library_track_identity.py`:
- 9 ID-extraction tests for direct fields (Spotify / iTunes / Deezer /
ISRC / MBID / AudioDB / Hydrabase)
- 8 ID-extraction tests via the provider field (6 providers + source
alias + missing-provider-ignored)
- 7 mixed/defensive tests (multiple IDs, object-style, empty strings,
None track, numeric coercion)
- 10 lookup tests (3 Spotify-ID match / no-match / empty-ID + 6 per-provider + ISRC cross-bridge)
- 3 OR-semantics tests
- 4 server_source filter tests
- 2 ID-column-map sanity tests
Full pytest 1606 passed; ruff clean.
pull/470/head
parent
813eebdd62
commit
ecb8939c80
@ -0,0 +1,206 @@
|
||||
"""Match a metadata-source track against the library by stable external IDs.
|
||||
|
||||
Discord-reported (CAL): the watchlist scanner re-downloaded a track that
|
||||
already existed on disk because the library DB had stale album metadata
|
||||
(track tagged on album "Left Alone" while Spotify reported it as on the
|
||||
"NPC" single). The matching logic relied on title + artist + album fuzzy
|
||||
comparison; the album fuzzy correctly said the names didn't match, the
|
||||
scanner declared the track missing, and the wishlist re-added + re-
|
||||
downloaded it on every scan.
|
||||
|
||||
The track has a stable external identity though — every download embeds
|
||||
Spotify / iTunes / Deezer / Tidal / Qobuz / MusicBrainz / AudioDB /
|
||||
Hydrabase / ISRC IDs as both file tags AND DB columns. This module pulls
|
||||
those IDs off either side and asks: do we already have a row in the
|
||||
``tracks`` table whose external-ID column matches one of the source
|
||||
track's IDs? If yes, the track is NOT missing, regardless of how the
|
||||
album metadata drifted between sources.
|
||||
|
||||
Provider-neutral by design — no spotify-only paths.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from utils.logging_config import get_logger
|
||||
|
||||
logger = get_logger("library.track_identity")
|
||||
|
||||
|
||||
# Lookup table: conceptual ID name (the keys of the dict produced by
# ``extract_external_ids``) -> name of the column on the library
# ``tracks`` table that stores that ID. The column names have to stay in
# sync with the schema in ``database/music_database.py``.
EXTERNAL_ID_COLUMNS: Dict[str, str] = {
    # Streaming / store providers
    "spotify_id": "spotify_track_id",
    "itunes_id": "itunes_track_id",
    "deezer_id": "deezer_id",
    "tidal_id": "tidal_id",
    "qobuz_id": "qobuz_id",
    # Metadata databases
    "mbid": "musicbrainz_recording_id",
    "audiodb_id": "audiodb_id",
    "soul_id": "soul_id",
    # Recording code shared across providers
    "isrc": "isrc",
}
|
||||
|
||||
|
||||
def _coerce(value: Any) -> Optional[str]:
|
||||
"""Return value as a non-empty string, or None for empty / missing."""
|
||||
if value is None:
|
||||
return None
|
||||
text = str(value).strip()
|
||||
return text or None
|
||||
|
||||
|
||||
def _get(track: Any, *names: str) -> Optional[str]:
    """Return the first usable value found under any of ``names``.

    ``track`` may be a dict (values are read by key) or any other object
    (values are read with ``getattr``). Names are tried in the order given;
    a value counts as usable once ``_coerce`` turns it into a non-empty
    string. Returns ``None`` when no name yields such a value.
    """
    for field in names:
        try:
            if isinstance(track, dict):
                raw = track[field]
            else:
                raw = getattr(track, field, None)
        except (TypeError, KeyError):
            # Missing dict key (or an un-indexable track): treat as absent.
            raw = None
        found = _coerce(raw)
        if found is not None:
            return found
    return None
|
||||
|
||||
|
||||
def extract_external_ids(track: Any) -> Dict[str, str]:
    """Collect every recognized external ID carried by a source track.

    Two lookups are performed:

    1. Self-describing fields (``spotify_id``, ``trackId``, ``ISRC`` ...)
       are read under each of their known aliases.
    2. The generic ``id`` field is attributed to a provider using the
       track's ``provider`` (or ``source``) field, compared
       case-insensitively. It is only used when step 1 did not already
       produce an ID for that provider, and is ignored when the provider
       is missing or unrecognized.

    Args:
        track: A dict or attribute-style object describing the track, or
            ``None``.

    Returns:
        Mapping of conceptual ID name (the keys of ``EXTERNAL_ID_COLUMNS``)
        to the ID as a non-empty string. Empty when ``track`` is ``None``
        or carries no usable ID.
    """
    if track is None:
        return {}

    # Conceptual ID name -> field names it may appear under, in the order
    # they are tried.
    aliases_by_id = (
        ('spotify_id', ('spotify_id', 'spotify_track_id', 'SPOTIFY_TRACK_ID')),
        ('itunes_id', ('itunes_id', 'itunes_track_id', 'trackId', 'ITUNES_TRACK_ID')),
        ('deezer_id', ('deezer_id', 'deezer_track_id', 'DEEZER_TRACK_ID')),
        ('tidal_id', ('tidal_id', 'tidal_track_id', 'TIDAL_TRACK_ID')),
        ('qobuz_id', ('qobuz_id', 'qobuz_track_id', 'QOBUZ_TRACK_ID')),
        ('mbid', ('musicbrainz_recording_id', 'mbid', 'MUSICBRAINZ_RECORDING_ID')),
        ('audiodb_id', ('audiodb_id', 'idTrack', 'AUDIODB_TRACK_ID')),
        ('soul_id', ('soul_id', 'SOUL_ID')),
        ('isrc', ('isrc', 'ISRC')),
    )

    found: Dict[str, str] = {}
    for id_name, aliases in aliases_by_id:
        hit = _get(track, *aliases)
        if hit:
            found[id_name] = hit

    # The provider field tells us which source the native ``id`` belongs
    # to. Without it the bare ``id`` cannot be attributed and is dropped.
    declared = _get(track, 'provider', 'source')
    provider = declared.lower() if declared else ''
    native_id = _get(track, 'id')
    if not (native_id and provider):
        return found

    native_key = {
        'spotify': 'spotify_id',
        'itunes': 'itunes_id',
        'deezer': 'deezer_id',
        'tidal': 'tidal_id',
        'qobuz': 'qobuz_id',
        'musicbrainz': 'mbid',
        'audiodb': 'audiodb_id',
        'hydrabase': 'soul_id',
    }.get(provider)
    if native_key:
        # A direct field collected above wins over the native ``id``.
        found.setdefault(native_key, native_id)

    return found
|
||||
|
||||
|
||||
def _row_to_dict(cursor: Any, row: Any) -> Dict[str, Any]:
    """Convert one fetched row into a plain ``{column: value}`` dict."""
    keys = getattr(row, 'keys', None)
    if callable(keys):
        # sqlite3.Row (and dict-like rows) know their own column names.
        return {key: row[key] for key in keys()}
    # Positional rows (plain tuples): take the names from the cursor.
    # Checking for ``keys()`` explicitly, instead of trying ``dict(row)``
    # and catching the failure, avoids silently mis-reading a tuple of
    # 2-item values as key/value pairs.
    return {desc[0]: row[index] for index, desc in enumerate(cursor.description)}


def find_library_track_by_external_id(
    db: Any,
    *,
    external_ids: Dict[str, str],
    server_source: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Look up one ``tracks`` row matching any of the given external IDs.

    Each entry of ``external_ids`` whose name appears in
    ``EXTERNAL_ID_COLUMNS`` and whose value is truthy becomes one equality
    test against the mapped column; the tests are OR-ed together and the
    query is capped at a single row, since callers only need to know
    whether a match exists.

    Args:
        db: Object exposing ``_get_connection()``; the returned connection
            is closed by this function once the query is done.
        external_ids: Conceptual ID name -> ID value, as produced by
            ``extract_external_ids``. Unknown names and empty values are
            skipped.
        server_source: When set, only rows whose ``server_source`` equals
            this value, or is NULL, are eligible. Intended to avoid false
            positives when several media servers share one database.

    Returns:
        The matching row as a ``{column: value}`` dict, or ``None`` when
        nothing matches, no usable ID was supplied, or the query failed
        (failures are logged at debug level and never raised).

    Note:
        The lookup is only cheap if the external-ID columns are indexed;
        that is a property of the schema and is not checked here.
    """
    if not external_ids:
        return None

    clauses: List[str] = []
    params: List[Any] = []
    for id_name, id_value in external_ids.items():
        column = EXTERNAL_ID_COLUMNS.get(id_name)
        if not column or not id_value:
            continue
        # Column names come from the fixed EXTERNAL_ID_COLUMNS table, never
        # from caller input; the ID value itself is bound as a parameter.
        clauses.append(f"({column} = ? AND {column} IS NOT NULL AND {column} != '')")
        params.append(id_value)

    if not clauses:
        return None

    where_external = " OR ".join(clauses)

    if server_source:
        # Rows with a NULL server_source (legacy rows) pass the filter too.
        sql = (
            f"SELECT * FROM tracks WHERE ({where_external}) "
            f"AND (server_source = ? OR server_source IS NULL) LIMIT 1"
        )
        params.append(server_source)
    else:
        sql = f"SELECT * FROM tracks WHERE ({where_external}) LIMIT 1"

    conn = None
    try:
        conn = db._get_connection()
        cursor = conn.cursor()
        cursor.execute(sql, params)
        row = cursor.fetchone()
        if row is None:
            return None
        return _row_to_dict(cursor, row)
    except Exception as exc:
        # Best-effort lookup: on any failure report "no match" so the
        # caller can fall back to its other matching logic.
        logger.debug(f"find_library_track_by_external_id query failed: {exc}")
        return None
    finally:
        if conn is not None:
            try:
                conn.close()
            except Exception:
                # Closing is best-effort; a failure here must not mask the result.
                pass
|
||||
|
||||
|
||||
# Names exported by ``from core.library.track_identity import *``.
__all__ = [
    "EXTERNAL_ID_COLUMNS",
    "extract_external_ids",
    "find_library_track_by_external_id",
]
|
||||
@ -0,0 +1,380 @@
|
||||
"""Tests for the provider-neutral external-ID match helper.
|
||||
|
||||
Discord-reported (CAL): the watchlist scanner re-downloaded a track
|
||||
already on disk because the library DB had stale album metadata. The
|
||||
album fuzzy correctly said the names didn't match and the scanner
|
||||
declared the track missing. The track's stable external IDs (Spotify
|
||||
ID, Deezer ID, MusicBrainz recording ID, ISRC, etc.) were available on
|
||||
both sides but never consulted.
|
||||
|
||||
These tests pin the new ID-extraction helper + the library SELECT so
|
||||
the regression doesn't return.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
from typing import Any, Dict
|
||||
|
||||
import pytest
|
||||
|
||||
from core.library.track_identity import (
|
||||
EXTERNAL_ID_COLUMNS,
|
||||
extract_external_ids,
|
||||
find_library_track_by_external_id,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# extract_external_ids
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestExtractExternalIdsFromDirectFields:
    """Self-describing ID fields (and their aliases) map to one ID name."""

    def test_spotify_track_with_spotify_id_field(self):
        source = {'spotify_id': 'sp1', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'spotify_id': 'sp1'}

    def test_track_with_alias_spotify_track_id(self):
        source = {'spotify_track_id': 'sp1', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'spotify_id': 'sp1'}

    def test_track_with_uppercase_tag_name(self):
        source = {'SPOTIFY_TRACK_ID': 'sp1', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'spotify_id': 'sp1'}

    def test_itunes_via_trackId_alias(self):
        # Numeric IDs come back as strings.
        source = {'trackId': 12345, 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'itunes_id': '12345'}

    def test_deezer_via_provider_native_id(self):
        source = {'id': 'dz1', 'provider': 'deezer', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'deezer_id': 'dz1'}

    def test_isrc_extracted(self):
        source = {'isrc': 'USRC17607839', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'isrc': 'USRC17607839'}

    def test_musicbrainz_recording_id_extracted(self):
        source = {'musicbrainz_recording_id': 'mb-uuid-1', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'mbid': 'mb-uuid-1'}

    def test_audiodb_id_via_idTrack_alias(self):
        source = {'idTrack': 'adb1', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'audiodb_id': 'adb1'}

    def test_soul_id_extracted(self):
        source = {'soul_id': 'soul-abc', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'soul_id': 'soul-abc'}
|
||||
|
||||
|
||||
class TestExtractExternalIdsFromProviderField:
    """A track's bare ``id`` is attributed using its provider/source field."""

    def test_provider_spotify_with_native_id(self):
        source = {'provider': 'spotify', 'id': 'sp1', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'spotify_id': 'sp1'}

    def test_provider_itunes_with_native_id(self):
        source = {'provider': 'itunes', 'id': 'it1', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'itunes_id': 'it1'}

    def test_provider_tidal_with_native_id(self):
        source = {'provider': 'tidal', 'id': 'td1', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'tidal_id': 'td1'}

    def test_provider_qobuz_with_native_id(self):
        source = {'provider': 'qobuz', 'id': 'qb1', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'qobuz_id': 'qb1'}

    def test_provider_musicbrainz_with_native_id(self):
        source = {'provider': 'musicbrainz', 'id': 'mb-uuid', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'mbid': 'mb-uuid'}

    def test_provider_hydrabase_with_native_id(self):
        source = {'provider': 'hydrabase', 'id': 'hyd1', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'soul_id': 'hyd1'}

    def test_source_field_treated_same_as_provider(self):
        source = {'source': 'deezer', 'id': 'dz1', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'deezer_id': 'dz1'}

    def test_native_id_without_provider_is_ignored(self):
        """A bare 'id' with no provider field cannot be attributed to a source."""
        source = {'id': 'unknown', 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {}
|
||||
|
||||
|
||||
class TestExtractExternalIdsMixedAndDefensive:
    """Several IDs at once, object-style tracks, and empty / odd inputs."""

    def test_track_with_multiple_provider_specific_fields(self):
        expected = {
            'spotify_id': 'sp1',
            'itunes_id': 'it1',
            'isrc': 'USRC17607839',
        }
        # Same IDs plus a non-ID field that must not leak into the result.
        source = dict(expected, name='Hello')
        assert extract_external_ids(source) == expected

    def test_direct_field_takes_precedence_over_provider_native_id(self):
        """When both 'spotify_id' and provider + 'id' are present, the
        direct field is the one reported."""
        source = {
            'spotify_id': 'direct-sp',
            'provider': 'spotify',
            'id': 'native-sp',
        }
        extracted = extract_external_ids(source)
        assert extracted == {'spotify_id': 'direct-sp'}

    def test_object_style_track_supported(self):
        class _Track:
            def __init__(self):
                self.spotify_id = 'sp1'
                self.isrc = 'USRC17607839'
                self.name = 'Hello'

        extracted = extract_external_ids(_Track())
        assert extracted == {
            'spotify_id': 'sp1',
            'isrc': 'USRC17607839',
        }

    def test_empty_strings_treated_as_missing(self):
        source = {'spotify_id': '', 'itunes_id': '   ', 'isrc': None}
        extracted = extract_external_ids(source)
        assert extracted == {}

    def test_no_ids_returns_empty_dict(self):
        source = {'name': 'Hello', 'duration_ms': 1000}
        extracted = extract_external_ids(source)
        assert extracted == {}

    def test_none_track_returns_empty_dict(self):
        extracted = extract_external_ids(None)
        assert extracted == {}

    def test_numeric_ids_coerced_to_string(self):
        source = {'spotify_id': 12345, 'name': 'Hello'}
        extracted = extract_external_ids(source)
        assert extracted == {'spotify_id': '12345'}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# find_library_track_by_external_id
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _FakeDatabase:
|
||||
"""Minimal DB stand-in exposing ``_get_connection()`` like MusicDatabase."""
|
||||
|
||||
def __init__(self):
|
||||
self._conn = sqlite3.connect(':memory:')
|
||||
self._conn.row_factory = sqlite3.Row
|
||||
# Schema mirrors the columns the helper reads — only the ones we
|
||||
# actually use need to exist.
|
||||
self._conn.execute("""
|
||||
CREATE TABLE tracks (
|
||||
id INTEGER PRIMARY KEY,
|
||||
title TEXT,
|
||||
spotify_track_id TEXT,
|
||||
itunes_track_id TEXT,
|
||||
deezer_id TEXT,
|
||||
tidal_id TEXT,
|
||||
qobuz_id TEXT,
|
||||
musicbrainz_recording_id TEXT,
|
||||
audiodb_id TEXT,
|
||||
soul_id TEXT,
|
||||
isrc TEXT,
|
||||
server_source TEXT
|
||||
)
|
||||
""")
|
||||
self._conn.commit()
|
||||
|
||||
def _get_connection(self):
|
||||
# Mirror MusicDatabase's pattern: caller closes the returned
|
||||
# connection. Use a thin wrapper that no-ops close so the in-
|
||||
# memory DB isn't dropped between calls.
|
||||
class _NoCloseConn:
|
||||
def __init__(_self, real):
|
||||
_self._real = real
|
||||
|
||||
def __getattr__(_self, name):
|
||||
return getattr(_self._real, name)
|
||||
|
||||
def close(_self):
|
||||
pass
|
||||
|
||||
return _NoCloseConn(self._conn)
|
||||
|
||||
def insert(self, **kwargs):
|
||||
cols = ', '.join(kwargs.keys())
|
||||
placeholders = ', '.join('?' * len(kwargs))
|
||||
self._conn.execute(
|
||||
f"INSERT INTO tracks ({cols}) VALUES ({placeholders})",
|
||||
list(kwargs.values()),
|
||||
)
|
||||
self._conn.commit()
|
||||
|
||||
|
||||
@pytest.fixture
def db():
    """Provide each test with its own empty ``_FakeDatabase``."""
    fresh_database = _FakeDatabase()
    return fresh_database
|
||||
|
||||
|
||||
class TestFindLibraryTrackBySpotifyId:
    """Lookup by Spotify ID: hit, miss, and rows that carry no IDs."""

    def test_match_by_spotify_id(self, db):
        db.insert(title='Hello', spotify_track_id='sp1')
        result = find_library_track_by_external_id(db, external_ids={'spotify_id': 'sp1'})
        assert result is not None
        assert result['title'] == 'Hello'
        assert result['spotify_track_id'] == 'sp1'

    def test_no_match_returns_none(self, db):
        db.insert(title='Hello', spotify_track_id='sp-other')
        result = find_library_track_by_external_id(db, external_ids={'spotify_id': 'sp1'})
        assert result is None

    def test_null_column_is_skipped(self, db):
        """A library row whose ID columns are all NULL must never match,
        whether the source ID is absent, blank, or a real value."""
        db.insert(title='NoIDs')  # all IDs NULL
        # No source IDs at all → no match (returns before querying).
        assert find_library_track_by_external_id(db, external_ids={}) is None
        # Blank source ID → its clause is skipped, so still no match.
        assert find_library_track_by_external_id(
            db, external_ids={'spotify_id': ''},
        ) is None
        # Real source ID → the query runs, and the NULL column must not match.
        assert find_library_track_by_external_id(
            db, external_ids={'spotify_id': 'sp1'},
        ) is None
|
||||
|
||||
|
||||
class TestFindLibraryTrackProviderNeutral:
    """Every supported ID type finds its row on its own; no Spotify ID needed."""

    def test_match_by_itunes_id_when_spotify_missing(self, db):
        db.insert(title='Hello', itunes_track_id='it1')
        wanted = {'itunes_id': 'it1'}
        result = find_library_track_by_external_id(db, external_ids=wanted)
        assert result is not None
        assert result['itunes_track_id'] == 'it1'

    def test_match_by_deezer_id(self, db):
        db.insert(title='Hello', deezer_id='dz1')
        wanted = {'deezer_id': 'dz1'}
        result = find_library_track_by_external_id(db, external_ids=wanted)
        assert result is not None

    def test_match_by_tidal_id(self, db):
        db.insert(title='Hello', tidal_id='td1')
        wanted = {'tidal_id': 'td1'}
        result = find_library_track_by_external_id(db, external_ids=wanted)
        assert result is not None

    def test_match_by_qobuz_id(self, db):
        db.insert(title='Hello', qobuz_id='qb1')
        wanted = {'qobuz_id': 'qb1'}
        result = find_library_track_by_external_id(db, external_ids=wanted)
        assert result is not None

    def test_match_by_musicbrainz_recording_id(self, db):
        db.insert(title='Hello', musicbrainz_recording_id='mb-uuid')
        wanted = {'mbid': 'mb-uuid'}
        result = find_library_track_by_external_id(db, external_ids=wanted)
        assert result is not None

    def test_match_by_isrc_across_providers(self, db):
        """ISRC acts as the cross-source identity: a library track imported
        from Deezer is found from a Spotify scan when both carry the same
        ISRC."""
        db.insert(title='Hello', deezer_id='dz1', isrc='USRC17607839')
        # The source side has a Spotify ID the library has never seen; only
        # the shared ISRC can connect the two.
        wanted = {'spotify_id': 'sp-different', 'isrc': 'USRC17607839'}
        result = find_library_track_by_external_id(db, external_ids=wanted)
        assert result is not None
        assert result['isrc'] == 'USRC17607839'

    def test_match_by_soul_id(self, db):
        db.insert(title='Hello', soul_id='hyd-soul-1')
        wanted = {'soul_id': 'hyd-soul-1'}
        result = find_library_track_by_external_id(db, external_ids=wanted)
        assert result is not None
|
||||
|
||||
|
||||
class TestFindLibraryTrackOrSemantics:
    """The supplied IDs are alternatives: one hit suffices, none means no row."""

    def test_any_one_matching_id_is_enough(self, db):
        db.insert(title='Hello', spotify_track_id='sp1')
        wanted = {
            'spotify_id': 'sp1',
            'itunes_id': 'wrong',
            'deezer_id': 'wrong',
        }
        result = find_library_track_by_external_id(db, external_ids=wanted)
        assert result is not None

    def test_no_matching_id_returns_none(self, db):
        db.insert(title='Hello', spotify_track_id='sp1', itunes_track_id='it1')
        wanted = {'deezer_id': 'dz-other'}
        result = find_library_track_by_external_id(db, external_ids=wanted)
        assert result is None

    def test_empty_external_ids_returns_none(self, db):
        db.insert(title='Hello', spotify_track_id='sp1')
        result = find_library_track_by_external_id(db, external_ids={})
        assert result is None
|
||||
|
||||
|
||||
class TestFindLibraryTrackServerSourceFilter:
    """Behaviour of the optional ``server_source`` restriction."""

    def test_server_source_match(self, db):
        db.insert(title='Hello', spotify_track_id='sp1', server_source='plex')
        wanted = {'spotify_id': 'sp1'}
        result = find_library_track_by_external_id(
            db, external_ids=wanted, server_source='plex',
        )
        assert result is not None

    def test_server_source_mismatch_with_filter(self, db):
        db.insert(title='Hello', spotify_track_id='sp1', server_source='jellyfin')
        wanted = {'spotify_id': 'sp1'}
        result = find_library_track_by_external_id(
            db, external_ids=wanted, server_source='plex',
        )
        # The row belongs to jellyfin, so the plex filter rejects it.
        assert result is None

    def test_null_server_source_passes_filter(self, db):
        """Rows with a NULL server_source (e.g. older library rows) still
        match while a filter is active."""
        db.insert(title='Hello', spotify_track_id='sp1', server_source=None)
        wanted = {'spotify_id': 'sp1'}
        result = find_library_track_by_external_id(
            db, external_ids=wanted, server_source='plex',
        )
        assert result is not None

    def test_no_filter_matches_any_server_source(self, db):
        db.insert(title='Hello', spotify_track_id='sp1', server_source='jellyfin')
        wanted = {'spotify_id': 'sp1'}
        result = find_library_track_by_external_id(
            db, external_ids=wanted, server_source=None,
        )
        assert result is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# EXTERNAL_ID_COLUMNS map sanity
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestExternalIdColumnsMap:
    """Structural checks on the ID-name -> column lookup table."""

    def test_every_known_id_name_has_a_column(self):
        """Guards against extract_external_ids gaining an ID name with no
        column entry, which find_library_track_by_external_id would
        silently skip."""
        # Hand-maintained list of the ID names extract_external_ids can
        # return; update it together with the map.
        known_id_names = {
            'spotify_id',
            'itunes_id',
            'deezer_id',
            'tidal_id',
            'qobuz_id',
            'mbid',
            'audiodb_id',
            'soul_id',
            'isrc',
        }
        mapped_names = set(EXTERNAL_ID_COLUMNS.keys())
        assert mapped_names == known_id_names

    def test_column_names_are_unique(self):
        cols = list(EXTERNAL_ID_COLUMNS.values())
        distinct_count = len(set(cols))
        assert len(cols) == distinct_count, \
            f"Duplicate column targets in EXTERNAL_ID_COLUMNS: {cols}"
|
||||
Loading…
Reference in new issue