mirror of https://github.com/Nezreka/SoulSync.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
359 lines
14 KiB
359 lines
14 KiB
"""Regression tests for the post-processing → provenance → tracks ID flow.
|
|
|
|
Companion to test_library_track_identity.py. The watchlist external-ID
|
|
match (PR #470) closed the demand side: when the watchlist asks "do we
|
|
have this track?", it queries by Spotify/iTunes/Deezer/etc. IDs before
|
|
falling back to fuzzy. But for users on Plex / Jellyfin / Navidrome,
|
|
the ``tracks.spotify_track_id`` column only gets populated by
|
|
asynchronous enrichment workers — sometimes hours after the file is
|
|
written. During that window the ID match falls through to fuzzy and
|
|
the bug returns.
|
|
|
|
This PR closes the supply side: the IDs we already collect at
|
|
post-processing time get persisted to ``track_downloads``, and the
|
|
media-server sync code copies them onto the new ``tracks`` row
|
|
immediately. These tests pin:
|
|
|
|
1. Schema migration adds the new ID columns + indexes
|
|
2. ``record_track_download`` accepts and persists the new kwargs
|
|
3. ``get_provenance_by_file_path`` finds rows by exact + suffix match
|
|
4. ``backfill_track_external_ids_from_provenance`` copies IDs onto a
|
|
tracks row idempotently (COALESCE — preserves existing values)
|
|
5. ``find_provenance_by_external_id`` queries the new columns
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import sqlite3
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Any, Dict
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
|
|
def db_path(tmp_path: Path):
|
|
return tmp_path / "test_music.db"
|
|
|
|
|
|
@pytest.fixture
|
|
def db(db_path: Path, monkeypatch):
|
|
"""Real MusicDatabase against a tmp SQLite file so the schema
|
|
migration runs end-to-end (validates the ALTER TABLE additions)."""
|
|
monkeypatch.setenv('DATABASE_PATH', str(db_path))
|
|
# MusicDatabase is heavy; isolate to a fresh import each test so
|
|
# other tests don't get our env-var pollution.
|
|
import importlib
|
|
import database.music_database as music_db_module
|
|
importlib.reload(music_db_module)
|
|
db = music_db_module.MusicDatabase(str(db_path))
|
|
yield db
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Schema migration
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestSchemaMigration:
|
|
def test_track_downloads_has_new_external_id_columns(self, db):
|
|
conn = db._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute("PRAGMA table_info(track_downloads)")
|
|
cols = {row[1] for row in cursor.fetchall()}
|
|
|
|
assert 'spotify_track_id' in cols
|
|
assert 'itunes_track_id' in cols
|
|
assert 'deezer_track_id' in cols
|
|
assert 'tidal_track_id' in cols
|
|
assert 'qobuz_track_id' in cols
|
|
assert 'musicbrainz_recording_id' in cols
|
|
assert 'audiodb_id' in cols
|
|
assert 'soul_id' in cols
|
|
assert 'isrc' in cols
|
|
|
|
def test_track_downloads_has_external_id_indexes(self, db):
|
|
conn = db._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='track_downloads'"
|
|
)
|
|
idx_names = {row[0] for row in cursor.fetchall()}
|
|
|
|
assert 'idx_td_spotify_id' in idx_names
|
|
assert 'idx_td_itunes_id' in idx_names
|
|
assert 'idx_td_deezer_id' in idx_names
|
|
assert 'idx_td_isrc' in idx_names
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# record_track_download persists IDs
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestRecordTrackDownloadPersistsIds:
|
|
def test_persists_all_external_ids(self, db):
|
|
rec_id = db.record_track_download(
|
|
file_path='/lib/Artist/Album/Track.mp3',
|
|
source_service='soulseek',
|
|
source_username='user1',
|
|
source_filename='Track.mp3',
|
|
track_title='Track',
|
|
spotify_track_id='sp1',
|
|
itunes_track_id='it1',
|
|
deezer_track_id='dz1',
|
|
tidal_track_id='td1',
|
|
qobuz_track_id='qb1',
|
|
musicbrainz_recording_id='mb-uuid-1',
|
|
audiodb_id='adb1',
|
|
soul_id='hyd-soul-1',
|
|
isrc='USRC17607839',
|
|
)
|
|
assert rec_id is not None
|
|
|
|
conn = db._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"SELECT spotify_track_id, itunes_track_id, deezer_track_id, "
|
|
"tidal_track_id, qobuz_track_id, musicbrainz_recording_id, "
|
|
"audiodb_id, soul_id, isrc FROM track_downloads WHERE id = ?",
|
|
(rec_id,),
|
|
)
|
|
row = tuple(cursor.fetchone())
|
|
assert row == (
|
|
'sp1', 'it1', 'dz1', 'td1', 'qb1', 'mb-uuid-1',
|
|
'adb1', 'hyd-soul-1', 'USRC17607839',
|
|
)
|
|
|
|
def test_omitted_ids_persist_as_null(self, db):
|
|
"""Backward compat — callers that don't pass the new kwargs
|
|
still work, columns just stay NULL."""
|
|
rec_id = db.record_track_download(
|
|
file_path='/lib/Artist/Album/Track.mp3',
|
|
source_service='soulseek',
|
|
source_username='user1',
|
|
source_filename='Track.mp3',
|
|
track_title='Track',
|
|
)
|
|
assert rec_id is not None
|
|
|
|
conn = db._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT spotify_track_id FROM track_downloads WHERE id = ?", (rec_id,))
|
|
assert cursor.fetchone()[0] is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# get_provenance_by_file_path
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestGetProvenanceByFilePath:
|
|
def test_exact_match(self, db):
|
|
db.record_track_download(
|
|
file_path='/lib/Artist/Album/Track.mp3',
|
|
source_service='soulseek', source_username='u', source_filename='Track.mp3',
|
|
spotify_track_id='sp1',
|
|
)
|
|
result = db.get_provenance_by_file_path('/lib/Artist/Album/Track.mp3')
|
|
assert result is not None
|
|
assert result['spotify_track_id'] == 'sp1'
|
|
|
|
def test_returns_none_when_no_match(self, db):
|
|
result = db.get_provenance_by_file_path('/nonexistent/path.mp3')
|
|
assert result is None
|
|
|
|
def test_returns_none_for_empty_path(self, db):
|
|
assert db.get_provenance_by_file_path('') is None
|
|
assert db.get_provenance_by_file_path(None) is None
|
|
|
|
def test_basename_suffix_fallback(self, db):
|
|
"""Recorded path differs from queried path by mount root —
|
|
common when SoulSync container writes under /app/Transfer
|
|
but Plex container reports the same file as /media/Music."""
|
|
db.record_track_download(
|
|
file_path='/app/Transfer/Artist/Album/Track.mp3',
|
|
source_service='soulseek', source_username='u', source_filename='Track.mp3',
|
|
spotify_track_id='sp1',
|
|
)
|
|
result = db.get_provenance_by_file_path('/media/Music/Artist/Album/Track.mp3')
|
|
assert result is not None
|
|
assert result['spotify_track_id'] == 'sp1'
|
|
|
|
def test_returns_most_recent_when_multiple(self, db):
|
|
"""Same file_path can have multiple download records (re-downloads,
|
|
retries). Most recent wins."""
|
|
db.record_track_download(
|
|
file_path='/lib/Track.mp3',
|
|
source_service='soulseek', source_username='u', source_filename='Track.mp3',
|
|
spotify_track_id='sp-old',
|
|
)
|
|
db.record_track_download(
|
|
file_path='/lib/Track.mp3',
|
|
source_service='soulseek', source_username='u', source_filename='Track.mp3',
|
|
spotify_track_id='sp-new',
|
|
)
|
|
result = db.get_provenance_by_file_path('/lib/Track.mp3')
|
|
assert result['spotify_track_id'] == 'sp-new'
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# backfill_track_external_ids_from_provenance
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestBackfillTrackExternalIdsFromProvenance:
|
|
def _seed_artist_album_and_track(self, db, *, track_id, file_path):
|
|
"""Insert a minimal artists/albums/tracks chain so backfill has
|
|
a row to update."""
|
|
conn = db._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"INSERT INTO artists (id, name, server_source) VALUES (?, ?, 'plex')",
|
|
('artist-1', 'Test Artist'),
|
|
)
|
|
cursor.execute(
|
|
"INSERT INTO albums (id, artist_id, title, server_source) VALUES (?, ?, ?, 'plex')",
|
|
('album-1', 'artist-1', 'Test Album'),
|
|
)
|
|
cursor.execute(
|
|
"INSERT INTO tracks (id, album_id, artist_id, title, file_path, server_source) "
|
|
"VALUES (?, ?, ?, ?, ?, 'plex')",
|
|
(track_id, 'album-1', 'artist-1', 'Test Track', file_path),
|
|
)
|
|
conn.commit()
|
|
|
|
def test_copies_all_ids_when_tracks_columns_empty(self, db):
|
|
self._seed_artist_album_and_track(db, track_id='t1', file_path='/lib/Track.mp3')
|
|
db.record_track_download(
|
|
file_path='/lib/Track.mp3',
|
|
source_service='soulseek', source_username='u', source_filename='Track.mp3',
|
|
spotify_track_id='sp1',
|
|
deezer_track_id='dz1',
|
|
isrc='USRC17607839',
|
|
)
|
|
|
|
updated = db.backfill_track_external_ids_from_provenance('t1', '/lib/Track.mp3')
|
|
assert updated > 0
|
|
|
|
conn = db._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"SELECT spotify_track_id, deezer_id, isrc FROM tracks WHERE id = ?",
|
|
('t1',),
|
|
)
|
|
assert tuple(cursor.fetchone()) == ('sp1', 'dz1', 'USRC17607839')
|
|
|
|
def test_preserves_existing_ids(self, db):
|
|
"""COALESCE-update — if the enrichment worker already wrote a
|
|
spotify_track_id, the provenance backfill must NOT overwrite it
|
|
(enrichment is generally more authoritative for late binding)."""
|
|
self._seed_artist_album_and_track(db, track_id='t1', file_path='/lib/Track.mp3')
|
|
|
|
# Pre-populate spotify_track_id with the enrichment-worker value
|
|
conn = db._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute("UPDATE tracks SET spotify_track_id = 'sp-from-enrichment' WHERE id = ?", ('t1',))
|
|
conn.commit()
|
|
|
|
# Provenance has a different value
|
|
db.record_track_download(
|
|
file_path='/lib/Track.mp3',
|
|
source_service='soulseek', source_username='u', source_filename='Track.mp3',
|
|
spotify_track_id='sp-from-provenance',
|
|
deezer_track_id='dz1', # This one IS missing on tracks, should backfill
|
|
)
|
|
|
|
db.backfill_track_external_ids_from_provenance('t1', '/lib/Track.mp3')
|
|
|
|
cursor.execute("SELECT spotify_track_id, deezer_id FROM tracks WHERE id = ?", ('t1',))
|
|
row = cursor.fetchone()
|
|
assert row[0] == 'sp-from-enrichment', "Existing spotify_track_id must be preserved"
|
|
assert row[1] == 'dz1', "Empty deezer_id should be filled from provenance"
|
|
|
|
def test_returns_zero_when_no_provenance(self, db):
|
|
self._seed_artist_album_and_track(db, track_id='t1', file_path='/lib/Track.mp3')
|
|
# No record_track_download call — no provenance row exists
|
|
updated = db.backfill_track_external_ids_from_provenance('t1', '/lib/Track.mp3')
|
|
assert updated == 0
|
|
|
|
def test_returns_zero_for_empty_inputs(self, db):
|
|
assert db.backfill_track_external_ids_from_provenance(None, '/lib/Track.mp3') == 0
|
|
assert db.backfill_track_external_ids_from_provenance('t1', None) == 0
|
|
assert db.backfill_track_external_ids_from_provenance('t1', '') == 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# find_provenance_by_external_id
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestFindProvenanceByExternalId:
|
|
def test_match_by_spotify_id(self, db):
|
|
db.record_track_download(
|
|
file_path='/lib/Track.mp3',
|
|
source_service='soulseek', source_username='u', source_filename='Track.mp3',
|
|
spotify_track_id='sp1',
|
|
)
|
|
|
|
from core.library.track_identity import find_provenance_by_external_id
|
|
result = find_provenance_by_external_id(db, external_ids={'spotify_id': 'sp1'})
|
|
assert result is not None
|
|
assert result['file_path'] == '/lib/Track.mp3'
|
|
assert result['spotify_track_id'] == 'sp1'
|
|
|
|
def test_match_by_isrc(self, db):
|
|
db.record_track_download(
|
|
file_path='/lib/Track.mp3',
|
|
source_service='soulseek', source_username='u', source_filename='Track.mp3',
|
|
isrc='USRC17607839',
|
|
)
|
|
|
|
from core.library.track_identity import find_provenance_by_external_id
|
|
result = find_provenance_by_external_id(db, external_ids={'isrc': 'USRC17607839'})
|
|
assert result is not None
|
|
|
|
def test_returns_none_when_no_match(self, db):
|
|
from core.library.track_identity import find_provenance_by_external_id
|
|
result = find_provenance_by_external_id(db, external_ids={'spotify_id': 'sp-other'})
|
|
assert result is None
|
|
|
|
def test_returns_none_for_empty_external_ids(self, db):
|
|
from core.library.track_identity import find_provenance_by_external_id
|
|
assert find_provenance_by_external_id(db, external_ids={}) is None
|
|
|
|
def test_returns_most_recent_when_multiple_matches(self, db):
|
|
"""Re-downloads create multiple rows. Newest wins."""
|
|
db.record_track_download(
|
|
file_path='/lib/Track-v1.mp3',
|
|
source_service='soulseek', source_username='u', source_filename='Track.mp3',
|
|
spotify_track_id='sp1',
|
|
)
|
|
db.record_track_download(
|
|
file_path='/lib/Track-v2.mp3',
|
|
source_service='tidal', source_username='tidal', source_filename='Track.flac',
|
|
spotify_track_id='sp1',
|
|
)
|
|
|
|
from core.library.track_identity import find_provenance_by_external_id
|
|
result = find_provenance_by_external_id(db, external_ids={'spotify_id': 'sp1'})
|
|
assert result['file_path'] == '/lib/Track-v2.mp3'
|
|
|
|
def test_or_semantics_across_id_types(self, db):
|
|
"""Provenance has only ISRC; source asks with multiple IDs incl. ISRC.
|
|
Match should fire on ISRC."""
|
|
db.record_track_download(
|
|
file_path='/lib/Track.mp3',
|
|
source_service='soulseek', source_username='u', source_filename='Track.mp3',
|
|
isrc='USRC17607839',
|
|
)
|
|
|
|
from core.library.track_identity import find_provenance_by_external_id
|
|
result = find_provenance_by_external_id(db, external_ids={
|
|
'spotify_id': 'sp-mismatch',
|
|
'isrc': 'USRC17607839',
|
|
})
|
|
assert result is not None
|