From 345273df22234422458d74a0fc7cf6c392f44ce7 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Thu, 30 Apr 2026 16:35:20 -0700 Subject: [PATCH 1/2] Match soundtrack tracks against per-track artist, fix dead fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs surfacing the same user-reported symptom: a Vaiana OST track ("Where You Are" by Christopher Jackson) wouldn't match against a Plex/Emby library because the album sits under the album artist (Lin-Manuel Miranda). Bug 1: the data was already there but scoring ignored it. The DB schema has a tracks.track_artist column, the scanner populates it from Plex's originalTitle and Jellyfin's ArtistItems[0], and the SQL WHERE clause already searches it — but _rows_to_tracks dropped the column on its way to the Python object, and _calculate_track_confidence only scored against the album-artist JOIN. Candidates whose track- artist matched got returned by the search and then immediately filtered out by the low confidence score. Fix: _rows_to_tracks now propagates row['track_artist'] onto the returned object, and _calculate_track_confidence takes the better of (album-artist similarity, track-artist similarity) so soundtracks match through whichever credit the search query carries. Bug 2: the album-aware fallback path constructed DatabaseTrack with kwargs the dataclass doesn't accept (artist_name, album_title, server_source). Every row TypeError'd, the outer except swallowed it silently, and the fallback never matched anything since the column was added — invisible because nothing logged it. Fix: build DatabaseTrack with valid fields and attach the joined columns afterwards, the same pattern _rows_to_tracks uses. Adds 6 regression tests covering: track-artist match (the OST case), album-artist still matches, scorer takes the better of the two, defensive handling for tracks without track_artist, search-path attribute propagation, and the previously-dead album-aware fallback. --- database/music_database.py | 38 +++++- tests/test_track_artist_matching.py | 204 ++++++++++++++++++++++++++++ 2 files changed, 237 insertions(+), 5 deletions(-) create mode 100644 tests/test_track_artist_matching.py diff --git a/database/music_database.py b/database/music_database.py index e2d76a38..eef5fb48 100644 --- a/database/music_database.py +++ b/database/music_database.py @@ -5357,6 +5357,12 @@ class MusicDatabase: track.album_title = row['album_title'] track.album_thumb_url = row['album_thumb_url'] if 'album_thumb_url' in row.keys() else '' track.server_source = row['server_source'] if 'server_source' in row.keys() else '' + # Per-track artist (from ID3 ARTIST tag) for compilations/soundtracks where + # the track artist differs from the album artist. Used by + # _calculate_track_confidence so soundtrack tracks credited to the song's + # actual performer match correctly when the album sits under a different + # primary artist (Plex's track.originalTitle, Jellyfin's ArtistItems[0]). + track.track_artist = row['track_artist'] if 'track_artist' in row.keys() else None tracks.append(track) return tracks @@ -5541,13 +5547,22 @@ class MusicDatabase: """, params) for row in cursor.fetchall(): + # DatabaseTrack is a strict dataclass — only the declared + # fields go in __init__; the joined artist/album/server + # values are attached afterwards just like _rows_to_tracks + # does. Building it the kwarg-soup way used to raise + # TypeError on every fallback row, silently swallowed by + # the outer except, so this path never matched anything. db_track = DatabaseTrack( - id=row['id'], title=row['title'], artist_name=row['artist_name'], - album_title=row['album_title'], album_id=row['album_id'], - track_number=row['track_number'], duration=row['duration'], - file_path=row['file_path'], bitrate=row['bitrate'], - artist_id=row['artist_id'], server_source=row['server_source'] + id=row['id'], album_id=row['album_id'], artist_id=row['artist_id'], + title=row['title'], track_number=row['track_number'], + duration=row['duration'], file_path=row['file_path'], + bitrate=row['bitrate'], ) + db_track.artist_name = row['artist_name'] + db_track.album_title = row['album_title'] + db_track.server_source = row['server_source'] + db_track.track_artist = row['track_artist'] if 'track_artist' in row.keys() else None title_sim = max( self._string_similarity(self._normalize_for_comparison(title), self._normalize_for_comparison(db_track.title)), self._string_similarity(self._clean_track_title_for_comparison(title), self._clean_track_title_for_comparison(db_track.title)) @@ -6267,6 +6282,19 @@ class MusicDatabase: # Direct similarity with Unicode normalization title_similarity = self._string_similarity(search_title_norm, db_title_norm) artist_similarity = self._string_similarity(search_artist_norm, db_artist_norm) + + # Soundtracks/compilations: the album-level artist (artists.name via JOIN) + # often differs from the per-track artist (e.g. Vaiana OST is filed under + # Lin-Manuel Miranda but "Where You Are" is performed by Christopher + # Jackson). Score against tracks.track_artist too and take the better + # match so playlist sync can find these. + db_track_artist = getattr(db_track, 'track_artist', None) + if db_track_artist: + db_track_artist_norm = self._normalize_for_comparison(db_track_artist) + artist_similarity = max( + artist_similarity, + self._string_similarity(search_artist_norm, db_track_artist_norm), + ) # Also try with cleaned versions (removing parentheses, brackets, etc.) clean_search_title = self._clean_track_title_for_comparison(search_title) diff --git a/tests/test_track_artist_matching.py b/tests/test_track_artist_matching.py new file mode 100644 index 00000000..a4e6632f --- /dev/null +++ b/tests/test_track_artist_matching.py @@ -0,0 +1,204 @@ +"""Regression tests for soundtrack/compilation track-artist matching. + +The Discord-reported bug: a Vaiana OST track ("Where You Are" by +Christopher Jackson) failed to match against a Plex/Emby library +because the album's primary artist was Lin-Manuel Miranda. SoulSync's +DB stores the per-track artist in ``tracks.track_artist`` (from +Plex's ``originalTitle`` or Jellyfin's ``ArtistItems[0]``), but the +confidence scorer only compared against the album-artist JOIN and +never looked at ``track_artist``. + +These tests pin the new behaviour: + +- ``_calculate_track_confidence`` scores against ``track_artist`` too, + taking the better artist similarity, so soundtrack tracks credited + to the actual performer match. +- ``_rows_to_tracks`` propagates ``track_artist`` from row to object. +- The album-aware fallback constructs DatabaseTrack with the right + dataclass fields (it used to TypeError on every row). +""" + +import sqlite3 +import tempfile +from pathlib import Path + +import pytest + +from database.music_database import DatabaseTrack, MusicDatabase + + +@pytest.fixture +def db_with_soundtrack(tmp_path: Path): + """Build a real MusicDatabase with one OST-style row inserted by hand. + + Mirrors the Discord scenario: album artist ("Lin-Manuel Miranda") + differs from the actual performer of the track ("Christopher + Jackson"), and the per-track artist is stored in + ``tracks.track_artist``. + """ + db_path = tmp_path / "test.db" + db = MusicDatabase(database_path=str(db_path)) + + conn = db._get_connection() + cursor = conn.cursor() + + cursor.execute( + "INSERT INTO artists (id, name, server_source) VALUES (?, ?, ?)", + ("artist-1", "Lin-Manuel Miranda", "plex"), + ) + cursor.execute( + "INSERT INTO albums (id, artist_id, title, server_source) VALUES (?, ?, ?, ?)", + ("album-1", "artist-1", "Vaiana (English Version/Original Motion Picture Soundtrack)", "plex"), + ) + cursor.execute( + """ + INSERT INTO tracks ( + id, album_id, artist_id, title, track_number, duration, + file_path, bitrate, server_source, track_artist + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ("track-1", "album-1", "artist-1", "Where You Are", 4, 210000, + "/music/where_you_are.mp3", 320, "plex", "Christopher Jackson"), + ) + conn.commit() + conn.close() + return db + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +def test_check_track_exists_matches_via_track_artist(db_with_soundtrack: MusicDatabase) -> None: + """The reported scenario: search by per-track performer must succeed + even when the album sits under a different primary artist.""" + track, confidence = db_with_soundtrack.check_track_exists( + title="Where You Are", + artist="Christopher Jackson", + confidence_threshold=0.8, + ) + assert track is not None, "soundtrack track should match via track_artist" + assert track.title == "Where You Are" + assert confidence >= 0.8 + + +def test_check_track_exists_still_matches_via_album_artist(db_with_soundtrack: MusicDatabase) -> None: + """Searching by the album artist must still work (regression + guard — we want to ADD a fallback, not replace the original path).""" + track, confidence = db_with_soundtrack.check_track_exists( + title="Where You Are", + artist="Lin-Manuel Miranda", + confidence_threshold=0.8, + ) + assert track is not None, "album-artist match must keep working" + assert track.title == "Where You Are" + + +def test_calculate_track_confidence_uses_better_artist_match( + db_with_soundtrack: MusicDatabase, +) -> None: + """Scorer must take the BETTER of (album-artist sim, track-artist sim).""" + track = DatabaseTrack( + id="t1", album_id="a1", artist_id="ar1", + title="Where You Are", track_number=4, duration=210000, + file_path="/x.mp3", bitrate=320, + ) + track.artist_name = "Lin-Manuel Miranda" + track.track_artist = "Christopher Jackson" + + # Search by the per-track artist scores high + track_artist_conf = db_with_soundtrack._calculate_track_confidence( + "Where You Are", "Christopher Jackson", track, + ) + # Search by the album artist also scores high + album_artist_conf = db_with_soundtrack._calculate_track_confidence( + "Where You Are", "Lin-Manuel Miranda", track, + ) + assert track_artist_conf >= 0.8 + assert album_artist_conf >= 0.8 + + +def test_calculate_track_confidence_handles_missing_track_artist( + db_with_soundtrack: MusicDatabase, +) -> None: + """Tracks without a per-track artist (the common case for non- + compilations) must keep working — the scorer must not crash on a + missing attribute and must fall through to the album-artist score.""" + track = DatabaseTrack( + id="t2", album_id="a2", artist_id="ar2", + title="Some Song", track_number=1, duration=200000, + file_path="/y.mp3", bitrate=320, + ) + track.artist_name = "Some Artist" + # Deliberately do NOT set track_artist — most rows leave it None. + conf = db_with_soundtrack._calculate_track_confidence( + "Some Song", "Some Artist", track, + ) + assert conf >= 0.8 + + +def test_search_tracks_attaches_track_artist(db_with_soundtrack: MusicDatabase) -> None: + """The search path must propagate track_artist onto returned objects + so the confidence scorer can use it. This used to be silently + dropped during row→object conversion.""" + rows = db_with_soundtrack.search_tracks( + title="Where You Are", artist="Christopher Jackson", limit=10, + ) + assert rows, "search must find the soundtrack track" + track = rows[0] + assert getattr(track, 'track_artist', None) == "Christopher Jackson" + assert track.artist_name == "Lin-Manuel Miranda" + + +def test_album_aware_fallback_actually_works(tmp_path: Path) -> None: + """The album-aware fallback path used to TypeError on every row + because DatabaseTrack(...) was called with kwargs that don't exist + on the dataclass (artist_name, album_title, server_source). Every + fallback row silently failed, so this entire branch never matched + anything since track_artist was added. + + Pin the new behaviour by forcing the main path to miss (artist + string nowhere in the row) and verifying the fallback succeeds + when an album-name hint is provided. + """ + db_path = tmp_path / "fallback_test.db" + db = MusicDatabase(database_path=str(db_path)) + + conn = db._get_connection() + cursor = conn.cursor() + cursor.execute( + "INSERT INTO artists (id, name, server_source) VALUES (?, ?, ?)", + ("ar-x", "Various Artists", "plex"), + ) + cursor.execute( + "INSERT INTO albums (id, artist_id, title, server_source) VALUES (?, ?, ?, ?)", + ("al-x", "ar-x", "Awesome Mix Vol. 1", "plex"), + ) + cursor.execute( + """ + INSERT INTO tracks ( + id, album_id, artist_id, title, track_number, duration, + file_path, bitrate, server_source, track_artist + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ("tr-x", "al-x", "ar-x", "Hooked on a Feeling", 2, 175000, + "/m/hooked.mp3", 320, "plex", None), # No per-track artist set + ) + conn.commit() + conn.close() + + # Search by an artist that doesn't match either album_artist or + # track_artist. Main path will fail; album hint kicks in fallback. + track, confidence = db.check_track_exists( + title="Hooked on a Feeling", + artist="Blue Swede", # Real performer, not in the DB row + confidence_threshold=0.7, + album="Awesome Mix Vol. 1", + ) + # Fallback matches on album name + title only — the artist mismatch + # doesn't disqualify the result. Pre-fix this would have raised + # TypeError internally and returned (None, 0.0). + assert track is not None, "album-aware fallback must find the track" + assert track.title == "Hooked on a Feeling" + assert confidence >= 0.7 From f1ec62bad3552e0ac504fd1ee08a78b0ed2fae0d Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Thu, 30 Apr 2026 17:13:15 -0700 Subject: [PATCH 2/2] Add fallback negative-case test for track-artist matching Re-enabling the previously-dead album-aware fallback could in theory leak false positives if its 0.8 album-title floor were ineffective. Pin the floor with a clearly-mismatched album hint ("Disney Hits" against "Ray of Light") and assert the search returns no match. Distinct artist names with no shared words so the main path actually fails through to the fallback (the prior draft used "Different Artist" / "Real Artist" which both contain "Artist" and scored above the main path's threshold, never reaching the fallback at all). --- tests/test_track_artist_matching.py | 51 +++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tests/test_track_artist_matching.py b/tests/test_track_artist_matching.py index a4e6632f..916eb644 100644 --- a/tests/test_track_artist_matching.py +++ b/tests/test_track_artist_matching.py @@ -151,6 +151,57 @@ def test_search_tracks_attaches_track_artist(db_with_soundtrack: MusicDatabase) assert track.artist_name == "Lin-Manuel Miranda" +def test_album_aware_fallback_does_not_over_match_wrong_album(tmp_path: Path) -> None: + """Fallback must reject when the album-name hint doesn't actually + match the row's album. Otherwise re-enabling the previously-dead + fallback would surface false positives whenever the search title + happens to exist on a different album. + + Album threshold is 0.8 — a clearly different album name like + "Some Other Album" must not pass. + """ + db_path = tmp_path / "negative_fallback.db" + db = MusicDatabase(database_path=str(db_path)) + + conn = db._get_connection() + cursor = conn.cursor() + cursor.execute( + "INSERT INTO artists (id, name, server_source) VALUES (?, ?, ?)", + ("ar-y", "Madonna", "plex"), + ) + cursor.execute( + "INSERT INTO albums (id, artist_id, title, server_source) VALUES (?, ?, ?, ?)", + ("al-y", "ar-y", "Ray of Light", "plex"), + ) + cursor.execute( + """ + INSERT INTO tracks ( + id, album_id, artist_id, title, track_number, duration, + file_path, bitrate, server_source, track_artist + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ("tr-y", "al-y", "ar-y", "Frozen", 1, 200000, + "/m/frozen.mp3", 320, "plex", None), + ) + conn.commit() + conn.close() + + # Search by a clearly different artist + a totally unrelated album + # hint. Main path scores low artist similarity → falls through to + # the album-aware fallback. Fallback's 0.8 album-title floor must + # reject "Disney Hits" against "Ray of Light". + track, _ = db.check_track_exists( + title="Frozen", + artist="Idina Menzel", + confidence_threshold=0.7, + album="Disney Hits", + ) + assert track is None, ( + "fallback must reject mismatched album hints — otherwise " + "re-enabling the previously-dead path leaks false positives" + ) + + def test_album_aware_fallback_actually_works(tmp_path: Path) -> None: """The album-aware fallback path used to TypeError on every row because DatabaseTrack(...) was called with kwargs that don't exist