Merge pull request #451 from Nezreka/fix/match-against-track-artist

Match soundtrack tracks against per-track artist, fix dead fallback
pull/452/head
BoulderBadgeDad 3 weeks ago committed by GitHub
commit cbafd07009
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -5357,6 +5357,12 @@ class MusicDatabase:
track.album_title = row['album_title']
track.album_thumb_url = row['album_thumb_url'] if 'album_thumb_url' in row.keys() else ''
track.server_source = row['server_source'] if 'server_source' in row.keys() else ''
# Per-track artist (from ID3 ARTIST tag) for compilations/soundtracks where
# the track artist differs from the album artist. Used by
# _calculate_track_confidence so soundtrack tracks credited to the song's
# actual performer match correctly when the album sits under a different
# primary artist (Plex's track.originalTitle, Jellyfin's ArtistItems[0]).
track.track_artist = row['track_artist'] if 'track_artist' in row.keys() else None
tracks.append(track)
return tracks
@ -5541,13 +5547,22 @@ class MusicDatabase:
""", params)
for row in cursor.fetchall():
# DatabaseTrack is a strict dataclass — only the declared
# fields go in __init__; the joined artist/album/server
# values are attached afterwards just like _rows_to_tracks
# does. Building it the kwarg-soup way used to raise
# TypeError on every fallback row, silently swallowed by
# the outer except, so this path never matched anything.
db_track = DatabaseTrack(
id=row['id'], title=row['title'], artist_name=row['artist_name'],
album_title=row['album_title'], album_id=row['album_id'],
track_number=row['track_number'], duration=row['duration'],
file_path=row['file_path'], bitrate=row['bitrate'],
artist_id=row['artist_id'], server_source=row['server_source']
id=row['id'], album_id=row['album_id'], artist_id=row['artist_id'],
title=row['title'], track_number=row['track_number'],
duration=row['duration'], file_path=row['file_path'],
bitrate=row['bitrate'],
)
db_track.artist_name = row['artist_name']
db_track.album_title = row['album_title']
db_track.server_source = row['server_source']
db_track.track_artist = row['track_artist'] if 'track_artist' in row.keys() else None
title_sim = max(
self._string_similarity(self._normalize_for_comparison(title), self._normalize_for_comparison(db_track.title)),
self._string_similarity(self._clean_track_title_for_comparison(title), self._clean_track_title_for_comparison(db_track.title))
@ -6267,6 +6282,19 @@ class MusicDatabase:
# Direct similarity with Unicode normalization
title_similarity = self._string_similarity(search_title_norm, db_title_norm)
artist_similarity = self._string_similarity(search_artist_norm, db_artist_norm)
# Soundtracks/compilations: the album-level artist (artists.name via JOIN)
# often differs from the per-track artist (e.g. Vaiana OST is filed under
# Lin-Manuel Miranda but "Where You Are" is performed by Christopher
# Jackson). Score against tracks.track_artist too and take the better
# match so playlist sync can find these.
db_track_artist = getattr(db_track, 'track_artist', None)
if db_track_artist:
db_track_artist_norm = self._normalize_for_comparison(db_track_artist)
artist_similarity = max(
artist_similarity,
self._string_similarity(search_artist_norm, db_track_artist_norm),
)
# Also try with cleaned versions (removing parentheses, brackets, etc.)
clean_search_title = self._clean_track_title_for_comparison(search_title)

@ -0,0 +1,255 @@
"""Regression tests for soundtrack/compilation track-artist matching.
The Discord-reported bug: a Vaiana OST track ("Where You Are" by
Christopher Jackson) failed to match against a Plex/Emby library
because the album's primary artist was Lin-Manuel Miranda. SoulSync's
DB stores the per-track artist in ``tracks.track_artist`` (from
Plex's ``originalTitle`` or Jellyfin's ``ArtistItems[0]``), but the
confidence scorer only compared against the album-artist JOIN and
never looked at ``track_artist``.
These tests pin the new behaviour:
- ``_calculate_track_confidence`` scores against ``track_artist`` too,
taking the better artist similarity, so soundtrack tracks credited
to the actual performer match.
- ``_rows_to_tracks`` propagates ``track_artist`` from row to object.
- The album-aware fallback constructs DatabaseTrack with the right
dataclass fields (it used to TypeError on every row).
"""
import sqlite3
import tempfile
from pathlib import Path
import pytest
from database.music_database import DatabaseTrack, MusicDatabase
@pytest.fixture
def db_with_soundtrack(tmp_path: Path):
"""Build a real MusicDatabase with one OST-style row inserted by hand.
Mirrors the Discord scenario: album artist ("Lin-Manuel Miranda")
differs from the actual performer of the track ("Christopher
Jackson"), and the per-track artist is stored in
``tracks.track_artist``.
"""
db_path = tmp_path / "test.db"
db = MusicDatabase(database_path=str(db_path))
conn = db._get_connection()
cursor = conn.cursor()
cursor.execute(
"INSERT INTO artists (id, name, server_source) VALUES (?, ?, ?)",
("artist-1", "Lin-Manuel Miranda", "plex"),
)
cursor.execute(
"INSERT INTO albums (id, artist_id, title, server_source) VALUES (?, ?, ?, ?)",
("album-1", "artist-1", "Vaiana (English Version/Original Motion Picture Soundtrack)", "plex"),
)
cursor.execute(
"""
INSERT INTO tracks (
id, album_id, artist_id, title, track_number, duration,
file_path, bitrate, server_source, track_artist
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
("track-1", "album-1", "artist-1", "Where You Are", 4, 210000,
"/music/where_you_are.mp3", 320, "plex", "Christopher Jackson"),
)
conn.commit()
conn.close()
return db
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_check_track_exists_matches_via_track_artist(db_with_soundtrack: MusicDatabase) -> None:
"""The reported scenario: search by per-track performer must succeed
even when the album sits under a different primary artist."""
track, confidence = db_with_soundtrack.check_track_exists(
title="Where You Are",
artist="Christopher Jackson",
confidence_threshold=0.8,
)
assert track is not None, "soundtrack track should match via track_artist"
assert track.title == "Where You Are"
assert confidence >= 0.8
def test_check_track_exists_still_matches_via_album_artist(db_with_soundtrack: MusicDatabase) -> None:
"""Searching by the album artist must still work (regression
guard we want to ADD a fallback, not replace the original path)."""
track, confidence = db_with_soundtrack.check_track_exists(
title="Where You Are",
artist="Lin-Manuel Miranda",
confidence_threshold=0.8,
)
assert track is not None, "album-artist match must keep working"
assert track.title == "Where You Are"
def test_calculate_track_confidence_uses_better_artist_match(
db_with_soundtrack: MusicDatabase,
) -> None:
"""Scorer must take the BETTER of (album-artist sim, track-artist sim)."""
track = DatabaseTrack(
id="t1", album_id="a1", artist_id="ar1",
title="Where You Are", track_number=4, duration=210000,
file_path="/x.mp3", bitrate=320,
)
track.artist_name = "Lin-Manuel Miranda"
track.track_artist = "Christopher Jackson"
# Search by the per-track artist scores high
track_artist_conf = db_with_soundtrack._calculate_track_confidence(
"Where You Are", "Christopher Jackson", track,
)
# Search by the album artist also scores high
album_artist_conf = db_with_soundtrack._calculate_track_confidence(
"Where You Are", "Lin-Manuel Miranda", track,
)
assert track_artist_conf >= 0.8
assert album_artist_conf >= 0.8
def test_calculate_track_confidence_handles_missing_track_artist(
db_with_soundtrack: MusicDatabase,
) -> None:
"""Tracks without a per-track artist (the common case for non-
compilations) must keep working the scorer must not crash on a
missing attribute and must fall through to the album-artist score."""
track = DatabaseTrack(
id="t2", album_id="a2", artist_id="ar2",
title="Some Song", track_number=1, duration=200000,
file_path="/y.mp3", bitrate=320,
)
track.artist_name = "Some Artist"
# Deliberately do NOT set track_artist — most rows leave it None.
conf = db_with_soundtrack._calculate_track_confidence(
"Some Song", "Some Artist", track,
)
assert conf >= 0.8
def test_search_tracks_attaches_track_artist(db_with_soundtrack: MusicDatabase) -> None:
"""The search path must propagate track_artist onto returned objects
so the confidence scorer can use it. This used to be silently
dropped during rowobject conversion."""
rows = db_with_soundtrack.search_tracks(
title="Where You Are", artist="Christopher Jackson", limit=10,
)
assert rows, "search must find the soundtrack track"
track = rows[0]
assert getattr(track, 'track_artist', None) == "Christopher Jackson"
assert track.artist_name == "Lin-Manuel Miranda"
def test_album_aware_fallback_does_not_over_match_wrong_album(tmp_path: Path) -> None:
"""Fallback must reject when the album-name hint doesn't actually
match the row's album. Otherwise re-enabling the previously-dead
fallback would surface false positives whenever the search title
happens to exist on a different album.
Album threshold is 0.8 a clearly different album name like
"Some Other Album" must not pass.
"""
db_path = tmp_path / "negative_fallback.db"
db = MusicDatabase(database_path=str(db_path))
conn = db._get_connection()
cursor = conn.cursor()
cursor.execute(
"INSERT INTO artists (id, name, server_source) VALUES (?, ?, ?)",
("ar-y", "Madonna", "plex"),
)
cursor.execute(
"INSERT INTO albums (id, artist_id, title, server_source) VALUES (?, ?, ?, ?)",
("al-y", "ar-y", "Ray of Light", "plex"),
)
cursor.execute(
"""
INSERT INTO tracks (
id, album_id, artist_id, title, track_number, duration,
file_path, bitrate, server_source, track_artist
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
("tr-y", "al-y", "ar-y", "Frozen", 1, 200000,
"/m/frozen.mp3", 320, "plex", None),
)
conn.commit()
conn.close()
# Search by a clearly different artist + a totally unrelated album
# hint. Main path scores low artist similarity → falls through to
# the album-aware fallback. Fallback's 0.8 album-title floor must
# reject "Disney Hits" against "Ray of Light".
track, _ = db.check_track_exists(
title="Frozen",
artist="Idina Menzel",
confidence_threshold=0.7,
album="Disney Hits",
)
assert track is None, (
"fallback must reject mismatched album hints — otherwise "
"re-enabling the previously-dead path leaks false positives"
)
def test_album_aware_fallback_actually_works(tmp_path: Path) -> None:
"""The album-aware fallback path used to TypeError on every row
because DatabaseTrack(...) was called with kwargs that don't exist
on the dataclass (artist_name, album_title, server_source). Every
fallback row silently failed, so this entire branch never matched
anything since track_artist was added.
Pin the new behaviour by forcing the main path to miss (artist
string nowhere in the row) and verifying the fallback succeeds
when an album-name hint is provided.
"""
db_path = tmp_path / "fallback_test.db"
db = MusicDatabase(database_path=str(db_path))
conn = db._get_connection()
cursor = conn.cursor()
cursor.execute(
"INSERT INTO artists (id, name, server_source) VALUES (?, ?, ?)",
("ar-x", "Various Artists", "plex"),
)
cursor.execute(
"INSERT INTO albums (id, artist_id, title, server_source) VALUES (?, ?, ?, ?)",
("al-x", "ar-x", "Awesome Mix Vol. 1", "plex"),
)
cursor.execute(
"""
INSERT INTO tracks (
id, album_id, artist_id, title, track_number, duration,
file_path, bitrate, server_source, track_artist
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
("tr-x", "al-x", "ar-x", "Hooked on a Feeling", 2, 175000,
"/m/hooked.mp3", 320, "plex", None), # No per-track artist set
)
conn.commit()
conn.close()
# Search by an artist that doesn't match either album_artist or
# track_artist. Main path will fail; album hint kicks in fallback.
track, confidence = db.check_track_exists(
title="Hooked on a Feeling",
artist="Blue Swede", # Real performer, not in the DB row
confidence_threshold=0.7,
album="Awesome Mix Vol. 1",
)
# Fallback matches on album name + title only — the artist mismatch
# doesn't disqualify the result. Pre-fix this would have raised
# TypeError internally and returned (None, 0.0).
assert track is not None, "album-aware fallback must find the track"
assert track.title == "Hooked on a Feeling"
assert confidence >= 0.7
Loading…
Cancel
Save