diff --git a/database/music_database.py b/database/music_database.py
index eef5fb48..2fbb8cde 100644
--- a/database/music_database.py
+++ b/database/music_database.py
@@ -4944,15 +4944,29 @@ class MusicDatabase:
                 plex_original = getattr(track_obj, 'originalTitle', None)
                 if plex_original and plex_original.strip():
                     track_artist = plex_original.strip()
-                # Jellyfin/Emby: ArtistItems[0] is the track artist, may differ from album artist
+                # Jellyfin/Emby: store ALL ArtistItems, not just [0]. A track
+                # like "Super Single" by Artist1 feat. Artist2 has both names in
+                # ArtistItems; if we kept only the first, completion checks for
+                # Artist2's discography (where the same track also appears as a
+                # single) would never find this row in the library. Joining with
+                # "; " matches Jellyfin's own UI convention and lets the search
+                # path treat each name as a separate artist credit.
                 if not track_artist and hasattr(track_obj, '_data'):
                     raw = getattr(track_obj, '_data', {}) or {}
                     artist_items = raw.get('ArtistItems', [])
                     if artist_items:
-                        jf_track_artist = artist_items[0].get('Name', '')
+                        jf_track_artist_names = [
+                            a.get('Name', '') for a in artist_items if a.get('Name')
+                        ]
+                        jf_track_artist = '; '.join(jf_track_artist_names)
                         album_artists = raw.get('AlbumArtists', [])
                         jf_album_artist = album_artists[0].get('Name', '') if album_artists else ''
-                        if jf_track_artist and jf_track_artist != jf_album_artist:
+                        # Store when the track has multiple artists OR when the
+                        # single-artist credit differs from the album artist.
+                        if jf_track_artist and (
+                            len(jf_track_artist_names) > 1
+                            or jf_track_artist != jf_album_artist
+                        ):
                             track_artist = jf_track_artist
                 # Navidrome/Subsonic: artist attribute is per-track
                 if not track_artist and hasattr(track_obj, 'artist') and isinstance(getattr(track_obj, 'artist', None), str):
@@ -6288,13 +6302,35 @@ class MusicDatabase:
             # Lin-Manuel Miranda but "Where You Are" is performed by Christopher
             # Jackson). Score against tracks.track_artist too and take the better
             # match so playlist sync can find these.
+            #
+            # Featured artists: tracks with multiple credits ("Artist1, Artist2",
+            # "Artist1 feat. Artist2", "Artist1 & Artist2") split on common
+            # delimiters and score each piece independently. Without this, a
+            # discography completion check for Artist2 would miss a track stored
+            # in the library under Artist1's album with a "feat. Artist2" credit.
             db_track_artist = getattr(db_track, 'track_artist', None)
             if db_track_artist:
                 db_track_artist_norm = self._normalize_for_comparison(db_track_artist)
-                artist_similarity = max(
-                    artist_similarity,
-                    self._string_similarity(search_artist_norm, db_track_artist_norm),
+                # Whole-string similarity first as the floor.
+                track_artist_sim = self._string_similarity(search_artist_norm, db_track_artist_norm)
+                # Then split on multi-artist delimiters and score each piece —
+                # Spotify's "feat.", "ft.", commas, semicolons, ampersands, and
+                # "x" between names all show up here in real-world tags.
+                # The optional "." must come AFTER the word boundary: with `feat\.?\b`
+                # the dot in "feat. X" is never consumed and the piece becomes ". X".
+                pieces = re.split(
+                    r'\s*(?:[;,&]|\b(?:feat|ft|vs)\b\.?|\bfeaturing\b|\bx\b)\s*',
+                    db_track_artist_norm,
+                    flags=re.IGNORECASE,
                 )
+                for piece in pieces:
+                    piece = piece.strip()
+                    if not piece:
+                        continue
+                    piece_sim = self._string_similarity(search_artist_norm, piece)
+                    if piece_sim > track_artist_sim:
+                        track_artist_sim = piece_sim
+                artist_similarity = max(artist_similarity, track_artist_sim)
 
             # Also try with cleaned versions (removing parentheses, brackets, etc.)
             clean_search_title = self._clean_track_title_for_comparison(search_title)
diff --git a/tests/test_featured_artist_completion.py b/tests/test_featured_artist_completion.py
new file mode 100644
index 00000000..d114d6bc
--- /dev/null
+++ b/tests/test_featured_artist_completion.py
@@ -0,0 +1,206 @@
+"""Regression tests for featured-artist track matching.
+
+Discord-reported scenario: a single "super single" by Artist1 feat.
+Artist2 also appears on the album "super album" (Artist1). When the
+album is fully owned, Artist1's discography shows the single as
+complete, but Artist2's discography (which lists the same track as
+their own single) shows it as missing — even though the same
+recording exists in the library under Artist1's album.
+
+Two layers of fix pinned by these tests:
+
+- Scanner: store ALL Jellyfin/Emby ArtistItems in tracks.track_artist
+  (joined with "; "), not just ArtistItems[0]. The first artist
+  often equals the album artist and used to suppress the row.
+- Scoring: split track_artist on common multi-artist delimiters
+  (",", ";", "&", "feat.", "ft.", "featuring", "vs.", "x") and
+  score each piece independently against the search artist.
+"""
+
+import sqlite3
+from pathlib import Path
+
+import pytest
+
+from database.music_database import DatabaseTrack, MusicDatabase
+
+
+@pytest.fixture
+def db_with_feat_track(tmp_path: Path):
+    """Build a real MusicDatabase with the featured-artist scenario.
+
+    "Super Single" by "Artist1; Artist2" stored under Artist1's
+    album. Mirrors what the Jellyfin scanner now writes when a
+    track has multiple ArtistItems.
+    """
+    db_path = tmp_path / "feat.db"
+    db = MusicDatabase(database_path=str(db_path))
+    conn = db._get_connection()
+    cursor = conn.cursor()
+    cursor.execute(
+        "INSERT INTO artists (id, name, server_source) VALUES (?, ?, ?)",
+        ("ar-1", "Artist1", "jellyfin"),
+    )
+    cursor.execute(
+        "INSERT INTO albums (id, artist_id, title, server_source) VALUES (?, ?, ?, ?)",
+        ("al-1", "ar-1", "Super Album", "jellyfin"),
+    )
+    cursor.execute(
+        """
+        INSERT INTO tracks (
+            id, album_id, artist_id, title, track_number, duration,
+            file_path, bitrate, server_source, track_artist
+        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        """,
+        ("tr-1", "al-1", "ar-1", "Super Single", 3, 200000,
+         "/m/super.mp3", 320, "jellyfin", "Artist1; Artist2"),
+    )
+    conn.commit()
+    conn.close()
+    return db
+
+
+# ---------------------------------------------------------------------------
+# Scoring: featured artist matches via split
+# ---------------------------------------------------------------------------
+
+
+def test_featured_artist_matches_via_track_artist_split(db_with_feat_track: MusicDatabase) -> None:
+    """The reported scenario: searching for the featured artist
+    (Artist2) finds the track stored under the primary artist's
+    album because track_artist contains both names."""
+    track, confidence = db_with_feat_track.check_track_exists(
+        title="Super Single",
+        artist="Artist2",
+        confidence_threshold=0.7,
+    )
+    assert track is not None
+    assert confidence >= 0.7
+
+
+def test_primary_artist_still_matches(db_with_feat_track: MusicDatabase) -> None:
+    """Backward compat: searching for the primary artist must still
+    work — the original album-artist match path is preserved."""
+    track, confidence = db_with_feat_track.check_track_exists(
+        title="Super Single",
+        artist="Artist1",
+        confidence_threshold=0.7,
+    )
+    assert track is not None
+    assert confidence >= 0.7
+
+
+@pytest.mark.parametrize("track_artist_value", [
+    "Artist1, Artist2",
+    "Artist1; Artist2",
+    "Artist1 & Artist2",
+    "Artist1 feat. Artist2",
+    "Artist1 ft. Artist2",
+    "Artist1 featuring Artist2",
+    "Artist1 vs. Artist2",
+    "Artist1 x Artist2",
+])
+def test_scoring_handles_common_multi_artist_separators(
+    db_with_feat_track: MusicDatabase, track_artist_value: str,
+) -> None:
+    """Score must find the featured artist regardless of which
+    delimiter the metadata source / tag uses."""
+    track = DatabaseTrack(
+        id="x", album_id="y", artist_id="z",
+        title="Super Single", track_number=1, duration=200000,
+        file_path="/m/x.mp3", bitrate=320,
+    )
+    track.artist_name = "Artist1"
+    track.track_artist = track_artist_value
+    conf = db_with_feat_track._calculate_track_confidence(
+        "Super Single", "Artist2", track,
+    )
+    assert conf >= 0.7, (
+        f"separator '{track_artist_value}' should still let Artist2 match"
+    )
+
+
+def test_split_does_not_inflate_score_beyond_whole_string_floor(
+    db_with_feat_track: MusicDatabase,
+) -> None:
+    """Splitting must only ADD to the score (best-of), never pull it
+    below the whole-string baseline. Same artist on both sides should
+    score 1.0 the same way it always did, with or without delimiters."""
+    track = DatabaseTrack(
+        id="x", album_id="y", artist_id="z",
+        title="Solo Song", track_number=1, duration=200000,
+        file_path="/m/x.mp3", bitrate=320,
+    )
+    track.artist_name = "Solo Artist"
+    track.track_artist = "Solo Artist"  # No delimiters at all
+    conf = db_with_feat_track._calculate_track_confidence(
+        "Solo Song", "Solo Artist", track,
+    )
+    assert conf >= 0.99, "exact-match score must not regress"
+
+
+# ---------------------------------------------------------------------------
+# Scanner: Jellyfin ArtistItems propagation
+# ---------------------------------------------------------------------------
+
+
+class _StubJellyfinTrack:
+    """Minimal stub mimicking JellyfinTrack: real attributes the scanner
+    reads (ratingKey, title, trackNumber, duration, path, bitRate) plus
+    the ``_data`` raw dict where ArtistItems live."""
+    def __init__(self, track_id, title, track_artists, album_artist,
+                 track_number=1, duration=200000, file_path="/m/x.mp3",
+                 bit_rate=320):
+        self.ratingKey = track_id
+        self.title = title
+        self.trackNumber = track_number
+        self.duration = duration
+        self.path = file_path
+        self.bitRate = bit_rate
+        self._data = {
+            'ArtistItems': [{'Name': n} for n in track_artists],
+            'AlbumArtists': [{'Name': album_artist}],
+        }
+
+
+def test_jellyfin_scanner_stores_all_track_artists(tmp_path: Path) -> None:
+    """The scanner must persist EVERY name from ArtistItems, not just
+    the first. Pre-fix the scanner kept only [0] which was usually
+    equal to the album artist, so nothing distinguishing was stored.
+    """
+    db = MusicDatabase(database_path=str(tmp_path / "scan.db"))
+    conn = db._get_connection()
+    cursor = conn.cursor()
+
+    # Seed the artist + album the track will hang off
+    cursor.execute(
+        "INSERT INTO artists (id, name, server_source) VALUES (?, ?, ?)",
+        ("ar-1", "Artist1", "jellyfin"),
+    )
+    cursor.execute(
+        "INSERT INTO albums (id, artist_id, title, server_source) VALUES (?, ?, ?, ?)",
+        ("al-1", "ar-1", "Super Album", "jellyfin"),
+    )
+    conn.commit()
+    conn.close()
+
+    track_obj = _StubJellyfinTrack(
+        track_id="tr-1",
+        title="Super Single",
+        track_artists=["Artist1", "Artist2"],
+        album_artist="Artist1",
+    )
+    db.insert_or_update_media_track(
+        track_obj, album_id="al-1", artist_id="ar-1", server_source="jellyfin",
+    )
+
+    conn = db._get_connection()
+    cursor = conn.cursor()
+    cursor.execute("SELECT track_artist FROM tracks WHERE id = ?", ("tr-1",))
+    row = cursor.fetchone()
+    conn.close()
+    assert row is not None
+    assert row[0] is not None, "scanner should not drop multi-artist track credits"
+    assert "Artist2" in row[0], (
+        f"track_artist must contain every ArtistItem — got {row[0]!r}"
+    )