From 6bf337423d187a7afdc7e1806bb967c720163ce3 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Tue, 17 Mar 2026 15:18:28 -0700 Subject: [PATCH] Prefer album versions over singles when matching tracks to metadata sources Add album_type and total_tracks fields to Track dataclass, populate from Spotify/iTunes/Deezer API responses, and apply a small tiebreaker bonus (+0.02 for albums, +0.01 for EPs) in all matching loops so album versions win when confidence scores are otherwise equal. --- core/deezer_client.py | 13 +++++++++++++ core/itunes_client.py | 15 ++++++++++++++- core/spotify_client.py | 4 +++- web_server.py | 34 ++++++++++++++++++++++++++++++---- 4 files changed, 60 insertions(+), 6 deletions(-) diff --git a/core/deezer_client.py b/core/deezer_client.py index 1e73fc9b..e9cb8806 100644 --- a/core/deezer_client.py +++ b/core/deezer_client.py @@ -83,6 +83,17 @@ class Track: if track_data.get('link'): external_urls['deezer'] = track_data['link'] + # Deezer search doesn't return album_type directly; infer if nb_tracks available + nb_tracks = album_data.get('nb_tracks') if isinstance(album_data, dict) else None + album_type = track_data.get('type') # Deezer sometimes returns 'album'/'single' + if not album_type and nb_tracks: + if nb_tracks <= 3: + album_type = 'single' + elif nb_tracks <= 6: + album_type = 'ep' + else: + album_type = 'album' + return cls( id=str(track_data.get('id', '')), name=track_data.get('title', ''), @@ -96,6 +107,8 @@ class Track: release_date=track_data.get('release_date') or (album_data.get('release_date') if isinstance(album_data, dict) else None), track_number=track_data.get('track_position'), disc_number=track_data.get('disk_number', 1), + album_type=album_type, + total_tracks=nb_tracks, ) diff --git a/core/itunes_client.py b/core/itunes_client.py index e44f2d31..1d4ba022 100644 --- a/core/itunes_client.py +++ b/core/itunes_client.py @@ -72,6 +72,8 @@ class Track: release_date: Optional[str] = None track_number: Optional[int] = None disc_number: Optional[int] = None + album_type: Optional[str] = None + total_tracks: Optional[int] = None @classmethod def from_itunes_track(cls, track_data: Dict[str, Any], clean_artist_name: Optional[str] = None) -> 'Track': @@ -92,6 +94,15 @@ class Track: if 'trackViewUrl' in track_data: external_urls['itunes'] = track_data['trackViewUrl'] + # Infer album type from track count + track_count = track_data.get('trackCount', 0) + if track_count <= 3: + album_type = 'single' + elif track_count <= 6: + album_type = 'ep' + else: + album_type = 'album' + return cls( id=str(track_data.get('trackId', '')), name=track_data.get('trackName', ''), @@ -102,7 +113,9 @@ class Track: preview_url=track_data.get('previewUrl'), external_urls=external_urls if external_urls else None, image_url=album_image_url, - release_date=track_data.get('releaseDate', '').split('T')[0] if track_data.get('releaseDate') else None + release_date=track_data.get('releaseDate', '').split('T')[0] if track_data.get('releaseDate') else None, + album_type=album_type, + total_tracks=track_count or None ) @dataclass diff --git a/core/spotify_client.py b/core/spotify_client.py index 3320b889..c0d1f69c 100644 --- a/core/spotify_client.py +++ b/core/spotify_client.py @@ -317,7 +317,9 @@ class Track: preview_url=track_data.get('preview_url'), external_urls=track_data.get('external_urls'), image_url=album_image_url, - release_date=track_data.get('album', {}).get('release_date') + release_date=track_data.get('album', {}).get('release_date'), + album_type=track_data.get('album', {}).get('album_type'), + total_tracks=track_data.get('album', {}).get('total_tracks') ) @dataclass diff --git a/web_server.py b/web_server.py index 3b2d7d61..4ad27965 100644 --- a/web_server.py +++ b/web_server.py @@ -9761,6 +9761,12 @@ def enhance_artist_quality(artist_id): matching_engine.normalize_string(sp_track.name) ) combined = artist_conf * 0.5 + title_conf * 0.5 + # Small bonus for album tracks over singles + _at = getattr(sp_track, 'album_type', None) or '' + if _at == 'album': + combined += 0.02 + elif _at == 'ep': + combined += 0.01 if combined > best_confidence and combined >= 0.7: best_confidence = combined best_match = sp_track @@ -9831,6 +9837,12 @@ def enhance_artist_quality(artist_id): matching_engine.normalize_string(it_track.name) ) combined = artist_conf * 0.5 + title_conf * 0.5 + # Small bonus for album tracks over singles + _at = getattr(it_track, 'album_type', None) or '' + if _at == 'album': + combined += 0.02 + elif _at == 'ep': + combined += 0.01 if combined > itunes_best_conf and combined >= 0.7: itunes_best_conf = combined itunes_best = it_track @@ -13012,10 +13024,10 @@ def _detect_album_info_web(context: dict, artist: dict) -> dict: query = f"artist:{artist_name} track:{clean_title}" tracks = spotify_client.search_tracks(query, limit=5) - # Find the best matching track + # Find the best matching track (prefer album versions over singles) best_match = None best_confidence = 0 - + if tracks: from core.matching_engine import MusicMatchingEngine matching_engine = MusicMatchingEngine() @@ -13029,9 +13041,16 @@ def _detect_album_info_web(context: dict, artist: dict) -> dict: matching_engine.normalize_string(clean_title), matching_engine.normalize_string(track.name) ) - + combined_confidence = (artist_confidence * 0.6 + title_confidence * 0.4) - + + # Small bonus for album tracks so they win ties over singles/EPs + album_type = getattr(track, 'album_type', None) or '' + if album_type == 'album': + combined_confidence += 0.02 + elif album_type == 'ep': + combined_confidence += 0.01 + if combined_confidence > best_confidence and combined_confidence > 0.75: # Higher threshold to avoid bad matches best_match = track best_confidence = combined_confidence @@ -19877,6 +19896,13 @@ def _run_quality_scanner(scope='watchlist', profile_id=1): # Combined confidence (50% artist + 50% title) combined_confidence = (artist_confidence * 0.5 + title_confidence * 0.5) + # Small bonus for album tracks over singles + _at = getattr(spotify_track, 'album_type', None) or '' + if _at == 'album': + combined_confidence += 0.02 + elif _at == 'ep': + combined_confidence += 0.01 + print(f"🔍 [Quality Scanner] Candidate: '{spotify_track.artists[0]}' - '{spotify_track.name}' (confidence: {combined_confidence:.3f})") # Update best match if this is better