Prefer album versions over singles when matching tracks to metadata sources

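Add album_type and total_tracks fields to the Track dataclass and populate
them from Spotify, iTunes, and Deezer API responses. Spotify supplies both
values on the album object; iTunes infers the type from trackCount; Deezer
uses the track's 'type' when present and otherwise infers it from the
album's nb_tracks (3 or fewer tracks = single, 4-6 = EP, 7 or more = album).
Apply a small tiebreaker bonus (+0.02 for albums, +0.01 for EPs) in all
matching loops so album versions win when confidence scores are otherwise
equal. In enhance_artist_quality and _detect_album_info_web the bonus is
added before the minimum-confidence check, so it can also lift a borderline
candidate over the threshold.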
pull/253/head
Broque Thomas 2 months ago
parent 171a64005d
commit 6bf337423d

@ -83,6 +83,17 @@ class Track:
if track_data.get('link'):
external_urls['deezer'] = track_data['link']
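# Deezer search doesn't return album_type directly. Use the track's 'type' if set;
# otherwise infer from the album's nb_tracks (<=3 single, <=6 EP, else album).
# NOTE(review): Deezer track objects normally report type 'track' -- confirm this
# lookup can ever yield 'album'/'single'; if not, the nb_tracks inference never runs.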
nb_tracks = album_data.get('nb_tracks') if isinstance(album_data, dict) else None
album_type = track_data.get('type') # Deezer sometimes returns 'album'/'single'
if not album_type and nb_tracks:
if nb_tracks <= 3:
album_type = 'single'
elif nb_tracks <= 6:
album_type = 'ep'
else:
album_type = 'album'
return cls(
id=str(track_data.get('id', '')),
name=track_data.get('title', ''),
@ -96,6 +107,8 @@ class Track:
release_date=track_data.get('release_date') or (album_data.get('release_date') if isinstance(album_data, dict) else None),
track_number=track_data.get('track_position'),
disc_number=track_data.get('disk_number', 1),
album_type=album_type,
total_tracks=nb_tracks,
)

@ -72,6 +72,8 @@ class Track:
release_date: Optional[str] = None
track_number: Optional[int] = None
disc_number: Optional[int] = None
album_type: Optional[str] = None
total_tracks: Optional[int] = None
@classmethod
def from_itunes_track(cls, track_data: Dict[str, Any], clean_artist_name: Optional[str] = None) -> 'Track':
@ -92,6 +94,15 @@ class Track:
if 'trackViewUrl' in track_data:
external_urls['itunes'] = track_data['trackViewUrl']
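# Infer album type from trackCount (<=3 single, <=6 EP, else album).
# A missing trackCount defaults to 0 and is therefore classified as 'single'.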
track_count = track_data.get('trackCount', 0)
if track_count <= 3:
album_type = 'single'
elif track_count <= 6:
album_type = 'ep'
else:
album_type = 'album'
return cls(
id=str(track_data.get('trackId', '')),
name=track_data.get('trackName', ''),
@ -102,7 +113,9 @@ class Track:
preview_url=track_data.get('previewUrl'),
external_urls=external_urls if external_urls else None,
image_url=album_image_url,
release_date=track_data.get('releaseDate', '').split('T')[0] if track_data.get('releaseDate') else None
release_date=track_data.get('releaseDate', '').split('T')[0] if track_data.get('releaseDate') else None,
album_type=album_type,
total_tracks=track_count or None
)
@dataclass

@ -317,7 +317,9 @@ class Track:
preview_url=track_data.get('preview_url'),
external_urls=track_data.get('external_urls'),
image_url=album_image_url,
release_date=track_data.get('album', {}).get('release_date')
release_date=track_data.get('album', {}).get('release_date'),
album_type=track_data.get('album', {}).get('album_type'),
total_tracks=track_data.get('album', {}).get('total_tracks')
)
@dataclass

@ -9761,6 +9761,12 @@ def enhance_artist_quality(artist_id):
matching_engine.normalize_string(sp_track.name)
)
combined = artist_conf * 0.5 + title_conf * 0.5
# Small bonus for album tracks over singles
_at = getattr(sp_track, 'album_type', None) or ''
if _at == 'album':
combined += 0.02
elif _at == 'ep':
combined += 0.01
if combined > best_confidence and combined >= 0.7:
best_confidence = combined
best_match = sp_track
@ -9831,6 +9837,12 @@ def enhance_artist_quality(artist_id):
matching_engine.normalize_string(it_track.name)
)
combined = artist_conf * 0.5 + title_conf * 0.5
# Small bonus for album tracks over singles
_at = getattr(it_track, 'album_type', None) or ''
if _at == 'album':
combined += 0.02
elif _at == 'ep':
combined += 0.01
if combined > itunes_best_conf and combined >= 0.7:
itunes_best_conf = combined
itunes_best = it_track
@ -13012,10 +13024,10 @@ def _detect_album_info_web(context: dict, artist: dict) -> dict:
query = f"artist:{artist_name} track:{clean_title}"
tracks = spotify_client.search_tracks(query, limit=5)
# Find the best matching track
# Find the best matching track (prefer album versions over singles)
best_match = None
best_confidence = 0
if tracks:
from core.matching_engine import MusicMatchingEngine
matching_engine = MusicMatchingEngine()
@ -13029,9 +13041,16 @@ def _detect_album_info_web(context: dict, artist: dict) -> dict:
matching_engine.normalize_string(clean_title),
matching_engine.normalize_string(track.name)
)
combined_confidence = (artist_confidence * 0.6 + title_confidence * 0.4)
# Small bonus for album tracks so they win ties over singles/EPs
album_type = getattr(track, 'album_type', None) or ''
if album_type == 'album':
combined_confidence += 0.02
elif album_type == 'ep':
combined_confidence += 0.01
if combined_confidence > best_confidence and combined_confidence > 0.75: # Higher threshold to avoid bad matches
best_match = track
best_confidence = combined_confidence
@ -19877,6 +19896,13 @@ def _run_quality_scanner(scope='watchlist', profile_id=1):
# Combined confidence (50% artist + 50% title)
combined_confidence = (artist_confidence * 0.5 + title_confidence * 0.5)
# Small bonus for album tracks over singles
_at = getattr(spotify_track, 'album_type', None) or ''
if _at == 'album':
combined_confidence += 0.02
elif _at == 'ep':
combined_confidence += 0.01
print(f"🔍 [Quality Scanner] Candidate: '{spotify_track.artists[0]}' - '{spotify_track.name}' (confidence: {combined_confidence:.3f})")
# Update best match if this is better

Loading…
Cancel
Save