Add album-aware track matching for multi-artist albums

When the artist-specific track search fails, fall back to album-aware
matching: find the album by title (any artist), then check whether the
track exists on it. This fixes daily re-downloads of collaborative albums
filed under a different artist (e.g., "Spiral Staircases" tagged
under "The Alchemist" but scanned from the "Larry June" watchlist).

- check_track_exists: new optional album parameter; the album-aware fallback
  requires album title similarity >= 0.8 and track title similarity >= 0.7
- Watchlist scanner: passes album_data.get('name') to track checks
- Download modal: passes batch_album_context to fallback track search
- Wishlist callers (4 spots): extract and pass track album name
- Backwards compatible: album=None default, no change for callers
  without album context (singles, playlists)
pull/253/head
Broque Thomas 2 months ago
parent 4baf5e53d4
commit 7f9755a26e

@ -779,7 +779,7 @@ class WatchlistScanner:
if not self._should_include_track(track, album_data, watchlist_artist):
continue # Skip this track based on content type preferences
if self.is_track_missing_from_library(track):
if self.is_track_missing_from_library(track, album_name=album_data.get('name')):
new_tracks_found += 1
# Add to wishlist
@ -1337,9 +1337,9 @@ class WatchlistScanner:
logger.warning(f"Error checking track content type inclusion: {e}")
return True # Default to including on error
def is_track_missing_from_library(self, track) -> bool:
def is_track_missing_from_library(self, track, album_name: str = None) -> bool:
"""
Check if a track is missing from the local Plex library.
Check if a track is missing from the local library.
Uses the same matching logic as the download missing tracks modals.
"""
try:
@ -1374,7 +1374,7 @@ class WatchlistScanner:
# Use same database check as modals with server awareness
from config.settings import config_manager
active_server = config_manager.get_active_media_server()
db_track, confidence = self.database.check_track_exists(query_title, artist_name, confidence_threshold=0.7, server_source=active_server)
db_track, confidence = self.database.check_track_exists(query_title, artist_name, confidence_threshold=0.7, server_source=active_server, album=album_name)
if db_track and confidence >= 0.7:
logger.debug(f"✔️ Track found in library: '{original_title}' by '{artist_name}' (confidence: {confidence:.2f})")

@ -4830,10 +4830,13 @@ class MusicDatabase:
return list(set(variations))
def check_track_exists(self, title: str, artist: str, confidence_threshold: float = 0.8, server_source: str = None) -> Tuple[Optional[DatabaseTrack], float]:
def check_track_exists(self, title: str, artist: str, confidence_threshold: float = 0.8, server_source: str = None, album: str = None) -> Tuple[Optional[DatabaseTrack], float]:
"""
Check if a track exists in the database with enhanced fuzzy matching and confidence scoring.
Now uses the same sophisticated matching approach as album checking for consistency.
Args:
album: Optional album name enables album-aware matching for multi-artist albums
Returns (track, confidence) tuple where confidence is 0.0-1.0
"""
try:
@ -4873,9 +4876,57 @@ class MusicDatabase:
if best_match and best_confidence >= confidence_threshold:
logger.debug(f"✅ Enhanced track match found: '{title}' -> '{best_match.title}' (confidence: {best_confidence:.3f})")
return best_match, best_confidence
else:
logger.debug(f"❌ No confident track match for '{title}' (best: {best_confidence:.3f}, threshold: {confidence_threshold})")
return None, best_confidence
# Album-aware fallback: find album by title (any artist), check tracks on it
# Handles multi-artist albums filed under a different artist in the library
if album and best_confidence < confidence_threshold:
logger.debug(f"⚠️ Artist-specific search failed, trying album-aware fallback: '{title}' on '{album}'")
try:
album_candidates = self.search_albums(title=album, artist="", limit=10, server_source=server_source)
for album_candidate in album_candidates:
album_title_sim = max(
self._string_similarity(self._normalize_for_comparison(album), self._normalize_for_comparison(album_candidate.title)),
self._string_similarity(self._clean_album_title_for_comparison(album), self._clean_album_title_for_comparison(album_candidate.title))
)
if album_title_sim < 0.8:
continue
conn = self._get_connection()
cursor = conn.cursor()
source_filter = "AND t.server_source = ?" if server_source else ""
params = [album_candidate.id] + ([server_source] if server_source else [])
cursor.execute(f"""
SELECT t.*, a.name as artist_name, al.title as album_title
FROM tracks t
JOIN artists a ON a.id = t.artist_id
JOIN albums al ON al.id = t.album_id
WHERE t.album_id = ? {source_filter}
""", params)
for row in cursor.fetchall():
db_track = DatabaseTrack(
id=row['id'], title=row['title'], artist_name=row['artist_name'],
album_title=row['album_title'], album_id=row['album_id'],
track_number=row['track_number'], duration=row['duration'],
file_path=row['file_path'], bitrate=row['bitrate'],
artist_id=row['artist_id'], server_source=row['server_source']
)
title_sim = max(
self._string_similarity(self._normalize_for_comparison(title), self._normalize_for_comparison(db_track.title)),
self._string_similarity(self._clean_track_title_for_comparison(title), self._clean_track_title_for_comparison(db_track.title))
)
if title_sim > best_confidence and title_sim >= 0.7:
best_confidence = title_sim
best_match = db_track
if best_match and best_confidence >= 0.7:
logger.debug(f"✅ Album-aware fallback matched: '{title}' on '{album}' -> '{best_match.title}' by '{best_match.artist_name}' (title_sim: {best_confidence:.3f})")
return best_match, best_confidence
except Exception as album_fallback_err:
logger.debug(f"Album-aware fallback error: {album_fallback_err}")
logger.debug(f"❌ No confident track match for '{title}' (best: {best_confidence:.3f}, threshold: {confidence_threshold})")
return None, best_confidence
except Exception as e:
logger.error(f"Error checking track existence for '{title}' by '{artist}': {e}")

@ -18584,11 +18584,12 @@ def _automatic_wishlist_cleanup_after_db_update():
track_name = track.get('name', '')
artists = track.get('artists', [])
spotify_track_id = track.get('spotify_track_id') or track.get('id')
track_album = track.get('album', {}).get('name') if isinstance(track.get('album'), dict) else track.get('album')
# Skip if no essential data
if not track_name or not artists or not spotify_track_id:
continue
# Check each artist
found_in_db = False
for artist in artists:
@ -18599,12 +18600,13 @@ def _automatic_wishlist_cleanup_after_db_update():
artist_name = artist['name']
else:
artist_name = str(artist)
try:
db_track, confidence = db.check_track_exists(
track_name, artist_name,
confidence_threshold=0.7,
server_source=active_server
track_name, artist_name,
confidence_threshold=0.7,
server_source=active_server,
album=track_album
)
if db_track and confidence >= 0.7:
@ -19786,6 +19788,7 @@ def _process_wishlist_automatically(automation_id=None):
track_name = track.get('name', '')
artists = track.get('artists', [])
spotify_track_id = track.get('spotify_track_id') or track.get('id')
track_album = track.get('album', {}).get('name') if isinstance(track.get('album'), dict) else track.get('album')
if not track_name or not artists or not spotify_track_id:
continue
@ -19804,7 +19807,8 @@ def _process_wishlist_automatically(automation_id=None):
db_track, confidence = db.check_track_exists(
track_name, artist_name,
confidence_threshold=0.7,
server_source=active_server
server_source=active_server,
album=track_album
)
if db_track and confidence >= 0.7:
@ -20658,6 +20662,7 @@ def start_wishlist_missing_downloads():
track_name = track.get('name', '')
artists = track.get('artists', [])
spotify_track_id = track.get('spotify_track_id') or track.get('id')
track_album = track.get('album', {}).get('name') if isinstance(track.get('album'), dict) else track.get('album')
if not track_name or not artists or not spotify_track_id:
continue
@ -20676,7 +20681,8 @@ def start_wishlist_missing_downloads():
db_track, confidence = db.check_track_exists(
track_name, artist_name,
confidence_threshold=0.7,
server_source=active_server
server_source=active_server,
album=track_album
)
if db_track and confidence >= 0.7:
@ -20890,13 +20896,14 @@ def cleanup_wishlist():
track_name = track.get('name', '')
artists = track.get('artists', [])
spotify_track_id = track.get('spotify_track_id') or track.get('id')
track_album = track.get('album', {}).get('name') if isinstance(track.get('album'), dict) else track.get('album')
# Skip if no essential data
if not track_name or not artists or not spotify_track_id:
continue
print(f"📋 [Wishlist Cleanup] Checking track {processed_count}/{len(wishlist_tracks)}: '{track_name}'")
# Check each artist
found_in_db = False
for artist in artists:
@ -20907,12 +20914,13 @@ def cleanup_wishlist():
artist_name = artist['name']
else:
artist_name = str(artist)
try:
db_track, confidence = db.check_track_exists(
track_name, artist_name,
confidence_threshold=0.7,
server_source=active_server
track_name, artist_name,
confidence_threshold=0.7,
server_source=active_server,
album=track_album
)
if db_track and confidence >= 0.7:
@ -23324,6 +23332,7 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json):
found, confidence = True, best_sim
else:
# Fall back to global per-track search for this track
_fallback_album = batch_album_context.get('name') if batch_album_context else None
for artist in artists:
if isinstance(artist, str):
artist_name = artist
@ -23332,7 +23341,7 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json):
else:
artist_name = str(artist)
db_track, track_confidence = db.check_track_exists(
track_name, artist_name, confidence_threshold=0.7, server_source=active_server
track_name, artist_name, confidence_threshold=0.7, server_source=active_server, album=_fallback_album
)
if db_track and track_confidence >= 0.7:
found, confidence = True, track_confidence

Loading…
Cancel
Save