From 7f9755a26e268d854658fb00b697c6eb02f10b47 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Fri, 27 Mar 2026 11:44:01 -0700 Subject: [PATCH] Add album-aware track matching for multi-artist albums When artist-specific track search fails, falls back to album-aware matching: finds the album by title (any artist), then checks if the track exists on it. Fixes daily re-downloads of collaborative albums filed under a different artist (e.g., "Spiral Staircases" tagged under "The Alchemist" but scanned from "Larry June's" watchlist). - check_track_exists: new album parameter, album-aware fallback with 0.8 album title threshold + 0.7 track title threshold - Watchlist scanner: passes album_data.get('name') to track checks - Download modal: passes batch_album_context to fallback track search - Wishlist callers (4 spots): extract and pass track album name - Backwards compatible: album=None default, no change for callers without album context (singles, playlists) --- core/watchlist_scanner.py | 8 ++--- database/music_database.py | 61 ++++++++++++++++++++++++++++++++++---- web_server.py | 41 +++++++++++++++---------- 3 files changed, 85 insertions(+), 25 deletions(-) diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index cc0d91e7..b852747e 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -779,7 +779,7 @@ class WatchlistScanner: if not self._should_include_track(track, album_data, watchlist_artist): continue # Skip this track based on content type preferences - if self.is_track_missing_from_library(track): + if self.is_track_missing_from_library(track, album_name=album_data.get('name')): new_tracks_found += 1 # Add to wishlist @@ -1337,9 +1337,9 @@ class WatchlistScanner: logger.warning(f"Error checking track content type inclusion: {e}") return True # Default to including on error - def is_track_missing_from_library(self, track) -> bool: + def is_track_missing_from_library(self, track, album_name: str = None) -> bool: """ - Check if a track is missing from the local Plex library. + Check if a track is missing from the local library. Uses the same matching logic as the download missing tracks modals. """ try: @@ -1374,7 +1374,7 @@ class WatchlistScanner: # Use same database check as modals with server awareness from config.settings import config_manager active_server = config_manager.get_active_media_server() - db_track, confidence = self.database.check_track_exists(query_title, artist_name, confidence_threshold=0.7, server_source=active_server) + db_track, confidence = self.database.check_track_exists(query_title, artist_name, confidence_threshold=0.7, server_source=active_server, album=album_name) if db_track and confidence >= 0.7: logger.debug(f"✔️ Track found in library: '{original_title}' by '{artist_name}' (confidence: {confidence:.2f})") diff --git a/database/music_database.py b/database/music_database.py index 1d35ebbf..a59cb472 100644 --- a/database/music_database.py +++ b/database/music_database.py @@ -4830,10 +4830,13 @@ class MusicDatabase: return list(set(variations)) - def check_track_exists(self, title: str, artist: str, confidence_threshold: float = 0.8, server_source: str = None) -> Tuple[Optional[DatabaseTrack], float]: + def check_track_exists(self, title: str, artist: str, confidence_threshold: float = 0.8, server_source: str = None, album: str = None) -> Tuple[Optional[DatabaseTrack], float]: """ Check if a track exists in the database with enhanced fuzzy matching and confidence scoring. - Now uses the same sophisticated matching approach as album checking for consistency. + + Args: + album: Optional album name — enables album-aware matching for multi-artist albums + Returns (track, confidence) tuple where confidence is 0.0-1.0 """ try: @@ -4873,9 +4876,57 @@ class MusicDatabase: if best_match and best_confidence >= confidence_threshold: logger.debug(f"✅ Enhanced track match found: '{title}' -> '{best_match.title}' (confidence: {best_confidence:.3f})") return best_match, best_confidence - else: - logger.debug(f"❌ No confident track match for '{title}' (best: {best_confidence:.3f}, threshold: {confidence_threshold})") - return None, best_confidence + + # Album-aware fallback: find album by title (any artist), check tracks on it + # Handles multi-artist albums filed under a different artist in the library + if album and best_confidence < confidence_threshold: + logger.debug(f"⚠️ Artist-specific search failed, trying album-aware fallback: '{title}' on '{album}'") + try: + album_candidates = self.search_albums(title=album, artist="", limit=10, server_source=server_source) + for album_candidate in album_candidates: + album_title_sim = max( + self._string_similarity(self._normalize_for_comparison(album), self._normalize_for_comparison(album_candidate.title)), + self._string_similarity(self._clean_album_title_for_comparison(album), self._clean_album_title_for_comparison(album_candidate.title)) + ) + if album_title_sim < 0.8: + continue + + conn = self._get_connection() + cursor = conn.cursor() + source_filter = "AND t.server_source = ?" if server_source else "" + params = [album_candidate.id] + ([server_source] if server_source else []) + cursor.execute(f""" + SELECT t.*, a.name as artist_name, al.title as album_title + FROM tracks t + JOIN artists a ON a.id = t.artist_id + JOIN albums al ON al.id = t.album_id + WHERE t.album_id = ? {source_filter} + """, params) + + for row in cursor.fetchall(): + db_track = DatabaseTrack( + id=row['id'], title=row['title'], artist_name=row['artist_name'], + album_title=row['album_title'], album_id=row['album_id'], + track_number=row['track_number'], duration=row['duration'], + file_path=row['file_path'], bitrate=row['bitrate'], + artist_id=row['artist_id'], server_source=row['server_source'] + ) + title_sim = max( + self._string_similarity(self._normalize_for_comparison(title), self._normalize_for_comparison(db_track.title)), + self._string_similarity(self._clean_track_title_for_comparison(title), self._clean_track_title_for_comparison(db_track.title)) + ) + if title_sim > best_confidence and title_sim >= 0.7: + best_confidence = title_sim + best_match = db_track + + if best_match and best_confidence >= 0.7: + logger.debug(f"✅ Album-aware fallback matched: '{title}' on '{album}' -> '{best_match.title}' by '{best_match.artist_name}' (title_sim: {best_confidence:.3f})") + return best_match, best_confidence + except Exception as album_fallback_err: + logger.debug(f"Album-aware fallback error: {album_fallback_err}") + + logger.debug(f"❌ No confident track match for '{title}' (best: {best_confidence:.3f}, threshold: {confidence_threshold})") + return None, best_confidence except Exception as e: logger.error(f"Error checking track existence for '{title}' by '{artist}': {e}") diff --git a/web_server.py b/web_server.py index e99d080a..db3e88c6 100644 --- a/web_server.py +++ b/web_server.py @@ -18584,11 +18584,12 @@ def _automatic_wishlist_cleanup_after_db_update(): track_name = track.get('name', '') artists = track.get('artists', []) spotify_track_id = track.get('spotify_track_id') or track.get('id') - + track_album = track.get('album', {}).get('name') if isinstance(track.get('album'), dict) else track.get('album') + # Skip if no essential data if not track_name or not artists or not spotify_track_id: continue - + # Check each artist found_in_db = False for artist in artists: @@ -18599,12 +18600,13 @@ def _automatic_wishlist_cleanup_after_db_update(): artist_name = artist['name'] else: artist_name = str(artist) - + try: db_track, confidence = db.check_track_exists( - track_name, artist_name, - confidence_threshold=0.7, - server_source=active_server + track_name, artist_name, + confidence_threshold=0.7, + server_source=active_server, + album=track_album ) if db_track and confidence >= 0.7: @@ -19786,6 +19788,7 @@ def _process_wishlist_automatically(automation_id=None): track_name = track.get('name', '') artists = track.get('artists', []) spotify_track_id = track.get('spotify_track_id') or track.get('id') + track_album = track.get('album', {}).get('name') if isinstance(track.get('album'), dict) else track.get('album') if not track_name or not artists or not spotify_track_id: continue @@ -19804,7 +19807,8 @@ def _process_wishlist_automatically(automation_id=None): db_track, confidence = db.check_track_exists( track_name, artist_name, confidence_threshold=0.7, - server_source=active_server + server_source=active_server, + album=track_album ) if db_track and confidence >= 0.7: @@ -20658,6 +20662,7 @@ def start_wishlist_missing_downloads(): track_name = track.get('name', '') artists = track.get('artists', []) spotify_track_id = track.get('spotify_track_id') or track.get('id') + track_album = track.get('album', {}).get('name') if isinstance(track.get('album'), dict) else track.get('album') if not track_name or not artists or not spotify_track_id: continue @@ -20676,7 +20681,8 @@ def start_wishlist_missing_downloads(): db_track, confidence = db.check_track_exists( track_name, artist_name, confidence_threshold=0.7, - server_source=active_server + server_source=active_server, + album=track_album ) if db_track and confidence >= 0.7: @@ -20890,13 +20896,14 @@ def cleanup_wishlist(): track_name = track.get('name', '') artists = track.get('artists', []) spotify_track_id = track.get('spotify_track_id') or track.get('id') - + track_album = track.get('album', {}).get('name') if isinstance(track.get('album'), dict) else track.get('album') + # Skip if no essential data if not track_name or not artists or not spotify_track_id: continue - + print(f"📋 [Wishlist Cleanup] Checking track {processed_count}/{len(wishlist_tracks)}: '{track_name}'") - + # Check each artist found_in_db = False for artist in artists: @@ -20907,12 +20914,13 @@ def cleanup_wishlist(): artist_name = artist['name'] else: artist_name = str(artist) - + try: db_track, confidence = db.check_track_exists( - track_name, artist_name, - confidence_threshold=0.7, - server_source=active_server + track_name, artist_name, + confidence_threshold=0.7, + server_source=active_server, + album=track_album ) if db_track and confidence >= 0.7: @@ -23324,6 +23332,7 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json): found, confidence = True, best_sim else: # Fall back to global per-track search for this track + _fallback_album = batch_album_context.get('name') if batch_album_context else None for artist in artists: if isinstance(artist, str): artist_name = artist @@ -23332,7 +23341,7 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json): else: artist_name = str(artist) db_track, track_confidence = db.check_track_exists( - track_name, artist_name, confidence_threshold=0.7, server_source=active_server + track_name, artist_name, confidence_threshold=0.7, server_source=active_server, album=_fallback_album ) if db_track and track_confidence >= 0.7: found, confidence = True, track_confidence