diff --git a/core/matching_engine.py b/core/matching_engine.py index 4511d747..54b285e6 100644 --- a/core/matching_engine.py +++ b/core/matching_engine.py @@ -42,7 +42,7 @@ class MusicMatchingEngine: r'-\s*live.*', r'-\s*remix', r'-\s*radio edit', - # NEW: Patterns in the open title string (not in brackets) + # Patterns in the open title string (not in brackets) r'\s+feat\.?.*', r'\s+ft\.?.*', r'\s+featuring.*' @@ -97,14 +97,6 @@ class MusicMatchingEngine: return self.normalize_string(cleaned) - def extract_main_artist(self, artists: List[str]) -> str: - """Extracts and cleans the primary artist from a list.""" - if not artists: - return "" - - main_artist = artists[0] - return self.clean_artist(main_artist) - def similarity_score(self, str1: str, str2: str) -> float: """Calculates similarity score between two strings.""" if not str1 or not str2: @@ -128,19 +120,35 @@ class MusicMatchingEngine: def calculate_match_confidence(self, spotify_track: SpotifyTrack, plex_track: PlexTrackInfo) -> Tuple[float, str]: """Calculates a confidence score for a potential match with weighted factors.""" - # Clean titles and artists for comparison spotify_title_cleaned = self.clean_title(spotify_track.name) plex_title_cleaned = self.clean_title(plex_track.title) - spotify_main_artist_cleaned = self.extract_main_artist(spotify_track.artists) - plex_artist_normalized = self.normalize_string(plex_track.artist) + # --- Enhanced Artist Scoring --- + # Get a list of all cleaned artist names from Spotify + spotify_artists_cleaned = [self.clean_artist(a) for a in spotify_track.artists if a] + plex_artist_cleaned = self.clean_artist(plex_track.artist) + plex_artist_normalized = self.normalize_string(plex_track.artist) # For substring check - # --- Calculate individual scores --- - title_score = self.similarity_score(spotify_title_cleaned, plex_title_cleaned) + # Calculate the best possible artist score by checking each Spotify artist against the Plex artist + best_artist_score = 0.0 + for spotify_artist in spotify_artists_cleaned: + # First, check for a direct substring match, which is a very strong signal + if spotify_artist in plex_artist_normalized: + score = 1.0 + else: + # Otherwise, calculate similarity on the cleaned versions + score = self.similarity_score(spotify_artist, plex_artist_cleaned) + + if score > best_artist_score: + best_artist_score = score + # If we get a perfect match, we can stop early + if best_artist_score == 1.0: + break - # Artist score: check if main Spotify artist is in the Plex artist string - artist_score = 1.0 if spotify_main_artist_cleaned in plex_artist_normalized else self.similarity_score(spotify_main_artist_cleaned, self.clean_artist(plex_track.artist)) + artist_score = best_artist_score + # --- Calculate other scores --- + title_score = self.similarity_score(spotify_title_cleaned, plex_title_cleaned) duration_score = self.duration_similarity(spotify_track.duration_ms, plex_track.duration if plex_track.duration else 0) # --- Weighted confidence calculation ---