diff --git a/core/matching_engine.py b/core/matching_engine.py index 54b285e6..7e66826f 100644 --- a/core/matching_engine.py +++ b/core/matching_engine.py @@ -24,18 +24,9 @@ class MusicMatchingEngine: def __init__(self): # More comprehensive patterns to strip extra info from titles self.title_patterns = [ - # Patterns inside parentheses or brackets - r'\(feat\.?.*\)', - r'\[feat\.?.*\]', - r'\(with.*\)', - r'\(ft\.?.*\)', - r'\[ft\.?.*\]', - r'\(remix\)', - r'\(live\)', - r'\(acoustic\)', - r'\(radio edit\)', - r'\(album version\)', - r'\(original mix\)', + # NEW: General patterns to remove all content in brackets/parentheses first + r'\(.*\)', + r'\[.*\]', # Patterns after a hyphen r'-\s*single version', r'-\s*remaster.*', @@ -124,24 +115,19 @@ class MusicMatchingEngine: plex_title_cleaned = self.clean_title(plex_track.title) # --- Enhanced Artist Scoring --- - # Get a list of all cleaned artist names from Spotify spotify_artists_cleaned = [self.clean_artist(a) for a in spotify_track.artists if a] plex_artist_cleaned = self.clean_artist(plex_track.artist) - plex_artist_normalized = self.normalize_string(plex_track.artist) # For substring check + plex_artist_normalized = self.normalize_string(plex_track.artist) - # Calculate the best possible artist score by checking each Spotify artist against the Plex artist best_artist_score = 0.0 for spotify_artist in spotify_artists_cleaned: - # First, check for a direct substring match, which is a very strong signal if spotify_artist in plex_artist_normalized: score = 1.0 else: - # Otherwise, calculate similarity on the cleaned versions score = self.similarity_score(spotify_artist, plex_artist_cleaned) if score > best_artist_score: best_artist_score = score - # If we get a perfect match, we can stop early if best_artist_score == 1.0: break @@ -152,13 +138,16 @@ class MusicMatchingEngine: duration_score = self.duration_similarity(spotify_track.duration_ms, plex_track.duration if plex_track.duration else 0) # --- Weighted confidence calculation --- - # Weights: Title (50%), Artist (30%), Duration (20%) confidence = (title_score * 0.5) + (artist_score * 0.3) + (duration_score * 0.2) + # --- NEW: Add confidence boost for exact title matches --- + if spotify_title_cleaned == plex_title_cleaned and len(spotify_title_cleaned) > 0: + confidence = max(confidence, 0.85) # Boost to at least 0.85 for exact titles + # Determine match type based on scores if title_score > 0.95 and artist_score > 0.9 and duration_score > 0.9: match_type = "perfect_match" - confidence = max(confidence, 0.98) # Boost confidence for perfect matches + confidence = max(confidence, 0.98) elif title_score > 0.85 and artist_score > 0.8: match_type = "high_confidence" elif title_score > 0.75: