better: strip all bracketed/parenthesized text from titles and boost confidence for exact title matches

10 months ago · fda2b92724
parent 3e6b329e35
commit fda2b92724
1 changed file with 9 additions and 20 deletions
--- a/core/matching_engine.py
+++ b/core/matching_engine.py
@@ -24,18 +24,9 @@ class MusicMatchingEngine:
    def __init__(self):
        # More comprehensive patterns to strip extra info from titles
        self.title_patterns = [
-            # Patterns inside parentheses or brackets
-            r'\(feat\.?.*\)',
-            r'\[feat\.?.*\]',
-            r'\(with.*\)',
-            r'\(ft\.?.*\)',
-            r'\[ft\.?.*\]',
-            r'\(remix\)',
-            r'\(live\)',
-            r'\(acoustic\)',
-            r'\(radio edit\)',
-            r'\(album version\)',
-            r'\(original mix\)',
+            # NEW: General patterns to remove all content in brackets/parentheses first
+            r'\(.*\)',
+            r'\[.*\]',
            # Patterns after a hyphen
            r'-\s*single version',
            r'-\s*remaster.*',
@@ -124,24 +115,19 @@ class MusicMatchingEngine:
        plex_title_cleaned = self.clean_title(plex_track.title)

        # --- Enhanced Artist Scoring ---
-        # Get a list of all cleaned artist names from Spotify
        spotify_artists_cleaned = [self.clean_artist(a) for a in spotify_track.artists if a]
        plex_artist_cleaned = self.clean_artist(plex_track.artist)
-        plex_artist_normalized = self.normalize_string(plex_track.artist) # For substring check
+        plex_artist_normalized = self.normalize_string(plex_track.artist)

-        # Calculate the best possible artist score by checking each Spotify artist against the Plex artist
        best_artist_score = 0.0
        for spotify_artist in spotify_artists_cleaned:
-            # First, check for a direct substring match, which is a very strong signal
            if spotify_artist in plex_artist_normalized:
                score = 1.0
            else:
-                # Otherwise, calculate similarity on the cleaned versions
                score = self.similarity_score(spotify_artist, plex_artist_cleaned)
            
            if score > best_artist_score:
                best_artist_score = score
-                # If we get a perfect match, we can stop early
                if best_artist_score == 1.0:
                    break
        
@@ -152,13 +138,16 @@ class MusicMatchingEngine:
        duration_score = self.duration_similarity(spotify_track.duration_ms, plex_track.duration if plex_track.duration else 0)
        
        # --- Weighted confidence calculation ---
-        # Weights: Title (50%), Artist (30%), Duration (20%)
        confidence = (title_score * 0.5) + (artist_score * 0.3) + (duration_score * 0.2)
        
+        # --- NEW: Add confidence boost for exact title matches ---
+        if spotify_title_cleaned == plex_title_cleaned and len(spotify_title_cleaned) > 0:
+            confidence = max(confidence, 0.85) # Boost to at least 0.85 for exact titles
+        
        # Determine match type based on scores
        if title_score > 0.95 and artist_score > 0.9 and duration_score > 0.9:
            match_type = "perfect_match"
-            confidence = max(confidence, 0.98) # Boost confidence for perfect matches
+            confidence = max(confidence, 0.98)
        elif title_score > 0.85 and artist_score > 0.8:
            match_type = "high_confidence"
        elif title_score > 0.75: