diff --git a/core/__pycache__/matching_engine.cpython-312.pyc b/core/__pycache__/matching_engine.cpython-312.pyc index d90c8cc6..51e780d3 100644 Binary files a/core/__pycache__/matching_engine.cpython-312.pyc and b/core/__pycache__/matching_engine.cpython-312.pyc differ diff --git a/core/matching_engine.py b/core/matching_engine.py index 31fd2c98..40d267ce 100644 --- a/core/matching_engine.py +++ b/core/matching_engine.py @@ -36,12 +36,13 @@ class MusicMatchingEngine: ] self.artist_patterns = [ + # Only remove featured artists, not parts of main artist names r'\s*feat\..*', r'\s*ft\..*', r'\s*featuring.*', - r'\s*&.*', - r'\s*and.*', - r',.*' + # REMOVED: r'\s*&.*' - This breaks "Daryl Hall & John Oates", "Blood & Water" + # REMOVED: r'\s*and.*' - This breaks artist names with "and" + # REMOVED: r',.*' - This can break legitimate artist names with commas ] def normalize_string(self, text: str) -> str: @@ -204,10 +205,13 @@ class MusicMatchingEngine: plex_core_title = self.get_core_string(plex_track.title) if spotify_core_title and spotify_core_title == plex_core_title: - # If the core titles are identical, we are highly confident. - # The final score is a high base (0.9) plus a bonus for artist similarity. - confidence = 0.90 + (artist_score * 0.09) # Max score of 0.99 - return confidence, "core_title_match" + # SAFETY CHECK: Only give high confidence if artist also matches reasonably well + # This prevents "Artist A - Girls" from matching "Artist Z - Girls" with high confidence + if artist_score >= 0.75: # Require decent artist match + # If the core titles are identical and artists match, we are highly confident + confidence = 0.90 + (artist_score * 0.09) # Max score of 0.99 + return confidence, "core_title_match" + # If artist score is too low, fall through to standard weighted calculation # --- Priority 2: Fuzzy Title Match (for variations, typos, etc.) --- spotify_title_cleaned = self.clean_title(spotify_track.name) @@ -294,17 +298,17 @@ class MusicMatchingEngine: # SAFETY CHECK: Don't return empty or too-short titles if not cleaned_title or len(cleaned_title.strip()) < 2: - print(f"⚠️ Album removal would create empty title: '{original_title}' → '{cleaned_title}' - keeping original") + logger.warning(f"Album removal would create empty title: '{original_title}' → '{cleaned_title}' - keeping original") return track_title, False # SAFETY CHECK: Don't remove if it would leave only articles or very short words words = cleaned_title.split() meaningful_words = [w for w in words if len(w) > 2 and w.lower() not in ['the', 'and', 'or', 'of', 'a', 'an']] if not meaningful_words: - print(f"⚠️ Album removal would leave only short words: '{original_title}' → '{cleaned_title}' - keeping original") + logger.warning(f"Album removal would leave only short words: '{original_title}' → '{cleaned_title}' - keeping original") return track_title, False - print(f"🎵 Detected album in title: '{original_title}' → '{cleaned_title}' (removed: '{match.group(1)}', similarity: {similarity:.2f})") + logger.debug(f"Detected album in title: '{original_title}' → '{cleaned_title}' (removed: '{match.group(1)}', similarity: {similarity:.2f})") return cleaned_title, True # Fallback: detect common album-like suffixes even without album context @@ -363,7 +367,7 @@ class MusicMatchingEngine: cleaned_track = self.clean_title(cleaned_title) if cleaned_track: queries.append(f"{artist} {cleaned_track}".strip()) - print(f"🎯 PRIORITY 1: Album-cleaned query: '{artist} {cleaned_track}'") + logger.debug(f"PRIORITY 1: Album-cleaned query: '{artist} {cleaned_track}'") # PRIORITY 2: Try simplified versions, but preserve important version info # Only remove content that's likely to be album names or noise, not version info @@ -392,9 +396,9 @@ class MusicMatchingEngine: dash_clean = self.clean_title(title_part) if dash_clean and dash_clean not in [self.clean_title(q.split(' ', 1)[1]) for q in queries if ' ' in q]: queries.append(f"{artist} {dash_clean}".strip()) - print(f"🎯 PRIORITY 2: Dash-cleaned query (removed album): '{artist} {dash_clean}'") + logger.debug(f"PRIORITY 2: Dash-cleaned query (removed album): '{artist} {dash_clean}'") elif should_preserve: - print(f"🎯 PRESERVED: Keeping dash content '{dash_content}' as it appears to be version info") + logger.debug(f"PRESERVED: Keeping dash content '{dash_content}' as it appears to be version info") # Pattern 2: Only remove parentheses that contain noise (feat, explicit, etc), not version info # Check if parentheses contain version-related keywords before removing @@ -425,16 +429,16 @@ class MusicMatchingEngine: simple_clean = self.clean_title(simple_title) if simple_clean and simple_clean not in [self.clean_title(q.split(' ', 1)[1]) for q in queries if ' ' in q]: queries.append(f"{artist} {simple_clean}".strip()) - print(f"🎯 PRIORITY 2: Noise-removed query: '{artist} {simple_clean}'") + logger.debug(f"PRIORITY 2: Noise-removed query: '{artist} {simple_clean}'") elif is_version: - print(f"🎯 PRESERVED: Keeping parentheses content '({paren_content})' as it appears to be version info") + logger.debug(f"PRESERVED: Keeping parentheses content '({paren_content})' as it appears to be version info") # PRIORITY 3: Original query (ONLY if no album was detected or if it's different) original_track_clean = self.clean_title(original_title) if not album_detected or not queries: # Only add original if no album detected or no other queries if original_track_clean not in [q.split(' ', 1)[1] for q in queries if ' ' in q]: queries.append(f"{artist} {original_track_clean}".strip()) - print(f"🎯 PRIORITY 3: Original query: '{artist} {original_track_clean}'") + logger.debug(f"PRIORITY 3: Original query: '{artist} {original_track_clean}'") # Remove duplicates while preserving order unique_queries = [] @@ -492,7 +496,7 @@ class MusicMatchingEngine: quality_bonus = 0.0 if slskd_track.quality: if slskd_track.quality.lower() == 'flac': - quality_bonus = 0.1 + quality_bonus = 0.07 # Reduced from 0.1 to prevent low-confidence FLAC beating high-confidence MP3 elif slskd_track.quality.lower() == 'mp3' and (slskd_track.bitrate or 0) >= 320: quality_bonus = 0.05