diff --git a/core/matching_engine.py b/core/matching_engine.py index 9533e2e6..75a95336 100644 --- a/core/matching_engine.py +++ b/core/matching_engine.py @@ -195,7 +195,9 @@ class MusicMatchingEngine: 'live', 'live at', 'live from', # Live versions (different recording) 'acoustic', 'unplugged', # Acoustic versions (different arrangement) 'slowed', 'reverb', 'sped up', 'speed up', # TikTok edits (different) - 'radio edit', 'radio version', # Radio edits (different) + 'radio edit', 'radio version', # Radio edits (different cut) + 'single edit', # Single edits (different cut) + 'album edit', # Album edits (different cut) 'instrumental', 'karaoke', # Instrumental (different) 'extended', 'extended version', # Extended (different length) 'demo', 'rough cut', # Demos (different recording) diff --git a/database/music_database.py b/database/music_database.py index 7f0838da..c166f0c4 100644 --- a/database/music_database.py +++ b/database/music_database.py @@ -2256,7 +2256,7 @@ class MusicDatabase: where_parts = [f"({' OR '.join(like_conditions)})"] if server_source: where_parts.append("tracks.server_source = ?") - params.insert(-1 if params else 0, server_source) # Insert before limit + params.append(server_source) # Append after LIKE params, before LIMIT where_clause = " AND ".join(where_parts) params.append(limit * 3) # Get more results for scoring @@ -3035,6 +3035,15 @@ class MusicDatabase: """Clean track title for comparison by normalizing brackets/dashes and removing noise""" cleaned = title.lower().strip() + # PRE-STEP: Handle "(with Artist)" featuring BEFORE bracket removal. + # This catches "with" only when used as featuring syntax inside brackets, + # NOT when "with" is part of the song title like "Stay With Me". + # e.g. "Levitating (with DaBaby)" → "Levitating" + # "Stay (with Justin Bieber)" → "Stay" + # "Stay With Me" → unchanged (no brackets around "with") + cleaned = re.sub(r'\s*\(with\s+[^)]*\)', '', cleaned, flags=re.IGNORECASE) + cleaned = re.sub(r'\s*\[with\s+[^\]]*\]', '', cleaned, flags=re.IGNORECASE) + # STEP 1: Normalize bracket/dash styles for consistent matching # Convert all bracket styles to spaces for better matching cleaned = re.sub(r'\s*[\[\(]\s*', ' ', cleaned) # Convert opening brackets/parens to space @@ -3054,19 +3063,18 @@ class MusicDatabase: r'\s*feat\..*', # Remove featuring r'\s*featuring.*', # Remove featuring r'\s*ft\..*', # Remove ft. - r'\s*with\s+.*', # Remove "with Artist" - - # Edit versions (same recording, different edit for format) - r'\s*radio\s+edit.*', # Remove "radio edit" - same song, radio format - r'\s*single\s+edit.*', # Remove "single edit" - same song, single format - r'\s*album\s+edit.*', # Remove "album edit" - same song, album format - r'\s*edit\s*$', # Remove trailing "edit" # Remasters (same recording, different mastering) r'\s*\d{4}\s*remaster.*', # Remove "2015 remaster" r'\s*remaster.*', # Remove "remaster/remastered" r'\s*remastered.*', # Remove "remastered" + # NOTE: Edit versions (radio edit, single edit, album edit) are NOT + # removed here — they are treated as different versions by + # matching_engine.similarity_score() which applies a 0.30 penalty. + # Removing them here would override that penalty via max() and + # cause incorrect matches (e.g. radio edit matched to full version). + # Version clarifications (metadata, not different recordings) r'\s*original\s+version.*', # Remove "original version" - clarification r'\s*album\s+version.*', # Remove "album version" - clarification @@ -3086,6 +3094,7 @@ class MusicDatabase: # - instrumental (different version) # - demo (different recording) # - extended (different length/content) + # - radio edit, single edit, album edit (different cuts) # These are handled by matching_engine.similarity_score() which applies penalties for pattern in patterns_to_remove: diff --git a/web_server.py b/web_server.py index f75f5804..5ec1d90e 100644 --- a/web_server.py +++ b/web_server.py @@ -19893,12 +19893,12 @@ def _run_sync_task(playlist_id, playlist_name, tracks_json): artist_name = str(artist) db_track, confidence = db.check_track_exists( - original_title, artist_name, - confidence_threshold=0.7, + original_title, artist_name, + confidence_threshold=0.80, server_source=active_server ) - - if db_track and confidence >= 0.7: + + if db_track and confidence >= 0.80: print(f"✅ Database match: '{db_track.title}' (confidence: {confidence:.2f})") # Create mock track object for playlist creation