diff --git a/core/matching_engine.py b/core/matching_engine.py
index bf9a00c..4511d74 100644
--- a/core/matching_engine.py
+++ b/core/matching_engine.py
@@ -24,6 +24,7 @@ class MusicMatchingEngine:
     def __init__(self):
         # More comprehensive patterns to strip extra info from titles
         self.title_patterns = [
+            # Patterns inside parentheses or brackets
             r'\(feat\.?.*\)',
             r'\[feat\.?.*\]',
             r'\(with.*\)',
@@ -35,11 +36,16 @@ class MusicMatchingEngine:
             r'\(radio edit\)',
             r'\(album version\)',
             r'\(original mix\)',
+            # Patterns after a hyphen
             r'-\s*single version',
             r'-\s*remaster.*',
             r'-\s*live.*',
             r'-\s*remix',
             r'-\s*radio edit',
+            # Featured-artist credits in the open title string (not in brackets); \b keeps words such as "feather" or "ftw" from being treated as a credit
+            r'\s+feat\b\.?.*',
+            r'\s+ft\b\.?.*',
+            r'\s+featuring\b.*',
         ]
 
         self.artist_patterns = [
diff --git a/requirements.txt b/requirements.txt
index bd5aae1..219ba86 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,5 @@ python-dotenv>=1.0.0
 cryptography>=41.0.0
 mutagen>=1.47.0
 Pillow>=10.0.0
-aiohttp>=3.9.0
\ No newline at end of file
+aiohttp>=3.9.0
+unidecode>=1.3.8