Update matching to expand the abbreviations "pt.", "vol.", and "feat."

pull/8/head
Broque Thomas 6 months ago
parent 6d43524a99
commit 339cbe3c53

@@ -64,6 +64,17 @@ class MusicMatchingEngine:
text = unidecode(text)
text = text.lower()
# Expand specific abbreviations for better matching
abbreviation_map = {
r'\bpt\.': 'part', # "pt." → "part"
r'\bvol\.': 'volume', # "vol." → "volume"
r'\bfeat\.': 'featured' # "feat." → "featured"
# Removed "ft." → "featured" (ambiguous: could be "feet" in measurements)
}
for pattern, replacement in abbreviation_map.items():
text = re.sub(pattern, replacement, text)
# --- IMPROVEMENT V4 ---
# The user correctly pointed out that replacing '$' with 's' was incorrect
# as it breaks searching for stylized names like A$AP Rocky.
@@ -84,9 +95,9 @@ class MusicMatchingEngine:
"""Returns a 'core' version of a string with only letters and numbers for a strict comparison."""
if not text:
return ""
# Transliterate, lowercase, and remove everything that isn't a letter or digit.
text = unidecode(text).lower()
return re.sub(r'[^a-z0-9]', '', text)
# Use normalize_string first to get abbreviation expansion, then strip to core
normalized = self.normalize_string(text)
return re.sub(r'[^a-z0-9]', '', normalized)
def clean_title(self, title: str) -> str:
"""Cleans title by removing common extra info using regex for fuzzy matching."""

Loading…
Cancel
Save