Update matching to expand the "pt.", "vol." and "feat." abbreviations

6 months ago · 339cbe3c53
parent 6d43524a99
commit 339cbe3c53
1 changed files with 14 additions and 3 deletions
--- a/core/matching_engine.py
+++ b/core/matching_engine.py
@@ -64,6 +64,17 @@ class MusicMatchingEngine:
        text = unidecode(text)
        text = text.lower()
        
+        # Expand specific abbreviations for better matching
+        abbreviation_map = {
+            r'\bpt\.': 'part',      # "pt." → "part"
+            r'\bvol\.': 'volume',   # "vol." → "volume"
+            r'\bfeat\.': 'featured' # "feat." → "featured"
+            # Removed "ft." → "featured" (ambiguous: could be "feet" in measurements)
+        }
+        
+        for pattern, replacement in abbreviation_map.items():
+            text = re.sub(pattern, replacement, text)
+        
        # --- IMPROVEMENT V4 ---
        # The user correctly pointed out that replacing '$' with 's' was incorrect
        # as it breaks searching for stylized names like A$AP Rocky.
@@ -84,9 +95,9 @@ class MusicMatchingEngine:
        """Returns a 'core' version of a string with only letters and numbers for a strict comparison."""
        if not text:
            return ""
-        # Transliterate, lowercase, and remove everything that isn't a letter or digit.
-        text = unidecode(text).lower()
-        return re.sub(r'[^a-z0-9]', '', text)
+        # Use normalize_string first to get abbreviation expansion, then strip to core
+        normalized = self.normalize_string(text)
+        return re.sub(r'[^a-z0-9]', '', normalized)

    def clean_title(self, title: str) -> str:
        """Cleans title by removing common extra info using regex for fuzzy matching."""