pull/2/head
Broque Thomas 10 months ago
parent a2d64e9953
commit 3e6b329e35

@ -42,7 +42,7 @@ class MusicMatchingEngine:
r'-\s*live.*',
r'-\s*remix',
r'-\s*radio edit',
# NEW: Patterns in the open title string (not in brackets)
# Patterns in the open title string (not in brackets)
r'\s+feat\.?.*',
r'\s+ft\.?.*',
r'\s+featuring.*'
@ -97,14 +97,6 @@ class MusicMatchingEngine:
return self.normalize_string(cleaned)
def extract_main_artist(self, artists: List[str]) -> str:
    """Return the cleaned name of the primary artist from a list.

    The first non-empty entry of ``artists`` is treated as the primary
    artist and passed through ``self.clean_artist``.

    Args:
        artists: Artist names; the list may be empty or ``None``, and
            individual entries may be blank or ``None``.

    Returns:
        The cleaned primary artist name, or ``""`` when ``artists``
        contains no usable entry.
    """
    if not artists:
        return ""
    # Skip blank/None placeholders so a bad leading entry neither gets
    # handed to clean_artist nor hides a real artist later in the list.
    main_artist = next((name for name in artists if name), None)
    if main_artist is None:
        return ""
    return self.clean_artist(main_artist)
def similarity_score(self, str1: str, str2: str) -> float:
"""Calculates similarity score between two strings."""
if not str1 or not str2:
@ -128,19 +120,35 @@ class MusicMatchingEngine:
def calculate_match_confidence(self, spotify_track: SpotifyTrack, plex_track: PlexTrackInfo) -> Tuple[float, str]:
"""Calculates a confidence score for a potential match with weighted factors."""
# Clean titles and artists for comparison
spotify_title_cleaned = self.clean_title(spotify_track.name)
plex_title_cleaned = self.clean_title(plex_track.title)
spotify_main_artist_cleaned = self.extract_main_artist(spotify_track.artists)
plex_artist_normalized = self.normalize_string(plex_track.artist)
# --- Enhanced Artist Scoring ---
# Get a list of all cleaned artist names from Spotify
spotify_artists_cleaned = [self.clean_artist(a) for a in spotify_track.artists if a]
plex_artist_cleaned = self.clean_artist(plex_track.artist)
plex_artist_normalized = self.normalize_string(plex_track.artist) # For substring check
# --- Calculate individual scores ---
title_score = self.similarity_score(spotify_title_cleaned, plex_title_cleaned)
# Calculate the best possible artist score by checking each Spotify artist against the Plex artist
best_artist_score = 0.0
for spotify_artist in spotify_artists_cleaned:
# First, check for a direct substring match, which is a very strong signal
if spotify_artist in plex_artist_normalized:
score = 1.0
else:
# Otherwise, calculate similarity on the cleaned versions
score = self.similarity_score(spotify_artist, plex_artist_cleaned)
if score > best_artist_score:
best_artist_score = score
# If we get a perfect match, we can stop early
if best_artist_score == 1.0:
break
# Artist score: check if main Spotify artist is in the Plex artist string
artist_score = 1.0 if spotify_main_artist_cleaned in plex_artist_normalized else self.similarity_score(spotify_main_artist_cleaned, self.clean_artist(plex_track.artist))
artist_score = best_artist_score
# --- Calculate other scores ---
title_score = self.similarity_score(spotify_title_cleaned, plex_title_cleaned)
duration_score = self.duration_similarity(spotify_track.duration_ms, plex_track.duration if plex_track.duration else 0)
# --- Weighted confidence calculation ---

Loading…
Cancel
Save