better matching

pull/2/head
Broque Thomas 7 months ago
parent fdc79c16b4
commit 86fcdd6869

@ -24,14 +24,12 @@ class MatchResult:
class MusicMatchingEngine:
def __init__(self):
# The order of these patterns is important. More general patterns go first.
# Conservative title patterns - only remove clear noise, preserve meaningful differences like remixes
self.title_patterns = [
# General patterns to remove all content in brackets/parentheses
r'\(.*\)',
r'\[.*\]',
# General pattern to remove everything after a hyphen, which is common for version info
r'\s-\s.*',
# Patterns to remove featuring artists from the title itself
# Only remove explicit/clean markers - preserve remixes, versions, and content after hyphens
r'\s*\(explicit\)',
r'\s*\(clean\)',
# Remove featuring artists from the title itself
r'\sfeat\.?.*',
r'\sft\.?.*',
r'\sfeaturing.*'

@ -943,28 +943,45 @@ class MusicDatabase:
"""Generate variations of track title for better matching"""
variations = [title] # Always include original
# IMPORTANT: Generate bracket/dash style variations for better matching
# Convert "Track - Instrumental" to "Track (Instrumental)" and vice versa
if ' - ' in title:
# Convert dash style to parentheses style
dash_parts = title.split(' - ', 1)
if len(dash_parts) == 2:
paren_version = f"{dash_parts[0]} ({dash_parts[1]})"
variations.append(paren_version)
if '(' in title and ')' in title:
# Convert parentheses style to dash style
dash_version = re.sub(r'\s*\(([^)]+)\)\s*', r' - \1', title)
if dash_version != title:
variations.append(dash_version)
# Clean up the title
title_lower = title.lower().strip()
# Common track title variations
# Conservative track title variations - only remove clear noise, preserve meaningful differences
track_patterns = [
# Remove version/remix info
r'\s*\(.*version\)',
r'\s*\(.*remix\)',
r'\s*\(.*mix\)',
r'\s*\(.*edit\)',
r'\s*\(.*radio\)',
# Remove featuring artists
# Remove explicit/clean markers only
r'\s*\(explicit\)',
r'\s*\(clean\)',
r'\s*\[explicit\]',
r'\s*\[clean\]',
# Remove featuring artists in parentheses
r'\s*\(.*feat\..*\)',
r'\s*\(.*featuring.*\)',
r'\s*\(.*ft\..*\)',
# Remove brackets/parentheses content
r'\s*\[.*\]',
r'\s*\(.*\)',
# Remove everything after dash
r'\s*-\s*.*'
# Remove radio/TV edit markers
r'\s*\(radio\s*edit\)',
r'\s*\(tv\s*edit\)',
r'\s*\[radio\s*edit\]',
r'\s*\[tv\s*edit\]',
]
# DO NOT remove remixes, versions, or content after dashes
# These are meaningful distinctions that should not be collapsed
for pattern in track_patterns:
# Apply pattern to original title
cleaned = re.sub(pattern, '', title, flags=re.IGNORECASE).strip()
@ -1020,22 +1037,30 @@ class MusicDatabase:
return 0.0
def _clean_track_title_for_comparison(self, title: str) -> str:
    """Clean track title for comparison by normalizing brackets/dashes and removing noise.

    Brackets, parentheses and dashes are turned into single spaces, so
    "Track - Instrumental" and "Track (Instrumental)" clean to the same
    string. Trailing featuring credits ("feat.", "ft.", "featuring") and
    standalone explicit/clean markers are then dropped. Remix/version
    wording is deliberately kept because it distinguishes recordings.

    Args:
        title: Raw track title.

    Returns:
        The lower-cased, whitespace-collapsed title. An empty string is
        returned for empty input. A removal step that would leave nothing
        is skipped, so a track literally named "Clean" stays "clean"
        instead of collapsing to an empty string.
    """
    if not title:
        return ""

    def squeeze(text: str) -> str:
        # Collapse runs of whitespace and trim the ends.
        return re.sub(r'\s+', ' ', text).strip()

    # STEP 1: Normalize bracket/dash styles for consistent matching.
    cleaned = re.sub(r'[\[\]()]', ' ', title.lower())
    cleaned = squeeze(cleaned.replace('-', ' '))

    # STEP 2: Remove clear noise. Every pattern is anchored on a word
    # boundary so it cannot fire inside another word ("left." contains
    # "ft.", "cleanin'" contains "clean"). Matches are replaced with a space
    # rather than nothing so the neighbouring words are not glued together.
    noise_patterns = (
        # Featuring credits run to the end of the title; handled first so a
        # title such as "Clean (feat. X)" reduces to "clean".
        r'\b(?:feat\.|ft\.|featuring).*',
        # Explicit/clean markers (brackets were already removed in STEP 1).
        r'\b(?:explicit|clean)\b',
    )
    for pattern in noise_patterns:
        candidate = squeeze(re.sub(pattern, ' ', cleaned))
        # Never let noise removal erase the whole title: two empty strings
        # would compare as a perfect match.
        if candidate:
            cleaned = candidate

    return cleaned
def _clean_album_title_for_comparison(self, title: str) -> str:

@ -1047,9 +1047,14 @@ class ArtistResultCard(QFrame):
def mousePressEvent(self, event):
    """Handle click to select artist.

    A left-button press emits ``artist_selected`` with this card's artist.
    The event is then forwarded to the base class handler.

    Args:
        event: The mouse event delivered by Qt.
    """
    try:
        if event.button() == Qt.MouseButton.LeftButton:
            self.artist_selected.emit(self.artist)
        super().mousePressEvent(event)
    except RuntimeError as e:
        # Qt object has been deleted; report it and drop the event instead
        # of letting the exception escape the event handler.
        print(f"⚠️ ArtistCard object deleted during mouse event: {e}")
class AlbumCard(QFrame):
"""Card widget for displaying album information"""
@ -1485,12 +1490,17 @@ class AlbumCard(QFrame):
def mousePressEvent(self, event):
    """Handle click for download.

    A left-button press while the progress overlay is hidden logs the click
    and emits ``download_requested`` with this card's album. The event is
    then forwarded to the base class handler.

    Args:
        event: The mouse event delivered by Qt.
    """
    try:
        # Don't allow downloads if already downloading
        if (event.button() == Qt.MouseButton.LeftButton and
                not self.progress_overlay.isVisible()):
            print(f"🖱️ Album card clicked: {self.album.name} (owned: {self.is_owned})")
            self.download_requested.emit(self.album)
        super().mousePressEvent(event)
    except RuntimeError as e:
        # Qt object has been deleted; report it and drop the event instead
        # of letting the exception escape the event handler.
        print(f"⚠️ AlbumCard object deleted during mouse event: {e}")
class DownloadMissingAlbumTracksModal(QDialog):
"""Enhanced modal for downloading missing album tracks with live progress tracking"""
@ -1978,7 +1988,10 @@ class DownloadMissingAlbumTracksModal(QDialog):
else:
self.download_in_progress = False
self.cancel_btn.hide()
self.process_finished.emit()
try:
self.process_finished.emit()
except RuntimeError as e:
print(f"⚠️ Modal object deleted during analysis complete signal: {e}")
QMessageBox.information(self, "Analysis Complete", "All album tracks already exist in Plex! No downloads needed.")
# Close with accept since all tracks are already available (success case)
self.accept()
@ -2384,7 +2397,10 @@ class DownloadMissingAlbumTracksModal(QDialog):
self.cancel_btn.hide()
# Emit process_finished signal to unlock UI
self.process_finished.emit()
try:
self.process_finished.emit()
except RuntimeError as e:
print(f"⚠️ Modal object deleted during downloads complete signal: {e}")
# Determine the final message based on success or failure
if self.permanently_failed_tracks:
@ -2494,16 +2510,24 @@ class DownloadMissingAlbumTracksModal(QDialog):
def on_cancel_clicked(self):
    """Handle Cancel button.

    Cancels any ongoing operations, emits ``process_finished`` and rejects
    the dialog, each exactly once. A ``RuntimeError`` raised along the way
    (the Qt object behind the modal has been deleted) is reported and
    swallowed so the click handler never raises.
    """
    try:
        self.cancel_operations()
        self.process_finished.emit()
        self.reject()
    except RuntimeError as e:
        # Nothing left to cancel or close once the modal is gone.
        print(f"⚠️ Modal object deleted during cancel: {e}")
def on_close_clicked(self):
"""Handle Close button"""
# NOTE(review): this span appears to hold both the pre-change body (the next
# four lines) and the post-change body (the try block) of a rendered diff.
# TODO confirm against the real file before treating both as live code.
if self.cancel_requested or not self.download_in_progress:
self.cancel_operations()
self.process_finished.emit()
self.reject()
try:
# NOTE(review): indentation is not visible here, so which of the three calls
# below are guarded by the `if` cannot be determined; verify in the source file.
if self.cancel_requested or not self.download_in_progress:
self.cancel_operations()
self.process_finished.emit()
self.reject()
except RuntimeError as e:
# A RuntimeError is printed and swallowed so the click handler does not raise.
print(f"⚠️ Modal object deleted during close: {e}")
pass
def cancel_operations(self):
"""Cancel any ongoing operations"""

@ -7542,10 +7542,12 @@ class DownloadsPage(QWidget):
print(f" Album name from album_info: '{album_info['album_name']}'")
print(f" Original download item title: '{download_item.title}'")
# Use clean track name from album_info if available
clean_track_name = album_info.get('clean_track_name', download_item.title)
if hasattr(download_item, '_spotify_clean_title'):
# Use the Spotify title information if available (most accurate for matched tracks)
clean_track_name = download_item.title
if hasattr(download_item, '_spotify_clean_title') and download_item._spotify_clean_title:
clean_track_name = download_item._spotify_clean_title
elif album_info.get('clean_track_name'):
clean_track_name = album_info['clean_track_name']
print(f" Clean track name to use: '{clean_track_name}'")
@ -7564,10 +7566,12 @@ class DownloadsPage(QWidget):
else:
# Single track structure: Transfer/ARTIST/ARTIST - SINGLE/SINGLE.ext
# Use clean track name for single folder and filename
clean_track_name = album_info.get('clean_track_name', download_item.title) if album_info else download_item.title
if hasattr(download_item, '_spotify_clean_title'):
# Use the Spotify title information if available (most accurate for matched tracks)
clean_track_name = download_item.title
if hasattr(download_item, '_spotify_clean_title') and download_item._spotify_clean_title:
clean_track_name = download_item._spotify_clean_title
elif album_info and album_info.get('clean_track_name'):
clean_track_name = album_info['clean_track_name']
print(f" Original download item title: '{download_item.title}'")
print(f" Clean track name to use: '{clean_track_name}'")
@ -8098,8 +8102,9 @@ class DownloadsPage(QWidget):
track_num = spotify_track_number
print(f"🎯 Using Spotify track number: {track_num}")
# Store the clean Spotify track name for use in file organization
download_item._spotify_clean_title = clean_track_name
# Store the clean Spotify track name for use in file organization (only if not already set)
if not hasattr(download_item, '_spotify_clean_title') or not download_item._spotify_clean_title:
download_item._spotify_clean_title = clean_track_name
download_item._spotify_clean_album = album_name
# Extract album image URL from detailed track data
@ -8147,7 +8152,9 @@ class DownloadsPage(QWidget):
# Get track number from metadata or filename as fallback
track_num = self._extract_track_number(download_item)
download_item._spotify_clean_title = clean_track_name
# Only set if not already set (preserve original Spotify title from modal)
if not hasattr(download_item, '_spotify_clean_title') or not download_item._spotify_clean_title:
download_item._spotify_clean_title = clean_track_name
download_item._spotify_clean_album = album_name
# Try to get album image URL from matched_album if available
@ -8175,7 +8182,9 @@ class DownloadsPage(QWidget):
print(f"✅ Using cleaned Soulseek album context: '{clean_album}' (cleaned from '{download_item.album}')")
print(f"🧹 Cleaned track title: '{clean_title}' (cleaned from '{download_item.title}')")
download_item._spotify_clean_title = clean_title
# Only set if not already set (preserve original Spotify title from modal)
if not hasattr(download_item, '_spotify_clean_title') or not download_item._spotify_clean_title:
download_item._spotify_clean_title = clean_title
download_item._spotify_clean_album = clean_album
# Try to get album image URL from matched_album if available
@ -8196,7 +8205,9 @@ class DownloadsPage(QWidget):
print(f"🎯 No album context found, defaulting to single track structure with cleaned title")
clean_title = self._clean_track_title(download_item.title, artist.name)
download_item._spotify_clean_title = clean_title
# Only set if not already set (preserve original Spotify title from modal)
if not hasattr(download_item, '_spotify_clean_title') or not download_item._spotify_clean_title:
download_item._spotify_clean_title = clean_title
# Try to get album image URL from matched_album if available
album_image_url = None
@ -8273,7 +8284,11 @@ class DownloadsPage(QWidget):
self.matching_engine.normalize_string(track_name)
)
if similarity > 0.7: # Good match threshold
# Use higher threshold for remix matching to ensure precision
is_remix = any(word in clean_track.lower() for word in ['remix', 'mix', 'edit', 'version'])
threshold = 0.9 if is_remix else 0.7 # Much stricter for remixes
if similarity > threshold:
print(f"✅ FOUND: '{track_name}' (track #{track_number}) matches '{clean_track}' (similarity: {similarity:.2f})")
print(f"🎯 Forcing album classification for track in '{album.name}'")
@ -8282,7 +8297,7 @@ class DownloadsPage(QWidget):
'is_album': True, # Always true - we found it in an album!
'album_name': album.name,
'track_number': track_number,
'clean_track_name': track_name, # Use Spotify's clean name
'clean_track_name': clean_track, # Use the ORIGINAL download title, not the database match
'album_image_url': album.image_url,
'confidence': similarity,
'source': 'album_context_search'

@ -200,18 +200,20 @@ class PlaylistTrackAnalysisWorker(QRunnable):
# Get database instance
db = get_database()
# --- Generate a list of title variations ---
# --- Generate conservative title variations (preserve meaningful differences) ---
title_variations = [original_title]
if " - " in original_title:
title_variations.append(original_title.split(' - ')[0].strip())
# Only add cleaned version if it removes clear noise (not meaningful content like remixes)
cleaned_for_search = clean_track_name_for_search(original_title)
if cleaned_for_search.lower() != original_title.lower():
title_variations.append(cleaned_for_search)
# Use matching engine's conservative clean_title (no longer strips remixes/versions)
base_title = self.matching_engine.clean_title(original_title)
if base_title.lower() not in [t.lower() for t in title_variations]:
title_variations.append(base_title)
# DO NOT strip content after dashes - this removes important remix/version info
unique_title_variations = list(dict.fromkeys(title_variations))

Loading…
Cancel
Save