diff --git a/web_server.py b/web_server.py index 8d426861..7dd0a4da 100644 --- a/web_server.py +++ b/web_server.py @@ -6052,30 +6052,42 @@ def clean_youtube_track_title(title, artist_name=None): original_title = title - # FIRST: Remove artist name if it appears at the start with a dash - # Handle formats like "LITTLE BIG - MOUSTACHE" → "MOUSTACHE" - if artist_name: - # Create a regex pattern to match artist name at the beginning followed by dash - # Use word boundaries and case-insensitive matching for better accuracy - artist_pattern = r'^' + re.escape(artist_name.strip()) + r'\s*[-–—]\s*' + # FIRST: Try to extract track name from "Artist - Track" or "Track - Artist" format + artist_removed = False + if artist_name and '-' in title: + # Check if artist is at the start: "Artist - Track" or "Artist & Others - Track" + # Handle collaborations: "Artist1 & Artist2 - Track" or "Artist, Artist2 - Track" + artist_pattern = r'^' + re.escape(artist_name.strip()) + r'(?:\s*[&,x]\s*[^-]+)?\s*[-–—]\s*' cleaned_title = re.sub(artist_pattern, '', title, flags=re.IGNORECASE).strip() - - # Debug logging for artist removal + if cleaned_title != title: - print(f"🎯 Removed artist from title: '{title}' -> '{cleaned_title}' (artist: '{artist_name}')") - - title = cleaned_title - - # Remove content in brackets/braces of any type SECOND (before general dash removal) + print(f"🎯 Removed artist from start: '{title}' -> '{cleaned_title}' (artist: '{artist_name}')") + title = cleaned_title + artist_removed = True + else: + # Artist not at start, check if format is "Track - Artist" by looking for artist at end + # Only remove trailing artist if it comes after a dash + artist_end_pattern = r'\s*[-–—]\s*' + re.escape(artist_name.strip()) + r'(?:\s*[&,x]\s*[^-]+)?\s*$' + cleaned_title = re.sub(artist_end_pattern, '', title, flags=re.IGNORECASE).strip() + + if cleaned_title != title: + print(f"🎯 Removed artist from end: '{title}' -> '{cleaned_title}' (artist: '{artist_name}')") + title = cleaned_title + artist_removed = True + + # Remove content in brackets/braces BEFORE removing dashes title = re.sub(r'【[^】]*】', '', title) # Japanese brackets title = re.sub(r'\s*\([^)]*\)', '', title) # Parentheses - removes everything after first ( title = re.sub(r'\s*\(.*$', '', title) # Remove everything after lone ( (unmatched parentheses) title = re.sub(r'\[[^\]]*\]', '', title) # Square brackets title = re.sub(r'\{[^}]*\}', '', title) # Curly braces title = re.sub(r'<[^>]*>', '', title) # Angle brackets - - # Remove everything after a dash (often album or extra info) - title = re.sub(r'\s*-\s*.*$', '', title) + + # ONLY remove trailing dashes with garbage if artist was already removed + # This prevents "Artist1, Artist2 - Song" from becoming "Artist1, Artist2" + if artist_removed: + # Safe to remove any remaining trailing dash content (likely album/extra info) + title = re.sub(r'\s*-\s*.*$', '', title) # Remove everything after pipes (|) - often used for additional context title = re.split(r'\s*\|\s*', title)[0].strip() @@ -6108,17 +6120,27 @@ def clean_youtube_track_title(title, artist_name=None): for pattern in noise_patterns: title = re.sub(pattern, '', title, flags=re.IGNORECASE) - # Remove artist name from title if present + # Only remove artist name if it's standalone (not part of "Artist1 & Artist2") + # Skip this if the title contains collaboration indicators near the artist name if artist_name: - # Try removing exact artist name - title = re.sub(rf'\b{re.escape(artist_name)}\b', '', title, flags=re.IGNORECASE) - # Try removing artist name with common separators - title = re.sub(rf'\b{re.escape(artist_name)}\s*[-–—:]\s*', '', title, flags=re.IGNORECASE) - title = re.sub(rf'^{re.escape(artist_name)}\s*[-–—:]\s*', '', title, flags=re.IGNORECASE) + # Check if artist appears with collaboration indicators (& or ,) + collab_pattern = rf'\b{re.escape(artist_name)}\s*[&,]\s*\w+|[\w\s]+[&,]\s*{re.escape(artist_name)}\b' + has_collab = re.search(collab_pattern, title, flags=re.IGNORECASE) + + if not has_collab: + # Safe to remove artist - it's standalone + title = re.sub(rf'\b{re.escape(artist_name)}\b', '', title, flags=re.IGNORECASE) + title = re.sub(rf'\b{re.escape(artist_name)}\s*[-–—:]\s*', '', title, flags=re.IGNORECASE) + title = re.sub(rf'^{re.escape(artist_name)}\s*[-–—:]\s*', '', title, flags=re.IGNORECASE) + else: + print(f"⚠️ Skipping artist removal - collaboration detected: '{title}'") + # Remove "prod. Producer" patterns + title = re.sub(r'\s+prod\.?\s+\S+', '', title, flags=re.IGNORECASE) + # Remove all quotes and other punctuation title = re.sub(r'["\'''""„‚‛‹›«»]', '', title) - + # Remove featured artist patterns (after removing parentheses) feat_patterns = [ r'\s+feat\.?\s+.+$', # " feat Artist" at end diff --git a/webui/static/script.js b/webui/static/script.js index b4502c06..85afc71c 100644 --- a/webui/static/script.js +++ b/webui/static/script.js @@ -2926,7 +2926,7 @@ async function loadBeatportChartsFromBackend() { spotify_track: result.spotify_data ? result.spotify_data.name : '-', spotify_artist: result.spotify_data && result.spotify_data.artists ? result.spotify_data.artists.map(a => a.name || a).join(', ') : '-', - spotify_album: result.spotify_data ? result.spotify_data.album : '-' + spotify_album: result.spotify_data ? (typeof result.spotify_data.album === 'object' ? result.spotify_data.album.name : result.spotify_data.album) : '-' })); // Create Beatport state in YouTube system for modal functionality @@ -3162,7 +3162,7 @@ async function rehydrateBeatportChart(chartInfo, userRequested = false) { spotify_track: result.spotify_data ? result.spotify_data.name : '-', spotify_artist: result.spotify_data && result.spotify_data.artists ? result.spotify_data.artists.map(a => a.name || a).join(', ') : '-', - spotify_album: result.spotify_data ? result.spotify_data.album : '-' + spotify_album: result.spotify_data ? (typeof result.spotify_data.album === 'object' ? result.spotify_data.album.name : result.spotify_data.album) : '-' })); } @@ -11200,7 +11200,7 @@ async function openTidalDiscoveryModal(playlistId, playlistData) { spotify_track: result.spotify_data ? result.spotify_data.name : (result.spotify_track || '-'), spotify_artist: result.spotify_data && result.spotify_data.artists ? (Array.isArray(result.spotify_data.artists) ? result.spotify_data.artists.join(', ') : result.spotify_data.artists) : (result.spotify_artist || '-'), - spotify_album: result.spotify_data ? result.spotify_data.album : (result.spotify_album || '-'), + spotify_album: result.spotify_data ? (typeof result.spotify_data.album === 'object' ? result.spotify_data.album.name : result.spotify_data.album) : (result.spotify_album || '-'), spotify_data: result.spotify_data, // Pass through spotify_data spotify_id: result.spotify_id, // Pass through spotify_id manual_match: result.manual_match // Pass through manual match flag @@ -11319,7 +11319,7 @@ function startTidalDiscoveryPolling(fakeUrlHash, playlistId) { spotify_track: result.spotify_data ? result.spotify_data.name : (result.spotify_track || '-'), spotify_artist: result.spotify_data && result.spotify_data.artists ? (Array.isArray(result.spotify_data.artists) ? result.spotify_data.artists.join(', ') : result.spotify_data.artists) : (result.spotify_artist || '-'), - spotify_album: result.spotify_data ? result.spotify_data.album : (result.spotify_album || '-'), + spotify_album: result.spotify_data ? (typeof result.spotify_data.album === 'object' ? result.spotify_data.album.name : result.spotify_data.album) : (result.spotify_album || '-'), spotify_data: result.spotify_data, // Pass through spotify_id: result.spotify_id, // Pass through manual_match: result.manual_match // Pass through @@ -13071,7 +13071,7 @@ function startBeatportDiscoveryPolling(urlHash) { spotify_track: result.spotify_data ? result.spotify_data.name : (result.spotify_track || '-'), spotify_artist: result.spotify_data && result.spotify_data.artists ? result.spotify_data.artists.map(a => a.name || a).join(', ') : (result.spotify_artist || '-'), - spotify_album: result.spotify_data ? result.spotify_data.album : (result.spotify_album || '-'), + spotify_album: result.spotify_data ? (typeof result.spotify_data.album === 'object' ? result.spotify_data.album.name : result.spotify_data.album) : (result.spotify_album || '-'), spotify_data: result.spotify_data, // Pass through spotify_id: result.spotify_id, // Pass through manual_match: result.manual_match // Pass through