From 06defcfa3d48c21c98b35fe20908c7abe1d69618 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Mon, 6 Apr 2026 22:15:21 -0700 Subject: [PATCH] Fix streaming source matching and global search download bubbles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Streaming matching: add artist gate rejecting candidates with artist similarity below 0.4, raise threshold to 0.60, block fallback to Soulseek filename matcher for Tidal/Qobuz/HiFi/Deezer. Fix single-char artist containment bug where normalize_string strips non-ASCII (e.g. "B小町" → "b") causing "b" to match any artist containing that letter. Fixed in both score_track_match and the Soulseek scorer. YouTube and Soulseek matching behavior unchanged. Global search: add registerSearchDownload() calls to _gsClickAlbum and _gsClickTrack so downloads create bubble snapshots on dashboard and search page, matching the enhanced search standard. Global search escaping: add _escAttr() helper to handle newlines in album/artist names that broke inline onclick string literals. 
--- core/matching_engine.py | 12 ++++++++++-- web_server.py | 39 +++++++++++++++++++++++++++++++++++---- webui/static/script.js | 41 +++++++++++++++++++++++++++++++++++------ 3 files changed, 80 insertions(+), 12 deletions(-) diff --git a/core/matching_engine.py b/core/matching_engine.py index e5d66d51..ae5bb066 100644 --- a/core/matching_engine.py +++ b/core/matching_engine.py @@ -269,7 +269,11 @@ class MusicMatchingEngine: cand_artist_normalized = self.normalize_string(raw_cand_artist) cand_artist_cleaned = self.clean_artist(raw_cand_artist) # Check containment (e.g., "drake" in "drake 21 savage") - if src_artist and src_artist in cand_artist_normalized: + # Skip for very short names (≤2 chars) — "b" matches everything + if src_artist and len(src_artist) > 2 and src_artist in cand_artist_normalized: + best_artist_score = 1.0 + break + elif src_artist and src_artist == cand_artist_normalized: best_artist_score = 1.0 break score = self.similarity_score(src_artist, cand_artist_cleaned) @@ -604,9 +608,13 @@ class MusicMatchingEngine: best_artist_similarity = 0.0 for artist in spotify_artists_norm: - if artist in slskd_filename_norm: + # Skip containment for very short names (≤2 chars) — "b" matches everything + if artist and len(artist) > 2 and artist in slskd_filename_norm: artist_score = 1.0 # Perfect match if any artist is found break + elif artist and len(artist) <= 2 and re.search(r'\b' + re.escape(artist) + r'\b', slskd_filename_norm): + artist_score = 1.0 + break else: # Try similarity matching as fallback for misspellings/variations artist_ratio = SequenceMatcher(None, artist, slskd_filename_norm).ratio() diff --git a/web_server.py b/web_server.py index 6b6f0efb..3960c6f7 100644 --- a/web_server.py +++ b/web_server.py @@ -25257,7 +25257,6 @@ def get_valid_candidates(results, spotify_track, query): _streaming_sources = ("youtube", "tidal", "qobuz", "hifi", "deezer_dl") if results[0].username in _streaming_sources: source_label = 
results[0].username.replace('_dl', '').title() - expected_artists = spotify_track.artists if spotify_track else [] expected_title = spotify_track.name if spotify_track else '' expected_duration = spotify_track.duration_ms if spotify_track else 0 @@ -25298,9 +25297,37 @@ def get_valid_candidates(results, spotify_track, query): is_wrong_version = True break + # Artist gate — streaming APIs (Tidal/Qobuz/HiFi/Deezer) have reliable metadata, + # so "My Will" by "B. Starr" should never match expected "B小町". + # Skip for YouTube — artist is parsed from video titles and often unreliable. + if r.username != 'youtube': + from difflib import SequenceMatcher + import re as _re + _cand_artist_raw = r.artist or '' + _cand_artist = matching_engine.normalize_string(_cand_artist_raw) + _best_artist = 0.0 + for _ea in expected_artists: + _ea_norm = matching_engine.normalize_string(_ea) + if not _ea_norm: + continue + # For short normalized names (e.g. "B小町"→"b"), containment is useless. + # Compare original Unicode strings directly via similarity instead. 
+ if len(_ea_norm) <= 2: + _best_artist = max(_best_artist, SequenceMatcher(None, _ea.lower(), _cand_artist_raw.lower()).ratio()) + elif _re.search(r'\b' + _re.escape(_ea_norm) + r'\b', _cand_artist): + _best_artist = 1.0 + break + elif _ea_norm == _cand_artist: + _best_artist = 1.0 + break + else: + _best_artist = max(_best_artist, SequenceMatcher(None, _ea_norm, _cand_artist).ratio()) + if _best_artist < 0.4 and confidence < 0.85: + continue + r.confidence = confidence r.version_type = 'wrong_version' if is_wrong_version else match_type - if confidence >= 0.55: + if confidence >= 0.60: scored.append(r) if scored: @@ -25311,8 +25338,12 @@ def get_valid_candidates(results, spotify_track, query): f"(best: {best.confidence:.2f} '{best.artist} - {best.title}')") return scored else: - print(f"⚠️ [{source_label}] No streaming results passed validation (threshold: 0.55) — falling through to matching engine") - # Fall through to standard matching engine below + if results[0].username == 'youtube': + print(f"⚠️ [{source_label}] No streaming results passed validation — falling through to filename matching") + # YouTube artist data is unreliable, allow fallback to filename-based matching + else: + print(f"⚠️ [{source_label}] No streaming results passed validation (threshold: 0.60, artist gate: 0.40) — rejecting all candidates") + return [] # Tidal/Qobuz/HiFi/Deezer have structured metadata; don't fall back to filename matching # Uses the existing, powerful matching engine for scoring (Soulseek P2P results) _max_q = config_manager.get('soulseek.max_peer_queue', 0) or 0 diff --git a/webui/static/script.js b/webui/static/script.js index fdd17bc4..c424068d 100644 --- a/webui/static/script.js +++ b/webui/static/script.js @@ -16914,6 +16914,7 @@ function showToast(message, type = 'success', helpSection = null) { } function _escToast(s) { const d = document.createElement('div'); d.textContent = s; return d.innerHTML; } +function _escAttr(s) { return _escToast(s).replace(/'/g, 
"\\'").replace(/\n/g, ' ').replace(/\r/g, ''); } function _updateNotifBadge() { const badge = document.getElementById('notif-bell-badge'); @@ -17279,13 +17280,13 @@ function _gsRender(data) { if (dbArtists.length) { h += '
📚 In Your Library
'; - h += dbArtists.map(a => `
${a.image_url ? `` : '🎤'}
${_escToast(a.name)}
Library
`).join(''); + h += dbArtists.map(a => `
${a.image_url ? `` : '🎤'}
${_escToast(a.name)}
Library
`).join(''); h += '
'; } if (artists.length) { h += `
🎤 Artists ${srcLabel}
`; - h += artists.map(a => `
${a.image_url ? `` : '🎤'}
${_escToast(a.name)}
`).join(''); + h += artists.map(a => `
${a.image_url ? `` : '🎤'}
${_escToast(a.name)}
`).join(''); h += '
'; } else if (loading.has('artists')) { h += `
🎤 Artists ${srcLabel}
Loading artists...
`; @@ -17299,7 +17300,7 @@ function _gsRender(data) { const ar = a.artist || (a.artists ? a.artists.join(', ') : ''); const yr = a.release_date ? a.release_date.substring(0, 4) : ''; const img = (a.image_url || '').replace(/'/g, "\\'"); - return `
${a.image_url ? `` : '💿'}
${_escToast(a.name)}
${_escToast(ar)}${yr ? ` · ${yr}` : ''}
`; + return `
${a.image_url ? `` : '💿'}
${_escToast(a.name)}
${_escToast(ar)}${yr ? ` · ${yr}` : ''}
`; }).join(''); h += ''; } @@ -17313,7 +17314,7 @@ function _gsRender(data) { h += singles.map(a => { const ar = a.artist || (a.artists ? a.artists.join(', ') : ''); const img = (a.image_url || '').replace(/'/g, "\\'"); - return `
${a.image_url ? `` : '🎶'}
${_escToast(a.name)}
${_escToast(ar)}
`; + return `
${a.image_url ? `` : '🎶'}
${_escToast(a.name)}
${_escToast(ar)}
`; }).join(''); h += ''; } @@ -17323,7 +17324,7 @@ function _gsRender(data) { h += tracks.map(t => { const ar = t.artist || (t.artists ? t.artists.join(', ') : ''); const dur = t.duration_ms ? `${Math.floor(t.duration_ms / 60000)}:${String(Math.floor((t.duration_ms % 60000) / 1000)).padStart(2, '0')}` : ''; - return `
${t.image_url ? `` : '🎵'}
${_escToast(t.name)}
${_escToast(ar)}${t.album ? ` · ${_escToast(t.album)}` : ''}
${dur}
`; + return `
${t.image_url ? `` : '🎵'}
${_escToast(t.name)}
${_escToast(ar)}${t.album ? ` · ${_escToast(t.album)}` : ''}
${dur}
`; }).join(''); h += ''; } else if (loading.has('tracks')) { @@ -17428,6 +17429,20 @@ async function _gsClickAlbum(albumId, albumName, artistName, imageUrl, source) { await openDownloadMissingModalForArtistAlbum(virtualPlaylistId, `[${artistName}] ${albumData.name}`, enrichedTracks, albumObj, artistObj, false); + // Register download bubble (same pattern as enhanced search) + registerSearchDownload( + { + id: albumData.id, + name: albumData.name, + artist: artistName, + image_url: albumData.images?.[0]?.url || imageUrl || null, + images: albumData.images || [] + }, + 'album', + virtualPlaylistId, + artistName + ); + } catch (e) { hideLoadingOverlay(); showToast('Failed to load album: ' + e.message, 'error'); @@ -17460,6 +17475,20 @@ async function _gsClickTrack(artistName, trackName, albumName, trackId, imageUrl await openDownloadMissingModalForArtistAlbum( virtualPlaylistId, playlistName, [enrichedTrack], albumObject, artistObject, false ); + + // Register download bubble (same pattern as enhanced search) + registerSearchDownload( + { + id: trackId || '', + name: trackName, + artist: artistName, + image_url: imageUrl || null, + images: imageUrl ? [{ url: imageUrl }] : [] + }, + 'track', + virtualPlaylistId, + artistName + ); } catch (e) { console.error('Error opening track download:', e); // Fallback: navigate to enhanced search @@ -55153,7 +55182,7 @@ async function _dblLoadList() {
${_escToast(e.artist_name)} ${e.created_at ? new Date(e.created_at).toLocaleDateString() : ''} - +
`).join(''); } catch (e) {