From 06defcfa3d48c21c98b35fe20908c7abe1d69618 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Mon, 6 Apr 2026 22:15:21 -0700 Subject: [PATCH] Fix streaming source matching and global search download bubbles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Streaming matching: add artist gate rejecting candidates with artist similarity below 0.4, raise threshold to 0.60, block fallback to Soulseek filename matcher for Tidal/Qobuz/HiFi/Deezer. Fix single-char artist containment bug where normalize_string strips non-ASCII (e.g. "B小町" → "b") causing "b" to match any artist containing that letter. Fixed in both score_track_match and the Soulseek scorer. YouTube and Soulseek matching behavior unchanged. Global search: add registerSearchDownload() calls to _gsClickAlbum and _gsClickTrack so downloads create bubble snapshots on dashboard and search page, matching the enhanced search standard. Global search escaping: add _escAttr() helper to handle newlines in album/artist names that broke inline onclick string literals. 
--- core/matching_engine.py | 12 ++++++++++-- web_server.py | 39 +++++++++++++++++++++++++++++++++++---- webui/static/script.js | 41 +++++++++++++++++++++++++++++++++++------ 3 files changed, 80 insertions(+), 12 deletions(-) diff --git a/core/matching_engine.py b/core/matching_engine.py index e5d66d51..ae5bb066 100644 --- a/core/matching_engine.py +++ b/core/matching_engine.py @@ -269,7 +269,11 @@ class MusicMatchingEngine: cand_artist_normalized = self.normalize_string(raw_cand_artist) cand_artist_cleaned = self.clean_artist(raw_cand_artist) # Check containment (e.g., "drake" in "drake 21 savage") - if src_artist and src_artist in cand_artist_normalized: + # Skip for very short names (≤2 chars) — "b" matches everything + if src_artist and len(src_artist) > 2 and src_artist in cand_artist_normalized: + best_artist_score = 1.0 + break + elif src_artist and src_artist == cand_artist_normalized: best_artist_score = 1.0 break score = self.similarity_score(src_artist, cand_artist_cleaned) @@ -604,9 +608,13 @@ class MusicMatchingEngine: best_artist_similarity = 0.0 for artist in spotify_artists_norm: - if artist in slskd_filename_norm: + # Skip containment for very short names (≤2 chars) — "b" matches everything + if artist and len(artist) > 2 and artist in slskd_filename_norm: artist_score = 1.0 # Perfect match if any artist is found break + elif artist and len(artist) <= 2 and re.search(r'\b' + re.escape(artist) + r'\b', slskd_filename_norm): + artist_score = 1.0 + break else: # Try similarity matching as fallback for misspellings/variations artist_ratio = SequenceMatcher(None, artist, slskd_filename_norm).ratio() diff --git a/web_server.py b/web_server.py index 6b6f0efb..3960c6f7 100644 --- a/web_server.py +++ b/web_server.py @@ -25257,7 +25257,6 @@ def get_valid_candidates(results, spotify_track, query): _streaming_sources = ("youtube", "tidal", "qobuz", "hifi", "deezer_dl") if results[0].username in _streaming_sources: source_label = 
results[0].username.replace('_dl', '').title() - expected_artists = spotify_track.artists if spotify_track else [] expected_title = spotify_track.name if spotify_track else '' expected_duration = spotify_track.duration_ms if spotify_track else 0 @@ -25298,9 +25297,37 @@ def get_valid_candidates(results, spotify_track, query): is_wrong_version = True break + # Artist gate — streaming APIs (Tidal/Qobuz/HiFi/Deezer) have reliable metadata, + # so "My Will" by "B. Starr" should never match expected "B小町". + # Skip for YouTube — artist is parsed from video titles and often unreliable. + if r.username != 'youtube': + from difflib import SequenceMatcher + import re as _re + _cand_artist_raw = r.artist or '' + _cand_artist = matching_engine.normalize_string(_cand_artist_raw) + _best_artist = 0.0 + for _ea in expected_artists: + _ea_norm = matching_engine.normalize_string(_ea) + if not _ea_norm: + continue + # For short normalized names (e.g. "B小町"→"b"), containment is useless. + # Compare original Unicode strings directly via similarity instead. 
+ if len(_ea_norm) <= 2: + _best_artist = max(_best_artist, SequenceMatcher(None, _ea.lower(), _cand_artist_raw.lower()).ratio()) + elif _re.search(r'\b' + _re.escape(_ea_norm) + r'\b', _cand_artist): + _best_artist = 1.0 + break + elif _ea_norm == _cand_artist: + _best_artist = 1.0 + break + else: + _best_artist = max(_best_artist, SequenceMatcher(None, _ea_norm, _cand_artist).ratio()) + if _best_artist < 0.4 and confidence < 0.85: + continue + r.confidence = confidence r.version_type = 'wrong_version' if is_wrong_version else match_type - if confidence >= 0.55: + if confidence >= 0.60: scored.append(r) if scored: @@ -25311,8 +25338,12 @@ def get_valid_candidates(results, spotify_track, query): f"(best: {best.confidence:.2f} '{best.artist} - {best.title}')") return scored else: - print(f"⚠️ [{source_label}] No streaming results passed validation (threshold: 0.55) — falling through to matching engine") - # Fall through to standard matching engine below + if results[0].username == 'youtube': + print(f"⚠️ [{source_label}] No streaming results passed validation — falling through to filename matching") + # YouTube artist data is unreliable, allow fallback to filename-based matching + else: + print(f"⚠️ [{source_label}] No streaming results passed validation (threshold: 0.60, artist gate: 0.40) — rejecting all candidates") + return [] # Tidal/Qobuz/HiFi/Deezer have structured metadata; don't fall back to filename matching # Uses the existing, powerful matching engine for scoring (Soulseek P2P results) _max_q = config_manager.get('soulseek.max_peer_queue', 0) or 0 diff --git a/webui/static/script.js b/webui/static/script.js index fdd17bc4..c424068d 100644 --- a/webui/static/script.js +++ b/webui/static/script.js @@ -16914,6 +16914,7 @@ function showToast(message, type = 'success', helpSection = null) { } function _escToast(s) { const d = document.createElement('div'); d.textContent = s; return d.innerHTML; } +function _escAttr(s) { return _escToast(s).replace(/'/g, 
"\\'").replace(/\n/g, ' ').replace(/\r/g, ''); } function _updateNotifBadge() { const badge = document.getElementById('notif-bell-badge'); @@ -17279,13 +17280,13 @@ function _gsRender(data) { if (dbArtists.length) { h += '