diff --git a/core/musicbrainz_search.py b/core/musicbrainz_search.py index ff05be13..ebd5184c 100644 --- a/core/musicbrainz_search.py +++ b/core/musicbrainz_search.py @@ -638,13 +638,34 @@ class MusicBrainzSearchClient: logger.warning(f"MusicBrainz track search failed: {e}") return [] - def _search_tracks_text(self, track_name: str, artist_name: Optional[str], limit: int) -> List[Track]: - """Fallback text-search path for structured/fuzzy track queries.""" + def _search_tracks_text(self, track_name: str, artist_name: Optional[str], limit: int, + strict: bool = True, min_score: Optional[int] = None) -> List[Track]: + """Fallback text-search path for structured/fuzzy track queries. + + `strict=True` (default) keeps the field-scoped Lucene phrase match — + precise enough for enrichment-style flows where the inputs are + already known-clean. `strict=False` switches to a bare-query + MB lookup that hits alias/sortname indexes with diacritic folding — + needed for user-facing fuzzy surfaces (Fix popup cascade) where + recall beats precision because the user picks from the result list. + Mirrors the same toggle already on `search_recording` in + `core/musicbrainz_client.py`. + + `min_score` defaults to `self._MIN_SCORE` (80) — sized for the + enhanced search tab where unfiltered MB results are noisy. Pass + a lower value (or 0) when a downstream stage like + `core.metadata.relevance.rerank_tracks` will re-sort by artist + match — MB's free-text score heavily favours title-text matches + ("Army of Me (Bjork)" cover by HIRS Collective scores 100, + Björk's canonical "Army of Me" scores 28) so a high floor drops + the right answer. + """ try: - results = self._client.search_recording(track_name, artist_name=artist_name, limit=limit) - # Score filter matches the artist/album logic — cuts garbage - # title collisions from unrelated recordings. - results = [r for r in results if (r.get('score', 0) or 0) >= self._MIN_SCORE] + results = self._client.search_recording( + track_name, artist_name=artist_name, limit=limit, strict=strict + ) + threshold = self._MIN_SCORE if min_score is None else min_score + results = [r for r in results if (r.get('score', 0) or 0) >= threshold] tracks = [] for r in results: @@ -656,6 +677,30 @@ class MusicBrainzSearchClient: logger.warning(f"MusicBrainz track search failed: {e}") return [] + def search_tracks_with_artist(self, track: str, artist: str, + limit: int = 10) -> List[Track]: + """Search MB tracks with track + artist passed as separate fields. + + Powers the Fix-popup metadata cascade (`GET /api/musicbrainz/search_tracks`) + and any future surface where the caller already has the title/artist + split and wants the fuzzy-recall MB lookup without going through + `search_tracks`'s structured-query dispatch (`Artist - Track` + splitting, bare-name artist-first browse). + + Uses bare-query mode (`strict=False`) — diacritic-folded, hits + alias/sortname indexes, no `AND`-clause that kills recall when + either side mis-matches. Score floor lowered to 20 (vs the search + tab's 80) so MB recordings whose title doesn't literally contain + the artist name still enter the candidate pool — the endpoint's + `rerank_tracks` pass then sorts by artist-match relevance. Without + this, queries like `Army of Me` + `Bjork` only surface covers + (score 73-100) and miss Björk's canonical recording (score 28). + """ + if not track and not artist: + return [] + return self._search_tracks_text(track, artist or None, limit, + strict=False, min_score=20) + def _pick_representative_release(self, releases: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: """Pick the best release out of a release-group's editions. diff --git a/tests/metadata/test_musicbrainz_search.py b/tests/metadata/test_musicbrainz_search.py index 350a52a4..0a97a89b 100644 --- a/tests/metadata/test_musicbrainz_search.py +++ b/tests/metadata/test_musicbrainz_search.py @@ -887,3 +887,135 @@ def test_get_recording_flat_swallows_client_errors(): client._client.get_recording.side_effect = RuntimeError('boom') assert client.get_recording_flat('rec-err') is None + + +# --------------------------------------------------------------------------- +# search_tracks_with_artist — Fix-popup cascade adapter +# --------------------------------------------------------------------------- + +def test_search_tracks_with_artist_uses_bare_query_mode(): + """The Fix-popup cascade needs MB's bare-query mode so diacritics and + bracketed suffixes don't kill recall. The adapter must pass strict=False + through to the underlying search_recording call.""" + client = MusicBrainzSearchClient() + client._client = MagicMock() + client._client.search_recording.return_value = [ + {'id': 'rec-1', 'title': 'Army of Me', 'score': 95, + 'releases': [{'id': 'rel-1', 'title': 'Post', 'date': '1995'}], + 'artist-credit': [{'name': 'Björk'}]}, + ] + + tracks = client.search_tracks_with_artist('Army of Me', 'Björk', limit=10) + + # strict=False is the critical bit — fuzzy recall, not phrase precision + client._client.search_recording.assert_called_once_with( + 'Army of Me', artist_name='Björk', limit=10, strict=False + ) + assert len(tracks) == 1 + assert tracks[0].name == 'Army of Me' + assert 'Björk' in tracks[0].artists + + +def test_search_tracks_with_artist_handles_missing_artist(): + """Track-only query (no artist) still works — empty string becomes + None, and the underlying client searches recordings without an + artist filter.""" + client = MusicBrainzSearchClient() + client._client = MagicMock() + client._client.search_recording.return_value = [ + {'id': 'rec-1', 'title': 'Some Song', 'score': 90, + 'releases': [], 'artist-credit': [{'name': 'Unknown'}]}, + ] + + client.search_tracks_with_artist('Some Song', '', limit=5) + + # Empty artist → None passed to the client so MB drops the AND clause + client._client.search_recording.assert_called_once_with( + 'Some Song', artist_name=None, limit=5, strict=False + ) + + +def test_search_tracks_with_artist_empty_returns_empty_list(): + """No track and no artist → return [] without hitting the network.""" + client = MusicBrainzSearchClient() + client._client = MagicMock() + + assert client.search_tracks_with_artist('', '', limit=10) == [] + client._client.search_recording.assert_not_called() + + +def test_search_tracks_with_artist_keeps_low_score_for_rerank(): + """Cascade path uses a low score floor (20) so MB recordings whose + title doesn't literally contain the artist name still enter the + candidate pool — the endpoint's rerank pass surfaces them by + artist-match relevance. Real example: "Army of Me" + "Bjork" — the + canonical Björk recording scores 28 in MB (title doesn't contain + "Bjork"), while title-collision covers like "Army of Me (Bjork)" + score 73-100. Strict 80 floor drops the right answer.""" + client = MusicBrainzSearchClient() + client._client = MagicMock() + client._client.search_recording.return_value = [ + {'id': 'rec-cover', 'title': 'Army of Me (Bjork)', 'score': 100, + 'releases': [], 'artist-credit': [{'name': 'HIRS Collective'}]}, + {'id': 'rec-canonical', 'title': 'Army of Me', 'score': 28, + 'releases': [], 'artist-credit': [{'name': 'Björk'}]}, + {'id': 'rec-noise', 'title': 'Bjork', 'score': 5, + 'releases': [], 'artist-credit': [{'name': 'Random'}]}, + ] + + tracks = client.search_tracks_with_artist('Army of Me', 'Bjork', limit=50) + + ids = [t.id for t in tracks] + # Score=28 canonical Björk recording is kept — the endpoint's rerank + # will surface it by artist match. + assert 'rec-canonical' in ids + assert 'rec-cover' in ids + # Score=5 is below the 20 floor — true garbage still filtered out. + assert 'rec-noise' not in ids + + +def test_search_tracks_text_keeps_min_score_default_80_for_enhanced_search(): + """The enhanced search tab path keeps the historical 80 floor because + it has no downstream rerank — unfiltered MB results would be noisy + for free-text user search.""" + client = MusicBrainzSearchClient() + client._client = MagicMock() + client._client.search_recording.return_value = [ + {'id': 'rec-good', 'title': 'Good', 'score': 95, + 'releases': [], 'artist-credit': [{'name': 'A'}]}, + {'id': 'rec-mid', 'title': 'Mid', 'score': 40, + 'releases': [], 'artist-credit': [{'name': 'A'}]}, + ] + + # No min_score → defaults to _MIN_SCORE (80) + tracks = client._search_tracks_text('Good', 'A', limit=10) + + titles = [t.name for t in tracks] + assert 'Good' in titles + assert 'Mid' not in titles + + +def test_search_tracks_text_strict_param_default_true(): + """Default strict=True preserves the historical behaviour of the + structured-query text-search fallback path — important so the + enrichment-style `search_tracks('Artist - Track')` flow stays on + field-scoped Lucene phrase matching as before.""" + client = MusicBrainzSearchClient() + client._client = MagicMock() + client._client.search_recording.return_value = [] + + client._search_tracks_text('Track', 'Artist', limit=10) + + client._client.search_recording.assert_called_once_with( + 'Track', artist_name='Artist', limit=10, strict=True + ) + + +def test_search_tracks_with_artist_swallows_client_errors(): + """MB client raising must not crash the endpoint — return [] so the + Fix-popup cascade falls through to the next source.""" + client = MusicBrainzSearchClient() + client._client = MagicMock() + client._client.search_recording.side_effect = RuntimeError('network down') + + assert client.search_tracks_with_artist('Track', 'Artist', limit=10) == [] diff --git a/web_server.py b/web_server.py index 347e938c..b735c7c7 100644 --- a/web_server.py +++ b/web_server.py @@ -19190,6 +19190,69 @@ def search_deezer_tracks(): return jsonify({"error": str(e)}), 500 +@app.route('/api/musicbrainz/search_tracks', methods=['GET']) +def search_musicbrainz_tracks(): + """Search for tracks on MusicBrainz — used by the Discovery Fix popup + cascade and any future surface that needs track-level MB search in the + Fix-popup track shape. + + Mirrors the spotify / itunes / deezer search_tracks endpoints exactly: + accepts `track` + `artist` (or legacy `query`) plus `limit`, returns + `{tracks: [{id, name, artists, album, duration_ms, image_url, source}]}`. + + Uses MB's bare-query mode for max recall (diacritic-folded, + alias/sortname indexed) — same rationale as the manual MBID-paste + endpoint shipped earlier. The Fix popup is a user-facing fuzzy search + where the user picks from the result list, so recall beats precision. + """ + try: + track_q = request.args.get('track', '').strip() + artist_q = request.args.get('artist', '').strip() + legacy_query = request.args.get('query', '').strip() + limit = int(request.args.get('limit', 20)) + + if not (track_q or artist_q or legacy_query): + return jsonify({"error": "Query parameter is required"}), 400 + + from core.musicbrainz_search import MusicBrainzSearchClient + mb_search = MusicBrainzSearchClient() + + if track_q or artist_q: + tracks = mb_search.search_tracks_with_artist( + track_q or legacy_query, artist_q, limit=limit + ) + else: + # Legacy single-string query — let MB's structured-query + # dispatch decide artist-first browse vs text search. + tracks = mb_search.search_tracks(legacy_query, limit=limit) + + # Local rerank — same helper Deezer / iTunes use. Penalises + # cover / karaoke / tribute patterns + boosts exact-artist match. + if track_q or artist_q: + from core.metadata.relevance import rerank_tracks + tracks = rerank_tracks( + tracks, + expected_title=track_q, + expected_artist=artist_q, + ) + + tracks_dict = [{ + 'id': t.id, + 'name': t.name, + 'artists': t.artists, + 'album': t.album, + 'duration_ms': t.duration_ms, + 'image_url': t.image_url, + 'source': 'musicbrainz', + } for t in tracks] + + return jsonify({'tracks': tracks_dict}) + + except Exception as e: + logger.error(f"Error searching MusicBrainz tracks: {e}") + return jsonify({"error": str(e)}), 500 + + @app.route('/api/itunes/album/', methods=['GET']) def get_itunes_album_tracks(album_id): """Fetches full track details for a specific iTunes album.""" diff --git a/webui/static/helper.js b/webui/static/helper.js index 0b581183..ca5f330d 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3422,6 +3422,7 @@ const WHATS_NEW = { { title: 'Fix: MusicBrainz manual search missing results', desc: 'the Fix popup and manual library service search were using strict Lucene phrase-match queries against the `recording` / `release` / `artist` fields — diacritics ("Bjork" vs canonical "Björk"), bracketed suffixes like "(Live)", and any AND-clause mismatch all killed recall. switched user-facing manual lookups to bare queries that hit MB\'s alias / sortname indexes with diacritic folding. enrichment workers stay strict for precision.' }, { title: 'Fix: MusicBrainz album clicks 404ing in enhanced search', desc: 'every click on a MusicBrainz album result was silently 404-ing — the /release fetch was passing `cover-art-archive` as an `inc` param, which MB rejects with 400 (that field is returned on every release response by default, no include needed). dropped the bad include; album detail now loads correctly.' }, { title: 'Fix popup: paste a MusicBrainz URL or MBID to match directly', desc: 'new escape hatch on the Fix Track Match modal (the 🔧 Fix button on mirrored / YouTube / Tidal / Deezer / Beatport / ListenBrainz / Spotify-public discovery rows). when fuzzy search keeps ranking the wrong recording among many same-title versions, paste the MusicBrainz recording URL like `https://musicbrainz.org/recording/` or the bare UUID into the new field and hit "Look up". skips all fuzzy logic, resolves straight to that record, and runs it through the same confirm + match pipeline.' }, + { title: 'Fix popup: MusicBrainz added to the auto-search cascade', desc: 'the Fix Track Match modal used to query only Spotify → Deezer → iTunes for the auto-search, leaving MusicBrainz out of the loop entirely — even for users with MusicBrainz set as their primary metadata source. now MB is part of the cascade. when MB is your primary, it gets queried first; otherwise it sits as the last fallback. catches niche / non-mainstream / canonical-with-diacritics recordings that the commercial sources miss. Discogs is intentionally absent — Discogs has no track-level search API.' }, ], '2.5.5': [ { date: 'May 17, 2026 — 2.5.5 release' }, diff --git a/webui/static/wishlist-tools.js b/webui/static/wishlist-tools.js index ad705785..1a50bf19 100644 --- a/webui/static/wishlist-tools.js +++ b/webui/static/wishlist-tools.js @@ -214,12 +214,20 @@ async function searchDiscoveryFix() { } params.set('limit', '50'); - // Use the user's active metadata source first, then fall back to others + // Use the user's active metadata source first, then fall back to others. + // MusicBrainz is included so users on MB-as-primary get MB queried first, + // and so MB is available as a fallback for fuzzy / niche / non-mainstream + // recordings that Spotify / Deezer / iTunes miss (different catalogues, + // different cover coverage). MB sits last by default because it's + // rate-limited to 1 req/sec — when it's the active primary the activeIdx + // reorder below moves it to the front. Discogs is intentionally absent — + // Discogs has no track-level search API (releases only). const activeSource = (currentMusicSourceName || 'Spotify').toLowerCase(); const allSources = [ { key: 'spotify', endpoint: '/api/spotify/search_tracks', label: 'Spotify' }, { key: 'deezer', endpoint: '/api/deezer/search_tracks', label: 'Deezer' }, { key: 'itunes', endpoint: '/api/itunes/search_tracks', label: 'iTunes' }, + { key: 'musicbrainz', endpoint: '/api/musicbrainz/search_tracks', label: 'MusicBrainz' }, ]; // Put the active source first, keep others as fallbacks const activeIdx = allSources.findIndex(s => activeSource.includes(s.key));