diff --git a/core/library/service_search.py b/core/library/service_search.py index cb8b1874..38fd450e 100644 --- a/core/library/service_search.py +++ b/core/library/service_search.py @@ -100,12 +100,14 @@ def _search_service(service, entity_type, query): if not mb_worker or not mb_worker.mb_service: raise ValueError("MusicBrainz worker not initialized") mb_client = mb_worker.mb_service.mb_client + # User-facing manual search — prefer recall (fuzzy / alias / diacritic- + # folded) over strict phrase precision. User picks correct hit from list. if entity_type == 'artist': - items = mb_client.search_artist(query, limit=8) + items = mb_client.search_artist(query, limit=8, strict=False) return [{'id': a['id'], 'name': a.get('name', ''), 'image': None, 'extra': f"Score: {a.get('score', '')} · {a.get('disambiguation', '') or a.get('country', '')}"} for a in items] elif entity_type == 'album': - items = mb_client.search_release(query, limit=8) + items = mb_client.search_release(query, limit=8, strict=False) results = [] for r in items: artists = ', '.join(ac.get('name', '') for ac in r.get('artist-credit', []) if isinstance(ac, dict)) @@ -115,7 +117,7 @@ def _search_service(service, entity_type, query): 'extra': f"{artists} · {r.get('date', '')} · Score: {r.get('score', '')}"}) return results elif entity_type == 'track': - items = mb_client.search_recording(query, limit=8) + items = mb_client.search_recording(query, limit=8, strict=False) results = [] for r in items: artists = ', '.join(ac.get('name', '') for ac in r.get('artist-credit', []) if isinstance(ac, dict)) diff --git a/core/musicbrainz_client.py b/core/musicbrainz_client.py index b152f28f..f1d59491 100644 --- a/core/musicbrainz_client.py +++ b/core/musicbrainz_client.py @@ -128,27 +128,45 @@ class MusicBrainzClient: return [] @rate_limited - def search_release(self, album_name: str, artist_name: Optional[str] = None, limit: int = 10) -> List[Dict[str, Any]]: + def search_release(self, album_name: str, artist_name: Optional[str] = None, + limit: int = 10, strict: bool = True) -> List[Dict[str, Any]]: """ - Search for releases (albums) by name - + Search for releases (albums) by name. + Args: album_name: Name of the album to search for artist_name: Optional artist name to narrow search limit: Maximum number of results to return - + strict: When True (default), builds a phrase-match Lucene query + against the `release` and `artist` fields — correct for + enrichment flows where exact name+artist are known. When + False, sends a bare query (album + artist joined) so MB + hits alias / sortname indexes and folds diacritics, + dramatically improving recall for user-facing fuzzy + lookups (e.g. the manual Fix popup). + Returns: List of release results """ try: - # Escape quotes and backslashes for Lucene query - safe_album = album_name.replace('\\', '\\\\').replace('"', '\\"') - query = f'release:"{safe_album}"' - - if artist_name: - safe_artist = artist_name.replace('\\', '\\\\').replace('"', '\\"') - query += f' AND artist:"{safe_artist}"' - + if strict: + # Escape quotes and backslashes for Lucene query + safe_album = album_name.replace('\\', '\\\\').replace('"', '\\"') + query = f'release:"{safe_album}"' + + if artist_name: + safe_artist = artist_name.replace('\\', '\\\\').replace('"', '\\"') + query += f' AND artist:"{safe_artist}"' + else: + # Bare query — MB tokenizes against title + artist credit + + # alias + sortname indexes together with diacritic folding. + # Recovers cases like "Bjork" → "Björk" that strict phrase + # queries miss. + parts = [album_name] + if artist_name: + parts.append(artist_name) + query = ' '.join(p for p in parts if p) + params = { 'query': query, 'fmt': 'json', @@ -173,27 +191,44 @@ class MusicBrainzClient: return [] @rate_limited - def search_recording(self, track_name: str, artist_name: Optional[str] = None, limit: int = 10) -> List[Dict[str, Any]]: + def search_recording(self, track_name: str, artist_name: Optional[str] = None, + limit: int = 10, strict: bool = True) -> List[Dict[str, Any]]: """ - Search for recordings (tracks) by name - + Search for recordings (tracks) by name. + Args: track_name: Name of the track to search for artist_name: Optional artist name to narrow search limit: Maximum number of results to return - + strict: When True (default), builds a phrase-match Lucene query + against the `recording` and `artist` fields — correct for + enrichment flows where exact name+artist are known. When + False, sends a bare query (track + artist joined) so MB + hits alias / sortname indexes and folds diacritics. The + bare path also avoids the AND-clause that kills recall + when either side mis-matches (e.g. "Bjork" vs canonical + "Björk", or a track title with bracketed suffix like + "(Live)" that strict phrase match rejects). + Returns: List of recording results """ try: - # Escape quotes and backslashes for Lucene query - safe_track = track_name.replace('\\', '\\\\').replace('"', '\\"') - query = f'recording:"{safe_track}"' - - if artist_name: - safe_artist = artist_name.replace('\\', '\\\\').replace('"', '\\"') - query += f' AND artist:"{safe_artist}"' - + if strict: + # Escape quotes and backslashes for Lucene query + safe_track = track_name.replace('\\', '\\\\').replace('"', '\\"') + query = f'recording:"{safe_track}"' + + if artist_name: + safe_artist = artist_name.replace('\\', '\\\\').replace('"', '\\"') + query += f' AND artist:"{safe_artist}"' + else: + # Bare query — see search_release for rationale. + parts = [track_name] + if artist_name: + parts.append(artist_name) + query = ' '.join(p for p in parts if p) + params = { 'query': query, 'fmt': 'json', diff --git a/core/musicbrainz_search.py b/core/musicbrainz_search.py index 21001118..4ce82e87 100644 --- a/core/musicbrainz_search.py +++ b/core/musicbrainz_search.py @@ -856,8 +856,13 @@ class MusicBrainzSearchClient: shape the download modal expects. `rg_fallback` supplies release-group metadata (type, artist credits) when resolving from a release-group whose releases may be lightly populated.""" + # NOTE: `cover-art-archive` is NOT a valid `inc` param for the + # /release resource — MB returns 400 if you pass it. The CAA flags + # (`{'front': True, 'back': True, ...}`) come back on every release + # response by default, so we read them below without requesting an + # include. release = self._client.get_release( - release_mbid, includes=['recordings', 'artist-credits', 'release-groups', 'cover-art-archive'] + release_mbid, includes=['recordings', 'artist-credits', 'release-groups'] ) if not release: return None diff --git a/tests/metadata/test_musicbrainz_search.py b/tests/metadata/test_musicbrainz_search.py index 4269a5f8..719f02ae 100644 --- a/tests/metadata/test_musicbrainz_search.py +++ b/tests/metadata/test_musicbrainz_search.py @@ -541,7 +541,7 @@ def test_get_album_resolves_release_group_mbid_to_release(): 'rg-damn', includes=['releases', 'artist-credits'] ) client._client.get_release.assert_called_once_with( - 'rel-official', includes=['recordings', 'artist-credits', 'release-groups', 'cover-art-archive'] + 'rel-official', includes=['recordings', 'artist-credits', 'release-groups'] ) assert album is not None assert album['id'] == 'rg-damn' # Canonical ID stays the release-group MBID. diff --git a/web_server.py b/web_server.py index 695b9a57..46bdd39c 100644 --- a/web_server.py +++ b/web_server.py @@ -15734,8 +15734,14 @@ def musicbrainz_search_api(): mb_client = mb_svc.mb_client results = [] + # Manual Fix popup is user-facing fuzzy search — recall matters more + # than precision because the user picks the right hit from the list. + # Use bare-query mode so diacritics, aliases, and bracketed suffixes + # like "(Live)" don't kill matches the way strict field-scoped + # phrase queries do. Enrichment workers stay on strict mode (the + # default) since they auto-accept the top hit and need precision. if entity_type == 'artist': - raw = mb_client.search_artist(query, limit=limit) + raw = mb_client.search_artist(query, limit=limit, strict=False) for r in raw: results.append({ 'mbid': r.get('id', ''), @@ -15746,7 +15752,7 @@ def musicbrainz_search_api(): 'country': r.get('country', ''), }) elif entity_type == 'release': - raw = mb_client.search_release(query, artist_name=artist or None, limit=limit) + raw = mb_client.search_release(query, artist_name=artist or None, limit=limit, strict=False) for r in raw: artist_credit = ', '.join(a.get('name', '') for a in r.get('artist-credit', []) if isinstance(a, dict)) results.append({ @@ -15760,7 +15766,7 @@ def musicbrainz_search_api(): 'track_count': r.get('track-count', 0), }) elif entity_type == 'recording': - raw = mb_client.search_recording(query, artist_name=artist or None, limit=limit) + raw = mb_client.search_recording(query, artist_name=artist or None, limit=limit, strict=False) for r in raw: artist_credit = ', '.join(a.get('name', '') for a in r.get('artist-credit', []) if isinstance(a, dict)) releases = r.get('releases', []) diff --git a/webui/static/helper.js b/webui/static/helper.js index 2dbf6c91..40c74509 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3419,6 +3419,8 @@ const WHATS_NEW = { { title: 'Fix: MusicBrainz artist detail showing MBID as name', desc: 'clicking a MusicBrainz artist from search results was showing the raw MBID as the artist name on the detail page. URL-driven routing (PR #644) no longer passes the display name to the backend, so the source detail endpoint now looks it up directly from MusicBrainz by MBID.' }, { title: 'Fix: artist detail back button always showing "← Back"', desc: 'PR #644 removed the back-button label logic along with the origin stack. restored: a label stack (separate from browser history, which handles actual navigation) tracks where you came from across the full similar-artist chain — "← Back to Search", "← Back to Artist A", "← Back to Artist B", etc. API response backfills the current artist name so the stack has real names when clicking similar artists.' }, { title: 'Fix: Amazon search albums/artists missing, album downloads all track 01', desc: 't2tunes proxies Amazon Music and uses 400 to signal transient failures — first API call in a session hit this consistently, so album/artist searches always failed while track search (called 0.5s later) scraped through. added up to 3 retries with backoff on t2tunes-specific 400s. also: all search methods were using types=track,album but t2tunes album-type queries are broken — switched everything to types=track and derive albums/artists from track metadata instead. track numbers from album downloads were also always 1 — added index-based fallback when t2tunes tags omit trackNumber.' }, + { title: 'Fix: MusicBrainz manual search missing results', desc: 'the Fix popup and manual library service search were using strict Lucene phrase-match queries against the `recording` / `release` / `artist` fields — diacritics ("Bjork" vs canonical "Björk"), bracketed suffixes like "(Live)", and any AND-clause mismatch all killed recall. switched user-facing manual lookups to bare queries that hit MB\'s alias / sortname indexes with diacritic folding. enrichment workers stay strict for precision.' }, + { title: 'Fix: MusicBrainz album clicks 404ing in enhanced search', desc: 'every click on a MusicBrainz album result was silently 404-ing — the /release fetch was passing `cover-art-archive` as an `inc` param, which MB rejects with 400 (that field is returned on every release response by default, no include needed). dropped the bad include; album detail now loads correctly.' }, ], '2.5.5': [ { date: 'May 17, 2026 — 2.5.5 release' },