Merge pull request #655 from Nezreka/feat/fix-popup-mb-discogs-cascade

feat(fix-popup): include MusicBrainz in the auto-search cascade
pull/656/head
BoulderBadgeDad 6 days ago committed by GitHub
commit 02dc776692
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -638,13 +638,34 @@ class MusicBrainzSearchClient:
logger.warning(f"MusicBrainz track search failed: {e}")
return []
def _search_tracks_text(self, track_name: str, artist_name: Optional[str], limit: int) -> List[Track]:
"""Fallback text-search path for structured/fuzzy track queries."""
def _search_tracks_text(self, track_name: str, artist_name: Optional[str], limit: int,
strict: bool = True, min_score: Optional[int] = None) -> List[Track]:
"""Fallback text-search path for structured/fuzzy track queries.
`strict=True` (default) keeps the field-scoped Lucene phrase match
precise enough for enrichment-style flows where the inputs are
already known-clean. `strict=False` switches to a bare-query
MB lookup that hits alias/sortname indexes with diacritic folding
needed for user-facing fuzzy surfaces (Fix popup cascade) where
recall beats precision because the user picks from the result list.
Mirrors the same toggle already on `search_recording` in
`core/musicbrainz_client.py`.
`min_score` defaults to `self._MIN_SCORE` (80) sized for the
enhanced search tab where unfiltered MB results are noisy. Pass
a lower value (or 0) when a downstream stage like
`core.metadata.relevance.rerank_tracks` will re-sort by artist
match MB's free-text score heavily favours title-text matches
("Army of Me (Bjork)" cover by HIRS Collective scores 100,
Björk's canonical "Army of Me" scores 28) so a high floor drops
the right answer.
"""
try:
results = self._client.search_recording(track_name, artist_name=artist_name, limit=limit)
# Score filter matches the artist/album logic — cuts garbage
# title collisions from unrelated recordings.
results = [r for r in results if (r.get('score', 0) or 0) >= self._MIN_SCORE]
results = self._client.search_recording(
track_name, artist_name=artist_name, limit=limit, strict=strict
)
threshold = self._MIN_SCORE if min_score is None else min_score
results = [r for r in results if (r.get('score', 0) or 0) >= threshold]
tracks = []
for r in results:
@ -656,6 +677,30 @@ class MusicBrainzSearchClient:
logger.warning(f"MusicBrainz track search failed: {e}")
return []
def search_tracks_with_artist(self, track: str, artist: str,
limit: int = 10) -> List[Track]:
"""Search MB tracks with track + artist passed as separate fields.
Powers the Fix-popup metadata cascade (`GET /api/musicbrainz/search_tracks`)
and any future surface where the caller already has the title/artist
split and wants the fuzzy-recall MB lookup without going through
`search_tracks`'s structured-query dispatch (`Artist - Track`
splitting, bare-name artist-first browse).
Uses bare-query mode (`strict=False`) diacritic-folded, hits
alias/sortname indexes, no `AND`-clause that kills recall when
either side mis-matches. Score floor lowered to 20 (vs the search
tab's 80) so MB recordings whose title doesn't literally contain
the artist name still enter the candidate pool the endpoint's
`rerank_tracks` pass then sorts by artist-match relevance. Without
this, queries like `Army of Me` + `Bjork` only surface covers
(score 73-100) and miss Björk's canonical recording (score 28).
"""
if not track and not artist:
return []
return self._search_tracks_text(track, artist or None, limit,
strict=False, min_score=20)
def _pick_representative_release(self, releases: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
"""Pick the best release out of a release-group's editions.

@ -887,3 +887,135 @@ def test_get_recording_flat_swallows_client_errors():
client._client.get_recording.side_effect = RuntimeError('boom')
assert client.get_recording_flat('rec-err') is None
# ---------------------------------------------------------------------------
# search_tracks_with_artist — Fix-popup cascade adapter
# ---------------------------------------------------------------------------
def test_search_tracks_with_artist_uses_bare_query_mode():
"""The Fix-popup cascade needs MB's bare-query mode so diacritics and
bracketed suffixes don't kill recall. The adapter must pass strict=False
through to the underlying search_recording call."""
client = MusicBrainzSearchClient()
client._client = MagicMock()
client._client.search_recording.return_value = [
{'id': 'rec-1', 'title': 'Army of Me', 'score': 95,
'releases': [{'id': 'rel-1', 'title': 'Post', 'date': '1995'}],
'artist-credit': [{'name': 'Björk'}]},
]
tracks = client.search_tracks_with_artist('Army of Me', 'Björk', limit=10)
# strict=False is the critical bit — fuzzy recall, not phrase precision
client._client.search_recording.assert_called_once_with(
'Army of Me', artist_name='Björk', limit=10, strict=False
)
assert len(tracks) == 1
assert tracks[0].name == 'Army of Me'
assert 'Björk' in tracks[0].artists
def test_search_tracks_with_artist_handles_missing_artist():
"""Track-only query (no artist) still works — empty string becomes
None, and the underlying client searches recordings without an
artist filter."""
client = MusicBrainzSearchClient()
client._client = MagicMock()
client._client.search_recording.return_value = [
{'id': 'rec-1', 'title': 'Some Song', 'score': 90,
'releases': [], 'artist-credit': [{'name': 'Unknown'}]},
]
client.search_tracks_with_artist('Some Song', '', limit=5)
# Empty artist → None passed to the client so MB drops the AND clause
client._client.search_recording.assert_called_once_with(
'Some Song', artist_name=None, limit=5, strict=False
)
def test_search_tracks_with_artist_empty_returns_empty_list():
"""No track and no artist → return [] without hitting the network."""
client = MusicBrainzSearchClient()
client._client = MagicMock()
assert client.search_tracks_with_artist('', '', limit=10) == []
client._client.search_recording.assert_not_called()
def test_search_tracks_with_artist_keeps_low_score_for_rerank():
"""Cascade path uses a low score floor (20) so MB recordings whose
title doesn't literally contain the artist name still enter the
candidate pool the endpoint's rerank pass surfaces them by
artist-match relevance. Real example: "Army of Me" + "Bjork" the
canonical Björk recording scores 28 in MB (title doesn't contain
"Bjork"), while title-collision covers like "Army of Me (Bjork)"
score 73-100. Strict 80 floor drops the right answer."""
client = MusicBrainzSearchClient()
client._client = MagicMock()
client._client.search_recording.return_value = [
{'id': 'rec-cover', 'title': 'Army of Me (Bjork)', 'score': 100,
'releases': [], 'artist-credit': [{'name': 'HIRS Collective'}]},
{'id': 'rec-canonical', 'title': 'Army of Me', 'score': 28,
'releases': [], 'artist-credit': [{'name': 'Björk'}]},
{'id': 'rec-noise', 'title': 'Bjork', 'score': 5,
'releases': [], 'artist-credit': [{'name': 'Random'}]},
]
tracks = client.search_tracks_with_artist('Army of Me', 'Bjork', limit=50)
ids = [t.id for t in tracks]
# Score=28 canonical Björk recording is kept — the endpoint's rerank
# will surface it by artist match.
assert 'rec-canonical' in ids
assert 'rec-cover' in ids
# Score=5 is below the 20 floor — true garbage still filtered out.
assert 'rec-noise' not in ids
def test_search_tracks_text_keeps_min_score_default_80_for_enhanced_search():
"""The enhanced search tab path keeps the historical 80 floor because
it has no downstream rerank unfiltered MB results would be noisy
for free-text user search."""
client = MusicBrainzSearchClient()
client._client = MagicMock()
client._client.search_recording.return_value = [
{'id': 'rec-good', 'title': 'Good', 'score': 95,
'releases': [], 'artist-credit': [{'name': 'A'}]},
{'id': 'rec-mid', 'title': 'Mid', 'score': 40,
'releases': [], 'artist-credit': [{'name': 'A'}]},
]
# No min_score → defaults to _MIN_SCORE (80)
tracks = client._search_tracks_text('Good', 'A', limit=10)
titles = [t.name for t in tracks]
assert 'Good' in titles
assert 'Mid' not in titles
def test_search_tracks_text_strict_param_default_true():
"""Default strict=True preserves the historical behaviour of the
structured-query text-search fallback path important so the
enrichment-style `search_tracks('Artist - Track')` flow stays on
field-scoped Lucene phrase matching as before."""
client = MusicBrainzSearchClient()
client._client = MagicMock()
client._client.search_recording.return_value = []
client._search_tracks_text('Track', 'Artist', limit=10)
client._client.search_recording.assert_called_once_with(
'Track', artist_name='Artist', limit=10, strict=True
)
def test_search_tracks_with_artist_swallows_client_errors():
"""MB client raising must not crash the endpoint — return [] so the
Fix-popup cascade falls through to the next source."""
client = MusicBrainzSearchClient()
client._client = MagicMock()
client._client.search_recording.side_effect = RuntimeError('network down')
assert client.search_tracks_with_artist('Track', 'Artist', limit=10) == []

@ -19190,6 +19190,69 @@ def search_deezer_tracks():
return jsonify({"error": str(e)}), 500
@app.route('/api/musicbrainz/search_tracks', methods=['GET'])
def search_musicbrainz_tracks():
"""Search for tracks on MusicBrainz — used by the Discovery Fix popup
cascade and any future surface that needs track-level MB search in the
Fix-popup track shape.
Mirrors the spotify / itunes / deezer search_tracks endpoints exactly:
accepts `track` + `artist` (or legacy `query`) plus `limit`, returns
`{tracks: [{id, name, artists, album, duration_ms, image_url, source}]}`.
Uses MB's bare-query mode for max recall (diacritic-folded,
alias/sortname indexed) same rationale as the manual MBID-paste
endpoint shipped earlier. The Fix popup is a user-facing fuzzy search
where the user picks from the result list, so recall beats precision.
"""
try:
track_q = request.args.get('track', '').strip()
artist_q = request.args.get('artist', '').strip()
legacy_query = request.args.get('query', '').strip()
limit = int(request.args.get('limit', 20))
if not (track_q or artist_q or legacy_query):
return jsonify({"error": "Query parameter is required"}), 400
from core.musicbrainz_search import MusicBrainzSearchClient
mb_search = MusicBrainzSearchClient()
if track_q or artist_q:
tracks = mb_search.search_tracks_with_artist(
track_q or legacy_query, artist_q, limit=limit
)
else:
# Legacy single-string query — let MB's structured-query
# dispatch decide artist-first browse vs text search.
tracks = mb_search.search_tracks(legacy_query, limit=limit)
# Local rerank — same helper Deezer / iTunes use. Penalises
# cover / karaoke / tribute patterns + boosts exact-artist match.
if track_q or artist_q:
from core.metadata.relevance import rerank_tracks
tracks = rerank_tracks(
tracks,
expected_title=track_q,
expected_artist=artist_q,
)
tracks_dict = [{
'id': t.id,
'name': t.name,
'artists': t.artists,
'album': t.album,
'duration_ms': t.duration_ms,
'image_url': t.image_url,
'source': 'musicbrainz',
} for t in tracks]
return jsonify({'tracks': tracks_dict})
except Exception as e:
logger.error(f"Error searching MusicBrainz tracks: {e}")
return jsonify({"error": str(e)}), 500
@app.route('/api/itunes/album/<album_id>', methods=['GET'])
def get_itunes_album_tracks(album_id):
"""Fetches full track details for a specific iTunes album."""

@ -3422,6 +3422,7 @@ const WHATS_NEW = {
{ title: 'Fix: MusicBrainz manual search missing results', desc: 'the Fix popup and manual library service search were using strict Lucene phrase-match queries against the `recording` / `release` / `artist` fields — diacritics ("Bjork" vs canonical "Björk"), bracketed suffixes like "(Live)", and any AND-clause mismatch all killed recall. switched user-facing manual lookups to bare queries that hit MB\'s alias / sortname indexes with diacritic folding. enrichment workers stay strict for precision.' },
{ title: 'Fix: MusicBrainz album clicks 404ing in enhanced search', desc: 'every click on a MusicBrainz album result was silently 404-ing — the /release fetch was passing `cover-art-archive` as an `inc` param, which MB rejects with 400 (that field is returned on every release response by default, no include needed). dropped the bad include; album detail now loads correctly.' },
{ title: 'Fix popup: paste a MusicBrainz URL or MBID to match directly', desc: 'new escape hatch on the Fix Track Match modal (the 🔧 Fix button on mirrored / YouTube / Tidal / Deezer / Beatport / ListenBrainz / Spotify-public discovery rows). when fuzzy search keeps ranking the wrong recording among many same-title versions, paste the MusicBrainz recording URL like `https://musicbrainz.org/recording/<uuid>` or the bare UUID into the new field and hit "Look up". skips all fuzzy logic, resolves straight to that record, and runs it through the same confirm + match pipeline.' },
{ title: 'Fix popup: MusicBrainz added to the auto-search cascade', desc: 'the Fix Track Match modal used to query only Spotify → Deezer → iTunes for the auto-search, leaving MusicBrainz out of the loop entirely — even for users with MusicBrainz set as their primary metadata source. now MB is part of the cascade. when MB is your primary, it gets queried first; otherwise it sits as the last fallback. catches niche / non-mainstream / canonical-with-diacritics recordings that the commercial sources miss. Discogs is intentionally absent — Discogs has no track-level search API.' },
],
'2.5.5': [
{ date: 'May 17, 2026 — 2.5.5 release' },

@ -214,12 +214,20 @@ async function searchDiscoveryFix() {
}
params.set('limit', '50');
// Use the user's active metadata source first, then fall back to others
// Use the user's active metadata source first, then fall back to others.
// MusicBrainz is included so users on MB-as-primary get MB queried first,
// and so MB is available as a fallback for fuzzy / niche / non-mainstream
// recordings that Spotify / Deezer / iTunes miss (different catalogues,
// different cover coverage). MB sits last by default because it's
// rate-limited to 1 req/sec — when it's the active primary the activeIdx
// reorder below moves it to the front. Discogs is intentionally absent —
// Discogs has no track-level search API (releases only).
const activeSource = (currentMusicSourceName || 'Spotify').toLowerCase();
const allSources = [
{ key: 'spotify', endpoint: '/api/spotify/search_tracks', label: 'Spotify' },
{ key: 'deezer', endpoint: '/api/deezer/search_tracks', label: 'Deezer' },
{ key: 'itunes', endpoint: '/api/itunes/search_tracks', label: 'iTunes' },
{ key: 'musicbrainz', endpoint: '/api/musicbrainz/search_tracks', label: 'MusicBrainz' },
];
// Put the active source first, keep others as fallbacks
const activeIdx = allSources.findIndex(s => activeSource.includes(s.key));

Loading…
Cancel
Save