diff --git a/core/discovery/manual_match.py b/core/discovery/manual_match.py new file mode 100644 index 00000000..8c56bbdf --- /dev/null +++ b/core/discovery/manual_match.py @@ -0,0 +1,70 @@ +"""Helpers for Fix-popup manual match persistence. + +When the user manually fixes a mirrored-playlist discovery via the Fix +popup, two questions land at the web_server route layer that are easier +to test in isolation: + +1. *Which metadata source did the manual match come from?* — the popup + cascade queries the user's primary source first, then Spotify / + Deezer / iTunes / MusicBrainz as fallbacks; each search endpoint + stamps `source` on its rows but the MBID-paste lookup uses a lean + flat shape that doesn't carry it. `derive_manual_match_provider` + collapses the fallback chain into a single string. + +2. *Should the discovery layer re-run for this track when the current + active provider differs from the cached one?* — re-running silently + overwrites the user's deliberate pick with whatever the auto-search + ranks first, so manual matches are exempt regardless of provider + drift. `is_drifted_for_redo` encapsulates the decision. +""" + +from __future__ import annotations + +from typing import Any, Dict, Optional + + +def derive_manual_match_provider( + payload_track: Dict[str, Any], + active_provider: Optional[str], +) -> str: + """Return the provider string to stamp on a manually-fixed match. + + Resolution order: + 1. ``payload_track['source']`` — every *_search_tracks endpoint + sets this; the MBID-paste path doesn't. + 2. ``active_provider`` — what the user has configured as their + primary discovery source. + 3. ``'spotify'`` — last-ditch default matching the historic + hardcode (so behaviour is identical when both upstream + signals are absent). + """ + if not isinstance(payload_track, dict): + payload_track = {} + source = payload_track.get('source') + if source: + return source + if active_provider: + return active_provider + return 'spotify' + + +def is_drifted_for_redo( + extra_data: Optional[Dict[str, Any]], + active_provider: Optional[str], +) -> bool: + """Return True when a cached discovery entry should be treated as + stale because the user's active provider has changed since it was + cached AND the entry isn't a manual match. + + Manual matches are *always* considered fresh: re-running discovery + against the current source would overwrite the user's deliberate + pick with whatever auto-search ranks first. The first Playlist + Pipeline run after a manual fix used to clobber it for exactly + this reason — the check lives here now so it's pinned by tests. + """ + if not isinstance(extra_data, dict): + return False + if extra_data.get('manual_match'): + return False + cached_provider = extra_data.get('provider', 'spotify') + return cached_provider != active_provider diff --git a/core/metadata/relevance.py b/core/metadata/relevance.py index 37aed5e0..d3e68935 100644 --- a/core/metadata/relevance.py +++ b/core/metadata/relevance.py @@ -295,6 +295,7 @@ def rerank_tracks( *, expected_title: str, expected_artist: str, + prefer_known_duration: bool = False, ) -> List[Track]: """Return a copy of ``tracks`` sorted by descending relevance score against the expected title + artist. @@ -304,6 +305,15 @@ def rerank_tracks( fallback when two candidates score identically — the source's popularity signal is still useful as a tiebreak). + ``prefer_known_duration``: when True, recordings with non-zero + ``duration_ms`` are ranked ahead of duplicate-score recordings + that lack length data. Used for MusicBrainz which often has + several recordings per song (single edition, album edition, + compilations, remasters) where some carry length and some don't. + Sort key sits between score and the stable-order tiebreaker so + relevance still wins — length is only a tiebreaker on equal + scores, not a global re-shuffle. + No-op when both ``expected_title`` and ``expected_artist`` are empty (no signal to rank against — return input order).""" if not expected_title and not expected_artist: @@ -312,8 +322,12 @@ def rerank_tracks( (score_track(t, expected_title=expected_title, expected_artist=expected_artist), idx, t) for idx, t in enumerate(tracks) ] - # Sort by score desc; idx asc as tiebreaker preserves stable order. - scored.sort(key=lambda x: (-x[0], x[1])) + if prefer_known_duration: + # Sort key: score desc, has-length first (0 before 1), idx asc. + scored.sort(key=lambda x: (-x[0], 0 if (x[2].duration_ms or 0) > 0 else 1, x[1])) + else: + # Sort by score desc; idx asc as tiebreaker preserves stable order. + scored.sort(key=lambda x: (-x[0], x[1])) return [t for _score, _idx, t in scored] diff --git a/core/musicbrainz_search.py b/core/musicbrainz_search.py index ff39a827..8e7bf9bd 100644 --- a/core/musicbrainz_search.py +++ b/core/musicbrainz_search.py @@ -772,12 +772,11 @@ class MusicBrainzSearchClient: When only one field is present: bare-query mode directly — same recall-over-precision tradeoff the old single-path took. - Results are stable-sorted to prefer entries with non-zero - `duration_ms`. MB often has several recordings per song (single - release, album release, compilations, remasters) and not every - recording carries length data. Without this, the first match can - be a length-less duplicate while a sibling recording with the - real 3:04 sits two rows down. + Callers wanting MB-specific length-preference ordering (multi- + edition recordings where some lack length data) should pass + ``prefer_known_duration=True`` to ``rerank_tracks`` downstream + — a stable sort here would just be re-sorted away by the rerank + pass anyway. """ if not track and not artist: return [] @@ -790,13 +789,11 @@ class MusicBrainzSearchClient: results = self._search_tracks_text( track, artist, limit, strict=False, min_score=20 ) - else: - results = self._search_tracks_text( - track, artist or None, limit, strict=False, min_score=20 - ) + return results - results.sort(key=lambda t: 0 if (t.duration_ms or 0) > 0 else 1) - return results + return self._search_tracks_text( + track, artist or None, limit, strict=False, min_score=20 + ) def _pick_representative_release(self, releases: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]: """Pick the best release out of a release-group's editions. diff --git a/tests/discovery/test_manual_match.py b/tests/discovery/test_manual_match.py new file mode 100644 index 00000000..8bc34b09 --- /dev/null +++ b/tests/discovery/test_manual_match.py @@ -0,0 +1,106 @@ +"""Tests for core.discovery.manual_match helpers. + +These pin the contract for two route-layer decisions lifted out of +web_server.py so the Fix-popup → mirrored-playlist back-sync flow is +testable in isolation (per kettui's standing rule that web_server.py +behavior is reproduced in core/ modules with real unit tests, not by +AST-parsing the route file). +""" + +from core.discovery.manual_match import ( + derive_manual_match_provider, + is_drifted_for_redo, +) + + +# --------------------------------------------------------------------------- +# derive_manual_match_provider +# --------------------------------------------------------------------------- + + +def test_derive_uses_payload_source_when_present(): + """Search-endpoint payloads always stamp `source` — that's the + authoritative provider for a manual match.""" + payload = {'id': 'rec-1', 'source': 'musicbrainz', 'name': 'Track'} + assert derive_manual_match_provider(payload, 'spotify') == 'musicbrainz' + + +def test_derive_falls_back_to_active_when_payload_missing_source(): + """MBID-paste path returns a lean flat shape without `source`. Fall + back to the user's active discovery source so the cached match + matches whatever provider next compares against it.""" + payload = {'id': 'mb-mbid', 'name': 'Track'} # no `source` + assert derive_manual_match_provider(payload, 'musicbrainz') == 'musicbrainz' + + +def test_derive_falls_back_to_spotify_when_both_missing(): + """Last-ditch default matches the historic hardcode so behaviour is + identical when both upstream signals are absent (e.g. broken + config, missing active source).""" + assert derive_manual_match_provider({}, None) == 'spotify' + assert derive_manual_match_provider({}, '') == 'spotify' + + +def test_derive_handles_non_dict_payload_gracefully(): + """Defensive — caller passes whatever request.get_json() returned.""" + assert derive_manual_match_provider(None, 'spotify') == 'spotify' + assert derive_manual_match_provider('not-a-dict', 'musicbrainz') == 'musicbrainz' + + +def test_derive_payload_source_wins_even_when_active_set(): + """`source` on payload is authoritative — even if the user's active + source changed mid-flow, the match came from whatever the popup + cascade actually queried.""" + payload = {'source': 'itunes'} + assert derive_manual_match_provider(payload, 'spotify') == 'itunes' + + +# --------------------------------------------------------------------------- +# is_drifted_for_redo +# --------------------------------------------------------------------------- + + +def test_drift_redo_when_provider_changed_and_not_manual(): + """Standard provider-drift case: cached provider differs from + active, no manual flag → re-discover so active source's IDs / + artwork take effect.""" + extra = {'discovered': True, 'provider': 'spotify'} + assert is_drifted_for_redo(extra, 'musicbrainz') is True + + +def test_drift_no_redo_when_provider_matches(): + """Same provider → cached entry is fresh, no redo needed.""" + extra = {'discovered': True, 'provider': 'spotify'} + assert is_drifted_for_redo(extra, 'spotify') is False + + +def test_drift_no_redo_when_manual_match_even_if_provider_drifted(): + """The crux of the bug fix: manual matches are exempt from + provider-drift redo. Re-running would overwrite the user's pick.""" + extra = {'discovered': True, 'provider': 'musicbrainz', 'manual_match': True} + assert is_drifted_for_redo(extra, 'spotify') is False + + +def test_drift_no_redo_when_manual_match_with_matching_provider(): + """Manual + provider match: trivially fresh.""" + extra = {'discovered': True, 'provider': 'spotify', 'manual_match': True} + assert is_drifted_for_redo(extra, 'spotify') is False + + +def test_drift_no_redo_when_extra_data_missing(): + """No cached entry → nothing to drift from.""" + assert is_drifted_for_redo(None, 'spotify') is False + assert is_drifted_for_redo({}, 'spotify') is False + + +def test_drift_handles_non_dict_extra_data(): + """Defensive — extra_data deserialisation can land non-dict shapes.""" + assert is_drifted_for_redo('not-a-dict', 'spotify') is False + + +def test_drift_default_provider_is_spotify_when_absent(): + """Historic cached entries may pre-date the provider column being + populated — treat absent provider as 'spotify' (the legacy default).""" + extra = {'discovered': True} # no provider field + assert is_drifted_for_redo(extra, 'spotify') is False + assert is_drifted_for_redo(extra, 'musicbrainz') is True diff --git a/tests/metadata/test_musicbrainz_search.py b/tests/metadata/test_musicbrainz_search.py index 322cc284..96d2c220 100644 --- a/tests/metadata/test_musicbrainz_search.py +++ b/tests/metadata/test_musicbrainz_search.py @@ -1050,11 +1050,12 @@ def test_search_tracks_with_artist_falls_back_to_bare_when_strict_empty(): assert tracks[0].id == 'rec-canonical' -def test_search_tracks_with_artist_prefers_results_with_known_length(): - """MB has multiple recordings per song (single release, album release, - compilations) and not every recording carries length data. Stable - sort moves length-known entries ahead of length-zero duplicates so - the user sees the actionable 3:04 row first, not the 0:00 sibling.""" +def test_search_tracks_with_artist_does_not_resort_by_length(): + """Length-preference ordering lives downstream in + ``rerank_tracks(..., prefer_known_duration=True)`` — sorting here + would be re-sorted away by rerank anyway, so this method preserves + the order MB returned. Pin the contract: this method does not + re-shuffle by duration_ms.""" client = MusicBrainzSearchClient() client._client = MagicMock() client._client.search_recording.return_value = [ @@ -1067,10 +1068,9 @@ def test_search_tracks_with_artist_prefers_results_with_known_length(): tracks = client.search_tracks_with_artist('Coffee Break', 'Zeds Dead', limit=10) - # rec-with-length must surface first even though MB scored it lower - assert tracks[0].id == 'rec-with-length' - assert tracks[0].duration_ms == 184000 - assert tracks[1].id == 'rec-no-length' + # MB's order is preserved here — rerank applies length-pref downstream. + assert tracks[0].id == 'rec-no-length' + assert tracks[1].id == 'rec-with-length' def test_search_tracks_with_artist_handles_missing_artist(): diff --git a/tests/metadata/test_relevance.py b/tests/metadata/test_relevance.py index 38e71555..4c6f9394 100644 --- a/tests/metadata/test_relevance.py +++ b/tests/metadata/test_relevance.py @@ -50,6 +50,7 @@ def _track( album: str = 'Unknown', album_type: str = 'album', track_id: str = 't', + duration_ms: int = 200000, ) -> Track: """Tiny Track factory — keeps test bodies focused on the fields under test.""" @@ -58,7 +59,7 @@ def _track( name=name, artists=[artist], album=album, - duration_ms=200000, + duration_ms=duration_ms, album_type=album_type, ) @@ -366,6 +367,56 @@ class TestRerankTracks: ranked = rerank_tracks([a, b], expected_title='Track', expected_artist='Artist') assert [t.id for t in ranked] == ['first', 'second'] + def test_prefer_known_duration_promotes_length_known_on_ties(self): + """MB has multiple recordings per song where some lack length + data. With ``prefer_known_duration=True`` and equal relevance + scores, the recording with non-zero duration_ms must surface + ahead of the length-less sibling.""" + no_length = _track('Track', artist='Artist', track_id='no-len', duration_ms=0) + with_length = _track('Track', artist='Artist', track_id='with-len', duration_ms=184000) + ranked = rerank_tracks( + [no_length, with_length], + expected_title='Track', + expected_artist='Artist', + prefer_known_duration=True, + ) + assert [t.id for t in ranked] == ['with-len', 'no-len'] + + def test_prefer_known_duration_does_not_override_relevance(self): + """Length-preference is a TIEBREAKER, not a global resort. A + length-less track that scores higher on relevance must still + win — only equal-score pairs use length as the deciding bit.""" + # Wrong-artist length-known: scored low. Right-artist length-less: + # scored high. + length_known_cover = _track( + 'Track', artist='Karaoke Channel', album='Karaoke Hits', + album_type='compilation', track_id='cover', duration_ms=184000, + ) + length_less_real = _track( + 'Track', artist='Real Artist', track_id='real', duration_ms=0, + ) + ranked = rerank_tracks( + [length_known_cover, length_less_real], + expected_title='Track', + expected_artist='Real Artist', + prefer_known_duration=True, + ) + assert ranked[0].id == 'real', "relevance must beat length-pref" + + def test_prefer_known_duration_default_off(self): + """Default behaviour unchanged — length-preference is opt-in + for MB callers; Spotify / iTunes / Deezer don't need it + because their search results always include length.""" + no_length = _track('Track', artist='Artist', track_id='no-len', duration_ms=0) + with_length = _track('Track', artist='Artist', track_id='with-len', duration_ms=184000) + ranked = rerank_tracks( + [no_length, with_length], + expected_title='Track', + expected_artist='Artist', + ) + # Default: stable input order, no length-pref re-shuffle. + assert [t.id for t in ranked] == ['no-len', 'with-len'] + # --------------------------------------------------------------------------- # filter_and_rerank — score floor convenience diff --git a/web_server.py b/web_server.py index 0fd2ff69..7e6209ed 100644 --- a/web_server.py +++ b/web_server.py @@ -19416,12 +19416,19 @@ def search_musicbrainz_tracks(): # Local rerank — same helper Deezer / iTunes use. Penalises # cover / karaoke / tribute patterns + boosts exact-artist match. + # `prefer_known_duration=True` is MB-specific: MB has multiple + # recordings per song (single / album / compilation / remaster + # editions) and not every recording carries length data. The + # flag promotes length-known recordings ahead of length-less + # duplicates when relevance scores tie, so the user sees the + # actionable 3:04 row before the 0:00 sibling. if track_q or artist_q: from core.metadata.relevance import rerank_tracks tracks = rerank_tracks( tracks, expected_title=track_q, expected_artist=artist_q, + prefer_known_duration=True, ) tracks_dict = [{ @@ -24315,6 +24322,20 @@ def update_youtube_discovery_match(): logger.info(f"Manual match updated: youtube - {identifier} - track {track_index}") logger.info(f" → {result['spotify_artist']} - {result['spotify_track']}") + # See core.discovery.manual_match — Fix-popup matches can come from + # any metadata source (primary first, then Spotify / Deezer / iTunes + # / MusicBrainz as fallbacks). Hardcoding 'spotify' here used to + # make every non-Spotify manual match look provider-drifted on the + # next prepare-discovery, which triggered automatic re-discovery + # that overwrote the user's pick. Computed once before the try + # block so both the cache-save path AND the mirrored-DB save below + # (in the except fallback case) see the same value. + from core.discovery.manual_match import derive_manual_match_provider + match_source = derive_manual_match_provider( + spotify_track, _get_active_discovery_source() + ) + matched_data = None + # Save manual fix to discovery cache so it appears in discovery pool try: # Get original track name from the YouTube/source track data @@ -24349,20 +24370,6 @@ def update_youtube_discovery_match(): album_obj['image_url'] = image_url album_obj['images'] = [{'url': image_url}] - # Manual fixes can come from any metadata source — the Fix-popup - # cascade queries the user's primary first, then Spotify / Deezer / - # iTunes / MusicBrainz as fallbacks. Each search endpoint stamps - # `source` on its results; the MBID-paste lookup doesn't, so fall - # back to the active discovery source. Hardcoding 'spotify' here - # used to make every non-Spotify manual match look like it had - # provider-drifted on the next prepare-discovery, triggering - # automatic re-discovery that overwrote the user's manual pick. - match_source = ( - spotify_track.get('source') - or _get_active_discovery_source() - or 'spotify' - ) - matched_data = { 'id': spotify_track['id'], 'name': spotify_track['name'], @@ -24380,16 +24387,12 @@ def update_youtube_discovery_match(): logger.info(f"Manual fix saved to discovery cache: {original_name} by {original_artist}") except Exception as cache_err: logger.error(f"Error saving manual fix to discovery cache: {cache_err}") - # match_source needs to exist for the mirrored-DB block below even - # if cache save failed — use the same fallback chain. - match_source = ( - spotify_track.get('source') - or _get_active_discovery_source() - or 'spotify' - ) - # Persist manual fix to DB for mirrored playlists - if identifier.startswith('mirrored_'): + # Persist manual fix to DB for mirrored playlists. Skips when the + # cache-save block raised before matched_data was constructed — + # without the payload there's nothing to persist, and re-deriving + # it here would duplicate the construction logic above. + if matched_data is not None and identifier.startswith('mirrored_'): try: tracks = state['playlist']['tracks'] if track_index < len(tracks): @@ -33688,21 +33691,20 @@ def prepare_mirrored_discovery(playlist_id): pre_discovered_count = 0 has_pending = False + from core.discovery.manual_match import is_drifted_for_redo + for idx, track in enumerate(tracks): extra = track.get('extra_data') if extra and extra.get('discovered'): cached_provider = extra.get('provider', 'spotify') - is_manual = bool(extra.get('manual_match')) - - # If the cached result was discovered by a different provider than the - # currently active one, treat it as pending so re-discovery uses the - # correct source (IDs, album data, images differ between providers). - # Manual matches are exempt: the user explicitly picked that record, - # so re-discovery would overwrite their intentional fix every cycle. - # Without this guard, fixing a track via the Fix popup correctly - # downloads it once, but the next Playlist Pipeline run re-marks it - # "Provider Changed" and reverts the manual map. - if cached_provider != _current_provider and not is_manual: + + # See core.discovery.manual_match.is_drifted_for_redo — + # provider-drift triggers re-discovery so the active source's + # IDs / artwork take effect, but manual matches are exempt: + # re-running would overwrite the user's deliberate pick with + # whatever auto-search ranks first. Pre-fix, every Playlist + # Pipeline run clobbered manual fixes for exactly this reason. + if is_drifted_for_redo(extra, _current_provider): has_pending = True dur = track.get('duration_ms', 0) pre_discovered_results.append({ @@ -33784,16 +33786,14 @@ def prepare_mirrored_discovery(playlist_id): 'confidence': 0, }) - # Treat as cached if at least one track was discovered by the current provider, - # OR if any track carries a manual_match (those are exempt from provider-drift - # re-discovery above, so they count as cached regardless of cached_provider). + # Treat as cached when at least one track has a non-drifted cached + # discovery — same predicate the per-track loop above uses (inverse + # polarity), so a future field change only has to land in + # core.discovery.manual_match.is_drifted_for_redo. has_cached = any( t.get('extra_data') and (t['extra_data'].get('discovered') or t['extra_data'].get('discovery_attempted')) and - ( - t['extra_data'].get('provider', 'spotify') == _current_provider - or t['extra_data'].get('manual_match') - ) + not is_drifted_for_redo(t['extra_data'], _current_provider) for t in tracks ) diff --git a/webui/static/library.js b/webui/static/library.js index dac0467a..a979233c 100644 --- a/webui/static/library.js +++ b/webui/static/library.js @@ -885,7 +885,7 @@ function navigateToArtistDetail(artistId, artistName, sourceOverride = null, opt // disabled in private-browsing modes. let _preferEnhanced = false; try { - _preferEnhanced = localStorage.getItem('soulsync-library-view-mode') === 'enhanced'; + _preferEnhanced = localStorage.getItem(_libraryViewModeKey()) === 'enhanced'; } catch (_) { /* localStorage unavailable */ } // Navigate to artist detail page @@ -2925,10 +2925,23 @@ function toggleEnhancedView(enabled) { // Persist the choice so the next artist click (and the next page reload) // honours it instead of always reverting to Standard. try { - localStorage.setItem('soulsync-library-view-mode', enabled ? 'enhanced' : 'standard'); + localStorage.setItem(_libraryViewModeKey(), enabled ? 'enhanced' : 'standard'); } catch (_) { /* localStorage unavailable */ } } +// localStorage key for the Enhanced/Standard toggle, scoped to the active +// profile so different admin profiles can keep different defaults. Falls +// back to an unsuffixed key when no profile is loaded (matches the original +// behaviour for any pre-multi-profile saved value). +function _libraryViewModeKey() { + const pid = (typeof currentProfile === 'object' && currentProfile && currentProfile.id != null) + ? currentProfile.id + : null; + return pid != null + ? `soulsync-library-view-mode:${pid}` + : 'soulsync-library-view-mode'; +} + async function loadEnhancedViewData(artistId) { const container = document.getElementById('enhanced-view-container'); if (!container) return;