From 246503066b05a67a14b68b2ec83f7f5fe48c1609 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Tue, 26 May 2026 13:07:01 -0700 Subject: [PATCH] Fold provider-matching into PlaylistSource contract (Phase 1b) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds ``discover_tracks(tracks) -> List[NormalizedTrack]`` to the PlaylistSource interface. Sources whose tracks already carry provider IDs (Spotify, Tidal, Qobuz, YouTube, Deezer, Spotify public, iTunes link, SoulSync Discovery) inherit a no-op default; ListenBrainz + Last.fm override to run the matching engine. This closes the last gap before LB / Last.fm / SoulSync Discovery can land as Sync-page mirror sources: the refresh handler now calls ``source.discover_tracks(...)`` whenever a source returns tracks with ``needs_discovery=True``, so mirrored LB rows arrive already discovered + ready for the sync pipeline. Previously, LB playlists ran through a separate state-machine worker tied to the Discover-page UI, with results stored in ``discovery_cache`` instead of ``mirrored_playlist_tracks.extra_data``. Changes: - ``core/playlists/sources/base.py`` — PlaylistSource switches from Protocol to ABC so a concrete default for ``discover_tracks`` can live on the base class. The four real-work methods stay ``@abstractmethod``; instantiating an adapter that forgets one fails loudly at construction. - ``core/discovery/matching.py`` (new) — pure ``match_mb_tracks`` helper that runs Strategy-1-only matching-engine queries against Spotify (primary) or iTunes (fallback). No state machine, no discovery-cache writes, no wing-it stub — that richer flow stays in ``core/discovery/listenbrainz.py`` for the Discover-page UI. - ``ListenBrainzPlaylistSource`` + ``LastFMPlaylistSource`` take an optional ``discover_callable`` constructor arg. Last.fm reuses the LB implementation since the track shape is identical. - ``bootstrap.build_playlist_source_registry`` accepts a ``discover_callable`` kwarg and wires it into LB + Last.fm adapters. - ``web_server.py`` boot constructs the discovery callable from the existing matching engine + ``_discovery_score_candidates`` + Spotify / iTunes clients, passes through to the registry. - ``refresh_mirrored.py`` adds a small ``_maybe_discover`` helper that calls ``source.discover_tracks(...)`` between fetch and ``to_mirror_track_dict`` projection — only fires when at least one track has ``needs_discovery=True``, so the normal Spotify / Tidal / etc. refresh path stays a zero-cost pass-through. Tests: - 5 new adapter tests: default no-op pass-through, LB discovery with mixed matches/misses, LB no-callable fallback, Last.fm shares the LB implementation, mirror-dict spotify_hint emit. - 1 new automation test: end-to-end LB refresh with a stub discover_callable proves the matched_data lands in ``mirror_playlist_tracks.extra_data`` after the registry refresh + discover hop. 225 tests across adapter + automation suites green. --- core/automation/handlers/refresh_mirrored.py | 34 ++++- core/discovery/matching.py | 134 +++++++++++++++++++ core/playlists/sources/base.py | 47 +++++-- core/playlists/sources/bootstrap.py | 11 +- core/playlists/sources/lastfm.py | 21 ++- core/playlists/sources/listenbrainz.py | 81 ++++++++++- tests/automation/test_handlers_playlist.py | 104 ++++++++++++++ tests/test_playlist_sources_adapters.py | 122 +++++++++++++++++ web_server.py | 18 +++ webui/static/helper.js | 1 + 10 files changed, 556 insertions(+), 17 deletions(-) create mode 100644 core/discovery/matching.py diff --git a/core/automation/handlers/refresh_mirrored.py b/core/automation/handlers/refresh_mirrored.py index 98418c8a..12eccb8d 100644 --- a/core/automation/handlers/refresh_mirrored.py +++ b/core/automation/handlers/refresh_mirrored.py @@ -89,7 +89,13 @@ def auto_refresh_mirrored(config: Dict[str, Any], deps: AutomationDeps) -> Dict[ ) continue - tracks = [to_mirror_track_dict(t) for t in detail.tracks] + # Sources that return MB-metadata-only tracks (LB, Last.fm) + # mark them ``needs_discovery=True``. Hand them to the + # adapter's matcher so the resulting mirror rows carry + # provider IDs + matched_data, ready for the sync pipeline. + detail_tracks = _maybe_discover(detail.tracks, source, deps) + + tracks = [to_mirror_track_dict(t) for t in detail_tracks] refreshed += _commit_refresh(pl, source, source_id, tracks, db, deps, auto_id) except _SkipPlaylist: # Source-specific soft-skip (e.g. Tidal not authenticated). @@ -105,6 +111,32 @@ def auto_refresh_mirrored(config: Dict[str, Any], deps: AutomationDeps) -> Dict[ return {'status': 'completed', 'refreshed': str(refreshed), 'errors': str(len(errors))} +def _maybe_discover( + tracks: List[Any], + source: str, + deps: AutomationDeps, +) -> List[Any]: + """Run the adapter's ``discover_tracks`` when any track needs it. + + Most sources are no-ops here (their tracks have provider IDs + already). LB / Last.fm are the ones that actually do work.""" + if not tracks: + return tracks + if not any(getattr(t, "needs_discovery", False) for t in tracks): + return tracks + registry = deps.playlist_source_registry + if registry is None: + return tracks + adapter = registry.get_source(source) + if adapter is None: + return tracks + try: + return adapter.discover_tracks(tracks) + except Exception as exc: + deps.logger.warning(f"{source} discover_tracks failed: {exc}") + return tracks + + class _SkipPlaylist(Exception): """Internal sentinel: source-specific soft-skip (e.g. not authed). diff --git a/core/discovery/matching.py b/core/discovery/matching.py new file mode 100644 index 00000000..4b89da4b --- /dev/null +++ b/core/discovery/matching.py @@ -0,0 +1,134 @@ +"""Pure helper for matching raw MusicBrainz-metadata tracks against +Spotify / iTunes. + +Used by the PlaylistSource adapters whose ``get_playlist`` returns +tracks with ``needs_discovery=True`` (ListenBrainz, Last.fm radio). +Phase 1b ships Strategy 1 only (matching-engine queries → search → +score → pick best ≥0.9). The richer multi-strategy + +discovery-cache flow stays in +``core.discovery.listenbrainz.run_listenbrainz_discovery_worker`` +for the Discover-page state-machine UI; this helper is the slimmer +version used by the auto-refresh pipeline. +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import Any, Callable, Dict, List, Optional + +logger = logging.getLogger(__name__) + + +@dataclass +class MBMatchDeps: + """Bundle of primitives the matcher needs. + + Wired up at bootstrap. Tests pass stub callables / clients.""" + + matching_engine: Any + score_candidates: Callable[..., Any] + spotify_client_getter: Callable[[], Any] + itunes_client_getter: Callable[[], Any] + prefer_spotify_getter: Callable[[], bool] + min_confidence: float = 0.9 + + +def match_mb_track( + track: Dict[str, Any], deps: MBMatchDeps +) -> Optional[Dict[str, Any]]: + """Try to match a single MB-metadata track. + + Input shape: + ``{'track_name', 'artist_name', 'album_name', 'duration_ms'}`` + + Returns the matched_data dict (Spotify/iTunes track projection) + or ``None`` when no candidate cleared the confidence threshold. + """ + title = track.get("track_name") or "" + artist = track.get("artist_name") or "" + album = track.get("album_name") or "" + duration_ms = int(track.get("duration_ms") or 0) + if not title or not artist: + return None + + spotify_client = deps.spotify_client_getter() + itunes_client = deps.itunes_client_getter() + use_spotify = bool( + deps.prefer_spotify_getter() + and spotify_client is not None + and getattr(spotify_client, "is_spotify_authenticated", lambda: False)() + ) + if not use_spotify and itunes_client is None: + return None + + # Strategy 1 — matching-engine query generation. + try: + temp_track = type("_TempTrack", (), { + "name": title, + "artists": [artist], + "album": album or None, + })() + queries = deps.matching_engine.generate_download_queries(temp_track) + except Exception as exc: + logger.debug(f"matching_engine query-gen failed: {exc}") + queries = [f"{artist} {title}", title] + + best_match: Any = None + best_confidence = 0.0 + for query in queries: + try: + if use_spotify: + results = spotify_client.search_tracks(query, limit=10) + else: + results = itunes_client.search_tracks(query, limit=10) + except Exception as exc: + logger.debug(f"search failed for query={query!r}: {exc}") + continue + if not results: + continue + try: + match, confidence, _ = deps.score_candidates( + title, artist, duration_ms, results + ) + except Exception as exc: + logger.debug(f"score_candidates failed: {exc}") + continue + if match and confidence > best_confidence and confidence >= deps.min_confidence: + best_match = match + best_confidence = confidence + if best_confidence >= deps.min_confidence: + break + + if not best_match: + return None + + provider = "spotify" if use_spotify else "itunes" + image_url = getattr(best_match, "image_url", None) or "" + album_data: Dict[str, Any] = { + "name": getattr(best_match, "album", "") or "", + } + if image_url: + album_data["images"] = [{"url": image_url}] + return { + "id": getattr(best_match, "id", "") or "", + "name": getattr(best_match, "name", "") or "", + "artists": list(getattr(best_match, "artists", []) or []), + "album": album_data, + "duration_ms": int(getattr(best_match, "duration_ms", 0) or 0), + "image_url": image_url, + "source": provider, + "_provider": provider, + "_confidence": float(best_confidence), + } + + +def match_mb_tracks( + tracks: List[Dict[str, Any]], deps: MBMatchDeps +) -> List[Optional[Dict[str, Any]]]: + """Vectorized variant — runs ``match_mb_track`` per track. + + Phase 1b is sequential. If profiling shows it's too slow on big + LB playlists, this becomes the natural spot to thread-pool the + per-track searches.""" + return [match_mb_track(t, deps) for t in tracks] diff --git a/core/playlists/sources/base.py b/core/playlists/sources/base.py index 1de22e4c..dda63416 100644 --- a/core/playlists/sources/base.py +++ b/core/playlists/sources/base.py @@ -24,8 +24,9 @@ Discovery flag: from __future__ import annotations +from abc import ABC, abstractmethod from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Protocol, runtime_checkable +from typing import Any, Dict, List, Optional # Canonical source identifiers used as the key in mirrored_playlists.source @@ -110,21 +111,27 @@ class PlaylistDetail: tracks: List[NormalizedTrack] = field(default_factory=list) -@runtime_checkable -class PlaylistSource(Protocol): +class PlaylistSource(ABC): """Contract every playlist source adapter implements. Capability flags let callers query the adapter's shape before invoking it (e.g. ``supports_listing=False`` for URL-only sources means the Sync page should render a paste-URL input instead of a playlist picker). + + ABC rather than Protocol so we can ship a concrete default for + ``discover_tracks`` (sources without provider matching just return + the input list unchanged) — only the MB-metadata-only sources + (ListenBrainz, Last.fm radio) need to override. """ - name: str - supports_listing: bool - supports_refresh: bool - requires_auth: bool + # Class-level attributes; subclasses pin them to concrete values. + name: str = "" + supports_listing: bool = True + supports_refresh: bool = True + requires_auth: bool = False + @abstractmethod def is_authenticated(self) -> bool: """Return True if the adapter can currently call its backend. @@ -132,12 +139,14 @@ class PlaylistSource(Protocol): this is always True. For sources where auth check is expensive, the adapter may cache (existing clients already do this).""" + @abstractmethod def list_playlists(self) -> List[PlaylistMeta]: """Return all playlists the user has access to. For ``supports_listing=False`` sources, return ``[]`` and let the caller use ``get_playlist`` with a URL/ID directly.""" + @abstractmethod def get_playlist(self, playlist_id: str) -> Optional[PlaylistDetail]: """Fetch full playlist (meta + tracks) by source-native ID. @@ -145,12 +154,30 @@ class PlaylistSource(Protocol): backed sources it's the native ID string. Returns ``None`` if the playlist isn't reachable (404, auth failure, etc.).""" + @abstractmethod def refresh_playlist(self, playlist_id: str) -> Optional[PlaylistDetail]: """Re-fetch a playlist for the auto-refresh pipeline. - Default behavior is identical to ``get_playlist``. Sources whose - refresh has side effects (e.g. ListenBrainz cache update, - SoulSync Discovery regeneration) override this.""" + Default behavior is usually identical to ``get_playlist``. + Sources whose refresh has side effects (e.g. ListenBrainz cache + update, SoulSync Discovery regeneration) do real work here.""" + + def discover_tracks(self, tracks: List[NormalizedTrack]) -> List[NormalizedTrack]: + """Match raw tracks against a provider (Spotify / iTunes / etc.). + + Default no-op: returns ``tracks`` unchanged. Only the MB- + metadata-only sources (ListenBrainz, Last.fm radio) override + this — every other adapter already returns tracks with + ``needs_discovery=False`` and provider IDs filled in. + + Matched tracks should have ``extra['discovered']=True`` + + ``extra['matched_data']`` populated so ``to_mirror_track_dict`` + produces the canonical ``extra_data`` JSON shape downstream + consumers (mirrored-playlist DB, sync pipeline, wishlist) + already expect. Unmatched tracks should be returned as-is + with ``needs_discovery`` left True so the caller can decide + what to do (mark as wing-it, skip, retry later).""" + return tracks # ─── projection helpers ──────────────────────────────────────────────── diff --git a/core/playlists/sources/bootstrap.py b/core/playlists/sources/bootstrap.py index 0b3b0855..1d2aa77b 100644 --- a/core/playlists/sources/bootstrap.py +++ b/core/playlists/sources/bootstrap.py @@ -51,6 +51,7 @@ def build_playlist_source_registry( lastfm_manager_getter: Optional[Callable[[], Any]] = None, personalized_manager_getter: Optional[Callable[[], Any]] = None, profile_id_getter: Optional[Callable[[], int]] = None, + discover_callable: Optional[Callable[..., Any]] = None, ) -> PlaylistSourceRegistry: """Build a fresh registry with every default adapter registered. @@ -80,11 +81,17 @@ def build_playlist_source_registry( _no_manager = lambda: None reg.register( SOURCE_LISTENBRAINZ, - lambda: ListenBrainzPlaylistSource(listenbrainz_manager_getter or _no_manager), + lambda: ListenBrainzPlaylistSource( + listenbrainz_manager_getter or _no_manager, + discover_callable=discover_callable, + ), ) reg.register( SOURCE_LASTFM, - lambda: LastFMPlaylistSource(lastfm_manager_getter or _no_manager), + lambda: LastFMPlaylistSource( + lastfm_manager_getter or _no_manager, + discover_callable=discover_callable, + ), ) reg.register( SOURCE_SOULSYNC_DISCOVERY, diff --git a/core/playlists/sources/lastfm.py b/core/playlists/sources/lastfm.py index b5697f6b..ee13206c 100644 --- a/core/playlists/sources/lastfm.py +++ b/core/playlists/sources/lastfm.py @@ -21,6 +21,10 @@ from core.playlists.sources.base import ( PlaylistSource, SOURCE_LASTFM, ) +from core.playlists.sources.listenbrainz import ( + DiscoverCallable, + ListenBrainzPlaylistSource, +) LASTFM_PLAYLIST_TYPE = "lastfm_radio" @@ -32,10 +36,19 @@ class LastFMPlaylistSource(PlaylistSource): supports_refresh = False # Refresh requires re-running the radio generator requires_auth = True - def __init__(self, manager_getter: Callable[[], Any]): + def __init__( + self, + manager_getter: Callable[[], Any], + discover_callable: Optional[DiscoverCallable] = None, + ): """``manager_getter`` returns the profile's ``ListenBrainzManager`` - (Last.fm radio playlists share that storage layer).""" + (Last.fm radio playlists share that storage layer). + + ``discover_callable`` runs matching-engine + provider search; + Last.fm radio tracks are MB-metadata only, so this is needed + for ``discover_tracks`` to do real work.""" self._manager_getter = manager_getter + self._discover_callable = discover_callable def _manager(self): return self._manager_getter() @@ -85,6 +98,10 @@ class LastFMPlaylistSource(PlaylistSource): # snapshot. return self.get_playlist(playlist_id) + # Discovery shares the LB adapter's implementation — same track + # shape (MB metadata), same matching needs. + discover_tracks = ListenBrainzPlaylistSource.discover_tracks + # ---- projection helpers ------------------------------------------------ def _meta_from_cache_row(self, row: Dict[str, Any]) -> PlaylistMeta: diff --git a/core/playlists/sources/listenbrainz.py b/core/playlists/sources/listenbrainz.py index 861045eb..d6c96107 100644 --- a/core/playlists/sources/listenbrainz.py +++ b/core/playlists/sources/listenbrainz.py @@ -25,6 +25,12 @@ from core.playlists.sources.base import ( ) +# Type alias for the discovery callable: takes a list of MB-shaped +# track dicts, returns a parallel list of matched_data dicts (or None +# when no match). Kept narrow so test stubs are easy. +DiscoverCallable = Callable[[List[Dict[str, Any]]], List[Optional[Dict[str, Any]]]] + + class ListenBrainzPlaylistSource(PlaylistSource): name = SOURCE_LISTENBRAINZ supports_listing = True @@ -36,11 +42,20 @@ class ListenBrainzPlaylistSource(PlaylistSource): # ``meta.extra['playlist_type']`` if it wants per-type sub-tabs. PLAYLIST_TYPES = ("created_for_user", "user_created", "collaborative") - def __init__(self, manager_getter: Callable[[], Any]): + def __init__( + self, + manager_getter: Callable[[], Any], + discover_callable: Optional[DiscoverCallable] = None, + ): """``manager_getter`` returns a live ``ListenBrainzManager`` for the current profile. ``None`` is allowed and means "no LB - configured" — adapter degrades to empty results.""" + configured" — adapter degrades to empty results. + + ``discover_callable`` runs the actual matching-engine + provider + search. ``None`` means no discovery is wired (Phase 0 default): + ``discover_tracks`` returns the input list unchanged.""" self._manager_getter = manager_getter + self._discover_callable = discover_callable def _manager(self): return self._manager_getter() @@ -105,6 +120,68 @@ class ListenBrainzPlaylistSource(PlaylistSource): tracks = [self._track_from_cache_row(t, idx) for idx, t in enumerate(tracks_raw)] return PlaylistDetail(meta=meta, tracks=tracks) + def discover_tracks(self, tracks: List[NormalizedTrack]) -> List[NormalizedTrack]: + """Run each MB-metadata track through the matching engine. + + Tracks with ``needs_discovery=False`` (e.g. already-matched + survivors of a previous refresh) pass through unchanged. + Matched tracks get ``extra['discovered']=True`` + a + ``matched_data`` block so the projection helper can produce + the canonical ``extra_data`` JSON; ``needs_discovery`` flips + to False on them. + + Unmatched tracks stay ``needs_discovery=True`` so the caller + can decide how to handle them (wing-it stub, skip, retry).""" + if not tracks or self._discover_callable is None: + return tracks + + to_match: List[Dict[str, Any]] = [] + match_indices: List[int] = [] + for idx, t in enumerate(tracks): + if not t.needs_discovery: + continue + to_match.append({ + "track_name": t.track_name, + "artist_name": t.artist_name, + "album_name": t.album_name or "", + "duration_ms": t.duration_ms or 0, + }) + match_indices.append(idx) + + if not to_match: + return tracks + + try: + matched = self._discover_callable(to_match) or [] + except Exception: + return tracks + + out = list(tracks) + for slot_idx, result in zip(match_indices, matched): + if not result: + continue + track = out[slot_idx] + provider = result.pop("_provider", None) or "unknown" + confidence = result.pop("_confidence", None) + new_extra = dict(track.extra or {}) + new_extra["discovered"] = True + new_extra["provider"] = provider + if confidence is not None: + new_extra["confidence"] = confidence + new_extra["matched_data"] = result + out[slot_idx] = NormalizedTrack( + position=track.position, + track_name=track.track_name, + artist_name=track.artist_name, + album_name=track.album_name, + duration_ms=track.duration_ms, + source_track_id=result.get("id") or track.source_track_id, + image_url=result.get("image_url") or track.image_url, + needs_discovery=False, + extra=new_extra, + ) + return out + def refresh_playlist(self, playlist_id: str) -> Optional[PlaylistDetail]: """Trigger a manager-side refresh, then return the new snapshot. diff --git a/tests/automation/test_handlers_playlist.py b/tests/automation/test_handlers_playlist.py index 02808d4b..740ff16e 100644 --- a/tests/automation/test_handlers_playlist.py +++ b/tests/automation/test_handlers_playlist.py @@ -433,6 +433,110 @@ class TestRefreshMirrored: assert parsed_calls == ['https://youtube.com/playlist?list=yt_pl'] assert db.mirror_calls[0]['tracks'][0]['source_track_id'] == 'vid1' + def test_listenbrainz_refresh_runs_discovery_and_writes_matched_data(self): + """End-to-end: LB cached playlist → adapter projects to + NormalizedTrack with needs_discovery=True → refresh_mirrored + calls source.discover_tracks → matched_data lands in + extra_data on the mirror DB row.""" + from core.playlists.sources.bootstrap import build_playlist_source_registry + from core.playlists.sources.listenbrainz import ListenBrainzPlaylistSource + + class _StubLBManager: + def get_cached_playlists(self, playlist_type): + if playlist_type == 'created_for_user': + return [{ + 'playlist_mbid': 'lb-1', + 'title': 'LB Weekly', + 'creator': 'ListenBrainz', + 'track_count': 1, + 'annotation': {}, + 'last_updated': '2026-05-26', + }] + return [] + + def get_playlist_type(self, mbid): + return 'created_for_user' if mbid == 'lb-1' else '' + + def get_cached_tracks(self, mbid): + if mbid == 'lb-1': + return [{ + 'track_name': 'MB Song', + 'artist_name': 'MB Artist', + 'album_name': 'MB Album', + 'duration_ms': 240_000, + 'recording_mbid': 'rec-1', + 'release_mbid': 'rel-1', + 'album_cover_url': '', + 'additional_metadata': {}, + }] + return [] + + def update_all_playlists(self): + pass + + discovery_calls = [] + + def fake_discover(track_dicts): + discovery_calls.append(list(track_dicts)) + return [{ + 'id': 'sp-matched', + 'name': 'Matched', + 'artists': ['Spotify Artist'], + 'album': {'name': 'Spotify Album'}, + 'duration_ms': 240_000, + 'image_url': 'art', + 'source': 'spotify', + '_provider': 'spotify', + '_confidence': 0.93, + }] + + # Build a registry with the LB adapter pre-wired with discovery. + # The default _build_deps registry won't have discovery wired, + # so we override it for this test. + registry = build_playlist_source_registry( + spotify_client_getter=lambda: None, + tidal_client_getter=lambda: None, + qobuz_client_getter=lambda: None, + deezer_client_getter=lambda: None, + listenbrainz_manager_getter=lambda: _StubLBManager(), + discover_callable=fake_discover, + ) + + db = _StubDB(playlists=[ + { + 'id': 33, + 'name': 'LB Weekly', + 'source': 'listenbrainz', + 'source_playlist_id': 'lb-1', + 'profile_id': 1, + }, + ]) + deps = _build_deps(get_database=lambda: db) + # Replace the default registry with the one wired up above. + # AutomationDeps is a frozen dataclass-like — re-assign via + # object.__setattr__ since it's a plain dataclass. + object.__setattr__(deps, 'playlist_source_registry', registry) + + result = auto_refresh_mirrored({'playlist_id': '33'}, deps) + + assert result['status'] == 'completed' + assert result['refreshed'] == '1' + # discover_tracks ran once, with the single MB track. + assert len(discovery_calls) == 1 + assert discovery_calls[0][0]['track_name'] == 'MB Song' + assert discovery_calls[0][0]['artist_name'] == 'MB Artist' + + # Mirror DB row carries the matched_data extra_data. + call = db.mirror_calls[0] + assert call['source'] == 'listenbrainz' + assert len(call['tracks']) == 1 + track = call['tracks'][0] + assert track['source_track_id'] == 'sp-matched' + extra = json.loads(track['extra_data']) + assert extra['discovered'] is True + assert extra['provider'] == 'spotify' + assert extra['matched_data']['id'] == 'sp-matched' + def test_spotify_public_uses_authed_spotify_when_signed_in(self): """The handler-level fallback chain: when Spotify is authed AND the public URL is a playlist URL, prefer the authed API so diff --git a/tests/test_playlist_sources_adapters.py b/tests/test_playlist_sources_adapters.py index 20355ffa..e6b443ff 100644 --- a/tests/test_playlist_sources_adapters.py +++ b/tests/test_playlist_sources_adapters.py @@ -690,6 +690,128 @@ def test_mirror_dict_spotify_authed_emits_matched_data(): assert extra["matched_data"]["artists"] == [{"name": "Adele"}] +def test_default_discover_tracks_is_no_op(): + """Adapters whose tracks already carry provider IDs (Spotify, + Tidal, Qobuz, YouTube, Deezer, Spotify-public, iTunes-link, + SoulSync-Discovery) inherit the ABC default — return tracks + unchanged.""" + track = NormalizedTrack( + position=0, + track_name="Song", + artist_name="Artist", + source_track_id="abc", + needs_discovery=False, + ) + src = SpotifyPlaylistSource(lambda: None) + out = src.discover_tracks([track]) + assert out == [track] + + +def test_listenbrainz_discover_tracks_uses_callable(): + """When the LB adapter is wired with a discover_callable, MB + tracks get matched_data populated; ``needs_discovery`` flips to + False on matches; non-matches stay as-is.""" + + def fake_discover(track_dicts): + # Match the first, leave second unmatched. + return [ + { + "id": "matched-1", + "name": "Matched", + "artists": ["Artist 1"], + "album": {"name": "Album"}, + "duration_ms": 200_000, + "image_url": "art", + "source": "spotify", + "_provider": "spotify", + "_confidence": 0.95, + }, + None, + ] + + src = ListenBrainzPlaylistSource( + lambda: None, + discover_callable=fake_discover, + ) + tracks = [ + NormalizedTrack( + position=0, + track_name="Song A", + artist_name="Artist 1", + source_track_id="mbid-1", + needs_discovery=True, + ), + NormalizedTrack( + position=1, + track_name="Song B", + artist_name="Artist 2", + source_track_id="mbid-2", + needs_discovery=True, + ), + ] + out = src.discover_tracks(tracks) + assert len(out) == 2 + + assert out[0].needs_discovery is False + assert out[0].source_track_id == "matched-1" + assert out[0].extra["discovered"] is True + assert out[0].extra["provider"] == "spotify" + assert out[0].extra["confidence"] == 0.95 + assert out[0].extra["matched_data"]["id"] == "matched-1" + + # Unmatched stays as-is. + assert out[1].needs_discovery is True + assert out[1].source_track_id == "mbid-2" + assert "matched_data" not in (out[1].extra or {}) + + +def test_listenbrainz_discover_tracks_no_callable_is_no_op(): + """If no ``discover_callable`` is wired, the adapter returns the + list unchanged — refresh paths that haven't enabled discovery + still work.""" + src = ListenBrainzPlaylistSource(lambda: None, discover_callable=None) + tracks = [ + NormalizedTrack( + position=0, + track_name="T", + artist_name="A", + needs_discovery=True, + ) + ] + assert src.discover_tracks(tracks) == tracks + + +def test_lastfm_discover_tracks_shares_listenbrainz_implementation(): + """Last.fm radio tracks have the same MB-metadata shape as LB + tracks, so the adapter reuses LB's ``discover_tracks``.""" + + def fake_discover(track_dicts): + return [{ + "id": "lfm-matched", + "name": "Match", + "artists": ["Artist"], + "album": {"name": ""}, + "duration_ms": 200_000, + "image_url": "", + "source": "spotify", + "_provider": "spotify", + }] + + src = LastFMPlaylistSource(lambda: None, discover_callable=fake_discover) + tracks = [ + NormalizedTrack( + position=0, + track_name="T", + artist_name="A", + needs_discovery=True, + ) + ] + out = src.discover_tracks(tracks) + assert out[0].needs_discovery is False + assert out[0].source_track_id == "lfm-matched" + assert out[0].extra["matched_data"]["id"] == "lfm-matched" + + def test_mirror_dict_spotify_public_emits_spotify_hint(): """Public-embed path: track ID known but album art / canonical metadata missing, so we emit a ``spotify_hint`` for the discovery diff --git a/web_server.py b/web_server.py index beca48b3..d49e238c 100644 --- a/web_server.py +++ b/web_server.py @@ -991,6 +991,23 @@ def _register_automation_handlers(): # mirrors is not yet wired (current handler skips them). return None + # Discovery callable for the LB / Last.fm adapters' ``discover_tracks``. + # Wraps the pure ``match_mb_tracks`` helper with the live matching + # engine + Spotify / iTunes clients. Adapter calls it per refresh + # when any track has ``needs_discovery=True``. + from core.discovery.matching import MBMatchDeps, match_mb_tracks + + _mb_match_deps = MBMatchDeps( + matching_engine=matching_engine, + score_candidates=_discovery_score_candidates, + spotify_client_getter=lambda: spotify_client, + itunes_client_getter=_get_itunes_client, + prefer_spotify_getter=lambda: (_get_active_discovery_source() == 'spotify'), + ) + + def _discover_callable_for_registry(tracks): + return match_mb_tracks(tracks, _mb_match_deps) + _playlist_source_registry = build_playlist_source_registry( spotify_client_getter=lambda: spotify_client, tidal_client_getter=lambda: tidal_client, @@ -1002,6 +1019,7 @@ def _register_automation_handlers(): lastfm_manager_getter=_lb_manager_for_registry, personalized_manager_getter=_build_personalized_manager, profile_id_getter=get_current_profile_id, + discover_callable=_discover_callable_for_registry, ) _automation_deps = AutomationDeps( diff --git a/webui/static/helper.js b/webui/static/helper.js index 784c846b..aa59a98d 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3417,6 +3417,7 @@ const WHATS_NEW = { { unreleased: true }, { title: 'Groundwork: unified playlist source layer', desc: 'first slice of a refactor that\'ll let ListenBrainz, Last.fm radio, and SoulSync Discovery playlists live as Sync-page tabs alongside Spotify / Tidal / Qobuz / YouTube — so they can be mirrored + scheduled like the rest. this commit adds the shared adapter layer all those sources will plug into; no UI changes yet. nothing to do on your end.' }, { title: 'Auto-Sync refresh now routes through the unified source layer', desc: 'follow-up to the groundwork above. the mirrored-playlist auto-refresh handler used to have a ~190-line if/elif chain branching per source (one branch each for Spotify, Spotify public, Deezer, Tidal, YouTube). now it asks the source registry for the right adapter and calls one refresh method. behavior identical — same matched_data, same Tidal-skip-on-no-auth log, same Spotify-public-prefers-authed-API fallback. unlocks ListenBrainz / Last.fm / SoulSync Discovery as future Sync-page mirror sources without a fresh elif branch each time.' }, + { title: 'Discovery folded into the unified source contract', desc: 'next slice of the groundwork. each playlist source can now answer one extra question — "match these raw tracks against Spotify / iTunes" — through the same adapter interface. Spotify / Tidal / Qobuz / YouTube / Deezer / Spotify-public / iTunes-link / SoulSync-Discovery all answer trivially (their tracks already have provider IDs); ListenBrainz + Last.fm run the matching engine. mirror-refresh now calls this automatically when a source returns MB-metadata-only tracks, so when ListenBrainz becomes a Sync-page tab next commit, its mirrors land already discovered + ready to sync — no separate Discover-page round-trip needed.' }, ], '2.6.2': [ { date: 'May 24, 2026 — 2.6.2 release' },