From 246503066b05a67a14b68b2ec83f7f5fe48c1609 Mon Sep 17 00:00:00 2001
From: Broque Thomas <26755000+Nezreka@users.noreply.github.com>
Date: Tue, 26 May 2026 13:07:01 -0700
Subject: [PATCH] Fold provider-matching into PlaylistSource contract (Phase
 1b)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds ``discover_tracks(tracks) -> List[NormalizedTrack]`` to the
PlaylistSource interface. Sources whose tracks already carry
provider IDs (Spotify, Tidal, Qobuz, YouTube, Deezer, Spotify
public, iTunes link, SoulSync Discovery) inherit a no-op default;
ListenBrainz + Last.fm override to run the matching engine.

This closes the last gap before LB / Last.fm / SoulSync Discovery
can land as Sync-page mirror sources: the refresh handler now
calls ``source.discover_tracks(...)`` whenever a source returns
tracks with ``needs_discovery=True``, so mirrored LB rows arrive
already discovered + ready for the sync pipeline. Previously, LB
playlists ran through a separate state-machine worker tied to the
Discover-page UI, with results stored in ``discovery_cache``
instead of ``mirrored_playlist_tracks.extra_data``.

Changes:

- ``core/playlists/sources/base.py`` — PlaylistSource switches from
  Protocol to ABC so a concrete default for ``discover_tracks``
  can live on the base class. The four real-work methods stay
  ``@abstractmethod``; instantiating an adapter that forgets one
  fails loudly at construction.
- ``core/discovery/matching.py`` (new) — pure ``match_mb_tracks``
  helper that runs Strategy-1-only matching-engine queries against
  Spotify (primary) or iTunes (fallback). No state machine, no
  discovery-cache writes, no wing-it stub — that richer flow stays
  in ``core/discovery/listenbrainz.py`` for the Discover-page UI.
- ``ListenBrainzPlaylistSource`` + ``LastFMPlaylistSource`` take
  an optional ``discover_callable`` constructor arg. Last.fm reuses
  the LB implementation since the track shape is identical.
- ``bootstrap.build_playlist_source_registry`` accepts a
  ``discover_callable`` kwarg and wires it into LB + Last.fm
  adapters.
- ``web_server.py`` boot constructs the discovery callable from the
  existing matching engine + ``_discovery_score_candidates`` +
  Spotify / iTunes clients, passes through to the registry.
- ``refresh_mirrored.py`` adds a small ``_maybe_discover`` helper
  that calls ``source.discover_tracks(...)`` between fetch and
  ``to_mirror_track_dict`` projection — only fires when at least
  one track has ``needs_discovery=True``, so the normal Spotify /
  Tidal / etc. refresh path stays a zero-cost pass-through.

Tests:

- 5 new adapter tests: default no-op pass-through, LB discovery
  with mixed matches/misses, LB no-callable fallback, Last.fm
  shares the LB implementation, mirror-dict spotify_hint emit.
- 1 new automation test: end-to-end LB refresh with a stub
  discover_callable proves the matched_data lands in
  ``mirror_playlist_tracks.extra_data`` after the registry
  refresh + discover hop.

225 tests across adapter + automation suites green.
---
 core/automation/handlers/refresh_mirrored.py |  34 ++++-
 core/discovery/matching.py                   | 134 +++++++++++++++++++
 core/playlists/sources/base.py               |  47 +++++--
 core/playlists/sources/bootstrap.py          |  11 +-
 core/playlists/sources/lastfm.py             |  21 ++-
 core/playlists/sources/listenbrainz.py       |  81 ++++++++++-
 tests/automation/test_handlers_playlist.py   | 104 ++++++++++++++
 tests/test_playlist_sources_adapters.py      | 122 +++++++++++++++++
 web_server.py                                |  18 +++
 webui/static/helper.js                       |   1 +
 10 files changed, 556 insertions(+), 17 deletions(-)
 create mode 100644 core/discovery/matching.py

diff --git a/core/automation/handlers/refresh_mirrored.py b/core/automation/handlers/refresh_mirrored.py
index 98418c8a..12eccb8d 100644
--- a/core/automation/handlers/refresh_mirrored.py
+++ b/core/automation/handlers/refresh_mirrored.py
@@ -89,7 +89,13 @@ def auto_refresh_mirrored(config: Dict[str, Any], deps: AutomationDeps) -> Dict[
                 )
                 continue
 
-            tracks = [to_mirror_track_dict(t) for t in detail.tracks]
+            # Sources that return MB-metadata-only tracks (LB, Last.fm)
+            # mark them ``needs_discovery=True``. Hand them to the
+            # adapter's matcher so the resulting mirror rows carry
+            # provider IDs + matched_data, ready for the sync pipeline.
+            detail_tracks = _maybe_discover(detail.tracks, source, deps)
+
+            tracks = [to_mirror_track_dict(t) for t in detail_tracks]
             refreshed += _commit_refresh(pl, source, source_id, tracks, db, deps, auto_id)
         except _SkipPlaylist:
             # Source-specific soft-skip (e.g. Tidal not authenticated).
@@ -105,6 +111,32 @@ def auto_refresh_mirrored(config: Dict[str, Any], deps: AutomationDeps) -> Dict[
     return {'status': 'completed', 'refreshed': str(refreshed), 'errors': str(len(errors))}
 
 
+def _maybe_discover(
+    tracks: List[Any],
+    source: str,
+    deps: AutomationDeps,
+) -> List[Any]:
+    """Run the adapter's ``discover_tracks`` when any track needs it.
+
+    Most sources are no-ops here (their tracks have provider IDs
+    already). LB / Last.fm are the ones that actually do work."""
+    if not tracks:
+        return tracks
+    if not any(getattr(t, "needs_discovery", False) for t in tracks):
+        return tracks
+    registry = deps.playlist_source_registry
+    if registry is None:
+        return tracks
+    adapter = registry.get_source(source)
+    if adapter is None:
+        return tracks
+    try:
+        return adapter.discover_tracks(tracks)
+    except Exception as exc:
+        deps.logger.warning(f"{source} discover_tracks failed: {exc}")
+        return tracks
+
+
 class _SkipPlaylist(Exception):
     """Internal sentinel: source-specific soft-skip (e.g. not authed).
 
diff --git a/core/discovery/matching.py b/core/discovery/matching.py
new file mode 100644
index 00000000..4b89da4b
--- /dev/null
+++ b/core/discovery/matching.py
@@ -0,0 +1,134 @@
+"""Pure helper for matching raw MusicBrainz-metadata tracks against
+Spotify / iTunes.
+
+Used by the PlaylistSource adapters whose ``get_playlist`` returns
+tracks with ``needs_discovery=True`` (ListenBrainz, Last.fm radio).
+Phase 1b ships Strategy 1 only (matching-engine queries → search →
+score → pick best ≥0.9). The richer multi-strategy +
+discovery-cache flow stays in
+``core.discovery.listenbrainz.run_listenbrainz_discovery_worker``
+for the Discover-page state-machine UI; this helper is the slimmer
+version used by the auto-refresh pipeline.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import Any, Callable, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class MBMatchDeps:
+    """Bundle of primitives the matcher needs.
+
+    Wired up at bootstrap. Tests pass stub callables / clients."""
+
+    matching_engine: Any
+    score_candidates: Callable[..., Any]
+    spotify_client_getter: Callable[[], Any]
+    itunes_client_getter: Callable[[], Any]
+    prefer_spotify_getter: Callable[[], bool]
+    min_confidence: float = 0.9
+
+
+def match_mb_track(
+    track: Dict[str, Any], deps: MBMatchDeps
+) -> Optional[Dict[str, Any]]:
+    """Try to match a single MB-metadata track.
+
+    Input shape:
+        ``{'track_name', 'artist_name', 'album_name', 'duration_ms'}``
+
+    Returns the matched_data dict (Spotify/iTunes track projection)
+    or ``None`` when no candidate cleared the confidence threshold.
+    """
+    title = track.get("track_name") or ""
+    artist = track.get("artist_name") or ""
+    album = track.get("album_name") or ""
+    duration_ms = int(track.get("duration_ms") or 0)
+    if not title or not artist:
+        return None
+
+    spotify_client = deps.spotify_client_getter()
+    itunes_client = deps.itunes_client_getter()
+    use_spotify = bool(
+        deps.prefer_spotify_getter()
+        and spotify_client is not None
+        and getattr(spotify_client, "is_spotify_authenticated", lambda: False)()
+    )
+    if not use_spotify and itunes_client is None:
+        return None
+
+    # Strategy 1 — matching-engine query generation.
+    try:
+        temp_track = type("_TempTrack", (), {
+            "name": title,
+            "artists": [artist],
+            "album": album or None,
+        })()
+        queries = deps.matching_engine.generate_download_queries(temp_track)
+    except Exception as exc:
+        logger.debug(f"matching_engine query-gen failed: {exc}")
+        queries = [f"{artist} {title}", title]
+
+    best_match: Any = None
+    best_confidence = 0.0
+    for query in queries:
+        try:
+            if use_spotify:
+                results = spotify_client.search_tracks(query, limit=10)
+            else:
+                results = itunes_client.search_tracks(query, limit=10)
+        except Exception as exc:
+            logger.debug(f"search failed for query={query!r}: {exc}")
+            continue
+        if not results:
+            continue
+        try:
+            match, confidence, _ = deps.score_candidates(
+                title, artist, duration_ms, results
+            )
+        except Exception as exc:
+            logger.debug(f"score_candidates failed: {exc}")
+            continue
+        if match and confidence > best_confidence and confidence >= deps.min_confidence:
+            best_match = match
+            best_confidence = confidence
+        if best_confidence >= deps.min_confidence:
+            break
+
+    if not best_match:
+        return None
+
+    provider = "spotify" if use_spotify else "itunes"
+    image_url = getattr(best_match, "image_url", None) or ""
+    album_data: Dict[str, Any] = {
+        "name": getattr(best_match, "album", "") or "",
+    }
+    if image_url:
+        album_data["images"] = [{"url": image_url}]
+    return {
+        "id": getattr(best_match, "id", "") or "",
+        "name": getattr(best_match, "name", "") or "",
+        "artists": list(getattr(best_match, "artists", []) or []),
+        "album": album_data,
+        "duration_ms": int(getattr(best_match, "duration_ms", 0) or 0),
+        "image_url": image_url,
+        "source": provider,
+        "_provider": provider,
+        "_confidence": float(best_confidence),
+    }
+
+
+def match_mb_tracks(
+    tracks: List[Dict[str, Any]], deps: MBMatchDeps
+) -> List[Optional[Dict[str, Any]]]:
+    """Vectorized variant — runs ``match_mb_track`` per track.
+
+    Phase 1b is sequential. If profiling shows it's too slow on big
+    LB playlists, this becomes the natural spot to thread-pool the
+    per-track searches."""
+    return [match_mb_track(t, deps) for t in tracks]
diff --git a/core/playlists/sources/base.py b/core/playlists/sources/base.py
index 1de22e4c..dda63416 100644
--- a/core/playlists/sources/base.py
+++ b/core/playlists/sources/base.py
@@ -24,8 +24,9 @@ Discovery flag:
 
 from __future__ import annotations
 
+from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Protocol, runtime_checkable
+from typing import Any, Dict, List, Optional
 
 
 # Canonical source identifiers used as the key in mirrored_playlists.source
@@ -110,21 +111,27 @@ class PlaylistDetail:
     tracks: List[NormalizedTrack] = field(default_factory=list)
 
 
-@runtime_checkable
-class PlaylistSource(Protocol):
+class PlaylistSource(ABC):
     """Contract every playlist source adapter implements.
 
     Capability flags let callers query the adapter's shape before
     invoking it (e.g. ``supports_listing=False`` for URL-only sources
     means the Sync page should render a paste-URL input instead of a
     playlist picker).
+
+    ABC rather than Protocol so we can ship a concrete default for
+    ``discover_tracks`` (sources without provider matching just return
+    the input list unchanged) — only the MB-metadata-only sources
+    (ListenBrainz, Last.fm radio) need to override.
     """
 
-    name: str
-    supports_listing: bool
-    supports_refresh: bool
-    requires_auth: bool
+    # Class-level attributes; subclasses pin them to concrete values.
+    name: str = ""
+    supports_listing: bool = True
+    supports_refresh: bool = True
+    requires_auth: bool = False
 
+    @abstractmethod
     def is_authenticated(self) -> bool:
         """Return True if the adapter can currently call its backend.
 
@@ -132,12 +139,14 @@ class PlaylistSource(Protocol):
         this is always True. For sources where auth check is expensive,
         the adapter may cache (existing clients already do this)."""
 
+    @abstractmethod
     def list_playlists(self) -> List[PlaylistMeta]:
         """Return all playlists the user has access to.
 
         For ``supports_listing=False`` sources, return ``[]`` and let
         the caller use ``get_playlist`` with a URL/ID directly."""
 
+    @abstractmethod
     def get_playlist(self, playlist_id: str) -> Optional[PlaylistDetail]:
         """Fetch full playlist (meta + tracks) by source-native ID.
 
@@ -145,12 +154,30 @@ class PlaylistSource(Protocol):
         backed sources it's the native ID string. Returns ``None`` if
         the playlist isn't reachable (404, auth failure, etc.)."""
 
+    @abstractmethod
     def refresh_playlist(self, playlist_id: str) -> Optional[PlaylistDetail]:
         """Re-fetch a playlist for the auto-refresh pipeline.
 
-        Default behavior is identical to ``get_playlist``. Sources whose
-        refresh has side effects (e.g. ListenBrainz cache update,
-        SoulSync Discovery regeneration) override this."""
+        Default behavior is usually identical to ``get_playlist``.
+        Sources whose refresh has side effects (e.g. ListenBrainz cache
+        update, SoulSync Discovery regeneration) do real work here."""
+
+    def discover_tracks(self, tracks: List[NormalizedTrack]) -> List[NormalizedTrack]:
+        """Match raw tracks against a provider (Spotify / iTunes / etc.).
+
+        Default no-op: returns ``tracks`` unchanged. Only the MB-
+        metadata-only sources (ListenBrainz, Last.fm radio) override
+        this — every other adapter already returns tracks with
+        ``needs_discovery=False`` and provider IDs filled in.
+
+        Matched tracks should have ``extra['discovered']=True`` +
+        ``extra['matched_data']`` populated so ``to_mirror_track_dict``
+        produces the canonical ``extra_data`` JSON shape downstream
+        consumers (mirrored-playlist DB, sync pipeline, wishlist)
+        already expect. Unmatched tracks should be returned as-is
+        with ``needs_discovery`` left True so the caller can decide
+        what to do (mark as wing-it, skip, retry later)."""
+        return tracks
 
 
 # ─── projection helpers ────────────────────────────────────────────────
diff --git a/core/playlists/sources/bootstrap.py b/core/playlists/sources/bootstrap.py
index 0b3b0855..1d2aa77b 100644
--- a/core/playlists/sources/bootstrap.py
+++ b/core/playlists/sources/bootstrap.py
@@ -51,6 +51,7 @@ def build_playlist_source_registry(
     lastfm_manager_getter: Optional[Callable[[], Any]] = None,
     personalized_manager_getter: Optional[Callable[[], Any]] = None,
     profile_id_getter: Optional[Callable[[], int]] = None,
+    discover_callable: Optional[Callable[..., Any]] = None,
 ) -> PlaylistSourceRegistry:
     """Build a fresh registry with every default adapter registered.
 
@@ -80,11 +81,17 @@ def build_playlist_source_registry(
     _no_manager = lambda: None
     reg.register(
         SOURCE_LISTENBRAINZ,
-        lambda: ListenBrainzPlaylistSource(listenbrainz_manager_getter or _no_manager),
+        lambda: ListenBrainzPlaylistSource(
+            listenbrainz_manager_getter or _no_manager,
+            discover_callable=discover_callable,
+        ),
     )
     reg.register(
         SOURCE_LASTFM,
-        lambda: LastFMPlaylistSource(lastfm_manager_getter or _no_manager),
+        lambda: LastFMPlaylistSource(
+            lastfm_manager_getter or _no_manager,
+            discover_callable=discover_callable,
+        ),
     )
     reg.register(
         SOURCE_SOULSYNC_DISCOVERY,
diff --git a/core/playlists/sources/lastfm.py b/core/playlists/sources/lastfm.py
index b5697f6b..ee13206c 100644
--- a/core/playlists/sources/lastfm.py
+++ b/core/playlists/sources/lastfm.py
@@ -21,6 +21,10 @@ from core.playlists.sources.base import (
     PlaylistSource,
     SOURCE_LASTFM,
 )
+from core.playlists.sources.listenbrainz import (
+    DiscoverCallable,
+    ListenBrainzPlaylistSource,
+)
 
 
 LASTFM_PLAYLIST_TYPE = "lastfm_radio"
@@ -32,10 +36,19 @@ class LastFMPlaylistSource(PlaylistSource):
     supports_refresh = False  # Refresh requires re-running the radio generator
     requires_auth = True
 
-    def __init__(self, manager_getter: Callable[[], Any]):
+    def __init__(
+        self,
+        manager_getter: Callable[[], Any],
+        discover_callable: Optional[DiscoverCallable] = None,
+    ):
         """``manager_getter`` returns the profile's ``ListenBrainzManager``
-        (Last.fm radio playlists share that storage layer)."""
+        (Last.fm radio playlists share that storage layer).
+
+        ``discover_callable`` runs matching-engine + provider search;
+        Last.fm radio tracks are MB-metadata only, so this is needed
+        for ``discover_tracks`` to do real work."""
         self._manager_getter = manager_getter
+        self._discover_callable = discover_callable
 
     def _manager(self):
         return self._manager_getter()
@@ -85,6 +98,10 @@ class LastFMPlaylistSource(PlaylistSource):
         # snapshot.
         return self.get_playlist(playlist_id)
 
+    # Discovery shares the LB adapter's implementation — same track
+    # shape (MB metadata), same matching needs.
+    discover_tracks = ListenBrainzPlaylistSource.discover_tracks
+
     # ---- projection helpers ------------------------------------------------
 
     def _meta_from_cache_row(self, row: Dict[str, Any]) -> PlaylistMeta:
diff --git a/core/playlists/sources/listenbrainz.py b/core/playlists/sources/listenbrainz.py
index 861045eb..d6c96107 100644
--- a/core/playlists/sources/listenbrainz.py
+++ b/core/playlists/sources/listenbrainz.py
@@ -25,6 +25,12 @@ from core.playlists.sources.base import (
 )
 
 
+# Type alias for the discovery callable: takes a list of MB-shaped
+# track dicts, returns a parallel list of matched_data dicts (or None
+# when no match). Kept narrow so test stubs are easy.
+DiscoverCallable = Callable[[List[Dict[str, Any]]], List[Optional[Dict[str, Any]]]]
+
+
 class ListenBrainzPlaylistSource(PlaylistSource):
     name = SOURCE_LISTENBRAINZ
     supports_listing = True
@@ -36,11 +42,20 @@ class ListenBrainzPlaylistSource(PlaylistSource):
     # ``meta.extra['playlist_type']`` if it wants per-type sub-tabs.
     PLAYLIST_TYPES = ("created_for_user", "user_created", "collaborative")
 
-    def __init__(self, manager_getter: Callable[[], Any]):
+    def __init__(
+        self,
+        manager_getter: Callable[[], Any],
+        discover_callable: Optional[DiscoverCallable] = None,
+    ):
         """``manager_getter`` returns a live ``ListenBrainzManager`` for
         the current profile. ``None`` is allowed and means "no LB
-        configured" — adapter degrades to empty results."""
+        configured" — adapter degrades to empty results.
+
+        ``discover_callable`` runs the actual matching-engine + provider
+        search. ``None`` means no discovery is wired (Phase 0 default):
+        ``discover_tracks`` returns the input list unchanged."""
         self._manager_getter = manager_getter
+        self._discover_callable = discover_callable
 
     def _manager(self):
         return self._manager_getter()
@@ -105,6 +120,68 @@ class ListenBrainzPlaylistSource(PlaylistSource):
         tracks = [self._track_from_cache_row(t, idx) for idx, t in enumerate(tracks_raw)]
         return PlaylistDetail(meta=meta, tracks=tracks)
 
+    def discover_tracks(self, tracks: List[NormalizedTrack]) -> List[NormalizedTrack]:
+        """Run each MB-metadata track through the matching engine.
+
+        Tracks with ``needs_discovery=False`` (e.g. already-matched
+        survivors of a previous refresh) pass through unchanged.
+        Matched tracks get ``extra['discovered']=True`` + a
+        ``matched_data`` block so the projection helper can produce
+        the canonical ``extra_data`` JSON; ``needs_discovery`` flips
+        to False on them.
+
+        Unmatched tracks stay ``needs_discovery=True`` so the caller
+        can decide how to handle them (wing-it stub, skip, retry)."""
+        if not tracks or self._discover_callable is None:
+            return tracks
+
+        to_match: List[Dict[str, Any]] = []
+        match_indices: List[int] = []
+        for idx, t in enumerate(tracks):
+            if not t.needs_discovery:
+                continue
+            to_match.append({
+                "track_name": t.track_name,
+                "artist_name": t.artist_name,
+                "album_name": t.album_name or "",
+                "duration_ms": t.duration_ms or 0,
+            })
+            match_indices.append(idx)
+
+        if not to_match:
+            return tracks
+
+        try:
+            matched = self._discover_callable(to_match) or []
+        except Exception:
+            return tracks
+
+        out = list(tracks)
+        for slot_idx, result in zip(match_indices, matched):
+            if not result:
+                continue
+            track = out[slot_idx]
+            provider = result.pop("_provider", None) or "unknown"
+            confidence = result.pop("_confidence", None)
+            new_extra = dict(track.extra or {})
+            new_extra["discovered"] = True
+            new_extra["provider"] = provider
+            if confidence is not None:
+                new_extra["confidence"] = confidence
+            new_extra["matched_data"] = result
+            out[slot_idx] = NormalizedTrack(
+                position=track.position,
+                track_name=track.track_name,
+                artist_name=track.artist_name,
+                album_name=track.album_name,
+                duration_ms=track.duration_ms,
+                source_track_id=result.get("id") or track.source_track_id,
+                image_url=result.get("image_url") or track.image_url,
+                needs_discovery=False,
+                extra=new_extra,
+            )
+        return out
+
     def refresh_playlist(self, playlist_id: str) -> Optional[PlaylistDetail]:
         """Trigger a manager-side refresh, then return the new snapshot.
 
diff --git a/tests/automation/test_handlers_playlist.py b/tests/automation/test_handlers_playlist.py
index 02808d4b..740ff16e 100644
--- a/tests/automation/test_handlers_playlist.py
+++ b/tests/automation/test_handlers_playlist.py
@@ -433,6 +433,110 @@ class TestRefreshMirrored:
         assert parsed_calls == ['https://youtube.com/playlist?list=yt_pl']
         assert db.mirror_calls[0]['tracks'][0]['source_track_id'] == 'vid1'
 
+    def test_listenbrainz_refresh_runs_discovery_and_writes_matched_data(self):
+        """End-to-end: LB cached playlist → adapter projects to
+        NormalizedTrack with needs_discovery=True → refresh_mirrored
+        calls source.discover_tracks → matched_data lands in
+        extra_data on the mirror DB row."""
+        from core.playlists.sources.bootstrap import build_playlist_source_registry
+        from core.playlists.sources.listenbrainz import ListenBrainzPlaylistSource
+
+        class _StubLBManager:
+            def get_cached_playlists(self, playlist_type):
+                if playlist_type == 'created_for_user':
+                    return [{
+                        'playlist_mbid': 'lb-1',
+                        'title': 'LB Weekly',
+                        'creator': 'ListenBrainz',
+                        'track_count': 1,
+                        'annotation': {},
+                        'last_updated': '2026-05-26',
+                    }]
+                return []
+
+            def get_playlist_type(self, mbid):
+                return 'created_for_user' if mbid == 'lb-1' else ''
+
+            def get_cached_tracks(self, mbid):
+                if mbid == 'lb-1':
+                    return [{
+                        'track_name': 'MB Song',
+                        'artist_name': 'MB Artist',
+                        'album_name': 'MB Album',
+                        'duration_ms': 240_000,
+                        'recording_mbid': 'rec-1',
+                        'release_mbid': 'rel-1',
+                        'album_cover_url': '',
+                        'additional_metadata': {},
+                    }]
+                return []
+
+            def update_all_playlists(self):
+                pass
+
+        discovery_calls = []
+
+        def fake_discover(track_dicts):
+            discovery_calls.append(list(track_dicts))
+            return [{
+                'id': 'sp-matched',
+                'name': 'Matched',
+                'artists': ['Spotify Artist'],
+                'album': {'name': 'Spotify Album'},
+                'duration_ms': 240_000,
+                'image_url': 'art',
+                'source': 'spotify',
+                '_provider': 'spotify',
+                '_confidence': 0.93,
+            }]
+
+        # Build a registry with the LB adapter pre-wired with discovery.
+        # The default _build_deps registry won't have discovery wired,
+        # so we override it for this test.
+        registry = build_playlist_source_registry(
+            spotify_client_getter=lambda: None,
+            tidal_client_getter=lambda: None,
+            qobuz_client_getter=lambda: None,
+            deezer_client_getter=lambda: None,
+            listenbrainz_manager_getter=lambda: _StubLBManager(),
+            discover_callable=fake_discover,
+        )
+
+        db = _StubDB(playlists=[
+            {
+                'id': 33,
+                'name': 'LB Weekly',
+                'source': 'listenbrainz',
+                'source_playlist_id': 'lb-1',
+                'profile_id': 1,
+            },
+        ])
+        deps = _build_deps(get_database=lambda: db)
+        # Replace the default registry with the one wired up above.
+        # AutomationDeps is a frozen dataclass-like — re-assign via
+        # object.__setattr__ since it's a plain dataclass.
+        object.__setattr__(deps, 'playlist_source_registry', registry)
+
+        result = auto_refresh_mirrored({'playlist_id': '33'}, deps)
+
+        assert result['status'] == 'completed'
+        assert result['refreshed'] == '1'
+        # discover_tracks ran once, with the single MB track.
+        assert len(discovery_calls) == 1
+        assert discovery_calls[0][0]['track_name'] == 'MB Song'
+        assert discovery_calls[0][0]['artist_name'] == 'MB Artist'
+
+        # Mirror DB row carries the matched_data extra_data.
+        call = db.mirror_calls[0]
+        assert call['source'] == 'listenbrainz'
+        assert len(call['tracks']) == 1
+        track = call['tracks'][0]
+        assert track['source_track_id'] == 'sp-matched'
+        extra = json.loads(track['extra_data'])
+        assert extra['discovered'] is True
+        assert extra['provider'] == 'spotify'
+        assert extra['matched_data']['id'] == 'sp-matched'
+
     def test_spotify_public_uses_authed_spotify_when_signed_in(self):
         """The handler-level fallback chain: when Spotify is authed
         AND the public URL is a playlist URL, prefer the authed API so
diff --git a/tests/test_playlist_sources_adapters.py b/tests/test_playlist_sources_adapters.py
index 20355ffa..e6b443ff 100644
--- a/tests/test_playlist_sources_adapters.py
+++ b/tests/test_playlist_sources_adapters.py
@@ -690,6 +690,128 @@ def test_mirror_dict_spotify_authed_emits_matched_data():
     assert extra["matched_data"]["artists"] == [{"name": "Adele"}]
 
 
+def test_default_discover_tracks_is_no_op():
+    """Adapters whose tracks already carry provider IDs (Spotify,
+    Tidal, Qobuz, YouTube, Deezer, Spotify-public, iTunes-link,
+    SoulSync-Discovery) inherit the ABC default — return tracks
+    unchanged."""
+    track = NormalizedTrack(
+        position=0,
+        track_name="Song",
+        artist_name="Artist",
+        source_track_id="abc",
+        needs_discovery=False,
+    )
+    src = SpotifyPlaylistSource(lambda: None)
+    out = src.discover_tracks([track])
+    assert out == [track]
+
+
+def test_listenbrainz_discover_tracks_uses_callable():
+    """When the LB adapter is wired with a discover_callable, MB
+    tracks get matched_data populated; ``needs_discovery`` flips to
+    False on matches; non-matches stay as-is."""
+
+    def fake_discover(track_dicts):
+        # Match the first, leave second unmatched.
+        return [
+            {
+                "id": "matched-1",
+                "name": "Matched",
+                "artists": ["Artist 1"],
+                "album": {"name": "Album"},
+                "duration_ms": 200_000,
+                "image_url": "art",
+                "source": "spotify",
+                "_provider": "spotify",
+                "_confidence": 0.95,
+            },
+            None,
+        ]
+
+    src = ListenBrainzPlaylistSource(
+        lambda: None,
+        discover_callable=fake_discover,
+    )
+    tracks = [
+        NormalizedTrack(
+            position=0,
+            track_name="Song A",
+            artist_name="Artist 1",
+            source_track_id="mbid-1",
+            needs_discovery=True,
+        ),
+        NormalizedTrack(
+            position=1,
+            track_name="Song B",
+            artist_name="Artist 2",
+            source_track_id="mbid-2",
+            needs_discovery=True,
+        ),
+    ]
+    out = src.discover_tracks(tracks)
+    assert len(out) == 2
+
+    assert out[0].needs_discovery is False
+    assert out[0].source_track_id == "matched-1"
+    assert out[0].extra["discovered"] is True
+    assert out[0].extra["provider"] == "spotify"
+    assert out[0].extra["confidence"] == 0.95
+    assert out[0].extra["matched_data"]["id"] == "matched-1"
+
+    # Unmatched stays as-is.
+    assert out[1].needs_discovery is True
+    assert out[1].source_track_id == "mbid-2"
+    assert "matched_data" not in (out[1].extra or {})
+
+
+def test_listenbrainz_discover_tracks_no_callable_is_no_op():
+    """If no ``discover_callable`` is wired, the adapter returns the
+    list unchanged — refresh paths that haven't enabled discovery
+    still work."""
+    src = ListenBrainzPlaylistSource(lambda: None, discover_callable=None)
+    tracks = [
+        NormalizedTrack(
+            position=0,
+            track_name="T",
+            artist_name="A",
+            needs_discovery=True,
+        )
+    ]
+    assert src.discover_tracks(tracks) == tracks
+
+
+def test_lastfm_discover_tracks_shares_listenbrainz_implementation():
+    """Last.fm radio tracks have the same MB-metadata shape as LB
+    tracks, so the adapter reuses LB's ``discover_tracks``."""
+
+    def fake_discover(track_dicts):
+        return [{
+            "id": "lfm-matched",
+            "name": "Match",
+            "artists": ["Artist"],
+            "album": {"name": ""},
+            "duration_ms": 200_000,
+            "image_url": "",
+            "source": "spotify",
+            "_provider": "spotify",
+        }]
+
+    src = LastFMPlaylistSource(lambda: None, discover_callable=fake_discover)
+    tracks = [
+        NormalizedTrack(
+            position=0,
+            track_name="T",
+            artist_name="A",
+            needs_discovery=True,
+        )
+    ]
+    out = src.discover_tracks(tracks)
+    assert out[0].needs_discovery is False
+    assert out[0].source_track_id == "lfm-matched"
+    assert out[0].extra["matched_data"]["id"] == "lfm-matched"
+
+
 def test_mirror_dict_spotify_public_emits_spotify_hint():
     """Public-embed path: track ID known but album art / canonical
     metadata missing, so we emit a ``spotify_hint`` for the discovery
diff --git a/web_server.py b/web_server.py
index beca48b3..d49e238c 100644
--- a/web_server.py
+++ b/web_server.py
@@ -991,6 +991,23 @@ def _register_automation_handlers():
         # mirrors is not yet wired (current handler skips them).
         return None
 
+    # Discovery callable for the LB / Last.fm adapters' ``discover_tracks``.
+    # Wraps the pure ``match_mb_tracks`` helper with the live matching
+    # engine + Spotify / iTunes clients. Adapter calls it per refresh
+    # when any track has ``needs_discovery=True``.
+    from core.discovery.matching import MBMatchDeps, match_mb_tracks
+
+    _mb_match_deps = MBMatchDeps(
+        matching_engine=matching_engine,
+        score_candidates=_discovery_score_candidates,
+        spotify_client_getter=lambda: spotify_client,
+        itunes_client_getter=_get_itunes_client,
+        prefer_spotify_getter=lambda: (_get_active_discovery_source() == 'spotify'),
+    )
+
+    def _discover_callable_for_registry(tracks):
+        return match_mb_tracks(tracks, _mb_match_deps)
+
     _playlist_source_registry = build_playlist_source_registry(
         spotify_client_getter=lambda: spotify_client,
         tidal_client_getter=lambda: tidal_client,
@@ -1002,6 +1019,7 @@ def _register_automation_handlers():
         lastfm_manager_getter=_lb_manager_for_registry,
         personalized_manager_getter=_build_personalized_manager,
         profile_id_getter=get_current_profile_id,
+        discover_callable=_discover_callable_for_registry,
     )
 
     _automation_deps = AutomationDeps(
diff --git a/webui/static/helper.js b/webui/static/helper.js
index 784c846b..aa59a98d 100644
--- a/webui/static/helper.js
+++ b/webui/static/helper.js
@@ -3417,6 +3417,7 @@ const WHATS_NEW = {
         { unreleased: true },
         { title: 'Groundwork: unified playlist source layer', desc: 'first slice of a refactor that\'ll let ListenBrainz, Last.fm radio, and SoulSync Discovery playlists live as Sync-page tabs alongside Spotify / Tidal / Qobuz / YouTube — so they can be mirrored + scheduled like the rest. this commit adds the shared adapter layer all those sources will plug into; no UI changes yet. nothing to do on your end.' },
         { title: 'Auto-Sync refresh now routes through the unified source layer', desc: 'follow-up to the groundwork above. the mirrored-playlist auto-refresh handler used to have a ~190-line if/elif chain branching per source (one branch each for Spotify, Spotify public, Deezer, Tidal, YouTube). now it asks the source registry for the right adapter and calls one refresh method. behavior identical — same matched_data, same Tidal-skip-on-no-auth log, same Spotify-public-prefers-authed-API fallback. unlocks ListenBrainz / Last.fm / SoulSync Discovery as future Sync-page mirror sources without a fresh elif branch each time.' },
+        { title: 'Discovery folded into the unified source contract', desc: 'next slice of the groundwork. each playlist source can now answer one extra question — "match these raw tracks against Spotify / iTunes" — through the same adapter interface. Spotify / Tidal / Qobuz / YouTube / Deezer / Spotify-public / iTunes-link / SoulSync-Discovery all answer trivially (their tracks already have provider IDs); ListenBrainz + Last.fm run the matching engine. mirror-refresh now calls this automatically when a source returns MB-metadata-only tracks, so when ListenBrainz becomes a Sync-page tab next commit, its mirrors land already discovered + ready to sync — no separate Discover-page round-trip needed.' },
     ],
     '2.6.2': [
         { date: 'May 24, 2026 — 2.6.2 release' },