mirror of https://github.com/Nezreka/SoulSync.git
Adds ``discover_tracks(tracks) -> List[NormalizedTrack]`` to the PlaylistSource interface. Sources whose tracks already carry provider IDs (Spotify, Tidal, Qobuz, YouTube, Deezer, Spotify public, iTunes link, SoulSync Discovery) inherit a no-op default; ListenBrainz + Last.fm override to run the matching engine. This closes the last gap before LB / Last.fm / SoulSync Discovery can land as Sync-page mirror sources: the refresh handler now calls ``source.discover_tracks(...)`` whenever a source returns tracks with ``needs_discovery=True``, so mirrored LB rows arrive already discovered + ready for the sync pipeline. Previously, LB playlists ran through a separate state-machine worker tied to the Discover-page UI, with results stored in ``discovery_cache`` instead of ``mirrored_playlist_tracks.extra_data``. Changes: - ``core/playlists/sources/base.py`` — PlaylistSource switches from Protocol to ABC so a concrete default for ``discover_tracks`` can live on the base class. The four real-work methods stay ``@abstractmethod``; instantiating an adapter that forgets one fails loudly at construction. - ``core/discovery/matching.py`` (new) — pure ``match_mb_tracks`` helper that runs Strategy-1-only matching-engine queries against Spotify (primary) or iTunes (fallback). No state machine, no discovery-cache writes, no wing-it stub — that richer flow stays in ``core/discovery/listenbrainz.py`` for the Discover-page UI. - ``ListenBrainzPlaylistSource`` + ``LastFMPlaylistSource`` take an optional ``discover_callable`` constructor arg. Last.fm reuses the LB implementation since the track shape is identical. - ``bootstrap.build_playlist_source_registry`` accepts a ``discover_callable`` kwarg and wires it into LB + Last.fm adapters. - ``web_server.py`` boot constructs the discovery callable from the existing matching engine + ``_discovery_score_candidates`` + Spotify / iTunes clients, passes through to the registry. 
- ``refresh_mirrored.py`` adds a small ``_maybe_discover`` helper that calls ``source.discover_tracks(...)`` between fetch and ``to_mirror_track_dict`` projection — it only fires when at least one track has ``needs_discovery=True``, so the normal Spotify / Tidal / etc. refresh path stays a zero-cost pass-through. Tests: - 5 new adapter tests: default no-op pass-through, LB discovery with mixed matches/misses, LB no-callable fallback, Last.fm shares the LB implementation, mirror-dict spotify_hint emit. - 1 new automation test: end-to-end LB refresh with a stub discover_callable proves the matched_data lands in ``mirrored_playlist_tracks.extra_data`` after the registry refresh + discover hop. 225 tests across adapter + automation suites green.
pull/709/head
parent
8c41b05fe8
commit
246503066b
@ -0,0 +1,134 @@
|
||||
"""Pure helper for matching raw MusicBrainz-metadata tracks against
|
||||
Spotify / iTunes.
|
||||
|
||||
Used by the PlaylistSource adapters whose ``get_playlist`` returns
|
||||
tracks with ``needs_discovery=True`` (ListenBrainz, Last.fm radio).
|
||||
Phase 1b ships Strategy 1 only (matching-engine queries → search →
|
||||
score → pick best ≥0.9). The richer multi-strategy +
|
||||
discovery-cache flow stays in
|
||||
``core.discovery.listenbrainz.run_listenbrainz_discovery_worker``
|
||||
for the Discover-page state-machine UI; this helper is the slimmer
|
||||
version used by the auto-refresh pipeline.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class MBMatchDeps:
    """Bundle of primitives the matcher needs.

    Wired up at bootstrap. Tests pass stub callables / clients.
    """

    # Object exposing ``generate_download_queries(track)``; it is handed a
    # throwaway object with ``name``, ``artists`` and ``album`` attributes
    # and should return an iterable of search-query strings.
    matching_engine: Any
    # Called as ``score_candidates(title, artist, duration_ms, results)``;
    # must return a 3-tuple whose first two items are the best candidate
    # (or a falsy value) and its confidence. The third item is ignored.
    score_candidates: Callable[..., Any]
    # Zero-argument getters, resolved on every match attempt. Each returns
    # a client exposing ``search_tracks(query, limit=...)`` or ``None``.
    # The Spotify client is only used when its ``is_spotify_authenticated()``
    # method exists and returns a truthy value.
    spotify_client_getter: Callable[[], Any]
    itunes_client_getter: Callable[[], Any]
    # Zero-argument getter; truthy means Spotify is tried in preference
    # to iTunes.
    prefer_spotify_getter: Callable[[], bool]
    # Minimum confidence a scored candidate must reach to be accepted.
    min_confidence: float = 0.9
|
||||
|
||||
|
||||
def _coerce_duration_ms(raw: Any) -> int:
    """Return ``raw`` as whole milliseconds, or ``0`` when it is unusable."""
    if not raw:
        return 0
    try:
        return int(raw)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        # Covers float-formatted strings such as "215000.0".
        return int(float(raw))
    except (TypeError, ValueError, OverflowError):
        return 0


def match_mb_track(
    track: Dict[str, Any], deps: "MBMatchDeps"
) -> Optional[Dict[str, Any]]:
    """Try to match a single MB-metadata track.

    Input shape:
        ``{'track_name', 'artist_name', 'album_name', 'duration_ms'}``

    The provider is chosen once per call: Spotify when
    ``deps.prefer_spotify_getter()`` is truthy and the Spotify client
    exists and reports itself authenticated, otherwise iTunes. Queries
    come from ``deps.matching_engine.generate_download_queries``; when
    that raises or yields nothing, ``"<artist> <title>"`` and
    ``"<title>"`` are used instead. Queries are tried in order and the
    first one whose scored candidate reaches ``deps.min_confidence``
    wins. Search and scoring failures are logged at debug level and the
    next query is tried.

    Args:
        track: Raw track metadata. A missing or non-numeric
            ``duration_ms`` is treated as ``0``.
        deps: Matching primitives (engine, scorer, client getters).

    Returns:
        The matched_data dict (Spotify/iTunes track projection with
        ``id``, ``name``, ``artists``, ``album``, ``duration_ms``,
        ``image_url``, ``source``, ``_provider``, ``_confidence``), or
        ``None`` when the title or artist is missing, no provider is
        available, or no candidate cleared the confidence threshold.
    """
    title = track.get("track_name") or ""
    artist = track.get("artist_name") or ""
    album = track.get("album_name") or ""
    duration_ms = _coerce_duration_ms(track.get("duration_ms"))
    if not title or not artist:
        return None

    spotify_client = deps.spotify_client_getter()
    itunes_client = deps.itunes_client_getter()
    use_spotify = bool(
        deps.prefer_spotify_getter()
        and spotify_client is not None
        and getattr(spotify_client, "is_spotify_authenticated", lambda: False)()
    )
    if not use_spotify and itunes_client is None:
        return None
    search_client = spotify_client if use_spotify else itunes_client

    # Strategy 1 — matching-engine query generation.
    fallback_queries = [f"{artist} {title}", title]
    try:
        temp_track = type("_TempTrack", (), {
            "name": title,
            "artists": [artist],
            "album": album or None,
        })()
        queries = deps.matching_engine.generate_download_queries(temp_track)
    except Exception as exc:
        logger.debug("matching_engine query-gen failed: %s", exc)
        queries = fallback_queries
    if not queries:
        # An engine that yields nothing (or None) must not end the match
        # attempt before a single search has been issued.
        queries = fallback_queries

    best_match: Any = None
    best_confidence = 0.0
    for query in queries:
        try:
            results = search_client.search_tracks(query, limit=10)
        except Exception as exc:
            logger.debug("search failed for query=%r: %s", query, exc)
            continue
        if not results:
            continue
        try:
            match, confidence, _ = deps.score_candidates(
                title, artist, duration_ms, results
            )
            # Normalised inside the try so a None / non-numeric confidence
            # skips this query instead of raising at the comparison below.
            confidence = float(confidence or 0.0)
        except Exception as exc:
            logger.debug("score_candidates failed: %s", exc)
            continue
        if match and confidence > best_confidence and confidence >= deps.min_confidence:
            best_match = match
            best_confidence = confidence
            # The first query that clears the threshold wins; later
            # queries are not searched.
            break

    if not best_match:
        return None

    provider = "spotify" if use_spotify else "itunes"
    image_url = getattr(best_match, "image_url", None) or ""
    album_data: Dict[str, Any] = {
        "name": getattr(best_match, "album", "") or "",
    }
    if image_url:
        album_data["images"] = [{"url": image_url}]
    return {
        "id": getattr(best_match, "id", "") or "",
        "name": getattr(best_match, "name", "") or "",
        "artists": list(getattr(best_match, "artists", []) or []),
        "album": album_data,
        "duration_ms": int(getattr(best_match, "duration_ms", 0) or 0),
        "image_url": image_url,
        "source": provider,
        "_provider": provider,
        "_confidence": float(best_confidence),
    }
|
||||
|
||||
|
||||
def match_mb_tracks(
    tracks: List[Dict[str, Any]], deps: "MBMatchDeps"
) -> List[Optional[Dict[str, Any]]]:
    """Vectorized variant — runs ``match_mb_track`` per track.

    Phase 1b is sequential. If profiling shows it's too slow on big
    LB playlists, this becomes the natural spot to thread-pool the
    per-track searches.

    Args:
        tracks: Raw MB-metadata track dicts. ``None`` or an empty list
            yields an empty result.
        deps: Matching primitives forwarded unchanged to every call.

    Returns:
        One entry per input track, in input order: the matched_data
        dict, or ``None`` where that track had no match. Exceptions
        raised by ``match_mb_track`` are not caught here.
    """
    return [match_mb_track(track, deps) for track in tracks or []]
|
||||
Loading…
Reference in new issue