Discovery fan-out and playlists follow source priority

Make discovery pool population and curated playlists follow the configured metadata source order. Keep Spotify lookups strict where a fallback would corrupt source-specific IDs, and reduce fan-out by using smaller similar-artist samples and page caps. The remaining incremental path is left for a follow-up.
1 month ago · bc83874c6f
parent 030374c5b0
commit bc83874c6f
2 changed files with 107 additions and 23 deletions
--- a/core/watchlist_scanner.py
+++ b/core/watchlist_scanner.py
@@ -3227,7 +3227,8 @@ class WatchlistScanner:
        """
        Curate consistent playlist selections that stay the same until next discovery pool update.

-        Supports both Spotify and iTunes sources - creates separate curated playlists for each.
+        Supports the discovery metadata sources in priority order and creates
+        separate curated playlists for each source.
        - Release Radar: Prioritizes freshness + popularity from recent releases
        - Discovery Weekly: Balanced mix of popular picks, deep cuts, and mid-tier tracks

@@ -3239,9 +3240,6 @@ class WatchlistScanner:

            logger.info("Curating discovery playlists...")

-            if self.spotify_client and self.spotify_client.is_rate_limited():
-                self._disable_spotify_for_run("global Spotify rate limit active")
-
            # Build listening profile for personalization
            profile = self._get_listening_profile(profile_id)
            if profile['has_data']:
@@ -3250,13 +3248,10 @@ class WatchlistScanner:
                           f"{profile['avg_daily_plays']:.1f} avg daily plays")

            # Determine available sources
-            spotify_available = self._spotify_is_primary_source()
-            itunes_client, fallback_source = _get_fallback_metadata_client()
-
-            # Process each available source
-            sources_to_process = [fallback_source]  # Fallback source (iTunes/Deezer) always available
-            if spotify_available:
-                sources_to_process.append('spotify')
+            sources_to_process = self._discovery_source_priority()
+            if not sources_to_process:
+                logger.warning("No discovery sources available to curate playlists")
+                return

            # Pre-build artist genre cache from local DB for genre affinity scoring
            _artist_genre_cache = {}
@@ -3309,7 +3304,7 @@ class WatchlistScanner:

                        for album in albums:
                            try:
-                                # Get album data from appropriate source
+                                # Get album data from the same source that won discovery
                                if source == 'spotify':
                                    album_id = album.get('album_spotify_id')
                                elif source == 'deezer':
@@ -3319,12 +3314,7 @@ class WatchlistScanner:
                                if not album_id:
                                    continue

-                                if source == 'spotify':
-                                    album_data = self.spotify_client.get_album(album_id)
-                                else:
-                                    album_data = itunes_api_call_with_retry(
-                                        itunes_client.get_album, album_id
-                                    )
+                                album_data = self._get_album_data_for_source(source, album_id, album_name=album.get('album_name', ''))

                                if not album_data or 'tracks' not in album_data:
                                    continue
@@ -3524,11 +3514,19 @@ class WatchlistScanner:
            if profile['has_data']:
                logger.info("Building 'Because You Listen To' playlists...")
                top_played = self.database.get_top_artists('30d', 3)
-                active_source_for_bylt = 'spotify' if spotify_available else fallback_source
-                all_pool_tracks = self.database.get_discovery_pool_tracks(
-                    limit=2000, new_releases_only=False,
-                    source=active_source_for_bylt, profile_id=profile_id
-                )
+                active_source_for_bylt = None
+                all_pool_tracks = []
+                for candidate_source in sources_to_process:
+                    all_pool_tracks = self.database.get_discovery_pool_tracks(
+                        limit=2000, new_releases_only=False,
+                        source=candidate_source, profile_id=profile_id
+                    )
+                    if all_pool_tracks:
+                        active_source_for_bylt = candidate_source
+                        break
+                if not active_source_for_bylt:
+                    logger.warning("No discovery pool tracks found for Because You Listen To")
+                    all_pool_tracks = []

                # Build source_artist_id → artist_name mapping from watchlist
                _wa_id_to_name = {}
--- a/tests/test_watchlist_scanner_scan.py
+++ b/tests/test_watchlist_scanner_scan.py
@@ -827,6 +827,92 @@ def test_cache_discovery_recent_albums_falls_back_to_spotify_when_primary_has_no
    assert spotify_client.album_calls


+def test_curate_discovery_playlists_uses_source_priority_for_recent_albums(monkeypatch):
+    monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0)
+    monkeypatch.setattr(watchlist_scanner_module, "get_primary_source", lambda: "deezer")
+    monkeypatch.setattr(watchlist_scanner_module, "get_source_priority", lambda primary: [primary, "spotify", "itunes"])
+
+    artist = _build_artist("Playlist Artist")
+    scanner = _build_scanner({"tracks": {"items": []}}, [artist])
+
+    saved_playlists = []
+    recent_album = {
+        "album_deezer_id": "dz-album-1",
+        "album_itunes_id": None,
+        "album_spotify_id": None,
+        "album_name": "Recent Deezer Album",
+        "artist_name": "Playlist Artist",
+        "release_date": "2026-04-01",
+        "album_type": "album",
+        "album_cover_url": "https://example.com/deezer-album.jpg",
+        "artist_deezer_id": "dz-artist",
+        "artist_spotify_id": None,
+        "artist_itunes_id": None,
+    }
+    discovery_track = types.SimpleNamespace(
+        artist_name="Playlist Artist",
+        popularity=72,
+        deezer_track_id="dz-track-1",
+        spotify_track_id=None,
+        itunes_track_id=None,
+    )
+    deezer_client = _FakeSourceClient(
+        artist_id="dz-artist",
+        albums=[],
+        image_url="https://example.com/deezer-artist.jpg",
+        album_payload={
+            "id": "dz-album-1",
+            "name": "Recent Deezer Album",
+            "images": [{"url": "https://example.com/deezer-album.jpg"}],
+            "release_date": "2026-04-01",
+            "popularity": 40,
+            "tracks": {"items": [{"id": "dz-track-1", "name": "Track One", "artists": [{"name": "Playlist Artist"}], "duration_ms": 180000}]},
+            "artists": [{"id": "dz-artist"}],
+        },
+    )
+    spotify_client = _FakeSourceClient(
+        artist_id="sp-artist",
+        albums=[],
+        image_url="https://example.com/spotify-artist.jpg",
+        album_payload={
+            "id": "sp-album-1",
+            "name": "Spotify Album",
+            "images": [{"url": "https://example.com/spotify-album.jpg"}],
+            "release_date": "2026-04-01",
+            "popularity": 60,
+            "tracks": {"items": [{"id": "sp-track-1", "name": "Spotify Track", "artists": [{"name": "Playlist Artist"}], "duration_ms": 180000}]},
+            "artists": [{"id": "sp-artist"}],
+        },
+    )
+
+    def fake_get_client_for_source(source):
+        return {
+            "deezer": deezer_client,
+            "spotify": spotify_client,
+        }.get(source)
+
+    monkeypatch.setattr(watchlist_scanner_module, "get_client_for_source", fake_get_client_for_source)
+    monkeypatch.setattr(scanner, "_get_listening_profile", lambda profile_id: {
+        "has_data": False,
+        "top_artist_names": set(),
+        "top_genres": set(),
+        "avg_daily_plays": 0.0,
+        "artist_play_counts": {},
+    })
+    monkeypatch.setattr(scanner.database, "get_discovery_recent_albums", lambda limit, source, profile_id: [recent_album] if source == "deezer" else [], raising=False)
+    monkeypatch.setattr(scanner.database, "get_discovery_pool_tracks", lambda *args, **kwargs: [discovery_track] if kwargs.get("source") == "deezer" else [], raising=False)
+    monkeypatch.setattr(scanner.database, "save_curated_playlist", lambda key, tracks, profile_id=1: saved_playlists.append((key, list(tracks))) or True, raising=False)
+    monkeypatch.setattr(scanner.database, "get_top_artists", lambda *args, **kwargs: [], raising=False)
+    monkeypatch.setattr(scanner.database, "get_watchlist_artists", lambda *args, **kwargs: [], raising=False)
+
+    scanner.curate_discovery_playlists(profile_id=1)
+
+    assert any(call[0] == "dz-album-1" for call in deezer_client.album_calls)
+    assert spotify_client.album_calls == []
+    assert any(key == "release_radar_deezer" for key, _ in saved_playlists)
+    assert any(key == "discovery_weekly_deezer" for key, _ in saved_playlists)
+
+
 def test_match_to_spotify_uses_strict_lookup():
    spotify_client = _FakeSpotifyClient(
        search_results=[types.SimpleNamespace(id="fallback-id", name="Artist One")]