From bc83874c6fd62e959752ffa4a1fd28f4d1ff9391 Mon Sep 17 00:00:00 2001 From: Antti Kettunen Date: Fri, 17 Apr 2026 08:49:04 +0300 Subject: [PATCH] Discovery fan-out and playlists follow source priority Make discovery pool population and curated playlists follow the configured metadata source order. Keep Spotify strict where fallback would corrupt source-specific IDs, and trim fan-out with smaller similar-artist samples and page caps. Leave the remaining incremental path for follow-up. --- core/watchlist_scanner.py | 44 +++++++------- tests/test_watchlist_scanner_scan.py | 86 ++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 23 deletions(-) diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index 1b4f76ae..1d3f461f 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -3227,7 +3227,8 @@ class WatchlistScanner: """ Curate consistent playlist selections that stay the same until next discovery pool update. - Supports both Spotify and iTunes sources - creates separate curated playlists for each. + Supports the discovery metadata sources in priority order and creates + separate curated playlists for each source. - Release Radar: Prioritizes freshness + popularity from recent releases - Discovery Weekly: Balanced mix of popular picks, deep cuts, and mid-tier tracks @@ -3239,9 +3240,6 @@ class WatchlistScanner: logger.info("Curating discovery playlists...") - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - # Build listening profile for personalization profile = self._get_listening_profile(profile_id) if profile['has_data']: @@ -3250,13 +3248,10 @@ class WatchlistScanner: f"{profile['avg_daily_plays']:.1f} avg daily plays") # Determine available sources - spotify_available = self._spotify_is_primary_source() - itunes_client, fallback_source = _get_fallback_metadata_client() - - # Process each available source - sources_to_process = [fallback_source] # Fallback source (iTunes/Deezer) always available - if spotify_available: - sources_to_process.append('spotify') + sources_to_process = self._discovery_source_priority() + if not sources_to_process: + logger.warning("No discovery sources available to curate playlists") + return # Pre-build artist genre cache from local DB for genre affinity scoring _artist_genre_cache = {} @@ -3309,7 +3304,7 @@ class WatchlistScanner: for album in albums: try: - # Get album data from appropriate source + # Get album data from the same source that won discovery if source == 'spotify': album_id = album.get('album_spotify_id') elif source == 'deezer': @@ -3319,12 +3314,7 @@ class WatchlistScanner: if not album_id: continue - if source == 'spotify': - album_data = self.spotify_client.get_album(album_id) - else: - album_data = itunes_api_call_with_retry( - itunes_client.get_album, album_id - ) + album_data = self._get_album_data_for_source(source, album_id, album_name=album.get('album_name', '')) if not album_data or 'tracks' not in album_data: continue @@ -3524,11 +3514,19 @@ class WatchlistScanner: if profile['has_data']: logger.info("Building 'Because You Listen To' playlists...") top_played = self.database.get_top_artists('30d', 3) - active_source_for_bylt = 'spotify' if spotify_available else fallback_source - all_pool_tracks = self.database.get_discovery_pool_tracks( - limit=2000, new_releases_only=False, - source=active_source_for_bylt, profile_id=profile_id - ) + active_source_for_bylt = None + all_pool_tracks = [] + for candidate_source in sources_to_process: + all_pool_tracks = self.database.get_discovery_pool_tracks( + limit=2000, new_releases_only=False, + source=candidate_source, profile_id=profile_id + ) + if all_pool_tracks: + active_source_for_bylt = candidate_source + break + if not active_source_for_bylt: + logger.warning("No discovery pool tracks found for Because You Listen To") + all_pool_tracks = [] # Build source_artist_id → artist_name mapping from watchlist _wa_id_to_name = {} diff --git a/tests/test_watchlist_scanner_scan.py b/tests/test_watchlist_scanner_scan.py index 4916a937..a1fad3bd 100644 --- a/tests/test_watchlist_scanner_scan.py +++ b/tests/test_watchlist_scanner_scan.py @@ -827,6 +827,92 @@ def test_cache_discovery_recent_albums_falls_back_to_spotify_when_primary_has_no assert spotify_client.album_calls +def test_curate_discovery_playlists_uses_source_priority_for_recent_albums(monkeypatch): + monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0) + monkeypatch.setattr(watchlist_scanner_module, "get_primary_source", lambda: "deezer") + monkeypatch.setattr(watchlist_scanner_module, "get_source_priority", lambda primary: [primary, "spotify", "itunes"]) + + artist = _build_artist("Playlist Artist") + scanner = _build_scanner({"tracks": {"items": []}}, [artist]) + + saved_playlists = [] + recent_album = { + "album_deezer_id": "dz-album-1", + "album_itunes_id": None, + "album_spotify_id": None, + "album_name": "Recent Deezer Album", + "artist_name": "Playlist Artist", + "release_date": "2026-04-01", + "album_type": "album", + "album_cover_url": "https://example.com/deezer-album.jpg", + "artist_deezer_id": "dz-artist", + "artist_spotify_id": None, + "artist_itunes_id": None, + } + discovery_track = types.SimpleNamespace( + artist_name="Playlist Artist", + popularity=72, + deezer_track_id="dz-track-1", + spotify_track_id=None, + itunes_track_id=None, + ) + deezer_client = _FakeSourceClient( + artist_id="dz-artist", + albums=[], + image_url="https://example.com/deezer-artist.jpg", + album_payload={ + "id": "dz-album-1", + "name": "Recent Deezer Album", + "images": [{"url": "https://example.com/deezer-album.jpg"}], + "release_date": "2026-04-01", + "popularity": 40, + "tracks": {"items": [{"id": "dz-track-1", "name": "Track One", "artists": [{"name": "Playlist Artist"}], "duration_ms": 180000}]}, + "artists": [{"id": "dz-artist"}], + }, + ) + spotify_client = _FakeSourceClient( + artist_id="sp-artist", + albums=[], + image_url="https://example.com/spotify-artist.jpg", + album_payload={ + "id": "sp-album-1", + "name": "Spotify Album", + "images": [{"url": "https://example.com/spotify-album.jpg"}], + "release_date": "2026-04-01", + "popularity": 60, + "tracks": {"items": [{"id": "sp-track-1", "name": "Spotify Track", "artists": [{"name": "Playlist Artist"}], "duration_ms": 180000}]}, + "artists": [{"id": "sp-artist"}], + }, + ) + + def fake_get_client_for_source(source): + return { + "deezer": deezer_client, + "spotify": spotify_client, + }.get(source) + + monkeypatch.setattr(watchlist_scanner_module, "get_client_for_source", fake_get_client_for_source) + monkeypatch.setattr(scanner, "_get_listening_profile", lambda profile_id: { + "has_data": False, + "top_artist_names": set(), + "top_genres": set(), + "avg_daily_plays": 0.0, + "artist_play_counts": {}, + }) + monkeypatch.setattr(scanner.database, "get_discovery_recent_albums", lambda limit, source, profile_id: [recent_album] if source == "deezer" else [], raising=False) + monkeypatch.setattr(scanner.database, "get_discovery_pool_tracks", lambda *args, **kwargs: [discovery_track] if kwargs.get("source") == "deezer" else [], raising=False) + monkeypatch.setattr(scanner.database, "save_curated_playlist", lambda key, tracks, profile_id=1: saved_playlists.append((key, list(tracks))) or True, raising=False) + monkeypatch.setattr(scanner.database, "get_top_artists", lambda *args, **kwargs: [], raising=False) + monkeypatch.setattr(scanner.database, "get_watchlist_artists", lambda *args, **kwargs: [], raising=False) + + scanner.curate_discovery_playlists(profile_id=1) + + assert any(call[0] == "dz-album-1" for call in deezer_client.album_calls) + assert spotify_client.album_calls == [] + assert any(key == "release_radar_deezer" for key, _ in saved_playlists) + assert any(key == "discovery_weekly_deezer" for key, _ in saved_playlists) + + def test_match_to_spotify_uses_strict_lookup(): spotify_client = _FakeSpotifyClient( search_results=[types.SimpleNamespace(id="fallback-id", name="Artist One")]