Discovery fan-out and playlists follow source priority

Make discovery pool population and curated playlists follow the configured metadata source order. Keep Spotify strict where fallback would corrupt source-specific IDs, and trim fan-out with smaller similar-artist samples and page caps. Leave the remaining incremental path for follow-up.
pull/305/head
Antti Kettunen 1 month ago
parent 030374c5b0
commit bc83874c6f

@ -3227,7 +3227,8 @@ class WatchlistScanner:
"""
Curate consistent playlist selections that stay the same until next discovery pool update.
Supports both Spotify and iTunes sources - creates separate curated playlists for each.
Supports the discovery metadata sources in priority order and creates
separate curated playlists for each source.
- Release Radar: Prioritizes freshness + popularity from recent releases
- Discovery Weekly: Balanced mix of popular picks, deep cuts, and mid-tier tracks
@ -3239,9 +3240,6 @@ class WatchlistScanner:
logger.info("Curating discovery playlists...")
if self.spotify_client and self.spotify_client.is_rate_limited():
self._disable_spotify_for_run("global Spotify rate limit active")
# Build listening profile for personalization
profile = self._get_listening_profile(profile_id)
if profile['has_data']:
@ -3250,13 +3248,10 @@ class WatchlistScanner:
f"{profile['avg_daily_plays']:.1f} avg daily plays")
# Determine available sources
spotify_available = self._spotify_is_primary_source()
itunes_client, fallback_source = _get_fallback_metadata_client()
# Process each available source
sources_to_process = [fallback_source] # Fallback source (iTunes/Deezer) always available
if spotify_available:
sources_to_process.append('spotify')
sources_to_process = self._discovery_source_priority()
if not sources_to_process:
logger.warning("No discovery sources available to curate playlists")
return
# Pre-build artist genre cache from local DB for genre affinity scoring
_artist_genre_cache = {}
@ -3309,7 +3304,7 @@ class WatchlistScanner:
for album in albums:
try:
# Get album data from appropriate source
# Get album data from the same source that won discovery
if source == 'spotify':
album_id = album.get('album_spotify_id')
elif source == 'deezer':
@ -3319,12 +3314,7 @@ class WatchlistScanner:
if not album_id:
continue
if source == 'spotify':
album_data = self.spotify_client.get_album(album_id)
else:
album_data = itunes_api_call_with_retry(
itunes_client.get_album, album_id
)
album_data = self._get_album_data_for_source(source, album_id, album_name=album.get('album_name', ''))
if not album_data or 'tracks' not in album_data:
continue
@ -3524,11 +3514,19 @@ class WatchlistScanner:
if profile['has_data']:
logger.info("Building 'Because You Listen To' playlists...")
top_played = self.database.get_top_artists('30d', 3)
active_source_for_bylt = 'spotify' if spotify_available else fallback_source
all_pool_tracks = self.database.get_discovery_pool_tracks(
limit=2000, new_releases_only=False,
source=active_source_for_bylt, profile_id=profile_id
)
active_source_for_bylt = None
all_pool_tracks = []
for candidate_source in sources_to_process:
all_pool_tracks = self.database.get_discovery_pool_tracks(
limit=2000, new_releases_only=False,
source=candidate_source, profile_id=profile_id
)
if all_pool_tracks:
active_source_for_bylt = candidate_source
break
if not active_source_for_bylt:
logger.warning("No discovery pool tracks found for Because You Listen To")
all_pool_tracks = []
# Build source_artist_id → artist_name mapping from watchlist
_wa_id_to_name = {}

@ -827,6 +827,92 @@ def test_cache_discovery_recent_albums_falls_back_to_spotify_when_primary_has_no
assert spotify_client.album_calls
def test_curate_discovery_playlists_uses_source_priority_for_recent_albums(monkeypatch):
    """Curated playlists fetch recent albums from the highest-priority source with data.

    Deezer is configured as the primary source and is the only source the
    stubbed database returns rows for. The test checks that album data is
    requested from the Deezer client only, and that the Deezer-suffixed
    curated playlists are saved.
    """
    # --- Source configuration: Deezer first, then Spotify, then iTunes ---
    def fake_primary_source():
        return "deezer"

    def fake_source_priority(primary):
        return [primary, "spotify", "itunes"]

    monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0)
    monkeypatch.setattr(watchlist_scanner_module, "get_primary_source", fake_primary_source)
    monkeypatch.setattr(watchlist_scanner_module, "get_source_priority", fake_source_priority)

    artist = _build_artist("Playlist Artist")
    scanner = _build_scanner({"tracks": {"items": []}}, [artist])
    saved_playlists = []

    # --- Fixtures: one recent album and one pool track, both Deezer-only ---
    recent_album = {
        "album_deezer_id": "dz-album-1",
        "album_itunes_id": None,
        "album_spotify_id": None,
        "album_name": "Recent Deezer Album",
        "artist_name": "Playlist Artist",
        "release_date": "2026-04-01",
        "album_type": "album",
        "album_cover_url": "https://example.com/deezer-album.jpg",
        "artist_deezer_id": "dz-artist",
        "artist_spotify_id": None,
        "artist_itunes_id": None,
    }
    discovery_track = types.SimpleNamespace(
        artist_name="Playlist Artist",
        popularity=72,
        deezer_track_id="dz-track-1",
        spotify_track_id=None,
        itunes_track_id=None,
    )

    def build_album_payload(album_id, album_name, cover_url, popularity, track_id, track_name, artist_id):
        # Both fake clients return the same payload shape; only the values differ.
        single_track = {
            "id": track_id,
            "name": track_name,
            "artists": [{"name": "Playlist Artist"}],
            "duration_ms": 180000,
        }
        return {
            "id": album_id,
            "name": album_name,
            "images": [{"url": cover_url}],
            "release_date": "2026-04-01",
            "popularity": popularity,
            "tracks": {"items": [single_track]},
            "artists": [{"id": artist_id}],
        }

    deezer_client = _FakeSourceClient(
        artist_id="dz-artist",
        albums=[],
        image_url="https://example.com/deezer-artist.jpg",
        album_payload=build_album_payload(
            "dz-album-1",
            "Recent Deezer Album",
            "https://example.com/deezer-album.jpg",
            40,
            "dz-track-1",
            "Track One",
            "dz-artist",
        ),
    )
    spotify_client = _FakeSourceClient(
        artist_id="sp-artist",
        albums=[],
        image_url="https://example.com/spotify-artist.jpg",
        album_payload=build_album_payload(
            "sp-album-1",
            "Spotify Album",
            "https://example.com/spotify-album.jpg",
            60,
            "sp-track-1",
            "Spotify Track",
            "sp-artist",
        ),
    )

    # iTunes deliberately has no client: the lookup yields None for it.
    clients_by_source = {
        "deezer": deezer_client,
        "spotify": spotify_client,
    }

    def fake_get_client_for_source(source):
        return clients_by_source.get(source)

    monkeypatch.setattr(watchlist_scanner_module, "get_client_for_source", fake_get_client_for_source)

    # --- Scanner and database stubs ---
    def fake_listening_profile(profile_id):
        # No listening history, so the personalization branches stay off.
        return {
            "has_data": False,
            "top_artist_names": set(),
            "top_genres": set(),
            "avg_daily_plays": 0.0,
            "artist_play_counts": {},
        }

    def fake_recent_albums(limit, source, profile_id):
        if source == "deezer":
            return [recent_album]
        return []

    def fake_pool_tracks(*args, **kwargs):
        if kwargs.get("source") == "deezer":
            return [discovery_track]
        return []

    def fake_save_curated_playlist(key, tracks, profile_id=1):
        # Record what was saved so the assertions can inspect the playlist keys.
        saved_playlists.append((key, list(tracks)))
        return True

    def fake_empty_result(*args, **kwargs):
        return []

    database = scanner.database
    monkeypatch.setattr(scanner, "_get_listening_profile", fake_listening_profile)
    monkeypatch.setattr(database, "get_discovery_recent_albums", fake_recent_albums, raising=False)
    monkeypatch.setattr(database, "get_discovery_pool_tracks", fake_pool_tracks, raising=False)
    monkeypatch.setattr(database, "save_curated_playlist", fake_save_curated_playlist, raising=False)
    monkeypatch.setattr(database, "get_top_artists", fake_empty_result, raising=False)
    monkeypatch.setattr(database, "get_watchlist_artists", fake_empty_result, raising=False)

    scanner.curate_discovery_playlists(profile_id=1)

    # Album data came from Deezer, and Spotify was never asked for an album.
    requested_deezer_albums = [call[0] for call in deezer_client.album_calls]
    assert "dz-album-1" in requested_deezer_albums
    assert spotify_client.album_calls == []

    # Playlists were saved under the Deezer-specific keys.
    saved_keys = [key for key, _ in saved_playlists]
    assert "release_radar_deezer" in saved_keys
    assert "discovery_weekly_deezer" in saved_keys
def test_match_to_spotify_uses_strict_lookup():
spotify_client = _FakeSpotifyClient(
search_results=[types.SimpleNamespace(id="fallback-id", name="Artist One")]

Loading…
Cancel
Save