Merge pull request #305 from kettui/fix/cache_discovery_recent_albums-fixes

Make discovery pool cache population respect provider priority, reduce request volume
pull/306/head
BoulderBadgeDad 4 weeks ago committed by GitHub
commit 35320ef760
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -469,6 +469,15 @@ class WatchlistScanner:
'discogs': 'discogs_artist_id',
}.get(source)
@staticmethod
def _similar_artist_id_attribute_for_source(source: str) -> Optional[str]:
    """Name the similar-artist field that holds the ID for ``source``.

    Only ``'spotify'``, ``'itunes'`` and ``'deezer'`` have a field; any
    other source yields ``None``.
    """
    field_by_source = {
        'spotify': 'similar_artist_spotify_id',
        'itunes': 'similar_artist_itunes_id',
        'deezer': 'similar_artist_deezer_id',
    }
    # Guard first so unknown sources fall through to an explicit None.
    if source not in field_by_source:
        return None
    return field_by_source[source]
@staticmethod
def _extract_entity_id(value: Any) -> Optional[str]:
"""Extract an ID from a dataclass, dict, or plain object."""
@ -692,7 +701,9 @@ class WatchlistScanner:
artist_id: str,
album_type: str = 'album,single,ep',
limit: int = 50,
# Only applies to Spotify currently
skip_cache: bool = True,
# Only applies to Spotify currently
max_pages: int = 0,
) -> List[Any]:
"""Fetch artist albums for a specific source, keeping Spotify strict."""
@ -1587,11 +1598,7 @@ class WatchlistScanner:
def _match_to_spotify(self, artist_name: str) -> Optional[str]:
"""Match artist name to Spotify ID using fuzzy name comparison."""
try:
# Use the authenticated spotify_client passed to the scanner,
# not get_client_for_source which creates a fresh unauthenticated instance
client = self.spotify_client
if not client or not client.is_spotify_authenticated():
client = get_client_for_source('spotify')
client = get_client_for_source('spotify')
if not client:
return None
@ -2960,17 +2967,14 @@ class WatchlistScanner:
"""
Cache recent albums from watchlist and similar artists for discover page.
Supports both Spotify and iTunes sources - iTunes is always processed (baseline),
Spotify is added when authenticated. Same pattern as discovery pool.
Uses the configured source priority and caches the first source that
can return albums for each artist.
"""
try:
from datetime import datetime, timedelta
logger.info("Caching recent albums for discover page...")
if self.spotify_client and self.spotify_client.is_rate_limited():
self._disable_spotify_for_run("global Spotify rate limit active")
# Clear existing cache for this profile
self.database.clear_discovery_recent_albums(profile_id=profile_id)
@ -2987,21 +2991,20 @@ class WatchlistScanner:
except Exception:
pass
cutoff_date = datetime.now() - timedelta(days=days_lookback)
cached_count = {'spotify': 0, 'itunes': 0, 'deezer': 0}
albums_checked = 0
# Determine available sources
spotify_available = self._spotify_is_primary_source()
discovery_sources = self._discovery_source_priority()
if not discovery_sources:
logger.warning("No music sources available to cache recent albums")
return
# Get fallback metadata client (iTunes or Deezer)
itunes_client, fallback_source = _get_fallback_metadata_client()
cached_count = {source: 0 for source in discovery_sources}
albums_checked = 0
# Get artists to check (scoped to profile)
watchlist_artists = self.database.get_watchlist_artists(profile_id=profile_id)
similar_artists = self.database.get_top_similar_artists(limit=50, profile_id=profile_id)
# We only need a modest sample here; this path fans out into per-source album lookups.
similar_artists = self.database.get_top_similar_artists(limit=25, profile_id=profile_id)
logger.info(f"Checking albums from {len(watchlist_artists)} watchlist + {len(similar_artists)} similar artists")
logger.info(f"Sources: Spotify={spotify_available}, {fallback_source}=True")
def process_album(album, artist_name, artist_spotify_id, artist_itunes_id, source, artist_deezer_id=None):
"""Helper to process and cache a single album"""
@ -3047,118 +3050,137 @@ class WatchlistScanner:
# Process watchlist artists
for artist in watchlist_artists:
# Always process fallback source (iTunes or Deezer) as baseline
fallback_id = artist.itunes_artist_id if fallback_source == 'itunes' else artist.deezer_artist_id
if not fallback_id:
# Try to resolve fallback ID on-the-fly (with retry for rate limiting)
try:
results = itunes_api_call_with_retry(
itunes_client.search_artists, artist.artist_name, limit=1
)
if results and len(results) > 0:
fallback_id = results[0].id
fallback_resolved += 1
logger.debug(f"[{fallback_source}] Resolved ID for {artist.artist_name}: {fallback_id}")
else:
fallback_failed_resolve += 1
logger.info(f"[{fallback_source}] No artist found for: {artist.artist_name}")
except Exception as e:
fallback_failed_resolve += 1
logger.info(f"[{fallback_source}] Failed to resolve {artist.artist_name}: {e}")
selected_source = None
selected_artist_id = None
selected_albums = []
selected_watchlist_id = None
for source in discovery_sources:
source_attr = self._artist_id_attribute_for_source(source)
stored_id = getattr(artist, source_attr, None) if source_attr else None
cache_callback = None
if source == 'spotify':
cache_callback = lambda found_id, watchlist_id=artist.id: self._cache_watchlist_artist_source_id(artist, 'spotify', found_id)
elif source == 'itunes':
cache_callback = lambda found_id, watchlist_id=artist.id: self._cache_watchlist_artist_source_id(artist, 'itunes', found_id)
elif source == 'deezer':
cache_callback = lambda found_id, watchlist_id=artist.id: self._cache_watchlist_artist_source_id(artist, 'deezer', found_id)
artist_id = self._resolve_artist_id_for_source(
source,
artist.artist_name,
stored_id=stored_id,
cache_callback=cache_callback,
)
if not artist_id:
continue
if fallback_id:
try:
albums = itunes_api_call_with_retry(
itunes_client.get_artist_albums, fallback_id, album_type='album,single,ep', limit=20
)
for album in albums or []:
process_album(
album, artist.artist_name, artist.spotify_artist_id,
fallback_id if fallback_source == 'itunes' else None,
fallback_source,
artist_deezer_id=fallback_id if fallback_source == 'deezer' else None
)
except Exception as e:
logger.info(f"[{fallback_source}] Error fetching albums for {artist.artist_name}: {e}")
albums = self._get_artist_albums_for_source(
source,
artist_id,
album_type='album,single,ep',
limit=20,
skip_cache=True,
max_pages=2,
)
if not albums:
logger.debug(f"No recent albums found for {artist.artist_name} on {source}")
continue
# Process Spotify if authenticated
if spotify_available and artist.spotify_artist_id:
try:
albums = self.spotify_client.get_artist_albums(
artist.spotify_artist_id,
album_type='album,single,ep',
limit=20,
skip_cache=True,
max_pages=2,
)
for album in albums or []:
process_album(album, artist.artist_name, artist.spotify_artist_id, fallback_id if fallback_source == 'itunes' else None, 'spotify')
except Exception as e:
logger.debug(f"Error fetching Spotify albums for {artist.artist_name}: {e}")
selected_source = source
selected_artist_id = artist_id
selected_albums = albums
if source == 'spotify':
selected_watchlist_id = artist_id
elif source == 'itunes':
selected_watchlist_id = artist.itunes_artist_id or artist_id
elif source == 'deezer':
selected_watchlist_id = getattr(artist, 'deezer_artist_id', None) or artist_id
break
if not selected_source or not selected_artist_id or not selected_albums:
time.sleep(DELAY_BETWEEN_ARTISTS)
continue
for album in selected_albums:
process_album(
album,
artist.artist_name,
selected_watchlist_id if selected_source == 'spotify' else artist.spotify_artist_id,
selected_watchlist_id if selected_source == 'itunes' else None,
selected_source,
artist_deezer_id=selected_watchlist_id if selected_source == 'deezer' else None,
)
time.sleep(DELAY_BETWEEN_ARTISTS)
# Process similar artists
for artist in similar_artists:
# Always process fallback source (iTunes or Deezer) as baseline
fallback_id = artist.similar_artist_itunes_id if fallback_source == 'itunes' else getattr(artist, 'similar_artist_deezer_id', None)
if not fallback_id:
# Try to resolve fallback ID on-the-fly (with retry for rate limiting)
try:
results = itunes_api_call_with_retry(
itunes_client.search_artists, artist.similar_artist_name, limit=1
)
if results and len(results) > 0:
fallback_id = results[0].id
# Cache for future
if fallback_source == 'deezer':
self.database.update_similar_artist_deezer_id(artist.id, fallback_id)
else:
self.database.update_similar_artist_itunes_id(artist.id, fallback_id)
fallback_resolved += 1
logger.debug(f"[{fallback_source}] Resolved ID for similar artist {artist.similar_artist_name}: {fallback_id}")
else:
fallback_failed_resolve += 1
logger.info(f"[{fallback_source}] No artist found for similar: {artist.similar_artist_name}")
except Exception as e:
fallback_failed_resolve += 1
logger.info(f"[{fallback_source}] Failed to resolve similar {artist.similar_artist_name}: {e}")
selected_source = None
selected_artist_id = None
selected_albums = []
selected_similar_id = None
for source in discovery_sources:
source_attr = self._similar_artist_id_attribute_for_source(source)
stored_id = getattr(artist, source_attr, None) if source_attr else None
cache_callback = None
if source == 'itunes':
cache_callback = lambda found_id, similar_id=artist.id: self.database.update_similar_artist_itunes_id(similar_id, found_id)
elif source == 'deezer':
cache_callback = lambda found_id, similar_id=artist.id: self.database.update_similar_artist_deezer_id(similar_id, found_id)
artist_id = self._resolve_artist_id_for_source(
source,
artist.similar_artist_name,
stored_id=stored_id,
cache_callback=cache_callback,
)
if not artist_id:
continue
if fallback_id:
try:
albums = itunes_api_call_with_retry(
itunes_client.get_artist_albums, fallback_id, album_type='album,single,ep', limit=20
)
for album in albums or []:
process_album(
album, artist.similar_artist_name, artist.similar_artist_spotify_id,
fallback_id if fallback_source == 'itunes' else None,
fallback_source,
artist_deezer_id=fallback_id if fallback_source == 'deezer' else None
)
except Exception as e:
logger.info(f"[{fallback_source}] Error fetching albums for similar {artist.similar_artist_name}: {e}")
albums = self._get_artist_albums_for_source(
source,
artist_id,
album_type='album,single,ep',
limit=20,
skip_cache=True,
max_pages=2,
)
if not albums:
logger.debug(f"No recent albums found for similar {artist.similar_artist_name} on {source}")
continue
# Process Spotify if authenticated
if spotify_available and artist.similar_artist_spotify_id:
try:
albums = self.spotify_client.get_artist_albums(
artist.similar_artist_spotify_id,
album_type='album,single,ep',
limit=20,
skip_cache=True,
max_pages=2,
)
for album in albums or []:
process_album(album, artist.similar_artist_name, artist.similar_artist_spotify_id, fallback_id if fallback_source == 'itunes' else None, 'spotify')
except Exception as e:
logger.debug(f"Error fetching Spotify albums for {artist.similar_artist_name}: {e}")
selected_source = source
selected_artist_id = artist_id
selected_albums = albums
if source == 'spotify':
selected_similar_id = artist_id
elif source == 'itunes':
selected_similar_id = artist.similar_artist_itunes_id or artist_id
elif source == 'deezer':
selected_similar_id = getattr(artist, 'similar_artist_deezer_id', None) or artist_id
break
if not selected_source or not selected_artist_id or not selected_albums:
time.sleep(DELAY_BETWEEN_ARTISTS)
continue
for album in selected_albums:
process_album(
album,
artist.similar_artist_name,
selected_similar_id if selected_source == 'spotify' else artist.similar_artist_spotify_id,
selected_similar_id if selected_source == 'itunes' else None,
selected_source,
artist_deezer_id=selected_similar_id if selected_source == 'deezer' else None,
)
time.sleep(DELAY_BETWEEN_ARTISTS)
total_cached = cached_count['spotify'] + cached_count.get(fallback_source, 0)
logger.info(f"Cached {total_cached} recent albums (Spotify: {cached_count['spotify']}, {fallback_source}: {cached_count.get(fallback_source, 0)}) from {albums_checked} albums checked")
logger.info(f"[{fallback_source}] ID resolution stats: {fallback_resolved} resolved, {fallback_failed_resolve} failed")
total_cached = sum(cached_count.values())
logger.info(f"Cached {total_cached} recent albums from {albums_checked} albums checked")
logger.info(f"Recent albums ID resolution stats: {fallback_resolved} resolved, {fallback_failed_resolve} failed")
except Exception as e:
logger.error(f"Error caching discovery recent albums: {e}")
@ -3205,7 +3227,8 @@ class WatchlistScanner:
"""
Curate consistent playlist selections that stay the same until next discovery pool update.
Supports both Spotify and iTunes sources - creates separate curated playlists for each.
Supports the discovery metadata sources in priority order and creates
separate curated playlists for each source.
- Release Radar: Prioritizes freshness + popularity from recent releases
- Discovery Weekly: Balanced mix of popular picks, deep cuts, and mid-tier tracks
@ -3217,9 +3240,6 @@ class WatchlistScanner:
logger.info("Curating discovery playlists...")
if self.spotify_client and self.spotify_client.is_rate_limited():
self._disable_spotify_for_run("global Spotify rate limit active")
# Build listening profile for personalization
profile = self._get_listening_profile(profile_id)
if profile['has_data']:
@ -3228,13 +3248,10 @@ class WatchlistScanner:
f"{profile['avg_daily_plays']:.1f} avg daily plays")
# Determine available sources
spotify_available = self._spotify_is_primary_source()
itunes_client, fallback_source = _get_fallback_metadata_client()
# Process each available source
sources_to_process = [fallback_source] # Fallback source (iTunes/Deezer) always available
if spotify_available:
sources_to_process.append('spotify')
sources_to_process = self._discovery_source_priority()
if not sources_to_process:
logger.warning("No discovery sources available to curate playlists")
return
# Pre-build artist genre cache from local DB for genre affinity scoring
_artist_genre_cache = {}
@ -3287,7 +3304,7 @@ class WatchlistScanner:
for album in albums:
try:
# Get album data from appropriate source
# Get album data from the same source that won discovery
if source == 'spotify':
album_id = album.get('album_spotify_id')
elif source == 'deezer':
@ -3297,12 +3314,7 @@ class WatchlistScanner:
if not album_id:
continue
if source == 'spotify':
album_data = self.spotify_client.get_album(album_id)
else:
album_data = itunes_api_call_with_retry(
itunes_client.get_album, album_id
)
album_data = self._get_album_data_for_source(source, album_id, album_name=album.get('album_name', ''))
if not album_data or 'tracks' not in album_data:
continue
@ -3502,11 +3514,19 @@ class WatchlistScanner:
if profile['has_data']:
logger.info("Building 'Because You Listen To' playlists...")
top_played = self.database.get_top_artists('30d', 3)
active_source_for_bylt = 'spotify' if spotify_available else fallback_source
all_pool_tracks = self.database.get_discovery_pool_tracks(
limit=2000, new_releases_only=False,
source=active_source_for_bylt, profile_id=profile_id
)
active_source_for_bylt = None
all_pool_tracks = []
for candidate_source in sources_to_process:
all_pool_tracks = self.database.get_discovery_pool_tracks(
limit=2000, new_releases_only=False,
source=candidate_source, profile_id=profile_id
)
if all_pool_tracks:
active_source_for_bylt = candidate_source
break
if not active_source_for_bylt:
logger.warning("No discovery pool tracks found for Because You Listen To")
all_pool_tracks = []
# Build source_artist_id → artist_name mapping from watchlist
_wa_id_to_name = {}

@ -125,6 +125,7 @@ class _FakeDB:
self.similar_calls = []
self.discovery_pool_calls = []
self.discovery_pool_timestamp_calls = []
self.discovery_recent_calls = []
self.db_albums = []
def get_watchlist_artists(self, profile_id=None):
@ -144,6 +145,13 @@ class _FakeDB:
self.discovery_pool_calls.append((track_data, source, profile_id))
return True
# Test double for the recent-albums cache reset: performs no work, ignores
# profile_id, and always reports success.
def clear_discovery_recent_albums(self, profile_id=1):
return True
def cache_discovery_recent_album(self, album_data, source='spotify', profile_id=1):
    """Record a cache request instead of persisting it.

    Each call is stored in ``self.discovery_recent_calls`` as an
    ``(album_data, source, profile_id)`` tuple so tests can inspect what
    would have been cached. Always returns ``True``.
    """
    recorded_call = (album_data, source, profile_id)
    self.discovery_recent_calls.append(recorded_call)
    return True
def cleanup_old_discovery_tracks(self, days_threshold=365):
return 0
@ -718,6 +726,193 @@ def test_populate_discovery_pool_uses_strict_spotify_for_database_album_search(m
)
# Checks that cache_discovery_recent_albums honours the source priority:
# with the priority patched to [deezer, spotify, itunes] and the fake Deezer
# client returning an album, the first cached entry is recorded under
# "deezer" and the fake Spotify client is never searched or asked for albums.
def test_cache_discovery_recent_albums_uses_primary_source_first(monkeypatch):
# Remove the per-artist delay and replace the module's `time` with a no-op sleep.
monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0)
monkeypatch.setattr(watchlist_scanner_module, "time", types.SimpleNamespace(sleep=lambda *_args, **_kwargs: None))
# Make Deezer the primary source, followed by Spotify and iTunes.
monkeypatch.setattr(watchlist_scanner_module, "get_primary_source", lambda: "deezer")
monkeypatch.setattr(watchlist_scanner_module, "get_source_priority", lambda primary: [primary, "spotify", "itunes"])
artist = _build_artist("Artist One")
# Album object handed to the fake Deezer client below.
album = types.SimpleNamespace(
id="dz-album-1",
name="Recent Deezer Album",
album_type="album",
release_date="2026-04-01",
image_url="https://example.com/deezer-album.jpg",
)
deezer_client = _FakeSourceClient(
artist_id="dz-artist",
albums=[album],
image_url="https://example.com/deezer-artist.jpg",
)
# The Spotify fake also has an album, so an (incorrect) Spotify lookup would be observable.
spotify_client = _FakeSourceClient(
artist_id="sp-artist",
albums=[types.SimpleNamespace(id="sp-album-1", name="Spotify Album", album_type="album")],
image_url="https://example.com/spotify-artist.jpg",
)
# Route client lookups to the fakes; any other source (e.g. "itunes") resolves to None.
def fake_get_client_for_source(source):
return {
"deezer": deezer_client,
"spotify": spotify_client,
}.get(source)
monkeypatch.setattr(watchlist_scanner_module, "get_client_for_source", fake_get_client_for_source)
# NOTE(review): presumably the second argument seeds the watchlist artists — confirm against _build_scanner.
scanner = _build_scanner({"tracks": {"items": []}}, [artist])
# No similar artists, so only the watchlist path is exercised.
scanner.database.get_top_similar_artists = lambda limit=50, profile_id=1: []
scanner.cache_discovery_recent_albums(profile_id=1)
# Something was cached, and the first cached entry's source is Deezer.
assert scanner.database.discovery_recent_calls
assert scanner.database.discovery_recent_calls[0][1] == "deezer"
assert deezer_client.album_calls
# Spotify must not have been touched once Deezer supplied albums.
assert spotify_client.search_calls == []
assert spotify_client.album_calls == []
# Checks the fallback path of cache_discovery_recent_albums: Deezer is first
# in the patched priority but its fake returns no albums, so the first cached
# entry is expected to be recorded under "spotify" after both clients were
# asked for albums.
def test_cache_discovery_recent_albums_falls_back_to_spotify_when_primary_has_no_albums(monkeypatch):
# Remove the per-artist delay and replace the module's `time` with a no-op sleep.
monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0)
monkeypatch.setattr(watchlist_scanner_module, "time", types.SimpleNamespace(sleep=lambda *_args, **_kwargs: None))
# Make Deezer the primary source, followed by Spotify and iTunes.
monkeypatch.setattr(watchlist_scanner_module, "get_primary_source", lambda: "deezer")
monkeypatch.setattr(watchlist_scanner_module, "get_source_priority", lambda primary: [primary, "spotify", "itunes"])
artist = _build_artist("Fallback Artist")
# Drop the stored Spotify ID; the search_calls assertion below expects a
# Spotify artist search to happen (presumably to resolve the missing ID).
artist.spotify_artist_id = None
# Primary source fake: has an artist ID but returns no albums.
deezer_client = _FakeSourceClient(
artist_id="dz-artist",
albums=[],
image_url="https://example.com/deezer-artist.jpg",
)
spotify_album = types.SimpleNamespace(
id="sp-album-1",
name="Spotify Recent Album",
album_type="album",
release_date="2026-04-01",
image_url="https://example.com/spotify-album.jpg",
)
# Fallback source fake: returns one album plus a full album payload.
spotify_client = _FakeSourceClient(
artist_id="sp-artist",
albums=[spotify_album],
image_url="https://example.com/spotify-artist.jpg",
album_payload={
"id": "sp-album-1",
"name": "Spotify Recent Album",
"images": [{"url": "https://example.com/spotify-album.jpg"}],
"release_date": "2026-04-01",
"popularity": 50,
"tracks": {"items": [{"id": "sp-track-1", "name": "Spotify Track", "artists": [{"name": "Fallback Artist"}]}]},
"artists": [{"id": "sp-artist"}],
},
)
# Route client lookups to the fakes; any other source (e.g. "itunes") resolves to None.
def fake_get_client_for_source(source):
return {
"deezer": deezer_client,
"spotify": spotify_client,
}.get(source)
monkeypatch.setattr(watchlist_scanner_module, "get_client_for_source", fake_get_client_for_source)
scanner = _build_scanner({"tracks": {"items": []}}, [artist])
# No similar artists, so only the watchlist path is exercised.
scanner.database.get_top_similar_artists = lambda limit=50, profile_id=1: []
scanner.cache_discovery_recent_albums(profile_id=1)
# Something was cached, and the first cached entry's source is Spotify.
assert scanner.database.discovery_recent_calls
assert scanner.database.discovery_recent_calls[0][1] == "spotify"
# Deezer was still tried first.
assert deezer_client.album_calls
# Exactly one strict (no-fallback) Spotify artist search, then an album fetch.
assert spotify_client.search_calls == [("Fallback Artist", 1, {"allow_fallback": False})]
assert spotify_client.album_calls
# Checks that curate_discovery_playlists fetches album data from the source
# that supplied the cached recent albums: only Deezer has recent albums and
# pool tracks here, so the Deezer fake must be asked for "dz-album-1", the
# Spotify fake must receive no album calls, and Deezer-suffixed playlists
# must be saved.
def test_curate_discovery_playlists_uses_source_priority_for_recent_albums(monkeypatch):
monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0)
# Make Deezer the primary source, followed by Spotify and iTunes.
monkeypatch.setattr(watchlist_scanner_module, "get_primary_source", lambda: "deezer")
monkeypatch.setattr(watchlist_scanner_module, "get_source_priority", lambda primary: [primary, "spotify", "itunes"])
artist = _build_artist("Playlist Artist")
scanner = _build_scanner({"tracks": {"items": []}}, [artist])
# Collects (key, tracks) pairs passed to the patched save_curated_playlist.
saved_playlists = []
# Cached recent-album row that carries Deezer IDs only.
recent_album = {
"album_deezer_id": "dz-album-1",
"album_itunes_id": None,
"album_spotify_id": None,
"album_name": "Recent Deezer Album",
"artist_name": "Playlist Artist",
"release_date": "2026-04-01",
"album_type": "album",
"album_cover_url": "https://example.com/deezer-album.jpg",
"artist_deezer_id": "dz-artist",
"artist_spotify_id": None,
"artist_itunes_id": None,
}
# Discovery-pool track that carries a Deezer track ID only.
discovery_track = types.SimpleNamespace(
artist_name="Playlist Artist",
popularity=72,
deezer_track_id="dz-track-1",
spotify_track_id=None,
itunes_track_id=None,
)
deezer_client = _FakeSourceClient(
artist_id="dz-artist",
albums=[],
image_url="https://example.com/deezer-artist.jpg",
album_payload={
"id": "dz-album-1",
"name": "Recent Deezer Album",
"images": [{"url": "https://example.com/deezer-album.jpg"}],
"release_date": "2026-04-01",
"popularity": 40,
"tracks": {"items": [{"id": "dz-track-1", "name": "Track One", "artists": [{"name": "Playlist Artist"}], "duration_ms": 180000}]},
"artists": [{"id": "dz-artist"}],
},
)
# The Spotify fake has its own payload so an unwanted Spotify album fetch would be observable.
spotify_client = _FakeSourceClient(
artist_id="sp-artist",
albums=[],
image_url="https://example.com/spotify-artist.jpg",
album_payload={
"id": "sp-album-1",
"name": "Spotify Album",
"images": [{"url": "https://example.com/spotify-album.jpg"}],
"release_date": "2026-04-01",
"popularity": 60,
"tracks": {"items": [{"id": "sp-track-1", "name": "Spotify Track", "artists": [{"name": "Playlist Artist"}], "duration_ms": 180000}]},
"artists": [{"id": "sp-artist"}],
},
)
# Route client lookups to the fakes; any other source (e.g. "itunes") resolves to None.
def fake_get_client_for_source(source):
return {
"deezer": deezer_client,
"spotify": spotify_client,
}.get(source)
monkeypatch.setattr(watchlist_scanner_module, "get_client_for_source", fake_get_client_for_source)
# Listening profile without data (has_data is False).
monkeypatch.setattr(scanner, "_get_listening_profile", lambda profile_id: {
"has_data": False,
"top_artist_names": set(),
"top_genres": set(),
"avg_daily_plays": 0.0,
"artist_play_counts": {},
})
# Database stubs: recent albums and pool tracks exist for "deezer" only;
# saved playlists are captured in saved_playlists; top artists and watchlist are empty.
monkeypatch.setattr(scanner.database, "get_discovery_recent_albums", lambda limit, source, profile_id: [recent_album] if source == "deezer" else [], raising=False)
monkeypatch.setattr(scanner.database, "get_discovery_pool_tracks", lambda *args, **kwargs: [discovery_track] if kwargs.get("source") == "deezer" else [], raising=False)
monkeypatch.setattr(scanner.database, "save_curated_playlist", lambda key, tracks, profile_id=1: saved_playlists.append((key, list(tracks))) or True, raising=False)
monkeypatch.setattr(scanner.database, "get_top_artists", lambda *args, **kwargs: [], raising=False)
monkeypatch.setattr(scanner.database, "get_watchlist_artists", lambda *args, **kwargs: [], raising=False)
scanner.curate_discovery_playlists(profile_id=1)
# Album data came from Deezer and never from Spotify.
assert any(call[0] == "dz-album-1" for call in deezer_client.album_calls)
assert spotify_client.album_calls == []
# Both curated playlists were saved under Deezer-specific keys.
assert any(key == "release_radar_deezer" for key, _ in saved_playlists)
assert any(key == "discovery_weekly_deezer" for key, _ in saved_playlists)
def test_match_to_spotify_uses_strict_lookup():
spotify_client = _FakeSpotifyClient(
search_results=[types.SimpleNamespace(id="fallback-id", name="Artist One")]

Loading…
Cancel
Save