diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index 02326956..1d3f461f 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -469,6 +469,15 @@ class WatchlistScanner: 'discogs': 'discogs_artist_id', }.get(source) + @staticmethod + def _similar_artist_id_attribute_for_source(source: str) -> Optional[str]: + """Return the similar-artist attribute that stores the given source ID.""" + return { + 'spotify': 'similar_artist_spotify_id', + 'itunes': 'similar_artist_itunes_id', + 'deezer': 'similar_artist_deezer_id', + }.get(source) + @staticmethod def _extract_entity_id(value: Any) -> Optional[str]: """Extract an ID from a dataclass, dict, or plain object.""" @@ -692,7 +701,9 @@ class WatchlistScanner: artist_id: str, album_type: str = 'album,single,ep', limit: int = 50, + # Only applies to Spotify currently skip_cache: bool = True, + # Only applies to Spotify currently max_pages: int = 0, ) -> List[Any]: """Fetch artist albums for a specific source, keeping Spotify strict.""" @@ -1587,11 +1598,7 @@ class WatchlistScanner: def _match_to_spotify(self, artist_name: str) -> Optional[str]: """Match artist name to Spotify ID using fuzzy name comparison.""" try: - # Use the authenticated spotify_client passed to the scanner, - # not get_client_for_source which creates a fresh unauthenticated instance - client = self.spotify_client - if not client or not client.is_spotify_authenticated(): - client = get_client_for_source('spotify') + client = get_client_for_source('spotify') if not client: return None @@ -2960,17 +2967,14 @@ class WatchlistScanner: """ Cache recent albums from watchlist and similar artists for discover page. - Supports both Spotify and iTunes sources - iTunes is always processed (baseline), - Spotify is added when authenticated. Same pattern as discovery pool. + Uses the configured source priority and caches the first source that + can return albums for each artist. 
""" try: from datetime import datetime, timedelta logger.info("Caching recent albums for discover page...") - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - # Clear existing cache for this profile self.database.clear_discovery_recent_albums(profile_id=profile_id) @@ -2987,21 +2991,20 @@ class WatchlistScanner: except Exception: pass cutoff_date = datetime.now() - timedelta(days=days_lookback) - cached_count = {'spotify': 0, 'itunes': 0, 'deezer': 0} - albums_checked = 0 - - # Determine available sources - spotify_available = self._spotify_is_primary_source() + discovery_sources = self._discovery_source_priority() + if not discovery_sources: + logger.warning("No music sources available to cache recent albums") + return - # Get fallback metadata client (iTunes or Deezer) - itunes_client, fallback_source = _get_fallback_metadata_client() + cached_count = {source: 0 for source in discovery_sources} + albums_checked = 0 # Get artists to check (scoped to profile) watchlist_artists = self.database.get_watchlist_artists(profile_id=profile_id) - similar_artists = self.database.get_top_similar_artists(limit=50, profile_id=profile_id) + # We only need a modest sample here; this path fans out into per-source album lookups. 
+ similar_artists = self.database.get_top_similar_artists(limit=25, profile_id=profile_id) logger.info(f"Checking albums from {len(watchlist_artists)} watchlist + {len(similar_artists)} similar artists") - logger.info(f"Sources: Spotify={spotify_available}, {fallback_source}=True") def process_album(album, artist_name, artist_spotify_id, artist_itunes_id, source, artist_deezer_id=None): """Helper to process and cache a single album""" @@ -3047,118 +3050,137 @@ class WatchlistScanner: # Process watchlist artists for artist in watchlist_artists: - # Always process fallback source (iTunes or Deezer) as baseline - fallback_id = artist.itunes_artist_id if fallback_source == 'itunes' else artist.deezer_artist_id - if not fallback_id: - # Try to resolve fallback ID on-the-fly (with retry for rate limiting) - try: - results = itunes_api_call_with_retry( - itunes_client.search_artists, artist.artist_name, limit=1 - ) - if results and len(results) > 0: - fallback_id = results[0].id - fallback_resolved += 1 - logger.debug(f"[{fallback_source}] Resolved ID for {artist.artist_name}: {fallback_id}") - else: - fallback_failed_resolve += 1 - logger.info(f"[{fallback_source}] No artist found for: {artist.artist_name}") - except Exception as e: - fallback_failed_resolve += 1 - logger.info(f"[{fallback_source}] Failed to resolve {artist.artist_name}: {e}") + selected_source = None + selected_artist_id = None + selected_albums = [] + selected_watchlist_id = None + + for source in discovery_sources: + source_attr = self._artist_id_attribute_for_source(source) + stored_id = getattr(artist, source_attr, None) if source_attr else None + cache_callback = None + if source == 'spotify': + cache_callback = lambda found_id, watchlist_id=artist.id: self._cache_watchlist_artist_source_id(artist, 'spotify', found_id) + elif source == 'itunes': + cache_callback = lambda found_id, watchlist_id=artist.id: self._cache_watchlist_artist_source_id(artist, 'itunes', found_id) + elif source == 'deezer': + 
cache_callback = lambda found_id, watchlist_id=artist.id: self._cache_watchlist_artist_source_id(artist, 'deezer', found_id) + + artist_id = self._resolve_artist_id_for_source( + source, + artist.artist_name, + stored_id=stored_id, + cache_callback=cache_callback, + ) + if not artist_id: + continue - if fallback_id: - try: - albums = itunes_api_call_with_retry( - itunes_client.get_artist_albums, fallback_id, album_type='album,single,ep', limit=20 - ) - for album in albums or []: - process_album( - album, artist.artist_name, artist.spotify_artist_id, - fallback_id if fallback_source == 'itunes' else None, - fallback_source, - artist_deezer_id=fallback_id if fallback_source == 'deezer' else None - ) - except Exception as e: - logger.info(f"[{fallback_source}] Error fetching albums for {artist.artist_name}: {e}") + albums = self._get_artist_albums_for_source( + source, + artist_id, + album_type='album,single,ep', + limit=20, + skip_cache=True, + max_pages=2, + ) + if not albums: + logger.debug(f"No recent albums found for {artist.artist_name} on {source}") + continue - # Process Spotify if authenticated - if spotify_available and artist.spotify_artist_id: - try: - albums = self.spotify_client.get_artist_albums( - artist.spotify_artist_id, - album_type='album,single,ep', - limit=20, - skip_cache=True, - max_pages=2, - ) - for album in albums or []: - process_album(album, artist.artist_name, artist.spotify_artist_id, fallback_id if fallback_source == 'itunes' else None, 'spotify') - except Exception as e: - logger.debug(f"Error fetching Spotify albums for {artist.artist_name}: {e}") + selected_source = source + selected_artist_id = artist_id + selected_albums = albums + if source == 'spotify': + selected_watchlist_id = artist_id + elif source == 'itunes': + selected_watchlist_id = artist.itunes_artist_id or artist_id + elif source == 'deezer': + selected_watchlist_id = getattr(artist, 'deezer_artist_id', None) or artist_id + break + + if not selected_source or not 
selected_artist_id or not selected_albums: + time.sleep(DELAY_BETWEEN_ARTISTS) + continue + + for album in selected_albums: + process_album( + album, + artist.artist_name, + selected_watchlist_id if selected_source == 'spotify' else artist.spotify_artist_id, + selected_watchlist_id if selected_source == 'itunes' else None, + selected_source, + artist_deezer_id=selected_watchlist_id if selected_source == 'deezer' else None, + ) time.sleep(DELAY_BETWEEN_ARTISTS) # Process similar artists for artist in similar_artists: - # Always process fallback source (iTunes or Deezer) as baseline - fallback_id = artist.similar_artist_itunes_id if fallback_source == 'itunes' else getattr(artist, 'similar_artist_deezer_id', None) - if not fallback_id: - # Try to resolve fallback ID on-the-fly (with retry for rate limiting) - try: - results = itunes_api_call_with_retry( - itunes_client.search_artists, artist.similar_artist_name, limit=1 - ) - if results and len(results) > 0: - fallback_id = results[0].id - # Cache for future - if fallback_source == 'deezer': - self.database.update_similar_artist_deezer_id(artist.id, fallback_id) - else: - self.database.update_similar_artist_itunes_id(artist.id, fallback_id) - fallback_resolved += 1 - logger.debug(f"[{fallback_source}] Resolved ID for similar artist {artist.similar_artist_name}: {fallback_id}") - else: - fallback_failed_resolve += 1 - logger.info(f"[{fallback_source}] No artist found for similar: {artist.similar_artist_name}") - except Exception as e: - fallback_failed_resolve += 1 - logger.info(f"[{fallback_source}] Failed to resolve similar {artist.similar_artist_name}: {e}") + selected_source = None + selected_artist_id = None + selected_albums = [] + selected_similar_id = None + + for source in discovery_sources: + source_attr = self._similar_artist_id_attribute_for_source(source) + stored_id = getattr(artist, source_attr, None) if source_attr else None + cache_callback = None + if source == 'itunes': + cache_callback = lambda 
found_id, similar_id=artist.id: self.database.update_similar_artist_itunes_id(similar_id, found_id) + elif source == 'deezer': + cache_callback = lambda found_id, similar_id=artist.id: self.database.update_similar_artist_deezer_id(similar_id, found_id) + + artist_id = self._resolve_artist_id_for_source( + source, + artist.similar_artist_name, + stored_id=stored_id, + cache_callback=cache_callback, + ) + if not artist_id: + continue - if fallback_id: - try: - albums = itunes_api_call_with_retry( - itunes_client.get_artist_albums, fallback_id, album_type='album,single,ep', limit=20 - ) - for album in albums or []: - process_album( - album, artist.similar_artist_name, artist.similar_artist_spotify_id, - fallback_id if fallback_source == 'itunes' else None, - fallback_source, - artist_deezer_id=fallback_id if fallback_source == 'deezer' else None - ) - except Exception as e: - logger.info(f"[{fallback_source}] Error fetching albums for similar {artist.similar_artist_name}: {e}") + albums = self._get_artist_albums_for_source( + source, + artist_id, + album_type='album,single,ep', + limit=20, + skip_cache=True, + max_pages=2, + ) + if not albums: + logger.debug(f"No recent albums found for similar {artist.similar_artist_name} on {source}") + continue - # Process Spotify if authenticated - if spotify_available and artist.similar_artist_spotify_id: - try: - albums = self.spotify_client.get_artist_albums( - artist.similar_artist_spotify_id, - album_type='album,single,ep', - limit=20, - skip_cache=True, - max_pages=2, - ) - for album in albums or []: - process_album(album, artist.similar_artist_name, artist.similar_artist_spotify_id, fallback_id if fallback_source == 'itunes' else None, 'spotify') - except Exception as e: - logger.debug(f"Error fetching Spotify albums for {artist.similar_artist_name}: {e}") + selected_source = source + selected_artist_id = artist_id + selected_albums = albums + if source == 'spotify': + selected_similar_id = artist_id + elif source == 
'itunes': + selected_similar_id = artist.similar_artist_itunes_id or artist_id + elif source == 'deezer': + selected_similar_id = getattr(artist, 'similar_artist_deezer_id', None) or artist_id + break + + if not selected_source or not selected_artist_id or not selected_albums: + time.sleep(DELAY_BETWEEN_ARTISTS) + continue + + for album in selected_albums: + process_album( + album, + artist.similar_artist_name, + selected_similar_id if selected_source == 'spotify' else artist.similar_artist_spotify_id, + selected_similar_id if selected_source == 'itunes' else None, + selected_source, + artist_deezer_id=selected_similar_id if selected_source == 'deezer' else None, + ) time.sleep(DELAY_BETWEEN_ARTISTS) - total_cached = cached_count['spotify'] + cached_count.get(fallback_source, 0) - logger.info(f"Cached {total_cached} recent albums (Spotify: {cached_count['spotify']}, {fallback_source}: {cached_count.get(fallback_source, 0)}) from {albums_checked} albums checked") - logger.info(f"[{fallback_source}] ID resolution stats: {fallback_resolved} resolved, {fallback_failed_resolve} failed") + total_cached = sum(cached_count.values()) + logger.info(f"Cached {total_cached} recent albums from {albums_checked} albums checked") + logger.info(f"Recent albums ID resolution stats: {fallback_resolved} resolved, {fallback_failed_resolve} failed") except Exception as e: logger.error(f"Error caching discovery recent albums: {e}") @@ -3205,7 +3227,8 @@ class WatchlistScanner: """ Curate consistent playlist selections that stay the same until next discovery pool update. - Supports both Spotify and iTunes sources - creates separate curated playlists for each. + Supports the discovery metadata sources in priority order and creates + separate curated playlists for each source. 
- Release Radar: Prioritizes freshness + popularity from recent releases - Discovery Weekly: Balanced mix of popular picks, deep cuts, and mid-tier tracks @@ -3217,9 +3240,6 @@ class WatchlistScanner: logger.info("Curating discovery playlists...") - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - # Build listening profile for personalization profile = self._get_listening_profile(profile_id) if profile['has_data']: @@ -3228,13 +3248,10 @@ class WatchlistScanner: f"{profile['avg_daily_plays']:.1f} avg daily plays") # Determine available sources - spotify_available = self._spotify_is_primary_source() - itunes_client, fallback_source = _get_fallback_metadata_client() - - # Process each available source - sources_to_process = [fallback_source] # Fallback source (iTunes/Deezer) always available - if spotify_available: - sources_to_process.append('spotify') + sources_to_process = self._discovery_source_priority() + if not sources_to_process: + logger.warning("No discovery sources available to curate playlists") + return # Pre-build artist genre cache from local DB for genre affinity scoring _artist_genre_cache = {} @@ -3287,7 +3304,7 @@ class WatchlistScanner: for album in albums: try: - # Get album data from appropriate source + # Get album data from the same source that won discovery if source == 'spotify': album_id = album.get('album_spotify_id') elif source == 'deezer': @@ -3297,12 +3314,7 @@ class WatchlistScanner: if not album_id: continue - if source == 'spotify': - album_data = self.spotify_client.get_album(album_id) - else: - album_data = itunes_api_call_with_retry( - itunes_client.get_album, album_id - ) + album_data = self._get_album_data_for_source(source, album_id, album_name=album.get('album_name', '')) if not album_data or 'tracks' not in album_data: continue @@ -3502,11 +3514,19 @@ class WatchlistScanner: if profile['has_data']: logger.info("Building 'Because You Listen 
def _recent_release_date():
    """Return today's date as ``YYYY-MM-DD`` for use as a fixture release date.

    ``cache_discovery_recent_albums`` derives its cutoff from
    ``datetime.now() - timedelta(days=days_lookback)``; presumably releases
    older than that are skipped (the filter itself lives in ``process_album``
    - confirm there). A hard-coded calendar date would silently age out of
    that window, so fixtures use a date relative to the test run instead.
    """
    from datetime import date

    return date.today().isoformat()


def _prioritise_deezer_then_spotify(monkeypatch):
    """Zero the inter-artist delay and pin source priority to deezer, spotify, itunes."""
    monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0)
    monkeypatch.setattr(watchlist_scanner_module, "get_primary_source", lambda: "deezer")
    monkeypatch.setattr(
        watchlist_scanner_module,
        "get_source_priority",
        lambda primary: [primary, "spotify", "itunes"],
    )


def _serve_fake_clients(monkeypatch, clients):
    """Make ``get_client_for_source`` return the matching entry of *clients* (None if absent)."""

    def fake_get_client_for_source(source):
        return clients.get(source)

    monkeypatch.setattr(watchlist_scanner_module, "get_client_for_source", fake_get_client_for_source)


def test_cache_discovery_recent_albums_uses_primary_source_first(monkeypatch):
    """When the primary source (deezer) returns albums, spotify is never queried."""
    _prioritise_deezer_then_spotify(monkeypatch)
    # Stub only ``sleep`` so every other ``time.*`` call in the scanner keeps
    # working; swapping out the whole module would turn them into AttributeErrors.
    monkeypatch.setattr(watchlist_scanner_module.time, "sleep", lambda *_args, **_kwargs: None)

    artist = _build_artist("Artist One")
    album = types.SimpleNamespace(
        id="dz-album-1",
        name="Recent Deezer Album",
        album_type="album",
        release_date=_recent_release_date(),
        image_url="https://example.com/deezer-album.jpg",
    )

    deezer_client = _FakeSourceClient(
        artist_id="dz-artist",
        albums=[album],
        image_url="https://example.com/deezer-artist.jpg",
    )
    spotify_client = _FakeSourceClient(
        artist_id="sp-artist",
        albums=[types.SimpleNamespace(id="sp-album-1", name="Spotify Album", album_type="album")],
        image_url="https://example.com/spotify-artist.jpg",
    )
    _serve_fake_clients(monkeypatch, {"deezer": deezer_client, "spotify": spotify_client})

    scanner = _build_scanner({"tracks": {"items": []}}, [artist])
    scanner.database.get_top_similar_artists = lambda limit=50, profile_id=1: []

    scanner.cache_discovery_recent_albums(profile_id=1)

    assert scanner.database.discovery_recent_calls
    assert scanner.database.discovery_recent_calls[0][1] == "deezer"
    assert deezer_client.album_calls
    assert spotify_client.search_calls == []
    assert spotify_client.album_calls == []


def test_cache_discovery_recent_albums_falls_back_to_spotify_when_primary_has_no_albums(monkeypatch):
    """When deezer yields no albums, the next source in priority order (spotify) is cached."""
    _prioritise_deezer_then_spotify(monkeypatch)
    # Stub only ``sleep``; see the note in the primary-source test above the
    # same call for why the module itself is left intact.
    monkeypatch.setattr(watchlist_scanner_module.time, "sleep", lambda *_args, **_kwargs: None)

    release_date = _recent_release_date()

    artist = _build_artist("Fallback Artist")
    # No stored Spotify ID, so the scanner has to search Spotify by name.
    artist.spotify_artist_id = None
    deezer_client = _FakeSourceClient(
        artist_id="dz-artist",
        albums=[],
        image_url="https://example.com/deezer-artist.jpg",
    )
    spotify_album = types.SimpleNamespace(
        id="sp-album-1",
        name="Spotify Recent Album",
        album_type="album",
        release_date=release_date,
        image_url="https://example.com/spotify-album.jpg",
    )
    spotify_client = _FakeSourceClient(
        artist_id="sp-artist",
        albums=[spotify_album],
        image_url="https://example.com/spotify-artist.jpg",
        album_payload={
            "id": "sp-album-1",
            "name": "Spotify Recent Album",
            "images": [{"url": "https://example.com/spotify-album.jpg"}],
            "release_date": release_date,
            "popularity": 50,
            "tracks": {"items": [{"id": "sp-track-1", "name": "Spotify Track", "artists": [{"name": "Fallback Artist"}]}]},
            "artists": [{"id": "sp-artist"}],
        },
    )
    _serve_fake_clients(monkeypatch, {"deezer": deezer_client, "spotify": spotify_client})

    scanner = _build_scanner({"tracks": {"items": []}}, [artist])
    scanner.database.get_top_similar_artists = lambda limit=50, profile_id=1: []

    scanner.cache_discovery_recent_albums(profile_id=1)

    assert scanner.database.discovery_recent_calls
    assert scanner.database.discovery_recent_calls[0][1] == "spotify"
    assert deezer_client.album_calls
    assert spotify_client.search_calls == [("Fallback Artist", 1, {"allow_fallback": False})]
    assert spotify_client.album_calls


def test_curate_discovery_playlists_uses_source_priority_for_recent_albums(monkeypatch):
    """Curation fetches album data from deezer only and saves the deezer playlists."""
    _prioritise_deezer_then_spotify(monkeypatch)

    release_date = _recent_release_date()

    artist = _build_artist("Playlist Artist")
    scanner = _build_scanner({"tracks": {"items": []}}, [artist])

    saved_playlists = []
    recent_album = {
        "album_deezer_id": "dz-album-1",
        "album_itunes_id": None,
        "album_spotify_id": None,
        "album_name": "Recent Deezer Album",
        "artist_name": "Playlist Artist",
        "release_date": release_date,
        "album_type": "album",
        "album_cover_url": "https://example.com/deezer-album.jpg",
        "artist_deezer_id": "dz-artist",
        "artist_spotify_id": None,
        "artist_itunes_id": None,
    }
    discovery_track = types.SimpleNamespace(
        artist_name="Playlist Artist",
        popularity=72,
        deezer_track_id="dz-track-1",
        spotify_track_id=None,
        itunes_track_id=None,
    )
    deezer_client = _FakeSourceClient(
        artist_id="dz-artist",
        albums=[],
        image_url="https://example.com/deezer-artist.jpg",
        album_payload={
            "id": "dz-album-1",
            "name": "Recent Deezer Album",
            "images": [{"url": "https://example.com/deezer-album.jpg"}],
            "release_date": release_date,
            "popularity": 40,
            "tracks": {"items": [{"id": "dz-track-1", "name": "Track One", "artists": [{"name": "Playlist Artist"}], "duration_ms": 180000}]},
            "artists": [{"id": "dz-artist"}],
        },
    )
    spotify_client = _FakeSourceClient(
        artist_id="sp-artist",
        albums=[],
        image_url="https://example.com/spotify-artist.jpg",
        album_payload={
            "id": "sp-album-1",
            "name": "Spotify Album",
            "images": [{"url": "https://example.com/spotify-album.jpg"}],
            "release_date": release_date,
            "popularity": 60,
            "tracks": {"items": [{"id": "sp-track-1", "name": "Spotify Track", "artists": [{"name": "Playlist Artist"}], "duration_ms": 180000}]},
            "artists": [{"id": "sp-artist"}],
        },
    )

    # Installed after the scanner is built, matching the original setup order.
    _serve_fake_clients(monkeypatch, {"deezer": deezer_client, "spotify": spotify_client})
    monkeypatch.setattr(scanner, "_get_listening_profile", lambda profile_id: {
        "has_data": False,
        "top_artist_names": set(),
        "top_genres": set(),
        "avg_daily_plays": 0.0,
        "artist_play_counts": {},
    })
    # Only the deezer source has cached albums / pool tracks in this scenario.
    monkeypatch.setattr(scanner.database, "get_discovery_recent_albums", lambda limit, source, profile_id: [recent_album] if source == "deezer" else [], raising=False)
    monkeypatch.setattr(scanner.database, "get_discovery_pool_tracks", lambda *args, **kwargs: [discovery_track] if kwargs.get("source") == "deezer" else [], raising=False)
    monkeypatch.setattr(scanner.database, "save_curated_playlist", lambda key, tracks, profile_id=1: saved_playlists.append((key, list(tracks))) or True, raising=False)
    monkeypatch.setattr(scanner.database, "get_top_artists", lambda *args, **kwargs: [], raising=False)
    monkeypatch.setattr(scanner.database, "get_watchlist_artists", lambda *args, **kwargs: [], raising=False)

    scanner.curate_discovery_playlists(profile_id=1)

    assert any(call[0] == "dz-album-1" for call in deezer_client.album_calls)
    assert spotify_client.album_calls == []
    assert any(key == "release_radar_deezer" for key, _ in saved_playlists)
    assert any(key == "discovery_weekly_deezer" for key, _ in saved_playlists)