diff --git a/core/spotify_client.py b/core/spotify_client.py index 6519550..f38a820 100644 --- a/core/spotify_client.py +++ b/core/spotify_client.py @@ -14,7 +14,18 @@ logger = get_logger("spotify_client") # Global rate limiting variables _last_api_call_time = 0 _api_call_lock = threading.Lock() -MIN_API_INTERVAL = 0.35 # 350ms between API calls (~171/min, under Spotify's ~180/min limit) +MIN_API_INTERVAL = 0.35 # Default: 350ms between API calls (~171/min, under Spotify's ~180/min limit) + +def _get_min_api_interval(): + """Get configurable API interval from settings, falling back to default.""" + try: + from config.settings import config_manager + val = config_manager.get('spotify.min_api_interval', None) + if val is not None: + return max(0.1, float(val)) # Floor at 100ms to prevent abuse + except Exception: + pass + return MIN_API_INTERVAL # Request queuing for burst handling import queue @@ -178,8 +189,23 @@ def _detect_and_set_rate_limit(exception, endpoint_name="unknown"): # Try to extract Retry-After from exception headers retry_after = None has_real_header = False - if hasattr(exception, 'headers') and exception.headers: - retry_after = exception.headers.get('Retry-After') or exception.headers.get('retry-after') + + # Method 1: SpotifyException.headers (set by spotipy with retries=0) + exc_headers = getattr(exception, 'headers', None) + if exc_headers and hasattr(exc_headers, 'get'): + retry_after = exc_headers.get('Retry-After') or exc_headers.get('retry-after') + if retry_after: + logger.info(f"Extracted Retry-After from exception headers: {retry_after}") + else: + logger.debug(f"Exception has headers but no Retry-After key. Headers type: {type(exc_headers).__name__}, keys: {list(exc_headers.keys())[:10] if hasattr(exc_headers, 'keys') else 'N/A'}") + + # Method 2: Parse from error message (some spotipy versions embed it) + if not retry_after: + import re + ra_match = re.search(r'[Rr]etry[- ][Aa]fter[:\s]+(\d+)', error_str) + if ra_match: + retry_after = ra_match.group(1) + logger.info(f"Extracted Retry-After from error message: {retry_after}") if retry_after: try: @@ -224,13 +250,14 @@ def rate_limited(func): if _is_globally_rate_limited(): raise SpotifyRateLimitError(0, func.__name__) - # Enforce minimum interval between API calls + # Enforce minimum interval between API calls (configurable via settings) + _interval = _get_min_api_interval() with _api_call_lock: current_time = time.time() time_since_last_call = current_time - _last_api_call_time - if time_since_last_call < MIN_API_INTERVAL: - sleep_time = MIN_API_INTERVAL - time_since_last_call + if time_since_last_call < _interval: + sleep_time = _interval - time_since_last_call time.sleep(sleep_time) _last_api_call_time = time.time() @@ -681,8 +708,9 @@ class SpotifyClient: if results['next']: with _api_call_lock: elapsed = time.time() - _last_api_call_time - if elapsed < MIN_API_INTERVAL: - time.sleep(MIN_API_INTERVAL - elapsed) + _pi = _get_min_api_interval() + if elapsed < _pi: + time.sleep(_pi - elapsed) globals()['_last_api_call_time'] = time.time() from core.api_call_tracker import api_call_tracker api_call_tracker.record_call('spotify', endpoint='get_user_playlists_page') @@ -960,8 +988,9 @@ class SpotifyClient: if results['next']: with _api_call_lock: elapsed = time.time() - _last_api_call_time - if elapsed < MIN_API_INTERVAL: - time.sleep(MIN_API_INTERVAL - elapsed) + _pi = _get_min_api_interval() + if elapsed < _pi: + time.sleep(_pi - elapsed) globals()['_last_api_call_time'] = time.time() from core.api_call_tracker import api_call_tracker api_call_tracker.record_call('spotify', endpoint='get_playlist_tracks_page') @@ -1304,8 +1333,9 @@ class SpotifyClient: while next_page.get('next'): with _api_call_lock: elapsed = time.time() - _last_api_call_time - if elapsed < MIN_API_INTERVAL: - time.sleep(MIN_API_INTERVAL - elapsed) + _pi = _get_min_api_interval() + if elapsed < _pi: + time.sleep(_pi - elapsed) globals()['_last_api_call_time'] = time.time() from core.api_call_tracker import api_call_tracker api_call_tracker.record_call('spotify', endpoint='get_album_tracks_page') @@ -1351,9 +1381,11 @@ class SpotifyClient: return None @rate_limited - def get_artist_albums(self, artist_id: str, album_type: str = 'album,single', limit: int = 10, skip_cache: bool = False) -> List[Album]: + def get_artist_albums(self, artist_id: str, album_type: str = 'album,single', limit: int = 10, skip_cache: bool = False, max_pages: int = 0) -> List[Album]: """Get albums by artist ID - falls back to iTunes if Spotify not authenticated. - Set skip_cache=True for watchlist scans that need fresh data to detect new releases.""" + Set skip_cache=True for watchlist scans that need fresh data to detect new releases. + Set max_pages to limit pagination (0 = fetch all). Spotify returns newest first, + so max_pages=1 is sufficient for new release detection.""" cache = get_metadata_cache() fallback_src = self._fallback_source source = fallback_src if self._is_itunes_id(artist_id) else 'spotify' @@ -1373,7 +1405,9 @@ class SpotifyClient: try: albums = [] raw_items = [] + # Spotify caps artist_albums at 10 per page results = self.sp.artist_albums(artist_id, album_type=album_type, limit=min(limit, 10)) + pages_fetched = 1 while results: for album_data in results['items']: @@ -1381,21 +1415,28 @@ class SpotifyClient: albums.append(album) raw_items.append(album_data) + # Stop if we've hit the page limit (0 = unlimited) + if max_pages and pages_fetched >= max_pages: + break + # Get next batch if available — throttle pagination to respect rate limits if results['next']: # Enforce same rate limit as decorated calls with _api_call_lock: elapsed = time.time() - _last_api_call_time - if elapsed < MIN_API_INTERVAL: - time.sleep(MIN_API_INTERVAL - elapsed) + _pi = _get_min_api_interval() + if elapsed < _pi: + time.sleep(_pi - elapsed) globals()['_last_api_call_time'] = time.time() from core.api_call_tracker import api_call_tracker api_call_tracker.record_call('spotify', endpoint='get_artist_albums_page') results = self.sp.next(results) + pages_fetched += 1 else: results = None - logger.info(f"Retrieved {len(albums)} albums for artist {artist_id}") + logger.info(f"Retrieved {len(albums)} albums for artist {artist_id}" + + (f" (page limit: {max_pages})" if max_pages else "")) # Cache the full artist albums result (wrapped in dict for cache compatibility) if raw_items: diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index a95588c..c18c379 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -857,34 +857,41 @@ class WatchlistScanner: If None, uses lookback period setting from database """ try: - # Get all artist albums (albums + singles) - this is rate limited in spotify_client - logger.debug(f"Fetching discography for artist {spotify_artist_id}") - albums = self.spotify_client.get_artist_albums(spotify_artist_id, album_type='album,single', limit=50, skip_cache=True) - - if not albums: - logger.warning(f"No albums found for artist {spotify_artist_id}") - return [] - - # Add small delay after fetching artist discography to be extra safe - time.sleep(0.3) # 300ms breathing room - - # Determine cutoff date for filtering + # Determine if we need the full discography or just recent releases. + # Spotify returns albums sorted newest-first, so for time-bounded scans + # we only need the first page (50 albums) — this cuts API calls by ~90% + # for prolific artists (262 albums = 27 calls → 1 call). + needs_full_discog = False cutoff_timestamp = last_scan_timestamp - # If no last scan timestamp, use per-artist lookback or global setting if cutoff_timestamp is None: if lookback_days is not None: - # Per-artist override cutoff_timestamp = datetime.now(timezone.utc) - timedelta(days=lookback_days) logger.info(f"Using per-artist lookback: {lookback_days} days (cutoff: {cutoff_timestamp})") else: - # Global setting lookback_period = self._get_lookback_period_setting() - if lookback_period != 'all': + if lookback_period == 'all': + needs_full_discog = True + else: days = int(lookback_period) cutoff_timestamp = datetime.now(timezone.utc) - timedelta(days=days) logger.info(f"Using global lookback period: {lookback_period} days (cutoff: {cutoff_timestamp})") + # Fetch albums — limit pagination unless full discography is needed + logger.debug(f"Fetching discography for artist {spotify_artist_id}" + + (" (full)" if needs_full_discog else " (recent only, max 1 page)")) + albums = self.spotify_client.get_artist_albums( + spotify_artist_id, album_type='album,single', limit=50, + skip_cache=True, max_pages=0 if needs_full_discog else 1 + ) + + if not albums: + logger.warning(f"No albums found for artist {spotify_artist_id}") + return [] + + # Add small delay after fetching artist discography to be extra safe + time.sleep(0.3) # 300ms breathing room + # Filter by release date if we have a cutoff timestamp if cutoff_timestamp: filtered_albums = [] @@ -914,25 +921,15 @@ class WatchlistScanner: lookback_days: Per-artist override for lookback period (None = use global setting) """ try: - # Get all artist albums (albums + singles) - # skip_cache for Spotify so watchlist scans always get fresh data - logger.debug(f"Fetching discography for artist {artist_id}") - _skip = {'skip_cache': True} if hasattr(client, 'sp') else {} - albums = client.get_artist_albums(artist_id, album_type='album,single', limit=50, **_skip) - - if not albums: - logger.warning(f"No albums found for artist {artist_id}") - return [] - - # Add small delay after fetching artist discography to be extra safe - time.sleep(0.3) # 300ms breathing room - - # Determine cutoff date for filtering + # Determine if we need full discography or just recent releases BEFORE fetching. + # Spotify returns albums newest-first, so for time-bounded scans we only need + # the first page (50 albums) — cuts API calls by ~90% for prolific artists. lookback_period = self._get_lookback_period_setting() + needs_full_discog = False - # If lookback is 'all', always return everything regardless of scan timestamp if lookback_period == 'all': cutoff_timestamp = None + needs_full_discog = True elif last_scan_timestamp is not None: cutoff_timestamp = last_scan_timestamp @@ -941,6 +938,7 @@ class WatchlistScanner: if rescan_cutoff == 'all': logger.info(f"Lookback period changed to 'all' — returning full discography") cutoff_timestamp = None + needs_full_discog = True elif rescan_cutoff is not None: scan_ts = cutoff_timestamp if scan_ts.tzinfo is None: @@ -951,10 +949,30 @@ class WatchlistScanner: logger.info(f"Lookback period change detected — expanding cutoff from {cutoff_timestamp} to {rescan_cutoff}") cutoff_timestamp = rescan_cutoff else: - # No scan timestamp — use lookback period - days = int(lookback_period) + # No scan timestamp — first scan, use lookback period + if lookback_days is not None: + days = lookback_days + else: + days = int(lookback_period) cutoff_timestamp = datetime.now(timezone.utc) - timedelta(days=days) - logger.info(f"Using lookback period: {lookback_period} days (cutoff: {cutoff_timestamp})") + logger.info(f"Using lookback period: {days} days (cutoff: {cutoff_timestamp})") + + # Fetch albums — limit pagination unless full discography is needed + logger.debug(f"Fetching discography for artist {artist_id}" + + (" (full)" if needs_full_discog else " (recent only, max 1 page)")) + _skip = {'skip_cache': True} if hasattr(client, 'sp') else {} + _max_pages = 0 if needs_full_discog else 1 + # Only pass max_pages to clients that support it (spotify_client) + if hasattr(client, 'sp'): + _skip['max_pages'] = _max_pages + albums = client.get_artist_albums(artist_id, album_type='album,single', limit=50, **_skip) + + if not albums: + logger.warning(f"No albums found for artist {artist_id}") + return [] + + # Add small delay after fetching artist discography to be extra safe + time.sleep(0.3) # 300ms breathing room # Filter by release date if we have a cutoff timestamp if cutoff_timestamp: diff --git a/webui/static/helper.js b/webui/static/helper.js index 5fecc6e..0425132 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3404,6 +3404,7 @@ const WHATS_NEW = { '2.2': [ // Newest features first { title: 'Fix Album Folder Splitting', desc: 'Collab albums and artist name changes no longer scatter tracks across multiple folders — $albumartist now uses album-level artist consistently' }, + { title: 'Fix Watchlist Rate Limiting', desc: 'Watchlist scans now fetch only newest albums instead of full discography (~90% fewer API calls). Configurable API interval in settings. Better Retry-After header extraction' }, { title: 'Discogs Integration', desc: 'New metadata source — enrichment worker, fallback source, enhanced search tab, watchlist support, cache browser. Genres, styles, labels, bios, ratings from 400+ taxonomy', page: 'dashboard' }, { title: 'Webhook THEN Action', desc: 'Send HTTP POST to any URL when automations complete — integrate with Gotify, Home Assistant, Slack, n8n. Configurable headers and message template', page: 'automations' }, { title: 'API Rate Monitor', desc: 'Real-time speedometer gauges for all enrichment services on the Dashboard. Click any gauge for 24h history chart. Spotify shows per-endpoint breakdown', page: 'dashboard' },