Reduce watchlist Spotify API calls ~90% + configurable rate interval

Addresses all three points from community rate-limiting report:

1. Watchlist scans fetched ALL albums then filtered — 262 albums = 27
   API calls per artist. Now determines upfront if full discography is
   needed: subsequent scans and time-bounded lookbacks use max_pages=1
   (1 API call). Only "full discography" global setting fetches all.

2. MIN_API_INTERVAL (350ms) now configurable via spotify.min_api_interval
   setting. Users who get rate-limited frequently can increase the delay.
   Values are floored at 100ms so a misconfigured setting cannot
   effectively disable throttling.

3. Retry-After header extraction improved: added diagnostic logging when
   headers exist but lack Retry-After key, plus regex fallback to parse
   the value from the error message string.
pull/253/head
Broque Thomas 2 weeks ago
parent 30d5f76e3d
commit 4e4f258d25

@ -14,7 +14,18 @@ logger = get_logger("spotify_client")
# Global rate limiting variables
_last_api_call_time = 0
_api_call_lock = threading.Lock()
MIN_API_INTERVAL = 0.35 # 350ms between API calls (~171/min, under Spotify's ~180/min limit)
MIN_API_INTERVAL = 0.35 # Default: 350ms between API calls (~171/min, under Spotify's ~180/min limit)
def _get_min_api_interval():
"""Get configurable API interval from settings, falling back to default."""
try:
from config.settings import config_manager
val = config_manager.get('spotify.min_api_interval', None)
if val is not None:
return max(0.1, float(val)) # Floor at 100ms to prevent abuse
except Exception:
pass
return MIN_API_INTERVAL
# Request queuing for burst handling
import queue
@ -178,8 +189,23 @@ def _detect_and_set_rate_limit(exception, endpoint_name="unknown"):
# Try to extract Retry-After from exception headers
retry_after = None
has_real_header = False
if hasattr(exception, 'headers') and exception.headers:
retry_after = exception.headers.get('Retry-After') or exception.headers.get('retry-after')
# Method 1: SpotifyException.headers (set by spotipy with retries=0)
exc_headers = getattr(exception, 'headers', None)
if exc_headers and hasattr(exc_headers, 'get'):
retry_after = exc_headers.get('Retry-After') or exc_headers.get('retry-after')
if retry_after:
logger.info(f"Extracted Retry-After from exception headers: {retry_after}")
else:
logger.debug(f"Exception has headers but no Retry-After key. Headers type: {type(exc_headers).__name__}, keys: {list(exc_headers.keys())[:10] if hasattr(exc_headers, 'keys') else 'N/A'}")
# Method 2: Parse from error message (some spotipy versions embed it)
if not retry_after:
import re
ra_match = re.search(r'[Rr]etry[- ][Aa]fter[:\s]+(\d+)', error_str)
if ra_match:
retry_after = ra_match.group(1)
logger.info(f"Extracted Retry-After from error message: {retry_after}")
if retry_after:
try:
@ -224,13 +250,14 @@ def rate_limited(func):
if _is_globally_rate_limited():
raise SpotifyRateLimitError(0, func.__name__)
# Enforce minimum interval between API calls
# Enforce minimum interval between API calls (configurable via settings)
_interval = _get_min_api_interval()
with _api_call_lock:
current_time = time.time()
time_since_last_call = current_time - _last_api_call_time
if time_since_last_call < MIN_API_INTERVAL:
sleep_time = MIN_API_INTERVAL - time_since_last_call
if time_since_last_call < _interval:
sleep_time = _interval - time_since_last_call
time.sleep(sleep_time)
_last_api_call_time = time.time()
@ -681,8 +708,9 @@ class SpotifyClient:
if results['next']:
with _api_call_lock:
elapsed = time.time() - _last_api_call_time
if elapsed < MIN_API_INTERVAL:
time.sleep(MIN_API_INTERVAL - elapsed)
_pi = _get_min_api_interval()
if elapsed < _pi:
time.sleep(_pi - elapsed)
globals()['_last_api_call_time'] = time.time()
from core.api_call_tracker import api_call_tracker
api_call_tracker.record_call('spotify', endpoint='get_user_playlists_page')
@ -960,8 +988,9 @@ class SpotifyClient:
if results['next']:
with _api_call_lock:
elapsed = time.time() - _last_api_call_time
if elapsed < MIN_API_INTERVAL:
time.sleep(MIN_API_INTERVAL - elapsed)
_pi = _get_min_api_interval()
if elapsed < _pi:
time.sleep(_pi - elapsed)
globals()['_last_api_call_time'] = time.time()
from core.api_call_tracker import api_call_tracker
api_call_tracker.record_call('spotify', endpoint='get_playlist_tracks_page')
@ -1304,8 +1333,9 @@ class SpotifyClient:
while next_page.get('next'):
with _api_call_lock:
elapsed = time.time() - _last_api_call_time
if elapsed < MIN_API_INTERVAL:
time.sleep(MIN_API_INTERVAL - elapsed)
_pi = _get_min_api_interval()
if elapsed < _pi:
time.sleep(_pi - elapsed)
globals()['_last_api_call_time'] = time.time()
from core.api_call_tracker import api_call_tracker
api_call_tracker.record_call('spotify', endpoint='get_album_tracks_page')
@ -1351,9 +1381,11 @@ class SpotifyClient:
return None
@rate_limited
def get_artist_albums(self, artist_id: str, album_type: str = 'album,single', limit: int = 10, skip_cache: bool = False) -> List[Album]:
def get_artist_albums(self, artist_id: str, album_type: str = 'album,single', limit: int = 10, skip_cache: bool = False, max_pages: int = 0) -> List[Album]:
"""Get albums by artist ID - falls back to iTunes if Spotify not authenticated.
Set skip_cache=True for watchlist scans that need fresh data to detect new releases."""
Set skip_cache=True for watchlist scans that need fresh data to detect new releases.
Set max_pages to limit pagination (0 = fetch all). Spotify returns newest first,
so max_pages=1 is sufficient for new release detection."""
cache = get_metadata_cache()
fallback_src = self._fallback_source
source = fallback_src if self._is_itunes_id(artist_id) else 'spotify'
@ -1373,7 +1405,9 @@ class SpotifyClient:
try:
albums = []
raw_items = []
# Spotify caps artist_albums at 10 per page
results = self.sp.artist_albums(artist_id, album_type=album_type, limit=min(limit, 10))
pages_fetched = 1
while results:
for album_data in results['items']:
@ -1381,21 +1415,28 @@ class SpotifyClient:
albums.append(album)
raw_items.append(album_data)
# Stop if we've hit the page limit (0 = unlimited)
if max_pages and pages_fetched >= max_pages:
break
# Get next batch if available — throttle pagination to respect rate limits
if results['next']:
# Enforce same rate limit as decorated calls
with _api_call_lock:
elapsed = time.time() - _last_api_call_time
if elapsed < MIN_API_INTERVAL:
time.sleep(MIN_API_INTERVAL - elapsed)
_pi = _get_min_api_interval()
if elapsed < _pi:
time.sleep(_pi - elapsed)
globals()['_last_api_call_time'] = time.time()
from core.api_call_tracker import api_call_tracker
api_call_tracker.record_call('spotify', endpoint='get_artist_albums_page')
results = self.sp.next(results)
pages_fetched += 1
else:
results = None
logger.info(f"Retrieved {len(albums)} albums for artist {artist_id}")
logger.info(f"Retrieved {len(albums)} albums for artist {artist_id}" +
(f" (page limit: {max_pages})" if max_pages else ""))
# Cache the full artist albums result (wrapped in dict for cache compatibility)
if raw_items:

@ -857,34 +857,41 @@ class WatchlistScanner:
If None, uses lookback period setting from database
"""
try:
# Get all artist albums (albums + singles) - this is rate limited in spotify_client
logger.debug(f"Fetching discography for artist {spotify_artist_id}")
albums = self.spotify_client.get_artist_albums(spotify_artist_id, album_type='album,single', limit=50, skip_cache=True)
if not albums:
logger.warning(f"No albums found for artist {spotify_artist_id}")
return []
# Add small delay after fetching artist discography to be extra safe
time.sleep(0.3) # 300ms breathing room
# Determine cutoff date for filtering
# Determine if we need the full discography or just recent releases.
# Spotify returns albums sorted newest-first, so for time-bounded scans
# we only need the first page (50 albums) — this cuts API calls by ~90%
# for prolific artists (262 albums = 27 calls → 1 call).
needs_full_discog = False
cutoff_timestamp = last_scan_timestamp
# If no last scan timestamp, use per-artist lookback or global setting
if cutoff_timestamp is None:
if lookback_days is not None:
# Per-artist override
cutoff_timestamp = datetime.now(timezone.utc) - timedelta(days=lookback_days)
logger.info(f"Using per-artist lookback: {lookback_days} days (cutoff: {cutoff_timestamp})")
else:
# Global setting
lookback_period = self._get_lookback_period_setting()
if lookback_period != 'all':
if lookback_period == 'all':
needs_full_discog = True
else:
days = int(lookback_period)
cutoff_timestamp = datetime.now(timezone.utc) - timedelta(days=days)
logger.info(f"Using global lookback period: {lookback_period} days (cutoff: {cutoff_timestamp})")
# Fetch albums — limit pagination unless full discography is needed
logger.debug(f"Fetching discography for artist {spotify_artist_id}" +
(" (full)" if needs_full_discog else " (recent only, max 1 page)"))
albums = self.spotify_client.get_artist_albums(
spotify_artist_id, album_type='album,single', limit=50,
skip_cache=True, max_pages=0 if needs_full_discog else 1
)
if not albums:
logger.warning(f"No albums found for artist {spotify_artist_id}")
return []
# Add small delay after fetching artist discography to be extra safe
time.sleep(0.3) # 300ms breathing room
# Filter by release date if we have a cutoff timestamp
if cutoff_timestamp:
filtered_albums = []
@ -914,25 +921,15 @@ class WatchlistScanner:
lookback_days: Per-artist override for lookback period (None = use global setting)
"""
try:
# Get all artist albums (albums + singles)
# skip_cache for Spotify so watchlist scans always get fresh data
logger.debug(f"Fetching discography for artist {artist_id}")
_skip = {'skip_cache': True} if hasattr(client, 'sp') else {}
albums = client.get_artist_albums(artist_id, album_type='album,single', limit=50, **_skip)
if not albums:
logger.warning(f"No albums found for artist {artist_id}")
return []
# Add small delay after fetching artist discography to be extra safe
time.sleep(0.3) # 300ms breathing room
# Determine cutoff date for filtering
# Determine if we need full discography or just recent releases BEFORE fetching.
# Spotify returns albums newest-first, so for time-bounded scans we only need
# the first page (50 albums) — cuts API calls by ~90% for prolific artists.
lookback_period = self._get_lookback_period_setting()
needs_full_discog = False
# If lookback is 'all', always return everything regardless of scan timestamp
if lookback_period == 'all':
cutoff_timestamp = None
needs_full_discog = True
elif last_scan_timestamp is not None:
cutoff_timestamp = last_scan_timestamp
@ -941,6 +938,7 @@ class WatchlistScanner:
if rescan_cutoff == 'all':
logger.info(f"Lookback period changed to 'all' — returning full discography")
cutoff_timestamp = None
needs_full_discog = True
elif rescan_cutoff is not None:
scan_ts = cutoff_timestamp
if scan_ts.tzinfo is None:
@ -951,10 +949,30 @@ class WatchlistScanner:
logger.info(f"Lookback period change detected — expanding cutoff from {cutoff_timestamp} to {rescan_cutoff}")
cutoff_timestamp = rescan_cutoff
else:
# No scan timestamp — use lookback period
days = int(lookback_period)
# No scan timestamp — first scan, use lookback period
if lookback_days is not None:
days = lookback_days
else:
days = int(lookback_period)
cutoff_timestamp = datetime.now(timezone.utc) - timedelta(days=days)
logger.info(f"Using lookback period: {lookback_period} days (cutoff: {cutoff_timestamp})")
logger.info(f"Using lookback period: {days} days (cutoff: {cutoff_timestamp})")
# Fetch albums — limit pagination unless full discography is needed
logger.debug(f"Fetching discography for artist {artist_id}" +
(" (full)" if needs_full_discog else " (recent only, max 1 page)"))
_skip = {'skip_cache': True} if hasattr(client, 'sp') else {}
_max_pages = 0 if needs_full_discog else 1
# Only pass max_pages to clients that support it (spotify_client)
if hasattr(client, 'sp'):
_skip['max_pages'] = _max_pages
albums = client.get_artist_albums(artist_id, album_type='album,single', limit=50, **_skip)
if not albums:
logger.warning(f"No albums found for artist {artist_id}")
return []
# Add small delay after fetching artist discography to be extra safe
time.sleep(0.3) # 300ms breathing room
# Filter by release date if we have a cutoff timestamp
if cutoff_timestamp:

@ -3404,6 +3404,7 @@ const WHATS_NEW = {
'2.2': [
// Newest features first
{ title: 'Fix Album Folder Splitting', desc: 'Collab albums and artist name changes no longer scatter tracks across multiple folders — $albumartist now uses album-level artist consistently' },
{ title: 'Fix Watchlist Rate Limiting', desc: 'Watchlist scans now fetch only newest albums instead of full discography (~90% fewer API calls). Configurable API interval in settings. Better Retry-After header extraction' },
{ title: 'Discogs Integration', desc: 'New metadata source — enrichment worker, fallback source, enhanced search tab, watchlist support, cache browser. Genres, styles, labels, bios, ratings from 400+ taxonomy', page: 'dashboard' },
{ title: 'Webhook THEN Action', desc: 'Send HTTP POST to any URL when automations complete — integrate with Gotify, Home Assistant, Slack, n8n. Configurable headers and message template', page: 'automations' },
{ title: 'API Rate Monitor', desc: 'Real-time speedometer gauges for all enrichment services on the Dashboard. Click any gauge for 24h history chart. Spotify shows per-endpoint breakdown', page: 'dashboard' },

Loading…
Cancel
Save