You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
SoulSync/core/spotify_client.py

1421 lines
59 KiB

import spotipy
from spotipy.oauth2 import SpotifyOAuth, SpotifyClientCredentials
from typing import Dict, List, Optional, Any
import time
import threading
from functools import wraps
from dataclasses import dataclass
from utils.logging_config import get_logger
from config.settings import config_manager
from core.metadata_cache import get_metadata_cache
logger = get_logger("spotify_client")
# Global rate limiting variables.
# _last_api_call_time is read and written under _api_call_lock by the
# rate_limited decorator to space out consecutive API calls.
_last_api_call_time = 0
_api_call_lock = threading.Lock()
MIN_API_INTERVAL = 0.35 # 350ms between API calls (~171/min, under Spotify's ~180/min limit)
# Request queuing for burst handling.
# NOTE(review): neither name below is referenced in the visible part of this
# file — confirm they are still used before relying on them.
import queue
_request_queue = queue.Queue()
_queue_processor_running = False
# Global rate limit ban state — when Spotify returns a long Retry-After (>60s),
# we set this so ALL API calls are suppressed until the ban expires.
# Every _rate_limit_* variable below is read/written under _rate_limit_lock.
_rate_limit_lock = threading.Lock()
_rate_limit_until = 0 # Unix timestamp when the ban expires (0 = not banned)
_rate_limit_retry_after = 0 # Original Retry-After value in seconds
_rate_limit_endpoint = None # Which function triggered the ban
_rate_limit_set_at = 0 # When the ban was set
_rate_limit_ban_ended_at = 0 # When the last ban expired naturally (for post-ban cooldown)
_rate_limit_hit_count = 0 # How many times we've been rate limited recently (for escalation)
_rate_limit_first_hit = 0 # Timestamp of the first hit in the current escalation window
# Threshold: if Retry-After exceeds this, activate the global ban instead of sleeping
_LONG_RATE_LIMIT_THRESHOLD = 60 # seconds
# After a ban expires, wait this long before making any auth probe calls.
# This prevents the "immediate re-probe → re-ban" cycle where Spotify's server-side
# cooldown outlasts the Retry-After value they sent us.
_POST_BAN_COOLDOWN = 300 # 5 minutes
# Escalation: if we get rate limited again within this window, the ban duration
# is doubled per repeated hit (see _set_global_rate_limit), capped at _ESCALATION_MAX.
_ESCALATION_WINDOW = 3600 # 1 hour — if re-limited within this, escalate
_ESCALATION_MAX = 14400 # 4 hours max ban
_BASE_UNKNOWN_BAN = 1800 # 30 min default when Retry-After header is missing
_BASE_MAX_RETRIES_BAN = 3600 # 1 hour default when spotipy exhausted all retries
class SpotifyRateLimitError(Exception):
    """Signals that a Spotify API call was blocked by the active global rate limit ban.

    Attributes:
        retry_after: Number of seconds supplied by the raiser.
        endpoint: Name of the function that triggered the ban (may be None).
    """

    def __init__(self, retry_after, endpoint=None):
        message = f"Spotify rate limited for {retry_after}s (triggered by {endpoint})"
        super().__init__(message)
        self.retry_after = retry_after
        self.endpoint = endpoint
def _set_global_rate_limit(retry_after_seconds, endpoint_name, has_real_header=False):
    """Turn on (or extend) the global rate limit ban.

    Args:
        retry_after_seconds: Requested ban length in seconds.
        endpoint_name: Name of the function that hit the limit (stored and logged).
        has_real_header: True when the duration came from a real Retry-After
            header. Only guessed durations take part in escalation.

    The ban state is only overwritten when the new expiry is later than the
    one already recorded.
    """
    global _rate_limit_until, _rate_limit_retry_after, _rate_limit_endpoint, _rate_limit_set_at
    global _rate_limit_hit_count, _rate_limit_first_hit

    with _rate_limit_lock:
        now = time.time()

        if not has_real_header:
            # We are guessing the duration, so track repeated hits inside the window.
            inside_window = (
                _rate_limit_first_hit > 0
                and now - _rate_limit_first_hit < _ESCALATION_WINDOW
            )
            if inside_window:
                _rate_limit_hit_count += 1
            else:
                # Start a fresh escalation window.
                _rate_limit_hit_count = 1
                _rate_limit_first_hit = now

            if _rate_limit_hit_count > 1:
                # Each repeated hit doubles the ban, up to the configured ceiling.
                doubled = retry_after_seconds * (2 ** (_rate_limit_hit_count - 1))
                retry_after_seconds = min(doubled, _ESCALATION_MAX)
                logger.warning(
                    f"Rate limit escalation: hit #{_rate_limit_hit_count} within window, "
                    f"ban escalated to {retry_after_seconds}s"
                )

        new_until = now + retry_after_seconds
        if new_until <= _rate_limit_until:
            # An equal or longer ban is already in place; leave it alone.
            return

        _rate_limit_until = new_until
        _rate_limit_retry_after = retry_after_seconds
        _rate_limit_endpoint = endpoint_name
        _rate_limit_set_at = now
        expires_at = time.strftime('%H:%M:%S', time.localtime(new_until))
        logger.warning(
            f"GLOBAL RATE LIMIT ACTIVATED: {retry_after_seconds}s ban "
            f"(expires {expires_at}) "
            f"triggered by {endpoint_name}"
        )
def _is_globally_rate_limited():
    """Return True while the global rate limit ban has not yet expired.

    The first call after expiry also stamps `_rate_limit_ban_ended_at`, which
    the post-ban cooldown helpers read.
    """
    global _rate_limit_ban_ended_at

    with _rate_limit_lock:
        ban_was_set = _rate_limit_until > 0
        if not ban_was_set:
            return False

        if time.time() < _rate_limit_until:
            return True

        # Ban expired — record the end time once per ban so the cooldown can apply.
        if _rate_limit_ban_ended_at < _rate_limit_until:
            _rate_limit_ban_ended_at = time.time()
            logger.info("Rate limit ban expired, entering post-ban cooldown period")
        return False
def _is_in_post_ban_cooldown():
    """Return True while fewer than _POST_BAN_COOLDOWN seconds have passed since a ban ended.

    Used to hold back auth probes right after a ban, so an immediate re-probe
    does not trigger a new ban.
    """
    with _rate_limit_lock:
        ended_at = _rate_limit_ban_ended_at
        if ended_at <= 0:
            # No ban has ended yet, so there is no cooldown to honour.
            return False
        return (time.time() - ended_at) < _POST_BAN_COOLDOWN
def _get_post_ban_cooldown_remaining():
    """Return the whole seconds left in the post-ban cooldown (0 when none is active)."""
    with _rate_limit_lock:
        ended_at = _rate_limit_ban_ended_at
        if ended_at <= 0:
            return 0
        since_end = time.time() - ended_at
        seconds_left = int(_POST_BAN_COOLDOWN - since_end)
        return max(0, seconds_left)
def _get_rate_limit_info():
    """Describe the active ban as a dict, or return None when no ban is in force.

    Returned keys: 'active', 'remaining_seconds', 'retry_after', 'endpoint',
    'set_at', 'expires_at'.
    """
    with _rate_limit_lock:
        if _rate_limit_until <= 0:
            return None

        seconds_left = _rate_limit_until - time.time()
        if seconds_left <= 0:
            return None

        info = {
            'active': True,
            'remaining_seconds': int(seconds_left),
            'retry_after': _rate_limit_retry_after,
            'endpoint': _rate_limit_endpoint,
            'set_at': _rate_limit_set_at,
            'expires_at': _rate_limit_until,
        }
        return info
def _clear_rate_limit():
    """Reset every piece of global ban state, including the post-ban cooldown
    and the escalation counters.

    Called on disconnect/reconnect so the user can retry straight away.
    """
    global _rate_limit_until, _rate_limit_retry_after, _rate_limit_endpoint, _rate_limit_set_at, _rate_limit_ban_ended_at
    global _rate_limit_hit_count, _rate_limit_first_hit

    with _rate_limit_lock:
        # Timestamps and durations go back to their "never banned" values.
        _rate_limit_until = _rate_limit_retry_after = 0
        _rate_limit_set_at = _rate_limit_ban_ended_at = 0
        _rate_limit_hit_count = _rate_limit_first_hit = 0
        _rate_limit_endpoint = None
        logger.info("Global rate limit ban cleared (including post-ban cooldown)")
def _detect_and_set_rate_limit(exception, endpoint_name="unknown"):
    """Activate the global ban if `exception` looks like a Spotify 429.

    Detection uses the exception's `http_status` attribute when present and
    also its string form ("429" or "rate limit").

    Args:
        exception: The exception raised by a Spotify call.
        endpoint_name: Label stored with the ban and used in log messages.

    Returns:
        True when a rate limit was detected (and the ban requested), else False.
    """
    error_str = str(exception)
    status_is_429 = getattr(exception, 'http_status', None) == 429
    text_says_limited = "429" in error_str or "rate limit" in error_str.lower()
    if not (status_is_429 or text_says_limited):
        return False

    # Pull Retry-After out of the exception's headers when it carries any.
    retry_after = None
    if hasattr(exception, 'headers') and exception.headers:
        retry_after = exception.headers.get('Retry-After') or exception.headers.get('retry-after')

    has_real_header = False
    if retry_after:
        try:
            delay = int(retry_after)
        except (ValueError, TypeError):
            delay = _BASE_UNKNOWN_BAN
            logger.warning(f"Rate limit detected on {endpoint_name} — unparseable Retry-After: {retry_after}")
        else:
            has_real_header = True
            logger.info(f"Rate limit detected on {endpoint_name} — Retry-After header: {delay}s")
    else:
        # No header at all: a "max retries" message gets the longer default.
        retries_exhausted = "max retries" in error_str.lower()
        delay = _BASE_MAX_RETRIES_BAN if retries_exhausted else _BASE_UNKNOWN_BAN
        logger.warning(f"Rate limit detected on {endpoint_name} — no Retry-After header, using {delay}s default")

    _set_global_rate_limit(delay, endpoint_name, has_real_header=has_real_header)
    return True
def _classify_api_error(exc):
    """Classify an exception from a Spotify call as (is_rate_limit, is_server_error).

    When the exception carries a real HTTP status (an int >= 100 in
    `http_status`), that status decides: 429 is a rate limit, 502/503 are
    server errors. Substring matching on the message is only a fallback for
    exceptions without a usable status. Matching digits in the message alone
    would misread unrelated errors whose URL or ID happens to contain
    "429", "502" or "503".
    """
    status = getattr(exc, 'http_status', None)
    if isinstance(status, int) and not isinstance(status, bool) and status >= 100:
        return status == 429, status in (502, 503)

    text = str(exc)
    is_rate_limit = "rate limit" in text.lower() or "429" in text
    is_server_error = "502" in text or "503" in text
    return is_rate_limit, is_server_error


def rate_limited(func):
    """Decorator to enforce rate limiting on Spotify API calls with retry and exponential backoff.

    Behaviour of the wrapped call:
      * If a global ban is already active, `func` is invoked once without any
        pacing or retry, so its own authentication check can route to the
        fallback source.
      * Otherwise each attempt waits until MIN_API_INTERVAL has elapsed since
        the previous paced call, then invokes `func`.
      * Rate-limit errors are retried up to 5 times, sleeping Retry-After + 1
        seconds when that header is usable, else 3s doubling per attempt.
        A Retry-After above _LONG_RATE_LIMIT_THRESHOLD activates the global
        ban and raises SpotifyRateLimitError instead of sleeping.
      * 502/503 errors are retried up to 5 times with 2s doubling backoff.
      * Any other exception propagates unchanged.

    Raises:
        SpotifyRateLimitError: when a ban becomes active between attempts, or
            when a long Retry-After is received.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        global _last_api_call_time

        # Pre-flight check: if globally rate limited, don't pace or retry.
        # Let the method body run so its internal is_spotify_authenticated()
        # check returns False and the fallback logic can execute.
        if _is_globally_rate_limited():
            return func(*args, **kwargs)

        max_retries = 5
        for attempt in range(max_retries + 1):
            # Re-check ban before each retry — a previous attempt may have triggered one
            if _is_globally_rate_limited():
                raise SpotifyRateLimitError(0, func.__name__)

            # Enforce minimum interval between API calls. The sleep happens
            # while holding the lock so concurrent callers are serialized.
            with _api_call_lock:
                time_since_last_call = time.time() - _last_api_call_time
                if time_since_last_call < MIN_API_INTERVAL:
                    time.sleep(MIN_API_INTERVAL - time_since_last_call)
                _last_api_call_time = time.time()

            try:
                return func(*args, **kwargs)
            except SpotifyRateLimitError:
                raise  # Don't retry our own ban errors
            except Exception as e:
                is_rate_limit, is_server_error = _classify_api_error(e)

                if is_rate_limit:
                    # Try to extract Retry-After from spotipy exception headers
                    retry_after = None
                    if hasattr(e, 'headers') and e.headers:
                        retry_after = e.headers.get('Retry-After') or e.headers.get('retry-after')

                    if retry_after:
                        try:
                            delay = int(retry_after)
                        except (ValueError, TypeError):
                            delay = None

                        # If Retry-After is long, activate global ban instead of sleeping
                        if delay and delay > _LONG_RATE_LIMIT_THRESHOLD:
                            _set_global_rate_limit(delay, func.__name__, has_real_header=True)
                            raise SpotifyRateLimitError(delay, func.__name__)

                        if delay:
                            delay = delay + 1
                        else:
                            delay = 3.0 * (2 ** attempt)
                    else:
                        delay = 3.0 * (2 ** attempt)  # 3, 6, 12, 24, 48

                    if attempt < max_retries:
                        logger.warning(f"Spotify rate limit hit, retrying in {delay:.0f}s (attempt {attempt + 1}/{max_retries}): {func.__name__}")
                        time.sleep(delay)
                        continue
                    else:
                        # All retries exhausted on 429s — activate global ban.
                        # Don't trust the Retry-After header here — we already retried
                        # with it multiple times and still got 429'd, so it's too short.
                        _set_global_rate_limit(_BASE_MAX_RETRIES_BAN, func.__name__)
                elif is_server_error and attempt < max_retries:
                    delay = 2.0 * (2 ** attempt)  # 2, 4, 8, 16, 32
                    logger.warning(f"Spotify server error, retrying in {delay:.0f}s (attempt {attempt + 1}/{max_retries}): {func.__name__}")
                    time.sleep(delay)
                    continue

                # Not retryable (or retries exhausted): surface the original error.
                raise
    return wrapper
@dataclass
class Track:
    """Flat view of a track built from a Spotify track payload."""

    id: str
    name: str
    artists: List[str]
    album: str
    duration_ms: int
    popularity: int
    preview_url: Optional[str] = None
    external_urls: Optional[Dict[str, str]] = None
    image_url: Optional[str] = None
    release_date: Optional[str] = None
    album_type: Optional[str] = None
    total_tracks: Optional[int] = None

    @classmethod
    def from_spotify_track(cls, track_data: Dict[str, Any]) -> 'Track':
        """Build a Track from a Spotify track dict.

        'id', 'name', 'artists', 'album' (with 'name') and 'duration_ms' are
        required keys; everything else falls back to a default.
        """
        # First album image is used (the original author notes Spotify sorts
        # images largest first).
        cover_url = None
        if 'album' in track_data and 'images' in track_data['album']:
            album_images = track_data['album']['images']
            if album_images:
                cover_url = album_images[0]['url']

        # Assemble fields in declaration order, then construct in one go.
        fields = {}
        fields['id'] = track_data['id']
        fields['name'] = track_data['name']
        fields['artists'] = [entry['name'] for entry in track_data['artists']]
        fields['album'] = track_data['album']['name']
        fields['duration_ms'] = track_data['duration_ms']
        fields['popularity'] = track_data.get('popularity', 0)
        fields['preview_url'] = track_data.get('preview_url')
        fields['external_urls'] = track_data.get('external_urls')
        fields['image_url'] = cover_url
        for optional_key in ('release_date', 'album_type', 'total_tracks'):
            fields[optional_key] = track_data.get('album', {}).get(optional_key)
        return cls(**fields)
@dataclass
class Artist:
    """Flat view of an artist built from a Spotify artist payload."""

    id: str
    name: str
    popularity: int
    genres: List[str]
    followers: int
    image_url: Optional[str] = None
    external_urls: Optional[Dict[str, str]] = None

    @classmethod
    def from_spotify_artist(cls, artist_data: Dict[str, Any]) -> 'Artist':
        """Build an Artist from a Spotify artist dict.

        'id' and 'name' are required. Optional fields tolerate being absent
        or explicitly null: popularity and followers default to 0, genres
        to an empty list, image_url and external_urls to None.

        Raises:
            KeyError: if 'id' or 'name' is missing.
        """
        # First image is used when any are present (treated as the largest).
        images = artist_data.get('images') or []
        image_url = images[0]['url'] if images else None

        # `.get(key, default)` only covers a missing key; a payload carrying
        # an explicit null ("followers": null) would otherwise crash on
        # `.get('total')`, so normalise with `or`.
        followers_info = artist_data.get('followers') or {}

        return cls(
            id=artist_data['id'],
            name=artist_data['name'],
            popularity=artist_data.get('popularity') or 0,
            genres=artist_data.get('genres') or [],
            followers=followers_info.get('total') or 0,
            image_url=image_url,
            external_urls=artist_data.get('external_urls'),
        )
@dataclass
class Album:
    """Flat view of an album built from a Spotify album payload."""

    id: str
    name: str
    artists: List[str]
    release_date: str
    total_tracks: int
    album_type: str
    image_url: Optional[str] = None
    external_urls: Optional[Dict[str, str]] = None
    artist_ids: Optional[List[str]] = None

    @classmethod
    def from_spotify_album(cls, album_data: Dict[str, Any]) -> 'Album':
        """Build an Album from a Spotify album dict.

        'id', 'name' and 'artists' (each with 'name' and 'id') are required;
        the remaining fields fall back to defaults.
        """
        # First image is used when the payload has any.
        cover_url = None
        cover_list = album_data.get('images')
        if cover_list and len(cover_list) > 0:
            cover_url = cover_list[0]['url']

        album_id = album_data['id']
        album_name = album_data['name']
        artist_names = [credit['name'] for credit in album_data['artists']]
        released = album_data.get('release_date', '')
        track_count = album_data.get('total_tracks', 0)
        kind = album_data.get('album_type', 'album')
        links = album_data.get('external_urls')
        artist_ids = [credit['id'] for credit in album_data['artists']]

        return cls(
            album_id,
            album_name,
            artist_names,
            released,
            track_count,
            kind,
            image_url=cover_url,
            external_urls=links,
            artist_ids=artist_ids,
        )
@dataclass
class Playlist:
    """Flat view of a playlist plus the Track objects loaded for it."""

    id: str
    name: str
    description: Optional[str]
    owner: str
    public: bool
    collaborative: bool
    tracks: List[Track]
    total_tracks: int

    @classmethod
    def from_spotify_playlist(cls, playlist_data: Dict[str, Any], tracks: List[Track]) -> 'Playlist':
        """Build a Playlist from a Spotify playlist dict and pre-fetched tracks.

        'id', 'name', 'owner' (with 'display_name'), 'public' and
        'collaborative' are required keys. The total is read from the
        'tracks' sub-object, or 'items' when 'tracks' is empty/missing, and
        defaults to 0.
        """
        playlist_id = playlist_data['id']
        title = playlist_data['name']
        blurb = playlist_data.get('description')
        owner_name = playlist_data['owner']['display_name']
        is_public = playlist_data['public']
        is_collaborative = playlist_data['collaborative']

        paging = playlist_data.get('tracks') or playlist_data.get('items') or {}
        track_total = paging.get('total', 0)

        return cls(
            playlist_id,
            title,
            blurb,
            owner_name,
            is_public,
            is_collaborative,
            tracks,
            track_total,
        )
class SpotifyClient:
def __init__(self):
    """Set up empty client state, then try to build the Spotify client from config."""
    # Spotify handle and lazily fetched user id.
    self.sp: Optional[spotipy.Spotify] = None
    self.user_id: Optional[str] = None

    # Fallback metadata clients are created on first use.
    self._itunes_client = None
    self._deezer_client = None

    # Cached result of the authentication probe; 300 s (5 minutes) because
    # auth status rarely changes.
    self._AUTH_CACHE_TTL = 300
    self._auth_cache_time: float = 0
    self._auth_cached_result: Optional[bool] = None
    self._auth_cache_lock = threading.Lock()

    self._setup_client()
def _is_spotify_id(self, id_str: str) -> bool:
"""Check if an ID is a Spotify ID (alphanumeric) vs a fallback source ID (numeric only)"""
if not id_str:
return False
# Spotify IDs contain letters and numbers; iTunes/Deezer IDs are purely numeric
return not id_str.isdigit()
def _is_itunes_id(self, id_str: str) -> bool:
"""Check if an ID is numeric (iTunes or Deezer format, not Spotify)"""
if not id_str:
return False
return id_str.isdigit()
@property
def _itunes(self):
    """iTunes fallback client, created on first access and reused afterwards."""
    client = self._itunes_client
    if client is None:
        # Imported here so the module is only loaded when the fallback is needed.
        from core.itunes_client import iTunesClient
        client = iTunesClient()
        self._itunes_client = client
        logger.info("iTunes fallback client initialized")
    return client
@property
def _deezer(self):
    """Deezer fallback client, created on first access and reused afterwards."""
    client = self._deezer_client
    if client is None:
        # Imported here so the module is only loaded when the fallback is needed.
        from core.deezer_client import DeezerClient
        client = DeezerClient()
        self._deezer_client = client
        logger.info("Deezer fallback client initialized")
    return client
@property
def _fallback_source(self) -> str:
    """Name of the configured fallback metadata source.

    Reads 'metadata.fallback_source' from config; an empty value or any
    error while reading yields 'itunes'.
    """
    try:
        configured = config_manager.get('metadata.fallback_source', 'itunes')
    except Exception:
        return 'itunes'
    return configured or 'itunes'
@property
def _fallback(self):
    """Fallback metadata client selected by settings: Deezer if configured, otherwise iTunes."""
    wants_deezer = self._fallback_source == 'deezer'
    return self._deezer if wants_deezer else self._itunes
def reload_config(self):
    """Drop the cached auth result and rebuild the Spotify client from current config."""
    # Invalidate first so the next auth check does not reuse a stale cached result.
    self._invalidate_auth_cache()
    self._setup_client()
def _setup_client(self):
    """Create the spotipy client from the configured credentials.

    Leaves `self.sp` untouched when credentials are missing, and sets it to
    None if construction fails. No network call is made for user info here;
    `user_id` is resolved lazily.
    """
    config = config_manager.get_spotify_config()

    have_credentials = config.get('client_id') and config.get('client_secret')
    if not have_credentials:
        logger.warning("Spotify credentials not configured")
        return

    try:
        oauth_settings = {
            'client_id': config['client_id'],
            'client_secret': config['client_secret'],
            'redirect_uri': config.get('redirect_uri', "http://127.0.0.1:8888/callback"),
            'scope': "user-library-read user-read-private playlist-read-private playlist-read-collaborative user-read-email",
            'cache_path': 'config/.spotify_cache',
        }
        auth_manager = SpotifyOAuth(**oauth_settings)

        # retries=0: keep spotipy from sleeping for a Retry-After duration on 429s
        # (can be hours); the rate_limited decorator + global ban handle retries.
        # requests_timeout=15: bound how long any single request may take.
        self.sp = spotipy.Spotify(auth_manager=auth_manager, retries=0, requests_timeout=15)

        # User info is fetched lazily to avoid blocking the UI on startup.
        self.user_id = None
        logger.info("Spotify client initialized (user info will be fetched when needed)")
    except Exception as e:
        logger.error(f"Failed to authenticate with Spotify: {e}")
        self.sp = None
def is_authenticated(self) -> bool:
    """Report whether metadata requests can be served.

    Always True: either Spotify is authenticated, or the iTunes/Deezer
    fallback (which needs no auth) is used. Call is_spotify_authenticated()
    for a Spotify-specific answer.
    """
    # The Spotify check is still performed (it may probe and refresh the
    # cached auth state), but its outcome does not change the answer.
    self.is_spotify_authenticated()
    return True
def _invalidate_auth_cache(self):
"""Clear the auth cache so the next check makes a fresh API call"""
with self._auth_cache_lock:
self._auth_cached_result = None
self._auth_cache_time = 0
def is_spotify_authenticated(self) -> bool:
    """Check whether Spotify itself (not the fallback source) is usable.

    Returns False without any API call when there is no client, while the
    global rate limit ban is active, or during the post-ban cooldown.
    Otherwise the answer comes from a cached probe result that is reused for
    `_AUTH_CACHE_TTL` seconds; on a cache miss a `current_user()` probe is
    made and its outcome cached.

    A rate-limited probe counts as authenticated (True is cached), but it
    also activates a global ban of at least _BASE_UNKNOWN_BAN seconds.
    """
    if self.sp is None:
        return False

    # During a ban, report "not authenticated" so callers skip Spotify and use
    # the fallback; this avoids API calls that could extend the ban.
    if _is_globally_rate_limited():
        return False

    # Right after a ban, don't probe yet: Spotify's server-side cooldown can
    # outlast the Retry-After it sent, and probing would re-trigger the ban.
    if _is_in_post_ban_cooldown():
        remaining = _get_post_ban_cooldown_remaining()
        logger.debug(f"Post-ban cooldown active ({remaining}s left), skipping auth probe")
        return False

    # Serve from cache when fresh; the lock is held only for this brief read.
    with self._auth_cache_lock:
        cached = self._auth_cached_result
        if cached is not None and (time.time() - self._auth_cache_time) < self._AUTH_CACHE_TTL:
            return cached

    # Cache miss — probe outside the lock with a dedicated client (retries=0)
    # so a 429 propagates immediately and can be inspected here.
    try:
        probe = spotipy.Spotify(auth_manager=self.sp.auth_manager, retries=0)
        probe.current_user()
        result = True
    except Exception as e:
        error_str = str(e)
        throttled = "rate" in error_str.lower() or "429" in error_str
        if not throttled:
            logger.debug(f"Spotify authentication check failed: {e}")
            result = False
        else:
            # Any rate limit on the probe means Spotify is throttling us, so a
            # global ban is always activated, even with a short or missing
            # Retry-After; otherwise probes would keep hitting 429.
            header_value = None
            if hasattr(e, 'headers') and e.headers:
                header_value = e.headers.get('Retry-After') or e.headers.get('retry-after')

            has_real_header = False
            delay = 0
            if header_value:
                try:
                    delay = int(header_value)
                    has_real_header = True
                except (ValueError, TypeError):
                    delay = 0

            # Never ban for less than the 30-minute default on probe 429s.
            ban_duration = max(delay, _BASE_UNKNOWN_BAN)
            _set_global_rate_limit(ban_duration, 'is_spotify_authenticated', has_real_header=has_real_header)
            logger.warning(f"Auth probe rate limited — activating {ban_duration}s global ban")
            # Being throttled still means the credentials are valid.
            result = True

    with self._auth_cache_lock:
        self._auth_cached_result = result
        self._auth_cache_time = time.time()
    return result
def disconnect(self):
    """Disconnect from Spotify.

    Drops the client and user id, clears the cached auth result and the
    global rate limit state, and deletes the on-disk token cache file.
    A failure to delete the file is logged, not raised.
    """
    import os

    self.sp = None
    self.user_id = None
    self._invalidate_auth_cache()
    _clear_rate_limit()

    token_cache = 'config/.spotify_cache'
    try:
        cache_present = os.path.exists(token_cache)
        if cache_present:
            os.remove(token_cache)
            logger.info("Deleted Spotify cache file")
    except Exception as e:
        logger.warning(f"Failed to delete Spotify cache: {e}")

    logger.info("Spotify client disconnected")
@staticmethod
def is_rate_limited():
    """Return True while the module-wide Spotify rate limit ban is active."""
    banned = _is_globally_rate_limited()
    return banned
@staticmethod
def get_rate_limit_info():
    """Return a dict describing the active ban, or None when not rate limited."""
    details = _get_rate_limit_info()
    return details
@staticmethod
def clear_rate_limit():
    """Manually lift the rate limit ban by resetting the module-wide ban state."""
    _clear_rate_limit()
@staticmethod
def get_post_ban_cooldown_remaining():
    """Return the seconds left in the post-ban cooldown (0 when not in cooldown)."""
    seconds_left = _get_post_ban_cooldown_remaining()
    return seconds_left
def _ensure_user_id(self) -> bool:
    """Make sure `self.user_id` is populated, fetching it from Spotify if needed.

    Returns:
        True when a user id is available (already known or just fetched),
        False when there is no client or the fetch failed.
    """
    already_known = self.user_id is not None
    if already_known or self.sp is None:
        return already_known

    try:
        profile = self.sp.current_user()
        self.user_id = profile['id']
        logger.info(f"Successfully authenticated with Spotify as {profile['display_name']}")
        return True
    except Exception as e:
        logger.error(f"Failed to fetch user info: {e}")
        return False
@rate_limited
def get_user_playlists(self) -> List[Playlist]:
    """Fetch every playlist of the current user, including each playlist's tracks.

    Returns:
        A list of Playlist objects; an empty list when not authenticated,
        when the user id cannot be resolved, or when any error occurs.
    """
    if not self.is_spotify_authenticated():
        logger.error("Not authenticated with Spotify")
        return []

    if not self._ensure_user_id():
        logger.error("Failed to get user ID")
        return []

    playlists = []
    try:
        page = self.sp.current_user_playlists(limit=50)
        while page:
            # The endpoint already returns owned + followed playlists, so no
            # filtering is applied.
            for entry in page['items']:
                logger.info(f"Fetching tracks for playlist: {entry['name']}")
                entry_tracks = self._get_playlist_tracks(entry['id'])
                playlists.append(Playlist.from_spotify_playlist(entry, entry_tracks))

            if not page['next']:
                break
            page = self.sp.next(page)

        logger.info(f"Retrieved {len(playlists)} playlists")
        return playlists
    except Exception as e:
        logger.error(f"Error fetching user playlists: {e}")
        return []
@rate_limited
def get_user_playlists_metadata_only(self) -> List[Playlist]:
    """Fetch the user's playlists without loading their tracks (faster).

    Each Playlist is created with an empty track list. Malformed entries are
    skipped with a warning. If an error interrupts paging, whatever was
    collected so far is returned.
    """
    if not self.is_spotify_authenticated():
        logger.error("Not authenticated with Spotify")
        return []

    if not self._ensure_user_id():
        logger.error("Failed to get user ID")
        return []

    playlists = []
    try:
        limit = 50  # Maximum allowed by Spotify API
        offset = 0
        total_fetched = 0

        logger.info("Beginning fetch of user playlists...")

        while True:
            results = self.sp.current_user_playlists(limit=limit, offset=offset)
            if not results or 'items' not in results:
                break

            # The first page tells us how many playlists to expect overall.
            if offset == 0:
                expected_total = results.get('total', 'Unknown')
                logger.info(f"Spotify reports {expected_total} total playlists to fetch.")

            count_before = len(playlists)
            for playlist_data in results['items']:
                try:
                    # Patch up missing owner information before conversion.
                    owner = playlist_data.get('owner')
                    if not owner:
                        playlist_data['owner'] = {'display_name': 'Unknown Owner', 'id': 'unknown'}
                    elif not owner.get('display_name'):
                        owner['display_name'] = 'Unknown'

                    # Tracks are intentionally left empty here.
                    playlists.append(Playlist.from_spotify_playlist(playlist_data, []))
                except Exception as p_error:
                    p_name = playlist_data.get('name', 'Unknown') if playlist_data else 'None'
                    logger.warning(f"Skipping malformed playlist '{p_name}': {p_error}")

            batch_count = len(playlists) - count_before
            total_fetched += batch_count
            logger.info(f"Retrieved {batch_count} playlists in batch (offset {offset}), total so far: {total_fetched}")

            # A short page or a missing 'next' link means this was the last page.
            last_page = len(results['items']) < limit or not results.get('next')
            if last_page:
                break
            offset += limit

        logger.info(f"Retrieved {len(playlists)} total playlist metadata")
        return playlists
    except Exception as e:
        logger.error(f"Error fetching user playlists metadata: {e}")
        if not playlists:
            return []
        # Hand back whatever was gathered before the failure.
        logger.info(f"Returning {len(playlists)} playlists fetched before error.")
        return playlists
@rate_limited
def get_saved_tracks_count(self) -> int:
    """Return how many saved/liked songs the user has, without fetching them all.

    Returns 0 when not authenticated, when the response has no 'total', or on error.
    """
    if not self.is_spotify_authenticated():
        logger.error("Not authenticated with Spotify")
        return 0

    try:
        # One item is enough: only the 'total' field of the page is needed.
        first_page = self.sp.current_user_saved_tracks(limit=1)
        if not first_page or 'total' not in first_page:
            return 0
        total_count = first_page['total']
        logger.info(f"User has {total_count} saved tracks")
        return total_count
    except Exception as e:
        logger.error(f"Error fetching saved tracks count: {e}")
        return 0
@rate_limited
def get_saved_tracks(self) -> List[Track]:
    """Fetch all of the user's saved/liked songs, paging 50 at a time.

    Items without a track or track id are skipped. Returns an empty list
    when not authenticated or on any error.
    """
    if not self.is_spotify_authenticated():
        logger.error("Not authenticated with Spotify")
        return []

    tracks = []
    try:
        limit = 50  # Maximum allowed by Spotify API
        offset = 0
        total_fetched = 0

        while True:
            results = self.sp.current_user_saved_tracks(limit=limit, offset=offset)
            if not results or 'items' not in results:
                break

            count_before = len(tracks)
            for item in results['items']:
                if item['track'] and item['track']['id']:
                    tracks.append(Track.from_spotify_track(item['track']))

            batch_count = len(tracks) - count_before
            total_fetched += batch_count
            logger.info(f"Retrieved {batch_count} saved tracks in batch (offset {offset}), total: {total_fetched}")

            # A short page or a missing 'next' link means this was the last page.
            last_page = len(results['items']) < limit or not results.get('next')
            if last_page:
                break
            offset += limit

        logger.info(f"Retrieved {len(tracks)} total saved tracks")
        return tracks
    except Exception as e:
        logger.error(f"Error fetching saved tracks: {e}")
        return []
@rate_limited
def get_saved_albums(self, since_timestamp=None) -> list:
    """Fetch the user's saved albums from the Spotify library.

    Args:
        since_timestamp: Optional ISO timestamp string. When given, paging
            stops at the first album whose 'added_at' sorts before it
            (plain string comparison), for incremental sync.

    Returns:
        List of dicts with album metadata ready for DB upsert; an empty
        list when not authenticated or on any error.
    """
    if not self.is_spotify_authenticated():
        logger.error("Not authenticated with Spotify")
        return []

    albums = []
    try:
        limit = 50  # Maximum allowed by Spotify API
        offset = 0
        total_fetched = 0

        while True:
            results = self.sp.current_user_saved_albums(limit=limit, offset=offset)
            if not results or 'items' not in results:
                break

            count_before = len(albums)
            reached_old_albums = False

            for item in results['items']:
                album_data = item.get('album')
                added_at = item.get('added_at', '')

                if not (album_data and album_data.get('id')):
                    continue

                # Incremental sync: everything from here on was saved before the last sync.
                if since_timestamp and added_at and added_at < since_timestamp:
                    reached_old_albums = True
                    break

                # Only the first listed artist is recorded.
                artists = album_data.get('artists', [])
                if artists:
                    artist_name = artists[0]['name']
                    artist_id = artists[0].get('id', '')
                else:
                    artist_name = 'Unknown Artist'
                    artist_id = ''

                # First image in the list, if any.
                images = album_data.get('images', [])
                image_url = images[0]['url'] if images else None

                record = {
                    'spotify_album_id': album_data['id'],
                    'album_name': album_data.get('name', ''),
                    'artist_name': artist_name,
                    'artist_id': artist_id,
                    'release_date': album_data.get('release_date', ''),
                    'total_tracks': album_data.get('total_tracks', 0),
                    'album_type': album_data.get('album_type', 'album'),
                    'image_url': image_url,
                    'date_saved': added_at,
                }
                albums.append(record)

            batch_count = len(albums) - count_before
            total_fetched += batch_count
            logger.info(f"Retrieved {batch_count} saved albums in batch (offset {offset}), total: {total_fetched}")

            if reached_old_albums:
                logger.info(f"Incremental sync: reached albums saved before {since_timestamp}, stopping")
                break

            # A short page or a missing 'next' link means this was the last page.
            last_page = len(results['items']) < limit or not results.get('next')
            if last_page:
                break
            offset += limit

        logger.info(f"Retrieved {len(albums)} total saved albums from Spotify library")
        return albums
    except Exception as e:
        logger.error(f"Error fetching saved albums: {e}")
        return []
def _get_playlist_items_page(self, playlist_id: str, limit: int = 100, offset: int = 0) -> dict:
    """Fetch one page of playlist items, preferring the /items endpoint.

    The request goes to `playlists/{id}/items` first; if that answers 403 or
    404, spotipy's `playlist_items()` is used instead. Per the original
    author's note, this covers the Feb 2026 Spotify API migration, where the
    older /tracks endpoint returns 403 for Development Mode apps while
    /items may not yet exist for Extended Quota Mode apps.

    Raises:
        spotipy.SpotifyException: for any status other than 403/404 on the
            first request, or for a failure of the fallback call.
    """
    plid = self.sp._get_id("playlist", playlist_id)
    items_path = f"playlists/{plid}/items"
    try:
        return self.sp._get(
            items_path,
            limit=limit,
            offset=offset,
            additional_types="track,episode",
        )
    except spotipy.SpotifyException as e:
        if e.http_status not in (403, 404):
            raise
        # /items not available — fall back to the old /tracks endpoint.
        return self.sp.playlist_items(playlist_id, limit=limit, offset=offset)
@rate_limited
def _get_playlist_tracks(self, playlist_id: str) -> List[Track]:
    """Fetch every track of a playlist, following pagination.

    Items without an id are ignored. An item that cannot be converted into
    a Track is skipped with a warning instead of aborting the whole fetch.
    Previously a single unparseable item made the method return an empty
    list for the entire playlist.

    Args:
        playlist_id: Playlist identifier passed to the items endpoint.

    Returns:
        The list of Track objects; an empty list when not authenticated or
        when fetching a page fails.
    """
    if not self.is_spotify_authenticated():
        return []

    tracks = []
    try:
        results = self._get_playlist_items_page(playlist_id, limit=100)
        while results:
            for item in results.get('items') or []:
                # Handle both old API ('track') and new Feb 2026 API ('item') field names
                track_data = item.get('track') or item.get('item')
                if not (track_data and track_data.get('id')):
                    continue
                try:
                    tracks.append(Track.from_spotify_track(track_data))
                except Exception as item_error:
                    # Episodes are requested alongside tracks and presumably do
                    # not carry the album/artists fields a Track needs — TODO
                    # confirm. Either way, one bad item must not discard the rest.
                    item_name = track_data.get('name', 'Unknown')
                    logger.warning(f"Skipping unparseable playlist item '{item_name}': {item_error}")
            results = self.sp.next(results) if results.get('next') else None
        return tracks
    except Exception as e:
        logger.error(f"Error fetching playlist tracks: {e}")
        return []
@rate_limited
def get_playlist_by_id(self, playlist_id: str) -> Optional[Playlist]:
    """Fetch a single playlist together with its tracks.

    Returns:
        The Playlist, or None when not authenticated or on any error.
    """
    if not self.is_spotify_authenticated():
        return None

    try:
        playlist_data = self.sp.playlist(playlist_id)
        playlist_tracks = self._get_playlist_tracks(playlist_id)
        playlist = Playlist.from_spotify_playlist(playlist_data, playlist_tracks)
    except Exception as e:
        logger.error(f"Error fetching playlist {playlist_id}: {e}")
        return None
    return playlist
@rate_limited
def search_tracks(self, query: str, limit: int = 10) -> List[Track]:
    """Search for tracks, preferring Spotify and falling back to the configured source.

    Order of attempts when Spotify is authenticated: cached Spotify search
    results, then a live Spotify search (skipped while rate limited). If
    none of these returns, the fallback client (iTunes or Deezer) is queried
    with the original `limit`.
    """
    cache = get_metadata_cache()

    if self.is_spotify_authenticated():
        effective_limit = min(limit, 50)  # Spotify API max is 50

        # 1) Cached Spotify results, ignoring entries that no longer parse.
        cached_results = cache.get_search_results('spotify', 'track', query, effective_limit)
        if cached_results is not None:
            cached_tracks = []
            for raw in cached_results:
                try:
                    cached_tracks.append(Track.from_spotify_track(raw))
                except Exception:
                    pass
            if cached_tracks:
                return cached_tracks

        # 2) Live Spotify search, unless a global ban is active.
        if self.is_rate_limited():
            logger.debug(f"Spotify rate limited, skipping track search for: {query}")
        else:
            try:
                results = self.sp.search(q=query, type='track', limit=effective_limit)
                raw_items = results['tracks']['items']
                tracks = [Track.from_spotify_track(track_data) for track_data in raw_items]

                # Cache individual tracks + search mapping
                entries = [(td.get('id'), td) for td in raw_items if td.get('id')]
                if entries:
                    cache.store_entities_bulk('spotify', 'track', entries)
                    cache.store_search_results('spotify', 'track', query, effective_limit,
                                               [entry_id for entry_id, _ in entries])
                return tracks
            except Exception as e:
                logger.error(f"Error searching tracks via Spotify: {e}")
                # Fall through to fallback

    # 3) Fallback (iTunes or Deezer — configured in settings)
    logger.debug(f"Using {self._fallback_source} fallback for track search: {query}")
    return self._fallback.search_tracks(query, limit)
@rate_limited
def search_artists(self, query: str, limit: int = 10) -> List[Artist]:
    """Search for artists, preferring Spotify and falling back to the configured source.

    Spotify lookups are capped at 10 results. Whatever the source, the
    results are re-ranked so exact (case-insensitive, stripped) name matches
    come first, preserving the relative order otherwise.
    """
    cache = get_metadata_cache()
    use_spotify = self.is_spotify_authenticated()
    query_lower = query.lower().strip()

    def exact_match_first(artist):
        # 0 sorts before 1, so exact name matches are moved to the front.
        return 0 if artist.name.lower().strip() == query_lower else 1

    if use_spotify:
        # Cached Spotify results, ignoring entries that no longer parse.
        cached_results = cache.get_search_results('spotify', 'artist', query, min(limit, 10))
        if cached_results is not None:
            cached_artists = []
            for raw in cached_results:
                try:
                    cached_artists.append(Artist.from_spotify_artist(raw))
                except Exception:
                    pass
            if cached_artists:
                cached_artists.sort(key=exact_match_first)
                return cached_artists

    if use_spotify:
        try:
            # Very short queries (4 characters or fewer) use the artist: field filter.
            is_short = len(query.strip()) <= 4
            search_query = f'artist:{query}' if is_short else query
            results = self.sp.search(q=search_query, type='artist', limit=min(limit, 10))
            raw_items = results['artists']['items']
            artists = [Artist.from_spotify_artist(artist_data) for artist_data in raw_items]

            # Cache individual artists + search mapping
            entries = [(ad.get('id'), ad) for ad in raw_items if ad.get('id')]
            if entries:
                cache.store_entities_bulk('spotify', 'artist', entries)
                cache.store_search_results('spotify', 'artist', query, min(limit, 10),
                                           [entry_id for entry_id, _ in entries])

            # Re-rank: boost exact name matches to the top
            artists.sort(key=exact_match_first)
            return artists
        except Exception as e:
            logger.error(f"Error searching artists via Spotify: {e}")
            # Fall through to the fallback source

    # Fallback (iTunes or Deezer)
    logger.debug(f"Using {self._fallback_source} fallback for artist search: {query}")
    artists = self._fallback.search_artists(query, limit)
    artists.sort(key=exact_match_first)
    return artists
@rate_limited
def search_albums(self, query: str, limit: int = 10) -> List[Album]:
    """Search for albums, preferring Spotify and otherwise using the fallback source.

    Resolution order when Spotify is authenticated:
      1. Cached Spotify search results (used only if at least one entry parses).
      2. A live Spotify search, unless the client is globally rate limited.
    In every other case (not authenticated, rate limited, or the Spotify
    search raised) the configured fallback client handles the query.

    Args:
        query: Free-text album search string.
        limit: Maximum number of results; capped at 10 for Spotify.

    Returns:
        List of Album objects.
    """
    metadata_cache = get_metadata_cache()

    if self.is_spotify_authenticated():
        # 1) Try to rebuild the result list from the search cache.
        cached_items = metadata_cache.get_search_results('spotify', 'album', query, min(limit, 10))
        if cached_items is not None:
            rebuilt = []
            for cached_item in cached_items:
                try:
                    rebuilt.append(Album.from_spotify_album(cached_item))
                except Exception:
                    # Best effort: a cached entry that cannot be parsed is skipped.
                    continue
            if rebuilt:
                return rebuilt

        # 2) Live search, skipped entirely while a global rate-limit ban is active.
        if self.is_rate_limited():
            logger.debug(f"Spotify rate limited, skipping album search for: {query}")
        else:
            try:
                response = self.sp.search(q=query, type='album', limit=min(limit, 10))
                items = response['albums']['items']
                found = [Album.from_spotify_album(item) for item in items]

                # Cache raw albums (never overwriting an existing entry) plus
                # the query -> ids mapping.
                cacheable = [(item.get('id'), item) for item in items if item.get('id')]
                if cacheable:
                    metadata_cache.store_entities_bulk('spotify', 'album', cacheable, skip_if_exists=True)
                    metadata_cache.store_search_results(
                        'spotify', 'album', query, min(limit, 10),
                        [album_id for album_id, _ in cacheable],
                    )
                return found
            except Exception as err:
                logger.error(f"Error searching albums via Spotify: {err}")
                # Continue to the fallback source below.

    # Fallback (iTunes or Deezer)
    logger.debug(f"Using {self._fallback_source} fallback for album search: {query}")
    return self._fallback.search_albums(query, limit)
@rate_limited
def get_track_details(self, track_id: str) -> Optional[Dict[str, Any]]:
    """Return detailed track information from cache, Spotify, or the fallback source.

    The cache holds raw track payloads. A Spotify cache hit is only used
    when it contains an 'album' key; a hit without it is treated as a miss.
    A cache hit under the fallback source is delegated to the fallback client.

    Args:
        track_id: Spotify track ID, or an ID for which `_is_itunes_id` is true.

    Returns:
        Enhanced track dict (see `_build_enhanced_track`), the fallback
        client's result, or None when nothing could be resolved.
    """
    metadata_cache = get_metadata_cache()
    fallback_name = self._fallback_source
    cache_source = 'spotify'
    if self._is_itunes_id(track_id):
        cache_source = fallback_name

    cached_track = metadata_cache.get_entity(cache_source, 'track', track_id)
    if cached_track:
        if cache_source != 'spotify':
            # Fallback cache hit: the fallback client rebuilds the enhanced format.
            return self._fallback.get_track_details(track_id)
        if 'album' in cached_track:
            return self._build_enhanced_track(cached_track)
        # Simplified entry (as stored by get_album_tracks): fetch the full object.
        logger.debug(f"Cache hit for track {track_id} lacks album data, fetching full data")

    if self.is_spotify_authenticated():
        try:
            spotify_track = self.sp.track(track_id)
            if not spotify_track:
                return spotify_track
            # Store the raw response; the enhanced view is rebuilt on each read.
            metadata_cache.store_entity('spotify', 'track', track_id, spotify_track)
            return self._build_enhanced_track(spotify_track)
        except Exception as err:
            _detect_and_set_rate_limit(err, 'get_track_details')
            logger.error(f"Error fetching track details via Spotify: {err}")
            # Continue to the fallback check below.

    # The fallback client is only consulted for IDs accepted by _is_itunes_id.
    if not self._is_itunes_id(track_id):
        logger.debug(f"Cannot use fallback for Spotify track ID: {track_id}")
        return None
    logger.debug(f"Using {fallback_name} fallback for track details: {track_id}")
    return self._fallback.get_track_details(track_id)
@staticmethod
def _build_enhanced_track(track_data: dict) -> dict:
    """Build the enhanced track dict from a raw Spotify track payload.

    Args:
        track_data: Raw track dict; must contain the scalar track fields,
            an 'artists' list, and a full 'album' sub-dict.

    Returns:
        Dict with the scalar track fields, artist names, a trimmed album
        summary, an 'is_album_track' flag (album has more than one track),
        and the untouched input under 'raw_data'.
    """
    # Scalar fields are copied through unchanged, in this order.
    enhanced = {
        field: track_data[field]
        for field in ('id', 'name', 'track_number', 'disc_number', 'duration_ms', 'explicit')
    }

    track_artists = track_data['artists']
    enhanced['artists'] = [entry['name'] for entry in track_artists]
    enhanced['primary_artist'] = track_artists[0]['name'] if track_artists else None

    album = track_data['album']
    album_summary = {
        field: album[field]
        for field in ('id', 'name', 'total_tracks', 'release_date', 'album_type')
    }
    album_summary['artists'] = [entry['name'] for entry in album['artists']]
    enhanced['album'] = album_summary

    enhanced['is_album_track'] = album['total_tracks'] > 1
    enhanced['raw_data'] = track_data
    return enhanced
@rate_limited
def get_track_features(self, track_id: str) -> Optional[Dict[str, Any]]:
    """Return Spotify audio features for a track, using the metadata cache.

    Features are cached as a 'track' entity under the key
    "<track_id>_features" so they do not collide with the track payload.

    Args:
        track_id: Spotify track ID.

    Returns:
        The features dict, or None when Spotify is not authenticated, no
        features were returned, or the request raised (the error is logged).
    """
    metadata_cache = get_metadata_cache()
    feature_key = f"{track_id}_features"

    cached_features = metadata_cache.get_entity('spotify', 'track', feature_key)
    if cached_features:
        return cached_features

    if self.is_spotify_authenticated():
        try:
            response = self.sp.audio_features(track_id)
            if not response:
                return None
            # Only the first element of the response is used.
            track_features = response[0]
            if track_features:
                metadata_cache.store_entity('spotify', 'track', feature_key, track_features)
            return track_features
        except Exception as err:
            logger.error(f"Error fetching track features: {err}")
    return None
@rate_limited
def get_album(self, album_id: str) -> Optional[Dict[str, Any]]:
    """Return album information from cache, Spotify, or the fallback source.

    A Spotify cache hit is only used when it contains a 'tracks' key; a hit
    without it is treated as a miss. A cache hit under the fallback source
    is delegated to the fallback client.

    Args:
        album_id: Spotify album ID, or an ID for which `_is_itunes_id` is true.

    Returns:
        The album dict, the fallback client's result, or None when nothing
        could be resolved.
    """
    metadata_cache = get_metadata_cache()
    fallback_name = self._fallback_source
    cache_source = 'spotify'
    if self._is_itunes_id(album_id):
        cache_source = fallback_name

    cached_album = metadata_cache.get_entity(cache_source, 'album', album_id)
    if cached_album:
        if cache_source != 'spotify':
            # Fallback cache hit: let the fallback client produce the result.
            return self._fallback.get_album(album_id)
        if 'tracks' in cached_album:
            return cached_album
        # Simplified entry (as stored by get_artist_albums): fetch the full object.
        logger.debug(f"Cache hit for album {album_id} lacks tracks, fetching full data")

    if self.is_spotify_authenticated():
        try:
            spotify_album = self.sp.album(album_id)
            if spotify_album:
                metadata_cache.store_entity('spotify', 'album', album_id, spotify_album)
            return spotify_album
        except Exception as err:
            _detect_and_set_rate_limit(err, 'get_album')
            logger.error(f"Error fetching album via Spotify: {err}")
            # Continue to the fallback check below.

    # The fallback client is only consulted for IDs accepted by _is_itunes_id.
    if not self._is_itunes_id(album_id):
        logger.debug(f"Cannot use fallback for Spotify album ID: {album_id}")
        return None
    logger.debug(f"Using {fallback_name} fallback for album: {album_id}")
    return self._fallback.get_album(album_id)
@rate_limited
def get_album_tracks(self, album_id: str) -> Optional[Dict[str, Any]]:
    """Return all tracks of an album from cache, Spotify, or the fallback source.

    The aggregated result is cached as an 'album' entity under the key
    "<album_id>_tracks" so it does not collide with the album metadata.
    When fetched from Spotify, every page is followed and merged into a
    single response-shaped dict.

    Args:
        album_id: Spotify album ID, or an ID for which `_is_itunes_id` is true.

    Returns:
        Dict shaped like the first Spotify page with 'items' holding every
        track, 'next' set to None and 'limit' set to the track count; the
        fallback client's result; or None when nothing could be resolved.
    """
    metadata_cache = get_metadata_cache()
    fallback_name = self._fallback_source
    cache_source = 'spotify'
    if self._is_itunes_id(album_id):
        cache_source = fallback_name
    tracks_key = f"{album_id}_tracks"

    cached_tracks = metadata_cache.get_entity(cache_source, 'album', tracks_key)
    if cached_tracks:
        return cached_tracks

    if self.is_spotify_authenticated():
        try:
            first_page = self.sp.album_tracks(album_id)
            if not first_page or 'items' not in first_page:
                return None

            # Start from a copy of page one, then walk the remaining pages.
            collected = list(first_page['items'])
            page = first_page
            while page.get('next'):
                page = self.sp.next(page)
                if page and 'items' in page:
                    collected += page['items']

            logger.info(f"Retrieved {len(collected)} tracks for album {album_id}")

            # Same shape as page one, but describing the complete track list.
            merged = {
                **first_page,
                'items': collected,
                'next': None,  # No more pages
                'limit': len(collected),
            }
            metadata_cache.store_entity('spotify', 'album', tracks_key, merged)

            # Opportunistically cache each track, never overwriting an existing entry.
            per_track = [(item.get('id'), item) for item in collected if item.get('id')]
            if per_track:
                metadata_cache.store_entities_bulk('spotify', 'track', per_track, skip_if_exists=True)
            return merged
        except Exception as err:
            _detect_and_set_rate_limit(err, 'get_album_tracks')
            logger.error(f"Error fetching album tracks via Spotify: {err}")
            # Continue to the fallback check below.

    # The fallback client is only consulted for IDs accepted by _is_itunes_id.
    if not self._is_itunes_id(album_id):
        logger.debug(f"Cannot use fallback for Spotify album ID: {album_id}")
        return None
    logger.debug(f"Using {fallback_name} fallback for album tracks: {album_id}")
    return self._fallback.get_album_tracks(album_id)
@rate_limited
def get_artist_albums(self, artist_id: str, album_type: str = 'album,single', limit: int = 10) -> List[Album]:
    """Return an artist's albums from Spotify, or from the fallback source.

    On Spotify, `limit` (capped at 10) is the page size of each request;
    every page is followed, so the returned list holds all albums found.

    Args:
        artist_id: Spotify artist ID, or an ID for which `_is_itunes_id` is true.
        album_type: Comma-separated album types forwarded to the lookup.
        limit: Page size for Spotify; forwarded unchanged to the fallback client.

    Returns:
        List of Album objects; empty when nothing could be resolved.
    """
    if self.is_spotify_authenticated():
        try:
            albums = []
            raw_albums = []
            page = self.sp.artist_albums(artist_id, album_type=album_type, limit=min(limit, 10))
            while page:
                for item in page['items']:
                    albums.append(Album.from_spotify_album(item))
                    raw_albums.append(item)
                # Follow the pagination link until there is none left.
                if not page['next']:
                    break
                page = self.sp.next(page)

            logger.info(f"Retrieved {len(albums)} albums for artist {artist_id}")

            # Opportunistically cache each album, never overwriting an existing entry.
            metadata_cache = get_metadata_cache()
            cacheable = [(item.get('id'), item) for item in raw_albums if item.get('id')]
            if cacheable:
                metadata_cache.store_entities_bulk('spotify', 'album', cacheable, skip_if_exists=True)
            return albums
        except Exception as err:
            _detect_and_set_rate_limit(err, 'get_artist_albums')
            logger.error(f"Error fetching artist albums via Spotify: {err}")
            # Continue to the fallback check below.

    # The fallback client is only consulted for IDs accepted by _is_itunes_id.
    if not self._is_itunes_id(artist_id):
        logger.debug(f"Cannot use fallback for Spotify artist ID: {artist_id}")
        return []
    logger.debug(f"Using {self._fallback_source} fallback for artist albums: {artist_id}")
    return self._fallback.get_artist_albums(artist_id, album_type, limit)
@rate_limited
def get_user_info(self) -> Optional[Dict[str, Any]]:
    """Return the current Spotify user's profile.

    Returns:
        The dict returned by the Spotify current-user call, or None when
        Spotify is not authenticated or the request raised (the error is
        logged).
    """
    if self.is_spotify_authenticated():
        try:
            profile = self.sp.current_user()
        except Exception as err:
            logger.error(f"Error fetching user info: {err}")
        else:
            return profile
    return None
@rate_limited
def get_artist(self, artist_id: str) -> Optional[Dict[str, Any]]:
    """
    Return full artist details from cache, Spotify, or the fallback source.

    The cache is consulted before the authentication check. A Spotify cache
    hit is returned as-is; a hit under the fallback source is delegated to
    the fallback client.

    Args:
        artist_id: Spotify artist ID, or an ID for which `_is_itunes_id` is true.

    Returns:
        The artist dict, the fallback client's result, or None when nothing
        could be resolved.
    """
    metadata_cache = get_metadata_cache()
    fallback_name = self._fallback_source
    cache_source = 'spotify'
    if self._is_itunes_id(artist_id):
        cache_source = fallback_name

    cached_artist = metadata_cache.get_entity(cache_source, 'artist', artist_id)
    if cached_artist:
        if cache_source != 'spotify':
            # Fallback cache hit: let the fallback client produce the result.
            return self._fallback.get_artist(artist_id)
        return cached_artist  # Raw Spotify payload is the expected format.

    if self.is_spotify_authenticated():
        try:
            spotify_artist = self.sp.artist(artist_id)
            if spotify_artist:
                metadata_cache.store_entity('spotify', 'artist', artist_id, spotify_artist)
            return spotify_artist
        except Exception as err:
            _detect_and_set_rate_limit(err, 'get_artist')
            logger.error(f"Error fetching artist via Spotify: {err}")
            # Continue to the fallback check below.

    # The fallback client is only consulted for IDs accepted by _is_itunes_id.
    if not self._is_itunes_id(artist_id):
        logger.debug(f"Cannot use fallback for Spotify artist ID: {artist_id}")
        return None
    logger.debug(f"Using {fallback_name} fallback for artist: {artist_id}")
    return self._fallback.get_artist(artist_id)
@rate_limited
def get_artists_batch(self, artist_ids: List[str]) -> Dict[str, Dict]:
    """Resolve several artists at once, reading the cache before calling Spotify.

    IDs missing from the cache are requested from Spotify in chunks of 50
    when Spotify is authenticated; every artist returned is cached and
    added to the result. If a request raises, the error is logged and
    whatever was collected so far is returned.

    Args:
        artist_ids: Spotify artist IDs to resolve.

    Returns:
        Dict keyed by artist_id → artist data dict. IDs that could not be
        resolved are absent.
    """
    if not artist_ids:
        return {}

    metadata_cache = get_metadata_cache()
    resolved, uncached = metadata_cache.get_entities_batch('spotify', 'artist', artist_ids)

    if not uncached or not self.is_spotify_authenticated():
        return resolved

    chunk_size = 50  # Spotify batch endpoint accepts up to 50 IDs
    try:
        for start in range(0, len(uncached), chunk_size):
            response = self.sp.artists(uncached[start:start + chunk_size])
            for artist in (response or {}).get('artists', []):
                # Skip empty slots and entries without an id.
                if not artist or not artist.get('id'):
                    continue
                fetched_id = artist['id']
                metadata_cache.store_entity('spotify', 'artist', fetched_id, artist)
                resolved[fetched_id] = artist
    except Exception as err:
        logger.error(f"Error in batch artist fetch: {err}")
    return resolved