diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index b20f36f2..069584b3 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -398,11 +398,6 @@ class WatchlistScanner: self._metadata_service = MetadataService() return self._metadata_service - def _reset_spotify_run_state(self): - """Clear per-run Spotify suppression state.""" - self._spotify_disabled_for_run = False - self._spotify_disabled_reason = None - def _disable_spotify_for_run(self, reason: str): """Disable Spotify for rest of current run, once.""" if not self._spotify_disabled_for_run: @@ -574,334 +569,6 @@ class WatchlistScanner: return albums - def scan_all_watchlist_artists(self) -> List[ScanResult]: - """ - Scan artists in the watchlist for new releases. - - OPTIMIZED: Scans up to 50 artists per run using smart selection: - - Priority: Artists not scanned in 7+ days (guaranteed) - - Remainder: Random selection from other artists - - This reduces API calls while ensuring all artists scanned at least weekly. - Only checks releases after their last scan timestamp. - """ - logger.info("Starting watchlist scan") - - try: - self._reset_spotify_run_state() - from datetime import datetime, timedelta - import random - - # Get all watchlist artists - all_watchlist_artists = self.database.get_watchlist_artists() - if not all_watchlist_artists: - logger.info("No artists in watchlist to scan") - return [] - - logger.info(f"Found {len(all_watchlist_artists)} total artists in watchlist") - - # OPTIMIZATION: Select up to 50 artists to scan - # 1. Must scan: Artists not scanned in 7+ days (or never scanned) - seven_days_ago = datetime.now() - timedelta(days=7) - must_scan = [] - can_skip = [] - - for artist in all_watchlist_artists: - if artist.last_scan_timestamp is None: - # Never scanned - must scan - must_scan.append(artist) - elif artist.last_scan_timestamp < seven_days_ago: - # Not scanned in 7+ days - must scan - must_scan.append(artist) - else: - # Scanned recently - can skip (but might randomly select) - can_skip.append(artist) - - logger.info(f"Artists requiring scan (not scanned in 7+ days): {len(must_scan)}") - logger.info(f"Artists scanned recently (< 7 days): {len(can_skip)}") - - # 2. Fill remaining slots (up to 50 total) with random selection - max_artists_per_scan = 50 - artists_to_scan = must_scan.copy() - - remaining_slots = max_artists_per_scan - len(must_scan) - if remaining_slots > 0 and can_skip: - # Randomly sample from recently-scanned artists - random_sample_size = min(remaining_slots, len(can_skip)) - random_selection = random.sample(can_skip, random_sample_size) - artists_to_scan.extend(random_selection) - logger.info(f"Additionally scanning {len(random_selection)} randomly selected artists") - - # Shuffle to avoid always scanning same order - random.shuffle(artists_to_scan) - - logger.info(f"Total artists to scan this run: {len(artists_to_scan)}") - if len(all_watchlist_artists) > max_artists_per_scan: - logger.info(f"Skipping {len(all_watchlist_artists) - len(artists_to_scan)} artists (will be scanned in future runs)") - - watchlist_artists = artists_to_scan - - # PROACTIVE ID BACKFILLING (cross-provider support) - # Before scanning, ensure ALL artists have IDs for ALL available sources - # iTunes and Deezer are always available; Spotify requires authentication - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - providers_to_backfill = ['itunes', 'deezer'] - if self._spotify_is_primary_source(): - providers_to_backfill.append('spotify') - try: - from config.settings import config_manager as _cfg - if _cfg.get('discogs.token', ''): - providers_to_backfill.append('discogs') - except Exception: - pass - - for provider in providers_to_backfill: - try: - self._backfill_missing_ids(all_watchlist_artists, provider) - except Exception as backfill_error: - logger.warning(f"Error during {provider} ID backfilling: {backfill_error}") - # Continue with scan even if backfilling fails - - scan_results = [] - for i, artist in enumerate(watchlist_artists): - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - - try: - result = self.scan_artist(artist) - scan_results.append(result) - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - - if result.success: - logger.info(f"Scanned {artist.artist_name}: {result.new_tracks_found} new tracks found") - else: - logger.warning(f"Failed to scan {artist.artist_name}: {result.error_message}") - - # Rate limiting: Add delay between artists to avoid hitting Spotify API limits - # This is critical to prevent getting banned for 6+ hours - if i < len(watchlist_artists) - 1: # Don't delay after the last artist - logger.debug(f"Rate limiting: waiting {DELAY_BETWEEN_ARTISTS}s before scanning next artist") - time.sleep(DELAY_BETWEEN_ARTISTS) - - except Exception as e: - logger.error(f"Error scanning artist {artist.artist_name}: {e}") - scan_results.append(ScanResult( - artist_name=artist.artist_name, - spotify_artist_id=artist.spotify_artist_id, - albums_checked=0, - new_tracks_found=0, - tracks_added_to_wishlist=0, - success=False, - error_message=str(e) - )) - - # Log summary - successful_scans = [r for r in scan_results if r.success] - total_new_tracks = sum(r.new_tracks_found for r in successful_scans) - total_added_to_wishlist = sum(r.tracks_added_to_wishlist for r in successful_scans) - - logger.info(f"Watchlist scan complete: {len(successful_scans)}/{len(scan_results)} artists scanned successfully") - logger.info(f"Found {total_new_tracks} new tracks, added {total_added_to_wishlist} to wishlist") - - # Populate discovery pool with tracks from similar artists - logger.info("Starting discovery pool population...") - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - self.populate_discovery_pool() - - # Populate seasonal content (runs independently with its own threshold) - logger.info("Updating seasonal content...") - self._populate_seasonal_content() - - # Generate Last.fm Radio playlists for top tracks (max once per week) - self._generate_lastfm_radio_playlists() - - # Sync Spotify library cache (runs after main scan) - try: - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - self.sync_spotify_library_cache() - except Exception as lib_err: - logger.warning(f"Error syncing Spotify library cache: {lib_err}") - - return scan_results - - except Exception as e: - logger.error(f"Error during watchlist scan: {e}") - return [] - finally: - self._reset_spotify_run_state() - - def scan_artist(self, watchlist_artist: WatchlistArtist) -> ScanResult: - """ - Scan a single artist for new releases. - Only checks releases after the last scan timestamp. - Uses the active provider (Spotify if authenticated, otherwise iTunes). - """ - try: - logger.info(f"Scanning artist: {watchlist_artist.artist_name}") - - # Get the active client and artist ID based on provider - client, artist_id, provider = self._get_active_client_and_artist_id(watchlist_artist) - - if client is None or artist_id is None: - return ScanResult( - artist_name=watchlist_artist.artist_name, - spotify_artist_id=watchlist_artist.spotify_artist_id or '', - albums_checked=0, - new_tracks_found=0, - tracks_added_to_wishlist=0, - success=False, - error_message=f"No {self.metadata_service.get_active_provider()} ID available for this artist" - ) - - logger.info(f"Using {provider} provider for {watchlist_artist.artist_name} (ID: {artist_id})") - - # Update artist image if missing or on every scan to keep fresh - try: - image_url = None - artist_data = client.get_artist(artist_id) - if artist_data: - if 'images' in artist_data and artist_data['images']: - # Spotify/Deezer format: array of {url, height, width} - image_url = artist_data['images'][1]['url'] if len(artist_data['images']) > 1 else artist_data['images'][0]['url'] - elif artist_data.get('image_url'): - # Direct image_url format (iTunes/some providers) - image_url = artist_data['image_url'] - - if image_url: - db_artist_id = watchlist_artist.spotify_artist_id or watchlist_artist.itunes_artist_id or watchlist_artist.deezer_artist_id or artist_id - self.database.update_watchlist_artist_image(db_artist_id, image_url) - if not watchlist_artist.image_url: - logger.info(f"Backfilled artist image for {watchlist_artist.artist_name}") - else: - logger.debug(f"No image available for {watchlist_artist.artist_name} from {provider}") - except Exception as img_error: - logger.warning(f"Could not update artist image for {watchlist_artist.artist_name}: {img_error}") - - # Get artist discography using active provider - albums = self._get_artist_discography_with_client(client, artist_id, watchlist_artist.last_scan_timestamp, lookback_days=watchlist_artist.lookback_days) - - if albums is None: - return ScanResult( - artist_name=watchlist_artist.artist_name, - spotify_artist_id=watchlist_artist.spotify_artist_id or '', - albums_checked=0, - new_tracks_found=0, - tracks_added_to_wishlist=0, - success=False, - error_message=f"Failed to get artist discography from {provider}" - ) - - logger.info(f"Found {len(albums)} albums/singles to check for {watchlist_artist.artist_name}") - - # Safety check: Limit number of albums to scan to prevent extremely long sessions - MAX_ALBUMS_PER_ARTIST = 50 # Reasonable limit to prevent API abuse - if len(albums) > MAX_ALBUMS_PER_ARTIST: - logger.warning(f"Artist {watchlist_artist.artist_name} has {len(albums)} albums, limiting to {MAX_ALBUMS_PER_ARTIST} most recent") - albums = albums[:MAX_ALBUMS_PER_ARTIST] # Most recent albums are first - - # Check each album/single for missing tracks - new_tracks_found = 0 - tracks_added_to_wishlist = 0 - - for album_index, album in enumerate(albums): - try: - # Get full album data - logger.info(f"Checking album {album_index + 1}/{len(albums)}: {album.name}") - album_data = client.get_album(album.id) - if not album_data: - continue - - # Get album tracks (works for both Spotify and iTunes) - # Spotify's get_album() includes tracks, but we use get_album_tracks() for consistency - tracks_data = client.get_album_tracks(album.id) - if not tracks_data or not tracks_data.get('items'): - continue - - tracks = tracks_data['items'] - logger.debug(f"Checking album: {album_data.get('name', 'Unknown')} ({len(tracks)} tracks)") - - # Check if user wants this type of release - if not self._should_include_release(len(tracks), watchlist_artist): - release_type = "album" if len(tracks) >= 7 else ("EP" if len(tracks) >= 4 else "single") - logger.debug(f"Skipping {release_type}: {album_data.get('name', 'Unknown')} - user preference") - continue - - # Skip albums with placeholder track names (unreleased tracklist) - # Spotify uses "Track 1", "Track 2", etc. for unannounced tracks - if self._has_placeholder_tracks(tracks): - logger.info(f"Skipping album with placeholder tracks (unreleased tracklist): {album_data.get('name', 'Unknown')}") - continue - - # Check each track - for track in tracks: - # Check content type filters (live, remix, acoustic, compilation) - if not self._should_include_track(track, album_data, watchlist_artist): - continue # Skip this track based on content type preferences - - if self.is_track_missing_from_library(track, album_name=album_data.get('name')): - new_tracks_found += 1 - - # Add to wishlist - if self.add_track_to_wishlist(track, album_data, watchlist_artist): - tracks_added_to_wishlist += 1 - - # Rate limiting: Add delay between albums to prevent API abuse - # This is especially important for artists with many albums - if album_index < len(albums) - 1: # Don't delay after the last album - logger.debug(f"Rate limiting: waiting {DELAY_BETWEEN_ALBUMS}s before next album") - time.sleep(DELAY_BETWEEN_ALBUMS) - - except Exception as e: - logger.warning(f"Error checking album {album.name}: {e}") - continue - - # Update last scan timestamp for this artist - self.update_artist_scan_timestamp(watchlist_artist) - - # Fetch and store similar artists for discovery feature (with caching to avoid over-polling) - # Similar artists are fetched from MusicMap (works with any source) and matched to both Spotify and iTunes - source_artist_id = watchlist_artist.spotify_artist_id or watchlist_artist.itunes_artist_id or str(watchlist_artist.id) - try: - # Check if we have fresh similar artists cached (< 30 days old) - # If Spotify is authenticated, also require Spotify IDs to be present - spotify_authenticated = self.spotify_client and self.spotify_client.is_spotify_authenticated() - artist_profile_id = getattr(watchlist_artist, 'profile_id', 1) - if self.database.has_fresh_similar_artists(source_artist_id, days_threshold=30, require_spotify=spotify_authenticated, profile_id=artist_profile_id): - logger.info(f"Similar artists for {watchlist_artist.artist_name} are cached and fresh, skipping MusicMap fetch") - # Even if cached, backfill missing iTunes IDs (seamless dual-source support) - self._backfill_similar_artists_itunes_ids(source_artist_id, profile_id=artist_profile_id) - else: - logger.info(f"Fetching similar artists for {watchlist_artist.artist_name}...") - self.update_similar_artists(watchlist_artist, profile_id=artist_profile_id) - logger.info(f"Similar artists updated for {watchlist_artist.artist_name}") - except Exception as similar_error: - logger.warning(f"Failed to update similar artists for {watchlist_artist.artist_name}: {similar_error}") - - return ScanResult( - artist_name=watchlist_artist.artist_name, - spotify_artist_id=watchlist_artist.spotify_artist_id or '', - albums_checked=len(albums), - new_tracks_found=new_tracks_found, - tracks_added_to_wishlist=tracks_added_to_wishlist, - success=True - ) - - except Exception as e: - logger.error(f"Error scanning artist {watchlist_artist.artist_name}: {e}") - return ScanResult( - artist_name=watchlist_artist.artist_name, - spotify_artist_id=watchlist_artist.spotify_artist_id or '', - albums_checked=0, - new_tracks_found=0, - tracks_added_to_wishlist=0, - success=False, - error_message=str(e) - ) - def _apply_global_watchlist_overrides(self, watchlist_artists: List[WatchlistArtist]): """Apply global watchlist release-type overrides to a batch of artists.""" try: diff --git a/web_server.py b/web_server.py index f74db392..58c08eb1 100644 --- a/web_server.py +++ b/web_server.py @@ -41463,7 +41463,7 @@ def refresh_spotify_library(): def _run_sync(): try: from core.watchlist_scanner import get_watchlist_scanner - scanner = get_watchlist_scanner() + scanner = get_watchlist_scanner(spotify_client) if scanner: # Force full sync by clearing last_sync timestamp database = get_database()