From 40fa139804669355f542dbbb553bff60508e69c8 Mon Sep 17 00:00:00 2001 From: Antti Kettunen Date: Thu, 16 Apr 2026 08:27:41 +0300 Subject: [PATCH] Remove dead watchlist scan paths Drop the legacy watchlist scan entrypoints that are no longer used by the web scan flow, and keep the live refresh path pointed at the shared scanner helper. --- core/watchlist_scanner.py | 333 -------------------------------------- web_server.py | 2 +- 2 files changed, 1 insertion(+), 334 deletions(-) diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index b20f36f2..069584b3 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -398,11 +398,6 @@ class WatchlistScanner: self._metadata_service = MetadataService() return self._metadata_service - def _reset_spotify_run_state(self): - """Clear per-run Spotify suppression state.""" - self._spotify_disabled_for_run = False - self._spotify_disabled_reason = None - def _disable_spotify_for_run(self, reason: str): """Disable Spotify for rest of current run, once.""" if not self._spotify_disabled_for_run: @@ -574,334 +569,6 @@ class WatchlistScanner: return albums - def scan_all_watchlist_artists(self) -> List[ScanResult]: - """ - Scan artists in the watchlist for new releases. - - OPTIMIZED: Scans up to 50 artists per run using smart selection: - - Priority: Artists not scanned in 7+ days (guaranteed) - - Remainder: Random selection from other artists - - This reduces API calls while ensuring all artists scanned at least weekly. - Only checks releases after their last scan timestamp. - """ - logger.info("Starting watchlist scan") - - try: - self._reset_spotify_run_state() - from datetime import datetime, timedelta - import random - - # Get all watchlist artists - all_watchlist_artists = self.database.get_watchlist_artists() - if not all_watchlist_artists: - logger.info("No artists in watchlist to scan") - return [] - - logger.info(f"Found {len(all_watchlist_artists)} total artists in watchlist") - - # OPTIMIZATION: Select up to 50 artists to scan - # 1. Must scan: Artists not scanned in 7+ days (or never scanned) - seven_days_ago = datetime.now() - timedelta(days=7) - must_scan = [] - can_skip = [] - - for artist in all_watchlist_artists: - if artist.last_scan_timestamp is None: - # Never scanned - must scan - must_scan.append(artist) - elif artist.last_scan_timestamp < seven_days_ago: - # Not scanned in 7+ days - must scan - must_scan.append(artist) - else: - # Scanned recently - can skip (but might randomly select) - can_skip.append(artist) - - logger.info(f"Artists requiring scan (not scanned in 7+ days): {len(must_scan)}") - logger.info(f"Artists scanned recently (< 7 days): {len(can_skip)}") - - # 2. Fill remaining slots (up to 50 total) with random selection - max_artists_per_scan = 50 - artists_to_scan = must_scan.copy() - - remaining_slots = max_artists_per_scan - len(must_scan) - if remaining_slots > 0 and can_skip: - # Randomly sample from recently-scanned artists - random_sample_size = min(remaining_slots, len(can_skip)) - random_selection = random.sample(can_skip, random_sample_size) - artists_to_scan.extend(random_selection) - logger.info(f"Additionally scanning {len(random_selection)} randomly selected artists") - - # Shuffle to avoid always scanning same order - random.shuffle(artists_to_scan) - - logger.info(f"Total artists to scan this run: {len(artists_to_scan)}") - if len(all_watchlist_artists) > max_artists_per_scan: - logger.info(f"Skipping {len(all_watchlist_artists) - len(artists_to_scan)} artists (will be scanned in future runs)") - - watchlist_artists = artists_to_scan - - # PROACTIVE ID BACKFILLING (cross-provider support) - # Before scanning, ensure ALL artists have IDs for ALL available sources - # iTunes and Deezer are always available; Spotify requires authentication - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - providers_to_backfill = ['itunes', 'deezer'] - if self._spotify_is_primary_source(): - providers_to_backfill.append('spotify') - try: - from config.settings import config_manager as _cfg - if _cfg.get('discogs.token', ''): - providers_to_backfill.append('discogs') - except Exception: - pass - - for provider in providers_to_backfill: - try: - self._backfill_missing_ids(all_watchlist_artists, provider) - except Exception as backfill_error: - logger.warning(f"Error during {provider} ID backfilling: {backfill_error}") - # Continue with scan even if backfilling fails - - scan_results = [] - for i, artist in enumerate(watchlist_artists): - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - - try: - result = self.scan_artist(artist) - scan_results.append(result) - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - - if result.success: - logger.info(f"Scanned {artist.artist_name}: {result.new_tracks_found} new tracks found") - else: - logger.warning(f"Failed to scan {artist.artist_name}: {result.error_message}") - - # Rate limiting: Add delay between artists to avoid hitting Spotify API limits - # This is critical to prevent getting banned for 6+ hours - if i < len(watchlist_artists) - 1: # Don't delay after the last artist - logger.debug(f"Rate limiting: waiting {DELAY_BETWEEN_ARTISTS}s before scanning next artist") - time.sleep(DELAY_BETWEEN_ARTISTS) - - except Exception as e: - logger.error(f"Error scanning artist {artist.artist_name}: {e}") - scan_results.append(ScanResult( - artist_name=artist.artist_name, - spotify_artist_id=artist.spotify_artist_id, - albums_checked=0, - new_tracks_found=0, - tracks_added_to_wishlist=0, - success=False, - error_message=str(e) - )) - - # Log summary - successful_scans = [r for r in scan_results if r.success] - total_new_tracks = sum(r.new_tracks_found for r in successful_scans) - total_added_to_wishlist = sum(r.tracks_added_to_wishlist for r in successful_scans) - - logger.info(f"Watchlist scan complete: {len(successful_scans)}/{len(scan_results)} artists scanned successfully") - logger.info(f"Found {total_new_tracks} new tracks, added {total_added_to_wishlist} to wishlist") - - # Populate discovery pool with tracks from similar artists - logger.info("Starting discovery pool population...") - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - self.populate_discovery_pool() - - # Populate seasonal content (runs independently with its own threshold) - logger.info("Updating seasonal content...") - self._populate_seasonal_content() - - # Generate Last.fm Radio playlists for top tracks (max once per week) - self._generate_lastfm_radio_playlists() - - # Sync Spotify library cache (runs after main scan) - try: - if self.spotify_client and self.spotify_client.is_rate_limited(): - self._disable_spotify_for_run("global Spotify rate limit active") - self.sync_spotify_library_cache() - except Exception as lib_err: - logger.warning(f"Error syncing Spotify library cache: {lib_err}") - - return scan_results - - except Exception as e: - logger.error(f"Error during watchlist scan: {e}") - return [] - finally: - self._reset_spotify_run_state() - - def scan_artist(self, watchlist_artist: WatchlistArtist) -> ScanResult: - """ - Scan a single artist for new releases. - Only checks releases after the last scan timestamp. - Uses the active provider (Spotify if authenticated, otherwise iTunes). - """ - try: - logger.info(f"Scanning artist: {watchlist_artist.artist_name}") - - # Get the active client and artist ID based on provider - client, artist_id, provider = self._get_active_client_and_artist_id(watchlist_artist) - - if client is None or artist_id is None: - return ScanResult( - artist_name=watchlist_artist.artist_name, - spotify_artist_id=watchlist_artist.spotify_artist_id or '', - albums_checked=0, - new_tracks_found=0, - tracks_added_to_wishlist=0, - success=False, - error_message=f"No {self.metadata_service.get_active_provider()} ID available for this artist" - ) - - logger.info(f"Using {provider} provider for {watchlist_artist.artist_name} (ID: {artist_id})") - - # Update artist image if missing or on every scan to keep fresh - try: - image_url = None - artist_data = client.get_artist(artist_id) - if artist_data: - if 'images' in artist_data and artist_data['images']: - # Spotify/Deezer format: array of {url, height, width} - image_url = artist_data['images'][1]['url'] if len(artist_data['images']) > 1 else artist_data['images'][0]['url'] - elif artist_data.get('image_url'): - # Direct image_url format (iTunes/some providers) - image_url = artist_data['image_url'] - - if image_url: - db_artist_id = watchlist_artist.spotify_artist_id or watchlist_artist.itunes_artist_id or watchlist_artist.deezer_artist_id or artist_id - self.database.update_watchlist_artist_image(db_artist_id, image_url) - if not watchlist_artist.image_url: - logger.info(f"Backfilled artist image for {watchlist_artist.artist_name}") - else: - logger.debug(f"No image available for {watchlist_artist.artist_name} from {provider}") - except Exception as img_error: - logger.warning(f"Could not update artist image for {watchlist_artist.artist_name}: {img_error}") - - # Get artist discography using active provider - albums = self._get_artist_discography_with_client(client, artist_id, watchlist_artist.last_scan_timestamp, lookback_days=watchlist_artist.lookback_days) - - if albums is None: - return ScanResult( - artist_name=watchlist_artist.artist_name, - spotify_artist_id=watchlist_artist.spotify_artist_id or '', - albums_checked=0, - new_tracks_found=0, - tracks_added_to_wishlist=0, - success=False, - error_message=f"Failed to get artist discography from {provider}" - ) - - logger.info(f"Found {len(albums)} albums/singles to check for {watchlist_artist.artist_name}") - - # Safety check: Limit number of albums to scan to prevent extremely long sessions - MAX_ALBUMS_PER_ARTIST = 50 # Reasonable limit to prevent API abuse - if len(albums) > MAX_ALBUMS_PER_ARTIST: - logger.warning(f"Artist {watchlist_artist.artist_name} has {len(albums)} albums, limiting to {MAX_ALBUMS_PER_ARTIST} most recent") - albums = albums[:MAX_ALBUMS_PER_ARTIST] # Most recent albums are first - - # Check each album/single for missing tracks - new_tracks_found = 0 - tracks_added_to_wishlist = 0 - - for album_index, album in enumerate(albums): - try: - # Get full album data - logger.info(f"Checking album {album_index + 1}/{len(albums)}: {album.name}") - album_data = client.get_album(album.id) - if not album_data: - continue - - # Get album tracks (works for both Spotify and iTunes) - # Spotify's get_album() includes tracks, but we use get_album_tracks() for consistency - tracks_data = client.get_album_tracks(album.id) - if not tracks_data or not tracks_data.get('items'): - continue - - tracks = tracks_data['items'] - logger.debug(f"Checking album: {album_data.get('name', 'Unknown')} ({len(tracks)} tracks)") - - # Check if user wants this type of release - if not self._should_include_release(len(tracks), watchlist_artist): - release_type = "album" if len(tracks) >= 7 else ("EP" if len(tracks) >= 4 else "single") - logger.debug(f"Skipping {release_type}: {album_data.get('name', 'Unknown')} - user preference") - continue - - # Skip albums with placeholder track names (unreleased tracklist) - # Spotify uses "Track 1", "Track 2", etc. for unannounced tracks - if self._has_placeholder_tracks(tracks): - logger.info(f"Skipping album with placeholder tracks (unreleased tracklist): {album_data.get('name', 'Unknown')}") - continue - - # Check each track - for track in tracks: - # Check content type filters (live, remix, acoustic, compilation) - if not self._should_include_track(track, album_data, watchlist_artist): - continue # Skip this track based on content type preferences - - if self.is_track_missing_from_library(track, album_name=album_data.get('name')): - new_tracks_found += 1 - - # Add to wishlist - if self.add_track_to_wishlist(track, album_data, watchlist_artist): - tracks_added_to_wishlist += 1 - - # Rate limiting: Add delay between albums to prevent API abuse - # This is especially important for artists with many albums - if album_index < len(albums) - 1: # Don't delay after the last album - logger.debug(f"Rate limiting: waiting {DELAY_BETWEEN_ALBUMS}s before next album") - time.sleep(DELAY_BETWEEN_ALBUMS) - - except Exception as e: - logger.warning(f"Error checking album {album.name}: {e}") - continue - - # Update last scan timestamp for this artist - self.update_artist_scan_timestamp(watchlist_artist) - - # Fetch and store similar artists for discovery feature (with caching to avoid over-polling) - # Similar artists are fetched from MusicMap (works with any source) and matched to both Spotify and iTunes - source_artist_id = watchlist_artist.spotify_artist_id or watchlist_artist.itunes_artist_id or str(watchlist_artist.id) - try: - # Check if we have fresh similar artists cached (< 30 days old) - # If Spotify is authenticated, also require Spotify IDs to be present - spotify_authenticated = self.spotify_client and self.spotify_client.is_spotify_authenticated() - artist_profile_id = getattr(watchlist_artist, 'profile_id', 1) - if self.database.has_fresh_similar_artists(source_artist_id, days_threshold=30, require_spotify=spotify_authenticated, profile_id=artist_profile_id): - logger.info(f"Similar artists for {watchlist_artist.artist_name} are cached and fresh, skipping MusicMap fetch") - # Even if cached, backfill missing iTunes IDs (seamless dual-source support) - self._backfill_similar_artists_itunes_ids(source_artist_id, profile_id=artist_profile_id) - else: - logger.info(f"Fetching similar artists for {watchlist_artist.artist_name}...") - self.update_similar_artists(watchlist_artist, profile_id=artist_profile_id) - logger.info(f"Similar artists updated for {watchlist_artist.artist_name}") - except Exception as similar_error: - logger.warning(f"Failed to update similar artists for {watchlist_artist.artist_name}: {similar_error}") - - return ScanResult( - artist_name=watchlist_artist.artist_name, - spotify_artist_id=watchlist_artist.spotify_artist_id or '', - albums_checked=len(albums), - new_tracks_found=new_tracks_found, - tracks_added_to_wishlist=tracks_added_to_wishlist, - success=True - ) - - except Exception as e: - logger.error(f"Error scanning artist {watchlist_artist.artist_name}: {e}") - return ScanResult( - artist_name=watchlist_artist.artist_name, - spotify_artist_id=watchlist_artist.spotify_artist_id or '', - albums_checked=0, - new_tracks_found=0, - tracks_added_to_wishlist=0, - success=False, - error_message=str(e) - ) - def _apply_global_watchlist_overrides(self, watchlist_artists: List[WatchlistArtist]): """Apply global watchlist release-type overrides to a batch of artists.""" try: diff --git a/web_server.py b/web_server.py index f74db392..58c08eb1 100644 --- a/web_server.py +++ b/web_server.py @@ -41463,7 +41463,7 @@ def refresh_spotify_library(): def _run_sync(): try: from core.watchlist_scanner import get_watchlist_scanner - scanner = get_watchlist_scanner() + scanner = get_watchlist_scanner(spotify_client) if scanner: # Force full sync by clearing last_sync timestamp database = get_database()