Remove dead watchlist scan paths

Drop the legacy watchlist scan entrypoints that are no longer used by the web scan flow, and keep the live refresh path pointed at the shared scanner helper.
pull/303/head
Antti Kettunen 1 month ago
parent 657d86cace
commit 40fa139804

@ -398,11 +398,6 @@ class WatchlistScanner:
self._metadata_service = MetadataService()
return self._metadata_service
def _reset_spotify_run_state(self):
"""Clear per-run Spotify suppression state."""
self._spotify_disabled_for_run = False
self._spotify_disabled_reason = None
def _disable_spotify_for_run(self, reason: str):
"""Disable Spotify for rest of current run, once."""
if not self._spotify_disabled_for_run:
@ -574,334 +569,6 @@ class WatchlistScanner:
return albums
def scan_all_watchlist_artists(self) -> List[ScanResult]:
    """
    Scan a selection of watchlist artists for new releases.

    Selection per run:
    - Every artist that was never scanned, or was last scanned more than
      7 days ago, is always included (this group is NOT capped).
    - If that group is smaller than 50, the remaining slots are filled with
      a random sample of the recently scanned artists.

    Before scanning, missing provider IDs are backfilled for ALL watchlist
    artists (not only the selected ones). After scanning, the discovery
    pool, seasonal content, Last.fm Radio playlists and the Spotify library
    cache are refreshed.

    Returns:
        One ScanResult per selected artist. An empty list is returned when
        the watchlist is empty or when an unexpected error aborts the run
        (including an error raised by the post-scan steps).
    """
    from datetime import datetime, timedelta
    import random

    logger.info("Starting watchlist scan")

    def _suppress_spotify_if_rate_limited():
        # Checked at several points of the run so that Spotify is switched
        # off for the remainder as soon as the client reports a rate limit.
        if self.spotify_client and self.spotify_client.is_rate_limited():
            self._disable_spotify_for_run("global Spotify rate limit active")

    try:
        self._reset_spotify_run_state()

        # Get all watchlist artists
        all_watchlist_artists = self.database.get_watchlist_artists()
        if not all_watchlist_artists:
            logger.info("No artists in watchlist to scan")
            return []

        logger.info(f"Found {len(all_watchlist_artists)} total artists in watchlist")

        # 1. Must scan: artists never scanned or not scanned in 7+ days.
        # NOTE(review): assumes last_scan_timestamp is a naive datetime
        # comparable with datetime.now() - confirm against the database layer.
        seven_days_ago = datetime.now() - timedelta(days=7)
        must_scan = []
        can_skip = []
        for artist in all_watchlist_artists:
            last_scan = artist.last_scan_timestamp
            if last_scan is None or last_scan < seven_days_ago:
                must_scan.append(artist)
            else:
                # Scanned recently - may still be picked by the random fill.
                can_skip.append(artist)

        logger.info(f"Artists requiring scan (not scanned in 7+ days): {len(must_scan)}")
        logger.info(f"Artists scanned recently (< 7 days): {len(can_skip)}")

        # 2. Fill remaining slots (up to 50 total) with a random selection.
        max_artists_per_scan = 50
        artists_to_scan = must_scan.copy()
        remaining_slots = max_artists_per_scan - len(must_scan)
        if remaining_slots > 0 and can_skip:
            random_selection = random.sample(can_skip, min(remaining_slots, len(can_skip)))
            artists_to_scan.extend(random_selection)
            logger.info(f"Additionally scanning {len(random_selection)} randomly selected artists")

        # Shuffle to avoid always scanning in the same order.
        random.shuffle(artists_to_scan)
        logger.info(f"Total artists to scan this run: {len(artists_to_scan)}")

        # Only report skipped artists when some really are skipped; the old
        # check could log "Skipping 0 artists" when every artist was overdue.
        skipped_count = len(all_watchlist_artists) - len(artists_to_scan)
        if skipped_count > 0:
            logger.info(f"Skipping {skipped_count} artists (will be scanned in future runs)")

        watchlist_artists = artists_to_scan

        # PROACTIVE ID BACKFILLING (cross-provider support)
        # iTunes and Deezer are always attempted; Spotify only when it is the
        # primary source; Discogs only when a token is configured.
        _suppress_spotify_if_rate_limited()
        providers_to_backfill = ['itunes', 'deezer']
        if self._spotify_is_primary_source():
            providers_to_backfill.append('spotify')
        try:
            from config.settings import config_manager as _cfg
            if _cfg.get('discogs.token', ''):
                providers_to_backfill.append('discogs')
        except Exception as cfg_error:
            # Best-effort: Discogs backfill is optional, so record the reason
            # instead of silently discarding it, and carry on.
            logger.debug(f"Skipping Discogs ID backfill, could not read discogs.token: {cfg_error}")

        for provider in providers_to_backfill:
            try:
                self._backfill_missing_ids(all_watchlist_artists, provider)
            except Exception as backfill_error:
                # Continue with the scan even if backfilling fails.
                logger.warning(f"Error during {provider} ID backfilling: {backfill_error}")

        scan_results = []
        for i, artist in enumerate(watchlist_artists):
            _suppress_spotify_if_rate_limited()
            try:
                result = self.scan_artist(artist)
                scan_results.append(result)
                _suppress_spotify_if_rate_limited()

                if result.success:
                    logger.info(f"Scanned {artist.artist_name}: {result.new_tracks_found} new tracks found")
                else:
                    logger.warning(f"Failed to scan {artist.artist_name}: {result.error_message}")

                # Delay between artists to avoid hitting Spotify API limits;
                # no delay is needed after the last artist.
                if i < len(watchlist_artists) - 1:
                    logger.debug(f"Rate limiting: waiting {DELAY_BETWEEN_ARTISTS}s before scanning next artist")
                    time.sleep(DELAY_BETWEEN_ARTISTS)
            except Exception as e:
                logger.error(f"Error scanning artist {artist.artist_name}: {e}")
                scan_results.append(ScanResult(
                    artist_name=artist.artist_name,
                    # Normalised to '' like the failure results built by scan_artist.
                    spotify_artist_id=artist.spotify_artist_id or '',
                    albums_checked=0,
                    new_tracks_found=0,
                    tracks_added_to_wishlist=0,
                    success=False,
                    error_message=str(e)
                ))

        # Log summary
        successful_scans = [r for r in scan_results if r.success]
        total_new_tracks = sum(r.new_tracks_found for r in successful_scans)
        total_added_to_wishlist = sum(r.tracks_added_to_wishlist for r in successful_scans)
        logger.info(f"Watchlist scan complete: {len(successful_scans)}/{len(scan_results)} artists scanned successfully")
        logger.info(f"Found {total_new_tracks} new tracks, added {total_added_to_wishlist} to wishlist")

        # Populate discovery pool with tracks from similar artists
        logger.info("Starting discovery pool population...")
        _suppress_spotify_if_rate_limited()
        self.populate_discovery_pool()

        # Populate seasonal content
        logger.info("Updating seasonal content...")
        self._populate_seasonal_content()

        # Generate Last.fm Radio playlists for top tracks
        self._generate_lastfm_radio_playlists()

        # Sync Spotify library cache (runs after main scan); a failure here
        # is logged and does not discard the scan results.
        try:
            _suppress_spotify_if_rate_limited()
            self.sync_spotify_library_cache()
        except Exception as lib_err:
            logger.warning(f"Error syncing Spotify library cache: {lib_err}")

        return scan_results
    except Exception as e:
        logger.error(f"Error during watchlist scan: {e}")
        return []
    finally:
        # Always clear the per-run Spotify suppression, even on early return.
        self._reset_spotify_run_state()
def scan_artist(self, watchlist_artist: WatchlistArtist) -> ScanResult:
    """
    Scan one watchlist artist for releases with tracks missing from the library.

    The client, artist ID and provider come from
    _get_active_client_and_artist_id(). The artist's last scan timestamp and
    lookback_days are handed to the discography lookup. At most 50 releases
    are checked; each missing track is offered to the wishlist. Afterwards
    the scan timestamp is updated and similar artists are refreshed or
    backfilled.

    Returns:
        A ScanResult with success=True and the counters on completion, or
        success=False with an error_message when no client/ID is available,
        the discography lookup returns None, or an unexpected error occurs.
    """
    def _failed_result(message):
        # All failure results share the same zeroed counters.
        return ScanResult(
            artist_name=watchlist_artist.artist_name,
            spotify_artist_id=watchlist_artist.spotify_artist_id or '',
            albums_checked=0,
            new_tracks_found=0,
            tracks_added_to_wishlist=0,
            success=False,
            error_message=message
        )

    try:
        logger.info(f"Scanning artist: {watchlist_artist.artist_name}")

        # Resolve the client and the artist ID for the active provider.
        client, artist_id, provider = self._get_active_client_and_artist_id(watchlist_artist)
        if client is None or artist_id is None:
            return _failed_result(f"No {self.metadata_service.get_active_provider()} ID available for this artist")

        logger.info(f"Using {provider} provider for {watchlist_artist.artist_name} (ID: {artist_id})")

        # Refresh the stored artist image on every scan; failures are only logged.
        try:
            image_url = None
            artist_data = client.get_artist(artist_id)
            if artist_data:
                if 'images' in artist_data and artist_data['images']:
                    # List of image dicts with a 'url' key: take the second
                    # entry when there is one, otherwise the first.
                    images = artist_data['images']
                    image_url = images[1 if len(images) > 1 else 0]['url']
                elif artist_data.get('image_url'):
                    # Flat 'image_url' field.
                    image_url = artist_data['image_url']

            if not image_url:
                logger.debug(f"No image available for {watchlist_artist.artist_name} from {provider}")
            else:
                image_key = (
                    watchlist_artist.spotify_artist_id
                    or watchlist_artist.itunes_artist_id
                    or watchlist_artist.deezer_artist_id
                    or artist_id
                )
                self.database.update_watchlist_artist_image(image_key, image_url)
                if not watchlist_artist.image_url:
                    logger.info(f"Backfilled artist image for {watchlist_artist.artist_name}")
        except Exception as img_error:
            logger.warning(f"Could not update artist image for {watchlist_artist.artist_name}: {img_error}")

        # Fetch the discography through the active provider's client.
        albums = self._get_artist_discography_with_client(
            client,
            artist_id,
            watchlist_artist.last_scan_timestamp,
            lookback_days=watchlist_artist.lookback_days,
        )
        if albums is None:
            return _failed_result(f"Failed to get artist discography from {provider}")

        logger.info(f"Found {len(albums)} albums/singles to check for {watchlist_artist.artist_name}")

        # Cap the number of releases per artist to keep a scan session bounded.
        MAX_ALBUMS_PER_ARTIST = 50
        if len(albums) > MAX_ALBUMS_PER_ARTIST:
            logger.warning(f"Artist {watchlist_artist.artist_name} has {len(albums)} albums, limiting to {MAX_ALBUMS_PER_ARTIST} most recent")
            # presumably the list is ordered most-recent first - verify in the discography helper
            albums = albums[:MAX_ALBUMS_PER_ARTIST]

        new_tracks_found = 0
        tracks_added_to_wishlist = 0
        final_index = len(albums) - 1

        for album_index, album in enumerate(albums):
            try:
                logger.info(f"Checking album {album_index + 1}/{len(albums)}: {album.name}")
                album_data = client.get_album(album.id)
                if not album_data:
                    continue

                # The track list is always fetched through get_album_tracks().
                track_page = client.get_album_tracks(album.id)
                if not track_page or not track_page.get('items'):
                    continue

                tracks = track_page['items']
                logger.debug(f"Checking album: {album_data.get('name', 'Unknown')} ({len(tracks)} tracks)")

                # Honour the per-artist release-type preference.
                if not self._should_include_release(len(tracks), watchlist_artist):
                    if len(tracks) >= 7:
                        release_type = "album"
                    elif len(tracks) >= 4:
                        release_type = "EP"
                    else:
                        release_type = "single"
                    logger.debug(f"Skipping {release_type}: {album_data.get('name', 'Unknown')} - user preference")
                    continue

                # Releases whose tracklist is still placeholders are skipped.
                if self._has_placeholder_tracks(tracks):
                    logger.info(f"Skipping album with placeholder tracks (unreleased tracklist): {album_data.get('name', 'Unknown')}")
                    continue

                for track in tracks:
                    # Content-type filters are applied per track.
                    if not self._should_include_track(track, album_data, watchlist_artist):
                        continue
                    if not self.is_track_missing_from_library(track, album_name=album_data.get('name')):
                        continue
                    new_tracks_found += 1
                    if self.add_track_to_wishlist(track, album_data, watchlist_artist):
                        tracks_added_to_wishlist += 1

                # Pause between fully processed albums; never after the last one.
                if album_index < final_index:
                    logger.debug(f"Rate limiting: waiting {DELAY_BETWEEN_ALBUMS}s before next album")
                    time.sleep(DELAY_BETWEEN_ALBUMS)
            except Exception as e:
                # A failing album is logged and the loop moves on to the next.
                logger.warning(f"Error checking album {album.name}: {e}")

        # Record that this artist has now been scanned.
        self.update_artist_scan_timestamp(watchlist_artist)

        # Similar artists: reuse the cached set when it is fresh, else refetch.
        source_artist_id = (
            watchlist_artist.spotify_artist_id
            or watchlist_artist.itunes_artist_id
            or str(watchlist_artist.id)
        )
        try:
            # When Spotify is authenticated the freshness check also requires Spotify IDs.
            needs_spotify_ids = self.spotify_client and self.spotify_client.is_spotify_authenticated()
            profile = getattr(watchlist_artist, 'profile_id', 1)
            cache_is_fresh = self.database.has_fresh_similar_artists(
                source_artist_id,
                days_threshold=30,
                require_spotify=needs_spotify_ids,
                profile_id=profile,
            )
            if cache_is_fresh:
                logger.info(f"Similar artists for {watchlist_artist.artist_name} are cached and fresh, skipping MusicMap fetch")
                # Cached entries still get missing iTunes IDs backfilled.
                self._backfill_similar_artists_itunes_ids(source_artist_id, profile_id=profile)
            else:
                logger.info(f"Fetching similar artists for {watchlist_artist.artist_name}...")
                self.update_similar_artists(watchlist_artist, profile_id=profile)
                logger.info(f"Similar artists updated for {watchlist_artist.artist_name}")
        except Exception as similar_error:
            logger.warning(f"Failed to update similar artists for {watchlist_artist.artist_name}: {similar_error}")

        return ScanResult(
            artist_name=watchlist_artist.artist_name,
            spotify_artist_id=watchlist_artist.spotify_artist_id or '',
            albums_checked=len(albums),
            new_tracks_found=new_tracks_found,
            tracks_added_to_wishlist=tracks_added_to_wishlist,
            success=True
        )
    except Exception as e:
        logger.error(f"Error scanning artist {watchlist_artist.artist_name}: {e}")
        return _failed_result(str(e))
def _apply_global_watchlist_overrides(self, watchlist_artists: List[WatchlistArtist]):
"""Apply global watchlist release-type overrides to a batch of artists."""
try:

@ -41463,7 +41463,7 @@ def refresh_spotify_library():
def _run_sync():
try:
from core.watchlist_scanner import get_watchlist_scanner
scanner = get_watchlist_scanner()
scanner = get_watchlist_scanner(spotify_client)
if scanner:
# Force full sync by clearing last_sync timestamp
database = get_database()

Loading…
Cancel
Save