@ -398,11 +398,6 @@ class WatchlistScanner:
self . _metadata_service = MetadataService ( )
return self . _metadata_service
def _reset_spotify_run_state(self):
    """Reset the per-run Spotify suppression markers.

    After this call Spotify is no longer flagged as disabled for the run and
    no disable reason is stored.
    """
    # Same two writes as before, in the same order, expressed as one
    # unpacking assignment.
    self._spotify_disabled_for_run, self._spotify_disabled_reason = False, None
def _disable_spotify_for_run ( self , reason : str ) :
""" Disable Spotify for rest of current run, once. """
if not self . _spotify_disabled_for_run :
@ -574,334 +569,6 @@ class WatchlistScanner:
return albums
def scan_all_watchlist_artists ( self ) - > List [ ScanResult ] :
"""
Scan a selection of watchlist artists for new releases, then refresh
the discovery-related data.

Selection per run:
- Every artist that was never scanned, or whose last scan is older than
  7 days, is always included (this group is not capped).
- If fewer than 50 artists were selected that way, the remaining slots are
  filled with a random sample of the recently scanned artists.
- The final list is shuffled before scanning.

Before scanning, missing provider IDs are backfilled for the whole
watchlist. After scanning, the discovery pool, seasonal content, Last.fm
radio playlists and the Spotify library cache are updated.

Returns:
    One ScanResult per artist that was attempted. An empty list is returned
    when the watchlist is empty or when an unexpected exception escapes the
    main body.
"""
logger . info ( " Starting watchlist scan " )
try :
# Clear the per-run Spotify suppression flags; they are cleared again in the finally block.
self . _reset_spotify_run_state ( )
# Function-local imports, used only by the selection logic below.
from datetime import datetime , timedelta
import random
# Get all watchlist artists
all_watchlist_artists = self . database . get_watchlist_artists ( )
if not all_watchlist_artists :
logger . info ( " No artists in watchlist to scan " )
return [ ]
logger . info ( f " Found { len ( all_watchlist_artists ) } total artists in watchlist " )
# OPTIMIZATION: Select up to 50 artists to scan
# 1. Must scan: Artists not scanned in 7+ days (or never scanned)
# NOTE(review): datetime.now() is naive; this assumes last_scan_timestamp is a naive datetime too - confirm.
seven_days_ago = datetime . now ( ) - timedelta ( days = 7 )
must_scan = [ ]
can_skip = [ ]
for artist in all_watchlist_artists :
if artist . last_scan_timestamp is None :
# Never scanned - must scan
must_scan . append ( artist )
elif artist . last_scan_timestamp < seven_days_ago :
# Not scanned in 7+ days - must scan
must_scan . append ( artist )
else :
# Scanned recently - can skip (but might randomly select)
can_skip . append ( artist )
logger . info ( f " Artists requiring scan (not scanned in 7+ days): { len ( must_scan ) } " )
logger . info ( f " Artists scanned recently (< 7 days): { len ( can_skip ) } " )
# 2. Fill remaining slots (up to 50 total) with random selection
# must_scan is never truncated: when it already holds 50 or more artists,
# remaining_slots is <= 0 and no random extras are added.
max_artists_per_scan = 50
artists_to_scan = must_scan . copy ( )
remaining_slots = max_artists_per_scan - len ( must_scan )
if remaining_slots > 0 and can_skip :
# Randomly sample from recently-scanned artists
random_sample_size = min ( remaining_slots , len ( can_skip ) )
random_selection = random . sample ( can_skip , random_sample_size )
artists_to_scan . extend ( random_selection )
logger . info ( f " Additionally scanning { len ( random_selection ) } randomly selected artists " )
# Shuffle to avoid always scanning same order
random . shuffle ( artists_to_scan )
logger . info ( f " Total artists to scan this run: { len ( artists_to_scan ) } " )
# Only report skipped artists when the watchlist is larger than the per-run target.
if len ( all_watchlist_artists ) > max_artists_per_scan :
logger . info ( f " Skipping { len ( all_watchlist_artists ) - len ( artists_to_scan ) } artists (will be scanned in future runs) " )
watchlist_artists = artists_to_scan
# PROACTIVE ID BACKFILLING (cross-provider support)
# Before scanning, ensure ALL artists have IDs for ALL available sources
# iTunes and Deezer are always available; Spotify requires authentication
# If the client reports a rate limit, flag Spotify as disabled for this run.
# The same check is repeated around each artist and before the later steps.
if self . spotify_client and self . spotify_client . is_rate_limited ( ) :
self . _disable_spotify_for_run ( " global Spotify rate limit active " )
providers_to_backfill = [ ' itunes ' , ' deezer ' ]
if self . _spotify_is_primary_source ( ) :
providers_to_backfill . append ( ' spotify ' )
# Discogs is added only when a token is configured. Best-effort: any error
# while reading the setting is swallowed and Discogs is simply left out.
try :
from config . settings import config_manager as _cfg
if _cfg . get ( ' discogs.token ' , ' ' ) :
providers_to_backfill . append ( ' discogs ' )
except Exception :
pass
# Backfill runs over the full watchlist, not only the artists selected for this run.
for provider in providers_to_backfill :
try :
self . _backfill_missing_ids ( all_watchlist_artists , provider )
except Exception as backfill_error :
logger . warning ( f " Error during { provider } ID backfilling: { backfill_error } " )
# Continue with scan even if backfilling fails
scan_results = [ ]
for i , artist in enumerate ( watchlist_artists ) :
# Re-check the rate limit before each artist.
if self . spotify_client and self . spotify_client . is_rate_limited ( ) :
self . _disable_spotify_for_run ( " global Spotify rate limit active " )
try :
result = self . scan_artist ( artist )
scan_results . append ( result )
# Re-check again right after the scan, since the scan itself may have triggered a limit.
if self . spotify_client and self . spotify_client . is_rate_limited ( ) :
self . _disable_spotify_for_run ( " global Spotify rate limit active " )
if result . success :
logger . info ( f " Scanned { artist . artist_name } : { result . new_tracks_found } new tracks found " )
else :
logger . warning ( f " Failed to scan { artist . artist_name } : { result . error_message } " )
# Rate limiting: Add delay between artists to avoid hitting Spotify API limits
# This is critical to prevent getting banned for 6+ hours
if i < len ( watchlist_artists ) - 1 : # Don't delay after the last artist
logger . debug ( f " Rate limiting: waiting { DELAY_BETWEEN_ARTISTS } s before scanning next artist " )
time . sleep ( DELAY_BETWEEN_ARTISTS )
except Exception as e :
# A failure for one artist is recorded as a failed ScanResult and the loop continues.
# NOTE(review): scan_artist passes "spotify_artist_id or ''" in its results, while this
# passes the raw attribute, which may be None - confirm ScanResult accepts that.
logger . error ( f " Error scanning artist { artist . artist_name } : { e } " )
scan_results . append ( ScanResult (
artist_name = artist . artist_name ,
spotify_artist_id = artist . spotify_artist_id ,
albums_checked = 0 ,
new_tracks_found = 0 ,
tracks_added_to_wishlist = 0 ,
success = False ,
error_message = str ( e )
) )
# Log summary
# Totals are computed over successful results only.
successful_scans = [ r for r in scan_results if r . success ]
total_new_tracks = sum ( r . new_tracks_found for r in successful_scans )
total_added_to_wishlist = sum ( r . tracks_added_to_wishlist for r in successful_scans )
logger . info ( f " Watchlist scan complete: { len ( successful_scans ) } / { len ( scan_results ) } artists scanned successfully " )
logger . info ( f " Found { total_new_tracks } new tracks, added { total_added_to_wishlist } to wishlist " )
# Populate discovery pool with tracks from similar artists
logger . info ( " Starting discovery pool population... " )
if self . spotify_client and self . spotify_client . is_rate_limited ( ) :
self . _disable_spotify_for_run ( " global Spotify rate limit active " )
self . populate_discovery_pool ( )
# Populate seasonal content (runs independently with its own threshold)
logger . info ( " Updating seasonal content... " )
self . _populate_seasonal_content ( )
# Generate Last.fm Radio playlists for top tracks (max once per week)
self . _generate_lastfm_radio_playlists ( )
# Sync Spotify library cache (runs after main scan)
# This step has its own try/except, so a failure here is logged and does not discard scan_results.
try :
if self . spotify_client and self . spotify_client . is_rate_limited ( ) :
self . _disable_spotify_for_run ( " global Spotify rate limit active " )
self . sync_spotify_library_cache ( )
except Exception as lib_err :
logger . warning ( f " Error syncing Spotify library cache: { lib_err } " )
return scan_results
except Exception as e :
# NOTE(review): any exception reaching this handler (including one raised by the
# discovery/seasonal/Last.fm steps) returns an empty list, so results already
# collected in scan_results are not reported to the caller - confirm this is intended.
logger . error ( f " Error during watchlist scan: { e } " )
return [ ]
finally :
# Always clear the per-run Spotify suppression flags, on every exit path.
self . _reset_spotify_run_state ( )
def scan_artist ( self , watchlist_artist : WatchlistArtist ) - > ScanResult :
"""
Scan a single watchlist artist for new releases and queue missing tracks.

Steps, in order:
1. Resolve the client, artist ID and provider name via
   _get_active_client_and_artist_id; fail if either client or ID is missing.
2. Refresh the stored artist image (best-effort, errors are only logged).
3. Fetch the discography, passing the artist's last scan timestamp and
   lookback_days; fail if the helper returns None.
4. Check at most 50 albums: load album and track data, apply the release-type,
   placeholder-track and per-track content filters, and add tracks that are
   missing from the library to the wishlist.
5. Update the artist's scan timestamp.
6. Refresh similar artists unless a fresh cached set exists (best-effort).

Args:
    watchlist_artist: The watchlist entry to scan.

Returns:
    A ScanResult. success is False when no client/ID is available, when the
    discography cannot be fetched, or when an unexpected exception occurs;
    in the last case error_message carries str(exception).
"""
try :
logger . info ( f " Scanning artist: { watchlist_artist . artist_name } " )
# Get the active client and artist ID based on provider
client , artist_id , provider = self . _get_active_client_and_artist_id ( watchlist_artist )
if client is None or artist_id is None :
# The message asks the metadata service for the active provider name rather than
# using the local "provider" value returned above.
return ScanResult (
artist_name = watchlist_artist . artist_name ,
spotify_artist_id = watchlist_artist . spotify_artist_id or ' ' ,
albums_checked = 0 ,
new_tracks_found = 0 ,
tracks_added_to_wishlist = 0 ,
success = False ,
error_message = f " No { self . metadata_service . get_active_provider ( ) } ID available for this artist "
)
logger . info ( f " Using { provider } provider for { watchlist_artist . artist_name } (ID: { artist_id } ) " )
# Update artist image if missing or on every scan to keep fresh
# Best-effort: any error in this section is logged as a warning and the scan continues.
try :
image_url = None
artist_data = client . get_artist ( artist_id )
if artist_data :
if ' images ' in artist_data and artist_data [ ' images ' ] :
# Spotify/Deezer format: array of {url, height, width}
# Uses the second entry when more than one image exists, otherwise the first.
image_url = artist_data [ ' images ' ] [ 1 ] [ ' url ' ] if len ( artist_data [ ' images ' ] ) > 1 else artist_data [ ' images ' ] [ 0 ] [ ' url ' ]
elif artist_data . get ( ' image_url ' ) :
# Direct image_url format (iTunes/some providers)
image_url = artist_data [ ' image_url ' ]
if image_url :
# Key for the DB update: first non-empty of the Spotify, iTunes and Deezer IDs,
# falling back to the ID resolved for the active provider.
db_artist_id = watchlist_artist . spotify_artist_id or watchlist_artist . itunes_artist_id or watchlist_artist . deezer_artist_id or artist_id
self . database . update_watchlist_artist_image ( db_artist_id , image_url )
if not watchlist_artist . image_url :
logger . info ( f " Backfilled artist image for { watchlist_artist . artist_name } " )
else :
logger . debug ( f " No image available for { watchlist_artist . artist_name } from { provider } " )
except Exception as img_error :
logger . warning ( f " Could not update artist image for { watchlist_artist . artist_name } : { img_error } " )
# Get artist discography using active provider
albums = self . _get_artist_discography_with_client ( client , artist_id , watchlist_artist . last_scan_timestamp , lookback_days = watchlist_artist . lookback_days )
# None is treated as a fetch failure; an empty list is a valid "nothing to check" result.
if albums is None :
return ScanResult (
artist_name = watchlist_artist . artist_name ,
spotify_artist_id = watchlist_artist . spotify_artist_id or ' ' ,
albums_checked = 0 ,
new_tracks_found = 0 ,
tracks_added_to_wishlist = 0 ,
success = False ,
error_message = f " Failed to get artist discography from { provider } "
)
logger . info ( f " Found { len ( albums ) } albums/singles to check for { watchlist_artist . artist_name } " )
# Safety check: Limit number of albums to scan to prevent extremely long sessions
MAX_ALBUMS_PER_ARTIST = 50 # Reasonable limit to prevent API abuse
if len ( albums ) > MAX_ALBUMS_PER_ARTIST :
logger . warning ( f " Artist { watchlist_artist . artist_name } has { len ( albums ) } albums, limiting to { MAX_ALBUMS_PER_ARTIST } most recent " )
# Keeps the first 50 entries; relies on the discography helper returning newest first - TODO confirm.
albums = albums [ : MAX_ALBUMS_PER_ARTIST ] # Most recent albums are first
# Check each album/single for missing tracks
new_tracks_found = 0
tracks_added_to_wishlist = 0
for album_index , album in enumerate ( albums ) :
# Each album is handled in its own try block so one failing album does not abort the artist.
try :
# Get full album data
logger . info ( f " Checking album { album_index + 1 } / { len ( albums ) } : { album . name } " )
album_data = client . get_album ( album . id )
if not album_data :
continue
# Get album tracks (works for both Spotify and iTunes)
# Spotify's get_album() includes tracks, but we use get_album_tracks() for consistency
tracks_data = client . get_album_tracks ( album . id )
if not tracks_data or not tracks_data . get ( ' items ' ) :
continue
tracks = tracks_data [ ' items ' ]
logger . debug ( f " Checking album: { album_data . get ( ' name ' , ' Unknown ' ) } ( { len ( tracks ) } tracks) " )
# Check if user wants this type of release
if not self . _should_include_release ( len ( tracks ) , watchlist_artist ) :
# Label used only for the debug message: 7+ tracks "album", 4-6 "EP", fewer "single".
release_type = " album " if len ( tracks ) > = 7 else ( " EP " if len ( tracks ) > = 4 else " single " )
logger . debug ( f " Skipping { release_type } : { album_data . get ( ' name ' , ' Unknown ' ) } - user preference " )
continue
# Skip albums with placeholder track names (unreleased tracklist)
# Spotify uses "Track 1", "Track 2", etc. for unannounced tracks
if self . _has_placeholder_tracks ( tracks ) :
logger . info ( f " Skipping album with placeholder tracks (unreleased tracklist): { album_data . get ( ' name ' , ' Unknown ' ) } " )
continue
# Check each track
for track in tracks :
# Check content type filters (live, remix, acoustic, compilation)
if not self . _should_include_track ( track , album_data , watchlist_artist ) :
continue # Skip this track based on content type preferences
if self . is_track_missing_from_library ( track , album_name = album_data . get ( ' name ' ) ) :
# Counted as "found" even when adding it to the wishlist does not succeed.
new_tracks_found + = 1
# Add to wishlist
if self . add_track_to_wishlist ( track , album_data , watchlist_artist ) :
tracks_added_to_wishlist + = 1
# Rate limiting: Add delay between albums to prevent API abuse
# This is especially important for artists with many albums
# NOTE(review): the early "continue" paths above (and the except path below) appear to
# bypass this delay even though client calls were already made for that album - confirm.
if album_index < len ( albums ) - 1 : # Don't delay after the last album
logger . debug ( f " Rate limiting: waiting { DELAY_BETWEEN_ALBUMS } s before next album " )
time . sleep ( DELAY_BETWEEN_ALBUMS )
except Exception as e :
logger . warning ( f " Error checking album { album . name } : { e } " )
continue
# Update last scan timestamp for this artist
self . update_artist_scan_timestamp ( watchlist_artist )
# Fetch and store similar artists for discovery feature (with caching to avoid over-polling)
# Similar artists are fetched from MusicMap (works with any source) and matched to both Spotify and iTunes
# Key for similar-artist storage: Spotify ID, else iTunes ID, else the watchlist row id as a string.
source_artist_id = watchlist_artist . spotify_artist_id or watchlist_artist . itunes_artist_id or str ( watchlist_artist . id )
# Best-effort: a failure here is logged and the scan is still reported as successful.
try :
# Check if we have fresh similar artists cached (< 30 days old)
# If Spotify is authenticated, also require Spotify IDs to be present
spotify_authenticated = self . spotify_client and self . spotify_client . is_spotify_authenticated ( )
# Falls back to profile 1 when the artist object has no profile_id attribute.
artist_profile_id = getattr ( watchlist_artist , ' profile_id ' , 1 )
if self . database . has_fresh_similar_artists ( source_artist_id , days_threshold = 30 , require_spotify = spotify_authenticated , profile_id = artist_profile_id ) :
logger . info ( f " Similar artists for { watchlist_artist . artist_name } are cached and fresh, skipping MusicMap fetch " )
# Even if cached, backfill missing iTunes IDs (seamless dual-source support)
self . _backfill_similar_artists_itunes_ids ( source_artist_id , profile_id = artist_profile_id )
else :
logger . info ( f " Fetching similar artists for { watchlist_artist . artist_name } ... " )
self . update_similar_artists ( watchlist_artist , profile_id = artist_profile_id )
logger . info ( f " Similar artists updated for { watchlist_artist . artist_name } " )
except Exception as similar_error :
logger . warning ( f " Failed to update similar artists for { watchlist_artist . artist_name } : { similar_error } " )
# albums_checked is the number of albums iterated (after the 50-album cap),
# including albums that were skipped or raised an error.
return ScanResult (
artist_name = watchlist_artist . artist_name ,
spotify_artist_id = watchlist_artist . spotify_artist_id or ' ' ,
albums_checked = len ( albums ) ,
new_tracks_found = new_tracks_found ,
tracks_added_to_wishlist = tracks_added_to_wishlist ,
success = True
)
except Exception as e :
# Any unexpected error is converted into a failed ScanResult instead of propagating.
logger . error ( f " Error scanning artist { watchlist_artist . artist_name } : { e } " )
return ScanResult (
artist_name = watchlist_artist . artist_name ,
spotify_artist_id = watchlist_artist . spotify_artist_id or ' ' ,
albums_checked = 0 ,
new_tracks_found = 0 ,
tracks_added_to_wishlist = 0 ,
success = False ,
error_message = str ( e )
)
def _apply_global_watchlist_overrides ( self , watchlist_artists : List [ WatchlistArtist ] ) :
""" Apply global watchlist release-type overrides to a batch of artists. """
try :