diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index 0815a528..b20f36f2 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -4,7 +4,7 @@ Watchlist Scanner Service - Monitors watched artists for new releases """ -from typing import List, Dict, Any, Optional +from typing import List, Dict, Any, Optional, Callable from datetime import datetime, timezone, timedelta from dataclasses import dataclass import re @@ -901,6 +901,423 @@ class WatchlistScanner: success=False, error_message=str(e) ) + + def _apply_global_watchlist_overrides(self, watchlist_artists: List[WatchlistArtist]): + """Apply global watchlist release-type overrides to a batch of artists.""" + try: + from config.settings import config_manager + except Exception: + return + + if not config_manager.get('watchlist.global_override_enabled', False): + return + + g_albums = config_manager.get('watchlist.global_include_albums', True) + g_eps = config_manager.get('watchlist.global_include_eps', True) + g_singles = config_manager.get('watchlist.global_include_singles', True) + g_live = config_manager.get('watchlist.global_include_live', False) + g_remixes = config_manager.get('watchlist.global_include_remixes', False) + g_acoustic = config_manager.get('watchlist.global_include_acoustic', False) + g_compilations = config_manager.get('watchlist.global_include_compilations', False) + g_instrumentals = config_manager.get('watchlist.global_include_instrumentals', False) + + logger.info( + "Applying global watchlist override to %s artists " + "(albums=%s, eps=%s, singles=%s, live=%s, remixes=%s, acoustic=%s, compilations=%s, instrumentals=%s)", + len(watchlist_artists), + g_albums, + g_eps, + g_singles, + g_live, + g_remixes, + g_acoustic, + g_compilations, + g_instrumentals, + ) + + for artist in watchlist_artists: + artist.include_albums = g_albums + artist.include_eps = g_eps + artist.include_singles = g_singles + artist.include_live = g_live + artist.include_remixes = g_remixes + artist.include_acoustic = g_acoustic + artist.include_compilations = g_compilations + artist.include_instrumentals = g_instrumentals + + def scan_watchlist_profile( + self, + profile_id: int, + watchlist_artists: Optional[List[WatchlistArtist]] = None, + *, + scan_state: Optional[Dict[str, Any]] = None, + progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None, + cancel_check: Optional[Callable[[], bool]] = None, + artist_index_offset: int = 0, + total_artists_override: Optional[int] = None, + apply_global_overrides: bool = True, + ) -> List[ScanResult]: + """Scan a single watchlist profile using the shared watchlist scan engine.""" + if watchlist_artists is None: + watchlist_artists = self.database.get_watchlist_artists(profile_id=profile_id) + + if apply_global_overrides: + self._apply_global_watchlist_overrides(watchlist_artists) + + return self.scan_watchlist_artists( + watchlist_artists, + profile_id=profile_id, + scan_state=scan_state, + progress_callback=progress_callback, + cancel_check=cancel_check, + artist_index_offset=artist_index_offset, + total_artists_override=total_artists_override, + ) + + def scan_watchlist_artists( + self, + watchlist_artists: List[WatchlistArtist], + *, + profile_id: int = 1, + scan_state: Optional[Dict[str, Any]] = None, + progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None, + cancel_check: Optional[Callable[[], bool]] = None, + artist_index_offset: int = 0, + total_artists_override: Optional[int] = None, + ) -> List[ScanResult]: + """Scan a list of watchlist artists using the shared web watchlist scan flow.""" + scan_results: List[ScanResult] = [] + if not watchlist_artists: + if scan_state is not None: + scan_state.update({ + 'status': 'completed', + 'total_artists': 0, + 'current_artist_index': 0, + 'current_artist_name': '', + 'current_artist_image_url': '', + 'current_phase': 'completed', + 'albums_to_check': 0, + 'albums_checked': 0, + 'current_album': '', + 'current_album_image_url': '', + 'current_track_name': '', + 'tracks_found_this_scan': 0, + 'tracks_added_this_scan': 0, + 'recent_wishlist_additions': [], + 'results': [], + 'summary': { + 'total_artists': 0, + 'successful_scans': 0, + 'new_tracks_found': 0, + 'tracks_added_to_wishlist': 0, + }, + 'completed_at': datetime.now(), + 'error': None, + }) + return scan_results + + if scan_state is not None: + scan_state.update({ + 'status': 'scanning', + 'started_at': scan_state.get('started_at') or datetime.now(), + 'total_artists': total_artists_override if total_artists_override is not None else len(watchlist_artists), + 'current_artist_index': scan_state.get('current_artist_index', artist_index_offset), + 'current_artist_name': scan_state.get('current_artist_name', ''), + 'current_artist_image_url': scan_state.get('current_artist_image_url', ''), + 'current_phase': 'starting', + 'albums_to_check': 0, + 'albums_checked': 0, + 'current_album': '', + 'current_album_image_url': '', + 'current_track_name': '', + 'tracks_found_this_scan': scan_state.get('tracks_found_this_scan', 0), + 'tracks_added_this_scan': scan_state.get('tracks_added_this_scan', 0), + 'recent_wishlist_additions': scan_state.get('recent_wishlist_additions', []), + 'results': scan_state.get('results', []), + 'summary': scan_state.get('summary', {}), + 'error': None, + }) + + def _emit(event_type: str, **payload): + if progress_callback: + try: + progress_callback(event_type, payload) + except Exception: + logger.debug("Watchlist scan progress callback failed for %s", event_type, exc_info=True) + + _emit('scan_started', profile_id=profile_id, total_artists=len(watchlist_artists)) + + # Rate-limit-aware backfill of artist IDs for the providers we can safely resolve. + providers_to_backfill = ['itunes', 'deezer'] + if self.spotify_client and self.spotify_client.is_spotify_authenticated(): + providers_to_backfill.append('spotify') + try: + from config.settings import config_manager as _cfg + if _cfg.get('discogs.token', ''): + providers_to_backfill.append('discogs') + except Exception: + pass + + for provider in providers_to_backfill: + try: + logger.info("Checking for missing %s IDs in watchlist...", provider) + self._backfill_missing_ids(watchlist_artists, provider) + except Exception as backfill_error: + logger.warning("Error during %s ID backfilling: %s", provider, backfill_error) + + lookback_period = self._get_lookback_period_setting() + is_full_discography = (lookback_period == 'all') + artist_count = len(watchlist_artists) + + base_artist_delay = DELAY_BETWEEN_ARTISTS + base_album_delay = DELAY_BETWEEN_ALBUMS + if is_full_discography: + base_artist_delay *= 2.0 + base_album_delay *= 2.0 + if artist_count > 200: + base_artist_delay *= 1.5 + base_album_delay *= 1.25 + elif artist_count > 100: + base_artist_delay *= 1.25 + + artist_delay = base_artist_delay + album_delay = base_album_delay + logger.info( + "Scan parameters: %s artists, lookback=%s, delays: %.1fs/artist, %.1fs/album", + artist_count, + lookback_period, + artist_delay, + album_delay, + ) + + for i, artist in enumerate(watchlist_artists): + if cancel_check and cancel_check(): + logger.info("Watchlist scan cancelled after %s/%s artists", i, len(watchlist_artists)) + if scan_state is not None: + successful_scans = [r for r in scan_results if r.success] + scan_state['status'] = 'cancelled' + scan_state['current_phase'] = 'cancelled' + scan_state['summary'] = { + 'total_artists': i, + 'successful_scans': len(successful_scans), + 'new_tracks_found': sum(r.new_tracks_found for r in successful_scans), + 'tracks_added_to_wishlist': sum(r.tracks_added_to_wishlist for r in successful_scans), + 'cancelled': True, + } + _emit('cancelled', processed=i, total=len(watchlist_artists)) + break + + try: + artist_image_url = self.get_artist_image_url(artist) or '' + absolute_index = artist_index_offset + i + 1 + if scan_state is not None: + scan_state.update({ + 'current_artist_index': absolute_index, + 'current_artist_name': artist.artist_name, + 'current_artist_image_url': artist_image_url, + 'current_phase': 'fetching_discography', + 'albums_to_check': 0, + 'albums_checked': 0, + 'current_album': '', + 'current_album_image_url': '', + 'current_track_name': '', + }) + + _emit( + 'artist_started', + artist_name=artist.artist_name, + artist_index=absolute_index, + total_artists=total_artists_override if total_artists_override is not None else len(watchlist_artists), + profile_id=profile_id, + artist_image_url=artist_image_url, + ) + + albums = self.get_artist_discography_for_watchlist(artist, artist.last_scan_timestamp) + if albums is None: + scan_results.append(ScanResult( + artist_name=artist.artist_name, + spotify_artist_id=artist.spotify_artist_id or '', + albums_checked=0, + new_tracks_found=0, + tracks_added_to_wishlist=0, + success=False, + error_message="Failed to get artist discography", + )) + _emit( + 'artist_error', + artist_name=artist.artist_name, + profile_id=profile_id, + error_message="Failed to get artist discography", + ) + continue + + if scan_state is not None: + scan_state.update({ + 'current_phase': 'checking_albums', + 'albums_to_check': len(albums), + 'albums_checked': 0, + }) + + artist_new_tracks = 0 + artist_added_tracks = 0 + + for album_index, album in enumerate(albums): + try: + album_data = self.metadata_service.get_album(album.id) + if not album_data or 'tracks' not in album_data: + logger.debug("Skipping album %s (id=%s): no track data returned", album.name, album.id) + continue + + tracks = album_data['tracks']['items'] + if not self._should_include_release(len(tracks), artist): + continue + + album_image_url = '' + if 'images' in album_data and album_data['images']: + album_image_url = album_data['images'][0]['url'] + + if scan_state is not None: + scan_state.update({ + 'albums_checked': album_index + 1, + 'current_album': album.name, + 'current_album_image_url': album_image_url, + 'current_phase': f'checking_album_{album_index + 1}_of_{len(albums)}', + }) + + _emit( + 'album_started', + artist_name=artist.artist_name, + album_name=album.name, + album_index=album_index + 1, + total_albums=len(albums), + album_image_url=album_image_url, + ) + + for track in tracks: + if not self._should_include_track(track, album_data, artist): + continue + + track_name = track.get('name', 'Unknown Track') + if scan_state is not None: + scan_state['current_track_name'] = track_name + + if self.is_track_missing_from_library(track, album_name=album_data.get('name')): + artist_new_tracks += 1 + if scan_state is not None: + scan_state['tracks_found_this_scan'] += 1 + + if self.add_track_to_wishlist(track, album_data, artist): + artist_added_tracks += 1 + if scan_state is not None: + scan_state['tracks_added_this_scan'] += 1 + + track_artists = track.get('artists', []) + track_artist_name = track_artists[0].get('name', 'Unknown Artist') if track_artists else 'Unknown Artist' + if scan_state is not None: + scan_state['recent_wishlist_additions'].insert(0, { + 'track_name': track_name, + 'artist_name': track_artist_name, + 'album_image_url': album_image_url, + }) + if len(scan_state['recent_wishlist_additions']) > 10: + scan_state['recent_wishlist_additions'].pop() + + if album_index < len(albums) - 1: + time.sleep(album_delay) + + except Exception as e: + logger.warning("Error checking album %s: %s", album.name, e) + continue + + self.update_artist_scan_timestamp(artist) + + scan_results.append(ScanResult( + artist_name=artist.artist_name, + spotify_artist_id=artist.spotify_artist_id or '', + albums_checked=len(albums), + new_tracks_found=artist_new_tracks, + tracks_added_to_wishlist=artist_added_tracks, + success=True, + )) + + _emit( + 'artist_completed', + artist_name=artist.artist_name, + artist_index=absolute_index, + total_artists=total_artists_override if total_artists_override is not None else len(watchlist_artists), + profile_id=profile_id, + albums_checked=len(albums), + new_tracks_found=artist_new_tracks, + tracks_added_to_wishlist=artist_added_tracks, + ) + + try: + if scan_state is not None: + scan_state['current_phase'] = 'fetching_similar_artists' + source_artist_id = artist.spotify_artist_id or artist.itunes_artist_id or str(artist.id) + artist_profile_id = getattr(artist, 'profile_id', profile_id) + spotify_authenticated = self.spotify_client and self.spotify_client.is_spotify_authenticated() + if self.database.has_fresh_similar_artists(source_artist_id, days_threshold=30, require_spotify=spotify_authenticated, profile_id=artist_profile_id): + logger.info("Similar artists for %s are cached and fresh (profile %s)", artist.artist_name, artist_profile_id) + self._backfill_similar_artists_itunes_ids(source_artist_id, profile_id=artist_profile_id) + else: + logger.info("Fetching similar artists for %s (profile %s)...", artist.artist_name, artist_profile_id) + self.update_similar_artists(artist, profile_id=artist_profile_id) + logger.info("Similar artists updated for %s", artist.artist_name) + except Exception as similar_error: + logger.warning("Failed to update similar artists for %s: %s", artist.artist_name, similar_error) + + if i < len(watchlist_artists) - 1: + if scan_state is not None: + scan_state['current_phase'] = 'rate_limiting' + time.sleep(artist_delay) + + except Exception as e: + logger.error("Error scanning artist %s: %s", artist.artist_name, e) + scan_results.append(ScanResult( + artist_name=artist.artist_name, + spotify_artist_id=artist.spotify_artist_id or '', + albums_checked=0, + new_tracks_found=0, + tracks_added_to_wishlist=0, + success=False, + error_message=str(e), + )) + _emit( + 'artist_error', + artist_name=artist.artist_name, + artist_index=artist_index_offset + i + 1, + total_artists=total_artists_override if total_artists_override is not None else len(watchlist_artists), + profile_id=profile_id, + error_message=str(e), + ) + + if scan_state is not None: + successful_scans = [r for r in scan_results if r.success] + total_new_tracks = sum(r.new_tracks_found for r in successful_scans) + total_added_to_wishlist = sum(r.tracks_added_to_wishlist for r in successful_scans) + scan_state['results'] = list(scan_state.get('results', [])) + scan_results + if scan_state.get('status') != 'cancelled': + scan_state['status'] = 'completed' + scan_state['completed_at'] = datetime.now() + scan_state['current_phase'] = 'completed' + scan_state['summary'] = { + 'total_artists': len(scan_results), + 'successful_scans': len(successful_scans), + 'new_tracks_found': total_new_tracks, + 'tracks_added_to_wishlist': total_added_to_wishlist, + } + + _emit( + 'scan_completed', + profile_id=profile_id, + total_artists=len(watchlist_artists), + total_scanned=len(scan_results), + successful_scans=len([r for r in scan_results if r.success]), + new_tracks_found=sum(r.new_tracks_found for r in scan_results if r.success), + tracks_added_to_wishlist=sum(r.tracks_added_to_wishlist for r in scan_results if r.success), + ) + return scan_results def get_artist_discography(self, spotify_artist_id: str, last_scan_timestamp: Optional[datetime] = None) -> Optional[List]: """ diff --git a/tests/test_watchlist_scanner_scan.py b/tests/test_watchlist_scanner_scan.py new file mode 100644 index 00000000..bb59d786 --- /dev/null +++ b/tests/test_watchlist_scanner_scan.py @@ -0,0 +1,199 @@ +import sys +import types + + +if "spotipy" not in sys.modules: + spotipy = types.ModuleType("spotipy") + + class _DummySpotify: + def __init__(self, *args, **kwargs): + pass + + oauth2 = types.ModuleType("spotipy.oauth2") + + class _DummyOAuth: + def __init__(self, *args, **kwargs): + pass + + spotipy.Spotify = _DummySpotify + oauth2.SpotifyOAuth = _DummyOAuth + oauth2.SpotifyClientCredentials = _DummyOAuth + spotipy.oauth2 = oauth2 + sys.modules["spotipy"] = spotipy + sys.modules["spotipy.oauth2"] = oauth2 + +import core.watchlist_scanner as watchlist_scanner_module +from core.watchlist_scanner import WatchlistScanner + + +class _FakeSpotifyClient: + def is_spotify_authenticated(self): + return False + + +class _FakeMetadataService: + def __init__(self, album_data): + self.spotify = _FakeSpotifyClient() + self.itunes = types.SimpleNamespace() + self._album_data = album_data + + def get_album(self, album_id): + return self._album_data + + +class _FakeDB: + def __init__(self, artists): + self.artists = artists + self.similar_calls = [] + + def get_watchlist_artists(self, profile_id=None): + return list(self.artists) + + def has_fresh_similar_artists(self, *args, **kwargs): + self.similar_calls.append((args, kwargs)) + return False + + +def _build_artist(name="Artist One", profile_id=11): + return types.SimpleNamespace( + artist_name=name, + spotify_artist_id="sp-artist", + itunes_artist_id="it-artist", + deezer_artist_id="dz-artist", + discogs_artist_id="dg-artist", + last_scan_timestamp=None, + id=123, + profile_id=profile_id, + include_albums=True, + include_eps=True, + include_singles=True, + include_live=False, + include_remixes=False, + include_acoustic=False, + include_compilations=False, + include_instrumentals=False, + lookback_days=7, + image_url=None, + ) + + +def _build_scanner(album_data, artists): + scanner = WatchlistScanner(metadata_service=_FakeMetadataService(album_data)) + scanner._database = _FakeDB(artists) + scanner._wishlist_service = types.SimpleNamespace() + scanner._matching_engine = types.SimpleNamespace() + return scanner + + +def test_scan_watchlist_profile_loads_artists_and_applies_overrides(monkeypatch): + artist = _build_artist() + scanner = _build_scanner({"tracks": {"items": []}}, [artist]) + + loaded_profiles = [] + override_calls = [] + scan_calls = [] + + monkeypatch.setattr(scanner.database, "get_watchlist_artists", lambda profile_id=None: loaded_profiles.append(profile_id) or [artist]) + monkeypatch.setattr(scanner, "_apply_global_watchlist_overrides", lambda artists: override_calls.append(list(artists))) + monkeypatch.setattr(scanner, "scan_watchlist_artists", lambda artists, **kwargs: scan_calls.append((list(artists), kwargs)) or ["ok"]) + + result = scanner.scan_watchlist_profile(42) + + assert result == ["ok"] + assert loaded_profiles == [42] + assert override_calls and override_calls[0][0].artist_name == "Artist One" + assert scan_calls and scan_calls[0][0][0].artist_name == "Artist One" + assert scan_calls[0][1]["profile_id"] == 42 + + +def test_scan_watchlist_artists_scans_tracks_and_updates_state(monkeypatch): + monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0) + monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ALBUMS", 0) + + artist = _build_artist() + album = types.SimpleNamespace(id="album-1", name="Album One") + album_data = { + "name": "Album One", + "images": [{"url": "https://example.com/album.jpg"}], + "tracks": { + "items": [ + { + "id": "track-1", + "name": "Track One", + "track_number": 1, + "disc_number": 1, + "artists": [{"name": "Artist One"}], + } + ] + }, + } + scanner = _build_scanner(album_data, [artist]) + scanner._database.has_fresh_similar_artists = lambda *args, **kwargs: False + + monkeypatch.setattr(scanner, "_backfill_missing_ids", lambda *args, **kwargs: None) + monkeypatch.setattr(scanner, "get_artist_image_url", lambda *_args, **_kwargs: "https://example.com/artist.jpg") + monkeypatch.setattr(scanner, "get_artist_discography_for_watchlist", lambda *_args, **_kwargs: [album]) + monkeypatch.setattr(scanner, "_get_lookback_period_setting", lambda: "30") + monkeypatch.setattr(scanner, "_get_rescan_cutoff", lambda: None) + monkeypatch.setattr(scanner, "_should_include_release", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "_should_include_track", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "is_track_missing_from_library", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "add_track_to_wishlist", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "update_artist_scan_timestamp", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "update_similar_artists", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "_backfill_similar_artists_itunes_ids", lambda *_args, **_kwargs: 0) + + scan_state = {} + results = scanner.scan_watchlist_artists([artist], scan_state=scan_state) + + assert len(results) == 1 + assert results[0].success is True + assert results[0].new_tracks_found == 1 + assert results[0].tracks_added_to_wishlist == 1 + assert scan_state["status"] == "completed" + assert scan_state["summary"]["successful_scans"] == 1 + assert scan_state["summary"]["new_tracks_found"] == 1 + assert scan_state["summary"]["tracks_added_to_wishlist"] == 1 + assert scan_state["recent_wishlist_additions"][0]["track_name"] == "Track One" + + +def test_scan_watchlist_artists_honors_cancel_check(monkeypatch): + monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0) + monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ALBUMS", 0) + + artist_a = _build_artist("Artist One") + artist_b = _build_artist("Artist Two") + album = types.SimpleNamespace(id="album-1", name="Album One") + album_data = { + "name": "Album One", + "tracks": {"items": []}, + } + scanner = _build_scanner(album_data, [artist_a, artist_b]) + scanner._database.has_fresh_similar_artists = lambda *args, **kwargs: False + + monkeypatch.setattr(scanner, "_backfill_missing_ids", lambda *args, **kwargs: None) + monkeypatch.setattr(scanner, "get_artist_image_url", lambda *_args, **_kwargs: "https://example.com/artist.jpg") + monkeypatch.setattr(scanner, "get_artist_discography_for_watchlist", lambda *_args, **_kwargs: [album]) + monkeypatch.setattr(scanner, "_get_lookback_period_setting", lambda: "30") + monkeypatch.setattr(scanner, "_get_rescan_cutoff", lambda: None) + monkeypatch.setattr(scanner, "_should_include_release", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "_should_include_track", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "is_track_missing_from_library", lambda *_args, **_kwargs: False) + monkeypatch.setattr(scanner, "add_track_to_wishlist", lambda *_args, **_kwargs: False) + monkeypatch.setattr(scanner, "update_artist_scan_timestamp", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "update_similar_artists", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "_backfill_similar_artists_itunes_ids", lambda *_args, **_kwargs: 0) + + cancels = iter([False, True]) + scan_state = {} + results = scanner.scan_watchlist_artists( + [artist_a, artist_b], + scan_state=scan_state, + cancel_check=lambda: next(cancels), + ) + + assert len(results) == 1 + assert results[0].artist_name == "Artist One" + assert scan_state["status"] == "cancelled" + assert scan_state["summary"]["cancelled"] is True + assert scan_state["summary"]["successful_scans"] == 1 diff --git a/web_server.py b/web_server.py index 829ccc5c..f74db392 100644 --- a/web_server.py +++ b/web_server.py @@ -39802,9 +39802,6 @@ def start_watchlist_scan(): database = get_database() watchlist_artists = database.get_watchlist_artists(profile_id=scan_profile_id) - # Apply global overrides if enabled - _apply_watchlist_global_overrides(watchlist_artists) - if not watchlist_artists: watchlist_scan_state['status'] = 'completed' watchlist_scan_state['summary'] = { @@ -39939,247 +39936,26 @@ def start_watchlist_scan(): # Pause enrichment workers during scan to reduce API contention _ew_state = _pause_enrichment_workers('watchlist scan') - - # Dynamic delay calculation based on scan scope - lookback_period = scanner._get_lookback_period_setting() - is_full_discography = (lookback_period == 'all') - artist_count = len(watchlist_artists) - - base_artist_delay = 2.0 - base_album_delay = 0.5 - - # Scale up for full discography (way more albums per artist) - if is_full_discography: - base_artist_delay *= 2.0 - base_album_delay *= 2.0 - - # Scale up further for large artist counts (sustained API pressure) - if artist_count > 200: - base_artist_delay *= 1.5 - base_album_delay *= 1.25 - elif artist_count > 100: - base_artist_delay *= 1.25 - - artist_delay = base_artist_delay - album_delay = base_album_delay - print(f"Scan parameters: {artist_count} artists, lookback={lookback_period}, " - f"delays: {artist_delay:.1f}s/artist, {album_delay:.1f}s/album") - - # Circuit breaker: pause scan on consecutive rate-limit failures - consecutive_failures = 0 - CIRCUIT_BREAKER_THRESHOLD = 3 - circuit_breaker_pause = 60 # seconds, doubles each trigger, max 600s - - for i, artist in enumerate(watchlist_artists): - # Check for cancel request - if watchlist_scan_state.get('cancel_requested'): - print(f"[Manual Watchlist Scan] Cancel requested after {i}/{len(watchlist_artists)} artists") - watchlist_scan_state['status'] = 'cancelled' - watchlist_scan_state['current_phase'] = 'cancelled' - watchlist_scan_state['summary'] = { - 'total_artists': i, - 'successful_scans': len([r for r in scan_results if r.success]), - 'new_tracks_found': sum(r.new_tracks_found for r in scan_results if r.success), - 'tracks_added_to_wishlist': sum(r.tracks_added_to_wishlist for r in scan_results if r.success), - 'cancelled': True - } - break - - try: - # Fetch artist image using provider-aware method - artist_image_url = scanner.get_artist_image_url(artist) or '' - - # Update progress - watchlist_scan_state.update({ - 'current_artist_index': i + 1, - 'current_artist_name': artist.artist_name, - 'current_artist_image_url': artist_image_url, - 'current_phase': 'fetching_discography', - 'albums_to_check': 0, - 'albums_checked': 0, - 'current_album': '', - 'current_album_image_url': '', - 'current_track_name': '' - }) - - # Get artist discography using provider-aware method - albums = scanner.get_artist_discography_for_watchlist(artist, artist.last_scan_timestamp) - - if albums is None: - scan_results.append(type('ScanResult', (), { - 'artist_name': artist.artist_name, - 'spotify_artist_id': artist.spotify_artist_id, - 'albums_checked': 0, - 'new_tracks_found': 0, - 'tracks_added_to_wishlist': 0, - 'success': False, - 'error_message': "Failed to get artist discography" - })()) - continue - - # Update with album count - watchlist_scan_state.update({ - 'current_phase': 'checking_albums', - 'albums_to_check': len(albums), - 'albums_checked': 0 - }) - - # Track progress for this artist - artist_new_tracks = 0 - artist_added_tracks = 0 - - # Scan each album - for album_index, album in enumerate(albums): - try: - # Get album tracks using provider-aware method - album_data = scanner.metadata_service.get_album(album.id) - if not album_data or 'tracks' not in album_data: - logger.debug(f"Skipping album {album.name} (id={album.id}): no track data returned") - continue - - tracks = album_data['tracks']['items'] - - # Check release type filter (album/EP/single) - if not scanner._should_include_release(len(tracks), artist): - continue - - # Get album image - album_image_url = '' - if 'images' in album_data and album_data['images']: - album_image_url = album_data['images'][0]['url'] - - watchlist_scan_state.update({ - 'albums_checked': album_index + 1, - 'current_album': album.name, - 'current_album_image_url': album_image_url, - 'current_phase': f'checking_album_{album_index + 1}_of_{len(albums)}' - }) - - # Check each track - for track in tracks: - # Check content type filter (live/remix/acoustic/compilation) - if not scanner._should_include_track(track, album_data, artist): - continue - - # Update current track being processed - track_name = track.get('name', 'Unknown Track') - watchlist_scan_state['current_track_name'] = track_name - - if scanner.is_track_missing_from_library(track): - artist_new_tracks += 1 - watchlist_scan_state['tracks_found_this_scan'] += 1 - - # Add to wishlist - if scanner.add_track_to_wishlist(track, album_data, artist): - artist_added_tracks += 1 - watchlist_scan_state['tracks_added_this_scan'] += 1 - - # Add to recent wishlist additions feed - track_artists = track.get('artists', []) - track_artist_name = track_artists[0].get('name', 'Unknown Artist') if track_artists else 'Unknown Artist' - - watchlist_scan_state['recent_wishlist_additions'].insert(0, { - 'track_name': track_name, - 'artist_name': track_artist_name, - 'album_image_url': album_image_url - }) - - # Keep only last 10 - if len(watchlist_scan_state['recent_wishlist_additions']) > 10: - watchlist_scan_state['recent_wishlist_additions'].pop() - - # Rate-limited delay between albums - import time - time.sleep(album_delay) - - except Exception as e: - print(f"Error checking album {album.name}: {e}") - continue - - # Update scan timestamp - scanner.update_artist_scan_timestamp(artist) - - # Store result - scan_results.append(type('ScanResult', (), { - 'artist_name': artist.artist_name, - 'spotify_artist_id': artist.spotify_artist_id, - 'albums_checked': len(albums), - 'new_tracks_found': artist_new_tracks, - 'tracks_added_to_wishlist': artist_added_tracks, - 'success': True, - 'error_message': None - })()) - - print(f"Scanned {artist.artist_name}: {artist_new_tracks} new tracks found, {artist_added_tracks} added to wishlist") - - # Fetch similar artists for discovery feature - # This is critical for the discover page to work - try: - watchlist_scan_state['current_phase'] = 'fetching_similar_artists' - source_artist_id = artist.spotify_artist_id or artist.itunes_artist_id or str(artist.id) - - # If Spotify is authenticated, also require Spotify IDs to be present - spotify_authenticated = spotify_client and spotify_client.is_spotify_authenticated() - if database.has_fresh_similar_artists(source_artist_id, days_threshold=30, require_spotify=spotify_authenticated, profile_id=scan_profile_id): - print(f" Similar artists for {artist.artist_name} are cached and fresh") - # Still backfill missing iTunes IDs - scanner._backfill_similar_artists_itunes_ids(source_artist_id, profile_id=scan_profile_id) - else: - print(f" Fetching similar artists for {artist.artist_name}...") - scanner.update_similar_artists(artist, profile_id=scan_profile_id) - print(f" Similar artists updated for {artist.artist_name}") - except Exception as similar_error: - print(f" Failed to update similar artists for {artist.artist_name}: {similar_error}") - - # Delay between artists - if i < len(watchlist_artists) - 1: - watchlist_scan_state['current_phase'] = 'rate_limiting' - time.sleep(artist_delay) - - # Reset circuit breaker on successful artist scan - consecutive_failures = 0 - circuit_breaker_pause = 60 - - except Exception as e: - print(f"Error scanning artist {artist.artist_name}: {e}") - - # Circuit breaker: detect consecutive rate-limit failures - error_str = str(e).lower() - if "429" in error_str or "rate limit" in error_str: - consecutive_failures += 1 - if consecutive_failures >= CIRCUIT_BREAKER_THRESHOLD: - print(f"Circuit breaker: {consecutive_failures} consecutive rate-limit failures, pausing {circuit_breaker_pause}s") - watchlist_scan_state['current_phase'] = 'circuit_breaker_pause' - time.sleep(circuit_breaker_pause) - circuit_breaker_pause = min(circuit_breaker_pause * 2, 600) - consecutive_failures = 0 - else: - consecutive_failures = 0 - - scan_results.append(type('ScanResult', (), { - 'artist_name': artist.artist_name, - 'spotify_artist_id': artist.spotify_artist_id, - 'albums_checked': 0, - 'new_tracks_found': 0, - 'tracks_added_to_wishlist': 0, - 'success': False, - 'error_message': str(e) - })()) + scan_results = scanner.scan_watchlist_profile( + scan_profile_id, + watchlist_artists=watchlist_artists, + scan_state=watchlist_scan_state, + cancel_check=lambda: watchlist_scan_state.get('cancel_requested', False), + ) # Store final results (skip if cancelled — already set by cancel handler) was_cancelled = watchlist_scan_state.get('cancel_requested', False) if not was_cancelled: - watchlist_scan_state['status'] = 'completed' - watchlist_scan_state['results'] = scan_results - watchlist_scan_state['completed_at'] = datetime.now() _artmap_cache_invalidate(scan_profile_id) - watchlist_scan_state['current_phase'] = 'completed' - - # Calculate summary successful_scans = [r for r in scan_results if r.success] total_new_tracks = sum(r.new_tracks_found for r in successful_scans) total_added_to_wishlist = sum(r.tracks_added_to_wishlist for r in successful_scans) + watchlist_scan_state['status'] = 'completed' + watchlist_scan_state['results'] = scan_results + watchlist_scan_state['completed_at'] = datetime.now() + watchlist_scan_state['current_phase'] = 'completed' + watchlist_scan_state['summary'] = { 'total_artists': len(scan_results), 'successful_scans': len(successful_scans), @@ -40773,33 +40549,6 @@ def watchlist_global_config(): traceback.print_exc() return jsonify({"success": False, "error": str(e)}), 500 -def _apply_watchlist_global_overrides(watchlist_artists): - """If global override is enabled, overwrite per-artist settings on WatchlistArtist objects.""" - if not config_manager.get('watchlist.global_override_enabled', False): - return - # Read global settings once - g_albums = config_manager.get('watchlist.global_include_albums', True) - g_eps = config_manager.get('watchlist.global_include_eps', True) - g_singles = config_manager.get('watchlist.global_include_singles', True) - g_live = config_manager.get('watchlist.global_include_live', False) - g_remixes = config_manager.get('watchlist.global_include_remixes', False) - g_acoustic = config_manager.get('watchlist.global_include_acoustic', False) - g_compilations = config_manager.get('watchlist.global_include_compilations', False) - g_instrumentals = config_manager.get('watchlist.global_include_instrumentals', False) - print(f"[Watchlist] Global override is ACTIVE — applying to {len(watchlist_artists)} artists " - f"(albums={g_albums}, eps={g_eps}, singles={g_singles}, live={g_live}, " - f"remixes={g_remixes}, acoustic={g_acoustic}, compilations={g_compilations}, " - f"instrumentals={g_instrumentals})") - for artist in watchlist_artists: - artist.include_albums = g_albums - artist.include_eps = g_eps - artist.include_singles = g_singles - artist.include_live = g_live - artist.include_remixes = g_remixes - artist.include_acoustic = g_acoustic - artist.include_compilations = g_compilations - artist.include_instrumentals = g_instrumentals - def _update_similar_artists_worker(): """Background worker to update similar artists for all watchlist artists""" global similar_artists_update_state @@ -40954,9 +40703,6 @@ def _process_watchlist_scan_automatically(automation_id=None, profile_id=None): scanner = get_watchlist_scanner(spotify_client) all_profiles = scan_profiles # Used later for discovery pool population - # Apply global overrides if enabled - _apply_watchlist_global_overrides(watchlist_artists) - # Initialize detailed progress tracking (same as manual scan) watchlist_scan_state = { 'status': 'scanning', @@ -40985,259 +40731,79 @@ def _process_watchlist_scan_automatically(automation_id=None, profile_id=None): # Pause enrichment workers during scan to reduce API contention _ew_state = _pause_enrichment_workers('auto-watchlist scan') - # Dynamic delay calculation based on scan scope - lookback_period = scanner._get_lookback_period_setting() - is_full_discography = (lookback_period == 'all') - artist_count = len(watchlist_artists) - - base_artist_delay = 2.0 - base_album_delay = 0.5 - - # Scale up for full discography (way more albums per artist) - if is_full_discography: - base_artist_delay *= 2.0 - base_album_delay *= 2.0 - - # Scale up further for large artist counts (sustained API pressure) - if artist_count > 200: - base_artist_delay *= 1.5 - base_album_delay *= 1.25 - elif artist_count > 100: - base_artist_delay *= 1.25 - - artist_delay = base_artist_delay - album_delay = base_album_delay - print(f"[Auto-Watchlist] Scan parameters: {artist_count} artists, lookback={lookback_period}, " - f"delays: {artist_delay:.1f}s/artist, {album_delay:.1f}s/album") - - # Circuit breaker: pause scan on consecutive rate-limit failures - consecutive_failures = 0 - CIRCUIT_BREAKER_THRESHOLD = 3 - circuit_breaker_pause = 60 # seconds, doubles each trigger, max 600s - - # Scan each artist with detailed tracking - for i, artist in enumerate(watchlist_artists): - # Check for cancel request - if watchlist_scan_state.get('cancel_requested'): - print(f"[Auto-Watchlist] Cancel requested after {i}/{len(watchlist_artists)} artists") - watchlist_scan_state['status'] = 'cancelled' - watchlist_scan_state['current_phase'] = 'cancelled' - watchlist_scan_state['summary'] = { - 'total_artists': i, - 'successful_scans': len([r for r in scan_results if r.success]), - 'new_tracks_found': sum(r.new_tracks_found for r in scan_results if r.success), - 'tracks_added_to_wishlist': sum(r.tracks_added_to_wishlist for r in scan_results if r.success), - 'cancelled': True - } - _update_automation_progress(automation_id, progress=100, phase='Cancelled by user', - log_line='Scan cancelled by user', log_type='warning') - break - - try: - # Fetch artist image using provider-aware method - artist_image_url = scanner.get_artist_image_url(artist) or '' - - pct = 5 + (i / max(1, len(watchlist_artists))) * 90 - _update_automation_progress(automation_id, progress=pct, - phase=f'Scanning: {artist.artist_name} ({i+1}/{len(watchlist_artists)})', - current_item=artist.artist_name, - processed=i, total=len(watchlist_artists)) - - # Update progress - watchlist_scan_state.update({ - 'current_artist_index': i + 1, - 'current_artist_name': artist.artist_name, - 'current_artist_image_url': artist_image_url, - 'current_phase': 'fetching_discography', - 'albums_to_check': 0, - 'albums_checked': 0, - 'current_album': '', - 'current_album_image_url': '', - 'current_track_name': '' - }) - - # Get artist discography using provider-aware method - albums = scanner.get_artist_discography_for_watchlist(artist, artist.last_scan_timestamp) - - if albums is None: - scan_results.append(type('ScanResult', (), { - 'artist_name': artist.artist_name, - 'spotify_artist_id': artist.spotify_artist_id, - 'albums_checked': 0, - 'new_tracks_found': 0, - 'tracks_added_to_wishlist': 0, - 'success': False, - 'error_message': "Failed to get artist discography" - })()) - continue - - # Update with album count - watchlist_scan_state.update({ - 'current_phase': 'checking_albums', - 'albums_to_check': len(albums), - 'albums_checked': 0 - }) - - # Track progress for this artist - artist_new_tracks = 0 - artist_added_tracks = 0 - - # Scan each album - for album_index, album in enumerate(albums): - try: - # Get album tracks using provider-aware method - album_data = scanner.metadata_service.get_album(album.id) - if not album_data or 'tracks' not in album_data: - logger.debug(f"Skipping album {album.name} (id={album.id}): no track data returned") - continue - - tracks = album_data['tracks']['items'] - - # Check release type filter (album/EP/single) - if not scanner._should_include_release(len(tracks), artist): - continue - - # Get album image - album_image_url = '' - if 'images' in album_data and album_data['images']: - album_image_url = album_data['images'][0]['url'] - - watchlist_scan_state.update({ - 'albums_checked': album_index + 1, - 'current_album': album.name, - 'current_album_image_url': album_image_url, - 'current_phase': f'checking_album_{album_index + 1}_of_{len(albums)}' - }) - - # Check each track - for track in tracks: - # Check content type filter (live/remix/acoustic/compilation) - if not scanner._should_include_track(track, album_data, artist): - continue - - # Update current track being processed - track_name = track.get('name', 'Unknown Track') - watchlist_scan_state['current_track_name'] = track_name - - if scanner.is_track_missing_from_library(track): - artist_new_tracks += 1 - watchlist_scan_state['tracks_found_this_scan'] += 1 - - # Add to wishlist - if scanner.add_track_to_wishlist(track, album_data, artist): - artist_added_tracks += 1 - watchlist_scan_state['tracks_added_this_scan'] += 1 - - # Add to recent wishlist additions feed - track_artists = track.get('artists', []) - track_artist_name = track_artists[0].get('name', 'Unknown Artist') if track_artists else 'Unknown Artist' - - watchlist_scan_state['recent_wishlist_additions'].insert(0, { - 'track_name': track_name, - 'artist_name': track_artist_name, - 'album_image_url': album_image_url - }) - - # Keep only last 10 - if len(watchlist_scan_state['recent_wishlist_additions']) > 10: - watchlist_scan_state['recent_wishlist_additions'].pop() - - # Rate-limited delay between albums - import time - time.sleep(album_delay) - - except Exception as e: - print(f"Error checking album {album.name}: {e}") - continue - - # Update scan timestamp - scanner.update_artist_scan_timestamp(artist) - - # Store result - scan_results.append(type('ScanResult', (), { - 'artist_name': artist.artist_name, - 'spotify_artist_id': artist.spotify_artist_id, - 'albums_checked': len(albums), - 'new_tracks_found': artist_new_tracks, - 'tracks_added_to_wishlist': artist_added_tracks, - 'success': True, - 'error_message': None - })()) - - print(f"Scanned {artist.artist_name}: {artist_new_tracks} new tracks found, {artist_added_tracks} added to wishlist") - if artist_new_tracks > 0: - _update_automation_progress(automation_id, - log_line=f'{artist.artist_name} — {artist_new_tracks} new, {artist_added_tracks} added', log_type='success') - else: - _update_automation_progress(automation_id, - log_line=f'{artist.artist_name} — no new tracks', log_type='skip') - - # Emit watchlist_new_release event if new tracks were found - if artist_new_tracks > 0: - try: - if automation_engine: - automation_engine.emit('watchlist_new_release', { - 'artist': artist.artist_name, - 'new_tracks': str(artist_new_tracks), - 'added_to_wishlist': str(artist_added_tracks), - }) - except Exception: - pass - - # Fetch similar artists for discovery feature (per-profile) - try: - watchlist_scan_state['current_phase'] = 'fetching_similar_artists' - source_artist_id = artist.spotify_artist_id or artist.itunes_artist_id or str(artist.id) - artist_profile_id = getattr(artist, 'profile_id', 1) - - spotify_authenticated = spotify_client and spotify_client.is_spotify_authenticated() - if database.has_fresh_similar_artists(source_artist_id, days_threshold=30, require_spotify=spotify_authenticated, profile_id=artist_profile_id): - print(f" Similar artists for {artist.artist_name} are cached and fresh (profile {artist_profile_id})") - scanner._backfill_similar_artists_itunes_ids(source_artist_id, profile_id=artist_profile_id) - else: - print(f" Fetching similar artists for {artist.artist_name} (profile {artist_profile_id})...") - scanner.update_similar_artists(artist, profile_id=artist_profile_id) - print(f" Similar artists updated for {artist.artist_name}") - except Exception as similar_error: - print(f" Failed to update similar artists for {artist.artist_name}: {similar_error}") - - # Delay between artists - if i < len(watchlist_artists) - 1: - watchlist_scan_state['current_phase'] = 'rate_limiting' - time.sleep(artist_delay) - - # Reset circuit breaker on successful artist scan - consecutive_failures = 0 - circuit_breaker_pause = 60 - - except Exception as e: - print(f"Error scanning artist {artist.artist_name}: {e}") - _update_automation_progress(automation_id, - log_line=f'{artist.artist_name} — error: {str(e)[:60]}', log_type='error') - - # Circuit breaker: detect consecutive rate-limit failures - error_str = str(e).lower() - if "429" in error_str or "rate limit" in error_str: - consecutive_failures += 1 - if consecutive_failures >= CIRCUIT_BREAKER_THRESHOLD: - print(f"[Auto-Watchlist] Circuit breaker: {consecutive_failures} consecutive rate-limit failures, pausing {circuit_breaker_pause}s") - watchlist_scan_state['current_phase'] = 'circuit_breaker_pause' - time.sleep(circuit_breaker_pause) - circuit_breaker_pause = min(circuit_breaker_pause * 2, 600) - consecutive_failures = 0 + def _scan_progress(event_type, payload): + if event_type == 'scan_started': + _update_automation_progress( + automation_id, + progress=5, + phase='Loading watchlist', + log_line=f"{len(watchlist_artists)} artists ({profile_label})", + log_type='info', + ) + elif event_type == 'artist_started': + total = max(1, payload.get('total_artists', len(watchlist_artists))) + idx = payload.get('artist_index', 1) + artist_name = payload.get('artist_name', '') + pct = 5 + ((idx - 1) / total) * 90 + _update_automation_progress( + automation_id, + progress=pct, + phase=f'Scanning: {artist_name} ({idx}/{total})', + current_item=artist_name, + processed=idx - 1, + total=total, + ) + elif event_type == 'artist_completed': + artist_name = payload.get('artist_name', '') + new_tracks = payload.get('new_tracks_found', 0) + added = payload.get('tracks_added_to_wishlist', 0) + if new_tracks > 0: + _update_automation_progress( + automation_id, + log_line=f'{artist_name} — {new_tracks} new, {added} added', + log_type='success', + ) else: - consecutive_failures = 0 + _update_automation_progress( + automation_id, + log_line=f'{artist_name} — no new tracks', + log_type='skip', + ) + elif event_type == 'artist_error': + artist_name = payload.get('artist_name', '') + error_message = payload.get('error_message', 'error') + _update_automation_progress( + automation_id, + log_line=f'{artist_name} — error: {error_message[:60]}', + log_type='error', + ) + elif event_type == 'cancelled': + _update_automation_progress( + automation_id, + progress=100, + phase='Cancelled by user', + log_line='Scan cancelled by user', + log_type='warning', + ) + elif event_type == 'scan_completed': + _update_automation_progress( + automation_id, + progress=95, + phase='Scan complete', + log_line=( + f"Scanned {payload.get('successful_scans', 0)} artists — " + f"{payload.get('new_tracks_found', 0)} new tracks, " + f"{payload.get('tracks_added_to_wishlist', 0)} added to wishlist" + ), + log_type='success' if payload.get('new_tracks_found', 0) > 0 else 'info', + ) - scan_results.append(type('ScanResult', (), { - 'artist_name': artist.artist_name, - 'spotify_artist_id': artist.spotify_artist_id, - 'albums_checked': 0, - 'new_tracks_found': 0, - 'tracks_added_to_wishlist': 0, - 'success': False, - 'error_message': str(e) - })()) - continue + scan_results = scanner.scan_watchlist_artists( + watchlist_artists, + scan_state=watchlist_scan_state, + progress_callback=_scan_progress, + cancel_check=lambda: watchlist_scan_state.get('cancel_requested'), + ) # Update state with results (skip if cancelled — already set by cancel handler) was_cancelled = watchlist_scan_state.get('cancel_requested', False)