Consolidate web watchlist scanning

Move the shared watchlist scan loop into core/watchlist_scanner.py so web_server.py only handles triggers, locks, progress, and post-scan orchestration.

Manual and scheduled watchlist scans now share the same scanner-side core, while the web entrypoints keep profile selection and automation progress updates.
pull/303/head
Antti Kettunen 4 weeks ago
parent bf123fed63
commit 657d86cace

@ -4,7 +4,7 @@
Watchlist Scanner Service - Monitors watched artists for new releases
"""
from typing import List, Dict, Any, Optional
from typing import List, Dict, Any, Optional, Callable
from datetime import datetime, timezone, timedelta
from dataclasses import dataclass
import re
@ -901,6 +901,423 @@ class WatchlistScanner:
success=False,
error_message=str(e)
)
def _apply_global_watchlist_overrides(self, watchlist_artists: List[WatchlistArtist]):
"""Apply global watchlist release-type overrides to a batch of artists."""
try:
from config.settings import config_manager
except Exception:
return
if not config_manager.get('watchlist.global_override_enabled', False):
return
g_albums = config_manager.get('watchlist.global_include_albums', True)
g_eps = config_manager.get('watchlist.global_include_eps', True)
g_singles = config_manager.get('watchlist.global_include_singles', True)
g_live = config_manager.get('watchlist.global_include_live', False)
g_remixes = config_manager.get('watchlist.global_include_remixes', False)
g_acoustic = config_manager.get('watchlist.global_include_acoustic', False)
g_compilations = config_manager.get('watchlist.global_include_compilations', False)
g_instrumentals = config_manager.get('watchlist.global_include_instrumentals', False)
logger.info(
"Applying global watchlist override to %s artists "
"(albums=%s, eps=%s, singles=%s, live=%s, remixes=%s, acoustic=%s, compilations=%s, instrumentals=%s)",
len(watchlist_artists),
g_albums,
g_eps,
g_singles,
g_live,
g_remixes,
g_acoustic,
g_compilations,
g_instrumentals,
)
for artist in watchlist_artists:
artist.include_albums = g_albums
artist.include_eps = g_eps
artist.include_singles = g_singles
artist.include_live = g_live
artist.include_remixes = g_remixes
artist.include_acoustic = g_acoustic
artist.include_compilations = g_compilations
artist.include_instrumentals = g_instrumentals
def scan_watchlist_profile(
self,
profile_id: int,
watchlist_artists: Optional[List[WatchlistArtist]] = None,
*,
scan_state: Optional[Dict[str, Any]] = None,
progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
cancel_check: Optional[Callable[[], bool]] = None,
artist_index_offset: int = 0,
total_artists_override: Optional[int] = None,
apply_global_overrides: bool = True,
) -> List[ScanResult]:
"""Scan a single watchlist profile using the shared watchlist scan engine."""
if watchlist_artists is None:
watchlist_artists = self.database.get_watchlist_artists(profile_id=profile_id)
if apply_global_overrides:
self._apply_global_watchlist_overrides(watchlist_artists)
return self.scan_watchlist_artists(
watchlist_artists,
profile_id=profile_id,
scan_state=scan_state,
progress_callback=progress_callback,
cancel_check=cancel_check,
artist_index_offset=artist_index_offset,
total_artists_override=total_artists_override,
)
def scan_watchlist_artists(
self,
watchlist_artists: List[WatchlistArtist],
*,
profile_id: int = 1,
scan_state: Optional[Dict[str, Any]] = None,
progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
cancel_check: Optional[Callable[[], bool]] = None,
artist_index_offset: int = 0,
total_artists_override: Optional[int] = None,
) -> List[ScanResult]:
"""Scan a list of watchlist artists using the shared web watchlist scan flow."""
scan_results: List[ScanResult] = []
if not watchlist_artists:
if scan_state is not None:
scan_state.update({
'status': 'completed',
'total_artists': 0,
'current_artist_index': 0,
'current_artist_name': '',
'current_artist_image_url': '',
'current_phase': 'completed',
'albums_to_check': 0,
'albums_checked': 0,
'current_album': '',
'current_album_image_url': '',
'current_track_name': '',
'tracks_found_this_scan': 0,
'tracks_added_this_scan': 0,
'recent_wishlist_additions': [],
'results': [],
'summary': {
'total_artists': 0,
'successful_scans': 0,
'new_tracks_found': 0,
'tracks_added_to_wishlist': 0,
},
'completed_at': datetime.now(),
'error': None,
})
return scan_results
if scan_state is not None:
scan_state.update({
'status': 'scanning',
'started_at': scan_state.get('started_at') or datetime.now(),
'total_artists': total_artists_override if total_artists_override is not None else len(watchlist_artists),
'current_artist_index': scan_state.get('current_artist_index', artist_index_offset),
'current_artist_name': scan_state.get('current_artist_name', ''),
'current_artist_image_url': scan_state.get('current_artist_image_url', ''),
'current_phase': 'starting',
'albums_to_check': 0,
'albums_checked': 0,
'current_album': '',
'current_album_image_url': '',
'current_track_name': '',
'tracks_found_this_scan': scan_state.get('tracks_found_this_scan', 0),
'tracks_added_this_scan': scan_state.get('tracks_added_this_scan', 0),
'recent_wishlist_additions': scan_state.get('recent_wishlist_additions', []),
'results': scan_state.get('results', []),
'summary': scan_state.get('summary', {}),
'error': None,
})
def _emit(event_type: str, **payload):
if progress_callback:
try:
progress_callback(event_type, payload)
except Exception:
logger.debug("Watchlist scan progress callback failed for %s", event_type, exc_info=True)
_emit('scan_started', profile_id=profile_id, total_artists=len(watchlist_artists))
# Rate-limit-aware backfill of artist IDs for the providers we can safely resolve.
providers_to_backfill = ['itunes', 'deezer']
if self.spotify_client and self.spotify_client.is_spotify_authenticated():
providers_to_backfill.append('spotify')
try:
from config.settings import config_manager as _cfg
if _cfg.get('discogs.token', ''):
providers_to_backfill.append('discogs')
except Exception:
pass
for provider in providers_to_backfill:
try:
logger.info("Checking for missing %s IDs in watchlist...", provider)
self._backfill_missing_ids(watchlist_artists, provider)
except Exception as backfill_error:
logger.warning("Error during %s ID backfilling: %s", provider, backfill_error)
lookback_period = self._get_lookback_period_setting()
is_full_discography = (lookback_period == 'all')
artist_count = len(watchlist_artists)
base_artist_delay = DELAY_BETWEEN_ARTISTS
base_album_delay = DELAY_BETWEEN_ALBUMS
if is_full_discography:
base_artist_delay *= 2.0
base_album_delay *= 2.0
if artist_count > 200:
base_artist_delay *= 1.5
base_album_delay *= 1.25
elif artist_count > 100:
base_artist_delay *= 1.25
artist_delay = base_artist_delay
album_delay = base_album_delay
logger.info(
"Scan parameters: %s artists, lookback=%s, delays: %.1fs/artist, %.1fs/album",
artist_count,
lookback_period,
artist_delay,
album_delay,
)
for i, artist in enumerate(watchlist_artists):
if cancel_check and cancel_check():
logger.info("Watchlist scan cancelled after %s/%s artists", i, len(watchlist_artists))
if scan_state is not None:
successful_scans = [r for r in scan_results if r.success]
scan_state['status'] = 'cancelled'
scan_state['current_phase'] = 'cancelled'
scan_state['summary'] = {
'total_artists': i,
'successful_scans': len(successful_scans),
'new_tracks_found': sum(r.new_tracks_found for r in successful_scans),
'tracks_added_to_wishlist': sum(r.tracks_added_to_wishlist for r in successful_scans),
'cancelled': True,
}
_emit('cancelled', processed=i, total=len(watchlist_artists))
break
try:
artist_image_url = self.get_artist_image_url(artist) or ''
absolute_index = artist_index_offset + i + 1
if scan_state is not None:
scan_state.update({
'current_artist_index': absolute_index,
'current_artist_name': artist.artist_name,
'current_artist_image_url': artist_image_url,
'current_phase': 'fetching_discography',
'albums_to_check': 0,
'albums_checked': 0,
'current_album': '',
'current_album_image_url': '',
'current_track_name': '',
})
_emit(
'artist_started',
artist_name=artist.artist_name,
artist_index=absolute_index,
total_artists=total_artists_override if total_artists_override is not None else len(watchlist_artists),
profile_id=profile_id,
artist_image_url=artist_image_url,
)
albums = self.get_artist_discography_for_watchlist(artist, artist.last_scan_timestamp)
if albums is None:
scan_results.append(ScanResult(
artist_name=artist.artist_name,
spotify_artist_id=artist.spotify_artist_id or '',
albums_checked=0,
new_tracks_found=0,
tracks_added_to_wishlist=0,
success=False,
error_message="Failed to get artist discography",
))
_emit(
'artist_error',
artist_name=artist.artist_name,
profile_id=profile_id,
error_message="Failed to get artist discography",
)
continue
if scan_state is not None:
scan_state.update({
'current_phase': 'checking_albums',
'albums_to_check': len(albums),
'albums_checked': 0,
})
artist_new_tracks = 0
artist_added_tracks = 0
for album_index, album in enumerate(albums):
try:
album_data = self.metadata_service.get_album(album.id)
if not album_data or 'tracks' not in album_data:
logger.debug("Skipping album %s (id=%s): no track data returned", album.name, album.id)
continue
tracks = album_data['tracks']['items']
if not self._should_include_release(len(tracks), artist):
continue
album_image_url = ''
if 'images' in album_data and album_data['images']:
album_image_url = album_data['images'][0]['url']
if scan_state is not None:
scan_state.update({
'albums_checked': album_index + 1,
'current_album': album.name,
'current_album_image_url': album_image_url,
'current_phase': f'checking_album_{album_index + 1}_of_{len(albums)}',
})
_emit(
'album_started',
artist_name=artist.artist_name,
album_name=album.name,
album_index=album_index + 1,
total_albums=len(albums),
album_image_url=album_image_url,
)
for track in tracks:
if not self._should_include_track(track, album_data, artist):
continue
track_name = track.get('name', 'Unknown Track')
if scan_state is not None:
scan_state['current_track_name'] = track_name
if self.is_track_missing_from_library(track, album_name=album_data.get('name')):
artist_new_tracks += 1
if scan_state is not None:
scan_state['tracks_found_this_scan'] += 1
if self.add_track_to_wishlist(track, album_data, artist):
artist_added_tracks += 1
if scan_state is not None:
scan_state['tracks_added_this_scan'] += 1
track_artists = track.get('artists', [])
track_artist_name = track_artists[0].get('name', 'Unknown Artist') if track_artists else 'Unknown Artist'
if scan_state is not None:
scan_state['recent_wishlist_additions'].insert(0, {
'track_name': track_name,
'artist_name': track_artist_name,
'album_image_url': album_image_url,
})
if len(scan_state['recent_wishlist_additions']) > 10:
scan_state['recent_wishlist_additions'].pop()
if album_index < len(albums) - 1:
time.sleep(album_delay)
except Exception as e:
logger.warning("Error checking album %s: %s", album.name, e)
continue
self.update_artist_scan_timestamp(artist)
scan_results.append(ScanResult(
artist_name=artist.artist_name,
spotify_artist_id=artist.spotify_artist_id or '',
albums_checked=len(albums),
new_tracks_found=artist_new_tracks,
tracks_added_to_wishlist=artist_added_tracks,
success=True,
))
_emit(
'artist_completed',
artist_name=artist.artist_name,
artist_index=absolute_index,
total_artists=total_artists_override if total_artists_override is not None else len(watchlist_artists),
profile_id=profile_id,
albums_checked=len(albums),
new_tracks_found=artist_new_tracks,
tracks_added_to_wishlist=artist_added_tracks,
)
try:
if scan_state is not None:
scan_state['current_phase'] = 'fetching_similar_artists'
source_artist_id = artist.spotify_artist_id or artist.itunes_artist_id or str(artist.id)
artist_profile_id = getattr(artist, 'profile_id', profile_id)
spotify_authenticated = self.spotify_client and self.spotify_client.is_spotify_authenticated()
if self.database.has_fresh_similar_artists(source_artist_id, days_threshold=30, require_spotify=spotify_authenticated, profile_id=artist_profile_id):
logger.info("Similar artists for %s are cached and fresh (profile %s)", artist.artist_name, artist_profile_id)
self._backfill_similar_artists_itunes_ids(source_artist_id, profile_id=artist_profile_id)
else:
logger.info("Fetching similar artists for %s (profile %s)...", artist.artist_name, artist_profile_id)
self.update_similar_artists(artist, profile_id=artist_profile_id)
logger.info("Similar artists updated for %s", artist.artist_name)
except Exception as similar_error:
logger.warning("Failed to update similar artists for %s: %s", artist.artist_name, similar_error)
if i < len(watchlist_artists) - 1:
if scan_state is not None:
scan_state['current_phase'] = 'rate_limiting'
time.sleep(artist_delay)
except Exception as e:
logger.error("Error scanning artist %s: %s", artist.artist_name, e)
scan_results.append(ScanResult(
artist_name=artist.artist_name,
spotify_artist_id=artist.spotify_artist_id or '',
albums_checked=0,
new_tracks_found=0,
tracks_added_to_wishlist=0,
success=False,
error_message=str(e),
))
_emit(
'artist_error',
artist_name=artist.artist_name,
artist_index=artist_index_offset + i + 1,
total_artists=total_artists_override if total_artists_override is not None else len(watchlist_artists),
profile_id=profile_id,
error_message=str(e),
)
if scan_state is not None:
successful_scans = [r for r in scan_results if r.success]
total_new_tracks = sum(r.new_tracks_found for r in successful_scans)
total_added_to_wishlist = sum(r.tracks_added_to_wishlist for r in successful_scans)
scan_state['results'] = list(scan_state.get('results', [])) + scan_results
if scan_state.get('status') != 'cancelled':
scan_state['status'] = 'completed'
scan_state['completed_at'] = datetime.now()
scan_state['current_phase'] = 'completed'
scan_state['summary'] = {
'total_artists': len(scan_results),
'successful_scans': len(successful_scans),
'new_tracks_found': total_new_tracks,
'tracks_added_to_wishlist': total_added_to_wishlist,
}
_emit(
'scan_completed',
profile_id=profile_id,
total_artists=len(watchlist_artists),
total_scanned=len(scan_results),
successful_scans=len([r for r in scan_results if r.success]),
new_tracks_found=sum(r.new_tracks_found for r in scan_results if r.success),
tracks_added_to_wishlist=sum(r.tracks_added_to_wishlist for r in scan_results if r.success),
)
return scan_results
def get_artist_discography(self, spotify_artist_id: str, last_scan_timestamp: Optional[datetime] = None) -> Optional[List]:
"""

@ -0,0 +1,199 @@
import sys
import types
if "spotipy" not in sys.modules:
spotipy = types.ModuleType("spotipy")
class _DummySpotify:
def __init__(self, *args, **kwargs):
pass
oauth2 = types.ModuleType("spotipy.oauth2")
class _DummyOAuth:
def __init__(self, *args, **kwargs):
pass
spotipy.Spotify = _DummySpotify
oauth2.SpotifyOAuth = _DummyOAuth
oauth2.SpotifyClientCredentials = _DummyOAuth
spotipy.oauth2 = oauth2
sys.modules["spotipy"] = spotipy
sys.modules["spotipy.oauth2"] = oauth2
import core.watchlist_scanner as watchlist_scanner_module
from core.watchlist_scanner import WatchlistScanner
class _FakeSpotifyClient:
def is_spotify_authenticated(self):
return False
class _FakeMetadataService:
def __init__(self, album_data):
self.spotify = _FakeSpotifyClient()
self.itunes = types.SimpleNamespace()
self._album_data = album_data
def get_album(self, album_id):
return self._album_data
class _FakeDB:
def __init__(self, artists):
self.artists = artists
self.similar_calls = []
def get_watchlist_artists(self, profile_id=None):
return list(self.artists)
def has_fresh_similar_artists(self, *args, **kwargs):
self.similar_calls.append((args, kwargs))
return False
def _build_artist(name="Artist One", profile_id=11):
return types.SimpleNamespace(
artist_name=name,
spotify_artist_id="sp-artist",
itunes_artist_id="it-artist",
deezer_artist_id="dz-artist",
discogs_artist_id="dg-artist",
last_scan_timestamp=None,
id=123,
profile_id=profile_id,
include_albums=True,
include_eps=True,
include_singles=True,
include_live=False,
include_remixes=False,
include_acoustic=False,
include_compilations=False,
include_instrumentals=False,
lookback_days=7,
image_url=None,
)
def _build_scanner(album_data, artists):
scanner = WatchlistScanner(metadata_service=_FakeMetadataService(album_data))
scanner._database = _FakeDB(artists)
scanner._wishlist_service = types.SimpleNamespace()
scanner._matching_engine = types.SimpleNamespace()
return scanner
def test_scan_watchlist_profile_loads_artists_and_applies_overrides(monkeypatch):
artist = _build_artist()
scanner = _build_scanner({"tracks": {"items": []}}, [artist])
loaded_profiles = []
override_calls = []
scan_calls = []
monkeypatch.setattr(scanner.database, "get_watchlist_artists", lambda profile_id=None: loaded_profiles.append(profile_id) or [artist])
monkeypatch.setattr(scanner, "_apply_global_watchlist_overrides", lambda artists: override_calls.append(list(artists)))
monkeypatch.setattr(scanner, "scan_watchlist_artists", lambda artists, **kwargs: scan_calls.append((list(artists), kwargs)) or ["ok"])
result = scanner.scan_watchlist_profile(42)
assert result == ["ok"]
assert loaded_profiles == [42]
assert override_calls and override_calls[0][0].artist_name == "Artist One"
assert scan_calls and scan_calls[0][0][0].artist_name == "Artist One"
assert scan_calls[0][1]["profile_id"] == 42
def test_scan_watchlist_artists_scans_tracks_and_updates_state(monkeypatch):
monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0)
monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ALBUMS", 0)
artist = _build_artist()
album = types.SimpleNamespace(id="album-1", name="Album One")
album_data = {
"name": "Album One",
"images": [{"url": "https://example.com/album.jpg"}],
"tracks": {
"items": [
{
"id": "track-1",
"name": "Track One",
"track_number": 1,
"disc_number": 1,
"artists": [{"name": "Artist One"}],
}
]
},
}
scanner = _build_scanner(album_data, [artist])
scanner._database.has_fresh_similar_artists = lambda *args, **kwargs: False
monkeypatch.setattr(scanner, "_backfill_missing_ids", lambda *args, **kwargs: None)
monkeypatch.setattr(scanner, "get_artist_image_url", lambda *_args, **_kwargs: "https://example.com/artist.jpg")
monkeypatch.setattr(scanner, "get_artist_discography_for_watchlist", lambda *_args, **_kwargs: [album])
monkeypatch.setattr(scanner, "_get_lookback_period_setting", lambda: "30")
monkeypatch.setattr(scanner, "_get_rescan_cutoff", lambda: None)
monkeypatch.setattr(scanner, "_should_include_release", lambda *_args, **_kwargs: True)
monkeypatch.setattr(scanner, "_should_include_track", lambda *_args, **_kwargs: True)
monkeypatch.setattr(scanner, "is_track_missing_from_library", lambda *_args, **_kwargs: True)
monkeypatch.setattr(scanner, "add_track_to_wishlist", lambda *_args, **_kwargs: True)
monkeypatch.setattr(scanner, "update_artist_scan_timestamp", lambda *_args, **_kwargs: True)
monkeypatch.setattr(scanner, "update_similar_artists", lambda *_args, **_kwargs: True)
monkeypatch.setattr(scanner, "_backfill_similar_artists_itunes_ids", lambda *_args, **_kwargs: 0)
scan_state = {}
results = scanner.scan_watchlist_artists([artist], scan_state=scan_state)
assert len(results) == 1
assert results[0].success is True
assert results[0].new_tracks_found == 1
assert results[0].tracks_added_to_wishlist == 1
assert scan_state["status"] == "completed"
assert scan_state["summary"]["successful_scans"] == 1
assert scan_state["summary"]["new_tracks_found"] == 1
assert scan_state["summary"]["tracks_added_to_wishlist"] == 1
assert scan_state["recent_wishlist_additions"][0]["track_name"] == "Track One"
def test_scan_watchlist_artists_honors_cancel_check(monkeypatch):
monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0)
monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ALBUMS", 0)
artist_a = _build_artist("Artist One")
artist_b = _build_artist("Artist Two")
album = types.SimpleNamespace(id="album-1", name="Album One")
album_data = {
"name": "Album One",
"tracks": {"items": []},
}
scanner = _build_scanner(album_data, [artist_a, artist_b])
scanner._database.has_fresh_similar_artists = lambda *args, **kwargs: False
monkeypatch.setattr(scanner, "_backfill_missing_ids", lambda *args, **kwargs: None)
monkeypatch.setattr(scanner, "get_artist_image_url", lambda *_args, **_kwargs: "https://example.com/artist.jpg")
monkeypatch.setattr(scanner, "get_artist_discography_for_watchlist", lambda *_args, **_kwargs: [album])
monkeypatch.setattr(scanner, "_get_lookback_period_setting", lambda: "30")
monkeypatch.setattr(scanner, "_get_rescan_cutoff", lambda: None)
monkeypatch.setattr(scanner, "_should_include_release", lambda *_args, **_kwargs: True)
monkeypatch.setattr(scanner, "_should_include_track", lambda *_args, **_kwargs: True)
monkeypatch.setattr(scanner, "is_track_missing_from_library", lambda *_args, **_kwargs: False)
monkeypatch.setattr(scanner, "add_track_to_wishlist", lambda *_args, **_kwargs: False)
monkeypatch.setattr(scanner, "update_artist_scan_timestamp", lambda *_args, **_kwargs: True)
monkeypatch.setattr(scanner, "update_similar_artists", lambda *_args, **_kwargs: True)
monkeypatch.setattr(scanner, "_backfill_similar_artists_itunes_ids", lambda *_args, **_kwargs: 0)
cancels = iter([False, True])
scan_state = {}
results = scanner.scan_watchlist_artists(
[artist_a, artist_b],
scan_state=scan_state,
cancel_check=lambda: next(cancels),
)
assert len(results) == 1
assert results[0].artist_name == "Artist One"
assert scan_state["status"] == "cancelled"
assert scan_state["summary"]["cancelled"] is True
assert scan_state["summary"]["successful_scans"] == 1

@ -39802,9 +39802,6 @@ def start_watchlist_scan():
database = get_database()
watchlist_artists = database.get_watchlist_artists(profile_id=scan_profile_id)
# Apply global overrides if enabled
_apply_watchlist_global_overrides(watchlist_artists)
if not watchlist_artists:
watchlist_scan_state['status'] = 'completed'
watchlist_scan_state['summary'] = {
@ -39939,247 +39936,26 @@ def start_watchlist_scan():
# Pause enrichment workers during scan to reduce API contention
_ew_state = _pause_enrichment_workers('watchlist scan')
# Dynamic delay calculation based on scan scope
lookback_period = scanner._get_lookback_period_setting()
is_full_discography = (lookback_period == 'all')
artist_count = len(watchlist_artists)
base_artist_delay = 2.0
base_album_delay = 0.5
# Scale up for full discography (way more albums per artist)
if is_full_discography:
base_artist_delay *= 2.0
base_album_delay *= 2.0
# Scale up further for large artist counts (sustained API pressure)
if artist_count > 200:
base_artist_delay *= 1.5
base_album_delay *= 1.25
elif artist_count > 100:
base_artist_delay *= 1.25
artist_delay = base_artist_delay
album_delay = base_album_delay
print(f"Scan parameters: {artist_count} artists, lookback={lookback_period}, "
f"delays: {artist_delay:.1f}s/artist, {album_delay:.1f}s/album")
# Circuit breaker: pause scan on consecutive rate-limit failures
consecutive_failures = 0
CIRCUIT_BREAKER_THRESHOLD = 3
circuit_breaker_pause = 60 # seconds, doubles each trigger, max 600s
for i, artist in enumerate(watchlist_artists):
# Check for cancel request
if watchlist_scan_state.get('cancel_requested'):
print(f"[Manual Watchlist Scan] Cancel requested after {i}/{len(watchlist_artists)} artists")
watchlist_scan_state['status'] = 'cancelled'
watchlist_scan_state['current_phase'] = 'cancelled'
watchlist_scan_state['summary'] = {
'total_artists': i,
'successful_scans': len([r for r in scan_results if r.success]),
'new_tracks_found': sum(r.new_tracks_found for r in scan_results if r.success),
'tracks_added_to_wishlist': sum(r.tracks_added_to_wishlist for r in scan_results if r.success),
'cancelled': True
}
break
try:
# Fetch artist image using provider-aware method
artist_image_url = scanner.get_artist_image_url(artist) or ''
# Update progress
watchlist_scan_state.update({
'current_artist_index': i + 1,
'current_artist_name': artist.artist_name,
'current_artist_image_url': artist_image_url,
'current_phase': 'fetching_discography',
'albums_to_check': 0,
'albums_checked': 0,
'current_album': '',
'current_album_image_url': '',
'current_track_name': ''
})
# Get artist discography using provider-aware method
albums = scanner.get_artist_discography_for_watchlist(artist, artist.last_scan_timestamp)
if albums is None:
scan_results.append(type('ScanResult', (), {
'artist_name': artist.artist_name,
'spotify_artist_id': artist.spotify_artist_id,
'albums_checked': 0,
'new_tracks_found': 0,
'tracks_added_to_wishlist': 0,
'success': False,
'error_message': "Failed to get artist discography"
})())
continue
# Update with album count
watchlist_scan_state.update({
'current_phase': 'checking_albums',
'albums_to_check': len(albums),
'albums_checked': 0
})
# Track progress for this artist
artist_new_tracks = 0
artist_added_tracks = 0
# Scan each album
for album_index, album in enumerate(albums):
try:
# Get album tracks using provider-aware method
album_data = scanner.metadata_service.get_album(album.id)
if not album_data or 'tracks' not in album_data:
logger.debug(f"Skipping album {album.name} (id={album.id}): no track data returned")
continue
tracks = album_data['tracks']['items']
# Check release type filter (album/EP/single)
if not scanner._should_include_release(len(tracks), artist):
continue
# Get album image
album_image_url = ''
if 'images' in album_data and album_data['images']:
album_image_url = album_data['images'][0]['url']
watchlist_scan_state.update({
'albums_checked': album_index + 1,
'current_album': album.name,
'current_album_image_url': album_image_url,
'current_phase': f'checking_album_{album_index + 1}_of_{len(albums)}'
})
# Check each track
for track in tracks:
# Check content type filter (live/remix/acoustic/compilation)
if not scanner._should_include_track(track, album_data, artist):
continue
# Update current track being processed
track_name = track.get('name', 'Unknown Track')
watchlist_scan_state['current_track_name'] = track_name
if scanner.is_track_missing_from_library(track):
artist_new_tracks += 1
watchlist_scan_state['tracks_found_this_scan'] += 1
# Add to wishlist
if scanner.add_track_to_wishlist(track, album_data, artist):
artist_added_tracks += 1
watchlist_scan_state['tracks_added_this_scan'] += 1
# Add to recent wishlist additions feed
track_artists = track.get('artists', [])
track_artist_name = track_artists[0].get('name', 'Unknown Artist') if track_artists else 'Unknown Artist'
watchlist_scan_state['recent_wishlist_additions'].insert(0, {
'track_name': track_name,
'artist_name': track_artist_name,
'album_image_url': album_image_url
})
# Keep only last 10
if len(watchlist_scan_state['recent_wishlist_additions']) > 10:
watchlist_scan_state['recent_wishlist_additions'].pop()
# Rate-limited delay between albums
import time
time.sleep(album_delay)
except Exception as e:
print(f"Error checking album {album.name}: {e}")
continue
# Update scan timestamp
scanner.update_artist_scan_timestamp(artist)
# Store result
scan_results.append(type('ScanResult', (), {
'artist_name': artist.artist_name,
'spotify_artist_id': artist.spotify_artist_id,
'albums_checked': len(albums),
'new_tracks_found': artist_new_tracks,
'tracks_added_to_wishlist': artist_added_tracks,
'success': True,
'error_message': None
})())
print(f"Scanned {artist.artist_name}: {artist_new_tracks} new tracks found, {artist_added_tracks} added to wishlist")
# Fetch similar artists for discovery feature
# This is critical for the discover page to work
try:
watchlist_scan_state['current_phase'] = 'fetching_similar_artists'
source_artist_id = artist.spotify_artist_id or artist.itunes_artist_id or str(artist.id)
# If Spotify is authenticated, also require Spotify IDs to be present
spotify_authenticated = spotify_client and spotify_client.is_spotify_authenticated()
if database.has_fresh_similar_artists(source_artist_id, days_threshold=30, require_spotify=spotify_authenticated, profile_id=scan_profile_id):
print(f" Similar artists for {artist.artist_name} are cached and fresh")
# Still backfill missing iTunes IDs
scanner._backfill_similar_artists_itunes_ids(source_artist_id, profile_id=scan_profile_id)
else:
print(f" Fetching similar artists for {artist.artist_name}...")
scanner.update_similar_artists(artist, profile_id=scan_profile_id)
print(f" Similar artists updated for {artist.artist_name}")
except Exception as similar_error:
print(f" Failed to update similar artists for {artist.artist_name}: {similar_error}")
# Delay between artists
if i < len(watchlist_artists) - 1:
watchlist_scan_state['current_phase'] = 'rate_limiting'
time.sleep(artist_delay)
# Reset circuit breaker on successful artist scan
consecutive_failures = 0
circuit_breaker_pause = 60
except Exception as e:
print(f"Error scanning artist {artist.artist_name}: {e}")
# Circuit breaker: detect consecutive rate-limit failures
error_str = str(e).lower()
if "429" in error_str or "rate limit" in error_str:
consecutive_failures += 1
if consecutive_failures >= CIRCUIT_BREAKER_THRESHOLD:
print(f"Circuit breaker: {consecutive_failures} consecutive rate-limit failures, pausing {circuit_breaker_pause}s")
watchlist_scan_state['current_phase'] = 'circuit_breaker_pause'
time.sleep(circuit_breaker_pause)
circuit_breaker_pause = min(circuit_breaker_pause * 2, 600)
consecutive_failures = 0
else:
consecutive_failures = 0
scan_results.append(type('ScanResult', (), {
'artist_name': artist.artist_name,
'spotify_artist_id': artist.spotify_artist_id,
'albums_checked': 0,
'new_tracks_found': 0,
'tracks_added_to_wishlist': 0,
'success': False,
'error_message': str(e)
})())
scan_results = scanner.scan_watchlist_profile(
scan_profile_id,
watchlist_artists=watchlist_artists,
scan_state=watchlist_scan_state,
cancel_check=lambda: watchlist_scan_state.get('cancel_requested', False),
)
# Store final results (skip if cancelled — already set by cancel handler)
was_cancelled = watchlist_scan_state.get('cancel_requested', False)
if not was_cancelled:
watchlist_scan_state['status'] = 'completed'
watchlist_scan_state['results'] = scan_results
watchlist_scan_state['completed_at'] = datetime.now()
_artmap_cache_invalidate(scan_profile_id)
watchlist_scan_state['current_phase'] = 'completed'
# Calculate summary
successful_scans = [r for r in scan_results if r.success]
total_new_tracks = sum(r.new_tracks_found for r in successful_scans)
total_added_to_wishlist = sum(r.tracks_added_to_wishlist for r in successful_scans)
watchlist_scan_state['status'] = 'completed'
watchlist_scan_state['results'] = scan_results
watchlist_scan_state['completed_at'] = datetime.now()
watchlist_scan_state['current_phase'] = 'completed'
watchlist_scan_state['summary'] = {
'total_artists': len(scan_results),
'successful_scans': len(successful_scans),
@ -40773,33 +40549,6 @@ def watchlist_global_config():
traceback.print_exc()
return jsonify({"success": False, "error": str(e)}), 500
def _apply_watchlist_global_overrides(watchlist_artists):
"""If global override is enabled, overwrite per-artist settings on WatchlistArtist objects."""
if not config_manager.get('watchlist.global_override_enabled', False):
return
# Read global settings once
g_albums = config_manager.get('watchlist.global_include_albums', True)
g_eps = config_manager.get('watchlist.global_include_eps', True)
g_singles = config_manager.get('watchlist.global_include_singles', True)
g_live = config_manager.get('watchlist.global_include_live', False)
g_remixes = config_manager.get('watchlist.global_include_remixes', False)
g_acoustic = config_manager.get('watchlist.global_include_acoustic', False)
g_compilations = config_manager.get('watchlist.global_include_compilations', False)
g_instrumentals = config_manager.get('watchlist.global_include_instrumentals', False)
print(f"[Watchlist] Global override is ACTIVE — applying to {len(watchlist_artists)} artists "
f"(albums={g_albums}, eps={g_eps}, singles={g_singles}, live={g_live}, "
f"remixes={g_remixes}, acoustic={g_acoustic}, compilations={g_compilations}, "
f"instrumentals={g_instrumentals})")
for artist in watchlist_artists:
artist.include_albums = g_albums
artist.include_eps = g_eps
artist.include_singles = g_singles
artist.include_live = g_live
artist.include_remixes = g_remixes
artist.include_acoustic = g_acoustic
artist.include_compilations = g_compilations
artist.include_instrumentals = g_instrumentals
def _update_similar_artists_worker():
"""Background worker to update similar artists for all watchlist artists"""
global similar_artists_update_state
@ -40954,9 +40703,6 @@ def _process_watchlist_scan_automatically(automation_id=None, profile_id=None):
scanner = get_watchlist_scanner(spotify_client)
all_profiles = scan_profiles # Used later for discovery pool population
# Apply global overrides if enabled
_apply_watchlist_global_overrides(watchlist_artists)
# Initialize detailed progress tracking (same as manual scan)
watchlist_scan_state = {
'status': 'scanning',
@ -40985,259 +40731,79 @@ def _process_watchlist_scan_automatically(automation_id=None, profile_id=None):
# Pause enrichment workers during scan to reduce API contention
_ew_state = _pause_enrichment_workers('auto-watchlist scan')
# Dynamic delay calculation based on scan scope
lookback_period = scanner._get_lookback_period_setting()
is_full_discography = (lookback_period == 'all')
artist_count = len(watchlist_artists)
base_artist_delay = 2.0
base_album_delay = 0.5
# Scale up for full discography (way more albums per artist)
if is_full_discography:
base_artist_delay *= 2.0
base_album_delay *= 2.0
# Scale up further for large artist counts (sustained API pressure)
if artist_count > 200:
base_artist_delay *= 1.5
base_album_delay *= 1.25
elif artist_count > 100:
base_artist_delay *= 1.25
artist_delay = base_artist_delay
album_delay = base_album_delay
print(f"[Auto-Watchlist] Scan parameters: {artist_count} artists, lookback={lookback_period}, "
f"delays: {artist_delay:.1f}s/artist, {album_delay:.1f}s/album")
# Circuit breaker: pause scan on consecutive rate-limit failures
consecutive_failures = 0
CIRCUIT_BREAKER_THRESHOLD = 3
circuit_breaker_pause = 60 # seconds, doubles each trigger, max 600s
# Scan each artist with detailed tracking
for i, artist in enumerate(watchlist_artists):
# Check for cancel request
if watchlist_scan_state.get('cancel_requested'):
print(f"[Auto-Watchlist] Cancel requested after {i}/{len(watchlist_artists)} artists")
watchlist_scan_state['status'] = 'cancelled'
watchlist_scan_state['current_phase'] = 'cancelled'
watchlist_scan_state['summary'] = {
'total_artists': i,
'successful_scans': len([r for r in scan_results if r.success]),
'new_tracks_found': sum(r.new_tracks_found for r in scan_results if r.success),
'tracks_added_to_wishlist': sum(r.tracks_added_to_wishlist for r in scan_results if r.success),
'cancelled': True
}
_update_automation_progress(automation_id, progress=100, phase='Cancelled by user',
log_line='Scan cancelled by user', log_type='warning')
break
try:
# Fetch artist image using provider-aware method
artist_image_url = scanner.get_artist_image_url(artist) or ''
pct = 5 + (i / max(1, len(watchlist_artists))) * 90
_update_automation_progress(automation_id, progress=pct,
phase=f'Scanning: {artist.artist_name} ({i+1}/{len(watchlist_artists)})',
current_item=artist.artist_name,
processed=i, total=len(watchlist_artists))
# Update progress
watchlist_scan_state.update({
'current_artist_index': i + 1,
'current_artist_name': artist.artist_name,
'current_artist_image_url': artist_image_url,
'current_phase': 'fetching_discography',
'albums_to_check': 0,
'albums_checked': 0,
'current_album': '',
'current_album_image_url': '',
'current_track_name': ''
})
# Get artist discography using provider-aware method
albums = scanner.get_artist_discography_for_watchlist(artist, artist.last_scan_timestamp)
if albums is None:
scan_results.append(type('ScanResult', (), {
'artist_name': artist.artist_name,
'spotify_artist_id': artist.spotify_artist_id,
'albums_checked': 0,
'new_tracks_found': 0,
'tracks_added_to_wishlist': 0,
'success': False,
'error_message': "Failed to get artist discography"
})())
continue
# Update with album count
watchlist_scan_state.update({
'current_phase': 'checking_albums',
'albums_to_check': len(albums),
'albums_checked': 0
})
# Track progress for this artist
artist_new_tracks = 0
artist_added_tracks = 0
# Scan each album
for album_index, album in enumerate(albums):
try:
# Get album tracks using provider-aware method
album_data = scanner.metadata_service.get_album(album.id)
if not album_data or 'tracks' not in album_data:
logger.debug(f"Skipping album {album.name} (id={album.id}): no track data returned")
continue
tracks = album_data['tracks']['items']
# Check release type filter (album/EP/single)
if not scanner._should_include_release(len(tracks), artist):
continue
# Get album image
album_image_url = ''
if 'images' in album_data and album_data['images']:
album_image_url = album_data['images'][0]['url']
watchlist_scan_state.update({
'albums_checked': album_index + 1,
'current_album': album.name,
'current_album_image_url': album_image_url,
'current_phase': f'checking_album_{album_index + 1}_of_{len(albums)}'
})
# Check each track
for track in tracks:
# Check content type filter (live/remix/acoustic/compilation)
if not scanner._should_include_track(track, album_data, artist):
continue
# Update current track being processed
track_name = track.get('name', 'Unknown Track')
watchlist_scan_state['current_track_name'] = track_name
if scanner.is_track_missing_from_library(track):
artist_new_tracks += 1
watchlist_scan_state['tracks_found_this_scan'] += 1
# Add to wishlist
if scanner.add_track_to_wishlist(track, album_data, artist):
artist_added_tracks += 1
watchlist_scan_state['tracks_added_this_scan'] += 1
# Add to recent wishlist additions feed
track_artists = track.get('artists', [])
track_artist_name = track_artists[0].get('name', 'Unknown Artist') if track_artists else 'Unknown Artist'
watchlist_scan_state['recent_wishlist_additions'].insert(0, {
'track_name': track_name,
'artist_name': track_artist_name,
'album_image_url': album_image_url
})
# Keep only last 10
if len(watchlist_scan_state['recent_wishlist_additions']) > 10:
watchlist_scan_state['recent_wishlist_additions'].pop()
# Rate-limited delay between albums
import time
time.sleep(album_delay)
except Exception as e:
print(f"Error checking album {album.name}: {e}")
continue
# Update scan timestamp
scanner.update_artist_scan_timestamp(artist)
# Store result
scan_results.append(type('ScanResult', (), {
'artist_name': artist.artist_name,
'spotify_artist_id': artist.spotify_artist_id,
'albums_checked': len(albums),
'new_tracks_found': artist_new_tracks,
'tracks_added_to_wishlist': artist_added_tracks,
'success': True,
'error_message': None
})())
print(f"Scanned {artist.artist_name}: {artist_new_tracks} new tracks found, {artist_added_tracks} added to wishlist")
if artist_new_tracks > 0:
_update_automation_progress(automation_id,
log_line=f'{artist.artist_name}{artist_new_tracks} new, {artist_added_tracks} added', log_type='success')
else:
_update_automation_progress(automation_id,
log_line=f'{artist.artist_name} — no new tracks', log_type='skip')
# Emit watchlist_new_release event if new tracks were found
if artist_new_tracks > 0:
try:
if automation_engine:
automation_engine.emit('watchlist_new_release', {
'artist': artist.artist_name,
'new_tracks': str(artist_new_tracks),
'added_to_wishlist': str(artist_added_tracks),
})
except Exception:
pass
# Fetch similar artists for discovery feature (per-profile)
try:
watchlist_scan_state['current_phase'] = 'fetching_similar_artists'
source_artist_id = artist.spotify_artist_id or artist.itunes_artist_id or str(artist.id)
artist_profile_id = getattr(artist, 'profile_id', 1)
spotify_authenticated = spotify_client and spotify_client.is_spotify_authenticated()
if database.has_fresh_similar_artists(source_artist_id, days_threshold=30, require_spotify=spotify_authenticated, profile_id=artist_profile_id):
print(f" Similar artists for {artist.artist_name} are cached and fresh (profile {artist_profile_id})")
scanner._backfill_similar_artists_itunes_ids(source_artist_id, profile_id=artist_profile_id)
else:
print(f" Fetching similar artists for {artist.artist_name} (profile {artist_profile_id})...")
scanner.update_similar_artists(artist, profile_id=artist_profile_id)
print(f" Similar artists updated for {artist.artist_name}")
except Exception as similar_error:
print(f" Failed to update similar artists for {artist.artist_name}: {similar_error}")
# Delay between artists
if i < len(watchlist_artists) - 1:
watchlist_scan_state['current_phase'] = 'rate_limiting'
time.sleep(artist_delay)
# Reset circuit breaker on successful artist scan
consecutive_failures = 0
circuit_breaker_pause = 60
except Exception as e:
print(f"Error scanning artist {artist.artist_name}: {e}")
_update_automation_progress(automation_id,
log_line=f'{artist.artist_name} — error: {str(e)[:60]}', log_type='error')
# Circuit breaker: detect consecutive rate-limit failures
error_str = str(e).lower()
if "429" in error_str or "rate limit" in error_str:
consecutive_failures += 1
if consecutive_failures >= CIRCUIT_BREAKER_THRESHOLD:
print(f"[Auto-Watchlist] Circuit breaker: {consecutive_failures} consecutive rate-limit failures, pausing {circuit_breaker_pause}s")
watchlist_scan_state['current_phase'] = 'circuit_breaker_pause'
time.sleep(circuit_breaker_pause)
circuit_breaker_pause = min(circuit_breaker_pause * 2, 600)
consecutive_failures = 0
def _scan_progress(event_type, payload):
if event_type == 'scan_started':
_update_automation_progress(
automation_id,
progress=5,
phase='Loading watchlist',
log_line=f"{len(watchlist_artists)} artists ({profile_label})",
log_type='info',
)
elif event_type == 'artist_started':
total = max(1, payload.get('total_artists', len(watchlist_artists)))
idx = payload.get('artist_index', 1)
artist_name = payload.get('artist_name', '')
pct = 5 + ((idx - 1) / total) * 90
_update_automation_progress(
automation_id,
progress=pct,
phase=f'Scanning: {artist_name} ({idx}/{total})',
current_item=artist_name,
processed=idx - 1,
total=total,
)
elif event_type == 'artist_completed':
artist_name = payload.get('artist_name', '')
new_tracks = payload.get('new_tracks_found', 0)
added = payload.get('tracks_added_to_wishlist', 0)
if new_tracks > 0:
_update_automation_progress(
automation_id,
log_line=f'{artist_name}{new_tracks} new, {added} added',
log_type='success',
)
else:
consecutive_failures = 0
_update_automation_progress(
automation_id,
log_line=f'{artist_name} — no new tracks',
log_type='skip',
)
elif event_type == 'artist_error':
artist_name = payload.get('artist_name', '')
error_message = payload.get('error_message', 'error')
_update_automation_progress(
automation_id,
log_line=f'{artist_name} — error: {error_message[:60]}',
log_type='error',
)
elif event_type == 'cancelled':
_update_automation_progress(
automation_id,
progress=100,
phase='Cancelled by user',
log_line='Scan cancelled by user',
log_type='warning',
)
elif event_type == 'scan_completed':
_update_automation_progress(
automation_id,
progress=95,
phase='Scan complete',
log_line=(
f"Scanned {payload.get('successful_scans', 0)} artists — "
f"{payload.get('new_tracks_found', 0)} new tracks, "
f"{payload.get('tracks_added_to_wishlist', 0)} added to wishlist"
),
log_type='success' if payload.get('new_tracks_found', 0) > 0 else 'info',
)
scan_results.append(type('ScanResult', (), {
'artist_name': artist.artist_name,
'spotify_artist_id': artist.spotify_artist_id,
'albums_checked': 0,
'new_tracks_found': 0,
'tracks_added_to_wishlist': 0,
'success': False,
'error_message': str(e)
})())
continue
scan_results = scanner.scan_watchlist_artists(
watchlist_artists,
scan_state=watchlist_scan_state,
progress_callback=_scan_progress,
cancel_check=lambda: watchlist_scan_state.get('cancel_requested'),
)
# Update state with results (skip if cancelled — already set by cancel handler)
was_cancelled = watchlist_scan_state.get('cancel_requested', False)

Loading…
Cancel
Save