From f126cf7118e007df84021ed6494a66617c90b6b5 Mon Sep 17 00:00:00 2001 From: Broque Thomas Date: Thu, 22 Jan 2026 17:04:23 -0800 Subject: [PATCH] Add cross-provider support for watchlist artists Introduces iTunes artist ID support to WatchlistArtist and database schema, enabling proactive backfilling of missing provider IDs (Spotify/iTunes) for watchlist artists. Updates WatchlistScanner to use MetadataService for provider-agnostic scanning and ID matching, and modifies web_server to support scans with either provider. Includes new database migration and update methods for iTunes and Spotify artist IDs. --- core/watchlist_scanner.py | 117 ++++++++++++++++++++++++++++++++++++- database/music_database.py | 66 ++++++++++++++++++++- web_server.py | 35 +++++++++-- 3 files changed, 209 insertions(+), 9 deletions(-) diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index 3d25872e..d85baec9 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -232,12 +232,21 @@ class ScanResult: class WatchlistScanner: """Service for scanning watched artists for new releases""" - def __init__(self, spotify_client: SpotifyClient, database_path: str = "database/music_library.db"): - self.spotify_client = spotify_client + def __init__(self, spotify_client: SpotifyClient = None, metadata_service=None, database_path: str = "database/music_library.db"): + # Support both old (spotify_client) and new (metadata_service) initialization self.database_path = database_path self._database = None self._wishlist_service = None self._matching_engine = None + + if metadata_service: + self._metadata_service = metadata_service + self.spotify_client = metadata_service.spotify # For backward compatibility + elif spotify_client: + self.spotify_client = spotify_client + self._metadata_service = None # Lazy load if needed + else: + raise ValueError("Must provide either spotify_client or metadata_service") @property def database(self): @@ -260,6 +269,14 @@ class WatchlistScanner: self._matching_engine = MusicMatchingEngine() return self._matching_engine + @property + def metadata_service(self): + """Get or create MetadataService instance (lazy loading)""" + if self._metadata_service is None: + from core.metadata_service import MetadataService + self._metadata_service = MetadataService() + return self._metadata_service + def scan_all_watchlist_artists(self) -> List[ScanResult]: """ Scan artists in the watchlist for new releases. @@ -326,6 +343,22 @@ class WatchlistScanner: watchlist_artists = artists_to_scan + # PROACTIVE ID BACKFILLING (cross-provider support) + # Before scanning, ensure all artists have IDs for the current provider + logger.info(f"DEBUG: About to check backfilling. _metadata_service = {getattr(self, '_metadata_service', 'ATTRIBUTE MISSING')}") + if self._metadata_service is not None: + try: + active_provider = self._metadata_service.get_active_provider() + logger.info(f"🔍 Checking for missing {active_provider} IDs in watchlist...") + self._backfill_missing_ids(all_watchlist_artists, active_provider) + except Exception as backfill_error: + logger.warning(f"Error during ID backfilling: {backfill_error}") + import traceback + traceback.print_exc() + # Continue with scan even if backfilling fails + else: + logger.warning(f"⚠️ Backfilling SKIPPED - _metadata_service is None") + scan_results = [] for i, artist in enumerate(watchlist_artists): try: @@ -559,6 +592,86 @@ class WatchlistScanner: logger.error(f"Error getting discography for artist {spotify_artist_id}: {e}") return None + def _backfill_missing_ids(self, artists: List[WatchlistArtist], provider: str): + """ + Proactively match ALL artists missing IDs for the current provider. + + Example: User has 50 artists with only Spotify IDs. + When iTunes becomes active, this matches ALL 50 to iTunes in one batch. + """ + artists_to_match = [] + + if provider == 'spotify': + # Find all artists missing Spotify IDs + artists_to_match = [a for a in artists if not a.spotify_artist_id and a.itunes_artist_id] + elif provider == 'itunes': + # Find all artists missing iTunes IDs + artists_to_match = [a for a in artists if not a.itunes_artist_id and a.spotify_artist_id] + + if not artists_to_match: + logger.info(f"✅ All artists already have {provider} IDs") + return + + logger.info(f"🔄 Backfilling {len(artists_to_match)} artists with {provider} IDs...") + + matched_count = 0 + for artist in artists_to_match: + try: + if provider == 'spotify': + new_id = self._match_to_spotify(artist.artist_name) + if new_id: + self.database.update_watchlist_spotify_id(artist.id, new_id) + artist.spotify_artist_id = new_id # Update in memory + matched_count += 1 + logger.info(f"✅ Matched '{artist.artist_name}' to Spotify: {new_id}") + + elif provider == 'itunes': + new_id = self._match_to_itunes(artist.artist_name) + if new_id: + self.database.update_watchlist_itunes_id(artist.id, new_id) + artist.itunes_artist_id = new_id # Update in memory + matched_count += 1 + logger.info(f"✅ Matched '{artist.artist_name}' to iTunes: {new_id}") + + # Small delay to avoid API rate limits + time.sleep(0.3) + + except Exception as e: + logger.warning(f"Could not match '{artist.artist_name}' to {provider}: {e}") + continue + + logger.info(f"✅ Backfilled {matched_count}/{len(artists_to_match)} artists with {provider} IDs") + + def _match_to_spotify(self, artist_name: str) -> Optional[str]: + """Match artist name to Spotify ID""" + try: + # Use metadata service if available, fallback to spotify_client + if hasattr(self, '_metadata_service') and self._metadata_service: + results = self._metadata_service.spotify.search_artists(artist_name, limit=1) + else: + results = self.spotify_client.search_artists(artist_name, limit=1) + + if results: + return results[0].id + except Exception as e: + logger.warning(f"Could not match {artist_name} to Spotify: {e}") + return None + + def _match_to_itunes(self, artist_name: str) -> Optional[str]: + """Match artist name to iTunes ID""" + try: + # Use metadata service's iTunes client + if hasattr(self, '_metadata_service') and self._metadata_service: + results = self._metadata_service.itunes.search_artists(artist_name, limit=1) + if results: + return results[0].id + else: + # iTunes client not available without metadata service + logger.warning(f"Cannot match to iTunes - MetadataService not available") + except Exception as e: + logger.warning(f"Could not match {artist_name} to iTunes: {e}") + return None + def _get_lookback_period_setting(self) -> str: """ Get the discovery lookback period setting from database. diff --git a/database/music_database.py b/database/music_database.py index b70d0617..71f612cb 100644 --- a/database/music_database.py +++ b/database/music_database.py @@ -79,13 +79,14 @@ class DatabaseTrackWithMetadata: class WatchlistArtist: """Artist being monitored for new releases""" id: int - spotify_artist_id: str + spotify_artist_id: Optional[str] # Can be None if added via iTunes artist_name: str date_added: datetime last_scan_timestamp: Optional[datetime] = None created_at: Optional[datetime] = None updated_at: Optional[datetime] = None image_url: Optional[str] = None + itunes_artist_id: Optional[str] = None # Cross-provider support include_albums: bool = True include_eps: bool = True include_singles: bool = True @@ -280,6 +281,9 @@ class MusicDatabase: # Add content type filter columns to watchlist_artists (migration) self._add_watchlist_content_type_filters(cursor) + # Add iTunes artist ID column to watchlist_artists (migration) + self._add_watchlist_itunes_id_column(cursor) + conn.commit() logger.info("Database initialized successfully") @@ -637,7 +641,7 @@ class MusicDatabase: columns = [column[1] for column in cursor.fetchall()] columns_to_add = { - 'include_live': ('INTEGER', '0'), # 0 = False (exclude live versions by default) + 'include_live': ('INTEGER', '0'), # 0 = False (exclude live versions by default) 'include_remixes': ('INTEGER', '0'), # 0 = False (exclude remixes by default) 'include_acoustic': ('INTEGER', '0'), # 0 = False (exclude acoustic by default) 'include_compilations': ('INTEGER', '0') # 0 = False (exclude compilations by default) @@ -652,6 +656,20 @@ class MusicDatabase: logger.error(f"Error adding content type filter columns to watchlist_artists: {e}") # Don't raise - this is a migration, database can still function + def _add_watchlist_itunes_id_column(self, cursor): + """Add iTunes artist ID column to watchlist_artists table for cross-provider support""" + try: + cursor.execute("PRAGMA table_info(watchlist_artists)") + columns = [column[1] for column in cursor.fetchall()] + + if 'itunes_artist_id' not in columns: + cursor.execute("ALTER TABLE watchlist_artists ADD COLUMN itunes_artist_id TEXT") + logger.info("Added itunes_artist_id column to watchlist_artists table for cross-provider support") + + except Exception as e: + logger.error(f"Error adding itunes_artist_id column to watchlist_artists: {e}") + # Don't raise - this is a migration, database can still function + def close(self): """Close database connection (no-op since we create connections per operation)""" # Each operation creates and closes its own connection, so nothing to do here @@ -2755,7 +2773,7 @@ class MusicDatabase: # Build SELECT query based on existing columns base_columns = ['id', 'spotify_artist_id', 'artist_name', 'date_added', 'last_scan_timestamp', 'created_at', 'updated_at'] - optional_columns = ['image_url', 'include_albums', 'include_eps', 'include_singles', + optional_columns = ['image_url', 'itunes_artist_id', 'include_albums', 'include_eps', 'include_singles', 'include_live', 'include_remixes', 'include_acoustic', 'include_compilations'] columns_to_select = base_columns + [col for col in optional_columns if col in existing_columns] @@ -2772,6 +2790,7 @@ class MusicDatabase: for row in rows: # Safely get optional columns with defaults (sqlite3.Row uses dict-style access) image_url = row['image_url'] if 'image_url' in existing_columns else None + itunes_artist_id = row['itunes_artist_id'] if 'itunes_artist_id' in existing_columns else None include_albums = bool(row['include_albums']) if 'include_albums' in existing_columns else True include_eps = bool(row['include_eps']) if 'include_eps' in existing_columns else True include_singles = bool(row['include_singles']) if 'include_singles' in existing_columns else True @@ -2789,6 +2808,7 @@ class MusicDatabase: created_at=datetime.fromisoformat(row['created_at']) if row['created_at'] else None, updated_at=datetime.fromisoformat(row['updated_at']) if row['updated_at'] else None, image_url=image_url, + itunes_artist_id=itunes_artist_id, include_albums=include_albums, include_eps=include_eps, include_singles=include_singles, @@ -2846,6 +2866,46 @@ class MusicDatabase: logger.error(f"Error updating watchlist artist image: {e}") return False + def update_watchlist_spotify_id(self, watchlist_id: int, spotify_id: str) -> bool: + """Update the Spotify artist ID for a watchlist artist (cross-provider support)""" + try: + with self._get_connection() as conn: + cursor = conn.cursor() + + cursor.execute(""" + UPDATE watchlist_artists + SET spotify_artist_id = ?, updated_at = CURRENT_TIMESTAMP + WHERE id = ? + """, (spotify_id, watchlist_id)) + + conn.commit() + logger.info(f"Updated Spotify ID for watchlist artist {watchlist_id}: {spotify_id}") + return cursor.rowcount > 0 + + except Exception as e: + logger.error(f"Error updating watchlist Spotify ID: {e}") + return False + + def update_watchlist_itunes_id(self, watchlist_id: int, itunes_id: str) -> bool: + """Update the iTunes artist ID for a watchlist artist (cross-provider support)""" + try: + with self._get_connection() as conn: + cursor = conn.cursor() + + cursor.execute(""" + UPDATE watchlist_artists + SET itunes_artist_id = ?, updated_at = CURRENT_TIMESTAMP + WHERE id = ? + """, (itunes_id, watchlist_id)) + + conn.commit() + logger.info(f"Updated iTunes ID for watchlist artist {watchlist_id}: {itunes_id}") + return cursor.rowcount > 0 + + except Exception as e: + logger.error(f"Error updating watchlist iTunes ID: {e}") + return False + # === Discovery Feature Methods === def add_or_update_similar_artist(self, source_artist_id: str, similar_artist_spotify_id: str, diff --git a/web_server.py b/web_server.py index 2bcad806..d83bb7b4 100644 --- a/web_server.py +++ b/web_server.py @@ -17093,8 +17093,22 @@ def check_watchlist_status(): def start_watchlist_scan(): """Start a watchlist scan for new releases""" try: - if not spotify_client or not spotify_client.is_authenticated(): - return jsonify({"success": False, "error": "Spotify client not available or not authenticated"}), 400 + # Check if MetadataService can provide a working client (Spotify OR iTunes) + from core.metadata_service import MetadataService + metadata_service = MetadataService() + + # Get active provider - will be either spotify or itunes + active_provider = metadata_service.get_active_provider() + provider_info = metadata_service.get_provider_info() + + # Verify we have at least one working provider + if not provider_info['spotify_authenticated'] and not provider_info['itunes_available']: + return jsonify({ + "success": False, + "error": "No music provider available. Please authenticate Spotify or ensure iTunes is accessible." + }), 400 + + logger.info(f"Starting watchlist scan with {active_provider} provider") # Check if wishlist auto-processing is currently running (using smart detection) if is_wishlist_actually_processing(): @@ -17108,7 +17122,7 @@ def start_watchlist_scan(): def run_scan(): try: global watchlist_scan_state, watchlist_auto_scanning, watchlist_auto_scanning_timestamp - from core.watchlist_scanner import get_watchlist_scanner + from core.watchlist_scanner import WatchlistScanner from database.music_database import get_database # Set flag and timestamp for manual scan @@ -17137,7 +17151,20 @@ def start_watchlist_scan(): watchlist_next_run_time = 0 # Clear timer for consistency return - scanner = get_watchlist_scanner(spotify_client) + # Initialize scanner with MetadataService for cross-provider support + scanner = WatchlistScanner(metadata_service=metadata_service) + + # PROACTIVE ID BACKFILLING (cross-provider support) + # Before scanning, ensure all artists have IDs for the current provider + try: + active_provider = metadata_service.get_active_provider() + print(f"🔍 Checking for missing {active_provider} IDs in watchlist...") + scanner._backfill_missing_ids(watchlist_artists, active_provider) + except Exception as backfill_error: + print(f"⚠️ Error during ID backfilling: {backfill_error}") + import traceback + traceback.print_exc() + # Continue with scan even if backfilling fails # Initialize detailed progress tracking watchlist_scan_state.update({