Add cross-provider support for watchlist artists

Introduces iTunes artist ID support to WatchlistArtist and database schema, enabling proactive backfilling of missing provider IDs (Spotify/iTunes) for watchlist artists. Updates WatchlistScanner to use MetadataService for provider-agnostic scanning and ID matching, and modifies web_server to support scans with either provider. Includes new database migration and update methods for iTunes and Spotify artist IDs.
pull/126/head
Broque Thomas 3 months ago
parent e7c1e44cf1
commit f126cf7118

@ -232,12 +232,21 @@ class ScanResult:
class WatchlistScanner:
"""Service for scanning watched artists for new releases"""
def __init__(self, spotify_client: SpotifyClient, database_path: str = "database/music_library.db"):
self.spotify_client = spotify_client
def __init__(self, spotify_client: SpotifyClient = None, metadata_service=None, database_path: str = "database/music_library.db"):
    """
    Create a scanner from either a MetadataService or a bare Spotify client.

    Args:
        spotify_client: Legacy way to construct the scanner. Used only when
            no ``metadata_service`` is supplied.
        metadata_service: Preferred way to construct the scanner. Its
            ``spotify`` attribute is exposed as ``self.spotify_client``.
        database_path: Location of the music library database file.

    Raises:
        ValueError: If neither ``spotify_client`` nor ``metadata_service``
            is provided.
    """
    self.database_path = database_path

    # Collaborators start unset; they are filled in elsewhere on first use
    self._database = None
    self._wishlist_service = None
    self._matching_engine = None

    if not (metadata_service or spotify_client):
        raise ValueError("Must provide either spotify_client or metadata_service")

    if metadata_service:
        # New-style construction; keep spotify_client populated for older call sites
        self._metadata_service = metadata_service
        self.spotify_client = metadata_service.spotify
    else:
        # Old-style construction; the metadata service stays unset until requested
        self.spotify_client = spotify_client
        self._metadata_service = None
@property
def database(self):
@ -260,6 +269,14 @@ class WatchlistScanner:
self._matching_engine = MusicMatchingEngine()
return self._matching_engine
@property
def metadata_service(self):
    """Return the MetadataService for this scanner, creating it on first access."""
    service = self._metadata_service
    if service is not None:
        return service

    # Imported here rather than at module level, so the dependency is only
    # loaded when a scanner built without a metadata service actually needs one
    from core.metadata_service import MetadataService

    service = MetadataService()
    self._metadata_service = service
    return service
def scan_all_watchlist_artists(self) -> List[ScanResult]:
"""
Scan artists in the watchlist for new releases.
@ -326,6 +343,22 @@ class WatchlistScanner:
watchlist_artists = artists_to_scan
# PROACTIVE ID BACKFILLING (cross-provider support)
# Before scanning, ensure all artists have IDs for the current provider
logger.info(f"DEBUG: About to check backfilling. _metadata_service = {getattr(self, '_metadata_service', 'ATTRIBUTE MISSING')}")
if self._metadata_service is not None:
try:
active_provider = self._metadata_service.get_active_provider()
logger.info(f"🔍 Checking for missing {active_provider} IDs in watchlist...")
self._backfill_missing_ids(all_watchlist_artists, active_provider)
except Exception as backfill_error:
logger.warning(f"Error during ID backfilling: {backfill_error}")
import traceback
traceback.print_exc()
# Continue with scan even if backfilling fails
else:
logger.warning(f"⚠️ Backfilling SKIPPED - _metadata_service is None")
scan_results = []
for i, artist in enumerate(watchlist_artists):
try:
@ -559,6 +592,86 @@ class WatchlistScanner:
logger.error(f"Error getting discography for artist {spotify_artist_id}: {e}")
return None
def _backfill_missing_ids(self, artists: List[WatchlistArtist], provider: str):
"""
Proactively match ALL artists missing IDs for the current provider.
Example: User has 50 artists with only Spotify IDs.
When iTunes becomes active, this matches ALL 50 to iTunes in one batch.
"""
artists_to_match = []
if provider == 'spotify':
# Find all artists missing Spotify IDs
artists_to_match = [a for a in artists if not a.spotify_artist_id and a.itunes_artist_id]
elif provider == 'itunes':
# Find all artists missing iTunes IDs
artists_to_match = [a for a in artists if not a.itunes_artist_id and a.spotify_artist_id]
if not artists_to_match:
logger.info(f"✅ All artists already have {provider} IDs")
return
logger.info(f"🔄 Backfilling {len(artists_to_match)} artists with {provider} IDs...")
matched_count = 0
for artist in artists_to_match:
try:
if provider == 'spotify':
new_id = self._match_to_spotify(artist.artist_name)
if new_id:
self.database.update_watchlist_spotify_id(artist.id, new_id)
artist.spotify_artist_id = new_id # Update in memory
matched_count += 1
logger.info(f"✅ Matched '{artist.artist_name}' to Spotify: {new_id}")
elif provider == 'itunes':
new_id = self._match_to_itunes(artist.artist_name)
if new_id:
self.database.update_watchlist_itunes_id(artist.id, new_id)
artist.itunes_artist_id = new_id # Update in memory
matched_count += 1
logger.info(f"✅ Matched '{artist.artist_name}' to iTunes: {new_id}")
# Small delay to avoid API rate limits
time.sleep(0.3)
except Exception as e:
logger.warning(f"Could not match '{artist.artist_name}' to {provider}: {e}")
continue
logger.info(f"✅ Backfilled {matched_count}/{len(artists_to_match)} artists with {provider} IDs")
def _match_to_spotify(self, artist_name: str) -> Optional[str]:
"""Match artist name to Spotify ID"""
try:
# Use metadata service if available, fallback to spotify_client
if hasattr(self, '_metadata_service') and self._metadata_service:
results = self._metadata_service.spotify.search_artists(artist_name, limit=1)
else:
results = self.spotify_client.search_artists(artist_name, limit=1)
if results:
return results[0].id
except Exception as e:
logger.warning(f"Could not match {artist_name} to Spotify: {e}")
return None
def _match_to_itunes(self, artist_name: str) -> Optional[str]:
"""Match artist name to iTunes ID"""
try:
# Use metadata service's iTunes client
if hasattr(self, '_metadata_service') and self._metadata_service:
results = self._metadata_service.itunes.search_artists(artist_name, limit=1)
if results:
return results[0].id
else:
# iTunes client not available without metadata service
logger.warning(f"Cannot match to iTunes - MetadataService not available")
except Exception as e:
logger.warning(f"Could not match {artist_name} to iTunes: {e}")
return None
def _get_lookback_period_setting(self) -> str:
"""
Get the discovery lookback period setting from database.

@ -79,13 +79,14 @@ class DatabaseTrackWithMetadata:
class WatchlistArtist:
"""Artist being monitored for new releases"""
id: int
spotify_artist_id: str
spotify_artist_id: Optional[str] # Can be None if added via iTunes
artist_name: str
date_added: datetime
last_scan_timestamp: Optional[datetime] = None
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None
image_url: Optional[str] = None
itunes_artist_id: Optional[str] = None # Cross-provider support
include_albums: bool = True
include_eps: bool = True
include_singles: bool = True
@ -280,6 +281,9 @@ class MusicDatabase:
# Add content type filter columns to watchlist_artists (migration)
self._add_watchlist_content_type_filters(cursor)
# Add iTunes artist ID column to watchlist_artists (migration)
self._add_watchlist_itunes_id_column(cursor)
conn.commit()
logger.info("Database initialized successfully")
@ -637,7 +641,7 @@ class MusicDatabase:
columns = [column[1] for column in cursor.fetchall()]
columns_to_add = {
'include_live': ('INTEGER', '0'), # 0 = False (exclude live versions by default)
'include_live': ('INTEGER', '0'), # 0 = False (exclude live versions by default)
'include_remixes': ('INTEGER', '0'), # 0 = False (exclude remixes by default)
'include_acoustic': ('INTEGER', '0'), # 0 = False (exclude acoustic by default)
'include_compilations': ('INTEGER', '0') # 0 = False (exclude compilations by default)
@ -652,6 +656,20 @@ class MusicDatabase:
logger.error(f"Error adding content type filter columns to watchlist_artists: {e}")
# Don't raise - this is a migration, database can still function
def _add_watchlist_itunes_id_column(self, cursor):
"""Add iTunes artist ID column to watchlist_artists table for cross-provider support"""
try:
cursor.execute("PRAGMA table_info(watchlist_artists)")
columns = [column[1] for column in cursor.fetchall()]
if 'itunes_artist_id' not in columns:
cursor.execute("ALTER TABLE watchlist_artists ADD COLUMN itunes_artist_id TEXT")
logger.info("Added itunes_artist_id column to watchlist_artists table for cross-provider support")
except Exception as e:
logger.error(f"Error adding itunes_artist_id column to watchlist_artists: {e}")
# Don't raise - this is a migration, database can still function
def close(self):
"""Close database connection (no-op since we create connections per operation)"""
# Each operation creates and closes its own connection, so nothing to do here
@ -2755,7 +2773,7 @@ class MusicDatabase:
# Build SELECT query based on existing columns
base_columns = ['id', 'spotify_artist_id', 'artist_name', 'date_added',
'last_scan_timestamp', 'created_at', 'updated_at']
optional_columns = ['image_url', 'include_albums', 'include_eps', 'include_singles',
optional_columns = ['image_url', 'itunes_artist_id', 'include_albums', 'include_eps', 'include_singles',
'include_live', 'include_remixes', 'include_acoustic', 'include_compilations']
columns_to_select = base_columns + [col for col in optional_columns if col in existing_columns]
@ -2772,6 +2790,7 @@ class MusicDatabase:
for row in rows:
# Safely get optional columns with defaults (sqlite3.Row uses dict-style access)
image_url = row['image_url'] if 'image_url' in existing_columns else None
itunes_artist_id = row['itunes_artist_id'] if 'itunes_artist_id' in existing_columns else None
include_albums = bool(row['include_albums']) if 'include_albums' in existing_columns else True
include_eps = bool(row['include_eps']) if 'include_eps' in existing_columns else True
include_singles = bool(row['include_singles']) if 'include_singles' in existing_columns else True
@ -2789,6 +2808,7 @@ class MusicDatabase:
created_at=datetime.fromisoformat(row['created_at']) if row['created_at'] else None,
updated_at=datetime.fromisoformat(row['updated_at']) if row['updated_at'] else None,
image_url=image_url,
itunes_artist_id=itunes_artist_id,
include_albums=include_albums,
include_eps=include_eps,
include_singles=include_singles,
@ -2846,6 +2866,46 @@ class MusicDatabase:
logger.error(f"Error updating watchlist artist image: {e}")
return False
def update_watchlist_spotify_id(self, watchlist_id: int, spotify_id: str) -> bool:
    """
    Update the Spotify artist ID for a watchlist artist (cross-provider support).

    Args:
        watchlist_id: Primary key (``id``) of the watchlist_artists row.
        spotify_id: Spotify artist ID to store.

    Returns:
        True if a row was updated; False if no row has that id or the
        update raised (the error is logged, not propagated).
    """
    try:
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("""
                UPDATE watchlist_artists
                SET spotify_artist_id = ?, updated_at = CURRENT_TIMESTAMP
                WHERE id = ?
            """, (spotify_id, watchlist_id))
            conn.commit()

            # Only report success when a row was actually changed; logging
            # "Updated" unconditionally hid updates that matched nothing.
            updated = cursor.rowcount > 0
            if updated:
                logger.info(f"Updated Spotify ID for watchlist artist {watchlist_id}: {spotify_id}")
            else:
                logger.warning(f"No watchlist artist found with id {watchlist_id}; Spotify ID not updated")
            return updated

    except Exception as e:
        logger.error(f"Error updating watchlist Spotify ID: {e}")
        return False
def update_watchlist_itunes_id(self, watchlist_id: int, itunes_id: str) -> bool:
    """
    Update the iTunes artist ID for a watchlist artist (cross-provider support).

    Args:
        watchlist_id: Primary key (``id``) of the watchlist_artists row.
        itunes_id: iTunes artist ID to store.

    Returns:
        True if a row was updated; False if no row has that id or the
        update raised (the error is logged, not propagated).
    """
    try:
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("""
                UPDATE watchlist_artists
                SET itunes_artist_id = ?, updated_at = CURRENT_TIMESTAMP
                WHERE id = ?
            """, (itunes_id, watchlist_id))
            conn.commit()

            # Only report success when a row was actually changed; logging
            # "Updated" unconditionally hid updates that matched nothing.
            updated = cursor.rowcount > 0
            if updated:
                logger.info(f"Updated iTunes ID for watchlist artist {watchlist_id}: {itunes_id}")
            else:
                logger.warning(f"No watchlist artist found with id {watchlist_id}; iTunes ID not updated")
            return updated

    except Exception as e:
        logger.error(f"Error updating watchlist iTunes ID: {e}")
        return False
# === Discovery Feature Methods ===
def add_or_update_similar_artist(self, source_artist_id: str, similar_artist_spotify_id: str,

@ -17093,8 +17093,22 @@ def check_watchlist_status():
def start_watchlist_scan():
"""Start a watchlist scan for new releases"""
try:
if not spotify_client or not spotify_client.is_authenticated():
return jsonify({"success": False, "error": "Spotify client not available or not authenticated"}), 400
# Check if MetadataService can provide a working client (Spotify OR iTunes)
from core.metadata_service import MetadataService
metadata_service = MetadataService()
# Get active provider - will be either spotify or itunes
active_provider = metadata_service.get_active_provider()
provider_info = metadata_service.get_provider_info()
# Verify we have at least one working provider
if not provider_info['spotify_authenticated'] and not provider_info['itunes_available']:
return jsonify({
"success": False,
"error": "No music provider available. Please authenticate Spotify or ensure iTunes is accessible."
}), 400
logger.info(f"Starting watchlist scan with {active_provider} provider")
# Check if wishlist auto-processing is currently running (using smart detection)
if is_wishlist_actually_processing():
@ -17108,7 +17122,7 @@ def start_watchlist_scan():
def run_scan():
try:
global watchlist_scan_state, watchlist_auto_scanning, watchlist_auto_scanning_timestamp
from core.watchlist_scanner import get_watchlist_scanner
from core.watchlist_scanner import WatchlistScanner
from database.music_database import get_database
# Set flag and timestamp for manual scan
@ -17137,7 +17151,20 @@ def start_watchlist_scan():
watchlist_next_run_time = 0 # Clear timer for consistency
return
scanner = get_watchlist_scanner(spotify_client)
# Initialize scanner with MetadataService for cross-provider support
scanner = WatchlistScanner(metadata_service=metadata_service)
# PROACTIVE ID BACKFILLING (cross-provider support)
# Before scanning, ensure all artists have IDs for the current provider
try:
active_provider = metadata_service.get_active_provider()
print(f"🔍 Checking for missing {active_provider} IDs in watchlist...")
scanner._backfill_missing_ids(watchlist_artists, active_provider)
except Exception as backfill_error:
print(f"⚠️ Error during ID backfilling: {backfill_error}")
import traceback
traceback.print_exc()
# Continue with scan even if backfilling fails
# Initialize detailed progress tracking
watchlist_scan_state.update({

Loading…
Cancel
Save