mirror of https://github.com/Nezreka/SoulSync.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2534 lines
125 KiB
2534 lines
125 KiB
#!/usr/bin/env python3
|
|
|
|
"""
|
|
Watchlist Scanner Service - Monitors watched artists for new releases
|
|
"""
|
|
|
|
from typing import List, Dict, Any, Optional
|
|
from datetime import datetime, timezone, timedelta
|
|
from dataclasses import dataclass
|
|
import re
|
|
import time
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from database.music_database import get_database, WatchlistArtist
|
|
from core.spotify_client import SpotifyClient
|
|
from core.wishlist_service import get_wishlist_service
|
|
from core.matching_engine import MusicMatchingEngine
|
|
from utils.logging_config import get_logger
|
|
|
|
logger = get_logger("watchlist_scanner")
|
|
|
|
# Rate limiting constants for watchlist operations
DELAY_BETWEEN_ARTISTS = 2.0  # 2 seconds between different artists
DELAY_BETWEEN_ALBUMS = 0.5  # 500ms between albums for same artist
DELAY_BETWEEN_API_BATCHES = 1.0  # 1 second between API batch operations

# iTunes API retry configuration
ITUNES_MAX_RETRIES = 3  # Attempts per iTunes API call before giving up
ITUNES_BASE_DELAY = 1.0  # Base delay in seconds for exponential backoff
|
|
|
|
|
|
def itunes_api_call_with_retry(func, *args, max_retries=ITUNES_MAX_RETRIES, **kwargs):
    """
    Execute an iTunes API call with exponential backoff retry logic.

    Retries on HTTP 429 (rate limit), HTTP 5xx (server errors) and network
    errors. Other HTTP client errors (4xx) and unrelated exceptions propagate
    to the caller immediately.

    Args:
        func: The function to call
        *args: Arguments to pass to the function
        max_retries: Maximum number of retry attempts
        **kwargs: Keyword arguments to pass to the function

    Returns:
        The result of the function call, or None if all retries failed
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            return func(*args, **kwargs)
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else None
            # Only rate limiting (429) and server errors (5xx) are retryable
            if status == 429 or (status is not None and status >= 500):
                last_error = e
                # Fix: only back off when another attempt remains — the
                # original also slept after the final failed attempt,
                # delaying the None return for no benefit.
                if attempt < max_retries - 1:
                    delay = ITUNES_BASE_DELAY * (2 ** attempt)
                    reason = "Rate limited" if status == 429 else f"Server error {status}"
                    logger.warning(f"[iTunes] {reason}, retrying in {delay}s (attempt {attempt + 1}/{max_retries})")
                    time.sleep(delay)
            else:
                raise  # Don't retry on client errors (4xx except 429)
        except requests.exceptions.RequestException as e:
            # Retry on connection errors
            last_error = e
            if attempt < max_retries - 1:
                delay = ITUNES_BASE_DELAY * (2 ** attempt)
                logger.warning(f"[iTunes] Connection error, retrying in {delay}s (attempt {attempt + 1}/{max_retries}): {e}")
                time.sleep(delay)
        # Any other exception type is not retryable and propagates naturally.

    if last_error:
        logger.error(f"[iTunes] All {max_retries} retry attempts failed: {last_error}")
    return None
|
|
|
|
|
|
def clean_track_name_for_search(track_name):
    """
    Intelligently cleans a track name for searching by removing noise while preserving important version information.
    Removes: (feat. Artist), (Explicit), (Clean), etc.
    Keeps: (Extended Version), (Live), (Acoustic), (Remix), etc.
    """
    if not track_name or not isinstance(track_name, str):
        return track_name

    # Noise annotations that do not change which recording this is.
    noise_patterns = (
        r'\s*\(explicit\)',              # (Explicit)
        r'\s*\(clean\)',                 # (Clean)
        r'\s*\(radio\s*edit\)',          # (Radio Edit)
        r'\s*\(radio\s*version\)',       # (Radio Version)
        r'\s*\(feat\.?\s*[^)]+\)',       # (feat. Artist) or (ft. Artist)
        r'\s*\(ft\.?\s*[^)]+\)',         # (ft Artist)
        r'\s*\(featuring\s*[^)]+\)',     # (featuring Artist)
        r'\s*\(with\s*[^)]+\)',          # (with Artist)
        r'\s*\[[^\]]*explicit[^\]]*\]',  # [Explicit] in brackets
        r'\s*\[[^\]]*clean[^\]]*\]',     # [Clean] in brackets
    )

    result = track_name
    for noise in noise_patterns:
        result = re.sub(noise, '', result, flags=re.IGNORECASE).strip()

    # Version markers such as (Extended Version), (Live), (Acoustic),
    # (Remix), (Remastered), (Demo), (Instrumental) and year/edition info
    # like (2023) or (Deluxe Edition) are deliberately preserved: they
    # identify a distinct recording rather than metadata noise.

    # Never hand back an empty string — fall back to the unmodified name.
    if not result.strip():
        return track_name

    if result != track_name:
        logger.debug(f"🧹 Intelligent track cleaning: '{track_name}' -> '{result}'")

    return result
|
|
|
|
def is_live_version(track_name: str, album_name: str = "") -> bool:
    """
    Detect if a track or album is a live version.

    Args:
        track_name: Track name to check
        album_name: Album name to check (optional)

    Returns:
        True if this is a live version, False otherwise
    """
    if not track_name:
        return False

    # Search both names together so a live marker on either is caught.
    haystack = f"{track_name} {album_name}".lower()

    live_markers = (
        r'\blive\b',            # (Live), Live at, etc.
        r'\blive at\b',         # Live at Madison Square Garden
        r'\bconcert\b',         # Concert, Live Concert
        r'\bin concert\b',      # In Concert
        r'\bunplugged\b',       # MTV Unplugged (usually live)
        r'\blive session\b',    # Live Session
        r'\blive from\b',       # Live from...
        r'\blive recording\b',  # Live Recording
        r'\bon stage\b',        # On Stage
    )

    return any(re.search(marker, haystack, re.IGNORECASE) for marker in live_markers)
|
|
|
|
def is_remix_version(track_name: str, album_name: str = "") -> bool:
    """
    Detect if a track is a remix.

    Args:
        track_name: Track name to check
        album_name: Album name to check (optional)

    Returns:
        True if this is a remix, False otherwise
    """
    if not track_name:
        return False

    # Search both names together for remix markers.
    haystack = f"{track_name} {album_name}".lower()

    # Remasters are originals, never remixes — short-circuit first.
    if re.search(r'\bremaster(ed)?\b', haystack, re.IGNORECASE):
        return False

    remix_markers = (
        r'\bremix\b',                  # Remix, Remixed
        r'\bmix\b(?!.*\bremaster)',    # Mix (but not if followed by remaster)
        r'\bedit\b',                   # Radio Edit, Extended Edit
        r'\bversion\b(?=.*\bmix\b)',   # Version with Mix (e.g., "Dance Version Mix")
        r'\bclub mix\b',               # Club Mix
        r'\bdance mix\b',              # Dance Mix
        r'\bradio edit\b',             # Radio Edit
        r'\bextended\b(?=.*\bmix\b)',  # Extended Mix
        r'\bdub\b',                    # Dub version
        r'\bvip mix\b',                # VIP Mix
    )

    return any(re.search(marker, haystack, re.IGNORECASE) for marker in remix_markers)
|
|
|
|
def is_acoustic_version(track_name: str, album_name: str = "") -> bool:
    """
    Detect if a track is an acoustic version.

    Args:
        track_name: Track name to check
        album_name: Album name to check (optional)

    Returns:
        True if this is an acoustic version, False otherwise
    """
    if not track_name:
        return False

    # Search both names together for acoustic markers.
    haystack = f"{track_name} {album_name}".lower()

    acoustic_markers = (
        r'\bacoustic\b',       # Acoustic, Acoustic Version
        r'\bstripped\b',       # Stripped version
        r'\bpiano version\b',  # Piano Version
        r'\bunplugged\b',      # MTV Unplugged (can be acoustic)
    )

    return any(re.search(marker, haystack, re.IGNORECASE) for marker in acoustic_markers)
|
|
|
|
def is_compilation_album(album_name: str) -> bool:
    """
    Detect if an album is a compilation/greatest hits album.

    Args:
        album_name: Album name to check

    Returns:
        True if this is a compilation album, False otherwise
    """
    if not album_name:
        return False

    title = album_name.lower()

    compilation_markers = (
        r'\bgreatest hits\b',  # Greatest Hits
        r'\bbest of\b',        # Best Of
        r'\banthology\b',      # Anthology
        r'\bcollection\b',     # Collection
        r'\bcompilation\b',    # Compilation
        r'\bthe essential\b',  # The Essential...
        r'\bcomplete\b',       # Complete Collection
        r'\bhits\b',           # Hits (standalone or at end)
        r'\btop\s+\d+\b',      # Top 10, Top 40, etc.
        r'\bvery best\b',      # Very Best Of
        r'\bdefinitive\b',     # Definitive Collection
    )

    return any(re.search(marker, title, re.IGNORECASE) for marker in compilation_markers)
|
|
|
|
@dataclass
class ScanResult:
    """Result of scanning a single artist"""
    artist_name: str  # Human-readable artist name
    spotify_artist_id: str  # Spotify ID; '' when the artist only has an iTunes ID
    albums_checked: int  # Number of albums/singles inspected in this scan
    new_tracks_found: int  # Tracks reported missing from the local library
    tracks_added_to_wishlist: int  # Subset of new tracks successfully added to the wishlist
    success: bool  # False when the scan aborted with an error
    error_message: Optional[str] = None  # Populated only when success is False
|
|
|
|
class WatchlistScanner:
|
|
"""Service for scanning watched artists for new releases"""
|
|
|
|
def __init__(self, spotify_client: SpotifyClient = None, metadata_service=None, database_path: str = "database/music_library.db"):
|
|
# Support both old (spotify_client) and new (metadata_service) initialization
|
|
self.database_path = database_path
|
|
self._database = None
|
|
self._wishlist_service = None
|
|
self._matching_engine = None
|
|
|
|
if metadata_service:
|
|
self._metadata_service = metadata_service
|
|
self.spotify_client = metadata_service.spotify # For backward compatibility
|
|
elif spotify_client:
|
|
self.spotify_client = spotify_client
|
|
self._metadata_service = None # Lazy load if needed
|
|
else:
|
|
raise ValueError("Must provide either spotify_client or metadata_service")
|
|
|
|
@property
|
|
def database(self):
|
|
"""Get database instance (lazy loading)"""
|
|
if self._database is None:
|
|
self._database = get_database(self.database_path)
|
|
return self._database
|
|
|
|
@property
|
|
def wishlist_service(self):
|
|
"""Get wishlist service instance (lazy loading)"""
|
|
if self._wishlist_service is None:
|
|
self._wishlist_service = get_wishlist_service()
|
|
return self._wishlist_service
|
|
|
|
@property
|
|
def matching_engine(self):
|
|
"""Get matching engine instance (lazy loading)"""
|
|
if self._matching_engine is None:
|
|
self._matching_engine = MusicMatchingEngine()
|
|
return self._matching_engine
|
|
|
|
@property
|
|
def metadata_service(self):
|
|
"""Get or create MetadataService instance (lazy loading)"""
|
|
if self._metadata_service is None:
|
|
from core.metadata_service import MetadataService
|
|
self._metadata_service = MetadataService()
|
|
return self._metadata_service
|
|
|
|
    def _get_active_client_and_artist_id(self, watchlist_artist: WatchlistArtist):
        """
        Get the appropriate client and artist ID based on active provider.
        If iTunes ID is missing, searches by artist name to find and cache it.

        Returns:
            Tuple of (client, artist_id, provider_name) or (None, None, None) if no valid ID
        """
        provider = self.metadata_service.get_active_provider()

        if provider == 'spotify':
            if watchlist_artist.spotify_artist_id:
                return (self.metadata_service.spotify, watchlist_artist.spotify_artist_id, 'spotify')
            else:
                # NOTE(review): unlike the iTunes branch below, there is no
                # name-based fallback search when the Spotify ID is missing.
                logger.warning(f"No Spotify ID for {watchlist_artist.artist_name}, cannot scan with Spotify")
                return (None, None, None)
        else:  # itunes
            if watchlist_artist.itunes_artist_id:
                return (self.metadata_service.itunes, watchlist_artist.itunes_artist_id, 'itunes')
            else:
                # No iTunes ID stored - search by name and cache it
                logger.info(f"No iTunes ID for {watchlist_artist.artist_name}, searching by name...")
                try:
                    itunes_client = self.metadata_service.itunes
                    # Trust the top search hit as the matching artist
                    search_results = itunes_client.search_artists(watchlist_artist.artist_name, limit=1)
                    if search_results and len(search_results) > 0:
                        itunes_id = search_results[0].id
                        logger.info(f"Found iTunes ID {itunes_id} for {watchlist_artist.artist_name}")
                        # Cache the iTunes ID in the database for future use.
                        # The watchlist row is keyed by Spotify ID when present,
                        # otherwise by the stringified DB row id.
                        self.database.update_watchlist_artist_itunes_id(
                            watchlist_artist.spotify_artist_id or str(watchlist_artist.id),
                            itunes_id
                        )
                        return (itunes_client, itunes_id, 'itunes')
                    else:
                        logger.warning(f"Could not find {watchlist_artist.artist_name} on iTunes")
                        return (None, None, None)
                except Exception as e:
                    logger.error(f"Error searching iTunes for {watchlist_artist.artist_name}: {e}")
                    return (None, None, None)
|
|
|
|
def get_active_client_and_artist_id(self, watchlist_artist: WatchlistArtist):
|
|
"""
|
|
Public wrapper for _get_active_client_and_artist_id.
|
|
Gets the appropriate client and artist ID based on active provider.
|
|
|
|
Returns:
|
|
Tuple of (client, artist_id, provider_name) or (None, None, None) if no valid ID
|
|
"""
|
|
return self._get_active_client_and_artist_id(watchlist_artist)
|
|
|
|
def get_artist_image_url(self, watchlist_artist: WatchlistArtist) -> Optional[str]:
|
|
"""
|
|
Get artist image URL using the active provider.
|
|
|
|
Returns:
|
|
Image URL string or None if not available
|
|
"""
|
|
client, artist_id, provider = self._get_active_client_and_artist_id(watchlist_artist)
|
|
if not client or not artist_id:
|
|
return None
|
|
|
|
try:
|
|
artist_data = client.get_artist(artist_id)
|
|
if artist_data:
|
|
# Handle both Spotify and iTunes response formats
|
|
if 'images' in artist_data and artist_data['images']:
|
|
return artist_data['images'][0].get('url')
|
|
elif 'image_url' in artist_data:
|
|
return artist_data['image_url']
|
|
except Exception as e:
|
|
logger.debug(f"Could not fetch artist image for {watchlist_artist.artist_name}: {e}")
|
|
|
|
return None
|
|
|
|
def get_artist_discography_for_watchlist(self, watchlist_artist: WatchlistArtist, last_scan_timestamp: Optional[datetime] = None) -> Optional[List]:
|
|
"""
|
|
Get artist's discography using the active provider, with proper ID resolution.
|
|
This is the provider-aware version of get_artist_discography.
|
|
|
|
Args:
|
|
watchlist_artist: WatchlistArtist object (has both spotify and itunes IDs)
|
|
last_scan_timestamp: Only return releases after this date (for incremental scans)
|
|
|
|
Returns:
|
|
List of albums or None on error
|
|
"""
|
|
client, artist_id, provider = self._get_active_client_and_artist_id(watchlist_artist)
|
|
if not client or not artist_id:
|
|
logger.warning(f"No valid client/ID for {watchlist_artist.artist_name}")
|
|
return None
|
|
|
|
return self._get_artist_discography_with_client(client, artist_id, last_scan_timestamp)
|
|
|
|
    def scan_all_watchlist_artists(self) -> List[ScanResult]:
        """
        Scan artists in the watchlist for new releases.

        OPTIMIZED: Scans up to 50 artists per run using smart selection:
        - Priority: Artists not scanned in 7+ days (guaranteed)
        - Remainder: Random selection from other artists

        This reduces API calls while ensuring all artists scanned at least weekly.
        Only checks releases after their last scan timestamp.

        Returns:
            One ScanResult per artist attempted this run; an empty list when
            the watchlist is empty or the whole scan aborted with an error.
        """
        logger.info("Starting watchlist scan")

        try:
            # Local imports re-bind names already imported at module level;
            # harmless, kept for self-containment of this method.
            from datetime import datetime, timedelta
            import random

            # Get all watchlist artists
            all_watchlist_artists = self.database.get_watchlist_artists()
            if not all_watchlist_artists:
                logger.info("No artists in watchlist to scan")
                return []

            logger.info(f"Found {len(all_watchlist_artists)} total artists in watchlist")

            # OPTIMIZATION: Select up to 50 artists to scan
            # 1. Must scan: Artists not scanned in 7+ days (or never scanned)
            # NOTE(review): naive local time — if last_scan_timestamp is ever
            # timezone-aware this comparison raises; confirm DB stores naive.
            seven_days_ago = datetime.now() - timedelta(days=7)
            must_scan = []
            can_skip = []

            for artist in all_watchlist_artists:
                if artist.last_scan_timestamp is None:
                    # Never scanned - must scan
                    must_scan.append(artist)
                elif artist.last_scan_timestamp < seven_days_ago:
                    # Not scanned in 7+ days - must scan
                    must_scan.append(artist)
                else:
                    # Scanned recently - can skip (but might randomly select)
                    can_skip.append(artist)

            logger.info(f"Artists requiring scan (not scanned in 7+ days): {len(must_scan)}")
            logger.info(f"Artists scanned recently (< 7 days): {len(can_skip)}")

            # 2. Fill remaining slots (up to 50 total) with random selection
            max_artists_per_scan = 50
            artists_to_scan = must_scan.copy()

            remaining_slots = max_artists_per_scan - len(must_scan)
            if remaining_slots > 0 and can_skip:
                # Randomly sample from recently-scanned artists
                random_sample_size = min(remaining_slots, len(can_skip))
                random_selection = random.sample(can_skip, random_sample_size)
                artists_to_scan.extend(random_selection)
                logger.info(f"Additionally scanning {len(random_selection)} randomly selected artists")

            # Shuffle to avoid always scanning same order
            random.shuffle(artists_to_scan)

            logger.info(f"Total artists to scan this run: {len(artists_to_scan)}")
            if len(all_watchlist_artists) > max_artists_per_scan:
                logger.info(f"Skipping {len(all_watchlist_artists) - len(artists_to_scan)} artists (will be scanned in future runs)")

            watchlist_artists = artists_to_scan

            # PROACTIVE ID BACKFILLING (cross-provider support)
            # Before scanning, ensure all artists have IDs for the current provider
            # NOTE(review): leftover diagnostic log at info level below —
            # consider demoting to debug.
            logger.info(f"DEBUG: About to check backfilling. _metadata_service = {getattr(self, '_metadata_service', 'ATTRIBUTE MISSING')}")
            if self._metadata_service is not None:
                try:
                    active_provider = self._metadata_service.get_active_provider()
                    logger.info(f"🔍 Checking for missing {active_provider} IDs in watchlist...")
                    # Backfill covers ALL watchlist artists, not just this run's subset
                    self._backfill_missing_ids(all_watchlist_artists, active_provider)
                except Exception as backfill_error:
                    logger.warning(f"Error during ID backfilling: {backfill_error}")
                    import traceback
                    traceback.print_exc()
                    # Continue with scan even if backfilling fails
            else:
                logger.warning(f"⚠️ Backfilling SKIPPED - _metadata_service is None")

            scan_results = []
            for i, artist in enumerate(watchlist_artists):
                try:
                    result = self.scan_artist(artist)
                    scan_results.append(result)

                    if result.success:
                        logger.info(f"✅ Scanned {artist.artist_name}: {result.new_tracks_found} new tracks found")
                    else:
                        logger.warning(f"❌ Failed to scan {artist.artist_name}: {result.error_message}")

                    # Rate limiting: Add delay between artists to avoid hitting Spotify API limits
                    # This is critical to prevent getting banned for 6+ hours
                    if i < len(watchlist_artists) - 1:  # Don't delay after the last artist
                        logger.debug(f"Rate limiting: waiting {DELAY_BETWEEN_ARTISTS}s before scanning next artist")
                        time.sleep(DELAY_BETWEEN_ARTISTS)

                except Exception as e:
                    # A failure on one artist is recorded and does not stop the run
                    logger.error(f"Error scanning artist {artist.artist_name}: {e}")
                    scan_results.append(ScanResult(
                        artist_name=artist.artist_name,
                        spotify_artist_id=artist.spotify_artist_id,
                        albums_checked=0,
                        new_tracks_found=0,
                        tracks_added_to_wishlist=0,
                        success=False,
                        error_message=str(e)
                    ))

            # Log summary
            successful_scans = [r for r in scan_results if r.success]
            total_new_tracks = sum(r.new_tracks_found for r in successful_scans)
            total_added_to_wishlist = sum(r.tracks_added_to_wishlist for r in successful_scans)

            logger.info(f"Watchlist scan complete: {len(successful_scans)}/{len(scan_results)} artists scanned successfully")
            logger.info(f"Found {total_new_tracks} new tracks, added {total_added_to_wishlist} to wishlist")

            # Populate discovery pool with tracks from similar artists
            logger.info("Starting discovery pool population...")
            self.populate_discovery_pool()

            # Populate seasonal content (runs independently with its own threshold)
            logger.info("Updating seasonal content...")
            self._populate_seasonal_content()

            return scan_results

        except Exception as e:
            logger.error(f"Error during watchlist scan: {e}")
            return []
|
|
|
|
    def scan_artist(self, watchlist_artist: WatchlistArtist) -> ScanResult:
        """
        Scan a single artist for new releases.
        Only checks releases after the last scan timestamp.
        Uses the active provider (Spotify if authenticated, otherwise iTunes).

        Returns:
            ScanResult summarising what was checked/found; success=False with
            an error_message when no provider ID resolves or an error occurs.
        """
        try:
            logger.info(f"Scanning artist: {watchlist_artist.artist_name}")

            # Get the active client and artist ID based on provider
            client, artist_id, provider = self._get_active_client_and_artist_id(watchlist_artist)

            if client is None or artist_id is None:
                return ScanResult(
                    artist_name=watchlist_artist.artist_name,
                    spotify_artist_id=watchlist_artist.spotify_artist_id or '',
                    albums_checked=0,
                    new_tracks_found=0,
                    tracks_added_to_wishlist=0,
                    success=False,
                    error_message=f"No {self.metadata_service.get_active_provider()} ID available for this artist"
                )

            logger.info(f"Using {provider} provider for {watchlist_artist.artist_name} (ID: {artist_id})")

            # Update artist image (cached for performance)
            try:
                artist_data = client.get_artist(artist_id)
                if artist_data and 'images' in artist_data and artist_data['images']:
                    # Get medium-sized image (usually the second one, or first if only one)
                    image_url = None
                    if len(artist_data['images']) > 1:
                        image_url = artist_data['images'][1]['url']
                    else:
                        image_url = artist_data['images'][0]['url']

                    # Update in database (use spotify_artist_id as the key for consistency)
                    if image_url:
                        db_artist_id = watchlist_artist.spotify_artist_id or artist_id
                        self.database.update_watchlist_artist_image(db_artist_id, image_url)
                        logger.info(f"Updated artist image for {watchlist_artist.artist_name}")
                    else:
                        logger.warning(f"No image URL found for {watchlist_artist.artist_name}")
                else:
                    logger.warning(f"No images in {provider} data for {watchlist_artist.artist_name}")
            except Exception as img_error:
                # Image refresh is best-effort; a failure never aborts the scan
                logger.warning(f"Could not update artist image for {watchlist_artist.artist_name}: {img_error}")

            # Get artist discography using active provider
            albums = self._get_artist_discography_with_client(client, artist_id, watchlist_artist.last_scan_timestamp)

            if albums is None:
                return ScanResult(
                    artist_name=watchlist_artist.artist_name,
                    spotify_artist_id=watchlist_artist.spotify_artist_id or '',
                    albums_checked=0,
                    new_tracks_found=0,
                    tracks_added_to_wishlist=0,
                    success=False,
                    error_message=f"Failed to get artist discography from {provider}"
                )

            logger.info(f"Found {len(albums)} albums/singles to check for {watchlist_artist.artist_name}")

            # Safety check: Limit number of albums to scan to prevent extremely long sessions
            MAX_ALBUMS_PER_ARTIST = 50  # Reasonable limit to prevent API abuse
            if len(albums) > MAX_ALBUMS_PER_ARTIST:
                logger.warning(f"Artist {watchlist_artist.artist_name} has {len(albums)} albums, limiting to {MAX_ALBUMS_PER_ARTIST} most recent")
                albums = albums[:MAX_ALBUMS_PER_ARTIST]  # Most recent albums are first

            # Check each album/single for missing tracks
            new_tracks_found = 0
            tracks_added_to_wishlist = 0

            for album_index, album in enumerate(albums):
                try:
                    # Get full album data
                    logger.info(f"Checking album {album_index + 1}/{len(albums)}: {album.name}")
                    album_data = client.get_album(album.id)
                    if not album_data:
                        continue

                    # Get album tracks (works for both Spotify and iTunes)
                    # Spotify's get_album() includes tracks, but we use get_album_tracks() for consistency
                    tracks_data = client.get_album_tracks(album.id)
                    if not tracks_data or not tracks_data.get('items'):
                        continue

                    tracks = tracks_data['items']
                    logger.debug(f"Checking album: {album_data.get('name', 'Unknown')} ({len(tracks)} tracks)")

                    # Check if user wants this type of release
                    # Classification by track count: 7+ album, 4-6 EP, otherwise single
                    if not self._should_include_release(len(tracks), watchlist_artist):
                        release_type = "album" if len(tracks) >= 7 else ("EP" if len(tracks) >= 4 else "single")
                        logger.debug(f"Skipping {release_type}: {album_data.get('name', 'Unknown')} - user preference")
                        continue

                    # Check each track
                    for track in tracks:
                        # Check content type filters (live, remix, acoustic, compilation)
                        if not self._should_include_track(track, album_data, watchlist_artist):
                            continue  # Skip this track based on content type preferences

                        if self.is_track_missing_from_library(track):
                            new_tracks_found += 1

                            # Add to wishlist
                            if self.add_track_to_wishlist(track, album_data, watchlist_artist):
                                tracks_added_to_wishlist += 1

                    # Rate limiting: Add delay between albums to prevent API abuse
                    # This is especially important for artists with many albums
                    if album_index < len(albums) - 1:  # Don't delay after the last album
                        logger.debug(f"Rate limiting: waiting {DELAY_BETWEEN_ALBUMS}s before next album")
                        time.sleep(DELAY_BETWEEN_ALBUMS)

                except Exception as e:
                    # One bad album does not abort the whole artist scan
                    logger.warning(f"Error checking album {album.name}: {e}")
                    continue

            # Update last scan timestamp for this artist
            self.update_artist_scan_timestamp(watchlist_artist)

            # Fetch and store similar artists for discovery feature (with caching to avoid over-polling)
            # Similar artists are fetched from MusicMap (works with any source) and matched to both Spotify and iTunes
            source_artist_id = watchlist_artist.spotify_artist_id or watchlist_artist.itunes_artist_id or str(watchlist_artist.id)
            try:
                # Check if we have fresh similar artists cached (< 30 days old)
                # If Spotify is authenticated, also require Spotify IDs to be present
                spotify_authenticated = self.spotify_client and self.spotify_client.is_spotify_authenticated()
                if self.database.has_fresh_similar_artists(source_artist_id, days_threshold=30, require_spotify=spotify_authenticated):
                    logger.info(f"Similar artists for {watchlist_artist.artist_name} are cached and fresh, skipping MusicMap fetch")
                    # Even if cached, backfill missing iTunes IDs (seamless dual-source support)
                    self._backfill_similar_artists_itunes_ids(source_artist_id)
                else:
                    logger.info(f"Fetching similar artists for {watchlist_artist.artist_name}...")
                    self.update_similar_artists(watchlist_artist)
                    logger.info(f"Similar artists updated for {watchlist_artist.artist_name}")
            except Exception as similar_error:
                # Similar-artist refresh is best-effort and never fails the scan
                logger.warning(f"Failed to update similar artists for {watchlist_artist.artist_name}: {similar_error}")

            return ScanResult(
                artist_name=watchlist_artist.artist_name,
                spotify_artist_id=watchlist_artist.spotify_artist_id or '',
                albums_checked=len(albums),
                new_tracks_found=new_tracks_found,
                tracks_added_to_wishlist=tracks_added_to_wishlist,
                success=True
            )

        except Exception as e:
            logger.error(f"Error scanning artist {watchlist_artist.artist_name}: {e}")
            return ScanResult(
                artist_name=watchlist_artist.artist_name,
                spotify_artist_id=watchlist_artist.spotify_artist_id or '',
                albums_checked=0,
                new_tracks_found=0,
                tracks_added_to_wishlist=0,
                success=False,
                error_message=str(e)
            )
|
|
|
|
def get_artist_discography(self, spotify_artist_id: str, last_scan_timestamp: Optional[datetime] = None) -> Optional[List]:
|
|
"""
|
|
Get artist's discography from Spotify, optionally filtered by release date.
|
|
|
|
Args:
|
|
spotify_artist_id: Spotify artist ID
|
|
last_scan_timestamp: Only return releases after this date (for incremental scans)
|
|
If None, uses lookback period setting from database
|
|
"""
|
|
try:
|
|
# Get all artist albums (albums + singles) - this is rate limited in spotify_client
|
|
logger.debug(f"Fetching discography for artist {spotify_artist_id}")
|
|
albums = self.spotify_client.get_artist_albums(spotify_artist_id, album_type='album,single', limit=50)
|
|
|
|
if not albums:
|
|
logger.warning(f"No albums found for artist {spotify_artist_id}")
|
|
return []
|
|
|
|
# Add small delay after fetching artist discography to be extra safe
|
|
time.sleep(0.3) # 300ms breathing room
|
|
|
|
# Determine cutoff date for filtering
|
|
cutoff_timestamp = last_scan_timestamp
|
|
|
|
# If no last scan timestamp, use lookback period setting
|
|
if cutoff_timestamp is None:
|
|
lookback_period = self._get_lookback_period_setting()
|
|
if lookback_period != 'all':
|
|
# Convert period to days and create cutoff date (use UTC)
|
|
days = int(lookback_period)
|
|
cutoff_timestamp = datetime.now(timezone.utc) - timedelta(days=days)
|
|
logger.info(f"Using lookback period: {lookback_period} days (cutoff: {cutoff_timestamp})")
|
|
|
|
# Filter by release date if we have a cutoff timestamp
|
|
if cutoff_timestamp:
|
|
filtered_albums = []
|
|
for album in albums:
|
|
if self.is_album_after_timestamp(album, cutoff_timestamp):
|
|
filtered_albums.append(album)
|
|
|
|
logger.info(f"Filtered {len(albums)} albums to {len(filtered_albums)} released after {cutoff_timestamp}")
|
|
return filtered_albums
|
|
|
|
# Return all albums if no cutoff (lookback_period = 'all')
|
|
return albums
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting discography for artist {spotify_artist_id}: {e}")
|
|
return None
|
|
|
|
    def _get_artist_discography_with_client(self, client, artist_id: str, last_scan_timestamp: Optional[datetime] = None) -> Optional[List]:
        """
        Get artist's discography using the specified client, optionally filtered by release date.

        Args:
            client: The metadata client to use (spotify or itunes)
            artist_id: Artist ID for the given client
            last_scan_timestamp: Only return releases after this date (for incremental scans)
                If None, uses lookback period setting from database

        Returns:
            Possibly-filtered list of albums (empty when the artist has none),
            or None when the provider call raised.
        """
        try:
            # Get all artist albums (albums + singles)
            logger.debug(f"Fetching discography for artist {artist_id}")
            albums = client.get_artist_albums(artist_id, album_type='album,single', limit=50)

            if not albums:
                logger.warning(f"No albums found for artist {artist_id}")
                return []

            # Add small delay after fetching artist discography to be extra safe
            time.sleep(0.3)  # 300ms breathing room

            # Determine cutoff date for filtering
            cutoff_timestamp = last_scan_timestamp

            # If no last scan timestamp, use lookback period setting
            if cutoff_timestamp is None:
                lookback_period = self._get_lookback_period_setting()
                if lookback_period != 'all':
                    # Convert period to days and create cutoff date (use UTC)
                    days = int(lookback_period)
                    cutoff_timestamp = datetime.now(timezone.utc) - timedelta(days=days)
                    logger.info(f"Using lookback period: {lookback_period} days (cutoff: {cutoff_timestamp})")

            # Filter by release date if we have a cutoff timestamp
            if cutoff_timestamp:
                filtered_albums = []
                for album in albums:
                    if self.is_album_after_timestamp(album, cutoff_timestamp):
                        filtered_albums.append(album)

                logger.info(f"Filtered {len(albums)} albums to {len(filtered_albums)} released after {cutoff_timestamp}")
                return filtered_albums

            # Return all albums if no cutoff (lookback_period = 'all')
            return albums

        except Exception as e:
            logger.error(f"Error getting discography for artist {artist_id}: {e}")
            return None
|
|
|
|
def _backfill_missing_ids(self, artists: List[WatchlistArtist], provider: str):
    """
    Match every watchlist artist that lacks an ID for the active provider.

    Example: a library of 50 artists known only by Spotify IDs gets all 50
    matched to iTunes in a single pass when iTunes becomes the active source.

    Args:
        artists: In-memory watchlist rows; matched entries are updated in place.
        provider: 'spotify' or 'itunes' — the provider whose IDs to backfill.
    """
    if provider == 'spotify':
        # Artists known only by their iTunes ID need a Spotify match.
        pending = [a for a in artists if not a.spotify_artist_id and a.itunes_artist_id]
    elif provider == 'itunes':
        # Artists known only by their Spotify ID need an iTunes match.
        pending = [a for a in artists if not a.itunes_artist_id and a.spotify_artist_id]
    else:
        pending = []

    if not pending:
        logger.info(f"✅ All artists already have {provider} IDs")
        return

    logger.info(f"🔄 Backfilling {len(pending)} artists with {provider} IDs...")

    matched_count = 0
    for artist in pending:
        try:
            if provider == 'spotify':
                new_id = self._match_to_spotify(artist.artist_name)
                if new_id:
                    self.database.update_watchlist_spotify_id(artist.id, new_id)
                    artist.spotify_artist_id = new_id  # keep in-memory copy current
                    matched_count += 1
                    logger.info(f"✅ Matched '{artist.artist_name}' to Spotify: {new_id}")
            elif provider == 'itunes':
                new_id = self._match_to_itunes(artist.artist_name)
                if new_id:
                    self.database.update_watchlist_itunes_id(artist.id, new_id)
                    artist.itunes_artist_id = new_id  # keep in-memory copy current
                    matched_count += 1
                    logger.info(f"✅ Matched '{artist.artist_name}' to iTunes: {new_id}")

            # Gentle pacing between lookups to respect API rate limits.
            time.sleep(0.3)

        except Exception as e:
            logger.warning(f"Could not match '{artist.artist_name}' to {provider}: {e}")
            continue

    logger.info(f"✅ Backfilled {matched_count}/{len(pending)} artists with {provider} IDs")
|
|
|
|
def _match_to_spotify(self, artist_name: str) -> Optional[str]:
|
|
"""Match artist name to Spotify ID"""
|
|
try:
|
|
# Use metadata service if available, fallback to spotify_client
|
|
if hasattr(self, '_metadata_service') and self._metadata_service:
|
|
results = self._metadata_service.spotify.search_artists(artist_name, limit=1)
|
|
else:
|
|
results = self.spotify_client.search_artists(artist_name, limit=1)
|
|
|
|
if results:
|
|
return results[0].id
|
|
except Exception as e:
|
|
logger.warning(f"Could not match {artist_name} to Spotify: {e}")
|
|
return None
|
|
|
|
def _match_to_itunes(self, artist_name: str) -> Optional[str]:
|
|
"""Match artist name to iTunes ID"""
|
|
try:
|
|
# Use metadata service's iTunes client
|
|
if hasattr(self, '_metadata_service') and self._metadata_service:
|
|
results = self._metadata_service.itunes.search_artists(artist_name, limit=1)
|
|
if results:
|
|
return results[0].id
|
|
else:
|
|
# iTunes client not available without metadata service
|
|
logger.warning(f"Cannot match to iTunes - MetadataService not available")
|
|
except Exception as e:
|
|
logger.warning(f"Could not match {artist_name} to iTunes: {e}")
|
|
return None
|
|
|
|
def _get_lookback_period_setting(self) -> str:
|
|
"""
|
|
Get the discovery lookback period setting from database.
|
|
|
|
Returns:
|
|
str: Period value ('7', '30', '90', '180', or 'all')
|
|
"""
|
|
try:
|
|
with self.database._get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT value FROM metadata WHERE key = 'discovery_lookback_period'")
|
|
row = cursor.fetchone()
|
|
|
|
if row:
|
|
return row['value']
|
|
else:
|
|
# Default to 30 days if not set
|
|
return '30'
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error getting lookback period setting, defaulting to 30 days: {e}")
|
|
return '30'
|
|
|
|
def is_album_after_timestamp(self, album, timestamp: datetime) -> bool:
    """
    Return True when the album's release date falls strictly after *timestamp*.

    Albums with missing or unparseable release dates are treated as "after"
    so they are never silently dropped from a scan.
    """
    try:
        raw = album.release_date
        if not raw:
            return True  # unknown release date: err on the side of inclusion

        # Spotify reports year / year-month / full-date precision; iTunes may
        # append a time component in ISO 8601 form.
        if len(raw) == 4:  # year only, e.g. "2023"
            release_dt = datetime(int(raw), 1, 1, tzinfo=timezone.utc)
        elif len(raw) == 7:  # year-month, e.g. "2023-10"
            year, month = raw.split('-')
            release_dt = datetime(int(year), int(month), 1, tzinfo=timezone.utc)
        elif len(raw) == 10:  # full date, e.g. "2023-10-15"
            release_dt = datetime.strptime(raw, "%Y-%m-%d").replace(tzinfo=timezone.utc)
        elif 'T' in raw:  # ISO 8601 with time, e.g. "2017-12-08T08:00:00Z" from iTunes
            # Drop the time portion and parse the date alone.
            release_dt = datetime.strptime(raw.split('T')[0], "%Y-%m-%d").replace(tzinfo=timezone.utc)
        else:
            logger.warning(f"Unknown release date format: {raw}")
            return True  # unparseable: include rather than lose a release

        # Treat a naive comparison timestamp as UTC.
        if timestamp.tzinfo is None:
            timestamp = timestamp.replace(tzinfo=timezone.utc)

        return release_dt > timestamp

    except Exception as e:
        logger.warning(f"Error comparing album date {album.release_date} with timestamp {timestamp}: {e}")
        return True  # include when the comparison itself fails
|
|
|
|
def _should_include_release(self, track_count: int, watchlist_artist: WatchlistArtist) -> bool:
    """
    Decide whether a release passes the user's album/EP/single preferences.

    Categorization by track count:
        1-3 tracks -> single
        4-6 tracks -> EP
        7+  tracks -> album

    Args:
        track_count: Number of tracks on the release.
        watchlist_artist: Preference holder; missing attributes default to
            True for backwards compatibility.

    Returns:
        True to keep the release, False to skip it.
    """
    try:
        # Map the track count onto the preference attribute to consult.
        if track_count >= 7:
            preference = 'include_albums'
        elif track_count >= 4:
            preference = 'include_eps'
        else:
            preference = 'include_singles'

        # A missing preference attribute means "include everything".
        return getattr(watchlist_artist, preference, True)

    except Exception as e:
        logger.warning(f"Error checking release inclusion: {e}")
        return True  # fail open: include the release on error
|
|
|
|
def _should_include_track(self, track, album_data, watchlist_artist: WatchlistArtist) -> bool:
    """
    Apply content-type filters (live / remix / acoustic / compilation) to a track.

    Args:
        track: Track object or dict (only its name is inspected).
        album_data: Album object or dict (only its name is inspected).
        watchlist_artist: Preference holder; missing flags default to False,
            i.e. filtered content is excluded by default.

    Returns:
        True if the track survives every enabled filter, False to skip it.
    """
    try:
        # Pull names regardless of dict/object representation.
        track_name = track.get('name', '') if isinstance(track, dict) else getattr(track, 'name', '')
        album_name = album_data.get('name', '') if isinstance(album_data, dict) else getattr(album_data, 'name', '')

        # Preferences default to False: content is excluded unless enabled.
        include_live = getattr(watchlist_artist, 'include_live', False)
        include_remixes = getattr(watchlist_artist, 'include_remixes', False)
        include_acoustic = getattr(watchlist_artist, 'include_acoustic', False)
        include_compilations = getattr(watchlist_artist, 'include_compilations', False)

        # Album-level filter first: compilation releases.
        if not include_compilations and is_compilation_album(album_name):
            logger.debug(f"Skipping compilation album: {album_name}")
            return False

        # Track-level content-type filters.
        if not include_live and is_live_version(track_name, album_name):
            logger.debug(f"Skipping live version: {track_name}")
            return False

        if not include_remixes and is_remix_version(track_name, album_name):
            logger.debug(f"Skipping remix: {track_name}")
            return False

        if not include_acoustic and is_acoustic_version(track_name, album_name):
            logger.debug(f"Skipping acoustic version: {track_name}")
            return False

        # Track passed every enabled filter.
        return True

    except Exception as e:
        logger.warning(f"Error checking track content type inclusion: {e}")
        return True  # fail open: keep the track when the check itself errors
|
|
|
|
def is_track_missing_from_library(self, track) -> bool:
    """
    Check if a track is missing from the local media-server library.

    Uses the same title-variation and database-matching logic as the
    "download missing tracks" modals.

    Args:
        track: Spotify track object or dict with 'name' and 'artists'.

    Returns:
        True when no sufficiently-confident library match is found (the
        track should go to the wishlist); False when the track exists.
        Returns True on errors so candidates are never silently dropped.
    """
    try:
        # Handle both dict and object track formats
        if isinstance(track, dict):
            original_title = track.get('name', 'Unknown')
            track_artists = track.get('artists', [])
            artists_to_search = [artist.get('name', 'Unknown') for artist in track_artists] if track_artists else ["Unknown"]
        else:
            original_title = track.name
            artists_to_search = [artist.name for artist in track.artists] if track.artists else ["Unknown"]

        # Generate title variations (same logic as sync page)
        title_variations = [original_title]

        # Only add cleaned version if it removes clear noise
        cleaned_for_search = clean_track_name_for_search(original_title)
        if cleaned_for_search.lower() != original_title.lower():
            title_variations.append(cleaned_for_search)

        # Use matching engine's conservative clean_title
        base_title = self.matching_engine.clean_title(original_title)
        if base_title.lower() not in [t.lower() for t in title_variations]:
            title_variations.append(base_title)

        unique_title_variations = list(dict.fromkeys(title_variations))

        # PERF FIX: resolve the active media server once (it is loop-invariant)
        # instead of re-importing config and re-reading the setting for every
        # artist/title combination inside the nested loops below.
        from config.settings import config_manager
        active_server = config_manager.get_active_media_server()

        # Search for each artist with each title variation
        for artist_name in artists_to_search:
            for query_title in unique_title_variations:
                # Use same database check as modals with server awareness
                db_track, confidence = self.database.check_track_exists(query_title, artist_name, confidence_threshold=0.7, server_source=active_server)

                if db_track and confidence >= 0.7:
                    logger.debug(f"✔️ Track found in library: '{original_title}' by '{artist_name}' (confidence: {confidence:.2f})")
                    return False  # Track exists in library

        # No match found with any variation or artist
        logger.info(f"❌ Track missing from library: '{original_title}' by '{artists_to_search[0] if artists_to_search else 'Unknown'}' - adding to wishlist")
        return True  # Track is missing

    except Exception as e:
        # Handle both dict and object track formats for error logging
        track_name = track.get('name', 'Unknown') if isinstance(track, dict) else getattr(track, 'name', 'Unknown')
        logger.warning(f"Error checking if track exists: {track_name}: {e}")
        return True  # Assume missing if we can't check
|
|
|
|
def add_track_to_wishlist(self, track, album, watchlist_artist: WatchlistArtist) -> bool:
    """
    Add a missing track to the wishlist with watchlist context.

    Args:
        track: Spotify track object or dict.
        album: Album object or dict the track belongs to.
        watchlist_artist: The watched artist this track was discovered for.

    Returns:
        True when the wishlist insert succeeded, False otherwise.
    """
    # BUGFIX: pre-initialize so the except handler below can always log a
    # name; previously track_name could be unbound (NameError) if field
    # extraction itself raised (e.g. track.id missing on the object branch).
    track_name = 'Unknown'
    try:
        # Handle both dict and object track/album formats
        if isinstance(track, dict):
            track_id = track.get('id', '')
            track_name = track.get('name', 'Unknown')
            track_artists = track.get('artists', [])
            track_duration = track.get('duration_ms', 0)
            track_explicit = track.get('explicit', False)
            track_external_urls = track.get('external_urls', {})
            track_popularity = track.get('popularity', 0)
            track_preview_url = track.get('preview_url', None)
            track_number = track.get('track_number', 1)
            disc_number = track.get('disc_number', 1)
            track_uri = track.get('uri', '')
        else:
            track_id = track.id
            track_name = track.name
            track_artists = [{'name': artist.name, 'id': artist.id} for artist in track.artists]
            track_duration = getattr(track, 'duration_ms', 0)
            track_explicit = getattr(track, 'explicit', False)
            track_external_urls = getattr(track, 'external_urls', {})
            track_popularity = getattr(track, 'popularity', 0)
            track_preview_url = getattr(track, 'preview_url', None)
            track_number = getattr(track, 'track_number', 1)
            disc_number = getattr(track, 'disc_number', 1)
            track_uri = getattr(track, 'uri', '')

        if isinstance(album, dict):
            album_name = album.get('name', 'Unknown')
            album_id = album.get('id', '')
            album_release_date = album.get('release_date', '')
            album_images = album.get('images', [])
            album_type = album.get('album_type', 'album')  # 'album', 'single', or 'ep'
            total_tracks = album.get('total_tracks', 0)
        else:
            album_name = album.name
            album_id = album.id
            album_release_date = album.release_date
            album_images = album.images if hasattr(album, 'images') else []
            album_type = album.album_type if hasattr(album, 'album_type') else 'album'
            total_tracks = album.total_tracks if hasattr(album, 'total_tracks') else 0

        # Create Spotify track data structure
        spotify_track_data = {
            'id': track_id,
            'name': track_name,
            'artists': track_artists,
            'album': {
                'name': album_name,
                'id': album_id,
                'release_date': album_release_date,
                'images': album_images,
                'album_type': album_type,  # Store album type for category filtering
                'total_tracks': total_tracks  # Store track count for accurate categorization
            },
            'duration_ms': track_duration,
            'explicit': track_explicit,
            'external_urls': track_external_urls,
            'popularity': track_popularity,
            'preview_url': track_preview_url,
            'track_number': track_number,
            'disc_number': disc_number,
            'uri': track_uri,
            'is_local': False
        }

        # Add to wishlist with watchlist context
        success = self.database.add_to_wishlist(
            spotify_track_data=spotify_track_data,
            failure_reason="Missing from library (found by watchlist scan)",
            source_type="watchlist",
            source_info={
                'watchlist_artist_name': watchlist_artist.artist_name,
                'watchlist_artist_id': watchlist_artist.spotify_artist_id,
                'album_name': album_name,
                'scan_timestamp': datetime.now().isoformat()
            }
        )

        if success:
            first_artist = track_artists[0].get('name', 'Unknown') if track_artists else 'Unknown'
            logger.debug(f"Added track to wishlist: {track_name} by {first_artist}")
        else:
            logger.warning(f"Failed to add track to wishlist: {track_name}")

        return success

    except Exception as e:
        # track_name is guaranteed bound here (see initialization above).
        logger.error(f"Error adding track to wishlist: {track_name}: {e}")
        return False
|
|
|
|
def update_artist_scan_timestamp(self, artist) -> bool:
    """
    Stamp an artist's watchlist row with the current scan time.

    Args:
        artist: WatchlistArtist object, or (legacy) a raw provider artist-ID
            string matched against both the Spotify and iTunes ID columns.

    Returns:
        True when a row was updated, False otherwise.
    """
    try:
        with self.database._get_connection() as conn:
            cursor = conn.cursor()

            if hasattr(artist, 'id'):
                # WatchlistArtist object: the primary key is always reliable.
                cursor.execute("""
                    UPDATE watchlist_artists
                    SET last_scan_timestamp = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP
                    WHERE id = ?
                """, (artist.id,))
                artist_label = f"{artist.artist_name} (id={artist.id})"
            else:
                # Legacy path: raw string ID, try both provider columns.
                cursor.execute("""
                    UPDATE watchlist_artists
                    SET last_scan_timestamp = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP
                    WHERE spotify_artist_id = ? OR itunes_artist_id = ?
                """, (artist, artist))
                artist_label = f"ID {artist}"

            conn.commit()

            if cursor.rowcount > 0:
                logger.debug(f"Updated scan timestamp for artist {artist_label}")
                return True

            logger.warning(f"No artist found for {artist_label}")
            return False

    except Exception as e:
        logger.error(f"Error updating scan timestamp: {e}")
        return False
|
|
|
|
def _fetch_similar_artists_from_musicmap(self, artist_name: str, limit: int = 20) -> List[Dict[str, Any]]:
    """
    Fetch similar artists from MusicMap and match them to both Spotify and iTunes.

    Scrapes music-map.com for the artist's similarity map, then resolves each
    found name to a Spotify ID (when the Spotify client is authenticated) and
    an iTunes ID (with retry for rate limiting). The searched artist itself is
    excluded, and only names that matched at least one provider are returned.

    Args:
        artist_name: The artist name to find similar artists for
        limit: Maximum number of similar artists to return (default: 20)

    Returns:
        List of matched artist dictionaries with both Spotify and iTunes IDs when available.
        Each dict has keys: 'name', 'spotify_id', 'itunes_id', 'image_url', 'genres',
        'popularity'. Returns [] on network errors or when no map is found.
    """
    try:
        logger.info(f"Fetching similar artists from MusicMap for: {artist_name}")

        # Construct MusicMap URL (site expects '+'-joined lowercase names)
        url_artist = artist_name.lower().replace(' ', '+')
        musicmap_url = f'https://www.music-map.com/{url_artist}'

        # Set headers to mimic a browser
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
        }

        # Fetch MusicMap page
        response = requests.get(musicmap_url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse HTML; the similarity map lives in the element with id 'gnodMap'
        soup = BeautifulSoup(response.text, 'html.parser')
        gnod_map = soup.find(id='gnodMap')

        if not gnod_map:
            logger.warning(f"Could not find artist map on MusicMap for {artist_name}")
            return []

        # Extract similar artist names (each anchor in the map is one artist)
        all_anchors = gnod_map.find_all('a')
        searched_artist_lower = artist_name.lower().strip()

        similar_artist_names = []
        for anchor in all_anchors:
            artist_text = anchor.get_text(strip=True)

            # Skip if this is the searched artist
            if artist_text.lower() == searched_artist_lower:
                continue

            similar_artist_names.append(artist_text)

        logger.info(f"Found {len(similar_artist_names)} similar artists from MusicMap")

        # Get iTunes client for matching
        from core.itunes_client import iTunesClient
        itunes_client = iTunesClient()

        # Get the searched artist's IDs to exclude them (a provider search for a
        # similar name can accidentally return the searched artist itself)
        searched_spotify_id = None
        searched_itunes_id = None
        try:
            # Try Spotify search
            if self.spotify_client and self.spotify_client.is_spotify_authenticated():
                searched_results = self.spotify_client.search_artists(artist_name, limit=1)
                if searched_results and len(searched_results) > 0:
                    searched_spotify_id = searched_results[0].id
        except Exception as e:
            logger.debug(f"Could not get searched artist Spotify ID: {e}")

        try:
            # Try iTunes search
            itunes_results = itunes_client.search_artists(artist_name, limit=1)
            if itunes_results and len(itunes_results) > 0:
                searched_itunes_id = itunes_results[0].id
        except Exception as e:
            logger.debug(f"Could not get searched artist iTunes ID: {e}")

        # Match each artist to both Spotify and iTunes
        matched_artists = []
        seen_names = set()  # Track seen artist names to prevent duplicates

        for artist_name_to_match in similar_artist_names[:limit]:
            try:
                # Skip if we've already matched this artist name
                name_lower = artist_name_to_match.lower().strip()
                if name_lower in seen_names:
                    continue

                artist_data = {
                    'name': artist_name_to_match,
                    'spotify_id': None,
                    'itunes_id': None,
                    'image_url': None,
                    'genres': [],
                    'popularity': 0
                }

                # Try to match on Spotify
                if self.spotify_client and self.spotify_client.is_spotify_authenticated():
                    try:
                        spotify_results = self.spotify_client.search_artists(artist_name_to_match, limit=1)
                        if spotify_results and len(spotify_results) > 0:
                            spotify_artist = spotify_results[0]
                            # Skip if this is the searched artist
                            if spotify_artist.id != searched_spotify_id:
                                artist_data['spotify_id'] = spotify_artist.id
                                artist_data['name'] = spotify_artist.name  # Use canonical name
                                artist_data['image_url'] = spotify_artist.image_url if hasattr(spotify_artist, 'image_url') else None
                                artist_data['genres'] = spotify_artist.genres if hasattr(spotify_artist, 'genres') else []
                                artist_data['popularity'] = spotify_artist.popularity if hasattr(spotify_artist, 'popularity') else 0
                    except Exception as e:
                        logger.debug(f"Spotify match failed for {artist_name_to_match}: {e}")

                # Try to match on iTunes (with retry for rate limiting)
                try:
                    itunes_results = itunes_api_call_with_retry(
                        itunes_client.search_artists, artist_name_to_match, limit=1
                    )
                    if itunes_results and len(itunes_results) > 0:
                        itunes_artist = itunes_results[0]
                        # Skip if this is the searched artist
                        if itunes_artist.id != searched_itunes_id:
                            artist_data['itunes_id'] = itunes_artist.id
                            # Use iTunes name if we don't have Spotify
                            if not artist_data['spotify_id']:
                                artist_data['name'] = itunes_artist.name
                            # Use iTunes genres if we don't have Spotify genres
                            if not artist_data['genres'] and hasattr(itunes_artist, 'genres'):
                                artist_data['genres'] = itunes_artist.genres
                    else:
                        logger.info(f" [iTunes] No match found for: {artist_name_to_match}")
                except Exception as e:
                    logger.info(f" [iTunes] Match failed for {artist_name_to_match}: {e}")

                # Only add if we got at least one ID
                if artist_data['spotify_id'] or artist_data['itunes_id']:
                    seen_names.add(name_lower)
                    matched_artists.append(artist_data)
                    logger.debug(f" Matched: {artist_data['name']} (Spotify: {artist_data['spotify_id']}, iTunes: {artist_data['itunes_id']})")

            except Exception as match_error:
                logger.debug(f"Error matching {artist_name_to_match}: {match_error}")
                continue

        # Log detailed matching statistics
        itunes_matched = sum(1 for a in matched_artists if a.get('itunes_id'))
        spotify_matched = sum(1 for a in matched_artists if a.get('spotify_id'))
        both_matched = sum(1 for a in matched_artists if a.get('itunes_id') and a.get('spotify_id'))
        logger.info(f"Matched {len(matched_artists)} similar artists - iTunes: {itunes_matched}, Spotify: {spotify_matched}, Both: {both_matched}")
        return matched_artists

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching from MusicMap: {e}")
        return []
    except Exception as e:
        logger.error(f"Error fetching similar artists from MusicMap: {e}")
        return []
|
|
|
|
def _backfill_similar_artists_itunes_ids(self, source_artist_id: str) -> int:
    """
    Fill in missing iTunes IDs for cached similar artists of one source artist.

    Keeps cached discovery data usable across providers without wiping it.

    Args:
        source_artist_id: The source artist whose similar-artist cache to repair.

    Returns:
        Number of similar artists that received an iTunes ID.
    """
    try:
        pending = self.database.get_similar_artists_missing_itunes_ids(source_artist_id)
        if not pending:
            return 0

        logger.info(f"Backfilling iTunes IDs for {len(pending)} similar artists")

        # Get iTunes client
        from core.itunes_client import iTunesClient
        itunes_client = iTunesClient()

        updated_count = 0
        for candidate in pending:
            try:
                # Name-based lookup; first hit wins.
                hits = itunes_client.search_artists(candidate.similar_artist_name, limit=1)
                if hits and len(hits) > 0:
                    itunes_id = hits[0].id
                    # Persist the resolved iTunes ID on the cached row.
                    if self.database.update_similar_artist_itunes_id(candidate.id, itunes_id):
                        updated_count += 1
                        logger.debug(f" Backfilled iTunes ID {itunes_id} for {candidate.similar_artist_name}")
            except Exception as e:
                logger.debug(f" Could not backfill iTunes ID for {candidate.similar_artist_name}: {e}")
                continue

        if updated_count > 0:
            logger.info(f"Backfilled {updated_count} similar artists with iTunes IDs")

        return updated_count

    except Exception as e:
        logger.error(f"Error backfilling similar artists iTunes IDs: {e}")
        return 0
|
|
|
|
def update_similar_artists(self, watchlist_artist: WatchlistArtist, limit: int = 10) -> bool:
    """
    Refresh the stored similar-artist list for one watchlist artist.

    Pulls recommendations from MusicMap (matched to Spotify and iTunes where
    possible) and persists them ranked by similarity. Runs after each artist
    scan to build the discovery pool.

    Returns:
        True on success (including "no recommendations found"), False on error.
    """
    try:
        logger.info(f"Fetching similar artists for {watchlist_artist.artist_name}")

        # Fetch from MusicMap: a list of dicts with 'name', 'spotify_id', 'itunes_id'.
        similar_artists = self._fetch_similar_artists_from_musicmap(watchlist_artist.artist_name, limit=limit)

        if not similar_artists:
            logger.debug(f"No similar artists found for {watchlist_artist.artist_name}")
            return True  # empty result is not a failure

        logger.info(f"Found {len(similar_artists)} similar artists for {watchlist_artist.artist_name}")

        # Stable source key: prefer the Spotify ID, then iTunes, then our row id.
        source_artist_id = watchlist_artist.spotify_artist_id or watchlist_artist.itunes_artist_id or str(watchlist_artist.id)

        stored_count = 0
        for rank, entry in enumerate(similar_artists, 1):
            try:
                stored = self.database.add_or_update_similar_artist(
                    source_artist_id=source_artist_id,
                    similar_artist_name=entry['name'],
                    similar_artist_spotify_id=entry.get('spotify_id'),
                    similar_artist_itunes_id=entry.get('itunes_id'),
                    similarity_rank=rank
                )
                if stored:
                    stored_count += 1
                    logger.debug(f" #{rank}: {entry['name']} (Spotify: {entry.get('spotify_id')}, iTunes: {entry.get('itunes_id')})")
            except Exception as e:
                logger.warning(f"Error storing similar artist {entry.get('name', 'Unknown')}: {e}")
                continue

        logger.info(f"Stored {stored_count}/{len(similar_artists)} similar artists for {watchlist_artist.artist_name}")
        return True

    except Exception as e:
        logger.error(f"Error fetching similar artists for {watchlist_artist.artist_name}: {e}")
        return False
|
|
|
|
def populate_discovery_pool(self, top_artists_limit: int = 50, albums_per_artist: int = 10):
    """
    Populate discovery pool with tracks from top similar artists.
    Called after watchlist scan completes.

    Supports both Spotify and iTunes sources - populates for whichever is available.
    - Checks if pool was updated in last 24 hours (prevents over-polling)
    - Includes albums, singles, and EPs for comprehensive coverage
    - Appends to existing pool instead of replacing it
    - Cleans up tracks older than 365 days (maintains 1 year rolling window)

    Args:
        top_artists_limit: Max number of similar artists (ordered by occurrence_count)
            to pull releases from.
        albums_per_artist: Max releases selected per artist per source (3 newest
            releases are always included; the rest are randomly sampled).
    """
    try:
        from datetime import datetime, timedelta
        import random

        # Check if we should run discovery pool population (prevents over-polling)
        skip_pool_population = not self.database.should_populate_discovery_pool(hours_threshold=24)

        if skip_pool_population:
            logger.info("Discovery pool was populated recently (< 24 hours ago). Skipping pool population.")
            logger.info("But still refreshing recent albums cache and curated playlists...")
            # Still run these even when skipping main pool population
            self.cache_discovery_recent_albums()
            self.curate_discovery_playlists()
            return

        logger.info("Populating discovery pool from similar artists...")

        # Determine which sources are available
        spotify_available = self.spotify_client and self.spotify_client.is_spotify_authenticated()

        # Import iTunes client for fallback
        from core.itunes_client import iTunesClient
        itunes_client = iTunesClient()
        itunes_available = True  # iTunes is always available (no auth needed)

        # NOTE(review): with itunes_available hard-coded True this branch is
        # currently unreachable; kept for safety if that assumption changes.
        if not spotify_available and not itunes_available:
            logger.warning("No music sources available to populate discovery pool")
            return

        logger.info(f"Sources available - Spotify: {spotify_available}, iTunes: {itunes_available}")

        # Get top similar artists across all watchlist (ordered by occurrence_count)
        similar_artists = self.database.get_top_similar_artists(limit=top_artists_limit)

        if not similar_artists:
            logger.info("No similar artists found to populate discovery pool from similar artists")
            logger.info("But still caching recent albums from watchlist artists and curating playlists...")
            # Still run these even without similar artists - they use watchlist artists
            self.cache_discovery_recent_albums()
            self.curate_discovery_playlists()
            return

        logger.info(f"Processing {len(similar_artists)} top similar artists for discovery pool")

        total_tracks_added = 0

        for artist_idx, similar_artist in enumerate(similar_artists, 1):
            try:
                logger.info(f"[{artist_idx}/{len(similar_artists)}] Processing {similar_artist.similar_artist_name} (occurrence: {similar_artist.occurrence_count})")

                # Build list of sources to process for this artist
                # iTunes is ALWAYS processed (baseline), Spotify is added if authenticated
                sources_to_process = []

                # Always add iTunes first (baseline source)
                itunes_id = similar_artist.similar_artist_itunes_id
                if not itunes_id:
                    # On-the-fly lookup for missing iTunes ID (seamless provider switching)
                    try:
                        itunes_results = itunes_client.search_artists(similar_artist.similar_artist_name, limit=1)
                        if itunes_results and len(itunes_results) > 0:
                            itunes_id = itunes_results[0].id
                            # Cache it for future use
                            self.database.update_similar_artist_itunes_id(similar_artist.id, itunes_id)
                            logger.debug(f" Resolved iTunes ID {itunes_id} for {similar_artist.similar_artist_name}")
                    except Exception as e:
                        logger.debug(f" Could not resolve iTunes ID for {similar_artist.similar_artist_name}: {e}")

                if itunes_id:
                    sources_to_process.append(('itunes', itunes_id))

                # Add Spotify if authenticated and we have an ID
                if spotify_available and similar_artist.similar_artist_spotify_id:
                    sources_to_process.append(('spotify', similar_artist.similar_artist_spotify_id))

                if not sources_to_process:
                    logger.debug(f"No valid IDs for {similar_artist.similar_artist_name}, skipping")
                    continue

                logger.debug(f" Processing {len(sources_to_process)} source(s): {[s[0] for s in sources_to_process]}")

                # Process each source for this artist
                for source, artist_id in sources_to_process:
                    try:
                        # Get artist's albums from this source
                        if source == 'spotify':
                            all_albums = self.spotify_client.get_artist_albums(
                                artist_id,
                                album_type='album,single,ep',
                                limit=50
                            )
                        else:  # itunes
                            all_albums = itunes_client.get_artist_albums(
                                artist_id,
                                album_type='album,single',
                                limit=50
                            )

                        if not all_albums:
                            logger.debug(f"No albums found for {similar_artist.similar_artist_name} on {source}")
                            continue

                        # Fetch artist genres for this source (best-effort; empty list on failure)
                        artist_genres = []
                        try:
                            if source == 'spotify':
                                artist_data = self.spotify_client.get_artist(artist_id)
                                if artist_data and 'genres' in artist_data:
                                    artist_genres = artist_data['genres']
                            else:  # iTunes - genres from artist lookup
                                artist_data = itunes_client.get_artist(artist_id)
                                if artist_data and 'genres' in artist_data:
                                    artist_genres = artist_data['genres']
                        except Exception as e:
                            logger.debug(f"Could not fetch genres for {similar_artist.similar_artist_name} on {source}: {e}")

                        # IMPROVED: Smart selection mixing albums, singles, and EPs
                        # Prioritize recent releases and popular content

                        # Separate by type for balanced selection
                        # NOTE(review): assumes all_albums is ordered newest-first by the
                        # client (relied on by the all_albums[:3] slice below) — confirm.
                        albums = [a for a in all_albums if hasattr(a, 'album_type') and a.album_type == 'album']
                        singles_eps = [a for a in all_albums if hasattr(a, 'album_type') and a.album_type in ['single', 'ep']]
                        # NOTE(review): 'other' is computed but never used (only albums /
                        # singles_eps feed the log line) — candidate for removal.
                        other = [a for a in all_albums if not hasattr(a, 'album_type')]

                        # Select albums: latest releases + popular older content
                        selected_albums = []

                        # Always include 3 most recent releases (any type) - this captures new singles/EPs
                        latest_releases = all_albums[:3]
                        selected_albums.extend(latest_releases)

                        # Add remaining slots with balanced mix
                        remaining_slots = albums_per_artist - len(selected_albums)
                        if remaining_slots > 0:
                            # Combine remaining albums and singles
                            remaining_content = all_albums[3:]

                            if len(remaining_content) > remaining_slots:
                                # Randomly select from remaining content
                                random_selection = random.sample(remaining_content, remaining_slots)
                                selected_albums.extend(random_selection)
                            else:
                                selected_albums.extend(remaining_content)

                        logger.info(f" [{source}] Selected {len(selected_albums)} releases from {len(all_albums)} available (albums: {len(albums)}, singles/EPs: {len(singles_eps)})")

                        # Process each selected album
                        for album_idx, album in enumerate(selected_albums, 1):
                            try:
                                # Get full album data with tracks from appropriate source
                                if source == 'spotify':
                                    album_data = self.spotify_client.get_album(album.id)
                                    if not album_data or 'tracks' not in album_data:
                                        continue
                                    tracks = album_data['tracks'].get('items', [])
                                else:  # itunes
                                    album_data = itunes_client.get_album(album.id)
                                    if not album_data:
                                        continue
                                    # iTunes get_album doesn't include tracks inline, need separate call
                                    tracks_data = itunes_client.get_album_tracks(album.id)
                                    tracks = tracks_data.get('items', []) if tracks_data else []

                                logger.debug(f" Album {album_idx}: {album_data.get('name', 'Unknown')} ({len(tracks)} tracks)")

                                # Determine if this is a new release (within last 30 days)
                                is_new = False
                                try:
                                    release_date_str = album_data.get('release_date', '')
                                    if release_date_str:
                                        # Handle full date or year-only
                                        if len(release_date_str) >= 10:
                                            release_date = datetime.strptime(release_date_str[:10], "%Y-%m-%d")
                                            days_old = (datetime.now() - release_date).days
                                            is_new = days_old <= 30
                                except:
                                    # Best-effort parse: unparseable dates simply stay is_new=False
                                    pass

                                # Add each track to discovery pool
                                for track in tracks:
                                    try:
                                        # Enhance track object with full album data (including album_type)
                                        enhanced_track = {
                                            **track,
                                            'album': {
                                                'id': album_data['id'],
                                                'name': album_data.get('name', 'Unknown Album'),
                                                'images': album_data.get('images', []),
                                                'release_date': album_data.get('release_date', ''),
                                                'album_type': album_data.get('album_type', 'album'),
                                                'total_tracks': album_data.get('total_tracks', 0)
                                            },
                                            '_source': source
                                        }

                                        # Build track data for discovery pool with source-specific IDs
                                        track_data = {
                                            'track_name': track.get('name', 'Unknown Track'),
                                            'artist_name': similar_artist.similar_artist_name,
                                            'album_name': album_data.get('name', 'Unknown Album'),
                                            'album_cover_url': album_data.get('images', [{}])[0].get('url') if album_data.get('images') else None,
                                            'duration_ms': track.get('duration_ms', 0),
                                            # NOTE(review): popularity comes from the album, not
                                            # the individual track — confirm this is intended.
                                            'popularity': album_data.get('popularity', 0),
                                            'release_date': album_data.get('release_date', ''),
                                            'is_new_release': is_new,
                                            'track_data_json': enhanced_track,
                                            'artist_genres': artist_genres
                                        }

                                        # Add source-specific IDs
                                        if source == 'spotify':
                                            track_data['spotify_track_id'] = track.get('id')
                                            track_data['spotify_album_id'] = album_data.get('id')
                                            track_data['spotify_artist_id'] = similar_artist.similar_artist_spotify_id
                                        else:  # itunes
                                            track_data['itunes_track_id'] = track.get('id')
                                            track_data['itunes_album_id'] = album_data.get('id')
                                            track_data['itunes_artist_id'] = similar_artist.similar_artist_itunes_id

                                        # Add to discovery pool with source
                                        if self.database.add_to_discovery_pool(track_data, source=source):
                                            total_tracks_added += 1

                                    except Exception as track_error:
                                        logger.debug(f"Error adding track to discovery pool: {track_error}")
                                        continue

                                # Small delay between albums
                                time.sleep(DELAY_BETWEEN_ALBUMS)

                            except Exception as album_error:
                                logger.warning(f"Error processing album on {source}: {album_error}")
                                continue

                    except Exception as source_error:
                        logger.warning(f"Error processing {source} source for {similar_artist.similar_artist_name}: {source_error}")
                        continue

                # Delay between artists (after processing all sources for this artist)
                if artist_idx < len(similar_artists):
                    time.sleep(DELAY_BETWEEN_ARTISTS)

            except Exception as artist_error:
                logger.warning(f"Error processing artist {similar_artist.similar_artist_name}: {artist_error}")
                continue

        logger.info(f"Discovery pool from similar artists complete: {total_tracks_added} tracks added")

        # Note: Watchlist artist albums are already in discovery pool from the watchlist scan itself
        # No need to re-fetch them here to avoid duplicate API calls

        # Add tracks from random database albums for extra variety (reduced to 5 to save API calls)
        logger.info("Adding tracks from database albums to discovery pool...")
        try:
            with self.database._get_connection() as conn:
                cursor = conn.cursor()
                # Random sample of locally-known albums (SQLite ORDER BY RANDOM())
                cursor.execute("""
                    SELECT DISTINCT a.title, ar.name as artist_name
                    FROM albums a
                    JOIN artists ar ON a.artist_id = ar.id
                    ORDER BY RANDOM()
                    LIMIT 5
                """)
                db_albums = cursor.fetchall()

                logger.info(f"Processing {len(db_albums)} database albums for discovery pool")

                for db_idx, album_row in enumerate(db_albums, 1):
                    try:
                        query = f"{album_row['title']} {album_row['artist_name']}"
                        album_data = None
                        tracks = []
                        db_source = None
                        artist_id_for_genres = None

                        # Try Spotify first if available
                        if spotify_available:
                            try:
                                search_results = self.spotify_client.search_albums(f"album:{album_row['title']} artist:{album_row['artist_name']}", limit=1)
                                if search_results and len(search_results) > 0:
                                    spotify_album = search_results[0]
                                    album_data = self.spotify_client.get_album(spotify_album.id)
                                    if album_data and 'tracks' in album_data:
                                        tracks = album_data['tracks'].get('items', [])
                                        db_source = 'spotify'
                                        if album_data.get('artists'):
                                            artist_id_for_genres = album_data['artists'][0]['id']
                            except Exception as e:
                                logger.debug(f"Spotify search failed for {album_row['title']}: {e}")

                        # Fall back to iTunes if Spotify didn't work
                        if not tracks and itunes_available:
                            try:
                                search_results = itunes_client.search_albums(query, limit=1)
                                if search_results and len(search_results) > 0:
                                    itunes_album = search_results[0]
                                    album_data = itunes_client.get_album(itunes_album.id)
                                    if album_data:
                                        tracks_data = itunes_client.get_album_tracks(itunes_album.id)
                                        tracks = tracks_data.get('items', []) if tracks_data else []
                                        db_source = 'itunes'
                                        # For iTunes, artist ID is in the album data
                                        if album_data.get('artists'):
                                            artist_id_for_genres = album_data['artists'][0].get('id')
                            except Exception as e:
                                logger.debug(f"iTunes search failed for {album_row['title']}: {e}")

                        if not tracks or not album_data:
                            continue

                        # Fetch artist genres (best-effort)
                        artist_genres = []
                        try:
                            if artist_id_for_genres:
                                if db_source == 'spotify':
                                    artist_data = self.spotify_client.get_artist(artist_id_for_genres)
                                else:
                                    artist_data = itunes_client.get_artist(artist_id_for_genres)
                                if artist_data and 'genres' in artist_data:
                                    artist_genres = artist_data['genres']
                        except Exception as e:
                            logger.debug(f"Could not fetch genres for album artist: {e}")

                        # Check if new release
                        is_new = False
                        try:
                            release_date_str = album_data.get('release_date', '')
                            if release_date_str and len(release_date_str) >= 10:
                                release_date = datetime.strptime(release_date_str[:10], "%Y-%m-%d")
                                days_old = (datetime.now() - release_date).days
                                is_new = days_old <= 30
                        except:
                            # Unparseable dates simply stay is_new=False
                            pass

                        for track in tracks:
                            try:
                                enhanced_track = {
                                    **track,
                                    'album': {
                                        'id': album_data['id'],
                                        'name': album_row['title'],
                                        'images': album_data.get('images', []),
                                        'release_date': album_data.get('release_date', ''),
                                        'album_type': album_data.get('album_type', 'album'),
                                        'total_tracks': album_data.get('total_tracks', 0)
                                    },
                                    '_source': db_source
                                }

                                track_data = {
                                    'track_name': track.get('name', 'Unknown Track'),
                                    'artist_name': album_row['artist_name'],
                                    'album_name': album_row['title'],
                                    'album_cover_url': album_data.get('images', [{}])[0].get('url') if album_data.get('images') else None,
                                    'duration_ms': track.get('duration_ms', 0),
                                    'popularity': album_data.get('popularity', 0),
                                    'release_date': album_data.get('release_date', ''),
                                    'is_new_release': is_new,
                                    'track_data_json': enhanced_track,
                                    'artist_genres': artist_genres
                                }

                                # Add source-specific IDs
                                if db_source == 'spotify':
                                    track_data['spotify_track_id'] = track.get('id')
                                    track_data['spotify_album_id'] = album_data.get('id')
                                    track_data['spotify_artist_id'] = artist_id_for_genres or ''
                                else:  # itunes
                                    track_data['itunes_track_id'] = track.get('id')
                                    track_data['itunes_album_id'] = album_data.get('id')
                                    track_data['itunes_artist_id'] = artist_id_for_genres or ''

                                if self.database.add_to_discovery_pool(track_data, source=db_source):
                                    total_tracks_added += 1
                            except Exception as track_error:
                                # Per-track failures are silently skipped in this branch
                                continue

                        time.sleep(DELAY_BETWEEN_ALBUMS)
                    except Exception as album_error:
                        logger.debug(f"Error processing database album {album_row['title']}: {album_error}")
                        continue

                    # Rate limit between albums
                    if db_idx < len(db_albums):
                        time.sleep(DELAY_BETWEEN_ARTISTS)

        except Exception as db_error:
            logger.warning(f"Error processing database albums: {db_error}")

        logger.info(f"Discovery pool population complete: {total_tracks_added} total tracks added from all sources")

        # Clean up tracks older than 365 days (maintain 1 year rolling window)
        logger.info("Cleaning up discovery tracks older than 365 days...")
        deleted_count = self.database.cleanup_old_discovery_tracks(days_threshold=365)
        logger.info(f"Cleaned up {deleted_count} old tracks from discovery pool")

        # Get final track count for metadata
        with self.database._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) as count FROM discovery_pool")
            final_count = cursor.fetchone()['count']

        # Update timestamp to mark when pool was last populated
        self.database.update_discovery_pool_timestamp(track_count=final_count)
        logger.info(f"Discovery pool now contains {final_count} total tracks (built over time)")

        # Cache recent albums for discovery page
        logger.info("Caching recent albums for discovery page...")
        self.cache_discovery_recent_albums()

        # Curate playlists for consistent daily experience
        logger.info("Curating discovery playlists...")
        self.curate_discovery_playlists()

    except Exception as e:
        logger.error(f"Error populating discovery pool: {e}")
        import traceback
        traceback.print_exc()
|
|
|
|
def update_discovery_pool_incremental(self):
    """
    Lightweight incremental update for discovery pool - runs every 6 hours.

    Quick check for new releases from watchlist artists only:
    - Much faster than full populate_discovery_pool (only checks watchlist, not similar artists)
    - Only fetches latest 5 releases per artist
    - Only adds tracks from releases in last 7 days
    - Respects 6-hour cooldown to avoid over-polling

    This path is Spotify-only; it returns early when Spotify is not
    authenticated (consistent with the availability check performed by
    populate_discovery_pool / cache_discovery_recent_albums).
    """
    try:
        from datetime import datetime, timedelta

        # Check if we should run (prevents over-polling Spotify)
        if not self.database.should_populate_discovery_pool(hours_threshold=6):
            logger.info("Discovery pool was updated recently (< 6 hours ago). Skipping incremental update.")
            return

        # FIX: guard Spotify availability up front instead of failing once
        # per artist inside the loop when unauthenticated.
        if not (self.spotify_client and self.spotify_client.is_spotify_authenticated()):
            logger.info("Spotify not available/authenticated. Skipping incremental discovery pool update.")
            return

        logger.info("Starting incremental discovery pool update (watchlist artists only)...")

        watchlist_artists = self.database.get_watchlist_artists()
        if not watchlist_artists:
            logger.info("No watchlist artists to check for incremental update")
            return

        cutoff_date = datetime.now() - timedelta(days=7)  # Only last week's releases
        total_tracks_added = 0

        for artist_idx, artist in enumerate(watchlist_artists, 1):
            try:
                logger.info(f"[{artist_idx}/{len(watchlist_artists)}] Checking {artist.artist_name} for new releases...")

                # Only fetch latest 5 releases (much faster than full scan)
                recent_releases = self.spotify_client.get_artist_albums(
                    artist.spotify_artist_id,
                    album_type='album,single,ep',
                    limit=5
                )

                if not recent_releases:
                    continue

                # Fetch artist genres once for all tracks of this artist (best-effort)
                artist_genres = []
                try:
                    artist_data = self.spotify_client.get_artist(artist.spotify_artist_id)
                    if artist_data and 'genres' in artist_data:
                        artist_genres = artist_data['genres']
                except Exception as e:
                    logger.debug(f"Could not fetch genres for {artist.artist_name}: {e}")

                for release in recent_releases:
                    try:
                        # Check if release is within cutoff
                        if not self.is_album_after_timestamp(release, cutoff_date):
                            continue  # Skip older releases

                        # Get full album data with tracks
                        album_data = self.spotify_client.get_album(release.id)
                        if not album_data or 'tracks' not in album_data:
                            continue

                        tracks = album_data['tracks'].get('items', [])
                        logger.debug(f" New release: {release.name} ({len(tracks)} tracks)")

                        # Determine if this is a new release (within last 30 days).
                        # FIX: accept dates of len >= 10 and slice, consistent with
                        # populate_discovery_pool (previously required exactly 10 chars,
                        # silently rejecting ISO timestamps).
                        is_new = False
                        try:
                            release_date_str = album_data.get('release_date', '')
                            if release_date_str and len(release_date_str) >= 10:
                                release_date = datetime.strptime(release_date_str[:10], "%Y-%m-%d")
                                days_old = (datetime.now() - release_date).days
                                is_new = days_old <= 30
                        except Exception:
                            # FIX: was a bare except; unparseable dates stay is_new=False
                            pass

                        # Add each track to discovery pool
                        for track in tracks:
                            try:
                                # FIX: defensive .get instead of raw indexing (consistent
                                # with populate_discovery_pool); skip malformed entries.
                                track_id = track.get('id')
                                if not track_id:
                                    continue

                                # Enhance track object with full album data (including album_type)
                                enhanced_track = {
                                    **track,
                                    'album': {
                                        'id': album_data['id'],
                                        'name': album_data.get('name', 'Unknown Album'),
                                        'images': album_data.get('images', []),
                                        'release_date': album_data.get('release_date', ''),
                                        'album_type': album_data.get('album_type', 'album'),
                                        'total_tracks': album_data.get('total_tracks', 0)
                                    },
                                    # FIX: tag source like populate_discovery_pool does
                                    '_source': 'spotify'
                                }

                                track_data = {
                                    'spotify_track_id': track_id,
                                    'spotify_album_id': album_data['id'],
                                    'spotify_artist_id': artist.spotify_artist_id,
                                    'track_name': track.get('name', 'Unknown Track'),
                                    'artist_name': artist.artist_name,
                                    'album_name': album_data.get('name', 'Unknown Album'),
                                    'album_cover_url': album_data.get('images', [{}])[0].get('url') if album_data.get('images') else None,
                                    'duration_ms': track.get('duration_ms', 0),
                                    'popularity': album_data.get('popularity', 0),
                                    'release_date': album_data.get('release_date', ''),
                                    'is_new_release': is_new,
                                    'track_data_json': enhanced_track,  # Store enhanced track with full album data
                                    'artist_genres': artist_genres
                                }

                                if self.database.add_to_discovery_pool(track_data):
                                    total_tracks_added += 1

                            except Exception as track_error:
                                logger.debug(f"Error adding track to discovery pool: {track_error}")
                                continue

                    except Exception as release_error:
                        logger.warning(f"Error processing release: {release_error}")
                        continue

                # Small delay between artists
                if artist_idx < len(watchlist_artists):
                    time.sleep(DELAY_BETWEEN_ARTISTS)

            except Exception as artist_error:
                logger.warning(f"Error checking {artist.artist_name}: {artist_error}")
                continue

        logger.info(f"Incremental update complete: {total_tracks_added} new tracks added from watchlist artists")

        # Update timestamp (only when something changed, so an empty run
        # does not suppress the next attempt)
        if total_tracks_added > 0:
            # Get current track count
            with self.database._get_connection() as conn:
                cursor = conn.cursor()
                cursor.execute("SELECT COUNT(*) as count FROM discovery_pool")
                current_count = cursor.fetchone()['count']

            self.database.update_discovery_pool_timestamp(track_count=current_count)
            logger.info(f"Discovery pool now contains {current_count} total tracks")

    except Exception as e:
        logger.error(f"Error during incremental discovery pool update: {e}")
        import traceback
        traceback.print_exc()
|
|
|
|
def cache_discovery_recent_albums(self):
    """
    Cache recent albums from watchlist and similar artists for discover page.

    Supports both Spotify and iTunes sources - iTunes is always processed (baseline),
    Spotify is added when authenticated. Same pattern as discovery pool.

    Side effects: clears and rebuilds the discovery_recent_albums cache
    (30-day release window), and opportunistically persists iTunes IDs
    resolved on-the-fly for similar artists.
    """
    try:
        from datetime import datetime, timedelta

        logger.info("Caching recent albums for discover page...")

        # Clear existing cache
        self.database.clear_discovery_recent_albums()

        # 30-day window for recent releases
        cutoff_date = datetime.now() - timedelta(days=30)
        cached_count = {'spotify': 0, 'itunes': 0}
        albums_checked = 0

        # Determine available sources
        spotify_available = self.spotify_client and self.spotify_client.is_spotify_authenticated()

        # Get iTunes client
        from core.itunes_client import iTunesClient
        itunes_client = iTunesClient()

        # Get artists to check
        watchlist_artists = self.database.get_watchlist_artists()
        similar_artists = self.database.get_top_similar_artists(limit=50)

        logger.info(f"Checking albums from {len(watchlist_artists)} watchlist + {len(similar_artists)} similar artists")
        logger.info(f"Sources: Spotify={spotify_available}, iTunes=True")

        def process_album(album, artist_name, artist_spotify_id, artist_itunes_id, source):
            """Helper to process and cache a single album"""
            # Mutates the enclosing counters; returns True only when the album
            # was within the 30-day window AND the DB accepted it.
            nonlocal albums_checked
            try:
                albums_checked += 1
                release_str = album.release_date if hasattr(album, 'release_date') else None

                if not release_str:
                    return False

                # Handle iTunes ISO format (2017-12-08T08:00:00Z)
                if 'T' in release_str:
                    release_str = release_str.split('T')[0]

                if len(release_str) >= 10:
                    release_date = datetime.strptime(release_str[:10], "%Y-%m-%d")
                    if release_date >= cutoff_date:
                        album_data = {
                            'album_spotify_id': album.id if source == 'spotify' else None,
                            'album_itunes_id': album.id if source == 'itunes' else None,
                            'album_name': album.name,
                            'artist_name': artist_name,
                            'artist_spotify_id': artist_spotify_id,
                            'artist_itunes_id': artist_itunes_id,
                            'album_cover_url': album.image_url if hasattr(album, 'image_url') else None,
                            'release_date': release_str[:10],
                            'album_type': album.album_type if hasattr(album, 'album_type') else 'album'
                        }
                        if self.database.cache_discovery_recent_album(album_data, source=source):
                            cached_count[source] += 1
                            logger.debug(f"Cached [{source}] recent album: {album.name} by {artist_name} ({release_str})")
                            return True
            except Exception as e:
                logger.debug(f"Error processing album: {e}")
            return False

        # Track resolution stats
        itunes_resolved = 0
        itunes_failed_resolve = 0

        # Process watchlist artists
        for artist in watchlist_artists:
            # Always process iTunes (baseline)
            itunes_id = artist.itunes_artist_id
            if not itunes_id:
                # Try to resolve iTunes ID on-the-fly (with retry for rate limiting)
                # NOTE(review): unlike the similar-artist loop below, the resolved ID
                # is not persisted back to the DB here — confirm whether intentional.
                try:
                    results = itunes_api_call_with_retry(
                        itunes_client.search_artists, artist.artist_name, limit=1
                    )
                    if results and len(results) > 0:
                        itunes_id = results[0].id
                        itunes_resolved += 1
                        logger.debug(f"[iTunes] Resolved ID for {artist.artist_name}: {itunes_id}")
                    else:
                        itunes_failed_resolve += 1
                        logger.info(f"[iTunes] No artist found for: {artist.artist_name}")
                except Exception as e:
                    itunes_failed_resolve += 1
                    logger.info(f"[iTunes] Failed to resolve {artist.artist_name}: {e}")

            if itunes_id:
                try:
                    albums = itunes_api_call_with_retry(
                        itunes_client.get_artist_albums, itunes_id, album_type='album,single', limit=20
                    )
                    for album in albums or []:
                        process_album(album, artist.artist_name, artist.spotify_artist_id, itunes_id, 'itunes')
                except Exception as e:
                    logger.info(f"[iTunes] Error fetching albums for {artist.artist_name}: {e}")

            # Process Spotify if authenticated
            if spotify_available and artist.spotify_artist_id:
                try:
                    albums = self.spotify_client.get_artist_albums(
                        artist.spotify_artist_id,
                        album_type='album,single,ep',
                        limit=20
                    )
                    for album in albums or []:
                        # itunes_id may still carry the value resolved above, so
                        # Spotify-cached rows also get the iTunes artist ID.
                        process_album(album, artist.artist_name, artist.spotify_artist_id, itunes_id, 'spotify')
                except Exception as e:
                    logger.debug(f"Error fetching Spotify albums for {artist.artist_name}: {e}")

            # Rate limit between artists
            time.sleep(DELAY_BETWEEN_ARTISTS)

        # Process similar artists
        for artist in similar_artists:
            # Always process iTunes (baseline)
            itunes_id = artist.similar_artist_itunes_id
            if not itunes_id:
                # Try to resolve iTunes ID on-the-fly (with retry for rate limiting)
                try:
                    results = itunes_api_call_with_retry(
                        itunes_client.search_artists, artist.similar_artist_name, limit=1
                    )
                    if results and len(results) > 0:
                        itunes_id = results[0].id
                        # Cache for future
                        self.database.update_similar_artist_itunes_id(artist.id, itunes_id)
                        itunes_resolved += 1
                        logger.debug(f"[iTunes] Resolved ID for similar artist {artist.similar_artist_name}: {itunes_id}")
                    else:
                        itunes_failed_resolve += 1
                        logger.info(f"[iTunes] No artist found for similar: {artist.similar_artist_name}")
                except Exception as e:
                    itunes_failed_resolve += 1
                    logger.info(f"[iTunes] Failed to resolve similar {artist.similar_artist_name}: {e}")

            if itunes_id:
                try:
                    albums = itunes_api_call_with_retry(
                        itunes_client.get_artist_albums, itunes_id, album_type='album,single', limit=20
                    )
                    for album in albums or []:
                        process_album(album, artist.similar_artist_name, artist.similar_artist_spotify_id, itunes_id, 'itunes')
                except Exception as e:
                    logger.info(f"[iTunes] Error fetching albums for similar {artist.similar_artist_name}: {e}")

            # Process Spotify if authenticated
            if spotify_available and artist.similar_artist_spotify_id:
                try:
                    albums = self.spotify_client.get_artist_albums(
                        artist.similar_artist_spotify_id,
                        album_type='album,single,ep',
                        limit=20
                    )
                    for album in albums or []:
                        process_album(album, artist.similar_artist_name, artist.similar_artist_spotify_id, itunes_id, 'spotify')
                except Exception as e:
                    logger.debug(f"Error fetching Spotify albums for {artist.similar_artist_name}: {e}")

            # Rate limit between artists
            time.sleep(DELAY_BETWEEN_ARTISTS)

        total_cached = cached_count['spotify'] + cached_count['itunes']
        logger.info(f"Cached {total_cached} recent albums (Spotify: {cached_count['spotify']}, iTunes: {cached_count['itunes']}) from {albums_checked} albums checked")
        logger.info(f"[iTunes] ID resolution stats: {itunes_resolved} resolved, {itunes_failed_resolve} failed")

    except Exception as e:
        logger.error(f"Error caching discovery recent albums: {e}")
        import traceback
        traceback.print_exc()
|
|
|
|
def curate_discovery_playlists(self):
|
|
"""
|
|
Curate consistent playlist selections that stay the same until next discovery pool update.
|
|
|
|
Supports both Spotify and iTunes sources - creates separate curated playlists for each.
|
|
- Release Radar: Prioritizes freshness + popularity from recent releases
|
|
- Discovery Weekly: Balanced mix of popular picks, deep cuts, and mid-tier tracks
|
|
"""
|
|
try:
|
|
import random
|
|
from datetime import datetime
|
|
from core.itunes_client import iTunesClient
|
|
|
|
logger.info("Curating discovery playlists...")
|
|
|
|
# Determine available sources
|
|
spotify_available = self.spotify_client and self.spotify_client.is_spotify_authenticated()
|
|
itunes_client = iTunesClient()
|
|
|
|
# Process each available source
|
|
sources_to_process = ['itunes'] # iTunes always available
|
|
if spotify_available:
|
|
sources_to_process.append('spotify')
|
|
|
|
logger.info(f"Curating playlists for sources: {sources_to_process}")
|
|
|
|
for source in sources_to_process:
|
|
logger.info(f"Curating Release Radar for {source}...")
|
|
|
|
# 1. Curate Release Radar - 50 tracks from recent albums
|
|
recent_albums = self.database.get_discovery_recent_albums(limit=50, source=source)
|
|
release_radar_tracks = []
|
|
|
|
if not recent_albums:
|
|
logger.warning(f"[{source.upper()}] No recent albums found for Release Radar - check cache_discovery_recent_albums()")
|
|
|
|
if recent_albums:
|
|
# Group albums by artist for variety
|
|
albums_by_artist = {}
|
|
for album in recent_albums:
|
|
artist = album['artist_name']
|
|
if artist not in albums_by_artist:
|
|
albums_by_artist[artist] = []
|
|
albums_by_artist[artist].append(album)
|
|
|
|
# Get tracks from each album
|
|
artist_track_data = {}
|
|
|
|
for artist, albums in albums_by_artist.items():
|
|
artist_track_data[artist] = []
|
|
|
|
for album in albums:
|
|
try:
|
|
# Get album data from appropriate source
|
|
album_id = album.get('album_spotify_id') if source == 'spotify' else album.get('album_itunes_id')
|
|
if not album_id:
|
|
continue
|
|
|
|
if source == 'spotify':
|
|
album_data = self.spotify_client.get_album(album_id)
|
|
else:
|
|
album_data = itunes_api_call_with_retry(
|
|
itunes_client.get_album, album_id
|
|
)
|
|
|
|
if not album_data or 'tracks' not in album_data:
|
|
continue
|
|
|
|
# Calculate days since release for recency score
|
|
days_old = 14
|
|
try:
|
|
release_date_str = album.get('release_date', '')
|
|
if release_date_str and len(release_date_str) >= 10:
|
|
release_date = datetime.strptime(release_date_str[:10], "%Y-%m-%d")
|
|
days_old = (datetime.now() - release_date).days
|
|
except:
|
|
pass
|
|
|
|
for track in album_data['tracks'].get('items', []):
|
|
track_id = track.get('id')
|
|
if not track_id:
|
|
continue
|
|
|
|
# Calculate track score
|
|
recency_score = max(0, 100 - (days_old * 7))
|
|
popularity_score = track.get('popularity', album_data.get('popularity', 50))
|
|
is_single = album.get('album_type', 'album') == 'single'
|
|
single_bonus = 20 if is_single else 0
|
|
total_score = (recency_score * 0.5) + (popularity_score * 0.3) + single_bonus
|
|
|
|
full_track = {
|
|
'id': track_id,
|
|
'name': track.get('name', 'Unknown'),
|
|
'artists': track.get('artists', [{'name': artist}]),
|
|
'album': {
|
|
'id': album_data.get('id', ''),
|
|
'name': album_data.get('name', 'Unknown Album'),
|
|
'images': album_data.get('images', []),
|
|
'release_date': album_data.get('release_date', ''),
|
|
'album_type': album_data.get('album_type', 'album'),
|
|
},
|
|
'duration_ms': track.get('duration_ms', 0),
|
|
'popularity': popularity_score,
|
|
'score': total_score,
|
|
'source': source
|
|
}
|
|
artist_track_data[artist].append(full_track)
|
|
|
|
except Exception as e:
|
|
logger.debug(f"Error processing album for {artist}: {e}")
|
|
continue
|
|
|
|
# Balance by artist - max 6 tracks per artist
|
|
balanced_track_data = []
|
|
for artist, tracks in artist_track_data.items():
|
|
sorted_tracks = sorted(tracks, key=lambda t: t['score'], reverse=True)
|
|
balanced_track_data.extend(sorted_tracks[:6])
|
|
|
|
# Sort by score and shuffle
|
|
balanced_track_data.sort(key=lambda t: t['score'], reverse=True)
|
|
top_tracks = balanced_track_data[:75]
|
|
random.shuffle(top_tracks)
|
|
|
|
# Take final 50 tracks
|
|
release_radar_tracks = [track['id'] for track in top_tracks[:50]]
|
|
|
|
# Add tracks to discovery pool
|
|
for track_data in top_tracks[:50]:
|
|
try:
|
|
artist_name = track_data['artists'][0].get('name', 'Unknown') if track_data['artists'] else 'Unknown'
|
|
formatted_track = {
|
|
'track_name': track_data['name'],
|
|
'artist_name': artist_name,
|
|
'album_name': track_data['album'].get('name', 'Unknown'),
|
|
'album_cover_url': track_data['album']['images'][0]['url'] if track_data['album'].get('images') else None,
|
|
'duration_ms': track_data.get('duration_ms', 0),
|
|
'popularity': track_data.get('popularity', 0),
|
|
'release_date': track_data['album'].get('release_date', ''),
|
|
'is_new_release': True,
|
|
'track_data_json': track_data,
|
|
'artist_genres': []
|
|
}
|
|
if source == 'spotify':
|
|
formatted_track['spotify_track_id'] = track_data['id']
|
|
formatted_track['spotify_album_id'] = track_data['album'].get('id', '')
|
|
else:
|
|
formatted_track['itunes_track_id'] = track_data['id']
|
|
formatted_track['itunes_album_id'] = track_data['album'].get('id', '')
|
|
|
|
self.database.add_to_discovery_pool(formatted_track, source=source)
|
|
except Exception as e:
|
|
continue
|
|
|
|
# Save with source suffix for multi-source support
|
|
playlist_key = f'release_radar_{source}'
|
|
self.database.save_curated_playlist(playlist_key, release_radar_tracks)
|
|
logger.info(f"Release Radar ({source}) curated: {len(release_radar_tracks)} tracks")
|
|
|
|
# 2. Curate Discovery Weekly - 50 tracks from discovery pool
|
|
logger.info(f"Curating Discovery Weekly for {source}...")
|
|
discovery_tracks = self.database.get_discovery_pool_tracks(limit=2000, new_releases_only=False, source=source)
|
|
|
|
if not discovery_tracks:
|
|
logger.warning(f"[{source.upper()}] No discovery pool tracks found for Discovery Weekly - check populate_discovery_pool()")
|
|
|
|
discovery_weekly_tracks = []
|
|
if discovery_tracks:
|
|
# Separate tracks by popularity tiers
|
|
popular_picks = []
|
|
balanced_mix = []
|
|
deep_cuts = []
|
|
|
|
for track in discovery_tracks:
|
|
popularity = track.popularity if hasattr(track, 'popularity') else 50
|
|
if popularity >= 60:
|
|
popular_picks.append(track)
|
|
elif popularity >= 40:
|
|
balanced_mix.append(track)
|
|
else:
|
|
deep_cuts.append(track)
|
|
|
|
logger.info(f"Discovery pool ({source}): {len(popular_picks)} popular, {len(balanced_mix)} mid-tier, {len(deep_cuts)} deep cuts")
|
|
|
|
# Balanced selection
|
|
random.shuffle(popular_picks)
|
|
random.shuffle(balanced_mix)
|
|
random.shuffle(deep_cuts)
|
|
|
|
selected_tracks = []
|
|
selected_tracks.extend(popular_picks[:20])
|
|
selected_tracks.extend(balanced_mix[:20])
|
|
selected_tracks.extend(deep_cuts[:10])
|
|
random.shuffle(selected_tracks)
|
|
|
|
# Extract appropriate track IDs based on source
|
|
for track in selected_tracks:
|
|
if source == 'spotify' and track.spotify_track_id:
|
|
discovery_weekly_tracks.append(track.spotify_track_id)
|
|
elif source == 'itunes' and track.itunes_track_id:
|
|
discovery_weekly_tracks.append(track.itunes_track_id)
|
|
|
|
playlist_key = f'discovery_weekly_{source}'
|
|
self.database.save_curated_playlist(playlist_key, discovery_weekly_tracks)
|
|
logger.info(f"Discovery Weekly ({source}) curated: {len(discovery_weekly_tracks)} tracks")
|
|
|
|
# Also save without suffix for backward compatibility (use active source)
|
|
active_source = 'spotify' if spotify_available else 'itunes'
|
|
release_radar_key = f'release_radar_{active_source}'
|
|
discovery_weekly_key = f'discovery_weekly_{active_source}'
|
|
|
|
# Copy active source playlists to non-suffixed keys
|
|
release_radar_ids = self.database.get_curated_playlist(release_radar_key) or []
|
|
discovery_weekly_ids = self.database.get_curated_playlist(discovery_weekly_key) or []
|
|
self.database.save_curated_playlist('release_radar', release_radar_ids)
|
|
self.database.save_curated_playlist('discovery_weekly', discovery_weekly_ids)
|
|
|
|
logger.info("Playlist curation complete")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error curating discovery playlists: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|
|
|
|
def _populate_seasonal_content(self):
    """
    Populate seasonal content as part of the watchlist scan.

    IMPROVED: Integrated with discovery system
    - Checks if seasonal content needs update (7-day threshold)
    - Populates content for all seasons
    - Curates seasonal playlists
    - Runs once per week automatically

    Side effects only (writes seasonal pools/playlists via the seasonal
    discovery service); returns None. Any failure is caught and logged so
    a seasonal hiccup never aborts the surrounding watchlist scan.
    """
    try:
        from core.seasonal_discovery import get_seasonal_discovery_service

        logger.info("Checking seasonal content update...")

        seasonal_service = get_seasonal_discovery_service(self.spotify_client, self.database)

        # Get current season to prioritize
        current_season = seasonal_service.get_current_season()

        if current_season:
            # Always update current season if needed (7-day staleness threshold)
            if seasonal_service.should_populate_seasonal_content(current_season, days_threshold=7):
                logger.info(f"Populating current season: {current_season}")
                seasonal_service.populate_seasonal_content(current_season)
                seasonal_service.curate_seasonal_playlist(current_season)
            else:
                logger.info(f"Current season '{current_season}' is up to date")

        # Update other seasons in background (less frequently - 14 day threshold)
        from core.seasonal_discovery import SEASONAL_CONFIG
        for season_key in SEASONAL_CONFIG:
            if season_key == current_season:
                continue  # Already handled above

            if seasonal_service.should_populate_seasonal_content(season_key, days_threshold=14):
                logger.info(f"Populating season: {season_key}")
                seasonal_service.populate_seasonal_content(season_key)
                seasonal_service.curate_seasonal_playlist(season_key)

        logger.info("Seasonal content update complete")

    except Exception as e:
        # logger.exception records the message at ERROR level AND the full
        # traceback through the logging system; the previous
        # traceback.print_exc() wrote to stderr, bypassing log handlers.
        logger.exception(f"Error populating seasonal content: {e}")
|
# Singleton instance
# Module-level cache holding the one shared WatchlistScanner; created lazily
# on the first call to get_watchlist_scanner() and reused thereafter.
_watchlist_scanner_instance: Optional["WatchlistScanner"] = None
|
def get_watchlist_scanner(spotify_client: SpotifyClient) -> WatchlistScanner:
    """Return the process-wide WatchlistScanner, building it on first use.

    NOTE: the spotify_client argument is consumed only on the very first
    call; subsequent calls return the cached instance unchanged.
    """
    global _watchlist_scanner_instance

    # Fast path: singleton already constructed.
    if _watchlist_scanner_instance is not None:
        return _watchlist_scanner_instance

    _watchlist_scanner_instance = WatchlistScanner(spotify_client)
    return _watchlist_scanner_instance