diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index 6bdff96b..255afcb8 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -246,8 +246,17 @@ class WatchlistScanner: # Update last scan timestamp for this artist self.update_artist_scan_timestamp(watchlist_artist.spotify_artist_id) - # Fetch and store similar artists for discovery feature - self.update_similar_artists(watchlist_artist) + # Fetch and store similar artists for discovery feature (with caching to avoid over-polling) + try: + # Check if we have fresh similar artists cached (< 30 days old) + if self.database.has_fresh_similar_artists(watchlist_artist.spotify_artist_id, days_threshold=30): + logger.info(f"Similar artists for {watchlist_artist.artist_name} are cached and fresh, skipping fetch") + else: + logger.info(f"Fetching similar artists for {watchlist_artist.artist_name}...") + self.update_similar_artists(watchlist_artist) + logger.info(f"Similar artists updated for {watchlist_artist.artist_name}") + except Exception as similar_error: + logger.warning(f"Failed to update similar artists for {watchlist_artist.artist_name}: {similar_error}") return ScanResult( artist_name=watchlist_artist.artist_name, @@ -656,11 +665,21 @@ class WatchlistScanner: """ Populate discovery pool with tracks from top similar artists. Called after watchlist scan completes. + + This method now: + - Checks if pool was updated in last 24 hours (prevents over-polling Spotify) + - Appends to existing pool instead of replacing it + - Cleans up tracks older than 365 days (maintains 1 year rolling window) """ try: from datetime import datetime, timedelta import random + # Check if we should run (prevents over-polling Spotify) + if not self.database.should_populate_discovery_pool(hours_threshold=24): + logger.info("Discovery pool was populated recently (< 24 hours ago). Skipping to avoid over-polling Spotify.") + return + logger.info("Populating discovery pool from similar artists...") # Get top similar artists across all watchlist (ordered by occurrence_count) @@ -776,15 +795,110 @@ class WatchlistScanner: logger.warning(f"Error processing artist {similar_artist.similar_artist_name}: {artist_error}") continue - logger.info(f"Discovery pool population complete: {total_tracks_added} tracks added") + logger.info(f"Discovery pool from similar artists complete: {total_tracks_added} tracks added") - # Rotate discovery pool if needed (maintain 1000-2000 track limit) - self.database.rotate_discovery_pool(max_tracks=2000, remove_count=500) + # Note: Watchlist artist albums are already in discovery pool from the watchlist scan itself + # No need to re-fetch them here to avoid duplicate API calls + + # Add tracks from random database albums for extra variety (reduced to 5 to save API calls) + logger.info("Adding tracks from database albums to discovery pool...") + try: + with self.database._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + SELECT DISTINCT a.title, ar.name as artist_name + FROM albums_new a + JOIN artists_new ar ON a.artist_id = ar.id + ORDER BY RANDOM() + LIMIT 5 + """) + db_albums = cursor.fetchall() + + logger.info(f"Processing {len(db_albums)} database albums for discovery pool") + + for db_idx, album_row in enumerate(db_albums, 1): + try: + # Search for album on Spotify + query = f"album:{album_row['title']} artist:{album_row['artist_name']}" + search_results = self.spotify_client.search_albums(query, limit=1) + + if search_results and len(search_results) > 0: + spotify_album = search_results[0] + album_data = self.spotify_client.get_album(spotify_album.id) + + if album_data and 'tracks' in album_data: + tracks = album_data['tracks'].get('items', []) + + # Check if new release + is_new = False + try: + release_date_str = album_data.get('release_date', '') + if release_date_str and len(release_date_str) == 10: + release_date = datetime.strptime(release_date_str, "%Y-%m-%d") + days_old = (datetime.now() - release_date).days + is_new = days_old <= 30 + except: + pass + + for track in tracks: + try: + track_data = { + 'spotify_track_id': track['id'], + 'spotify_album_id': album_data['id'], + 'spotify_artist_id': album_data['artists'][0]['id'] if album_data.get('artists') else '', + 'track_name': track['name'], + 'artist_name': album_row['artist_name'], + 'album_name': album_row['title'], + 'album_cover_url': album_data.get('images', [{}])[0].get('url') if album_data.get('images') else None, + 'duration_ms': track.get('duration_ms', 0), + 'popularity': album_data.get('popularity', 0), + 'release_date': album_data.get('release_date', ''), + 'is_new_release': is_new, + 'track_data_json': track + } + + if self.database.add_to_discovery_pool(track_data): + total_tracks_added += 1 + except Exception as track_error: + continue + + time.sleep(DELAY_BETWEEN_ALBUMS) + except Exception as album_error: + logger.debug(f"Error processing database album {album_row['title']}: {album_error}") + continue + + # Rate limit between albums + if db_idx < len(db_albums): + time.sleep(DELAY_BETWEEN_ARTISTS) + + except Exception as db_error: + logger.warning(f"Error processing database albums: {db_error}") + + logger.info(f"Discovery pool population complete: {total_tracks_added} total tracks added from all sources") + + # Clean up tracks older than 365 days (maintain 1 year rolling window) + logger.info("Cleaning up discovery tracks older than 365 days...") + deleted_count = self.database.cleanup_old_discovery_tracks(days_threshold=365) + logger.info(f"Cleaned up {deleted_count} old tracks from discovery pool") + + # Get final track count for metadata + with self.database._get_connection() as conn: + cursor = conn.cursor() + cursor.execute("SELECT COUNT(*) as count FROM discovery_pool") + final_count = cursor.fetchone()['count'] + + # Update timestamp to mark when pool was last populated + self.database.update_discovery_pool_timestamp(track_count=final_count) + logger.info(f"Discovery pool now contains {final_count} total tracks (built over time)") # Cache recent albums for discovery page logger.info("Caching recent albums for discovery page...") self.cache_discovery_recent_albums() + # Curate playlists for consistent daily experience + logger.info("Curating discovery playlists...") + self.curate_discovery_playlists() + except Exception as e: logger.error(f"Error populating discovery pool: {e}") import traceback @@ -902,6 +1016,72 @@ class WatchlistScanner: import traceback traceback.print_exc() + def curate_discovery_playlists(self): + """Curate consistent playlist selections that stay the same until next discovery pool update""" + try: + import random + + logger.info("Curating Release Radar playlist...") + + # 1. Curate Release Radar - 50 tracks from recent albums + recent_albums = self.database.get_discovery_recent_albums(limit=20) + release_radar_tracks = [] + + if recent_albums: + # Group albums by artist for variety + albums_by_artist = {} + for album in recent_albums: + artist = album['artist_name'] + if artist not in albums_by_artist: + albums_by_artist[artist] = [] + albums_by_artist[artist].append(album) + + # Get tracks from each album, grouped by artist + artist_tracks = {} + for artist, albums in albums_by_artist.items(): + artist_tracks[artist] = [] + for album in albums: + try: + album_data = self.spotify_client.get_album(album['album_spotify_id']) + if album_data and 'tracks' in album_data: + for track in album_data['tracks']['items']: + artist_tracks[artist].append(track['id']) + except Exception as e: + continue + + # Balance by artist - max 6 tracks per artist + balanced_tracks = [] + for artist, tracks in artist_tracks.items(): + random.shuffle(tracks) + balanced_tracks.extend(tracks[:6]) # Max 6 per artist + + # Shuffle and limit to 50 + random.shuffle(balanced_tracks) + release_radar_tracks = balanced_tracks[:50] + + self.database.save_curated_playlist('release_radar', release_radar_tracks) + logger.info(f"Release Radar curated: {len(release_radar_tracks)} tracks") + + # 2. Curate Discovery Weekly - 50 tracks from full discovery pool + logger.info("Curating Discovery Weekly playlist...") + discovery_tracks = self.database.get_discovery_pool_tracks(limit=1000, new_releases_only=False) + + discovery_weekly_tracks = [] + if discovery_tracks: + all_track_ids = [track.spotify_track_id for track in discovery_tracks] + random.shuffle(all_track_ids) + discovery_weekly_tracks = all_track_ids[:50] + + self.database.save_curated_playlist('discovery_weekly', discovery_weekly_tracks) + logger.info(f"Discovery Weekly curated: {len(discovery_weekly_tracks)} tracks") + + logger.info("Playlist curation complete") + + except Exception as e: + logger.error(f"Error curating discovery playlists: {e}") + import traceback + traceback.print_exc() + # Singleton instance _watchlist_scanner_instance = None diff --git a/database/music_database.py b/database/music_database.py index 38d26ade..cc8e579c 100644 --- a/database/music_database.py +++ b/database/music_database.py @@ -488,6 +488,26 @@ class MusicDatabase: ) """) + # Discovery Curated Playlists - store curated track selections for consistency + cursor.execute(""" + CREATE TABLE IF NOT EXISTS discovery_curated_playlists ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + playlist_type TEXT NOT NULL UNIQUE, + track_ids_json TEXT NOT NULL, + curated_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """) + + # Discovery Pool Metadata - track when pool was last populated to prevent over-polling + cursor.execute(""" + CREATE TABLE IF NOT EXISTS discovery_pool_metadata ( + id INTEGER PRIMARY KEY CHECK (id = 1), + last_populated_timestamp TIMESTAMP NOT NULL, + track_count INTEGER DEFAULT 0, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """) + # Create indexes for performance cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_source ON similar_artists (source_artist_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_spotify ON similar_artists (similar_artist_spotify_id)") @@ -2490,6 +2510,37 @@ class MusicDatabase: logger.error(f"Error getting similar artists: {e}") return [] + def has_fresh_similar_artists(self, source_artist_id: str, days_threshold: int = 30) -> bool: + """ + Check if we have cached similar artists that are still fresh (< days_threshold old). + Returns True if we have recent data, False if data is stale or missing. + """ + try: + with self._get_connection() as conn: + cursor = conn.cursor() + + cursor.execute(""" + SELECT COUNT(*) as count, MAX(last_updated) as last_updated + FROM similar_artists + WHERE source_artist_id = ? + """, (source_artist_id,)) + + row = cursor.fetchone() + + if not row or row['count'] == 0: + # No similar artists cached + return False + + # Check if data is fresh + last_updated = datetime.fromisoformat(row['last_updated']) + days_since_update = (datetime.now() - last_updated).total_seconds() / 86400 # seconds to days + + return days_since_update < days_threshold + + except Exception as e: + logger.error(f"Error checking similar artists freshness: {e}") + return False # Default to re-fetching on error + def get_top_similar_artists(self, limit: int = 50) -> List[SimilarArtist]: """Get top similar artists across all watchlist artists, ordered by occurrence count""" try: @@ -2701,6 +2752,106 @@ class MusicDatabase: logger.error(f"Error clearing discovery recent albums: {e}") return False + def save_curated_playlist(self, playlist_type: str, track_ids: List[str]) -> bool: + """Save a curated playlist selection (stays same until next discovery pool update)""" + try: + import json + with self._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + INSERT OR REPLACE INTO discovery_curated_playlists + (playlist_type, track_ids_json, curated_date) + VALUES (?, ?, CURRENT_TIMESTAMP) + """, (playlist_type, json.dumps(track_ids))) + conn.commit() + return True + except Exception as e: + logger.error(f"Error saving curated playlist {playlist_type}: {e}") + return False + + def get_curated_playlist(self, playlist_type: str) -> Optional[List[str]]: + """Get saved curated playlist track IDs""" + try: + import json + with self._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + SELECT track_ids_json FROM discovery_curated_playlists + WHERE playlist_type = ? + """, (playlist_type,)) + row = cursor.fetchone() + if row: + return json.loads(row['track_ids_json']) + return None + except Exception as e: + logger.error(f"Error getting curated playlist {playlist_type}: {e}") + return None + + def should_populate_discovery_pool(self, hours_threshold: int = 24) -> bool: + """Check if discovery pool should be populated (hasn't been updated in X hours)""" + try: + with self._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + SELECT last_populated_timestamp + FROM discovery_pool_metadata + WHERE id = 1 + """) + row = cursor.fetchone() + + if not row: + # Never populated before + return True + + last_populated = datetime.fromisoformat(row['last_populated_timestamp']) + hours_since_update = (datetime.now() - last_populated).total_seconds() / 3600 + + return hours_since_update >= hours_threshold + + except Exception as e: + logger.error(f"Error checking discovery pool timestamp: {e}") + return True # Default to allowing population on error + + def update_discovery_pool_timestamp(self, track_count: int) -> bool: + """Update the last populated timestamp and track count""" + try: + with self._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + INSERT OR REPLACE INTO discovery_pool_metadata + (id, last_populated_timestamp, track_count, updated_at) + VALUES (1, ?, ?, CURRENT_TIMESTAMP) + """, (datetime.now().isoformat(), track_count)) + conn.commit() + return True + except Exception as e: + logger.error(f"Error updating discovery pool timestamp: {e}") + return False + + def cleanup_old_discovery_tracks(self, days_threshold: int = 365) -> int: + """Remove tracks from discovery pool older than X days. Returns count of deleted tracks.""" + try: + with self._get_connection() as conn: + cursor = conn.cursor() + + # Delete tracks older than threshold + cursor.execute(""" + DELETE FROM discovery_pool + WHERE added_date < datetime('now', '-' || ? || ' days') + """, (days_threshold,)) + + deleted_count = cursor.rowcount + conn.commit() + + if deleted_count > 0: + logger.info(f"Cleaned up {deleted_count} discovery tracks older than {days_threshold} days") + + return deleted_count + + except Exception as e: + logger.error(f"Error cleaning up old discovery tracks: {e}") + return 0 + def add_recent_release(self, watchlist_artist_id: int, album_data: Dict[str, Any]) -> bool: """Add a recent release to the recent_releases table""" try: diff --git a/web_server.py b/web_server.py index 48eedeee..81c721b5 100644 --- a/web_server.py +++ b/web_server.py @@ -14466,62 +14466,39 @@ def get_discover_recent_releases(): @app.route('/api/discover/release-radar', methods=['GET']) def get_discover_release_radar(): - """Get release radar playlist - 50 tracks randomly selected from all recent albums""" + """Get release radar playlist - curated selection that stays consistent until next update""" try: - import random - database = get_database() if not spotify_client or not spotify_client.is_authenticated(): return jsonify({"success": True, "tracks": []}) - # Get all recent albums from cache - recent_albums = database.get_discovery_recent_albums(limit=20) - - if not recent_albums: - return jsonify({"success": True, "tracks": []}) - - all_tracks = [] - - # Get tracks from each recent album - for album in recent_albums: - try: - # Get album tracks from Spotify - album_data = spotify_client.get_album(album['album_spotify_id']) - if album_data and 'tracks' in album_data: - for track in album_data['tracks']['items']: - all_tracks.append({ - "spotify_track_id": track['id'], - "track_name": track['name'], - "artist_name": album['artist_name'], - "album_name": album['album_name'], - "album_cover_url": album['album_cover_url'], - "duration_ms": track.get('duration_ms', 0), - "track_data_json": track - }) - except Exception as e: - print(f"Error getting tracks for album {album['album_name']}: {e}") - continue - - # Group tracks by artist to ensure variety - tracks_by_artist = {} - for track in all_tracks: - artist_name = track['artist_name'] - if artist_name not in tracks_by_artist: - tracks_by_artist[artist_name] = [] - tracks_by_artist[artist_name].append(track) - - # Limit each artist to max 6 tracks for variety - balanced_tracks = [] - for artist_name, tracks in tracks_by_artist.items(): - random.shuffle(tracks) - balanced_tracks.extend(tracks[:6]) # Max 6 tracks per artist + # Try to get curated playlist first + curated_track_ids = database.get_curated_playlist('release_radar') + + if curated_track_ids: + # Use curated selection - fetch track data from discovery pool + discovery_tracks = database.get_discovery_pool_tracks(limit=5000, new_releases_only=False) + tracks_by_id = {track.spotify_track_id: track for track in discovery_tracks} + + selected_tracks = [] + for track_id in curated_track_ids: + if track_id in tracks_by_id: + track = tracks_by_id[track_id] + selected_tracks.append({ + "spotify_track_id": track.spotify_track_id, + "track_name": track.track_name, + "artist_name": track.artist_name, + "album_name": track.album_name, + "album_cover_url": track.album_cover_url, + "duration_ms": track.duration_ms, + "track_data_json": track.track_data_json + }) - # Randomly select up to 50 tracks from balanced pool - random.shuffle(balanced_tracks) - selected_tracks = balanced_tracks[:50] + return jsonify({"success": True, "tracks": selected_tracks}) - return jsonify({"success": True, "tracks": selected_tracks}) + # Fallback: no curated playlist exists (shouldn't happen after first scan) + return jsonify({"success": True, "tracks": []}) except Exception as e: print(f"Error getting release radar: {e}") @@ -14531,105 +14508,39 @@ def get_discover_release_radar(): @app.route('/api/discover/weekly', methods=['GET']) def get_discover_weekly(): - """Get discovery weekly playlist - 50 tracks from similar artists, watchlist artists, and database albums""" + """Get discovery weekly playlist - curated selection that stays consistent until next update""" try: - import random - database = get_database() - if not spotify_client or not spotify_client.is_authenticated(): - return jsonify({"success": True, "tracks": []}) - - all_tracks = [] - - # 1. Get tracks from discovery pool (similar artists) - aim for ~30 tracks - discovery_tracks = database.get_discovery_pool_tracks(limit=300, new_releases_only=False) - for track in discovery_tracks: - all_tracks.append({ - "spotify_track_id": track.spotify_track_id, - "track_name": track.track_name, - "artist_name": track.artist_name, - "album_name": track.album_name, - "album_cover_url": track.album_cover_url, - "duration_ms": track.duration_ms, - "track_data_json": track.track_data_json - }) - - # 2. Get tracks from random watchlist artists - aim for ~10 tracks - try: - watchlist_artists = database.get_watchlist_artists() - if watchlist_artists: - random_watchlist = random.sample(watchlist_artists, min(2, len(watchlist_artists))) - for artist in random_watchlist: - try: - albums = spotify_client.get_artist_albums(artist.spotify_artist_id, album_type='album', limit=10) - if albums: - random_album = random.choice(albums) - album_data = spotify_client.get_album(random_album.id) - if album_data and 'tracks' in album_data: - for track in album_data['tracks']['items'][:5]: # 5 tracks per album - all_tracks.append({ - "spotify_track_id": track['id'], - "track_name": track['name'], - "artist_name": artist.artist_name, - "album_name": random_album.name, - "album_cover_url": random_album.image_url if hasattr(random_album, 'image_url') else None, - "duration_ms": track.get('duration_ms', 0), - "track_data_json": track - }) - except Exception as e: - continue - except Exception as e: - print(f"Error getting watchlist tracks: {e}") - - # 3. Get tracks from random database albums - aim for ~10 tracks - try: - # Get random albums from database - with database._get_connection() as conn: - cursor = conn.cursor() - cursor.execute(""" - SELECT DISTINCT a.title, ar.name as artist_name - FROM albums_new a - JOIN artists_new ar ON a.artist_id = ar.id - ORDER BY RANDOM() - LIMIT 2 - """) - db_albums = cursor.fetchall() - - for album_row in db_albums: - try: - # Search for album on Spotify - query = f"album:{album_row['title']} artist:{album_row['artist_name']}" - search_results = spotify_client.search_albums(query, limit=1) - if search_results and len(search_results) > 0: - spotify_album = search_results[0] - album_data = spotify_client.get_album(spotify_album.id) - if album_data and 'tracks' in album_data: - for track in album_data['tracks']['items'][:5]: # 5 tracks per album - all_tracks.append({ - "spotify_track_id": track['id'], - "track_name": track['name'], - "artist_name": album_row['artist_name'], - "album_name": album_row['title'], - "album_cover_url": spotify_album.image_url if hasattr(spotify_album, 'image_url') else None, - "duration_ms": track.get('duration_ms', 0), - "track_data_json": track - }) - except Exception as e: - continue - except Exception as e: - print(f"Error getting database album tracks: {e}") + # Try to get curated playlist first + curated_track_ids = database.get_curated_playlist('discovery_weekly') + + if curated_track_ids: + # Use curated selection - fetch track data from discovery pool + discovery_tracks = database.get_discovery_pool_tracks(limit=5000, new_releases_only=False) + tracks_by_id = {track.spotify_track_id: track for track in discovery_tracks} + + selected_tracks = [] + for track_id in curated_track_ids: + if track_id in tracks_by_id: + track = tracks_by_id[track_id] + selected_tracks.append({ + "spotify_track_id": track.spotify_track_id, + "track_name": track.track_name, + "artist_name": track.artist_name, + "album_name": track.album_name, + "album_cover_url": track.album_cover_url, + "duration_ms": track.duration_ms, + "track_data_json": track.track_data_json + }) - # Randomly select 50 tracks from the combined pool - random.shuffle(all_tracks) - selected_tracks = all_tracks[:50] + return jsonify({"success": True, "tracks": selected_tracks}) - return jsonify({"success": True, "tracks": selected_tracks}) + # Fallback: no curated playlist exists (shouldn't happen after first scan) + return jsonify({"success": True, "tracks": []}) except Exception as e: print(f"Error getting discovery weekly: {e}") - import traceback - traceback.print_exc() return jsonify({"success": False, "error": str(e)}), 500 @app.route('/api/metadata/start', methods=['POST']) @@ -17830,28 +17741,4 @@ if __name__ == '__main__': # Add a test activity to verify the system is working add_activity_item("🔧", "Debug Test", "Activity feed system test", "Now") - # Populate discovery pool at startup (background task) - def startup_populate_discovery(): - """Populate discovery pool at startup in background""" - try: - print("🎵 Populating discovery pool at startup...") - from core.watchlist_scanner import get_watchlist_scanner - if spotify_client and spotify_client.is_authenticated(): - scanner = get_watchlist_scanner(spotify_client) - scanner.populate_discovery_pool() - print("✅ Discovery pool populated successfully") - add_activity_item("🎵", "Discovery Pool", "Discovery data populated successfully", "Now") - else: - print("⚠️ Spotify not authenticated - skipping discovery pool population") - except Exception as e: - print(f"❌ Error populating discovery pool at startup: {e}") - import traceback - traceback.print_exc() - - # Run discovery pool population in background thread - import threading - discovery_thread = threading.Thread(target=startup_populate_discovery, daemon=True) - discovery_thread.start() - print("🔧 Discovery pool population started in background...") - app.run(host='0.0.0.0', port=8008, debug=False)