Cache similar artist metadata at scan time to eliminate redundant Spotify API calls

pull/253/head
Broque Thomas 1 month ago
parent ae6fb929bf
commit 7da7f3b112

@ -1483,14 +1483,17 @@ class WatchlistScanner:
stored_count = 0
for rank, similar_artist in enumerate(similar_artists, 1):
try:
# similar_artist has 'name', 'spotify_id', and 'itunes_id' keys
# similar_artist has 'name', 'spotify_id', 'itunes_id', 'image_url', 'genres', 'popularity'
success = self.database.add_or_update_similar_artist(
source_artist_id=source_artist_id,
similar_artist_name=similar_artist['name'],
similar_artist_spotify_id=similar_artist.get('spotify_id'),
similar_artist_itunes_id=similar_artist.get('itunes_id'),
similarity_rank=rank,
profile_id=profile_id
profile_id=profile_id,
image_url=similar_artist.get('image_url'),
genres=similar_artist.get('genres'),
popularity=similar_artist.get('popularity', 0)
)
if success:

@ -5216,25 +5216,36 @@ class MusicDatabase:
similar_artist_spotify_id: Optional[str] = None,
similar_artist_itunes_id: Optional[str] = None,
similarity_rank: int = 1,
profile_id: int = 1) -> bool:
profile_id: int = 1,
image_url: Optional[str] = None,
genres: Optional[list] = None,
popularity: int = 0) -> bool:
"""Add or update a similar artist recommendation (supports both Spotify and iTunes IDs)"""
try:
with self._get_connection() as conn:
cursor = conn.cursor()
genres_json = json.dumps(genres) if genres else None
# Use artist name as the unique key (allows storing both IDs for same artist)
cursor.execute("""
INSERT INTO similar_artists
(source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_name, similarity_rank, occurrence_count, last_updated, profile_id)
VALUES (?, ?, ?, ?, ?, 1, CURRENT_TIMESTAMP, ?)
(source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_name,
similarity_rank, occurrence_count, last_updated, profile_id,
image_url, genres, popularity, metadata_updated_at)
VALUES (?, ?, ?, ?, ?, 1, CURRENT_TIMESTAMP, ?, ?, ?, ?, CURRENT_TIMESTAMP)
ON CONFLICT(profile_id, source_artist_id, similar_artist_name)
DO UPDATE SET
similar_artist_spotify_id = COALESCE(excluded.similar_artist_spotify_id, similar_artist_spotify_id),
similar_artist_itunes_id = COALESCE(excluded.similar_artist_itunes_id, similar_artist_itunes_id),
similarity_rank = excluded.similarity_rank,
occurrence_count = occurrence_count + 1,
last_updated = CURRENT_TIMESTAMP
""", (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_name, similarity_rank, profile_id))
last_updated = CURRENT_TIMESTAMP,
image_url = COALESCE(excluded.image_url, image_url),
genres = COALESCE(excluded.genres, genres),
popularity = CASE WHEN excluded.popularity > 0 THEN excluded.popularity ELSE popularity END,
metadata_updated_at = CASE WHEN excluded.image_url IS NOT NULL THEN CURRENT_TIMESTAMP ELSE metadata_updated_at END
""", (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_name,
similarity_rank, profile_id, image_url, genres_json, popularity))
conn.commit()
return True
@ -5338,6 +5349,29 @@ class MusicDatabase:
logger.error(f"Error updating similar artist metadata: {e}")
return False
def update_similar_artist_metadata_by_external_id(self, external_id: str, source: str = 'spotify',
                                                  image_url: str = None, genres: list = None,
                                                  popularity: int = None) -> bool:
    """Cache artist metadata by Spotify or iTunes ID (updates all rows for that artist).

    Args:
        external_id: Spotify or iTunes artist ID used to match rows.
        source: 'spotify' matches similar_artist_spotify_id; any other value
            matches similar_artist_itunes_id.
        image_url: Artist image URL, or None to keep the currently cached value.
        genres: List of genre strings (stored as JSON text); None or empty keeps
            the currently cached value.
        popularity: Popularity score; None keeps the currently cached value.

    Returns:
        True if at least one row was updated, False otherwise (including on error).
    """
    try:
        with self._get_connection() as conn:
            cursor = conn.cursor()
            # Empty genre lists are stored as NULL, matching the convention
            # used by add_or_update_similar_artist.
            genres_json = json.dumps(genres) if genres else None
            # Column name comes from a fixed two-way whitelist, so the
            # f-string interpolation below carries no injection risk.
            if source == 'spotify':
                where_clause = "similar_artist_spotify_id = ?"
            else:
                where_clause = "similar_artist_itunes_id = ?"
            # COALESCE preserves previously cached values when a field is not
            # supplied on this call, consistent with the upsert policy in
            # add_or_update_similar_artist. (Previously, omitted fields
            # clobbered cached metadata with NULL/0.)
            cursor.execute(f"""
                UPDATE similar_artists
                SET image_url = COALESCE(?, image_url),
                    genres = COALESCE(?, genres),
                    popularity = COALESCE(?, popularity),
                    metadata_updated_at = CURRENT_TIMESTAMP
                WHERE {where_clause}
            """, (image_url, genres_json, popularity, external_id))
            conn.commit()
            return cursor.rowcount > 0
    except Exception as e:
        logger.error(f"Error updating similar artist metadata by external ID: {e}")
        return False
def has_fresh_similar_artists(self, source_artist_id: str, days_threshold: int = 30, require_itunes: bool = True, require_spotify: bool = False, profile_id: int = 1) -> bool:
"""
Check if we have cached similar artists that are still fresh (<days_threshold old).

@ -29142,7 +29142,7 @@ def get_discover_similar_artists():
else:
artist_id = artist.similar_artist_itunes_id or artist.similar_artist_spotify_id
result_artists.append({
artist_data = {
"artist_id": artist_id,
"spotify_artist_id": artist.similar_artist_spotify_id,
"itunes_artist_id": artist.similar_artist_itunes_id,
@ -29150,7 +29150,15 @@ def get_discover_similar_artists():
"occurrence_count": artist.occurrence_count,
"similarity_rank": artist.similarity_rank,
"source": active_source,
})
}
# Include cached metadata if available
if artist.image_url:
artist_data["image_url"] = artist.image_url
if artist.genres:
artist_data["genres"] = artist.genres[:3]
if artist.popularity:
artist_data["popularity"] = artist.popularity
result_artists.append(artist_data)
print(f"[Similar Artists] {len(similar_artists)} from DB, {len(result_artists)} valid for {active_source}")
@ -29168,7 +29176,9 @@ def get_discover_similar_artists():
@app.route('/api/discover/similar-artists/enrich', methods=['POST'])
def enrich_similar_artists():
"""Enrich a batch of artist IDs with images/genres from Spotify or iTunes"""
"""Enrich a batch of artist IDs with images/genres from Spotify or iTunes.
Uses cached metadata from DB when available, only makes API calls for uncached artists,
and saves new results back to DB for future use."""
try:
data = request.get_json()
artist_ids = data.get('artist_ids', [])
@ -29177,37 +29187,82 @@ def enrich_similar_artists():
if not artist_ids:
return jsonify({"success": True, "artists": {}})
database = get_database()
enriched = {}
uncached_ids = []
# Check DB cache first — get all similar artists and index by external ID
cached_artists = database.get_top_similar_artists(limit=500, profile_id=get_current_profile_id())
cache_map = {}
for artist in cached_artists:
ext_id = artist.similar_artist_spotify_id if source == 'spotify' else artist.similar_artist_itunes_id
if ext_id and ext_id not in cache_map:
cache_map[ext_id] = artist
for aid in artist_ids[:50]:
cached = cache_map.get(aid)
if cached and cached.image_url:
# Use cached metadata
enriched[aid] = {
"artist_name": cached.similar_artist_name,
"image_url": cached.image_url,
"genres": cached.genres[:3] if cached.genres else [],
"popularity": cached.popularity or 0
}
else:
uncached_ids.append(aid)
if source == 'spotify' and spotify_client and spotify_client.is_authenticated() and not _spotify_rate_limited():
try:
batch_result = spotify_client.sp.artists(artist_ids[:50])
if batch_result and 'artists' in batch_result:
for sp_artist in batch_result['artists']:
if sp_artist:
enriched[sp_artist['id']] = {
"artist_name": sp_artist.get('name'),
"image_url": sp_artist['images'][0]['url'] if sp_artist.get('images') else None,
"genres": sp_artist.get('genres', [])[:3],
"popularity": sp_artist.get('popularity', 0)
}
except Exception as e:
print(f"Error enriching Spotify batch: {e}")
else:
from core.itunes_client import iTunesClient
itunes_client = iTunesClient()
for aid in artist_ids[:50]:
# Only make API calls for uncached artists
if uncached_ids:
if source == 'spotify' and spotify_client and spotify_client.is_authenticated() and not _spotify_rate_limited():
try:
itunes_artist = itunes_client.get_artist(aid)
if itunes_artist:
enriched[aid] = {
"artist_name": itunes_artist.get('name'),
"image_url": itunes_artist.get('images', [{}])[0].get('url') if itunes_artist.get('images') else None,
"genres": itunes_artist.get('genres', [])[:3],
"popularity": 0
}
except Exception:
pass
batch_result = spotify_client.sp.artists(uncached_ids[:50])
if batch_result and 'artists' in batch_result:
for sp_artist in batch_result['artists']:
if sp_artist:
img_url = sp_artist['images'][0].get('url') if sp_artist.get('images') else None
genres = sp_artist.get('genres', [])[:3]
pop = sp_artist.get('popularity', 0)
enriched[sp_artist['id']] = {
"artist_name": sp_artist.get('name'),
"image_url": img_url,
"genres": genres,
"popularity": pop
}
# Cache to DB for future use
database.update_similar_artist_metadata_by_external_id(
sp_artist['id'], 'spotify',
image_url=img_url, genres=genres, popularity=pop
)
except Exception as e:
print(f"Error enriching Spotify batch: {e}")
else:
from core.itunes_client import iTunesClient
itunes_client = iTunesClient()
for aid in uncached_ids[:50]:
try:
itunes_artist = itunes_client.get_artist(aid)
if itunes_artist:
img_url = itunes_artist.get('images', [{}])[0].get('url') if itunes_artist.get('images') else None
genres = itunes_artist.get('genres', [])[:3]
enriched[aid] = {
"artist_name": itunes_artist.get('name'),
"image_url": img_url,
"genres": genres,
"popularity": 0
}
# Cache to DB for future use
database.update_similar_artist_metadata_by_external_id(
aid, 'itunes',
image_url=img_url, genres=genres, popularity=0
)
except Exception:
pass
cached_count = len(enriched) - len([aid for aid in uncached_ids if aid in enriched])
api_count = len([aid for aid in uncached_ids if aid in enriched])
if uncached_ids:
print(f"[Enrich] {cached_count} from cache, {api_count} from API ({len(uncached_ids) - api_count} missed)")
return jsonify({"success": True, "artists": enriched})

@ -42118,9 +42118,12 @@ async function openRecommendedArtistsModal() {
renderRecommendedArtistsModal(modal, data.artists);
// Phase 2: Enrich with images/genres progressively in batches of 50
// Skip artists that already have cached metadata from the initial response
const source = data.source || 'spotify';
const idKey = source === 'spotify' ? 'spotify_artist_id' : 'itunes_artist_id';
const allIds = data.artists.map(a => a[idKey]).filter(Boolean);
const allIds = data.artists
.filter(a => !a.image_url) // Only enrich artists without cached images
.map(a => a[idKey]).filter(Boolean);
for (let i = 0; i < allIds.length; i += 50) {
const batchIds = allIds.slice(i, i + 50);

Loading…
Cancel
Save