Cache similar artist metadata at scan time to eliminate redundant Spotify API calls

pull/253/head
Broque Thomas 1 month ago
parent ae6fb929bf
commit 7da7f3b112

@ -1483,14 +1483,17 @@ class WatchlistScanner:
stored_count = 0
for rank, similar_artist in enumerate(similar_artists, 1):
try:
# similar_artist has 'name', 'spotify_id', and 'itunes_id' keys
# similar_artist has 'name', 'spotify_id', 'itunes_id', 'image_url', 'genres', 'popularity'
success = self.database.add_or_update_similar_artist(
source_artist_id=source_artist_id,
similar_artist_name=similar_artist['name'],
similar_artist_spotify_id=similar_artist.get('spotify_id'),
similar_artist_itunes_id=similar_artist.get('itunes_id'),
similarity_rank=rank,
profile_id=profile_id
profile_id=profile_id,
image_url=similar_artist.get('image_url'),
genres=similar_artist.get('genres'),
popularity=similar_artist.get('popularity', 0)
)
if success:

@ -5216,25 +5216,36 @@ class MusicDatabase:
similar_artist_spotify_id: Optional[str] = None,
similar_artist_itunes_id: Optional[str] = None,
similarity_rank: int = 1,
profile_id: int = 1) -> bool:
profile_id: int = 1,
image_url: Optional[str] = None,
genres: Optional[list] = None,
popularity: int = 0) -> bool:
"""Add or update a similar artist recommendation (supports both Spotify and iTunes IDs)"""
try:
with self._get_connection() as conn:
cursor = conn.cursor()
genres_json = json.dumps(genres) if genres else None
# Use artist name as the unique key (allows storing both IDs for same artist)
cursor.execute("""
INSERT INTO similar_artists
(source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_name, similarity_rank, occurrence_count, last_updated, profile_id)
VALUES (?, ?, ?, ?, ?, 1, CURRENT_TIMESTAMP, ?)
(source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_name,
similarity_rank, occurrence_count, last_updated, profile_id,
image_url, genres, popularity, metadata_updated_at)
VALUES (?, ?, ?, ?, ?, 1, CURRENT_TIMESTAMP, ?, ?, ?, ?, CURRENT_TIMESTAMP)
ON CONFLICT(profile_id, source_artist_id, similar_artist_name)
DO UPDATE SET
similar_artist_spotify_id = COALESCE(excluded.similar_artist_spotify_id, similar_artist_spotify_id),
similar_artist_itunes_id = COALESCE(excluded.similar_artist_itunes_id, similar_artist_itunes_id),
similarity_rank = excluded.similarity_rank,
occurrence_count = occurrence_count + 1,
last_updated = CURRENT_TIMESTAMP
""", (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_name, similarity_rank, profile_id))
last_updated = CURRENT_TIMESTAMP,
image_url = COALESCE(excluded.image_url, image_url),
genres = COALESCE(excluded.genres, genres),
popularity = CASE WHEN excluded.popularity > 0 THEN excluded.popularity ELSE popularity END,
metadata_updated_at = CASE WHEN excluded.image_url IS NOT NULL THEN CURRENT_TIMESTAMP ELSE metadata_updated_at END
""", (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_name,
similarity_rank, profile_id, image_url, genres_json, popularity))
conn.commit()
return True
@ -5338,6 +5349,29 @@ class MusicDatabase:
logger.error(f"Error updating similar artist metadata: {e}")
return False
def update_similar_artist_metadata_by_external_id(self, external_id: str, source: str = 'spotify',
                                                  image_url: str = None, genres: list = None,
                                                  popularity: int = None) -> bool:
    """Cache artist metadata by Spotify or iTunes ID (updates all rows for that artist).

    Args:
        external_id: Spotify or iTunes artist ID used to match rows.
        source: 'spotify' matches similar_artist_spotify_id; any other value
            matches similar_artist_itunes_id.
        image_url: Artist image URL, or None to keep the currently cached value.
        genres: List of genre strings (stored as JSON text); None or empty keeps
            the currently cached value.
        popularity: Popularity score; None keeps the currently cached value.

    Returns:
        True if at least one row was updated, False otherwise (including on error).
    """
    try:
        with self._get_connection() as conn:
            cursor = conn.cursor()
            # Empty genre lists are stored as NULL, matching the convention
            # used by add_or_update_similar_artist.
            genres_json = json.dumps(genres) if genres else None
            # Column name comes from a fixed two-way whitelist, so the
            # f-string interpolation below carries no injection risk.
            if source == 'spotify':
                where_clause = "similar_artist_spotify_id = ?"
            else:
                where_clause = "similar_artist_itunes_id = ?"
            # COALESCE preserves previously cached values when a field is not
            # supplied on this call, consistent with the upsert policy in
            # add_or_update_similar_artist. (Previously, omitted fields
            # clobbered cached metadata with NULL/0.)
            cursor.execute(f"""
                UPDATE similar_artists
                SET image_url = COALESCE(?, image_url),
                    genres = COALESCE(?, genres),
                    popularity = COALESCE(?, popularity),
                    metadata_updated_at = CURRENT_TIMESTAMP
                WHERE {where_clause}
            """, (image_url, genres_json, popularity, external_id))
            conn.commit()
            return cursor.rowcount > 0
    except Exception as e:
        logger.error(f"Error updating similar artist metadata by external ID: {e}")
        return False
def has_fresh_similar_artists(self, source_artist_id: str, days_threshold: int = 30, require_itunes: bool = True, require_spotify: bool = False, profile_id: int = 1) -> bool:
"""
Check if we have cached similar artists that are still fresh (<days_threshold old).

@ -29142,7 +29142,7 @@ def get_discover_similar_artists():
else:
artist_id = artist.similar_artist_itunes_id or artist.similar_artist_spotify_id
result_artists.append({
artist_data = {
"artist_id": artist_id,
"spotify_artist_id": artist.similar_artist_spotify_id,
"itunes_artist_id": artist.similar_artist_itunes_id,
@ -29150,7 +29150,15 @@ def get_discover_similar_artists():
"occurrence_count": artist.occurrence_count,
"similarity_rank": artist.similarity_rank,
"source": active_source,
})
}
# Include cached metadata if available
if artist.image_url:
artist_data["image_url"] = artist.image_url
if artist.genres:
artist_data["genres"] = artist.genres[:3]
if artist.popularity:
artist_data["popularity"] = artist.popularity
result_artists.append(artist_data)
print(f"[Similar Artists] {len(similar_artists)} from DB, {len(result_artists)} valid for {active_source}")
@ -29168,7 +29176,9 @@ def get_discover_similar_artists():
@app.route('/api/discover/similar-artists/enrich', methods=['POST'])
def enrich_similar_artists():
"""Enrich a batch of artist IDs with images/genres from Spotify or iTunes"""
"""Enrich a batch of artist IDs with images/genres from Spotify or iTunes.
Uses cached metadata from DB when available, only makes API calls for uncached artists,
and saves new results back to DB for future use."""
try:
data = request.get_json()
artist_ids = data.get('artist_ids', [])
@ -29177,37 +29187,82 @@ def enrich_similar_artists():
if not artist_ids:
return jsonify({"success": True, "artists": {}})
database = get_database()
enriched = {}
uncached_ids = []
# Check DB cache first — get all similar artists and index by external ID
cached_artists = database.get_top_similar_artists(limit=500, profile_id=get_current_profile_id())
cache_map = {}
for artist in cached_artists:
ext_id = artist.similar_artist_spotify_id if source == 'spotify' else artist.similar_artist_itunes_id
if ext_id and ext_id not in cache_map:
cache_map[ext_id] = artist
for aid in artist_ids[:50]:
cached = cache_map.get(aid)
if cached and cached.image_url:
# Use cached metadata
enriched[aid] = {
"artist_name": cached.similar_artist_name,
"image_url": cached.image_url,
"genres": cached.genres[:3] if cached.genres else [],
"popularity": cached.popularity or 0
}
else:
uncached_ids.append(aid)
if source == 'spotify' and spotify_client and spotify_client.is_authenticated() and not _spotify_rate_limited():
try:
batch_result = spotify_client.sp.artists(artist_ids[:50])
if batch_result and 'artists' in batch_result:
for sp_artist in batch_result['artists']:
if sp_artist:
enriched[sp_artist['id']] = {
"artist_name": sp_artist.get('name'),
"image_url": sp_artist['images'][0]['url'] if sp_artist.get('images') else None,
"genres": sp_artist.get('genres', [])[:3],
"popularity": sp_artist.get('popularity', 0)
}
except Exception as e:
print(f"Error enriching Spotify batch: {e}")
else:
from core.itunes_client import iTunesClient
itunes_client = iTunesClient()
for aid in artist_ids[:50]:
# Only make API calls for uncached artists
if uncached_ids:
if source == 'spotify' and spotify_client and spotify_client.is_authenticated() and not _spotify_rate_limited():
try:
itunes_artist = itunes_client.get_artist(aid)
if itunes_artist:
enriched[aid] = {
"artist_name": itunes_artist.get('name'),
"image_url": itunes_artist.get('images', [{}])[0].get('url') if itunes_artist.get('images') else None,
"genres": itunes_artist.get('genres', [])[:3],
"popularity": 0
}
except Exception:
pass
batch_result = spotify_client.sp.artists(uncached_ids[:50])
if batch_result and 'artists' in batch_result:
for sp_artist in batch_result['artists']:
if sp_artist:
img_url = sp_artist['images'][0].get('url') if sp_artist.get('images') else None
genres = sp_artist.get('genres', [])[:3]
pop = sp_artist.get('popularity', 0)
enriched[sp_artist['id']] = {
"artist_name": sp_artist.get('name'),
"image_url": img_url,
"genres": genres,
"popularity": pop
}
# Cache to DB for future use
database.update_similar_artist_metadata_by_external_id(
sp_artist['id'], 'spotify',
image_url=img_url, genres=genres, popularity=pop
)
except Exception as e:
print(f"Error enriching Spotify batch: {e}")
else:
from core.itunes_client import iTunesClient
itunes_client = iTunesClient()
for aid in uncached_ids[:50]:
try:
itunes_artist = itunes_client.get_artist(aid)
if itunes_artist:
img_url = itunes_artist.get('images', [{}])[0].get('url') if itunes_artist.get('images') else None
genres = itunes_artist.get('genres', [])[:3]
enriched[aid] = {
"artist_name": itunes_artist.get('name'),
"image_url": img_url,
"genres": genres,
"popularity": 0
}
# Cache to DB for future use
database.update_similar_artist_metadata_by_external_id(
aid, 'itunes',
image_url=img_url, genres=genres, popularity=0
)
except Exception:
pass
cached_count = len(enriched) - len([aid for aid in uncached_ids if aid in enriched])
api_count = len([aid for aid in uncached_ids if aid in enriched])
if uncached_ids:
print(f"[Enrich] {cached_count} from cache, {api_count} from API ({len(uncached_ids) - api_count} missed)")
return jsonify({"success": True, "artists": enriched})

@ -42118,9 +42118,12 @@ async function openRecommendedArtistsModal() {
renderRecommendedArtistsModal(modal, data.artists);
// Phase 2: Enrich with images/genres progressively in batches of 50
// Skip artists that already have cached metadata from the initial response
const source = data.source || 'spotify';
const idKey = source === 'spotify' ? 'spotify_artist_id' : 'itunes_artist_id';
const allIds = data.artists.map(a => a[idKey]).filter(Boolean);
const allIds = data.artists
.filter(a => !a.image_url) // Only enrich artists without cached images
.map(a => a[idKey]).filter(Boolean);
for (let i = 0; i < allIds.length; i += 50) {
const batchIds = allIds.slice(i, i + 50);

Loading…
Cancel
Save