"""Artist Map endpoints — lifted from web_server.py. The four route bodies (``get_artist_map_data``, ``get_artist_map_genre_list``, ``get_artist_map_genres``, ``get_artist_map_explore``) plus their cache helpers and the artist-map cache are byte-identical to the originals. Module-level shims for ``get_current_profile_id``, ``_get_itunes_client``, and the ``spotify_client`` proxy let the bodies resolve their original names without modification. """ import json import logging import time from flask import g, jsonify, request from database.music_database import get_database from core.metadata.registry import get_itunes_client, get_spotify_client logger = logging.getLogger(__name__) def get_current_profile_id() -> int: """Mirror of web_server.get_current_profile_id — uses Flask g. Catches RuntimeError too because reading `g` outside a request context raises that (not AttributeError) — happens when this is called from background threads (sync, automation, scanners).""" try: return g.profile_id except (AttributeError, RuntimeError): return 1 def _get_itunes_client(): """Mirror of web_server._get_itunes_client — delegates to registry.""" return get_itunes_client() class _SpotifyClientProxy: """Resolves the global Spotify client lazily so a Spotify re-auth that rebinds the cached client in core.metadata.registry is visible to the lifted route bodies.""" def __getattr__(self, name): client = get_spotify_client() if client is None: raise AttributeError(name) return getattr(client, name) def __bool__(self): return get_spotify_client() is not None spotify_client = _SpotifyClientProxy() # Artist Map data cache — avoids re-querying 4+ tables on every request # Keys: 'watchlist_{profile}', 'genres_{profile}', 'genre_list' # Values: {'data': , 'ts': } _artist_map_cache = {} _ARTIST_MAP_CACHE_TTL = 300 # 5 minutes def _artmap_cache_get(key): """Get cached artist map data if still fresh.""" entry = _artist_map_cache.get(key) if entry and (time.time() - entry['ts']) < _ARTIST_MAP_CACHE_TTL: return entry['data'] return None def _artmap_cache_set(key, data): """Store artist map data in cache.""" _artist_map_cache[key] = {'data': data, 'ts': time.time()} def _artmap_cache_invalidate(profile_id=None): """Invalidate artist map cache (call after watchlist changes, scans, etc.).""" if profile_id: _artist_map_cache.pop(f'watchlist_{profile_id}', None) _artist_map_cache.pop(f'genres_{profile_id}', None) _artist_map_cache.pop('genre_list', None) def get_artist_map_data(): """Get watchlist artists + their similar artists for the force-directed artist map.""" try: database = get_database() profile_id = get_current_profile_id() cached = _artmap_cache_get(f'watchlist_{profile_id}') if cached: return jsonify(cached) # Get all watchlist artists conn = database._get_connection() cursor = conn.cursor() cursor.execute(""" SELECT id, artist_name, spotify_artist_id, itunes_artist_id, deezer_artist_id, discogs_artist_id, image_url FROM watchlist_artists WHERE profile_id = ? """, (profile_id,)) watchlist_rows = cursor.fetchall() nodes = [] # {id, name, image_url, type: 'watchlist'|'similar', genres, size} edges = [] # {source, target, weight} seen_names = {} # normalized_name → node index def _norm(name): return (name or '').lower().strip() # Add watchlist artists as anchor nodes for wa in watchlist_rows: w = dict(wa) norm = _norm(w['artist_name']) if norm in seen_names: continue idx = len(nodes) seen_names[norm] = idx # Get image — prefer HTTP URLs img = w.get('image_url', '') or '' if img and not img.startswith('http'): img = '' nodes.append({ 'id': idx, 'name': w['artist_name'], 'image_url': img, 'type': 'watchlist', 'genres': [], 'spotify_id': w.get('spotify_artist_id') or '', 'itunes_id': w.get('itunes_artist_id') or '', 'deezer_id': w.get('deezer_artist_id') or '', 'discogs_id': w.get('discogs_artist_id') or '', 'source_db_id': str(w['id']), }) # Get all similar artists for all watchlist artists watchlist_ids = [dict(wa)['spotify_artist_id'] or dict(wa)['itunes_artist_id'] or str(dict(wa)['id']) for wa in watchlist_rows] if watchlist_ids: placeholders = ','.join(['?'] * len(watchlist_ids)) cursor.execute(f""" SELECT source_artist_id, similar_artist_name, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, similarity_rank, occurrence_count, image_url, genres, popularity FROM similar_artists WHERE profile_id = ? AND source_artist_id IN ({placeholders}) ORDER BY similarity_rank ASC """, [profile_id] + watchlist_ids) for row in cursor.fetchall(): r = dict(row) sim_norm = _norm(r['similar_artist_name']) # Find or create similar artist node if sim_norm not in seen_names: idx = len(nodes) seen_names[sim_norm] = idx img = r.get('image_url', '') or '' if img and not img.startswith('http'): img = '' genres = [] if r.get('genres'): try: genres = json.loads(r['genres']) except Exception as e: logger.debug("similar node genres parse failed: %s", e) nodes.append({ 'id': idx, 'name': r['similar_artist_name'], 'image_url': img, 'type': 'similar', 'genres': genres, 'spotify_id': r.get('similar_artist_spotify_id') or '', 'itunes_id': r.get('similar_artist_itunes_id') or '', 'deezer_id': r.get('similar_artist_deezer_id') or '', 'musicbrainz_id': r.get('similar_artist_musicbrainz_id') or '', 'rank': r.get('similarity_rank', 5), 'occurrence': r.get('occurrence_count', 1), 'popularity': r.get('popularity', 0), }) sim_idx = seen_names[sim_norm] # Find the watchlist node that sourced this similar artist source_norm = None for wa in watchlist_rows: w = dict(wa) sid = w.get('spotify_artist_id') or w.get('itunes_artist_id') or str(w['id']) if sid == r['source_artist_id']: source_norm = _norm(w['artist_name']) break if source_norm and source_norm in seen_names: source_idx = seen_names[source_norm] # Weight: inverse of rank (rank 1 = strongest connection) weight = max(1, 11 - (r.get('similarity_rank', 5))) edges.append({ 'source': source_idx, 'target': sim_idx, 'weight': weight, }) # Also check if any similar artists ARE watchlist artists (cross-links) # These create extra connections between watchlist nodes for i, node in enumerate(nodes): if node['type'] == 'similar': # Check if this similar artist is also a watchlist artist for j, wnode in enumerate(nodes): if wnode['type'] == 'watchlist' and i != j: if _norm(node['name']) == _norm(wnode['name']): # Merge: upgrade the similar node to watchlist node['type'] = 'watchlist' break # ── Backfill from metadata cache: batch-lookup all node names across all sources ── # Single query to get ALL cached artist entries matching ANY node name try: all_names = list(set(_norm(n['name']) for n in nodes if n.get('name'))) if all_names: # Build case-insensitive IN clause via temp matching # Lightweight query — no raw_json (can be huge) cursor.execute(""" SELECT entity_id, source, name, image_url, genres, popularity FROM metadata_cache_entities WHERE entity_type = 'artist' """) cache_rows = cursor.fetchall() # Index cache by normalized name → {source: {id, image_url, genres}} cache_by_name = {} for cr in cache_rows: cn = _norm(cr['name'] or '') if cn not in cache_by_name: cache_by_name[cn] = {} source = cr['source'] genres = [] if cr['genres']: try: genres = json.loads(cr['genres']) if isinstance(cr['genres'], str) else [] except Exception as e: logger.debug("backfill cache genres parse failed: %s", e) cache_by_name[cn][source] = { 'id': cr['entity_id'], 'image_url': cr['image_url'] or '', 'genres': genres, } # Apply cache data to nodes source_id_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} for n in nodes: nn = _norm(n['name']) cached = cache_by_name.get(nn) if not cached: continue for source, field in source_id_map.items(): if not n.get(field) and source in cached: n[field] = cached[source]['id'] # Backfill image if missing or local path if not n.get('image_url') or not n['image_url'].startswith('http'): for source in ('spotify', 'deezer', 'itunes'): if source in cached and cached[source].get('image_url', '').startswith('http'): n['image_url'] = cached[source]['image_url'] break # Backfill genres if missing if not n.get('genres') or len(n.get('genres', [])) == 0: for source in ('spotify', 'deezer', 'itunes', 'discogs', 'musicbrainz'): if source in cached and cached[source].get('genres'): n['genres'] = cached[source]['genres'][:5] break # Deezer direct URL fallback for n in nodes: if not n.get('image_url') or not n['image_url'].startswith('http'): if n.get('deezer_id'): n['image_url'] = f"https://api.deezer.com/artist/{n['deezer_id']}/image?size=big" # Album art fallback (iTunes artists have no artist images) _album_art = {} try: cursor.execute(""" SELECT artist_name, image_url FROM metadata_cache_entities WHERE entity_type = 'album' AND image_url LIKE 'http%' AND artist_name IS NOT NULL AND artist_name != '' """) for r in cursor.fetchall(): an = _norm(r['artist_name']) if an and an not in _album_art: _album_art[an] = r['image_url'] except Exception as e: logger.debug("artist map album-art cache build failed: %s", e) for n in nodes: if not n.get('image_url') or not n['image_url'].startswith('http'): nn = _norm(n['name']) if nn in _album_art: n['image_url'] = _album_art[nn] except Exception as cache_err: logger.debug(f"Artist map cache backfill error: {cache_err}") result = { 'success': True, 'nodes': nodes, 'edges': edges, 'watchlist_count': sum(1 for n in nodes if n['type'] == 'watchlist'), 'similar_count': sum(1 for n in nodes if n['type'] == 'similar'), } _artmap_cache_set(f'watchlist_{profile_id}', result) return jsonify(result) except Exception as e: logger.error(f"Error getting artist map data: {e}") import traceback traceback.print_exc() return jsonify({"success": False, "error": str(e)}), 500 def get_artist_map_genre_list(): """Lightweight endpoint — just genre names + counts for the picker. No node data.""" try: cached = _artmap_cache_get('genre_list') if cached: return jsonify(cached) database = get_database() conn = database._get_connection() cursor = conn.cursor() # Fast query: just count artists per genre from cache genre_counts = {} cursor.execute(""" SELECT genres FROM metadata_cache_entities WHERE entity_type = 'artist' AND genres IS NOT NULL AND genres != '' AND genres != '[]' """) for r in cursor.fetchall(): try: for g in json.loads(r['genres']): if g and isinstance(g, str): gl = g.lower().strip() genre_counts[gl] = genre_counts.get(gl, 0) + 1 except Exception as e: logger.debug("genre count row parse failed: %s", e) # Sort by count descending sorted_genres = sorted(genre_counts.items(), key=lambda x: -x[1]) result = { 'success': True, 'genres': [{'name': g, 'count': c} for g, c in sorted_genres], 'total': len(sorted_genres) } _artmap_cache_set('genre_list', result) return jsonify(result) except Exception as e: return jsonify({"success": False, "error": str(e)}), 500 def get_artist_map_genres(): """Get ALL artists from every data source, grouped by genre for the genre map.""" try: database = get_database() profile_id = get_current_profile_id() cached = _artmap_cache_get(f'genres_{profile_id}') if cached: return jsonify(cached) conn = database._get_connection() cursor = conn.cursor() artists_by_name = {} # normalized_name → {name, image, genres[], sources, ids} def _norm(n): return (n or '').lower().strip() def _add(name, image_url=None, genres=None, spotify_id=None, itunes_id=None, deezer_id=None, discogs_id=None, musicbrainz_id=None, source='unknown', popularity=0): n = _norm(name) if not n or len(n) < 2: return if n not in artists_by_name: artists_by_name[n] = { 'name': name, 'image_url': '', 'genres': set(), 'spotify_id': '', 'itunes_id': '', 'deezer_id': '', 'discogs_id': '', 'musicbrainz_id': '', 'sources': set(), 'popularity': 0 } a = artists_by_name[n] if image_url and image_url.startswith('http') and not a['image_url']: a['image_url'] = image_url if genres: for g in (genres if isinstance(genres, list) else []): if g and isinstance(g, str): a['genres'].add(g.lower().strip()) if spotify_id and not a['spotify_id']: a['spotify_id'] = str(spotify_id) if itunes_id and not a['itunes_id']: a['itunes_id'] = str(itunes_id) if deezer_id and not a['deezer_id']: a['deezer_id'] = str(deezer_id) if discogs_id and not a['discogs_id']: a['discogs_id'] = str(discogs_id) if musicbrainz_id and not a['musicbrainz_id']: a['musicbrainz_id'] = str(musicbrainz_id) if popularity > a['popularity']: a['popularity'] = popularity a['sources'].add(source) # 1. Metadata cache — biggest source cursor.execute(""" SELECT name, entity_id, source, image_url, genres, popularity FROM metadata_cache_entities WHERE entity_type = 'artist' """) for r in cursor.fetchall(): genres = [] if r['genres']: try: genres = json.loads(r['genres']) if isinstance(r['genres'], str) else [] except Exception as e: logger.debug("cache artist genres parse failed: %s", e) src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} kwargs = {src_map.get(r['source'], 'spotify_id'): r['entity_id']} _add(r['name'], image_url=r['image_url'], genres=genres, source='cache', popularity=r['popularity'] or 0, **kwargs) # 2. Similar artists cursor.execute(""" SELECT similar_artist_name, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity FROM similar_artists WHERE profile_id = ? """, (profile_id,)) for r in cursor.fetchall(): genres = [] if r['genres']: try: genres = json.loads(r['genres']) if isinstance(r['genres'], str) else [] except Exception as e: logger.debug("similar artist genres parse failed: %s", e) _add(r['similar_artist_name'], image_url=r['image_url'], genres=genres, spotify_id=r['similar_artist_spotify_id'], itunes_id=r['similar_artist_itunes_id'], deezer_id=r['similar_artist_deezer_id'], musicbrainz_id=r['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in r.keys() else None, source='similar', popularity=r['popularity'] or 0) # 3. Watchlist artists cursor.execute(""" SELECT artist_name, spotify_artist_id, itunes_artist_id, deezer_artist_id, discogs_artist_id, image_url FROM watchlist_artists WHERE profile_id = ? """, (profile_id,)) for r in cursor.fetchall(): _add(r['artist_name'], image_url=r['image_url'], spotify_id=r['spotify_artist_id'], itunes_id=r['itunes_artist_id'], deezer_id=r['deezer_artist_id'], discogs_id=r['discogs_artist_id'], source='watchlist') # 4. Library artists cursor.execute("SELECT name, thumb_url, genres FROM artists") for r in cursor.fetchall(): genres = [] if r['genres']: try: genres = json.loads(r['genres']) if isinstance(r['genres'], str) else [] except Exception as e: logger.debug("library artist genres parse failed: %s", e) img = r['thumb_url'] if r['thumb_url'] and r['thumb_url'].startswith('http') else None _add(r['name'], image_url=img, genres=genres, source='library') # Filter: only include artists that have at least one genre genre_artists = {k: v for k, v in artists_by_name.items() if v['genres']} # Build genre → artists map genre_map = {} # genre_name → [artist_keys] for key, a in genre_artists.items(): for g in a['genres']: if g not in genre_map: genre_map[g] = [] genre_map[g].append(key) # Sort genres by artist count, take top genres sorted_genres = sorted(genre_map.items(), key=lambda x: -len(x[1])) # Build nodes nodes = [] node_idx = {} for key, a in genre_artists.items(): idx = len(nodes) node_idx[key] = idx nodes.append({ 'id': idx, 'name': a['name'], 'image_url': a['image_url'], 'genres': list(a['genres'])[:5], 'spotify_id': a['spotify_id'], 'itunes_id': a['itunes_id'], 'deezer_id': a['deezer_id'], 'discogs_id': a['discogs_id'], 'musicbrainz_id': a['musicbrainz_id'], 'popularity': a['popularity'], 'type': 'watchlist' if 'watchlist' in a['sources'] else 'similar', }) # Build genre clusters — allow artists in multiple genres top_genres = sorted_genres[:40] # Sort genres by co-occurrence so related genres are adjacent in the list. # This makes the spiral layout place related genres near each other. if len(top_genres) > 2: genre_sets = {g: set(keys) for g, keys in top_genres} ordered = [top_genres[0][0]] # Start with biggest genre remaining = {g for g, _ in top_genres[1:]} while remaining: last = ordered[-1] last_set = genre_sets.get(last, set()) # Find most similar remaining genre (highest artist overlap) best = None best_overlap = -1 for g in remaining: overlap = len(last_set & genre_sets.get(g, set())) if overlap > best_overlap: best_overlap = overlap best = g ordered.append(best) remaining.remove(best) # Rebuild top_genres in the ordered sequence genre_dict = dict(top_genres) top_genres = [(g, genre_dict[g]) for g in ordered if g in genre_dict] genres_out = [] for genre, artist_keys in top_genres: genres_out.append({ 'name': genre, 'count': len(artist_keys), 'artist_ids': [node_idx[k] for k in artist_keys if k in node_idx], }) # Image cleanup + multi-source fallback # Build two lookups: name→image_url AND name→deezer_entity_id _img_cache = {} _deezer_id_cache = {} _album_art_cache = {} # artist_name → album image (iTunes fallback) try: # Artist images + Deezer IDs cursor.execute(""" SELECT name, entity_id, source, image_url FROM metadata_cache_entities WHERE entity_type = 'artist' AND ((image_url IS NOT NULL AND image_url != '' AND image_url LIKE 'http%') OR source = 'deezer') """) for r in cursor.fetchall(): nn = (r['name'] or '').lower().strip() if not nn: continue if r['image_url'] and r['image_url'].startswith('http') and nn not in _img_cache: _img_cache[nn] = r['image_url'] if r['source'] == 'deezer' and r['entity_id'] and nn not in _deezer_id_cache: _deezer_id_cache[nn] = r['entity_id'] # Album art by artist name (for iTunes artists with no artist image) cursor.execute(""" SELECT artist_name, image_url FROM metadata_cache_entities WHERE entity_type = 'album' AND image_url IS NOT NULL AND image_url != '' AND image_url LIKE 'http%' AND artist_name IS NOT NULL AND artist_name != '' """) for r in cursor.fetchall(): nn = (r['artist_name'] or '').lower().strip() if nn and nn not in _album_art_cache: _album_art_cache[nn] = r['image_url'] except Exception as e: logger.debug("genre map cache build failed: %s", e) for n in nodes: img = n.get('image_url', '') if img in ('None', 'null', '') or (img and not img.startswith('http')): n['image_url'] = '' nn = n['name'].lower().strip() if not n['image_url']: # Try cache image by name n['image_url'] = _img_cache.get(nn, '') if not n['image_url'] and n.get('deezer_id'): n['image_url'] = f"https://api.deezer.com/artist/{n['deezer_id']}/image?size=big" if not n['image_url']: # Try Deezer ID from cache by name did = _deezer_id_cache.get(nn) if did: n['deezer_id'] = did n['image_url'] = f"https://api.deezer.com/artist/{did}/image?size=big" if not n['image_url']: # Try album art by artist name (iTunes artists have no artist images) n['image_url'] = _album_art_cache.get(nn, '') _img_count = sum(1 for n in nodes if n.get('image_url')) _deezer_count = sum(1 for n in nodes if n.get('image_url', '').startswith('https://api.deezer')) _none_count = sum(1 for n in nodes if not n.get('image_url')) logger.info(f"[Genre Map] {len(nodes)} artists, {len(sorted_genres)} genres") logger.warning(f"[Genre Map] Images: {_img_count} have URLs, {_deezer_count} Deezer fallback, {_none_count} missing") if _none_count > 0: samples = [n['name'] for n in nodes if not n.get('image_url')][:5] logger.warning(f"[Genre Map] Missing image samples: {samples}") result = { 'success': True, 'nodes': nodes, 'genres': genres_out, 'total_artists': len(nodes), 'total_genres': len(sorted_genres), } _artmap_cache_set(f'genres_{profile_id}', result) return jsonify(result) except Exception as e: logger.error(f"Error getting genre map data: {e}") import traceback traceback.print_exc() return jsonify({"success": False, "error": str(e)}), 500 def get_artist_map_explore(): """Build an exploration map outward from a single artist.""" try: artist_name = request.args.get('name', '').strip() artist_id = request.args.get('id', '').strip() if not artist_name and not artist_id: return jsonify({"success": False, "error": "Provide artist name or id"}), 400 database = get_database() profile_id = get_current_profile_id() conn = database._get_connection() cursor = conn.cursor() def _norm(n): return (n or '').lower().strip() nodes = [] edges = [] seen = {} # norm_name → node index # Find the center artist center_name = artist_name center_image = '' center_ids = {'spotify_id': '', 'itunes_id': '', 'deezer_id': '', 'discogs_id': '', 'musicbrainz_id': ''} center_genres = [] # Search metadata cache for the center artist if artist_id: cursor.execute(""" SELECT name, entity_id, source, image_url, genres FROM metadata_cache_entities WHERE entity_type = 'artist' AND entity_id = ? LIMIT 1 """, (artist_id,)) else: cursor.execute(""" SELECT name, entity_id, source, image_url, genres FROM metadata_cache_entities WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE LIMIT 1 """, (artist_name,)) row = cursor.fetchone() artist_found = False if row: artist_found = True center_name = row['name'] if row['image_url'] and row['image_url'].startswith('http'): center_image = row['image_url'] src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} k = src_map.get(row['source'], 'spotify_id') center_ids[k] = row['entity_id'] if row['genres']: try: center_genres = json.loads(row['genres']) if isinstance(row['genres'], str) else [] except Exception as e: logger.debug("initial center genres parse failed: %s", e) # Check watchlist + library if not in cache if not artist_found and not artist_id: cursor.execute("SELECT artist_name, image_url, spotify_artist_id, itunes_artist_id, deezer_artist_id, discogs_artist_id, musicbrainz_artist_id FROM watchlist_artists WHERE artist_name = ? COLLATE NOCASE LIMIT 1", (artist_name,)) wr = cursor.fetchone() if wr: artist_found = True center_name = wr['artist_name'] if wr['image_url'] and str(wr['image_url']).startswith('http'): center_image = wr['image_url'] for k, col in [('spotify_id', 'spotify_artist_id'), ('itunes_id', 'itunes_artist_id'), ('deezer_id', 'deezer_artist_id'), ('discogs_id', 'discogs_artist_id'), ('musicbrainz_id', 'musicbrainz_artist_id')]: if wr[col]: center_ids[k] = str(wr[col]) else: cursor.execute("SELECT name, thumb_url FROM artists WHERE name = ? COLLATE NOCASE LIMIT 1", (artist_name,)) lr = cursor.fetchone() if lr: artist_found = True center_name = lr['name'] if lr['thumb_url'] and str(lr['thumb_url']).startswith('http'): center_image = lr['thumb_url'] # If not found locally, validate via metadata API search if not artist_found and not artist_id: try: api_match = None if spotify_client and spotify_client.is_spotify_authenticated(): results = spotify_client.search_artists(artist_name, limit=1) if results and len(results) > 0: sa = results[0] if sa.name.lower().strip() == artist_name.lower().strip() or \ artist_name.lower().strip() in sa.name.lower().strip(): api_match = sa center_name = sa.name center_ids['spotify_id'] = sa.id center_image = sa.image_url if hasattr(sa, 'image_url') else '' center_genres = sa.genres if hasattr(sa, 'genres') else [] artist_found = True if not artist_found: ic = _get_itunes_client() results = ic.search_artists(artist_name, limit=1) if results and len(results) > 0: ia = results[0] if ia.name.lower().strip() == artist_name.lower().strip() or \ artist_name.lower().strip() in ia.name.lower().strip(): center_name = ia.name center_ids['itunes_id'] = str(ia.id) center_image = ia.image_url if hasattr(ia, 'image_url') else '' artist_found = True except Exception as e: logger.debug(f"[Artist Explorer] API validation failed for '{artist_name}': {e}") if not artist_found: return jsonify({"success": False, "error": f"Artist '{artist_name}' not found"}), 404 # Also check cache for other source IDs cursor.execute(""" SELECT entity_id, source, image_url, genres FROM metadata_cache_entities WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE """, (center_name,)) for r in cursor.fetchall(): src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} k = src_map.get(r['source'], 'spotify_id') if not center_ids.get(k): center_ids[k] = r['entity_id'] if r['image_url'] and r['image_url'].startswith('http') and not center_image: center_image = r['image_url'] if r['genres'] and not center_genres: try: center_genres = json.loads(r['genres']) if isinstance(r['genres'], str) else [] except Exception as e: logger.debug("center genres parse failed: %s", e) # Add center node center_idx = 0 seen[_norm(center_name)] = center_idx nodes.append({ 'id': 0, 'name': center_name, 'image_url': center_image, 'type': 'center', 'genres': center_genres[:5], **center_ids, 'ring': 0 }) # Ring 1: Direct similar artists from similar_artists table # Search by all known IDs id_values = [v for v in center_ids.values() if v] ring1_artists = [] if id_values: placeholders = ','.join(['?'] * len(id_values)) cursor.execute(f""" SELECT DISTINCT similar_artist_name, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity, similarity_rank FROM similar_artists WHERE source_artist_id IN ({placeholders}) AND profile_id = ? ORDER BY similarity_rank ASC """, id_values + [profile_id]) ring1_artists = cursor.fetchall() # Also search by name (the center artist might be a watchlist source) cursor.execute(""" SELECT DISTINCT sa.similar_artist_name, sa.similar_artist_spotify_id, sa.similar_artist_itunes_id, sa.similar_artist_deezer_id, sa.similar_artist_musicbrainz_id, sa.image_url, sa.genres, sa.popularity, sa.similarity_rank FROM similar_artists sa JOIN watchlist_artists wa ON sa.source_artist_id = COALESCE(wa.spotify_artist_id, wa.itunes_artist_id, CAST(wa.id AS TEXT)) WHERE wa.artist_name = ? COLLATE NOCASE AND sa.profile_id = ? ORDER BY sa.similarity_rank ASC """, (center_name, profile_id)) ring1_artists.extend(cursor.fetchall()) # If no similar artists in DB, fetch from MusicMap on-the-fly if not ring1_artists: try: logger.debug(f"[Artist Explorer] No stored similar artists for '{center_name}', fetching from MusicMap...") from core.watchlist_scanner import WatchlistScanner scanner = WatchlistScanner(spotify_client=spotify_client) if spotify_client else None if scanner: similar = scanner._fetch_similar_artists_from_musicmap(center_name, limit=15) if similar: source_artist_id = center_ids.get('spotify_id') or center_ids.get('itunes_id') or center_name # Store in DB for future use for rank, sa in enumerate(similar, 1): try: database.add_or_update_similar_artist( source_artist_id=source_artist_id, similar_artist_name=sa['name'], similar_artist_spotify_id=sa.get('spotify_id'), similar_artist_itunes_id=sa.get('itunes_id'), similarity_rank=rank, profile_id=profile_id, image_url=sa.get('image_url'), genres=sa.get('genres'), popularity=sa.get('popularity', 0), similar_artist_deezer_id=sa.get('deezer_id'), similar_artist_musicbrainz_id=sa.get('musicbrainz_id'), ) except Exception as e: logger.debug("similar artist insert failed: %s", e) # Re-query from DB to get consistent format if id_values: placeholders = ','.join(['?'] * len(id_values)) cursor.execute(f""" SELECT DISTINCT similar_artist_name, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity, similarity_rank FROM similar_artists WHERE source_artist_id IN ({placeholders}) AND profile_id = ? ORDER BY similarity_rank ASC """, id_values + [profile_id]) ring1_artists = cursor.fetchall() if not ring1_artists: # Fallback: query by name-based source ID cursor.execute(""" SELECT DISTINCT similar_artist_name, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity, similarity_rank FROM similar_artists WHERE source_artist_id = ? AND profile_id = ? ORDER BY similarity_rank ASC """, (source_artist_id, profile_id)) ring1_artists = cursor.fetchall() logger.debug(f"[Artist Explorer] Fetched {len(ring1_artists)} similar artists from MusicMap for '{center_name}'") _artmap_cache_invalidate(profile_id) # New similar artists added except Exception as e: logger.debug(f"[Artist Explorer] MusicMap fetch failed for '{center_name}': {e}") # Deduplicate ring 1 for r in ring1_artists: nn = _norm(r['similar_artist_name']) if nn in seen: continue idx = len(nodes) seen[nn] = idx genres = [] if r['genres']: try: genres = json.loads(r['genres']) if isinstance(r['genres'], str) else [] except Exception as e: logger.debug("ring1 genres parse failed: %s", e) img = r['image_url'] if r['image_url'] and r['image_url'].startswith('http') else '' nodes.append({ 'id': idx, 'name': r['similar_artist_name'], 'image_url': img, 'type': 'ring1', 'genres': genres[:5], 'spotify_id': r['similar_artist_spotify_id'] or '', 'itunes_id': r['similar_artist_itunes_id'] or '', 'deezer_id': r['similar_artist_deezer_id'] or '', 'musicbrainz_id': r['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in r.keys() else '', 'discogs_id': '', 'popularity': r['popularity'] or 0, 'rank': r['similarity_rank'] or 5, 'ring': 1, }) weight = max(1, 11 - (r['similarity_rank'] or 5)) edges.append({'source': center_idx, 'target': idx, 'weight': weight}) # Ring 2: Similar artists of ring 1 artists (from similar_artists table) ring1_ids = [] for n in nodes[1:]: # skip center for sid in [n.get('spotify_id'), n.get('itunes_id')]: if sid: ring1_ids.append(sid) if ring1_ids: placeholders = ','.join(['?'] * len(ring1_ids)) cursor.execute(f""" SELECT DISTINCT source_artist_id, similar_artist_name, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity, similarity_rank FROM similar_artists WHERE source_artist_id IN ({placeholders}) AND profile_id = ? ORDER BY similarity_rank ASC """, ring1_ids + [profile_id]) for r in cursor.fetchall(): nn = _norm(r['similar_artist_name']) if nn in seen: # Create edge to existing node if not center existing_idx = seen[nn] # Find the ring1 node that sourced this source_norm = None for n in nodes[1:]: for sid in [n.get('spotify_id'), n.get('itunes_id')]: if sid == r['source_artist_id']: source_norm = _norm(n['name']) break if source_norm: break if source_norm and source_norm in seen and existing_idx != seen[source_norm]: edges.append({'source': seen[source_norm], 'target': existing_idx, 'weight': 3}) continue idx = len(nodes) if idx >= 500: # Cap at 500 nodes for performance break seen[nn] = idx genres = [] if r['genres']: try: genres = json.loads(r['genres']) if isinstance(r['genres'], str) else [] except Exception as e: logger.debug("ring2 genres parse failed: %s", e) img = r['image_url'] if r['image_url'] and r['image_url'].startswith('http') else '' nodes.append({ 'id': idx, 'name': r['similar_artist_name'], 'image_url': img, 'type': 'ring2', 'genres': genres[:5], 'spotify_id': r['similar_artist_spotify_id'] or '', 'itunes_id': r['similar_artist_itunes_id'] or '', 'deezer_id': r['similar_artist_deezer_id'] or '', 'musicbrainz_id': r['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in r.keys() else '', 'discogs_id': '', 'popularity': r['popularity'] or 0, 'rank': r['similarity_rank'] or 5, 'ring': 2, }) # Find the ring1 source for n in nodes[1:]: for sid in [n.get('spotify_id'), n.get('itunes_id')]: if sid == r['source_artist_id']: edges.append({'source': n['id'], 'target': idx, 'weight': max(1, 11 - (r['similarity_rank'] or 5))}) break # Backfill images/genres from ALL cache sources + Deezer fallback for n in nodes: # Clean up string "None" stored as image URL if n['image_url'] in ('None', 'null', ''): n['image_url'] = '' if n['image_url'] and n['genres']: continue # Check all cache entries for this artist (multiple sources) cursor.execute(""" SELECT entity_id, source, image_url, genres FROM metadata_cache_entities WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE """, (n['name'],)) for cr in cursor.fetchall(): if not n['image_url'] and cr['image_url'] and cr['image_url'].startswith('http'): n['image_url'] = cr['image_url'] if not n['genres'] and cr['genres']: try: n['genres'] = json.loads(cr['genres'])[:5] if isinstance(cr['genres'], str) else [] except Exception as e: logger.debug("explorer node genres parse failed: %s", e) # Harvest missing IDs from cache src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} k = src_map.get(cr['source']) if k and not n.get(k): n[k] = cr['entity_id'] # Deezer image fallback — construct URL directly from ID if not n['image_url'] and n.get('deezer_id'): n['image_url'] = f"https://api.deezer.com/artist/{n['deezer_id']}/image?size=big" # Spotify image fallback — try API if authenticated if not n['image_url'] and n.get('spotify_id'): try: if spotify_client and spotify_client.is_spotify_authenticated(): from core.api_call_tracker import api_call_tracker api_call_tracker.record_call('spotify', endpoint='artist') artist_data = spotify_client.sp.artist(n['spotify_id']) if artist_data and artist_data.get('images'): n['image_url'] = artist_data['images'][0]['url'] if not n['genres'] and artist_data.get('genres'): n['genres'] = artist_data['genres'][:5] except Exception as e: logger.debug("spotify artist image fallback failed: %s", e) # Album art fallback (iTunes artists have no artist images) if not n['image_url']: cursor.execute(""" SELECT image_url FROM metadata_cache_entities WHERE entity_type = 'album' AND image_url LIKE 'http%' AND artist_name = ? COLLATE NOCASE LIMIT 1 """, (n['name'],)) alb = cursor.fetchone() if alb: n['image_url'] = alb['image_url'] logger.info(f"[Artist Explorer] Center: {center_name}, Ring 1: {sum(1 for n in nodes if n.get('ring')==1)}, Ring 2: {sum(1 for n in nodes if n.get('ring')==2)}, Edges: {len(edges)}") return jsonify({ 'success': True, 'nodes': nodes, 'edges': edges, 'center': center_name, }) except Exception as e: logger.error(f"Error getting artist explorer data: {e}") import traceback traceback.print_exc() return jsonify({"success": False, "error": str(e)}), 500