You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
SoulSync/core/artists/map.py

981 lines
45 KiB

"""Artist Map endpoints — lifted from web_server.py.
The four route bodies (``get_artist_map_data``, ``get_artist_map_genre_list``,
``get_artist_map_genres``, ``get_artist_map_explore``) plus their cache helpers
and the artist-map cache are byte-identical to the originals. Module-level
shims for ``get_current_profile_id``, ``_get_itunes_client``, and the
``spotify_client`` proxy let the bodies resolve their original names without
modification.
"""
import json
import logging
import time
from flask import g, jsonify, request
from database.music_database import get_database
from core.metadata.registry import get_itunes_client, get_spotify_client
logger = logging.getLogger(__name__)
def get_current_profile_id() -> int:
"""Mirror of web_server.get_current_profile_id — uses Flask g."""
try:
return g.profile_id
except AttributeError:
return 1
def _get_itunes_client():
"""Mirror of web_server._get_itunes_client — delegates to registry."""
return get_itunes_client()
class _SpotifyClientProxy:
"""Resolves the global Spotify client lazily so a Spotify re-auth that
rebinds the cached client in core.metadata.registry is visible to the
lifted route bodies."""
def __getattr__(self, name):
client = get_spotify_client()
if client is None:
raise AttributeError(name)
return getattr(client, name)
def __bool__(self):
return get_spotify_client() is not None
spotify_client = _SpotifyClientProxy()
# Artist Map data cache — avoids re-querying 4+ tables on every request
# Keys: 'watchlist_{profile}', 'genres_{profile}', 'genre_list'
# Values: {'data': <json-ready dict>, 'ts': <timestamp>}
_artist_map_cache = {}
_ARTIST_MAP_CACHE_TTL = 300 # 5 minutes
def _artmap_cache_get(key):
"""Get cached artist map data if still fresh."""
entry = _artist_map_cache.get(key)
if entry and (time.time() - entry['ts']) < _ARTIST_MAP_CACHE_TTL:
return entry['data']
return None
def _artmap_cache_set(key, data):
"""Store artist map data in cache."""
_artist_map_cache[key] = {'data': data, 'ts': time.time()}
def _artmap_cache_invalidate(profile_id=None):
"""Invalidate artist map cache (call after watchlist changes, scans, etc.)."""
if profile_id:
_artist_map_cache.pop(f'watchlist_{profile_id}', None)
_artist_map_cache.pop(f'genres_{profile_id}', None)
_artist_map_cache.pop('genre_list', None)
def get_artist_map_data():
"""Get watchlist artists + their similar artists for the force-directed artist map."""
try:
database = get_database()
profile_id = get_current_profile_id()
cached = _artmap_cache_get(f'watchlist_{profile_id}')
if cached:
return jsonify(cached)
# Get all watchlist artists
conn = database._get_connection()
cursor = conn.cursor()
cursor.execute("""
SELECT id, artist_name, spotify_artist_id, itunes_artist_id, deezer_artist_id,
discogs_artist_id, image_url
FROM watchlist_artists WHERE profile_id = ?
""", (profile_id,))
watchlist_rows = cursor.fetchall()
nodes = [] # {id, name, image_url, type: 'watchlist'|'similar', genres, size}
edges = [] # {source, target, weight}
seen_names = {} # normalized_name → node index
def _norm(name):
return (name or '').lower().strip()
# Add watchlist artists as anchor nodes
for wa in watchlist_rows:
w = dict(wa)
norm = _norm(w['artist_name'])
if norm in seen_names:
continue
idx = len(nodes)
seen_names[norm] = idx
# Get image — prefer HTTP URLs
img = w.get('image_url', '') or ''
if img and not img.startswith('http'):
img = ''
nodes.append({
'id': idx,
'name': w['artist_name'],
'image_url': img,
'type': 'watchlist',
'genres': [],
'spotify_id': w.get('spotify_artist_id') or '',
'itunes_id': w.get('itunes_artist_id') or '',
'deezer_id': w.get('deezer_artist_id') or '',
'discogs_id': w.get('discogs_artist_id') or '',
'source_db_id': str(w['id']),
})
# Get all similar artists for all watchlist artists
watchlist_ids = [dict(wa)['spotify_artist_id'] or dict(wa)['itunes_artist_id'] or str(dict(wa)['id']) for wa in watchlist_rows]
if watchlist_ids:
placeholders = ','.join(['?'] * len(watchlist_ids))
cursor.execute(f"""
SELECT source_artist_id, similar_artist_name, similar_artist_spotify_id,
similar_artist_itunes_id, similar_artist_deezer_id,
similarity_rank, occurrence_count, image_url, genres, popularity
FROM similar_artists
WHERE profile_id = ? AND source_artist_id IN ({placeholders})
ORDER BY similarity_rank ASC
""", [profile_id] + watchlist_ids)
for row in cursor.fetchall():
r = dict(row)
sim_norm = _norm(r['similar_artist_name'])
# Find or create similar artist node
if sim_norm not in seen_names:
idx = len(nodes)
seen_names[sim_norm] = idx
img = r.get('image_url', '') or ''
if img and not img.startswith('http'):
img = ''
genres = []
if r.get('genres'):
try:
genres = json.loads(r['genres'])
except Exception as e:
logger.debug("similar node genres parse failed: %s", e)
nodes.append({
'id': idx,
'name': r['similar_artist_name'],
'image_url': img,
'type': 'similar',
'genres': genres,
'spotify_id': r.get('similar_artist_spotify_id') or '',
'itunes_id': r.get('similar_artist_itunes_id') or '',
'deezer_id': r.get('similar_artist_deezer_id') or '',
'rank': r.get('similarity_rank', 5),
'occurrence': r.get('occurrence_count', 1),
'popularity': r.get('popularity', 0),
})
sim_idx = seen_names[sim_norm]
# Find the watchlist node that sourced this similar artist
source_norm = None
for wa in watchlist_rows:
w = dict(wa)
sid = w.get('spotify_artist_id') or w.get('itunes_artist_id') or str(w['id'])
if sid == r['source_artist_id']:
source_norm = _norm(w['artist_name'])
break
if source_norm and source_norm in seen_names:
source_idx = seen_names[source_norm]
# Weight: inverse of rank (rank 1 = strongest connection)
weight = max(1, 11 - (r.get('similarity_rank', 5)))
edges.append({
'source': source_idx,
'target': sim_idx,
'weight': weight,
})
# Also check if any similar artists ARE watchlist artists (cross-links)
# These create extra connections between watchlist nodes
for i, node in enumerate(nodes):
if node['type'] == 'similar':
# Check if this similar artist is also a watchlist artist
for j, wnode in enumerate(nodes):
if wnode['type'] == 'watchlist' and i != j:
if _norm(node['name']) == _norm(wnode['name']):
# Merge: upgrade the similar node to watchlist
node['type'] = 'watchlist'
break
# ── Backfill from metadata cache: batch-lookup all node names across all sources ──
# Single query to get ALL cached artist entries matching ANY node name
try:
all_names = list(set(_norm(n['name']) for n in nodes if n.get('name')))
if all_names:
# Build case-insensitive IN clause via temp matching
# Lightweight query — no raw_json (can be huge)
cursor.execute("""
SELECT entity_id, source, name, image_url, genres, popularity
FROM metadata_cache_entities
WHERE entity_type = 'artist'
""")
cache_rows = cursor.fetchall()
# Index cache by normalized name → {source: {id, image_url, genres}}
cache_by_name = {}
for cr in cache_rows:
cn = _norm(cr['name'] or '')
if cn not in cache_by_name:
cache_by_name[cn] = {}
source = cr['source']
genres = []
if cr['genres']:
try:
genres = json.loads(cr['genres']) if isinstance(cr['genres'], str) else []
except Exception as e:
logger.debug("backfill cache genres parse failed: %s", e)
cache_by_name[cn][source] = {
'id': cr['entity_id'],
'image_url': cr['image_url'] or '',
'genres': genres,
}
# Apply cache data to nodes
source_id_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'}
for n in nodes:
nn = _norm(n['name'])
cached = cache_by_name.get(nn)
if not cached:
continue
for source, field in source_id_map.items():
if not n.get(field) and source in cached:
n[field] = cached[source]['id']
# Backfill image if missing or local path
if not n.get('image_url') or not n['image_url'].startswith('http'):
for source in ('spotify', 'deezer', 'itunes'):
if source in cached and cached[source].get('image_url', '').startswith('http'):
n['image_url'] = cached[source]['image_url']
break
# Backfill genres if missing
if not n.get('genres') or len(n.get('genres', [])) == 0:
for source in ('spotify', 'deezer', 'itunes', 'discogs'):
if source in cached and cached[source].get('genres'):
n['genres'] = cached[source]['genres'][:5]
break
# Deezer direct URL fallback
for n in nodes:
if not n.get('image_url') or not n['image_url'].startswith('http'):
if n.get('deezer_id'):
n['image_url'] = f"https://api.deezer.com/artist/{n['deezer_id']}/image?size=big"
# Album art fallback (iTunes artists have no artist images)
_album_art = {}
try:
cursor.execute("""
SELECT artist_name, image_url FROM metadata_cache_entities
WHERE entity_type = 'album' AND image_url LIKE 'http%'
AND artist_name IS NOT NULL AND artist_name != ''
""")
for r in cursor.fetchall():
an = _norm(r['artist_name'])
if an and an not in _album_art:
_album_art[an] = r['image_url']
except Exception as e:
logger.debug("artist map album-art cache build failed: %s", e)
for n in nodes:
if not n.get('image_url') or not n['image_url'].startswith('http'):
nn = _norm(n['name'])
if nn in _album_art:
n['image_url'] = _album_art[nn]
except Exception as cache_err:
logger.debug(f"Artist map cache backfill error: {cache_err}")
result = {
'success': True,
'nodes': nodes,
'edges': edges,
'watchlist_count': sum(1 for n in nodes if n['type'] == 'watchlist'),
'similar_count': sum(1 for n in nodes if n['type'] == 'similar'),
}
_artmap_cache_set(f'watchlist_{profile_id}', result)
return jsonify(result)
except Exception as e:
logger.error(f"Error getting artist map data: {e}")
import traceback
traceback.print_exc()
return jsonify({"success": False, "error": str(e)}), 500
def get_artist_map_genre_list():
"""Lightweight endpoint — just genre names + counts for the picker. No node data."""
try:
cached = _artmap_cache_get('genre_list')
if cached:
return jsonify(cached)
database = get_database()
conn = database._get_connection()
cursor = conn.cursor()
# Fast query: just count artists per genre from cache
genre_counts = {}
cursor.execute("""
SELECT genres FROM metadata_cache_entities
WHERE entity_type = 'artist' AND genres IS NOT NULL AND genres != '' AND genres != '[]'
""")
for r in cursor.fetchall():
try:
for g in json.loads(r['genres']):
if g and isinstance(g, str):
gl = g.lower().strip()
genre_counts[gl] = genre_counts.get(gl, 0) + 1
except Exception as e:
logger.debug("genre count row parse failed: %s", e)
# Sort by count descending
sorted_genres = sorted(genre_counts.items(), key=lambda x: -x[1])
result = {
'success': True,
'genres': [{'name': g, 'count': c} for g, c in sorted_genres],
'total': len(sorted_genres)
}
_artmap_cache_set('genre_list', result)
return jsonify(result)
except Exception as e:
return jsonify({"success": False, "error": str(e)}), 500
def get_artist_map_genres():
"""Get ALL artists from every data source, grouped by genre for the genre map."""
try:
database = get_database()
profile_id = get_current_profile_id()
cached = _artmap_cache_get(f'genres_{profile_id}')
if cached:
return jsonify(cached)
conn = database._get_connection()
cursor = conn.cursor()
artists_by_name = {} # normalized_name → {name, image, genres[], sources, ids}
def _norm(n):
return (n or '').lower().strip()
def _add(name, image_url=None, genres=None, spotify_id=None, itunes_id=None, deezer_id=None, discogs_id=None, source='unknown', popularity=0):
n = _norm(name)
if not n or len(n) < 2:
return
if n not in artists_by_name:
artists_by_name[n] = {
'name': name, 'image_url': '', 'genres': set(),
'spotify_id': '', 'itunes_id': '', 'deezer_id': '', 'discogs_id': '',
'sources': set(), 'popularity': 0
}
a = artists_by_name[n]
if image_url and image_url.startswith('http') and not a['image_url']:
a['image_url'] = image_url
if genres:
for g in (genres if isinstance(genres, list) else []):
if g and isinstance(g, str):
a['genres'].add(g.lower().strip())
if spotify_id and not a['spotify_id']:
a['spotify_id'] = str(spotify_id)
if itunes_id and not a['itunes_id']:
a['itunes_id'] = str(itunes_id)
if deezer_id and not a['deezer_id']:
a['deezer_id'] = str(deezer_id)
if discogs_id and not a['discogs_id']:
a['discogs_id'] = str(discogs_id)
if popularity > a['popularity']:
a['popularity'] = popularity
a['sources'].add(source)
# 1. Metadata cache — biggest source
cursor.execute("""
SELECT name, entity_id, source, image_url, genres, popularity
FROM metadata_cache_entities WHERE entity_type = 'artist'
""")
for r in cursor.fetchall():
genres = []
if r['genres']:
try:
genres = json.loads(r['genres']) if isinstance(r['genres'], str) else []
except Exception as e:
logger.debug("cache artist genres parse failed: %s", e)
src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'}
kwargs = {src_map.get(r['source'], 'spotify_id'): r['entity_id']}
_add(r['name'], image_url=r['image_url'], genres=genres, source='cache', popularity=r['popularity'] or 0, **kwargs)
# 2. Similar artists
cursor.execute("""
SELECT similar_artist_name, similar_artist_spotify_id, similar_artist_itunes_id,
similar_artist_deezer_id, image_url, genres, popularity
FROM similar_artists WHERE profile_id = ?
""", (profile_id,))
for r in cursor.fetchall():
genres = []
if r['genres']:
try:
genres = json.loads(r['genres']) if isinstance(r['genres'], str) else []
except Exception as e:
logger.debug("similar artist genres parse failed: %s", e)
_add(r['similar_artist_name'], image_url=r['image_url'], genres=genres,
spotify_id=r['similar_artist_spotify_id'], itunes_id=r['similar_artist_itunes_id'],
deezer_id=r['similar_artist_deezer_id'], source='similar', popularity=r['popularity'] or 0)
# 3. Watchlist artists
cursor.execute("""
SELECT artist_name, spotify_artist_id, itunes_artist_id, deezer_artist_id,
discogs_artist_id, image_url
FROM watchlist_artists WHERE profile_id = ?
""", (profile_id,))
for r in cursor.fetchall():
_add(r['artist_name'], image_url=r['image_url'],
spotify_id=r['spotify_artist_id'], itunes_id=r['itunes_artist_id'],
deezer_id=r['deezer_artist_id'], discogs_id=r['discogs_artist_id'], source='watchlist')
# 4. Library artists
cursor.execute("SELECT name, thumb_url, genres FROM artists")
for r in cursor.fetchall():
genres = []
if r['genres']:
try:
genres = json.loads(r['genres']) if isinstance(r['genres'], str) else []
except Exception as e:
logger.debug("library artist genres parse failed: %s", e)
img = r['thumb_url'] if r['thumb_url'] and r['thumb_url'].startswith('http') else None
_add(r['name'], image_url=img, genres=genres, source='library')
# Filter: only include artists that have at least one genre
genre_artists = {k: v for k, v in artists_by_name.items() if v['genres']}
# Build genre → artists map
genre_map = {} # genre_name → [artist_keys]
for key, a in genre_artists.items():
for g in a['genres']:
if g not in genre_map:
genre_map[g] = []
genre_map[g].append(key)
# Sort genres by artist count, take top genres
sorted_genres = sorted(genre_map.items(), key=lambda x: -len(x[1]))
# Build nodes
nodes = []
node_idx = {}
for key, a in genre_artists.items():
idx = len(nodes)
node_idx[key] = idx
nodes.append({
'id': idx,
'name': a['name'],
'image_url': a['image_url'],
'genres': list(a['genres'])[:5],
'spotify_id': a['spotify_id'],
'itunes_id': a['itunes_id'],
'deezer_id': a['deezer_id'],
'discogs_id': a['discogs_id'],
'popularity': a['popularity'],
'type': 'watchlist' if 'watchlist' in a['sources'] else 'similar',
})
# Build genre clusters — allow artists in multiple genres
top_genres = sorted_genres[:40]
# Sort genres by co-occurrence so related genres are adjacent in the list.
# This makes the spiral layout place related genres near each other.
if len(top_genres) > 2:
genre_sets = {g: set(keys) for g, keys in top_genres}
ordered = [top_genres[0][0]] # Start with biggest genre
remaining = {g for g, _ in top_genres[1:]}
while remaining:
last = ordered[-1]
last_set = genre_sets.get(last, set())
# Find most similar remaining genre (highest artist overlap)
best = None
best_overlap = -1
for g in remaining:
overlap = len(last_set & genre_sets.get(g, set()))
if overlap > best_overlap:
best_overlap = overlap
best = g
ordered.append(best)
remaining.remove(best)
# Rebuild top_genres in the ordered sequence
genre_dict = dict(top_genres)
top_genres = [(g, genre_dict[g]) for g in ordered if g in genre_dict]
genres_out = []
for genre, artist_keys in top_genres:
genres_out.append({
'name': genre,
'count': len(artist_keys),
'artist_ids': [node_idx[k] for k in artist_keys if k in node_idx],
})
# Image cleanup + multi-source fallback
# Build two lookups: name→image_url AND name→deezer_entity_id
_img_cache = {}
_deezer_id_cache = {}
_album_art_cache = {} # artist_name → album image (iTunes fallback)
try:
# Artist images + Deezer IDs
cursor.execute("""
SELECT name, entity_id, source, image_url FROM metadata_cache_entities
WHERE entity_type = 'artist'
AND ((image_url IS NOT NULL AND image_url != '' AND image_url LIKE 'http%')
OR source = 'deezer')
""")
for r in cursor.fetchall():
nn = (r['name'] or '').lower().strip()
if not nn:
continue
if r['image_url'] and r['image_url'].startswith('http') and nn not in _img_cache:
_img_cache[nn] = r['image_url']
if r['source'] == 'deezer' and r['entity_id'] and nn not in _deezer_id_cache:
_deezer_id_cache[nn] = r['entity_id']
# Album art by artist name (for iTunes artists with no artist image)
cursor.execute("""
SELECT artist_name, image_url FROM metadata_cache_entities
WHERE entity_type = 'album'
AND image_url IS NOT NULL AND image_url != '' AND image_url LIKE 'http%'
AND artist_name IS NOT NULL AND artist_name != ''
""")
for r in cursor.fetchall():
nn = (r['artist_name'] or '').lower().strip()
if nn and nn not in _album_art_cache:
_album_art_cache[nn] = r['image_url']
except Exception as e:
logger.debug("genre map cache build failed: %s", e)
for n in nodes:
img = n.get('image_url', '')
if img in ('None', 'null', '') or (img and not img.startswith('http')):
n['image_url'] = ''
nn = n['name'].lower().strip()
if not n['image_url']:
# Try cache image by name
n['image_url'] = _img_cache.get(nn, '')
if not n['image_url'] and n.get('deezer_id'):
n['image_url'] = f"https://api.deezer.com/artist/{n['deezer_id']}/image?size=big"
if not n['image_url']:
# Try Deezer ID from cache by name
did = _deezer_id_cache.get(nn)
if did:
n['deezer_id'] = did
n['image_url'] = f"https://api.deezer.com/artist/{did}/image?size=big"
if not n['image_url']:
# Try album art by artist name (iTunes artists have no artist images)
n['image_url'] = _album_art_cache.get(nn, '')
_img_count = sum(1 for n in nodes if n.get('image_url'))
_deezer_count = sum(1 for n in nodes if n.get('image_url', '').startswith('https://api.deezer'))
_none_count = sum(1 for n in nodes if not n.get('image_url'))
logger.info(f"[Genre Map] {len(nodes)} artists, {len(sorted_genres)} genres")
logger.warning(f"[Genre Map] Images: {_img_count} have URLs, {_deezer_count} Deezer fallback, {_none_count} missing")
if _none_count > 0:
samples = [n['name'] for n in nodes if not n.get('image_url')][:5]
logger.warning(f"[Genre Map] Missing image samples: {samples}")
result = {
'success': True,
'nodes': nodes,
'genres': genres_out,
'total_artists': len(nodes),
'total_genres': len(sorted_genres),
}
_artmap_cache_set(f'genres_{profile_id}', result)
return jsonify(result)
except Exception as e:
logger.error(f"Error getting genre map data: {e}")
import traceback
traceback.print_exc()
return jsonify({"success": False, "error": str(e)}), 500
def get_artist_map_explore():
"""Build an exploration map outward from a single artist."""
try:
artist_name = request.args.get('name', '').strip()
artist_id = request.args.get('id', '').strip()
if not artist_name and not artist_id:
return jsonify({"success": False, "error": "Provide artist name or id"}), 400
database = get_database()
profile_id = get_current_profile_id()
conn = database._get_connection()
cursor = conn.cursor()
def _norm(n):
return (n or '').lower().strip()
nodes = []
edges = []
seen = {} # norm_name → node index
# Find the center artist
center_name = artist_name
center_image = ''
center_ids = {'spotify_id': '', 'itunes_id': '', 'deezer_id': '', 'discogs_id': ''}
center_genres = []
# Search metadata cache for the center artist
if artist_id:
cursor.execute("""
SELECT name, entity_id, source, image_url, genres FROM metadata_cache_entities
WHERE entity_type = 'artist' AND entity_id = ? LIMIT 1
""", (artist_id,))
else:
cursor.execute("""
SELECT name, entity_id, source, image_url, genres FROM metadata_cache_entities
WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE LIMIT 1
""", (artist_name,))
row = cursor.fetchone()
artist_found = False
if row:
artist_found = True
center_name = row['name']
if row['image_url'] and row['image_url'].startswith('http'):
center_image = row['image_url']
src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'}
k = src_map.get(row['source'], 'spotify_id')
center_ids[k] = row['entity_id']
if row['genres']:
try:
center_genres = json.loads(row['genres']) if isinstance(row['genres'], str) else []
except Exception as e:
logger.debug("initial center genres parse failed: %s", e)
# Check watchlist + library if not in cache
if not artist_found and not artist_id:
cursor.execute("SELECT artist_name, image_url, spotify_artist_id, itunes_artist_id, deezer_artist_id, discogs_artist_id FROM watchlist_artists WHERE artist_name = ? COLLATE NOCASE LIMIT 1", (artist_name,))
wr = cursor.fetchone()
if wr:
artist_found = True
center_name = wr['artist_name']
if wr['image_url'] and str(wr['image_url']).startswith('http'):
center_image = wr['image_url']
for k, col in [('spotify_id', 'spotify_artist_id'), ('itunes_id', 'itunes_artist_id'), ('deezer_id', 'deezer_artist_id'), ('discogs_id', 'discogs_artist_id')]:
if wr[col]:
center_ids[k] = str(wr[col])
else:
cursor.execute("SELECT name, thumb_url FROM artists WHERE name = ? COLLATE NOCASE LIMIT 1", (artist_name,))
lr = cursor.fetchone()
if lr:
artist_found = True
center_name = lr['name']
if lr['thumb_url'] and str(lr['thumb_url']).startswith('http'):
center_image = lr['thumb_url']
# If not found locally, validate via metadata API search
if not artist_found and not artist_id:
try:
api_match = None
if spotify_client and spotify_client.is_spotify_authenticated():
results = spotify_client.search_artists(artist_name, limit=1)
if results and len(results) > 0:
sa = results[0]
if sa.name.lower().strip() == artist_name.lower().strip() or \
artist_name.lower().strip() in sa.name.lower().strip():
api_match = sa
center_name = sa.name
center_ids['spotify_id'] = sa.id
center_image = sa.image_url if hasattr(sa, 'image_url') else ''
center_genres = sa.genres if hasattr(sa, 'genres') else []
artist_found = True
if not artist_found:
ic = _get_itunes_client()
results = ic.search_artists(artist_name, limit=1)
if results and len(results) > 0:
ia = results[0]
if ia.name.lower().strip() == artist_name.lower().strip() or \
artist_name.lower().strip() in ia.name.lower().strip():
center_name = ia.name
center_ids['itunes_id'] = str(ia.id)
center_image = ia.image_url if hasattr(ia, 'image_url') else ''
artist_found = True
except Exception as e:
logger.debug(f"[Artist Explorer] API validation failed for '{artist_name}': {e}")
if not artist_found:
return jsonify({"success": False, "error": f"Artist '{artist_name}' not found"}), 404
# Also check cache for other source IDs
cursor.execute("""
SELECT entity_id, source, image_url, genres FROM metadata_cache_entities
WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE
""", (center_name,))
for r in cursor.fetchall():
src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'}
k = src_map.get(r['source'], 'spotify_id')
if not center_ids.get(k):
center_ids[k] = r['entity_id']
if r['image_url'] and r['image_url'].startswith('http') and not center_image:
center_image = r['image_url']
if r['genres'] and not center_genres:
try:
center_genres = json.loads(r['genres']) if isinstance(r['genres'], str) else []
except Exception as e:
logger.debug("center genres parse failed: %s", e)
# Add center node
center_idx = 0
seen[_norm(center_name)] = center_idx
nodes.append({
'id': 0, 'name': center_name, 'image_url': center_image,
'type': 'center', 'genres': center_genres[:5],
**center_ids, 'ring': 0
})
# Ring 1: Direct similar artists from similar_artists table
# Search by all known IDs
id_values = [v for v in center_ids.values() if v]
ring1_artists = []
if id_values:
placeholders = ','.join(['?'] * len(id_values))
cursor.execute(f"""
SELECT DISTINCT similar_artist_name, similar_artist_spotify_id,
similar_artist_itunes_id, similar_artist_deezer_id,
image_url, genres, popularity, similarity_rank
FROM similar_artists
WHERE source_artist_id IN ({placeholders}) AND profile_id = ?
ORDER BY similarity_rank ASC
""", id_values + [profile_id])
ring1_artists = cursor.fetchall()
# Also search by name (the center artist might be a watchlist source)
cursor.execute("""
SELECT DISTINCT sa.similar_artist_name, sa.similar_artist_spotify_id,
sa.similar_artist_itunes_id, sa.similar_artist_deezer_id,
sa.image_url, sa.genres, sa.popularity, sa.similarity_rank
FROM similar_artists sa
JOIN watchlist_artists wa ON sa.source_artist_id = COALESCE(wa.spotify_artist_id, wa.itunes_artist_id, CAST(wa.id AS TEXT))
WHERE wa.artist_name = ? COLLATE NOCASE AND sa.profile_id = ?
ORDER BY sa.similarity_rank ASC
""", (center_name, profile_id))
ring1_artists.extend(cursor.fetchall())
# If no similar artists in DB, fetch from MusicMap on-the-fly
if not ring1_artists:
try:
logger.debug(f"[Artist Explorer] No stored similar artists for '{center_name}', fetching from MusicMap...")
from core.watchlist_scanner import WatchlistScanner
scanner = WatchlistScanner(spotify_client=spotify_client) if spotify_client else None
if scanner:
similar = scanner._fetch_similar_artists_from_musicmap(center_name, limit=15)
if similar:
source_artist_id = center_ids.get('spotify_id') or center_ids.get('itunes_id') or center_name
# Store in DB for future use
for rank, sa in enumerate(similar, 1):
try:
database.add_or_update_similar_artist(
source_artist_id=source_artist_id,
similar_artist_name=sa['name'],
similar_artist_spotify_id=sa.get('spotify_id'),
similar_artist_itunes_id=sa.get('itunes_id'),
similarity_rank=rank,
profile_id=profile_id,
image_url=sa.get('image_url'),
genres=sa.get('genres'),
popularity=sa.get('popularity', 0),
similar_artist_deezer_id=sa.get('deezer_id')
)
except Exception as e:
logger.debug("similar artist insert failed: %s", e)
# Re-query from DB to get consistent format
if id_values:
placeholders = ','.join(['?'] * len(id_values))
cursor.execute(f"""
SELECT DISTINCT similar_artist_name, similar_artist_spotify_id,
similar_artist_itunes_id, similar_artist_deezer_id,
image_url, genres, popularity, similarity_rank
FROM similar_artists
WHERE source_artist_id IN ({placeholders}) AND profile_id = ?
ORDER BY similarity_rank ASC
""", id_values + [profile_id])
ring1_artists = cursor.fetchall()
if not ring1_artists:
# Fallback: query by name-based source ID
cursor.execute("""
SELECT DISTINCT similar_artist_name, similar_artist_spotify_id,
similar_artist_itunes_id, similar_artist_deezer_id,
image_url, genres, popularity, similarity_rank
FROM similar_artists
WHERE source_artist_id = ? AND profile_id = ?
ORDER BY similarity_rank ASC
""", (source_artist_id, profile_id))
ring1_artists = cursor.fetchall()
logger.debug(f"[Artist Explorer] Fetched {len(ring1_artists)} similar artists from MusicMap for '{center_name}'")
_artmap_cache_invalidate(profile_id) # New similar artists added
except Exception as e:
logger.debug(f"[Artist Explorer] MusicMap fetch failed for '{center_name}': {e}")
# Deduplicate ring 1
for r in ring1_artists:
nn = _norm(r['similar_artist_name'])
if nn in seen:
continue
idx = len(nodes)
seen[nn] = idx
genres = []
if r['genres']:
try:
genres = json.loads(r['genres']) if isinstance(r['genres'], str) else []
except Exception as e:
logger.debug("ring1 genres parse failed: %s", e)
img = r['image_url'] if r['image_url'] and r['image_url'].startswith('http') else ''
nodes.append({
'id': idx, 'name': r['similar_artist_name'], 'image_url': img,
'type': 'ring1', 'genres': genres[:5],
'spotify_id': r['similar_artist_spotify_id'] or '',
'itunes_id': r['similar_artist_itunes_id'] or '',
'deezer_id': r['similar_artist_deezer_id'] or '',
'discogs_id': '',
'popularity': r['popularity'] or 0,
'rank': r['similarity_rank'] or 5,
'ring': 1,
})
weight = max(1, 11 - (r['similarity_rank'] or 5))
edges.append({'source': center_idx, 'target': idx, 'weight': weight})
# Ring 2: Similar artists of ring 1 artists (from similar_artists table)
ring1_ids = []
for n in nodes[1:]: # skip center
for sid in [n.get('spotify_id'), n.get('itunes_id')]:
if sid:
ring1_ids.append(sid)
if ring1_ids:
placeholders = ','.join(['?'] * len(ring1_ids))
cursor.execute(f"""
SELECT DISTINCT source_artist_id, similar_artist_name,
similar_artist_spotify_id, similar_artist_itunes_id,
similar_artist_deezer_id, image_url, genres, popularity, similarity_rank
FROM similar_artists
WHERE source_artist_id IN ({placeholders}) AND profile_id = ?
ORDER BY similarity_rank ASC
""", ring1_ids + [profile_id])
for r in cursor.fetchall():
nn = _norm(r['similar_artist_name'])
if nn in seen:
# Create edge to existing node if not center
existing_idx = seen[nn]
# Find the ring1 node that sourced this
source_norm = None
for n in nodes[1:]:
for sid in [n.get('spotify_id'), n.get('itunes_id')]:
if sid == r['source_artist_id']:
source_norm = _norm(n['name'])
break
if source_norm:
break
if source_norm and source_norm in seen and existing_idx != seen[source_norm]:
edges.append({'source': seen[source_norm], 'target': existing_idx, 'weight': 3})
continue
idx = len(nodes)
if idx >= 500: # Cap at 500 nodes for performance
break
seen[nn] = idx
genres = []
if r['genres']:
try:
genres = json.loads(r['genres']) if isinstance(r['genres'], str) else []
except Exception as e:
logger.debug("ring2 genres parse failed: %s", e)
img = r['image_url'] if r['image_url'] and r['image_url'].startswith('http') else ''
nodes.append({
'id': idx, 'name': r['similar_artist_name'], 'image_url': img,
'type': 'ring2', 'genres': genres[:5],
'spotify_id': r['similar_artist_spotify_id'] or '',
'itunes_id': r['similar_artist_itunes_id'] or '',
'deezer_id': r['similar_artist_deezer_id'] or '',
'discogs_id': '',
'popularity': r['popularity'] or 0,
'rank': r['similarity_rank'] or 5,
'ring': 2,
})
# Find the ring1 source
for n in nodes[1:]:
for sid in [n.get('spotify_id'), n.get('itunes_id')]:
if sid == r['source_artist_id']:
edges.append({'source': n['id'], 'target': idx, 'weight': max(1, 11 - (r['similarity_rank'] or 5))})
break
# Backfill images/genres from ALL cache sources + Deezer fallback
for n in nodes:
# Clean up string "None" stored as image URL
if n['image_url'] in ('None', 'null', ''):
n['image_url'] = ''
if n['image_url'] and n['genres']:
continue
# Check all cache entries for this artist (multiple sources)
cursor.execute("""
SELECT entity_id, source, image_url, genres FROM metadata_cache_entities
WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE
""", (n['name'],))
for cr in cursor.fetchall():
if not n['image_url'] and cr['image_url'] and cr['image_url'].startswith('http'):
n['image_url'] = cr['image_url']
if not n['genres'] and cr['genres']:
try:
n['genres'] = json.loads(cr['genres'])[:5] if isinstance(cr['genres'], str) else []
except Exception as e:
logger.debug("explorer node genres parse failed: %s", e)
# Harvest missing IDs from cache
src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'}
k = src_map.get(cr['source'])
if k and not n.get(k):
n[k] = cr['entity_id']
# Deezer image fallback — construct URL directly from ID
if not n['image_url'] and n.get('deezer_id'):
n['image_url'] = f"https://api.deezer.com/artist/{n['deezer_id']}/image?size=big"
# Spotify image fallback — try API if authenticated
if not n['image_url'] and n.get('spotify_id'):
try:
if spotify_client and spotify_client.is_spotify_authenticated():
from core.api_call_tracker import api_call_tracker
api_call_tracker.record_call('spotify', endpoint='artist')
artist_data = spotify_client.sp.artist(n['spotify_id'])
if artist_data and artist_data.get('images'):
n['image_url'] = artist_data['images'][0]['url']
if not n['genres'] and artist_data.get('genres'):
n['genres'] = artist_data['genres'][:5]
except Exception as e:
logger.debug("spotify artist image fallback failed: %s", e)
# Album art fallback (iTunes artists have no artist images)
if not n['image_url']:
cursor.execute("""
SELECT image_url FROM metadata_cache_entities
WHERE entity_type = 'album' AND image_url LIKE 'http%'
AND artist_name = ? COLLATE NOCASE LIMIT 1
""", (n['name'],))
alb = cursor.fetchone()
if alb:
n['image_url'] = alb['image_url']
logger.info(f"[Artist Explorer] Center: {center_name}, Ring 1: {sum(1 for n in nodes if n.get('ring')==1)}, Ring 2: {sum(1 for n in nodes if n.get('ring')==2)}, Edges: {len(edges)}")
return jsonify({
'success': True,
'nodes': nodes,
'edges': edges,
'center': center_name,
})
except Exception as e:
logger.error(f"Error getting artist explorer data: {e}")
import traceback
traceback.print_exc()
return jsonify({"success": False, "error": str(e)}), 500