mirror of https://github.com/Nezreka/SoulSync.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
325 lines
12 KiB
325 lines
12 KiB
"""Liked-artist multi-source matching — lifted from web_server.py.
|
|
|
|
Both function bodies are byte-identical to the originals. The
|
|
``spotify_client`` proxy + ``_get_*_client`` shims let the bodies resolve
|
|
their original names without any modification.
|
|
"""
|
|
import logging
|
|
import time
|
|
|
|
from config.settings import config_manager
|
|
from core.metadata.registry import (
|
|
get_deezer_client,
|
|
get_discogs_client,
|
|
get_itunes_client,
|
|
get_musicbrainz_client,
|
|
get_spotify_client,
|
|
)
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _get_itunes_client():
    """Return the iTunes client supplied by the metadata registry.

    Stand-in for ``web_server._get_itunes_client``; it only forwards to
    ``get_itunes_client``.
    """
    itunes = get_itunes_client()
    return itunes
|
|
|
|
|
|
def _get_deezer_client():
    """Return the Deezer client supplied by the metadata registry.

    Stand-in for ``web_server._get_deezer_client``; it only forwards to
    ``get_deezer_client``.
    """
    deezer = get_deezer_client()
    return deezer
|
|
|
|
|
|
def _get_discogs_client(token=None):
    """Return the Discogs client supplied by the metadata registry.

    Stand-in for ``web_server._get_discogs_client``. ``token`` is passed
    through to ``get_discogs_client`` unchanged (``None`` by default).
    """
    discogs = get_discogs_client(token)
    return discogs
|
|
|
|
|
|
def _get_musicbrainz_client():
    """Return the MusicBrainz client supplied by the metadata registry.

    Stand-in for ``web_server._get_musicbrainz_client``; it only forwards to
    ``get_musicbrainz_client``.
    """
    musicbrainz = get_musicbrainz_client()
    return musicbrainz
|
|
|
|
|
|
class _SpotifyClientProxy:
    """Late-binding stand-in for the global Spotify client.

    Every access asks ``get_spotify_client()`` again instead of holding a
    reference, so a client that is rebound in ``core.metadata.registry``
    (for example after a Spotify re-auth) is what the lifted function bodies
    end up talking to.
    """

    def __getattr__(self, name):
        # Only invoked for attributes the proxy itself does not define.
        target = get_spotify_client()
        if target is not None:
            return getattr(target, name)
        raise AttributeError(name)

    def __bool__(self):
        # Truthy exactly when the registry currently has a client.
        current = get_spotify_client()
        return current is not None


# Shared instance referenced by the lifted function bodies under the name
# ``spotify_client``.
spotify_client = _SpotifyClientProxy()
|
|
|
|
|
|
def _match_liked_artists_to_all_sources(database, profile_id: int):
|
|
"""Match pending liked artists to ALL metadata sources (Spotify, iTunes, Deezer, Discogs).
|
|
Uses the same matching pattern as the watchlist scanner: DB-first, then API search
|
|
with fuzzy name matching. Stores all resolved IDs so source switching works instantly."""
|
|
pending = database.get_liked_artists_pending_match(profile_id, limit=200)
|
|
if not pending:
|
|
return
|
|
|
|
# Source → column mapping
|
|
source_cols = {
|
|
'spotify': 'spotify_artist_id',
|
|
'itunes': 'itunes_artist_id',
|
|
'deezer': 'deezer_artist_id',
|
|
'discogs': 'discogs_artist_id',
|
|
'musicbrainz': 'musicbrainz_artist_id',
|
|
}
|
|
id_cols = list(source_cols.values())
|
|
|
|
# Reject known placeholder images and local server paths
|
|
_placeholder_hashes = {'2a96cbd8b46e442fc41c2b86b821562f'}
|
|
def _valid_image(url):
|
|
if not url or not url.strip():
|
|
return None
|
|
if any(ph in url for ph in _placeholder_hashes):
|
|
return None
|
|
# Reject local media server paths (Plex/Jellyfin) — not loadable in browser
|
|
if url.startswith('/') or url.startswith('\\'):
|
|
return None
|
|
if not url.startswith('http'):
|
|
return None
|
|
return url
|
|
|
|
# Build search clients for each source
|
|
from core.deezer_client import DeezerClient
|
|
search_clients = {}
|
|
if spotify_client and spotify_client.is_spotify_authenticated():
|
|
search_clients['spotify'] = spotify_client
|
|
try:
|
|
search_clients['itunes'] = _get_itunes_client()
|
|
except Exception as e:
|
|
logger.debug("itunes client init failed: %s", e)
|
|
try:
|
|
search_clients['deezer'] = _get_deezer_client()
|
|
except Exception as e:
|
|
logger.debug("deezer client init failed: %s", e)
|
|
try:
|
|
dc = _get_discogs_client()
|
|
# Only use Discogs if token is configured
|
|
from config.settings import config_manager as _cm
|
|
if _cm.get('discogs.token', ''):
|
|
search_clients['discogs'] = dc
|
|
except Exception as e:
|
|
logger.debug("discogs client init failed: %s", e)
|
|
try:
|
|
search_clients['musicbrainz'] = _get_musicbrainz_client()
|
|
except Exception as e:
|
|
logger.debug("musicbrainz client init failed: %s", e)
|
|
|
|
# Reuse watchlist scanner's fuzzy matching logic
|
|
from core.watchlist_scanner import WatchlistScanner
|
|
_normalize = WatchlistScanner._normalize_artist_name
|
|
|
|
def _best_match(results, artist_name):
|
|
"""Pick best match from search results using name similarity (same as watchlist scanner)."""
|
|
if not results:
|
|
return None
|
|
# Exact normalized match
|
|
for r in results:
|
|
if _normalize(r.name) == _normalize(artist_name):
|
|
return r
|
|
# Fuzzy scoring
|
|
best = None
|
|
best_sim = 0
|
|
for r in results:
|
|
# Simple normalized comparison
|
|
n1 = _normalize(artist_name)
|
|
n2 = _normalize(r.name)
|
|
if n1 == n2:
|
|
return r
|
|
# Levenshtein-style similarity
|
|
max_len = max(len(n1), len(n2))
|
|
if max_len == 0:
|
|
continue
|
|
distance = sum(1 for a, b in zip(n1, n2, strict=False) if a != b) + abs(len(n1) - len(n2))
|
|
sim = (max_len - distance) / max_len
|
|
if sim > best_sim:
|
|
best_sim = sim
|
|
best = r
|
|
if best and best_sim >= 0.85:
|
|
return best
|
|
return None
|
|
|
|
api_calls = 0
|
|
matched = 0
|
|
|
|
for entry in pending:
|
|
name = entry['artist_name']
|
|
pool_id = entry['id']
|
|
harvested_ids = {}
|
|
best_image = None
|
|
|
|
# Pre-load existing IDs from the entry itself
|
|
for col in id_cols:
|
|
if entry.get(col):
|
|
harvested_ids[col] = entry[col]
|
|
|
|
# --- DB STRATEGIES (free, no API calls) ---
|
|
|
|
# 1. Library artists table
|
|
try:
|
|
conn = database._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT * FROM artists WHERE name = ? COLLATE NOCASE LIMIT 1", (name,))
|
|
row = cursor.fetchone()
|
|
if row:
|
|
r = dict(row)
|
|
for col in id_cols:
|
|
if r.get(col) and col not in harvested_ids:
|
|
harvested_ids[col] = str(r[col])
|
|
if _valid_image(r.get('thumb_url')):
|
|
best_image = r['thumb_url']
|
|
except Exception as e:
|
|
logger.debug("library artist lookup failed: %s", e)
|
|
|
|
# 2. Watchlist artists
|
|
try:
|
|
conn = database._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"SELECT * FROM watchlist_artists WHERE artist_name = ? COLLATE NOCASE AND profile_id = ? LIMIT 1",
|
|
(name, profile_id)
|
|
)
|
|
row = cursor.fetchone()
|
|
if row:
|
|
wl = dict(row)
|
|
for col in id_cols:
|
|
if wl.get(col) and col not in harvested_ids:
|
|
harvested_ids[col] = str(wl[col])
|
|
if _valid_image(wl.get('image_url')) and not best_image:
|
|
best_image = wl['image_url']
|
|
except Exception as e:
|
|
logger.debug("watchlist artist lookup failed: %s", e)
|
|
|
|
# 3. Metadata cache (all sources)
|
|
try:
|
|
conn = database._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"SELECT entity_id, source, image_url FROM metadata_cache_entities WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE",
|
|
(name,)
|
|
)
|
|
for row in cursor.fetchall():
|
|
col = source_cols.get(row['source'])
|
|
if col and col not in harvested_ids:
|
|
harvested_ids[col] = row['entity_id']
|
|
if _valid_image(row['image_url']) and not best_image:
|
|
best_image = row['image_url']
|
|
except Exception as e:
|
|
logger.debug("metadata cache lookup failed: %s", e)
|
|
|
|
# --- API STRATEGIES (search each missing source) ---
|
|
# Same pattern as watchlist scanner's _backfill_missing_ids
|
|
for source, col in source_cols.items():
|
|
if col in harvested_ids:
|
|
continue # Already have this source's ID
|
|
client = search_clients.get(source)
|
|
if not client:
|
|
continue
|
|
if api_calls >= 200: # Hard cap per refresh cycle
|
|
break
|
|
try:
|
|
results = client.search_artists(name, limit=5)
|
|
best = _best_match(results, name)
|
|
if best:
|
|
harvested_ids[col] = best.id
|
|
if hasattr(best, 'image_url') and _valid_image(best.image_url) and not best_image:
|
|
best_image = best.image_url
|
|
api_calls += 1
|
|
time.sleep(0.4) # Rate limit breathing room
|
|
except Exception as e:
|
|
logger.debug(f"[Your Artists] {source} search failed for '{name}': {e}")
|
|
api_calls += 1
|
|
|
|
# Save all harvested IDs
|
|
if harvested_ids:
|
|
# Determine best active source/ID — prefer Spotify, then iTunes, Deezer, Discogs
|
|
resolved_source = None
|
|
resolved_id = None
|
|
for src in ('spotify', 'itunes', 'deezer', 'discogs', 'musicbrainz'):
|
|
col = source_cols[src]
|
|
if col in harvested_ids:
|
|
resolved_source = src
|
|
resolved_id = harvested_ids[col]
|
|
break
|
|
|
|
database.update_liked_artist_match(
|
|
pool_id, active_source=resolved_source, active_source_id=resolved_id,
|
|
image_url=best_image, all_ids=harvested_ids
|
|
)
|
|
matched += 1
|
|
|
|
database.sync_liked_artists_watchlist_flags(profile_id)
|
|
logger.info(f"[Your Artists] Matched {matched}/{len(pending)} artists to {len(search_clients)} sources ({api_calls} API calls)")
|
|
|
|
# Image backfill: fetch images for matched artists that have IDs but no image
|
|
_backfill_liked_artist_images(database, profile_id, search_clients)
|
|
|
|
|
|
def _backfill_liked_artist_images(database, profile_id: int, search_clients: dict):
    """Fetch images for matched artists missing artwork using their stored source IDs.

    Up to 100 ``liked_artists_pool`` rows of the profile are selected whose
    ``match_status`` is ``'matched'`` and whose ``image_url`` is empty, a
    known placeholder, or not an ``http`` URL. For each row the image comes
    from the Spotify artist lookup when a Spotify ID and a usable Spotify
    client are available, otherwise from the Deezer image URL built directly
    from the Deezer ID (no request is made for that).

    The work is done in two phases so that no ``UPDATE`` is pending while the
    function waits on the network or sleeps: first every image URL is
    resolved, then all rows are updated and committed in one go.

    Any error is logged at debug level; the function never raises.

    NOTE(review): the connection from ``database._get_connection()`` is not
    closed here — confirm whether the database layer owns or pools it.

    Args:
        database: Object exposing ``_get_connection()``.
        profile_id: Profile whose liked artists are backfilled.
        search_clients: ``{source: client}`` dict; only ``'spotify'`` is used.
    """
    try:
        conn = database._get_connection()
        cursor = conn.cursor()
        cursor.execute("""
            SELECT id, artist_name, spotify_artist_id, itunes_artist_id, deezer_artist_id
            FROM liked_artists_pool
            WHERE profile_id = ? AND match_status = 'matched'
            AND (image_url IS NULL OR image_url = ''
                 OR image_url LIKE '%2a96cbd8b46e442fc41c2b86b821562f%'
                 OR image_url NOT LIKE 'http%')
            LIMIT 100
        """, (profile_id,))
        rows = cursor.fetchall()
        if not rows:
            return

        logger.info("[Your Artists] Backfilling images for %s artists...", len(rows))

        # Phase 1: resolve image URLs. Nothing is written yet, so the
        # connection holds no open write transaction during network calls.
        pending_updates = []
        for row in rows:
            r = dict(row)
            image_url = None
            called_spotify = False

            # Try Spotify artist lookup (has best images)
            if r.get('spotify_artist_id') and 'spotify' in search_clients:
                try:
                    sp = search_clients['spotify']
                    if hasattr(sp, 'sp') and sp.sp:
                        called_spotify = True
                        artist_data = sp.sp.artist(r['spotify_artist_id'])
                        if artist_data and artist_data.get('images'):
                            image_url = artist_data['images'][0]['url']
                except Exception as e:
                    logger.debug("spotify artist image fetch failed: %s", e)

            # Try Deezer (direct image URL from ID)
            if not image_url and r.get('deezer_artist_id'):
                image_url = f"https://api.deezer.com/artist/{r['deezer_artist_id']}/image?size=big"

            if image_url:
                pending_updates.append((image_url, r['id']))

            # Pause only when a remote request was actually attempted.
            if called_spotify:
                time.sleep(0.3)

        # Phase 2: write everything back-to-back and commit once.
        filled = 0
        write_cursor = conn.cursor()
        for image_url, pool_id in pending_updates:
            try:
                write_cursor.execute(
                    "UPDATE liked_artists_pool SET image_url = ? WHERE id = ?",
                    (image_url, pool_id)
                )
                filled += 1
            except Exception as e:
                logger.debug("liked artist image update failed: %s", e)

        conn.commit()
        if filled:
            logger.info("[Your Artists] Backfilled %s/%s artist images", filled, len(rows))
    except Exception as e:
        logger.debug("[Your Artists] Image backfill error: %s", e)
|