From 0e237f14d4f097504f0f79bb65f50e34e2fb3aba Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Wed, 29 Apr 2026 17:30:22 -0700 Subject: [PATCH] Lift liked-artist matching to core/artists/liked_match.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lifts _match_liked_artists_to_all_sources and _backfill_liked_artist_images. Both bodies are byte-identical to the originals. Uses the same _SpotifyClientProxy + _get_*_client shim pattern as core/artists/map.py so the bodies resolve their original names without modification. web_server.py: 37501 → 37245 (-256 lines). --- core/artists/liked_match.py | 313 ++++++++++++++++++++++++++++++++++++ web_server.py | 264 +----------------------------- 2 files changed, 317 insertions(+), 260 deletions(-) create mode 100644 core/artists/liked_match.py diff --git a/core/artists/liked_match.py b/core/artists/liked_match.py new file mode 100644 index 00000000..9607e31d --- /dev/null +++ b/core/artists/liked_match.py @@ -0,0 +1,313 @@ +"""Liked-artist multi-source matching — lifted from web_server.py. + +Both function bodies are byte-identical to the originals. The +``spotify_client`` proxy + ``_get_*_client`` shims let the bodies resolve +their original names without any modification. +""" +import logging +import time + +from config.settings import config_manager +from core.metadata.registry import ( + get_deezer_client, + get_discogs_client, + get_itunes_client, + get_spotify_client, +) + +logger = logging.getLogger(__name__) + + +def _get_itunes_client(): + """Mirror of web_server._get_itunes_client — delegates to registry.""" + return get_itunes_client() + + +def _get_deezer_client(): + """Mirror of web_server._get_deezer_client — delegates to registry.""" + return get_deezer_client() + + +def _get_discogs_client(token=None): + """Mirror of web_server._get_discogs_client — delegates to registry.""" + return get_discogs_client(token) + + +class _SpotifyClientProxy: + """Resolves the global Spotify client lazily so a Spotify re-auth that + rebinds the cached client in core.metadata.registry is visible to the + lifted bodies.""" + + def __getattr__(self, name): + client = get_spotify_client() + if client is None: + raise AttributeError(name) + return getattr(client, name) + + def __bool__(self): + return get_spotify_client() is not None + + +spotify_client = _SpotifyClientProxy() + + +def _match_liked_artists_to_all_sources(database, profile_id: int): + """Match pending liked artists to ALL metadata sources (Spotify, iTunes, Deezer, Discogs). + Uses the same matching pattern as the watchlist scanner: DB-first, then API search + with fuzzy name matching. Stores all resolved IDs so source switching works instantly.""" + pending = database.get_liked_artists_pending_match(profile_id, limit=200) + if not pending: + return + + # Source → column mapping + source_cols = { + 'spotify': 'spotify_artist_id', + 'itunes': 'itunes_artist_id', + 'deezer': 'deezer_artist_id', + 'discogs': 'discogs_artist_id', + } + id_cols = list(source_cols.values()) + + # Reject known placeholder images and local server paths + _placeholder_hashes = {'2a96cbd8b46e442fc41c2b86b821562f'} + def _valid_image(url): + if not url or not url.strip(): + return None + if any(ph in url for ph in _placeholder_hashes): + return None + # Reject local media server paths (Plex/Jellyfin) — not loadable in browser + if url.startswith('/') or url.startswith('\\'): + return None + if not url.startswith('http'): + return None + return url + + # Build search clients for each source + from core.deezer_client import DeezerClient + search_clients = {} + if spotify_client and spotify_client.is_spotify_authenticated(): + search_clients['spotify'] = spotify_client + try: + search_clients['itunes'] = _get_itunes_client() + except Exception: + pass + try: + search_clients['deezer'] = _get_deezer_client() + except Exception: + pass + try: + dc = _get_discogs_client() + # Only use Discogs if token is configured + from config.settings import config_manager as _cm + if _cm.get('discogs.token', ''): + search_clients['discogs'] = dc + except Exception: + pass + + # Reuse watchlist scanner's fuzzy matching logic + from core.watchlist_scanner import WatchlistScanner + _normalize = WatchlistScanner._normalize_artist_name + + def _best_match(results, artist_name): + """Pick best match from search results using name similarity (same as watchlist scanner).""" + if not results: + return None + # Exact normalized match + for r in results: + if _normalize(r.name) == _normalize(artist_name): + return r + # Fuzzy scoring + best = None + best_sim = 0 + for r in results: + # Simple normalized comparison + n1 = _normalize(artist_name) + n2 = _normalize(r.name) + if n1 == n2: + return r + # Levenshtein-style similarity + max_len = max(len(n1), len(n2)) + if max_len == 0: + continue + distance = sum(1 for a, b in zip(n1, n2, strict=False) if a != b) + abs(len(n1) - len(n2)) + sim = (max_len - distance) / max_len + if sim > best_sim: + best_sim = sim + best = r + if best and best_sim >= 0.85: + return best + return None + + api_calls = 0 + matched = 0 + + for entry in pending: + name = entry['artist_name'] + pool_id = entry['id'] + harvested_ids = {} + best_image = None + + # Pre-load existing IDs from the entry itself + for col in id_cols: + if entry.get(col): + harvested_ids[col] = entry[col] + + # --- DB STRATEGIES (free, no API calls) --- + + # 1. Library artists table + try: + conn = database._get_connection() + cursor = conn.cursor() + cursor.execute("SELECT * FROM artists WHERE name = ? COLLATE NOCASE LIMIT 1", (name,)) + row = cursor.fetchone() + if row: + r = dict(row) + for col in id_cols: + if r.get(col) and col not in harvested_ids: + harvested_ids[col] = str(r[col]) + if _valid_image(r.get('thumb_url')): + best_image = r['thumb_url'] + except Exception: + pass + + # 2. Watchlist artists + try: + conn = database._get_connection() + cursor = conn.cursor() + cursor.execute( + "SELECT * FROM watchlist_artists WHERE artist_name = ? COLLATE NOCASE AND profile_id = ? LIMIT 1", + (name, profile_id) + ) + row = cursor.fetchone() + if row: + wl = dict(row) + for col in id_cols: + if wl.get(col) and col not in harvested_ids: + harvested_ids[col] = str(wl[col]) + if _valid_image(wl.get('image_url')) and not best_image: + best_image = wl['image_url'] + except Exception: + pass + + # 3. Metadata cache (all sources) + try: + conn = database._get_connection() + cursor = conn.cursor() + cursor.execute( + "SELECT entity_id, source, image_url FROM metadata_cache_entities WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE", + (name,) + ) + for row in cursor.fetchall(): + col = source_cols.get(row['source']) + if col and col not in harvested_ids: + harvested_ids[col] = row['entity_id'] + if _valid_image(row['image_url']) and not best_image: + best_image = row['image_url'] + except Exception: + pass + + # --- API STRATEGIES (search each missing source) --- + # Same pattern as watchlist scanner's _backfill_missing_ids + for source, col in source_cols.items(): + if col in harvested_ids: + continue # Already have this source's ID + client = search_clients.get(source) + if not client: + continue + if api_calls >= 200: # Hard cap per refresh cycle + break + try: + results = client.search_artists(name, limit=5) + best = _best_match(results, name) + if best: + harvested_ids[col] = best.id + if hasattr(best, 'image_url') and _valid_image(best.image_url) and not best_image: + best_image = best.image_url + api_calls += 1 + time.sleep(0.4) # Rate limit breathing room + except Exception as e: + logger.debug(f"[Your Artists] {source} search failed for '{name}': {e}") + api_calls += 1 + + # Save all harvested IDs + if harvested_ids: + # Determine best active source/ID — prefer Spotify, then iTunes, Deezer, Discogs + resolved_source = None + resolved_id = None + for src in ('spotify', 'itunes', 'deezer', 'discogs'): + col = source_cols[src] + if col in harvested_ids: + resolved_source = src + resolved_id = harvested_ids[col] + break + + database.update_liked_artist_match( + pool_id, active_source=resolved_source, active_source_id=resolved_id, + image_url=best_image, all_ids=harvested_ids + ) + matched += 1 + + database.sync_liked_artists_watchlist_flags(profile_id) + logger.info(f"[Your Artists] Matched {matched}/{len(pending)} artists to {len(search_clients)} sources ({api_calls} API calls)") + + # Image backfill: fetch images for matched artists that have IDs but no image + _backfill_liked_artist_images(database, profile_id, search_clients) + + +def _backfill_liked_artist_images(database, profile_id: int, search_clients: dict): + """Fetch images for matched artists missing artwork using their stored source IDs.""" + try: + conn = database._get_connection() + cursor = conn.cursor() + cursor.execute(""" + SELECT id, artist_name, spotify_artist_id, itunes_artist_id, deezer_artist_id + FROM liked_artists_pool + WHERE profile_id = ? AND match_status = 'matched' + AND (image_url IS NULL OR image_url = '' + OR image_url LIKE '%2a96cbd8b46e442fc41c2b86b821562f%' + OR image_url NOT LIKE 'http%') + LIMIT 100 + """, (profile_id,)) + rows = cursor.fetchall() + if not rows: + return + + logger.info(f"[Your Artists] Backfilling images for {len(rows)} artists...") + filled = 0 + + for row in rows: + r = dict(row) + image_url = None + + # Try Spotify artist lookup (has best images) + if r.get('spotify_artist_id') and 'spotify' in search_clients: + try: + sp = search_clients['spotify'] + if hasattr(sp, 'sp') and sp.sp: + artist_data = sp.sp.artist(r['spotify_artist_id']) + if artist_data and artist_data.get('images'): + image_url = artist_data['images'][0]['url'] + except Exception: + pass + + # Try Deezer (direct image URL from ID) + if not image_url and r.get('deezer_artist_id'): + image_url = f"https://api.deezer.com/artist/{r['deezer_artist_id']}/image?size=big" + + if image_url: + try: + cursor2 = conn.cursor() + cursor2.execute( + "UPDATE liked_artists_pool SET image_url = ? WHERE id = ?", + (image_url, r['id']) + ) + filled += 1 + except Exception: + pass + time.sleep(0.3) + + conn.commit() + if filled: + logger.info(f"[Your Artists] Backfilled {filled}/{len(rows)} artist images") + except Exception as e: + logger.debug(f"[Your Artists] Image backfill error: {e}") diff --git a/web_server.py b/web_server.py index 88c88f98..40ea2cf8 100644 --- a/web_server.py +++ b/web_server.py @@ -29594,266 +29594,10 @@ def _fetch_and_match_liked_artists(profile_id: int): _match_liked_artists_to_all_sources(database, profile_id) -def _match_liked_artists_to_all_sources(database, profile_id: int): - """Match pending liked artists to ALL metadata sources (Spotify, iTunes, Deezer, Discogs). - Uses the same matching pattern as the watchlist scanner: DB-first, then API search - with fuzzy name matching. Stores all resolved IDs so source switching works instantly.""" - pending = database.get_liked_artists_pending_match(profile_id, limit=200) - if not pending: - return - - # Source → column mapping - source_cols = { - 'spotify': 'spotify_artist_id', - 'itunes': 'itunes_artist_id', - 'deezer': 'deezer_artist_id', - 'discogs': 'discogs_artist_id', - } - id_cols = list(source_cols.values()) - - # Reject known placeholder images and local server paths - _placeholder_hashes = {'2a96cbd8b46e442fc41c2b86b821562f'} - def _valid_image(url): - if not url or not url.strip(): - return None - if any(ph in url for ph in _placeholder_hashes): - return None - # Reject local media server paths (Plex/Jellyfin) — not loadable in browser - if url.startswith('/') or url.startswith('\\'): - return None - if not url.startswith('http'): - return None - return url - - # Build search clients for each source - from core.deezer_client import DeezerClient - search_clients = {} - if spotify_client and spotify_client.is_spotify_authenticated(): - search_clients['spotify'] = spotify_client - try: - search_clients['itunes'] = _get_itunes_client() - except Exception: - pass - try: - search_clients['deezer'] = _get_deezer_client() - except Exception: - pass - try: - dc = _get_discogs_client() - # Only use Discogs if token is configured - from config.settings import config_manager as _cm - if _cm.get('discogs.token', ''): - search_clients['discogs'] = dc - except Exception: - pass - - # Reuse watchlist scanner's fuzzy matching logic - from core.watchlist_scanner import WatchlistScanner - _normalize = WatchlistScanner._normalize_artist_name - - def _best_match(results, artist_name): - """Pick best match from search results using name similarity (same as watchlist scanner).""" - if not results: - return None - # Exact normalized match - for r in results: - if _normalize(r.name) == _normalize(artist_name): - return r - # Fuzzy scoring - best = None - best_sim = 0 - for r in results: - # Simple normalized comparison - n1 = _normalize(artist_name) - n2 = _normalize(r.name) - if n1 == n2: - return r - # Levenshtein-style similarity - max_len = max(len(n1), len(n2)) - if max_len == 0: - continue - distance = sum(1 for a, b in zip(n1, n2, strict=False) if a != b) + abs(len(n1) - len(n2)) - sim = (max_len - distance) / max_len - if sim > best_sim: - best_sim = sim - best = r - if best and best_sim >= 0.85: - return best - return None - - api_calls = 0 - matched = 0 - - for entry in pending: - name = entry['artist_name'] - pool_id = entry['id'] - harvested_ids = {} - best_image = None - - # Pre-load existing IDs from the entry itself - for col in id_cols: - if entry.get(col): - harvested_ids[col] = entry[col] - - # --- DB STRATEGIES (free, no API calls) --- - - # 1. Library artists table - try: - conn = database._get_connection() - cursor = conn.cursor() - cursor.execute("SELECT * FROM artists WHERE name = ? COLLATE NOCASE LIMIT 1", (name,)) - row = cursor.fetchone() - if row: - r = dict(row) - for col in id_cols: - if r.get(col) and col not in harvested_ids: - harvested_ids[col] = str(r[col]) - if _valid_image(r.get('thumb_url')): - best_image = r['thumb_url'] - except Exception: - pass - - # 2. Watchlist artists - try: - conn = database._get_connection() - cursor = conn.cursor() - cursor.execute( - "SELECT * FROM watchlist_artists WHERE artist_name = ? COLLATE NOCASE AND profile_id = ? LIMIT 1", - (name, profile_id) - ) - row = cursor.fetchone() - if row: - wl = dict(row) - for col in id_cols: - if wl.get(col) and col not in harvested_ids: - harvested_ids[col] = str(wl[col]) - if _valid_image(wl.get('image_url')) and not best_image: - best_image = wl['image_url'] - except Exception: - pass - - # 3. Metadata cache (all sources) - try: - conn = database._get_connection() - cursor = conn.cursor() - cursor.execute( - "SELECT entity_id, source, image_url FROM metadata_cache_entities WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE", - (name,) - ) - for row in cursor.fetchall(): - col = source_cols.get(row['source']) - if col and col not in harvested_ids: - harvested_ids[col] = row['entity_id'] - if _valid_image(row['image_url']) and not best_image: - best_image = row['image_url'] - except Exception: - pass - - # --- API STRATEGIES (search each missing source) --- - # Same pattern as watchlist scanner's _backfill_missing_ids - for source, col in source_cols.items(): - if col in harvested_ids: - continue # Already have this source's ID - client = search_clients.get(source) - if not client: - continue - if api_calls >= 200: # Hard cap per refresh cycle - break - try: - results = client.search_artists(name, limit=5) - best = _best_match(results, name) - if best: - harvested_ids[col] = best.id - if hasattr(best, 'image_url') and _valid_image(best.image_url) and not best_image: - best_image = best.image_url - api_calls += 1 - time.sleep(0.4) # Rate limit breathing room - except Exception as e: - logger.debug(f"[Your Artists] {source} search failed for '{name}': {e}") - api_calls += 1 - - # Save all harvested IDs - if harvested_ids: - # Determine best active source/ID — prefer Spotify, then iTunes, Deezer, Discogs - resolved_source = None - resolved_id = None - for src in ('spotify', 'itunes', 'deezer', 'discogs'): - col = source_cols[src] - if col in harvested_ids: - resolved_source = src - resolved_id = harvested_ids[col] - break - - database.update_liked_artist_match( - pool_id, active_source=resolved_source, active_source_id=resolved_id, - image_url=best_image, all_ids=harvested_ids - ) - matched += 1 - - database.sync_liked_artists_watchlist_flags(profile_id) - logger.info(f"[Your Artists] Matched {matched}/{len(pending)} artists to {len(search_clients)} sources ({api_calls} API calls)") - - # Image backfill: fetch images for matched artists that have IDs but no image - _backfill_liked_artist_images(database, profile_id, search_clients) - - -def _backfill_liked_artist_images(database, profile_id: int, search_clients: dict): - """Fetch images for matched artists missing artwork using their stored source IDs.""" - try: - conn = database._get_connection() - cursor = conn.cursor() - cursor.execute(""" - SELECT id, artist_name, spotify_artist_id, itunes_artist_id, deezer_artist_id - FROM liked_artists_pool - WHERE profile_id = ? AND match_status = 'matched' - AND (image_url IS NULL OR image_url = '' - OR image_url LIKE '%2a96cbd8b46e442fc41c2b86b821562f%' - OR image_url NOT LIKE 'http%') - LIMIT 100 - """, (profile_id,)) - rows = cursor.fetchall() - if not rows: - return - - logger.info(f"[Your Artists] Backfilling images for {len(rows)} artists...") - filled = 0 - - for row in rows: - r = dict(row) - image_url = None - - # Try Spotify artist lookup (has best images) - if r.get('spotify_artist_id') and 'spotify' in search_clients: - try: - sp = search_clients['spotify'] - if hasattr(sp, 'sp') and sp.sp: - artist_data = sp.sp.artist(r['spotify_artist_id']) - if artist_data and artist_data.get('images'): - image_url = artist_data['images'][0]['url'] - except Exception: - pass - - # Try Deezer (direct image URL from ID) - if not image_url and r.get('deezer_artist_id'): - image_url = f"https://api.deezer.com/artist/{r['deezer_artist_id']}/image?size=big" - - if image_url: - try: - cursor2 = conn.cursor() - cursor2.execute( - "UPDATE liked_artists_pool SET image_url = ? WHERE id = ?", - (image_url, r['id']) - ) - filled += 1 - except Exception: - pass - time.sleep(0.3) - - conn.commit() - if filled: - logger.info(f"[Your Artists] Backfilled {filled}/{len(rows)} artist images") - except Exception as e: - logger.debug(f"[Your Artists] Image backfill error: {e}") +from core.artists.liked_match import ( + _backfill_liked_artist_images, + _match_liked_artists_to_all_sources, +) # ── Your Albums (Liked Albums Pool) ──