Lift liked-artist matching to core/artists/liked_match.py

Lifts _match_liked_artists_to_all_sources and _backfill_liked_artist_images. Both bodies are byte-identical to the originals. Uses the same _SpotifyClientProxy + _get_*_client shim pattern as core/artists/map.py so the bodies resolve their original names without modification. web_server.py: 37501 → 37245 (-256 lines).
4 weeks ago · 0e237f14d4
parent 3714a46434
commit 0e237f14d4
2 changed files with 317 additions and 260 deletions
--- a/core/artists/liked_match.py
+++ b/core/artists/liked_match.py
@ -0,0 +1,313 @@
+"""Liked-artist multi-source matching — lifted from web_server.py.
+
+Both function bodies are byte-identical to the originals. The
+``spotify_client`` proxy + ``_get_*_client`` shims let the bodies resolve
+their original names without any modification.
+"""
+import logging
+import time
+
+from config.settings import config_manager
+from core.metadata.registry import (
+    get_deezer_client,
+    get_discogs_client,
+    get_itunes_client,
+    get_spotify_client,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _get_itunes_client():
+    """Mirror of web_server._get_itunes_client — delegates to registry."""
+    return get_itunes_client()
+
+
+def _get_deezer_client():
+    """Mirror of web_server._get_deezer_client — delegates to registry."""
+    return get_deezer_client()
+
+
+def _get_discogs_client(token=None):
+    """Mirror of web_server._get_discogs_client — delegates to registry."""
+    return get_discogs_client(token)
+
+
+class _SpotifyClientProxy:
+    """Resolves the global Spotify client lazily so a Spotify re-auth that
+    rebinds the cached client in core.metadata.registry is visible to the
+    lifted bodies."""
+
+    def __getattr__(self, name):
+        client = get_spotify_client()
+        if client is None:
+            raise AttributeError(name)
+        return getattr(client, name)
+
+    def __bool__(self):
+        return get_spotify_client() is not None
+
+
+spotify_client = _SpotifyClientProxy()
+
+
+def _match_liked_artists_to_all_sources(database, profile_id: int):
+    """Match pending liked artists to ALL metadata sources (Spotify, iTunes, Deezer, Discogs).
+    Uses the same matching pattern as the watchlist scanner: DB-first, then API search
+    with fuzzy name matching. Stores all resolved IDs so source switching works instantly."""
+    pending = database.get_liked_artists_pending_match(profile_id, limit=200)
+    if not pending:
+        return
+
+    # Source → column mapping
+    source_cols = {
+        'spotify': 'spotify_artist_id',
+        'itunes': 'itunes_artist_id',
+        'deezer': 'deezer_artist_id',
+        'discogs': 'discogs_artist_id',
+    }
+    id_cols = list(source_cols.values())
+
+    # Reject known placeholder images and local server paths
+    _placeholder_hashes = {'2a96cbd8b46e442fc41c2b86b821562f'}
+    def _valid_image(url):
+        if not url or not url.strip():
+            return None
+        if any(ph in url for ph in _placeholder_hashes):
+            return None
+        # Reject local media server paths (Plex/Jellyfin) — not loadable in browser
+        if url.startswith('/') or url.startswith('\\'):
+            return None
+        if not url.startswith('http'):
+            return None
+        return url
+
+    # Build search clients for each source
+    from core.deezer_client import DeezerClient
+    search_clients = {}
+    if spotify_client and spotify_client.is_spotify_authenticated():
+        search_clients['spotify'] = spotify_client
+    try:
+        search_clients['itunes'] = _get_itunes_client()
+    except Exception:
+        pass
+    try:
+        search_clients['deezer'] = _get_deezer_client()
+    except Exception:
+        pass
+    try:
+        dc = _get_discogs_client()
+        # Only use Discogs if token is configured
+        from config.settings import config_manager as _cm
+        if _cm.get('discogs.token', ''):
+            search_clients['discogs'] = dc
+    except Exception:
+        pass
+
+    # Reuse watchlist scanner's fuzzy matching logic
+    from core.watchlist_scanner import WatchlistScanner
+    _normalize = WatchlistScanner._normalize_artist_name
+
+    def _best_match(results, artist_name):
+        """Pick best match from search results using name similarity (same as watchlist scanner)."""
+        if not results:
+            return None
+        # Exact normalized match
+        for r in results:
+            if _normalize(r.name) == _normalize(artist_name):
+                return r
+        # Fuzzy scoring
+        best = None
+        best_sim = 0
+        for r in results:
+            # Simple normalized comparison
+            n1 = _normalize(artist_name)
+            n2 = _normalize(r.name)
+            if n1 == n2:
+                return r
+            # Levenshtein-style similarity
+            max_len = max(len(n1), len(n2))
+            if max_len == 0:
+                continue
+            distance = sum(1 for a, b in zip(n1, n2, strict=False) if a != b) + abs(len(n1) - len(n2))
+            sim = (max_len - distance) / max_len
+            if sim > best_sim:
+                best_sim = sim
+                best = r
+        if best and best_sim >= 0.85:
+            return best
+        return None
+
+    api_calls = 0
+    matched = 0
+
+    for entry in pending:
+        name = entry['artist_name']
+        pool_id = entry['id']
+        harvested_ids = {}
+        best_image = None
+
+        # Pre-load existing IDs from the entry itself
+        for col in id_cols:
+            if entry.get(col):
+                harvested_ids[col] = entry[col]
+
+        # --- DB STRATEGIES (free, no API calls) ---
+
+        # 1. Library artists table
+        try:
+            conn = database._get_connection()
+            cursor = conn.cursor()
+            cursor.execute("SELECT * FROM artists WHERE name = ? COLLATE NOCASE LIMIT 1", (name,))
+            row = cursor.fetchone()
+            if row:
+                r = dict(row)
+                for col in id_cols:
+                    if r.get(col) and col not in harvested_ids:
+                        harvested_ids[col] = str(r[col])
+                if _valid_image(r.get('thumb_url')):
+                    best_image = r['thumb_url']
+        except Exception:
+            pass
+
+        # 2. Watchlist artists
+        try:
+            conn = database._get_connection()
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT * FROM watchlist_artists WHERE artist_name = ? COLLATE NOCASE AND profile_id = ? LIMIT 1",
+                (name, profile_id)
+            )
+            row = cursor.fetchone()
+            if row:
+                wl = dict(row)
+                for col in id_cols:
+                    if wl.get(col) and col not in harvested_ids:
+                        harvested_ids[col] = str(wl[col])
+                if _valid_image(wl.get('image_url')) and not best_image:
+                    best_image = wl['image_url']
+        except Exception:
+            pass
+
+        # 3. Metadata cache (all sources)
+        try:
+            conn = database._get_connection()
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT entity_id, source, image_url FROM metadata_cache_entities WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE",
+                (name,)
+            )
+            for row in cursor.fetchall():
+                col = source_cols.get(row['source'])
+                if col and col not in harvested_ids:
+                    harvested_ids[col] = row['entity_id']
+                if _valid_image(row['image_url']) and not best_image:
+                    best_image = row['image_url']
+        except Exception:
+            pass
+
+        # --- API STRATEGIES (search each missing source) ---
+        # Same pattern as watchlist scanner's _backfill_missing_ids
+        for source, col in source_cols.items():
+            if col in harvested_ids:
+                continue  # Already have this source's ID
+            client = search_clients.get(source)
+            if not client:
+                continue
+            if api_calls >= 200:  # Hard cap per refresh cycle
+                break
+            try:
+                results = client.search_artists(name, limit=5)
+                best = _best_match(results, name)
+                if best:
+                    harvested_ids[col] = best.id
+                    if hasattr(best, 'image_url') and _valid_image(best.image_url) and not best_image:
+                        best_image = best.image_url
+                api_calls += 1
+                time.sleep(0.4)  # Rate limit breathing room
+            except Exception as e:
+                logger.debug(f"[Your Artists] {source} search failed for '{name}': {e}")
+                api_calls += 1
+
+        # Save all harvested IDs
+        if harvested_ids:
+            # Determine best active source/ID — prefer Spotify, then iTunes, Deezer, Discogs
+            resolved_source = None
+            resolved_id = None
+            for src in ('spotify', 'itunes', 'deezer', 'discogs'):
+                col = source_cols[src]
+                if col in harvested_ids:
+                    resolved_source = src
+                    resolved_id = harvested_ids[col]
+                    break
+
+            database.update_liked_artist_match(
+                pool_id, active_source=resolved_source, active_source_id=resolved_id,
+                image_url=best_image, all_ids=harvested_ids
+            )
+            matched += 1
+
+    database.sync_liked_artists_watchlist_flags(profile_id)
+    logger.info(f"[Your Artists] Matched {matched}/{len(pending)} artists to {len(search_clients)} sources ({api_calls} API calls)")
+
+    # Image backfill: fetch images for matched artists that have IDs but no image
+    _backfill_liked_artist_images(database, profile_id, search_clients)
+
+
+def _backfill_liked_artist_images(database, profile_id: int, search_clients: dict):
+    """Fetch images for matched artists missing artwork using their stored source IDs."""
+    try:
+        conn = database._get_connection()
+        cursor = conn.cursor()
+        cursor.execute("""
+            SELECT id, artist_name, spotify_artist_id, itunes_artist_id, deezer_artist_id
+            FROM liked_artists_pool
+            WHERE profile_id = ? AND match_status = 'matched'
+              AND (image_url IS NULL OR image_url = ''
+                   OR image_url LIKE '%2a96cbd8b46e442fc41c2b86b821562f%'
+                   OR image_url NOT LIKE 'http%')
+            LIMIT 100
+        """, (profile_id,))
+        rows = cursor.fetchall()
+        if not rows:
+            return
+
+        logger.info(f"[Your Artists] Backfilling images for {len(rows)} artists...")
+        filled = 0
+
+        for row in rows:
+            r = dict(row)
+            image_url = None
+
+            # Try Spotify artist lookup (has best images)
+            if r.get('spotify_artist_id') and 'spotify' in search_clients:
+                try:
+                    sp = search_clients['spotify']
+                    if hasattr(sp, 'sp') and sp.sp:
+                        artist_data = sp.sp.artist(r['spotify_artist_id'])
+                        if artist_data and artist_data.get('images'):
+                            image_url = artist_data['images'][0]['url']
+                except Exception:
+                    pass
+
+            # Try Deezer (direct image URL from ID)
+            if not image_url and r.get('deezer_artist_id'):
+                image_url = f"https://api.deezer.com/artist/{r['deezer_artist_id']}/image?size=big"
+
+            if image_url:
+                try:
+                    cursor2 = conn.cursor()
+                    cursor2.execute(
+                        "UPDATE liked_artists_pool SET image_url = ? WHERE id = ?",
+                        (image_url, r['id'])
+                    )
+                    filled += 1
+                except Exception:
+                    pass
+                time.sleep(0.3)
+
+        conn.commit()
+        if filled:
+            logger.info(f"[Your Artists] Backfilled {filled}/{len(rows)} artist images")
+    except Exception as e:
+        logger.debug(f"[Your Artists] Image backfill error: {e}")
--- a/web_server.py
+++ b/web_server.py
@ -29594,266 +29594,10 @@ def _fetch_and_match_liked_artists(profile_id: int):
    _match_liked_artists_to_all_sources(database, profile_id)


-def _match_liked_artists_to_all_sources(database, profile_id: int):
-    """Match pending liked artists to ALL metadata sources (Spotify, iTunes, Deezer, Discogs).
-    Uses the same matching pattern as the watchlist scanner: DB-first, then API search
-    with fuzzy name matching. Stores all resolved IDs so source switching works instantly."""
-    pending = database.get_liked_artists_pending_match(profile_id, limit=200)
-    if not pending:
-        return
-
-    # Source → column mapping
-    source_cols = {
-        'spotify': 'spotify_artist_id',
-        'itunes': 'itunes_artist_id',
-        'deezer': 'deezer_artist_id',
-        'discogs': 'discogs_artist_id',
-    }
-    id_cols = list(source_cols.values())
-
-    # Reject known placeholder images and local server paths
-    _placeholder_hashes = {'2a96cbd8b46e442fc41c2b86b821562f'}
-    def _valid_image(url):
-        if not url or not url.strip():
-            return None
-        if any(ph in url for ph in _placeholder_hashes):
-            return None
-        # Reject local media server paths (Plex/Jellyfin) — not loadable in browser
-        if url.startswith('/') or url.startswith('\\'):
-            return None
-        if not url.startswith('http'):
-            return None
-        return url
-
-    # Build search clients for each source
-    from core.deezer_client import DeezerClient
-    search_clients = {}
-    if spotify_client and spotify_client.is_spotify_authenticated():
-        search_clients['spotify'] = spotify_client
-    try:
-        search_clients['itunes'] = _get_itunes_client()
-    except Exception:
-        pass
-    try:
-        search_clients['deezer'] = _get_deezer_client()
-    except Exception:
-        pass
-    try:
-        dc = _get_discogs_client()
-        # Only use Discogs if token is configured
-        from config.settings import config_manager as _cm
-        if _cm.get('discogs.token', ''):
-            search_clients['discogs'] = dc
-    except Exception:
-        pass
-
-    # Reuse watchlist scanner's fuzzy matching logic
-    from core.watchlist_scanner import WatchlistScanner
-    _normalize = WatchlistScanner._normalize_artist_name
-
-    def _best_match(results, artist_name):
-        """Pick best match from search results using name similarity (same as watchlist scanner)."""
-        if not results:
-            return None
-        # Exact normalized match
-        for r in results:
-            if _normalize(r.name) == _normalize(artist_name):
-                return r
-        # Fuzzy scoring
-        best = None
-        best_sim = 0
-        for r in results:
-            # Simple normalized comparison
-            n1 = _normalize(artist_name)
-            n2 = _normalize(r.name)
-            if n1 == n2:
-                return r
-            # Levenshtein-style similarity
-            max_len = max(len(n1), len(n2))
-            if max_len == 0:
-                continue
-            distance = sum(1 for a, b in zip(n1, n2, strict=False) if a != b) + abs(len(n1) - len(n2))
-            sim = (max_len - distance) / max_len
-            if sim > best_sim:
-                best_sim = sim
-                best = r
-        if best and best_sim >= 0.85:
-            return best
-        return None
-
-    api_calls = 0
-    matched = 0
-
-    for entry in pending:
-        name = entry['artist_name']
-        pool_id = entry['id']
-        harvested_ids = {}
-        best_image = None
-
-        # Pre-load existing IDs from the entry itself
-        for col in id_cols:
-            if entry.get(col):
-                harvested_ids[col] = entry[col]
-
-        # --- DB STRATEGIES (free, no API calls) ---
-
-        # 1. Library artists table
-        try:
-            conn = database._get_connection()
-            cursor = conn.cursor()
-            cursor.execute("SELECT * FROM artists WHERE name = ? COLLATE NOCASE LIMIT 1", (name,))
-            row = cursor.fetchone()
-            if row:
-                r = dict(row)
-                for col in id_cols:
-                    if r.get(col) and col not in harvested_ids:
-                        harvested_ids[col] = str(r[col])
-                if _valid_image(r.get('thumb_url')):
-                    best_image = r['thumb_url']
-        except Exception:
-            pass
-
-        # 2. Watchlist artists
-        try:
-            conn = database._get_connection()
-            cursor = conn.cursor()
-            cursor.execute(
-                "SELECT * FROM watchlist_artists WHERE artist_name = ? COLLATE NOCASE AND profile_id = ? LIMIT 1",
-                (name, profile_id)
-            )
-            row = cursor.fetchone()
-            if row:
-                wl = dict(row)
-                for col in id_cols:
-                    if wl.get(col) and col not in harvested_ids:
-                        harvested_ids[col] = str(wl[col])
-                if _valid_image(wl.get('image_url')) and not best_image:
-                    best_image = wl['image_url']
-        except Exception:
-            pass
-
-        # 3. Metadata cache (all sources)
-        try:
-            conn = database._get_connection()
-            cursor = conn.cursor()
-            cursor.execute(
-                "SELECT entity_id, source, image_url FROM metadata_cache_entities WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE",
-                (name,)
-            )
-            for row in cursor.fetchall():
-                col = source_cols.get(row['source'])
-                if col and col not in harvested_ids:
-                    harvested_ids[col] = row['entity_id']
-                if _valid_image(row['image_url']) and not best_image:
-                    best_image = row['image_url']
-        except Exception:
-            pass
-
-        # --- API STRATEGIES (search each missing source) ---
-        # Same pattern as watchlist scanner's _backfill_missing_ids
-        for source, col in source_cols.items():
-            if col in harvested_ids:
-                continue  # Already have this source's ID
-            client = search_clients.get(source)
-            if not client:
-                continue
-            if api_calls >= 200:  # Hard cap per refresh cycle
-                break
-            try:
-                results = client.search_artists(name, limit=5)
-                best = _best_match(results, name)
-                if best:
-                    harvested_ids[col] = best.id
-                    if hasattr(best, 'image_url') and _valid_image(best.image_url) and not best_image:
-                        best_image = best.image_url
-                api_calls += 1
-                time.sleep(0.4)  # Rate limit breathing room
-            except Exception as e:
-                logger.debug(f"[Your Artists] {source} search failed for '{name}': {e}")
-                api_calls += 1
-
-        # Save all harvested IDs
-        if harvested_ids:
-            # Determine best active source/ID — prefer Spotify, then iTunes, Deezer, Discogs
-            resolved_source = None
-            resolved_id = None
-            for src in ('spotify', 'itunes', 'deezer', 'discogs'):
-                col = source_cols[src]
-                if col in harvested_ids:
-                    resolved_source = src
-                    resolved_id = harvested_ids[col]
-                    break
-
-            database.update_liked_artist_match(
-                pool_id, active_source=resolved_source, active_source_id=resolved_id,
-                image_url=best_image, all_ids=harvested_ids
-            )
-            matched += 1
-
-    database.sync_liked_artists_watchlist_flags(profile_id)
-    logger.info(f"[Your Artists] Matched {matched}/{len(pending)} artists to {len(search_clients)} sources ({api_calls} API calls)")
-
-    # Image backfill: fetch images for matched artists that have IDs but no image
-    _backfill_liked_artist_images(database, profile_id, search_clients)
-
-
-def _backfill_liked_artist_images(database, profile_id: int, search_clients: dict):
-    """Fetch images for matched artists missing artwork using their stored source IDs."""
-    try:
-        conn = database._get_connection()
-        cursor = conn.cursor()
-        cursor.execute("""
-            SELECT id, artist_name, spotify_artist_id, itunes_artist_id, deezer_artist_id
-            FROM liked_artists_pool
-            WHERE profile_id = ? AND match_status = 'matched'
-              AND (image_url IS NULL OR image_url = ''
-                   OR image_url LIKE '%2a96cbd8b46e442fc41c2b86b821562f%'
-                   OR image_url NOT LIKE 'http%')
-            LIMIT 100
-        """, (profile_id,))
-        rows = cursor.fetchall()
-        if not rows:
-            return
-
-        logger.info(f"[Your Artists] Backfilling images for {len(rows)} artists...")
-        filled = 0
-
-        for row in rows:
-            r = dict(row)
-            image_url = None
-
-            # Try Spotify artist lookup (has best images)
-            if r.get('spotify_artist_id') and 'spotify' in search_clients:
-                try:
-                    sp = search_clients['spotify']
-                    if hasattr(sp, 'sp') and sp.sp:
-                        artist_data = sp.sp.artist(r['spotify_artist_id'])
-                        if artist_data and artist_data.get('images'):
-                            image_url = artist_data['images'][0]['url']
-                except Exception:
-                    pass
-
-            # Try Deezer (direct image URL from ID)
-            if not image_url and r.get('deezer_artist_id'):
-                image_url = f"https://api.deezer.com/artist/{r['deezer_artist_id']}/image?size=big"
-
-            if image_url:
-                try:
-                    cursor2 = conn.cursor()
-                    cursor2.execute(
-                        "UPDATE liked_artists_pool SET image_url = ? WHERE id = ?",
-                        (image_url, r['id'])
-                    )
-                    filled += 1
-                except Exception:
-                    pass
-                time.sleep(0.3)
-
-        conn.commit()
-        if filled:
-            logger.info(f"[Your Artists] Backfilled {filled}/{len(rows)} artist images")
-    except Exception as e:
-        logger.debug(f"[Your Artists] Image backfill error: {e}")
+from core.artists.liked_match import (
+    _backfill_liked_artist_images,
+    _match_liked_artists_to_all_sources,
+)


 # ── Your Albums (Liked Albums Pool) ──