Fix Discogs cache — add field extractor, wire worker caching, browser UI

- Add _extract_discogs_fields to metadata cache — handles Discogs field
  names (title vs name, images array, Artist - Title format)
- Worker uses _fetch_and_cache_artist/_fetch_and_cache_album helpers
  that cache raw data while returning it for enrichment
- All search/lookup methods cache results for repeat queries
- Cache browser: Discogs stat pill, source filter, clear button, badge
- Fixes albums showing as 'Unknown' and artists missing images in cache
pull/253/head
Broque Thomas 1 month ago
parent 1455112d40
commit 240dd87727

@ -12,6 +12,7 @@ import re
import time
import threading
import requests
from core.metadata_cache import get_metadata_cache
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from functools import wraps
@ -372,6 +373,18 @@ class DiscogsClient:
def search_artists(self, query: str, limit: int = 10) -> List[Artist]:
"""Search for artists on Discogs."""
cache = get_metadata_cache()
cached_results = cache.get_search_results('discogs', 'artist', query, limit)
if cached_results is not None:
artists = []
for raw in cached_results:
try:
artists.append(Artist.from_discogs_artist(raw))
except Exception:
pass
if artists:
return artists
data = self._api_get('/database/search', {
'q': query, 'type': 'artist', 'per_page': min(limit, 50),
})
@ -379,15 +392,36 @@ class DiscogsClient:
return []
artists = []
raw_items = []
for item in data['results'][:limit]:
try:
artists.append(Artist.from_discogs_artist(item))
raw_items.append(item)
except Exception as e:
logger.debug(f"Error parsing Discogs artist: {e}")
if raw_items:
entries = [(str(r.get('id', '')), r) for r in raw_items if r.get('id')]
if entries:
cache.store_entities_bulk('discogs', 'artist', entries)
cache.store_search_results('discogs', 'artist', query, limit,
[str(r.get('id', '')) for r in raw_items if r.get('id')])
return artists
def search_albums(self, query: str, limit: int = 10) -> List[Album]:
"""Search for releases/albums on Discogs."""
cache = get_metadata_cache()
cached_results = cache.get_search_results('discogs', 'album', query, limit)
if cached_results is not None:
albums = []
for raw in cached_results:
try:
albums.append(Album.from_discogs_release(raw))
except Exception:
pass
if albums:
return albums
data = self._api_get('/database/search', {
'q': query, 'type': 'release', 'per_page': min(limit, 50),
})
@ -395,20 +429,28 @@ class DiscogsClient:
return []
albums = []
raw_items = []
seen_titles = set()
for item in data['results'][:limit * 2]:
try:
album = Album.from_discogs_release(item)
# Deduplicate by title+artist (Discogs has many pressings of same album)
dedup_key = f"{album.name.lower()}|{album.artists[0].lower() if album.artists else ''}"
if dedup_key in seen_titles:
continue
seen_titles.add(dedup_key)
albums.append(album)
raw_items.append(item)
if len(albums) >= limit:
break
except Exception as e:
logger.debug(f"Error parsing Discogs release: {e}")
if raw_items:
entries = [(str(r.get('id', '')), r) for r in raw_items if r.get('id')]
if entries:
cache.store_entities_bulk('discogs', 'album', entries, skip_if_exists=True)
cache.store_search_results('discogs', 'album', query, limit,
[str(r.get('id', '')) for r in raw_items if r.get('id')])
return albums
def search_tracks(self, query: str, limit: int = 10) -> List[Track]:
@ -423,16 +465,23 @@ class DiscogsClient:
def get_artist(self, artist_id: str) -> Optional[Dict[str, Any]]:
"""Get artist details by Discogs ID."""
data = self._api_get(f'/artists/{artist_id}')
if not data:
return None
cache = get_metadata_cache()
cached = cache.get_entity('discogs', 'artist', artist_id)
if cached and cached.get('name'):
# Rebuild normalized result from cached raw data
data = cached
else:
data = self._api_get(f'/artists/{artist_id}')
if not data:
return None
cache.store_entity('discogs', 'artist', artist_id, data)
artist = Artist.from_discogs_artist(data)
# Get profile/bio
profile = data.get('profile', '')
return {
result = {
'id': artist.id,
'name': artist.name,
'image_url': artist.image_url,
@ -444,15 +493,22 @@ class DiscogsClient:
'images': [{'url': artist.image_url}] if artist.image_url else [],
}
return result
def get_album(self, release_id: str, include_tracks: bool = True) -> Optional[Dict[str, Any]]:
"""Get release/album details by Discogs ID. Tries master first, falls back to release."""
# Try as master first (artist discography returns master IDs)
data = self._api_get(f'/masters/{release_id}')
if not data or not data.get('title'):
# Fall back to release
data = self._api_get(f'/releases/{release_id}')
if not data:
return None
cache = get_metadata_cache()
cached = cache.get_entity('discogs', 'album', release_id)
if cached and cached.get('title'):
data = cached
else:
# Try as master first (artist discography returns master IDs)
data = self._api_get(f'/masters/{release_id}')
if not data or not data.get('title'):
data = self._api_get(f'/releases/{release_id}')
if not data:
return None
cache.store_entity('discogs', 'album', release_id, data)
album = Album.from_discogs_release(data)
@ -560,10 +616,15 @@ class DiscogsClient:
def get_album_tracks(self, release_id: str) -> Optional[Dict[str, Any]]:
"""Get album tracks by Discogs release or master ID. Returns Spotify-compatible format."""
cache = get_metadata_cache()
cache_key = f"{release_id}_tracks"
cached = cache.get_entity('discogs', 'album', cache_key)
if cached:
return cached
# Try as master first (master IDs are used in artist discography)
data = self._api_get(f'/masters/{release_id}')
if not data or not data.get('tracklist'):
# Fall back to release
data = self._api_get(f'/releases/{release_id}')
if not data or not data.get('tracklist'):
return None
@ -630,13 +691,40 @@ class DiscogsClient:
'_source': 'discogs',
})
return {
result = {
'items': tracks,
'total': len(tracks),
'limit': len(tracks),
'next': None,
}
cache.store_entity('discogs', 'album', cache_key, result)
return result
def _fetch_and_cache_artist(self, artist_id: str) -> Optional[Dict]:
    """Return the raw Discogs artist payload, consulting the metadata cache first.

    Intended for the enrichment worker, which needs the raw API payload.

    Args:
        artist_id: Discogs artist ID; it is stringified for use as the cache key.

    Returns:
        The cached entry when it has a truthy 'name', otherwise whatever
        ``_api_get('/artists/<id>')`` returned (falsy when the lookup failed).
    """
    store = get_metadata_cache()
    entity_key = str(artist_id)

    hit = store.get_entity('discogs', 'artist', entity_key)
    # Entries without a usable 'name' are treated as cache misses.
    if hit and hit.get('name'):
        return hit

    fetched = self._api_get(f'/artists/{artist_id}')
    if not fetched:
        # Failed lookups are not cached; hand the falsy value straight back.
        return fetched

    store.store_entity('discogs', 'artist', entity_key, fetched)
    return fetched
def _fetch_and_cache_album(self, release_id: str) -> Optional[Dict]:
    """Fetch raw album/release detail with cache. Used by enrichment worker.

    The cache is only trusted when the stored entry looks like a full
    master/release detail payload. ``search_albums`` stores raw search hits
    under the same ('discogs', 'album', id) key, and those hits also have a
    'title', so checking 'title' alone would hand the worker a thin search
    stub instead of the detail record it asked for.

    Args:
        release_id: Discogs master or release ID; stringified for the cache key.

    Returns:
        The cached detail payload, or the payload fetched from the API
        (master endpoint first, then release). Falsy when both lookups fail.
    """
    cache = get_metadata_cache()
    cache_key = str(release_id)

    cached = cache.get_entity('discogs', 'album', cache_key)
    # 'tracklist' is what get_album_tracks() reads from master/release detail;
    # its absence marks the entry as a search-result stub, so re-fetch.
    if cached and cached.get('title') and 'tracklist' in cached:
        return cached

    # NOTE(review): the worker's search path passes IDs from a type=release
    # search and previously called /releases/<id> directly. Trying /masters
    # first may resolve a different album if master and release IDs overlap
    # numerically -- confirm against the Discogs API before relying on it.
    data = self._api_get(f'/masters/{release_id}')
    if not data or not data.get('title'):
        data = self._api_get(f'/releases/{release_id}')

    if data:
        cache.store_entity('discogs', 'album', cache_key, data)
    return data
def _get_artist_image_from_albums(self, artist_id: str) -> Optional[str]:
"""Get artist image by fetching their first album's cover art.
Used as fallback when artist has no direct image."""

@ -244,14 +244,14 @@ class DiscogsWorker:
if existing_id:
try:
if item_type == 'artist':
data = self.client._api_get(f'/artists/{existing_id}')
data = self.client._fetch_and_cache_artist(existing_id)
if data:
self._update_artist(item_id, data)
self.stats['matched'] += 1
logger.info(f"Enriched artist '{item_name}' from existing Discogs ID: {existing_id}")
return
elif item_type == 'album':
data = self.client._api_get(f'/releases/{existing_id}')
data = self.client._fetch_and_cache_album(existing_id)
if data:
self._update_album(item_id, data)
self.stats['matched'] += 1
@ -298,8 +298,8 @@ class DiscogsWorker:
# Find best match by name similarity
for result in results:
if self._name_matches(artist_name, result.name):
# Fetch full artist detail
data = self.client._api_get(f'/artists/{result.id}')
# Fetch full artist detail (uses cache)
data = self.client._fetch_and_cache_artist(result.id)
if data:
self._update_artist(artist_id, data)
self.stats['matched'] += 1
@ -322,8 +322,8 @@ class DiscogsWorker:
for result in results:
if self._name_matches(album_name, result.name):
# Fetch full release detail
data = self.client._api_get(f'/releases/{result.id}')
# Fetch full release detail (uses cache)
data = self.client._fetch_and_cache_album(result.id)
if data:
self._update_album(album_id, data)
self.stats['matched'] += 1

@ -489,9 +489,9 @@ class MetadataCache:
cursor = conn.cursor()
stats = {
'artists': {'spotify': 0, 'itunes': 0, 'deezer': 0},
'albums': {'spotify': 0, 'itunes': 0, 'deezer': 0},
'tracks': {'spotify': 0, 'itunes': 0, 'deezer': 0},
'artists': {'spotify': 0, 'itunes': 0, 'deezer': 0, 'discogs': 0},
'albums': {'spotify': 0, 'itunes': 0, 'deezer': 0, 'discogs': 0},
'tracks': {'spotify': 0, 'itunes': 0, 'deezer': 0, 'discogs': 0},
'searches': 0,
'total_entries': 0,
'total_hits': 0,
@ -541,9 +541,9 @@ class MetadataCache:
except Exception as e:
logger.error(f"Cache stats error: {e}")
return {
'artists': {'spotify': 0, 'itunes': 0, 'deezer': 0},
'albums': {'spotify': 0, 'itunes': 0, 'deezer': 0},
'tracks': {'spotify': 0, 'itunes': 0, 'deezer': 0},
'artists': {'spotify': 0, 'itunes': 0, 'deezer': 0, 'discogs': 0},
'albums': {'spotify': 0, 'itunes': 0, 'deezer': 0, 'discogs': 0},
'tracks': {'spotify': 0, 'itunes': 0, 'deezer': 0, 'discogs': 0},
'searches': 0, 'total_entries': 0, 'total_hits': 0,
'oldest': None, 'newest': None,
}
@ -848,6 +848,8 @@ class MetadataCache:
return self._extract_deezer_fields(entity_type, raw_data)
elif source == 'beatport':
return self._extract_beatport_fields(entity_type, raw_data)
elif source == 'discogs':
return self._extract_discogs_fields(entity_type, raw_data)
return {'name': str(raw_data.get('name', raw_data.get('trackName', '')))}
def _extract_spotify_fields(self, entity_type: str, data: dict) -> dict:
@ -1043,6 +1045,56 @@ class MetadataCache:
return fields
def _extract_discogs_fields(self, entity_type: str, data: dict) -> dict:
    """Flatten a raw Discogs payload into the cache's column fields.

    Handles both payload shapes seen in this file: detail responses (with an
    'images' list, and 'artists' for albums) and search hits (with
    'cover_image'/'thumb', and album titles formatted as "Artist - Title").

    Args:
        entity_type: 'artist' or 'album'; any other value yields an empty dict.
        data: Raw Discogs API object.

    Returns:
        Dict of extracted fields. 'image_url' is omitted when the payload has
        no 'images' list and no usable cover/thumb URL.
    """

    def fill_image(target: dict) -> None:
        # Detail payloads: use the image typed 'primary', else the first one.
        gallery = data.get('images', [])
        if gallery:
            chosen = None
            for candidate in gallery:
                if candidate.get('type') == 'primary':
                    chosen = candidate
                    break
            target['image_url'] = (chosen or gallery[0]).get('uri')
        if target.get('image_url'):
            return
        # Search hits: fall back to cover_image/thumb. URLs containing
        # 'spacer.gif' are skipped (presumably a placeholder image).
        fallback = data.get('cover_image') or data.get('thumb')
        if fallback and 'spacer.gif' not in fallback:
            target['image_url'] = fallback

    extracted = {}

    if entity_type == 'artist':
        # Artists are keyed by 'name'; 'title' is used when 'name' is absent.
        extracted['name'] = data.get('name', data.get('title', ''))
        extracted['genres'] = json.dumps([])
        extracted['popularity'] = 0
        extracted['followers'] = 0
        fill_image(extracted)
        return extracted

    if entity_type != 'album':
        return extracted

    title = data.get('title', '')
    if data.get('artists'):
        # An explicit artist list is present, so the title is used as-is.
        extracted['name'] = title
        extracted['artist_name'] = data['artists'][0].get('name', '')
    elif ' - ' in title:
        # "Artist - Title": split on the first separator only.
        artist_part, _, album_part = title.partition(' - ')
        extracted['artist_name'] = artist_part.strip()
        extracted['name'] = album_part.strip()
    else:
        extracted['name'] = title

    year = data.get('year')
    extracted['release_date'] = str(year) if year else ''
    extracted['total_tracks'] = len(data.get('tracklist') or [])
    extracted['genres'] = json.dumps(data.get('genres', []))
    fill_image(extracted)
    return extracted
def _extract_beatport_fields(self, entity_type: str, data: dict) -> dict:
"""Extract fields from Beatport enriched track data."""
fields = {}

@ -6625,6 +6625,7 @@
<button onclick="clearMetadataCacheBySource('beatport')"><span class="mcache-source-badge beatport" style="margin-right:6px">beatport</span>Clear Beatport</button>
<div style="border-top:1px solid rgba(255,255,255,0.08);margin:4px 0"></div>
<button onclick="clearMusicBrainzCache()"><span class="mcache-source-badge musicbrainz" style="margin-right:6px">musicbrainz</span>Clear MusicBrainz</button>
<button onclick="clearMetadataCacheBySource('discogs')"><span class="mcache-source-badge discogs" style="margin-right:6px">discogs</span>Clear Discogs</button>
<button onclick="clearMusicBrainzCache(true)"><span class="mcache-source-badge musicbrainz" style="margin-right:6px;opacity:0.6">musicbrainz</span>Clear Failed MB Only</button>
<div style="border-top:1px solid rgba(255,255,255,0.08);margin:4px 0"></div>
<button onclick="clearMetadataCache()">Clear All</button>
@ -6654,6 +6655,10 @@
<span class="mcache-stat-pill-label">MusicBrainz</span>
<span class="mcache-stat-pill-value" id="mcache-browse-musicbrainz-count">0</span>
</div>
<div class="mcache-stat-pill">
<span class="mcache-stat-pill-label">Discogs</span>
<span class="mcache-stat-pill-value" id="mcache-browse-discogs-count">0</span>
</div>
<div class="mcache-stat-pill">
<span class="mcache-stat-pill-label">Total Hits</span>
<span class="mcache-stat-pill-value" id="mcache-browse-hits">0</span>
@ -6677,6 +6682,7 @@
<option value="deezer">Deezer</option>
<option value="beatport">Beatport</option>
<option value="musicbrainz">MusicBrainz</option>
<option value="discogs">Discogs</option>
</select>
<select class="mcache-sort-filter" id="mcache-sort-filter" onchange="loadMetadataCacheBrowse()">
<option value="last_accessed_at">Recently Accessed</option>

@ -21642,6 +21642,8 @@ async function loadMetadataCacheBrowseStats() {
el('mcache-browse-itunes-count', itunesTotal);
el('mcache-browse-deezer-count', deezerTotal);
el('mcache-browse-beatport-count', beatportTotal);
const discogsTotal = (stats.artists?.discogs || 0) + (stats.albums?.discogs || 0) + (stats.tracks?.discogs || 0);
el('mcache-browse-discogs-count', discogsTotal);
el('mcache-browse-musicbrainz-count', stats.musicbrainz_total || 0);
el('mcache-browse-hits', stats.total_hits || 0);
el('mcache-browse-searches', stats.searches || 0);

@ -47303,6 +47303,11 @@ tr.tag-diff-same {
color: #BA478F;
}
.mcache-source-badge.discogs {
background: rgba(212, 165, 116, 0.15);
color: #D4A574;
}
.mcache-card.mb-matched {
border-left: 2px solid rgba(76, 175, 80, 0.5);
}

Loading…
Cancel
Save