diff --git a/core/discogs_client.py b/core/discogs_client.py index 358d5e4d..aeb8e2da 100644 --- a/core/discogs_client.py +++ b/core/discogs_client.py @@ -12,6 +12,7 @@ import re import time import threading import requests +from core.metadata_cache import get_metadata_cache from typing import List, Dict, Any, Optional from dataclasses import dataclass from functools import wraps @@ -372,6 +373,18 @@ class DiscogsClient: def search_artists(self, query: str, limit: int = 10) -> List[Artist]: """Search for artists on Discogs.""" + cache = get_metadata_cache() + cached_results = cache.get_search_results('discogs', 'artist', query, limit) + if cached_results is not None: + artists = [] + for raw in cached_results: + try: + artists.append(Artist.from_discogs_artist(raw)) + except Exception: + pass + if artists: + return artists + data = self._api_get('/database/search', { 'q': query, 'type': 'artist', 'per_page': min(limit, 50), }) @@ -379,15 +392,36 @@ class DiscogsClient: return [] artists = [] + raw_items = [] for item in data['results'][:limit]: try: artists.append(Artist.from_discogs_artist(item)) + raw_items.append(item) except Exception as e: logger.debug(f"Error parsing Discogs artist: {e}") + + if raw_items: + entries = [(str(r.get('id', '')), r) for r in raw_items if r.get('id')] + if entries: + cache.store_entities_bulk('discogs', 'artist', entries) + cache.store_search_results('discogs', 'artist', query, limit, + [str(r.get('id', '')) for r in raw_items if r.get('id')]) return artists def search_albums(self, query: str, limit: int = 10) -> List[Album]: """Search for releases/albums on Discogs.""" + cache = get_metadata_cache() + cached_results = cache.get_search_results('discogs', 'album', query, limit) + if cached_results is not None: + albums = [] + for raw in cached_results: + try: + albums.append(Album.from_discogs_release(raw)) + except Exception: + pass + if albums: + return albums + data = self._api_get('/database/search', { 'q': query, 'type': 'release', 'per_page': min(limit, 50), }) @@ -395,20 +429,28 @@ class DiscogsClient: return [] albums = [] + raw_items = [] seen_titles = set() for item in data['results'][:limit * 2]: try: album = Album.from_discogs_release(item) - # Deduplicate by title+artist (Discogs has many pressings of same album) dedup_key = f"{album.name.lower()}|{album.artists[0].lower() if album.artists else ''}" if dedup_key in seen_titles: continue seen_titles.add(dedup_key) albums.append(album) + raw_items.append(item) if len(albums) >= limit: break except Exception as e: logger.debug(f"Error parsing Discogs release: {e}") + + if raw_items: + entries = [(str(r.get('id', '')), r) for r in raw_items if r.get('id')] + if entries: + cache.store_entities_bulk('discogs', 'album', entries, skip_if_exists=True) + cache.store_search_results('discogs', 'album', query, limit, + [str(r.get('id', '')) for r in raw_items if r.get('id')]) return albums def search_tracks(self, query: str, limit: int = 10) -> List[Track]: @@ -423,16 +465,23 @@ class DiscogsClient: def get_artist(self, artist_id: str) -> Optional[Dict[str, Any]]: """Get artist details by Discogs ID.""" - data = self._api_get(f'/artists/{artist_id}') - if not data: - return None + cache = get_metadata_cache() + cached = cache.get_entity('discogs', 'artist', artist_id) + if cached and cached.get('name'): + # Rebuild normalized result from cached raw data + data = cached + else: + data = self._api_get(f'/artists/{artist_id}') + if not data: + return None + cache.store_entity('discogs', 'artist', artist_id, data) artist = Artist.from_discogs_artist(data) # Get profile/bio profile = data.get('profile', '') - return { + result = { 'id': artist.id, 'name': artist.name, 'image_url': artist.image_url, @@ -444,15 +493,22 @@ class DiscogsClient: 'images': [{'url': artist.image_url}] if artist.image_url else [], } + return result + def get_album(self, release_id: str, include_tracks: bool = True) -> Optional[Dict[str, Any]]: """Get release/album details by Discogs ID. Tries master first, falls back to release.""" - # Try as master first (artist discography returns master IDs) - data = self._api_get(f'/masters/{release_id}') - if not data or not data.get('title'): - # Fall back to release - data = self._api_get(f'/releases/{release_id}') - if not data: - return None + cache = get_metadata_cache() + cached = cache.get_entity('discogs', 'album', release_id) + if cached and cached.get('title'): + data = cached + else: + # Try as master first (artist discography returns master IDs) + data = self._api_get(f'/masters/{release_id}') + if not data or not data.get('title'): + data = self._api_get(f'/releases/{release_id}') + if not data: + return None + cache.store_entity('discogs', 'album', release_id, data) album = Album.from_discogs_release(data) @@ -560,10 +616,15 @@ class DiscogsClient: def get_album_tracks(self, release_id: str) -> Optional[Dict[str, Any]]: """Get album tracks by Discogs release or master ID. Returns Spotify-compatible format.""" + cache = get_metadata_cache() + cache_key = f"{release_id}_tracks" + cached = cache.get_entity('discogs', 'album', cache_key) + if cached: + return cached + # Try as master first (master IDs are used in artist discography) data = self._api_get(f'/masters/{release_id}') if not data or not data.get('tracklist'): - # Fall back to release data = self._api_get(f'/releases/{release_id}') if not data or not data.get('tracklist'): return None @@ -630,13 +691,40 @@ class DiscogsClient: '_source': 'discogs', }) - return { + result = { 'items': tracks, 'total': len(tracks), 'limit': len(tracks), 'next': None, } + cache.store_entity('discogs', 'album', cache_key, result) + return result + + def _fetch_and_cache_artist(self, artist_id: str) -> Optional[Dict]: + """Fetch raw artist data with cache. Used by enrichment worker.""" + cache = get_metadata_cache() + cached = cache.get_entity('discogs', 'artist', str(artist_id)) + if cached and cached.get('name'): + return cached + data = self._api_get(f'/artists/{artist_id}') + if data: + cache.store_entity('discogs', 'artist', str(artist_id), data) + return data + + def _fetch_and_cache_album(self, release_id: str) -> Optional[Dict]: + """Fetch raw album/release data with cache. Used by enrichment worker.""" + cache = get_metadata_cache() + cached = cache.get_entity('discogs', 'album', str(release_id)) + if cached and cached.get('title'): + return cached + data = self._api_get(f'/masters/{release_id}') + if not data or not data.get('title'): + data = self._api_get(f'/releases/{release_id}') + if data: + cache.store_entity('discogs', 'album', str(release_id), data) + return data + def _get_artist_image_from_albums(self, artist_id: str) -> Optional[str]: """Get artist image by fetching their first album's cover art. Used as fallback when artist has no direct image.""" diff --git a/core/discogs_worker.py b/core/discogs_worker.py index 7563c378..6bcdda31 100644 --- a/core/discogs_worker.py +++ b/core/discogs_worker.py @@ -244,14 +244,14 @@ class DiscogsWorker: if existing_id: try: if item_type == 'artist': - data = self.client._api_get(f'/artists/{existing_id}') + data = self.client._fetch_and_cache_artist(existing_id) if data: self._update_artist(item_id, data) self.stats['matched'] += 1 logger.info(f"Enriched artist '{item_name}' from existing Discogs ID: {existing_id}") return elif item_type == 'album': - data = self.client._api_get(f'/releases/{existing_id}') + data = self.client._fetch_and_cache_album(existing_id) if data: self._update_album(item_id, data) self.stats['matched'] += 1 @@ -298,8 +298,8 @@ class DiscogsWorker: # Find best match by name similarity for result in results: if self._name_matches(artist_name, result.name): - # Fetch full artist detail - data = self.client._api_get(f'/artists/{result.id}') + # Fetch full artist detail (uses cache) + data = self.client._fetch_and_cache_artist(result.id) if data: self._update_artist(artist_id, data) self.stats['matched'] += 1 @@ -322,8 +322,8 @@ class DiscogsWorker: for result in results: if self._name_matches(album_name, result.name): - # Fetch full release detail - data = self.client._api_get(f'/releases/{result.id}') + # Fetch full release detail (uses cache) + data = self.client._fetch_and_cache_album(result.id) if data: self._update_album(album_id, data) self.stats['matched'] += 1 diff --git a/core/metadata_cache.py b/core/metadata_cache.py index 5e6544eb..6c13df6d 100644 --- a/core/metadata_cache.py +++ b/core/metadata_cache.py @@ -489,9 +489,9 @@ class MetadataCache: cursor = conn.cursor() stats = { - 'artists': {'spotify': 0, 'itunes': 0, 'deezer': 0}, - 'albums': {'spotify': 0, 'itunes': 0, 'deezer': 0}, - 'tracks': {'spotify': 0, 'itunes': 0, 'deezer': 0}, + 'artists': {'spotify': 0, 'itunes': 0, 'deezer': 0, 'discogs': 0}, + 'albums': {'spotify': 0, 'itunes': 0, 'deezer': 0, 'discogs': 0}, + 'tracks': {'spotify': 0, 'itunes': 0, 'deezer': 0, 'discogs': 0}, 'searches': 0, 'total_entries': 0, 'total_hits': 0, @@ -541,9 +541,9 @@ class MetadataCache: except Exception as e: logger.error(f"Cache stats error: {e}") return { - 'artists': {'spotify': 0, 'itunes': 0, 'deezer': 0}, - 'albums': {'spotify': 0, 'itunes': 0, 'deezer': 0}, - 'tracks': {'spotify': 0, 'itunes': 0, 'deezer': 0}, + 'artists': {'spotify': 0, 'itunes': 0, 'deezer': 0, 'discogs': 0}, + 'albums': {'spotify': 0, 'itunes': 0, 'deezer': 0, 'discogs': 0}, + 'tracks': {'spotify': 0, 'itunes': 0, 'deezer': 0, 'discogs': 0}, 'searches': 0, 'total_entries': 0, 'total_hits': 0, 'oldest': None, 'newest': None, } @@ -848,6 +848,8 @@ class MetadataCache: return self._extract_deezer_fields(entity_type, raw_data) elif source == 'beatport': return self._extract_beatport_fields(entity_type, raw_data) + elif source == 'discogs': + return self._extract_discogs_fields(entity_type, raw_data) return {'name': str(raw_data.get('name', raw_data.get('trackName', '')))} def _extract_spotify_fields(self, entity_type: str, data: dict) -> dict: @@ -1043,6 +1045,56 @@ class MetadataCache: return fields + def _extract_discogs_fields(self, entity_type: str, data: dict) -> dict: + """Extract fields from Discogs API response.""" + fields = {} + + # Discogs uses 'name' for artists, 'title' for releases/masters + if entity_type == 'artist': + fields['name'] = data.get('name', data.get('title', '')) + fields['genres'] = json.dumps([]) + fields['popularity'] = 0 + fields['followers'] = 0 + # Images array — prefer primary type + images = data.get('images', []) + if images: + primary = next((img for img in images if img.get('type') == 'primary'), None) + fields['image_url'] = (primary or images[0]).get('uri') + # Search results use cover_image/thumb + if not fields.get('image_url'): + img = data.get('cover_image') or data.get('thumb') + if img and 'spacer.gif' not in img: + fields['image_url'] = img + + elif entity_type == 'album': + # Handle "Artist - Title" format from search results + raw_title = data.get('title', '') + if data.get('artists'): + fields['name'] = raw_title + fields['artist_name'] = data['artists'][0].get('name', '') + elif ' - ' in raw_title: + parts = raw_title.split(' - ', 1) + fields['artist_name'] = parts[0].strip() + fields['name'] = parts[1].strip() + else: + fields['name'] = raw_title + + fields['release_date'] = str(data.get('year', '')) if data.get('year') else '' + tracklist = data.get('tracklist', []) + fields['total_tracks'] = len(tracklist) if tracklist else 0 + fields['genres'] = json.dumps(data.get('genres', [])) + + images = data.get('images', []) + if images: + primary = next((img for img in images if img.get('type') == 'primary'), None) + fields['image_url'] = (primary or images[0]).get('uri') + if not fields.get('image_url'): + img = data.get('cover_image') or data.get('thumb') + if img and 'spacer.gif' not in img: + fields['image_url'] = img + + return fields + def _extract_beatport_fields(self, entity_type: str, data: dict) -> dict: """Extract fields from Beatport enriched track data.""" fields = {} diff --git a/webui/index.html b/webui/index.html index 7b7c047f..a90ace01 100644 --- a/webui/index.html +++ b/webui/index.html @@ -6625,6 +6625,7 @@
+
@@ -6654,6 +6655,10 @@ MusicBrainz 0 +
+ Discogs + 0 +
Total Hits 0 @@ -6677,6 +6682,7 @@ +