From 5bc5fbb662807b8208490263065905e096c5ca84 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Mon, 18 May 2026 18:47:13 -0700 Subject: [PATCH 1/3] Add MusicBrainz as a metadata source Register MusicBrainz as a first-class metadata source alongside Deezer, iTunes, Spotify, Discogs, and Hydrabase. Expose the shared client through metadata services, add the settings option, and expand the MusicBrainz search adapter with source-compatible artist, album, track, and detail methods. Carry MusicBrainz IDs through similar-artist discovery, recommended artists, artist map serialization, and personalized playlist selection. Update DB migrations and lookup filters so similar_artist_musicbrainz_id is preserved on older schemas and used for source requirements and library exclusion. Normalize MusicBrainz album adapter output for import context and add regression coverage for registry mapping, typed album conversion, and similar-artist filtering. Verified by user with 120 focused tests passing. --- api/serializers.py | 1 + core/artists/map.py | 42 ++++-- core/discovery/hero.py | 32 +++- core/metadata/__init__.py | 2 + core/metadata/registry.py | 45 +++++- core/metadata/types.py | 47 +++++- core/metadata_service.py | 2 + core/musicbrainz_search.py | 137 +++++++++++++++++- core/personalized_playlists.py | 15 +- core/watchlist_scanner.py | 41 ++++-- database/music_database.py | 72 +++++++-- .../test_similar_artists_library_filter.py | 40 ++++- tests/metadata/test_metadata_registry.py | 10 ++ tests/metadata/test_typed_metadata_types.py | 25 ++++ tools/diagnose_itunes_discover.py | 8 + web_server.py | 13 +- webui/index.html | 3 +- webui/static/settings.js | 15 +- 18 files changed, 470 insertions(+), 80 deletions(-) diff --git a/api/serializers.py b/api/serializers.py index f7b0c1be..2102818d 100644 --- a/api/serializers.py +++ b/api/serializers.py @@ -368,6 +368,7 @@ def serialize_similar_artist(obj, fields: Optional[Set[str]] = None) -> dict: "source_artist_id": d.get("source_artist_id"), "similar_artist_spotify_id": d.get("similar_artist_spotify_id"), "similar_artist_itunes_id": d.get("similar_artist_itunes_id"), + "similar_artist_musicbrainz_id": d.get("similar_artist_musicbrainz_id"), "similar_artist_name": d.get("similar_artist_name"), "similarity_rank": d.get("similarity_rank"), "occurrence_count": d.get("occurrence_count"), diff --git a/core/artists/map.py b/core/artists/map.py index c2ceb0c5..71a81b12 100644 --- a/core/artists/map.py +++ b/core/artists/map.py @@ -140,7 +140,7 @@ def get_artist_map_data(): placeholders = ','.join(['?'] * len(watchlist_ids)) cursor.execute(f""" SELECT source_artist_id, similar_artist_name, similar_artist_spotify_id, - similar_artist_itunes_id, similar_artist_deezer_id, + similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, similarity_rank, occurrence_count, image_url, genres, popularity FROM similar_artists WHERE profile_id = ? AND source_artist_id IN ({placeholders}) @@ -173,6 +173,7 @@ def get_artist_map_data(): 'spotify_id': r.get('similar_artist_spotify_id') or '', 'itunes_id': r.get('similar_artist_itunes_id') or '', 'deezer_id': r.get('similar_artist_deezer_id') or '', + 'musicbrainz_id': r.get('similar_artist_musicbrainz_id') or '', 'rank': r.get('similarity_rank', 5), 'occurrence': r.get('occurrence_count', 1), 'popularity': r.get('popularity', 0), @@ -245,7 +246,7 @@ def get_artist_map_data(): } # Apply cache data to nodes - source_id_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'} + source_id_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} for n in nodes: nn = _norm(n['name']) cached = cache_by_name.get(nn) @@ -369,14 +370,14 @@ def get_artist_map_genres(): def _norm(n): return (n or '').lower().strip() - def _add(name, image_url=None, genres=None, spotify_id=None, itunes_id=None, deezer_id=None, discogs_id=None, source='unknown', popularity=0): + def _add(name, image_url=None, genres=None, spotify_id=None, itunes_id=None, deezer_id=None, discogs_id=None, musicbrainz_id=None, source='unknown', popularity=0): n = _norm(name) if not n or len(n) < 2: return if n not in artists_by_name: artists_by_name[n] = { 'name': name, 'image_url': '', 'genres': set(), - 'spotify_id': '', 'itunes_id': '', 'deezer_id': '', 'discogs_id': '', + 'spotify_id': '', 'itunes_id': '', 'deezer_id': '', 'discogs_id': '', 'musicbrainz_id': '', 'sources': set(), 'popularity': 0 } a = artists_by_name[n] @@ -394,6 +395,8 @@ def get_artist_map_genres(): a['deezer_id'] = str(deezer_id) if discogs_id and not a['discogs_id']: a['discogs_id'] = str(discogs_id) + if musicbrainz_id and not a['musicbrainz_id']: + a['musicbrainz_id'] = str(musicbrainz_id) if popularity > a['popularity']: a['popularity'] = popularity a['sources'].add(source) @@ -410,14 +413,14 @@ def get_artist_map_genres(): genres = json.loads(r['genres']) if isinstance(r['genres'], str) else [] except Exception as e: logger.debug("cache artist genres parse failed: %s", e) - src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'} + src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} kwargs = {src_map.get(r['source'], 'spotify_id'): r['entity_id']} _add(r['name'], image_url=r['image_url'], genres=genres, source='cache', popularity=r['popularity'] or 0, **kwargs) # 2. Similar artists cursor.execute(""" SELECT similar_artist_name, similar_artist_spotify_id, similar_artist_itunes_id, - similar_artist_deezer_id, image_url, genres, popularity + similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity FROM similar_artists WHERE profile_id = ? """, (profile_id,)) for r in cursor.fetchall(): @@ -429,7 +432,9 @@ def get_artist_map_genres(): logger.debug("similar artist genres parse failed: %s", e) _add(r['similar_artist_name'], image_url=r['image_url'], genres=genres, spotify_id=r['similar_artist_spotify_id'], itunes_id=r['similar_artist_itunes_id'], - deezer_id=r['similar_artist_deezer_id'], source='similar', popularity=r['popularity'] or 0) + deezer_id=r['similar_artist_deezer_id'], + musicbrainz_id=r['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in r.keys() else None, + source='similar', popularity=r['popularity'] or 0) # 3. Watchlist artists cursor.execute(""" @@ -483,6 +488,7 @@ def get_artist_map_genres(): 'itunes_id': a['itunes_id'], 'deezer_id': a['deezer_id'], 'discogs_id': a['discogs_id'], + 'musicbrainz_id': a['musicbrainz_id'], 'popularity': a['popularity'], 'type': 'watchlist' if 'watchlist' in a['sources'] else 'similar', }) @@ -648,7 +654,7 @@ def get_artist_map_explore(): center_name = row['name'] if row['image_url'] and row['image_url'].startswith('http'): center_image = row['image_url'] - src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'} + src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} k = src_map.get(row['source'], 'spotify_id') center_ids[k] = row['entity_id'] if row['genres']: @@ -717,7 +723,7 @@ def get_artist_map_explore(): WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE """, (center_name,)) for r in cursor.fetchall(): - src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'} + src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} k = src_map.get(r['source'], 'spotify_id') if not center_ids.get(k): center_ids[k] = r['entity_id'] @@ -746,7 +752,7 @@ def get_artist_map_explore(): placeholders = ','.join(['?'] * len(id_values)) cursor.execute(f""" SELECT DISTINCT similar_artist_name, similar_artist_spotify_id, - similar_artist_itunes_id, similar_artist_deezer_id, + similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity, similarity_rank FROM similar_artists WHERE source_artist_id IN ({placeholders}) AND profile_id = ? @@ -757,7 +763,7 @@ def get_artist_map_explore(): # Also search by name (the center artist might be a watchlist source) cursor.execute(""" SELECT DISTINCT sa.similar_artist_name, sa.similar_artist_spotify_id, - sa.similar_artist_itunes_id, sa.similar_artist_deezer_id, + sa.similar_artist_itunes_id, sa.similar_artist_deezer_id, sa.similar_artist_musicbrainz_id, sa.image_url, sa.genres, sa.popularity, sa.similarity_rank FROM similar_artists sa JOIN watchlist_artists wa ON sa.source_artist_id = COALESCE(wa.spotify_artist_id, wa.itunes_artist_id, CAST(wa.id AS TEXT)) @@ -789,7 +795,8 @@ def get_artist_map_explore(): image_url=sa.get('image_url'), genres=sa.get('genres'), popularity=sa.get('popularity', 0), - similar_artist_deezer_id=sa.get('deezer_id') + similar_artist_deezer_id=sa.get('deezer_id'), + similar_artist_musicbrainz_id=sa.get('musicbrainz_id'), ) except Exception as e: logger.debug("similar artist insert failed: %s", e) @@ -798,7 +805,7 @@ def get_artist_map_explore(): placeholders = ','.join(['?'] * len(id_values)) cursor.execute(f""" SELECT DISTINCT similar_artist_name, similar_artist_spotify_id, - similar_artist_itunes_id, similar_artist_deezer_id, + similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity, similarity_rank FROM similar_artists WHERE source_artist_id IN ({placeholders}) AND profile_id = ? @@ -809,7 +816,7 @@ def get_artist_map_explore(): # Fallback: query by name-based source ID cursor.execute(""" SELECT DISTINCT similar_artist_name, similar_artist_spotify_id, - similar_artist_itunes_id, similar_artist_deezer_id, + similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity, similarity_rank FROM similar_artists WHERE source_artist_id = ? AND profile_id = ? @@ -841,6 +848,7 @@ def get_artist_map_explore(): 'spotify_id': r['similar_artist_spotify_id'] or '', 'itunes_id': r['similar_artist_itunes_id'] or '', 'deezer_id': r['similar_artist_deezer_id'] or '', + 'musicbrainz_id': r['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in r.keys() else '', 'discogs_id': '', 'popularity': r['popularity'] or 0, 'rank': r['similarity_rank'] or 5, @@ -861,7 +869,8 @@ def get_artist_map_explore(): cursor.execute(f""" SELECT DISTINCT source_artist_id, similar_artist_name, similar_artist_spotify_id, similar_artist_itunes_id, - similar_artist_deezer_id, image_url, genres, popularity, similarity_rank + similar_artist_deezer_id, similar_artist_musicbrainz_id, + image_url, genres, popularity, similarity_rank FROM similar_artists WHERE source_artist_id IN ({placeholders}) AND profile_id = ? ORDER BY similarity_rank ASC @@ -902,6 +911,7 @@ def get_artist_map_explore(): 'spotify_id': r['similar_artist_spotify_id'] or '', 'itunes_id': r['similar_artist_itunes_id'] or '', 'deezer_id': r['similar_artist_deezer_id'] or '', + 'musicbrainz_id': r['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in r.keys() else '', 'discogs_id': '', 'popularity': r['popularity'] or 0, 'rank': r['similarity_rank'] or 5, @@ -935,7 +945,7 @@ def get_artist_map_explore(): except Exception as e: logger.debug("explorer node genres parse failed: %s", e) # Harvest missing IDs from cache - src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'} + src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} k = src_map.get(cr['source']) if k and not n.get(k): n[k] = cr['entity_id'] diff --git a/core/discovery/hero.py b/core/discovery/hero.py index 41143924..420de342 100644 --- a/core/discovery/hero.py +++ b/core/discovery/hero.py @@ -96,6 +96,8 @@ def get_discover_hero(): artist_id = artist.spotify_artist_id elif active_source == 'deezer': artist_id = getattr(artist, 'deezer_artist_id', None) or artist.itunes_artist_id + elif active_source == 'musicbrainz': + artist_id = getattr(artist, 'musicbrainz_artist_id', None) or artist.itunes_artist_id else: artist_id = artist.itunes_artist_id if not artist_id: @@ -125,7 +127,7 @@ def get_discover_hero(): valid_artists = list(similar_artists) # FALLBACK: If no valid artists for fallback source, try to resolve IDs on-the-fly - if active_source in ('itunes', 'deezer') and not valid_artists: + if active_source in ('itunes', 'deezer', 'musicbrainz') and not valid_artists: logger.warning(f"[{active_source} Fallback] No artists with {active_source} IDs found, attempting on-the-fly resolution for {len(similar_artists)} artists") resolved_count = 0 for artist in similar_artists: @@ -135,13 +137,20 @@ def get_discover_hero(): continue # Try to resolve ID by name try: - search_results = itunes_client.search_artists(artist.similar_artist_name, limit=1) + resolve_client = itunes_client + if active_source == 'musicbrainz': + from core.metadata.registry import get_musicbrainz_client + resolve_client = get_musicbrainz_client() + search_results = resolve_client.search_artists(artist.similar_artist_name, limit=1) if search_results and len(search_results) > 0: resolved_id = search_results[0].id # Cache the resolved ID for future use if active_source == 'deezer': database.update_similar_artist_deezer_id(artist.id, resolved_id) artist.similar_artist_deezer_id = resolved_id + elif active_source == 'musicbrainz': + database.update_similar_artist_musicbrainz_id(artist.id, resolved_id) + artist.similar_artist_musicbrainz_id = resolved_id else: database.update_similar_artist_itunes_id(artist.id, resolved_id) artist.similar_artist_itunes_id = resolved_id @@ -173,12 +182,15 @@ def get_discover_hero(): artist_id = artist.similar_artist_spotify_id or artist.similar_artist_itunes_id elif active_source == 'deezer': artist_id = getattr(artist, 'similar_artist_deezer_id', None) or artist.similar_artist_itunes_id or artist.similar_artist_spotify_id + elif active_source == 'musicbrainz': + artist_id = getattr(artist, 'similar_artist_musicbrainz_id', None) or artist.similar_artist_itunes_id or artist.similar_artist_spotify_id else: artist_id = artist.similar_artist_itunes_id or artist.similar_artist_spotify_id artist_data = { "spotify_artist_id": artist.similar_artist_spotify_id, "itunes_artist_id": artist.similar_artist_itunes_id, + "musicbrainz_artist_id": getattr(artist, 'similar_artist_musicbrainz_id', None), "artist_id": artist_id, "artist_name": artist.similar_artist_name, "occurrence_count": artist.occurrence_count, @@ -207,11 +219,19 @@ def get_discover_hero(): artist.id, artist_data.get('image_url'), artist_data.get('genres'), artist_data.get('popularity') ) - elif active_source in ('itunes', 'deezer'): - fb_artist_id = getattr(artist, 'similar_artist_deezer_id', None) if active_source == 'deezer' else None - fb_artist_id = fb_artist_id or artist.similar_artist_itunes_id + elif active_source in ('itunes', 'deezer', 'musicbrainz'): + if active_source == 'deezer': + fb_artist_id = getattr(artist, 'similar_artist_deezer_id', None) or artist.similar_artist_itunes_id + fetch_client = itunes_client + elif active_source == 'musicbrainz': + fb_artist_id = getattr(artist, 'similar_artist_musicbrainz_id', None) + from core.metadata.registry import get_musicbrainz_client + fetch_client = get_musicbrainz_client() + else: + fb_artist_id = artist.similar_artist_itunes_id + fetch_client = itunes_client if fb_artist_id: - fb_artist_data = itunes_client.get_artist(fb_artist_id) + fb_artist_data = fetch_client.get_artist(fb_artist_id) if fb_artist_data: artist_data['artist_name'] = fb_artist_data.get('name', artist.similar_artist_name) artist_data['image_url'] = fb_artist_data.get('images', [{}])[0].get('url') if fb_artist_data.get('images') else None diff --git a/core/metadata/__init__.py b/core/metadata/__init__.py index e7142adf..ee89ee19 100644 --- a/core/metadata/__init__.py +++ b/core/metadata/__init__.py @@ -31,6 +31,7 @@ from core.metadata.registry import ( get_discogs_client, get_hydrabase_client, get_itunes_client, + get_musicbrainz_client, get_primary_client, get_primary_source, get_spotify_client_for_profile, @@ -82,6 +83,7 @@ __all__ = [ "get_metadata_cache", "get_metadata_source_status", "get_metadata_service", + "get_musicbrainz_client", "get_musicmap_similar_artists", "get_primary_client", "get_primary_source", diff --git a/core/metadata/registry.py b/core/metadata/registry.py index ef7fbc8f..3299d55a 100644 --- a/core/metadata/registry.py +++ b/core/metadata/registry.py @@ -18,13 +18,14 @@ logger = get_logger("metadata.registry") MetadataClientFactory = Callable[[], Any] -METADATA_SOURCE_PRIORITY = ("deezer", "itunes", "spotify", "discogs", "hydrabase") +METADATA_SOURCE_PRIORITY = ("deezer", "itunes", "spotify", "discogs", "hydrabase", "musicbrainz") METADATA_SOURCE_LABELS = { "spotify": "Spotify", "itunes": "iTunes", "deezer": "Deezer", "discogs": "Discogs", "hydrabase": "Hydrabase", + "musicbrainz": "MusicBrainz", } _UNSET = object() @@ -148,6 +149,14 @@ def _get_amazon_factory(client_factory: Optional[MetadataClientFactory]) -> Meta return AmazonClient +def _get_musicbrainz_factory(client_factory: Optional[MetadataClientFactory]) -> MetadataClientFactory: + if client_factory is not None: + return client_factory + from core.musicbrainz_search import MusicBrainzSearchClient + + return MusicBrainzSearchClient + + def get_spotify_client(client_factory: Optional[MetadataClientFactory] = None): """Get shared Spotify client. @@ -280,6 +289,18 @@ def get_amazon_client(client_factory: Optional[MetadataClientFactory] = None): return client +def get_musicbrainz_client(client_factory: Optional[MetadataClientFactory] = None): + """Get cached MusicBrainz primary source client.""" + cache_key = "musicbrainz" + factory = _get_musicbrainz_factory(client_factory) + with _client_cache_lock: + client = _client_cache.get(cache_key) + if client is None: + client = factory() + _client_cache[cache_key] = client + return client + + def is_hydrabase_enabled() -> bool: """Return True when Hydrabase is connected and app-enabled.""" try: @@ -308,24 +329,26 @@ def get_hydrabase_client(allow_fallback: bool = True, require_enabled: bool = Tr def get_primary_source(spotify_client_factory: Optional[MetadataClientFactory] = None) -> str: """Return configured primary metadata source.""" - source = _get_config_value("metadata.fallback_source", "deezer") or "deezer" + _default = METADATA_SOURCE_PRIORITY[0] + source = _get_config_value("metadata.fallback_source", _default) or _default if source == "spotify": try: spotify = get_spotify_client(client_factory=spotify_client_factory) if not spotify or not spotify.is_spotify_authenticated(): - return "deezer" + return _default except Exception: - return "deezer" + return _default return source def get_spotify_disconnect_source(configured_source: Optional[str] = None) -> str: """Return the active metadata source after Spotify is disconnected.""" - source = configured_source if configured_source is not None else _get_config_value("metadata.fallback_source", "deezer") - source = source or "deezer" - return "deezer" if source == "spotify" else source + _default = METADATA_SOURCE_PRIORITY[0] + source = configured_source if configured_source is not None else _get_config_value("metadata.fallback_source", _default) + source = source or _default + return _default if source == "spotify" else source def get_metadata_source_label(source: str) -> str: @@ -352,6 +375,7 @@ def get_primary_client( deezer_client_factory: Optional[MetadataClientFactory] = None, discogs_client_factory: Optional[MetadataClientFactory] = None, amazon_client_factory: Optional[MetadataClientFactory] = None, + musicbrainz_client_factory: Optional[MetadataClientFactory] = None, ): """Return client for configured primary source.""" return get_client_for_source( @@ -361,6 +385,7 @@ def get_primary_client( deezer_client_factory=deezer_client_factory, discogs_client_factory=discogs_client_factory, amazon_client_factory=amazon_client_factory, + musicbrainz_client_factory=musicbrainz_client_factory, ) @@ -371,6 +396,7 @@ def get_primary_source_status( deezer_client_factory: Optional[MetadataClientFactory] = None, discogs_client_factory: Optional[MetadataClientFactory] = None, amazon_client_factory: Optional[MetadataClientFactory] = None, + musicbrainz_client_factory: Optional[MetadataClientFactory] = None, ) -> Dict[str, Any]: """Return a generic status snapshot for the active primary metadata source.""" source = _get_config_value("metadata.fallback_source", "deezer") or "deezer" @@ -385,6 +411,7 @@ def get_primary_source_status( deezer_client_factory=deezer_client_factory, discogs_client_factory=discogs_client_factory, amazon_client_factory=amazon_client_factory, + musicbrainz_client_factory=musicbrainz_client_factory, ) if source == "spotify": connected = bool(client and client.is_spotify_authenticated()) @@ -412,6 +439,7 @@ def get_client_for_source( deezer_client_factory: Optional[MetadataClientFactory] = None, discogs_client_factory: Optional[MetadataClientFactory] = None, amazon_client_factory: Optional[MetadataClientFactory] = None, + musicbrainz_client_factory: Optional[MetadataClientFactory] = None, ): """Return exact client for a source, or None if unavailable.""" if source == "spotify": @@ -438,4 +466,7 @@ def get_client_for_source( if source == "amazon": return get_amazon_client(client_factory=amazon_client_factory) + if source == "musicbrainz": + return get_musicbrainz_client(client_factory=musicbrainz_client_factory) + return None diff --git a/core/metadata/types.py b/core/metadata/types.py index 00c9749b..27d5a7c5 100644 --- a/core/metadata/types.py +++ b/core/metadata/types.py @@ -333,7 +333,52 @@ class Album: @classmethod def from_musicbrainz_dict(cls, raw: Dict[str, Any]) -> 'Album': - """MusicBrainz ``/release/{mbid}`` response shape (release, not release-group).""" + """MusicBrainz album shape. + + Accepts both raw ``/release/{mbid}`` responses and the normalized + MusicBrainz search adapter shape used by app-facing metadata clients. + """ + if raw.get('name') and not raw.get('title'): + artists = raw.get('artists') or [] + artist_names = [] + primary_artist_id = '' + for artist in artists: + if isinstance(artist, dict): + name = _str(artist.get('name')) + if name: + artist_names.append(name) + if not primary_artist_id and artist.get('id'): + primary_artist_id = _str(artist['id']) + else: + name = _str(artist) + if name: + artist_names.append(name) + + images = raw.get('images') or [] + image_url = '' + if images and isinstance(images[0], dict): + image_url = _str(images[0].get('url')) + image_url = image_url or _str(raw.get('image_url')) + + external_ids = {} + if raw.get('id'): + external_ids['musicbrainz'] = _str(raw['id']) + + return cls( + id=_str(raw.get('id')), + name=_str(raw.get('name')), + artists=artist_names or ['Unknown Artist'], + release_date=_str(raw.get('release_date')), + total_tracks=_int(raw.get('total_tracks')), + album_type=_str(raw.get('album_type'), default='album') or 'album', + image_url=image_url or None, + artist_id=primary_artist_id or None, + genres=list(raw.get('genres') or []), + source='musicbrainz', + external_ids=external_ids, + external_urls=dict(raw.get('external_urls') or {}), + ) + artist_credit = raw.get('artist-credit') or [] artist_names = [] primary_artist_id = '' diff --git a/core/metadata_service.py b/core/metadata_service.py index 748a9cf9..a0bdadc1 100644 --- a/core/metadata_service.py +++ b/core/metadata_service.py @@ -45,6 +45,7 @@ from core.metadata.registry import ( get_amazon_client, get_client_for_source, get_deezer_client, + get_musicbrainz_client, get_discogs_client, get_hydrabase_client, get_itunes_client, @@ -77,6 +78,7 @@ except Exception: # pragma: no cover - optional dependency fallback __all__ = [ "METADATA_SOURCE_PRIORITY", "get_amazon_client", + "get_musicbrainz_client", "MetadataCache", "MetadataLookupOptions", "MetadataProvider", diff --git a/core/musicbrainz_search.py b/core/musicbrainz_search.py index 965317be..0047e6c5 100644 --- a/core/musicbrainz_search.py +++ b/core/musicbrainz_search.py @@ -678,7 +678,131 @@ class MusicBrainzSearchClient: return sorted(releases, key=_key)[0] - def get_album(self, album_mbid: str) -> Optional[Dict[str, Any]]: + def is_authenticated(self) -> bool: + return True + + def reload_config(self) -> None: + pass + + def get_track_features(self, track_id: str) -> None: + return None + + def get_user_info(self) -> None: + return None + + def get_track_details(self, track_id: str) -> Optional[Dict[str, Any]]: + """Return Spotify-compatible track detail dict by recording MBID.""" + try: + rec = self._client.get_recording(track_id, includes=['releases', 'artist-credits', 'release-groups']) + if not rec: + return None + releases = rec.get('releases', []) or [] + releases.sort(key=self._release_preference_key) + first_rel = releases[0] if releases else {} + rg = first_rel.get('release-group', {}) or {} + release_id = first_rel.get('id', '') + rg_id = rg.get('id', '') + image_url = self._cached_art(release_id, rg_id) + artists = _extract_artist_credit(rec.get('artist-credit', [])) + return { + 'id': rec.get('id', ''), + 'name': rec.get('title', ''), + 'artists': [{'name': a, 'id': ''} for a in artists], + 'album': { + 'id': rg_id or release_id, + 'name': first_rel.get('title', ''), + 'images': [{'url': image_url, 'height': 250, 'width': 250}] if image_url else [], + 'release_date': first_rel.get('date') or rg.get('first-release-date') or '', + }, + 'duration_ms': rec.get('length') or 0, + 'track_number': 1, + 'disc_number': 1, + 'preview_url': None, + 'popularity': 0, + 'external_urls': {'musicbrainz': f'https://musicbrainz.org/recording/{track_id}'}, + } + except Exception as e: + logger.error(f'get_track_details({track_id}) error: {e}') + return None + + def get_album_tracks(self, album_mbid: str) -> Optional[Dict[str, Any]]: + """Return {items: [...], total: N} track listing for a release/release-group MBID.""" + album = self.get_album(album_mbid, include_tracks=True) + if album is None: + return None + flat = album.get('tracks', []) + if isinstance(flat, dict): + return flat + return {'items': flat, 'total': len(flat)} + + def get_artist(self, artist_id: str) -> Optional[Dict[str, Any]]: + """Return Spotify-compatible artist detail dict.""" + try: + artist = self._client.get_artist(artist_id, includes=['tags', 'url-rels']) + if not artist: + return None + genres = [t['name'] for t in (artist.get('tags') or []) if isinstance(t, dict) and t.get('name')] + return { + 'id': artist.get('id', artist_id), + 'name': artist.get('name', ''), + 'genres': genres, + 'followers': {'total': 0}, + 'popularity': 0, + 'images': [], + 'external_urls': {'musicbrainz': f'https://musicbrainz.org/artist/{artist_id}'}, + } + except Exception as e: + logger.error(f'get_artist({artist_id}) error: {e}') + return None + + def get_artist_top_tracks(self, artist_id: str, limit: int = 10) -> List[Dict[str, Any]]: + """Return top recordings for an artist, deduplicated by title and sorted by year.""" + try: + recs = self._client.search_recordings_by_artist_mbid(artist_id, limit=100) + for r in recs: + rels = r.get('releases') or [] + if rels: + rels.sort(key=self._release_preference_key) + r['releases'] = rels + studio = [r for r in recs if self._has_studio_release(r)] + recs = studio or recs + seen: set = set() + deduped = [] + for r in recs: + key = (r.get('title') or '').lower().strip() + if not key or key in seen: + continue + seen.add(key) + deduped.append(r) + results = [] + for r in deduped[:limit]: + releases = r.get('releases', []) + first_rel = releases[0] if releases else {} + rg = first_rel.get('release-group', {}) or {} + release_id = first_rel.get('id', '') + rg_id = rg.get('id', '') + artists = _extract_artist_credit(r.get('artist-credit', [])) + image_url = self._cached_art(release_id, rg_id) + results.append({ + 'id': r.get('id', ''), + 'name': r.get('title', ''), + 'artists': [{'name': a, 'id': ''} for a in artists], + 'album': { + 'id': rg_id or release_id, + 'name': first_rel.get('title', ''), + 'images': [{'url': image_url}] if image_url else [], + }, + 'duration_ms': r.get('length') or 0, + 'popularity': 0, + 'preview_url': None, + 'external_urls': {'musicbrainz': f'https://musicbrainz.org/recording/{r.get("id", "")}'}, + }) + return results + except Exception as e: + logger.error(f'get_artist_top_tracks({artist_id}) error: {e}') + return [] + + def get_album(self, album_mbid: str, include_tracks: bool = True) -> Optional[Dict[str, Any]]: """Get full album details with track listing for download modal. The MBID passed in could be either: @@ -713,10 +837,15 @@ class MusicBrainzSearchClient: album['external_urls'] = { 'musicbrainz': f'https://musicbrainz.org/release-group/{album_mbid}' } + if not include_tracks: + album.pop('tracks', None) return album # Path B: release MBID (text-search fallback path) - return self._render_release_as_album(album_mbid) + album = self._render_release_as_album(album_mbid) + if album and not include_tracks: + album.pop('tracks', None) + return album except Exception as e: logger.error(f"MusicBrainz album detail failed for {album_mbid}: {e}") return None @@ -789,7 +918,7 @@ class MusicBrainzSearchClient: 'external_urls': {'musicbrainz': f'https://musicbrainz.org/release/{release_mbid}'}, } - def get_artist_albums(self, artist_mbid: str, album_type: str = 'album,single') -> List: + def get_artist_albums(self, artist_mbid: str, album_type: str = 'album,single', limit: int = 200) -> List: """Get artist's releases for discography view.""" try: artist = self._client.get_artist(artist_mbid, includes=['release-groups']) @@ -814,7 +943,7 @@ class MusicBrainzSearchClient: image_url=image_url, external_urls={'musicbrainz': f'https://musicbrainz.org/release-group/{rg_mbid}'}, )) - return albums + return albums[:limit] except Exception as e: logger.warning(f"MusicBrainz artist albums failed: {e}") return [] diff --git a/core/personalized_playlists.py b/core/personalized_playlists.py index 602520ab..ec2505dd 100644 --- a/core/personalized_playlists.py +++ b/core/personalized_playlists.py @@ -902,7 +902,9 @@ class PersonalizedPlaylistsService: with self.database._get_connection() as conn: cursor = conn.cursor() cursor.execute(""" - SELECT similar_artist_spotify_id, similar_artist_name + SELECT similar_artist_spotify_id, similar_artist_itunes_id, + similar_artist_deezer_id, similar_artist_musicbrainz_id, + similar_artist_name FROM similar_artists WHERE source_artist_id = ? ORDER BY similarity_rank ASC @@ -911,9 +913,16 @@ class PersonalizedPlaylistsService: db_results = cursor.fetchall() if db_results: + source_id_col = { + 'spotify': 'similar_artist_spotify_id', + 'itunes': 'similar_artist_itunes_id', + 'deezer': 'similar_artist_deezer_id', + 'musicbrainz': 'similar_artist_musicbrainz_id', + }.get(active_source, 'similar_artist_itunes_id') for row in db_results: - artist_id = row['similar_artist_spotify_id'] - artist_name = row['similar_artist_name'] + r = dict(row) + artist_id = r.get(source_id_col) or r.get('similar_artist_spotify_id') or r.get('similar_artist_itunes_id') + artist_name = r['similar_artist_name'] if artist_id and artist_id not in seen_artist_ids: all_similar_artists.append({'id': artist_id, 'name': artist_name}) seen_artist_ids.add(artist_id) diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index 5bf82809..694da08b 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -520,12 +520,8 @@ class WatchlistScanner: return list(get_source_priority(get_primary_source())) def _discovery_source_priority(self) -> List[str]: - """Return discovery sources in configured priority order. - - Discovery pool writes only support Spotify, iTunes, and Deezer IDs, so - we filter the broader metadata priority list down to those sources. - """ - return [source for source in self._watchlist_source_priority() if source in {'spotify', 'itunes', 'deezer'}] + """Return discovery sources in configured priority order.""" + return [source for source in self._watchlist_source_priority() if source in {'spotify', 'itunes', 'deezer', 'musicbrainz'}] @staticmethod def _artist_id_attribute_for_source(source: str) -> Optional[str]: @@ -544,6 +540,7 @@ class WatchlistScanner: 'spotify': 'similar_artist_spotify_id', 'itunes': 'similar_artist_itunes_id', 'deezer': 'similar_artist_deezer_id', + 'musicbrainz': 'similar_artist_musicbrainz_id', }.get(source) @staticmethod @@ -2372,6 +2369,7 @@ class WatchlistScanner: 'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', + 'musicbrainz': 'musicbrainz_id', } searched_source_ids = {} available_sources = [] @@ -2403,6 +2401,7 @@ class WatchlistScanner: 'spotify_id': None, 'itunes_id': None, 'deezer_id': None, + 'musicbrainz_id': None, 'image_url': None, 'genres': [], 'popularity': 0, @@ -2470,6 +2469,8 @@ class WatchlistScanner: return self.database.update_similar_artist_deezer_id(similar_artist_id, source_id) if source == 'itunes': return self.database.update_similar_artist_itunes_id(similar_artist_id, source_id) + if source == 'musicbrainz': + return self.database.update_similar_artist_musicbrainz_id(similar_artist_id, source_id) return False def _backfill_similar_artists_fallback_ids(self, source_artist_id: str, profile_id: int = 1) -> int: @@ -2480,7 +2481,7 @@ class WatchlistScanner: writable similar-artist ID columns. This keeps old cached rows usable when the active metadata provider changes. """ - backfill_sources = [source for source in self._discovery_source_priority() if source in {'itunes', 'deezer'}] + backfill_sources = [source for source in self._discovery_source_priority() if source in {'itunes', 'deezer', 'musicbrainz'}] if not backfill_sources: logger.debug("No fallback metadata providers available for similar-artist backfill") return 0 @@ -2582,14 +2583,18 @@ class WatchlistScanner: image_url=similar_artist.get('image_url'), genres=similar_artist.get('genres'), popularity=similar_artist.get('popularity', 0), - similar_artist_deezer_id=similar_artist.get('deezer_id') + similar_artist_deezer_id=similar_artist.get('deezer_id'), + similar_artist_musicbrainz_id=similar_artist.get('musicbrainz_id'), ) if success: stored_count += 1 - fallback_id = similar_artist.get('deezer_id') or similar_artist.get('itunes_id') - fallback_label = 'Deezer' if similar_artist.get('deezer_id') else 'iTunes' - logger.debug(f" #{rank}: {similar_artist['name']} (Spotify: {similar_artist.get('spotify_id')}, {fallback_label}: {fallback_id})") + ids = ', '.join( + f"{k}: {similar_artist.get(v)}" + for k, v in [('Spotify', 'spotify_id'), ('iTunes', 'itunes_id'), ('Deezer', 'deezer_id'), ('MB', 'musicbrainz_id')] + if similar_artist.get(v) + ) + logger.debug(f" #{rank}: {similar_artist['name']} ({ids})") except Exception as e: logger.warning(f"Error storing similar artist {similar_artist.get('name', 'Unknown')}: {e}") @@ -2685,6 +2690,8 @@ class WatchlistScanner: cache_callback = lambda found_id, artist_id=similar_artist.id: self.database.update_similar_artist_itunes_id(artist_id, found_id) elif source == 'deezer': cache_callback = lambda found_id, artist_id=similar_artist.id: self.database.update_similar_artist_deezer_id(artist_id, found_id) + elif source == 'musicbrainz': + cache_callback = lambda found_id, artist_id=similar_artist.id: self.database.update_similar_artist_musicbrainz_id(artist_id, found_id) artist_id = self._resolve_artist_id_for_source( source, @@ -2820,7 +2827,7 @@ class WatchlistScanner: track_data['deezer_track_id'] = track.get('id') track_data['deezer_album_id'] = album_data.get('id') track_data['deezer_artist_id'] = selected_artist_id - else: + elif selected_source == 'itunes': track_data['itunes_track_id'] = track.get('id') track_data['itunes_album_id'] = album_data.get('id') track_data['itunes_artist_id'] = selected_artist_id @@ -2954,7 +2961,7 @@ class WatchlistScanner: track_data['deezer_track_id'] = track.get('id') track_data['deezer_album_id'] = album_data.get('id') track_data['deezer_artist_id'] = artist_id_for_genres or '' - else: + elif db_source == 'itunes': track_data['itunes_track_id'] = track.get('id') track_data['itunes_album_id'] = album_data.get('id') track_data['itunes_artist_id'] = artist_id_for_genres or '' @@ -3176,7 +3183,7 @@ class WatchlistScanner: track_data['deezer_track_id'] = track['id'] track_data['deezer_album_id'] = album_data['id'] track_data['deezer_artist_id'] = selected_artist_id - else: + elif selected_source == 'itunes': track_data['itunes_track_id'] = track['id'] track_data['itunes_album_id'] = album_data['id'] track_data['itunes_artist_id'] = selected_artist_id @@ -3351,6 +3358,8 @@ class WatchlistScanner: selected_watchlist_id = artist.itunes_artist_id or artist_id elif source == 'deezer': selected_watchlist_id = getattr(artist, 'deezer_artist_id', None) or artist_id + elif source == 'musicbrainz': + selected_watchlist_id = artist_id break if not selected_source or not selected_artist_id or not selected_albums: @@ -3384,6 +3393,8 @@ class WatchlistScanner: cache_callback = lambda found_id, similar_id=artist.id: self.database.update_similar_artist_itunes_id(similar_id, found_id) elif source == 'deezer': cache_callback = lambda found_id, similar_id=artist.id: self.database.update_similar_artist_deezer_id(similar_id, found_id) + elif source == 'musicbrainz': + cache_callback = lambda found_id, similar_id=artist.id: self.database.update_similar_artist_musicbrainz_id(similar_id, found_id) artist_id = self._resolve_artist_id_for_source( source, @@ -3415,6 +3426,8 @@ class WatchlistScanner: selected_similar_id = artist.similar_artist_itunes_id or artist_id elif source == 'deezer': selected_similar_id = getattr(artist, 'similar_artist_deezer_id', None) or artist_id + elif source == 'musicbrainz': + selected_similar_id = getattr(artist, 'similar_artist_musicbrainz_id', None) or artist_id break if not selected_source or not selected_artist_id or not selected_albums: diff --git a/database/music_database.py b/database/music_database.py index c59b9b3a..af64008d 100644 --- a/database/music_database.py +++ b/database/music_database.py @@ -118,6 +118,7 @@ class SimilarArtist: genres: Optional[List[str]] = None # Cached genres popularity: int = 0 # Cached popularity score similar_artist_deezer_id: Optional[str] = None # Deezer artist ID + similar_artist_musicbrainz_id: Optional[str] = None # MusicBrainz artist ID @dataclass class DiscoveryTrack: @@ -1174,6 +1175,10 @@ class MusicDatabase: cursor.execute("ALTER TABLE similar_artists ADD COLUMN similar_artist_deezer_id TEXT") logger.info("Added similar_artist_deezer_id column to similar_artists table") + if 'similar_artist_musicbrainz_id' not in similar_artists_columns: + cursor.execute("ALTER TABLE similar_artists ADD COLUMN similar_artist_musicbrainz_id TEXT") + logger.info("Added similar_artist_musicbrainz_id column to similar_artists table") + # Migration: Add iTunes columns to recent_releases for dual-source discovery cursor.execute("PRAGMA table_info(recent_releases)") recent_releases_columns = [column[1] for column in cursor.fetchall()] @@ -1288,6 +1293,8 @@ class MusicDatabase: source_artist_id TEXT NOT NULL, similar_artist_spotify_id TEXT, similar_artist_itunes_id TEXT, + similar_artist_deezer_id TEXT, + similar_artist_musicbrainz_id TEXT, similar_artist_name TEXT NOT NULL, similarity_rank INTEGER DEFAULT 1, occurrence_count INTEGER DEFAULT 1, @@ -1298,8 +1305,10 @@ class MusicDatabase: migration_cursor.execute(""" INSERT OR IGNORE INTO similar_artists_new (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, + similar_artist_deezer_id, similar_artist_musicbrainz_id, similar_artist_name, similarity_rank, occurrence_count, last_updated) SELECT source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, + similar_artist_deezer_id, similar_artist_musicbrainz_id, similar_artist_name, similarity_rank, occurrence_count, last_updated FROM similar_artists """) @@ -1312,6 +1321,7 @@ class MusicDatabase: cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_source ON similar_artists (source_artist_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_spotify ON similar_artists (similar_artist_spotify_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_itunes ON similar_artists (similar_artist_itunes_id)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_musicbrainz ON similar_artists (similar_artist_musicbrainz_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_occurrence ON similar_artists (occurrence_count)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_name ON similar_artists (similar_artist_name)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_discovery_pool_spotify_track ON discovery_pool (spotify_track_id)") @@ -2958,6 +2968,7 @@ class MusicDatabase: similar_artist_spotify_id TEXT, similar_artist_itunes_id TEXT, similar_artist_deezer_id TEXT, + similar_artist_musicbrainz_id TEXT, similar_artist_name TEXT NOT NULL, similarity_rank INTEGER DEFAULT 1, occurrence_count INTEGER DEFAULT 1, @@ -2974,7 +2985,8 @@ class MusicDatabase: new_cols = ['id', 'source_artist_id', 'similar_artist_spotify_id', 'similar_artist_itunes_id', 'similar_artist_deezer_id', - 'similar_artist_name', 'similarity_rank', 'occurrence_count', + 'similar_artist_musicbrainz_id', 'similar_artist_name', + 'similarity_rank', 'occurrence_count', 'last_updated', 'image_url', 'genres', 'popularity', 'metadata_updated_at', 'last_featured', 'profile_id'] shared_cols = [c for c in new_cols if c in old_cols] @@ -8260,25 +8272,27 @@ class MusicDatabase: image_url: Optional[str] = None, genres: Optional[list] = None, popularity: int = 0, - similar_artist_deezer_id: Optional[str] = None) -> bool: - """Add or update a similar artist recommendation (supports Spotify, iTunes, and Deezer IDs)""" + similar_artist_deezer_id: Optional[str] = None, + similar_artist_musicbrainz_id: Optional[str] = None) -> bool: + """Add or update a similar artist recommendation.""" try: with self._get_connection() as conn: cursor = conn.cursor() genres_json = json.dumps(genres) if genres else None - # Use artist name as the unique key (allows storing both IDs for same artist) cursor.execute(""" INSERT INTO similar_artists - (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_name, + (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, + similar_artist_deezer_id, similar_artist_musicbrainz_id, similar_artist_name, similarity_rank, occurrence_count, last_updated, profile_id, image_url, genres, popularity, metadata_updated_at) - VALUES (?, ?, ?, ?, ?, ?, 1, CURRENT_TIMESTAMP, ?, ?, ?, ?, CURRENT_TIMESTAMP) + VALUES (?, ?, ?, ?, ?, ?, ?, 1, CURRENT_TIMESTAMP, ?, ?, ?, ?, CURRENT_TIMESTAMP) ON CONFLICT(profile_id, source_artist_id, similar_artist_name) DO UPDATE SET similar_artist_spotify_id = COALESCE(excluded.similar_artist_spotify_id, similar_artist_spotify_id), similar_artist_itunes_id = COALESCE(excluded.similar_artist_itunes_id, similar_artist_itunes_id), similar_artist_deezer_id = COALESCE(excluded.similar_artist_deezer_id, similar_artist_deezer_id), + similar_artist_musicbrainz_id = COALESCE(excluded.similar_artist_musicbrainz_id, similar_artist_musicbrainz_id), similarity_rank = excluded.similarity_rank, occurrence_count = occurrence_count + 1, last_updated = CURRENT_TIMESTAMP, @@ -8286,7 +8300,8 @@ class MusicDatabase: genres = COALESCE(excluded.genres, genres), popularity = CASE WHEN excluded.popularity > 0 THEN excluded.popularity ELSE popularity END, metadata_updated_at = CASE WHEN excluded.image_url IS NOT NULL THEN CURRENT_TIMESTAMP ELSE metadata_updated_at END - """, (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_name, + """, (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, + similar_artist_deezer_id, similar_artist_musicbrainz_id, similar_artist_name, similarity_rank, profile_id, image_url, genres_json, popularity)) conn.commit() @@ -8319,6 +8334,7 @@ class MusicDatabase: occurrence_count=row['occurrence_count'], last_updated=datetime.fromisoformat(row['last_updated']), similar_artist_deezer_id=row['similar_artist_deezer_id'] if 'similar_artist_deezer_id' in row.keys() else None, + similar_artist_musicbrainz_id=row['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in row.keys() else None, ) for row in rows] except Exception as e: @@ -8328,11 +8344,14 @@ class MusicDatabase: def get_similar_artists_missing_fallback_ids(self, source_artist_id: str, fallback_source: str = 'itunes', profile_id: int = 1) -> List[SimilarArtist]: """Get similar artists missing fallback-provider IDs for backfill.""" try: - if fallback_source not in {'itunes', 'deezer'}: + if fallback_source not in {'itunes', 'deezer', 'musicbrainz'}: logger.error("Unsupported similar-artist fallback source: %s", fallback_source) return [] - col = 'similar_artist_deezer_id' if fallback_source == 'deezer' else 'similar_artist_itunes_id' + col = { + 'deezer': 'similar_artist_deezer_id', + 'musicbrainz': 'similar_artist_musicbrainz_id', + }.get(fallback_source, 'similar_artist_itunes_id') with self._get_connection() as conn: cursor = conn.cursor() @@ -8355,6 +8374,7 @@ class MusicDatabase: occurrence_count=row['occurrence_count'], last_updated=datetime.fromisoformat(row['last_updated']), similar_artist_deezer_id=row['similar_artist_deezer_id'] if 'similar_artist_deezer_id' in row.keys() else None, + similar_artist_musicbrainz_id=row['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in row.keys() else None, ) for row in rows] except Exception as e: @@ -8399,6 +8419,25 @@ class MusicDatabase: logger.error(f"Error updating similar artist Deezer ID: {e}") return False + def update_similar_artist_musicbrainz_id(self, similar_artist_id: int, musicbrainz_id: str) -> bool: + """Update a similar artist's MusicBrainz ID (for backfill)""" + try: + with self._get_connection() as conn: + cursor = conn.cursor() + + cursor.execute(""" + UPDATE similar_artists + SET similar_artist_musicbrainz_id = ? + WHERE id = ? + """, (musicbrainz_id, similar_artist_id)) + + conn.commit() + return cursor.rowcount > 0 + + except Exception as e: + logger.error(f"Error updating similar artist MusicBrainz ID: {e}") + return False + def update_similar_artist_metadata(self, similar_artist_id: int, image_url: str = None, genres: list = None, popularity: int = None) -> bool: """Cache artist metadata (image, genres, popularity) to avoid repeated API calls""" @@ -8420,7 +8459,7 @@ class MusicDatabase: def update_similar_artist_metadata_by_external_id(self, external_id: str, source: str = 'spotify', image_url: str = None, genres: list = None, popularity: int = None) -> bool: - """Cache artist metadata by Spotify or iTunes ID (updates all rows for that artist)""" + """Cache artist metadata by external source ID (updates all rows for that artist).""" try: with self._get_connection() as conn: cursor = conn.cursor() @@ -8429,6 +8468,8 @@ class MusicDatabase: where_clause = "similar_artist_spotify_id = ?" elif source == 'deezer': where_clause = "similar_artist_deezer_id = ?" + elif source == 'musicbrainz': + where_clause = "similar_artist_musicbrainz_id = ?" else: where_clause = "similar_artist_itunes_id = ?" cursor.execute(f""" @@ -8490,7 +8531,7 @@ class MusicDatabase: exclude_library_server: str = None, ) -> List[SimilarArtist]: """Get top similar artists excluding watchlist artists, with cycling support. - require_source: if set ('spotify','itunes','deezer'), only returns artists with that source ID. + require_source: if set, only returns artists with that source ID. exclude_library_server: if set, also excludes artists already present in that media server.""" try: with self._get_connection() as conn: @@ -8504,12 +8545,14 @@ class MusicDatabase: source_filter = "AND sa.similar_artist_itunes_id IS NOT NULL AND sa.similar_artist_itunes_id != ''" elif require_source == 'deezer': source_filter = "AND sa.similar_artist_deezer_id IS NOT NULL AND sa.similar_artist_deezer_id != ''" + elif require_source == 'musicbrainz': + source_filter = "AND sa.similar_artist_musicbrainz_id IS NOT NULL AND sa.similar_artist_musicbrainz_id != ''" library_artist_keys = None sql_limit = limit if exclude_library_server: cursor.execute(""" - SELECT name, spotify_artist_id, itunes_artist_id, deezer_id + SELECT name, spotify_artist_id, itunes_artist_id, deezer_id, musicbrainz_id FROM artists WHERE server_source = ? """, (exclude_library_server,)) @@ -8518,6 +8561,7 @@ class MusicDatabase: 'spotify': {r['spotify_artist_id'] for r in library_rows if r['spotify_artist_id']}, 'itunes': {r['itunes_artist_id'] for r in library_rows if r['itunes_artist_id']}, 'deezer': {r['deezer_id'] for r in library_rows if r['deezer_id']}, + 'musicbrainz': {r['musicbrainz_id'] for r in library_rows if r['musicbrainz_id']}, 'names': { self._normalize_for_comparison(r['name']) for r in library_rows @@ -8533,6 +8577,7 @@ class MusicDatabase: MAX(sa.similar_artist_spotify_id) as similar_artist_spotify_id, MAX(sa.similar_artist_itunes_id) as similar_artist_itunes_id, MAX(sa.similar_artist_deezer_id) as similar_artist_deezer_id, + MAX(sa.similar_artist_musicbrainz_id) as similar_artist_musicbrainz_id, sa.similar_artist_name, AVG(sa.similarity_rank) as similarity_rank, SUM(sa.occurrence_count) as occurrence_count, @@ -8564,11 +8609,13 @@ class MusicDatabase: spotify_id = row['similar_artist_spotify_id'] itunes_id = row['similar_artist_itunes_id'] if 'similar_artist_itunes_id' in row.keys() else None deezer_id = row['similar_artist_deezer_id'] if 'similar_artist_deezer_id' in row.keys() else None + musicbrainz_id = row['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in row.keys() else None normalized_name = self._normalize_for_comparison(row['similar_artist_name']) if ( (spotify_id and spotify_id in library_artist_keys['spotify']) or (itunes_id and itunes_id in library_artist_keys['itunes']) or (deezer_id and deezer_id in library_artist_keys['deezer']) + or (musicbrainz_id and musicbrainz_id in library_artist_keys['musicbrainz']) or (normalized_name and normalized_name in library_artist_keys['names']) ): continue @@ -8584,6 +8631,7 @@ class MusicDatabase: similar_artist_spotify_id=row['similar_artist_spotify_id'], similar_artist_itunes_id=row['similar_artist_itunes_id'] if 'similar_artist_itunes_id' in row.keys() else None, similar_artist_deezer_id=row['similar_artist_deezer_id'] if 'similar_artist_deezer_id' in row.keys() else None, + similar_artist_musicbrainz_id=row['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in row.keys() else None, similar_artist_name=row['similar_artist_name'], similarity_rank=int(row['similarity_rank']), occurrence_count=row['occurrence_count'], diff --git a/tests/discovery/test_similar_artists_library_filter.py b/tests/discovery/test_similar_artists_library_filter.py index fc065276..3a379289 100644 --- a/tests/discovery/test_similar_artists_library_filter.py +++ b/tests/discovery/test_similar_artists_library_filter.py @@ -19,6 +19,12 @@ def test_top_similar_artists_can_exclude_active_server_library_artists(tmp_path) similar_artist_deezer_id="dz-owned", profile_id=1, ) + db.add_or_update_similar_artist( + source_artist_id="seed-1", + similar_artist_name="Owned By MusicBrainz ID", + similar_artist_musicbrainz_id="mb-owned", + profile_id=1, + ) db.add_or_update_similar_artist( source_artist_id="seed-1", similar_artist_name="Owned By Name", @@ -41,14 +47,15 @@ def test_top_similar_artists_can_exclude_active_server_library_artists(tmp_path) with db._get_connection() as conn: conn.executemany( """ - INSERT INTO artists (name, server_source, spotify_artist_id, deezer_id) - VALUES (?, ?, ?, ?) + INSERT INTO artists (name, server_source, spotify_artist_id, deezer_id, musicbrainz_id) + VALUES (?, ?, ?, ?, ?) """, [ - ("Library Alias", "navidrome", "sp-owned", None), - ("Library Deezer Alias", "navidrome", None, "dz-owned"), - ("owned by name", "navidrome", None, None), - ("Different Server Artist", "plex", "sp-other-server", None), + ("Library Alias", "navidrome", "sp-owned", None, None), + ("Library Deezer Alias", "navidrome", None, "dz-owned", None), + ("Library MusicBrainz Alias", "navidrome", None, None, "mb-owned"), + ("owned by name", "navidrome", None, None, None), + ("Different Server Artist", "plex", "sp-other-server", None, None), ], ) conn.commit() @@ -62,6 +69,27 @@ def test_top_similar_artists_can_exclude_active_server_library_artists(tmp_path) assert _names(artists) == {"Different Server Artist", "Fresh Artist"} +def test_top_similar_artists_can_require_musicbrainz_source(tmp_path): + db = MusicDatabase(str(tmp_path / "music.db")) + db.add_or_update_similar_artist( + source_artist_id="seed-1", + similar_artist_name="MB Artist", + similar_artist_musicbrainz_id="mb-artist", + profile_id=1, + ) + db.add_or_update_similar_artist( + source_artist_id="seed-1", + similar_artist_name="Spotify Only", + similar_artist_spotify_id="sp-artist", + profile_id=1, + ) + + artists = db.get_top_similar_artists(limit=20, profile_id=1, require_source="musicbrainz") + + assert _names(artists) == {"MB Artist"} + assert artists[0].similar_artist_musicbrainz_id == "mb-artist" + + def test_top_similar_artists_keeps_existing_behavior_without_library_filter(tmp_path): db = MusicDatabase(str(tmp_path / "music.db")) db.add_or_update_similar_artist( diff --git a/tests/metadata/test_metadata_registry.py b/tests/metadata/test_metadata_registry.py index cc83af52..9860c7fc 100644 --- a/tests/metadata/test_metadata_registry.py +++ b/tests/metadata/test_metadata_registry.py @@ -20,6 +20,16 @@ def test_metadata_source_label_maps_known_sources(): assert registry.get_metadata_source_label("deezer") == "Deezer" assert registry.get_metadata_source_label("discogs") == "Discogs" assert registry.get_metadata_source_label("hydrabase") == "Hydrabase" + assert registry.get_metadata_source_label("musicbrainz") == "MusicBrainz" + + +def test_musicbrainz_is_first_class_metadata_client(): + registry.clear_cached_metadata_clients() + client = object() + assert registry.get_client_for_source( + "musicbrainz", + musicbrainz_client_factory=lambda: client, + ) is client def test_metadata_source_label_falls_back_to_unmapped(): diff --git a/tests/metadata/test_typed_metadata_types.py b/tests/metadata/test_typed_metadata_types.py index 20478f75..2006636d 100644 --- a/tests/metadata/test_typed_metadata_types.py +++ b/tests/metadata/test_typed_metadata_types.py @@ -342,6 +342,31 @@ def test_album_from_musicbrainz_dict_release_group_type_overrides_default(): assert Album.from_musicbrainz_dict(raw).album_type == 'single' +def test_album_from_musicbrainz_dict_accepts_adapter_shape(): + raw = { + 'id': 'rg-or-release-mbid', + 'name': 'Coffee Break', + 'artists': [{'id': 'artist-mbid', 'name': 'Zeds Dead'}], + 'release_date': '2011-07-12', + 'total_tracks': 1, + 'album_type': 'single', + 'images': [{'url': 'https://cover.example/front.jpg'}], + 'external_urls': {'musicbrainz': 'https://musicbrainz.org/release/rg-or-release-mbid'}, + } + + album = Album.from_musicbrainz_dict(raw) + + assert album.id == 'rg-or-release-mbid' + assert album.name == 'Coffee Break' + assert album.artists == ['Zeds Dead'] + assert album.artist_id == 'artist-mbid' + assert album.release_date == '2011-07-12' + assert album.total_tracks == 1 + assert album.album_type == 'single' + assert album.image_url == 'https://cover.example/front.jpg' + assert album.external_ids['musicbrainz'] == 'rg-or-release-mbid' + + # --------------------------------------------------------------------------- # Qobuz # --------------------------------------------------------------------------- diff --git a/tools/diagnose_itunes_discover.py b/tools/diagnose_itunes_discover.py index ff047c26..8abae0ba 100644 --- a/tools/diagnose_itunes_discover.py +++ b/tools/diagnose_itunes_discover.py @@ -63,9 +63,17 @@ def diagnose_itunes_discover(): """) with_both = cursor.fetchone()['count'] + with_musicbrainz = 0 + try: + cursor.execute("SELECT COUNT(*) as count FROM similar_artists WHERE similar_artist_musicbrainz_id IS NOT NULL") + with_musicbrainz = cursor.fetchone()['count'] + except Exception: + pass + logger.info(f" Total similar artists: {total}") logger.info(f" With iTunes ID: {with_itunes} ({100 * with_itunes / total:.1f}%)" if total > 0 else " With iTunes ID: 0") logger.info(f" With Spotify ID: {with_spotify} ({100 * with_spotify / total:.1f}%)" if total > 0 else " With Spotify ID: 0") + logger.info(f" With MusicBrainz ID: {with_musicbrainz} ({100 * with_musicbrainz / total:.1f}%)" if total > 0 else " With MusicBrainz ID: 0") logger.info(f" With BOTH IDs: {with_both} ({100 * with_both / total:.1f}%)" if total > 0 else " With BOTH IDs: 0") if with_itunes == 0 and total > 0: diff --git a/web_server.py b/web_server.py index 515fae4e..a3550b66 100644 --- a/web_server.py +++ b/web_server.py @@ -18847,8 +18847,12 @@ def get_spotify_album_tracks(album_id): if not album_data: return jsonify({"error": "Album not found"}), 404 - # Extract tracks from album data (Spotify format) - tracks = album_data.get('tracks', {}).get('items', []) + # Extract tracks — handle Spotify {items, total} or flat-list formats + tracks_container = album_data.get('tracks', {}) + if isinstance(tracks_container, list): + tracks = tracks_container + else: + tracks = tracks_container.get('items', []) # If no tracks in album data (iTunes format), fetch them separately if not tracks: @@ -25901,6 +25905,8 @@ def get_discover_similar_artists(): artist_id = artist.similar_artist_spotify_id elif active_source == 'deezer': artist_id = getattr(artist, 'similar_artist_deezer_id', None) or artist.similar_artist_itunes_id + elif active_source == 'musicbrainz': + artist_id = getattr(artist, 'similar_artist_musicbrainz_id', None) or artist.similar_artist_itunes_id else: artist_id = artist.similar_artist_itunes_id @@ -25908,6 +25914,7 @@ def get_discover_similar_artists(): "artist_id": artist_id, "spotify_artist_id": artist.similar_artist_spotify_id, "itunes_artist_id": artist.similar_artist_itunes_id, + "musicbrainz_artist_id": getattr(artist, 'similar_artist_musicbrainz_id', None), "artist_name": artist.similar_artist_name, "occurrence_count": artist.occurrence_count, "similarity_rank": artist.similarity_rank, @@ -25964,6 +25971,8 @@ def enrich_similar_artists(): ext_id = artist.similar_artist_spotify_id elif source == 'deezer': ext_id = getattr(artist, 'similar_artist_deezer_id', None) or artist.similar_artist_itunes_id + elif source == 'musicbrainz': + ext_id = getattr(artist, 'similar_artist_musicbrainz_id', None) or artist.similar_artist_itunes_id else: ext_id = artist.similar_artist_itunes_id if ext_id and ext_id not in cache_map: diff --git a/webui/index.html b/webui/index.html index a0f01074..c0c86dd8 100644 --- a/webui/index.html +++ b/webui/index.html @@ -3658,10 +3658,11 @@ +