From 5bc5fbb662807b8208490263065905e096c5ca84 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Mon, 18 May 2026 18:47:13 -0700 Subject: [PATCH 1/3] Add MusicBrainz as a metadata source Register MusicBrainz as a first-class metadata source alongside Deezer, iTunes, Spotify, Discogs, and Hydrabase. Expose the shared client through metadata services, add the settings option, and expand the MusicBrainz search adapter with source-compatible artist, album, track, and detail methods. Carry MusicBrainz IDs through similar-artist discovery, recommended artists, artist map serialization, and personalized playlist selection. Update DB migrations and lookup filters so similar_artist_musicbrainz_id is preserved on older schemas and used for source requirements and library exclusion. Normalize MusicBrainz album adapter output for import context and add regression coverage for registry mapping, typed album conversion, and similar-artist filtering. Verified by user with 120 focused tests passing. --- api/serializers.py | 1 + core/artists/map.py | 42 ++++-- core/discovery/hero.py | 32 +++- core/metadata/__init__.py | 2 + core/metadata/registry.py | 45 +++++- core/metadata/types.py | 47 +++++- core/metadata_service.py | 2 + core/musicbrainz_search.py | 137 +++++++++++++++++- core/personalized_playlists.py | 15 +- core/watchlist_scanner.py | 41 ++++-- database/music_database.py | 72 +++++++-- .../test_similar_artists_library_filter.py | 40 ++++- tests/metadata/test_metadata_registry.py | 10 ++ tests/metadata/test_typed_metadata_types.py | 25 ++++ tools/diagnose_itunes_discover.py | 8 + web_server.py | 13 +- webui/index.html | 3 +- webui/static/settings.js | 15 +- 18 files changed, 470 insertions(+), 80 deletions(-) diff --git a/api/serializers.py b/api/serializers.py index f7b0c1be..2102818d 100644 --- a/api/serializers.py +++ b/api/serializers.py @@ -368,6 +368,7 @@ def serialize_similar_artist(obj, fields: Optional[Set[str]] = None) -> dict: "source_artist_id": d.get("source_artist_id"), "similar_artist_spotify_id": d.get("similar_artist_spotify_id"), "similar_artist_itunes_id": d.get("similar_artist_itunes_id"), + "similar_artist_musicbrainz_id": d.get("similar_artist_musicbrainz_id"), "similar_artist_name": d.get("similar_artist_name"), "similarity_rank": d.get("similarity_rank"), "occurrence_count": d.get("occurrence_count"), diff --git a/core/artists/map.py b/core/artists/map.py index c2ceb0c5..71a81b12 100644 --- a/core/artists/map.py +++ b/core/artists/map.py @@ -140,7 +140,7 @@ def get_artist_map_data(): placeholders = ','.join(['?'] * len(watchlist_ids)) cursor.execute(f""" SELECT source_artist_id, similar_artist_name, similar_artist_spotify_id, - similar_artist_itunes_id, similar_artist_deezer_id, + similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, similarity_rank, occurrence_count, image_url, genres, popularity FROM similar_artists WHERE profile_id = ? AND source_artist_id IN ({placeholders}) @@ -173,6 +173,7 @@ def get_artist_map_data(): 'spotify_id': r.get('similar_artist_spotify_id') or '', 'itunes_id': r.get('similar_artist_itunes_id') or '', 'deezer_id': r.get('similar_artist_deezer_id') or '', + 'musicbrainz_id': r.get('similar_artist_musicbrainz_id') or '', 'rank': r.get('similarity_rank', 5), 'occurrence': r.get('occurrence_count', 1), 'popularity': r.get('popularity', 0), @@ -245,7 +246,7 @@ def get_artist_map_data(): } # Apply cache data to nodes - source_id_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'} + source_id_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} for n in nodes: nn = _norm(n['name']) cached = cache_by_name.get(nn) @@ -369,14 +370,14 @@ def get_artist_map_genres(): def _norm(n): return (n or '').lower().strip() - def _add(name, image_url=None, genres=None, spotify_id=None, itunes_id=None, deezer_id=None, discogs_id=None, source='unknown', popularity=0): + def _add(name, image_url=None, genres=None, spotify_id=None, itunes_id=None, deezer_id=None, discogs_id=None, musicbrainz_id=None, source='unknown', popularity=0): n = _norm(name) if not n or len(n) < 2: return if n not in artists_by_name: artists_by_name[n] = { 'name': name, 'image_url': '', 'genres': set(), - 'spotify_id': '', 'itunes_id': '', 'deezer_id': '', 'discogs_id': '', + 'spotify_id': '', 'itunes_id': '', 'deezer_id': '', 'discogs_id': '', 'musicbrainz_id': '', 'sources': set(), 'popularity': 0 } a = artists_by_name[n] @@ -394,6 +395,8 @@ def get_artist_map_genres(): a['deezer_id'] = str(deezer_id) if discogs_id and not a['discogs_id']: a['discogs_id'] = str(discogs_id) + if musicbrainz_id and not a['musicbrainz_id']: + a['musicbrainz_id'] = str(musicbrainz_id) if popularity > a['popularity']: a['popularity'] = popularity a['sources'].add(source) @@ -410,14 +413,14 @@ def get_artist_map_genres(): genres = json.loads(r['genres']) if isinstance(r['genres'], str) else [] except Exception as e: logger.debug("cache artist genres parse failed: %s", e) - src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'} + src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} kwargs = {src_map.get(r['source'], 'spotify_id'): r['entity_id']} _add(r['name'], image_url=r['image_url'], genres=genres, source='cache', popularity=r['popularity'] or 0, **kwargs) # 2. Similar artists cursor.execute(""" SELECT similar_artist_name, similar_artist_spotify_id, similar_artist_itunes_id, - similar_artist_deezer_id, image_url, genres, popularity + similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity FROM similar_artists WHERE profile_id = ? """, (profile_id,)) for r in cursor.fetchall(): @@ -429,7 +432,9 @@ def get_artist_map_genres(): logger.debug("similar artist genres parse failed: %s", e) _add(r['similar_artist_name'], image_url=r['image_url'], genres=genres, spotify_id=r['similar_artist_spotify_id'], itunes_id=r['similar_artist_itunes_id'], - deezer_id=r['similar_artist_deezer_id'], source='similar', popularity=r['popularity'] or 0) + deezer_id=r['similar_artist_deezer_id'], + musicbrainz_id=r['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in r.keys() else None, + source='similar', popularity=r['popularity'] or 0) # 3. Watchlist artists cursor.execute(""" @@ -483,6 +488,7 @@ def get_artist_map_genres(): 'itunes_id': a['itunes_id'], 'deezer_id': a['deezer_id'], 'discogs_id': a['discogs_id'], + 'musicbrainz_id': a['musicbrainz_id'], 'popularity': a['popularity'], 'type': 'watchlist' if 'watchlist' in a['sources'] else 'similar', }) @@ -648,7 +654,7 @@ def get_artist_map_explore(): center_name = row['name'] if row['image_url'] and row['image_url'].startswith('http'): center_image = row['image_url'] - src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'} + src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} k = src_map.get(row['source'], 'spotify_id') center_ids[k] = row['entity_id'] if row['genres']: @@ -717,7 +723,7 @@ def get_artist_map_explore(): WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE """, (center_name,)) for r in cursor.fetchall(): - src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'} + src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} k = src_map.get(r['source'], 'spotify_id') if not center_ids.get(k): center_ids[k] = r['entity_id'] @@ -746,7 +752,7 @@ def get_artist_map_explore(): placeholders = ','.join(['?'] * len(id_values)) cursor.execute(f""" SELECT DISTINCT similar_artist_name, similar_artist_spotify_id, - similar_artist_itunes_id, similar_artist_deezer_id, + similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity, similarity_rank FROM similar_artists WHERE source_artist_id IN ({placeholders}) AND profile_id = ? @@ -757,7 +763,7 @@ def get_artist_map_explore(): # Also search by name (the center artist might be a watchlist source) cursor.execute(""" SELECT DISTINCT sa.similar_artist_name, sa.similar_artist_spotify_id, - sa.similar_artist_itunes_id, sa.similar_artist_deezer_id, + sa.similar_artist_itunes_id, sa.similar_artist_deezer_id, sa.similar_artist_musicbrainz_id, sa.image_url, sa.genres, sa.popularity, sa.similarity_rank FROM similar_artists sa JOIN watchlist_artists wa ON sa.source_artist_id = COALESCE(wa.spotify_artist_id, wa.itunes_artist_id, CAST(wa.id AS TEXT)) @@ -789,7 +795,8 @@ def get_artist_map_explore(): image_url=sa.get('image_url'), genres=sa.get('genres'), popularity=sa.get('popularity', 0), - similar_artist_deezer_id=sa.get('deezer_id') + similar_artist_deezer_id=sa.get('deezer_id'), + similar_artist_musicbrainz_id=sa.get('musicbrainz_id'), ) except Exception as e: logger.debug("similar artist insert failed: %s", e) @@ -798,7 +805,7 @@ def get_artist_map_explore(): placeholders = ','.join(['?'] * len(id_values)) cursor.execute(f""" SELECT DISTINCT similar_artist_name, similar_artist_spotify_id, - similar_artist_itunes_id, similar_artist_deezer_id, + similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity, similarity_rank FROM similar_artists WHERE source_artist_id IN ({placeholders}) AND profile_id = ? @@ -809,7 +816,7 @@ def get_artist_map_explore(): # Fallback: query by name-based source ID cursor.execute(""" SELECT DISTINCT similar_artist_name, similar_artist_spotify_id, - similar_artist_itunes_id, similar_artist_deezer_id, + similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_musicbrainz_id, image_url, genres, popularity, similarity_rank FROM similar_artists WHERE source_artist_id = ? AND profile_id = ? @@ -841,6 +848,7 @@ def get_artist_map_explore(): 'spotify_id': r['similar_artist_spotify_id'] or '', 'itunes_id': r['similar_artist_itunes_id'] or '', 'deezer_id': r['similar_artist_deezer_id'] or '', + 'musicbrainz_id': r['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in r.keys() else '', 'discogs_id': '', 'popularity': r['popularity'] or 0, 'rank': r['similarity_rank'] or 5, @@ -861,7 +869,8 @@ def get_artist_map_explore(): cursor.execute(f""" SELECT DISTINCT source_artist_id, similar_artist_name, similar_artist_spotify_id, similar_artist_itunes_id, - similar_artist_deezer_id, image_url, genres, popularity, similarity_rank + similar_artist_deezer_id, similar_artist_musicbrainz_id, + image_url, genres, popularity, similarity_rank FROM similar_artists WHERE source_artist_id IN ({placeholders}) AND profile_id = ? ORDER BY similarity_rank ASC @@ -902,6 +911,7 @@ def get_artist_map_explore(): 'spotify_id': r['similar_artist_spotify_id'] or '', 'itunes_id': r['similar_artist_itunes_id'] or '', 'deezer_id': r['similar_artist_deezer_id'] or '', + 'musicbrainz_id': r['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in r.keys() else '', 'discogs_id': '', 'popularity': r['popularity'] or 0, 'rank': r['similarity_rank'] or 5, @@ -935,7 +945,7 @@ def get_artist_map_explore(): except Exception as e: logger.debug("explorer node genres parse failed: %s", e) # Harvest missing IDs from cache - src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id'} + src_map = {'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', 'discogs': 'discogs_id', 'musicbrainz': 'musicbrainz_id'} k = src_map.get(cr['source']) if k and not n.get(k): n[k] = cr['entity_id'] diff --git a/core/discovery/hero.py b/core/discovery/hero.py index 41143924..420de342 100644 --- a/core/discovery/hero.py +++ b/core/discovery/hero.py @@ -96,6 +96,8 @@ def get_discover_hero(): artist_id = artist.spotify_artist_id elif active_source == 'deezer': artist_id = getattr(artist, 'deezer_artist_id', None) or artist.itunes_artist_id + elif active_source == 'musicbrainz': + artist_id = getattr(artist, 'musicbrainz_artist_id', None) or artist.itunes_artist_id else: artist_id = artist.itunes_artist_id if not artist_id: @@ -125,7 +127,7 @@ def get_discover_hero(): valid_artists = list(similar_artists) # FALLBACK: If no valid artists for fallback source, try to resolve IDs on-the-fly - if active_source in ('itunes', 'deezer') and not valid_artists: + if active_source in ('itunes', 'deezer', 'musicbrainz') and not valid_artists: logger.warning(f"[{active_source} Fallback] No artists with {active_source} IDs found, attempting on-the-fly resolution for {len(similar_artists)} artists") resolved_count = 0 for artist in similar_artists: @@ -135,13 +137,20 @@ def get_discover_hero(): continue # Try to resolve ID by name try: - search_results = itunes_client.search_artists(artist.similar_artist_name, limit=1) + resolve_client = itunes_client + if active_source == 'musicbrainz': + from core.metadata.registry import get_musicbrainz_client + resolve_client = get_musicbrainz_client() + search_results = resolve_client.search_artists(artist.similar_artist_name, limit=1) if search_results and len(search_results) > 0: resolved_id = search_results[0].id # Cache the resolved ID for future use if active_source == 'deezer': database.update_similar_artist_deezer_id(artist.id, resolved_id) artist.similar_artist_deezer_id = resolved_id + elif active_source == 'musicbrainz': + database.update_similar_artist_musicbrainz_id(artist.id, resolved_id) + artist.similar_artist_musicbrainz_id = resolved_id else: database.update_similar_artist_itunes_id(artist.id, resolved_id) artist.similar_artist_itunes_id = resolved_id @@ -173,12 +182,15 @@ def get_discover_hero(): artist_id = artist.similar_artist_spotify_id or artist.similar_artist_itunes_id elif active_source == 'deezer': artist_id = getattr(artist, 'similar_artist_deezer_id', None) or artist.similar_artist_itunes_id or artist.similar_artist_spotify_id + elif active_source == 'musicbrainz': + artist_id = getattr(artist, 'similar_artist_musicbrainz_id', None) or artist.similar_artist_itunes_id or artist.similar_artist_spotify_id else: artist_id = artist.similar_artist_itunes_id or artist.similar_artist_spotify_id artist_data = { "spotify_artist_id": artist.similar_artist_spotify_id, "itunes_artist_id": artist.similar_artist_itunes_id, + "musicbrainz_artist_id": getattr(artist, 'similar_artist_musicbrainz_id', None), "artist_id": artist_id, "artist_name": artist.similar_artist_name, "occurrence_count": artist.occurrence_count, @@ -207,11 +219,19 @@ def get_discover_hero(): artist.id, artist_data.get('image_url'), artist_data.get('genres'), artist_data.get('popularity') ) - elif active_source in ('itunes', 'deezer'): - fb_artist_id = getattr(artist, 'similar_artist_deezer_id', None) if active_source == 'deezer' else None - fb_artist_id = fb_artist_id or artist.similar_artist_itunes_id + elif active_source in ('itunes', 'deezer', 'musicbrainz'): + if active_source == 'deezer': + fb_artist_id = getattr(artist, 'similar_artist_deezer_id', None) or artist.similar_artist_itunes_id + fetch_client = itunes_client + elif active_source == 'musicbrainz': + fb_artist_id = getattr(artist, 'similar_artist_musicbrainz_id', None) + from core.metadata.registry import get_musicbrainz_client + fetch_client = get_musicbrainz_client() + else: + fb_artist_id = artist.similar_artist_itunes_id + fetch_client = itunes_client if fb_artist_id: - fb_artist_data = itunes_client.get_artist(fb_artist_id) + fb_artist_data = fetch_client.get_artist(fb_artist_id) if fb_artist_data: artist_data['artist_name'] = fb_artist_data.get('name', artist.similar_artist_name) artist_data['image_url'] = fb_artist_data.get('images', [{}])[0].get('url') if fb_artist_data.get('images') else None diff --git a/core/metadata/__init__.py b/core/metadata/__init__.py index e7142adf..ee89ee19 100644 --- a/core/metadata/__init__.py +++ b/core/metadata/__init__.py @@ -31,6 +31,7 @@ from core.metadata.registry import ( get_discogs_client, get_hydrabase_client, get_itunes_client, + get_musicbrainz_client, get_primary_client, get_primary_source, get_spotify_client_for_profile, @@ -82,6 +83,7 @@ __all__ = [ "get_metadata_cache", "get_metadata_source_status", "get_metadata_service", + "get_musicbrainz_client", "get_musicmap_similar_artists", "get_primary_client", "get_primary_source", diff --git a/core/metadata/registry.py b/core/metadata/registry.py index ef7fbc8f..3299d55a 100644 --- a/core/metadata/registry.py +++ b/core/metadata/registry.py @@ -18,13 +18,14 @@ logger = get_logger("metadata.registry") MetadataClientFactory = Callable[[], Any] -METADATA_SOURCE_PRIORITY = ("deezer", "itunes", "spotify", "discogs", "hydrabase") +METADATA_SOURCE_PRIORITY = ("deezer", "itunes", "spotify", "discogs", "hydrabase", "musicbrainz") METADATA_SOURCE_LABELS = { "spotify": "Spotify", "itunes": "iTunes", "deezer": "Deezer", "discogs": "Discogs", "hydrabase": "Hydrabase", + "musicbrainz": "MusicBrainz", } _UNSET = object() @@ -148,6 +149,14 @@ def _get_amazon_factory(client_factory: Optional[MetadataClientFactory]) -> Meta return AmazonClient +def _get_musicbrainz_factory(client_factory: Optional[MetadataClientFactory]) -> MetadataClientFactory: + if client_factory is not None: + return client_factory + from core.musicbrainz_search import MusicBrainzSearchClient + + return MusicBrainzSearchClient + + def get_spotify_client(client_factory: Optional[MetadataClientFactory] = None): """Get shared Spotify client. @@ -280,6 +289,18 @@ def get_amazon_client(client_factory: Optional[MetadataClientFactory] = None): return client +def get_musicbrainz_client(client_factory: Optional[MetadataClientFactory] = None): + """Get cached MusicBrainz primary source client.""" + cache_key = "musicbrainz" + factory = _get_musicbrainz_factory(client_factory) + with _client_cache_lock: + client = _client_cache.get(cache_key) + if client is None: + client = factory() + _client_cache[cache_key] = client + return client + + def is_hydrabase_enabled() -> bool: """Return True when Hydrabase is connected and app-enabled.""" try: @@ -308,24 +329,26 @@ def get_hydrabase_client(allow_fallback: bool = True, require_enabled: bool = Tr def get_primary_source(spotify_client_factory: Optional[MetadataClientFactory] = None) -> str: """Return configured primary metadata source.""" - source = _get_config_value("metadata.fallback_source", "deezer") or "deezer" + _default = METADATA_SOURCE_PRIORITY[0] + source = _get_config_value("metadata.fallback_source", _default) or _default if source == "spotify": try: spotify = get_spotify_client(client_factory=spotify_client_factory) if not spotify or not spotify.is_spotify_authenticated(): - return "deezer" + return _default except Exception: - return "deezer" + return _default return source def get_spotify_disconnect_source(configured_source: Optional[str] = None) -> str: """Return the active metadata source after Spotify is disconnected.""" - source = configured_source if configured_source is not None else _get_config_value("metadata.fallback_source", "deezer") - source = source or "deezer" - return "deezer" if source == "spotify" else source + _default = METADATA_SOURCE_PRIORITY[0] + source = configured_source if configured_source is not None else _get_config_value("metadata.fallback_source", _default) + source = source or _default + return _default if source == "spotify" else source def get_metadata_source_label(source: str) -> str: @@ -352,6 +375,7 @@ def get_primary_client( deezer_client_factory: Optional[MetadataClientFactory] = None, discogs_client_factory: Optional[MetadataClientFactory] = None, amazon_client_factory: Optional[MetadataClientFactory] = None, + musicbrainz_client_factory: Optional[MetadataClientFactory] = None, ): """Return client for configured primary source.""" return get_client_for_source( @@ -361,6 +385,7 @@ def get_primary_client( deezer_client_factory=deezer_client_factory, discogs_client_factory=discogs_client_factory, amazon_client_factory=amazon_client_factory, + musicbrainz_client_factory=musicbrainz_client_factory, ) @@ -371,6 +396,7 @@ def get_primary_source_status( deezer_client_factory: Optional[MetadataClientFactory] = None, discogs_client_factory: Optional[MetadataClientFactory] = None, amazon_client_factory: Optional[MetadataClientFactory] = None, + musicbrainz_client_factory: Optional[MetadataClientFactory] = None, ) -> Dict[str, Any]: """Return a generic status snapshot for the active primary metadata source.""" source = _get_config_value("metadata.fallback_source", "deezer") or "deezer" @@ -385,6 +411,7 @@ def get_primary_source_status( deezer_client_factory=deezer_client_factory, discogs_client_factory=discogs_client_factory, amazon_client_factory=amazon_client_factory, + musicbrainz_client_factory=musicbrainz_client_factory, ) if source == "spotify": connected = bool(client and client.is_spotify_authenticated()) @@ -412,6 +439,7 @@ def get_client_for_source( deezer_client_factory: Optional[MetadataClientFactory] = None, discogs_client_factory: Optional[MetadataClientFactory] = None, amazon_client_factory: Optional[MetadataClientFactory] = None, + musicbrainz_client_factory: Optional[MetadataClientFactory] = None, ): """Return exact client for a source, or None if unavailable.""" if source == "spotify": @@ -438,4 +466,7 @@ def get_client_for_source( if source == "amazon": return get_amazon_client(client_factory=amazon_client_factory) + if source == "musicbrainz": + return get_musicbrainz_client(client_factory=musicbrainz_client_factory) + return None diff --git a/core/metadata/types.py b/core/metadata/types.py index 00c9749b..27d5a7c5 100644 --- a/core/metadata/types.py +++ b/core/metadata/types.py @@ -333,7 +333,52 @@ class Album: @classmethod def from_musicbrainz_dict(cls, raw: Dict[str, Any]) -> 'Album': - """MusicBrainz ``/release/{mbid}`` response shape (release, not release-group).""" + """MusicBrainz album shape. + + Accepts both raw ``/release/{mbid}`` responses and the normalized + MusicBrainz search adapter shape used by app-facing metadata clients. + """ + if raw.get('name') and not raw.get('title'): + artists = raw.get('artists') or [] + artist_names = [] + primary_artist_id = '' + for artist in artists: + if isinstance(artist, dict): + name = _str(artist.get('name')) + if name: + artist_names.append(name) + if not primary_artist_id and artist.get('id'): + primary_artist_id = _str(artist['id']) + else: + name = _str(artist) + if name: + artist_names.append(name) + + images = raw.get('images') or [] + image_url = '' + if images and isinstance(images[0], dict): + image_url = _str(images[0].get('url')) + image_url = image_url or _str(raw.get('image_url')) + + external_ids = {} + if raw.get('id'): + external_ids['musicbrainz'] = _str(raw['id']) + + return cls( + id=_str(raw.get('id')), + name=_str(raw.get('name')), + artists=artist_names or ['Unknown Artist'], + release_date=_str(raw.get('release_date')), + total_tracks=_int(raw.get('total_tracks')), + album_type=_str(raw.get('album_type'), default='album') or 'album', + image_url=image_url or None, + artist_id=primary_artist_id or None, + genres=list(raw.get('genres') or []), + source='musicbrainz', + external_ids=external_ids, + external_urls=dict(raw.get('external_urls') or {}), + ) + artist_credit = raw.get('artist-credit') or [] artist_names = [] primary_artist_id = '' diff --git a/core/metadata_service.py b/core/metadata_service.py index 748a9cf9..a0bdadc1 100644 --- a/core/metadata_service.py +++ b/core/metadata_service.py @@ -45,6 +45,7 @@ from core.metadata.registry import ( get_amazon_client, get_client_for_source, get_deezer_client, + get_musicbrainz_client, get_discogs_client, get_hydrabase_client, get_itunes_client, @@ -77,6 +78,7 @@ except Exception: # pragma: no cover - optional dependency fallback __all__ = [ "METADATA_SOURCE_PRIORITY", "get_amazon_client", + "get_musicbrainz_client", "MetadataCache", "MetadataLookupOptions", "MetadataProvider", diff --git a/core/musicbrainz_search.py b/core/musicbrainz_search.py index 965317be..0047e6c5 100644 --- a/core/musicbrainz_search.py +++ b/core/musicbrainz_search.py @@ -678,7 +678,131 @@ class MusicBrainzSearchClient: return sorted(releases, key=_key)[0] - def get_album(self, album_mbid: str) -> Optional[Dict[str, Any]]: + def is_authenticated(self) -> bool: + return True + + def reload_config(self) -> None: + pass + + def get_track_features(self, track_id: str) -> None: + return None + + def get_user_info(self) -> None: + return None + + def get_track_details(self, track_id: str) -> Optional[Dict[str, Any]]: + """Return Spotify-compatible track detail dict by recording MBID.""" + try: + rec = self._client.get_recording(track_id, includes=['releases', 'artist-credits', 'release-groups']) + if not rec: + return None + releases = rec.get('releases', []) or [] + releases.sort(key=self._release_preference_key) + first_rel = releases[0] if releases else {} + rg = first_rel.get('release-group', {}) or {} + release_id = first_rel.get('id', '') + rg_id = rg.get('id', '') + image_url = self._cached_art(release_id, rg_id) + artists = _extract_artist_credit(rec.get('artist-credit', [])) + return { + 'id': rec.get('id', ''), + 'name': rec.get('title', ''), + 'artists': [{'name': a, 'id': ''} for a in artists], + 'album': { + 'id': rg_id or release_id, + 'name': first_rel.get('title', ''), + 'images': [{'url': image_url, 'height': 250, 'width': 250}] if image_url else [], + 'release_date': first_rel.get('date') or rg.get('first-release-date') or '', + }, + 'duration_ms': rec.get('length') or 0, + 'track_number': 1, + 'disc_number': 1, + 'preview_url': None, + 'popularity': 0, + 'external_urls': {'musicbrainz': f'https://musicbrainz.org/recording/{track_id}'}, + } + except Exception as e: + logger.error(f'get_track_details({track_id}) error: {e}') + return None + + def get_album_tracks(self, album_mbid: str) -> Optional[Dict[str, Any]]: + """Return {items: [...], total: N} track listing for a release/release-group MBID.""" + album = self.get_album(album_mbid, include_tracks=True) + if album is None: + return None + flat = album.get('tracks', []) + if isinstance(flat, dict): + return flat + return {'items': flat, 'total': len(flat)} + + def get_artist(self, artist_id: str) -> Optional[Dict[str, Any]]: + """Return Spotify-compatible artist detail dict.""" + try: + artist = self._client.get_artist(artist_id, includes=['tags', 'url-rels']) + if not artist: + return None + genres = [t['name'] for t in (artist.get('tags') or []) if isinstance(t, dict) and t.get('name')] + return { + 'id': artist.get('id', artist_id), + 'name': artist.get('name', ''), + 'genres': genres, + 'followers': {'total': 0}, + 'popularity': 0, + 'images': [], + 'external_urls': {'musicbrainz': f'https://musicbrainz.org/artist/{artist_id}'}, + } + except Exception as e: + logger.error(f'get_artist({artist_id}) error: {e}') + return None + + def get_artist_top_tracks(self, artist_id: str, limit: int = 10) -> List[Dict[str, Any]]: + """Return top recordings for an artist, deduplicated by title and sorted by year.""" + try: + recs = self._client.search_recordings_by_artist_mbid(artist_id, limit=100) + for r in recs: + rels = r.get('releases') or [] + if rels: + rels.sort(key=self._release_preference_key) + r['releases'] = rels + studio = [r for r in recs if self._has_studio_release(r)] + recs = studio or recs + seen: set = set() + deduped = [] + for r in recs: + key = (r.get('title') or '').lower().strip() + if not key or key in seen: + continue + seen.add(key) + deduped.append(r) + results = [] + for r in deduped[:limit]: + releases = r.get('releases', []) + first_rel = releases[0] if releases else {} + rg = first_rel.get('release-group', {}) or {} + release_id = first_rel.get('id', '') + rg_id = rg.get('id', '') + artists = _extract_artist_credit(r.get('artist-credit', [])) + image_url = self._cached_art(release_id, rg_id) + results.append({ + 'id': r.get('id', ''), + 'name': r.get('title', ''), + 'artists': [{'name': a, 'id': ''} for a in artists], + 'album': { + 'id': rg_id or release_id, + 'name': first_rel.get('title', ''), + 'images': [{'url': image_url}] if image_url else [], + }, + 'duration_ms': r.get('length') or 0, + 'popularity': 0, + 'preview_url': None, + 'external_urls': {'musicbrainz': f'https://musicbrainz.org/recording/{r.get("id", "")}'}, + }) + return results + except Exception as e: + logger.error(f'get_artist_top_tracks({artist_id}) error: {e}') + return [] + + def get_album(self, album_mbid: str, include_tracks: bool = True) -> Optional[Dict[str, Any]]: """Get full album details with track listing for download modal. The MBID passed in could be either: @@ -713,10 +837,15 @@ class MusicBrainzSearchClient: album['external_urls'] = { 'musicbrainz': f'https://musicbrainz.org/release-group/{album_mbid}' } + if not include_tracks: + album.pop('tracks', None) return album # Path B: release MBID (text-search fallback path) - return self._render_release_as_album(album_mbid) + album = self._render_release_as_album(album_mbid) + if album and not include_tracks: + album.pop('tracks', None) + return album except Exception as e: logger.error(f"MusicBrainz album detail failed for {album_mbid}: {e}") return None @@ -789,7 +918,7 @@ class MusicBrainzSearchClient: 'external_urls': {'musicbrainz': f'https://musicbrainz.org/release/{release_mbid}'}, } - def get_artist_albums(self, artist_mbid: str, album_type: str = 'album,single') -> List: + def get_artist_albums(self, artist_mbid: str, album_type: str = 'album,single', limit: int = 200) -> List: """Get artist's releases for discography view.""" try: artist = self._client.get_artist(artist_mbid, includes=['release-groups']) @@ -814,7 +943,7 @@ class MusicBrainzSearchClient: image_url=image_url, external_urls={'musicbrainz': f'https://musicbrainz.org/release-group/{rg_mbid}'}, )) - return albums + return albums[:limit] except Exception as e: logger.warning(f"MusicBrainz artist albums failed: {e}") return [] diff --git a/core/personalized_playlists.py b/core/personalized_playlists.py index 602520ab..ec2505dd 100644 --- a/core/personalized_playlists.py +++ b/core/personalized_playlists.py @@ -902,7 +902,9 @@ class PersonalizedPlaylistsService: with self.database._get_connection() as conn: cursor = conn.cursor() cursor.execute(""" - SELECT similar_artist_spotify_id, similar_artist_name + SELECT similar_artist_spotify_id, similar_artist_itunes_id, + similar_artist_deezer_id, similar_artist_musicbrainz_id, + similar_artist_name FROM similar_artists WHERE source_artist_id = ? ORDER BY similarity_rank ASC @@ -911,9 +913,16 @@ class PersonalizedPlaylistsService: db_results = cursor.fetchall() if db_results: + source_id_col = { + 'spotify': 'similar_artist_spotify_id', + 'itunes': 'similar_artist_itunes_id', + 'deezer': 'similar_artist_deezer_id', + 'musicbrainz': 'similar_artist_musicbrainz_id', + }.get(active_source, 'similar_artist_itunes_id') for row in db_results: - artist_id = row['similar_artist_spotify_id'] - artist_name = row['similar_artist_name'] + r = dict(row) + artist_id = r.get(source_id_col) or r.get('similar_artist_spotify_id') or r.get('similar_artist_itunes_id') + artist_name = r['similar_artist_name'] if artist_id and artist_id not in seen_artist_ids: all_similar_artists.append({'id': artist_id, 'name': artist_name}) seen_artist_ids.add(artist_id) diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index 5bf82809..694da08b 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -520,12 +520,8 @@ class WatchlistScanner: return list(get_source_priority(get_primary_source())) def _discovery_source_priority(self) -> List[str]: - """Return discovery sources in configured priority order. - - Discovery pool writes only support Spotify, iTunes, and Deezer IDs, so - we filter the broader metadata priority list down to those sources. - """ - return [source for source in self._watchlist_source_priority() if source in {'spotify', 'itunes', 'deezer'}] + """Return discovery sources in configured priority order.""" + return [source for source in self._watchlist_source_priority() if source in {'spotify', 'itunes', 'deezer', 'musicbrainz'}] @staticmethod def _artist_id_attribute_for_source(source: str) -> Optional[str]: @@ -544,6 +540,7 @@ class WatchlistScanner: 'spotify': 'similar_artist_spotify_id', 'itunes': 'similar_artist_itunes_id', 'deezer': 'similar_artist_deezer_id', + 'musicbrainz': 'similar_artist_musicbrainz_id', }.get(source) @staticmethod @@ -2372,6 +2369,7 @@ class WatchlistScanner: 'spotify': 'spotify_id', 'itunes': 'itunes_id', 'deezer': 'deezer_id', + 'musicbrainz': 'musicbrainz_id', } searched_source_ids = {} available_sources = [] @@ -2403,6 +2401,7 @@ class WatchlistScanner: 'spotify_id': None, 'itunes_id': None, 'deezer_id': None, + 'musicbrainz_id': None, 'image_url': None, 'genres': [], 'popularity': 0, @@ -2470,6 +2469,8 @@ class WatchlistScanner: return self.database.update_similar_artist_deezer_id(similar_artist_id, source_id) if source == 'itunes': return self.database.update_similar_artist_itunes_id(similar_artist_id, source_id) + if source == 'musicbrainz': + return self.database.update_similar_artist_musicbrainz_id(similar_artist_id, source_id) return False def _backfill_similar_artists_fallback_ids(self, source_artist_id: str, profile_id: int = 1) -> int: @@ -2480,7 +2481,7 @@ class WatchlistScanner: writable similar-artist ID columns. This keeps old cached rows usable when the active metadata provider changes. """ - backfill_sources = [source for source in self._discovery_source_priority() if source in {'itunes', 'deezer'}] + backfill_sources = [source for source in self._discovery_source_priority() if source in {'itunes', 'deezer', 'musicbrainz'}] if not backfill_sources: logger.debug("No fallback metadata providers available for similar-artist backfill") return 0 @@ -2582,14 +2583,18 @@ class WatchlistScanner: image_url=similar_artist.get('image_url'), genres=similar_artist.get('genres'), popularity=similar_artist.get('popularity', 0), - similar_artist_deezer_id=similar_artist.get('deezer_id') + similar_artist_deezer_id=similar_artist.get('deezer_id'), + similar_artist_musicbrainz_id=similar_artist.get('musicbrainz_id'), ) if success: stored_count += 1 - fallback_id = similar_artist.get('deezer_id') or similar_artist.get('itunes_id') - fallback_label = 'Deezer' if similar_artist.get('deezer_id') else 'iTunes' - logger.debug(f" #{rank}: {similar_artist['name']} (Spotify: {similar_artist.get('spotify_id')}, {fallback_label}: {fallback_id})") + ids = ', '.join( + f"{k}: {similar_artist.get(v)}" + for k, v in [('Spotify', 'spotify_id'), ('iTunes', 'itunes_id'), ('Deezer', 'deezer_id'), ('MB', 'musicbrainz_id')] + if similar_artist.get(v) + ) + logger.debug(f" #{rank}: {similar_artist['name']} ({ids})") except Exception as e: logger.warning(f"Error storing similar artist {similar_artist.get('name', 'Unknown')}: {e}") @@ -2685,6 +2690,8 @@ class WatchlistScanner: cache_callback = lambda found_id, artist_id=similar_artist.id: self.database.update_similar_artist_itunes_id(artist_id, found_id) elif source == 'deezer': cache_callback = lambda found_id, artist_id=similar_artist.id: self.database.update_similar_artist_deezer_id(artist_id, found_id) + elif source == 'musicbrainz': + cache_callback = lambda found_id, artist_id=similar_artist.id: self.database.update_similar_artist_musicbrainz_id(artist_id, found_id) artist_id = self._resolve_artist_id_for_source( source, @@ -2820,7 +2827,7 @@ class WatchlistScanner: track_data['deezer_track_id'] = track.get('id') track_data['deezer_album_id'] = album_data.get('id') track_data['deezer_artist_id'] = selected_artist_id - else: + elif selected_source == 'itunes': track_data['itunes_track_id'] = track.get('id') track_data['itunes_album_id'] = album_data.get('id') track_data['itunes_artist_id'] = selected_artist_id @@ -2954,7 +2961,7 @@ class WatchlistScanner: track_data['deezer_track_id'] = track.get('id') track_data['deezer_album_id'] = album_data.get('id') track_data['deezer_artist_id'] = artist_id_for_genres or '' - else: + elif db_source == 'itunes': track_data['itunes_track_id'] = track.get('id') track_data['itunes_album_id'] = album_data.get('id') track_data['itunes_artist_id'] = artist_id_for_genres or '' @@ -3176,7 +3183,7 @@ class WatchlistScanner: track_data['deezer_track_id'] = track['id'] track_data['deezer_album_id'] = album_data['id'] track_data['deezer_artist_id'] = selected_artist_id - else: + elif selected_source == 'itunes': track_data['itunes_track_id'] = track['id'] track_data['itunes_album_id'] = album_data['id'] track_data['itunes_artist_id'] = selected_artist_id @@ -3351,6 +3358,8 @@ class WatchlistScanner: selected_watchlist_id = artist.itunes_artist_id or artist_id elif source == 'deezer': selected_watchlist_id = getattr(artist, 'deezer_artist_id', None) or artist_id + elif source == 'musicbrainz': + selected_watchlist_id = artist_id break if not selected_source or not selected_artist_id or not selected_albums: @@ -3384,6 +3393,8 @@ class WatchlistScanner: cache_callback = lambda found_id, similar_id=artist.id: self.database.update_similar_artist_itunes_id(similar_id, found_id) elif source == 'deezer': cache_callback = lambda found_id, similar_id=artist.id: self.database.update_similar_artist_deezer_id(similar_id, found_id) + elif source == 'musicbrainz': + cache_callback = lambda found_id, similar_id=artist.id: self.database.update_similar_artist_musicbrainz_id(similar_id, found_id) artist_id = self._resolve_artist_id_for_source( source, @@ -3415,6 +3426,8 @@ class WatchlistScanner: selected_similar_id = artist.similar_artist_itunes_id or artist_id elif source == 'deezer': selected_similar_id = getattr(artist, 'similar_artist_deezer_id', None) or artist_id + elif source == 'musicbrainz': + selected_similar_id = getattr(artist, 'similar_artist_musicbrainz_id', None) or artist_id break if not selected_source or not selected_artist_id or not selected_albums: diff --git a/database/music_database.py b/database/music_database.py index c59b9b3a..af64008d 100644 --- a/database/music_database.py +++ b/database/music_database.py @@ -118,6 +118,7 @@ class SimilarArtist: genres: Optional[List[str]] = None # Cached genres popularity: int = 0 # Cached popularity score similar_artist_deezer_id: Optional[str] = None # Deezer artist ID + similar_artist_musicbrainz_id: Optional[str] = None # MusicBrainz artist ID @dataclass class DiscoveryTrack: @@ -1174,6 +1175,10 @@ class MusicDatabase: cursor.execute("ALTER TABLE similar_artists ADD COLUMN similar_artist_deezer_id TEXT") logger.info("Added similar_artist_deezer_id column to similar_artists table") + if 'similar_artist_musicbrainz_id' not in similar_artists_columns: + cursor.execute("ALTER TABLE similar_artists ADD COLUMN similar_artist_musicbrainz_id TEXT") + logger.info("Added similar_artist_musicbrainz_id column to similar_artists table") + # Migration: Add iTunes columns to recent_releases for dual-source discovery cursor.execute("PRAGMA table_info(recent_releases)") recent_releases_columns = [column[1] for column in cursor.fetchall()] @@ -1288,6 +1293,8 @@ class MusicDatabase: source_artist_id TEXT NOT NULL, similar_artist_spotify_id TEXT, similar_artist_itunes_id TEXT, + similar_artist_deezer_id TEXT, + similar_artist_musicbrainz_id TEXT, similar_artist_name TEXT NOT NULL, similarity_rank INTEGER DEFAULT 1, occurrence_count INTEGER DEFAULT 1, @@ -1298,8 +1305,10 @@ class MusicDatabase: migration_cursor.execute(""" INSERT OR IGNORE INTO similar_artists_new (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, + similar_artist_deezer_id, similar_artist_musicbrainz_id, similar_artist_name, similarity_rank, occurrence_count, last_updated) SELECT source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, + similar_artist_deezer_id, similar_artist_musicbrainz_id, similar_artist_name, similarity_rank, occurrence_count, last_updated FROM similar_artists """) @@ -1312,6 +1321,7 @@ class MusicDatabase: cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_source ON similar_artists (source_artist_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_spotify ON similar_artists (similar_artist_spotify_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_itunes ON similar_artists (similar_artist_itunes_id)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_musicbrainz ON similar_artists (similar_artist_musicbrainz_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_occurrence ON similar_artists (occurrence_count)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_similar_artists_name ON similar_artists (similar_artist_name)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_discovery_pool_spotify_track ON discovery_pool (spotify_track_id)") @@ -2958,6 +2968,7 @@ class MusicDatabase: similar_artist_spotify_id TEXT, similar_artist_itunes_id TEXT, similar_artist_deezer_id TEXT, + similar_artist_musicbrainz_id TEXT, similar_artist_name TEXT NOT NULL, similarity_rank INTEGER DEFAULT 1, occurrence_count INTEGER DEFAULT 1, @@ -2974,7 +2985,8 @@ class MusicDatabase: new_cols = ['id', 'source_artist_id', 'similar_artist_spotify_id', 'similar_artist_itunes_id', 'similar_artist_deezer_id', - 'similar_artist_name', 'similarity_rank', 'occurrence_count', + 'similar_artist_musicbrainz_id', 'similar_artist_name', + 'similarity_rank', 'occurrence_count', 'last_updated', 'image_url', 'genres', 'popularity', 'metadata_updated_at', 'last_featured', 'profile_id'] shared_cols = [c for c in new_cols if c in old_cols] @@ -8260,25 +8272,27 @@ class MusicDatabase: image_url: Optional[str] = None, genres: Optional[list] = None, popularity: int = 0, - similar_artist_deezer_id: Optional[str] = None) -> bool: - """Add or update a similar artist recommendation (supports Spotify, iTunes, and Deezer IDs)""" + similar_artist_deezer_id: Optional[str] = None, + similar_artist_musicbrainz_id: Optional[str] = None) -> bool: + """Add or update a similar artist recommendation.""" try: with self._get_connection() as conn: cursor = conn.cursor() genres_json = json.dumps(genres) if genres else None - # Use artist name as the unique key (allows storing both IDs for same artist) cursor.execute(""" INSERT INTO similar_artists - (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_name, + (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, + similar_artist_deezer_id, similar_artist_musicbrainz_id, similar_artist_name, similarity_rank, occurrence_count, last_updated, profile_id, image_url, genres, popularity, metadata_updated_at) - VALUES (?, ?, ?, ?, ?, ?, 1, CURRENT_TIMESTAMP, ?, ?, ?, ?, CURRENT_TIMESTAMP) + VALUES (?, ?, ?, ?, ?, ?, ?, 1, CURRENT_TIMESTAMP, ?, ?, ?, ?, CURRENT_TIMESTAMP) ON CONFLICT(profile_id, source_artist_id, similar_artist_name) DO UPDATE SET similar_artist_spotify_id = COALESCE(excluded.similar_artist_spotify_id, similar_artist_spotify_id), similar_artist_itunes_id = COALESCE(excluded.similar_artist_itunes_id, similar_artist_itunes_id), similar_artist_deezer_id = COALESCE(excluded.similar_artist_deezer_id, similar_artist_deezer_id), + similar_artist_musicbrainz_id = COALESCE(excluded.similar_artist_musicbrainz_id, similar_artist_musicbrainz_id), similarity_rank = excluded.similarity_rank, occurrence_count = occurrence_count + 1, last_updated = CURRENT_TIMESTAMP, @@ -8286,7 +8300,8 @@ class MusicDatabase: genres = COALESCE(excluded.genres, genres), popularity = CASE WHEN excluded.popularity > 0 THEN excluded.popularity ELSE popularity END, metadata_updated_at = CASE WHEN excluded.image_url IS NOT NULL THEN CURRENT_TIMESTAMP ELSE metadata_updated_at END - """, (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, similar_artist_deezer_id, similar_artist_name, + """, (source_artist_id, similar_artist_spotify_id, similar_artist_itunes_id, + similar_artist_deezer_id, similar_artist_musicbrainz_id, similar_artist_name, similarity_rank, profile_id, image_url, genres_json, popularity)) conn.commit() @@ -8319,6 +8334,7 @@ class MusicDatabase: occurrence_count=row['occurrence_count'], last_updated=datetime.fromisoformat(row['last_updated']), similar_artist_deezer_id=row['similar_artist_deezer_id'] if 'similar_artist_deezer_id' in row.keys() else None, + similar_artist_musicbrainz_id=row['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in row.keys() else None, ) for row in rows] except Exception as e: @@ -8328,11 +8344,14 @@ class MusicDatabase: def get_similar_artists_missing_fallback_ids(self, source_artist_id: str, fallback_source: str = 'itunes', profile_id: int = 1) -> List[SimilarArtist]: """Get similar artists missing fallback-provider IDs for backfill.""" try: - if fallback_source not in {'itunes', 'deezer'}: + if fallback_source not in {'itunes', 'deezer', 'musicbrainz'}: logger.error("Unsupported similar-artist fallback source: %s", fallback_source) return [] - col = 'similar_artist_deezer_id' if fallback_source == 'deezer' else 'similar_artist_itunes_id' + col = { + 'deezer': 'similar_artist_deezer_id', + 'musicbrainz': 'similar_artist_musicbrainz_id', + }.get(fallback_source, 'similar_artist_itunes_id') with self._get_connection() as conn: cursor = conn.cursor() @@ -8355,6 +8374,7 @@ class MusicDatabase: occurrence_count=row['occurrence_count'], last_updated=datetime.fromisoformat(row['last_updated']), similar_artist_deezer_id=row['similar_artist_deezer_id'] if 'similar_artist_deezer_id' in row.keys() else None, + similar_artist_musicbrainz_id=row['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in row.keys() else None, ) for row in rows] except Exception as e: @@ -8399,6 +8419,25 @@ class MusicDatabase: logger.error(f"Error updating similar artist Deezer ID: {e}") return False + def update_similar_artist_musicbrainz_id(self, similar_artist_id: int, musicbrainz_id: str) -> bool: + """Update a similar artist's MusicBrainz ID (for backfill)""" + try: + with self._get_connection() as conn: + cursor = conn.cursor() + + cursor.execute(""" + UPDATE similar_artists + SET similar_artist_musicbrainz_id = ? + WHERE id = ? + """, (musicbrainz_id, similar_artist_id)) + + conn.commit() + return cursor.rowcount > 0 + + except Exception as e: + logger.error(f"Error updating similar artist MusicBrainz ID: {e}") + return False + def update_similar_artist_metadata(self, similar_artist_id: int, image_url: str = None, genres: list = None, popularity: int = None) -> bool: """Cache artist metadata (image, genres, popularity) to avoid repeated API calls""" @@ -8420,7 +8459,7 @@ class MusicDatabase: def update_similar_artist_metadata_by_external_id(self, external_id: str, source: str = 'spotify', image_url: str = None, genres: list = None, popularity: int = None) -> bool: - """Cache artist metadata by Spotify or iTunes ID (updates all rows for that artist)""" + """Cache artist metadata by external source ID (updates all rows for that artist).""" try: with self._get_connection() as conn: cursor = conn.cursor() @@ -8429,6 +8468,8 @@ class MusicDatabase: where_clause = "similar_artist_spotify_id = ?" elif source == 'deezer': where_clause = "similar_artist_deezer_id = ?" + elif source == 'musicbrainz': + where_clause = "similar_artist_musicbrainz_id = ?" else: where_clause = "similar_artist_itunes_id = ?" cursor.execute(f""" @@ -8490,7 +8531,7 @@ class MusicDatabase: exclude_library_server: str = None, ) -> List[SimilarArtist]: """Get top similar artists excluding watchlist artists, with cycling support. - require_source: if set ('spotify','itunes','deezer'), only returns artists with that source ID. + require_source: if set, only returns artists with that source ID. exclude_library_server: if set, also excludes artists already present in that media server.""" try: with self._get_connection() as conn: @@ -8504,12 +8545,14 @@ class MusicDatabase: source_filter = "AND sa.similar_artist_itunes_id IS NOT NULL AND sa.similar_artist_itunes_id != ''" elif require_source == 'deezer': source_filter = "AND sa.similar_artist_deezer_id IS NOT NULL AND sa.similar_artist_deezer_id != ''" + elif require_source == 'musicbrainz': + source_filter = "AND sa.similar_artist_musicbrainz_id IS NOT NULL AND sa.similar_artist_musicbrainz_id != ''" library_artist_keys = None sql_limit = limit if exclude_library_server: cursor.execute(""" - SELECT name, spotify_artist_id, itunes_artist_id, deezer_id + SELECT name, spotify_artist_id, itunes_artist_id, deezer_id, musicbrainz_id FROM artists WHERE server_source = ? """, (exclude_library_server,)) @@ -8518,6 +8561,7 @@ class MusicDatabase: 'spotify': {r['spotify_artist_id'] for r in library_rows if r['spotify_artist_id']}, 'itunes': {r['itunes_artist_id'] for r in library_rows if r['itunes_artist_id']}, 'deezer': {r['deezer_id'] for r in library_rows if r['deezer_id']}, + 'musicbrainz': {r['musicbrainz_id'] for r in library_rows if r['musicbrainz_id']}, 'names': { self._normalize_for_comparison(r['name']) for r in library_rows @@ -8533,6 +8577,7 @@ class MusicDatabase: MAX(sa.similar_artist_spotify_id) as similar_artist_spotify_id, MAX(sa.similar_artist_itunes_id) as similar_artist_itunes_id, MAX(sa.similar_artist_deezer_id) as similar_artist_deezer_id, + MAX(sa.similar_artist_musicbrainz_id) as similar_artist_musicbrainz_id, sa.similar_artist_name, AVG(sa.similarity_rank) as similarity_rank, SUM(sa.occurrence_count) as occurrence_count, @@ -8564,11 +8609,13 @@ class MusicDatabase: spotify_id = row['similar_artist_spotify_id'] itunes_id = row['similar_artist_itunes_id'] if 'similar_artist_itunes_id' in row.keys() else None deezer_id = row['similar_artist_deezer_id'] if 'similar_artist_deezer_id' in row.keys() else None + musicbrainz_id = row['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in row.keys() else None normalized_name = self._normalize_for_comparison(row['similar_artist_name']) if ( (spotify_id and spotify_id in library_artist_keys['spotify']) or (itunes_id and itunes_id in library_artist_keys['itunes']) or (deezer_id and deezer_id in library_artist_keys['deezer']) + or (musicbrainz_id and musicbrainz_id in library_artist_keys['musicbrainz']) or (normalized_name and normalized_name in library_artist_keys['names']) ): continue @@ -8584,6 +8631,7 @@ class MusicDatabase: similar_artist_spotify_id=row['similar_artist_spotify_id'], similar_artist_itunes_id=row['similar_artist_itunes_id'] if 'similar_artist_itunes_id' in row.keys() else None, similar_artist_deezer_id=row['similar_artist_deezer_id'] if 'similar_artist_deezer_id' in row.keys() else None, + similar_artist_musicbrainz_id=row['similar_artist_musicbrainz_id'] if 'similar_artist_musicbrainz_id' in row.keys() else None, similar_artist_name=row['similar_artist_name'], similarity_rank=int(row['similarity_rank']), occurrence_count=row['occurrence_count'], diff --git a/tests/discovery/test_similar_artists_library_filter.py b/tests/discovery/test_similar_artists_library_filter.py index fc065276..3a379289 100644 --- a/tests/discovery/test_similar_artists_library_filter.py +++ b/tests/discovery/test_similar_artists_library_filter.py @@ -19,6 +19,12 @@ def test_top_similar_artists_can_exclude_active_server_library_artists(tmp_path) similar_artist_deezer_id="dz-owned", profile_id=1, ) + db.add_or_update_similar_artist( + source_artist_id="seed-1", + similar_artist_name="Owned By MusicBrainz ID", + similar_artist_musicbrainz_id="mb-owned", + profile_id=1, + ) db.add_or_update_similar_artist( source_artist_id="seed-1", similar_artist_name="Owned By Name", @@ -41,14 +47,15 @@ def test_top_similar_artists_can_exclude_active_server_library_artists(tmp_path) with db._get_connection() as conn: conn.executemany( """ - INSERT INTO artists (name, server_source, spotify_artist_id, deezer_id) - VALUES (?, ?, ?, ?) + INSERT INTO artists (name, server_source, spotify_artist_id, deezer_id, musicbrainz_id) + VALUES (?, ?, ?, ?, ?) """, [ - ("Library Alias", "navidrome", "sp-owned", None), - ("Library Deezer Alias", "navidrome", None, "dz-owned"), - ("owned by name", "navidrome", None, None), - ("Different Server Artist", "plex", "sp-other-server", None), + ("Library Alias", "navidrome", "sp-owned", None, None), + ("Library Deezer Alias", "navidrome", None, "dz-owned", None), + ("Library MusicBrainz Alias", "navidrome", None, None, "mb-owned"), + ("owned by name", "navidrome", None, None, None), + ("Different Server Artist", "plex", "sp-other-server", None, None), ], ) conn.commit() @@ -62,6 +69,27 @@ def test_top_similar_artists_can_exclude_active_server_library_artists(tmp_path) assert _names(artists) == {"Different Server Artist", "Fresh Artist"} +def test_top_similar_artists_can_require_musicbrainz_source(tmp_path): + db = MusicDatabase(str(tmp_path / "music.db")) + db.add_or_update_similar_artist( + source_artist_id="seed-1", + similar_artist_name="MB Artist", + similar_artist_musicbrainz_id="mb-artist", + profile_id=1, + ) + db.add_or_update_similar_artist( + source_artist_id="seed-1", + similar_artist_name="Spotify Only", + similar_artist_spotify_id="sp-artist", + profile_id=1, + ) + + artists = db.get_top_similar_artists(limit=20, profile_id=1, require_source="musicbrainz") + + assert _names(artists) == {"MB Artist"} + assert artists[0].similar_artist_musicbrainz_id == "mb-artist" + + def test_top_similar_artists_keeps_existing_behavior_without_library_filter(tmp_path): db = MusicDatabase(str(tmp_path / "music.db")) db.add_or_update_similar_artist( diff --git a/tests/metadata/test_metadata_registry.py b/tests/metadata/test_metadata_registry.py index cc83af52..9860c7fc 100644 --- a/tests/metadata/test_metadata_registry.py +++ b/tests/metadata/test_metadata_registry.py @@ -20,6 +20,16 @@ def test_metadata_source_label_maps_known_sources(): assert registry.get_metadata_source_label("deezer") == "Deezer" assert registry.get_metadata_source_label("discogs") == "Discogs" assert registry.get_metadata_source_label("hydrabase") == "Hydrabase" + assert registry.get_metadata_source_label("musicbrainz") == "MusicBrainz" + + +def test_musicbrainz_is_first_class_metadata_client(): + registry.clear_cached_metadata_clients() + client = object() + assert registry.get_client_for_source( + "musicbrainz", + musicbrainz_client_factory=lambda: client, + ) is client def test_metadata_source_label_falls_back_to_unmapped(): diff --git a/tests/metadata/test_typed_metadata_types.py b/tests/metadata/test_typed_metadata_types.py index 20478f75..2006636d 100644 --- a/tests/metadata/test_typed_metadata_types.py +++ b/tests/metadata/test_typed_metadata_types.py @@ -342,6 +342,31 @@ def test_album_from_musicbrainz_dict_release_group_type_overrides_default(): assert Album.from_musicbrainz_dict(raw).album_type == 'single' +def test_album_from_musicbrainz_dict_accepts_adapter_shape(): + raw = { + 'id': 'rg-or-release-mbid', + 'name': 'Coffee Break', + 'artists': [{'id': 'artist-mbid', 'name': 'Zeds Dead'}], + 'release_date': '2011-07-12', + 'total_tracks': 1, + 'album_type': 'single', + 'images': [{'url': 'https://cover.example/front.jpg'}], + 'external_urls': {'musicbrainz': 'https://musicbrainz.org/release/rg-or-release-mbid'}, + } + + album = Album.from_musicbrainz_dict(raw) + + assert album.id == 'rg-or-release-mbid' + assert album.name == 'Coffee Break' + assert album.artists == ['Zeds Dead'] + assert album.artist_id == 'artist-mbid' + assert album.release_date == '2011-07-12' + assert album.total_tracks == 1 + assert album.album_type == 'single' + assert album.image_url == 'https://cover.example/front.jpg' + assert album.external_ids['musicbrainz'] == 'rg-or-release-mbid' + + # --------------------------------------------------------------------------- # Qobuz # --------------------------------------------------------------------------- diff --git a/tools/diagnose_itunes_discover.py b/tools/diagnose_itunes_discover.py index ff047c26..8abae0ba 100644 --- a/tools/diagnose_itunes_discover.py +++ b/tools/diagnose_itunes_discover.py @@ -63,9 +63,17 @@ def diagnose_itunes_discover(): """) with_both = cursor.fetchone()['count'] + with_musicbrainz = 0 + try: + cursor.execute("SELECT COUNT(*) as count FROM similar_artists WHERE similar_artist_musicbrainz_id IS NOT NULL") + with_musicbrainz = cursor.fetchone()['count'] + except Exception: + pass + logger.info(f" Total similar artists: {total}") logger.info(f" With iTunes ID: {with_itunes} ({100 * with_itunes / total:.1f}%)" if total > 0 else " With iTunes ID: 0") logger.info(f" With Spotify ID: {with_spotify} ({100 * with_spotify / total:.1f}%)" if total > 0 else " With Spotify ID: 0") + logger.info(f" With MusicBrainz ID: {with_musicbrainz} ({100 * with_musicbrainz / total:.1f}%)" if total > 0 else " With MusicBrainz ID: 0") logger.info(f" With BOTH IDs: {with_both} ({100 * with_both / total:.1f}%)" if total > 0 else " With BOTH IDs: 0") if with_itunes == 0 and total > 0: diff --git a/web_server.py b/web_server.py index 515fae4e..a3550b66 100644 --- a/web_server.py +++ b/web_server.py @@ -18847,8 +18847,12 @@ def get_spotify_album_tracks(album_id): if not album_data: return jsonify({"error": "Album not found"}), 404 - # Extract tracks from album data (Spotify format) - tracks = album_data.get('tracks', {}).get('items', []) + # Extract tracks — handle Spotify {items, total} or flat-list formats + tracks_container = album_data.get('tracks', {}) + if isinstance(tracks_container, list): + tracks = tracks_container + else: + tracks = tracks_container.get('items', []) # If no tracks in album data (iTunes format), fetch them separately if not tracks: @@ -25901,6 +25905,8 @@ def get_discover_similar_artists(): artist_id = artist.similar_artist_spotify_id elif active_source == 'deezer': artist_id = getattr(artist, 'similar_artist_deezer_id', None) or artist.similar_artist_itunes_id + elif active_source == 'musicbrainz': + artist_id = getattr(artist, 'similar_artist_musicbrainz_id', None) or artist.similar_artist_itunes_id else: artist_id = artist.similar_artist_itunes_id @@ -25908,6 +25914,7 @@ def get_discover_similar_artists(): "artist_id": artist_id, "spotify_artist_id": artist.similar_artist_spotify_id, "itunes_artist_id": artist.similar_artist_itunes_id, + "musicbrainz_artist_id": getattr(artist, 'similar_artist_musicbrainz_id', None), "artist_name": artist.similar_artist_name, "occurrence_count": artist.occurrence_count, "similarity_rank": artist.similarity_rank, @@ -25964,6 +25971,8 @@ def enrich_similar_artists(): ext_id = artist.similar_artist_spotify_id elif source == 'deezer': ext_id = getattr(artist, 'similar_artist_deezer_id', None) or artist.similar_artist_itunes_id + elif source == 'musicbrainz': + ext_id = getattr(artist, 'similar_artist_musicbrainz_id', None) or artist.similar_artist_itunes_id else: ext_id = artist.similar_artist_itunes_id if ext_id and ext_id not in cache_map: diff --git a/webui/index.html b/webui/index.html index a0f01074..c0c86dd8 100644 --- a/webui/index.html +++ b/webui/index.html @@ -3658,10 +3658,11 @@ +
-
Choose the primary source for artist, album, and track metadata. Spotify can only be selected while an active Spotify session exists. Discogs requires a personal token.
+
Choose the primary source for artist, album, and track metadata. Spotify can only be selected while an active Spotify session exists. Discogs requires a personal token. MusicBrainz is always available but rate-limited to 1 req/sec.
diff --git a/webui/static/settings.js b/webui/static/settings.js index 8fdb9d70..2329d142 100644 --- a/webui/static/settings.js +++ b/webui/static/settings.js @@ -71,8 +71,7 @@ function _isMetadataSourceSelectable(source) { function _metadataSourceFallback(source) { if (source === 'spotify') return 'deezer'; - if (source === 'discogs') return 'itunes'; - return 'itunes'; + return 'deezer'; } function focusServiceSettingsSection(service, message) { @@ -100,7 +99,7 @@ function sanitizeMetadataSourceSelection({ quiet = true } = {}) { const select = document.getElementById('metadata-fallback-source'); if (!select) return false; - const selectedSource = select.value || 'itunes'; + const selectedSource = select.value || 'deezer'; if (_isMetadataSourceSelectable(selectedSource)) { select.dataset.lastValidSource = selectedSource; return false; @@ -893,7 +892,7 @@ async function loadSettingsData() { document.getElementById('discogs-token').value = settings.discogs?.token || ''; // Populate Metadata source setting - document.getElementById('metadata-fallback-source').value = settings.metadata?.fallback_source || 'itunes'; + document.getElementById('metadata-fallback-source').value = settings.metadata?.fallback_source || 'deezer'; // Populate Hydrabase settings const hbConfig = settings.hydrabase || {}; @@ -2563,16 +2562,16 @@ async function saveSettings(quiet = false) { const metadataSourceSelect = document.getElementById('metadata-fallback-source'); const discogsTokenInput = document.getElementById('discogs-token'); const discogsTokenPresent = !!discogsTokenInput?.value?.trim(); - let metadataSource = metadataSourceSelect?.value || 'itunes'; + let metadataSource = metadataSourceSelect?.value || 'deezer'; const spotifySessionActive = _lastStatusPayload?.spotify?.authenticated === true; if (metadataSource === 'spotify' && !spotifySessionActive) { - metadataSource = 'deezer'; + metadataSource = _metadataSourceFallback('spotify'); if (metadataSourceSelect) metadataSourceSelect.value = metadataSource; if (!quiet) { - showToast('Spotify is disconnected, so Deezer is used as the primary metadata source.', 'warning'); + showToast('Spotify is disconnected, so the primary metadata source was switched.', 'warning'); } } else if (metadataSource === 'discogs' && !discogsTokenPresent) { - metadataSource = 'itunes'; + metadataSource = _metadataSourceFallback('discogs'); if (metadataSourceSelect) metadataSourceSelect.value = metadataSource; if (!quiet) { showToast('Discogs requires a personal access token before it can be selected as the primary metadata source.', 'warning'); From f3ad65de34dc489105f904882f5939512dd2cdfb Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Mon, 18 May 2026 19:19:25 -0700 Subject: [PATCH 2/3] Complete MusicBrainz watchlist source parity Add MusicBrainz watchlist artist ID storage, badges, linked-provider editing, and per-artist preferred source support. Backfill watchlist MusicBrainz matches from already-enriched library artists so existing MusicBrainz worker matches appear in watchlist cards and settings. Extend bulk watchlist add, liked artist matching, artist map source picking, and service status labels to recognize MusicBrainz, with regression tests for watchlist ID persistence and backfill. --- core/artists/liked_match.py | 13 +- core/artists/map.py | 8 +- core/watchlist/source_picker.py | 1 + core/watchlist_scanner.py | 28 ++- database/music_database.py | 177 ++++++++++++++++-- tests/test_watchlist_bulk_add.py | 17 ++ .../test_musicbrainz_watchlist_ids.py | 81 ++++++++ web_server.py | 74 ++++++-- webui/static/api-monitor.js | 26 ++- webui/static/discover.js | 11 +- webui/static/shared-helpers.js | 2 + webui/static/style.css | 5 + 12 files changed, 383 insertions(+), 60 deletions(-) create mode 100644 tests/watchlist/test_musicbrainz_watchlist_ids.py diff --git a/core/artists/liked_match.py b/core/artists/liked_match.py index 324d8cbc..a8a973ab 100644 --- a/core/artists/liked_match.py +++ b/core/artists/liked_match.py @@ -12,6 +12,7 @@ from core.metadata.registry import ( get_deezer_client, get_discogs_client, get_itunes_client, + get_musicbrainz_client, get_spotify_client, ) @@ -33,6 +34,11 @@ def _get_discogs_client(token=None): return get_discogs_client(token) +def _get_musicbrainz_client(): + """Mirror of web_server._get_musicbrainz_client — delegates to registry.""" + return get_musicbrainz_client() + + class _SpotifyClientProxy: """Resolves the global Spotify client lazily so a Spotify re-auth that rebinds the cached client in core.metadata.registry is visible to the @@ -65,6 +71,7 @@ def _match_liked_artists_to_all_sources(database, profile_id: int): 'itunes': 'itunes_artist_id', 'deezer': 'deezer_artist_id', 'discogs': 'discogs_artist_id', + 'musicbrainz': 'musicbrainz_artist_id', } id_cols = list(source_cols.values()) @@ -103,6 +110,10 @@ def _match_liked_artists_to_all_sources(database, profile_id: int): search_clients['discogs'] = dc except Exception as e: logger.debug("discogs client init failed: %s", e) + try: + search_clients['musicbrainz'] = _get_musicbrainz_client() + except Exception as e: + logger.debug("musicbrainz client init failed: %s", e) # Reuse watchlist scanner's fuzzy matching logic from core.watchlist_scanner import WatchlistScanner @@ -234,7 +245,7 @@ def _match_liked_artists_to_all_sources(database, profile_id: int): # Determine best active source/ID — prefer Spotify, then iTunes, Deezer, Discogs resolved_source = None resolved_id = None - for src in ('spotify', 'itunes', 'deezer', 'discogs'): + for src in ('spotify', 'itunes', 'deezer', 'discogs', 'musicbrainz'): col = source_cols[src] if col in harvested_ids: resolved_source = src diff --git a/core/artists/map.py b/core/artists/map.py index 71a81b12..781a8d1e 100644 --- a/core/artists/map.py +++ b/core/artists/map.py @@ -263,7 +263,7 @@ def get_artist_map_data(): break # Backfill genres if missing if not n.get('genres') or len(n.get('genres', [])) == 0: - for source in ('spotify', 'deezer', 'itunes', 'discogs'): + for source in ('spotify', 'deezer', 'itunes', 'discogs', 'musicbrainz'): if source in cached and cached[source].get('genres'): n['genres'] = cached[source]['genres'][:5] break @@ -632,7 +632,7 @@ def get_artist_map_explore(): # Find the center artist center_name = artist_name center_image = '' - center_ids = {'spotify_id': '', 'itunes_id': '', 'deezer_id': '', 'discogs_id': ''} + center_ids = {'spotify_id': '', 'itunes_id': '', 'deezer_id': '', 'discogs_id': '', 'musicbrainz_id': ''} center_genres = [] # Search metadata cache for the center artist @@ -665,14 +665,14 @@ def get_artist_map_explore(): # Check watchlist + library if not in cache if not artist_found and not artist_id: - cursor.execute("SELECT artist_name, image_url, spotify_artist_id, itunes_artist_id, deezer_artist_id, discogs_artist_id FROM watchlist_artists WHERE artist_name = ? COLLATE NOCASE LIMIT 1", (artist_name,)) + cursor.execute("SELECT artist_name, image_url, spotify_artist_id, itunes_artist_id, deezer_artist_id, discogs_artist_id, musicbrainz_artist_id FROM watchlist_artists WHERE artist_name = ? COLLATE NOCASE LIMIT 1", (artist_name,)) wr = cursor.fetchone() if wr: artist_found = True center_name = wr['artist_name'] if wr['image_url'] and str(wr['image_url']).startswith('http'): center_image = wr['image_url'] - for k, col in [('spotify_id', 'spotify_artist_id'), ('itunes_id', 'itunes_artist_id'), ('deezer_id', 'deezer_artist_id'), ('discogs_id', 'discogs_artist_id')]: + for k, col in [('spotify_id', 'spotify_artist_id'), ('itunes_id', 'itunes_artist_id'), ('deezer_id', 'deezer_artist_id'), ('discogs_id', 'discogs_artist_id'), ('musicbrainz_id', 'musicbrainz_artist_id')]: if wr[col]: center_ids[k] = str(wr[col]) else: diff --git a/core/watchlist/source_picker.py b/core/watchlist/source_picker.py index 666a5a91..0c40a21f 100644 --- a/core/watchlist/source_picker.py +++ b/core/watchlist/source_picker.py @@ -28,6 +28,7 @@ SOURCE_ID_COLUMNS = ( ('itunes', 'itunes_artist_id'), ('deezer', 'deezer_id'), ('discogs', 'discogs_id'), + ('musicbrainz', 'musicbrainz_id'), ) diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index 694da08b..6f9cbfb5 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -531,6 +531,7 @@ class WatchlistScanner: 'itunes': 'itunes_artist_id', 'deezer': 'deezer_artist_id', 'discogs': 'discogs_artist_id', + 'musicbrainz': 'musicbrainz_artist_id', }.get(source) @staticmethod @@ -571,6 +572,9 @@ class WatchlistScanner: elif source == 'discogs': self.database.update_watchlist_discogs_id(watchlist_artist.id, source_id) watchlist_artist.discogs_artist_id = source_id + elif source == 'musicbrainz': + self.database.update_watchlist_musicbrainz_id(watchlist_artist.id, source_id) + watchlist_artist.musicbrainz_artist_id = source_id def _resolve_watchlist_artist_source_id(self, watchlist_artist: WatchlistArtist, source: str, client: Any) -> Optional[str]: """Resolve the artist ID for an exact source, searching by name if needed.""" @@ -901,7 +905,7 @@ class WatchlistScanner: cursor = conn.cursor() cursor.execute(""" SELECT id, artist_name, spotify_artist_id, itunes_artist_id, - deezer_artist_id, discogs_artist_id + deezer_artist_id, discogs_artist_id, musicbrainz_artist_id FROM watchlist_artists WHERE profile_id = ? AND (image_url IS NULL OR image_url = '' OR image_url = 'None' OR image_url NOT LIKE 'http%') @@ -956,7 +960,8 @@ class WatchlistScanner: if img: aid = (row['spotify_artist_id'] or row['itunes_artist_id'] - or row['deezer_artist_id'] or row['discogs_artist_id']) + or row['deezer_artist_id'] or row['discogs_artist_id'] + or row['musicbrainz_artist_id']) if aid: self.database.update_watchlist_artist_image(aid, img) else: @@ -989,7 +994,7 @@ class WatchlistScanner: """ # Per-artist metadata source override — if set, use that source first with fallback preferred = getattr(watchlist_artist, 'preferred_metadata_source', None) - if preferred and preferred in ('spotify', 'deezer', 'itunes', 'discogs'): + if preferred and preferred in ('spotify', 'deezer', 'itunes', 'discogs', 'musicbrainz'): source_priority = list(get_source_priority(preferred)) else: source_priority = self._watchlist_source_priority() @@ -1161,7 +1166,7 @@ class WatchlistScanner: # Keep this as a plain source list; resolve the client right before each use. providers_to_backfill = [ source for source in self._watchlist_source_priority() - if source in {'spotify', 'itunes', 'deezer', 'discogs'} + if source in {'spotify', 'itunes', 'deezer', 'discogs', 'musicbrainz'} ] for provider in providers_to_backfill: @@ -1218,6 +1223,7 @@ class WatchlistScanner: or artist.itunes_artist_id or artist.deezer_artist_id or artist.discogs_artist_id + or getattr(artist, 'musicbrainz_artist_id', None) or str(artist.id) ) @@ -1592,6 +1598,7 @@ class WatchlistScanner: 'itunes': 'itunes_artist_id', 'deezer': 'deezer_artist_id', 'discogs': 'discogs_artist_id', + 'musicbrainz': 'musicbrainz_artist_id', }.get(provider) if not id_attr: @@ -1611,6 +1618,7 @@ class WatchlistScanner: 'itunes': self._match_to_itunes, 'deezer': self._match_to_deezer, 'discogs': self._match_to_discogs, + 'musicbrainz': self._match_to_musicbrainz, }.get(provider) update_fn = { @@ -1618,6 +1626,7 @@ class WatchlistScanner: 'itunes': self.database.update_watchlist_itunes_id, 'deezer': self.database.update_watchlist_deezer_id, 'discogs': self.database.update_watchlist_discogs_id, + 'musicbrainz': self.database.update_watchlist_musicbrainz_id, }.get(provider) if not match_fn or not update_fn: @@ -1777,6 +1786,17 @@ class WatchlistScanner: logger.warning(f"Could not match {artist_name} to Discogs: {e}") return None + def _match_to_musicbrainz(self, artist_name: str) -> Optional[str]: + """Match artist name to MusicBrainz ID using fuzzy name comparison.""" + try: + from core.metadata.registry import get_musicbrainz_client + client = get_musicbrainz_client() + results = client.search_artists(artist_name, limit=5) + return self._best_artist_match(results, artist_name) + except Exception as e: + logger.warning(f"Could not match {artist_name} to MusicBrainz: {e}") + return None + def _get_lookback_period_setting(self) -> str: """ Get the discovery lookback period setting from database. diff --git a/database/music_database.py b/database/music_database.py index af64008d..02cc5246 100644 --- a/database/music_database.py +++ b/database/music_database.py @@ -91,6 +91,7 @@ class WatchlistArtist: itunes_artist_id: Optional[str] = None # Cross-provider support deezer_artist_id: Optional[str] = None # Cross-provider support discogs_artist_id: Optional[str] = None # Cross-provider support + musicbrainz_artist_id: Optional[str] = None # Cross-provider support include_albums: bool = True include_eps: bool = True include_singles: bool = True @@ -335,6 +336,7 @@ class MusicDatabase: itunes_artist_id TEXT, deezer_artist_id TEXT, discogs_artist_id TEXT, + musicbrainz_artist_id TEXT, artist_name TEXT NOT NULL, date_added TIMESTAMP DEFAULT CURRENT_TIMESTAMP, last_scan_timestamp TIMESTAMP, @@ -1502,6 +1504,7 @@ class MusicDatabase: itunes_artist_id TEXT, deezer_artist_id TEXT, discogs_artist_id TEXT, + musicbrainz_artist_id TEXT, image_url TEXT, genres TEXT, source_services TEXT DEFAULT '[]', @@ -1518,6 +1521,10 @@ class MusicDatabase: """) cursor.execute("CREATE INDEX IF NOT EXISTS idx_lap_profile ON liked_artists_pool (profile_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_lap_status ON liked_artists_pool (profile_id, match_status)") + cursor.execute("PRAGMA table_info(liked_artists_pool)") + liked_artist_columns = {column[1] for column in cursor.fetchall()} + if 'musicbrainz_artist_id' not in liked_artist_columns: + cursor.execute("ALTER TABLE liked_artists_pool ADD COLUMN musicbrainz_artist_id TEXT") # Liked albums pool — aggregated saved/liked albums from connected services cursor.execute(""" @@ -1653,6 +1660,10 @@ class MusicDatabase: cursor.execute("ALTER TABLE watchlist_artists ADD COLUMN amazon_artist_id TEXT") logger.info("Added amazon_artist_id column to watchlist_artists table for Amazon Music support") + if 'musicbrainz_artist_id' not in columns: + cursor.execute("ALTER TABLE watchlist_artists ADD COLUMN musicbrainz_artist_id TEXT") + logger.info("Added musicbrainz_artist_id column to watchlist_artists table for MusicBrainz support") + except Exception as e: logger.error(f"Error adding itunes_artist_id column to watchlist_artists: {e}") # Don't raise - this is a migration, database can still function @@ -1742,6 +1753,7 @@ class MusicDatabase: itunes_artist_id TEXT, deezer_artist_id TEXT, discogs_artist_id TEXT, + musicbrainz_artist_id TEXT, profile_id INTEGER DEFAULT 1, UNIQUE(profile_id, spotify_artist_id), UNIQUE(profile_id, itunes_artist_id) @@ -1769,7 +1781,8 @@ class MusicDatabase: lookback_days INTEGER DEFAULT NULL, itunes_artist_id TEXT, deezer_artist_id TEXT, - discogs_artist_id TEXT + discogs_artist_id TEXT, + musicbrainz_artist_id TEXT ) """) @@ -1781,7 +1794,8 @@ class MusicDatabase: 'include_albums', 'include_eps', 'include_singles', 'include_live', 'include_remixes', 'include_acoustic', 'include_compilations', 'include_instrumentals', 'lookback_days', - 'itunes_artist_id', 'deezer_artist_id', 'discogs_artist_id', 'profile_id'] + 'itunes_artist_id', 'deezer_artist_id', 'discogs_artist_id', + 'musicbrainz_artist_id', 'profile_id'] shared_cols = [c for c in new_cols if c in old_cols] cols_str = ', '.join(shared_cols) cursor.execute(f"INSERT INTO watchlist_artists_new ({cols_str}) SELECT {cols_str} FROM watchlist_artists") @@ -2624,6 +2638,7 @@ class MusicDatabase: itunes_artist_id TEXT, deezer_artist_id TEXT, discogs_artist_id TEXT, + musicbrainz_artist_id TEXT, profile_id INTEGER DEFAULT 1, UNIQUE(profile_id, spotify_artist_id), UNIQUE(profile_id, itunes_artist_id) @@ -2636,7 +2651,8 @@ class MusicDatabase: 'include_albums', 'include_eps', 'include_singles', 'include_live', 'include_remixes', 'include_acoustic', 'include_compilations', 'include_instrumentals', 'lookback_days', - 'itunes_artist_id', 'deezer_artist_id', 'discogs_artist_id', 'profile_id'] + 'itunes_artist_id', 'deezer_artist_id', 'discogs_artist_id', + 'musicbrainz_artist_id', 'profile_id'] shared_cols = [c for c in new_cols if c in col_names] cols_str = ', '.join(shared_cols) @@ -7747,7 +7763,8 @@ class MusicDatabase: # Check if artist already exists by name (case-insensitive) for this profile cursor.execute(""" - SELECT id, spotify_artist_id, itunes_artist_id, deezer_artist_id, discogs_artist_id + SELECT id, spotify_artist_id, itunes_artist_id, deezer_artist_id, + discogs_artist_id, musicbrainz_artist_id FROM watchlist_artists WHERE LOWER(artist_name) = LOWER(?) AND profile_id = ? LIMIT 1 @@ -7760,7 +7777,13 @@ class MusicDatabase: if existing: # Artist already on watchlist — update with new source ID if missing - col_map = {'spotify': 'spotify_artist_id', 'itunes': 'itunes_artist_id', 'deezer': 'deezer_artist_id', 'discogs': 'discogs_artist_id'} + col_map = { + 'spotify': 'spotify_artist_id', + 'itunes': 'itunes_artist_id', + 'deezer': 'deezer_artist_id', + 'discogs': 'discogs_artist_id', + 'musicbrainz': 'musicbrainz_artist_id', + } col = col_map.get(source) if col and not existing[col]: cursor.execute(f""" @@ -7796,6 +7819,13 @@ class MusicDatabase: VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?) """, (artist_id, artist_name, profile_id)) logger.info(f"Added artist '{artist_name}' to watchlist (Discogs ID: {artist_id}, profile: {profile_id})") + elif source == 'musicbrainz': + cursor.execute(""" + INSERT INTO watchlist_artists + (musicbrainz_artist_id, artist_name, date_added, updated_at, profile_id) + VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?) + """, (artist_id, artist_name, profile_id)) + logger.info(f"Added artist '{artist_name}' to watchlist (MusicBrainz ID: {artist_id}, profile: {profile_id})") else: cursor.execute(""" INSERT INTO watchlist_artists @@ -7812,7 +7842,7 @@ class MusicDatabase: return False def remove_artist_from_watchlist(self, artist_id: str, profile_id: int = 1) -> bool: - """Remove an artist from the watchlist (checks Spotify, iTunes, Deezer, and Discogs IDs)""" + """Remove an artist from the watchlist (checks cross-provider artist IDs)""" try: with self._get_connection() as conn: cursor = conn.cursor() @@ -7820,15 +7850,17 @@ class MusicDatabase: # Get artist name for logging (check all ID columns) cursor.execute(""" SELECT artist_name FROM watchlist_artists - WHERE (spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? OR discogs_artist_id = ?) AND profile_id = ? - """, (artist_id, artist_id, artist_id, artist_id, profile_id)) + WHERE (spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? + OR discogs_artist_id = ? OR musicbrainz_artist_id = ?) AND profile_id = ? + """, (artist_id, artist_id, artist_id, artist_id, artist_id, profile_id)) result = cursor.fetchone() artist_name = result['artist_name'] if result else "Unknown" cursor.execute(""" DELETE FROM watchlist_artists - WHERE (spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? OR discogs_artist_id = ?) AND profile_id = ? - """, (artist_id, artist_id, artist_id, artist_id, profile_id)) + WHERE (spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? + OR discogs_artist_id = ? OR musicbrainz_artist_id = ?) AND profile_id = ? + """, (artist_id, artist_id, artist_id, artist_id, artist_id, profile_id)) if cursor.rowcount > 0: conn.commit() @@ -7843,7 +7875,7 @@ class MusicDatabase: return False def is_artist_in_watchlist(self, artist_id: str, profile_id: int = 1, artist_name: str = None) -> bool: - """Check if an artist is currently in the watchlist (checks Spotify, iTunes, Deezer, Discogs IDs and name)""" + """Check if an artist is currently in the watchlist (checks cross-provider IDs and name)""" try: with self._get_connection() as conn: cursor = conn.cursor() @@ -7852,15 +7884,18 @@ class MusicDatabase: if artist_name: cursor.execute(""" SELECT 1 FROM watchlist_artists - WHERE (spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? OR discogs_artist_id = ? OR LOWER(artist_name) = LOWER(?)) AND profile_id = ? + WHERE (spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? + OR discogs_artist_id = ? OR musicbrainz_artist_id = ? + OR LOWER(artist_name) = LOWER(?)) AND profile_id = ? LIMIT 1 - """, (artist_id, artist_id, artist_id, artist_id, artist_name, profile_id)) + """, (artist_id, artist_id, artist_id, artist_id, artist_id, artist_name, profile_id)) else: cursor.execute(""" SELECT 1 FROM watchlist_artists - WHERE (spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? OR discogs_artist_id = ?) AND profile_id = ? + WHERE (spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? + OR discogs_artist_id = ? OR musicbrainz_artist_id = ?) AND profile_id = ? LIMIT 1 - """, (artist_id, artist_id, artist_id, artist_id, profile_id)) + """, (artist_id, artist_id, artist_id, artist_id, artist_id, profile_id)) result = cursor.fetchone() return result is not None @@ -7882,7 +7917,7 @@ class MusicDatabase: # Build SELECT query based on existing columns base_columns = ['id', 'spotify_artist_id', 'artist_name', 'date_added', 'last_scan_timestamp', 'created_at', 'updated_at'] - optional_columns = ['image_url', 'itunes_artist_id', 'deezer_artist_id', 'discogs_artist_id', 'include_albums', 'include_eps', 'include_singles', + optional_columns = ['image_url', 'itunes_artist_id', 'deezer_artist_id', 'discogs_artist_id', 'musicbrainz_artist_id', 'include_albums', 'include_eps', 'include_singles', 'include_live', 'include_remixes', 'include_acoustic', 'include_compilations', 'include_instrumentals', 'lookback_days', 'preferred_metadata_source'] @@ -7911,6 +7946,7 @@ class MusicDatabase: itunes_artist_id = row['itunes_artist_id'] if 'itunes_artist_id' in existing_columns else None deezer_artist_id = row['deezer_artist_id'] if 'deezer_artist_id' in existing_columns else None discogs_artist_id = row['discogs_artist_id'] if 'discogs_artist_id' in existing_columns else None + musicbrainz_artist_id = row['musicbrainz_artist_id'] if 'musicbrainz_artist_id' in existing_columns else None include_albums = bool(row['include_albums']) if 'include_albums' in existing_columns else True include_eps = bool(row['include_eps']) if 'include_eps' in existing_columns else True include_singles = bool(row['include_singles']) if 'include_singles' in existing_columns else True @@ -7934,6 +7970,7 @@ class MusicDatabase: itunes_artist_id=itunes_artist_id, deezer_artist_id=deezer_artist_id, discogs_artist_id=discogs_artist_id, + musicbrainz_artist_id=musicbrainz_artist_id, include_albums=include_albums, include_eps=include_eps, include_singles=include_singles, @@ -8133,8 +8170,9 @@ class MusicDatabase: cursor.execute(""" UPDATE watchlist_artists SET image_url = ?, updated_at = CURRENT_TIMESTAMP - WHERE spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? OR discogs_artist_id = ? - """, (image_url, artist_id, artist_id, artist_id, artist_id)) + WHERE spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? + OR discogs_artist_id = ? OR musicbrainz_artist_id = ? + """, (image_url, artist_id, artist_id, artist_id, artist_id, artist_id)) conn.commit() return cursor.rowcount > 0 @@ -8220,6 +8258,107 @@ class MusicDatabase: logger.error(f"Error updating watchlist Discogs ID: {e}") return False + def update_watchlist_musicbrainz_id(self, watchlist_id: int, musicbrainz_id: str) -> bool: + """Update the MusicBrainz artist ID for a watchlist artist (cross-provider support)""" + try: + with self._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + UPDATE watchlist_artists + SET musicbrainz_artist_id = ?, updated_at = CURRENT_TIMESTAMP + WHERE id = ? + """, (musicbrainz_id, watchlist_id)) + conn.commit() + logger.info(f"Updated MusicBrainz ID for watchlist artist {watchlist_id}: {musicbrainz_id}") + return cursor.rowcount > 0 + except Exception as e: + logger.error(f"Error updating watchlist MusicBrainz ID: {e}") + return False + + def backfill_watchlist_musicbrainz_ids_from_library(self, profile_id: int = 1) -> int: + """Copy existing library MusicBrainz artist IDs onto matching watchlist rows. + + The MusicBrainz enrichment worker writes IDs to ``artists.musicbrainz_id``. + Watchlist UI reads ``watchlist_artists.musicbrainz_artist_id``, so this + bridge lets existing enriched library matches show up as watchlist + MusicBrainz matches without waiting for a separate watchlist scan. + """ + try: + with self._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + UPDATE watchlist_artists + SET musicbrainz_artist_id = ( + SELECT a.musicbrainz_id + FROM artists a + WHERE a.musicbrainz_id IS NOT NULL + AND a.musicbrainz_id != '' + AND ( + LOWER(a.name) = LOWER(watchlist_artists.artist_name) + OR ( + watchlist_artists.spotify_artist_id IS NOT NULL + AND watchlist_artists.spotify_artist_id != '' + AND a.spotify_artist_id = watchlist_artists.spotify_artist_id + ) + OR ( + watchlist_artists.itunes_artist_id IS NOT NULL + AND watchlist_artists.itunes_artist_id != '' + AND a.itunes_artist_id = watchlist_artists.itunes_artist_id + ) + OR ( + watchlist_artists.deezer_artist_id IS NOT NULL + AND watchlist_artists.deezer_artist_id != '' + AND a.deezer_id = watchlist_artists.deezer_artist_id + ) + OR ( + watchlist_artists.discogs_artist_id IS NOT NULL + AND watchlist_artists.discogs_artist_id != '' + AND a.discogs_id = watchlist_artists.discogs_artist_id + ) + ) + LIMIT 1 + ), + updated_at = CURRENT_TIMESTAMP + WHERE profile_id = ? + AND (musicbrainz_artist_id IS NULL OR musicbrainz_artist_id = '') + AND EXISTS ( + SELECT 1 + FROM artists a + WHERE a.musicbrainz_id IS NOT NULL + AND a.musicbrainz_id != '' + AND ( + LOWER(a.name) = LOWER(watchlist_artists.artist_name) + OR ( + watchlist_artists.spotify_artist_id IS NOT NULL + AND watchlist_artists.spotify_artist_id != '' + AND a.spotify_artist_id = watchlist_artists.spotify_artist_id + ) + OR ( + watchlist_artists.itunes_artist_id IS NOT NULL + AND watchlist_artists.itunes_artist_id != '' + AND a.itunes_artist_id = watchlist_artists.itunes_artist_id + ) + OR ( + watchlist_artists.deezer_artist_id IS NOT NULL + AND watchlist_artists.deezer_artist_id != '' + AND a.deezer_id = watchlist_artists.deezer_artist_id + ) + OR ( + watchlist_artists.discogs_artist_id IS NOT NULL + AND watchlist_artists.discogs_artist_id != '' + AND a.discogs_id = watchlist_artists.discogs_artist_id + ) + ) + ) + """, (profile_id,)) + conn.commit() + if cursor.rowcount: + logger.info("Backfilled %s watchlist MusicBrainz artist IDs from library", cursor.rowcount) + return cursor.rowcount + except Exception as e: + logger.error(f"Error backfilling watchlist MusicBrainz IDs from library: {e}") + return 0 + def update_watchlist_artist_itunes_id(self, spotify_artist_id: str, itunes_id: str) -> bool: """Update the iTunes artist ID for a watchlist artist by Spotify ID (for cross-provider caching)""" try: @@ -10301,7 +10440,7 @@ class MusicDatabase: # Store all discovered source IDs (COALESCE preserves existing values) if all_ids: - for col in ('spotify_artist_id', 'itunes_artist_id', 'deezer_artist_id', 'discogs_artist_id'): + for col in ('spotify_artist_id', 'itunes_artist_id', 'deezer_artist_id', 'discogs_artist_id', 'musicbrainz_artist_id'): val = all_ids.get(col) if val: set_parts.append(f"{col} = COALESCE({col}, ?)") diff --git a/tests/test_watchlist_bulk_add.py b/tests/test_watchlist_bulk_add.py index b193dd7c..19f86b42 100644 --- a/tests/test_watchlist_bulk_add.py +++ b/tests/test_watchlist_bulk_add.py @@ -69,6 +69,23 @@ def test_falls_back_to_discogs_as_last_resort() -> None: assert pick(artist) == ('dg-999', 'discogs') +def test_falls_back_to_musicbrainz_after_other_sources() -> None: + pick = _make_picker('spotify') + artist = { + 'musicbrainz_id': 'mb-999', + } + assert pick(artist) == ('mb-999', 'musicbrainz') + + +def test_active_source_musicbrainz_picks_musicbrainz_first() -> None: + pick = _make_picker('musicbrainz') + artist = { + 'spotify_artist_id': 'sp-123', + 'musicbrainz_id': 'mb-999', + } + assert pick(artist) == ('mb-999', 'musicbrainz') + + def test_returns_none_when_artist_has_zero_source_ids() -> None: """Drop only when the artist has no source IDs at all — that's the only legitimate skip reason now.""" diff --git a/tests/watchlist/test_musicbrainz_watchlist_ids.py b/tests/watchlist/test_musicbrainz_watchlist_ids.py new file mode 100644 index 00000000..a4092d94 --- /dev/null +++ b/tests/watchlist/test_musicbrainz_watchlist_ids.py @@ -0,0 +1,81 @@ +from database.music_database import MusicDatabase + + +def test_watchlist_artist_can_store_musicbrainz_match(tmp_path): + db = MusicDatabase(str(tmp_path / "music.db")) + + assert db.add_artist_to_watchlist( + "mb-artist-1", + "MusicBrainz Artist", + profile_id=1, + source="musicbrainz", + ) + + artists = db.get_watchlist_artists(profile_id=1) + + assert len(artists) == 1 + assert artists[0].artist_name == "MusicBrainz Artist" + assert artists[0].musicbrainz_artist_id == "mb-artist-1" + assert artists[0].spotify_artist_id is None + + +def test_watchlist_musicbrainz_match_can_be_added_to_existing_artist(tmp_path): + db = MusicDatabase(str(tmp_path / "music.db")) + + assert db.add_artist_to_watchlist("sp-artist-1", "Linked Artist", profile_id=1, source="spotify") + assert db.add_artist_to_watchlist("mb-artist-1", "Linked Artist", profile_id=1, source="musicbrainz") + + artists = db.get_watchlist_artists(profile_id=1) + + assert len(artists) == 1 + assert artists[0].spotify_artist_id == "sp-artist-1" + assert artists[0].musicbrainz_artist_id == "mb-artist-1" + + +def test_watchlist_musicbrainz_match_supports_presence_and_removal(tmp_path): + db = MusicDatabase(str(tmp_path / "music.db")) + db.add_artist_to_watchlist("sp-artist-1", "Removable Artist", profile_id=1, source="spotify") + artist = db.get_watchlist_artists(profile_id=1)[0] + + assert db.update_watchlist_musicbrainz_id(artist.id, "mb-artist-1") + assert db.is_artist_in_watchlist("mb-artist-1", profile_id=1) + assert db.remove_artist_from_watchlist("mb-artist-1", profile_id=1) + assert db.get_watchlist_artists(profile_id=1) == [] + + +def test_watchlist_musicbrainz_match_backfills_from_library_by_name(tmp_path): + db = MusicDatabase(str(tmp_path / "music.db")) + db.add_artist_to_watchlist("sp-artist-1", "Library Matched Artist", profile_id=1, source="spotify") + with db._get_connection() as conn: + conn.execute( + """ + INSERT INTO artists (id, name, musicbrainz_id) + VALUES (?, ?, ?) + """, + ("library-artist-1", "Library Matched Artist", "mb-library-1"), + ) + conn.commit() + + assert db.backfill_watchlist_musicbrainz_ids_from_library(profile_id=1) == 1 + + artist = db.get_watchlist_artists(profile_id=1)[0] + assert artist.musicbrainz_artist_id == "mb-library-1" + + +def test_watchlist_musicbrainz_match_backfills_from_library_by_linked_id(tmp_path): + db = MusicDatabase(str(tmp_path / "music.db")) + db.add_artist_to_watchlist("sp-artist-1", "Different Watchlist Name", profile_id=1, source="spotify") + with db._get_connection() as conn: + conn.execute( + """ + INSERT INTO artists (id, name, spotify_artist_id, musicbrainz_id) + VALUES (?, ?, ?, ?) + """, + ("library-artist-1", "Canonical Library Name", "sp-artist-1", "mb-library-1"), + ) + conn.commit() + + assert db.backfill_watchlist_musicbrainz_ids_from_library(profile_id=1) == 1 + + artist = db.get_watchlist_artists(profile_id=1)[0] + assert artist.musicbrainz_artist_id == "mb-library-1" diff --git a/web_server.py b/web_server.py index a3550b66..0e284c86 100644 --- a/web_server.py +++ b/web_server.py @@ -24511,6 +24511,7 @@ def get_watchlist_artists(): """Get all artists in the watchlist with cached images""" try: database = get_database() + database.backfill_watchlist_musicbrainz_ids_from_library(profile_id=get_current_profile_id()) watchlist_artists = database.get_watchlist_artists(profile_id=get_current_profile_id()) # Convert to JSON serializable format (images are cached from watchlist scans) @@ -24528,6 +24529,7 @@ def get_watchlist_artists(): "itunes_artist_id": artist.itunes_artist_id, # For iTunes-only artists "deezer_artist_id": getattr(artist, 'deezer_artist_id', None), "discogs_artist_id": getattr(artist, 'discogs_artist_id', None), + "musicbrainz_artist_id": getattr(artist, 'musicbrainz_artist_id', None), "amazon_artist_id": getattr(artist, 'amazon_artist_id', None), "include_albums": artist.include_albums, "include_eps": artist.include_eps, @@ -24565,7 +24567,7 @@ def add_to_watchlist(): conn = database._get_connection() cursor = conn.cursor() cursor.execute(""" - SELECT spotify_artist_id, itunes_artist_id, deezer_id, discogs_id + SELECT spotify_artist_id, itunes_artist_id, deezer_id, discogs_id, musicbrainz_id FROM artists WHERE id = ? LIMIT 1 """, (artist_id,)) row = cursor.fetchone() @@ -24576,6 +24578,9 @@ def add_to_watchlist(): if fallback == 'discogs' and row['discogs_id']: artist_id = row['discogs_id'] source = 'discogs' + elif fallback == 'musicbrainz' and row['musicbrainz_id']: + artist_id = row['musicbrainz_id'] + source = 'musicbrainz' elif fallback == 'deezer' and row['deezer_id']: artist_id = row['deezer_id'] source = 'deezer' @@ -24591,12 +24596,17 @@ def add_to_watchlist(): elif row['discogs_id']: artist_id = row['discogs_id'] source = 'discogs' + elif row['musicbrainz_id']: + artist_id = row['musicbrainz_id'] + source = 'musicbrainz' except Exception as e: logger.debug("watchlist artist source lookup failed: %s", e) if not source: fallback_source = _get_metadata_fallback_source() source = fallback_source if is_numeric_id else 'spotify' success = database.add_artist_to_watchlist(artist_id, artist_name, profile_id=get_current_profile_id(), source=source) + if success: + database.backfill_watchlist_musicbrainz_ids_from_library(profile_id=get_current_profile_id()) if success: @@ -25014,7 +25024,7 @@ def start_watchlist_scan(): # PROACTIVE ID BACKFILLING (cross-provider support) # Before scanning, ensure all artists have IDs for ALL available sources - providers_to_backfill = ['itunes', 'deezer'] + providers_to_backfill = ['itunes', 'deezer', 'musicbrainz'] if spotify_client and spotify_client.is_spotify_authenticated(): providers_to_backfill.append('spotify') try: @@ -25320,6 +25330,7 @@ def watchlist_artist_config(artist_id): database = get_database() if request.method == 'GET': + database.backfill_watchlist_musicbrainz_ids_from_library(profile_id=get_current_profile_id()) # Get current config from database conn = sqlite3.connect(str(database.database_path)) cursor = conn.cursor() @@ -25328,10 +25339,12 @@ def watchlist_artist_config(artist_id): include_live, include_remixes, include_acoustic, include_compilations, artist_name, image_url, spotify_artist_id, itunes_artist_id, last_scan_timestamp, date_added, include_instrumentals, deezer_artist_id, - lookback_days, discogs_artist_id, preferred_metadata_source, amazon_artist_id + lookback_days, discogs_artist_id, preferred_metadata_source, + amazon_artist_id, musicbrainz_artist_id FROM watchlist_artists - WHERE spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? OR discogs_artist_id = ? OR amazon_artist_id = ? - """, (artist_id, artist_id, artist_id, artist_id, artist_id)) + WHERE spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? + OR discogs_artist_id = ? OR amazon_artist_id = ? OR musicbrainz_artist_id = ? + """, (artist_id, artist_id, artist_id, artist_id, artist_id, artist_id)) result = cursor.fetchone() conn.close() @@ -25345,6 +25358,7 @@ def watchlist_artist_config(artist_id): deezer_id = result[14] # deezer_artist_id from query discogs_id = result[16] # discogs_artist_id from query amazon_id = result[18] if len(result) > 18 else None # amazon_artist_id from query + musicbrainz_id = result[19] if len(result) > 19 else None # musicbrainz_artist_id from query # Get artist info from Spotify (only for Spotify artists) artist_info = None @@ -25387,9 +25401,16 @@ def watchlist_artist_config(artist_id): cur2.execute(""" SELECT banner_url, summary, style, mood, label, genres FROM artists - WHERE spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_id = ? OR discogs_id = ? + WHERE spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_id = ? + OR discogs_id = ? OR musicbrainz_id = ? LIMIT 1 - """, (artist_id, artist_id, artist_id, artist_id)) + """, ( + spotify_id or artist_id, + itunes_id or artist_id, + deezer_id or artist_id, + discogs_id or artist_id, + musicbrainz_id or artist_id, + )) lib_row = cur2.fetchone() if lib_row: artist_info['banner_url'] = lib_row[0] @@ -25410,9 +25431,17 @@ def watchlist_artist_config(artist_id): FROM recent_releases rr JOIN watchlist_artists wa ON rr.watchlist_artist_id = wa.id WHERE wa.spotify_artist_id = ? OR wa.itunes_artist_id = ? OR wa.deezer_artist_id = ? + OR wa.discogs_artist_id = ? OR wa.amazon_artist_id = ? OR wa.musicbrainz_artist_id = ? ORDER BY rr.release_date DESC LIMIT 6 - """, (artist_id, artist_id, artist_id)) + """, ( + spotify_id or artist_id, + itunes_id or artist_id, + deezer_id or artist_id, + discogs_id or artist_id, + amazon_id or artist_id, + musicbrainz_id or artist_id, + )) releases = [ { 'album_name': r[0], @@ -25453,6 +25482,7 @@ def watchlist_artist_config(artist_id): "deezer_artist_id": deezer_id, "discogs_artist_id": discogs_id, "amazon_artist_id": amazon_id, + "musicbrainz_artist_id": musicbrainz_id, "watchlist_name": result[7], # Original stored watchlist artist name "global_metadata_source": get_primary_source(), }) @@ -25476,7 +25506,7 @@ def watchlist_artist_config(artist_id): lookback_days = int(lookback_days) if lookback_days != '' else None preferred_metadata_source = data.get('preferred_metadata_source', None) # Validate — only accept known sources, empty string means clear override - if preferred_metadata_source == '' or preferred_metadata_source not in ('spotify', 'deezer', 'itunes', 'discogs'): + if preferred_metadata_source == '' or preferred_metadata_source not in ('spotify', 'deezer', 'itunes', 'discogs', 'musicbrainz'): preferred_metadata_source = None # Validate at least one release type is selected @@ -25490,8 +25520,9 @@ def watchlist_artist_config(artist_id): # Check if lookback_days changed — if so, clear last_scan_timestamp to force rescan cursor.execute(""" SELECT lookback_days FROM watchlist_artists - WHERE spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? OR discogs_artist_id = ? - """, (artist_id, artist_id, artist_id, artist_id)) + WHERE spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? + OR discogs_artist_id = ? OR musicbrainz_artist_id = ? + """, (artist_id, artist_id, artist_id, artist_id, artist_id)) old_row = cursor.fetchone() old_lookback = old_row[0] if old_row else None lookback_changed = old_lookback != lookback_days @@ -25503,11 +25534,12 @@ def watchlist_artist_config(artist_id): include_instrumentals = ?, lookback_days = ?, preferred_metadata_source = ?, last_scan_timestamp = CASE WHEN ? THEN NULL ELSE last_scan_timestamp END, updated_at = CURRENT_TIMESTAMP - WHERE spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? OR discogs_artist_id = ? + WHERE spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? + OR discogs_artist_id = ? OR musicbrainz_artist_id = ? """, (int(include_albums), int(include_eps), int(include_singles), int(include_live), int(include_remixes), int(include_acoustic), int(include_compilations), int(include_instrumentals), lookback_days, preferred_metadata_source, lookback_changed, - artist_id, artist_id, artist_id, artist_id)) + artist_id, artist_id, artist_id, artist_id, artist_id)) conn.commit() if cursor.rowcount == 0: @@ -25553,7 +25585,7 @@ def watchlist_artist_link_provider(artist_id): new_provider_id = data.get('provider_id', '').strip() provider = data.get('provider', '').strip() - valid_providers = ('spotify', 'itunes', 'deezer', 'discogs', 'amazon') + valid_providers = ('spotify', 'itunes', 'deezer', 'discogs', 'amazon', 'musicbrainz') if provider not in valid_providers: return jsonify({"success": False, "error": f"Invalid provider. Must be one of: {', '.join(valid_providers)}"}), 400 @@ -25567,8 +25599,9 @@ def watchlist_artist_link_provider(artist_id): cursor.execute(""" SELECT id, artist_name, spotify_artist_id, itunes_artist_id FROM watchlist_artists - WHERE spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? OR discogs_artist_id = ? OR amazon_artist_id = ? - """, (artist_id, artist_id, artist_id, artist_id, artist_id)) + WHERE spotify_artist_id = ? OR itunes_artist_id = ? OR deezer_artist_id = ? + OR discogs_artist_id = ? OR amazon_artist_id = ? OR musicbrainz_artist_id = ? + """, (artist_id, artist_id, artist_id, artist_id, artist_id, artist_id)) row = cursor.fetchone() if not row: @@ -25579,7 +25612,14 @@ def watchlist_artist_link_provider(artist_id): artist_name = row[1] # Check for duplicate — another watchlist artist already has this provider ID - col_map = {'spotify': 'spotify_artist_id', 'itunes': 'itunes_artist_id', 'deezer': 'deezer_artist_id', 'discogs': 'discogs_artist_id', 'amazon': 'amazon_artist_id'} + col_map = { + 'spotify': 'spotify_artist_id', + 'itunes': 'itunes_artist_id', + 'deezer': 'deezer_artist_id', + 'discogs': 'discogs_artist_id', + 'amazon': 'amazon_artist_id', + 'musicbrainz': 'musicbrainz_artist_id', + } col = col_map[provider] if not is_clear: diff --git a/webui/static/api-monitor.js b/webui/static/api-monitor.js index 58561523..4b0fc398 100644 --- a/webui/static/api-monitor.js +++ b/webui/static/api-monitor.js @@ -974,8 +974,9 @@ async function initializeWatchlistPage() { if (artist.itunes_artist_id) sourceBadges.push('iTunes'); if (artist.deezer_artist_id) sourceBadges.push('Deezer'); if (artist.discogs_artist_id) sourceBadges.push('Discogs'); + if (artist.musicbrainz_artist_id) sourceBadges.push('MusicBrainz'); if (artist.amazon_artist_id) sourceBadges.push('Amazon'); - const artistPrimaryId = artist.spotify_artist_id || artist.itunes_artist_id || artist.deezer_artist_id || artist.discogs_artist_id || artist.amazon_artist_id; + const artistPrimaryId = artist.spotify_artist_id || artist.itunes_artist_id || artist.deezer_artist_id || artist.discogs_artist_id || artist.musicbrainz_artist_id || artist.amazon_artist_id; return `
iTunes'); if (artist.deezer_artist_id) sourceBadges.push('Deezer'); if (artist.discogs_artist_id) sourceBadges.push('Discogs'); + if (artist.musicbrainz_artist_id) sourceBadges.push('MusicBrainz'); if (artist.amazon_artist_id) sourceBadges.push('Amazon'); - const artistPrimaryId = artist.spotify_artist_id || artist.itunes_artist_id || artist.deezer_artist_id || artist.discogs_artist_id || artist.amazon_artist_id; + const artistPrimaryId = artist.spotify_artist_id || artist.itunes_artist_id || artist.deezer_artist_id || artist.discogs_artist_id || artist.musicbrainz_artist_id || artist.amazon_artist_id; return `
🌐Default (${globalLabel}) @@ -2283,7 +2287,7 @@ async function openWatchlistArtistDetailView(artistId, artistName) { return; } - const { config, artist, recent_releases, spotify_artist_id, itunes_artist_id, deezer_artist_id, discogs_artist_id } = data; + const { config, artist, recent_releases, spotify_artist_id, itunes_artist_id, deezer_artist_id, discogs_artist_id, musicbrainz_artist_id } = data; // Remove existing overlay if any const existing = document.querySelector('.watchlist-artist-detail-overlay'); @@ -2416,11 +2420,13 @@ async function openWatchlistArtistDetailView(artistId, artistName) { discogId = discogs_artist_id; source = 'discogs'; } else if (activeSrc.includes('deezer') && deezer_artist_id) { discogId = deezer_artist_id; source = 'deezer'; + } else if (activeSrc.includes('musicbrainz') && musicbrainz_artist_id) { + discogId = musicbrainz_artist_id; source = 'musicbrainz'; } else if (itunes_artist_id) { discogId = itunes_artist_id; source = 'itunes'; } else { - discogId = spotify_artist_id || discogs_artist_id || deezer_artist_id || itunes_artist_id; - source = spotify_artist_id ? 'spotify' : discogs_artist_id ? 'discogs' : deezer_artist_id ? 'deezer' : 'itunes'; + discogId = spotify_artist_id || discogs_artist_id || deezer_artist_id || musicbrainz_artist_id || itunes_artist_id; + source = spotify_artist_id ? 'spotify' : discogs_artist_id ? 'discogs' : deezer_artist_id ? 'deezer' : musicbrainz_artist_id ? 'musicbrainz' : 'itunes'; } if (discogId) { closeWatchlistArtistDetailView(); diff --git a/webui/static/discover.js b/webui/static/discover.js index af8fdd91..99e7124c 100644 --- a/webui/static/discover.js +++ b/webui/static/discover.js @@ -6782,11 +6782,12 @@ async function openYourArtistInfoModal_direct(node) { let bestId = '', bestSource = ''; // Check what the active source is const activeSource = window._yaActiveSource || 'spotify'; - const sourceOrder = activeSource === 'spotify' ? ['spotify_id', 'itunes_id', 'deezer_id', 'discogs_id'] - : activeSource === 'itunes' ? ['itunes_id', 'spotify_id', 'deezer_id', 'discogs_id'] - : activeSource === 'deezer' ? ['deezer_id', 'spotify_id', 'itunes_id', 'discogs_id'] - : ['spotify_id', 'itunes_id', 'deezer_id', 'discogs_id']; - const sourceMap = { spotify_id: 'spotify', itunes_id: 'itunes', deezer_id: 'deezer', discogs_id: 'discogs' }; + const sourceOrder = activeSource === 'spotify' ? ['spotify_id', 'itunes_id', 'deezer_id', 'discogs_id', 'musicbrainz_id'] + : activeSource === 'itunes' ? ['itunes_id', 'spotify_id', 'deezer_id', 'discogs_id', 'musicbrainz_id'] + : activeSource === 'deezer' ? ['deezer_id', 'spotify_id', 'itunes_id', 'discogs_id', 'musicbrainz_id'] + : activeSource === 'musicbrainz' ? ['musicbrainz_id', 'spotify_id', 'itunes_id', 'deezer_id', 'discogs_id'] + : ['spotify_id', 'itunes_id', 'deezer_id', 'discogs_id', 'musicbrainz_id']; + const sourceMap = { spotify_id: 'spotify', itunes_id: 'itunes', deezer_id: 'deezer', discogs_id: 'discogs', musicbrainz_id: 'musicbrainz' }; for (const key of sourceOrder) { if (node[key]) { bestId = node[key]; bestSource = sourceMap[key]; break; } } diff --git a/webui/static/shared-helpers.js b/webui/static/shared-helpers.js index e3058380..7a509f6c 100644 --- a/webui/static/shared-helpers.js +++ b/webui/static/shared-helpers.js @@ -3244,7 +3244,9 @@ function syncPrimaryMetadataSourceAvailability(statusData) { function getMetadataSourceLabel(source) { if (source === 'deezer') return 'Deezer'; if (source === 'discogs') return 'Discogs'; + if (source === 'hydrabase') return 'Hydrabase'; if (source === 'itunes') return 'iTunes'; + if (source === 'musicbrainz') return 'MusicBrainz'; if (source === 'spotify') return 'Spotify'; return 'Unmapped'; } diff --git a/webui/static/style.css b/webui/static/style.css index caa6a3eb..7b1a32d3 100644 --- a/webui/static/style.css +++ b/webui/static/style.css @@ -16445,6 +16445,11 @@ body.helper-mode-active #dashboard-activity-feed:hover { color: #D4A574; } +.watchlist-source-musicbrainz { + background: rgba(186, 71, 143, 0.15); + color: #BA478F; +} + .watchlist-source-amazon { background: rgba(255, 153, 0, 0.15); color: #FF9900; From 24d0482697fec0f8a8053aab0e87cadd9ad7afd7 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Mon, 18 May 2026 19:31:36 -0700 Subject: [PATCH 3/3] Update diagnose_itunes_discover.py --- tools/diagnose_itunes_discover.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/diagnose_itunes_discover.py b/tools/diagnose_itunes_discover.py index 8abae0ba..8aa9c821 100644 --- a/tools/diagnose_itunes_discover.py +++ b/tools/diagnose_itunes_discover.py @@ -67,8 +67,8 @@ def diagnose_itunes_discover(): try: cursor.execute("SELECT COUNT(*) as count FROM similar_artists WHERE similar_artist_musicbrainz_id IS NOT NULL") with_musicbrainz = cursor.fetchone()['count'] - except Exception: - pass + except Exception as exc: + logger.debug("similar_artist_musicbrainz_id column is unavailable: %s", exc) logger.info(f" Total similar artists: {total}") logger.info(f" With iTunes ID: {with_itunes} ({100 * with_itunes / total:.1f}%)" if total > 0 else " With iTunes ID: 0")