feat: embed MusicBrainz, Spotify/iTunes IDs, ISRC, and merged genres into audio file tags

Enrich downloaded audio files with external identifiers and improved genre metadata in a single post-processing write. During metadata enhancement, the app now looks up the MusicBrainz recording and artist MBIDs, retrieves the ISRC and MusicBrainz genres from a follow-up detail lookup, merges them with Spotify's artist-level genres (deduplicated, capped at 5), and embeds everything alongside the Spotify/iTunes track, artist, and album IDs. All MusicBrainz API calls are serialized through the existing global rate limiter, making concurrent download workers safe without needing to pause the background worker. Includes a database migration adding Spotify/iTunes ID columns to the library tables.
2 weeks ago · d08a2e91a2
parent d9efcbdf99
commit d08a2e91a2
5 changed files with 302 additions and 76 deletions
--- a/config/settings.py
+++ b/config/settings.py
@ -211,6 +211,9 @@ class ConfigManager:
                "enabled": True,
                "embed_album_art": True
            },
+            "musicbrainz": {
+                "embed_tags": True
+            },
            "playlist_sync": {
                "create_backup": True
            },
--- a/core/musicbrainz_service.py
+++ b/core/musicbrainz_service.py
@ -1,4 +1,4 @@
-from typing import Optional, Dict, Any, List
+from typing import Optional, Dict, Any
 import json
 from datetime import datetime, timedelta
 from difflib import SequenceMatcher
@ -430,3 +430,4 @@ class MusicBrainzService:
        finally:
            if conn:
                conn.close()
+
--- a/database/music_database.py
+++ b/database/music_database.py
@ -297,6 +297,9 @@ class MusicDatabase:
            # Add MusicBrainz columns to library tables (migration)
            self._add_musicbrainz_columns(cursor)

+            # Add external ID columns (Spotify/iTunes) to library tables (migration)
+            self._add_external_id_columns(cursor)
+
            conn.commit()
            logger.info("Database initialized successfully")
            
@ -952,6 +955,46 @@ class MusicDatabase:
            logger.error(f"Error adding MusicBrainz columns: {e}")
            # Don't raise - this is a migration, database can still function

+    def _add_external_id_columns(self, cursor):
+        """Add Spotify/iTunes external ID columns to the library tables.
+
+        Idempotent migration: every column is checked individually against
+        ``PRAGMA table_info`` so a partially migrated table (for example one
+        that already has ``spotify_artist_id`` but not ``itunes_artist_id``)
+        is completed instead of being skipped or failing on a duplicate
+        column.  Indexes are created with ``IF NOT EXISTS`` on every run, so
+        a missing index is restored as well.
+
+        Args:
+            cursor: Open SQLite cursor.  Nothing is committed here; the
+                caller commits after the migrations have run.
+
+        Errors are logged and swallowed so that a failed migration does not
+        prevent the database from being used.
+        """
+        # table name -> ((column name, index name), ...)
+        external_id_columns = {
+            'artists': (
+                ('spotify_artist_id', 'idx_artists_spotify_id'),
+                ('itunes_artist_id', 'idx_artists_itunes_id'),
+            ),
+            'albums': (
+                ('spotify_album_id', 'idx_albums_spotify_id'),
+                ('itunes_album_id', 'idx_albums_itunes_id'),
+            ),
+            'tracks': (
+                ('spotify_track_id', 'idx_tracks_spotify_id'),
+                ('itunes_track_id', 'idx_tracks_itunes_id'),
+            ),
+        }
+
+        try:
+            for table, columns in external_id_columns.items():
+                # Row layout of table_info is (cid, name, type, ...); index 1 is the name.
+                cursor.execute(f"PRAGMA table_info({table})")
+                existing = {row[1] for row in cursor.fetchall()}
+
+                added = False
+                for column, index_name in columns:
+                    if column not in existing:
+                        cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} TEXT")
+                        added = True
+                    cursor.execute(
+                        f"CREATE INDEX IF NOT EXISTS {index_name} ON {table} ({column})"
+                    )
+
+                if added:
+                    logger.info(f"Added external ID columns to {table} table")
+
+        except Exception as e:
+            logger.error(f"Error adding external ID columns: {e}")
+            # Don't raise - this is a migration, database can still function
+
    def close(self):
        """Close database connection (no-op since we create connections per operation)"""
        # Each operation creates and closes its own connection, so nothing to do here
--- a/web_server.py
+++ b/web_server.py
@ -7767,9 +7767,9 @@ def _get_file_path_from_template(context: dict, template_type: str = 'album_path
 # METADATA & COVER ART HELPERS (Ported from downloads.py)
 # ===================================================================
 from mutagen import File as MutagenFile
-from mutagen.id3 import ID3, TIT2, TPE1, TALB, TDRC, TRCK, TCON, TPE2, TPOS, TXXX, APIC
+from mutagen.id3 import ID3, TIT2, TPE1, TALB, TDRC, TRCK, TCON, TPE2, TPOS, TXXX, APIC, UFID, TSRC
 from mutagen.flac import FLAC, Picture
-from mutagen.mp4 import MP4, MP4Cover
+from mutagen.mp4 import MP4, MP4Cover, MP4FreeForm
 from mutagen.oggvorbis import OggVorbis
 import urllib.request

@ -7883,6 +7883,8 @@ def _enhance_file_metadata(file_path: str, context: dict, artist: dict, album_in
                if config_manager.get('metadata_enhancement.embed_album_art', True):
                    _embed_album_art_metadata(audio_file_raw, metadata)

+                _embed_source_ids(audio_file_raw, metadata)
+
                audio_file_raw.save()

            print("✅ Metadata enhanced successfully.")
@ -8025,6 +8027,28 @@ def _extract_spotify_metadata(context: dict, artist: dict, album_info: dict) ->

    metadata['album_art_url'] = album_info.get('album_image_url')

+    # Extract source IDs (Spotify or iTunes) for tag embedding
+    track_info = context.get("track_info", {})
+    if track_info and track_info.get('id'):
+        # Spotify track IDs are alphanumeric strings; iTunes IDs are numeric
+        track_id = str(track_info['id'])
+        if track_id.isdigit():
+            metadata['itunes_track_id'] = track_id
+        else:
+            metadata['spotify_track_id'] = track_id
+    if artist.get('id'):
+        artist_id = str(artist['id'])
+        if artist_id.isdigit():
+            pass  # iTunes artist ID not available in this context reliably
+        else:
+            metadata['spotify_artist_id'] = artist_id
+    if spotify_album and spotify_album.get('id'):
+        album_id = str(spotify_album['id'])
+        if album_id.isdigit():
+            pass  # iTunes album ID not available in this context reliably
+        else:
+            metadata['spotify_album_id'] = album_id
+
    return metadata

 def _embed_album_art_metadata(audio_file, metadata: dict):
@ -8065,6 +8089,161 @@ def _embed_album_art_metadata(audio_file, metadata: dict):
    except Exception as e:
        print(f"❌ Error embedding album art: {e}")

+def _genre_title_case(name):
+    """Title-case a genre name without the ``str.title()`` apostrophe quirk.
+
+    ``"children's music".title()`` yields ``"Children'S Music"``.  Here a
+    letter is only upper-cased when it is not preceded by a word character
+    or an apostrophe, which gives ``"Children's Music"`` while still
+    producing ``"Hip Hop"``, ``"K-Pop"`` and ``"R&B"``.
+    """
+    import re  # local import so the helper does not rely on module-level imports
+
+    return re.sub(
+        r"(?<![\w'])[^\W\d_]",
+        lambda match: match.group(0).upper(),
+        name.lower(),
+    )
+
+
+def _merge_genre_names(primary, secondary, limit=5):
+    """Merge two genre lists, deduplicated case-insensitively, capped at *limit*.
+
+    Entries from *primary* keep priority over *secondary*; blank entries are
+    dropped and every kept name is title-cased.
+    """
+    merged = []
+    seen = set()
+    for raw in list(primary) + list(secondary):
+        name = str(raw).strip()
+        key = name.lower()
+        if not key or key in seen:
+            continue
+        seen.add(key)
+        merged.append(_genre_title_case(name))
+        if len(merged) >= limit:
+            break
+    return merged
+
+
+def _lookup_musicbrainz_tags(track_title, artist_name):
+    """Query MusicBrainz for recording/artist MBIDs, genres and ISRC.
+
+    Returns:
+        Tuple ``(id_tags, genres, isrc)`` where ``id_tags`` maps
+        ``MUSICBRAINZ_RECORDING_ID`` / ``MUSICBRAINZ_ARTIST_ID`` to MBIDs
+        (only the ones that were found), ``genres`` is a list of genre names
+        ordered by descending vote count, and ``isrc`` is the first ISRC of
+        the recording or ``None``.
+
+    Lookup failures are printed and treated as "no data".  The recording
+    and artist lookups are guarded separately so a failure in one does not
+    discard the result of the other.
+    """
+    id_tags = {}
+    genres = []
+    isrc = None
+
+    mb_service = getattr(mb_worker, 'mb_service', None) if mb_worker else None
+    if not mb_service:
+        print("⚠️ MusicBrainz worker not available, skipping MBID lookup")
+        return id_tags, genres, isrc
+
+    try:
+        result = mb_service.match_recording(track_title, artist_name)
+        if result and result.get('mbid'):
+            recording_mbid = result['mbid']
+            id_tags['MUSICBRAINZ_RECORDING_ID'] = recording_mbid
+            print(f"🎵 MusicBrainz recording matched: {recording_mbid}")
+
+            # Lookup recording details for ISRC and genres
+            details = mb_service.mb_client.get_recording(
+                recording_mbid, includes=['isrcs', 'genres']
+            )
+            if details:
+                isrcs = details.get('isrcs') or []
+                if isrcs:
+                    isrc = isrcs[0]
+                ranked = sorted(
+                    details.get('genres') or [],
+                    key=lambda g: g.get('count', 0),
+                    reverse=True,
+                )
+                # Skip malformed entries instead of aborting on a KeyError.
+                genres = [g['name'] for g in ranked if g.get('name')]
+    except Exception as e:
+        print(f"⚠️ MusicBrainz lookup failed (non-fatal): {e}")
+
+    try:
+        # Also try to get artist MBID (may already be cached from worker)
+        artist_result = mb_service.match_artist(artist_name)
+        if artist_result and artist_result.get('mbid'):
+            id_tags['MUSICBRAINZ_ARTIST_ID'] = artist_result['mbid']
+    except Exception as e:
+        print(f"⚠️ MusicBrainz lookup failed (non-fatal): {e}")
+
+    return id_tags, genres, isrc
+
+
+def _embed_source_ids(audio_file, metadata: dict):
+    """
+    Embed external identifiers, merged genres and ISRC into an audio file.
+
+    Collects the Spotify/iTunes IDs already present in ``metadata``, looks up
+    the MusicBrainz recording/artist MBIDs, genres and ISRC (unless the
+    ``musicbrainz.embed_tags`` setting is disabled), and writes everything
+    into the tag container matching the file type (ID3, Vorbis comments for
+    FLAC/OGG, or MP4 freeform atoms).
+
+    MusicBrainz API calls are expected to be serialized by the global rate
+    limiter in musicbrainz_client.py (not visible from here), so no
+    pause/resume of the background worker is done.
+
+    Args:
+        audio_file: Non-easy-mode MutagenFile object.  It is modified in
+            memory only; the caller must call ``save()``.
+        metadata: Metadata dict; the keys read are ``spotify_track_id``,
+            ``spotify_artist_id``, ``spotify_album_id``, ``itunes_track_id``,
+            ``title``, ``album_artist``, ``artist`` and ``genre``.
+
+    Never raises: any error is printed and treated as non-fatal.
+    """
+    try:
+        # ── 1. Collect Spotify / iTunes IDs already in metadata ──
+        id_tags = {}
+        for meta_key, tag_name in (
+            ('spotify_track_id', 'SPOTIFY_TRACK_ID'),
+            ('spotify_artist_id', 'SPOTIFY_ARTIST_ID'),
+            ('spotify_album_id', 'SPOTIFY_ALBUM_ID'),
+            ('itunes_track_id', 'ITUNES_TRACK_ID'),
+        ):
+            if metadata.get(meta_key):
+                id_tags[tag_name] = metadata[meta_key]
+
+        # ── 2. MusicBrainz lookup for MBID, genres, and ISRC ──
+        mb_genres = []
+        isrc = None
+        track_title = metadata.get('title', '')
+        # Use album_artist (single primary artist) for MB lookup, not the
+        # comma-joined multi-artist field which would give bad search results
+        artist_name = metadata.get('album_artist', '') or metadata.get('artist', '')
+
+        # When the setting is off only the Spotify/iTunes IDs are written.
+        if config_manager.get('musicbrainz.embed_tags', True) and track_title and artist_name:
+            mb_tags, mb_genres, isrc = _lookup_musicbrainz_tags(track_title, artist_name)
+            id_tags.update(mb_tags)
+
+        # Genres and ISRC only exist when a recording MBID was found, so an
+        # empty id_tags means there is nothing at all to write.
+        if not id_tags:
+            return
+
+        is_id3 = isinstance(audio_file.tags, ID3)
+        is_vorbis = isinstance(audio_file, (FLAC, OggVorbis))
+        is_mp4 = isinstance(audio_file, MP4)
+
+        # ── 3. Write all tags into the file ──
+        written = []
+
+        # MP3 (ID3)
+        if is_id3:
+            for tag_name, value in id_tags.items():
+                if tag_name == 'MUSICBRAINZ_RECORDING_ID':
+                    audio_file.tags.add(
+                        UFID(owner='http://musicbrainz.org', data=str(value).encode('ascii'))
+                    )
+                    written.append('UFID:http://musicbrainz.org')
+                elif tag_name == 'MUSICBRAINZ_ARTIST_ID':
+                    audio_file.tags.add(TXXX(encoding=3, desc='MusicBrainz Artist Id', text=[value]))
+                    written.append('TXXX:MusicBrainz Artist Id')
+                else:
+                    audio_file.tags.add(TXXX(encoding=3, desc=tag_name, text=[str(value)]))
+                    written.append(f'TXXX:{tag_name}')
+
+        # FLAC / OGG Vorbis
+        elif is_vorbis:
+            vorbis_names = {
+                'MUSICBRAINZ_RECORDING_ID': 'MUSICBRAINZ_TRACKID',
+                'MUSICBRAINZ_ARTIST_ID': 'MUSICBRAINZ_ARTISTID',
+            }
+            for tag_name, value in id_tags.items():
+                key = vorbis_names.get(tag_name, tag_name)
+                audio_file[key] = [value] if tag_name in vorbis_names else [str(value)]
+                written.append(key)
+
+        # MP4 (M4A/AAC)
+        elif is_mp4:
+            mp4_names = {
+                'MUSICBRAINZ_RECORDING_ID': 'MusicBrainz Track Id',
+                'MUSICBRAINZ_ARTIST_ID': 'MusicBrainz Artist Id',
+            }
+            for tag_name, value in id_tags.items():
+                key = f'----:com.apple.iTunes:{mp4_names.get(tag_name, tag_name)}'
+                audio_file[key] = [MP4FreeForm(str(value).encode('utf-8'))]
+                written.append(key)
+
+        if written:
+            print(f"🔗 Embedded IDs: {', '.join(written)}")
+
+        # ── 4. Merge genres (Spotify + MusicBrainz) and overwrite tag ──
+        if mb_genres:
+            # ``or ''`` guards against an explicit None genre, which would
+            # otherwise raise and silently skip the ISRC write below.
+            spotify_genres = [
+                g.strip() for g in (metadata.get('genre') or '').split(',') if g.strip()
+            ]
+            merged = _merge_genre_names(spotify_genres, mb_genres, limit=5)
+
+            if merged:
+                genre_string = ', '.join(merged)
+                if is_id3:
+                    audio_file.tags.add(TCON(encoding=3, text=[genre_string]))
+                elif is_vorbis:
+                    audio_file['GENRE'] = [genre_string]
+                elif is_mp4:
+                    audio_file['\xa9gen'] = [genre_string]
+                print(f"🎶 Genres merged: {genre_string}")
+
+        # ── 5. Write ISRC if available ──
+        if isrc:
+            if is_id3:
+                audio_file.tags.add(TSRC(encoding=3, text=[isrc]))
+            elif is_vorbis:
+                audio_file['ISRC'] = [isrc]
+            elif is_mp4:
+                audio_file['----:com.apple.iTunes:ISRC'] = [MP4FreeForm(isrc.encode('utf-8'))]
+            print(f"🔖 ISRC: {isrc}")
+
+    except Exception as e:
+        print(f"⚠️ Error embedding source IDs (non-fatal): {e}")
+
 def _download_cover_art(album_info: dict, target_dir: str):
    """Downloads cover.jpg into the specified directory."""
    try: