feat: embed MusicBrainz, Spotify/iTunes IDs, ISRC, and merged genres into audio file tags

Enrich downloaded audio files with external identifiers and improved genre metadata in a single post-processing write. During metadata enhancement, the app now looks up the MusicBrainz recording and artist MBIDs, retrieves the ISRC and MusicBrainz genres from a follow-up detail lookup, merges them with Spotify's artist-level genres (deduplicated, capped at 5), and embeds everything alongside the Spotify/iTunes track, artist, and album IDs. All MusicBrainz API calls are serialized through the existing global rate limiter, making concurrent download workers safe without needing to pause the background worker. Includes a database migration adding Spotify/iTunes ID columns to the library tables.
pull/130/head
Broque Thomas 2 weeks ago
parent d9efcbdf99
commit d08a2e91a2

@ -211,6 +211,9 @@ class ConfigManager:
"enabled": True, "enabled": True,
"embed_album_art": True "embed_album_art": True
}, },
"musicbrainz": {
"embed_tags": True
},
"playlist_sync": { "playlist_sync": {
"create_backup": True "create_backup": True
}, },

@ -1,4 +1,4 @@
from typing import Optional, Dict, Any, List from typing import Optional, Dict, Any
import json import json
from datetime import datetime, timedelta from datetime import datetime, timedelta
from difflib import SequenceMatcher from difflib import SequenceMatcher
@ -87,26 +87,26 @@ class MusicBrainzService:
if conn: if conn:
conn.close() conn.close()
def _save_to_cache(self, entity_type: str, entity_name: str, artist_name: Optional[str], def _save_to_cache(self, entity_type: str, entity_name: str, artist_name: Optional[str],
musicbrainz_id: Optional[str], metadata: Optional[Dict], confidence: int): musicbrainz_id: Optional[str], metadata: Optional[Dict], confidence: int):
"""Save MusicBrainz result to cache""" """Save MusicBrainz result to cache"""
conn = None conn = None
try: try:
conn = self.db._get_connection() conn = self.db._get_connection()
cursor = conn.cursor() cursor = conn.cursor()
metadata_json = json.dumps(metadata) if metadata else None metadata_json = json.dumps(metadata) if metadata else None
cursor.execute(""" cursor.execute("""
INSERT OR REPLACE INTO musicbrainz_cache INSERT OR REPLACE INTO musicbrainz_cache
(entity_type, entity_name, artist_name, musicbrainz_id, metadata_json, match_confidence, last_updated) (entity_type, entity_name, artist_name, musicbrainz_id, metadata_json, match_confidence, last_updated)
VALUES (?, ?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?, ?)
""", (entity_type, entity_name, artist_name, musicbrainz_id, metadata_json, confidence, datetime.now())) """, (entity_type, entity_name, artist_name, musicbrainz_id, metadata_json, confidence, datetime.now()))
conn.commit() conn.commit()
logger.debug(f"Cached {entity_type} '{entity_name}' (MBID: {musicbrainz_id}, confidence: {confidence})") logger.debug(f"Cached {entity_type} '{entity_name}' (MBID: {musicbrainz_id}, confidence: {confidence})")
except Exception as e: except Exception as e:
logger.error(f"Error saving to cache: {e}") logger.error(f"Error saving to cache: {e}")
if conn: if conn:
@ -410,7 +410,7 @@ class MusicBrainzService:
try: try:
conn = self.db._get_connection() conn = self.db._get_connection()
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute(""" cursor.execute("""
UPDATE tracks UPDATE tracks
SET musicbrainz_recording_id = ?, SET musicbrainz_recording_id = ?,
@ -418,11 +418,11 @@ class MusicBrainzService:
musicbrainz_match_status = ? musicbrainz_match_status = ?
WHERE id = ? WHERE id = ?
""", (mbid, datetime.now(), status, track_id)) """, (mbid, datetime.now(), status, track_id))
conn.commit() conn.commit()
logger.debug(f"Updated track {track_id} with MBID: {mbid}, status: {status}") logger.debug(f"Updated track {track_id} with MBID: {mbid}, status: {status}")
except Exception as e: except Exception as e:
logger.error(f"Error updating track {track_id}: {e}") logger.error(f"Error updating track {track_id}: {e}")
if conn: if conn:
@ -430,3 +430,4 @@ class MusicBrainzService:
finally: finally:
if conn: if conn:
conn.close() conn.close()

@ -10,20 +10,20 @@ logger = get_logger("musicbrainz_worker")
class MusicBrainzWorker: class MusicBrainzWorker:
"""Background worker for enriching library with MusicBrainz IDs""" """Background worker for enriching library with MusicBrainz IDs"""
def __init__(self, database: MusicDatabase, app_name: str = "SoulSync", app_version: str = "1.0", contact_email: str = ""): def __init__(self, database: MusicDatabase, app_name: str = "SoulSync", app_version: str = "1.0", contact_email: str = ""):
self.db = database self.db = database
self.mb_service = MusicBrainzService(database, app_name, app_version, contact_email) self.mb_service = MusicBrainzService(database, app_name, app_version, contact_email)
# Worker state # Worker state
self.running = False self.running = False
self.paused = False self.paused = False
self.should_stop = False self.should_stop = False
self.thread = None self.thread = None
# Current item being processed (for UI tooltip) # Current item being processed (for UI tooltip)
self.current_item = None self.current_item = None
# Statistics # Statistics
self.stats = { self.stats = {
'matched': 0, 'matched': 0,
@ -31,67 +31,67 @@ class MusicBrainzWorker:
'pending': 0, 'pending': 0,
'errors': 0 'errors': 0
} }
# Retry configuration # Retry configuration
self.retry_days = 30 # Retry 'not_found' items after 30 days self.retry_days = 30 # Retry 'not_found' items after 30 days
logger.info("MusicBrainz background worker initialized") logger.info("MusicBrainz background worker initialized")
def start(self):
    """Launch the worker loop on a daemon thread unless it is already running."""
    if not self.running:
        # Set the state flags before the thread exists so _run starts from them.
        self.running = True
        self.should_stop = False

        worker_thread = threading.Thread(target=self._run, daemon=True)
        self.thread = worker_thread
        worker_thread.start()

        logger.info("MusicBrainz background worker started")
        return

    logger.warning("Worker already running")
def stop(self):
    """Ask the worker loop to exit and wait up to five seconds for its thread.

    Does nothing when the worker is not marked as running. The stop flag and
    the running flag are updated before joining, so the state is consistent
    even if the thread does not finish within the timeout.
    """
    if not self.running:
        return

    logger.info("Stopping MusicBrainz worker...")
    self.should_stop = True
    self.running = False

    if self.thread:
        self.thread.join(timeout=5)
        if self.thread.is_alive():
            # join() returns silently on timeout; report it instead of
            # claiming the worker has stopped.
            logger.warning("MusicBrainz worker thread did not stop within 5 seconds")
            return

    logger.info("MusicBrainz worker stopped")
def pause(self):
    """Set the paused flag; only has an effect while the worker is running."""
    if self.running:
        self.paused = True
        logger.info("MusicBrainz worker paused")
    else:
        logger.warning("Worker not running, cannot pause")
def resume(self):
    """Clear the paused flag; only has an effect while the worker is running."""
    if self.running:
        self.paused = False
        logger.info("MusicBrainz worker resumed")
    else:
        logger.warning("Worker not running, start it first")
def get_stats(self) -> Dict[str, Any]: def get_stats(self) -> Dict[str, Any]:
"""Get current statistics""" """Get current statistics"""
# Update pending count # Update pending count
self.stats['pending'] = self._count_pending_items() self.stats['pending'] = self._count_pending_items()
# Get progress breakdown by entity type # Get progress breakdown by entity type
progress = self._get_progress_breakdown() progress = self._get_progress_breakdown()
# Check if thread is actually alive (in case it crashed) # Check if thread is actually alive (in case it crashed)
is_actually_running = self.running and (self.thread is not None and self.thread.is_alive()) is_actually_running = self.running and (self.thread is not None and self.thread.is_alive())
return { return {
'enabled': True, 'enabled': True,
'running': is_actually_running and not self.paused, 'running': is_actually_running and not self.paused,
@ -100,53 +100,53 @@ class MusicBrainzWorker:
'stats': self.stats.copy(), 'stats': self.stats.copy(),
'progress': progress 'progress': progress
} }
def _run(self):
    """Main worker loop: fetch the next pending item, process it, then wait.

    Runs until ``self.should_stop`` becomes true. While ``self.paused`` is set
    the loop idles without touching the queue. ``self.current_item`` is
    cleared before each fetch and left pointing at the last processed item
    during the following wait so the UI can still display it.

    All waits are interruptible: they wake as soon as a stop is requested,
    so a stop request is not held up by the 10-second idle wait or the
    5-second error back-off.
    """

    def _wait(seconds):
        """Sleep up to *seconds*, returning early once should_stop is set."""
        deadline = time.monotonic() + seconds
        while not self.should_stop:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Short slices keep the stop latency low without busy-waiting.
            time.sleep(min(0.25, remaining))

    logger.info("MusicBrainz worker thread started")

    while not self.should_stop:
        try:
            # Check if paused
            if self.paused:
                _wait(1)
                continue

            # Clear previous item before getting next
            self.current_item = None

            # Get next item to process
            item = self._get_next_item()
            if not item:
                # No more items - sleep for a bit
                logger.debug("No pending items, sleeping...")
                _wait(10)
                continue

            # Set current item for UI tracking
            self.current_item = item

            # Process the item
            self._process_item(item)

            # Keep current_item set during the wait so the UI can see what
            # was just processed.
            # Rate limit: 1 request per second
            _wait(1)

        except Exception as e:
            logger.error(f"Error in worker loop: {e}")
            _wait(5)  # Back off on errors

    logger.info("MusicBrainz worker thread finished")
def _get_next_item(self) -> Optional[Dict[str, Any]]: def _get_next_item(self) -> Optional[Dict[str, Any]]:
"""Get next item to process from priority queue""" """Get next item to process from priority queue"""
conn = None conn = None
try: try:
conn = self.db._get_connection() conn = self.db._get_connection()
cursor = conn.cursor() cursor = conn.cursor()
# Priority 1: Unattempted artists # Priority 1: Unattempted artists
cursor.execute(""" cursor.execute("""
SELECT id, name SELECT id, name
@ -158,7 +158,7 @@ class MusicBrainzWorker:
row = cursor.fetchone() row = cursor.fetchone()
if row: if row:
return {'type': 'artist', 'id': row[0], 'name': row[1]} return {'type': 'artist', 'id': row[0], 'name': row[1]}
# Priority 2: Unattempted albums # Priority 2: Unattempted albums
cursor.execute(""" cursor.execute("""
SELECT a.id, a.title, ar.name AS artist_name SELECT a.id, a.title, ar.name AS artist_name
@ -171,7 +171,7 @@ class MusicBrainzWorker:
row = cursor.fetchone() row = cursor.fetchone()
if row: if row:
return {'type': 'album', 'id': row[0], 'name': row[1], 'artist': row[2]} return {'type': 'album', 'id': row[0], 'name': row[1], 'artist': row[2]}
# Priority 3: Unattempted tracks # Priority 3: Unattempted tracks
cursor.execute(""" cursor.execute("""
SELECT t.id, t.title, ar.name AS artist_name SELECT t.id, t.title, ar.name AS artist_name
@ -184,7 +184,7 @@ class MusicBrainzWorker:
row = cursor.fetchone() row = cursor.fetchone()
if row: if row:
return {'type': 'track', 'id': row[0], 'name': row[1], 'artist': row[2]} return {'type': 'track', 'id': row[0], 'name': row[1], 'artist': row[2]}
# Priority 4: Retry 'not_found' artists after retry_days # Priority 4: Retry 'not_found' artists after retry_days
cutoff_date = datetime.now() - timedelta(days=self.retry_days) cutoff_date = datetime.now() - timedelta(days=self.retry_days)
cursor.execute(""" cursor.execute("""
@ -199,7 +199,7 @@ class MusicBrainzWorker:
if row: if row:
logger.info(f"Retrying artist '{row[1]}' (last attempted: {cutoff_date})") logger.info(f"Retrying artist '{row[1]}' (last attempted: {cutoff_date})")
return {'type': 'artist', 'id': row[0], 'name': row[1]} return {'type': 'artist', 'id': row[0], 'name': row[1]}
# Priority 5: Retry 'not_found' albums # Priority 5: Retry 'not_found' albums
cursor.execute(""" cursor.execute("""
SELECT a.id, a.title, ar.name AS artist_name SELECT a.id, a.title, ar.name AS artist_name
@ -213,7 +213,7 @@ class MusicBrainzWorker:
row = cursor.fetchone() row = cursor.fetchone()
if row: if row:
return {'type': 'album', 'id': row[0], 'name': row[1], 'artist': row[2]} return {'type': 'album', 'id': row[0], 'name': row[1], 'artist': row[2]}
# Priority 6: Retry 'not_found' tracks # Priority 6: Retry 'not_found' tracks
cursor.execute(""" cursor.execute("""
SELECT t.id, t.title, ar.name AS artist_name SELECT t.id, t.title, ar.name AS artist_name
@ -227,25 +227,25 @@ class MusicBrainzWorker:
row = cursor.fetchone() row = cursor.fetchone()
if row: if row:
return {'type': 'track', 'id': row[0], 'name': row[1], 'artist': row[2]} return {'type': 'track', 'id': row[0], 'name': row[1], 'artist': row[2]}
return None return None
except Exception as e: except Exception as e:
logger.error(f"Error getting next item: {e}") logger.error(f"Error getting next item: {e}")
return None return None
finally: finally:
if conn: if conn:
conn.close() conn.close()
def _process_item(self, item: Dict[str, Any]): def _process_item(self, item: Dict[str, Any]):
"""Process a single item (artist, album, or track)""" """Process a single item (artist, album, or track)"""
try: try:
item_type = item['type'] item_type = item['type']
item_id = item['id'] item_id = item['id']
item_name = item['name'] item_name = item['name']
logger.debug(f"Processing {item_type} #{item_id}: {item_name}") logger.debug(f"Processing {item_type} #{item_id}: {item_name}")
if item_type == 'artist': if item_type == 'artist':
result = self.mb_service.match_artist(item_name) result = self.mb_service.match_artist(item_name)
if result and result.get('mbid'): if result and result.get('mbid'):
@ -256,7 +256,7 @@ class MusicBrainzWorker:
self.mb_service.update_artist_mbid(item_id, None, 'not_found') self.mb_service.update_artist_mbid(item_id, None, 'not_found')
self.stats['not_found'] += 1 self.stats['not_found'] += 1
logger.debug(f"❌ No match for artist '{item_name}'") logger.debug(f"❌ No match for artist '{item_name}'")
elif item_type == 'album': elif item_type == 'album':
artist_name = item.get('artist') artist_name = item.get('artist')
result = self.mb_service.match_release(item_name, artist_name) result = self.mb_service.match_release(item_name, artist_name)
@ -268,7 +268,7 @@ class MusicBrainzWorker:
self.mb_service.update_album_mbid(item_id, None, 'not_found') self.mb_service.update_album_mbid(item_id, None, 'not_found')
self.stats['not_found'] += 1 self.stats['not_found'] += 1
logger.debug(f"❌ No match for album '{item_name}'") logger.debug(f"❌ No match for album '{item_name}'")
elif item_type == 'track': elif item_type == 'track':
artist_name = item.get('artist') artist_name = item.get('artist')
result = self.mb_service.match_recording(item_name, artist_name) result = self.mb_service.match_recording(item_name, artist_name)
@ -280,11 +280,11 @@ class MusicBrainzWorker:
self.mb_service.update_track_mbid(item_id, None, 'not_found') self.mb_service.update_track_mbid(item_id, None, 'not_found')
self.stats['not_found'] += 1 self.stats['not_found'] += 1
logger.debug(f"❌ No match for track '{item_name}'") logger.debug(f"❌ No match for track '{item_name}'")
except Exception as e: except Exception as e:
logger.error(f"Error processing {item['type']} #{item['id']}: {e}") logger.error(f"Error processing {item['type']} #{item['id']}: {e}")
self.stats['errors'] += 1 self.stats['errors'] += 1
# Mark as error in database # Mark as error in database
try: try:
if item['type'] == 'artist': if item['type'] == 'artist':
@ -295,46 +295,46 @@ class MusicBrainzWorker:
self.mb_service.update_track_mbid(item['id'], None, 'error') self.mb_service.update_track_mbid(item['id'], None, 'error')
except Exception as e2: except Exception as e2:
logger.error(f"Error updating item status: {e2}") logger.error(f"Error updating item status: {e2}")
def _count_pending_items(self) -> int:
    """Return how many artists, albums and tracks have no match status yet.

    Any database error is logged and reported as ``0``; the connection is
    always closed before returning.
    """
    connection = None
    try:
        connection = self.db._get_connection()
        cur = connection.cursor()

        # A NULL match status means the item has never been attempted.
        cur.execute("""
            SELECT
                (SELECT COUNT(*) FROM artists WHERE musicbrainz_match_status IS NULL) +
                (SELECT COUNT(*) FROM albums WHERE musicbrainz_match_status IS NULL) +
                (SELECT COUNT(*) FROM tracks WHERE musicbrainz_match_status IS NULL)
                AS pending
        """)

        first_row = cur.fetchone()
        if first_row:
            return first_row[0]
        return 0

    except Exception as e:
        logger.error(f"Error counting pending items: {e}")
        return 0
    finally:
        if connection:
            connection.close()
def _get_progress_breakdown(self) -> Dict[str, Dict[str, int]]: def _get_progress_breakdown(self) -> Dict[str, Dict[str, int]]:
"""Get progress breakdown by entity type""" """Get progress breakdown by entity type"""
conn = None conn = None
try: try:
conn = self.db._get_connection() conn = self.db._get_connection()
cursor = conn.cursor() cursor = conn.cursor()
progress = {} progress = {}
# Artists progress # Artists progress
cursor.execute(""" cursor.execute("""
SELECT SELECT
COUNT(*) AS total, COUNT(*) AS total,
SUM(CASE WHEN musicbrainz_match_status IS NOT NULL THEN 1 ELSE 0 END) AS processed SUM(CASE WHEN musicbrainz_match_status IS NOT NULL THEN 1 ELSE 0 END) AS processed
FROM artists FROM artists
@ -347,10 +347,10 @@ class MusicBrainzWorker:
'total': total, 'total': total,
'percent': int((processed / total * 100) if total > 0 else 0) 'percent': int((processed / total * 100) if total > 0 else 0)
} }
# Albums progress # Albums progress
cursor.execute(""" cursor.execute("""
SELECT SELECT
COUNT(*) AS total, COUNT(*) AS total,
SUM(CASE WHEN musicbrainz_match_status IS NOT NULL THEN 1 ELSE 0 END) AS processed SUM(CASE WHEN musicbrainz_match_status IS NOT NULL THEN 1 ELSE 0 END) AS processed
FROM albums FROM albums
@ -363,10 +363,10 @@ class MusicBrainzWorker:
'total': total, 'total': total,
'percent': int((processed / total * 100) if total > 0 else 0) 'percent': int((processed / total * 100) if total > 0 else 0)
} }
# Tracks progress # Tracks progress
cursor.execute(""" cursor.execute("""
SELECT SELECT
COUNT(*) AS total, COUNT(*) AS total,
SUM(CASE WHEN musicbrainz_match_status IS NOT NULL THEN 1 ELSE 0 END) AS processed SUM(CASE WHEN musicbrainz_match_status IS NOT NULL THEN 1 ELSE 0 END) AS processed
FROM tracks FROM tracks
@ -379,9 +379,9 @@ class MusicBrainzWorker:
'total': total, 'total': total,
'percent': int((processed / total * 100) if total > 0 else 0) 'percent': int((processed / total * 100) if total > 0 else 0)
} }
return progress return progress
except Exception as e: except Exception as e:
logger.error(f"Error getting progress breakdown: {e}") logger.error(f"Error getting progress breakdown: {e}")
return {} return {}

@ -297,6 +297,9 @@ class MusicDatabase:
# Add MusicBrainz columns to library tables (migration) # Add MusicBrainz columns to library tables (migration)
self._add_musicbrainz_columns(cursor) self._add_musicbrainz_columns(cursor)
# Add external ID columns (Spotify/iTunes) to library tables (migration)
self._add_external_id_columns(cursor)
conn.commit() conn.commit()
logger.info("Database initialized successfully") logger.info("Database initialized successfully")
@ -952,6 +955,46 @@ class MusicDatabase:
logger.error(f"Error adding MusicBrainz columns: {e}") logger.error(f"Error adding MusicBrainz columns: {e}")
# Don't raise - this is a migration, database can still function # Don't raise - this is a migration, database can still function
def _add_external_id_columns(self, cursor):
    """Add Spotify/iTunes external ID columns to library tables for enrichment.

    For each of the ``artists``, ``albums`` and ``tracks`` tables this adds
    the ``spotify_*_id`` and ``itunes_*_id`` TEXT columns when missing and
    ensures an index exists on each of them.

    Every column is checked on its own, so a table that already has only one
    of the two columns (for example after an interrupted migration) is
    completed rather than skipped. Each table is migrated independently, so
    a failure on one table does not stop the others.

    Args:
        cursor: Open database cursor; the caller is responsible for commit.
    """
    # table -> ((column, index name), ...)
    migrations = (
        ('artists', (
            ('spotify_artist_id', 'idx_artists_spotify_id'),
            ('itunes_artist_id', 'idx_artists_itunes_id'),
        )),
        ('albums', (
            ('spotify_album_id', 'idx_albums_spotify_id'),
            ('itunes_album_id', 'idx_albums_itunes_id'),
        )),
        ('tracks', (
            ('spotify_track_id', 'idx_tracks_spotify_id'),
            ('itunes_track_id', 'idx_tracks_itunes_id'),
        )),
    )

    for table, columns in migrations:
        try:
            cursor.execute(f"PRAGMA table_info({table})")
            existing = {column_info[1] for column_info in cursor.fetchall()}

            added_any = False
            for column, index_name in columns:
                if column not in existing:
                    cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} TEXT")
                    added_any = True
                # IF NOT EXISTS makes this safe to repeat on every startup.
                cursor.execute(
                    f"CREATE INDEX IF NOT EXISTS {index_name} ON {table} ({column})"
                )

            if added_any:
                logger.info(f"Added external ID columns to {table} table")

        except Exception as e:
            logger.error(f"Error adding external ID columns: {e}")
            # Don't raise - this is a migration, database can still function
def close(self): def close(self):
"""Close database connection (no-op since we create connections per operation)""" """Close database connection (no-op since we create connections per operation)"""
# Each operation creates and closes its own connection, so nothing to do here # Each operation creates and closes its own connection, so nothing to do here

@ -7767,9 +7767,9 @@ def _get_file_path_from_template(context: dict, template_type: str = 'album_path
# METADATA & COVER ART HELPERS (Ported from downloads.py) # METADATA & COVER ART HELPERS (Ported from downloads.py)
# =================================================================== # ===================================================================
from mutagen import File as MutagenFile from mutagen import File as MutagenFile
from mutagen.id3 import ID3, TIT2, TPE1, TALB, TDRC, TRCK, TCON, TPE2, TPOS, TXXX, APIC from mutagen.id3 import ID3, TIT2, TPE1, TALB, TDRC, TRCK, TCON, TPE2, TPOS, TXXX, APIC, UFID, TSRC
from mutagen.flac import FLAC, Picture from mutagen.flac import FLAC, Picture
from mutagen.mp4 import MP4, MP4Cover from mutagen.mp4 import MP4, MP4Cover, MP4FreeForm
from mutagen.oggvorbis import OggVorbis from mutagen.oggvorbis import OggVorbis
import urllib.request import urllib.request
@ -7883,6 +7883,8 @@ def _enhance_file_metadata(file_path: str, context: dict, artist: dict, album_in
if config_manager.get('metadata_enhancement.embed_album_art', True): if config_manager.get('metadata_enhancement.embed_album_art', True):
_embed_album_art_metadata(audio_file_raw, metadata) _embed_album_art_metadata(audio_file_raw, metadata)
_embed_source_ids(audio_file_raw, metadata)
audio_file_raw.save() audio_file_raw.save()
print("✅ Metadata enhanced successfully.") print("✅ Metadata enhanced successfully.")
@ -8025,6 +8027,28 @@ def _extract_spotify_metadata(context: dict, artist: dict, album_info: dict) ->
metadata['album_art_url'] = album_info.get('album_image_url') metadata['album_art_url'] = album_info.get('album_image_url')
# Extract source IDs (Spotify or iTunes) for tag embedding
track_info = context.get("track_info", {})
if track_info and track_info.get('id'):
# Spotify track IDs are alphanumeric strings; iTunes IDs are numeric
track_id = str(track_info['id'])
if track_id.isdigit():
metadata['itunes_track_id'] = track_id
else:
metadata['spotify_track_id'] = track_id
if artist.get('id'):
artist_id = str(artist['id'])
if artist_id.isdigit():
pass # iTunes artist ID not available in this context reliably
else:
metadata['spotify_artist_id'] = artist_id
if spotify_album and spotify_album.get('id'):
album_id = str(spotify_album['id'])
if album_id.isdigit():
pass # iTunes album ID not available in this context reliably
else:
metadata['spotify_album_id'] = album_id
return metadata return metadata
def _embed_album_art_metadata(audio_file, metadata: dict): def _embed_album_art_metadata(audio_file, metadata: dict):
@ -8065,6 +8089,161 @@ def _embed_album_art_metadata(audio_file, metadata: dict):
except Exception as e: except Exception as e:
print(f"❌ Error embedding album art: {e}") print(f"❌ Error embedding album art: {e}")
def _merge_genre_names(existing_genres, extra_genres, limit=5):
    """Merge two genre lists, dropping case-insensitive duplicates.

    Order is preserved (existing genres first), each kept name is stripped
    and title-cased, and at most *limit* names are returned.
    """
    seen = set()
    merged = []
    for name in list(existing_genres) + list(extra_genres):
        cleaned = str(name).strip()
        folded = cleaned.lower()
        if not folded or folded in seen:
            continue
        seen.add(folded)
        merged.append(cleaned.title())
        if len(merged) >= limit:
            break
    return merged


def _embed_source_ids(audio_file, metadata: dict):
    """
    Look up the MusicBrainz recording MBID, ISRC, and genres, then embed them
    along with Spotify/iTunes source IDs as custom tags into the audio file.

    One file write, one shot. Concurrent calls are intended to be safe: the
    original author notes that the global rate limiter in
    musicbrainz_client.py (not visible from this function) serializes all
    MusicBrainz API access.

    Operates on a non-easy-mode MutagenFile object (caller must save).
    Every failure is reported with a printed warning and swallowed, so tag
    embedding never aborts the surrounding post-processing.

    Args:
        audio_file: Mutagen file object (ID3-tagged file, FLAC, OggVorbis or MP4).
        metadata: Track metadata; reads 'title', 'album_artist', 'artist',
            'genre' and the 'spotify_*_id' / 'itunes_track_id' keys.
    """
    try:
        # ── 1. Collect Spotify / iTunes IDs already in metadata ──
        id_tags = {}
        for meta_key, tag_name in (
            ('spotify_track_id', 'SPOTIFY_TRACK_ID'),
            ('spotify_artist_id', 'SPOTIFY_ARTIST_ID'),
            ('spotify_album_id', 'SPOTIFY_ALBUM_ID'),
            ('itunes_track_id', 'ITUNES_TRACK_ID'),
        ):
            if metadata.get(meta_key):
                id_tags[tag_name] = metadata[meta_key]

        # ── 2. MusicBrainz lookup for MBID, genres, and ISRC ──
        mb_genres = []
        isrc = None

        track_title = metadata.get('title', '')
        # Use album_artist (single primary artist) for MB lookup, not the
        # comma-joined multi-artist field which would give bad search results
        artist_name = metadata.get('album_artist', '') or metadata.get('artist', '')

        if not config_manager.get('musicbrainz.embed_tags', True):
            # Skip MB lookup, just write Spotify/iTunes IDs if any
            pass
        elif track_title and artist_name:
            try:
                mb_service = mb_worker.mb_service if mb_worker else None
                if mb_service:
                    result = mb_service.match_recording(track_title, artist_name)
                    if result and result.get('mbid'):
                        recording_mbid = result['mbid']
                        id_tags['MUSICBRAINZ_RECORDING_ID'] = recording_mbid
                        print(f"🎵 MusicBrainz recording matched: {recording_mbid}")

                        # Lookup recording details for ISRC and genres
                        details = mb_service.mb_client.get_recording(
                            recording_mbid, includes=['isrcs', 'genres']
                        )
                        if details:
                            isrcs = details.get('isrcs', [])
                            if isrcs:
                                isrc = isrcs[0]
                            # Highest vote count first; entries without a
                            # name are skipped instead of raising.
                            mb_genres = [
                                g['name'] for g in sorted(
                                    details.get('genres', []),
                                    key=lambda x: x.get('count', 0),
                                    reverse=True
                                )
                                if g.get('name')
                            ]

                    # Also try to get artist MBID (may already be cached from worker)
                    artist_result = mb_service.match_artist(artist_name)
                    if artist_result and artist_result.get('mbid'):
                        id_tags['MUSICBRAINZ_ARTIST_ID'] = artist_result['mbid']
                else:
                    print("⚠️ MusicBrainz worker not available, skipping MBID lookup")
            except Exception as e:
                print(f"⚠️ MusicBrainz lookup failed (non-fatal): {e}")

        # Genres and ISRC only exist after a recording match, which always
        # adds an ID tag, so nothing is lost by returning here.
        if not id_tags:
            return

        # ── 3. Write all tags into the file ──
        written = []

        # MP3 (ID3)
        if isinstance(audio_file.tags, ID3):
            for tag_name, value in id_tags.items():
                if tag_name == 'MUSICBRAINZ_RECORDING_ID':
                    audio_file.tags.add(UFID(owner='http://musicbrainz.org', data=value.encode('ascii')))
                    written.append('UFID:http://musicbrainz.org')
                elif tag_name == 'MUSICBRAINZ_ARTIST_ID':
                    audio_file.tags.add(TXXX(encoding=3, desc='MusicBrainz Artist Id', text=[value]))
                    written.append('TXXX:MusicBrainz Artist Id')
                else:
                    audio_file.tags.add(TXXX(encoding=3, desc=tag_name, text=[str(value)]))
                    written.append(f'TXXX:{tag_name}')

        # FLAC / OGG Vorbis
        elif isinstance(audio_file, (FLAC, OggVorbis)):
            for tag_name, value in id_tags.items():
                if tag_name == 'MUSICBRAINZ_RECORDING_ID':
                    audio_file['MUSICBRAINZ_TRACKID'] = [value]
                    written.append('MUSICBRAINZ_TRACKID')
                elif tag_name == 'MUSICBRAINZ_ARTIST_ID':
                    audio_file['MUSICBRAINZ_ARTISTID'] = [value]
                    written.append('MUSICBRAINZ_ARTISTID')
                else:
                    audio_file[tag_name] = [str(value)]
                    written.append(tag_name)

        # MP4 (M4A/AAC)
        elif isinstance(audio_file, MP4):
            for tag_name, value in id_tags.items():
                if tag_name == 'MUSICBRAINZ_RECORDING_ID':
                    key = '----:com.apple.iTunes:MusicBrainz Track Id'
                elif tag_name == 'MUSICBRAINZ_ARTIST_ID':
                    key = '----:com.apple.iTunes:MusicBrainz Artist Id'
                else:
                    key = f'----:com.apple.iTunes:{tag_name}'
                audio_file[key] = [MP4FreeForm(str(value).encode('utf-8'))]
                written.append(key)

        if written:
            print(f"🔗 Embedded IDs: {', '.join(written)}")

        # ── 4. Merge genres (Spotify + MusicBrainz) and overwrite tag ──
        if mb_genres:
            # `or ''` guards against an explicit None stored under 'genre'.
            spotify_genres = [
                g.strip() for g in (metadata.get('genre') or '').split(',') if g.strip()
            ]
            merged = _merge_genre_names(spotify_genres, mb_genres, limit=5)
            if merged:
                genre_string = ', '.join(merged)
                if isinstance(audio_file.tags, ID3):
                    audio_file.tags.add(TCON(encoding=3, text=[genre_string]))
                elif isinstance(audio_file, (FLAC, OggVorbis)):
                    audio_file['GENRE'] = [genre_string]
                elif isinstance(audio_file, MP4):
                    audio_file['\xa9gen'] = [genre_string]
                print(f"🎶 Genres merged: {genre_string}")

        # ── 5. Write ISRC if available ──
        if isrc:
            if isinstance(audio_file.tags, ID3):
                audio_file.tags.add(TSRC(encoding=3, text=[isrc]))
            elif isinstance(audio_file, (FLAC, OggVorbis)):
                audio_file['ISRC'] = [isrc]
            elif isinstance(audio_file, MP4):
                audio_file['----:com.apple.iTunes:ISRC'] = [MP4FreeForm(isrc.encode('utf-8'))]
            print(f"🔖 ISRC: {isrc}")

    except Exception as e:
        print(f"⚠️ Error embedding source IDs (non-fatal): {e}")
def _download_cover_art(album_info: dict, target_dir: str): def _download_cover_art(album_info: dict, target_dir: str):
"""Downloads cover.jpg into the specified directory.""" """Downloads cover.jpg into the specified directory."""
try: try:

Loading…
Cancel
Save