diff --git a/config/settings.py b/config/settings.py index 3311151..20a0e1a 100644 --- a/config/settings.py +++ b/config/settings.py @@ -211,6 +211,9 @@ class ConfigManager: "enabled": True, "embed_album_art": True }, + "musicbrainz": { + "embed_tags": True + }, "playlist_sync": { "create_backup": True }, diff --git a/core/musicbrainz_service.py b/core/musicbrainz_service.py index d299f36..331a059 100644 --- a/core/musicbrainz_service.py +++ b/core/musicbrainz_service.py @@ -1,4 +1,4 @@ -from typing import Optional, Dict, Any, List +from typing import Optional, Dict, Any import json from datetime import datetime, timedelta from difflib import SequenceMatcher @@ -87,26 +87,26 @@ class MusicBrainzService: if conn: conn.close() - def _save_to_cache(self, entity_type: str, entity_name: str, artist_name: Optional[str], + def _save_to_cache(self, entity_type: str, entity_name: str, artist_name: Optional[str], musicbrainz_id: Optional[str], metadata: Optional[Dict], confidence: int): """Save MusicBrainz result to cache""" conn = None try: conn = self.db._get_connection() cursor = conn.cursor() - + metadata_json = json.dumps(metadata) if metadata else None - + cursor.execute(""" - INSERT OR REPLACE INTO musicbrainz_cache + INSERT OR REPLACE INTO musicbrainz_cache (entity_type, entity_name, artist_name, musicbrainz_id, metadata_json, match_confidence, last_updated) VALUES (?, ?, ?, ?, ?, ?, ?) """, (entity_type, entity_name, artist_name, musicbrainz_id, metadata_json, confidence, datetime.now())) - + conn.commit() - + logger.debug(f"Cached {entity_type} '{entity_name}' (MBID: {musicbrainz_id}, confidence: {confidence})") - + except Exception as e: logger.error(f"Error saving to cache: {e}") if conn: @@ -410,7 +410,7 @@ class MusicBrainzService: try: conn = self.db._get_connection() cursor = conn.cursor() - + cursor.execute(""" UPDATE tracks SET musicbrainz_recording_id = ?, @@ -418,11 +418,11 @@ class MusicBrainzService: musicbrainz_match_status = ? WHERE id = ? 
""", (mbid, datetime.now(), status, track_id)) - + conn.commit() - + logger.debug(f"Updated track {track_id} with MBID: {mbid}, status: {status}") - + except Exception as e: logger.error(f"Error updating track {track_id}: {e}") if conn: @@ -430,3 +430,4 @@ class MusicBrainzService: finally: if conn: conn.close() + diff --git a/core/musicbrainz_worker.py b/core/musicbrainz_worker.py index 808ab00..fcb3e1c 100644 --- a/core/musicbrainz_worker.py +++ b/core/musicbrainz_worker.py @@ -10,20 +10,20 @@ logger = get_logger("musicbrainz_worker") class MusicBrainzWorker: """Background worker for enriching library with MusicBrainz IDs""" - + def __init__(self, database: MusicDatabase, app_name: str = "SoulSync", app_version: str = "1.0", contact_email: str = ""): self.db = database self.mb_service = MusicBrainzService(database, app_name, app_version, contact_email) - + # Worker state self.running = False self.paused = False self.should_stop = False self.thread = None - + # Current item being processed (for UI tooltip) self.current_item = None - + # Statistics self.stats = { 'matched': 0, @@ -31,67 +31,67 @@ class MusicBrainzWorker: 'pending': 0, 'errors': 0 } - + # Retry configuration self.retry_days = 30 # Retry 'not_found' items after 30 days - + logger.info("MusicBrainz background worker initialized") - + def start(self): """Start the background worker""" if self.running: logger.warning("Worker already running") return - + self.running = True self.should_stop = False self.thread = threading.Thread(target=self._run, daemon=True) self.thread.start() logger.info("MusicBrainz background worker started") - + def stop(self): """Stop the background worker""" if not self.running: return - + logger.info("Stopping MusicBrainz worker...") self.should_stop = True self.running = False - + if self.thread: self.thread.join(timeout=5) - + logger.info("Music Brainz worker stopped") - + def pause(self): """Pause the worker""" if not self.running: logger.warning("Worker not running, cannot 
def _run(self):
    """Main worker loop: process one pending item at a time until stopped.

    Each iteration either idles while paused, idles when
    ``_get_next_item()`` returns nothing, or hands the item to
    ``_process_item()`` and then waits one second as a rate limit.
    Any exception raised inside an iteration is logged and followed by a
    back-off so the thread keeps running.

    All waits are sliced into short sleeps that re-check
    ``self.should_stop``. The original used single ``time.sleep(10)`` /
    ``time.sleep(5)`` calls, which are longer than the 5 second join
    timeout used by ``stop()``, so a stop request could time out while the
    thread was still asleep.
    """

    def nap(seconds, step=0.2):
        """Sleep up to ``seconds``, returning early once a stop is requested."""
        deadline = time.monotonic() + seconds
        while not self.should_stop:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return
            time.sleep(min(step, remaining))

    logger.info("MusicBrainz worker thread started")

    while not self.should_stop:
        try:
            # Paused: poll roughly once per second until resumed or stopped
            if self.paused:
                nap(1)
                continue

            # Clear previous item before getting next
            self.current_item = None

            item = self._get_next_item()
            if not item:
                # Nothing to do right now - idle, but stay responsive to stop()
                logger.debug("No pending items, sleeping...")
                nap(10)
                continue

            # Expose the item so the UI can show what is being processed
            self.current_item = item
            self._process_item(item)

            # current_item intentionally stays set during this wait so the
            # UI can still see what was just processed.
            # Rate limit: 1 request per second
            nap(1)

        except Exception as e:
            logger.error(f"Error in worker loop: {e}")
            nap(5)  # Back off on errors

    logger.info("MusicBrainz worker thread finished")
if row: return {'type': 'artist', 'id': row[0], 'name': row[1]} - + # Priority 2: Unattempted albums cursor.execute(""" SELECT a.id, a.title, ar.name AS artist_name @@ -171,7 +171,7 @@ class MusicBrainzWorker: row = cursor.fetchone() if row: return {'type': 'album', 'id': row[0], 'name': row[1], 'artist': row[2]} - + # Priority 3: Unattempted tracks cursor.execute(""" SELECT t.id, t.title, ar.name AS artist_name @@ -184,7 +184,7 @@ class MusicBrainzWorker: row = cursor.fetchone() if row: return {'type': 'track', 'id': row[0], 'name': row[1], 'artist': row[2]} - + # Priority 4: Retry 'not_found' artists after retry_days cutoff_date = datetime.now() - timedelta(days=self.retry_days) cursor.execute(""" @@ -199,7 +199,7 @@ class MusicBrainzWorker: if row: logger.info(f"Retrying artist '{row[1]}' (last attempted: {cutoff_date})") return {'type': 'artist', 'id': row[0], 'name': row[1]} - + # Priority 5: Retry 'not_found' albums cursor.execute(""" SELECT a.id, a.title, ar.name AS artist_name @@ -213,7 +213,7 @@ class MusicBrainzWorker: row = cursor.fetchone() if row: return {'type': 'album', 'id': row[0], 'name': row[1], 'artist': row[2]} - + # Priority 6: Retry 'not_found' tracks cursor.execute(""" SELECT t.id, t.title, ar.name AS artist_name @@ -227,25 +227,25 @@ class MusicBrainzWorker: row = cursor.fetchone() if row: return {'type': 'track', 'id': row[0], 'name': row[1], 'artist': row[2]} - + return None - + except Exception as e: logger.error(f"Error getting next item: {e}") return None finally: if conn: conn.close() - + def _process_item(self, item: Dict[str, Any]): """Process a single item (artist, album, or track)""" try: item_type = item['type'] item_id = item['id'] item_name = item['name'] - + logger.debug(f"Processing {item_type} #{item_id}: {item_name}") - + if item_type == 'artist': result = self.mb_service.match_artist(item_name) if result and result.get('mbid'): @@ -256,7 +256,7 @@ class MusicBrainzWorker: self.mb_service.update_artist_mbid(item_id, None, 
def _count_pending_items(self) -> int:
    """Return how many artists, albums and tracks have never been attempted.

    An entity counts as pending when its ``musicbrainz_match_status`` is
    NULL. The three per-table counts are summed in a single query. Any
    failure is logged and reported as 0; the connection obtained from
    ``self.db._get_connection()`` is always closed.
    """
    connection = None
    try:
        connection = self.db._get_connection()
        cur = connection.cursor()

        # One round trip: add up the unattempted rows of all three tables
        cur.execute("""
            SELECT
                (SELECT COUNT(*) FROM artists WHERE musicbrainz_match_status IS NULL) +
                (SELECT COUNT(*) FROM albums WHERE musicbrainz_match_status IS NULL) +
                (SELECT COUNT(*) FROM tracks WHERE musicbrainz_match_status IS NULL)
                AS pending
        """)

        result = cur.fetchone()
        if not result:
            return 0
        return result[0]

    except Exception as e:
        logger.error(f"Error counting pending items: {e}")
        return 0
    finally:
        if connection:
            connection.close()
def _add_external_id_columns(self, cursor):
    """Add Spotify/iTunes external ID columns (and indexes) to library tables.

    For each of ``artists``, ``albums`` and ``tracks`` this ensures the
    columns ``spotify_<entity>_id`` and ``itunes_<entity>_id`` exist and
    that each has its ``idx_<table>_<source>_id`` index.

    Differences from the previous implementation, which only tested for
    the Spotify column and then added both columns:

    * every column is checked individually, so a half-migrated table
      (only one of the two columns present) is completed instead of
      failing with "duplicate column name" or being left without its
      second column;
    * the idempotent ``CREATE INDEX IF NOT EXISTS`` statements always
      run, so a missing index is restored even when the column exists;
    * each table is migrated in its own ``try`` block, so a problem with
      one table no longer prevents the remaining tables from migrating.

    Args:
        cursor: open database cursor; the caller is responsible for
            committing.

    Errors are logged and never raised - this is a migration and the
    database can still function without these columns.
    """
    # Identifiers cannot be bound as SQL parameters; every name interpolated
    # below comes from these hard-coded tuples, never from external input.
    tables = (("artists", "artist"), ("albums", "album"), ("tracks", "track"))
    sources = ("spotify", "itunes")

    for table, entity in tables:
        try:
            cursor.execute(f"PRAGMA table_info({table})")
            existing = {column[1] for column in cursor.fetchall()}

            added_any = False
            for source in sources:
                column_name = f"{source}_{entity}_id"
                if column_name not in existing:
                    cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column_name} TEXT")
                    added_any = True
                cursor.execute(
                    f"CREATE INDEX IF NOT EXISTS idx_{table}_{source}_id "
                    f"ON {table} ({column_name})"
                )

            if added_any:
                logger.info(f"Added external ID columns to {table} table")

        except Exception as e:
            logger.error(f"Error adding external ID columns: {e}")
            # Don't raise - this is a migration, database can still function
template_type: str = 'album_path # METADATA & COVER ART HELPERS (Ported from downloads.py) # =================================================================== from mutagen import File as MutagenFile -from mutagen.id3 import ID3, TIT2, TPE1, TALB, TDRC, TRCK, TCON, TPE2, TPOS, TXXX, APIC +from mutagen.id3 import ID3, TIT2, TPE1, TALB, TDRC, TRCK, TCON, TPE2, TPOS, TXXX, APIC, UFID, TSRC from mutagen.flac import FLAC, Picture -from mutagen.mp4 import MP4, MP4Cover +from mutagen.mp4 import MP4, MP4Cover, MP4FreeForm from mutagen.oggvorbis import OggVorbis import urllib.request @@ -7883,6 +7883,8 @@ def _enhance_file_metadata(file_path: str, context: dict, artist: dict, album_in if config_manager.get('metadata_enhancement.embed_album_art', True): _embed_album_art_metadata(audio_file_raw, metadata) + _embed_source_ids(audio_file_raw, metadata) + audio_file_raw.save() print("✅ Metadata enhanced successfully.") @@ -8025,6 +8027,28 @@ def _extract_spotify_metadata(context: dict, artist: dict, album_info: dict) -> metadata['album_art_url'] = album_info.get('album_image_url') + # Extract source IDs (Spotify or iTunes) for tag embedding + track_info = context.get("track_info", {}) + if track_info and track_info.get('id'): + # Spotify track IDs are alphanumeric strings; iTunes IDs are numeric + track_id = str(track_info['id']) + if track_id.isdigit(): + metadata['itunes_track_id'] = track_id + else: + metadata['spotify_track_id'] = track_id + if artist.get('id'): + artist_id = str(artist['id']) + if artist_id.isdigit(): + pass # iTunes artist ID not available in this context reliably + else: + metadata['spotify_artist_id'] = artist_id + if spotify_album and spotify_album.get('id'): + album_id = str(spotify_album['id']) + if album_id.isdigit(): + pass # iTunes album ID not available in this context reliably + else: + metadata['spotify_album_id'] = album_id + return metadata def _embed_album_art_metadata(audio_file, metadata: dict): @@ -8065,6 +8089,161 @@ def 
def _source_id_container(audio_file):
    """Return 'id3', 'vorbis' or 'mp4' for a supported mutagen file, else None."""
    if isinstance(audio_file.tags, ID3):
        return 'id3'
    if isinstance(audio_file, (FLAC, OggVorbis)):
        return 'vorbis'
    if isinstance(audio_file, MP4):
        return 'mp4'
    return None


def _lookup_musicbrainz_ids(track_title, artist_name):
    """Query MusicBrainz; return (recording_mbid, artist_mbid, genres, isrc).

    Every lookup is best-effort: failures are printed and the affected
    values stay None / empty.
    """
    recording_mbid = artist_mbid = isrc = None
    genres = []
    empty = (None, None, [], None)

    try:
        mb_service = mb_worker.mb_service if mb_worker else None
    except Exception as e:
        print(f"⚠️ MusicBrainz lookup failed (non-fatal): {e}")
        return empty
    if not mb_service:
        print("⚠️ MusicBrainz worker not available, skipping MBID lookup")
        return empty

    # Recording: MBID first, then details for ISRC and genres. The MBID is
    # kept even if the follow-up details request fails.
    try:
        result = mb_service.match_recording(track_title, artist_name)
        if result and result.get('mbid'):
            recording_mbid = result['mbid']
            print(f"🎵 MusicBrainz recording matched: {recording_mbid}")

            details = mb_service.mb_client.get_recording(
                recording_mbid, includes=['isrcs', 'genres']
            )
            if details:
                isrcs = details.get('isrcs') or []
                if isrcs:
                    isrc = isrcs[0]
                ranked = sorted(
                    details.get('genres') or [],
                    key=lambda entry: entry.get('count', 0),
                    reverse=True,
                )
                # Skip malformed entries instead of raising KeyError
                genres = [entry['name'] for entry in ranked if entry.get('name')]
    except Exception as e:
        print(f"⚠️ MusicBrainz lookup failed (non-fatal): {e}")

    # Artist MBID (may already be cached from the worker). Its own try block
    # means a failed recording lookup no longer cancels it.
    # NOTE(review): the reviewed source was whitespace-collapsed, so it could
    # not be confirmed whether this lookup used to be nested under a
    # successful recording match - confirm the intended behaviour.
    try:
        artist_result = mb_service.match_artist(artist_name)
        if artist_result and artist_result.get('mbid'):
            artist_mbid = artist_result['mbid']
    except Exception as e:
        print(f"⚠️ MusicBrainz lookup failed (non-fatal): {e}")

    return recording_mbid, artist_mbid, genres, isrc


def _write_source_id_tag(audio_file, container, tag_name, value):
    """Write one ID tag in the container's own convention; return the key written."""
    if container == 'id3':
        if tag_name == 'MUSICBRAINZ_RECORDING_ID':
            audio_file.tags.add(UFID(owner='http://musicbrainz.org', data=value.encode('ascii')))
            return 'UFID:http://musicbrainz.org'
        if tag_name == 'MUSICBRAINZ_ARTIST_ID':
            audio_file.tags.add(TXXX(encoding=3, desc='MusicBrainz Artist Id', text=[value]))
            return 'TXXX:MusicBrainz Artist Id'
        audio_file.tags.add(TXXX(encoding=3, desc=tag_name, text=[str(value)]))
        return f'TXXX:{tag_name}'

    if container == 'vorbis':
        if tag_name == 'MUSICBRAINZ_RECORDING_ID':
            audio_file['MUSICBRAINZ_TRACKID'] = [value]
            return 'MUSICBRAINZ_TRACKID'
        if tag_name == 'MUSICBRAINZ_ARTIST_ID':
            audio_file['MUSICBRAINZ_ARTISTID'] = [value]
            return 'MUSICBRAINZ_ARTISTID'
        audio_file[tag_name] = [str(value)]
        return tag_name

    # MP4 (M4A/AAC) freeform atoms
    if tag_name == 'MUSICBRAINZ_RECORDING_ID':
        key = '----:com.apple.iTunes:MusicBrainz Track Id'
    elif tag_name == 'MUSICBRAINZ_ARTIST_ID':
        key = '----:com.apple.iTunes:MusicBrainz Artist Id'
    else:
        key = f'----:com.apple.iTunes:{tag_name}'
    audio_file[key] = [MP4FreeForm(str(value).encode('utf-8'))]
    return key


def _merge_genre_names(existing_field, mb_genres, limit=5):
    """Merge a comma-separated genre field with MusicBrainz genres.

    Duplicates are removed case-insensitively, names are title-cased and at
    most ``limit`` names are returned, existing genres first.
    """
    # `or ''` guards against an explicit None stored under 'genre'
    existing = [g.strip() for g in (existing_field or '').split(',') if g.strip()]
    seen = set()
    merged = []
    for name in existing + list(mb_genres):
        cleaned = name.strip()
        folded = cleaned.lower()
        if folded and folded not in seen:
            seen.add(folded)
            merged.append(cleaned.title())
            if len(merged) >= limit:
                break
    return merged


def _embed_source_ids(audio_file, metadata: dict):
    """
    Embed source IDs, MusicBrainz IDs, merged genres and ISRC into a file.

    Spotify/iTunes IDs already present in ``metadata`` are written as custom
    tags. Unless the ``musicbrainz.embed_tags`` setting is disabled, the
    recording MBID, artist MBID, genres and ISRC are also looked up through
    ``mb_worker.mb_service`` and written using each container's convention
    (ID3 frames, Vorbis comments, MP4 freeform atoms).

    Operates on a non-easy-mode MutagenFile object; the caller must save.
    Every failure is printed and swallowed (non-fatal).

    Per the original author's note, concurrent calls rely on a global rate
    limiter in musicbrainz_client.py to serialize MusicBrainz API access;
    that module is not visible here, so treat this as an assumption.

    Args:
        audio_file: opened mutagen file (MP3 with ID3 tags, FLAC, Ogg
            Vorbis or MP4). Other types are left untouched.
        metadata: dict that may contain 'spotify_track_id',
            'spotify_artist_id', 'spotify_album_id', 'itunes_track_id',
            'title', 'album_artist', 'artist' and 'genre'.
    """
    try:
        # Decide the container first: if nothing can be written there is no
        # point spending rate-limited MusicBrainz requests.
        container = _source_id_container(audio_file)
        if container is None:
            return

        # ── 1. Collect Spotify / iTunes IDs already in metadata ──
        source_keys = (
            ('spotify_track_id', 'SPOTIFY_TRACK_ID'),
            ('spotify_artist_id', 'SPOTIFY_ARTIST_ID'),
            ('spotify_album_id', 'SPOTIFY_ALBUM_ID'),
            ('itunes_track_id', 'ITUNES_TRACK_ID'),
        )
        id_tags = {}
        for meta_key, tag_name in source_keys:
            if metadata.get(meta_key):
                id_tags[tag_name] = metadata[meta_key]

        # ── 2. MusicBrainz lookup for MBID, genres, and ISRC ──
        mb_genres = []
        isrc = None
        track_title = metadata.get('title', '')
        # Use album_artist (single primary artist) for MB lookup, not the
        # comma-joined multi-artist field which would give bad search results
        artist_name = metadata.get('album_artist', '') or metadata.get('artist', '')

        if config_manager.get('musicbrainz.embed_tags', True) and track_title and artist_name:
            recording_mbid, artist_mbid, mb_genres, isrc = _lookup_musicbrainz_ids(
                track_title, artist_name
            )
            if recording_mbid:
                id_tags['MUSICBRAINZ_RECORDING_ID'] = recording_mbid
            if artist_mbid:
                id_tags['MUSICBRAINZ_ARTIST_ID'] = artist_mbid

        if not id_tags:
            return

        # ── 3. Write all ID tags into the file ──
        written = [
            _write_source_id_tag(audio_file, container, tag_name, value)
            for tag_name, value in id_tags.items()
        ]
        if written:
            print(f"🔗 Embedded IDs: {', '.join(written)}")

        # ── 4. Merge genres (Spotify + MusicBrainz) and overwrite tag ──
        if mb_genres:
            merged = _merge_genre_names(metadata.get('genre'), mb_genres)
            if merged:
                genre_string = ', '.join(merged)
                if container == 'id3':
                    audio_file.tags.add(TCON(encoding=3, text=[genre_string]))
                elif container == 'vorbis':
                    audio_file['GENRE'] = [genre_string]
                else:
                    audio_file['\xa9gen'] = [genre_string]
                print(f"🎶 Genres merged: {genre_string}")

        # ── 5. Write ISRC if available ──
        if isrc:
            if container == 'id3':
                audio_file.tags.add(TSRC(encoding=3, text=[isrc]))
            elif container == 'vorbis':
                audio_file['ISRC'] = [isrc]
            else:
                audio_file['----:com.apple.iTunes:ISRC'] = [MP4FreeForm(isrc.encode('utf-8'))]
            print(f"🔖 ISRC: {isrc}")

    except Exception as e:
        print(f"⚠️ Error embedding source IDs (non-fatal): {e}")