From d75893bc3030d37dcdabb5bb1c8a5f7ae19bad9d Mon Sep 17 00:00:00 2001
From: Broque Thomas <26755000+Nezreka@users.noreply.github.com>
Date: Mon, 23 Mar 2026 10:44:36 -0700
Subject: [PATCH] Add Album Tag Consistency repair job: detect and fix inconsistent tags across album tracks

New maintenance job scans albums for tracks with mismatched album names,
album artist names, or MusicBrainz release IDs. These inconsistencies
cause Navidrome and other media servers to split one album into multiple
entries. The fix normalizes outlier tracks to the majority value by
rewriting file tags.
---
 core/repair_jobs/__init__.py              |   1 +
 core/repair_jobs/album_tag_consistency.py | 357 ++++++++++++++++++++++
 core/repair_worker.py                     |  77 +++++
 3 files changed, 435 insertions(+)
 create mode 100644 core/repair_jobs/album_tag_consistency.py

diff --git a/core/repair_jobs/__init__.py b/core/repair_jobs/__init__.py
index 31328ec..49cbe82 100644
--- a/core/repair_jobs/__init__.py
+++ b/core/repair_jobs/__init__.py
@@ -40,6 +40,7 @@ _JOB_MODULES = [
     'core.repair_jobs.mbid_mismatch_detector',
     'core.repair_jobs.single_album_dedup',
     'core.repair_jobs.lossy_converter',
+    'core.repair_jobs.album_tag_consistency',
 ]
 
 
diff --git a/core/repair_jobs/album_tag_consistency.py b/core/repair_jobs/album_tag_consistency.py
new file mode 100644
index 0000000..30f0189
--- /dev/null
+++ b/core/repair_jobs/album_tag_consistency.py
@@ -0,0 +1,357 @@
+"""Album Tag Consistency Job — finds albums where tracks have inconsistent tags.
+
+When tracks in the same album have different artist names, album names, or
+MusicBrainz release IDs, media servers like Navidrome split them into separate
+albums. This job detects these inconsistencies and offers to fix them by
+normalizing all tracks to the canonical (majority) value.
+"""
+
+import json
+import os
+from collections import Counter
+
+from mutagen import File as MutagenFile
+from mutagen.id3 import ID3
+from mutagen.flac import FLAC
+from mutagen.oggvorbis import OggVorbis
+from mutagen.mp4 import MP4
+
+from core.repair_jobs import register_job
+from core.repair_jobs.base import JobContext, JobResult, RepairJob
+from utils.logging_config import get_logger
+
+logger = get_logger("repair_job.album_tag_consistency")
+
+
+def _read_tag(audio, tag_name):
+    """Read a tag value from a Mutagen file object, handling format differences.
+
+    ID3 and MP4 files map 'album', 'artist', 'albumartist' and
+    'musicbrainz_albumid' to their native keys; FLAC/Ogg Vorbis files look up
+    the upper-cased tag name directly. Returns the tag value, or None when
+    the tag is absent, the format is unsupported, or reading fails.
+    """
+    if audio is None:
+        return None
+    try:
+        if isinstance(audio.tags, ID3):
+            # MP3
+            if tag_name == 'album':
+                frame = audio.tags.get('TALB')
+                return str(frame) if frame else None
+            elif tag_name == 'artist':
+                frame = audio.tags.get('TPE1')
+                return str(frame) if frame else None
+            elif tag_name == 'albumartist':
+                frame = audio.tags.get('TPE2')
+                return str(frame) if frame else None
+            elif tag_name == 'musicbrainz_albumid':
+                for key in audio.tags:
+                    if key.startswith('TXXX:') and 'MusicBrainz Album Id' in key:
+                        return str(audio.tags[key])
+                return None
+        elif isinstance(audio, (FLAC, OggVorbis)):
+            vals = audio.get(tag_name.upper(), [])
+            return vals[0] if vals else None
+        elif isinstance(audio, MP4):
+            tag_map = {
+                'album': '\xa9alb',
+                'artist': '\xa9ART',
+                'albumartist': 'aART',
+            }
+            key = tag_map.get(tag_name)
+            if key:
+                vals = audio.get(key, [])
+                return vals[0] if vals else None
+            if tag_name == 'musicbrainz_albumid':
+                vals = audio.get('----:com.apple.iTunes:MusicBrainz Album Id', [])
+                if vals:
+                    return vals[0].decode('utf-8') if isinstance(vals[0], bytes) else str(vals[0])
+            return None
+    except Exception as e:
+        # Best-effort read: an unreadable tag is treated the same as a missing one.
+        logger.debug(f"Failed to read tag {tag_name}: {e}")
+    return None
+
+
+def _write_tag(audio, tag_name, value):
+    """Write a tag value to a Mutagen file object, handling format differences.
+
+    Only updates the in-memory tags; the caller is responsible for calling
+    ``audio.save()``. Returns True when the tag was set, False when the
+    format or tag name is unsupported or the write failed.
+    """
+    if audio is None or value is None:
+        return False
+    try:
+        if isinstance(audio.tags, ID3):
+            from mutagen.id3 import TALB, TPE1, TPE2, TXXX
+            if tag_name == 'album':
+                audio.tags.delall('TALB')
+                audio.tags.add(TALB(encoding=3, text=[value]))
+            elif tag_name == 'artist':
+                audio.tags.delall('TPE1')
+                audio.tags.add(TPE1(encoding=3, text=[value]))
+            elif tag_name == 'albumartist':
+                audio.tags.delall('TPE2')
+                audio.tags.add(TPE2(encoding=3, text=[value]))
+            elif tag_name == 'musicbrainz_albumid':
+                # Remove existing
+                to_remove = [k for k in audio.tags if k.startswith('TXXX:') and 'MusicBrainz Album Id' in k]
+                for k in to_remove:
+                    del audio.tags[k]
+                audio.tags.add(TXXX(encoding=3, desc='MusicBrainz Album Id', text=[value]))
+            else:
+                # Unknown tag name: nothing was written, so don't report success.
+                return False
+            return True
+        elif isinstance(audio, (FLAC, OggVorbis)):
+            audio[tag_name.upper()] = [value]
+            return True
+        elif isinstance(audio, MP4):
+            tag_map = {
+                'album': '\xa9alb',
+                'artist': '\xa9ART',
+                'albumartist': 'aART',
+            }
+            key = tag_map.get(tag_name)
+            if key:
+                audio[key] = [value]
+                return True
+            if tag_name == 'musicbrainz_albumid':
+                from mutagen.mp4 import MP4FreeForm
+                audio['----:com.apple.iTunes:MusicBrainz Album Id'] = [
+                    MP4FreeForm(value.encode('utf-8'))
+                ]
+                return True
+    except Exception as e:
+        logger.debug(f"Failed to write tag {tag_name}: {e}")
+    return False
+
+
+@register_job
+class AlbumTagConsistencyJob(RepairJob):
+    job_id = 'album_tag_consistency'
+    display_name = 'Album Tag Consistency'
+    description = 'Finds albums where tracks have inconsistent tags causing media server splits'
+    help_text = (
+        'Scans your library for albums where tracks have mismatched metadata — '
+        'different album names, artist names, or MusicBrainz release IDs across '
+        'tracks that belong to the same album.\n\n'
+        'These inconsistencies cause media servers like Navidrome to split one album '
+        'into multiple entries (e.g. "Simulation Theory" and "Simulation Theory (Super Deluxe)").\n\n'
+        'The fix normalizes all tracks in the album to the most common (majority) value, '
+        'then writes the corrected tags to the actual audio files.\n\n'
+        'Settings:\n'
+        '- Check album name: Detect inconsistent album title tags\n'
+        '- Check album artist: Detect inconsistent album artist tags\n'
+        '- Check MB release ID: Detect inconsistent MusicBrainz Album IDs'
+    )
+    icon = 'repair-icon-consistency'
+    default_enabled = False
+    default_interval_hours = 168  # Weekly
+    default_settings = {
+        'check_album_name': True,
+        'check_album_artist': True,
+        'check_mb_release_id': True,
+    }
+    auto_fix = False
+
+    # (settings key, key in the per-track tag dict, field name stored in the finding)
+    _CHECKS = (
+        ('check_album_name', 'album_tag', 'album'),
+        ('check_album_artist', 'albumartist_tag', 'albumartist'),
+        ('check_mb_release_id', 'mbid_tag', 'musicbrainz_albumid'),
+    )
+
+    def scan(self, context: JobContext) -> JobResult:
+        """Scan albums with 2+ tracked files and report tag inconsistencies.
+
+        Creates one 'album_tag_inconsistency' finding per affected album and
+        returns a JobResult carrying the scanned/findings/error counters.
+        """
+        result = JobResult()
+        settings = self._get_settings(context)
+        checks = [(tag_key, field) for setting, tag_key, field in self._CHECKS
+                  if settings.get(setting, True)]
+        if not checks:
+            return result
+
+        conn = None
+        try:
+            conn = context.db._get_connection()
+            cursor = conn.cursor()
+
+            # Get all albums with 2+ tracks that have file paths
+            cursor.execute("""
+                SELECT al.id, al.title, ar.name as artist_name,
+                       COUNT(t.id) as track_count
+                FROM albums al
+                JOIN artists ar ON ar.id = al.artist_id
+                JOIN tracks t ON t.album_id = al.id
+                WHERE t.file_path IS NOT NULL AND t.file_path != ''
+                GROUP BY al.id
+                HAVING COUNT(t.id) >= 2
+                ORDER BY ar.name, al.title
+            """)
+            albums = cursor.fetchall()
+            total = len(albums)
+
+            if context.report_progress:
+                context.report_progress(phase=f'Scanning {total} albums for tag consistency...', total=total)
+
+            for idx, album_row in enumerate(albums):
+                if context.check_stop():
+                    break
+                if idx % 10 == 0 and context.wait_if_paused():
+                    break
+
+                album_id = album_row['id']
+                album_title = album_row['title']
+                artist_name = album_row['artist_name']
+                result.scanned += 1
+
+                if context.report_progress and idx % 20 == 0:
+                    context.report_progress(
+                        scanned=idx + 1, total=total,
+                        phase=f'Scanning {idx + 1} / {total}',
+                        log_line=f'{artist_name} — {album_title}',
+                        log_type='info'
+                    )
+
+                # Get all tracks in this album with file paths
+                cursor.execute("""
+                    SELECT id, title, file_path FROM tracks
+                    WHERE album_id = ? AND file_path IS NOT NULL AND file_path != ''
+                """, (album_id,))
+                tracks = cursor.fetchall()
+
+                if len(tracks) < 2:
+                    continue
+
+                tag_data = self._read_album_tags(tracks, context)
+                if len(tag_data) < 2:
+                    continue
+
+                inconsistencies = []
+                for tag_key, field in checks:
+                    found = self._find_inconsistency(tag_data, tag_key, field)
+                    if found:
+                        inconsistencies.append(found)
+                if not inconsistencies:
+                    continue
+
+                fields_affected = ', '.join(i['field'] for i in inconsistencies)
+                total_outliers = sum(i['outlier_count'] for i in inconsistencies)
+
+                # Build description with specifics
+                desc_parts = []
+                for inc in inconsistencies:
+                    variants_str = ' vs '.join(f'"{v}"' for v in inc['variants'][:3])
+                    desc_parts.append(f"{inc['field']}: {variants_str}")
+
+                context.create_finding(
+                    finding_type='album_tag_inconsistency',
+                    severity='warning',
+                    entity_type='album',
+                    entity_id=str(album_id),
+                    file_path=None,
+                    title=f'Inconsistent tags: {album_title} by {artist_name}',
+                    description=f'{total_outliers} track(s) have mismatched {fields_affected}. ' + '; '.join(desc_parts),
+                    details={
+                        'album_id': album_id,
+                        'album_title': album_title,
+                        'artist_name': artist_name,
+                        'inconsistencies': inconsistencies,
+                        'track_count': len(tag_data),
+                        'tracks': [{'id': t['track_id'], 'title': t['track_title'],
+                                    'file_path': t['file_path']} for t in tag_data],
+                    }
+                )
+                result.findings_created += 1
+
+                if context.report_progress:
+                    context.report_progress(
+                        log_line=f'Found: {album_title} — {fields_affected}',
+                        log_type='warning'
+                    )
+        except Exception as e:
+            logger.error(f"Album tag consistency scan error: {e}")
+            result.errors += 1
+        finally:
+            # Close on every exit path, including errors raised mid-scan.
+            if conn is not None:
+                conn.close()
+
+        return result
+
+    def _read_album_tags(self, tracks, context):
+        """Read the album-level tags of each track whose file can be opened.
+
+        Tracks whose path cannot be resolved, or whose file Mutagen cannot
+        parse, are skipped, so the result may be shorter than ``tracks``.
+        """
+        tag_data = []
+        for track in tracks:
+            file_path = track['file_path']
+            resolved = self._resolve_path(file_path, context)
+            if not resolved:
+                continue
+            try:
+                audio = MutagenFile(resolved, easy=False)
+            except Exception as e:
+                logger.debug(f"Could not read tags from {resolved}: {e}")
+                continue
+            if audio is None:
+                continue
+            tag_data.append({
+                'track_id': track['id'],
+                'track_title': track['title'],
+                'file_path': file_path,
+                'resolved_path': resolved,
+                'album_tag': _read_tag(audio, 'album'),
+                'albumartist_tag': _read_tag(audio, 'albumartist'),
+                'mbid_tag': _read_tag(audio, 'musicbrainz_albumid'),
+            })
+        return tag_data
+
+    @staticmethod
+    def _find_inconsistency(tag_data, tag_key, field):
+        """Return an inconsistency record for one tag, or None if it is consistent.
+
+        Tracks with a missing/empty value are ignored. The canonical value is
+        the most common one.
+        """
+        values = [t[tag_key] for t in tag_data if t[tag_key]]
+        ranked = Counter(values).most_common()
+        if len(ranked) < 2:
+            return None
+        canonical, canonical_count = ranked[0]
+        return {
+            'field': field,
+            'canonical': canonical,
+            # Most common first, so the finding text is stable between scans
+            # (iterating a set of strings is not).
+            'variants': [value for value, _ in ranked],
+            'outlier_count': len(values) - canonical_count,
+        }
+
+    def _resolve_path(self, file_path, context):
+        """Resolve a DB file path to an actual filesystem path."""
+        if not file_path:
+            return None
+        # Try as-is first
+        if os.path.exists(file_path):
+            return file_path
+        # Try relative to transfer folder
+        if context.transfer_folder:
+            joined = os.path.join(context.transfer_folder, file_path)
+            if os.path.exists(joined):
+                return joined
+        # Try with download path
+        download_path = context.config_manager.get('soulseek.download_path', '') if context.config_manager else ''
+        if download_path:
+            joined = os.path.join(download_path, file_path)
+            if os.path.exists(joined):
+                return joined
+        return None
diff --git a/core/repair_worker.py b/core/repair_worker.py
index 052e4ab..524bc86 100644
--- a/core/repair_worker.py
+++ b/core/repair_worker.py
@@ -806,6 +806,7 @@ class RepairWorker:
             'duplicate_tracks': self._fix_duplicates,
             'single_album_redundant': self._fix_single_album_redundant,
             'mbid_mismatch': self._fix_mbid_mismatch,
+            'album_tag_inconsistency': self._fix_album_tag_inconsistency,
             'incomplete_album': self._fix_incomplete_album,
             'path_mismatch': self._fix_path_mismatch,
             'missing_lossy_copy': self._fix_missing_lossy_copy,
@@ -1302,6 +1303,81 @@ class RepairWorker:
         except Exception as e:
             return {'success': False, 'error': f'Failed to remove MBID: {str(e)}'}
 
+    def _fix_album_tag_inconsistency(self, entity_type, entity_id, file_path, details):
+        """Normalize inconsistent tags across all tracks in an album to the canonical (majority) value.
+
+        Each file is opened and saved at most once, even when several fields
+        need correcting. Tracks that are missing a tag are left untouched.
+        """
+        inconsistencies = details.get('inconsistencies', [])
+        tracks = details.get('tracks', [])
+        if not inconsistencies or not tracks:
+            return {'success': False, 'error': 'No inconsistency data in finding'}
+
+        from mutagen import File as MutagenFile
+        from core.repair_jobs.album_tag_consistency import _read_tag, _write_tag
+
+        # Loop-invariant: look the download folder up once, not once per track.
+        download_folder = None
+        if self._config_manager:
+            download_folder = self._config_manager.get('soulseek.download_path', '')
+
+        fixed_files = 0
+        errors = 0
+        changes = []
+
+        for track_info in tracks:
+            track_file = track_info.get('file_path', '')
+            if not track_file:
+                continue
+
+            resolved = _resolve_file_path(track_file, self.transfer_folder, download_folder)
+            if not resolved or not os.path.exists(resolved):
+                continue
+
+            try:
+                audio = MutagenFile(resolved, easy=False)
+                if audio is None:
+                    continue
+
+                file_changes = []
+                write_failed = False
+                for inc in inconsistencies:
+                    field = inc.get('field')
+                    canonical = inc.get('canonical')
+                    if not field or not canonical:
+                        continue
+                    current = _read_tag(audio, field)
+                    if not current or current == canonical:
+                        continue
+                    if _write_tag(audio, field, canonical):
+                        file_changes.append(f'{field}: "{current}" → "{canonical}" in {os.path.basename(resolved)}')
+                    else:
+                        write_failed = True
+
+                # Save once per file so fixed_files counts files, not individual tag edits.
+                if file_changes:
+                    audio.save()
+                    fixed_files += 1
+                    changes.extend(file_changes)
+                if write_failed:
+                    errors += 1
+            except Exception as e:
+                logger.error(f"Error fixing tag consistency for {resolved}: {e}")
+                errors += 1
+
+        if fixed_files > 0:
+            failed_note = f' ({errors} file(s) failed)' if errors else ''
+            return {
+                'success': True,
+                'action': 'normalized_tags',
+                'message': f'Fixed {fixed_files} file(s): {"; ".join(changes[:3])}{"..." if len(changes) > 3 else ""}{failed_note}',
+            }
+        elif errors > 0:
+            return {'success': False, 'error': f'Failed to fix {errors} file(s)'}
+        else:
+            return {'success': True, 'action': 'already_consistent', 'message': 'All tags already consistent'}
+
     # --- Album Completeness Auto-Fill ---
 
     @staticmethod
@@ -2115,6 +2191,7 @@ class RepairWorker:
         fixable_types = ('dead_file', 'orphan_file', 'track_number_mismatch',
                          'missing_cover_art', 'metadata_gap', 'duplicate_tracks',
                          'single_album_redundant', 'mbid_mismatch',
+                         'album_tag_inconsistency',
                          'incomplete_album', 'path_mismatch', 'missing_lossy_copy')
         placeholders = ','.join(['?'] * len(fixable_types))
 