diff --git a/core/repair_jobs/__init__.py b/core/repair_jobs/__init__.py
index 31328ec..49cbe82 100644
--- a/core/repair_jobs/__init__.py
+++ b/core/repair_jobs/__init__.py
@@ -40,6 +40,7 @@ _JOB_MODULES = [
     'core.repair_jobs.mbid_mismatch_detector',
     'core.repair_jobs.single_album_dedup',
     'core.repair_jobs.lossy_converter',
+    'core.repair_jobs.album_tag_consistency',
 ]
 
 
diff --git a/core/repair_jobs/album_tag_consistency.py b/core/repair_jobs/album_tag_consistency.py
new file mode 100644
index 0000000..30f0189
--- /dev/null
+++ b/core/repair_jobs/album_tag_consistency.py
@@ -0,0 +1,361 @@
+"""Album Tag Consistency Job — finds albums where tracks have inconsistent tags.
+
+When tracks in the same album have different artist names, album names, or
+MusicBrainz release IDs, media servers like Navidrome split them into separate
+albums. This job detects these inconsistencies and offers to fix them by
+normalizing all tracks to the canonical (majority) value.
+"""
+
+import json
+import os
+from collections import Counter
+
+from mutagen import File as MutagenFile
+from mutagen.id3 import ID3, TALB, TPE1, TPE2, TXXX
+from mutagen.flac import FLAC
+from mutagen.oggvorbis import OggVorbis
+from mutagen.mp4 import MP4, MP4FreeForm
+
+from core.repair_jobs import register_job
+from core.repair_jobs.base import JobContext, JobResult, RepairJob
+from utils.logging_config import get_logger
+
+logger = get_logger("repair_job.album_tag_consistency")
+
+# Logical tag name -> (ID3 frame id, frame class) for plain text frames.
+_ID3_TEXT_FRAMES = {
+    'album': ('TALB', TALB),
+    'artist': ('TPE1', TPE1),
+    'albumartist': ('TPE2', TPE2),
+}
+
+# Logical tag name -> MP4 atom key.
+_MP4_ATOMS = {
+    'album': '\xa9alb',
+    'artist': '\xa9ART',
+    'albumartist': 'aART',
+}
+
+_MBID_TAG = 'musicbrainz_albumid'
+_ID3_MBID_DESC = 'MusicBrainz Album Id'
+_MP4_MBID_KEY = '----:com.apple.iTunes:MusicBrainz Album Id'
+
+
+def _id3_mbid_keys(tags):
+    """Return the keys of every TXXX frame that holds a MusicBrainz album id."""
+    return [k for k in tags if k.startswith('TXXX:') and _ID3_MBID_DESC in k]
+
+
+def _read_tag(audio, tag_name):
+    """Read one logical tag from a Mutagen file object.
+
+    Args:
+        audio: Object returned by ``mutagen.File(path, easy=False)``, or None.
+        tag_name: 'album', 'artist', 'albumartist' or 'musicbrainz_albumid'.
+
+    Returns:
+        The tag value, or None when the tag is absent, the container format
+        is not handled here, or reading raised.
+    """
+    if audio is None:
+        return None
+    try:
+        if isinstance(audio.tags, ID3):
+            if tag_name == _MBID_TAG:
+                keys = _id3_mbid_keys(audio.tags)
+                return str(audio.tags[keys[0]]) if keys else None
+            if tag_name in _ID3_TEXT_FRAMES:
+                frame = audio.tags.get(_ID3_TEXT_FRAMES[tag_name][0])
+                return str(frame) if frame else None
+            return None
+        if isinstance(audio, (FLAC, OggVorbis)):
+            values = audio.get(tag_name.upper(), [])
+            return values[0] if values else None
+        if isinstance(audio, MP4):
+            if tag_name == _MBID_TAG:
+                values = audio.get(_MP4_MBID_KEY, [])
+                if not values:
+                    return None
+                raw = values[0]
+                return raw.decode('utf-8') if isinstance(raw, bytes) else str(raw)
+            atom = _MP4_ATOMS.get(tag_name)
+            values = audio.get(atom, []) if atom else []
+            return values[0] if values else None
+    except Exception as e:
+        # Best effort: an unreadable tag is reported as missing, but leave a trace.
+        logger.debug(f"Failed to read tag {tag_name}: {e}")
+    return None
+
+
+def _write_tag(audio, tag_name, value):
+    """Set one logical tag on a Mutagen file object; the caller saves the file.
+
+    Args:
+        audio: Object returned by ``mutagen.File(path, easy=False)``, or None.
+        tag_name: 'album', 'artist', 'albumartist' or 'musicbrainz_albumid'.
+        value: New string value; None is rejected.
+
+    Returns:
+        True only if a tag was actually set in memory; False for an
+        unsupported format or tag name, a None argument, or a failure.
+    """
+    if audio is None or value is None:
+        return False
+    try:
+        if isinstance(audio.tags, ID3):
+            if tag_name == _MBID_TAG:
+                for key in _id3_mbid_keys(audio.tags):
+                    del audio.tags[key]
+                audio.tags.add(TXXX(encoding=3, desc=_ID3_MBID_DESC, text=[value]))
+                return True
+            if tag_name in _ID3_TEXT_FRAMES:
+                frame_id, frame_cls = _ID3_TEXT_FRAMES[tag_name]
+                audio.tags.delall(frame_id)
+                audio.tags.add(frame_cls(encoding=3, text=[value]))
+                return True
+            # Unknown tag name: nothing was written, so do not report success.
+            return False
+        if isinstance(audio, (FLAC, OggVorbis)):
+            audio[tag_name.upper()] = [value]
+            return True
+        if isinstance(audio, MP4):
+            if tag_name == _MBID_TAG:
+                audio[_MP4_MBID_KEY] = [MP4FreeForm(value.encode('utf-8'))]
+                return True
+            atom = _MP4_ATOMS.get(tag_name)
+            if atom:
+                audio[atom] = [value]
+                return True
+    except Exception as e:
+        logger.debug(f"Failed to write tag {tag_name}: {e}")
+    return False
+
+
+@register_job
+class AlbumTagConsistencyJob(RepairJob):
+    """Repair job that reports albums whose tracks disagree on album-level tags."""
+
+    job_id = 'album_tag_consistency'
+    display_name = 'Album Tag Consistency'
+    description = 'Finds albums where tracks have inconsistent tags causing media server splits'
+    help_text = (
+        'Scans your library for albums where tracks have mismatched metadata — '
+        'different album names, artist names, or MusicBrainz release IDs across '
+        'tracks that belong to the same album.\n\n'
+        'These inconsistencies cause media servers like Navidrome to split one album '
+        'into multiple entries (e.g. "Simulation Theory" and "Simulation Theory (Super Deluxe)").\n\n'
+        'The fix normalizes all tracks in the album to the most common (majority) value, '
+        'then writes the corrected tags to the actual audio files.\n\n'
+        'Settings:\n'
+        '- Check album name: Detect inconsistent album title tags\n'
+        '- Check album artist: Detect inconsistent album artist tags\n'
+        '- Check MB release ID: Detect inconsistent MusicBrainz Album IDs'
+    )
+    icon = 'repair-icon-consistency'
+    default_enabled = False
+    default_interval_hours = 168  # Weekly
+    default_settings = {
+        'check_album_name': True,
+        'check_album_artist': True,
+        'check_mb_release_id': True,
+    }
+    auto_fix = False
+
+    # (settings key, key in the per-track tag record, field name stored in findings)
+    _CHECKS = (
+        ('check_album_name', 'album_tag', 'album'),
+        ('check_album_artist', 'albumartist_tag', 'albumartist'),
+        ('check_mb_release_id', 'mbid_tag', 'musicbrainz_albumid'),
+    )
+
+    def scan(self, context: JobContext) -> JobResult:
+        """Scan every album with 2+ file-backed tracks and report tag mismatches.
+
+        Creates one 'album_tag_inconsistency' finding per affected album. Any
+        exception aborts the scan and is counted in ``result.errors``.
+        """
+        result = JobResult()
+        settings = self._get_settings(context)
+        checks = [(tag_key, field) for setting, tag_key, field in self._CHECKS
+                  if settings.get(setting, True)]
+        if not checks:
+            return result
+
+        conn = None
+        try:
+            conn = context.db._get_connection()
+            cursor = conn.cursor()
+
+            # Get all albums with 2+ tracks that have file paths
+            cursor.execute("""
+                SELECT al.id, al.title, ar.name as artist_name,
+                       COUNT(t.id) as track_count
+                FROM albums al
+                JOIN artists ar ON ar.id = al.artist_id
+                JOIN tracks t ON t.album_id = al.id
+                WHERE t.file_path IS NOT NULL AND t.file_path != ''
+                GROUP BY al.id
+                HAVING COUNT(t.id) >= 2
+                ORDER BY ar.name, al.title
+            """)
+            albums = cursor.fetchall()
+            total = len(albums)
+
+            if context.report_progress:
+                context.report_progress(phase=f'Scanning {total} albums for tag consistency...', total=total)
+
+            for idx, album_row in enumerate(albums):
+                if context.check_stop():
+                    break
+                if idx % 10 == 0 and context.wait_if_paused():
+                    break
+
+                album_id = album_row['id']
+                album_title = album_row['title']
+                artist_name = album_row['artist_name']
+                result.scanned += 1
+
+                if context.report_progress and idx % 20 == 0:
+                    context.report_progress(
+                        scanned=idx + 1, total=total,
+                        phase=f'Scanning {idx + 1} / {total}',
+                        log_line=f'{artist_name} — {album_title}',
+                        log_type='info'
+                    )
+
+                # Get all tracks in this album with file paths
+                cursor.execute("""
+                    SELECT id, title, file_path FROM tracks
+                    WHERE album_id = ? AND file_path IS NOT NULL AND file_path != ''
+                """, (album_id,))
+                tracks = cursor.fetchall()
+                if len(tracks) < 2:
+                    continue
+
+                tag_data = self._read_album_tags(tracks, context)
+                if len(tag_data) < 2:
+                    continue
+
+                inconsistencies = [
+                    inc for inc in (self._find_inconsistency(tag_data, tag_key, field)
+                                    for tag_key, field in checks)
+                    if inc
+                ]
+                if not inconsistencies:
+                    continue
+
+                fields_affected = ', '.join(i['field'] for i in inconsistencies)
+                total_outliers = sum(i['outlier_count'] for i in inconsistencies)
+
+                # Build description with specifics
+                desc_parts = []
+                for inc in inconsistencies:
+                    variants_str = ' vs '.join(f'"{v}"' for v in inc['variants'][:3])
+                    desc_parts.append(f"{inc['field']}: {variants_str}")
+
+                context.create_finding(
+                    finding_type='album_tag_inconsistency',
+                    severity='warning',
+                    entity_type='album',
+                    entity_id=str(album_id),
+                    file_path=None,
+                    title=f'Inconsistent tags: {album_title} by {artist_name}',
+                    description=f'{total_outliers} track(s) have mismatched {fields_affected}. ' + '; '.join(desc_parts),
+                    details={
+                        'album_id': album_id,
+                        'album_title': album_title,
+                        'artist_name': artist_name,
+                        'inconsistencies': inconsistencies,
+                        'track_count': len(tag_data),
+                        'tracks': [{'id': t['track_id'], 'title': t['track_title'],
+                                    'file_path': t['file_path']} for t in tag_data],
+                    }
+                )
+                result.findings_created += 1
+
+                if context.report_progress:
+                    context.report_progress(
+                        log_line=f'Found: {album_title} — {fields_affected}',
+                        log_type='warning'
+                    )
+
+        except Exception as e:
+            logger.error(f"Album tag consistency scan error: {e}")
+            result.errors += 1
+        finally:
+            # Close on every path, including errors, so the connection never leaks.
+            if conn is not None:
+                try:
+                    conn.close()
+                except Exception as e:
+                    logger.debug(f"Error closing scan connection: {e}")
+
+        return result
+
+    def _read_album_tags(self, tracks, context):
+        """Return one tag record per track whose file can be located and opened."""
+        tag_data = []
+        for track in tracks:
+            file_path = track['file_path']
+            resolved = self._resolve_path(file_path, context)
+            if not resolved or not os.path.exists(resolved):
+                continue
+            try:
+                audio = MutagenFile(resolved, easy=False)
+            except Exception as e:
+                # Unreadable files are skipped rather than failing the whole scan.
+                logger.debug(f"Could not open {resolved} for tag reading: {e}")
+                continue
+            if audio is None:
+                continue
+            tag_data.append({
+                'track_id': track['id'],
+                'track_title': track['title'],
+                'file_path': file_path,
+                'resolved_path': resolved,
+                'album_tag': _read_tag(audio, 'album'),
+                'albumartist_tag': _read_tag(audio, 'albumartist'),
+                'mbid_tag': _read_tag(audio, 'musicbrainz_albumid'),
+            })
+        return tag_data
+
+    @staticmethod
+    def _find_inconsistency(tag_data, tag_key, field):
+        """Return an inconsistency record for one field, or None if it is consistent.
+
+        Tracks with an empty or missing value for the field are ignored.
+        """
+        counts = Counter(t[tag_key] for t in tag_data if t[tag_key])
+        if len(counts) < 2:
+            return None
+        # most_common() keeps first-seen order for ties, so this is deterministic.
+        ranked = counts.most_common()
+        canonical, canonical_count = ranked[0]
+        return {
+            'field': field,
+            'canonical': canonical,
+            # Most frequent first: the canonical value always leads the list.
+            'variants': [value for value, _ in ranked],
+            'outlier_count': sum(counts.values()) - canonical_count,
+        }
+
+    def _resolve_path(self, file_path, context):
+        """Resolve a DB file path to an existing filesystem path, or None.
+
+        Tries the path as stored, then relative to the transfer folder, then
+        relative to the configured 'soulseek.download_path'.
+        """
+        if not file_path:
+            return None
+        # Try as-is first
+        if os.path.exists(file_path):
+            return file_path
+        roots = [context.transfer_folder]
+        if context.config_manager:
+            roots.append(context.config_manager.get('soulseek.download_path', ''))
+        for root in roots:
+            if root:
+                joined = os.path.join(root, file_path)
+                if os.path.exists(joined):
+                    return joined
+        return None
diff --git a/core/repair_worker.py b/core/repair_worker.py
index 052e4ab..524bc86 100644
--- a/core/repair_worker.py
+++ b/core/repair_worker.py
@@ -806,6 +806,7 @@ class RepairWorker:
             'duplicate_tracks': self._fix_duplicates,
             'single_album_redundant': self._fix_single_album_redundant,
             'mbid_mismatch': self._fix_mbid_mismatch,
+            'album_tag_inconsistency': self._fix_album_tag_inconsistency,
             'incomplete_album': self._fix_incomplete_album,
             'path_mismatch': self._fix_path_mismatch,
             'missing_lossy_copy': self._fix_missing_lossy_copy,
@@ -1302,6 +1303,82 @@ class RepairWorker:
         except Exception as e:
             return {'success': False, 'error': f'Failed to remove MBID: {str(e)}'}
 
+    def _fix_album_tag_inconsistency(self, entity_type, entity_id, file_path, details):
+        """Normalize inconsistent album tags to their canonical (majority) values.
+
+        Args:
+            entity_type: Unused here.
+            entity_id: Unused here.
+            file_path: Unused here; track paths are taken from ``details``.
+            details: Finding details holding 'inconsistencies' (each with
+                'field' and 'canonical') and 'tracks' (each with 'file_path').
+
+        Returns:
+            dict with 'success' plus either 'action' and 'message', or 'error'.
+        """
+        inconsistencies = details.get('inconsistencies', [])
+        tracks = details.get('tracks', [])
+        if not inconsistencies or not tracks:
+            return {'success': False, 'error': 'No inconsistency data in finding'}
+
+        from mutagen import File as MutagenFile
+        from core.repair_jobs.album_tag_consistency import _read_tag, _write_tag
+
+        # The download folder does not change between tracks; look it up once.
+        download_folder = None
+        if self._config_manager:
+            download_folder = self._config_manager.get('soulseek.download_path', '')
+
+        fixed_files = 0
+        errors = 0
+        changes = []
+
+        for track_info in tracks:
+            track_file = track_info.get('file_path', '')
+            if not track_file:
+                continue
+
+            resolved = _resolve_file_path(track_file, self.transfer_folder, download_folder)
+            if not resolved or not os.path.exists(resolved):
+                continue
+
+            try:
+                audio = MutagenFile(resolved, easy=False)
+                if audio is None:
+                    continue
+
+                file_changes = []
+                for inc in inconsistencies:
+                    field = inc['field']
+                    canonical = inc['canonical']
+                    current = _read_tag(audio, field)
+                    # Tracks with no value for the field are deliberately left alone.
+                    if current and current != canonical and _write_tag(audio, field, canonical):
+                        file_changes.append(
+                            f'{field}: "{current}" → "{canonical}" in {os.path.basename(resolved)}')
+
+                if file_changes:
+                    # Save once per file so a file fixed in several fields is
+                    # written, and counted, exactly once.
+                    audio.save()
+                    fixed_files += 1
+                    changes.extend(file_changes)
+            except Exception as e:
+                logger.error(f"Error fixing tag consistency for {resolved}: {e}")
+                errors += 1
+
+        if fixed_files > 0:
+            failed_note = f' ({errors} file(s) failed)' if errors else ''
+            return {
+                'success': True,
+                'action': 'normalized_tags',
+                'message': f'Fixed {fixed_files} file(s): {"; ".join(changes[:3])}{"..." if len(changes) > 3 else ""}{failed_note}',
+            }
+        elif errors > 0:
+            return {'success': False, 'error': f'Failed to fix {errors} file(s)'}
+        else:
+            return {'success': True, 'action': 'already_consistent', 'message': 'All tags already consistent'}
+
     # --- Album Completeness Auto-Fill ---
 
     @staticmethod
@@ -2115,6 +2192,7 @@ class RepairWorker:
         fixable_types = ('dead_file', 'orphan_file', 'track_number_mismatch',
                          'missing_cover_art', 'metadata_gap', 'duplicate_tracks',
                          'single_album_redundant', 'mbid_mismatch',
+                         'album_tag_inconsistency',
                          'incomplete_album', 'path_mismatch', 'missing_lossy_copy')
         placeholders = ','.join(['?'] * len(fixable_types))
 