Add Album Tag Consistency repair job: detect and fix inconsistent tags across album tracks

New maintenance job scans albums for tracks with mismatched album names, album
artist names, or MusicBrainz release IDs. These inconsistencies cause Navidrome
and other media servers to split one album into multiple entries. The fix
normalizes outlier tracks to the majority value by rewriting file tags.
pull/253/head
Broque Thomas 2 weeks ago
parent 68f06d663b
commit d75893bc30

@ -40,6 +40,7 @@ _JOB_MODULES = [
'core.repair_jobs.mbid_mismatch_detector',
'core.repair_jobs.single_album_dedup',
'core.repair_jobs.lossy_converter',
'core.repair_jobs.album_tag_consistency',
]

@ -0,0 +1,335 @@
"""Album Tag Consistency Job — finds albums where tracks have inconsistent tags.
When tracks in the same album have different artist names, album names, or
MusicBrainz release IDs, media servers like Navidrome split them into separate
albums. This job detects these inconsistencies and offers to fix them by
normalizing all tracks to the canonical (majority) value.
"""
import json
import os
from collections import Counter
from mutagen import File as MutagenFile
from mutagen.id3 import ID3
from mutagen.flac import FLAC
from mutagen.oggvorbis import OggVorbis
from mutagen.mp4 import MP4
from core.repair_jobs import register_job
from core.repair_jobs.base import JobContext, JobResult, RepairJob
from utils.logging_config import get_logger
logger = get_logger("repair_job.album_tag_consistency")
def _read_tag(audio, tag_name):
"""Read a tag value from a Mutagen file object, handling format differences."""
if audio is None:
return None
try:
if isinstance(audio.tags, ID3):
# MP3
if tag_name == 'album':
frame = audio.tags.get('TALB')
return str(frame) if frame else None
elif tag_name == 'artist':
frame = audio.tags.get('TPE1')
return str(frame) if frame else None
elif tag_name == 'albumartist':
frame = audio.tags.get('TPE2')
return str(frame) if frame else None
elif tag_name == 'musicbrainz_albumid':
for key in audio.tags:
if key.startswith('TXXX:') and 'MusicBrainz Album Id' in key:
return str(audio.tags[key])
return None
elif isinstance(audio, (FLAC, OggVorbis)):
vals = audio.get(tag_name.upper(), [])
return vals[0] if vals else None
elif isinstance(audio, MP4):
tag_map = {
'album': '\xa9alb',
'artist': '\xa9ART',
'albumartist': 'aART',
}
key = tag_map.get(tag_name)
if key:
vals = audio.get(key, [])
return vals[0] if vals else None
if tag_name == 'musicbrainz_albumid':
vals = audio.get('----:com.apple.iTunes:MusicBrainz Album Id', [])
if vals:
return vals[0].decode('utf-8') if isinstance(vals[0], bytes) else str(vals[0])
return None
except Exception:
pass
return None
def _write_tag(audio, tag_name, value):
"""Write a tag value to a Mutagen file object, handling format differences."""
if audio is None or value is None:
return False
try:
if isinstance(audio.tags, ID3):
from mutagen.id3 import TALB, TPE1, TPE2, TXXX
if tag_name == 'album':
audio.tags.delall('TALB')
audio.tags.add(TALB(encoding=3, text=[value]))
elif tag_name == 'artist':
audio.tags.delall('TPE1')
audio.tags.add(TPE1(encoding=3, text=[value]))
elif tag_name == 'albumartist':
audio.tags.delall('TPE2')
audio.tags.add(TPE2(encoding=3, text=[value]))
elif tag_name == 'musicbrainz_albumid':
# Remove existing
to_remove = [k for k in audio.tags if k.startswith('TXXX:') and 'MusicBrainz Album Id' in k]
for k in to_remove:
del audio.tags[k]
audio.tags.add(TXXX(encoding=3, desc='MusicBrainz Album Id', text=[value]))
return True
elif isinstance(audio, (FLAC, OggVorbis)):
audio[tag_name.upper()] = [value]
return True
elif isinstance(audio, MP4):
tag_map = {
'album': '\xa9alb',
'artist': '\xa9ART',
'albumartist': 'aART',
}
key = tag_map.get(tag_name)
if key:
audio[key] = [value]
return True
if tag_name == 'musicbrainz_albumid':
from mutagen.mp4 import MP4FreeForm
audio['----:com.apple.iTunes:MusicBrainz Album Id'] = [
MP4FreeForm(value.encode('utf-8'))
]
return True
except Exception as e:
logger.debug(f"Failed to write tag {tag_name}: {e}")
return False
@register_job
class AlbumTagConsistencyJob(RepairJob):
    """Report albums whose tracks disagree on album-level file tags.

    The scan reads tags directly from the audio files of every album that has
    at least two tracks with a file path and creates one
    ``album_tag_inconsistency`` finding per affected album. Each finding
    records, per field, the majority ("canonical") value and the variants
    seen. The scan itself only reads the audio files.
    """

    job_id = 'album_tag_consistency'
    display_name = 'Album Tag Consistency'
    description = 'Finds albums where tracks have inconsistent tags causing media server splits'
    help_text = (
        'Scans your library for albums where tracks have mismatched metadata — '
        'different album names, artist names, or MusicBrainz release IDs across '
        'tracks that belong to the same album.\n\n'
        'These inconsistencies cause media servers like Navidrome to split one album '
        'into multiple entries (e.g. "Simulation Theory" and "Simulation Theory (Super Deluxe)").\n\n'
        'The fix normalizes all tracks in the album to the most common (majority) value, '
        'then writes the corrected tags to the actual audio files.\n\n'
        'Settings:\n'
        '- Check album name: Detect inconsistent album title tags\n'
        '- Check album artist: Detect inconsistent album artist tags\n'
        '- Check MB release ID: Detect inconsistent MusicBrainz Album IDs'
    )
    icon = 'repair-icon-consistency'
    default_enabled = False
    default_interval_hours = 168  # Weekly
    default_settings = {
        'check_album_name': True,
        'check_album_artist': True,
        'check_mb_release_id': True,
    }
    auto_fix = False

    # (setting name, field name stored in the finding, key in per-track tag data)
    _CHECKS = (
        ('check_album_name', 'album', 'album_tag'),
        ('check_album_artist', 'albumartist', 'albumartist_tag'),
        ('check_mb_release_id', 'musicbrainz_albumid', 'mbid_tag'),
    )

    def scan(self, context: JobContext) -> JobResult:
        """Scan all multi-track albums and create a finding per inconsistent album.

        Args:
            context: Job context providing the database, progress reporting,
                stop/pause checks and finding creation.

        Returns:
            A JobResult with ``scanned``, ``findings_created`` and ``errors``
            updated. Errors are logged and counted rather than raised.
        """
        result = JobResult()
        settings = self._get_settings(context)
        active_checks = [
            (field, tag_key)
            for setting, field, tag_key in self._CHECKS
            if settings.get(setting, True)
        ]
        if not active_checks:
            return result

        conn = None
        try:
            conn = context.db._get_connection()
            cursor = conn.cursor()
            # Get all albums with 2+ tracks that have file paths
            cursor.execute("""
                SELECT al.id, al.title, ar.name as artist_name,
                       COUNT(t.id) as track_count
                FROM albums al
                JOIN artists ar ON ar.id = al.artist_id
                JOIN tracks t ON t.album_id = al.id
                WHERE t.file_path IS NOT NULL AND t.file_path != ''
                GROUP BY al.id
                HAVING COUNT(t.id) >= 2
                ORDER BY ar.name, al.title
            """)
            albums = cursor.fetchall()
            total = len(albums)
            if context.report_progress:
                context.report_progress(
                    phase=f'Scanning {total} albums for tag consistency...',
                    total=total,
                )

            for idx, album_row in enumerate(albums):
                if context.check_stop():
                    break
                # NOTE(review): presumably wait_if_paused() returns truthy when
                # the job should stop after a pause — confirm against JobContext.
                if idx % 10 == 0 and context.wait_if_paused():
                    break

                album_id = album_row['id']
                album_title = album_row['title']
                artist_name = album_row['artist_name']
                result.scanned += 1

                if context.report_progress and idx % 20 == 0:
                    context.report_progress(
                        scanned=idx + 1, total=total,
                        phase=f'Scanning {idx + 1} / {total}',
                        log_line=f'{artist_name} — {album_title}',
                        log_type='info'
                    )

                # Get all tracks in this album with file paths
                cursor.execute("""
                    SELECT id, title, file_path FROM tracks
                    WHERE album_id = ? AND file_path IS NOT NULL AND file_path != ''
                """, (album_id,))
                tag_data = self._read_album_tags(cursor.fetchall(), context)
                # A comparison needs at least two readable files.
                if len(tag_data) < 2:
                    continue

                inconsistencies = []
                for field, tag_key in active_checks:
                    found = self._find_inconsistency(field, tag_key, tag_data)
                    if found is not None:
                        inconsistencies.append(found)
                if not inconsistencies:
                    continue

                fields_affected = ', '.join(i['field'] for i in inconsistencies)
                total_outliers = sum(i['outlier_count'] for i in inconsistencies)
                # Build description with specifics (at most three variants per field)
                desc_parts = []
                for inc in inconsistencies:
                    variants_str = ' vs '.join(f'"{v}"' for v in inc['variants'][:3])
                    desc_parts.append(f"{inc['field']}: {variants_str}")

                context.create_finding(
                    finding_type='album_tag_inconsistency',
                    severity='warning',
                    entity_type='album',
                    entity_id=str(album_id),
                    file_path=None,
                    title=f'Inconsistent tags: {album_title} by {artist_name}',
                    description=f'{total_outliers} track(s) have mismatched {fields_affected}. '
                                + '; '.join(desc_parts),
                    details={
                        'album_id': album_id,
                        'album_title': album_title,
                        'artist_name': artist_name,
                        'inconsistencies': inconsistencies,
                        'track_count': len(tag_data),
                        'tracks': [{'id': t['track_id'], 'title': t['track_title'],
                                    'file_path': t['file_path']} for t in tag_data],
                    }
                )
                result.findings_created += 1
                if context.report_progress:
                    context.report_progress(
                        log_line=f'Found: {album_title} — {fields_affected}',
                        log_type='warning'
                    )
        except Exception as e:
            logger.error(f"Album tag consistency scan error: {e}")
            result.errors += 1
        finally:
            # Close on every exit path so a failing scan does not leak the connection.
            if conn is not None:
                try:
                    conn.close()
                except Exception as e:
                    logger.debug(f"Failed to close DB connection after scan: {e}")
        return result

    def _read_album_tags(self, tracks, context):
        """Read the album-level tags of every track whose file can be opened.

        Tracks whose path cannot be resolved, or whose file Mutagen cannot
        parse, are left out of the returned list.
        """
        tag_data = []
        for track in tracks:
            file_path = track['file_path']
            resolved = self._resolve_path(file_path, context)
            if not resolved:
                continue
            try:
                audio = MutagenFile(resolved, easy=False)
                if audio is None:
                    continue
                tag_data.append({
                    'track_id': track['id'],
                    'track_title': track['title'],
                    'file_path': file_path,
                    'resolved_path': resolved,
                    'album_tag': _read_tag(audio, 'album'),
                    'albumartist_tag': _read_tag(audio, 'albumartist'),
                    'mbid_tag': _read_tag(audio, 'musicbrainz_albumid'),
                })
            except Exception as e:
                # Unreadable files are skipped, but leave a trace for debugging.
                logger.debug(f"Could not read tags from {resolved}: {e}")
        return tag_data

    @staticmethod
    def _find_inconsistency(field, tag_key, tag_data):
        """Return an inconsistency record for one field, or None if consistent.

        Tracks with an empty/missing value for the field are ignored. The
        canonical value is the most frequent one (first seen wins a tie);
        ``variants`` lists all values ordered by frequency, canonical first,
        so the order is deterministic.
        """
        counts = Counter(t[tag_key] for t in tag_data if t[tag_key])
        if len(counts) < 2:
            return None
        ranked = counts.most_common()
        canonical, canonical_count = ranked[0]
        return {
            'field': field,
            'canonical': canonical,
            'variants': [value for value, _ in ranked],
            'outlier_count': sum(counts.values()) - canonical_count,
        }

    def _resolve_path(self, file_path, context):
        """Resolve a DB file path to an existing filesystem path, or None.

        Tries the path as-is, then relative to the transfer folder, then
        relative to the configured ``soulseek.download_path``.
        """
        if not file_path:
            return None
        # Try as-is first
        if os.path.exists(file_path):
            return file_path
        download_path = (
            context.config_manager.get('soulseek.download_path', '')
            if context.config_manager else ''
        )
        # Then relative to the transfer folder, then the download path
        for base in (context.transfer_folder, download_path):
            if not base:
                continue
            joined = os.path.join(base, file_path)
            if os.path.exists(joined):
                return joined
        return None

@ -806,6 +806,7 @@ class RepairWorker:
'duplicate_tracks': self._fix_duplicates,
'single_album_redundant': self._fix_single_album_redundant,
'mbid_mismatch': self._fix_mbid_mismatch,
'album_tag_inconsistency': self._fix_album_tag_inconsistency,
'incomplete_album': self._fix_incomplete_album,
'path_mismatch': self._fix_path_mismatch,
'missing_lossy_copy': self._fix_missing_lossy_copy,
@ -1302,6 +1303,63 @@ class RepairWorker:
except Exception as e:
return {'success': False, 'error': f'Failed to remove MBID: {str(e)}'}
def _fix_album_tag_inconsistency(self, entity_type, entity_id, file_path, details):
"""Normalize inconsistent tags across all tracks in an album to the canonical (majority) value."""
inconsistencies = details.get('inconsistencies', [])
tracks = details.get('tracks', [])
if not inconsistencies or not tracks:
return {'success': False, 'error': 'No inconsistency data in finding'}
from mutagen import File as MutagenFile
from core.repair_jobs.album_tag_consistency import _read_tag, _write_tag
fixed_files = 0
errors = 0
changes = []
for inc in inconsistencies:
field = inc['field']
canonical = inc['canonical']
for track_info in tracks:
track_file = track_info.get('file_path', '')
if not track_file:
continue
# Resolve path
download_folder = None
if self._config_manager:
download_folder = self._config_manager.get('soulseek.download_path', '')
resolved = _resolve_file_path(track_file, self.transfer_folder, download_folder)
if not resolved or not os.path.exists(resolved):
continue
try:
audio = MutagenFile(resolved, easy=False)
if audio is None:
continue
current = _read_tag(audio, field)
if current and current != canonical:
if _write_tag(audio, field, canonical):
audio.save()
fixed_files += 1
changes.append(f'{field}: "{current}""{canonical}" in {os.path.basename(resolved)}')
except Exception as e:
logger.error(f"Error fixing tag consistency for {resolved}: {e}")
errors += 1
if fixed_files > 0:
return {
'success': True,
'action': 'normalized_tags',
'message': f'Fixed {fixed_files} file(s): {"; ".join(changes[:3])}{"..." if len(changes) > 3 else ""}',
}
elif errors > 0:
return {'success': False, 'error': f'Failed to fix {errors} file(s)'}
else:
return {'success': True, 'action': 'already_consistent', 'message': 'All tags already consistent'}
# --- Album Completeness Auto-Fill ---
@staticmethod
@ -2115,6 +2173,7 @@ class RepairWorker:
fixable_types = ('dead_file', 'orphan_file', 'track_number_mismatch',
'missing_cover_art', 'metadata_gap', 'duplicate_tracks',
'single_album_redundant', 'mbid_mismatch',
'album_tag_inconsistency',
'incomplete_album', 'path_mismatch',
'missing_lossy_copy')
placeholders = ','.join(['?'] * len(fixable_types))

Loading…
Cancel
Save