From cde5754f0fcac2749539ee2a02bd5c18bd5d73da Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Tue, 17 Mar 2026 22:56:24 -0700 Subject: [PATCH] Add Auto-Fill fix handler for incomplete album findings When the maintenance worker flags an incomplete album, users can now click "Auto-Fill" to automatically locate missing tracks in the library, move/copy them into the album folder, and apply full metadata enhancement (MusicBrainz, Deezer, cover art, etc.). Singles are moved; tracks from multi-track albums are copied. Quality gate prevents filling FLAC albums with lossy files. Tracks not found in library are added to wishlist with album context for auto-download. --- core/repair_jobs/album_completeness.py | 10 + core/repair_worker.py | 513 ++++++++++++++++++++++++- web_server.py | 1 + webui/static/script.js | 1 + 4 files changed, 524 insertions(+), 1 deletion(-) diff --git a/core/repair_jobs/album_completeness.py b/core/repair_jobs/album_completeness.py index 69686b60..05f4af79 100644 --- a/core/repair_jobs/album_completeness.py +++ b/core/repair_jobs/album_completeness.py @@ -125,10 +125,20 @@ class AlbumCompletenessJob(RepairJob): for item in api_tracks['items']: tn = item.get('track_number') if tn and tn not in owned_numbers: + # Extract artist names from Spotify track data + track_artists = [] + for a in item.get('artists', []): + if isinstance(a, dict): + track_artists.append(a.get('name', '')) + elif isinstance(a, str): + track_artists.append(a) missing_tracks.append({ 'track_number': tn, 'name': item.get('name', ''), 'disc_number': item.get('disc_number', 1), + 'spotify_track_id': item.get('id', ''), + 'duration_ms': item.get('duration_ms', 0), + 'artists': track_artists, }) except Exception as e: logger.debug("Error getting album tracks for %s: %s", spotify_album_id, e) diff --git a/core/repair_worker.py b/core/repair_worker.py index 0a4d871e..3455404b 100644 --- a/core/repair_worker.py +++ b/core/repair_worker.py @@ -9,8 +9,11 @@ The worker is deactivated by default — the user must explicitly enable it. import json import os +import re +import shutil import threading import time +from difflib import SequenceMatcher from datetime import datetime, timedelta from typing import Any, Dict, List, Optional, Tuple @@ -105,6 +108,9 @@ class RepairWorker: self._acoustid_client = None self._metadata_cache = None + # Metadata enhancement callback (injected from web_server.py) + self._enhance_file_metadata = None + logger.info("Repair worker initialized (transfer_folder=%s)", self.transfer_folder) # ------------------------------------------------------------------ @@ -129,6 +135,14 @@ class RepairWorker: if config_manager: self.enabled = config_manager.get('repair.master_enabled', True) + def set_metadata_enhancer(self, enhance_fn): + """Inject the metadata enhancement function from web_server.py. + + This is _enhance_file_metadata(file_path, context, artist, album_info) + which handles full tag writing, source ID embedding, cover art, etc. + """ + self._enhance_file_metadata = enhance_fn + # ------------------------------------------------------------------ # Lazy client accessors # ------------------------------------------------------------------ @@ -781,6 +795,7 @@ class RepairWorker: 'duplicate_tracks': self._fix_duplicates, 'single_album_redundant': self._fix_single_album_redundant, 'mbid_mismatch': self._fix_mbid_mismatch, + 'incomplete_album': self._fix_incomplete_album, } handler = handlers.get(finding_type) if not handler: @@ -1089,6 +1104,501 @@ class RepairWorker: except Exception as e: return {'success': False, 'error': f'Failed to remove MBID: {str(e)}'} + # --- Album Completeness Auto-Fill --- + + @staticmethod + def _quality_score(file_path, bitrate): + """Return numeric quality score from file extension + bitrate. + + Lossless formats (FLAC/WAV/ALAC/AIFF) → 9999. + Lossy → bitrate value (e.g. 320 for MP3-320). + """ + ext = os.path.splitext(file_path or '')[1].lstrip('.').upper() if file_path else '' + if ext in ('FLAC', 'WAV', 'ALAC', 'AIFF', 'AIF'): + return 9999 + br = bitrate or 0 + try: + return int(str(br).replace('k', '').replace('K', '').strip()) + except (ValueError, TypeError): + return 0 + + @staticmethod + def _detect_filename_pattern(file_paths): + """Detect naming convention from existing track filenames. + + Returns a format string like '{num:02d} - {title}' or '{num} {title}'. + """ + patterns_found = {'dash': 0, 'dot': 0, 'space': 0, 'none': 0} + zero_padded = 0 + total = 0 + + for fp in file_paths: + if not fp: + continue + basename = os.path.splitext(os.path.basename(fp))[0] + total += 1 + # Check for leading number patterns + m = re.match(r'^(\d+)\s*[-–—]\s*(.+)', basename) + if m: + patterns_found['dash'] += 1 + if m.group(1).startswith('0'): + zero_padded += 1 + continue + m = re.match(r'^(\d+)\.\s*(.+)', basename) + if m: + patterns_found['dot'] += 1 + if m.group(1).startswith('0'): + zero_padded += 1 + continue + m = re.match(r'^(\d+)\s+(.+)', basename) + if m: + patterns_found['space'] += 1 + if m.group(1).startswith('0'): + zero_padded += 1 + continue + patterns_found['none'] += 1 + + pad = zero_padded > total / 2 if total else True + num_fmt = '{num:02d}' if pad else '{num}' + + best = max(patterns_found, key=patterns_found.get) + if best == 'dash': + return num_fmt + ' - {title}' + elif best == 'dot': + return num_fmt + '. {title}' + elif best == 'space': + return num_fmt + ' {title}' + # Default + return '{num:02d} - {title}' + + def _fix_incomplete_album(self, entity_type, entity_id, file_path, details): + """Auto-fill an incomplete album by finding missing tracks in the library. + + For each missing track: + 1. Search library for matching tracks + 2. Quality gate — candidate must meet album's minimum quality + 3. Single source (1-track album) → MOVE file; multi-track → COPY + 4. Retag the file with correct album metadata + 5. If no candidate found or quality too low → add to wishlist + """ + album_id = details.get('album_id') + missing_tracks = details.get('missing_tracks', []) + album_title = details.get('album_title', 'Unknown Album') + artist_name = details.get('artist', 'Unknown Artist') + spotify_album_id = details.get('spotify_album_id', '') + + if not album_id or not missing_tracks: + return {'success': False, 'error': 'Missing album_id or missing_tracks in finding details'} + + # Phase 1: Gather context from existing album tracks + existing_tracks = self.db.get_tracks_by_album(int(album_id)) + if not existing_tracks: + return {'success': False, 'error': 'No existing tracks found for this album — cannot determine album folder or quality'} + + # Compute quality floor from existing tracks + quality_scores = [self._quality_score(t.file_path, t.bitrate) for t in existing_tracks] + album_quality_floor = min(quality_scores) if quality_scores else 0 + + # Infer album folder from existing track file paths + download_folder = None + if self._config_manager: + download_folder = self._config_manager.get('soulseek.download_path', '') + + album_folder = None + for t in existing_tracks: + resolved = _resolve_file_path(t.file_path, self.transfer_folder, download_folder) + if resolved and os.path.exists(resolved): + album_folder = os.path.dirname(resolved) + break + + if not album_folder: + return {'success': False, 'error': 'Could not determine album folder from existing tracks'} + + # Detect filename pattern + resolved_paths = [] + for t in existing_tracks: + rp = _resolve_file_path(t.file_path, self.transfer_folder, download_folder) + if rp: + resolved_paths.append(rp) + filename_pattern = self._detect_filename_pattern(resolved_paths) + + # Phase 2-4: Process each missing track + fixed_count = 0 + wishlisted_count = 0 + skipped_count = 0 + track_details = [] + existing_track_ids = {t.id for t in existing_tracks} + + for mt in missing_tracks: + track_name = mt.get('name', '') + track_number = mt.get('track_number', 0) + disc_number = mt.get('disc_number', 1) + track_artists = mt.get('artists', []) + spotify_track_id = mt.get('spotify_track_id', '') + artist_search = track_artists[0] if track_artists else artist_name + + if not track_name: + skipped_count += 1 + track_details.append({'track': track_name, 'status': 'skipped', 'reason': 'no track name'}) + continue + + # Search library for this track + candidates = self.db.search_tracks(title=track_name, artist=artist_search, limit=20) + + # Filter: exclude tracks already in target album, require title similarity + best_candidate = None + best_score = -1 + + for cand in candidates: + if cand.id in existing_track_ids: + continue + if cand.album_id == int(album_id): + continue + + # Fuzzy title match + title_sim = SequenceMatcher(None, track_name.lower(), cand.title.lower()).ratio() + if title_sim < 0.70: + continue + + # Artist match (more lenient) + cand_artist = getattr(cand, 'artist_name', '') or '' + artist_sim = SequenceMatcher(None, artist_search.lower(), cand_artist.lower()).ratio() + if artist_sim < 0.50: + continue + + # Quality gate + cand_quality = self._quality_score(cand.file_path, cand.bitrate) + if cand_quality < album_quality_floor: + continue + + # Score: prefer higher quality, then better title match + score = cand_quality * 1000 + title_sim * 100 + if score > best_score: + best_score = score + best_candidate = cand + + if best_candidate: + # Phase 3: File operation + result = self._perform_album_fill( + best_candidate, album_id, album_title, artist_name, + track_name, track_number, disc_number, + album_folder, filename_pattern, download_folder + ) + if result.get('success'): + fixed_count += 1 + track_details.append({ + 'track': track_name, + 'status': 'fixed', + 'action': result.get('action', ''), + 'message': result.get('message', '') + }) + # Add the candidate ID to existing so we don't reuse it + existing_track_ids.add(best_candidate.id) + continue + else: + # File operation failed — fall through to wishlist + logger.warning("File operation failed for '%s': %s", track_name, result.get('error')) + + # Phase 4: Wishlist fallback + if spotify_track_id: + try: + wishlist_data = { + 'id': spotify_track_id, + 'name': track_name, + 'artists': [{'name': a} for a in track_artists] if track_artists else [{'name': artist_name}], + 'album': {'name': album_title}, + 'duration_ms': mt.get('duration_ms', 0), + } + source_info = { + 'album_title': album_title, + 'artist': artist_name, + 'track_number': track_number, + 'spotify_album_id': spotify_album_id, + 'reason': 'album_completeness_auto_fill', + } + self.db.add_to_wishlist( + wishlist_data, + failure_reason='Missing from incomplete album', + source_type='album', + source_info=source_info, + ) + wishlisted_count += 1 + track_details.append({ + 'track': track_name, + 'status': 'wishlisted', + 'reason': 'no suitable candidate in library' if not best_candidate else 'quality too low' + }) + except Exception as e: + logger.debug("Failed to add '%s' to wishlist: %s", track_name, e) + skipped_count += 1 + track_details.append({'track': track_name, 'status': 'skipped', 'reason': f'wishlist error: {e}'}) + else: + skipped_count += 1 + track_details.append({'track': track_name, 'status': 'skipped', 'reason': 'no spotify_track_id for wishlist'}) + + # Build result message + parts = [] + if fixed_count: + parts.append(f'{fixed_count} track(s) filled') + if wishlisted_count: + parts.append(f'{wishlisted_count} added to wishlist') + if skipped_count: + parts.append(f'{skipped_count} skipped') + message = f'Album "{album_title}": ' + ', '.join(parts) if parts else 'No tracks processed' + + success = fixed_count > 0 or wishlisted_count > 0 + return { + 'success': success, + 'action': 'auto_fill_album', + 'message': message, + 'fixed': fixed_count, + 'wishlisted': wishlisted_count, + 'skipped': skipped_count, + 'details': track_details, + } + + def _perform_album_fill(self, candidate, album_id, album_title, artist_name, + track_name, track_number, disc_number, + album_folder, filename_pattern, download_folder): + """Move or copy a candidate track into the album folder and update DB.""" + try: + # Resolve source file + src_path = _resolve_file_path(candidate.file_path, self.transfer_folder, download_folder) + if not src_path or not os.path.exists(src_path): + return {'success': False, 'error': f'Source file not found: {candidate.file_path}'} + + # Determine source type: single (1-track album) vs multi-track + source_album_tracks = self.db.get_tracks_by_album(candidate.album_id) + is_single_source = len(source_album_tracks) <= 1 + + # Build target filename + src_ext = os.path.splitext(src_path)[1] # e.g. '.flac' + # Sanitize title for filesystem + safe_title = re.sub(r'[<>:"/\\|?*]', '', track_name).strip() + target_name = filename_pattern.format(num=track_number, title=safe_title) + src_ext + target_path = os.path.join(album_folder, target_name) + + # Avoid overwriting existing files + if os.path.exists(target_path): + return {'success': False, 'error': f'Target file already exists: {target_path}'} + + # Ensure album folder exists + os.makedirs(album_folder, exist_ok=True) + + conn = None + try: + if is_single_source: + # MOVE: relocate file and update DB record + shutil.move(src_path, target_path) + action = 'moved' + + # Update existing DB record to point to new album and path + conn = self.db._get_connection() + cursor = conn.cursor() + # Get the target album's artist_id for consistency + cursor.execute("SELECT artist_id FROM tracks WHERE album_id = ? LIMIT 1", (album_id,)) + artist_row = cursor.fetchone() + target_artist_id = artist_row[0] if artist_row else candidate.artist_id + cursor.execute(""" + UPDATE tracks + SET album_id = ?, artist_id = ?, title = ?, + file_path = ?, track_number = ?, + updated_at = CURRENT_TIMESTAMP + WHERE id = ? + """, (album_id, target_artist_id, track_name, + target_path, track_number, candidate.id)) + + # Clean up the source single's album if it's now empty + cursor.execute("SELECT COUNT(*) FROM tracks WHERE album_id = ?", (candidate.album_id,)) + remaining = cursor.fetchone()[0] + if remaining == 0: + cursor.execute("DELETE FROM albums WHERE id = ?", (candidate.album_id,)) + + conn.commit() + + # Clean up empty source directories + self._cleanup_empty_dirs(os.path.dirname(src_path)) + else: + # COPY: duplicate file, create new DB record + shutil.copy2(src_path, target_path) + action = 'copied' + + conn = self.db._get_connection() + cursor = conn.cursor() + # Get artist_id from existing album tracks + cursor.execute("SELECT artist_id FROM tracks WHERE album_id = ? LIMIT 1", (album_id,)) + artist_row = cursor.fetchone() + target_artist_id = artist_row[0] if artist_row else candidate.artist_id + + cursor.execute(""" + INSERT INTO tracks (album_id, artist_id, title, track_number, duration, + file_path, bitrate, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """, (album_id, target_artist_id, track_name, track_number, + candidate.duration, target_path, candidate.bitrate)) + conn.commit() + + finally: + if conn: + conn.close() + + # Enhance the file with full metadata pipeline (same as fresh downloads) + # Clears existing tags, writes standard + source IDs, embeds cover art + self._enhance_placed_track( + target_path, album_id, album_title, artist_name, + track_name, track_number, disc_number + ) + + return { + 'success': True, + 'action': action, + 'message': f'{action.title()} "{track_name}" from {"single" if is_single_source else "compilation"}' + } + + except Exception as e: + logger.error("Error filling track '%s': %s", track_name, e, exc_info=True) + return {'success': False, 'error': str(e)} + + def _cleanup_empty_dirs(self, directory): + """Remove empty parent directories up to 3 levels, never removing transfer folder.""" + if not directory: + return + transfer_norm = os.path.normpath(self.transfer_folder) + parent = directory + for _ in range(3): + if (parent and os.path.isdir(parent) + and os.path.normpath(parent) != transfer_norm + and not os.listdir(parent)): + try: + os.rmdir(parent) + except OSError: + break + parent = os.path.dirname(parent) + else: + break + + def _enhance_placed_track(self, file_path, album_id, album_title, artist_name, + track_name, track_number, disc_number): + """Run full metadata enhancement on a placed track. + + Uses the injected _enhance_file_metadata from web_server.py (same pipeline + as fresh downloads) — clears tags, writes standard metadata, embeds source + IDs from MusicBrainz/Deezer/etc., and embeds cover art. + + Falls back to basic tag_writer if the enhancer isn't available. + """ + # Fetch album metadata from DB for building synthetic context + album_year = None + album_genres = [] + album_thumb = None + album_track_count = None + spotify_album_id = None + conn_meta = None + try: + conn_meta = self.db._get_connection() + cursor_meta = conn_meta.cursor() + cursor_meta.execute( + "SELECT year, genres, thumb_url, track_count, spotify_album_id FROM albums WHERE id = ?", + (album_id,) + ) + album_row = cursor_meta.fetchone() + if album_row: + album_year = album_row[0] + if album_row[1]: + try: + parsed = json.loads(album_row[1]) + if isinstance(parsed, list): + album_genres = parsed + except (json.JSONDecodeError, TypeError): + pass + album_thumb = album_row[2] + album_track_count = album_row[3] + spotify_album_id = album_row[4] if len(album_row) > 4 else None + except Exception: + pass + finally: + if conn_meta: + conn_meta.close() + + # Try full enhancement pipeline if available AND enabled in config + # _enhance_file_metadata returns True without writing when enhancement is disabled, + # so we must check the config ourselves to avoid skipping the basic fallback + enhancement_enabled = ( + self._enhance_file_metadata is not None + and self._config_manager + and self._config_manager.get('metadata_enhancement.enabled', True) + ) + if enhancement_enabled: + try: + # Build synthetic context dicts (same pattern as _execute_retag in web_server.py) + context = { + 'original_search_result': { + 'spotify_clean_title': track_name, + 'title': track_name, + 'disc_number': disc_number, + 'artists': [{'name': artist_name}], + }, + 'spotify_album': { + 'id': spotify_album_id or '', + 'name': album_title, + 'release_date': str(album_year) if album_year else '', + 'total_tracks': album_track_count or 1, + 'image_url': album_thumb or '', + }, + 'track_info': { + 'id': '', # No specific track ID available + }, + } + artist = { + 'name': artist_name, + 'id': '', + 'genres': album_genres[:2] if album_genres else [], + } + album_info = { + 'is_album': True, + 'album_name': album_title, + 'track_number': track_number, + 'total_tracks': album_track_count or 1, + 'disc_number': disc_number, + 'clean_track_name': track_name, + 'album_image_url': album_thumb or '', + } + + result = self._enhance_file_metadata(file_path, context, artist, album_info) + if result: + logger.info("Full metadata enhancement applied to '%s'", track_name) + return + else: + logger.warning("Full enhancement returned False for '%s', falling back to basic tags", track_name) + except Exception as e: + logger.warning("Full enhancement failed for '%s': %s — falling back to basic tags", track_name, e) + + # Fallback: basic tag writer (title, artist, album, track#, disc#, year, genre, cover art) + # Used when: enhancer not injected, metadata enhancement disabled, or enhancer failed + try: + from core.tag_writer import write_tags_to_file + tag_data = { + 'title': track_name, + 'artist': artist_name, + 'album_artist': artist_name, + 'album': album_title, + 'track_number': track_number, + 'disc_number': disc_number, + } + if album_year: + tag_data['year'] = album_year + if album_genres: + tag_data['genre'] = ', '.join(album_genres[:5]) + if album_track_count: + tag_data['total_tracks'] = album_track_count + + write_tags_to_file(file_path, tag_data, + embed_cover=bool(album_thumb), + cover_url=album_thumb) + logger.info("Basic tag enhancement applied to '%s'", track_name) + except Exception as e: + logger.warning("Retagging failed for '%s' (file still placed): %s", file_path, e) + def dismiss_finding(self, finding_id: int) -> bool: """Dismiss a finding.""" conn = None @@ -1120,7 +1630,8 @@ class RepairWorker: # Build query for pending fixable findings fixable_types = ('dead_file', 'orphan_file', 'track_number_mismatch', - 'missing_cover_art', 'metadata_gap', 'duplicate_tracks', 'mbid_mismatch') + 'missing_cover_art', 'metadata_gap', 'duplicate_tracks', 'mbid_mismatch', + 'incomplete_album') placeholders = ','.join(['?'] * len(fixable_types)) where_parts = [f"finding_type IN ({placeholders})", "status = 'pending'"] params = list(fixable_types) diff --git a/web_server.py b/web_server.py index d5cfb994..4a86522f 100644 --- a/web_server.py +++ b/web_server.py @@ -39796,6 +39796,7 @@ try: transfer_path = docker_resolve_path(config_manager.get('soulseek.transfer_path', './Transfer')) repair_worker = RepairWorker(database=repair_db, transfer_folder=transfer_path) repair_worker.set_config_manager(config_manager) + repair_worker.set_metadata_enhancer(_enhance_file_metadata) # --- Repair Job Progress Tracking (live progress like automation cards) --- repair_job_progress_states = {} # job_id (str) -> state dict diff --git a/webui/static/script.js b/webui/static/script.js index 09d18092..b9e645a2 100644 --- a/webui/static/script.js +++ b/webui/static/script.js @@ -52659,6 +52659,7 @@ async function loadRepairFindings() { missing_cover_art: 'Apply Art', metadata_gap: 'Apply', duplicate_tracks: 'Keep Best', + incomplete_album: 'Auto-Fill', }; container.innerHTML = items.map(f => {