From 4dba3757be56a2372383b832949139a2ca1eb386 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Sat, 21 Mar 2026 11:27:51 -0700 Subject: [PATCH] Fix orphan detector false positives and add staging/delete choice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Orphan detector: add normalized tag matching that strips parentheticals and brackets (feat. X, [FLAC 16bit], etc.) and tries first-artist-only for comma-separated artists. Prevents false orphan flags for tracks like "The Mountain (feat. Dennis Hopper...)" that exist in DB as "The Mountain". All lookups remain O(1) set operations. Orphan fix: replace auto-delete with user choice prompt. Single Fix and Fix All both show modal asking "Move to Staging" or "Delete". Move to Staging relocates file to import staging folder for proper re-import with metadata matching. Fix action flows through API endpoint → repair_worker.fix_finding → _fix_orphan_file handler. Staging path uses docker_resolve_path for container compatibility. --- core/repair_jobs/orphan_file_detector.py | 36 ++++++-- core/repair_worker.py | 102 ++++++++++++++++++----- web_server.py | 8 +- webui/static/script.js | 77 ++++++++++++++++- 4 files changed, 191 insertions(+), 32 deletions(-) diff --git a/core/repair_jobs/orphan_file_detector.py b/core/repair_jobs/orphan_file_detector.py index 6a9e1559..875b7665 100644 --- a/core/repair_jobs/orphan_file_detector.py +++ b/core/repair_jobs/orphan_file_detector.py @@ -1,6 +1,7 @@ """Orphan File Detector Job — finds files in the transfer folder not tracked in the DB.""" import os +import re import time from core.repair_jobs import register_job @@ -46,8 +47,14 @@ class OrphanFileDetectorJob(RepairJob): # of depth 1-3 (filename, album/filename, artist/album/filename) which # covers all realistic path-prefix mismatches. known_suffixes = set() - known_titles = set() # (title_lower, artist_lower) for tag-based fallback + known_titles = set() # (title_lower, artist_lower) for exact match + known_titles_clean = set() # (clean_title, clean_artist) for normalized match conn = None + + def _strip_extras(s): + """Strip parentheticals/brackets for normalized comparison.""" + return re.sub(r'\s*[\(\[][^)\]]*[\)\]]', '', s).strip() + try: conn = context.db._get_connection() cursor = conn.cursor() @@ -59,7 +66,7 @@ class OrphanFileDetectorJob(RepairJob): suffix = '/'.join(parts[-depth:]).lower() known_suffixes.add(suffix) - # Build title+artist set for tag-based fallback matching + # Build title+artist sets for tag-based fallback matching cursor.execute(""" SELECT t.title, ar.name FROM tracks t LEFT JOIN artists ar ON ar.id = t.artist_id @@ -70,6 +77,11 @@ class OrphanFileDetectorJob(RepairJob): artist = (row[1] or '').lower().strip() if title: known_titles.add((title, artist)) + # Also store normalized version (stripped of feat., parentheticals, etc.) + clean_t = _strip_extras(title) + clean_a = _strip_extras(artist) + if clean_t: + known_titles_clean.add((clean_t, clean_a)) except Exception as e: logger.error("Error reading known file paths from DB: %s", e, exc_info=True) result.errors += 1 @@ -122,7 +134,8 @@ class OrphanFileDetectorJob(RepairJob): break # Fallback: read file tags and check if title+artist exists in DB - # Catches path mismatches where the file is tracked but under a different path + # Catches path mismatches where the file is tracked but under a different path. + # Uses both exact and normalized comparison to handle feat. suffixes, etc. if not is_known and known_titles: try: from mutagen import File as MutagenFile @@ -130,8 +143,21 @@ class OrphanFileDetectorJob(RepairJob): if audio: file_title = ((audio.get('title') or [None])[0] or '').lower().strip() file_artist = ((audio.get('artist') or [None])[0] or '').lower().strip() - if file_title and (file_title, file_artist) in known_titles: - is_known = True + if file_title: + # Exact match first (fast path) + if (file_title, file_artist) in known_titles: + is_known = True + else: + # Normalized match: strip (feat. X), [FLAC 16bit], etc. + clean_title = _strip_extras(file_title) + clean_artist = _strip_extras(file_artist) + # Also try first artist only (handles "Gorillaz, Dennis Hopper" → "Gorillaz") + first_artist = clean_artist.split(',')[0].strip() if clean_artist else '' + if clean_title and ( + (clean_title, clean_artist) in known_titles_clean or + (first_artist and (clean_title, first_artist) in known_titles_clean) + ): + is_known = True except Exception: pass diff --git a/core/repair_worker.py b/core/repair_worker.py index 62d7aad4..3b0fe74f 100644 --- a/core/repair_worker.py +++ b/core/repair_worker.py @@ -749,8 +749,13 @@ class RepairWorker: if conn: conn.close() - def fix_finding(self, finding_id: int) -> dict: - """Execute the appropriate fix action for a finding, then mark it resolved.""" + def fix_finding(self, finding_id: int, fix_action: str = None) -> dict: + """Execute the appropriate fix action for a finding, then mark it resolved. + + Args: + finding_id: ID of the finding to fix + fix_action: Optional action override (e.g. 'staging' or 'delete' for orphan files) + """ conn = None try: conn = self.db._get_connection() @@ -769,6 +774,10 @@ class RepairWorker: conn.close() conn = None + # Pass fix_action through to handler via details + if fix_action: + details['_fix_action'] = fix_action + # Dispatch fix by finding type result = self._execute_fix(finding_type, entity_type, entity_id, file_path, details) @@ -902,9 +911,21 @@ class RepairWorker: conn.close() def _fix_orphan_file(self, entity_type, entity_id, file_path, details): - """Delete the orphan file from disk.""" + """Handle an orphan file — move to staging or delete based on user choice. + + The fix_action is passed via details['_fix_action']: + 'staging' — move file to the staging folder for import + 'delete' — delete file from disk + If no action specified, returns an error asking the user to choose. + """ + fix_action = details.get('_fix_action', '') + if fix_action not in ('staging', 'delete'): + return {'success': False, 'error': 'Please choose an action: move to staging or delete', + 'needs_action': True} + if not file_path: return {'success': False, 'error': 'No file path associated with this finding'} + try: # Resolve path in case of cross-environment mismatch download_folder = None @@ -912,25 +933,60 @@ class RepairWorker: download_folder = self._config_manager.get('soulseek.download_path', '') resolved = _resolve_file_path(file_path, self.transfer_folder, download_folder) or file_path - if os.path.exists(resolved): + if not os.path.exists(resolved): + return {'success': True, 'action': 'already_gone', + 'message': 'File was already removed'} + + if fix_action == 'staging': + # Move to staging folder + staging_path = './Staging' + if self._config_manager: + staging_path = self._config_manager.get('import.staging_path', './Staging') + staging_path = self._resolve_path(staging_path) + os.makedirs(staging_path, exist_ok=True) + + dest = os.path.join(staging_path, os.path.basename(resolved)) + # Avoid overwriting existing files in staging + if os.path.exists(dest): + base, ext = os.path.splitext(os.path.basename(resolved)) + counter = 1 + while os.path.exists(dest): + dest = os.path.join(staging_path, f"{base} ({counter}){ext}") + counter += 1 + + import shutil + shutil.move(resolved, dest) + + # Clean up empty parent directories + self._cleanup_empty_parents(resolved) + + return {'success': True, 'action': 'moved_to_staging', + 'message': f'Moved to staging folder for import'} + + elif fix_action == 'delete': os.remove(resolved) - # Clean up empty parent directories (never remove transfer folder itself) - transfer_norm = os.path.normpath(self.transfer_folder) - parent = os.path.dirname(resolved) - for _ in range(3): # Up to 3 levels (track/album/artist) - if (parent and os.path.isdir(parent) - and os.path.normpath(parent) != transfer_norm - and not os.listdir(parent)): - os.rmdir(parent) - parent = os.path.dirname(parent) - else: - break + self._cleanup_empty_parents(resolved) return {'success': True, 'action': 'deleted_file', 'message': 'Deleted orphan file from disk'} - return {'success': True, 'action': 'already_gone', - 'message': 'File was already removed'} + except OSError as e: - return {'success': False, 'error': f'Failed to delete file: {e}'} + return {'success': False, 'error': f'Failed to handle orphan file: {e}'} + + def _cleanup_empty_parents(self, file_path): + """Remove empty parent directories up to 3 levels, never removing the transfer folder.""" + try: + transfer_norm = os.path.normpath(self.transfer_folder) + parent = os.path.dirname(file_path) + for _ in range(3): + if (parent and os.path.isdir(parent) + and os.path.normpath(parent) != transfer_norm + and not os.listdir(parent)): + os.rmdir(parent) + parent = os.path.dirname(parent) + else: + break + except OSError: + pass def _fix_track_number(self, entity_type, entity_id, file_path, details): """Fix track number in file tags, rename file, and update DB.""" @@ -1874,8 +1930,12 @@ class RepairWorker: conn.close() def bulk_fix_findings(self, job_id: str = None, severity: str = None, - finding_ids: List[int] = None) -> dict: - """Fix all pending fixable findings matching filters. Returns {fixed, failed, skipped}.""" + finding_ids: List[int] = None, fix_action: str = None) -> dict: + """Fix all pending fixable findings matching filters. Returns {fixed, failed, skipped}. + + Args: + fix_action: Optional action for findings that need user choice (e.g. orphan files) + """ conn = None try: conn = self.db._get_connection() @@ -1910,7 +1970,7 @@ class RepairWorker: fixed = 0 failed = 0 for fid in ids_to_fix: - result = self.fix_finding(fid) + result = self.fix_finding(fid, fix_action=fix_action) if result.get('success'): fixed += 1 else: diff --git a/web_server.py b/web_server.py index 04978989..3cf03025 100644 --- a/web_server.py +++ b/web_server.py @@ -41622,7 +41622,9 @@ def repair_finding_fix(finding_id): if repair_worker is None: return jsonify({'error': 'Repair worker not initialized'}), 400 - result = repair_worker.fix_finding(finding_id) + data = request.get_json(silent=True) or {} + fix_action = data.get('fix_action') # e.g. 'staging' or 'delete' for orphan files + result = repair_worker.fix_finding(finding_id, fix_action=fix_action) return jsonify(result), 200 if result.get('success') else 400 except Exception as e: logger.error(f"Error fixing finding {finding_id}: {e}") @@ -41667,9 +41669,11 @@ def repair_findings_bulk_fix(): job_id = data.get('job_id') or None severity = data.get('severity') or None finding_ids = data.get('ids') or None + fix_action = data.get('fix_action') or None result = repair_worker.bulk_fix_findings( - job_id=job_id, severity=severity, finding_ids=finding_ids + job_id=job_id, severity=severity, finding_ids=finding_ids, + fix_action=fix_action ) return jsonify({ 'success': True, diff --git a/webui/static/script.js b/webui/static/script.js index 97c4a815..73f25559 100644 --- a/webui/static/script.js +++ b/webui/static/script.js @@ -53854,7 +53854,7 @@ async function loadRepairFindings() {
${f.status === 'pending' ? ` - ${fixLabel ? `` : ''} + ${fixLabel ? `` : ''} ` : ''} @@ -54221,12 +54221,20 @@ async function fixAllMatchingFindings() { })) return; } + // If fixing orphan files, prompt for action + let fixAction = null; + if (jobId === 'orphan_file_detector') { + fixAction = await _promptOrphanAction(); + if (!fixAction) return; + } + showToast(`Fixing ${_repairFindingsTotal} findings...`, 'info'); try { const body = {}; if (jobId) body.job_id = jobId; if (severity) body.severity = severity; + if (fixAction) body.fix_action = fixAction; const response = await fetch('/api/repair/findings/bulk-fix', { method: 'POST', @@ -54287,7 +54295,14 @@ function renderRepairFindingsPagination(total, currentPage) { container.innerHTML = html; } -async function fixRepairFinding(id) { +async function fixRepairFinding(id, findingType) { + // Orphan files require user to choose an action + let fixAction = null; + if (findingType === 'orphan_file') { + fixAction = await _promptOrphanAction(); + if (!fixAction) return; // User cancelled + } + const card = document.querySelector(`.repair-finding-card[data-id="${id}"]`); const fixBtn = card ? card.querySelector('.repair-finding-btn.fix') : null; let originalText = ''; @@ -54297,7 +54312,12 @@ async function fixRepairFinding(id) { fixBtn.textContent = '...'; } try { - const response = await fetch(`/api/repair/findings/${id}/fix`, { method: 'POST' }); + const body = fixAction ? { fix_action: fixAction } : {}; + const response = await fetch(`/api/repair/findings/${id}/fix`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + }); const result = await response.json(); if (result.success) { showToast(result.message || 'Fixed successfully', 'success'); @@ -54317,6 +54337,39 @@ async function fixRepairFinding(id) { } } +function _promptOrphanAction() { + return new Promise(resolve => { + const overlay = document.createElement('div'); + overlay.className = 'modal-overlay'; + overlay.style.cssText = 'display:flex;align-items:center;justify-content:center;z-index:10000;'; + overlay.innerHTML = ` +
+
Orphan File Action
+
+ What would you like to do with this file? +
+
+ + +
+ +
+ `; + document.body.appendChild(overlay); + + overlay.querySelector('#_orphan-staging').onclick = () => { overlay.remove(); resolve('staging'); }; + overlay.querySelector('#_orphan-delete').onclick = () => { overlay.remove(); resolve('delete'); }; + overlay.querySelector('#_orphan-cancel').onclick = () => { overlay.remove(); resolve(null); }; + overlay.onclick = (e) => { if (e.target === overlay) { overlay.remove(); resolve(null); } }; + }); +} + async function resolveRepairFinding(id) { try { await fetch(`/api/repair/findings/${id}/resolve`, { method: 'POST' }); @@ -54375,12 +54428,28 @@ async function bulkFixFindings() { if (hasMassFlag && !await showWitnessMeDialog(selectedOrphanCards.length)) return; } + // If any selected findings are orphan files, prompt for action once + let orphanFixAction = null; + if (selectedOrphanCards.length > 0) { + orphanFixAction = await _promptOrphanAction(); + if (!orphanFixAction) return; + } + let fixed = 0, failed = 0; showToast(`Fixing ${ids.length} findings...`, 'info'); for (const id of ids) { try { - const response = await fetch(`/api/repair/findings/${id}/fix`, { method: 'POST' }); + // Determine if this finding is an orphan that needs the action + const card = document.querySelector(`.repair-finding-card[data-id="${id}"]`); + const isOrphan = card && card.dataset.jobId === 'orphan_file_detector'; + const body = isOrphan && orphanFixAction ? { fix_action: orphanFixAction } : {}; + + const response = await fetch(`/api/repair/findings/${id}/fix`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + }); const result = await response.json(); if (result.success) fixed++; else failed++;