Fix orphan detector false positives and add staging/delete choice

Orphan detector: add normalized tag matching that strips parentheticals
and brackets (feat. X, [FLAC 16bit], etc.) and tries first-artist-only
for comma-separated artists. Prevents false orphan flags for tracks
like "The Mountain (feat. Dennis Hopper...)" that exist in DB as
"The Mountain". All lookups remain O(1) set operations.

Orphan fix: replace auto-delete with user choice prompt. Single Fix
and Fix All both show modal asking "Move to Staging" or "Delete".
Move to Staging relocates file to import staging folder for proper
re-import with metadata matching. Fix action flows through API
endpoint → repair_worker.fix_finding → _fix_orphan_file handler.
Staging path uses docker_resolve_path for container compatibility.
pull/253/head
Broque Thomas 2 months ago
parent f991d02f0c
commit 4dba3757be

@ -1,6 +1,7 @@
"""Orphan File Detector Job — finds files in the transfer folder not tracked in the DB."""
import os
import re
import time
from core.repair_jobs import register_job
@ -46,8 +47,14 @@ class OrphanFileDetectorJob(RepairJob):
# of depth 1-3 (filename, album/filename, artist/album/filename) which
# covers all realistic path-prefix mismatches.
known_suffixes = set()
known_titles = set() # (title_lower, artist_lower) for tag-based fallback
known_titles = set() # (title_lower, artist_lower) for exact match
known_titles_clean = set() # (clean_title, clean_artist) for normalized match
conn = None
def _strip_extras(s):
"""Strip parentheticals/brackets for normalized comparison."""
return re.sub(r'\s*[\(\[][^)\]]*[\)\]]', '', s).strip()
try:
conn = context.db._get_connection()
cursor = conn.cursor()
@ -59,7 +66,7 @@ class OrphanFileDetectorJob(RepairJob):
suffix = '/'.join(parts[-depth:]).lower()
known_suffixes.add(suffix)
# Build title+artist set for tag-based fallback matching
# Build title+artist sets for tag-based fallback matching
cursor.execute("""
SELECT t.title, ar.name FROM tracks t
LEFT JOIN artists ar ON ar.id = t.artist_id
@ -70,6 +77,11 @@ class OrphanFileDetectorJob(RepairJob):
artist = (row[1] or '').lower().strip()
if title:
known_titles.add((title, artist))
# Also store normalized version (stripped of feat., parentheticals, etc.)
clean_t = _strip_extras(title)
clean_a = _strip_extras(artist)
if clean_t:
known_titles_clean.add((clean_t, clean_a))
except Exception as e:
logger.error("Error reading known file paths from DB: %s", e, exc_info=True)
result.errors += 1
@ -122,7 +134,8 @@ class OrphanFileDetectorJob(RepairJob):
break
# Fallback: read file tags and check if title+artist exists in DB
# Catches path mismatches where the file is tracked but under a different path
# Catches path mismatches where the file is tracked but under a different path.
# Uses both exact and normalized comparison to handle feat. suffixes, etc.
if not is_known and known_titles:
try:
from mutagen import File as MutagenFile
@ -130,8 +143,21 @@ class OrphanFileDetectorJob(RepairJob):
if audio:
file_title = ((audio.get('title') or [None])[0] or '').lower().strip()
file_artist = ((audio.get('artist') or [None])[0] or '').lower().strip()
if file_title and (file_title, file_artist) in known_titles:
is_known = True
if file_title:
# Exact match first (fast path)
if (file_title, file_artist) in known_titles:
is_known = True
else:
# Normalized match: strip (feat. X), [FLAC 16bit], etc.
clean_title = _strip_extras(file_title)
clean_artist = _strip_extras(file_artist)
# Also try first artist only (handles "Gorillaz, Dennis Hopper" → "Gorillaz")
first_artist = clean_artist.split(',')[0].strip() if clean_artist else ''
if clean_title and (
(clean_title, clean_artist) in known_titles_clean or
(first_artist and (clean_title, first_artist) in known_titles_clean)
):
is_known = True
except Exception:
pass

@ -749,8 +749,13 @@ class RepairWorker:
if conn:
conn.close()
def fix_finding(self, finding_id: int) -> dict:
"""Execute the appropriate fix action for a finding, then mark it resolved."""
def fix_finding(self, finding_id: int, fix_action: str = None) -> dict:
"""Execute the appropriate fix action for a finding, then mark it resolved.
Args:
finding_id: ID of the finding to fix
fix_action: Optional action override (e.g. 'staging' or 'delete' for orphan files)
"""
conn = None
try:
conn = self.db._get_connection()
@ -769,6 +774,10 @@ class RepairWorker:
conn.close()
conn = None
# Pass fix_action through to handler via details
if fix_action:
details['_fix_action'] = fix_action
# Dispatch fix by finding type
result = self._execute_fix(finding_type, entity_type, entity_id, file_path, details)
@ -902,9 +911,21 @@ class RepairWorker:
conn.close()
def _fix_orphan_file(self, entity_type, entity_id, file_path, details):
"""Delete the orphan file from disk."""
"""Handle an orphan file — move to staging or delete based on user choice.
The fix_action is passed via details['_fix_action']:
'staging' move file to the staging folder for import
'delete' delete file from disk
If no action specified, returns an error asking the user to choose.
"""
fix_action = details.get('_fix_action', '')
if fix_action not in ('staging', 'delete'):
return {'success': False, 'error': 'Please choose an action: move to staging or delete',
'needs_action': True}
if not file_path:
return {'success': False, 'error': 'No file path associated with this finding'}
try:
# Resolve path in case of cross-environment mismatch
download_folder = None
@ -912,25 +933,60 @@ class RepairWorker:
download_folder = self._config_manager.get('soulseek.download_path', '')
resolved = _resolve_file_path(file_path, self.transfer_folder, download_folder) or file_path
if os.path.exists(resolved):
if not os.path.exists(resolved):
return {'success': True, 'action': 'already_gone',
'message': 'File was already removed'}
if fix_action == 'staging':
# Move to staging folder
staging_path = './Staging'
if self._config_manager:
staging_path = self._config_manager.get('import.staging_path', './Staging')
staging_path = self._resolve_path(staging_path)
os.makedirs(staging_path, exist_ok=True)
dest = os.path.join(staging_path, os.path.basename(resolved))
# Avoid overwriting existing files in staging
if os.path.exists(dest):
base, ext = os.path.splitext(os.path.basename(resolved))
counter = 1
while os.path.exists(dest):
dest = os.path.join(staging_path, f"{base} ({counter}){ext}")
counter += 1
import shutil
shutil.move(resolved, dest)
# Clean up empty parent directories
self._cleanup_empty_parents(resolved)
return {'success': True, 'action': 'moved_to_staging',
'message': f'Moved to staging folder for import'}
elif fix_action == 'delete':
os.remove(resolved)
# Clean up empty parent directories (never remove transfer folder itself)
transfer_norm = os.path.normpath(self.transfer_folder)
parent = os.path.dirname(resolved)
for _ in range(3): # Up to 3 levels (track/album/artist)
if (parent and os.path.isdir(parent)
and os.path.normpath(parent) != transfer_norm
and not os.listdir(parent)):
os.rmdir(parent)
parent = os.path.dirname(parent)
else:
break
self._cleanup_empty_parents(resolved)
return {'success': True, 'action': 'deleted_file',
'message': 'Deleted orphan file from disk'}
return {'success': True, 'action': 'already_gone',
'message': 'File was already removed'}
except OSError as e:
return {'success': False, 'error': f'Failed to delete file: {e}'}
return {'success': False, 'error': f'Failed to handle orphan file: {e}'}
def _cleanup_empty_parents(self, file_path):
"""Remove empty parent directories up to 3 levels, never removing the transfer folder."""
try:
transfer_norm = os.path.normpath(self.transfer_folder)
parent = os.path.dirname(file_path)
for _ in range(3):
if (parent and os.path.isdir(parent)
and os.path.normpath(parent) != transfer_norm
and not os.listdir(parent)):
os.rmdir(parent)
parent = os.path.dirname(parent)
else:
break
except OSError:
pass
def _fix_track_number(self, entity_type, entity_id, file_path, details):
"""Fix track number in file tags, rename file, and update DB."""
@ -1874,8 +1930,12 @@ class RepairWorker:
conn.close()
def bulk_fix_findings(self, job_id: str = None, severity: str = None,
finding_ids: List[int] = None) -> dict:
"""Fix all pending fixable findings matching filters. Returns {fixed, failed, skipped}."""
finding_ids: List[int] = None, fix_action: str = None) -> dict:
"""Fix all pending fixable findings matching filters. Returns {fixed, failed, skipped}.
Args:
fix_action: Optional action for findings that need user choice (e.g. orphan files)
"""
conn = None
try:
conn = self.db._get_connection()
@ -1910,7 +1970,7 @@ class RepairWorker:
fixed = 0
failed = 0
for fid in ids_to_fix:
result = self.fix_finding(fid)
result = self.fix_finding(fid, fix_action=fix_action)
if result.get('success'):
fixed += 1
else:

@ -41622,7 +41622,9 @@ def repair_finding_fix(finding_id):
if repair_worker is None:
return jsonify({'error': 'Repair worker not initialized'}), 400
result = repair_worker.fix_finding(finding_id)
data = request.get_json(silent=True) or {}
fix_action = data.get('fix_action') # e.g. 'staging' or 'delete' for orphan files
result = repair_worker.fix_finding(finding_id, fix_action=fix_action)
return jsonify(result), 200 if result.get('success') else 400
except Exception as e:
logger.error(f"Error fixing finding {finding_id}: {e}")
@ -41667,9 +41669,11 @@ def repair_findings_bulk_fix():
job_id = data.get('job_id') or None
severity = data.get('severity') or None
finding_ids = data.get('ids') or None
fix_action = data.get('fix_action') or None
result = repair_worker.bulk_fix_findings(
job_id=job_id, severity=severity, finding_ids=finding_ids
job_id=job_id, severity=severity, finding_ids=finding_ids,
fix_action=fix_action
)
return jsonify({
'success': True,

@ -53854,7 +53854,7 @@ async function loadRepairFindings() {
</div>
<div class="repair-finding-actions" onclick="event.stopPropagation()">
${f.status === 'pending' ? `
${fixLabel ? `<button class="repair-finding-btn fix" onclick="fixRepairFinding(${f.id})" title="${fixLabel}">${_escFinding(fixLabel)}</button>` : ''}
${fixLabel ? `<button class="repair-finding-btn fix" onclick="fixRepairFinding(${f.id}, '${f.finding_type}')" title="${fixLabel}">${_escFinding(fixLabel)}</button>` : ''}
<button class="repair-finding-btn dismiss" onclick="dismissRepairFinding(${f.id})" title="Dismiss">&times;</button>
` : ''}
<button class="repair-finding-expand-btn" data-finding="${f.id}" title="Details">&#9660;</button>
@ -54221,12 +54221,20 @@ async function fixAllMatchingFindings() {
})) return;
}
// If fixing orphan files, prompt for action
let fixAction = null;
if (jobId === 'orphan_file_detector') {
fixAction = await _promptOrphanAction();
if (!fixAction) return;
}
showToast(`Fixing ${_repairFindingsTotal} findings...`, 'info');
try {
const body = {};
if (jobId) body.job_id = jobId;
if (severity) body.severity = severity;
if (fixAction) body.fix_action = fixAction;
const response = await fetch('/api/repair/findings/bulk-fix', {
method: 'POST',
@ -54287,7 +54295,14 @@ function renderRepairFindingsPagination(total, currentPage) {
container.innerHTML = html;
}
async function fixRepairFinding(id) {
async function fixRepairFinding(id, findingType) {
// Orphan files require user to choose an action
let fixAction = null;
if (findingType === 'orphan_file') {
fixAction = await _promptOrphanAction();
if (!fixAction) return; // User cancelled
}
const card = document.querySelector(`.repair-finding-card[data-id="${id}"]`);
const fixBtn = card ? card.querySelector('.repair-finding-btn.fix') : null;
let originalText = '';
@ -54297,7 +54312,12 @@ async function fixRepairFinding(id) {
fixBtn.textContent = '...';
}
try {
const response = await fetch(`/api/repair/findings/${id}/fix`, { method: 'POST' });
const body = fixAction ? { fix_action: fixAction } : {};
const response = await fetch(`/api/repair/findings/${id}/fix`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(body),
});
const result = await response.json();
if (result.success) {
showToast(result.message || 'Fixed successfully', 'success');
@ -54317,6 +54337,39 @@ async function fixRepairFinding(id) {
}
}
function _promptOrphanAction() {
return new Promise(resolve => {
const overlay = document.createElement('div');
overlay.className = 'modal-overlay';
overlay.style.cssText = 'display:flex;align-items:center;justify-content:center;z-index:10000;';
overlay.innerHTML = `
<div style="background:#1e1e2e;border:1px solid rgba(255,255,255,0.1);border-radius:16px;padding:28px;max-width:380px;width:90%;text-align:center;">
<div style="font-size:1.1em;font-weight:600;color:#fff;margin-bottom:8px;">Orphan File Action</div>
<div style="font-size:0.88em;color:rgba(255,255,255,0.6);margin-bottom:20px;">
What would you like to do with this file?
</div>
<div style="display:flex;gap:10px;justify-content:center;">
<button id="_orphan-staging" style="padding:10px 20px;border-radius:10px;border:1px solid rgba(29,185,84,0.4);background:rgba(29,185,84,0.15);color:#1db954;font-weight:600;cursor:pointer;font-family:inherit;">
Move to Staging
</button>
<button id="_orphan-delete" style="padding:10px 20px;border-radius:10px;border:1px solid rgba(239,68,68,0.4);background:rgba(239,68,68,0.1);color:#ef4444;font-weight:500;cursor:pointer;font-family:inherit;">
Delete
</button>
</div>
<button id="_orphan-cancel" style="margin-top:12px;padding:6px 16px;border:none;background:none;color:rgba(255,255,255,0.4);cursor:pointer;font-size:0.82em;font-family:inherit;">
Cancel
</button>
</div>
`;
document.body.appendChild(overlay);
overlay.querySelector('#_orphan-staging').onclick = () => { overlay.remove(); resolve('staging'); };
overlay.querySelector('#_orphan-delete').onclick = () => { overlay.remove(); resolve('delete'); };
overlay.querySelector('#_orphan-cancel').onclick = () => { overlay.remove(); resolve(null); };
overlay.onclick = (e) => { if (e.target === overlay) { overlay.remove(); resolve(null); } };
});
}
async function resolveRepairFinding(id) {
try {
await fetch(`/api/repair/findings/${id}/resolve`, { method: 'POST' });
@ -54375,12 +54428,28 @@ async function bulkFixFindings() {
if (hasMassFlag && !await showWitnessMeDialog(selectedOrphanCards.length)) return;
}
// If any selected findings are orphan files, prompt for action once
let orphanFixAction = null;
if (selectedOrphanCards.length > 0) {
orphanFixAction = await _promptOrphanAction();
if (!orphanFixAction) return;
}
let fixed = 0, failed = 0;
showToast(`Fixing ${ids.length} findings...`, 'info');
for (const id of ids) {
try {
const response = await fetch(`/api/repair/findings/${id}/fix`, { method: 'POST' });
// Determine if this finding is an orphan that needs the action
const card = document.querySelector(`.repair-finding-card[data-id="${id}"]`);
const isOrphan = card && card.dataset.jobId === 'orphan_file_detector';
const body = isOrphan && orphanFixAction ? { fix_action: orphanFixAction } : {};
const response = await fetch(`/api/repair/findings/${id}/fix`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(body),
});
const result = await response.json();
if (result.success) fixed++;
else failed++;

Loading…
Cancel
Save