mirror of https://github.com/Nezreka/SoulSync.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
342 lines
12 KiB
342 lines
12 KiB
"""Quarantine entry management — pure helpers for list/delete/approve/recover.
|
|
|
|
Quarantined files live in `<download_path>/ss_quarantine/` as
|
|
`<timestamp>_<original>.<ext>.quarantined` paired with a JSON sidecar
|
|
`<timestamp>_<original>.json` written by `core.imports.guards.move_to_quarantine`.
|
|
|
|
This module provides the read/write/restore primitives. Web routes are
thin glue around these. Pipeline re-run on approval is the caller's
job (we hand back `(restored_file_path, context, trigger)`; the caller
maps the trigger to the matching bypass flag).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import shutil
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
from utils.logging_config import get_logger
|
|
|
|
# Module-level logger shared by every helper in this file.
logger = get_logger("imports.quarantine")


# Filename suffix carried by every quarantined file.
_QUARANTINE_SUFFIX = ".quarantined"


# JSON-serializable scalar types. Values of these types pass through
# `_coerce_value` unchanged; dict / list / tuple / set values get walked
# recursively; anything else is replaced by its `str()` form during
# sidecar serialization.
_SAFE_SCALARS = (str, int, float, bool, type(None))
|
|
|
|
|
|
def serialize_quarantine_context(context: Any) -> Dict[str, Any]:
    """Build a JSON-safe copy of a context dict for the sidecar.

    The result must survive `json.dump` / `json.load` without raising:
    scalars are kept as-is, dicts are walked recursively, lists /
    tuples / sets are walked element by element (and come back as
    lists), and any other value (custom objects, callables, open file
    handles, etc) is replaced by its `str()` form so the caller still
    sees *something* rather than a silent drop.

    A `context` that is not a dict yields an empty dict.
    """
    return _coerce_dict(context) if isinstance(context, dict) else {}
|
|
|
|
|
|
def _coerce_value(value: Any) -> Any:
    """Return a JSON-safe equivalent of a single value.

    Scalars pass through untouched, dicts are delegated to
    `_coerce_dict`, and lists / tuples / sets become lists whose
    elements are coerced recursively. Everything else is stringified so
    the caller still sees the value's shape; if even `str()` raises,
    the value becomes None.
    """
    if isinstance(value, _SAFE_SCALARS):
        return value
    if isinstance(value, dict):
        return _coerce_dict(value)
    if isinstance(value, (list, tuple, set)):
        return list(map(_coerce_value, value))
    # Fallback: keep a textual trace of the value without breaking the
    # JSON write.
    try:
        text = str(value)
    except Exception:
        return None
    return text
|
|
|
|
|
|
def _coerce_dict(d: Dict[str, Any]) -> Dict[str, Any]:
|
|
out: Dict[str, Any] = {}
|
|
for key, value in d.items():
|
|
if not isinstance(key, str):
|
|
try:
|
|
key = str(key)
|
|
except Exception:
|
|
continue
|
|
out[key] = _coerce_value(value)
|
|
return out
|
|
|
|
|
|
def _entry_id_from_filename(quarantined_filename: str) -> str:
    """Compute the stable entry id for a quarantined filename.

    The `.quarantined` suffix is removed when present, then the last
    remaining extension is removed via `Path.stem`, leaving the bare
    `<timestamp>_<original>` stem. The sidecar is named with that same
    stem plus `.json`, so one id addresses both files.
    """
    has_suffix = quarantined_filename.endswith(_QUARANTINE_SUFFIX)
    without_suffix = (
        quarantined_filename[: -len(_QUARANTINE_SUFFIX)]
        if has_suffix
        else quarantined_filename
    )
    return Path(without_suffix).stem
|
|
|
|
|
|
def list_quarantine_entries(quarantine_dir: str) -> List[Dict[str, Any]]:
    """List every quarantined file together with its sidecar metadata.

    One dict is produced per `.quarantined` regular file, with keys:
    id, filename, original_filename, reason, expected_track,
    expected_artist, timestamp, size_bytes, has_full_context (True when
    the sidecar holds a dict under `context`, which one-click Approve
    needs), trigger, source_username and source_filename.

    A `.quarantined` file without a readable sidecar is still listed
    with fallback values so the caller can delete it; a sidecar without
    a matching file is never listed. The result is sorted by id in
    descending order, which is newest-first because ids begin with the
    timestamp prefix. A missing directory yields an empty list.
    """
    if not os.path.isdir(quarantine_dir):
        return []

    found: List[Dict[str, Any]] = []
    for filename in os.listdir(quarantine_dir):
        if not filename.endswith(_QUARANTINE_SUFFIX):
            continue
        quarantined_path = os.path.join(quarantine_dir, filename)
        if not os.path.isfile(quarantined_path):
            continue

        entry_id = _entry_id_from_filename(filename)

        # Load the sidecar if there is one; anything unreadable or not a
        # JSON object leaves the metadata empty.
        meta: Dict[str, Any] = {}
        meta_path = os.path.join(quarantine_dir, f"{entry_id}.json")
        if os.path.isfile(meta_path):
            try:
                with open(meta_path, encoding="utf-8") as handle:
                    parsed = json.load(handle)
            except Exception as exc:
                logger.debug("sidecar read failed for %s: %s", entry_id, exc)
            else:
                if isinstance(parsed, dict):
                    meta = parsed

        try:
            size_bytes = os.path.getsize(quarantined_path)
        except OSError:
            size_bytes = 0

        # Issue #608 follow-up (AfonsoG6): expose the uploader username
        # and originally downloaded filename. They live under
        # `context.original_search_result` when the full context was
        # persisted and are absent on legacy thin sidecars.
        context = meta.get("context")
        has_context = isinstance(context, dict)
        search_result = context.get("original_search_result") if has_context else None
        if not isinstance(search_result, dict):
            search_result = {}

        found.append(
            {
                "id": entry_id,
                "filename": filename,
                "original_filename": meta.get("original_filename", filename),
                "reason": meta.get("quarantine_reason", "Unknown reason"),
                "expected_track": meta.get("expected_track", ""),
                "expected_artist": meta.get("expected_artist", ""),
                "timestamp": meta.get("timestamp", ""),
                "size_bytes": size_bytes,
                "has_full_context": has_context,
                "trigger": meta.get("trigger", "unknown"),
                "source_username": search_result.get("username", ""),
                "source_filename": search_result.get("filename", ""),
            }
        )

    return sorted(found, key=lambda entry: entry["id"], reverse=True)
|
|
|
|
|
|
def _resolve_entry_paths(quarantine_dir: str, entry_id: str) -> Tuple[Optional[str], Optional[str]]:
|
|
"""Locate the `.quarantined` file + JSON sidecar for an entry id.
|
|
|
|
Returns (file_path, sidecar_path), either may be None if missing.
|
|
"""
|
|
if not os.path.isdir(quarantine_dir) or not entry_id:
|
|
return None, None
|
|
file_path: Optional[str] = None
|
|
for name in os.listdir(quarantine_dir):
|
|
if not name.endswith(_QUARANTINE_SUFFIX):
|
|
continue
|
|
if _entry_id_from_filename(name) == entry_id:
|
|
file_path = os.path.join(quarantine_dir, name)
|
|
break
|
|
sidecar_path = os.path.join(quarantine_dir, f"{entry_id}.json")
|
|
if not os.path.isfile(sidecar_path):
|
|
sidecar_path = None
|
|
return file_path, sidecar_path
|
|
|
|
|
|
def delete_quarantine_entry(quarantine_dir: str, entry_id: str) -> bool:
    """Remove the quarantined file and its sidecar for `entry_id`.

    Each of the two files is removed independently; a failure on one is
    logged and does not prevent the attempt on the other.

    Returns True if at least one of the two was removed, False when
    neither could be removed (for example the entry is already gone).
    """
    file_path, sidecar_path = _resolve_entry_paths(quarantine_dir, entry_id)
    targets = (
        (file_path, "Failed to delete quarantine file %s: %s"),
        (sidecar_path, "Failed to delete quarantine sidecar %s: %s"),
    )

    removed_any = False
    for path, failure_message in targets:
        if not path or not os.path.isfile(path):
            continue
        try:
            os.remove(path)
        except OSError as exc:
            logger.error(failure_message, path, exc)
        else:
            removed_any = True
    return removed_any
|
|
|
|
|
|
def _restore_filename(quarantined_filename: str, sidecar_original: Optional[str] = None) -> str:
|
|
"""Resolve the filename to restore.
|
|
|
|
Sidecar's `original_filename` wins when provided — it's the
|
|
canonical record of what the file was named before quarantine.
|
|
Otherwise parse the `<YYYYMMDD_HHMMSS>_<original>.<ext>.quarantined`
|
|
convention written by `move_to_quarantine`, dropping the timestamp
|
|
prefix and `.quarantined` suffix. Final fallback returns the
|
|
quarantined filename minus the suffix unchanged.
|
|
"""
|
|
if sidecar_original:
|
|
return sidecar_original
|
|
base = quarantined_filename
|
|
if base.endswith(_QUARANTINE_SUFFIX):
|
|
base = base[: -len(_QUARANTINE_SUFFIX)]
|
|
parts = base.split("_", 2)
|
|
if len(parts) >= 3 and parts[0].isdigit() and parts[1].isdigit():
|
|
return parts[2]
|
|
return base
|
|
|
|
|
|
def approve_quarantine_entry(
    quarantine_dir: str,
    entry_id: str,
    restore_dir: str,
) -> Optional[Tuple[str, Dict[str, Any], str]]:
    """Restore a quarantined file for re-import via the post-process pipeline.

    Reads the sidecar's `context` + `trigger`, moves the file out of
    quarantine to `restore_dir` (with the original filename + extension),
    deletes the sidecar.

    Args:
        quarantine_dir: Directory holding quarantined files and sidecars.
        entry_id: Bare `<timestamp>_<original>` stem identifying the entry.
        restore_dir: Directory the file is moved into; created if missing.

    Returns:
        `(restored_file_path, context, trigger)` so the caller can set
        the appropriate `_skip_quarantine_check` bypass flag and dispatch
        the post-process pipeline.

        None when:
        - the entry doesn't exist (file or sidecar missing)
        - the sidecar is unreadable or is not a JSON object
        - the sidecar lacks a serialized `context` (legacy thin sidecar
          — caller should fall back to `recover_to_staging` instead)
        - `restore_dir` cannot be created or the file move fails
    """
    file_path, sidecar_path = _resolve_entry_paths(quarantine_dir, entry_id)
    if not file_path or not sidecar_path:
        logger.warning("approve: entry %s missing file or sidecar", entry_id)
        return None

    try:
        with open(sidecar_path, encoding="utf-8") as f:
            sidecar = json.load(f)
    except Exception as exc:
        logger.error("approve: sidecar read failed for %s: %s", entry_id, exc)
        return None

    # Valid JSON is not necessarily an object; `.get` below needs a dict.
    if not isinstance(sidecar, dict):
        logger.error("approve: sidecar for %s is not a JSON object", entry_id)
        return None

    context = sidecar.get("context")
    if not isinstance(context, dict):
        logger.info("approve: entry %s has thin sidecar (no context) — caller should recover-to-staging", entry_id)
        return None

    trigger = str(sidecar.get("trigger", "unknown"))

    # Keep only the final path component of the recorded name so the
    # restored file always lands inside `restore_dir`; fall back to the
    # name parsed from the quarantined filename when it is unusable.
    recorded_name = sidecar.get("original_filename")
    original_name = os.path.basename(recorded_name) if isinstance(recorded_name, str) else ""
    if not original_name or original_name in (".", ".."):
        original_name = _restore_filename(os.path.basename(file_path))

    try:
        os.makedirs(restore_dir, exist_ok=True)
    except OSError as exc:
        logger.error("approve: failed to create restore dir %s: %s", restore_dir, exc)
        return None

    restored_path = _ensure_unique_path(os.path.join(restore_dir, original_name))

    try:
        shutil.move(file_path, restored_path)
    except OSError as exc:
        logger.error("approve: failed to restore %s -> %s: %s", file_path, restored_path, exc)
        return None

    # The file is already restored; a leftover sidecar is only worth a
    # warning and is skipped by listings as an orphan.
    try:
        os.remove(sidecar_path)
    except OSError as exc:
        logger.warning("approve: failed to remove sidecar %s: %s", sidecar_path, exc)

    return restored_path, context, trigger
|
|
|
|
|
|
def recover_to_staging(
    quarantine_dir: str,
    staging_dir: str,
    entry_id: str,
) -> Optional[str]:
    """Move a quarantined file into Staging for manual import.

    Strips the timestamp prefix + `.quarantined` suffix (or uses the
    sidecar's `original_filename` when it holds a usable name), drops
    the file into `staging_dir` so the user can finish via the existing
    Import flow. Sidecar is removed. Used as the fallback path for
    legacy thin sidecars (no embedded `context`) where one-click Approve
    is impossible.

    Args:
        quarantine_dir: Directory holding quarantined files and sidecars.
        staging_dir: Directory the file is moved into; created if missing.
        entry_id: Bare `<timestamp>_<original>` stem identifying the entry.

    Returns:
        The path of the file inside `staging_dir`, or None when the
        quarantined file is missing, `staging_dir` cannot be created, or
        the move fails.
    """
    file_path, sidecar_path = _resolve_entry_paths(quarantine_dir, entry_id)
    if not file_path:
        return None

    sidecar_original: Optional[str] = None
    if sidecar_path:
        try:
            with open(sidecar_path, encoding="utf-8") as f:
                loaded = json.load(f)
        except Exception as exc:
            logger.debug("recover: sidecar read failed for %s: %s", entry_id, exc)
        else:
            recorded = loaded.get("original_filename") if isinstance(loaded, dict) else None
            if isinstance(recorded, str):
                # Use only the final path component so the file cannot be
                # steered outside `staging_dir` by the sidecar's contents.
                recorded = os.path.basename(recorded)
                if recorded not in ("", ".", ".."):
                    sidecar_original = recorded

    restored_name = _restore_filename(os.path.basename(file_path), sidecar_original)

    try:
        os.makedirs(staging_dir, exist_ok=True)
    except OSError as exc:
        logger.error("recover: failed to create staging dir %s: %s", staging_dir, exc)
        return None

    target = _ensure_unique_path(os.path.join(staging_dir, restored_name))

    try:
        shutil.move(file_path, target)
    except OSError as exc:
        logger.error("recover: failed to move %s -> %s: %s", file_path, target, exc)
        return None

    # The file is already in staging; a leftover sidecar only earns a warning.
    if sidecar_path and os.path.isfile(sidecar_path):
        try:
            os.remove(sidecar_path)
        except OSError as exc:
            logger.warning("recover: failed to remove sidecar %s: %s", sidecar_path, exc)

    return target
|
|
|
|
|
|
def _ensure_unique_path(target: str) -> str:
|
|
"""Append `_(2)`, `_(3)`, ... before the extension when target exists."""
|
|
if not os.path.exists(target):
|
|
return target
|
|
base, ext = os.path.splitext(target)
|
|
counter = 2
|
|
while True:
|
|
candidate = f"{base}_({counter}){ext}"
|
|
if not os.path.exists(candidate):
|
|
return candidate
|
|
counter += 1
|