You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
SoulSync/core/reorganize_queue.py

454 lines
19 KiB

"""FIFO queue for library album reorganize requests.
Replaces the single-slot "one reorganize at a time, return 409 on
collision" model with a queue: clicks always succeed (or surface
"already queued" on dedupe), the user can fan-out clicks across
albums or hit "Reorganize All", and a single background worker
chews through the queue in submission order.
Design rules:
- **Single global queue**, single worker thread. Reorganize is
I/O-heavy (file copy, mutagen tagging, AcoustID, possibly ffmpeg)
and post-process is not designed for cross-album concurrency.
In-album track parallelism still happens inside `reorganize_album`
(3 worker threads — see `_REORGANIZE_MAX_WORKERS`).
- **Dedupe on enqueue**: an album that's already queued or currently
running is rejected silently. Stops the user from spamming the
same album N times by clicking the button repeatedly.
- **Per-item source**: each queued item carries its own `source`
string (the user's per-album modal pick). Worker passes it
through to `reorganize_album(primary_source=..., strict_source=...)`.
- **Continue on failure**: a failed item doesn't stop the queue.
Worker logs the failure, marks the item `failed`, moves on.
- **Cancel queued items**: items in `queued` state can be cancelled
(drop from queue). The currently-running item can NOT be cancelled
mid-flight — Python threads aren't cleanly killable, and post-
process spawns subprocesses we can't safely interrupt. Cancel
changes the item's status to `cancelled` and removes it from the
active queue.
- **In-memory only**: queue state lives in a module-level singleton.
A server restart loses the queue (in-flight item likely also lost
half-way through post-process). DB persistence is a follow-up if
this turns out to matter operationally.
"""
import threading
import time
import uuid
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
from utils.logging_config import get_logger
logger = get_logger("reorganize_queue")
# How many recently-completed items to retain for the snapshot endpoint.
# The status panel uses these to show "just-finished" cards briefly so
# the user sees outcomes scroll past instead of items vanishing.
_RECENT_HISTORY_CAP = 30
@dataclass
class QueueItem:
"""One album waiting (or being processed) in the reorganize queue."""
queue_id: str # uuid; how the API references this item
album_id: str
album_title: str # captured at enqueue time for UI display
artist_id: Optional[str]
artist_name: str # captured at enqueue time for UI display
source: Optional[str] # the user's per-modal pick (None = auto)
enqueued_at: float
status: str = 'queued' # queued | running | done | failed | cancelled
started_at: Optional[float] = None
finished_at: Optional[float] = None
# Populated by the worker after each item finishes — surfaced to the
# status panel so users see counts + per-item error messages.
result_status: Optional[str] = None # mirrors `reorganize_album` summary['status']
result_source: Optional[str] = None # which source the orchestrator actually used
moved: int = 0
skipped: int = 0
failed: int = 0
error: Optional[str] = None # shorthand for the first error, for the toast
# Live-progress fields for the currently-running item; cleared when
# the worker moves on so the snapshot stays small.
current_track: Optional[str] = None
progress_total: int = 0
progress_processed: int = 0
def to_snapshot(self) -> dict:
return {
'queue_id': self.queue_id,
'album_id': self.album_id,
'album_title': self.album_title,
'artist_id': self.artist_id,
'artist_name': self.artist_name,
'source': self.source,
'enqueued_at': self.enqueued_at,
'started_at': self.started_at,
'finished_at': self.finished_at,
'status': self.status,
'result_status': self.result_status,
'result_source': self.result_source,
'moved': self.moved,
'skipped': self.skipped,
'failed': self.failed,
'error': self.error,
'current_track': self.current_track,
'progress_total': self.progress_total,
'progress_processed': self.progress_processed,
}
class ReorganizeQueue:
"""Module-level singleton that owns the queue + worker thread.
Use the module-level :func:`get_queue` accessor — don't construct
directly. The class is documented public-style so tests can spin
up isolated instances.
"""
def __init__(self, *, runner: Optional[Callable[[QueueItem], dict]] = None):
"""
Args:
runner: Callable that takes a `QueueItem` and runs the
actual reorganize, returning a summary dict with
``status``, ``source``, ``moved``, ``skipped``,
``failed``, ``errors`` keys (the shape
``reorganize_album`` already returns). Tests inject
a fake runner; production wires the real one in
via :func:`set_runner`.
"""
# Single Condition variable owns both mutual exclusion and the
# idle-worker wait. Using a Condition (vs Lock + Event) closes a
# race where the worker could clear an event right after enqueue
# set it, causing the new item to sleep for the timeout window.
# cond.wait() releases the lock and re-acquires on notify, so
# state checks and waits are properly interleaved.
self._cond = threading.Condition()
self._items: List[QueueItem] = [] # everything ever submitted (active + recent)
self._runner = runner
self._worker: Optional[threading.Thread] = None
self._stopped = False
# -- public API --------------------------------------------------
def set_runner(self, runner: Callable[[QueueItem], dict]) -> None:
"""Inject the function that does the actual reorganize work.
Web_server calls this once at startup with a closure over the
injected dependencies (post-process fn, db, etc.)."""
with self._cond:
self._runner = runner
def enqueue(
self,
*,
album_id: str,
album_title: str,
artist_id: Optional[str],
artist_name: str,
source: Optional[str] = None,
) -> dict:
"""Add an album to the queue. Returns a result dict:
{'queued': True, 'queue_id': '...', 'position': N}
{'queued': False, 'reason': 'already_queued', 'queue_id': '...'}
Dedupe: if this album is already in `queued` or `running`
status, returns the existing entry's queue_id rather than
adding a duplicate. ``cancelled`` / ``done`` / ``failed``
items don't block re-enqueue (user retried after a failure).
"""
with self._cond:
for existing in self._items:
if existing.album_id == album_id and existing.status in ('queued', 'running'):
return {
'queued': False,
'reason': 'already_queued',
'queue_id': existing.queue_id,
}
item = QueueItem(
queue_id=uuid.uuid4().hex[:12],
album_id=album_id,
album_title=album_title,
artist_id=artist_id,
artist_name=artist_name,
source=source,
enqueued_at=time.time(),
)
self._items.append(item)
position = sum(1 for i in self._items if i.status == 'queued')
self._ensure_worker()
self._cond.notify_all()
logger.info(
f"[Queue] Enqueued '{album_title}' (album_id={album_id}, "
f"queue_id={item.queue_id}, position={position}, source={source or 'auto'})"
)
return {
'queued': True,
'queue_id': item.queue_id,
'position': position,
}
def enqueue_many(self, items: List[Dict[str, Any]]) -> Dict[str, int]:
"""Bulk-enqueue a list of items. Each ``item`` is a dict with
the same keys :meth:`enqueue` accepts (``album_id``,
``album_title``, ``artist_id``, ``artist_name``, ``source``).
Dedupe still applies per-album-id.
Holds the queue lock for the entire batch so two things hold:
(1) the worker can't start draining mid-batch, and (2) duplicate
album_ids inside the same batch get deduped against each other,
not just against pre-existing items. Without (2), a fast runner
could finish the first copy before the loop reached the second
and both would enqueue.
Returns a tally dict ``{'enqueued': N, 'already_queued': M,
'total': len(items)}`` so the caller can report bulk results
without doing the counting themselves. Used by the bulk
Reorganize-All endpoint and any future maintenance jobs that
enqueue at scale.
"""
enqueued = 0
already = 0
seen_in_batch: set = set()
with self._cond:
# Snapshot album_ids that already block re-enqueue so we don't
# rescan self._items per row.
blocked = {
i.album_id for i in self._items if i.status in ('queued', 'running')
}
for raw in items:
album_id = str(raw['album_id'])
if album_id in blocked or album_id in seen_in_batch:
already += 1
continue
seen_in_batch.add(album_id)
item = QueueItem(
queue_id=uuid.uuid4().hex[:12],
album_id=album_id,
album_title=raw.get('album_title') or 'Unknown Album',
artist_id=str(raw['artist_id']) if raw.get('artist_id') is not None else None,
artist_name=raw.get('artist_name') or 'Unknown Artist',
source=raw.get('source'),
enqueued_at=time.time(),
)
self._items.append(item)
enqueued += 1
logger.info(
f"[Queue] Bulk-enqueued '{item.album_title}' (album_id={album_id}, "
f"queue_id={item.queue_id}, source={item.source or 'auto'})"
)
if enqueued:
self._ensure_worker()
self._cond.notify_all()
return {'enqueued': enqueued, 'already_queued': already, 'total': len(items)}
def cancel(self, queue_id: str) -> dict:
"""Cancel a queued item. The currently-running item cannot be
cancelled (Python threads aren't cleanly killable; post-process
may have spawned ffmpeg)."""
with self._cond:
for item in self._items:
if item.queue_id != queue_id:
continue
if item.status == 'queued':
item.status = 'cancelled'
item.finished_at = time.time()
logger.info(f"[Queue] Cancelled queued item {queue_id} ('{item.album_title}')")
return {'cancelled': True}
if item.status == 'running':
return {'cancelled': False, 'reason': 'running_cant_cancel'}
return {'cancelled': False, 'reason': 'not_active'}
return {'cancelled': False, 'reason': 'not_found'}
def clear_queued(self) -> int:
"""Cancel ALL queued items (running item continues). Returns
the count of items cancelled."""
cancelled = 0
with self._cond:
now = time.time()
for item in self._items:
if item.status == 'queued':
item.status = 'cancelled'
item.finished_at = now
cancelled += 1
if cancelled:
logger.info(f"[Queue] Bulk-cancelled {cancelled} queued items")
return cancelled
def snapshot(self) -> dict:
"""Current queue state for the status panel. Returns:
{
'active': item dict | None,
'queued': [item dicts in FIFO order],
'recent': [item dicts in finish order, newest first, capped],
'totals': {'queued': N, 'running': M, 'done_today': K, ...},
}
"""
with self._cond:
active = next((i for i in self._items if i.status == 'running'), None)
queued = [i for i in self._items if i.status == 'queued']
recent = [i for i in self._items if i.status in ('done', 'failed', 'cancelled')]
recent.sort(key=lambda i: i.finished_at or 0, reverse=True)
recent = recent[:_RECENT_HISTORY_CAP]
return {
'active': active.to_snapshot() if active else None,
'queued': [i.to_snapshot() for i in queued],
'recent': [i.to_snapshot() for i in recent],
'totals': {
'queued': len(queued),
'running': 1 if active else 0,
'done': sum(1 for i in self._items if i.status == 'done'),
'failed': sum(1 for i in self._items if i.status == 'failed'),
'cancelled': sum(1 for i in self._items if i.status == 'cancelled'),
},
}
def stop(self) -> None:
"""Stop the worker (called on server shutdown)."""
with self._cond:
self._stopped = True
self._cond.notify_all()
# -- internals ---------------------------------------------------
def _ensure_worker(self) -> None:
"""Lazy worker start — only spawn the thread when there's
actually something to process. Caller MUST hold ``_cond``."""
if self._worker is not None and self._worker.is_alive():
return
self._worker = threading.Thread(
target=self._run, daemon=True, name='ReorganizeQueueWorker'
)
self._worker.start()
def _claim_next_or_wait(self) -> Optional[QueueItem]:
"""Atomically pick the next queued item AND flip it to 'running'
under a single lock acquisition. If the queue is empty, block
on ``_cond.wait()`` (which releases the lock while sleeping)
and return None when we're notified or timeout. Returning the
item already-marked-running closes the cancel-vs-run race: a
cancel() call now sees status='running' and is rejected."""
with self._cond:
while not self._stopped:
for item in self._items:
if item.status == 'queued':
item.status = 'running'
item.started_at = time.time()
return item
# No queued items — wait for an enqueue or shutdown.
# 60s timeout so a stuck notify (shouldn't happen, but
# defensive) doesn't park the worker forever.
self._cond.wait(timeout=60)
return None
def _run(self) -> None:
"""Worker loop: pull next queued, run it, mark done, repeat.
Idles on `_cond.wait()` when queue is empty."""
logger.info("[Queue] Worker thread started")
while not self._stopped:
item = self._claim_next_or_wait()
if item is None:
# Only happens on shutdown — `_claim_next_or_wait` only
# returns None once `_stopped` is True. Loop back to the
# `while not self._stopped` check, which exits.
continue
logger.info(f"[Queue] Starting '{item.album_title}' (queue_id={item.queue_id})")
try:
runner = self._runner
if runner is None:
raise RuntimeError("Queue has no runner configured — call set_runner() at startup")
summary = runner(item)
except Exception as e:
logger.error(
f"[Queue] Runner raised for '{item.album_title}': {e}",
exc_info=True,
)
with self._cond:
item.status = 'failed'
item.error = str(e)
item.finished_at = time.time()
continue
with self._cond:
item.moved = int(summary.get('moved', 0))
item.skipped = int(summary.get('skipped', 0))
item.failed = int(summary.get('failed', 0))
item.result_status = summary.get('status')
item.result_source = summary.get('source')
errors = summary.get('errors') or []
if errors:
first_err = errors[0] if isinstance(errors[0], dict) else {'error': str(errors[0])}
item.error = first_err.get('error') or first_err.get('reason')
# 'failed' status only when the run produced concrete failed tracks
# OR ended in a non-completed state (no_source_id / no_album / etc).
item.status = 'failed' if (item.failed > 0 or item.result_status not in (None, 'completed')) else 'done'
item.finished_at = time.time()
# Clear live-progress fields — done items don't need them.
item.current_track = None
item.progress_total = 0
item.progress_processed = 0
logger.info(
f"[Queue] Finished '{item.album_title}' — status={item.status}, "
f"moved={item.moved}, skipped={item.skipped}, failed={item.failed}"
)
logger.info("[Queue] Worker thread exiting")
# Called by the runner (or test) to push live progress onto the
# currently-running item. Safe to call from worker thread inside
# reorganize_album's on_progress callback.
def update_active_progress(self, *, queue_id: str, **fields) -> None:
with self._cond:
for item in self._items:
if item.queue_id == queue_id and item.status == 'running':
if 'current_track' in fields:
item.current_track = fields['current_track']
if 'total' in fields:
item.progress_total = int(fields['total'])
if 'processed' in fields:
item.progress_processed = int(fields['processed'])
if 'moved' in fields:
item.moved = int(fields['moved'])
if 'skipped' in fields:
item.skipped = int(fields['skipped'])
if 'failed' in fields:
item.failed = int(fields['failed'])
return
# Module-level singleton accessor ---------------------------------------------
_singleton: Optional[ReorganizeQueue] = None
_singleton_lock = threading.Lock()
def get_queue() -> ReorganizeQueue:
global _singleton
with _singleton_lock:
if _singleton is None:
_singleton = ReorganizeQueue()
return _singleton
def reset_queue_for_tests() -> None:
"""Test-only: drop the singleton so the next get_queue() returns
a fresh instance. Production code never calls this."""
global _singleton
with _singleton_lock:
if _singleton is not None:
_singleton.stop()
_singleton = None