|
|
"""Re-route a library album's existing files through the same
|
|
|
post-processing pipeline that handles fresh downloads.
|
|
|
|
|
|
The old reorganize endpoint reinvented several wheels — its own template
|
|
|
engine, its own disc-number resolution from file tags, its own sidecar
|
|
|
sweep, its own collision detection. Each of those drifted from the
|
|
|
canonical post-processing path over time, producing reorganize-only
|
|
|
bugs (multi-disc deluxe collapsing to single-disc when even one file's
|
|
|
tag was missing; tracks silently skipped when their file paths didn't
|
|
|
resolve on disk; etc.).
|
|
|
|
|
|
The new design follows the import page's pattern: copy each file to a
|
|
|
staging folder, build the same context dict the download workers
|
|
|
build, then call ``_post_process_matched_download`` for each one.
|
|
|
Post-processing already knows how to pick the right destination, write
|
|
|
the right tags, handle multi-disc subfolders, recreate sidecars (cover
|
|
|
art, lyrics), and run AcoustID verification — there's nothing for
|
|
|
reorganize to add on top.
|
|
|
|
|
|
Hard requirement: the album must have at least one stored
|
|
|
metadata-source ID (spotify_album_id / itunes_album_id / deezer_id /
|
|
|
discogs_id / soul_id). With no source ID we have nothing authoritative
|
|
|
to ask for the canonical tracklist, and silently degrading to file
|
|
|
tags is exactly the failure mode the old code path produced. Albums
|
|
|
without a source ID are reported back to the caller and skipped
|
|
|
entirely.
|
|
|
"""
|
|
|
|
|
|
import os
|
|
|
import shutil
|
|
|
import threading
|
|
|
import time
|
|
|
import uuid
|
|
|
from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, wait
|
|
|
from dataclasses import dataclass, field
|
|
|
from typing import Any, Callable, Dict, List, Optional, Set
|
|
|
|
|
|
# Per-album track concurrency. Matches the download workers' per-batch
|
|
|
# concurrency (3) so reorganize feels comparable to a fresh download.
|
|
|
#
|
|
|
# Operational note: post-processing can spawn an ffmpeg subprocess per
|
|
|
# track if `lossy_copy.downsample_hires` is enabled. With 3 workers
|
|
|
# that's up to 3 concurrent ffmpeg processes. Acceptable for typical
|
|
|
# album sizes (10-20 tracks); on a giant single-album reorganize
|
|
|
# (50+ tracks) ffmpeg's transient memory could be noticeable but each
|
|
|
# subprocess is short-lived so total RAM doesn't pile up. If we ever
|
|
|
# see resource issues from this, drop to 2 here rather than disabling
|
|
|
# concurrency entirely.
|
|
|
_REORGANIZE_MAX_WORKERS = 3
|
|
|
|
|
|
# Watchdog interval — how often the orchestrator checks the worker
|
|
|
# pool while waiting for tasks to finish. Setting this to 30s means
|
|
|
# we log a warning naming any track that's been in flight longer than
|
|
|
# `_HUNG_WORKER_THRESHOLD_SECONDS` (so an operator can investigate) without
|
|
|
# burning CPU on a tight poll. Doesn't kill stuck threads (Python
|
|
|
# can't), just surfaces them.
|
|
|
_WATCHDOG_INTERVAL_SECONDS = 30
|
|
|
_HUNG_WORKER_THRESHOLD_SECONDS = 300 # 5 min — generous; real worst-case
|
|
|
# is ffmpeg downsampling a long
|
|
|
# hi-res FLAC, ~30-60s typically.
|
|
|
|
|
|
from core.metadata_service import (
|
|
|
get_album_for_source,
|
|
|
get_album_tracks_for_source,
|
|
|
get_client_for_source,
|
|
|
get_primary_source,
|
|
|
get_source_priority,
|
|
|
)
|
|
|
from utils.logging_config import get_logger
|
|
|
|
|
|
logger = get_logger("library_reorganize")
|
|
|
|
|
|
|
|
|
def _safe_filename(name: str) -> str:
|
|
|
"""Strip path-illegal characters so we can use the value as a
|
|
|
filename component on the staging path."""
|
|
|
return ''.join(c for c in (name or 'unknown') if c not in '<>:"/\\|?*').strip() or 'unknown'
|
|
|
|
|
|
|
|
|
def _normalize_album_tracks(api_tracks):
|
|
|
"""Normalize the various provider tracklist shapes (dict-with-`items`,
|
|
|
bare list, ``None``) to a single list of item dicts."""
|
|
|
if not api_tracks:
|
|
|
return []
|
|
|
if isinstance(api_tracks, dict):
|
|
|
items = api_tracks.get('items') or []
|
|
|
return items if items else []
|
|
|
if isinstance(api_tracks, list):
|
|
|
return api_tracks
|
|
|
return []
|
|
|
|
|
|
|
|
|
# Metadata sources this module knows about. The source pickers
# (`available_sources_for_album`, `authed_sources`) skip any entry of the
# priority list that is not named here.
SUPPORTED_SOURCES = ('spotify', 'itunes', 'deezer', 'discogs', 'hydrabase')

# Per-source album-ID column mapping on the `albums` table row.
# Key: source name. Value: the album-row key `_extract_source_ids` reads
# to find that source's stored album ID.
_ALBUM_ID_COLUMNS = {
    'spotify': 'spotify_album_id',
    'itunes': 'itunes_album_id',
    'deezer': 'deezer_id',
    'discogs': 'discogs_id',
    'hydrabase': 'soul_id',
}

# Human-facing label for each source. Returned as the 'label' field of the
# picker entries; the raw source name is used when a source has no label.
SOURCE_LABELS = {
    'spotify': 'Spotify',
    'itunes': 'Apple Music (iTunes)',
    'deezer': 'Deezer',
    'discogs': 'Discogs',
    'hydrabase': 'Hydrabase',
}
|
|
|
|
|
|
|
|
|
def _extract_source_ids(album_data: dict) -> Dict[str, str]:
    """Collect the stored album ID for every known metadata source.

    Returns a dict keyed by source name. A source whose column is missing
    or holds a falsy value maps to the empty string, so every source in
    ``_ALBUM_ID_COLUMNS`` is always present in the result.
    """
    ids: Dict[str, str] = {}
    for source_name, column_name in _ALBUM_ID_COLUMNS.items():
        stored = album_data.get(column_name)
        ids[source_name] = stored if stored else ''
    return ids
|
|
|
|
|
|
|
|
|
def available_sources_for_album(album_data: dict) -> List[dict]:
    """List the metadata sources the user may pick for this album.

    A source qualifies only when all of the following hold:
    - it is one of ``SUPPORTED_SOURCES``,
    - the album row carries a stored ID for it,
    - ``get_client_for_source`` returns a client for it.

    Entries are ``{'source': str, 'label': str}`` and follow the order
    returned by ``get_source_priority`` for the primary source. If the
    primary source cannot be determined, ``'deezer'`` is assumed.
    """
    stored_ids = _extract_source_ids(album_data)
    try:
        preferred = get_primary_source()
    except Exception:
        preferred = 'deezer'

    def _usable(name) -> bool:
        # Checks run cheapest-first; the client lookup only happens for
        # supported sources that actually have a stored ID.
        return (
            name in SUPPORTED_SOURCES
            and bool(stored_ids.get(name))
            and get_client_for_source(name) is not None
        )

    return [
        {'source': name, 'label': SOURCE_LABELS.get(name, name)}
        for name in get_source_priority(preferred)
        if _usable(name)
    ]
|
|
|
|
|
|
|
|
|
def authed_sources() -> List[dict]:
    """List every supported metadata source that currently has a client.

    Unlike :func:`available_sources_for_album` this does not look at any
    album row — it is meant for the bulk "Reorganize All" picker, where
    ID coverage differs per album and only reachability matters.

    Entries are ``{'source': str, 'label': str}`` in the order returned by
    ``get_source_priority``; ``'deezer'`` is assumed as primary when the
    primary source cannot be determined.
    """
    try:
        preferred = get_primary_source()
    except Exception:
        preferred = 'deezer'

    reachable = []
    for name in get_source_priority(preferred):
        if name in SUPPORTED_SOURCES and get_client_for_source(name) is not None:
            reachable.append({
                'source': name,
                'label': SOURCE_LABELS.get(name, name),
            })
    return reachable
|
|
|
|
|
|
|
|
|
_UNKNOWN_ARTIST_NAMES = {'unknown artist', 'unknown', ''}
|
|
|
|
|
|
|
|
|
def _is_unknown_artist(artist_name: Optional[str]) -> bool:
|
|
|
if not artist_name:
|
|
|
return True
|
|
|
return str(artist_name).strip().lower() in _UNKNOWN_ARTIST_NAMES
|
|
|
|
|
|
|
|
|
def _looks_like_album_id_title(album_title: Optional[str]) -> bool:
|
|
|
"""Pre-#524 manual-import bug left some albums with a numeric
|
|
|
album_id stored as `albums.title`. Detect that shape so reorganize
|
|
|
can point the user at Unknown Artist Fixer instead of the generic
|
|
|
'run enrichment' hint."""
|
|
|
if not album_title:
|
|
|
return False
|
|
|
stripped = str(album_title).strip()
|
|
|
return len(stripped) >= 6 and stripped.isdigit()
|
|
|
|
|
|
|
|
|
def _unresolvable_reason(album_data: dict, primary_source: str, strict_source: bool) -> str:
    """Explain why reorganize could not place this album.

    Precedence of the returned message:
    1. Placeholder metadata (unknown artist, or a numeric album-ID stored
       as the title) → hint to run the 'Fix Unknown Artists' repair job,
       which re-resolves metadata from file tags.
    2. ``strict_source`` set → the explicitly chosen source had no usable
       tracklist.
    3. Otherwise → the album has no metadata source ID.
    """
    has_placeholder_metadata = (
        _is_unknown_artist(album_data.get('artist_name'))
        or _looks_like_album_id_title(album_data.get('title'))
    )
    if has_placeholder_metadata:
        return (
            "Album has placeholder metadata (Unknown Artist or numeric "
            "title) — run the 'Fix Unknown Artists' repair job to "
            "recover real artist/album from file tags before reorganize"
        )
    if not strict_source:
        return "No metadata source ID for this album"
    return f"Source '{primary_source}' has no usable tracklist for this album"
|
|
|
|
|
|
|
|
|
def _resolve_source(album_data: dict, primary_source: str, strict_source: bool = False):
    """Pick the first metadata source that yields a usable tracklist.

    Candidate sources are the priority list for ``primary_source``, or —
    when ``strict_source`` is True — only ``primary_source`` itself, with
    no fallback (the user explicitly chose it in the reorganize modal, so
    "use Spotify" must not silently become "use Deezer").

    A candidate is skipped when the album has no stored ID for it, when
    either lookup raises (logged as a warning), or when the album payload
    or the normalized tracklist comes back empty.

    Returns ``(source_name, album_meta, tracks_list)`` or
    ``(None, None, None)`` when no candidate works.
    """
    stored_ids = _extract_source_ids(album_data)

    if not strict_source:
        candidates = get_source_priority(primary_source)
    elif primary_source:
        candidates = [primary_source]
    else:
        candidates = []

    for name in candidates:
        album_key = stored_ids.get(name) or ''
        if not album_key:
            continue
        try:
            album_meta = get_album_for_source(name, album_key)
            raw_tracks = get_album_tracks_for_source(name, album_key)
        except Exception as e:
            logger.warning(f"[Reorganize] {name} lookup raised: {e}")
            continue
        track_items = _normalize_album_tracks(raw_tracks)
        if track_items and album_meta:
            return name, album_meta, track_items

    return None, None, None
|
|
|
|
|
|
|
|
|
# Tokens that indicate a *different recording* of a track — when one
|
|
|
# side of a comparison has these and the other doesn't, the two are NOT
|
|
|
# the same track (e.g. "Bitch Don't Kill My Vibe" vs "Bitch Don't Kill
|
|
|
# My Vibe (Remix)" are different recordings; the tier 4 substring match
|
|
|
# would silently merge them otherwise). "Bonus track" is intentionally
|
|
|
# NOT here — it's a marketing annotation, not a recording difference.
|
|
|
_VERSION_DIFFERENTIATORS = frozenset({
|
|
|
'remix', 'remixed',
|
|
|
'live', 'unplugged', 'concert',
|
|
|
'acoustic',
|
|
|
'demo',
|
|
|
'extended', 'edit',
|
|
|
'instrumental', 'karaoke',
|
|
|
'remaster', 'remastered', 'remastering',
|
|
|
'mono', 'stereo',
|
|
|
'acapella', 'cappella',
|
|
|
'cover',
|
|
|
'reprise',
|
|
|
'alternate', 'alt',
|
|
|
'rehearsal',
|
|
|
})
|
|
|
|
|
|
|
|
|
def _differentiators_in(norm_title: str) -> frozenset:
|
|
|
"""Return the set of version-differentiator tokens present in a
|
|
|
normalized title. Used by the tier-4 matcher to reject substring
|
|
|
matches across different recordings of the same song."""
|
|
|
if not norm_title:
|
|
|
return frozenset()
|
|
|
return frozenset(t for t in norm_title.split() if t in _VERSION_DIFFERENTIATORS)
|
|
|
|
|
|
|
|
|
def _normalize_title(value) -> str:
|
|
|
"""Lowercase + strip cosmetic punctuation and treat brackets / dashes
|
|
|
/ slashes as word separators so the same track named slightly
|
|
|
differently across providers and user libraries still matches.
|
|
|
|
|
|
Examples that should normalize equal:
|
|
|
|
|
|
- ``Bitch, Don't Kill My Vibe - Remix`` ↔ ``Bitch, Don't Kill My Vibe (Remix)``
|
|
|
- ``Don't Stop Believin'`` ↔ ``Don’t Stop Believin’``
|
|
|
- ``Swimming Pools (Drank) - Extended Version``
|
|
|
↔ ``Swimming Pools (Drank) (Extended Version)``
|
|
|
"""
|
|
|
if value is None:
|
|
|
return ''
|
|
|
out = str(value).strip().lower()
|
|
|
# Strip characters that don't carry meaning across providers.
|
|
|
for ch in ('"', "'", '‘', '’', '“', '”', '.', ',', '!', '?',
|
|
|
'(', ')', '[', ']', '{', '}'):
|
|
|
out = out.replace(ch, '')
|
|
|
# Treat separators as whitespace so "foo - bar" and "foo (bar)" align.
|
|
|
for ch in ('-', '–', '—', ':', '/', '\\'):
|
|
|
out = out.replace(ch, ' ')
|
|
|
return ' '.join(out.split())
|
|
|
|
|
|
|
|
|
# Title-match scoring grid. Each component's weight was picked to
|
|
|
# satisfy these design rules:
|
|
|
#
|
|
|
# 1. EXACT title alone is enough to win.
|
|
|
# 2. SUBSTRING at the high-confidence floor (≥0.6) is enough to win.
|
|
|
# 3. SUBSTRING at the lower with-tn-match floor (≥0.3) needs the
|
|
|
# track_number bonus to win — track_number provides the missing
|
|
|
# confidence.
|
|
|
# 4. TRACK-NUMBER alone is NOT enough — never falls through to a
|
|
|
# blind track-number lookup on multi-disc albums (that's the
|
|
|
# bug that mis-routed winecountrygames's bonus tracks).
|
|
|
# 5. Different version-differentiator tokens (Remix vs no-remix)
|
|
|
# hard-reject before scoring (see `_score_candidate`).
|
|
|
#
|
|
|
# Worked examples (with threshold = 50):
|
|
|
#
|
|
|
# exact title + tn match 100 + 20 = 120 → match
|
|
|
# exact title alone 100 = 100 → match
|
|
|
# substring ratio 1.0 (no tn match) 50 + 40 = 90 → match
|
|
|
# substring ratio 0.6 (no tn match) 50 + 0 = 50 → match
|
|
|
# substring ratio 0.5 (no tn match) 0 = 0 → no match
|
|
|
# substring ratio 0.45 + tn match 40 + 20 = 60 → match
|
|
|
# substring ratio 0.28 + tn match 0 + 20 = 20 → no match
|
|
|
# (Real vs "Real Real Real")
|
|
|
# track_number alone (no title signal) 0 + 20 = 20 → no match
|
|
|
# different version diffs (any inputs) hard-reject → 0
|
|
|
#
|
|
|
# Weights are deliberately spaced so each gate is well-clear of the
|
|
|
# threshold; small ratio adjustments don't flip a borderline case
|
|
|
# unexpectedly.
|
|
|
|
|
|
# Minimum score a candidate needs; `_find_api_track` discards anything
# scoring below this.
_MATCH_SCORE_THRESHOLD = 50

# Points awarded when the normalized titles are identical.
_W_EXACT_TITLE = 100
# Bonus added whenever the local and API track numbers agree.
_W_TRACK_NUMBER = 20

# Standalone substring (no tn match required): floor + scaled bonus.
# At ratio = floor: contribute base only. At ratio = 1.0: contribute
# base + range. Linear in between.
_W_SUBSTRING_BASE_STANDALONE = 50
_W_SUBSTRING_RATIO_RANGE = 40
_SUBSTRING_RATIO_FLOOR_STANDALONE = 0.6

# With-tn-match substring: lower floor (0.3) but slightly reduced
# base (40) so this path never beats a standalone high-ratio match
# on equal-tn ties.
_W_SUBSTRING_BASE_WITH_TN = 40
_SUBSTRING_RATIO_FLOOR_WITH_TN = 0.3
|
|
|
|
|
|
|
|
|
def _score_candidate(
    norm_local: str,
    local_tn: Optional[int],
    local_diffs: frozenset,
    api_norm: str,
    api_tn: Optional[int],
) -> int:
    """Rate how well one API track matches the local track.

    A higher score means a more confident match; 0 means there is no
    usable signal. The caller keeps the best candidate at or above
    :data:`_MATCH_SCORE_THRESHOLD` and treats everything else as
    unmatched.

    Args:
        norm_local: Normalized local title.
        local_tn: Local track number, or ``None`` when unknown.
        local_diffs: Version-differentiator words in the local title.
        api_norm: Normalized API title.
        api_tn: API track number, or ``None`` when unknown.

    Scoring:

    - Either title empty → 0.
    - Version-differentiator sets differ (e.g. ``Remix`` on one side
      only) → 0; those are different recordings.
    - Identical titles → exact-title weight.
    - One title contained in the other → graded by the length ratio of
      the shorter to the longer title. At or above the standalone floor
      the score is the standalone base plus a linear share of the ratio
      range. Below it, but at or above the with-track-number floor, the
      reduced base is credited only if track numbers agree.
    - Agreeing track numbers add a fixed bonus on top. The bonus alone is
      below the threshold, so a track number never matches by itself.
    """
    if not (norm_local and api_norm):
        return 0

    # Different recordings never match, however similar the titles look.
    if local_diffs != _differentiators_in(api_norm):
        return 0

    numbers_agree = local_tn is not None and api_tn == local_tn
    bonus = _W_TRACK_NUMBER if numbers_agree else 0

    if norm_local == api_norm:
        return _W_EXACT_TITLE + bonus

    # Containment in either direction; the ratio is shorter / longer.
    if api_norm in norm_local:
        contained, container = api_norm, norm_local
    elif norm_local in api_norm:
        contained, container = norm_local, api_norm
    else:
        return bonus
    ratio = len(contained) / max(len(container), 1)

    if ratio >= _SUBSTRING_RATIO_FLOOR_STANDALONE:
        # Strong overlap: credited regardless of track-number agreement.
        normalized = (
            (ratio - _SUBSTRING_RATIO_FLOOR_STANDALONE)
            / (1.0 - _SUBSTRING_RATIO_FLOOR_STANDALONE)
        )
        title_points = _W_SUBSTRING_BASE_STANDALONE + int(normalized * _W_SUBSTRING_RATIO_RANGE)
    elif numbers_agree and ratio >= _SUBSTRING_RATIO_FLOOR_WITH_TN:
        # Weak overlap (e.g. "the recipe" inside "the recipe bonus track"):
        # accepted only because the track number backs it up.
        title_points = _W_SUBSTRING_BASE_WITH_TN
    else:
        title_points = 0

    return title_points + bonus
|
|
|
|
|
|
|
|
|
def _prenormalize_api_tracks(api_tracks: List[dict]) -> List[tuple]:
    """Precompute the matcher's view of every API track.

    Each result entry is ``(item, normalized_title, track_number)``:
    the title comes from ``'name'`` (falling back to ``'title'``) and the
    track number is ``None`` when absent or not convertible to ``int``.

    Callers matching many local tracks against one API list should build
    this once and pass it to :func:`_find_api_track`; for 17 local tracks
    against 22 API tracks that avoids 17×22 = 374 repeated normalize
    calls.
    """
    prepared = []
    for item in api_tracks:
        title = _normalize_title(item.get('name') or item.get('title'))
        raw_number = item.get('track_number')
        try:
            number = None if raw_number is None else int(raw_number)
        except (TypeError, ValueError):
            number = None
        prepared.append((item, title, number))
    return prepared
|
|
|
|
|
|
|
|
|
def _find_api_track(api_tracks, db_title: str, db_track_number) -> Optional[dict]:
    """Return the API track that best corresponds to a local track row.

    ``api_tracks`` is either a raw list of API dicts or the 3-tuples
    produced by :func:`_prenormalize_api_tracks`; the first element's
    type decides which. Raw lists are pre-normalized here.

    Local rows have a title and track number but no disc number, and
    multi-disc albums reuse track numbers across discs, so the title is
    the real disambiguator. Every candidate is scored with
    :func:`_score_candidate`; candidates below
    :data:`_MATCH_SCORE_THRESHOLD` are ignored. The highest score wins,
    and on an equal score a candidate whose track number agrees replaces
    one whose number does not (otherwise the earlier candidate is kept).

    Returns ``None`` when the local title normalizes to nothing or no
    candidate clears the threshold, so the caller can report the track as
    unmatched rather than mis-route it.
    """
    wanted_title = _normalize_title(db_title)
    if not wanted_title:
        return None

    try:
        wanted_number = None if db_track_number is None else int(db_track_number)
    except (TypeError, ValueError):
        wanted_number = None
    wanted_diffs = _differentiators_in(wanted_title)

    already_prepared = bool(api_tracks) and isinstance(api_tracks[0], tuple)
    if already_prepared:
        candidates = api_tracks  # type: ignore[assignment]
    else:
        candidates = _prenormalize_api_tracks(api_tracks)  # type: ignore[arg-type]

    winner: Optional[dict] = None
    # Ranked as (score, track-number-agrees). Tuple ordering gives
    # "higher score first, then prefer an agreeing track number".
    winner_rank = (0, False)
    for item, api_title, api_number in candidates:
        score = _score_candidate(wanted_title, wanted_number, wanted_diffs, api_title, api_number)
        if score < _MATCH_SCORE_THRESHOLD:
            continue
        numbers_agree = wanted_number is not None and api_number == wanted_number
        rank = (score, numbers_agree)
        if rank > winner_rank:
            winner, winner_rank = item, rank

    return winner
|
|
|
|
|
|
|
|
|
def load_album_and_tracks(db, album_id):
    """Fetch one album row and its track rows from the local database.

    Args:
        db: Object exposing ``_get_connection()``.
        album_id: Album ID; converted to ``str`` for the query.

    Returns:
        ``(album_dict, tracks_list)``. ``album_dict`` is ``None`` (with an
        empty track list) when no album row is found. Both the album and
        each track carry an extra ``artist_name`` key from the joined
        artists table; tracks are ordered by ``track_number``.

    The connection is always closed before returning, and a failure while
    closing is ignored.
    """
    album_sql = """
        SELECT al.*, ar.name as artist_name
        FROM albums al
        JOIN artists ar ON al.artist_id = ar.id
        WHERE al.id = ?
    """
    tracks_sql = """
        SELECT t.*, ar.name as artist_name
        FROM tracks t
        JOIN artists ar ON t.artist_id = ar.id
        WHERE t.album_id = ?
        ORDER BY t.track_number
    """
    params = (str(album_id),)

    conn = None
    try:
        conn = db._get_connection()
        cur = conn.cursor()

        cur.execute(album_sql, params)
        found = cur.fetchone()
        if not found:
            return None, []
        album = dict(found)

        cur.execute(tracks_sql, params)
        track_rows = [dict(row) for row in cur.fetchall()]
        return album, track_rows
    finally:
        if conn is not None:
            try:
                conn.close()
            except Exception:  # noqa: S110 — finally-block cleanup, logger may be torn down
                pass
|
|
|
|
|
|
|
|
|
def _plan_from_tags(
|
|
|
album_data: dict,
|
|
|
tracks: List[dict],
|
|
|
resolve_file_path_fn: Optional[Callable[[Optional[str]], Optional[str]]],
|
|
|
) -> dict:
|
|
|
"""Tag-mode planner: build per-track ``api_track`` shapes from each
|
|
|
file's own embedded metadata instead of a live source API call.
|
|
|
|
|
|
Per-track behavior:
|
|
|
- File missing on disk → unmatched with reason.
|
|
|
- Tags missing essentials (title / artist / album) → unmatched
|
|
|
with reason.
|
|
|
- Otherwise matched with the per-file extracted ``api_track`` and
|
|
|
a per-file ``api_album``. The plan stores the FIRST matched
|
|
|
track's album dict on the top-level ``api_album`` field for
|
|
|
backward compatibility with downstream callers; downstream
|
|
|
consumers that need the per-track album shape read it off
|
|
|
``items[i]['api_album']``.
|
|
|
|
|
|
Returns the same status / source / api_album / total_discs / items
|
|
|
shape as :func:`plan_album_reorganize`. ``source`` is the literal
|
|
|
string ``'tags'`` so callers can distinguish from API sources."""
|
|
|
if resolve_file_path_fn is None:
|
|
|
# Without the file-path resolver we can't read anything off
|
|
|
# disk. Return an unmatched plan so callers surface a clear
|
|
|
# error instead of silently returning empty.
|
|
|
reason = 'Tag-mode reorganize requires the file path resolver.'
|
|
|
return {
|
|
|
'status': 'no_source_id', 'source': None, 'api_album': None,
|
|
|
'total_discs': 1,
|
|
|
'items': [{
|
|
|
'track': t, 'api_track': None, 'matched': False,
|
|
|
'reason': reason,
|
|
|
} for t in tracks],
|
|
|
}
|
|
|
|
|
|
from core.library.reorganize_tag_source import read_album_track_from_file
|
|
|
|
|
|
items: List[dict] = []
|
|
|
first_album_meta: Optional[dict] = None
|
|
|
max_disc = 1
|
|
|
|
|
|
for track in tracks:
|
|
|
db_path = track.get('file_path')
|
|
|
resolved = resolve_file_path_fn(db_path) if db_path else None
|
|
|
if not resolved:
|
|
|
items.append({
|
|
|
'track': track, 'api_track': None, 'api_album': None,
|
|
|
'matched': False,
|
|
|
'reason': 'File no longer exists on disk for this track.',
|
|
|
})
|
|
|
continue
|
|
|
|
|
|
album_meta, track_meta, err = read_album_track_from_file(resolved)
|
|
|
if err is not None or track_meta is None or album_meta is None:
|
|
|
items.append({
|
|
|
'track': track, 'api_track': None, 'api_album': None,
|
|
|
'matched': False,
|
|
|
'reason': err or 'Could not extract metadata from embedded tags.',
|
|
|
})
|
|
|
continue
|
|
|
|
|
|
if first_album_meta is None:
|
|
|
first_album_meta = album_meta
|
|
|
try:
|
|
|
disc = int(track_meta.get('disc_number') or 1)
|
|
|
except (TypeError, ValueError):
|
|
|
disc = 1
|
|
|
if disc > max_disc:
|
|
|
max_disc = disc
|
|
|
# Respect an explicit `totaldiscs` tag (or "1/2" disc-number
|
|
|
# form) so a partial-album reorganize (only disc 1 present
|
|
|
# locally) still routes into `Disc 1/` when the file's tags
|
|
|
# know there are 2 discs total.
|
|
|
try:
|
|
|
tagged_total = int(album_meta.get('total_discs') or 0)
|
|
|
except (TypeError, ValueError):
|
|
|
tagged_total = 0
|
|
|
if tagged_total > max_disc:
|
|
|
max_disc = tagged_total
|
|
|
|
|
|
items.append({
|
|
|
'track': track,
|
|
|
'api_track': track_meta,
|
|
|
'api_album': album_meta,
|
|
|
'matched': True,
|
|
|
'reason': None,
|
|
|
})
|
|
|
|
|
|
if not any(it['matched'] for it in items):
|
|
|
return {
|
|
|
'status': 'no_source_id',
|
|
|
'source': 'tags',
|
|
|
'api_album': None,
|
|
|
'total_discs': 1,
|
|
|
'items': items,
|
|
|
}
|
|
|
|
|
|
return {
|
|
|
'status': 'planned',
|
|
|
'source': 'tags',
|
|
|
'api_album': first_album_meta or {},
|
|
|
'total_discs': max_disc,
|
|
|
'items': items,
|
|
|
}
|
|
|
|
|
|
|
|
|
def _max_disc_number(api_tracks) -> int:
    """Return the highest ``disc_number`` among ``api_tracks`` (minimum 1).

    A missing, zero or non-numeric disc number counts as disc 1, so one
    malformed provider entry cannot abort planning for the whole album.
    """
    highest = 1
    for item in api_tracks:
        try:
            disc = int(item.get('disc_number') or 1)
        except (TypeError, ValueError):
            disc = 1
        if disc > highest:
            highest = disc
    return highest


def plan_album_reorganize(
    album_data: dict,
    tracks: List[dict],
    primary_source: Optional[str] = None,
    strict_source: bool = False,
    metadata_source: str = 'api',
    resolve_file_path_fn: Optional[Callable[[Optional[str]], Optional[str]]] = None,
) -> dict:
    """Compute the per-track plan for an album reorganize.

    Both the reorganize orchestrator and the preview endpoint share this
    so the preview matches what would happen on apply. No files are moved
    or written here; tag mode does read the files' embedded tags.

    ``metadata_source``:
      - ``'api'`` (default): query the configured metadata source(s)
        for the canonical tracklist. Issues an API call.
      - ``'tags'``: read each file's embedded tags as the source of
        truth (issue #592). Zero API calls; trusts the user's
        enriched library.

    When ``metadata_source='tags'``, ``resolve_file_path_fn`` MUST be
    provided (the planner needs to read the actual files). The
    ``primary_source`` and ``strict_source`` params are ignored in
    tag mode.

    Args:
        album_data: Album row dict (including the per-source ID columns).
        tracks: Local track row dicts for the album.
        primary_source: Source to try first; defaults to the configured
            primary source, or ``'deezer'`` if that lookup fails.
        strict_source: Try only ``primary_source``, with no fallback.
        metadata_source: ``'api'`` or ``'tags'`` (see above).
        resolve_file_path_fn: Maps a DB-stored path to the on-disk path;
            only used in tag mode.

    Returns:
        ``{'status': 'planned' | 'no_source_id' | 'no_tracks',
           'source': str | None,
           'api_album': dict | None,
           'total_discs': int,
           'items': [{'track': dict, 'api_track': dict | None,
                      'matched': bool, 'reason': str | None}, ...]}``

    Per-track behavior (API mode):
      - Each local track is matched with :func:`_find_api_track`, which
        scores every API track on title similarity and uses the track
        number only as a bonus / tiebreaker. A track number alone never
        produces a match.
      - Tracks with no match are reported with `matched=False` and a reason.
      - `disc_number` for each track comes from its matched API entry; if
        unmatched, `api_track is None` and the caller decides what to do.
      - `total_discs` is the highest disc number in the source tracklist;
        malformed disc numbers are treated as disc 1.
    """
    if not tracks:
        return {
            'status': 'no_tracks', 'source': None, 'api_album': None,
            'total_discs': 1, 'items': [],
        }

    if metadata_source == 'tags':
        return _plan_from_tags(album_data, tracks, resolve_file_path_fn)

    if primary_source is None:
        try:
            primary_source = get_primary_source()
        except Exception:
            primary_source = 'deezer'

    source, api_album, api_tracks = _resolve_source(
        album_data, primary_source, strict_source=strict_source
    )
    if not source:
        # No usable source: every track is reported unmatched with the
        # same album-level explanation.
        reason = _unresolvable_reason(album_data, primary_source, strict_source)
        return {
            'status': 'no_source_id', 'source': None, 'api_album': None,
            'total_discs': 1,
            'items': [{
                'track': t, 'api_track': None, 'matched': False,
                'reason': reason,
            } for t in tracks],
        }

    total_discs = _max_disc_number(api_tracks)

    # Pre-normalize once so the matcher doesn't redo the work per track.
    prenormalized = _prenormalize_api_tracks(api_tracks)
    items = []
    for track in tracks:
        api_track = _find_api_track(prenormalized, track.get('title', ''), track.get('track_number'))
        if api_track is None:
            items.append({
                'track': track, 'api_track': None, 'matched': False,
                'reason': f"No matching track in {source} tracklist (likely a bonus / non-canonical track)",
            })
        else:
            items.append({
                'track': track, 'api_track': api_track, 'matched': True,
                'reason': None,
            })

    return {
        'status': 'planned',
        'source': source,
        'api_album': api_album,
        'total_discs': total_discs,
        'items': items,
    }
|
|
|
|
|
|
|
|
|
def _build_post_process_context(
|
|
|
api_album: dict,
|
|
|
api_track: dict,
|
|
|
artist_name: str,
|
|
|
album_title: str,
|
|
|
total_discs: int,
|
|
|
) -> dict:
|
|
|
"""Build the same shape `import_album_process` builds so post-process
|
|
|
treats this exactly like a fresh download with full Spotify-style
|
|
|
metadata in hand."""
|
|
|
track_number = int(api_track.get('track_number') or 1)
|
|
|
disc_number = int(api_track.get('disc_number') or 1)
|
|
|
track_artists = api_track.get('artists') or [artist_name]
|
|
|
normalized_artists = [
|
|
|
({'name': a} if isinstance(a, str) else a) for a in track_artists
|
|
|
]
|
|
|
|
|
|
api_album_id = api_album.get('id') or api_album.get('album_id') or ''
|
|
|
api_album_name = api_album.get('name') or api_album.get('title') or album_title
|
|
|
api_album_release = (
|
|
|
api_album.get('release_date')
|
|
|
or api_album.get('releaseDate')
|
|
|
or ''
|
|
|
)
|
|
|
api_album_total_tracks = (
|
|
|
api_album.get('total_tracks')
|
|
|
or api_album.get('totalTracks')
|
|
|
or 0
|
|
|
)
|
|
|
# Spotify shape: {'images': [{'url': ...}, ...]}.
|
|
|
# Deezer shape: {'image_url': '...'}.
|
|
|
api_album_image = api_album.get('image_url') or ''
|
|
|
if not api_album_image:
|
|
|
images = api_album.get('images')
|
|
|
if isinstance(images, list) and images:
|
|
|
first = images[0]
|
|
|
if isinstance(first, dict):
|
|
|
api_album_image = first.get('url') or ''
|
|
|
|
|
|
track_name = api_track.get('name') or api_track.get('title') or ''
|
|
|
|
|
|
return {
|
|
|
'spotify_artist': {
|
|
|
'name': artist_name,
|
|
|
'id': '',
|
|
|
'genres': [],
|
|
|
},
|
|
|
'spotify_album': {
|
|
|
'id': api_album_id,
|
|
|
'name': api_album_name,
|
|
|
'release_date': api_album_release,
|
|
|
'total_tracks': api_album_total_tracks,
|
|
|
'total_discs': total_discs,
|
|
|
'image_url': api_album_image,
|
|
|
},
|
|
|
'track_info': {
|
|
|
'name': track_name,
|
|
|
'id': api_track.get('id', ''),
|
|
|
'track_number': track_number,
|
|
|
'disc_number': disc_number,
|
|
|
'duration_ms': api_track.get('duration_ms', 0),
|
|
|
'artists': normalized_artists,
|
|
|
'uri': api_track.get('uri', ''),
|
|
|
},
|
|
|
'original_search_result': {
|
|
|
'title': track_name,
|
|
|
'artist': artist_name,
|
|
|
'album': api_album_name,
|
|
|
'track_number': track_number,
|
|
|
'disc_number': disc_number,
|
|
|
'spotify_clean_title': track_name,
|
|
|
'spotify_clean_album': api_album_name,
|
|
|
'artists': normalized_artists,
|
|
|
},
|
|
|
'is_album_download': True,
|
|
|
'has_clean_spotify_data': True,
|
|
|
'has_full_spotify_metadata': True,
|
|
|
}
|
|
|
|
|
|
|
|
|
def preview_album_reorganize(
    *,
    album_id: str,
    db,
    transfer_dir: str,
    resolve_file_path_fn: Callable[[Optional[str]], Optional[str]],
    build_final_path_fn: Callable,
    primary_source: Optional[str] = None,
    strict_source: bool = False,
    metadata_source: str = 'api',
) -> dict:
    """Compute the planned destination paths for a reorganize WITHOUT
    moving any files. The preview UI uses this to show users what the
    "Apply" run would do.

    The destination per track comes from
    ``build_final_path_fn(context, spotify_artist, album_info, file_ext)``
    where ``context`` is built by ``_build_post_process_context`` and
    ``album_info`` by ``_build_album_info`` — the same context builder
    the apply path (``_run_post_process_for_track``) uses, so the
    preview is intended to match what an apply run produces.

    Args:
        album_id: Library album ID.
        db: Database handle, handed to ``load_album_and_tracks``.
        transfer_dir: Configured transfer directory. Paths that live
            inside it are shown relative to it; it is also echoed back
            in the result.
        resolve_file_path_fn: Resolves a DB-stored file path to the
            actual on-disk path (or ``None`` if missing).
        build_final_path_fn: Path builder with signature
            ``(context, spotify_artist, album_info, file_ext) -> (path, ok)``.
            Injected so this module does not import the web layer.
        primary_source: Optional source override, forwarded to
            ``plan_album_reorganize``.
        strict_source: Forwarded unchanged to ``plan_album_reorganize``.
        metadata_source: Forwarded unchanged to
            ``plan_album_reorganize`` (default ``'api'``).

    Returns:
        ``{
            'success': bool,
            'status': str,   # 'planned' | 'no_album' | 'no_tracks' | 'no_source_id'
            'source': str | None,
            'album': str,
            'artist': str,
            'transfer_dir': str,
            'tracks': [
                {'track_id', 'title', 'track_number', 'current_path',
                 'new_path', 'file_exists', 'unchanged', 'collision',
                 'matched', 'reason', 'disc_number'},
                ...
            ],
        }``

        The early-exit results are smaller: ``'no_album'`` carries only
        ``success`` / ``status`` / ``tracks``; ``'no_tracks'`` adds
        ``album`` and ``artist`` but no ``source`` / ``transfer_dir``.
    """
    album_data, tracks = load_album_and_tracks(db, album_id)
    if album_data is None:
        return {'success': False, 'status': 'no_album', 'tracks': []}

    if not tracks:
        return {
            'success': False, 'status': 'no_tracks',
            'album': album_data.get('title', ''),
            'artist': album_data.get('artist_name', ''),
            'tracks': [],
        }

    plan = plan_album_reorganize(
        album_data, tracks,
        primary_source=primary_source, strict_source=strict_source,
        metadata_source=metadata_source,
        resolve_file_path_fn=resolve_file_path_fn,
    )
    artist_name = album_data.get('artist_name') or 'Unknown Artist'
    album_title = album_data.get('title') or 'Unknown Album'

    # Keys shared by every result produced from here on.
    common = {
        'album': album_title,
        'artist': artist_name,
        'transfer_dir': transfer_dir,
        'source': plan['source'],
    }

    if plan['status'] == 'no_source_id':
        # Nothing can be planned; report every track as unmatched with
        # the raw DB path so the UI still lists them.
        return {
            'success': False, 'status': 'no_source_id',
            **common,
            'tracks': [{
                'track_id': t.get('id'),
                'title': t.get('title', ''),
                'track_number': t.get('track_number', 0),
                'current_path': t.get('file_path', ''),
                'new_path': '',
                'file_exists': False, 'unchanged': False, 'collision': False,
                'matched': False,
                'reason': 'No metadata source ID — run enrichment first',
                'disc_number': None,
            } for t in tracks],
        }

    total_discs = plan['total_discs']
    api_album = plan['api_album'] or {}
    preview_tracks = []

    for plan_item in plan['items']:
        track = plan_item['track']
        title = track.get('title', '')
        db_path = track.get('file_path')
        resolved = resolve_file_path_fn(db_path) if db_path else None
        # splitext('.flac') yields an empty extension, so the trailing
        # `or '.flac'` is what supplies the default for path-less rows.
        file_ext = os.path.splitext(resolved or db_path or '.flac')[1] or '.flac'

        item = {
            'track_id': track.get('id'),
            'title': title,
            'track_number': track.get('track_number', 0),
            'current_path': _trim_to_transfer(db_path, resolved, transfer_dir),
            'new_path': '',
            'file_exists': resolved is not None,
            'unchanged': False,
            'collision': False,
            'matched': plan_item['matched'],
            'reason': plan_item.get('reason'),
            'disc_number': None,
        }

        if not plan_item['matched']:
            preview_tracks.append(item)
            continue

        api_track = plan_item['api_track']
        item['disc_number'] = int(api_track.get('disc_number') or 1)
        # Build the same context the orchestrator builds so the path
        # builder produces the same destination it would on apply.
        # Tag-mode plan items carry per-item album metadata; fall back
        # to the shared api_album in API mode (where every plan item
        # shares the same one).
        per_item_album = plan_item.get('api_album') or api_album
        context = _build_post_process_context(
            per_item_album, api_track, artist_name, album_title, total_discs
        )
        # `_build_final_path_for_track` switches between ALBUM and SINGLE
        # modes based on `album_info.get('is_album')` — must be passed,
        # not None, otherwise multi-disc deluxes degrade to single-track
        # folders (the exact bug winecountrygames hit).
        album_info = _build_album_info(context)
        try:
            spotify_artist = context['spotify_artist']
            new_full, _ok = build_final_path_fn(context, spotify_artist, album_info, file_ext)

            display_path = new_full or ''
            if transfer_dir and new_full and new_full.startswith(transfer_dir):
                tail = new_full[len(transfer_dir):]
                # A bare string-prefix match is not containment:
                # '/music2/x' starts with '/music' but lives elsewhere.
                # Require the match to end on a path-component boundary
                # before showing the path relative to the transfer dir.
                if (
                    not tail
                    or tail.startswith(('/', os.sep))
                    or transfer_dir.endswith(('/', os.sep))
                ):
                    display_path = os.path.relpath(new_full, transfer_dir)
            item['new_path'] = display_path

            if resolved and new_full and os.path.normpath(resolved) == os.path.normpath(new_full):
                item['unchanged'] = True
        except Exception as e:
            # Keep the track in the preview; surface why no destination
            # could be computed instead of failing the whole preview.
            item['reason'] = f"Couldn't compute destination path: {e}"

        preview_tracks.append(item)

    # Collision detection: multiple matched tracks mapping to the same
    # destination would overwrite each other on apply.
    seen = {}
    for it in preview_tracks:
        if not it['matched'] or it['unchanged'] or not it['new_path']:
            continue
        norm = os.path.normpath(it['new_path'])
        if norm in seen:
            it['collision'] = True
            seen[norm]['collision'] = True
        else:
            seen[norm] = it

    return {
        'success': True, 'status': 'planned',
        **common,
        'tracks': preview_tracks,
    }
|
|
|
|
|
|
|
|
|
def _trim_to_transfer(db_path, resolved, transfer_dir):
|
|
|
"""Compose the user-facing 'current path' string — relative to the
|
|
|
transfer dir if the file lives there, else the raw DB value."""
|
|
|
if resolved and transfer_dir and resolved.startswith(transfer_dir):
|
|
|
return resolved[len(transfer_dir):].lstrip(os.sep).lstrip('/')
|
|
|
return db_path or 'No file'
|
|
|
|
|
|
|
|
|
def _build_album_info(context: dict) -> dict:
|
|
|
"""Build the ``album_info`` dict that ``_build_final_path_for_track``
|
|
|
consumes to enter ALBUM MODE. Without this (passing None) the path
|
|
|
builder falls through to SINGLE MODE and produces per-track folders
|
|
|
named after each track title — the exact bug we're fixing.
|
|
|
|
|
|
Mirrors the shape the download path produces at write time.
|
|
|
"""
|
|
|
spotify_album = context.get('spotify_album', {}) or {}
|
|
|
track_info = context.get('track_info', {}) or {}
|
|
|
return {
|
|
|
'is_album': True,
|
|
|
'album_name': spotify_album.get('name') or 'Unknown Album',
|
|
|
'clean_track_name': track_info.get('name') or 'Unknown Track',
|
|
|
'track_number': track_info.get('track_number') or 1,
|
|
|
'disc_number': track_info.get('disc_number') or 1,
|
|
|
'album_image_url': spotify_album.get('image_url') or '',
|
|
|
'spotify_album_id': spotify_album.get('id') or '',
|
|
|
}
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
class _RunContext:
|
|
|
"""Bundles all state + injected dependencies a single
|
|
|
``_process_one_track`` call needs.
|
|
|
|
|
|
Hoisted out of orchestrator-local closures so the per-track
|
|
|
helpers can be unit-tested directly with a fake ctx, and so a
|
|
|
stack trace into a failing helper is intelligible (closures
|
|
|
captured 16+ values, none of which were visible in tracebacks).
|
|
|
|
|
|
Thread-safety contract — read this before adding new fields:
|
|
|
|
|
|
- ``state_lock`` MUST be held when mutating any of the
|
|
|
lock-protected fields below. The provided ``record_error``
|
|
|
method already takes the lock; direct mutation outside that
|
|
|
method is the only place where future contributors might
|
|
|
forget. Add new mutable shared state with the same discipline.
|
|
|
|
|
|
Lock-protected fields (mutate only inside ``state_lock``):
|
|
|
|
|
|
summary dict — counts and errors list
|
|
|
src_dirs_touched set — populated by `_finalize_track`
|
|
|
dst_dirs_touched set — populated by `_finalize_track`
|
|
|
|
|
|
Read-only after construction (safe to read without locking):
|
|
|
|
|
|
album_id, api_album, artist_name, album_title, total_discs,
|
|
|
staging_album_dir, resolve_file_path_fn, post_process_fn,
|
|
|
update_track_path_fn, on_progress, stop_check, state_lock
|
|
|
|
|
|
Side-effecting methods that take the lock internally:
|
|
|
|
|
|
record_error() — records a per-track failure
|
|
|
emit() — fires on_progress callback (no lock;
|
|
|
assumes caller holds it when also
|
|
|
passing summary fields, which the
|
|
|
record_error and orchestrator-success
|
|
|
paths both do)
|
|
|
"""
|
|
|
album_id: str
|
|
|
api_album: dict
|
|
|
artist_name: str
|
|
|
album_title: str
|
|
|
total_discs: int
|
|
|
staging_album_dir: str
|
|
|
state_lock: threading.Lock # required to mutate lock-protected fields
|
|
|
summary: dict # LOCK-PROTECTED
|
|
|
src_dirs_touched: Set[str] # LOCK-PROTECTED
|
|
|
dst_dirs_touched: Set[str] # LOCK-PROTECTED
|
|
|
resolve_file_path_fn: Callable[[Optional[str]], Optional[str]]
|
|
|
post_process_fn: Callable[[str, dict, str], None]
|
|
|
update_track_path_fn: Optional[Callable[[Any, str], None]] = None
|
|
|
on_progress: Optional[Callable[[dict], None]] = None
|
|
|
stop_check: Optional[Callable[[], bool]] = None
|
|
|
|
|
|
def emit(self, **updates) -> None:
|
|
|
"""Fire the progress callback. Caller is responsible for
|
|
|
holding ``state_lock`` when the updates payload includes
|
|
|
snapshots of lock-protected fields (so the snapshot is
|
|
|
coherent). Currently always called from inside the lock by
|
|
|
``record_error`` and the orchestrator's success path."""
|
|
|
if self.on_progress is None:
|
|
|
return
|
|
|
try:
|
|
|
self.on_progress(updates)
|
|
|
except Exception as e:
|
|
|
logger.debug("progress emit failed: %s", e)
|
|
|
|
|
|
def record_error(self, track_id, title, message, kind: str = 'skipped') -> None:
|
|
|
with self.state_lock:
|
|
|
self.summary['errors'].append({
|
|
|
'track_id': track_id,
|
|
|
'title': title,
|
|
|
'error': message,
|
|
|
})
|
|
|
self.summary[kind] += 1
|
|
|
self.emit(**{
|
|
|
kind: self.summary[kind],
|
|
|
'errors': list(self.summary['errors']),
|
|
|
'processed': (
|
|
|
self.summary['moved']
|
|
|
+ self.summary['skipped']
|
|
|
+ self.summary['failed']
|
|
|
),
|
|
|
})
|
|
|
|
|
|
|
|
|
def _stage_track(ctx: _RunContext, track_id, title, resolved_src) -> Optional[str]:
    """Copy ``resolved_src`` into a fresh, randomly named subdirectory
    of ``ctx.staging_album_dir`` and return the path of the copy.

    Each track gets its own subdirectory (8 hex chars of a UUID4). The
    original author's rationale: post-processing cleans up empty
    directories upward from the file it moved, and with one shared
    staging directory a finishing worker could delete it between
    another worker's ``makedirs`` and ``copy2``. With private
    subdirectories that upward walk should stop at the album staging
    dir while other workers' subdirectories still exist. (The cleanup
    itself lives outside this function.)

    Returns:
        Path of the staged copy, or ``None`` when the subdirectory
        could not be created or the copy failed. Either failure is
        recorded through ``ctx.record_error(..., kind='failed')``.
    """
    private_dir = os.path.join(ctx.staging_album_dir, uuid.uuid4().hex[:8])
    try:
        os.makedirs(private_dir, exist_ok=True)
    except OSError as mk_err:
        ctx.record_error(
            track_id, title,
            f"Couldn't create staging subdirectory: {mk_err}",
            kind='failed',
        )
        return None

    # Keep the source filename so downstream code sees the same name.
    staged_copy = os.path.join(private_dir, os.path.basename(resolved_src))
    try:
        shutil.copy2(resolved_src, staged_copy)
    except OSError as copy_err:
        ctx.record_error(
            track_id, title,
            f"Couldn't copy to staging: {copy_err}",
            kind='failed',
        )
        return None
    else:
        return staged_copy
|
|
|
|
|
|
|
|
|
def _run_post_process_for_track(ctx: _RunContext, track_id, title, api_track, staging_file, *, per_item_api_album=None) -> Optional[str]:
    """Run post-processing on one staged file and report where it landed.

    Builds the per-track context with ``_build_post_process_context``,
    calls ``ctx.post_process_fn(context_key, context, staging_file)``
    and then reads ``context['_final_processed_path']``.

    Args:
        ctx: Shared run state and injected callables.
        track_id: Library track ID (used in the context key and errors).
        title: Track title, for error reporting.
        api_track: Metadata for the track, passed to the context builder.
        staging_file: Path of the staged copy to process.
        per_item_api_album: When truthy, used instead of
            ``ctx.api_album`` for this track (plan items may carry
            their own album metadata).

    Returns:
        The final path when post-processing set one that exists on
        disk; ``None`` when post-processing raised or produced no
        existing file. Both failures are recorded as ``'failed'``.
    """
    album_meta = per_item_api_album or ctx.api_album
    context = _build_post_process_context(
        album_meta, api_track, ctx.artist_name, ctx.album_title, ctx.total_discs
    )
    # Random suffix keeps keys unique across repeated runs of a track.
    context_key = f"reorganize_{ctx.album_id}_{track_id}_{uuid.uuid4().hex[:8]}"

    try:
        ctx.post_process_fn(context_key, context, staging_file)
    except Exception as pp_err:
        ctx.record_error(
            track_id, title,
            f"Post-processing failed: {pp_err}",
            kind='failed',
        )
        return None

    final_path = context.get('_final_processed_path')
    if final_path and os.path.exists(final_path):
        return final_path

    ctx.record_error(
        track_id, title,
        'Post-processing did not produce a final file '
        '(AcoustID rejection, quarantine, or skip).',
        kind='failed',
    )
    return None
|
|
|
|
|
|
|
|
|
def _finalize_track(ctx: _RunContext, track_id, resolved_src, new_path) -> bool:
    """Point the DB at ``new_path`` and then retire the original file.

    Order matters: the DB row is updated first. If that fails the
    original is left untouched (the file then exists at both paths and
    the row still points at the old one) and ``False`` is returned so
    the caller can count the track as failed rather than moved.

    After a successful DB update (or when no ``update_track_path_fn``
    is configured):

    - if source and destination normalise to the same path, nothing
      else is done;
    - otherwise the source and destination directories are recorded
      (under ``ctx.state_lock``) for end-of-run cleanup, same-stem audio
      files in other formats are moved next to the new file, the
      original is removed, and its per-track sidecars are deleted.

    A failure to remove the original is logged but still returns
    ``True``.
    """
    updater = ctx.update_track_path_fn
    if updater:
        try:
            updater(track_id, new_path)
        except Exception as db_err:
            logger.warning(
                f"[Reorganize] DB path update failed for {track_id}: {db_err} "
                f"— leaving original at {resolved_src} so the library scan can recover."
            )
            return False

    same_location = os.path.normpath(resolved_src) == os.path.normpath(new_path)
    if same_location:
        # In-place edit: the row is already right and there is no
        # separate original to remove.
        return True

    with ctx.state_lock:
        ctx.src_dirs_touched.add(os.path.dirname(resolved_src))
        ctx.dst_dirs_touched.add(os.path.dirname(new_path))

    # Reported case (Foxxify, Discord): with lossy-copy enabled a
    # track exists as e.g. `track.flac` AND `track.opus`, but the DB
    # knows only one of them. Move the other formats along BEFORE
    # deleting the original so they are not orphaned in the old
    # folder, where they would block empty-folder cleanup.
    for sibling_src in _find_sibling_audio_files(resolved_src):
        landed_at = _move_sibling_to_destination(sibling_src, new_path)
        if landed_at:
            logger.debug(
                "[Reorganize] Moved sibling-format file alongside canonical: %s",
                landed_at,
            )

    try:
        os.remove(resolved_src)
    except OSError as rm_err:
        logger.warning(f"[Reorganize] Couldn't remove original {resolved_src}: {rm_err}")

    _delete_track_sidecars(resolved_src)
    return True
|
|
|
|
|
|
|
|
|
def _process_one_track(ctx: _RunContext, plan_item: dict) -> None:
    """Take one plan item through stage → post-process → finalize.

    Intended to run on several worker threads at once: every write to
    shared state happens under ``ctx.state_lock``, either here or in
    ``ctx.record_error`` / ``_finalize_track``.

    Outcome per item (exactly one of):

    - stop requested before start → returns without counting;
    - unmatched item or file missing on disk → recorded as ``skipped``;
    - staging, post-processing or DB update failure → ``failed``;
    - otherwise ``summary['moved']`` is incremented and progress emitted.
    """
    if ctx.stop_check and ctx.stop_check():
        return

    track = plan_item['track']
    track_id = track.get('id')
    title = track.get('title', 'Unknown')
    ctx.emit(current_track=title)

    if not plan_item['matched']:
        reason = plan_item.get('reason') or 'No matching API track'
        ctx.record_error(track_id, title, reason)
        return

    db_path = track.get('file_path')
    resolved_src = ctx.resolve_file_path_fn(db_path) if db_path else None
    if not resolved_src:
        ctx.record_error(
            track_id, title,
            f"File not found on disk — DB path: {db_path or '(empty)'}",
        )
        return

    # The helpers below record their own errors; None means "already
    # reported, stop here".
    staging_file = _stage_track(ctx, track_id, title, resolved_src)
    if staging_file is None:
        return

    new_path = _run_post_process_for_track(
        ctx, track_id, title, plan_item['api_track'], staging_file,
        per_item_api_album=plan_item.get('api_album'),
    )
    if new_path is None:
        return

    if not _finalize_track(ctx, track_id, resolved_src, new_path):
        # The file reached new_path but the DB row still points at the
        # old location and the original was kept. Counting this as
        # "moved" would overstate what the UI can find, so surface it
        # as a failure that needs attention (per kettui's PR #377
        # review).
        ctx.record_error(
            track_id, title,
            'Track landed at new location but DB update failed — '
            'file is at both old and new paths until library scan re-indexes.',
            kind='failed',
        )
        return

    with ctx.state_lock:
        counters = ctx.summary
        counters['moved'] += 1
        ctx.emit(
            moved=counters['moved'],
            processed=counters['moved'] + counters['skipped'] + counters['failed'],
        )
|
|
|
|
|
|
|
|
|
def reorganize_album(
    *,
    album_id: str,
    db,
    staging_root: str,
    resolve_file_path_fn: Callable[[Optional[str]], Optional[str]],
    post_process_fn: Callable[[str, dict, str], None],
    update_track_path_fn: Optional[Callable[[object, str], None]] = None,
    cleanup_empty_dir_fn: Optional[Callable[[str], None]] = None,
    transfer_dir: Optional[str] = None,
    on_progress: Optional[Callable[[dict], None]] = None,
    primary_source: Optional[str] = None,
    strict_source: bool = False,
    stop_check: Optional[Callable[[], bool]] = None,
    metadata_source: str = 'api',
) -> dict:
    """Run a single album through the post-processing pipeline.

    See module docstring for the rationale. Dependencies (file
    resolution, post-processing, DB-path update, empty-dir cleanup)
    are injected so the orchestrator can be unit tested without the
    web application.

    Args:
        album_id: Library album ID.
        db: Database handle, handed to ``load_album_and_tracks``.
        staging_root: Root staging directory. A per-album subfolder is
            created beneath it and removed again at the end of the run.
        resolve_file_path_fn: Resolves a DB-stored file path to the
            actual on-disk path (or ``None`` if missing).
        post_process_fn: Called as
            ``post_process_fn(context_key, context, staging_file)``.
            Must set ``context['_final_processed_path']`` on success.
        update_track_path_fn: Called as
            ``update_track_path_fn(track_id, new_path)`` after each
            successful post-process to update the DB row. ``None`` to
            skip (e.g. in tests).
        cleanup_empty_dir_fn: Called with each source directory a
            track was moved out of, so the caller can prune empty
            parents. ``None`` to skip.
        transfer_dir: Library root. When set, empty album/disc folders
            under the artist directories of successful destinations
            are pruned at the end of the run.
        on_progress: Optional callback for live status updates.
            Receives a dict with any subset of the reorganize state
            keys (``total``, ``current_track``, ``processed``,
            ``moved``, ``skipped``, ``failed``, ``errors``).
        primary_source: Optional source override, forwarded to
            ``plan_album_reorganize``.
        strict_source: Forwarded unchanged to ``plan_album_reorganize``.
        stop_check: Returns True when the caller wants the reorganize
            to abort early (e.g. server shutdown). Tracks already
            running are allowed to finish; queued ones are cancelled.
        metadata_source: Forwarded unchanged to
            ``plan_album_reorganize`` (default ``'api'``).

    Returns:
        Summary dict with ``status`` ∈ ``{'completed', 'no_album',
        'no_tracks', 'no_source_id', 'setup_failed'}``, the resolved
        ``source``, the counters ``total`` / ``moved`` / ``skipped`` /
        ``failed`` and an ``errors`` list.
    """
    summary = {
        'status': 'completed',
        'source': None,
        'total': 0,
        'moved': 0,
        'skipped': 0,
        'failed': 0,
        'errors': [],
    }

    state_lock = threading.Lock()

    def _emit(**updates):
        # Progress hook usable before the run context exists.
        if on_progress is None:
            return
        try:
            on_progress(updates)
        except Exception as e:
            logger.debug("reorganize progress callback failed: %s", e)

    # Load album + tracks
    album_data, tracks = load_album_and_tracks(db, album_id)
    if album_data is None:
        summary['status'] = 'no_album'
        return summary

    if not tracks:
        summary['status'] = 'no_tracks'
        return summary

    summary['total'] = len(tracks)
    _emit(total=len(tracks))

    # Build the per-track plan (same logic the preview uses).
    plan = plan_album_reorganize(
        album_data, tracks,
        primary_source=primary_source, strict_source=strict_source,
        metadata_source=metadata_source,
        resolve_file_path_fn=resolve_file_path_fn,
    )
    if plan['status'] == 'no_source_id':
        summary['status'] = 'no_source_id'
        summary['source'] = plan.get('source')  # 'tags' or None
        # Pick the most actionable explanation for the user.
        if plan.get('source') == 'tags':
            err_text = (
                f"No tracks of '{album_data.get('title', '?')}' have readable "
                "embedded tags (missing title / artist / album, or file unreadable). "
                "Switch back to API mode or fix the embedded tags first."
            )
        elif _is_unknown_artist(album_data.get('artist_name')) or _looks_like_album_id_title(album_data.get('title')):
            err_text = (
                f"Album '{album_data.get('title', '?')}' has placeholder metadata "
                "(Unknown Artist or numeric title) — run the 'Fix Unknown Artists' "
                "repair job to recover real artist/album from file tags first."
            )
        else:
            err_text = (
                f"No reachable metadata source ID for '{album_data.get('title', '?')}' — "
                "run enrichment first to populate at least one of "
                "spotify_album_id / itunes_album_id / deezer_id / discogs_id / soul_id."
            )
        summary['errors'].append({'error': err_text})
        return summary

    source = plan['source']
    api_album = plan['api_album']
    total_discs = plan['total_discs']
    summary['source'] = source
    logger.info(
        f"[Reorganize] Album '{album_data.get('title')}' resolved via {source}: "
        f"{len(plan['items'])} item(s) planned"
    )

    # Per-album staging dir under the staging root. Cleaned up
    # (best-effort) at the end of the run regardless of outcome.
    artist_name = album_data.get('artist_name') or 'Unknown Artist'
    album_title = album_data.get('title') or 'Unknown Album'
    staging_album_dir = os.path.join(
        staging_root,
        f"{_safe_filename(artist_name)} - {_safe_filename(album_title)}_{uuid.uuid4().hex[:8]}",
    )
    try:
        os.makedirs(staging_album_dir, exist_ok=True)
    except OSError as e:
        summary['status'] = 'setup_failed'
        summary['errors'].append({
            'error': f"Couldn't create staging directory '{staging_album_dir}': {e}",
        })
        return summary

    src_dirs_touched: Set[str] = set()
    dst_dirs_touched: Set[str] = set()

    ctx = _RunContext(
        album_id=str(album_id),
        api_album=api_album or {},
        artist_name=artist_name,
        album_title=album_title,
        total_discs=total_discs,
        staging_album_dir=staging_album_dir,
        state_lock=state_lock,
        summary=summary,
        src_dirs_touched=src_dirs_touched,
        dst_dirs_touched=dst_dirs_touched,
        resolve_file_path_fn=resolve_file_path_fn,
        post_process_fn=post_process_fn,
        update_track_path_fn=update_track_path_fn,
        on_progress=on_progress,
        stop_check=stop_check,
    )

    try:
        # A small fixed pool of workers per album
        # (`_REORGANIZE_MAX_WORKERS`). All shared-state mutations made
        # by the workers happen inside `state_lock`.
        #
        # Wait loop with a periodic watchdog: instead of blocking
        # indefinitely on `as_completed`, we wake every
        # `_WATCHDOG_INTERVAL_SECONDS` so we can react to stop_check
        # promptly AND log a warning if any track has been processing
        # for longer than `_HUNG_WORKER_THRESHOLD_SECONDS`. We can't
        # kill the thread (Python doesn't allow that cleanly), but
        # surfacing it lets operators investigate.
        with ThreadPoolExecutor(
            max_workers=_REORGANIZE_MAX_WORKERS,
            thread_name_prefix='Reorganize',
        ) as executor:
            future_to_item = {
                executor.submit(_process_one_track, ctx, item): item
                for item in plan['items']
            }
            pending = set(future_to_item.keys())
            # When the watchdog first saw each future actually running.
            # Timing from submission would flag tracks that are merely
            # queued behind the pool as "stuck".
            running_since: Dict[Any, float] = {}
            warned_about: Set[Any] = set()

            while pending:
                if stop_check and stop_check():
                    # Queued futures are cancelled; running ones finish
                    # when the executor context exits.
                    for f in pending:
                        f.cancel()
                    break

                done, pending = wait(
                    pending,
                    timeout=_WATCHDOG_INTERVAL_SECONDS,
                    return_when=FIRST_COMPLETED,
                )
                for finished in done:
                    try:
                        finished.result()
                    except Exception as worker_err:
                        logger.error(
                            f"[Reorganize] Worker raised: {worker_err}",
                            exc_info=True,
                        )
                        # An unexpected crash must still be counted,
                        # otherwise `processed` can never reach `total`
                        # and the track disappears from the report.
                        crashed = (future_to_item.get(finished) or {}).get('track') or {}
                        ctx.record_error(
                            crashed.get('id'),
                            crashed.get('title', 'Unknown'),
                            f"Unexpected error: {worker_err}",
                            kind='failed',
                        )

                # Watchdog pass — log once per stuck future.
                now = time.monotonic()
                for f in pending:
                    if f in warned_about or not f.running():
                        continue
                    elapsed = now - running_since.setdefault(f, now)
                    if elapsed >= _HUNG_WORKER_THRESHOLD_SECONDS:
                        item = future_to_item.get(f, {})
                        track_title = (item.get('track') or {}).get('title', 'Unknown')
                        logger.warning(
                            f"[Reorganize] Worker stuck for {elapsed:.0f}s on track "
                            f"'{track_title}' — leaving it running, other workers continuing."
                        )
                        warned_about.add(f)

    finally:
        # Best-effort cleanup of the staging dir.
        try:
            if os.path.isdir(staging_album_dir):
                shutil.rmtree(staging_album_dir, ignore_errors=True)
        except Exception:  # noqa: S110 — finally-block cleanup, logger may be torn down
            pass

    # Best-effort cleanup of source directories. For each touched dir
    # that has no audio files left (i.e. every track in this dir was
    # successfully moved), delete album-level sidecars (cover.jpg,
    # folder.jpg, etc.) so the dir is empty enough for the empty-dir
    # pruner to take it. If audio remains (a track failed to move),
    # leave everything alone so the user can see what's still there.
    for src_dir in src_dirs_touched:
        try:
            if _has_remaining_audio(src_dir):
                continue
            _delete_album_sidecars(src_dir)
        except Exception:  # noqa: S110 — finally-block cleanup, logger may be torn down
            pass

    if cleanup_empty_dir_fn:
        for src_dir in src_dirs_touched:
            try:
                cleanup_empty_dir_fn(src_dir)
            except Exception:  # noqa: S110 — finally-block cleanup, logger may be torn down
                pass

    # Prune empty *destination* siblings — e.g. when a previous
    # failed reorganize attempt left ``Artist/Album-Sibling/`` dirs
    # behind that we never end up using, OR when a current-run
    # post-process created a destination dir then failed AcoustID
    # before landing the file. Walk up from any successful
    # destination to the artist folder, then prune one level of
    # empty children. Bounded depth = safer than recursive sweep.
    if transfer_dir and dst_dirs_touched:
        artist_dirs = set()
        for dst in dst_dirs_touched:
            artist = _find_artist_dir(dst, transfer_dir)
            if artist:
                artist_dirs.add(artist)
        for artist_dir in artist_dirs:
            _prune_empty_album_dirs(artist_dir)

    return summary
|
|
|
|
|
|
|
|
|
def _find_artist_dir(dest_path: str, transfer_dir: str) -> Optional[str]:
|
|
|
"""Walk up from ``dest_path`` until the parent equals ``transfer_dir``;
|
|
|
the directory at that point is the artist folder. Returns None if
|
|
|
``dest_path`` isn't inside ``transfer_dir`` at all."""
|
|
|
if not transfer_dir:
|
|
|
return None
|
|
|
transfer_norm = os.path.normpath(transfer_dir)
|
|
|
cur = os.path.normpath(dest_path)
|
|
|
while True:
|
|
|
parent = os.path.dirname(cur)
|
|
|
if parent == cur:
|
|
|
return None # filesystem root
|
|
|
if os.path.normpath(parent) == transfer_norm:
|
|
|
return cur
|
|
|
cur = parent
|
|
|
|
|
|
|
|
|
def _prune_empty_album_dirs(artist_dir: str) -> None:
|
|
|
"""Remove direct subdirectories of ``artist_dir`` that are empty.
|
|
|
Single-level prune: deliberately doesn't recurse — we want to
|
|
|
catch leftover album-sibling folders without aggressively touching
|
|
|
the user's nested directory tree.
|
|
|
|
|
|
Also walks one level deeper into each album dir to remove empty
|
|
|
Disc-N subfolders that previous runs may have created."""
|
|
|
if not os.path.isdir(artist_dir):
|
|
|
return
|
|
|
try:
|
|
|
children = list(os.listdir(artist_dir))
|
|
|
except OSError:
|
|
|
return
|
|
|
for entry in children:
|
|
|
album_path = os.path.join(artist_dir, entry)
|
|
|
if not os.path.isdir(album_path):
|
|
|
continue
|
|
|
# First pass: prune empty Disc-N subfolders inside this album.
|
|
|
try:
|
|
|
for sub in list(os.listdir(album_path)):
|
|
|
disc_path = os.path.join(album_path, sub)
|
|
|
if os.path.isdir(disc_path):
|
|
|
try:
|
|
|
if not os.listdir(disc_path):
|
|
|
os.rmdir(disc_path)
|
|
|
except OSError:
|
|
|
pass
|
|
|
except OSError:
|
|
|
pass
|
|
|
# Then: if the whole album dir is now empty, prune it.
|
|
|
try:
|
|
|
if not os.listdir(album_path):
|
|
|
os.rmdir(album_path)
|
|
|
logger.info(f"[Reorganize] Pruned empty album dir: {album_path}")
|
|
|
except OSError:
|
|
|
pass
|
|
|
|
|
|
|
|
|
# Sidecar / cleanup helpers --------------------------------------------------
|
|
|
|
|
|
# Sidecars that live alongside ONE audio file (same filename stem).
|
|
|
_TRACK_SIDECAR_EXTS = ('.lrc', '.nfo', '.txt', '.cue', '.json')
|
|
|
|
|
|
# Sidecars that live at the ALBUM level (one per directory).
|
|
|
_ALBUM_SIDECARS = (
|
|
|
'cover.jpg', 'cover.jpeg', 'cover.png',
|
|
|
'folder.jpg', 'folder.png',
|
|
|
'front.jpg', 'front.png',
|
|
|
'album.jpg', 'album.png',
|
|
|
'artwork.jpg', 'artwork.png',
|
|
|
)
|
|
|
|
|
|
# Audio extensions used to decide whether a source directory still has
|
|
|
# tracks the user might care about (i.e. a per-track failure left audio
|
|
|
# behind that we shouldn't strip the cover art from).
|
|
|
_AUDIO_EXTS = frozenset(
|
|
|
{'.flac', '.mp3', '.m4a', '.ogg', '.opus', '.wav', '.aac', '.wma', '.mp4'}
|
|
|
)
|
|
|
|
|
|
|
|
|
def _find_sibling_audio_files(audio_path: str) -> list:
    """Return other-format audio files stored next to ``audio_path``.

    A sibling is a regular file in the same directory as ``audio_path``
    whose filename stem is identical to the canonical file's stem, whose
    extension (compared case-insensitively) is in ``_AUDIO_EXTS``, and
    which is not the canonical file itself.

    Background (Discord report, Foxxify): with the lossy-copy feature
    enabled a user ends up with ``track.flac`` AND ``track.opus`` side by
    side. Reorganize is DB-driven and tracks only ONE file per track, so
    the other format stays behind in the old location and the
    source-directory cleanup never fires because audio is still present.
    The caller uses the returned paths to move those orphans alongside
    the canonical file at its new destination.

    Returns:
        Full paths of the sibling files, in directory-listing order.
        An empty list when the source directory does not exist or
        cannot be listed (listing errors are swallowed, not raised).
    """
    folder, own_name = os.path.split(audio_path)
    if not os.path.isdir(folder):
        return []
    own_stem = os.path.splitext(own_name)[0]

    try:
        names = os.listdir(folder)
    except OSError:
        return []

    def _is_sibling(name):
        # Check order matters: cheapest string comparisons first, the
        # filesystem stat last.
        if name == own_name:
            return False
        stem, ext = os.path.splitext(name)
        return (
            stem == own_stem
            and ext.lower() in _AUDIO_EXTS
            and os.path.isfile(os.path.join(folder, name))
        )

    return [os.path.join(folder, name) for name in names if _is_sibling(name)]
|
|
|
|
|
|
|
|
|
def _move_sibling_to_destination(sibling_src: str, canonical_dst: str) -> Optional[str]:
    """Move a sibling-format audio file next to the relocated canonical.

    The target path is built from the canonical destination's directory
    and filename stem plus the sibling's own extension, so a canonical
    that was renamed by the template gets a matching sibling name.

    Example: canonical at ``/library/Artist/Album/01 Track.opus`` and
    sibling source ``/old/01 Track.flac`` give the target
    ``/library/Artist/Album/01 Track.flac``.

    Args:
        sibling_src: Current path of the sibling-format file.
        canonical_dst: Final path of the canonical file.

    Returns:
        The sibling's destination path on success (also when it is
        already at that path and nothing needs moving); ``None`` when
        the move fails. Failures are logged at warning level and not
        raised -- sibling moves are best-effort.
    """
    target_dir, canonical_name = os.path.split(canonical_dst)
    target_stem = os.path.splitext(canonical_name)[0]
    extension = os.path.splitext(sibling_src)[1]
    target = os.path.join(target_dir, target_stem + extension)

    # Nothing to do when source and target resolve to the same path.
    if os.path.normpath(target) == os.path.normpath(sibling_src):
        return target

    try:
        os.makedirs(target_dir, exist_ok=True)
        shutil.move(sibling_src, target)
    except OSError as e:
        logger.warning(
            "[Reorganize] Couldn't move sibling-format file %s → %s: %s",
            sibling_src, target, e,
        )
        return None
    return target
|
|
|
|
|
|
|
|
|
def _delete_track_sidecars(audio_path: str) -> None:
    """Remove per-track sidecar files belonging to ``audio_path``.

    For every extension in ``_TRACK_SIDECAR_EXTS`` the file with the
    same directory and filename stem as ``audio_path`` is deleted if it
    exists as a regular file. Best-effort: a failed removal is logged at
    debug level and never raised.
    """
    folder, filename = os.path.split(audio_path)
    stem = os.path.splitext(filename)[0]

    for suffix in _TRACK_SIDECAR_EXTS:
        sidecar = os.path.join(folder, stem + suffix)
        if not os.path.isfile(sidecar):
            continue
        try:
            os.remove(sidecar)
        except OSError as e:
            logger.debug(f"[Reorganize] Couldn't remove sidecar {sidecar}: {e}")
|
|
|
|
|
|
|
|
|
def _delete_album_sidecars(src_dir: str) -> None:
    """Remove album-level sidecars (cover.jpg, folder.jpg, ...) from ``src_dir``.

    Each filename in ``_ALBUM_SIDECARS`` is deleted if it exists as a
    regular file directly inside ``src_dir``. Intended for end-of-run
    cleanup once no audio files remain in the directory. Best-effort: a
    failed removal is logged at debug level and never raised.
    """
    for filename in _ALBUM_SIDECARS:
        sidecar = os.path.join(src_dir, filename)
        if not os.path.isfile(sidecar):
            continue
        try:
            os.remove(sidecar)
        except OSError as e:
            logger.debug(f"[Reorganize] Couldn't remove album sidecar {sidecar}: {e}")
|
|
|
|
|
|
|
|
|
def _has_remaining_audio(directory: str) -> bool:
    """Tell whether ``directory`` directly contains at least one audio file.

    Only regular files whose extension (case-insensitive) is in
    ``_AUDIO_EXTS`` count; subdirectories are not descended into. This
    is the safety check before stripping album-level sidecars: if a
    track failed to move, its cover art and friends stay in place.

    Returns:
        ``False`` when ``directory`` is not a directory or holds no
        audio files; ``True`` when audio is found, and also when the
        directory cannot be listed (safer to assume "yes, leave it").
    """
    if not os.path.isdir(directory):
        return False

    try:
        return any(
            os.path.isfile(os.path.join(directory, entry))
            and os.path.splitext(entry)[1].lower() in _AUDIO_EXTS
            for entry in os.listdir(directory)
        )
    except OSError:
        # Can't inspect the directory -- err on the side of keeping files.
        return True
|