Configurable duration tolerance for downloaded-file integrity check

Previously hardcoded at 3s (5s for tracks >10min) — files drifting
past that got quarantined with no user override. Live recordings,
alternate masterings, and some legitimate uploads routinely drift
further.

New setting `post_processing.duration_tolerance_seconds`. Default 0
means "use auto-scaled defaults" (unchanged behavior for users who
don't touch it). Positive value overrides the per-track defaults.
Capped at 60s — past that the check is effectively off.

Logic lifted to pure helper `resolve_duration_tolerance` in
file_integrity.py. Coerces every plausible input (None / empty /
zero / negative / unparseable / above-cap / numeric string / float)
to either a float override or None for auto. 12 tests pin every
shape.

Wired into `core/imports/pipeline.py` at the integrity-check call
site — runs for ALL matched downloads (Soulseek / Tidal / Qobuz /
HiFi / YouTube / Deezer-direct) since they all share that pipeline.
Settings UI input under Settings → Metadata → Post-Processing.
pull/589/head
Broque Thomas 1 day ago
parent 977f3b7415
commit 177bd85355

@ -52,6 +52,37 @@ _DEFAULT_LENGTH_TOLERANCE_S = 3.0
_LENGTH_TOLERANCE_LONG_TRACK_S = 5.0
_LONG_TRACK_THRESHOLD_S = 600.0 # 10 minutes
# Upper bound for the user-configurable override. Anything past 60s
# means the check is effectively off — cap defends against accidental
# nonsense like 9999 making logs misleading. Users who genuinely want
# to disable the check can set 60.
_MAX_USER_TOLERANCE_S = 60.0
def resolve_duration_tolerance(value: Any) -> Optional[float]:
"""Coerce a user-configured tolerance value to a float override.
Returns:
- None when value is missing / 0 / negative / unparseable, so
callers fall back to the auto-scaled defaults (3s/5s).
- float in (0, _MAX_USER_TOLERANCE_S] when value is a positive
numeric string or float clamped to the upper bound.
Pure helper. No I/O. Drives the `length_tolerance_s` override on
`check_audio_integrity`.
"""
if value is None:
return None
try:
parsed = float(value)
except (TypeError, ValueError):
return None
if parsed <= 0:
return None
if parsed > _MAX_USER_TOLERANCE_S:
return _MAX_USER_TOLERANCE_S
return parsed
@dataclass
class IntegrityResult:

@ -32,7 +32,7 @@ from core.imports.context import (
get_import_track_info,
normalize_import_context,
)
from core.imports.file_integrity import check_audio_integrity
from core.imports.file_integrity import check_audio_integrity, resolve_duration_tolerance
from core.imports.filename import extract_track_number_from_filename
from core.imports.guards import check_flac_bit_depth, move_to_quarantine
from core.imports.side_effects import (
@ -155,8 +155,20 @@ def post_process_matched_download(context_key, context, file_path, runtime, meta
except Exception:
_expected_duration_ms = None
# User-configurable tolerance override. None = use built-in
# auto-scaled defaults (3s normal / 5s for tracks >10min). Set
# higher (e.g. 10) when matched files routinely drift from the
# source's reported duration (live recordings, alternate
# masterings, etc).
_duration_tolerance_override = resolve_duration_tolerance(
config_manager.get('post_processing.duration_tolerance_seconds', 0)
)
try:
integrity = check_audio_integrity(file_path, _expected_duration_ms)
integrity = check_audio_integrity(
file_path,
_expected_duration_ms,
length_tolerance_s=_duration_tolerance_override,
)
except Exception as integrity_error:
logger.error(f"[Integrity] Check raised unexpectedly (continuing): {integrity_error}")
integrity = None

@ -0,0 +1,70 @@
from core.imports.file_integrity import _MAX_USER_TOLERANCE_S, resolve_duration_tolerance
def test_none_returns_none_so_caller_uses_auto_scaled_default():
assert resolve_duration_tolerance(None) is None
def test_missing_or_empty_string_returns_none():
assert resolve_duration_tolerance("") is None
assert resolve_duration_tolerance(" ") is None
def test_zero_returns_none_to_avoid_strict_mode_ambiguity():
# 0 means "unset" — never strict-mode (which would fail any drift).
# Users who want strict have no use-case; users who want disabled
# set a high value (capped to _MAX_USER_TOLERANCE_S).
assert resolve_duration_tolerance(0) is None
assert resolve_duration_tolerance(0.0) is None
assert resolve_duration_tolerance("0") is None
def test_negative_returns_none():
assert resolve_duration_tolerance(-1) is None
assert resolve_duration_tolerance(-3.5) is None
assert resolve_duration_tolerance("-10") is None
def test_positive_integer_passes_through_as_float():
assert resolve_duration_tolerance(5) == 5.0
assert resolve_duration_tolerance(10) == 10.0
def test_positive_float_passes_through():
assert resolve_duration_tolerance(3.5) == 3.5
assert resolve_duration_tolerance(0.1) == 0.1
def test_numeric_string_parsed():
assert resolve_duration_tolerance("5") == 5.0
assert resolve_duration_tolerance("3.5") == 3.5
assert resolve_duration_tolerance("10.0") == 10.0
def test_unparseable_string_returns_none():
assert resolve_duration_tolerance("abc") is None
assert resolve_duration_tolerance("five") is None
assert resolve_duration_tolerance("3s") is None
def test_above_max_clamped_to_ceiling():
assert resolve_duration_tolerance(9999) == _MAX_USER_TOLERANCE_S
assert resolve_duration_tolerance(_MAX_USER_TOLERANCE_S + 1) == _MAX_USER_TOLERANCE_S
def test_at_ceiling_passes_through():
assert resolve_duration_tolerance(_MAX_USER_TOLERANCE_S) == _MAX_USER_TOLERANCE_S
def test_non_numeric_types_return_none():
assert resolve_duration_tolerance([5]) is None
assert resolve_duration_tolerance({"value": 5}) is None
assert resolve_duration_tolerance(object()) is None
def test_bool_treated_as_int_python_semantics():
# Python: bool is int subclass. True -> 1.0, False -> 0 -> None.
# Documented behavior, not a bug — config values won't realistically
# be booleans for a numeric setting.
assert resolve_duration_tolerance(True) == 1.0
assert resolve_duration_tolerance(False) is None

@ -5038,6 +5038,11 @@
</label>
<small class="settings-hint">Analyzes loudness and writes ReplayGain track gain/peak tags. Requires ffmpeg. Adds a few seconds per track.</small>
</div>
<div class="form-group">
<label for="duration-tolerance-seconds">Duration tolerance (seconds)</label>
<input type="number" id="duration-tolerance-seconds" min="0" max="60" step="0.5" value="0" style="width: 100px;">
<small class="settings-hint">Maximum drift between the file's actual length and the metadata source's expected length before the file is quarantined. <strong>0 = auto</strong> (3s normal, 5s for tracks &gt;10min). Raise this if tracks routinely quarantine for being a few seconds off (live recordings, alternate masterings, etc). Capped at 60s.</small>
</div>
</div>
<!-- Tag Embedding — per-tag toggles grouped by source -->

@ -3416,6 +3416,7 @@ const WHATS_NEW = {
'2.5.2': [
// --- May 13, 2026 — 2.5.2 release ---
{ date: 'May 13, 2026 — 2.5.2 release' },
{ title: 'Configurable Duration Tolerance For Quarantined Tracks', desc: 'discord question: tracks were quarantining when their actual length drifted by a few seconds from what spotify/musicbrainz reported (3s tolerance hardcoded, 5s for tracks >10min). live recordings, alternate masterings, and some legitimate uploads routinely drift more than that. new setting on settings → metadata → post-processing: "duration tolerance (seconds)". `0 = auto` (preserves the existing 3s/5s defaults). raise it to 10 / 15 / 20 if your library has a lot of drift-prone material. capped at 60s — past that the check is effectively off. applies to ALL matched downloads (soulseek / tidal / qobuz / hifi / youtube / deezer-direct) since they all flow through the same post-process integrity check. logic lifted to a pure helper `core/imports/file_integrity.py:resolve_duration_tolerance` that coerces the config value (none / empty / 0 / negative / unparseable / above-cap) to either a float override or `None` for the auto-scaled default. 12 tests pin every input shape.', page: 'settings' },
{ title: 'Soulseek Downloads: Multi-Artist Tags Now Get Written Properly', desc: 'discord report: tracks downloaded via soulseek were getting tagged with primary artist only (no collab artists), while the same track downloaded via deezer tagged everyone correctly. trace: the soulseek matched-download context constructed `original_search_result` with `artist` (singular string) but no `artists` (list), even though the full multi-artist list lived on `track_info` (the matched spotify track object). `core/metadata/source.py:extract_source_metadata` only read `original_search.artists`, so soulseek path always fell through to the single-artist branch. fix: lifted artist resolution into a pure helper `core/metadata/artist_resolution.py:resolve_track_artists` that walks `original_search.artists` → `track_info.artists` → `artist_dict.name` fallback chain. handles all three list-item shapes (spotify-style dicts, bare strings, anything else stringified). 13 tests pin the resolution order, fallback chain, mixed-shape normalization, whitespace stripping, empty/none handling. composes with the existing deezer per-track upgrade (still fires when single-artist + track_id available) and feat_in_title / artist_separator settings (still drive the joined ARTIST string downstream).', page: 'downloads' },
{ title: 'Download Missing Modal: Tracklist Got A Polish Pass', desc: 'visual tune-up only — column layout untouched. hairline row dividers, accent gradient + edge bar on hover, monospace track numbers (glow accent on row hover), monospace tabular duration. status text in both library-match + download-status columns picks up a leading colored dot with a soft halo (green found / amber missing / blue checking / orange downloading / red failed) and pulses while in-flight. artist column centered. soft scrollbar.', page: 'downloads' },
{ title: 'Search Source Picker: Fix Default Always Sticking To Spotify', desc: 'enhanced search + global search source picker always defaulted to spotify even when the user\'s primary metadata source was deezer / itunes / discogs / etc. trace: `shared-helpers.js:createSearchController` reads `/status.metadata_source` to pick the initial active icon, then checks `SOURCE_LABELS[src]` to validate. backend was returning `metadata_source` as a dict (`{source, connected, response_time, ...}` — used elsewhere for connection-state display), so `SOURCE_LABELS[<dict>]` was always undefined, the `if` guard never fired, and `state.activeSource` silently stayed at the hardcoded `\'spotify\'` default. fix: read `.source` off the dict (with forward-compat fallback to plain-string in case any older /status response shape predates the dict change). other consumers (core.js sidebar tile, helper.js status checker, search.js display) already used `?.source` correctly — this was the only stale call site.', page: 'search' },

@ -970,6 +970,7 @@ async function loadSettingsData() {
document.getElementById('prefer-caa-art').checked = settings.metadata_enhancement?.prefer_caa_art === true;
document.getElementById('lrclib-enabled').checked = settings.metadata_enhancement?.lrclib_enabled !== false;
document.getElementById('replaygain-enabled').checked = settings.post_processing?.replaygain_enabled === true;
document.getElementById('duration-tolerance-seconds').value = settings.post_processing?.duration_tolerance_seconds ?? 0;
// Load service master toggles
document.getElementById('embed-spotify').checked = settings.spotify?.embed_tags !== false;
document.getElementById('embed-itunes').checked = settings.itunes?.embed_tags !== false;
@ -2747,6 +2748,7 @@ async function saveSettings(quiet = false) {
},
post_processing: {
replaygain_enabled: document.getElementById('replaygain-enabled').checked,
duration_tolerance_seconds: parseFloat(document.getElementById('duration-tolerance-seconds').value) || 0,
},
library: {
music_paths: collectMusicPaths(),

Loading…
Cancel
Save