From 177bd853551857e540dc8804565c6e9fb17636d2 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Thu, 14 May 2026 06:53:36 -0700 Subject: [PATCH] Configurable duration tolerance for downloaded-file integrity check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously hardcoded at 3s (5s for tracks >10min) — files drifting past that got quarantined with no user override. Live recordings, alternate masterings, and some legitimate uploads routinely drift further. New setting `post_processing.duration_tolerance_seconds`. Default 0 means "use auto-scaled defaults" (unchanged behavior for users who don't touch it). Positive value overrides the per-track defaults. Capped at 60s — past that the check is effectively off. Logic lifted to pure helper `resolve_duration_tolerance` in file_integrity.py. Coerces every plausible input (None / empty / zero / negative / unparseable / above-cap / numeric string / float) to either a float override or None for auto. 12 tests pin every shape. Wired into `core/imports/pipeline.py` at the integrity-check call site — runs for ALL matched downloads (Soulseek / Tidal / Qobuz / HiFi / YouTube / Deezer-direct) since they all share that pipeline. Settings UI input under Settings → Metadata → Post-Processing. --- core/imports/file_integrity.py | 31 ++++++++ core/imports/pipeline.py | 16 ++++- .../test_duration_tolerance_resolution.py | 70 +++++++++++++++++++ webui/index.html | 5 ++ webui/static/helper.js | 1 + webui/static/settings.js | 2 + 6 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 tests/imports/test_duration_tolerance_resolution.py diff --git a/core/imports/file_integrity.py b/core/imports/file_integrity.py index 82ced0d1..a0ebb18a 100644 --- a/core/imports/file_integrity.py +++ b/core/imports/file_integrity.py @@ -52,6 +52,37 @@ _DEFAULT_LENGTH_TOLERANCE_S = 3.0 _LENGTH_TOLERANCE_LONG_TRACK_S = 5.0 _LONG_TRACK_THRESHOLD_S = 600.0 # 10 minutes +# Upper bound for the user-configurable override. Anything past 60s +# means the check is effectively off — cap defends against accidental +# nonsense like 9999 making logs misleading. Users who genuinely want +# to disable the check can set 60. +_MAX_USER_TOLERANCE_S = 60.0 + + +def resolve_duration_tolerance(value: Any) -> Optional[float]: + """Coerce a user-configured tolerance value to a float override. + + Returns: + - None when value is missing / 0 / negative / unparseable, so + callers fall back to the auto-scaled defaults (3s/5s). + - float in (0, _MAX_USER_TOLERANCE_S] when value is a positive + numeric string or float — clamped to the upper bound. + + Pure helper. No I/O. Drives the `length_tolerance_s` override on + `check_audio_integrity`. + """ + if value is None: + return None + try: + parsed = float(value) + except (TypeError, ValueError): + return None + if parsed <= 0: + return None + if parsed > _MAX_USER_TOLERANCE_S: + return _MAX_USER_TOLERANCE_S + return parsed + @dataclass class IntegrityResult: diff --git a/core/imports/pipeline.py b/core/imports/pipeline.py index d7b2b7d0..2e103953 100644 --- a/core/imports/pipeline.py +++ b/core/imports/pipeline.py @@ -32,7 +32,7 @@ from core.imports.context import ( get_import_track_info, normalize_import_context, ) -from core.imports.file_integrity import check_audio_integrity +from core.imports.file_integrity import check_audio_integrity, resolve_duration_tolerance from core.imports.filename import extract_track_number_from_filename from core.imports.guards import check_flac_bit_depth, move_to_quarantine from core.imports.side_effects import ( @@ -155,8 +155,20 @@ def post_process_matched_download(context_key, context, file_path, runtime, meta except Exception: _expected_duration_ms = None + # User-configurable tolerance override. None = use built-in + # auto-scaled defaults (3s normal / 5s for tracks >10min). Set + # higher (e.g. 10) when matched files routinely drift from the + # source's reported duration (live recordings, alternate + # masterings, etc). + _duration_tolerance_override = resolve_duration_tolerance( + config_manager.get('post_processing.duration_tolerance_seconds', 0) + ) try: - integrity = check_audio_integrity(file_path, _expected_duration_ms) + integrity = check_audio_integrity( + file_path, + _expected_duration_ms, + length_tolerance_s=_duration_tolerance_override, + ) except Exception as integrity_error: logger.error(f"[Integrity] Check raised unexpectedly (continuing): {integrity_error}") integrity = None diff --git a/tests/imports/test_duration_tolerance_resolution.py b/tests/imports/test_duration_tolerance_resolution.py new file mode 100644 index 00000000..a5ba6386 --- /dev/null +++ b/tests/imports/test_duration_tolerance_resolution.py @@ -0,0 +1,70 @@ +from core.imports.file_integrity import _MAX_USER_TOLERANCE_S, resolve_duration_tolerance + + +def test_none_returns_none_so_caller_uses_auto_scaled_default(): + assert resolve_duration_tolerance(None) is None + + +def test_missing_or_empty_string_returns_none(): + assert resolve_duration_tolerance("") is None + assert resolve_duration_tolerance(" ") is None + + +def test_zero_returns_none_to_avoid_strict_mode_ambiguity(): + # 0 means "unset" — never strict-mode (which would fail any drift). + # Users who want strict have no use-case; users who want disabled + # set a high value (capped to _MAX_USER_TOLERANCE_S). + assert resolve_duration_tolerance(0) is None + assert resolve_duration_tolerance(0.0) is None + assert resolve_duration_tolerance("0") is None + + +def test_negative_returns_none(): + assert resolve_duration_tolerance(-1) is None + assert resolve_duration_tolerance(-3.5) is None + assert resolve_duration_tolerance("-10") is None + + +def test_positive_integer_passes_through_as_float(): + assert resolve_duration_tolerance(5) == 5.0 + assert resolve_duration_tolerance(10) == 10.0 + + +def test_positive_float_passes_through(): + assert resolve_duration_tolerance(3.5) == 3.5 + assert resolve_duration_tolerance(0.1) == 0.1 + + +def test_numeric_string_parsed(): + assert resolve_duration_tolerance("5") == 5.0 + assert resolve_duration_tolerance("3.5") == 3.5 + assert resolve_duration_tolerance("10.0") == 10.0 + + +def test_unparseable_string_returns_none(): + assert resolve_duration_tolerance("abc") is None + assert resolve_duration_tolerance("five") is None + assert resolve_duration_tolerance("3s") is None + + +def test_above_max_clamped_to_ceiling(): + assert resolve_duration_tolerance(9999) == _MAX_USER_TOLERANCE_S + assert resolve_duration_tolerance(_MAX_USER_TOLERANCE_S + 1) == _MAX_USER_TOLERANCE_S + + +def test_at_ceiling_passes_through(): + assert resolve_duration_tolerance(_MAX_USER_TOLERANCE_S) == _MAX_USER_TOLERANCE_S + + +def test_non_numeric_types_return_none(): + assert resolve_duration_tolerance([5]) is None + assert resolve_duration_tolerance({"value": 5}) is None + assert resolve_duration_tolerance(object()) is None + + +def test_bool_treated_as_int_python_semantics(): + # Python: bool is int subclass. True -> 1.0, False -> 0 -> None. + # Documented behavior, not a bug — config values won't realistically + # be booleans for a numeric setting. + assert resolve_duration_tolerance(True) == 1.0 + assert resolve_duration_tolerance(False) is None diff --git a/webui/index.html b/webui/index.html index ecb109f9..c0aa2fb9 100644 --- a/webui/index.html +++ b/webui/index.html @@ -5038,6 +5038,11 @@ Analyzes loudness and writes ReplayGain track gain/peak tags. Requires ffmpeg. Adds a few seconds per track. +
+ + + Maximum drift between the file's actual length and the metadata source's expected length before the file is quarantined. 0 = auto (3s normal, 5s for tracks >10min). Raise this if tracks routinely quarantine for being a few seconds off (live recordings, alternate masterings, etc). Capped at 60s. +
diff --git a/webui/static/helper.js b/webui/static/helper.js index f07b5f1f..d9441983 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3416,6 +3416,7 @@ const WHATS_NEW = { '2.5.2': [ // --- May 13, 2026 — 2.5.2 release --- { date: 'May 13, 2026 — 2.5.2 release' }, + { title: 'Configurable Duration Tolerance For Quarantined Tracks', desc: 'discord question: tracks were quarantining when their actual length drifted by a few seconds from what spotify/musicbrainz reported (3s tolerance hardcoded, 5s for tracks >10min). live recordings, alternate masterings, and some legitimate uploads routinely drift more than that. new setting on settings → metadata → post-processing: "duration tolerance (seconds)". `0 = auto` (preserves the existing 3s/5s defaults). raise it to 10 / 15 / 20 if your library has a lot of drift-prone material. capped at 60s — past that the check is effectively off. applies to ALL matched downloads (soulseek / tidal / qobuz / hifi / youtube / deezer-direct) since they all flow through the same post-process integrity check. logic lifted to a pure helper `core/imports/file_integrity.py:resolve_duration_tolerance` that coerces the config value (none / empty / 0 / negative / unparseable / above-cap) to either a float override or `None` for the auto-scaled default. 12 tests pin every input shape.', page: 'settings' }, { title: 'Soulseek Downloads: Multi-Artist Tags Now Get Written Properly', desc: 'discord report: tracks downloaded via soulseek were getting tagged with primary artist only (no collab artists), while the same track downloaded via deezer tagged everyone correctly. trace: the soulseek matched-download context constructed `original_search_result` with `artist` (singular string) but no `artists` (list), even though the full multi-artist list lived on `track_info` (the matched spotify track object). `core/metadata/source.py:extract_source_metadata` only read `original_search.artists`, so soulseek path always fell through to the single-artist branch. fix: lifted artist resolution into a pure helper `core/metadata/artist_resolution.py:resolve_track_artists` that walks `original_search.artists` → `track_info.artists` → `artist_dict.name` fallback chain. handles all three list-item shapes (spotify-style dicts, bare strings, anything else stringified). 13 tests pin the resolution order, fallback chain, mixed-shape normalization, whitespace stripping, empty/none handling. composes with the existing deezer per-track upgrade (still fires when single-artist + track_id available) and feat_in_title / artist_separator settings (still drive the joined ARTIST string downstream).', page: 'downloads' }, { title: 'Download Missing Modal: Tracklist Got A Polish Pass', desc: 'visual tune-up only — column layout untouched. hairline row dividers, accent gradient + edge bar on hover, monospace track numbers (glow accent on row hover), monospace tabular duration. status text in both library-match + download-status columns picks up a leading colored dot with a soft halo (green found / amber missing / blue checking / orange downloading / red failed) and pulses while in-flight. artist column centered. soft scrollbar.', page: 'downloads' }, { title: 'Search Source Picker: Fix Default Always Sticking To Spotify', desc: 'enhanced search + global search source picker always defaulted to spotify even when the user\'s primary metadata source was deezer / itunes / discogs / etc. trace: `shared-helpers.js:createSearchController` reads `/status.metadata_source` to pick the initial active icon, then checks `SOURCE_LABELS[src]` to validate. backend was returning `metadata_source` as a dict (`{source, connected, response_time, ...}` — used elsewhere for connection-state display), so `SOURCE_LABELS[]` was always undefined, the `if` guard never fired, and `state.activeSource` silently stayed at the hardcoded `\'spotify\'` default. fix: read `.source` off the dict (with forward-compat fallback to plain-string in case any older /status response shape predates the dict change). other consumers (core.js sidebar tile, helper.js status checker, search.js display) already used `?.source` correctly — this was the only stale call site.', page: 'search' }, diff --git a/webui/static/settings.js b/webui/static/settings.js index 5364f643..c38a8fda 100644 --- a/webui/static/settings.js +++ b/webui/static/settings.js @@ -970,6 +970,7 @@ async function loadSettingsData() { document.getElementById('prefer-caa-art').checked = settings.metadata_enhancement?.prefer_caa_art === true; document.getElementById('lrclib-enabled').checked = settings.metadata_enhancement?.lrclib_enabled !== false; document.getElementById('replaygain-enabled').checked = settings.post_processing?.replaygain_enabled === true; + document.getElementById('duration-tolerance-seconds').value = settings.post_processing?.duration_tolerance_seconds ?? 0; // Load service master toggles document.getElementById('embed-spotify').checked = settings.spotify?.embed_tags !== false; document.getElementById('embed-itunes').checked = settings.itunes?.embed_tags !== false; @@ -2747,6 +2748,7 @@ async function saveSettings(quiet = false) { }, post_processing: { replaygain_enabled: document.getElementById('replaygain-enabled').checked, + duration_tolerance_seconds: parseFloat(document.getElementById('duration-tolerance-seconds').value) || 0, }, library: { music_paths: collectMusicPaths(),