Enforce duration tolerance for strict sources

Add duration tolerance logic and pre-download rejection for structured sources (tidal, qobuz, hifi, deezer_dl, amazon) when candidate duration deviates beyond allowed tolerance. Introduces helper functions _duration_tolerance_seconds and _duration_mismatch_exceeds_integrity_tolerance and uses resolve_duration_tolerance from core.imports.file_integrity. Log and skip candidates that would fail post-processing integrity checks to avoid wasted downloads. Update tests to include matching engine stub and new cases covering rejection and acceptance based on duration tolerance; also adjust imports and test fixtures.
pull/666/head
Broque Thomas 6 days ago
parent 136d665c8a
commit 2fc08e199e

@ -9,6 +9,7 @@ import logging
import re
from config.settings import config_manager
from core.imports.file_integrity import resolve_duration_tolerance
logger = logging.getLogger(__name__)
@ -61,6 +62,24 @@ def filter_soundcloud_previews(results, expected_track):
return [r for r in results if not _is_preview(r)]
def _duration_tolerance_seconds(expected_duration_ms):
    """Return the allowed duration drift, in seconds, for the expected track.

    The ``post_processing.duration_tolerance_seconds`` setting (read with a
    default of 0) is passed through ``resolve_duration_tolerance``; any
    non-None result is returned as-is. Otherwise the fallback is 5.0 when
    the expected length exceeds 600 seconds and 3.0 for anything shorter.

    Args:
        expected_duration_ms: Expected track length in milliseconds.
    """
    configured = config_manager.get('post_processing.duration_tolerance_seconds', 0)
    resolved = resolve_duration_tolerance(configured)
    if resolved is not None:
        return resolved

    # No usable override: pick the built-in tolerance from the track length.
    if expected_duration_ms / 1000.0 > 600.0:
        return 5.0
    return 3.0
def _duration_mismatch_exceeds_integrity_tolerance(expected_duration_ms, candidate_duration_ms):
if not expected_duration_ms or not candidate_duration_ms:
return False
tolerance = _duration_tolerance_seconds(expected_duration_ms)
drift = abs((candidate_duration_ms / 1000.0) - (expected_duration_ms / 1000.0))
return drift > tolerance
def get_valid_candidates(results, spotify_track, query):
"""
This function is a direct port from sync.py. It scores and filters
@ -98,7 +117,21 @@ def get_valid_candidates(results, spotify_track, query):
expected_is_version = any(kw in expected_title_lower for kw in _version_keywords)
scored = []
_strict_duration_sources = {'tidal', 'qobuz', 'hifi', 'deezer_dl', 'amazon'}
for r in results:
if (
r.username in _strict_duration_sources
and _duration_mismatch_exceeds_integrity_tolerance(expected_duration, r.duration or 0)
):
logger.info(
"[%s] Rejecting candidate due to duration mismatch before download: "
"expected %.1fs, candidate %.1fs",
source_label,
expected_duration / 1000.0,
(r.duration or 0) / 1000.0,
)
continue
# Score using matching engine's generic scorer (same weights as Soulseek)
confidence, match_type = matching_engine.score_track_match(
source_title=expected_title,

@ -11,19 +11,32 @@ from __future__ import annotations
from dataclasses import dataclass
from typing import Optional
from core.downloads.validation import filter_soundcloud_previews
from core.downloads import validation
from core.downloads.validation import filter_soundcloud_previews, get_valid_candidates
@dataclass
class _Track:
duration_ms: int
name: str = 'Song'
artists: tuple[str, ...] = ('Artist',)
@dataclass
class _Candidate:
username: str
duration: Optional[int] # milliseconds
title: str = ''
title: str = 'Song'
artist: str = 'Artist'
filename: str = 'candidate'
class _MatchingEngine:
def score_track_match(self, **kwargs):
return 0.99, 'core_title_match'
def normalize_string(self, text):
return (text or '').lower()
def test_drops_soundcloud_30s_preview_when_expected_long():
@ -90,3 +103,23 @@ def test_keeps_soundcloud_candidate_at_threshold():
# 110s passes both checks: > 35s AND > 100s (half of 200s)
cand = _Candidate(username='soundcloud', duration=110_000)
assert filter_soundcloud_previews([cand], expected) == [cand]
def test_rejects_tidal_candidate_that_would_fail_integrity_duration(monkeypatch):
    """A tidal candidate far shorter than the expected track is filtered out.

    The stub matching engine scores every candidate as a 0.99 match, so an
    empty result can only come from the duration check.
    """
    monkeypatch.setattr(validation, 'matching_engine', _MatchingEngine())

    track = _Track(duration_ms=338_000)
    # 30s candidate against a 338s expected track.
    mismatched = _Candidate(username='tidal', duration=30_000)

    survivors = get_valid_candidates([mismatched], track, 'Artist Song')
    assert survivors == []
def test_keeps_tidal_candidate_inside_integrity_duration_tolerance(monkeypatch):
    """A tidal candidate only 2s off the expected length is kept.

    NOTE(review): assumes no configured tolerance override tighter than 2s
    is active in the test environment — confirm against the config fixture.
    """
    monkeypatch.setattr(validation, 'matching_engine', _MatchingEngine())

    track = _Track(duration_ms=338_000)
    close_match = _Candidate(username='tidal', duration=340_000)

    assert get_valid_candidates([close_match], track, 'Artist Song') == [close_match]

Loading…
Cancel
Save