From 3c1f614b6ed54fcd656f15742b2b463b7cabe041 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Tue, 28 Apr 2026 12:07:54 -0700 Subject: [PATCH] fix: cast duration_ms to int before :02d format in discovery workers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit yt_dlp sometimes returns float `duration_ms` for YouTube tracks. The discovery workers format the duration with `f"{x // 60000}:{(x % 60000) // 1000:02d}"` — and `:02d` requires an int. When the duration is a float, the format string raises: Unknown format code 'd' for object of type 'float' Caught when running YouTube discovery on a real playlist (bbno$ tracks) — every track failed with status='Error'. Pre-existing bug, surfaced now because of yt_dlp returning float durations on this playlist. Fixed at all 8 sites by casting through `int()` before the `// 60000` and `% 60000` operations: - core/discovery/youtube.py: 2 sites in run_youtube_discovery_worker (cache hit + main result construction). - web_server.py L29238/L29372: 2 sites in _run_listenbrainz_discovery_worker. - web_server.py L40112/L40136/L40161/L40178: 4 sites in the YouTube retry/pre-discovered results assembly path. The `if duration_ms` / `if dur` guard already protects against None and 0, so `int(...)` is only called on truthy numeric values. Tests: 1 new regression test under tests/discovery/test_discovery_youtube.py (`test_float_duration_does_not_crash_format`) — passes a float duration_ms and asserts the worker completes without an error result. Ruff clean. --- core/discovery/youtube.py | 4 ++-- tests/discovery/test_discovery_youtube.py | 16 ++++++++++++++++ web_server.py | 12 ++++++------ 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/core/discovery/youtube.py b/core/discovery/youtube.py index ba4ab268..d9830f38 100644 --- a/core/discovery/youtube.py +++ b/core/discovery/youtube.py @@ -112,7 +112,7 @@ def run_youtube_discovery_worker(url_hash, deps: YoutubeDiscoveryDeps): 'spotify_track': cached_match.get('name', ''), 'spotify_artist': deps.extract_artist_name(cached_match.get('artists', [''])[0]) if cached_match.get('artists') else '', 'spotify_album': cached_match.get('album', {}).get('name', '') if isinstance(cached_match.get('album'), dict) else cached_match.get('album', ''), - 'duration': f"{track['duration_ms'] // 60000}:{(track['duration_ms'] % 60000) // 1000:02d}" if track['duration_ms'] else '0:00', + 'duration': f"{int(track['duration_ms']) // 60000}:{(int(track['duration_ms']) % 60000) // 1000:02d}" if track['duration_ms'] else '0:00', 'discovery_source': discovery_source, 'matched_data': cached_match, 'spotify_data': cached_match @@ -247,7 +247,7 @@ def run_youtube_discovery_worker(url_hash, deps: YoutubeDiscoveryDeps): 'spotify_track': matched_track.name if matched_track else '', 'spotify_artist': deps.extract_artist_name(matched_track.artists[0]) if matched_track else '', 'spotify_album': matched_track.album if matched_track else '', - 'duration': f"{track['duration_ms'] // 60000}:{(track['duration_ms'] % 60000) // 1000:02d}" if track['duration_ms'] else '0:00', + 'duration': f"{int(track['duration_ms']) // 60000}:{(int(track['duration_ms']) % 60000) // 1000:02d}" if track['duration_ms'] else '0:00', 'discovery_source': discovery_source, 'confidence': best_confidence } diff --git a/tests/discovery/test_discovery_youtube.py b/tests/discovery/test_discovery_youtube.py index 36e8ad46..5114f159 100644 --- a/tests/discovery/test_discovery_youtube.py +++ b/tests/discovery/test_discovery_youtube.py @@ -279,6 +279,22 @@ def test_skip_discovery_flag_skips_track(): # Completion: phase + activity feed # --------------------------------------------------------------------------- +def test_float_duration_does_not_crash_format(): + """yt_dlp can return float duration_ms — format string must handle it (regression).""" + states = {} + tr = _track(duration_ms=212345.7) # float, not int + _seed_state('hflt', states, tracks=[tr]) + deps = _build_deps(states=states) + + # Before fix: raised "Unknown format code 'd' for object of type 'float'". + # After fix: int() cast makes it work and produces a clean duration string. + dy.run_youtube_discovery_worker('hflt', deps) + + result = states['hflt']['discovery_results'][0] + assert result['status'] != 'Error' # didn't crash mid-loop + assert ':' in result['duration'] # duration string formatted + + def test_completion_marks_phase_discovered(): """All tracks processed → phase='discovered', status='complete', progress=100.""" states = {} diff --git a/web_server.py b/web_server.py index 8ac0a188..1bfba480 100644 --- a/web_server.py +++ b/web_server.py @@ -29235,7 +29235,7 @@ def _run_listenbrainz_discovery_worker(state_key): 'spotify_track': cached_match.get('name', ''), 'spotify_artist': _extract_artist_name(cached_match.get('artists', [''])[0]) if cached_match.get('artists') else '', 'spotify_album': cached_match.get('album', {}).get('name', '') if isinstance(cached_match.get('album'), dict) else cached_match.get('album', ''), - 'duration': f"{duration_ms // 60000}:{(duration_ms % 60000) // 1000:02d}" if duration_ms else '0:00', + 'duration': f"{int(duration_ms) // 60000}:{(int(duration_ms) % 60000) // 1000:02d}" if duration_ms else '0:00', 'discovery_source': discovery_source, 'matched_data': cached_match, 'spotify_data': cached_match @@ -29369,7 +29369,7 @@ def _run_listenbrainz_discovery_worker(state_key): 'spotify_track': matched_track.name if matched_track else '', 'spotify_artist': _extract_artist_name(matched_track.artists[0]) if matched_track else '', 'spotify_album': matched_track.album if matched_track else '', - 'duration': f"{duration_ms // 60000}:{(duration_ms % 60000) // 1000:02d}" if duration_ms else '0:00', + 'duration': f"{int(duration_ms) // 60000}:{(int(duration_ms) % 60000) // 1000:02d}" if duration_ms else '0:00', 'discovery_source': discovery_source, 'confidence': best_confidence } @@ -40109,7 +40109,7 @@ def prepare_mirrored_discovery(playlist_id): 'spotify_track': '', 'spotify_artist': '', 'spotify_album': '', - 'duration': f"{dur // 60000}:{(dur % 60000) // 1000:02d}" if dur else '0:00', + 'duration': f"{int(dur) // 60000}:{(int(dur) % 60000) // 1000:02d}" if dur else '0:00', 'confidence': 0, }) continue @@ -40133,7 +40133,7 @@ def prepare_mirrored_discovery(playlist_id): 'spotify_track': matched.get('name', ''), 'spotify_artist': artist_str, 'spotify_album': album_str, - 'duration': f"{dur // 60000}:{(dur % 60000) // 1000:02d}" if dur else '0:00', + 'duration': f"{int(dur) // 60000}:{(int(dur) % 60000) // 1000:02d}" if dur else '0:00', 'discovery_source': extra.get('provider', 'spotify'), 'confidence': extra.get('confidence', 0), 'matched_data': matched, @@ -40158,7 +40158,7 @@ def prepare_mirrored_discovery(playlist_id): 'spotify_track': '', 'spotify_artist': '', 'spotify_album': '', - 'duration': f"{dur // 60000}:{(dur % 60000) // 1000:02d}" if dur else '0:00', + 'duration': f"{int(dur) // 60000}:{(int(dur) % 60000) // 1000:02d}" if dur else '0:00', 'discovery_source': cached_provider, 'confidence': 0, }) @@ -40175,7 +40175,7 @@ def prepare_mirrored_discovery(playlist_id): 'spotify_track': '', 'spotify_artist': '', 'spotify_album': '', - 'duration': f"{dur // 60000}:{(dur % 60000) // 1000:02d}" if dur else '0:00', + 'duration': f"{int(dur) // 60000}:{(int(dur) % 60000) // 1000:02d}" if dur else '0:00', 'confidence': 0, })