fix: cast duration_ms to int before :02d format in discovery workers

yt_dlp sometimes returns float `duration_ms` for YouTube tracks. The
discovery workers format the duration with `f"{x // 60000}:{(x % 60000)
// 1000:02d}"` — and `:02d` requires an int. When the duration is a
float, the format string raises:

    Unknown format code 'd' for object of type 'float'

Caught when running YouTube discovery on a real playlist (bbno$ tracks)
— every track failed with status='Error'.

Pre-existing bug, surfaced now because of yt_dlp returning float
durations on this playlist. Fixed at all 8 sites by casting through
`int()` before the `// 60000` and `% 60000` operations:

- core/discovery/youtube.py: 2 sites in run_youtube_discovery_worker
  (cache hit + main result construction).
- web_server.py L29238/L29372: 2 sites in _run_listenbrainz_discovery_worker.
- web_server.py L40112/L40136/L40161/L40178: 4 sites in the YouTube
  retry/pre-discovered results assembly path.

The `if duration_ms` / `if dur` guard already protects against None and 0,
so `int(...)` is only called on truthy numeric values.

Tests: 1 new regression test under tests/discovery/test_discovery_youtube.py
(`test_float_duration_does_not_crash_format`) — passes a float
duration_ms and asserts the worker completes without an error result.
Ruff clean.
pull/406/head
Broque Thomas 4 weeks ago
parent d15777ea99
commit 3c1f614b6e

@ -112,7 +112,7 @@ def run_youtube_discovery_worker(url_hash, deps: YoutubeDiscoveryDeps):
'spotify_track': cached_match.get('name', ''),
'spotify_artist': deps.extract_artist_name(cached_match.get('artists', [''])[0]) if cached_match.get('artists') else '',
'spotify_album': cached_match.get('album', {}).get('name', '') if isinstance(cached_match.get('album'), dict) else cached_match.get('album', ''),
'duration': f"{track['duration_ms'] // 60000}:{(track['duration_ms'] % 60000) // 1000:02d}" if track['duration_ms'] else '0:00',
'duration': f"{int(track['duration_ms']) // 60000}:{(int(track['duration_ms']) % 60000) // 1000:02d}" if track['duration_ms'] else '0:00',
'discovery_source': discovery_source,
'matched_data': cached_match,
'spotify_data': cached_match
@ -247,7 +247,7 @@ def run_youtube_discovery_worker(url_hash, deps: YoutubeDiscoveryDeps):
'spotify_track': matched_track.name if matched_track else '',
'spotify_artist': deps.extract_artist_name(matched_track.artists[0]) if matched_track else '',
'spotify_album': matched_track.album if matched_track else '',
'duration': f"{track['duration_ms'] // 60000}:{(track['duration_ms'] % 60000) // 1000:02d}" if track['duration_ms'] else '0:00',
'duration': f"{int(track['duration_ms']) // 60000}:{(int(track['duration_ms']) % 60000) // 1000:02d}" if track['duration_ms'] else '0:00',
'discovery_source': discovery_source,
'confidence': best_confidence
}

@ -279,6 +279,22 @@ def test_skip_discovery_flag_skips_track():
# Completion: phase + activity feed
# ---------------------------------------------------------------------------
def test_float_duration_does_not_crash_format():
"""yt_dlp can return float duration_ms — format string must handle it (regression)."""
states = {}
tr = _track(duration_ms=212345.7) # float, not int
_seed_state('hflt', states, tracks=[tr])
deps = _build_deps(states=states)
# Before fix: raised "Unknown format code 'd' for object of type 'float'".
# After fix: int() cast makes it work and produces a clean duration string.
dy.run_youtube_discovery_worker('hflt', deps)
result = states['hflt']['discovery_results'][0]
assert result['status'] != 'Error' # didn't crash mid-loop
assert ':' in result['duration'] # duration string formatted
def test_completion_marks_phase_discovered():
"""All tracks processed → phase='discovered', status='complete', progress=100."""
states = {}

@ -29235,7 +29235,7 @@ def _run_listenbrainz_discovery_worker(state_key):
'spotify_track': cached_match.get('name', ''),
'spotify_artist': _extract_artist_name(cached_match.get('artists', [''])[0]) if cached_match.get('artists') else '',
'spotify_album': cached_match.get('album', {}).get('name', '') if isinstance(cached_match.get('album'), dict) else cached_match.get('album', ''),
'duration': f"{duration_ms // 60000}:{(duration_ms % 60000) // 1000:02d}" if duration_ms else '0:00',
'duration': f"{int(duration_ms) // 60000}:{(int(duration_ms) % 60000) // 1000:02d}" if duration_ms else '0:00',
'discovery_source': discovery_source,
'matched_data': cached_match,
'spotify_data': cached_match
@ -29369,7 +29369,7 @@ def _run_listenbrainz_discovery_worker(state_key):
'spotify_track': matched_track.name if matched_track else '',
'spotify_artist': _extract_artist_name(matched_track.artists[0]) if matched_track else '',
'spotify_album': matched_track.album if matched_track else '',
'duration': f"{duration_ms // 60000}:{(duration_ms % 60000) // 1000:02d}" if duration_ms else '0:00',
'duration': f"{int(duration_ms) // 60000}:{(int(duration_ms) % 60000) // 1000:02d}" if duration_ms else '0:00',
'discovery_source': discovery_source,
'confidence': best_confidence
}
@ -40109,7 +40109,7 @@ def prepare_mirrored_discovery(playlist_id):
'spotify_track': '',
'spotify_artist': '',
'spotify_album': '',
'duration': f"{dur // 60000}:{(dur % 60000) // 1000:02d}" if dur else '0:00',
'duration': f"{int(dur) // 60000}:{(int(dur) % 60000) // 1000:02d}" if dur else '0:00',
'confidence': 0,
})
continue
@ -40133,7 +40133,7 @@ def prepare_mirrored_discovery(playlist_id):
'spotify_track': matched.get('name', ''),
'spotify_artist': artist_str,
'spotify_album': album_str,
'duration': f"{dur // 60000}:{(dur % 60000) // 1000:02d}" if dur else '0:00',
'duration': f"{int(dur) // 60000}:{(int(dur) % 60000) // 1000:02d}" if dur else '0:00',
'discovery_source': extra.get('provider', 'spotify'),
'confidence': extra.get('confidence', 0),
'matched_data': matched,
@ -40158,7 +40158,7 @@ def prepare_mirrored_discovery(playlist_id):
'spotify_track': '',
'spotify_artist': '',
'spotify_album': '',
'duration': f"{dur // 60000}:{(dur % 60000) // 1000:02d}" if dur else '0:00',
'duration': f"{int(dur) // 60000}:{(int(dur) % 60000) // 1000:02d}" if dur else '0:00',
'discovery_source': cached_provider,
'confidence': 0,
})
@ -40175,7 +40175,7 @@ def prepare_mirrored_discovery(playlist_id):
'spotify_track': '',
'spotify_artist': '',
'spotify_album': '',
'duration': f"{dur // 60000}:{(dur % 60000) // 1000:02d}" if dur else '0:00',
'duration': f"{int(dur) // 60000}:{(int(dur) % 60000) // 1000:02d}" if dur else '0:00',
'confidence': 0,
})

Loading…
Cancel
Save