mirror of https://github.com/Nezreka/SoulSync.git
User report: every downloaded track in an album came out with
``replaygain_track_gain: +52.00 dB`` regardless of actual loudness.
Root cause: the parser at ``core/replaygain.py:79`` used
``re.search('I:\s+...')`` which returns the FIRST match. ffmpeg's
ebur128 filter emits ``I:`` per measurement window (running partial
integrated loudness) AND in a final Summary block. The first
per-window reading is at t=0.5s — almost always ~-70 LUFS because
nearly every track starts with silence / encoder padding. So:
gain = RG2_reference - lufs = -18 - (-70) = +52.00 dB
…on EVERY track. Same regex pattern, same first per-window match,
same +52 dB written to every file's REPLAYGAIN_TRACK_GAIN tag.
Verified by running ffmpeg ebur128 against a real generated FLAC
and inspecting the stderr output — first per-window line at t=0.5s
shows ``I: -70.0 LUFS`` (silent intro), and the Summary block at
the end shows the real integrated value (e.g. ``I: -27.8 LUFS``
for the test sine wave). Old code captured the -70.0 reading.
Fix: anchor LUFS parsing to the ``Summary:`` block via
``stderr.rfind('Summary:')``. The Summary block is always emitted
last and contains the authoritative final integrated loudness.
Peak parsing already worked correctly (per-window output uses
``TPK:``/``FTPK:`` labels; only the Summary uses ``Peak:``), but
the same Summary anchor is now applied to it for consistency.
Defensive fallback: if no Summary block is present (truncated
output / unusual ffmpeg version), use the LAST per-window reading
instead of the first. Still better than the buggy first-window
behavior.
Smoke verified end-to-end: a freshly-generated FLAC of a -24 dBFS
sine wave now reports LUFS=-27.80, gain=+9.80 dB (correct, was
+52.00 before fix).
Tests: ``tests/test_replaygain_summary_parse.py`` — 7 cases pinning
the parser behavior with realistic ffmpeg ebur128 stderr samples:
- Summary value parsed correctly even when first per-window is -70
- Resulting gain is realistic (NOT +52)
- Two tracks with same first per-window but different summaries get
different LUFS (regression assertion for "all tracks same gain")
- Per-window reading higher than Summary doesn't leak through
- Fallback to last per-window when Summary absent
- Clean RuntimeError raised when no LUFS values anywhere
- Peak still correctly anchored to Summary
Verified: full suite 1800 pass (7 new), ruff clean.
WHATS_NEW entry under '2.4.2' dev cycle.
pull/486/head
parent
fbf4bad47a
commit
776d195f71
@ -0,0 +1,198 @@
|
||||
"""Pin the ReplayGain analysis fix.
|
||||
|
||||
User report: every track in a downloaded album got the same
|
||||
``replaygain_track_gain`` of ``+52.00 dB`` after post-processing.
|
||||
Smoking gun: ``-18 (RG2 reference) - (-70.0) = +52.00``. Every track's
|
||||
first ebur128 measurement window reads ~-70 LUFS because the first
|
||||
window covers the silent intro / encoder padding.
|
||||
|
||||
The old code used ``re.search('I:\\s+...')`` which returns the FIRST
|
||||
match — capturing that initial -70 LUFS reading instead of the final
|
||||
integrated value from the Summary block.
|
||||
|
||||
These tests use representative ffmpeg ebur128 output (per-window
|
||||
progress + final Summary block) to pin: parser anchors to the
|
||||
Summary, ignores per-window partials, and falls back gracefully when
|
||||
Summary is absent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from core.replaygain import analyze_track
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fabricated ebur128 stderr samples
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Representative ffmpeg ebur128 stderr: per-window progress lines (the first
# one reads I: -70.0 LUFS) followed by the single, final Summary block that
# carries the integrated loudness (-14.3 LUFS) and true peak (-0.4 dBFS).
# The Summary header appears exactly once, after the per-window lines.
_REAL_EBUR128_STDERR = """
[Parsed_ebur128_0 @ 0x000001] t: 0.500000 TARGET:-23 LUFS M: -70.0 S:-70.0 I: -70.0 LUFS LRA: 0.0 LU FTPK: -70.0 dBFS TPK: -70.0 dBFS
[Parsed_ebur128_0 @ 0x000001] t: 1.000000 TARGET:-23 LUFS M: -50.0 S:-60.0 I: -60.0 LUFS LRA: 0.0 LU FTPK: -50.0 dBFS TPK: -50.0 dBFS
[Parsed_ebur128_0 @ 0x000001] t: 1.500000 TARGET:-23 LUFS M: -20.0 S:-30.0 I: -25.0 LUFS LRA: 0.0 LU FTPK: -2.5 dBFS TPK: -2.5 dBFS
[Parsed_ebur128_0 @ 0x000001] t: 2.000000 TARGET:-23 LUFS M: -18.0 S:-20.0 I: -14.5 LUFS LRA: 1.5 LU FTPK: -0.4 dBFS TPK: -0.4 dBFS
[Parsed_ebur128_0 @ 0x000001] Summary:

Integrated loudness:
I: -14.3 LUFS
Threshold: -24.3 LUFS

Loudness range:
LRA: 3.2 LU
Threshold: -34.3 LUFS
LRA low: -16.5 LUFS
LRA high: -13.3 LUFS

True peak:
Peak: -0.4 dBFS
[out#0/null @ 0x000002] video:0KiB audio:172KiB
"""
|
||||
|
||||
|
||||
def _stub_ffmpeg(stderr_output: str):
    """Build a patcher that replaces ``subprocess.run`` with a stub.

    While the returned patcher is active, ``subprocess.run`` hands back
    an object whose ``stderr`` is ``stderr_output`` and whose
    ``returncode`` is 0."""

    class _FakeResult:
        def __init__(self):
            self.returncode = 0
            self.stderr = stderr_output

    canned_result = _FakeResult()
    return patch.object(subprocess, 'run', return_value=canned_result)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Headline regression: don't grab the first per-window reading
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_parses_summary_lufs_not_first_per_window_reading():
    """Headline regression for the +52.00 dB report.

    The fixture's per-window stream starts with 'I: -70.0 LUFS' and the
    Summary block reports 'I: -14.3 LUFS'. The parser must hand back
    the Summary figure (-14.3), never the first per-window one (-70.0).

    Symptom of the old behaviour: -18 RG2 reference - (-70.0) = +52.00."""
    with _stub_ffmpeg(_REAL_EBUR128_STDERR):
        measurement = analyze_track('/fake/path.flac')

    integrated, true_peak = measurement
    assert integrated == pytest.approx(-14.3, abs=0.01)
    assert true_peak == pytest.approx(-0.4, abs=0.01)
|
||||
|
||||
|
||||
def test_resulting_gain_is_realistic_not_plus_52():
    """The gain derived from the parsed loudness must sit within a few
    dB of zero, not at the +52.00 dB the bug used to produce."""
    rg2_reference = -18.0

    with _stub_ffmpeg(_REAL_EBUR128_STDERR):
        integrated, _unused_peak = analyze_track('/fake/path.flac')

    gain = rg2_reference - integrated
    assert abs(gain) < 10.0, f"Unrealistic gain {gain:+.2f} dB — bug regression"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Different per-track values stay different
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_stderr(per_window_lufs: list[float], summary_lufs: float, summary_peak: float) -> str:
    """Assemble a synthetic ebur128 stderr blob.

    One progress line is emitted per entry of ``per_window_lufs``:
    timestamps advance in 0.5 s steps, the M/S/I fields all carry the
    entry's value and both peak fields carry half of it. A Summary
    block reporting ``summary_lufs`` and ``summary_peak`` follows, so a
    test can check that the Summary wins whatever the per-window
    stream contains."""
    progress_lines = []
    for window_no, reading in enumerate(per_window_lufs, start=1):
        half_reading = reading / 2
        progress_lines.append(
            f"[Parsed_ebur128_0 @ 0x1] t: {window_no * 0.5:.6f} TARGET:-23 LUFS "
            f"M: {reading:.1f} S:{reading:.1f} I: {reading:.1f} LUFS LRA: 0.0 LU "
            f"FTPK: {half_reading:.1f} dBFS TPK: {half_reading:.1f} dBFS"
        )

    blob_lines = [
        '\n'.join(progress_lines),
        "[Parsed_ebur128_0 @ 0x1] Summary:",
        "",
        "Integrated loudness:",
        f"I: {summary_lufs:.1f} LUFS",
        "Threshold: -24.0 LUFS",
        "",
        "Loudness range:",
        "LRA: 3.2 LU",
        "",
        "True peak:",
        f"Peak: {summary_peak:+.1f} dBFS",
        "",  # trailing newline after the Peak line
    ]
    return '\n'.join(blob_lines)
|
||||
|
||||
|
||||
def test_two_tracks_with_different_summaries_get_different_lufs():
    """Regression for "every track gets the same gain".

    Both simulated tracks open with the same -70 per-window reading but
    carry different Summary loudness values. Each must come back with
    its own Summary value rather than a shared -70."""
    stderr_by_path = {
        '/fake/a.flac': _make_stderr([-70.0, -50.0, -20.0], summary_lufs=-14.3, summary_peak=-0.4),
        '/fake/b.flac': _make_stderr([-70.0, -45.0, -10.0], summary_lufs=-7.8, summary_peak=-1.2),
    }

    measured = {}
    for path, blob in stderr_by_path.items():
        with _stub_ffmpeg(blob):
            measured[path], _ = analyze_track(path)

    lufs_a = measured['/fake/a.flac']
    lufs_b = measured['/fake/b.flac']
    assert lufs_a != lufs_b
    assert lufs_a == pytest.approx(-14.3, abs=0.01)
    assert lufs_b == pytest.approx(-7.8, abs=0.01)
|
||||
|
||||
|
||||
def test_per_window_lufs_with_higher_value_doesnt_leak_into_summary():
    """Per-window readings of -5.0 and -3.0 sit above the Summary's
    -12.0; the parser must still report the Summary value rather than
    the loudest per-window one."""
    expected_lufs = -12.0
    blob = _make_stderr([-70.0, -5.0, -3.0], summary_lufs=-12.0, summary_peak=-0.5)

    with _stub_ffmpeg(blob):
        measured_lufs, _ = analyze_track('/fake/loud.flac')

    assert measured_lufs == pytest.approx(expected_lufs, abs=0.01)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Defensive fallback when Summary block is absent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_falls_back_to_last_per_window_when_no_summary():
    """With no Summary block in the output, the parser is expected to
    fall back to the LAST per-window reading (-14.5 here) instead of
    the first (-70.0)."""
    progress_only = '\n'.join([
        "[Parsed_ebur128_0 @ 0x1] t: 0.5 I: -70.0 LUFS LRA: 0.0 LU",
        "[Parsed_ebur128_0 @ 0x1] t: 1.0 I: -25.0 LUFS LRA: 0.0 LU",
        "[Parsed_ebur128_0 @ 0x1] t: 1.5 I: -14.5 LUFS LRA: 1.5 LU",
    ])

    with _stub_ffmpeg(progress_only):
        measured_lufs, _unused_peak = analyze_track('/fake/no_summary.flac')

    # The final per-window line wins, not the opening one.
    assert measured_lufs == pytest.approx(-14.5, abs=0.01)
|
||||
|
||||
|
||||
def test_raises_when_no_lufs_anywhere():
    """Output with no LUFS values at all must surface as a
    RuntimeError whose message matches 'Could not parse', so the caller
    can decide whether to skip RG analysis."""
    garbage = "ffmpeg: garbled output, no LUFS data anywhere\n"

    with _stub_ffmpeg(garbage), pytest.raises(RuntimeError, match='Could not parse'):
        analyze_track('/fake/garbage.flac')
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Peak parsing — ensure it stays anchored to summary too
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_peak_uses_summary_value_not_per_window_max():
    """Per-window output uses 'TPK:'/'FTPK:' labels; only the summary
    uses 'Peak:'. Pin that the parser reports the summary peak even
    when a per-window TPK value is higher than it.

    ``_make_stderr`` writes half of each per-window reading into the
    TPK/FTPK fields, so the final -0.2 reading yields 'TPK: -0.1 dBFS',
    which is above the summary's -0.4 dBFS."""
    stderr = _make_stderr([-70.0, -45.0, -0.2], summary_lufs=-12.0, summary_peak=-0.4)

    # Guard the fixture itself: the scenario only means something if a
    # per-window peak really does exceed the summary peak.
    assert 'TPK: -0.1 dBFS' in stderr

    with _stub_ffmpeg(stderr):
        _lufs, peak = analyze_track('/fake/peak.flac')

    assert peak == pytest.approx(-0.4, abs=0.01)
|
||||
Loading…
Reference in new issue