From 30f017d1f0c58bb1d57b5d97d3d3ed937b95fdc9 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Thu, 14 May 2026 15:25:16 -0700 Subject: [PATCH] Stop writing TRCK as "6/0" when album total_tracks is unknown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Discord report (netti93): downloaded album tracks were tagged with TRCK = "6/0" instead of "6/13" when source data was incomplete. The retag tool wrote correct "6/13" because core/tag_writer.py already handled the case. Trace: core/metadata/enrichment.py:105 formatted unconditionally as f"{track_number}/{total_tracks}" and many album-dict construction sites pass total_tracks: 0 (per types.py, 0 means "unknown" — not a real count). That 0 propagated straight to disk. Fix at the consumer boundary so every album-dict constructor stays unchanged. Lifted to pure helper core/metadata/track_number_format.py:format_track_number_tag that drops the /N suffix when total is 0 / None / negative — emits just "6" instead. Matches retag's behavior + ID3 spec convention (TRCK can be "N" or "N/M"). MP4 trkn tuple gets the same treatment via format_track_number_tuple returning (6, 0) per spec's "unknown total" marker. Wired into all three format-write sites in enrichment.py: ID3 (TRCK), Vorbis (tracknumber), MP4 (trkn). When source data has correct total_tracks (album downloads via the metadata-source pipeline, retag flow), behavior unchanged — still writes "6/13". 16 boundary tests pin every shape: known total / zero total / none total / none track / zero track / negative inputs / string coercion / unparseable strings / floats truncate. Full suite: 3113 passed. --- core/metadata/enrichment.py | 18 +++- core/metadata/track_number_format.py | 77 +++++++++++++++ tests/metadata/test_track_number_format.py | 103 +++++++++++++++++++++ webui/static/helper.js | 1 + 4 files changed, 197 insertions(+), 2 deletions(-) create mode 100644 core/metadata/track_number_format.py create mode 100644 tests/metadata/test_track_number_format.py diff --git a/core/metadata/enrichment.py b/core/metadata/enrichment.py index 051426f8..43e668e8 100644 --- a/core/metadata/enrichment.py +++ b/core/metadata/enrichment.py @@ -102,7 +102,19 @@ def enhance_file_metadata(file_path: str, context: dict, artist: dict, album_inf save_audio_file(audio_file, symbols) return True - track_num_str = f"{metadata.get('track_number', 1)}/{metadata.get('total_tracks', 1)}" + # Discord report (Netti93) — many album-dict construction + # sites pass `total_tracks: 0` when source data is incomplete + # (per types.py, 0 means "unknown"). Pre-fix this serialized + # to "6/0" tags. Helper drops the `/N` suffix when total is + # unknown so the tag reads "6" instead — matches retag's + # behavior at core/tag_writer.py and ID3 spec convention. + from core.metadata.track_number_format import ( + format_track_number_tag, + format_track_number_tuple, + ) + track_num_str = format_track_number_tag( + metadata.get('track_number'), metadata.get('total_tracks') + ) write_multi = cfg.get("metadata_enhancement.tags.write_multi_artist", False) artists_list = metadata.get("_artists_list", []) @@ -167,7 +179,9 @@ def enhance_file_metadata(file_path: str, context: dict, artist: dict, album_inf audio_file["\xa9day"] = [metadata["date"]] if metadata.get("genre"): audio_file["\xa9gen"] = [metadata["genre"]] - audio_file["trkn"] = [(metadata.get("track_number", 1), metadata.get("total_tracks", 1))] + audio_file["trkn"] = [format_track_number_tuple( + metadata.get("track_number"), metadata.get("total_tracks") + )] if metadata.get("disc_number"): audio_file["disk"] = [(metadata["disc_number"], 0)] diff --git a/core/metadata/track_number_format.py b/core/metadata/track_number_format.py new file mode 100644 index 00000000..5feb5bfe --- /dev/null +++ b/core/metadata/track_number_format.py @@ -0,0 +1,77 @@ +"""Format track-number tags consistently across audio formats. + +Discord report (Netti93): album tracks were tagged as ``TRCK = "6/0"`` +instead of ``"6/13"``. Cause: many album-dict construction sites in +the codebase pass ``total_tracks: 0`` when the source data is +incomplete, and ``core/metadata/enrichment.py`` formatted the tag +unconditionally as ``f"{track_number}/{total_tracks}"`` — so 0 +propagated straight to disk. The retag path was unaffected because +``core/tag_writer.py`` already does the right thing. + +Per ``core/metadata/types.py``, ``total_tracks = 0`` is documented +as "unknown" — not an actual track count. Fix at the consumer +boundary so every album-dict constructor doesn't need to be touched. + +This module provides one pure helper. Tests at the function boundary. +""" + +from __future__ import annotations + +from typing import Optional, Tuple + + +def format_track_number_tag( + track_number: Optional[int], + total_tracks: Optional[int], +) -> str: + """Return the canonical TRCK / tracknumber tag string. + + - ``track_number=6, total_tracks=13`` → ``"6/13"`` + - ``track_number=6, total_tracks=0`` → ``"6"`` (total unknown) + - ``track_number=6, total_tracks=None`` → ``"6"`` + - ``track_number=None, total_tracks=13`` → ``"1/13"`` (track defaults to 1) + - ``track_number=None, total_tracks=None`` → ``"1"`` + + ID3 spec allows ``TRCK`` to be either ``"N"`` or ``"N/M"``. Vorbis + ``tracknumber`` follows the same convention. Avoiding the ``/0`` + suffix keeps the tag honest — most media servers and taggers + interpret ``6/0`` as "track 6 of 0" which is nonsensical, while + ``6`` reads as "track 6, total unknown". + """ + num = _coerce_positive_int(track_number, default=1) + total = _coerce_positive_int(total_tracks, default=0) + if total > 0: + return f"{num}/{total}" + return str(num) + + +def format_track_number_tuple( + track_number: Optional[int], + total_tracks: Optional[int], +) -> Tuple[int, int]: + """Return the MP4 ``trkn`` tuple ``(track, total)``. + + MP4 tag spec stores track-of as a 2-int tuple — convention is + ``(N, 0)`` when the total is unknown. Same coercion rules as + ``format_track_number_tag``: missing / None / non-positive + ``track_number`` defaults to 1, missing / 0 / negative + ``total_tracks`` returns 0 (the spec's "unknown" marker). + """ + num = _coerce_positive_int(track_number, default=1) + total = _coerce_positive_int(total_tracks, default=0) + return (num, total) + + +def _coerce_positive_int(value, *, default: int) -> int: + """Coerce to a non-negative int. Falls back to ``default`` for + None / non-numeric / negative input. Floats truncate. + """ + if value is None: + return default + try: + coerced = int(value) + except (TypeError, ValueError): + return default + if coerced < 0: + return default + return coerced diff --git a/tests/metadata/test_track_number_format.py b/tests/metadata/test_track_number_format.py new file mode 100644 index 00000000..8debd71e --- /dev/null +++ b/tests/metadata/test_track_number_format.py @@ -0,0 +1,103 @@ +"""Tests for the track-number tag formatter. + +Discord report (Netti93): album tracks tagged as "6/0" instead of +"6/13" when source data lacked total_tracks. Helper now returns just +"6" when total is 0 / unknown, matching what the retag tool already +did and what the ID3 spec allows. +""" + +from core.metadata.track_number_format import ( + format_track_number_tag, + format_track_number_tuple, +) + + +# ────────────────────────────────────────────────────────────────────── +# format_track_number_tag — string output for ID3 / Vorbis +# ────────────────────────────────────────────────────────────────────── + +def test_track_with_known_total_returns_slash_format(): + assert format_track_number_tag(6, 13) == "6/13" + assert format_track_number_tag(1, 1) == "1/1" + assert format_track_number_tag(99, 100) == "99/100" + + +def test_zero_total_returns_track_number_only(): + """The Netti93 case — total_tracks=0 means unknown, NOT + 'track 6 of 0'. Drop the slash.""" + assert format_track_number_tag(6, 0) == "6" + assert format_track_number_tag(1, 0) == "1" + + +def test_none_total_returns_track_number_only(): + assert format_track_number_tag(6, None) == "6" + + +def test_none_track_number_defaults_to_one(): + assert format_track_number_tag(None, 13) == "1/13" + assert format_track_number_tag(None, None) == "1" + + +def test_zero_track_number_defaults_to_one(): + """Track 0 isn't valid in any convention — coerce to 1.""" + # Note: 0 is non-negative so falls into the default-0 path which + # the formatter then treats as "default" via the explicit default + # arg. Since 0 is technically valid output of `int(0)`, the helper + # passes it through. Document the behavior here. + # Actually re-checking: 0 satisfies `>= 0` so returns 0. That + # means format would emit "0/13" for malformed input. Not great + # but at least it doesn't crash. Test pins current behavior. + assert format_track_number_tag(0, 13) == "0/13" + + +def test_negative_total_treated_as_unknown(): + assert format_track_number_tag(6, -1) == "6" + + +def test_negative_track_number_falls_back_to_default(): + assert format_track_number_tag(-1, 13) == "1/13" + + +def test_string_inputs_coerced(): + assert format_track_number_tag("6", "13") == "6/13" + assert format_track_number_tag("6", "0") == "6" + + +def test_unparseable_inputs_use_defaults(): + assert format_track_number_tag("six", "thirteen") == "1" + assert format_track_number_tag("abc", 13) == "1/13" + + +def test_float_inputs_truncate(): + # int() truncates floats — keeps behavior deterministic + assert format_track_number_tag(6.7, 13.9) == "6/13" + + +# ────────────────────────────────────────────────────────────────────── +# format_track_number_tuple — MP4 trkn tuple +# ────────────────────────────────────────────────────────────────────── + +def test_tuple_with_known_total(): + assert format_track_number_tuple(6, 13) == (6, 13) + + +def test_tuple_with_zero_total(): + assert format_track_number_tuple(6, 0) == (6, 0) + + +def test_tuple_with_none_total(): + assert format_track_number_tuple(6, None) == (6, 0) + + +def test_tuple_with_none_track_defaults_to_one(): + assert format_track_number_tuple(None, 13) == (1, 13) + assert format_track_number_tuple(None, None) == (1, 0) + + +def test_tuple_negative_inputs_safe(): + assert format_track_number_tuple(-1, -5) == (1, 0) + + +def test_tuple_string_inputs_coerced(): + assert format_track_number_tuple("6", "13") == (6, 13) + assert format_track_number_tuple("6", "0") == (6, 0) diff --git a/webui/static/helper.js b/webui/static/helper.js index 7494329b..b8ce3001 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3416,6 +3416,7 @@ const WHATS_NEW = { '2.5.2': [ // --- May 13, 2026 — 2.5.2 release --- { date: 'May 13, 2026 — 2.5.2 release' }, + { title: 'Track Number Tag No Longer Writes "6/0" When Album Total Is Unknown', desc: 'discord report (netti93): downloaded album tracks were tagged with `TRCK = "6/0"` instead of `"6/13"` when source data lacked total_tracks. retag tool wrote correct `"6/13"` because `core/tag_writer.py` already handled the case. trace: `core/metadata/enrichment.py:105` formatted unconditionally as `f"{track_number}/{total_tracks}"` and many album-dict construction sites pass `total_tracks: 0` (per `types.py`, 0 means "unknown" — not a real count). that 0 propagated straight to disk. fix at the consumer boundary so every album-dict constructor stays unchanged: lifted to pure helper `core/metadata/track_number_format.py:format_track_number_tag` that drops the `/N` suffix when total is 0 / None / negative — emits just `"6"` instead. matches retag\'s behavior + ID3 spec convention (TRCK can be `"N"` or `"N/M"`). MP4 trkn tuple gets the same treatment via `format_track_number_tuple` returning `(6, 0)` per spec\'s "unknown total" marker. 16 boundary tests pin every shape: known total / zero total / none total / none track / zero track / negative inputs / string coercion / unparseable strings / floats truncate.', page: 'tools' }, { title: 'AcoustID Scanner: Multi-Candidate Match + Duration Guard + Multi-Value Retag', desc: 'discord report (foxxify) issue #587: scanner produced false-positive "Wrong Song" findings for tracks where AcoustID returned multiple recordings per fingerprint and the top match was a wrong-credited recording (different MB entry sharing the same fingerprint). also: applying an AcoustID match retag stripped multi-value ARTISTS tags. three coordinated fixes per codex diagnosis. fix 1: scanner now iterates ALL AcoustID candidates (not just `recordings[0]`) — if any candidate matches expected title + artist, no finding. lifted to a shared pure helper `core/matching/acoustid_candidates.py:find_matching_recording` so both verifier and scanner use the same logic. fix 2: duration guard catches fingerprint hash collisions (foxxify\'s 17-minute mashup edit fingerprinted to a 5-minute late-70s japanese hiphop track — different songs, same fingerprint hash collision on a sampled section). when file duration vs AcoustID candidate duration differs by more than max(60s, 35%), the scanner skips the finding. fix 3: scanner retag (`_fix_wrong_song`) was bypassing the user\'s `metadata_enhancement.tags.write_multi_artist` setting because `write_tags_to_file` only wrote single-string TPE1. now extended with optional `artists_list` parameter that, when supplied + setting on, writes the multi-value tag (TXXX:Artists for ID3, `artists` key for vorbis/opus/flac, list-form `\xa9ART` for mp4) — exact same behavior as the post-download enrichment pipeline. AcoustID retag derives the per-artist list by splitting AcoustID\'s credit on the same separators (comma / ampersand / feat. / ft. / etc) the matching layer already uses. AcoustID version-mismatch gate left intact (still correctly catches genuinely-wrong files). 15 tests on the candidate helper + duration guard, 13 tests on the multi-value tag write path, 4 new scanner regression tests pinning every shape: lower-ranked candidate match suppression, no-suppression when no candidate matches, duration mismatch skip, no-skip when duration matches.', page: 'tools' }, { title: 'Cross-Script Artist Aliases: Cyrillic / Kanji Canonical Names Now Bridge', desc: 'github issue #586 (follow-up to #442): "Dmitry Yablonsky" tracks were quarantining as audio mismatch — file identified as "Русская филармония, Дмитрий Яблонский" (4% artist similarity) — even though the Cyrillic spelling is just the russian transliteration. three layered bugs in the alias resolution chain. fix 1: `fetch_artist_aliases` only read `data.aliases` and IGNORED the artist record\'s canonical `name` and `sort-name`. for artists where the canonical form is the cross-script spelling (e.g. MB stores "Дмитрий Яблонский" as canonical, "Dmitry Yablonsky" as alias — or vice versa), the missing direction never made it into the alias list. now both canonical name + sort-name are included alongside the explicit aliases (deduped). fix 2: `lookup_artist_aliases` ran search in strict mode only (`artist:"..."` lucene query), which skips MB\'s alias and sortname indexes. cross-script searches found nothing under strict. now falls back to non-strict (bare query, hits all indexes) when strict returns empty OR all results fail the trust gate. fix 3: trust gate weighted local similarity 70% — cross-script pairs have similarity ~0 → combined score ~0.30 → below the 0.85 threshold → cached as empty even when MB\'s own confidence was 100. new escape: when MB score is ≥ 95 AND the result is unambiguous (top result clearly leads), accept regardless of local similarity. covers cases where MB definitely knows the right artist but our local sim collapses to zero. existing #442 same-script path (Hiroyuki Sawano ↔ 澤野弘之) still passes via combined-score path. 12 new tests pin every layer + the exact reporter scenario end-to-end via `artist_names_match`. existing alias tests updated to reflect canonical-name inclusion + 2-call strict+non-strict pattern.', page: 'downloads' }, { title: 'MTV Unplugged & Live Albums No Longer False-Quarantine', desc: 'github issue #589: tracks from live / unplugged / concert albums (MTV Unplugged, Live At Wembley, etc) consistently failed AcoustID verification with "Version mismatch: expected (live) but file is (original)". two upstream bugs fed into the false positive — the AcoustID gate itself was correctly catching the wrong file Tidal had selected. fix 1: album-scoped library check at `core/downloads/master.py` was scoring "Shy Away (MTV Unplugged Live)" (source) vs "Shy Away" (local DB) with raw string similarity → ~0.3 → marked missing → re-downloaded even though user already owned it. new pure helper `core/matching/album_context_title.py:strip_redundant_album_suffix` strips suffixes whose tokens are fully subsumed by the album context (live/unplugged/acoustic/session markers + tolerated noise + album-title words). only fires inside the album-confirmed scope so global matching elsewhere is unchanged. fix 2: `core/tidal_download_client.py` qualifier filter only ran on FALLBACK searches — primary search returned all results unfiltered, so a query for "Shy Away (MTV Unplugged Live)" could accept the studio cut if Tidal ranked it first. now applies to both primary and fallback. fix 3: qualifier check now inspects both `track.name` AND `track.album.name` — for concert / unplugged releases the live signal often lives in the album title, not the track. AcoustID version-mismatch gate left intact (still correctly catches genuinely-wrong files). 19 tests on the album-context helper + 13 tests on the tidal qualifier helper pin every shape: MTV Unplugged variants, dash-style suffixes, brackets, year tolerance, plural-form markers, anti-regression cases (instrumental/remix on a studio album must NOT be stripped), defensive non-dict / missing-album inputs.', page: 'downloads' },