mirror of https://github.com/Nezreka/SoulSync.git
- new "Audit" button on each download row in the library history modal opens a second modal visualizing the download lifecycle as an interactive horizontal stepper (request → source → match → verify → process → place) with click-to-expand detail cards - hero header with album art + track title + meta line + status pills (source / quality / acoustid result) - three tabs: Lifecycle / Tags / Lyrics - Tags tab reads the audio file live via mutagen at audit-open time via new GET /api/library/history/<id>/file-tags endpoint; file is the single source of truth so background enrichment writes (audiodb / lastfm / genius / replaygain / lyrics fetch) show up too. flat key/value rows stacked vertically (label-above- value) so long MBIDs / URLs / joined genre lists wrap cleanly. source IDs grouped per-service into 2-col sub-card grid. - Lyrics tab renders the full transcript with dimmed timecodes. - post-processing step infers observable changes from source-vs- final state (format conversion, file rename via tag template, folder template). - "Download History" button also added to the Downloads page batch panel header so it's reachable outside the dashboard. - mobile responsive: tabs + stepper scroll horizontally, modal goes full-screen, hero stacks below 480px. 19 helper tests pin the mutagen reader: id3 (TIT2/TPE1/TALB + TXXX + USLT + APIC), vorbis (FLAC dict + _id/_url passthrough), file metadata (format / bitrate / duration), defensive paths (empty / missing file / mutagen returns None / mutagen raises), stringify edge cases (list / tuple / int / frame-with-text / whitespace).pull/573/head
parent
253c7676d6
commit
6ce185491d
@ -0,0 +1,367 @@
|
||||
"""Read embedded tags from an audio file for the Audit Trail UI.
|
||||
|
||||
The Audit Trail modal on the Library History view needs to show
|
||||
exactly what tags are currently embedded in a downloaded file —
|
||||
title/artist/album metadata, MusicBrainz/Spotify/Tidal IDs,
|
||||
ReplayGain values, ISRC, cover-art presence, lyrics, and anything
|
||||
else SoulSync or its background enrichment workers wrote.
|
||||
|
||||
The file is the single source of truth. A persisted snapshot at
|
||||
post-process time would drift the moment a background worker
|
||||
(audiodb, lastfm, genius, deezer enrichment, lyrics fetch) writes
|
||||
more tags, or if the user manually re-tags. So the audit endpoint
|
||||
reads the file live on demand.
|
||||
|
||||
This module is the pure mutagen wrapper. Returns a canonical
|
||||
JSON-serializable dict; never raises (failure modes degrade to an
|
||||
``{'available': False, 'reason': '...'}`` shape so the caller can
|
||||
surface a useful error to the user).
|
||||
|
||||
Frontend renders the canonical shape directly — no per-source
|
||||
mapping at the API layer.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
|
||||
from core.metadata.common import get_mutagen_symbols
|
||||
from utils.logging_config import get_logger
|
||||
|
||||
|
||||
logger = get_logger("library.file_tags")
|
||||
|
||||
|
||||
# ID3 text frames surfaced under the "core" tag group. mutagen keys ID3
# frames by their 4-letter frame code, so this table maps each code we
# care about onto the friendly lowercase key used in the result dict.
#
# TSRC (ISRC) and TCOM (composer) are the standard ID3v2 homes for values
# the Vorbis allow-list already surfaces as "isrc" / "composer"; listing
# them here keeps MP3 output in line with FLAC/OGG output. A TXXX frame
# with desc "ISRC" is read after these and overrides TSRC if both exist.
_ID3_TEXT_FRAMES = {
    "TIT2": "title",
    "TPE1": "artist",
    "TPE2": "album_artist",
    "TALB": "album",
    "TDRC": "date",
    "TCON": "genre",
    "TRCK": "tracknumber",
    "TPOS": "discnumber",
    "TBPM": "bpm",
    "TMOO": "mood",
    "TCOP": "copyright",
    "TPUB": "publisher",
    "TLAN": "language",
    "TCOM": "composer",
    "TSRC": "isrc",
}
|
||||
|
||||
|
||||
# User-defined ID3 text frames (TXXX) carry their key in the frame's
# ``desc`` attribute instead of the frame code. This table maps the
# descriptions we recognise onto canonical lowercase tag keys; the lookup
# is an exact, case-sensitive match on ``desc``.
_KNOWN_TXXX_DESCS = {
    # MusicBrainz identifiers: "MusicBrainz Release Group Id" becomes
    # "musicbrainz_releasegroupid" (spaces dropped, no "_" before "id").
    **{
        f"MusicBrainz {entity} Id": f"musicbrainz_{entity.replace(' ', '').lower()}id"
        for entity in (
            "Album",
            "Artist",
            "Album Artist",
            "Release Group",
            "Release Track",
            "Track",
        )
    },
    # Per-service source identifiers: "Spotify Track Id" becomes
    # "spotify_track_id".
    **{
        f"{service} {entity} Id": f"{service.lower()}_{entity.lower()}_id"
        for service in ("Spotify", "Tidal", "Deezer", "AudioDB", "iTunes")
        for entity in ("Track", "Artist", "Album")
    },
    "Genius Track Id": "genius_track_id",
    "Genius Url": "genius_url",
    "LastFm Url": "lastfm_url",
    # Upper-case release descriptors whose key is just the lowercased desc.
    **{
        desc: desc.lower()
        for desc in (
            "ASIN",
            "BARCODE",
            "CATALOGNUMBER",
            "ISRC",
            "ORIGINALDATE",
            "RELEASECOUNTRY",
            "RELEASESTATUS",
            "RELEASETYPE",
            "SCRIPT",
            "MEDIA",
            "TOTALDISCS",
            "STYLE",
            "QUALITY",
        )
    },
    # The one upper-case desc that is renamed rather than lowercased.
    "TOTALTRACKS": "tracktotal",
    "Artists": "artists",
    # ReplayGain descriptions are already in canonical form.
    **{
        f"replaygain_{scope}_{measure}": f"replaygain_{scope}_{measure}"
        for scope in ("track", "album")
        for measure in ("gain", "peak")
    },
}
|
||||
|
||||
|
||||
# Allow-list for the Vorbis-style (FLAC/OGG/OPUS) tag walk. Vorbis comment
# keys are free-form strings, so no code-to-label mapping is needed: the
# reader lowercases each key and keeps it only if it appears here, which
# filters out noise such as encoder or comment fields.
_VORBIS_ALLOWED_KEYS = frozenset(
    [
        # Core descriptive fields, including alternate spellings.
        "title", "artist", "albumartist", "album_artist", "album",
        "date", "year", "genre", "tracknumber", "discnumber",
        "tracktotal", "totaltracks", "totaldiscs", "bpm", "mood",
        "copyright", "publisher", "language", "style", "quality",
        "composer", "performer", "artists",
        # Release identification.
        "isrc", "barcode", "catalognumber", "asin", "script", "media",
        "originaldate", "releasecountry", "releasestatus", "releasetype",
        # Lyrics.
        "lyrics", "unsyncedlyrics",
        # Single-purpose enrichment links.
        "genius_track_id", "genius_url", "lastfm_url",
    ]
    # MusicBrainz identifiers, e.g. "musicbrainz_releasegroupid".
    + [
        f"musicbrainz_{entity}id"
        for entity in (
            "album", "artist", "albumartist",
            "releasegroup", "releasetrack", "track",
        )
    ]
    # Per-service source identifiers, e.g. "tidal_album_id".
    + [
        f"{service}_{entity}_id"
        for service in ("spotify", "tidal", "deezer", "audiodb", "itunes")
        for entity in ("track", "artist", "album")
    ]
    # ReplayGain values.
    + [
        f"replaygain_{scope}_{measure}"
        for scope in ("track", "album")
        for measure in ("gain", "peak")
    ]
)
|
||||
|
||||
|
||||
def read_embedded_tags(file_path: str) -> Dict[str, Any]:
    """Read embedded tags from an audio file via mutagen.

    Args:
        file_path: Path of the audio file to inspect.

    Returns:
        A dict with one of two shapes:

        - ``{"available": True, "format": str, "bitrate": int,
          "duration": float, "has_picture": bool, "tags": {...}}`` on
          success. ``format`` is the class name of the object mutagen
          returned. ``tags`` is a flat dict of lowercase friendly key →
          string value (lists joined with ', '). Long fields like
          ``lyrics`` are returned in full — caller decides how to display.

        - ``{"available": False, "reason": "..."}`` when no path was given,
          the path doesn't exist or isn't a regular file, mutagen is
          unavailable, mutagen can't open or recognise the file, or the
          tag walk fails.

    A bad path, an unreadable or unrecognised file, and a failure while
    walking the tags are all reported through ``reason`` rather than
    raised. Caller surfaces ``reason`` to the user verbatim.
    """
    if not file_path or not isinstance(file_path, str):
        return {"available": False, "reason": "No file path on this row."}

    if not os.path.exists(file_path):
        return {
            "available": False,
            "reason": f"File no longer exists at: {file_path}",
        }

    # A directory (or other non-file entry) exists but can never be opened
    # as audio; say so directly instead of relaying mutagen's open error.
    if not os.path.isfile(file_path):
        return {
            "available": False,
            "reason": f"Path is not a regular file: {file_path}",
        }

    symbols = get_mutagen_symbols()
    if symbols is None:
        return {"available": False, "reason": "Mutagen is unavailable."}

    try:
        audio = symbols.File(file_path)
    except Exception as exc:
        logger.debug("Mutagen open failed for %s: %s", file_path, exc)
        return {
            "available": False,
            "reason": f"Could not open file: {exc}",
        }

    if audio is None:
        return {
            "available": False,
            "reason": "File format not recognised by mutagen.",
        }

    fmt = type(audio).__name__
    bitrate = 0
    duration = 0.0
    try:
        info = getattr(audio, "info", None)
        if info is not None:
            bitrate = int(getattr(info, "bitrate", 0) or 0)
            duration = float(getattr(info, "length", 0) or 0)
    except Exception as exc:  # noqa: S110 — optional info, missing is fine
        logger.debug("audio info read failed: %s", exc)

    # Parts of the tag walk are unguarded inside the helpers; catch here so
    # a malformed tag store degrades to the unavailable shape.
    try:
        has_picture = _detect_picture(audio, symbols)
        tags = _extract_tags(audio, symbols)
    except Exception as exc:
        logger.debug("Tag extraction failed for %s: %s", file_path, exc)
        return {
            "available": False,
            "reason": f"Could not read tags: {exc}",
        }

    return {
        "available": True,
        "format": fmt,
        "bitrate": bitrate,
        "duration": duration,
        "has_picture": has_picture,
        "tags": tags,
    }
|
||||
|
||||
|
||||
def _detect_picture(audio: Any, symbols: Any) -> bool:
    """Report whether the file carries at least one embedded cover image.

    Probes, in order: a non-empty ``audio.pictures`` attribute, ID3
    ``APIC`` frames (via ``tags.getall``), a non-empty MP4 ``covr`` entry,
    and a ``metadata_block_picture`` key in the tag store. ``symbols`` is
    accepted for signature parity with the other helpers and is not used.
    Any error raised while probing the tag store is logged at debug level
    and treated as "no picture".
    """
    # Containers that expose a dedicated picture list.
    if getattr(audio, "pictures", None):
        return True

    tag_store = getattr(audio, "tags", None)
    if tag_store is None:
        return False

    try:
        # ID3: cover art lives in APIC frames.
        if hasattr(tag_store, "getall") and tag_store.getall("APIC"):
            return True
        # MP4: cover art lives under the 'covr' atom.
        if "covr" in tag_store and tag_store["covr"]:
            return True
        # Vorbis comment carrying a base64-encoded picture block; the
        # presence of the key alone counts.
        return "metadata_block_picture" in tag_store
    except Exception as exc:  # noqa: S110 — optional probe, missing is fine
        logger.debug("picture detect failed: %s", exc)
    return False
|
||||
|
||||
|
||||
# Fixed (non-freeform) MP4 atom names → canonical tag keys.
_MP4_ATOM_LABELS = {
    "\xa9nam": "title",
    "\xa9ART": "artist",
    "aART": "album_artist",
    "\xa9alb": "album",
    "\xa9day": "date",
    "\xa9gen": "genre",
    "trkn": "tracknumber",
    "disk": "discnumber",
    "\xa9lyr": "lyrics",
    "tmpo": "bpm",
    "cprt": "copyright",
}


def _extract_tags(audio: Any, symbols: Any) -> Dict[str, str]:
    """Flatten the audio file's tag store to a {key: string} dict.

    Dispatches on the container family:

    - ID3 (``audio.tags`` is an instance of ``symbols.ID3``),
    - MP4 (``audio`` is an instance of ``symbols.MP4``),
    - everything else is walked as a Vorbis-style dict-like store,
      filtered through the allow-list plus the ``*_id`` / ``*_url`` /
      ``musicbrainz_*`` pass-through.

    Returns an empty dict when the file has no tag store at all.
    """
    tags = getattr(audio, "tags", None)
    if tags is None:
        return {}
    if isinstance(tags, symbols.ID3):
        return _extract_id3_tags(tags)
    if isinstance(audio, symbols.MP4):
        return _extract_mp4_tags(tags)
    return _extract_vorbis_tags(tags)


def _canonical_user_key(name: str) -> str:
    """Map a user-defined tag name (TXXX desc or MP4 freeform name) to its key."""
    # Exact, case-sensitive match against the known descriptions first;
    # otherwise fall back to lowercase snake_case.
    return _KNOWN_TXXX_DESCS.get(name) or name.lower().replace(" ", "_")


def _extract_id3_tags(tags: Any) -> Dict[str, str]:
    """Collect core text frames, TXXX user frames and USLT lyrics from ID3."""
    out: Dict[str, str] = {}
    for code, label in _ID3_TEXT_FRAMES.items():
        frame = tags.get(code)
        if frame is None:
            continue
        val = _stringify(frame)
        if val:
            out[label] = val

    # TXXX user-defined frames (most of our extra IDs / replay gain /
    # source IDs live here). Read after the core frames, so a TXXX value
    # wins when both map to the same key.
    try:
        for frame in tags.getall("TXXX"):
            desc = getattr(frame, "desc", "")
            if not desc:
                continue
            key = _canonical_user_key(desc)
            val = _stringify(frame)
            if val:
                out[key] = val
    except Exception as exc:  # noqa: S110 — optional TXXX walk
        logger.debug("ID3 TXXX walk failed: %s", exc)

    # USLT (unsynchronised lyrics): the first non-empty frame wins.
    try:
        for frame in tags.getall("USLT"):
            val = _stringify(frame)
            if val:
                out.setdefault("lyrics", val)
    except Exception as exc:  # noqa: S110 — optional USLT walk
        logger.debug("ID3 USLT walk failed: %s", exc)
    return out


def _extract_mp4_tags(tags: Any) -> Dict[str, str]:
    """Collect the fixed MP4 atoms plus ``----`` freeform atoms."""
    out: Dict[str, str] = {}
    for atom, label in _MP4_ATOM_LABELS.items():
        if atom in tags:
            val = _stringify(tags[atom])
            if val:
                out[label] = val

    # Freeform atoms look like "----:com.apple.iTunes:<name>". Route <name>
    # through the same mapping as ID3 TXXX descriptions so a name such as
    # "MusicBrainz Track Id" yields "musicbrainz_trackid" (not a key with
    # spaces) and MP4 files share the canonical keys of other containers.
    for raw_key in tags.keys():
        if not isinstance(raw_key, str) or not raw_key.startswith("----"):
            continue
        name = raw_key.split(":")[-1]
        if not name:
            # Nothing usable to label the value with.
            continue
        val = _stringify(tags[raw_key])
        if val:
            out[_canonical_user_key(name)] = val
    return out


def _extract_vorbis_tags(tags: Any) -> Dict[str, str]:
    """Collect allow-listed keys from a dict-like, Vorbis-style tag store."""
    out: Dict[str, str] = {}
    try:
        for raw_key in tags.keys():
            if not isinstance(raw_key, str):
                continue
            lower = raw_key.lower()
            if lower not in _VORBIS_ALLOWED_KEYS:
                # Pass through anything shaped like a source/ID key even
                # if not explicitly listed: `*_id`, `*_url`,
                # `musicbrainz_*`.
                looks_like_id = lower.endswith(("_id", "_url")) or lower.startswith("musicbrainz_")
                if not looks_like_id:
                    continue
            val = _stringify(tags[raw_key])
            if val:
                out[lower] = val
    except Exception as exc:  # noqa: S110 — optional vorbis walk
        logger.debug("Vorbis tag walk failed: %s", exc)
    return out
|
||||
|
||||
|
||||
def _stringify(value: Any) -> str:
    """Coerce a mutagen tag value into a human-readable string.

    mutagen returns various shapes depending on the container — bare
    strings, raw bytes (MP4 freeform atoms), lists of strings, frame
    objects with `.text` or `.data` attributes, integer tuples (trkn,
    disk), etc. Best-effort flatten:

    - ``None`` → ``""``
    - ``str`` → stripped
    - ``bytes`` / ``bytearray`` → decoded as UTF-8 (undecodable bytes
      replaced) and stripped
    - ``int`` / ``float`` → ``str(value)``
    - ``list`` / ``tuple`` → non-empty items joined with ``", "``; a tuple
      *item* is treated as a ``(number, total)`` pair and rendered as
      ``"number/total"`` (or just ``"number"`` when the total is falsy)
    - anything else → its ``.text``, then its ``.data``, then ``str()``

    Returns ``""`` whenever nothing printable can be produced.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return value.strip()
    if isinstance(value, (bytes, bytearray)):
        # Without this branch bytes fell through to str(value) and showed
        # up in the UI as "b'...'" — notably for MP4 freeform atoms.
        return bytes(value).decode("utf-8", errors="replace").strip()
    if isinstance(value, (int, float)):
        return str(value)
    if isinstance(value, (list, tuple)):
        parts = []
        for item in value:
            if isinstance(item, tuple):
                # (track_num, total) shape from MP4 trkn / disk. A pair
                # with a falsy first element contributes nothing.
                if len(item) >= 1 and item[0]:
                    if len(item) >= 2 and item[1]:
                        parts.append(f"{item[0]}/{item[1]}")
                    else:
                        parts.append(str(item[0]))
                continue
            rendered = _stringify(item)
            if rendered:
                parts.append(rendered)
        return ", ".join(parts)

    # mutagen frame objects: prefer .text, then .data, then str().
    text = getattr(value, "text", None)
    if text is not None and text is not value:
        return _stringify(text)
    data = getattr(value, "data", None)
    if isinstance(data, bytes):
        return data.decode("utf-8", errors="replace").strip()
    if isinstance(data, str):
        return data.strip()
    try:
        return str(value).strip()
    except Exception:
        return ""
|
||||
|
||||
|
||||
__all__ = ["read_embedded_tags"]
|
||||
@ -0,0 +1,354 @@
|
||||
"""Pin `read_embedded_tags` — pure mutagen reader backing the audit
|
||||
trail's "Embedded Tags" section.
|
||||
|
||||
Tests use mock mutagen objects to verify the extraction logic
|
||||
without needing real audio fixtures checked in. The reader handles
|
||||
three container families:
|
||||
|
||||
- ID3 (MP3): text frames keyed by 4-letter codes + TXXX user-defined
|
||||
frames keyed by `desc`.
|
||||
- Vorbis-like (FLAC, OGG, OPUS): dict-like tags, lowercase keys,
|
||||
list-of-strings values.
|
||||
- MP4: dict-like with weird atom keys including the iTunes
|
||||
``----:com.apple.iTunes:`` freeform atoms.
|
||||
|
||||
Every test pins ONE behavior — easier to debug when one regresses.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Boundary cases — bad inputs, missing files, mutagen returns None
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_returns_unavailable_for_empty_path():
    """An empty path string yields the 'No file path' unavailable shape."""
    from core.library import file_tags

    outcome = file_tags.read_embedded_tags('')

    assert outcome['available'] is False
    assert 'No file path' in outcome['reason']
|
||||
|
||||
|
||||
def test_returns_unavailable_for_none():
    """``None`` in place of a path is reported as unavailable, not raised."""
    from core.library import file_tags

    outcome = file_tags.read_embedded_tags(None)  # type: ignore[arg-type]

    assert outcome['available'] is False
|
||||
|
||||
|
||||
def test_returns_unavailable_when_file_missing(tmp_path):
    """A path that was never created yields the 'no longer exists' reason."""
    from core.library import file_tags

    missing_path = str(tmp_path / 'gone.mp3')
    outcome = file_tags.read_embedded_tags(missing_path)

    assert outcome['available'] is False
    assert 'no longer exists' in outcome['reason']
|
||||
|
||||
|
||||
def test_returns_unavailable_when_mutagen_returns_none(tmp_path):
    """An existing file that mutagen's ``File`` maps to ``None`` must
    produce a clear `available: false` result instead of raising."""
    from core.library import file_tags as ft

    not_audio = tmp_path / 'garbage.txt'
    not_audio.write_bytes(b'not audio')

    fake_symbols = MagicMock()
    fake_symbols.File.return_value = None

    with patch.object(ft, 'get_mutagen_symbols', return_value=fake_symbols):
        outcome = ft.read_embedded_tags(str(not_audio))

    assert outcome['available'] is False
    assert 'not recognised' in outcome['reason']
|
||||
|
||||
|
||||
def test_mutagen_open_exception_swallowed(tmp_path):
    """When mutagen's ``File`` raises, the reader returns an error dict
    carrying the exception text; nothing propagates to the caller."""
    from core.library import file_tags as ft

    broken = tmp_path / 'malformed.mp3'
    broken.write_bytes(b'not really an mp3')

    fake_symbols = MagicMock()
    fake_symbols.File.side_effect = RuntimeError('mutagen blew up')

    with patch.object(ft, 'get_mutagen_symbols', return_value=fake_symbols):
        outcome = ft.read_embedded_tags(str(broken))

    assert outcome['available'] is False
    reason = outcome['reason']
    assert 'Could not open file' in reason
    assert 'mutagen blew up' in reason
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ID3 path (MP3) — TIT2/TPE1/TALB + TXXX user-defined frames
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _build_id3_audio(symbols, frames, txxx_frames=None, pictures=False):
    """Helper to build a fake mutagen ID3 audio object.

    Args:
        symbols: Fake mutagen symbols namespace; ``symbols.ID3`` becomes
            the reported class of the tag store so the reader's
            ``isinstance(tags, symbols.ID3)`` check takes the ID3 branch.
        frames: Dict of ``{code: text}`` for plain ID3 text frames.
        txxx_frames: Optional list of ``(desc, text)`` tuples for
            user-defined (TXXX) frames.
        pictures: When true, ``getall('APIC')`` returns one cover frame.

    Returns:
        A ``MagicMock`` with ``tags``, ``info`` (320 kbps / 204.5 s) and a
        class name of ``'MP3'``. It has no ``pictures`` attribute.
    """
    frame_map = {
        code: SimpleNamespace(text=[text]) for code, text in frames.items()
    }
    txxx = [
        SimpleNamespace(desc=desc, text=[text])
        for desc, text in (txxx_frames or [])
    ]

    def _getall(code):
        if code == 'TXXX':
            return list(txxx)
        if code == 'APIC':
            return [object()] if pictures else []
        # USLT and anything else: no frames.
        return []

    tags = MagicMock()
    tags.__class__ = symbols.ID3
    tags.get.side_effect = lambda code: frame_map.get(code)
    tags.getall.side_effect = _getall

    audio = MagicMock()
    audio.tags = tags
    audio.info = SimpleNamespace(bitrate=320000, length=204.5)
    # A bare MagicMock would auto-create a truthy `pictures` child, making
    # the reader report cover art regardless of `pictures=` and letting the
    # APIC test pass vacuously. Deleting the attribute makes lookups raise
    # AttributeError, so only the APIC frames decide has_picture.
    del audio.pictures
    type(audio).__name__ = 'MP3'
    return audio
|
||||
|
||||
|
||||
def test_id3_extracts_core_text_frames(tmp_path):
    """Core ID3 text frames come back under their friendly keys."""
    from core.library import file_tags as ft

    mp3_path = tmp_path / 't.mp3'
    mp3_path.write_bytes(b'\x00')

    fake_symbols = MagicMock()
    fake_symbols.ID3 = MagicMock  # isinstance check uses this
    fake_symbols.MP4 = type('MP4', (), {})  # not an MP4
    fake_symbols.File.return_value = _build_id3_audio(fake_symbols, frames={
        'TIT2': 'Without Me',
        'TPE1': 'Eminem',
        'TPE2': 'Eminem',
        'TALB': 'The Eminem Show',
        'TDRC': '2002',
        'TCON': 'Hip-Hop',
        'TRCK': '10/20',
        'TPOS': '1',
    })

    with patch.object(ft, 'get_mutagen_symbols', return_value=fake_symbols):
        result = ft.read_embedded_tags(str(mp3_path))

    assert result['available'] is True
    expected = {
        'title': 'Without Me',
        'artist': 'Eminem',
        'album_artist': 'Eminem',
        'album': 'The Eminem Show',
        'date': '2002',
        'genre': 'Hip-Hop',
        'tracknumber': '10/20',
        'discnumber': '1',
    }
    for key, value in expected.items():
        assert result['tags'][key] == value
|
||||
|
||||
|
||||
def test_id3_extracts_txxx_known_descriptions(tmp_path):
    """TXXX frames are keyed by description; known descriptions are
    mapped to their friendly snake_case keys."""
    from core.library import file_tags as ft

    mp3_path = tmp_path / 't.mp3'
    mp3_path.write_bytes(b'\x00')

    user_frames = [
        ('Spotify Track Id', 'sp_abc'),
        ('MusicBrainz Release Group Id', 'mb_def'),
        ('replaygain_track_gain', '-9.90 dB'),
        ('replaygain_track_peak', '1.161449'),
    ]
    fake_symbols = MagicMock()
    fake_symbols.ID3 = MagicMock
    fake_symbols.MP4 = type('MP4', (), {})
    fake_symbols.File.return_value = _build_id3_audio(
        fake_symbols, frames={'TIT2': 'X'}, txxx_frames=user_frames,
    )

    with patch.object(ft, 'get_mutagen_symbols', return_value=fake_symbols):
        result = ft.read_embedded_tags(str(mp3_path))

    tags = result['tags']
    assert tags['spotify_track_id'] == 'sp_abc'
    assert tags['musicbrainz_releasegroupid'] == 'mb_def'
    assert tags['replaygain_track_gain'] == '-9.90 dB'
    assert tags['replaygain_track_peak'] == '1.161449'
|
||||
|
||||
|
||||
def test_id3_unknown_txxx_desc_falls_back_to_snake_case(tmp_path):
    """A TXXX description missing from the known table is still surfaced,
    keyed by its lowercased, underscore-joined form."""
    from core.library import file_tags as ft

    mp3_path = tmp_path / 't.mp3'
    mp3_path.write_bytes(b'\x00')

    fake_symbols = MagicMock()
    fake_symbols.ID3 = MagicMock
    fake_symbols.MP4 = type('MP4', (), {})
    fake_symbols.File.return_value = _build_id3_audio(
        fake_symbols,
        frames={'TIT2': 'X'},
        txxx_frames=[('Custom Vendor Field', 'foo')],
    )

    with patch.object(ft, 'get_mutagen_symbols', return_value=fake_symbols):
        result = ft.read_embedded_tags(str(mp3_path))

    # 'Custom Vendor Field' → 'custom_vendor_field'
    assert result['tags']['custom_vendor_field'] == 'foo'
|
||||
|
||||
|
||||
def test_id3_detects_apic_cover_art(tmp_path):
    """An ID3 file built with a cover frame reports ``has_picture``."""
    from core.library import file_tags as ft

    mp3_path = tmp_path / 't.mp3'
    mp3_path.write_bytes(b'\x00')

    fake_symbols = MagicMock()
    fake_symbols.ID3 = MagicMock
    fake_symbols.MP4 = type('MP4', (), {})
    fake_symbols.File.return_value = _build_id3_audio(
        fake_symbols, frames={'TIT2': 'X'}, pictures=True,
    )

    with patch.object(ft, 'get_mutagen_symbols', return_value=fake_symbols):
        result = ft.read_embedded_tags(str(mp3_path))

    assert result['has_picture'] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Vorbis-like (FLAC) — dict-style lowercase keys, list values
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_vorbis_passes_through_whitelisted_keys(tmp_path):
    """Allow-listed Vorbis keys are returned as joined strings; keys that
    are neither listed nor ID/URL-shaped are dropped."""
    from core.library import file_tags as ft

    flac_path = tmp_path / 't.flac'
    flac_path.write_bytes(b'\x00')

    vorbis_comments = {
        'title': ['Teenage Dream'],
        'artist': ['Katy Perry'],
        'album': ['Teenage Dream'],
        'date': ['2010'],
        'isrc': ['USCA21001255'],
        'musicbrainz_albumid': ['mb-album-id'],
        'tidal_track_id': ['14165831'],
        'unrelated_internal_key': ['skip-me'],
    }
    audio = MagicMock(
        tags=vorbis_comments,
        info=SimpleNamespace(bitrate=900000, length=180.0),
        pictures=[],
    )
    type(audio).__name__ = 'FLAC'

    fake_symbols = MagicMock()
    # Neither an ID3 tag store nor an MP4 file, so the reader takes the
    # Vorbis branch.
    fake_symbols.ID3 = type('ID3', (), {})
    fake_symbols.MP4 = type('MP4', (), {})
    fake_symbols.File.return_value = audio

    with patch.object(ft, 'get_mutagen_symbols', return_value=fake_symbols):
        result = ft.read_embedded_tags(str(flac_path))

    tags = result['tags']
    assert tags['title'] == 'Teenage Dream'
    assert tags['artist'] == 'Katy Perry'
    assert tags['isrc'] == 'USCA21001255'
    assert tags['musicbrainz_albumid'] == 'mb-album-id'
    assert tags['tidal_track_id'] == '14165831'
    # Not allow-listed and not *_id / *_url shaped: filtered out.
    assert 'unrelated_internal_key' not in tags
|
||||
|
||||
|
||||
def test_vorbis_pass_through_for_unknown_id_url_keys(tmp_path):
    """Keys ending in `_id` / `_url` are surfaced even when the explicit
    allow-list doesn't name them, so tags written by enrichment sources
    we haven't listed still show up."""
    from core.library import file_tags as ft

    flac_path = tmp_path / 't.flac'
    flac_path.write_bytes(b'\x00')

    audio = MagicMock(
        tags={
            'title': ['X'],
            'beatport_track_id': ['bp_xyz'],
            'songkick_url': ['https://...'],
        },
        info=SimpleNamespace(bitrate=900000, length=1.0),
        pictures=[],
    )

    fake_symbols = MagicMock()
    fake_symbols.ID3 = type('ID3', (), {})
    fake_symbols.MP4 = type('MP4', (), {})
    fake_symbols.File.return_value = audio

    with patch.object(ft, 'get_mutagen_symbols', return_value=fake_symbols):
        result = ft.read_embedded_tags(str(flac_path))

    tags = result['tags']
    assert tags['beatport_track_id'] == 'bp_xyz'
    assert tags['songkick_url'] == 'https://...'
|
||||
|
||||
|
||||
def test_vorbis_detects_pictures(tmp_path):
    """A non-empty ``audio.pictures`` list is reported as ``has_picture``."""
    from core.library import file_tags as ft

    flac_path = tmp_path / 't.flac'
    flac_path.write_bytes(b'\x00')

    audio = MagicMock(
        tags={'title': ['X']},
        info=SimpleNamespace(bitrate=900000, length=1.0),
        pictures=[object()],  # one embedded image
    )

    fake_symbols = MagicMock()
    fake_symbols.ID3 = type('ID3', (), {})
    fake_symbols.MP4 = type('MP4', (), {})
    fake_symbols.File.return_value = audio

    with patch.object(ft, 'get_mutagen_symbols', return_value=fake_symbols):
        result = ft.read_embedded_tags(str(flac_path))

    assert result['has_picture'] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Format + bitrate + duration metadata
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_returns_format_and_bitrate(tmp_path):
    """Format (class name), bitrate and duration are copied from the
    mutagen object into the result."""
    from core.library import file_tags as ft

    mp3_path = tmp_path / 't.mp3'
    mp3_path.write_bytes(b'\x00')

    fake_symbols = MagicMock()
    fake_symbols.ID3 = MagicMock
    fake_symbols.MP4 = type('MP4', (), {})

    audio = _build_id3_audio(fake_symbols, frames={'TIT2': 'X'})
    # Pin the values under test explicitly instead of relying on the
    # helper's defaults.
    type(audio).__name__ = 'MP3'  # mutagen exposes class name
    audio.info = SimpleNamespace(bitrate=320000, length=204.5)
    fake_symbols.File.return_value = audio

    with patch.object(ft, 'get_mutagen_symbols', return_value=fake_symbols):
        result = ft.read_embedded_tags(str(mp3_path))

    assert result['format'] == 'MP3'
    assert result['bitrate'] == 320000
    assert result['duration'] == pytest.approx(204.5)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stringify defensive cases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestStringify:
    """Pin the value-flattening rules of ``_stringify`` one shape at a time."""

    def test_list_of_strings_joined(self):
        """A list of strings is joined with ', '."""
        from core.library import file_tags

        assert file_tags._stringify(['a', 'b', 'c']) == 'a, b, c'

    def test_tuple_pair_joined_with_slash(self):
        """MP4 trkn / disk values come as (current, total) tuples."""
        from core.library import file_tags

        assert file_tags._stringify([(10, 20)]) == '10/20'

    def test_int_coerced_to_string(self):
        """A bare integer is rendered via ``str``."""
        from core.library import file_tags

        assert file_tags._stringify(42) == '42'

    def test_none_returns_empty(self):
        """``None`` flattens to the empty string."""
        from core.library import file_tags

        assert file_tags._stringify(None) == ''

    def test_frame_with_text_attribute_unwrapped(self):
        """mutagen frames expose `.text` as a list of strings."""
        from core.library import file_tags

        fake_frame = SimpleNamespace(text=['Title Here'])
        assert file_tags._stringify(fake_frame) == 'Title Here'

    def test_whitespace_stripped(self):
        """Leading and trailing whitespace is removed from plain strings."""
        from core.library import file_tags

        assert file_tags._stringify('  spaced  ') == 'spaced'
|
||||
Loading…
Reference in new issue