Merge pull request #492 from Nezreka/refactor/typed-album-consumers

Migrate discography + quality scanner to typed Album path
pull/496/head
BoulderBadgeDad 3 weeks ago committed by GitHub
commit ea741df286
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -32,14 +32,30 @@ import logging
import time
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Callable
from typing import Any, Callable, Dict, Optional
from core.metadata.registry import get_client_for_source, get_primary_source, get_source_priority
from core.metadata.types import Album
from core.wishlist.payloads import ensure_wishlist_track_format
logger = logging.getLogger(__name__)
# Per-source typed converter dispatch — same registry pattern as
# the metadata builders. Quality-scanner result normalization routes
# the embedded ``track.album`` blob through Album.from_<source>_dict()
# when provider is known. Falls back to legacy duck-typed extraction.
_TYPED_ALBUM_CONVERTERS: Dict[str, Callable[[Dict[str, Any]], Album]] = {
'spotify': Album.from_spotify_dict,
'itunes': Album.from_itunes_dict,
'deezer': Album.from_deezer_dict,
'discogs': Album.from_discogs_dict,
'musicbrainz': Album.from_musicbrainz_dict,
'hydrabase': Album.from_hydrabase_dict,
'qobuz': Album.from_qobuz_dict,
}
@dataclass
class QualityScannerDeps:
"""Bundle of cross-cutting deps the quality scanner needs."""
@ -140,7 +156,16 @@ def _normalize_image_entries(image_value: Any) -> list[dict]:
return normalized
def _normalize_track_album(track_item: Any) -> dict:
def _normalize_track_album(track_item: Any, provider: Optional[str] = None) -> dict:
"""Normalize a track's embedded album blob into a flat dict.
When ``provider`` is provided AND maps to a registered typed Album
converter, routes through the typed path to seed canonical fields
on ``album_data`` before legacy fallback chains fill any gaps.
Falls back to legacy duck-typed extraction on unknown provider /
non-dict input / typed converter error same pattern as the
metadata builders.
"""
album = _extract_lookup_value(track_item, 'album', default={})
if isinstance(album, dict):
album_data = dict(album)
@ -152,6 +177,27 @@ def _normalize_track_album(track_item: Any) -> dict:
'release_date': _extract_lookup_value(album, 'release_date', default='') or '',
}
if provider and isinstance(album, dict):
converter = _TYPED_ALBUM_CONVERTERS.get(provider.strip().lower())
if converter is not None:
try:
typed_album = converter(album)
if typed_album.name:
album_data.setdefault('name', typed_album.name)
if typed_album.album_type:
album_data.setdefault('album_type', typed_album.album_type)
if typed_album.total_tracks:
album_data.setdefault('total_tracks', typed_album.total_tracks)
if typed_album.release_date:
album_data.setdefault('release_date', typed_album.release_date)
if typed_album.id:
album_data.setdefault('id', typed_album.id)
except Exception as exc:
logger.debug(
"Typed album converter failed for provider %s in quality "
"scanner normalize, falling back to legacy: %s", provider, exc,
)
album_data.setdefault('name', _extract_lookup_value(track_item, 'album_name', default='Unknown Album') or 'Unknown Album')
album_data.setdefault('album_type', _extract_lookup_value(track_item, 'album_type', default='album') or 'album')
album_data.setdefault('total_tracks', _extract_lookup_value(track_item, 'total_tracks', 'track_count', default=0) or 0)
@ -189,7 +235,7 @@ def _normalize_track_match(track_item: Any, provider: str) -> dict:
'id': _extract_lookup_value(track_item, 'id', 'track_id', default='') or '',
'name': _extract_lookup_value(track_item, 'name', 'title', default='Unknown Track') or 'Unknown Track',
'artists': _normalize_track_artists(track_item),
'album': _normalize_track_album(track_item),
'album': _normalize_track_album(track_item, provider=provider),
'image_url': _extract_lookup_value(track_item, 'image_url', 'album_cover_url', default=None),
'duration_ms': _extract_lookup_value(track_item, 'duration_ms', default=0) or 0,
'track_number': _extract_lookup_value(track_item, 'track_number', default=1) or 1,

@ -2,16 +2,53 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional
from typing import Any, Callable, Dict, List, Optional
from core.metadata import registry as metadata_registry
from core.metadata.album_tracks import get_artist_albums_for_source
from core.metadata.lookup import MetadataLookupOptions
from core.metadata.types import Album
from utils.logging_config import get_logger
logger = get_logger("metadata.discography")
# Per-source typed converter dispatch — same registry pattern as
# ``core/metadata/album_tracks.py`` and ``core/imports/resolution.py``.
# Discography release builders dispatch through the typed Album
# converter when the active source is known. Falls back to legacy
# duck-typed extraction below on unknown source / converter error.
_TYPED_ALBUM_CONVERTERS: Dict[str, Callable[[Dict[str, Any]], Album]] = {
'spotify': Album.from_spotify_dict,
'itunes': Album.from_itunes_dict,
'deezer': Album.from_deezer_dict,
'discogs': Album.from_discogs_dict,
'musicbrainz': Album.from_musicbrainz_dict,
'hydrabase': Album.from_hydrabase_dict,
'qobuz': Album.from_qobuz_dict,
}
def _typed_album_for_source(release: Any, source: Optional[str]) -> Optional[Album]:
"""Return a typed Album when source maps to a registered converter
and the converter succeeds. ``None`` means the caller should fall
back to the legacy duck-typed extraction.
"""
if not source or not isinstance(release, dict):
return None
converter = _TYPED_ALBUM_CONVERTERS.get(source.strip().lower())
if converter is None:
return None
try:
return converter(release)
except Exception as exc:
logger.debug(
"Typed album converter failed for source %s in discography "
"build, falling back to legacy: %s", source, exc,
)
return None
def _extract_lookup_value(value: Any, *names: str, default: Any = None) -> Any:
if value is None:
return default
@ -89,7 +126,32 @@ def _pick_best_artist_match(search_results: List[Any], artist_name: str) -> Opti
return search_results[0]
def _build_discography_release_dict(release: Any, artist_id: str) -> Optional[Dict[str, Any]]:
def _build_discography_release_dict(release: Any, artist_id: str,
source: Optional[str] = None) -> Optional[Dict[str, Any]]:
"""Build a normalized discography release dict.
When ``source`` is provided AND maps to a registered typed Album
converter, routes through ``Album.from_<source>_dict()`` and pulls
canonical fields off the typed Album. Falls back to the legacy
duck-typed extraction on unknown source / non-dict input / typed
converter error.
"""
typed_album = _typed_album_for_source(release, source)
if typed_album is not None:
if not typed_album.id:
return None
artist_name = typed_album.artists[0] if typed_album.artists else ''
return {
'id': typed_album.id,
'name': typed_album.name or typed_album.id,
'artist_name': artist_name,
'release_date': typed_album.release_date or None,
'album_type': typed_album.album_type or 'album',
'image_url': typed_album.image_url,
'total_tracks': typed_album.total_tracks or 0,
'external_urls': typed_album.external_urls or {},
}
release_id = _extract_lookup_value(release, 'id', 'album_id', 'release_id')
if not release_id:
return None
@ -308,7 +370,7 @@ def get_artist_discography(
seen_albums = set()
for release in albums or []:
release_data = _build_discography_release_dict(release, artist_id)
release_data = _build_discography_release_dict(release, artist_id, source=active_source)
if not release_data:
continue
@ -341,7 +403,46 @@ def get_artist_discography(
}
def _build_artist_detail_release_card(release: Dict[str, Any]) -> Optional[Dict[str, Any]]:
def _build_artist_detail_release_card(release: Dict[str, Any],
source: Optional[str] = None) -> Optional[Dict[str, Any]]:
"""Build an artist-detail release card.
NOTE on inputs: this function may receive EITHER raw provider
release dicts (when called directly during a fresh discography
lookup) OR pre-built canonical release dicts produced by
``_build_discography_release_dict`` (the more common case via
``get_artist_detail_discography``). Pre-built dicts already carry
canonical keys, so the typed dispatch is a no-op for them and
the legacy duck-typed path also handles them correctly. The typed
dispatch only kicks in when the caller passes a known source AND
the release dict matches a provider's wire shape.
"""
typed_album = _typed_album_for_source(release, source)
if typed_album is not None and typed_album.id:
release_year = None
if typed_album.release_date:
try:
release_year = str(typed_album.release_date)[:4]
except Exception:
release_year = None
card = {
'id': typed_album.id,
'name': typed_album.name or typed_album.id,
'title': typed_album.name or typed_album.id,
'album_type': (typed_album.album_type or 'album').lower(),
'image_url': typed_album.image_url,
'year': release_year,
'track_count': typed_album.total_tracks or 0,
'owned': None,
'track_completion': 'checking',
}
if typed_album.release_date:
card['release_date'] = typed_album.release_date
elif release_year:
card['release_date'] = f"{release_year}-01-01"
return card
release_id = _extract_lookup_value(release, 'id', 'album_id', 'release_id')
if not release_id:
return None

@ -0,0 +1,87 @@
"""Pin the typed-path migration of `_normalize_track_album` in
`core/discovery/quality_scanner.py`.
Quality scanner result normalization now routes the embedded
`track.album` blob through `Album.from_<source>_dict()` when provider
is known. Falls back to legacy duck-typed extraction below.
"""
from __future__ import annotations
from unittest.mock import patch
import pytest
from core.discovery import quality_scanner
SPOTIFY_TRACK = {
'id': 'tr1',
'name': 'HUMBLE.',
'artists': [{'id': 'kdot', 'name': 'Kendrick Lamar'}],
'album': {
'id': 'sp_album',
'name': 'DAMN.',
'artists': [{'id': 'kdot', 'name': 'Kendrick Lamar'}],
'release_date': '2017-04-14',
'total_tracks': 14,
'album_type': 'album',
'images': [{'url': 'https://i.scdn.co/640.jpg'}],
},
}
def test_typed_path_seeds_album_fields_from_known_provider():
out = quality_scanner._normalize_track_album(SPOTIFY_TRACK, provider='spotify')
assert out['name'] == 'DAMN.'
assert out['album_type'] == 'album'
assert out['total_tracks'] == 14
assert out['release_date'] == '2017-04-14'
assert out['id'] == 'sp_album'
def test_legacy_path_used_when_no_provider():
out = quality_scanner._normalize_track_album(SPOTIFY_TRACK)
# Legacy path still resolves album from raw `album` dict.
assert out['name'] == 'DAMN.'
assert out['album_type'] == 'album'
assert out['total_tracks'] == 14
def test_legacy_path_used_for_unknown_provider():
out = quality_scanner._normalize_track_album(SPOTIFY_TRACK, provider='made_up')
assert out['name'] == 'DAMN.'
def test_legacy_path_used_when_typed_converter_raises():
def _explode(_):
raise RuntimeError('simulated converter bug')
with patch.dict(quality_scanner._TYPED_ALBUM_CONVERTERS,
{'spotify': _explode}):
out = quality_scanner._normalize_track_album(SPOTIFY_TRACK, provider='spotify')
# Fell back to legacy — still has the album fields.
assert out['name'] == 'DAMN.'
assert out['total_tracks'] == 14
@pytest.mark.parametrize('provider,track', [
('itunes', {
'id': 1, 'name': 'X',
'album': {'collectionId': 99, 'collectionName': 'iTunes Album',
'artistName': 'Artist', 'trackCount': 10},
}),
('deezer', {
'id': 2, 'name': 'X',
'album': {'id': 99, 'title': 'Deezer Album',
'artist': {'name': 'Artist'}, 'nb_tracks': 8},
}),
('discogs', {
'id': 3, 'name': 'X',
'album': {'id': 99, 'title': 'Discogs Album',
'artists': [{'name': 'Artist'}], 'year': 2020},
}),
])
def test_typed_path_works_for_other_providers(provider, track):
out = quality_scanner._normalize_track_album(track, provider=provider)
assert out['name'] # populated

@ -0,0 +1,160 @@
"""Pin the typed-path migration in `core/metadata/discography.py`.
`_build_discography_release_dict` and `_build_artist_detail_release_card`
historically did duck-typed extraction with fallback chains. This pr
routes them through `Album.from_<source>_dict()` when caller supplies
a known source. Legacy duck-typing kicks in as fallback.
These tests pin:
- Typed path used when source is recognized.
- Typed path output matches expected fields the legacy path produced.
- Legacy path runs unchanged when source is empty/unknown OR when
the typed converter raises.
- Cross-provider parametrized smoke for every registered source.
"""
from __future__ import annotations
from unittest.mock import patch
import pytest
from core.metadata import discography
SAMPLE_SPOTIFY_RELEASE = {
'id': 'sp123',
'name': 'GNX',
'artists': [{'id': 'kdot', 'name': 'Kendrick Lamar'}],
'release_date': '2024-11-22',
'total_tracks': 12,
'album_type': 'album',
'images': [{'url': 'https://i.scdn.co/640.jpg', 'height': 640}],
'external_urls': {'spotify': 'https://open.spotify.com/album/sp123'},
}
# ---------------------------------------------------------------------------
# _build_discography_release_dict
# ---------------------------------------------------------------------------
def test_typed_path_used_for_known_source():
out = discography._build_discography_release_dict(
SAMPLE_SPOTIFY_RELEASE, artist_id='kdot', source='spotify',
)
assert out['id'] == 'sp123'
assert out['name'] == 'GNX'
assert out['artist_name'] == 'Kendrick Lamar'
assert out['release_date'] == '2024-11-22'
assert out['album_type'] == 'album'
assert out['total_tracks'] == 12
assert out['image_url'] == 'https://i.scdn.co/640.jpg'
assert out['external_urls'] == {'spotify': 'https://open.spotify.com/album/sp123'}
def test_legacy_path_used_when_no_source():
out = discography._build_discography_release_dict(
SAMPLE_SPOTIFY_RELEASE, artist_id='kdot',
)
assert out['id'] == 'sp123'
assert out['name'] == 'GNX'
assert out['artist_name'] == 'Kendrick Lamar'
def test_legacy_path_used_for_unknown_source():
out = discography._build_discography_release_dict(
SAMPLE_SPOTIFY_RELEASE, artist_id='kdot', source='made_up',
)
assert out['id'] == 'sp123'
def test_legacy_path_used_when_typed_converter_raises():
def _explode(_):
raise RuntimeError('simulated converter bug')
with patch.dict(discography._TYPED_ALBUM_CONVERTERS,
{'spotify': _explode}):
out = discography._build_discography_release_dict(
SAMPLE_SPOTIFY_RELEASE, artist_id='kdot', source='spotify',
)
# Legacy path still produced a result.
assert out['id'] == 'sp123'
assert out['name'] == 'GNX'
def test_release_with_no_id_returns_none():
raw = dict(SAMPLE_SPOTIFY_RELEASE)
raw.pop('id')
out = discography._build_discography_release_dict(
raw, artist_id='kdot', source='spotify',
)
assert out is None
@pytest.mark.parametrize('source,raw', [
('itunes', {
'collectionId': 1, 'collectionName': 'GNX',
'artistName': 'Kendrick Lamar', 'trackCount': 12,
}),
('deezer', {
'id': 1, 'title': 'GNX',
'artist': {'name': 'Kendrick Lamar'}, 'nb_tracks': 12,
}),
('discogs', {
'id': 1, 'title': 'GNX',
'artists': [{'name': 'Kendrick Lamar'}], 'year': 2024,
}),
('musicbrainz', {
'id': 'mbid', 'title': 'GNX',
'artist-credit': [{'artist': {'name': 'Kendrick Lamar'}}],
}),
('hydrabase', {
'id': 'hb', 'name': 'GNX',
'artists': [{'name': 'Kendrick Lamar'}],
}),
('qobuz', {
'id': 1, 'title': 'GNX',
'artist': {'name': 'Kendrick Lamar'}, 'tracks_count': 12,
}),
])
def test_typed_path_works_for_every_registered_source(source, raw):
out = discography._build_discography_release_dict(
raw, artist_id='whatever', source=source,
)
assert out is not None
assert out['name'] == 'GNX'
# ---------------------------------------------------------------------------
# _build_artist_detail_release_card — typed dispatch on raw input
# ---------------------------------------------------------------------------
def test_artist_detail_card_typed_path():
card = discography._build_artist_detail_release_card(
SAMPLE_SPOTIFY_RELEASE, source='spotify',
)
assert card['id'] == 'sp123'
assert card['name'] == 'GNX'
assert card['album_type'] == 'album'
assert card['year'] == '2024'
assert card['release_date'] == '2024-11-22'
assert card['image_url'] == 'https://i.scdn.co/640.jpg'
assert card['track_count'] == 12
def test_artist_detail_card_legacy_path_no_source():
"""Existing canonical-dict input (no source) takes legacy path."""
canonical = {
'id': 'sp123',
'name': 'GNX',
'album_type': 'album',
'release_date': '2024-11-22',
'image_url': 'https://i.scdn.co/640.jpg',
'total_tracks': 12,
}
card = discography._build_artist_detail_release_card(canonical)
assert card['id'] == 'sp123'
assert card['name'] == 'GNX'
assert card['year'] == '2024'

@ -3434,6 +3434,7 @@ const WHATS_NEW = {
{ date: 'Unreleased — 2.4.2 dev cycle' },
{ title: 'Internal: Typed Metadata Foundation', desc: 'internal — first step of a multi-pr migration to give the metadata pipeline a real contract. the codebase historically grew duck-typed extractors (`_extract_lookup_value(album_data, "id", "album_id", "collectionId", "release_id", default=...)`) at every consumer site because each provider returns its own response shape. ~150 of those across the codebase. new `core/metadata/types.py` defines canonical typed `Album` / `Track` / `Artist` dataclasses with strict required fields. per-source classmethod converters (from_spotify_dict, from_itunes_dict, from_deezer_dict, from_discogs_dict, from_musicbrainz_dict, from_hydrabase_dict) are the SINGLE place that knows each provider\'s wire shape. zero behavior changes in this pr — pure additive foundation. follow-up prs migrate consumers one at a time. full migration plan documented at docs/metadata-types-migration.md.', page: 'library' },
{ title: 'Internal: Migrate Album-Info Builders to Typed Path', desc: 'internal — steps 2+3 of the typed metadata migration in one pr. two album-info builders now route through `Album.from_<source>_dict()` when the caller passes a known source: `_build_album_info` (used by every album-tracks lookup) and the embedded album section of `_build_single_import_context_payload` (used by single-track import context resolution). legacy duck-typed extraction stays as the fallback when source is empty/unknown, raw input isn\'t a dict, or the typed converter raises — so a converter bug can\'t break album resolution or import context. caller-provided album_id / album_name / artist_name fallbacks apply on the typed path the same way they did on legacy. zero behavior change for existing callers since they don\'t pass a source yet — opt-in only. 22 new tests pin the typed path, the legacy fallback, and parametrized coverage across registered providers.' },
{ title: 'Internal: Migrate Discography + Quality Scanner to Typed Path', desc: 'internal — next round of the typed metadata migration. three more album-shape consumers now route through `Album.from_<source>_dict()` when the caller passes a known source: `_build_discography_release_dict` (artist discography release cards), `_build_artist_detail_release_card` (artist detail page release cards), and `_normalize_track_album` (quality scanner result normalization). legacy duck-typed extraction stays as the fallback when source is empty/unknown, raw input isn\'t a dict, or the typed converter raises — same safety contract as the prior migration steps. 20 new tests pin the typed path + legacy fallback + parametrized coverage across registered providers.' },
{ title: 'Discogs Collection in "Your Albums"', desc: 'discord request: pull your discogs collection into the your albums section on discover, similar to spotify liked albums. set your discogs personal access token on settings → connections (already there from prior work) and add discogs as one of the configured sources via the gear button on your albums. background fetcher pulls your full collection (all folders, all pages — capped at 5000 releases), normalizes artist names (strips discogs `(N)` disambiguation suffix), dedupes against any spotify/tidal/deezer-saved versions of the same album. clicking a discogs-only album opens with discogs context — full release detail (year, format, label, country, tracklist) from the /releases endpoint. clicking an album that exists in both your spotify saved AND discogs collection prefers spotify (download flow is more direct). discogs is physical-media-first so many releases won\'t have streaming equivalents — those still show in the grid but the modal flow may need to fall back to a name search to find a downloadable digital version.', page: 'discover' },
{ title: 'Drop Redundant "Your Spotify Library" Section on Discover', desc: 'discover page used to show two near-identical sections: "Your Albums" (cross-source aggregator across spotify/deezer/etc) AND "Your Spotify Library" (spotify-only). same UI, same grid, same filter / sort / download-missing controls — the spotify-only one was a strict subset of what your albums already covers. removed it. spotify saved albums still surface via the your albums section with spotify as one of its configured sources (gear button → configure sources). backend collection / storage is unchanged — the watchlist scanner still populates the spotify_library_albums cache for your albums to read.', page: 'discover' },
{ title: 'Library Disk Usage on Stats Page', desc: 'discord request (samuel [KC]): show how much disk space the library takes. new card on stats → system statistics shows total bytes + per-format breakdown (FLAC vs MP3 vs M4A bars). data comes from `tracks.file_size` populated during deep scan from whatever the media server already returns (plex MediaPart.size, jellyfin MediaSources[].Size, navidrome song.size, soulsync standalone os.path.getsize) — zero filesystem walk overhead. existing libraries see "Run a Deep Scan to populate" until the next deep scan fills in sizes; partial coverage shown as "X tracks measured (+Y pending)". migration is additive (NULL on legacy rows) so upgrading users have nothing to do.', page: 'stats' },

Loading…
Cancel
Save