From 77c54ab7a78175396226b868c81c891b72020522 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Mon, 4 May 2026 08:12:40 -0700 Subject: [PATCH] Migrate discography + quality scanner to typed Album path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three more album-shape consumers now route through Album.from__dict() when caller passes a known source: - _build_discography_release_dict (artist discography cards) - _build_artist_detail_release_card (artist detail release cards) - _normalize_track_album (quality scanner result normalization) Legacy duck-typing stays as fallback for unknown source, non-dict input, or converter errors. Pure additive — existing callers without source kwarg unchanged. --- core/discovery/quality_scanner.py | 52 +++++- core/metadata/discography.py | 109 +++++++++++- .../test_quality_scanner_typed_album.py | 87 ++++++++++ tests/metadata/test_discography_typed_path.py | 160 ++++++++++++++++++ webui/static/helper.js | 1 + 5 files changed, 402 insertions(+), 7 deletions(-) create mode 100644 tests/discovery/test_quality_scanner_typed_album.py create mode 100644 tests/metadata/test_discography_typed_path.py diff --git a/core/discovery/quality_scanner.py b/core/discovery/quality_scanner.py index 6c213c80..c7c38343 100644 --- a/core/discovery/quality_scanner.py +++ b/core/discovery/quality_scanner.py @@ -32,14 +32,30 @@ import logging import time from dataclasses import dataclass from datetime import datetime -from typing import Any, Callable +from typing import Any, Callable, Dict, Optional from core.metadata.registry import get_client_for_source, get_primary_source, get_source_priority +from core.metadata.types import Album from core.wishlist.payloads import ensure_wishlist_track_format logger = logging.getLogger(__name__) +# Per-source typed converter dispatch — same registry pattern as +# the metadata builders. Quality-scanner result normalization routes +# the embedded ``track.album`` blob through Album.from__dict() +# when provider is known. Falls back to legacy duck-typed extraction. +_TYPED_ALBUM_CONVERTERS: Dict[str, Callable[[Dict[str, Any]], Album]] = { + 'spotify': Album.from_spotify_dict, + 'itunes': Album.from_itunes_dict, + 'deezer': Album.from_deezer_dict, + 'discogs': Album.from_discogs_dict, + 'musicbrainz': Album.from_musicbrainz_dict, + 'hydrabase': Album.from_hydrabase_dict, + 'qobuz': Album.from_qobuz_dict, +} + + @dataclass class QualityScannerDeps: """Bundle of cross-cutting deps the quality scanner needs.""" @@ -140,7 +156,16 @@ def _normalize_image_entries(image_value: Any) -> list[dict]: return normalized -def _normalize_track_album(track_item: Any) -> dict: +def _normalize_track_album(track_item: Any, provider: Optional[str] = None) -> dict: + """Normalize a track's embedded album blob into a flat dict. + + When ``provider`` is provided AND maps to a registered typed Album + converter, routes through the typed path to seed canonical fields + on ``album_data`` before legacy fallback chains fill any gaps. + Falls back to legacy duck-typed extraction on unknown provider / + non-dict input / typed converter error — same pattern as the + metadata builders. + """ album = _extract_lookup_value(track_item, 'album', default={}) if isinstance(album, dict): album_data = dict(album) @@ -152,6 +177,27 @@ def _normalize_track_album(track_item: Any) -> dict: 'release_date': _extract_lookup_value(album, 'release_date', default='') or '', } + if provider and isinstance(album, dict): + converter = _TYPED_ALBUM_CONVERTERS.get(provider.strip().lower()) + if converter is not None: + try: + typed_album = converter(album) + if typed_album.name: + album_data.setdefault('name', typed_album.name) + if typed_album.album_type: + album_data.setdefault('album_type', typed_album.album_type) + if typed_album.total_tracks: + album_data.setdefault('total_tracks', typed_album.total_tracks) + if typed_album.release_date: + album_data.setdefault('release_date', typed_album.release_date) + if typed_album.id: + album_data.setdefault('id', typed_album.id) + except Exception as exc: + logger.debug( + "Typed album converter failed for provider %s in quality " + "scanner normalize, falling back to legacy: %s", provider, exc, + ) + album_data.setdefault('name', _extract_lookup_value(track_item, 'album_name', default='Unknown Album') or 'Unknown Album') album_data.setdefault('album_type', _extract_lookup_value(track_item, 'album_type', default='album') or 'album') album_data.setdefault('total_tracks', _extract_lookup_value(track_item, 'total_tracks', 'track_count', default=0) or 0) @@ -189,7 +235,7 @@ def _normalize_track_match(track_item: Any, provider: str) -> dict: 'id': _extract_lookup_value(track_item, 'id', 'track_id', default='') or '', 'name': _extract_lookup_value(track_item, 'name', 'title', default='Unknown Track') or 'Unknown Track', 'artists': _normalize_track_artists(track_item), - 'album': _normalize_track_album(track_item), + 'album': _normalize_track_album(track_item, provider=provider), 'image_url': _extract_lookup_value(track_item, 'image_url', 'album_cover_url', default=None), 'duration_ms': _extract_lookup_value(track_item, 'duration_ms', default=0) or 0, 'track_number': _extract_lookup_value(track_item, 'track_number', default=1) or 1, diff --git a/core/metadata/discography.py b/core/metadata/discography.py index ac51e141..35f6df77 100644 --- a/core/metadata/discography.py +++ b/core/metadata/discography.py @@ -2,16 +2,53 @@ from __future__ import annotations -from typing import Any, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional from core.metadata import registry as metadata_registry from core.metadata.album_tracks import get_artist_albums_for_source from core.metadata.lookup import MetadataLookupOptions +from core.metadata.types import Album from utils.logging_config import get_logger logger = get_logger("metadata.discography") +# Per-source typed converter dispatch — same registry pattern as +# ``core/metadata/album_tracks.py`` and ``core/imports/resolution.py``. +# Discography release builders dispatch through the typed Album +# converter when the active source is known. Falls back to legacy +# duck-typed extraction below on unknown source / converter error. +_TYPED_ALBUM_CONVERTERS: Dict[str, Callable[[Dict[str, Any]], Album]] = { + 'spotify': Album.from_spotify_dict, + 'itunes': Album.from_itunes_dict, + 'deezer': Album.from_deezer_dict, + 'discogs': Album.from_discogs_dict, + 'musicbrainz': Album.from_musicbrainz_dict, + 'hydrabase': Album.from_hydrabase_dict, + 'qobuz': Album.from_qobuz_dict, +} + + +def _typed_album_for_source(release: Any, source: Optional[str]) -> Optional[Album]: + """Return a typed Album when source maps to a registered converter + and the converter succeeds. ``None`` means the caller should fall + back to the legacy duck-typed extraction. + """ + if not source or not isinstance(release, dict): + return None + converter = _TYPED_ALBUM_CONVERTERS.get(source.strip().lower()) + if converter is None: + return None + try: + return converter(release) + except Exception as exc: + logger.debug( + "Typed album converter failed for source %s in discography " + "build, falling back to legacy: %s", source, exc, + ) + return None + + def _extract_lookup_value(value: Any, *names: str, default: Any = None) -> Any: if value is None: return default @@ -89,7 +126,32 @@ def _pick_best_artist_match(search_results: List[Any], artist_name: str) -> Opti return search_results[0] -def _build_discography_release_dict(release: Any, artist_id: str) -> Optional[Dict[str, Any]]: +def _build_discography_release_dict(release: Any, artist_id: str, + source: Optional[str] = None) -> Optional[Dict[str, Any]]: + """Build a normalized discography release dict. + + When ``source`` is provided AND maps to a registered typed Album + converter, routes through ``Album.from__dict()`` and pulls + canonical fields off the typed Album. Falls back to the legacy + duck-typed extraction on unknown source / non-dict input / typed + converter error. + """ + typed_album = _typed_album_for_source(release, source) + if typed_album is not None: + if not typed_album.id: + return None + artist_name = typed_album.artists[0] if typed_album.artists else '' + return { + 'id': typed_album.id, + 'name': typed_album.name or typed_album.id, + 'artist_name': artist_name, + 'release_date': typed_album.release_date or None, + 'album_type': typed_album.album_type or 'album', + 'image_url': typed_album.image_url, + 'total_tracks': typed_album.total_tracks or 0, + 'external_urls': typed_album.external_urls or {}, + } + release_id = _extract_lookup_value(release, 'id', 'album_id', 'release_id') if not release_id: return None @@ -308,7 +370,7 @@ def get_artist_discography( seen_albums = set() for release in albums or []: - release_data = _build_discography_release_dict(release, artist_id) + release_data = _build_discography_release_dict(release, artist_id, source=active_source) if not release_data: continue @@ -341,7 +403,46 @@ def get_artist_discography( } -def _build_artist_detail_release_card(release: Dict[str, Any]) -> Optional[Dict[str, Any]]: +def _build_artist_detail_release_card(release: Dict[str, Any], + source: Optional[str] = None) -> Optional[Dict[str, Any]]: + """Build an artist-detail release card. + + NOTE on inputs: this function may receive EITHER raw provider + release dicts (when called directly during a fresh discography + lookup) OR pre-built canonical release dicts produced by + ``_build_discography_release_dict`` (the more common case via + ``get_artist_detail_discography``). Pre-built dicts already carry + canonical keys, so the typed dispatch is a no-op for them — and + the legacy duck-typed path also handles them correctly. The typed + dispatch only kicks in when the caller passes a known source AND + the release dict matches a provider's wire shape. + """ + typed_album = _typed_album_for_source(release, source) + if typed_album is not None and typed_album.id: + release_year = None + if typed_album.release_date: + try: + release_year = str(typed_album.release_date)[:4] + except Exception: + release_year = None + + card = { + 'id': typed_album.id, + 'name': typed_album.name or typed_album.id, + 'title': typed_album.name or typed_album.id, + 'album_type': (typed_album.album_type or 'album').lower(), + 'image_url': typed_album.image_url, + 'year': release_year, + 'track_count': typed_album.total_tracks or 0, + 'owned': None, + 'track_completion': 'checking', + } + if typed_album.release_date: + card['release_date'] = typed_album.release_date + elif release_year: + card['release_date'] = f"{release_year}-01-01" + return card + release_id = _extract_lookup_value(release, 'id', 'album_id', 'release_id') if not release_id: return None diff --git a/tests/discovery/test_quality_scanner_typed_album.py b/tests/discovery/test_quality_scanner_typed_album.py new file mode 100644 index 00000000..16cebacc --- /dev/null +++ b/tests/discovery/test_quality_scanner_typed_album.py @@ -0,0 +1,87 @@ +"""Pin the typed-path migration of `_normalize_track_album` in +`core/discovery/quality_scanner.py`. + +Quality scanner result normalization now routes the embedded +`track.album` blob through `Album.from__dict()` when provider +is known. Falls back to legacy duck-typed extraction below. +""" + +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from core.discovery import quality_scanner + + +SPOTIFY_TRACK = { + 'id': 'tr1', + 'name': 'HUMBLE.', + 'artists': [{'id': 'kdot', 'name': 'Kendrick Lamar'}], + 'album': { + 'id': 'sp_album', + 'name': 'DAMN.', + 'artists': [{'id': 'kdot', 'name': 'Kendrick Lamar'}], + 'release_date': '2017-04-14', + 'total_tracks': 14, + 'album_type': 'album', + 'images': [{'url': 'https://i.scdn.co/640.jpg'}], + }, +} + + +def test_typed_path_seeds_album_fields_from_known_provider(): + out = quality_scanner._normalize_track_album(SPOTIFY_TRACK, provider='spotify') + assert out['name'] == 'DAMN.' + assert out['album_type'] == 'album' + assert out['total_tracks'] == 14 + assert out['release_date'] == '2017-04-14' + assert out['id'] == 'sp_album' + + +def test_legacy_path_used_when_no_provider(): + out = quality_scanner._normalize_track_album(SPOTIFY_TRACK) + # Legacy path still resolves album from raw `album` dict. + assert out['name'] == 'DAMN.' + assert out['album_type'] == 'album' + assert out['total_tracks'] == 14 + + +def test_legacy_path_used_for_unknown_provider(): + out = quality_scanner._normalize_track_album(SPOTIFY_TRACK, provider='made_up') + assert out['name'] == 'DAMN.' + + +def test_legacy_path_used_when_typed_converter_raises(): + def _explode(_): + raise RuntimeError('simulated converter bug') + + with patch.dict(quality_scanner._TYPED_ALBUM_CONVERTERS, + {'spotify': _explode}): + out = quality_scanner._normalize_track_album(SPOTIFY_TRACK, provider='spotify') + # Fell back to legacy — still has the album fields. + assert out['name'] == 'DAMN.' + assert out['total_tracks'] == 14 + + +@pytest.mark.parametrize('provider,track', [ + ('itunes', { + 'id': 1, 'name': 'X', + 'album': {'collectionId': 99, 'collectionName': 'iTunes Album', + 'artistName': 'Artist', 'trackCount': 10}, + }), + ('deezer', { + 'id': 2, 'name': 'X', + 'album': {'id': 99, 'title': 'Deezer Album', + 'artist': {'name': 'Artist'}, 'nb_tracks': 8}, + }), + ('discogs', { + 'id': 3, 'name': 'X', + 'album': {'id': 99, 'title': 'Discogs Album', + 'artists': [{'name': 'Artist'}], 'year': 2020}, + }), +]) +def test_typed_path_works_for_other_providers(provider, track): + out = quality_scanner._normalize_track_album(track, provider=provider) + assert out['name'] # populated diff --git a/tests/metadata/test_discography_typed_path.py b/tests/metadata/test_discography_typed_path.py new file mode 100644 index 00000000..9b664dca --- /dev/null +++ b/tests/metadata/test_discography_typed_path.py @@ -0,0 +1,160 @@ +"""Pin the typed-path migration in `core/metadata/discography.py`. + +`_build_discography_release_dict` and `_build_artist_detail_release_card` +historically did duck-typed extraction with fallback chains. This pr +routes them through `Album.from__dict()` when caller supplies +a known source. Legacy duck-typing kicks in as fallback. + +These tests pin: +- Typed path used when source is recognized. +- Typed path output matches expected fields the legacy path produced. +- Legacy path runs unchanged when source is empty/unknown OR when + the typed converter raises. +- Cross-provider parametrized smoke for every registered source. +""" + +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from core.metadata import discography + + +SAMPLE_SPOTIFY_RELEASE = { + 'id': 'sp123', + 'name': 'GNX', + 'artists': [{'id': 'kdot', 'name': 'Kendrick Lamar'}], + 'release_date': '2024-11-22', + 'total_tracks': 12, + 'album_type': 'album', + 'images': [{'url': 'https://i.scdn.co/640.jpg', 'height': 640}], + 'external_urls': {'spotify': 'https://open.spotify.com/album/sp123'}, +} + + +# --------------------------------------------------------------------------- +# _build_discography_release_dict +# --------------------------------------------------------------------------- + + +def test_typed_path_used_for_known_source(): + out = discography._build_discography_release_dict( + SAMPLE_SPOTIFY_RELEASE, artist_id='kdot', source='spotify', + ) + assert out['id'] == 'sp123' + assert out['name'] == 'GNX' + assert out['artist_name'] == 'Kendrick Lamar' + assert out['release_date'] == '2024-11-22' + assert out['album_type'] == 'album' + assert out['total_tracks'] == 12 + assert out['image_url'] == 'https://i.scdn.co/640.jpg' + assert out['external_urls'] == {'spotify': 'https://open.spotify.com/album/sp123'} + + +def test_legacy_path_used_when_no_source(): + out = discography._build_discography_release_dict( + SAMPLE_SPOTIFY_RELEASE, artist_id='kdot', + ) + assert out['id'] == 'sp123' + assert out['name'] == 'GNX' + assert out['artist_name'] == 'Kendrick Lamar' + + +def test_legacy_path_used_for_unknown_source(): + out = discography._build_discography_release_dict( + SAMPLE_SPOTIFY_RELEASE, artist_id='kdot', source='made_up', + ) + assert out['id'] == 'sp123' + + +def test_legacy_path_used_when_typed_converter_raises(): + def _explode(_): + raise RuntimeError('simulated converter bug') + + with patch.dict(discography._TYPED_ALBUM_CONVERTERS, + {'spotify': _explode}): + out = discography._build_discography_release_dict( + SAMPLE_SPOTIFY_RELEASE, artist_id='kdot', source='spotify', + ) + # Legacy path still produced a result. + assert out['id'] == 'sp123' + assert out['name'] == 'GNX' + + +def test_release_with_no_id_returns_none(): + raw = dict(SAMPLE_SPOTIFY_RELEASE) + raw.pop('id') + out = discography._build_discography_release_dict( + raw, artist_id='kdot', source='spotify', + ) + assert out is None + + +@pytest.mark.parametrize('source,raw', [ + ('itunes', { + 'collectionId': 1, 'collectionName': 'GNX', + 'artistName': 'Kendrick Lamar', 'trackCount': 12, + }), + ('deezer', { + 'id': 1, 'title': 'GNX', + 'artist': {'name': 'Kendrick Lamar'}, 'nb_tracks': 12, + }), + ('discogs', { + 'id': 1, 'title': 'GNX', + 'artists': [{'name': 'Kendrick Lamar'}], 'year': 2024, + }), + ('musicbrainz', { + 'id': 'mbid', 'title': 'GNX', + 'artist-credit': [{'artist': {'name': 'Kendrick Lamar'}}], + }), + ('hydrabase', { + 'id': 'hb', 'name': 'GNX', + 'artists': [{'name': 'Kendrick Lamar'}], + }), + ('qobuz', { + 'id': 1, 'title': 'GNX', + 'artist': {'name': 'Kendrick Lamar'}, 'tracks_count': 12, + }), +]) +def test_typed_path_works_for_every_registered_source(source, raw): + out = discography._build_discography_release_dict( + raw, artist_id='whatever', source=source, + ) + assert out is not None + assert out['name'] == 'GNX' + + +# --------------------------------------------------------------------------- +# _build_artist_detail_release_card — typed dispatch on raw input +# --------------------------------------------------------------------------- + + +def test_artist_detail_card_typed_path(): + card = discography._build_artist_detail_release_card( + SAMPLE_SPOTIFY_RELEASE, source='spotify', + ) + assert card['id'] == 'sp123' + assert card['name'] == 'GNX' + assert card['album_type'] == 'album' + assert card['year'] == '2024' + assert card['release_date'] == '2024-11-22' + assert card['image_url'] == 'https://i.scdn.co/640.jpg' + assert card['track_count'] == 12 + + +def test_artist_detail_card_legacy_path_no_source(): + """Existing canonical-dict input (no source) takes legacy path.""" + canonical = { + 'id': 'sp123', + 'name': 'GNX', + 'album_type': 'album', + 'release_date': '2024-11-22', + 'image_url': 'https://i.scdn.co/640.jpg', + 'total_tracks': 12, + } + card = discography._build_artist_detail_release_card(canonical) + assert card['id'] == 'sp123' + assert card['name'] == 'GNX' + assert card['year'] == '2024' diff --git a/webui/static/helper.js b/webui/static/helper.js index 0746016c..5bbc125c 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3434,6 +3434,7 @@ const WHATS_NEW = { { date: 'Unreleased — 2.4.2 dev cycle' }, { title: 'Internal: Typed Metadata Foundation', desc: 'internal — first step of a multi-pr migration to give the metadata pipeline a real contract. the codebase historically grew duck-typed extractors (`_extract_lookup_value(album_data, "id", "album_id", "collectionId", "release_id", default=...)`) at every consumer site because each provider returns its own response shape. ~150 of those across the codebase. new `core/metadata/types.py` defines canonical typed `Album` / `Track` / `Artist` dataclasses with strict required fields. per-source classmethod converters (from_spotify_dict, from_itunes_dict, from_deezer_dict, from_discogs_dict, from_musicbrainz_dict, from_hydrabase_dict) are the SINGLE place that knows each provider\'s wire shape. zero behavior changes in this pr — pure additive foundation. follow-up prs migrate consumers one at a time. full migration plan documented at docs/metadata-types-migration.md.', page: 'library' }, { title: 'Internal: Migrate Album-Info Builders to Typed Path', desc: 'internal — steps 2+3 of the typed metadata migration in one pr. two album-info builders now route through `Album.from__dict()` when the caller passes a known source: `_build_album_info` (used by every album-tracks lookup) and the embedded album section of `_build_single_import_context_payload` (used by single-track import context resolution). legacy duck-typed extraction stays as the fallback when source is empty/unknown, raw input isn\'t a dict, or the typed converter raises — so a converter bug can\'t break album resolution or import context. caller-provided album_id / album_name / artist_name fallbacks apply on the typed path the same way they did on legacy. zero behavior change for existing callers since they don\'t pass a source yet — opt-in only. 22 new tests pin the typed path, the legacy fallback, and parametrized coverage across registered providers.' }, + { title: 'Internal: Migrate Discography + Quality Scanner to Typed Path', desc: 'internal — next round of the typed metadata migration. three more album-shape consumers now route through `Album.from__dict()` when the caller passes a known source: `_build_discography_release_dict` (artist discography release cards), `_build_artist_detail_release_card` (artist detail page release cards), and `_normalize_track_album` (quality scanner result normalization). legacy duck-typed extraction stays as the fallback when source is empty/unknown, raw input isn\'t a dict, or the typed converter raises — same safety contract as the prior migration steps. 20 new tests pin the typed path + legacy fallback + parametrized coverage across registered providers.' }, { title: 'Discogs Collection in "Your Albums"', desc: 'discord request: pull your discogs collection into the your albums section on discover, similar to spotify liked albums. set your discogs personal access token on settings → connections (already there from prior work) and add discogs as one of the configured sources via the gear button on your albums. background fetcher pulls your full collection (all folders, all pages — capped at 5000 releases), normalizes artist names (strips discogs `(N)` disambiguation suffix), dedupes against any spotify/tidal/deezer-saved versions of the same album. clicking a discogs-only album opens with discogs context — full release detail (year, format, label, country, tracklist) from the /releases endpoint. clicking an album that exists in both your spotify saved AND discogs collection prefers spotify (download flow is more direct). discogs is physical-media-first so many releases won\'t have streaming equivalents — those still show in the grid but the modal flow may need to fall back to a name search to find a downloadable digital version.', page: 'discover' }, { title: 'Drop Redundant "Your Spotify Library" Section on Discover', desc: 'discover page used to show two near-identical sections: "Your Albums" (cross-source aggregator across spotify/deezer/etc) AND "Your Spotify Library" (spotify-only). same UI, same grid, same filter / sort / download-missing controls — the spotify-only one was a strict subset of what your albums already covers. removed it. spotify saved albums still surface via the your albums section with spotify as one of its configured sources (gear button → configure sources). backend collection / storage is unchanged — the watchlist scanner still populates the spotify_library_albums cache for your albums to read.', page: 'discover' }, { title: 'Library Disk Usage on Stats Page', desc: 'discord request (samuel [KC]): show how much disk space the library takes. new card on stats → system statistics shows total bytes + per-format breakdown (FLAC vs MP3 vs M4A bars). data comes from `tracks.file_size` populated during deep scan from whatever the media server already returns (plex MediaPart.size, jellyfin MediaSources[].Size, navidrome song.size, soulsync standalone os.path.getsize) — zero filesystem walk overhead. existing libraries see "Run a Deep Scan to populate" until the next deep scan fills in sizes; partial coverage shown as "X tracks measured (+Y pending)". migration is additive (NULL on legacy rows) so upgrading users have nothing to do.', page: 'stats' },