Merge pull request #490 from Nezreka/refactor/typed-metadata-types-foundation

Refactor/typed metadata types foundation
3 weeks ago · e12969fbbe
parent e0b15a9e69 eab1297afc
commit e12969fbbe
5 changed files with 1328 additions and 2 deletions
--- a/core/metadata/types.py
+++ b/core/metadata/types.py
@ -0,0 +1,618 @@
+"""Canonical typed dataclasses for metadata across all providers.
+
+The metadata pipeline historically grew organically: each new provider
+(Spotify → iTunes → Deezer → Tidal → Qobuz → MusicBrainz → AudioDB →
+Discogs → Hydrabase) returns its own response shape, and consumer code
+defensively extracts every field via fallback chains:
+
+    _extract_lookup_value(album_data, 'id', 'album_id', 'collectionId',
+                          'release_id', default=album_id)
+
+That pattern works but is brittle: each new provider adds more keys to
+chase, each consumer re-runs the same defensive logic, and there's no
+contract about what shape any given consumer can trust.
+
+This module is the canonical contract. Every provider produces these
+types via a single ``from_<provider>_dict()`` classmethod. Every
+consumer accepts these types and trusts the fields. Field names are
+provider-neutral (``release_date`` not ``releaseDate``,
+``image_url`` not ``artworkUrl100``).
+
+This is the foundation PR. It only DEFINES the contract and provides
+the converters; no consumer is migrated in this PR. Future PRs each
+migrate one consumer to accept ``Album`` / ``Track`` / ``Artist``
+instead of raw dicts.
+
+The ``Album`` / ``Track`` / ``Artist`` symbols are also re-exported
+from ``core.itunes_client`` for backward compatibility — existing
+callers don't need to change anything.
+"""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+
+# ---------------------------------------------------------------------------
+# Helpers shared by converters
+# ---------------------------------------------------------------------------
+
+
+def _str(value: Any, default: str = '') -> str:
+    """Coerce to non-None str, never None."""
+    if value is None:
+        return default
+    return str(value)
+
+
+def _int(value: Any, default: int = 0) -> int:
+    """Coerce to int, default on parse failure."""
+    if value is None or value == '':
+        return default
+    try:
+        return int(value)
+    except (TypeError, ValueError):
+        return default
+
+
+def _strip_discogs_disambiguation(name: str) -> str:
+    """Discogs appends ``(N)`` to artist names when there are multiple
+    artists with the same name. Strip so cross-provider matches work."""
+    return re.sub(r'\s*\(\d+\)$', '', name or '').strip()
+
+
+def _itunes_artwork(url: Optional[str]) -> Optional[str]:
+    """iTunes serves cover art at any size by template substitution.
+    Always upgrade ``100x100bb`` → ``3000x3000bb`` for highest quality."""
+    if not url:
+        return None
+    return url.replace('100x100bb', '3000x3000bb')
+
+
+# ---------------------------------------------------------------------------
+# Album
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class Album:
+    """Provider-neutral album.
+
+    Required fields are guaranteed to be set by every converter. Optional
+    fields are explicit ``Optional[...]`` so consumers know they may be
+    None / empty. Source-specific raw IDs that don't fit the typed schema
+    can be stashed in ``external_ids`` (provider name → id string).
+    """
+
+    id: str                                      # Source-native id, always set
+    name: str                                    # Album title, always set
+    artists: List[str]                           # Display names, may be ['Unknown Artist']
+    release_date: str                            # ISO 'YYYY' or 'YYYY-MM-DD' or '' when unknown
+    total_tracks: int                            # 0 when unknown
+    album_type: str                              # 'album' / 'single' / 'ep' / 'compilation'
+
+    # Optional but commonly populated
+    image_url: Optional[str] = None              # Highest-quality cover URL
+    artist_id: Optional[str] = None              # Primary artist's source-native id
+    genres: List[str] = field(default_factory=list)
+    label: Optional[str] = None                  # Record label / publisher
+    barcode: Optional[str] = None                # UPC/EAN when the provider exposes one
+
+    # Source provenance
+    source: str = ''                             # 'spotify' / 'itunes' / etc — set by converter
+    external_ids: Dict[str, str] = field(default_factory=dict)
+    external_urls: Dict[str, str] = field(default_factory=dict)
+
+    # ------------------------------------------------------------------
+    # Per-source converters. Each one is the SINGLE source of truth for
+    # how that provider's response maps to the canonical Album. Adding
+    # a new provider = adding one more converter here. Consumer code
+    # never needs to know any provider's wire shape.
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def from_spotify_dict(cls, raw: Dict[str, Any]) -> 'Album':
+        """Spotify Web API ``/albums/{id}`` response shape."""
+        artists_raw = raw.get('artists') or []
+        artist_names = [_str(a.get('name')) for a in artists_raw
+                        if isinstance(a, dict) and a.get('name')]
+        primary_artist_id = ''
+        if artists_raw and isinstance(artists_raw[0], dict):
+            primary_artist_id = _str(artists_raw[0].get('id'))
+
+        images = raw.get('images') or []
+        image_url = None
+        if images and isinstance(images[0], dict):
+            image_url = _str(images[0].get('url')) or None
+
+        external_ids = {}
+        if raw.get('id'):
+            external_ids['spotify'] = _str(raw['id'])
+        upc = (raw.get('external_ids') or {}).get('upc')
+        if upc:
+            external_ids['upc'] = _str(upc)
+
+        external_urls = {}
+        sp_url = (raw.get('external_urls') or {}).get('spotify')
+        if sp_url:
+            external_urls['spotify'] = _str(sp_url)
+
+        return cls(
+            id=_str(raw.get('id')),
+            name=_str(raw.get('name')),
+            artists=artist_names or ['Unknown Artist'],
+            release_date=_str(raw.get('release_date')),
+            total_tracks=_int(raw.get('total_tracks')),
+            album_type=_str(raw.get('album_type'), default='album'),
+            image_url=image_url,
+            artist_id=primary_artist_id or None,
+            genres=list(raw.get('genres') or []),
+            label=_str(raw.get('label')) or None,
+            barcode=external_ids.get('upc'),
+            source='spotify',
+            external_ids=external_ids,
+            external_urls=external_urls,
+        )
+
+    @classmethod
+    def from_itunes_dict(cls, raw: Dict[str, Any]) -> 'Album':
+        """iTunes Search API album response shape (`collectionType=Album`)."""
+        track_count = _int(raw.get('trackCount'))
+
+        # iTunes doesn't tag album type; infer from track count + collectionType.
+        collection_type = _str(raw.get('collectionType'), default='Album')
+        if 'compilation' in collection_type.lower():
+            album_type = 'compilation'
+        elif track_count <= 3:
+            album_type = 'single'
+        elif track_count <= 6:
+            album_type = 'ep'
+        else:
+            album_type = 'album'
+
+        artist_id = _str(raw.get('artistId')) or None
+        external_ids = {}
+        if raw.get('collectionId'):
+            external_ids['itunes'] = _str(raw['collectionId'])
+        if artist_id:
+            external_ids['itunes_artist'] = artist_id
+
+        external_urls = {}
+        if raw.get('collectionViewUrl'):
+            external_urls['itunes'] = _str(raw['collectionViewUrl'])
+
+        # Strip a trailing iTunes "- Single" / "(Single)" / "- EP" / "(EP)"
+        # marker from the name, the same way the existing
+        # _clean_itunes_album_name helper does. Other suffixes such as
+        # "(Deluxe)" are left untouched.
+        name = _str(raw.get('collectionName'))
+        name = re.sub(r'\s*[-(]\s*(Single|EP)\s*[)]?$', '', name, flags=re.IGNORECASE).strip()
+
+        release_date = _str(raw.get('releaseDate'))
+        if release_date and 'T' in release_date:
+            release_date = release_date.split('T', 1)[0]
+
+        primary_genre = _str(raw.get('primaryGenreName'))
+        return cls(
+            id=_str(raw.get('collectionId')),
+            name=name,
+            artists=[_str(raw.get('artistName'), default='Unknown Artist')],
+            release_date=release_date,
+            total_tracks=track_count,
+            album_type=album_type,
+            image_url=_itunes_artwork(raw.get('artworkUrl100')),
+            artist_id=artist_id,
+            genres=[primary_genre] if primary_genre else [],
+            source='itunes',
+            external_ids=external_ids,
+            external_urls=external_urls,
+        )
+
+    @classmethod
+    def from_deezer_dict(cls, raw: Dict[str, Any]) -> 'Album':
+        """Deezer API ``/album/{id}`` response shape."""
+        artist = raw.get('artist') or {}
+        artist_name = _str(artist.get('name'), default='Unknown Artist') if isinstance(artist, dict) else _str(artist) or 'Unknown Artist'
+        artist_id = _str(artist.get('id')) if isinstance(artist, dict) else ''
+
+        # Deezer cover URLs come in size suffixes (cover_xl, cover_big,
+        # cover_medium, cover_small). Prefer xl.
+        image_url = (
+            _str(raw.get('cover_xl'))
+            or _str(raw.get('cover_big'))
+            or _str(raw.get('cover_medium'))
+            or _str(raw.get('cover'))
+            or None
+        )
+
+        record_type = _str(raw.get('record_type'), default='album').lower()
+        album_type = {'single': 'single', 'ep': 'ep'}.get(record_type, 'album')
+
+        external_ids = {}
+        if raw.get('id'):
+            external_ids['deezer'] = _str(raw['id'])
+        if raw.get('upc'):
+            external_ids['upc'] = _str(raw['upc'])
+
+        external_urls = {}
+        if raw.get('link'):
+            external_urls['deezer'] = _str(raw['link'])
+
+        return cls(
+            id=_str(raw.get('id')),
+            name=_str(raw.get('title')),
+            artists=[artist_name],
+            release_date=_str(raw.get('release_date')),
+            total_tracks=_int(raw.get('nb_tracks')),
+            album_type=album_type,
+            image_url=image_url,
+            artist_id=artist_id or None,
+            genres=[g.get('name', '') for g in (raw.get('genres', {}) or {}).get('data', [])
+                    if isinstance(g, dict) and g.get('name')],
+            label=_str(raw.get('label')) or None,
+            barcode=external_ids.get('upc'),
+            source='deezer',
+            external_ids=external_ids,
+            external_urls=external_urls,
+        )
+
+    @classmethod
+    def from_discogs_dict(cls, raw: Dict[str, Any]) -> 'Album':
+        """Discogs API ``/releases/{id}`` response shape."""
+        artists_raw = raw.get('artists') or []
+        artist_names = []
+        primary_artist_id = ''
+        for a in artists_raw:
+            if not isinstance(a, dict):
+                continue
+            name = _strip_discogs_disambiguation(_str(a.get('name')))
+            if name:
+                artist_names.append(name)
+            if not primary_artist_id and a.get('id'):
+                primary_artist_id = _str(a['id'])
+
+        images = raw.get('images') or []
+        image_url = None
+        if images and isinstance(images[0], dict):
+            image_url = _str(images[0].get('uri') or images[0].get('uri150')) or None
+
+        # Discogs `tracklist` is the source of total_tracks.
+        tracklist = raw.get('tracklist') or []
+        total_tracks = sum(1 for t in tracklist if isinstance(t, dict)
+                           and t.get('type_') == 'track')
+        if not total_tracks:
+            total_tracks = len(tracklist)
+
+        labels = raw.get('labels') or []
+        label_name = ''
+        if labels and isinstance(labels[0], dict):
+            label_name = _str(labels[0].get('name'))
+
+        external_ids = {}
+        if raw.get('id'):
+            external_ids['discogs'] = _str(raw['id'])
+        # Discogs `identifiers` array can include barcode entries
+        for ident in raw.get('identifiers', []) or []:
+            if isinstance(ident, dict) and ident.get('type', '').lower() == 'barcode':
+                bc = _str(ident.get('value')).strip()
+                if bc:
+                    external_ids['barcode'] = bc
+                    break
+
+        external_urls = {}
+        if raw.get('uri'):
+            external_urls['discogs'] = _str(raw['uri'])
+
+        year = raw.get('year')
+        release_date = str(year) if year and _int(year) > 0 else ''
+
+        return cls(
+            id=_str(raw.get('id')),
+            name=_str(raw.get('title')),
+            artists=artist_names or ['Unknown Artist'],
+            release_date=release_date,
+            total_tracks=total_tracks,
+            album_type='album',  # Discogs doesn't tag this; default to album
+            image_url=image_url,
+            artist_id=primary_artist_id or None,
+            genres=list(raw.get('genres') or []) + list(raw.get('styles') or []),
+            label=label_name or None,
+            barcode=external_ids.get('barcode'),
+            source='discogs',
+            external_ids=external_ids,
+            external_urls=external_urls,
+        )
+
+    @classmethod
+    def from_musicbrainz_dict(cls, raw: Dict[str, Any]) -> 'Album':
+        """MusicBrainz ``/release/{mbid}`` response shape (release, not release-group)."""
+        artist_credit = raw.get('artist-credit') or []
+        artist_names = []
+        primary_artist_id = ''
+        for credit in artist_credit:
+            if isinstance(credit, dict) and 'artist' in credit:
+                name = _str(credit['artist'].get('name'))
+                if name:
+                    artist_names.append(name)
+                if not primary_artist_id and credit['artist'].get('id'):
+                    primary_artist_id = _str(credit['artist']['id'])
+
+        # Total tracks: sum across media (MB stores per-disc).
+        media = raw.get('media') or []
+        total_tracks = sum(_int(m.get('track-count')) for m in media if isinstance(m, dict))
+
+        external_ids = {}
+        if raw.get('id'):
+            external_ids['musicbrainz'] = _str(raw['id'])
+        if raw.get('barcode'):
+            external_ids['barcode'] = _str(raw['barcode'])
+
+        # MB `release-group` carries the album-level type (album/single/ep)
+        rg = raw.get('release-group') or {}
+        primary_type = _str(rg.get('primary-type'), default='Album').lower()
+        album_type = {'single': 'single', 'ep': 'ep'}.get(primary_type, 'album')
+        if rg.get('id'):
+            external_ids['musicbrainz_release_group'] = _str(rg['id'])
+
+        labels = raw.get('label-info') or []
+        label_name = ''
+        if labels and isinstance(labels[0], dict):
+            lbl = labels[0].get('label') or {}
+            label_name = _str(lbl.get('name'))
+
+        return cls(
+            id=_str(raw.get('id')),
+            name=_str(raw.get('title')),
+            artists=artist_names or ['Unknown Artist'],
+            release_date=_str(raw.get('date')),
+            total_tracks=total_tracks,
+            album_type=album_type,
+            image_url=None,  # MB doesn't serve cover art directly; CAA is separate
+            artist_id=primary_artist_id or None,
+            genres=[],  # MB has tags but they're noisy; consumer can fetch separately
+            label=label_name or None,
+            barcode=external_ids.get('barcode'),
+            source='musicbrainz',
+            external_ids=external_ids,
+            external_urls={},
+        )
+
+    @classmethod
+    def from_qobuz_dict(cls, raw: Dict[str, Any]) -> 'Album':
+        """Qobuz API ``album/get`` response shape."""
+        artist = raw.get('artist') or {}
+        artist_name = _str(artist.get('name'), default='Unknown Artist') if isinstance(artist, dict) else _str(artist) or 'Unknown Artist'
+        artist_id = _str(artist.get('id')) if isinstance(artist, dict) else ''
+
+        # Qobuz `image` is a dict with small/large/thumbnail variants.
+        image = raw.get('image') or {}
+        image_url = None
+        if isinstance(image, dict):
+            image_url = (
+                _str(image.get('large'))
+                or _str(image.get('small'))
+                or _str(image.get('thumbnail'))
+                or None
+            )
+
+        external_ids = {}
+        if raw.get('id'):
+            external_ids['qobuz'] = _str(raw['id'])
+        if raw.get('upc'):
+            external_ids['upc'] = _str(raw['upc'])
+
+        external_urls = {}
+        if raw.get('url'):
+            external_urls['qobuz'] = _str(raw['url'])
+
+        # Qobuz exposes both `release_date_original` (vinyl/original
+        # press date) and `released_at` (digital release timestamp).
+        # Prefer the original date for cross-provider matching.
+        release_date = _str(raw.get('release_date_original') or raw.get('released_at'))
+        if release_date and 'T' in release_date:
+            release_date = release_date.split('T', 1)[0]
+
+        genre = raw.get('genre') or {}
+        genre_name = _str(genre.get('name')) if isinstance(genre, dict) else _str(genre)
+
+        label = raw.get('label') or {}
+        label_name = _str(label.get('name')) if isinstance(label, dict) else _str(label)
+
+        return cls(
+            id=_str(raw.get('id')),
+            name=_str(raw.get('title')),
+            artists=[artist_name],
+            release_date=release_date,
+            total_tracks=_int(raw.get('tracks_count')),
+            album_type='album',  # Qobuz doesn't tag this consistently
+            image_url=image_url,
+            artist_id=artist_id or None,
+            genres=[genre_name] if genre_name else [],
+            label=label_name or None,
+            barcode=external_ids.get('upc'),
+            source='qobuz',
+            external_ids=external_ids,
+            external_urls=external_urls,
+        )
+
+    @classmethod
+    def from_tidal_object(cls, obj: Any) -> 'Album':
+        """tidalapi ``Album`` object shape.
+
+        Tidal goes through the ``tidalapi`` library which returns
+        Python objects, not raw dicts — so this converter is named
+        ``from_tidal_object`` to make the input contract explicit.
+        Duck-types attribute access so unit tests can pass simple
+        SimpleNamespace stand-ins."""
+        artist = getattr(obj, 'artist', None)
+        artist_name = _str(getattr(artist, 'name', None), default='Unknown Artist')
+        artist_id = _str(getattr(artist, 'id', '')) if artist else ''
+
+        # tidalapi exposes `image()` as a method that returns a URL at
+        # a given size. Try a sensible default size; fall back to the
+        # `picture` field (the raw image id) if the method is missing,
+        # raises, or returns nothing.
+        image_url = None
+        try:
+            if hasattr(obj, 'image') and callable(obj.image):
+                image_url = obj.image(640) or None
+        except Exception:
+            image_url = None
+        if not image_url:
+            picture = _str(getattr(obj, 'picture', ''))
+            if picture:
+                # Tidal CDN URL format
+                pic_path = picture.replace('-', '/')
+                image_url = f"https://resources.tidal.com/images/{pic_path}/640x640.jpg"
+
+        release_date = ''
+        rd = getattr(obj, 'release_date', None)
+        if rd is not None:
+            release_date = _str(rd).split('T')[0] if 'T' in _str(rd) else _str(rd)
+
+        external_ids = {}
+        if getattr(obj, 'id', None):
+            external_ids['tidal'] = _str(obj.id)
+        if getattr(obj, 'universal_product_number', None):
+            external_ids['upc'] = _str(obj.universal_product_number)
+
+        return cls(
+            id=_str(getattr(obj, 'id', '')),
+            name=_str(getattr(obj, 'name', '')),
+            artists=[artist_name],
+            release_date=release_date,
+            total_tracks=_int(getattr(obj, 'num_tracks', 0)),
+            album_type=_str(getattr(obj, 'type', None), default='album').lower() or 'album',
+            image_url=image_url,
+            artist_id=artist_id or None,
+            genres=[],  # tidalapi doesn't expose genres on Album
+            barcode=external_ids.get('upc'),
+            source='tidal',
+            external_ids=external_ids,
+            external_urls={},
+        )
+
+    @classmethod
+    def from_hydrabase_dict(cls, raw: Dict[str, Any]) -> 'Album':
+        """Hydrabase metadata service response shape."""
+        artists_raw = raw.get('artists') or []
+        if isinstance(artists_raw, str):
+            artist_names = [artists_raw]
+        else:
+            artist_names = []
+            for a in artists_raw:
+                if isinstance(a, dict):
+                    name = _str(a.get('name'))
+                else:
+                    name = _str(a)
+                if name:
+                    artist_names.append(name)
+
+        external_ids = {}
+        if raw.get('id'):
+            external_ids['hydrabase'] = _str(raw['id'])
+        if raw.get('soul_id'):
+            external_ids['soul'] = _str(raw['soul_id'])
+
+        return cls(
+            id=_str(raw.get('id')),
+            name=_str(raw.get('name') or raw.get('title')),
+            artists=artist_names or ['Unknown Artist'],
+            release_date=_str(raw.get('release_date')),
+            total_tracks=_int(raw.get('total_tracks')),
+            album_type=_str(raw.get('album_type'), default='album'),
+            image_url=_str(raw.get('image_url') or raw.get('thumb_url')) or None,
+            artist_id=_str(raw.get('artist_id')) or None,
+            source='hydrabase',
+            external_ids=external_ids,
+        )
+
+    # ------------------------------------------------------------------
+    # Consumer-side helpers
+    # ------------------------------------------------------------------
+
+    def to_context_dict(self) -> Dict[str, Any]:
+        """Return the canonical dict shape SoulSync's import / download
+        pipelines expect. This is the bridge between typed metadata and
+        the existing dict-passing internal API. Future PRs migrate
+        consumers off this dict shape and onto the typed Album directly,
+        at which point this helper becomes unnecessary."""
+        primary_artist = self.artists[0] if self.artists else 'Unknown Artist'
+        artists_dicts = [{'name': name, 'id': self.artist_id if i == 0 else ''}
+                         for i, name in enumerate(self.artists)]
+        images = [{'url': self.image_url}] if self.image_url else []
+
+        return {
+            'id': self.id,
+            'name': self.name,
+            'artist': primary_artist,
+            'artist_name': primary_artist,
+            'artist_id': self.artist_id or '',
+            'artists': artists_dicts,
+            'image_url': self.image_url,
+            'images': images,
+            'release_date': self.release_date,
+            'album_type': self.album_type,
+            'total_tracks': self.total_tracks,
+            'source': self.source,
+            'genres': list(self.genres),
+            'label': self.label or '',
+            'barcode': self.barcode or '',
+            'external_ids': dict(self.external_ids),
+            'external_urls': dict(self.external_urls),
+        }
+
+
+# ---------------------------------------------------------------------------
+# Track and Artist — kept lighter for now. Future PRs flesh these out
+# in the same per-source-converter pattern as Album.
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class Track:
+    """Provider-neutral track. Required fields are always populated by
+    every provider's converter; optional fields may be None."""
+
+    id: str
+    name: str
+    artists: List[str]
+    album: str
+    duration_ms: int
+
+    # Optional
+    track_number: Optional[int] = None
+    disc_number: Optional[int] = None
+    image_url: Optional[str] = None
+    release_date: Optional[str] = None
+    album_type: Optional[str] = None
+    total_tracks: Optional[int] = None
+    preview_url: Optional[str] = None
+    isrc: Optional[str] = None
+    popularity: int = 0  # Spotify-only; 0 elsewhere
+
+    # Source provenance
+    source: str = ''
+    external_ids: Dict[str, str] = field(default_factory=dict)
+    external_urls: Dict[str, str] = field(default_factory=dict)
+
+
+@dataclass
+class Artist:
+    """Provider-neutral artist."""
+
+    id: str
+    name: str
+
+    # Optional
+    image_url: Optional[str] = None
+    genres: List[str] = field(default_factory=list)
+    popularity: int = 0  # Spotify-only; 0 elsewhere
+    followers: int = 0   # Spotify-only; 0 elsewhere
+
+    # Source provenance
+    source: str = ''
+    external_ids: Dict[str, str] = field(default_factory=dict)
+    external_urls: Dict[str, str] = field(default_factory=dict)
+
+
+__all__ = ['Album', 'Track', 'Artist']
--- a/docs/metadata-types-migration.md
+++ b/docs/metadata-types-migration.md
@ -0,0 +1,125 @@
+# Typed Metadata Migration Plan
+
+## Why
+
+Right now the metadata pipeline has no real contract about the shape
+of data flowing between providers and consumers. Each provider
+(Spotify, iTunes, Deezer, Tidal, Qobuz, MusicBrainz, AudioDB,
+Discogs, Hydrabase) returns its own response shape, and consumer
+code defensively extracts every field via fallback chains:
+
+```python
+def _build_album_info(album_data, album_id, album_name='', artist_name=''):
+    images = _extract_lookup_value(album_data, 'images', default=[]) or []
+    ...
+    return {
+        'id': _extract_lookup_value(album_data, 'id', 'album_id',
+                                    'collectionId', 'release_id',
+                                    default=album_id) or album_id,
+        ...
+    }
+```
+
+This pattern works but makes the codebase hard to extend safely:
+
+- Adding a new provider means adding more keys to the fallback chains
+  in every consumer file (currently ~150 call sites of
+  `_extract_lookup_value` across the codebase).
+- Fixing a bug in extraction means fixing it in N places.
+- New consumers can't trust the data — they re-run defensive logic.
+- Tests are theatre because the contract is "whatever shape happens
+  to come in."
+
+## What this PR adds
+
+`core/metadata/types.py` defines the canonical typed dataclasses:
+
+- `Album` — required fields: `id`, `name`, `artists`, `release_date`,
+  `total_tracks`, `album_type`. Optional: `image_url`, `artist_id`,
+  `genres`, `label`, `barcode`, `external_ids`, `external_urls`.
+- `Track` — required fields: `id`, `name`, `artists`, `album`,
+  `duration_ms`. Optional: track/disc number, image, ISRC, etc.
+- `Artist` — required fields: `id`, `name`. Optional: image, genres.
+
+Plus per-provider classmethod converters on `Album`:
+
+- `Album.from_spotify_dict(raw)`
+- `Album.from_itunes_dict(raw)`
+- `Album.from_deezer_dict(raw)`
+- `Album.from_discogs_dict(raw)`
+- `Album.from_musicbrainz_dict(raw)`
+- `Album.from_hydrabase_dict(raw)`
+- `Album.from_qobuz_dict(raw)`
+- `Album.from_tidal_object(obj)` — note: Tidal goes through the
+  ``tidalapi`` library which returns Python objects rather than
+  raw dicts, so this converter is named ``_object`` not ``_dict``
+  to make the input contract explicit.
+
+Enrichment-only providers (Last.fm, Genius, AcoustID, ListenBrainz,
+AudioDB) don't return Album-shaped responses — they enrich
+existing rows with tags, lyrics URLs, fingerprint matches, etc.
+No Album converter needed for those.
+
+Each converter is the SINGLE place that knows that provider's wire
+shape. Adding a new provider = adding one classmethod here and
+nothing else needs to change.
+
+`Album.to_context_dict()` returns the canonical dict shape SoulSync's
+existing import / download pipelines expect — the bridge between
+typed data and the current dict-passing internal API.
+
+## What this PR DOES NOT do
+
+This PR does not migrate any consumer. No behavior changes. The new
+types and converters are pure additive — every existing code path
+keeps using `_extract_lookup_value` exactly as before.
+
+The reason: a single big-bang migration would be a 153-call-site
+refactor with subtle behavior risk. Better to land the foundation
+in isolation, prove the contract via tests, then migrate consumers
+one at a time in follow-up PRs that are individually reviewable
+and revertable.
+
+## Migration roadmap
+
+Numbered in suggested order. Each item is its own PR.
+
+1. **Foundation (this PR).** Land `core/metadata/types.py` +
+   converters + tests. Document migration plan.
+2. **Migrate `_build_album_info`** in
+   `core/metadata/album_tracks.py` — accept either a typed `Album`
+   OR a raw dict. When it gets a typed Album, return
+   `album.to_context_dict()`. When it gets a raw dict, normalize
+   via the appropriate `from_<source>_dict()` based on the
+   provided `source` argument. Reduces from 41 LOC of fallback
+   chains to ~5 LOC of dispatch.
+3. **Migrate `_build_single_import_context_payload`** in the same
+   file — same pattern.
+4. **Migrate Spotify client.** `SpotifyClient.get_album()` returns
+   `Album` instead of a raw dict, and internal callers are updated.
+   Where the public API surface must stay stable, expose both the
+   typed and dict forms for one release, then deprecate the dict
+   version.
+5. **Migrate iTunes/Deezer/Tidal/Qobuz/Discogs/Hydrabase clients.**
+   Same pattern. Each client's `get_album()` returns `Album`.
+6. **Migrate consumers in `core/discovery/quality_scanner.py`,
+   `core/imports/context.py`, etc.** Drop their fallback chains
+   in favor of typed access.
+7. **Add `Track` converters and migrate Track-shaped consumers.**
+   Same pattern as Album.
+8. **Add `Artist` converters and migrate Artist-shaped consumers.**
+9. **Deprecate `_extract_lookup_value`.** Once no caller needs it,
+   delete it.
+
+Each PR is independently revertable. Behavior preserved at every
+step.
+
+## Acceptance criteria for this PR
+
+- All converters produce a fully-populated `Album` from realistic
+  provider response samples.
+- Every required field is set even when source data is partial.
+- `to_context_dict()` shape is identical across all supported
+  providers (pinned via cross-provider parametrized tests).
+- No existing consumer is changed; existing tests pass unchanged.
+- Cross-provider invariants (release_date format, album_type values,
+  Discogs `(N)` stripping, iTunes artwork upgrade) are pinned.
--- a/tests/metadata/test_typed_metadata_types.py
+++ b/tests/metadata/test_typed_metadata_types.py
@ -0,0 +1,572 @@
+"""Pin the per-provider Album converter contracts.
+
+Each provider returns its own response shape. The
+``Album.from_<provider>_dict()`` classmethods are the SINGLE place
+that knows that shape. Consumers must be able to trust that an
+``Album`` instance has the same field semantics regardless of which
+provider it came from.
+
+These tests use realistic sample payloads (truncated from real API
+responses) and pin:
+- Required fields are always populated even when source data is
+  partial or messy (defaults applied uniformly).
+- Cross-provider field semantics match — e.g. ``release_date`` is
+  always 'YYYY' or 'YYYY-MM-DD' (or '' when the source has no usable
+  date) regardless of whether Spotify gave us 'YYYY-MM-DD', iTunes
+  gave us '2024-01-15T00:00:00Z', or Discogs gave us a bare year
+  integer.
+- Provider-specific quirks are normalized at the converter boundary
+  (Discogs `(N)` disambiguation suffix, iTunes `100x100bb` artwork
+  URLs, Deezer's nested `artist` object).
+- ``to_context_dict()`` produces the canonical SoulSync-internal
+  shape consumers currently expect.
+
+When a future PR adds a new provider, this file is where the
+contract test goes.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from core.metadata.types import Album, Artist, Track
+
+
+# ---------------------------------------------------------------------------
+# Spotify
+# ---------------------------------------------------------------------------
+
+
+def test_album_from_spotify_dict_full_response():
+    """A typical /albums/{id} response — populated fields, full track list."""
+    raw = {
+        'id': '0hvT3yIEysuuvkK73vgdcW',
+        'name': 'GNX',
+        'artists': [
+            {'id': '2YZyLoL8N0Wb9xBt1NhZWg', 'name': 'Kendrick Lamar'},
+        ],
+        'release_date': '2024-11-22',
+        'total_tracks': 12,
+        'album_type': 'album',
+        'images': [
+            {'url': 'https://i.scdn.co/image/abc123', 'height': 640, 'width': 640},
+        ],
+        'genres': ['hip hop', 'rap'],
+        'label': 'pgLang/Interscope',
+        'external_ids': {'upc': '00602465123456'},
+        'external_urls': {'spotify': 'https://open.spotify.com/album/0hvT3yIEysuuvkK73vgdcW'},
+    }
+
+    album = Album.from_spotify_dict(raw)
+
+    assert album.id == '0hvT3yIEysuuvkK73vgdcW'
+    assert album.name == 'GNX'
+    assert album.artists == ['Kendrick Lamar']
+    assert album.artist_id == '2YZyLoL8N0Wb9xBt1NhZWg'
+    assert album.release_date == '2024-11-22'
+    assert album.total_tracks == 12
+    assert album.album_type == 'album'
+    assert album.image_url == 'https://i.scdn.co/image/abc123'
+    assert album.genres == ['hip hop', 'rap']
+    assert album.label == 'pgLang/Interscope'
+    assert album.barcode == '00602465123456'
+    assert album.source == 'spotify'
+    assert album.external_ids == {'spotify': '0hvT3yIEysuuvkK73vgdcW', 'upc': '00602465123456'}
+
+
+def test_album_from_spotify_dict_handles_missing_fields():
+    """Defensive: minimal payload still produces a valid Album."""
+    raw = {'id': 'x', 'name': 'Y'}
+    album = Album.from_spotify_dict(raw)
+    assert album.id == 'x'
+    assert album.name == 'Y'
+    assert album.artists == ['Unknown Artist']
+    assert album.release_date == ''
+    assert album.total_tracks == 0
+    assert album.album_type == 'album'
+    assert album.image_url is None
+    assert album.label is None
+
+
+def test_album_from_spotify_dict_multi_artist():
+    """Featured artists / collabs — all names captured, primary artist
+    id is the first one."""
+    raw = {
+        'id': 'a1',
+        'name': 'Luther',
+        'artists': [
+            {'id': 'kdot', 'name': 'Kendrick Lamar'},
+            {'id': 'sza', 'name': 'SZA'},
+        ],
+        'total_tracks': 1,
+    }
+    album = Album.from_spotify_dict(raw)
+    assert album.artists == ['Kendrick Lamar', 'SZA']
+    assert album.artist_id == 'kdot'
+
+
+# ---------------------------------------------------------------------------
+# iTunes
+# ---------------------------------------------------------------------------
+
+
+def test_album_from_itunes_dict_full_response():
+    raw = {
+        'collectionId': 1782145638,
+        'collectionName': 'GNX',
+        'artistName': 'Kendrick Lamar',
+        'artistId': 368183298,
+        'releaseDate': '2024-11-22T08:00:00Z',
+        'trackCount': 12,
+        'collectionType': 'Album',
+        'artworkUrl100': 'https://is1.mzstatic.com/image/100x100bb.jpg',
+        'collectionViewUrl': 'https://music.apple.com/album/gnx/1782145638',
+        'primaryGenreName': 'Hip-Hop/Rap',
+    }
+    album = Album.from_itunes_dict(raw)
+    assert album.id == '1782145638'
+    assert album.name == 'GNX'
+    assert album.artists == ['Kendrick Lamar']
+    # iTunes ISO timestamp truncated to date
+    assert album.release_date == '2024-11-22'
+    assert album.total_tracks == 12
+    assert album.album_type == 'album'
+    # 100x100bb upgraded to 3000x3000bb
+    assert album.image_url == 'https://is1.mzstatic.com/image/3000x3000bb.jpg'
+    assert album.artist_id == '368183298'
+    assert album.genres == ['Hip-Hop/Rap']
+    assert album.source == 'itunes'
+    assert album.external_ids['itunes'] == '1782145638'
+    assert album.external_ids['itunes_artist'] == '368183298'
+
+
+def test_album_from_itunes_dict_infers_album_type_from_track_count():
+    """iTunes doesn't tag album type — convert per the existing
+    heuristic (1-3 single, 4-6 EP, 7+ album)."""
+    base = {'collectionId': 1, 'collectionName': 'X', 'artistName': 'A',
+            'collectionType': 'Album'}
+    assert Album.from_itunes_dict({**base, 'trackCount': 1}).album_type == 'single'
+    assert Album.from_itunes_dict({**base, 'trackCount': 5}).album_type == 'ep'
+    assert Album.from_itunes_dict({**base, 'trackCount': 12}).album_type == 'album'
+
+
+def test_album_from_itunes_dict_detects_compilation():
+    raw = {'collectionId': 1, 'collectionName': 'Best Of', 'artistName': 'V/A',
+           'collectionType': 'Compilation', 'trackCount': 20}
+    assert Album.from_itunes_dict(raw).album_type == 'compilation'
+
+
+def test_album_from_itunes_dict_strips_single_ep_suffix():
+    """iTunes appends ' - Single' / ' - EP' to single/EP collection
+    names. Strip so cross-provider matching works on the actual title."""
+    raw = {'collectionId': 1, 'collectionName': 'Track Name - Single',
+           'artistName': 'A', 'trackCount': 1}
+    assert Album.from_itunes_dict(raw).name == 'Track Name'
+
+
+# ---------------------------------------------------------------------------
+# Deezer
+# ---------------------------------------------------------------------------
+
+
+def test_album_from_deezer_dict_full_response():
+    raw = {
+        'id': 12345,
+        'title': 'GNX',
+        'artist': {'id': 67890, 'name': 'Kendrick Lamar'},
+        'release_date': '2024-11-22',
+        'nb_tracks': 12,
+        'record_type': 'album',
+        'cover_xl': 'https://e-cdns-images.dzcdn.net/images/cover/abc/1000x1000-000000-80-0-0.jpg',
+        'genres': {'data': [{'id': 116, 'name': 'Rap/Hip Hop'}]},
+        'label': 'pgLang',
+        'upc': '00602465123456',
+        'link': 'https://www.deezer.com/album/12345',
+    }
+    album = Album.from_deezer_dict(raw)
+    assert album.id == '12345'
+    assert album.name == 'GNX'
+    assert album.artists == ['Kendrick Lamar']
+    assert album.artist_id == '67890'
+    assert album.release_date == '2024-11-22'
+    assert album.total_tracks == 12
+    assert album.album_type == 'album'
+    assert 'cover/abc' in album.image_url
+    assert album.genres == ['Rap/Hip Hop']
+    assert album.label == 'pgLang'
+    assert album.barcode == '00602465123456'
+    assert album.source == 'deezer'
+
+
+def test_album_from_deezer_dict_falls_back_through_cover_sizes():
+    """Deezer cover URLs come in xl/big/medium/small variants. Prefer xl."""
+    base = {'id': 1, 'title': 'X', 'artist': {'name': 'A'}}
+    # xl present
+    a = Album.from_deezer_dict({**base, 'cover_xl': 'XL', 'cover_big': 'BIG'})
+    assert a.image_url == 'XL'
+    # only big
+    b = Album.from_deezer_dict({**base, 'cover_big': 'BIG'})
+    assert b.image_url == 'BIG'
+    # nothing
+    c = Album.from_deezer_dict(base)
+    assert c.image_url is None
+
+
+# ---------------------------------------------------------------------------
+# Discogs
+# ---------------------------------------------------------------------------
+
+
+def test_album_from_discogs_dict_full_response():
+    raw = {
+        'id': 33445566,
+        'title': 'GNX',
+        'artists': [
+            {'id': 1234, 'name': 'Kendrick Lamar'},
+        ],
+        'year': 2024,
+        'tracklist': [
+            {'position': 'A1', 'title': 'wacced out murals', 'type_': 'track'},
+            {'position': 'A2', 'title': 'squabble up', 'type_': 'track'},
+            {'position': 'B1', 'title': 'luther', 'type_': 'track'},
+        ],
+        'images': [
+            {'type': 'primary', 'uri': 'https://img.discogs.com/abc.jpg', 'uri150': 'https://img.discogs.com/abc-150.jpg'},
+        ],
+        'genres': ['Hip Hop'],
+        'styles': ['Conscious'],
+        'labels': [{'name': 'pgLang', 'catno': 'PG001'}],
+        'identifiers': [
+            {'type': 'Barcode', 'value': '00602465123456'},
+            {'type': 'Other', 'value': 'XYZ'},
+        ],
+        'uri': 'https://www.discogs.com/release/33445566',
+    }
+    album = Album.from_discogs_dict(raw)
+    assert album.id == '33445566'
+    assert album.name == 'GNX'
+    assert album.artists == ['Kendrick Lamar']
+    assert album.artist_id == '1234'
+    assert album.release_date == '2024'
+    assert album.total_tracks == 3
+    assert album.album_type == 'album'
+    # uri preferred over uri150
+    assert album.image_url == 'https://img.discogs.com/abc.jpg'
+    # Discogs genres + styles merged
+    assert 'Hip Hop' in album.genres and 'Conscious' in album.genres
+    assert album.label == 'pgLang'
+    assert album.barcode == '00602465123456'
+    assert album.source == 'discogs'
+
+
+def test_album_from_discogs_dict_strips_disambiguation_suffix():
+    """`Madonna (3)` → `Madonna` so cross-provider matches work."""
+    raw = {'id': 1, 'title': 'Y', 'artists': [{'name': 'Madonna (3)'}]}
+    album = Album.from_discogs_dict(raw)
+    assert album.artists == ['Madonna']
+
+
+def test_album_from_discogs_dict_year_zero_means_unknown():
+    """Discogs `year=0` is the sentinel for unknown — empty release_date."""
+    raw = {'id': 1, 'title': 'Y', 'artists': [{'name': 'X'}], 'year': 0}
+    assert Album.from_discogs_dict(raw).release_date == ''
+
+
+def test_album_from_discogs_dict_counts_only_track_type_entries():
+    """Discogs tracklists include heading rows, indices, etc (type_='heading').
+    Only count actual tracks (type_='track')."""
+    raw = {
+        'id': 1, 'title': 'Y', 'artists': [{'name': 'X'}],
+        'tracklist': [
+            {'title': 'Side A', 'type_': 'heading'},
+            {'title': 'Track 1', 'type_': 'track'},
+            {'title': 'Track 2', 'type_': 'track'},
+            {'title': 'Side B', 'type_': 'heading'},
+            {'title': 'Track 3', 'type_': 'track'},
+        ],
+    }
+    assert Album.from_discogs_dict(raw).total_tracks == 3
+
+
+# ---------------------------------------------------------------------------
+# MusicBrainz
+# ---------------------------------------------------------------------------
+
+
+def test_album_from_musicbrainz_dict_full_response():
+    raw = {
+        'id': 'abc-123-mbid',
+        'title': 'GNX',
+        'artist-credit': [
+            {'artist': {'id': 'kdot-mbid', 'name': 'Kendrick Lamar'}},
+        ],
+        'date': '2024-11-22',
+        'media': [{'track-count': 12}],
+        'release-group': {
+            'id': 'rg-mbid',
+            'primary-type': 'Album',
+        },
+        'label-info': [{'label': {'name': 'pgLang'}}],
+        'barcode': '00602465123456',
+    }
+    album = Album.from_musicbrainz_dict(raw)
+    assert album.id == 'abc-123-mbid'
+    assert album.name == 'GNX'
+    assert album.artists == ['Kendrick Lamar']
+    assert album.artist_id == 'kdot-mbid'
+    assert album.release_date == '2024-11-22'
+    assert album.total_tracks == 12
+    assert album.album_type == 'album'
+    assert album.label == 'pgLang'
+    assert album.barcode == '00602465123456'
+    assert album.external_ids['musicbrainz'] == 'abc-123-mbid'
+    assert album.external_ids['musicbrainz_release_group'] == 'rg-mbid'
+
+
+def test_album_from_musicbrainz_dict_sums_multi_disc_tracks():
+    """MB stores per-disc track counts; total = sum across media."""
+    raw = {
+        'id': 'x', 'title': 'Multi Disc',
+        'artist-credit': [{'artist': {'name': 'A'}}],
+        'media': [{'track-count': 14}, {'track-count': 5}],
+    }
+    assert Album.from_musicbrainz_dict(raw).total_tracks == 19
+
+
+def test_album_from_musicbrainz_dict_release_group_type_overrides_default():
+    raw = {
+        'id': 'x', 'title': 'X',
+        'artist-credit': [{'artist': {'name': 'A'}}],
+        'release-group': {'id': 'rg', 'primary-type': 'Single'},
+        'media': [{'track-count': 1}],
+    }
+    assert Album.from_musicbrainz_dict(raw).album_type == 'single'
+
+
+# ---------------------------------------------------------------------------
+# Qobuz
+# ---------------------------------------------------------------------------
+
+
+def test_album_from_qobuz_dict_full_response():
+    raw = {
+        'id': 12345,
+        'title': 'GNX',
+        'artist': {'id': 67890, 'name': 'Kendrick Lamar'},
+        'release_date_original': '2024-11-22',
+        'released_at': '2024-11-22T08:00:00',
+        'tracks_count': 12,
+        'image': {
+            'small': 'https://qobuz/small.jpg',
+            'large': 'https://qobuz/large.jpg',
+            'thumbnail': 'https://qobuz/thumb.jpg',
+        },
+        'genre': {'id': 116, 'name': 'Hip-Hop/Rap'},
+        'label': {'id': 999, 'name': 'pgLang'},
+        'upc': '00602465123456',
+        'url': 'https://www.qobuz.com/album/gnx/12345',
+    }
+    album = Album.from_qobuz_dict(raw)
+    assert album.id == '12345'
+    assert album.name == 'GNX'
+    assert album.artists == ['Kendrick Lamar']
+    assert album.artist_id == '67890'
+    assert album.release_date == '2024-11-22'
+    assert album.total_tracks == 12
+    assert album.image_url == 'https://qobuz/large.jpg'
+    assert album.genres == ['Hip-Hop/Rap']
+    assert album.label == 'pgLang'
+    assert album.barcode == '00602465123456'
+    assert album.source == 'qobuz'
+
+
+def test_album_from_qobuz_dict_falls_back_through_image_sizes():
+    base = {'id': 1, 'title': 'X', 'artist': {'name': 'A'}}
+    a = Album.from_qobuz_dict({**base, 'image': {'small': 'S'}})
+    assert a.image_url == 'S'
+    b = Album.from_qobuz_dict({**base, 'image': {}})
+    assert b.image_url is None
+
+
+def test_album_from_qobuz_dict_strips_iso_timestamp_to_date():
+    raw = {'id': 1, 'title': 'X', 'artist': {'name': 'A'},
+           'released_at': '2024-11-22T08:00:00'}
+    assert Album.from_qobuz_dict(raw).release_date == '2024-11-22'
+
+
+# ---------------------------------------------------------------------------
+# Tidal
+# ---------------------------------------------------------------------------
+
+
+def test_album_from_tidal_object_full_shape():
+    """tidalapi returns objects, not dicts. Use SimpleNamespace stand-ins
+    to mirror the tidalapi.Album shape."""
+    from types import SimpleNamespace
+
+    artist_obj = SimpleNamespace(id=67890, name='Kendrick Lamar')
+    album_obj = SimpleNamespace(
+        id=12345,
+        name='GNX',
+        artist=artist_obj,
+        release_date='2024-11-22',
+        num_tracks=12,
+        type='ALBUM',
+        picture='abc-123-def',
+        universal_product_number='00602465123456',
+        image=lambda size=640: f'https://resources.tidal.com/images/abc/123/def/{size}x{size}.jpg',
+    )
+
+    album = Album.from_tidal_object(album_obj)
+    assert album.id == '12345'
+    assert album.name == 'GNX'
+    assert album.artists == ['Kendrick Lamar']
+    assert album.artist_id == '67890'
+    assert album.release_date == '2024-11-22'
+    assert album.total_tracks == 12
+    assert album.album_type == 'album'  # lowercased
+    assert album.image_url and 'tidal.com' in album.image_url
+    assert album.barcode == '00602465123456'
+    assert album.source == 'tidal'
+    assert album.external_ids['tidal'] == '12345'
+
+
+def test_album_from_tidal_object_falls_back_to_picture_url_when_image_method_missing():
+    from types import SimpleNamespace
+    album_obj = SimpleNamespace(
+        id=1, name='X',
+        artist=SimpleNamespace(name='A', id=2),
+        release_date='2024',
+        num_tracks=10,
+        picture='aa-bb-cc',
+    )
+    album = Album.from_tidal_object(album_obj)
+    assert album.image_url and 'aa/bb/cc' in album.image_url
+
+
+def test_album_from_tidal_object_handles_missing_attrs():
+    """Bare-minimum tidalapi-shaped object — should still produce a
+    valid Album with sensible defaults."""
+    from types import SimpleNamespace
+    album_obj = SimpleNamespace(id=1, name='X', artist=None)
+    album = Album.from_tidal_object(album_obj)
+    assert album.id == '1'
+    assert album.name == 'X'
+    assert album.artists == ['Unknown Artist']
+    assert album.total_tracks == 0
+    assert album.album_type == 'album'
+    assert album.image_url is None
+
+
+# ---------------------------------------------------------------------------
+# Hydrabase
+# ---------------------------------------------------------------------------
+
+
+def test_album_from_hydrabase_dict_full_response():
+    raw = {
+        'id': 'soul-12345',
+        'name': 'GNX',
+        'artists': [{'id': 'soul-artist-1', 'name': 'Kendrick Lamar'}],
+        'release_date': '2024-11-22',
+        'total_tracks': 12,
+        'album_type': 'album',
+        'image_url': 'https://hydrabase.example/cover.jpg',
+        'soul_id': 'soul-12345',
+        'artist_id': 'soul-artist-1',
+    }
+    album = Album.from_hydrabase_dict(raw)
+    assert album.id == 'soul-12345'
+    assert album.name == 'GNX'
+    assert album.artists == ['Kendrick Lamar']
+    assert album.artist_id == 'soul-artist-1'
+    assert album.image_url == 'https://hydrabase.example/cover.jpg'
+    assert album.source == 'hydrabase'
+    assert album.external_ids['hydrabase'] == 'soul-12345'
+    assert album.external_ids['soul'] == 'soul-12345'
+
+
+def test_album_from_hydrabase_dict_handles_string_artists():
+    """Hydrabase responses sometimes return artists as a flat list of
+    name strings, sometimes as dicts. Both shapes work."""
+    raw_str = {'id': '1', 'name': 'X', 'artists': ['Artist A']}
+    assert Album.from_hydrabase_dict(raw_str).artists == ['Artist A']
+
+    raw_dict = {'id': '1', 'name': 'X', 'artists': [{'name': 'Artist B'}]}
+    assert Album.from_hydrabase_dict(raw_dict).artists == ['Artist B']
+
+
+# ---------------------------------------------------------------------------
+# Cross-provider invariants
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize('factory,raw', [
+    ('from_spotify_dict', {'id': 'x', 'name': 'X'}),
+    ('from_itunes_dict', {'collectionId': 1, 'collectionName': 'X', 'artistName': 'A'}),
+    ('from_deezer_dict', {'id': 1, 'title': 'X', 'artist': {'name': 'A'}}),
+    ('from_discogs_dict', {'id': 1, 'title': 'X', 'artists': [{'name': 'A'}]}),
+    ('from_musicbrainz_dict', {'id': 'x', 'title': 'X',
+                                'artist-credit': [{'artist': {'name': 'A'}}]}),
+    ('from_hydrabase_dict', {'id': 'x', 'name': 'X', 'artists': [{'name': 'A'}]}),
+    ('from_qobuz_dict', {'id': 1, 'title': 'X', 'artist': {'name': 'A'}}),
+])
+def test_every_converter_produces_required_fields(factory, raw):
+    """Every converter MUST populate the required fields with sensible
+    defaults even on minimal input. This is the contract consumers
+    rely on to drop their fallback chains."""
+    album = getattr(Album, factory)(raw)
+    assert isinstance(album.id, str) and album.id
+    assert isinstance(album.name, str) and album.name
+    assert isinstance(album.artists, list) and len(album.artists) >= 1
+    assert isinstance(album.release_date, str)  # may be empty
+    assert isinstance(album.total_tracks, int)
+    assert isinstance(album.album_type, str) and album.album_type
+    assert isinstance(album.genres, list)
+    assert isinstance(album.external_ids, dict)
+    assert isinstance(album.external_urls, dict)
+    assert album.source  # always set by converter
+
+
+@pytest.mark.parametrize('factory,raw', [
+    ('from_spotify_dict', {'id': 'x', 'name': 'X'}),
+    ('from_itunes_dict', {'collectionId': 1, 'collectionName': 'X', 'artistName': 'A'}),
+    ('from_deezer_dict', {'id': 1, 'title': 'X', 'artist': {'name': 'A'}}),
+    ('from_discogs_dict', {'id': 1, 'title': 'X', 'artists': [{'name': 'A'}]}),
+    ('from_musicbrainz_dict', {'id': 'x', 'title': 'X',
+                                'artist-credit': [{'artist': {'name': 'A'}}]}),
+    ('from_hydrabase_dict', {'id': 'x', 'name': 'X', 'artists': [{'name': 'A'}]}),
+    ('from_qobuz_dict', {'id': 1, 'title': 'X', 'artist': {'name': 'A'}}),
+])
+def test_to_context_dict_shape_is_uniform_across_providers(factory, raw):
+    """The bridge dict every consumer currently expects has the same
+    shape regardless of provider. Pin so a future converter change
+    can't subtly break consumer expectations."""
+    album = getattr(Album, factory)(raw)
+    ctx = album.to_context_dict()
+
+    expected_keys = {
+        'id', 'name', 'artist', 'artist_name', 'artist_id', 'artists',
+        'image_url', 'images', 'release_date', 'album_type',
+        'total_tracks', 'source', 'genres', 'label', 'barcode',
+        'external_ids', 'external_urls',
+    }
+    assert set(ctx.keys()) == expected_keys
+
+
+# ---------------------------------------------------------------------------
+# Track / Artist — light coverage; full converters land in a follow-up PR
+# ---------------------------------------------------------------------------
+
+
+def test_track_dataclass_required_fields():
+    t = Track(id='1', name='X', artists=['A'], album='Y', duration_ms=1000)
+    assert t.id == '1'
+    assert t.popularity == 0  # default
+    assert t.external_ids == {}
+
+
+def test_artist_dataclass_required_fields():
+    a = Artist(id='1', name='X')
+    assert a.id == '1'
+    assert a.followers == 0  # default
+    assert a.genres == []
--- a/tests/test_discogs_collection_source.py
+++ b/tests/test_discogs_collection_source.py
@ -37,9 +37,19 @@ def authed_client():
    return DiscogsClient(token='dummy_test_token')


-def test_get_user_collection_returns_empty_without_token():
+def test_get_user_collection_returns_empty_without_token(monkeypatch):
    """Defensive: no token → empty list, never raises. Discogs collection
-    is private so an unauthenticated call would 403 anyway."""
+    is private so an unauthenticated call would 403 anyway.
+
+    DiscogsClient's constructor falls back to ``config_manager.get(
+    'discogs.token')`` when no token is passed — including when the
+    empty-string sentinel is passed (because empty-string is falsy).
+    Stub the config lookup so this test stays deterministic regardless
+    of the developer's local config (which may have a real token set
+    after using the Your Albums Discogs source feature)."""
+    from config.settings import config_manager
+    monkeypatch.setattr(config_manager, 'get',
+                        lambda key, default=None: '' if key == 'discogs.token' else default)
    client = DiscogsClient(token='')
    assert client.get_user_collection() == []

--- a/webui/static/helper.js
+++ b/webui/static/helper.js
@ -3432,6 +3432,7 @@ const WHATS_NEW = {
    '2.4.2': [
        // --- post-2.4.1 dev work — entries hidden by _getLatestWhatsNewVersion until the build version bumps ---
        { date: 'Unreleased — 2.4.2 dev cycle' },
+        { title: 'Internal: Typed Metadata Foundation', desc: 'internal — first step of a multi-pr migration to give the metadata pipeline a real contract. the codebase historically grew duck-typed extractors (`_extract_lookup_value(album_data, "id", "album_id", "collectionId", "release_id", default=...)`) at every consumer site because each provider returns its own response shape. ~150 of those across the codebase. new `core/metadata/types.py` defines canonical typed `Album` / `Track` / `Artist` dataclasses with strict required fields. per-source classmethod converters (from_spotify_dict, from_itunes_dict, from_deezer_dict, from_discogs_dict, from_musicbrainz_dict, from_hydrabase_dict) are the SINGLE place that knows each provider\'s wire shape. zero behavior changes in this pr — pure additive foundation. follow-up prs migrate consumers one at a time. full migration plan documented at docs/metadata-types-migration.md.', page: 'library' },
        { title: 'Discogs Collection in "Your Albums"', desc: 'discord request: pull your discogs collection into the your albums section on discover, similar to spotify liked albums. set your discogs personal access token on settings → connections (already there from prior work) and add discogs as one of the configured sources via the gear button on your albums. background fetcher pulls your full collection (all folders, all pages — capped at 5000 releases), normalizes artist names (strips discogs `(N)` disambiguation suffix), dedupes against any spotify/tidal/deezer-saved versions of the same album. clicking a discogs-only album opens with discogs context — full release detail (year, format, label, country, tracklist) from the /releases endpoint. clicking an album that exists in both your spotify saved AND discogs collection prefers spotify (download flow is more direct). discogs is physical-media-first so many releases won\'t have streaming equivalents — those still show in the grid but the modal flow may need to fall back to a name search to find a downloadable digital version.', page: 'discover' },
        { title: 'Drop Redundant "Your Spotify Library" Section on Discover', desc: 'discover page used to show two near-identical sections: "Your Albums" (cross-source aggregator across spotify/deezer/etc) AND "Your Spotify Library" (spotify-only). same UI, same grid, same filter / sort / download-missing controls — the spotify-only one was a strict subset of what your albums already covers. removed it. spotify saved albums still surface via the your albums section with spotify as one of its configured sources (gear button → configure sources). backend collection / storage is unchanged — the watchlist scanner still populates the spotify_library_albums cache for your albums to read.', page: 'discover' },
        { title: 'Library Disk Usage on Stats Page', desc: 'discord request (samuel [KC]): show how much disk space the library takes. new card on stats → system statistics shows total bytes + per-format breakdown (FLAC vs MP3 vs M4A bars). data comes from `tracks.file_size` populated during deep scan from whatever the media server already returns (plex MediaPart.size, jellyfin MediaSources[].Size, navidrome song.size, soulsync standalone os.path.getsize) — zero filesystem walk overhead. existing libraries see "Run a Deep Scan to populate" until the next deep scan fills in sizes; partial coverage shown as "X tracks measured (+Y pending)". migration is additive (NULL on legacy rows) so upgrading users have nothing to do.', page: 'stats' },