mirror of https://github.com/Nezreka/SoulSync.git
Merge pull request #490 from Nezreka/refactor/typed-metadata-types-foundation
Refactor/typed metadata types foundationpull/492/head
commit
e12969fbbe
@ -0,0 +1,618 @@
|
||||
"""Canonical typed dataclasses for metadata across all providers.
|
||||
|
||||
The metadata pipeline historically grew organically: each new provider
|
||||
(Spotify → iTunes → Deezer → Tidal → Qobuz → MusicBrainz → AudioDB →
|
||||
Discogs → Hydrabase) returns its own response shape, and consumer code
|
||||
defensively extracts every field via fallback chains:
|
||||
|
||||
_extract_lookup_value(album_data, 'id', 'album_id', 'collectionId',
|
||||
'release_id', default=album_id)
|
||||
|
||||
That pattern works but is brittle: each new provider adds more keys to
|
||||
chase, each consumer re-runs the same defensive logic, and there's no
|
||||
contract about what shape any given consumer can trust.
|
||||
|
||||
This module is the canonical contract. Every provider produces these
|
||||
types via a single ``from_<provider>_dict()`` classmethod (``from_tidal_object()`` for Tidal, whose client returns objects rather than dicts). Every
|
||||
consumer accepts these types and trusts the fields. Field names are
|
||||
provider-neutral (``release_date`` not ``releaseDate``,
|
||||
``image_url`` not ``artworkUrl100``).
|
||||
|
||||
This is the foundation PR. It only DEFINES the contract and provides
|
||||
the converters; no consumer is migrated in this PR. Future PRs each
|
||||
migrate one consumer to accept ``Album`` / ``Track`` / ``Artist``
|
||||
instead of raw dicts.
|
||||
|
||||
The ``Album`` / ``Track`` / ``Artist`` symbols also re-export from
|
||||
``core.itunes_client`` for backward compatibility — existing callers
|
||||
don't need to change anything.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers shared by converters
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _str(value: Any, default: str = '') -> str:
|
||||
"""Coerce to non-None str, never None."""
|
||||
if value is None:
|
||||
return default
|
||||
return str(value)
|
||||
|
||||
|
||||
def _int(value: Any, default: int = 0) -> int:
|
||||
"""Coerce to int, default on parse failure."""
|
||||
if value is None or value == '':
|
||||
return default
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _strip_discogs_disambiguation(name: str) -> str:
|
||||
"""Discogs appends ``(N)`` to artist names when there are multiple
|
||||
artists with the same name. Strip so cross-provider matches work."""
|
||||
return re.sub(r'\s*\(\d+\)$', '', name or '').strip()
|
||||
|
||||
|
||||
def _itunes_artwork(url: Optional[str]) -> Optional[str]:
|
||||
"""iTunes serves cover art at any size by template substitution.
|
||||
Always upgrade ``100x100bb`` → ``3000x3000bb`` for highest quality."""
|
||||
if not url:
|
||||
return None
|
||||
return url.replace('100x100bb', '3000x3000bb')
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Album
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class Album:
    """Provider-neutral album.

    Required fields are guaranteed to be set by every converter. Optional
    fields are explicit ``Optional[...]`` so consumers know they may be
    None / empty. Source-specific raw IDs that don't fit the typed schema
    can be stashed in ``external_ids`` (provider name → id string).
    """

    id: str                              # Source-native id, always set
    name: str                            # Album title, always set
    artists: List[str]                   # Display names, may be ['Unknown Artist']
    release_date: str                    # ISO 'YYYY' or 'YYYY-MM-DD' or '' when unknown
    total_tracks: int                    # 0 when unknown
    album_type: str                      # 'album' / 'single' / 'ep' / 'compilation'

    # Optional but commonly populated
    image_url: Optional[str] = None      # Highest-quality cover URL
    artist_id: Optional[str] = None      # Primary artist's source-native id
    genres: List[str] = field(default_factory=list)
    label: Optional[str] = None          # Record label / publisher
    barcode: Optional[str] = None        # UPC/EAN when the provider supplies one

    # Source provenance
    source: str = ''                     # 'spotify' / 'itunes' / etc — set by converter
    external_ids: Dict[str, str] = field(default_factory=dict)
    external_urls: Dict[str, str] = field(default_factory=dict)

    # ------------------------------------------------------------------
    # Per-source converters. Each one is the SINGLE source of truth for
    # how that provider's response maps to the canonical Album. Adding
    # a new provider = adding one more converter here. Consumer code
    # never needs to know any provider's wire shape.
    # ------------------------------------------------------------------

    @classmethod
    def from_spotify_dict(cls, raw: Dict[str, Any]) -> 'Album':
        """Build an ``Album`` from a Spotify Web API ``/albums/{id}`` response.

        Missing keys fall back to the field defaults ('' / 0 / None /
        ``['Unknown Artist']``); ``album_type`` defaults to 'album'.
        """
        artists_raw = raw.get('artists') or []
        artist_names = [_str(a.get('name')) for a in artists_raw
                        if isinstance(a, dict) and a.get('name')]
        # The primary artist id comes from the first entry, named or not.
        primary_artist_id = ''
        if artists_raw and isinstance(artists_raw[0], dict):
            primary_artist_id = _str(artists_raw[0].get('id'))

        # Only the first entry of `images` is consulted.
        images = raw.get('images') or []
        image_url = None
        if images and isinstance(images[0], dict):
            image_url = _str(images[0].get('url')) or None

        external_ids: Dict[str, str] = {}
        if raw.get('id'):
            external_ids['spotify'] = _str(raw['id'])
        upc = (raw.get('external_ids') or {}).get('upc')
        if upc:
            external_ids['upc'] = _str(upc)

        external_urls: Dict[str, str] = {}
        sp_url = (raw.get('external_urls') or {}).get('spotify')
        if sp_url:
            external_urls['spotify'] = _str(sp_url)

        return cls(
            id=_str(raw.get('id')),
            name=_str(raw.get('name')),
            artists=artist_names or ['Unknown Artist'],
            release_date=_str(raw.get('release_date')),
            total_tracks=_int(raw.get('total_tracks')),
            album_type=_str(raw.get('album_type'), default='album'),
            image_url=image_url,
            artist_id=primary_artist_id or None,
            genres=list(raw.get('genres') or []),
            label=_str(raw.get('label')) or None,
            barcode=external_ids.get('upc'),
            source='spotify',
            external_ids=external_ids,
            external_urls=external_urls,
        )

    @classmethod
    def from_itunes_dict(cls, raw: Dict[str, Any]) -> 'Album':
        """Build an ``Album`` from an iTunes Search API album result.

        iTunes does not tag the album type, so it is inferred from
        ``collectionType`` and ``trackCount``. The ``releaseDate``
        timestamp is truncated to its date part.
        """
        track_count = _int(raw.get('trackCount'))

        collection_type = _str(raw.get('collectionType'), default='Album')
        if 'compilation' in collection_type.lower():
            album_type = 'compilation'
        elif track_count <= 0:
            # Unknown track count: don't guess 'single' from a missing
            # value; fall back to the same default the other converters use.
            album_type = 'album'
        elif track_count <= 3:
            album_type = 'single'
        elif track_count <= 6:
            album_type = 'ep'
        else:
            album_type = 'album'

        artist_id = _str(raw.get('artistId')) or None
        external_ids: Dict[str, str] = {}
        if raw.get('collectionId'):
            external_ids['itunes'] = _str(raw['collectionId'])
        if artist_id:
            external_ids['itunes_artist'] = artist_id

        external_urls: Dict[str, str] = {}
        if raw.get('collectionViewUrl'):
            external_urls['itunes'] = _str(raw['collectionViewUrl'])

        # Strip a trailing "- Single" / "(Single)" / "- EP" / "(EP)" marker
        # from the title. Other parenthesised suffixes (e.g. "(Deluxe)")
        # are intentionally left in place by this pattern.
        name = _str(raw.get('collectionName'))
        name = re.sub(r'\s*[-(]\s*(Single|EP)\s*[)]?$', '', name, flags=re.IGNORECASE).strip()

        # '2024-01-15T00:00:00Z' -> '2024-01-15'
        release_date = _str(raw.get('releaseDate')).split('T', 1)[0]

        primary_genre = _str(raw.get('primaryGenreName'))
        return cls(
            id=_str(raw.get('collectionId')),
            name=name,
            # `or` (not just a None default) so an empty artistName also
            # becomes 'Unknown Artist', matching the other converters.
            artists=[_str(raw.get('artistName')) or 'Unknown Artist'],
            release_date=release_date,
            total_tracks=track_count,
            album_type=album_type,
            image_url=_itunes_artwork(raw.get('artworkUrl100')),
            artist_id=artist_id,
            genres=[primary_genre] if primary_genre else [],
            source='itunes',
            external_ids=external_ids,
            external_urls=external_urls,
        )

    @classmethod
    def from_deezer_dict(cls, raw: Dict[str, Any]) -> 'Album':
        """Build an ``Album`` from a Deezer API ``/album/{id}`` response.

        ``artist`` may be a nested dict or a bare value; ``genres`` is
        read from its nested ``data`` list when present.
        """
        artist = raw.get('artist') or {}
        if isinstance(artist, dict):
            artist_name = _str(artist.get('name')) or 'Unknown Artist'
            artist_id = _str(artist.get('id'))
        else:
            artist_name = _str(artist) or 'Unknown Artist'
            artist_id = ''

        # Prefer the largest cover variant that is present.
        image_url = (
            _str(raw.get('cover_xl'))
            or _str(raw.get('cover_big'))
            or _str(raw.get('cover_medium'))
            or _str(raw.get('cover'))
            or None
        )

        # Anything other than 'single' / 'ep' is reported as 'album'.
        record_type = _str(raw.get('record_type'), default='album').lower()
        album_type = {'single': 'single', 'ep': 'ep'}.get(record_type, 'album')

        external_ids: Dict[str, str] = {}
        if raw.get('id'):
            external_ids['deezer'] = _str(raw['id'])
        if raw.get('upc'):
            external_ids['upc'] = _str(raw['upc'])

        external_urls: Dict[str, str] = {}
        if raw.get('link'):
            external_urls['deezer'] = _str(raw['link'])

        # Tolerate `genres` being absent, null, not a dict, or carrying a
        # null `data` list — none of those should crash the converter.
        genres_block = raw.get('genres') or {}
        genre_items = (genres_block.get('data') or []) if isinstance(genres_block, dict) else []
        genres = [_str(g.get('name')) for g in genre_items
                  if isinstance(g, dict) and g.get('name')]

        return cls(
            id=_str(raw.get('id')),
            name=_str(raw.get('title')),
            artists=[artist_name],
            release_date=_str(raw.get('release_date')),
            total_tracks=_int(raw.get('nb_tracks')),
            album_type=album_type,
            image_url=image_url,
            artist_id=artist_id or None,
            genres=genres,
            label=_str(raw.get('label')) or None,
            barcode=external_ids.get('upc'),
            source='deezer',
            external_ids=external_ids,
            external_urls=external_urls,
        )

    @classmethod
    def from_discogs_dict(cls, raw: Dict[str, Any]) -> 'Album':
        """Build an ``Album`` from a Discogs API ``/releases/{id}`` response.

        Artist names have the Discogs ``(N)`` disambiguation suffix
        removed; ``genres`` and ``styles`` are concatenated; only the
        release ``year`` is available as the release date.
        """
        artist_names: List[str] = []
        primary_artist_id = ''
        for a in raw.get('artists') or []:
            if not isinstance(a, dict):
                continue
            name = _strip_discogs_disambiguation(_str(a.get('name')))
            if name:
                artist_names.append(name)
            # First artist entry carrying an id wins.
            if not primary_artist_id and a.get('id'):
                primary_artist_id = _str(a['id'])

        images = raw.get('images') or []
        image_url = None
        if images and isinstance(images[0], dict):
            image_url = _str(images[0].get('uri') or images[0].get('uri150')) or None

        # Count entries explicitly typed as tracks; if none are typed that
        # way, fall back to the raw tracklist length.
        tracklist = raw.get('tracklist') or []
        total_tracks = sum(1 for t in tracklist
                           if isinstance(t, dict) and t.get('type_') == 'track')
        if not total_tracks:
            total_tracks = len(tracklist)

        labels = raw.get('labels') or []
        label_name = ''
        if labels and isinstance(labels[0], dict):
            label_name = _str(labels[0].get('name'))

        external_ids: Dict[str, str] = {}
        if raw.get('id'):
            external_ids['discogs'] = _str(raw['id'])
        # First non-empty identifier of type "barcode" is used.
        for ident in raw.get('identifiers') or []:
            # _str() guards against a null `type`, which would otherwise
            # raise AttributeError on .lower().
            if isinstance(ident, dict) and _str(ident.get('type')).lower() == 'barcode':
                bc = _str(ident.get('value')).strip()
                if bc:
                    external_ids['barcode'] = bc
                    break

        external_urls: Dict[str, str] = {}
        if raw.get('uri'):
            external_urls['discogs'] = _str(raw['uri'])

        # A zero / unparsable year is treated as unknown.
        year = raw.get('year')
        release_date = str(year) if year and _int(year) > 0 else ''

        return cls(
            id=_str(raw.get('id')),
            name=_str(raw.get('title')),
            artists=artist_names or ['Unknown Artist'],
            release_date=release_date,
            total_tracks=total_tracks,
            album_type='album',  # Discogs doesn't tag this; default to album
            image_url=image_url,
            artist_id=primary_artist_id or None,
            genres=list(raw.get('genres') or []) + list(raw.get('styles') or []),
            label=label_name or None,
            barcode=external_ids.get('barcode'),
            source='discogs',
            external_ids=external_ids,
            external_urls=external_urls,
        )

    @classmethod
    def from_musicbrainz_dict(cls, raw: Dict[str, Any]) -> 'Album':
        """Build an ``Album`` from a MusicBrainz ``/release/{mbid}`` response.

        Expects a release (not a release-group). ``image_url`` and
        ``genres`` are always left empty by this converter.
        """
        artist_names: List[str] = []
        primary_artist_id = ''
        for credit in raw.get('artist-credit') or []:
            artist = credit.get('artist') if isinstance(credit, dict) else None
            if not isinstance(artist, dict):
                # Skips plain-string join phrases and null artist entries.
                continue
            name = _str(artist.get('name'))
            if name:
                artist_names.append(name)
            if not primary_artist_id and artist.get('id'):
                primary_artist_id = _str(artist['id'])

        # Total tracks: sum across media (one entry per disc).
        media = raw.get('media') or []
        total_tracks = sum(_int(m.get('track-count')) for m in media if isinstance(m, dict))

        external_ids: Dict[str, str] = {}
        if raw.get('id'):
            external_ids['musicbrainz'] = _str(raw['id'])
        if raw.get('barcode'):
            external_ids['barcode'] = _str(raw['barcode'])

        # `release-group` carries the album-level type (album/single/ep).
        rg = raw.get('release-group') or {}
        if not isinstance(rg, dict):
            rg = {}
        primary_type = _str(rg.get('primary-type'), default='Album').lower()
        album_type = {'single': 'single', 'ep': 'ep'}.get(primary_type, 'album')
        if rg.get('id'):
            external_ids['musicbrainz_release_group'] = _str(rg['id'])

        labels = raw.get('label-info') or []
        label_name = ''
        if labels and isinstance(labels[0], dict):
            lbl = labels[0].get('label') or {}
            if isinstance(lbl, dict):
                label_name = _str(lbl.get('name'))

        return cls(
            id=_str(raw.get('id')),
            name=_str(raw.get('title')),
            artists=artist_names or ['Unknown Artist'],
            release_date=_str(raw.get('date')),
            total_tracks=total_tracks,
            album_type=album_type,
            image_url=None,  # MB doesn't serve cover art directly; CAA is separate
            artist_id=primary_artist_id or None,
            genres=[],  # MB has tags but they're noisy; consumer can fetch separately
            label=label_name or None,
            barcode=external_ids.get('barcode'),
            source='musicbrainz',
            external_ids=external_ids,
            external_urls={},
        )

    @classmethod
    def from_qobuz_dict(cls, raw: Dict[str, Any]) -> 'Album':
        """Build an ``Album`` from a Qobuz API ``album/get`` response.

        ``artist``, ``genre`` and ``label`` are each accepted either as a
        nested dict with a ``name`` key or as a bare value.
        """
        artist = raw.get('artist') or {}
        if isinstance(artist, dict):
            artist_name = _str(artist.get('name')) or 'Unknown Artist'
            artist_id = _str(artist.get('id'))
        else:
            artist_name = _str(artist) or 'Unknown Artist'
            artist_id = ''

        # `image` is a dict of size variants; prefer the largest present.
        image = raw.get('image') or {}
        image_url = None
        if isinstance(image, dict):
            image_url = (
                _str(image.get('large'))
                or _str(image.get('small'))
                or _str(image.get('thumbnail'))
                or None
            )

        external_ids: Dict[str, str] = {}
        if raw.get('id'):
            external_ids['qobuz'] = _str(raw['id'])
        if raw.get('upc'):
            external_ids['upc'] = _str(raw['upc'])

        external_urls: Dict[str, str] = {}
        if raw.get('url'):
            external_urls['qobuz'] = _str(raw['url'])

        # Prefer `release_date_original` over `released_at` for
        # cross-provider matching; drop any time component.
        release_date = _str(raw.get('release_date_original') or raw.get('released_at'))
        release_date = release_date.split('T', 1)[0]

        genre = raw.get('genre') or {}
        genre_name = _str(genre.get('name')) if isinstance(genre, dict) else _str(genre)

        label = raw.get('label') or {}
        label_name = _str(label.get('name')) if isinstance(label, dict) else _str(label)

        return cls(
            id=_str(raw.get('id')),
            name=_str(raw.get('title')),
            artists=[artist_name],
            release_date=release_date,
            total_tracks=_int(raw.get('tracks_count')),
            album_type='album',  # Qobuz doesn't tag this consistently
            image_url=image_url,
            artist_id=artist_id or None,
            genres=[genre_name] if genre_name else [],
            label=label_name or None,
            barcode=external_ids.get('upc'),
            source='qobuz',
            external_ids=external_ids,
            external_urls=external_urls,
        )

    @classmethod
    def from_tidal_object(cls, obj: Any) -> 'Album':
        """Build an ``Album`` from a tidalapi ``Album``-like object.

        Tidal goes through the ``tidalapi`` library which returns
        Python objects, not raw dicts — so this converter is named
        ``from_tidal_object`` to make the input contract explicit.
        All access is via ``getattr`` so unit tests can pass simple
        ``SimpleNamespace`` stand-ins.
        """
        artist = getattr(obj, 'artist', None)
        artist_name = _str(getattr(artist, 'name', None)) or 'Unknown Artist'
        artist_id = _str(getattr(artist, 'id', '')) if artist else ''

        # Prefer the `image()` method when the object has one; otherwise
        # build a CDN URL from the raw `picture` id.
        image_url = None
        try:
            if hasattr(obj, 'image') and callable(obj.image):
                image_url = obj.image(640) or None
        except Exception:
            # Deliberately best-effort: any failure inside the library's
            # image lookup just means we try the `picture` fallback below.
            image_url = None
        if not image_url:
            picture = _str(getattr(obj, 'picture', ''))
            if picture:
                # Tidal CDN URL format
                pic_path = picture.replace('-', '/')
                image_url = f"https://resources.tidal.com/images/{pic_path}/640x640.jpg"

        release_date = ''
        rd = getattr(obj, 'release_date', None)
        if rd is not None:
            if callable(getattr(rd, 'isoformat', None)):
                # date/datetime-like value. str(datetime) separates date
                # and time with a space, so splitting that on 'T' would
                # leak the time part; isoformat() always uses 'T'.
                rd_text = _str(rd.isoformat())
            else:
                rd_text = _str(rd)
            release_date = rd_text.split('T', 1)[0]

        external_ids: Dict[str, str] = {}
        if getattr(obj, 'id', None):
            external_ids['tidal'] = _str(obj.id)
        if getattr(obj, 'universal_product_number', None):
            external_ids['upc'] = _str(obj.universal_product_number)

        return cls(
            id=_str(getattr(obj, 'id', '')),
            name=_str(getattr(obj, 'name', '')),
            artists=[artist_name],
            release_date=release_date,
            total_tracks=_int(getattr(obj, 'num_tracks', 0)),
            album_type=_str(getattr(obj, 'type', None), default='album').lower() or 'album',
            image_url=image_url,
            artist_id=artist_id or None,
            genres=[],  # not populated by this converter
            barcode=external_ids.get('upc'),
            source='tidal',
            external_ids=external_ids,
            external_urls={},
        )

    @classmethod
    def from_hydrabase_dict(cls, raw: Dict[str, Any]) -> 'Album':
        """Build an ``Album`` from a Hydrabase metadata service response.

        ``artists`` may be a single string, a list of strings, or a list
        of dicts with a ``name`` key. The title is read from ``name`` or,
        failing that, ``title``.
        """
        artists_raw = raw.get('artists') or []
        if isinstance(artists_raw, str):
            artist_names = [artists_raw]
        else:
            artist_names = []
            for a in artists_raw:
                name = _str(a.get('name')) if isinstance(a, dict) else _str(a)
                if name:
                    artist_names.append(name)

        external_ids: Dict[str, str] = {}
        if raw.get('id'):
            external_ids['hydrabase'] = _str(raw['id'])
        if raw.get('soul_id'):
            external_ids['soul'] = _str(raw['soul_id'])

        return cls(
            id=_str(raw.get('id')),
            name=_str(raw.get('name') or raw.get('title')),
            artists=artist_names or ['Unknown Artist'],
            release_date=_str(raw.get('release_date')),
            total_tracks=_int(raw.get('total_tracks')),
            album_type=_str(raw.get('album_type'), default='album'),
            image_url=_str(raw.get('image_url') or raw.get('thumb_url')) or None,
            artist_id=_str(raw.get('artist_id')) or None,
            source='hydrabase',
            external_ids=external_ids,
        )

    # ------------------------------------------------------------------
    # Consumer-side helpers
    # ------------------------------------------------------------------

    def to_context_dict(self) -> Dict[str, Any]:
        """Return this album as a plain dict for the dict-passing pipelines.

        This is the bridge between typed metadata and the existing
        dict-based internal API. List and dict fields are copied, so
        mutating the result does not affect this instance. ``None``
        values for ``artist_id`` / ``label`` / ``barcode`` are rendered
        as ``''``; ``image_url`` is passed through unchanged.
        """
        primary_artist = self.artists[0] if self.artists else 'Unknown Artist'
        # Only the first artist carries the album's artist_id. Every id
        # slot is a string ('' when unknown), matching 'artist_id' below.
        primary_id = self.artist_id or ''
        artists_dicts = [{'name': name, 'id': primary_id if i == 0 else ''}
                         for i, name in enumerate(self.artists)]
        images = [{'url': self.image_url}] if self.image_url else []

        return {
            'id': self.id,
            'name': self.name,
            'artist': primary_artist,
            'artist_name': primary_artist,
            'artist_id': primary_id,
            'artists': artists_dicts,
            'image_url': self.image_url,
            'images': images,
            'release_date': self.release_date,
            'album_type': self.album_type,
            'total_tracks': self.total_tracks,
            'source': self.source,
            'genres': list(self.genres),
            'label': self.label or '',
            'barcode': self.barcode or '',
            'external_ids': dict(self.external_ids),
            'external_urls': dict(self.external_urls),
        }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Track and Artist — kept lighter for now. Future PRs flesh these out
|
||||
# in the same per-source-converter pattern as Album.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class Track:
    """Provider-neutral track.

    The first five fields are required and must be supplied by whoever
    constructs the instance; everything after them has a default and may
    be ``None`` / empty.
    """

    # --- Required -----------------------------------------------------
    id: str
    name: str
    artists: List[str]
    album: str
    duration_ms: int

    # --- Optional -----------------------------------------------------
    track_number: Optional[int] = None
    disc_number: Optional[int] = None
    image_url: Optional[str] = None
    release_date: Optional[str] = None
    album_type: Optional[str] = None
    total_tracks: Optional[int] = None
    preview_url: Optional[str] = None
    isrc: Optional[str] = None
    popularity: int = 0                  # Spotify-only; 0 elsewhere

    # --- Source provenance --------------------------------------------
    source: str = ''
    external_ids: Dict[str, str] = field(default_factory=dict)
    external_urls: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class Artist:
    """Provider-neutral artist.

    ``id`` and ``name`` are required; every other field has a default.
    """

    # --- Required -----------------------------------------------------
    id: str
    name: str

    # --- Optional -----------------------------------------------------
    image_url: Optional[str] = None
    genres: List[str] = field(default_factory=list)
    popularity: int = 0                  # Spotify-only; 0 elsewhere
    followers: int = 0                   # Spotify-only; 0 elsewhere

    # --- Source provenance --------------------------------------------
    source: str = ''
    external_ids: Dict[str, str] = field(default_factory=dict)
    external_urls: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
|
||||
__all__ = ['Album', 'Track', 'Artist']
|
||||
@ -0,0 +1,125 @@
|
||||
# Typed Metadata Migration Plan
|
||||
|
||||
## Why
|
||||
|
||||
Right now the metadata pipeline has no real contract about the shape
|
||||
of data flowing between providers and consumers. Each provider
|
||||
(Spotify, iTunes, Deezer, Tidal, Qobuz, MusicBrainz, AudioDB,
|
||||
Discogs, Hydrabase) returns its own response shape, and consumer
|
||||
code defensively extracts every field via fallback chains:
|
||||
|
||||
```python
|
||||
def _build_album_info(album_data, album_id, album_name='', artist_name=''):
|
||||
images = _extract_lookup_value(album_data, 'images', default=[]) or []
|
||||
...
|
||||
return {
|
||||
'id': _extract_lookup_value(album_data, 'id', 'album_id',
|
||||
'collectionId', 'release_id',
|
||||
default=album_id) or album_id,
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
This pattern works but makes the codebase hard to extend safely:
|
||||
|
||||
- Adding a new provider means adding more keys to the fallback chains
|
||||
in every consumer file (currently ~150 call sites of
|
||||
`_extract_lookup_value` across the codebase).
|
||||
- Fixing a bug in extraction means fixing it in N places.
|
||||
- New consumers can't trust the data — they re-run defensive logic.
|
||||
- Tests are theatre because the contract is "whatever shape happens
|
||||
to come in."
|
||||
|
||||
## What this PR adds
|
||||
|
||||
`core/metadata/types.py` defines the canonical typed dataclasses:
|
||||
|
||||
- `Album` — required fields: `id`, `name`, `artists`, `release_date`,
|
||||
`total_tracks`, `album_type`. Optional: `image_url`, `artist_id`,
|
||||
`genres`, `label`, `barcode`, `external_ids`, `external_urls`.
|
||||
- `Track` — required fields: `id`, `name`, `artists`, `album`,
|
||||
`duration_ms`. Optional: track/disc number, image, ISRC, etc.
|
||||
- `Artist` — required fields: `id`, `name`. Optional: image, genres.
|
||||
|
||||
Plus per-provider classmethod converters on `Album`:
|
||||
|
||||
- `Album.from_spotify_dict(raw)`
|
||||
- `Album.from_itunes_dict(raw)`
|
||||
- `Album.from_deezer_dict(raw)`
|
||||
- `Album.from_discogs_dict(raw)`
|
||||
- `Album.from_musicbrainz_dict(raw)`
|
||||
- `Album.from_hydrabase_dict(raw)`
|
||||
- `Album.from_qobuz_dict(raw)`
|
||||
- `Album.from_tidal_object(obj)` — note: Tidal goes through the
|
||||
``tidalapi`` library which returns Python objects rather than
|
||||
raw dicts, so this converter is named ``_object`` not ``_dict``
|
||||
to make the input contract explicit.
|
||||
|
||||
Enrichment-only providers (Last.fm, Genius, AcoustID, ListenBrainz,
|
||||
AudioDB) don't return Album-shaped responses — they enrich
|
||||
existing rows with tags, lyrics URLs, fingerprint matches, etc.
|
||||
No Album converter needed for those.
|
||||
|
||||
Each converter is the SINGLE place that knows that provider's wire
|
||||
shape. Adding a new provider = adding one classmethod here and
|
||||
nothing else needs to change.
|
||||
|
||||
`Album.to_context_dict()` returns the canonical dict shape SoulSync's
|
||||
existing import / download pipelines expect — the bridge between
|
||||
typed data and the current dict-passing internal API.
|
||||
|
||||
## What this PR DOES NOT do
|
||||
|
||||
This PR does not migrate any consumer. No behavior changes. The new
|
||||
types and converters are pure additive — every existing code path
|
||||
keeps using `_extract_lookup_value` exactly as before.
|
||||
|
||||
The reason: a single big-bang migration would be a 153-call-site
|
||||
refactor with subtle behavior risk. Better to land the foundation
|
||||
in isolation, prove the contract via tests, then migrate consumers
|
||||
one at a time in follow-up PRs that are individually reviewable
|
||||
and revertable.
|
||||
|
||||
## Migration roadmap
|
||||
|
||||
Numbered in suggested order. Each item is its own PR.
|
||||
|
||||
1. **Foundation (this PR).** Land `core/metadata/types.py` +
|
||||
converters + tests. Document migration plan.
|
||||
2. **Migrate `_build_album_info`** in
|
||||
`core/metadata/album_tracks.py` — accept either a typed `Album`
|
||||
OR a raw dict. When it gets a typed Album, return
|
||||
`album.to_context_dict()`. When it gets a raw dict, normalize
|
||||
via the appropriate `from_<source>_dict()` based on the
|
||||
provided `source` argument. Reduces from 41 LOC of fallback
|
||||
chains to ~5 LOC of dispatch.
|
||||
3. **Migrate `_build_single_import_context_payload`** in the same
|
||||
file — same pattern.
|
||||
4. **Migrate Spotify client.** `SpotifyClient.get_album()` returns
|
||||
`Album` instead of raw dict. Internal callers update. Public
|
||||
API surface unchanged where it has to be (return both for one
|
||||
release, deprecate dict version).
|
||||
5. **Migrate iTunes/Deezer/Tidal/Qobuz/Discogs/Hydrabase clients.**
|
||||
Same pattern. Each client's `get_album()` returns `Album`.
|
||||
6. **Migrate consumers in `core/discovery/quality_scanner.py`,
|
||||
`core/imports/context.py`, etc.** Drop their fallback chains
|
||||
in favor of typed access.
|
||||
7. **Add `Track` converters and migrate Track-shaped consumers.**
|
||||
Same pattern as Album.
|
||||
8. **Add `Artist` converters and migrate Artist-shaped consumers.**
|
||||
9. **Deprecate `_extract_lookup_value`.** Once no caller needs it,
|
||||
delete it.
|
||||
|
||||
Each PR is independently revertable. Behavior preserved at every
|
||||
step.
|
||||
|
||||
## Acceptance criteria for this PR
|
||||
|
||||
- All converters produce a fully-populated `Album` from realistic
|
||||
provider response samples.
|
||||
- Every required field is set even when source data is partial.
|
||||
- `to_context_dict()` shape is identical across every provider converter
|
||||
(pinned via cross-provider parametrized tests).
|
||||
- No existing consumer is changed; existing tests pass unchanged.
|
||||
- Cross-provider invariants (release_date format, album_type values,
|
||||
Discogs `(N)` stripping, iTunes artwork upgrade) are pinned.
|
||||
@ -0,0 +1,572 @@
|
||||
"""Pin the per-provider Album converter contracts.
|
||||
|
||||
Each provider returns its own response shape. The
|
||||
``Album.from_<provider>_dict()`` classmethods are the SINGLE place
|
||||
that knows that shape. Consumers must be able to trust that an
|
||||
``Album`` instance has the same field semantics regardless of which
|
||||
provider it came from.
|
||||
|
||||
These tests use realistic sample payloads (truncated from real API
|
||||
responses) and pin:
|
||||
- Required fields are always populated even when source data is
|
||||
partial or messy (defaults applied uniformly).
|
||||
- Cross-provider field semantics match — e.g. ``release_date`` is
|
||||
always 'YYYY' or 'YYYY-MM-DD' regardless of whether Spotify gave
|
||||
us 'YYYY-MM-DD', iTunes gave us '2024-01-15T00:00:00Z', or
|
||||
Discogs gave us a bare year integer.
|
||||
- Provider-specific quirks are normalized at the converter boundary
|
||||
(Discogs `(N)` disambiguation suffix, iTunes `100x100bb` artwork
|
||||
URLs, Deezer's nested `artist` object).
|
||||
- ``to_context_dict()`` produces the canonical SoulSync-internal
|
||||
shape consumers currently expect.
|
||||
|
||||
When a future PR adds a new provider, this file is where the
|
||||
contract test goes.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from core.metadata.types import Album, Artist, Track
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Spotify
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_album_from_spotify_dict_full_response():
    """Convert a fully populated Spotify album payload and pin each field."""
    payload = {
        'id': '0hvT3yIEysuuvkK73vgdcW',
        'name': 'GNX',
        'artists': [
            {'id': '2YZyLoL8N0Wb9xBt1NhZWg', 'name': 'Kendrick Lamar'},
        ],
        'release_date': '2024-11-22',
        'total_tracks': 12,
        'album_type': 'album',
        'images': [
            {'url': 'https://i.scdn.co/image/abc123', 'height': 640, 'width': 640},
        ],
        'genres': ['hip hop', 'rap'],
        'label': 'pgLang/Interscope',
        'external_ids': {'upc': '00602465123456'},
        'external_urls': {'spotify': 'https://open.spotify.com/album/0hvT3yIEysuuvkK73vgdcW'},
    }

    converted = Album.from_spotify_dict(payload)

    # Attribute name -> value the converter must produce for this payload.
    expected = {
        'id': '0hvT3yIEysuuvkK73vgdcW',
        'name': 'GNX',
        'artists': ['Kendrick Lamar'],
        'artist_id': '2YZyLoL8N0Wb9xBt1NhZWg',
        'release_date': '2024-11-22',
        'total_tracks': 12,
        'album_type': 'album',
        'image_url': 'https://i.scdn.co/image/abc123',
        'genres': ['hip hop', 'rap'],
        'label': 'pgLang/Interscope',
        'barcode': '00602465123456',
        'source': 'spotify',
        'external_ids': {'spotify': '0hvT3yIEysuuvkK73vgdcW', 'upc': '00602465123456'},
    }
    for attr, want in expected.items():
        assert getattr(converted, attr) == want, attr
|
||||
|
||||
|
||||
def test_album_from_spotify_dict_handles_missing_fields():
    """A payload with only ``id`` and ``name`` still converts, with defaults applied."""
    sparse = Album.from_spotify_dict({'id': 'x', 'name': 'Y'})

    assert (sparse.id, sparse.name) == ('x', 'Y')
    # Everything absent from the payload falls back to a default.
    assert sparse.artists == ['Unknown Artist']
    assert sparse.release_date == ''
    assert sparse.total_tracks == 0
    assert sparse.album_type == 'album'
    assert sparse.image_url is None
    assert sparse.label is None
|
||||
|
||||
|
||||
def test_album_from_spotify_dict_multi_artist():
    """Collaborations keep every artist name; ``artist_id`` is the first artist's id."""
    credited = [
        {'id': 'kdot', 'name': 'Kendrick Lamar'},
        {'id': 'sza', 'name': 'SZA'},
    ]
    collab = Album.from_spotify_dict(
        {'id': 'a1', 'name': 'Luther', 'artists': credited, 'total_tracks': 1}
    )

    assert collab.artists == ['Kendrick Lamar', 'SZA']
    assert collab.artist_id == 'kdot'
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# iTunes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_album_from_itunes_dict_full_response():
    """Convert a populated iTunes collection payload and pin the normalized fields."""
    payload = {
        'collectionId': 1782145638,
        'collectionName': 'GNX',
        'artistName': 'Kendrick Lamar',
        'artistId': 368183298,
        'releaseDate': '2024-11-22T08:00:00Z',
        'trackCount': 12,
        'collectionType': 'Album',
        'artworkUrl100': 'https://is1.mzstatic.com/image/100x100bb.jpg',
        'collectionViewUrl': 'https://music.apple.com/album/gnx/1782145638',
        'primaryGenreName': 'Hip-Hop/Rap',
    }

    converted = Album.from_itunes_dict(payload)

    expected = {
        'id': '1782145638',
        'name': 'GNX',
        'artists': ['Kendrick Lamar'],
        # The ISO timestamp is cut down to its date part.
        'release_date': '2024-11-22',
        'total_tracks': 12,
        'album_type': 'album',
        # The 100x100bb artwork URL is rewritten to the 3000x3000bb variant.
        'image_url': 'https://is1.mzstatic.com/image/3000x3000bb.jpg',
        'artist_id': '368183298',
        'genres': ['Hip-Hop/Rap'],
        'source': 'itunes',
    }
    for attr, want in expected.items():
        assert getattr(converted, attr) == want, attr

    ids = converted.external_ids
    assert ids['itunes'] == '1782145638'
    assert ids['itunes_artist'] == '368183298'
|
||||
|
||||
|
||||
def test_album_from_itunes_dict_infers_album_type_from_track_count():
    """Album type is derived from ``trackCount`` for iTunes payloads.

    The intended heuristic is 1-3 tracks -> single, 4-6 -> EP, 7+ -> album;
    one count from each band is sampled here.
    """
    template = {'collectionId': 1, 'collectionName': 'X', 'artistName': 'A',
                'collectionType': 'Album'}
    samples = ((1, 'single'), (5, 'ep'), (12, 'album'))

    for track_count, expected_type in samples:
        converted = Album.from_itunes_dict({**template, 'trackCount': track_count})
        assert converted.album_type == expected_type
|
||||
|
||||
|
||||
def test_album_from_itunes_dict_detects_compilation():
    """A ``collectionType`` of 'Compilation' yields ``album_type == 'compilation'``."""
    payload = {
        'collectionId': 1,
        'collectionName': 'Best Of',
        'artistName': 'V/A',
        'collectionType': 'Compilation',
        'trackCount': 20,
    }
    converted = Album.from_itunes_dict(payload)
    assert converted.album_type == 'compilation'
|
||||
|
||||
|
||||
def test_album_from_itunes_dict_strips_single_ep_suffix():
    """The ' - Single' suffix iTunes adds to collection names is removed.

    iTunes appends ' - Single' / ' - EP' to single and EP collection names;
    stripping it lets cross-provider matching compare the actual title.
    Only the ' - Single' form is exercised here.
    """
    payload = {
        'collectionId': 1,
        'collectionName': 'Track Name - Single',
        'artistName': 'A',
        'trackCount': 1,
    }
    converted = Album.from_itunes_dict(payload)
    assert converted.name == 'Track Name'
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Deezer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_album_from_deezer_dict_full_response():
    """Convert a populated Deezer album payload and pin the normalized fields.

    Covers the Deezer-specific shapes: a nested ``artist`` object, genres
    wrapped in ``{'data': [...]}``, a top-level ``upc`` and the ``cover_xl``
    artwork URL.
    """
    raw = {
        'id': 12345,
        'title': 'GNX',
        'artist': {'id': 67890, 'name': 'Kendrick Lamar'},
        'release_date': '2024-11-22',
        'nb_tracks': 12,
        'record_type': 'album',
        'cover_xl': 'https://e-cdns-images.dzcdn.net/images/cover/abc/1000x1000-000000-80-0-0.jpg',
        'genres': {'data': [{'id': 116, 'name': 'Rap/Hip Hop'}]},
        'label': 'pgLang',
        'upc': '00602465123456',
        'link': 'https://www.deezer.com/album/12345',
    }
    album = Album.from_deezer_dict(raw)
    assert album.id == '12345'
    assert album.name == 'GNX'
    assert album.artists == ['Kendrick Lamar']
    assert album.artist_id == '67890'
    assert album.release_date == '2024-11-22'
    assert album.total_tracks == 12
    assert album.album_type == 'album'
    # Guard against None first so a missing image fails as an assertion
    # rather than raising TypeError from the `in` operator.
    assert album.image_url and 'cover/abc' in album.image_url
    assert album.genres == ['Rap/Hip Hop']
    assert album.label == 'pgLang'
    assert album.barcode == '00602465123456'
    assert album.source == 'deezer'
|
||||
|
||||
|
||||
def test_album_from_deezer_dict_falls_back_through_cover_sizes():
    """``cover_xl`` wins over ``cover_big``; with no cover keys ``image_url`` is None."""
    minimal = {'id': 1, 'title': 'X', 'artist': {'name': 'A'}}

    with_xl_and_big = Album.from_deezer_dict({**minimal, 'cover_xl': 'XL', 'cover_big': 'BIG'})
    with_big_only = Album.from_deezer_dict({**minimal, 'cover_big': 'BIG'})
    without_cover = Album.from_deezer_dict(minimal)

    assert with_xl_and_big.image_url == 'XL'
    assert with_big_only.image_url == 'BIG'
    assert without_cover.image_url is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Discogs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_album_from_discogs_dict_full_response():
    """Convert a populated Discogs release payload and pin the normalized fields."""
    tracklist = [
        {'position': 'A1', 'title': 'wacced out murals', 'type_': 'track'},
        {'position': 'A2', 'title': 'squabble up', 'type_': 'track'},
        {'position': 'B1', 'title': 'luther', 'type_': 'track'},
    ]
    release = {
        'id': 33445566,
        'title': 'GNX',
        'artists': [
            {'id': 1234, 'name': 'Kendrick Lamar'},
        ],
        'year': 2024,
        'tracklist': tracklist,
        'images': [
            {'type': 'primary', 'uri': 'https://img.discogs.com/abc.jpg', 'uri150': 'https://img.discogs.com/abc-150.jpg'},
        ],
        'genres': ['Hip Hop'],
        'styles': ['Conscious'],
        'labels': [{'name': 'pgLang', 'catno': 'PG001'}],
        'identifiers': [
            {'type': 'Barcode', 'value': '00602465123456'},
            {'type': 'Other', 'value': 'XYZ'},
        ],
        'uri': 'https://www.discogs.com/release/33445566',
    }

    converted = Album.from_discogs_dict(release)

    expected = {
        'id': '33445566',
        'name': 'GNX',
        'artists': ['Kendrick Lamar'],
        'artist_id': '1234',
        'release_date': '2024',
        'total_tracks': 3,
        'album_type': 'album',
        # The full-size `uri` is chosen over the `uri150` thumbnail.
        'image_url': 'https://img.discogs.com/abc.jpg',
        'label': 'pgLang',
        'barcode': '00602465123456',
        'source': 'discogs',
    }
    for attr, want in expected.items():
        assert getattr(converted, attr) == want, attr

    # Discogs genres and styles are merged into one genre list.
    assert all(tag in converted.genres for tag in ('Hip Hop', 'Conscious'))
|
||||
|
||||
|
||||
def test_album_from_discogs_dict_strips_disambiguation_suffix():
    """The Discogs ``(N)`` suffix is dropped: `Madonna (3)` becomes `Madonna`."""
    release = {'id': 1, 'title': 'Y', 'artists': [{'name': 'Madonna (3)'}]}
    converted = Album.from_discogs_dict(release)
    assert converted.artists == ['Madonna']
|
||||
|
||||
|
||||
def test_album_from_discogs_dict_year_zero_means_unknown():
    """Discogs uses ``year=0`` for an unknown year; it maps to an empty release_date."""
    release = {'id': 1, 'title': 'Y', 'artists': [{'name': 'X'}], 'year': 0}
    converted = Album.from_discogs_dict(release)
    assert converted.release_date == ''
|
||||
|
||||
|
||||
def test_album_from_discogs_dict_counts_only_track_type_entries():
    """Only ``type_='track'`` tracklist rows count toward ``total_tracks``.

    Discogs tracklists also carry non-track rows such as headings
    (``type_='heading'``); the two heading rows below must be ignored.
    """
    rows = [
        {'title': 'Side A', 'type_': 'heading'},
        {'title': 'Track 1', 'type_': 'track'},
        {'title': 'Track 2', 'type_': 'track'},
        {'title': 'Side B', 'type_': 'heading'},
        {'title': 'Track 3', 'type_': 'track'},
    ]
    release = {
        'id': 1, 'title': 'Y', 'artists': [{'name': 'X'}],
        'tracklist': rows,
    }
    converted = Album.from_discogs_dict(release)
    assert converted.total_tracks == 3
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MusicBrainz
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_album_from_musicbrainz_dict_full_response():
    """Convert a populated MusicBrainz release payload and pin the normalized fields."""
    release = {
        'id': 'abc-123-mbid',
        'title': 'GNX',
        'artist-credit': [
            {'artist': {'id': 'kdot-mbid', 'name': 'Kendrick Lamar'}},
        ],
        'date': '2024-11-22',
        'media': [{'track-count': 12}],
        'release-group': {
            'id': 'rg-mbid',
            'primary-type': 'Album',
        },
        'label-info': [{'label': {'name': 'pgLang'}}],
        'barcode': '00602465123456',
    }

    converted = Album.from_musicbrainz_dict(release)

    expected = {
        'id': 'abc-123-mbid',
        'name': 'GNX',
        'artists': ['Kendrick Lamar'],
        'artist_id': 'kdot-mbid',
        'release_date': '2024-11-22',
        'total_tracks': 12,
        'album_type': 'album',
        'label': 'pgLang',
        'barcode': '00602465123456',
    }
    for attr, want in expected.items():
        assert getattr(converted, attr) == want, attr

    # Both the release MBID and the release-group MBID are recorded.
    ids = converted.external_ids
    assert ids['musicbrainz'] == 'abc-123-mbid'
    assert ids['musicbrainz_release_group'] == 'rg-mbid'
|
||||
|
||||
|
||||
def test_album_from_musicbrainz_dict_sums_multi_disc_tracks():
    """``total_tracks`` is the sum of the per-medium ``track-count`` values."""
    discs = [{'track-count': 14}, {'track-count': 5}]
    release = {
        'id': 'x', 'title': 'Multi Disc',
        'artist-credit': [{'artist': {'name': 'A'}}],
        'media': discs,
    }
    converted = Album.from_musicbrainz_dict(release)
    assert converted.total_tracks == 19
|
||||
|
||||
|
||||
def test_album_from_musicbrainz_dict_release_group_type_overrides_default():
    """A release-group ``primary-type`` of 'Single' yields ``album_type == 'single'``."""
    release = {
        'id': 'x', 'title': 'X',
        'artist-credit': [{'artist': {'name': 'A'}}],
        'release-group': {'id': 'rg', 'primary-type': 'Single'},
        'media': [{'track-count': 1}],
    }
    converted = Album.from_musicbrainz_dict(release)
    assert converted.album_type == 'single'
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Qobuz
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_album_from_qobuz_dict_full_response():
    """Convert a populated Qobuz album payload and pin the normalized fields."""
    artwork = {
        'small': 'https://qobuz/small.jpg',
        'large': 'https://qobuz/large.jpg',
        'thumbnail': 'https://qobuz/thumb.jpg',
    }
    payload = {
        'id': 12345,
        'title': 'GNX',
        'artist': {'id': 67890, 'name': 'Kendrick Lamar'},
        'release_date_original': '2024-11-22',
        'released_at': '2024-11-22T08:00:00',
        'tracks_count': 12,
        'image': artwork,
        'genre': {'id': 116, 'name': 'Hip-Hop/Rap'},
        'label': {'id': 999, 'name': 'pgLang'},
        'upc': '00602465123456',
        'url': 'https://www.qobuz.com/album/gnx/12345',
    }

    converted = Album.from_qobuz_dict(payload)

    expected = {
        'id': '12345',
        'name': 'GNX',
        'artists': ['Kendrick Lamar'],
        'artist_id': '67890',
        'release_date': '2024-11-22',
        'total_tracks': 12,
        # The 'large' artwork variant is the one selected here.
        'image_url': 'https://qobuz/large.jpg',
        'genres': ['Hip-Hop/Rap'],
        'label': 'pgLang',
        'barcode': '00602465123456',
        'source': 'qobuz',
    }
    for attr, want in expected.items():
        assert getattr(converted, attr) == want, attr
|
||||
|
||||
|
||||
def test_album_from_qobuz_dict_falls_back_through_image_sizes():
    """A lone 'small' image is used; an empty ``image`` dict gives ``image_url`` None."""
    minimal = {'id': 1, 'title': 'X', 'artist': {'name': 'A'}}

    small_only = Album.from_qobuz_dict({**minimal, 'image': {'small': 'S'}})
    no_images = Album.from_qobuz_dict({**minimal, 'image': {}})

    assert small_only.image_url == 'S'
    assert no_images.image_url is None
|
||||
|
||||
|
||||
def test_album_from_qobuz_dict_strips_iso_timestamp_to_date():
    """A ``released_at`` ISO timestamp is reduced to its 'YYYY-MM-DD' date part."""
    payload = {
        'id': 1,
        'title': 'X',
        'artist': {'name': 'A'},
        'released_at': '2024-11-22T08:00:00',
    }
    converted = Album.from_qobuz_dict(payload)
    assert converted.release_date == '2024-11-22'
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tidal
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_album_from_tidal_object_full_shape():
    """Convert an attribute-style Tidal album and pin the normalized fields.

    tidalapi hands back objects rather than dicts, so ``SimpleNamespace``
    instances stand in for the ``tidalapi.Album`` shape.
    """
    from types import SimpleNamespace

    def _image(size=640):
        # Stand-in for the album object's callable ``image`` attribute.
        return f'https://resources.tidal.com/images/abc/123/def/{size}x{size}.jpg'

    fake_album = SimpleNamespace(
        id=12345,
        name='GNX',
        artist=SimpleNamespace(id=67890, name='Kendrick Lamar'),
        release_date='2024-11-22',
        num_tracks=12,
        type='ALBUM',
        picture='abc-123-def',
        universal_product_number='00602465123456',
        image=_image,
    )

    converted = Album.from_tidal_object(fake_album)

    expected = {
        'id': '12345',
        'name': 'GNX',
        'artists': ['Kendrick Lamar'],
        'artist_id': '67890',
        'release_date': '2024-11-22',
        'total_tracks': 12,
        'album_type': 'album',  # 'ALBUM' comes out lowercased
        'barcode': '00602465123456',
        'source': 'tidal',
    }
    for attr, want in expected.items():
        assert getattr(converted, attr) == want, attr

    assert converted.image_url and 'tidal.com' in converted.image_url
    assert converted.external_ids['tidal'] == '12345'
|
||||
|
||||
|
||||
def test_album_from_tidal_object_falls_back_to_picture_url_when_image_method_missing():
    """With no ``image`` attribute, the URL is built from the ``picture`` id.

    The dash-separated picture id 'aa-bb-cc' must show up as the path
    fragment 'aa/bb/cc' in the resulting URL.
    """
    from types import SimpleNamespace

    primary_artist = SimpleNamespace(name='A', id=2)
    fake_album = SimpleNamespace(
        id=1,
        name='X',
        artist=primary_artist,
        release_date='2024',
        num_tracks=10,
        picture='aa-bb-cc',
    )

    converted = Album.from_tidal_object(fake_album)
    assert converted.image_url and 'aa/bb/cc' in converted.image_url
|
||||
|
||||
|
||||
def test_album_from_tidal_object_handles_missing_attrs():
    """An object with only ``id``, ``name`` and ``artist=None`` converts with defaults."""
    from types import SimpleNamespace

    bare = SimpleNamespace(id=1, name='X', artist=None)
    converted = Album.from_tidal_object(bare)

    assert (converted.id, converted.name) == ('1', 'X')
    assert converted.artists == ['Unknown Artist']
    assert converted.total_tracks == 0
    assert converted.album_type == 'album'
    assert converted.image_url is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hydrabase
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_album_from_hydrabase_dict_full_response():
    """Convert a populated Hydrabase album payload and pin the normalized fields."""
    payload = {
        'id': 'soul-12345',
        'name': 'GNX',
        'artists': [{'id': 'soul-artist-1', 'name': 'Kendrick Lamar'}],
        'release_date': '2024-11-22',
        'total_tracks': 12,
        'album_type': 'album',
        'image_url': 'https://hydrabase.example/cover.jpg',
        'soul_id': 'soul-12345',
        'artist_id': 'soul-artist-1',
    }

    converted = Album.from_hydrabase_dict(payload)

    expected = {
        'id': 'soul-12345',
        'name': 'GNX',
        'artists': ['Kendrick Lamar'],
        'artist_id': 'soul-artist-1',
        'image_url': 'https://hydrabase.example/cover.jpg',
        'source': 'hydrabase',
    }
    for attr, want in expected.items():
        assert getattr(converted, attr) == want, attr

    # The id is exposed under both the 'hydrabase' and 'soul' keys.
    ids = converted.external_ids
    assert ids['hydrabase'] == 'soul-12345'
    assert ids['soul'] == 'soul-12345'
|
||||
|
||||
|
||||
def test_album_from_hydrabase_dict_handles_string_artists():
    """``artists`` may be a list of plain names or a list of dicts; both convert."""
    names_as_strings = Album.from_hydrabase_dict(
        {'id': '1', 'name': 'X', 'artists': ['Artist A']}
    )
    assert names_as_strings.artists == ['Artist A']

    names_as_dicts = Album.from_hydrabase_dict(
        {'id': '1', 'name': 'X', 'artists': [{'name': 'Artist B'}]}
    )
    assert names_as_dicts.artists == ['Artist B']
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cross-provider invariants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize('factory,raw', [
    ('from_spotify_dict', {'id': 'x', 'name': 'X'}),
    ('from_itunes_dict', {'collectionId': 1, 'collectionName': 'X', 'artistName': 'A'}),
    ('from_deezer_dict', {'id': 1, 'title': 'X', 'artist': {'name': 'A'}}),
    ('from_discogs_dict', {'id': 1, 'title': 'X', 'artists': [{'name': 'A'}]}),
    ('from_musicbrainz_dict', {'id': 'x', 'title': 'X',
                               'artist-credit': [{'artist': {'name': 'A'}}]}),
    ('from_hydrabase_dict', {'id': 'x', 'name': 'X', 'artists': [{'name': 'A'}]}),
    ('from_qobuz_dict', {'id': 1, 'title': 'X', 'artist': {'name': 'A'}}),
])
def test_every_converter_produces_required_fields(factory, raw):
    """Each dict-based converter fills the required fields from minimal input.

    Consumers rely on this contract to drop their own fallback chains, so
    the check is on types and non-emptiness rather than on exact values.
    """
    convert = getattr(Album, factory)
    album = convert(raw)

    # Required, non-empty strings.
    for attr in ('id', 'name', 'album_type'):
        value = getattr(album, attr)
        assert isinstance(value, str) and value

    # At least one artist is always present.
    assert isinstance(album.artists, list) and len(album.artists) >= 1

    # release_date must be a string, though it may be empty.
    assert isinstance(album.release_date, str)
    assert isinstance(album.total_tracks, int)

    # Container fields always have the right type, even when empty.
    assert isinstance(album.genres, list)
    assert isinstance(album.external_ids, dict)
    assert isinstance(album.external_urls, dict)

    # Every converter sets the source.
    assert album.source
|
||||
|
||||
|
||||
@pytest.mark.parametrize('factory,raw', [
    ('from_spotify_dict', {'id': 'x', 'name': 'X'}),
    ('from_itunes_dict', {'collectionId': 1, 'collectionName': 'X', 'artistName': 'A'}),
    ('from_deezer_dict', {'id': 1, 'title': 'X', 'artist': {'name': 'A'}}),
    ('from_discogs_dict', {'id': 1, 'title': 'X', 'artists': [{'name': 'A'}]}),
    ('from_musicbrainz_dict', {'id': 'x', 'title': 'X',
                               'artist-credit': [{'artist': {'name': 'A'}}]}),
    ('from_hydrabase_dict', {'id': 'x', 'name': 'X', 'artists': [{'name': 'A'}]}),
    ('from_qobuz_dict', {'id': 1, 'title': 'X', 'artist': {'name': 'A'}}),
])
def test_to_context_dict_shape_is_uniform_across_providers(factory, raw):
    """``to_context_dict()`` has the same key set whichever converter built the Album.

    Pinned so that a later converter change cannot quietly alter the dict
    shape consumers currently expect.
    """
    required_keys = {
        'id', 'name', 'artist', 'artist_name', 'artist_id', 'artists',
        'image_url', 'images', 'release_date', 'album_type',
        'total_tracks', 'source', 'genres', 'label', 'barcode',
        'external_ids', 'external_urls',
    }

    context = getattr(Album, factory)(raw).to_context_dict()

    assert set(context.keys()) == required_keys
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Track / Artist — light coverage; full converters land in a follow-up PR
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_track_dataclass_required_fields():
    """A Track built from its required fields gets default popularity and external_ids."""
    track = Track(id='1', name='X', artists=['A'], album='Y', duration_ms=1000)

    assert track.id == '1'
    # Optional fields fall back to their defaults.
    assert track.popularity == 0
    assert track.external_ids == {}
|
||||
|
||||
|
||||
def test_artist_dataclass_required_fields():
    """An Artist built from ``id`` and ``name`` gets default followers and genres."""
    artist = Artist(id='1', name='X')

    assert artist.id == '1'
    # Optional fields fall back to their defaults.
    assert artist.followers == 0
    assert artist.genres == []
|
||||
Loading…
Reference in new issue