|
|
"""Canonical registry of external/source ID column names.
|
|
|
|
|
|
SoulSync stores each metadata provider's ID for an artist/album/track under a
|
|
|
column whose NAME is inconsistent across tables — e.g. Deezer's artist id is
|
|
|
``deezer_id`` on the ``artists`` table but ``deezer_artist_id`` on
|
|
|
``watchlist_artists`` and ``album_deezer_id`` / ``similar_artist_deezer_id`` on
|
|
|
the discovery tables. Spotify/iTunes keep an entity qualifier on the core tables
|
|
|
while Deezer/Amazon/Tidal/... don't, and MusicBrainz uses three different nouns.
|
|
|
The result is code that checks 2–5 property-name variants everywhere.
|
|
|
|
|
|
This module is the single source of truth for "(provider, entity) → column".
|
|
|
It does NOT rename any database column — these ARE the real names today; the
|
|
|
registry just centralizes the knowledge and offers accessors that read an ID
|
|
|
from a dict / sqlite3.Row robustly (canonical column first, then known aliases),
|
|
|
so callers stop hand-rolling variant checks.
|
|
|
"""
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
from typing import Any, Dict, Iterable, Optional
|
|
|
|
|
|
# The entity kinds this registry can resolve ID columns for.
ENTITIES = (
    "artist",
    "album",
    "track",
)
|
|
|
|
|
|
# entity -> provider -> canonical column name on the CORE table
# (artists / albums / tracks). This is the preferred name for reads and writes.
#
# Naming patterns visible in the table:
#   * spotify / itunes embed the entity in the column name;
#   * musicbrainz uses a different noun per entity (id / release / recording);
#   * the remaining providers reuse one unqualified ``<provider>_id`` name.
#
# Insertion order matters: it is the default provider order of the
# ``{provider: id}`` mapping built by ``source_id_map``.
_CORE_ID_COLUMNS: Dict[str, Dict[str, str]] = {
    "artist": {
        "spotify": "spotify_artist_id",
        "itunes": "itunes_artist_id",
        "deezer": "deezer_id",
        "musicbrainz": "musicbrainz_id",
        "discogs": "discogs_id",
        "amazon": "amazon_id",
        "tidal": "tidal_id",
        "qobuz": "qobuz_id",
        "audiodb": "audiodb_id",
        "genius": "genius_id",
        "hydrabase": "soul_id",
    },
    # No "genius" entry for albums.
    "album": {
        "spotify": "spotify_album_id",
        "itunes": "itunes_album_id",
        "deezer": "deezer_id",
        "musicbrainz": "musicbrainz_release_id",
        "discogs": "discogs_id",
        "amazon": "amazon_id",
        "tidal": "tidal_id",
        "qobuz": "qobuz_id",
        "audiodb": "audiodb_id",
        "hydrabase": "soul_id",
    },
    # No "discogs" entry for tracks.
    "track": {
        "spotify": "spotify_track_id",
        "itunes": "itunes_track_id",
        "deezer": "deezer_id",
        "musicbrainz": "musicbrainz_recording_id",
        "amazon": "amazon_id",
        "tidal": "tidal_id",
        "qobuz": "qobuz_id",
        "audiodb": "audiodb_id",
        "genius": "genius_id",
        "hydrabase": "soul_id",
    },
}
|
|
|
|
|
|
# (entity, provider) -> alternative column / dict-key names under which the
# SAME ID shows up outside the core tables (satellite tables, API payloads).
# Accessors try the canonical column first and then these, in the order
# listed, so a read succeeds no matter where the row/dict originated.
# Pairs without alternative names simply have no entry.
_ALIASES: Dict[tuple, tuple] = {
    # --- artist ---
    ("artist", "spotify"): ("similar_artist_spotify_id",),
    ("artist", "itunes"): ("artist_itunes_id", "similar_artist_itunes_id"),
    ("artist", "deezer"): (
        "deezer_artist_id",
        "artist_deezer_id",
        "similar_artist_deezer_id",
    ),
    ("artist", "musicbrainz"): (
        "musicbrainz_artist_id",
        "similar_artist_musicbrainz_id",
    ),
    ("artist", "discogs"): ("discogs_artist_id",),
    ("artist", "amazon"): ("amazon_artist_id",),
    # --- album ---
    ("album", "spotify"): ("album_spotify_id",),
    ("album", "itunes"): ("album_itunes_id",),
    ("album", "deezer"): ("deezer_album_id", "album_deezer_id"),
    ("album", "discogs"): ("discogs_release_id",),
    # --- track ---
    ("track", "deezer"): ("deezer_track_id",),
}
|
|
|
|
|
|
|
|
|
def id_column(provider: str, entity: str = "artist") -> Optional[str]:
    """Look up the canonical core-table column for ``provider`` + ``entity``.

    Returns the column name, or None when the entity is unknown or the
    provider is not tracked for that entity.
    """
    # An unknown entity falls back to an empty mapping, so the second lookup
    # yields None instead of raising.
    columns_for_entity = _CORE_ID_COLUMNS.get(entity, {})
    return columns_for_entity.get(provider)
|
|
|
|
|
|
|
|
|
def id_keys(provider: str, entity: str = "artist") -> tuple:
    """Return every key name the ID may be stored under, in lookup order.

    The canonical core-table column (if there is one) comes first, followed by
    the known aliases in their registered order; repeated names are dropped.
    An unknown (entity, provider) pair yields an empty tuple.
    """
    canonical = id_column(provider, entity)
    candidates = [canonical] if canonical else []
    candidates.extend(_ALIASES.get((entity, provider), ()))
    # dict.fromkeys keeps the first occurrence of each name and preserves
    # insertion order, which removes duplicates without reordering.
    return tuple(dict.fromkeys(candidates))
|
|
|
|
|
|
|
|
|
def _read(data: Any, key: str) -> Any:
    """Read ``key`` from a dict or sqlite3.Row, returning None if absent.

    Args:
        data: A dict, a sqlite3.Row, or any other object. Objects without a
            ``keys`` attribute (None, lists, strings, ...) are treated as
            having no values.
        key: Column / dict-key name to look up.

    Returns:
        The stored value (which may itself be None), or None when ``key`` is
        not present or ``data`` is not a keyed row.
    """
    if isinstance(data, dict):
        # dict.get never raises for a missing key.
        return data.get(key)
    if not hasattr(data, "keys"):
        return None
    # Index directly instead of pre-checking ``key in data.keys()``:
    # sqlite3.Row matches column names case-insensitively on lookup, while a
    # membership test against its key list is case-sensitive and would report
    # a differently-cased column as absent. This also avoids building the key
    # list on every call.
    try:
        return data[key]
    except (KeyError, IndexError):
        # sqlite3.Row raises IndexError for an unknown column name; other
        # mapping-like objects raise KeyError.
        return None
|
|
|
|
|
|
|
|
|
def get_id(data: Any, provider: str, entity: str = "artist") -> Optional[str]:
    """Fetch ``provider``'s ID for ``entity`` out of a dict / sqlite3.Row.

    Keys are probed in the order given by ``id_keys`` (canonical column, then
    each known alias). The first truthy value found is returned; if every
    candidate is missing or empty the result is None. Intended as a
    replacement for hand-written chains such as
    ``row.get('deezer_id') or row.get('deezer_artist_id')``.
    """
    # Lazy generator: stops reading as soon as one key yields a truthy value.
    found_values = (_read(data, name) for name in id_keys(provider, entity))
    return next((found for found in found_values if found), None)
|
|
|
|
|
|
|
|
|
def source_id_map(
    data: Any,
    entity: str = "artist",
    providers: Optional[Iterable[str]] = None,
) -> Dict[str, Optional[str]]:
    """Collect a ``{provider: id}`` mapping for ``entity`` from a row/dict.

    This is the common "artist_source_ids" pattern. When ``providers`` is
    omitted, every provider registered for the entity is included, in
    registry order; supplying ``providers`` limits the result to those names
    and fixes their order. Providers with no ID found map to None.
    """
    if providers is None:
        # Snapshot the provider names registered for this entity; an unknown
        # entity produces an empty result.
        providers = list(_CORE_ID_COLUMNS.get(entity, {}))

    ids_by_provider: Dict[str, Optional[str]] = {}
    for name in providers:
        ids_by_provider[name] = get_id(data, name, entity)
    return ids_by_provider
|