You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
SoulSync/core/source_ids.py

145 lines
5.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""Canonical registry of external/source ID column names.
SoulSync stores each metadata provider's ID for an artist/album/track under a
column whose NAME is inconsistent across tables — e.g. Deezer's artist id is
``deezer_id`` on the ``artists`` table but ``deezer_artist_id`` on
``watchlist_artists`` and ``album_deezer_id`` / ``similar_artist_deezer_id`` on
the discovery tables. Spotify/iTunes keep an entity qualifier on the core tables
while Deezer/Amazon/Tidal/... don't, and MusicBrainz uses three different nouns.
The result is code that checks 25 property-name variants everywhere.
This module is the single source of truth for "(provider, entity) → column".
It does NOT rename any database column — these ARE the real names today; the
registry just centralizes the knowledge and offers accessors that read an ID
from a dict / sqlite3.Row robustly (canonical column first, then known aliases),
so callers stop hand-rolling variant checks.
"""
from __future__ import annotations
from typing import Any, Dict, Iterable, Optional
# The entity kinds covered by this registry; each one has its own
# provider -> column table in ``_CORE_ID_COLUMNS``.
ENTITIES = (
    "artist",
    "album",
    "track",
)
# Canonical column name on the CORE table (artists / albums / tracks) for each
# (entity, provider). This is the name to prefer when reading/writing.
_CORE_ID_COLUMNS: Dict[str, Dict[str, str]] = {
"artist": {
"spotify": "spotify_artist_id",
"itunes": "itunes_artist_id",
"deezer": "deezer_id",
"musicbrainz": "musicbrainz_id",
"discogs": "discogs_id",
"amazon": "amazon_id",
"tidal": "tidal_id",
"qobuz": "qobuz_id",
"audiodb": "audiodb_id",
"genius": "genius_id",
"hydrabase": "soul_id",
},
"album": {
"spotify": "spotify_album_id",
"itunes": "itunes_album_id",
"deezer": "deezer_id",
"musicbrainz": "musicbrainz_release_id",
"discogs": "discogs_id",
"amazon": "amazon_id",
"tidal": "tidal_id",
"qobuz": "qobuz_id",
"audiodb": "audiodb_id",
"hydrabase": "soul_id",
},
"track": {
"spotify": "spotify_track_id",
"itunes": "itunes_track_id",
"deezer": "deezer_id",
"musicbrainz": "musicbrainz_recording_id",
"amazon": "amazon_id",
"tidal": "tidal_id",
"qobuz": "qobuz_id",
"audiodb": "audiodb_id",
"genius": "genius_id",
"hydrabase": "soul_id",
},
}
# Other column / dict-key names the SAME (entity, provider) ID appears under
# elsewhere (satellite tables, API payloads). Accessors check the canonical
# column first, then these, so a read works regardless of where the row/dict
# came from. Keyed by (entity, provider).
_ALIASES: Dict[tuple, tuple] = {
("artist", "spotify"): ("similar_artist_spotify_id",),
("artist", "itunes"): ("artist_itunes_id", "similar_artist_itunes_id"),
("artist", "deezer"): ("deezer_artist_id", "artist_deezer_id", "similar_artist_deezer_id"),
("artist", "musicbrainz"): ("musicbrainz_artist_id", "similar_artist_musicbrainz_id"),
("artist", "discogs"): ("discogs_artist_id",),
("artist", "amazon"): ("amazon_artist_id",),
("album", "spotify"): ("album_spotify_id",),
("album", "itunes"): ("album_itunes_id",),
("album", "deezer"): ("deezer_album_id", "album_deezer_id"),
("album", "discogs"): ("discogs_release_id",),
("track", "deezer"): ("deezer_track_id",),
}
def id_column(provider: str, entity: str = "artist") -> Optional[str]:
    """Return the canonical core-table column holding ``provider``'s ID.

    Looks up ``entity`` in ``_CORE_ID_COLUMNS`` and then ``provider`` within
    that entity's table. Returns ``None`` when the entity is unknown or the
    provider is not tracked for it.
    """
    # An unknown entity behaves like an empty table, so the provider lookup
    # below falls through to None.
    columns_for_entity = _CORE_ID_COLUMNS.get(entity, {})
    return columns_for_entity.get(provider)
def id_keys(provider: str, entity: str = "artist") -> tuple:
    """Return every key name the (provider, entity) ID may be stored under.

    The canonical core column (when one exists) comes first, followed by the
    registered aliases in their declared order, with duplicates removed.
    The result is an empty tuple when nothing is known for the pair.
    """
    canonical = id_column(provider, entity)
    candidates = [canonical] if canonical else []
    candidates.extend(_ALIASES.get((entity, provider), ()))
    # dict.fromkeys keeps the first occurrence of each name and preserves
    # insertion order, giving an ordered de-duplication.
    return tuple(dict.fromkeys(candidates))
def _read(data: Any, key: str) -> Any:
"""Read ``key`` from a dict or sqlite3.Row, returning None if absent."""
try:
keys = data.keys() # dict and sqlite3.Row both support .keys()
except AttributeError:
return None
if key in keys:
try:
return data[key]
except (KeyError, IndexError):
return None
return None
def get_id(data: Any, provider: str, entity: str = "artist") -> Optional[str]:
    """Return ``provider``'s ID for ``entity`` from a dict / sqlite3.Row.

    Keys are tried in the order given by ``id_keys`` (canonical column first,
    then each alias). The first truthy value wins; ``None`` is returned when
    every candidate is missing or empty. This replaces hand-written chains
    such as ``row.get('deezer_id') or row.get('deezer_artist_id')``.
    """
    # Lazy generator: stop reading as soon as a non-empty value is found.
    found_values = (_read(data, name) for name in id_keys(provider, entity))
    return next((found for found in found_values if found), None)
def source_id_map(
    data: Any,
    entity: str = "artist",
    providers: Optional[Iterable[str]] = None,
) -> Dict[str, Optional[str]]:
    """Collect a ``{provider: id}`` mapping for ``entity`` from a row/dict.

    This is the common "artist_source_ids" pattern. When ``providers`` is
    omitted, every provider registered for ``entity`` in ``_CORE_ID_COLUMNS``
    is included, in registry order. Pass ``providers`` to restrict or reorder
    the result. Each value is whatever ``get_id`` returns, so a provider with
    no stored ID maps to ``None``.
    """
    if providers is not None:
        wanted = providers
    else:
        wanted = list(_CORE_ID_COLUMNS.get(entity, {}))

    ids_by_provider = {}
    for name in wanted:
        ids_by_provider[name] = get_id(data, name, entity)
    return ids_by_provider