mirror of https://github.com/Nezreka/SoulSync.git
Files that SoulSync (or MusicBrainz Picard) has already tagged carry
Spotify / iTunes / MusicBrainz / Deezer / Tidal / AudioDB / Genius / Last.fm
IDs in their metadata. Enrichment workers gate their queues on
{provider}_match_status IS NULL, so reading those IDs back and gap-filling
the {provider}_id column (while setting {provider}_match_status = 'matched')
lets the workers skip the API lookup entirely — a large API saving on an
already-tagged library.
New manual job in Tools -> Database & Scanning ("Import IDs from File
Tags"): scans every library file, reads embedded IDs, fills any that are
missing in the DB. Background job + progress card, mirroring the
write-tags-batch pattern.
core/library/embedded_id_reconcile.py (pure + tested):
- plan_reconcile(): gap-fill plan for a track + its album + artist. Only
empty id columns are planned; a disagreeing embedded id is a conflict,
never applied.
- apply_reconcile_plan(): one guarded UPDATE per id column —
WHERE id=? AND (col IS NULL OR col=''). The guard makes the fill atomic:
if an enrichment worker matched the same entity between our read and
this write, the UPDATE affects 0 rows instead of clobbering it. Columns
are introspected so a schema missing a provider's columns is skipped.
- reconcile_track_row(): per-track orchestration (id extraction, plan ->
apply, keeping the in-memory parent maps fresh for sibling tracks).
Job hardening: paged track scan (bounded memory), per-page commits (don't
starve concurrent workers), per-file try/finally (one bad file can't abort
the run), counters from real rowcount.
Scope: 19 column-fills across 8 providers. The MusicBrainz *recording*
(track) id is left out: it sits in a UFID frame the reader doesn't surface,
and the Vorbis key is ambiguous. MusicBrainz album and artist ids are
covered. Amazon/ASIN is deliberately excluded (ASIN is a different namespace
than the worker's amazon_id). All target columns were verified against the
live schema.
Purely additive: new module, two new endpoints, one new Tools card —
no existing behavior changed. 20 unit tests (incl. the concurrency guard).
Full suite clean (only pre-existing soundcloud /app env failures remain).
pull/803/head
parent
2604704a27
commit
e6d86dea26
@ -0,0 +1,299 @@
|
||||
"""Reconcile provider IDs embedded in audio files into the library DB.
|
||||
|
||||
Enrichment workers (Spotify / iTunes / MusicBrainz / Deezer / Tidal /
|
||||
AudioDB / Genius / Last.fm) resolve each artist / album / track to a provider ID
|
||||
via API calls, gating their work queues on ``{provider}_match_status IS
|
||||
NULL``. But files that SoulSync (or MusicBrainz Picard) already tagged
|
||||
carry those IDs in their metadata. Reading them back and gap-filling the
|
||||
``{provider}_id`` + ``{provider}_match_status = 'matched'`` columns lets
|
||||
the workers skip the API lookup entirely — large API savings on an
|
||||
already-tagged library.
|
||||
|
||||
Split into a PURE planning layer and a thin DB apply layer:
|
||||
|
||||
- :func:`plan_reconcile` takes the tags read from ONE file (via
|
||||
``core.library.file_tags.read_embedded_tags``) plus the current IDs of
|
||||
that file's track + its parent album + artist, and produces the list of
|
||||
:class:`Fill` operations to perform. It is gap-fill only: a provider id
|
||||
that already has a value is never planned for change; a DISAGREEING
|
||||
embedded id is reported as a conflict instead.
|
||||
|
||||
- :func:`apply_reconcile_plan` writes a plan, one guarded ``UPDATE`` per
|
||||
id column: ``WHERE id = ? AND ({id_col} IS NULL OR {id_col} = '')``.
|
||||
The guard makes the gap-fill ATOMIC — even if an enrichment worker
|
||||
matched the same entity between the plan's read and this write, the
|
||||
fill simply affects 0 rows instead of clobbering the worker's value.
|
||||
Columns are introspected first so a schema version missing a provider's
|
||||
columns is skipped, not errored.
|
||||
|
||||
Scope note: the MusicBrainz *recording* (track) ID is intentionally not
|
||||
reconciled — on ID3 it lives in a ``UFID`` frame the shared reader
|
||||
doesn't surface and the Vorbis ``musicbrainz_trackid`` convention is
|
||||
format-ambiguous. MB *album* and *artist* IDs (which drive most worker
|
||||
API calls) ARE reconciled, as are the clean per-provider track/album/
|
||||
artist IDs of the other services.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# Reconcile mapping table. Every row is a 4-tuple:
#     (key in the read_embedded_tags dict, entity, id column, status column)
# The id columns mirror web_server._SERVICE_ID_COLUMNS; they are repeated
# here so this module can be imported without the Flask app. Providers with
# a single id column (deezer/tidal/audiodb/genius) use the same column name
# on several entity types, which is fine because fills are keyed by
# (entity, column).
_RECONCILE_FIELDS = (
    # Spotify
    ('spotify_track_id', 'track', 'spotify_track_id', 'spotify_match_status'),
    ('spotify_album_id', 'album', 'spotify_album_id', 'spotify_match_status'),
    ('spotify_artist_id', 'artist', 'spotify_artist_id', 'spotify_match_status'),
    # iTunes
    ('itunes_track_id', 'track', 'itunes_track_id', 'itunes_match_status'),
    ('itunes_album_id', 'album', 'itunes_album_id', 'itunes_match_status'),
    ('itunes_artist_id', 'artist', 'itunes_artist_id', 'itunes_match_status'),
    # MusicBrainz (album + artist only)
    ('musicbrainz_albumid', 'album', 'musicbrainz_release_id', 'musicbrainz_match_status'),
    ('musicbrainz_artistid', 'artist', 'musicbrainz_id', 'musicbrainz_match_status'),
    # Deezer
    ('deezer_track_id', 'track', 'deezer_id', 'deezer_match_status'),
    ('deezer_album_id', 'album', 'deezer_id', 'deezer_match_status'),
    ('deezer_artist_id', 'artist', 'deezer_id', 'deezer_match_status'),
    # Tidal
    ('tidal_track_id', 'track', 'tidal_id', 'tidal_match_status'),
    ('tidal_album_id', 'album', 'tidal_id', 'tidal_match_status'),
    ('tidal_artist_id', 'artist', 'tidal_id', 'tidal_match_status'),
    # AudioDB
    ('audiodb_track_id', 'track', 'audiodb_id', 'audiodb_match_status'),
    ('audiodb_album_id', 'album', 'audiodb_id', 'audiodb_match_status'),
    ('audiodb_artist_id', 'artist', 'audiodb_id', 'audiodb_match_status'),
    # Genius
    ('genius_track_id', 'track', 'genius_id', 'genius_match_status'),
    # Last.fm embeds a single LASTFM_URL — sourced from get_track_info(), so
    # it is the TRACK's url. It maps to tracks.lastfm_url only (artist/album
    # last.fm urls are different urls and aren't carried in the file).
    ('lastfm_url', 'track', 'lastfm_url', 'lastfm_match_status'),
)

# Entity kinds handled by this module, and the table each one lives in.
_ENTITIES = ('track', 'album', 'artist')
_ENTITY_TABLE = dict(track='tracks', album='albums', artist='artists')
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Fill:
    """A single gap-fill operation: one provider id for one entity."""

    # Target entity kind: 'track', 'album' or 'artist'.
    entity: str
    # Id column to populate, e.g. 'spotify_artist_id'.
    id_column: str
    # Paired match-status column, e.g. 'spotify_match_status'.
    status_column: str
    # The embedded id that will be written.
    value: str
|
||||
|
||||
|
||||
@dataclass
class ReconcilePlan:
    """Result of planning one file's tags against its current DB rows.

    ``fills`` holds the gap-fill operations to apply (only for id columns
    that were empty). ``already_present`` counts embedded ids equal to the
    value already stored, which need no write. ``conflicts`` records
    embedded ids that DISAGREE with a stored value; those are never applied
    and are kept for review.
    """

    fills: List["Fill"] = field(default_factory=list)
    already_present: int = 0
    conflicts: List[Dict[str, str]] = field(default_factory=list)

    @property
    def filled(self) -> int:
        """Number of planned fills."""
        return len(self.fills)

    @property
    def has_updates(self) -> bool:
        """Whether at least one fill is planned."""
        return len(self.fills) > 0

    def fills_for(self, entity: str) -> List["Fill"]:
        """Return the planned fills that target ``entity``, in plan order."""
        return list(filter(lambda planned: planned.entity == entity, self.fills))
|
||||
|
||||
|
||||
@dataclass
class ReconcileApplied:
    """Tallies from actually writing a plan (taken from the real ``rowcount``)."""

    # Distinct entity rows that were touched.
    rows_updated: int = 0
    # Id columns that actually landed, i.e. the guard passed.
    ids_filled: int = 0
|
||||
|
||||
|
||||
def _clean(value: Any) -> Optional[str]:
    """Coerce a tag/column value to a stripped, non-empty string.

    ``None`` stays ``None``; any other value is passed through ``str()`` and
    stripped, and an empty result is reported as ``None``.
    """
    text = "" if value is None else str(value).strip()
    return text if text else None
|
||||
|
||||
|
||||
def plan_reconcile(
    embedded_tags: Optional[Dict[str, Any]],
    current_ids: Optional[Dict[str, Dict[str, Any]]],
) -> ReconcilePlan:
    """Work out which provider-id columns one file's tags can gap-fill.

    Args:
        embedded_tags: the ``tags`` dict from ``read_embedded_tags`` (flat
            ``friendly_key -> value``). ``None`` or empty gives an empty plan.
        current_ids: ``{'track': {...}, 'album': {...}, 'artist': {...}}``,
            each inner dict holding that entity's CURRENT column values (at
            least the id columns handled here). A missing entity or key is
            treated as empty, i.e. eligible to fill.

    Returns:
        A :class:`ReconcilePlan`. Only empty id columns are planned; an
        embedded id equal to the stored one bumps ``already_present``, and a
        differing one is appended to ``conflicts`` without being planned.
    """
    tags = embedded_tags if embedded_tags else {}
    stored = current_ids if current_ids else {}
    plan = ReconcilePlan()
    # (entity, id column) -> value already scheduled during this call.
    scheduled: Dict[tuple, str] = {}

    def note_conflict(entity: str, column: str, existing: str, embedded: str) -> None:
        plan.conflicts.append({
            'entity': entity, 'column': column,
            'existing': existing, 'embedded': embedded,
        })

    for tag_key, entity, id_col, status_col in _RECONCILE_FIELDS:
        embedded = _clean(tags.get(tag_key))
        if embedded is None:
            # Tag absent or blank: nothing to reconcile for this row.
            continue

        entity_row = stored.get(entity) or {}
        have = _clean(entity_row.get(id_col))
        if have is not None:
            # Column already holds a value: never planned, only classified.
            if have == embedded:
                plan.already_present += 1
            else:
                note_conflict(entity, id_col, have, embedded)
            continue

        slot = (entity, id_col)
        earlier = scheduled.get(slot)
        if earlier is None:
            scheduled[slot] = embedded
            plan.fills.append(Fill(entity, id_col, status_col, embedded))
        elif earlier != embedded:
            # Same column already scheduled this pass with a different value.
            note_conflict(entity, id_col, earlier, embedded)

    return plan
|
||||
|
||||
|
||||
@dataclass
class TrackReconcileResult:
    """What happened when one track row was reconciled against its file's tags."""

    # Write counts returned by the apply step.
    applied: 'ReconcileApplied'
    # Number of conflicts found while planning.
    conflicts: int = 0
    # False when the file's tags couldn't be read.
    readable: bool = True
|
||||
|
||||
|
||||
def reconcile_track_row(
    cursor,
    track_row: Dict[str, Any],
    album_map: Dict[str, Dict[str, Any]],
    artist_map: Dict[str, Dict[str, Any]],
    embedded_tags: Optional[Dict[str, Any]],
) -> TrackReconcileResult:
    """Reconcile one track row + its parent album/artist against one file.

    Thin orchestration over :func:`plan_reconcile` and
    :func:`apply_reconcile_plan`, kept separate so the per-track logic (id
    extraction, plan -> apply chaining, freshening the in-memory parent maps)
    can be tested without the Flask job.

    Args:
        cursor: open DB cursor handed to :func:`apply_reconcile_plan`; this
            function never commits.
        track_row: the track's current column values. ``id``, ``album_id``
            and ``artist_id`` are read from it; parent ids are stringified
            before being used as map keys.
        album_map: album-id (str) -> current column dict.
        artist_map: artist-id (str) -> current column dict.
        embedded_tags: the ``tags`` dict from ``read_embedded_tags``.
            ``None`` means the file could not be read.

    Returns:
        A :class:`TrackReconcileResult`. ``readable`` is ``False`` only when
        ``embedded_tags`` is ``None``; a readable file with no tags (an empty
        dict) yields an empty plan and ``readable=True``.

    Side effects:
        ``album_map`` / ``artist_map`` are UPDATED in place with every parent
        fill that was planned (whether or not the guarded UPDATE landed), so
        a later track on the same album/artist sees the value and does not
        re-plan it. DB safety comes from the guarded UPDATE in apply, never
        from these maps.
    """
    # Only None signals an unreadable file. An empty dict is a readable file
    # that carries no tags and must not be counted as unreadable.
    if embedded_tags is None:
        return TrackReconcileResult(ReconcileApplied(), 0, readable=False)

    raw_album_id = track_row.get('album_id')
    raw_artist_id = track_row.get('artist_id')
    album_id = str(raw_album_id) if raw_album_id is not None else None
    artist_id = str(raw_artist_id) if raw_artist_id is not None else None

    plan = plan_reconcile(embedded_tags, {
        'track': track_row,
        'album': album_map.get(album_id, {}) if album_id else {},
        'artist': artist_map.get(artist_id, {}) if artist_id else {},
    })
    applied = apply_reconcile_plan(cursor, {
        'track': track_row.get('id'), 'album': album_id, 'artist': artist_id,
    }, plan)

    # Keep the parent maps fresh for sibling tracks processed later.
    if album_id:
        for f in plan.fills_for('album'):
            album_map.setdefault(album_id, {})[f.id_column] = f.value
    if artist_id:
        for f in plan.fills_for('artist'):
            artist_map.setdefault(artist_id, {})[f.id_column] = f.value

    return TrackReconcileResult(applied, len(plan.conflicts), readable=True)
|
||||
|
||||
|
||||
def _existing_columns(cursor, table: str) -> set:
    """Collect the column names of ``table`` via ``PRAGMA table_info``.

    Used as a migration-safe guard: callers check membership before writing
    to a column.
    """
    cursor.execute(f"PRAGMA table_info({table})")
    names = set()
    for column_info in cursor.fetchall():
        # Index 1 of each table_info row is the column name.
        names.add(column_info[1])
    return names
|
||||
|
||||
|
||||
def apply_reconcile_plan(cursor, entity_ids: Dict[str, Any], plan: ReconcilePlan) -> ReconcileApplied:
    """Apply a :class:`ReconcilePlan` to the DB via ``cursor``.

    Each fill is a single guarded ``UPDATE``:

        UPDATE {table} SET {id}=?, {status}='matched', {attempted}=now
        WHERE id=? AND ({id} IS NULL OR {id}='')

    The ``id IS NULL OR id=''`` guard makes the gap-fill atomic: if the
    column became non-empty between the plan's read and now (an enrichment
    worker matched it concurrently), the UPDATE affects 0 rows and the
    worker's value is preserved. Only columns that exist on the table are
    written (introspected + cached per call), so a schema missing a
    provider's columns is silently skipped.

    Args:
        cursor: an open DB cursor (caller owns the transaction/commit).
        entity_ids: ``{'track': id, 'album': id, 'artist': id}``. An entity
            with no id (missing, ``None`` or ``''``) is skipped.
        plan: the plan whose ``fills`` are written, in order.

    Returns:
        A :class:`ReconcileApplied` with counts derived from real rowcounts:
        ``ids_filled`` counts UPDATEs that changed a row, ``rows_updated``
        counts the distinct ``(entity, id)`` rows they touched.
    """
    result = ReconcileApplied()
    touched: set = set()
    col_cache: Dict[str, set] = {}

    for fill in plan.fills:
        ent_id = entity_ids.get(fill.entity)
        if ent_id is None or ent_id == '':
            continue
        table = _ENTITY_TABLE[fill.entity]
        if table not in col_cache:
            col_cache[table] = _existing_columns(cursor, table)
        cols = col_cache[table]
        if fill.id_column not in cols:
            # Schema lacks this provider's id column: nothing to write.
            continue

        # Every identifier interpolated into the SQL below has been checked
        # against the table's real columns; values are bound as parameters.
        assignments = [f"{fill.id_column} = ?"]
        values: List[Any] = [fill.value]
        if fill.status_column in cols:
            assignments.append(f"{fill.status_column} = ?")
            values.append('matched')
        attempted = fill.status_column.replace('_match_status', '_last_attempted')
        if attempted in cols:
            assignments.append(f"{attempted} = CURRENT_TIMESTAMP")

        cursor.execute(
            f"UPDATE {table} SET {', '.join(assignments)} "
            f"WHERE id = ? AND ({fill.id_column} IS NULL OR {fill.id_column} = '')",
            values + [str(ent_id)],
        )
        # DB-API cursors report -1 when the row count is undetermined. Only
        # a positive count proves the guarded UPDATE landed, so -1 must not
        # be counted as a fill.
        if cursor.rowcount > 0:
            result.ids_filled += 1
            touched.add((fill.entity, str(ent_id)))

    result.rows_updated = len(touched)
    return result
|
||||
@ -0,0 +1,273 @@
|
||||
"""Tests for core/library/embedded_id_reconcile.py.
|
||||
|
||||
The reconcile job reads provider IDs already embedded in a file's tags
|
||||
(by SoulSync or MusicBrainz Picard) and gap-fills them into the library
|
||||
DB so enrichment workers skip the API call. These pin the guarantees that
|
||||
make it safe to run across a whole library while workers run concurrently:
|
||||
|
||||
1. gap-fill only — an existing id is NEVER overwritten,
|
||||
2. disagreements are reported as conflicts, not applied,
|
||||
3. the write is ATOMICALLY guarded — if a worker fills the column
|
||||
between plan and apply, the apply no-ops (no clobber).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
|
||||
from core.library.embedded_id_reconcile import (
|
||||
Fill,
|
||||
ReconcileApplied,
|
||||
ReconcilePlan,
|
||||
apply_reconcile_plan,
|
||||
plan_reconcile,
|
||||
reconcile_track_row,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# plan_reconcile — the pure planning layer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_empty_inputs_yield_empty_plan():
    """``None`` tags and ``None`` current ids give a plan with nothing in it."""
    empty = plan_reconcile(None, None)

    assert isinstance(empty, ReconcilePlan)
    assert empty.has_updates is False
    assert empty.filled == 0
    assert empty.conflicts == []
|
||||
|
||||
|
||||
def test_fills_all_three_entities_from_one_file():
    """Track, album and artist Spotify ids in one file plan three fills."""
    embedded = {
        'spotify_track_id': 'TRK',
        'spotify_album_id': 'ALB',
        'spotify_artist_id': 'ART',
    }
    plan = plan_reconcile(embedded, {'track': {}, 'album': {}, 'artist': {}})

    assert plan.filled == 3
    planned = {(fill.entity, fill.id_column): fill.value for fill in plan.fills}
    assert planned[('track', 'spotify_track_id')] == 'TRK'
    assert planned[('album', 'spotify_album_id')] == 'ALB'
    assert planned[('artist', 'spotify_artist_id')] == 'ART'

    # Each Fill carries its paired status column.
    first_track_fill = plan.fills_for('track')[0]
    assert first_track_fill.status_column == 'spotify_match_status'
|
||||
|
||||
|
||||
def test_never_overwrites_an_existing_id():
    """A stored id that differs from the embedded one is a conflict, not a fill."""
    stored = {'artist': {'spotify_artist_id': 'EXISTING'}}
    plan = plan_reconcile({'spotify_artist_id': 'NEW'}, stored)

    assert plan.filled == 0
    assert plan.fills_for('artist') == []
    assert len(plan.conflicts) == 1

    conflict = plan.conflicts[0]
    assert conflict['existing'] == 'EXISTING'
    assert conflict['embedded'] == 'NEW'
    assert conflict['entity'] == 'artist'
|
||||
|
||||
|
||||
def test_matching_existing_id_is_noop_not_conflict():
    """An embedded id equal to the stored one is counted, not planned or flagged."""
    embedded = {'spotify_artist_id': 'SAME'}
    stored = {'artist': {'spotify_artist_id': 'SAME'}}

    plan = plan_reconcile(embedded, stored)

    assert plan.filled == 0
    assert plan.conflicts == []
    assert plan.already_present == 1
|
||||
|
||||
|
||||
def test_blank_and_whitespace_values_ignored():
    """Whitespace-only, empty and ``None`` tag values plan nothing."""
    blank_tags = {
        'spotify_artist_id': ' ',
        'spotify_album_id': '',
        'itunes_track_id': None,
    }
    plan = plan_reconcile(blank_tags, {'track': {}, 'album': {}, 'artist': {}})
    assert plan.has_updates is False
|
||||
|
||||
|
||||
def test_whitespace_padded_embedded_id_is_trimmed_and_filled():
    """Padding around an embedded id is stripped before it is planned."""
    padded = {'spotify_track_id': ' TRK '}
    track_fills = plan_reconcile(padded, {'track': {}}).fills_for('track')
    assert track_fills[0].value == 'TRK'
|
||||
|
||||
|
||||
def test_single_column_provider_maps_per_entity():
    """Deezer's shared ``deezer_id`` column yields one fill per entity."""
    # Deezer/Tidal/AudioDB reuse one id column across entity types, so
    # fills have to be keyed by entity to avoid colliding.
    embedded = {
        'deezer_track_id': 'DT',
        'deezer_album_id': 'DA',
        'deezer_artist_id': 'DR',
    }
    plan = plan_reconcile(embedded, {'track': {}, 'album': {}, 'artist': {}})

    value_by_entity = {fill.entity: fill.value for fill in plan.fills}
    assert value_by_entity == {'track': 'DT', 'album': 'DA', 'artist': 'DR'}
    assert plan.filled == 3
|
||||
|
||||
|
||||
def test_mb_album_and_artist_filled_track_recording_skipped():
    """MusicBrainz album/artist ids are planned; the recording id is not."""
    embedded = {
        'musicbrainz_albumid': 'MBA',
        'musicbrainz_artistid': 'MBR',
        'musicbrainz_trackid': 'MBT',
    }
    plan = plan_reconcile(embedded, {'track': {}, 'album': {}, 'artist': {}})

    planned = {(fill.entity, fill.id_column): fill.value for fill in plan.fills}
    assert planned[('album', 'musicbrainz_release_id')] == 'MBA'
    assert planned[('artist', 'musicbrainz_id')] == 'MBR'
    # The recording id is not reconciled, so no track fill appears.
    assert plan.fills_for('track') == []
|
||||
|
||||
|
||||
def test_lastfm_url_maps_to_track_only():
    """The embedded Last.fm url fills the track row and nothing else."""
    # The file carries one LASTFM_URL, which is the TRACK's last.fm url. It
    # must land on tracks.lastfm_url and must NOT spread to album/artist,
    # whose last.fm urls are different urls entirely.
    embedded = {'lastfm_url': 'https://last.fm/music/A/_/Song'}
    plan = plan_reconcile(embedded, {'track': {}, 'album': {}, 'artist': {}})

    assert plan.filled == 1
    track_fill = plan.fills_for('track')[0]
    assert track_fill.id_column == 'lastfm_url'
    assert track_fill.status_column == 'lastfm_match_status'
    assert plan.fills_for('album') == []
    assert plan.fills_for('artist') == []
|
||||
|
||||
|
||||
def test_partial_fill_when_one_entity_already_matched():
    """Only the empty entity is planned when the other already holds the id."""
    embedded = {'spotify_artist_id': 'ART', 'spotify_album_id': 'ALB'}
    stored = {'artist': {'spotify_artist_id': 'ART'}, 'album': {}}

    plan = plan_reconcile(embedded, stored)

    assert plan.filled == 1
    album_fills = plan.fills_for('album')
    assert album_fills[0].value == 'ALB'
    assert plan.fills_for('artist') == []
    assert plan.already_present == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# apply_reconcile_plan — the DB layer (in-memory sqlite)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_db():
    """Create an in-memory SQLite DB with minimal tracks/albums/artists tables.

    Each table has ``id`` plus the Spotify id, match-status and
    last-attempted columns, and is seeded with one row (``t1``, ``al1``,
    ``ar1``). Returns ``(connection, cursor)``.
    """
    conn = sqlite3.connect(':memory:')
    cur = conn.cursor()

    id_column_by_table = (
        ('tracks', 'spotify_track_id'),
        ('albums', 'spotify_album_id'),
        ('artists', 'spotify_artist_id'),
    )
    for table, idcol in id_column_by_table:
        cur.execute(f"""CREATE TABLE {table} (id TEXT PRIMARY KEY, {idcol} TEXT,
            spotify_match_status TEXT, spotify_last_attempted TIMESTAMP)""")

    seed_statements = (
        "INSERT INTO tracks (id) VALUES ('t1')",
        "INSERT INTO albums (id) VALUES ('al1')",
        "INSERT INTO artists (id) VALUES ('ar1')",
    )
    for statement in seed_statements:
        cur.execute(statement)

    conn.commit()
    return conn, cur
|
||||
|
||||
|
||||
def test_apply_writes_ids_status_and_timestamp():
    """Applying a three-entity plan writes the id, status and attempt time."""
    conn, cur = _make_db()
    embedded = {
        'spotify_track_id': 'TRK',
        'spotify_album_id': 'ALB',
        'spotify_artist_id': 'ART',
    }
    plan = plan_reconcile(embedded, {'track': {}, 'album': {}, 'artist': {}})
    entity_ids = {'track': 't1', 'album': 'al1', 'artist': 'ar1'}

    applied = apply_reconcile_plan(cur, entity_ids, plan)
    conn.commit()

    assert isinstance(applied, ReconcileApplied)
    assert applied.rows_updated == 3
    assert applied.ids_filled == 3

    cur.execute("SELECT spotify_track_id, spotify_match_status, spotify_last_attempted FROM tracks WHERE id='t1'")
    tid, status, attempted = cur.fetchone()
    assert tid == 'TRK'
    assert status == 'matched'
    assert attempted is not None
|
||||
|
||||
|
||||
def test_apply_guard_blocks_overwrite_under_concurrency():
    """A value written after planning but before apply is left untouched."""
    # THE headline hardening: the plan saw an empty column, then a worker
    # filled it before we applied. The guarded UPDATE has to no-op and keep
    # the worker's value.
    conn, cur = _make_db()

    # Planned while the column was still empty.
    plan = plan_reconcile({'spotify_artist_id': 'FROM_FILE'}, {'artist': {}})

    # Stand-in for a concurrent enrichment worker matching it meanwhile.
    cur.execute("UPDATE artists SET spotify_artist_id='FROM_WORKER', spotify_match_status='matched' WHERE id='ar1'")
    conn.commit()

    applied = apply_reconcile_plan(cur, {'artist': 'ar1'}, plan)
    conn.commit()

    # The guard blocked the write.
    assert applied.ids_filled == 0
    assert applied.rows_updated == 0

    cur.execute("SELECT spotify_artist_id FROM artists WHERE id='ar1'")
    stored_value = cur.fetchone()[0]
    assert stored_value == 'FROM_WORKER'  # worker's value preserved
|
||||
|
||||
|
||||
def test_apply_guard_treats_empty_string_as_fillable():
    """An id column holding ``''`` (not NULL) still counts as empty."""
    conn, cur = _make_db()
    # Store an empty string rather than NULL.
    cur.execute("UPDATE artists SET spotify_artist_id='' WHERE id='ar1'")
    conn.commit()

    plan = plan_reconcile({'spotify_artist_id': 'ART'}, {'artist': {}})
    applied = apply_reconcile_plan(cur, {'artist': 'ar1'}, plan)
    conn.commit()

    assert applied.ids_filled == 1
    cur.execute("SELECT spotify_artist_id FROM artists WHERE id='ar1'")
    stored_value = cur.fetchone()[0]
    assert stored_value == 'ART'
|
||||
|
||||
|
||||
def test_apply_skips_unknown_columns_without_erroring():
    """A fill whose column is absent from the schema is skipped silently."""
    # The minimal test schema has no tidal_id column. The plan targets it
    # anyway; apply must skip it without raising.
    conn, cur = _make_db()
    embedded = {'tidal_artist_id': 'TID', 'spotify_artist_id': 'ART'}
    plan = plan_reconcile(embedded, {'track': {}, 'album': {}, 'artist': {}})

    applied = apply_reconcile_plan(cur, {'artist': 'ar1'}, plan)
    conn.commit()

    cur.execute("SELECT spotify_artist_id FROM artists WHERE id='ar1'")
    stored_value = cur.fetchone()[0]
    assert stored_value == 'ART'
    # Only the spotify column, which exists, landed.
    assert applied.ids_filled == 1
|
||||
|
||||
|
||||
def test_apply_skips_entity_with_no_id():
    """A planned fill is dropped when its entity has no id to write to."""
    conn, cur = _make_db()
    plan = plan_reconcile({'spotify_album_id': 'ALB'}, {'album': {}})

    # Only a track id is supplied, so the album fill has no target row.
    applied = apply_reconcile_plan(cur, {'track': 't1'}, plan)

    assert applied.rows_updated == 0
    assert applied.ids_filled == 0
|
||||
|
||||
|
||||
def test_apply_empty_plan_is_noop():
    """Applying a plan with no fills reports zero rows and zero ids."""
    conn, cur = _make_db()
    nothing_planned = ReconcilePlan()

    applied = apply_reconcile_plan(cur, {'track': 't1'}, nothing_planned)

    assert applied.rows_updated == 0
    assert applied.ids_filled == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# reconcile_track_row — the per-track orchestration (id extraction, plan→apply,
|
||||
# sibling-map freshening)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_reconcile_track_row_unreadable_file_is_noop():
    """``None`` tags are reported as unreadable and write nothing."""
    conn, cur = _make_db()
    track_row = {'id': 't1'}

    result = reconcile_track_row(cur, track_row, {}, {}, None)

    assert result.readable is False
    assert result.applied.ids_filled == 0
|
||||
|
||||
|
||||
def test_reconcile_track_row_fills_track_and_parents():
    """One call fills the track, its album and its artist, and updates the maps."""
    conn, cur = _make_db()
    album_map = {'al1': {}}
    artist_map = {'ar1': {}}
    track_row = {'id': 't1', 'album_id': 'al1', 'artist_id': 'ar1'}
    tags = {
        'spotify_track_id': 'TRK',
        'spotify_album_id': 'ALB',
        'spotify_artist_id': 'ART',
    }

    result = reconcile_track_row(cur, track_row, album_map, artist_map, tags)
    conn.commit()

    assert result.readable is True
    assert result.applied.ids_filled == 3
    assert result.applied.rows_updated == 3
    # The parent maps were freshened in place.
    assert album_map['al1']['spotify_album_id'] == 'ALB'
    assert artist_map['ar1']['spotify_artist_id'] == 'ART'
|
||||
|
||||
|
||||
def test_reconcile_sibling_tracks_dont_refill_shared_parent():
    """A second track on the same album/artist only fills its own track id."""
    # Two tracks share an album and artist. The first one fills the parent
    # ids; the second has to find them already present through the freshened
    # maps and NOT apply them again.
    conn, cur = _make_db()
    cur.execute("INSERT INTO tracks (id) VALUES ('t2')")
    conn.commit()

    album_map = {'al1': {}}
    artist_map = {'ar1': {}}

    first_row = {'id': 't1', 'album_id': 'al1', 'artist_id': 'ar1'}
    first_tags = {'spotify_album_id': 'ALB', 'spotify_artist_id': 'ART', 'spotify_track_id': 'T1'}
    r1 = reconcile_track_row(cur, first_row, album_map, artist_map, first_tags)

    # Second track: same embedded album/artist ids, its own track id.
    second_row = {'id': 't2', 'album_id': 'al1', 'artist_id': 'ar1'}
    second_tags = {'spotify_album_id': 'ALB', 'spotify_artist_id': 'ART', 'spotify_track_id': 'T2'}
    r2 = reconcile_track_row(cur, second_row, album_map, artist_map, second_tags)
    conn.commit()

    assert r1.applied.ids_filled == 3  # track + album + artist
    assert r2.applied.ids_filled == 1  # only t2's own track id; parents already filled
    assert r2.conflicts == 0
|
||||
|
||||
|
||||
def test_reconcile_track_row_handles_null_parent_ids():
    """With no album/artist linkage only the track's own id is written."""
    conn, cur = _make_db()
    orphan_row = {'id': 't1', 'album_id': None, 'artist_id': None}
    tags = {'spotify_track_id': 'TRK', 'spotify_album_id': 'ALB'}

    result = reconcile_track_row(cur, orphan_row, {}, {}, tags)
    conn.commit()

    # The album fill has no album id to land on.
    assert result.applied.ids_filled == 1
    cur.execute("SELECT spotify_track_id FROM tracks WHERE id='t1'")
    stored_value = cur.fetchone()[0]
    assert stored_value == 'TRK'
|
||||
Loading…
Reference in new issue