mirror of https://github.com/Nezreka/SoulSync.git
GitHub issue #501 (@Tacobell444). After manually matching an album to a specific source ID via the match-chip UI, clicking "Enrich" on that album would fuzzy-search by name and overwrite the manual match with whatever the search returned — or revert the match status to ``not_found`` if name search missed. Reorganize would then read the now-wrong ID and move files to the wrong destination. Root cause was in the per-source enrichment workers' ``_process_*_individual`` methods. Several workers (Spotify, iTunes) ran search-by-name unconditionally with no check for an existing stored ID. Others (Deezer, Tidal, Qobuz) skipped on existing-ID but without refreshing metadata — preserved the ID but didn't actually honor the user's intent of "use this match to pull fresh data". Same-shape lift: the same fix was needed in 5 workers, so the shared behavior was extracted into ``core/enrichment/manual_match_honoring.py``: honor_stored_match( db, entity_table, entity_id, id_column, client_fetch_fn, on_match_fn, log_prefix, ) -> bool Per-worker variability (DB column name, client fetch method, response shape) plugs in via callbacks. Workers call the helper at the top of ``_process_album_individual`` / ``_process_track_individual``; if it returns True, the manual match was honored and the search-by-name fallback is skipped. If False (no stored ID, fetch failed, or empty response), the worker's existing search-by-name flow runs as before. Workers wired: - spotify_worker — album + track (was overwriting; now honors) - itunes_worker — album + track (was overwriting; now honors) - deezer_worker — album + track (was skip-on-id; now refreshes) - tidal_worker — album + track (was skip-on-id; now refreshes) - qobuz_worker — album + track (was skip-on-id; now refreshes) Workers left alone (already correct): - discogs_worker — already had an inline stored-ID fast path that refreshes metadata. Same behavior, just inline; refactoring to use the shared helper would be churn for zero behavior change. 
- audiodb_worker — same — inline fast path with full metadata refresh. - musicbrainz_worker — preserves existing MBID and marks status, which is the correct behavior for MB (the MBID itself is the match payload — no separate metadata fetch). - lastfm_worker / genius_worker — name-based services with no source-specific IDs to honor. Inherent re-search per call. Reorganize fixed indirectly — it always honored stored IDs correctly via ``library_reorganize._extract_source_ids``. The "Reorganize broken" symptom was downstream of broken Enrich corrupting the stored ID. Tests: - ``tests/enrichment/test_manual_match_honoring.py`` — 11 tests pinning the shared helper contract: stored-ID fast path, no-ID fallthrough, empty-string treated as no ID, missing row, fetch exception caught and falls through, fetch returns None falls through, callback exceptions propagate, configurable table + column, defensive table-name whitelist. - Per-worker wiring NOT tested individually — the workers depend on live DB / client objects that are heavy to mock. The shared helper's contract is pinned; per-worker call sites are short enough to verify by code review. 2173/2173 full suite green. Closes #501.pull/510/head
parent
4dd0640799
commit
cceffbd8ec
@ -0,0 +1,128 @@
|
||||
"""Honor manually-matched source IDs in per-source enrichment workers.
|
||||
|
||||
GitHub issue #501 (@Tacobell444): every per-source enrichment worker's
|
||||
``_process_*_individual`` method ran a fuzzy text search on the album /
|
||||
track name and overwrote the stored source ID with whatever the search
|
||||
returned. If the user had manually matched an album to a specific source
|
||||
ID (e.g. set ``albums.spotify_album_id = 'ABC'`` via the match-chip UI),
|
||||
the next "Enrich" click would search by name → pick a different result
|
||||
→ overwrite the manual match with the wrong ID, OR fail to match
|
||||
anything and revert the status to ``not_found``.
|
||||
|
||||
This module lifts the "honor stored ID" fast path into one shared
|
||||
helper. Each per-source worker (Spotify / iTunes / Deezer / Discogs /
|
||||
MusicBrainz / AudioDB / Tidal / Qobuz) calls it before falling back
|
||||
to its existing search-by-name flow. Same fix in 8 workers gets
|
||||
exactly one implementation; per-worker variability (column name,
|
||||
client fetch method, response shape) plugs in via callbacks.
|
||||
|
||||
Lift what's truly shared. Caller knows its own column + client
|
||||
method + update logic; the helper just orchestrates.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from utils.logging_config import get_logger
|
||||
|
||||
logger = get_logger("enrichment.manual_match_honoring")
|
||||
|
||||
|
||||
def _read_id_column(db, entity_table: str, entity_id, id_column: str) -> Optional[str]:
|
||||
"""Read the stored source ID for one entity. Returns None when the
|
||||
column is empty / unset."""
|
||||
if entity_table not in ('albums', 'tracks', 'artists'):
|
||||
# Defensive: we only operate on these three. Avoids SQL injection
|
||||
# via a bad table name (id_column is also restricted to known
|
||||
# column names by callers but defense in depth never hurts).
|
||||
return None
|
||||
conn = db._get_connection()
|
||||
try:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
f"SELECT {id_column} FROM {entity_table} WHERE id = ?",
|
||||
(entity_id,),
|
||||
)
|
||||
row = cursor.fetchone()
|
||||
finally:
|
||||
conn.close()
|
||||
if not row:
|
||||
return None
|
||||
value = row[0] if not hasattr(row, 'keys') else row[id_column]
|
||||
return str(value).strip() if value else None
|
||||
|
||||
|
||||
def honor_stored_match(
    *,
    db,
    entity_table: str,
    entity_id,
    id_column: str,
    client_fetch_fn: Callable[[str], Any],
    on_match_fn: Callable[[Any, str, Any], None],
    log_prefix: str = '',
) -> bool:
    """Enrich one entity through its stored source ID, if one exists.

    Reads ``id_column`` on the entity's row; when a non-empty ID is
    stored (i.e. the user manually matched this entity), fetches fresh
    metadata via ``client_fetch_fn`` and applies it via ``on_match_fn``
    instead of letting the worker fuzzy-search by name.

    Args:
        db: ``MusicDatabase`` instance used for the column read.
        entity_table: ``'albums'``, ``'tracks'``, or ``'artists'``.
        entity_id: Library DB ID of the entity being enriched.
        id_column: Column on ``entity_table`` holding the source-
            specific ID (``spotify_album_id`` / ``itunes_album_id`` /
            ``deezer_id`` / etc).
        client_fetch_fn: Called with the stored ID; returns the raw
            source response (Album dataclass, dict, whatever the
            client yields). Typically ``self.client.get_album`` or
            ``self.client.get_track``.
        on_match_fn: Called with ``(entity_id, stored_id, api_response)``
            to persist the refreshed metadata; the worker owns the
            response shape, not this helper.
        log_prefix: Source display name for log lines (``'Spotify'`` /
            ``'iTunes'`` / etc).

    Returns:
        True when a stored ID existed, the fetch produced data, and the
        on-match callback ran — the caller counts a match and skips its
        search-by-name flow. False otherwise (no stored ID, fetch
        failure, or empty response) — the caller falls through to its
        existing search-by-name flow, the legacy behavior.

    Notes:
        Fetch exceptions are swallowed and logged at warning level so a
        flaky API degrades to the search path; callback exceptions
        propagate because they indicate real DB errors the worker
        should see.
    """
    stored_id = _read_id_column(db, entity_table, entity_id, id_column)
    if not stored_id:
        # No manual match recorded — nothing to honor.
        return False

    entity = entity_table[:-1]  # 'albums' -> 'album' for log lines

    try:
        payload = client_fetch_fn(stored_id)
    except Exception as exc:
        logger.warning(
            f"[{log_prefix}] Stored-ID fetch failed for "
            f"{entity} #{entity_id} (id={stored_id}): {exc}"
        )
        return False

    if not payload:
        logger.debug(
            f"[{log_prefix}] Stored ID {stored_id} for "
            f"{entity} #{entity_id} returned empty data — "
            f"falling through to search-by-name"
        )
        return False

    # Deliberately outside the try block: callback failures are real DB
    # errors the worker must surface, not something to fall through on.
    on_match_fn(entity_id, stored_id, payload)
    logger.info(
        f"[{log_prefix}] Honored manual match: "
        f"{entity} #{entity_id} → {id_column}={stored_id}"
    )
    return True
|
||||
@ -0,0 +1,302 @@
|
||||
"""Tests for ``core.enrichment.manual_match_honoring.honor_stored_match``.
|
||||
|
||||
The helper is the shared "fast-path enrichment via stored source ID"
|
||||
used by every per-source enrichment worker (Spotify / iTunes / Deezer
|
||||
/ Discogs / MusicBrainz / AudioDB / Tidal / Qobuz). It reads the
|
||||
stored ID from a configurable column, fetches via a caller-supplied
|
||||
client method, and invokes a caller-supplied update callback. Pin
|
||||
the contract so per-worker wiring can rely on uniform semantics.
|
||||
|
||||
Issue #501: enrichment workers were running fuzzy name search and
|
||||
overwriting manually-set source IDs. This helper is the lift point —
|
||||
all 8 workers will plug in the same way.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from core.enrichment.manual_match_honoring import honor_stored_match
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fake DB fixture (just enough to exercise _read_id_column)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _FakeConn:
|
||||
"""Minimal sqlite3.Connection-like that the helper can use."""
|
||||
|
||||
def __init__(self, real_conn):
|
||||
self._real = real_conn
|
||||
|
||||
def cursor(self):
|
||||
return self._real.cursor()
|
||||
|
||||
def close(self):
|
||||
# Don't actually close — tests share the connection.
|
||||
pass
|
||||
|
||||
|
||||
class _FakeDB:
    """Stand-in MusicDatabase.

    Supports ``_get_connection()`` returning a wrapper that doesn't
    close, so per-test in-memory state survives across helper
    invocations. The schema mirrors only the columns the helper
    touches: ``albums`` / ``tracks`` with a few source-ID columns.
    """

    def __init__(self):
        self._conn = sqlite3.connect(":memory:")
        self._conn.row_factory = sqlite3.Row
        cur = self._conn.cursor()
        cur.execute("""
            CREATE TABLE albums (
                id INTEGER PRIMARY KEY,
                spotify_album_id TEXT,
                deezer_id TEXT,
                itunes_album_id TEXT
            )
        """)
        cur.execute("""
            CREATE TABLE tracks (
                id INTEGER PRIMARY KEY,
                spotify_track_id TEXT,
                deezer_id TEXT
            )
        """)
        self._conn.commit()

    def _insert(self, table, row_id, id_columns):
        # Shared INSERT builder for insert_album / insert_track; the
        # table name is one of two hard-coded constants below.
        cols = ['id'] + list(id_columns.keys())
        placeholders = ','.join('?' for _ in cols)
        values = [row_id] + list(id_columns.values())
        self._conn.execute(
            f"INSERT INTO {table} ({','.join(cols)}) VALUES ({placeholders})",
            values,
        )
        self._conn.commit()

    def insert_album(self, album_id, **id_columns):
        """Insert an albums row; keyword args name the ID columns to set."""
        self._insert('albums', album_id, id_columns)

    def insert_track(self, track_id, **id_columns):
        """Insert a tracks row; keyword args name the ID columns to set."""
        self._insert('tracks', track_id, id_columns)

    def _get_connection(self):
        """Return a non-closing wrapper around the shared connection."""
        return _FakeConn(self._conn)
|
||||
|
||||
|
||||
@pytest.fixture
def db():
    """Fresh ``_FakeDB`` (empty albums/tracks tables) per test."""
    return _FakeDB()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stored-ID fast path (the new behavior)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_honors_stored_id_when_present(db):
    """Stored ID present → fetch it, hand the payload to on_match, and
    return True so the caller skips its search-by-name flow."""
    db.insert_album(42, spotify_album_id='SP-ABC')
    payload = {'id': 'SP-ABC', 'name': 'Real Album'}
    fetch_mock = MagicMock(return_value=payload)
    match_mock = MagicMock()

    honored = honor_stored_match(
        db=db,
        entity_table='albums',
        entity_id=42,
        id_column='spotify_album_id',
        client_fetch_fn=fetch_mock,
        on_match_fn=match_mock,
        log_prefix='Spotify',
    )

    assert honored is True
    fetch_mock.assert_called_once_with('SP-ABC')
    match_mock.assert_called_once_with(42, 'SP-ABC', payload)
|
||||
|
||||
|
||||
def test_returns_false_when_no_stored_id(db):
    """NULL stored ID → False with neither fetch nor callback invoked;
    the caller proceeds with its search-by-name fallback."""
    db.insert_album(42, spotify_album_id=None)
    fetch_mock = MagicMock()
    match_mock = MagicMock()

    honored = honor_stored_match(
        db=db,
        entity_table='albums',
        entity_id=42,
        id_column='spotify_album_id',
        client_fetch_fn=fetch_mock,
        on_match_fn=match_mock,
        log_prefix='Spotify',
    )

    assert honored is False
    fetch_mock.assert_not_called()
    match_mock.assert_not_called()
|
||||
|
||||
|
||||
def test_returns_false_when_stored_id_empty_string(db):
    """An empty-string ID behaves exactly like NULL: no fetch, False."""
    db.insert_album(42, spotify_album_id='')
    fetch_mock = MagicMock()
    match_mock = MagicMock()

    honored = honor_stored_match(
        db=db,
        entity_table='albums',
        entity_id=42,
        id_column='spotify_album_id',
        client_fetch_fn=fetch_mock,
        on_match_fn=match_mock,
    )

    assert honored is False
    fetch_mock.assert_not_called()
|
||||
|
||||
|
||||
def test_returns_false_when_entity_not_in_db(db):
    """Row absent entirely → False, and neither callback fires."""
    fetch_mock = MagicMock()
    match_mock = MagicMock()

    honored = honor_stored_match(
        db=db,
        entity_table='albums',
        entity_id=999,
        id_column='spotify_album_id',
        client_fetch_fn=fetch_mock,
        on_match_fn=match_mock,
    )

    assert honored is False
    fetch_mock.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Failure paths — fall through to search instead of crashing the worker
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_returns_false_when_fetch_raises(db):
    """A raising client fetch is caught (logged at warning) and yields
    False — the worker falls through to search rather than crashing on
    a transient API failure."""
    db.insert_album(42, spotify_album_id='SP-ABC')
    fetch_mock = MagicMock(side_effect=RuntimeError("API down"))
    match_mock = MagicMock()

    honored = honor_stored_match(
        db=db,
        entity_table='albums',
        entity_id=42,
        id_column='spotify_album_id',
        client_fetch_fn=fetch_mock,
        on_match_fn=match_mock,
    )

    assert honored is False
    match_mock.assert_not_called()
|
||||
|
||||
|
||||
def test_returns_false_when_fetch_returns_none(db):
    """A stored ID pointing at a removed/invalid catalog entry (fetch
    yields None) falls through to search instead of writing junk."""
    db.insert_album(42, spotify_album_id='SP-DEAD')
    fetch_mock = MagicMock(return_value=None)
    match_mock = MagicMock()

    honored = honor_stored_match(
        db=db,
        entity_table='albums',
        entity_id=42,
        id_column='spotify_album_id',
        client_fetch_fn=fetch_mock,
        on_match_fn=match_mock,
    )

    assert honored is False
    match_mock.assert_not_called()
|
||||
|
||||
|
||||
def test_returns_false_when_fetch_returns_empty_dict(db):
    """Any falsy fetch result — here an empty dict — skips the
    callback, same as None."""
    db.insert_album(42, spotify_album_id='SP-EMPTY')
    fetch_mock = MagicMock(return_value={})
    match_mock = MagicMock()

    honored = honor_stored_match(
        db=db,
        entity_table='albums',
        entity_id=42,
        id_column='spotify_album_id',
        client_fetch_fn=fetch_mock,
        on_match_fn=match_mock,
    )

    assert honored is False
    match_mock.assert_not_called()
|
||||
|
||||
|
||||
def test_on_match_exceptions_propagate(db):
    """on_match failures (DB write errors) must reach the worker, not
    be swallowed like fetch failures are."""
    db.insert_album(42, spotify_album_id='SP-ABC')
    fetch_mock = MagicMock(return_value={'id': 'SP-ABC'})
    match_mock = MagicMock(side_effect=ValueError("bad write"))

    with pytest.raises(ValueError, match="bad write"):
        honor_stored_match(
            db=db,
            entity_table='albums',
            entity_id=42,
            id_column='spotify_album_id',
            client_fetch_fn=fetch_mock,
            on_match_fn=match_mock,
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-table / per-column wiring
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_works_with_tracks_table(db):
    """The helper is table-generic: ``entity_table='tracks'`` behaves
    identically to 'albums'."""
    db.insert_track(7, spotify_track_id='SP-T-1')
    fetch_mock = MagicMock(return_value={'id': 'SP-T-1'})
    match_mock = MagicMock()

    honored = honor_stored_match(
        db=db,
        entity_table='tracks',
        entity_id=7,
        id_column='spotify_track_id',
        client_fetch_fn=fetch_mock,
        on_match_fn=match_mock,
    )

    assert honored is True
    fetch_mock.assert_called_once_with('SP-T-1')
|
||||
|
||||
|
||||
def test_works_with_alternate_columns(db):
    """``id_column`` is configurable: each worker reads its own column
    (deezer_id, itunes_album_id, ...) even when others are set too."""
    db.insert_album(42, deezer_id='12345', itunes_album_id='IT-99')
    fetch_mock = MagicMock(return_value={'id': '12345'})
    match_mock = MagicMock()

    # deezer_id is selected despite itunes_album_id also being present.
    honored = honor_stored_match(
        db=db,
        entity_table='albums',
        entity_id=42,
        id_column='deezer_id',
        client_fetch_fn=fetch_mock,
        on_match_fn=match_mock,
    )

    assert honored is True
    fetch_mock.assert_called_once_with('12345')
|
||||
|
||||
|
||||
def test_rejects_invalid_table_name(db):
    """Defensive whitelist: any table name outside albums/tracks/
    artists is refused up front — no SQL ever runs with it."""
    fetch_mock = MagicMock()
    match_mock = MagicMock()

    honored = honor_stored_match(
        db=db,
        entity_table='not_a_real_table; DROP TABLE albums',
        entity_id=1,
        id_column='spotify_album_id',
        client_fetch_fn=fetch_mock,
        on_match_fn=match_mock,
    )

    assert honored is False
    fetch_mock.assert_not_called()
|
||||
Loading…
Reference in new issue