mirror of https://github.com/Nezreka/SoulSync.git
Closes #585. When a Spotify source track had a versioned suffix not present in the local file ("Iron Man - 2012 - Remaster" vs "Iron Man"), the auto-matcher missed the pair. User could click Find & Add to pick the right local file — that worked, file got added to the Plex playlist — but the source track stayed in Missing while the added file appeared in Extra, because the matcher kept no record of the user-confirmed pairing. On the next sync the source track re-tried to download. Fix: every Find & Add selection now writes a (spotify_track_id → server_track_id) override into sync_match_cache at confidence=1.0. The matching algorithm runs an override pass BEFORE the existing exact and fuzzy passes, so any user-confirmed pair short-circuits straight to "matched" without going through title normalization. Covers every mismatch class — dash-suffix remasters, covers / karaoke, alt masters, cross-language titles, typo'd local files. - core/sync/match_overrides.py (new) — pure helpers resolve_match_overrides + record_manual_match. 18 boundary tests pin: cache hits, cache misses falling through to normal matching, stale-cache (server track removed) handled gracefully, str/int id coercion, partial cache hits, defensive against non-dict inputs and DB exceptions. - web_server.py — get_server_playlist_tracks runs the override pre-pass before exact/fuzzy matching. server_playlist_add_track accepts source_track_id + source_title + source_artist and persists the override after every successful add (Plex / Jellyfin / Navidrome). source_track_id added to source_tracks payload so the frontend has it. - webui/static/pages-extra.js — _serverSelectTrack sends source_track_id + source_title + source_artist when adding a track from a mirrored playlist context. - Sync match cache schema unchanged — already had UNIQUE (spotify_track_id, server_source) which fits the override semantics perfectly. Manual overrides distinguished from auto-discovered matches by confidence=1.0. 
Full suite: 3010 passed. (branch: pull/593/head)
parent
4226be761a
commit
083355ec8c
@ -0,0 +1,119 @@
|
||||
"""Sync match overrides — user-confirmed source→server track pairings.

When a user selects a local file via "Find & Add" on the Server Playlist
compare view, that choice must survive future syncs as a hard match,
bypassing the exact/fuzzy title-matching algorithm entirely. The pure
helpers here let the web layer resolve and persist such overrides
through the existing `sync_match_cache` table.

Override semantics:
- One mapping per (source_track_id, server_source); the table's UNIQUE
  constraint guarantees a single mapping per pair.
- Persisted with confidence=1.0, marking them apart from auto-discovered
  matches (those carry the real title-similarity score).
- Consulted at the START of matching — ahead of pass-1 exact and pass-2
  fuzzy. Sources resolved here never re-enter the normal matching pool.
- Stale-cache safe: when the cached server_track_id is absent from the
  current server_tracks list (track deleted server-side), the override
  is silently ignored and normal matching proceeds.
"""
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
|
||||
def resolve_match_overrides(
    source_tracks: List[Dict[str, Any]],
    server_tracks: List[Dict[str, Any]],
    cache_lookup: Callable[[str], Optional[Any]],
) -> Dict[int, int]:
    """Map source-track indexes to server-track indexes for cached overrides.

    Pure function. ``cache_lookup(source_track_id)`` returns the cached
    server_track_id or ``None``; the caller injects it (the web layer
    wraps the DB query).

    Returns ``{source_idx: server_idx}`` containing only pairs where:
    - the source track carries a non-empty ``source_track_id``
    - ``cache_lookup`` yields a server_track_id
    - that server_track_id is present in ``server_tracks`` (stale cache
      entries pointing at deleted tracks are skipped)
    - the server track was not already claimed by an earlier override
      (defensive — the cache table's UNIQUE constraint prevents this
      in practice)

    The caller short-circuits its per-source matching loop with the
    returned dict: indexes present here skip the exact/fuzzy passes.
    """
    if not source_tracks or not server_tracks:
        return {}

    # First occurrence of each server id wins. Ids are compared as
    # strings so int-valued cache entries match str server ids
    # (e.g. Plex ratingKey).
    index_by_server_id: Dict[str, int] = {}
    for idx, entry in enumerate(server_tracks):
        entry_id = entry.get("id") if isinstance(entry, dict) else None
        if entry_id is None:
            continue
        index_by_server_id.setdefault(str(entry_id), idx)

    pairs: Dict[int, int] = {}
    claimed: set[int] = set()

    for src_idx, entry in enumerate(source_tracks):
        source_id = entry.get("source_track_id") if isinstance(entry, dict) else None
        if not source_id:
            continue
        try:
            mapped_id = cache_lookup(str(source_id))
        except Exception:
            mapped_id = None  # a failing lookup is treated as a cache miss
        if not mapped_id:
            continue
        svr_idx = index_by_server_id.get(str(mapped_id))
        if svr_idx is None or svr_idx in claimed:
            continue
        pairs[src_idx] = svr_idx
        claimed.add(svr_idx)

    return pairs
||||
|
||||
def record_manual_match(
    db: Any,
    source_track_id: str,
    server_source: str,
    server_track_id: Any,
    server_track_title: str = "",
    source_title: str = "",
    source_artist: str = "",
) -> bool:
    """Persist a user-confirmed source→server pairing as a hard override.

    Delegates to ``db.save_sync_match_cache`` with confidence=1.0 — the
    marker that distinguishes manual matches from auto-discovered ones.
    The normalized title/artist columns are informational only: the
    cache is keyed by ``(spotify_track_id, server_source)``, so the
    normalization exists purely for inspection and debugging.

    Returns True when the save succeeds, False on any failure (missing
    required args, DB without the method, DB error). Never raises.
    """
    required_missing = (
        not source_track_id
        or not server_source
        or server_track_id is None
    )
    if required_missing or not hasattr(db, "save_sync_match_cache"):
        return False

    try:
        saved = db.save_sync_match_cache(
            spotify_track_id=str(source_track_id),
            normalized_title=(source_title or "").lower().strip(),
            normalized_artist=(source_artist or "").lower().strip(),
            server_source=server_source,
            server_track_id=server_track_id,
            server_track_title=server_track_title or "",
            confidence=1.0,
        )
    except Exception:
        return False
    return bool(saved)
||||
@ -0,0 +1,193 @@
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from core.sync.match_overrides import record_manual_match, resolve_match_overrides
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# resolve_match_overrides — pre-pair source→server from cache
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_empty_inputs_return_empty_dict():
    # Either side empty short-circuits to "no overrides".
    no_hit = lambda _id: None
    assert resolve_match_overrides([], [], no_hit) == {}
    assert resolve_match_overrides([{"source_track_id": "x"}], [], lambda _id: "y") == {}
    assert resolve_match_overrides([], [{"id": "y"}], no_hit) == {}
||||
def test_single_cache_hit_returns_pair():
    src = {"source_track_id": "spotify-iron-man", "name": "Iron Man - 2012 - Remaster"}
    svr = {"id": 5001, "title": "Iron Man"}
    lookup = {"spotify-iron-man": 5001}.get
    assert resolve_match_overrides([src], [svr], lookup) == {0: 0}
||||
def test_multiple_overrides_resolve_correctly():
    sources = [{"source_track_id": sid} for sid in ("iron", "para", "war")]
    servers = [
        {"id": 5001, "title": "Iron Man"},
        {"id": 5002, "title": "Paranoid"},
        {"id": 5003, "title": "War Pigs"},
    ]
    lookup = {"iron": 5001, "para": 5002, "war": 5003}.get
    assert resolve_match_overrides(sources, servers, lookup) == {0: 0, 1: 1, 2: 2}
||||
def test_source_without_track_id_skipped():
    sources = [
        {"source_track_id": "iron", "name": "Iron Man"},
        {"name": "Paranoid"},  # legacy / non-mirrored entry: no source_track_id
    ]
    servers = [{"id": 5001, "title": "Iron Man"}, {"id": 5002, "title": "Paranoid"}]
    assert resolve_match_overrides(sources, servers, {"iron": 5001}.get) == {0: 0}
||||
def test_cache_miss_skipped():
    sources = [{"source_track_id": "iron"}, {"source_track_id": "para"}]
    servers = [{"id": 5001, "title": "Iron Man"}, {"id": 5002, "title": "Paranoid"}]
    # Lookup never hits → nothing pre-paired.
    assert resolve_match_overrides(sources, servers, lambda _sid: None) == {}
||||
def test_stale_cache_pointing_at_missing_server_track_skipped():
    # A match was cached, then the file vanished from the server: 5001 is
    # no longer in server_tracks, so no pair — normal matching takes over.
    result = resolve_match_overrides(
        [{"source_track_id": "iron"}],
        [{"id": 9999, "title": "Different Track"}],
        {"iron": 5001}.get,  # 5001 no longer exists
    )
    assert result == {}
||||
def test_server_id_str_int_coercion():
    # Cache may hold ints while server IDs are strings (Plex ratingKey is
    # str); the helper coerces both sides to str before comparing.
    result = resolve_match_overrides(
        [{"source_track_id": "iron"}],
        [{"id": "5001", "title": "Iron Man"}],
        {"iron": 5001}.get,  # int from cache
    )
    assert result == {0: 0}
||||
def test_two_sources_pointing_at_same_server_track_only_first_wins():
    # The UNIQUE constraint rules this out in production, but cache_lookup
    # is injectable, so the defensive dedupe is verified here.
    result = resolve_match_overrides(
        [{"source_track_id": "a"}, {"source_track_id": "b"}],
        [{"id": 5001, "title": "Iron Man"}],
        {"a": 5001, "b": 5001}.get,
    )
    assert result == {0: 0}
||||
def test_cache_lookup_raising_treated_as_miss():
    def exploding_lookup(_sid):
        raise RuntimeError("db down")
    result = resolve_match_overrides(
        [{"source_track_id": "iron"}],
        [{"id": 5001, "title": "Iron Man"}],
        exploding_lookup,
    )
    assert result == {}
||||
def test_non_dict_source_or_server_skipped():
    sources = [None, "string", {"source_track_id": "iron"}]
    servers = [{"id": 5001, "title": "Iron Man"}]
    result = resolve_match_overrides(sources, servers, {"iron": 5001}.get)
    assert result == {2: 0}  # only the dict source (idx 2) pairs with server idx 0
||||
def test_server_without_id_skipped():
    result = resolve_match_overrides(
        [{"source_track_id": "iron"}],
        [{"title": "Iron Man"}],  # server entry missing an id
        {"iron": 5001}.get,
    )
    assert result == {}
||||
def test_partial_cache_hits_only_pair_those():
    sources = [{"source_track_id": sid} for sid in ("iron", "para", "war")]
    servers = [
        {"id": 5001, "title": "Iron Man"},
        {"id": 5002, "title": "Paranoid"},
        {"id": 5003, "title": "War Pigs"},
    ]
    # "para" is uncached and falls through to normal matching.
    result = resolve_match_overrides(sources, servers, {"iron": 5001, "war": 5003}.get)
    assert result == {0: 0, 2: 2}
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# record_manual_match — persist user-confirmed pair
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_record_persists_with_confidence_one():
    db = MagicMock()
    db.save_sync_match_cache.return_value = True
    assert record_manual_match(
        db,
        source_track_id="spotify-iron-man",
        server_source="plex",
        server_track_id=5001,
        server_track_title="Iron Man",
        source_title="Iron Man - 2012 - Remaster",
        source_artist="Black Sabbath",
    ) is True
    db.save_sync_match_cache.assert_called_once()
    saved = db.save_sync_match_cache.call_args.kwargs
    expected = {
        "spotify_track_id": "spotify-iron-man",
        "server_source": "plex",
        "server_track_id": 5001,
        "server_track_title": "Iron Man",
        "confidence": 1.0,
        "normalized_title": "iron man - 2012 - remaster",
        "normalized_artist": "black sabbath",
    }
    for key, value in expected.items():
        assert saved[key] == value
||||
def test_record_returns_false_when_required_fields_missing():
    db = MagicMock()
    bad_calls = [
        {"source_track_id": "", "server_source": "plex", "server_track_id": 1},
        {"source_track_id": "x", "server_source": "", "server_track_id": 1},
        {"source_track_id": "x", "server_source": "plex", "server_track_id": None},
    ]
    for kwargs in bad_calls:
        assert record_manual_match(db, **kwargs) is False
    db.save_sync_match_cache.assert_not_called()
||||
def test_record_returns_false_when_db_save_returns_false():
    db = MagicMock()
    db.save_sync_match_cache.return_value = False
    ok = record_manual_match(db, source_track_id="x", server_source="plex", server_track_id=1)
    assert ok is False
||||
def test_record_swallows_db_exception():
    db = MagicMock()
    db.save_sync_match_cache.side_effect = RuntimeError("db boom")
    ok = record_manual_match(db, source_track_id="x", server_source="plex", server_track_id=1)
    assert ok is False
||||
def test_record_returns_false_when_db_lacks_method():
    class NoSaveDB:
        # Stand-in DB object with no save_sync_match_cache attribute.
        pass
    assert record_manual_match(NoSaveDB(), source_track_id="x", server_source="plex", server_track_id=1) is False
||||
def test_record_handles_empty_optional_strings():
    db = MagicMock()
    db.save_sync_match_cache.return_value = True
    assert record_manual_match(db, source_track_id="x", server_source="plex", server_track_id=1) is True
    saved = db.save_sync_match_cache.call_args.kwargs
    # Omitted optional strings persist as empty strings, not None.
    assert saved["normalized_title"] == ""
    assert saved["normalized_artist"] == ""
    assert saved["server_track_title"] == ""
||||
Loading…
Reference in new issue