mirror of https://github.com/Nezreka/SoulSync.git
#808: album-context qualifiers stop blocking library-presence matching
carlosjfcasero: 'Champagne Supernova (OurVinyl Sessions)' is in the library
but the artist page shows it unowned and wishlist cleanup never removes it.
Measured with the real catalogs: Deezer/iTunes title the TRACK with the
qualifier while the library track is bare (the qualifier lives in the album
title) — and _calculate_track_confidence crushed that pair to ~0.17: the
"clean" titles keep parenthetical words, so the length-ratio penalty treats
'Champagne Supernova' vs 'Champagne Supernova (OurVinyl Sessions)' as
different songs. (Also confirmed: the OurVinyl release is absent from
Deezer's discography for the artist, so its absence from the standard page's
25-release list reflects the source catalog, not a bug.)
Fix 1 — core.text.title_match.strip_redundant_context_qualifiers: a
parenthetical qualifier whose text appears (word-bounded) in the db track's
ALBUM title — or in the other title — restates release context and is
stripped for a comparison variant scored with its own length guard. Genuine
version markers keep their penalty: '(Live)' on a studio album appears in no
context and still blocks; '(Live)' on 'Live at Wembley' correctly matches —
owning the live album IS owning the live cut. Wired into
_calculate_track_confidence, so every check_track_exists consumer (wishlist
cleanup, discography dedup, repair jobs) benefits.
Fix 2 — the artist-page ownership endpoint's album gate: when album-aware
narrowing eliminates EVERY library candidate (the source's album naming just
doesn't resemble the library's — 'Jillette Johnson | OurVinyl Sessions' vs
'Champagne Supernova (OurVinyl Sessions)' ~0.5), fall back to artist-wide
title matching instead of declaring everything unowned off a failed
album-NAME comparison.
Tests: 8 — the exact reported pair end-to-end through check_track_exists,
word-boundary containment ('live' in 'alive' doesn't count), version-marker
safety both ways, and prefix songs still blocked. 1125 matching/wishlist/
library tests pass.
pull/812/head
parent
157d19f3b9
commit
f250eaa228
@ -0,0 +1,104 @@
|
||||
"""#808: parenthetical qualifiers that restate album context must not block
|
||||
library-presence matching.
|
||||
|
||||
carlosjfcasero's case: the wishlist held 'Champagne Supernova (OurVinyl
|
||||
Sessions)' (Deezer/iTunes title) while the bare library track was on the album
|
||||
'Champagne Supernova (OurVinyl Sessions)'. When one side's title carries the
|
||||
qualifier and the other doesn't, the length-ratio penalty crushed the pair to
|
||||
~0.17 — wishlist cleanup never recognised the owned edition and the track
|
||||
re-appeared every cycle. The qualifier appearing in the (db) album title
|
||||
proves it's album context, not a different version.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from core.text.title_match import strip_redundant_context_qualifiers
|
||||
from database.music_database import MusicDatabase
|
||||
|
||||
|
||||
# ── the pure helper ──────────────────────────────────────────────────────────
|
||||
|
||||
def test_qualifier_confirmed_by_album_is_stripped():
    """A parenthetical that also appears in the db album title is removed."""
    track_title = 'champagne supernova (ourvinyl sessions)'
    db_album_title = 'champagne supernova (ourvinyl sessions)'

    reduced = strip_redundant_context_qualifiers(track_title, db_album_title)

    assert reduced == 'champagne supernova'
|
||||
|
||||
|
||||
def test_version_marker_on_unrelated_album_is_kept():
    """Markers that the album context does not mention come back unchanged."""
    unrelated_pairs = (
        ('song (live)', 'studio album'),
        ('song (remix)', 'the album'),
    )
    for title, album in unrelated_pairs:
        # The expected output is the input title itself: nothing is stripped.
        assert strip_redundant_context_qualifiers(title, album) == title
|
||||
|
||||
|
||||
def test_version_marker_confirmed_by_album_is_stripped():
    """'(live)' is dropped when the album title itself contains 'live'."""
    # Owning 'Song (Live)' on the album 'Live at Wembley' IS owning that cut.
    reduced = strip_redundant_context_qualifiers('song (live)', 'live at wembley')
    assert reduced == 'song'
|
||||
|
||||
|
||||
def test_word_boundary_containment():
    """Substring hits inside a longer word do not confirm a qualifier."""
    title = 'song (live)'
    # 'live' inside 'alive' must NOT count as context confirmation.
    album = 'alive and well'
    assert strip_redundant_context_qualifiers(title, album) == title
|
||||
|
||||
|
||||
def test_no_context_or_title_untouched():
    """Titles pass through unchanged when there is nothing to strip."""
    passthrough_calls = (
        ('plain title', 'anything'),  # title has no parenthetical
        ('', 'ctx'),                  # empty title
        ('song (x)',),                # no context argument supplied
    )
    for call_args in passthrough_calls:
        # In every case the result must equal the title that was passed in.
        assert strip_redundant_context_qualifiers(*call_args) == call_args[0]
|
||||
|
||||
|
||||
# ── end to end through check_track_exists (the wishlist-cleanup contract) ────
|
||||
|
||||
@pytest.fixture()
def lib_db(tmp_path):
    """Return a MusicDatabase at ``tmp_path / 'm.db'`` seeded for the #808 cases.

    Seeded rows (everything uses server_source 'plex' and artist 'a1',
    'Jillette Johnson'):

    * album 'Champagne Supernova (OurVinyl Sessions)' with the bare track
      'Champagne Supernova' — the reported pair;
    * album 'Water In A Whale' with the track 'Cameron' — the
      version-safety control.

    The INSERTs presume MusicDatabase has already created the artists,
    albums and tracks tables by the time ``_get_connection()`` is used.
    """
    db = MusicDatabase(str(tmp_path / 'm.db'))
    conn = db._get_connection()
    # try/finally so the connection is released even if a seed statement
    # fails; otherwise the handle would stay open on the temp database.
    try:
        c = conn.cursor()
        c.execute("INSERT INTO artists (id, name, server_source) VALUES ('a1', 'Jillette Johnson', 'plex')")
        c.execute("""INSERT INTO albums (id, title, artist_id, server_source)
                     VALUES ('al1', 'Champagne Supernova (OurVinyl Sessions)', 'a1', 'plex')""")
        c.execute("""INSERT INTO tracks (id, album_id, artist_id, title, file_path, server_source)
                     VALUES ('t1', 'al1', 'a1', 'Champagne Supernova', '/m/cs.mp3', 'plex')""")
        # Version-safety control: a plain track on a studio-named album.
        # Neither title mentions 'live', so a '(Live)' query has no album
        # context to confirm it.
        c.execute("""INSERT INTO albums (id, title, artist_id, server_source)
                     VALUES ('al2', 'Water In A Whale', 'a1', 'plex')""")
        c.execute("""INSERT INTO tracks (id, album_id, artist_id, title, file_path, server_source)
                     VALUES ('t2', 'al2', 'a1', 'Cameron', '/m/c.mp3', 'plex')""")
        conn.commit()
    finally:
        conn.close()
    return db
|
||||
|
||||
|
||||
def test_808_qualified_search_matches_bare_library_track(lib_db):
    """Reported direction of #808.

    The searched (source/wishlist) title carries the qualifier, the library
    track title is bare, and the library ALBUM title carries the qualifier.
    """
    threshold = 0.7
    found, confidence = lib_db.check_track_exists(
        'Champagne Supernova (OurVinyl Sessions)',
        'Jillette Johnson',
        confidence_threshold=threshold,
        server_source='plex',
        album='Jillette Johnson | OurVinyl Sessions',
    )
    # Checked in the same order as the original compound condition.
    assert found is not None
    assert confidence >= threshold
|
||||
|
||||
|
||||
def test_version_marker_still_blocks_without_album_confirmation(lib_db):
    """'Cameron (Live)' must NOT match the library's 'Cameron'.

    No seeded album title contains the qualifier, so the mismatch penalty
    is expected to keep the confidence under the threshold.
    """
    threshold = 0.7
    _, confidence = lib_db.check_track_exists(
        'Cameron (Live)',
        'Jillette Johnson',
        confidence_threshold=threshold,
        server_source='plex',
    )
    assert confidence < threshold
|
||||
|
||||
|
||||
def test_different_song_prefix_still_blocked(lib_db):
    """A bare prefix is treated as a different (hypothetical) song.

    'Champagne' on its own must stay below the threshold against
    'Champagne Supernova': the length penalty on the reduced forms still
    applies.
    """
    threshold = 0.7
    _, confidence = lib_db.check_track_exists(
        'Champagne',
        'Jillette Johnson',
        confidence_threshold=threshold,
        server_source='plex',
    )
    assert confidence < threshold
|
||||
Loading…
Reference in new issue