Fix watchlist content filters: live false positives + auto-scan bypass

Two bugs reported in issue #320:

1. Auto-watchlist scan bypassed Global Override settings.
   scan_watchlist_profile applied _apply_global_watchlist_overrides, but
   the scheduled auto-scan called scan_watchlist_artists directly —
   bypassing the override. Users who unchecked "Albums" or "Live" under
   Watchlist → Global Override still saw full albums and live tracks
   added during nightly scans (per-artist defaults, which include
   everything, won).

   Moved override application into scan_watchlist_artists itself so
   every entry point respects it. scan_watchlist_profile now forwards
   the apply_global_overrides flag through to avoid double-application.

2. is_live_version (watchlist + discography backfill) and
   live_commentary_cleaner's content patterns used bare \blive\b, which
   matched verb uses like "What We Live For" by American Authors,
   "Live Forever" by Oasis, "Live and Let Die" by Wings.

   Tightened the live patterns to require clear recording context:
   (Live) / [Live Version] / - Live / Live at|from|in|on|version|
   session|recording|performance|album|show|tour|concert|edit|cut|take
   / In Concert / On Stage / Unplugged / Concert.

   Locked in 11 regression tests covering the reported false positives
   (What We Live For, Live Forever, Living on a Prayer, Live and Let Die)
   and the reported true positives (Dimension - Live at Big Day Out,
   MTV Unplugged, etc.).

Version bumped to 2.37 with changelog entries.
pull/349/head
Broque Thomas 1 month ago
parent 178719e443
commit e5d4d61c0e

@ -11,13 +11,18 @@ logger = get_logger("repair_job.live_commentary_cleaner")
# Keywords that indicate unwanted content types
# Each tuple: (keyword, content_type_label)
#
# Live patterns require clear recording context — the bare `\blive\b` was
# too loose and falsely flagged verb uses like "What We Live For" by
# American Authors or "Live Forever" by Oasis.
_CONTENT_PATTERNS = [
# Live
(r'\blive\b', 'live'),
(r'\blive at\b', 'live'),
(r'\blive from\b', 'live'),
(r'\blive in\b', 'live'),
(r'[\(\[]live\b', 'live'), # (Live), [Live at ...]
(r'-\s*live\b', 'live'), # Song - Live
(r'\blive (at|from|in|on|version|session|recording|performance|album|show|tour|concert|edit|cut|take)\b', 'live'),
(r'\bin concert\b', 'live'),
(r'\bconcert\b', 'live'),
(r'\bon stage\b', 'live'),
(r'\bunplugged\b', 'live'),
# Commentary
(r'\bcommentary\b', 'commentary'),

@ -85,6 +85,11 @@ def is_live_version(track_name: str, album_name: str = "") -> bool:
"""
Detect if a track or album is a live version.
Uses patterns that require a clear live-recording context (parenthesized
"(Live)", dash-suffixed "- Live", or "live" with a location/format
modifier). The bare `\\blive\\b` pattern was too loose — it falsely
flagged verb uses like "What We Live For" or "Live Forever".
Args:
track_name: Track name to check
album_name: Album name to check (optional)
@ -98,17 +103,18 @@ def is_live_version(track_name: str, album_name: str = "") -> bool:
# Combine track and album names for comprehensive checking
text_to_check = f"{track_name} {album_name}".lower()
# Live version patterns
# Live-recording patterns — each one requires clear context so verbs
# like "What We Live For" / "Live Forever" / "Living on a Prayer" don't
# get swept up.
live_patterns = [
r'\blive\b', # (Live), Live at, etc.
r'\blive at\b', # Live at Madison Square Garden
r'\bconcert\b', # Concert, Live Concert
r'[\(\[]live\b', # (Live), (Live at ...), [Live Version]
r'-\s*live\b', # Song - Live, Song - Live at ...
# "live" followed by a recording-context word
r'\blive (at|from|in|on|version|session|recording|performance|album|show|tour|concert|edit|cut|take)\b',
r'\bin concert\b', # In Concert
r'\bunplugged\b', # MTV Unplugged (usually live)
r'\blive session\b', # Live Session
r'\blive from\b', # Live from...
r'\blive recording\b', # Live Recording
r'\bconcert\b', # Concert (album name)
r'\bon stage\b', # On Stage
r'\bunplugged\b', # MTV Unplugged
]
for pattern in live_patterns:
@ -939,9 +945,8 @@ class WatchlistScanner:
if watchlist_artists is None:
watchlist_artists = self.database.get_watchlist_artists(profile_id=profile_id)
if apply_global_overrides:
self._apply_global_watchlist_overrides(watchlist_artists)
# scan_watchlist_artists applies overrides itself now — pass the flag
# through instead of applying here (prevents double-application).
return self.scan_watchlist_artists(
watchlist_artists,
profile_id=profile_id,
@ -950,6 +955,7 @@ class WatchlistScanner:
cancel_check=cancel_check,
artist_index_offset=artist_index_offset,
total_artists_override=total_artists_override,
apply_global_overrides=apply_global_overrides,
)
def scan_watchlist_artists(
@ -962,8 +968,19 @@ class WatchlistScanner:
cancel_check: Optional[Callable[[], bool]] = None,
artist_index_offset: int = 0,
total_artists_override: Optional[int] = None,
apply_global_overrides: bool = True,
) -> List[ScanResult]:
"""Scan a list of watchlist artists using the shared web watchlist scan flow."""
"""Scan a list of watchlist artists using the shared web watchlist scan flow.
apply_global_overrides: when True (default), per-artist include_*
flags are overwritten with the global values if
`watchlist.global_override_enabled` is set. This matches the
behaviour of `scan_watchlist_profile` so every entry point respects
the user's Global Override toggle.
"""
if apply_global_overrides:
self._apply_global_watchlist_overrides(watchlist_artists)
scan_results: List[ScanResult] = []
if not watchlist_artists:
if scan_state is not None:

@ -0,0 +1,106 @@
"""Regression tests for the content-filter regex patterns used by the
watchlist scanner and the Live/Commentary Cleaner repair job.
The bare `\\blive\\b` pattern was too loose — it flagged verb uses like
"What We Live For" or "Live Forever" as live recordings. These tests lock
in the tightened behaviour: clear live-recording context required.
"""
import sys
import types
# Register lightweight stand-ins for `config` / `config.settings` so the
# watchlist_scanner module can be imported without the full app. Nothing is
# touched when a `config.settings` module is already registered.
if "config.settings" not in sys.modules:

    class _DummyConfigManager:
        """Config-manager stand-in with no stored settings."""

        def get(self, key, default=None):
            # Every lookup falls through to the caller-supplied default.
            return default

        def get_active_media_server(self):
            return "plex"

    settings_mod = types.ModuleType("config.settings")
    settings_mod.config_manager = _DummyConfigManager()

    config_pkg = types.ModuleType("config")
    config_pkg.settings = settings_mod

    # setdefault leaves any module already registered under the name alone.
    sys.modules.setdefault("config", config_pkg)
    sys.modules.setdefault("config.settings", settings_mod)
from core.watchlist_scanner import is_live_version # noqa: E402
from core.repair_jobs.live_commentary_cleaner import _detect_content_type # noqa: E402
# ── is_live_version ─────────────────────────────────────────────────────────
def test_is_live_version_catches_live_at_suffix():
    """"<title> - Live at <venue>" track names are detected as live."""
    # Reported case: Wolfmother 10th Anniversary deluxe bonus tracks
    reported_titles = (
        "Dimension - Live at Big Day Out",
        "Minds Eye - Live at Triple J",
    )
    for title in reported_titles:
        assert is_live_version(title, "Wolfmother")
def test_is_live_version_catches_parenthesized_suffix():
    """Bracketed live markers — "(Live ...)" and "[Live ...]" — are detected."""
    bracketed_titles = (
        "Thriller (Live)",
        "Song (Live at Wembley)",
        "Song [Live Version]",
    )
    for title in bracketed_titles:
        assert is_live_version(title, "")
def test_is_live_version_catches_dash_suffix():
    """A trailing "- Live" is detected, with or without an album name."""
    dash_cases = (
        ("Song - Live", ""),
        ("Dimension - Live", "Wolfmother"),
    )
    for track, album in dash_cases:
        assert is_live_version(track, album)
def test_is_live_version_catches_modifiers():
    """"live" followed by a modifier word is detected in track or album."""
    modifier_cases = (
        ("Song", "Live in Tokyo"),
        ("Live Recording of Dimension", ""),
        ("Acoustic Session", "Live Session Vol 1"),
    )
    for track, album in modifier_cases:
        assert is_live_version(track, album)
def test_is_live_version_catches_other_signals():
    """Unplugged / Concert / On Stage / In Concert wording is detected."""
    signal_cases = (
        ("MTV Unplugged", "MTV Unplugged in New York"),
        ("The Concert for Bangladesh", ""),
        ("On Stage", "ABBA"),
        ("Dead Man's Party (Live in Concert)", ""),
    )
    for track, album in signal_cases:
        assert is_live_version(track, album)
def test_is_live_version_does_not_flag_verb_live():
    """Titles that use "live" as a verb must not be reported as live."""
    verb_cases = (
        # Reported false positive: "What We Live For" by American Authors
        ("What We Live For", "What We Live For"),
        ("Live Forever", "Definitely Maybe"),
        ("Live and Let Die", "Band on the Run"),
    )
    for track, album in verb_cases:
        assert not is_live_version(track, album)
def test_is_live_version_does_not_flag_similar_words():
    """Titles without a live marker, including near-miss words, stay unflagged."""
    lookalike_cases = (
        ("Living on a Prayer", "Slippery When Wet"),
        ("Believe", "Believe"),
        ("Symphony No. 5", ""),
        ("Alive", "Ten"),
    )
    for track, album in lookalike_cases:
        assert not is_live_version(track, album)
def test_is_live_version_handles_empty_input():
    """An empty track name is never live, with or without an album name."""
    for album in ("", "Some Album"):
        assert not is_live_version("", album)
# ── live_commentary_cleaner._detect_content_type ────────────────────────────
def test_detect_content_type_flags_live_recordings():
    """The cleaner labels clear live-recording titles as "live"."""
    live_cases = (
        ("Dimension - Live at Big Day Out", "Wolfmother"),
        ("Thriller (Live)", ""),
        ("Song [Live Version]", ""),
        ("MTV Unplugged", "Unplugged in NY"),
    )
    for track, album in live_cases:
        assert _detect_content_type(track, album) == "live"
def test_detect_content_type_does_not_flag_verb_live():
    """The cleaner returns None for the verb-"live" and "Living" titles."""
    unflagged_cases = (
        # Reported false positive: "What We Live For" by American Authors
        ("What We Live For", "What We Live For"),
        ("Live Forever", "Definitely Maybe"),
        ("Living on a Prayer", "Slippery When Wet"),
    )
    for track, album in unflagged_cases:
        assert _detect_content_type(track, album) is None
def test_detect_content_type_still_catches_other_categories():
    """Non-live categories are still labelled after the live tightening."""
    expectations = (
        ("The Interview", "Press Kit", "interview"),
        ("Director's Commentary", "Album", "commentary"),
        ("Spoken Word Poem", "", "spoken_word"),
        ("A Cappella Version", "", "acappella"),
    )
    for track, album, label in expectations:
        assert _detect_content_type(track, album) == label

@ -344,25 +344,53 @@ def test_backfill_similar_artists_fallback_ids_uses_provider_priority(monkeypatc
assert [call[0] for call in itunes_client.search_calls] == ["iTunes Artist"]
def test_scan_watchlist_profile_loads_artists_and_applies_overrides(monkeypatch):
def test_scan_watchlist_profile_loads_artists_and_forwards_override_flag(monkeypatch):
# Global-override application moved from scan_watchlist_profile into
# scan_watchlist_artists so every entry point (including the automation
# auto-scan) respects it. scan_watchlist_profile now just forwards the
# apply_global_overrides flag through.
artist = _build_artist()
scanner = _build_scanner({"tracks": {"items": []}}, [artist])
loaded_profiles = []
override_calls = []
scan_calls = []
monkeypatch.setattr(scanner.database, "get_watchlist_artists", lambda profile_id=None: loaded_profiles.append(profile_id) or [artist])
monkeypatch.setattr(scanner, "_apply_global_watchlist_overrides", lambda artists: override_calls.append(list(artists)))
monkeypatch.setattr(scanner, "scan_watchlist_artists", lambda artists, **kwargs: scan_calls.append((list(artists), kwargs)) or ["ok"])
result = scanner.scan_watchlist_profile(42)
assert result == ["ok"]
assert loaded_profiles == [42]
assert override_calls and override_calls[0][0].artist_name == "Artist One"
assert scan_calls and scan_calls[0][0][0].artist_name == "Artist One"
assert scan_calls[0][1]["profile_id"] == 42
assert scan_calls[0][1]["apply_global_overrides"] is True
def test_scan_watchlist_artists_applies_global_overrides(monkeypatch):
    """scan_watchlist_artists invokes the global-override hook by default.

    Regression guard: the auto-watchlist automation bypassed overrides by
    calling scan_watchlist_artists directly, so the overrides now run inside
    scan_watchlist_artists and every caller gets the global filter.
    """
    scanner = _build_scanner({"tracks": {"items": []}}, [])
    seen_by_override = []

    def _record_override(artists):
        seen_by_override.append(list(artists))

    monkeypatch.setattr(scanner, "_apply_global_watchlist_overrides", _record_override)

    # An empty artist list exits early, but the override hook must still
    # have been invoked once with that empty list.
    scanner.scan_watchlist_artists([])

    assert seen_by_override == [[]]
def test_scan_watchlist_artists_skips_overrides_when_flag_false(monkeypatch):
    """With apply_global_overrides=False the override hook is never called."""
    scanner = _build_scanner({"tracks": {"items": []}}, [])
    seen_by_override = []

    def _record_override(artists):
        seen_by_override.append(list(artists))

    monkeypatch.setattr(scanner, "_apply_global_watchlist_overrides", _record_override)

    scanner.scan_watchlist_artists([], apply_global_overrides=False)

    assert seen_by_override == []
def test_scan_watchlist_artists_scans_tracks_and_updates_state(monkeypatch):

@ -37,7 +37,7 @@ _log_dir = Path(_log_path).parent
logger = setup_logging(_log_level, _log_path)
# App version — single source of truth for backup metadata, version-info endpoint, etc.
_SOULSYNC_BASE_VERSION = "2.36"
_SOULSYNC_BASE_VERSION = "2.37"
def _build_version_string():
"""Append short commit hash to version when available (e.g. 2.35+abc1234)."""
@ -22526,6 +22526,16 @@ def get_version_info():
"title": "What's New in SoulSync",
"subtitle": f"Version {SOULSYNC_VERSION} — Latest Changes",
"sections": [
{
"title": "Watchlist Content Filters Fixed",
"description": "Global Override settings and live-version detection now behave the way the UI implies",
"features": [
"• Scheduled auto-watchlist now honors Watchlist → Global Override (was bypassing it and using per-artist defaults)",
"'Live' detection tightened — no more false positives on titles like 'What We Live For' or 'Live Forever'",
"• Same fix applies to the Library Maintenance Live/Commentary Cleaner",
"• Still catches (Live), - Live, Live at/from/in/on/version/session/recording, Unplugged, In Concert",
],
},
{
"title": "Discography Backfill",
"description": "New maintenance job that fills gaps in your library — scans each artist's full discography and finds what you're missing",

@ -3599,6 +3599,12 @@ function closeHelperSearch() {
// ═══════════════════════════════════════════════════════════════════════════
const WHATS_NEW = {
'2.37': [
// --- April 21, 2026 (evening) ---
{ date: 'April 21, 2026 (evening)' },
{ title: 'Fix Auto-Watchlist Ignoring Global Override Settings', desc: 'The scheduled auto-watchlist scan (not the manual one) called scan_watchlist_artists directly, which bypassed Global Override application. So if you disabled Albums or Live under Watchlist → Global Override, full albums and live tracks still got added to the wishlist during the nightly scan. Override logic now runs inside scan_watchlist_artists so every entry point respects it', page: 'watchlist' },
{ title: 'Fix Live Version Filter False Positives', desc: 'The \\blive\\b regex was too loose — it flagged any title with the word "live" regardless of context, so "What We Live For" by American Authors, "Live Forever" by Oasis, and similar verb uses got treated as live recordings. Tightened to require clear live-recording context: "(Live)", "- Live", "Live at/from/in/on/version/session/etc". Fixes both the watchlist/backfill track filter and the Library Maintenance Live/Commentary Cleaner', page: 'library' },
],
'2.36': [
// --- April 21, 2026 ---
{ date: 'April 21, 2026' },
@ -3780,12 +3786,12 @@ const WHATS_NEW = {
function _getCurrentVersion() {
const btn = document.querySelector('.version-button');
return btn ? btn.textContent.trim().replace('v', '') : '2.36';
return btn ? btn.textContent.trim().replace('v', '') : '2.37';
}
function _getLatestWhatsNewVersion() {
const versions = Object.keys(WHATS_NEW).sort((a, b) => parseFloat(b) - parseFloat(a));
return versions[0] || '2.36';
return versions[0] || '2.37';
}
function openWhatsNew() {

Loading…
Cancel
Save