diff --git a/core/repair_jobs/live_commentary_cleaner.py b/core/repair_jobs/live_commentary_cleaner.py index 5049687b..b3532567 100644 --- a/core/repair_jobs/live_commentary_cleaner.py +++ b/core/repair_jobs/live_commentary_cleaner.py @@ -11,13 +11,18 @@ logger = get_logger("repair_job.live_commentary_cleaner") # Keywords that indicate unwanted content types # Each tuple: (keyword, content_type_label) +# +# Live patterns require clear recording context — the bare `\blive\b` was +# too loose and falsely flagged verb uses like "What We Live For" by +# American Authors or "Live Forever" by Oasis. _CONTENT_PATTERNS = [ # Live - (r'\blive\b', 'live'), - (r'\blive at\b', 'live'), - (r'\blive from\b', 'live'), - (r'\blive in\b', 'live'), + (r'[\(\[]live\b', 'live'), # (Live), [Live at ...] + (r'-\s*live\b', 'live'), # Song - Live + (r'\blive (at|from|in|on|version|session|recording|performance|album|show|tour|concert|edit|cut|take)\b', 'live'), (r'\bin concert\b', 'live'), + (r'\bconcert\b', 'live'), + (r'\bon stage\b', 'live'), (r'\bunplugged\b', 'live'), # Commentary (r'\bcommentary\b', 'commentary'), diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index 7207c9f5..a0cdd66b 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -85,6 +85,11 @@ def is_live_version(track_name: str, album_name: str = "") -> bool: """ Detect if a track or album is a live version. + Uses patterns that require a clear live-recording context (parenthesized + "(Live)", dash-suffixed "- Live", or "live" with a location/format + modifier). The bare `\\blive\\b` pattern was too loose — it falsely + flagged verb uses like "What We Live For" or "Live Forever". + Args: track_name: Track name to check album_name: Album name to check (optional) @@ -98,17 +103,18 @@ def is_live_version(track_name: str, album_name: str = "") -> bool: # Combine track and album names for comprehensive checking text_to_check = f"{track_name} {album_name}".lower() - # Live version patterns + # Live-recording patterns — each one requires clear context so verbs + # like "What We Live For" / "Live Forever" / "Living on a Prayer" don't + # get swept up. live_patterns = [ - r'\blive\b', # (Live), Live at, etc. - r'\blive at\b', # Live at Madison Square Garden - r'\bconcert\b', # Concert, Live Concert + r'[\(\[]live\b', # (Live), (Live at ...), [Live Version] + r'-\s*live\b', # Song - Live, Song - Live at ... + # "live" followed by a recording-context word + r'\blive (at|from|in|on|version|session|recording|performance|album|show|tour|concert|edit|cut|take)\b', r'\bin concert\b', # In Concert - r'\bunplugged\b', # MTV Unplugged (usually live) - r'\blive session\b', # Live Session - r'\blive from\b', # Live from... - r'\blive recording\b', # Live Recording + r'\bconcert\b', # Concert (album name) r'\bon stage\b', # On Stage + r'\bunplugged\b', # MTV Unplugged ] for pattern in live_patterns: @@ -939,9 +945,8 @@ class WatchlistScanner: if watchlist_artists is None: watchlist_artists = self.database.get_watchlist_artists(profile_id=profile_id) - if apply_global_overrides: - self._apply_global_watchlist_overrides(watchlist_artists) - + # scan_watchlist_artists applies overrides itself now — pass the flag + # through instead of applying here (prevents double-application). return self.scan_watchlist_artists( watchlist_artists, profile_id=profile_id, @@ -950,6 +955,7 @@ class WatchlistScanner: cancel_check=cancel_check, artist_index_offset=artist_index_offset, total_artists_override=total_artists_override, + apply_global_overrides=apply_global_overrides, ) def scan_watchlist_artists( @@ -962,8 +968,19 @@ class WatchlistScanner: cancel_check: Optional[Callable[[], bool]] = None, artist_index_offset: int = 0, total_artists_override: Optional[int] = None, + apply_global_overrides: bool = True, ) -> List[ScanResult]: - """Scan a list of watchlist artists using the shared web watchlist scan flow.""" + """Scan a list of watchlist artists using the shared web watchlist scan flow. + + apply_global_overrides: when True (default), per-artist include_* + flags are overwritten with the global values if + `watchlist.global_override_enabled` is set. This matches the + behaviour of `scan_watchlist_profile` so every entry point respects + the user's Global Override toggle. + """ + if apply_global_overrides: + self._apply_global_watchlist_overrides(watchlist_artists) + scan_results: List[ScanResult] = [] if not watchlist_artists: if scan_state is not None: diff --git a/tests/test_content_filter_regex.py b/tests/test_content_filter_regex.py new file mode 100644 index 00000000..29f528d8 --- /dev/null +++ b/tests/test_content_filter_regex.py @@ -0,0 +1,106 @@ +"""Regression tests for the content-filter regex patterns used by the +watchlist scanner and the Live/Commentary Cleaner repair job. + +The bare `\\blive\\b` pattern was too loose — it flagged verb uses like +"What We Live For" or "Live Forever" as live recordings. These tests lock +in the tightened behaviour: clear live-recording context required. +""" + +import sys +import types + + +# Minimal stubs so the watchlist_scanner module imports without the full app. +if "config.settings" not in sys.modules: + config_pkg = types.ModuleType("config") + settings_mod = types.ModuleType("config.settings") + + class _DummyConfigManager: + def get(self, key, default=None): + return default + + def get_active_media_server(self): + return "plex" + + settings_mod.config_manager = _DummyConfigManager() + config_pkg.settings = settings_mod + sys.modules.setdefault("config", config_pkg) + sys.modules.setdefault("config.settings", settings_mod) + + +from core.watchlist_scanner import is_live_version # noqa: E402 +from core.repair_jobs.live_commentary_cleaner import _detect_content_type # noqa: E402 + + +# ── is_live_version ───────────────────────────────────────────────────────── + +def test_is_live_version_catches_live_at_suffix(): + # Reported case: Wolfmother 10th Anniversary deluxe bonus tracks + assert is_live_version("Dimension - Live at Big Day Out", "Wolfmother") + assert is_live_version("Minds Eye - Live at Triple J", "Wolfmother") + + +def test_is_live_version_catches_parenthesized_suffix(): + assert is_live_version("Thriller (Live)", "") + assert is_live_version("Song (Live at Wembley)", "") + assert is_live_version("Song [Live Version]", "") + + +def test_is_live_version_catches_dash_suffix(): + assert is_live_version("Song - Live", "") + assert is_live_version("Dimension - Live", "Wolfmother") + + +def test_is_live_version_catches_modifiers(): + assert is_live_version("Song", "Live in Tokyo") + assert is_live_version("Live Recording of Dimension", "") + assert is_live_version("Acoustic Session", "Live Session Vol 1") + + +def test_is_live_version_catches_other_signals(): + assert is_live_version("MTV Unplugged", "MTV Unplugged in New York") + assert is_live_version("The Concert for Bangladesh", "") + assert is_live_version("On Stage", "ABBA") + assert is_live_version("Dead Man's Party (Live in Concert)", "") + + +def test_is_live_version_does_not_flag_verb_live(): + # Reported false positive: "What We Live For" by American Authors + assert not is_live_version("What We Live For", "What We Live For") + assert not is_live_version("Live Forever", "Definitely Maybe") + assert not is_live_version("Live and Let Die", "Band on the Run") + + +def test_is_live_version_does_not_flag_similar_words(): + assert not is_live_version("Living on a Prayer", "Slippery When Wet") + assert not is_live_version("Believe", "Believe") + assert not is_live_version("Symphony No. 5", "") + assert not is_live_version("Alive", "Ten") + + +def test_is_live_version_handles_empty_input(): + assert not is_live_version("", "") + assert not is_live_version("", "Some Album") + + +# ── live_commentary_cleaner._detect_content_type ──────────────────────────── + +def test_detect_content_type_flags_live_recordings(): + assert _detect_content_type("Dimension - Live at Big Day Out", "Wolfmother") == "live" + assert _detect_content_type("Thriller (Live)", "") == "live" + assert _detect_content_type("Song [Live Version]", "") == "live" + assert _detect_content_type("MTV Unplugged", "Unplugged in NY") == "live" + + +def test_detect_content_type_does_not_flag_verb_live(): + # Reported false positive: "What We Live For" by American Authors + assert _detect_content_type("What We Live For", "What We Live For") is None + assert _detect_content_type("Live Forever", "Definitely Maybe") is None + assert _detect_content_type("Living on a Prayer", "Slippery When Wet") is None + + +def test_detect_content_type_still_catches_other_categories(): + assert _detect_content_type("The Interview", "Press Kit") == "interview" + assert _detect_content_type("Director's Commentary", "Album") == "commentary" + assert _detect_content_type("Spoken Word Poem", "") == "spoken_word" + assert _detect_content_type("A Cappella Version", "") == "acappella" diff --git a/tests/test_watchlist_scanner_scan.py b/tests/test_watchlist_scanner_scan.py index 85d8d607..1a077363 100644 --- a/tests/test_watchlist_scanner_scan.py +++ b/tests/test_watchlist_scanner_scan.py @@ -344,25 +344,53 @@ def test_backfill_similar_artists_fallback_ids_uses_provider_priority(monkeypatc assert [call[0] for call in itunes_client.search_calls] == ["iTunes Artist"] -def test_scan_watchlist_profile_loads_artists_and_applies_overrides(monkeypatch): +def test_scan_watchlist_profile_loads_artists_and_forwards_override_flag(monkeypatch): + # Global-override application moved from scan_watchlist_profile into + # scan_watchlist_artists so every entry point (including the automation + # auto-scan) respects it. scan_watchlist_profile now just forwards the + # apply_global_overrides flag through. artist = _build_artist() scanner = _build_scanner({"tracks": {"items": []}}, [artist]) loaded_profiles = [] - override_calls = [] scan_calls = [] monkeypatch.setattr(scanner.database, "get_watchlist_artists", lambda profile_id=None: loaded_profiles.append(profile_id) or [artist]) - monkeypatch.setattr(scanner, "_apply_global_watchlist_overrides", lambda artists: override_calls.append(list(artists))) monkeypatch.setattr(scanner, "scan_watchlist_artists", lambda artists, **kwargs: scan_calls.append((list(artists), kwargs)) or ["ok"]) result = scanner.scan_watchlist_profile(42) assert result == ["ok"] assert loaded_profiles == [42] - assert override_calls and override_calls[0][0].artist_name == "Artist One" assert scan_calls and scan_calls[0][0][0].artist_name == "Artist One" assert scan_calls[0][1]["profile_id"] == 42 + assert scan_calls[0][1]["apply_global_overrides"] is True + + +def test_scan_watchlist_artists_applies_global_overrides(monkeypatch): + # Regression guard: auto-watchlist automation bypassed overrides by + # calling scan_watchlist_artists directly. Overrides now run inside + # scan_watchlist_artists so every caller gets the global filter. + scanner = _build_scanner({"tracks": {"items": []}}, []) + + override_calls = [] + monkeypatch.setattr(scanner, "_apply_global_watchlist_overrides", lambda artists: override_calls.append(list(artists))) + + # Empty artist list exits early but override must still have been invoked. + scanner.scan_watchlist_artists([]) + + assert override_calls == [[]] + + +def test_scan_watchlist_artists_skips_overrides_when_flag_false(monkeypatch): + scanner = _build_scanner({"tracks": {"items": []}}, []) + + override_calls = [] + monkeypatch.setattr(scanner, "_apply_global_watchlist_overrides", lambda artists: override_calls.append(list(artists))) + + scanner.scan_watchlist_artists([], apply_global_overrides=False) + + assert override_calls == [] def test_scan_watchlist_artists_scans_tracks_and_updates_state(monkeypatch): diff --git a/web_server.py b/web_server.py index 9042854f..503b8169 100644 --- a/web_server.py +++ b/web_server.py @@ -37,7 +37,7 @@ _log_dir = Path(_log_path).parent logger = setup_logging(_log_level, _log_path) # App version — single source of truth for backup metadata, version-info endpoint, etc. -_SOULSYNC_BASE_VERSION = "2.36" +_SOULSYNC_BASE_VERSION = "2.37" def _build_version_string(): """Append short commit hash to version when available (e.g. 2.35+abc1234).""" @@ -22526,6 +22526,16 @@ def get_version_info(): "title": "What's New in SoulSync", "subtitle": f"Version {SOULSYNC_VERSION} — Latest Changes", "sections": [ + { + "title": "Watchlist Content Filters Fixed", + "description": "Global Override settings and live-version detection now behave the way the UI implies", + "features": [ + "• Scheduled auto-watchlist now honors Watchlist → Global Override (was bypassing it and using per-artist defaults)", + "• 'Live' detection tightened — no more false positives on titles like 'What We Live For' or 'Live Forever'", + "• Same fix applies to the Library Maintenance Live/Commentary Cleaner", + "• Still catches (Live), - Live, Live at/from/in/on/version/session/recording, Unplugged, In Concert", + ], + }, { "title": "Discography Backfill", "description": "New maintenance job that fills gaps in your library — scans each artist's full discography and finds what you're missing", diff --git a/webui/static/helper.js b/webui/static/helper.js index 9f6ef9ad..34d186b4 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3599,6 +3599,12 @@ function closeHelperSearch() { // ═══════════════════════════════════════════════════════════════════════════ const WHATS_NEW = { + '2.37': [ + // --- April 21, 2026 (evening) --- + { date: 'April 21, 2026 (evening)' }, + { title: 'Fix Auto-Watchlist Ignoring Global Override Settings', desc: 'The scheduled auto-watchlist scan (not the manual one) called scan_watchlist_artists directly, which bypassed Global Override application. So if you disabled Albums or Live under Watchlist → Global Override, full albums and live tracks still got added to the wishlist during the nightly scan. Override logic now runs inside scan_watchlist_artists so every entry point respects it', page: 'watchlist' }, + { title: 'Fix Live Version Filter False Positives', desc: 'The \\blive\\b regex was too loose — it flagged any title with the word "live" regardless of context, so "What We Live For" by American Authors, "Live Forever" by Oasis, and similar verb uses got treated as live recordings. Tightened to require clear live-recording context: "(Live)", "- Live", "Live at/from/in/on/version/session/etc". Fixes both the watchlist/backfill track filter and the Library Maintenance Live/Commentary Cleaner', page: 'library' }, + ], '2.36': [ // --- April 21, 2026 --- { date: 'April 21, 2026' }, @@ -3780,12 +3786,12 @@ const WHATS_NEW = { function _getCurrentVersion() { const btn = document.querySelector('.version-button'); - return btn ? btn.textContent.trim().replace('v', '') : '2.36'; + return btn ? btn.textContent.trim().replace('v', '') : '2.37'; } function _getLatestWhatsNewVersion() { const versions = Object.keys(WHATS_NEW).sort((a, b) => parseFloat(b) - parseFloat(a)); - return versions[0] || '2.36'; + return versions[0] || '2.37'; } function openWhatsNew() {