def _cleanup_per_period_series_mirrors(self, cursor):
    """Delete legacy per-period ListenBrainz series mirrors.

    Earlier versions keyed auto-mirrors by the per-week / per-year
    playlist MBID, so one ``mirrored_playlists`` row accumulated per
    period. Mirrors are now collapsed into one rolling row per series,
    so this sweeper removes the leftover per-period rows together with
    their ``mirrored_playlist_tracks`` children.

    The LIKE patterns match per-period titles ("Weekly Jams for X,
    week of YYYY-MM-DD ...") but not the canonical rolling-mirror
    titles ("ListenBrainz Weekly Jams"), so re-running the sweep after
    a successful pass deletes nothing.

    Args:
        cursor: DB-API cursor using ``?`` placeholders. Committing is
            left to the caller.

    Returns:
        None. Any failure is logged at debug level and swallowed so
        this best-effort sweep never breaks the surrounding cleanup.
    """
    per_period_title_patterns = [
        ('listenbrainz', 'Weekly Jams for %, week of %'),
        ('listenbrainz', 'Weekly Exploration for %, week of %'),
        ('listenbrainz', 'Top Discoveries of % for %'),
        ('listenbrainz', 'Top Missed Recordings of % for %'),
    ]
    try:
        total = 0
        for source, like in per_period_title_patterns:
            # Delete through a subquery instead of fetching ids and
            # binding one "?" per id: an unbounded IN-list can exceed
            # SQLite's host-parameter limit, and that error would be
            # swallowed below, silently disabling the sweep.
            # Children go first, while the parent rows still exist for
            # the subquery to find.
            cursor.execute(
                """
                DELETE FROM mirrored_playlist_tracks
                WHERE playlist_id IN (
                    SELECT id FROM mirrored_playlists
                    WHERE source = ? AND name LIKE ?
                )
                """,
                (source, like),
            )
            cursor.execute(
                """
                DELETE FROM mirrored_playlists
                WHERE source = ? AND name LIKE ?
                """,
                (source, like),
            )
            # rowcount is -1 when the driver cannot report a count.
            removed = getattr(cursor, 'rowcount', 0) or 0
            if removed > 0:
                total += removed
        if total:
            logger.info(
                f"Removed {total} legacy per-period LB series mirrors "
                "(consolidated into rolling series mirrors)"
            )
    except Exception as exc:
        logger.debug(f"Per-period series mirror cleanup skipped: {exc}")
@dataclass(frozen=True)
class SeriesMatch:
    """A playlist whose title matches one of the rotating series."""

    # Synthetic id shared by every period of the series,
    # e.g. "lb_weekly_jams_Nezreka".
    series_id: str
    # Display name for the single rolling mirror.
    canonical_name: str
    # Source field the mirror row is filed under (every series defined
    # below uses "listenbrainz").
    source_for_mirror: str
    # SQL LIKE pattern that finds the per-period members,
    # e.g. "Weekly Jams for Nezreka, week of %".
    title_pattern: str


# Each series is identified by a regex, a template for the synthetic
# id, the canonical mirror name, the mirror's source field and a LIKE
# template. Every regex MUST expose a named ``user`` group (the
# ListenBrainz username) because ``series_format`` and ``like_format``
# interpolate ``{user}``; yearly series additionally capture ``year``.
_SERIES_PATTERNS = [
    {
        "regex": re.compile(r"^Weekly Jams for (?P<user>.+?), week of "),
        "series_format": "lb_weekly_jams_{user}",
        "canonical_name": "ListenBrainz Weekly Jams",
        "source": "listenbrainz",
        "like_format": "Weekly Jams for {user}, week of %",
    },
    {
        "regex": re.compile(r"^Weekly Exploration for (?P<user>.+?), week of "),
        "series_format": "lb_weekly_exploration_{user}",
        "canonical_name": "ListenBrainz Weekly Exploration",
        "source": "listenbrainz",
        "like_format": "Weekly Exploration for {user}, week of %",
    },
    {
        "regex": re.compile(r"^Top Discoveries of (?P<year>\d{4}) for (?P<user>.+)$"),
        "series_format": "lb_top_discoveries_{user}",
        "canonical_name": "ListenBrainz Top Discoveries (latest year)",
        "source": "listenbrainz",
        # The regex anchors the username at end-of-string, so the LIKE
        # template has no trailing wildcard; its single ``%`` stands in
        # for the year so all years of one user match.
        "like_format": "Top Discoveries of % for {user}",
    },
    {
        "regex": re.compile(r"^Top Missed Recordings of (?P<year>\d{4}) for (?P<user>.+)$"),
        "series_format": "lb_top_missed_{user}",
        "canonical_name": "ListenBrainz Top Missed Recordings (latest year)",
        "source": "listenbrainz",
        "like_format": "Top Missed Recordings of % for {user}",
    },
]


def detect_series(title: str) -> Optional[SeriesMatch]:
    """Return a ``SeriesMatch`` if ``title`` belongs to a known series.

    Args:
        title: Raw playlist title, e.g.
            ``"Weekly Jams for Nezreka, week of 2026-05-25 Mon"``.

    Returns:
        The match for the first series whose regex matches the start
        of ``title``; ``None`` for empty, non-string or non-series
        titles.
    """
    if not title or not isinstance(title, str):
        return None
    for spec in _SERIES_PATTERNS:
        m = spec["regex"].match(title)
        if not m:
            continue
        # groupdict() holds ``user`` (plus ``year`` for yearly series);
        # str.format ignores the keyword the template does not use.
        groups = m.groupdict()
        return SeriesMatch(
            series_id=spec["series_format"].format(**groups),
            canonical_name=spec["canonical_name"],
            source_for_mirror=spec["source"],
            title_pattern=spec["like_format"].format(**groups),
        )
    return None


def list_series_synthetic_ids() -> List[str]:
    """Return all known series-id PREFIXES (e.g. ``lb_weekly_jams_``).

    Callers use the prefixes to tell whether a ``source_playlist_id``
    is a synthetic series id that needs special resolution.
    """
    return [
        # Formatting with an empty user leaves exactly the prefix; the
        # rstrip/append pair normalises it to a single trailing "_".
        spec["series_format"].format(user="").rstrip("_") + "_"
        for spec in _SERIES_PATTERNS
    ]


def is_series_synthetic_id(source_playlist_id: str) -> bool:
    """Return True if the value starts with a known series prefix.

    Every prefix starts with ``lb_``, so the dashed-hex MBIDs exercised
    by the test suite can never match. Empty and non-string values
    return False.
    """
    if not source_playlist_id or not isinstance(source_playlist_id, str):
        return False
    # str.startswith accepts a tuple: one call instead of a Python loop.
    return source_playlist_id.startswith(tuple(list_series_synthetic_ids()))
def get_playlist(self, playlist_id: str) -> Optional[PlaylistDetail]:
    """Fetch one playlist by MBID or by synthetic series id.

    ``playlist_id`` is either a ListenBrainz playlist MBID or a
    synthetic series id (e.g. ``lb_weekly_jams_<user>``). A series id
    is first resolved to the newest cached member of that series; the
    returned detail still carries the synthetic id so the caller can
    write the mirror row back under it.

    Returns ``None`` when no manager is available or when a series id
    cannot be resolved to a cached playlist.
    """
    manager = self._manager()
    if manager is None:
        return None

    from core.playlists.lb_series import is_series_synthetic_id

    # Plain MBIDs take the direct path.
    if not is_series_synthetic_id(playlist_id):
        return self._fetch_playlist_by_mbid(manager, playlist_id)

    # Rolling series: look up the latest period, then fetch it while
    # keeping the synthetic id on the resulting meta object.
    latest_mbid = self._resolve_series_to_latest_mbid(manager, playlist_id)
    if latest_mbid:
        return self._fetch_playlist_by_mbid(
            manager, latest_mbid, override_meta_id=playlist_id
        )
    return None
def _resolve_series_to_latest_mbid(self, manager: Any, series_id: str) -> Optional[str]:
    """Find the newest cached playlist MBID for a synthetic series id.

    The synthetic id is ``<series prefix><username>``. The prefix
    selects the series spec and the remainder is substituted into that
    spec's LIKE template, which is then matched against the titles in
    ``listenbrainz_playlists`` for the manager's profile.

    Args:
        manager: Object exposing ``_get_db_connection()`` and
            ``profile_id``.
        series_id: Synthetic id such as ``lb_weekly_jams_Nezreka``.

    Returns:
        The ``playlist_mbid`` of the row with the most recent
        ``last_updated``, or ``None`` when the id is not a known
        series, carries no username, nothing matches, or any error
        occurs (resolution is best-effort).
    """
    try:
        from core.playlists.lb_series import _SERIES_PATTERNS

        spec = None
        user_token = ""
        for entry in _SERIES_PATTERNS:
            series_prefix = entry["series_format"].format(user="").rstrip("_") + "_"
            if series_id.startswith(series_prefix):
                spec = entry
                user_token = series_id[len(series_prefix):]
                break
        if spec is None or not user_token:
            return None

        # "_" and "%" are LIKE wildcards. Escape them in the username
        # so "john_doe" cannot also match "johnXdoe"; the template's
        # own "%" stays a wildcard.
        escaped_user = (
            user_token.replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        like_pattern = spec["like_format"].format(user=escaped_user)

        # Close the connection explicitly even if the query raises.
        conn = manager._get_db_connection()
        try:
            cur = conn.cursor()
            # ``title DESC`` only breaks ties between rows sharing one
            # ``last_updated``: within a series the titles differ only
            # by ISO date / year, so the later period sorts first.
            cur.execute(
                """
                SELECT playlist_mbid FROM listenbrainz_playlists
                WHERE profile_id = ? AND title LIKE ? ESCAPE '\\'
                ORDER BY last_updated DESC, title DESC
                LIMIT 1
                """,
                (manager.profile_id, like_pattern),
            )
            row = cur.fetchone()
        finally:
            conn.close()
        return row[0] if row else None
    except Exception:
        return None
class TestDetectSeries:
    """Pins title detection and the derived ids / names / LIKE patterns."""

    def test_weekly_jams_collapses_into_rolling_series(self):
        found = detect_series("Weekly Jams for Nezreka, week of 2026-05-25 Mon")
        assert found is not None
        assert found.series_id == "lb_weekly_jams_Nezreka"
        assert found.canonical_name == "ListenBrainz Weekly Jams"
        assert found.source_for_mirror == "listenbrainz"
        assert found.title_pattern == "Weekly Jams for Nezreka, week of %"

    def test_weekly_exploration_collapses_into_rolling_series(self):
        found = detect_series("Weekly Exploration for Nezreka, week of 2026-04-13 Mon")
        assert found is not None
        assert found.series_id == "lb_weekly_exploration_Nezreka"
        assert found.canonical_name == "ListenBrainz Weekly Exploration"
        assert found.title_pattern == "Weekly Exploration for Nezreka, week of %"

    def test_top_discoveries_collapses_per_user(self):
        found = detect_series("Top Discoveries of 2024 for Nezreka")
        assert found is not None
        assert found.series_id == "lb_top_discoveries_Nezreka"
        assert found.canonical_name == "ListenBrainz Top Discoveries (latest year)"
        assert found.title_pattern == "Top Discoveries of % for Nezreka"

    def test_top_missed_collapses_per_user(self):
        found = detect_series("Top Missed Recordings of 2025 for Nezreka")
        assert found is not None
        assert found.series_id == "lb_top_missed_Nezreka"
        assert found.canonical_name == "ListenBrainz Top Missed Recordings (latest year)"

    def test_user_with_spaces_in_name(self):
        # A username containing a space must still match, and the
        # series id must carry the captured token verbatim. How the
        # caller feeds that into SQLite LIKE is outside this test.
        found = detect_series("Weekly Jams for Some User, week of 2026-01-05 Mon")
        assert found is not None
        assert found.series_id == "lb_weekly_jams_Some User"

    def test_lastfm_radio_is_not_a_series(self):
        # Last.fm radios have their own per-seed MBID and must not be
        # collapsed into a rolling series.
        radio_title = "Last.fm Radio: Selfish by Madison Beer"
        assert detect_series(radio_title) is None

    def test_user_created_playlist_is_not_a_series(self):
        custom_title = "My Custom Playlist"
        assert detect_series(custom_title) is None

    def test_empty_title_returns_none(self):
        assert detect_series("") is None
        assert detect_series(None) is None  # type: ignore[arg-type]


class TestSyntheticIdHelpers:
    """Pins the prefix list and the synthetic-id check."""

    def test_known_prefixes_listed(self):
        known = list_series_synthetic_ids()
        assert "lb_weekly_jams_" in known
        assert "lb_weekly_exploration_" in known
        assert "lb_top_discoveries_" in known
        assert "lb_top_missed_" in known

    def test_is_series_synthetic_id_matches_known(self):
        assert is_series_synthetic_id("lb_weekly_jams_Nezreka") is True
        assert is_series_synthetic_id("lb_weekly_exploration_OtherUser") is True
        assert is_series_synthetic_id("lb_top_discoveries_X") is True

    def test_is_series_synthetic_id_rejects_mbids(self):
        # A UUID-shaped playlist MBID never starts with ``lb_``.
        assert is_series_synthetic_id("4badb5c9-266e-42ef-9d06-879ee311c9e0") is False
        assert is_series_synthetic_id("") is False
        # ``lb_`` alone, or with an unknown tag, is not a real series.
        assert is_series_synthetic_id("lb_") is False
        assert is_series_synthetic_id("lb_random_thing") is False
@app.route('/api/listenbrainz/series-detect', methods=['GET'])
def get_listenbrainz_series_detect():
    """Report whether a ListenBrainz playlist title is part of a rotating series.

    Auto-mirror calls this to choose the mirror key: the per-playlist
    MBID for a one-off playlist, or a synthetic series id
    (e.g. ``lb_weekly_jams_<user>``) that rolls forward as new periods
    are published.

    Query: ``?title=<playlist title>``
    Response on a match:
        ``{matched: true, series_id, canonical_name, source}``
    Response on no match:
        ``{matched: false}``
    On an unexpected error: ``{matched: false, error}`` with HTTP 500.
    """
    try:
        from core.playlists.lb_series import detect_series

        # A missing parameter is treated as an empty title (no match).
        raw_title = request.args.get('title') or ''
        series = detect_series(raw_title.strip())
        if series is not None:
            payload = {
                "matched": True,
                "series_id": series.series_id,
                "canonical_name": series.canonical_name,
                "source": series.source_for_mirror,
            }
        else:
            payload = {"matched": False}
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"Error detecting LB series: {exc}")
        return jsonify({"matched": False, "error": str(exc)}), 500
Non-series LB playlists (user-created, + * collaborative, Last.fm radios for a specific seed) continue to + * mirror under their per-playlist MBID. */ -function _mirrorListenBrainzAfterDiscovery(playlistMbid) { +async function _mirrorListenBrainzAfterDiscovery(playlistMbid) { try { const state = listenbrainzPlaylistStates[playlistMbid]; if (!state || !state.playlist) return; @@ -10861,13 +10871,40 @@ function _mirrorListenBrainzAfterDiscovery(playlistMbid) { // Route them to ``source='lastfm'`` so the Auto-Sync manager // groups them under the Last.fm Radio section + the cascade- // delete hook targets the right mirror source. - const title = state.playlist.name || 'ListenBrainz Playlist'; - const mirrorSource = title.startsWith('Last.fm Radio:') ? 'lastfm' : 'listenbrainz'; + const rawTitle = state.playlist.name || 'ListenBrainz Playlist'; + let mirrorSource = rawTitle.startsWith('Last.fm Radio:') ? 'lastfm' : 'listenbrainz'; + let mirrorSourcePlaylistId = playlistMbid; + let mirrorName = rawTitle; + + // Rolling-series detection — backend tells us whether the title + // belongs to a known rotating LB series. If so, collapse this + // mirror onto a synthetic id + canonical name so per-week / + // per-year duplicates roll up into one row. + try { + const seriesResp = await fetch( + `/api/listenbrainz/series-detect?title=${encodeURIComponent(rawTitle)}` + ); + if (seriesResp.ok) { + const seriesData = await seriesResp.json(); + if (seriesData && seriesData.matched) { + mirrorSource = seriesData.source || mirrorSource; + mirrorSourcePlaylistId = seriesData.series_id || mirrorSourcePlaylistId; + mirrorName = seriesData.canonical_name || mirrorName; + console.log( + `🔁 [LB Series] '${rawTitle}' rolled into '${mirrorName}' ` + + `(series id: ${mirrorSourcePlaylistId})` + ); + } + } + } catch (_) { + // Non-fatal — fall through to per-playlist mirror id. + } + const ownerFallback = mirrorSource === 'lastfm' ? 
'Last.fm' : 'ListenBrainz'; mirrorPlaylist( mirrorSource, - playlistMbid, - title, + mirrorSourcePlaylistId, + mirrorName, tracks, { owner: state.playlist.creator || ownerFallback, @@ -10875,7 +10912,7 @@ function _mirrorListenBrainzAfterDiscovery(playlistMbid) { image_url: state.playlist.image_url || '', } ); - console.log(`🪞 [${mirrorSource} Mirror] Mirrored '${title}' with ${tracks.length} matched tracks`); + console.log(`🪞 [${mirrorSource} Mirror] Mirrored '${mirrorName}' with ${tracks.length} matched tracks`); } catch (err) { console.warn('LB mirror-after-discovery failed:', err); }