mirror of https://github.com/Nezreka/SoulSync.git
ListenBrainz publishes "Weekly Jams for X" / "Weekly Exploration
for X" with a fresh MBID every week, and "Top Discoveries of YYYY
for X" / "Top Missed Recordings of YYYY for X" with a fresh MBID
every year. Auto-mirroring those per period yielded one mirrored-
playlist row per week/year — useless for Auto-Sync schedules
because the underlying LB playlist never updates; a brand new
playlist replaces it instead. The user accumulates 100+ dead Weekly
Jams rows per year if they discover regularly.
This commit collapses each family into a single ROLLING mirror
keyed by a synthetic ``source_playlist_id`` (e.g.
``lb_weekly_jams_Nezreka``). Each new period UPSERTs into the same
row, so the user gets one stable Auto-Sync schedule per series
that automatically picks up the latest period's tracks on every
refresh. Non-series LB playlists (user-created, collaborative,
Last.fm radios for a specific seed) continue to mirror under
their per-playlist MBID as before. Per-period LB playlists are
still visible + usable on the LB Sync tab — only the mirror layer
collapses.
- ``core/playlists/lb_series.py`` (new) — series-detect helper
with regex patterns + canonical-name + LIKE-pattern template
for each known LB family. Exposes
``detect_series(title)``, ``is_series_synthetic_id(id)``, and
``list_series_synthetic_ids()`` so both the JS auto-mirror hook
and the LB adapter can speak the same language.
- ``GET /api/listenbrainz/series-detect?title=...`` — thin HTTP
shim around ``detect_series`` so the auto-mirror JS doesn't
duplicate the regex.
- ``ListenBrainzPlaylistSource.get_playlist`` now recognizes
synthetic series ids — it queries the LB cache for the newest
cache row whose title matches the series' LIKE pattern and
resolves to that row's MBID before fetching tracks. The mirror's
meta keeps the synthetic id so refreshes always re-resolve to
the latest period.
- ``_mirrorListenBrainzAfterDiscovery`` (sync-services.js) calls
the new detect endpoint when discovery completes — if a match
comes back it swaps the per-period MBID for the synthetic id +
the canonical name. Existing Last.fm radio routing logic stays
intact (Last.fm radios aren't a series).
- ``ListenBrainzManager._cleanup_per_period_series_mirrors`` —
one-shot consolidation sweeper that runs in ``_cleanup_old_playlists``
and deletes any legacy per-period mirror rows so the consolidated
rolling mirror is the only one left. Idempotent — it only matches
per-period titles ("Weekly Jams for ..., week of ...") and never
the canonical rolling-mirror titles ("ListenBrainz Weekly
Jams").
- 11 new tests pin the detector + synthetic-id helpers; 236 total
across adapter + automation + lb-series suites green.
pull/709/head
parent
e8ee8576a0
commit
cf5da04439
@ -0,0 +1,125 @@
|
||||
"""ListenBrainz series detection for rolling mirrored playlists.
|
||||
|
||||
ListenBrainz publishes a few playlist families that get a brand new
MBID every period (week or year) — e.g. "Weekly Jams for Nezreka,
week of 2026-05-25 Mon" gets a fresh row each Monday, while the
previous Monday's row rotates out of the cache after ~25 weeks.
Auto-syncing the per-period MBID is useless because the underlying
ListenBrainz playlist never updates — only the new period gets new
tracks.

This module lets the auto-mirror code collapse those families into
a single rolling mirror per series. The mirror's
``source_playlist_id`` is a synthetic identifier (e.g.
``lb_weekly_jams_Nezreka``) instead of the per-period MBID, and the
refresh path resolves the synthetic id back to the latest period's
cached playlist at refresh time.

One-off playlists (user-created, collaborative, Last.fm radios) are
NOT collapsed — they have stable identifiers in their own right.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SeriesMatch:
    """Result of matching a playlist title against a rotating series.

    Attributes:
        series_id: Synthetic identifier shared by every period of the
            series, e.g. ``"lb_weekly_jams_Nezreka"``.
        canonical_name: Display name used for the rolling mirror.
        source_for_mirror: Source the mirror is filed under
            (``"listenbrainz"`` or ``"lastfm"``).
        title_pattern: SQL LIKE pattern that finds the member playlists
            of the series, e.g. ``"Weekly Jams for Nezreka, week of %"``.
    """

    series_id: str
    canonical_name: str
    source_for_mirror: str
    title_pattern: str
|
||||
|
||||
|
||||
# Registry of the rotating ListenBrainz playlist families. Each entry
# carries:
#   regex          - compiled pattern matched from the start of the raw
#                    title; the ``user`` group captures the ListenBrainz
#                    username (yearly series also capture ``year``)
#   series_format  - template for the synthetic series id
#   canonical_name - display name for the rolling mirror
#   source         - source field the resulting mirror should sit under
#   like_format    - template for the SQL LIKE pattern that finds the
#                    member playlists of the series
_SERIES_PATTERNS = [
    dict(
        regex=re.compile(r"^Weekly Jams for (?P<user>.+?), week of "),
        series_format="lb_weekly_jams_{user}",
        canonical_name="ListenBrainz Weekly Jams",
        source="listenbrainz",
        like_format="Weekly Jams for {user}, week of %",
    ),
    dict(
        regex=re.compile(r"^Weekly Exploration for (?P<user>.+?), week of "),
        series_format="lb_weekly_exploration_{user}",
        canonical_name="ListenBrainz Weekly Exploration",
        source="listenbrainz",
        like_format="Weekly Exploration for {user}, week of %",
    ),
    dict(
        regex=re.compile(r"^Top Discoveries of (?P<year>\d{4}) for (?P<user>.+)$"),
        series_format="lb_top_discoveries_{user}",
        canonical_name="ListenBrainz Top Discoveries (latest year)",
        source="listenbrainz",
        # ``%`` takes the place of the year. There is no trailing
        # wildcard, so the LIKE pattern has to match through the end of
        # the title — mirroring the ``$`` end-anchor of the regex.
        like_format="Top Discoveries of % for {user}",
    ),
    dict(
        regex=re.compile(r"^Top Missed Recordings of (?P<year>\d{4}) for (?P<user>.+)$"),
        series_format="lb_top_missed_{user}",
        canonical_name="ListenBrainz Top Missed Recordings (latest year)",
        source="listenbrainz",
        like_format="Top Missed Recordings of % for {user}",
    ),
]
|
||||
|
||||
|
||||
def detect_series(title: str) -> Optional[SeriesMatch]:
    """Map a raw playlist title onto its rolling series, if it has one.

    Args:
        title: The playlist title exactly as stored on the LB cache row,
            e.g. ``"Weekly Jams for Nezreka, week of 2026-05-25 Mon"``.

    Returns:
        A ``SeriesMatch`` built from the first entry of
        ``_SERIES_PATTERNS`` whose regex matches ``title``; ``None`` for
        an empty/``None`` title or when no entry matches.

    Note:
        The captured user name is interpolated verbatim into both the
        series id and the LIKE pattern — ``%`` / ``_`` inside a user
        name are not escaped here.
    """
    if not title:
        return None

    for entry in _SERIES_PATTERNS:
        found = entry["regex"].match(title)
        if found is not None:
            # Every pattern captures ``user`` (the yearly ones also
            # capture ``year``). Both templates only reference ``user``;
            # ``str.format`` ignores the unused keyword.
            captured = found.groupdict()
            synthetic_id = entry["series_format"].format(**captured)
            like_pattern = entry["like_format"].format(**captured)
            return SeriesMatch(
                series_id=synthetic_id,
                canonical_name=entry["canonical_name"],
                source_for_mirror=entry["source"],
                title_pattern=like_pattern,
            )

    return None
|
||||
|
||||
|
||||
def list_series_synthetic_ids() -> List[str]:
    """Return the series-id PREFIX of every known series.

    Despite the name, the values are prefixes such as
    ``lb_weekly_jams_`` rather than complete ids: each one is the
    series-id template rendered with an empty user.

    Callers (e.g. the LB adapter's refresh path) use them to decide
    whether a ``source_playlist_id`` is a synthetic series id that
    needs special resolution.
    """
    prefixes: List[str] = []
    for entry in _SERIES_PATTERNS:
        without_user = entry["series_format"].format(user="")
        # Normalise to exactly one trailing underscore.
        prefixes.append(without_user.rstrip("_") + "_")
    return prefixes
|
||||
|
||||
|
||||
def is_series_synthetic_id(source_playlist_id: str) -> bool:
    """Tell whether ``source_playlist_id`` is a synthetic series id.

    Synthetic ids all begin with ``lb_`` followed by a recognizable
    series tag, whereas MusicBrainz MBIDs are dash-separated
    8-4-4-4-12 hex strings, so the two can never be confused.

    Args:
        source_playlist_id: The id stored on a mirrored playlist.

    Returns:
        ``True`` when the id starts with one of the prefixes from
        ``list_series_synthetic_ids()``; ``False`` otherwise, including
        for empty values.
    """
    # Fast path: anything that is empty or lacks the shared ``lb_``
    # prefix cannot be a series id.
    if source_playlist_id and source_playlist_id.startswith("lb_"):
        known_prefixes = tuple(list_series_synthetic_ids())
        return source_playlist_id.startswith(known_prefixes)
    return False
|
||||
@ -0,0 +1,89 @@
|
||||
"""Tests for the LB rotating-series detector that powers the
|
||||
rolling-mirror collapse on the Sync page.
|
||||
|
||||
Pins the title patterns + canonical-name templates so accidental
regex tweaks don't silently break the auto-mirror grouping the
Auto-Sync manager + Mirrored tab rely on.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from core.playlists.lb_series import (
|
||||
detect_series,
|
||||
is_series_synthetic_id,
|
||||
list_series_synthetic_ids,
|
||||
)
|
||||
|
||||
|
||||
class TestDetectSeries:
    """Pins ``detect_series`` for every known family and for non-series titles."""

    def test_weekly_jams_collapses_into_rolling_series(self):
        match = detect_series("Weekly Jams for Nezreka, week of 2026-05-25 Mon")
        assert match is not None
        observed = (
            match.series_id,
            match.canonical_name,
            match.source_for_mirror,
            match.title_pattern,
        )
        assert observed == (
            "lb_weekly_jams_Nezreka",
            "ListenBrainz Weekly Jams",
            "listenbrainz",
            "Weekly Jams for Nezreka, week of %",
        )

    def test_weekly_exploration_collapses_into_rolling_series(self):
        match = detect_series("Weekly Exploration for Nezreka, week of 2026-04-13 Mon")
        assert match is not None
        observed = (match.series_id, match.canonical_name, match.title_pattern)
        assert observed == (
            "lb_weekly_exploration_Nezreka",
            "ListenBrainz Weekly Exploration",
            "Weekly Exploration for Nezreka, week of %",
        )

    def test_top_discoveries_collapses_per_user(self):
        match = detect_series("Top Discoveries of 2024 for Nezreka")
        assert match is not None
        observed = (match.series_id, match.canonical_name, match.title_pattern)
        assert observed == (
            "lb_top_discoveries_Nezreka",
            "ListenBrainz Top Discoveries (latest year)",
            "Top Discoveries of % for Nezreka",
        )

    def test_top_missed_collapses_per_user(self):
        match = detect_series("Top Missed Recordings of 2025 for Nezreka")
        assert match is not None
        observed = (match.series_id, match.canonical_name)
        assert observed == (
            "lb_top_missed_Nezreka",
            "ListenBrainz Top Missed Recordings (latest year)",
        )

    def test_user_with_spaces_in_name(self):
        # ListenBrainz usernames may contain spaces. The regex must still
        # match and the series id carries the user token verbatim; whether
        # SQLite LIKE copes with it is left to the caller.
        match = detect_series("Weekly Jams for Some User, week of 2026-01-05 Mon")
        assert match is not None
        assert match.series_id == "lb_weekly_jams_Some User"

    def test_lastfm_radio_is_not_a_series(self):
        # Last.fm radios have their own per-seed MBID, so they must NOT
        # be folded into a rolling series.
        radio_title = "Last.fm Radio: Selfish by Madison Beer"
        assert detect_series(radio_title) is None

    def test_user_created_playlist_is_not_a_series(self):
        custom_title = "My Custom Playlist"
        assert detect_series(custom_title) is None

    def test_empty_title_returns_none(self):
        for blank in ("", None):
            assert detect_series(blank) is None  # type: ignore[arg-type]
|
||||
|
||||
|
||||
class TestSyntheticIdHelpers:
    """Pins the prefix listing and the synthetic-id predicate."""

    def test_known_prefixes_listed(self):
        listed = list_series_synthetic_ids()
        for expected in (
            "lb_weekly_jams_",
            "lb_weekly_exploration_",
            "lb_top_discoveries_",
            "lb_top_missed_",
        ):
            assert expected in listed

    def test_is_series_synthetic_id_matches_known(self):
        for synthetic in (
            "lb_weekly_jams_Nezreka",
            "lb_weekly_exploration_OtherUser",
            "lb_top_discoveries_X",
        ):
            assert is_series_synthetic_id(synthetic) is True

    def test_is_series_synthetic_id_rejects_mbids(self):
        rejected = (
            # Real LB playlist MBIDs are UUID-shaped, never start with ``lb_``.
            "4badb5c9-266e-42ef-9d06-879ee311c9e0",
            "",
            "lb_",  # not a real series
            "lb_random_thing",
        )
        for candidate in rejected:
            assert is_series_synthetic_id(candidate) is False
|
||||
Loading…
Reference in new issue