mirror of https://github.com/Nezreka/SoulSync.git
The dashboard 'enrichment bubbles' let you pause or hover over a worker but
offered no way to *manage* one. This adds a full management modal, opened from
a new header button, covering all 11 enrichment sources.
Backend (testable core helper + seam tests; no live-DB dependency):
- core/enrichment/unmatched.py: pure, whitelisted SQL builders for the
unmatched browser. service/entity validated against a support map (never
interpolated raw); search + pagination bound as params; tracks join albums
for artwork; limit capped at 200.
- database/music_database.py: get_enrichment_unmatched() +
get_enrichment_breakdown() (the breakdown splits matched/not_found/pending,
which the existing get_stats().progress lumps together).
- core/enrichment/api.py: GET /api/enrichment/<id>/{unmatched,breakdown} on
the existing blueprint + a db_getter hook.
- web_server.py: wire db_getter=get_database.
- tests/enrichment/test_unmatched.py: 19 tests across builders, DB methods,
and Flask routes.
Frontend (vanilla, matches app conventions):
- webui/static/enrichment-manager.js: worker rail with live status + coverage
micro-bars, accent-themed detail panel (hero header, segmented matched/
not_found/pending stat cards, current item, pause/resume), and a searchable
paginated unmatched browser with inline manual match (reusing
search-service + manual-match) and retry (clear-match re-queues).
- Polish: entrance/exit motion, scroll-lock, Escape, refresh control,
flicker-free polling (in-place updates), skeleton loaders, relative
timestamps, per-worker accent theming, real dashboard logos reused at
runtime (with the same invert/circle treatment), responsive rail.
- index.html: header button + script include. style.css: full styling.
Reuses existing pause/resume, status, and manual search+assign endpoints.
Backend tests green (19 new + 11 existing enrichment tests).
pull/778/head
parent
7956aaac9e
commit
0b3c3f656d
@ -0,0 +1,214 @@
|
||||
"""Read-side helpers for browsing the items an enrichment source hasn't matched.
|
||||
|
||||
The dashboard "Manage Enrichment Workers" modal lists, per source, the
|
||||
artists / albums / tracks whose ``<service>_match_status`` is ``'not_found'``
|
||||
(or still pending = ``NULL``) so the user can manually match them. Every
|
||||
enrichment source writes a uniform ``<service>_match_status`` column, so one
|
||||
parametric query serves all 11 workers.
|
||||
|
||||
This module owns the column mapping and SQL construction. ``service`` and
|
||||
``entity_type`` are whitelisted against :data:`SERVICE_ENTITY_SUPPORT` and the
|
||||
entity table map before any column name is interpolated — user-supplied values
|
||||
(the search term, pagination) are always bound parameters, never interpolated.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
# Entity types covered by each enrichment source, keyed by service name.
# This mirrors the authoritative ``_SERVICE_ID_COLUMNS`` map in web_server.py
# (the one manual-match uses); it is duplicated here so the unmatched browser
# stays self-contained and unit-testable. Entity names are singular
# ('artist' / 'album' / 'track') to match the manual-match vocabulary.
_ALL_ENTITY_TYPES = ('artist', 'album', 'track')

SERVICE_ENTITY_SUPPORT = {
    'spotify': _ALL_ENTITY_TYPES,
    'musicbrainz': _ALL_ENTITY_TYPES,
    'deezer': _ALL_ENTITY_TYPES,
    'audiodb': _ALL_ENTITY_TYPES,
    'discogs': ('artist', 'album'),  # no track-level id column
    'itunes': _ALL_ENTITY_TYPES,
    'lastfm': _ALL_ENTITY_TYPES,
    'genius': ('artist', 'track'),  # no album-level id column
    'tidal': _ALL_ENTITY_TYPES,
    'qobuz': _ALL_ENTITY_TYPES,
    'amazon': _ALL_ENTITY_TYPES,
}
|
||||
|
||||
# Per-entity SQL metadata: the source table, the column used as the display
# name, the expression that yields artwork, and any join that expression needs.
# Tracks have no artwork column of their own, so theirs is borrowed from the
# parent album through a LEFT JOIN (aliased ``al``).
_ENTITY_TABLE = {
    'artist': {
        'table': 'artists',
        'name': 'name',
        'image': 'artists.thumb_url',
        'join': '',
    },
    'album': {
        'table': 'albums',
        'name': 'title',
        'image': 'albums.thumb_url',
        'join': '',
    },
    'track': {
        'table': 'tracks',
        'name': 'title',
        'image': 'al.thumb_url',
        'join': 'LEFT JOIN albums al ON tracks.album_id = al.id',
    },
}
|
||||
|
||||
# Accepted values for the ``status`` filter. 'unmatched' is the union of the
# other two: rows that are still pending OR were explicitly marked not_found.
VALID_STATUSES = ('not_found', 'pending', 'unmatched')

# Upper bound on page size, so a malicious or buggy caller cannot request the
# whole library in a single query.
MAX_LIMIT = 200
|
||||
|
||||
|
||||
class UnmatchedQueryError(ValueError):
    """Signals a rejected request: unknown service, unsupported entity type, or bad status."""
|
||||
|
||||
|
||||
def supported_entity_types(service: str) -> Tuple[str, ...]:
    """Look up which entity types ``service`` enriches.

    Returns the tuple registered in ``SERVICE_ENTITY_SUPPORT``, or an empty
    tuple when the service name is not registered there.
    """
    if service in SERVICE_ENTITY_SUPPORT:
        return SERVICE_ENTITY_SUPPORT[service]
    return ()
|
||||
|
||||
|
||||
def match_status_column(service: str) -> str:
    """Return the name of the per-service match-status column.

    The name is built by plain string formatting with no validation, so
    callers that place the result in SQL must whitelist ``service`` first.
    """
    return "{}_match_status".format(service)
|
||||
|
||||
|
||||
def last_attempted_column(service: str) -> str:
    """Return the name of the per-service last-attempted column.

    The name is built by plain string formatting with no validation, so
    callers that place the result in SQL must whitelist ``service`` first.
    """
    return "{}_last_attempted".format(service)
|
||||
|
||||
|
||||
def _validate(service: str, entity_type: str) -> None:
    """Whitelist ``service`` and ``entity_type`` before they reach any SQL.

    Raises:
        UnmatchedQueryError: if the service is not in
            ``SERVICE_ENTITY_SUPPORT``, if the service does not cover the
            entity type, or if the entity type has no ``_ENTITY_TABLE`` entry.
    """
    covered = SERVICE_ENTITY_SUPPORT.get(service)
    if covered is None:
        raise UnmatchedQueryError(f"Unknown enrichment service: {service!r}")

    if entity_type not in covered:
        message = f"{service} does not enrich {entity_type!r} entities"
        raise UnmatchedQueryError(message)

    # Defensive: only reachable if the support map and the table map drift apart.
    if entity_type not in _ENTITY_TABLE:
        raise UnmatchedQueryError(f"No table mapping for entity type {entity_type!r}")
|
||||
|
||||
|
||||
def _status_predicate(service: str, status: str, qualifier: str) -> str:
    """Return the SQL condition selecting rows in the requested match state.

    The status column is always prefixed with ``qualifier`` (a table name or
    alias). That keeps the condition unambiguous when the query joins a second
    table that also has a ``<service>_match_status`` column, as the track query
    does with its LEFT JOIN on albums.

    ``'pending'`` maps to ``IS NULL`` and ``'not_found'`` to an equality test;
    any other value yields the combined "unmatched" form.
    """
    column = f"{qualifier}.{match_status_column(service)}"
    is_pending = f"{column} IS NULL"
    is_not_found = f"{column} = 'not_found'"

    if status == 'pending':
        return is_pending
    if status == 'not_found':
        return is_not_found
    # 'unmatched': pending OR explicitly not found
    return f"({is_pending} OR {is_not_found})"
|
||||
|
||||
|
||||
# Escape character for LIKE patterns; paired with ``ESCAPE '\'`` in the SQL.
_LIKE_ESCAPE = '\\'


def _like_pattern(term) -> str:
    """Wrap ``term`` in ``%...%`` with LIKE metacharacters escaped.

    ``%`` and ``_`` typed by the user are matched literally rather than acting
    as wildcards. The escape character is doubled first so it survives too.
    """
    escaped = (
        str(term)
        .replace(_LIKE_ESCAPE, _LIKE_ESCAPE * 2)
        .replace('%', _LIKE_ESCAPE + '%')
        .replace('_', _LIKE_ESCAPE + '_')
    )
    return f"%{escaped}%"


def _build_filters(
    service: str,
    entity_type: str,
    status: str,
    query: Optional[str],
) -> Tuple[str, List]:
    """Validate the inputs and build the WHERE body shared by list and count.

    Keeping this in one place guarantees the COUNT(*) query filters exactly
    the same rows as the paginated SELECT.

    Returns:
        ``(where_sql, params)`` where ``where_sql`` excludes the ``WHERE``
        keyword and ``params`` holds the bound search pattern, if any.

    Raises:
        UnmatchedQueryError: for an unknown service, unsupported entity type,
            or a status outside ``VALID_STATUSES``.
    """
    _validate(service, entity_type)
    if status not in VALID_STATUSES:
        raise UnmatchedQueryError(f"Invalid status: {status!r}")

    meta = _ENTITY_TABLE[entity_type]
    table = meta['table']

    conditions = [_status_predicate(service, status, table)]
    params: List = []
    if query:
        conditions.append(
            f"{table}.{meta['name']} LIKE ? ESCAPE '{_LIKE_ESCAPE}'"
        )
        params.append(_like_pattern(query))
    return ' AND '.join(conditions), params


def _coerce_offset(offset) -> int:
    """Return ``offset`` as a non-negative int, or 0 when it is unusable.

    Mirrors the tolerance ``_clamp_limit`` gives ``limit``, so a junk
    pagination value degrades to the first page instead of raising.
    """
    try:
        return max(int(offset or 0), 0)
    except (TypeError, ValueError, OverflowError):
        return 0


def build_unmatched_query(
    service: str,
    entity_type: str,
    status: str = 'not_found',
    query: Optional[str] = None,
    limit: int = 50,
    offset: int = 0,
) -> Tuple[str, List]:
    """Build the paginated SELECT for one (service, entity_type, status) view.

    Args:
        service: enrichment source name; must be in ``SERVICE_ENTITY_SUPPORT``.
        entity_type: ``'artist'``, ``'album'`` or ``'track'``, as supported by
            the service.
        status: one of ``VALID_STATUSES``.
        query: optional case-insensitive substring to match against the
            display name; bound as a parameter, with ``%``/``_`` escaped.
        limit: page size; invalid values fall back to the default and large
            values are capped by ``_clamp_limit``.
        offset: rows to skip; invalid or negative values become 0.

    Returns:
        ``(sql, params)``. Selected columns: id, name, image_url, status,
        last_attempted. ``params`` ends with the limit and then the offset.

    Raises:
        UnmatchedQueryError: for an unknown service, unsupported entity type,
            or invalid status.
    """
    where, params = _build_filters(service, entity_type, status, query)

    meta = _ENTITY_TABLE[entity_type]
    table, name_col = meta['table'], meta['name']
    ms = match_status_column(service)
    la = last_attempted_column(service)

    clauses = [
        f"SELECT {table}.id AS id, {table}.{name_col} AS name, "
        f"{meta['image']} AS image_url, {table}.{ms} AS status, "
        f"{table}.{la} AS last_attempted",
        f"FROM {table}",
        meta['join'],  # empty for artists/albums; dropped below
        f"WHERE {where}",
        # id is a tie-breaker so rows sharing a name keep a stable position
        # across LIMIT/OFFSET pages.
        f"ORDER BY {table}.{name_col} COLLATE NOCASE, {table}.id",
        "LIMIT ? OFFSET ?",
    ]
    sql = ' '.join(clause for clause in clauses if clause)

    params.append(_clamp_limit(limit))
    params.append(_coerce_offset(offset))
    return sql, params


def build_count_query(
    service: str,
    entity_type: str,
    status: str = 'not_found',
    query: Optional[str] = None,
) -> Tuple[str, List]:
    """Build the COUNT(*) matching :func:`build_unmatched_query`'s filters.

    Returns:
        ``(sql, params)``; ``params`` holds only the search pattern, if any.

    Raises:
        UnmatchedQueryError: for an unknown service, unsupported entity type,
            or invalid status.
    """
    where, params = _build_filters(service, entity_type, status, query)
    table = _ENTITY_TABLE[entity_type]['table']
    return f"SELECT COUNT(*) FROM {table} WHERE {where}", params
|
||||
|
||||
|
||||
def build_breakdown_query(service: str, entity_type: str) -> Tuple[str, List]:
    """Build the matched / not_found / pending / total tally for one entity type.

    Each bucket is wrapped in ``COALESCE(..., 0)`` because ``SUM`` over zero
    rows yields NULL; without it an empty table would report ``None`` for the
    three buckets while ``total`` reports 0.

    Returns:
        ``(sql, params)``; ``params`` is always empty because the query takes
        no user-supplied values.

    Raises:
        UnmatchedQueryError: for an unknown service or unsupported entity type.
    """
    _validate(service, entity_type)
    table = _ENTITY_TABLE[entity_type]['table']
    ms = f"{table}.{match_status_column(service)}"

    def tally(condition: str, alias: str) -> str:
        return (
            f"COALESCE(SUM(CASE WHEN {condition} THEN 1 ELSE 0 END), 0) "
            f"AS {alias}"
        )

    columns = ", ".join([
        tally(f"{ms} = 'matched'", "matched"),
        tally(f"{ms} = 'not_found'", "not_found"),
        tally(f"{ms} IS NULL", "pending"),
        "COUNT(*) AS total",
    ])
    return f"SELECT {columns} FROM {table}", []
|
||||
|
||||
|
||||
def _clamp_limit(limit) -> int:
    """Coerce a requested page size into the range ``1..MAX_LIMIT``.

    Anything that cannot be converted to an int, and any value <= 0, falls
    back to the default page size of 50. ``OverflowError`` is caught as well
    because ``int(float('inf'))`` raises it rather than ``ValueError``.
    """
    try:
        n = int(limit)
    except (TypeError, ValueError, OverflowError):
        return 50
    if n <= 0:
        return 50
    return min(n, MAX_LIMIT)
|
||||
|
||||
|
||||
# Public API of this module.
__all__ = [
    # constants
    'SERVICE_ENTITY_SUPPORT', 'VALID_STATUSES', 'MAX_LIMIT',
    # errors
    'UnmatchedQueryError',
    # column-name helpers
    'supported_entity_types', 'match_status_column', 'last_attempted_column',
    # SQL builders
    'build_unmatched_query', 'build_count_query', 'build_breakdown_query',
]
|
||||
@ -0,0 +1,194 @@
|
||||
"""Unmatched-browser backend for the Manage Enrichment Workers modal.
|
||||
|
||||
Three seams:
|
||||
* pure SQL builders + validation (core.enrichment.unmatched)
|
||||
* the MusicDatabase read methods against a temp DB
|
||||
* the Flask routes via a test client
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from flask import Flask
|
||||
|
||||
from core.enrichment import api as enrichment_api
|
||||
from core.enrichment.unmatched import (
|
||||
MAX_LIMIT,
|
||||
UnmatchedQueryError,
|
||||
build_breakdown_query,
|
||||
build_count_query,
|
||||
build_unmatched_query,
|
||||
supported_entity_types,
|
||||
)
|
||||
from database.music_database import MusicDatabase
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Pure builders / validation
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
def test_unknown_service_rejected():
    """A service name outside the support map is refused by the builder."""
    with pytest.raises(UnmatchedQueryError):
        build_unmatched_query(service='not-a-service', entity_type='artist')
|
||||
|
||||
|
||||
def test_unsupported_entity_type_rejected():
    """Entity types a source does not cover are refused by every builder."""
    # Genius enriches artists + tracks but has no album-level id column.
    genius_types = supported_entity_types('genius')
    assert 'album' not in genius_types

    with pytest.raises(UnmatchedQueryError):
        build_unmatched_query('genius', 'album')

    # Discogs has no track column.
    with pytest.raises(UnmatchedQueryError):
        build_breakdown_query('discogs', 'track')
|
||||
|
||||
|
||||
def test_bad_status_rejected():
    """A status outside VALID_STATUSES is refused even for a valid service/entity."""
    bogus_status = 'bogus'
    with pytest.raises(UnmatchedQueryError):
        build_unmatched_query('spotify', 'artist', status=bogus_status)
|
||||
|
||||
|
||||
def test_status_predicates():
    """Each status value produces its own table-qualified WHERE condition."""
    not_found_sql = build_count_query('spotify', 'artist', 'not_found')[0]
    pending_sql = build_count_query('spotify', 'artist', 'pending')[0]
    unmatched_sql = build_count_query('spotify', 'artist', 'unmatched')[0]

    assert "artists.spotify_match_status = 'not_found'" in not_found_sql
    assert "artists.spotify_match_status IS NULL" in pending_sql
    # 'unmatched' is the union of the two states above.
    assert "IS NULL OR" in unmatched_sql
    assert "= 'not_found'" in unmatched_sql
|
||||
|
||||
|
||||
def test_track_query_qualifies_status_to_avoid_join_ambiguity():
    """The track query joins albums for artwork and qualifies its status column.

    Both tables carry ``spotify_match_status``, so an unqualified reference
    would make SQLite fail with "ambiguous column".
    """
    track_sql = build_unmatched_query('spotify', 'track', 'not_found')[0]
    expected_fragments = (
        'LEFT JOIN albums al',
        'tracks.spotify_match_status',
        'al.thumb_url AS image_url',
    )
    for fragment in expected_fragments:
        assert fragment in track_sql
|
||||
|
||||
|
||||
def test_search_adds_like_param():
    """A search term becomes a bound LIKE parameter, never inline SQL."""
    built = build_unmatched_query(
        'spotify', 'artist', 'not_found', query='dragons',
    )
    statement, bound = built
    assert 'LIKE ?' in statement
    assert '%dragons%' in bound
|
||||
|
||||
|
||||
def test_limit_is_clamped():
    """Oversized limits are capped at MAX_LIMIT; a zero limit falls back to 50."""
    _, oversized = build_unmatched_query(
        'spotify', 'artist', 'not_found', limit=99999,
    )
    # The last two bound params are always (limit, offset).
    *_, bound_limit, bound_offset = oversized
    assert bound_limit == MAX_LIMIT
    assert bound_offset == 0

    _, zero = build_unmatched_query('spotify', 'artist', 'not_found', limit=0)
    assert zero[-2] == 50  # invalid -> default
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# MusicDatabase integration (temp DB)
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
def _seed(db: MusicDatabase):
    """Insert the fixture rows used by the DB and route tests.

    Seeds three artists (matched / not_found / pending), plus one album and
    one track, both ``not_found``, to exercise the join-for-artwork path. The
    connection is closed in a ``finally`` so a failing insert cannot leak it.
    """
    conn = db._get_connection()
    try:
        cur = conn.cursor()
        # 3 artists: matched / not_found / pending(NULL)
        cur.execute("INSERT INTO artists (id, name, spotify_match_status) VALUES ('a1','Matched Artist','matched')")
        cur.execute("INSERT INTO artists (id, name, spotify_match_status) VALUES ('a2','Failed Dragons','not_found')")
        # Status column omitted on purpose so the row keeps the column default.
        cur.execute("INSERT INTO artists (id, name) VALUES ('a3','Pending Person')")
        # album + track to exercise the join-for-artwork path
        cur.execute("INSERT INTO albums (id, artist_id, title, thumb_url, spotify_match_status) "
                    "VALUES ('al1','a2','Evolve','http://img/evolve.jpg','not_found')")
        cur.execute("INSERT INTO tracks (id, album_id, artist_id, title, spotify_match_status) "
                    "VALUES ('t1','al1','a2','Believer','not_found')")
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
@pytest.fixture
def db(tmp_path):
    """Provide a seeded MusicDatabase backed by a throwaway file under ``tmp_path``."""
    database = MusicDatabase(str(tmp_path / 'enrich.db'))
    _seed(database)
    return database
|
||||
|
||||
|
||||
def test_breakdown_splits_matched_notfound_pending(db):
    """The breakdown reports each of the three seeded artist states separately."""
    expected = {'matched': 1, 'not_found': 1, 'pending': 1, 'total': 3}
    assert db.get_enrichment_breakdown('spotify', 'artist') == expected
|
||||
|
||||
|
||||
def test_unmatched_not_found_only(db):
    """status='not_found' returns only the explicitly failed artist."""
    result = db.get_enrichment_unmatched('spotify', 'artist', status='not_found')
    items = result['items']
    assert result['total'] == 1
    assert [item['name'] for item in items] == ['Failed Dragons']
    assert items[0]['status'] == 'not_found'
|
||||
|
||||
|
||||
def test_unmatched_pending_only(db):
    """status='pending' returns only the artist whose status is still NULL."""
    result = db.get_enrichment_unmatched('spotify', 'artist', status='pending')
    assert result['total'] == 1
    first = result['items'][0]
    assert first['name'] == 'Pending Person'
|
||||
|
||||
|
||||
def test_unmatched_combined(db):
    """status='unmatched' returns both the not_found and the pending artist."""
    result = db.get_enrichment_unmatched('spotify', 'artist', status='unmatched')
    assert result['total'] == 2
    names = {item['name'] for item in result['items']}
    assert names == {'Failed Dragons', 'Pending Person'}
|
||||
|
||||
|
||||
def test_unmatched_search_filters_by_name(db):
    """A search term narrows the unmatched set to names containing it."""
    result = db.get_enrichment_unmatched(
        'spotify', 'artist', status='unmatched', query='dragons',
    )
    assert result['total'] == 1
    only_item = result['items'][0]
    assert only_item['name'] == 'Failed Dragons'
|
||||
|
||||
|
||||
def test_unmatched_pagination(db):
    """limit/offset walk the result set one row at a time without repeats."""
    first_page = db.get_enrichment_unmatched(
        'spotify', 'artist', status='unmatched', limit=1, offset=0,
    )
    # total reflects the whole filtered set, not just the current page.
    assert first_page['total'] == 2 and len(first_page['items']) == 1

    second_page = db.get_enrichment_unmatched(
        'spotify', 'artist', status='unmatched', limit=1, offset=1,
    )
    assert second_page['items'][0]['name'] != first_page['items'][0]['name']
|
||||
|
||||
|
||||
def test_track_unmatched_borrows_album_artwork(db):
    """An unmatched track's image_url comes from its parent album."""
    result = db.get_enrichment_unmatched('spotify', 'track', status='not_found')
    assert result['total'] == 1
    track = result['items'][0]
    assert track['name'] == 'Believer'
    assert track['image_url'] == 'http://img/evolve.jpg'
|
||||
|
||||
|
||||
def test_db_raises_on_bad_input(db):
    """The DB method surfaces the builder's validation error for a bad status."""
    bogus_status = 'bogus'
    with pytest.raises(UnmatchedQueryError):
        db.get_enrichment_unmatched('spotify', 'artist', status=bogus_status)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Flask routes
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
def client(db):
    """Yield a Flask test client whose enrichment blueprint reads from ``db``.

    ``enrichment_api.configure`` sets a module-level hook, so it is reset in a
    ``finally``. That way the hook is cleared even if building the app or
    blueprint fails before the ``yield``, and no state leaks into later tests.
    """
    enrichment_api.configure(db_getter=lambda: db)
    try:
        app = Flask(__name__)
        app.register_blueprint(enrichment_api.create_blueprint())
        with app.test_client() as c:
            yield c
    finally:
        enrichment_api.configure(db_getter=None)  # reset module global
|
||||
|
||||
|
||||
def test_route_unknown_service_404(client):
    """An unregistered service id in the URL yields 404."""
    response = client.get('/api/enrichment/bogus/unmatched')
    assert response.status_code == 404
|
||||
|
||||
|
||||
def test_route_bad_entity_type_400(client):
    """An entity type the service does not cover yields 400, not 500."""
    # genius has no album column
    url = '/api/enrichment/genius/unmatched?entity_type=album'
    response = client.get(url)
    assert response.status_code == 400
|
||||
|
||||
|
||||
def test_route_happy_path(client):
    """A valid request returns the total plus service and entity-type metadata."""
    url = '/api/enrichment/spotify/unmatched?entity_type=artist&status=unmatched'
    response = client.get(url)
    assert response.status_code == 200

    payload = response.get_json()
    assert payload['total'] == 2
    assert payload['service'] == 'spotify'
    assert payload['entity_types'] == ['artist', 'album', 'track']
|
||||
|
||||
|
||||
def test_route_breakdown(client):
    """The breakdown route returns the per-entity tallies for the seeded artists."""
    response = client.get('/api/enrichment/spotify/breakdown')
    assert response.status_code == 200

    breakdown = response.get_json()['breakdown']
    expected_artist = {'matched': 1, 'not_found': 1, 'pending': 1, 'total': 3}
    assert breakdown['artist'] == expected_artist
|
||||
Loading…
Reference in new issue