mirror of https://github.com/Nezreka/SoulSync.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
364 lines
16 KiB
364 lines
16 KiB
"""Playlist explorer build-tree route.
|
|
|
|
`playlist_explorer_build_tree(deps)` is the body of the
|
|
`POST /api/playlist-explorer/build-tree` route. It builds a discovery
|
|
tree from a mirrored playlist and streams the result as NDJSON
|
|
(one JSON object per artist line + a final 'complete' line).
|
|
|
|
Works with Spotify (preferred), iTunes, or Deezer as the metadata
|
|
source. Uses and populates the metadata cache to avoid redundant API
|
|
calls per discography fetch.
|
|
|
|
Two operating modes:
|
|
- `albums`: only show releases that overlap with the playlist's tracks.
|
|
- `discographies`: show the full discography of every artist in the
|
|
playlist, with `in_playlist` flag on the matching releases.
|
|
|
|
Per-artist flow inside the streaming generator:
|
|
1. Resolve discography via `_fetch_artist_discography` (cache → fall
|
|
through to live API search).
|
|
2. Tag each release with `in_playlist` based on title-similarity match
|
|
against the playlist's track/album names.
|
|
3. Apply mode filter, sort by in-playlist-first then year DESC.
|
|
4. Yield one JSON line per artist.
|
|
|
|
The route returns Flask's streaming `Response` wrapper around the NDJSON
generator. Early-exit cases (bad request, playlist not found, top-level
exception) instead return Flask's standard `(jsonify(...), status)` tuple.
|
|
|
|
Lifted verbatim from web_server.py. Wide dependency surface (Flask
|
|
`request` + `Response`, Spotify client, multiple metadata helpers,
|
|
DB access, metadata cache) all injected via `PlaylistExplorerDeps`.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import time
|
|
from dataclasses import dataclass
|
|
from typing import Any, Callable
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class PlaylistExplorerDeps:
    """Injected collaborators used by the playlist explorer route.

    Field order is part of the constructor signature and must not change.

    Attributes:
        request: The ``flask.request`` proxy.
        flask_response: The ``flask.Response`` constructor.
        flask_jsonify: ``flask.jsonify``.
        spotify_client: Spotify client object (may be falsy when unavailable).
        get_database: Zero-argument callable returning the database handle.
        get_active_discovery_source: Zero-argument callable returning the
            name of the configured primary metadata source.
        get_metadata_fallback_client: Zero-argument callable returning the
            fallback metadata client.
        get_metadata_fallback_source: Zero-argument callable returning the
            fallback source name.
        get_metadata_cache: Zero-argument callable returning the metadata
            cache.
    """

    # Flask plumbing
    request: Any
    flask_response: Any
    flask_jsonify: Any

    # Metadata client
    spotify_client: Any

    # Lazy accessors, each called with no arguments
    get_database: Callable[[], Any]
    get_active_discovery_source: Callable[[], str]
    get_metadata_fallback_client: Callable[[], Any]
    get_metadata_fallback_source: Callable[[], str]
    get_metadata_cache: Callable[[], Any]
|
|
|
|
|
|
def playlist_explorer_build_tree(deps: PlaylistExplorerDeps):
    """Build a discovery tree from a mirrored playlist and stream it as NDJSON.

    Reads ``playlist_id`` and ``mode`` from the JSON request body, groups the
    mirrored playlist's tracks by primary artist, resolves each artist's
    discography through the active metadata client (metadata cache first),
    and streams one JSON object per line: a ``meta`` line, one ``artist``
    line per artist, then a final ``complete`` line.

    Args:
        deps: Injected request/response helpers, metadata clients, database
            accessor and metadata-cache accessor.

    Returns:
        On success, ``deps.flask_response(...)`` wrapping the NDJSON
        generator with mimetype ``application/x-ndjson``. On early exit, a
        ``(deps.flask_jsonify(payload), status)`` tuple with status 400
        (no body, missing ``playlist_id``, invalid ``mode``, playlist without
        tracks), 404 (playlist not found) or 500 (unexpected error while
        preparing the stream).
    """
    import re  # function-scope import, as in the original nested helper

    # Matches a bracketed qualifier such as " (Deluxe Edition)" or " [Live]".
    # Compiled once per request instead of being looked up for every title.
    bracketed = re.compile(r'\s*[\(\[][^)\]]*[\)\]]')

    try:
        data = deps.request.get_json()
        if not data:
            return deps.flask_jsonify({"success": False, "error": "No data provided"}), 400

        playlist_id = data.get('playlist_id')
        mode = data.get('mode', 'albums')  # 'albums' or 'discographies'

        if not playlist_id:
            return deps.flask_jsonify({"success": False, "error": "playlist_id is required"}), 400
        if mode not in ('albums', 'discographies'):
            return deps.flask_jsonify({"success": False, "error": "mode must be 'albums' or 'discographies'"}), 400

        database = deps.get_database()
        playlist = database.get_mirrored_playlist(playlist_id)
        if not playlist:
            return deps.flask_jsonify({"success": False, "error": "Playlist not found"}), 404

        tracks = database.get_mirrored_playlist_tracks(playlist_id)
        if not tracks:
            return deps.flask_jsonify({"success": False, "error": "Playlist has no tracks"}), 400

        # Determine active metadata source — respect user's configured primary
        source_name = deps.get_active_discovery_source()
        if source_name == 'spotify' and deps.spotify_client and deps.spotify_client.is_spotify_authenticated():
            active_client = deps.spotify_client
        else:
            active_client = deps.get_metadata_fallback_client()
            source_name = deps.get_metadata_fallback_source()

        cache = deps.get_metadata_cache()

        # Parse extra_data and group tracks by artist using discovered data
        artist_groups = {}
        for t in tracks:
            extra = {}
            if t.get('extra_data'):
                try:
                    extra = json.loads(t['extra_data']) if isinstance(t['extra_data'], str) else t['extra_data']
                except (json.JSONDecodeError, TypeError):
                    pass
            # extra_data may decode to a list/number/etc.; one malformed row
            # must not fail the whole playlist.
            if not isinstance(extra, dict):
                extra = {}

            # Only use discovery data if it matches the active metadata source
            is_discovered = extra.get('discovered', False)
            provider = (extra.get('provider') or '').lower()
            source_matches = provider == source_name or (provider in ('itunes', 'apple') and source_name == 'itunes')

            matched = extra.get('matched_data', {}) if (is_discovered and source_matches) else {}
            if not isinstance(matched, dict):
                matched = {}
            artists_list = matched.get('artists', [])
            if not isinstance(artists_list, (list, tuple)):
                artists_list = []
            primary_artist = artists_list[0] if artists_list else None

            # Artists can be dicts {"name": "X", "id": "Y"} or plain strings "X"
            fallback_name = (t.get('artist_name') or '').strip()
            if isinstance(primary_artist, dict):
                artist_name = primary_artist.get('name') or fallback_name
                artist_id = primary_artist.get('id') or None
            elif isinstance(primary_artist, str):
                artist_name = primary_artist or fallback_name
                artist_id = None
            else:
                artist_name = fallback_name
                artist_id = None

            if not artist_name:
                continue

            key = artist_name.lower()
            group = artist_groups.get(key)
            if group is None:
                group = artist_groups[key] = {
                    'name': artist_name,
                    'artist_id': artist_id,  # Pre-resolved from discovery
                    'tracks': [],
                    'album_names': set(),
                    'discovered': is_discovered,
                }
            # If we get an artist_id from a later track but didn't have one before, fill it in
            if artist_id and not group.get('artist_id'):
                group['artist_id'] = artist_id

            group['tracks'].append(t.get('track_name', ''))

            # Get album name from discovered data or playlist field
            album_name = ''
            album_data = matched.get('album')
            if isinstance(album_data, dict) and album_data.get('name'):
                album_name = album_data['name']
            elif (t.get('album_name') or '').strip():
                album_name = t['album_name'].strip()
            if album_name:
                group['album_names'].add(album_name)

        def _normalize_for_match(title):
            """Lower-case a title and drop bracketed qualifiers for comparison.

            Returns '' for missing/empty titles. A title that consists only of
            a bracketed part (e.g. "(Untitled)") keeps its raw lower-cased form
            so it can still match itself instead of collapsing to ''.
            """
            if not title:
                return ''
            text = title if isinstance(title, str) else str(title)
            stripped = bracketed.sub('', text).strip().lower()
            return stripped or text.strip().lower()

        def _year_sort_value(release):
            """Return the release year as an int, or 0 when missing/non-numeric."""
            try:
                return int(release.get('year') or 0)
            except (TypeError, ValueError):
                return 0

        def _artist_error_line(name, track_names, message):
            """Serialize the NDJSON line emitted for an artist that could not be resolved."""
            return json.dumps({
                "type": "artist",
                "name": name,
                "artist_id": None,
                "image_url": None,
                "playlist_tracks": track_names,
                "albums": [],
                "error": message,
            }) + '\n'

        def _fetch_artist_discography(artist_name, known_artist_id=None):
            """Fetch discography using the active client. Checks cache first, stores results after.

            If known_artist_id is provided (from discovery cache), skips the name search.
            Returns a dict with ``success`` True plus ``albums``/``artist_*``
            fields, or ``success`` False plus an ``error`` message.
            """
            # Check cache for this artist's discography
            cache_key = f"explorer_disco_{artist_name.lower().strip()}"
            cached = cache.get_entity(source_name, 'artist_discography', cache_key) if cache else None
            if cached and isinstance(cached, dict) and cached.get('albums'):
                logger.debug(f"Explorer: cache hit for '{artist_name}' discography")
                return cached

            artist_id = known_artist_id
            artist_image = None

            if artist_id:
                # Already have the ID from discovery — just fetch the artist image
                try:
                    artist_info = active_client.get_artist(artist_id)
                    if artist_info:
                        if isinstance(artist_info, dict):
                            images = artist_info.get('images') or []
                            artist_image = images[0].get('url') if images else None
                        elif hasattr(artist_info, 'image_url'):
                            artist_image = artist_info.image_url
                except Exception as e:
                    # Non-critical — the artist simply gets no image
                    logger.debug(f"Explorer: artist image lookup failed for '{artist_name}': {e}")
            else:
                # No pre-resolved ID — search by name
                try:
                    search_results = active_client.search_artists(artist_name, limit=5)
                except Exception as e:
                    return {'success': False, 'error': f'Search failed: {e}'}

                if not search_results:
                    return {'success': False, 'error': f'"{artist_name}" not found'}

                # Find best match (exact first, then first search result)
                wanted = artist_name.lower().strip()
                best = next(
                    (a for a in search_results if (a.name or '').lower().strip() == wanted),
                    None,
                )
                if not best:
                    best = search_results[0]

                artist_id = best.id
                artist_image = best.image_url if hasattr(best, 'image_url') else None

            # Fetch albums
            try:
                # skip_cache only supported by spotify_client — other clients don't cache this call
                _skip = {'skip_cache': True} if hasattr(active_client, 'sp') else {}
                all_albums = active_client.get_artist_albums(artist_id, album_type='album,single', **_skip)
            except Exception as e:
                return {'success': False, 'error': f'Album fetch failed: {e}'}

            if not all_albums:
                return {'success': False, 'error': 'No albums found'}

            # Check which albums the user already owns
            owned_titles = set()
            try:
                db = deps.get_database()
                with db._get_connection() as conn:
                    cursor = conn.cursor()
                    # Find all artists in DB matching this name
                    cursor.execute("SELECT id FROM artists WHERE LOWER(name) = LOWER(?)", (artist_name,))
                    artist_rows = cursor.fetchall()
                    for ar in artist_rows:
                        cursor.execute("SELECT title FROM albums WHERE artist_id = ?", (ar['id'],))
                        for alb_row in cursor.fetchall():
                            owned_titles.add((alb_row['title'] or '').strip().lower())
            except Exception as e:
                # Non-critical — owned badges just won't show
                logger.debug(f"Explorer: owned-album lookup failed for '{artist_name}': {e}")

            # Build release list
            releases = []
            for album in all_albums:
                # Skip albums where this artist isn't primary
                if hasattr(album, 'artist_ids') and album.artist_ids and album.artist_ids[0] != artist_id:
                    continue
                releases.append({
                    'title': album.name,
                    'year': album.release_date[:4] if album.release_date else None,
                    'image_url': album.image_url,
                    'spotify_id': album.id,
                    'track_count': album.total_tracks,
                    'album_type': (album.album_type or 'album').lower(),
                    'owned': (album.name or '').strip().lower() in owned_titles,
                })

            result = {
                'success': True,
                'name': artist_name,  # Required for metadata cache validation
                'albums': releases,
                'artist_image': artist_image,
                'artist_id': artist_id,
                'artist_name': artist_name,
            }

            # Store in cache
            if cache and releases:
                try:
                    cache.store_entity(source_name, 'artist_discography', cache_key, result)
                except Exception as e:
                    # Caching is an optimisation only; the result is still returned
                    logger.debug(f"Explorer: cache store failed for '{artist_name}': {e}")

            return result

        def generate():
            """Yield the NDJSON stream: meta line, one line per artist, complete line."""
            yield json.dumps({
                "type": "meta",
                "playlist_name": playlist.get('name', 'Unknown Playlist'),
                "playlist_image": playlist.get('image_url', ''),
                "total_artists": len(artist_groups),
                "total_tracks": len(tracks),
                "source": source_name,
            }) + '\n'

            total_albums = 0
            last_index = len(artist_groups) - 1

            for idx, group in enumerate(artist_groups.values()):
                artist_name = group['name']
                playlist_track_names = group['tracks']
                playlist_album_names = group['album_names']

                try:
                    disco = _fetch_artist_discography(artist_name, group.get('artist_id'))

                    if not disco.get('success'):
                        yield _artist_error_line(
                            artist_name, playlist_track_names, disco.get('error', 'Not found')
                        )
                        time.sleep(0.1)
                        continue

                    # Tag each release with in_playlist flag
                    # If no album names available, fall back to matching track names against single titles
                    match_names = playlist_album_names or set(playlist_track_names)
                    # Normalise once per artist and drop empty results: '' is a
                    # substring of every string and would flag every release.
                    normalized_names = {
                        name for name in map(_normalize_for_match, match_names) if name
                    }

                    all_releases = []
                    for release in disco.get('albums', []):
                        r = dict(release)
                        norm_title = _normalize_for_match(r['title'])
                        # Substring test in either direction (covers equality)
                        r['in_playlist'] = bool(norm_title) and any(
                            norm_title in name or name in norm_title
                            for name in normalized_names
                        )
                        all_releases.append(r)

                    # Filter based on mode
                    if mode == 'albums':
                        filtered = [r for r in all_releases if r['in_playlist']]
                    else:
                        filtered = all_releases

                    # In-playlist releases first, then newest year first
                    filtered.sort(key=lambda r: (not r.get('in_playlist', False), -_year_sort_value(r)))
                    total_albums += len(filtered)

                    yield json.dumps({
                        "type": "artist",
                        "name": disco.get('artist_name', artist_name),
                        "artist_id": disco.get('artist_id'),
                        "image_url": disco.get('artist_image'),
                        "playlist_tracks": playlist_track_names,
                        "albums": filtered,
                    }) + '\n'

                except Exception as e:
                    logger.error(f"Explorer: error processing artist '{artist_name}': {e}")
                    yield _artist_error_line(artist_name, playlist_track_names, str(e))

                # Rate limit protection between artists
                if idx < last_index:
                    time.sleep(0.2)

            try:
                deps.get_database().mark_mirrored_playlist_explored(playlist_id)
            except Exception as e:
                # All artist lines were already sent; still emit 'complete' so
                # the client does not wait on a truncated stream.
                logger.warning(f"Explorer: could not mark playlist {playlist_id} as explored: {e}")

            yield json.dumps({"type": "complete", "total_artists": len(artist_groups), "total_albums": total_albums}) + '\n'

        return deps.flask_response(generate(), mimetype='application/x-ndjson', headers={
            'Cache-Control': 'no-cache',
            'X-Accel-Buffering': 'no',
        })

    except Exception as e:
        # logger.exception records the traceback through the logging system
        logger.exception(f"Playlist Explorer build-tree error: {e}")
        return deps.flask_jsonify({"success": False, "error": str(e)}), 500
|
|
|
|
|