You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
SoulSync/core/discovery/spotify_public.py

338 lines
17 KiB

"""Background worker for public Spotify-link playlist discovery.
`run_spotify_public_discovery_worker(url_hash, deps)` is the function the
spotify-public discovery start-endpoint submits to its executor to match
each public Spotify playlist track against Spotify (preferred) or iTunes
(fallback):
1. Pause enrichment workers (release shared resources).
2. For each track:
- Cancellation gate (state['cancelled']).
- Discovery cache lookup; cache hit short-circuits the search and
populates display fields from the cached match.
- SimpleNamespace duck-type → `_search_spotify_for_tidal_track`
(shared search helper, returns tuple for Spotify or dict for iTunes).
- On Spotify match: build `match_data` preserving track_number /
disc_number from raw API data, image extracted from album images
or track object fallback, release_date filled from track.release_date
when album dict is missing it.
- On iTunes match: dict result populated as `match_data`, source set
to discovery_source, image extracted from album images.
- Save matched result to discovery cache.
- On miss: Wing It stub stored as 'wing-it' status.
3. After all tracks: phase='discovered', activity feed entry.
4. On error: state['phase']='error' + status with error string.
5. Finally: resume enrichment workers.
Lifted verbatim from web_server.py. Wide dependency surface (Spotify and
iTunes clients, multiple metadata helpers, state dict, shared tidal
search helper) all injected via `SpotifyPublicDiscoveryDeps`.
"""
from __future__ import annotations
import logging
import time
import types
from dataclasses import dataclass
from typing import Any, Callable
logger = logging.getLogger(__name__)
@dataclass
class SpotifyPublicDiscoveryDeps:
    """Injected collaborators for the Spotify Public discovery worker.

    The worker reaches every external service, helper and piece of shared
    state through this bundle instead of importing them directly. Field
    order is part of the constructor signature and must not change.
    """

    # --- shared state and clients ---------------------------------------
    # Per-playlist discovery state dicts; the worker indexes it by url_hash.
    spotify_public_discovery_states: dict
    # Spotify client; the worker only calls is_spotify_authenticated() on it.
    spotify_client: Any

    # --- enrichment worker coordination ---------------------------------
    # Called with a reason label; its return value is handed back to
    # resume_enrichment_workers when the worker finishes.
    pause_enrichment_workers: Callable[[str], dict]
    # Called in the worker's `finally` with (pause state, reason label).
    resume_enrichment_workers: Callable[[dict, str], None]

    # --- provider selection ---------------------------------------------
    # Returns the configured discovery source name (compared to 'spotify').
    get_active_discovery_source: Callable[[], str]
    # Returns the client used for searches when Spotify is not used.
    get_metadata_fallback_client: Callable[[], Any]

    # --- discovery cache ------------------------------------------------
    # Called with (track name, first artist); the result is indexed [0], [1].
    get_discovery_cache_key: Callable
    # Returns an object exposing get_discovery_cache_match(...) and
    # save_discovery_cache_match(...).
    get_database: Callable[[], Any]
    # Called with (first artist, cached match); a truthy result accepts the hit.
    validate_discovery_cache_artist: Callable

    # --- matching and reporting -----------------------------------------
    # Called with (track namespace, use_spotify=..., itunes_client=...).
    search_spotify_for_tidal_track: Callable
    # Called with (track name, joined artist string, duration in ms).
    build_discovery_wing_it_stub: Callable
    # Called with four positional strings once discovery has completed.
    add_activity_item: Callable
def _artist_names(raw_artists):
    """Return a track's artist entries as a list of plain strings.

    Dict entries contribute their 'name' value (a missing or None name
    becomes ''); any other entry is converted with str(). None yields [].
    """
    names = []
    for artist in raw_artists or []:
        if isinstance(artist, dict):
            names.append(artist.get('name') or '')
        else:
            names.append(str(artist))
    return names


def _display_artists(artists):
    """Join artist entries (strings or dicts with a 'name' key) into one string."""
    if not artists:
        return ''
    return ', '.join(a if isinstance(a, str) else a.get('name', '') for a in artists)


def _album_display_name(album):
    """Return the 'name' of an album dict, or the value itself when not a dict."""
    return album.get('name', '') if isinstance(album, dict) else album


def _source_track_info(track_id, name, artists, album_name, duration_ms):
    """Build the 'spotify_public_track' entry describing the playlist track."""
    return {
        'id': track_id,
        'name': name,
        'artists': artists or [],
        'album': album_name,
        'duration_ms': duration_ms,
    }


def _progress_percent(index, total):
    """Return the integer percentage reached after finishing track `index`."""
    return int(((index + 1) / total) * 100)


def _build_cached_result(cached_match, source_track, discovery_source, index):
    """Build a 'Found' result whose display fields come from a cached match."""
    return {
        'spotify_public_track': source_track,
        'spotify_data': cached_match,
        'match_data': cached_match,
        'status': 'Found',
        'status_class': 'found',
        'spotify_track': cached_match.get('name', ''),
        'spotify_artist': _display_artists(cached_match.get('artists', [])),
        'spotify_album': _album_display_name(cached_match.get('album', '')),
        'spotify_id': cached_match.get('id', ''),
        'discovery_source': discovery_source,
        'index': index
    }


def _apply_spotify_match(result, track_result):
    """Fill `result` from a Spotify (track, raw_data, confidence) tuple.

    Returns the match confidence taken from the tuple.
    """
    track_obj, raw_track_data, match_confidence = track_result
    album_obj = raw_track_data.get('album', {}) if raw_track_data else {}
    # An explicit null album in the raw payload is treated like a missing one;
    # otherwise 'spotify_album' would end up as the literal string 'None'.
    if album_obj is None:
        album_obj = {}
    # Ensure album has a name — fall back to track_obj.album if raw data lacked it
    if isinstance(album_obj, dict) and not album_obj.get('name') and track_obj.album:
        album_obj['name'] = track_obj.album
    elif not album_obj and track_obj.album:
        album_obj = {'name': track_obj.album}
    # Ensure release_date is present (raw Spotify data has it, but fallback may not)
    if isinstance(album_obj, dict) and not album_obj.get('release_date'):
        album_obj['release_date'] = getattr(track_obj, 'release_date', '') or ''

    # Image URL: first album image, else whatever the track object carries
    album_images = album_obj.get('images', []) if isinstance(album_obj, dict) else []
    if album_images:
        image_url = album_images[0].get('url', '')
    else:
        image_url = getattr(track_obj, 'image_url', '') or ''

    match_data = {
        'id': track_obj.id,
        'name': track_obj.name,
        'artists': track_obj.artists,
        'album': album_obj,
        'duration_ms': track_obj.duration_ms,
        'external_urls': track_obj.external_urls,
        'image_url': image_url,
        'source': 'spotify'
    }
    # Preserve track_number/disc_number from raw Spotify API data
    if raw_track_data and raw_track_data.get('track_number'):
        match_data['track_number'] = raw_track_data['track_number']
    if raw_track_data and raw_track_data.get('disc_number'):
        match_data['disc_number'] = raw_track_data['disc_number']

    result['spotify_data'] = match_data
    result['match_data'] = match_data
    result['status'] = 'Found'
    result['status_class'] = 'found'
    result['spotify_track'] = track_obj.name
    if isinstance(track_obj.artists, list):
        result['spotify_artist'] = ', '.join(track_obj.artists)
    else:
        result['spotify_artist'] = str(track_obj.artists)
    if isinstance(album_obj, dict):
        result['spotify_album'] = album_obj.get('name', '')
    else:
        result['spotify_album'] = str(album_obj)
    result['spotify_id'] = track_obj.id
    result['confidence'] = match_confidence
    return match_confidence


def _apply_fallback_match(result, track_result, discovery_source):
    """Fill `result` from a fallback-provider dict match.

    The dict's 'confidence' key is removed and returned (0.80 when absent);
    the dict itself becomes the stored match data.
    """
    match_confidence = track_result.pop('confidence', 0.80)
    match_data = track_result
    match_data['source'] = discovery_source

    # Extract image URL from album images unless one is already present
    fb_album = match_data.get('album', {})
    fb_images = fb_album.get('images', []) if isinstance(fb_album, dict) else []
    if fb_images and 'image_url' not in match_data:
        match_data['image_url'] = fb_images[0].get('url', '')

    result['spotify_data'] = match_data
    result['match_data'] = match_data
    result['status'] = 'Found'
    result['status_class'] = 'found'
    result['spotify_track'] = match_data.get('name', '')
    result['spotify_artist'] = _display_artists(match_data.get('artists', []))
    result['spotify_album'] = _album_display_name(match_data.get('album', ''))
    result['spotify_id'] = match_data.get('id', '')
    result['confidence'] = match_confidence
    return match_confidence


def _apply_wing_it_fallback(result, deps):
    """Turn an unmatched `result` into a 'Wing It' entry backed by a stub."""
    sp_t = result.get('spotify_public_track', {})
    artist_label = ', '.join(sp_t.get('artists', []))
    stub = deps.build_discovery_wing_it_stub(
        sp_t.get('name', ''),
        artist_label,
        sp_t.get('duration_ms', 0)
    )
    result['status'] = 'Wing It'
    result['status_class'] = 'wing-it'
    result['spotify_data'] = stub
    result['match_data'] = stub
    result['spotify_track'] = sp_t.get('name', '')
    result['spotify_artist'] = artist_label
    result['wing_it_fallback'] = True
    result['confidence'] = 0


def _build_error_result(sp_track, index, error, discovery_source):
    """Build the 'Error' result for a track whose processing raised.

    Never raises itself: a playlist entry that is not a dict, or whose artist
    list is malformed, still yields a result so the remaining tracks are
    processed. Artists are normalized to strings like in every other result.
    """
    track_info = sp_track if isinstance(sp_track, dict) else {}
    try:
        artists = _artist_names(track_info.get('artists', []))
    except TypeError:
        # 'artists' was not iterable
        artists = []
    return {
        'spotify_public_track': {
            'name': track_info.get('name', 'Unknown'),
            'artists': artists,
        },
        'spotify_data': None,
        'match_data': None,
        'status': 'Error',
        'status_class': 'error',
        'spotify_track': '',
        'spotify_artist': '',
        'spotify_album': '',
        'error': str(error),
        'discovery_source': discovery_source,
        'index': index
    }


def _discover_track(index, total, sp_track, deps, discovery_source, use_spotify, itunes_client):
    """Resolve one playlist track to a result dict.

    Tries the discovery cache first, then the shared search helper, and
    finally falls back to a Wing It stub. Returns ``(result, from_cache)``;
    `from_cache` is True when no provider search was made.
    """
    track_name = sp_track['name']
    track_artists = _artist_names(sp_track.get('artists', []))
    track_id = sp_track.get('id', '')
    track_album = sp_track.get('album', '')
    if isinstance(track_album, dict):
        track_album_name = track_album.get('name', '')
    else:
        track_album_name = track_album or ''
    track_duration_ms = sp_track.get('duration_ms', 0)

    primary_artist = track_artists[0] if track_artists else ''
    artist_label = ', '.join(track_artists)
    position = f"{index + 1}/{total}"
    logger.info(f"[{position}] Searching {discovery_source.upper()}: {track_name} by {artist_label}")

    # Check discovery cache first; any cache failure falls through to a search
    cache_key = deps.get_discovery_cache_key(track_name, primary_artist)
    try:
        cache_db = deps.get_database()
        cached_match = cache_db.get_discovery_cache_match(cache_key[0], cache_key[1], discovery_source)
        if cached_match and deps.validate_discovery_cache_artist(primary_artist, cached_match):
            logger.debug(f"CACHE HIT [{position}]: {track_name} by {artist_label}")
            source_track = _source_track_info(
                track_id, track_name, track_artists, track_album_name, track_duration_ms
            )
            return _build_cached_result(cached_match, source_track, discovery_source, index), True
    except Exception as cache_err:
        logger.error(f"Cache lookup error: {cache_err}")

    # Duck-typed track object expected by the shared search helper
    track_ns = types.SimpleNamespace(
        id=track_id,
        name=track_name,
        artists=track_artists,
        album=track_album_name,
        duration_ms=track_duration_ms
    )
    track_result = deps.search_spotify_for_tidal_track(
        track_ns,
        use_spotify=use_spotify,
        itunes_client=itunes_client
    )

    # Start from a 'Not Found' entry and upgrade it when a match exists
    result = {
        'spotify_public_track': _source_track_info(
            track_id, track_name, track_artists, track_album_name, track_duration_ms
        ),
        'spotify_data': None,
        'match_data': None,
        'status': 'Not Found',
        'status_class': 'not-found',
        'spotify_track': '',
        'spotify_artist': '',
        'spotify_album': '',
        'discovery_source': discovery_source
    }

    match_confidence = 0.0
    if use_spotify and isinstance(track_result, tuple):
        # Spotify: helper returns (Track, raw_data, confidence)
        match_confidence = _apply_spotify_match(result, track_result)
    elif not use_spotify and track_result and isinstance(track_result, dict):
        # Fallback: helper returns a dict with track data (includes 'confidence')
        match_confidence = _apply_fallback_match(result, track_result, discovery_source)

    # Save to discovery cache if match found (best-effort)
    if result['status_class'] == 'found' and result.get('match_data'):
        try:
            cache_db = deps.get_database()
            cache_db.save_discovery_cache_match(
                cache_key[0], cache_key[1], discovery_source, match_confidence,
                result['match_data'], track_name,
                primary_artist
            )
            logger.info(f"CACHE SAVED: {track_name} (confidence: {match_confidence:.3f})")
        except Exception as cache_err:
            logger.error(f"Cache save error: {cache_err}")

    # Auto Wing It fallback for unmatched tracks
    if result['status_class'] == 'not-found':
        _apply_wing_it_fallback(result, deps)

    result['index'] = index
    return result, False


def run_spotify_public_discovery_worker(url_hash, deps: SpotifyPublicDiscoveryDeps):
    """Background worker for Spotify Public discovery (Spotify preferred, iTunes fallback).

    Pauses the enrichment workers, matches every track of the playlist stored
    in ``deps.spotify_public_discovery_states[url_hash]`` and appends one
    result per track to ``state['discovery_results']``, updating
    ``state['discovery_progress']``, ``state['spotify_matches']`` and
    ``state['wing_it_count']`` along the way. A track that raises is recorded
    as an 'Error' result and processing continues with the next track.

    Args:
        url_hash: Key of the playlist's state dict in
            ``deps.spotify_public_discovery_states``.
        deps: Bundle of injected collaborators.

    Returns:
        None. Outcomes are written to the state dict: phase/status become
        'discovered' on completion, or 'error' / 'error: <message>' when the
        worker itself fails. Enrichment workers are always resumed.
    """
    _ew_state = {}
    try:
        _ew_state = deps.pause_enrichment_workers('Spotify Public discovery')
        state = deps.spotify_public_discovery_states[url_hash]
        playlist = state['playlist']

        # Determine which provider to use — respect user's configured primary source
        discovery_source = deps.get_active_discovery_source()
        use_spotify = bool(
            discovery_source == 'spotify'
            and deps.spotify_client
            and deps.spotify_client.is_spotify_authenticated()
        )

        # The fallback client is only created when Spotify is not used
        itunes_client_instance = None
        if not use_spotify:
            itunes_client_instance = deps.get_metadata_fallback_client()

        logger.info(f"Starting Spotify Public discovery for: {playlist['name']} (using {discovery_source.upper()})")

        # Store discovery source in state for frontend
        state['discovery_source'] = discovery_source

        successful_discoveries = 0
        tracks = playlist['tracks']
        total = len(tracks)

        for i, sp_track in enumerate(tracks):
            if state.get('cancelled', False):
                break
            try:
                result, from_cache = _discover_track(
                    i, total, sp_track, deps,
                    discovery_source, use_spotify, itunes_client_instance
                )
                # Cached hits, fresh matches and Wing It stubs all count as found
                if result['status_class'] in ('found', 'wing-it'):
                    successful_discoveries += 1
                    state['spotify_matches'] = successful_discoveries
                if result.get('wing_it_fallback'):
                    state['wing_it_count'] = state.get('wing_it_count', 0) + 1
                state['discovery_results'].append(result)
                state['discovery_progress'] = _progress_percent(i, total)
                if not from_cache:
                    # Add delay between provider requests
                    time.sleep(0.1)
            except Exception as e:
                logger.exception(f"Error processing track {i+1}: {e}")
                state['discovery_results'].append(
                    _build_error_result(sp_track, i, e, discovery_source)
                )
                state['discovery_progress'] = _progress_percent(i, total)

        # Mark as complete
        state['phase'] = 'discovered'
        state['status'] = 'discovered'
        state['discovery_progress'] = 100

        # Add activity for discovery completion
        source_label = discovery_source.upper()
        deps.add_activity_item("", f"Spotify Link Discovery Complete ({source_label})", f"'{playlist['name']}' - {successful_discoveries}/{len(tracks)} tracks found", "Now")
        logger.info(f"Spotify Public discovery complete ({source_label}): {successful_discoveries}/{len(tracks)} tracks found")
    except Exception as e:
        logger.exception(f"Error in Spotify Public discovery worker: {e}")
        if url_hash in deps.spotify_public_discovery_states:
            deps.spotify_public_discovery_states[url_hash]['phase'] = 'error'
            deps.spotify_public_discovery_states[url_hash]['status'] = f'error: {str(e)}'
    finally:
        deps.resume_enrichment_workers(_ew_state, 'Spotify Public discovery')