From 28ca3f5ce56ea4a7d5d7264a10876a4c0e263c20 Mon Sep 17 00:00:00 2001 From: Broque Thomas Date: Sat, 31 Jan 2026 11:21:35 -0800 Subject: [PATCH] album level search for wishlist albums/eps --- core/matching_engine.py | 25 +++--- web_server.py | 167 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 181 insertions(+), 11 deletions(-) diff --git a/core/matching_engine.py b/core/matching_engine.py index 84833006..dbf011e2 100644 --- a/core/matching_engine.py +++ b/core/matching_engine.py @@ -928,7 +928,8 @@ class MusicMatchingEngine: album_result: AlbumResult, spotify_tracks: List[SpotifyTrack], spotify_album_name: str, - spotify_artist_name: str + spotify_artist_name: str, + expected_track_count: int = 0 ) -> Tuple[float, Dict[str, TrackResult]]: """ Match an AlbumResult from Soulseek against a list of Spotify tracks. @@ -1034,23 +1035,28 @@ class MusicMatchingEngine: # Calculate album confidence match_ratio = len(track_mapping) / len(spotify_tracks) if spotify_tracks else 0.0 avg_track_score = sum(matched_scores) / len(matched_scores) if matched_scores else 0.0 - track_count_ratio = ( - min(album_result.track_count, len(spotify_tracks)) / - max(album_result.track_count, len(spotify_tracks)) - ) if spotify_tracks else 0.0 + + # Source completeness: prefer sources whose track count matches the full album, + # not just the missing tracks. A complete 13/13 rip is more reliable than an 8-file folder. 
+ reference_count = expected_track_count if expected_track_count > 0 else len(spotify_tracks) + source_completeness = ( + min(album_result.track_count, reference_count) / + max(album_result.track_count, reference_count) + ) if reference_count > 0 else 0.0 album_confidence = ( (match_ratio * 0.40) + (avg_track_score * 0.25) + - (album_title_score * 0.20) + + (album_title_score * 0.15) + (artist_score * 0.10) + - (track_count_ratio * 0.05) + (source_completeness * 0.10) ) logger.info( f"Album match: '{album_result.album_title}' by {album_result.username} -> " f"confidence={album_confidence:.2f}, matched={len(track_mapping)}/{len(spotify_tracks)}, " - f"title={album_title_score:.2f}, artist={artist_score:.2f}, tracks_avg={avg_track_score:.2f}" + f"title={album_title_score:.2f}, artist={artist_score:.2f}, tracks_avg={avg_track_score:.2f}, " + f"completeness={source_completeness:.2f} ({album_result.track_count}/{reference_count})" ) return album_confidence, track_mapping @@ -1092,7 +1098,8 @@ class MusicMatchingEngine: logger.warning(f"Quality filter error for album '{album.album_title}': {e}") confidence, mapping = self.match_album_result_to_spotify_tracks( - album, spotify_tracks, spotify_album_name, spotify_artist_name + album, spotify_tracks, spotify_album_name, spotify_artist_name, + expected_track_count=expected_track_count ) if confidence > best_confidence: diff --git a/web_server.py b/web_server.py index 20b1f5a0..0db0a081 100644 --- a/web_server.py +++ b/web_server.py @@ -11319,7 +11319,28 @@ def _ensure_spotify_track_format(track_info): if isinstance(track_info.get('artists'), list) and len(track_info.get('artists', [])) > 0: first_artist = track_info['artists'][0] if isinstance(first_artist, dict) and 'name' in first_artist: - # Already has proper Spotify format + # Already has proper Spotify format — but ensure album has images + album = track_info.get('album') + if not isinstance(album, dict) or not album.get('images'): + # Album images missing at top 
level, check spotify_data + spotify_data = track_info.get('spotify_data', {}) + if isinstance(spotify_data, str): + try: + import json + spotify_data = json.loads(spotify_data) + except Exception: + spotify_data = {} + if isinstance(spotify_data, dict) and isinstance(spotify_data.get('album'), dict): + s_album = spotify_data['album'] + if s_album.get('images'): + if not isinstance(album, dict): + track_info['album'] = {} + track_info['album']['images'] = s_album['images'] + if 'name' not in track_info.get('album', {}): + track_info['album']['name'] = s_album.get('name', 'Unknown Album') + for field in ('id', 'album_type', 'total_tracks', 'release_date'): + if field in s_album and field not in track_info.get('album', {}): + track_info['album'][field] = s_album[field] return track_info # Convert to proper Spotify format @@ -11349,13 +11370,36 @@ def _ensure_spotify_track_format(track_info): # Build album object with images if available album_data = track_info.get('album', {}) + + # Wishlist tracks store album data inside spotify_data, not at top level + # album_data may be a string (sanitized), empty, or a dict without images + if not album_data or not isinstance(album_data, dict) or not album_data.get('images'): + spotify_data = track_info.get('spotify_data', {}) + if isinstance(spotify_data, str): + try: + import json + spotify_data = json.loads(spotify_data) + except Exception: + spotify_data = {} + if isinstance(spotify_data, dict) and isinstance(spotify_data.get('album'), dict): + album_data = spotify_data['album'] + if isinstance(album_data, dict): album = { 'name': album_data.get('name', 'Unknown Album') } - # Preserve album images if present (important for ListenBrainz tracks) + # Preserve album images if present if 'images' in album_data: album['images'] = album_data['images'] + # Preserve album ID, type, and total_tracks for proper wishlist grouping + if 'id' in album_data: + album['id'] = album_data['id'] + if 'album_type' in album_data: + 
album['album_type'] = album_data['album_type'] + if 'total_tracks' in album_data: + album['total_tracks'] = album_data['total_tracks'] + if 'release_date' in album_data: + album['release_date'] = album_data['release_date'] else: album = { 'name': str(album_data) if album_data else 'Unknown Album' @@ -11721,6 +11765,19 @@ def _on_download_completed(batch_id, task_id, success=True): print(f"⏰ [Stuck Detection] Task {task_id} stuck in searching for {task_age:.0f}s - forcing failure") task['status'] = 'failed' task['error_message'] = f'Retry timeout after {task_age:.0f} seconds' + # Add to permanently_failed_tracks so it gets re-added to wishlist + original_track_info = task.get('track_info', {}) + track_info = { + 'download_index': task.get('track_index', 0), + 'table_index': task.get('track_index', 0), + 'track_name': original_track_info.get('name', 'Unknown Track'), + 'artist_name': _get_track_artist_name(original_track_info), + 'retry_count': task.get('retry_count', 0), + 'spotify_track': _ensure_spotify_track_format(original_track_info), + 'failure_reason': f'Search timeout after {task_age:.0f} seconds', + 'candidates': task.get('cached_candidates', []) + } + batch.get('permanently_failed_tracks', []).append(track_info) finished_count += 1 else: retrying_count += 1 @@ -11800,6 +11857,94 @@ def _on_download_completed(batch_id, task_id, success=True): print(f"🔄 [Batch Manager] Starting next batch for {batch_id}") _start_next_batch_of_downloads(batch_id) +def _attempt_wishlist_album_searches(batch_id, missing_tracks): + """ + Group wishlist tracks by album and run album-level search for each group. + Reuses _attempt_album_level_search for each album group with 2+ tracks. 
+ + Returns: + Tuple of: + - all_album_matched: List of (track_analysis_result, pre_assigned_candidate: TrackResult) tuples + - all_unmatched: List of track_analysis_results that need per-track search + """ + import json as _json + + # Group tracks by album + album_groups = {} # key -> list of track analysis results + album_meta = {} # key -> {album_ctx, artist_ctx} + no_album_tracks = [] + + for res in missing_tracks: + track_data = res.get('track', {}) + spotify_data = track_data.get('spotify_data', {}) + if isinstance(spotify_data, str): + try: + spotify_data = _json.loads(spotify_data) + except Exception: + spotify_data = {} + + s_album = spotify_data.get('album') if spotify_data else None + s_artists = spotify_data.get('artists', []) if spotify_data else [] + + if not s_album or not s_album.get('name'): + no_album_tracks.append(res) + continue + + # Grouping key: album ID preferred, fallback to artist+album name + album_id = s_album.get('id', '') + album_name = s_album.get('name', '') + artist_name = '' + if s_artists and len(s_artists) > 0: + a = s_artists[0] + artist_name = a.get('name', '') if isinstance(a, dict) else str(a) + + group_key = album_id if album_id else f"{artist_name}::{album_name}" + + if group_key not in album_groups: + album_groups[group_key] = [] + album_meta[group_key] = { + 'album_ctx': { + 'name': album_name, + 'id': album_id, + 'album_type': s_album.get('album_type', 'album'), + 'total_tracks': s_album.get('total_tracks', 0), + 'release_date': s_album.get('release_date', ''), + 'images': s_album.get('images', []) + }, + 'artist_ctx': { + 'name': artist_name, + 'id': s_artists[0].get('id', '') if s_artists and isinstance(s_artists[0], dict) else '' + } + } + + album_groups[group_key].append(res) + + # Process each album group + all_matched = [] + all_unmatched = list(no_album_tracks) # Tracks with no album data go straight to per-track + + for group_key, group_tracks in album_groups.items(): + meta = album_meta[group_key] + album_ctx = 
meta['album_ctx'] + artist_ctx = meta['artist_ctx'] + album_type = album_ctx.get('album_type', 'album').lower() + + # Skip singles and groups with < 2 tracks + if album_type == 'single' or len(group_tracks) < 2: + all_unmatched.extend(group_tracks) + continue + + logger.info(f"[Wishlist Album Search] Searching for '{artist_ctx['name']}' - '{album_ctx['name']}' ({len(group_tracks)} tracks)") + print(f"🎵 [Wishlist] Album search: '{artist_ctx['name']}' - '{album_ctx['name']}' ({len(group_tracks)} tracks)") + + matched, unmatched = _attempt_album_level_search(batch_id, group_tracks, album_ctx, artist_ctx) + all_matched.extend(matched) + all_unmatched.extend(unmatched) + + logger.info(f"[Wishlist Album Search] Total: {len(all_matched)} pre-matched, {len(all_unmatched)} per-track search") + return all_matched, all_unmatched + + def _attempt_album_level_search(batch_id, missing_tracks, batch_album_context, batch_artist_context): """ Attempt to find a complete album source on Soulseek before falling back to per-track search. 
@@ -12095,9 +12240,13 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json): album_unmatched = missing_tracks if batch_is_album and batch_album_context and batch_artist_context: + # Single album from artist page / discover page album_matched, album_unmatched = _attempt_album_level_search( batch_id, missing_tracks, batch_album_context, batch_artist_context ) + elif playlist_id == 'wishlist': + # Wishlist albums cycle — group tracks by album, search each + album_matched, album_unmatched = _attempt_wishlist_album_searches(batch_id, missing_tracks) # Now create download tasks under the lock with tasks_lock: @@ -13313,6 +13462,7 @@ def _build_batch_status_data(batch_id, batch, live_transfers_lookup): if task['status'] != 'post_processing': task_status['status'] = 'post_processing' task['status'] = 'post_processing' + task['status_change_time'] = time.time() # Reset so stuck detector doesn't fire prematurely print(f"🔄 Task {task_id} API reports 'Succeeded' - starting post-processing verification") # Submit post-processing worker to verify file and complete the task @@ -13907,6 +14057,19 @@ def _check_batch_completion_v2(batch_id): print(f"⏰ [Stuck Detection V2] Task {task_id} stuck in searching for {task_age:.0f}s - forcing failure") task['status'] = 'failed' task['error_message'] = f'Retry timeout after {task_age:.0f} seconds' + # Add to permanently_failed_tracks so it gets re-added to wishlist + original_track_info = task.get('track_info', {}) + track_info = { + 'download_index': task.get('track_index', 0), + 'table_index': task.get('track_index', 0), + 'track_name': original_track_info.get('name', 'Unknown Track'), + 'artist_name': _get_track_artist_name(original_track_info), + 'retry_count': task.get('retry_count', 0), + 'spotify_track': _ensure_spotify_track_format(original_track_info), + 'failure_reason': f'Search timeout after {task_age:.0f} seconds', + 'candidates': task.get('cached_candidates', []) + } + 
batch.setdefault('permanently_failed_tracks', []).append(track_info) finished_count += 1 else: retrying_count += 1