album level search for wishlist albums/eps

4 months ago · 28ca3f5ce5
parent 258fd7a8ae
commit 28ca3f5ce5
2 changed files with 181 additions and 11 deletions
--- a/core/matching_engine.py
+++ b/core/matching_engine.py
@ -928,7 +928,8 @@ class MusicMatchingEngine:
        album_result: AlbumResult,
        spotify_tracks: List[SpotifyTrack],
        spotify_album_name: str,
-        spotify_artist_name: str
+        spotify_artist_name: str,
+        expected_track_count: int = 0
    ) -> Tuple[float, Dict[str, TrackResult]]:
        """
        Match an AlbumResult from Soulseek against a list of Spotify tracks.
@ -1034,23 +1035,28 @@ class MusicMatchingEngine:
        # Calculate album confidence
        match_ratio = len(track_mapping) / len(spotify_tracks) if spotify_tracks else 0.0
        avg_track_score = sum(matched_scores) / len(matched_scores) if matched_scores else 0.0
-        track_count_ratio = (
-            min(album_result.track_count, len(spotify_tracks)) /
-            max(album_result.track_count, len(spotify_tracks))
-        ) if spotify_tracks else 0.0
+
+        # Source completeness: prefer sources whose track count matches the full album,
+        # not just the missing tracks. A complete 13/13 rip is more reliable than an 8-file folder.
+        reference_count = expected_track_count if expected_track_count > 0 else len(spotify_tracks)
+        source_completeness = (
+            min(album_result.track_count, reference_count) /
+            max(album_result.track_count, reference_count)
+        ) if reference_count > 0 else 0.0

        album_confidence = (
            (match_ratio * 0.40) +
            (avg_track_score * 0.25) +
-            (album_title_score * 0.20) +
+            (album_title_score * 0.15) +
            (artist_score * 0.10) +
-            (track_count_ratio * 0.05)
+            (source_completeness * 0.10)
        )

        logger.info(
            f"Album match: '{album_result.album_title}' by {album_result.username} -> "
            f"confidence={album_confidence:.2f}, matched={len(track_mapping)}/{len(spotify_tracks)}, "
-            f"title={album_title_score:.2f}, artist={artist_score:.2f}, tracks_avg={avg_track_score:.2f}"
+            f"title={album_title_score:.2f}, artist={artist_score:.2f}, tracks_avg={avg_track_score:.2f}, "
+            f"completeness={source_completeness:.2f} ({album_result.track_count}/{reference_count})"
        )

        return album_confidence, track_mapping
@ -1092,7 +1098,8 @@ class MusicMatchingEngine:
                    logger.warning(f"Quality filter error for album '{album.album_title}': {e}")

            confidence, mapping = self.match_album_result_to_spotify_tracks(
-                album, spotify_tracks, spotify_album_name, spotify_artist_name
+                album, spotify_tracks, spotify_album_name, spotify_artist_name,
+                expected_track_count=expected_track_count
            )

            if confidence > best_confidence:
--- a/web_server.py
+++ b/web_server.py
@ -11319,7 +11319,28 @@ def _ensure_spotify_track_format(track_info):
    if isinstance(track_info.get('artists'), list) and len(track_info.get('artists', [])) > 0:
        first_artist = track_info['artists'][0]
        if isinstance(first_artist, dict) and 'name' in first_artist:
-            # Already has proper Spotify format
+            # Already has proper Spotify format — but ensure album has images
+            album = track_info.get('album')
+            if not isinstance(album, dict) or not album.get('images'):
+                # Album images missing at top level, check spotify_data
+                spotify_data = track_info.get('spotify_data', {})
+                if isinstance(spotify_data, str):
+                    try:
+                        import json
+                        spotify_data = json.loads(spotify_data)
+                    except Exception:
+                        spotify_data = {}
+                if isinstance(spotify_data, dict) and isinstance(spotify_data.get('album'), dict):
+                    s_album = spotify_data['album']
+                    if s_album.get('images'):
+                        if not isinstance(album, dict):
+                            track_info['album'] = {}
+                        track_info['album']['images'] = s_album['images']
+                        if 'name' not in track_info.get('album', {}):
+                            track_info['album']['name'] = s_album.get('name', 'Unknown Album')
+                        for field in ('id', 'album_type', 'total_tracks', 'release_date'):
+                            if field in s_album and field not in track_info.get('album', {}):
+                                track_info['album'][field] = s_album[field]
            return track_info
    
    # Convert to proper Spotify format
@ -11349,13 +11370,36 @@ def _ensure_spotify_track_format(track_info):
    
    # Build album object with images if available
    album_data = track_info.get('album', {})
+
+    # Wishlist tracks store album data inside spotify_data, not at top level
+    # album_data may be a string (sanitized), empty, or a dict without images
+    if not album_data or not isinstance(album_data, dict) or not album_data.get('images'):
+        spotify_data = track_info.get('spotify_data', {})
+        if isinstance(spotify_data, str):
+            try:
+                import json
+                spotify_data = json.loads(spotify_data)
+            except Exception:
+                spotify_data = {}
+        if isinstance(spotify_data, dict) and isinstance(spotify_data.get('album'), dict):
+            album_data = spotify_data['album']
+
    if isinstance(album_data, dict):
        album = {
            'name': album_data.get('name', 'Unknown Album')
        }
-        # Preserve album images if present (important for ListenBrainz tracks)
+        # Preserve album images if present
        if 'images' in album_data:
            album['images'] = album_data['images']
+        # Preserve album ID, type, and total_tracks for proper wishlist grouping
+        if 'id' in album_data:
+            album['id'] = album_data['id']
+        if 'album_type' in album_data:
+            album['album_type'] = album_data['album_type']
+        if 'total_tracks' in album_data:
+            album['total_tracks'] = album_data['total_tracks']
+        if 'release_date' in album_data:
+            album['release_date'] = album_data['release_date']
    else:
        album = {
            'name': str(album_data) if album_data else 'Unknown Album'
@ -11721,6 +11765,19 @@ def _on_download_completed(batch_id, task_id, success=True):
                        print(f"⏰ [Stuck Detection] Task {task_id} stuck in searching for {task_age:.0f}s - forcing failure")
                        task['status'] = 'failed'
                        task['error_message'] = f'Retry timeout after {task_age:.0f} seconds'
+                        # Add to permanently_failed_tracks so it gets re-added to wishlist
+                        original_track_info = task.get('track_info', {})
+                        track_info = {
+                            'download_index': task.get('track_index', 0),
+                            'table_index': task.get('track_index', 0),
+                            'track_name': original_track_info.get('name', 'Unknown Track'),
+                            'artist_name': _get_track_artist_name(original_track_info),
+                            'retry_count': task.get('retry_count', 0),
+                            'spotify_track': _ensure_spotify_track_format(original_track_info),
+                            'failure_reason': f'Search timeout after {task_age:.0f} seconds',
+                            'candidates': task.get('cached_candidates', [])
+                        }
+                        batch.get('permanently_failed_tracks', []).append(track_info)
                        finished_count += 1
                    else:
                        retrying_count += 1
@ -11800,6 +11857,94 @@ def _on_download_completed(batch_id, task_id, success=True):
    print(f"🔄 [Batch Manager] Starting next batch for {batch_id}")
    _start_next_batch_of_downloads(batch_id)

+def _attempt_wishlist_album_searches(batch_id, missing_tracks):
+    """
+    Group wishlist tracks by album and run an album-level search for each group.
+
+    Album metadata is read from each track's ``spotify_data`` (a dict or a JSON
+    string). Tracks are grouped by Spotify album ID when one is present, and by
+    "artist::album name" otherwise. Each group that is not a single and holds
+    2+ tracks is handed to _attempt_album_level_search; everything else falls
+    through to per-track search.
+
+    Args:
+        batch_id: Batch identifier, passed through to _attempt_album_level_search.
+        missing_tracks: Iterable of track analysis results; each is a dict whose
+            'track' entry may carry a 'spotify_data' dict or JSON string.
+
+    Returns:
+        Tuple of:
+        - all_album_matched: List of (track_analysis_result, pre_assigned_candidate: TrackResult) tuples
+        - all_unmatched: List of track_analysis_results that need per-track search
+    """
+    import json as _json
+
+    # Group tracks by album
+    album_groups = {}  # key -> list of track analysis results
+    album_meta = {}    # key -> {album_ctx, artist_ctx}
+    no_album_tracks = []
+
+    for res in missing_tracks:
+        track_data = res.get('track') or {}
+        spotify_data = track_data.get('spotify_data', {}) if isinstance(track_data, dict) else {}
+        if isinstance(spotify_data, str):
+            try:
+                spotify_data = _json.loads(spotify_data)
+            except Exception:
+                # Best-effort: unparseable metadata just means "no album info"
+                spotify_data = {}
+        # json.loads may legally return a list/scalar; only a dict is usable here
+        if not isinstance(spotify_data, dict):
+            spotify_data = {}
+
+        s_album = spotify_data.get('album')
+        s_artists = spotify_data.get('artists') or []
+        # A bare string would otherwise be indexed character-by-character below
+        if not isinstance(s_artists, (list, tuple)):
+            s_artists = []
+
+        # Album may be missing or a plain string (sanitized); either way it
+        # cannot drive an album-level search, so fall back to per-track search
+        if not isinstance(s_album, dict) or not s_album.get('name'):
+            no_album_tracks.append(res)
+            continue
+
+        # Grouping key: album ID preferred, fallback to artist+album name
+        album_id = s_album.get('id', '')
+        album_name = s_album.get('name', '')
+        artist_name = ''
+        artist_id = ''
+        if s_artists:
+            first_artist = s_artists[0]
+            if isinstance(first_artist, dict):
+                artist_name = first_artist.get('name') or ''
+                artist_id = first_artist.get('id') or ''
+            else:
+                artist_name = str(first_artist)
+
+        group_key = album_id if album_id else f"{artist_name}::{album_name}"
+
+        if group_key not in album_groups:
+            album_groups[group_key] = []
+            # Context is captured from the first track seen for this album
+            album_meta[group_key] = {
+                'album_ctx': {
+                    'name': album_name,
+                    'id': album_id,
+                    # 'or' also covers a key that is present but None
+                    'album_type': s_album.get('album_type') or 'album',
+                    'total_tracks': s_album.get('total_tracks', 0),
+                    'release_date': s_album.get('release_date', ''),
+                    'images': s_album.get('images', [])
+                },
+                'artist_ctx': {
+                    'name': artist_name,
+                    'id': artist_id
+                }
+            }
+
+        album_groups[group_key].append(res)
+
+    # Process each album group
+    all_matched = []
+    all_unmatched = list(no_album_tracks)  # Tracks with no album data go straight to per-track
+
+    for group_key, group_tracks in album_groups.items():
+        meta = album_meta[group_key]
+        album_ctx = meta['album_ctx']
+        artist_ctx = meta['artist_ctx']
+        album_type = str(album_ctx.get('album_type') or 'album').lower()
+
+        # Skip singles and groups with < 2 tracks
+        if album_type == 'single' or len(group_tracks) < 2:
+            all_unmatched.extend(group_tracks)
+            continue
+
+        logger.info(f"[Wishlist Album Search] Searching for '{artist_ctx['name']}' - '{album_ctx['name']}' ({len(group_tracks)} tracks)")
+        print(f"🎵 [Wishlist] Album search: '{artist_ctx['name']}' - '{album_ctx['name']}' ({len(group_tracks)} tracks)")
+
+        matched, unmatched = _attempt_album_level_search(batch_id, group_tracks, album_ctx, artist_ctx)
+        all_matched.extend(matched)
+        all_unmatched.extend(unmatched)
+
+    logger.info(f"[Wishlist Album Search] Total: {len(all_matched)} pre-matched, {len(all_unmatched)} per-track search")
+    return all_matched, all_unmatched
+
+
 def _attempt_album_level_search(batch_id, missing_tracks, batch_album_context, batch_artist_context):
    """
    Attempt to find a complete album source on Soulseek before falling back to per-track search.
@ -12095,9 +12240,13 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json):
        album_unmatched = missing_tracks

        if batch_is_album and batch_album_context and batch_artist_context:
+            # Single album from artist page / discover page
            album_matched, album_unmatched = _attempt_album_level_search(
                batch_id, missing_tracks, batch_album_context, batch_artist_context
            )
+        elif playlist_id == 'wishlist':
+            # Wishlist albums cycle — group tracks by album, search each
+            album_matched, album_unmatched = _attempt_wishlist_album_searches(batch_id, missing_tracks)

        # Now create download tasks under the lock
        with tasks_lock:
@ -13313,6 +13462,7 @@ def _build_batch_status_data(batch_id, batch, live_transfers_lookup):
                            if task['status'] != 'post_processing':
                                task_status['status'] = 'post_processing'
                                task['status'] = 'post_processing'
+                                task['status_change_time'] = time.time()  # Reset so stuck detector doesn't fire prematurely
                                print(f"🔄 Task {task_id} API reports 'Succeeded' - starting post-processing verification")
                                
                                # Submit post-processing worker to verify file and complete the task
@ -13907,6 +14057,19 @@ def _check_batch_completion_v2(batch_id):
                            print(f"⏰ [Stuck Detection V2] Task {task_id} stuck in searching for {task_age:.0f}s - forcing failure")
                            task['status'] = 'failed'
                            task['error_message'] = f'Retry timeout after {task_age:.0f} seconds'
+                            # Add to permanently_failed_tracks so it gets re-added to wishlist
+                            original_track_info = task.get('track_info', {})
+                            track_info = {
+                                'download_index': task.get('track_index', 0),
+                                'table_index': task.get('track_index', 0),
+                                'track_name': original_track_info.get('name', 'Unknown Track'),
+                                'artist_name': _get_track_artist_name(original_track_info),
+                                'retry_count': task.get('retry_count', 0),
+                                'spotify_track': _ensure_spotify_track_format(original_track_info),
+                                'failure_reason': f'Search timeout after {task_age:.0f} seconds',
+                                'candidates': task.get('cached_candidates', [])
+                            }
+                            batch.get('permanently_failed_tracks', []).append(track_info)
                            finished_count += 1
                        else:
                            retrying_count += 1