diff --git a/core/matching_engine.py b/core/matching_engine.py index 81b7ea1f..38c0c1d0 100644 --- a/core/matching_engine.py +++ b/core/matching_engine.py @@ -572,13 +572,21 @@ class MusicMatchingEngine: is_short_title = len(spotify_cleaned_title) <= 5 or len(title_words) == 1 # --- Final Weighted Score --- - # Rebalanced weights: Artist matching is now more important to prevent false positives - final_confidence = (title_score * 0.45) + (artist_score * 0.40) + (duration_score * 0.15) + is_youtube = slskd_track.username == 'youtube' + + if is_youtube: + # For YouTube, rely more on Title and Duration since Artist is often missing from video titles + # and the search query already filtered by artist to some extent. + # New weights: Title 70%, Artist 10%, Duration 20% + final_confidence = (title_score * 0.70) + (artist_score * 0.10) + (duration_score * 0.20) + else: + # Standard weights for Soulseek (Artist is critical for correctness) + # Rebalanced weights: Artist matching is now more important to prevent false positives + final_confidence = (title_score * 0.45) + (artist_score * 0.40) + (duration_score * 0.15) # Apply short title penalty AFTER calculating base confidence # This allows perfect matches to still pass, but penalizes weak artist matches # For YouTube, skip penalty since artist matching is less reliable (searches are track-name-only) - is_youtube = slskd_track.username == 'youtube' if is_short_title and artist_score < 0.5 and not is_youtube: # Heavy penalty but not complete rejection # Multiply by 0.4 (60% penalty) - still possible to pass if title+duration are perfect @@ -596,13 +604,13 @@ class MusicMatchingEngine: # Debug logging to track matching decisions if final_confidence > 0.3: # Only log potential matches logger.debug( - f"Match scoring: '{spotify_track.name}' by {spotify_track.artists[0] if spotify_track.artists else 'Unknown'} " + f"Match scoring ({'YT' if is_youtube else 'SLSK'}): '{spotify_track.name}' by {spotify_track.artists[0] if spotify_track.artists else 'Unknown'} " f"vs '{slskd_track.filename[:60]}...' | " f"Title: {title_score:.2f} (ratio: {title_ratio:.2f}, boundary: {has_word_boundary}), " f"Artist: {artist_score:.2f}, Duration: {duration_score:.2f}, " - f"Final: {final_confidence:.2f} {'โœ… PASS' if final_confidence > 0.58 else 'โŒ FAIL'}" + f"Final: {final_confidence:.2f} {'โœ… PASS' if final_confidence > 0.63 else 'โŒ FAIL'}" ) - + # Ensure the final score doesn't exceed 1.0 return min(final_confidence, 1.0) diff --git a/core/soulseek_client.py b/core/soulseek_client.py index 30a0766b..c410afef 100644 --- a/core/soulseek_client.py +++ b/core/soulseek_client.py @@ -198,6 +198,7 @@ class DownloadStatus: transferred: int speed: int time_remaining: Optional[int] = None + file_path: Optional[str] = None class SoulseekClient: def __init__(self): @@ -330,8 +331,9 @@ class SoulseekClient: error_detail = response_text if response_text.strip() else "No error details provided" # Reduce noise for expected 404s during search cleanup - if response.status == 404 and 'searches/' in url and method == 'DELETE': - logger.debug(f"Search not found for deletion (expected): {url}") + # Reduce noise for expected 404s (e.g. status checks for YouTube downloads) + if response.status == 404: + logger.debug(f"API request returned 404 (Not Found) for {url}") else: logger.error(f"API request failed: HTTP {response.status} ({response.reason}) - {error_detail}") logger.debug(f"Failed request: {method} {url}") diff --git a/core/youtube_client.py b/core/youtube_client.py index 8cba55d2..d05a04ff 100644 --- a/core/youtube_client.py +++ b/core/youtube_client.py @@ -108,6 +108,13 @@ class YouTubeClient: logger.info(f"๐Ÿ“ YouTube client using download path: {self.download_path}") + # Callback for shutdown check (avoids circular imports) + self.shutdown_check = None + + def set_shutdown_check(self, check_callable): + """Set a callback function to check for system shutdown""" + self.shutdown_check = check_callable + # Initialize production matching engine for parity with Soulseek self.matching_engine = MusicMatchingEngine() logger.info("โœ… Initialized production MusicMatchingEngine") @@ -893,21 +900,35 @@ class YouTubeClient: File path if successful, None otherwise """ try: - max_retries = 2 + max_retries = 3 for attempt in range(max_retries): + # Check for server shutdown using callback + if self.shutdown_check and self.shutdown_check(): + logger.info(f"๐Ÿ›‘ Server shutting down, aborting download attempt {attempt + 1}") + return None + try: # Use default download options download_opts = self.download_opts.copy() + + # Force best audio format to prevent 'Requested format not available' errors + download_opts['format'] = 'bestaudio/best' + download_opts['noplaylist'] = True # On retry, try different player client - if attempt > 0: - logger.info(f"๐Ÿ”„ Retry {attempt + 1}/{max_retries} with different settings") + if attempt == 1: + logger.info(f"๐Ÿ”„ Retry {attempt + 1}/{max_retries} with different player client") download_opts['extractor_args'] = { 'youtube': { 'player_client': ['web'], # Try web-only on retry 'skip': ['hls', 'dash'], } } + elif attempt >= 2: + logger.info(f"๐Ÿ”„ Retry {attempt + 1}/{max_retries} with 'best' format (video fallback)") + download_opts['format'] = 'best' # Fallback to best available (including video) + download_opts.pop('extractor_args', None) # Reset extractor args + # Perform download with yt_dlp.YoutubeDL(download_opts) as ydl: @@ -1004,7 +1025,8 @@ class YouTubeClient: size=download_info['size'], transferred=download_info['transferred'], speed=download_info['speed'], - time_remaining=download_info.get('time_remaining') + time_remaining=download_info.get('time_remaining'), + file_path=download_info.get('file_path') ) async def cancel_download(self, download_id: str, username: str = None, remove: bool = False) -> bool: diff --git a/web_server.py b/web_server.py index 03911fc3..30095d9c 100644 --- a/web_server.py +++ b/web_server.py @@ -120,6 +120,12 @@ try: tidal_client = TidalClient() matching_engine = MusicMatchingEngine() sync_service = PlaylistSyncService(spotify_client, plex_client, soulseek_client, jellyfin_client, navidrome_client) + + # Inject shutdown check callback into YouTube client (avoids circular imports) + # The callback uses the global IS_SHUTTING_DOWN flag from this module + if hasattr(soulseek_client, 'youtube'): + soulseek_client.youtube.set_shutdown_check(lambda: IS_SHUTTING_DOWN) + print("โœ… Configured YouTube client shutdown callback") # Initialize web scan manager for automatic post-download scanning media_clients = { @@ -499,6 +505,22 @@ class WebUIDownloadMonitor: # Check for timeouts and errors - retries handled directly in _should_retry_task self._should_retry_task(task_id, task, live_transfers_lookup, current_time) + + # ENHANCED: Check for successful completions (especially YouTube) + task_filename = task.get('filename') or task.get('track_info', {}).get('filename') + task_username = task.get('username') or task.get('track_info', {}).get('username') + + if task_filename and task_username: + lookup_key = f"{task_username}::{extract_filename(task_filename)}" + live_info = live_transfers_lookup.get(lookup_key) + + if live_info: + state = live_info.get('state', '') + # Trigger post-processing if download is completed but still marked as downloading locally + # 'Completed' is used by YouTubeClient, 'Succeeded' by Soulseek + if state in ['Completed', 'Succeeded'] and task['status'] == 'downloading': + print(f"โœ… Monitor detected completed download for {task_id} ({state}) - triggering post-processing") + _on_download_completed(task_id, success=True) # ENHANCED: Add worker count validation to detect ghost workers self._validate_worker_counts() @@ -1008,10 +1030,23 @@ def cleanup_monitor(): batch_locks.clear() print("๐Ÿงน Cleaned up batch locks") +# Global shutdown flag +IS_SHUTTING_DOWN = False + def signal_handler(signum, frame): """Handle SIGINT (Ctrl+C) and SIGTERM""" + global IS_SHUTTING_DOWN print(f"๐Ÿ›‘ Signal {signum} received, cleaning up...") + IS_SHUTTING_DOWN = True cleanup_monitor() + + # Shutdown executor to prevent new tasks + try: + print("๐Ÿ›‘ Shutting down missing_download_executor...") + missing_download_executor.shutdown(wait=False, cancel_futures=True) + except Exception as e: + print(f"โš ๏ธ Error shutting down executor: {e}") + sys.exit(0) # Register cleanup handlers @@ -8212,14 +8247,26 @@ def _post_process_matched_download(context_key, context, file_path): if has_metadata: print(f"โš ๏ธ [Protection] Existing file already has metadata enhancement - skipping overwrite: {os.path.basename(final_path)}") print(f"๐Ÿ—‘๏ธ [Protection] Removing redundant download file: {os.path.basename(file_path)}") - os.remove(file_path) # Remove the redundant file + try: + os.remove(file_path) # Remove the redundant file + except FileNotFoundError: + print(f"โš ๏ธ [Protection] Could not remove redundant file (already gone): {file_path}") + except Exception as e: + print(f"โš ๏ธ [Protection] Error removing redundant file: {e}") return # Don't overwrite the good file else: print(f"๐Ÿ”„ [Protection] Existing file lacks metadata - safe to overwrite: {os.path.basename(final_path)}") - os.remove(final_path) + try: + os.remove(final_path) + except FileNotFoundError: + pass # It was just there, but now gone? except Exception as check_error: print(f"โš ๏ธ [Protection] Error checking existing file metadata, proceeding with overwrite: {check_error}") - os.remove(final_path) + try: + if os.path.exists(final_path): + os.remove(final_path) + except Exception as e: + print(f"โš ๏ธ [Protection] Failed to remove existing file for overwrite: {e}") _safe_move_file(file_path, final_path) @@ -10684,6 +10731,11 @@ def get_valid_candidates(results, spotify_track, query): normalized_spotify_artist = re.sub(r'[^a-zA-Z0-9]', '', spotify_artist_name).lower() for candidate in quality_filtered_candidates: + # Skip artist check for YouTube results (title matching is sufficient as processed by matching engine) + if is_youtube_source: + verified_candidates.append(candidate) + continue + # This check is critical: it ensures the artist's name is in the file path, # preventing downloads from the wrong artist. normalized_slskd_path = re.sub(r'[^a-zA-Z0-9]', '', candidate.filename).lower() @@ -10757,6 +10809,11 @@ def _start_next_batch_of_downloads(batch_id): batch_lock = _get_batch_lock(batch_id) with batch_lock: + # Prevent starting new tasks if shutting down + if IS_SHUTTING_DOWN: + print(f"๐Ÿ›‘ [Batch Manager] Server shutting down - skipping new tasks for batch {batch_id}") + return + with tasks_lock: if batch_id not in download_batches: return @@ -11640,7 +11697,58 @@ def _run_post_processing_worker(task_id, batch_id): # RESILIENT FILE-FINDING LOOP: Try up to 3 times with delays found_file = None file_location = None + + # CRITICAL FIX: For YouTube downloads, the filename in task is 'id||title' (metadata), + # but the actual file on disk is 'Title.mp3'. We must ask the client for the real path. + if (task.get('username') == 'youtube' or '||' in str(task_filename)) and not found_file: + logger.info(f"๐Ÿ”ง [Post-Processing] Detected YouTube download task: {task_id}") + try: + # Query the download orchestrator for the status which contains the real file path + # CRITICAL FIX: Use the actual download_id designated by the client, not the internal task_id + actual_download_id = task.get('download_id') or task_id + status = asyncio.run(soulseek_client.get_download_status(actual_download_id)) + if status and status.file_path: + real_path = status.file_path + if os.path.exists(real_path): + # Determine if it's in download or transfer directory + real_path_obj = Path(real_path) + download_dir_obj = Path(download_dir) + transfer_dir_obj = Path(transfer_dir) + + # Use absolute path comparison + try: + if download_dir_obj.resolve() in real_path_obj.resolve().parents: + file_location = 'download' + elif transfer_dir_obj.resolve() in real_path_obj.resolve().parents: + file_location = 'transfer' + else: + file_location = 'absolute' + except: + # Fallback if resolve fails (e.g. permission or path issues) + file_location = 'absolute' + + if file_location: + # We found the file! Use the absolute path if it confuses the joining logic, + # but usually we want just the filename if location is 'download'/'transfer' + # CRITICAL FIX: Always use the absolute real_path. + # Stripping to basename causes FileNotFoundError because post-processing + # runs with CWD as project root, not download dir. + found_file = real_path + + logger.info(f"โœ… [Post-Processing] Resolved actual YouTube filename: {found_file} (Location: {file_location})") + else: + logger.warning(f"โš ๏ธ [Post-Processing] YouTube status reported path but file missing: {real_path}") + else: + logger.warning(f"โš ๏ธ [Post-Processing] YouTube status returned no file_path for task {task_id}") + except Exception as e: + logger.error(f"โš ๏ธ [Post-Processing] Failed to retrieve YouTube task status: {e}") + for retry_count in range(3): + # If we already resolved the file (e.g. via YouTube status), skip searching + if found_file: + print(f"๐ŸŽฏ [Post-Processing] Skipping search loop, file already resolved: {found_file}") + break + print(f"๐Ÿ” [Post-Processing] Attempt {retry_count + 1}/3 to find file") print(f"๐Ÿ” [Post-Processing] Original filename: {task_basename}") if expected_final_filename: