Improve YouTube download handling and shutdown safety

Adjusts matching weights for YouTube sources to rely more on title and duration, adds a shutdown callback to the YouTube client to prevent new downloads during shutdown, and enhances post-processing to reliably resolve actual YouTube file paths. Improves error handling for file removal, ensures no new batch downloads start during shutdown, and refines download monitoring to trigger post-processing on completed YouTube downloads. Also increases YouTube download retries and improves logging for debugging.
pull/115/head
Broque Thomas 4 months ago
parent af2265a68a
commit ddd2ebdb13

@ -572,13 +572,21 @@ class MusicMatchingEngine:
is_short_title = len(spotify_cleaned_title) <= 5 or len(title_words) == 1
# --- Final Weighted Score ---
# Rebalanced weights: Artist matching is now more important to prevent false positives
final_confidence = (title_score * 0.45) + (artist_score * 0.40) + (duration_score * 0.15)
is_youtube = slskd_track.username == 'youtube'
if is_youtube:
# For YouTube, rely more on Title and Duration since Artist is often missing from video titles
# and the search query already filtered by artist to some extent.
# New weights: Title 70%, Artist 10%, Duration 20%
final_confidence = (title_score * 0.70) + (artist_score * 0.10) + (duration_score * 0.20)
else:
# Standard weights for Soulseek (Artist is critical for correctness)
# Rebalanced weights: Artist matching is now more important to prevent false positives
final_confidence = (title_score * 0.45) + (artist_score * 0.40) + (duration_score * 0.15)
# Apply short title penalty AFTER calculating base confidence
# This allows perfect matches to still pass, but penalizes weak artist matches
# For YouTube, skip penalty since artist matching is less reliable (searches are track-name-only)
is_youtube = slskd_track.username == 'youtube'
if is_short_title and artist_score < 0.5 and not is_youtube:
# Heavy penalty but not complete rejection
# Multiply by 0.4 (60% penalty) - still possible to pass if title+duration are perfect
@ -596,13 +604,13 @@ class MusicMatchingEngine:
# Debug logging to track matching decisions
if final_confidence > 0.3: # Only log potential matches
logger.debug(
f"Match scoring: '{spotify_track.name}' by {spotify_track.artists[0] if spotify_track.artists else 'Unknown'} "
f"Match scoring ({'YT' if is_youtube else 'SLSK'}): '{spotify_track.name}' by {spotify_track.artists[0] if spotify_track.artists else 'Unknown'} "
f"vs '{slskd_track.filename[:60]}...' | "
f"Title: {title_score:.2f} (ratio: {title_ratio:.2f}, boundary: {has_word_boundary}), "
f"Artist: {artist_score:.2f}, Duration: {duration_score:.2f}, "
f"Final: {final_confidence:.2f} {'✅ PASS' if final_confidence > 0.58 else '❌ FAIL'}"
f"Final: {final_confidence:.2f} {'✅ PASS' if final_confidence > 0.63 else '❌ FAIL'}"
)
# Ensure the final score doesn't exceed 1.0
return min(final_confidence, 1.0)

@ -198,6 +198,7 @@ class DownloadStatus:
transferred: int
speed: int
time_remaining: Optional[int] = None
file_path: Optional[str] = None
class SoulseekClient:
def __init__(self):
@ -330,8 +331,9 @@ class SoulseekClient:
error_detail = response_text if response_text.strip() else "No error details provided"
# Reduce noise for expected 404s during search cleanup
if response.status == 404 and 'searches/' in url and method == 'DELETE':
logger.debug(f"Search not found for deletion (expected): {url}")
# Reduce noise for expected 404s (e.g. status checks for YouTube downloads)
if response.status == 404:
logger.debug(f"API request returned 404 (Not Found) for {url}")
else:
logger.error(f"API request failed: HTTP {response.status} ({response.reason}) - {error_detail}")
logger.debug(f"Failed request: {method} {url}")

@ -108,6 +108,13 @@ class YouTubeClient:
logger.info(f"📁 YouTube client using download path: {self.download_path}")
# Callback for shutdown check (avoids circular imports)
self.shutdown_check = None
def set_shutdown_check(self, check_callable):
    """Install *check_callable* as this client's shutdown probe.

    The callable takes no arguments and returns truthy once the host
    application is shutting down; download loops poll it (via
    ``self.shutdown_check()``) to abort new work early. It is injected
    from outside rather than imported to avoid a circular import on
    the web module.
    """
    # Just retain the reference; polling happens in the download loop.
    self.shutdown_check = check_callable
# Initialize production matching engine for parity with Soulseek
self.matching_engine = MusicMatchingEngine()
logger.info("✅ Initialized production MusicMatchingEngine")
@ -893,21 +900,35 @@ class YouTubeClient:
File path if successful, None otherwise
"""
try:
max_retries = 2
max_retries = 3
for attempt in range(max_retries):
# Check for server shutdown using callback
if self.shutdown_check and self.shutdown_check():
logger.info(f"🛑 Server shutting down, aborting download attempt {attempt + 1}")
return None
try:
# Use default download options
download_opts = self.download_opts.copy()
# Force best audio format to prevent 'Requested format not available' errors
download_opts['format'] = 'bestaudio/best'
download_opts['noplaylist'] = True
# On retry, try different player client
if attempt > 0:
logger.info(f"🔄 Retry {attempt + 1}/{max_retries} with different settings")
if attempt == 1:
logger.info(f"🔄 Retry {attempt + 1}/{max_retries} with different player client")
download_opts['extractor_args'] = {
'youtube': {
'player_client': ['web'], # Try web-only on retry
'skip': ['hls', 'dash'],
}
}
elif attempt >= 2:
logger.info(f"🔄 Retry {attempt + 1}/{max_retries} with 'best' format (video fallback)")
download_opts['format'] = 'best' # Fallback to best available (including video)
download_opts.pop('extractor_args', None) # Reset extractor args
# Perform download
with yt_dlp.YoutubeDL(download_opts) as ydl:
@ -1004,7 +1025,8 @@ class YouTubeClient:
size=download_info['size'],
transferred=download_info['transferred'],
speed=download_info['speed'],
time_remaining=download_info.get('time_remaining')
time_remaining=download_info.get('time_remaining'),
file_path=download_info.get('file_path')
)
async def cancel_download(self, download_id: str, username: str = None, remove: bool = False) -> bool:

@ -120,6 +120,12 @@ try:
tidal_client = TidalClient()
matching_engine = MusicMatchingEngine()
sync_service = PlaylistSyncService(spotify_client, plex_client, soulseek_client, jellyfin_client, navidrome_client)
# Inject shutdown check callback into YouTube client (avoids circular imports)
# The callback uses the global IS_SHUTTING_DOWN flag from this module
if hasattr(soulseek_client, 'youtube'):
soulseek_client.youtube.set_shutdown_check(lambda: IS_SHUTTING_DOWN)
print("✅ Configured YouTube client shutdown callback")
# Initialize web scan manager for automatic post-download scanning
media_clients = {
@ -499,6 +505,22 @@ class WebUIDownloadMonitor:
# Check for timeouts and errors - retries handled directly in _should_retry_task
self._should_retry_task(task_id, task, live_transfers_lookup, current_time)
# ENHANCED: Check for successful completions (especially YouTube)
task_filename = task.get('filename') or task.get('track_info', {}).get('filename')
task_username = task.get('username') or task.get('track_info', {}).get('username')
if task_filename and task_username:
lookup_key = f"{task_username}::{extract_filename(task_filename)}"
live_info = live_transfers_lookup.get(lookup_key)
if live_info:
state = live_info.get('state', '')
# Trigger post-processing if download is completed but still marked as downloading locally
# 'Completed' is used by YouTubeClient, 'Succeeded' by Soulseek
if state in ['Completed', 'Succeeded'] and task['status'] == 'downloading':
print(f"✅ Monitor detected completed download for {task_id} ({state}) - triggering post-processing")
_on_download_completed(task_id, success=True)
# ENHANCED: Add worker count validation to detect ghost workers
self._validate_worker_counts()
@ -1008,10 +1030,23 @@ def cleanup_monitor():
batch_locks.clear()
print("🧹 Cleaned up batch locks")
# Global shutdown flag
IS_SHUTTING_DOWN = False
def signal_handler(signum, frame):
    """Handle SIGINT (Ctrl+C) and SIGTERM.

    Flips the module-level ``IS_SHUTTING_DOWN`` flag first so that
    polling loops (batch manager, YouTube client callback) stop
    scheduling new work, then tears down the monitor and the download
    executor before terminating the process.
    """
    global IS_SHUTTING_DOWN
    print(f"🛑 Signal {signum} received, cleaning up...")
    # Flag must flip before cleanup so in-flight workers observe it.
    IS_SHUTTING_DOWN = True
    cleanup_monitor()
    # Best-effort executor shutdown; a failure here must not block exit.
    try:
        print("🛑 Shutting down missing_download_executor...")
        missing_download_executor.shutdown(wait=False, cancel_futures=True)
    except Exception as exc:
        print(f"⚠️ Error shutting down executor: {exc}")
    sys.exit(0)
# Register cleanup handlers
@ -8212,14 +8247,26 @@ def _post_process_matched_download(context_key, context, file_path):
if has_metadata:
print(f"⚠️ [Protection] Existing file already has metadata enhancement - skipping overwrite: {os.path.basename(final_path)}")
print(f"🗑️ [Protection] Removing redundant download file: {os.path.basename(file_path)}")
os.remove(file_path) # Remove the redundant file
try:
os.remove(file_path) # Remove the redundant file
except FileNotFoundError:
print(f"⚠️ [Protection] Could not remove redundant file (already gone): {file_path}")
except Exception as e:
print(f"⚠️ [Protection] Error removing redundant file: {e}")
return # Don't overwrite the good file
else:
print(f"🔄 [Protection] Existing file lacks metadata - safe to overwrite: {os.path.basename(final_path)}")
os.remove(final_path)
try:
os.remove(final_path)
except FileNotFoundError:
pass # It was just there, but now gone?
except Exception as check_error:
print(f"⚠️ [Protection] Error checking existing file metadata, proceeding with overwrite: {check_error}")
os.remove(final_path)
try:
if os.path.exists(final_path):
os.remove(final_path)
except Exception as e:
print(f"⚠️ [Protection] Failed to remove existing file for overwrite: {e}")
_safe_move_file(file_path, final_path)
@ -10684,6 +10731,11 @@ def get_valid_candidates(results, spotify_track, query):
normalized_spotify_artist = re.sub(r'[^a-zA-Z0-9]', '', spotify_artist_name).lower()
for candidate in quality_filtered_candidates:
# Skip artist check for YouTube results (title matching is sufficient as processed by matching engine)
if is_youtube_source:
verified_candidates.append(candidate)
continue
# This check is critical: it ensures the artist's name is in the file path,
# preventing downloads from the wrong artist.
normalized_slskd_path = re.sub(r'[^a-zA-Z0-9]', '', candidate.filename).lower()
@ -10757,6 +10809,11 @@ def _start_next_batch_of_downloads(batch_id):
batch_lock = _get_batch_lock(batch_id)
with batch_lock:
# Prevent starting new tasks if shutting down
if IS_SHUTTING_DOWN:
print(f"🛑 [Batch Manager] Server shutting down - skipping new tasks for batch {batch_id}")
return
with tasks_lock:
if batch_id not in download_batches:
return
@ -11640,7 +11697,58 @@ def _run_post_processing_worker(task_id, batch_id):
# RESILIENT FILE-FINDING LOOP: Try up to 3 times with delays
found_file = None
file_location = None
# CRITICAL FIX: For YouTube downloads, the filename in task is 'id||title' (metadata),
# but the actual file on disk is 'Title.mp3'. We must ask the client for the real path.
if (task.get('username') == 'youtube' or '||' in str(task_filename)) and not found_file:
logger.info(f"🔧 [Post-Processing] Detected YouTube download task: {task_id}")
try:
# Query the download orchestrator for the status which contains the real file path
# CRITICAL FIX: Use the actual download_id designated by the client, not the internal task_id
actual_download_id = task.get('download_id') or task_id
status = asyncio.run(soulseek_client.get_download_status(actual_download_id))
if status and status.file_path:
real_path = status.file_path
if os.path.exists(real_path):
# Determine if it's in download or transfer directory
real_path_obj = Path(real_path)
download_dir_obj = Path(download_dir)
transfer_dir_obj = Path(transfer_dir)
# Use absolute path comparison
try:
if download_dir_obj.resolve() in real_path_obj.resolve().parents:
file_location = 'download'
elif transfer_dir_obj.resolve() in real_path_obj.resolve().parents:
file_location = 'transfer'
else:
file_location = 'absolute'
except:
# Fallback if resolve fails (e.g. permission or path issues)
file_location = 'absolute'
if file_location:
# We found the file! Use the absolute path if it confuses the joining logic,
# but usually we want just the filename if location is 'download'/'transfer'
# CRITICAL FIX: Always use the absolute real_path.
# Stripping to basename causes FileNotFoundError because post-processing
# runs with CWD as project root, not download dir.
found_file = real_path
logger.info(f"✅ [Post-Processing] Resolved actual YouTube filename: {found_file} (Location: {file_location})")
else:
logger.warning(f"⚠️ [Post-Processing] YouTube status reported path but file missing: {real_path}")
else:
logger.warning(f"⚠️ [Post-Processing] YouTube status returned no file_path for task {task_id}")
except Exception as e:
logger.error(f"⚠️ [Post-Processing] Failed to retrieve YouTube task status: {e}")
for retry_count in range(3):
# If we already resolved the file (e.g. via YouTube status), skip searching
if found_file:
print(f"🎯 [Post-Processing] Skipping search loop, file already resolved: {found_file}")
break
print(f"🔍 [Post-Processing] Attempt {retry_count + 1}/3 to find file")
print(f"🔍 [Post-Processing] Original filename: {task_basename}")
if expected_final_filename:

Loading…
Cancel
Save