diff --git a/core/matching_engine.py b/core/matching_engine.py index 84833006..c5ce06e0 100644 --- a/core/matching_engine.py +++ b/core/matching_engine.py @@ -922,197 +922,3 @@ class MusicMatchingEngine: return best_match, best_confidence else: return None, best_confidence - - def match_album_result_to_spotify_tracks( - self, - album_result: AlbumResult, - spotify_tracks: List[SpotifyTrack], - spotify_album_name: str, - spotify_artist_name: str - ) -> Tuple[float, Dict[str, TrackResult]]: - """ - Match an AlbumResult from Soulseek against a list of Spotify tracks. - - Returns: - Tuple of: - - album_confidence: float (0.0-1.0) indicating overall album match quality - - track_mapping: Dict mapping spotify_track_id -> matched TrackResult - """ - if not album_result or not spotify_tracks: - return 0.0, {} - - # Gate check: album title similarity - cleaned_slskd_album = self.clean_album_name(album_result.album_title) - cleaned_spotify_album = self.clean_album_name(spotify_album_name) - album_title_score = self.similarity_score(cleaned_slskd_album, cleaned_spotify_album) - - if album_title_score < 0.65: - logger.debug(f"Album title mismatch: '{album_result.album_title}' vs '{spotify_album_name}' (score: {album_title_score:.2f})") - return 0.0, {} - - # Gate check: artist similarity - # Method 1: Compare parsed artist name if available - cleaned_spotify_artist = self.clean_artist(spotify_artist_name) - artist_score = 0.0 - - if album_result.artist: - cleaned_slskd_artist = self.clean_artist(album_result.artist) - artist_score = self.similarity_score(cleaned_slskd_artist, cleaned_spotify_artist) - - # Method 2: If artist name wasn't parsed or scored low, check if artist - # appears in the full album path (e.g., "Music/Pink Floyd/Album Name/") - # This mirrors the artist verification in get_valid_candidates() - if artist_score < 0.60 and album_result.album_path: - normalized_spotify_artist = re.sub(r'[^a-zA-Z0-9]', '', spotify_artist_name).lower() - normalized_album_path = re.sub(r'[^a-zA-Z0-9]', '', album_result.album_path).lower() - if normalized_spotify_artist and normalized_spotify_artist in normalized_album_path: - artist_score = 0.85 # High confidence — artist name found in path - logger.debug(f"Artist found in album path: '{spotify_artist_name}' in '{album_result.album_path}'") - - if artist_score < 0.60: - logger.debug(f"Artist mismatch: '{album_result.artist}' vs '{spotify_artist_name}' (score: {artist_score:.2f}, path: '{album_result.album_path}')") - return 0.0, {} - - # Per-track matching: build score matrix - slskd_tracks = album_result.tracks - score_triples = [] # (spotify_idx, slskd_idx, score) - - for sp_idx, sp_track in enumerate(spotify_tracks): - sp_title_cleaned = self.clean_title(sp_track.name) - sp_track_num = sp_idx + 1 # 1-based track number from Spotify order - - for sl_idx, sl_track in enumerate(slskd_tracks): - # Title similarity (weight 0.50) - sl_title = sl_track.title if sl_track.title else '' - if not sl_title and sl_track.filename: - # Parse title from filename as fallback - fname = sl_track.filename.replace('\\', '/').split('/')[-1] - fname = re.sub(r'\.\w{3,4}$', '', fname) # Remove extension - fname = re.sub(r'^\d+[\s.\-_]+', '', fname) # Remove leading track number - sl_title = fname - sl_title_cleaned = self.clean_title(sl_title) - title_score = self.similarity_score(sp_title_cleaned, sl_title_cleaned) - - # Duration similarity (weight 0.30) - sl_duration = sl_track.duration or 0 - duration_score = self.duration_similarity(sp_track.duration_ms, sl_duration) - - # Track number match (weight 0.20) - sl_track_num = sl_track.track_number or 0 - if sl_track_num > 0 and sp_track_num > 0: - if sl_track_num == sp_track_num: - track_num_score = 1.0 - elif abs(sl_track_num - sp_track_num) == 1: - track_num_score = 0.5 - else: - track_num_score = 0.0 - else: - track_num_score = 0.3 # Neutral when track number unavailable - - combined = (title_score * 0.50) + (duration_score * 0.30) + (track_num_score * 0.20) - score_triples.append((sp_idx, sl_idx, combined)) - - # Greedy assignment: sort descending by score, assign without double-use - score_triples.sort(key=lambda x: x[2], reverse=True) - assigned_spotify = set() - assigned_slskd = set() - track_mapping = {} - matched_scores = [] - - for sp_idx, sl_idx, score in score_triples: - if sp_idx in assigned_spotify or sl_idx in assigned_slskd: - continue - if score < 0.55: - continue # Below minimum per-track threshold - - sp_track = spotify_tracks[sp_idx] - track_mapping[sp_track.id] = slskd_tracks[sl_idx] - assigned_spotify.add(sp_idx) - assigned_slskd.add(sl_idx) - matched_scores.append(score) - - # Calculate album confidence - match_ratio = len(track_mapping) / len(spotify_tracks) if spotify_tracks else 0.0 - avg_track_score = sum(matched_scores) / len(matched_scores) if matched_scores else 0.0 - track_count_ratio = ( - min(album_result.track_count, len(spotify_tracks)) / - max(album_result.track_count, len(spotify_tracks)) - ) if spotify_tracks else 0.0 - - album_confidence = ( - (match_ratio * 0.40) + - (avg_track_score * 0.25) + - (album_title_score * 0.20) + - (artist_score * 0.10) + - (track_count_ratio * 0.05) - ) - - logger.info( - f"Album match: '{album_result.album_title}' by {album_result.username} -> " - f"confidence={album_confidence:.2f}, matched={len(track_mapping)}/{len(spotify_tracks)}, " - f"title={album_title_score:.2f}, artist={artist_score:.2f}, tracks_avg={avg_track_score:.2f}" - ) - - return album_confidence, track_mapping - - def find_best_album_source( - self, - album_results: List[AlbumResult], - spotify_tracks: List[SpotifyTrack], - spotify_album_name: str, - spotify_artist_name: str, - expected_track_count: int, - quality_filter_fn=None - ) -> Tuple[Optional[AlbumResult], float, Dict[str, TrackResult]]: - """ - Find the best AlbumResult source for a complete album download. - - Returns: - Tuple of (best_album, best_confidence, track_mapping) or (None, 0.0, {}) - """ - if not album_results or not spotify_tracks: - return None, 0.0, {} - - best_album = None - best_confidence = 0.0 - best_mapping = {} - - for album in album_results: - # Skip tiny results - if album.track_count < 2: - continue - - # Quality filter if provided - if quality_filter_fn: - try: - if not quality_filter_fn(album): - logger.debug(f"Album '{album.album_title}' from {album.username} rejected by quality filter (dominant: {album.dominant_quality})") - continue - except Exception as e: - logger.warning(f"Quality filter error for album '{album.album_title}': {e}") - - confidence, mapping = self.match_album_result_to_spotify_tracks( - album, spotify_tracks, spotify_album_name, spotify_artist_name - ) - - if confidence > best_confidence: - best_confidence = confidence - best_album = album - best_mapping = mapping - - # Minimum thresholds - if best_confidence < 0.60: - logger.info(f"No album source met confidence threshold (best: {best_confidence:.2f})") - return None, 0.0, {} - - matched_ratio = len(best_mapping) / len(spotify_tracks) if spotify_tracks else 0.0 - if matched_ratio < 0.50: - logger.info(f"Best album source matched too few tracks ({len(best_mapping)}/{len(spotify_tracks)})") - return None, 0.0, {} - - logger.info( - f"Best album source: '{best_album.album_title}' from {best_album.username} " - f"(confidence={best_confidence:.2f}, matched={len(best_mapping)}/{len(spotify_tracks)}, " - f"quality={best_album.dominant_quality})" - ) - return best_album, best_confidence, best_mapping diff --git a/utils/logging_config.py b/utils/logging_config.py index 3518ee4a..49a6a82c 100644 --- a/utils/logging_config.py +++ b/utils/logging_config.py @@ -1,5 +1,4 @@ import logging -import logging.handlers import sys import re from pathlib import Path @@ -74,11 +73,8 @@ def setup_logging(level: str = "INFO", log_file: Optional[str] = None) -> loggin if log_file: log_path = Path(log_file) log_path.parent.mkdir(parents=True, exist_ok=True) - - # RotatingFileHandler: 10 MB max per file, keep 3 backups (40 MB total max) - file_handler = logging.handlers.RotatingFileHandler( - log_path, maxBytes=10*1024*1024, backupCount=3, encoding='utf-8' - ) + + file_handler = logging.FileHandler(log_path, encoding='utf-8') file_handler.setLevel(log_level) file_formatter = SafeFormatter( diff --git a/web_server.py b/web_server.py index 20b1f5a0..73181d3d 100644 --- a/web_server.py +++ b/web_server.py @@ -11800,175 +11800,6 @@ def _on_download_completed(batch_id, task_id, success=True): print(f"🔄 [Batch Manager] Starting next batch for {batch_id}") _start_next_batch_of_downloads(batch_id) -def _attempt_album_level_search(batch_id, missing_tracks, batch_album_context, batch_artist_context): - """ - Attempt to find a complete album source on Soulseek before falling back to per-track search. - Searches for "Artist Album" and uses AlbumResult objects to find a single user with the full album. - - Returns: - Tuple of: - - album_matched: List of (track_analysis_result, pre_assigned_candidate: TrackResult) tuples - - unmatched: List of track_analysis_results that need per-track search - """ - try: - # Guard checks - download_mode = config_manager.get('download_source.mode', 'soulseek') - if download_mode == 'youtube': - return [], missing_tracks - - album_name = batch_album_context.get('name', '') - album_type = batch_album_context.get('album_type', 'album') - artist_name = batch_artist_context.get('name', '') - - if not album_name or not artist_name: - return [], missing_tracks - - if album_type not in ('album', 'ep'): - return [], missing_tracks - - if len(missing_tracks) < 2: - return [], missing_tracks - - logger.info(f"[Album Search] '{artist_name}' - '{album_name}' ({album_type}), {len(missing_tracks)} missing tracks") - - # Build SpotifyTrack objects from the missing tracks - spotify_tracks = [] - - for res in missing_tracks: - track_data = res['track'] - raw_artists = track_data.get('artists', []) - processed_artists = [] - for artist in raw_artists: - if isinstance(artist, str): - processed_artists.append(artist) - elif isinstance(artist, dict) and 'name' in artist: - processed_artists.append(artist['name']) - else: - processed_artists.append(str(artist)) - - raw_album = track_data.get('album', '') - if isinstance(raw_album, dict) and 'name' in raw_album: - track_album_name = raw_album['name'] - elif isinstance(raw_album, str): - track_album_name = raw_album - else: - track_album_name = str(raw_album) - - sp_track = SpotifyTrack( - id=track_data.get('id', f'missing_{res["track_index"]}'), - name=track_data.get('name', ''), - artists=processed_artists, - album=track_album_name, - duration_ms=track_data.get('duration_ms', 0), - popularity=track_data.get('popularity', 0) - ) - spotify_tracks.append(sp_track) - - # Perform album-level search with query variations - # Soulseek can block certain artist names, so try multiple queries - artist_words = artist_name.split() - first_word = artist_words[0] if artist_words else '' - if first_word.lower() == 'the' and len(artist_words) > 1: - first_word = artist_words[1] - - search_queries = [f"{artist_name} {album_name}"] - if first_word and len(first_word) > 1: - fallback_query = f"{first_word} {album_name}" - if fallback_query.lower() != search_queries[0].lower(): - search_queries.append(fallback_query) - search_queries.append(album_name) - - album_results = [] - tracks_result = [] - for search_query in search_queries: - try: - tr, ar = asyncio.run(soulseek_client.search(search_query, timeout=30)) - logger.info(f"[Album Search] Query '{search_query}': {len(ar)} album results, {len(tr)} tracks") - tracks_result.extend(tr) - album_results.extend(ar) - if ar: - break - except Exception as search_err: - logger.warning(f"[Album Search] Query '{search_query}' failed: {search_err}") - continue - - if not album_results: - logger.info(f"[Album Search] No album results found — falling back to per-track search") - return [], missing_tracks - - # Quality filter: check if album's dominant quality is acceptable - # Uses DB quality profile (same source as filter_results_by_quality_preference) - def quality_filter(album_result): - """Check if album quality passes user's quality profile""" - try: - from database.music_database import MusicDatabase - db = MusicDatabase() - profile = db.get_quality_profile() - - # Build set of enabled quality formats from DB profile - enabled_formats = set() - for quality_name, quality_config in profile.get('qualities', {}).items(): - if quality_config.get('enabled', False): - if quality_name == 'flac': - enabled_formats.add('flac') - elif quality_name.startswith('mp3'): - enabled_formats.add('mp3') - - if not enabled_formats: - return True # No specific quality enabled, accept anything - - dominant = (album_result.dominant_quality or '').lower() - - # Accept if dominant quality matches an enabled format - if dominant in enabled_formats: - return True - - # At album selection level, be strict — we have many sources to choose from. - # Fallback logic applies at per-track download level, not here. - return False - except Exception: - return True # Accept on error - - # Find best album source - expected_count = batch_album_context.get('total_tracks', len(spotify_tracks)) - - best_album, confidence, track_mapping = matching_engine.find_best_album_source( - album_results, spotify_tracks, album_name, artist_name, - expected_count, quality_filter_fn=quality_filter - ) - - if not best_album: - logger.info(f"[Album Search] No suitable album source found — falling back to per-track search") - return [], missing_tracks - - logger.info(f"[Album Search] Match: {best_album.username} ({best_album.dominant_quality}), " - f"confidence={confidence:.2f}, matched={len(track_mapping)}/{len(spotify_tracks)}") - - # Partition missing tracks into matched (with pre-assigned candidate) and unmatched - album_matched = [] - unmatched = [] - - for res in missing_tracks: - track_data = res['track'] - track_id = track_data.get('id', f'missing_{res["track_index"]}') - - if track_id in track_mapping: - album_matched.append((res, track_mapping[track_id])) - else: - unmatched.append(res) - - logger.info(f"[Album Search] Result: {len(album_matched)} pre-matched from {best_album.username}, {len(unmatched)} per-track fallback") - print(f"🎵 [Album Search] '{artist_name}' - '{album_name}': {len(album_matched)} pre-matched from {best_album.username}, {len(unmatched)} per-track fallback") - return album_matched, unmatched - - except Exception as e: - import traceback - logger.error(f"[Album Search] Error: {e}") - logger.error(traceback.format_exc()) - print(f"⚠️ [Album Search] Error during album-level search: {e}") - return [], missing_tracks - - def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json): """ A master worker that handles the entire missing tracks process: @@ -12079,9 +11910,12 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json): print(f" transitioning batch {batch_id} to download phase with {len(missing_tracks)} tracks.") - # Extract batch context BEFORE album-level search (read-only, safe outside lock) with tasks_lock: if batch_id not in download_batches: return + + download_batches[batch_id]['phase'] = 'downloading' + + # Get batch album context (if this is an artist album download) batch = download_batches[batch_id] batch_album_context = batch.get('album_context') batch_artist_context = batch.get('artist_context') @@ -12089,24 +11923,10 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json): batch_playlist_folder_mode = batch.get('playlist_folder_mode', False) batch_playlist_name = batch.get('playlist_name', 'Unknown Playlist') - # ALBUM-LEVEL SEARCH: Try to find a complete album source on Soulseek - # This runs OUTSIDE tasks_lock since it does network I/O (~30s) - album_matched = [] - album_unmatched = missing_tracks - - if batch_is_album and batch_album_context and batch_artist_context: - album_matched, album_unmatched = _attempt_album_level_search( - batch_id, missing_tracks, batch_album_context, batch_artist_context - ) - - # Now create download tasks under the lock - with tasks_lock: - if batch_id not in download_batches: return - - download_batches[batch_id]['phase'] = 'downloading' + for res in missing_tracks: + task_id = str(uuid.uuid4()) + track_info = res['track'].copy() - # Helper: enrich track_info with album/playlist context - def _enrich_track_info(track_info, res): # Add explicit album context to track_info for artist album downloads if batch_is_album and batch_album_context and batch_artist_context: track_info['_explicit_album_context'] = batch_album_context @@ -12124,13 +11944,13 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json): spotify_data = json.loads(spotify_data) except: spotify_data = {} - + if not spotify_data: spotify_data = {} s_album = spotify_data.get('album') s_artists = spotify_data.get('artists', []) - + # We need at least an album name and artist if s_album and s_album.get('name'): # Construct minimal artist context @@ -12157,6 +11977,7 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json): track_info['_is_explicit_album_download'] = True print(f"🎵 [Wishlist] Added album context for: '{track_info.get('name')}' -> '{album_ctx['name']}'") + # Add playlist folder mode flag for sync page playlists if batch_playlist_folder_mode: track_info['_playlist_folder_mode'] = True @@ -12165,30 +11986,6 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json): else: print(f"🔍 [Debug] Task Creation - playlist folder mode NOT enabled for: {track_info.get('name')}") - # Create tasks for album-matched tracks (pre-assigned candidate from album source) - for res, pre_assigned_candidate in album_matched: - task_id = str(uuid.uuid4()) - track_info = res['track'].copy() - _enrich_track_info(track_info, res) - - download_tasks[task_id] = { - 'status': 'pending', 'track_info': track_info, - 'playlist_id': playlist_id, 'batch_id': batch_id, - 'track_index': res['track_index'], 'retry_count': 0, - 'cached_candidates': [], 'used_sources': set(), - 'status_change_time': time.time(), - 'metadata_enhanced': False, - 'pre_assigned_candidate': pre_assigned_candidate - } - download_batches[batch_id]['queue'].append(task_id) - print(f"🎵 [Album Match] Task created with pre-assigned source for: {track_info.get('name')}") - - # Create tasks for unmatched tracks (normal per-track search) - for res in album_unmatched: - task_id = str(uuid.uuid4()) - track_info = res['track'].copy() - _enrich_track_info(track_info, res) - download_tasks[task_id] = { 'status': 'pending', 'track_info': track_info, 'playlist_id': playlist_id, 'batch_id': batch_id, @@ -12649,57 +12446,6 @@ def _download_track_worker(task_id, batch_id=None): ) print(f"📥 [Modal Worker] Starting download task for: {track.name} by {track.artists[0] if track.artists else 'Unknown'}") - # CHECK: Pre-assigned candidate from album-level search - pre_assigned = None - with tasks_lock: - if task_id in download_tasks: - pre_assigned = download_tasks[task_id].get('pre_assigned_candidate') - - if pre_assigned: - print(f"🎵 [Album Match] Using pre-assigned candidate for '{track.name}' from {pre_assigned.username}") - - with tasks_lock: - if task_id in download_tasks: - download_tasks[task_id]['status'] = 'searching' - - # Validate pre-assigned candidate with lightweight checks only. - # Album-level matching already confirmed track match (title, duration, track number). - # Here we only verify: 1) quality profile 2) artist in file path - from core.soulseek_client import SoulseekClient - temp_client = SoulseekClient() - quality_passed = temp_client.filter_results_by_quality_preference([pre_assigned]) - - # Artist path verification (same check as get_valid_candidates) - spotify_artist_name = track.artists[0] if track.artists else "" - normalized_spotify_artist = re.sub(r'[^a-zA-Z0-9]', '', spotify_artist_name).lower() - normalized_slskd_path = re.sub(r'[^a-zA-Z0-9]', '', pre_assigned.filename).lower() - artist_in_path = normalized_spotify_artist in normalized_slskd_path if normalized_spotify_artist else True - - logger.info(f"[Album Match] '{track.name}': quality_passed={len(quality_passed)}, artist_in_path={artist_in_path}") - - if quality_passed and artist_in_path: - # Set confidence attribute expected by _attempt_download_with_candidates sort - # (normally set by find_best_slskd_matches_enhanced, which we bypass for pre-assigned) - for c in quality_passed: - c.confidence = 1.0 - c.version_type = 'original' - candidates = quality_passed - with tasks_lock: - if task_id in download_tasks: - download_tasks[task_id]['cached_candidates'] = candidates - - success = _attempt_download_with_candidates(task_id, candidates, track, batch_id) - if success: - print(f"✅ [Album Match] Pre-assigned download initiated for '{track.name}'") - return - else: - print(f"⚠️ [Album Match] Pre-assigned candidate failed for '{track.name}', falling back to per-track search") - else: - logger.warning(f"[Album Match] Rejected '{track.name}': quality={pre_assigned.quality}, artist_in_path={artist_in_path}") - print(f"⚠️ [Album Match] Pre-assigned candidate rejected for '{track.name}', falling back to per-track search") - - # Fall through to normal per-track search below - # Initialize task state tracking (like GUI's parallel_search_tracking) with tasks_lock: if task_id in download_tasks: