Revert "Add album-level Soulseek search and fix unbounded log growth"

This reverts commit 258fd7a8ae.
pull/130/head
Broque Thomas 4 months ago
parent bbac51412d
commit 7d8bb2b88a

@ -922,197 +922,3 @@ class MusicMatchingEngine:
return best_match, best_confidence
else:
return None, best_confidence
def match_album_result_to_spotify_tracks(
    self,
    album_result: AlbumResult,
    spotify_tracks: List[SpotifyTrack],
    spotify_album_name: str,
    spotify_artist_name: str
) -> Tuple[float, Dict[str, TrackResult]]:
    """
    Match an AlbumResult from Soulseek against a list of Spotify tracks.

    The album must pass two gates before any track is compared: cleaned
    album-title similarity of at least 0.65, and artist similarity of at
    least 0.60 (the artist may alternatively be found inside the album path).
    Every (Spotify track, Soulseek track) pair is then scored from title
    similarity (0.50), duration similarity (0.30) and track-number agreement
    (0.20), and pairs are assigned greedily from the best score downwards.

    Args:
        album_result: Candidate album from Soulseek. Reads album_title,
            artist, album_path, tracks, track_count and username.
        spotify_tracks: Wanted tracks. Their position in this list is used
            as the 1-based track number for the track-number score.
        spotify_album_name: Album name to compare against the candidate title.
        spotify_artist_name: Artist name to compare against the candidate.

    Returns:
        Tuple of:
            - album_confidence: float (0.0-1.0) indicating overall album match quality
            - track_mapping: Dict mapping spotify_track_id -> matched TrackResult
        (0.0, {}) is returned when an input is empty or a gate check fails.
    """
    if not album_result or not spotify_tracks:
        return 0.0, {}

    # Gate check: album title similarity
    cleaned_slskd_album = self.clean_album_name(album_result.album_title)
    cleaned_spotify_album = self.clean_album_name(spotify_album_name)
    album_title_score = self.similarity_score(cleaned_slskd_album, cleaned_spotify_album)
    if album_title_score < 0.65:
        logger.debug(f"Album title mismatch: '{album_result.album_title}' vs '{spotify_album_name}' (score: {album_title_score:.2f})")
        return 0.0, {}

    # Gate check: artist similarity
    # Method 1: Compare parsed artist name if available
    cleaned_spotify_artist = self.clean_artist(spotify_artist_name)
    artist_score = 0.0
    if album_result.artist:
        cleaned_slskd_artist = self.clean_artist(album_result.artist)
        artist_score = self.similarity_score(cleaned_slskd_artist, cleaned_spotify_artist)

    # Method 2: If artist name wasn't parsed or scored low, check if artist
    # appears in the full album path (e.g., "Music/Pink Floyd/Album Name/")
    # This mirrors the artist verification in get_valid_candidates()
    if artist_score < 0.60 and album_result.album_path:
        normalized_spotify_artist = re.sub(r'[^a-zA-Z0-9]', '', spotify_artist_name).lower()
        normalized_album_path = re.sub(r'[^a-zA-Z0-9]', '', album_result.album_path).lower()
        if normalized_spotify_artist and normalized_spotify_artist in normalized_album_path:
            artist_score = 0.85  # High confidence — artist name found in path
            logger.debug(f"Artist found in album path: '{spotify_artist_name}' in '{album_result.album_path}'")

    if artist_score < 0.60:
        logger.debug(f"Artist mismatch: '{album_result.artist}' vs '{spotify_artist_name}' (score: {artist_score:.2f}, path: '{album_result.album_path}')")
        return 0.0, {}

    # A missing track list is treated like an empty one instead of crashing.
    slskd_tracks = album_result.tracks or []

    # The cleaned Soulseek title depends only on the Soulseek track, so it is
    # derived once here rather than once per Spotify track.
    sl_titles_cleaned = []
    for sl_track in slskd_tracks:
        sl_title = sl_track.title if sl_track.title else ''
        if not sl_title and sl_track.filename:
            # Parse title from filename as fallback
            fname = sl_track.filename.replace('\\', '/').split('/')[-1]
            fname = re.sub(r'\.\w{3,4}$', '', fname)  # Remove extension
            fname = re.sub(r'^\d+[\s.\-_]+', '', fname)  # Remove leading track number
            sl_title = fname
        sl_titles_cleaned.append(self.clean_title(sl_title))

    # Per-track matching: build score matrix
    score_triples = []  # (spotify_idx, slskd_idx, score)
    for sp_idx, sp_track in enumerate(spotify_tracks):
        sp_title_cleaned = self.clean_title(sp_track.name)
        sp_track_num = sp_idx + 1  # 1-based track number from Spotify order

        for sl_idx, sl_track in enumerate(slskd_tracks):
            # Title similarity (weight 0.50)
            title_score = self.similarity_score(sp_title_cleaned, sl_titles_cleaned[sl_idx])

            # Duration similarity (weight 0.30)
            # NOTE(review): both durations are handed to duration_similarity
            # unchanged; unit handling presumably lives there — confirm.
            sl_duration = sl_track.duration or 0
            duration_score = self.duration_similarity(sp_track.duration_ms, sl_duration)

            # Track number match (weight 0.20)
            sl_track_num = sl_track.track_number or 0
            if sl_track_num > 0 and sp_track_num > 0:
                if sl_track_num == sp_track_num:
                    track_num_score = 1.0
                elif abs(sl_track_num - sp_track_num) == 1:
                    track_num_score = 0.5
                else:
                    track_num_score = 0.0
            else:
                track_num_score = 0.3  # Neutral when track number unavailable

            combined = (title_score * 0.50) + (duration_score * 0.30) + (track_num_score * 0.20)
            score_triples.append((sp_idx, sl_idx, combined))

    # Greedy assignment: sort descending by score, assign without double-use
    score_triples.sort(key=lambda x: x[2], reverse=True)
    assigned_spotify = set()
    assigned_slskd = set()
    track_mapping = {}
    matched_scores = []
    for sp_idx, sl_idx, score in score_triples:
        if sp_idx in assigned_spotify or sl_idx in assigned_slskd:
            continue
        if score < 0.55:
            continue  # Below minimum per-track threshold
        sp_track = spotify_tracks[sp_idx]
        track_mapping[sp_track.id] = slskd_tracks[sl_idx]
        assigned_spotify.add(sp_idx)
        assigned_slskd.add(sl_idx)
        matched_scores.append(score)

    # Calculate album confidence (spotify_tracks is non-empty past the first guard)
    spotify_count = len(spotify_tracks)
    match_ratio = len(track_mapping) / spotify_count
    avg_track_score = sum(matched_scores) / len(matched_scores) if matched_scores else 0.0
    track_count_ratio = (
        min(album_result.track_count, spotify_count) /
        max(album_result.track_count, spotify_count)
    )
    album_confidence = (
        (match_ratio * 0.40) +
        (avg_track_score * 0.25) +
        (album_title_score * 0.20) +
        (artist_score * 0.10) +
        (track_count_ratio * 0.05)
    )

    logger.info(
        f"Album match: '{album_result.album_title}' by {album_result.username} -> "
        f"confidence={album_confidence:.2f}, matched={len(track_mapping)}/{len(spotify_tracks)}, "
        f"title={album_title_score:.2f}, artist={artist_score:.2f}, tracks_avg={avg_track_score:.2f}"
    )
    return album_confidence, track_mapping
def find_best_album_source(
    self,
    album_results: List[AlbumResult],
    spotify_tracks: List[SpotifyTrack],
    spotify_album_name: str,
    spotify_artist_name: str,
    expected_track_count: int,
    quality_filter_fn=None
) -> Tuple[Optional[AlbumResult], float, Dict[str, TrackResult]]:
    """
    Pick the highest-confidence AlbumResult for a complete album download.

    Each candidate with at least two tracks that is not rejected by
    quality_filter_fn is scored with match_album_result_to_spotify_tracks;
    the first candidate with the strictly highest confidence wins. The winner
    is discarded if its confidence is below 0.60 or if it matched fewer than
    half of the Spotify tracks.

    Args:
        album_results: Candidate albums to evaluate.
        spotify_tracks: Tracks that should be covered by the album.
        spotify_album_name: Album name forwarded to the matcher.
        spotify_artist_name: Artist name forwarded to the matcher.
        expected_track_count: Accepted for the caller's convenience; not
            referenced by this method.
        quality_filter_fn: Optional callable(album) -> bool. A falsy result
            skips the album; if the check raises, a warning is logged and the
            album is still scored.

    Returns:
        Tuple of (best_album, best_confidence, track_mapping) or (None, 0.0, {})
    """
    if not (album_results and spotify_tracks):
        return None, 0.0, {}

    best_album, best_confidence, best_mapping = None, 0.0, {}

    for album in album_results:
        # Single-track (or empty) results cannot be an album source
        if album.track_count < 2:
            continue

        # Optional caller-supplied quality gate
        if quality_filter_fn:
            try:
                rejected = not quality_filter_fn(album)
                if rejected:
                    logger.debug(f"Album '{album.album_title}' from {album.username} rejected by quality filter (dominant: {album.dominant_quality})")
                    continue
            except Exception as e:
                logger.warning(f"Quality filter error for album '{album.album_title}': {e}")

        score, assignment = self.match_album_result_to_spotify_tracks(
            album, spotify_tracks, spotify_album_name, spotify_artist_name
        )
        if score > best_confidence:
            best_album = album
            best_confidence = score
            best_mapping = assignment

    # Threshold 1: overall confidence
    if best_confidence < 0.60:
        logger.info(f"No album source met confidence threshold (best: {best_confidence:.2f})")
        return None, 0.0, {}

    # Threshold 2: share of Spotify tracks that found a partner
    if spotify_tracks:
        matched_ratio = len(best_mapping) / len(spotify_tracks)
    else:
        matched_ratio = 0.0
    if matched_ratio < 0.50:
        logger.info(f"Best album source matched too few tracks ({len(best_mapping)}/{len(spotify_tracks)})")
        return None, 0.0, {}

    logger.info(
        f"Best album source: '{best_album.album_title}' from {best_album.username} "
        f"(confidence={best_confidence:.2f}, matched={len(best_mapping)}/{len(spotify_tracks)}, "
        f"quality={best_album.dominant_quality})"
    )
    return best_album, best_confidence, best_mapping

@ -1,5 +1,4 @@
import logging
import logging.handlers
import sys
import re
from pathlib import Path
@ -74,11 +73,8 @@ def setup_logging(level: str = "INFO", log_file: Optional[str] = None) -> loggin
if log_file:
log_path = Path(log_file)
log_path.parent.mkdir(parents=True, exist_ok=True)
# RotatingFileHandler: 10 MB max per file, keep 3 backups (40 MB total max)
file_handler = logging.handlers.RotatingFileHandler(
log_path, maxBytes=10*1024*1024, backupCount=3, encoding='utf-8'
)
file_handler = logging.FileHandler(log_path, encoding='utf-8')
file_handler.setLevel(log_level)
file_formatter = SafeFormatter(

@ -11800,175 +11800,6 @@ def _on_download_completed(batch_id, task_id, success=True):
print(f"🔄 [Batch Manager] Starting next batch for {batch_id}")
_start_next_batch_of_downloads(batch_id)
def _attempt_album_level_search(batch_id, missing_tracks, batch_album_context, batch_artist_context):
    """
    Attempt to find a complete album source on Soulseek before falling back to per-track search.

    Searches for "Artist Album" (with fallback query variations) and uses AlbumResult
    objects to find a single user with the full album. Any failure is logged and
    results in every track being handed back for per-track search.

    Args:
        batch_id: Identifier of the download batch (not referenced by the search itself).
        missing_tracks: List of track analysis results; each is indexed with
            'track' (a dict of track data) and 'track_index'.
        batch_album_context: Dict read for 'name', 'album_type' and 'total_tracks'.
        batch_artist_context: Dict read for 'name'.

    Returns:
        Tuple of:
            - album_matched: List of (track_analysis_result, pre_assigned_candidate: TrackResult) tuples
            - unmatched: List of track_analysis_results that need per-track search
    """
    def _track_key(res):
        # The same key must be produced when building the SpotifyTrack and when
        # partitioning afterwards. A present-but-empty id (e.g. None) would make
        # several tracks share one mapping key, so it falls back to a synthetic
        # per-track key as well.
        return res['track'].get('id') or f'missing_{res["track_index"]}'

    try:
        # Guard checks
        download_mode = config_manager.get('download_source.mode', 'soulseek')
        if download_mode == 'youtube':
            return [], missing_tracks

        album_name = batch_album_context.get('name', '')
        album_type = batch_album_context.get('album_type', 'album')
        artist_name = batch_artist_context.get('name', '')
        if not album_name or not artist_name:
            return [], missing_tracks
        if album_type not in ('album', 'ep'):
            return [], missing_tracks
        if len(missing_tracks) < 2:
            return [], missing_tracks

        logger.info(f"[Album Search] '{artist_name}' - '{album_name}' ({album_type}), {len(missing_tracks)} missing tracks")

        # Build SpotifyTrack objects from the missing tracks
        spotify_tracks = []
        for res in missing_tracks:
            track_data = res['track']

            # Artists may arrive as plain strings or as dicts carrying a 'name'
            processed_artists = []
            for artist in track_data.get('artists', []):
                if isinstance(artist, str):
                    processed_artists.append(artist)
                elif isinstance(artist, dict) and 'name' in artist:
                    processed_artists.append(artist['name'])
                else:
                    processed_artists.append(str(artist))

            # The album may likewise be a dict with a 'name' or a plain string
            raw_album = track_data.get('album', '')
            if isinstance(raw_album, dict) and 'name' in raw_album:
                track_album_name = raw_album['name']
            elif isinstance(raw_album, str):
                track_album_name = raw_album
            else:
                track_album_name = str(raw_album)

            spotify_tracks.append(SpotifyTrack(
                id=_track_key(res),
                name=track_data.get('name', ''),
                artists=processed_artists,
                album=track_album_name,
                duration_ms=track_data.get('duration_ms', 0),
                popularity=track_data.get('popularity', 0)
            ))

        # Perform album-level search with query variations
        # Soulseek can block certain artist names, so try multiple queries
        artist_words = artist_name.split()
        first_word = artist_words[0] if artist_words else ''
        if first_word.lower() == 'the' and len(artist_words) > 1:
            first_word = artist_words[1]

        search_queries = [f"{artist_name} {album_name}"]
        if first_word and len(first_word) > 1:
            fallback_query = f"{first_word} {album_name}"
            if fallback_query.lower() != search_queries[0].lower():
                search_queries.append(fallback_query)
        search_queries.append(album_name)

        # Stop at the first query that yields any album results
        album_results = []
        for search_query in search_queries:
            try:
                tr, ar = asyncio.run(soulseek_client.search(search_query, timeout=30))
                logger.info(f"[Album Search] Query '{search_query}': {len(ar)} album results, {len(tr)} tracks")
                album_results.extend(ar)
                if ar:
                    break
            except Exception as search_err:
                logger.warning(f"[Album Search] Query '{search_query}' failed: {search_err}")

        if not album_results:
            logger.info("[Album Search] No album results found — falling back to per-track search")
            return [], missing_tracks

        # Quality filter: check if album's dominant quality is acceptable
        # Uses DB quality profile (same source as filter_results_by_quality_preference)
        # The profile is loaded once per search instead of once per candidate album.
        enabled_formats = set()
        try:
            from database.music_database import MusicDatabase
            profile = MusicDatabase().get_quality_profile()
            for quality_name, quality_config in profile.get('qualities', {}).items():
                if quality_config.get('enabled', False):
                    if quality_name == 'flac':
                        enabled_formats.add('flac')
                    elif quality_name.startswith('mp3'):
                        enabled_formats.add('mp3')
        except Exception as profile_err:
            # Best effort: without a usable profile every quality is accepted
            logger.warning(f"[Album Search] Could not load quality profile, accepting any quality: {profile_err}")
            enabled_formats = set()

        def quality_filter(album_result):
            """Check if album quality passes user's quality profile"""
            if not enabled_formats:
                return True  # No specific quality enabled, accept anything
            try:
                dominant = (album_result.dominant_quality or '').lower()
            except Exception:
                return True  # Accept on error
            # At album selection level, be strict — we have many sources to choose from.
            # Fallback logic applies at per-track download level, not here.
            return dominant in enabled_formats

        # Find best album source
        expected_count = batch_album_context.get('total_tracks', len(spotify_tracks))
        best_album, confidence, track_mapping = matching_engine.find_best_album_source(
            album_results, spotify_tracks, album_name, artist_name,
            expected_count, quality_filter_fn=quality_filter
        )
        if not best_album:
            logger.info("[Album Search] No suitable album source found — falling back to per-track search")
            return [], missing_tracks

        logger.info(f"[Album Search] Match: {best_album.username} ({best_album.dominant_quality}), "
                    f"confidence={confidence:.2f}, matched={len(track_mapping)}/{len(spotify_tracks)}")

        # Partition missing tracks into matched (with pre-assigned candidate) and unmatched
        album_matched = []
        unmatched = []
        for res in missing_tracks:
            track_id = _track_key(res)
            if track_id in track_mapping:
                album_matched.append((res, track_mapping[track_id]))
            else:
                unmatched.append(res)

        logger.info(f"[Album Search] Result: {len(album_matched)} pre-matched from {best_album.username}, {len(unmatched)} per-track fallback")
        print(f"🎵 [Album Search] '{artist_name}' - '{album_name}': {len(album_matched)} pre-matched from {best_album.username}, {len(unmatched)} per-track fallback")
        return album_matched, unmatched
    except Exception as e:
        # Never let the optimisation break the batch: fall back to per-track search
        import traceback
        logger.error(f"[Album Search] Error: {e}")
        logger.error(traceback.format_exc())
        print(f"⚠️ [Album Search] Error during album-level search: {e}")
        return [], missing_tracks
def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json):
"""
A master worker that handles the entire missing tracks process:
@ -12079,9 +11910,12 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json):
print(f" transitioning batch {batch_id} to download phase with {len(missing_tracks)} tracks.")
# Extract batch context BEFORE album-level search (read-only, safe outside lock)
with tasks_lock:
if batch_id not in download_batches: return
download_batches[batch_id]['phase'] = 'downloading'
# Get batch album context (if this is an artist album download)
batch = download_batches[batch_id]
batch_album_context = batch.get('album_context')
batch_artist_context = batch.get('artist_context')
@ -12089,24 +11923,10 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json):
batch_playlist_folder_mode = batch.get('playlist_folder_mode', False)
batch_playlist_name = batch.get('playlist_name', 'Unknown Playlist')
# ALBUM-LEVEL SEARCH: Try to find a complete album source on Soulseek
# This runs OUTSIDE tasks_lock since it does network I/O (~30s)
album_matched = []
album_unmatched = missing_tracks
if batch_is_album and batch_album_context and batch_artist_context:
album_matched, album_unmatched = _attempt_album_level_search(
batch_id, missing_tracks, batch_album_context, batch_artist_context
)
# Now create download tasks under the lock
with tasks_lock:
if batch_id not in download_batches: return
download_batches[batch_id]['phase'] = 'downloading'
for res in missing_tracks:
task_id = str(uuid.uuid4())
track_info = res['track'].copy()
# Helper: enrich track_info with album/playlist context
def _enrich_track_info(track_info, res):
# Add explicit album context to track_info for artist album downloads
if batch_is_album and batch_album_context and batch_artist_context:
track_info['_explicit_album_context'] = batch_album_context
@ -12124,13 +11944,13 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json):
spotify_data = json.loads(spotify_data)
except:
spotify_data = {}
if not spotify_data:
spotify_data = {}
s_album = spotify_data.get('album')
s_artists = spotify_data.get('artists', [])
# We need at least an album name and artist
if s_album and s_album.get('name'):
# Construct minimal artist context
@ -12157,6 +11977,7 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json):
track_info['_is_explicit_album_download'] = True
print(f"🎵 [Wishlist] Added album context for: '{track_info.get('name')}' -> '{album_ctx['name']}'")
# Add playlist folder mode flag for sync page playlists
if batch_playlist_folder_mode:
track_info['_playlist_folder_mode'] = True
@ -12165,30 +11986,6 @@ def _run_full_missing_tracks_process(batch_id, playlist_id, tracks_json):
else:
print(f"🔍 [Debug] Task Creation - playlist folder mode NOT enabled for: {track_info.get('name')}")
# Create tasks for album-matched tracks (pre-assigned candidate from album source)
for res, pre_assigned_candidate in album_matched:
task_id = str(uuid.uuid4())
track_info = res['track'].copy()
_enrich_track_info(track_info, res)
download_tasks[task_id] = {
'status': 'pending', 'track_info': track_info,
'playlist_id': playlist_id, 'batch_id': batch_id,
'track_index': res['track_index'], 'retry_count': 0,
'cached_candidates': [], 'used_sources': set(),
'status_change_time': time.time(),
'metadata_enhanced': False,
'pre_assigned_candidate': pre_assigned_candidate
}
download_batches[batch_id]['queue'].append(task_id)
print(f"🎵 [Album Match] Task created with pre-assigned source for: {track_info.get('name')}")
# Create tasks for unmatched tracks (normal per-track search)
for res in album_unmatched:
task_id = str(uuid.uuid4())
track_info = res['track'].copy()
_enrich_track_info(track_info, res)
download_tasks[task_id] = {
'status': 'pending', 'track_info': track_info,
'playlist_id': playlist_id, 'batch_id': batch_id,
@ -12649,57 +12446,6 @@ def _download_track_worker(task_id, batch_id=None):
)
print(f"📥 [Modal Worker] Starting download task for: {track.name} by {track.artists[0] if track.artists else 'Unknown'}")
# CHECK: Pre-assigned candidate from album-level search
pre_assigned = None
with tasks_lock:
if task_id in download_tasks:
pre_assigned = download_tasks[task_id].get('pre_assigned_candidate')
if pre_assigned:
print(f"🎵 [Album Match] Using pre-assigned candidate for '{track.name}' from {pre_assigned.username}")
with tasks_lock:
if task_id in download_tasks:
download_tasks[task_id]['status'] = 'searching'
# Validate pre-assigned candidate with lightweight checks only.
# Album-level matching already confirmed track match (title, duration, track number).
# Here we only verify: 1) quality profile 2) artist in file path
from core.soulseek_client import SoulseekClient
temp_client = SoulseekClient()
quality_passed = temp_client.filter_results_by_quality_preference([pre_assigned])
# Artist path verification (same check as get_valid_candidates)
spotify_artist_name = track.artists[0] if track.artists else ""
normalized_spotify_artist = re.sub(r'[^a-zA-Z0-9]', '', spotify_artist_name).lower()
normalized_slskd_path = re.sub(r'[^a-zA-Z0-9]', '', pre_assigned.filename).lower()
artist_in_path = normalized_spotify_artist in normalized_slskd_path if normalized_spotify_artist else True
logger.info(f"[Album Match] '{track.name}': quality_passed={len(quality_passed)}, artist_in_path={artist_in_path}")
if quality_passed and artist_in_path:
# Set confidence attribute expected by _attempt_download_with_candidates sort
# (normally set by find_best_slskd_matches_enhanced, which we bypass for pre-assigned)
for c in quality_passed:
c.confidence = 1.0
c.version_type = 'original'
candidates = quality_passed
with tasks_lock:
if task_id in download_tasks:
download_tasks[task_id]['cached_candidates'] = candidates
success = _attempt_download_with_candidates(task_id, candidates, track, batch_id)
if success:
print(f"✅ [Album Match] Pre-assigned download initiated for '{track.name}'")
return
else:
print(f"⚠️ [Album Match] Pre-assigned candidate failed for '{track.name}', falling back to per-track search")
else:
logger.warning(f"[Album Match] Rejected '{track.name}': quality={pre_assigned.quality}, artist_in_path={artist_in_path}")
print(f"⚠️ [Album Match] Pre-assigned candidate rejected for '{track.name}', falling back to per-track search")
# Fall through to normal per-track search below
# Initialize task state tracking (like GUI's parallel_search_tracking)
with tasks_lock:
if task_id in download_tasks:

Loading…
Cancel
Save