mirror of https://github.com/Nezreka/SoulSync.git
Both function bodies (_discovery_score_candidates and _search_spotify_for_tidal_track) are byte-identical to the originals. The shared matching_engine instance is injected via init() right after _init_connection_test; the spotify proxy + _get_metadata_fallback_source shim follow the same pattern used elsewhere. web_server.py: 36019 → 35753 (-266 lines).
pull/439/head
parent
7158130d5a
commit
a4eccff4a5
@ -0,0 +1,323 @@
|
||||
"""Discovery scoring + tidal-track search — lifted from web_server.py.
|
||||
|
||||
Both function bodies are byte-identical to the originals. The
|
||||
``spotify_client`` proxy and ``_get_metadata_fallback_source`` shim
|
||||
let the bodies resolve their original names without modification.
|
||||
``matching_engine`` is injected via init() because it is constructed
|
||||
in web_server.py and referenced by name throughout the bodies.
|
||||
"""
|
||||
import logging
|
||||
|
||||
from core.metadata.cache import get_metadata_cache
|
||||
from core.metadata.registry import get_primary_source, get_spotify_client
|
||||
from core.spotify_client import _is_globally_rate_limited as _spotify_rate_limited
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_metadata_fallback_source():
    """Return whatever the metadata registry reports as the primary source.

    Stand-in for ``web_server._get_metadata_fallback_source`` so code moved
    out of web_server.py can keep calling the helper under its original
    name; the lookup itself is delegated to ``get_primary_source()``.
    """
    primary_source = get_primary_source()
    return primary_source
|
||||
|
||||
|
||||
class _SpotifyClientProxy:
    """Late-binding stand-in for the global Spotify client.

    The proxy holds no client of its own. Each truthiness check and each
    attribute lookup asks ``get_spotify_client()`` for the current client,
    so the module-level ``spotify_client`` name can be used as if it were
    the real client object.
    """

    def __bool__(self):
        # Truthy exactly when the registry currently has a client to offer.
        current = get_spotify_client()
        return current is not None

    def __getattr__(self, name):
        # Only invoked for attributes the proxy itself lacks; forward them to
        # the live client, or fail like a missing attribute when there is none.
        current = get_spotify_client()
        if current is not None:
            return getattr(current, name)
        raise AttributeError(name)


# Module-level handle the lifted function bodies refer to by name.
spotify_client = _SpotifyClientProxy()
|
||||
|
||||
|
||||
# Placeholder for the shared matching engine; stays None until init() runs.
matching_engine = None


def init(matching_engine_obj):
    """Store the matching engine instance supplied by web_server.

    Rebinds this module's ``matching_engine`` global to
    ``matching_engine_obj`` so the functions in this module can reach the
    engine by name. Returns nothing.
    """
    globals()["matching_engine"] = matching_engine_obj
|
||||
|
||||
|
||||
def _discovery_score_candidates(source_title, source_artist, source_duration_ms, search_results):
    """Pick the highest-confidence search result for a source track.

    A candidate is only handed to the full scorer once its artist AND its
    title have each cleared their own similarity floor (0.5). Gating the two
    dimensions separately stops a perfect artist from carrying an unrelated
    title (or the reverse) through the weighted score: returning no match is
    preferred over returning a wrong one.

    Args:
        source_title: The source track title (already cleaned for YouTube, raw for others)
        source_artist: The source track primary artist
        source_duration_ms: The source track duration in ms (0 if unknown)
        search_results: List of Track objects (Spotify or iTunes) from search

    Returns:
        (best_match, best_confidence, best_index), or (None, 0.0, -1) when no
        candidate passes both floors with a confidence above zero.
    """
    artist_floor = 0.5
    title_floor = 0.5

    # Source-side strings are prepared once; only candidates vary per iteration.
    wanted_artist = matching_engine.clean_artist(source_artist)
    wanted_title = matching_engine.clean_title(source_title)
    wanted_core = matching_engine.get_core_string(source_title)

    winner, winner_confidence, winner_index = None, 0.0, -1

    for idx, candidate in enumerate(search_results):
        try:
            candidate_artists = getattr(candidate, 'artists', None) or []
            candidate_name = getattr(candidate, 'name', '')
            candidate_duration = getattr(candidate, 'duration_ms', 0)

            # --- Artist gate: best similarity over all credited artists ---
            artist_similarity = 0.0
            for credited in candidate_artists:
                if not credited:
                    continue
                credited_cleaned = matching_engine.clean_artist(credited)
                credited_normalized = matching_engine.normalize_string(credited)
                # The cleaned source artist appearing inside the normalized
                # candidate artist counts as a perfect artist match.
                if wanted_artist and wanted_artist in credited_normalized:
                    artist_similarity = 1.0
                    break
                artist_similarity = max(
                    artist_similarity,
                    matching_engine.similarity_score(wanted_artist, credited_cleaned),
                )

            if artist_similarity < artist_floor:
                continue

            # --- Title gate ---
            candidate_title = matching_engine.clean_title(candidate_name)
            candidate_core = matching_engine.get_core_string(candidate_name)

            # Identical non-empty core titles skip the similarity floor
            # (e.g., "edamame" == "edamame").
            same_core = bool(wanted_core and candidate_core and wanted_core == candidate_core)
            if not same_core:
                title_similarity = matching_engine.similarity_score(wanted_title, candidate_title)
                if not (title_similarity >= title_floor):
                    continue

            # Both gates cleared: run the full weighted scoring.
            confidence, _match_type = matching_engine.score_track_match(
                source_title=source_title,
                source_artists=[source_artist],
                source_duration_ms=source_duration_ms,
                candidate_title=candidate_name,
                candidate_artists=candidate_artists,
                candidate_duration_ms=candidate_duration,
            )

            # Strict '>' keeps the earliest candidate on ties.
            if confidence > winner_confidence:
                winner, winner_confidence, winner_index = candidate, confidence, idx

        except Exception as e:
            # A single malformed candidate must not abort the whole scan.
            logger.error(f"Error scoring candidate {idx}: {e}")
            continue

    return winner, winner_confidence, winner_index
|
||||
|
||||
|
||||
def _search_spotify_for_tidal_track(tidal_track, use_spotify=True, itunes_client=None):
|
||||
"""Search Spotify/fallback for a Tidal track using matching_engine for better accuracy
|
||||
|
||||
Args:
|
||||
tidal_track: The Tidal track to search for
|
||||
use_spotify: If True, use Spotify; if False, use fallback source
|
||||
itunes_client: Fallback client instance (required when use_spotify=False)
|
||||
|
||||
Returns:
|
||||
For Spotify: (Track, raw_data, confidence) tuple or None
|
||||
For fallback: dict with track data (includes 'confidence' key) or None
|
||||
"""
|
||||
if use_spotify:
|
||||
if not spotify_client or not spotify_client.is_authenticated():
|
||||
return None
|
||||
else:
|
||||
if not itunes_client:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Get track info
|
||||
track_name = tidal_track.name
|
||||
artists = tidal_track.artists or []
|
||||
|
||||
if not artists:
|
||||
return None
|
||||
|
||||
artist_name = artists[0] # Use primary artist
|
||||
source_duration = getattr(tidal_track, 'duration_ms', 0) or 0
|
||||
source_name = "Spotify" if use_spotify else _get_metadata_fallback_source().capitalize()
|
||||
|
||||
logger.info(f"Tidal track: '{artist_name}' - '{track_name}' (searching {source_name})")
|
||||
|
||||
# Use matching engine to generate search queries (with fallback)
|
||||
try:
|
||||
temp_track = type('TempTrack', (), {
|
||||
'name': track_name,
|
||||
'artists': [artist_name],
|
||||
'album': None
|
||||
})()
|
||||
search_queries = matching_engine.generate_download_queries(temp_track)
|
||||
logger.info(f"Generated {len(search_queries)} search queries for Tidal track")
|
||||
except Exception as e:
|
||||
logger.error(f"Matching engine failed for Tidal, falling back to basic queries: {e}")
|
||||
if use_spotify:
|
||||
search_queries = [
|
||||
f'track:"{track_name}" artist:"{artist_name}"',
|
||||
f'"{track_name}" "{artist_name}"',
|
||||
f'{track_name} {artist_name}'
|
||||
]
|
||||
else:
|
||||
search_queries = [
|
||||
f'{artist_name} {track_name}',
|
||||
f'{track_name} {artist_name}',
|
||||
track_name
|
||||
]
|
||||
|
||||
best_match = None
|
||||
best_match_raw = None
|
||||
best_confidence = 0.0
|
||||
min_confidence = 0.9
|
||||
|
||||
for query_idx, search_query in enumerate(search_queries):
|
||||
try:
|
||||
logger.debug(f"Tidal query {query_idx + 1}/{len(search_queries)}: {search_query} ({source_name})")
|
||||
|
||||
if use_spotify and not _spotify_rate_limited():
|
||||
results = spotify_client.search_tracks(search_query, limit=10)
|
||||
if not results:
|
||||
continue
|
||||
else:
|
||||
results = itunes_client.search_tracks(search_query, limit=10)
|
||||
if not results:
|
||||
continue
|
||||
|
||||
# Score all results using the matching engine
|
||||
match, confidence, match_idx = _discovery_score_candidates(
|
||||
track_name, artist_name, source_duration, results
|
||||
)
|
||||
|
||||
if match and confidence > best_confidence and confidence >= min_confidence:
|
||||
best_confidence = confidence
|
||||
best_match = match
|
||||
if use_spotify and match.id:
|
||||
_cache = get_metadata_cache()
|
||||
best_match_raw = _cache.get_entity('spotify', 'track', match.id)
|
||||
else:
|
||||
best_match_raw = None
|
||||
logger.info(f"New best Tidal match: {match.artists[0]} - {match.name} (confidence: {confidence:.3f})")
|
||||
|
||||
if best_confidence >= 0.9:
|
||||
logger.info(f"High confidence Tidal match found ({best_confidence:.3f}), stopping search")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Error in Tidal {source_name} search for query '{search_query}': {e}")
|
||||
continue
|
||||
|
||||
# Strategy 4: Extended search with higher limit (last resort)
|
||||
if not best_match:
|
||||
logger.info("Tidal Strategy 4: Extended search with limit=50")
|
||||
query = f"{artist_name} {track_name}"
|
||||
if use_spotify:
|
||||
extended_results = spotify_client.search_tracks(query, limit=50)
|
||||
else:
|
||||
extended_results = itunes_client.search_tracks(query, limit=50)
|
||||
if extended_results:
|
||||
match, confidence, match_idx = _discovery_score_candidates(
|
||||
track_name, artist_name, source_duration, extended_results
|
||||
)
|
||||
if match and confidence >= min_confidence:
|
||||
best_match = match
|
||||
best_confidence = confidence
|
||||
logger.info(f"Strategy 4 Tidal match (extended): {match.artists[0]} - {match.name} (confidence: {confidence:.3f})")
|
||||
|
||||
if best_match:
|
||||
if use_spotify:
|
||||
logger.info(f"Final Tidal Spotify match: {best_match.artists[0]} - {best_match.name} (confidence: {best_confidence:.3f})")
|
||||
return (best_match, best_match_raw, best_confidence)
|
||||
else:
|
||||
result_artists = best_match.artists if hasattr(best_match, 'artists') else []
|
||||
result_artist = result_artists[0] if result_artists else 'Unknown'
|
||||
result_name = best_match.name if hasattr(best_match, 'name') else 'Unknown'
|
||||
logger.info(f"Final Tidal {source_name} match: {result_artist} - {result_name} (confidence: {best_confidence:.3f})")
|
||||
|
||||
album_name = best_match.album if hasattr(best_match, 'album') else 'Unknown Album'
|
||||
image_url = best_match.image_url if hasattr(best_match, 'image_url') else ''
|
||||
track_id = best_match.id if hasattr(best_match, 'id') else ''
|
||||
duration_ms = best_match.duration_ms if hasattr(best_match, 'duration_ms') else 0
|
||||
|
||||
# Fetch full track details to get album ID, track_number, etc.
|
||||
# The Track dataclass strips this data — the API has it
|
||||
album_obj = {
|
||||
'name': album_name,
|
||||
'album_type': 'album',
|
||||
'release_date': getattr(best_match, 'release_date', '') or '',
|
||||
'images': [{'url': image_url, 'height': 300, 'width': 300}] if image_url else []
|
||||
}
|
||||
track_number = None
|
||||
disc_number = None
|
||||
if track_id:
|
||||
try:
|
||||
detailed = itunes_client.get_track_details(track_id)
|
||||
if detailed and isinstance(detailed.get('album'), dict):
|
||||
dt_album = detailed['album']
|
||||
if dt_album.get('id'):
|
||||
album_obj['id'] = dt_album['id']
|
||||
if dt_album.get('total_tracks'):
|
||||
album_obj['total_tracks'] = dt_album['total_tracks']
|
||||
if dt_album.get('release_date') and not album_obj.get('release_date'):
|
||||
album_obj['release_date'] = dt_album['release_date']
|
||||
if dt_album.get('album_type'):
|
||||
album_obj['album_type'] = dt_album['album_type']
|
||||
if dt_album.get('images') and not album_obj.get('images'):
|
||||
album_obj['images'] = dt_album['images']
|
||||
if dt_album.get('artists'):
|
||||
album_obj['artists'] = dt_album['artists']
|
||||
if detailed:
|
||||
track_number = detailed.get('track_number')
|
||||
disc_number = detailed.get('disc_number')
|
||||
logger.info(f"[Discovery Enrich] {result_name}: track_number={track_number}, disc={disc_number}")
|
||||
else:
|
||||
logger.info(f"[Discovery Enrich] get_track_details returned None for ID {track_id} ({result_name})")
|
||||
except Exception as _enrich_err:
|
||||
logger.error(f"[Discovery Enrich] Failed for {result_name} (ID {track_id}): {_enrich_err}")
|
||||
|
||||
result_data = {
|
||||
'id': track_id,
|
||||
'name': result_name,
|
||||
'artists': [result_artist],
|
||||
'album': album_obj,
|
||||
'duration_ms': duration_ms,
|
||||
'source': _get_metadata_fallback_source(),
|
||||
'confidence': best_confidence
|
||||
}
|
||||
if track_number:
|
||||
result_data['track_number'] = track_number
|
||||
if disc_number:
|
||||
result_data['disc_number'] = disc_number
|
||||
return result_data
|
||||
else:
|
||||
logger.warning(f"No suitable Tidal match found (best confidence was {best_confidence:.3f}, required {min_confidence:.3f})")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error searching Spotify for Tidal track: {e}")
|
||||
return None
|
||||
Loading…
Reference in new issue