good

10 months ago · 55c36241dd
parent 77383f0178
commit 55c36241dd
6 changed files with 4985 additions and 5 deletions
--- a/.spotify_cache
+++ b/.spotify_cache
@ -1 +1 @@
-{"access_token": "BQBvbg3WwYrukruP6c8SrF_hNNq8RpE81IiDnj1pJB6smBlF5SVT7VO4Zd2jdurQtwk41A-6U8fBORnLO4_hfxYkMBfxnL4ifzsO9gQfvQLUR585hq4iL03Nrh30LhO8h9wnhWwSWDNPd9W-RakrqdfY8p23ZDMNAaGqGp0ZsvbkiKIDBMMP9Y91tye7QSSJxYRSlBr64yiJoWu2Ybwe-OA_xD9OqxZxSpTNgEKNyOuozqQ-TUWJNPSe7n_cSrPs", "token_type": "Bearer", "expires_in": 3600, "scope": "user-library-read user-read-private playlist-read-private playlist-read-collaborative user-read-email", "expires_at": 1753462488, "refresh_token": "AQDmfQkPCGObfJeTUIbW1hAAwhSqkuHRA3Qh2dqVYMRh0eCkFMQgPNJDDzF8y-BiaVbj80zePkK_XSfYH1aJutMtNbnsqRKWuxP31BTrMc7pdUdbE7Fma4oH8wpDUKdG3MM"}
+{"access_token": "BQDlZYRG1PkTF9MyEIWlv6Stz7KVj9ljTaG0vqjb1mIQGx6C7j2Gs79hjulYLuHv_FZ7w_jr0J4GC49bSGCYjV3o30sx58NqnlHMoEcbHmQGmFO6PIvZslmm5WEUXFhCtyW3AIoTCL_MDgxGty3fHstHxqJ8yWWsbgu8DaLxcgtju4_YIv53LlnbkCsGzdoHt4y6ExoFnOcTjU5ZguhdyYULkgEXVd2mvW4yMZE-SRpEaHGcFgu_V_lqTirm7ZHp", "token_type": "Bearer", "expires_in": 3600, "scope": "user-library-read user-read-private playlist-read-private playlist-read-collaborative user-read-email", "expires_at": 1753469090, "refresh_token": "AQDmfQkPCGObfJeTUIbW1hAAwhSqkuHRA3Qh2dqVYMRh0eCkFMQgPNJDDzF8y-BiaVbj80zePkK_XSfYH1aJutMtNbnsqRKWuxP31BTrMc7pdUdbE7Fma4oH8wpDUKdG3MM"}
--- a/core/pycache/matching_engine.cpython-312.pyc
+++ b/core/pycache/matching_engine.cpython-312.pyc
--- a/core/matching_engine.py
+++ b/core/matching_engine.py
@ -6,6 +6,8 @@ from unidecode import unidecode
 from utils.logging_config import get_logger
 from core.spotify_client import Track as SpotifyTrack
 from core.plex_client import PlexTrackInfo
+from core.soulseek_client import TrackResult
+

 logger = get_logger("matching_engine")

@ -181,3 +183,80 @@ class MusicMatchingEngine:
        else:
            # Fallback to just track name if no artist
            return self.clean_title(spotify_track.name)
+        
+    
+    def calculate_slskd_match_confidence(self, spotify_track: SpotifyTrack, slskd_track: TrackResult) -> float:
+        """
+        Score how well a Soulseek search result matches a Spotify track.
+
+        The score is a weighted sum of three signals plus a format bonus:
+        title (0.60), artist (0.35) and duration (0.05). A 'flac' quality adds
+        0.10 and an 'mp3' quality with a bitrate of at least 320 adds 0.05.
+        The result is capped at 1.0.
+
+        Args:
+            spotify_track: Reference track; ``name``, ``artists`` and
+                ``duration_ms`` are read.
+            slskd_track: Candidate result; ``filename``, ``duration``,
+                ``quality`` and ``bitrate`` are read.
+
+        Returns:
+            The confidence score, never greater than 1.0.
+        """
+        # The slskd filename is treated as the source of truth for the
+        # candidate, so title and artist are both searched in its normalized form.
+        filename_norm = self.normalize_string(slskd_track.filename)
+
+        # 1. Title: 0.9 when the cleaned title occurs anywhere in the filename,
+        #    1.0 when it occurs as a standalone word/phrase (so 'in' does not
+        #    get full marks for appearing inside 'finland').
+        # An empty cleaned title is a substring of every string, so it is
+        # rejected explicitly; otherwise every candidate would score 1.0 here.
+        # NOTE(review): the title goes through clean_title() while the filename
+        # goes through normalize_string() - confirm both yield comparable text.
+        cleaned_title = self.clean_title(spotify_track.name)
+        title_score = 0.0
+        if cleaned_title and cleaned_title in filename_norm:
+            # Lookarounds instead of \b: \b only matches next to a word
+            # character, so a title starting or ending with punctuation could
+            # never earn the standalone bonus. For titles with word characters
+            # at both ends the two patterns are equivalent.
+            standalone = r'(?<!\w)' + re.escape(cleaned_title) + r'(?!\w)'
+            title_score = 1.0 if re.search(standalone, filename_norm) else 0.9
+
+        # 2. Artist: all-or-nothing, one artist found in the filename is enough.
+        # Names that normalize to an empty string are skipped for the same
+        # empty-substring reason as the title.
+        artist_names = (self.normalize_string(a) for a in spotify_track.artists)
+        artist_score = 1.0 if any(name and name in filename_norm for name in artist_names) else 0.0
+
+        # 3. Duration: low weight because slskd duration data can be unreliable.
+        # NOTE(review): the Spotify value is duration_ms; confirm that
+        # duration_similarity() expects slskd_track.duration in the same unit.
+        duration_score = self.duration_similarity(spotify_track.duration_ms, slskd_track.duration or 0)
+
+        # 4. Quality bonus: small preference for higher quality formats.
+        quality_bonus = 0.0
+        quality = (slskd_track.quality or '').lower()
+        if quality == 'flac':
+            quality_bonus = 0.1
+        elif quality == 'mp3' and (slskd_track.bitrate or 0) >= 320:
+            quality_bonus = 0.05
+
+        # Title and artist dominate; the bonus is added on top and the total
+        # is clamped so it never exceeds 1.0.
+        weighted = (title_score * 0.60) + (artist_score * 0.35) + (duration_score * 0.05)
+        return min(weighted + quality_bonus, 1.0)
+
+
+    def find_best_slskd_matches(self, spotify_track: SpotifyTrack, slskd_results: List[TrackResult]) -> List[TrackResult]:
+        """
+        Rank Soulseek results against a Spotify track and drop weak matches.
+
+        Every result is scored with calculate_slskd_match_confidence() and the
+        score is written to a ``confidence`` attribute on the result object
+        itself; the attribute is left in place on return.
+
+        Returns:
+            The results whose confidence is strictly greater than 0.6, ordered
+            best first, with ``size`` (larger first) breaking ties. An empty
+            list when ``slskd_results`` is empty.
+        """
+        if not slskd_results:
+            return []
+
+        candidates = []
+        for candidate in slskd_results:
+            # The score lives on the object so it can be used as the sort key.
+            candidate.confidence = self.calculate_slskd_match_confidence(spotify_track, candidate)
+            candidates.append(candidate)
+
+        # Highest confidence first; equal scores fall back to the larger size.
+        candidates.sort(key=lambda c: (c.confidence, c.size), reverse=True)
+
+        # Anything at or below 0.6 is considered too weak to be a usable match.
+        return [c for c in candidates if c.confidence > 0.6]
--- a/logs/app.log
+++ b/logs/app.log
--- a/ui/pages/pycache/sync.cpython-312.pyc
+++ b/ui/pages/pycache/sync.cpython-312.pyc
--- a/ui/pages/sync.py
+++ b/ui/pages/sync.py
@ -3443,7 +3443,7 @@ class DownloadMissingTracksModal(QDialog):
        self.playlist_item = playlist_item
        self.parent_page = parent_page
        self.downloads_page = downloads_page
-        
+        self.matching_engine = MusicMatchingEngine()
        # State tracking
        self.total_tracks = len(playlist.tracks)
        self.matched_tracks_count = 0
@ -4374,9 +4374,25 @@ class DownloadMissingTracksModal(QDialog):
                if loop: loop.close()

    def get_valid_candidates(self, results, spotify_track, query):
-        """Get all valid candidates sorted by score (for retry mechanism)"""
-        # This is a simplified version for brevity. The full scoring logic should be here.
-        return sorted(results, key=lambda r: r.size, reverse=True)
+        """
+        Rank search results for a Spotify track by match confidence.
+
+        Delegates scoring, filtering and ordering to
+        MusicMatchingEngine.find_best_slskd_matches() instead of sorting by
+        size alone, and prints a one-line summary of the outcome. ``query`` is
+        only used in that summary.
+
+        Returns the list produced by the matching engine (result objects that
+        carry a ``confidence`` attribute, best match first), or an empty list
+        when ``results`` is empty.
+        """
+        if not results:
+            return []
+
+        ranked = self.matching_engine.find_best_slskd_matches(spotify_track, results)
+
+        if not ranked:
+            print(f"⚠️ No confident matches found for '{spotify_track.name}' from query '{query}'.")
+            return ranked
+
+        # The engine returns matches best-first, so index 0 holds the top score.
+        top_score = ranked[0].confidence
+        print(f"✅ Found {len(ranked)} confident matches for '{spotify_track.name}'. Best score: {top_score:.2f} from query '{query}'")
+        return ranked

    def create_spotify_based_search_result_from_validation(self, slskd_result, spotify_metadata):
        """Create SpotifyBasedSearchResult from validation results"""