Add preflight track-hash comparison and fix duplicate sync history entries

Preflight: hash track IDs before syncing and compare against last sync.
Skip only if exact same tracks were already synced and all matched.
Replaces the old count-based smart-skip which could miss track swaps.

Sync history: update the existing entry for a given playlist_id instead of
creating duplicates. Re-syncing the same playlist now refreshes the
existing row in place with a new timestamp and reset stats.
pull/253/head
Broque Thomas 4 weeks ago
parent 5489af2647
commit 6b5e37aded

@ -8220,6 +8220,22 @@ class MusicDatabase:
logger.error(f"Error querying sync history: {e}")
return [], 0
def get_latest_sync_history_by_playlist(self, playlist_id):
    """Return the most recent sync_history row for a given playlist_id."""
    # Best-effort lookup: any failure (missing table, bad connection, ...)
    # is logged at debug level and reported as "no previous sync" (None),
    # so callers fall back to creating a fresh history entry.
    try:
        connection = self._get_connection()
        cur = connection.cursor()
        cur.execute("""
                SELECT * FROM sync_history
                WHERE playlist_id = ?
                ORDER BY started_at DESC LIMIT 1
            """, (playlist_id,))
        record = cur.fetchone()
        if record is None:
            return None
        # Rows come back as sqlite3.Row (mapping-like); hand callers a plain dict.
        return dict(record)
    except Exception as e:
        logger.debug(f"Error getting latest sync history by playlist: {e}")
        return None
def get_sync_history_entry(self, entry_id):
"""Return a single sync_history row with full tracks_json (for re-trigger)."""
try:

@ -757,32 +757,6 @@ def _register_automation_handlers():
if extra.get('discovered') and extra.get('matched_data'):
current_discovered += 1
# Smart skip: only re-sync if something could have changed
# Skip when: same discovered count, same total, AND last sync already matched all discovered tracks
# Re-sync when: new tracks discovered, tracks added/removed, or last sync had unmatched tracks
# (unmatched = discovered but not on server — wishlist may have downloaded them since)
sync_id_key = f"auto_mirror_{playlist_id}"
try:
sync_statuses = _load_sync_status_file()
last_status = sync_statuses.get(sync_id_key, {})
last_matched = last_status.get('matched_tracks', -1)
last_discovered = last_status.get('discovered_tracks', -1)
last_total = last_status.get('total_tracks', -1)
if (last_discovered == current_discovered and
last_total == len(tracks) and
last_matched >= current_discovered):
# All discovered tracks were already matched on server last time — nothing to do
_update_automation_progress(auto_id,
log_line=f'All {current_discovered} tracks already synced — skipping',
log_type='skip')
return {
'status': 'skipped',
'reason': f'All {current_discovered} tracks already synced',
}
except Exception:
pass # If we can't read last status, just run the sync
# Convert mirrored tracks to format expected by _run_sync_task
# Use discovered metadata when available, skip undiscovered tracks
tracks_json = []
@ -813,11 +787,6 @@ def _register_automation_handlers():
# NOT discovered — skip to prevent garbage in wishlist
skipped_count += 1
_update_automation_progress(auto_id, progress=50,
phase=f'Syncing "{pl["name"]}"',
log_line=f'{len(tracks_json)} discovered, {skipped_count} skipped',
log_type='info' if tracks_json else 'skip')
if not tracks_json:
_update_automation_progress(auto_id,
log_line=f'No discovered tracks — {skipped_count} need discovery first', log_type='skip')
@ -827,6 +796,37 @@ def _register_automation_handlers():
'skipped_tracks': str(skipped_count),
}
# Preflight: hash the track list and compare against last sync
# Skip if the exact same set of tracks was already synced and all matched
import hashlib
track_ids_str = ','.join(sorted(t.get('id', '') for t in tracks_json))
tracks_hash = hashlib.md5(track_ids_str.encode()).hexdigest()
sync_id_key = f"auto_mirror_{playlist_id}"
try:
sync_statuses = _load_sync_status_file()
last_status = sync_statuses.get(sync_id_key, {})
last_hash = last_status.get('tracks_hash', '')
last_matched = last_status.get('matched_tracks', -1)
if (last_hash == tracks_hash and
last_matched >= len(tracks_json)):
# Exact same tracks, all matched last time — nothing to do
_update_automation_progress(auto_id,
log_line=f'All {len(tracks_json)} tracks unchanged since last sync — skipping',
log_type='skip')
return {
'status': 'skipped',
'reason': f'All {len(tracks_json)} tracks unchanged since last sync',
}
except Exception:
pass # If we can't read last status, just run the sync
_update_automation_progress(auto_id, progress=50,
phase=f'Syncing "{pl["name"]}"',
log_line=f'{len(tracks_json)} discovered, {skipped_count} skipped',
log_type='info')
sync_id = f"auto_mirror_{playlist_id}"
_update_automation_progress(auto_id, progress=90,
log_line=f'Starting sync: {len(tracks_json)} tracks', log_type='success')
@ -24401,7 +24401,9 @@ def _detect_sync_source(playlist_id):
def _record_sync_history_start(batch_id, playlist_id, playlist_name, tracks,
is_album_download, album_context, artist_context,
playlist_folder_mode):
"""Record a sync start to the database."""
"""Record a sync start to the database.
If a previous sync history entry exists for the same playlist_id, update it
instead of creating a duplicate."""
try:
source = _detect_sync_source(playlist_id)
if playlist_id == 'wishlist':
@ -24427,6 +24429,33 @@ def _record_sync_history_start(batch_id, playlist_id, playlist_name, tracks,
thumb_url = imgs[0].get('url') if isinstance(imgs[0], dict) else imgs[0]
db = MusicDatabase()
# Check for existing entry with same playlist_id — update instead of duplicating
existing = db.get_latest_sync_history_by_playlist(playlist_id)
if existing:
try:
conn = db._get_connection()
cursor = conn.cursor()
cursor.execute("""
UPDATE sync_history
SET batch_id = ?, playlist_name = ?, source = ?, sync_type = ?,
tracks_json = ?, artist_context = ?, album_context = ?,
thumb_url = ?, total_tracks = ?, is_album_download = ?,
playlist_folder_mode = ?, started_at = CURRENT_TIMESTAMP,
completed_at = NULL, tracks_found = 0, tracks_downloaded = 0, tracks_failed = 0
WHERE id = ?
""", (batch_id, playlist_name, source, sync_type,
json.dumps(tracks, ensure_ascii=False),
json.dumps(artist_context, ensure_ascii=False) if artist_context else None,
json.dumps(album_context, ensure_ascii=False) if album_context else None,
thumb_url, len(tracks), int(is_album_download), int(playlist_folder_mode),
existing['id']))
conn.commit()
logger.info(f"Updated existing sync history entry {existing['id']} for '{playlist_name}'")
return
except Exception as e:
logger.warning(f"Failed to update existing sync history, creating new: {e}")
db.add_sync_history_entry(
batch_id=batch_id,
playlist_id=playlist_id,
@ -24676,8 +24705,8 @@ def _update_and_save_sync_status(playlist_id, playlist_name, playlist_owner, sna
'snapshot_id': snapshot_id,
'last_synced': now.isoformat()
}
# Store match counts for smart-skip on scheduled syncs
for key in ('matched_tracks', 'total_tracks', 'discovered_tracks'):
# Store match counts and track hash for smart-skip on scheduled syncs
for key in ('matched_tracks', 'total_tracks', 'discovered_tracks', 'tracks_hash'):
if key in kwargs:
status[key] = kwargs[key]
sync_statuses[playlist_id] = status
@ -30003,12 +30032,16 @@ def _run_sync_task(playlist_id, playlist_name, tracks_json, automation_id=None):
except Exception:
pass
# Save sync status with match counts for smart-skip on next scheduled sync
# Save sync status with match counts and track hash for smart-skip on next scheduled sync
import hashlib as _hl
_track_ids_str = ','.join(sorted(t.get('id', '') for t in tracks_json))
_tracks_hash = _hl.md5(_track_ids_str.encode()).hexdigest()
snapshot_id = getattr(playlist, 'snapshot_id', None)
_update_and_save_sync_status(playlist_id, playlist_name, playlist.owner, snapshot_id,
matched_tracks=getattr(result, 'matched_tracks', 0),
total_tracks=getattr(result, 'total_tracks', 0),
discovered_tracks=len(tracks_json))
discovered_tracks=len(tracks_json),
tracks_hash=_tracks_hash)
except Exception as e:
print(f"❌ SYNC FAILED for {playlist_id}: {e}")

Loading…
Cancel
Save