From d58fee17b033d6757c2ae525b2bfff1e282e8884 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Mon, 16 Mar 2026 20:21:02 -0700 Subject: [PATCH] Fix duplicate mirrored playlists from YouTube pill system and add Deezer/Spotify public refresh YouTube URL hash used Python's hash() which is randomized per process restart, causing pill re-clicks to create duplicates instead of upserts. Replaced with deterministic hashlib.md5 of canonical URL. Includes auto-migration to deduplicate existing entries and update their source_playlist_id. Also adds mirrored playlist refresh support for Spotify (public embed scraper fallback when not authenticated) and Deezer playlists. --- web_server.py | 144 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 116 insertions(+), 28 deletions(-) diff --git a/web_server.py b/web_server.py index 93650e21..ae12c0c9 100644 --- a/web_server.py +++ b/web_server.py @@ -500,34 +500,89 @@ def _register_automation_handlers(): current_item=pl.get('name', '')) tracks = None - if source == 'spotify' and spotify_client and spotify_client.is_spotify_authenticated(): - playlist_obj = spotify_client.get_playlist_by_id(source_id) - if playlist_obj and playlist_obj.tracks: - tracks = [] - for t in playlist_obj.tracks: - artist_name = t.artists[0] if t.artists else '' - track_dict = { - 'track_name': t.name or '', - 'artist_name': str(artist_name), - 'album_name': t.album or '', - 'duration_ms': t.duration_ms or 0, - 'source_track_id': t.id or '', - } - # Spotify data IS official — auto-mark as discovered - if t.id: - track_dict['extra_data'] = json.dumps({ - 'discovered': True, - 'provider': 'spotify', - 'confidence': 1.0, - 'matched_data': { - 'id': t.id, - 'name': t.name or '', - 'artists': [{'name': str(a)} for a in (t.artists or [])], - 'album': t.album or '', - 'duration_ms': t.duration_ms or 0, + if source == 'spotify': + # Try authenticated API first, fall back to public embed scraper + if spotify_client and spotify_client.is_spotify_authenticated(): + playlist_obj = spotify_client.get_playlist_by_id(source_id) + if playlist_obj and playlist_obj.tracks: + tracks = [] + for t in playlist_obj.tracks: + artist_name = t.artists[0] if t.artists else '' + track_dict = { + 'track_name': t.name or '', + 'artist_name': str(artist_name), + 'album_name': t.album or '', + 'duration_ms': t.duration_ms or 0, + 'source_track_id': t.id or '', + } + # Spotify data IS official — auto-mark as discovered + if t.id: + track_dict['extra_data'] = json.dumps({ + 'discovered': True, + 'provider': 'spotify', + 'confidence': 1.0, + 'matched_data': { + 'id': t.id, + 'name': t.name or '', + 'artists': [{'name': str(a)} for a in (t.artists or [])], + 'album': t.album or '', + 'duration_ms': t.duration_ms or 0, + } + }) + tracks.append(track_dict) + + # Fallback: public embed scraper (no auth needed) + if tracks is None: + try: + from core.spotify_public_scraper import scrape_spotify_embed + embed_data = scrape_spotify_embed('playlist', source_id) + if embed_data and not embed_data.get('error') and embed_data.get('tracks'): + tracks = [] + for t in embed_data['tracks']: + artist_names = [a['name'] for a in t.get('artists', [])] + artist_name = artist_names[0] if artist_names else '' + track_dict = { + 'track_name': t.get('name', ''), + 'artist_name': artist_name, + 'album_name': '', + 'duration_ms': t.get('duration_ms', 0), + 'source_track_id': t.get('id', ''), } + if t.get('id'): + track_dict['extra_data'] = json.dumps({ + 'discovered': True, + 'provider': 'spotify', + 'confidence': 1.0, + 'matched_data': { + 'id': t['id'], + 'name': t.get('name', ''), + 'artists': t.get('artists', []), + 'album': '', + 'duration_ms': t.get('duration_ms', 0), + } + }) + tracks.append(track_dict) + except Exception as e: + logger.warning(f"Spotify public scraper fallback failed for {source_id}: {e}") + + elif source == 'deezer': + try: + from core.deezer_client import DeezerClient + deezer = DeezerClient() + playlist_data = deezer.get_playlist(source_id) + if playlist_data and playlist_data.get('tracks'): + tracks = [] + for t in playlist_data['tracks']: + artist_name = t['artists'][0] if t.get('artists') else '' + tracks.append({ + 'track_name': t.get('name', ''), + 'artist_name': str(artist_name), + 'album_name': t.get('album', ''), + 'duration_ms': t.get('duration_ms', 0), + 'source_track_id': str(t.get('id', '')), }) - tracks.append(track_dict) + except Exception as e: + logger.warning(f"Deezer playlist refresh failed for {source_id}: {e}") elif source == 'tidal' and tidal_client and tidal_client.is_authenticated(): full_playlist = tidal_client.get_playlist(source_id) @@ -27643,8 +27698,41 @@ def parse_youtube_playlist_endpoint(): if not playlist_data: return jsonify({"error": "Failed to parse YouTube playlist"}), 500 - # Create URL hash for state tracking - url_hash = str(hash(url)) + # Use deterministic hash for state tracking (built-in hash() is randomized per process restart) + import hashlib + yt_playlist_id = playlist_data.get('id', '') + if yt_playlist_id and yt_playlist_id != 'unknown_id': + # Use canonical URL with the stable YouTube playlist ID + canonical_url = f"https://youtube.com/playlist?list={yt_playlist_id}" + else: + canonical_url = url + url_hash = hashlib.md5(canonical_url.encode()).hexdigest()[:12] + + # Migrate existing mirrored playlists that used the old non-deterministic hash() + # and deduplicate any copies created by the bug + try: + database = get_database() + profile_id = get_current_profile_id() + existing = database.get_mirrored_playlists(profile_id=profile_id) + yt_dupes = [mp for mp in existing if mp['source'] == 'youtube' and mp['name'] == playlist_data['name']] + if yt_dupes: + # Keep the newest one, delete the rest + keep = yt_dupes[0] # Already sorted by updated_at DESC from get_mirrored_playlists + for dupe in yt_dupes[1:]: + database.delete_mirrored_playlist(dupe['id']) + logger.info(f"Removed duplicate YouTube mirrored playlist '{dupe['name']}' (id={dupe['id']})") + # Update the kept entry's source_playlist_id to the new deterministic hash + if keep['source_playlist_id'] != url_hash: + with database._get_connection() as conn: + cursor = conn.cursor() + cursor.execute( + "UPDATE mirrored_playlists SET source_playlist_id = ? WHERE id = ?", + (url_hash, keep['id']) + ) + conn.commit() + logger.info(f"Migrated YouTube mirrored playlist '{keep['name']}' source_playlist_id to deterministic hash {url_hash}") + except Exception as e: + logger.debug(f"YouTube mirror migration check: {e}") # Initialize persistent playlist state (similar to Spotify download_batches structure) youtube_playlist_states[url_hash] = {