"""WebMetadataUpdateWorker — lifted from web_server.py. Body is byte-identical to the original. The module-level ``metadata_update_state`` global must be initialized via ``init()`` before ``WebMetadataUpdateWorker`` is instantiated, so the dict reference inside the class body resolves to the same dict that web_server.py owns. """ import logging import threading import requests from datetime import datetime from core.matching_engine import MusicMatchingEngine from core.runtime_state import add_activity_item from config.settings import config_manager logger = logging.getLogger(__name__) # Injected at runtime via init() — points to web_server.metadata_update_state. metadata_update_state = None def init(state): """Bind the shared metadata_update_state dict from web_server.""" global metadata_update_state metadata_update_state = state class WebMetadataUpdateWorker: """Web-based metadata update worker - EXACT port of dashboard.py MetadataUpdateWorker""" def __init__(self, artists, media_client, spotify_client, server_type, refresh_interval_days=30): self.artists = artists self.media_client = media_client # Can be plex_client or jellyfin_client self.spotify_client = spotify_client self.server_type = server_type # "plex" or "jellyfin" self.matching_engine = MusicMatchingEngine() self.refresh_interval_days = refresh_interval_days self.should_stop = False self.processed_count = 0 self.successful_count = 0 self.failed_count = 0 self.max_workers = 1 # DB-first: reuse existing metadata from SoulSync database try: from database.music_database import MusicDatabase self._db = MusicDatabase() except Exception: self._db = None self.thread_lock = threading.Lock() def stop(self): self.should_stop = True def get_artist_name(self, artist): """Get artist name consistently across Plex and Jellyfin""" return getattr(artist, 'title', 'Unknown Artist') def run(self): """Process all artists one by one - EXACT copy from dashboard.py""" global metadata_update_state try: # Load artists in background if not provided - EXACTLY like dashboard.py if self.artists is None: # Enable lightweight mode for Jellyfin to skip track caching if self.server_type == "jellyfin": self.media_client.set_metadata_only_mode(True) elif self.server_type == "navidrome": # Navidrome doesn't need special mode setting pass all_artists = self.media_client.get_all_artists() logger.debug(f"Raw artists returned: {[getattr(a, 'title', 'NO_TITLE') for a in (all_artists or [])]}") if not all_artists: metadata_update_state['status'] = 'error' metadata_update_state['error'] = f"No artists found in {self.server_type.title()} library" add_activity_item("", "Metadata Update", metadata_update_state['error'], "Now") return # Filter artists that need processing artists_to_process = [artist for artist in all_artists if self.artist_needs_processing(artist)] self.artists = artists_to_process # Emit loaded signal equivalent - EXACTLY like dashboard.py if len(artists_to_process) == 0: metadata_update_state['status'] = 'completed' metadata_update_state['completed_at'] = datetime.now() add_activity_item("", "Metadata Update", "All artists already have good metadata", "Now") return else: add_activity_item("", "Metadata Update", f"Processing {len(artists_to_process)} of {len(all_artists)} artists", "Now") if not artists_to_process: metadata_update_state['status'] = 'completed' metadata_update_state['completed_at'] = datetime.now() return total_artists = len(self.artists) metadata_update_state['total'] = total_artists # Process artists in parallel using ThreadPoolExecutor - EXACTLY like dashboard.py def process_single_artist(artist): """Process a single artist and return results""" if self.should_stop or metadata_update_state['status'] == 'stopping': return None artist_name = getattr(artist, 'title', 'Unknown Artist') # Double-check ignore flag right before processing if self.media_client.is_artist_ignored(artist): return (artist_name, True, "Skipped (ignored)") try: success, details = self.update_artist_metadata(artist) return (artist_name, success, details) except Exception as e: return (artist_name, False, f"Error: {str(e)}") # Process artists sequentially with rate limiting # (no ThreadPoolExecutor — API rate limits make parallelism counterproductive) import time for artist in self.artists: if self.should_stop or metadata_update_state['status'] == 'stopping': break result = process_single_artist(artist) if result is None: continue artist_name, success, details = result with self.thread_lock: self.processed_count += 1 if success: self.successful_count += 1 else: self.failed_count += 1 progress_percent = (self.processed_count / total_artists) * 100 metadata_update_state.update({ 'current_artist': artist_name, 'processed': self.processed_count, 'percentage': progress_percent, 'successful': self.successful_count, 'failed': self.failed_count }) # Rate limit: 1.5s between artists (this actually runs between artists now) time.sleep(1.5) # Mark as completed - equivalent to finished.emit metadata_update_state['status'] = 'completed' metadata_update_state['completed_at'] = datetime.now() metadata_update_state['current_artist'] = 'Completed' summary = f"Processed {self.processed_count} artists: {self.successful_count} updated, {self.failed_count} failed" add_activity_item("", "Metadata Complete", summary, "Now") except Exception as e: logger.error(f"Metadata update failed: {e}") metadata_update_state['status'] = 'error' metadata_update_state['error'] = str(e) add_activity_item("", "Metadata Error", str(e), "Now") def artist_needs_processing(self, artist): """Check if an artist needs metadata processing using age-based detection - EXACT copy from dashboard.py""" try: # Check if artist is manually ignored if self.media_client.is_artist_ignored(artist): return False # Use media client's age-based checking with configured interval return self.media_client.needs_update_by_age(artist, self.refresh_interval_days) except Exception as e: logger.error(f"Error checking artist {getattr(artist, 'title', 'Unknown')}: {e}") return True # Process if we can't determine status def _check_db_artist(self, artist_name): """Check SoulSync DB for existing artist metadata (genres, spotify_artist_id). NOTE: DB thumb_url is a Plex/Jellyfin internal path, NOT a downloadable URL. Photos must be checked via the media server object, not the DB. Returns (db_artist_dict, has_genres, spotify_artist_id) or (None, False, None) if not found.""" if not self._db: return None, False, None try: db_artists = self._db.search_artists(artist_name, limit=5) if not db_artists: return None, False, None # Find best name match best = None best_score = 0.0 norm_name = self.matching_engine.normalize_string(artist_name) for dba in db_artists: score = self.matching_engine.similarity_score( norm_name, self.matching_engine.normalize_string(dba.name)) if score > best_score: best_score = score best = dba if not best or best_score < 0.85: return None, False, None has_genres = bool(best.genres and len(best.genres) > 0) # Get spotify_artist_id from raw DB row (not in dataclass) spotify_artist_id = None try: raw = self._db.api_get_artist(best.id) if raw: spotify_artist_id = raw.get('spotify_artist_id') except Exception: pass return best, has_genres, spotify_artist_id except Exception: return None, False, None def update_artist_metadata(self, artist): """Update a single artist's metadata. Checks SoulSync DB first to avoid unnecessary API calls. DB-first strategy: - Genres: DB stores real genre strings → can apply directly, skip Spotify - spotify_artist_id: DB may have it from enrichment → skip search_artists() call - Photos/album art: DB thumb_url is a media-server internal path (not downloadable) so these MUST come from Spotify API """ try: artist_name = getattr(artist, 'title', 'Unknown Artist') # Skip processing for artists with no valid name if artist_name == 'Unknown Artist' or not artist_name or not artist_name.strip(): return False, "Skipped: No valid artist name" # DB-first: check what we already have cached db_artist, db_has_genres, db_spotify_id = self._check_db_artist(artist_name) # Check what the media server artist is currently missing needs_photo = not self.artist_has_valid_photo(artist) if self.server_type != "jellyfin" else True needs_genres = not getattr(artist, 'genres', None) needs_album_art = self.server_type == "plex" # If media server already has valid photo + genres + album art, skip entirely if not needs_photo and not needs_genres and not needs_album_art: self.media_client.update_artist_biography(artist) return True, "Already up to date" # Determine if we actually need Spotify # Photos and album art MUST come from Spotify (DB only has internal media server paths) # Genres CAN come from DB if available need_spotify = needs_photo or needs_album_art or (needs_genres and not db_has_genres) spotify_artist = None highest_score = 0.0 if need_spotify: # Try direct lookup by cached spotify_artist_id first (1 API call vs search) if db_spotify_id: try: from core.spotify_client import Artist as SpotifyArtistDC raw = self.spotify_client.get_artist(db_spotify_id) if raw and 'name' in raw: spotify_artist = SpotifyArtistDC.from_spotify_artist(raw) highest_score = 1.0 logger.debug(f"Metadata updater: direct Spotify lookup for '{artist_name}' via cached ID {db_spotify_id}") except Exception as e: logger.debug(f"Direct Spotify lookup failed for {db_spotify_id}: {e}") spotify_artist = None # Fall back to search if direct lookup didn't work if not spotify_artist: spotify_artists = self.spotify_client.search_artists(artist_name, limit=5) if not spotify_artists: # Spotify failed — apply DB genres if available, skip photos/art changes_made = [] if needs_genres and db_has_genres and db_artist: if self._apply_db_genres(artist, db_artist.genres): changes_made.append("genres (DB)") if changes_made: self.media_client.update_artist_biography(artist) return True, f"Updated {', '.join(changes_made)} (Spotify unavailable)" return False, "Not found on Spotify" # Find the best match best_match = None plex_artist_normalized = self.matching_engine.normalize_string(artist_name) for sa in spotify_artists: spotify_artist_normalized = self.matching_engine.normalize_string(sa.name) score = self.matching_engine.similarity_score(plex_artist_normalized, spotify_artist_normalized) if score > highest_score: highest_score = score best_match = sa if not best_match or highest_score < 0.7: # No good Spotify match — still try DB genres changes_made = [] if needs_genres and db_has_genres and db_artist: if self._apply_db_genres(artist, db_artist.genres): changes_made.append("genres (DB)") if changes_made: self.media_client.update_artist_biography(artist) return True, f"Updated {', '.join(changes_made)} (no Spotify match)" return False, f"No confident match found (best: '{getattr(best_match, 'name', 'N/A')}', score: {highest_score:.2f})" spotify_artist = best_match changes_made = [] # Update photo (always from Spotify — DB only has media server paths) if needs_photo and spotify_artist: photo_updated = self.update_artist_photo(artist, spotify_artist) if photo_updated: changes_made.append("photo") # Update genres — use DB if available, otherwise Spotify if needs_genres: if db_has_genres and db_artist: genres_updated = self._apply_db_genres(artist, db_artist.genres) if genres_updated: changes_made.append("genres (DB)") elif spotify_artist: # DB genres didn't result in changes, try Spotify for newer/different genres genres_updated = self.update_artist_genres(artist, spotify_artist) if genres_updated: changes_made.append("genres") elif spotify_artist: genres_updated = self.update_artist_genres(artist, spotify_artist) if genres_updated: changes_made.append("genres") # Update album artwork (only for Plex, always from Spotify) if self.server_type == "plex" and spotify_artist: albums_updated = self.update_album_artwork(artist, spotify_artist) if albums_updated > 0: changes_made.append(f"{albums_updated} album art") elif self.server_type != "plex": logger.info(f"Skipping album artwork updates for Jellyfin artist: {artist.title}") if changes_made: biography_updated = self.media_client.update_artist_biography(artist) if biography_updated: changes_made.append("timestamp") source = f"match: '{spotify_artist.name}', score: {highest_score:.2f}" if spotify_artist else "DB cache" details = f"Updated {', '.join(changes_made)} ({source})" return True, details else: self.media_client.update_artist_biography(artist) return True, "Already up to date" except Exception as e: return False, str(e) def _apply_db_genres(self, artist, genres): """Apply genres from DB cache to media server.""" try: if not genres: return False existing_genres = set(genre.tag if hasattr(genre, 'tag') else str(genre) for genre in (getattr(artist, 'genres', None) or [])) db_genres = set(g for g in genres if g and g.strip() and len(g.strip()) > 1) if db_genres and db_genres != existing_genres: return self.media_client.update_artist_genres(artist, list(db_genres)[:10]) return False except Exception: return False def update_artist_photo(self, artist, spotify_artist): """Update artist photo from Spotify - EXACT copy from dashboard.py""" try: # Check if artist already has a good photo (skip check for Jellyfin) if self.server_type != "jellyfin" and self.artist_has_valid_photo(artist): logger.info(f"Skipping {artist.title}: already has valid photo ({getattr(artist, 'thumb', 'None')})") return False # Get the image URL from Spotify if not spotify_artist.image_url: logger.warning(f"Skipping {artist.title}: no Spotify image URL available") return False logger.info(f"Processing {artist.title}: downloading from Spotify...") image_url = spotify_artist.image_url # Download and validate image response = requests.get(image_url, timeout=10) response.raise_for_status() # Validate and convert image (skip conversion for Jellyfin to preserve format) if self.server_type == "jellyfin": # For Jellyfin, use raw image data to preserve original format image_data = response.content logger.info(f"Using raw image data for Jellyfin ({len(image_data)} bytes)") else: # For other servers, validate and convert image_data = self.validate_and_convert_image(response.content) if not image_data: return False # Upload to media server using client's method return self.media_client.update_artist_poster(artist, image_data) except Exception as e: logger.error(f"Error updating photo for {getattr(artist, 'title', 'Unknown')}: {e}") return False def update_artist_genres(self, artist, spotify_artist): """Update artist genres from Spotify and albums - EXACT copy from dashboard.py""" try: # Get existing genres existing_genres = set(genre.tag if hasattr(genre, 'tag') else str(genre) for genre in (artist.genres or [])) # Get Spotify artist genres spotify_genres = set(spotify_artist.genres or []) # Get genres from all albums album_genres = set() try: for album in artist.albums(): if hasattr(album, 'genres') and album.genres: album_genres.update(genre.tag if hasattr(genre, 'tag') else str(genre) for genre in album.genres) except Exception: pass # Albums might not be accessible # Combine all genres (prioritize Spotify genres) all_genres = spotify_genres.union(album_genres) # Filter out empty/invalid genres all_genres = {g for g in all_genres if g and g.strip() and len(g.strip()) > 1} # Only update if we have new genres and they're different if all_genres and (not existing_genres or all_genres != existing_genres): # Convert to list and limit to 10 genres genre_list = list(all_genres)[:10] # Use media client API to update genres success = self.media_client.update_artist_genres(artist, genre_list) if success: return True else: return False else: return False except Exception as e: logger.error(f"Error updating genres for {getattr(artist, 'title', 'Unknown')}: {e}") return False def update_album_artwork(self, artist, spotify_artist): """Update album artwork for all albums by this artist from Spotify. DB thumb_url is a media-server internal path, so album art must come from Spotify.""" try: updated_count = 0 skipped_count = 0 # Get all albums for this artist try: albums = list(artist.albums()) except Exception: logger.error(f"Could not access albums for artist '{artist.title}'") return 0 if not albums: logger.warning(f"No albums found for artist '{artist.title}'") return 0 import time for album in albums: try: album_title = getattr(album, 'title', 'Unknown Album') # Check if album already has good artwork on the media server if self.album_has_valid_artwork(album): skipped_count += 1 continue # Rate limit between album API calls time.sleep(0.5) # Search for this specific album on Spotify album_query = f"album:{album_title} artist:{spotify_artist.name}" spotify_albums = self.spotify_client.search_albums(album_query, limit=3) if not spotify_albums: continue # Find the best matching album best_album = None highest_score = 0.0 plex_album_normalized = self.matching_engine.normalize_string(album_title) for spotify_album in spotify_albums: spotify_album_normalized = self.matching_engine.normalize_string(spotify_album.name) score = self.matching_engine.similarity_score(plex_album_normalized, spotify_album_normalized) if score > highest_score: highest_score = score best_album = spotify_album # If we found a good match with artwork, download it if best_album and highest_score > 0.7 and best_album.image_url: if self.download_and_upload_album_artwork(album, best_album.image_url): updated_count += 1 except Exception as e: logger.error(f"Error processing album '{getattr(album, 'title', 'Unknown')}': {e}") continue return updated_count except Exception as e: logger.error(f"Error updating album artwork for artist '{getattr(artist, 'title', 'Unknown')}': {e}") return 0 def album_has_valid_artwork(self, album): """Check if album has valid artwork - EXACT copy from dashboard.py""" try: if not hasattr(album, 'thumb') or not album.thumb: return False thumb_url = str(album.thumb) # Completely empty or None if not thumb_url or thumb_url.strip() == '': return False # Obvious placeholder text in URL obvious_placeholders = ['no-image', 'placeholder', 'missing', 'default-album', 'blank.jpg', 'empty.png'] thumb_lower = thumb_url.lower() for placeholder in obvious_placeholders: if placeholder in thumb_lower: return False # Extremely short URLs (likely broken) if len(thumb_url) < 20: return False return True except Exception as e: return True def download_and_upload_album_artwork(self, album, image_url): """Download artwork from Spotify and upload to media server - EXACT copy from dashboard.py""" try: # Download image from Spotify response = requests.get(image_url, timeout=10) response.raise_for_status() # Validate and convert image image_data = self.validate_and_convert_image(response.content) if not image_data: return False # Upload using media client success = self.media_client.update_album_poster(album, image_data) return success except Exception as e: logger.error(f"Error downloading/uploading artwork for album '{getattr(album, 'title', 'Unknown')}': {e}") return False def artist_has_valid_photo(self, artist): """Check if artist has a valid photo - EXACT copy from dashboard.py""" try: if not hasattr(artist, 'thumb') or not artist.thumb: return False thumb_url = str(artist.thumb) if 'default' in thumb_url.lower() or len(thumb_url) < 50: return False return True except Exception: return False def validate_and_convert_image(self, image_data): """Validate and convert image for media server compatibility - EXACT copy from dashboard.py""" try: from PIL import Image import io # Open and validate image image = Image.open(io.BytesIO(image_data)) # Check minimum dimensions width, height = image.size if width < 200 or height < 200: return None # Convert to JPEG for consistency if image.format != 'JPEG': buffer = io.BytesIO() image.convert('RGB').save(buffer, format='JPEG', quality=95) return buffer.getvalue() return image_data except Exception: return None def upload_artist_poster(self, artist, image_data): """Upload poster using media client - EXACT copy from dashboard.py""" try: # Use media client's update method if available if hasattr(self.media_client, 'update_artist_poster'): return self.media_client.update_artist_poster(artist, image_data) # Fallback for Plex: direct API call if self.server_type == "plex": import requests server = self.media_client.server upload_url = f"{server._baseurl}/library/metadata/{artist.ratingKey}/posters" headers = { 'X-Plex-Token': server._token, 'Content-Type': 'image/jpeg' } response = requests.post(upload_url, data=image_data, headers=headers) response.raise_for_status() # Refresh artist to see changes artist.refresh() return True # Jellyfin: Use Jellyfin API to upload artist image elif self.server_type == "jellyfin": import requests jellyfin_config = config_manager.get_jellyfin_config() jellyfin_base_url = jellyfin_config.get('base_url', '') jellyfin_token = jellyfin_config.get('api_key', '') if not jellyfin_base_url or not jellyfin_token: logger.warning("Jellyfin configuration missing for image upload") return False upload_url = f"{jellyfin_base_url.rstrip('/')}/Items/{artist.ratingKey}/Images/Primary" headers = { 'Authorization': f'MediaBrowser Token="{jellyfin_token}"', 'Content-Type': 'image/jpeg' } response = requests.post(upload_url, data=image_data, headers=headers) response.raise_for_status() return True # Navidrome: Currently not supported (Subsonic API doesn't support image uploads) elif self.server_type == "navidrome": logger.info("ℹ️ Navidrome does not support artist image uploads via Subsonic API") return False else: # Unknown server type return False except Exception as e: logger.error(f"Error uploading poster: {e}") return False