From 9d73b8b561ad67ca4766413f0c3785dcbfffd2fb Mon Sep 17 00:00:00 2001 From: Antti Kettunen Date: Thu, 16 Apr 2026 08:31:04 +0300 Subject: [PATCH] Restore placeholder filtering and shared image backfill Bring placeholder tracklist skipping back into the shared watchlist scan path, and centralize the DB-only artist image backfill helper so both web scan entrypoints reuse the same logic. --- core/watchlist_scanner.py | 84 +++++++++++++++++++++++++++ tests/test_watchlist_scanner_scan.py | 58 +++++++++++++++++++ web_server.py | 86 ++++------------------------ 3 files changed, 153 insertions(+), 75 deletions(-) diff --git a/core/watchlist_scanner.py b/core/watchlist_scanner.py index 069584b3..88643a83 100644 --- a/core/watchlist_scanner.py +++ b/core/watchlist_scanner.py @@ -517,6 +517,87 @@ class WatchlistScanner: return None + def backfill_watchlist_artist_images(self, profile_id: int) -> int: + """Backfill missing watchlist artist images using cached metadata and existing album art.""" + try: + conn = self.database._get_connection() + cursor = conn.cursor() + cursor.execute(""" + SELECT id, artist_name, spotify_artist_id, itunes_artist_id, + deezer_artist_id, discogs_artist_id + FROM watchlist_artists + WHERE profile_id = ? AND (image_url IS NULL OR image_url = '' OR image_url = 'None' + OR image_url NOT LIKE 'http%') + """, (profile_id,)) + imageless = cursor.fetchall() + + if not imageless: + return 0 + + logger.info("Backfilling images for %s watchlist artists (profile %s)...", len(imageless), profile_id) + filled = 0 + for row in imageless: + name = row['artist_name'] + img = None + + # 1. Check metadata cache for artist image + cursor.execute(""" + SELECT image_url FROM metadata_cache_entities + WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE + AND image_url IS NOT NULL AND image_url LIKE 'http%' + LIMIT 1 + """, (name,)) + cr = cursor.fetchone() + if cr: + img = cr['image_url'] + + # 2. Deezer direct URL (no API call needed) + if not img and row['deezer_artist_id']: + img = f"https://api.deezer.com/artist/{row['deezer_artist_id']}/image?size=big" + + # 3. Deezer ID from cache (artist may have a Deezer match we haven't stored on watchlist) + if not img: + cursor.execute(""" + SELECT entity_id FROM metadata_cache_entities + WHERE entity_type = 'artist' AND source = 'deezer' + AND name = ? COLLATE NOCASE LIMIT 1 + """, (name,)) + dz = cursor.fetchone() + if dz and dz['entity_id']: + img = f"https://api.deezer.com/artist/{dz['entity_id']}/image?size=big" + + # 4. Album art fallback (iTunes artists have no artist images) + if not img: + cursor.execute(""" + SELECT image_url FROM metadata_cache_entities + WHERE entity_type = 'album' AND image_url LIKE 'http%' + AND artist_name = ? COLLATE NOCASE LIMIT 1 + """, (name,)) + alb = cursor.fetchone() + if alb: + img = alb['image_url'] + + if img: + aid = (row['spotify_artist_id'] or row['itunes_artist_id'] + or row['deezer_artist_id'] or row['discogs_artist_id']) + if aid: + self.database.update_watchlist_artist_image(aid, img) + else: + # No external IDs — update by internal row ID directly + cursor.execute(""" + UPDATE watchlist_artists SET image_url = ?, updated_at = CURRENT_TIMESTAMP + WHERE id = ? + """, (img, row['id'])) + conn.commit() + filled += 1 + + if filled: + logger.info("Backfilled %s/%s watchlist artist images (profile %s)", filled, len(imageless), profile_id) + return filled + except Exception as e: + logger.debug("Error backfilling watchlist artist images for profile %s: %s", profile_id, e, exc_info=True) + return 0 + def get_artist_discography_for_watchlist(self, watchlist_artist: WatchlistArtist, last_scan_timestamp: Optional[datetime] = None) -> Optional[List]: """ Get artist's discography using the active provider, with proper ID resolution. @@ -836,6 +917,9 @@ class WatchlistScanner: continue tracks = album_data['tracks']['items'] + if self._has_placeholder_tracks(tracks): + logger.info("Skipping album with placeholder tracks: %s", album_data.get('name', album.name)) + continue if not self._should_include_release(len(tracks), artist): continue diff --git a/tests/test_watchlist_scanner_scan.py b/tests/test_watchlist_scanner_scan.py index bb59d786..ca99d179 100644 --- a/tests/test_watchlist_scanner_scan.py +++ b/tests/test_watchlist_scanner_scan.py @@ -157,6 +157,64 @@ def test_scan_watchlist_artists_scans_tracks_and_updates_state(monkeypatch): assert scan_state["recent_wishlist_additions"][0]["track_name"] == "Track One" +def test_scan_watchlist_artists_skips_placeholder_tracklists(monkeypatch): + monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0) + monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ALBUMS", 0) + + artist = _build_artist() + album = types.SimpleNamespace(id="album-1", name="Album One") + album_data = { + "name": "Album One", + "images": [{"url": "https://example.com/album.jpg"}], + "tracks": { + "items": [ + { + "id": "track-1", + "name": "Track 1", + "track_number": 1, + "disc_number": 1, + "artists": [{"name": "Artist One"}], + }, + { + "id": "track-2", + "name": "Track 2", + "track_number": 2, + "disc_number": 1, + "artists": [{"name": "Artist One"}], + }, + ] + }, + } + scanner = _build_scanner(album_data, [artist]) + scanner._database.has_fresh_similar_artists = lambda *args, **kwargs: False + + monkeypatch.setattr(scanner, "_backfill_missing_ids", lambda *args, **kwargs: None) + monkeypatch.setattr(scanner, "get_artist_image_url", lambda *_args, **_kwargs: "https://example.com/artist.jpg") + monkeypatch.setattr(scanner, "get_artist_discography_for_watchlist", lambda *_args, **_kwargs: [album]) + monkeypatch.setattr(scanner, "_get_lookback_period_setting", lambda: "30") + monkeypatch.setattr(scanner, "_get_rescan_cutoff", lambda: None) + monkeypatch.setattr(scanner, "_should_include_release", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "_should_include_track", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "is_track_missing_from_library", lambda *_args, **_kwargs: True) + + add_calls = [] + monkeypatch.setattr(scanner, "add_track_to_wishlist", lambda *args, **kwargs: add_calls.append((args, kwargs)) or True) + monkeypatch.setattr(scanner, "update_artist_scan_timestamp", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "update_similar_artists", lambda *_args, **_kwargs: True) + monkeypatch.setattr(scanner, "_backfill_similar_artists_itunes_ids", lambda *_args, **_kwargs: 0) + + scan_state = {} + results = scanner.scan_watchlist_artists([artist], scan_state=scan_state) + + assert len(results) == 1 + assert results[0].success is True + assert results[0].new_tracks_found == 0 + assert results[0].tracks_added_to_wishlist == 0 + assert add_calls == [] + assert scan_state["summary"]["new_tracks_found"] == 0 + assert scan_state["summary"]["tracks_added_to_wishlist"] == 0 + + def test_scan_watchlist_artists_honors_cancel_check(monkeypatch): monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0) monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ALBUMS", 0) diff --git a/web_server.py b/web_server.py index 58c08eb1..f6323a2c 100644 --- a/web_server.py +++ b/web_server.py @@ -39836,82 +39836,10 @@ def start_watchlist_scan(): except Exception as backfill_error: print(f"Error during {_bf_provider} ID backfilling: {backfill_error}") # Continue with next provider - - # IMAGE BACKFILL — fix watchlist artists with missing images - # Uses DB-only lookups (metadata cache + album art) — no API calls try: - conn = database._get_connection() - cursor = conn.cursor() - cursor.execute(""" - SELECT id, artist_name, spotify_artist_id, itunes_artist_id, - deezer_artist_id, discogs_artist_id - FROM watchlist_artists - WHERE profile_id = ? AND (image_url IS NULL OR image_url = '' OR image_url = 'None' - OR image_url NOT LIKE 'http%') - """, (scan_profile_id,)) - imageless = cursor.fetchall() - - if imageless: - print(f"Backfilling images for {len(imageless)} watchlist artists...") - filled = 0 - for row in imageless: - name = row['artist_name'] - nn = name.lower().strip() - img = None - - # 1. Check metadata cache for artist image - cursor.execute(""" - SELECT image_url FROM metadata_cache_entities - WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE - AND image_url IS NOT NULL AND image_url LIKE 'http%' - LIMIT 1 - """, (name,)) - cr = cursor.fetchone() - if cr: - img = cr['image_url'] - - # 2. Deezer direct URL (no API call needed) - if not img and row['deezer_artist_id']: - img = f"https://api.deezer.com/artist/{row['deezer_artist_id']}/image?size=big" - - # 3. Deezer ID from cache (artist may have a Deezer match we haven't stored on watchlist) - if not img: - cursor.execute(""" - SELECT entity_id FROM metadata_cache_entities - WHERE entity_type = 'artist' AND source = 'deezer' - AND name = ? COLLATE NOCASE LIMIT 1 - """, (name,)) - dz = cursor.fetchone() - if dz and dz['entity_id']: - img = f"https://api.deezer.com/artist/{dz['entity_id']}/image?size=big" - - # 4. Album art fallback (iTunes artists have no artist images) - if not img: - cursor.execute(""" - SELECT image_url FROM metadata_cache_entities - WHERE entity_type = 'album' AND image_url LIKE 'http%' - AND artist_name = ? COLLATE NOCASE LIMIT 1 - """, (name,)) - alb = cursor.fetchone() - if alb: - img = alb['image_url'] - - if img: - aid = (row['spotify_artist_id'] or row['itunes_artist_id'] - or row['deezer_artist_id'] or row['discogs_artist_id']) - if aid: - database.update_watchlist_artist_image(aid, img) - else: - # No external IDs — update by internal row ID directly - cursor.execute(""" - UPDATE watchlist_artists SET image_url = ?, updated_at = CURRENT_TIMESTAMP - WHERE id = ? - """, (img, row['id'])) - conn.commit() - filled += 1 - - if filled: - print(f"Backfilled {filled}/{len(imageless)} watchlist artist images") + filled = scanner.backfill_watchlist_artist_images(scan_profile_id) + if filled: + print(f"Backfilled {filled} watchlist artist images") except Exception as img_err: print(f"Image backfill error: {img_err}") @@ -40703,6 +40631,14 @@ def _process_watchlist_scan_automatically(automation_id=None, profile_id=None): scanner = get_watchlist_scanner(spotify_client) all_profiles = scan_profiles # Used later for discovery pool population + for p in scan_profiles: + try: + filled = scanner.backfill_watchlist_artist_images(p['id']) + if filled: + print(f"Backfilled {filled} watchlist artist images for profile {p['id']}") + except Exception as img_err: + print(f"Image backfill error for profile {p['id']}: {img_err}") + # Initialize detailed progress tracking (same as manual scan) watchlist_scan_state = { 'status': 'scanning',