Restore placeholder filtering and shared image backfill

Bring placeholder-tracklist skipping back into the shared watchlist scan path, and centralize the DB-only artist image backfill in a single helper, `WatchlistScanner.backfill_watchlist_artist_images`, so that both web scan entry points reuse the same logic.
pull/303/head
Antti Kettunen 1 month ago
parent 40fa139804
commit 9d73b8b561

@ -517,6 +517,87 @@ class WatchlistScanner:
return None
def backfill_watchlist_artist_images(self, profile_id: int) -> int:
    """Backfill missing watchlist artist images from data already in the database.

    Every ``watchlist_artists`` row of ``profile_id`` whose ``image_url`` is
    NULL, empty, the literal string ``'None'`` or not an ``http``-prefixed URL
    is examined. The first source that yields an image wins:

    1. a cached artist entity (matched by name, case-insensitively) that has
       an ``http``-prefixed image URL;
    2. a Deezer image URL formatted from the row's own ``deezer_artist_id``;
    3. a Deezer image URL formatted from a cached Deezer artist entity;
    4. the image of any cached album credited to the same artist name.

    Deezer URLs are only formatted here, never fetched.

    Args:
        profile_id: Profile whose watchlist artists should be checked.

    Returns:
        Number of artists for which an image update was issued. This helper
        is best-effort and never raises: on failure it logs a warning and
        returns the number of artists handled before the failure.
    """
    # Initialised outside the try so the except branch can report the rows
    # that were already updated before a failure.
    filled = 0
    try:
        conn = self.database._get_connection()
        cursor = conn.cursor()
        cursor.execute("""
            SELECT id, artist_name, spotify_artist_id, itunes_artist_id,
            deezer_artist_id, discogs_artist_id
            FROM watchlist_artists
            WHERE profile_id = ? AND (image_url IS NULL OR image_url = '' OR image_url = 'None'
            OR image_url NOT LIKE 'http%')
        """, (profile_id,))
        imageless = cursor.fetchall()
        if not imageless:
            return 0

        logger.info("Backfilling images for %s watchlist artists (profile %s)...", len(imageless), profile_id)

        for row in imageless:
            name = row['artist_name']
            img = None

            # 1. Check metadata cache for artist image
            cursor.execute("""
                SELECT image_url FROM metadata_cache_entities
                WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE
                AND image_url IS NOT NULL AND image_url LIKE 'http%'
                LIMIT 1
            """, (name,))
            cr = cursor.fetchone()
            if cr:
                img = cr['image_url']

            # 2. Deezer direct URL (no API call needed)
            if not img and row['deezer_artist_id']:
                img = f"https://api.deezer.com/artist/{row['deezer_artist_id']}/image?size=big"

            # 3. Deezer ID from cache (artist may have a Deezer match we haven't stored on watchlist)
            if not img:
                cursor.execute("""
                    SELECT entity_id FROM metadata_cache_entities
                    WHERE entity_type = 'artist' AND source = 'deezer'
                    AND name = ? COLLATE NOCASE LIMIT 1
                """, (name,))
                dz = cursor.fetchone()
                if dz and dz['entity_id']:
                    img = f"https://api.deezer.com/artist/{dz['entity_id']}/image?size=big"

            # 4. Album art fallback (iTunes artists have no artist images)
            if not img:
                cursor.execute("""
                    SELECT image_url FROM metadata_cache_entities
                    WHERE entity_type = 'album' AND image_url LIKE 'http%'
                    AND artist_name = ? COLLATE NOCASE LIMIT 1
                """, (name,))
                alb = cursor.fetchone()
                if alb:
                    img = alb['image_url']

            if not img:
                continue

            # Prefer an external provider ID so the database layer's own
            # update routine is used; first non-empty ID wins.
            aid = (row['spotify_artist_id'] or row['itunes_artist_id']
                   or row['deezer_artist_id'] or row['discogs_artist_id'])
            if aid:
                self.database.update_watchlist_artist_image(aid, img)
            else:
                # No external IDs — update by internal row ID directly.
                # Committed per row so no write transaction stays open on
                # this connection while later rows go through
                # update_watchlist_artist_image (which may use a different
                # connection — not visible from here).
                cursor.execute("""
                    UPDATE watchlist_artists SET image_url = ?, updated_at = CURRENT_TIMESTAMP
                    WHERE id = ?
                """, (img, row['id']))
                conn.commit()
            filled += 1

        if filled:
            logger.info("Backfilled %s/%s watchlist artist images (profile %s)", filled, len(imageless), profile_id)
        return filled
    except Exception as e:
        # Best-effort: never let an image backfill abort a scan. Logged at
        # WARNING (not DEBUG) because callers cannot observe the failure any
        # other way — this method swallows the exception.
        logger.warning("Error backfilling watchlist artist images for profile %s: %s", profile_id, e, exc_info=True)
        return filled
def get_artist_discography_for_watchlist(self, watchlist_artist: WatchlistArtist, last_scan_timestamp: Optional[datetime] = None) -> Optional[List]:
"""
Get artist's discography using the active provider, with proper ID resolution.
@ -836,6 +917,9 @@ class WatchlistScanner:
continue
tracks = album_data['tracks']['items']
if self._has_placeholder_tracks(tracks):
logger.info("Skipping album with placeholder tracks: %s", album_data.get('name', album.name))
continue
if not self._should_include_release(len(tracks), artist):
continue

@ -157,6 +157,64 @@ def test_scan_watchlist_artists_scans_tracks_and_updates_state(monkeypatch):
assert scan_state["recent_wishlist_additions"][0]["track_name"] == "Track One"
def test_scan_watchlist_artists_skips_placeholder_tracklists(monkeypatch):
    """An album whose tracks are all named "Track N" must add nothing to the wishlist.

    Every other filter is stubbed to accept the release and its tracks, so a
    zero count can only come from the scan skipping the album (presumably via
    the scanner's placeholder-track check — the check itself is not stubbed).
    """

    def returning(value):
        # Factory so each stub captures its own return value.
        return lambda *_args, **_kwargs: value

    # Zero out the pacing delays so the scan does not sleep.
    for delay_name in ("DELAY_BETWEEN_ARTISTS", "DELAY_BETWEEN_ALBUMS"):
        monkeypatch.setattr(watchlist_scanner_module, delay_name, 0)

    artist = _build_artist()
    album = types.SimpleNamespace(id="album-1", name="Album One")

    placeholder_items = [
        {
            "id": f"track-{number}",
            "name": f"Track {number}",
            "track_number": number,
            "disc_number": 1,
            "artists": [{"name": "Artist One"}],
        }
        for number in (1, 2)
    ]
    album_data = {
        "name": "Album One",
        "images": [{"url": "https://example.com/album.jpg"}],
        "tracks": {"items": placeholder_items},
    }

    scanner = _build_scanner(album_data, [artist])
    scanner._database.has_fresh_similar_artists = lambda *args, **kwargs: False

    wishlist_calls = []

    def record_wishlist_add(*args, **kwargs):
        wishlist_calls.append((args, kwargs))
        return True

    # Stubs that accept any arguments and return a fixed value.
    permissive_stubs = {
        "_backfill_missing_ids": None,
        "get_artist_image_url": "https://example.com/artist.jpg",
        "get_artist_discography_for_watchlist": [album],
        "_should_include_release": True,
        "_should_include_track": True,
        "is_track_missing_from_library": True,
        "update_artist_scan_timestamp": True,
        "update_similar_artists": True,
        "_backfill_similar_artists_itunes_ids": 0,
    }
    for attribute, value in permissive_stubs.items():
        monkeypatch.setattr(scanner, attribute, returning(value))

    # These two are replaced with strictly zero-argument callables.
    monkeypatch.setattr(scanner, "_get_lookback_period_setting", lambda: "30")
    monkeypatch.setattr(scanner, "_get_rescan_cutoff", lambda: None)
    monkeypatch.setattr(scanner, "add_track_to_wishlist", record_wishlist_add)

    scan_state = {}
    results = scanner.scan_watchlist_artists([artist], scan_state=scan_state)

    assert len(results) == 1
    outcome = results[0]
    assert outcome.success is True
    assert outcome.new_tracks_found == 0
    assert outcome.tracks_added_to_wishlist == 0
    assert wishlist_calls == []

    summary = scan_state["summary"]
    assert summary["new_tracks_found"] == 0
    assert summary["tracks_added_to_wishlist"] == 0
def test_scan_watchlist_artists_honors_cancel_check(monkeypatch):
monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ARTISTS", 0)
monkeypatch.setattr(watchlist_scanner_module, "DELAY_BETWEEN_ALBUMS", 0)

@ -39836,82 +39836,10 @@ def start_watchlist_scan():
except Exception as backfill_error:
print(f"Error during {_bf_provider} ID backfilling: {backfill_error}")
# Continue with next provider
# IMAGE BACKFILL — fix watchlist artists with missing images
# Uses DB-only lookups (metadata cache + album art) — no API calls
try:
conn = database._get_connection()
cursor = conn.cursor()
cursor.execute("""
SELECT id, artist_name, spotify_artist_id, itunes_artist_id,
deezer_artist_id, discogs_artist_id
FROM watchlist_artists
WHERE profile_id = ? AND (image_url IS NULL OR image_url = '' OR image_url = 'None'
OR image_url NOT LIKE 'http%')
""", (scan_profile_id,))
imageless = cursor.fetchall()
if imageless:
print(f"Backfilling images for {len(imageless)} watchlist artists...")
filled = 0
for row in imageless:
name = row['artist_name']
nn = name.lower().strip()
img = None
# 1. Check metadata cache for artist image
cursor.execute("""
SELECT image_url FROM metadata_cache_entities
WHERE entity_type = 'artist' AND name = ? COLLATE NOCASE
AND image_url IS NOT NULL AND image_url LIKE 'http%'
LIMIT 1
""", (name,))
cr = cursor.fetchone()
if cr:
img = cr['image_url']
# 2. Deezer direct URL (no API call needed)
if not img and row['deezer_artist_id']:
img = f"https://api.deezer.com/artist/{row['deezer_artist_id']}/image?size=big"
# 3. Deezer ID from cache (artist may have a Deezer match we haven't stored on watchlist)
if not img:
cursor.execute("""
SELECT entity_id FROM metadata_cache_entities
WHERE entity_type = 'artist' AND source = 'deezer'
AND name = ? COLLATE NOCASE LIMIT 1
""", (name,))
dz = cursor.fetchone()
if dz and dz['entity_id']:
img = f"https://api.deezer.com/artist/{dz['entity_id']}/image?size=big"
# 4. Album art fallback (iTunes artists have no artist images)
if not img:
cursor.execute("""
SELECT image_url FROM metadata_cache_entities
WHERE entity_type = 'album' AND image_url LIKE 'http%'
AND artist_name = ? COLLATE NOCASE LIMIT 1
""", (name,))
alb = cursor.fetchone()
if alb:
img = alb['image_url']
if img:
aid = (row['spotify_artist_id'] or row['itunes_artist_id']
or row['deezer_artist_id'] or row['discogs_artist_id'])
if aid:
database.update_watchlist_artist_image(aid, img)
else:
# No external IDs — update by internal row ID directly
cursor.execute("""
UPDATE watchlist_artists SET image_url = ?, updated_at = CURRENT_TIMESTAMP
WHERE id = ?
""", (img, row['id']))
conn.commit()
filled += 1
if filled:
print(f"Backfilled {filled}/{len(imageless)} watchlist artist images")
filled = scanner.backfill_watchlist_artist_images(scan_profile_id)
if filled:
print(f"Backfilled {filled} watchlist artist images")
except Exception as img_err:
print(f"Image backfill error: {img_err}")
@ -40703,6 +40631,14 @@ def _process_watchlist_scan_automatically(automation_id=None, profile_id=None):
scanner = get_watchlist_scanner(spotify_client)
all_profiles = scan_profiles # Used later for discovery pool population
for p in scan_profiles:
try:
filled = scanner.backfill_watchlist_artist_images(p['id'])
if filled:
print(f"Backfilled {filled} watchlist artist images for profile {p['id']}")
except Exception as img_err:
print(f"Image backfill error for profile {p['id']}: {img_err}")
# Initialize detailed progress tracking (same as manual scan)
watchlist_scan_state = {
'status': 'scanning',

Loading…
Cancel
Save