Clarify similar-artist freshness and backfill

Freshness is now determined by age alone, and scan-time backfill runs as a separate step with no Spotify-auth gating and without the retired iTunes compatibility flags.
pull/306/head
Antti Kettunen 1 month ago
parent 8382b8e247
commit eead0c3dac

@ -1255,8 +1255,7 @@ class WatchlistScanner:
if scan_state is not None:
scan_state['current_phase'] = 'fetching_similar_artists'
artist_profile_id = getattr(artist, 'profile_id', profile_id)
spotify_authenticated = self.spotify_client and self.spotify_client.is_spotify_authenticated()
if self.database.has_fresh_similar_artists(source_artist_id, days_threshold=30, require_spotify=spotify_authenticated, profile_id=artist_profile_id):
if self.database.has_fresh_similar_artists(source_artist_id, days_threshold=30, profile_id=artist_profile_id):
logger.info("Similar artists for %s are cached and fresh (profile %s)", artist.artist_name, artist_profile_id)
self._backfill_similar_artists_fallback_ids(source_artist_id, profile_id=artist_profile_id)
else:
@ -2325,10 +2324,6 @@ class WatchlistScanner:
logger.error("Error backfilling similar artists IDs: %s", e)
return 0
def _backfill_similar_artists_itunes_ids(self, source_artist_id: str, profile_id: int = 1) -> int:
"""Backward-compatible alias for the provider-priority backfill path."""
return self._backfill_similar_artists_fallback_ids(source_artist_id, profile_id=profile_id)
def update_similar_artists(
self,
watchlist_artist: WatchlistArtist,
@ -3631,8 +3626,8 @@ class WatchlistScanner:
except Exception as e:
logger.debug(f"Error building BYLT for {played_artist.get('name', '?')}: {e}")
# Also save without suffix for backward compatibility (use active source)
active_source = 'spotify' if spotify_available else fallback_source
# Also save without suffix for backward compatibility (use first active source).
active_source = sources_to_process[0]
release_radar_key = f'release_radar_{active_source}'
discovery_weekly_key = f'discovery_weekly_{active_source}'

@ -7473,19 +7473,16 @@ class MusicDatabase:
logger.error(f"Error updating similar artist metadata by external ID: {e}")
return False
def has_fresh_similar_artists(self, source_artist_id: str, days_threshold: int = 30, require_itunes: bool = True, require_spotify: bool = False, profile_id: int = 1) -> bool:
def has_fresh_similar_artists(self, source_artist_id: str, days_threshold: int = 30, profile_id: int = 1) -> bool:
"""
Check if we have cached similar artists that are still fresh (<days_threshold old).
Also checks that similar artists have the required provider IDs.
Args:
source_artist_id: The source artist ID to check
days_threshold: Maximum age in days to consider fresh
require_itunes: If True, also requires iTunes IDs to be present (for seamless provider switching)
require_spotify: If True, also requires Spotify IDs to be present (for Spotify discovery)
profile_id: Profile to check freshness for
Returns True if we have recent data with required IDs, False if data is stale, missing, or incomplete.
Returns True if we have recent data, False if data is stale or missing.
"""
try:
with self._get_connection() as conn:
@ -7510,40 +7507,6 @@ class MusicDatabase:
if days_since_update >= days_threshold:
return False
# Check if we have iTunes IDs (for seamless provider switching)
if require_itunes:
cursor.execute("""
SELECT COUNT(*) as total,
SUM(CASE WHEN similar_artist_itunes_id IS NOT NULL AND similar_artist_itunes_id != '' THEN 1 ELSE 0 END) as has_itunes
FROM similar_artists
WHERE source_artist_id = ? AND profile_id = ?
""", (source_artist_id, profile_id))
id_row = cursor.fetchone()
if id_row and id_row['total'] > 0:
# If less than 50% have iTunes IDs, consider stale and refetch
itunes_ratio = id_row['has_itunes'] / id_row['total']
if itunes_ratio < 0.5:
logger.debug(f"Similar artists for {source_artist_id} missing iTunes IDs ({id_row['has_itunes']}/{id_row['total']}), will refetch")
return False
# Check if we have Spotify IDs (for Spotify discovery)
if require_spotify:
cursor.execute("""
SELECT COUNT(*) as total,
SUM(CASE WHEN similar_artist_spotify_id IS NOT NULL AND similar_artist_spotify_id != '' THEN 1 ELSE 0 END) as has_spotify
FROM similar_artists
WHERE source_artist_id = ? AND profile_id = ?
""", (source_artist_id, profile_id))
id_row = cursor.fetchone()
if id_row and id_row['total'] > 0:
# If less than 50% have Spotify IDs, consider stale and refetch
spotify_ratio = id_row['has_spotify'] / id_row['total']
if spotify_ratio < 0.5:
logger.debug(f"Similar artists for {source_artist_id} missing Spotify IDs ({id_row['has_spotify']}/{id_row['total']}), will refetch")
return False
return True
except Exception as e:

@ -1105,6 +1105,29 @@ def test_curate_discovery_playlists_uses_source_priority_for_recent_albums(monke
assert any(key == "discovery_weekly_deezer" for key, _ in saved_playlists)
def test_has_fresh_similar_artists_uses_age_only(tmp_path):
    """Freshness of cached similar artists must follow the row's ``last_updated`` age.

    One similar-artist row is stored, then its timestamp is rewritten twice:
    a just-now timestamp must be reported fresh, and a timestamp well past the
    threshold must be reported stale. The provider IDs on the row never change
    between the two checks, so only the age can account for the different result.
    """
    from datetime import datetime, timedelta

    from database.music_database import MusicDatabase

    db = MusicDatabase(str(tmp_path / "music.db"))
    db.add_or_update_similar_artist(
        source_artist_id="source-1",
        similar_artist_name="Similar Artist",
        similar_artist_itunes_id="it-artist",
        similar_artist_deezer_id="dz-artist",
        profile_id=1,
    )

    def set_last_updated(moment):
        # Overwrite the stored timestamp directly so the test controls the row's age.
        with db._get_connection() as conn:
            conn.execute(
                "UPDATE similar_artists SET last_updated = ? WHERE source_artist_id = ? AND profile_id = ?",
                (moment.isoformat(), "source-1", 1),
            )
            conn.commit()

    set_last_updated(datetime.now())
    assert db.has_fresh_similar_artists("source-1", days_threshold=30, profile_id=1) is True

    # Push the same row far beyond the 30-day threshold; it must now count as stale.
    set_last_updated(datetime.now() - timedelta(days=60))
    assert db.has_fresh_similar_artists("source-1", days_threshold=30, profile_id=1) is False
def test_match_to_spotify_uses_strict_lookup():
spotify_client = _FakeSpotifyClient(
search_results=[types.SimpleNamespace(id="fallback-id", name="Artist One")]

Loading…
Cancel
Save