Expand Album Completeness to support iTunes and Deezer sources

Was Spotify-only — users without Spotify got zero results. Now queries
albums with any source ID (spotify_album_id, itunes_album_id, deezer_id)
and uses the matching API client for track count and missing track lookup.
Falls back gracefully across sources with client-type detection.
pull/253/head
Broque Thomas 2 months ago
parent e2345c659d
commit f0270ce7a5

@ -14,8 +14,8 @@ class AlbumCompletenessJob(RepairJob):
description = 'Checks if all tracks from albums are present'
help_text = (
'Compares the number of tracks you have for each album against the expected total '
'from the Spotify tracklist. Albums where tracks are missing get flagged as findings '
'with details about which tracks are absent.\n\n'
'from the album tracklist (via Spotify, iTunes, or Deezer). Albums where tracks are '
'missing get flagged as findings with details about which tracks are absent.\n\n'
'Useful for catching partial downloads or albums where some tracks failed to download. '
'You can use the Download Missing feature from the album page to fill gaps.\n\n'
'Settings:\n'
@ -36,19 +36,45 @@ class AlbumCompletenessJob(RepairJob):
settings = self._get_settings(context)
min_tracks = settings.get('min_tracks_for_check', 3)
# Fetch all albums with a spotify_album_id — filter by expected track count in the loop
# Fetch all albums with ANY external source ID — not just Spotify
albums = []
conn = None
try:
conn = context.db._get_connection()
cursor = conn.cursor()
cursor.execute("""
SELECT al.id, al.title, ar.name, al.spotify_album_id, al.track_count,
COUNT(t.id) as actual_count, al.thumb_url, ar.thumb_url
# Check which source columns exist (older DBs may lack some)
cursor.execute("PRAGMA table_info(albums)")
columns = {row[1] for row in cursor.fetchall()}
has_itunes = 'itunes_album_id' in columns
has_deezer = 'deezer_id' in columns
# Build SELECT with available source ID columns
select_cols = [
'al.id', 'al.title', 'ar.name', 'al.spotify_album_id', 'al.track_count',
'COUNT(t.id) as actual_count', 'al.thumb_url', 'ar.thumb_url',
]
if has_itunes:
select_cols.append('al.itunes_album_id')
if has_deezer:
select_cols.append('al.deezer_id')
# WHERE: album has at least one source ID
where_parts = []
if True: # spotify always exists
where_parts.append("(al.spotify_album_id IS NOT NULL AND al.spotify_album_id != '')")
if has_itunes:
where_parts.append("(al.itunes_album_id IS NOT NULL AND al.itunes_album_id != '')")
if has_deezer:
where_parts.append("(al.deezer_id IS NOT NULL AND al.deezer_id != '')")
where_clause = ' OR '.join(where_parts)
cursor.execute(f"""
SELECT {', '.join(select_cols)}
FROM albums al
LEFT JOIN artists ar ON ar.id = al.artist_id
LEFT JOIN tracks t ON t.album_id = al.id
WHERE al.spotify_album_id IS NOT NULL AND al.spotify_album_id != ''
WHERE {where_clause}
GROUP BY al.id
""")
albums = cursor.fetchall()
@ -69,13 +95,28 @@ class AlbumCompletenessJob(RepairJob):
if context.report_progress:
context.report_progress(phase=f'Checking {total} albums...', total=total)
# Determine column positions based on what we selected
# Fixed: 0=id, 1=title, 2=artist, 3=spotify_id, 4=track_count, 5=actual, 6=album_thumb, 7=artist_thumb
itunes_col = 8 if has_itunes else None
deezer_col = (9 if has_itunes else 8) if has_deezer else None
for i, row in enumerate(albums):
if context.check_stop():
return result
if i % 10 == 0 and context.wait_if_paused():
return result
album_id, title, artist_name, spotify_album_id, db_track_count, actual_count, album_thumb, artist_thumb = row
album_id = row[0]
title = row[1]
artist_name = row[2]
spotify_album_id = row[3]
db_track_count = row[4]
actual_count = row[5]
album_thumb = row[6]
artist_thumb = row[7]
itunes_album_id = row[itunes_col] if itunes_col is not None else None
deezer_album_id = row[deezer_col] if deezer_col is not None else None
result.scanned += 1
if context.report_progress:
@ -86,16 +127,13 @@ class AlbumCompletenessJob(RepairJob):
log_type='info'
)
# If we don't know the expected track count, try to get it from API
# If we don't know the expected track count, try to get it from an API
expected_total = db_track_count
if not expected_total and context.spotify_client and not context.is_spotify_rate_limited():
try:
album_data = context.spotify_client.get_album(spotify_album_id)
if album_data:
expected_total = album_data.get('total_tracks', 0)
except Exception:
pass
if not expected_total:
expected_total = self._get_expected_total(
context, spotify_album_id, itunes_album_id, deezer_album_id
)
# Skip singles/EPs based on expected track count (not local count)
if expected_total and expected_total < min_tracks:
@ -111,43 +149,9 @@ class AlbumCompletenessJob(RepairJob):
continue
# Album is incomplete — try to find which tracks are missing
missing_tracks = []
if context.spotify_client and not context.is_spotify_rate_limited():
try:
api_tracks = context.spotify_client.get_album_tracks(spotify_album_id)
if api_tracks and 'items' in api_tracks:
# Get track numbers we already have
owned_numbers = set()
conn2 = context.db._get_connection()
cursor2 = conn2.cursor()
cursor2.execute(
"SELECT track_number FROM tracks WHERE album_id = ? AND track_number IS NOT NULL",
(album_id,)
)
for tr in cursor2.fetchall():
owned_numbers.add(tr[0])
conn2.close()
for item in api_tracks['items']:
tn = item.get('track_number')
if tn and tn not in owned_numbers:
# Extract artist names from Spotify track data
track_artists = []
for a in item.get('artists', []):
if isinstance(a, dict):
track_artists.append(a.get('name', ''))
elif isinstance(a, str):
track_artists.append(a)
missing_tracks.append({
'track_number': tn,
'name': item.get('name', ''),
'disc_number': item.get('disc_number', 1),
'spotify_track_id': item.get('id', ''),
'duration_ms': item.get('duration_ms', 0),
'artists': track_artists,
})
except Exception as e:
logger.debug("Error getting album tracks for %s: %s", spotify_album_id, e)
missing_tracks = self._find_missing_tracks(
context, album_id, spotify_album_id, itunes_album_id, deezer_album_id
)
if context.report_progress:
context.report_progress(
@ -156,6 +160,8 @@ class AlbumCompletenessJob(RepairJob):
)
if context.create_finding:
try:
# Use whichever source ID is available
source_id = spotify_album_id or itunes_album_id or deezer_album_id or ''
context.create_finding(
job_id=self.job_id,
finding_type='incomplete_album',
@ -172,7 +178,9 @@ class AlbumCompletenessJob(RepairJob):
'album_id': album_id,
'album_title': title,
'artist': artist_name,
'spotify_album_id': spotify_album_id,
'spotify_album_id': spotify_album_id or '',
'itunes_album_id': itunes_album_id or '',
'deezer_album_id': deezer_album_id or '',
'expected_tracks': expected_total,
'actual_tracks': actual_count,
'missing_tracks': missing_tracks,
@ -195,6 +203,106 @@ class AlbumCompletenessJob(RepairJob):
result.scanned, result.findings_created)
return result
def _get_expected_total(self, context, spotify_id, itunes_id, deezer_id):
"""Try to get the expected track count from any available API source."""
# Try Spotify first
if spotify_id and context.spotify_client and not context.is_spotify_rate_limited():
try:
album_data = context.spotify_client.get_album(spotify_id)
if album_data:
total = album_data.get('total_tracks', 0)
if total:
return total
except Exception:
pass
# Try fallback client (iTunes or Deezer) — both return Spotify-compatible format
# Match the ID to the actual client type to avoid passing iTunes ID to Deezer or vice versa
if context.itunes_client:
is_deezer = type(context.itunes_client).__name__ == 'DeezerClient'
primary_id = deezer_id if is_deezer else itunes_id
secondary_id = itunes_id if is_deezer else deezer_id
for fid in [primary_id, secondary_id]:
if not fid:
continue
try:
api_tracks = context.itunes_client.get_album_tracks(fid)
if api_tracks and 'items' in api_tracks:
return len(api_tracks['items'])
except Exception:
pass
return 0
def _find_missing_tracks(self, context, album_id, spotify_id, itunes_id, deezer_id):
"""Identify which specific tracks are missing using any available API source."""
# Get track numbers we already have
owned_numbers = set()
conn = None
try:
conn = context.db._get_connection()
cursor = conn.cursor()
cursor.execute(
"SELECT track_number FROM tracks WHERE album_id = ? AND track_number IS NOT NULL",
(album_id,)
)
for tr in cursor.fetchall():
owned_numbers.add(tr[0])
except Exception:
return []
finally:
if conn:
conn.close()
# Try Spotify first
api_tracks = None
if spotify_id and context.spotify_client and not context.is_spotify_rate_limited():
try:
api_tracks = context.spotify_client.get_album_tracks(spotify_id)
except Exception as e:
logger.debug("Error getting Spotify album tracks for %s: %s", spotify_id, e)
# Try fallback client (iTunes or Deezer)
if not api_tracks or 'items' not in (api_tracks or {}):
if context.itunes_client:
is_deezer = type(context.itunes_client).__name__ == 'DeezerClient'
primary_id = deezer_id if is_deezer else itunes_id
secondary_id = itunes_id if is_deezer else deezer_id
for fid in [primary_id, secondary_id]:
if not fid:
continue
try:
api_tracks = context.itunes_client.get_album_tracks(fid)
if api_tracks and 'items' in api_tracks:
break
except Exception as e:
logger.debug("Error getting fallback album tracks for %s: %s", fid, e)
if not api_tracks or 'items' not in api_tracks:
return []
# Both Spotify, iTunes, and Deezer return the same format:
# items[].track_number, items[].name, items[].disc_number, items[].id, items[].artists
missing_tracks = []
for item in api_tracks['items']:
tn = item.get('track_number')
if tn and tn not in owned_numbers:
track_artists = []
for a in item.get('artists', []):
if isinstance(a, dict):
track_artists.append(a.get('name', ''))
elif isinstance(a, str):
track_artists.append(a)
missing_tracks.append({
'track_number': tn,
'name': item.get('name', ''),
'disc_number': item.get('disc_number', 1),
'spotify_track_id': item.get('id', ''),
'duration_ms': item.get('duration_ms', 0),
'artists': track_artists,
})
return missing_tracks
def _get_settings(self, context: JobContext) -> dict:
if not context.config_manager:
return self.default_settings.copy()
@ -208,9 +316,20 @@ class AlbumCompletenessJob(RepairJob):
try:
conn = context.db._get_connection()
cursor = conn.cursor()
cursor.execute("""
# Check which columns exist
cursor.execute("PRAGMA table_info(albums)")
columns = {row[1] for row in cursor.fetchall()}
where_parts = ["(spotify_album_id IS NOT NULL AND spotify_album_id != '')"]
if 'itunes_album_id' in columns:
where_parts.append("(itunes_album_id IS NOT NULL AND itunes_album_id != '')")
if 'deezer_id' in columns:
where_parts.append("(deezer_id IS NOT NULL AND deezer_id != '')")
cursor.execute(f"""
SELECT COUNT(*) FROM albums
WHERE spotify_album_id IS NOT NULL AND spotify_album_id != ''
WHERE {' OR '.join(where_parts)}
""")
row = cursor.fetchone()
return row[0] if row else 0

Loading…
Cancel
Save