From f0270ce7a5e8ea58041f4eeca23be7f391c7e335 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Thu, 19 Mar 2026 11:27:08 -0700 Subject: [PATCH] Expand Album Completeness to support iTunes and Deezer sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Was Spotify-only — users without Spotify got zero results. Now queries albums with any source ID (spotify_album_id, itunes_album_id, deezer_id) and uses the matching API client for track count and missing track lookup. Falls back gracefully across sources with client-type detection. --- core/repair_jobs/album_completeness.py | 231 +++++++++++++++++++------ 1 file changed, 175 insertions(+), 56 deletions(-) diff --git a/core/repair_jobs/album_completeness.py b/core/repair_jobs/album_completeness.py index 592300fa..885f9bdb 100644 --- a/core/repair_jobs/album_completeness.py +++ b/core/repair_jobs/album_completeness.py @@ -14,8 +14,8 @@ class AlbumCompletenessJob(RepairJob): description = 'Checks if all tracks from albums are present' help_text = ( 'Compares the number of tracks you have for each album against the expected total ' - 'from the Spotify tracklist. Albums where tracks are missing get flagged as findings ' - 'with details about which tracks are absent.\n\n' + 'from the album tracklist (via Spotify, iTunes, or Deezer). Albums where tracks are ' + 'missing get flagged as findings with details about which tracks are absent.\n\n' 'Useful for catching partial downloads or albums where some tracks failed to download. 
' 'You can use the Download Missing feature from the album page to fill gaps.\n\n' 'Settings:\n' @@ -36,19 +36,45 @@ class AlbumCompletenessJob(RepairJob): settings = self._get_settings(context) min_tracks = settings.get('min_tracks_for_check', 3) - # Fetch all albums with a spotify_album_id — filter by expected track count in the loop + # Fetch all albums with ANY external source ID — not just Spotify albums = [] conn = None try: conn = context.db._get_connection() cursor = conn.cursor() - cursor.execute(""" - SELECT al.id, al.title, ar.name, al.spotify_album_id, al.track_count, - COUNT(t.id) as actual_count, al.thumb_url, ar.thumb_url + + # Check which source columns exist (older DBs may lack some) + cursor.execute("PRAGMA table_info(albums)") + columns = {row[1] for row in cursor.fetchall()} + has_itunes = 'itunes_album_id' in columns + has_deezer = 'deezer_id' in columns + + # Build SELECT with available source ID columns + select_cols = [ + 'al.id', 'al.title', 'ar.name', 'al.spotify_album_id', 'al.track_count', + 'COUNT(t.id) as actual_count', 'al.thumb_url', 'ar.thumb_url', + ] + if has_itunes: + select_cols.append('al.itunes_album_id') + if has_deezer: + select_cols.append('al.deezer_id') + + # WHERE: album has at least one source ID + where_parts = [] + if True: # spotify always exists + where_parts.append("(al.spotify_album_id IS NOT NULL AND al.spotify_album_id != '')") + if has_itunes: + where_parts.append("(al.itunes_album_id IS NOT NULL AND al.itunes_album_id != '')") + if has_deezer: + where_parts.append("(al.deezer_id IS NOT NULL AND al.deezer_id != '')") + where_clause = ' OR '.join(where_parts) + + cursor.execute(f""" + SELECT {', '.join(select_cols)} FROM albums al LEFT JOIN artists ar ON ar.id = al.artist_id LEFT JOIN tracks t ON t.album_id = al.id - WHERE al.spotify_album_id IS NOT NULL AND al.spotify_album_id != '' + WHERE {where_clause} GROUP BY al.id """) albums = cursor.fetchall() @@ -69,13 +95,28 @@ class AlbumCompletenessJob(RepairJob): 
if context.report_progress: context.report_progress(phase=f'Checking {total} albums...', total=total) + # Determine column positions based on what we selected + # Fixed: 0=id, 1=title, 2=artist, 3=spotify_id, 4=track_count, 5=actual, 6=album_thumb, 7=artist_thumb + itunes_col = 8 if has_itunes else None + deezer_col = (9 if has_itunes else 8) if has_deezer else None + for i, row in enumerate(albums): if context.check_stop(): return result if i % 10 == 0 and context.wait_if_paused(): return result - album_id, title, artist_name, spotify_album_id, db_track_count, actual_count, album_thumb, artist_thumb = row + album_id = row[0] + title = row[1] + artist_name = row[2] + spotify_album_id = row[3] + db_track_count = row[4] + actual_count = row[5] + album_thumb = row[6] + artist_thumb = row[7] + itunes_album_id = row[itunes_col] if itunes_col is not None else None + deezer_album_id = row[deezer_col] if deezer_col is not None else None + result.scanned += 1 if context.report_progress: @@ -86,16 +127,13 @@ class AlbumCompletenessJob(RepairJob): log_type='info' ) - # If we don't know the expected track count, try to get it from API + # If we don't know the expected track count, try to get it from an API expected_total = db_track_count - if not expected_total and context.spotify_client and not context.is_spotify_rate_limited(): - try: - album_data = context.spotify_client.get_album(spotify_album_id) - if album_data: - expected_total = album_data.get('total_tracks', 0) - except Exception: - pass + if not expected_total: + expected_total = self._get_expected_total( + context, spotify_album_id, itunes_album_id, deezer_album_id + ) # Skip singles/EPs based on expected track count (not local count) if expected_total and expected_total < min_tracks: @@ -111,43 +149,9 @@ class AlbumCompletenessJob(RepairJob): continue # Album is incomplete — try to find which tracks are missing - missing_tracks = [] - if context.spotify_client and not context.is_spotify_rate_limited(): - try: - 
api_tracks = context.spotify_client.get_album_tracks(spotify_album_id) - if api_tracks and 'items' in api_tracks: - # Get track numbers we already have - owned_numbers = set() - conn2 = context.db._get_connection() - cursor2 = conn2.cursor() - cursor2.execute( - "SELECT track_number FROM tracks WHERE album_id = ? AND track_number IS NOT NULL", - (album_id,) - ) - for tr in cursor2.fetchall(): - owned_numbers.add(tr[0]) - conn2.close() - - for item in api_tracks['items']: - tn = item.get('track_number') - if tn and tn not in owned_numbers: - # Extract artist names from Spotify track data - track_artists = [] - for a in item.get('artists', []): - if isinstance(a, dict): - track_artists.append(a.get('name', '')) - elif isinstance(a, str): - track_artists.append(a) - missing_tracks.append({ - 'track_number': tn, - 'name': item.get('name', ''), - 'disc_number': item.get('disc_number', 1), - 'spotify_track_id': item.get('id', ''), - 'duration_ms': item.get('duration_ms', 0), - 'artists': track_artists, - }) - except Exception as e: - logger.debug("Error getting album tracks for %s: %s", spotify_album_id, e) + missing_tracks = self._find_missing_tracks( + context, album_id, spotify_album_id, itunes_album_id, deezer_album_id + ) if context.report_progress: context.report_progress( @@ -156,6 +160,8 @@ class AlbumCompletenessJob(RepairJob): ) if context.create_finding: try: + # Use whichever source ID is available + source_id = spotify_album_id or itunes_album_id or deezer_album_id or '' context.create_finding( job_id=self.job_id, finding_type='incomplete_album', @@ -172,7 +178,9 @@ class AlbumCompletenessJob(RepairJob): 'album_id': album_id, 'album_title': title, 'artist': artist_name, - 'spotify_album_id': spotify_album_id, + 'spotify_album_id': spotify_album_id or '', + 'itunes_album_id': itunes_album_id or '', + 'deezer_album_id': deezer_album_id or '', 'expected_tracks': expected_total, 'actual_tracks': actual_count, 'missing_tracks': missing_tracks, @@ -195,6 +203,106 
@@ class AlbumCompletenessJob(RepairJob): result.scanned, result.findings_created) return result + def _get_expected_total(self, context, spotify_id, itunes_id, deezer_id): + """Try to get the expected track count from any available API source.""" + # Try Spotify first + if spotify_id and context.spotify_client and not context.is_spotify_rate_limited(): + try: + album_data = context.spotify_client.get_album(spotify_id) + if album_data: + total = album_data.get('total_tracks', 0) + if total: + return total + except Exception: + pass + + # Try fallback client (iTunes or Deezer) — both return Spotify-compatible format + # Match the ID to the actual client type to avoid passing iTunes ID to Deezer or vice versa + if context.itunes_client: + is_deezer = type(context.itunes_client).__name__ == 'DeezerClient' + primary_id = deezer_id if is_deezer else itunes_id + secondary_id = itunes_id if is_deezer else deezer_id + for fid in [primary_id, secondary_id]: + if not fid: + continue + try: + api_tracks = context.itunes_client.get_album_tracks(fid) + if api_tracks and 'items' in api_tracks: + return len(api_tracks['items']) + except Exception: + pass + + return 0 + + def _find_missing_tracks(self, context, album_id, spotify_id, itunes_id, deezer_id): + """Identify which specific tracks are missing using any available API source.""" + # Get track numbers we already have + owned_numbers = set() + conn = None + try: + conn = context.db._get_connection() + cursor = conn.cursor() + cursor.execute( + "SELECT track_number FROM tracks WHERE album_id = ? 
AND track_number IS NOT NULL", + (album_id,) + ) + for tr in cursor.fetchall(): + owned_numbers.add(tr[0]) + except Exception: + return [] + finally: + if conn: + conn.close() + + # Try Spotify first + api_tracks = None + if spotify_id and context.spotify_client and not context.is_spotify_rate_limited(): + try: + api_tracks = context.spotify_client.get_album_tracks(spotify_id) + except Exception as e: + logger.debug("Error getting Spotify album tracks for %s: %s", spotify_id, e) + + # Try fallback client (iTunes or Deezer) + if not api_tracks or 'items' not in (api_tracks or {}): + if context.itunes_client: + is_deezer = type(context.itunes_client).__name__ == 'DeezerClient' + primary_id = deezer_id if is_deezer else itunes_id + secondary_id = itunes_id if is_deezer else deezer_id + for fid in [primary_id, secondary_id]: + if not fid: + continue + try: + api_tracks = context.itunes_client.get_album_tracks(fid) + if api_tracks and 'items' in api_tracks: + break + except Exception as e: + logger.debug("Error getting fallback album tracks for %s: %s", fid, e) + + if not api_tracks or 'items' not in api_tracks: + return [] + + # Spotify, iTunes, and Deezer all return the same format: + # items[].track_number, items[].name, items[].disc_number, items[].id, items[].artists + missing_tracks = [] + for item in api_tracks['items']: + tn = item.get('track_number') + if tn and tn not in owned_numbers: + track_artists = [] + for a in item.get('artists', []): + if isinstance(a, dict): + track_artists.append(a.get('name', '')) + elif isinstance(a, str): + track_artists.append(a) + missing_tracks.append({ + 'track_number': tn, + 'name': item.get('name', ''), + 'disc_number': item.get('disc_number', 1), + 'spotify_track_id': item.get('id', ''), + 'duration_ms': item.get('duration_ms', 0), + 'artists': track_artists, + }) + return missing_tracks + def _get_settings(self, context: JobContext) -> dict: if not context.config_manager: return self.default_settings.copy() @@ -208,9 
+316,20 @@ class AlbumCompletenessJob(RepairJob): try: conn = context.db._get_connection() cursor = conn.cursor() - cursor.execute(""" + + # Check which columns exist + cursor.execute("PRAGMA table_info(albums)") + columns = {row[1] for row in cursor.fetchall()} + + where_parts = ["(spotify_album_id IS NOT NULL AND spotify_album_id != '')"] + if 'itunes_album_id' in columns: + where_parts.append("(itunes_album_id IS NOT NULL AND itunes_album_id != '')") + if 'deezer_id' in columns: + where_parts.append("(deezer_id IS NOT NULL AND deezer_id != '')") + + cursor.execute(f""" SELECT COUNT(*) FROM albums - WHERE spotify_album_id IS NOT NULL AND spotify_album_id != '' + WHERE {' OR '.join(where_parts)} """) row = cursor.fetchone() return row[0] if row else 0