Expand Album Completeness to support iTunes and Deezer sources

Was Spotify-only — users without Spotify got zero results. Now queries albums with any source ID (spotify_album_id, itunes_album_id, deezer_id) and uses the matching API client for track count and missing track lookup. Falls back gracefully across sources with client-type detection.
2 months ago · f0270ce7a5
parent e2345c659d
commit f0270ce7a5
1 changed files with 175 additions and 56 deletions
--- a/core/repair_jobs/album_completeness.py
+++ b/core/repair_jobs/album_completeness.py
@ -14,8 +14,8 @@ class AlbumCompletenessJob(RepairJob):
    description = 'Checks if all tracks from albums are present'
    help_text = (
        'Compares the number of tracks you have for each album against the expected total '
-        'from the Spotify tracklist. Albums where tracks are missing get flagged as findings '
-        'with details about which tracks are absent.\n\n'
+        'from the album tracklist (via Spotify, iTunes, or Deezer). Albums where tracks are '
+        'missing get flagged as findings with details about which tracks are absent.\n\n'
        'Useful for catching partial downloads or albums where some tracks failed to download. '
        'You can use the Download Missing feature from the album page to fill gaps.\n\n'
        'Settings:\n'
@ -36,19 +36,45 @@ class AlbumCompletenessJob(RepairJob):
        settings = self._get_settings(context)
        min_tracks = settings.get('min_tracks_for_check', 3)

-        # Fetch all albums with a spotify_album_id — filter by expected track count in the loop
+        # Fetch all albums with ANY external source ID — not just Spotify
        albums = []
        conn = None
        try:
            conn = context.db._get_connection()
            cursor = conn.cursor()
-            cursor.execute("""
-                SELECT al.id, al.title, ar.name, al.spotify_album_id, al.track_count,
-                       COUNT(t.id) as actual_count, al.thumb_url, ar.thumb_url
+
+            # Check which source columns exist (older DBs may lack some)
+            cursor.execute("PRAGMA table_info(albums)")
+            columns = {row[1] for row in cursor.fetchall()}
+            has_itunes = 'itunes_album_id' in columns
+            has_deezer = 'deezer_id' in columns
+
+            # Build SELECT with available source ID columns
+            select_cols = [
+                'al.id', 'al.title', 'ar.name', 'al.spotify_album_id', 'al.track_count',
+                'COUNT(t.id) as actual_count', 'al.thumb_url', 'ar.thumb_url',
+            ]
+            if has_itunes:
+                select_cols.append('al.itunes_album_id')
+            if has_deezer:
+                select_cols.append('al.deezer_id')
+
+            # WHERE: album has at least one source ID
+            where_parts = []
+            if True:  # spotify always exists
+                where_parts.append("(al.spotify_album_id IS NOT NULL AND al.spotify_album_id != '')")
+            if has_itunes:
+                where_parts.append("(al.itunes_album_id IS NOT NULL AND al.itunes_album_id != '')")
+            if has_deezer:
+                where_parts.append("(al.deezer_id IS NOT NULL AND al.deezer_id != '')")
+            where_clause = ' OR '.join(where_parts)
+
+            cursor.execute(f"""
+                SELECT {', '.join(select_cols)}
                FROM albums al
                LEFT JOIN artists ar ON ar.id = al.artist_id
                LEFT JOIN tracks t ON t.album_id = al.id
-                WHERE al.spotify_album_id IS NOT NULL AND al.spotify_album_id != ''
+                WHERE {where_clause}
                GROUP BY al.id
            """)
            albums = cursor.fetchall()
@ -69,13 +95,28 @@ class AlbumCompletenessJob(RepairJob):
        if context.report_progress:
            context.report_progress(phase=f'Checking {total} albums...', total=total)

+        # Determine column positions based on what we selected
+        # Fixed: 0=id, 1=title, 2=artist, 3=spotify_id, 4=track_count, 5=actual, 6=album_thumb, 7=artist_thumb
+        itunes_col = 8 if has_itunes else None
+        deezer_col = (9 if has_itunes else 8) if has_deezer else None
+
        for i, row in enumerate(albums):
            if context.check_stop():
                return result
            if i % 10 == 0 and context.wait_if_paused():
                return result

-            album_id, title, artist_name, spotify_album_id, db_track_count, actual_count, album_thumb, artist_thumb = row
+            album_id = row[0]
+            title = row[1]
+            artist_name = row[2]
+            spotify_album_id = row[3]
+            db_track_count = row[4]
+            actual_count = row[5]
+            album_thumb = row[6]
+            artist_thumb = row[7]
+            itunes_album_id = row[itunes_col] if itunes_col is not None else None
+            deezer_album_id = row[deezer_col] if deezer_col is not None else None
+
            result.scanned += 1

            if context.report_progress:
@ -86,16 +127,13 @@ class AlbumCompletenessJob(RepairJob):
                    log_type='info'
                )

-            # If we don't know the expected track count, try to get it from API
+            # If we don't know the expected track count, try to get it from an API
            expected_total = db_track_count

-            if not expected_total and context.spotify_client and not context.is_spotify_rate_limited():
-                try:
-                    album_data = context.spotify_client.get_album(spotify_album_id)
-                    if album_data:
-                        expected_total = album_data.get('total_tracks', 0)
-                except Exception:
-                    pass
+            if not expected_total:
+                expected_total = self._get_expected_total(
+                    context, spotify_album_id, itunes_album_id, deezer_album_id
+                )

            # Skip singles/EPs based on expected track count (not local count)
            if expected_total and expected_total < min_tracks:
@ -111,43 +149,9 @@ class AlbumCompletenessJob(RepairJob):
                continue

            # Album is incomplete — try to find which tracks are missing
-            missing_tracks = []
-            if context.spotify_client and not context.is_spotify_rate_limited():
-                try:
-                    api_tracks = context.spotify_client.get_album_tracks(spotify_album_id)
-                    if api_tracks and 'items' in api_tracks:
-                        # Get track numbers we already have
-                        owned_numbers = set()
-                        conn2 = context.db._get_connection()
-                        cursor2 = conn2.cursor()
-                        cursor2.execute(
-                            "SELECT track_number FROM tracks WHERE album_id = ? AND track_number IS NOT NULL",
-                            (album_id,)
-                        )
-                        for tr in cursor2.fetchall():
-                            owned_numbers.add(tr[0])
-                        conn2.close()
-
-                        for item in api_tracks['items']:
-                            tn = item.get('track_number')
-                            if tn and tn not in owned_numbers:
-                                # Extract artist names from Spotify track data
-                                track_artists = []
-                                for a in item.get('artists', []):
-                                    if isinstance(a, dict):
-                                        track_artists.append(a.get('name', ''))
-                                    elif isinstance(a, str):
-                                        track_artists.append(a)
-                                missing_tracks.append({
-                                    'track_number': tn,
-                                    'name': item.get('name', ''),
-                                    'disc_number': item.get('disc_number', 1),
-                                    'spotify_track_id': item.get('id', ''),
-                                    'duration_ms': item.get('duration_ms', 0),
-                                    'artists': track_artists,
-                                })
-                except Exception as e:
-                    logger.debug("Error getting album tracks for %s: %s", spotify_album_id, e)
+            missing_tracks = self._find_missing_tracks(
+                context, album_id, spotify_album_id, itunes_album_id, deezer_album_id
+            )

            if context.report_progress:
                context.report_progress(
@ -156,6 +160,8 @@ class AlbumCompletenessJob(RepairJob):
                )
            if context.create_finding:
                try:
+                    # Use whichever source ID is available
+                    source_id = spotify_album_id or itunes_album_id or deezer_album_id or ''
                    context.create_finding(
                        job_id=self.job_id,
                        finding_type='incomplete_album',
@ -172,7 +178,9 @@ class AlbumCompletenessJob(RepairJob):
                            'album_id': album_id,
                            'album_title': title,
                            'artist': artist_name,
-                            'spotify_album_id': spotify_album_id,
+                            'spotify_album_id': spotify_album_id or '',
+                            'itunes_album_id': itunes_album_id or '',
+                            'deezer_album_id': deezer_album_id or '',
                            'expected_tracks': expected_total,
                            'actual_tracks': actual_count,
                            'missing_tracks': missing_tracks,
@ -195,6 +203,106 @@ class AlbumCompletenessJob(RepairJob):
                     result.scanned, result.findings_created)
        return result

+    def _get_expected_total(self, context, spotify_id, itunes_id, deezer_id):
+        """Try to get the expected track count from any available API source."""
+        # Try Spotify first
+        if spotify_id and context.spotify_client and not context.is_spotify_rate_limited():
+            try:
+                album_data = context.spotify_client.get_album(spotify_id)
+                if album_data:
+                    total = album_data.get('total_tracks', 0)
+                    if total:
+                        return total
+            except Exception:
+                pass
+
+        # Try fallback client (iTunes or Deezer) — both return Spotify-compatible format
+        # Match the ID to the actual client type to avoid passing iTunes ID to Deezer or vice versa
+        if context.itunes_client:
+            is_deezer = type(context.itunes_client).__name__ == 'DeezerClient'
+            primary_id = deezer_id if is_deezer else itunes_id
+            secondary_id = itunes_id if is_deezer else deezer_id
+            for fid in [primary_id, secondary_id]:
+                if not fid:
+                    continue
+                try:
+                    api_tracks = context.itunes_client.get_album_tracks(fid)
+                    if api_tracks and 'items' in api_tracks:
+                        return len(api_tracks['items'])
+                except Exception:
+                    pass
+
+        return 0
+
+    def _find_missing_tracks(self, context, album_id, spotify_id, itunes_id, deezer_id):
+        """Identify which specific tracks are missing using any available API source."""
+        # Get track numbers we already have
+        owned_numbers = set()
+        conn = None
+        try:
+            conn = context.db._get_connection()
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT track_number FROM tracks WHERE album_id = ? AND track_number IS NOT NULL",
+                (album_id,)
+            )
+            for tr in cursor.fetchall():
+                owned_numbers.add(tr[0])
+        except Exception:
+            return []
+        finally:
+            if conn:
+                conn.close()
+
+        # Try Spotify first
+        api_tracks = None
+        if spotify_id and context.spotify_client and not context.is_spotify_rate_limited():
+            try:
+                api_tracks = context.spotify_client.get_album_tracks(spotify_id)
+            except Exception as e:
+                logger.debug("Error getting Spotify album tracks for %s: %s", spotify_id, e)
+
+        # Try fallback client (iTunes or Deezer)
+        if not api_tracks or 'items' not in (api_tracks or {}):
+            if context.itunes_client:
+                is_deezer = type(context.itunes_client).__name__ == 'DeezerClient'
+                primary_id = deezer_id if is_deezer else itunes_id
+                secondary_id = itunes_id if is_deezer else deezer_id
+                for fid in [primary_id, secondary_id]:
+                    if not fid:
+                        continue
+                    try:
+                        api_tracks = context.itunes_client.get_album_tracks(fid)
+                        if api_tracks and 'items' in api_tracks:
+                            break
+                    except Exception as e:
+                        logger.debug("Error getting fallback album tracks for %s: %s", fid, e)
+
+        if not api_tracks or 'items' not in api_tracks:
+            return []
+
+        # Both Spotify, iTunes, and Deezer return the same format:
+        # items[].track_number, items[].name, items[].disc_number, items[].id, items[].artists
+        missing_tracks = []
+        for item in api_tracks['items']:
+            tn = item.get('track_number')
+            if tn and tn not in owned_numbers:
+                track_artists = []
+                for a in item.get('artists', []):
+                    if isinstance(a, dict):
+                        track_artists.append(a.get('name', ''))
+                    elif isinstance(a, str):
+                        track_artists.append(a)
+                missing_tracks.append({
+                    'track_number': tn,
+                    'name': item.get('name', ''),
+                    'disc_number': item.get('disc_number', 1),
+                    'spotify_track_id': item.get('id', ''),
+                    'duration_ms': item.get('duration_ms', 0),
+                    'artists': track_artists,
+                })
+        return missing_tracks
+
    def _get_settings(self, context: JobContext) -> dict:
        if not context.config_manager:
            return self.default_settings.copy()
@ -208,9 +316,20 @@ class AlbumCompletenessJob(RepairJob):
        try:
            conn = context.db._get_connection()
            cursor = conn.cursor()
-            cursor.execute("""
+
+            # Check which columns exist
+            cursor.execute("PRAGMA table_info(albums)")
+            columns = {row[1] for row in cursor.fetchall()}
+
+            where_parts = ["(spotify_album_id IS NOT NULL AND spotify_album_id != '')"]
+            if 'itunes_album_id' in columns:
+                where_parts.append("(itunes_album_id IS NOT NULL AND itunes_album_id != '')")
+            if 'deezer_id' in columns:
+                where_parts.append("(deezer_id IS NOT NULL AND deezer_id != '')")
+
+            cursor.execute(f"""
                SELECT COUNT(*) FROM albums
-                WHERE spotify_album_id IS NOT NULL AND spotify_album_id != ''
+                WHERE {' OR '.join(where_parts)}
            """)
            row = cursor.fetchone()
            return row[0] if row else 0