From 39a07e4bdf63eae7edc61c0581a3f70a6d30e77e Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Tue, 21 Apr 2026 17:26:38 -0700 Subject: [PATCH] Fix Discography Backfill silently skipping most releases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs kept this job from finding anything useful on a typical library. 1. Wrong Deezer column name. The artists table has a deezer_id column (per music_database.py:1986), but the job looked for deezer_artist_id in both _scan_artist (line 132) and _get_library_artists (line 345). For Deezer-primary users, this meant the Deezer ID never made it into the source_ids map, so get_artist_discography fell back to artist-name-only search — slower and less accurate than an ID lookup. 2. Spotify-reported EPs were silently excluded. Spotify lumps EPs and true singles under album_type='single'. The previous _should_include_release short-circuited on album_type='single' and returned the include_singles setting (default False), so 4-6 track EPs on Spotify-primary libraries never survived the filter — even though include_eps defaulted to True. Only 7+ track full albums made it through. This is the main reason users felt the job did nothing. Fixes: - Use the correct deezer_id column name in both reference sites. - Restructure _should_include_release so only 'album', 'ep', and 'compilation' are trusted outright. Anything else (including 'single' and missing type) falls through to a track-count disambiguation matching the download pipeline's _get_album_type_display: 1-3 tracks = true single, 4-6 = EP, 7+ = album. A Spotify-returned 'single' with 5 tracks now correctly counts as an EP. Full suite stays at 263 passed. Ruff clean. 
--- core/repair_jobs/discography_backfill.py | 32 +++++++++++++++--------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/core/repair_jobs/discography_backfill.py b/core/repair_jobs/discography_backfill.py index 6b307293..c443ac65 100644 --- a/core/repair_jobs/discography_backfill.py +++ b/core/repair_jobs/discography_backfill.py @@ -128,8 +128,8 @@ class DiscographyBackfillJob(RepairJob): source_ids['spotify'] = artist['spotify_artist_id'] if artist.get('itunes_artist_id'): source_ids['itunes'] = artist['itunes_artist_id'] - if artist.get('deezer_artist_id'): - source_ids['deezer'] = artist['deezer_artist_id'] + if artist.get('deezer_id'): + source_ids['deezer'] = artist['deezer_id'] # Fetch full discography discography = get_artist_discography( @@ -294,21 +294,29 @@ class DiscographyBackfillJob(RepairJob): @staticmethod def _should_include_release(total_tracks, album_type, settings): - """Check if a release should be included based on type settings.""" - # Use album_type from metadata source when available + """Check if a release should be included based on type settings. + + Spotify lumps both EPs and true singles under album_type='single', so + only an explicit 'album' / 'ep' / 'compilation' is trusted outright. 
+ Anything else (including 'single' or missing type) falls through to a + track-count disambiguation matching the download pipeline: + - 1-3 tracks -> true single + - 4-6 tracks -> EP + - 7+ tracks -> album + """ normalized = (album_type or '').lower() if normalized == 'compilation': return settings.get('include_compilations', False) - if normalized in ('single',): - return settings.get('include_singles', False) - if normalized in ('ep',): + if normalized == 'album': + return settings.get('include_albums', True) + if normalized == 'ep': return settings.get('include_eps', True) - # Fall back to track count heuristic + # 'single' or missing: disambiguate by track count if total_tracks >= 7: return settings.get('include_albums', True) - elif total_tracks >= 4: + if total_tracks >= 4: return settings.get('include_eps', True) - elif total_tracks >= 1: + if total_tracks >= 1: return settings.get('include_singles', False) return settings.get('include_albums', True) @@ -342,8 +350,8 @@ class DiscographyBackfillJob(RepairJob): select.append("spotify_artist_id") if 'itunes_artist_id' in columns: select.append("itunes_artist_id") - if 'deezer_artist_id' in columns: - select.append("deezer_artist_id") + if 'deezer_id' in columns: + select.append("deezer_id") cursor.execute(f""" SELECT {', '.join(select)}