From 0769fcd5cc80ba0beed87e202c1bc6d59fc12273 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Wed, 13 May 2026 22:11:20 -0700 Subject: [PATCH] Fix Soulseek downloads losing collab artist tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Soulseek matched-download contexts populate `original_search_result` with `artist` (singular string) and no `artists` list — the full multi-artist array lives on `track_info` (the matched Spotify track object). `extract_source_metadata` only read `original_search.artists`, so the Soulseek path always fell through to the single-artist branch and TPE1 ended up with the primary artist only. Deezer-direct downloads were unaffected because their context populates `original_search.artists` as a proper list. Lifted artist resolution into a pure helper `core/metadata/artist_resolution.py:resolve_track_artists` that walks `original_search.artists` → `track_info.artists` → `artist_dict.name` fallback chain. Normalizes mixed list-item shapes (Spotify-style dicts, bare strings, anything else stringified) and drops empty entries. 13 new tests pin the resolution order, fallback chain, mixed-shape normalization, whitespace stripping, and empty/none handling. The existing `_artists_list` no-fall-through test in `test_multi_artist_tag_settings.py` was updated to reflect the new contract (always populated; multi-value write still gated on `len > 1`) plus a new regression test for the Soulseek shape. Composes with the existing Deezer per-track upgrade (still fires when single-artist + track_id available) and feat_in_title / artist_separator settings (still drive the joined ARTIST string downstream). --- core/metadata/artist_resolution.py | 74 +++++++++++++++++ core/metadata/source.py | 19 ++--- tests/metadata/test_artist_resolution.py | 79 +++++++++++++++++++ .../test_multi_artist_tag_settings.py | 32 +++++++- webui/static/helper.js | 1 + 5 files changed, 191 insertions(+), 14 deletions(-) create mode 100644 core/metadata/artist_resolution.py create mode 100644 tests/metadata/test_artist_resolution.py diff --git a/core/metadata/artist_resolution.py b/core/metadata/artist_resolution.py new file mode 100644 index 00000000..362a6760 --- /dev/null +++ b/core/metadata/artist_resolution.py @@ -0,0 +1,74 @@ +"""Pure artist-list resolution for tag-write paths. + +Single source of truth for "what is the canonical multi-value artists +list for this track?" Different download paths populate `context` with +different keys — Deezer-direct downloads stamp `original_search.artists` +as a proper list, but Soulseek matched downloads only carry `artist` +(singular string) in `original_search_result` while the full list lives +on `track_info` (the full Spotify track object). + +Resolution order: + 1. `context.original_search_result.artists` (preferred — already- + curated by the source path that constructed the context) + 2. `context.track_info.artists` (Spotify/Deezer/Tidal full track + object — always carries the artists array when matched) + 3. `[artist_dict.name]` as a single-element fallback when neither + carries a list (primary-artist-only) + +Each list item may be a dict with a `name` key (Spotify shape), a bare +string, or any other object — the helper normalizes all three to +strings and drops empty entries. +""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional + + +def _normalize_artists_iterable(items: Any) -> List[str]: + if not isinstance(items, list): + return [] + result: List[str] = [] + for item in items: + if isinstance(item, dict): + name = item.get("name") + if isinstance(name, str) and name.strip(): + result.append(name.strip()) + elif isinstance(item, str): + stripped = item.strip() + if stripped: + result.append(stripped) + elif item is not None: + text = str(item).strip() + if text: + result.append(text) + return result + + +def resolve_track_artists( + original_search: Optional[Dict[str, Any]], + track_info: Optional[Dict[str, Any]], + artist_dict: Optional[Dict[str, Any]], +) -> List[str]: + """Return the canonical multi-value artists list for tag-write. + + Falls through preferred → track_info → primary-artist fallback. Each + candidate is normalized to a list of stripped non-empty strings. + Empty list returned only when every candidate is empty/invalid. + """ + if isinstance(original_search, dict): + primary = _normalize_artists_iterable(original_search.get("artists")) + if primary: + return primary + + if isinstance(track_info, dict): + secondary = _normalize_artists_iterable(track_info.get("artists")) + if secondary: + return secondary + + if isinstance(artist_dict, dict): + name = artist_dict.get("name") + if isinstance(name, str) and name.strip(): + return [name.strip()] + + return [] diff --git a/core/metadata/source.py b/core/metadata/source.py index 67350205..8903eb0e 100644 --- a/core/metadata/source.py +++ b/core/metadata/source.py @@ -23,6 +23,7 @@ from core.imports.context import ( get_source_tag_names, normalize_import_context, ) +from core.metadata.artist_resolution import resolve_track_artists from core.metadata.registry import get_itunes_client from database.music_database import get_database from core.metadata.common import ( @@ -907,17 +908,13 @@ def extract_source_metadata(context: dict, artist: dict, album_info: dict) -> di else: logger.warning("Metadata: Using original title as fallback: '%s'", metadata["title"]) - artists = original_search.get("artists") - if isinstance(artists, list) and artists: - all_artists = [] - for artist_item in artists: - if isinstance(artist_item, dict) and artist_item.get("name"): - all_artists.append(artist_item["name"]) - elif isinstance(artist_item, str): - all_artists.append(artist_item) - else: - all_artists.append(str(artist_item)) - + # Resolve canonical artists list. Soulseek matched-download contexts + # only carry `original_search.artist` (singular string) — the full + # contributors list lives on `track_info` (the matched Spotify/etc + # track object). Deezer-direct contexts populate `original_search.artists` + # directly. Pure helper handles all three shapes. + all_artists = resolve_track_artists(original_search, track_info, artist_dict) + if all_artists: # Deezer upgrade path: Deezer's `/search` endpoint only returns # the primary artist for each track. The full contributors # array (feat., remix collaborators, producers credited as diff --git a/tests/metadata/test_artist_resolution.py b/tests/metadata/test_artist_resolution.py new file mode 100644 index 00000000..95eb8d3a --- /dev/null +++ b/tests/metadata/test_artist_resolution.py @@ -0,0 +1,79 @@ +from core.metadata.artist_resolution import resolve_track_artists + + +def test_prefers_original_search_artists_when_populated(): + original = {"artists": [{"name": "Kendrick Lamar"}, {"name": "Rihanna"}]} + track = {"artists": [{"name": "Should Not Be Used"}]} + artist = {"name": "Primary"} + + assert resolve_track_artists(original, track, artist) == ["Kendrick Lamar", "Rihanna"] + + +def test_falls_back_to_track_info_artists_when_original_lacks_list(): + # Soulseek context shape: original_search_result has 'artist' (string) + # and no 'artists' (list). Full Spotify track object lives on track_info. + original = {"artist": "Kendrick Lamar"} + track = {"artists": [{"name": "Kendrick Lamar"}, {"name": "Rihanna"}]} + artist = {"name": "Kendrick Lamar"} + + assert resolve_track_artists(original, track, artist) == ["Kendrick Lamar", "Rihanna"] + + +def test_falls_back_to_artist_dict_name_when_no_lists_available(): + original = {"artist": "Solo Artist"} + track = {"name": "Track Title"} + artist = {"name": "Solo Artist"} + + assert resolve_track_artists(original, track, artist) == ["Solo Artist"] + + +def test_returns_empty_list_when_everything_missing(): + assert resolve_track_artists(None, None, None) == [] + assert resolve_track_artists({}, {}, {}) == [] + + +def test_handles_bare_string_artist_items(): + original = {"artists": ["Kendrick Lamar", "Rihanna"]} + assert resolve_track_artists(original, None, None) == ["Kendrick Lamar", "Rihanna"] + + +def test_mixed_dict_and_string_items_normalized(): + original = {"artists": [{"name": "Kendrick Lamar"}, "Rihanna"]} + assert resolve_track_artists(original, None, None) == ["Kendrick Lamar", "Rihanna"] + + +def test_strips_whitespace_and_drops_empty_entries(): + original = {"artists": [{"name": " Kendrick "}, {"name": ""}, " ", "Rihanna"]} + assert resolve_track_artists(original, None, None) == ["Kendrick", "Rihanna"] + + +def test_dict_item_without_name_key_skipped(): + original = {"artists": [{"id": "abc"}, {"name": "Rihanna"}]} + assert resolve_track_artists(original, None, None) == ["Rihanna"] + + +def test_non_list_artists_value_falls_through(): + original = {"artists": "Kendrick Lamar"} # string, not list + track = {"artists": [{"name": "Kendrick Lamar"}, {"name": "Rihanna"}]} + assert resolve_track_artists(original, track, None) == ["Kendrick Lamar", "Rihanna"] + + +def test_empty_original_artists_list_falls_through_to_track_info(): + original = {"artists": []} + track = {"artists": [{"name": "Kendrick Lamar"}, {"name": "Rihanna"}]} + assert resolve_track_artists(original, track, None) == ["Kendrick Lamar", "Rihanna"] + + +def test_artist_dict_name_blank_returns_empty(): + assert resolve_track_artists({}, {}, {"name": " "}) == [] + assert resolve_track_artists({}, {}, {"name": ""}) == [] + + +def test_non_string_artist_items_coerced_to_string(): + original = {"artists": [123, {"name": "Real Artist"}]} + assert resolve_track_artists(original, None, None) == ["123", "Real Artist"] + + +def test_none_artist_items_dropped(): + original = {"artists": [None, {"name": "Real Artist"}, None]} + assert resolve_track_artists(original, None, None) == ["Real Artist"] diff --git a/tests/metadata/test_multi_artist_tag_settings.py b/tests/metadata/test_multi_artist_tag_settings.py index 550e170d..d3acf452 100644 --- a/tests/metadata/test_multi_artist_tag_settings.py +++ b/tests/metadata/test_multi_artist_tag_settings.py @@ -99,8 +99,10 @@ class TestArtistsListPopulated: assert meta.get("_artists_list") == ["Solo Artist"] def test_no_artists_falls_through(self): - """When search response has no artists list, falls through to - the single-artist branch — no `_artists_list` written.""" + """When search response has no artists list, the artist-resolution + helper falls back to the artist_dict.name as a single-element list. + Multi-value tag write still no-ops because len([primary]) == 1. + """ from core.metadata import source as src_module context = { @@ -109,7 +111,31 @@ class TestArtistsListPopulated: } with patch.object(src_module, "get_config_manager", return_value=_make_cfg()): meta = src_module.extract_source_metadata(context, {"name": "X"}, {}) - assert "_artists_list" not in meta or meta.get("_artists_list") in (None, []) + assert meta.get("_artists_list") == ["X"] + + def test_soulseek_shape_falls_back_to_track_info_artists(self): + """Soulseek matched-download regression: original_search_result + carries 'artist' (singular string) but no 'artists' list, while + track_info (the matched Spotify track object) carries the full + multi-artist array. Helper should pull from track_info. + """ + from core.metadata import source as src_module + + context = { + "original_search_result": { + "title": "DNA.", + "artist": "Kendrick Lamar", + }, + "track_info": { + "name": "DNA.", + "artists": [{"name": "Kendrick Lamar"}, {"name": "Rihanna"}], + }, + "source": "spotify", + } + with patch.object(src_module, "get_config_manager", return_value=_make_cfg()): + meta = src_module.extract_source_metadata(context, {"name": "Kendrick Lamar"}, {}) + assert meta.get("_artists_list") == ["Kendrick Lamar", "Rihanna"] + assert meta.get("artist") == "Kendrick Lamar, Rihanna" # --------------------------------------------------------------------------- diff --git a/webui/static/helper.js b/webui/static/helper.js index 736220dc..f07b5f1f 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3416,6 +3416,7 @@ const WHATS_NEW = { '2.5.2': [ // --- May 13, 2026 — 2.5.2 release --- { date: 'May 13, 2026 — 2.5.2 release' }, + { title: 'Soulseek Downloads: Multi-Artist Tags Now Get Written Properly', desc: 'discord report: tracks downloaded via soulseek were getting tagged with primary artist only (no collab artists), while the same track downloaded via deezer tagged everyone correctly. trace: the soulseek matched-download context constructed `original_search_result` with `artist` (singular string) but no `artists` (list), even though the full multi-artist list lived on `track_info` (the matched spotify track object). `core/metadata/source.py:extract_source_metadata` only read `original_search.artists`, so soulseek path always fell through to the single-artist branch. fix: lifted artist resolution into a pure helper `core/metadata/artist_resolution.py:resolve_track_artists` that walks `original_search.artists` → `track_info.artists` → `artist_dict.name` fallback chain. handles all three list-item shapes (spotify-style dicts, bare strings, anything else stringified). 13 tests pin the resolution order, fallback chain, mixed-shape normalization, whitespace stripping, empty/none handling. composes with the existing deezer per-track upgrade (still fires when single-artist + track_id available) and feat_in_title / artist_separator settings (still drive the joined ARTIST string downstream).', page: 'downloads' }, { title: 'Download Missing Modal: Tracklist Got A Polish Pass', desc: 'visual tune-up only — column layout untouched. hairline row dividers, accent gradient + edge bar on hover, monospace track numbers (glow accent on row hover), monospace tabular duration. status text in both library-match + download-status columns picks up a leading colored dot with a soft halo (green found / amber missing / blue checking / orange downloading / red failed) and pulses while in-flight. artist column centered. soft scrollbar.', page: 'downloads' }, { title: 'Search Source Picker: Fix Default Always Sticking To Spotify', desc: 'enhanced search + global search source picker always defaulted to spotify even when the user\'s primary metadata source was deezer / itunes / discogs / etc. trace: `shared-helpers.js:createSearchController` reads `/status.metadata_source` to pick the initial active icon, then checks `SOURCE_LABELS[src]` to validate. backend was returning `metadata_source` as a dict (`{source, connected, response_time, ...}` — used elsewhere for connection-state display), so `SOURCE_LABELS[]` was always undefined, the `if` guard never fired, and `state.activeSource` silently stayed at the hardcoded `\'spotify\'` default. fix: read `.source` off the dict (with forward-compat fallback to plain-string in case any older /status response shape predates the dict change). other consumers (core.js sidebar tile, helper.js status checker, search.js display) already used `?.source` correctly — this was the only stale call site.', page: 'search' }, { title: 'Download Discography: No Longer Caps Prolific Artists At 50 Releases', desc: 'discord report: clicking "download discography" on an artist with a deep catalogue (bach, beatles complete box, dance / electronic artists with hundreds of remixes) only showed ~50 albums in the modal. trace: `MetadataLookupOptions(limit=50, max_pages=0)` was hardcoded at the discography endpoint and the artist-detail discography view. spotify\'s `max_pages=0` already paginates through everything (per-page is clamped to 10 internally) so spotify-primary users were unaffected. but deezer / itunes / discogs / hydrabase all honor the outer `limit` as a hard cap. fix: bump `limit` from 50 to 200 at all three call sites (`web_server.py` discography endpoint + artist-detail view + `core/artist_source_detail.py`). 200 matches iTunes\'s and Discogs\'s own internal caps and covers near-everyone\'s full catalogue. spotify behavior unchanged.', page: 'library' },