From db65e783c7acdabcf0a7a9f99db8a4104a0a1e2c Mon Sep 17 00:00:00 2001 From: BoulderBadgeDad Date: Tue, 9 Jun 2026 14:35:54 -0700 Subject: [PATCH] Discography: keep collab tracks credited as one combined string (#830) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vicky-2418: Download Discography skipped some albums/singles as "No New Track" even on a brand-new artist, while manual one-by-one worked. The log showed the real reason wasn't ownership at all — it was "N skipped (artist mismatch)". Root cause (confirmed against the iTunes API, not guessed): iTunes returns a collab as ONE combined string. Narvent's "Miss You (Ambient Remix)" is credited 'TRVNSPORTER, Narvent & SKVLENT'. track_artist_matches did an exact full-string compare ('trvnsporter, narvent & skvlent' == 'narvent' → False), so every collaborator's discography entry was dropped — despite the #559 comment claiming it "keeps features" (it didn't, because features arrive combined, not as a list). Fix: match the requested artist as a COMPONENT of the credit — split on the common separators (, & ; / feat ft featuring vs x), while still including the full string so band names with internal separators ("Florence + the Machine") match exactly. Component matching stays exact per name, so true contamination (the #559 case — artist not credited at all) is still dropped, and substrings ("Drakeo the Ruler" ≠ "Drake") still don't match. Also corrects an over-conservative #559 test that asserted "Drake & Future" shouldn't match "Drake" — it should; Drake is a credited collaborator. The guard drops uncredited artists, not legit collabs packed into one string. Tests: the exact real case (Narvent in the combined credit) + each collaborator, contamination still dropped, feat/ft/featuring/x forms, no substring false positive. 36 filter tests + 747 discography/metadata tests pass. --- core/metadata/discography_filters.py | 42 ++++++++++++++++++---- tests/metadata/test_discography_filters.py | 41 ++++++++++++++++++--- 2 files changed, 72 insertions(+), 11 deletions(-) diff --git a/core/metadata/discography_filters.py b/core/metadata/discography_filters.py index 4e1e0ecd..c5985fdf 100644 --- a/core/metadata/discography_filters.py +++ b/core/metadata/discography_filters.py @@ -28,8 +28,31 @@ single source of truth. from __future__ import annotations +import re from typing import Any, Dict, List, Optional +# Split a combined artist credit into its individual artists. Sources like +# iTunes return collabs as ONE string ("TRVNSPORTER, Narvent & SKVLENT"), not a +# list — so an exact full-string compare drops every collaborator's discography +# entry (#830). Split on the common credit separators; " and " / " with " are +# deliberately excluded (too many real band names contain them). +_ARTIST_CREDIT_SPLIT_RE = re.compile( + r"\s*[,&;/]\s*|\s+(?:feat\.?|ft\.?|featuring|vs\.?|x)\s+", + re.IGNORECASE, +) + + +def _artist_credit_components(name: str) -> List[str]: + """Return the individual artist names within a (possibly combined) credit, + always including the full string itself (so exact band names with internal + separators still match).""" + name = (name or "").strip() + if not name: + return [] + parts = [name] + parts.extend(p.strip() for p in _ARTIST_CREDIT_SPLIT_RE.split(name) if p.strip()) + return parts + from core.watchlist_scanner import ( is_acoustic_version, is_instrumental_version, @@ -47,10 +70,12 @@ def track_artist_matches(track_artists: Any, requested_artist_name: str) -> bool what the discography fetch returns), or the list-of-dicts shape that some upstreams pass directly. Both are accepted. - Returns True for primary-artist tracks AND feature appearances — - the requested artist need only be one of the listed artists. Only - drops tracks where the requested artist isn't named at all (the - cross-artist compilation case from #559). + Returns True for primary-artist tracks AND feature/collab appearances — + the requested artist need only be one of the credited artists, INCLUDING + when a source (iTunes, etc.) packs the collab into one combined string like + "TRVNSPORTER, Narvent & SKVLENT" (#830). Only drops tracks where the + requested artist isn't credited at all (the cross-artist compilation case + from #559). """ if not requested_artist_name: # No artist to compare against — don't filter; let the caller @@ -70,8 +95,13 @@ def track_artist_matches(track_artists: Any, requested_artist_name: str) -> bool name = entry.get('name', '') or '' else: name = str(entry or '') - if name.strip().lower() == target: - return True + # Match the requested artist as a component of the credit, so combined + # collab strings ("A, B & C") keep B's discography entry. Component + # matching is still exact per-name, so true contamination (the artist + # genuinely absent) is dropped exactly as before. + for component in _artist_credit_components(name): + if component.strip().lower() == target: + return True return False diff --git a/tests/metadata/test_discography_filters.py b/tests/metadata/test_discography_filters.py index 400e0885..8c297d05 100644 --- a/tests/metadata/test_discography_filters.py +++ b/tests/metadata/test_discography_filters.py @@ -61,6 +61,35 @@ class TestTrackArtistMatches: assert track_artist_matches(['DRAKE'], 'Drake') is True assert track_artist_matches(['Drake'], 'drake') is True + def test_combined_collab_credit_string_matches_component(self): + """#830 (Vicky-2418): iTunes packs a collab into ONE string. The + requested artist is one of several credited — must match. This is the + exact real case from the report (Narvent's 'Miss You (Ambient Remix)').""" + credit = ['TRVNSPORTER, Narvent & SKVLENT'] + assert track_artist_matches(credit, 'Narvent') is True + assert track_artist_matches(credit, 'TRVNSPORTER') is True + assert track_artist_matches(credit, 'SKVLENT') is True + + def test_combined_credit_still_drops_absent_artist(self): + """The #559 contamination guard survives: an artist genuinely absent + from a combined credit is still dropped.""" + assert track_artist_matches(['TRVNSPORTER, Narvent & SKVLENT'], 'Drake') is False + + def test_feat_forms_match_component(self): + for credit in (['Drake feat. Narvent'], ['Drake ft. Narvent'], + ['Drake featuring Narvent'], ['Drake x Narvent']): + assert track_artist_matches(credit, 'Narvent') is True + + def test_no_substring_false_positive(self): + """Component matching is exact per-name — a longer name that merely + contains the target must NOT match.""" + assert track_artist_matches(['Narventos'], 'Narvent') is False + + def test_band_name_with_internal_separator_still_matches_exactly(self): + """A real band name containing a separator still matches as the full + string (we keep the whole credit as a candidate alongside the split).""" + assert track_artist_matches(['Florence + the Machine'], 'Florence + the Machine') is True + def test_match_handles_whitespace_padding(self): """Trailing whitespace in either side mustn't break the match.""" assert track_artist_matches([' Drake '], 'Drake') is True @@ -87,12 +116,14 @@ class TestTrackArtistMatches: assert track_artist_matches([{'name': 'Drake'}], 'Drake') is True assert track_artist_matches([{'name': 'Random'}], 'Drake') is False - def test_substring_does_not_match(self): - """A song by "Drake & Future" should not match "Drake" via - substring — that's exactly the false-positive case the bug - report describes. Exact full-name match only.""" - assert track_artist_matches(['Drake & Future'], 'Drake') is False + def test_substring_does_not_match_but_component_does(self): + """No SUBSTRING matching — "Drakeo the Ruler" must not match "Drake". + But a combined collab credit IS component-matched (#830): "Drake & + Future" matches "Drake" because Drake is genuinely one of the credited + artists. The #559 guard drops artists who aren't credited AT ALL, not + legit collaborators packed into one string by sources like iTunes.""" assert track_artist_matches(['Drakeo the Ruler'], 'Drake') is False + assert track_artist_matches(['Drake & Future'], 'Drake') is True # ---------------------------------------------------------------------------