Discography: keep collab tracks credited as one combined string (#830)

Vicky-2418: Download Discography skipped some albums/singles as "No New Track"
even on a brand-new artist, while manual one-by-one worked. The log showed the
real reason wasn't ownership at all — it was "N skipped (artist mismatch)".

Root cause (confirmed against the iTunes API, not guessed): iTunes returns a
collab as ONE combined string. Narvent's "Miss You (Ambient Remix)" is credited
'TRVNSPORTER, Narvent & SKVLENT'. track_artist_matches did an exact full-string
compare ('trvnsporter, narvent & skvlent' == 'narvent' → False), so every
collaborator's discography entry was dropped — despite the #559 comment claiming
it "keeps features" (it didn't, because features arrive combined, not as a list).

Fix: match the requested artist as a COMPONENT of the credit — split on the
common separators (, & ; / feat ft featuring vs x), while still including the
full string so band names with internal separators ("Florence + the Machine")
match exactly. Component matching stays exact per name, so true contamination
(the #559 case — artist not credited at all) is still dropped, and substrings
("Drakeo the Ruler" ≠ "Drake") still don't match.

Also corrects an over-conservative #559 test that asserted "Drake & Future"
shouldn't match "Drake" — it should; Drake is a credited collaborator. The guard
drops uncredited artists, not legit collabs packed into one string.

Tests: the exact real case (Narvent in the combined credit) + each collaborator,
contamination still dropped, feat/ft/featuring/x forms, no substring false
positive. 36 filter tests + 747 discography/metadata tests pass.
pull/834/head
BoulderBadgeDad 2 days ago
parent 1d16ac7978
commit db65e783c7

@ -28,8 +28,31 @@ single source of truth.
from __future__ import annotations
import re
from typing import Any, Dict, List, Optional
# Split a combined artist credit into its individual artists. Sources like
# iTunes return collabs as ONE string ("TRVNSPORTER, Narvent & SKVLENT"), not a
# list — so an exact full-string compare drops every collaborator's discography
# entry (#830). Split on the common credit separators; " and " / " with " are
# deliberately excluded (too many real band names contain them).
_ARTIST_CREDIT_SPLIT_RE = re.compile(
r"\s*[,&;/]\s*|\s+(?:feat\.?|ft\.?|featuring|vs\.?|x)\s+",
re.IGNORECASE,
)
def _artist_credit_components(name: str) -> List[str]:
"""Return the individual artist names within a (possibly combined) credit,
always including the full string itself (so exact band names with internal
separators still match)."""
name = (name or "").strip()
if not name:
return []
parts = [name]
parts.extend(p.strip() for p in _ARTIST_CREDIT_SPLIT_RE.split(name) if p.strip())
return parts
from core.watchlist_scanner import (
is_acoustic_version,
is_instrumental_version,
@ -47,10 +70,12 @@ def track_artist_matches(track_artists: Any, requested_artist_name: str) -> bool
what the discography fetch returns), or the list-of-dicts shape
that some upstreams pass directly. Both are accepted.
Returns True for primary-artist tracks AND feature appearances
the requested artist need only be one of the listed artists. Only
drops tracks where the requested artist isn't named at all (the
cross-artist compilation case from #559).
Returns True for primary-artist tracks AND feature/collab appearances
the requested artist need only be one of the credited artists, INCLUDING
when a source (iTunes, etc.) packs the collab into one combined string like
"TRVNSPORTER, Narvent & SKVLENT" (#830). Only drops tracks where the
requested artist isn't credited at all (the cross-artist compilation case
from #559).
"""
if not requested_artist_name:
# No artist to compare against — don't filter; let the caller
@ -70,8 +95,13 @@ def track_artist_matches(track_artists: Any, requested_artist_name: str) -> bool
name = entry.get('name', '') or ''
else:
name = str(entry or '')
if name.strip().lower() == target:
return True
# Match the requested artist as a component of the credit, so combined
# collab strings ("A, B & C") keep B's discography entry. Component
# matching is still exact per-name, so true contamination (the artist
# genuinely absent) is dropped exactly as before.
for component in _artist_credit_components(name):
if component.strip().lower() == target:
return True
return False

@ -61,6 +61,35 @@ class TestTrackArtistMatches:
assert track_artist_matches(['DRAKE'], 'Drake') is True
assert track_artist_matches(['Drake'], 'drake') is True
def test_combined_collab_credit_string_matches_component(self):
"""#830 (Vicky-2418): iTunes packs a collab into ONE string. The
requested artist is one of several credited must match. This is the
exact real case from the report (Narvent's 'Miss You (Ambient Remix)')."""
credit = ['TRVNSPORTER, Narvent & SKVLENT']
assert track_artist_matches(credit, 'Narvent') is True
assert track_artist_matches(credit, 'TRVNSPORTER') is True
assert track_artist_matches(credit, 'SKVLENT') is True
def test_combined_credit_still_drops_absent_artist(self):
"""The #559 contamination guard survives: an artist genuinely absent
from a combined credit is still dropped."""
assert track_artist_matches(['TRVNSPORTER, Narvent & SKVLENT'], 'Drake') is False
def test_feat_forms_match_component(self):
for credit in (['Drake feat. Narvent'], ['Drake ft. Narvent'],
['Drake featuring Narvent'], ['Drake x Narvent']):
assert track_artist_matches(credit, 'Narvent') is True
def test_no_substring_false_positive(self):
"""Component matching is exact per-name — a longer name that merely
contains the target must NOT match."""
assert track_artist_matches(['Narventos'], 'Narvent') is False
def test_band_name_with_internal_separator_still_matches_exactly(self):
"""A real band name containing a separator still matches as the full
string (we keep the whole credit as a candidate alongside the split)."""
assert track_artist_matches(['Florence + the Machine'], 'Florence + the Machine') is True
def test_match_handles_whitespace_padding(self):
"""Trailing whitespace in either side mustn't break the match."""
assert track_artist_matches([' Drake '], 'Drake') is True
@ -87,12 +116,14 @@ class TestTrackArtistMatches:
assert track_artist_matches([{'name': 'Drake'}], 'Drake') is True
assert track_artist_matches([{'name': 'Random'}], 'Drake') is False
def test_substring_does_not_match(self):
"""A song by "Drake & Future" should not match "Drake" via
substring that's exactly the false-positive case the bug
report describes. Exact full-name match only."""
assert track_artist_matches(['Drake & Future'], 'Drake') is False
def test_substring_does_not_match_but_component_does(self):
"""No SUBSTRING matching — "Drakeo the Ruler" must not match "Drake".
But a combined collab credit IS component-matched (#830): "Drake &
Future" matches "Drake" because Drake is genuinely one of the credited
artists. The #559 guard drops artists who aren't credited AT ALL, not
legit collaborators packed into one string by sources like iTunes."""
assert track_artist_matches(['Drakeo the Ruler'], 'Drake') is False
assert track_artist_matches(['Drake & Future'], 'Drake') is True
# ---------------------------------------------------------------------------

Loading…
Cancel
Save