Fix Amazon Music search quality: images, dedup, explicit stripping, album/artist clicks

- All search_raw calls switched from single-type to types="track,album" — T2Tunes only
  returns results when both types are requested together
- _fetch_album_metas: parallel fetch (up to 5 workers) of album cover art via
  album_metadata(asin) — T2Tunes search results carry no image URLs
- search_tracks: populates image_url, release_date, total_tracks from album meta
- search_artists: strips feat. credits via _primary_artist() so "Artist feat. X" and
  "Artist ft. Y" collapse to one "Artist" entry; uses album cover as artist image
  stand-in (same approach as iTunes — T2Tunes has no artist images)
- search_albums: name-based dedup (display_name + artist key) instead of ASIN-based;
  populates image_url, release_date, total_tracks from album meta (cap 10 ASIN fetches)
- _strip_edition(): strips [Explicit]/(Explicit) from track/album names — explicit is
  the default version; Clean/Edited/Censored labels kept as-is so they stay distinct
- get_album(): applies _strip_edition to name and _primary_artist to artist so
  MusicBrainz preflight matching doesn't fail on "[Explicit]" album names
- get_album_tracks(): populates track_number and disc_number from T2TunesStreamInfo
  instead of hardcoding None — fixes track ordering in multi-track album downloads
- get_artist() / get_artist_albums(): _unslugify() converts slug artist IDs back to
  search names; _primary_artist() in comparison handles feat-annotated results
- SOURCE_ONLY_ARTIST_SOURCES: added "amazon" so artist detail page doesn't 404
- build_source_only_artist_detail: added amazon_client param + dispatch branch
- web_server.py: resolve amazon_client in _build_source_only_artist_detail wrapper;
  add source_override=="amazon" branch in get_spotify_album_tracks endpoint
- 77 tests covering all above paths; all pass
pull/617/head
Broque Thomas 1 week ago
parent d39679951b
commit 51e00d4ebf

@ -19,6 +19,7 @@ Config keys (all optional — fall back to public defaults):
from __future__ import annotations
import re
import threading
import time
from dataclasses import dataclass
@ -33,6 +34,28 @@ from utils.logging_config import get_logger
logger = get_logger("amazon_client")
# Strips featuring credits like "Artist feat. X", "Artist ft. Y" so artist
# deduplication works on the primary artist name only.
_FEAT_RE = re.compile(r'\s+(?:feat(?:uring)?\.?|ft\.?)\s+.*', re.IGNORECASE)
# Strips the Explicit marker — explicit is treated as the default version.
# Clean/Edited/Censored stay in the name so users can distinguish them.
_EDITION_RE = re.compile(r'\s*[\[\(]explicit[\]\)]', re.IGNORECASE)
def _primary_artist(name: str) -> str:
return _FEAT_RE.sub('', name).strip()
def _strip_edition(name: str) -> str:
return _EDITION_RE.sub('', name).strip()
def _unslugify(name: str) -> str:
"""Convert a slug-form artist ID (e.g. 'kendrick_lamar') to a search name."""
return name.replace('_', ' ')
DEFAULT_BASE_URL = "https://t2tunes.site"
DEFAULT_COUNTRY = "US"
DEFAULT_CODEC = "flac"
@ -300,55 +323,95 @@ class AmazonClient:
def search_tracks(self, query: str, limit: int = 20) -> List[Track]:
    """Search for tracks and enrich them with album-level metadata.

    Track hits are collected until ``limit`` is reached. Titles and album
    names have the Explicit marker stripped and artist names are reduced to
    the primary artist. Cover art, release date and track count are then
    copied from album metadata, which is fetched only for the first few
    distinct album ASINs encountered.

    Args:
        query: Free-text search string.
        limit: Maximum number of tracks to return.

    Returns:
        The matching tracks in search-result order.
    """
    max_album_lookups = 5  # cap on album-metadata requests per search

    _rate_limit()
    # Both types are requested together; the track-only query is not used.
    items = self.search_raw(query, types="track,album")

    track_pairs: List[tuple] = []  # (Track, album_asin)
    # dict used as an ordered set: first-seen order, O(1) membership.
    album_asins: Dict[str, None] = {}
    for item in items:
        if not item.is_track:
            continue
        track = Track.from_search_hit({
            "asin": item.asin,
            # `or ""` keeps a missing field from crashing the normalisers.
            "title": _strip_edition(item.title or ""),
            "artistName": _primary_artist(item.artist_name or ""),
            "albumName": _strip_edition(item.album_name or ""),
            "albumAsin": item.album_asin,
            "duration": item.duration_seconds,
            "isrc": item.isrc,
        })
        track_pairs.append((track, item.album_asin))
        if item.album_asin:
            album_asins.setdefault(item.album_asin, None)
        if len(track_pairs) >= limit:
            break

    album_metas = self._fetch_album_metas(list(album_asins)[:max_album_lookups])

    tracks: List[Track] = []
    for track, album_asin in track_pairs:
        meta = album_metas.get(album_asin) if album_asin else None
        if meta is not None:
            track.image_url = meta.get("image")
            track.release_date = str(meta.get("release_date") or "")
            track.total_tracks = meta.get("trackCount")
        tracks.append(track)
    return tracks
def search_artists(self, query: str, limit: int = 20) -> List[Artist]:
    """Infer artists from search hits and attach an album cover as their image.

    Artist names are reduced to the primary artist (featuring credits
    removed) before de-duplication, so "X feat. Y" and "X" yield one entry.
    The first album ASIN seen for each artist is remembered and its cover is
    used as the artist image.

    Args:
        query: Free-text search string.
        limit: Maximum number of distinct artists to return.

    Returns:
        Distinct artists in first-seen order.
    """
    max_album_lookups = 5  # cap on album-metadata requests per search

    _rate_limit()
    items = self.search_raw(query, types="track,album")

    seen: Dict[str, Artist] = {}
    artist_album_asin: Dict[str, str] = {}  # artist name -> first album ASIN seen
    for item in items:
        # `or ""` keeps a missing artist field from crashing the normaliser;
        # such hits are then skipped by the emptiness check below.
        name = _primary_artist(item.artist_name or "")
        if not name:
            continue
        if name not in seen:
            seen[name] = Artist.from_name(name)
        if name not in artist_album_asin and item.album_asin:
            artist_album_asin[name] = item.album_asin
        if len(seen) >= limit:
            break

    # T2Tunes has no artist images — use an album cover as stand-in.
    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # capped selection is deterministic and favours the top-ranked artists
    # (a plain set would pick an arbitrary five).
    unique_asins = list(dict.fromkeys(artist_album_asin.values()))[:max_album_lookups]
    album_metas = self._fetch_album_metas(unique_asins)

    for name, artist in seen.items():
        asin = artist_album_asin.get(name)
        if asin and asin in album_metas:
            artist.image_url = album_metas[asin].get("image")
    return list(seen.values())
def search_albums(self, query: str, limit: int = 20) -> List[Album]:
    """Search for albums, de-duplicated by display name and primary artist.

    Album names have the Explicit marker stripped and artist names are
    reduced to the primary artist. Hits whose normalised (name, artist) pair
    was already seen are dropped, keeping the first hit. Cover art, release
    date and track count are filled in from album metadata, fetched only for
    the leading candidates.

    Args:
        query: Free-text search string.
        limit: Maximum number of albums to return.

    Returns:
        The de-duplicated albums in search-result order.
    """
    max_album_lookups = 10  # cap on album-metadata requests per search

    _rate_limit()
    items = self.search_raw(query, types="track,album")

    album_candidates: List[tuple] = []  # (Album, asin)
    seen_keys: set = set()
    for item in items:
        if not item.is_album:
            continue
        album_asin = item.album_asin or item.asin
        # `or ""` keeps a missing field from crashing the normalisers.
        display_name = _strip_edition(item.album_name or item.title or "")
        artist = _primary_artist(item.artist_name or "")
        # Name-based dedup: hits whose names are identical once the Explicit
        # marker is stripped collapse into the first one seen. Clean/Edited/
        # Censored labels stay in the name, so those variants remain separate.
        dedup_key = (display_name.lower(), artist.lower())
        if dedup_key in seen_keys:
            continue
        seen_keys.add(dedup_key)
        album = Album.from_search_hit({
            "albumAsin": album_asin,
            "albumName": display_name,
            "artistName": artist,
        })
        album_candidates.append((album, album_asin))
        if len(album_candidates) >= limit:
            break

    # Skip candidates without an ASIN: there is nothing to look up for them.
    lookup_asins = [asin for _, asin in album_candidates[:max_album_lookups] if asin]
    album_metas = self._fetch_album_metas(lookup_asins)

    albums: List[Album] = []
    for album, asin in album_candidates:
        meta = album_metas.get(asin)
        if meta is not None:
            album.image_url = meta.get("image")
            album.release_date = str(meta.get("release_date") or "")
            album.total_tracks = int(meta.get("trackCount") or 0)
        albums.append(album)
    return albums
def get_track_details(self, asin: str) -> Optional[Dict[str, Any]]:
@ -413,8 +476,8 @@ class AmazonClient:
result: Dict[str, Any] = {
"id": asin,
"name": album.get("title", ""),
"artists": [{"name": album.get("artistName", ""), "id": ""}],
"name": _strip_edition(album.get("title", "")),
"artists": [{"name": _primary_artist(album.get("artistName", "")), "id": ""}],
"release_date": album.get("release_date", ""),
"total_tracks": album.get("trackCount", 0),
"album_type": "album",
@ -444,8 +507,8 @@ class AmazonClient:
"name": s.title,
"artists": [{"name": s.artist, "id": ""}],
"duration_ms": 0,
"track_number": None,
"disc_number": None,
"track_number": s.track_number,
"disc_number": s.disc_number,
"isrc": s.isrc,
}
for s in streams
@ -455,14 +518,15 @@ class AmazonClient:
def get_artist(self, artist_name: str) -> Optional[Dict[str, Any]]:
"""Return a Spotify-compatible artist dict inferred from search results."""
_rate_limit()
search_name = _unslugify(artist_name)
try:
items = self.search_raw(artist_name, types="track")
items = self.search_raw(search_name, types="track,album")
except AmazonClientError:
return None
name_lower = artist_name.lower()
name_lower = search_name.lower()
match = next(
(i for i in items if i.artist_name.lower() == name_lower),
next((i for i in items if name_lower in i.artist_name.lower()), None),
(i for i in items if _primary_artist(i.artist_name).lower() == name_lower),
next((i for i in items if name_lower in _primary_artist(i.artist_name).lower()), None),
)
if not match:
return None
@ -484,15 +548,18 @@ class AmazonClient:
) -> List[Album]:
"""Return albums for an artist inferred from search results."""
_rate_limit()
search_name = _unslugify(artist_name)
try:
items = self.search_raw(f"{artist_name} album", types="album")
items = self.search_raw(f"{search_name} album", types="track,album")
except AmazonClientError:
return []
albums: List[Album] = []
seen_asins: set = set()
name_lower = artist_name.lower()
name_lower = search_name.lower()
for item in items:
if item.artist_name.lower() != name_lower:
if not item.is_album:
continue
if _primary_artist(item.artist_name).lower() != name_lower:
continue
album_asin = item.album_asin or item.asin
if album_asin in seen_asins:
@ -520,6 +587,27 @@ class AmazonClient:
# Private helpers
# ------------------------------------------------------------------
def _fetch_album_metas(
    self, asins: List[str], max_workers: int = 5
) -> Dict[str, Dict[str, Any]]:
    """Fetch album metadata for several ASINs in parallel.

    Each lookup is best-effort: an ASIN whose request fails, or whose
    response has no usable ``albumList[0]`` dict, is simply absent from the
    result.

    Args:
        asins: Album ASINs to look up. Empty entries and duplicates are
            ignored.
        max_workers: Upper bound on concurrent lookups.

    Returns:
        ``{asin: albumList[0]}`` for every ASIN that resolved.
    """
    from concurrent.futures import ThreadPoolExecutor

    # Ordered de-duplication so no ASIN is requested twice.
    unique_asins = [asin for asin in dict.fromkeys(asins) if asin]
    if not unique_asins:
        return {}

    def _fetch(asin: str) -> Optional[Dict[str, Any]]:
        """Return albumList[0] for *asin*, or None when unavailable."""
        _rate_limit()
        try:
            raw = self.album_metadata(asin)
        except Exception as exc:
            # Metadata is decorative; one failed lookup must not fail the
            # search. Log it so failures are still diagnosable.
            logger.debug(f"Album metadata fetch failed for {asin}: {exc}")
            return None
        album_list = raw.get("albumList") if isinstance(raw, dict) else None
        if isinstance(album_list, list) and album_list and isinstance(album_list[0], dict):
            return album_list[0]
        return None

    workers = max(1, min(len(unique_asins), max_workers))
    with ThreadPoolExecutor(max_workers=workers) as pool:
        # pool.map yields results in input order, so the dict is assembled
        # on the calling thread instead of being mutated by the workers.
        results = list(pool.map(_fetch, unique_asins))
    return {asin: meta for asin, meta in zip(unique_asins, results) if meta is not None}
def _get_json(self, path: str, params: Optional[Dict[str, Any]] = None) -> Any:
url = urljoin(f"{self.base_url}/", path.lstrip("/"))
try:

@ -43,6 +43,7 @@ def build_source_only_artist_detail(
deezer_client: Optional[Any] = None,
itunes_client: Optional[Any] = None,
discogs_client: Optional[Any] = None,
amazon_client: Optional[Any] = None,
lastfm_api_key: Optional[str] = None,
) -> Tuple[Dict[str, Any], int]:
"""Build the artist-detail payload for a source-only artist.
@ -84,6 +85,12 @@ def build_source_only_artist_detail(
dc_artist = discogs_client.get_artist(artist_id)
if dc_artist:
source_genres = dc_artist.get("genres") or []
elif source == "amazon" and amazon_client is not None:
az_artist = amazon_client.get_artist(resolved_name or artist_id)
if az_artist:
source_genres = az_artist.get("genres") or []
if not image_url and az_artist.get("images"):
image_url = az_artist["images"][0].get("url")
except Exception as e:
logger.debug(f"Source-side artist info lookup failed for {source}:{artist_id}: {e}")

@ -25,7 +25,7 @@ logger = logging.getLogger("artist_source_lookup")
# Metadata sources whose artists can be shown on a source-only artist detail page.
SOURCE_ONLY_ARTIST_SOURCES = frozenset({
    "spotify", "itunes", "deezer", "discogs", "hydrabase", "musicbrainz", "amazon",
})

@ -124,6 +124,8 @@ MEDIA_RESPONSE_FLAC = {
"artist": "Kendrick Lamar",
"album": "GNX",
"isrc": "USRC12345678",
"trackNumber": "3",
"discNumber": "1",
},
}
@ -536,6 +538,42 @@ class TestSearchArtists:
artists = client.search_artists("Kendrick")
assert isinstance(artists[0], Artist)
def test_artist_image_from_album(self):
    """search_artists sets image_url when the hit carries an album ASIN and metadata is served."""
    track_hit = {
        "document": {
            "asin": "A1",
            "title": "T1",
            "artistName": "Kendrick Lamar",
            "__type": "track",
            "albumAsin": "B0ABCDE123",
        }
    }
    search_payload = {"results": [{"hits": [track_hit]}]}
    routes = {
        "amazon-music/search": search_payload,
        "amazon-music/metadata": ALBUM_METADATA_RESPONSE,
    }
    client = _make_client(routes)
    with patch("core.amazon_client._rate_limit"):
        found = client.search_artists("Kendrick")
    first_artist = found[0]
    assert first_artist.image_url == "https://example.com/cover.jpg"
def test_deduplicates_feat_credits(self):
    """Hits that differ only by a feat./ft./featuring credit collapse to one artist each."""
    rows = [
        ("A1", "T1", "Kendrick Lamar"),
        ("A2", "T2", "Kendrick Lamar feat. SZA"),
        ("A3", "T3", "Kendrick Lamar ft. Drake"),
        ("A4", "T4", "SZA featuring Kendrick Lamar"),
    ]
    hits = [
        {"document": {"asin": asin, "title": title, "artistName": artist_name, "__type": "track"}}
        for asin, title, artist_name in rows
    ]
    search_payload = {"results": [{"hits": hits}]}
    client = _make_client({"amazon-music/search": search_payload})
    with patch("core.amazon_client._rate_limit"):
        found = client.search_artists("Kendrick")
    found_names = [artist.name for artist in found]
    assert "Kendrick Lamar" in found_names
    assert "SZA" in found_names
    assert len(found) == 2
def test_respects_limit(self):
resp = {
"results": [
@ -591,6 +629,43 @@ class TestSearchAlbums:
albums = client.search_albums("Kendrick")
assert albums == []
def test_strips_explicit_from_album_name(self):
    """An "(Explicit)" suffix is removed from the album name returned by search_albums."""
    explicit_doc = dict(ALBUM_DOC)
    explicit_doc.update({"albumName": "GNX (Explicit)", "title": "GNX (Explicit)"})
    search_payload = {"results": [{"hits": [{"document": explicit_doc}]}]}
    client = _make_client({"amazon-music/search": search_payload})
    with patch("core.amazon_client._rate_limit"):
        found = client.search_albums("GNX")
    first_album = found[0]
    assert first_album.name == "GNX"
def test_keeps_clean_suffix(self):
    """A "[Clean]" suffix is left in the album name returned by search_albums."""
    clean_doc = dict(ALBUM_DOC)
    clean_doc.update({"albumName": "GNX [Clean]", "title": "GNX [Clean]"})
    search_payload = {"results": [{"hits": [{"document": clean_doc}]}]}
    client = _make_client({"amazon-music/search": search_payload})
    with patch("core.amazon_client._rate_limit"):
        found = client.search_albums("GNX")
    first_album = found[0]
    assert first_album.name == "GNX [Clean]"
def test_deduplicates_explicit_clean_as_separate(self):
    """Explicit and Clean editions of one album are returned as two distinct entries."""
    explicit_doc = dict(ALBUM_DOC)
    explicit_doc.update({
        "asin": "B1",
        "albumAsin": "B1",
        "albumName": "GNX (Explicit)",
        "title": "GNX (Explicit)",
    })
    clean_doc = dict(ALBUM_DOC)
    clean_doc.update({
        "asin": "B2",
        "albumAsin": "B2",
        "albumName": "GNX [Clean]",
        "title": "GNX [Clean]",
    })
    hits = [{"document": explicit_doc}, {"document": clean_doc}]
    search_payload = {"results": [{"hits": hits}]}
    client = _make_client({"amazon-music/search": search_payload})
    with patch("core.amazon_client._rate_limit"):
        found = client.search_albums("GNX")
    found_names = [album.name for album in found]
    assert "GNX" in found_names  # explicit stripped
    assert "GNX [Clean]" in found_names
    assert len(found) == 2
# ---------------------------------------------------------------------------
# AmazonClient — album_metadata / media_from_asin
@ -766,6 +841,8 @@ class TestGetAlbumTracks:
assert item["id"] == "B09XYZ1234"
assert item["name"] == "Not Like Us"
assert item["isrc"] == "USRC12345678"
assert item["track_number"] == 3
assert item["disc_number"] == 1
def test_returns_none_on_api_error(self):
client = _make_client()

@ -7283,6 +7283,13 @@ def _build_source_only_artist_detail(artist_id, artist_name, source):
except Exception as e:
logger.debug(f"Discogs client resolution failed: {e}")
az = None
try:
from core.metadata.registry import get_amazon_client
az = get_amazon_client()
except Exception as e:
logger.debug(f"Amazon client resolution failed: {e}")
try:
lastfm_api_key = config_manager.get('lastfm.api_key', '') or None
except Exception:
@ -7296,6 +7303,7 @@ def _build_source_only_artist_detail(artist_id, artist_name, source):
deezer_client=dz,
itunes_client=it,
discogs_client=dc,
amazon_client=az,
lastfm_api_key=lastfm_api_key,
)
return jsonify(payload), status
@ -18665,6 +18673,9 @@ def get_spotify_album_tracks(album_id):
client = _get_deezer_client()
elif source_override == 'discogs':
client = _get_discogs_client()
elif source_override == 'amazon':
from core.metadata.registry import get_amazon_client
client = get_amazon_client()
elif source_override == 'musicbrainz':
try:
from core.musicbrainz_search import MusicBrainzSearchClient

@ -3417,6 +3417,7 @@ const WHATS_NEW = {
{ unreleased: true },
{ title: 'Amazon Music Metadata Source', desc: 'Amazon Music is now a selectable primary metadata source alongside Spotify, iTunes, Deezer, and Discogs. backed by the same T2Tunes proxy as the download source — no account needed. covers track search, album lookup with cover art, and artist discography. select it under Settings → Connections → Metadata Source.', page: 'settings' },
{ title: 'Amazon Music Download Source', desc: 'new download source backed by T2Tunes proxy. searches the Amazon Music catalog, downloads 24-bit/48kHz FLAC (or Opus 320kbps / Dolby Atmos EAC3 fallback). codec waterfall mirrors Tidal/Qobuz — best quality first, auto-fallback. selectable as a standalone or hybrid source from Settings.', page: 'settings' },
{ title: 'Amazon Music Search Quality', desc: 'search results now show album art, artist images (album cover stand-in, same as iTunes), and correct track/disc numbers. feat. credits stripped from artist names so the same artist does not show as duplicates. [Explicit] stripped from album names so MusicBrainz matching works cleanly — Clean / Edited / Censored labels kept as-is. album clicks and artist detail pages now open instead of 404ing.', page: 'search' },
],
'2.5.2': [
// --- May 13, 2026 — 2.5.2 release ---

Loading…
Cancel
Save