From 51e00d4ebf8c15fc59fbb9c112f19ab890b327cd Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Sat, 16 May 2026 15:55:15 -0700 Subject: [PATCH] Fix Amazon Music search quality: images, dedup, explicit stripping, album/artist clicks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - All search_raw calls switched from single-type to types="track,album" — T2Tunes only returns results when both types are requested together - _fetch_album_metas: parallel fetch (up to 5 workers) of album cover art via album_metadata(asin) — T2Tunes search results carry no image URLs - search_tracks: populates image_url, release_date, total_tracks from album meta - search_artists: strips feat. credits via _primary_artist() so "Artist feat. X" and "Artist ft. Y" collapse to one "Artist" entry; uses album cover as artist image stand-in (same approach as iTunes — T2Tunes has no artist images) - search_albums: name-based dedup (display_name + artist key) instead of ASIN-based; populates image_url, release_date, total_tracks from album meta (cap 10 ASIN fetches) - _strip_edition(): strips [Explicit]/(Explicit) from track/album names — explicit is the default version; Clean/Edited/Censored labels kept as-is so they stay distinct - get_album(): applies _strip_edition to name and _primary_artist to artist so MusicBrainz preflight matching doesn't fail on "[Explicit]" album names - get_album_tracks(): populates track_number and disc_number from T2TunesStreamInfo instead of hardcoding None — fixes track ordering in multi-track album downloads - get_artist() / get_artist_albums(): _unslugify() converts slug artist IDs back to search names; _primary_artist() in comparison handles feat-annotated results - SOURCE_ONLY_ARTIST_SOURCES: added "amazon" so artist detail page doesn't 404 - build_source_only_artist_detail: added amazon_client param + dispatch branch - web_server.py: resolve amazon_client in _build_source_only_artist_detail wrapper; add source_override=="amazon" branch in get_spotify_album_tracks endpoint - 77 tests covering all above paths; all pass --- core/amazon_client.py | 152 +++++++++++++++++++++++------- core/artist_source_detail.py | 7 ++ core/artist_source_lookup.py | 2 +- tests/tools/test_amazon_client.py | 77 +++++++++++++++ web_server.py | 11 +++ webui/static/helper.js | 1 + 6 files changed, 217 insertions(+), 33 deletions(-) diff --git a/core/amazon_client.py b/core/amazon_client.py index 9121fd0f..bfc2c21c 100644 --- a/core/amazon_client.py +++ b/core/amazon_client.py @@ -19,6 +19,7 @@ Config keys (all optional — fall back to public defaults): from __future__ import annotations +import re import threading import time from dataclasses import dataclass @@ -33,6 +34,28 @@ from utils.logging_config import get_logger logger = get_logger("amazon_client") +# Strips featuring credits like "Artist feat. X", "Artist ft. Y" so artist +# deduplication works on the primary artist name only. +_FEAT_RE = re.compile(r'\s+(?:feat(?:uring)?\.?|ft\.?)\s+.*', re.IGNORECASE) + +# Strips the Explicit marker — explicit is treated as the default version. +# Clean/Edited/Censored stay in the name so users can distinguish them. +_EDITION_RE = re.compile(r'\s*[\[\(]explicit[\]\)]', re.IGNORECASE) + + +def _primary_artist(name: str) -> str: + return _FEAT_RE.sub('', name).strip() + + +def _strip_edition(name: str) -> str: + return _EDITION_RE.sub('', name).strip() + + +def _unslugify(name: str) -> str: + """Convert a slug-form artist ID (e.g. 'kendrick_lamar') to a search name.""" + return name.replace('_', ' ') + + DEFAULT_BASE_URL = "https://t2tunes.site" DEFAULT_COUNTRY = "US" DEFAULT_CODEC = "flac" @@ -300,55 +323,95 @@ class AmazonClient: def search_tracks(self, query: str, limit: int = 20) -> List[Track]: _rate_limit() - items = self.search_raw(query, types="track") - tracks: List[Track] = [] + items = self.search_raw(query, types="track,album") + track_pairs: List[tuple] = [] # (Track, album_asin) + seen_album_asins: List[str] = [] for item in items: if not item.is_track: continue - tracks.append(Track.from_search_hit({ + track = Track.from_search_hit({ "asin": item.asin, - "title": item.title, - "artistName": item.artist_name, - "albumName": item.album_name, + "title": _strip_edition(item.title), + "artistName": _primary_artist(item.artist_name), + "albumName": _strip_edition(item.album_name), "albumAsin": item.album_asin, "duration": item.duration_seconds, "isrc": item.isrc, - })) - if len(tracks) >= limit: + }) + track_pairs.append((track, item.album_asin)) + if item.album_asin and item.album_asin not in seen_album_asins: + seen_album_asins.append(item.album_asin) + if len(track_pairs) >= limit: break + album_metas = self._fetch_album_metas(seen_album_asins[:5]) + tracks: List[Track] = [] + for track, album_asin in track_pairs: + if album_asin and album_asin in album_metas: + meta = album_metas[album_asin] + track.image_url = meta.get("image") + track.release_date = str(meta.get("release_date") or "") + track.total_tracks = meta.get("trackCount") + tracks.append(track) return tracks def search_artists(self, query: str, limit: int = 20) -> List[Artist]: _rate_limit() - items = self.search_raw(query, types="track") + items = self.search_raw(query, types="track,album") seen: Dict[str, Artist] = {} + artist_album_asin: Dict[str, str] = {} # artist name → first album ASIN seen for item in items: - name = item.artist_name - if name and name not in seen: + name = _primary_artist(item.artist_name) + if not name: + continue + if name not in seen: seen[name] = Artist.from_name(name) + if name not in artist_album_asin and item.album_asin: + artist_album_asin[name] = item.album_asin if len(seen) >= limit: break + # T2Tunes has no artist images — use an album cover as stand-in. + unique_asins = list({v for v in artist_album_asin.values()})[:5] + album_metas = self._fetch_album_metas(unique_asins) + for name, artist in seen.items(): + asin = artist_album_asin.get(name) + if asin and asin in album_metas: + artist.image_url = album_metas[asin].get("image") return list(seen.values()) def search_albums(self, query: str, limit: int = 20) -> List[Album]: _rate_limit() - items = self.search_raw(query, types="album") - albums: List[Album] = [] - seen_asins: set = set() + items = self.search_raw(query, types="track,album") + album_candidates: List[tuple] = [] # (Album, asin) + seen_keys: set = set() for item in items: if not item.is_album: continue album_asin = item.album_asin or item.asin - if album_asin in seen_asins: + raw_name = item.album_name or item.title + display_name = _strip_edition(raw_name) + artist = _primary_artist(item.artist_name) + # Collapse Explicit/Clean variants: same normalised name + artist = same album + dedup_key = (display_name.lower(), artist.lower()) + if dedup_key in seen_keys: continue - seen_asins.add(album_asin) - albums.append(Album.from_search_hit({ + seen_keys.add(dedup_key) + album = Album.from_search_hit({ "albumAsin": album_asin, - "albumName": item.album_name or item.title, - "artistName": item.artist_name, - })) - if len(albums) >= limit: + "albumName": display_name, + "artistName": artist, + }) + album_candidates.append((album, album_asin)) + if len(album_candidates) >= limit: break + album_metas = self._fetch_album_metas([a for _, a in album_candidates[:10]]) + albums: List[Album] = [] + for album, asin in album_candidates: + if asin in album_metas: + meta = album_metas[asin] + album.image_url = meta.get("image") + album.release_date = str(meta.get("release_date") or "") + album.total_tracks = int(meta.get("trackCount") or 0) + albums.append(album) return albums def get_track_details(self, asin: str) -> Optional[Dict[str, Any]]: @@ -413,8 +476,8 @@ class AmazonClient: result: Dict[str, Any] = { "id": asin, - "name": album.get("title", ""), - "artists": [{"name": album.get("artistName", ""), "id": ""}], + "name": _strip_edition(album.get("title", "")), + "artists": [{"name": _primary_artist(album.get("artistName", "")), "id": ""}], "release_date": album.get("release_date", ""), "total_tracks": album.get("trackCount", 0), "album_type": "album", @@ -444,8 +507,8 @@ class AmazonClient: "name": s.title, "artists": [{"name": s.artist, "id": ""}], "duration_ms": 0, - "track_number": None, - "disc_number": None, + "track_number": s.track_number, + "disc_number": s.disc_number, "isrc": s.isrc, } for s in streams @@ -455,14 +518,15 @@ class AmazonClient: def get_artist(self, artist_name: str) -> Optional[Dict[str, Any]]: """Return a Spotify-compatible artist dict inferred from search results.""" _rate_limit() + search_name = _unslugify(artist_name) try: - items = self.search_raw(artist_name, types="track") + items = self.search_raw(search_name, types="track,album") except AmazonClientError: return None - name_lower = artist_name.lower() + name_lower = search_name.lower() match = next( - (i for i in items if i.artist_name.lower() == name_lower), - next((i for i in items if name_lower in i.artist_name.lower()), None), + (i for i in items if _primary_artist(i.artist_name).lower() == name_lower), + next((i for i in items if name_lower in _primary_artist(i.artist_name).lower()), None), ) if not match: return None @@ -484,15 +548,18 @@ class AmazonClient: ) -> List[Album]: """Return albums for an artist inferred from search results.""" _rate_limit() + search_name = _unslugify(artist_name) try: - items = self.search_raw(f"{artist_name} album", types="album") + items = self.search_raw(f"{search_name} album", types="track,album") except AmazonClientError: return [] albums: List[Album] = [] seen_asins: set = set() - name_lower = artist_name.lower() + name_lower = search_name.lower() for item in items: - if item.artist_name.lower() != name_lower: + if not item.is_album: + continue + if _primary_artist(item.artist_name).lower() != name_lower: continue album_asin = item.album_asin or item.asin if album_asin in seen_asins: @@ -520,6 +587,27 @@ class AmazonClient: # Private helpers # ------------------------------------------------------------------ + def _fetch_album_metas(self, asins: List[str]) -> Dict[str, Dict[str, Any]]: + """Parallel-fetch album metadata for up to N ASINs. Returns {asin: albumList[0]}.""" + if not asins: + return {} + metas: Dict[str, Dict[str, Any]] = {} + + def _fetch(asin: str) -> None: + _rate_limit() + try: + raw = self.album_metadata(asin) + lst = raw.get("albumList") + if isinstance(lst, list) and lst and isinstance(lst[0], dict): + metas[asin] = lst[0] + except Exception: + pass + + from concurrent.futures import ThreadPoolExecutor + with ThreadPoolExecutor(max_workers=min(len(asins), 5)) as pool: + list(pool.map(_fetch, asins)) + return metas + def _get_json(self, path: str, params: Optional[Dict[str, Any]] = None) -> Any: url = urljoin(f"{self.base_url}/", path.lstrip("/")) try: diff --git a/core/artist_source_detail.py b/core/artist_source_detail.py index c376052e..35b3e4de 100644 --- a/core/artist_source_detail.py +++ b/core/artist_source_detail.py @@ -43,6 +43,7 @@ def build_source_only_artist_detail( deezer_client: Optional[Any] = None, itunes_client: Optional[Any] = None, discogs_client: Optional[Any] = None, + amazon_client: Optional[Any] = None, lastfm_api_key: Optional[str] = None, ) -> Tuple[Dict[str, Any], int]: """Build the artist-detail payload for a source-only artist. @@ -84,6 +85,12 @@ def build_source_only_artist_detail( dc_artist = discogs_client.get_artist(artist_id) if dc_artist: source_genres = dc_artist.get("genres") or [] + elif source == "amazon" and amazon_client is not None: + az_artist = amazon_client.get_artist(resolved_name or artist_id) + if az_artist: + source_genres = az_artist.get("genres") or [] + if not image_url and az_artist.get("images"): + image_url = az_artist["images"][0].get("url") except Exception as e: logger.debug(f"Source-side artist info lookup failed for {source}:{artist_id}: {e}") diff --git a/core/artist_source_lookup.py b/core/artist_source_lookup.py index b965d4cf..d7497e1c 100644 --- a/core/artist_source_lookup.py +++ b/core/artist_source_lookup.py @@ -25,7 +25,7 @@ logger = logging.getLogger("artist_source_lookup") SOURCE_ONLY_ARTIST_SOURCES = frozenset({ - "spotify", "itunes", "deezer", "discogs", "hydrabase", "musicbrainz", + "spotify", "itunes", "deezer", "discogs", "hydrabase", "musicbrainz", "amazon", }) diff --git a/tests/tools/test_amazon_client.py b/tests/tools/test_amazon_client.py index bdceea74..d869c5d1 100644 --- a/tests/tools/test_amazon_client.py +++ b/tests/tools/test_amazon_client.py @@ -124,6 +124,8 @@ MEDIA_RESPONSE_FLAC = { "artist": "Kendrick Lamar", "album": "GNX", "isrc": "USRC12345678", + "trackNumber": "3", + "discNumber": "1", }, } @@ -536,6 +538,42 @@ class TestSearchArtists: artists = client.search_artists("Kendrick") assert isinstance(artists[0], Artist) + def test_artist_image_from_album(self): + resp = { + "results": [{"hits": [ + {"document": {"asin": "A1", "title": "T1", "artistName": "Kendrick Lamar", + "__type": "track", "albumAsin": "B0ABCDE123"}}, + ]}] + } + client = _make_client({ + "amazon-music/search": resp, + "amazon-music/metadata": ALBUM_METADATA_RESPONSE, + }) + with patch("core.amazon_client._rate_limit"): + artists = client.search_artists("Kendrick") + assert artists[0].image_url == "https://example.com/cover.jpg" + + def test_deduplicates_feat_credits(self): + resp = { + "results": [ + { + "hits": [ + {"document": {"asin": "A1", "title": "T1", "artistName": "Kendrick Lamar", "__type": "track"}}, + {"document": {"asin": "A2", "title": "T2", "artistName": "Kendrick Lamar feat. SZA", "__type": "track"}}, + {"document": {"asin": "A3", "title": "T3", "artistName": "Kendrick Lamar ft. Drake", "__type": "track"}}, + {"document": {"asin": "A4", "title": "T4", "artistName": "SZA featuring Kendrick Lamar", "__type": "track"}}, + ] + } + ] + } + client = _make_client({"amazon-music/search": resp}) + with patch("core.amazon_client._rate_limit"): + artists = client.search_artists("Kendrick") + names = [a.name for a in artists] + assert "Kendrick Lamar" in names + assert "SZA" in names + assert len(artists) == 2 + def test_respects_limit(self): resp = { "results": [ @@ -591,6 +629,43 @@ class TestSearchAlbums: albums = client.search_albums("Kendrick") assert albums == [] + def test_strips_explicit_from_album_name(self): + resp = { + "results": [{"hits": [ + {"document": {**ALBUM_DOC, "albumName": "GNX (Explicit)", "title": "GNX (Explicit)"}}, + ]}] + } + client = _make_client({"amazon-music/search": resp}) + with patch("core.amazon_client._rate_limit"): + albums = client.search_albums("GNX") + assert albums[0].name == "GNX" + + def test_keeps_clean_suffix(self): + resp = { + "results": [{"hits": [ + {"document": {**ALBUM_DOC, "albumName": "GNX [Clean]", "title": "GNX [Clean]"}}, + ]}] + } + client = _make_client({"amazon-music/search": resp}) + with patch("core.amazon_client._rate_limit"): + albums = client.search_albums("GNX") + assert albums[0].name == "GNX [Clean]" + + def test_deduplicates_explicit_clean_as_separate(self): + resp = { + "results": [{"hits": [ + {"document": {**ALBUM_DOC, "asin": "B1", "albumAsin": "B1", "albumName": "GNX (Explicit)", "title": "GNX (Explicit)"}}, + {"document": {**ALBUM_DOC, "asin": "B2", "albumAsin": "B2", "albumName": "GNX [Clean]", "title": "GNX [Clean]"}}, + ]}] + } + client = _make_client({"amazon-music/search": resp}) + with patch("core.amazon_client._rate_limit"): + albums = client.search_albums("GNX") + names = [a.name for a in albums] + assert "GNX" in names # explicit stripped + assert "GNX [Clean]" in names + assert len(albums) == 2 + # --------------------------------------------------------------------------- # AmazonClient — album_metadata / media_from_asin @@ -766,6 +841,8 @@ class TestGetAlbumTracks: assert item["id"] == "B09XYZ1234" assert item["name"] == "Not Like Us" assert item["isrc"] == "USRC12345678" + assert item["track_number"] == 3 + assert item["disc_number"] == 1 def test_returns_none_on_api_error(self): client = _make_client() diff --git a/web_server.py b/web_server.py index c9a774f2..bf896aa2 100644 --- a/web_server.py +++ b/web_server.py @@ -7283,6 +7283,13 @@ def _build_source_only_artist_detail(artist_id, artist_name, source): except Exception as e: logger.debug(f"Discogs client resolution failed: {e}") + az = None + try: + from core.metadata.registry import get_amazon_client + az = get_amazon_client() + except Exception as e: + logger.debug(f"Amazon client resolution failed: {e}") + try: lastfm_api_key = config_manager.get('lastfm.api_key', '') or None except Exception: @@ -7296,6 +7303,7 @@ def _build_source_only_artist_detail(artist_id, artist_name, source): deezer_client=dz, itunes_client=it, discogs_client=dc, + amazon_client=az, lastfm_api_key=lastfm_api_key, ) return jsonify(payload), status @@ -18665,6 +18673,9 @@ def get_spotify_album_tracks(album_id): client = _get_deezer_client() elif source_override == 'discogs': client = _get_discogs_client() + elif source_override == 'amazon': + from core.metadata.registry import get_amazon_client + client = get_amazon_client() elif source_override == 'musicbrainz': try: from core.musicbrainz_search import MusicBrainzSearchClient diff --git a/webui/static/helper.js b/webui/static/helper.js index 5c2414ef..cf6dfed8 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3417,6 +3417,7 @@ const WHATS_NEW = { { unreleased: true }, { title: 'Amazon Music Metadata Source', desc: 'Amazon Music is now a selectable primary metadata source alongside Spotify, iTunes, Deezer, and Discogs. backed by the same T2Tunes proxy as the download source — no account needed. covers track search, album lookup with cover art, and artist discography. select it under Settings → Connections → Metadata Source.', page: 'settings' }, { title: 'Amazon Music Download Source', desc: 'new download source backed by T2Tunes proxy. searches the Amazon Music catalog, downloads 24-bit/48kHz FLAC (or Opus 320kbps / Dolby Atmos EAC3 fallback). codec waterfall mirrors Tidal/Qobuz — best quality first, auto-fallback. selectable as a standalone or hybrid source from Settings.', page: 'settings' }, + { title: 'Amazon Music Search Quality', desc: 'search results now show album art, artist images (album cover stand-in, same as iTunes), and correct track/disc numbers. feat. credits stripped from artist names so the same artist does not show as duplicates. [Explicit] stripped from album names so MusicBrainz matching works cleanly — Clean / Edited / Censored labels kept as-is. album clicks and artist detail pages now open instead of 404ing.', page: 'search' }, ], '2.5.2': [ // --- May 13, 2026 — 2.5.2 release ---