From 51e00d4ebf8c15fc59fbb9c112f19ab890b327cd Mon Sep 17 00:00:00 2001
From: Broque Thomas <26755000+Nezreka@users.noreply.github.com>
Date: Sat, 16 May 2026 15:55:15 -0700
Subject: [PATCH] Fix Amazon Music search quality: images, dedup, explicit
 stripping, album/artist clicks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- All search_raw calls switched from single-type to types="track,album" — T2Tunes only
  returns results when both types are requested together
- _fetch_album_metas: parallel fetch (up to 5 workers) of album metadata (cover art,
  release date, track count) via album_metadata(asin) — T2Tunes search results carry
  no image URLs
- search_tracks: populates image_url, release_date, total_tracks from album meta
  (cap 5 ASIN fetches)
- search_artists: strips feat. credits via _primary_artist() so "Artist feat. X" and
  "Artist ft. Y" collapse to one "Artist" entry; uses album cover as artist image
  stand-in (same approach as iTunes — T2Tunes has no artist images)
- search_albums: name-based dedup (display_name + artist key) instead of ASIN-based;
  populates image_url, release_date, total_tracks from album meta (cap 10 ASIN fetches)
- _strip_edition(): strips [Explicit]/(Explicit) from track/album names — explicit is
  the default version; Clean/Edited/Censored labels kept as-is so they stay distinct
- get_album(): applies _strip_edition to name and _primary_artist to artist so
  MusicBrainz preflight matching doesn't fail on "[Explicit]" album names
- get_album_tracks(): populates track_number and disc_number from T2TunesStreamInfo
  instead of hardcoding None — fixes track ordering in multi-track album downloads
- get_artist() / get_artist_albums(): _unslugify() converts slug artist IDs back to
  search names; _primary_artist() in comparison handles feat-annotated results
- SOURCE_ONLY_ARTIST_SOURCES: added "amazon" so artist detail page doesn't 404
- build_source_only_artist_detail: added amazon_client param + dispatch branch
- web_server.py: resolve amazon_client in _build_source_only_artist_detail wrapper;
  add source_override=="amazon" branch in get_spotify_album_tracks endpoint
- 77 tests covering all above paths; all pass
---
 core/amazon_client.py             | 152 +++++++++++++++++++++++-------
 core/artist_source_detail.py      |   7 ++
 core/artist_source_lookup.py      |   2 +-
 tests/tools/test_amazon_client.py |  77 +++++++++++++++
 web_server.py                     |  11 +++
 webui/static/helper.js            |   1 +
 6 files changed, 217 insertions(+), 33 deletions(-)

diff --git a/core/amazon_client.py b/core/amazon_client.py
index 9121fd0f..bfc2c21c 100644
--- a/core/amazon_client.py
+++ b/core/amazon_client.py
@@ -19,6 +19,7 @@ Config keys (all optional — fall back to public defaults):
 
 from __future__ import annotations
 
+import re
 import threading
 import time
 from dataclasses import dataclass
@@ -33,6 +34,28 @@ from utils.logging_config import get_logger
 
 logger = get_logger("amazon_client")
 
+# Strips featuring credits like "Artist feat. X", "Artist ft. Y" so artist
+# deduplication works on the primary artist name only.
+_FEAT_RE = re.compile(r'\s+(?:feat(?:uring)?\.?|ft\.?)\s+.*', re.IGNORECASE)
+
+# Strips the Explicit marker — explicit is treated as the default version.
+# Clean/Edited/Censored stay in the name so users can distinguish them.
+_EDITION_RE = re.compile(r'\s*[\[\(]explicit[\]\)]', re.IGNORECASE)
+
+
+def _primary_artist(name: str) -> str:
+    return _FEAT_RE.sub('', name).strip()
+
+
+def _strip_edition(name: str) -> str:
+    return _EDITION_RE.sub('', name).strip()
+
+
+def _unslugify(name: str) -> str:
+    """Convert a slug-form artist ID (e.g. 'kendrick_lamar') to a search name."""
+    return name.replace('_', ' ')
+
+
 DEFAULT_BASE_URL = "https://t2tunes.site"
 DEFAULT_COUNTRY = "US"
 DEFAULT_CODEC = "flac"
@@ -300,55 +323,95 @@ class AmazonClient:
 
     def search_tracks(self, query: str, limit: int = 20) -> List[Track]:
         _rate_limit()
-        items = self.search_raw(query, types="track")
-        tracks: List[Track] = []
+        items = self.search_raw(query, types="track,album")
+        track_pairs: List[tuple] = []   # (Track, album_asin)
+        seen_album_asins: List[str] = []
         for item in items:
             if not item.is_track:
                 continue
-            tracks.append(Track.from_search_hit({
+            track = Track.from_search_hit({
                 "asin": item.asin,
-                "title": item.title,
-                "artistName": item.artist_name,
-                "albumName": item.album_name,
+                "title": _strip_edition(item.title),
+                "artistName": _primary_artist(item.artist_name),
+                "albumName": _strip_edition(item.album_name),
                 "albumAsin": item.album_asin,
                 "duration": item.duration_seconds,
                 "isrc": item.isrc,
-            }))
-            if len(tracks) >= limit:
+            })
+            track_pairs.append((track, item.album_asin))
+            if item.album_asin and item.album_asin not in seen_album_asins:
+                seen_album_asins.append(item.album_asin)
+            if len(track_pairs) >= limit:
                 break
+        album_metas = self._fetch_album_metas(seen_album_asins[:5])
+        tracks: List[Track] = []
+        for track, album_asin in track_pairs:
+            if album_asin and album_asin in album_metas:
+                meta = album_metas[album_asin]
+                track.image_url = meta.get("image")
+                track.release_date = str(meta.get("release_date") or "")
+                track.total_tracks = meta.get("trackCount")
+            tracks.append(track)
         return tracks
 
     def search_artists(self, query: str, limit: int = 20) -> List[Artist]:
         _rate_limit()
-        items = self.search_raw(query, types="track")
+        items = self.search_raw(query, types="track,album")
         seen: Dict[str, Artist] = {}
+        artist_album_asin: Dict[str, str] = {}  # artist name → first album ASIN seen
         for item in items:
-            name = item.artist_name
-            if name and name not in seen:
+            name = _primary_artist(item.artist_name)
+            if not name:
+                continue
+            if name not in seen:
                 seen[name] = Artist.from_name(name)
+            if name not in artist_album_asin and item.album_asin:
+                artist_album_asin[name] = item.album_asin
             if len(seen) >= limit:
                 break
+        # T2Tunes has no artist images — use an album cover as stand-in.
+        unique_asins = list({v for v in artist_album_asin.values()})[:5]
+        album_metas = self._fetch_album_metas(unique_asins)
+        for name, artist in seen.items():
+            asin = artist_album_asin.get(name)
+            if asin and asin in album_metas:
+                artist.image_url = album_metas[asin].get("image")
         return list(seen.values())
 
     def search_albums(self, query: str, limit: int = 20) -> List[Album]:
         _rate_limit()
-        items = self.search_raw(query, types="album")
-        albums: List[Album] = []
-        seen_asins: set = set()
+        items = self.search_raw(query, types="track,album")
+        album_candidates: List[tuple] = []  # (Album, asin)
+        seen_keys: set = set()
         for item in items:
             if not item.is_album:
                 continue
             album_asin = item.album_asin or item.asin
-            if album_asin in seen_asins:
+            raw_name = item.album_name or item.title
+            display_name = _strip_edition(raw_name)
+            artist = _primary_artist(item.artist_name)
+            # Collapse Explicit/unlabelled variants (Clean/Edited keep their suffix): same normalised name + artist = same album
+            dedup_key = (display_name.lower(), artist.lower())
+            if dedup_key in seen_keys:
                 continue
-            seen_asins.add(album_asin)
-            albums.append(Album.from_search_hit({
+            seen_keys.add(dedup_key)
+            album = Album.from_search_hit({
                 "albumAsin": album_asin,
-                "albumName": item.album_name or item.title,
-                "artistName": item.artist_name,
-            }))
-            if len(albums) >= limit:
+                "albumName": display_name,
+                "artistName": artist,
+            })
+            album_candidates.append((album, album_asin))
+            if len(album_candidates) >= limit:
                 break
+        album_metas = self._fetch_album_metas([a for _, a in album_candidates[:10]])
+        albums: List[Album] = []
+        for album, asin in album_candidates:
+            if asin in album_metas:
+                meta = album_metas[asin]
+                album.image_url = meta.get("image")
+                album.release_date = str(meta.get("release_date") or "")
+                album.total_tracks = int(meta.get("trackCount") or 0)
+            albums.append(album)
         return albums
 
     def get_track_details(self, asin: str) -> Optional[Dict[str, Any]]:
@@ -413,8 +476,8 @@ class AmazonClient:
 
         result: Dict[str, Any] = {
             "id": asin,
-            "name": album.get("title", ""),
-            "artists": [{"name": album.get("artistName", ""), "id": ""}],
+            "name": _strip_edition(album.get("title", "")),
+            "artists": [{"name": _primary_artist(album.get("artistName", "")), "id": ""}],
             "release_date": album.get("release_date", ""),
             "total_tracks": album.get("trackCount", 0),
             "album_type": "album",
@@ -444,8 +507,8 @@ class AmazonClient:
                 "name": s.title,
                 "artists": [{"name": s.artist, "id": ""}],
                 "duration_ms": 0,
-                "track_number": None,
-                "disc_number": None,
+                "track_number": s.track_number,
+                "disc_number": s.disc_number,
                 "isrc": s.isrc,
             }
             for s in streams
@@ -455,14 +518,15 @@ class AmazonClient:
     def get_artist(self, artist_name: str) -> Optional[Dict[str, Any]]:
         """Return a Spotify-compatible artist dict inferred from search results."""
         _rate_limit()
+        search_name = _unslugify(artist_name)
         try:
-            items = self.search_raw(artist_name, types="track")
+            items = self.search_raw(search_name, types="track,album")
         except AmazonClientError:
             return None
-        name_lower = artist_name.lower()
+        name_lower = search_name.lower()
         match = next(
-            (i for i in items if i.artist_name.lower() == name_lower),
-            next((i for i in items if name_lower in i.artist_name.lower()), None),
+            (i for i in items if _primary_artist(i.artist_name).lower() == name_lower),
+            next((i for i in items if name_lower in _primary_artist(i.artist_name).lower()), None),
         )
         if not match:
             return None
@@ -484,15 +548,18 @@ class AmazonClient:
     ) -> List[Album]:
         """Return albums for an artist inferred from search results."""
         _rate_limit()
+        search_name = _unslugify(artist_name)
         try:
-            items = self.search_raw(f"{artist_name} album", types="album")
+            items = self.search_raw(f"{search_name} album", types="track,album")
         except AmazonClientError:
             return []
         albums: List[Album] = []
         seen_asins: set = set()
-        name_lower = artist_name.lower()
+        name_lower = search_name.lower()
         for item in items:
-            if item.artist_name.lower() != name_lower:
+            if not item.is_album:
+                continue
+            if _primary_artist(item.artist_name).lower() != name_lower:
                 continue
             album_asin = item.album_asin or item.asin
             if album_asin in seen_asins:
@@ -520,6 +587,27 @@ class AmazonClient:
     # Private helpers
     # ------------------------------------------------------------------
 
+    def _fetch_album_metas(self, asins: List[str]) -> Dict[str, Dict[str, Any]]:
+        """Parallel-fetch album metadata for the given ASINs (at most 5 workers). Returns {asin: albumList[0]}; failed or empty lookups are omitted."""
+        if not asins:
+            return {}
+        metas: Dict[str, Dict[str, Any]] = {}
+
+        def _fetch(asin: str) -> None:
+            _rate_limit()
+            try:
+                raw = self.album_metadata(asin)
+                lst = raw.get("albumList")
+                if isinstance(lst, list) and lst and isinstance(lst[0], dict):
+                    metas[asin] = lst[0]
+            except Exception:
+                pass
+
+        from concurrent.futures import ThreadPoolExecutor
+        with ThreadPoolExecutor(max_workers=min(len(asins), 5)) as pool:
+            list(pool.map(_fetch, asins))
+        return metas
+
     def _get_json(self, path: str, params: Optional[Dict[str, Any]] = None) -> Any:
         url = urljoin(f"{self.base_url}/", path.lstrip("/"))
         try:
diff --git a/core/artist_source_detail.py b/core/artist_source_detail.py
index c376052e..35b3e4de 100644
--- a/core/artist_source_detail.py
+++ b/core/artist_source_detail.py
@@ -43,6 +43,7 @@ def build_source_only_artist_detail(
     deezer_client: Optional[Any] = None,
     itunes_client: Optional[Any] = None,
     discogs_client: Optional[Any] = None,
+    amazon_client: Optional[Any] = None,
     lastfm_api_key: Optional[str] = None,
 ) -> Tuple[Dict[str, Any], int]:
     """Build the artist-detail payload for a source-only artist.
@@ -84,6 +85,12 @@ def build_source_only_artist_detail(
             dc_artist = discogs_client.get_artist(artist_id)
             if dc_artist:
                 source_genres = dc_artist.get("genres") or []
+        elif source == "amazon" and amazon_client is not None:
+            az_artist = amazon_client.get_artist(resolved_name or artist_id)
+            if az_artist:
+                source_genres = az_artist.get("genres") or []
+                if not image_url and az_artist.get("images"):
+                    image_url = az_artist["images"][0].get("url")
     except Exception as e:
         logger.debug(f"Source-side artist info lookup failed for {source}:{artist_id}: {e}")
 
diff --git a/core/artist_source_lookup.py b/core/artist_source_lookup.py
index b965d4cf..d7497e1c 100644
--- a/core/artist_source_lookup.py
+++ b/core/artist_source_lookup.py
@@ -25,7 +25,7 @@ logger = logging.getLogger("artist_source_lookup")
 
 
 SOURCE_ONLY_ARTIST_SOURCES = frozenset({
-    "spotify", "itunes", "deezer", "discogs", "hydrabase", "musicbrainz",
+    "spotify", "itunes", "deezer", "discogs", "hydrabase", "musicbrainz", "amazon",
 })
 
 
diff --git a/tests/tools/test_amazon_client.py b/tests/tools/test_amazon_client.py
index bdceea74..d869c5d1 100644
--- a/tests/tools/test_amazon_client.py
+++ b/tests/tools/test_amazon_client.py
@@ -124,6 +124,8 @@ MEDIA_RESPONSE_FLAC = {
         "artist": "Kendrick Lamar",
         "album": "GNX",
         "isrc": "USRC12345678",
+        "trackNumber": "3",
+        "discNumber": "1",
     },
 }
 
@@ -536,6 +538,42 @@ class TestSearchArtists:
             artists = client.search_artists("Kendrick")
         assert isinstance(artists[0], Artist)
 
+    def test_artist_image_from_album(self):
+        resp = {
+            "results": [{"hits": [
+                {"document": {"asin": "A1", "title": "T1", "artistName": "Kendrick Lamar",
+                               "__type": "track", "albumAsin": "B0ABCDE123"}},
+            ]}]
+        }
+        client = _make_client({
+            "amazon-music/search": resp,
+            "amazon-music/metadata": ALBUM_METADATA_RESPONSE,
+        })
+        with patch("core.amazon_client._rate_limit"):
+            artists = client.search_artists("Kendrick")
+        assert artists[0].image_url == "https://example.com/cover.jpg"
+
+    def test_deduplicates_feat_credits(self):
+        resp = {
+            "results": [
+                {
+                    "hits": [
+                        {"document": {"asin": "A1", "title": "T1", "artistName": "Kendrick Lamar", "__type": "track"}},
+                        {"document": {"asin": "A2", "title": "T2", "artistName": "Kendrick Lamar feat. SZA", "__type": "track"}},
+                        {"document": {"asin": "A3", "title": "T3", "artistName": "Kendrick Lamar ft. Drake", "__type": "track"}},
+                        {"document": {"asin": "A4", "title": "T4", "artistName": "SZA featuring Kendrick Lamar", "__type": "track"}},
+                    ]
+                }
+            ]
+        }
+        client = _make_client({"amazon-music/search": resp})
+        with patch("core.amazon_client._rate_limit"):
+            artists = client.search_artists("Kendrick")
+        names = [a.name for a in artists]
+        assert "Kendrick Lamar" in names
+        assert "SZA" in names
+        assert len(artists) == 2
+
     def test_respects_limit(self):
         resp = {
             "results": [
@@ -591,6 +629,43 @@ class TestSearchAlbums:
             albums = client.search_albums("Kendrick")
         assert albums == []
 
+    def test_strips_explicit_from_album_name(self):
+        resp = {
+            "results": [{"hits": [
+                {"document": {**ALBUM_DOC, "albumName": "GNX (Explicit)", "title": "GNX (Explicit)"}},
+            ]}]
+        }
+        client = _make_client({"amazon-music/search": resp})
+        with patch("core.amazon_client._rate_limit"):
+            albums = client.search_albums("GNX")
+        assert albums[0].name == "GNX"
+
+    def test_keeps_clean_suffix(self):
+        resp = {
+            "results": [{"hits": [
+                {"document": {**ALBUM_DOC, "albumName": "GNX [Clean]", "title": "GNX [Clean]"}},
+            ]}]
+        }
+        client = _make_client({"amazon-music/search": resp})
+        with patch("core.amazon_client._rate_limit"):
+            albums = client.search_albums("GNX")
+        assert albums[0].name == "GNX [Clean]"
+
+    def test_deduplicates_explicit_clean_as_separate(self):
+        resp = {
+            "results": [{"hits": [
+                {"document": {**ALBUM_DOC, "asin": "B1", "albumAsin": "B1", "albumName": "GNX (Explicit)", "title": "GNX (Explicit)"}},
+                {"document": {**ALBUM_DOC, "asin": "B2", "albumAsin": "B2", "albumName": "GNX [Clean]", "title": "GNX [Clean]"}},
+            ]}]
+        }
+        client = _make_client({"amazon-music/search": resp})
+        with patch("core.amazon_client._rate_limit"):
+            albums = client.search_albums("GNX")
+        names = [a.name for a in albums]
+        assert "GNX" in names        # explicit stripped
+        assert "GNX [Clean]" in names
+        assert len(albums) == 2
+
 
 # ---------------------------------------------------------------------------
 # AmazonClient — album_metadata / media_from_asin
@@ -766,6 +841,8 @@ class TestGetAlbumTracks:
         assert item["id"] == "B09XYZ1234"
         assert item["name"] == "Not Like Us"
         assert item["isrc"] == "USRC12345678"
+        assert item["track_number"] == 3
+        assert item["disc_number"] == 1
 
     def test_returns_none_on_api_error(self):
         client = _make_client()
diff --git a/web_server.py b/web_server.py
index c9a774f2..bf896aa2 100644
--- a/web_server.py
+++ b/web_server.py
@@ -7283,6 +7283,13 @@ def _build_source_only_artist_detail(artist_id, artist_name, source):
     except Exception as e:
         logger.debug(f"Discogs client resolution failed: {e}")
 
+    az = None
+    try:
+        from core.metadata.registry import get_amazon_client
+        az = get_amazon_client()
+    except Exception as e:
+        logger.debug(f"Amazon client resolution failed: {e}")
+
     try:
         lastfm_api_key = config_manager.get('lastfm.api_key', '') or None
     except Exception:
@@ -7296,6 +7303,7 @@ def _build_source_only_artist_detail(artist_id, artist_name, source):
         deezer_client=dz,
         itunes_client=it,
         discogs_client=dc,
+        amazon_client=az,
         lastfm_api_key=lastfm_api_key,
     )
     return jsonify(payload), status
@@ -18665,6 +18673,9 @@ def get_spotify_album_tracks(album_id):
             client = _get_deezer_client()
         elif source_override == 'discogs':
             client = _get_discogs_client()
+        elif source_override == 'amazon':
+            from core.metadata.registry import get_amazon_client
+            client = get_amazon_client()
         elif source_override == 'musicbrainz':
             try:
                 from core.musicbrainz_search import MusicBrainzSearchClient
diff --git a/webui/static/helper.js b/webui/static/helper.js
index 5c2414ef..cf6dfed8 100644
--- a/webui/static/helper.js
+++ b/webui/static/helper.js
@@ -3417,6 +3417,7 @@ const WHATS_NEW = {
         { unreleased: true },
         { title: 'Amazon Music Metadata Source', desc: 'Amazon Music is now a selectable primary metadata source alongside Spotify, iTunes, Deezer, and Discogs. backed by the same T2Tunes proxy as the download source — no account needed. covers track search, album lookup with cover art, and artist discography. select it under Settings → Connections → Metadata Source.', page: 'settings' },
         { title: 'Amazon Music Download Source', desc: 'new download source backed by T2Tunes proxy. searches the Amazon Music catalog, downloads 24-bit/48kHz FLAC (or Opus 320kbps / Dolby Atmos EAC3 fallback). codec waterfall mirrors Tidal/Qobuz — best quality first, auto-fallback. selectable as a standalone or hybrid source from Settings.', page: 'settings' },
+        { title: 'Amazon Music Search Quality', desc: 'search results now show album art, artist images (album cover stand-in, same as iTunes), and correct track/disc numbers. feat. credits stripped from artist names so the same artist does not show as duplicates. [Explicit] stripped from album names so MusicBrainz matching works cleanly — Clean / Edited / Censored labels kept as-is. album clicks and artist detail pages now open instead of 404ing.', page: 'search' },
     ],
     '2.5.2': [
         // --- May 13, 2026 — 2.5.2 release ---