From 7a2bc4945830ea63d34dc23d6e5ef5c75f1882b7 Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Sun, 29 Mar 2026 18:34:39 -0700 Subject: [PATCH] Add raw non-ASCII query for CJK track search on Soulseek MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When artist or title contains non-ASCII characters (Japanese, Chinese, Korean, etc.), prepend the original un-romanized text as the first search query. unidecode converts Japanese kanji to Chinese pinyin (e.g. "藤澤慶昌" → "teng ze qing chang") which never matches on Soulseek. The original characters match filenames directly. Romanized fallback queries are still generated afterwards for coverage. Zero impact on ASCII-only tracks (isascii check skips them). --- core/matching_engine.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/core/matching_engine.py b/core/matching_engine.py index ac5837c3..83ee7cab 100644 --- a/core/matching_engine.py +++ b/core/matching_engine.py @@ -420,15 +420,25 @@ class MusicMatchingEngine: Returns queries in order of preference (cleaned titles first, then original). """ queries = [] - + if not spotify_track.artists: # No artist info - just use track name variations queries.append(self.clean_title(spotify_track.name)) return queries - + + # If artist or title contains non-ASCII (e.g. Japanese, Chinese, Korean), + # add a raw query first — Soulseek filenames often use original characters, + # and unidecode mangles CJK text into wrong romanizations (Chinese pinyin for Japanese kanji). 
+ raw_artist = spotify_track.artists[0].strip() + raw_title = spotify_track.name.strip() + if raw_artist and raw_title and not (raw_artist + raw_title).isascii(): + raw_query = f"{raw_artist} {raw_title}".strip() + queries.append(raw_query) + logger.debug(f"NON-ASCII: Raw original query: '{raw_query}'") + artist = self.clean_artist(spotify_track.artists[0]) original_title = spotify_track.name - + # Get album name if available - try multiple attribute names album_name = None for attr in ['album', 'album_name', 'album_title']: