From d944d4a7d240f759ecb80694a9d01efd7c26bc40 Mon Sep 17 00:00:00 2001
From: Broque Thomas <26755000+Nezreka@users.noreply.github.com>
Date: Mon, 30 Mar 2026 18:28:25 -0700
Subject: [PATCH] Fix Japanese/CJK text mangled in Soulseek search queries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

normalize_string() was running unidecode on all text, converting
Japanese kanji to Chinese pinyin gibberish (命の灯火 → "tvanimedei").
It now detects CJK characters (kanji, hiragana, katakana, hangul,
fullwidth forms) and skips unidecode for any string containing them;
the whole string is only lowercased, so accents in mixed CJK/Latin
text are kept as well. Strings with no CJK characters (Latin accents,
Cyrillic) still go through unidecode as before.
---
 core/matching_engine.py | 12 ++++++++++--
 web_server.py           |  9 +++++++++
 webui/static/helper.js  |  1 +
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/core/matching_engine.py b/core/matching_engine.py
index 83ee7cab..e5d66d51 100644
--- a/core/matching_engine.py
+++ b/core/matching_engine.py
@@ -67,8 +67,16 @@ class MusicMatchingEngine:
         # Apply the character replacements before other normalization steps
         for original, replacement in char_map.items():
             text = text.replace(original, replacement)
-        text = unidecode(text)
-        text = text.lower()
+
+        # Skip unidecode for any string containing CJK — it converts Japanese kanji to Chinese pinyin,
+        # producing gibberish like "tvanimedei" for "命の灯火". Preserve original characters
+        # so Soulseek searches use the real title. The check is per string: mixed CJK/Latin text keeps its accents too.
+        if any('\u2e80' <= c <= '\u9fff' or '\u3040' <= c <= '\u30ff' or '\uff00' <= c <= '\uffef' or '\uac00' <= c <= '\ud7af' for c in text):
+            # CJK detected — just lowercase, don't transliterate
+            text = text.lower()
+        else:
+            text = unidecode(text)
+            text = text.lower()
         
         # Expand specific abbreviations for better matching
         abbreviation_map = {
diff --git a/web_server.py b/web_server.py
index 314c924b..e0f094fb 100644
--- a/web_server.py
+++ b/web_server.py
@@ -19246,6 +19246,15 @@ def get_version_info():
         "title": "What's New in SoulSync",
         "subtitle": f"Version {SOULSYNC_VERSION} — Latest Changes",
         "sections": [
+            {
+                "title": "🔧 Fix Japanese Song Searches Producing Gibberish",
+                "description": "CJK text no longer mangled by unidecode in Soulseek search queries",
+                "features": [
+                    "• Japanese kanji, hiragana, katakana, and Korean hangul preserved in search queries",
+                    "• unidecode was converting Japanese to Chinese pinyin (e.g. 命の灯火 → 'tvanimedei')",
+                    "• Soulseek users typically share files with original CJK characters in filenames"
+                ]
+            },
             {
                 "title": "🔧 Fix Partial Name Matching False Positives (#225)",
                 "description": "Track ownership check no longer falsely matches prefix/suffix variations",
diff --git a/webui/static/helper.js b/webui/static/helper.js
index 5fa5efa4..ca3f0aa3 100644
--- a/webui/static/helper.js
+++ b/webui/static/helper.js
@@ -3403,6 +3403,7 @@ function closeHelperSearch() {
 const WHATS_NEW = {
     '2.1': [
         // Newest features first
+        { title: 'Fix Japanese/CJK Soulseek Searches',       desc: 'Japanese kanji no longer mangled into Chinese pinyin — searches now use original characters' },
         { title: 'Fix Partial Title Matching',               desc: '"Believe" no longer falsely matches "Believe In Me" — length ratio penalty prevents prefix false positives' },
         { title: 'Fix Pipeline Blocking on Discovery Fail',  desc: 'Playlist sync no longer drops tracks that failed metadata discovery — continues with original name/artist for download' },
         { title: 'Playlist Explorer',                         desc: 'New page: expand playlists into visual discovery trees of albums and discographies — select and add to wishlist', page: 'playlist-explorer' },