@staticmethod
def _escape_ytsearch_query(query: str) -> str:
    """Backslash-escape a search term whose first visible character is ``-``.

    The term is later embedded in a ``ytsearchN:`` request. YouTube video
    IDs may begin with a dash, and an unescaped leading dash is read as
    search syntax instead of as part of the term.

    Args:
        query: Raw search text. Values that are not ``str`` are handed
            back unchanged.

    Returns:
        ``query`` with a single backslash inserted directly before the
        leading dash, keeping any leading whitespace exactly as given.
        Input that does not start with a dash (after leading whitespace)
        is returned as-is; this includes input that is already escaped,
        because it starts with a backslash rather than a dash.
    """
    # Non-string input is not our business; pass it straight through.
    if not isinstance(query, str):
        return query

    body = query.lstrip()
    if not body.startswith('-'):
        return query

    # Re-attach the original leading whitespace in front of the escape.
    padding = query[:len(query) - len(body)]
    return padding + '\\' + body
@@ -698,8 +716,9 @@ class YouTubeClient(DownloadSourcePlugin): if cookies_browser: ydl_opts['cookiesfrombrowser'] = (cookies_browser,) + search_query = self._escape_ytsearch_query(query) with yt_dlp.YoutubeDL(ydl_opts) as ydl: - data = ydl.extract_info(f"ytsearch{max_results}:{query}", download=False) + data = ydl.extract_info(f"ytsearch{max_results}:{search_query}", download=False) if not data or 'entries' not in data: return [] @@ -777,9 +796,10 @@ class YouTubeClient(DownloadSourcePlugin): if cookies_browser: ydl_opts['cookiesfrombrowser'] = (cookies_browser,) + search_query = self._escape_ytsearch_query(query) with yt_dlp.YoutubeDL(ydl_opts) as ydl: # Search YouTube (max 50 results) - search_results = ydl.extract_info(f"ytsearch50:{query}", download=False) + search_results = ydl.extract_info(f"ytsearch50:{search_query}", download=False) if not search_results or 'entries' not in search_results: return [] diff --git a/tests/test_youtube_search_dash_query.py b/tests/test_youtube_search_dash_query.py new file mode 100644 index 00000000..a2e0318d --- /dev/null +++ b/tests/test_youtube_search_dash_query.py @@ -0,0 +1,87 @@ +"""Regression tests for YouTube searches whose query starts with ``-``. + +YouTube video IDs can start with a dash. yt-dlp's ``ytsearchN:`` parser +interprets a leading dash as search syntax unless escaped, so manual +searches for those IDs used to fan out into unrelated results. 
+""" + +from __future__ import annotations + +import asyncio +from types import SimpleNamespace + +from core import youtube_client +from core.youtube_client import YouTubeClient + + +def test_escape_ytsearch_query_handles_leading_dash(): + assert YouTubeClient._escape_ytsearch_query("-4WUHJRhvrM") == r"\-4WUHJRhvrM" + assert YouTubeClient._escape_ytsearch_query(r"\-4WUHJRhvrM") == r"\-4WUHJRhvrM" + assert YouTubeClient._escape_ytsearch_query("Yo-Yo Ma") == "Yo-Yo Ma" + + +def test_search_escapes_leading_dash_before_yt_dlp(monkeypatch): + captured = [] + + class _FakeYoutubeDL: + def __init__(self, opts): + self.opts = opts + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def extract_info(self, search_query, download=False): + captured.append(search_query) + return {"entries": [{"id": "-4WUHJRhvrM", "title": "Unaccompanied Cello"}]} + + monkeypatch.setattr(youtube_client.yt_dlp, "YoutubeDL", _FakeYoutubeDL) + + client = YouTubeClient.__new__(YouTubeClient) + monkeypatch.setattr(client, "_get_best_audio_format", lambda formats: None) + monkeypatch.setattr( + client, + "_youtube_to_track_result", + lambda entry, best_audio: SimpleNamespace(filename=entry["title"]), + ) + + tracks, albums = asyncio.run(client.search("-4WUHJRhvrM")) + + assert captured == [r"ytsearch50:\-4WUHJRhvrM"] + assert len(tracks) == 1 + assert albums == [] + + +def test_search_videos_escapes_leading_dash_before_yt_dlp(monkeypatch): + captured = [] + + class _FakeYoutubeDL: + def __init__(self, opts): + self.opts = opts + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def extract_info(self, search_query, download=False): + captured.append(search_query) + return { + "entries": [{ + "id": "-4WUHJRhvrM", + "title": "Unaccompanied Cello", + "duration": 152, + "uploader": "Yo-Yo Ma", + }] + } + + monkeypatch.setattr(youtube_client.yt_dlp, "YoutubeDL", _FakeYoutubeDL) + client = 
YouTubeClient.__new__(YouTubeClient) + + results = asyncio.run(client.search_videos("-4WUHJRhvrM", max_results=8)) + + assert captured == [r"ytsearch8:\-4WUHJRhvrM"] + assert [r.video_id for r in results] == ["-4WUHJRhvrM"]