Escape dash-leading YouTube search queries

Fix manual YouTube searches for video IDs that begin with a dash by escaping leading '-' before building yt-dlp ytsearch expressions. This preserves normal search terms and already escaped user input while preventing yt-dlp from treating the ID as search syntax.

Add regression coverage for both YouTube download search and video search paths. Fixes #684.
pull/692/head
Broque Thomas 2 days ago
parent f68afe80c8
commit 7bee424686

@ -154,6 +154,7 @@ class YouTubeClient(DownloadSourcePlugin):
# Initialize production matching engine for parity with Soulseek
self.matching_engine = MusicMatchingEngine()
logger.info("Initialized production MusicMatchingEngine")
# NOTE: deliberately don't call `_check_ffmpeg()` here. That call
@ -216,6 +217,23 @@ class YouTubeClient(DownloadSourcePlugin):
# Optional progress callback for UI updates
self.progress_callback = None
@staticmethod
def _escape_ytsearch_query(query: str) -> str:
"""Escape yt-dlp search terms that begin with a dash.
YouTube video IDs may start with ``-``. When passed through
``ytsearchN:<query>``, yt-dlp treats that leading dash as search
syntax unless it is escaped. Preserve already-escaped input so
users who worked around the issue manually keep the same result.
"""
if not isinstance(query, str):
return query
stripped = query.lstrip()
leading_ws_len = len(query) - len(stripped)
if stripped.startswith('-'):
return f"{query[:leading_ws_len]}\\{stripped}"
return query
def is_available(self) -> bool:
"""
Check if YouTube client is available (yt-dlp installed and ffmpeg available).
@ -698,8 +716,9 @@ class YouTubeClient(DownloadSourcePlugin):
if cookies_browser:
ydl_opts['cookiesfrombrowser'] = (cookies_browser,)
search_query = self._escape_ytsearch_query(query)
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
data = ydl.extract_info(f"ytsearch{max_results}:{query}", download=False)
data = ydl.extract_info(f"ytsearch{max_results}:{search_query}", download=False)
if not data or 'entries' not in data:
return []
@ -777,9 +796,10 @@ class YouTubeClient(DownloadSourcePlugin):
if cookies_browser:
ydl_opts['cookiesfrombrowser'] = (cookies_browser,)
search_query = self._escape_ytsearch_query(query)
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
# Search YouTube (max 50 results)
search_results = ydl.extract_info(f"ytsearch50:{query}", download=False)
search_results = ydl.extract_info(f"ytsearch50:{search_query}", download=False)
if not search_results or 'entries' not in search_results:
return []

@ -0,0 +1,87 @@
"""Regression tests for YouTube searches whose query starts with ``-``.
YouTube video IDs can start with a dash. yt-dlp's ``ytsearchN:`` parser
interprets a leading dash as search syntax unless escaped, so manual
searches for those IDs used to fan out into unrelated results.
"""
from __future__ import annotations
import asyncio
from types import SimpleNamespace
from core import youtube_client
from core.youtube_client import YouTubeClient
def test_escape_ytsearch_query_handles_leading_dash():
    """Exercise the escape helper on dash-leading, pre-escaped and plain queries."""
    escape = YouTubeClient._escape_ytsearch_query

    # A bare dash-leading video ID gains exactly one backslash prefix.
    assert escape("-4WUHJRhvrM") == r"\-4WUHJRhvrM"
    # Input the user already escaped by hand must not be escaped again.
    assert escape(r"\-4WUHJRhvrM") == r"\-4WUHJRhvrM"
    # Dashes that are not in the leading position are left untouched.
    assert escape("Yo-Yo Ma") == "Yo-Yo Ma"
    # Leading whitespace is preserved and the backslash lands directly
    # before the dash (the helper's lstrip / prefix-length branch).
    assert escape("  -4WUHJRhvrM") == r"  \-4WUHJRhvrM"
def test_search_escapes_leading_dash_before_yt_dlp(monkeypatch):
    """``search`` must pass yt-dlp an escaped ``ytsearch50:`` expression."""
    seen_queries = []

    class _StubYoutubeDL:
        """Context-manager stand-in for ``yt_dlp.YoutubeDL`` that records queries."""

        def __init__(self, opts):
            self.opts = opts

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            # Falsy return: exceptions raised inside the ``with`` body propagate.
            return False

        def extract_info(self, search_query, download=False):
            seen_queries.append(search_query)
            entry = {"id": "-4WUHJRhvrM", "title": "Unaccompanied Cello"}
            return {"entries": [entry]}

    def _no_audio_format(formats):
        return None

    def _entry_to_result(entry, best_audio):
        return SimpleNamespace(filename=entry["title"])

    monkeypatch.setattr(youtube_client.yt_dlp, "YoutubeDL", _StubYoutubeDL)

    # ``__new__`` builds the instance without running ``__init__``.
    client = YouTubeClient.__new__(YouTubeClient)
    monkeypatch.setattr(client, "_get_best_audio_format", _no_audio_format)
    monkeypatch.setattr(client, "_youtube_to_track_result", _entry_to_result)

    tracks, albums = asyncio.run(client.search("-4WUHJRhvrM"))

    assert seen_queries == [r"ytsearch50:\-4WUHJRhvrM"]
    assert len(tracks) == 1
    assert albums == []
def test_search_videos_escapes_leading_dash_before_yt_dlp(monkeypatch):
    """``search_videos`` must pass yt-dlp an escaped ``ytsearchN:`` expression."""
    seen_queries = []

    class _StubYoutubeDL:
        """Context-manager stand-in for ``yt_dlp.YoutubeDL`` that records queries."""

        def __init__(self, opts):
            self.opts = opts

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            # Falsy return: exceptions raised inside the ``with`` body propagate.
            return False

        def extract_info(self, search_query, download=False):
            seen_queries.append(search_query)
            entry = {
                "id": "-4WUHJRhvrM",
                "title": "Unaccompanied Cello",
                "duration": 152,
                "uploader": "Yo-Yo Ma",
            }
            return {"entries": [entry]}

    monkeypatch.setattr(youtube_client.yt_dlp, "YoutubeDL", _StubYoutubeDL)

    # ``__new__`` builds the instance without running ``__init__``.
    client = YouTubeClient.__new__(YouTubeClient)

    found = asyncio.run(client.search_videos("-4WUHJRhvrM", max_results=8))

    # ``max_results=8`` must show up as the ``ytsearch8:`` prefix.
    assert seen_queries == [r"ytsearch8:\-4WUHJRhvrM"]
    assert [video.video_id for video in found] == ["-4WUHJRhvrM"]
Loading…
Cancel
Save