diff --git a/.dockerignore b/.dockerignore index 4b77bb74..c3344205 100644 --- a/.dockerignore +++ b/.dockerignore @@ -51,6 +51,20 @@ Incomplete/* artist_bubble_snapshots.json .spotify_cache +# Auto-downloaded ffmpeg binaries — the YouTube client downloads these +# into tools/ when system ffmpeg isn't on PATH. The Dockerfile installs +# system ffmpeg via apt, so the container never needs the bundled +# binaries. If a CI run leaves them in the workspace before the docker +# build (e.g. because a test imported web_server which initialized the +# YouTube client), they'd otherwise get baked into the image — adding +# ~388 MB and getting duplicated again by the chown layer. +tools/ffmpeg +tools/ffprobe +tools/ffmpeg.exe +tools/ffprobe.exe +tools/*.zip +tools/*.tar.xz + # Documentation *.md README.md diff --git a/Dockerfile b/Dockerfile index fbda152c..d4caefe5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,13 +45,20 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Create non-root user for security RUN useradd --create-home --shell /bin/bash --uid 1000 soulsync -# Copy application code -COPY . . - -# Create necessary directories with proper permissions +# Copy application code with ownership baked in. +# Using `COPY --chown` instead of `COPY` + `chown -R /app` avoids an +# extra image layer that duplicates the entire /app tree just to flip +# ownership bits — Docker layers are immutable, so chown -R rewrites +# every file into a new layer. On a clean repo that's small; if any +# bulky workspace file slips in (e.g. auto-downloaded ffmpeg binaries +# in tools/), it gets counted twice in the image. Cin caught this on +# 2026-05-08 — see the .dockerignore comment for the same incident. +COPY --chown=soulsync:soulsync . . + +# Create runtime mount-point directories the app expects to exist. # NOTE: /app/data is for database FILES, /app/database is the Python package RUN mkdir -p /app/config /app/data /app/logs /app/downloads /app/Transfer /app/MusicVideos /app/scripts && \ - chown -R soulsync:soulsync /app + chown soulsync:soulsync /app/config /app/data /app/logs /app/downloads /app/Transfer /app/MusicVideos /app/scripts # Create defaults directory and copy template files # These will be used by entrypoint.sh to initialize empty volumes diff --git a/core/youtube_client.py b/core/youtube_client.py index 4f1c4b76..33078bfd 100644 --- a/core/youtube_client.py +++ b/core/youtube_client.py @@ -156,10 +156,27 @@ class YouTubeClient(DownloadSourcePlugin): self.matching_engine = MusicMatchingEngine() logger.info("Initialized production MusicMatchingEngine") - # Check for ffmpeg (REQUIRED for MP3 conversion) - if not self._check_ffmpeg(): - logger.error("ffmpeg is required but not found") - logger.error("The client will attempt to auto-download ffmpeg on first use") + # NOTE: deliberately don't call `_check_ffmpeg()` here. That call + # has a side effect — it auto-downloads a ~388 MB ffmpeg/ffprobe + # bundle into ./tools/ when system ffmpeg isn't on PATH. Firing + # that during __init__ means importing web_server (which any + # test does — see tests/test_tidal_auth_instructions.py) triggers + # the download, leaves the binaries in the repo workspace, and + # if the CI runner does its docker build right after, the + # binaries get baked into the image (and duplicated again by the + # chown layer). Cin reported the resulting size doubling on + # 2026-05-08 so we moved the check off the import path. + # + # `_check_ffmpeg()` still runs lazily — `is_available()` calls + # it before reporting True, and the actual download flow checks + # it before invoking yt-dlp. Both are call paths the user opted + # into by choosing YouTube as a download source. + if not self._locate_ffmpeg(): + logger.warning( + "ffmpeg not found on PATH or in tools/ — will auto-download " + "on first YouTube use. (Skipping eager download to keep " + "test/import side-effects out of the repo workspace.)" + ) # Configure yt-dlp options with bot detection bypass self.download_opts = { @@ -372,6 +389,37 @@ class YouTubeClient(DownloadSourcePlugin): """ return self.current_download_progress.copy() + def _locate_ffmpeg(self) -> bool: + """Check whether ffmpeg is already available WITHOUT side effects. + + Used at __init__ time to log a warning if ffmpeg is missing. + Does NOT trigger the auto-download — that lives in + ``_check_ffmpeg`` and only fires from call paths the user opted + into (``is_available()`` and the actual download dispatch). + """ + import shutil + + if shutil.which('ffmpeg'): + return True + + tools_dir = Path(__file__).parent.parent / 'tools' + if platform.system().lower() == 'windows': + ffmpeg_path = tools_dir / 'ffmpeg.exe' + ffprobe_path = tools_dir / 'ffprobe.exe' + else: + ffmpeg_path = tools_dir / 'ffmpeg' + ffprobe_path = tools_dir / 'ffprobe' + + if ffmpeg_path.exists() and ffprobe_path.exists(): + # Make sure yt-dlp can find them — same PATH bump + # _check_ffmpeg does on the happy path. + tools_dir_str = str(tools_dir.absolute()) + if tools_dir_str not in os.environ.get('PATH', ''): + os.environ['PATH'] = tools_dir_str + os.pathsep + os.environ.get('PATH', '') + return True + + return False + def _check_ffmpeg(self) -> bool: """Check if ffmpeg is available (system PATH or auto-download to tools folder)""" import shutil