diff --git a/.gitignore b/.gitignore index 8266b83b..8857f8d5 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ database/music_library.db-shm database/music_library.db-wal database/music_library.db.backup_* database/api_call_history.json +storage/image_cache/ logs/*.log logs/*.log.* diff --git a/config/settings.py b/config/settings.py index 2cfc3eeb..b2c6b8c4 100644 --- a/config/settings.py +++ b/config/settings.py @@ -552,6 +552,13 @@ class ConfigManager: "path": os.environ.get('DATABASE_PATH', 'database/music_library.db'), "max_workers": 5 }, + "image_cache": { + "enabled": True, + "path": "storage/image_cache", + "ttl_seconds": 2592000, + "failed_ttl_seconds": 21600, + "max_download_mb": 15 + }, "metadata_enhancement": { "enabled": True, "embed_album_art": True, diff --git a/core/image_cache.py b/core/image_cache.py new file mode 100644 index 00000000..9ee0be47 --- /dev/null +++ b/core/image_cache.py @@ -0,0 +1,342 @@ +"""Disk-backed image cache for browser-facing artwork URLs.""" + +from __future__ import annotations + +import hashlib +import mimetypes +import os +import sqlite3 +import threading +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Callable, Optional +from urllib.parse import urlparse + +import requests + +from config.settings import config_manager +from core.metadata.artwork import is_internal_image_host +from utils.logging_config import get_logger + +logger = get_logger("image_cache") + +DEFAULT_TTL_SECONDS = 30 * 24 * 60 * 60 +DEFAULT_FAILED_TTL_SECONDS = 6 * 60 * 60 +DEFAULT_MAX_DOWNLOAD_BYTES = 15 * 1024 * 1024 + + +class ImageCacheError(Exception): + """Raised when an image cannot be served from the cache.""" + + +@dataclass +class CachedImage: + key: str + path: Path + mime_type: str + size: int + status: str + + +class ImageCache: + def __init__( + self, + cache_dir: str | os.PathLike[str], + *, + ttl_seconds: int = DEFAULT_TTL_SECONDS, + failed_ttl_seconds: int = DEFAULT_FAILED_TTL_SECONDS, + max_download_bytes: int = DEFAULT_MAX_DOWNLOAD_BYTES, + fetcher: Optional[Callable[..., requests.Response]] = None, + ): + self.cache_dir = Path(cache_dir) + self.ttl_seconds = int(ttl_seconds) + self.failed_ttl_seconds = int(failed_ttl_seconds) + self.max_download_bytes = int(max_download_bytes) + self.fetcher = fetcher or requests.get + self.db_path = self.cache_dir / "image_cache.sqlite3" + self._db_lock = threading.RLock() + self._key_locks: dict[str, threading.Lock] = {} + self._key_locks_lock = threading.Lock() + self.cache_dir.mkdir(parents=True, exist_ok=True) + self._init_db() + + def cache_url_for(self, url: str | None) -> str | None: + """Register a URL and return its browser-facing cached path.""" + if not url: + return None + if str(url).startswith("/api/image-cache/"): + return str(url) + if not self.is_cacheable_url(str(url)): + return str(url) + + key = self.key_for_url(str(url)) + now = time.time() + with self._db_lock: + with self._connect() as conn: + conn.execute( + """ + INSERT INTO image_cache + (key, original_url, status, created_at, updated_at, last_accessed, + expires_at, size, mime_type, file_path, last_error) + VALUES (?, ?, 'pending', ?, ?, ?, 0, 0, '', '', '') + ON CONFLICT(key) DO UPDATE SET + original_url=excluded.original_url, + last_accessed=excluded.last_accessed + """, + (key, str(url), now, now, now), + ) + return f"/api/image-cache/{key}" + + def get(self, key: str) -> CachedImage: + row = self._get_row(key) + if not row: + raise ImageCacheError("Image cache key not found") + return self.get_url(row["original_url"]) + + def get_url(self, url: str) -> CachedImage: + if not self.is_cacheable_url(url): + raise ImageCacheError("URL is not cacheable") + + key = self.key_for_url(url) + lock = self._lock_for_key(key) + with lock: + row = self._get_row(key) + now = time.time() + if row and row["status"] == "ok" and row["file_path"]: + path = Path(row["file_path"]) + if path.exists(): + self._touch(key, now) + if float(row["expires_at"] or 0) > now: + return CachedImage(key, path, row["mime_type"] or "image/jpeg", int(row["size"] or 0), "hit") + + try: + return self._fetch_and_store(url, key, now) + except Exception as exc: + if row and row["status"] == "ok" and row["file_path"]: + stale_path = Path(row["file_path"]) + if stale_path.exists(): + logger.warning("Serving stale cached image for %s after refresh failed: %s", key, exc) + self._record_error(key, str(exc), now, keep_status=True) + return CachedImage( + key, + stale_path, + row["mime_type"] or "image/jpeg", + int(row["size"] or 0), + "stale", + ) + self._record_error(key, str(exc), now) + raise ImageCacheError(str(exc)) from exc + + @staticmethod + def key_for_url(url: str) -> str: + return hashlib.sha256(url.encode("utf-8")).hexdigest() + + @staticmethod + def is_cacheable_url(url: str) -> bool: + try: + parsed = urlparse(url) + if parsed.scheme not in {"http", "https"}: + return False + if parsed.username or parsed.password: + return False + if not parsed.hostname: + return False + return True + except Exception: + return False + + def _fetch_and_store(self, url: str, key: str, now: float) -> CachedImage: + if not self._is_fetch_allowed(url): + raise ImageCacheError("Image host is not allowed") + + response = self.fetcher( + url, + timeout=10, + stream=True, + headers={ + "User-Agent": ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + ), + "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8", + "Referer": "https://www.deezer.com/", + }, + ) + try: + if response.status_code != 200: + raise ImageCacheError(f"Upstream image returned HTTP {response.status_code}") + + mime_type = (response.headers.get("Content-Type") or "image/jpeg").split(";", 1)[0].strip() + if not mime_type.startswith("image/"): + raise ImageCacheError(f"Upstream response is not an image: {mime_type}") + + declared_size = response.headers.get("Content-Length") + try: + if declared_size and int(declared_size) > self.max_download_bytes: + raise ImageCacheError("Image exceeds configured size limit") + except ValueError: + pass + + ext = mimetypes.guess_extension(mime_type) or ".img" + if ext == ".jpe": + ext = ".jpg" + path = self._path_for_key(key, ext) + path.parent.mkdir(parents=True, exist_ok=True) + tmp_path = path.with_suffix(path.suffix + ".tmp") + + total = 0 + try: + with open(tmp_path, "wb") as handle: + for chunk in response.iter_content(chunk_size=64 * 1024): + if not chunk: + continue + total += len(chunk) + if total > self.max_download_bytes: + raise ImageCacheError("Image exceeds configured size limit") + handle.write(chunk) + except Exception: + try: + tmp_path.unlink(missing_ok=True) + except Exception: + pass + raise + + if total <= 0: + raise ImageCacheError("Image response was empty") + + os.replace(tmp_path, path) + expires_at = now + self.ttl_seconds + with self._db_lock: + with self._connect() as conn: + conn.execute( + """ + INSERT INTO image_cache + (key, original_url, status, created_at, updated_at, last_accessed, + expires_at, size, mime_type, file_path, last_error) + VALUES (?, ?, 'ok', ?, ?, ?, ?, ?, ?, ?, '') + ON CONFLICT(key) DO UPDATE SET + original_url=excluded.original_url, + status='ok', + updated_at=excluded.updated_at, + last_accessed=excluded.last_accessed, + expires_at=excluded.expires_at, + size=excluded.size, + mime_type=excluded.mime_type, + file_path=excluded.file_path, + last_error='' + """, + (key, url, now, now, now, expires_at, total, mime_type, str(path)), + ) + return CachedImage(key, path, mime_type, total, "miss") + finally: + response.close() + + def _path_for_key(self, key: str, extension: str) -> Path: + return self.cache_dir / key[:2] / key[2:4] / f"{key}{extension}" + + def _is_fetch_allowed(self, url: str) -> bool: + parsed = urlparse(url) + if parsed.scheme not in {"http", "https"}: + return False + if parsed.username or parsed.password: + return False + if not parsed.hostname: + return False + + # Internal hosts are explicitly supported because Plex/Jellyfin/Navidrome + # artwork often lives behind Docker/LAN-only URLs. Public hosts are allowed + # as image-only responses with size limits. + return bool(parsed.hostname) or is_internal_image_host(url) + + def _connect(self) -> sqlite3.Connection: + conn = sqlite3.connect(self.db_path) + conn.row_factory = sqlite3.Row + return conn + + def _init_db(self) -> None: + with self._db_lock: + with self._connect() as conn: + conn.execute( + """ + CREATE TABLE IF NOT EXISTS image_cache ( + key TEXT PRIMARY KEY, + original_url TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + created_at REAL NOT NULL, + updated_at REAL NOT NULL, + last_accessed REAL NOT NULL, + expires_at REAL NOT NULL DEFAULT 0, + size INTEGER NOT NULL DEFAULT 0, + mime_type TEXT NOT NULL DEFAULT '', + file_path TEXT NOT NULL DEFAULT '', + last_error TEXT NOT NULL DEFAULT '' + ) + """ + ) + conn.execute("CREATE INDEX IF NOT EXISTS idx_image_cache_accessed ON image_cache(last_accessed)") + + def _get_row(self, key: str) -> Optional[sqlite3.Row]: + with self._db_lock: + with self._connect() as conn: + return conn.execute("SELECT * FROM image_cache WHERE key = ?", (key,)).fetchone() + + def _touch(self, key: str, now: float) -> None: + with self._db_lock: + with self._connect() as conn: + conn.execute("UPDATE image_cache SET last_accessed = ? WHERE key = ?", (now, key)) + + def _record_error(self, key: str, error: str, now: float, *, keep_status: bool = False) -> None: + status_sql = "status" if keep_status else "'failed'" + with self._db_lock: + with self._connect() as conn: + conn.execute( + f""" + UPDATE image_cache + SET status = {status_sql}, + updated_at = ?, + last_accessed = ?, + expires_at = ?, + last_error = ? + WHERE key = ? + """, + (now, now, now + self.failed_ttl_seconds, error[:500], key), + ) + + def _lock_for_key(self, key: str) -> threading.Lock: + with self._key_locks_lock: + lock = self._key_locks.get(key) + if lock is None: + lock = threading.Lock() + self._key_locks[key] = lock + return lock + + +_image_cache: Optional[ImageCache] = None +_image_cache_lock = threading.Lock() + + +def get_image_cache() -> ImageCache: + global _image_cache + with _image_cache_lock: + if _image_cache is None: + cache_dir = config_manager.get("image_cache.path", "storage/image_cache") + if not os.path.isabs(cache_dir): + cache_dir = str(config_manager.base_dir / cache_dir) + _image_cache = ImageCache( + cache_dir, + ttl_seconds=int(config_manager.get("image_cache.ttl_seconds", DEFAULT_TTL_SECONDS)), + failed_ttl_seconds=int(config_manager.get("image_cache.failed_ttl_seconds", DEFAULT_FAILED_TTL_SECONDS)), + max_download_bytes=int(config_manager.get("image_cache.max_download_mb", 15)) * 1024 * 1024, + ) + return _image_cache + + +def cached_image_url(url: str | None) -> str | None: + if not url or config_manager.get("image_cache.enabled", True) is False: + return url + try: + return get_image_cache().cache_url_for(url) + except Exception as exc: + logger.debug("image cache registration failed: %s", exc) + return url diff --git a/core/metadata/artwork.py b/core/metadata/artwork.py index 98af8431..694933d9 100644 --- a/core/metadata/artwork.py +++ b/core/metadata/artwork.py @@ -192,13 +192,13 @@ def normalize_image_url(thumb_url: str | None) -> str | None: def is_image_proxy_url(url: str) -> bool: - """Return True for SoulSync image-proxy URLs, absolute or relative.""" + """Return True for SoulSync image proxy/cache URLs, absolute or relative.""" if not url: return False try: parsed = urlparse(url) - return parsed.path == '/api/image-proxy' + return parsed.path == '/api/image-proxy' or parsed.path.startswith('/api/image-cache/') except Exception: return False @@ -235,10 +235,19 @@ def _browser_safe_image_url(url: str) -> str: if is_image_proxy_url(url): return url - if url.startswith('/api/image-proxy?url='): + if url.startswith('/api/image-proxy?url=') or url.startswith('/api/image-cache/'): return url if url.startswith('http://') or url.startswith('https://'): + try: + from core.image_cache import cached_image_url + + cached_url = cached_image_url(url) + if cached_url: + return cached_url + except Exception as exc: + logger.debug("image cache URL registration failed: %s", exc) + if is_internal_image_host(url): return f"/api/image-proxy?url={quote(url, safe='')}" return url diff --git a/tests/metadata/test_image_url_normalization.py b/tests/metadata/test_image_url_normalization.py index 2832ba4c..01ba4933 100644 --- a/tests/metadata/test_image_url_normalization.py +++ b/tests/metadata/test_image_url_normalization.py @@ -10,6 +10,7 @@ from urllib.parse import quote "thumb_url", [ "/api/image-proxy?url=https%3A%2F%2Fexample.com%2Fcover.jpg", + "/api/image-cache/" + ("a" * 64), "http://host.docker.internal:4533/api/image-proxy?u=ketiska&t=abc&s=def&v=1.16.1&c=SoulSync&f=json", ], ) @@ -20,10 +21,11 @@ def test_normalize_image_url_leaves_existing_image_proxy_urls_alone(thumb_url): assert normalize_image_url(thumb_url) == thumb_url -def test_normalize_image_url_proxies_internal_http_urls(monkeypatch): - """Raw internal image URLs should still be routed through SoulSync's proxy.""" +def test_normalize_image_url_registers_internal_http_urls_with_image_cache(monkeypatch): + """Raw internal image URLs should be routed through SoulSync's hashed cache URL.""" from core.metadata import normalize_image_url from core.metadata import artwork + import core.image_cache as image_cache class _FakeConfig: def get_active_media_server(self): @@ -39,6 +41,33 @@ def test_normalize_image_url_proxies_internal_http_urls(monkeypatch): return {} monkeypatch.setattr(artwork, "get_config_manager", lambda: _FakeConfig()) + monkeypatch.setattr(image_cache, "cached_image_url", lambda u: "/api/image-cache/" + ("b" * 64)) + + url = "http://localhost:4533/cover.jpg" + assert normalize_image_url(url) == "/api/image-cache/" + ("b" * 64) + + +def test_normalize_image_url_falls_back_to_proxy_when_cache_registration_fails(monkeypatch): + """If cache registration breaks, internal URLs keep the old image-proxy behavior.""" + from core.metadata import normalize_image_url + from core.metadata import artwork + import core.image_cache as image_cache + + class _FakeConfig: + def get_active_media_server(self): + return "spotify" + + def get_plex_config(self): + return {} + + def get_jellyfin_config(self): + return {} + + def get_navidrome_config(self): + return {} + + monkeypatch.setattr(artwork, "get_config_manager", lambda: _FakeConfig()) + monkeypatch.setattr(image_cache, "cached_image_url", lambda u: (_ for _ in ()).throw(RuntimeError("boom"))) url = "http://localhost:4533/cover.jpg" assert normalize_image_url(url) == f"/api/image-proxy?url={quote(url, safe='')}" diff --git a/tests/test_image_cache.py b/tests/test_image_cache.py new file mode 100644 index 00000000..f4556757 --- /dev/null +++ b/tests/test_image_cache.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from core.image_cache import ImageCache + + +class FakeResponse: + def __init__(self, body: bytes, *, status_code: int = 200, content_type: str = "image/jpeg"): + self.body = body + self.status_code = status_code + self.headers = { + "Content-Type": content_type, + "Content-Length": str(len(body)), + } + self.closed = False + + def iter_content(self, chunk_size=65536): + yield self.body + + def close(self): + self.closed = True + + +def test_cache_url_for_registers_hashed_browser_path(tmp_path): + cache = ImageCache(tmp_path) + url = "https://images.example.test/cover.jpg?token=secret" + + cached_url = cache.cache_url_for(url) + + assert cached_url == f"/api/image-cache/{ImageCache.key_for_url(url)}" + assert "secret" not in cached_url + + +def test_get_url_fetches_once_then_serves_cached_file(tmp_path): + calls = [] + + def fetcher(url, **kwargs): + calls.append((url, kwargs)) + return FakeResponse(b"fake-jpeg-bytes") + + cache = ImageCache(tmp_path, fetcher=fetcher) + url = "https://images.example.test/cover.jpg" + + first = cache.get_url(url) + second = cache.get_url(url) + + assert first.status == "miss" + assert second.status == "hit" + assert first.path == second.path + assert first.path.read_bytes() == b"fake-jpeg-bytes" + assert first.mime_type == "image/jpeg" + assert len(calls) == 1 + + +def test_get_url_rejects_non_image_responses(tmp_path): + cache = ImageCache( + tmp_path, + fetcher=lambda url, **kwargs: FakeResponse(b"", content_type="text/html"), + ) + + try: + cache.get_url("https://images.example.test/not-image") + except Exception as exc: + assert "not an image" in str(exc) + else: + raise AssertionError("Expected non-image response to fail") + diff --git a/web_server.py b/web_server.py index e87b4006..893b7cb2 100644 --- a/web_server.py +++ b/web_server.py @@ -28015,45 +28015,52 @@ def get_your_artist_info(artist_id): @app.route('/api/image-proxy', methods=['GET']) def image_proxy(): - """Proxy external images to avoid CORS issues for canvas rendering.""" + """Proxy external images to avoid CORS issues for canvas rendering. + + Kept for backwards compatibility; new normalized artwork URLs use + /api/image-cache/, but older browser sessions may still hold this + query-string form. + """ url = request.args.get('url', '') if not url or not url.startswith('http'): return '', 400 - host = urlparse(url).hostname or '' - allowed_hosts = [ - 'i.scdn.co', 'mosaic.scdn.co', # Spotify - 'lastfm.freetls.fastly.net', 'lastfm-img2.akamaized.net', # Last.fm - 'e-cdns-images.dzcdn.net', 'cdns-images.dzcdn.net', 'api.deezer.com', # Deezer - 'is1-ssl.mzstatic.com', 'is2-ssl.mzstatic.com', 'is3-ssl.mzstatic.com', - 'is4-ssl.mzstatic.com', 'is5-ssl.mzstatic.com', # iTunes/Apple - 'img.discogs.com', 'i.discogs.com', # Discogs - 'localhost', '127.0.0.1', 'host.docker.internal', # Local/Docker media servers - ] - if not any(host == h or host.endswith('.' + h) for h in allowed_hosts) and not is_internal_image_host(url): - return '', 403 try: - resp = requests.get(url, timeout=10, stream=True, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', - 'Referer': 'https://www.deezer.com/', - }) - if resp.status_code != 200: - return '', resp.status_code - content_type = resp.headers.get('Content-Type', 'image/jpeg') - if not content_type.startswith('image/'): - return '', 400 - return Response( - resp.content, - content_type=content_type, - headers={ - 'Cache-Control': 'public, max-age=86400', - 'Access-Control-Allow-Origin': '*', - } - ) - except Exception: + from core.image_cache import get_image_cache + + cached = get_image_cache().get_url(url) + response = send_file(cached.path, mimetype=cached.mime_type, conditional=True) + max_age = int(config_manager.get("image_cache.ttl_seconds", 2592000)) + response.headers['Cache-Control'] = f'private, max-age={max_age}' + response.headers['Access-Control-Allow-Origin'] = '*' + response.headers['X-SoulSync-Image-Cache'] = cached.status + return response + except Exception as exc: + logger.debug("image proxy failed: %s", exc) return '', 502 +@app.route('/api/image-cache/', methods=['GET']) +def serve_cached_image(cache_key): + """Serve a registered image URL from SoulSync's disk cache.""" + if not re.fullmatch(r'[a-f0-9]{64}', cache_key or ''): + return '', 404 + + try: + from core.image_cache import get_image_cache + + cached = get_image_cache().get(cache_key) + response = send_file(cached.path, mimetype=cached.mime_type, conditional=True) + max_age = int(config_manager.get("image_cache.ttl_seconds", 2592000)) + response.headers['Cache-Control'] = f'private, max-age={max_age}' + response.headers['Access-Control-Allow-Origin'] = '*' + response.headers['X-SoulSync-Image-Cache'] = cached.status + return response + except Exception as exc: + logger.debug("cached image serve failed for %s: %s", cache_key, exc) + return '', 404 + + from core.artists.map import ( _artmap_cache_invalidate, _artmap_cache_get,