feat(webui): cache artwork images on disk

Add a disk-backed image cache with hashed browser URLs, SQLite metadata, size/type validation, stale fallback, and per-image fetch locking. Route normalized artwork through /api/image-cache while keeping /api/image-proxy as a compatibility shim, and align browser max-age with the image cache TTL. Add focused tests for cache behavior and image URL normalization.
pull/666/head
Broque Thomas 5 days ago
parent 6e5ea1d490
commit 136d665c8a

1
.gitignore vendored

@ -17,6 +17,7 @@ database/music_library.db-shm
database/music_library.db-wal
database/music_library.db.backup_*
database/api_call_history.json
storage/image_cache/
logs/*.log
logs/*.log.*

@ -552,6 +552,13 @@ class ConfigManager:
"path": os.environ.get('DATABASE_PATH', 'database/music_library.db'),
"max_workers": 5
},
"image_cache": {
"enabled": True,
"path": "storage/image_cache",
"ttl_seconds": 2592000,
"failed_ttl_seconds": 21600,
"max_download_mb": 15
},
"metadata_enhancement": {
"enabled": True,
"embed_album_art": True,

@ -0,0 +1,342 @@
"""Disk-backed image cache for browser-facing artwork URLs."""
from __future__ import annotations
import hashlib
import mimetypes
import os
import sqlite3
import threading
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Optional
from urllib.parse import urlparse
import requests
from config.settings import config_manager
from core.metadata.artwork import is_internal_image_host
from utils.logging_config import get_logger
logger = get_logger("image_cache")
DEFAULT_TTL_SECONDS = 30 * 24 * 60 * 60
DEFAULT_FAILED_TTL_SECONDS = 6 * 60 * 60
DEFAULT_MAX_DOWNLOAD_BYTES = 15 * 1024 * 1024
class ImageCacheError(Exception):
"""Raised when an image cannot be served from the cache."""
@dataclass
class CachedImage:
key: str
path: Path
mime_type: str
size: int
status: str
class ImageCache:
def __init__(
self,
cache_dir: str | os.PathLike[str],
*,
ttl_seconds: int = DEFAULT_TTL_SECONDS,
failed_ttl_seconds: int = DEFAULT_FAILED_TTL_SECONDS,
max_download_bytes: int = DEFAULT_MAX_DOWNLOAD_BYTES,
fetcher: Optional[Callable[..., requests.Response]] = None,
):
self.cache_dir = Path(cache_dir)
self.ttl_seconds = int(ttl_seconds)
self.failed_ttl_seconds = int(failed_ttl_seconds)
self.max_download_bytes = int(max_download_bytes)
self.fetcher = fetcher or requests.get
self.db_path = self.cache_dir / "image_cache.sqlite3"
self._db_lock = threading.RLock()
self._key_locks: dict[str, threading.Lock] = {}
self._key_locks_lock = threading.Lock()
self.cache_dir.mkdir(parents=True, exist_ok=True)
self._init_db()
def cache_url_for(self, url: str | None) -> str | None:
"""Register a URL and return its browser-facing cached path."""
if not url:
return None
if str(url).startswith("/api/image-cache/"):
return str(url)
if not self.is_cacheable_url(str(url)):
return str(url)
key = self.key_for_url(str(url))
now = time.time()
with self._db_lock:
with self._connect() as conn:
conn.execute(
"""
INSERT INTO image_cache
(key, original_url, status, created_at, updated_at, last_accessed,
expires_at, size, mime_type, file_path, last_error)
VALUES (?, ?, 'pending', ?, ?, ?, 0, 0, '', '', '')
ON CONFLICT(key) DO UPDATE SET
original_url=excluded.original_url,
last_accessed=excluded.last_accessed
""",
(key, str(url), now, now, now),
)
return f"/api/image-cache/{key}"
def get(self, key: str) -> CachedImage:
row = self._get_row(key)
if not row:
raise ImageCacheError("Image cache key not found")
return self.get_url(row["original_url"])
def get_url(self, url: str) -> CachedImage:
if not self.is_cacheable_url(url):
raise ImageCacheError("URL is not cacheable")
key = self.key_for_url(url)
lock = self._lock_for_key(key)
with lock:
row = self._get_row(key)
now = time.time()
if row and row["status"] == "ok" and row["file_path"]:
path = Path(row["file_path"])
if path.exists():
self._touch(key, now)
if float(row["expires_at"] or 0) > now:
return CachedImage(key, path, row["mime_type"] or "image/jpeg", int(row["size"] or 0), "hit")
try:
return self._fetch_and_store(url, key, now)
except Exception as exc:
if row and row["status"] == "ok" and row["file_path"]:
stale_path = Path(row["file_path"])
if stale_path.exists():
logger.warning("Serving stale cached image for %s after refresh failed: %s", key, exc)
self._record_error(key, str(exc), now, keep_status=True)
return CachedImage(
key,
stale_path,
row["mime_type"] or "image/jpeg",
int(row["size"] or 0),
"stale",
)
self._record_error(key, str(exc), now)
raise ImageCacheError(str(exc)) from exc
@staticmethod
def key_for_url(url: str) -> str:
return hashlib.sha256(url.encode("utf-8")).hexdigest()
@staticmethod
def is_cacheable_url(url: str) -> bool:
try:
parsed = urlparse(url)
if parsed.scheme not in {"http", "https"}:
return False
if parsed.username or parsed.password:
return False
if not parsed.hostname:
return False
return True
except Exception:
return False
def _fetch_and_store(self, url: str, key: str, now: float) -> CachedImage:
if not self._is_fetch_allowed(url):
raise ImageCacheError("Image host is not allowed")
response = self.fetcher(
url,
timeout=10,
stream=True,
headers={
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
),
"Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
"Referer": "https://www.deezer.com/",
},
)
try:
if response.status_code != 200:
raise ImageCacheError(f"Upstream image returned HTTP {response.status_code}")
mime_type = (response.headers.get("Content-Type") or "image/jpeg").split(";", 1)[0].strip()
if not mime_type.startswith("image/"):
raise ImageCacheError(f"Upstream response is not an image: {mime_type}")
declared_size = response.headers.get("Content-Length")
try:
if declared_size and int(declared_size) > self.max_download_bytes:
raise ImageCacheError("Image exceeds configured size limit")
except ValueError:
pass
ext = mimetypes.guess_extension(mime_type) or ".img"
if ext == ".jpe":
ext = ".jpg"
path = self._path_for_key(key, ext)
path.parent.mkdir(parents=True, exist_ok=True)
tmp_path = path.with_suffix(path.suffix + ".tmp")
total = 0
try:
with open(tmp_path, "wb") as handle:
for chunk in response.iter_content(chunk_size=64 * 1024):
if not chunk:
continue
total += len(chunk)
if total > self.max_download_bytes:
raise ImageCacheError("Image exceeds configured size limit")
handle.write(chunk)
except Exception:
try:
tmp_path.unlink(missing_ok=True)
except Exception:
pass
raise
if total <= 0:
raise ImageCacheError("Image response was empty")
os.replace(tmp_path, path)
expires_at = now + self.ttl_seconds
with self._db_lock:
with self._connect() as conn:
conn.execute(
"""
INSERT INTO image_cache
(key, original_url, status, created_at, updated_at, last_accessed,
expires_at, size, mime_type, file_path, last_error)
VALUES (?, ?, 'ok', ?, ?, ?, ?, ?, ?, ?, '')
ON CONFLICT(key) DO UPDATE SET
original_url=excluded.original_url,
status='ok',
updated_at=excluded.updated_at,
last_accessed=excluded.last_accessed,
expires_at=excluded.expires_at,
size=excluded.size,
mime_type=excluded.mime_type,
file_path=excluded.file_path,
last_error=''
""",
(key, url, now, now, now, expires_at, total, mime_type, str(path)),
)
return CachedImage(key, path, mime_type, total, "miss")
finally:
response.close()
def _path_for_key(self, key: str, extension: str) -> Path:
return self.cache_dir / key[:2] / key[2:4] / f"{key}{extension}"
def _is_fetch_allowed(self, url: str) -> bool:
parsed = urlparse(url)
if parsed.scheme not in {"http", "https"}:
return False
if parsed.username or parsed.password:
return False
if not parsed.hostname:
return False
# Internal hosts are explicitly supported because Plex/Jellyfin/Navidrome
# artwork often lives behind Docker/LAN-only URLs. Public hosts are allowed
# as image-only responses with size limits.
return bool(parsed.hostname) or is_internal_image_host(url)
def _connect(self) -> sqlite3.Connection:
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row
return conn
def _init_db(self) -> None:
with self._db_lock:
with self._connect() as conn:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS image_cache (
key TEXT PRIMARY KEY,
original_url TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
created_at REAL NOT NULL,
updated_at REAL NOT NULL,
last_accessed REAL NOT NULL,
expires_at REAL NOT NULL DEFAULT 0,
size INTEGER NOT NULL DEFAULT 0,
mime_type TEXT NOT NULL DEFAULT '',
file_path TEXT NOT NULL DEFAULT '',
last_error TEXT NOT NULL DEFAULT ''
)
"""
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_image_cache_accessed ON image_cache(last_accessed)")
def _get_row(self, key: str) -> Optional[sqlite3.Row]:
with self._db_lock:
with self._connect() as conn:
return conn.execute("SELECT * FROM image_cache WHERE key = ?", (key,)).fetchone()
def _touch(self, key: str, now: float) -> None:
with self._db_lock:
with self._connect() as conn:
conn.execute("UPDATE image_cache SET last_accessed = ? WHERE key = ?", (now, key))
def _record_error(self, key: str, error: str, now: float, *, keep_status: bool = False) -> None:
status_sql = "status" if keep_status else "'failed'"
with self._db_lock:
with self._connect() as conn:
conn.execute(
f"""
UPDATE image_cache
SET status = {status_sql},
updated_at = ?,
last_accessed = ?,
expires_at = ?,
last_error = ?
WHERE key = ?
""",
(now, now, now + self.failed_ttl_seconds, error[:500], key),
)
def _lock_for_key(self, key: str) -> threading.Lock:
with self._key_locks_lock:
lock = self._key_locks.get(key)
if lock is None:
lock = threading.Lock()
self._key_locks[key] = lock
return lock
_image_cache: Optional[ImageCache] = None
_image_cache_lock = threading.Lock()
def get_image_cache() -> ImageCache:
global _image_cache
with _image_cache_lock:
if _image_cache is None:
cache_dir = config_manager.get("image_cache.path", "storage/image_cache")
if not os.path.isabs(cache_dir):
cache_dir = str(config_manager.base_dir / cache_dir)
_image_cache = ImageCache(
cache_dir,
ttl_seconds=int(config_manager.get("image_cache.ttl_seconds", DEFAULT_TTL_SECONDS)),
failed_ttl_seconds=int(config_manager.get("image_cache.failed_ttl_seconds", DEFAULT_FAILED_TTL_SECONDS)),
max_download_bytes=int(config_manager.get("image_cache.max_download_mb", 15)) * 1024 * 1024,
)
return _image_cache
def cached_image_url(url: str | None) -> str | None:
if not url or config_manager.get("image_cache.enabled", True) is False:
return url
try:
return get_image_cache().cache_url_for(url)
except Exception as exc:
logger.debug("image cache registration failed: %s", exc)
return url

@ -192,13 +192,13 @@ def normalize_image_url(thumb_url: str | None) -> str | None:
def is_image_proxy_url(url: str) -> bool:
"""Return True for SoulSync image-proxy URLs, absolute or relative."""
"""Return True for SoulSync image proxy/cache URLs, absolute or relative."""
if not url:
return False
try:
parsed = urlparse(url)
return parsed.path == '/api/image-proxy'
return parsed.path == '/api/image-proxy' or parsed.path.startswith('/api/image-cache/')
except Exception:
return False
@ -235,10 +235,19 @@ def _browser_safe_image_url(url: str) -> str:
if is_image_proxy_url(url):
return url
if url.startswith('/api/image-proxy?url='):
if url.startswith('/api/image-proxy?url=') or url.startswith('/api/image-cache/'):
return url
if url.startswith('http://') or url.startswith('https://'):
try:
from core.image_cache import cached_image_url
cached_url = cached_image_url(url)
if cached_url:
return cached_url
except Exception as exc:
logger.debug("image cache URL registration failed: %s", exc)
if is_internal_image_host(url):
return f"/api/image-proxy?url={quote(url, safe='')}"
return url

@ -10,6 +10,7 @@ from urllib.parse import quote
"thumb_url",
[
"/api/image-proxy?url=https%3A%2F%2Fexample.com%2Fcover.jpg",
"/api/image-cache/" + ("a" * 64),
"http://host.docker.internal:4533/api/image-proxy?u=ketiska&t=abc&s=def&v=1.16.1&c=SoulSync&f=json",
],
)
@ -20,10 +21,11 @@ def test_normalize_image_url_leaves_existing_image_proxy_urls_alone(thumb_url):
assert normalize_image_url(thumb_url) == thumb_url
def test_normalize_image_url_proxies_internal_http_urls(monkeypatch):
"""Raw internal image URLs should still be routed through SoulSync's proxy."""
def test_normalize_image_url_registers_internal_http_urls_with_image_cache(monkeypatch):
"""Raw internal image URLs should be routed through SoulSync's hashed cache URL."""
from core.metadata import normalize_image_url
from core.metadata import artwork
import core.image_cache as image_cache
class _FakeConfig:
def get_active_media_server(self):
@ -39,6 +41,33 @@ def test_normalize_image_url_proxies_internal_http_urls(monkeypatch):
return {}
monkeypatch.setattr(artwork, "get_config_manager", lambda: _FakeConfig())
monkeypatch.setattr(image_cache, "cached_image_url", lambda u: "/api/image-cache/" + ("b" * 64))
url = "http://localhost:4533/cover.jpg"
assert normalize_image_url(url) == "/api/image-cache/" + ("b" * 64)
def test_normalize_image_url_falls_back_to_proxy_when_cache_registration_fails(monkeypatch):
"""If cache registration breaks, internal URLs keep the old image-proxy behavior."""
from core.metadata import normalize_image_url
from core.metadata import artwork
import core.image_cache as image_cache
class _FakeConfig:
def get_active_media_server(self):
return "spotify"
def get_plex_config(self):
return {}
def get_jellyfin_config(self):
return {}
def get_navidrome_config(self):
return {}
monkeypatch.setattr(artwork, "get_config_manager", lambda: _FakeConfig())
monkeypatch.setattr(image_cache, "cached_image_url", lambda u: (_ for _ in ()).throw(RuntimeError("boom")))
url = "http://localhost:4533/cover.jpg"
assert normalize_image_url(url) == f"/api/image-proxy?url={quote(url, safe='')}"

@ -0,0 +1,66 @@
from __future__ import annotations
from core.image_cache import ImageCache
class FakeResponse:
def __init__(self, body: bytes, *, status_code: int = 200, content_type: str = "image/jpeg"):
self.body = body
self.status_code = status_code
self.headers = {
"Content-Type": content_type,
"Content-Length": str(len(body)),
}
self.closed = False
def iter_content(self, chunk_size=65536):
yield self.body
def close(self):
self.closed = True
def test_cache_url_for_registers_hashed_browser_path(tmp_path):
cache = ImageCache(tmp_path)
url = "https://images.example.test/cover.jpg?token=secret"
cached_url = cache.cache_url_for(url)
assert cached_url == f"/api/image-cache/{ImageCache.key_for_url(url)}"
assert "secret" not in cached_url
def test_get_url_fetches_once_then_serves_cached_file(tmp_path):
calls = []
def fetcher(url, **kwargs):
calls.append((url, kwargs))
return FakeResponse(b"fake-jpeg-bytes")
cache = ImageCache(tmp_path, fetcher=fetcher)
url = "https://images.example.test/cover.jpg"
first = cache.get_url(url)
second = cache.get_url(url)
assert first.status == "miss"
assert second.status == "hit"
assert first.path == second.path
assert first.path.read_bytes() == b"fake-jpeg-bytes"
assert first.mime_type == "image/jpeg"
assert len(calls) == 1
def test_get_url_rejects_non_image_responses(tmp_path):
cache = ImageCache(
tmp_path,
fetcher=lambda url, **kwargs: FakeResponse(b"<html></html>", content_type="text/html"),
)
try:
cache.get_url("https://images.example.test/not-image")
except Exception as exc:
assert "not an image" in str(exc)
else:
raise AssertionError("Expected non-image response to fail")

@ -28015,45 +28015,52 @@ def get_your_artist_info(artist_id):
@app.route('/api/image-proxy', methods=['GET'])
def image_proxy():
"""Proxy external images to avoid CORS issues for canvas rendering."""
"""Proxy external images to avoid CORS issues for canvas rendering.
Kept for backwards compatibility; new normalized artwork URLs use
/api/image-cache/<key>, but older browser sessions may still hold this
query-string form.
"""
url = request.args.get('url', '')
if not url or not url.startswith('http'):
return '', 400
host = urlparse(url).hostname or ''
allowed_hosts = [
'i.scdn.co', 'mosaic.scdn.co', # Spotify
'lastfm.freetls.fastly.net', 'lastfm-img2.akamaized.net', # Last.fm
'e-cdns-images.dzcdn.net', 'cdns-images.dzcdn.net', 'api.deezer.com', # Deezer
'is1-ssl.mzstatic.com', 'is2-ssl.mzstatic.com', 'is3-ssl.mzstatic.com',
'is4-ssl.mzstatic.com', 'is5-ssl.mzstatic.com', # iTunes/Apple
'img.discogs.com', 'i.discogs.com', # Discogs
'localhost', '127.0.0.1', 'host.docker.internal', # Local/Docker media servers
]
if not any(host == h or host.endswith('.' + h) for h in allowed_hosts) and not is_internal_image_host(url):
return '', 403
try:
resp = requests.get(url, timeout=10, stream=True, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Referer': 'https://www.deezer.com/',
})
if resp.status_code != 200:
return '', resp.status_code
content_type = resp.headers.get('Content-Type', 'image/jpeg')
if not content_type.startswith('image/'):
return '', 400
return Response(
resp.content,
content_type=content_type,
headers={
'Cache-Control': 'public, max-age=86400',
'Access-Control-Allow-Origin': '*',
}
)
except Exception:
from core.image_cache import get_image_cache
cached = get_image_cache().get_url(url)
response = send_file(cached.path, mimetype=cached.mime_type, conditional=True)
max_age = int(config_manager.get("image_cache.ttl_seconds", 2592000))
response.headers['Cache-Control'] = f'private, max-age={max_age}'
response.headers['Access-Control-Allow-Origin'] = '*'
response.headers['X-SoulSync-Image-Cache'] = cached.status
return response
except Exception as exc:
logger.debug("image proxy failed: %s", exc)
return '', 502
@app.route('/api/image-cache/<cache_key>', methods=['GET'])
def serve_cached_image(cache_key):
"""Serve a registered image URL from SoulSync's disk cache."""
if not re.fullmatch(r'[a-f0-9]{64}', cache_key or ''):
return '', 404
try:
from core.image_cache import get_image_cache
cached = get_image_cache().get(cache_key)
response = send_file(cached.path, mimetype=cached.mime_type, conditional=True)
max_age = int(config_manager.get("image_cache.ttl_seconds", 2592000))
response.headers['Cache-Control'] = f'private, max-age={max_age}'
response.headers['Access-Control-Allow-Origin'] = '*'
response.headers['X-SoulSync-Image-Cache'] = cached.status
return response
except Exception as exc:
logger.debug("cached image serve failed for %s: %s", cache_key, exc)
return '', 404
from core.artists.map import (
_artmap_cache_invalidate,
_artmap_cache_get,

Loading…
Cancel
Save