Move image URL normalization into metadata helpers

- keep existing /api/image-proxy URLs from being wrapped again
- reuse the shared metadata package instead of duplicating URL logic in web_server.py
- add regression coverage for proxy passthrough and internal URL normalization
pull/467/head
Antti Kettunen 4 weeks ago
parent 2b3022f6b0
commit b85a05fb88
No known key found for this signature in database
GPG Key ID: C6B2A3D250359BD7

@ -8,6 +8,7 @@ from core.metadata.album_tracks import (
resolve_album_reference,
)
from core.metadata.artist_image import get_artist_image_url
from core.metadata.artwork import is_internal_image_host, normalize_image_url
from core.metadata.cache import MetadataCache, get_metadata_cache
from core.metadata.completion import (
check_album_completion,
@ -93,8 +94,10 @@ __all__ = [
"iter_artist_discography_completion_events",
"iter_musicmap_similar_artist_events",
"is_hydrabase_enabled",
"is_internal_image_host",
"register_profile_spotify_credentials_provider",
"register_runtime_clients",
"normalize_image_url",
"resolve_album_reference",
"invalidate_metadata_status_caches",
]

@ -5,6 +5,8 @@ from __future__ import annotations
import os
import re
import urllib.request
from ipaddress import ip_address
from urllib.parse import quote, urlparse
from core.imports.context import get_import_context_album
from core.metadata.common import (
@ -17,12 +19,193 @@ from utils.logging_config import get_logger as _create_logger
__all__ = [
"embed_album_art_metadata",
"download_cover_art",
"is_internal_image_host",
"is_image_proxy_url",
"normalize_image_url",
]
logger = _create_logger("metadata.artwork")
def normalize_image_url(thumb_url: str | None) -> str | None:
"""Convert media-server image URLs into browser-safe URLs."""
if not thumb_url:
return None
try:
if is_image_proxy_url(thumb_url):
# Already normalized for browser use; avoid wrapping it in another proxy layer.
return thumb_url
# Check if it's a localhost URL or relative path that needs fixing
needs_fixing = (
thumb_url.startswith('http://localhost:') or
thumb_url.startswith('https://localhost:') or
thumb_url.startswith('http://127.0.0.1:') or
thumb_url.startswith('https://127.0.0.1:') or
thumb_url.startswith('http://host.docker.internal:') or
thumb_url.startswith('https://host.docker.internal:') or
(thumb_url.startswith('http://') and is_internal_image_host(thumb_url)) or
thumb_url.startswith('/library/') or # Plex relative paths
thumb_url.startswith('/Items/') or # Jellyfin relative paths
thumb_url.startswith('/api/') or # Old Navidrome API paths
thumb_url.startswith('/rest/') # Navidrome Subsonic API paths
)
if needs_fixing:
cfg = get_config_manager()
active_server = cfg.get_active_media_server()
logger.debug("Fixing URL: %s, Active server: %s", thumb_url, active_server)
if active_server == 'plex':
plex_config = cfg.get_plex_config()
plex_base_url = plex_config.get('base_url', '')
plex_token = plex_config.get('token', '')
logger.info("Plex config - base_url: %s, token: %s...", plex_base_url, plex_token[:10])
if plex_base_url and plex_token:
# Extract the path from URL
if thumb_url.startswith('/library/'):
# Already a path
path = thumb_url
else:
# Full localhost URL, extract path
parsed = urlparse(thumb_url)
path = parsed.path
# Construct proper Plex URL with token
fixed_url = f"{plex_base_url.rstrip('/')}{path}?X-Plex-Token={plex_token}"
logger.info("Fixed URL: %s", fixed_url)
return _browser_safe_image_url(fixed_url)
elif active_server == 'jellyfin':
jellyfin_config = cfg.get_jellyfin_config()
jellyfin_base_url = jellyfin_config.get('base_url', '')
jellyfin_token = jellyfin_config.get('api_key', '')
logger.info(
"Jellyfin config - base_url: %s, token: %s...",
jellyfin_base_url,
jellyfin_token[:10] if jellyfin_token else 'None',
)
if jellyfin_base_url:
# Extract the path from URL
if thumb_url.startswith('/Items/') or thumb_url.startswith('/api/'):
# Already a path
path = thumb_url
else:
# Full localhost URL, extract path
parsed = urlparse(thumb_url)
path = parsed.path
# Construct proper Jellyfin URL with token
if jellyfin_token:
separator = '&' if '?' in path else '?'
fixed_url = f"{jellyfin_base_url.rstrip('/')}{path}{separator}X-Emby-Token={jellyfin_token}"
else:
fixed_url = f"{jellyfin_base_url.rstrip('/')}{path}"
logger.info("Fixed URL: %s", fixed_url)
return _browser_safe_image_url(fixed_url)
elif active_server == 'navidrome':
navidrome_config = cfg.get_navidrome_config()
navidrome_base_url = navidrome_config.get('base_url', '')
navidrome_username = navidrome_config.get('username', '')
navidrome_password = navidrome_config.get('password', '')
logger.info("Navidrome config - base_url: %s, username: %s", navidrome_base_url, navidrome_username)
if navidrome_base_url and navidrome_username and navidrome_password:
# Extract the path from URL
if thumb_url.startswith('/rest/'):
# Already a Subsonic API path
path = thumb_url
else:
# Full localhost URL, extract path
parsed = urlparse(thumb_url)
path = parsed.path
# Generate Subsonic API authentication
import hashlib
import secrets
salt = secrets.token_hex(6)
token = hashlib.md5((navidrome_password + salt).encode()).hexdigest()
# Add authentication parameters to the URL
separator = '&' if '?' in path else '?'
auth_params = f"u={navidrome_username}&t={token}&s={salt}&v=1.16.1&c=SoulSync&f=json"
# Construct proper Navidrome Subsonic URL
fixed_url = f"{navidrome_base_url.rstrip('/')}{path}{separator}{auth_params}"
logger.info("Fixed URL: %s", fixed_url)
return _browser_safe_image_url(fixed_url)
logger.warning("No configuration found for %s or unsupported server type", active_server)
# Return a browser-safe URL even if no server-specific rebuild was possible.
return _browser_safe_image_url(thumb_url)
except Exception as exc:
logger.error("Error fixing image URL '%s': %s", thumb_url, exc)
return _browser_safe_image_url(thumb_url)
def is_image_proxy_url(url: str) -> bool:
"""Return True for SoulSync image-proxy URLs, absolute or relative."""
if not url:
return False
try:
parsed = urlparse(url)
return parsed.path == '/api/image-proxy'
except Exception:
return False
def is_internal_image_host(url: str) -> bool:
"""Return True when an image URL points at a host the browser likely cannot reach directly."""
try:
parsed = urlparse(url)
host = (parsed.hostname or '').strip('[]').lower()
if not host:
return False
if host in {'localhost', '127.0.0.1', '::1', 'host.docker.internal'}:
return True
# Single-label hosts are usually Docker service names or local LAN aliases.
if '.' not in host:
return True
try:
ip = ip_address(host)
return ip.is_loopback or ip.is_private or ip.is_link_local or ip.is_reserved
except ValueError:
return False
except Exception:
return False
def _browser_safe_image_url(url: str) -> str:
"""Return a browser-safe image URL, proxying internal hosts through SoulSync."""
if not url:
return url
if is_image_proxy_url(url):
return url
if url.startswith('/api/image-proxy?url='):
return url
if url.startswith('http://') or url.startswith('https://'):
if is_internal_image_host(url):
return f"/api/image-proxy?url={quote(url, safe='')}"
return url
# Relative media-server paths should already have been expanded before this point.
return url
def embed_album_art_metadata(audio_file, metadata: dict):
cfg = get_config_manager()
symbols = get_mutagen_symbols()

@ -0,0 +1,44 @@
"""Regression tests for image URL normalization."""
from __future__ import annotations
import pytest
from urllib.parse import quote
@pytest.mark.parametrize(
"thumb_url",
[
"/api/image-proxy?url=https%3A%2F%2Fexample.com%2Fcover.jpg",
"http://host.docker.internal:4533/api/image-proxy?u=ketiska&t=abc&s=def&v=1.16.1&c=SoulSync&f=json",
],
)
def test_normalize_image_url_leaves_existing_image_proxy_urls_alone(thumb_url):
"""Existing proxy URLs should not be wrapped in a second proxy layer."""
from core.metadata import normalize_image_url
assert normalize_image_url(thumb_url) == thumb_url
def test_normalize_image_url_proxies_internal_http_urls(monkeypatch):
"""Raw internal image URLs should still be routed through SoulSync's proxy."""
from core.metadata import normalize_image_url
from core.metadata import artwork
class _FakeConfig:
def get_active_media_server(self):
return "spotify"
def get_plex_config(self):
return {}
def get_jellyfin_config(self):
return {}
def get_navidrome_config(self):
return {}
monkeypatch.setattr(artwork, "get_config_manager", lambda: _FakeConfig())
url = "http://localhost:4533/cover.jpg"
assert normalize_image_url(url) == f"/api/image-proxy?url={quote(url, safe='')}"

@ -5,7 +5,6 @@ import json
import asyncio
import requests
import socket
import ipaddress
import subprocess
import platform
import threading
@ -18,7 +17,7 @@ import collections
import functools
from contextlib import contextmanager
from pathlib import Path
from urllib.parse import quote, urljoin, urlparse
from urllib.parse import urljoin, urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed
from flask import Flask, render_template, request, jsonify, redirect, send_file, send_from_directory, Response, session, g, abort
@ -96,6 +95,8 @@ from core.database_update_worker import DatabaseUpdateWorker
from core.web_scan_manager import WebScanManager
from core.metadata.cache import get_metadata_cache
from core.metadata import registry as metadata_registry
from core.metadata import is_internal_image_host
from core.metadata import normalize_image_url as fix_artist_image_url
from core.metadata.registry import (
clear_cached_metadata_client,
get_metadata_source_label,
@ -8088,163 +8089,6 @@ def maintain_search_history():
except Exception as e:
logger.error(f"Error maintaining search history: {e}")
return jsonify({"success": False, "error": str(e)}), 500
def fix_artist_image_url(thumb_url):
"""Convert media-server image URLs into browser-safe URLs."""
if not thumb_url:
return None
try:
# Check if it's a localhost URL or relative path that needs fixing
needs_fixing = (
thumb_url.startswith('http://localhost:') or
thumb_url.startswith('https://localhost:') or
thumb_url.startswith('http://127.0.0.1:') or
thumb_url.startswith('https://127.0.0.1:') or
thumb_url.startswith('http://host.docker.internal:') or
thumb_url.startswith('https://host.docker.internal:') or
(thumb_url.startswith('http://') and _is_internal_image_host(thumb_url)) or
thumb_url.startswith('/library/') or # Plex relative paths
thumb_url.startswith('/Items/') or # Jellyfin relative paths
thumb_url.startswith('/api/') or # Old Navidrome API paths
thumb_url.startswith('/rest/') # Navidrome Subsonic API paths
)
if needs_fixing:
active_server = config_manager.get_active_media_server()
logger.debug(f"Fixing URL: {thumb_url}, Active server: {active_server}")
if active_server == 'plex':
plex_config = config_manager.get_plex_config()
plex_base_url = plex_config.get('base_url', '')
plex_token = plex_config.get('token', '')
logger.info(f"Plex config - base_url: {plex_base_url}, token: {plex_token[:10]}...")
if plex_base_url and plex_token:
# Extract the path from URL
if thumb_url.startswith('/library/'):
# Already a path
path = thumb_url
else:
# Full localhost URL, extract path
from urllib.parse import urlparse
parsed = urlparse(thumb_url)
path = parsed.path
# Construct proper Plex URL with token
fixed_url = f"{plex_base_url.rstrip('/')}{path}?X-Plex-Token={plex_token}"
logger.info(f"Fixed URL: {fixed_url}")
return _browser_safe_image_url(fixed_url)
elif active_server == 'jellyfin':
jellyfin_config = config_manager.get_jellyfin_config()
jellyfin_base_url = jellyfin_config.get('base_url', '')
jellyfin_token = jellyfin_config.get('api_key', '')
logger.info(f"Jellyfin config - base_url: {jellyfin_base_url}, token: {jellyfin_token[:10] if jellyfin_token else 'None'}...")
if jellyfin_base_url:
# Extract the path from URL
if thumb_url.startswith('/Items/') or thumb_url.startswith('/api/'):
# Already a path
path = thumb_url
else:
# Full localhost URL, extract path
from urllib.parse import urlparse
parsed = urlparse(thumb_url)
path = parsed.path
# Construct proper Jellyfin URL with token
if jellyfin_token:
separator = '&' if '?' in path else '?'
fixed_url = f"{jellyfin_base_url.rstrip('/')}{path}{separator}X-Emby-Token={jellyfin_token}"
else:
fixed_url = f"{jellyfin_base_url.rstrip('/')}{path}"
logger.info(f"Fixed URL: {fixed_url}")
return _browser_safe_image_url(fixed_url)
elif active_server == 'navidrome':
navidrome_config = config_manager.get_navidrome_config()
navidrome_base_url = navidrome_config.get('base_url', '')
navidrome_username = navidrome_config.get('username', '')
navidrome_password = navidrome_config.get('password', '')
logger.info(f"Navidrome config - base_url: {navidrome_base_url}, username: {navidrome_username}")
if navidrome_base_url and navidrome_username and navidrome_password:
# Extract the path from URL
if thumb_url.startswith('/rest/'):
# Already a Subsonic API path
path = thumb_url
else:
# Full localhost URL, extract path
from urllib.parse import urlparse
parsed = urlparse(thumb_url)
path = parsed.path
# Generate Subsonic API authentication
import hashlib
import secrets
salt = secrets.token_hex(6)
token = hashlib.md5((navidrome_password + salt).encode()).hexdigest()
# Add authentication parameters to the URL
separator = '&' if '?' in path else '?'
auth_params = f"u={navidrome_username}&t={token}&s={salt}&v=1.16.1&c=SoulSync&f=json"
# Construct proper Navidrome Subsonic URL
fixed_url = f"{navidrome_base_url.rstrip('/')}{path}{separator}{auth_params}"
logger.info(f"Fixed URL: {fixed_url}")
return _browser_safe_image_url(fixed_url)
logger.warning(f"No configuration found for {active_server} or unsupported server type")
# Return a browser-safe URL even if no server-specific rebuild was possible.
return _browser_safe_image_url(thumb_url)
except Exception as e:
logger.error(f"Error fixing image URL '{thumb_url}': {e}")
return _browser_safe_image_url(thumb_url)
def _is_internal_image_host(url: str) -> bool:
"""Return True when an image URL points at a host the browser likely cannot reach directly."""
try:
parsed = urlparse(url)
host = (parsed.hostname or '').strip('[]').lower()
if not host:
return False
if host in {'localhost', '127.0.0.1', '::1', 'host.docker.internal'}:
return True
# Single-label hosts are usually Docker service names or local LAN aliases.
if '.' not in host:
return True
try:
ip = ipaddress.ip_address(host)
return ip.is_loopback or ip.is_private or ip.is_link_local or ip.is_reserved
except ValueError:
return False
except Exception:
return False
def _browser_safe_image_url(url: str) -> str:
"""Return a browser-safe image URL, proxying internal hosts through SoulSync."""
if not url:
return url
if url.startswith('/api/image-proxy?url='):
return url
if url.startswith('http://') or url.startswith('https://'):
if _is_internal_image_host(url):
return f"/api/image-proxy?url={quote(url, safe='')}"
return url
# Relative media-server paths should already have been expanded before this point.
return url
@app.route('/api/library/history')
def get_library_history():
"""Get persistent library history (downloads and server imports)."""
@ -27898,7 +27742,7 @@ def image_proxy():
'img.discogs.com', 'i.discogs.com', # Discogs
'localhost', '127.0.0.1', 'host.docker.internal', # Local/Docker media servers
]
if not any(host == h or host.endswith('.' + h) for h in allowed_hosts) and not _is_internal_image_host(url):
if not any(host == h or host.endswith('.' + h) for h in allowed_hosts) and not is_internal_image_host(url):
return '', 403
try:
resp = requests.get(url, timeout=10, stream=True, headers={

Loading…
Cancel
Save