mirror of https://github.com/Nezreka/SoulSync.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
463 lines
18 KiB
463 lines
18 KiB
"""
|
|
AcoustID Client for audio fingerprinting and lookup.
|
|
|
|
Uses the pyacoustid library which handles:
|
|
- Fingerprint generation via chromaprint library
|
|
- AcoustID API lookups
|
|
- Rate limiting
|
|
|
|
The fpcalc binary is auto-downloaded if not found (Windows, macOS, Linux x86_64).
|
|
"""
|
|
|
|
import threading
|
|
import sys
|
|
import platform
|
|
import zipfile
|
|
import tarfile
|
|
import tempfile
|
|
import urllib.request
|
|
from typing import Dict, List, Optional, Any, Tuple
|
|
from pathlib import Path
|
|
import os
|
|
import shutil
|
|
import logging
|
|
import logging.handlers
|
|
|
|
from utils.logging_config import get_logger
|
|
from config.settings import config_manager
|
|
|
|
# Directory that holds the fpcalc binary when this module downloads it itself.
FPCALC_BIN_DIR = Path(__file__).parent.parent / "bin"
# Chromaprint release whose prebuilt fpcalc archives are fetched.
CHROMAPRINT_VERSION = "1.5.1"

# Dedicated "soulsync.acoustid" logger. Its log file (acoustid.log) is placed
# in the same directory as the configured application log.
_acoustid_logger = logging.getLogger("soulsync.acoustid")
_acoustid_logger.setLevel(logging.DEBUG)

_acoustid_log_path = (
    Path(config_manager.get('logging.path', 'logs/app.log')).parent / "acoustid.log"
)
_acoustid_log_path.parent.mkdir(parents=True, exist_ok=True)

# Only attach the rotating file handler when the logger has none yet, so the
# same handler is not added twice.
if not _acoustid_logger.handlers:
    _acoustid_file_handler = logging.handlers.RotatingFileHandler(
        _acoustid_log_path,
        maxBytes=5 * 1024 * 1024,
        backupCount=2,
        encoding='utf-8',
    )
    _acoustid_file_handler.setLevel(logging.DEBUG)
    _acoustid_file_handler.setFormatter(
        logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            '%Y-%m-%d %H:%M:%S',
        )
    )
    _acoustid_logger.addHandler(_acoustid_file_handler)
    # Keep these records out of the parent loggers' handlers.
    _acoustid_logger.propagate = False

# General-purpose logger used by the rest of this module.
logger = get_logger("acoustid.client")
|
|
|
|
# Probe for the optional pyacoustid dependency; the outcome is recorded in
# ACOUSTID_AVAILABLE so the rest of the module can degrade gracefully.
try:
    import acoustid
except ImportError:
    ACOUSTID_AVAILABLE = False
    logger.warning("pyacoustid library not installed - run: pip install pyacoustid")
else:
    ACOUSTID_AVAILABLE = True
    logger.info("pyacoustid library loaded successfully")
|
|
|
|
def _get_fpcalc_download_url() -> Optional[str]:
    """Return the chromaprint release URL of the fpcalc archive for this host.

    Returns:
        The archive URL, or None (after logging a warning) when the CPU
        architecture is not recognised or no archive is listed for the
        current OS/architecture combination.
    """
    os_name = platform.system().lower()
    cpu = platform.machine().lower()

    # Normalise the various spellings of each CPU architecture.
    arch_aliases = {
        'x86_64': 'x86_64',
        'amd64': 'x86_64',
        'i386': 'i686',
        'i686': 'i686',
        'x86': 'i686',
        'arm64': 'aarch64',
        'aarch64': 'aarch64',
    }
    arch = arch_aliases.get(cpu)
    if arch is None:
        logger.warning(f"Unknown architecture: {cpu}")
        return None

    if os_name == 'darwin':
        # Universal build supports both Intel and Apple Silicon natively
        asset = 'macos-universal.tar.gz'
    else:
        asset = {
            ('windows', 'x86_64'): 'windows-x86_64.zip',
            ('linux', 'x86_64'): 'linux-x86_64.tar.gz',
        }.get((os_name, arch))

    if asset is None:
        logger.warning(f"No fpcalc download available for {os_name}-{arch}")
        return None

    base_url = f"https://github.com/acoustid/chromaprint/releases/download/v{CHROMAPRINT_VERSION}"
    return f"{base_url}/chromaprint-fpcalc-{CHROMAPRINT_VERSION}-{asset}"
|
|
|
|
|
|
def _download_fpcalc() -> Optional[str]:
    """
    Download and extract fpcalc binary for the current platform.

    The release archive is fetched into a temporary file, the fpcalc
    executable inside it is copied into FPCALC_BIN_DIR, and the temporary
    archive is removed again whether or not extraction succeeded.

    Returns:
        Path to fpcalc binary if successful, None otherwise (no download URL
        for this platform, binary missing from the archive, or any
        download/extraction error; errors are logged, never raised).
    """
    url = _get_fpcalc_download_url()
    if not url:
        return None

    is_windows = platform.system().lower() == 'windows'
    fpcalc_name = "fpcalc.exe" if is_windows else "fpcalc"
    fpcalc_dest = FPCALC_BIN_DIR / fpcalc_name
    tmp_path = None

    try:
        logger.info(f"Downloading fpcalc from: {url}")

        # Create bin directory
        FPCALC_BIN_DIR.mkdir(parents=True, exist_ok=True)

        # Reserve a temp file name, then download once the handle is closed
        # so the path can be reopened for writing on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=Path(url).suffix) as tmp:
            tmp_path = tmp.name
        urllib.request.urlretrieve(url, tmp_path)

        # Extract based on file type
        if url.endswith('.zip'):
            with zipfile.ZipFile(tmp_path, 'r') as zf:
                # Find fpcalc in the archive
                for name in zf.namelist():
                    if name.endswith(fpcalc_name):
                        with zf.open(name) as src, open(fpcalc_dest, 'wb') as dst:
                            shutil.copyfileobj(src, dst)
                        break
        elif url.endswith('.tar.gz'):
            with tarfile.open(tmp_path, 'r:gz') as tf:
                for member in tf.getmembers():
                    # Only a regular file can be the binary; copying its
                    # bytes ourselves also means the member's own path is
                    # never used as an extraction target.
                    if member.isfile() and member.name.endswith('fpcalc'):
                        with tf.extractfile(member) as src, open(fpcalc_dest, 'wb') as dst:
                            shutil.copyfileobj(src, dst)
                        break

        # Check for the binary BEFORE chmod: otherwise a missing binary
        # surfaces as a generic errno failure instead of this message.
        if not fpcalc_dest.exists():
            logger.error("fpcalc not found in downloaded archive")
            return None

        # Make executable on Unix
        if not is_windows:
            os.chmod(fpcalc_dest, 0o755)

        logger.info(f"fpcalc downloaded successfully: {fpcalc_dest}")
        return str(fpcalc_dest)

    except Exception as e:
        logger.error(f"Failed to download fpcalc: {e}")
        return None
    finally:
        # Clean up temp file on every path, including failures.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError as cleanup_error:
                logger.debug(f"Could not remove temp file {tmp_path}: {cleanup_error}")
|
|
|
|
|
|
def _find_fpcalc() -> Optional[str]:
    """Locate an fpcalc executable, fetching one as a last resort.

    Lookup order: the system PATH, then FPCALC_BIN_DIR, then a fresh
    download via _download_fpcalc().

    Returns:
        Path to the binary, or None when the download step yields nothing.
    """
    # 1. Anything already on PATH wins.
    for candidate in ("fpcalc", "fpcalc.exe"):
        on_path = shutil.which(candidate)
        if on_path:
            return on_path

    # 2. A copy previously placed in our own bin directory.
    binary_name = "fpcalc.exe" if platform.system().lower() == 'windows' else "fpcalc"
    bundled = FPCALC_BIN_DIR / binary_name
    if bundled.exists():
        return str(bundled)

    # 3. Nothing local: try to download it.
    return _download_fpcalc()
|
|
|
|
|
|
# Resolve the fpcalc binary once at import time. The lookup (and possible
# download) is attempted only when pyacoustid itself could be imported.
FPCALC_PATH = _find_fpcalc() if ACOUSTID_AVAILABLE else None
CHROMAPRINT_AVAILABLE = bool(FPCALC_PATH)

if CHROMAPRINT_AVAILABLE:
    logger.info(f"fpcalc binary ready: {FPCALC_PATH}")
    # Set environment variable so pyacoustid can find it
    os.environ['FPCALC'] = FPCALC_PATH
elif ACOUSTID_AVAILABLE:
    logger.warning("fpcalc not available - fingerprinting will not work")
|
|
|
|
|
|
class AcoustIDClient:
    """
    Client for audio fingerprinting via pyacoustid.

    The API key and the enabled flag are read lazily from ``config_manager``
    and cached on the instance; call ``refresh_config()`` after a settings
    change to drop the cached values.

    Usage:
        client = AcoustIDClient()
        available, reason = client.is_available()
        if available:
            result = client.fingerprint_and_lookup("/path/to/audio.mp3")
            if result:
                for mbid in result['recording_mbids']:
                    logger.info(f"Match: {mbid}")
    """

    def __init__(self):
        """Initialize AcoustID client with settings from config."""
        # None means "not loaded from config yet".
        self._api_key = None
        self._enabled = None

    @property
    def api_key(self) -> str:
        """Get API key from config (cached)."""
        if self._api_key is None:
            # Normalise a None/empty config value to '' so the property
            # honours its ``str`` return type and the value is cached
            # instead of being re-read on every access.
            self._api_key = config_manager.get('acoustid.api_key', '') or ''
        return self._api_key

    @property
    def enabled(self) -> bool:
        """Check if AcoustID verification is enabled in config (cached)."""
        if self._enabled is None:
            self._enabled = config_manager.get('acoustid.enabled', False)
        return self._enabled

    def is_available(self) -> Tuple[bool, str]:
        """
        Check if AcoustID verification is available and ready.

        Checks, in order: pyacoustid importable, API key configured,
        feature enabled, fingerprint backend (fpcalc) available.

        Returns:
            Tuple of (is_available, reason_message)
        """
        if not ACOUSTID_AVAILABLE:
            return False, "pyacoustid library not installed"

        if not self.api_key:
            return False, "No AcoustID API key configured"

        if not self.enabled:
            return False, "AcoustID verification is disabled"

        # Check if chromaprint or fpcalc is available
        if not self._check_fingerprint_available():
            return False, "Chromaprint library not installed (install libchromaprint1)"

        return True, "AcoustID verification ready"

    def _check_fingerprint_available(self) -> bool:
        """Check if we can generate fingerprints (chromaprint lib or fpcalc).

        When no backend was found at import time this retries
        ``_find_fpcalc()`` and, on success, updates the module-level
        CHROMAPRINT_AVAILABLE / FPCALC_PATH and the FPCALC environment
        variable.
        """
        global CHROMAPRINT_AVAILABLE, FPCALC_PATH

        if CHROMAPRINT_AVAILABLE:
            return True

        # Try to find/download fpcalc if not already available
        FPCALC_PATH = _find_fpcalc()
        if FPCALC_PATH:
            CHROMAPRINT_AVAILABLE = True
            os.environ['FPCALC'] = FPCALC_PATH
            logger.info(f"fpcalc now available: {FPCALC_PATH}")
            return True

        return False

    def _find_test_audio_file(self) -> Optional[str]:
        """Find an audio file to use for testing the AcoustID API key.

        Looks in the configured soulseek transfer and download directories,
        at most two levels deep.

        Returns:
            Path of the first audio file found, or None.
        """
        audio_extensions = {'.mp3', '.flac', '.ogg', '.m4a', '.wav', '.wma', '.aac'}

        # Check transfer and download paths from config
        transfer_path = config_manager.get('soulseek.transfer_path', '')
        download_path = config_manager.get('soulseek.download_path', '')
        search_dirs = [Path(p) for p in (transfer_path, download_path) if p]

        for search_dir in search_dirs:
            if not search_dir.exists():
                continue
            # Walk up to 2 levels deep to find an audio file quickly
            for pattern in ('*', '*/*'):
                for f in search_dir.glob(pattern):
                    if f.is_file() and f.suffix.lower() in audio_extensions:
                        return str(f)
        return None

    def test_api_key(self) -> Tuple[bool, str]:
        """
        Validate the API key with a direct AcoustID lookup call. An invalid key
        is reported as invalid (error code 4); any other error means the key was
        accepted.

        Returns:
            Tuple of (success, message)
        """
        if not self.api_key:
            return False, "No API key configured"

        import requests

        try:
            # Authoritative key check: a direct API lookup with a dummy
            # fingerprint. AcoustID validates the client key first, so an
            # invalid key returns error code 4 regardless of the fingerprint.
            # (The previous real-file path trusted "no exception = valid", but
            # fingerprint_and_lookup swallows the invalid-key error and returns
            # None — so it reported broken keys as valid. #756-adjacent.)
            url = 'https://api.acoustid.org/v2/lookup'
            params = {
                'client': self.api_key,
                'duration': 187,
                'fingerprint': 'AQADtMkWaYkSZRGO',
                'meta': 'recordings'
            }

            response = requests.get(url, params=params, timeout=10)
            data = response.json()

            if data.get('status') == 'error':
                error = data.get('error', {})
                error_code = error.get('code', 0)

                # Error code 4 is specifically "invalid API key"
                if error_code == 4:
                    return False, "Invalid AcoustID API key - get one from https://acoustid.org/new-application"
                # Any other error (e.g. "invalid fingerprint") means the API key
                # was accepted — the dummy test fingerprint is just rejected as expected
                return True, "AcoustID API key is valid"

            # Status is 'ok' - key is valid
            return True, "AcoustID API key is valid"

        except requests.exceptions.Timeout:
            return False, "AcoustID API timeout - try again later"
        except requests.exceptions.RequestException as e:
            return False, f"Network error: {str(e)}"
        except Exception as e:
            logger.error(f"Error testing AcoustID API key: {e}")
            return False, f"Error: {str(e)}"

    @staticmethod
    def _result(status: str,
                recordings: Optional[List[Dict[str, Any]]] = None,
                best_score: float = 0.0,
                error: Optional[str] = None,
                invalid_key: bool = False) -> Dict[str, Any]:
        """Build a lookup_with_status() result with a uniform set of keys.

        'recordings', 'best_score', 'recording_mbids' and 'invalid_key' are
        always present so callers can index them on any status. 'error' is
        included only when a message is supplied. 'recording_mbids' lists the
        MBIDs in the same order as 'recordings'.
        """
        recs = list(recordings) if recordings else []
        result = {
            'status': status,
            'recordings': recs,
            'best_score': best_score,
            'recording_mbids': [rec['mbid'] for rec in recs],
            'invalid_key': invalid_key,
        }
        if error is not None:
            result['error'] = error
        return result

    def lookup_with_status(self, audio_file: str) -> Dict[str, Any]:
        """Fingerprint + AcoustID lookup returning a STRUCTURED result.

        Unlike fingerprint_and_lookup() (which collapses every outcome into
        dict-or-None), this distinguishes a genuine no-match from an actual
        error — an invalid API key, rate limit, missing chromaprint, or a
        fingerprint failure. That distinction is what lets the UI show "AcoustID
        Error" (something is broken — fix it) instead of a benign-looking
        "Skipped" that silently hides a dead key.

        Returns dict with:
            'status': 'ok' | 'no_match' | 'error' | 'no_backend'
                      | 'fingerprint_error' | 'unsupported' | 'unavailable'
                      | 'not_found'
            'recordings': list (meaningful only for 'ok')
            'best_score': float (0.0 when nothing was scored)
            'recording_mbids': list, in the same order as 'recordings'
            'error': human-readable detail for any non-'ok' status
            'invalid_key': bool (True when the API specifically rejected the key)

        Every key except 'error' is present for every status.
        """
        if not ACOUSTID_AVAILABLE:
            return self._result('unavailable', error='pyacoustid library not installed')
        if not self.api_key:
            return self._result('unavailable', error='No AcoustID API key configured')
        if not os.path.isfile(audio_file):
            logger.warning(f"Cannot lookup: file not found: {audio_file}")
            return self._result('not_found', error=f'File not found: {audio_file}')

        # Check channel count — chromaprint crashes (SIGABRT) on >2 channel files (e.g. 5.1 surround)
        try:
            from mutagen import File as MutagenFile
            mf = MutagenFile(audio_file)
            if mf and mf.info:
                channels = getattr(mf.info, 'channels', 2)
                if channels and channels > 2:
                    logger.warning(f"Skipping AcoustID: file has {channels} channels (surround audio): {audio_file}")
                    return self._result(
                        'unsupported',
                        error=f'{channels}-channel (surround) audio not supported by chromaprint')
        except Exception as e:
            # Best effort only: an unreadable tag must not block the lookup.
            logger.debug(f"Could not check channel count, proceeding anyway: {e}")

        try:
            import acoustid

            api_key_preview = f"{self.api_key[:8]}..." if self.api_key and len(self.api_key) > 8 else "NOT SET"
            logger.info(f"Fingerprinting and looking up: {audio_file} (API key: {api_key_preview})")

            logger.debug("Running acoustid.match()...")
            recordings = []
            seen_mbids = set()
            best_score = 0.0

            for result in acoustid.match(self.api_key, audio_file, parse=True):
                # match() with parse=True returns (score, recording_id, title, artist)
                if not isinstance(result, tuple) or len(result) < 2:
                    logger.warning(f"Unexpected result format: {result}")
                    continue

                score = result[0]
                recording_id = result[1]
                title = result[2] if len(result) > 2 else None
                artist = result[3] if len(result) > 3 else None

                logger.debug(f"Got result: score={score}, id={recording_id}, title={title}, artist={artist}")

                if score > best_score:
                    best_score = score

                # Keep only the first hit per recording MBID.
                if recording_id and recording_id not in seen_mbids:
                    seen_mbids.add(recording_id)
                    recordings.append({'mbid': recording_id, 'title': title, 'artist': artist, 'score': score})
                    logger.debug(f"Found match: {title} by {artist} (MBID: {recording_id}, score: {score})")

            if not recordings:
                logger.info(f"No AcoustID matches found for: {audio_file}")
                return self._result('no_match', best_score=best_score,
                                    error='Track not found in AcoustID database')

            logger.info(f"AcoustID found {len(recordings)} recording(s) (best score: {best_score:.2f})")
            return self._result('ok', recordings, best_score)

        except acoustid.NoBackendError:
            logger.error("Chromaprint library not found and fpcalc not available")
            return self._result('no_backend',
                                error='Chromaprint/fpcalc not installed (install libchromaprint1)')
        except acoustid.FingerprintGenerationError as e:
            logger.warning(f"Failed to fingerprint {audio_file}: {e}")
            return self._result('fingerprint_error', error=f'Could not fingerprint file: {e}')
        except acoustid.WebServiceError as e:
            api_key_preview = f"{self.api_key[:8]}..." if self.api_key and len(self.api_key) > 8 else "???"
            logger.warning(f"AcoustID API error (key: {api_key_preview}): {e}")
            error_str = str(e).lower()
            # Old pyacoustid reports an invalid key as the bare "status: error"
            # (it drops the detail), so treat that as an invalid-key signal too.
            invalid = ('invalid' in error_str or 'unknown' in error_str or 'status: error' in error_str)
            if invalid:
                logger.error("AcoustID API key appears to be invalid — check your AcoustID settings")
            elif 'rate' in error_str or 'limit' in error_str:
                logger.warning("Rate limited by AcoustID — will retry later")
            return self._result('error', invalid_key=invalid,
                                error=f'AcoustID API error: {e}')
        except Exception as e:
            logger.error(f"Unexpected error in AcoustID lookup: {e}", exc_info=True)
            return self._result('error', error=f'Unexpected error: {e}')

    def fingerprint_and_lookup(self, audio_file: str) -> Optional[Dict[str, Any]]:
        """Legacy dict-or-None lookup. Returns the recordings dict on a confirmed
        match, else None. Kept for callers that only need "did we identify it"
        (library scanner, auto-import). Callers that must report WHY a lookup
        didn't match (verification badge, key test) should use
        ``lookup_with_status`` so an error isn't mistaken for a no-match.
        """
        res = self.lookup_with_status(audio_file)
        if res.get('status') == 'ok':
            return {
                'recordings': res['recordings'],
                'best_score': res.get('best_score', 0.0),
                'recording_mbids': res.get('recording_mbids', []),
            }
        return None

    def refresh_config(self):
        """Refresh cached config values (call after settings change)."""
        self._api_key = None
        self._enabled = None
|