mirror of https://github.com/Nezreka/SoulSync.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
468 lines
17 KiB
468 lines
17 KiB
import requests
|
|
import time
|
|
import threading
|
|
from typing import Dict, List, Optional, Any
|
|
from functools import wraps
|
|
from utils.logging_config import get_logger
|
|
|
|
# Logger used by everything in this module.
logger = get_logger("musicbrainz_client")

# ---------------------------------------------------------------------------
# Shared rate-limiter state (read and written by ``rate_limited``)
# ---------------------------------------------------------------------------

# Timestamp of the most recent throttled API call; 0 until the first call.
_last_api_call_time = 0

# Guards the timestamp above so concurrent callers are throttled one at a time.
_api_call_lock = threading.Lock()

# Minimum spacing between API calls, in seconds (MusicBrainz requirement).
MIN_API_INTERVAL = 1.0
|
|
|
|
def rate_limited(func):
    """Decorator to enforce rate limiting on MusicBrainz API calls.

    All decorated callables share one module-level timestamp and lock, so the
    start of any two decorated calls is spaced at least ``MIN_API_INTERVAL``
    seconds apart, whichever thread or decorated function makes them.

    Each call is also reported to ``api_call_tracker`` under the
    ``'musicbrainz'`` key before the wrapped function runs.

    If the wrapped function raises an exception whose text mentions
    "rate limit" or "503", a warning is logged and the wrapper pauses for a
    fixed two seconds before re-raising. Nothing is retried here; the pause
    only delays the caller. A wrapped function that catches its own
    exceptions never reaches this path.

    Args:
        func: Callable that performs the API request.

    Returns:
        The throttled wrapper, carrying ``func``'s metadata via ``wraps``.

    Raises:
        Exception: Whatever ``func`` raises is propagated unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        global _last_api_call_time

        # The sleep deliberately happens while the lock is held: waiting
        # callers queue up behind it and are released one interval apart.
        with _api_call_lock:
            # Use the monotonic clock for interval arithmetic. With the wall
            # clock, a backwards clock step would produce a negative elapsed
            # time and an arbitrarily long sleep while holding the lock.
            elapsed = time.monotonic() - _last_api_call_time
            if elapsed < MIN_API_INTERVAL:
                time.sleep(MIN_API_INTERVAL - elapsed)
            _last_api_call_time = time.monotonic()

        # Imported at call time rather than at module import time.
        # NOTE(review): presumably to avoid an import cycle — confirm.
        from core.api_call_tracker import api_call_tracker
        api_call_tracker.record_call('musicbrainz')

        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Fixed cool-down (not a retry) when the server signals overload.
            if "rate limit" in str(e).lower() or "503" in str(e):
                logger.warning(f"MusicBrainz rate limit hit, implementing backoff: {e}")
                time.sleep(2.0)
            # Bare raise re-raises the active exception as-is.
            raise

    return wrapper
|
|
|
|
class MusicBrainzClient:
|
|
"""Client for interacting with MusicBrainz API"""
|
|
|
|
BASE_URL = "https://musicbrainz.org/ws/2"
|
|
# MusicBrainz mandates a meaningful User-Agent with contact info. Falling back
|
|
# to a bare name/version risks IP blocking under load — include the project
|
|
# URL so MB operators have a way to reach us if we misbehave.
|
|
DEFAULT_CONTACT = "https://github.com/Nezreka/SoulSync"
|
|
|
|
def __init__(self, app_name: str = "SoulSync", app_version: str = "1.0", contact_email: str = ""):
    """
    Create the client and its shared HTTP session.

    The User-Agent is built as ``<app_name>/<app_version> ( <contact> )`` and
    installed, together with a JSON ``Accept`` header, as a default header on
    the session used for every request.

    Args:
        app_name: Name of the application
        app_version: Version of the application
        contact_email: Contact email or URL; ``DEFAULT_CONTACT`` is used
            when this is empty
    """
    # Fall back to the project URL when no contact was supplied.
    if contact_email:
        contact = contact_email
    else:
        contact = self.DEFAULT_CONTACT
    self.user_agent = f"{app_name}/{app_version} ( {contact} )"

    default_headers = {
        'User-Agent': self.user_agent,
        'Accept': 'application/json',
    }
    self.session = requests.Session()
    self.session.headers.update(default_headers)

    logger.info(f"MusicBrainz client initialized with user agent: {self.user_agent}")
|
|
|
|
@rate_limited
def search_artist(self, artist_name: str, limit: int = 10, strict: bool = True) -> List[Dict[str, Any]]:
    """
    Search for artists by name.

    Args:
        artist_name: Name of the artist to search for
        limit: Maximum number of results to return. Clamped to the 1-100
            range that MusicBrainz accepts.
        strict: When True (default), builds a phrase-match query against
            the `artist` field only — correct for enrichment flows that
            already know the exact name. When False, sends a bare query
            which MusicBrainz matches against the alias, artist, AND
            sortname indexes — the right behavior for user-facing fuzzy
            search (finds "Metallica" from typing "metalica", matches
            aliased names, etc.).

    Returns:
        List of artist results with id, name, score, etc. MusicBrainz
        assigns each result a `score` 0-100; the list is pre-sorted
        score-descending by the server. An empty list is returned when
        the request fails for any reason (the error is logged).
    """
    try:
        # Escape backslashes first, then double quotes, so the name cannot
        # terminate the quoted phrase early.
        safe_name = artist_name.replace('\\', '\\\\').replace('"', '\\"')

        # Strict: fielded phrase match. Non-strict: bare query for better
        # recall. Only backslashes and quotes are escaped in the bare form;
        # any other query operators in the text reach the server as typed.
        query = f'artist:"{safe_name}"' if strict else safe_name

        params = {
            'query': query,
            'fmt': 'json',
            # MusicBrainz only accepts limits between 1 and 100 inclusive.
            'limit': max(1, min(limit, 100)),
        }

        response = self.session.get(
            f"{self.BASE_URL}/artist",
            params=params,
            timeout=10
        )
        response.raise_for_status()

        artists = response.json().get('artists', [])

        logger.debug(f"Found {len(artists)} artists for query: {artist_name}")
        return artists

    except Exception as e:
        # Best-effort lookup: log and report "no results" instead of raising.
        logger.error(f"Error searching for artist '{artist_name}': {e}")
        return []
|
|
|
|
@rate_limited
def search_release(self, album_name: str, artist_name: Optional[str] = None,
                   limit: int = 10, strict: bool = True) -> List[Dict[str, Any]]:
    """
    Search for releases (albums) by name.

    Args:
        album_name: Name of the album to search for
        artist_name: Optional artist name to narrow search
        limit: Maximum number of results to return. Clamped to the 1-100
            range that MusicBrainz accepts.
        strict: When True (default), builds a phrase-match Lucene query
            against the `release` and `artist` fields — correct for
            enrichment flows where exact name+artist are known. When
            False, sends a bare query (album + artist joined) so MB
            hits alias / sortname indexes and folds diacritics,
            dramatically improving recall for user-facing fuzzy
            lookups (e.g. the manual Fix popup).

    Returns:
        List of release results, or an empty list when the request fails
        for any reason (the error is logged).
    """
    try:
        if strict:
            # Escape backslashes, then quotes, so a name cannot break out
            # of its quoted phrase.
            safe_album = album_name.replace('\\', '\\\\').replace('"', '\\"')
            clauses = [f'release:"{safe_album}"']

            if artist_name:
                safe_artist = artist_name.replace('\\', '\\\\').replace('"', '\\"')
                clauses.append(f'artist:"{safe_artist}"')

            query = ' AND '.join(clauses)
        else:
            # Bare query — MB tokenizes against title + artist credit +
            # alias + sortname indexes together with diacritic folding.
            # Recovers cases like "Bjork" → "Björk" that strict phrase
            # queries miss. Empty parts are dropped before joining.
            query = ' '.join(p for p in (album_name, artist_name) if p)

        params = {
            'query': query,
            'fmt': 'json',
            # MusicBrainz only accepts limits between 1 and 100 inclusive.
            'limit': max(1, min(limit, 100)),
        }

        response = self.session.get(
            f"{self.BASE_URL}/release",
            params=params,
            timeout=10
        )
        response.raise_for_status()

        releases = response.json().get('releases', [])

        logger.debug(f"Found {len(releases)} releases for query: {album_name}")
        return releases

    except Exception as e:
        # Best-effort lookup: log and report "no results" instead of raising.
        logger.error(f"Error searching for release '{album_name}': {e}")
        return []
|
|
|
|
@rate_limited
def search_recording(self, track_name: str, artist_name: Optional[str] = None,
                     limit: int = 10, strict: bool = True) -> List[Dict[str, Any]]:
    """
    Search for recordings (tracks) by name.

    Args:
        track_name: Name of the track to search for
        artist_name: Optional artist name to narrow search
        limit: Maximum number of results to return. Clamped to the 1-100
            range that MusicBrainz accepts.
        strict: When True (default), builds a phrase-match Lucene query
            against the `recording` and `artist` fields — correct for
            enrichment flows where exact name+artist are known. When
            False, sends a bare query (track + artist joined) so MB
            hits alias / sortname indexes and folds diacritics. The
            bare path also avoids the AND-clause that kills recall
            when either side mis-matches (e.g. "Bjork" vs canonical
            "Björk", or a track title with bracketed suffix like
            "(Live)" that strict phrase match rejects).

    Returns:
        List of recording results, or an empty list when the request
        fails for any reason (the error is logged).
    """
    try:
        if strict:
            # Escape backslashes, then quotes, so a name cannot break out
            # of its quoted phrase.
            safe_track = track_name.replace('\\', '\\\\').replace('"', '\\"')
            clauses = [f'recording:"{safe_track}"']

            if artist_name:
                safe_artist = artist_name.replace('\\', '\\\\').replace('"', '\\"')
                clauses.append(f'artist:"{safe_artist}"')

            query = ' AND '.join(clauses)
        else:
            # Bare query: track and artist joined by a space, with empty
            # parts dropped, so no AND-clause constrains the match.
            query = ' '.join(p for p in (track_name, artist_name) if p)

        params = {
            'query': query,
            'fmt': 'json',
            # MusicBrainz only accepts limits between 1 and 100 inclusive.
            'limit': max(1, min(limit, 100)),
        }

        response = self.session.get(
            f"{self.BASE_URL}/recording",
            params=params,
            timeout=10
        )
        response.raise_for_status()

        recordings = response.json().get('recordings', [])

        logger.debug(f"Found {len(recordings)} recordings for query: {track_name}")
        return recordings

    except Exception as e:
        # Best-effort lookup: log and report "no results" instead of raising.
        logger.error(f"Error searching for recording '{track_name}': {e}")
        return []
|
|
|
|
@rate_limited
def browse_artist_release_groups(self, artist_mbid: str,
                                 release_types: Optional[List[str]] = None,
                                 limit: int = 100,
                                 offset: int = 0) -> List[Dict[str, Any]]:
    """Browse release-groups linked to an artist MBID.

    This is the correct MusicBrainz pattern for "give me this artist's
    discography" — text-based `/release?query=...` search would look at
    release TITLES (matching unrelated releases literally titled after
    the artist name), while browse walks the artist→release-group link
    directly.

    Args:
        artist_mbid: Artist's MusicBrainz ID
        release_types: Filter by primary type — any of 'album', 'single',
            'ep', 'compilation', 'soundtrack', 'live', etc. Combined with
            `|` per MB spec, e.g. `['album', 'ep']` → `type=album|ep`.
            None returns all types.
        limit: 1-100 (MB hard cap). Values outside the range are clamped
            to the nearest bound.
        offset: Pagination offset. Negative values are treated as 0.

    Returns:
        List of release-group dicts. Each has `id`, `title`, `primary-type`,
        `secondary-types`, `first-release-date`, `disambiguation`. An empty
        list is returned when the request fails for any reason (the error
        is logged).
    """
    try:
        params = {
            'artist': artist_mbid,
            'fmt': 'json',
            # Enforce both ends of the documented 1-100 range.
            'limit': max(1, min(limit, 100)),
            # A negative offset is not a valid page position.
            'offset': max(offset, 0),
        }
        if release_types:
            params['type'] = '|'.join(release_types)

        response = self.session.get(
            f"{self.BASE_URL}/release-group",
            params=params,
            timeout=10
        )
        response.raise_for_status()

        rgs = response.json().get('release-groups', [])
        logger.debug(f"Browsed {len(rgs)} release-groups for artist {artist_mbid}")
        return rgs
    except Exception as e:
        # Best-effort lookup: log and report "no results" instead of raising.
        logger.error(f"Error browsing release-groups for artist {artist_mbid}: {e}")
        return []
|
|
|
|
@rate_limited
def search_recordings_by_artist_mbid(self, artist_mbid: str,
                                     limit: int = 100) -> List[Dict[str, Any]]:
    """Search for recordings linked to an artist via Lucene `arid:` query.

    This is the counterpart to `browse_artist_release_groups` for tracks.
    The proper "browse" endpoint (`/recording?artist=<mbid>`) rejects
    `inc=releases`, so we can't get album context per recording from
    browse — only the track title/length/MBID. Without release info the
    user would see tracks with no album, which is useless.

    The search endpoint with a fielded `arid:<mbid>` query returns
    recordings with the `releases` array already embedded (including
    release-group, date, and media info), which is what the search-tab
    UI needs.

    Args:
        artist_mbid: Artist's MusicBrainz ID
        limit: 1-100 (MB hard cap). Values outside the range are clamped
            to the nearest bound.

    Returns:
        List of recording dicts with `id`, `title`, `length`, `score`,
        `artist-credit`, and `releases` (each with release-group + date).
        An empty list is returned when the request fails for any reason
        (the error is logged).
    """
    try:
        params = {
            'query': f'arid:{artist_mbid}',
            'fmt': 'json',
            # Enforce both ends of the documented 1-100 range.
            'limit': max(1, min(limit, 100)),
        }

        response = self.session.get(
            f"{self.BASE_URL}/recording",
            params=params,
            timeout=10
        )
        response.raise_for_status()

        recs = response.json().get('recordings', [])
        logger.debug(f"Found {len(recs)} recordings for artist {artist_mbid}")
        return recs
    except Exception as e:
        # Best-effort lookup: log and report "no results" instead of raising.
        logger.error(f"Error searching recordings for artist {artist_mbid}: {e}")
        return []
|
|
|
|
@rate_limited
def get_artist(self, mbid: str, includes: Optional[List[str]] = None) -> Optional[Dict[str, Any]]:
    """
    Look up a single artist by MusicBrainz ID.

    Args:
        mbid: MusicBrainz ID of the artist
        includes: Optional extra sub-resources to request (e.g.,
            'url-rels', 'genres'); joined with '+' into the `inc` parameter

    Returns:
        The decoded JSON artist document, or None when the request fails
        for any reason (the error is logged).
    """
    try:
        query_params = {'fmt': 'json'}
        if includes:
            query_params['inc'] = '+'.join(includes)

        url = f"{self.BASE_URL}/artist/{mbid}"
        resp = self.session.get(url, params=query_params, timeout=10)
        # Turn HTTP error statuses into exceptions so they hit the handler.
        resp.raise_for_status()
        return resp.json()
    except Exception as e:
        logger.error(f"Error fetching artist {mbid}: {e}")
        return None
|
|
|
|
@rate_limited
def get_release(self, mbid: str, includes: Optional[List[str]] = None) -> Optional[Dict[str, Any]]:
    """
    Look up a single release by MusicBrainz ID.

    Args:
        mbid: MusicBrainz ID of the release
        includes: Optional extra sub-resources to request; joined with
            '+' into the `inc` parameter

    Returns:
        The decoded JSON release document, or None when the request fails
        for any reason (the error is logged).
    """
    try:
        query_params = {'fmt': 'json'}
        if includes:
            query_params['inc'] = '+'.join(includes)

        url = f"{self.BASE_URL}/release/{mbid}"
        resp = self.session.get(url, params=query_params, timeout=10)
        # Turn HTTP error statuses into exceptions so they hit the handler.
        resp.raise_for_status()
        return resp.json()
    except Exception as e:
        logger.error(f"Error fetching release {mbid}: {e}")
        return None
|
|
|
|
@rate_limited
def get_release_group(self, mbid: str, includes: Optional[List[str]] = None) -> Optional[Dict[str, Any]]:
    """Look up a single release-group by MBID.

    Release-groups are the 'canonical album' entity in MusicBrainz —
    they group every edition/reissue/region-specific release of the
    same logical album under one MBID. Use `inc=releases` to list the
    individual releases this group contains (each with its own
    tracklist); use `inc=artist-credits` for artist info.

    Args:
        mbid: Release-group's MusicBrainz ID
        includes: Optional list, e.g. ['releases', 'artist-credits'];
            joined with '+' into the `inc` parameter

    Returns:
        The decoded JSON release-group document, or None when the request
        fails for any reason (the error is logged).
    """
    try:
        query_params = {'fmt': 'json'}
        if includes:
            query_params['inc'] = '+'.join(includes)

        url = f"{self.BASE_URL}/release-group/{mbid}"
        resp = self.session.get(url, params=query_params, timeout=10)
        # Turn HTTP error statuses into exceptions so they hit the handler.
        resp.raise_for_status()
        return resp.json()
    except Exception as e:
        logger.error(f"Error fetching release-group {mbid}: {e}")
        return None
|
|
|
|
@rate_limited
def get_recording(self, mbid: str, includes: Optional[List[str]] = None) -> Optional[Dict[str, Any]]:
    """
    Look up a single recording by MusicBrainz ID.

    Args:
        mbid: MusicBrainz ID of the recording
        includes: Optional extra sub-resources to request; joined with
            '+' into the `inc` parameter

    Returns:
        The decoded JSON recording document, or None when the request
        fails for any reason (the error is logged).
    """
    try:
        query_params = {'fmt': 'json'}
        if includes:
            query_params['inc'] = '+'.join(includes)

        url = f"{self.BASE_URL}/recording/{mbid}"
        resp = self.session.get(url, params=query_params, timeout=10)
        # Turn HTTP error statuses into exceptions so they hit the handler.
        resp.raise_for_status()
        return resp.json()
    except Exception as e:
        logger.error(f"Error fetching recording {mbid}: {e}")
        return None
|