import threading
import time
from functools import wraps
from typing import Dict, List, Optional, Any

import requests

from utils.logging_config import get_logger

logger = get_logger("musicbrainz_client")

# Global rate limiting variables (shared by every client instance and thread
# in this process, because the limit applies to the process as a whole).
_last_api_call_time = 0
_api_call_lock = threading.Lock()
MIN_API_INTERVAL = 1.0  # 1 second between API calls (MusicBrainz requirement)


def rate_limited(func):
    """Decorator to enforce rate limiting on MusicBrainz API calls.

    Before each call the wrapper takes the module-level lock, sleeps until at
    least ``MIN_API_INTERVAL`` seconds have elapsed since the previous call,
    stamps the new call time and records the call with the project's API call
    tracker. The sleep happens while the lock is held, so concurrent callers
    are spaced out one after another.

    If the wrapped function raises an exception whose text mentions
    "rate limit" or "503", the wrapper logs a warning and pauses for two
    seconds before re-raising. It does NOT retry the call. Functions that
    catch their own exceptions never reach this branch.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        global _last_api_call_time

        with _api_call_lock:
            elapsed = time.time() - _last_api_call_time
            if elapsed < MIN_API_INTERVAL:
                time.sleep(MIN_API_INTERVAL - elapsed)
            _last_api_call_time = time.time()

            # Imported lazily at call time rather than at module import.
            # NOTE(review): kept inside the lock as the conservative reading
            # of the original layout — confirm against the upstream file.
            from core.api_call_tracker import api_call_tracker
            api_call_tracker.record_call('musicbrainz')

        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Fixed cool-down (not exponential, no retry): give the server a
            # moment before the caller sees the error and decides what to do.
            message = str(e)
            if "rate limit" in message.lower() or "503" in message:
                logger.warning(f"MusicBrainz rate limit hit, implementing backoff: {e}")
                time.sleep(2.0)
            # Bare raise keeps the original traceback intact.
            raise

    return wrapper


class MusicBrainzClient:
    """Client for interacting with MusicBrainz API.

    Every public method is wrapped in ``rate_limited`` and is best-effort:
    any exception is logged and turned into an empty list (search / browse)
    or ``None`` (lookups) instead of propagating to the caller.
    """

    BASE_URL = "https://musicbrainz.org/ws/2"

    # Per-request timeout in seconds, passed to ``requests`` on every call.
    REQUEST_TIMEOUT = 10

    # MusicBrainz mandates a meaningful User-Agent with contact info. Falling back
    # to a bare name/version risks IP blocking under load — include the project
    # URL so MB operators have a way to reach us if we misbehave.
    DEFAULT_CONTACT = "https://github.com/Nezreka/SoulSync"

    def __init__(self, app_name: str = "SoulSync", app_version: str = "1.0", contact_email: str = ""):
        """
        Initialize MusicBrainz client

        Args:
            app_name: Name of the application
            app_version: Version of the application
            contact_email: Contact email or URL (defaults to project URL when empty)
        """
        contact = contact_email or self.DEFAULT_CONTACT
        self.user_agent = f"{app_name}/{app_version} ( {contact} )"

        # One shared session so the headers are sent on every request.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': self.user_agent,
            'Accept': 'application/json'
        })

        logger.info(f"MusicBrainz client initialized with user agent: {self.user_agent}")

    # ------------------------------------------------------------------
    # Private helpers (deliberately NOT rate limited: the public entry
    # points already are, and wrapping these too would double the delay).
    # ------------------------------------------------------------------

    @staticmethod
    def _escape_phrase(text: str) -> str:
        """Escape backslashes and double quotes for use in a Lucene query."""
        return text.replace('\\', '\\\\').replace('"', '\\"')

    def _get_json(self, path: str, params: Dict[str, Any]) -> Any:
        """GET ``BASE_URL/path`` and return the decoded JSON body.

        Raises whatever ``requests`` raises for transport errors, non-2xx
        statuses (via ``raise_for_status``) or an undecodable body; callers
        are expected to catch and log.
        """
        response = self.session.get(
            f"{self.BASE_URL}/{path}",
            params=params,
            timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response.json()

    def _title_artist_query(self, field: str, title: str,
                            artist_name: Optional[str], strict: bool) -> str:
        """Build the search query shared by release and recording searches.

        Strict mode produces ``<field>:"title"`` optionally AND-ed with
        ``artist:"name"``; non-strict mode joins the non-empty parts with a
        space into a bare query.
        """
        if strict:
            query = f'{field}:"{self._escape_phrase(title)}"'
            if artist_name:
                query += f' AND artist:"{self._escape_phrase(artist_name)}"'
            return query
        return ' '.join(part for part in (title, artist_name) if part)

    def _lookup_entity(self, entity: str, mbid: str,
                       includes: Optional[List[str]]) -> Optional[Dict[str, Any]]:
        """Fetch ``/<entity>/<mbid>`` with optional ``inc`` sub-queries.

        Returns the decoded JSON, or ``None`` after logging if anything fails.
        """
        try:
            params = {'fmt': 'json'}
            if includes:
                params['inc'] = '+'.join(includes)
            return self._get_json(f"{entity}/{mbid}", params)
        except Exception as e:
            logger.error(f"Error fetching {entity} {mbid}: {e}")
            return None

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------

    @rate_limited
    def search_artist(self, artist_name: str, limit: int = 10, strict: bool = True) -> List[Dict[str, Any]]:
        """
        Search for artists by name.

        Args:
            artist_name: Name of the artist to search for
            limit: Maximum number of results to return
            strict: When True (default), builds a phrase-match query against the
                `artist` field only — correct for enrichment flows that already
                know the exact name. When False, sends a bare query which
                MusicBrainz matches against the alias, artist, AND sortname
                indexes — the right behavior for user-facing fuzzy search
                (finds "Metallica" from typing "metalica", matches aliased
                names, etc.).

        Returns:
            List of artist results with id, name, score, etc. MusicBrainz
            assigns each result a `score` 0-100; the list is pre-sorted
            score-descending by the server. Empty list on any error.
        """
        try:
            safe_name = self._escape_phrase(artist_name)
            # Bare query hits alias/artist/sortname indexes — much better
            # recall for user typing. Quotes and backslashes are still
            # escaped locally in both modes.
            query = f'artist:"{safe_name}"' if strict else safe_name

            data = self._get_json("artist", {
                'query': query,
                'fmt': 'json',
                'limit': limit
            })
            artists = data.get('artists', [])
            logger.debug(f"Found {len(artists)} artists for query: {artist_name}")
            return artists
        except Exception as e:
            logger.error(f"Error searching for artist '{artist_name}': {e}")
            return []

    @rate_limited
    def search_release(self, album_name: str, artist_name: Optional[str] = None, limit: int = 10,
                       strict: bool = True) -> List[Dict[str, Any]]:
        """
        Search for releases (albums) by name.

        Args:
            album_name: Name of the album to search for
            artist_name: Optional artist name to narrow search
            limit: Maximum number of results to return
            strict: When True (default), builds a phrase-match Lucene query
                against the `release` and `artist` fields — correct for
                enrichment flows where exact name+artist are known. When
                False, sends a bare query (album + artist joined) so MB hits
                alias / sortname indexes and folds diacritics, dramatically
                improving recall for user-facing fuzzy lookups (e.g. the
                manual Fix popup; recovers cases like "Bjork" → "Björk" that
                strict phrase queries miss).

        Returns:
            List of release results (empty list on any error)
        """
        try:
            query = self._title_artist_query('release', album_name, artist_name, strict)
            data = self._get_json("release", {
                'query': query,
                'fmt': 'json',
                'limit': limit
            })
            releases = data.get('releases', [])
            logger.debug(f"Found {len(releases)} releases for query: {album_name}")
            return releases
        except Exception as e:
            logger.error(f"Error searching for release '{album_name}': {e}")
            return []

    @rate_limited
    def search_recording(self, track_name: str, artist_name: Optional[str] = None, limit: int = 10,
                         strict: bool = True) -> List[Dict[str, Any]]:
        """
        Search for recordings (tracks) by name.

        Args:
            track_name: Name of the track to search for
            artist_name: Optional artist name to narrow search
            limit: Maximum number of results to return
            strict: When True (default), builds a phrase-match Lucene query
                against the `recording` and `artist` fields — correct for
                enrichment flows where exact name+artist are known. When
                False, sends a bare query (track + artist joined) so MB hits
                alias / sortname indexes and folds diacritics. The bare path
                also avoids the AND-clause that kills recall when either
                side mis-matches (e.g. "Bjork" vs canonical "Björk", or a
                track title with bracketed suffix like "(Live)" that strict
                phrase match rejects).

        Returns:
            List of recording results (empty list on any error)
        """
        try:
            query = self._title_artist_query('recording', track_name, artist_name, strict)
            data = self._get_json("recording", {
                'query': query,
                'fmt': 'json',
                'limit': limit
            })
            recordings = data.get('recordings', [])
            logger.debug(f"Found {len(recordings)} recordings for query: {track_name}")
            return recordings
        except Exception as e:
            logger.error(f"Error searching for recording '{track_name}': {e}")
            return []

    # ------------------------------------------------------------------
    # Browse
    # ------------------------------------------------------------------

    @rate_limited
    def browse_artist_release_groups(self, artist_mbid: str,
                                     release_types: Optional[List[str]] = None,
                                     limit: int = 100,
                                     offset: int = 0) -> List[Dict[str, Any]]:
        """Browse release-groups linked to an artist MBID.

        This is the correct MusicBrainz pattern for "give me this artist's
        discography" — text-based `/release?query=...` search would look at
        release TITLES (matching unrelated releases literally titled after
        the artist name), while browse walks the artist→release-group link
        directly.

        Args:
            artist_mbid: Artist's MusicBrainz ID
            release_types: Filter by primary type — any of 'album', 'single',
                'ep', 'compilation', 'soundtrack', 'live', etc. Combined
                with `|` per MB spec, e.g. `['album', 'ep']` → `type=album|ep`.
                None returns all types.
            limit: 1-100 (MB hard cap); larger values are capped at 100
            offset: Pagination offset

        Returns:
            List of release-group dicts. Each has `id`, `title`,
            `primary-type`, `secondary-types`, `first-release-date`,
            `disambiguation`. Empty list on any error.
        """
        try:
            params = {'artist': artist_mbid, 'fmt': 'json',
                      'limit': min(limit, 100), 'offset': offset}
            if release_types:
                params['type'] = '|'.join(release_types)

            data = self._get_json("release-group", params)
            rgs = data.get('release-groups', [])
            logger.debug(f"Browsed {len(rgs)} release-groups for artist {artist_mbid}")
            return rgs
        except Exception as e:
            logger.error(f"Error browsing release-groups for artist {artist_mbid}: {e}")
            return []

    @rate_limited
    def browse_release_group_releases(self, release_group_mbid: str,
                                      limit: int = 100,
                                      offset: int = 0) -> List[Dict[str, Any]]:
        """Browse concrete releases that belong to a release-group.

        Release-groups identify the logical album; releases identify the
        actual edition the user may own (country, format, explicit/clean
        disambiguation, bonus tracks, track count). Manual import needs the
        latter so users can choose the matching tracklist.

        Args:
            release_group_mbid: Release-group's MusicBrainz ID
            limit: Page size; values above 100 are capped at 100
            offset: Pagination offset

        Returns:
            List of release dicts (empty list on any error)
        """
        try:
            data = self._get_json("release", {
                'release-group': release_group_mbid,
                'fmt': 'json',
                'limit': min(limit, 100),
                'offset': offset,
                'inc': 'artist-credits+media+labels+release-groups',
            })
            releases = data.get('releases', [])
            logger.debug(f"Browsed {len(releases)} releases for release-group {release_group_mbid}")
            return releases
        except Exception as e:
            logger.error(f"Error browsing releases for release-group {release_group_mbid}: {e}")
            return []

    @rate_limited
    def search_recordings_by_artist_mbid(self, artist_mbid: str,
                                         limit: int = 100) -> List[Dict[str, Any]]:
        """Search for recordings linked to an artist via Lucene `arid:` query.

        This is the counterpart to `browse_artist_release_groups` for tracks.
        The proper "browse" endpoint (`/recording?artist=`) rejects
        `inc=releases`, so we can't get album context per recording from
        browse — only the track title/length/MBID. Without release info the
        user would see tracks with no album, which is useless.

        The search endpoint with a fielded `arid:` query returns recordings
        with the `releases` array already embedded (including release-group,
        date, and media info), which is what the search-tab UI needs.

        Args:
            artist_mbid: Artist's MusicBrainz ID
            limit: 1-100 (MB hard cap); larger values are capped at 100

        Returns:
            List of recording dicts with `id`, `title`, `length`, `score`,
            `artist-credit`, and `releases` (each with release-group + date).
            Empty list on any error.
        """
        try:
            data = self._get_json("recording", {
                'query': f'arid:{artist_mbid}',
                'fmt': 'json',
                'limit': min(limit, 100),
            })
            recs = data.get('recordings', [])
            logger.debug(f"Found {len(recs)} recordings for artist {artist_mbid}")
            return recs
        except Exception as e:
            logger.error(f"Error searching recordings for artist {artist_mbid}: {e}")
            return []

    # ------------------------------------------------------------------
    # Lookups by MBID
    # ------------------------------------------------------------------

    @rate_limited
    def get_artist(self, mbid: str, includes: Optional[List[str]] = None) -> Optional[Dict[str, Any]]:
        """
        Get full artist details by MusicBrainz ID

        Args:
            mbid: MusicBrainz ID of the artist
            includes: Optional list of additional data to include (e.g., 'url-rels', 'genres')

        Returns:
            Artist data or None if not found
        """
        return self._lookup_entity('artist', mbid, includes)

    @rate_limited
    def get_release(self, mbid: str, includes: Optional[List[str]] = None) -> Optional[Dict[str, Any]]:
        """
        Get full release details by MusicBrainz ID

        Args:
            mbid: MusicBrainz ID of the release
            includes: Optional list of additional data to include

        Returns:
            Release data or None if not found
        """
        return self._lookup_entity('release', mbid, includes)

    @rate_limited
    def get_release_group(self, mbid: str, includes: Optional[List[str]] = None) -> Optional[Dict[str, Any]]:
        """Get full release-group details by MBID.

        Release-groups are the 'canonical album' entity in MusicBrainz —
        they group every edition/reissue/region-specific release of the same
        logical album under one MBID. Use `inc=releases` to list the
        individual releases this group contains (each with its own
        tracklist); use `inc=artist-credits` for artist info.

        Args:
            mbid: Release-group's MusicBrainz ID
            includes: Optional list, e.g. ['releases', 'artist-credits']

        Returns:
            Release-group data or None if not found.
        """
        return self._lookup_entity('release-group', mbid, includes)

    @rate_limited
    def get_recording(self, mbid: str, includes: Optional[List[str]] = None) -> Optional[Dict[str, Any]]:
        """
        Get full recording details by MusicBrainz ID

        Args:
            mbid: MusicBrainz ID of the recording
            includes: Optional list of additional data to include

        Returns:
            Recording data or None if not found
        """
        return self._lookup_entity('recording', mbid, includes)