# SoulSync/core/artists/quality.py
"""Artist quality enhancement helper.
`enhance_artist_quality(artist_id, track_ids, deps)` is the route-handler
body for the `/api/library/artist/<artist_id>/enhance` endpoint. It walks
the user's selected tracks, finds the best metadata match against the
configured primary source, and queues high-quality re-downloads on the
wishlist with `source_type='enhance'`.
Per-track flow:
1. Resolve the existing track via the artist's full detail map (built up
front from `database.get_artist_full_detail`).
2. Read current quality tier from the file extension.
3. Build `matched_track_data` for the wishlist entry, in priority order:
- **Direct lookup using stored source IDs** — for every source the
user has configured, if the library track has the corresponding
stored ID (`spotify_track_id` / `deezer_id` / `itunes_track_id` /
`soul_id`), call `client.get_track_details(stored_id)` and convert
the result to the wishlist payload. First success wins; the user's
configured primary source is tried first. Mirrors what Download
Discography does — stable IDs straight to the source's API, no
fuzzy text matching.
- **Multi-source parallel text search fallback** — if no stored ID
resolved, run the shared `core.metadata.multi_source_search`
against every configured source in parallel and pick the best
cross-source match (auto-accept threshold 0.7).
4. Validate the match has non-empty title, album, and artists. Reject
matches with empty fields — those propagated as
"unknown artist - unknown album - unknown track" wishlist entries
pre-fix because the wishlist payload normalizer's truthy-check
passthrough accepted dicts with empty string fields.
5. Add to wishlist via `wishlist_service.add_spotify_track_to_wishlist`
with `source_type='enhance'` and a `source_context` carrying the
original file path, format tier, bitrate, and artist name.
6. Tally `enhanced_count` / `failed_count` / per-track failure reasons.
The flow originally had Spotify-only logic with an iTunes search-only
fallback. Two failure modes drove the rewrite:
- Users with neither Spotify nor Deezer connected got silent failures
("unknown artist - unknown album - unknown track" wishlist entries)
because iTunes's text search returned junk matches with empty fields
that cleared the 0.7 confidence threshold.
- Library tracks with messy tags ("Title (Live)", featured artists in
the artist field, etc.) failed fuzzy text search even when a perfect
stored ID was available — Download Discography had no such problem
because it resolves albums by stable ID.
Direct-lookup-via-stored-ID matches the Download Discography contract
for every source where we have an ID column. Text search is only the
fallback now.
Returns `(payload_dict, http_status_code)` so the route wrapper can
`jsonify()` and return.
"""
from __future__ import annotations
import os
from dataclasses import dataclass
from typing import Any, Callable, Optional
from utils.logging_config import get_logger
logger = get_logger('artists.quality')
@dataclass
class ArtistQualityDeps:
    """Bundle of cross-cutting deps the artist quality enhancement needs.

    An instance is passed as ``deps`` to ``enhance_artist_quality``, which
    obtains every collaborator (database, wishlist service, profile id,
    quality-tier helper, metadata sources) by calling the fields below.
    """
    # NOTE(review): not referenced by ``enhance_artist_quality`` in the
    # visible source — presumably kept for other callers; confirm.
    matching_engine: Any
    # Returns the database handle; ``get_artist_full_detail(artist_id)``
    # is called on the result.
    get_database: Callable[[], Any]
    # Returns the wishlist service; ``add_spotify_track_to_wishlist(...)``
    # is called on the result once per matched track.
    get_wishlist_service: Callable[[], Any]
    # Returns the profile id forwarded as ``profile_id`` on wishlist adds.
    get_current_profile_id: Callable[[], int]
    # Called with a track's file path; the result is unpacked as
    # ``(tier_name, tier_num)``.
    get_quality_tier_from_extension: Callable
    # Returns ``[(source_name, client), ...]`` for every metadata source
    # the user has configured. Powers both the direct-lookup fast path
    # (resolves stored source IDs straight from each source's API,
    # like Download Discography) and the multi-source parallel text
    # search fallback (shared with Track Redownload via
    # ``core.metadata.multi_source_search``).
    get_metadata_search_sources: Callable[[], list]
def _has_complete_metadata(payload: Optional[dict]) -> bool:
    """Return True only when a wishlist payload names a track, an artist
    and an album.

    Guards against matches whose core fields are empty or missing: such
    matches used to clear the 0.7 confidence threshold and surface as
    'unknown artist - unknown album - unknown track' wishlist entries.

    ``artists`` entries may be dicts (``{'name': ...}``) or plain strings;
    ``album`` may be a dict with a ``name`` key or a plain string.
    """
    if not payload:
        return False

    def _is_filled(value) -> bool:
        # None / '' / whitespace-only all count as "missing".
        return bool((value or '').strip())

    if not _is_filled(payload.get('name')):
        return False

    # At least one artist entry must carry a non-blank name.
    for entry in payload.get('artists') or []:
        candidate = entry.get('name') if isinstance(entry, dict) else entry
        if _is_filled(candidate):
            break
    else:
        return False

    album = payload.get('album') or {}
    album_name = album.get('name') if isinstance(album, dict) else album
    return _is_filled(album_name)
def _build_payload_from_track(track_obj) -> dict:
    """Build a Spotify-shaped wishlist payload from a Track-shaped object.

    Works for any metadata source's track object that exposes the
    Spotify-like attribute surface (``.id`` / ``.name`` / ``.artists`` /
    ``.album`` / ``.duration_ms`` / ...). Every attribute is read with
    ``getattr`` so a missing attribute degrades to a neutral default
    instead of raising.

    Artist handling accepts three shapes so the result is always a list
    of ``{'name': ...}`` dicts:
    - a list of strings (the documented shape),
    - a list that already contains ``{'name': ...}`` dicts (passed
      through, not double-wrapped),
    - a single bare string / dict (treated as one artist rather than
      being iterated character by character / key by key).

    Does NOT substitute defaults for missing artists / album / title —
    ``_has_complete_metadata`` rejects empty matches downstream so the
    user sees a clear failure instead of a junk wishlist entry with
    fabricated values.

    Args:
        track_obj: Track-like object from a metadata client.

    Returns:
        dict in the wishlist's Spotify-shaped track format.
    """
    image_url = getattr(track_obj, 'image_url', '') or ''
    album_images = (
        [{'url': image_url, 'height': 600, 'width': 600}]
        if image_url else []
    )

    raw_artists = getattr(track_obj, 'artists', None) or []
    if isinstance(raw_artists, (str, dict)):
        # A lone artist must stay one artist; list('AC/DC') would
        # otherwise yield one "artist" per character.
        raw_artists = [raw_artists]
    artist_entries = list(raw_artists)

    def _artist_dicts() -> list:
        # Fresh list (and fresh dicts) per call so the track-level and
        # album-level artist lists never alias each other.
        return [
            dict(entry) if isinstance(entry, dict) else {'name': entry}
            for entry in artist_entries
        ]

    return {
        'id': getattr(track_obj, 'id', ''),
        'name': getattr(track_obj, 'name', '') or '',
        'artists': _artist_dicts(),
        'album': {
            'name': getattr(track_obj, 'album', '') or '',
            'artists': _artist_dicts(),
            'album_type': getattr(track_obj, 'album_type', None) or 'album',
            'images': album_images,
            'release_date': getattr(track_obj, 'release_date', '') or '',
            'total_tracks': 1,
        },
        'duration_ms': getattr(track_obj, 'duration_ms', 0) or 0,
        'track_number': getattr(track_obj, 'track_number', None) or 1,
        'disc_number': getattr(track_obj, 'disc_number', None) or 1,
        'popularity': getattr(track_obj, 'popularity', None) or 0,
        'preview_url': getattr(track_obj, 'preview_url', None),
        'external_urls': getattr(track_obj, 'external_urls', None) or {},
    }
# Map metadata source name → column on the library track record that
# stores that source's native track ID. Drives the direct-lookup fast
# path: when a library track has a stored ID for source X and the user
# has source X configured, skip fuzzy text search and resolve straight
# from X's API. Mirrors what Download Discography does — stable IDs all
# the way, no fuzzy text matching.
#
# Keys must match the ``source_name`` half of the ``(source_name, client)``
# pairs handed to ``_try_direct_lookup_all_sources``; a source without an
# entry here is simply skipped by direct lookup.
#
# Discogs is release-based and has no per-track ID column; not listed
# here, so direct lookup never tries Discogs (search-fallback still
# runs for Discogs as one of the parallel sources).
_STORED_ID_COLUMNS = {
    'spotify': 'spotify_track_id',
    'deezer': 'deezer_id',
    'itunes': 'itunes_track_id',
    'hydrabase': 'soul_id',
}
def _enhanced_to_wishlist_payload(enhanced: dict,
                                  fallback_title: str,
                                  fallback_artist: str,
                                  fallback_album: str) -> Optional[dict]:
    """Convert a ``get_track_details`` enhanced-shape dict to the
    Spotify-shape wishlist payload.

    Every metadata source's ``get_track_details`` is expected to return
    the same "enhanced" intermediate shape (top-level ``id``, ``name``,
    ``artists`` as a list of strings, ``album`` as a dict). The wishlist
    downstream expects Spotify's native shape (``artists`` as
    ``[{'name': ...}]``), so this helper does the conversion in one place.

    Spotify's ``raw_data`` field is already in wishlist shape (the raw
    Spotify API response), so it is returned as-is when detected (its
    ``artists`` is a non-empty list of dicts), preserving the full
    ``album.images`` and ``external_urls``. Other sources' ``raw_data``
    is in source-native shape and gets ignored.

    Tolerated deviations from the documented shape:
    - a key present with value ``None`` is treated like a missing key
      (so ``'id': None`` yields ``''`` rather than the string ``'None'``);
    - ``album`` given as a plain string is treated as the album name;
      any other non-dict album is ignored;
    - ``artists`` given as a single string / dict is treated as one
      artist rather than iterated element by element.

    Args:
        enhanced: Result of a source client's ``get_track_details``.
        fallback_title: Used when ``enhanced`` has no track name.
        fallback_artist: Used when ``enhanced`` has no artists.
        fallback_album: Used when ``enhanced`` has no album name.

    Returns:
        The wishlist payload dict, or ``None`` when ``enhanced`` is empty.
    """
    if not enhanced:
        return None

    raw = enhanced.get('raw_data')
    if isinstance(raw, dict):
        raw_artists = raw.get('artists')
        if (isinstance(raw_artists, list) and raw_artists
                and isinstance(raw_artists[0], dict)):
            return raw

    def _field(mapping: dict, key: str, default):
        # dict.get's default only covers a *missing* key; sources also
        # send explicit nulls, which must fall back the same way.
        value = mapping.get(key)
        return default if value is None else value

    def _to_dict_artists(seq) -> list:
        if isinstance(seq, (str, dict)):
            seq = [seq]
        return [a if isinstance(a, dict) else {'name': a} for a in seq]

    album_data = enhanced.get('album') or {}
    if isinstance(album_data, str):
        album_data = {'name': album_data}
    elif not isinstance(album_data, dict):
        album_data = {}

    artists = enhanced.get('artists') or [fallback_artist]
    album_artists = album_data.get('artists') or artists

    image_url = enhanced.get('image_url') or ''
    album_images_field = album_data.get('images')
    if isinstance(album_images_field, list) and album_images_field:
        album_images = album_images_field
    elif image_url:
        album_images = [{'url': image_url, 'height': 600, 'width': 600}]
    else:
        album_images = []

    return {
        'id': str(_field(enhanced, 'id', '')),
        'name': enhanced.get('name') or fallback_title,
        'artists': _to_dict_artists(artists),
        'album': {
            'id': str(_field(album_data, 'id', '')),
            'name': album_data.get('name') or fallback_album,
            'album_type': _field(album_data, 'album_type', 'album'),
            'release_date': _field(album_data, 'release_date', ''),
            'total_tracks': _field(album_data, 'total_tracks', 1),
            'artists': _to_dict_artists(album_artists),
            'images': album_images,
        },
        'duration_ms': _field(enhanced, 'duration_ms', 0),
        'track_number': _field(enhanced, 'track_number', 1),
        'disc_number': _field(enhanced, 'disc_number', 1),
        'popularity': _field(enhanced, 'popularity', 0),
        'preview_url': enhanced.get('preview_url'),
        'external_urls': _field(enhanced, 'external_urls', {}),
    }
def _try_direct_lookup_all_sources(track: dict,
                                   sources: list,
                                   preferred_source: Optional[str],
                                   title: str,
                                   artist_name: str,
                                   album_title: str
                                   ) -> tuple:
    """Try direct ID-based lookup on every source where the library
    track has a stored ID.

    Mirrors what Download Discography does — stable IDs straight to the
    source's API, no fuzzy text matching. Avoids the failure mode where
    library text tags don't match the source's canonical title (track
    tagged "Title (Live)" while the source has "Title" → fuzzy search
    misses, but the stored ID resolves directly).

    The preferred source is attempted first when present in ``sources``
    (typically the user's configured primary metadata source), so a
    Deezer-primary user gets Deezer art / album shape on the wishlist
    entry instead of whichever source happened to come first.

    A source is skipped when it has no entry in ``_STORED_ID_COLUMNS``,
    the track has no stored ID for it, or its client lacks
    ``get_track_details``. Any exception raised while fetching,
    converting or validating one source's result is logged and that
    source is skipped, so a single malformed response cannot abort the
    remaining sources.

    Args:
        track: Library track record; read via ``track.get(<id column>)``.
        sources: ``[(source_name, client), ...]`` pairs.
        preferred_source: Source name to try first, or ``None``.
        title: Library title, used as fallback for the payload name.
        artist_name: Library artist, used as fallback artist.
        album_title: Library album, used as fallback album name.

    Returns:
        ``(payload, source_name)`` for the first source yielding a
        payload that passes ``_has_complete_metadata``; otherwise
        ``(None, None)``.
    """
    # sorted() is stable: the preferred source (key False) moves to the
    # front while every other source keeps its configured order.
    ordered = sorted(sources, key=lambda entry: entry[0] != preferred_source)

    for source_name, client in ordered:
        column = _STORED_ID_COLUMNS.get(source_name)
        stored_id = track.get(column) if column else None
        if not stored_id or not hasattr(client, 'get_track_details'):
            continue

        try:
            enhanced = client.get_track_details(str(stored_id))
            payload = (
                _enhanced_to_wishlist_payload(
                    enhanced, title, artist_name, album_title,
                )
                if enhanced else None
            )
            usable = _has_complete_metadata(payload)
        except Exception as exc:
            logger.error(
                f"[Enhance] {source_name} direct lookup failed for "
                f"ID {stored_id}: {exc}"
            )
            continue

        if not usable:
            continue

        logger.info(
            f"[Enhance] Direct lookup matched: {source_name} "
            f"ID {stored_id} → '{payload.get('name')}'"
        )
        return payload, source_name

    return None, None
# Minimum match score for accepting a search-fallback match without user
# confirmation. ``enhance_artist_quality`` compares the best cross-source
# match's ``score`` against this with ``>=``, so a score of exactly 0.7 is
# accepted. Mirrors the legacy threshold the enhance flow has always used.
_AUTO_ACCEPT_SCORE_THRESHOLD = 0.7
def enhance_artist_quality(artist_id, track_ids, deps: ArtistQualityDeps):
    """Add selected tracks to wishlist for quality enhancement re-download.

    Per-track flow:

    1. **Direct lookup using stored source IDs** (mirrors what Download
       Discography does — stable IDs straight to the source's API, no
       fuzzy text matching). For each source the user has configured,
       if the library track has the corresponding stored ID
       (``spotify_track_id`` / ``deezer_id`` / ``itunes_track_id`` /
       ``soul_id``), call ``client.get_track_details(stored_id)`` and
       convert to wishlist payload. First success wins; preferred
       source (user's configured primary) tried first.
    2. **Multi-source parallel text search fallback** (via the shared
       ``core.metadata.multi_source_search`` module) for tracks with no
       stored IDs / lookup misses. A match is accepted only when its
       score is at least ``_AUTO_ACCEPT_SCORE_THRESHOLD``.
    3. **Validation**: reject matches with empty title / album / artists
       so the user sees a clear failure instead of an "unknown artist"
       wishlist entry.
    4. Queue the match on the wishlist with ``source_type='enhance'`` and
       a ``source_context`` carrying the original file path, format
       tier, bitrate and artist name.

    Args:
        artist_id: Library artist whose tracks are being enhanced.
        track_ids: IDs of the selected library tracks (compared as
            strings against the artist's track IDs).
        deps: Injected collaborators, see ``ArtistQualityDeps``.

    Returns:
        ``(payload_dict, http_status_code)``:
        - ``400`` when ``track_ids`` is empty,
        - ``404`` when the artist detail lookup reports no success,
        - ``200`` with ``enhanced_count`` / ``failed_count`` /
          ``failed_tracks`` otherwise (individual track failures do not
          change the status),
        - ``500`` with the exception text on any unexpected error.
    """
    from core.metadata.multi_source_search import TrackQuery, search_all_sources
    from core.metadata.registry import get_primary_source

    try:
        if not track_ids:
            return {"success": False, "error": "No track IDs provided"}, 400

        database = deps.get_database()
        wishlist_service = deps.get_wishlist_service()
        profile_id = deps.get_current_profile_id()

        # Get artist info
        artist_result = database.get_artist_full_detail(artist_id)
        if not artist_result.get('success'):
            return {"success": False, "error": "Artist not found"}, 404

        artist_name = artist_result.get('artist', {}).get('name', 'Unknown Artist')

        # Build lookup of all tracks for this artist, keyed by string ID.
        # Each track dict is annotated in place with its album's title/id
        # so the per-track loop below does not need the album again.
        track_lookup = {}
        for album in artist_result.get('albums', []):
            album_title = album.get('title', '')
            for track in album.get('tracks', []):
                tid = str(track.get('id', ''))
                track['_album_title'] = album_title
                track['_album_id'] = album.get('id')
                track_lookup[tid] = track

        # Resolve every configured metadata source up front.
        search_sources = deps.get_metadata_search_sources()

        # User's configured primary source — direct-lookup tries this
        # first so Deezer-primary users get Deezer payloads on the
        # wishlist entry (correct cover art / album shape) even when
        # other sources also have stored IDs for the same track.
        try:
            preferred_source = get_primary_source()
        except Exception:
            preferred_source = None

        # The "no match" explanation depends only on which sources are
        # configured, so build it once instead of once per failed track.
        source_list = ', '.join(name for name, _ in (search_sources or []))
        if source_list:
            no_match_reason = (
                f'No usable match across {source_list} — '
                f'try connecting an additional metadata source'
            )
        else:
            no_match_reason = (
                'No metadata source configured — connect Spotify / '
                'iTunes / Deezer / Discogs / Hydrabase to enable enhance'
            )

        enhanced_count = 0
        failed_count = 0
        failed_tracks = []

        for track_id in track_ids:
            track_id_str = str(track_id)
            track = track_lookup.get(track_id_str)
            if not track:
                failed_count += 1
                failed_tracks.append({'track_id': track_id, 'reason': 'Track not found'})
                continue

            file_path = track.get('file_path')
            if not file_path:
                failed_count += 1
                failed_tracks.append({'track_id': track_id, 'reason': 'No file path'})
                continue

            tier_name, tier_num = deps.get_quality_tier_from_extension(file_path)

            title = track.get('title', '') or ''
            if not title.strip():
                # Untitled library row: fall back to the file name stem.
                title = os.path.splitext(os.path.basename(file_path))[0]
            album_title = track.get('_album_title', '')

            matched_track_data = None
            chosen_source = None

            # 1. Direct lookup via every stored source ID — like Download
            #    Discography. Stable IDs, no fuzzy text matching.
            if search_sources:
                matched_track_data, chosen_source = _try_direct_lookup_all_sources(
                    track, search_sources, preferred_source,
                    title, artist_name, album_title,
                )

            # 2. Multi-source parallel text search fallback — for tracks
            #    with no stored IDs / lookup misses.
            if not matched_track_data and search_sources:
                try:
                    track_query = TrackQuery(
                        title=title,
                        artist=artist_name,
                        album=album_title,
                        duration_ms=track.get('duration', 0) or 0,
                        spotify_track_id=track.get('spotify_track_id'),
                        deezer_id=track.get('deezer_id'),
                    )
                    multi_result = search_all_sources(track_query, search_sources)
                    if multi_result.best_match and multi_result.best_match['score'] >= _AUTO_ACCEPT_SCORE_THRESHOLD:
                        chosen_source = multi_result.best_match['source']
                        best_track_obj = multi_result.best_track()
                        if best_track_obj:
                            matched_track_data = _build_payload_from_track(best_track_obj)
                except Exception as exc:
                    # Best-effort: a search failure fails this track only.
                    logger.error(f"[Enhance] Multi-source search failed for {title}: {exc}")

            # 3. Reject matches with empty / missing core fields.
            if not _has_complete_metadata(matched_track_data):
                if matched_track_data:
                    logger.warning(
                        f"[Enhance] {chosen_source} match for '{title}' rejected — "
                        f"empty title / album / artists (would render as 'unknown')"
                    )
                matched_track_data = None

            if not matched_track_data:
                failed_count += 1
                failed_tracks.append({
                    'track_id': track_id,
                    'title': title,
                    'reason': no_match_reason,
                })
                continue

            # Add to wishlist with enhance source
            source_context = {
                'enhance': True,
                'original_file_path': file_path,
                'original_format': tier_name,
                'original_bitrate': track.get('bitrate'),
                'original_tier': tier_num,
                'artist_name': artist_name,
            }

            success = wishlist_service.add_spotify_track_to_wishlist(
                spotify_track_data=matched_track_data,
                failure_reason=f"Quality enhance - upgrading from {tier_name.replace('_', ' ').title()}",
                source_type='enhance',
                source_context=source_context,
                profile_id=profile_id
            )

            if success:
                enhanced_count += 1
                logger.info(f"[Enhance] Queued for upgrade: {artist_name} - {title} ({tier_name})")
            else:
                failed_count += 1
                failed_tracks.append({'track_id': track_id, 'title': title, 'reason': 'Wishlist add failed'})

        return {
            'success': True,
            'enhanced_count': enhanced_count,
            'failed_count': failed_count,
            'failed_tracks': failed_tracks
        }, 200

    except Exception as e:
        # Route-level boundary: report the failure as a 500 payload
        # rather than letting the exception escape to the web framework.
        logger.error(f"[Enhance] {e}")
        import traceback
        traceback.print_exc()
        return {"success": False, "error": str(e)}, 500