# SoulSync/core/artists/quality.py

"""Artist quality enhancement helper.

`enhance_artist_quality(artist_id, track_ids, deps)` is the route-handler
body for the `/api/library/artist/<artist_id>/enhance` endpoint. It walks
the user's selected tracks, finds the best metadata match against the
configured primary source, and queues high-quality re-downloads on the
wishlist with `source_type='enhance'`.

Per-track flow:

1. Resolve the existing track via the artist's full detail map (built up
   front from `database.get_artist_full_detail`).
2. Read current quality tier from the file extension.
3. Build `matched_track_data` for the wishlist entry, in priority order:
   - **Direct lookup using stored source IDs** — for every source the
     user has configured, if the library track has the corresponding
     stored ID (`spotify_track_id` / `deezer_id` / `itunes_track_id` /
     `soul_id`), call `client.get_track_details(stored_id)` and convert
     the result to the wishlist payload. First success wins; the user's
     configured primary source is tried first. Mirrors what Download
     Discography does — stable IDs straight to the source's API, no
     fuzzy text matching.
   - **Multi-source parallel text search fallback** — if no stored ID
     resolved, run the shared `core.metadata.multi_source_search`
     against every configured source in parallel and pick the best
     cross-source match (auto-accept threshold 0.7).
4. Validate the match has non-empty title, album, and artists. Reject
   matches with empty fields — those propagated as
   "unknown artist - unknown album - unknown track" wishlist entries
   pre-fix because the wishlist payload normalizer's truthy-check
   passthrough accepted dicts with empty string fields.
5. Add to wishlist via `wishlist_service.add_spotify_track_to_wishlist`
   with `source_type='enhance'` and a `source_context` carrying the
   original file path, format tier, bitrate, and artist name.
6. Tally `enhanced_count` / `failed_count` / per-track failure reasons.

The flow originally had Spotify-only logic with an iTunes search-only
fallback. Two failure modes drove the rewrite:

- Users with neither Spotify nor Deezer connected got silent failures
  ("unknown artist - unknown album - unknown track" wishlist entries)
  because iTunes's text search returned junk matches with empty fields
  that cleared the 0.7 confidence threshold.
- Library tracks with messy tags ("Title (Live)", featured artists in
  the artist field, etc.) failed fuzzy text search even when a perfect
  stored ID was available — Download Discography had no such problem
  because it resolves albums by stable ID.

Direct-lookup-via-stored-ID matches the Download Discography contract
for every source where we have an ID column. Text search is only the
fallback now.

Returns `(payload_dict, http_status_code)` so the route wrapper can
`jsonify()` and return.
"""

from __future__ import annotations

import os
from dataclasses import dataclass
from typing import Any, Callable, Optional

from utils.logging_config import get_logger

logger = get_logger('artists.quality')


@dataclass
class ArtistQualityDeps:
    """Bundle of cross-cutting deps the artist quality enhancement needs.

    Passed to ``enhance_artist_quality`` so the route-handler body reaches
    the database, the wishlist service, and the metadata sources through
    injected callables.
    """
    # Not referenced by the code visible in this module.
    matching_engine: Any
    # Returns the database handle; ``enhance_artist_quality`` calls
    # ``get_artist_full_detail(artist_id)`` on it.
    get_database: Callable[[], Any]
    # Returns the wishlist service; ``add_spotify_track_to_wishlist`` is
    # called on it once per matched track.
    get_wishlist_service: Callable[[], Any]
    # Returns the profile ID forwarded to the wishlist add call.
    get_current_profile_id: Callable[[], int]
    # Called with a track's file path; the result is unpacked as
    # ``(tier_name, tier_num)``.
    get_quality_tier_from_extension: Callable
    # Returns ``[(source_name, client), ...]`` for every metadata source
    # the user has configured. Powers both the direct-lookup fast path
    # (resolves stored source IDs straight from each source's API,
    # like Download Discography) and the multi-source parallel text
    # search fallback (shared with Track Redownload via
    # ``core.metadata.multi_source_search``).
    get_metadata_search_sources: Callable[[], list]


def _has_complete_metadata(payload: Optional[dict]) -> bool:
    """Reject matches with empty / missing core fields. Pre-fix, iTunes
    returned matches that cleared the 0.7 confidence threshold while
    having empty artist / album / title — those propagated as junk
    wishlist entries displayed as 'unknown artist - unknown album -
    unknown track'."""
    if not payload:
        return False
    if not (payload.get('name') or '').strip():
        return False
    artists = payload.get('artists') or []
    has_artist = any(
        (a.get('name') or '').strip() if isinstance(a, dict) else (a or '').strip()
        for a in artists
    )
    if not has_artist:
        return False
    album = payload.get('album') or {}
    if isinstance(album, dict):
        if not (album.get('name') or '').strip():
            return False
    elif not (album or '').strip():
        return False
    return True


def _build_payload_from_track(track_obj) -> dict:
    """Build a Spotify-shaped wishlist payload from any metadata source's
    Track-shaped object (Spotify Track, iTunes Track, Deezer Track,
    Discogs Track — they all have the same .id / .name / .artists /
    .album / .duration_ms / etc shape because each client mimics
    Spotify's surface).

    The wishlist's downstream pipeline expects Spotify shape; this helper
    is the single place that knows how to produce it. Replaces the
    duplicated payload construction that used to live in the Spotify
    search path AND the iTunes fallback path.

    Does NOT substitute defaults for missing artists / album / title —
    ``_has_complete_metadata`` rejects empty matches downstream so the
    user sees a clear failure instead of a junk wishlist entry with
    fabricated values.
    """
    image_url = getattr(track_obj, 'image_url', '') or ''
    album_images = (
        [{'url': image_url, 'height': 600, 'width': 600}]
        if image_url else []
    )
    artist_names = list(getattr(track_obj, 'artists', None) or [])
    return {
        'id': getattr(track_obj, 'id', ''),
        'name': getattr(track_obj, 'name', '') or '',
        'artists': [{'name': a} for a in artist_names],
        'album': {
            'name': getattr(track_obj, 'album', '') or '',
            'artists': [{'name': a} for a in artist_names],
            'album_type': getattr(track_obj, 'album_type', None) or 'album',
            'images': album_images,
            'release_date': getattr(track_obj, 'release_date', '') or '',
            'total_tracks': 1,
        },
        'duration_ms': getattr(track_obj, 'duration_ms', 0) or 0,
        'track_number': getattr(track_obj, 'track_number', None) or 1,
        'disc_number': getattr(track_obj, 'disc_number', None) or 1,
        'popularity': getattr(track_obj, 'popularity', None) or 0,
        'preview_url': getattr(track_obj, 'preview_url', None),
        'external_urls': getattr(track_obj, 'external_urls', None) or {},
    }


# Map metadata source name → DB column on the ``tracks`` table that
# stores that source's native track ID. Used to drive the direct-lookup
# fast path: when a library track has a stored ID for source X and the
# user has source X configured, skip fuzzy text search and resolve
# straight from X's API. Mirrors what Download Discography does — stable
# IDs all the way, no fuzzy text matching.
#
# Keys are looked up with the ``source_name`` of each configured
# ``(source_name, client)`` pair; values are read from the library track
# dict with ``track.get(column)``.
#
# Discogs is release-based and has no per-track ID column; not listed
# here, so direct lookup never tries Discogs (search-fallback still
# runs for Discogs as one of the parallel sources).
_STORED_ID_COLUMNS = {
    'spotify': 'spotify_track_id',
    'deezer': 'deezer_id',
    'itunes': 'itunes_track_id',
    'hydrabase': 'soul_id',
}


def _enhanced_to_wishlist_payload(enhanced: dict,
                                   fallback_title: str,
                                   fallback_artist: str,
                                   fallback_album: str) -> Optional[dict]:
    """Convert a ``get_track_details`` enhanced-shape dict to the
    Spotify-shape wishlist payload.

    Every metadata source's ``get_track_details`` returns the same
    "enhanced" intermediate shape (top-level ``id``, ``name``,
    ``artists`` as a list of strings, ``album.artists`` as strings),
    documented and pinned across spotify_client / itunes_client /
    deezer_client / hydrabase_client. The wishlist downstream expects
    Spotify's native shape (``artists`` as ``[{'name': ...}]``), so
    this helper does the conversion in one place.

    Spotify's ``raw_data`` field is already in wishlist shape (the
    raw Spotify API response), so we return it as-is when detected,
    preserving full ``album.images`` and ``external_urls`` that the
    enhanced top-level fields drop. Other sources' ``raw_data`` is
    in source-native shape and gets ignored.
    """
    if not enhanced:
        return None
    raw = enhanced.get('raw_data')
    if isinstance(raw, dict):
        raw_artists = raw.get('artists')
        if (isinstance(raw_artists, list) and raw_artists
                and isinstance(raw_artists[0], dict)):
            return raw

    artists = enhanced.get('artists') or [fallback_artist]
    album_data = enhanced.get('album') or {}
    album_artists = album_data.get('artists') or artists

    def _to_dict_artists(seq):
        return [a if isinstance(a, dict) else {'name': a} for a in seq]

    image_url = enhanced.get('image_url') or ''
    album_images_field = album_data.get('images')
    if isinstance(album_images_field, list) and album_images_field:
        album_images = album_images_field
    elif image_url:
        album_images = [{'url': image_url, 'height': 600, 'width': 600}]
    else:
        album_images = []

    return {
        'id': str(enhanced.get('id', '')),
        'name': enhanced.get('name') or fallback_title,
        'artists': _to_dict_artists(artists),
        'album': {
            'id': str(album_data.get('id', '')),
            'name': album_data.get('name') or fallback_album,
            'album_type': album_data.get('album_type', 'album'),
            'release_date': album_data.get('release_date', ''),
            'total_tracks': album_data.get('total_tracks', 1),
            'artists': _to_dict_artists(album_artists),
            'images': album_images,
        },
        'duration_ms': enhanced.get('duration_ms', 0),
        'track_number': enhanced.get('track_number', 1),
        'disc_number': enhanced.get('disc_number', 1),
        'popularity': enhanced.get('popularity', 0),
        'preview_url': enhanced.get('preview_url'),
        'external_urls': enhanced.get('external_urls', {}),
    }


def _try_direct_lookup_all_sources(track: dict,
                                    sources: list,
                                    preferred_source: Optional[str],
                                    title: str,
                                    artist_name: str,
                                    album_title: str
                                    ) -> tuple:
    """Try direct ID-based lookup on every source where the library
    track has a stored ID.

    Mirrors what Download Discography does — stable IDs straight to the
    source's API, no fuzzy text matching. Avoids the failure mode where
    library text tags don't match the source's canonical title (the
    Discord report case: track tagged "Title (Live)" and source has
    "Title" → fuzzy search misses, but stored ID resolves directly).

    Preferred source attempted first when present in ``sources``,
    typically the user's configured primary metadata source — so a
    Deezer-primary user gets Deezer art / album shape on the wishlist
    entry instead of whichever source happened to have a stored ID
    first in iteration order.

    Args:
        track: Library track dict; stored IDs are read from the columns
            named in ``_STORED_ID_COLUMNS``.
        sources: ``[(source_name, client), ...]`` for configured sources.
        preferred_source: Source name to try first, or ``None``.
        title: Library title, used as a fallback in the payload.
        artist_name: Library artist, used as a fallback in the payload.
        album_title: Library album title, used as a fallback in the payload.

    Returns:
        ``(payload, source_name)`` on first success, or ``(None, None)``
        if no source has a stored ID with a successful lookup.

    Every per-source failure — the API call raising, or the response
    being too malformed to convert / validate — is logged and skipped so
    the remaining sources still get their turn.
    """
    # ``sorted`` is stable: the preferred source moves to the front and
    # every other source keeps its configured order.
    ordered = sorted(sources, key=lambda entry: entry[0] != preferred_source)

    for source_name, client in ordered:
        column = _STORED_ID_COLUMNS.get(source_name)
        stored_id = track.get(column) if column else None
        if not stored_id or not hasattr(client, 'get_track_details'):
            continue

        try:
            enhanced = client.get_track_details(str(stored_id))
        except Exception as exc:
            logger.error(
                f"[Enhance] {source_name} direct lookup failed for "
                f"ID {stored_id}: {exc}"
            )
            continue
        if not enhanced:
            continue

        # Conversion and validation are guarded too: an unexpected
        # response shape from one source must not abort the other lookups.
        try:
            payload = _enhanced_to_wishlist_payload(
                enhanced, title, artist_name, album_title,
            )
            is_complete = _has_complete_metadata(payload)
        except Exception as exc:
            logger.error(
                f"[Enhance] {source_name} direct lookup returned an unusable "
                f"payload for ID {stored_id}: {exc}"
            )
            continue

        if is_complete:
            logger.info(
                f"[Enhance] Direct lookup matched: {source_name} "
                f"ID {stored_id} → '{payload.get('name')}'"
            )
            return payload, source_name

    return None, None


# Minimum match-score threshold for accepting a search-fallback match
# without user confirmation. Mirrors the legacy threshold the enhance
# flow has always used. The comparison is inclusive: a best match whose
# score equals the threshold is accepted.
_AUTO_ACCEPT_SCORE_THRESHOLD = 0.7


def _index_artist_tracks(artist_result: dict) -> dict:
    """Map ``str(track id)`` → track dict for every album of the artist.

    Each track dict is annotated in place with ``_album_title`` and
    ``_album_id`` so later steps do not need the album again. Missing or
    ``None`` album / track lists and album titles are tolerated.
    """
    track_lookup = {}
    for album in artist_result.get('albums') or []:
        album_title = album.get('title') or ''
        album_id = album.get('id')
        for track in album.get('tracks') or []:
            track['_album_title'] = album_title
            track['_album_id'] = album_id
            track_lookup[str(track.get('id', ''))] = track
    return track_lookup


def _no_match_reason(search_sources) -> str:
    """Build the user-facing failure reason for a track with no usable match."""
    source_list = ', '.join(name for name, _ in (search_sources or []))
    if not source_list:
        return (
            'No metadata source configured — connect Spotify / '
            'iTunes / Deezer / Discogs / Hydrabase to enable enhance'
        )
    return (
        f'No usable match across {source_list} — '
        f'try connecting an additional metadata source'
    )


def enhance_artist_quality(artist_id, track_ids, deps: ArtistQualityDeps):
    """Add selected tracks to wishlist for quality enhancement re-download.

    Per-track flow:

    1. **Direct lookup using stored source IDs** (mirrors what Download
       Discography does — stable IDs straight to the source's API, no
       fuzzy text matching). For each source the user has configured,
       if the library track has the corresponding stored ID
       (``spotify_track_id`` / ``deezer_id`` / ``itunes_track_id`` /
       ``soul_id``), call ``client.get_track_details(stored_id)`` and
       convert to wishlist payload. First success wins; preferred
       source (user's configured primary) tried first.

    2. **Multi-source parallel text search fallback** (via the shared
       ``core.metadata.multi_source_search`` module — same code path
       Track Redownload uses) for tracks with no stored IDs / lookup
       misses.

    3. **Validation**: reject matches with empty title / album / artists
       so the user sees a clear failure instead of an "unknown artist"
       wishlist entry.

    Pre-refactor: only Spotify had a direct-lookup fast path; everything
    else went through fuzzy text search. Discogs / Hydrabase / Deezer-
    primary users got far worse coverage than Download Discography
    despite both flows asking the same question.

    Args:
        artist_id: Library artist whose tracks are being enhanced.
        track_ids: IDs of the tracks the user selected.
        deps: Injected database / wishlist / metadata-source accessors.

    Returns:
        ``(payload_dict, http_status_code)``: ``400`` when no track IDs
        were given, ``404`` when the artist cannot be loaded, ``500`` on
        an unexpected error, otherwise ``200`` with ``enhanced_count``,
        ``failed_count`` and per-track ``failed_tracks`` reasons.

    Failures are isolated per track: an exception while looking up or
    queueing one track is logged and recorded in ``failed_tracks``, so
    tracks already queued earlier in the batch are still reported as
    enhanced instead of the whole request turning into a 500.
    """
    # Imported lazily, as in the original flow, so importing this module
    # does not pull in the metadata package.
    from core.metadata.multi_source_search import TrackQuery, search_all_sources
    from core.metadata.registry import get_primary_source

    try:
        if not track_ids:
            return {"success": False, "error": "No track IDs provided"}, 400

        database = deps.get_database()
        wishlist_service = deps.get_wishlist_service()
        profile_id = deps.get_current_profile_id()

        # Get artist info
        artist_result = database.get_artist_full_detail(artist_id)
        if not artist_result or not artist_result.get('success'):
            return {"success": False, "error": "Artist not found"}, 404

        artist_name = (artist_result.get('artist') or {}).get('name', 'Unknown Artist')

        # Build lookup of all tracks for this artist
        track_lookup = _index_artist_tracks(artist_result)

        # Resolve every configured metadata source up front.
        search_sources = deps.get_metadata_search_sources()

        # User's configured primary source — direct-lookup tries this
        # first so Deezer-primary users get Deezer payloads on the
        # wishlist entry (correct cover art / album shape) even when
        # other sources also have stored IDs for the same track.
        try:
            preferred_source = get_primary_source()
        except Exception:
            preferred_source = None

        enhanced_count = 0
        # ``failed_count`` in the response is derived from this list; every
        # failure appends exactly one entry.
        failed_tracks = []

        for track_id in track_ids:
            track = track_lookup.get(str(track_id))
            if not track:
                failed_tracks.append({'track_id': track_id, 'reason': 'Track not found'})
                continue

            file_path = track.get('file_path')
            if not file_path:
                failed_tracks.append({'track_id': track_id, 'reason': 'No file path'})
                continue

            tier_name, tier_num = deps.get_quality_tier_from_extension(file_path)
            title = track.get('title', '') or ''
            if not title.strip():
                # Untitled library row: fall back to the file's base name.
                title = os.path.splitext(os.path.basename(file_path))[0]
            album_title = track.get('_album_title', '')

            matched_track_data = None
            chosen_source = None

            # 1. Direct lookup via every stored source ID — like Download
            # Discography. Stable IDs, no fuzzy text matching.
            if search_sources:
                try:
                    matched_track_data, chosen_source = _try_direct_lookup_all_sources(
                        track, search_sources, preferred_source,
                        title, artist_name, album_title,
                    )
                except Exception as exc:
                    # Fall through to the text search instead of aborting
                    # the whole batch.
                    logger.error(f"[Enhance] Direct lookup failed for {title}: {exc}")
                    matched_track_data, chosen_source = None, None

            # 2. Multi-source parallel text search fallback — for tracks
            # with no stored IDs / lookup misses.
            if not matched_track_data and search_sources:
                try:
                    track_query = TrackQuery(
                        title=title,
                        artist=artist_name,
                        album=album_title,
                        duration_ms=track.get('duration', 0) or 0,
                        spotify_track_id=track.get('spotify_track_id'),
                        deezer_id=track.get('deezer_id'),
                    )
                    multi_result = search_all_sources(track_query, search_sources)
                    if multi_result.best_match and multi_result.best_match['score'] >= _AUTO_ACCEPT_SCORE_THRESHOLD:
                        chosen_source = multi_result.best_match['source']
                        best_track_obj = multi_result.best_track()
                        if best_track_obj:
                            matched_track_data = _build_payload_from_track(best_track_obj)
                except Exception as exc:
                    logger.error(f"[Enhance] Multi-source search failed for {title}: {exc}")

            # 3. Reject matches with empty / missing core fields.
            if not _has_complete_metadata(matched_track_data):
                if matched_track_data:
                    logger.warning(
                        f"[Enhance] {chosen_source} match for '{title}' rejected — "
                        f"empty title / album / artists (would render as 'unknown')"
                    )
                matched_track_data = None

            if not matched_track_data:
                failed_tracks.append({
                    'track_id': track_id,
                    'title': title,
                    'reason': _no_match_reason(search_sources),
                })
                continue

            # Add to wishlist with enhance source
            source_context = {
                'enhance': True,
                'original_file_path': file_path,
                'original_format': tier_name,
                'original_bitrate': track.get('bitrate'),
                'original_tier': tier_num,
                'artist_name': artist_name,
            }

            try:
                success = wishlist_service.add_spotify_track_to_wishlist(
                    spotify_track_data=matched_track_data,
                    failure_reason=f"Quality enhance - upgrading from {tier_name.replace('_', ' ').title()}",
                    source_type='enhance',
                    source_context=source_context,
                    profile_id=profile_id
                )
            except Exception as exc:
                # One bad add must not discard the tally of tracks that
                # were already queued.
                logger.error(f"[Enhance] Wishlist add failed for {title}: {exc}")
                success = False

            if success:
                enhanced_count += 1
                logger.info(f"[Enhance] Queued for upgrade: {artist_name} - {title} ({tier_name})")
            else:
                failed_tracks.append({'track_id': track_id, 'title': title, 'reason': 'Wishlist add failed'})

        return {
            'success': True,
            'enhanced_count': enhanced_count,
            'failed_count': len(failed_tracks),
            'failed_tracks': failed_tracks
        }, 200
    except Exception as e:
        import traceback
        # Send the traceback through the logger rather than straight to
        # stderr so it lands next to the other [Enhance] records.
        logger.error(f"[Enhance] {e}\n{traceback.format_exc()}")
        return {"success": False, "error": str(e)}, 500