From 1051ef240291a5dcbf5fae3c8805ae23570b5901 Mon Sep 17 00:00:00 2001
From: BoulderBadgeDad
Date: Sun, 7 Jun 2026 17:27:52 -0700
Subject: [PATCH] Lyrics: add a "Lyrics Filler" maintenance job + lyrics option in the Re-tag tool (Sokhi)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The lyrics sibling of the Cover Art Filler, plus retag integration — reusing
the existing LyricsClient (LRClib) the import pipeline already uses.

- lyrics_client: extracted the LRClib fetch (exact-match-with-duration →
  search fallback) into a shared _fetch_remote_lyrics, used by both
  create_lrc_file (unchanged behavior) and a new check-only
  has_remote_lyrics.
- MissingLyricsJob (core/repair_jobs/missing_lyrics.py): scans tracks with no
  .lrc sidecar and — Option A — only flags ones LRClib actually has lyrics
  for, so instrumentals/interludes are never surfaced or re-flagged.
  Registered in the job list; default OFF; respects the lrclib_enabled
  toggle.
- _fix_missing_lyrics (repair_worker): applies a finding by fetching +
  writing the .lrc and embedding lyrics via create_lrc_file.
- Re-tag tool: new 'lyrics' setting ('fetch'|'skip', default skip). When
  'fetch', apply_track_plans now also fetches/refreshes the .lrc per track
  (fetch-if-missing, re-embed-if-exists) — threaded through scan gates,
  finding details, the auto-apply path, and the manual fix handler. Settings
  UI auto-renders the dropdown from setting_options; no markup needed.
- Frontend: type/action/result labels for missing_lyrics + a finding detail
  render case.

Tests: 12 — has_remote_lyrics truth table, sidecar detection, scan
(only-fixable / skip-existing / lrclib-disabled), the apply handler, and
retag lyrics_action on/off. 694 repair/lyrics/cover/retag tests pass.
--- core/lyrics_client.py | 71 +++++----- core/repair_jobs/__init__.py | 1 + core/repair_jobs/library_retag.py | 82 +++++++++--- core/repair_jobs/missing_lyrics.py | 192 +++++++++++++++++++++++++++ core/repair_worker.py | 37 +++++- tests/test_library_retag_job.py | 3 +- tests/test_missing_lyrics_job.py | 204 +++++++++++++++++++++++++++++ webui/static/enrichment.js | 9 ++ 8 files changed, 543 insertions(+), 56 deletions(-) create mode 100644 core/repair_jobs/missing_lyrics.py create mode 100644 tests/test_missing_lyrics_job.py diff --git a/core/lyrics_client.py b/core/lyrics_client.py index eb49f6fa..d94ee67f 100644 --- a/core/lyrics_client.py +++ b/core/lyrics_client.py @@ -28,6 +28,44 @@ class LyricsClient: logger.error(f"Error initializing LRClib API: {e}") self.api = None + def _fetch_remote_lyrics(self, track_name: str, artist_name: str, + album_name: str = None, duration_seconds: int = None): + """LRClib fetch — exact match (with duration) then search fallback. + Returns the lyrics_data object or None. 
Shared by create_lrc_file and + has_remote_lyrics so the fetch strategy lives in one place.""" + if not self.api: + return None + lyrics_data = None + # Strategy 1: Exact match with duration (most accurate) + if duration_seconds and album_name: + try: + lyrics_data = self.api.get_lyrics( + track_name=track_name, artist_name=artist_name, + album_name=album_name, duration=duration_seconds) + except Exception as e: + logger.debug(f"Exact match failed: {e}") + # Strategy 2: Search without duration + if not lyrics_data: + try: + search_results = self.api.search_lyrics( + track_name=track_name, artist_name=artist_name) + if search_results: + lyrics_data = search_results[0] + except Exception as e: + logger.debug(f"Search fallback failed: {e}") + return lyrics_data + + def has_remote_lyrics(self, track_name: str, artist_name: str, + album_name: str = None, duration_seconds: int = None) -> bool: + """True if LRClib has (synced OR plain) lyrics for this track, without + writing anything. Powers the Missing Lyrics maintenance job's scan so + it only surfaces tracks that are actually fixable (instrumentals return + nothing → never flagged).""" + data = self._fetch_remote_lyrics(track_name, artist_name, album_name, duration_seconds) + if not data: + return False + return bool(getattr(data, 'synced_lyrics', None) or getattr(data, 'plain_lyrics', None)) + def create_lrc_file(self, audio_file_path: str, track_name: str, artist_name: str, album_name: str = None, duration_seconds: int = None) -> bool: """ @@ -79,37 +117,8 @@ class LyricsClient: # Fetch lyrics from LRClib logger.debug(f"Fetching lyrics for: {artist_name} - {track_name}") - - lyrics_data = None - - # Strategy 1: Exact match with duration (most accurate) - if duration_seconds and album_name: - try: - logger.debug(f"Trying exact match: {track_name} by {artist_name} from {album_name} ({duration_seconds}s)") - lyrics_data = self.api.get_lyrics( - track_name=track_name, - artist_name=artist_name, - album_name=album_name, 
- duration=duration_seconds - ) - if lyrics_data: - logger.debug("Exact match found!") - except Exception as e: - logger.debug(f"Exact match failed: {e}") - - # Strategy 2: Search without duration - if not lyrics_data: - try: - logger.debug(f"Trying search: {track_name} by {artist_name}") - search_results = self.api.search_lyrics( - track_name=track_name, - artist_name=artist_name - ) - if search_results: - lyrics_data = search_results[0] # Take first result - logger.debug(f"Search found {len(search_results)} results, using first") - except Exception as e: - logger.debug(f"Search fallback failed: {e}") + lyrics_data = self._fetch_remote_lyrics( + track_name, artist_name, album_name, duration_seconds) # No lyrics found if not lyrics_data: diff --git a/core/repair_jobs/__init__.py b/core/repair_jobs/__init__.py index 57996608..137548ac 100644 --- a/core/repair_jobs/__init__.py +++ b/core/repair_jobs/__init__.py @@ -33,6 +33,7 @@ _JOB_MODULES = [ 'core.repair_jobs.duplicate_detector', 'core.repair_jobs.acoustid_scanner', 'core.repair_jobs.missing_cover_art', + 'core.repair_jobs.missing_lyrics', 'core.repair_jobs.metadata_gap_filler', 'core.repair_jobs.album_completeness', 'core.repair_jobs.fake_lossless_detector', diff --git a/core/repair_jobs/library_retag.py b/core/repair_jobs/library_retag.py index 1617943e..1aa497e7 100644 --- a/core/repair_jobs/library_retag.py +++ b/core/repair_jobs/library_retag.py @@ -72,14 +72,19 @@ def _run_full_enrich(file_path, full_meta) -> bool: return False -def apply_track_plans(track_plans, cover_action=None, cover_url=None, full=False) -> dict: - """Write each plan's tags in place (+ optionally embed/refresh cover art), - reusing tag_writer.write_tags_to_file. ``file_path`` on each plan must be a - real, reachable path (caller resolves Docker paths). Shared by the dry-run= - False auto-apply and the repair_worker fix handler. Never raises. 
- """ +def apply_track_plans(track_plans, cover_action=None, cover_url=None, full=False, + lyrics_action=False) -> dict: + """Write each plan's tags in place (+ optionally embed/refresh cover art, + + optionally fetch/refresh .lrc lyrics), reusing tag_writer.write_tags_to_file. + ``file_path`` on each plan must be a real, reachable path (caller resolves + Docker paths). Shared by the dry-run=False auto-apply and the repair_worker + fix handler. Never raises. + + ``lyrics_action`` (Sokhi): when True, after a track's tags are written, fetch + + write its .lrc and embed the lyrics — the same LyricsClient the import + pipeline uses (fetch if missing, re-embed if a sidecar already exists).""" import os as _os - result = {'written': 0, 'failed': 0, 'skipped': 0, 'cover_written': False} + result = {'written': 0, 'failed': 0, 'skipped': 0, 'cover_written': False, 'lyrics_written': 0} embed_cover = bool(cover_action and cover_url) cover_data = None if embed_cover: @@ -90,12 +95,20 @@ def apply_track_plans(track_plans, cover_action=None, cover_url=None, full=False logger.debug("retag cover download failed: %s", e) embed_cover = embed_cover and cover_data is not None + _lyrics_client = None + if lyrics_action: + try: + from core.lyrics_client import lyrics_client as _lyrics_client + except Exception as e: + logger.debug("retag lyrics client unavailable: %s", e) + _lyrics_client = None + from core.tag_writer import write_tags_to_file last_dir = None for tp in track_plans or []: fp = tp.get('file_path') db_data = tp.get('db_data') or {} - if not fp or not _os.path.isfile(fp) or (not db_data and not embed_cover): + if not fp or not _os.path.isfile(fp) or (not db_data and not embed_cover and not _lyrics_client): result['skipped'] += 1 continue try: @@ -111,6 +124,22 @@ def apply_track_plans(track_plans, cover_action=None, cover_url=None, full=False logger.warning("retag write failed for %s: %s", fp, e) result['failed'] += 1 + # Lyrics: fetch/refresh the .lrc for this track 
(independent of tag write + # success — a track with no tag changes may still be missing lyrics). + if _lyrics_client: + try: + dur = db_data.get('duration') or db_data.get('duration_seconds') + if lyrics_client_wrote := _lyrics_client.create_lrc_file( + fp, + db_data.get('title') or db_data.get('track_title') or '', + db_data.get('artist') or db_data.get('artist_name') or '', + album_name=db_data.get('album') or db_data.get('album_title'), + duration_seconds=int(dur) if dur else None, + ): + result['lyrics_written'] += 1 + except Exception as e: + logger.debug("retag lyrics fetch failed for %s: %s", fp, e) + if cover_action and cover_data and last_dir: try: cover_path = _os.path.join(last_dir, 'cover.jpg') @@ -224,12 +253,14 @@ class LibraryRetagJob(RepairJob): 'depth': 'light', 'mode': MODE_OVERWRITE, 'cover_art': 'replace', + 'lyrics': 'skip', 'source': 'auto', } setting_options = { 'depth': ['light', 'full'], 'mode': [MODE_OVERWRITE, MODE_FILL_MISSING], 'cover_art': ['replace', 'fill_missing', 'skip'], + 'lyrics': ['fetch', 'skip'], 'source': ['auto', 'spotify', 'itunes', 'deezer', 'musicbrainz'], } auto_fix = True @@ -252,6 +283,7 @@ class LibraryRetagJob(RepairJob): settings = self._get_settings(context) mode = settings.get('mode', MODE_OVERWRITE) cover_mode = settings.get('cover_art', 'replace') + lyrics_action = (settings.get('lyrics', 'skip') or 'skip').lower() == 'fetch' dry_run = settings.get('dry_run', True) depth = settings.get('depth', 'light') source_order = self._source_order(settings) @@ -301,7 +333,8 @@ class LibraryRetagJob(RepairJob): try: self._scan_album(context, result, album_id, album_title, artist_name, - source, album_source_id, mode, cover_mode, dry_run, depth) + source, album_source_id, mode, cover_mode, dry_run, depth, + lyrics_action=lyrics_action) except Exception as e: logger.debug("Library re-tag: album %s failed: %s", album_id, e) result.errors += 1 @@ -313,7 +346,8 @@ class LibraryRetagJob(RepairJob): return result def 
_scan_album(self, context, result, album_id, album_title, artist_name, - source, album_source_id, mode, cover_mode, dry_run=True, depth='light'): + source, album_source_id, mode, cover_mode, dry_run=True, depth='light', + lyrics_action=False): # Local tracks for this album. with context.db._get_connection() as conn: cur = conn.cursor() @@ -389,23 +423,27 @@ class LibraryRetagJob(RepairJob): continue # genuinely unreachable from this process if src is None: unmatched.append(lib['title'] or os.path.basename(lib['file_path'])) - # No source match means no re-tag — but album cover art still - # applies to the file, so cover modes include an art-only plan - # (empty db_data: apply embeds art and writes no tags). - if cover_action: + # No source match means no re-tag — but album cover art and/or + # lyrics still apply to the file, so those modes include an + # art/lyrics-only plan (empty db_data → apply writes no tags). + if cover_action or lyrics_action: track_plans.append({ 'file_path': rp, 'track_id': lib['id'], 'title': lib['title'], 'changes': [], - 'db_data': {}, + # Carry the library title/artist so lyrics fetch has a query + # even when there's no source match to build db_data from. + 'db_data': ({'title': lib.get('title'), 'artist': artist_name} + if lyrics_action else {}), }) continue current = _read_current_tags(rp) plan = plan_track(current, src, album_meta, mode=mode) - # Include a track when its tags change, OR when there's a cover action - # to apply to it (db_data may be empty — apply embeds art either way). - if plan['changes'] or cover_action: + # Include a track when its tags change, OR there's a cover action, + # OR lyrics are being fetched (db_data may be empty — apply still + # embeds art / writes the .lrc). 
+ if plan['changes'] or cover_action or lyrics_action: db_data = plan['db_data'] _add_source_ids(db_data, source, album_source_id, src) tp = { @@ -421,7 +459,7 @@ class LibraryRetagJob(RepairJob): track_plans.append(tp) tag_change_tracks = sum(1 for tp in track_plans if tp['changes']) - if (not tag_change_tracks and not cover_action) or not track_plans: + if (not tag_change_tracks and not cover_action and not lyrics_action) or not track_plans: # Nothing actionable. The second clause covers cover-action albums # where no track is reachable/included — creating a finding there # gives an unappliable "(0 track(s))" entry. @@ -435,8 +473,9 @@ class LibraryRetagJob(RepairJob): # Not dry-run: apply the tags in place now (the track paths were already # isfile-checked above) and count it as an auto-fix — no finding. if not dry_run: - res = apply_track_plans(track_plans, cover_action, cover_url, full=(depth == 'full')) - if res['written'] or res['cover_written']: + res = apply_track_plans(track_plans, cover_action, cover_url, full=(depth == 'full'), + lyrics_action=lyrics_action) + if res['written'] or res['cover_written'] or res.get('lyrics_written'): result.auto_fixed += 1 else: result.errors += 1 @@ -483,6 +522,7 @@ class LibraryRetagJob(RepairJob): 'cover_mode': cover_mode, 'cover_url': cover_url, 'cover_action': cover_action, + 'lyrics_action': lyrics_action, 'tracks': track_plans, # each carries its db_data for a deterministic apply 'unmatched': unmatched, }, diff --git a/core/repair_jobs/missing_lyrics.py b/core/repair_jobs/missing_lyrics.py new file mode 100644 index 00000000..4cf02a93 --- /dev/null +++ b/core/repair_jobs/missing_lyrics.py @@ -0,0 +1,192 @@ +"""Missing Lyrics maintenance job (Sokhi) — the lyrics sibling of the Cover +Art Filler. 
+ +Scans the library for tracks that have no ``.lrc`` sidecar, asks LRClib +whether lyrics actually exist for them (so instrumentals/interludes that +genuinely have no lyrics are never flagged — Option A), and creates a finding +for each fixable track. Applying a finding fetches + writes the ``.lrc`` and +embeds the lyrics, reusing the same LyricsClient the import pipeline uses. + +Mirrors MissingCoverArtJob's "only surface actionable findings" design. +""" + +from __future__ import annotations + +import os + +from core.repair_jobs import register_job +from core.repair_jobs.base import JobContext, JobResult, RepairJob +from utils.logging_config import get_logger + +logger = get_logger("repair_jobs.missing_lyrics") + + +def _has_lrc_sidecar(file_path: str) -> bool: + """True if a .lrc (or .txt lyrics) sidecar already sits next to the file.""" + if not file_path: + return False + base = os.path.splitext(file_path)[0] + return os.path.exists(base + '.lrc') or os.path.exists(base + '.txt') + + +@register_job +class MissingLyricsJob(RepairJob): + job_id = 'missing_lyrics' + display_name = 'Lyrics Filler' + description = 'Finds tracks with no .lrc lyrics and fetches synced lyrics from LRClib' + help_text = ( + 'Scans your library for tracks that have no .lrc lyrics file next to them. ' + 'For each one it asks LRClib whether lyrics actually exist — tracks with no ' + 'lyrics available (instrumentals, interludes) are skipped, so only fixable ' + 'tracks are surfaced.\n\n' + 'When lyrics are found, a finding is created so you can review and apply it. ' + 'Applying writes a synced .lrc sidecar (or plain text if no synced version ' + 'exists) and embeds the lyrics in the file — the same way the import pipeline ' + 'and the Library Re-tag tool do.\n\n' + 'Requires LRClib to be enabled (Settings > Metadata Enhancement).' 
+ ) + icon = 'repair-icon-lyrics' + default_enabled = False + default_interval_hours = 48 + default_settings = {} + auto_fix = False + + def scan(self, context: JobContext) -> JobResult: + result = JobResult() + + # Respect the same LRClib master toggle the import pipeline uses. + if context.config_manager and context.config_manager.get( + 'metadata_enhancement.lrclib_enabled', True) is False: + logger.info("[Lyrics Filler] LRClib disabled in settings — skipping scan") + return result + + try: + from core.lyrics_client import lyrics_client + except Exception as e: + logger.warning("[Lyrics Filler] lyrics client unavailable: %s", e) + return result + if not getattr(lyrics_client, 'api', None): + logger.info("[Lyrics Filler] LRClib API not available — skipping scan") + return result + + rows = [] + conn = None + try: + conn = context.db._get_connection() + cursor = conn.cursor() + cursor.execute(""" + SELECT t.id, t.title, ar.name, al.title, t.file_path, t.duration + FROM tracks t + LEFT JOIN albums al ON al.id = t.album_id + LEFT JOIN artists ar ON ar.id = t.artist_id + WHERE t.file_path IS NOT NULL AND t.file_path != '' + AND t.title IS NOT NULL AND t.title != '' + """) + rows = cursor.fetchall() + except Exception as e: + logger.error("[Lyrics Filler] Error reading tracks: %s", e, exc_info=True) + result.errors += 1 + return result + finally: + if conn: + conn.close() + + total = len(rows) + if context.update_progress: + context.update_progress(0, total) + if context.report_progress: + context.report_progress(phase=f'Checking lyrics for {total} tracks...', total=total) + + for i, row in enumerate(rows): + if context.check_stop(): + return result + if i % 10 == 0 and context.wait_if_paused(): + return result + + track_id, title, artist_name, album_title, file_path, duration = row[:6] + result.scanned += 1 + + # Already has a sidecar on disk → nothing to do. 
+ if _has_lrc_sidecar(file_path): + result.skipped += 1 + continue + + # Option A: only flag tracks LRClib actually has lyrics for. An + # instrumental returns nothing here and is silently skipped (never + # re-flagged on future scans). + try: + duration_s = int(duration) if duration else None + except (TypeError, ValueError): + duration_s = None + try: + available = lyrics_client.has_remote_lyrics( + title, artist_name or '', album_title, duration_s) + except Exception as e: + logger.debug("[Lyrics Filler] availability check failed for '%s': %s", title, e) + available = False + + if not available: + result.skipped += 1 + if context.update_progress and (i + 1) % 10 == 0: + context.update_progress(i + 1, total) + continue + + if context.report_progress: + context.report_progress( + scanned=i + 1, total=total, + log_line=f'Found lyrics: {title} — {artist_name or "Unknown"}', + log_type='success') + + if context.create_finding: + try: + inserted = context.create_finding( + job_id=self.job_id, + finding_type='missing_lyrics', + severity='info', + entity_type='track', + entity_id=str(track_id), + file_path=file_path, + title=f'Missing lyrics: {title or "Unknown"}', + description=f'"{title}" by {artist_name or "Unknown"} has no .lrc — lyrics found on LRClib.', + details={ + 'track_id': track_id, + 'track_title': title, + 'artist': artist_name, + 'album_title': album_title, + 'file_path': file_path, + 'duration': duration_s, + }) + if inserted: + result.findings_created += 1 + else: + result.findings_skipped_dedup += 1 + except Exception as e: + logger.debug("[Lyrics Filler] create finding failed for track %s: %s", track_id, e) + result.errors += 1 + + if context.update_progress and (i + 1) % 5 == 0: + context.update_progress(i + 1, total) + + if context.update_progress: + context.update_progress(total, total) + logger.info("[Lyrics Filler] %d tracks checked, %d with lyrics found, %d skipped", + result.scanned, result.findings_created, result.skipped) + return result + + 
def estimate_scope(self, context: JobContext) -> int: + conn = None + try: + conn = context.db._get_connection() + cursor = conn.cursor() + cursor.execute(""" + SELECT COUNT(*) FROM tracks + WHERE file_path IS NOT NULL AND file_path != '' + AND title IS NOT NULL AND title != '' + """) + row = cursor.fetchone() + return row[0] if row else 0 + except Exception: + return 0 + finally: + if conn: + conn.close() diff --git a/core/repair_worker.py b/core/repair_worker.py index c6993d98..4f5392eb 100644 --- a/core/repair_worker.py +++ b/core/repair_worker.py @@ -963,6 +963,7 @@ class RepairWorker: 'orphan_file': self._fix_orphan_file, 'track_number_mismatch': self._fix_track_number, 'missing_cover_art': self._fix_missing_cover_art, + 'missing_lyrics': self._fix_missing_lyrics, 'metadata_gap': self._fix_metadata_gap, 'duplicate_tracks': self._fix_duplicates, 'single_album_redundant': self._fix_single_album_redundant, @@ -1414,6 +1415,35 @@ class RepairWorker: msg += ' + applied artist image' return {'success': True, 'action': 'applied_cover_art', 'message': msg, 'art_result': art_result} + def _fix_missing_lyrics(self, entity_type, entity_id, file_path, details): + """Apply a missing-lyrics finding: fetch + write the .lrc sidecar and + embed the lyrics, via the same LyricsClient the import pipeline uses.""" + raw_path = details.get('file_path') or file_path + if not raw_path: + return {'success': False, 'error': 'No file path in finding'} + download_folder = self._config_manager.get('soulseek.download_path', '') if self._config_manager else None + resolved = _resolve_file_path(raw_path, self.transfer_folder, download_folder, + config_manager=self._config_manager) or raw_path + if not os.path.isfile(resolved): + return {'success': False, 'error': f'File not found on disk: {os.path.basename(raw_path)}'} + try: + from core.lyrics_client import lyrics_client + duration = details.get('duration') + ok = lyrics_client.create_lrc_file( + resolved, + details.get('track_title') or 
'', + details.get('artist') or '', + album_name=details.get('album_title'), + duration_seconds=int(duration) if duration else None, + ) + except Exception as e: + logger.error("Lyrics fix failed for %s: %s", os.path.basename(raw_path), e) + return {'success': False, 'error': str(e)} + if not ok: + # Lyrics vanished between scan and apply (rare) — report, don't crash. + return {'success': False, 'error': 'Could not fetch lyrics (no longer available?)'} + return {'success': True, 'action': 'applied_lyrics', 'message': 'Wrote lyrics (.lrc) + embedded'} + def _fix_library_retag(self, entity_type, entity_id, file_path, details): """Apply a library re-tag finding: write each track's planned tags in place (core.tag_writer.write_tags_to_file) + optionally embed/refresh @@ -1439,9 +1469,10 @@ class RepairWorker: from core.repair_jobs.library_retag import apply_track_plans res = apply_track_plans(resolved_plans, details.get('cover_action'), details.get('cover_url'), - full=(details.get('depth') == 'full')) + full=(details.get('depth') == 'full'), + lyrics_action=details.get('lyrics_action', False)) - if res['written'] == 0 and not res['cover_written']: + if res['written'] == 0 and not res['cover_written'] and not res.get('lyrics_written'): return {'success': False, 'error': 'Nothing could be written — files unreachable or read-only?'} msg = f"Re-tagged {res['written']} track(s)" @@ -3145,7 +3176,7 @@ class RepairWorker: # Build query for pending fixable findings fixable_types = ('dead_file', 'orphan_file', 'track_number_mismatch', - 'missing_cover_art', 'metadata_gap', 'duplicate_tracks', + 'missing_cover_art', 'missing_lyrics', 'metadata_gap', 'duplicate_tracks', 'single_album_redundant', 'mbid_mismatch', 'album_mbid_mismatch', 'album_tag_inconsistency', diff --git a/tests/test_library_retag_job.py b/tests/test_library_retag_job.py index 9c45ef74..66f4c924 100644 --- a/tests/test_library_retag_job.py +++ b/tests/test_library_retag_job.py @@ -392,4 +392,5 @@ def 
test_apply_art_only_plan_skips_when_cover_download_fails(tmp_path, monkeypat cover_action='replace', cover_url='http://art/cover.jpg', ) - assert res == {'written': 0, 'failed': 0, 'skipped': 1, 'cover_written': False} + assert res == {'written': 0, 'failed': 0, 'skipped': 1, 'cover_written': False, + 'lyrics_written': 0} diff --git a/tests/test_missing_lyrics_job.py b/tests/test_missing_lyrics_job.py new file mode 100644 index 00000000..a3b1db92 --- /dev/null +++ b/tests/test_missing_lyrics_job.py @@ -0,0 +1,204 @@ +"""Missing Lyrics maintenance job + lyrics_client check-only seam (Sokhi). + +Mirrors the Cover Art Filler: scan only flags tracks LRClib actually has +lyrics for (Option A — instrumentals never flagged), and applying writes the +.lrc via the shared LyricsClient. +""" + +from __future__ import annotations + +import os +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +from core.lyrics_client import LyricsClient +from core.repair_jobs.missing_lyrics import MissingLyricsJob, _has_lrc_sidecar + + +# ── lyrics_client.has_remote_lyrics (check-only seam) ──────────────────────── + +def _client_with_api(api): + c = LyricsClient.__new__(LyricsClient) + c.api = api + return c + + +def test_has_remote_lyrics_true_when_synced(): + api = MagicMock() + api.get_lyrics.return_value = SimpleNamespace(synced_lyrics="[00:01]hi", plain_lyrics=None) + c = _client_with_api(api) + assert c.has_remote_lyrics("Song", "Artist", "Album", 200) is True + + +def test_has_remote_lyrics_true_when_plain_only_via_search(): + api = MagicMock() + api.get_lyrics.return_value = None + api.search_lyrics.return_value = [SimpleNamespace(synced_lyrics=None, plain_lyrics="words")] + c = _client_with_api(api) + assert c.has_remote_lyrics("Song", "Artist") is True + + +def test_has_remote_lyrics_false_when_none(): + api = MagicMock() + api.get_lyrics.return_value = None + api.search_lyrics.return_value = [] + assert 
_client_with_api(api).has_remote_lyrics("Instrumental", "Artist") is False + + +def test_has_remote_lyrics_false_when_no_api(): + c = LyricsClient.__new__(LyricsClient) + c.api = None + assert c.has_remote_lyrics("Song", "Artist") is False + + +# ── sidecar detection ──────────────────────────────────────────────────────── + +def test_has_lrc_sidecar(tmp_path): + audio = tmp_path / "track.flac" + audio.write_bytes(b"x") + assert _has_lrc_sidecar(str(audio)) is False + (tmp_path / "track.lrc").write_text("[00:01]hi") + assert _has_lrc_sidecar(str(audio)) is True + + +# ── the scan (Option A: only flag fixable tracks) ──────────────────────────── + +class _DB: + def __init__(self, rows): + self._rows = rows + + def _get_connection(self): + cur = MagicMock() + cur.execute.return_value = None + cur.fetchone.return_value = [len(self._rows)] + cur.fetchall.return_value = self._rows + conn = MagicMock() + conn.cursor.return_value = cur + return conn + + +def _ctx(db, findings): + return SimpleNamespace( + db=db, + config_manager=SimpleNamespace(get=lambda k, d=None: d), + check_stop=lambda: False, wait_if_paused=lambda: False, + update_progress=lambda *a, **k: None, report_progress=lambda *a, **k: None, + create_finding=lambda **kw: (findings.append(kw) or True), + ) + + +def test_scan_flags_only_tracks_with_available_lyrics(tmp_path, monkeypatch): + # Two tracks, neither has a .lrc. LRClib has lyrics for the first, not the second. 
+ t1 = tmp_path / "song.flac"; t1.write_bytes(b"x") + t2 = tmp_path / "instrumental.flac"; t2.write_bytes(b"x") + rows = [ + (1, "Song", "Artist", "Album", str(t1), 200), + (2, "Interlude", "Artist", "Album", str(t2), 60), + ] + fake_client = SimpleNamespace( + api=object(), + has_remote_lyrics=lambda title, artist, album, dur: title == "Song", + ) + monkeypatch.setattr("core.lyrics_client.lyrics_client", fake_client) + + findings = [] + result = MissingLyricsJob().scan(_ctx(_DB(rows), findings)) + + assert result.findings_created == 1 + assert findings[0]["entity_type"] == "track" + assert findings[0]["finding_type"] == "missing_lyrics" + assert findings[0]["details"]["track_title"] == "Song" # the instrumental was skipped + + +def test_scan_skips_tracks_that_already_have_lrc(tmp_path, monkeypatch): + t1 = tmp_path / "song.flac"; t1.write_bytes(b"x") + (tmp_path / "song.lrc").write_text("[00:01]hi") # already has lyrics + rows = [(1, "Song", "Artist", "Album", str(t1), 200)] + fake_client = SimpleNamespace(api=object(), + has_remote_lyrics=lambda *a, **k: True) + monkeypatch.setattr("core.lyrics_client.lyrics_client", fake_client) + + findings = [] + result = MissingLyricsJob().scan(_ctx(_DB(rows), findings)) + assert result.findings_created == 0 + assert findings == [] + + +def test_scan_noops_when_lrclib_disabled(monkeypatch): + db = _DB([(1, "Song", "Artist", "Album", "/x.flac", 200)]) + ctx = _ctx(db, []) + ctx.config_manager = SimpleNamespace( + get=lambda k, d=None: False if k == 'metadata_enhancement.lrclib_enabled' else d) + result = MissingLyricsJob().scan(ctx) + assert result.scanned == 0 and result.findings_created == 0 + + +# ── _fix_missing_lyrics apply handler ──────────────────────────────────────── + +def test_fix_missing_lyrics_calls_create_lrc(tmp_path, monkeypatch): + from core.repair_worker import RepairWorker + audio = tmp_path / "song.flac"; audio.write_bytes(b"x") + + w = RepairWorker.__new__(RepairWorker) + w.transfer_folder = str(tmp_path) 
+ w._config_manager = SimpleNamespace(get=lambda k, d=None: d) + + calls = {} + fake_client = SimpleNamespace( + create_lrc_file=lambda path, title, artist, album_name=None, duration_seconds=None: + calls.update(path=path, title=title, artist=artist) or True) + monkeypatch.setattr("core.lyrics_client.lyrics_client", fake_client) + # _resolve_file_path: the file is already real, so identity is fine. + monkeypatch.setattr("core.repair_worker._resolve_file_path", + lambda raw, *a, **k: raw) + + res = w._fix_missing_lyrics("track", "1", None, { + "file_path": str(audio), "track_title": "Song", "artist": "Artist", + "album_title": "Album", "duration": 200}) + assert res["success"] is True and res["action"] == "applied_lyrics" + assert calls["title"] == "Song" and calls["path"] == str(audio) + + +def test_fix_missing_lyrics_missing_file(tmp_path, monkeypatch): + from core.repair_worker import RepairWorker + w = RepairWorker.__new__(RepairWorker) + w.transfer_folder = str(tmp_path) + w._config_manager = SimpleNamespace(get=lambda k, d=None: d) + monkeypatch.setattr("core.repair_worker._resolve_file_path", lambda raw, *a, **k: raw) + res = w._fix_missing_lyrics("track", "1", None, {"file_path": str(tmp_path / "gone.flac")}) + assert res["success"] is False + + +# ── retag apply_track_plans lyrics_action ──────────────────────────────────── + +def test_apply_track_plans_lyrics_action(tmp_path, monkeypatch): + from core.repair_jobs import library_retag + audio = tmp_path / "t.flac"; audio.write_bytes(b"x") + + monkeypatch.setattr(library_retag, "write_tags_to_file", + lambda *a, **k: {"success": True}, raising=False) + seen = {} + fake_client = SimpleNamespace( + create_lrc_file=lambda path, title, artist, album_name=None, duration_seconds=None: + seen.update(title=title) or True) + monkeypatch.setattr("core.lyrics_client.lyrics_client", fake_client) + + plans = [{"file_path": str(audio), "db_data": {"title": "Song", "artist": "Artist"}}] + res = 
library_retag.apply_track_plans(plans, lyrics_action=True) + assert res["lyrics_written"] == 1 and seen["title"] == "Song" + + +def test_apply_track_plans_no_lyrics_when_disabled(tmp_path, monkeypatch): + from core.repair_jobs import library_retag + audio = tmp_path / "t.flac"; audio.write_bytes(b"x") + monkeypatch.setattr(library_retag, "write_tags_to_file", + lambda *a, **k: {"success": True}, raising=False) + called = [] + fake_client = SimpleNamespace(create_lrc_file=lambda *a, **k: called.append(1) or True) + monkeypatch.setattr("core.lyrics_client.lyrics_client", fake_client) + + plans = [{"file_path": str(audio), "db_data": {"title": "Song"}}] + res = library_retag.apply_track_plans(plans, lyrics_action=False) + assert res["lyrics_written"] == 0 and called == [] diff --git a/webui/static/enrichment.js b/webui/static/enrichment.js index dc97dc25..c1d69bb5 100644 --- a/webui/static/enrichment.js +++ b/webui/static/enrichment.js @@ -2734,6 +2734,7 @@ async function loadRepairFindings() { duplicate_tracks: 'Duplicate', incomplete_album: 'Incomplete', path_mismatch: 'Path Mismatch', metadata_gap: 'Missing Metadata', missing_cover_art: 'Missing Art', track_number_mismatch: 'Track Number', + missing_lyrics: 'Missing Lyrics', missing_lossy_copy: 'No Lossy Copy', library_retag: 'Re-tag' }; @@ -2743,6 +2744,7 @@ async function loadRepairFindings() { orphan_file: 'Resolve', track_number_mismatch: 'Fix', missing_cover_art: 'Apply Art', + missing_lyrics: 'Apply Lyrics', metadata_gap: 'Apply', duplicate_tracks: 'Keep Best', incomplete_album: 'Auto-Fill', @@ -2759,6 +2761,7 @@ async function loadRepairFindings() { removed_db_entry: 'Entry Removed', added_to_wishlist: 'Wishlisted', deleted_file: 'File Deleted', already_gone: 'Already Gone', fixed_track_number: 'Track # Fixed', applied_cover_art: 'Art Applied', applied_metadata: 'Metadata Applied', + applied_lyrics: 'Lyrics Applied', removed_duplicates: 'Duplicates Removed', }; let statusBadge = ''; @@ -3127,6 +3130,12 @@ 
function _renderFindingDetail(f) { artHtml += _gridRows(rows); return artHtml; + case 'missing_lyrics': + if (d.track_title) rows.push(['Track', d.track_title]); + if (d.artist) rows.push(['Artist', d.artist]); + if (d.album_title) rows.push(['Album', d.album_title]); + return _gridRows(rows); + case 'track_number_mismatch': if (d.album_title) rows.push(['Album', d.album_title]); if (d.artist_name) rows.push(['Artist', d.artist_name]);