From 2fcdfd3145d415355b9202c406d8985fda8eb003 Mon Sep 17 00:00:00 2001 From: BoulderBadgeDad Date: Tue, 2 Jun 2026 14:10:02 -0700 Subject: [PATCH] Canonical findings: include as much (free) data as possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per request, pack each finding with everything available WITHOUT extra API calls (kettui: reuse what's already fetched, read the album row we already loaded, degrade per-field, keep it tested): - Pinned release's track titles — already fetched during scoring, so free (capped at 60 to bound details_json). - From the album row (free): year, DB track count, total duration, genres-free context, and the album's currently-linked source IDs. - file_track_titles (your library's titles) for a side-by-side with the release. - Artist + album thumbs (artist via the guarded lookup) and names. _describe_pin now renders: "Artist — Album (year)", the fit breakdown, "Currently linked: … → pinning X", "Beat: …", and the release tracklist — so the card is judge-able at a glance, and the structured fields are in details for a richer UI. NOT included (would cost an extra per-album API fetch, left as opt-in): the *release's* own year/type/cover/URL from get_album_for_source, vs the library's. Tests: _describe_pin rich-render (year/linked/tracklist), resolver release-titles, orchestration free-context fields. 94 canonical + reorganize regression pass. 
--- core/metadata/canonical_resolver.py | 14 +++++++++++ core/repair_jobs/canonical_version_resolve.py | 24 ++++++++++++++++++- tests/test_canonical_orchestration.py | 5 ++++ tests/test_canonical_version_job.py | 15 ++++++++++++ 4 files changed, 57 insertions(+), 1 deletion(-) diff --git a/core/metadata/canonical_resolver.py b/core/metadata/canonical_resolver.py index b521dcb1..8ce84d40 100644 --- a/core/metadata/canonical_resolver.py +++ b/core/metadata/canonical_resolver.py @@ -115,6 +115,11 @@ def resolve_canonical_for_album( return None detail = score_release_detail(file_tracks, winner['_tracks']) + # Pinned-release track titles — already fetched, so free. Capped so a giant + # box set can't bloat the finding's details_json. + release_titles = [ + (t.get('title') or t.get('name') or '') for t in winner['_tracks'] + ][:60] return { 'source': winner['source'], 'album_id': winner['album_id'], @@ -124,6 +129,7 @@ def resolve_canonical_for_album( 'count_fit': detail['count_fit'], 'duration_fit': detail['duration_fit'], 'title_fit': detail['title_fit'], + 'release_track_titles': release_titles, 'candidates': [ {'source': e['source'], 'album_id': e['album_id'], 'track_count': e['track_count'], 'score': e['score']} @@ -243,6 +249,14 @@ def resolve_and_store_canonical_for_album( # already loaded — no extra query). Storage only uses source/id/score. result['album_title'] = album_data.get('title') or '' result['artist_name'] = album_data.get('artist_name') or '' + # Free context off the album row + the data we already gathered. 
+ if album_data.get('year'): + result['year'] = album_data['year'] + result['db_track_count'] = album_data.get('track_count') or len(file_tracks) + if album_data.get('duration'): + result['db_duration_ms'] = album_data['duration'] + result['linked_sources'] = source_ids # {source: album_id} the album points at now + result['file_track_titles'] = [ft.get('title') or '' for ft in file_tracks][:60] if album_data.get('thumb_url'): result['album_thumb_url'] = album_data['thumb_url'] # Artist thumb via a guarded lookup (not the shared album loader — some diff --git a/core/repair_jobs/canonical_version_resolve.py b/core/repair_jobs/canonical_version_resolve.py index 6c802040..ea7b91a3 100644 --- a/core/repair_jobs/canonical_version_resolve.py +++ b/core/repair_jobs/canonical_version_resolve.py @@ -28,7 +28,14 @@ def _pct(v) -> str: def _describe_pin(resolved: dict) -> str: """Human-readable, judge-able explanation of WHY this release was chosen.""" + artist = resolved.get('artist_name') or '' + album = resolved.get('album_title') or '' + head = f"{artist} — {album}".strip(" —") or resolved.get('album_id', '') + year = resolved.get('year') + if year: + head += f" ({year})" lines = [ + f"{head}" if head else "", f"Pin {resolved['source']} release {resolved['album_id']} " f"(confidence {_pct(resolved.get('score'))}).", f"Fit to your library: {resolved.get('file_track_count', '?')} files vs " @@ -37,6 +44,13 @@ def _describe_pin(resolved: dict) -> str: f"durations {_pct(resolved.get('duration_fit'))}, " f"titles {_pct(resolved.get('title_fit'))}.", ] + + # What the album is currently linked to vs what we'd pin. 
+ linked = resolved.get('linked_sources') or {} + if linked: + linked_str = ", ".join(f"{s}={i}" for s, i in linked.items()) + lines.append(f"Currently linked: {linked_str} → pinning {resolved['source']}.") + others = [c for c in resolved.get('candidates', []) if c.get('source') != resolved.get('source')] if others: comp = ", ".join( @@ -45,7 +59,15 @@ def _describe_pin(resolved: dict) -> str: lines.append(f"Beat: {comp}.") elif len(resolved.get('candidates', [])) == 1: lines.append("Only this source had a release linked for this album.") - return "\n".join(lines) + + # Track listing of the pinned release (so you can eyeball the actual songs). + titles = resolved.get('release_track_titles') or [] + if titles: + shown = "; ".join(f"{i+1}. {t}" for i, t in enumerate(titles[:25])) + more = f" (+{len(titles) - 25} more)" if len(titles) > 25 else "" + lines.append(f"Release tracks: {shown}{more}") + + return "\n".join(l for l in lines if l) @register_job diff --git a/tests/test_canonical_orchestration.py b/tests/test_canonical_orchestration.py index 931302a3..ec933bb1 100644 --- a/tests/test_canonical_orchestration.py +++ b/tests/test_canonical_orchestration.py @@ -94,6 +94,11 @@ def test_result_includes_artist_and_album_context(tmp_path): assert out["artist_name"] == "Imagine Dragons" assert out["album_thumb_url"] == "http://album.jpg" assert out["artist_thumb_url"] == "http://artist.jpg" + # free context: db track count, linked sources, and both title lists + assert out["db_track_count"] == 11 + assert out["linked_sources"] == {"spotify": "sp1"} + assert out["file_track_titles"][0] == "Song 1" and len(out["file_track_titles"]) == 11 + assert "Song 1" in out["release_track_titles"] def test_resolve_returns_none_when_album_has_no_source_ids(tmp_path): diff --git a/tests/test_canonical_version_job.py b/tests/test_canonical_version_job.py index 9e720adc..f033ecb8 100644 --- a/tests/test_canonical_version_job.py +++ b/tests/test_canonical_version_job.py @@ -75,6 +75,21 @@ 
def test_describe_pin_is_judgeable(): assert "Beat:" in desc and "spotify 65% (17 tk)" in desc # what it beat +def test_describe_pin_includes_year_linked_and_tracklist(): + desc = _describe_pin({ + "source": "deezer", "album_id": "dz1", "score": 1.0, + "artist_name": "Lenka", "album_title": "Souls of Serenity", "year": 2017, + "file_track_count": 3, "release_track_count": 3, + "count_fit": 1.0, "duration_fit": 1.0, "title_fit": 1.0, + "linked_sources": {"spotify": "sp1", "deezer": "dz1"}, + "release_track_titles": ["The Show", "Trouble Is a Friend", "Everything at Once"], + "candidates": [{"source": "deezer", "album_id": "dz1", "track_count": 3, "score": 1.0}], + }) + assert "Lenka — Souls of Serenity (2017)" in desc + assert "Currently linked: spotify=sp1, deezer=dz1 → pinning deezer" in desc + assert "Release tracks: 1. The Show; 2. Trouble Is a Friend; 3. Everything at Once" in desc + + def test_describe_pin_single_source(): desc = _describe_pin({ "source": "spotify", "album_id": "x", "score": 0.9,