From 76c63b5bc4cf58ccea29f9960eb8b43f17c2f017 Mon Sep 17 00:00:00 2001 From: BoulderBadgeDad Date: Sat, 6 Jun 2026 23:36:14 -0700 Subject: [PATCH] #806 lock-in: archive.org outage cooldown for CAA originals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lock-in pass caught the cost hole: art is fetched PER TRACK, and the old code never touched archive.org at all — so an archive.org outage was free, while the new native-first chain would pay a 10s timeout on every track (a 12-track album = +2 minutes, exactly the import-slowness class we spent today killing). One failed original now puts originals on a 10-minute cooldown: subsequent fetches go straight to the 1200px CDN midpoint (the pre-#806 behavior, full speed) and recover automatically when the cooldown expires. Locked by a test: track 1 pays the failure once, track 2 never touches the original. (Also: the missing time import the first run caught.) --- core/metadata/artwork.py | 44 +++++++++++++++++------ tests/metadata/test_artwork_resolution.py | 32 +++++++++++++++++ 2 files changed, 65 insertions(+), 11 deletions(-) diff --git a/core/metadata/artwork.py b/core/metadata/artwork.py index 489472a5..3698a064 100644 --- a/core/metadata/artwork.py +++ b/core/metadata/artwork.py @@ -4,6 +4,7 @@ from __future__ import annotations import os import re +import time import urllib.request from ipaddress import ip_address from urllib.parse import quote, urlparse @@ -301,25 +302,38 @@ def _upgrade_art_url(art_url: str) -> str: return art_url +# Negative cache for CAA originals: art is fetched PER TRACK, and the bare +# /front original rides archive.org. During an archive.org outage every track +# would otherwise pay a 10s timeout before falling back — a 12-track album +# would eat +2 minutes. One failure puts originals on cooldown; fetches go +# straight to the 1200px CDN (the pre-#806 behavior, full speed) until then. +_caa_original_down_until = 0.0 +_CAA_ORIGINAL_COOLDOWN_S = 600 + + def _fetch_art_bytes(art_url: str): """Fetch artwork bytes at the highest resolution the source serves. - Upgrades the URL via `_upgrade_art_url`, then fetches. If the upgraded - (larger) size is refused by the CDN, retry once with the original URL so - we never regress vs. the un-upgraded behavior. Empirically the upgrade - works for every album tested; the fallback just defends the edge case. + Upgrades the URL via `_upgrade_art_url`, then walks a fallback chain so a + refused size degrades gracefully and never regresses below the original + URL's behavior. For Cover Art Archive that chain is + original (/front) -> 1200px CDN thumbnail -> the original sized URL. Returns `(image_data, mime_type)` or `(None, None)` on failure. """ + global _caa_original_down_until if not art_url: return None, None upgraded = _upgrade_art_url(art_url) - attempts = [upgraded] - # CAA's bare /front original redirects to archive.org, which can be flaky. - # Midpoint fallback: the 1200px CDN thumbnail (the pre-#806 behavior), - # tried BEFORE the original sized URL so a flaky archive.org degrades to - # 1200px — never all the way down to the 250px thumbnail. - if "coverartarchive.org" in upgraded and upgraded.endswith("/front"): + is_caa_original = "coverartarchive.org" in upgraded and upgraded.endswith("/front") + + attempts = [] + if not (is_caa_original and time.time() < _caa_original_down_until): + attempts.append(upgraded) + if is_caa_original: + # Midpoint fallback: the 1200px CDN thumbnail (the pre-#806 behavior), + # tried BEFORE the original sized URL so a flaky archive.org degrades + # to 1200px — never all the way down to the 250px thumbnail. attempts.append(upgraded + "-1200") if art_url not in attempts: attempts.append(art_url) @@ -331,7 +345,15 @@ def _fetch_art_bytes(art_url: str): return response.read(), (response.info().get_content_type() or "image/jpeg") except Exception as fetch_err: last_err = fetch_err - if i < len(attempts) - 1: + if is_caa_original and candidate == upgraded: + # archive.org refused the original — cool down so the next + # tracks of this batch skip straight to the CDN thumbnail. + _caa_original_down_until = time.time() + _CAA_ORIGINAL_COOLDOWN_S + logger.info( + "CAA original refused (%s); using 1200px CDN for the next %d min", + fetch_err, _CAA_ORIGINAL_COOLDOWN_S // 60, + ) + elif i < len(attempts) - 1: logger.info("Art URL refused (%s); falling back to next size", fetch_err) logger.error("Art fetch failed after %d attempt(s): %s", len(attempts), last_err) return None, None diff --git a/tests/metadata/test_artwork_resolution.py b/tests/metadata/test_artwork_resolution.py index d92526b6..dadcc2c3 100644 --- a/tests/metadata/test_artwork_resolution.py +++ b/tests/metadata/test_artwork_resolution.py @@ -164,6 +164,12 @@ class TestFetchArtBytes: # ── #806: CAA native-res chain — original → 1200px midpoint → thumbnail ── + @pytest.fixture(autouse=True) + def _reset_caa_cooldown(self, monkeypatch): + # The negative cache persists module-globally; isolate every test. + import core.metadata.artwork as aw + monkeypatch.setattr(aw, '_caa_original_down_until', 0.0) + def test_caa_fetches_native_original_first(self, monkeypatch): calls = [] @@ -218,3 +224,29 @@ class TestFetchArtBytes: 'https://coverartarchive.org/release/r1/front-1200', 'https://coverartarchive.org/release/r1/front-250', ] + + def test_caa_outage_cooldown_skips_originals_for_subsequent_fetches(self, monkeypatch): + """One archive.org failure puts originals on cooldown: the NEXT track's + fetch goes straight to the 1200px CDN — no 10s timeout per track during + an outage (art is fetched per track).""" + calls = [] + + def fake_urlopen(url, timeout=None): + calls.append(url) + if url.endswith('/front'): + raise Exception('archive.org down') + return _FakeResponse(b'cdn-1200px') + + monkeypatch.setattr('core.metadata.artwork.urllib.request.urlopen', fake_urlopen) + + # Track 1: pays the failed original once, falls back to 1200. + _fetch_art_bytes('https://coverartarchive.org/release/r1/front-250') + # Track 2: cooldown active — never touches the original. + data, _ = _fetch_art_bytes('https://coverartarchive.org/release/r2/front-250') + + assert data == b'cdn-1200px' + assert calls == [ + 'https://coverartarchive.org/release/r1/front', # track 1 pays once + 'https://coverartarchive.org/release/r1/front-1200', # and falls back + 'https://coverartarchive.org/release/r2/front-1200', # track 2 skips straight to CDN + ]