From 698ecc99f00e12941f2693bf08c76a0db216d76a Mon Sep 17 00:00:00 2001 From: Broque Thomas <26755000+Nezreka@users.noreply.github.com> Date: Tue, 12 May 2026 12:53:37 -0700 Subject: [PATCH] Import history: Clear History button now sweeps stuck 'processing' rows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported: Clear History button on the Import page left zombie rows behind. Every survivor showed "⧗ Processing" status from 2-9 days ago. Trace: `_record_in_progress` inserts a `status='processing'` row up-front so the UI can render the in-flight import while it runs; `_finalize_result` updates it to `completed`/`failed` when the import finishes. When the worker is killed mid-import (server restart, crash), the row never gets finalized — stays at `processing` forever. The clear-history endpoint's SQL `DELETE ... WHERE status IN (...)` listed every terminal status but omitted `processing`, so zombies survived every click. Fix: add `processing` to the delete list, but guard against nuking genuinely-live imports by intersecting against the worker's `_snapshot_active()` map — any folder hash currently registered in `_active_imports` is excluded from the delete via an `AND folder_hash NOT IN (...)` clause. `pending_review` deliberately left out so user still has to approve/reject those explicitly. One endpoint touched (`/api/auto-import/clear-completed` in web_server.py). No worker changes — guard reuses the existing `_snapshot_active()` method that the UI poller already calls. 5 new tests in `tests/imports/test_auto_import_clear_completed_endpoint.py`: - Zombie `processing` rows swept, live `processing` row preserved (folder_hash currently in `_active_imports` survives) - Response count matches actual delete count - Empty active-set branch (unparameterized DELETE) — pinned because an empty SQL `IN ()` would be a syntax error - Worker-unavailable returns 500 (pre-existing guard not regressed) - `pending_review` rows always survive — never auto-swept Full pytest sweep: 2758 passed (one pre-existing flaky timing test on `test_import_singles_parallel.py` failed under full-suite CPU load, passes in isolation in 2.95s — unrelated to this change). --- ...st_auto_import_clear_completed_endpoint.py | 232 ++++++++++++++++++ web_server.py | 25 +- webui/static/helper.js | 1 + 3 files changed, 256 insertions(+), 2 deletions(-) create mode 100644 tests/imports/test_auto_import_clear_completed_endpoint.py diff --git a/tests/imports/test_auto_import_clear_completed_endpoint.py b/tests/imports/test_auto_import_clear_completed_endpoint.py new file mode 100644 index 00000000..99c5077b --- /dev/null +++ b/tests/imports/test_auto_import_clear_completed_endpoint.py @@ -0,0 +1,232 @@ +"""Pin /api/auto-import/clear-completed behavior. + +Reported case: Clear History button on the Import page left zombie +rows behind — every survivor showed "⧗ Processing" from 2-9 days ago. +Trace: `_record_in_progress` inserts a `status='processing'` row up-front +so the UI can render the in-flight import; `_finalize_result` updates +it to `completed`/`failed` when the import finishes. If the worker is +killed mid-import (server restart, crash), the row never gets finalized +and stays at `processing` forever. The endpoint's SQL delete-list +omitted `processing`, so zombies survived every click. + +Fix added `processing` to the delete list, BUT guards against nuking +genuinely-live imports by intersecting against the worker's +`_snapshot_active()` map — any folder hash currently registered there +is excluded from the delete. + +These tests pin: +- `processing` rows ARE swept (no longer zombies) +- Live `processing` rows (folder hash currently in `_active_imports`) survive +- `pending_review` survives (user still must approve/reject) +- `completed` / `approved` / `failed` / `needs_identification` / + `rejected` rows still get swept (unchanged contract) +- Count returned in the JSON response matches the actual delete count +- Empty active set falls through to the unparameterized DELETE +""" + +from __future__ import annotations + +import sqlite3 +from contextlib import contextmanager +from unittest.mock import MagicMock + +import pytest + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def app_test_client(): + import web_server + web_server.app.config['TESTING'] = True + with web_server.app.test_client() as client: + yield client + + +@pytest.fixture +def seeded_db(tmp_path): + """Real sqlite DB with the auto_import_history table populated by + a mix of statuses + folder hashes. Returns a (connection_factory, + rows_seeded) tuple. The factory is a `_get_connection` lookalike + that returns the same connection so the endpoint sees the same data.""" + db_path = str(tmp_path / 'test.db') + conn = sqlite3.connect(db_path, check_same_thread=False) + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + cursor.execute(""" + CREATE TABLE auto_import_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + folder_name TEXT NOT NULL, + folder_path TEXT NOT NULL, + folder_hash TEXT, + status TEXT NOT NULL DEFAULT 'scanning', + confidence REAL DEFAULT 0.0, + album_id TEXT, + album_name TEXT, + artist_name TEXT, + image_url TEXT, + total_files INTEGER DEFAULT 0, + matched_files INTEGER DEFAULT 0, + match_data TEXT, + identification_method TEXT, + error_message TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + processed_at TIMESTAMP + ) + """) + seeds = [ + # (folder_name, folder_hash, status) + ('Album A', 'hash-completed-1', 'completed'), + ('Album B', 'hash-approved-1', 'approved'), + ('Album C', 'hash-failed-1', 'failed'), + ('Album D', 'hash-needsid-1', 'needs_identification'), + ('Album E', 'hash-rejected-1', 'rejected'), + ('Zombie F', 'hash-zombie-1', 'processing'), # stale + ('Zombie G', 'hash-zombie-2', 'processing'), # stale + ('Live Import H', 'hash-LIVE', 'processing'), # active — must survive + ('Awaiting Review I', 'hash-review-1', 'pending_review'), # must survive + ] + for name, fh, status in seeds: + cursor.execute( + "INSERT INTO auto_import_history (folder_name, folder_path, folder_hash, status) " + "VALUES (?, ?, ?, ?)", + (name, f"/staging/{name}", fh, status), + ) + conn.commit() + + # Build a fake DB object whose `_get_connection` returns a context + # manager wrapping the live connection (matches the endpoint's + # `with db._get_connection() as conn` usage). + @contextmanager + def _conn_cm(): + yield conn + + fake_db = MagicMock() + fake_db._get_connection = _conn_cm + + return fake_db, conn + + +def _statuses_remaining(conn): + cur = conn.cursor() + cur.execute("SELECT folder_name, status FROM auto_import_history ORDER BY id") + return [(row['folder_name'], row['status']) for row in cur.fetchall()] + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestClearCompletedEndpoint: + def test_sweeps_zombie_processing_rows_but_keeps_live(self, app_test_client, seeded_db, monkeypatch): + """The bug: a `processing` row that's been there for days is a + zombie (server restart killed `_finalize_result`). Endpoint must + sweep it. But a `processing` row whose folder_hash is currently + registered in `_active_imports` is a LIVE import — must survive + or the UI loses its in-flight row mid-run.""" + fake_db, conn = seeded_db + # Worker reports one live import — folder_hash hash-LIVE + fake_worker = MagicMock() + fake_worker._snapshot_active.return_value = [{'folder_hash': 'hash-LIVE'}] + monkeypatch.setattr('web_server.auto_import_worker', fake_worker) + monkeypatch.setattr('web_server.get_database', lambda: fake_db) + + resp = app_test_client.post('/api/auto-import/clear-completed') + assert resp.status_code == 200 + body = resp.get_json() + assert body['success'] is True + + survivors = _statuses_remaining(conn) + names = {n for n, _ in survivors} + # Live import survives + assert 'Live Import H' in names, ( + f"Live in-flight processing row was deleted; survivors={names}" + ) + # Pending review survives — user still must approve/reject + assert 'Awaiting Review I' in names + # Both zombies swept + assert 'Zombie F' not in names + assert 'Zombie G' not in names + # Standard terminal-status rows swept + for completed_name in ('Album A', 'Album B', 'Album C', 'Album D', 'Album E'): + assert completed_name not in names, f"{completed_name} should have been deleted" + + def test_count_in_response_matches_actual_deletes(self, app_test_client, seeded_db, monkeypatch): + """JSON response carries the rowcount so the UI toast can show + accurate `Cleared N items`.""" + fake_db, conn = seeded_db + fake_worker = MagicMock() + fake_worker._snapshot_active.return_value = [{'folder_hash': 'hash-LIVE'}] + monkeypatch.setattr('web_server.auto_import_worker', fake_worker) + monkeypatch.setattr('web_server.get_database', lambda: fake_db) + + resp = app_test_client.post('/api/auto-import/clear-completed') + body = resp.get_json() + # 9 rows seeded; 7 deletable (5 terminal + 2 zombie processing); + # 2 survive (1 live, 1 pending_review) + assert body['count'] == 7, f"Expected 7 deletes; got {body['count']}" + cur = conn.cursor() + cur.execute("SELECT COUNT(*) FROM auto_import_history") + assert cur.fetchone()[0] == 2 + + def test_empty_active_set_takes_unparameterized_path(self, app_test_client, seeded_db, monkeypatch): + """When no live imports are running, the SQL skips the `AND + folder_hash NOT IN (...)` clause. Pinned because an empty + `IN ()` is a SQL syntax error in sqlite — the branch matters.""" + fake_db, conn = seeded_db + # Remove the live row so all `processing` rows are zombies + cur = conn.cursor() + cur.execute("DELETE FROM auto_import_history WHERE folder_hash = ?", ('hash-LIVE',)) + conn.commit() + + fake_worker = MagicMock() + fake_worker._snapshot_active.return_value = [] # nothing active + monkeypatch.setattr('web_server.auto_import_worker', fake_worker) + monkeypatch.setattr('web_server.get_database', lambda: fake_db) + + resp = app_test_client.post('/api/auto-import/clear-completed') + assert resp.status_code == 200 + body = resp.get_json() + assert body['success'] is True + # 5 terminal + 2 zombie processing = 7. Pending_review (1) survives. + assert body['count'] == 7 + + survivors = _statuses_remaining(conn) + assert len(survivors) == 1 + assert survivors[0][1] == 'pending_review' + + def test_worker_unavailable_returns_500(self, app_test_client, monkeypatch): + """If the auto-import worker isn't initialised, the endpoint + bails early — no DB access, clear error. Pre-fix this branch + was already in place; pinning ensures the active-hash refactor + didn't accidentally start touching the worker before the guard.""" + monkeypatch.setattr('web_server.auto_import_worker', None) + resp = app_test_client.post('/api/auto-import/clear-completed') + assert resp.status_code == 500 + body = resp.get_json() + assert body['success'] is False + assert 'not available' in body['error'].lower() + + def test_pending_review_always_survives(self, app_test_client, seeded_db, monkeypatch): + """Specific pin for the deliberate `pending_review` exclusion. + Even when no imports are active and every other status is being + swept, `pending_review` rows must be left alone — user-action + required, not automatic cleanup.""" + fake_db, conn = seeded_db + fake_worker = MagicMock() + fake_worker._snapshot_active.return_value = [] + monkeypatch.setattr('web_server.auto_import_worker', fake_worker) + monkeypatch.setattr('web_server.get_database', lambda: fake_db) + + app_test_client.post('/api/auto-import/clear-completed') + + cur = conn.cursor() + cur.execute("SELECT COUNT(*) FROM auto_import_history WHERE status = 'pending_review'") + assert cur.fetchone()[0] == 1, ( + "pending_review rows must never be swept by clear-completed" + ) diff --git a/web_server.py b/web_server.py index dd5632a7..65545a2d 100644 --- a/web_server.py +++ b/web_server.py @@ -34625,14 +34625,35 @@ def auto_import_approve_all(): @app.route('/api/auto-import/clear-completed', methods=['POST']) def auto_import_clear_completed(): - """Remove completed/imported items from history.""" + """Remove completed/imported items from history. + + `processing` rows are included so zombie entries (server restarted + mid-import → `_record_in_progress` row never got finalized) get + swept. Live in-flight imports are protected by intersecting against + `_snapshot_active()` — anything currently registered in the worker's + `_active_imports` map keeps its row. `pending_review` is left out so + user still has to approve/reject those explicitly. + """ if not auto_import_worker: return jsonify({"success": False, "error": "Auto-import not available"}), 500 try: + active_hashes = {e['folder_hash'] for e in auto_import_worker._snapshot_active()} db = get_database() with db._get_connection() as conn: cursor = conn.cursor() - cursor.execute("DELETE FROM auto_import_history WHERE status IN ('completed', 'approved', 'failed', 'needs_identification', 'rejected')") + base_sql = ( + "DELETE FROM auto_import_history " + "WHERE status IN ('completed', 'approved', 'failed', " + "'needs_identification', 'rejected', 'processing')" + ) + if active_hashes: + placeholders = ','.join('?' * len(active_hashes)) + cursor.execute( + f"{base_sql} AND folder_hash NOT IN ({placeholders})", + tuple(active_hashes), + ) + else: + cursor.execute(base_sql) count = cursor.rowcount conn.commit() return jsonify({"success": True, "count": count}) diff --git a/webui/static/helper.js b/webui/static/helper.js index fe4a07c2..577946fd 100644 --- a/webui/static/helper.js +++ b/webui/static/helper.js @@ -3416,6 +3416,7 @@ const WHATS_NEW = { '2.5.1': [ // --- post-release patch work on the 2.5.1 line — entries hidden by _getLatestWhatsNewVersion until the build version bumps --- { date: 'Unreleased — 2.5.1 patch work' }, + { title: 'Import History: Clear History Button Now Clears Stuck "Processing" Rows', desc: 'noticed on the import page: clear history left zombie rows behind that all showed "⧗ processing" status from 2-9 days ago. trace: `_record_in_progress` inserts a `status=\'processing\'` row up-front so the ui can render the in-flight import while it runs, then `_finalize_result` updates it to `completed`/`failed` when the import finishes. when the server is restarted mid-import (or the worker crashes), the row never gets finalized — stays at `processing` forever. the clear-history endpoint\'s sql `DELETE ... WHERE status IN (\'completed\', \'approved\', \'failed\', \'needs_identification\', \'rejected\')` didn\'t include `processing`, so those zombies survived every click. fix: add `processing` to the delete list, but guard against nuking actually-live imports by intersecting against `_snapshot_active()` — any folder hash currently registered in the worker\'s in-memory `_active_imports` map is excluded from the delete. `pending_review` deliberately left out so user still has to approve/reject those explicitly. one endpoint touched (`/api/auto-import/clear-completed` in web_server.py). no worker changes. zombie-row pile gets swept on next click, new imports still record + update normally.', page: 'import' }, { title: 'Auto-Import: Falls Through To Other Metadata Sources When Primary Has No Match', desc: 'discord report (mushy): 16 bandcamp indie albums sat in staging because auto-import couldn\'t identify them. manual search at the bottom of the import music tab found the same albums fine — they just weren\'t on the user\'s primary metadata source (spotify) but existed on tidal/deezer. trace: `_search_metadata_source` in `core/auto_import_worker.py` only queried `get_primary_source()` — single source, no fallback. meanwhile `search_import_albums` (the manual search bar at the bottom of the tab) already iterated the full `get_source_priority(get_primary_source())` chain and broke on first source with results. asymmetric behavior — manual search worked, auto-import didn\'t, same album. fix: lift auto-import to use the same source-chain pattern. try primary first; if it returns nothing OR scores below the 0.4 threshold, fall through to next source in priority order. first source that produces a strong-enough match wins. result dict carries the `source` that actually matched (not the primary name), so downstream `_match_tracks` calls the right client to fetch the album\'s tracklist. defensive per-source try/except so a rate-limited or auth-failed source doesn\'t abort the chain. unconfigured sources (client=None) silently skipped. scoring math lifted to pure helper `_score_album_search_result` so weight tweaks (album 50% / artist 20% / track-count 30%) are pinned at the function boundary independent of the orchestrator. weight constants exposed at module level (`_ALBUM_NAME_WEIGHT`, `_ARTIST_NAME_WEIGHT`, `_TRACK_COUNT_WEIGHT`) — greppable, bumpable in one place. 9 integration tests + 18 scoring-helper tests. integration tests pin: primary-success path unchanged (no fallback fires, only primary client called), primary-empty falls through to next source, primary-weak-score falls through, first fallback success stops the chain (no wasted api calls on remaining sources), all-sources-fail returns None, per-source exception contained, unconfigured-source skipped gracefully, result `source` field reflects winning source, `identification_confidence` from winning source. backwards compatible — single-source users see no change (chain just has one entry).', page: 'import' }, { title: 'Multi-Artist Tag Settings Now Actually Work (artist_separator + feat_in_title + write_multi_artist)', desc: 'three settings on settings → metadata → tags were partially or completely unimplemented. (1) `write_multi_artist` only worked because of a never-populated `_artists_list` field — `core/metadata/source.py` built `metadata["artist"]` as a hardcoded ", "-joined string but never assigned `metadata["_artists_list"]`, so `core/metadata/enrichment.py:114` always saw an empty list and silently no-op\'d the multi-value tag write. (2) `artist_separator` (default ", ") was referenced in the UI + settings.js save path but ZERO python code read the value — every multi-artist track ended up with hardcoded ", " regardless of what the user picked. (3) `feat_in_title` (when true: pull featured artists into the title as " (feat. X, Y)" and leave only primary in the ARTIST tag — picard convention) had no implementation at all. fix in source.py: populate `_artists_list` from the search response\'s artists array, then build the ARTIST string per the user\'s settings — primary-only when feat_in_title is on (with featured names appended to title; double-append guarded for source titles that already include "feat."), else joined with the configured separator. fix in enrichment.py id3 path: writing TPE1 twice (single-string then list) was overwriting the configured separator. now keeps TPE1 as the display string and writes a separate `TXXX:Artists` frame for the multi-value list (picard convention). vorbis path was already correct (separate "artist" + "artists" keys). deezer-specific upgrade path: deezer\'s `/search` endpoint only returns the primary artist — full contributors live on `/track/`. when source==deezer AND the search response had a single artist AND a track_id is available, enrichment now fetches the per-track endpoint and upgrades the artists list before tag-write. one extra API call per affected deezer track (skipped when search already returned multiple). spotify, tidal, itunes search responses already include all artists so they\'re unaffected. 29 new tests pin: `_artists_list` populated for multi/single/no-artist cases, separator drives ARTIST string (default + custom), single-artist case unaffected by either setting, feat_in_title pulls featured to title + leaves primary in ARTIST, feat_in_title no-op for single artist, double-append guard recognizes 9 source-title variants ("(feat. X)", "(Feat. X)", "(FEAT X)", "(feat X)", "(Featuring X)", "[feat. X]", "ft. X", "(ft X)", "FT. X"), word-boundary regex doesn\'t false-match substrings ("Aftermath" still gets the append), combined-settings precedence (feat_in_title wins over separator for ARTIST string but `_artists_list` carries everyone for the multi-value tag), deezer upgrade fires only when search returned single artist + track_id available, no upgrade for non-deezer sources, upgrade failure falls through to search-result list, no false-positive when /track/ confirms single artist.', page: 'settings' }, { title: 'AudioDB Enrichment: Track Worker No Longer Stuck In Infinite Retry Loop', desc: 'github issue #553: audiodb track enrichment "stuck" — constant requests, no progress, only error log was a 10s read-timeout from `lookup_track_by_id` repeating against the same track. trace: when an entity already has `audiodb_id` populated (from manual match or earlier scan) but `audiodb_match_status` is NULL, the worker tries a direct ID lookup. if it fails (returns None on timeout — audiodb\'s `track.php` endpoint is slow, 10s timeouts common), the prior code logged "preserving manual match" and returned WITHOUT marking status. row stayed NULL → queue picked it up next tick → tried direct lookup → timed out → returned → infinite loop. fix: (1) when direct lookup fails (None or exception), mark `audiodb_match_status="error"` so the queue\'s NULL-status filter stops re-picking the row on every tick. preserves the existing `audiodb_id` (no fallback to name-search guess that would overwrite a manual match). (2) extended the retry-after-cutoff queue priorities (4/5/6) to include `\'error\'` rows alongside `\'not_found\'` — same `retry_days=30` window. transient audiodb outages still recover automatically; permanently-broken IDs eventually get re-attempted once a month. only triggered for entities in the inconsistent state of `audiodb_id` set + `match_status` NULL — happy path and already-matched/already-not-found rows unchanged. 5 new tests pin: lookup-returns-none marks error (no infinite loop), lookup-raises-exception marks error, lookup-success preserves happy path, error-row-past-cutoff gets re-picked, error-row-within-cutoff stays skipped.', page: 'tools' },