Add regression tests for config DB retry behaviour (#434)

Pin the new save-retry contract so future changes can't silently re-introduce the spam reported in #434: - Happy-path saves emit zero ERROR logs. - Transient locks during retries log at DEBUG, not ERROR. - Six attempts run before giving up, with the documented backoff schedule (0.2 + 0.5 + 1.0 + 2.0 + 4.0s). - Genuine exhaustion logs a single ERROR and writes config.json. - sqlite3.OperationalError("database is locked") routes to DEBUG; any other OperationalError still logs ERROR. - _connect_db() actually applies WAL + busy_timeout + synchronous=NORMAL. Also moves `import time` from inside _save_config to the module top so the tests can monkeypatch sleep cleanly.
4 weeks ago · 4238aeb4d9
parent 7b2324f52e
commit 4238aeb4d9
2 changed files with 175 additions and 2 deletions
--- a/config/settings.py
+++ b/config/settings.py
@ -2,6 +2,7 @@ import copy
 import json
 import os
 import sqlite3
+import time
 from typing import Dict, Any, Optional
 from cryptography.fernet import Fernet, InvalidToken
 from pathlib import Path
@ -639,8 +640,6 @@ class ConfigManager:
        file. The single 1s retry that used to live here gave up too
        early on HDD-backed Docker volumes.
        """
-        import time
-
        # Cumulative delay across attempts: 0.2 + 0.5 + 1.0 + 2.0 + 4.0 = 7.7s
        # plus the 30s busy_timeout that already runs inside each attempt.
        retry_delays = [0.2, 0.5, 1.0, 2.0, 4.0]
--- a/tests/test_config_save_retry.py
+++ b/tests/test_config_save_retry.py
@ -0,0 +1,174 @@
+"""Regression tests for ConfigManager._save_config retry behaviour.
+
+The DB-locking spam reported in #434 was caused by an aggressive retry
+loop that gave up after one second and logged each transient lock as
+ERROR. These tests pin the new behaviour:
+
+- Lock errors during retries log at DEBUG, not ERROR (no spam).
+- Six attempts with exponential backoff before giving up.
+- Successful retry after a few transient locks emits zero ERROR logs.
+- Genuine exhaustion logs a single ERROR and falls back to config.json.
+- Non-lock OperationalErrors don't trigger the lock-specific quiet path.
+"""
+
+import json
+import logging
+import sqlite3
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from config.settings import ConfigManager
+
+
+@pytest.fixture
+def manager(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> ConfigManager:
+    """Build a ConfigManager rooted at a tmp dir so every test starts clean."""
+    config_path = tmp_path / "config.json"
+    db_path = tmp_path / "database" / "music_library.db"
+    monkeypatch.setenv("SOULSYNC_CONFIG_PATH", str(config_path))
+    monkeypatch.setenv("SOULSYNC_DB_PATH", str(db_path))
+    mgr = ConfigManager(str(config_path))
+    # Replace whatever was loaded with a known payload so we can assert on it
+    mgr.config_data = {"plex": {"base_url": "http://example.test"}}
+    return mgr
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _fail_n_times_then_succeed(n: int, manager: ConfigManager):
+    """Patch ``_save_to_database`` so the first ``n`` calls fail (lock),
+    then subsequent calls succeed."""
+    state = {"calls": 0}
+    real_save = manager._save_to_database
+
+    def stub(config_data):
+        state["calls"] += 1
+        if state["calls"] <= n:
+            return False
+        return real_save(config_data)
+
+    return stub, state
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+def test_save_succeeds_on_first_attempt_emits_no_error_logs(
+    manager: ConfigManager, caplog: pytest.LogCaptureFixture
+) -> None:
+    """Happy path: a successful save should not log at ERROR."""
+    caplog.set_level(logging.DEBUG, logger="soulsync.config")
+    with patch("config.settings.time.sleep") as sleep_mock:
+        with patch.object(manager, "_save_to_database", return_value=True) as save_mock:
+            manager._save_config()
+    assert save_mock.call_count == 1
+    sleep_mock.assert_not_called()
+    error_logs = [r for r in caplog.records if r.levelno >= logging.ERROR]
+    assert error_logs == []
+
+
+def test_lock_errors_during_retries_log_at_debug_not_error(
+    manager: ConfigManager, caplog: pytest.LogCaptureFixture
+) -> None:
+    """Three transient locks then success should produce DEBUG noise only."""
+    caplog.set_level(logging.DEBUG, logger="soulsync.config")
+    stub, state = _fail_n_times_then_succeed(3, manager)
+    with patch("config.settings.time.sleep") as sleep_mock:
+        with patch.object(manager, "_save_to_database", side_effect=stub):
+            with patch.object(manager, "_ensure_database_exists"):
+                manager._save_config()
+    assert state["calls"] == 4
+    assert sleep_mock.call_count == 3
+    error_logs = [r for r in caplog.records if r.levelno >= logging.ERROR]
+    assert error_logs == [], "transient locks should not log ERROR"
+
+
+def test_save_uses_six_attempts_with_exponential_backoff(
+    manager: ConfigManager,
+) -> None:
+    """All six attempts must run, with the documented backoff schedule."""
+    with patch("config.settings.time.sleep") as sleep_mock:
+        with patch.object(manager, "_save_to_database", return_value=False) as save_mock:
+            with patch("builtins.open"):  # silence the json fallback's filesystem write
+                with patch.object(Path, "mkdir"):
+                    manager._save_config()
+    assert save_mock.call_count == 6
+    expected_delays = [0.2, 0.5, 1.0, 2.0, 4.0]
+    actual_delays = [c.args[0] for c in sleep_mock.call_args_list]
+    assert actual_delays == expected_delays
+
+
+def test_all_retries_exhausted_logs_single_error_and_falls_back_to_json(
+    manager: ConfigManager, caplog: pytest.LogCaptureFixture, tmp_path: Path
+) -> None:
+    """Exhausting retries should produce one ERROR log + one fallback file."""
+    caplog.set_level(logging.DEBUG, logger="soulsync.config")
+    manager.config_path = tmp_path / "config.json"
+    with patch("config.settings.time.sleep"):
+        with patch.object(manager, "_save_to_database", return_value=False):
+            manager._save_config()
+    error_logs = [r for r in caplog.records if r.levelno == logging.ERROR]
+    assert len(error_logs) == 1
+    assert "falling back to config.json" in error_logs[0].getMessage()
+    assert manager.config_path.exists()
+    payload = json.loads(manager.config_path.read_text())
+    assert payload["plex"]["base_url"] == "http://example.test"
+
+
+def test_save_to_database_lock_error_logs_at_debug(
+    manager: ConfigManager, caplog: pytest.LogCaptureFixture
+) -> None:
+    """sqlite3.OperationalError("...locked...") must surface as DEBUG only."""
+    caplog.set_level(logging.DEBUG, logger="soulsync.config")
+    with patch.object(manager, "_ensure_database_exists"):
+        with patch("config.settings.sqlite3.connect") as connect_mock:
+            connect_mock.return_value.execute.side_effect = sqlite3.OperationalError(
+                "database is locked"
+            )
+            ok = manager._save_to_database({"x": 1})
+    assert ok is False
+    error_logs = [r for r in caplog.records if r.levelno >= logging.ERROR]
+    assert error_logs == []
+    debug_logs = [
+        r for r in caplog.records
+        if r.levelno == logging.DEBUG and "locked" in r.getMessage().lower()
+    ]
+    assert len(debug_logs) == 1
+
+
+def test_save_to_database_non_lock_operational_error_logs_at_error(
+    manager: ConfigManager, caplog: pytest.LogCaptureFixture
+) -> None:
+    """A non-lock OperationalError is a real failure and must log ERROR."""
+    caplog.set_level(logging.DEBUG, logger="soulsync.config")
+    with patch.object(manager, "_ensure_database_exists"):
+        with patch("config.settings.sqlite3.connect") as connect_mock:
+            connect_mock.return_value.execute.side_effect = sqlite3.OperationalError(
+                "no such table: metadata"
+            )
+            ok = manager._save_to_database({"x": 1})
+    assert ok is False
+    error_logs = [r for r in caplog.records if r.levelno >= logging.ERROR]
+    assert len(error_logs) == 1
+
+
+def test_connect_db_sets_required_pragmas(manager: ConfigManager) -> None:
+    """All four pragmas must be applied on every config-DB connection."""
+    conn = manager._connect_db()
+    try:
+        journal_mode = conn.execute("PRAGMA journal_mode").fetchone()[0]
+        busy_timeout = conn.execute("PRAGMA busy_timeout").fetchone()[0]
+        synchronous = conn.execute("PRAGMA synchronous").fetchone()[0]
+    finally:
+        conn.close()
+    assert journal_mode.lower() == "wal"
+    assert busy_timeout == 30000
+    # synchronous returns 0=OFF, 1=NORMAL, 2=FULL, 3=EXTRA
+    assert synchronous == 1