diff --git a/lib/PgSQL_Monitor.cpp b/lib/PgSQL_Monitor.cpp index 30fab6727..538d6d993 100644 --- a/lib/PgSQL_Monitor.cpp +++ b/lib/PgSQL_Monitor.cpp @@ -2533,14 +2533,71 @@ void* PgSQL_monitor_scheduler_thread() { // Check variable version changes; refresh if needed unsigned int glover = GloPTH->get_global_version(); + bool vars_refreshed = false; if (PgSQL_Thread__variables_version < glover) { PgSQL_Thread__variables_version = glover; pgsql_thread->refresh_variables(); + vars_refreshed = true; } // Fetch config for next task scheduling tasks_conf_t tasks_conf { fetch_updated_conf(GloPgMon, PgHGM) }; + // When runtime variables were just refreshed (i.e. someone did + // SET pgsql-monitor_*_interval=; LOAD PGSQL + // VARIABLES TO RUNTIME;), the newly-read interval values may be + // smaller than the ones we used the last time we recomputed + // next_intvs. Without the clamp below, each next__at still + // points at the cycle that was scheduled under the OLD (larger) + // interval, so a LOAD doesn't visibly take effect until the old + // cycle elapses — which can be up to ~2 minutes for connect + // checks under the default pgsql-monitor_connect_interval=120000. + // + // Example of the surprise this avoids: + // + // T=0 proxysql starts with monitor_connect_interval=120000, + // next_connect_at is scheduled for T=120000ms. + // T=5 user runs SET pgsql-monitor_connect_interval=2000; + // LOAD PGSQL VARIABLES TO RUNTIME; expecting the next + // connect cycle within ~2 seconds. + // T=7 without this clamp, next_connect_at is still 120000 + // — no new connect fires for another 113 seconds. + // + // The fix: after every successful refresh, shrink any + // next__at that's now farther in the future than + // (now + new_interval). We never push next__at further + // into the future — growing the interval should not delay an + // already-imminent check — so the direction of the clamp is + // one-way (min()). Types whose interval is 0 (disabled) are + // skipped and handled by the existing compute_next_intvs() + // which sets them to ULONG_MAX. + if (vars_refreshed) { + if (tasks_conf.ping.params.interval > 0) { + const uint64_t clamped = cur_intv_start + tasks_conf.ping.params.interval; + if (next_intvs.next_ping_at > clamped) { + next_intvs.next_ping_at = clamped; + } + } + if (tasks_conf.connect.params.interval > 0) { + const uint64_t clamped = cur_intv_start + tasks_conf.connect.params.interval; + if (next_intvs.next_connect_at > clamped) { + next_intvs.next_connect_at = clamped; + } + } + if (tasks_conf.readonly.params.interval > 0) { + const uint64_t clamped = cur_intv_start + tasks_conf.readonly.params.interval; + if (next_intvs.next_readonly_at > clamped) { + next_intvs.next_readonly_at = clamped; + } + } + if (tasks_conf.repl_lag.params.interval > 0) { + const uint64_t clamped = cur_intv_start + tasks_conf.repl_lag.params.interval; + if (next_intvs.next_repl_lag_at > clamped) { + next_intvs.next_repl_lag_at = clamped; + } + } + } + // Perform table maintenance maint_mon_tables(tasks_conf, next_intvs, cur_intv_start); diff --git a/test/tap/tests/pgsql-servers_ssl_params-t.cpp b/test/tap/tests/pgsql-servers_ssl_params-t.cpp index 72705a0d7..68ccd695c 100644 --- a/test/tap/tests/pgsql-servers_ssl_params-t.cpp +++ b/test/tap/tests/pgsql-servers_ssl_params-t.cpp @@ -648,16 +648,24 @@ int main(int argc, char** argv) { // Configure monitor: use 'postgres' user which is accepted by the // backend, set a tight connect_interval so cycles run within the test // wait window. Restore original values afterwards. + // + // We deliberately do NOT change pgsql-monitor_username / + // pgsql-monitor_password. An earlier version of this test set them + // to 'postgres'/'postgres' on the assumption that the backend had a + // postgres user with password 'postgres', but the actual CI infra + // (docker-pgsql16-single) randomizes POSTGRES_PASSWORD per container + // startup (e.g. "05e792e51d"), so the hardcoded credentials never + // authenticated and every monitor connect failed with + // FATAL: password authentication failed for user "postgres" + // The default monitor/monitor user works against the infra's + // pg_hba.conf and is what the initial (pre-test) connect check + // already uses successfully. long original_connect_interval = getConnectInterval(a); std::string original_monitor_username = exec_scalar(a, "SELECT Variable_Value FROM global_variables WHERE Variable_Name='pgsql-monitor_username'"); - std::string original_monitor_password = exec_scalar(a, - "SELECT Variable_Value FROM global_variables WHERE Variable_Name='pgsql-monitor_password'"); diag("Original monitor: user=%s interval=%ld ms", original_monitor_username.c_str(), original_connect_interval); - exec_ok(a, "SET pgsql-monitor_username='postgres'"); - exec_ok(a, "SET pgsql-monitor_password='postgres'"); setConnectInterval(a, 2000); exec_ok(a, "UPDATE pgsql_servers SET use_ssl=1"); exec_ok(a, "LOAD PGSQL SERVERS TO RUNTIME"); @@ -666,15 +674,9 @@ int main(int argc, char** argv) { test_monitor_ssl_with_per_server_params(a); test_monitor_uses_per_server_row(a); - // Restore original monitor settings - { - std::stringstream q; - q << "SET pgsql-monitor_username='" << original_monitor_username << "'"; - exec_ok(a, q.str().c_str()); - q.str(""); - q << "SET pgsql-monitor_password='" << original_monitor_password << "'"; - exec_ok(a, q.str().c_str()); - } + // Restore original connect interval. Monitor username/password are + // no longer touched by this test - see comment above - so there's + // nothing to restore there. setConnectInterval(a, (int)original_connect_interval); // Part 3: Cluster query support