Extract monitor health decision logic into pure functions (Phase 3.3, #5491)

New files:
- include/MonitorHealthDecision.h: declarations for 4 pure functions
- lib/MonitorHealthDecision.cpp: implementations

Extracted functions:
- should_shun_on_connect_errors(): mirrors MySrvC::connect_error()
  threshold logic (min(shun_on_failures, connect_retries+1))
- can_unshun_server(): mirrors MyHGC recovery loop with time check,
  connect_timeout_max cap, min 1s floor, and kill_all drain check
- should_shun_on_replication_lag(): mirrors MySQL_HostGroups_Manager
  lag check with consecutive count threshold
- can_recover_from_replication_lag(): mirrors lag recovery check

All functions are pure (no global state, no I/O) and added to
libproxysql.a via lib/Makefile.
pull/5507/head
René Cannaò 2 months ago
parent b0889e1313
commit 59654e48a3

@ -0,0 +1,95 @@
/**
* @file MonitorHealthDecision.h
* @brief Pure decision functions for monitor health state transitions.
*
* Extracted from MySQL_Monitor, MySrvC, and MyHGC for unit testability.
* These functions have no global state dependencies; all inputs are
* passed as parameters.
*
* @see Phase 3.3 (GitHub issue #5491)
*/
#ifndef MONITOR_HEALTH_DECISION_H
#define MONITOR_HEALTH_DECISION_H
#include <ctime>
/**
* @brief Determine if a server should be shunned based on connect errors.
*
* Mirrors the logic in MySrvC::connect_error(): a server is shunned
* when errors in the current second reach min(shun_on_failures,
* connect_retries_on_failure + 1).
*
* @param errors_this_second Number of connect errors in the current second.
* @param shun_on_failures Config: mysql-shun_on_failures.
* @param connect_retries Config: mysql-connect_retries_on_failure.
* @return true if the error count meets or exceeds the shunning threshold.
*
* @note The threshold is converted to unsigned int before it is compared
* with errors_this_second, so both configuration values are expected
* to be non-negative. A threshold of 0 is met by any error count.
*/
bool should_shun_on_connect_errors(
unsigned int errors_this_second,
int shun_on_failures,
int connect_retries
);
/**
* @brief Determine if a shunned server can be brought back online.
*
* Mirrors the recovery logic in MyHGC's server scan loop. A server
* can be unshunned when:
* 1. Recovery is enabled (shun_recovery_time_sec is not 0).
* 2. Strictly more than the wait time has elapsed since the last
* detected error. The wait time is shun_recovery_time_sec, unless
* shun_recovery_time_sec * 1000 >= connect_timeout_max_ms, in which
* case it is connect_timeout_max_ms / 1000 - 1. In both cases the
* wait time is raised to at least 1 second.
* 3. If shunned_and_kill_all_connections is true, all connections
* (both used and free) must be fully drained first.
*
* @param time_last_error Timestamp of the last detected error.
* @param current_time Current time. If it is not later than
* time_last_error the server is never unshunned.
* @param shun_recovery_time_sec Config: mysql-shun_recovery_time_sec (0 = recovery disabled).
* @param connect_timeout_max_ms Config: mysql-connect_timeout_server_max (milliseconds).
* @param kill_all_conns Whether shunned_and_kill_all_connections is set.
* @param connections_used Number of in-use connections.
* @param connections_free Number of idle connections.
* @return true if the server can be unshunned.
*/
bool can_unshun_server(
time_t time_last_error,
time_t current_time,
int shun_recovery_time_sec,
int connect_timeout_max_ms,
bool kill_all_conns,
unsigned int connections_used,
unsigned int connections_free
);
/**
* @brief Determine if a server should be shunned for replication lag.
*
* Mirrors the replication lag check in MySQL_HostGroups_Manager.
* A server is shunned when its replication lag exceeds max_replication_lag
* for N consecutive checks (where N = monitor_replication_lag_count).
*
* The function never requests a shun when the lag is unknown (negative),
* when the check is disabled (max_replication_lag == 0), or when the lag
* is less than or equal to max_replication_lag.
*
* @param current_lag Measured replication lag in seconds (negative = unknown).
* @param max_replication_lag Configured max lag threshold (0 = disabled).
* @param consecutive_count Number of consecutive checks exceeding threshold.
* The caller is expected to have already counted
* the current check before calling.
* @param count_threshold Config: mysql-monitor_replication_lag_count.
* Converted to unsigned int for the comparison,
* so it is expected to be non-negative.
* @return true if the server should be shunned for replication lag.
*/
bool should_shun_on_replication_lag(
int current_lag,
unsigned int max_replication_lag,
unsigned int consecutive_count,
int count_threshold
);
/**
* @brief Determine if a server shunned for replication lag can be recovered.
*
* Recovery is allowed when the measured lag is known (non-negative) and
* less than or equal to max_replication_lag. Unlike the shun decision,
* a max_replication_lag of 0 is not treated as "disabled" here: it simply
* means only a lag of 0 allows recovery.
*
* @param current_lag Measured replication lag in seconds (negative = unknown,
* never recovers).
* @param max_replication_lag Configured max lag threshold.
* @return true if the server's lag is now within acceptable bounds.
*/
bool can_recover_from_replication_lag(
int current_lag,
unsigned int max_replication_lag
);
#endif // MONITOR_HEALTH_DECISION_H

@ -105,6 +105,7 @@ _OBJ_CXX := ProxySQL_GloVars.oo network.oo debug.oo configfile.oo Query_Cache.oo
PgSQL_Variables_Validator.oo PgSQL_ExplicitTxnStateMgr.oo \
PgSQL_PreparedStatement.oo PgSQL_Extended_Query_Message.oo \
pgsql_tokenizer.oo \
MonitorHealthDecision.oo \
proxy_sqlite3_symbols.oo
# TSDB object files

@ -0,0 +1,105 @@
/**
* @file MonitorHealthDecision.cpp
* @brief Implementation of pure monitor health decision functions.
*
* These functions extract the decision logic from MySrvC::connect_error(),
* MyHGC's unshun recovery loop, and MySQL_HostGroups_Manager's replication
* lag check. They are intentionally free of global state and I/O.
*
* @see MonitorHealthDecision.h
* @see Phase 3.3 (GitHub issue #5491)
*/
#include "MonitorHealthDecision.h"
/**
 * @brief Decide whether the connect errors seen in the current second
 *        warrant shunning the server.
 *
 * Mirrors the threshold logic of MySrvC::connect_error():
 *   max_failures = min(shun_on_failures, connect_retries + 1)
 *
 * @param errors_this_second Number of connect errors in the current second.
 * @param shun_on_failures   Config: mysql-shun_on_failures.
 * @param connect_retries    Config: mysql-connect_retries_on_failure.
 * @return true if errors_this_second >= max_failures (compared as unsigned).
 */
bool should_shun_on_connect_errors(
	unsigned int errors_this_second,
	int shun_on_failures,
	int connect_retries)
{
	// min(shun_on_failures, connect_retries + 1), written so that the
	// increment is only evaluated when it cannot overflow:
	//   connect_retries <  shun_on_failures  =>  connect_retries + 1 <= shun_on_failures
	//   connect_retries >= shun_on_failures  =>  connect_retries + 1 >  shun_on_failures
	// A plain "connect_retries + 1" is undefined behaviour for INT_MAX.
	int max_failures = (connect_retries < shun_on_failures)
		? connect_retries + 1
		: shun_on_failures;

	// The threshold is compared as unsigned int; a negative threshold
	// therefore converts to a very large value.
	return (errors_this_second >= (unsigned int)max_failures);
}
/**
 * @brief Decide whether a shunned server may be brought back online.
 *
 * Mirrors the MyHGC recovery scan:
 *  - recovery is disabled when shun_recovery_time_sec is 0;
 *  - the wait time is shun_recovery_time_sec, capped to
 *    connect_timeout_max_ms / 1000 - 1 when the recovery time (in ms) is
 *    not smaller than connect_timeout_max_ms, and never below 1 second;
 *  - strictly more than the wait time must have elapsed since the error;
 *  - in kill-all mode, both used and free connections must be drained.
 *
 * @param time_last_error        Timestamp of the last detected error.
 * @param current_time           Current time.
 * @param shun_recovery_time_sec Config: mysql-shun_recovery_time_sec.
 * @param connect_timeout_max_ms Config: mysql-connect_timeout_server_max (ms).
 * @param kill_all_conns         Whether shunned_and_kill_all_connections is set.
 * @param connections_used       Number of in-use connections.
 * @param connections_free       Number of idle connections.
 * @return true if the server can be unshunned.
 */
bool can_unshun_server(
	time_t time_last_error,
	time_t current_time,
	int shun_recovery_time_sec,
	int connect_timeout_max_ms,
	bool kill_all_conns,
	unsigned int connections_used,
	unsigned int connections_free)
{
	if (shun_recovery_time_sec == 0) {
		return false; // recovery disabled
	}

	// Widen before converting seconds to milliseconds: in plain int the
	// product overflows (undefined behaviour) once the recovery time
	// exceeds INT_MAX / 1000 seconds, which can flip the cap decision.
	const long long recovery_ms = (long long)shun_recovery_time_sec * 1000;

	int max_wait_sec = (recovery_ms >= connect_timeout_max_ms)
		? connect_timeout_max_ms / 1000 - 1
		: shun_recovery_time_sec;
	if (max_wait_sec < 1) {
		max_wait_sec = 1; // minimum wait is one second
	}

	// The error must lie strictly in the past ...
	if (current_time <= time_last_error) {
		return false;
	}
	// ... and strictly more than the wait time must have elapsed.
	if ((current_time - time_last_error) <= max_wait_sec) {
		return false;
	}

	// In kill-all mode the server stays shunned until every connection,
	// used or free, has been drained.
	if (kill_all_conns) {
		if (connections_used != 0 || connections_free != 0) {
			return false;
		}
	}
	return true;
}
/**
 * @brief Decide whether a server should be shunned for replication lag.
 *
 * Mirrors the MySQL_HostGroups_Manager replication lag logic: the server
 * is shunned when its lag is known, the check is enabled, the lag is
 * strictly greater than max_replication_lag, and the number of
 * consecutive over-threshold checks has reached count_threshold.
 *
 * @param current_lag         Measured lag in seconds (negative = unknown).
 * @param max_replication_lag Configured max lag threshold (0 = disabled).
 * @param consecutive_count   Consecutive checks exceeding the threshold;
 *                            the caller is expected to have incremented it
 *                            for the current check before calling.
 * @param count_threshold     Config: mysql-monitor_replication_lag_count.
 * @return true if the server should be shunned for replication lag.
 */
bool should_shun_on_replication_lag(
	int current_lag,
	unsigned int max_replication_lag,
	unsigned int consecutive_count,
	int count_threshold)
{
	if (current_lag < 0) {
		return false; // lag unknown, don't shun
	}
	if (max_replication_lag == 0) {
		return false; // lag check disabled
	}
	// current_lag is non-negative here, so converting it to unsigned is
	// lossless. Casting the threshold to int instead would turn values
	// above INT_MAX negative and make every lag look excessive.
	if ((unsigned int)current_lag <= max_replication_lag) {
		return false; // within threshold
	}
	// Lag exceeds the threshold: shun once enough consecutive checks failed.
	return (consecutive_count >= (unsigned int)count_threshold);
}
/**
 * @brief Decide whether a server shunned for replication lag can recover.
 *
 * Mirrors the MySQL_HostGroups_Manager unshun check: the server recovers
 * when its lag is known (non-negative) and is less than or equal to
 * max_replication_lag.
 *
 * @param current_lag         Measured lag in seconds (negative = unknown).
 * @param max_replication_lag Configured max lag threshold.
 * @return true if the lag is within the configured bound.
 */
bool can_recover_from_replication_lag(
	int current_lag,
	unsigned int max_replication_lag)
{
	if (current_lag < 0) {
		return false; // unknown lag, don't recover
	}
	// Compare in the unsigned domain: current_lag is non-negative, whereas
	// casting the threshold to int would make values above INT_MAX
	// negative and block recovery forever.
	return ((unsigned int)current_lag <= max_replication_lag);
}
Loading…
Cancel
Save