Added new variable 'mysql-monitor_replication_lag_retries'

This new variable controls how many consecutive times the replication lag
may exceed 'max_replication_lag' before a server is set to SHUNNED.
pull/3054/head
Javier Jaramago Fernández 6 years ago committed by René Cannaò
parent a7814d2bba
commit a2727739e4

@ -140,6 +140,7 @@ class MySrvC { // MySQL Server Container
unsigned int max_connections_used; // The maximum number of connections that has been opened
unsigned int connect_OK;
unsigned int connect_ERR;
unsigned int cur_replication_lag_retries;
// note that these variables are in microseconds, while the user defines max latency in milliseconds
unsigned int current_latency_us;
unsigned int max_latency_us;

@ -377,6 +377,7 @@ class MySQL_Threads_Handler
bool monitor_writer_is_also_reader;
int monitor_replication_lag_interval;
int monitor_replication_lag_timeout;
int monitor_replication_lag_retries;
int monitor_groupreplication_healthcheck_interval;
int monitor_groupreplication_healthcheck_timeout;
int monitor_groupreplication_healthcheck_max_timeout_count;

@ -799,6 +799,7 @@ __thread bool mysql_thread___monitor_wait_timeout;
__thread bool mysql_thread___monitor_writer_is_also_reader;
__thread int mysql_thread___monitor_replication_lag_interval;
__thread int mysql_thread___monitor_replication_lag_timeout;
__thread int mysql_thread___monitor_replication_lag_retries;
__thread int mysql_thread___monitor_groupreplication_healthcheck_interval;
__thread int mysql_thread___monitor_groupreplication_healthcheck_timeout;
__thread int mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count;
@ -946,6 +947,7 @@ extern __thread bool mysql_thread___monitor_wait_timeout;
extern __thread bool mysql_thread___monitor_writer_is_also_reader;
extern __thread int mysql_thread___monitor_replication_lag_interval;
extern __thread int mysql_thread___monitor_replication_lag_timeout;
extern __thread int mysql_thread___monitor_replication_lag_retries;
extern __thread int mysql_thread___monitor_groupreplication_healthcheck_interval;
extern __thread int mysql_thread___monitor_groupreplication_healthcheck_timeout;
extern __thread int mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count;

@ -840,6 +840,7 @@ MySrvC::MySrvC(char *add, uint16_t p, uint16_t gp, unsigned int _weight, enum My
max_connections=_max_connections;
max_replication_lag=_max_replication_lag;
use_ssl=_use_ssl;
cur_replication_lag_retries=0;
max_latency_us=_max_latency_ms*1000;
current_latency_us=0;
aws_aurora_current_lag_us = 0;
@ -3199,8 +3200,23 @@ void MySQL_HostGroups_Manager::replication_lag_action(int _hid, char *address, u
// ||
(current_replication_lag>=0 && ((unsigned int)current_replication_lag > mysrvc->max_replication_lag))
) {
proxy_warning("Shunning server %s:%d from HG %u with replication lag of %d second\n", address, port, myhgc->hid, current_replication_lag);
mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG;
if (mysrvc->cur_replication_lag_retries >= GloMTH->variables.monitor_replication_lag_retries) {
proxy_warning("Shunning server %s:%d from HG %u with replication lag of %d second, retry number: '%d'\n", address, port, myhgc->hid, current_replication_lag, mysrvc->cur_replication_lag_retries);
mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG;
} else {
proxy_info(
"Not shunning server %s:%d from HG %u with replication lag of %d second, retry number: '%d' < replication_lag_retries: '%d'\n",
address,
port,
myhgc->hid,
current_replication_lag,
mysrvc->cur_replication_lag_retries,
GloMTH->variables.monitor_replication_lag_retries
);
mysrvc->cur_replication_lag_retries += 1;
}
} else {
mysrvc->cur_replication_lag_retries = 0;
}
} else {
if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) {
@ -3211,6 +3227,7 @@ void MySQL_HostGroups_Manager::replication_lag_action(int _hid, char *address, u
) {
mysrvc->status=MYSQL_SERVER_STATUS_ONLINE;
proxy_warning("Re-enabling server %s:%d from HG %u with replication lag of %d second\n", address, port, myhgc->hid, current_replication_lag);
mysrvc->cur_replication_lag_retries = 0;
}
}
}

@ -438,6 +438,7 @@ static char * mysql_thread_variables_names[]= {
(char *)"monitor_read_only_max_timeout_count",
(char *)"monitor_replication_lag_interval",
(char *)"monitor_replication_lag_timeout",
(char *)"monitor_replication_lag_retries",
(char *)"monitor_groupreplication_healthcheck_interval",
(char *)"monitor_groupreplication_healthcheck_timeout",
(char *)"monitor_groupreplication_healthcheck_max_timeout_count",
@ -1007,6 +1008,7 @@ MySQL_Threads_Handler::MySQL_Threads_Handler() {
variables.monitor_read_only_max_timeout_count=3;
variables.monitor_replication_lag_interval=10000;
variables.monitor_replication_lag_timeout=1000;
variables.monitor_replication_lag_retries=0;
variables.monitor_groupreplication_healthcheck_interval=5000;
variables.monitor_groupreplication_healthcheck_timeout=800;
variables.monitor_groupreplication_healthcheck_max_timeout_count=3;
@ -1339,6 +1341,7 @@ int MySQL_Threads_Handler::get_variable_int(const char *name) {
if (!strcmp(name,"monitor_read_only_max_timeout_count")) return (int)variables.monitor_read_only_max_timeout_count;
if (!strcmp(name,"monitor_replication_lag_interval")) return (int)variables.monitor_replication_lag_interval;
if (!strcmp(name,"monitor_replication_lag_timeout")) return (int)variables.monitor_replication_lag_timeout;
if (!strcmp(name,"monitor_replication_lag_retries")) return (int)variables.monitor_replication_lag_retries;
}
if (a == 'g') {
char b = name[9];
@ -1697,6 +1700,10 @@ char * MySQL_Threads_Handler::get_variable(char *name) { // this is the public f
sprintf(intbuf,"%d",variables.monitor_replication_lag_timeout);
return strdup(intbuf);
}
if (!strcasecmp(name,"monitor_replication_lag_retries")) {
sprintf(intbuf,"%d",variables.monitor_replication_lag_retries);
return strdup(intbuf);
}
if (!strcasecmp(name,"monitor_groupreplication_healthcheck_interval")) {
sprintf(intbuf,"%d",variables.monitor_groupreplication_healthcheck_interval);
return strdup(intbuf);
@ -2254,6 +2261,15 @@ bool MySQL_Threads_Handler::set_variable(char *name, const char *value) { // thi
return false;
}
}
if (!strcasecmp(name,"monitor_replication_lag_retries")) {
int intv=atoi(value);
if (intv >= 0 && intv <= std::numeric_limits<int>::max()) {
variables.monitor_replication_lag_retries=intv;
return true;
} else {
return false;
}
}
if (!strcasecmp(name,"monitor_groupreplication_healthcheck_interval")) {
int intv=atoi(value);
if (intv >= 50 && intv <= 7*24*3600*1000) {
@ -4869,6 +4885,7 @@ void MySQL_Thread::refresh_variables() {
mysql_thread___monitor_read_only_max_timeout_count=GloMTH->get_variable_int((char *)"monitor_read_only_max_timeout_count");
mysql_thread___monitor_replication_lag_interval=GloMTH->get_variable_int((char *)"monitor_replication_lag_interval");
mysql_thread___monitor_replication_lag_timeout=GloMTH->get_variable_int((char *)"monitor_replication_lag_timeout");
mysql_thread___monitor_replication_lag_retries=GloMTH->get_variable_int((char *)"monitor_replication_lag_retries");
mysql_thread___monitor_groupreplication_healthcheck_interval=GloMTH->get_variable_int((char *)"monitor_groupreplication_healthcheck_interval");
mysql_thread___monitor_groupreplication_healthcheck_timeout=GloMTH->get_variable_int((char *)"monitor_groupreplication_healthcheck_timeout");
mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count=GloMTH->get_variable_int((char *)"monitor_groupreplication_healthcheck_max_timeout_count");

Loading…
Cancel
Save