Merge pull request #3867 from sysown/v2.x-multilag

Added mysql-monitor_replication_lag_group_by_host
pull/3874/head
René Cannaò 4 years ago committed by GitHub
commit 7940d867da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -578,6 +578,7 @@ class MySQL_HostGroups_Manager {
void push_MyConn_to_pool_array(MySQL_Connection **, unsigned int);
void destroy_MyConn_from_pool(MySQL_Connection *, bool _lock=true);
void replication_lag_action_inner(MyHGC *, char*, unsigned int, int);
void replication_lag_action(int, char*, unsigned int, int);
void read_only_action(char *hostname, int port, int read_only);
unsigned int get_servers_table_version();

@ -348,6 +348,7 @@ struct p_th_gauge {
mysql_monitor_read_only_interval,
mysql_monitor_read_only_timeout,
mysql_monitor_writer_is_also_reader,
mysql_monitor_replication_lag_group_by_host,
mysql_monitor_replication_lag_interval,
mysql_monitor_replication_lag_timeout,
mysql_monitor_history,
@ -434,6 +435,7 @@ class MySQL_Threads_Handler
//! ProxySQL session wait timeout. Unit: 'ms'.
bool monitor_wait_timeout;
bool monitor_writer_is_also_reader;
bool monitor_replication_lag_group_by_host;
//! How frequently a replication lag check is performed. Unit: 'ms'.
int monitor_replication_lag_interval;
//! Read only check timeout. Unit: 'ms'.

@ -875,6 +875,7 @@ __thread int mysql_thread___monitor_read_only_timeout;
__thread int mysql_thread___monitor_read_only_max_timeout_count;
__thread bool mysql_thread___monitor_wait_timeout;
__thread bool mysql_thread___monitor_writer_is_also_reader;
// Boolean switch: defined as bool so the definition agrees with its
// `extern __thread bool` declaration and with the bool it is refreshed from.
__thread bool mysql_thread___monitor_replication_lag_group_by_host;
__thread int mysql_thread___monitor_replication_lag_interval;
__thread int mysql_thread___monitor_replication_lag_timeout;
__thread int mysql_thread___monitor_replication_lag_count;
@ -1035,6 +1036,7 @@ extern __thread int mysql_thread___monitor_read_only_timeout;
extern __thread int mysql_thread___monitor_read_only_max_timeout_count;
extern __thread bool mysql_thread___monitor_wait_timeout;
extern __thread bool mysql_thread___monitor_writer_is_also_reader;
extern __thread bool mysql_thread___monitor_replication_lag_group_by_host;
extern __thread int mysql_thread___monitor_replication_lag_interval;
extern __thread int mysql_thread___monitor_replication_lag_timeout;
extern __thread int mysql_thread___monitor_replication_lag_count;

@ -3439,58 +3439,69 @@ void MySQL_HostGroups_Manager::add(MySrvC *mysrvc, unsigned int _hid) {
myhgc->mysrvs->add(mysrvc);
}
void MySQL_HostGroups_Manager::replication_lag_action(int _hid, char *address, unsigned int port, int current_replication_lag) {
GloAdmin->mysql_servers_wrlock();
wrlock();
void MySQL_HostGroups_Manager::replication_lag_action_inner(MyHGC *myhgc, char *address, unsigned int port, int current_replication_lag) {
int j;
MyHGC *myhgc = MyHGC_find(_hid);
if (myhgc) {
for (j=0; j<(int)myhgc->mysrvs->cnt(); j++) {
MySrvC *mysrvc=(MySrvC *)myhgc->mysrvs->servers->index(j);
if (strcmp(mysrvc->address,address)==0 && mysrvc->port==port) {
if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE) {
if (
// (current_replication_lag==-1 )
// ||
(current_replication_lag>=0 && ((unsigned int)current_replication_lag > mysrvc->max_replication_lag))
) {
// always increase the counter
mysrvc->cur_replication_lag_count += 1;
if (mysrvc->cur_replication_lag_count >= mysql_thread___monitor_replication_lag_count) {
proxy_warning("Shunning server %s:%d from HG %u with replication lag of %d second, count number: '%d'\n", address, port, myhgc->hid, current_replication_lag, mysrvc->cur_replication_lag_count);
mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG;
} else {
proxy_info(
"Not shunning server %s:%d from HG %u with replication lag of %d second, count number: '%d' < replication_lag_count: '%d'\n",
address,
port,
myhgc->hid,
current_replication_lag,
mysrvc->cur_replication_lag_count,
mysql_thread___monitor_replication_lag_count
);
}
for (j=0; j<(int)myhgc->mysrvs->cnt(); j++) {
MySrvC *mysrvc=(MySrvC *)myhgc->mysrvs->servers->index(j);
if (strcmp(mysrvc->address,address)==0 && mysrvc->port==port) {
if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE) {
if (
// (current_replication_lag==-1 )
// ||
(current_replication_lag>=0 && ((unsigned int)current_replication_lag > mysrvc->max_replication_lag))
) {
// always increase the counter
mysrvc->cur_replication_lag_count += 1;
if (mysrvc->cur_replication_lag_count >= mysql_thread___monitor_replication_lag_count) {
proxy_warning("Shunning server %s:%d from HG %u with replication lag of %d second, count number: '%d'\n", address, port, myhgc->hid, current_replication_lag, mysrvc->cur_replication_lag_count);
mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG;
} else {
mysrvc->cur_replication_lag_count = 0;
proxy_info(
"Not shunning server %s:%d from HG %u with replication lag of %d second, count number: '%d' < replication_lag_count: '%d'\n",
address,
port,
myhgc->hid,
current_replication_lag,
mysrvc->cur_replication_lag_count,
mysql_thread___monitor_replication_lag_count
);
}
} else {
if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) {
if (
(current_replication_lag>=0 && ((unsigned int)current_replication_lag <= mysrvc->max_replication_lag))
||
(current_replication_lag==-2) // see issue 959
) {
mysrvc->status=MYSQL_SERVER_STATUS_ONLINE;
proxy_warning("Re-enabling server %s:%d from HG %u with replication lag of %d second\n", address, port, myhgc->hid, current_replication_lag);
mysrvc->cur_replication_lag_count = 0;
}
mysrvc->cur_replication_lag_count = 0;
}
} else {
if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) {
if (
(current_replication_lag>=0 && ((unsigned int)current_replication_lag <= mysrvc->max_replication_lag))
||
(current_replication_lag==-2) // see issue 959
) {
mysrvc->status=MYSQL_SERVER_STATUS_ONLINE;
proxy_warning("Re-enabling server %s:%d from HG %u with replication lag of %d second\n", address, port, myhgc->hid, current_replication_lag);
mysrvc->cur_replication_lag_count = 0;
}
}
goto __exit_replication_lag_action;
}
return;
}
}
}
/**
 * @brief Applies one replication lag measurement for a backend server.
 *
 * Acquires the admin mysql_servers write lock and then this manager's write
 * lock, forwards the measurement to replication_lag_action_inner() for the
 * relevant hostgroup(s), and releases both locks in reverse order.
 *
 * @param _hid Hostgroup id of the measurement. Only consulted when
 *   mysql_thread___monitor_replication_lag_group_by_host is false.
 * @param address Backend address, forwarded to replication_lag_action_inner().
 * @param port Backend port, forwarded to replication_lag_action_inner().
 * @param current_replication_lag Measured lag value, forwarded unchanged.
 */
void MySQL_HostGroups_Manager::replication_lag_action(int _hid, char *address, unsigned int port, int current_replication_lag) {
	GloAdmin->mysql_servers_wrlock();
	wrlock();
	if (mysql_thread___monitor_replication_lag_group_by_host == false) {
		// legacy check. 1 check per server per hostgroup
		MyHGC *myhgc = MyHGC_find(_hid);
		// replication_lag_action_inner() dereferences the hostgroup without
		// checking it, so do not call it when the lookup returned nothing.
		if (myhgc) {
			replication_lag_action_inner(myhgc,address,port,current_replication_lag);
		}
	} else {
		// only 1 check per server, no matter the hostgroup
		// all hostgroups must be searched
		for (unsigned int i=0; i<MyHostGroups->len; i++) {
			MyHGC *myhgc=(MyHGC *)MyHostGroups->index(i);
			// same reason as above: never hand a null hostgroup to the inner routine
			if (myhgc) {
				replication_lag_action_inner(myhgc,address,port,current_replication_lag);
			}
		}
	}
	wrunlock();
	GloAdmin->mysql_servers_wrunlock();
}

@ -3255,7 +3255,12 @@ void * MySQL_Monitor::monitor_replication_lag() {
char *error=NULL;
SQLite3_result *resultset=NULL;
// add support for SSL
char *query=(char *)"SELECT hostgroup_id, hostname, port, max_replication_lag, use_ssl FROM mysql_servers WHERE max_replication_lag > 0 AND status NOT IN (2,3)";
char *query= NULL;
if (mysql_thread___monitor_replication_lag_group_by_host==true) {
query = (char *)"SELECT MIN(hostgroup_id), hostname, port, MIN(max_replication_lag), MAX(use_ssl) FROM mysql_servers WHERE max_replication_lag > 0 AND status NOT IN (2,3) GROUP BY hostname, port";
} else {
query=(char *)"SELECT hostgroup_id, hostname, port, max_replication_lag, use_ssl FROM mysql_servers WHERE max_replication_lag > 0 AND status NOT IN (2,3)";
}
t1=monotonic_time();
if (!GloMTH) return NULL; // quick exit during shutdown/restart

@ -452,6 +452,7 @@ static char * mysql_thread_variables_names[]= {
(char *)"monitor_read_only_interval",
(char *)"monitor_read_only_timeout",
(char *)"monitor_read_only_max_timeout_count",
(char *)"monitor_replication_lag_group_by_host",
(char *)"monitor_replication_lag_interval",
(char *)"monitor_replication_lag_timeout",
(char *)"monitor_replication_lag_count",
@ -990,6 +991,12 @@ th_metrics_map = std::make_tuple(
"Encodes different behaviors for nodes depending on their 'READ_ONLY' flag value.",
metric_tags {}
),
std::make_tuple (
p_th_gauge::mysql_monitor_replication_lag_group_by_host,
"proxysql_monitor_replication_lag_group_by_host",
"Encodes different replication lag check if the same server is in multiple hostgroups.",
metric_tags {}
),
std::make_tuple (
p_th_gauge::mysql_monitor_replication_lag_interval,
"proxysql_mysql_monitor_replication_lag_interval_seconds",
@ -1058,6 +1065,7 @@ MySQL_Threads_Handler::MySQL_Threads_Handler() {
variables.monitor_read_only_interval=1000;
variables.monitor_read_only_timeout=800;
variables.monitor_read_only_max_timeout_count=3;
variables.monitor_replication_lag_group_by_host=false;
variables.monitor_replication_lag_interval=10000;
variables.monitor_replication_lag_timeout=1000;
variables.monitor_replication_lag_count=1;
@ -2082,6 +2090,7 @@ char ** MySQL_Threads_Handler::get_variables_list() {
VariablesPointers_bool["log_mysql_warnings_enabled"] = make_tuple(&variables.log_mysql_warnings_enabled, false);
VariablesPointers_bool["log_unhealthy_connections"] = make_tuple(&variables.log_unhealthy_connections, false);
VariablesPointers_bool["monitor_enabled"] = make_tuple(&variables.monitor_enabled, false);
VariablesPointers_bool["monitor_replication_lag_group_by_host"] = make_tuple(&variables.monitor_replication_lag_group_by_host, false);
VariablesPointers_bool["monitor_wait_timeout"] = make_tuple(&variables.monitor_wait_timeout, false);
VariablesPointers_bool["monitor_writer_is_also_reader"] = make_tuple(&variables.monitor_writer_is_also_reader, false);
VariablesPointers_bool["multiplexing"] = make_tuple(&variables.multiplexing, false);
@ -3938,6 +3947,7 @@ void MySQL_Thread::refresh_variables() {
mysql_thread___monitor_read_only_interval=GloMTH->get_variable_int((char *)"monitor_read_only_interval");
mysql_thread___monitor_read_only_timeout=GloMTH->get_variable_int((char *)"monitor_read_only_timeout");
mysql_thread___monitor_read_only_max_timeout_count=GloMTH->get_variable_int((char *)"monitor_read_only_max_timeout_count");
mysql_thread___monitor_replication_lag_group_by_host=(bool)GloMTH->get_variable_int((char *)"monitor_replication_lag_group_by_host");
mysql_thread___monitor_replication_lag_interval=GloMTH->get_variable_int((char *)"monitor_replication_lag_interval");
mysql_thread___monitor_replication_lag_timeout=GloMTH->get_variable_int((char *)"monitor_replication_lag_timeout");
mysql_thread___monitor_replication_lag_count=GloMTH->get_variable_int((char *)"monitor_replication_lag_count");
@ -5191,6 +5201,7 @@ void MySQL_Threads_Handler::p_update_metrics() {
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_read_only_interval]->Set(this->variables.monitor_read_only_interval/1000.0);
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_read_only_timeout]->Set(this->variables.monitor_read_only_timeout/1000.0);
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_writer_is_also_reader]->Set(this->variables.monitor_writer_is_also_reader);
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_replication_lag_group_by_host]->Set(this->variables.monitor_replication_lag_group_by_host);
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_replication_lag_interval]->Set(this->variables.monitor_replication_lag_interval/1000.0);
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_replication_lag_timeout]->Set(this->variables.monitor_replication_lag_timeout/1000.0);
this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_history]->Set(this->variables.monitor_history/1000.0);

Loading…
Cancel
Save