From 4bfe3e58784cdaf6c5443e70f0ba9dfed2606736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Thu, 5 May 2022 12:06:14 +0200 Subject: [PATCH] Added mysql-monitor_replication_lag_group_by_host If variable mysql-monitor_replication_lag_group_by_host=false: (default) Monitor will perform 1 replication lag check per server per hostgroup. If variable mysql-monitor_replication_lag_group_by_host=true: Monitor will perform 1 replication lag check per server. This variable needs to be set only in setups in which the same server is configured in many hostgroups, thus reducing the number of checks. --- include/MySQL_HostGroups_Manager.h | 1 + include/MySQL_Thread.h | 2 + include/proxysql_structs.h | 2 + lib/MySQL_HostGroups_Manager.cpp | 97 +++++++++++++++++------------- lib/MySQL_Monitor.cpp | 7 ++- lib/MySQL_Thread.cpp | 11 ++++ 6 files changed, 76 insertions(+), 44 deletions(-) diff --git a/include/MySQL_HostGroups_Manager.h b/include/MySQL_HostGroups_Manager.h index 4c807c9d8..e747b01ef 100644 --- a/include/MySQL_HostGroups_Manager.h +++ b/include/MySQL_HostGroups_Manager.h @@ -578,6 +578,7 @@ class MySQL_HostGroups_Manager { void push_MyConn_to_pool_array(MySQL_Connection **, unsigned int); void destroy_MyConn_from_pool(MySQL_Connection *, bool _lock=true); + void replication_lag_action_inner(MyHGC *, char*, unsigned int, int); void replication_lag_action(int, char*, unsigned int, int); void read_only_action(char *hostname, int port, int read_only); unsigned int get_servers_table_version(); diff --git a/include/MySQL_Thread.h b/include/MySQL_Thread.h index 74979c576..f7a6b9b6a 100644 --- a/include/MySQL_Thread.h +++ b/include/MySQL_Thread.h @@ -348,6 +348,7 @@ struct p_th_gauge { mysql_monitor_read_only_interval, mysql_monitor_read_only_timeout, mysql_monitor_writer_is_also_reader, + mysql_monitor_replication_lag_group_by_host, mysql_monitor_replication_lag_interval, mysql_monitor_replication_lag_timeout, mysql_monitor_history, @@ -434,6 +435,7 @@ 
class MySQL_Threads_Handler //! ProxySQL session wait timeout. Unit: 'ms'. bool monitor_wait_timeout; bool monitor_writer_is_also_reader; + bool monitor_replication_lag_group_by_host; //! How frequently a replication lag check is performed. Unit: 'ms'. int monitor_replication_lag_interval; //! Read only check timeout. Unit: 'ms'. diff --git a/include/proxysql_structs.h b/include/proxysql_structs.h index 670a88c7a..dc7f7efd9 100644 --- a/include/proxysql_structs.h +++ b/include/proxysql_structs.h @@ -875,6 +875,7 @@ __thread int mysql_thread___monitor_read_only_timeout; __thread int mysql_thread___monitor_read_only_max_timeout_count; __thread bool mysql_thread___monitor_wait_timeout; __thread bool mysql_thread___monitor_writer_is_also_reader; +__thread int mysql_thread___monitor_replication_lag_group_by_host; __thread int mysql_thread___monitor_replication_lag_interval; __thread int mysql_thread___monitor_replication_lag_timeout; __thread int mysql_thread___monitor_replication_lag_count; @@ -1035,6 +1036,7 @@ extern __thread int mysql_thread___monitor_read_only_timeout; extern __thread int mysql_thread___monitor_read_only_max_timeout_count; extern __thread bool mysql_thread___monitor_wait_timeout; extern __thread bool mysql_thread___monitor_writer_is_also_reader; +extern __thread bool mysql_thread___monitor_replication_lag_group_by_host; extern __thread int mysql_thread___monitor_replication_lag_interval; extern __thread int mysql_thread___monitor_replication_lag_timeout; extern __thread int mysql_thread___monitor_replication_lag_count; diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index 4bfa065d9..e7ec97b2c 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -3439,58 +3439,69 @@ void MySQL_HostGroups_Manager::add(MySrvC *mysrvc, unsigned int _hid) { myhgc->mysrvs->add(mysrvc); } -void MySQL_HostGroups_Manager::replication_lag_action(int _hid, char *address, unsigned int port, int 
current_replication_lag) { - GloAdmin->mysql_servers_wrlock(); - wrlock(); +void MySQL_HostGroups_Manager::replication_lag_action_inner(MyHGC *myhgc, char *address, unsigned int port, int current_replication_lag) { int j; - MyHGC *myhgc = MyHGC_find(_hid); - if (myhgc) { - for (j=0; j<(int)myhgc->mysrvs->cnt(); j++) { - MySrvC *mysrvc=(MySrvC *)myhgc->mysrvs->servers->index(j); - if (strcmp(mysrvc->address,address)==0 && mysrvc->port==port) { - if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE) { - if ( -// (current_replication_lag==-1 ) -// || - (current_replication_lag>=0 && ((unsigned int)current_replication_lag > mysrvc->max_replication_lag)) - ) { - // always increase the counter - mysrvc->cur_replication_lag_count += 1; - if (mysrvc->cur_replication_lag_count >= mysql_thread___monitor_replication_lag_count) { - proxy_warning("Shunning server %s:%d from HG %u with replication lag of %d second, count number: '%d'\n", address, port, myhgc->hid, current_replication_lag, mysrvc->cur_replication_lag_count); - mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG; - } else { - proxy_info( - "Not shunning server %s:%d from HG %u with replication lag of %d second, count number: '%d' < replication_lag_count: '%d'\n", - address, - port, - myhgc->hid, - current_replication_lag, - mysrvc->cur_replication_lag_count, - mysql_thread___monitor_replication_lag_count - ); - } + for (j=0; j<(int)myhgc->mysrvs->cnt(); j++) { + MySrvC *mysrvc=(MySrvC *)myhgc->mysrvs->servers->index(j); + if (strcmp(mysrvc->address,address)==0 && mysrvc->port==port) { + if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE) { + if ( +// (current_replication_lag==-1 ) +// || + (current_replication_lag>=0 && ((unsigned int)current_replication_lag > mysrvc->max_replication_lag)) + ) { + // always increase the counter + mysrvc->cur_replication_lag_count += 1; + if (mysrvc->cur_replication_lag_count >= mysql_thread___monitor_replication_lag_count) { + proxy_warning("Shunning server %s:%d from HG %u with 
replication lag of %d second, count number: '%d'\n", address, port, myhgc->hid, current_replication_lag, mysrvc->cur_replication_lag_count); + mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG; } else { - mysrvc->cur_replication_lag_count = 0; + proxy_info( + "Not shunning server %s:%d from HG %u with replication lag of %d second, count number: '%d' < replication_lag_count: '%d'\n", + address, + port, + myhgc->hid, + current_replication_lag, + mysrvc->cur_replication_lag_count, + mysql_thread___monitor_replication_lag_count + ); } } else { - if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) { - if ( - (current_replication_lag>=0 && ((unsigned int)current_replication_lag <= mysrvc->max_replication_lag)) - || - (current_replication_lag==-2) // see issue 959 - ) { - mysrvc->status=MYSQL_SERVER_STATUS_ONLINE; - proxy_warning("Re-enabling server %s:%d from HG %u with replication lag of %d second\n", address, port, myhgc->hid, current_replication_lag); - mysrvc->cur_replication_lag_count = 0; - } + mysrvc->cur_replication_lag_count = 0; + } + } else { + if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) { + if ( + (current_replication_lag>=0 && ((unsigned int)current_replication_lag <= mysrvc->max_replication_lag)) + || + (current_replication_lag==-2) // see issue 959 + ) { + mysrvc->status=MYSQL_SERVER_STATUS_ONLINE; + proxy_warning("Re-enabling server %s:%d from HG %u with replication lag of %d second\n", address, port, myhgc->hid, current_replication_lag); + mysrvc->cur_replication_lag_count = 0; } } - goto __exit_replication_lag_action; } + return; + } + } +} + +void MySQL_HostGroups_Manager::replication_lag_action(int _hid, char *address, unsigned int port, int current_replication_lag) { + GloAdmin->mysql_servers_wrlock(); + wrlock(); + if (mysql_thread___monitor_replication_lag_group_by_host == false) { + // legacy check. 
1 check per server per hostgroup + MyHGC *myhgc = MyHGC_find(_hid); + replication_lag_action_inner(myhgc,address,port,current_replication_lag); + } else { + // only 1 check per server, no matter the hostgroup + // all hostgroups must be searched + for (unsigned int i=0; i < MyHostGroups->len; i++) { + MyHGC *myhgc=(MyHGC *)MyHostGroups->index(i); + replication_lag_action_inner(myhgc,address,port,current_replication_lag); } } -__exit_replication_lag_action: wrunlock(); GloAdmin->mysql_servers_wrunlock(); } diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 9d9c55504..02fdd6d26 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -3233,7 +3233,12 @@ void * MySQL_Monitor::monitor_replication_lag() { char *error=NULL; SQLite3_result *resultset=NULL; // add support for SSL - char *query=(char *)"SELECT hostgroup_id, hostname, port, max_replication_lag, use_ssl FROM mysql_servers WHERE max_replication_lag > 0 AND status NOT IN (2,3)"; + char *query= NULL; + if (mysql_thread___monitor_replication_lag_group_by_host==true) { + query = (char *)"SELECT MIN(hostgroup_id), hostname, port, MIN(max_replication_lag), MAX(use_ssl) FROM mysql_servers WHERE max_replication_lag > 0 AND status NOT IN (2,3) GROUP BY hostname, port"; + } else { + query=(char *)"SELECT hostgroup_id, hostname, port, max_replication_lag, use_ssl FROM mysql_servers WHERE max_replication_lag > 0 AND status NOT IN (2,3)"; + } t1=monotonic_time(); if (!GloMTH) return NULL; // quick exit during shutdown/restart diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index b60a64622..0179e4284 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -452,6 +452,7 @@ static char * mysql_thread_variables_names[]= { (char *)"monitor_read_only_interval", (char *)"monitor_read_only_timeout", (char *)"monitor_read_only_max_timeout_count", + (char *)"monitor_replication_lag_group_by_host", (char *)"monitor_replication_lag_interval", (char *)"monitor_replication_lag_timeout", (char 
*)"monitor_replication_lag_count", @@ -990,6 +991,12 @@ th_metrics_map = std::make_tuple( "Encodes different behaviors for nodes depending on their 'READ_ONLY' flag value.", metric_tags {} ), + std::make_tuple ( + p_th_gauge::mysql_monitor_replication_lag_group_by_host, + "proxysql_monitor_replication_lag_group_by_host", + "Encodes different replication lag check if the same server is in multiple hostgroups.", + metric_tags {} + ), std::make_tuple ( p_th_gauge::mysql_monitor_replication_lag_interval, "proxysql_mysql_monitor_replication_lag_interval_seconds", @@ -1058,6 +1065,7 @@ MySQL_Threads_Handler::MySQL_Threads_Handler() { variables.monitor_read_only_interval=1000; variables.monitor_read_only_timeout=800; variables.monitor_read_only_max_timeout_count=3; + variables.monitor_replication_lag_group_by_host=false; variables.monitor_replication_lag_interval=10000; variables.monitor_replication_lag_timeout=1000; variables.monitor_replication_lag_count=1; @@ -2082,6 +2090,7 @@ char ** MySQL_Threads_Handler::get_variables_list() { VariablesPointers_bool["log_mysql_warnings_enabled"] = make_tuple(&variables.log_mysql_warnings_enabled, false); VariablesPointers_bool["log_unhealthy_connections"] = make_tuple(&variables.log_unhealthy_connections, false); VariablesPointers_bool["monitor_enabled"] = make_tuple(&variables.monitor_enabled, false); + VariablesPointers_bool["monitor_replication_lag_group_by_host"] = make_tuple(&variables.monitor_replication_lag_group_by_host, false); VariablesPointers_bool["monitor_wait_timeout"] = make_tuple(&variables.monitor_wait_timeout, false); VariablesPointers_bool["monitor_writer_is_also_reader"] = make_tuple(&variables.monitor_writer_is_also_reader, false); VariablesPointers_bool["multiplexing"] = make_tuple(&variables.multiplexing, false); @@ -3935,6 +3944,7 @@ void MySQL_Thread::refresh_variables() { mysql_thread___monitor_read_only_interval=GloMTH->get_variable_int((char *)"monitor_read_only_interval"); 
mysql_thread___monitor_read_only_timeout=GloMTH->get_variable_int((char *)"monitor_read_only_timeout"); mysql_thread___monitor_read_only_max_timeout_count=GloMTH->get_variable_int((char *)"monitor_read_only_max_timeout_count"); + mysql_thread___monitor_replication_lag_group_by_host=(bool)GloMTH->get_variable_int((char *)"monitor_replication_lag_group_by_host"); mysql_thread___monitor_replication_lag_interval=GloMTH->get_variable_int((char *)"monitor_replication_lag_interval"); mysql_thread___monitor_replication_lag_timeout=GloMTH->get_variable_int((char *)"monitor_replication_lag_timeout"); mysql_thread___monitor_replication_lag_count=GloMTH->get_variable_int((char *)"monitor_replication_lag_count"); @@ -5188,6 +5198,7 @@ void MySQL_Threads_Handler::p_update_metrics() { this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_read_only_interval]->Set(this->variables.monitor_read_only_interval/1000.0); this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_read_only_timeout]->Set(this->variables.monitor_read_only_timeout/1000.0); this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_writer_is_also_reader]->Set(this->variables.monitor_writer_is_also_reader); + this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_replication_lag_group_by_host]->Set(this->variables.monitor_replication_lag_group_by_host); this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_replication_lag_interval]->Set(this->variables.monitor_replication_lag_interval/1000.0); this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_replication_lag_timeout]->Set(this->variables.monitor_replication_lag_timeout/1000.0); this->status_variables.p_gauge_array[p_th_gauge::mysql_monitor_history]->Set(this->variables.monitor_history/1000.0);