From dd71fcd81fe20b3549fefdb5db606d8df0da71dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Wed, 25 Aug 2021 23:56:41 +0200 Subject: [PATCH] Shun "soft" and "hard" in group replication lag When shunning a node due to replication lag in a group replication cluster, we first shun the node as MYSQL_SERVER_STATUS_SHUNNED, then we shun it as MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG. In this way we avoid (for a short time) killing connections on that backend. Backing off from the server like this can give it enough time to sync up. See the discussion in the comments of https://github.com/sysown/proxysql/pull/3533 --- include/MySQL_HostGroups_Manager.h | 3 ++- lib/MySQL_HostGroups_Manager.cpp | 34 +++++++++++++++++------------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/include/MySQL_HostGroups_Manager.h b/include/MySQL_HostGroups_Manager.h index 26da94cba..cc88dfcc8 100644 --- a/include/MySQL_HostGroups_Manager.h +++ b/include/MySQL_HostGroups_Manager.h @@ -417,6 +417,8 @@ class MySQL_HostGroups_Manager { void p_update_connection_pool_update_counter(std::string& endpoint_id, std::map labels, std::map& m_map, unsigned long long value, p_hg_dyn_counter::metric idx); void p_update_connection_pool_update_gauge(std::string& endpoint_id, std::map labels, std::map& m_map, unsigned long long value, p_hg_dyn_gauge::metric idx); + void group_replication_lag_action_set_server_status(MyHGC* myhgc, char* address, int port, int lag_count, bool enable); + public: std::mutex galera_set_writer_mutex; pthread_rwlock_t gtid_rwlock; @@ -582,7 +584,6 @@ class MySQL_HostGroups_Manager { * 'true' for enabling the server if it's 'SHUNNED', 'false' for disabling it. 
*/ void group_replication_lag_action(int _hid, char *address, unsigned int port, int lag_counts, bool read_only, bool enable); - void update_galera_set_offline(char *_hostname, int _port, int _writer_hostgroup, char *error, bool soft=false); void update_galera_set_read_only(char *_hostname, int _port, int _writer_hostgroup, char *error); void update_galera_set_writer(char *_hostname, int _port, int _writer_hostgroup); diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index 9bd767086..28ea741af 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -3386,25 +3386,29 @@ __exit_replication_lag_action: * @param lag_count The lag count, computed by 'get_lag_behind_count'. * @param enable Boolean specifying if the server should be enabled or not. */ -void lag_action_set_server_status(MyHGC* myhgc, char* address, int port, int lag_count, bool enable) { +void MySQL_HostGroups_Manager::group_replication_lag_action_set_server_status(MyHGC* myhgc, char* address, int port, int lag_count, bool enable) { if (myhgc == NULL || address == NULL) return; for (int j=0; j<(int)myhgc->mysrvs->cnt(); j++) { MySrvC *mysrvc=(MySrvC *)myhgc->mysrvs->servers->index(j); if (strcmp(mysrvc->address,address)==0 && mysrvc->port==port) { - if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE && enable == false) { - proxy_warning( - "Shunning server %s:%d from HG %u with replication lag, count number: '%d'\n", - address, port, myhgc->hid, lag_count - ); - mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG; - } else { - if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG && enable == true) { + + if (enable == true) { + if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG || mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED) { mysrvc->status=MYSQL_SERVER_STATUS_ONLINE; - proxy_warning( - "Re-enabling server %s:%d from HG %u with replication lag, count number: '%d'\n", - address, port, myhgc->hid, lag_count - ); + 
proxy_info("Re-enabling server %u:%s:%d from replication lag\n", myhgc->hid, address, port); + } + } else { + if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE) { + proxy_warning("Shunning 'soft' server %u:%s:%d with replication lag, count number: %d\n", myhgc->hid, address, port, lag_count); + mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED; + } else { + if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED) { + if (lag_count >= ( mysql_thread___monitor_groupreplication_max_transactions_behind_count * 2 )) { + proxy_warning("Shunning 'hard' server %u:%s:%d with replication lag, count number: %d\n", myhgc->hid, address, port, lag_count); + mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG; + } + } } } } @@ -3456,7 +3460,7 @@ void MySQL_HostGroups_Manager::group_replication_lag_action( ) { if (read_only == false) { myhgc = MyHGM->MyHGC_find(_hid); - lag_action_set_server_status(myhgc, address, port, lag_counts, enable); + group_replication_lag_action_set_server_status(myhgc, address, port, lag_counts, enable); } } @@ -3466,7 +3470,7 @@ void MySQL_HostGroups_Manager::group_replication_lag_action( enable ) { myhgc = MyHGM->MyHGC_find(reader_hostgroup); - lag_action_set_server_status(myhgc, address, port, lag_counts, enable); + group_replication_lag_action_set_server_status(myhgc, address, port, lag_counts, enable); } }