Several improvements to 'group_replication_lag_action'

1. Introduced new global variable: 'monitor_groupreplication_max_transactions_behind_for_read_only',
   that modifies the behavior of 'group_replication_lag_action'.
2. Improved logic making use of 'MyHGC_find' instead of directly
   searching 'MyHostGroups' structure.
3. Improved 'group_replication_lag_action' documentation with new
   implementation updates.
4. Introduced changes to 'update_group_replication_set_writer'
   preserving writers placed in 'OFFLINE_SOFT' state.
pull/3533/head
Javier Jaramago Fernández 5 years ago
parent 0850c4d596
commit b929758a87

@ -563,19 +563,25 @@ class MySQL_HostGroups_Manager {
* to 'SHUNNED' those servers whose replication lag is bigger than:
* - `mysql_thread___monitor_groupreplication_max_transactions_behind_count`
*
* @details The function automatically handles the case in which the supplied
* server is a writer and the 'writer_is_also_reader' flag is present in that
* hostgroup. In that case, it also sets as 'SHUNNED' the corresponding
* server that is present in the 'reader_hostgroup'.
* @details The function automatically handles the appropriate operation to
* perform on the supplied server, based on the supplied 'enable' flag and
* on the 'monitor_groupreplication_max_transactions_behind_for_read_only'
* variable. In case the value of the variable is:
*
* * '0' or '2': It's required to search the writer hostgroup for
* finding the supplied server.
* * '1' or '2': It's required to search the reader hostgroup for
* finding the supplied server.
*
* @param _hid The writer hostgroup.
* @param address The server address.
* @param port The server port.
* @param lag_counts The computed lag for the server.
* @param read_only Boolean specifying the read_only flag value of the server.
* @param enable Boolean specifying if the server needs to be disabled / enabled,
* 'true' for enabling the server if it's 'SHUNNED', 'false' for disabling it.
*/
void group_replication_lag_action(int _hid, char *address, unsigned int port, bool read_only, bool enable);
void group_replication_lag_action(int _hid, char *address, unsigned int port, int lag_counts, bool read_only, bool enable);
void update_galera_set_offline(char *_hostname, int _port, int _writer_hostgroup, char *error, bool soft=false);
void update_galera_set_read_only(char *_hostname, int _port, int _writer_hostgroup, char *error);

@ -410,6 +410,7 @@ class MySQL_Threads_Handler
int monitor_groupreplication_healthcheck_timeout;
int monitor_groupreplication_healthcheck_max_timeout_count;
int monitor_groupreplication_max_transactions_behind_count;
int monitor_groupreplication_max_transactions_behind_for_read_only;
int monitor_galera_healthcheck_interval;
int monitor_galera_healthcheck_timeout;
int monitor_galera_healthcheck_max_timeout_count;

@ -838,6 +838,7 @@ __thread int mysql_thread___monitor_groupreplication_healthcheck_interval;
__thread int mysql_thread___monitor_groupreplication_healthcheck_timeout;
__thread int mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count;
__thread int mysql_thread___monitor_groupreplication_max_transactions_behind_count;
__thread int mysql_thread___monitor_groupreplication_max_transaction_behind_for_read_only;
__thread int mysql_thread___monitor_galera_healthcheck_interval;
__thread int mysql_thread___monitor_galera_healthcheck_timeout;
__thread int mysql_thread___monitor_galera_healthcheck_max_timeout_count;
@ -988,6 +989,7 @@ extern __thread int mysql_thread___monitor_replication_lag_count;
extern __thread int mysql_thread___monitor_groupreplication_healthcheck_interval;
extern __thread int mysql_thread___monitor_groupreplication_healthcheck_timeout;
extern __thread int mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count;
extern __thread int mysql_thread___monitor_groupreplication_max_transaction_behind_for_read_only;
extern __thread int mysql_thread___monitor_groupreplication_max_transactions_behind_count;
extern __thread int mysql_thread___monitor_galera_healthcheck_interval;
extern __thread int mysql_thread___monitor_galera_healthcheck_timeout;

@ -3372,12 +3372,46 @@ __exit_replication_lag_action:
GloAdmin->mysql_servers_wrunlock();
}
/**
 * @brief Finds the supplied server in the provided 'MyHGC' and sets the status
 *   either to 'MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG' if 'enable' is
 *   'false' or 'MYSQL_SERVER_STATUS_ONLINE' if 'true'.
 *
 * @details Only two status transitions are ever performed:
 *
 *   * 'MYSQL_SERVER_STATUS_ONLINE' -> 'MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG',
 *     when 'enable' is 'false'.
 *   * 'MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG' -> 'MYSQL_SERVER_STATUS_ONLINE',
 *     when 'enable' is 'true'.
 *
 *   A matching server found in any other status is left untouched. The whole
 *   server list is scanned, so every entry matching 'address' and 'port' is
 *   processed. A warning is logged for each transition performed.
 *
 * @param myhgc The MySQL Hostgroup Container in which to perform the server
 *   search. If it's 'nullptr' the function does nothing.
 * @param address The server address.
 * @param port The server port.
 * @param lag_count The lag count, computed by 'get_lag_behind_count'. Only
 *   used for the message logged when a server is shunned.
 * @param enable Boolean specifying if the server should be enabled or not.
 */
void lag_action_set_server_status(MyHGC* myhgc, char* address, int port, int lag_count, bool enable) {
	// NOTE(review): callers pass the result of 'MyHGC_find' directly; this guard
	// assumes that lookup may yield no hostgroup - confirm against 'MyHGC_find'.
	if (myhgc == nullptr) {
		return;
	}

	for (int j=0; j<(int)myhgc->mysrvs->cnt(); j++) {
		MySrvC *mysrvc=(MySrvC *)myhgc->mysrvs->servers->index(j);
		if (strcmp(mysrvc->address,address)==0 && mysrvc->port==port) {
			if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE && enable == false) {
				proxy_warning(
					"Shunning server %s:%d from HG %u with replication lag, count number: '%d'\n",
					address, port, myhgc->hid, lag_count
				);
				mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG;
			} else {
				if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG && enable == true) {
					mysrvc->status=MYSQL_SERVER_STATUS_ONLINE;
					// The format string has three conversions, so exactly three
					// arguments are supplied ('lag_count' isn't part of this message).
					proxy_warning(
						"Re-enabling server %s:%d from HG %u with replication lag\n",
						address, port, myhgc->hid
					);
				}
			}
		}
	}
}
void MySQL_HostGroups_Manager::group_replication_lag_action(
int _hid, char *address, unsigned int port, bool read_only, bool enable
int _hid, char *address, unsigned int port, int lag_counts, bool read_only, bool enable
) {
GloAdmin->mysql_servers_wrlock();
wrlock();
int i,j;
int reader_hostgroup = 0;
bool writer_is_also_reader = false;
@ -3408,49 +3442,27 @@ void MySQL_HostGroups_Manager::group_replication_lag_action(
reader_hostgroup = atoi(rhid_row->fields[0]);
writer_is_also_reader = atoi(rhid_row->fields[1]);
for (i=0; i<(int)MyHostGroups->len; i++) {
MyHGC *myhgc=(MyHGC *)MyHostGroups->index(i);
{
MyHGC* myhgc = nullptr;
if (read_only) {
if (_hid >= 0 && reader_hostgroup != (int)myhgc->hid) {
continue;
}
} else {
// In case of 'writer_is_also_reader' the server can be present
// in both, the 'reader_hostgroup' and the 'writer_hostgroup'.
if (writer_is_also_reader) {
if (_hid >= 0 && _hid != (int)myhgc->hid && reader_hostgroup != (int)myhgc->hid) {
continue;
}
if (
mysql_thread___monitor_groupreplication_max_transaction_behind_for_read_only == 0 ||
mysql_thread___monitor_groupreplication_max_transaction_behind_for_read_only == 2 ||
enable
) {
if (read_only == false) {
myhgc = MyHGM->MyHGC_find(_hid);
lag_action_set_server_status(myhgc, address, port, lag_counts, enable);
}
}
int servers_found = 0;
for (j=0; j<(int)myhgc->mysrvs->cnt(); j++) {
MySrvC *mysrvc=(MySrvC *)myhgc->mysrvs->servers->index(j);
if (strcmp(mysrvc->address,address)==0 && mysrvc->port==port) {
// First server found
servers_found += 1;
if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE && enable == false) {
proxy_warning("Shunning server %s:%d from HG %u with replication lag, count number: '%d'\n", address, port, myhgc->hid, mysrvc->cur_replication_lag_count);
mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG;
} else {
if (mysrvc->status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG && enable == true) {
mysrvc->status=MYSQL_SERVER_STATUS_ONLINE;
proxy_warning("Re-enabling server %s:%d from HG %u with replication lag\n", address, port, myhgc->hid);
}
}
if (!writer_is_also_reader) {
goto __exit_replication_lag_action;
} else {
if (servers_found == 2) {
goto __exit_replication_lag_action;
}
}
}
if (
mysql_thread___monitor_groupreplication_max_transaction_behind_for_read_only == 1 ||
mysql_thread___monitor_groupreplication_max_transaction_behind_for_read_only == 2 ||
enable
) {
myhgc = MyHGM->MyHGC_find(reader_hostgroup);
lag_action_set_server_status(myhgc, address, port, lag_counts, enable);
}
}
@ -4749,7 +4761,7 @@ void MySQL_HostGroups_Manager::update_group_replication_set_writer(char *_hostna
char *query=NULL;
char *q=NULL;
char *error=NULL;
q=(char *)"SELECT hostgroup_id, status FROM mysql_servers JOIN mysql_group_replication_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup OR hostgroup_id=backup_writer_hostgroup OR hostgroup_id=offline_hostgroup WHERE hostname='%s' AND port=%d AND status<>3 AND status <>2";
q=(char *)"SELECT hostgroup_id, status FROM mysql_servers JOIN mysql_group_replication_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup OR hostgroup_id=backup_writer_hostgroup OR hostgroup_id=offline_hostgroup WHERE hostname='%s' AND port=%d AND status<>3";
query=(char *)malloc(strlen(q)+strlen(_hostname)+32);
sprintf(query,q,_hostname,_port);
mydb->execute_statement(query, &error, &cols , &affected_rows , &resultset);
@ -4786,7 +4798,7 @@ void MySQL_HostGroups_Manager::update_group_replication_set_writer(char *_hostna
int hostgroup=atoi(r->fields[0]);
if (hostgroup==_writer_hostgroup) {
status = atoi(r->fields[1]);
if (status == 0) {
if (status == 0 || status == 2) {
found_writer=true;
}
}
@ -4828,10 +4840,10 @@ void MySQL_HostGroups_Manager::update_group_replication_set_writer(char *_hostna
//query=(char *)malloc(strlen(q)+strlen(_hostname)+64);
sprintf(query,q,_hostname,_port,_writer_hostgroup);
mydb->execute(query);
// NOTE: The status should be preserved in case of being SHUNNED
q=(char *)"UPDATE mysql_servers_incoming SET status=%d WHERE hostname='%s' AND port=%d AND hostgroup_id=%d";
//query=(char *)malloc(strlen(q)+strlen(_hostname)+64);
sprintf(query,q,status,_hostname,_port,_writer_hostgroup);
// NOTE: In case of the server being 'OFFLINE_SOFT' we preserve this status. Otherwise
// we set the server as 'ONLINE'.
sprintf(query, q, (status == 2 ? 2 : 0 ), _hostname, _port, _writer_hostgroup);
mydb->execute(query);
//free(query);
if (writer_is_also_reader && read_HG>=0) {

@ -1592,7 +1592,9 @@ __exit_monitor_group_replication_thread:
if (lag_counts >= mysql_thread___monitor_groupreplication_max_transactions_behind_count) {
enable = false;
}
MyHGM->group_replication_lag_action(mmsd->writer_hostgroup, mmsd->hostname, mmsd->port, read_only, enable);
MyHGM->group_replication_lag_action(
mmsd->writer_hostgroup, mmsd->hostname, mmsd->port, lag_counts, read_only, enable
);
}
}

@ -456,6 +456,7 @@ static char * mysql_thread_variables_names[]= {
(char *)"monitor_groupreplication_healthcheck_timeout",
(char *)"monitor_groupreplication_healthcheck_max_timeout_count",
(char *)"monitor_groupreplication_max_transactions_behind_count",
(char *)"monitor_groupreplication_max_transactions_behind_for_read_only",
(char *)"monitor_galera_healthcheck_interval",
(char *)"monitor_galera_healthcheck_timeout",
(char *)"monitor_galera_healthcheck_max_timeout_count",
@ -1042,6 +1043,7 @@ MySQL_Threads_Handler::MySQL_Threads_Handler() {
variables.monitor_groupreplication_healthcheck_timeout=800;
variables.monitor_groupreplication_healthcheck_max_timeout_count=3;
variables.monitor_groupreplication_max_transactions_behind_count=3;
variables.monitor_groupreplication_max_transactions_behind_for_read_only=1;
variables.monitor_galera_healthcheck_interval=5000;
variables.monitor_galera_healthcheck_timeout=800;
variables.monitor_galera_healthcheck_max_timeout_count=3;
@ -2030,6 +2032,7 @@ char ** MySQL_Threads_Handler::get_variables_list() {
VariablesPointers_int["monitor_groupreplication_healthcheck_timeout"] = make_tuple(&variables.monitor_groupreplication_healthcheck_timeout, 100, 600*1000, false);
VariablesPointers_int["monitor_groupreplication_healthcheck_max_timeout_count"] = make_tuple(&variables.monitor_groupreplication_healthcheck_max_timeout_count, 1, 10, false);
VariablesPointers_int["monitor_groupreplication_max_transactions_behind_count"] = make_tuple(&variables.monitor_groupreplication_max_transactions_behind_count, 1, 10, false);
VariablesPointers_int["monitor_groupreplication_max_transactions_behind_for_read_only"] = make_tuple(&variables.monitor_groupreplication_max_transactions_behind_for_read_only, 0, 2, false);
VariablesPointers_int["monitor_galera_healthcheck_interval"] = make_tuple(&variables.monitor_galera_healthcheck_interval, 50, 7*24*3600*1000, false);
VariablesPointers_int["monitor_galera_healthcheck_timeout"] = make_tuple(&variables.monitor_galera_healthcheck_timeout, 50, 600*1000, false);
@ -3565,6 +3568,7 @@ void MySQL_Thread::refresh_variables() {
mysql_thread___monitor_groupreplication_healthcheck_timeout=GloMTH->get_variable_int((char *)"monitor_groupreplication_healthcheck_timeout");
mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count=GloMTH->get_variable_int((char *)"monitor_groupreplication_healthcheck_max_timeout_count");
mysql_thread___monitor_groupreplication_max_transactions_behind_count=GloMTH->get_variable_int((char *)"monitor_groupreplication_max_transactions_behind_count");
mysql_thread___monitor_groupreplication_max_transaction_behind_for_read_only=GloMTH->get_variable_int((char *)"monitor_groupreplication_max_transactions_behind_for_read_only");
mysql_thread___monitor_galera_healthcheck_interval=GloMTH->get_variable_int((char *)"monitor_galera_healthcheck_interval");
mysql_thread___monitor_galera_healthcheck_timeout=GloMTH->get_variable_int((char *)"monitor_galera_healthcheck_timeout");
mysql_thread___monitor_galera_healthcheck_max_timeout_count=GloMTH->get_variable_int((char *)"monitor_galera_healthcheck_max_timeout_count");

Loading…
Cancel
Save