diff --git a/include/MySQL_HostGroups_Manager.h b/include/MySQL_HostGroups_Manager.h index 01dc8fa55..532f9450b 100644 --- a/include/MySQL_HostGroups_Manager.h +++ b/include/MySQL_HostGroups_Manager.h @@ -543,7 +543,7 @@ class MySQL_HostGroups_Manager { void read_only_action(char *hostname, int port, int read_only); unsigned int get_servers_table_version(); void wait_servers_table_version(unsigned, unsigned); - void shun_and_killall(char *hostname, int port); + bool shun_and_killall(char *hostname, int port); void set_server_current_latency_us(char *hostname, int port, unsigned int _current_latency_us); unsigned long long Get_Memory_Stats(); diff --git a/include/MySQL_Monitor.hpp b/include/MySQL_Monitor.hpp index 817301579..1bf668927 100644 --- a/include/MySQL_Monitor.hpp +++ b/include/MySQL_Monitor.hpp @@ -188,6 +188,7 @@ class MySQL_Monitor { void populate_monitor_mysql_server_group_replication_log(); void populate_monitor_mysql_server_galera_log(); char * galera_find_last_node(int); + bool server_responds_to_ping(char *address, int port); }; #endif /* __CLASS_MYSQL_MONITOR_H */ diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index 3a3326286..29fa6f38c 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -2925,11 +2925,14 @@ __exit_read_only_action: // shun_and_killall // this function is called only from MySQL_Monitor::monitor_ping() // it temporary disables a host that is not responding to pings, and mark the host in a way that when used the connection will be dropped -void MySQL_HostGroups_Manager::shun_and_killall(char *hostname, int port) { +// return true if the status was changed +bool MySQL_HostGroups_Manager::shun_and_killall(char *hostname, int port) { + time_t t = time(NULL); + bool ret = false; wrlock(); MySrvC *mysrvc=NULL; - for (unsigned int i=0; ilen; i++) { - MyHGC *myhgc=(MyHGC *)MyHostGroups->index(i); + for (unsigned int i=0; ilen; i++) { + MyHGC *myhgc=(MyHGC *)MyHostGroups->index(i); unsigned int j; unsigned int l=myhgc->mysrvs->cnt(); if (l) { @@ -2942,6 +2945,9 @@ void MySQL_HostGroups_Manager::shun_and_killall(char *hostname, int port) { break; } case MYSQL_SERVER_STATUS_ONLINE: + if (mysrvc->status == MYSQL_SERVER_STATUS_ONLINE) { + ret = true; + } mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED; case MYSQL_SERVER_STATUS_OFFLINE_SOFT: mysrvc->shunned_automatic=true; @@ -2951,11 +2957,13 @@ void MySQL_HostGroups_Manager::shun_and_killall(char *hostname, int port) { default: break; } + mysrvc->time_last_detected_error = t; } } } } wrunlock(); + return ret; } // set_server_current_latency_us diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 1b97d6e2a..cb74eafbb 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -1579,12 +1579,16 @@ void * MySQL_Monitor::monitor_connect() { } for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { SQLite3_row *r=*it; - MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[0],atoi(r->fields[1]), NULL, atoi(r->fields[2])); - mmsd->mondb=monitordb; - WorkItem* item; - item=new WorkItem(mmsd,monitor_connect_thread); - GloMyMon->queue.add(item); - usleep(us); + bool rc_ping = true; + rc_ping = server_responds_to_ping(r->fields[0],atoi(r->fields[1])); + if (rc_ping) { // only if server is responding to pings + MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[0],atoi(r->fields[1]), NULL, atoi(r->fields[2])); + mmsd->mondb=monitordb; + WorkItem* item; + item=new WorkItem(mmsd,monitor_connect_thread); + GloMyMon->queue.add(item); + usleep(us); + } if (GloMyMon->shutdown) return NULL; } } @@ -1757,8 +1761,11 @@ __end_monitor_ping_loop: if (resultset) { if (resultset->rows_count) { // disable host - proxy_error("Server %s:%s missed %d heartbeats, shunning it and killing all the connections\n", addresses[j], ports[j], max_failures); - MyHGM->shun_and_killall(addresses[j],atoi(ports[j])); + bool rc_shun = false; + rc_shun = MyHGM->shun_and_killall(addresses[j],atoi(ports[j])); + if (rc_shun) { + proxy_error("Server %s:%s missed %d heartbeats, shunning it and killing all the connections. Disabling other checks until the node comes back online.\n", addresses[j], ports[j], max_failures); + } } delete resultset; resultset=NULL; @@ -1856,6 +1863,39 @@ __sleep_monitor_ping_loop: return NULL; } + + +bool MySQL_Monitor::server_responds_to_ping(char *address, int port) { + bool ret = true; // default + char *error=NULL; + int cols=0; + int affected_rows=0; + SQLite3_result *resultset=NULL; + char *new_query=NULL; + new_query=(char *)"SELECT 1 FROM (SELECT hostname,port,ping_error FROM mysql_server_ping_log WHERE hostname='%s' AND port=%d ORDER BY time_start_us DESC LIMIT %d) a WHERE ping_error IS NOT NULL AND ping_error NOT LIKE 'Access denied for user%%' GROUP BY hostname,port HAVING COUNT(*)=%d"; + char *buff=(char *)malloc(strlen(new_query)+strlen(address)+32); + int max_failures = mysql_thread___monitor_ping_max_failures; + sprintf(buff,new_query,address,port,max_failures,max_failures); + monitordb->execute_statement(buff, &error , &cols , &affected_rows , &resultset); + if (!error) { + if (resultset) { + if (resultset->rows_count) { + ret = false; + } + delete resultset; + resultset=NULL; + } + } else { + proxy_error("Error on %s : %s\n", buff, error); + } + if (resultset) { + delete resultset; + resultset=NULL; + } + free(buff); + return ret; +} + void * MySQL_Monitor::monitor_read_only() { // initialize the MySQL Thread (note: this is not a real thread, just the structures associated with it) unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version; @@ -1906,22 +1946,26 @@ void * MySQL_Monitor::monitor_read_only() { } for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { SQLite3_row *r=*it; - MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[0],atoi(r->fields[1]), NULL, atoi(r->fields[2])); - mmsd->task_id = MON_READ_ONLY; // default - if (r->fields[3]) { - if (strcasecmp(r->fields[3],(char *)"innodb_read_only")==0) { - mmsd->task_id = MON_INNODB_READ_ONLY; - } else { - if (strcasecmp(r->fields[3],(char *)"super_read_only")==0) { - mmsd->task_id = MON_SUPER_READ_ONLY; + bool rc_ping = true; + rc_ping = server_responds_to_ping(r->fields[0],atoi(r->fields[1])); + if (rc_ping) { // only if server is responding to pings + MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[0],atoi(r->fields[1]), NULL, atoi(r->fields[2])); + mmsd->task_id = MON_READ_ONLY; // default + if (r->fields[3]) { + if (strcasecmp(r->fields[3],(char *)"innodb_read_only")==0) { + mmsd->task_id = MON_INNODB_READ_ONLY; + } else { + if (strcasecmp(r->fields[3],(char *)"super_read_only")==0) { + mmsd->task_id = MON_SUPER_READ_ONLY; + } } } + mmsd->mondb=monitordb; + WorkItem* item; + item=new WorkItem(mmsd,monitor_read_only_thread); + GloMyMon->queue.add(item); + usleep(us); } - mmsd->mondb=monitordb; - WorkItem* item; - item=new WorkItem(mmsd,monitor_read_only_thread); - GloMyMon->queue.add(item); - usleep(us); if (GloMyMon->shutdown) return NULL; } } @@ -2240,12 +2284,16 @@ void * MySQL_Monitor::monitor_replication_lag() { } for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { SQLite3_row *r=*it; - MySQL_Monitor_State_Data *mmsd = new MySQL_Monitor_State_Data(r->fields[1], atoi(r->fields[2]), NULL, atoi(r->fields[4]), atoi(r->fields[0])); - mmsd->mondb=monitordb; - WorkItem* item; - item=new WorkItem(mmsd,monitor_replication_lag_thread); - GloMyMon->queue.add(item); - usleep(us); + bool rc_ping = true; + rc_ping = server_responds_to_ping(r->fields[0],atoi(r->fields[1])); + if (rc_ping) { // only if server is responding to pings + MySQL_Monitor_State_Data *mmsd = new MySQL_Monitor_State_Data(r->fields[1], atoi(r->fields[2]), NULL, atoi(r->fields[4]), atoi(r->fields[0])); + mmsd->mondb=monitordb; + WorkItem* item; + item=new WorkItem(mmsd,monitor_replication_lag_thread); + GloMyMon->queue.add(item); + usleep(us); + } if (GloMyMon->shutdown) return NULL; } }