diff --git a/include/MySQL_HostGroups_Manager.h b/include/MySQL_HostGroups_Manager.h index 865514980..862a0fd33 100644 --- a/include/MySQL_HostGroups_Manager.h +++ b/include/MySQL_HostGroups_Manager.h @@ -55,12 +55,14 @@ class MySrvC { // MySQL Server Container unsigned long long bytes_sent; unsigned long long bytes_recv; bool shunned_automatic; + bool shunned_and_kill_all_connections; // if a serious failure is detected, this will cause all connections to die even if the server is just shunned //uint8_t charset; MySrvConnList *ConnectionsUsed; MySrvConnList *ConnectionsFree; MySrvC(char *, uint16_t, unsigned int, enum MySerStatus, unsigned int, unsigned int _max_connections, unsigned int _max_replication_lag); ~MySrvC(); void connect_error(int); + void shun_and_killall(); }; class MySrvList { // MySQL Server List @@ -153,6 +155,7 @@ class MySQL_HostGroups_Manager { void replication_lag_action(int, char*, unsigned int, int); void read_only_action(char *hostname, int port, int read_only); unsigned int get_servers_table_version(); + void shun_and_killall(char *hostname, int port); }; #endif /* __CLASS_MYSQL_HOSTGROUPS_MANAGER_H */ diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index 65d5eee7c..4d1649033 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -119,6 +119,7 @@ MySrvC::MySrvC(char *add, uint16_t p, unsigned int _weight, enum MySerStatus _st time_last_detected_error=0; connect_ERR_at_time_last_detected_error=0; shunned_automatic=false; + shunned_and_kill_all_connections=false; // false by default //charset=_charset; myhgc=NULL; ConnectionsUsed=new MySrvConnList(this); @@ -154,6 +155,12 @@ void MySrvC::connect_error(int err_num) { } } +void MySrvC::shun_and_killall() { + status=MYSQL_SERVER_STATUS_SHUNNED; + shunned_automatic=true; + shunned_and_kill_all_connections=true; +} + MySrvC::~MySrvC() { if (address) free(address); delete ConnectionsUsed; @@ -620,12 +627,19 @@ MySrvC 
*MyHGC::get_random_MySrvC() { max_wait_sec = 1; } if ((t - mysrvc->time_last_detected_error) > max_wait_sec) { - mysrvc->status=MYSQL_SERVER_STATUS_ONLINE; - mysrvc->shunned_automatic=false; - mysrvc->connect_ERR_at_time_last_detected_error=0; - mysrvc->time_last_detected_error=0; - // if a server is taken back online, consider it immediately - sum+=mysrvc->weight; + if ( + (mysrvc->shunned_and_kill_all_connections==false) // it is safe to bring it back online + || + (mysrvc->shunned_and_kill_all_connections==true && mysrvc->ConnectionsUsed->conns->len==0 && mysrvc->ConnectionsFree->conns->len==0) // if shunned_and_kill_all_connections is set, ensure all connections are already dropped + ) { + mysrvc->status=MYSQL_SERVER_STATUS_ONLINE; + mysrvc->shunned_automatic=false; + mysrvc->shunned_and_kill_all_connections=false; + mysrvc->connect_ERR_at_time_last_detected_error=0; + mysrvc->time_last_detected_error=0; + // if a server is taken back online, consider it immediately + sum+=mysrvc->weight; + } } } } @@ -1024,3 +1038,42 @@ void MySQL_HostGroups_Manager::read_only_action(char *hostname, int port, int re free(query); } + + + +// shun_and_killall +// this function is called only from MySQL_Monitor::monitor_ping() +// it temporarily disables a host that is not responding to pings, and marks the host in a way that, when one of its connections is used, the connection will be dropped +void MySQL_HostGroups_Manager::shun_and_killall(char *hostname, int port) { + wrlock(); + MySrvC *mysrvc=NULL; + for (unsigned int i=0; i < MyHostGroups->len; i++) { + MyHGC *myhgc=(MyHGC *)MyHostGroups->index(i); + unsigned int j; + unsigned int sum=0; + unsigned int l=myhgc->mysrvs->cnt(); + if (l) { + for (j=0; j < l; j++) { + mysrvc=myhgc->mysrvs->idx(j); + if (mysrvc->port==port && strcmp(mysrvc->address,hostname)==0) { + switch (mysrvc->status) { + case MYSQL_SERVER_STATUS_SHUNNED: + if (mysrvc->shunned_automatic==false) { // not shunned automatically: leave the server untouched + break; + } // shunned automatically: intentionally no break, fall through to the cases below + case MYSQL_SERVER_STATUS_ONLINE: + case MYSQL_SERVER_STATUS_OFFLINE_SOFT: + mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED; + 
mysrvc->shunned_automatic=true; + mysrvc->shunned_and_kill_all_connections=true; + mysrvc->ConnectionsFree->drop_all_connections(); + break; + default: + break; + } + } + } + } + } + wrunlock(); +} diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index dd25e1f84..d33e60dde 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -979,11 +979,60 @@ __end_monitor_ping_loop: free(sds); } - if (resultset) + if (resultset) { delete resultset; + resultset=NULL; + } event_base_free(libevent_base); + // now it is time to shun all problematic hosts + query=(char *)"SELECT DISTINCT a.hostname, a.port FROM mysql_servers a JOIN monitor.mysql_server_ping_log b ON a.hostname=b.hostname WHERE status!='OFFLINE_HARD' AND b.ping_error IS NOT NULL"; + proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); + admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset); + if (error) { + proxy_error("Error on %s : %s\n", query, error); + } else { + // get all addresses and ports + int i=0; + int j=0; + char **addresses=(char **)malloc(resultset->rows_count * sizeof(char *)); + char **ports=(char **)malloc(resultset->rows_count * sizeof(char *)); + for (std::vector< SQLite3_row * >::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { + SQLite3_row *r=*it; + addresses[i]=strdup(r->fields[0]); + ports[i]=strdup(r->fields[1]); + i++; + } + if (resultset) { + delete resultset; + resultset=NULL; + } + char *new_query=(char *)"SELECT 1 FROM (SELECT hostname,port,ping_error FROM monitor.mysql_server_ping_log WHERE hostname='%s' AND port='%s' ORDER BY time_start DESC LIMIT %d) a WHERE ping_error IS NOT NULL GROUP BY hostname,port HAVING COUNT(*)=%d"; // a row is returned only if every one of the most recent pings failed; NOTE(review): assumes both %d receive the same threshold and that the buffer is sized from strlen(new_query) - confirm + for (j=0;iexecute_statement(buff, &error , &cols , &affected_rows , &resultset); // NOTE(review): the text between "j=0;" and "execute_statement" (loop condition, allocation and formatting of buff) is missing from this copy of the patch - restore it from the original commit + free(buff); + if (!error) { + if (resultset) { + if (resultset->rows_count) { + // disable host + MyHGM->shun_and_killall(addresses[j],atoi(ports[j])); + } + delete resultset; + resultset=NULL; + } + } + } + while (i) { // now free all the addresses/ports + i--; + 
free(addresses[i]); + free(ports[i]); + } + free(addresses); + free(ports); + } __sleep_monitor_ping_loop: t2=monotonic_time(); diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index 0afc87bf4..1bf3ffef4 100644 --- a/lib/MySQL_Session.cpp +++ b/lib/MySQL_Session.cpp @@ -1043,8 +1043,11 @@ handler_again: } else { if (rc==-1) { // the query failed - if (myconn->parent->status==MYSQL_SERVER_STATUS_OFFLINE_HARD) { - // the query failed because the server is offline hard + if ( + (myconn->parent->status==MYSQL_SERVER_STATUS_OFFLINE_HARD) // the query failed because the server is offline hard + || + (myconn->parent->status==MYSQL_SERVER_STATUS_SHUNNED && myconn->parent->shunned_automatic==true && myconn->parent->shunned_and_kill_all_connections==true) // the query failed because the server is shunned due to a serious failure + ) { if (mysql_thread___connect_timeout_server_max) { myds->max_connect_time=thread->curtime+mysql_thread___connect_timeout_server_max*1000; } diff --git a/lib/mysql_connection.cpp b/lib/mysql_connection.cpp index 2f4233632..c9260ad5a 100644 --- a/lib/mysql_connection.cpp +++ b/lib/mysql_connection.cpp @@ -796,7 +796,12 @@ int MySQL_Connection::async_query(short event, char *stmt, unsigned long length) PROXY_TRACE(); assert(mysql); assert(ret_mysql); - if (parent->status==MYSQL_SERVER_STATUS_OFFLINE_HARD) + if ( + (parent->status==MYSQL_SERVER_STATUS_OFFLINE_HARD) // the server is OFFLINE as specified by the user + || + (parent->status==MYSQL_SERVER_STATUS_SHUNNED && parent->shunned_automatic==true && parent->shunned_and_kill_all_connections==true) // the server is SHUNNED due to a serious issue + ) { // fail the query only in the two cases above return -1; + } switch (async_state_machine) { case ASYNC_QUERY_END: