First draft for bug #543

When a server isn't responding to ping, it is flagged as shunned and all of its connections need to be dropped.
All of its connections must be dropped before the server is brought back online.
pull/642/head
René Cannaò 10 years ago
parent 2e154cf42f
commit fc9ab855da

@ -55,12 +55,14 @@ class MySrvC { // MySQL Server Container
unsigned long long bytes_sent;
unsigned long long bytes_recv;
bool shunned_automatic;
bool shunned_and_kill_all_connections; // if a serious failure is detected, this will cause all connections to die even if the server is just shunned
//uint8_t charset;
MySrvConnList *ConnectionsUsed;
MySrvConnList *ConnectionsFree;
MySrvC(char *, uint16_t, unsigned int, enum MySerStatus, unsigned int, unsigned int _max_connections, unsigned int _max_replication_lag);
~MySrvC();
void connect_error(int);
void shun_and_killall();
};
class MySrvList { // MySQL Server List
@ -153,6 +155,7 @@ class MySQL_HostGroups_Manager {
void replication_lag_action(int, char*, unsigned int, int);
void read_only_action(char *hostname, int port, int read_only);
unsigned int get_servers_table_version();
void shun_and_killall(char *hostname, int port);
};
#endif /* __CLASS_MYSQL_HOSTGROUPS_MANAGER_H */

@ -119,6 +119,7 @@ MySrvC::MySrvC(char *add, uint16_t p, unsigned int _weight, enum MySerStatus _st
time_last_detected_error=0;
connect_ERR_at_time_last_detected_error=0;
shunned_automatic=false;
shunned_and_kill_all_connections=false; // false to default
//charset=_charset;
myhgc=NULL;
ConnectionsUsed=new MySrvConnList(this);
@ -154,6 +155,12 @@ void MySrvC::connect_error(int err_num) {
}
}
void MySrvC::shun_and_killall() {
status=MYSQL_SERVER_STATUS_SHUNNED;
shunned_automatic=true;
shunned_and_kill_all_connections=true;
}
MySrvC::~MySrvC() {
if (address) free(address);
delete ConnectionsUsed;
@ -620,12 +627,19 @@ MySrvC *MyHGC::get_random_MySrvC() {
max_wait_sec = 1;
}
if ((t - mysrvc->time_last_detected_error) > max_wait_sec) {
mysrvc->status=MYSQL_SERVER_STATUS_ONLINE;
mysrvc->shunned_automatic=false;
mysrvc->connect_ERR_at_time_last_detected_error=0;
mysrvc->time_last_detected_error=0;
// if a server is taken back online, consider it immediately
sum+=mysrvc->weight;
if (
(mysrvc->shunned_and_kill_all_connections==false) // it is safe to bring it back online
||
(mysrvc->shunned_and_kill_all_connections==true && mysrvc->ConnectionsUsed->conns->len==0 && mysrvc->ConnectionsFree->conns->len==0) // if shunned_and_kill_all_connections is set, ensure all connections are already dropped
) {
mysrvc->status=MYSQL_SERVER_STATUS_ONLINE;
mysrvc->shunned_automatic=false;
mysrvc->shunned_and_kill_all_connections=false;
mysrvc->connect_ERR_at_time_last_detected_error=0;
mysrvc->time_last_detected_error=0;
// if a server is taken back online, consider it immediately
sum+=mysrvc->weight;
}
}
}
}
@ -1024,3 +1038,42 @@ void MySQL_HostGroups_Manager::read_only_action(char *hostname, int port, int re
free(query);
}
// shun_and_killall
// This function is called only from MySQL_Monitor::monitor_ping().
// It temporarily disables a host that is not responding to pings, and marks the
// host in a way that, when one of its connections is used, the connection is dropped.
//
// hostname / port : identify the backend; every server entry matching both,
//                   in every hostgroup, is processed.
//
// The whole scan runs between wrlock() and wrunlock().
// For each matching entry:
//  - ONLINE, OFFLINE_SOFT, and SHUNNED with shunned_automatic==true:
//    the server is (re)flagged through MySrvC::shun_and_killall() and its
//    free connections are dropped immediately;
//  - SHUNNED with shunned_automatic==false, and any other status:
//    the entry is left untouched.
// Connections in ConnectionsUsed are not touched here.
void MySQL_HostGroups_Manager::shun_and_killall(char *hostname, int port) {
	wrlock();
	for (unsigned int i=0; i<MyHostGroups->len; i++) {
		MyHGC *myhgc=(MyHGC *)MyHostGroups->index(i);
		unsigned int l=myhgc->mysrvs->cnt();
		for (unsigned int j=0; j<l; j++) {
			MySrvC *mysrvc=myhgc->mysrvs->idx(j);
			if (mysrvc->port!=port || strcmp(mysrvc->address,hostname)!=0) {
				continue; // not the backend we were asked to shun
			}
			// decide explicitly instead of relying on a switch fallthrough
			bool must_shun=false;
			switch (mysrvc->status) {
				case MYSQL_SERVER_STATUS_SHUNNED:
					// only re-flag servers that were shunned automatically
					must_shun=mysrvc->shunned_automatic;
					break;
				case MYSQL_SERVER_STATUS_ONLINE:
				case MYSQL_SERVER_STATUS_OFFLINE_SOFT:
					must_shun=true;
					break;
				default:
					break;
			}
			if (must_shun) {
				// sets status=SHUNNED, shunned_automatic and shunned_and_kill_all_connections
				mysrvc->shun_and_killall();
				mysrvc->ConnectionsFree->drop_all_connections();
			}
		}
	}
	wrunlock();
}

@ -979,11 +979,60 @@ __end_monitor_ping_loop:
free(sds);
}
if (resultset)
if (resultset) {
delete resultset;
resultset=NULL;
}
event_base_free(libevent_base);
// now it is time to shun all problematic hosts
// Step 1: collect every (hostname, port) that is not OFFLINE_HARD and has at
//         least one ping error recorded in the ping log.
// Step 2: for each candidate, shun it only if its most recent pings ALL failed.
query=(char *)"SELECT DISTINCT a.hostname, a.port FROM mysql_servers a JOIN monitor.mysql_server_ping_log b ON a.hostname=b.hostname WHERE status!='OFFLINE_HARD' AND b.ping_error IS NOT NULL";
proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query);
admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset);
if (error) {
	proxy_error("Error on %s : %s\n", query, error);
} else if (resultset) {
	// get all addresses and ports
	// they are copied out because resultset is reused by the per-host queries below
	int i=0; // number of (address, port) pairs collected
	int j=0;
	char **addresses=(char **)malloc(resultset->rows_count * sizeof(char *));
	char **ports=(char **)malloc(resultset->rows_count * sizeof(char *));
	for (std::vector<SQLite3_row *>::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) {
		SQLite3_row *r=*it;
		addresses[i]=strdup(r->fields[0]);
		ports[i]=strdup(r->fields[1]);
		i++;
	}
	delete resultset;
	resultset=NULL;
	// number of most recent pings that must all have failed before the host is shunned
	const int ping_failures_threshold=3;
	// The inner SELECT takes the latest N log rows for the host; the outer WHERE keeps
	// only the failed ones, so COUNT(*)=N holds only when every one of them failed.
	// (Without the ping_error filter the query would merely check that N rows exist.)
	char *new_query=(char *)"SELECT 1 FROM (SELECT hostname,port,ping_error FROM monitor.mysql_server_ping_log WHERE hostname='%s' AND port='%s' ORDER BY time_start DESC LIMIT %d) a WHERE ping_error IS NOT NULL GROUP BY hostname,port HAVING COUNT(*)=%d";
	for (j=0;j<i;j++) { // was "i<i", which never iterated: no host was ever shunned
		size_t buff_len=strlen(new_query)+strlen(addresses[j])+strlen(ports[j])+16;
		char *buff=(char *)malloc(buff_len);
		snprintf(buff,buff_len,new_query,addresses[j],ports[j],ping_failures_threshold,ping_failures_threshold);
		monitordb->execute_statement(buff, &error , &cols , &affected_rows , &resultset);
		if (error) {
			// report the failure instead of silently skipping the host
			proxy_error("Error on %s : %s\n", buff, error);
		} else if (resultset && resultset->rows_count) {
			// disable host
			MyHGM->shun_and_killall(addresses[j],atoi(ports[j]));
		}
		free(buff);
		if (resultset) {
			delete resultset;
			resultset=NULL;
		}
	}
	while (i) { // now free all the addresses/ports
		i--;
		free(addresses[i]);
		free(ports[i]);
	}
	free(addresses);
	free(ports);
}
__sleep_monitor_ping_loop:
t2=monotonic_time();

@ -1043,8 +1043,11 @@ handler_again:
} else {
if (rc==-1) {
// the query failed
if (myconn->parent->status==MYSQL_SERVER_STATUS_OFFLINE_HARD) {
// the query failed because the server is offline hard
if (
(myconn->parent->status==MYSQL_SERVER_STATUS_OFFLINE_HARD) // the query failed because the server is offline hard
||
(myconn->parent->status==MYSQL_SERVER_STATUS_SHUNNED && myconn->parent->shunned_automatic==true && myconn->parent->shunned_and_kill_all_connections==true) // the query failed because the server is shunned due to a serious failure
) {
if (mysql_thread___connect_timeout_server_max) {
myds->max_connect_time=thread->curtime+mysql_thread___connect_timeout_server_max*1000;
}

@ -796,7 +796,12 @@ int MySQL_Connection::async_query(short event, char *stmt, unsigned long length)
PROXY_TRACE();
assert(mysql);
assert(ret_mysql);
if (parent->status==MYSQL_SERVER_STATUS_OFFLINE_HARD)
if (
(parent->status==MYSQL_SERVER_STATUS_OFFLINE_HARD) // the server is OFFLINE as specific by the user
||
(parent->status==MYSQL_SERVER_STATUS_SHUNNED && parent->shunned_automatic==true && parent->shunned_and_kill_all_connections==true) // the server is SHUNNED due to a serious issue
) {
}
return -1;
switch (async_state_machine) {
case ASYNC_QUERY_END:

Loading…
Cancel
Save