From 6e03099b46273fecc517ac90bc10ea9c4ebc9222 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Thu, 11 Apr 2019 10:35:37 +1000 Subject: [PATCH] Further error handling in Monitor --- lib/MySQL_Monitor.cpp | 75 ++++++++++++++++++++++++++++-------------- lib/ProxySQL_Admin.cpp | 4 +-- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 3b52424f9..e8e6ea0f3 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -161,7 +161,7 @@ private: // std::map, std::vector > my_connections; PtrArray *servers; public: - MYSQL * get_connection(char *hostname, int port); + MYSQL * get_connection(char *hostname, int port, MySQL_Monitor_State_Data *mmsd); void put_connection(char *hostname, int port, MYSQL *my); // void purge_idle_connections(); MySQL_Monitor_Connection_Pool() { @@ -169,15 +169,16 @@ public: conns = new PtrArray(); pthread_mutex_init(&m2, NULL); }; - void conn_register(MYSQL *my) { + void conn_register(MySQL_Monitor_State_Data *mmsd) { std::lock_guard lock(mutex); + MYSQL *my = mmsd->mysql; pthread_mutex_lock(&m2); for (unsigned int i=0; ilen; i++) { MYSQL *my1 = (MYSQL *)conns->index(i); assert(my!=my1); assert(my->net.fd!=my1->net.fd); } - fprintf(stderr,"Registering MYSQL with FD %d\n", my->net.fd); + fprintf(stderr,"Registering MYSQL with FD %d from mmsd %p and MYSQL %p\n", my->net.fd, mmsd, mmsd->mysql); conns->add(my); pthread_mutex_unlock(&m2); }; @@ -197,7 +198,7 @@ public: }; }; -MYSQL * MySQL_Monitor_Connection_Pool::get_connection(char *hostname, int port) { +MYSQL * MySQL_Monitor_Connection_Pool::get_connection(char *hostname, int port, MySQL_Monitor_State_Data *mmsd) { std::lock_guard lock(mutex); pthread_mutex_lock(&m2); MYSQL *my = NULL; @@ -222,6 +223,13 @@ MYSQL * MySQL_Monitor_Connection_Pool::get_connection(char *hostname, int port) assert(my!=my1); assert(my->net.fd!=my1->net.fd); } + for (unsigned int l=0; llen; l++) { + MYSQL *my1 = (MYSQL *)conns->index(l); + assert(my!=my1); + assert(my->net.fd!=my1->net.fd); + } + fprintf(stderr,"Registering MYSQL with FD %d from mmsd %p and MYSQL %p\n", my->net.fd, mmsd, my); + conns->add(my); } pthread_mutex_unlock(&m2); return my; @@ -240,15 +248,32 @@ void MySQL_Monitor_Connection_Pool::put_connection(char *hostname, int port, MYS MonMySrvC *srv = (MonMySrvC *)servers->index(i); if (srv->port == port && strcmp(hostname,srv->address)==0) { srv->conns->add(my); - pthread_mutex_unlock(&m2); - return; + for (unsigned int j=0; jlen; j++) { + MYSQL *my1 = (MYSQL *)conns->index(j); + if (my1 == my) { + conns->remove_index_fast(j); + fprintf(stderr,"Un-registering MYSQL with FD %d\n", my->net.fd); + pthread_mutex_unlock(&m2); + return; + } + } + assert(0); // it didn't register it } } // if no server was found MonMySrvC *srv = new MonMySrvC(hostname,port); srv->conns->add(my); servers->add(srv); - pthread_mutex_unlock(&m2); + for (unsigned int j=0; jlen; j++) { + MYSQL *my1 = (MYSQL *)conns->index(j); + if (my1 == my) { + conns->remove_index_fast(j); + fprintf(stderr,"Un-registering MYSQL with FD %d\n", my->net.fd); + pthread_mutex_unlock(&m2); + return; + } + } + assert(0); } /* @@ -633,7 +658,7 @@ void * monitor_ping_thread(void *arg) { mysql_thr->refresh_variables(); bool ping_success = false; - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; mmsd->t1=start_time; @@ -642,14 +667,14 @@ void * monitor_ping_thread(void *arg) { bool rc; rc=mmsd->create_new_connection(); if (mmsd->mysql) { - GloMyMon->My_Conn_Pool->conn_register(mmsd->mysql); + GloMyMon->My_Conn_Pool->conn_register(mmsd); } crc=true; if (rc==false) { goto __exit_monitor_ping_thread; } } else { - GloMyMon->My_Conn_Pool->conn_register(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_register(mmsd); } mmsd->t1=monotonic_time(); @@ -675,7 +700,7 @@ void * monitor_ping_thread(void *arg) { } else { if (crc==false) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); - GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); mmsd->mysql=NULL; } } @@ -723,7 +748,7 @@ __fast_exit_monitor_ping_thread: bool rc=mmsd->set_wait_timeout(); if (rc) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); - GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); } else { GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); mysql_close(mmsd->mysql); // set_wait_timeout failed @@ -828,7 +853,7 @@ void * monitor_read_only_thread(void *arg) { mysql_thr->curtime=monotonic_time(); mysql_thr->refresh_variables(); - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; bool read_only_success = false; @@ -1056,7 +1081,7 @@ void * monitor_group_replication_thread(void *arg) { mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; @@ -1321,7 +1346,7 @@ void * monitor_galera_thread(void *arg) { mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; @@ -1583,7 +1608,7 @@ void * monitor_replication_lag_thread(void *arg) { MYSQL *mysqlcopy = NULL; #endif // DEBUG - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; bool replication_lag_success = false; @@ -1598,14 +1623,14 @@ void * monitor_replication_lag_thread(void *arg) { bool rc; rc=mmsd->create_new_connection(); if (mmsd->mysql) { - GloMyMon->My_Conn_Pool->conn_register(mmsd->mysql); + GloMyMon->My_Conn_Pool->conn_register(mmsd); } crc=true; if (rc==false) { goto __fast_exit_monitor_replication_lag_thread; } } else { - GloMyMon->My_Conn_Pool->conn_register(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_register(mmsd); } #ifdef DEBUG @@ -1672,7 +1697,7 @@ void * monitor_replication_lag_thread(void *arg) { } } else { if (crc==false) { - GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); mmsd->mysql=NULL; } @@ -1752,7 +1777,7 @@ __exit_monitor_replication_lag_thread: } } - if (mmsd->interr) { // check failed + if (mmsd->interr || mmsd->mysql_error_msg) { // check failed if (mmsd->mysql) { GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); mysql_close(mmsd->mysql); @@ -1761,7 +1786,7 @@ __exit_monitor_replication_lag_thread: } else { if (mmsd->mysql) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); - GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); mmsd->mysql=NULL; } } @@ -1776,7 +1801,7 @@ __fast_exit_monitor_replication_lag_thread: bool rc=mmsd->set_wait_timeout(); if (rc) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); - GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); } else { GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); mysql_close(mmsd->mysql); // set_wait_timeout failed @@ -3598,7 +3623,7 @@ void * monitor_AWS_Aurora_thread_HG(void *arg) { mmsd = new MySQL_Monitor_State_Data(hpa[cur_host_idx].host, hpa[cur_host_idx].port, NULL, hpa[cur_host_idx].use_ssl); mmsd->writer_hostgroup = wHG; mmsd->aws_aurora_check_timeout_ms = check_timeout_ms; - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); //unsigned long long start_time=mysql_thr->curtime; start_time=t1; @@ -3989,7 +4014,7 @@ __sleep_monitor_aws_aurora: } for (unsigned int i=0;iqueue.add(item); + GloMyMon->queue->add(item); } return NULL; } @@ -4002,7 +4027,7 @@ void * monitor_AWS_Aurora_thread(void *arg) { mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; diff --git a/lib/ProxySQL_Admin.cpp b/lib/ProxySQL_Admin.cpp index b1036be53..faf3c0a86 100644 --- a/lib/ProxySQL_Admin.cpp +++ b/lib/ProxySQL_Admin.cpp @@ -9718,9 +9718,9 @@ void ProxySQL_Admin::enable_aurora_testing() { //admindb->execute("UPDATE global_variables SET variable_value=3000 WHERE variable_name='mysql-monitor_replication_lag_interval'"); //admindb->execute("UPDATE global_variables SET variable_value=1500 WHERE variable_name='mysql-monitor_replication_lag_timeout'"); admindb->execute("UPDATE global_variables SET variable_value=200 WHERE variable_name='mysql-monitor_ping_interval'"); - admindb->execute("UPDATE global_variables SET variable_value=100 WHERE variable_name='mysql-monitor_ping_timeout'"); + admindb->execute("UPDATE global_variables SET variable_value=3000 WHERE variable_name='mysql-monitor_ping_timeout'"); admindb->execute("UPDATE global_variables SET variable_value=200 WHERE variable_name='mysql-monitor_replication_lag_interval'"); - admindb->execute("UPDATE global_variables SET variable_value=100 WHERE variable_name='mysql-monitor_replication_lag_timeout'"); + admindb->execute("UPDATE global_variables SET variable_value=3000 WHERE variable_name='mysql-monitor_replication_lag_timeout'"); admindb->execute("UPDATE global_variables SET variable_value='percona.heartbeat' WHERE variable_name='mysql-monitor_replication_lag_use_percona_heartbeat'"); load_mysql_variables_to_runtime(); admindb->execute("INSERT INTO mysql_users (username,password,default_hostgroup) VALUES ('aurora1','pass1',1271), ('aurora2','pass2',1273), ('aurora3','pass3',1275)");