/* RECENT CHANGELOG 1.2.0723 * almost completely rewritten * use of blocking call for new connections * use of Thread Pool instead of a thread per check type 0.2.0902 * original implementation */ #include #include #include #include #include #include #include "MySQL_Protocol.h" #include "MySQL_HostGroups_Manager.h" #include "MySQL_Monitor.hpp" #include "ProxySQL_Cluster.hpp" #include "proxysql.h" #include "cpp.h" #include "proxysql_utils.h" #include "thread.h" #include "wqueue.h" #include #ifdef DEBUG #define DEB "_DEBUG" #else #define DEB "" #endif /* DEBUG */ #define MYSQL_MONITOR_VERSION "2.0.1226" DEB extern ProxySQL_Admin *GloAdmin; extern MySQL_Threads_Handler *GloMTH; extern ProxySQL_Cluster* GloProxyCluster; static MySQL_Monitor *GloMyMon; #define SAFE_SQLITE3_STEP(_stmt) do {\ do {\ rc=(*proxy_sqlite3_step)(_stmt);\ if (rc!=SQLITE_DONE) {\ assert(rc==SQLITE_LOCKED);\ usleep(100);\ }\ } while (rc!=SQLITE_DONE);\ } while (0) #define SAFE_SQLITE3_STEP2(_stmt) do {\ do {\ rc=(*proxy_sqlite3_step)(_stmt);\ if (rc==SQLITE_LOCKED || rc==SQLITE_BUSY) {\ usleep(100);\ }\ } while (rc==SQLITE_LOCKED || rc==SQLITE_BUSY);\ } while (0) template class ConsumerThread : public Thread { wqueue*>& m_queue; int thrn; public: ConsumerThread(wqueue*>& queue, int _n) : m_queue(queue) { thrn=_n; } void* run() { // Remove 1 item at a time and process it. Blocks if no items are // available to process. for (int i = 0; (thrn ? i < thrn : 1); i++) { //VALGRIND_DISABLE_ERROR_REPORTING; WorkItem* item = (WorkItem*)m_queue.remove(); //VALGRIND_ENABLE_ERROR_REPORTING; if (item == NULL) { if (thrn) { // we took a NULL item that wasn't meant to reach here! 
Add it again WorkItem* item = NULL; m_queue.add(item); } // this is intentional to EXIT immediately return NULL; } if (item->routine) { // NULL is allowed, do nothing for it bool me = true; if (check_monitor_enabled_flag) { pthread_mutex_lock(&GloMyMon->mon_en_mutex); me = GloMyMon->monitor_enabled; pthread_mutex_unlock(&GloMyMon->mon_en_mutex); } if (me) { item->routine((void *)item->data); } } delete item->data; delete item; } return NULL; } }; using DNSResolverThread = ConsumerThread; static int wait_for_mysql(MYSQL *mysql, int status) { struct pollfd pfd; int timeout, res; pfd.fd = mysql_get_socket(mysql); pfd.events = (status & MYSQL_WAIT_READ ? POLLIN : 0) | (status & MYSQL_WAIT_WRITE ? POLLOUT : 0) | (status & MYSQL_WAIT_EXCEPT ? POLLPRI : 0); timeout = 10; res = poll(&pfd, 1, timeout); if (res == 0) return MYSQL_WAIT_TIMEOUT | status; else if (res < 0) return MYSQL_WAIT_TIMEOUT; else { int status = 0; if (pfd.revents & POLLIN) status |= MYSQL_WAIT_READ; if (pfd.revents & POLLOUT) status |= MYSQL_WAIT_WRITE; if (pfd.revents & POLLPRI) status |= MYSQL_WAIT_EXCEPT; return status; } } static void close_mysql(MYSQL *my) { if (my->net.pvio) { char buff[5]; mysql_hdr myhdr; myhdr.pkt_id=0; myhdr.pkt_length=1; memcpy(buff, &myhdr, sizeof(mysql_hdr)); buff[4]=0x01; int fd=my->net.fd; #ifdef __APPLE__ int arg_on=1; setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, (char *) &arg_on, sizeof(int)); int wb=send(fd, buff, 5, 0); #else int wb=send(fd, buff, 5, MSG_NOSIGNAL); #endif fd+=wb; // dummy, to make compiler happy fd-=wb; // dummy, to make compiler happy } mysql_close_no_command(my); } class MonMySrvC { public: char *address; uint16_t port; std::unique_ptr conns; MonMySrvC(char *a, uint16_t p) { address = strdup(a); port = p; conns = std::unique_ptr(new PtrArray()); }; ~MonMySrvC() { free(address); if (conns) { while (conns->len) { MYSQL* mysql = static_cast(conns->index(0)); if (mysql) { mysql_close(mysql); mysql=NULL; } conns->remove_index_fast(0); } } } }; class 
MySQL_Monitor_Connection_Pool { private: std::mutex mutex; #ifdef DEBUG pthread_mutex_t m2; PtrArray *conns; #endif // DEBUG // std::map, std::vector > my_connections; std::unique_ptr servers; public: MYSQL * get_connection(char *hostname, int port, MySQL_Monitor_State_Data *mmsd); void put_connection(char *hostname, int port, MYSQL *my); void purge_some_connections(); void purge_all_connections(); MySQL_Monitor_Connection_Pool() { servers = std::unique_ptr(new PtrArray()); #ifdef DEBUG conns = new PtrArray(); pthread_mutex_init(&m2, NULL); #endif // DEBUG }; ~MySQL_Monitor_Connection_Pool() { purge_all_connections(); #ifdef DEBUG pthread_mutex_destroy(&m2); #endif // DEBUG } void conn_register(MySQL_Monitor_State_Data *mmsd) { #ifdef DEBUG std::lock_guard lock(mutex); MYSQL *my = mmsd->mysql; pthread_mutex_lock(&m2); __conn_register_label: for (unsigned int i=0; ilen; i++) { MYSQL *my1 = (MYSQL *)conns->index(i); assert(my!=my1); //assert(my->net.fd!=my1->net.fd); // FIXME: we changed this with the next section of code if (my->net.fd == my1->net.fd) { // FIXME: we need to identify still why a connection with error 2013 is here if (my1->net.last_errno == 2013) { // we remove the connection conns->remove_index_fast(i); goto __conn_register_label; // we return to the loop } else { // we crash again, as in the old logic assert(my->net.fd!=my1->net.fd); } } } //proxy_info("Registering MYSQL with FD %d from mmsd %p and MYSQL %p\n", my->net.fd, mmsd, mmsd->mysql); conns->add(my); pthread_mutex_unlock(&m2); #endif // DEBUG return; }; void conn_unregister(MySQL_Monitor_State_Data *mmsd) { #ifdef DEBUG std::lock_guard lock(mutex); pthread_mutex_lock(&m2); MYSQL *my = mmsd->mysql; for (unsigned int i=0; ilen; i++) { MYSQL *my1 = (MYSQL *)conns->index(i); if (my1 == my) { conns->remove_index_fast(i); //proxy_info("Un-registering MYSQL with FD %d\n", my->net.fd); pthread_mutex_unlock(&m2); return; } } // LCOV_EXCL_START assert(0); // LCOV_EXCL_STOP #endif // DEBUG // 
LCOV_EXCL_START return; // LCOV_EXCL_STOP }; }; void MySQL_Monitor_Connection_Pool::purge_all_connections() { std::lock_guard lock(mutex); #ifdef DEBUG pthread_mutex_lock(&m2); #endif if (servers) { while (servers->len) { MonMySrvC* srv = static_cast(servers->index(0)); if (srv) { delete srv; } servers->remove_index_fast(0); } } #ifdef DEBUG conns->reset(); pthread_mutex_unlock(&m2); #endif } MYSQL * MySQL_Monitor_Connection_Pool::get_connection(char *hostname, int port, MySQL_Monitor_State_Data *mmsd) { std::lock_guard lock(mutex); #ifdef DEBUG pthread_mutex_lock(&m2); #endif // DEBUG MYSQL *my = NULL; unsigned long long now = monotonic_time(); for (unsigned int i=0; ilen; i++) { MonMySrvC *srv = (MonMySrvC *)servers->index(i); if (srv->port == port && strcmp(hostname,srv->address)==0) { if (srv->conns->len) { #ifdef DEBUG for (unsigned int j=0; jconns->len; j++) { MYSQL *my1 = (MYSQL *)srv->conns->index(j); for (unsigned int k=0; kconns->len; k++) { if (k!=j) { MYSQL *my2 = (MYSQL *)srv->conns->index(k); assert(my1!=my2); assert(my1->net.fd!=my2->net.fd); } } } #endif // DEBUG while (srv->conns->len) { unsigned int idx = rand() % srv->conns->len; MYSQL* mysql = (MYSQL*)srv->conns->remove_index_fast(idx); if (!mysql) continue; // close connection if not used for a while unsigned long long then = *(unsigned long long*)mysql->net.buff; if (now > (then + mysql_thread___monitor_ping_interval * 1000 * 10)) { MySQL_Monitor_State_Data* mmsd = new MySQL_Monitor_State_Data((char*)"", 0, NULL, false); mmsd->mysql = mysql; GloMyMon->queue->add(new WorkItem(mmsd, NULL)); continue; } my = mysql; break; } #ifdef DEBUG if (my) { for (unsigned int j=0; jlen; j++) { MYSQL *my1 = (MYSQL *)conns->index(j); assert(my!=my1); assert(my->net.fd!=my1->net.fd); } //proxy_info("Registering MYSQL with FD %d from mmsd %p and MYSQL %p\n", my->net.fd, mmsd, my); conns->add(my); } #endif // DEBUG } #ifdef DEBUG pthread_mutex_unlock(&m2); #endif // DEBUG return my; } } #ifdef DEBUG 
pthread_mutex_unlock(&m2); #endif // DEBUG return my; } void MySQL_Monitor_Connection_Pool::put_connection(char *hostname, int port, MYSQL *my) { unsigned long long now = monotonic_time(); std::lock_guard lock(mutex); #ifdef DEBUG pthread_mutex_lock(&m2); #endif // DEBUG *(unsigned long long*)my->net.buff = now; for (unsigned int i=0; ilen; i++) { MonMySrvC *srv = (MonMySrvC *)servers->index(i); if (srv->port == port && strcmp(hostname,srv->address)==0) { srv->conns->add(my); // pthread_mutex_unlock(&m2); // return; #ifdef DEBUG for (unsigned int j=0; jlen; j++) { MYSQL *my1 = (MYSQL *)conns->index(j); if (my1 == my) { conns->remove_index_fast(j); //proxy_info("Un-registering MYSQL with FD %d\n", my->net.fd); pthread_mutex_unlock(&m2); return; } } // LCOV_EXCL_START assert(0); // it didn't register it // LCOV_EXCL_STOP #else return; #endif // DEBUG } } // if no server was found MonMySrvC *srv = new MonMySrvC(hostname,port); srv->conns->add(my); servers->add(srv); // pthread_mutex_unlock(&m2); #ifdef DEBUG for (unsigned int j=0; jlen; j++) { MYSQL *my1 = (MYSQL *)conns->index(j); if (my1 == my) { conns->remove_index_fast(j); //proxy_info("Un-registering MYSQL with FD %d\n", my->net.fd); pthread_mutex_unlock(&m2); return; } } // LCOV_EXCL_START assert(0); // LCOV_EXCL_STOP #endif // DEBUG } void MySQL_Monitor_Connection_Pool::purge_some_connections() { unsigned long long now = monotonic_time(); std::lock_guard lock(mutex); #ifdef DEBUG pthread_mutex_lock(&m2); #endif // DEBUG for (unsigned int i=0; ilen; i++) { MonMySrvC *srv = (MonMySrvC *)servers->index(i); while (srv->conns->len > 4) { MYSQL *my = (MYSQL *)srv->conns->remove_index_fast(0); MySQL_Monitor_State_Data *mmsd= new MySQL_Monitor_State_Data((char *)"",0,NULL,false); mmsd->mysql=my; GloMyMon->queue->add(new WorkItem(mmsd,NULL)); } for (unsigned int j=0 ; jconns->len ; j++) { MYSQL *my = (MYSQL *)srv->conns->index(j); unsigned long long then = *(unsigned long long*)my->net.buff; if (now > (then + 
mysql_thread___monitor_ping_interval*1000 * 10)) { srv->conns->remove_index_fast(j); MySQL_Monitor_State_Data *mmsd= new MySQL_Monitor_State_Data((char *)"",0,NULL,false); mmsd->mysql=my; GloMyMon->queue->add(new WorkItem(mmsd,NULL)); } } } #ifdef DEBUG pthread_mutex_unlock(&m2); #endif // DEBUG } /* void MySQL_Monitor_Connection_Pool::purge_idle_connections() { unsigned long long now = monotonic_time(); std::lock_guard lock(mutex); for(auto it = my_connections.begin(); it != my_connections.end();) { auto& lst = it->second; for(auto it3 = lst.begin(); it3 != lst.end();) { MYSQL *my = *it3; unsigned long long then = *(unsigned long long*)my->net.buff; if (now > (then + mysql_thread___monitor_ping_interval*1000 * 3)) { MySQL_Monitor_State_Data *mmsd= new MySQL_Monitor_State_Data((char *)"",0,NULL,false); mmsd->mysql=my; GloMyMon->queue.add(new WorkItem(mmsd,NULL)); std::swap(*it3, lst.back()); if(it3 == lst.end() - 1) it3 = lst.erase(it3); else lst.pop_back(); } else ++it3; } if (lst.size()) { ++it; } else { it = my_connections.erase(it); } } } */ /* MYSQL * MySQL_Monitor_Connection_Pool::get_connection(char *hostname, int port) { std::lock_guard lock(mutex); auto it = my_connections.find(std::make_pair(hostname, port)); if (it == my_connections.end() || !it->second.size()) return NULL; MYSQL *my = it->second.back(); it->second.pop_back(); *(unsigned long long*)my->net.buff = 0; return my; } void MySQL_Monitor_Connection_Pool::put_connection(char *hostname, int port, MYSQL *my) { unsigned long long now = monotonic_time(); std::lock_guard lock(mutex); *(unsigned long long*)my->net.buff = now; //this doesn't work on old compilers // auto it = my_connections.emplace(std::piecewise_construct, // std::forward_as_tuple(hostname, port), std::forward_as_tuple()).first; // it->second.push_back(my); // code for old compilers (gcc 4.7 in debian7) auto it = my_connections.find(std::make_pair(string(hostname), port)); if (it != my_connections.end()) { it->second.push_back(my); } 
/**
 * @brief Releases everything owned by the state object.
 *
 * Frees the duplicated hostname, closes a still-attached connection through
 * close_mysql() and frees the stored error message, in that order.
 */
MySQL_Monitor_State_Data::~MySQL_Monitor_State_Data() {
	// free() accepts a null pointer, so no guard is required.
	free(hostname);

	//assert(mysql==NULL); // if mysql is not NULL, there is a bug
	if (mysql != NULL) {
		close_mysql(mysql);
		mysql = NULL;
	}

	free(mysql_error_msg);
}
NULL, NULL, &cache, sizeof(bool)); //#endif while (GloMTH==NULL) { usleep(50000); } usleep(100000); GloMyMon->monitor_aws_aurora(); return NULL; } void * monitor_replication_lag_pthread(void *arg) { #ifndef NOJEM bool cache=false; mallctl("thread.tcache.enabled", NULL, NULL, &cache, sizeof(bool)); #endif while (GloMTH==NULL) { usleep(50000); } usleep(100000); GloMyMon->monitor_replication_lag(); return NULL; } void* monitor_dns_cache_pthread(void* arg) { #ifndef NOJEM bool cache = false; mallctl("thread.tcache.enabled", NULL, NULL, &cache, sizeof(bool)); #endif while (GloMTH == NULL) { usleep(50000); } usleep(100000); GloMyMon->monitor_dns_cache(); return NULL; } using metric_name = std::string; using metric_help = std::string; using metric_tags = std::map; using mon_counter_tuple = std::tuple< p_mon_counter::metric, metric_name, metric_help, metric_tags >; using mon_gauge_tuple = std::tuple< p_mon_gauge::metric, metric_name, metric_help, metric_tags >; using mon_counter_vector = std::vector; using mon_gauge_vector = std::vector; /** * @brief Metrics map holding the metrics for the 'MySQL_Monitor' module. * * @note Some metrics in this map, share a common "id name", because * they differ only by label, because of this, HELP is shared between * them. For better visual identification of this groups they are * sepparated using a line separator comment. 
*/ const std::tuple mon_metrics_map = std::make_tuple( mon_counter_vector { std::make_tuple ( p_mon_counter::mysql_monitor_workers_started, "proxysql_mysql_monitor_workers_started_total", "Number of MySQL Monitor workers started.", metric_tags {} ), // ==================================================================== std::make_tuple ( p_mon_counter::mysql_monitor_connect_check_ok, "proxysql_mysql_monitor_connect_check_total", "Number of (succeed|failed) 'connect checks' from 'monitor_connect_thread'.", metric_tags { { "status", "ok" } } ), std::make_tuple ( p_mon_counter::mysql_monitor_connect_check_err, "proxysql_mysql_monitor_connect_check_total", "Number of (succeed|failed) 'connect checks' from 'monitor_connect_thread'.", metric_tags { { "status", "err" } } ), // ==================================================================== // ==================================================================== std::make_tuple ( p_mon_counter::mysql_monitor_ping_check_ok, "proxysql_mysql_monitor_ping_check_total", "Number of (succeed|failed) 'ping checks' from 'monitor_ping_thread'.", metric_tags { { "status", "ok" } } ), std::make_tuple ( p_mon_counter::mysql_monitor_ping_check_err, "proxysql_mysql_monitor_ping_check_total", "Number of (succeed|failed) 'ping checks' from 'monitor_ping_thread'.", metric_tags { { "status", "err" } } ), // ==================================================================== // ==================================================================== std::make_tuple ( p_mon_counter::mysql_monitor_read_only_check_ok, "proxysql_mysql_monitor_read_only_check_total", "Number of (succeed|failed) 'read only checks' from 'monitor_read_only_thread'.", metric_tags { { "status", "ok" } } ), std::make_tuple ( p_mon_counter::mysql_monitor_read_only_check_err, "proxysql_mysql_monitor_read_only_check_total", "Number of (succeed|failed) 'read only checks' from 'monitor_read_only_thread'.", metric_tags { { "status", "err" } } ), // 
==================================================================== // ==================================================================== std::make_tuple ( p_mon_counter::mysql_monitor_replication_lag_check_ok, "proxysql_mysql_monitor_replication_lag_check_total", "Number of (succeed|failed)'replication lag checks' from 'monitor_replication_lag_thread'.", metric_tags { { "status", "ok" } } ), std::make_tuple ( p_mon_counter::mysql_monitor_replication_lag_check_err, "proxysql_mysql_monitor_replication_lag_check_total", "Number of (succeed|failed)'replication lag checks' from 'monitor_replication_lag_thread'.", metric_tags { { "status", "err" } } ), // ==================================================================== // ==================================================================== std::make_tuple( p_mon_counter::mysql_monitor_dns_cache_queried, "proxysql_mysql_monitor_dns_cache_queried", "Number of dns queried 'dns_cache_queried' from 'monitor_dns_resolver_thread'.", metric_tags {} ), std::make_tuple( p_mon_counter::mysql_monitor_dns_cache_lookup_success, "proxysql_mysql_monitor_dns_cache_lookup_success", "Number of dns queried 'dns_cache_lookup_success' from 'monitor_dns_resolver_thread'.", metric_tags {} ), std::make_tuple( p_mon_counter::mysql_monitor_dns_cache_record_updated, "proxysql_mysql_monitor_dns_cache_record_updated", "Number of dns queried 'dns_cache_record_updated' from 'monitor_dns_resolver_thread'.", metric_tags {} ) // ==================================================================== }, mon_gauge_vector { std::make_tuple ( p_mon_gauge::mysql_monitor_workers, "proxysql_mysql_monitor_workers", "Number of monitor workers threads.", metric_tags {} ), std::make_tuple ( p_mon_gauge::mysql_monitor_workers_aux, "proxysql_mysql_monitor_workers_aux", "Number of auxiliary monitor threads.", metric_tags {} ) } ); MySQL_Monitor::MySQL_Monitor() { dns_cache = std::make_shared(); GloMyMon = this; My_Conn_Pool=new MySQL_Monitor_Connection_Pool(); 
queue = std::unique_ptr*>>(new wqueue*>()); pthread_mutex_init(&group_replication_mutex,NULL); Group_Replication_Hosts_resultset=NULL; pthread_mutex_init(&galera_mutex,NULL); Galera_Hosts_resultset=NULL; pthread_mutex_init(&aws_aurora_mutex,NULL); pthread_mutex_init(&mysql_servers_mutex,NULL); pthread_mutex_init(&proxysql_servers_mutex, NULL); AWS_Aurora_Hosts_resultset=NULL; AWS_Aurora_Hosts_resultset_checksum = 0; shutdown=false; monitor_enabled=true; // default // create new SQLite datatabase monitordb = new SQLite3DB(); monitordb->open((char *)"file:mem_monitordb?mode=memory&cache=shared", SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX); // create 'monitor_internal_db' database and attach it to 'monitor' monitor_internal_db = new SQLite3DB(); monitor_internal_db->open((char *)"file:mem_monitor_internal_db?mode=memory&cache=shared", SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX); monitordb->execute("ATTACH DATABASE 'file:mem_monitor_internal_db?mode=memory&cache=shared' AS 'monitor_internal'"); // create 'admindb' and attach both 'monitor' and 'monitor_internal' admindb=new SQLite3DB(); admindb->open((char *)"file:mem_admindb?mode=memory&cache=shared", SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX); admindb->execute("ATTACH DATABASE 'file:mem_monitordb?mode=memory&cache=shared' AS 'monitor'"); admindb->execute("ATTACH DATABASE 'file:mem_monitor_internal_db?mode=memory&cache=shared' AS 'monitor_internal'"); // define monitoring tables tables_defs_monitor=new std::vector; tables_defs_monitor_internal=new std::vector; //insert_into_tables_defs(tables_defs_monitor,"mysql_server_connect", MONITOR_SQLITE_TABLE_MYSQL_SERVER_CONNECT); insert_into_tables_defs(tables_defs_monitor,"mysql_server_connect_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_CONNECT_LOG); //insert_into_tables_defs(tables_defs_monitor,"mysql_server_ping", MONITOR_SQLITE_TABLE_MYSQL_SERVER_PING); 
insert_into_tables_defs(tables_defs_monitor,"mysql_server_ping_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_PING_LOG); insert_into_tables_defs(tables_defs_monitor,"mysql_server_read_only_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_READ_ONLY_LOG); insert_into_tables_defs(tables_defs_monitor,"mysql_server_replication_lag_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_REPLICATION_LAG_LOG); insert_into_tables_defs(tables_defs_monitor,"mysql_server_group_replication_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_GROUP_REPLICATION_LOG); insert_into_tables_defs(tables_defs_monitor,"mysql_server_galera_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_GALERA_LOG); insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_LOG); insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_check_status", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_CHECK_STATUS); insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_failovers", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_FAILOVERS); insert_into_tables_defs(tables_defs_monitor_internal,"mysql_servers", MONITOR_SQLITE_TABLE_MYSQL_SERVERS); insert_into_tables_defs(tables_defs_monitor_internal, "proxysql_servers", MONITOR_SQLITE_TABLE_PROXYSQL_SERVERS); // create monitoring tables check_and_build_standard_tables(monitordb, tables_defs_monitor); check_and_build_standard_tables(monitor_internal_db, tables_defs_monitor_internal); monitordb->execute("CREATE INDEX IF NOT EXISTS idx_connect_log_time_start ON mysql_server_connect_log (time_start_us)"); monitordb->execute("CREATE INDEX IF NOT EXISTS idx_ping_log_time_start ON mysql_server_ping_log (time_start_us)"); monitordb->execute("CREATE INDEX IF NOT EXISTS idx_read_only_log_time_start ON mysql_server_read_only_log (time_start_us)"); monitordb->execute("CREATE INDEX IF NOT EXISTS idx_replication_lag_log_time_start ON mysql_server_replication_lag_log (time_start_us)"); monitordb->execute("CREATE INDEX IF NOT EXISTS 
idx_group_replication_log_time_start ON mysql_server_group_replication_log (time_start_us)"); monitordb->execute("CREATE INDEX IF NOT EXISTS idx_galera_log_time_start ON mysql_server_galera_log (time_start_us)"); monitordb->execute("CREATE INDEX IF NOT EXISTS idx_aws_aurora_log_time_start ON mysql_server_aws_aurora_log (time_start_us)"); num_threads=2; aux_threads=0; started_threads=0; connect_check_OK = 0; connect_check_ERR = 0; ping_check_OK = 0; ping_check_ERR = 0; read_only_check_OK = 0; read_only_check_ERR = 0; replication_lag_check_OK = 0; replication_lag_check_ERR = 0; dns_cache_queried = 0; dns_cache_lookup_success = 0; dns_cache_record_updated = 0; force_dns_cache_update = false; /* if (GloMTH) { if (GloMTH->num_threads) { num_threads=GloMTH->num_threads*2; } } if (num_threads>16) { num_threads=16; // limit to 16 } */ // Initialize prometheus metrics init_prometheus_counter_array(mon_metrics_map, this->metrics.p_counter_array); init_prometheus_gauge_array(mon_metrics_map, this->metrics.p_gauge_array); }; MySQL_Monitor::~MySQL_Monitor() { drop_tables_defs(tables_defs_monitor); delete tables_defs_monitor; drop_tables_defs(tables_defs_monitor_internal); delete tables_defs_monitor_internal; delete monitordb; delete monitor_internal_db; delete admindb; delete My_Conn_Pool; if (Group_Replication_Hosts_resultset) { delete Group_Replication_Hosts_resultset; Group_Replication_Hosts_resultset=NULL; } if (Galera_Hosts_resultset) { delete Galera_Hosts_resultset; Galera_Hosts_resultset=NULL; } if (AWS_Aurora_Hosts_resultset) { delete AWS_Aurora_Hosts_resultset; AWS_Aurora_Hosts_resultset=NULL; } std::map::iterator it2; AWS_Aurora_monitor_node *node=NULL; for (it2 = AWS_Aurora_Hosts_Map.begin(); it2 != AWS_Aurora_Hosts_Map.end(); ++it2) { node = it2->second; delete node; } AWS_Aurora_Hosts_Map.clear(); }; void MySQL_Monitor::p_update_metrics() { if (GloMyMon) { this->metrics.p_gauge_array[p_mon_gauge::mysql_monitor_workers]->Set(GloMyMon->num_threads); 
this->metrics.p_gauge_array[p_mon_gauge::mysql_monitor_workers_aux]->Set(GloMyMon->aux_threads); p_update_counter(this->metrics.p_counter_array[p_mon_counter::mysql_monitor_workers_started], GloMyMon->started_threads); p_update_counter(this->metrics.p_counter_array[p_mon_counter::mysql_monitor_connect_check_ok], GloMyMon->connect_check_OK); p_update_counter(this->metrics.p_counter_array[p_mon_counter::mysql_monitor_connect_check_err], GloMyMon->connect_check_ERR); p_update_counter(this->metrics.p_counter_array[p_mon_counter::mysql_monitor_ping_check_ok], GloMyMon->ping_check_OK); p_update_counter(this->metrics.p_counter_array[p_mon_counter::mysql_monitor_ping_check_err], GloMyMon->ping_check_ERR ); p_update_counter(this->metrics.p_counter_array[p_mon_counter::mysql_monitor_read_only_check_ok], GloMyMon->read_only_check_OK); p_update_counter(this->metrics.p_counter_array[p_mon_counter::mysql_monitor_read_only_check_err], GloMyMon->read_only_check_ERR); p_update_counter(this->metrics.p_counter_array[p_mon_counter::mysql_monitor_replication_lag_check_ok], GloMyMon->replication_lag_check_OK); p_update_counter(this->metrics.p_counter_array[p_mon_counter::mysql_monitor_replication_lag_check_err], GloMyMon->replication_lag_check_ERR); p_update_counter(this->metrics.p_counter_array[p_mon_counter::mysql_monitor_dns_cache_queried], GloMyMon->dns_cache_queried); p_update_counter(this->metrics.p_counter_array[p_mon_counter::mysql_monitor_dns_cache_lookup_success], GloMyMon->dns_cache_lookup_success); p_update_counter(this->metrics.p_counter_array[p_mon_counter::mysql_monitor_dns_cache_record_updated], GloMyMon->dns_cache_record_updated); } } void MySQL_Monitor::print_version() { fprintf(stderr,"Standard MySQL Monitor (StdMyMon) rev. 
%s -- %s -- %s\n", MYSQL_MONITOR_VERSION, __FILE__, __TIMESTAMP__); }; // This function is copied from ProxySQL_Admin void MySQL_Monitor::insert_into_tables_defs(std::vector *tables_defs, const char *table_name, const char *table_def) { table_def_t *td = new table_def_t; td->table_name=strdup(table_name); td->table_def=strdup(table_def); tables_defs->push_back(td); }; // This function is copied from ProxySQL_Admin void MySQL_Monitor::drop_tables_defs(std::vector *tables_defs) { table_def_t *td; while (!tables_defs->empty()) { td=tables_defs->back(); free(td->table_name); td->table_name=NULL; free(td->table_def); td->table_def=NULL; tables_defs->pop_back(); delete td; } }; // This function is copied from ProxySQL_Admin void MySQL_Monitor::check_and_build_standard_tables(SQLite3DB *db, std::vector *tables_defs) { table_def_t *td; db->execute("PRAGMA foreign_keys = OFF"); for (std::vector::iterator it=tables_defs->begin(); it!=tables_defs->end(); ++it) { td=*it; db->check_and_build_table(td->table_name, td->table_def); } db->execute("PRAGMA foreign_keys = ON"); }; void MySQL_Monitor::update_monitor_mysql_servers(SQLite3_result* resultset) { pthread_mutex_lock(&GloMyMon->mysql_servers_mutex); if (resultset != nullptr) { int rc = 0; monitordb->execute("DELETE FROM monitor_internal.mysql_servers"); sqlite3_stmt *statement1=NULL; sqlite3_stmt *statement32=NULL; std::string query32s = "INSERT INTO monitor_internal.mysql_servers VALUES " + generate_multi_rows_query(32,4); char* query1 = const_cast("INSERT INTO monitor_internal.mysql_servers VALUES (?1,?2,?3,?4)"); char* query32 = (char *)query32s.c_str(); rc = monitordb->prepare_v2(query1, &statement1); ASSERT_SQLITE_OK(rc, monitordb); rc = monitordb->prepare_v2(query32, &statement32); ASSERT_SQLITE_OK(rc, monitordb); int row_idx=0; int max_bulk_row_idx=resultset->rows_count/32; max_bulk_row_idx=max_bulk_row_idx*32; for (std::vector::iterator it = resultset->rows.begin(); it != resultset->rows.end(); ++it) { SQLite3_row 
*r1=*it; int idx=row_idx%32; if (row_idx < max_bulk_row_idx) { // bulk rc=(*proxy_sqlite3_bind_text)(statement32, (idx*4)+1, r1->fields[0], -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement32, (idx*4)+2, atoi(r1->fields[1])); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement32, (idx*4)+3, atoi(r1->fields[2])); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement32, (idx*4)+4, atoi(r1->fields[3])); ASSERT_SQLITE_OK(rc, monitordb); if (idx==31) { SAFE_SQLITE3_STEP2(statement32); rc=(*proxy_sqlite3_clear_bindings)(statement32); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_reset)(statement32); ASSERT_SQLITE_OK(rc, monitordb); } } else { // single row rc=(*proxy_sqlite3_bind_text)(statement1, 1, r1->fields[0], -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 2, atoi(r1->fields[1])); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 3, atoi(r1->fields[2])); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 4, atoi(r1->fields[3])); ASSERT_SQLITE_OK(rc, monitordb); SAFE_SQLITE3_STEP2(statement1); rc=(*proxy_sqlite3_clear_bindings)(statement1); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_reset)(statement1); ASSERT_SQLITE_OK(rc, monitordb); } row_idx++; } (*proxy_sqlite3_finalize)(statement1); (*proxy_sqlite3_finalize)(statement32); } pthread_mutex_unlock(&GloMyMon->mysql_servers_mutex); } void MySQL_Monitor::update_monitor_proxysql_servers(SQLite3_result* resultset) { pthread_mutex_lock(&GloMyMon->proxysql_servers_mutex); if (resultset != nullptr) { int rc = 0; monitordb->execute("DELETE FROM monitor_internal.proxysql_servers"); sqlite3_stmt* statement1 = NULL; sqlite3_stmt* statement32 = NULL; std::string query32s = "INSERT INTO monitor_internal.proxysql_servers VALUES " + generate_multi_rows_query(32, 4); char* query1 = const_cast("INSERT INTO 
monitor_internal.proxysql_servers VALUES (?1,?2,?3,?4)"); char* query32 = (char*)query32s.c_str(); rc = monitordb->prepare_v2(query1, &statement1); ASSERT_SQLITE_OK(rc, monitordb); rc = monitordb->prepare_v2(query32, &statement32); ASSERT_SQLITE_OK(rc, monitordb); int row_idx = 0; int max_bulk_row_idx = resultset->rows_count / 32; max_bulk_row_idx = max_bulk_row_idx * 32; for (std::vector::iterator it = resultset->rows.begin(); it != resultset->rows.end(); ++it) { SQLite3_row* r1 = *it; int idx = row_idx % 32; if (row_idx < max_bulk_row_idx) { // bulk rc = (*proxy_sqlite3_bind_text)(statement32, (idx * 4) + 1, r1->fields[0], -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc = (*proxy_sqlite3_bind_int64)(statement32, (idx * 4) + 2, atoi(r1->fields[1])); ASSERT_SQLITE_OK(rc, monitordb); rc = (*proxy_sqlite3_bind_int64)(statement32, (idx * 4) + 3, atoi(r1->fields[2])); ASSERT_SQLITE_OK(rc, monitordb); rc = (*proxy_sqlite3_bind_text)(statement32, (idx * 4) + 4, r1->fields[3], -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); if (idx == 31) { SAFE_SQLITE3_STEP2(statement32); rc = (*proxy_sqlite3_clear_bindings)(statement32); ASSERT_SQLITE_OK(rc, monitordb); rc = (*proxy_sqlite3_reset)(statement32); ASSERT_SQLITE_OK(rc, monitordb); } } else { // single row rc = (*proxy_sqlite3_bind_text)(statement1, 1, r1->fields[0], -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc = (*proxy_sqlite3_bind_int64)(statement1, 2, atoi(r1->fields[1])); ASSERT_SQLITE_OK(rc, monitordb); rc = (*proxy_sqlite3_bind_int64)(statement1, 3, atoi(r1->fields[2])); ASSERT_SQLITE_OK(rc, monitordb); rc = (*proxy_sqlite3_bind_text)(statement1, 4, r1->fields[3], - 1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); SAFE_SQLITE3_STEP2(statement1); rc = (*proxy_sqlite3_clear_bindings)(statement1); ASSERT_SQLITE_OK(rc, monitordb); rc = (*proxy_sqlite3_reset)(statement1); ASSERT_SQLITE_OK(rc, monitordb); } row_idx++; } (*proxy_sqlite3_finalize)(statement1); 
(*proxy_sqlite3_finalize)(statement32); } pthread_mutex_unlock(&GloMyMon->proxysql_servers_mutex); } void * monitor_connect_thread(void *arg) { mysql_close(mysql_init(NULL)); MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg; if (!GloMTH) return NULL; // quick exit during shutdown/restart MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); mysql_thr->refresh_variables(); bool connect_success = false; mmsd->create_new_connection(); unsigned long long start_time=mysql_thr->curtime; mmsd->t1=start_time; mmsd->t2=monotonic_time(); sqlite3_stmt *statement=NULL; //sqlite3 *mondb=mmsd->mondb->get_db(); int rc; char *query=NULL; query=(char *)"INSERT OR REPLACE INTO mysql_server_connect_log VALUES (?1 , ?2 , ?3 , ?4 , ?5)"; //rc=(*proxy_sqlite3_prepare_v2)(mondb, query, -1, &statement, 0); rc = mmsd->mondb->prepare_v2(query, &statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_text)(statement, 1, mmsd->hostname, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_int)(statement, 2, mmsd->port); ASSERT_SQLITE_OK(rc, mmsd->mondb); unsigned long long time_now=realtime_time(); time_now=time_now-(mmsd->t2 - start_time); rc=(*proxy_sqlite3_bind_int64)(statement, 3, time_now); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_int64)(statement, 4, (mmsd->mysql_error_msg ? 
0 : mmsd->t2-mmsd->t1)); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_text)(statement, 5, mmsd->mysql_error_msg, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, mmsd->mondb); SAFE_SQLITE3_STEP2(statement); rc=(*proxy_sqlite3_clear_bindings)(statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_reset)(statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); (*proxy_sqlite3_finalize)(statement); if (mmsd->mysql_error_msg) { if ( (strncmp(mmsd->mysql_error_msg,"Access denied for user",strlen("Access denied for user"))==0) || (strncmp(mmsd->mysql_error_msg,"ProxySQL Error: Access denied for user",strlen("ProxySQL Error: Access denied for user"))==0) ) { proxy_error("Server %s:%d is returning \"Access denied\" for monitoring user\n", mmsd->hostname, mmsd->port); } else if (strncmp(mmsd->mysql_error_msg,"Your password has expired.",strlen("Your password has expired."))==0) { proxy_error("Server %s:%d is returning \"Your password has expired.\" for monitoring user\n", mmsd->hostname, mmsd->port); } MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); } else { connect_success = true; } mysql_close(mmsd->mysql); mmsd->mysql=NULL; if (connect_success) { __sync_fetch_and_add(&GloMyMon->connect_check_OK,1); } else { __sync_fetch_and_add(&GloMyMon->connect_check_ERR,1); } delete mysql_thr; return NULL; } void * monitor_ping_thread(void *arg) { mysql_close(mysql_init(NULL)); MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg; if (!GloMTH) return NULL; // quick exit during shutdown/restart MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); mysql_thr->refresh_variables(); bool ping_success = false; mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; mmsd->t1=start_time; bool crc=false; if (mmsd->mysql==NULL) { // we don't have a connection, let's create it 
bool rc; rc=mmsd->create_new_connection(); if (mmsd->mysql) { GloMyMon->My_Conn_Pool->conn_register(mmsd); } crc=true; if (rc==false) { goto __exit_monitor_ping_thread; } } else { //GloMyMon->My_Conn_Pool->conn_register(mmsd); } mmsd->t1=monotonic_time(); //async_exit_status=mysql_change_user_start(&ret_bool, mysql,"msandbox2","msandbox2","information_schema"); mmsd->interr=0; // reset the value mmsd->async_exit_status=mysql_ping_start(&mmsd->interr,mmsd->mysql); while (mmsd->async_exit_status) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mysql_thread___monitor_ping_timeout * 1000) { mmsd->mysql_error_msg=strdup("timeout during ping"); goto __exit_monitor_ping_thread; } if (GloMyMon->shutdown==true) { goto __fast_exit_monitor_ping_thread; // exit immediately } if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { mmsd->async_exit_status=mysql_ping_cont(&mmsd->interr, mmsd->mysql, mmsd->async_exit_status); } } if (mmsd->interr) { // ping failed mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); //proxy_warning("Got error. 
mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); } else { if (crc==false) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); mmsd->mysql=NULL; } } __exit_monitor_ping_thread: mmsd->t2=monotonic_time(); { sqlite3_stmt *statement=NULL; //sqlite3 *mondb=mmsd->mondb->get_db(); int rc; #ifdef TEST_AURORA // if ((rand() % 10) ==0) { #endif // TEST_AURORA char *query=NULL; query=(char *)"INSERT OR REPLACE INTO mysql_server_ping_log VALUES (?1 , ?2 , ?3 , ?4 , ?5)"; //rc=(*proxy_sqlite3_prepare_v2)(mondb, query, -1, &statement, 0); rc = mmsd->mondb->prepare_v2(query, &statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_text)(statement, 1, mmsd->hostname, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_int)(statement, 2, mmsd->port); ASSERT_SQLITE_OK(rc, mmsd->mondb); unsigned long long time_now=realtime_time(); time_now=time_now-(mmsd->t2 - start_time); rc=(*proxy_sqlite3_bind_int64)(statement, 3, time_now); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_int64)(statement, 4, (mmsd->mysql_error_msg ? 
0 : mmsd->t2-mmsd->t1)); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_text)(statement, 5, mmsd->mysql_error_msg, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, mmsd->mondb); SAFE_SQLITE3_STEP2(statement); rc=(*proxy_sqlite3_clear_bindings)(statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_reset)(statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); (*proxy_sqlite3_finalize)(statement); if (mmsd->mysql_error_msg == NULL) { ping_success = true; } #ifdef TEST_AURORA // } #endif // TEST_AURORA } __fast_exit_monitor_ping_thread: if (mmsd->mysql) { // if we reached here we didn't put the connection back mmsd->t2=monotonic_time(); if (mmsd->mysql_error_msg) { #ifdef DEBUG proxy_error("Error after %lldms: server %s:%d , mmsd %p , MYSQL %p , FD %d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); #else proxy_error("Error after %lldms on server %s:%d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd->mysql_error_msg); #endif // DEBUG MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); mysql_close(mmsd->mysql); // if we reached here we should destroy it mmsd->mysql=NULL; } else { if (crc) { bool rc=mmsd->set_wait_timeout(); if (rc) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); } else { #ifdef DEBUG proxy_error("Error on: mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); #else proxy_error("Error on server %s:%d : %s\n", mmsd->hostname, mmsd->port, mmsd->mysql_error_msg); #endif // DEBUG MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); 
mysql_close(mmsd->mysql); // set_wait_timeout failed } mmsd->mysql=NULL; } else { // really not sure how we reached here, drop it proxy_error("Error after %lldms: mmsd %p , MYSQL %p , FD %d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } } if (ping_success) { __sync_fetch_and_add(&GloMyMon->ping_check_OK,1); } else { __sync_fetch_and_add(&GloMyMon->ping_check_ERR,1); } delete mysql_thr; return NULL; } bool MySQL_Monitor_State_Data::set_wait_timeout() { if (mysql_thread___monitor_wait_timeout==false) { return true; } #if defined(TEST_AURORA) || defined(TEST_GALERA) || defined(TEST_GROUPREP) return true; #endif // TEST_AURORA || TEST_GALERA || TEST_GROUPREP bool ret=false; char *query=NULL; char *qt=(char *)"SET wait_timeout=%d"; int wait_timeout=mysql_thread___monitor_ping_interval*10/1000; // convert to second and multiply by 10 query=(char *)malloc(strlen(qt)+32); sprintf(query,qt,wait_timeout); t1=monotonic_time(); async_exit_status=mysql_query_start(&interr,mysql,query); while (async_exit_status) { async_exit_status=wait_for_mysql(mysql, async_exit_status); unsigned long long now=monotonic_time(); if (now > t1 + mysql_thread___monitor_ping_timeout * 1000) { mysql_error_msg=strdup("timeout"); goto __exit_set_wait_timeout; } if (GloMyMon->shutdown==true) { goto __exit_set_wait_timeout; // exit immediately } if ((async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { async_exit_status=mysql_query_cont(&interr, mysql, async_exit_status); } } if (interr) { // SET failed ret=false; } else { ret=true; } __exit_set_wait_timeout: free(query); return ret; } bool MySQL_Monitor_State_Data::create_new_connection() { mysql=mysql_init(NULL); assert(mysql); if (use_ssl) { mysql_ssl_set(mysql, 
mysql_thread___ssl_p2s_key, mysql_thread___ssl_p2s_cert, mysql_thread___ssl_p2s_ca, mysql_thread___ssl_p2s_capath, mysql_thread___ssl_p2s_cipher); mysql_options(mysql, MYSQL_OPT_SSL_CRL, mysql_thread___ssl_p2s_crl); mysql_options(mysql, MYSQL_OPT_SSL_CRLPATH, mysql_thread___ssl_p2s_crlpath); } unsigned int timeout=mysql_thread___monitor_connect_timeout/1000; if (timeout==0) timeout=1; mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, &timeout); mysql_options4(mysql, MYSQL_OPT_CONNECT_ATTR_ADD, "program_name", "proxysql_monitor"); mysql_options4(mysql, MYSQL_OPT_CONNECT_ATTR_ADD, "_server_host", hostname); MYSQL *myrc=NULL; if (port) { myrc=mysql_real_connect(mysql, MySQL_Monitor::dns_lookup(hostname).c_str(), mysql_thread___monitor_username, mysql_thread___monitor_password, NULL, port, NULL, 0); } else { myrc=mysql_real_connect(mysql, "localhost", mysql_thread___monitor_username, mysql_thread___monitor_password, NULL, 0, hostname, 0); } if (myrc==NULL) { mysql_error_msg=strdup(mysql_error(mysql)); int myerrno=mysql_errno(mysql); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::mysql, hostgroup_id, hostname, port, myerrno); if (myerrno < 2000) { mysql_close(mysql); } else { close_mysql(mysql); } mysql = NULL; return false; } else { // mariadb client library disables NONBLOCK for SSL connections ... re-enable it! 
mysql_options(mysql, MYSQL_OPT_NONBLOCK, 0); int f=fcntl(mysql->net.fd, F_GETFL); #ifdef FD_CLOEXEC // asynchronously set also FD_CLOEXEC , this to prevent then when a fork happens the FD are duplicated to new process fcntl(mysql->net.fd, F_SETFL, f|O_NONBLOCK|FD_CLOEXEC); #else fcntl(mysql->net.fd, F_SETFL, f|O_NONBLOCK); #endif /* FD_CLOEXEC */ MySQL_Monitor::dns_cache_update_socket(mysql->host, mysql->net.fd); } return true; } void * monitor_read_only_thread(void *arg) { mysql_close(mysql_init(NULL)); bool timeout_reached = false; MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg; if (!GloMTH) return NULL; // quick exit during shutdown/restart MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); mysql_thr->refresh_variables(); mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; bool read_only_success = false; mmsd->t1=start_time; bool crc=false; if (mmsd->mysql==NULL) { // we don't have a connection, let's create it bool rc; rc=mmsd->create_new_connection(); if (mmsd->mysql) { GloMyMon->My_Conn_Pool->conn_register(mmsd); } crc=true; if (rc==false) { unsigned long long now=monotonic_time(); char * new_error = (char *)malloc(50+strlen(mmsd->mysql_error_msg)); sprintf(new_error,"timeout on creating new connection: %s",mmsd->mysql_error_msg); free(mmsd->mysql_error_msg); mmsd->mysql_error_msg = new_error; proxy_error("Timeout on read_only check for %s:%d after %lldms. Unable to create a connection. If the server is overload, increase mysql-monitor_connect_timeout. 
Error: %s.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000, new_error); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_READ_ONLY_CHECK_CONN_TIMEOUT); timeout_reached = true; goto __exit_monitor_read_only_thread; //goto __fast_exit_monitor_read_only_thread; } } mmsd->t1=monotonic_time(); mmsd->interr=0; // reset the value if (mmsd->task_id == MON_INNODB_READ_ONLY) { mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT @@global.innodb_read_only read_only"); } else if (mmsd->task_id == MON_SUPER_READ_ONLY) { mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT @@global.super_read_only read_only"); } else if (mmsd->task_id == MON_READ_ONLY__AND__INNODB_READ_ONLY) { mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT @@global.read_only&@@global.innodb_read_only read_only"); } else if (mmsd->task_id == MON_READ_ONLY__OR__INNODB_READ_ONLY) { mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT @@global.read_only|@@global.innodb_read_only read_only"); } else { // default mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT @@global.read_only read_only"); } while (mmsd->async_exit_status) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mysql_thread___monitor_read_only_timeout * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); proxy_error("Timeout on read_only check for %s:%d after %lldms. 
If the server is overload, increase mysql-monitor_read_only_timeout.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_READ_ONLY_CHECK_TIMEOUT); timeout_reached = true; goto __exit_monitor_read_only_thread; } if (mmsd->interr) { // error during query mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); goto __exit_monitor_read_only_thread; } if (GloMyMon->shutdown==true) { goto __fast_exit_monitor_read_only_thread; // exit immediately } if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { mmsd->async_exit_status=mysql_query_cont(&mmsd->interr, mmsd->mysql, mmsd->async_exit_status); } } if (mmsd->interr) { // error during query mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); goto __exit_monitor_read_only_thread; } mmsd->async_exit_status=mysql_store_result_start(&mmsd->result,mmsd->mysql); while (mmsd->async_exit_status) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mysql_thread___monitor_read_only_timeout * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); proxy_error("Timeout on read_only check for %s:%d after %lldms. 
If the server is overload, increase mysql-monitor_read_only_timeout.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_READ_ONLY_CHECK_TIMEOUT); timeout_reached = true; goto __exit_monitor_read_only_thread; } if (GloMyMon->shutdown==true) { goto __fast_exit_monitor_read_only_thread; // exit immediately } if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { mmsd->async_exit_status=mysql_store_result_cont(&mmsd->result, mmsd->mysql, mmsd->async_exit_status); } } if (mmsd->interr) { // ping failed mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); } __exit_monitor_read_only_thread: mmsd->t2=monotonic_time(); { sqlite3_stmt *statement=NULL; //sqlite3 *mondb=mmsd->mondb->get_db(); int rc; char *query=NULL; query=(char *)"INSERT OR REPLACE INTO mysql_server_read_only_log VALUES (?1 , ?2 , ?3 , ?4 , ?5 , ?6)"; //rc=(*proxy_sqlite3_prepare_v2)(mondb, query, -1, &statement, 0); rc = mmsd->mondb->prepare_v2(query, &statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); int read_only=1; // as a safety mechanism , read_only=1 is the default rc=(*proxy_sqlite3_bind_text)(statement, 1, mmsd->hostname, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_int)(statement, 2, mmsd->port); ASSERT_SQLITE_OK(rc, mmsd->mondb); unsigned long long time_now=realtime_time(); time_now=time_now-(mmsd->t2 - start_time); rc=(*proxy_sqlite3_bind_int64)(statement, 3, time_now); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_int64)(statement, 4, (mmsd->mysql_error_msg ? 
0 : mmsd->t2-mmsd->t1)); ASSERT_SQLITE_OK(rc, mmsd->mondb); if (mmsd->interr == 0 && mmsd->result) { int num_fields=0; int k=0; MYSQL_FIELD *fields = mysql_fetch_fields(mmsd->result); int j=-1; num_fields = mysql_num_fields(mmsd->result); fields = mysql_fetch_fields(mmsd->result); if (fields && num_fields == 1) { for(k = 0; k < num_fields; k++) { if (strcmp((char *)"read_only", (char *)fields[k].name)==0) { j=k; } } if (j>-1) { MYSQL_ROW row=mysql_fetch_row(mmsd->result); if (row) { VALGRIND_DISABLE_ERROR_REPORTING; if (row[j]) { if (!strcmp(row[j],"0") || !strcasecmp(row[j],"OFF")) read_only=0; } VALGRIND_ENABLE_ERROR_REPORTING; } } // if (repl_lag>=0) { rc=(*proxy_sqlite3_bind_int64)(statement, 5, read_only); ASSERT_SQLITE_OK(rc, mmsd->mondb); // } else { // rc=(*proxy_sqlite3_bind_null)(statement, 5); ASSERT_SQLITE_OK(rc, mmsd->mondb); // } } else { proxy_error("mysql_fetch_fields returns NULL, or mysql_num_fields is incorrect. Server %s:%d . See bug #1994\n", mmsd->hostname, mmsd->port); rc=(*proxy_sqlite3_bind_null)(statement, 5); ASSERT_SQLITE_OK(rc, mmsd->mondb); } mysql_free_result(mmsd->result); mmsd->result=NULL; } else { rc=(*proxy_sqlite3_bind_null)(statement, 5); ASSERT_SQLITE_OK(rc, mmsd->mondb); } if (mmsd->result) { // make sure it is clear mysql_free_result(mmsd->result); mmsd->result=NULL; } rc=(*proxy_sqlite3_bind_text)(statement, 6, mmsd->mysql_error_msg, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, mmsd->mondb); SAFE_SQLITE3_STEP2(statement); rc=(*proxy_sqlite3_clear_bindings)(statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_reset)(statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); (*proxy_sqlite3_finalize)(statement); if (mmsd->mysql_error_msg == NULL) { read_only_success = true; } if (timeout_reached == false && mmsd->interr == 0) { MyHGM->read_only_action(mmsd->hostname, mmsd->port, read_only); // default behavior } else { char *error=NULL; int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; char *new_query=NULL; 
SQLite3DB *mondb=mmsd->mondb; new_query=(char *)"SELECT 1 FROM (SELECT hostname,port,read_only,error FROM mysql_server_read_only_log WHERE hostname='%s' AND port='%d' ORDER BY time_start_us DESC LIMIT %d) a WHERE read_only IS NULL AND SUBSTR(error,1,7) = 'timeout' GROUP BY hostname,port HAVING COUNT(*)=%d"; char *buff=(char *)malloc(strlen(new_query)+strlen(mmsd->hostname)+32); int max_failures=mysql_thread___monitor_read_only_max_timeout_count; sprintf(buff,new_query, mmsd->hostname, mmsd->port, max_failures, max_failures); mondb->execute_statement(buff, &error , &cols , &affected_rows , &resultset); if (!error) { if (resultset) { if (resultset->rows_count) { // disable host proxy_error("Server %s:%d missed %d read_only checks. Assuming read_only=1\n", mmsd->hostname, mmsd->port, max_failures); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_READ_ONLY_CHECKS_MISSED); MyHGM->read_only_action(mmsd->hostname, mmsd->port, read_only); // N timeouts reached } delete resultset; resultset=NULL; } } else { proxy_error("Error on %s : %s\n", buff, error); } free(buff); } } if (mmsd->interr || mmsd->mysql_error_msg) { // check failed if (mmsd->mysql) { proxy_error("Got error: mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } else { if (crc==false) { if (mmsd->mysql) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); mmsd->mysql=NULL; } } } __fast_exit_monitor_read_only_thread: if (mmsd->mysql) { // if we reached here we didn't put the connection back if (mmsd->mysql_error_msg) { proxy_error("Got error: mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, 
mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); // if we reached here we should destroy it mmsd->mysql=NULL; } else { if (crc) { bool rc=mmsd->set_wait_timeout(); if (rc) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); } else { proxy_error("Got error: mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); // set_wait_timeout failed } mmsd->mysql=NULL; } else { // really not sure how we reached here, drop it proxy_error("Got error: mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } } if (read_only_success) { __sync_fetch_and_add(&GloMyMon->read_only_check_OK,1); } else { __sync_fetch_and_add(&GloMyMon->read_only_check_ERR,1); } delete mysql_thr; return NULL; } void * monitor_group_replication_thread(void *arg) { mysql_close(mysql_init(NULL)); MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; mmsd->t1=start_time; bool crc=false; if (mmsd->mysql==NULL) { // we don't have a connection, let's 
create it bool rc; rc=mmsd->create_new_connection(); if (mmsd->mysql) { GloMyMon->My_Conn_Pool->conn_register(mmsd); } crc=true; if (rc==false) { goto __fast_exit_monitor_group_replication_thread; } } mmsd->t1=monotonic_time(); //async_exit_status=mysql_change_user_start(&ret_bool, mysql,"msandbox2","msandbox2","information_schema"); //mmsd->async_exit_status=mysql_ping_start(&mmsd->interr,mmsd->mysql); mmsd->interr=0; // reset the value #ifdef TEST_GROUPREP { std::string s { "SELECT viable_candidate,read_only,transactions_behind FROM GR_MEMBER_ROUTING_CANDIDATE_STATUS" }; s += " " + std::string(mmsd->hostname) + ":" + std::to_string(mmsd->port); mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,s.c_str()); } #else mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT viable_candidate,read_only,transactions_behind FROM sys.gr_member_routing_candidate_status"); #endif while (mmsd->async_exit_status) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mysql_thread___monitor_groupreplication_healthcheck_timeout * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); proxy_error("Timeout on group replication health check for %s:%d after %lldms. If the server is overload, increase mysql-monitor_groupreplication_healthcheck_timeout. 
Assuming viable_candidate=nO and read_only=YES\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_GR_HEALTH_CHECK_TIMEOUT); goto __exit_monitor_group_replication_thread; } if (mmsd->interr) { // error during query mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); goto __exit_monitor_group_replication_thread; } if (GloMyMon->shutdown==true) { goto __fast_exit_monitor_group_replication_thread; // exit immediately } if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { mmsd->async_exit_status=mysql_query_cont(&mmsd->interr, mmsd->mysql, mmsd->async_exit_status); } } if (mmsd->interr) { // error during query mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); goto __exit_monitor_group_replication_thread; } mmsd->async_exit_status=mysql_store_result_start(&mmsd->result,mmsd->mysql); while (mmsd->async_exit_status && ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0)) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mysql_thread___monitor_groupreplication_healthcheck_timeout * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); proxy_error("Timeout on group replication health check for %s:%d after %lldms. If the server is overload, increase mysql-monitor_groupreplication_healthcheck_timeout. 
Assuming viable_candidate=nO and read_only=YES\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_GR_HEALTH_CHECK_TIMEOUT); goto __exit_monitor_group_replication_thread; } if (GloMyMon->shutdown==true) { goto __fast_exit_monitor_group_replication_thread; // exit immediately } if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { mmsd->async_exit_status=mysql_store_result_cont(&mmsd->result, mmsd->mysql, mmsd->async_exit_status); } } if (mmsd->interr) { // group replication check failed mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); proxy_error("Got error: mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); if (mmsd->mysql) { GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } else { if (crc==false) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); mmsd->mysql=NULL; } } __exit_monitor_group_replication_thread: mmsd->t2=monotonic_time(); { // TODO : complete this char buf[128]; char *s=NULL; int l=strlen(mmsd->hostname); if (l<110) { s=buf; } else { s=(char *)malloc(l+16); } sprintf(s,"%s:%d",mmsd->hostname,mmsd->port); bool viable_candidate=false; bool read_only=true; int num_timeouts = 0; long long transactions_behind=-1; if (mmsd->interr == 0 && mmsd->result) { int num_fields=0; int num_rows=0; MYSQL_FIELD * fields = mysql_fetch_fields(mmsd->result); num_fields = mysql_num_fields(mmsd->result); num_rows = mysql_num_rows(mmsd->result); if (fields == NULL || num_fields!=3 || num_rows!=1) { proxy_error("mysql_fetch_fields returns NULL, or mysql_num_fields is incorrect. Server %s:%d . 
See bug #1994\n", mmsd->hostname, mmsd->port); if (mmsd->mysql_error_msg==NULL) { mmsd->mysql_error_msg = strdup("Unknown error"); } goto __end_process_group_replication_result2; } MYSQL_ROW row=mysql_fetch_row(mmsd->result); if (row[0] && !strcasecmp(row[0],"YES")) { viable_candidate=true; } if (row[1] && !strcasecmp(row[1],"NO")) { read_only=false; } if (row[2]) { transactions_behind=atol(row[2]); } mysql_free_result(mmsd->result); mmsd->result=NULL; } if (mmsd->result) { // make sure it is clear mysql_free_result(mmsd->result); mmsd->result=NULL; } //proxy_info("GR: %s:%d , viable=%s , ro=%s, trx=%ld, err=%s\n", mmsd->hostname, mmsd->port, (viable_candidate ? "YES": "NO") , (read_only ? "YES": "NO") , transactions_behind, ( mmsd->mysql_error_msg ? mmsd->mysql_error_msg : "") ); if (mmsd->mysql_error_msg) { //proxy_warning("GR: %s:%d , viable=%s , ro=%s, trx=%ld, err=%s\n", mmsd->hostname, mmsd->port, (viable_candidate ? "YES": "NO") , (read_only ? "YES": "NO") , transactions_behind, ( mmsd->mysql_error_msg ? mmsd->mysql_error_msg : "") ); } unsigned long long time_now=realtime_time(); time_now=time_now-(mmsd->t2 - start_time); pthread_mutex_lock(&GloMyMon->group_replication_mutex); //auto it = // TODO : complete this std::map::iterator it2; it2 = GloMyMon->Group_Replication_Hosts_Map.find(s); MyGR_monitor_node *node=NULL; if (it2!=GloMyMon->Group_Replication_Hosts_Map.end()) { node=it2->second; node->add_entry(time_now, (mmsd->mysql_error_msg ? 0 : mmsd->t2-mmsd->t1) , transactions_behind,viable_candidate,read_only,mmsd->mysql_error_msg); } else { node = new MyGR_monitor_node(mmsd->hostname,mmsd->port,mmsd->writer_hostgroup); node->add_entry(time_now, (mmsd->mysql_error_msg ? 
0 : mmsd->t2-mmsd->t1) , transactions_behind,viable_candidate,read_only,mmsd->mysql_error_msg); GloMyMon->Group_Replication_Hosts_Map.insert(std::make_pair(s,node)); } if (mmsd->mysql_error_msg) { if (strncasecmp(mmsd->mysql_error_msg, (char *)"timeout", 7) == 0) { num_timeouts=node->get_timeout_count(); proxy_warning("%s:%d : group replication health check timeout count %d. Max threshold %d.\n", mmsd->hostname, mmsd->port, num_timeouts, mmsd->max_transactions_behind_count); } } // NOTE: Previously 'lag_counts' was only updated for 'read_only' // because 'writers' were never selected for being set 'OFFLINE' due to // replication lag. Since the change of this behavior to 'SHUNNING' // with replication lag, no matter it's 'read_only' value, 'lag_counts' // is computed everytime. int lag_counts = node->get_lag_behind_count(mmsd->max_transactions_behind); pthread_mutex_unlock(&GloMyMon->group_replication_mutex); // NOTE: we update MyHGM outside the mutex group_replication_mutex if (mmsd->mysql_error_msg) { // there was an error checking the status of the server, surely we need to reconfigure GR if (num_timeouts == 0) { // it wasn't a timeout, reconfigure immediately MyHGM->update_group_replication_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg); } else { // it was a timeout. Check if we are having consecutive timeout if (num_timeouts == mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count) { proxy_error("Server %s:%d missed %d group replication checks. 
Number retries %d, Assuming offline\n", mmsd->hostname, mmsd->port, num_timeouts, num_timeouts); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_GR_HEALTH_CHECKS_MISSED); MyHGM->update_group_replication_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg); } else { // not enough timeout } } } else { if (viable_candidate==false) { MyHGM->update_group_replication_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"viable_candidate=NO"); } else { if (read_only==true) { MyHGM->update_group_replication_set_read_only(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"read_only=YES"); } else { // the node is a writer // TODO: for now we don't care about the number of writers MyHGM->update_group_replication_set_writer(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup); } // NOTE: Replication lag action should takes place **after** the // servers have been placed in the correct hostgroups, otherwise // during the reconfiguration of the servers due to 'update_group_replication_set_writer' // there would be a small window in which the 'SHUNNED' server // will be treat as 'ONLINE' letting some new connections to // take places, before it becomes 'SHUNNED' again. 
bool enable = true; if (lag_counts >= mysql_thread___monitor_groupreplication_max_transactions_behind_count) { enable = false; } MyHGM->group_replication_lag_action( mmsd->writer_hostgroup, mmsd->hostname, mmsd->port, lag_counts, read_only, enable ); } } // clean up if (l<110) { } else { free(s); } /* sqlite3_stmt *statement=NULL; sqlite3 *mondb=mmsd->mondb->get_db(); int rc; char *query=NULL; query=(char *)"INSERT OR REPLACE INTO mysql_server_read_only_log VALUES (?1 , ?2 , ?3 , ?4 , ?5 , ?6)"; rc=(*proxy_sqlite3_prepare_v2)(mondb, query, -1, &statement, 0); ASSERT_SQLITE_OK(rc, mmsd->mondb); int read_only=1; // as a safety mechanism , read_only=1 is the default rc=(*proxy_sqlite3_bind_text)(statement, 1, mmsd->hostname, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_int)(statement, 2, mmsd->port); ASSERT_SQLITE_OK(rc, mmsd->mondb); unsigned long long time_now=realtime_time(); time_now=time_now-(mmsd->t2 - start_time); rc=(*proxy_sqlite3_bind_int64)(statement, 3, time_now); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_int64)(statement, 4, (mmsd->mysql_error_msg ? 
0 : mmsd->t2-mmsd->t1)); ASSERT_SQLITE_OK(rc, mmsd->mondb); if (mmsd->result) { int num_fields=0; int k=0; MYSQL_FIELD *fields=NULL; int j=-1; num_fields = mysql_num_fields(mmsd->result); fields = mysql_fetch_fields(mmsd->result); for(k = 0; k < num_fields; k++) { //if (strcmp("VARIABLE_NAME", fields[k].name)==0) { if (strcmp((char *)"Value", (char *)fields[k].name)==0) { j=k; } } if (j>-1) { MYSQL_ROW row=mysql_fetch_row(mmsd->result); if (row) { if (row[j]) { if (!strcmp(row[j],"0") || !strcasecmp(row[j],"OFF")) read_only=0; } } } // if (repl_lag>=0) { rc=(*proxy_sqlite3_bind_int64)(statement, 5, read_only); ASSERT_SQLITE_OK(rc, mmsd->mondb); // } else { // rc=(*proxy_sqlite3_bind_null)(statement, 5); ASSERT_SQLITE_OK(rc, mmsd->mondb); // } mysql_free_result(mmsd->result); mmsd->result=NULL; } else { rc=(*proxy_sqlite3_bind_null)(statement, 5); ASSERT_SQLITE_OK(rc, mmsd->mondb); } rc=(*proxy_sqlite3_bind_text)(statement, 6, mmsd->mysql_error_msg, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, mmsd->mondb); SAFE_SQLITE3_STEP2(statement); rc=(*proxy_sqlite3_clear_bindings)(statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_reset)(statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); MyHGM->read_only_action(mmsd->hostname, mmsd->port, read_only); (*proxy_sqlite3_finalize)(statement); */ } __end_process_group_replication_result2: if (mmsd->interr || mmsd->mysql_error_msg) { // check failed if (mmsd->mysql) { proxy_error("Got error. 
mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } else { if (crc==false) { if (mmsd->mysql) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); mmsd->mysql=NULL; } } } __fast_exit_monitor_group_replication_thread: if (mmsd->mysql) { // if we reached here we didn't put the connection back if (mmsd->mysql_error_msg) { proxy_error("Got error. mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); // if we reached here we should destroy it mmsd->mysql=NULL; } else { if (crc) { bool rc=mmsd->set_wait_timeout(); if (rc) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); } else { proxy_error("Got error. mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); // set_wait_timeout failed } mmsd->mysql=NULL; } else { // really not sure how we reached here, drop it proxy_error("Got error. 
mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } } delete mysql_thr; return NULL; } /* monitor_galera_thread: worker routine for one Galera health check. arg is the MySQL_Monitor_State_Data describing the server to probe. It takes a monitor connection from My_Conn_Pool (creating one when the pool has none), runs the wsrep status query through the non-blocking client API bounded by mysql_thread___monitor_galera_healthcheck_timeout, stores the sample in GloMyMon->Galera_Hosts_Map while holding galera_mutex, and then calls the matching MyHGM->update_galera_set_*() action. Always returns NULL. */ void * monitor_galera_thread(void *arg) { mysql_close(mysql_init(NULL)); MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); mysql_thr->refresh_variables(); /* NOTE(review): mysql_thr was allocated above and is not deleted when the next check returns early */ if (!GloMTH) return NULL; // quick exit during shutdown/restart mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; #ifdef DEBUG MYSQL *mysqlcopy __attribute__((unused)) = NULL; #endif // DEBUG mmsd->t1=start_time; mmsd->interr=0; // reset the value bool crc=false; if (mmsd->mysql==NULL) { // we don't have a connection, let's create it bool rc; rc=mmsd->create_new_connection(); if (mmsd->mysql) { GloMyMon->My_Conn_Pool->conn_register(mmsd); } crc=true; if (rc==false) { unsigned long long now=monotonic_time(); char * new_error = (char *)malloc(50+strlen(mmsd->mysql_error_msg)); sprintf(new_error,"timeout or error in creating new connection: %s",mmsd->mysql_error_msg); free(mmsd->mysql_error_msg); mmsd->mysql_error_msg = new_error; proxy_error("Error on Galera check for %s:%d after %lldms. Unable to create a connection. If the server is overload, increase mysql-monitor_connect_timeout. 
Error: %s.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000, new_error); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_GALERA_HEALTH_CHECK_CONN_TIMEOUT); goto __exit_monitor_galera_thread; } } #ifdef DEBUG mysqlcopy = mmsd->mysql; #endif // DEBUG mmsd->t1=monotonic_time(); mmsd->interr=0; // reset the value { #ifdef TEST_GALERA char *q1 = (char *)"SELECT wsrep_local_state , read_only , wsrep_local_recv_queue , wsrep_desync , wsrep_reject_queries , wsrep_sst_donor_rejects_queries , " " wsrep_cluster_status, pxc_maint_mode FROM HOST_STATUS_GALERA WHERE hostgroup_id=%d AND hostname='%s' AND port=%d"; char *q2 = (char *)malloc(strlen(q1)+strlen(mmsd->hostname)+32); sprintf(q2,q1, mmsd->writer_hostgroup, mmsd->hostname, mmsd->port); mmsd->async_exit_status = mysql_query_start(&mmsd->interr, mmsd->mysql, q2); free(q2); #else char *sv = mmsd->mysql->server_version; if (strncmp(sv,(char *)"5.7",3)==0 || strncmp(sv,(char *)"8",1)==0) { // the backend is either MySQL 5.7 or MySQL 8 : INFORMATION_SCHEMA.GLOBAL_STATUS is deprecated mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT (SELECT VARIABLE_VALUE FROM performance_schema.global_status WHERE VARIABLE_NAME='WSREP_LOCAL_STATE') " "wsrep_local_state, @@read_only read_only, (SELECT VARIABLE_VALUE FROM performance_schema.global_status WHERE VARIABLE_NAME='WSREP_LOCAL_RECV_QUEUE') wsrep_local_recv_queue , " "@@wsrep_desync wsrep_desync, @@wsrep_reject_queries wsrep_reject_queries, @@wsrep_sst_donor_rejects_queries wsrep_sst_donor_rejects_queries, " "(SELECT VARIABLE_VALUE FROM performance_schema.global_status WHERE VARIABLE_NAME='WSREP_CLUSTER_STATUS') wsrep_cluster_status , " "(SELECT COALESCE(MAX(VARIABLE_VALUE),'DISABLED') FROM performance_schema.global_variables WHERE variable_name='pxc_maint_mode') pxc_maint_mode "); } else { // any other version
mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT (SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='WSREP_LOCAL_STATE') " "wsrep_local_state, @@read_only read_only, (SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='WSREP_LOCAL_RECV_QUEUE') wsrep_local_recv_queue , " "@@wsrep_desync wsrep_desync, @@wsrep_reject_queries wsrep_reject_queries, @@wsrep_sst_donor_rejects_queries wsrep_sst_donor_rejects_queries, " "(SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='WSREP_CLUSTER_STATUS') wsrep_cluster_status , (SELECT 'DISABLED') pxc_maint_mode"); } #endif // TEST_GALERA } /* keep polling until the query completes; each wait_for_mysql() call polls the socket for at most 10ms and the overall deadline is re-checked after every wait */ while (mmsd->async_exit_status) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mysql_thread___monitor_galera_healthcheck_timeout * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); proxy_error("Timeout on Galera health check for %s:%d after %lldms. 
If the server is overload, increase mysql-monitor_galera_healthcheck_timeout.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_GALERA_HEALTH_CHECK_TIMEOUT); goto __exit_monitor_galera_thread; } if (GloMyMon->shutdown==true) { goto __fast_exit_monitor_galera_thread; // exit immediately } if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { mmsd->async_exit_status=mysql_query_cont(&mmsd->interr, mmsd->mysql, mmsd->async_exit_status); } } /* same scheme for fetching the result set; NOTE(review): unlike the loop above, this loop also stops as soon as MYSQL_WAIT_TIMEOUT is set in the status */ mmsd->async_exit_status=mysql_store_result_start(&mmsd->result,mmsd->mysql); while (mmsd->async_exit_status && ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0)) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mysql_thread___monitor_galera_healthcheck_timeout * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); proxy_error("Timeout on Galera health check for %s:%d after %lldms. If the server is overload, increase mysql-monitor_galera_healthcheck_timeout.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_GALERA_HEALTH_CHECK_TIMEOUT); goto __exit_monitor_galera_thread; } if (GloMyMon->shutdown==true) { goto __fast_exit_monitor_galera_thread; // exit immediately } if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { mmsd->async_exit_status=mysql_store_result_cont(&mmsd->result, mmsd->mysql, mmsd->async_exit_status); } } if (mmsd->interr) { // ping failed mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); proxy_error("Got error. 
mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); if (mmsd->mysql) { GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } else { if (crc==false) { #ifdef TEST_GALERA if ( rand()%3 == 0) { // drop the connection once every 3 checks GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } else { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); mmsd->mysql=NULL; } #else GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); mmsd->mysql=NULL; #endif // TEST_GALERA } } __exit_monitor_galera_thread: mmsd->t2=monotonic_time(); { // TODO : complete this char buf[128]; char *s=NULL; int l=strlen(mmsd->hostname); /* s holds the host:port key used for Galera_Hosts_Map; it points to buf for short hostnames and to heap memory otherwise */ if (l<110) { s=buf; } else { s=(char *)malloc(l+16); } sprintf(s,"%s:%d",mmsd->hostname,mmsd->port); bool primary_partition = false; bool read_only=true; bool wsrep_desync = true; int wsrep_local_state = 0; bool wsrep_reject_queries = true; bool wsrep_sst_donor_rejects_queries = true; long long wsrep_local_recv_queue=0; bool pxc_maint_mode=false; int num_timeouts = 0; MYSQL_FIELD * fields=NULL; if (mmsd->interr == 0 && mmsd->result) { int num_fields=0; int num_rows=0; num_fields = mysql_num_fields(mmsd->result); fields = mysql_fetch_fields(mmsd->result); num_rows = mysql_num_rows(mmsd->result); if (fields==NULL || num_fields!=8 || num_rows!=1) { proxy_error("mysql_fetch_fields returns NULL, or mysql_num_fields is incorrect. Server %s:%d . 
See bug #1994\n", mmsd->hostname, mmsd->port); if (mmsd->mysql_error_msg==NULL) { mmsd->mysql_error_msg = strdup("Unknown error"); } /* NOTE(review): this jump bypasses the clean up step at the end of this scope, so a malloc'ed s (hostname length >= 110) is not freed on this path */ goto __end_process_galera_result2; } MYSQL_ROW row=mysql_fetch_row(mmsd->result); if (row[0]) { wsrep_local_state = atoi(row[0]); } if (row[1]) { if (!strcasecmp(row[1],"NO") || !strcasecmp(row[1],"OFF") || !strcasecmp(row[1],"0")) { read_only=false; } } if (row[2]) { wsrep_local_recv_queue = atoll(row[2]); } if (row[3]) { if (!strcasecmp(row[3],"NO") || !strcasecmp(row[3],"OFF") || !strcasecmp(row[3],"0")) { wsrep_desync = false; } } if (row[4]) { if (!strcasecmp(row[4],"NONE")) { wsrep_reject_queries = false; } } if (row[5]) { if (!strcasecmp(row[5],"NO") || !strcasecmp(row[5],"OFF") || !strcasecmp(row[5],"0")) { wsrep_sst_donor_rejects_queries = false; } } if (row[6]) { if (!strcasecmp(row[6],"Primary")) { primary_partition = true; } } if (row[7]) { std::string s(row[7]); std::transform(s.begin(), s.end(), s.begin(), ::toupper); if (!strncmp("DISABLED",s.c_str(),8)) { pxc_maint_mode=false; } else { pxc_maint_mode=true; } } mysql_free_result(mmsd->result); mmsd->result=NULL; } if (mmsd->mysql_error_msg) { } unsigned long long time_now=realtime_time(); time_now=time_now-(mmsd->t2 - start_time); pthread_mutex_lock(&GloMyMon->galera_mutex); //auto it = // TODO : complete this /* NOTE(review): the template arguments appear to be missing from the next declaration */ std::map::iterator it2; it2 = GloMyMon->Galera_Hosts_Map.find(s); Galera_monitor_node *node=NULL; if (it2!=GloMyMon->Galera_Hosts_Map.end()) { node=it2->second; //node->add_entry(time_now, (mmsd->mysql_error_msg ? 0 : mmsd->t2-mmsd->t1) , transactions_behind,viable_candidate,read_only,mmsd->mysql_error_msg); node->add_entry(time_now, (mmsd->mysql_error_msg ? 
0 : mmsd->t2-mmsd->t1) , wsrep_local_recv_queue, primary_partition, read_only, wsrep_local_state, wsrep_desync, wsrep_reject_queries, wsrep_sst_donor_rejects_queries, pxc_maint_mode, mmsd->mysql_error_msg); } else { node = new Galera_monitor_node(mmsd->hostname,mmsd->port,mmsd->writer_hostgroup); //node->add_entry(time_now, (mmsd->mysql_error_msg ? 0 : mmsd->t2-mmsd->t1) , transactions_behind,viable_candidate,read_only,mmsd->mysql_error_msg); node->add_entry(time_now, (mmsd->mysql_error_msg ? 0 : mmsd->t2-mmsd->t1) , wsrep_local_recv_queue, primary_partition, read_only, wsrep_local_state, wsrep_desync, wsrep_reject_queries, wsrep_sst_donor_rejects_queries, pxc_maint_mode, mmsd->mysql_error_msg); GloMyMon->Galera_Hosts_Map.insert(std::make_pair(s,node)); } if (mmsd->mysql_error_msg) { if (strncasecmp(mmsd->mysql_error_msg, (char *)"timeout", 7) == 0) { // it was a timeout . Let's count the number of consecutive timeouts int max_num_timeout = 10; if (mysql_thread___monitor_galera_healthcheck_max_timeout_count < max_num_timeout) { max_num_timeout = mysql_thread___monitor_galera_healthcheck_max_timeout_count; } /* NOTE(review): the text from here up to the unlock of galera_mutex looks truncated (see ilast_entries, jlast_entries and igalera_mutex below), as if everything between a less-than sign and the following greater-than sign had been stripped; restore this region from version control before building */ unsigned long long start_times[max_num_timeout]; bool timeouts[max_num_timeout]; for (int i=0; ilast_entries[i].start_time) { int smallidx = 0; for (int j=0; jlast_entries[i].start_time) { start_times[smallidx] = node->last_entries[i].start_time; timeouts[smallidx] = false; if (node->last_entries[i].error) { if (strncasecmp(node->last_entries[i].error, (char *)"timeout", 7) == 0) { timeouts[smallidx] = true; } } } } } for (int i=0; igalera_mutex); // NOTE: we update MyHGM outside the mutex galera_mutex if (mmsd->mysql_error_msg) { // there was an error checking the status of the server, surely we need to reconfigure Galera if (num_timeouts == 0) { // it wasn't a timeout, reconfigure immediately MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg); } else { // it was a timeout.
Check if we are having consecutive timeout if (num_timeouts == mysql_thread___monitor_galera_healthcheck_max_timeout_count) { proxy_error("Server %s:%d missed %d Galera checks. Assuming offline\n", mmsd->hostname, mmsd->port, num_timeouts); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_GALERA_HEALTH_CHECKS_MISSED); MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg); } else { // not enough timeout } } } else { if (fields) { // if we didn't get any error, but fileds is NULL, we are likely hitting bug #1994 /* the node is set offline unless it is in the primary partition, is not desynced, and either wsrep_local_state is 4 or it is 2 with wsrep_sst_donor_rejects_queries false */ if (primary_partition == false || wsrep_desync == true || (wsrep_local_state!=4 && (wsrep_local_state != 2 || wsrep_sst_donor_rejects_queries))) { if (primary_partition == false) { MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"primary_partition=NO"); } else { if (wsrep_desync == true) { MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"wsrep_desync=YES"); } else { char msg[80]; sprintf(msg,"wsrep_local_state=%d",wsrep_local_state); MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, msg); } } } else { //if (wsrep_sst_donor_rejects_queries || wsrep_reject_queries) { if (wsrep_reject_queries) { MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"wsrep_reject_queries=true"); // } else { // // wsrep_sst_donor_rejects_queries // MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"wsrep_sst_donor_rejects_queries=true"); // } } else { if (pxc_maint_mode) { MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"pxc_maint_mode=YES", true); } else { if (read_only==true) { if (wsrep_local_recv_queue > mmsd->max_transactions_behind) { MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port,
mmsd->writer_hostgroup, (char *)"slave is lagging"); } else { MyHGM->update_galera_set_read_only(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"read_only=YES"); } } else { // the node is a writer // TODO: for now we don't care about the number of writers MyHGM->update_galera_set_writer(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup); } } } } } else { proxy_error("mysql_fetch_fields returns NULL. Server %s:%d . See bug #1994\n", mmsd->hostname, mmsd->port); } } // clean up if (l<110) { } else { free(s); } } __end_process_galera_result2: if (mmsd->interr || mmsd->mysql_error_msg) { // check failed if (mmsd->mysql) { proxy_error("Got error. mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } else { if (crc==false) { if (mmsd->mysql) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); mmsd->mysql=NULL; } } } __fast_exit_monitor_galera_thread: if (mmsd->mysql) { // if we reached here we didn't put the connection back if (mmsd->mysql_error_msg) { proxy_error("Got error. mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); // if we reached here we should destroy it mmsd->mysql=NULL; } else { if (crc) { bool rc=mmsd->set_wait_timeout(); if (rc) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); } else { proxy_error("Got error. 
mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); // set_wait_timeout failed } mmsd->mysql=NULL; } else { // really not sure how we reached here, drop it proxy_error("Got error. mmsd %p , MYSQL %p , FD %d : %s\n", mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } } delete mysql_thr; return NULL; } void * monitor_replication_lag_thread(void *arg) { mysql_close(mysql_init(NULL)); MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg; if (!GloMTH) return NULL; // quick exit during shutdown/restart MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); mysql_thr->refresh_variables(); #ifdef DEBUG MYSQL *mysqlcopy __attribute__((unused)) = NULL; #endif // DEBUG mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; bool replication_lag_success = false; bool use_percona_heartbeat = false; char * percona_heartbeat_table = mysql_thread___monitor_replication_lag_use_percona_heartbeat; mmsd->t1=start_time; bool crc=false; if (mmsd->mysql==NULL) { // we don't have a connection, let's create it bool rc; rc=mmsd->create_new_connection(); if (mmsd->mysql) { GloMyMon->My_Conn_Pool->conn_register(mmsd); } crc=true; if (rc==false) { goto __fast_exit_monitor_replication_lag_thread; } } else { //GloMyMon->My_Conn_Pool->conn_register(mmsd); } #ifdef DEBUG mysqlcopy = mmsd->mysql; #endif // DEBUG mmsd->t1=monotonic_time(); mmsd->interr=0; // reset the value if 
(percona_heartbeat_table) { int l = strlen(percona_heartbeat_table); if (l) { use_percona_heartbeat = true; char *base_query = (char *)"SELECT MAX(ROUND(TIMESTAMPDIFF(MICROSECOND, ts, SYSDATE(6))/1000000)) AS Seconds_Behind_Master FROM %s"; char *replication_query = (char *)malloc(strlen(base_query)+l); sprintf(replication_query,base_query,percona_heartbeat_table); mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,replication_query); free(replication_query); } } if (use_percona_heartbeat == false) { mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SHOW SLAVE STATUS"); } while (mmsd->async_exit_status) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mysql_thread___monitor_replication_lag_timeout * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); goto __exit_monitor_replication_lag_thread; } if (GloMyMon->shutdown==true) { goto __fast_exit_monitor_replication_lag_thread; // exit immediately } if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { mmsd->async_exit_status=mysql_query_cont(&mmsd->interr, mmsd->mysql, mmsd->async_exit_status); //} else { // mmsd->mysql_error_msg=strdup("timeout check"); // goto __exit_monitor_replication_lag_thread; } } mmsd->async_exit_status=mysql_store_result_start(&mmsd->result,mmsd->mysql); while (mmsd->async_exit_status && ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0)) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mysql_thread___monitor_replication_lag_timeout * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); goto __exit_monitor_replication_lag_thread; } if (GloMyMon->shutdown==true) { goto __fast_exit_monitor_replication_lag_thread; // exit immediately } if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { mmsd->async_exit_status=mysql_store_result_cont(&mmsd->result, 
mmsd->mysql, mmsd->async_exit_status); //} else { // mmsd->mysql_error_msg=strdup("timeout check"); // goto __exit_monitor_replication_lag_thread; } } if (mmsd->interr) { // replication lag check failed mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); #ifdef DEBUG unsigned long long now=monotonic_time(); proxy_error("Error after %lldms: mmsd %p , MYSQL %p , FD %d : %s\n", (now-mmsd->t1)/1000, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); #endif // DEBUG if (mmsd->mysql) { GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } else { if (crc==false) { //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); mmsd->mysql=NULL; } } __exit_monitor_replication_lag_thread: mmsd->t2=monotonic_time(); { sqlite3_stmt *statement=NULL; //sqlite3 *mondb=mmsd->mondb->get_db(); int rc; char *query=NULL; query=(char *)"INSERT OR REPLACE INTO mysql_server_replication_lag_log VALUES (?1 , ?2 , ?3 , ?4 , ?5 , ?6)"; //rc=(*proxy_sqlite3_prepare_v2)(mondb, query, -1, &statement, 0); rc = mmsd->mondb->prepare_v2(query, &statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); // 'replication_lag' to be feed to 'replication_lag_action' int repl_lag=-2; rc=(*proxy_sqlite3_bind_text)(statement, 1, mmsd->hostname, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_int)(statement, 2, mmsd->port); ASSERT_SQLITE_OK(rc, mmsd->mondb); unsigned long long time_now=realtime_time(); time_now=time_now-(mmsd->t2 - start_time); rc=(*proxy_sqlite3_bind_int64)(statement, 3, time_now); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_bind_int64)(statement, 4, (mmsd->mysql_error_msg ? 
0 : mmsd->t2-mmsd->t1)); ASSERT_SQLITE_OK(rc, mmsd->mondb); if (mmsd->interr == 0 && mmsd->result) { int num_fields=0; int k=0; MYSQL_FIELD * fields=NULL; int j=-1; num_fields = mysql_num_fields(mmsd->result); fields = mysql_fetch_fields(mmsd->result); if ( fields && ( ( num_fields == 1 && use_percona_heartbeat == true ) || ( num_fields > 30 && use_percona_heartbeat == false ) ) ) { for(k = 0; k < num_fields; k++) { if (fields[k].name) { if (strcmp("Seconds_Behind_Master", fields[k].name)==0) { j=k; } } } if (j>-1) { MYSQL_ROW row=mysql_fetch_row(mmsd->result); if (row) { repl_lag=-1; // this is old behavior repl_lag=mysql_thread___monitor_slave_lag_when_null; // new behavior, see 669 if (row[j]) { // if Seconds_Behind_Master is not NULL repl_lag=atoi(row[j]); } else { proxy_error("Replication lag on server %s:%d is NULL, using the value %d (mysql-monitor_slave_lag_when_null)\n", mmsd->hostname, mmsd->port, mysql_thread___monitor_slave_lag_when_null); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_SRV_NULL_REPLICATION_LAG); } } } if (repl_lag>=0) { rc=(*proxy_sqlite3_bind_int64)(statement, 5, repl_lag); ASSERT_SQLITE_OK(rc, mmsd->mondb); } else { rc=(*proxy_sqlite3_bind_null)(statement, 5); ASSERT_SQLITE_OK(rc, mmsd->mondb); } } else { proxy_error("mysql_fetch_fields returns NULL, or mysql_num_fields is incorrect. Server %s:%d . See bug #1994\n", mmsd->hostname, mmsd->port); rc=(*proxy_sqlite3_bind_null)(statement, 5); ASSERT_SQLITE_OK(rc, mmsd->mondb); } mysql_free_result(mmsd->result); mmsd->result=NULL; } else { rc=(*proxy_sqlite3_bind_null)(statement, 5); ASSERT_SQLITE_OK(rc, mmsd->mondb); // 'replication_lag_check' timed out, we set 'repl_lag' to '-3' to avoid server to be 're-enabled'. 
repl_lag=-3; } rc=(*proxy_sqlite3_bind_text)(statement, 6, mmsd->mysql_error_msg, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, mmsd->mondb); SAFE_SQLITE3_STEP2(statement); rc=(*proxy_sqlite3_clear_bindings)(statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); rc=(*proxy_sqlite3_reset)(statement); ASSERT_SQLITE_OK(rc, mmsd->mondb); MyHGM->replication_lag_action(mmsd->hostgroup_id, mmsd->hostname, mmsd->port, repl_lag); (*proxy_sqlite3_finalize)(statement); if (mmsd->mysql_error_msg == NULL) { replication_lag_success = true; } } if (mmsd->interr || mmsd->mysql_error_msg) { // check failed if (mmsd->mysql) { #ifdef DEBUG proxy_error("Error after %lldms: server %s:%d , mmsd %p , MYSQL %p , FD %d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); #else proxy_error("Error after %lldms on server %s:%d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd->mysql_error_msg); #endif // DEBUG MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } else { if (mmsd->mysql) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); mmsd->mysql=NULL; } } __fast_exit_monitor_replication_lag_thread: if (mmsd->mysql) { mmsd->t2=monotonic_time(); // if we reached here we didn't put the connection back if (mmsd->mysql_error_msg) { #ifdef DEBUG proxy_error("Error after %lldms: server %s:%d , mmsd %p , MYSQL %p , FD %d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); #else proxy_error("Error after %lldms on server %s:%d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd->mysql_error_msg); #endif // DEBUG 
MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); mysql_close(mmsd->mysql); // if we reached here we should destroy it mmsd->mysql=NULL; } else { if (crc) { bool rc=mmsd->set_wait_timeout(); if (rc) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); } else { #ifdef DEBUG proxy_error("Error after %lldms: server %s:%d , mmsd %p , MYSQL %p , FD %d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); #else proxy_error("Error after %lldms on server %s:%d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd->mysql_error_msg); #endif // DEBUG MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); mysql_close(mmsd->mysql); // set_wait_timeout failed } mmsd->mysql=NULL; } else { // really not sure how we reached here, drop it #ifdef DEBUG proxy_error("Error after %lldms: server %s:%d , mmsd %p , MYSQL %p , FD %d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); #else proxy_error("Error after %lldms on server %s:%d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd->mysql_error_msg); #endif // DEBUG MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } } if (replication_lag_success) { __sync_fetch_and_add(&GloMyMon->replication_lag_check_OK,1); } else { __sync_fetch_and_add(&GloMyMon->replication_lag_check_ERR,1); } delete mysql_thr; return NULL; } void * MySQL_Monitor::monitor_connect() { // initialize the MySQL 
Thread (note: this is not a real thread, just the structures associated with it) /* monitor_connect: scheduler loop of the connect checks. While monitoring is enabled and no shutdown is requested, once per mysql_thread___monitor_connect_interval it refreshes monitor_internal.mysql_servers, queues one monitor_connect_thread work item for every server that answers pings, and deletes mysql_server_connect_log rows older than the monitor history window. After the loop it deletes mysql_thr, queues NULL work items and returns NULL. NOTE(review): several template argument lists appear to be missing in this function (std::lock_guard, std::vector::iterator, WorkItem). */ unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); /* NOTE(review): GloMTH is dereferenced on the next statement, before the NULL check that follows refresh_variables(); that early return also leaves mysql_thr allocated */ MySQL_Monitor__thread_MySQL_Thread_Variables_version=GloMTH->get_global_version(); mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart unsigned long long t1; unsigned long long t2; unsigned long long next_loop_at=0; while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) { // update the 'monitor_internal.mysql_servers' table with the latest 'mysql_servers' from 'MyHGM' { std::lock_guard mysql_servers_guard(MyHGM->mysql_servers_to_monitor_mutex); update_monitor_mysql_servers(MyHGM->mysql_servers_to_monitor); } char *error=NULL; int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; // add support for SSL char *query=(char *)"SELECT hostname, port, MAX(use_ssl) use_ssl FROM monitor_internal.mysql_servers GROUP BY hostname, port ORDER BY RANDOM()"; unsigned int glover; t1=monotonic_time(); if (!GloMTH) return NULL; // quick exit during shutdown/restart glover=GloMTH->get_global_version(); if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) { MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover; mysql_thr->refresh_variables(); next_loop_at=0; } if (t1 < next_loop_at) { goto __sleep_monitor_connect_loop; } next_loop_at=t1+1000*mysql_thread___monitor_connect_interval; proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset); if (error) { proxy_error("Error on %s : %s\n", query, error); goto __end_monitor_connect_loop; } else { if (resultset->rows_count==0) { goto __end_monitor_connect_loop; } /* us is the pause between queued jobs: derived from the connect interval and the number of servers, clamped, then randomized */ int us=100; if (resultset->rows_count) { us=mysql_thread___monitor_connect_interval/2/resultset->rows_count; us*=40; if (us > 1000000 || us <= 0) { us = 10000; } us = us + rand()%us; if
(resultset->rows_count==1) { // only 1 server, sleep also before creating the job usleep(us); } } for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { SQLite3_row *r=*it; bool rc_ping = true; rc_ping = server_responds_to_ping(r->fields[0],atoi(r->fields[1])); if (rc_ping) { // only if server is responding to pings MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[0],atoi(r->fields[1]), NULL, atoi(r->fields[2])); mmsd->mondb=monitordb; WorkItem* item; item=new WorkItem(mmsd,monitor_connect_thread); GloMyMon->queue->add(item); usleep(us); } /* NOTE(review): this return skips the deletion of resultset and of mysql_thr performed further below, and no NULL work items are queued on this path */ if (GloMyMon->shutdown) return NULL; } } __end_monitor_connect_loop: if (mysql_thread___monitor_enabled==true) { sqlite3_stmt *statement=NULL; //sqlite3 *mondb=monitordb->get_db(); int rc; char *query=NULL; query=(char *)"DELETE FROM mysql_server_connect_log WHERE time_start_us < ?1"; //rc=(*proxy_sqlite3_prepare_v2)(mondb, query, -1, &statement, 0); rc = monitordb->prepare_v2(query, &statement); ASSERT_SQLITE_OK(rc, monitordb); if (mysql_thread___monitor_history < mysql_thread___monitor_ping_interval * (mysql_thread___monitor_ping_max_failures + 1 )) { // issue #626 if (mysql_thread___monitor_ping_interval < 3600000) mysql_thread___monitor_history = mysql_thread___monitor_ping_interval * (mysql_thread___monitor_ping_max_failures + 1 ); } unsigned long long time_now=realtime_time(); rc=(*proxy_sqlite3_bind_int64)(statement, 1, time_now-(unsigned long long)mysql_thread___monitor_history*1000); ASSERT_SQLITE_OK(rc, monitordb); SAFE_SQLITE3_STEP2(statement); rc=(*proxy_sqlite3_clear_bindings)(statement); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_reset)(statement); ASSERT_SQLITE_OK(rc, monitordb); (*proxy_sqlite3_finalize)(statement); } if (resultset) delete resultset; __sleep_monitor_connect_loop: t2=monotonic_time(); /* NOTE(review): the next condition looks truncated, as if the text between a less-than sign and the following greater-than sign had been stripped; restore it from version control before building */ if (t2 500000) { st = 500000; } usleep(st); } } if (mysql_thr) { delete mysql_thr; mysql_thr=NULL; } /* NOTE(review): the loop header below looks truncated in the same way; what remains of its body queues a NULL work item */ for (unsigned int i=0;i *item=NULL;
GloMyMon->queue->add(item); } return NULL; } void * MySQL_Monitor::monitor_ping() { // initialize the MySQL Thread (note: this is not a real thread, just the structures associated with it) // struct event_base *libevent_base; unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); MySQL_Monitor__thread_MySQL_Thread_Variables_version=GloMTH->get_global_version(); mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart unsigned long long t1; unsigned long long t2; unsigned long long next_loop_at=0; while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) { // update the 'monitor_internal.mysql_servers' table with the latest 'mysql_servers' from 'MyHGM' { std::lock_guard mysql_servers_guard(MyHGM->mysql_servers_to_monitor_mutex); update_monitor_mysql_servers(MyHGM->mysql_servers_to_monitor); } unsigned int glover; char *error=NULL; int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; char *query=(char *)"SELECT hostname, port, MAX(use_ssl) use_ssl FROM monitor_internal.mysql_servers GROUP BY hostname, port ORDER BY RANDOM()"; t1=monotonic_time(); if (!GloMTH) return NULL; // quick exit during shutdown/restart glover=GloMTH->get_global_version(); if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) { MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover; mysql_thr->refresh_variables(); next_loop_at=0; } if (t1 < next_loop_at) { goto __sleep_monitor_ping_loop; } next_loop_at=t1+1000*mysql_thread___monitor_ping_interval; proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset); if (error) { proxy_error("Error on %s : %s\n", query, error); goto __end_monitor_ping_loop; } else { if (resultset->rows_count==0) { goto __end_monitor_ping_loop; } int us=100; if (resultset->rows_count) {
us=mysql_thread___monitor_ping_interval/2/resultset->rows_count; us = us == 0 ? 1 : us; us*=40; if (us > 1000000) { us = 10000; } us = us + rand()%us; if (resultset->rows_count==1) { // only 1 server, sleep also before creating the job usleep(us); } } for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { SQLite3_row *r=*it; MySQL_Monitor_State_Data *mmsd = new MySQL_Monitor_State_Data(r->fields[0],atoi(r->fields[1]), NULL, atoi(r->fields[2])); mmsd->mondb=monitordb; WorkItem* item; item=new WorkItem(mmsd,monitor_ping_thread); GloMyMon->queue->add(item); usleep(us); if (GloMyMon->shutdown) return NULL; } } __end_monitor_ping_loop: if (mysql_thread___monitor_enabled==true) { sqlite3_stmt *statement=NULL; //sqlite3 *mondb=monitordb->get_db(); int rc; char *query=NULL; query=(char *)"DELETE FROM mysql_server_ping_log WHERE time_start_us < ?1"; //rc=(*proxy_sqlite3_prepare_v2)(mondb, query, -1, &statement, 0); rc = monitordb->prepare_v2(query, &statement); ASSERT_SQLITE_OK(rc, monitordb); if (mysql_thread___monitor_history < mysql_thread___monitor_ping_interval * (mysql_thread___monitor_ping_max_failures + 1 )) { // issue #626 if (mysql_thread___monitor_ping_interval < 3600000) mysql_thread___monitor_history = mysql_thread___monitor_ping_interval * (mysql_thread___monitor_ping_max_failures + 1 ); } unsigned long long time_now=realtime_time(); rc=(*proxy_sqlite3_bind_int64)(statement, 1, time_now-(unsigned long long)mysql_thread___monitor_history*1000); ASSERT_SQLITE_OK(rc, monitordb); SAFE_SQLITE3_STEP2(statement); rc=(*proxy_sqlite3_clear_bindings)(statement); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_reset)(statement); ASSERT_SQLITE_OK(rc, monitordb); (*proxy_sqlite3_finalize)(statement); } if (resultset) { delete resultset; resultset=NULL; } // now it is time to shun all problematic hosts query=(char *)"SELECT DISTINCT a.hostname, a.port FROM monitor_internal.mysql_servers a JOIN monitor.mysql_server_ping_log b ON 
a.hostname=b.hostname WHERE b.ping_error IS NOT NULL AND b.ping_error NOT LIKE 'Access denied for user\%'"; proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); // we disable valgrind here. Probably a bug in SQLite3 VALGRIND_DISABLE_ERROR_REPORTING; admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset); VALGRIND_ENABLE_ERROR_REPORTING; if (error) { proxy_error("Error on %s : %s\n", query, error); } else { // get all addresses and ports int i=0; int j=0; char **addresses=(char **)malloc(resultset->rows_count * sizeof(char *)); char **ports=(char **)malloc(resultset->rows_count * sizeof(char *)); for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { SQLite3_row *r=*it; addresses[i]=strdup(r->fields[0]); ports[i]=strdup(r->fields[1]); i++; } if (resultset) { delete resultset; resultset=NULL; } char *new_query=NULL; new_query=(char *)"SELECT 1 FROM (SELECT hostname,port,ping_error FROM mysql_server_ping_log WHERE hostname='%s' AND port='%s' ORDER BY time_start_us DESC LIMIT %d) a WHERE ping_error IS NOT NULL AND ping_error NOT LIKE 'Access denied for user%%' AND ping_error NOT LIKE 'ProxySQL Error: Access denied for user%%' AND ping_error NOT LIKE 'Your password has expired.%%' GROUP BY hostname,port HAVING COUNT(*)=%d"; for (j=0;jexecute_statement(buff, &error , &cols , &affected_rows , &resultset); if (!error) { if (resultset) { if (resultset->rows_count) { // disable host bool rc_shun = false; rc_shun = MyHGM->shun_and_killall(addresses[j],atoi(ports[j])); if (rc_shun) { proxy_error("Server %s:%s missed %d heartbeats, shunning it and killing all the connections. 
Disabling other checks until the node comes back online.\n", addresses[j], ports[j], max_failures); } } delete resultset; resultset=NULL; } } else { proxy_error("Error on %s : %s\n", query, error); } free(buff); } while (i) { // now free all the addresses/ports i--; free(addresses[i]); free(ports[i]); } free(addresses); free(ports); } // now it is time to update current_lantency_ms query=(char *)"SELECT DISTINCT a.hostname, a.port FROM monitor_internal.mysql_servers a JOIN monitor.mysql_server_ping_log b ON a.hostname=b.hostname WHERE b.ping_error IS NULL"; proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); VALGRIND_DISABLE_ERROR_REPORTING; admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset); VALGRIND_ENABLE_ERROR_REPORTING; if (error) { proxy_error("Error on %s : %s\n", query, error); } else { // get all addresses and ports int i=0; int j=0; char **addresses=(char **)malloc(resultset->rows_count * sizeof(char *)); char **ports=(char **)malloc(resultset->rows_count * sizeof(char *)); for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { SQLite3_row *r=*it; addresses[i]=strdup(r->fields[0]); ports[i]=strdup(r->fields[1]); i++; } if (resultset) { delete resultset; resultset=NULL; } char *new_query=NULL; new_query=(char *)"SELECT hostname,port,COALESCE(CAST(AVG(ping_success_time_us) AS INTEGER),10000) FROM (SELECT hostname,port,ping_success_time_us,ping_error FROM mysql_server_ping_log WHERE hostname='%s' AND port='%s' ORDER BY time_start_us DESC LIMIT 3) a WHERE ping_error IS NULL GROUP BY hostname,port"; for (j=0;jexecute_statement(buff, &error , &cols , &affected_rows , &resultset); if (!error) { if (resultset) { if (resultset->rows_count) { for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { SQLite3_row *r=*it; // this should be called just once, but we create a generic for loop // update current_latency_ms 
MyHGM->set_server_current_latency_us(addresses[j],atoi(ports[j]), atoi(r->fields[2])); } } delete resultset; resultset=NULL; } } else { proxy_error("Error on %s : %s\n", query, error); } free(buff); } while (i) { // now free all the addresses/ports i--; free(addresses[i]); free(ports[i]); } free(addresses); free(ports); } __sleep_monitor_ping_loop: t2=monotonic_time(); if (t2 500000) { st = 500000; } usleep(st); } } if (mysql_thr) { delete mysql_thr; mysql_thr=NULL; } for (unsigned int i=0;i* item=NULL; GloMyMon->queue->add(item); } return NULL; } bool MySQL_Monitor::server_responds_to_ping(char *address, int port) { bool ret = true; // default char *error=NULL; int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; char *new_query=NULL; new_query=(char *)"SELECT 1 FROM (SELECT hostname,port,ping_error FROM mysql_server_ping_log WHERE hostname='%s' AND port=%d ORDER BY time_start_us DESC LIMIT %d) a WHERE ping_error IS NOT NULL AND ping_error NOT LIKE 'Access denied for user%%' GROUP BY hostname,port HAVING COUNT(*)=%d"; char *buff=(char *)malloc(strlen(new_query)+strlen(address)+32); int max_failures = mysql_thread___monitor_ping_max_failures; sprintf(buff,new_query,address,port,max_failures,max_failures); VALGRIND_DISABLE_ERROR_REPORTING; monitordb->execute_statement(buff, &error , &cols , &affected_rows , &resultset); VALGRIND_ENABLE_ERROR_REPORTING; if (!error) { if (resultset) { if (resultset->rows_count) { ret = false; } delete resultset; resultset=NULL; } } else { proxy_error("Error on %s : %s\n", buff, error); } if (resultset) { delete resultset; resultset=NULL; } free(buff); return ret; } void * MySQL_Monitor::monitor_read_only() { // initialize the MySQL Thread (note: this is not a real thread, just the structures associated with it) unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); 
MySQL_Monitor__thread_MySQL_Thread_Variables_version=GloMTH->get_global_version(); mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart unsigned long long t1; unsigned long long t2; unsigned long long next_loop_at=0; while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) { unsigned int glover; char *error=NULL; SQLite3_result *resultset=NULL; // add support for SSL char *query=(char *)"SELECT hostname, port, MAX(use_ssl) use_ssl, check_type FROM mysql_servers JOIN mysql_replication_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup WHERE status NOT IN (2,3) GROUP BY hostname, port ORDER BY RANDOM()"; t1=monotonic_time(); if (!GloMTH) return NULL; // quick exit during shutdown/restart glover=GloMTH->get_global_version(); if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) { MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover; mysql_thr->refresh_variables(); next_loop_at=0; } if (t1 < next_loop_at) { goto __sleep_monitor_read_only; } next_loop_at=t1+1000*mysql_thread___monitor_read_only_interval; proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); resultset = MyHGM->execute_query(query, &error); assert(resultset); if (error) { proxy_error("Error on %s : %s\n", query, error); goto __end_monitor_read_only_loop; } else { if (resultset->rows_count==0) { goto __end_monitor_read_only_loop; } int us=100; if (resultset->rows_count) { us=mysql_thread___monitor_read_only_interval/2/resultset->rows_count; us = us == 0 ? 
1 : us; us*=40; if (us > 1000000) { us = 10000; } us = us + rand()%us; if (resultset->rows_count==1) { // only 1 server, sleep also before creating the job usleep(us); } } for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { SQLite3_row *r=*it; bool rc_ping = true; rc_ping = server_responds_to_ping(r->fields[0],atoi(r->fields[1])); if (rc_ping) { // only if server is responding to pings MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[0],atoi(r->fields[1]), NULL, atoi(r->fields[2])); mmsd->task_id = MON_READ_ONLY; // default if (r->fields[3]) { if (strcasecmp(r->fields[3],(char *)"innodb_read_only")==0) { mmsd->task_id = MON_INNODB_READ_ONLY; } else if (strcasecmp(r->fields[3],(char *)"super_read_only")==0) { mmsd->task_id = MON_SUPER_READ_ONLY; } else if (strcasecmp(r->fields[3],(char *)"read_only&innodb_read_only")==0) { mmsd->task_id = MON_READ_ONLY__AND__INNODB_READ_ONLY; } else if (strcasecmp(r->fields[3],(char *)"read_only|innodb_read_only")==0) { mmsd->task_id = MON_READ_ONLY__OR__INNODB_READ_ONLY; } } mmsd->mondb=monitordb; WorkItem* item; item=new WorkItem(mmsd,monitor_read_only_thread); GloMyMon->queue->add(item); usleep(us); } if (GloMyMon->shutdown) return NULL; } } __end_monitor_read_only_loop: if (mysql_thread___monitor_enabled==true) { sqlite3_stmt *statement=NULL; //sqlite3 *mondb=monitordb->get_db(); int rc; char *query=NULL; query=(char *)"DELETE FROM mysql_server_read_only_log WHERE time_start_us < ?1"; //rc=(*proxy_sqlite3_prepare_v2)(mondb, query, -1, &statement, 0); rc = monitordb->prepare_v2(query, &statement); ASSERT_SQLITE_OK(rc, monitordb); if (mysql_thread___monitor_history < mysql_thread___monitor_read_only_interval * (mysql_thread___monitor_read_only_max_timeout_count + 1 )) { // issue #626 if (mysql_thread___monitor_read_only_interval < 3600000) mysql_thread___monitor_history = mysql_thread___monitor_read_only_interval * (mysql_thread___monitor_read_only_max_timeout_count + 
1 ); } unsigned long long time_now=realtime_time(); rc=(*proxy_sqlite3_bind_int64)(statement, 1, time_now-(unsigned long long)mysql_thread___monitor_history*1000); ASSERT_SQLITE_OK(rc, monitordb); SAFE_SQLITE3_STEP2(statement); rc=(*proxy_sqlite3_clear_bindings)(statement); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_reset)(statement); ASSERT_SQLITE_OK(rc, monitordb); (*proxy_sqlite3_finalize)(statement); } if (resultset) delete resultset; __sleep_monitor_read_only: t2=monotonic_time(); if (t2 500000) { st = 500000; } usleep(st); } } if (mysql_thr) { delete mysql_thr; mysql_thr=NULL; } for (unsigned int i=0;i *item=NULL; GloMyMon->queue->add(item); } return NULL; } void * MySQL_Monitor::monitor_group_replication() { // initialize the MySQL Thread (note: this is not a real thread, just the structures associated with it) // struct event_base *libevent_base; unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); MySQL_Monitor__thread_MySQL_Thread_Variables_version=GloMTH->get_global_version(); mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart unsigned long long t1; unsigned long long t2; unsigned long long next_loop_at=0; while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) { unsigned int glover; // char *error=NULL; // SQLite3_result *resultset=NULL; // add support for SSL // char *query=(char *)"SELECT hostname, port, MAX(use_ssl) use_ssl FROM mysql_servers JOIN mysql_group_replication_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=writer_hostgroup hostgroup_id=reader_hostgroup WHERE status NOT LIKE 'OFFLINE\%' GROUP BY hostname, port"; t1=monotonic_time(); if (!GloMTH) return NULL; // quick exit during shutdown/restart glover=GloMTH->get_global_version(); if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) { MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover; 
mysql_thr->refresh_variables(); next_loop_at=0; } if (t1 < next_loop_at) { goto __sleep_monitor_group_replication; } next_loop_at=t1+1000*mysql_thread___monitor_groupreplication_healthcheck_interval; pthread_mutex_lock(&group_replication_mutex); // proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); // admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset); // resultset = MyHGM->execute_query(query, &error); // assert(resultset); if (Group_Replication_Hosts_resultset==NULL) { proxy_error("Group replication hosts result set is absent\n"); goto __end_monitor_group_replication_loop; // } // if (error) { // proxy_error("Error on %s : %s\n", query, error); // goto __end_monitor_read_only_loop; } else { if (Group_Replication_Hosts_resultset->rows_count==0) { goto __end_monitor_group_replication_loop; } int us=100; if (Group_Replication_Hosts_resultset->rows_count) { us=mysql_thread___monitor_read_only_interval/2/Group_Replication_Hosts_resultset->rows_count; } for (std::vector::iterator it = Group_Replication_Hosts_resultset->rows.begin() ; it != Group_Replication_Hosts_resultset->rows.end(); ++it) { SQLite3_row *r=*it; bool rc_ping = true; rc_ping = server_responds_to_ping(r->fields[1],atoi(r->fields[2])); if (rc_ping) { // only if server is responding to pings MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[1],atoi(r->fields[2]), NULL, atoi(r->fields[3])); mmsd->writer_hostgroup=atoi(r->fields[0]); mmsd->writer_is_also_reader=atoi(r->fields[4]); mmsd->max_transactions_behind=atoi(r->fields[5]); mmsd->max_transactions_behind_count=mysql_thread___monitor_groupreplication_max_transactions_behind_count; mmsd->mondb=monitordb; WorkItem* item; item=new WorkItem(mmsd,monitor_group_replication_thread); GloMyMon->queue->add(item); usleep(us); } if (GloMyMon->shutdown) { pthread_mutex_unlock(&group_replication_mutex); return NULL; } } } __end_monitor_group_replication_loop: pthread_mutex_unlock(&group_replication_mutex); if 
// Scheduler loop for the Galera health check.
//
// While GloMyMon->shutdown is false and mysql_thread___monitor_enabled is true, each
// iteration re-reads the thread variables when the global version has increased and,
// when the next scheduled time has been reached, walks Galera_Hosts_resultset while
// holding galera_mutex, queueing one WorkItem running monitor_galera_thread for every
// host for which server_responds_to_ping() returns true. It then sleeps before the
// next iteration. After the loop it deletes the MySQL_Thread and adds NULL items to
// GloMyMon->queue.
//
// Returns NULL on every path.
//
// NOTE(review): this copy of the function looks damaged by text extraction: template
// argument lists are absent (std::vector::iterator, WorkItem) and two statements near
// the end are truncated. The code below is reproduced as found.
void * MySQL_Monitor::monitor_galera() {
	// initialize the MySQL Thread (note: this is not a real thread, just the structures associated with it)
//	struct event_base *libevent_base;
	unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version;
	MySQL_Thread * mysql_thr = new MySQL_Thread();
	mysql_thr->curtime=monotonic_time();
	// NOTE(review): GloMTH is dereferenced here, before the NULL check that follows
	// refresh_variables(); the check cannot protect this call.
	MySQL_Monitor__thread_MySQL_Thread_Variables_version=GloMTH->get_global_version();
	mysql_thr->refresh_variables();
	// NOTE(review): this return does not delete mysql_thr.
	if (!GloMTH) return NULL;	// quick exit during shutdown/restart

	unsigned long long t1;
	unsigned long long t2;
	unsigned long long next_loop_at=0;	// value of monotonic_time() at which the next round of checks is due

	while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) {

		unsigned int glover;
		t1=monotonic_time();

		// NOTE(review): returning here skips both the delete of mysql_thr and the
		// NULL items added to the queue after the loop.
		if (!GloMTH) return NULL;	// quick exit during shutdown/restart
		glover=GloMTH->get_global_version();
		if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) {
			// variables changed: reload them and make the next round due immediately
			MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover;
			mysql_thr->refresh_variables();
			next_loop_at=0;
		}

		if (t1 < next_loop_at) {
			// not due yet; galera_mutex has not been taken on this path
			goto __sleep_monitor_galera;
		}
		next_loop_at=t1+1000*mysql_thread___monitor_galera_healthcheck_interval;
		// held while Galera_Hosts_resultset is read; released at __end_monitor_galera_loop
		// or just before the shutdown return inside the loop
		pthread_mutex_lock(&galera_mutex);
		if (Galera_Hosts_resultset==NULL) {
			goto __end_monitor_galera_loop;
		} else {
			if (Galera_Hosts_resultset->rows_count==0) {
				goto __end_monitor_galera_loop;
			}
			// pause passed to usleep() after each queued job
			// NOTE(review): derived from mysql_thread___monitor_read_only_interval rather
			// than the galera healthcheck interval used above - confirm this is intended.
			int us=100;
			if (Galera_Hosts_resultset->rows_count) {
				us=mysql_thread___monitor_read_only_interval/2/Galera_Hosts_resultset->rows_count;
			}
			for (std::vector::iterator it = Galera_Hosts_resultset->rows.begin() ; it != Galera_Hosts_resultset->rows.end(); ++it) {
				SQLite3_row *r=*it;
				bool rc_ping = true;
				// fields as used below: [0]=writer_hostgroup, [1]=hostname, [2]=port,
				// [3]=fourth constructor argument (presumably use_ssl - verify),
				// [4]=writer_is_also_reader, [5]=max_transactions_behind
				rc_ping = server_responds_to_ping(r->fields[1],atoi(r->fields[2]));
				if (rc_ping) { // only if server is responding to pings
					MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[1],atoi(r->fields[2]), NULL, atoi(r->fields[3]));
					mmsd->writer_hostgroup=atoi(r->fields[0]);
					mmsd->writer_is_also_reader=atoi(r->fields[4]);
					mmsd->max_transactions_behind=atoi(r->fields[5]);
					mmsd->mondb=monitordb;
					WorkItem* item;
					item=new WorkItem(mmsd,monitor_galera_thread);
					GloMyMon->queue->add(item);
					usleep(us);
				}
				if (GloMyMon->shutdown) {
					// release the mutex before leaving
					// NOTE(review): mysql_thr is not deleted on this path.
					pthread_mutex_unlock(&galera_mutex);
					return NULL;
				}
			}
		}

__end_monitor_galera_loop:
		pthread_mutex_unlock(&galera_mutex);
		// intentionally empty in this version: no log purge is performed here
		if (mysql_thread___monitor_enabled==true) {
		}

__sleep_monitor_galera:
		t2=monotonic_time();
		// NOTE(review): the statement below is truncated in this copy: the comparison
		// operator and the declaration/assignment of 'st' are not visible. What remains
		// shows 'st' being capped at 500000 before it is passed to usleep().
		if (t2 500000) {
				st = 500000;
			}
			usleep(st);
		}
	}
	if (mysql_thr) {
		delete mysql_thr;
		mysql_thr=NULL;
	}
	// NOTE(review): the loop header below is truncated in this copy (bound, increment
	// and the declared type of 'item' are not visible). Each iteration adds a NULL item
	// to the queue; HEAD's ConsumerThread::run() returns when it removes a NULL item.
	for (unsigned int i=0;i*item=NULL;
		GloMyMon->queue->add(item);
	}
	return NULL;
}
structures associated with it) unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); MySQL_Monitor__thread_MySQL_Thread_Variables_version=GloMTH->get_global_version(); mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart unsigned long long t1; unsigned long long t2; unsigned long long next_loop_at=0; while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) { unsigned int glover; char *error=NULL; SQLite3_result *resultset=NULL; // add support for SSL char *query= NULL; if (mysql_thread___monitor_replication_lag_group_by_host==true) { query = (char *)"SELECT MIN(hostgroup_id), hostname, port, MIN(max_replication_lag), MAX(use_ssl) FROM mysql_servers WHERE max_replication_lag > 0 AND status NOT IN (2,3) GROUP BY hostname, port"; } else { query=(char *)"SELECT hostgroup_id, hostname, port, max_replication_lag, use_ssl FROM mysql_servers WHERE max_replication_lag > 0 AND status NOT IN (2,3)"; } t1=monotonic_time(); if (!GloMTH) return NULL; // quick exit during shutdown/restart glover=GloMTH->get_global_version(); if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) { MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover; mysql_thr->refresh_variables(); next_loop_at=0; } if (t1 < next_loop_at) { goto __sleep_monitor_replication_lag; } next_loop_at=t1+1000*mysql_thread___monitor_replication_lag_interval; proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); resultset = MyHGM->execute_query(query, &error); assert(resultset); if (error) { proxy_error("Error on %s : %s\n", query, error); goto __end_monitor_replication_lag_loop; } else { if (resultset->rows_count==0) { goto __end_monitor_replication_lag_loop; } int us=100; if (resultset->rows_count) { us=mysql_thread___monitor_replication_lag_interval/2/resultset->rows_count; } for (std::vector::iterator it = resultset->rows.begin() ; it != 
/**
 * @brief Tell whether a string is a textual IPv4 or IPv6 address.
 *
 * @param ip Candidate address in presentation format (e.g. "10.0.0.1" or "::1").
 * @return true when the string parses as IPv4 or as IPv6, false otherwise
 *         (including the empty string and hostnames).
 */
bool validate_ip(const std::string& ip) {
	// inet_pton() returns 1 on success, 0 when the text is not a valid address
	// for the family and -1 when the call itself fails; only 1 means "valid".

	// check if ip is a valid IPv4 address
	struct in_addr v4;
	if (inet_pton(AF_INET, ip.c_str(), &v4) == 1)
		return true;

	// check if ip is a valid IPv6 address
	struct in6_addr v6;
	return inet_pton(AF_INET6, ip.c_str(), &v6) == 1;
}

/**
 * @brief Return the textual IP address of the peer connected to a socket.
 *
 * @param socket_fd Connected socket descriptor.
 * @return The peer address in presentation format, or an empty string when
 *         getpeername() fails, the peer is neither AF_INET nor AF_INET6
 *         (for example a UNIX domain socket), or the address cannot be
 *         converted to text.
 */
std::string get_connected_peer_ip_from_socket(int socket_fd) {
	// sockaddr_storage is large enough for any address family, so no heap
	// allocation (and no unchecked malloc) is needed
	struct sockaddr_storage peer;
	memset(&peer, 0, sizeof(peer));
	socklen_t peer_len = sizeof(peer);

	if (getpeername(socket_fd, reinterpret_cast<struct sockaddr*>(&peer), &peer_len) != 0)
		return std::string();

	const void* raw_addr = NULL;
	if (peer.ss_family == AF_INET) {
		raw_addr = &reinterpret_cast<const struct sockaddr_in*>(&peer)->sin_addr;
	} else if (peer.ss_family == AF_INET6) {
		raw_addr = &reinterpret_cast<const struct sockaddr_in6*>(&peer)->sin6_addr;
	} else {
		// not an IP socket: there is no IP address to report
		return std::string();
	}

	// zero-initialised so the buffer is never read while indeterminate
	char ip_addr[INET6_ADDRSTRLEN] = { 0 };
	if (inet_ntop(peer.ss_family, raw_addr, ip_addr, sizeof(ip_addr)) == NULL)
		return std::string();

	return std::string(ip_addr);
}

/**
 * @brief Render a collection of IP strings as one space-separated string.
 *
 * Every element is followed by a single space, so a non-empty result ends
 * with a trailing space and an empty collection yields an empty string.
 *
 * @tparam T Iterable collection whose elements are convertible to std::string.
 * @param ips Collection to render.
 * @return The concatenated text.
 */
template <class T>
std::string debug_iplisttostring(const T& ips) {
	std::string joined;
	for (const std::string& ip : ips) {
		joined += ip;
		joined += ' ';
	}
	return joined;
}
This flag is useful on, for example, IPv4-only systems, to ensure that getaddrinfo() does not return IPv6 socket addresses that would always fail in connect or bind. */ hints.ai_flags = AI_ADDRCONFIG; proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "Resolving hostname:[%s] to its mapped IP address.\n", dns_resolve_data->hostname.c_str()); int gai_rc = getaddrinfo(dns_resolve_data->hostname.c_str(), NULL, &hints, &res); if (gai_rc != 0 || !res) { proxy_error("An error occurred while resolving hostname: %s [%d]\n", dns_resolve_data->hostname.c_str(), gai_rc); goto __error; } try { std::vector ips; ips.reserve(64); char ip_addr[INET6_ADDRSTRLEN]; for (auto p = res; p != NULL; p = p->ai_next) { if (p->ai_family == AF_INET) { struct sockaddr_in* ipv4 = (struct sockaddr_in*)p->ai_addr; inet_ntop(p->ai_addr->sa_family, &ipv4->sin_addr, ip_addr, INET_ADDRSTRLEN); ips.push_back(ip_addr); } else { struct sockaddr_in6* ipv6 = (struct sockaddr_in6*)p->ai_addr; inet_ntop(p->ai_addr->sa_family, &ipv6->sin6_addr, ip_addr, INET6_ADDRSTRLEN); ips.push_back(ip_addr); } } freeaddrinfo(res); if (!ips.empty()) { bool to_update_cache = false; if (!dns_resolve_data->cached_ips.empty()) { if (dns_resolve_data->cached_ips.size() == ips.size()) { for (const std::string& ip : ips) { if (dns_resolve_data->cached_ips.find(ip) == dns_resolve_data->cached_ips.end()) { to_update_cache = true; break; } } } else to_update_cache = true; // only update dns_records_bookkeeping if (!to_update_cache) { proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "DNS cache record already up-to-date. 
(Hostname:[%s] IP:[%s])\n", dns_resolve_data->hostname.c_str(), debug_iplisttostring(ips).c_str()); dns_resolve_data->result.set_value(std::make_tuple<>(true, DNS_Cache_Record(dns_resolve_data->hostname, std::move(dns_resolve_data->cached_ips), monotonic_time() + (1000 * dns_resolve_data->ttl)))); } } else to_update_cache = true; if (to_update_cache) { dns_resolve_data->result.set_value(std::make_tuple<>(true, DNS_Cache_Record(dns_resolve_data->hostname, ips, monotonic_time() + (1000 * dns_resolve_data->ttl)))); dns_resolve_data->dns_cache->add(dns_resolve_data->hostname, std::move(ips)); } return NULL; } } catch (std::exception& ex) { proxy_error("An exception occurred while resolving hostname: %s [%s]\n", dns_resolve_data->hostname.c_str(), ex.what()); } catch (...) { proxy_error("An unknown exception has occurred while resolving hostname: %s\n", dns_resolve_data->hostname.c_str()); } __error: dns_resolve_data->result.set_value(std::make_tuple<>(false, DNS_Cache_Record())); return NULL; } void* MySQL_Monitor::monitor_dns_cache() { // initialize the MySQL Thread (note: this is not a real thread, just the structures associated with it) unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version; std::unique_ptr mysql_thr(new MySQL_Thread()); mysql_thr->curtime = monotonic_time(); MySQL_Monitor__thread_MySQL_Thread_Variables_version = GloMTH->get_global_version(); mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart constexpr unsigned int num_dns_resolver_threads = 1; constexpr unsigned int num_dns_resolver_max_threads = 32; unsigned long long t1 = 0; unsigned long long t2 = 0; unsigned long long next_loop_at = 0; bool dns_cache_enable = true; // Bookkeeper for dns records and ttl std::list dns_records_bookkeeping; // Queue for DNS resolver request wqueue*> dns_resolver_queue; while (GloMyMon->shutdown == false) { if (!GloMTH) return NULL; // quick exit during shutdown/restart const unsigned int glover = 
GloMTH->get_global_version(); if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover) { MySQL_Monitor__thread_MySQL_Thread_Variables_version = glover; mysql_thr->refresh_variables(); next_loop_at = 0; // dns cache is disabled if (mysql_thread___monitor_local_dns_cache_ttl == 0 || mysql_thread___monitor_local_dns_cache_refresh_interval == 0) { dns_cache_enable = false; dns_cache->set_enabled_flag(false); dns_cache->clear(); dns_records_bookkeeping.clear(); proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "DNS cache is disabled.\n"); /*while (dns_resolver_queue.size()) { WorkItem* item = dns_resolver_queue.remove(); if (item) { if (item->data) { delete item->data; } delete item; } }*/ } else { //dns cache enabled dns_cache_enable = true; dns_cache->set_enabled_flag(true); proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "DNS cache is enabled.\n"); } } if (!dns_cache_enable) { usleep(200000); continue; } char* error = NULL; int cols = 0; int affected_rows = 0; SQLite3_result* resultset = NULL; const char* query = (char*)"SELECT trim(hostname) FROM monitor_internal.mysql_servers UNION SELECT trim(hostname) FROM monitor_internal.proxysql_servers"; t1 = monotonic_time(); if (t1 < next_loop_at && !force_dns_cache_update) { goto __sleep_monitor_dns_cache_loop; } force_dns_cache_update = false; next_loop_at = t1 + (1000 * mysql_thread___monitor_local_dns_cache_refresh_interval); // update the 'monitor_internal.mysql_servers' table with the latest 'mysql_servers' from 'MyHGM' { std::lock_guard mysql_servers_guard(MyHGM->mysql_servers_to_monitor_mutex); update_monitor_mysql_servers(MyHGM->mysql_servers_to_monitor); } if (GloProxyCluster) { std::lock_guard proxysql_servers_guard(GloProxyCluster->proxysql_servers_to_monitor_mutex); update_monitor_proxysql_servers(GloProxyCluster->proxysql_servers_to_monitor); } proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); admindb->execute_statement(query, &error, &cols, &affected_rows, &resultset); if (error) { proxy_error("Error on %s 
: %s\n", query, error); goto __end_monitor_dns_cache_loop; } else { if (resultset->rows_count == 0) { // Remove orphaned records if any if (dns_cache->empty() == false) { proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "Clearing all orphaned DNS records from cache.\n"); dns_cache->clear(); } if (dns_records_bookkeeping.empty() == false) { proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "Clearing all orphaned DNS records from bookkeeper.\n"); dns_records_bookkeeping.clear(); } goto __end_monitor_dns_cache_loop; } std::vector dns_resolver_threads(num_dns_resolver_threads); for (unsigned int i = 0; i < num_dns_resolver_threads; i++) { dns_resolver_threads[i] = new DNSResolverThread(dns_resolver_queue, 0); dns_resolver_threads[i]->start(2048, false); } std::set hostnames; for (const auto row : resultset->rows) { const std::string& hostname = row->fields[0]; // Add only hostnames/domain and ignore IPs if (!validate_ip(hostname)) hostnames.insert(hostname); } std::list>> dns_resolve_result; if (dns_records_bookkeeping.empty() == false) { unsigned long long current_time = monotonic_time(); for (auto itr = dns_records_bookkeeping.begin(); itr != dns_records_bookkeeping.end();) { // remove orphaned records if (hostnames.find(itr->hostname_) == hostnames.end()) { dns_cache->remove(itr->hostname_); proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "Removing orphaned DNS record from bookkeeper. 
(Hostname:[%s] IP:[%s])\n", itr->hostname_.c_str(), debug_iplisttostring(itr->ips_).c_str()); itr = dns_records_bookkeeping.erase(itr); } else { hostnames.erase(itr->hostname_); // Renew dns records if expired if (current_time > itr->ttl_) { std::unique_ptr dns_resolve_data(new DNS_Resolve_Data()); dns_resolve_data->hostname = std::move(itr->hostname_); dns_resolve_data->cached_ips = std::move(itr->ips_); dns_resolve_data->ttl = mysql_thread___monitor_local_dns_cache_ttl; dns_resolve_data->dns_cache = dns_cache; dns_resolve_result.emplace_back(dns_resolve_data->result.get_future()); proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "Removing expired DNS record from bookkeeper. (Hostname:[%s] IP:[%s])\n", itr->hostname_.c_str(), debug_iplisttostring(dns_resolve_data->cached_ips).c_str()); dns_resolver_queue.add(new WorkItem(dns_resolve_data.release(), monitor_dns_resolver_thread)); itr = dns_records_bookkeeping.erase(itr); continue; } itr++; } } } { unsigned int qsize = dns_resolver_queue.size(); unsigned int num_threads = dns_resolver_threads.size(); if (qsize > static_cast(mysql_thread___monitor_local_dns_resolver_queue_maxsize) / 8) { proxy_warning("DNS resolver queue too big: %d\n", qsize); unsigned int threads_max = num_dns_resolver_max_threads; if (threads_max > num_threads) { unsigned int new_threads = threads_max - num_threads; if ((qsize / 8) < new_threads) { new_threads = qsize / 8; // try to not burst threads } if (new_threads) { unsigned int old_num_threads = num_threads; num_threads += new_threads; dns_resolver_threads.resize(num_threads); for (unsigned int i = old_num_threads; i < num_threads; i++) { dns_resolver_threads[i] = new DNSResolverThread(dns_resolver_queue, 0); dns_resolver_threads[i]->start(2048, false); } } } } } if (hostnames.empty() == false) { for (const std::string& hostname : hostnames) { std::unique_ptr dns_resolve_data(new DNS_Resolve_Data()); dns_resolve_data->hostname = hostname; dns_resolve_data->ttl = 
mysql_thread___monitor_local_dns_cache_ttl; dns_resolve_data->dns_cache = dns_cache; dns_resolve_result.emplace_back(dns_resolve_data->result.get_future()); dns_resolver_queue.add(new WorkItem(dns_resolve_data.release(), monitor_dns_resolver_thread)); } } { unsigned int qsize = dns_resolver_queue.size(); unsigned int num_threads = dns_resolver_threads.size(); if (qsize > static_cast(mysql_thread___monitor_local_dns_resolver_queue_maxsize) / 8) { proxy_warning("DNS resolver queue too big: %d\n", qsize); unsigned int threads_max = num_dns_resolver_max_threads; if (threads_max > num_threads) { unsigned int new_threads = threads_max - num_threads; if ((qsize / 8) < new_threads) { new_threads = qsize / 8; // try to not burst threads } if (new_threads) { unsigned int old_num_threads = num_threads; num_threads += new_threads; dns_resolver_threads.resize(num_threads); proxy_info("Starting %d helper threads\n", new_threads); for (unsigned int i = old_num_threads; i < num_threads; i++) { dns_resolver_threads[i] = new DNSResolverThread(dns_resolver_queue, 0); dns_resolver_threads[i]->start(2048, false); } } } } } // close all worker threads for (size_t i = 0; i < dns_resolver_threads.size(); i++) dns_resolver_queue.add(NULL); // update dns records with ip and ttl for (auto& dns_result : dns_resolve_result) { auto ret_value = dns_result.get(); if (std::get<0>(ret_value)) { DNS_Cache_Record dns_record = get<1>(ret_value); proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "Adding DNS record to bookkeeper. 
(Hostname:[%s] IP:[%s])\n", dns_record.hostname_.c_str(), debug_iplisttostring(dns_record.ips_).c_str()); dns_records_bookkeeping.emplace_back(std::move(dns_record)); } } for (DNSResolverThread* const dns_resolver_thread : dns_resolver_threads) { dns_resolver_thread->join(); delete dns_resolver_thread; } if (GloMyMon->shutdown) return NULL; } __end_monitor_dns_cache_loop: if (resultset) { delete resultset; resultset = NULL; } __sleep_monitor_dns_cache_loop: t2 = monotonic_time(); if (t2 < next_loop_at) { unsigned long long st = 0; st = next_loop_at - t2; if (st > 500000) { st = 500000; } usleep(st); } } return NULL; } void * MySQL_Monitor::run() { while (GloMTH==NULL) { usleep(50000); } usleep(100000); // initialize the MySQL Thread (note: this is not a real thread, just the structures associated with it) unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); pthread_mutex_init(&mon_en_mutex,NULL); MySQL_Monitor__thread_MySQL_Thread_Variables_version=GloMTH->get_global_version(); mysql_thr->refresh_variables(); //if (!GloMTH) return NULL; // quick exit during shutdown/restart pthread_attr_t attr; pthread_attr_init(&attr); pthread_attr_setstacksize(&attr, 2048 * 1024); // DNS Cache is not dependent on monitor enable flag, so need to initialize it here pthread_t monitor_dns_cache_thread; if (pthread_create(&monitor_dns_cache_thread, &attr, &monitor_dns_cache_pthread, NULL) != 0) { // LCOV_EXCL_START proxy_error("Thread creation\n"); assert(0); // LCOV_EXCL_STOP } __monitor_run: while (queue->size()) { // this is a clean up in case Monitor was restarted WorkItem* item = (WorkItem*)queue->remove(); if (item) { if (item->data) { delete item->data; } delete item; } } ConsumerThread **threads= (ConsumerThread **)malloc(sizeof(ConsumerThread *)*num_threads); for (unsigned int i=0;i(*queue, 0); threads[i]->start(2048,false); } started_threads += num_threads; 
this->metrics.p_counter_array[p_mon_counter::mysql_monitor_workers_started]->Increment(num_threads); pthread_t monitor_connect_thread; if (pthread_create(&monitor_connect_thread, &attr, &monitor_connect_pthread,NULL) != 0) { // LCOV_EXCL_START proxy_error("Thread creation\n"); assert(0); // LCOV_EXCL_STOP } pthread_t monitor_ping_thread; if (pthread_create(&monitor_ping_thread, &attr, &monitor_ping_pthread,NULL) != 0) { // LCOV_EXCL_START proxy_error("Thread creation\n"); assert(0); // LCOV_EXCL_STOP } pthread_t monitor_read_only_thread; if (pthread_create(&monitor_read_only_thread, &attr, &monitor_read_only_pthread,NULL) != 0) { // LCOV_EXCL_START proxy_error("Thread creation\n"); assert(0); // LCOV_EXCL_STOP } pthread_t monitor_group_replication_thread; if (pthread_create(&monitor_group_replication_thread, &attr, &monitor_group_replication_pthread,NULL) != 0) { // LCOV_EXCL_START proxy_error("Thread creation\n"); assert(0); // LCOV_EXCL_STOP } pthread_t monitor_galera_thread; if (pthread_create(&monitor_galera_thread, &attr, &monitor_galera_pthread,NULL) != 0) { // LCOV_EXCL_START proxy_error("Thread creation\n"); assert(0); // LCOV_EXCL_STOP } pthread_t monitor_aws_aurora_thread; if (pthread_create(&monitor_aws_aurora_thread, &attr, &monitor_aws_aurora_pthread,NULL) != 0) { // LCOV_EXCL_START proxy_error("Thread creation\n"); assert(0); // LCOV_EXCL_STOP } pthread_t monitor_replication_lag_thread; if (pthread_create(&monitor_replication_lag_thread, &attr, &monitor_replication_lag_pthread,NULL) != 0) { // LCOV_EXCL_START proxy_error("Thread creation\n"); assert(0); // LCOV_EXCL_STOP } while (shutdown==false && mysql_thread___monitor_enabled==true) { unsigned int glover; if (GloMTH) { glover=GloMTH->get_global_version(); if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) { MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover; mysql_thr->refresh_variables(); unsigned int old_num_threads = num_threads; unsigned int threads_min = (unsigned 
int)mysql_thread___monitor_threads_min; if (old_num_threads < threads_min) { num_threads = threads_min; this->metrics.p_gauge_array[p_mon_gauge::mysql_monitor_workers]->Set(threads_min); threads= (ConsumerThread **)realloc(threads, sizeof(ConsumerThread *)*num_threads); started_threads += (num_threads - old_num_threads); for (unsigned int i = old_num_threads ; i < num_threads ; i++) { threads[i] = new ConsumerThread(*queue, 0); threads[i]->start(2048,false); } } } } pthread_mutex_lock(&mon_en_mutex); monitor_enabled=mysql_thread___monitor_enabled; pthread_mutex_unlock(&mon_en_mutex); if ( rand()%10 == 0) { // purge once in a while My_Conn_Pool->purge_some_connections(); } usleep(200000); unsigned int qsize=queue->size(); if (qsize > (unsigned int)mysql_thread___monitor_threads_queue_maxsize/4) { proxy_warning("Monitor queue too big: %d\n", qsize); unsigned int threads_max = (unsigned int)mysql_thread___monitor_threads_max; if (threads_max > num_threads) { unsigned int new_threads = threads_max - num_threads; if ((qsize / 4) < new_threads) { new_threads = qsize/4; // try to not burst threads } if (new_threads) { unsigned int old_num_threads = num_threads; num_threads += new_threads; this->metrics.p_gauge_array[p_mon_gauge::mysql_monitor_workers]->Increment(new_threads); threads= (ConsumerThread **)realloc(threads, sizeof(ConsumerThread *)*num_threads); started_threads += new_threads; for (unsigned int i = old_num_threads ; i < num_threads ; i++) { threads[i] = new ConsumerThread(*queue, 0); threads[i]->start(2048,false); } } } // check again. Do we need also aux threads? 
usleep(50000); qsize=queue->size(); if (qsize > (unsigned int)mysql_thread___monitor_threads_queue_maxsize) { qsize=qsize/50; unsigned int threads_max = (unsigned int)mysql_thread___monitor_threads_max; if ((qsize + num_threads) > (threads_max * 2)) { // allow a small bursts qsize = threads_max * 2 - num_threads; } if (qsize > 0) { proxy_info("Monitor is starting %d helper threads\n", qsize); ConsumerThread **threads_aux= (ConsumerThread **)malloc(sizeof(ConsumerThread *)*qsize); aux_threads = qsize; started_threads += aux_threads; for (unsigned int i=0; i(*queue, 245); threads_aux[i]->start(2048,false); } for (unsigned int i=0; ijoin(); delete threads_aux[i]; } free(threads_aux); aux_threads = 0; } } } } for (unsigned int i=0;i*item=NULL; GloMyMon->queue->add(item); } for (unsigned int i=0;ijoin(); delete threads[i]; } free(threads); pthread_join(monitor_connect_thread,NULL); pthread_join(monitor_ping_thread,NULL); pthread_join(monitor_read_only_thread,NULL); pthread_join(monitor_group_replication_thread,NULL); pthread_join(monitor_galera_thread,NULL); pthread_join(monitor_aws_aurora_thread,NULL); pthread_join(monitor_replication_lag_thread,NULL); My_Conn_Pool->purge_all_connections(); while (shutdown==false) { unsigned int glover; if (GloMTH) { glover=GloMTH->get_global_version(); if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) { MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover; mysql_thr->refresh_variables(); } } monitor_enabled=mysql_thread___monitor_enabled; if (mysql_thread___monitor_enabled==true) { goto __monitor_run; } usleep(200000); } pthread_join(monitor_dns_cache_thread, NULL); if (mysql_thr) { delete mysql_thr; mysql_thr=NULL; } return NULL; }; MyGR_monitor_node::MyGR_monitor_node(char *_a, int _p, int _whg) { addr=NULL; if (_a) { addr=strdup(_a); } port=_p; idx_last_entry=-1; writer_hostgroup=_whg; int i; for (i=0;i txs_behind) { lags[smallidx] = true; } } } } for (int i=0; i=MyGR_Nentries) { idx_last_entry=0; } 
last_entries[idx_last_entry].start_time=_st; last_entries[idx_last_entry].check_time=_ct; last_entries[idx_last_entry].transactions_behind=_tb; last_entries[idx_last_entry].primary_partition=_pp; last_entries[idx_last_entry].read_only=_ro; if (last_entries[idx_last_entry].error) { free(last_entries[idx_last_entry].error); last_entries[idx_last_entry].error=NULL; } if (_error) { last_entries[idx_last_entry].error=strdup(_error); // we always copy } if (ret==false) { if (last_entries[idx_last_entry].primary_partition != last_entries[prev_last_entry].primary_partition) { ret=true; } if (last_entries[idx_last_entry].read_only != last_entries[prev_last_entry].read_only) { ret=true; } if ( (last_entries[idx_last_entry].error && last_entries[prev_last_entry].error==NULL) || (last_entries[idx_last_entry].error==NULL && last_entries[prev_last_entry].error) || (last_entries[idx_last_entry].error && last_entries[prev_last_entry].error && strcmp(last_entries[idx_last_entry].error,last_entries[prev_last_entry].error)) ) { ret=true; } } return ret; } AWS_Aurora_replica_host_status_entry::AWS_Aurora_replica_host_status_entry(char *serid, char *sessid, char *lut, float rlm, float _c) { server_id = strdup(serid); session_id = strdup(sessid); last_update_timestamp = strdup(lut); replica_lag_ms = rlm; cpu = _c; } AWS_Aurora_replica_host_status_entry::AWS_Aurora_replica_host_status_entry(char *serid, char *sessid, char *lut, char *rlm, char *_c) { server_id = strdup(serid); session_id = strdup(sessid); last_update_timestamp = strdup(lut); replica_lag_ms = strtof(rlm, NULL); cpu = strtof(_c, NULL); } AWS_Aurora_replica_host_status_entry::~AWS_Aurora_replica_host_status_entry() { free(server_id); free(session_id); free(last_update_timestamp); } AWS_Aurora_status_entry::AWS_Aurora_status_entry(unsigned long long st, unsigned long long ct, char *e) { start_time = st; check_time = ct; error = NULL; if (e) { error = strdup(e); } host_statuses = new std::vector; } 
/**
 * @brief Release the error message, every host status stored in the entry, and the list itself.
 */
AWS_Aurora_status_entry::~AWS_Aurora_status_entry() {
	free(error);	// free(NULL) is a no-op, so no explicit test is needed

	// the entry owns the host statuses handed to add_host_status()
	for (AWS_Aurora_replica_host_status_entry *host_status : *host_statuses) {
		delete host_status;
	}
	host_statuses->clear();
	delete host_statuses;
}

/**
 * @brief Append a host status to this entry.
 *
 * @param hs Host status to store; it is deleted by this entry's destructor.
 */
void AWS_Aurora_status_entry::add_host_status(AWS_Aurora_replica_host_status_entry *hs) {
	host_statuses->push_back(hs);
}
return ret; } void MySQL_Monitor::populate_monitor_mysql_server_group_replication_log() { //sqlite3 *mondb=monitordb->get_db(); int rc; //char *query=NULL; char *query1=NULL; query1=(char *)"INSERT INTO mysql_server_group_replication_log VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)"; sqlite3_stmt *statement1=NULL; pthread_mutex_lock(&GloMyMon->group_replication_mutex); //rc=(*proxy_sqlite3_prepare_v2)(mondb, query1, -1, &statement1, 0); rc = monitordb->prepare_v2(query1, &statement1); ASSERT_SQLITE_OK(rc, monitordb); monitordb->execute((char *)"DELETE FROM mysql_server_group_replication_log"); std::map::iterator it2; MyGR_monitor_node *node=NULL; for (it2=GloMyMon->Group_Replication_Hosts_Map.begin(); it2!=GloMyMon->Group_Replication_Hosts_Map.end(); ++it2) { std::string s=it2->first; node=it2->second; std::size_t found=s.find_last_of(":"); std::string host=s.substr(0,found); std::string port=s.substr(found+1); int i; for (i=0; ilast_entries[i].start_time) { rc=(*proxy_sqlite3_bind_text)(statement1, 1, host.c_str(), -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 2, atoi(port.c_str())); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 3, node->last_entries[i].start_time ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 4, node->last_entries[i].check_time ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 5, ( node->last_entries[i].primary_partition ? (char *)"YES" : (char *)"NO" ) , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 6, ( node->last_entries[i].read_only ? 
(char *)"YES" : (char *)"NO" ) , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 7, node->last_entries[i].transactions_behind ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 8, node->last_entries[i].error , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); SAFE_SQLITE3_STEP2(statement1); rc=(*proxy_sqlite3_clear_bindings)(statement1); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_reset)(statement1); ASSERT_SQLITE_OK(rc, monitordb); } } } (*proxy_sqlite3_finalize)(statement1); pthread_mutex_unlock(&GloMyMon->group_replication_mutex); } void MySQL_Monitor::populate_monitor_mysql_server_galera_log() { //sqlite3 *mondb=monitordb->get_db(); int rc; //char *query=NULL; char *query1=NULL; query1=(char *)"INSERT INTO mysql_server_galera_log VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)"; sqlite3_stmt *statement1=NULL; pthread_mutex_lock(&GloMyMon->galera_mutex); //rc=(*proxy_sqlite3_prepare_v2)(mondb, query1, -1, &statement1, 0); rc = monitordb->prepare_v2(query1, &statement1); ASSERT_SQLITE_OK(rc, monitordb); monitordb->execute((char *)"DELETE FROM mysql_server_galera_log"); std::map::iterator it2; Galera_monitor_node *node=NULL; for (it2=GloMyMon->Galera_Hosts_Map.begin(); it2!=GloMyMon->Galera_Hosts_Map.end(); ++it2) { std::string s=it2->first; node=it2->second; std::size_t found=s.find_last_of(":"); std::string host=s.substr(0,found); std::string port=s.substr(found+1); int i; for (i=0; ilast_entries[i].start_time) { rc=(*proxy_sqlite3_bind_text)(statement1, 1, host.c_str(), -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 2, atoi(port.c_str())); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 3, node->last_entries[i].start_time ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 4, node->last_entries[i].check_time ); ASSERT_SQLITE_OK(rc, monitordb); 
rc=(*proxy_sqlite3_bind_text)(statement1, 5, ( node->last_entries[i].primary_partition ? (char *)"YES" : (char *)"NO" ) , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 6, ( node->last_entries[i].read_only ? (char *)"YES" : (char *)"NO" ) , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 7, node->last_entries[i].wsrep_local_recv_queue ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 8, node->last_entries[i].wsrep_local_state ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 9, ( node->last_entries[i].wsrep_desync ? (char *)"YES" : (char *)"NO" ) , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 10, ( node->last_entries[i].wsrep_reject_queries ? (char *)"YES" : (char *)"NO" ) , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 11, ( node->last_entries[i].wsrep_sst_donor_rejects_queries ? (char *)"YES" : (char *)"NO" ) , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 12, ( node->last_entries[i].pxc_maint_mode ? 
(char *)"YES" : (char *)"NO" ) , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 13, node->last_entries[i].error , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); SAFE_SQLITE3_STEP2(statement1); rc=(*proxy_sqlite3_clear_bindings)(statement1); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_reset)(statement1); ASSERT_SQLITE_OK(rc, monitordb); } } } (*proxy_sqlite3_finalize)(statement1); pthread_mutex_unlock(&GloMyMon->galera_mutex); } char * MySQL_Monitor::galera_find_last_node(int writer_hostgroup) { /* sqlite3 *mondb=monitordb->get_db(); int rc; //char *query=NULL; char *query1=NULL; query1=(char *)"INSERT INTO mysql_server_galera_log VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)"; sqlite3_stmt *statement1=NULL; */ char *str = NULL; pthread_mutex_lock(&GloMyMon->galera_mutex); /* rc=(*proxy_sqlite3_prepare_v2)(mondb, query1, -1, &statement1, 0); ASSERT_SQLITE_OK(rc, monitordb); monitordb->execute((char *)"DELETE FROM mysql_server_galera_log"); */ std::map::iterator it2; Galera_monitor_node *node=NULL; Galera_monitor_node *writer_node=NULL; unsigned int writer_nodes = 0; unsigned long long curtime = monotonic_time(); unsigned long long ti = mysql_thread___monitor_galera_healthcheck_interval; ti *= 2; std::string s = ""; for (it2=GloMyMon->Galera_Hosts_Map.begin(); it2!=GloMyMon->Galera_Hosts_Map.end(); ++it2) { node=it2->second; if (node->writer_hostgroup == (unsigned int)writer_hostgroup) { Galera_status_entry_t * st = node->last_entry(); if (st) { if (st->start_time >= curtime - ti) { // only consider recent checks if (st->error == NULL) { // no check error if (st->read_only == false) { // the server is writable (this check is arguable) if (st->wsrep_sst_donor_rejects_queries == false) { if (writer_nodes == 0) { s=it2->first; writer_node = node; } writer_nodes++; } } } } } } } if (writer_node && writer_nodes == 1) { // we have only one node let // we don't care if status str = 
strdup(s.c_str()); /* std::size_t found=s.find_last_of(":"); std::string host=s.substr(0,found); std::string port=s.substr(found+1); */ } pthread_mutex_unlock(&GloMyMon->galera_mutex); return str; } std::vector * MySQL_Monitor::galera_find_possible_last_nodes(int writer_hostgroup) { std::vector * result = new std::vector(); pthread_mutex_lock(&GloMyMon->galera_mutex); std::map::iterator it2; Galera_monitor_node *node=NULL; unsigned long long curtime = monotonic_time(); unsigned long long ti = mysql_thread___monitor_galera_healthcheck_interval; ti *= 2; for (it2=GloMyMon->Galera_Hosts_Map.begin(); it2!=GloMyMon->Galera_Hosts_Map.end(); ++it2) { node=it2->second; if (node->writer_hostgroup == (unsigned int)writer_hostgroup) { Galera_status_entry_t * st = node->last_entry(); if (st) { if (st->start_time >= curtime - ti) { // only consider recent checks if (st->error == NULL) { // no check error if (st->wsrep_reject_queries == false) { if (st->read_only == false) { // the server is writable (this check is arguable) if (st->wsrep_sst_donor_rejects_queries == false) { string s = it2->first; result->push_back(s); } } } } } } } } pthread_mutex_unlock(&GloMyMon->galera_mutex); return result; } void MySQL_Monitor::populate_monitor_mysql_server_aws_aurora_log() { //sqlite3 *mondb=monitordb->get_db(); int rc; //char *query=NULL; char *query1=NULL; query1=(char *)"INSERT OR IGNORE INTO mysql_server_aws_aurora_log VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)"; sqlite3_stmt *statement1=NULL; char *query2=NULL; query2=(char *)"INSERT OR IGNORE INTO mysql_server_aws_aurora_log (hostname, port, time_start_us, success_time_us, error) VALUES (?1, ?2, ?3, ?4, ?5)"; sqlite3_stmt *statement2=NULL; //rc=(*proxy_sqlite3_prepare_v2)(mondb, query1, -1, &statement1, 0); rc = monitordb->prepare_v2(query1, &statement1); ASSERT_SQLITE_OK(rc, monitordb); //rc=(*proxy_sqlite3_prepare_v2)(mondb, query2, -1, &statement2, 0); rc = monitordb->prepare_v2(query2, &statement2); 
ASSERT_SQLITE_OK(rc, monitordb); pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); monitordb->execute((char *)"DELETE FROM mysql_server_aws_aurora_log"); std::map::iterator it2; AWS_Aurora_monitor_node *node=NULL; for (it2=GloMyMon->AWS_Aurora_Hosts_Map.begin(); it2!=GloMyMon->AWS_Aurora_Hosts_Map.end(); ++it2) { std::string s=it2->first; node=it2->second; std::size_t found=s.find_last_of(":"); std::string host=s.substr(0,found); std::string port=s.substr(found+1); int i; for (i=0; ilast_entries[i]; if (aase && aase->start_time) { if ( aase->host_statuses->size() ) { for (std::vector::iterator it3 = aase->host_statuses->begin(); it3!=aase->host_statuses->end(); ++it3) { AWS_Aurora_replica_host_status_entry *hse = *it3; if (hse) { rc=(*proxy_sqlite3_bind_text)(statement1, 1, host.c_str(), -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 2, atoi(port.c_str())); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 3, aase->start_time ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 4, aase->check_time ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 5, aase->error , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 6, hse->server_id , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 7, hse->session_id , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 8, hse->last_update_timestamp , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_double)(statement1, 9, hse->replica_lag_ms ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 10, hse->estimated_lag_ms ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_double)(statement1, 11, hse->cpu ); ASSERT_SQLITE_OK(rc, monitordb); SAFE_SQLITE3_STEP2(statement1); 
rc=(*proxy_sqlite3_clear_bindings)(statement1); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_reset)(statement1); ASSERT_SQLITE_OK(rc, monitordb); } } } else { rc=(*proxy_sqlite3_bind_text)(statement2, 1, host.c_str(), -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement2, 2, atoi(port.c_str())); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement2, 3, aase->start_time ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement2, 4, aase->check_time ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement2, 5, aase->error , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); SAFE_SQLITE3_STEP2(statement2); rc=(*proxy_sqlite3_clear_bindings)(statement2); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_reset)(statement2); ASSERT_SQLITE_OK(rc, monitordb); } } } } (*proxy_sqlite3_finalize)(statement1); (*proxy_sqlite3_finalize)(statement2); pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); } void MySQL_Monitor::populate_monitor_mysql_server_aws_aurora_check_status() { //sqlite3 *mondb=monitordb->get_db(); int rc; //char *query=NULL; char *query1=NULL; query1=(char *)"INSERT OR IGNORE INTO mysql_server_aws_aurora_check_status VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"; sqlite3_stmt *statement1=NULL; //rc=(*proxy_sqlite3_prepare_v2)(mondb, query1, -1, &statement1, 0); rc = monitordb->prepare_v2(query1, &statement1); ASSERT_SQLITE_OK(rc, monitordb); pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); monitordb->execute((char *)"DELETE FROM mysql_server_aws_aurora_check_status"); std::map::iterator it2; AWS_Aurora_monitor_node *node=NULL; for (it2=GloMyMon->AWS_Aurora_Hosts_Map.begin(); it2!=GloMyMon->AWS_Aurora_Hosts_Map.end(); ++it2) { std::string s=it2->first; node=it2->second; std::size_t found=s.find_last_of(":"); std::string host=s.substr(0,found); std::string port=s.substr(found+1); AWS_Aurora_status_entry * aase = node->last_entry(); char *error_msg = NULL; 
if (aase && aase->start_time) { if (aase->error) { error_msg = aase->error; } } char lut[30]; struct tm __tm_info; localtime_r(&node->last_checked_at, &__tm_info); strftime(lut, 25, "%Y-%m-%d %H:%M:%S", &__tm_info); /* int i; for (i=0; ilast_entries[i]; if (aase && aase->start_time) { if ( aase->host_statuses->size() ) { for (std::vector::iterator it3 = aase->host_statuses->begin(); it3!=aase->host_statuses->end(); ++it3) { AWS_Aurora_replica_host_status_entry *hse = *it3; if (hse) { */ rc=(*proxy_sqlite3_bind_int64)(statement1, 1, node->writer_hostgroup); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 2, host.c_str(), -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 3, atoi(port.c_str())); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 4, lut, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 5, node->num_checks_tot ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement1, 6, node->num_checks_ok ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement1, 7, error_msg , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); SAFE_SQLITE3_STEP2(statement1); rc=(*proxy_sqlite3_clear_bindings)(statement1); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_reset)(statement1); ASSERT_SQLITE_OK(rc, monitordb); /* } } } else { rc=(*proxy_sqlite3_bind_text)(statement2, 1, host.c_str(), -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement2, 2, atoi(port.c_str())); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement2, 3, aase->start_time ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_int64)(statement2, 4, aase->check_time ); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_bind_text)(statement2, 5, aase->error , -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); SAFE_SQLITE3_STEP2(statement2); 
rc=(*proxy_sqlite3_clear_bindings)(statement2); ASSERT_SQLITE_OK(rc, monitordb); rc=(*proxy_sqlite3_reset)(statement2); ASSERT_SQLITE_OK(rc, monitordb); } } } */ } (*proxy_sqlite3_finalize)(statement1); pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); } /* void MySQL_Monitor::gdb_dump___monitor_mysql_server_aws_aurora_log(char *hostname) { fprintf(stderr,"gdb_dump___monitor_mysql_server_aws_aurora_log\n"); std::map::iterator it2; AWS_Aurora_monitor_node *node=NULL; for (it2=GloMyMon->AWS_Aurora_Hosts_Map.begin(); it2!=GloMyMon->AWS_Aurora_Hosts_Map.end(); ++it2) { std::string s=it2->first; node=it2->second; std::size_t found=s.find_last_of(":"); std::string host=s.substr(0,found); std::string port=s.substr(found+1); int i; for (i=0; ilast_entries[i]; if (aase && aase->start_time) { if ( aase->host_statuses->size() ) { for (std::vector::iterator it3 = aase->host_statuses->begin(); it3!=aase->host_statuses->end(); ++it3) { AWS_Aurora_replica_host_status_entry *hse = *it3; if (hse) { if (hostname == NULL || (hostname && ( (strcmp(hostname,host.c_str())==0) || (strcmp(hostname,hse->server_id)==0)) )) { fprintf(stderr,"%s:%d %llu %llu %s %s %s %s %f %f\n", host.c_str(), atoi(port.c_str()), aase->start_time, aase->check_time, aase->error, hse->server_id,hse->session_id, hse->last_update_timestamp, hse->replica_lag_ms , hse->cpu); } } } } else { if (hostname == NULL || (hostname && strcmp(hostname,host.c_str())==0) ) { fprintf(stderr,"%s:%d %llu %llu %s\n", host.c_str(), atoi(port.c_str()), aase->start_time, aase->check_time, aase->error); } } } } } } */ AWS_Aurora_monitor_node::AWS_Aurora_monitor_node(char *_a, int _p, int _whg) { addr=NULL; if (_a) { addr=strdup(_a); } port=_p; idx_last_entry=-1; writer_hostgroup=_whg; int i; for (i=0;istart_time=0; //last_entries[i]->check_time=0; } num_checks_tot = 0; num_checks_ok = 0; last_checked_at = 0; } AWS_Aurora_monitor_node::~AWS_Aurora_monitor_node() { if (addr) { free(addr); } } bool 
AWS_Aurora_monitor_node::add_entry(AWS_Aurora_status_entry *ase) { bool ret=false; if (idx_last_entry==-1) ret=true; idx_last_entry++; if (idx_last_entry>=AWS_Aurora_Nentries) { idx_last_entry=0; } if (last_entries[idx_last_entry]) { AWS_Aurora_status_entry *old = last_entries[idx_last_entry]; delete old; } last_entries[idx_last_entry] = ase; num_checks_tot++; if (ase->error == NULL) { num_checks_ok++; } last_checked_at = time(NULL); return ret; // for now ignored } typedef struct _host_def_t { char *host; int port; int use_ssl; } host_def_t; static void shuffle_hosts(host_def_t *array, size_t n) { char tmp[sizeof(host_def_t)]; char *arr = (char *)array; size_t stride = sizeof(host_def_t) * sizeof(char); if (n > 1) { size_t i; for (i = 0; i < n - 1 ; ++i) { size_t rnd = (size_t) fastrand(); size_t j = i + rnd / (0x7FFF / (n - i) + 1); memcpy(tmp, arr + j * stride, sizeof(host_def_t)); memcpy(arr + j * stride, arr + i * stride, sizeof(host_def_t)); memcpy(arr + i * stride, tmp, sizeof(host_def_t)); } } } void * monitor_AWS_Aurora_thread_HG(void *arg) { unsigned int wHG = *(unsigned int *)arg; unsigned int rHG = 0; unsigned int num_hosts = 0; unsigned int cur_host_idx = 0; unsigned int max_lag_ms = 0; unsigned int check_interval_ms = 0; unsigned int check_timeout_ms = 0; unsigned int add_lag_ms = 0; unsigned int min_lag_ms = 0; unsigned int lag_num_checks = 1; //unsigned int i = 0; proxy_info("Started Monitor thread for AWS Aurora writer HG %u\n", wHG); unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); MySQL_Monitor__thread_MySQL_Thread_Variables_version=GloMTH->get_global_version(); mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart uint64_t initial_raw_checksum = 0; // this is a static array of the latest reads unsigned int ase_idx = 0; AWS_Aurora_status_entry *lasts_ase[N_L_ASE]; for (unsigned int i=0; 
iaws_aurora_mutex); initial_raw_checksum = GloMyMon->AWS_Aurora_Hosts_resultset_checksum; // count the number of hosts for (std::vector::iterator it = GloMyMon->AWS_Aurora_Hosts_resultset->rows.begin() ; it != GloMyMon->AWS_Aurora_Hosts_resultset->rows.end(); ++it) { SQLite3_row *r=*it; if (atoi(r->fields[0]) == (int)wHG) { num_hosts++; if (max_lag_ms == 0) { max_lag_ms = atoi(r->fields[5]); } if (check_interval_ms == 0) { check_interval_ms = atoi(r->fields[6]); } if (check_timeout_ms == 0) { check_timeout_ms = atoi(r->fields[7]); } if (rHG == 0) { rHG = atoi(r->fields[1]); } add_lag_ms = atoi(r->fields[8]); min_lag_ms = atoi(r->fields[9]); lag_num_checks = atoi(r->fields[10]); } } host_def_t *hpa = (host_def_t *)malloc(sizeof(host_def_t)*num_hosts); for (std::vector::iterator it = GloMyMon->AWS_Aurora_Hosts_resultset->rows.begin() ; it != GloMyMon->AWS_Aurora_Hosts_resultset->rows.end(); ++it) { SQLite3_row *r=*it; if (atoi(r->fields[0]) == (int)wHG) { hpa[cur_host_idx].host = strdup(r->fields[2]); hpa[cur_host_idx].port = atoi(r->fields[3]); hpa[cur_host_idx].use_ssl = atoi(r->fields[4]); cur_host_idx++; } } // NOTE: 'cur_host_idx' should never be higher than 'num_hosts' otherwise later an invalid memory access // can table place later when accessing 'hpa[cur_host_idx]'. 
if (cur_host_idx >= num_hosts) { cur_host_idx = num_hosts - 1; } pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); bool exit_now = false; unsigned long long t1 = 0; //unsigned long long t2 = 0; unsigned long long next_loop_at = 0; bool crc = false; uint64_t current_raw_checksum = 0; size_t rnd; bool found_pingable_host = false; bool rc_ping = false; MySQL_Monitor_State_Data *mmsd = NULL; t1 = monotonic_time(); unsigned long long start_time=t1; while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true && exit_now==false) { unsigned int glover; t1=monotonic_time(); //proxy_info("Looping Monitor thread for AWS Aurora writer HG %u\n", wHG); if (!GloMTH) { //proxy_info("Stopping Monitor thread for AWS Aurora writer HG %u\n", wHG); goto __exit_monitor_AWS_Aurora_thread_HG_now; return NULL; // quick exit during shutdown/restart } // if variables has changed, triggers new checks glover=GloMTH->get_global_version(); if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) { MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover; mysql_thr->refresh_variables(); next_loop_at=0; } pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); current_raw_checksum = GloMyMon->AWS_Aurora_Hosts_resultset_checksum; pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); if (current_raw_checksum != initial_raw_checksum) { // the content of AWS_Aurora_Hosts_resultset has changed. 
Exit exit_now=true; break; } //fprintf(stderr,"%u : %llu %llu\n", wHG, t1, next_loop_at); if (t1 < next_loop_at) { unsigned long long st=0; st=next_loop_at-t1; if (st > 50000) { st = 50000; } usleep(st); //proxy_info("Looping Monitor thread for AWS Aurora writer HG %u\n", wHG); continue; } //proxy_info("Running check AWS Aurora writer HG %u\n", wHG); found_pingable_host = false; rc_ping = false; // pick a random host rnd = (size_t) rand(); rnd %= num_hosts; rc_ping = GloMyMon->server_responds_to_ping(hpa[rnd].host, hpa[rnd].port); //proxy_info("Looping Monitor thread for AWS Aurora writer HG %u\n", wHG); #ifdef TEST_AURORA if (rand() % 100 < 30) { // we randomly fail 30% of the requests rc_ping = false; } #endif // TEST_AURORA if (rc_ping) { found_pingable_host = true; cur_host_idx = rnd; } else { // the randomly picked host didn't work work shuffle_hosts(hpa,num_hosts); for (unsigned int i=0; (found_pingable_host == false && iserver_responds_to_ping(hpa[i].host, hpa[i].port); if (rc_ping) { found_pingable_host = true; cur_host_idx = i; } } } #ifdef TEST_AURORA if (rand() % 200 == 0) { // we randomly fail 0.5% of the requests found_pingable_host = false; } #endif // TEST_AURORA if (found_pingable_host == false&&mmsd) { proxy_error("No node is pingable for AWS Aurora cluster with writer HG %u\n", wHG); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_AWS_NO_PINGABLE_SRV); next_loop_at = t1 + check_interval_ms * 1000; continue; } #ifdef TEST_AURORA if (rand() % 1000 == 0) { // suppress 99.9% of the output, too verbose proxy_info("Running check for AWS Aurora writer HG %u on %s:%d\n", wHG , hpa[cur_host_idx].host, hpa[cur_host_idx].port); } #endif // TEST_AURORA if (mmsd) { delete mmsd; mmsd = NULL; } //mmsd = NULL; mmsd = new MySQL_Monitor_State_Data(hpa[cur_host_idx].host, hpa[cur_host_idx].port, NULL, hpa[cur_host_idx].use_ssl); mmsd->writer_hostgroup = wHG; 
mmsd->aws_aurora_check_timeout_ms = check_timeout_ms; mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); //unsigned long long start_time=mysql_thr->curtime; start_time=t1; mmsd->t1=start_time; crc=false; if (mmsd->mysql==NULL) { // we don't have a connection, let's create it bool rc; rc=mmsd->create_new_connection(); if (mmsd->mysql) { GloMyMon->My_Conn_Pool->conn_register(mmsd); } crc=true; if (rc==false) { unsigned long long now=monotonic_time(); char * new_error = (char *)malloc(50+strlen(mmsd->mysql_error_msg)); bool access_denied = false; if (strncmp(mmsd->mysql_error_msg,(char *)"Access denied for user",strlen((char *)"Access denied for user"))==0) { access_denied = true; } sprintf(new_error,"timeout or error in creating new connection: %s",mmsd->mysql_error_msg); free(mmsd->mysql_error_msg); mmsd->mysql_error_msg = new_error; proxy_error("Error on AWS Aurora check for %s:%d after %lldms. Unable to create a connection. %sError: %s.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000, (access_denied ? "" : "If the server is overload, increase mysql-monitor_connect_timeout. " ) , new_error); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_AWS_HEALTH_CHECK_CONN_TIMEOUT); goto __exit_monitor_aws_aurora_HG_thread; } } mmsd->t1=monotonic_time(); mmsd->interr=0; // reset the value #ifdef TEST_AURORA mmsd->async_exit_status = mysql_query_start(&mmsd->interr, mmsd->mysql, "SELECT SERVER_ID, SESSION_ID, LAST_UPDATE_TIMESTAMP, REPLICA_LAG_IN_MILLISECONDS, CPU FROM REPLICA_HOST_STATUS ORDER BY SERVER_ID"); #else // for reference we list the old queries. 
// original implementation: // mmsd->async_exit_status = mysql_query_start(&mmsd->interr, mmsd->mysql, "SELECT SERVER_ID, SESSION_ID, LAST_UPDATE_TIMESTAMP, IF(SESSION_ID = 'MASTER_SESSION_ID', 0, REPLICA_LAG_IN_MILLISECONDS) AS REPLICA_LAG_IN_MILLISECONDS, CPU FROM INFORMATION_SCHEMA.REPLICA_HOST_STATUS WHERE (REPLICA_LAG_IN_MILLISECONDS > 0 AND REPLICA_LAG_IN_MILLISECONDS <= 600000) OR SESSION_ID = 'MASTER_SESSION_ID' ORDER BY SERVER_ID"); // to fix a bug in Aurora , see https://github.com/sysown/proxysql/issues/3082 // mmsd->async_exit_status = mysql_query_start(&mmsd->interr, mmsd->mysql, "SELECT SERVER_ID, SESSION_ID, LAST_UPDATE_TIMESTAMP, IF(SESSION_ID = 'MASTER_SESSION_ID', 0, REPLICA_LAG_IN_MILLISECONDS) AS REPLICA_LAG_IN_MILLISECONDS, CPU FROM INFORMATION_SCHEMA.REPLICA_HOST_STATUS WHERE (REPLICA_LAG_IN_MILLISECONDS > 0 AND REPLICA_LAG_IN_MILLISECONDS <= 600000) OR SESSION_ID = 'MASTER_SESSION_ID' ORDER BY SERVER_ID"); // slightly modifying the previous query. Replacing: // "REPLICA_LAG_IN_MILLISECONDS > 0" // with: // "REPLICA_LAG_IN_MILLISECONDS >= 0" // mmsd->async_exit_status = mysql_query_start(&mmsd->interr, mmsd->mysql, "SELECT SERVER_ID, SESSION_ID, LAST_UPDATE_TIMESTAMP, IF(SESSION_ID = 'MASTER_SESSION_ID', 0, REPLICA_LAG_IN_MILLISECONDS) AS REPLICA_LAG_IN_MILLISECONDS, CPU FROM INFORMATION_SCHEMA.REPLICA_HOST_STATUS WHERE (REPLICA_LAG_IN_MILLISECONDS >= 0 AND REPLICA_LAG_IN_MILLISECONDS <= 600000) OR SESSION_ID = 'MASTER_SESSION_ID' ORDER BY SERVER_ID"); { const char * query = "SELECT SERVER_ID," "IF(" "SESSION_ID = 'MASTER_SESSION_ID' AND " "SERVER_ID <> (SELECT SERVER_ID FROM INFORMATION_SCHEMA.REPLICA_HOST_STATUS WHERE SESSION_ID = 'MASTER_SESSION_ID' ORDER BY LAST_UPDATE_TIMESTAMP DESC LIMIT 1), " "'probably_former_MASTER_SESSION_ID', SESSION_ID" ") SESSION_ID, " // it seems that during a failover, the old writer can keep MASTER_SESSION_ID because not updated "LAST_UPDATE_TIMESTAMP, " "IF(SESSION_ID = 'MASTER_SESSION_ID', 0, 
REPLICA_LAG_IN_MILLISECONDS) AS REPLICA_LAG_IN_MILLISECONDS, " "CPU " "FROM INFORMATION_SCHEMA.REPLICA_HOST_STATUS WHERE" " ( " "(REPLICA_LAG_IN_MILLISECONDS >= 0 AND REPLICA_LAG_IN_MILLISECONDS <= 600000)" // lag between 0 and 10 minutes " OR SESSION_ID = 'MASTER_SESSION_ID'" // or server with MASTER_SESSION_ID " ) " "AND LAST_UPDATE_TIMESTAMP > NOW() - INTERVAL 180 SECOND" // ignore decommissioned or renamed nodes, see https://github.com/sysown/proxysql/issues/3484 " ORDER BY SERVER_ID"; mmsd->async_exit_status = mysql_query_start(&mmsd->interr, mmsd->mysql, query); } #endif // TEST_AURORA while (mmsd->async_exit_status) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mmsd->aws_aurora_check_timeout_ms * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); proxy_error("Timeout on AWS Aurora health check for %s:%d after %lldms. If the server is overload, increase mysql_aws_aurora_hostgroups.check_timeout_ms\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_AWS_HEALTH_CHECK_TIMEOUT); goto __exit_monitor_aws_aurora_HG_thread; } if (GloMyMon->shutdown==true) { goto __fast_exit_monitor_aws_aurora_HG_thread; // exit immediately } if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { mmsd->async_exit_status=mysql_query_cont(&mmsd->interr, mmsd->mysql, mmsd->async_exit_status); } } mmsd->async_exit_status=mysql_store_result_start(&mmsd->result,mmsd->mysql); while (mmsd->async_exit_status) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mmsd->aws_aurora_check_timeout_ms * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); proxy_error("Timeout on AWS Aurora health check for %s:%d after %lldms. 
If the server is overload, increase mysql_aws_aurora_hostgroups.check_timeout_ms\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, ER_PROXYSQL_AWS_HEALTH_CHECK_TIMEOUT); goto __exit_monitor_aws_aurora_HG_thread; } if (GloMyMon->shutdown==true) { goto __fast_exit_monitor_aws_aurora_HG_thread; // exit immediately } if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { mmsd->async_exit_status=mysql_store_result_cont(&mmsd->result, mmsd->mysql, mmsd->async_exit_status); } } if (mmsd->interr) { // check failed mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); } __exit_monitor_aws_aurora_HG_thread: mmsd->t2=monotonic_time(); next_loop_at = t1 + (check_interval_ms * 1000); if (mmsd->t2 > t1) { next_loop_at -= (mmsd->t2 - t1); } { // TODO : complete this char buf[128]; char *s=NULL; int l=strlen(mmsd->hostname); if (l<110) { s=buf; } else { s=(char *)malloc(l+16); } sprintf(s,"%s:%d",mmsd->hostname,mmsd->port); unsigned long long time_now=realtime_time(); time_now=time_now-(mmsd->t2 - start_time); //AWS_Aurora_status_entry *ase = new AWS_Aurora_status_entry(mmsd->t1, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); //AWS_Aurora_status_entry *ase_l = new AWS_Aurora_status_entry(mmsd->t1, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); AWS_Aurora_status_entry *ase = new AWS_Aurora_status_entry(time_now, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); AWS_Aurora_status_entry *ase_l = new AWS_Aurora_status_entry(time_now, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); if (mmsd->interr == 0 && mmsd->result) { int num_fields=0; num_fields = mysql_num_fields(mmsd->result); if (num_fields!=5) { proxy_error("Incorrect number of fields, please report a bug\n"); } else { MYSQL_ROW row; while ((row = mysql_fetch_row(mmsd->result))) { 
AWS_Aurora_replica_host_status_entry *arhse = new AWS_Aurora_replica_host_status_entry(row[0], row[1], row[2], row[3], row[4]); ase->add_host_status(arhse); AWS_Aurora_replica_host_status_entry *arhse_l = new AWS_Aurora_replica_host_status_entry(row[0], row[1], row[2], row[3], row[4]); ase_l->add_host_status(arhse_l); } } mysql_free_result(mmsd->result); mmsd->result=NULL; } if (lasts_ase[ase_idx]) { AWS_Aurora_status_entry * l_ase = lasts_ase[ase_idx]; delete l_ase; } lasts_ase[ase_idx] = ase_l; GloMyMon->evaluate_aws_aurora_results(wHG, rHG, &lasts_ase[0], ase_idx, max_lag_ms, add_lag_ms, min_lag_ms, lag_num_checks); for (auto h : *(ase_l->host_statuses)) { for (auto h2 : *(ase->host_statuses)) { if (strcmp(h2->server_id, h->server_id) == 0) { h2->estimated_lag_ms = h->estimated_lag_ms; } } } // remember that we call evaluate_aws_aurora_results() // *before* shifting ase_idx ase_idx++; if (ase_idx == N_L_ASE) { ase_idx = 0; } //__end_process_aws_aurora_result: if (mmsd->mysql_error_msg) { } pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); //auto it = // TODO : complete this std::map::iterator it2; it2 = GloMyMon->AWS_Aurora_Hosts_Map.find(s); AWS_Aurora_monitor_node *node=NULL; if (it2!=GloMyMon->AWS_Aurora_Hosts_Map.end()) { node=it2->second; node->add_entry(ase); } else { node = new AWS_Aurora_monitor_node(mmsd->hostname,mmsd->port,mmsd->writer_hostgroup); node->add_entry(ase); GloMyMon->AWS_Aurora_Hosts_Map.insert(std::make_pair(s,node)); } // clean up if (l<110) { } else { free(s); } pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); } if (mmsd->interr || mmsd->async_exit_status) { // check failed } else { if (crc==false) { if (mmsd->mysql) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); mmsd->mysql=NULL; } } } __fast_exit_monitor_aws_aurora_HG_thread: if (mmsd->mysql) { // if we reached here we didn't put the connection back if (mmsd->mysql_error_msg) { #ifdef DEBUG proxy_error("Error after %lldms: server %s:%d , mmsd %p , 
MYSQL %p , FD %d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); #else proxy_error("Error after %lldms on server %s:%d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd->mysql_error_msg); #endif // DEBUG mysql_close(mmsd->mysql); // if we reached here we should destroy it mmsd->mysql=NULL; } else { if (crc) { bool rc=mmsd->set_wait_timeout(); if (rc) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); } else { proxy_error("Error after %lldms: mmsd %p , MYSQL %p , FD %d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); // set_wait_timeout failed } mmsd->mysql=NULL; } else { // really not sure how we reached here, drop it proxy_error("Error after %lldms: mmsd %p , MYSQL %p , FD %d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); MyHGM->p_update_mysql_error_counter(p_mysql_error_type::proxysql, mmsd->hostgroup_id, mmsd->hostname, mmsd->port, mysql_errno(mmsd->mysql)); GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } } } /* mmsd->writer_hostgroup=atoi(r->fields[0]); mmsd->writer_is_also_reader=atoi(r->fields[4]); mmsd->max_transactions_behind=atoi(r->fields[5]); mmsd->mondb=monitordb; WorkItem* item; item=new WorkItem(mmsd,monitor_AWS_Aurora_thread); GloMyMon->queue.add(item); usleep(us); */ // } /* for for (std::vector::iterator it = Galera_Hosts_resultset->rows.begin() ; it != 
Galera_Hosts_resultset->rows.end(); ++it) { } SQLite3_row *r=*it; bool rc_ping = true; rc_ping = server_responds_to_ping(r->fields[1],atoi(r->fields[2])); if (rc_ping) { // only if server is responding to pings MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[1],atoi(r->fields[2]), NULL, atoi(r->fields[3])); mmsd->writer_hostgroup=atoi(r->fields[0]); mmsd->writer_is_also_reader=atoi(r->fields[4]); mmsd->max_transactions_behind=atoi(r->fields[5]); mmsd->mondb=monitordb; */ } __exit_monitor_AWS_Aurora_thread_HG_now: if (mmsd) { delete (mmsd); mmsd = NULL; for (unsigned int i=0; icurtime=monotonic_time(); MySQL_Monitor__thread_MySQL_Thread_Variables_version=GloMTH->get_global_version(); mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart uint64_t last_raw_checksum = 0; // ADD here an unordered map , Writer HG => next time at // when empty, a new map is populated // when next_loop_at = 0 , the tables is emptied so to be populated again unsigned int *hgs_array = NULL; pthread_t *pthreads_array = NULL; unsigned int hgs_num = 0; while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) { unsigned int glover; if (!GloMTH) return NULL; // quick exit during shutdown/restart // if variables has changed, triggers new checks glover=GloMTH->get_global_version(); if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) { MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover; mysql_thr->refresh_variables(); } // if list of servers or HG or options has changed, triggers new checks pthread_mutex_lock(&aws_aurora_mutex); uint64_t new_raw_checksum = AWS_Aurora_Hosts_resultset->raw_checksum(); pthread_mutex_unlock(&aws_aurora_mutex); if (new_raw_checksum != last_raw_checksum) { proxy_info("Detected new/changed definition for AWS Aurora monitoring\n"); last_raw_checksum = new_raw_checksum; if (pthreads_array) { // wait all threads to terminate for (unsigned int i=0; i < hgs_num; i++) { 
pthread_join(pthreads_array[i], NULL); proxy_info("Stopped Monitor thread for AWS Aurora writer HG %u\n", hgs_array[i]); } free(pthreads_array); free(hgs_array); pthreads_array = NULL; hgs_array = NULL; } hgs_num = 0; pthread_mutex_lock(&aws_aurora_mutex); // scan all the writer HGs unsigned int num_rows = AWS_Aurora_Hosts_resultset->rows_count; if (num_rows) { unsigned int *tmp_hgs_array = (unsigned int *)malloc(sizeof(unsigned int)*num_rows); for (std::vector::iterator it = AWS_Aurora_Hosts_resultset->rows.begin() ; it != AWS_Aurora_Hosts_resultset->rows.end(); ++it) { SQLite3_row *r=*it; int wHG = atoi(r->fields[0]); bool found = false; // very simple search. Far from optimal, but assuming very few HGs it is fast enough for (unsigned int i=0; i < hgs_num; i++) { if (tmp_hgs_array[i] == (unsigned int)wHG) { found = true; } } if (found == false) { // new wHG found tmp_hgs_array[hgs_num]=wHG; hgs_num++; } } proxy_info("Activating Monitoring of %u AWS Aurora clusters\n", hgs_num); hgs_array = (unsigned int *)malloc(sizeof(unsigned int)*hgs_num); pthreads_array = (pthread_t *)malloc(sizeof(pthread_t)*hgs_num); for (unsigned int i=0; i < hgs_num; i++) { hgs_array[i] = tmp_hgs_array[i]; proxy_info("Starting Monitor thread for AWS Aurora writer HG %u\n", hgs_array[i]); if (pthread_create(&pthreads_array[i], NULL, monitor_AWS_Aurora_thread_HG, &hgs_array[i]) != 0) { // LCOV_EXCL_START proxy_error("Thread creation\n"); assert(0); // LCOV_EXCL_STOP } } free(tmp_hgs_array); } pthread_mutex_unlock(&aws_aurora_mutex); } /* if (t1 < next_loop_at) { goto __sleep_monitor_aws_aurora; } if (next_loop_at == 0) { // free the queue } next_loop_at=t1+1000*mysql_thread___monitor_galera_healthcheck_interval; pthread_mutex_lock(&aws_aurora_mutex); if (AWS_Aurora_Hosts_resultset==NULL) { goto __end_monitor_aws_aurora_loop; } else { if (AWS_Aurora_Hosts_resultset->rows_count==0) { goto __end_monitor_aws_aurora_loop; } int us=100; if (AWS_Aurora_Hosts_resultset->rows_count) { 
us=mysql_thread___monitor_read_only_interval/2/Galera_Hosts_resultset->rows_count; } for (std::vector::iterator it = Galera_Hosts_resultset->rows.begin() ; it != Galera_Hosts_resultset->rows.end(); ++it) { SQLite3_row *r=*it; bool rc_ping = true; rc_ping = server_responds_to_ping(r->fields[1],atoi(r->fields[2])); if (rc_ping) { // only if server is responding to pings MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[1],atoi(r->fields[2]), NULL, atoi(r->fields[3])); mmsd->writer_hostgroup=atoi(r->fields[0]); mmsd->writer_is_also_reader=atoi(r->fields[4]); mmsd->max_transactions_behind=atoi(r->fields[5]); mmsd->mondb=monitordb; WorkItem* item; item=new WorkItem(mmsd,monitor_AWS_Aurora_thread); GloMyMon->queue.add(item); usleep(us); } if (GloMyMon->shutdown) { pthread_mutex_unlock(&galera_mutex); return NULL; } } } __end_monitor_aws_aurora_loop: pthread_mutex_unlock(&aws_aurora_mutex); if (mysql_thread___monitor_enabled==true) { } __sleep_monitor_aws_aurora: t2=monotonic_time(); if (t2 200000) { st = 200000; } usleep(st); } */ usleep(10000); } if (mysql_thr) { delete mysql_thr; mysql_thr=NULL; } for (unsigned int i=0;i *item=NULL; GloMyMon->queue->add(item); } return NULL; } unsigned int MySQL_Monitor::estimate_lag(char* server_id, AWS_Aurora_status_entry** aase, unsigned int idx, unsigned int add_lag_ms, unsigned int min_lag_ms, unsigned int lag_num_checks) { assert(aase); assert(server_id); assert(idx >= 0 && idx < N_L_ASE); if (lag_num_checks > N_L_ASE) lag_num_checks = N_L_ASE; if (lag_num_checks <= 0) lag_num_checks = 1; unsigned int mlag = 0; unsigned int lag = 0; for (unsigned int i = 1; i <= lag_num_checks; i++) { if (!aase[idx] || !aase[idx]->host_statuses) break; for (auto hse : *(aase[idx]->host_statuses)) { if (strcmp(server_id, hse->server_id)==0 && (unsigned int)hse->replica_lag_ms != 0) { unsigned int ms = std::max(((unsigned int)hse->replica_lag_ms + add_lag_ms), min_lag_ms); if (ms > mlag) mlag = ms; if (!lag) lag = ms; } } if 
(idx == 0) idx = N_L_ASE; idx--; } return mlag; } void MySQL_Monitor::evaluate_aws_aurora_results(unsigned int wHG, unsigned int rHG, AWS_Aurora_status_entry **lasts_ase, unsigned int ase_idx, unsigned int max_latency_ms, unsigned int add_lag_ms, unsigned int min_lag_ms, unsigned int lag_num_checks) { #ifdef TEST_AURORA unsigned int i = 0; bool verbose = false; unsigned int action_yes = 0; unsigned int action_no = 0; unsigned int enabling = 0; unsigned int disabling = 0; if (rand() % 500 == 0) { verbose = true; bool ev = false; if (rand() % 1000 == 0) { ev = true; } for (i=0; i < N_L_ASE; i++) { AWS_Aurora_status_entry *aase = lasts_ase[i]; if (ev == true || i == ase_idx) { if (aase && aase->start_time) { if ( aase->host_statuses->size() ) { for (std::vector::iterator it3 = aase->host_statuses->begin(); it3!=aase->host_statuses->end(); ++it3) { AWS_Aurora_replica_host_status_entry *hse = *it3; if (hse) { fprintf(stderr,"%s %s %s %f %f\n", hse->server_id, hse->session_id, hse->last_update_timestamp, hse->replica_lag_ms , hse->cpu); } } } } } } } #endif // TEST_AURORA unsigned int prev_ase_idx = ase_idx; if (prev_ase_idx == 0) prev_ase_idx = N_L_ASE; prev_ase_idx--; AWS_Aurora_status_entry *aase = lasts_ase[ase_idx]; AWS_Aurora_status_entry *prev_aase = lasts_ase[prev_ase_idx]; if (aase && aase->start_time) { if ( aase->host_statuses->size() ) { for (std::vector::iterator it3 = aase->host_statuses->begin(); it3!=aase->host_statuses->end(); ++it3) { AWS_Aurora_replica_host_status_entry *hse = *it3; bool run_action = true; bool enable = true; bool is_writer = false; bool rla_rc = true; unsigned int current_lag_ms = estimate_lag(hse->server_id, lasts_ase, ase_idx, add_lag_ms, min_lag_ms, lag_num_checks); hse->estimated_lag_ms = current_lag_ms; if (current_lag_ms > max_latency_ms) { enable = false; } if (strcmp(hse->session_id,"MASTER_SESSION_ID")==0) { is_writer = true; } // we also try to determine if a change needs to be made if (prev_aase && prev_aase->start_time) { 
if ( prev_aase->host_statuses->size() ) { for (std::vector::iterator it4 = prev_aase->host_statuses->begin(); it4!=prev_aase->host_statuses->end(); ++it4) { AWS_Aurora_replica_host_status_entry *prev_hse = *it4; if (strcmp(prev_hse->server_id,hse->server_id)==0) { bool prev_enabled = true; unsigned int prev_lag_ms = estimate_lag(hse->server_id, lasts_ase, ase_idx, add_lag_ms, min_lag_ms, lag_num_checks); if (prev_lag_ms > max_latency_ms) { prev_enabled = false; } if (prev_enabled == enable) { // the previous status should be the same // do not run any action run_action = false; } } } } } if (run_action) { #ifdef TEST_AURORA action_yes++; (enable ? enabling++ : disabling++); rla_rc = MyHGM->aws_aurora_replication_lag_action(wHG, rHG, hse->server_id, current_lag_ms, enable, is_writer, verbose); #else rla_rc = MyHGM->aws_aurora_replication_lag_action(wHG, rHG, hse->server_id, current_lag_ms, enable, is_writer); #endif // TEST_AURORA } else { #ifdef TEST_AURORA action_no++; #endif // TEST_AURORA if (is_writer ) { // if the server is a writer we run it anyway. 
This will perform some sanity check rla_rc = MyHGM->aws_aurora_replication_lag_action(wHG, rHG, hse->server_id, current_lag_ms, enable, is_writer); } } //if (is_writer == true && rla_rc == false) { if (rla_rc == false) { if (is_writer == true) { // the server is not configured as a writer #ifdef TEST_AURORA proxy_info("Calling update_aws_aurora_set_writer for %s\n", hse->server_id); #endif // TEST_AURORA MyHGM->update_aws_aurora_set_writer(wHG, rHG, hse->server_id); time_t __timer; char lut[30]; struct tm __tm_info; time(&__timer); localtime_r(&__timer, &__tm_info); strftime(lut, 25, "%Y-%m-%d %H:%M:%S", &__tm_info); char *q1 = (char *)"INSERT INTO mysql_server_aws_aurora_failovers VALUES (%d, '%s', '%s')"; char *q2 = (char *)malloc(strlen(q1)+strlen(lut)+strlen(hse->server_id)); sprintf(q2, q1, wHG, hse->server_id, lut); monitordb->execute(q2); free(q2); } else { #ifdef TEST_AURORA proxy_info("Calling update_aws_aurora_set_reader for %s\n", hse->server_id); #endif // TEST_AURORA MyHGM->update_aws_aurora_set_reader(wHG, rHG, hse->server_id); } } } } } #ifdef TEST_AURORA if (verbose) { proxy_info("replication_lag_actions: YES=%u , NO=%u , enabling=%u , disabling=%u\n", action_yes, action_no, enabling, disabling); } #endif // TEST_AURORA } std::string MySQL_Monitor::dns_lookup(const std::string& hostname, bool return_hostname_if_lookup_fails, size_t* ip_count) { static thread_local std::shared_ptr dns_cache_thread; // if IP was provided, no need to do lookup if (hostname.empty() || validate_ip(hostname)) return hostname; if (!dns_cache_thread && GloMyMon) dns_cache_thread = GloMyMon->dns_cache; std::string ip; if (dns_cache_thread) { ip = dns_cache_thread->lookup(trim(hostname), ip_count) ; if (ip.empty() && return_hostname_if_lookup_fails) { ip = hostname; proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "DNS cache lookup was a miss. 
(Hostname:[%s])\n", hostname.c_str()); } } return ip; } std::string MySQL_Monitor::dns_lookup(const char* hostname, bool return_hostname_if_lookup_fails, size_t* ip_count) { return MySQL_Monitor::dns_lookup(std::string(hostname), return_hostname_if_lookup_fails, ip_count); } bool MySQL_Monitor::dns_cache_update_socket(const std::string& hostname, int socket_fd) { // if IP was provided, no need to update dns cache if (hostname.empty() || validate_ip(hostname)) return false; bool result = false; const std::string& ip_addr = get_connected_peer_ip_from_socket(socket_fd); if (ip_addr.empty() == false) { result = _dns_cache_update(hostname, { ip_addr }); } return result; } bool MySQL_Monitor::_dns_cache_update(const std::string &hostname, std::vector&& ip_address) { static thread_local std::shared_ptr dns_cache_thread; if (!dns_cache_thread && GloMyMon) dns_cache_thread = GloMyMon->dns_cache; if (dns_cache_thread) { if (dns_cache_thread->add_if_not_exist(trim(hostname), std::move(ip_address))) { proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "Direct DNS cache update. (Hostname:[%s] IP:[%s])\n", hostname.c_str(), debug_iplisttostring(ip_address).c_str()); return true; } } return false; } void MySQL_Monitor::trigger_dns_cache_update() { if (GloMyMon) { GloMyMon->force_dns_cache_update = true; proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "Triggering DNS cache update sequence.\n"); } } bool DNS_Cache::add(const std::string& hostname, std::vector&& ips) { if (!enabled) return false; proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "Updating DNS cache. 
(Hostname:[%s] IP:[%s])\n", hostname.c_str(), debug_iplisttostring(ips).c_str()); int rc = pthread_rwlock_wrlock(&rwlock_); assert(rc == 0); auto& ip_addr = records[hostname]; ip_addr.ips = std::move(ips); __sync_fetch_and_and(&ip_addr.counter, 0); rc = pthread_rwlock_unlock(&rwlock_); assert(rc == 0); if (GloMyMon) __sync_fetch_and_add(&GloMyMon->dns_cache_record_updated, 1); return true; } bool DNS_Cache::add_if_not_exist(const std::string& hostname, std::vector&& ips) { if (!enabled) return false; int rc = pthread_rwlock_wrlock(&rwlock_); assert(rc == 0); if (records.find(hostname) == records.end()) { proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "Updating DNS cache. (Hostname:[%s] IP:[%s])\n", hostname.c_str(), debug_iplisttostring(ips).c_str()); auto& ip_addr = records[hostname]; ip_addr.ips = std::move(ips); __sync_fetch_and_and(&ip_addr.counter, 0); } rc = pthread_rwlock_unlock(&rwlock_); assert(rc == 0); if (GloMyMon) __sync_fetch_and_add(&GloMyMon->dns_cache_record_updated, 1); return true; } std::string DNS_Cache::get_next_ip(const IP_ADDR& ip_addr) const { if (ip_addr.ips.empty()) return ""; const auto counter_val = __sync_fetch_and_add(const_cast(&ip_addr.counter), 1); return ip_addr.ips[counter_val%ip_addr.ips.size()]; } std::string DNS_Cache::lookup(const std::string& hostname, size_t* ip_count) const { if (!enabled) return ""; std::string ip; __sync_fetch_and_add(&GloMyMon->dns_cache_queried, 1); int rc = pthread_rwlock_rdlock(&rwlock_); assert(rc == 0); auto itr = records.find(hostname); if (itr != records.end()) { ip = get_next_ip(itr->second); if (ip_count) *ip_count = itr->second.ips.size(); proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "DNS cache lookup success. 
(Hostname:[%s] IP returned:[%s])\n", hostname.c_str(), ip.c_str()); } else { if (ip_count) *ip_count = 0; } rc = pthread_rwlock_unlock(&rwlock_); assert(rc == 0); if (!ip.empty() && GloMyMon) { __sync_fetch_and_add(&GloMyMon->dns_cache_lookup_success, 1); } return ip; } void DNS_Cache::remove(const std::string& hostname) { bool item_removed = false; int rc = pthread_rwlock_wrlock(&rwlock_); assert(rc == 0); auto itr = records.find(hostname); if (itr != records.end()) { proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "Removing DNS cache record. (Hostname:[%s] IP:[%s])\n", hostname.c_str(), debug_iplisttostring(itr->second.ips).c_str()); records.erase(itr); item_removed = true; } rc = pthread_rwlock_unlock(&rwlock_); if (item_removed && GloMyMon) __sync_fetch_and_add(&GloMyMon->dns_cache_record_updated, 1); assert(rc == 0); } void DNS_Cache::clear() { int rc = pthread_rwlock_wrlock(&rwlock_); assert(rc == 0); records.clear(); rc = pthread_rwlock_unlock(&rwlock_); assert(rc == 0); proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "DNS cache was cleared.\n"); } bool DNS_Cache::empty() const { bool result = true; int rc = pthread_rwlock_rdlock(&rwlock_); assert(rc == 0); result = records.empty(); rc = pthread_rwlock_unlock(&rwlock_); assert(rc == 0); return result; } template class WorkItem; template class WorkItem;