From c50a7053ad09b2298bb6c241c3de6aa7f22e8f5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Thu, 24 Feb 2022 15:58:16 +0100 Subject: [PATCH 1/4] Fix monitoring not working for AWS Aurora auto-discovered instances #3797 - Monitoring actions 'ping' and 'connect' now make use of a newly introduced table 'monitor.mysql_servers' instead of the 'mysql_servers' table from Admin. - This table is kept in sync via a shared resultset in 'MySQL_HostGroups_Manager'. --- include/MySQL_HostGroups_Manager.h | 24 +++++++++ include/MySQL_Monitor.hpp | 11 ++++ lib/MySQL_HostGroups_Manager.cpp | 38 ++++++++++++++ lib/MySQL_Monitor.cpp | 83 ++++++++++++++++++++++++++++-- 4 files changed, 152 insertions(+), 4 deletions(-) diff --git a/include/MySQL_HostGroups_Manager.h b/include/MySQL_HostGroups_Manager.h index 4029085f2..e4a0bfc1f 100644 --- a/include/MySQL_HostGroups_Manager.h +++ b/include/MySQL_HostGroups_Manager.h @@ -421,6 +421,17 @@ class MySQL_HostGroups_Manager { public: std::mutex galera_set_writer_mutex; + /** + * @brief Mutex used to guard 'mysql_servers_to_monitor' resulset. + */ + std::mutex mysql_servers_to_monitor_mutex; + /** + * @brief Resulset containing the latest 'mysql_servers' present in 'mydb'. + * @details This resulset should be updated via 'update_table_mysql_servers_for_monitor' each time actions + * that modify the 'mysql_servers' table are performed. + */ + SQLite3_result* mysql_servers_to_monitor; + pthread_rwlock_t gtid_rwlock; std::unordered_map gtid_map; struct ev_async * gtid_ev_async; @@ -533,6 +544,19 @@ class MySQL_HostGroups_Manager { SQLite3_result *dump_table_mysql_group_replication_hostgroups(); SQLite3_result *dump_table_mysql_galera_hostgroups(); SQLite3_result *dump_table_mysql_aws_aurora_hostgroups(); + /** + * @brief Update the public member resulset 'mysql_servers_to_monitor'. This resulset should contain the latest + * 'mysql_servers' present in 'MySQL_HostGroups_Manager' db, which are not 'OFFLINE_HARD'. 
The resulset + * fields match the definition of 'monitor.mysql_servers' table. + * @details Several details: + * - Function assumes that 'mysql_servers' table from 'MySQL_HostGroups_Manager' db is ready + * to be consumed, because of this it doesn't perform any of the following operations: + * - Purging 'mysql_servers' table. + * - Regenerating 'mysql_servers' table. + * - Function locks on 'mysql_servers_to_monitor_mutex'. + * @param lock When supplied the function calls 'wrlock()' and 'wrunlock()' functions for accessing the db. + */ + void update_table_mysql_servers_for_monitor(bool lock=false); MyHGC * MyHGC_lookup(unsigned int); void MyConn_add_to_pool(MySQL_Connection *); diff --git a/include/MySQL_Monitor.hpp b/include/MySQL_Monitor.hpp index 09364f33e..bae631cb2 100644 --- a/include/MySQL_Monitor.hpp +++ b/include/MySQL_Monitor.hpp @@ -37,6 +37,8 @@ #define MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_FAILOVERS "CREATE TABLE mysql_server_aws_aurora_failovers (writer_hostgroup INT NOT NULL , hostname VARCHAR NOT NULL , inserted_at VARCHAR NOT NULL)" +#define MONITOR_SQLITE_TABLE_MYSQL_SERVERS "CREATE TABLE mysql_servers (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , status INT CHECK (status IN (0, 1, 2, 3, 4)) NOT NULL DEFAULT 0 , use_ssl INT CHECK (use_ssl IN(0,1)) NOT NULL DEFAULT 0 , PRIMARY KEY (hostname, port) )" + /* struct cmp_str { bool operator()(char const *a, char const *b) const @@ -271,6 +273,8 @@ class MySQL_Monitor { pthread_mutex_t group_replication_mutex; // for simplicity, a mutex instead of a rwlock pthread_mutex_t galera_mutex; // for simplicity, a mutex instead of a rwlock pthread_mutex_t aws_aurora_mutex; // for simplicity, a mutex instead of a rwlock + pthread_mutex_t mysql_servers_mutex; // for simplicity, a mutex instead of a rwlock + //std::map Group_Replication_Hosts_Map; std::map Group_Replication_Hosts_Map; SQLite3_result *Group_Replication_Hosts_resultset; @@ -318,6 +322,13 @@ class MySQL_Monitor { void 
populate_monitor_mysql_server_galera_log(); void populate_monitor_mysql_server_aws_aurora_log(); void populate_monitor_mysql_server_aws_aurora_check_status(); + /** + * @brief Helper function that uses the provided resulset for updating the table 'monitor.mysql_servers'. + * @details When supplying 'MySQL_HostGroups_Manager::mysql_servers_to_monitor' resulset as parameter, the + * mutex 'MySQL_HostGroups_Manager::mysql_servers_to_monitor_mutex' needs to be previously taken. + * @param SQLite3_result The resulset to be used for updating 'monitor.mysql_servers'. + */ + void update_monitor_mysql_servers(SQLite3_result*); char * galera_find_last_node(int); std::vector * galera_find_possible_last_nodes(int); bool server_responds_to_ping(char *address, int port); diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index 180c7e4dd..c9430ba1b 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -1416,6 +1416,7 @@ MySQL_HostGroups_Manager::MySQL_HostGroups_Manager() { gtid_ev_loop=NULL; gtid_ev_timer=NULL; gtid_ev_async = (struct ev_async *)malloc(sizeof(struct ev_async)); + mysql_servers_to_monitor = NULL; { static const char alphanum[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; @@ -2067,6 +2068,11 @@ bool MySQL_HostGroups_Manager::commit() { this->status.p_counter_array[p_hg_counter::servers_table_version]->Increment(); pthread_cond_broadcast(&status.servers_table_version_cond); pthread_mutex_unlock(&status.servers_table_version_lock); + + // NOTE: In order to guarantee the latest generated version, this should be kept after all the + // calls to 'generate_mysql_servers'. 
+ update_table_mysql_servers_for_monitor(false); + wrunlock(); unsigned long long curtime2=monotonic_time(); curtime1 = curtime1/1000; @@ -2581,6 +2587,35 @@ SQLite3_result * MySQL_HostGroups_Manager::dump_table_mysql_servers() { return resultset; } +void MySQL_HostGroups_Manager::update_table_mysql_servers_for_monitor(bool lock) { + if (lock) { + wrlock(); + } + + std::lock_guard mysql_servers_lock(this->mysql_servers_to_monitor_mutex); + + char* error = NULL; + int cols = 0; + int affected_rows = 0; + SQLite3_result* resultset = NULL; + char* query = const_cast("SELECT hostname, port, status, use_ssl FROM mysql_servers WHERE status != 3 GROUP BY hostname, port"); + + proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 4, "%s\n", query); + mydb->execute_statement(query, &error , &cols , &affected_rows , &resultset); + + if (error != nullptr) { + proxy_error("Error on read from mysql_servers : %s\n", error); + } else { + if (resultset != nullptr) { + this->mysql_servers_to_monitor = resultset; + } + } + + if (lock) { + wrunlock(); + } +} + SQLite3_result * MySQL_HostGroups_Manager::dump_table_mysql_replication_hostgroups() { wrlock(); char *error=NULL; @@ -7030,6 +7065,9 @@ void MySQL_HostGroups_Manager::update_aws_aurora_set_reader(int _whid, int _rhid generate_mysql_servers_table(&_rhid); free(query); } + // NOTE: Because 'commit' isn't called, we are required to update 'mysql_servers_for_monitor'. + // Also note that 'generate_mysql_servers' is previously called. 
+ update_table_mysql_servers_for_monitor(false); wrunlock(); // it is now time to build a new structure in Monitor pthread_mutex_lock(&AWS_Aurora_Info_mutex); diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 01517782c..72fc6673d 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -719,6 +719,7 @@ MySQL_Monitor::MySQL_Monitor() { insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_LOG); insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_check_status", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_CHECK_STATUS); insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_failovers", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_FAILOVERS); + insert_into_tables_defs(tables_defs_monitor,"mysql_servers", MONITOR_SQLITE_TABLE_MYSQL_SERVERS); // create monitoring tables check_and_build_standard_tables(monitordb, tables_defs_monitor); monitordb->execute("CREATE INDEX IF NOT EXISTS idx_connect_log_time_start ON mysql_server_connect_log (time_start_us)"); @@ -841,6 +842,70 @@ void MySQL_Monitor::check_and_build_standard_tables(SQLite3DB *db, std::vectorexecute("PRAGMA foreign_keys = ON"); }; +void MySQL_Monitor::update_monitor_mysql_servers(SQLite3_result* resultset) { + pthread_mutex_lock(&this->mysql_servers_mutex); + + if (resultset != nullptr) { + int rc = 0; + + monitordb->execute("DELETE FROM mysql_servers"); + + sqlite3_stmt *statement1=NULL; + sqlite3_stmt *statement32=NULL; + + char* query1 = const_cast("INSERT INTO mysql_servers VALUES (?1,?2,?3,?4)"); + char* query32 = const_cast( + "INSERT INTO mysql_servers VALUES (?1,?2,?3,?4),(?5,?6,?7,?8),(?9,?10,?11,?12),(?13,?14,?15,?16),(?17,?18,?19,?20),(?21,?22,?23,?24)," + "(?25,?26,?27,?28),(?29,?30,?31,?32),(?33,?34,?35,?36),(?37,?38,?39,?40),(?41,?42,?43,?44),(?45,?46,?47,?48),(?49,?50,?51,?52)," + 
"(?53,?54,?55,?56),(?57,?58,?59,?60),(?61,?62,?63,?64),(?65,?66,?67,?68),(?69,?70,?71,?72),(?73,?74,?75,?76),(?77,?78,?79,?80)," + "(?81,?82,?83,?84),(?85,?86,?87,?88),(?89,?90,?91,?92),(?93,?94,?95,?96),(?97,?98,?99,?100),(?101,?102,?103,?104),(?105,?106,?107,?108)," + "(?109,?110,?111,?112),(?113,?114,?115,?116),(?117,?118,?119,?120),(?121,?122,?123,?124),(?125,?126,?127,?128)" + ); + + rc = monitordb->prepare_v2(query1, &statement1); + ASSERT_SQLITE_OK(rc, monitordb); + rc = monitordb->prepare_v2(query32, &statement32); + ASSERT_SQLITE_OK(rc, monitordb); + + int row_idx=0; + int max_bulk_row_idx=resultset->rows_count/32; + max_bulk_row_idx=max_bulk_row_idx*32; + + for (std::vector::iterator it = resultset->rows.begin(); it != resultset->rows.end(); ++it) { + SQLite3_row *r1=*it; + int idx=row_idx%32; + + if (row_idx < max_bulk_row_idx) { // bulk + rc=(*proxy_sqlite3_bind_text)(statement32, (idx*4)+1, r1->fields[0], -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); + rc=(*proxy_sqlite3_bind_int64)(statement32, (idx*4)+2, atoi(r1->fields[1])); ASSERT_SQLITE_OK(rc, monitordb); + rc=(*proxy_sqlite3_bind_int64)(statement32, (idx*4)+3, atoi(r1->fields[2])); ASSERT_SQLITE_OK(rc, monitordb); + rc=(*proxy_sqlite3_bind_int64)(statement32, (idx*4)+4, atoi(r1->fields[3])); ASSERT_SQLITE_OK(rc, monitordb); + + if (idx==31) { + SAFE_SQLITE3_STEP2(statement32); + rc=(*proxy_sqlite3_clear_bindings)(statement32); ASSERT_SQLITE_OK(rc, monitordb); + rc=(*proxy_sqlite3_reset)(statement32); ASSERT_SQLITE_OK(rc, monitordb); + } + } else { // single row + rc=(*proxy_sqlite3_bind_text)(statement1, 1, r1->fields[0], -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb); + rc=(*proxy_sqlite3_bind_int64)(statement1, 2, atoi(r1->fields[1])); ASSERT_SQLITE_OK(rc, monitordb); + rc=(*proxy_sqlite3_bind_int64)(statement1, 3, atoi(r1->fields[2])); ASSERT_SQLITE_OK(rc, monitordb); + rc=(*proxy_sqlite3_bind_int64)(statement1, 4, atoi(r1->fields[3])); ASSERT_SQLITE_OK(rc, monitordb); + + 
SAFE_SQLITE3_STEP2(statement1); + rc=(*proxy_sqlite3_clear_bindings)(statement1); ASSERT_SQLITE_OK(rc, monitordb); + rc=(*proxy_sqlite3_reset)(statement1); ASSERT_SQLITE_OK(rc, monitordb); + } + row_idx++; + } + + (*proxy_sqlite3_finalize)(statement1); + (*proxy_sqlite3_finalize)(statement32); + } + + pthread_mutex_unlock(&this->mysql_servers_mutex); +} + void * monitor_connect_thread(void *arg) { mysql_close(mysql_init(NULL)); MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg; @@ -2403,13 +2468,18 @@ void * MySQL_Monitor::monitor_connect() { unsigned long long t2; unsigned long long next_loop_at=0; while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) { + // update the 'monitor.mysql_servers' table with the latest 'mysql_servers' from 'MyHGM' + { + std::lock_guard mysql_servers_guard(MyHGM->mysql_servers_to_monitor_mutex); + update_monitor_mysql_servers(MyHGM->mysql_servers_to_monitor); + } char *error=NULL; int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; // add support for SSL - char *query=(char *)"SELECT hostname, port, MAX(use_ssl) use_ssl FROM mysql_servers GROUP BY hostname, port ORDER BY RANDOM()"; + char *query=(char *)"SELECT hostname, port, MAX(use_ssl) use_ssl FROM monitor.mysql_servers GROUP BY hostname, port ORDER BY RANDOM()"; unsigned int glover; t1=monotonic_time(); @@ -2527,13 +2597,18 @@ void * MySQL_Monitor::monitor_ping() { unsigned long long next_loop_at=0; while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) { + // update the 'monitor.mysql_servers' table with the latest 'mysql_servers' from 'MyHGM' + { + std::lock_guard mysql_servers_guard(MyHGM->mysql_servers_to_monitor_mutex); + update_monitor_mysql_servers(MyHGM->mysql_servers_to_monitor); + } unsigned int glover; char *error=NULL; int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; - char *query=(char *)"SELECT hostname, port, MAX(use_ssl) use_ssl FROM mysql_servers WHERE status NOT LIKE 'OFFLINE\%' GROUP 
BY hostname, port ORDER BY RANDOM()"; + char *query=(char *)"SELECT hostname, port, MAX(use_ssl) use_ssl FROM monitor.mysql_servers GROUP BY hostname, port ORDER BY RANDOM()"; t1=monotonic_time(); if (!GloMTH) return NULL; // quick exit during shutdown/restart @@ -2611,7 +2686,7 @@ __end_monitor_ping_loop: } // now it is time to shun all problematic hosts - query=(char *)"SELECT DISTINCT a.hostname, a.port FROM mysql_servers a JOIN monitor.mysql_server_ping_log b ON a.hostname=b.hostname WHERE status NOT LIKE 'OFFLINE\%' AND b.ping_error IS NOT NULL AND b.ping_error NOT LIKE 'Access denied for user\%'"; + query=(char *)"SELECT DISTINCT a.hostname, a.port FROM monitor.mysql_servers a JOIN monitor.mysql_server_ping_log b ON a.hostname=b.hostname WHERE b.ping_error IS NOT NULL AND b.ping_error NOT LIKE 'Access denied for user\%'"; proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); // we disable valgrind here. Probably a bug in SQLite3 VALGRIND_DISABLE_ERROR_REPORTING; @@ -2672,7 +2747,7 @@ VALGRIND_ENABLE_ERROR_REPORTING; // now it is time to update current_lantency_ms - query=(char *)"SELECT DISTINCT a.hostname, a.port FROM mysql_servers a JOIN monitor.mysql_server_ping_log b ON a.hostname=b.hostname WHERE status NOT LIKE 'OFFLINE\%' AND b.ping_error IS NULL"; + query=(char *)"SELECT DISTINCT a.hostname, a.port FROM monitor.mysql_servers a JOIN monitor.mysql_server_ping_log b ON a.hostname=b.hostname WHERE b.ping_error IS NULL"; proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); VALGRIND_DISABLE_ERROR_REPORTING; admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset); From ba505284a6e53e273cacee3f7620f1f024f04b48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Mon, 28 Feb 2022 17:04:09 +0100 Subject: [PATCH 2/4] Add missing 'Monitor::mysql_servers_mutex' initialization #3797 --- lib/MySQL_Monitor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 
72fc6673d..27aeafeb0 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -695,6 +695,7 @@ MySQL_Monitor::MySQL_Monitor() { Galera_Hosts_resultset=NULL; pthread_mutex_init(&aws_aurora_mutex,NULL); + pthread_mutex_init(&mysql_servers_mutex,NULL); AWS_Aurora_Hosts_resultset=NULL; AWS_Aurora_Hosts_resultset_checksum = 0; shutdown=false; From 9e7afd496615ea34597e790dfae64fcea9a35b42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Mon, 28 Feb 2022 17:08:18 +0100 Subject: [PATCH 3/4] Access 'Monitor::mysql_servers_mutex' through 'GloMyMon' instead of 'this' #3797 --- lib/MySQL_Monitor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 27aeafeb0..9d9c55504 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -844,7 +844,7 @@ void MySQL_Monitor::check_and_build_standard_tables(SQLite3DB *db, std::vectormysql_servers_mutex); + pthread_mutex_lock(&GloMyMon->mysql_servers_mutex); if (resultset != nullptr) { int rc = 0; @@ -904,7 +904,7 @@ void MySQL_Monitor::update_monitor_mysql_servers(SQLite3_result* resultset) { (*proxy_sqlite3_finalize)(statement32); } - pthread_mutex_unlock(&this->mysql_servers_mutex); + pthread_mutex_unlock(&GloMyMon->mysql_servers_mutex); } void * monitor_connect_thread(void *arg) { From c7eda1d49d07cb5d591486d4b7167f134ac1d963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Mon, 28 Feb 2022 20:58:56 +0100 Subject: [PATCH 4/4] Add missing delete when updating 'mysql_servers_to_monitor' resultset #3797 --- lib/MySQL_HostGroups_Manager.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index c9430ba1b..40f86aef8 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -2607,6 +2607,7 @@ void MySQL_HostGroups_Manager::update_table_mysql_servers_for_monitor(bool lock) proxy_error("Error 
on read from mysql_servers : %s\n", error); } else { if (resultset != nullptr) { + delete this->mysql_servers_to_monitor; this->mysql_servers_to_monitor = resultset; } }