Merge pull request #3798 from sysown/v2.x-3797

Closes #3797: Monitoring not working properly for AWS aurora auto-discovered servers
pull/3856/head
René Cannaò 4 years ago committed by GitHub
commit 6ea642fc55
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -427,6 +427,17 @@ class MySQL_HostGroups_Manager {
public:
std::mutex galera_set_writer_mutex;
/**
* @brief Mutex used to guard 'mysql_servers_to_monitor' resultset.
*/
std::mutex mysql_servers_to_monitor_mutex;
/**
* @brief Resultset containing the latest 'mysql_servers' present in 'mydb'.
* @details This resultset should be updated via 'update_table_mysql_servers_for_monitor' each time actions
* that modify the 'mysql_servers' table are performed.
*/
SQLite3_result* mysql_servers_to_monitor;
pthread_rwlock_t gtid_rwlock;
std::unordered_map <string, GTID_Server_Data *> gtid_map;
struct ev_async * gtid_ev_async;
@ -539,6 +550,19 @@ class MySQL_HostGroups_Manager {
SQLite3_result *dump_table_mysql_group_replication_hostgroups();
SQLite3_result *dump_table_mysql_galera_hostgroups();
SQLite3_result *dump_table_mysql_aws_aurora_hostgroups();
/**
* @brief Update the public member resultset 'mysql_servers_to_monitor'. This resultset should contain the latest
* 'mysql_servers' present in 'MySQL_HostGroups_Manager' db, which are not 'OFFLINE_HARD'. The resultset
* fields match the definition of 'monitor.mysql_servers' table.
* @details Several details:
* - Function assumes that 'mysql_servers' table from 'MySQL_HostGroups_Manager' db is ready
* to be consumed, because of this it doesn't perform any of the following operations:
* - Purging 'mysql_servers' table.
* - Regenerating 'mysql_servers' table.
* - Function locks on 'mysql_servers_to_monitor_mutex'.
* @param lock When 'true', the function calls 'wrlock()' before and 'wrunlock()' after accessing the db.
*/
void update_table_mysql_servers_for_monitor(bool lock=false);
MyHGC * MyHGC_lookup(unsigned int);
void MyConn_add_to_pool(MySQL_Connection *);

@ -37,6 +37,8 @@
#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_FAILOVERS "CREATE TABLE mysql_server_aws_aurora_failovers (writer_hostgroup INT NOT NULL , hostname VARCHAR NOT NULL , inserted_at VARCHAR NOT NULL)"
// Schema of the 'mysql_servers' table registered among the monitor tables definitions.
// One row per (hostname, port) pair, which is the primary key; 'status' is restricted to 0..4 and
// 'use_ssl' to 0/1 by CHECK constraints.
// NOTE(review): the numeric 'status' values presumably mirror the server status enum used by
// 'MySQL_HostGroups_Manager' (its producer query filters 'status != 3') -- confirm against that enum.
#define MONITOR_SQLITE_TABLE_MYSQL_SERVERS "CREATE TABLE mysql_servers (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , status INT CHECK (status IN (0, 1, 2, 3, 4)) NOT NULL DEFAULT 0 , use_ssl INT CHECK (use_ssl IN(0,1)) NOT NULL DEFAULT 0 , PRIMARY KEY (hostname, port) )"
/*
struct cmp_str {
bool operator()(char const *a, char const *b) const
@ -271,6 +273,8 @@ class MySQL_Monitor {
pthread_mutex_t group_replication_mutex; // for simplicity, a mutex instead of a rwlock
pthread_mutex_t galera_mutex; // for simplicity, a mutex instead of a rwlock
pthread_mutex_t aws_aurora_mutex; // for simplicity, a mutex instead of a rwlock
pthread_mutex_t mysql_servers_mutex; // for simplicity, a mutex instead of a rwlock
//std::map<char *, MyGR_monitor_node *, cmp_str> Group_Replication_Hosts_Map;
std::map<std::string, MyGR_monitor_node *> Group_Replication_Hosts_Map;
SQLite3_result *Group_Replication_Hosts_resultset;
@ -318,6 +322,13 @@ class MySQL_Monitor {
void populate_monitor_mysql_server_galera_log();
void populate_monitor_mysql_server_aws_aurora_log();
void populate_monitor_mysql_server_aws_aurora_check_status();
/**
* @brief Helper function that uses the provided resultset for updating the table 'monitor.mysql_servers'.
* @details When supplying 'MySQL_HostGroups_Manager::mysql_servers_to_monitor' resultset as parameter, the
* mutex 'MySQL_HostGroups_Manager::mysql_servers_to_monitor_mutex' needs to be previously taken.
* @param SQLite3_result The resultset to be used for updating 'monitor.mysql_servers'.
*/
void update_monitor_mysql_servers(SQLite3_result*);
char * galera_find_last_node(int);
std::vector<string> * galera_find_possible_last_nodes(int);
bool server_responds_to_ping(char *address, int port);

@ -1419,6 +1419,7 @@ MySQL_HostGroups_Manager::MySQL_HostGroups_Manager() {
gtid_ev_loop=NULL;
gtid_ev_timer=NULL;
gtid_ev_async = (struct ev_async *)malloc(sizeof(struct ev_async));
mysql_servers_to_monitor = NULL;
{
static const char alphanum[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
@ -2070,6 +2071,11 @@ bool MySQL_HostGroups_Manager::commit() {
this->status.p_counter_array[p_hg_counter::servers_table_version]->Increment();
pthread_cond_broadcast(&status.servers_table_version_cond);
pthread_mutex_unlock(&status.servers_table_version_lock);
// NOTE: In order to guarantee the latest generated version, this should be kept after all the
// calls to 'generate_mysql_servers_table'.
update_table_mysql_servers_for_monitor(false);
wrunlock();
unsigned long long curtime2=monotonic_time();
curtime1 = curtime1/1000;
@ -2584,6 +2590,36 @@ SQLite3_result * MySQL_HostGroups_Manager::dump_table_mysql_servers() {
return resultset;
}
void MySQL_HostGroups_Manager::update_table_mysql_servers_for_monitor(bool lock) {
if (lock) {
wrlock();
}
std::lock_guard<std::mutex> mysql_servers_lock(this->mysql_servers_to_monitor_mutex);
char* error = NULL;
int cols = 0;
int affected_rows = 0;
SQLite3_result* resultset = NULL;
char* query = const_cast<char*>("SELECT hostname, port, status, use_ssl FROM mysql_servers WHERE status != 3 GROUP BY hostname, port");
proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 4, "%s\n", query);
mydb->execute_statement(query, &error , &cols , &affected_rows , &resultset);
if (error != nullptr) {
proxy_error("Error on read from mysql_servers : %s\n", error);
} else {
if (resultset != nullptr) {
delete this->mysql_servers_to_monitor;
this->mysql_servers_to_monitor = resultset;
}
}
if (lock) {
wrunlock();
}
}
SQLite3_result * MySQL_HostGroups_Manager::dump_table_mysql_replication_hostgroups() {
wrlock();
char *error=NULL;
@ -7045,6 +7081,9 @@ void MySQL_HostGroups_Manager::update_aws_aurora_set_reader(int _whid, int _rhid
generate_mysql_servers_table(&_rhid);
free(query);
}
// NOTE: Because 'commit' isn't called, we are required to update 'mysql_servers_to_monitor'.
// Also note that 'generate_mysql_servers_table' has already been called above.
update_table_mysql_servers_for_monitor(false);
wrunlock();
// it is now time to build a new structure in Monitor
pthread_mutex_lock(&AWS_Aurora_Info_mutex);

@ -695,6 +695,7 @@ MySQL_Monitor::MySQL_Monitor() {
Galera_Hosts_resultset=NULL;
pthread_mutex_init(&aws_aurora_mutex,NULL);
pthread_mutex_init(&mysql_servers_mutex,NULL);
AWS_Aurora_Hosts_resultset=NULL;
AWS_Aurora_Hosts_resultset_checksum = 0;
shutdown=false;
@ -719,6 +720,7 @@ MySQL_Monitor::MySQL_Monitor() {
insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_LOG);
insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_check_status", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_CHECK_STATUS);
insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_failovers", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_FAILOVERS);
insert_into_tables_defs(tables_defs_monitor,"mysql_servers", MONITOR_SQLITE_TABLE_MYSQL_SERVERS);
// create monitoring tables
check_and_build_standard_tables(monitordb, tables_defs_monitor);
monitordb->execute("CREATE INDEX IF NOT EXISTS idx_connect_log_time_start ON mysql_server_connect_log (time_start_us)");
@ -841,6 +843,70 @@ void MySQL_Monitor::check_and_build_standard_tables(SQLite3DB *db, std::vector<t
db->execute("PRAGMA foreign_keys = ON");
};
/**
 * @brief Rebuilds the 'mysql_servers' table in 'monitordb' from the supplied resultset.
 * @details The whole body runs while holding 'GloMyMon->mysql_servers_mutex'. When 'resultset' is
 *   NULL the table is left untouched. Otherwise every existing row is deleted and each row of
 *   'resultset' is inserted again: rows are written 32 at a time through a multi-row INSERT, and the
 *   remaining (rows_count % 32) rows are written one by one.
 *   Each row is expected to expose four fields, bound in order to the four table columns: field 0
 *   as text, fields 1..3 converted with 'atoi()' and bound as integers.
 * @param resultset Rows to load into the table, or NULL to perform no update.
 */
void MySQL_Monitor::update_monitor_mysql_servers(SQLite3_result* resultset) {
pthread_mutex_lock(&GloMyMon->mysql_servers_mutex);
if (resultset != nullptr) {
int rc = 0;
// Start from an empty table; the content is fully regenerated below.
monitordb->execute("DELETE FROM mysql_servers");
sqlite3_stmt *statement1=NULL;
sqlite3_stmt *statement32=NULL;
// Single-row insert, used for the tail that does not fill a complete batch.
char* query1 = const_cast<char*>("INSERT INTO mysql_servers VALUES (?1,?2,?3,?4)");
// 32-row insert: 32 tuples of 4 placeholders each, numbered ?1..?128.
char* query32 = const_cast<char*>(
"INSERT INTO mysql_servers VALUES (?1,?2,?3,?4),(?5,?6,?7,?8),(?9,?10,?11,?12),(?13,?14,?15,?16),(?17,?18,?19,?20),(?21,?22,?23,?24),"
"(?25,?26,?27,?28),(?29,?30,?31,?32),(?33,?34,?35,?36),(?37,?38,?39,?40),(?41,?42,?43,?44),(?45,?46,?47,?48),(?49,?50,?51,?52),"
"(?53,?54,?55,?56),(?57,?58,?59,?60),(?61,?62,?63,?64),(?65,?66,?67,?68),(?69,?70,?71,?72),(?73,?74,?75,?76),(?77,?78,?79,?80),"
"(?81,?82,?83,?84),(?85,?86,?87,?88),(?89,?90,?91,?92),(?93,?94,?95,?96),(?97,?98,?99,?100),(?101,?102,?103,?104),(?105,?106,?107,?108),"
"(?109,?110,?111,?112),(?113,?114,?115,?116),(?117,?118,?119,?120),(?121,?122,?123,?124),(?125,?126,?127,?128)"
);
rc = monitordb->prepare_v2(query1, &statement1);
ASSERT_SQLITE_OK(rc, monitordb);
rc = monitordb->prepare_v2(query32, &statement32);
ASSERT_SQLITE_OK(rc, monitordb);
int row_idx=0;
// Number of leading rows that can be written in complete batches of 32 (rows_count rounded
// down to a multiple of 32); rows at or beyond this index use the single-row statement.
int max_bulk_row_idx=resultset->rows_count/32;
max_bulk_row_idx=max_bulk_row_idx*32;
for (std::vector<SQLite3_row *>::iterator it = resultset->rows.begin(); it != resultset->rows.end(); ++it) {
SQLite3_row *r1=*it;
// Slot of this row inside the current batch (0..31); selects its group of 4 placeholders.
int idx=row_idx%32;
if (row_idx < max_bulk_row_idx) { // bulk
// SQLITE_TRANSIENT requests that SQLite keeps its own copy of the bound text.
rc=(*proxy_sqlite3_bind_text)(statement32, (idx*4)+1, r1->fields[0], -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb);
rc=(*proxy_sqlite3_bind_int64)(statement32, (idx*4)+2, atoi(r1->fields[1])); ASSERT_SQLITE_OK(rc, monitordb);
rc=(*proxy_sqlite3_bind_int64)(statement32, (idx*4)+3, atoi(r1->fields[2])); ASSERT_SQLITE_OK(rc, monitordb);
rc=(*proxy_sqlite3_bind_int64)(statement32, (idx*4)+4, atoi(r1->fields[3])); ASSERT_SQLITE_OK(rc, monitordb);
// The batch is complete once its last slot is bound: execute it and make the
// statement reusable for the next 32 rows.
if (idx==31) {
// NOTE(review): SAFE_SQLITE3_STEP2 is a macro defined outside this view; it presumably
// uses the local 'rc' -- confirm before renaming that variable.
SAFE_SQLITE3_STEP2(statement32);
rc=(*proxy_sqlite3_clear_bindings)(statement32); ASSERT_SQLITE_OK(rc, monitordb);
rc=(*proxy_sqlite3_reset)(statement32); ASSERT_SQLITE_OK(rc, monitordb);
}
} else { // single row
rc=(*proxy_sqlite3_bind_text)(statement1, 1, r1->fields[0], -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, monitordb);
rc=(*proxy_sqlite3_bind_int64)(statement1, 2, atoi(r1->fields[1])); ASSERT_SQLITE_OK(rc, monitordb);
rc=(*proxy_sqlite3_bind_int64)(statement1, 3, atoi(r1->fields[2])); ASSERT_SQLITE_OK(rc, monitordb);
rc=(*proxy_sqlite3_bind_int64)(statement1, 4, atoi(r1->fields[3])); ASSERT_SQLITE_OK(rc, monitordb);
SAFE_SQLITE3_STEP2(statement1);
rc=(*proxy_sqlite3_clear_bindings)(statement1); ASSERT_SQLITE_OK(rc, monitordb);
rc=(*proxy_sqlite3_reset)(statement1); ASSERT_SQLITE_OK(rc, monitordb);
}
row_idx++;
}
// Both prepared statements are released regardless of which ones were actually executed.
(*proxy_sqlite3_finalize)(statement1);
(*proxy_sqlite3_finalize)(statement32);
}
pthread_mutex_unlock(&GloMyMon->mysql_servers_mutex);
}
void * monitor_connect_thread(void *arg) {
mysql_close(mysql_init(NULL));
MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg;
@ -2403,13 +2469,18 @@ void * MySQL_Monitor::monitor_connect() {
unsigned long long t2;
unsigned long long next_loop_at=0;
while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) {
// update the 'monitor.mysql_servers' table with the latest 'mysql_servers' from 'MyHGM'
{
std::lock_guard<std::mutex> mysql_servers_guard(MyHGM->mysql_servers_to_monitor_mutex);
update_monitor_mysql_servers(MyHGM->mysql_servers_to_monitor);
}
char *error=NULL;
int cols=0;
int affected_rows=0;
SQLite3_result *resultset=NULL;
// add support for SSL
char *query=(char *)"SELECT hostname, port, MAX(use_ssl) use_ssl FROM mysql_servers GROUP BY hostname, port ORDER BY RANDOM()";
char *query=(char *)"SELECT hostname, port, MAX(use_ssl) use_ssl FROM monitor.mysql_servers GROUP BY hostname, port ORDER BY RANDOM()";
unsigned int glover;
t1=monotonic_time();
@ -2527,13 +2598,18 @@ void * MySQL_Monitor::monitor_ping() {
unsigned long long next_loop_at=0;
while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) {
// update the 'monitor.mysql_servers' table with the latest 'mysql_servers' from 'MyHGM'
{
std::lock_guard<std::mutex> mysql_servers_guard(MyHGM->mysql_servers_to_monitor_mutex);
update_monitor_mysql_servers(MyHGM->mysql_servers_to_monitor);
}
unsigned int glover;
char *error=NULL;
int cols=0;
int affected_rows=0;
SQLite3_result *resultset=NULL;
char *query=(char *)"SELECT hostname, port, MAX(use_ssl) use_ssl FROM mysql_servers WHERE status NOT LIKE 'OFFLINE\%' GROUP BY hostname, port ORDER BY RANDOM()";
char *query=(char *)"SELECT hostname, port, MAX(use_ssl) use_ssl FROM monitor.mysql_servers GROUP BY hostname, port ORDER BY RANDOM()";
t1=monotonic_time();
if (!GloMTH) return NULL; // quick exit during shutdown/restart
@ -2611,7 +2687,7 @@ __end_monitor_ping_loop:
}
// now it is time to shun all problematic hosts
query=(char *)"SELECT DISTINCT a.hostname, a.port FROM mysql_servers a JOIN monitor.mysql_server_ping_log b ON a.hostname=b.hostname WHERE status NOT LIKE 'OFFLINE\%' AND b.ping_error IS NOT NULL AND b.ping_error NOT LIKE 'Access denied for user\%'";
query=(char *)"SELECT DISTINCT a.hostname, a.port FROM monitor.mysql_servers a JOIN monitor.mysql_server_ping_log b ON a.hostname=b.hostname WHERE b.ping_error IS NOT NULL AND b.ping_error NOT LIKE 'Access denied for user\%'";
proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query);
// we disable valgrind here. Probably a bug in SQLite3
VALGRIND_DISABLE_ERROR_REPORTING;
@ -2672,7 +2748,7 @@ VALGRIND_ENABLE_ERROR_REPORTING;
// now it is time to update current_latency_ms
query=(char *)"SELECT DISTINCT a.hostname, a.port FROM mysql_servers a JOIN monitor.mysql_server_ping_log b ON a.hostname=b.hostname WHERE status NOT LIKE 'OFFLINE\%' AND b.ping_error IS NULL";
query=(char *)"SELECT DISTINCT a.hostname, a.port FROM monitor.mysql_servers a JOIN monitor.mysql_server_ping_log b ON a.hostname=b.hostname WHERE b.ping_error IS NULL";
proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query);
VALGRIND_DISABLE_ERROR_REPORTING;
admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset);

Loading…
Cancel
Save