From a35917c3463a7de7cee63bf6d946b9481b54f856 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Mon, 11 Feb 2019 11:16:15 +1100 Subject: [PATCH 01/20] First commit (not complete) to support Aurora --- include/MySQL_HostGroups_Manager.h | 30 ++ include/MySQL_Monitor.hpp | 66 +++++ lib/MySQL_HostGroups_Manager.cpp | 176 +++++++++++ lib/MySQL_Monitor.cpp | 457 +++++++++++++++++++++++++++++ lib/ProxySQL_Admin.cpp | 144 +++++++++ 5 files changed, 873 insertions(+) diff --git a/include/MySQL_HostGroups_Manager.h b/include/MySQL_HostGroups_Manager.h index cb94a90a0..f75c235a6 100644 --- a/include/MySQL_HostGroups_Manager.h +++ b/include/MySQL_HostGroups_Manager.h @@ -36,6 +36,9 @@ #define MYHGM_MYSQL_GALERA_HOSTGROUPS "CREATE TABLE mysql_galera_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , backup_writer_hostgroup INT CHECK (backup_writer_hostgroup>=0 AND backup_writer_hostgroup<>writer_hostgroup) NOT NULL , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND backup_writer_hostgroup<>reader_hostgroup AND reader_hostgroup>0) , offline_hostgroup INT NOT NULL CHECK (offline_hostgroup<>writer_hostgroup AND offline_hostgroup<>reader_hostgroup AND backup_writer_hostgroup<>offline_hostgroup AND offline_hostgroup>=0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , max_writers INT NOT NULL CHECK (max_writers >= 0) DEFAULT 1 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1,2)) NOT NULL DEFAULT 0 , max_transactions_behind INT CHECK (max_transactions_behind>=0) NOT NULL DEFAULT 0 , comment VARCHAR , UNIQUE (reader_hostgroup) , UNIQUE (offline_hostgroup) , UNIQUE (backup_writer_hostgroup))" +#define MYHGM_MYSQL_AWS_AURORA_HOSTGROUPS "CREATE TABLE mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) 
NOT NULL DEFAULT 1 , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 1000 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 500 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 500 AND check_timeout_ms <= 3000) DEFAULT 1000 , comment VARCHAR , UNIQUE (reader_hostgroup))" + + typedef std::unordered_map umap_mysql_errors; class MySrvConnList; @@ -427,6 +430,23 @@ class Galera_Info { ~Galera_Info(); }; +class AWS_Aurora_Info { + public: + int writer_hostgroup; + int reader_hostgroup; + int max_lag_ms; + int check_interval_ms; + int check_timeout_ms; + // TODO + // add intermediary status value, for example the last check time + char * comment; + bool active; + bool __active; + AWS_Aurora_Info(int w, int r, int ml, int ci, int ct, bool _a, char *c); + bool update(int r, int ml, int ci, int ct, bool _a, char *c); + ~AWS_Aurora_Info(); +}; + class MySQL_HostGroups_Manager { private: SQLite3DB *admindb; @@ -460,6 +480,12 @@ class MySQL_HostGroups_Manager { pthread_mutex_t Galera_Info_mutex; std::map Galera_Info_Map; + void generate_mysql_aws_aurora_hostgroups_table(); + SQLite3_result *incoming_aws_aurora_hostgroups; + + pthread_mutex_t AWS_Aurora_Info_mutex; + std::map AWS_Aurora_Info_Map; + std::thread *HGCU_thread; std::thread *GTID_syncer_thread; @@ -523,11 +549,13 @@ class MySQL_HostGroups_Manager { void set_incoming_replication_hostgroups(SQLite3_result *); void set_incoming_group_replication_hostgroups(SQLite3_result *); void set_incoming_galera_hostgroups(SQLite3_result *); + void set_incoming_aws_aurora_hostgroups(SQLite3_result *); SQLite3_result * execute_query(char *query, char **error); SQLite3_result *dump_table_mysql_servers(); SQLite3_result *dump_table_mysql_replication_hostgroups(); SQLite3_result *dump_table_mysql_group_replication_hostgroups(); SQLite3_result *dump_table_mysql_galera_hostgroups(); + SQLite3_result 
*dump_table_mysql_aws_aurora_hostgroups(); MyHGC * MyHGC_lookup(unsigned int); void MyConn_add_to_pool(MySQL_Connection *); @@ -560,6 +588,8 @@ class MySQL_HostGroups_Manager { void update_galera_set_writer(char *_hostname, int _port, int _writer_hostgroup); void converge_galera_config(int _writer_hostgroup); + // FIXME : add action functions for AWS Aurora + SQLite3_result * get_stats_mysql_gtid_executed(); void generate_mysql_gtid_executed_tables(); bool gtid_exists(MySrvC *mysrvc, char * gtid_uuid, uint64_t gtid_trxid); diff --git a/include/MySQL_Monitor.hpp b/include/MySQL_Monitor.hpp index b40a2a42a..a2b0f0c30 100644 --- a/include/MySQL_Monitor.hpp +++ b/include/MySQL_Monitor.hpp @@ -23,6 +23,10 @@ //#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_GALERA_LOG "CREATE TABLE mysql_server_galera_log (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_start_us INT NOT NULL DEFAULT 0 , success_time_us INT DEFAULT 0 , viable_candidate VARCHAR NOT NULL DEFAULT 'NO' , read_only VARCHAR NOT NULL DEFAULT 'YES' , transactions_behind INT DEFAULT 0 , error VARCHAR , PRIMARY KEY (hostname, port, time_start_us))" #define MONITOR_SQLITE_TABLE_MYSQL_SERVER_GALERA_LOG "CREATE TABLE mysql_server_galera_log (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_start_us INT NOT NULL DEFAULT 0 , success_time_us INT DEFAULT 0 , primary_partition VARCHAR NOT NULL DEFAULT 'NO' , read_only VARCHAR NOT NULL DEFAULT 'YES' , wsrep_local_recv_queue INT DEFAULT 0 , wsrep_local_state INT DEFAULT 0 , wsrep_desync VARCHAR NOT NULL DEFAULT 'NO' , wsrep_reject_queries VARCHAR NOT NULL DEFAULT 'NO' , wsrep_sst_donor_rejects_queries VARCHAR NOT NULL DEFAULT 'NO' , error VARCHAR , PRIMARY KEY (hostname, port, time_start_us))" +#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_LOG "CREATE TABLE mysql_server_aws_aurora_log (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_start_us INT NOT NULL DEFAULT 0 , success_time_us INT DEFAULT 0 , error VARCHAR , 
SERVER_ID VARCHAR NOT NULL DEFAULT '' , SESSION_ID VARCHAR NOT NULL DEFAULT '' , LAST_UPDATE_TIMESTAMP VARCHAR NOT NULL DEFAULT '' , replica_lag_in_microseconds INT NOT NULL DEFAULT 0 , CPU INT NOT NULL DEFAULT 0 , PRIMARY KEY (hostname, port, time_start_us, SERVER_ID))" + +//primary_partition VARCHAR NOT NULL DEFAULT 'NO' , read_only VARCHAR NOT NULL DEFAULT 'YES' , wsrep_local_recv_queue INT DEFAULT 0 , wsrep_local_state INT DEFAULT 0 , wsrep_desync VARCHAR NOT NULL DEFAULT 'NO' , wsrep_reject_queries VARCHAR NOT NULL DEFAULT 'NO' , wsrep_sst_donor_rejects_queries VARCHAR NOT NULL DEFAULT 'NO' , error VARCHAR , PRIMARY KEY (hostname, port, time_start_us))" + /* struct cmp_str { bool operator()(char const *a, char const *b) const @@ -34,6 +38,60 @@ struct cmp_str { #define MyGR_Nentries 100 #define Galera_Nentries 100 +#define AWS_Aurora_Nentries 100 + +/* + +Implementation of monitoring in AWS Aurora will be different than previous modules + +AWS_Aurora_replica_host_status_entry represents a single row returned from AWS_Aurora_replica_host_status_entry + +AWS_Aurora_status_entry represents a single check executed against a single Aurora node. +AWS_Aurora_status_entry can contain several AWS_Aurora_replica_host_status_entry + +AWS_Aurora_monitor_node represents a single Aurora node where checks are executed. +A single AWS_Aurora_monitor_node will have a AWS_Aurora_status_entry per check. 
+ +*/ + +class AWS_Aurora_replica_host_status_entry { + public: + char * server_id; + char * session_id; + uint32_t last_update_timestamp; + float replica_lag_ms; // originally a double + float cpu; + AWS_Aurora_replica_host_status_entry(char *serid, char *sessid, uint32_t lut, float rlm, float _c); + ~AWS_Aurora_replica_host_status_entry(); +}; + +class AWS_Aurora_status_entry { + public: + unsigned long long start_time; + unsigned long long check_time; + char *error; + std::vector * host_statuses; + AWS_Aurora_status_entry(unsigned long long st, unsigned long long ct, char *e); + void add_host_status(AWS_Aurora_replica_host_status_entry *hs); + ~AWS_Aurora_status_entry(); +}; + +class AWS_Aurora_monitor_node { + private: + int idx_last_entry; + public: + char *addr; + int port; + unsigned int writer_hostgroup; + AWS_Aurora_status_entry last_entries[AWS_Aurora_Nentries]; + AWS_Aurora_monitor_node(char *_a, int _p, int _whg); + ~AWS_Aurora_monitor_node(); + bool add_entry(AWS_Aurora_status_entry *ase); // return true if status changed + AWS_Aurora_status_entry *last_entry() { + if (idx_last_entry == -1) return NULL; + return (&last_entries[idx_last_entry]); + } +}; typedef struct _Galera_status_entry_t { unsigned long long start_time; @@ -115,6 +173,8 @@ class MySQL_Monitor_State_Data { int writer_hostgroup; // used only by group replication bool writer_is_also_reader; // used only by group replication int max_transactions_behind; // used only by group replication + int aws_aurora_max_lag_ms; + int aws_aurora_check_timeout_ms; bool use_ssl; MYSQL *mysql; MYSQL_RES *result; @@ -153,11 +213,14 @@ class MySQL_Monitor { public: pthread_mutex_t group_replication_mutex; // for simplicity, a mutex instead of a rwlock pthread_mutex_t galera_mutex; // for simplicity, a mutex instead of a rwlock + pthread_mutex_t aws_aurora_mutex; // for simplicity, a mutex instead of a rwlock //std::map Group_Replication_Hosts_Map; std::map Group_Replication_Hosts_Map; SQLite3_result 
*Group_Replication_Hosts_resultset; std::map Galera_Hosts_Map; SQLite3_result *Galera_Hosts_resultset; + std::map AWS_Aurora_Hosts_Map; + SQLite3_result *AWS_Aurora_Hosts_resultset; unsigned int num_threads; unsigned int aux_threads; unsigned int started_threads; @@ -183,13 +246,16 @@ class MySQL_Monitor { void * monitor_read_only(); void * monitor_group_replication(); void * monitor_galera(); + void * monitor_aws_aurora(); void * monitor_replication_lag(); void * run(); void populate_monitor_mysql_server_group_replication_log(); void populate_monitor_mysql_server_galera_log(); + void populate_monitor_mysql_server_aws_aurora_log(); char * galera_find_last_node(int); std::vector * galera_find_possible_last_nodes(int); bool server_responds_to_ping(char *address, int port); + // FIXME : add AWS Aurora actions }; #endif /* __CLASS_MYSQL_MONITOR_H */ diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index c63ad7264..f18833616 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -2063,6 +2063,19 @@ SQLite3_result * MySQL_HostGroups_Manager::dump_table_mysql_galera_hostgroups() return resultset; } +SQLite3_result * MySQL_HostGroups_Manager::dump_table_mysql_aws_aurora_hostgroups() { + wrlock(); + char *error=NULL; + int cols=0; + int affected_rows=0; + SQLite3_result *resultset=NULL; + char *query=(char *)"SELECT writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,comment FROM mysql_aws_aurora_hostgroups"; + proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 4, "%s\n", query); + mydb->execute_statement(query, &error , &cols , &affected_rows , &resultset); + wrunlock(); + return resultset; +} + MyHGC * MySQL_HostGroups_Manager::MyHGC_create(unsigned int _hid) { MyHGC *myhgc=new MyHGC(_hid); return myhgc; @@ -2636,6 +2649,14 @@ void MySQL_HostGroups_Manager::set_incoming_galera_hostgroups(SQLite3_result *s) incoming_galera_hostgroups=s; } +void 
MySQL_HostGroups_Manager::set_incoming_aws_aurora_hostgroups(SQLite3_result *s) { + if (incoming_aws_aurora_hostgroups) { + delete incoming_aws_aurora_hostgroups; + incoming_aws_aurora_hostgroups = NULL; + } + incoming_aws_aurora_hostgroups=s; +} + SQLite3_result * MySQL_HostGroups_Manager::SQL3_Connection_Pool(bool _reset) { const int colnum=14; proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 4, "Dumping Connection Pool\n"); @@ -4682,3 +4703,158 @@ SQLite3_result * MySQL_HostGroups_Manager::get_mysql_errors(bool reset) { pthread_mutex_unlock(&mysql_errors_mutex); return result; } + +AWS_Aurora_Info::AWS_Aurora_Info(int w, int r, int ml, int ci, int ct, bool _a, char *c) { + comment=NULL; + if (c) { + comment=strdup(c); + } + writer_hostgroup=w; + reader_hostgroup=r; + max_lag_ms=ml; + check_interval_ms=ci; + check_timeout_ms=ct; + active=_a; + __active=true; + //need_converge=true; // disabled: AWS_Aurora_Info declares no need_converge member +} + +AWS_Aurora_Info::~AWS_Aurora_Info() { + if (comment) { + free(comment); + comment=NULL; + } +} + +bool AWS_Aurora_Info::update(int r, int ml, int ci, int ct, bool _a, char *c) { + bool ret=false; + __active=true; + if (reader_hostgroup!=r) { + reader_hostgroup=r; + ret=true; + } + if (max_lag_ms!=ml) { + max_lag_ms=ml; + ret=true; + } + if (check_interval_ms!=ci) { + check_interval_ms=ci; + ret=true; + } + if (check_timeout_ms!=ct) { + check_timeout_ms=ct; + ret=true; + } + if (active!=_a) { + active=_a; + ret=true; + } + // for comment we don't change return value + if (comment) { + if (c) { + if (strcmp(comment,c)) { + free(comment); + comment=strdup(c); + } + } else { + free(comment); + comment=NULL; + } + } else { + if (c) { + comment=strdup(c); + } + } + return ret; +} + +void MySQL_HostGroups_Manager::generate_mysql_aws_aurora_hostgroups_table() { + if (incoming_aws_aurora_hostgroups==NULL) { + return; + } + int rc; + sqlite3_stmt *statement=NULL; + sqlite3 *mydb3=mydb->get_db(); + char *query=(char *)"INSERT INTO 
mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"; + rc=sqlite3_prepare_v2(mydb3, query, -1, &statement, 0); + assert(rc==SQLITE_OK); + proxy_info("New mysql_aws_aurora_hostgroups table\n"); + pthread_mutex_lock(&AWS_Aurora_Info_mutex); + for (std::map::iterator it1 = AWS_Aurora_Info_Map.begin() ; it1 != AWS_Aurora_Info_Map.end(); ++it1) { + AWS_Aurora_Info *info=NULL; + info=it1->second; + info->__active=false; + } + for (std::vector::iterator it = incoming_aws_aurora_hostgroups->rows.begin() ; it != incoming_aws_aurora_hostgroups->rows.end(); ++it) { + SQLite3_row *r=*it; + int writer_hostgroup=atoi(r->fields[0]); + int reader_hostgroup=atoi(r->fields[1]); + int active=atoi(r->fields[2]); + int max_lag_ms = atoi(r->fields[3]); + int check_interval_ms = atoi(r->fields[4]); + int check_timeout_ms = atoi(r->fields[5]); + proxy_info("Loading AWS Aurora info for (%d,%d,%s,%d,%d,%d,\"%s\")\n", writer_hostgroup,reader_hostgroup,(active ? 
"on" : "off"),max_lag_ms,check_interval_ms,check_timeout_ms,r->fields[6]); // comment is fields[6], the same column bound to ?7 below + rc=sqlite3_bind_int64(statement, 1, writer_hostgroup); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 2, reader_hostgroup); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 3, active); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 4, max_lag_ms); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 5, check_interval_ms); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 6, check_timeout_ms); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement, 7, r->fields[6], -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + + SAFE_SQLITE3_STEP2(statement); + rc=sqlite3_clear_bindings(statement); assert(rc==SQLITE_OK); + rc=sqlite3_reset(statement); assert(rc==SQLITE_OK); + std::map::iterator it2; + it2 = AWS_Aurora_Info_Map.find(writer_hostgroup); + AWS_Aurora_Info *info=NULL; + if (it2!=AWS_Aurora_Info_Map.end()) { + info=it2->second; + bool changed=false; + changed=info->update(reader_hostgroup, max_lag_ms, check_interval_ms, check_timeout_ms, (bool)active, r->fields[6]); + if (changed) { + //info->need_converge=true; + } + } else { + info=new AWS_Aurora_Info(writer_hostgroup, reader_hostgroup, max_lag_ms, check_interval_ms, check_timeout_ms, (bool)active, r->fields[6]); + //info->need_converge=true; + AWS_Aurora_Info_Map.insert(AWS_Aurora_Info_Map.begin(), std::pair(writer_hostgroup,info)); + } + } + sqlite3_finalize(statement); + delete incoming_aws_aurora_hostgroups; + incoming_aws_aurora_hostgroups=NULL; + + // remove missing ones + for (auto it3 = AWS_Aurora_Info_Map.begin(); it3 != AWS_Aurora_Info_Map.end(); ) { + AWS_Aurora_Info *info=it3->second; + if (info->__active==false) { + delete info; + it3 = AWS_Aurora_Info_Map.erase(it3); + } else { + it3++; + } + } + // TODO: it is now time to compute all the changes + + + // it is now time to build a new structure in Monitor + pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); + { + char *error=NULL; + int 
cols=0; + int affected_rows=0; + SQLite3_result *resultset=NULL; + char *query=(char *)"SELECT writer_hostgroup, hostname, port, MAX(use_ssl) use_ssl , writer_is_also_reader , max_lag_ms , check_interval_ms , check_timeout_ms FROM mysql_servers JOIN mysql_aws_aurora_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup WHERE status NOT IN (2,3) GROUP BY hostname, port"; + mydb->execute_statement(query, &error , &cols , &affected_rows , &resultset); + if (resultset) { + if (GloMyMon->AWS_Aurora_Hosts_resultset) { + delete GloMyMon->AWS_Aurora_Hosts_resultset; + } + GloMyMon->AWS_Aurora_Hosts_resultset=resultset; + } + } + pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); + + pthread_mutex_unlock(&AWS_Aurora_Info_mutex); +} diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 9c4575d5e..23176ab1d 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -319,6 +319,9 @@ MySQL_Monitor::MySQL_Monitor() { pthread_mutex_init(&galera_mutex,NULL); Galera_Hosts_resultset=NULL; + pthread_mutex_init(&aws_aurora_mutex,NULL); + AWS_Aurora_Hosts_resultset=NULL; + shutdown=false; monitor_enabled=true; // default // create new SQLite datatabase @@ -338,6 +341,7 @@ MySQL_Monitor::MySQL_Monitor() { insert_into_tables_defs(tables_defs_monitor,"mysql_server_replication_lag_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_REPLICATION_LAG_LOG); insert_into_tables_defs(tables_defs_monitor,"mysql_server_group_replication_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_GROUP_REPLICATION_LOG); insert_into_tables_defs(tables_defs_monitor,"mysql_server_galera_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_GALERA_LOG); + insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_LOG); // create monitoring tables check_and_build_standard_tables(monitordb, tables_defs_monitor); monitordb->execute("CREATE INDEX IF NOT EXISTS idx_connect_log_time_start ON mysql_server_connect_log (time_start_us)"); @@ -346,6 +350,7 @@ 
MySQL_Monitor::MySQL_Monitor() { monitordb->execute("CREATE INDEX IF NOT EXISTS idx_replication_lag_log_time_start ON mysql_server_replication_lag_log (time_start_us)"); monitordb->execute("CREATE INDEX IF NOT EXISTS idx_group_replication_log_time_start ON mysql_server_group_replication_log (time_start_us)"); monitordb->execute("CREATE INDEX IF NOT EXISTS idx_galera_log_time_start ON mysql_server_galera_log (time_start_us)"); + monitordb->execute("CREATE INDEX IF NOT EXISTS idx_aws_aurora_log_time_start ON mysql_server_aws_aurora_log (time_start_us)"); num_threads=2; aux_threads=0; @@ -388,6 +393,10 @@ MySQL_Monitor::~MySQL_Monitor() { delete Galera_Hosts_resultset; Galera_Hosts_resultset=NULL; } + if (AWS_Aurora_Hosts_resultset) { + delete AWS_Aurora_Hosts_resultset; + AWS_Aurora_Hosts_resultset=NULL; + } }; @@ -2647,6 +2656,47 @@ bool MyGR_monitor_node::add_entry(unsigned long long _st, unsigned long long _ct return ret; } + +AWS_Aurora_replica_host_status_entry::AWS_Aurora_replica_host_status_entry(char *serid, char *sessid, uint32_t lut, float rlm, float _c) { + server_id = strdup(serid); + session_id = strdup(sessid); + last_update_timestamp = lut; + replica_lag_ms = rlm; + cpu = _c; +} + +AWS_Aurora_replica_host_status_entry::~AWS_Aurora_replica_host_status_entry() { + free(server_id); + free(session_id); +} + +AWS_Aurora_status_entry::AWS_Aurora_status_entry(unsigned long long st, unsigned long long ct, char *e) { + start_time = st; + check_time = ct; + error = NULL; + if (e) { + error = strdup(e); + } + host_statuses = new std::vector; +} + +AWS_Aurora_status_entry::~AWS_Aurora_status_entry() { + if (error) { + free(error); + } + AWS_Aurora_replica_host_status_entry *entry; + for (std::vector::iterator it = host_statuses->begin(); it != host_statuses->end(); ++it) { + entry=*it; + delete entry; + } + host_statuses->clear(); + delete host_statuses; +} + +void AWS_Aurora_status_entry::add_host_status(AWS_Aurora_replica_host_status_entry *hs) { + 
host_statuses->push_back(hs); +} + Galera_monitor_node::Galera_monitor_node(char *_a, int _p, int _whg) { addr=NULL; if (_a) { @@ -2885,3 +2935,410 @@ std::vector * MySQL_Monitor::galera_find_possible_last_nodes(int writer_ pthread_mutex_unlock(&GloMyMon->galera_mutex); return result; } + +void MySQL_Monitor::populate_monitor_mysql_server_aws_aurora_log() { + sqlite3 *mondb=monitordb->get_db(); + int rc; + //char *query=NULL; + char *query1=NULL; + query1=(char *)"INSERT INTO mysql_server_aws_aurora_log VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)"; // same table that is emptied by the DELETE below + sqlite3_stmt *statement1=NULL; + pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); + rc=sqlite3_prepare_v2(mondb, query1, -1, &statement1, 0); + assert(rc==SQLITE_OK); + monitordb->execute((char *)"DELETE FROM mysql_server_aws_aurora_log"); + std::map::iterator it2; + AWS_Aurora_monitor_node *node=NULL; + for (it2=GloMyMon->AWS_Aurora_Hosts_Map.begin(); it2!=GloMyMon->AWS_Aurora_Hosts_Map.end(); ++it2) { + std::string s=it2->first; + node=it2->second; + std::size_t found=s.find_last_of(":"); + std::string host=s.substr(0,found); + std::string port=s.substr(found+1); + int i; + for (i=0; ilast_entries[i]; + if (aase->start_time) { + for (std::vector::iterator it3 = aase->host_statuses->begin(); it3!=aase->host_statuses->end(); ++it3) { + AWS_Aurora_replica_host_status_entry *hse = *it3; + if (hse) { + rc=sqlite3_bind_text(statement1, 1, host.c_str(), -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement1, 2, atoi(port.c_str())); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement1, 3, aase->start_time ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement1, 4, aase->check_time ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement1, 5, aase->error , -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement1, 6, hse->server_id , -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement1, 7, hse->session_id , -1, 
SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement1, 8, hse->last_update_timestamp ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_double(statement1, 9, hse->replica_lag_ms ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_double(statement1, 10, hse->cpu ); assert(rc==SQLITE_OK); + SAFE_SQLITE3_STEP2(statement1); + rc=sqlite3_clear_bindings(statement1); assert(rc==SQLITE_OK); + rc=sqlite3_reset(statement1); assert(rc==SQLITE_OK); + } + } + } + } + } + sqlite3_finalize(statement1); pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); // finalize the prepared statement, then release the same mutex that was locked above +} + +AWS_Aurora_monitor_node::AWS_Aurora_monitor_node(char *_a, int _p, int _whg) { + addr=NULL; + if (_a) { + addr=strdup(_a); + } + port=_p; + idx_last_entry=-1; + writer_hostgroup=_whg; + int i; + for (i=0;icurtime=monotonic_time(); + MySQL_Monitor__thread_MySQL_Thread_Variables_version=GloMTH->get_global_version(); + mysql_thr->refresh_variables(); + if (!GloMTH) return NULL; // quick exit during shutdown/restart + + unsigned long long t1; + unsigned long long t2; + unsigned long long next_loop_at=0; + + while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) { + + unsigned int glover; + t1=monotonic_time(); + + if (!GloMTH) return NULL; // quick exit during shutdown/restart + glover=GloMTH->get_global_version(); + if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) { + MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover; + mysql_thr->refresh_variables(); + next_loop_at=0; + } + + + if (t1 < next_loop_at) { + goto __sleep_monitor_aws_aurora; + } + next_loop_at=t1+1000*mysql_thread___monitor_galera_healthcheck_interval; + pthread_mutex_lock(&aws_aurora_mutex); + if (Galera_Hosts_resultset==NULL) { + goto __end_monitor_aws_aurora_loop; + } else { + if (Galera_Hosts_resultset->rows_count==0) { + goto __end_monitor_aws_aurora_loop; + } + int us=100; + if (Galera_Hosts_resultset->rows_count) { + us=mysql_thread___monitor_read_only_interval/2/Galera_Hosts_resultset->rows_count; + } + for (std::vector::iterator it = 
Galera_Hosts_resultset->rows.begin() ; it != Galera_Hosts_resultset->rows.end(); ++it) { + SQLite3_row *r=*it; + bool rc_ping = true; + rc_ping = server_responds_to_ping(r->fields[1],atoi(r->fields[2])); + if (rc_ping) { // only if server is responding to pings + MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[1],atoi(r->fields[2]), NULL, atoi(r->fields[3])); + mmsd->writer_hostgroup=atoi(r->fields[0]); + mmsd->writer_is_also_reader=atoi(r->fields[4]); + mmsd->max_transactions_behind=atoi(r->fields[5]); + mmsd->mondb=monitordb; + WorkItem* item; + item=new WorkItem(mmsd,monitor_aws_aurora_thread); + GloMyMon->queue.add(item); + usleep(us); + } + if (GloMyMon->shutdown) { + pthread_mutex_unlock(&aws_aurora_mutex); // release the mutex taken before the loop + return NULL; + } + } + } + +__end_monitor_aws_aurora_loop: + pthread_mutex_unlock(&aws_aurora_mutex); + if (mysql_thread___monitor_enabled==true) { + } + + +__sleep_monitor_aws_aurora: + t2=monotonic_time(); + if (t2 500000) { + st = 500000; + } + usleep(st); + } + } + if (mysql_thr) { + delete mysql_thr; + mysql_thr=NULL; + } + for (unsigned int i=0;iqueue.add(item); + } + return NULL; +} + +void * monitor_AWS_Aurora_thread(void *arg) { +// FIXME: still referring to GALERA and not AURORA + MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg; + MySQL_Thread * mysql_thr = new MySQL_Thread(); + mysql_thr->curtime=monotonic_time(); + mysql_thr->refresh_variables(); + if (!GloMTH) return NULL; // quick exit during shutdown/restart + + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + unsigned long long start_time=mysql_thr->curtime; + + + mmsd->t1=start_time; + + bool crc=false; + if (mmsd->mysql==NULL) { // we don't have a connection, let's create it + bool rc; + rc=mmsd->create_new_connection(); + crc=true; + if (rc==false) { + unsigned long long now=monotonic_time(); + char * new_error = (char *)malloc(50+strlen(mmsd->mysql_error_msg)); + sprintf(new_error,"timeout or error in creating new 
connection: %s",mmsd->mysql_error_msg); + free(mmsd->mysql_error_msg); + mmsd->mysql_error_msg = new_error; + proxy_error("Error on AWS Aurora check for %s:%d after %lldms. Unable to create a connection. If the server is overload, increase mysql-monitor_galera_healthcheck_timeout. Error: %s.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000, new_error); + goto __exit_monitor_aws_aurora_thread; + } + } + + mmsd->t1=monotonic_time(); + mmsd->interr=0; // reset the value + { + char *sv = mmsd->mysql->server_version; + if (strncmp(sv,(char *)"5.7",3)==0 || strncmp(sv,(char *)"8",1)==0) { + // the backend is either MySQL 5.7 or MySQL 8 : INFORMATION_SCHEMA.GLOBAL_STATUS is deprecated + mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT (SELECT VARIABLE_VALUE FROM performance_schema.global_status WHERE VARIABLE_NAME='WSREP_LOCAL_STATE') wsrep_local_state, @@read_only read_only, (SELECT VARIABLE_VALUE FROM performance_schema.global_status WHERE VARIABLE_NAME='WSREP_LOCAL_RECV_QUEUE') wsrep_local_recv_queue , @@wsrep_desync wsrep_desync, @@wsrep_reject_queries wsrep_reject_queries, @@wsrep_sst_donor_rejects_queries wsrep_sst_donor_rejects_queries, (SELECT VARIABLE_VALUE FROM performance_schema.global_status WHERE VARIABLE_NAME='WSREP_CLUSTER_STATUS') wsrep_cluster_status"); + } else { + // any other version + mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT (SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='WSREP_LOCAL_STATE') wsrep_local_state, @@read_only read_only, (SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='WSREP_LOCAL_RECV_QUEUE') wsrep_local_recv_queue , @@wsrep_desync wsrep_desync, @@wsrep_reject_queries wsrep_reject_queries, @@wsrep_sst_donor_rejects_queries wsrep_sst_donor_rejects_queries, (SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='WSREP_CLUSTER_STATUS') wsrep_cluster_status"); + } + } + while 
(mmsd->async_exit_status) { + mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); + unsigned long long now=monotonic_time(); + if (now > mmsd->t1 + mysql_thread___monitor_galera_healthcheck_timeout * 1000) { + mmsd->mysql_error_msg=strdup("timeout check"); + proxy_error("Timeout on Galera health check for %s:%d after %lldms. If the server is overload, increase mysql-monitor_galera_healthcheck_timeout. Assuming wsrep_cluster_status is NOT Primary\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); + goto __exit_monitor_aws_aurora_thread; + } + if (GloMyMon->shutdown==true) { + goto __fast_exit_monitor_aws_aurora_thread; // exit immediately + } + if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { + mmsd->async_exit_status=mysql_query_cont(&mmsd->interr, mmsd->mysql, mmsd->async_exit_status); + } + } + mmsd->async_exit_status=mysql_store_result_start(&mmsd->result,mmsd->mysql); + while (mmsd->async_exit_status) { + mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); + unsigned long long now=monotonic_time(); + if (now > mmsd->t1 + mysql_thread___monitor_galera_healthcheck_timeout * 1000) { + mmsd->mysql_error_msg=strdup("timeout check"); + proxy_error("Timeout on Galera health check for %s:%d after %lldms. If the server is overload, increase mysql-monitor_galera_healthcheck_timeout. 
Assuming wsrep_local_state is NOT 4 and read_only=YES\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); + goto __exit_monitor_aws_aurora_thread; + } + if (GloMyMon->shutdown==true) { + goto __fast_exit_monitor_aws_aurora_thread; // exit immediately + } + if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { + mmsd->async_exit_status=mysql_store_result_cont(&mmsd->result, mmsd->mysql, mmsd->async_exit_status); + } + } + if (mmsd->interr) { // ping failed + mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); + } + +__exit_monitor_aws_aurora_thread: + mmsd->t2=monotonic_time(); + { + // TODO : complete this + char buf[128]; + char *s=NULL; + int l=strlen(mmsd->hostname); + if (l<110) { + s=buf; + } else { + s=(char *)malloc(l+16); + } + sprintf(s,"%s:%d",mmsd->hostname,mmsd->port); + bool primary_partition = false; + bool read_only=true; + bool wsrep_desync = true; + int wsrep_local_state = 0; + bool wsrep_reject_queries = true; + bool wsrep_sst_donor_rejects_queries = true; + long long wsrep_local_recv_queue=0; + if (mmsd->interr == 0 && mmsd->result) { + int num_fields=0; + int num_rows=0; + num_fields = mysql_num_fields(mmsd->result); + if (num_fields!=7) { + proxy_error("Incorrect number of fields, please report a bug\n"); + goto __end_process_aws_aurora_result; + } + num_rows = mysql_num_rows(mmsd->result); + if (num_rows!=1) { + proxy_error("Incorrect number of rows, please report a bug\n"); + goto __end_process_aws_aurora_result; + } + MYSQL_ROW row=mysql_fetch_row(mmsd->result); + if (row[0]) { + wsrep_local_state = atoi(row[0]); + } + if (row[1]) { + if (!strcasecmp(row[1],"NO") || !strcasecmp(row[1],"OFF") || !strcasecmp(row[1],"0")) { + read_only=false; + } + } + if (row[2]) { + wsrep_local_recv_queue = atoll(row[2]); + } + if (row[3]) { + if (!strcasecmp(row[3],"NO") || !strcasecmp(row[3],"OFF") || !strcasecmp(row[3],"0")) { + wsrep_desync = false; + } + } + if (row[4]) { + if (!strcasecmp(row[4],"NONE")) { + wsrep_reject_queries = false; + } 
+ } + if (row[5]) { + if (!strcasecmp(row[5],"NO") || !strcasecmp(row[5],"OFF") || !strcasecmp(row[5],"0")) { + wsrep_sst_donor_rejects_queries = false; + } + } + if (row[6]) { + if (!strcasecmp(row[6],"Primary")) { + primary_partition = true; + } + } + mysql_free_result(mmsd->result); + mmsd->result=NULL; + } +__end_process_aws_aurora_result: + if (mmsd->mysql_error_msg) { + } + unsigned long long time_now=realtime_time(); + time_now=time_now-(mmsd->t2 - start_time); + pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); + //auto it = + // TODO : complete this + std::map::iterator it2; + it2 = GloMyMon->AWS_Aurora_Hosts_Map.find(s); + AWS_Aurora_monitor_node *node=NULL; + if (it2!=GloMyMon->AWS_Aurora_Hosts_Map.end()) { + node=it2->second; + node->add_entry(time_now, (mmsd->mysql_error_msg ? 0 : mmsd->t2-mmsd->t1) , wsrep_local_recv_queue, primary_partition, read_only, wsrep_local_state, wsrep_desync, wsrep_reject_queries, wsrep_sst_donor_rejects_queries, mmsd->mysql_error_msg); + } else { + node = new AWS_Aurora_monitor_node(mmsd->hostname,mmsd->port,mmsd->writer_hostgroup); + node->add_entry(time_now, (mmsd->mysql_error_msg ? 
0 : mmsd->t2-mmsd->t1) , wsrep_local_recv_queue, primary_partition, read_only, wsrep_local_state, wsrep_desync, wsrep_reject_queries, wsrep_sst_donor_rejects_queries, mmsd->mysql_error_msg); + GloMyMon->AWS_Aurora_Hosts_Map.insert(std::make_pair(s,node)); + } + pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); + + // NOTE: we update MyHGM outside the mutex aws_aurora_mutex + if (mmsd->mysql_error_msg) { // there was an error checking the status of the server, surely we need to reconfigure GR + MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg); + } else { + if (primary_partition == false || wsrep_desync == true || wsrep_local_state!=4) { + if (primary_partition == false) { + MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"primary_partition=NO"); + } else { + if (wsrep_desync == true) { + MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"wsrep_desync=YES"); + } else { + char msg[80]; + sprintf(msg,"wsrep_local_state=%d",wsrep_local_state); + MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, msg); + } + } + } else { + //if (wsrep_sst_donor_rejects_queries || wsrep_reject_queries) { + if (wsrep_reject_queries) { + MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"wsrep_reject_queries=true"); + // } else { + // // wsrep_sst_donor_rejects_queries + // MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"wsrep_sst_donor_rejects_queries=true"); + // } + } else { + if (read_only==true) { + if (wsrep_local_recv_queue > mmsd->max_transactions_behind) { + MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"slave is lagging"); + } else { + MyHGM->update_galera_set_read_only(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"read_only=YES"); + } + } else { + // 
the node is a writer + // TODO: for now we don't care about the number of writers + MyHGM->update_galera_set_writer(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup); + } + } + } + } + + // clean up + if (l<110) { + } else { + free(s); + } + } + if (mmsd->interr) { // check failed + } else { + if (crc==false) { + if (mmsd->mysql) { + GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); + mmsd->mysql=NULL; + } + } + } +__fast_exit_monitor_aws_aurora_thread: + if (mmsd->mysql) { + // if we reached here we didn't put the connection back + if (mmsd->mysql_error_msg) { + mysql_close(mmsd->mysql); // if we reached here we should destroy it + mmsd->mysql=NULL; + } else { + if (crc) { + bool rc=mmsd->set_wait_timeout(); + if (rc) { + GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); + } else { + mysql_close(mmsd->mysql); // set_wait_timeout failed + } + mmsd->mysql=NULL; + } else { // really not sure how we reached here, drop it + mysql_close(mmsd->mysql); + mmsd->mysql=NULL; + } + } + } + delete mysql_thr; + return NULL; +} diff --git a/lib/ProxySQL_Admin.cpp b/lib/ProxySQL_Admin.cpp index 8ceacf1aa..dbb60ce2a 100644 --- a/lib/ProxySQL_Admin.cpp +++ b/lib/ProxySQL_Admin.cpp @@ -341,6 +341,15 @@ static int http_handler(void *cls, struct MHD_Connection *connection, const char #define ADMIN_SQLITE_TABLE_RUNTIME_MYSQL_GALERA_HOSTGROUPS "CREATE TABLE runtime_mysql_galera_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , backup_writer_hostgroup INT CHECK (backup_writer_hostgroup>=0 AND backup_writer_hostgroup<>writer_hostgroup) NOT NULL , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND backup_writer_hostgroup<>reader_hostgroup AND reader_hostgroup>0) , offline_hostgroup INT NOT NULL CHECK (offline_hostgroup<>writer_hostgroup AND offline_hostgroup<>reader_hostgroup AND backup_writer_hostgroup<>offline_hostgroup AND offline_hostgroup>=0) , active INT CHECK 
(active IN (0,1)) NOT NULL DEFAULT 1 , max_writers INT NOT NULL CHECK (max_writers >= 0) DEFAULT 1 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1,2)) NOT NULL DEFAULT 0 , max_transactions_behind INT CHECK (max_transactions_behind>=0) NOT NULL DEFAULT 0 , comment VARCHAR , UNIQUE (reader_hostgroup) , UNIQUE (offline_hostgroup) , UNIQUE (backup_writer_hostgroup))" +// AWS Aurora + +#define ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS_V2_1_0a "CREATE TABLE mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 1000 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 500 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 500 AND check_timeout_ms <= 3000) DEFAULT 1000 , comment VARCHAR , UNIQUE (reader_hostgroup))" + +#define ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS_V2_1_0a + +#define ADMIN_SQLITE_TABLE_RUNTIME_MYSQL_AWS_AURORA_HOSTGROUPS "CREATE TABLE runtime_mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 1000 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 500 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 500 AND check_timeout_ms <= 3000) DEFAULT 1000 , comment VARCHAR , UNIQUE (reader_hostgroup))" + + // Cluster solution @@ -2055,6 +2064,8 @@ bool ProxySQL_Admin::GenericRefreshStatistics(const 
char *query_no_space, unsign bool monitor_mysql_server_galera_log=false; + bool monitor_mysql_server_aws_aurora_log=false; + bool stats_proxysql_servers_checksums = false; bool stats_proxysql_servers_metrics = false; bool stats_proxysql_servers_status = false; @@ -2115,6 +2126,8 @@ bool ProxySQL_Admin::GenericRefreshStatistics(const char *query_no_space, unsign strstr(query_no_space,"runtime_mysql_group_replication_hostgroups") || strstr(query_no_space,"runtime_mysql_galera_hostgroups") + || + strstr(query_no_space,"runtime_mysql_aws_aurora_hostgroups") ) { runtime_mysql_servers=true; refresh=true; } @@ -2156,6 +2169,9 @@ bool ProxySQL_Admin::GenericRefreshStatistics(const char *query_no_space, unsign if (strstr(query_no_space,"mysql_server_galera_log")) { monitor_mysql_server_galera_log=true; refresh=true; } + if (strstr(query_no_space,"mysql_server_aws_aurora_log")) { + monitor_mysql_server_aws_aurora_log=true; refresh=true; + } // if (stats_mysql_processlist || stats_mysql_connection_pool || stats_mysql_query_digest || stats_mysql_query_digest_reset) { if (refresh==true) { pthread_mutex_lock(&admin_mutex); @@ -2263,6 +2279,11 @@ bool ProxySQL_Admin::GenericRefreshStatistics(const char *query_no_space, unsign GloMyMon->populate_monitor_mysql_server_galera_log(); } } + if (monitor_mysql_server_aws_aurora_log) { + if (GloMyMon) { + GloMyMon->populate_monitor_mysql_server_aws_aurora_log(); + } + } pthread_mutex_unlock(&admin_mutex); } if ( @@ -2988,6 +3009,15 @@ void admin_session_handler(MySQL_Session *sess, void *_pa, PtrSize_t *pkt) { tablename=(char *)"MYSQL GALERA HOSTGROUPS"; SPA->admindb->execute_statement(q, &error, &cols, &affected_rows, &resultset); } + if ((strlen(query_no_space)==strlen("CHECKSUM MEMORY MYSQL AURORA HOSTGROUPS") && !strncasecmp("CHECKSUM MEMORY MYSQL AURORA HOSTGROUPS", query_no_space, strlen(query_no_space))) + || + (strlen(query_no_space)==strlen("CHECKSUM MEM MYSQL AURORA HOSTGROUPS") && !strncasecmp("CHECKSUM MEM MYSQL 
AURORA HOSTGROUPS", query_no_space, strlen(query_no_space))) + || + (strlen(query_no_space)==strlen("CHECKSUM MYSQL AURORA HOSTGROUPS") && !strncasecmp("CHECKSUM MYSQL AURORA HOSTGROUPS", query_no_space, strlen(query_no_space)))){ + char *q=(char *)"SELECT * FROM mysql_aws_aurora_hostgroups ORDER BY writer_hostgroup"; + tablename=(char *)"MYSQL AURORA HOSTGROUPS"; + SPA->admindb->execute_statement(q, &error, &cols, &affected_rows, &resultset); + } if (error) { proxy_error("Error: %s\n", error); @@ -3808,6 +3838,8 @@ bool ProxySQL_Admin::init() { insert_into_tables_defs(tables_defs_admin,"runtime_mysql_group_replication_hostgroups", ADMIN_SQLITE_TABLE_RUNTIME_MYSQL_GROUP_REPLICATION_HOSTGROUPS); insert_into_tables_defs(tables_defs_admin,"mysql_galera_hostgroups", ADMIN_SQLITE_TABLE_MYSQL_GALERA_HOSTGROUPS); insert_into_tables_defs(tables_defs_admin,"runtime_mysql_galera_hostgroups", ADMIN_SQLITE_TABLE_RUNTIME_MYSQL_GALERA_HOSTGROUPS); + insert_into_tables_defs(tables_defs_admin,"mysql_aws_aurora_hostgroups", ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS); + insert_into_tables_defs(tables_defs_admin,"runtime_mysql_aws_aurora_hostgroups", ADMIN_SQLITE_TABLE_RUNTIME_MYSQL_AWS_AURORA_HOSTGROUPS); insert_into_tables_defs(tables_defs_admin,"mysql_query_rules", ADMIN_SQLITE_TABLE_MYSQL_QUERY_RULES); insert_into_tables_defs(tables_defs_admin,"mysql_query_rules_fast_routing", ADMIN_SQLITE_TABLE_MYSQL_QUERY_RULES_FAST_ROUTING); insert_into_tables_defs(tables_defs_admin,"runtime_mysql_query_rules", ADMIN_SQLITE_TABLE_RUNTIME_MYSQL_QUERY_RULES); @@ -3833,6 +3865,7 @@ bool ProxySQL_Admin::init() { insert_into_tables_defs(tables_defs_config,"mysql_replication_hostgroups", ADMIN_SQLITE_TABLE_MYSQL_REPLICATION_HOSTGROUPS); insert_into_tables_defs(tables_defs_config,"mysql_group_replication_hostgroups", ADMIN_SQLITE_TABLE_MYSQL_GROUP_REPLICATION_HOSTGROUPS); insert_into_tables_defs(tables_defs_config,"mysql_galera_hostgroups", ADMIN_SQLITE_TABLE_MYSQL_GALERA_HOSTGROUPS); + 
insert_into_tables_defs(tables_defs_config,"mysql_aws_aurora_hostgroups", ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS); insert_into_tables_defs(tables_defs_config,"mysql_query_rules", ADMIN_SQLITE_TABLE_MYSQL_QUERY_RULES); insert_into_tables_defs(tables_defs_config,"mysql_query_rules_fast_routing", ADMIN_SQLITE_TABLE_MYSQL_QUERY_RULES_FAST_ROUTING); insert_into_tables_defs(tables_defs_config,"global_variables", ADMIN_SQLITE_TABLE_GLOBAL_VARIABLES); @@ -6363,6 +6396,7 @@ void ProxySQL_Admin::__insert_or_ignore_maintable_select_disktable() { admindb->execute("INSERT OR IGNORE INTO main.mysql_replication_hostgroups SELECT * FROM disk.mysql_replication_hostgroups"); admindb->execute("INSERT OR IGNORE INTO main.mysql_group_replication_hostgroups SELECT * FROM disk.mysql_group_replication_hostgroups"); admindb->execute("INSERT OR IGNORE INTO main.mysql_galera_hostgroups SELECT * FROM disk.mysql_galera_hostgroups"); + admindb->execute("INSERT OR IGNORE INTO main.mysql_aws_aurora_hostgroups SELECT * FROM disk.mysql_aws_aurora_hostgroups"); admindb->execute("INSERT OR IGNORE INTO main.mysql_users SELECT * FROM disk.mysql_users"); admindb->execute("INSERT OR IGNORE INTO main.mysql_query_rules SELECT * FROM disk.mysql_query_rules"); admindb->execute("INSERT OR IGNORE INTO main.mysql_query_rules_fast_routing SELECT * FROM disk.mysql_query_rules_fast_routing"); @@ -6389,6 +6423,7 @@ void ProxySQL_Admin::__insert_or_replace_maintable_select_disktable() { admindb->execute("INSERT OR REPLACE INTO main.mysql_replication_hostgroups SELECT * FROM disk.mysql_replication_hostgroups"); admindb->execute("INSERT OR REPLACE INTO main.mysql_group_replication_hostgroups SELECT * FROM disk.mysql_group_replication_hostgroups"); admindb->execute("INSERT OR REPLACE INTO main.mysql_galera_hostgroups SELECT * FROM disk.mysql_galera_hostgroups"); + admindb->execute("INSERT OR REPLACE INTO main.mysql_aws_aurora_hostgroups SELECT * FROM disk.mysql_aws_aurora_hostgroups"); admindb->execute("INSERT 
OR REPLACE INTO main.mysql_users SELECT * FROM disk.mysql_users"); admindb->execute("INSERT OR REPLACE INTO main.mysql_query_rules SELECT * FROM disk.mysql_query_rules"); admindb->execute("INSERT OR REPLACE INTO main.mysql_query_rules_fast_routing SELECT * FROM disk.mysql_query_rules_fast_routing"); @@ -6436,6 +6471,7 @@ void ProxySQL_Admin::__insert_or_replace_disktable_select_maintable() { admindb->execute("INSERT OR REPLACE INTO disk.mysql_replication_hostgroups SELECT * FROM main.mysql_replication_hostgroups"); admindb->execute("INSERT OR REPLACE INTO disk.mysql_group_replication_hostgroups SELECT * FROM main.mysql_group_replication_hostgroups"); admindb->execute("INSERT OR REPLACE INTO disk.mysql_galera_hostgroups SELECT * FROM main.mysql_galera_hostgroups"); + admindb->execute("INSERT OR REPLACE INTO disk.mysql_aws_aurora_hostgroups SELECT * FROM main.mysql_aws_aurora_hostgroups"); admindb->execute("INSERT OR REPLACE INTO disk.mysql_query_rules SELECT * FROM main.mysql_query_rules"); admindb->execute("INSERT OR REPLACE INTO disk.mysql_users SELECT * FROM main.mysql_users"); admindb->execute("INSERT OR REPLACE INTO disk.mysql_query_rules_fast_routing SELECT * FROM main.mysql_query_rules_fast_routing"); @@ -6523,10 +6559,12 @@ void ProxySQL_Admin::flush_mysql_servers__from_disk_to_memory() { admindb->execute("DELETE FROM main.mysql_replication_hostgroups"); admindb->execute("DELETE FROM main.mysql_group_replication_hostgroups"); admindb->execute("DELETE FROM main.mysql_galera_hostgroups"); + admindb->execute("DELETE FROM main.mysql_aws_aurora_hostgroups"); admindb->execute("INSERT INTO main.mysql_servers SELECT * FROM disk.mysql_servers"); admindb->execute("INSERT INTO main.mysql_replication_hostgroups SELECT * FROM disk.mysql_replication_hostgroups"); admindb->execute("INSERT INTO main.mysql_group_replication_hostgroups SELECT * FROM disk.mysql_group_replication_hostgroups"); admindb->execute("INSERT INTO main.mysql_galera_hostgroups SELECT * FROM 
disk.mysql_galera_hostgroups"); + admindb->execute("INSERT INTO main.mysql_aws_aurora_hostgroups SELECT * FROM disk.mysql_aws_aurora_hostgroups"); admindb->execute("PRAGMA foreign_keys = ON"); admindb->wrunlock(); } @@ -6538,10 +6576,12 @@ void ProxySQL_Admin::flush_mysql_servers__from_memory_to_disk() { admindb->execute("DELETE FROM disk.mysql_replication_hostgroups"); admindb->execute("DELETE FROM disk.mysql_group_replication_hostgroups"); admindb->execute("DELETE FROM disk.mysql_galera_hostgroups"); + admindb->execute("DELETE FROM disk.mysql_aws_aurora_hostgroups"); admindb->execute("INSERT INTO disk.mysql_servers SELECT * FROM main.mysql_servers"); admindb->execute("INSERT INTO disk.mysql_replication_hostgroups SELECT * FROM main.mysql_replication_hostgroups"); admindb->execute("INSERT INTO disk.mysql_group_replication_hostgroups SELECT * FROM main.mysql_group_replication_hostgroups"); admindb->execute("INSERT INTO disk.mysql_galera_hostgroups SELECT * FROM main.mysql_galera_hostgroups"); + admindb->execute("INSERT INTO disk.mysql_aws_aurora_hostgroups SELECT * FROM main.mysql_aws_aurora_hostgroups"); admindb->execute("PRAGMA foreign_keys = ON"); admindb->wrunlock(); } @@ -7704,6 +7744,46 @@ void ProxySQL_Admin::save_mysql_servers_runtime_to_database(bool _runtime) { } sqlite3_finalize(statement); } + + // dump mysql_aws_aurora_hostgroups + + if (_runtime) { + query=(char *)"DELETE FROM main.runtime_mysql_aws_aurora_hostgroups"; + } else { + query=(char *)"DELETE FROM main.mysql_aws_aurora_hostgroups"; + } + proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); + admindb->execute(query); + resultset=MyHGM->dump_table_mysql_aws_aurora_hostgroups(); + if (resultset) { + int rc; + sqlite3_stmt *statement=NULL; + sqlite3 *mydb3=admindb->get_db(); + char *query=NULL; + if (_runtime) { + query=(char *)"INSERT INTO runtime_mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,comment) VALUES (?1, ?2, ?3, ?4, ?5, 
?6, ?7)"; + } else { + query=(char *)"INSERT INTO mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"; + } + rc=sqlite3_prepare_v2(mydb3, query, -1, &statement, 0); + assert(rc==SQLITE_OK); + //proxy_info("New mysql_aws_aurora_hostgroups table\n"); + for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { + SQLite3_row *r=*it; + rc=sqlite3_bind_int64(statement, 1, atoi(r->fields[0])); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 2, atoi(r->fields[1])); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 3, atoi(r->fields[2])); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 4, atoi(r->fields[3])); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 5, atoi(r->fields[4])); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 6, atoi(r->fields[5])); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement, 7, r->fields[6], -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + + SAFE_SQLITE3_STEP2(statement); + rc=sqlite3_clear_bindings(statement); assert(rc==SQLITE_OK); + rc=sqlite3_reset(statement); assert(rc==SQLITE_OK); + } + sqlite3_finalize(statement); + } if(resultset) delete resultset; resultset=NULL; } @@ -7734,6 +7814,7 @@ void ProxySQL_Admin::load_mysql_servers_to_runtime() { SQLite3_result *resultset_replication=NULL; SQLite3_result *resultset_group_replication=NULL; SQLite3_result *resultset_galera=NULL; + SQLite3_result *resultset_aws_aurora=NULL; char *query=(char *)"SELECT hostgroup_id,hostname,port,gtid_port,status,weight,compression,max_connections,max_replication_lag,use_ssl,max_latency_ms,comment FROM main.mysql_servers"; proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset); @@ -7828,6 +7909,33 @@ void ProxySQL_Admin::load_mysql_servers_to_runtime() { 
MyHGM->set_incoming_galera_hostgroups(resultset_galera); } + // support for AWS Aurora, table mysql_aws_aurora_hostgroups + + // look for invalid combinations + query=(char *)"SELECT a.* FROM mysql_aws_aurora_hostgroups a JOIN mysql_aws_aurora_hostgroups b ON a.writer_hostgroup=b.reader_hostgroup WHERE b.reader_hostgroup"; + proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); + admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset); + if (error) { + proxy_error("Error on %s : %s\n", query, error); + } else { + for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { + SQLite3_row *r=*it; + proxy_error("Incompatible entry in mysql_aws_aurora_hostgroups will be ignored : ( %s , %s , %s , %s )\n", r->fields[0], r->fields[1], r->fields[2], r->fields[3]); + } + } + if (resultset) delete resultset; + resultset=NULL; + + query=(char *)"SELECT a.* FROM mysql_aws_aurora_hostgroups a LEFT JOIN mysql_aws_aurora_hostgroups b ON (a.writer_hostgroup=b.reader_hostgroup) WHERE b.reader_hostgroup IS NULL"; + proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); + admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset_aws_aurora); + if (error) { + proxy_error("Error on %s : %s\n", query, error); + } else { + // Pass the resultset to MyHGM + MyHGM->set_incoming_aws_aurora_hostgroups(resultset_aws_aurora); + } + // commit all the changes MyHGM->commit(); @@ -7846,6 +7954,10 @@ void ProxySQL_Admin::load_mysql_servers_to_runtime() { //delete resultset_galera; // do not delete, resultset is stored in MyHGM resultset_galera=NULL; } + if (resultset_aws_aurora) { + //delete resultset_aws_aurora; // do not delete, resultset is stored in MyHGM + resultset_aws_aurora=NULL; + } } @@ -8571,6 +8683,38 @@ int ProxySQL_Admin::Read_MySQL_Servers_from_configfile() { free(query); rows++; } + } + if (root.exists("mysql_aws_aurora_hostgroups")==true) { + const Setting &mysql_aws_aurora_hostgroups = 
root["mysql_aws_aurora_hostgroups"]; + int count = mysql_aws_aurora_hostgroups.getLength(); + char *q=(char *)"INSERT OR REPLACE INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, comment) VALUES (%d, %d, %d, %d, %d, %d, '%s')"; + for (i=0; i< count; i++) { + const Setting &line = mysql_aws_aurora_hostgroups[i]; + int writer_hostgroup; + int reader_hostgroup; + int active=1; // default + int max_lag_ms; + int check_interval_ms; + int check_timeout_ms; + int max_transactions_behind; + std::string comment=""; + if (line.lookupValue("writer_hostgroup", writer_hostgroup)==false) continue; + if (line.lookupValue("reader_hostgroup", reader_hostgroup)==false) continue; + if (line.lookupValue("max_lag_ms", max_lag_ms)==false) max_lag_ms=600000; + if (line.lookupValue("check_interval_ms", check_interval_ms)==false) check_interval_ms=1000; + if (line.lookupValue("check_timeout_ms", check_timeout_ms)==false) check_timeout_ms=1000; + line.lookupValue("comment", comment); + char *o1=strdup(comment.c_str()); + char *o=escape_string_single_quotes(o1, false); + char *query=(char *)malloc(strlen(q)+strlen(o)+128); // 128 vs sizeof(int)*8 + sprintf(query,q, writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, o); + //fprintf(stderr, "%s\n", query); + admindb->execute(query); + if (o!=o1) free(o); + free(o1); + free(query); + rows++; + } } admindb->execute("PRAGMA foreign_keys = ON"); return rows; From 2e20854684e74a5c3007ba723e8841bb3bc2b9a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Wed, 27 Feb 2019 04:05:11 +1100 Subject: [PATCH 02/20] Second commit (not complete) to support Aurora --- Makefile | 11 + include/MySQL_HostGroups_Manager.h | 13 +- include/MySQL_Monitor.hpp | 27 +- include/SQLite3_Server.h | 13 + include/proxysql_admin.h | 6 + lib/MySQL_HostGroups_Manager.cpp | 456 +++++++++++++- lib/MySQL_Monitor.cpp | 969 
++++++++++++++++++++++++++--- lib/MySQL_Protocol.cpp | 10 +- lib/MySQL_Session.cpp | 11 +- lib/ProxySQL_Admin.cpp | 73 ++- lib/ProxySQL_HTTP_Server.cpp | 2 +- lib/SQLite3_Server.cpp | 188 +++++- src/Makefile | 2 +- src/main.cpp | 4 +- 14 files changed, 1640 insertions(+), 145 deletions(-) diff --git a/Makefile b/Makefile index cdd9a89d7..94966b278 100644 --- a/Makefile +++ b/Makefile @@ -26,6 +26,9 @@ default: build_deps build_lib build_src .PHONY: debug debug: build_deps_debug build_lib_debug build_src_debug +.PHONY: testaurora +testaurora: build_deps_debug build_lib_testaurora build_src_testaurora + .PHONY: clickhouse clickhouse: build_deps_clickhouse build_lib_clickhouse build_src_clickhouse @@ -53,6 +56,14 @@ build_deps_debug: build_lib_debug: build_deps_debug cd lib && OPTZ="${O0} -ggdb -DDEBUG" CC=${CC} CXX=${CXX} ${MAKE} +.PHONY: build_src_testaurora +build_src_testaurora: build_deps build_lib_testaurora + cd src && OPTZ="${O0} -ggdb -DDEBUG -DTEST_AURORA" CC=${CC} CXX=${CXX} ${MAKE} + +.PHONY: build_lib_testaurora +build_lib_testaurora: build_deps_debug + cd lib && OPTZ="${O0} -ggdb -DDEBUG -DTEST_AURORA" CC=${CC} CXX=${CXX} ${MAKE} + .PHONY: build_src_debug build_src_debug: build_deps build_lib_debug cd src && OPTZ="${O0} -ggdb -DDEBUG" CC=${CC} CXX=${CXX} ${MAKE} diff --git a/include/MySQL_HostGroups_Manager.h b/include/MySQL_HostGroups_Manager.h index f75c235a6..2230f2f15 100644 --- a/include/MySQL_HostGroups_Manager.h +++ b/include/MySQL_HostGroups_Manager.h @@ -36,7 +36,7 @@ #define MYHGM_MYSQL_GALERA_HOSTGROUPS "CREATE TABLE mysql_galera_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , backup_writer_hostgroup INT CHECK (backup_writer_hostgroup>=0 AND backup_writer_hostgroup<>writer_hostgroup) NOT NULL , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND backup_writer_hostgroup<>reader_hostgroup AND reader_hostgroup>0) , offline_hostgroup INT NOT NULL CHECK (offline_hostgroup<>writer_hostgroup 
AND offline_hostgroup<>reader_hostgroup AND backup_writer_hostgroup<>offline_hostgroup AND offline_hostgroup>=0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , max_writers INT NOT NULL CHECK (max_writers >= 0) DEFAULT 1 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1,2)) NOT NULL DEFAULT 0 , max_transactions_behind INT CHECK (max_transactions_behind>=0) NOT NULL DEFAULT 0 , comment VARCHAR , UNIQUE (reader_hostgroup) , UNIQUE (offline_hostgroup) , UNIQUE (backup_writer_hostgroup))" -#define MYHGM_MYSQL_AWS_AURORA_HOSTGROUPS "CREATE TABLE mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 1000 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 500 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 500 AND check_timeout_ms <= 3000) DEFAULT 1000 , comment VARCHAR , UNIQUE (reader_hostgroup))" +#define MYHGM_MYSQL_AWS_AURORA_HOSTGROUPS "CREATE TABLE mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 10 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 100 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 80 AND check_timeout_ms <= 3000) DEFAULT 800 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1)) NOT NULL DEFAULT 0 , new_reader_weight INT CHECK (new_reader_weight >= 0 AND new_reader_weight <=10000000) NOT NULL DEFAULT 1 , comment 
VARCHAR , UNIQUE (reader_hostgroup))" typedef std::unordered_map umap_mysql_errors; @@ -324,6 +324,7 @@ class MySrvC { // MySQL Server Container enum MySerStatus status; unsigned int compression; unsigned int max_connections; + unsigned int aws_aurora_current_lag_us; unsigned int max_replication_lag; unsigned int max_connections_used; // The maximum number of connections that has been opened unsigned int connect_OK; @@ -437,13 +438,15 @@ class AWS_Aurora_Info { int max_lag_ms; int check_interval_ms; int check_timeout_ms; + int writer_is_also_reader; + int new_reader_weight; // TODO // add intermediary status value, for example the last check time char * comment; bool active; bool __active; - AWS_Aurora_Info(int w, int r, int ml, int ci, int ct, bool _a, char *c); - bool update(int r, int ml, int ci, int ct, bool _a, char *c); + AWS_Aurora_Info(int w, int r, int ml, int ci, int ct, bool _a, int wiar, int nrw, char *c); + bool update(int r, int ml, int ci, int ct, bool _a, int wiar, int nrw, char *c); ~AWS_Aurora_Info(); }; @@ -589,6 +592,10 @@ class MySQL_HostGroups_Manager { void converge_galera_config(int _writer_hostgroup); // FIXME : add action functions for AWS Aurora + //void aws_aurora_replication_lag_action(int _whid, int _rhid, char *address, unsigned int port, float current_replication_lag, bool enable, bool verbose=true); + bool aws_aurora_replication_lag_action(int _whid, int _rhid, char *address, unsigned int port, unsigned int current_replication_lag_us, bool enable, bool is_writer, bool verbose=true); + void update_aws_aurora_set_writer(int _whid, int _rhid, char *address, unsigned int port, bool verbose=true); + void update_aws_aurora_set_reader(int _whid, int _rhid, char *_hostname, int _port); SQLite3_result * get_stats_mysql_gtid_executed(); void generate_mysql_gtid_executed_tables(); diff --git a/include/MySQL_Monitor.hpp b/include/MySQL_Monitor.hpp index a2b0f0c30..da7ef3b0d 100644 --- a/include/MySQL_Monitor.hpp +++ b/include/MySQL_Monitor.hpp 
@@ -23,9 +23,13 @@ //#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_GALERA_LOG "CREATE TABLE mysql_server_galera_log (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_start_us INT NOT NULL DEFAULT 0 , success_time_us INT DEFAULT 0 , viable_candidate VARCHAR NOT NULL DEFAULT 'NO' , read_only VARCHAR NOT NULL DEFAULT 'YES' , transactions_behind INT DEFAULT 0 , error VARCHAR , PRIMARY KEY (hostname, port, time_start_us))" #define MONITOR_SQLITE_TABLE_MYSQL_SERVER_GALERA_LOG "CREATE TABLE mysql_server_galera_log (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_start_us INT NOT NULL DEFAULT 0 , success_time_us INT DEFAULT 0 , primary_partition VARCHAR NOT NULL DEFAULT 'NO' , read_only VARCHAR NOT NULL DEFAULT 'YES' , wsrep_local_recv_queue INT DEFAULT 0 , wsrep_local_state INT DEFAULT 0 , wsrep_desync VARCHAR NOT NULL DEFAULT 'NO' , wsrep_reject_queries VARCHAR NOT NULL DEFAULT 'NO' , wsrep_sst_donor_rejects_queries VARCHAR NOT NULL DEFAULT 'NO' , error VARCHAR , PRIMARY KEY (hostname, port, time_start_us))" -#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_LOG "CREATE TABLE mysql_server_aws_aurora_log (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_start_us INT NOT NULL DEFAULT 0 , success_time_us INT DEFAULT 0 , error VARCHAR , SERVER_ID VARCHAR NOT NULL DEFAULT '' , SESSION_ID VARCHAR NOT NULL DEFAULT '' , LAST_UPDATE_TIMESTAMP VARCHAR NOT NULL DEFAULT '' , replica_lag_in_microseconds INT NOT NULL DEFAULT 0 , CPU INT NOT NULL DEFAULT 0 , PRIMARY KEY (hostname, port, time_start_us, SERVER_ID))" +//#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_LOG "CREATE TABLE mysql_server_aws_aurora_log (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_start_us INT NOT NULL DEFAULT 0 , success_time_us INT DEFAULT 0 , error VARCHAR , SERVER_ID VARCHAR NOT NULL DEFAULT '' , SESSION_ID VARCHAR NOT NULL DEFAULT '' , LAST_UPDATE_TIMESTAMP VARCHAR NOT NULL DEFAULT '' , replica_lag_in_microseconds INT NOT NULL 
DEFAULT 0 , CPU INT NOT NULL DEFAULT 0 , PRIMARY KEY (hostname, port, time_start_us, SERVER_ID))" -//primary_partition VARCHAR NOT NULL DEFAULT 'NO' , read_only VARCHAR NOT NULL DEFAULT 'YES' , wsrep_local_recv_queue INT DEFAULT 0 , wsrep_local_state INT DEFAULT 0 , wsrep_desync VARCHAR NOT NULL DEFAULT 'NO' , wsrep_reject_queries VARCHAR NOT NULL DEFAULT 'NO' , wsrep_sst_donor_rejects_queries VARCHAR NOT NULL DEFAULT 'NO' , error VARCHAR , PRIMARY KEY (hostname, port, time_start_us))" +#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_LOG "CREATE TABLE mysql_server_aws_aurora_log (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_start_us INT NOT NULL DEFAULT 0 , success_time_us INT DEFAULT 0 , error VARCHAR , SERVER_ID VARCHAR NOT NULL DEFAULT '' , SESSION_ID VARCHAR , LAST_UPDATE_TIMESTAMP VARCHAR , replica_lag_in_microseconds INT NOT NULL DEFAULT 0 , CPU INT NOT NULL DEFAULT 0 , PRIMARY KEY (hostname, port, time_start_us, SERVER_ID))" + +#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_CHECK_STATUS "CREATE TABLE mysql_server_aws_aurora_check_status (writer_hostgroup INT NOT NULL , hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , last_checked_at VARCHAR , checks_tot INT NOT NULL DEFAULT 0 , checks_ok INT NOT NULL DEFAULT 0 , last_error VARCHAR , PRIMARY KEY (writer_hostgroup, hostname, port))" + +#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_FAILOVERS "CREATE TABLE mysql_server_aws_aurora_failovers (writer_hostgroup INT NOT NULL , hostname VARCHAR NOT NULL , inserted_at VARCHAR NOT NULL)" /* struct cmp_str { @@ -38,8 +42,9 @@ struct cmp_str { #define MyGR_Nentries 100 #define Galera_Nentries 100 -#define AWS_Aurora_Nentries 100 +#define AWS_Aurora_Nentries 50 +#define N_L_ASE 8 /* Implementation of monitoring in AWS Aurora will be different than previous modules @@ -58,10 +63,11 @@ class AWS_Aurora_replica_host_status_entry { public: char * server_id; char * session_id; - uint32_t last_update_timestamp; + char * 
last_update_timestamp; float replica_lag_ms; // originally a double float cpu; - AWS_Aurora_replica_host_status_entry(char *serid, char *sessid, uint32_t lut, float rlm, float _c); + AWS_Aurora_replica_host_status_entry(char *serid, char *sessid, char * lut, float rlm, float _c); + AWS_Aurora_replica_host_status_entry(char *serid, char *sessid, char * lut, char * rlm, char * _c); ~AWS_Aurora_replica_host_status_entry(); }; @@ -83,13 +89,16 @@ class AWS_Aurora_monitor_node { char *addr; int port; unsigned int writer_hostgroup; - AWS_Aurora_status_entry last_entries[AWS_Aurora_Nentries]; + uint64_t num_checks_tot; + uint64_t num_checks_ok; + time_t last_checked_at; + AWS_Aurora_status_entry *last_entries[AWS_Aurora_Nentries]; AWS_Aurora_monitor_node(char *_a, int _p, int _whg); ~AWS_Aurora_monitor_node(); bool add_entry(AWS_Aurora_status_entry *ase); // return true if status changed AWS_Aurora_status_entry *last_entry() { if (idx_last_entry == -1) return NULL; - return (&last_entries[idx_last_entry]); + return (last_entries[idx_last_entry]); } }; @@ -221,6 +230,7 @@ class MySQL_Monitor { SQLite3_result *Galera_Hosts_resultset; std::map AWS_Aurora_Hosts_Map; SQLite3_result *AWS_Aurora_Hosts_resultset; + uint64_t AWS_Aurora_Hosts_resultset_checksum; unsigned int num_threads; unsigned int aux_threads; unsigned int started_threads; @@ -252,10 +262,13 @@ class MySQL_Monitor { void populate_monitor_mysql_server_group_replication_log(); void populate_monitor_mysql_server_galera_log(); void populate_monitor_mysql_server_aws_aurora_log(); + void populate_monitor_mysql_server_aws_aurora_check_status(); char * galera_find_last_node(int); std::vector * galera_find_possible_last_nodes(int); bool server_responds_to_ping(char *address, int port); // FIXME : add AWS Aurora actions + void evaluate_aws_aurora_results(unsigned int wHG, unsigned int rHG, AWS_Aurora_status_entry **lasts_ase, unsigned int ase_idx, unsigned int max_latency_ms); +// void 
gdb_dump___monitor_mysql_server_aws_aurora_log(char *hostname); }; #endif /* __CLASS_MYSQL_MONITOR_H */ diff --git a/include/SQLite3_Server.h b/include/SQLite3_Server.h index a2ceb6ab4..7a83918bc 100644 --- a/include/SQLite3_Server.h +++ b/include/SQLite3_Server.h @@ -37,7 +37,20 @@ class SQLite3_Server { bool debug; #endif // DEBUG } variables; +#ifdef TEST_AURORA + std::vector *tables_defs_aurora; + void insert_into_tables_defs(std::vector *, const char *table_name, const char *table_def); + void drop_tables_defs(std::vector *tables_defs); + void check_and_build_standard_tables(SQLite3DB *db, std::vector *tables_defs); +#endif // TEST_AURORA public: +#ifdef TEST_AURORA + unsigned int cur_aurora_writer[3]; + unsigned int num_aurora_servers[3]; + unsigned int max_num_aurora_servers; + pthread_mutex_t aurora_mutex; + void populate_aws_aurora_table(MySQL_Session *sess); +#endif // TEST_AURORA SQLite3_Server(); ~SQLite3_Server(); char **get_variables_list(); diff --git a/include/proxysql_admin.h b/include/proxysql_admin.h index 963233cdf..ebae7428b 100644 --- a/include/proxysql_admin.h +++ b/include/proxysql_admin.h @@ -307,5 +307,11 @@ class ProxySQL_Admin { #endif /* PROXYSQLCLICKHOUSE */ void vacuum_stats(bool); + + +#ifdef TEST_AURORA + void enable_aurora_testing(); +#endif // TEST_AURORA + }; #endif /* __CLASS_PROXYSQL_ADMIN_H */ diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index f18833616..fd91fea9a 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -814,6 +814,7 @@ MySrvC::MySrvC(char *add, uint16_t p, uint16_t gp, unsigned int _weight, enum My use_ssl=_use_ssl; max_latency_us=_max_latency_ms*1000; current_latency_us=0; + aws_aurora_current_lag_us = 0; connect_OK=0; connect_ERR=0; queries_sent=0; @@ -967,10 +968,12 @@ MySQL_HostGroups_Manager::MySQL_HostGroups_Manager() { mydb->execute(MYHGM_MYSQL_REPLICATION_HOSTGROUPS); mydb->execute(MYHGM_MYSQL_GROUP_REPLICATION_HOSTGROUPS); 
mydb->execute(MYHGM_MYSQL_GALERA_HOSTGROUPS); + mydb->execute(MYHGM_MYSQL_AWS_AURORA_HOSTGROUPS); MyHostGroups=new PtrArray(); incoming_replication_hostgroups=NULL; incoming_group_replication_hostgroups=NULL; incoming_galera_hostgroups=NULL; + incoming_aws_aurora_hostgroups = NULL; pthread_rwlock_init(>id_rwlock, NULL); gtid_missing_nodes = false; gtid_ev_async = (struct ev_async *)malloc(sizeof(struct ev_async)); @@ -1383,6 +1386,13 @@ bool MySQL_HostGroups_Manager::commit() { generate_mysql_galera_hostgroups_table(); } + // AWS Aurora + if (incoming_aws_aurora_hostgroups) { + proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 4, "DELETE FROM mysql_aws_aurora_hostgroups\n"); + mydb->execute("DELETE FROM mysql_aws_aurora_hostgroups"); + generate_mysql_aws_aurora_hostgroups_table(); + } + if ( GloAdmin && GloAdmin->checksum_variables.checksum_mysql_servers ) { uint64_t hash1=0, hash2=0; @@ -1514,6 +1524,25 @@ bool MySQL_HostGroups_Manager::commit() { delete resultset; } } + { + char *error=NULL; + int cols=0; + int affected_rows=0; + SQLite3_result *resultset=NULL; + char *query=(char *)"SELECT * FROM mysql_aws_aurora_hostgroups ORDER BY writer_hostgroup"; + mydb->execute_statement(query, &error , &cols , &affected_rows , &resultset); + if (resultset) { + if (resultset->rows_count) { + if (init == false) { + init = true; + myhash.Init(19,3); + } + uint64_t hash1_ = resultset->raw_checksum(); + myhash.Update(&hash1_, sizeof(hash1_)); + } + delete resultset; + } + } if (init == true) { myhash.Final(&hash1, &hash2); } @@ -1894,7 +1923,7 @@ void MySQL_HostGroups_Manager::generate_mysql_group_replication_hostgroups_table int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; - char *query=(char *)"SELECT writer_hostgroup, hostname, port, MAX(use_ssl) use_ssl , writer_is_also_reader , max_transactions_behind FROM mysql_servers JOIN mysql_group_replication_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=backup_writer_hostgroup OR hostgroup_id=reader_hostgroup OR 
hostgroup_id=offline_hostgroup WHERE status NOT IN (2,3) GROUP BY hostname, port"; + char *query=(char *)"SELECT writer_hostgroup, hostname, port, MAX(use_ssl) use_ssl , writer_is_also_reader , max_transactions_behind FROM mysql_servers JOIN mysql_group_replication_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=backup_writer_hostgroup OR hostgroup_id=reader_hostgroup OR hostgroup_id=offline_hostgroup WHERE active=1 AND status NOT IN (2,3) GROUP BY hostname, port"; mydb->execute_statement(query, &error , &cols , &affected_rows , &resultset); if (resultset) { if (GloMyMon->Group_Replication_Hosts_resultset) { @@ -1989,7 +2018,7 @@ void MySQL_HostGroups_Manager::generate_mysql_galera_hostgroups_table() { int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; - char *query=(char *)"SELECT writer_hostgroup, hostname, port, MAX(use_ssl) use_ssl , writer_is_also_reader , max_transactions_behind FROM mysql_servers JOIN mysql_galera_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=backup_writer_hostgroup OR hostgroup_id=reader_hostgroup OR hostgroup_id=offline_hostgroup WHERE status NOT IN (2,3) GROUP BY hostname, port"; + char *query=(char *)"SELECT writer_hostgroup, hostname, port, MAX(use_ssl) use_ssl , writer_is_also_reader , max_transactions_behind FROM mysql_servers JOIN mysql_galera_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=backup_writer_hostgroup OR hostgroup_id=reader_hostgroup OR hostgroup_id=offline_hostgroup WHERE active=1 AND status NOT IN (2,3) GROUP BY hostname, port"; mydb->execute_statement(query, &error , &cols , &affected_rows , &resultset); if (resultset) { if (GloMyMon->Galera_Hosts_resultset) { @@ -2069,7 +2098,7 @@ SQLite3_result * MySQL_HostGroups_Manager::dump_table_mysql_aws_aurora_hostgroup int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; - char *query=(char *)"SELECT writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,comment FROM 
mysql_aws_aurora_hostgroups"; + char *query=(char *)"SELECT writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,writer_is_also_reader,new_reader_weight,comment FROM mysql_aws_aurora_hostgroups"; proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 4, "%s\n", query); mydb->execute_statement(query, &error , &cols , &affected_rows , &resultset); wrunlock(); @@ -4704,7 +4733,7 @@ SQLite3_result * MySQL_HostGroups_Manager::get_mysql_errors(bool reset) { return result; } -AWS_Aurora_Info::AWS_Aurora_Info(int w, int r, int ml, int ci, int ct, bool _a, char *c) { +AWS_Aurora_Info::AWS_Aurora_Info(int w, int r, int ml, int ci, int ct, bool _a, int wiar, int nrw, char *c) { comment=NULL; if (c) { comment=strdup(c); @@ -4714,9 +4743,11 @@ AWS_Aurora_Info::AWS_Aurora_Info(int w, int r, int ml, int ci, int ct, bool _a, max_lag_ms=ml; check_interval_ms=ci; check_timeout_ms=ct; + writer_is_also_reader=wiar; + new_reader_weight=nrw; active=_a; __active=true; - need_converge=true; + //need_converge=true; } AWS_Aurora_Info::~AWS_Aurora_Info() { @@ -4726,7 +4757,7 @@ AWS_Aurora_Info::~AWS_Aurora_Info() { } } -bool AWS_Aurora_Info::update(int r, int ml, int ci, int ct, bool _a, char *c) { +bool AWS_Aurora_Info::update(int r, int ml, int ci, int ct, bool _a, int wiar, int nrw, char *c) { bool ret=false; __active=true; if (reader_hostgroup!=r) { @@ -4745,6 +4776,14 @@ bool AWS_Aurora_Info::update(int r, int ml, int ci, int ct, bool _a, char *c) { check_timeout_ms=ct; ret=true; } + if (writer_is_also_reader != wiar) { + writer_is_also_reader = wiar; + ret = true; + } + if (new_reader_weight != nrw) { + new_reader_weight = nrw; + ret = true; + } if (active!=_a) { active=_a; ret=true; @@ -4775,7 +4814,7 @@ void MySQL_HostGroups_Manager::generate_mysql_aws_aurora_hostgroups_table() { int rc; sqlite3_stmt *statement=NULL; sqlite3 *mydb3=mydb->get_db(); - char *query=(char *)"INSERT INTO 
mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"; + char *query=(char *)"INSERT INTO mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,writer_is_also_reader,new_reader_weight,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)"; rc=sqlite3_prepare_v2(mydb3, query, -1, &statement, 0); assert(rc==SQLITE_OK); proxy_info("New mysql_aws_aurora_hostgroups table\n"); @@ -4793,14 +4832,18 @@ void MySQL_HostGroups_Manager::generate_mysql_aws_aurora_hostgroups_table() { int max_lag_ms = atoi(r->fields[3]); int check_interval_ms = atoi(r->fields[4]); int check_timeout_ms = atoi(r->fields[5]); - proxy_info("Loading AWS Aurora info for (%d,%d,%s,%d,%d,%d,\"%s\")\n", writer_hostgroup,reader_hostgroup,(active ? "on" : "off"),max_lag_ms,check_interval_ms,check_timeout_ms,r->fields[8]); + int writer_is_also_reader = atoi(r->fields[6]); + int new_reader_weight = atoi(r->fields[7]); + proxy_info("Loading AWS Aurora info for (%d,%d,%s,%d,%d,%d,\"%s\")\n", writer_hostgroup,reader_hostgroup,(active ?
"on" : "off"),max_lag_ms,check_interval_ms,check_timeout_ms,r->fields[8]); /* fields[8] is the comment column; fields[6] and fields[7] are writer_is_also_reader and new_reader_weight */ rc=sqlite3_bind_int64(statement, 1, writer_hostgroup); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 2, reader_hostgroup); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 3, active); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 4, max_lag_ms); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 5, check_interval_ms); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 6, check_timeout_ms); assert(rc==SQLITE_OK); - rc=sqlite3_bind_text(statement, 7, r->fields[6], -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 7, writer_is_also_reader); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 8, new_reader_weight); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement, 9, r->fields[8], -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); SAFE_SQLITE3_STEP2(statement); rc=sqlite3_clear_bindings(statement); assert(rc==SQLITE_OK); @@ -4811,12 +4854,12 @@ void MySQL_HostGroups_Manager::generate_mysql_aws_aurora_hostgroups_table() { if (it2!=AWS_Aurora_Info_Map.end()) { info=it2->second; bool changed=false; - changed=info->update(reader_hostgroup, max_lag_ms, check_interval_ms, check_timeout_ms, (bool)active, r->fields[6]); + changed=info->update(reader_hostgroup, max_lag_ms, check_interval_ms, check_timeout_ms, (bool)active, writer_is_also_reader, new_reader_weight, r->fields[8]); if (changed) { //info->need_converge=true; } } else { - info=new AWS_Aurora_Info(writer_hostgroup, reader_hostgroup, max_lag_ms, check_interval_ms, check_timeout_ms, (bool)active, r->fields[6]); + info=new AWS_Aurora_Info(writer_hostgroup, reader_hostgroup, max_lag_ms, check_interval_ms, check_timeout_ms, (bool)active, writer_is_also_reader, new_reader_weight, r->fields[8]); //info->need_converge=true; AWS_Aurora_Info_Map.insert(AWS_Aurora_Info_Map.begin(), std::pair(writer_hostgroup,info)); } @@ -4845,16 +4888,405 @@ void
MySQL_HostGroups_Manager::generate_mysql_aws_aurora_hostgroups_table() { int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; - char *query=(char *)"SELECT writer_hostgroup, hostname, port, MAX(use_ssl) use_ssl , writer_is_also_reader , max_lag_ms , check_interval_ms , check_timeout_ms FROM mysql_servers JOIN mysql_aws_aurora_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup WHERE status NOT IN (2,3) GROUP BY hostname, port"; + char *query=(char *)"SELECT writer_hostgroup, reader_hostgroup, hostname, port, MAX(use_ssl) use_ssl , max_lag_ms , check_interval_ms , check_timeout_ms FROM mysql_servers JOIN mysql_aws_aurora_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup WHERE active=1 AND status NOT IN (2,3) GROUP BY hostname, port"; mydb->execute_statement(query, &error , &cols , &affected_rows , &resultset); if (resultset) { if (GloMyMon->AWS_Aurora_Hosts_resultset) { delete GloMyMon->AWS_Aurora_Hosts_resultset; } GloMyMon->AWS_Aurora_Hosts_resultset=resultset; + GloMyMon->AWS_Aurora_Hosts_resultset_checksum=resultset->raw_checksum(); } } pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); pthread_mutex_unlock(&AWS_Aurora_Info_mutex); } + + + +//void MySQL_HostGroups_Manager::aws_aurora_replication_lag_action(int _whid, int _rhid, char *address, unsigned int port, float current_replication_lag, bool enable, bool verbose) { +// this function returns false is the server is in the wrong HG +bool MySQL_HostGroups_Manager::aws_aurora_replication_lag_action(int _whid, int _rhid, char *address, unsigned int port, unsigned int current_replication_lag_us, bool enable, bool is_writer, bool verbose) { + bool ret = false; // return false by default + bool reader_found_in_whg = false; + if (is_writer) { + // if the server is a writer, we will set ret back to true once found + ret = false; + } + GloAdmin->mysql_servers_wrlock(); + wrlock(); + int i,j; + for (i=0; i<(int)MyHostGroups->len; i++) { + MyHGC 
*myhgc=(MyHGC *)MyHostGroups->index(i); + if (_whid!=(int)myhgc->hid && _rhid!=(int)myhgc->hid) continue; + for (j=0; j<(int)myhgc->mysrvs->cnt(); j++) { + MySrvC *mysrvc=(MySrvC *)myhgc->mysrvs->servers->index(j); + if (strcmp(mysrvc->address,address)==0 && mysrvc->port==port) { + // we found the server + if (enable==false) { + if (mysrvc->status == MYSQL_SERVER_STATUS_ONLINE) { + if (verbose) { + proxy_warning("Shunning server %s:%d from HG %u with replication lag of %u microseconds\n", address, port, myhgc->hid, current_replication_lag_us); + } + mysrvc->status = MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG; + } + } else { + if (mysrvc->status == MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) { + if (verbose) { + proxy_warning("Re-enabling server %s:%d from HG %u with replication lag of %u microseconds\n", address, port, myhgc->hid, current_replication_lag_us); + } + mysrvc->status = MYSQL_SERVER_STATUS_ONLINE; + } + } + mysrvc->aws_aurora_current_lag_us = current_replication_lag_us; + if (mysrvc->status == MYSQL_SERVER_STATUS_ONLINE || mysrvc->status == MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) { + // we perform check only if ONLINE or lagging + if (ret) { + if (_whid==(int)myhgc->hid && is_writer==false) { + // the server should be a reader + // but it is in the writer hostgroup + ret = false; + reader_found_in_whg = true; // remember it: the final check below forces the result to false + } + } else { + if (is_writer==true) { + if (_whid==(int)myhgc->hid) { + // the server should be a writer + // and we found it in the writer hostgroup + ret = true; + } + } else { + if (_rhid==(int)myhgc->hid) { + // the server should be a reader + // and we found it in the reader hostgroup + ret = true; + } + } + } + } + //goto __exit_aws_aurora_replication_lag_action; + } + } + } +//__exit_aws_aurora_replication_lag_action: + wrunlock(); + GloAdmin->mysql_servers_wrunlock(); + if (ret == true) { + if (reader_found_in_whg == true) { + ret = false; + } + } + return ret; +} + +// FIXME: complete this!!
+void MySQL_HostGroups_Manager::update_aws_aurora_set_writer(int _whid, int _rhid, char *_hostname, unsigned int _port, bool verbose) { + int cols=0; + int affected_rows=0; + SQLite3_result *resultset=NULL; + char *query=NULL; + char *q=NULL; + char *error=NULL; + //q=(char *)"SELECT hostgroup_id FROM mysql_servers JOIN mysql_galera_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup OR hostgroup_id=backup_writer_hostgroup OR hostgroup_id=offline_hostgroup WHERE hostname='%s' AND port=%d AND status<>3"; + q=(char *)"SELECT hostgroup_id FROM mysql_servers JOIN mysql_aws_aurora_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup WHERE hostname='%s' AND port=%d AND status<>3"; + query=(char *)malloc(strlen(q)+strlen(_hostname)+1024*1024); + sprintf(query,q,_hostname,_port); + mydb->execute_statement(query, &error, &cols , &affected_rows , &resultset); + if (error) { + free(error); + error=NULL; + } + //free(query); + + int writer_is_also_reader=0; + int new_reader_weight = 0; + bool found_writer=false; + bool found_reader=false; + int _writer_hostgroup = _whid; + int read_HG=-1; + //bool need_converge=false; + //bool need_converge=true; + if (resultset) { + // let's get info about this cluster + pthread_mutex_lock(&AWS_Aurora_Info_mutex); + std::map::iterator it2; + it2 = AWS_Aurora_Info_Map.find(_writer_hostgroup); + AWS_Aurora_Info *info=NULL; + if (it2!=AWS_Aurora_Info_Map.end()) { + info=it2->second; + writer_is_also_reader=info->writer_is_also_reader; + new_reader_weight = info->new_reader_weight; + read_HG = info->reader_hostgroup; + //need_converge=info->need_converge; + //info->need_converge=false; + //max_writers = info->max_writers; + } + pthread_mutex_unlock(&AWS_Aurora_Info_mutex); + + if (resultset->rows_count) { + for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { + SQLite3_row *r=*it; + int hostgroup=atoi(r->fields[0]); + if (hostgroup==_writer_hostgroup) { + 
found_writer=true; + } + if (read_HG>=0) { + if (hostgroup==read_HG) { + found_reader=true; + } + } + } + } +/* + if (need_converge == false) { + SQLite3_result *resultset2=NULL; + q = (char *)"SELECT COUNT(*) FROM mysql_servers WHERE hostgroup_id=%d AND status=0"; + query=(char *)malloc(strlen(q)+32); + sprintf(query,q,_writer_hostgroup); + mydb->execute_statement(query, &error, &cols , &affected_rows , &resultset2); + if (resultset2) { + if (resultset2->rows_count) { + for (std::vector::iterator it = resultset2->rows.begin() ; it != resultset2->rows.end(); ++it) { + SQLite3_row *r=*it; + int nwriters = atoi(r->fields[0]); + if (nwriters > max_writers) { + proxy_warning("Galera: too many writers in HG %d. Max=%d, current=%d\n", _writer_hostgroup, max_writers, nwriters); + need_converge = true; + } + } + } + delete resultset2; + } + free(query); + } +*/ +// if (need_converge==false) { + if (found_writer) { // maybe no-op + if ( + (writer_is_also_reader==0 && found_reader==false) + || + (writer_is_also_reader > 0 && found_reader==true) + ) { // either both true or both false + delete resultset; + resultset=NULL; + } + } +// } + } + + if (resultset) { + // If we reach there, there is some action to perform. + // This should be the case most of the time, + // because the calling function knows if an action is required. 
+ if (resultset->rows_count) { + //need_converge=false; + + GloAdmin->mysql_servers_wrlock(); + mydb->execute("DELETE FROM mysql_servers_incoming"); + q=(char *)"INSERT INTO mysql_servers_incoming SELECT hostgroup_id, hostname, port, gtid_port, weight, status, compression, max_connections, max_replication_lag, use_ssl, max_latency_ms, comment FROM mysql_servers WHERE hostgroup_id<>%d"; + sprintf(query,q,_writer_hostgroup); + mydb->execute(query); + q=(char *)"INSERT INTO mysql_servers_incoming SELECT hostgroup_id, hostname, port, gtid_port, weight, status, compression, max_connections, max_replication_lag, use_ssl, max_latency_ms, comment FROM mysql_servers WHERE hostgroup_id=%d AND hostname='%s' AND port=%d"; + sprintf(query,q,_writer_hostgroup,_hostname,_port); + mydb->execute(query); + q=(char *)"UPDATE OR IGNORE mysql_servers_incoming SET hostgroup_id=%d WHERE hostname='%s' AND port=%d AND hostgroup_id<>%d"; + //query=(char *)malloc(strlen(q)+strlen(_hostname)+1024); // increased this buffer as it is used for other queries too + sprintf(query,q,_writer_hostgroup,_hostname,_port,_writer_hostgroup); + mydb->execute(query); + //free(query); + q=(char *)"DELETE FROM mysql_servers_incoming WHERE hostname='%s' AND port=%d AND hostgroup_id<>%d"; + //query=(char *)malloc(strlen(q)+strlen(_hostname)+64); + sprintf(query,q,_hostname,_port,_writer_hostgroup); + mydb->execute(query); + //free(query); + q=(char *)"UPDATE mysql_servers_incoming SET status=0 WHERE hostname='%s' AND port=%d AND hostgroup_id=%d"; + //query=(char *)malloc(strlen(q)+strlen(_hostname)+64); + sprintf(query,q,_hostname,_port,_writer_hostgroup); + mydb->execute(query); + + // we need to move the old writer into the reader HG + q=(char *)"DELETE FROM mysql_servers_incoming WHERE status=3 AND hostgroup_id=%d"; + sprintf(query,q,_rhid); + mydb->execute(query); + q=(char *)"INSERT OR IGNORE INTO mysql_servers_incoming SELECT %d, hostname, port, gtid_port, %d, status, compression, max_connections, 
max_replication_lag, use_ssl, max_latency_ms, comment FROM mysql_servers WHERE hostgroup_id=%d AND status=0"; + sprintf(query,q,_rhid, new_reader_weight, _whid); + mydb->execute(query); + + //free(query); + if (writer_is_also_reader && read_HG>=0) { + q=(char *)"INSERT OR IGNORE INTO mysql_servers_incoming (hostgroup_id,hostname,port,gtid_port,status,weight,compression,max_connections,max_replication_lag,use_ssl,max_latency_ms,comment) SELECT %d,hostname,port,gtid_port,status,weight,compression,max_connections,max_replication_lag,use_ssl,max_latency_ms,comment FROM mysql_servers_incoming WHERE hostgroup_id=%d AND hostname='%s' AND port=%d"; + sprintf(query,q,read_HG,_writer_hostgroup,_hostname,_port); + mydb->execute(query); + q = (char *)"UPDATE mysql_servers_incoming SET weight=%d WHERE hostgroup_id=%d AND hostname='%s' AND port=%d"; + sprintf(query, q, new_reader_weight, read_HG, _hostname, _port); mydb->execute(query); // run the UPDATE: the buffer is overwritten later, so without this the weight change is lost + } + //converge_galera_config(_writer_hostgroup); + uint64_t checksum_current = 0; + uint64_t checksum_incoming = 0; + { + int cols=0; + int affected_rows=0; + SQLite3_result *resultset_servers=NULL; + char *query=NULL; + char *q1 = NULL; + char *q2 = NULL; + char *error=NULL; + q1 = (char *)"SELECT DISTINCT hostgroup_id, hostname, port, gtid_port, weight, status, compression, max_connections, max_replication_lag, use_ssl, max_latency_ms, mysql_servers.comment FROM mysql_servers JOIN mysql_aws_aurora_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup WHERE writer_hostgroup=%d ORDER BY hostgroup_id, hostname, port"; + q2 = (char *)"SELECT DISTINCT hostgroup_id, hostname, port, gtid_port, weight, status, compression, max_connections, max_replication_lag, use_ssl, max_latency_ms, mysql_servers_incoming.comment FROM mysql_servers_incoming JOIN mysql_aws_aurora_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup WHERE writer_hostgroup=%d ORDER BY hostgroup_id, hostname, port"; + query = (char *)malloc(strlen(q2)+128); +
sprintf(query,q1,_writer_hostgroup); + mydb->execute_statement(query, &error , &cols , &affected_rows , &resultset_servers); + if (error == NULL) { + if (resultset_servers) { + checksum_current = resultset_servers->raw_checksum(); + } + } + if (resultset_servers) { + delete resultset_servers; + resultset_servers = NULL; + } + sprintf(query,q2,_writer_hostgroup); + mydb->execute_statement(query, &error , &cols , &affected_rows , &resultset_servers); + if (error == NULL) { + if (resultset_servers) { + checksum_incoming = resultset_servers->raw_checksum(); + } + } + if (resultset_servers) { + delete resultset_servers; + resultset_servers = NULL; + } + free(query); + } + if (checksum_incoming!=checksum_current) { + proxy_warning("AWS Aurora: setting host %s:%d as writer\n", _hostname, _port); + commit(); + wrlock(); +/* + SQLite3_result *resultset2=NULL; + q=(char *)"SELECT writer_hostgroup, reader_hostgroup FROM mysql_aws_aurora_hostgroups WHERE writer_hostgroup=%d"; + sprintf(query,q,_writer_hostgroup); + mydb->execute_statement(query, &error, &cols , &affected_rows , &resultset2); + if (resultset2) { + if (resultset2->rows_count) { + for (std::vector::iterator it = resultset2->rows.begin() ; it != resultset2->rows.end(); ++it) { + SQLite3_row *r=*it; + int writer_hostgroup=atoi(r->fields[0]); + int reader_hostgroup=atoi(r->fields[1]); +*/ + q=(char *)"DELETE FROM mysql_servers WHERE hostgroup_id IN (%d , %d)"; + sprintf(query,q,_whid,_rhid); + mydb->execute(query); + generate_mysql_servers_table(&_whid); + generate_mysql_servers_table(&_rhid); +/* + } + } + delete resultset2; + resultset2=NULL; + } +*/ + wrunlock(); + } else { + if (GloMTH->variables.hostgroup_manager_verbose > 1) { + proxy_warning("AWS Aurora: skipping setting node %s:%d from hostgroup %d as writer because won't change the list of ONLINE nodes in writer hostgroup\n", _hostname, _port, _writer_hostgroup); + } + } + GloAdmin->mysql_servers_wrunlock(); + free(query); + query = NULL; + } + } + if 
(resultset) { + delete resultset; + resultset=NULL; + } + if (query) { + free(query); + } +} + +void MySQL_HostGroups_Manager::update_aws_aurora_set_reader(int _whid, int _rhid, char *_hostname, int _port) { + int cols=0; + int affected_rows=0; + SQLite3_result *resultset=NULL; + char *query=NULL; + char *q=NULL; + char *error=NULL; + int _writer_hostgroup = _whid; + q=(char *)"SELECT hostgroup_id FROM mysql_servers JOIN mysql_aws_aurora_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup WHERE hostname='%s' AND port=%d AND status<>3"; + query=(char *)malloc(strlen(q)+strlen(_hostname)+32); + sprintf(query,q,_hostname,_port); + mydb->execute_statement(query, &error, &cols , &affected_rows , &resultset); + if (error) { + free(error); + error=NULL; + } + free(query); + if (resultset) { // we lock only if needed + if (resultset->rows_count) { + proxy_warning("AWS Aurora: setting host %s:%d (part of cluster with writer_hostgroup=%d) in a reader, moving from writer_hostgroup %d to reader_hostgroup %d\n", _hostname, _port, _whid, _whid, _rhid); + GloAdmin->mysql_servers_wrlock(); + mydb->execute("DELETE FROM mysql_servers_incoming"); + mydb->execute("INSERT INTO mysql_servers_incoming SELECT hostgroup_id, hostname, port, gtid_port, weight, status, compression, max_connections, max_replication_lag, use_ssl, max_latency_ms, comment FROM mysql_servers"); + q=(char *)"UPDATE OR IGNORE mysql_servers_incoming SET hostgroup_id=%d WHERE hostname='%s' AND port=%d AND hostgroup_id<>%d"; + query=(char *)malloc(strlen(q)+strlen(_hostname)+512); + sprintf(query,q,_rhid,_hostname,_port,_rhid); + mydb->execute(query); + //free(query); + q=(char *)"DELETE FROM mysql_servers_incoming WHERE hostname='%s' AND port=%d AND hostgroup_id<>%d"; + //query=(char *)malloc(strlen(q)+strlen(_hostname)+64); + sprintf(query,q,_hostname,_port,_rhid); + mydb->execute(query); + //free(query); + q=(char *)"UPDATE mysql_servers_incoming SET status=0 WHERE hostname='%s' AND port=%d 
AND hostgroup_id=%d"; + sprintf(query,q,_hostname,_port,_rhid); + mydb->execute(query); + //free(query); + //converge_galera_config(_writer_hostgroup); + commit(); + wrlock(); +/* + SQLite3_result *resultset2=NULL; + q=(char *)"SELECT writer_hostgroup, reader_hostgroup FROM mysql_galera_hostgroups WHERE writer_hostgroup=%d"; + //query=(char *)malloc(strlen(q)+strlen(_hostname)+64); + sprintf(query,q,_writer_hostgroup); + mydb->execute_statement(query, &error, &cols , &affected_rows , &resultset2); + if (resultset2) { + if (resultset2->rows_count) { + for (std::vector::iterator it = resultset2->rows.begin() ; it != resultset2->rows.end(); ++it) { + SQLite3_row *r=*it; + int writer_hostgroup=atoi(r->fields[0]); + int backup_writer_hostgroup=atoi(r->fields[1]); + int reader_hostgroup=atoi(r->fields[2]); + int offline_hostgroup=atoi(r->fields[3]); +*/ + q=(char *)"DELETE FROM mysql_servers WHERE hostgroup_id IN (%d , %d)"; + sprintf(query,q,_whid,_rhid); + mydb->execute(query); + generate_mysql_servers_table(&_whid); + generate_mysql_servers_table(&_rhid); +/* + generate_mysql_servers_table(&writer_hostgroup); + generate_mysql_servers_table(&backup_writer_hostgroup); + generate_mysql_servers_table(&reader_hostgroup); + generate_mysql_servers_table(&offline_hostgroup); + } + } + delete resultset2; + resultset2=NULL; + } +*/ + wrunlock(); + GloAdmin->mysql_servers_wrunlock(); + free(query); + } + } + if (resultset) { + delete resultset; + resultset=NULL; + } +} + + diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 23176ab1d..0259cfd36 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -294,6 +294,19 @@ void * monitor_galera_pthread(void *arg) { return NULL; } +void * monitor_aws_aurora_pthread(void *arg) { +//#ifndef NOJEM +// bool cache=false; +// mallctl("thread.tcache.enabled", NULL, NULL, &cache, sizeof(bool)); +//#endif + while (GloMTH==NULL) { + usleep(50000); + } + usleep(100000); + GloMyMon->monitor_aws_aurora(); + return NULL; +} + 
void * monitor_replication_lag_pthread(void *arg) { #ifndef NOJEM bool cache=false; @@ -321,7 +334,7 @@ MySQL_Monitor::MySQL_Monitor() { pthread_mutex_init(&aws_aurora_mutex,NULL); AWS_Aurora_Hosts_resultset=NULL; - + AWS_Aurora_Hosts_resultset_checksum = 0; shutdown=false; monitor_enabled=true; // default // create new SQLite datatabase @@ -342,6 +355,8 @@ MySQL_Monitor::MySQL_Monitor() { insert_into_tables_defs(tables_defs_monitor,"mysql_server_group_replication_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_GROUP_REPLICATION_LOG); insert_into_tables_defs(tables_defs_monitor,"mysql_server_galera_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_GALERA_LOG); insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_log", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_LOG); + insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_check_status", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_CHECK_STATUS); + insert_into_tables_defs(tables_defs_monitor,"mysql_server_aws_aurora_failovers", MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_FAILOVERS); // create monitoring tables check_and_build_standard_tables(monitordb, tables_defs_monitor); monitordb->execute("CREATE INDEX IF NOT EXISTS idx_connect_log_time_start ON mysql_server_connect_log (time_start_us)"); @@ -397,6 +412,13 @@ MySQL_Monitor::~MySQL_Monitor() { delete AWS_Aurora_Hosts_resultset; AWS_Aurora_Hosts_resultset=NULL; } + std::map::iterator it2; + AWS_Aurora_monitor_node *node=NULL; + for (it2 = AWS_Aurora_Hosts_Map.begin(); it2 != AWS_Aurora_Hosts_Map.end(); ++it2) { + node = it2->second; + delete node; + } + AWS_Aurora_Hosts_Map.clear(); }; @@ -546,6 +568,9 @@ __exit_monitor_ping_thread: sqlite3_stmt *statement=NULL; sqlite3 *mondb=mmsd->mondb->get_db(); int rc; +#ifdef TEST_AURORA +// if ((rand() % 10) ==0) { +#endif // TEST_AURORA char *query=NULL; query=(char *)"INSERT OR REPLACE INTO mysql_server_ping_log VALUES (?1 , ?2 , ?3 , ?4 , ?5)"; rc=sqlite3_prepare_v2(mondb, query, -1, &statement, 0); @@ 
-564,6 +589,9 @@ __exit_monitor_ping_thread: if (mmsd->mysql_error_msg == NULL) { ping_success = true; } +#ifdef TEST_AURORA +// } +#endif // TEST_AURORA } __fast_exit_monitor_ping_thread: if (mmsd->mysql) { @@ -599,6 +627,9 @@ bool MySQL_Monitor_State_Data::set_wait_timeout() { if (mysql_thread___monitor_wait_timeout==false) { return true; } +#ifdef TEST_AURORA + return true; +#endif // TEST_AURORA bool ret=false; char *query=NULL; char *qt=(char *)"SET wait_timeout=%d"; @@ -1172,7 +1203,7 @@ void * monitor_galera_thread(void *arg) { sprintf(new_error,"timeout or error in creating new connection: %s",mmsd->mysql_error_msg); free(mmsd->mysql_error_msg); mmsd->mysql_error_msg = new_error; - proxy_error("Error on Galera check for %s:%d after %lldms. Unable to create a connection. If the server is overload, increase mysql-monitor_galera_healthcheck_timeout. Error: %s.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000, new_error); + proxy_error("Error on Galera check for %s:%d after %lldms. Unable to create a connection. If the server is overload, increase mysql-monitor_connect_timeout. 
Error: %s.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000, new_error); goto __exit_monitor_galera_thread; } } @@ -2478,6 +2509,11 @@ __monitor_run: proxy_error("Thread creation\n"); assert(0); } + pthread_t monitor_aws_aurora_thread; + if (pthread_create(&monitor_aws_aurora_thread, &attr, &monitor_aws_aurora_pthread,NULL) != 0) { + proxy_error("Thread creation\n"); + assert(0); + } pthread_t monitor_replication_lag_thread; if (pthread_create(&monitor_replication_lag_thread, &attr, &monitor_replication_lag_pthread,NULL) != 0) { proxy_error("Thread creation\n"); @@ -2570,6 +2606,7 @@ __monitor_run: pthread_join(monitor_read_only_thread,NULL); pthread_join(monitor_group_replication_thread,NULL); pthread_join(monitor_galera_thread,NULL); + pthread_join(monitor_aws_aurora_thread,NULL); pthread_join(monitor_replication_lag_thread,NULL); while (shutdown==false) { unsigned int glover; @@ -2657,17 +2694,26 @@ bool MyGR_monitor_node::add_entry(unsigned long long _st, unsigned long long _ct } -AWS_Aurora_replica_host_status_entry::AWS_Aurora_replica_host_status_entry(char *serid, char *sessid, uint32_t lut, float rlm, float _c) { +AWS_Aurora_replica_host_status_entry::AWS_Aurora_replica_host_status_entry(char *serid, char *sessid, char *lut, float rlm, float _c) { server_id = strdup(serid); session_id = strdup(sessid); - last_update_timestamp = lut; + last_update_timestamp = strdup(lut); replica_lag_ms = rlm; cpu = _c; } +AWS_Aurora_replica_host_status_entry::AWS_Aurora_replica_host_status_entry(char *serid, char *sessid, char *lut, char *rlm, char *_c) { + server_id = strdup(serid); + session_id = strdup(sessid); + last_update_timestamp = strdup(lut); + replica_lag_ms = strtof(rlm, NULL); + cpu = strtof(_c, NULL); +} + AWS_Aurora_replica_host_status_entry::~AWS_Aurora_replica_host_status_entry() { free(server_id); free(session_id); + free(last_update_timestamp); } AWS_Aurora_status_entry::AWS_Aurora_status_entry(unsigned long long st, unsigned long long ct, char *e) { 
@@ -2799,6 +2845,7 @@ void MySQL_Monitor::populate_monitor_mysql_server_group_replication_log() { } } } + sqlite3_finalize(statement1); pthread_mutex_unlock(&GloMyMon->group_replication_mutex); } @@ -2842,6 +2889,7 @@ void MySQL_Monitor::populate_monitor_mysql_server_galera_log() { } } } + sqlite3_finalize(statement1); pthread_mutex_unlock(&GloMyMon->galera_mutex); } @@ -2941,11 +2989,16 @@ void MySQL_Monitor::populate_monitor_mysql_server_aws_aurora_log() { int rc; //char *query=NULL; char *query1=NULL; - query1=(char *)"INSERT INTO populate_monitor_mysql_server_aws_aurora_log VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)"; + query1=(char *)"INSERT OR IGNORE INTO mysql_server_aws_aurora_log VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)"; sqlite3_stmt *statement1=NULL; - pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); + char *query2=NULL; + query2=(char *)"INSERT OR IGNORE INTO mysql_server_aws_aurora_log (hostname, port, time_start_us, success_time_us, error) VALUES (?1, ?2, ?3, ?4, ?5)"; + sqlite3_stmt *statement2=NULL; rc=sqlite3_prepare_v2(mondb, query1, -1, &statement1, 0); assert(rc==SQLITE_OK); + rc=sqlite3_prepare_v2(mondb, query2, -1, &statement2, 0); + assert(rc==SQLITE_OK); + pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); monitordb->execute((char *)"DELETE FROM mysql_server_aws_aurora_log"); std::map::iterator it2; AWS_Aurora_monitor_node *node=NULL; @@ -2957,32 +3010,152 @@ void MySQL_Monitor::populate_monitor_mysql_server_aws_aurora_log() { std::string port=s.substr(found+1); int i; for (i=0; ilast_entries[i]; - if (aase->start_time) { - for (std::vector::iterator it3 = aase->host_statuses->begin(); it3!=aase->host_statuses->end(); ++it3) { - AWS_Aurora_replica_host_status_entry *hse = *it3; - if (hse) { - rc=sqlite3_bind_text(statement1, 1, host.c_str(), -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); - rc=sqlite3_bind_int64(statement1, 2, atoi(port.c_str())); assert(rc==SQLITE_OK); - rc=sqlite3_bind_int64(statement1, 3, aase->start_time ); 
assert(rc==SQLITE_OK); - rc=sqlite3_bind_int64(statement1, 4, aase->check_time ); assert(rc==SQLITE_OK); - rc=sqlite3_bind_text(statement1, 5, aase->error , -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); - rc=sqlite3_bind_text(statement1, 6, hse->server_id , -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); - rc=sqlite3_bind_text(statement1, 7, hse->session_id , -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); - rc=sqlite3_bind_int64(statement1, 8, hse->last_update_timestamp ); assert(rc==SQLITE_OK); - rc=sqlite3_bind_double(statement1, 9, hse->replica_lag_ms ); assert(rc==SQLITE_OK); - rc=sqlite3_bind_double(statement1, 10, hse->cpu ); assert(rc==SQLITE_OK); - SAFE_SQLITE3_STEP2(statement1); - rc=sqlite3_clear_bindings(statement1); assert(rc==SQLITE_OK); - rc=sqlite3_reset(statement1); assert(rc==SQLITE_OK); + AWS_Aurora_status_entry * aase = node->last_entries[i]; + if (aase && aase->start_time) { + if ( aase->host_statuses->size() ) { + for (std::vector::iterator it3 = aase->host_statuses->begin(); it3!=aase->host_statuses->end(); ++it3) { + AWS_Aurora_replica_host_status_entry *hse = *it3; + if (hse) { + rc=sqlite3_bind_text(statement1, 1, host.c_str(), -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement1, 2, atoi(port.c_str())); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement1, 3, aase->start_time ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement1, 4, aase->check_time ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement1, 5, aase->error , -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement1, 6, hse->server_id , -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement1, 7, hse->session_id , -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement1, 8, hse->last_update_timestamp , -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_double(statement1, 9, hse->replica_lag_ms ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_double(statement1, 
10, hse->cpu ); assert(rc==SQLITE_OK); + SAFE_SQLITE3_STEP2(statement1); + rc=sqlite3_clear_bindings(statement1); assert(rc==SQLITE_OK); + rc=sqlite3_reset(statement1); assert(rc==SQLITE_OK); + } } + } else { + rc=sqlite3_bind_text(statement2, 1, host.c_str(), -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement2, 2, atoi(port.c_str())); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement2, 3, aase->start_time ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement2, 4, aase->check_time ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement2, 5, aase->error , -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + SAFE_SQLITE3_STEP2(statement2); + rc=sqlite3_clear_bindings(statement2); assert(rc==SQLITE_OK); + rc=sqlite3_reset(statement2); assert(rc==SQLITE_OK); } } } } - pthread_mutex_unlock(&GloMyMon->galera_mutex); + sqlite3_finalize(statement1); + sqlite3_finalize(statement2); + pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); } +void MySQL_Monitor::populate_monitor_mysql_server_aws_aurora_check_status() { + sqlite3 *mondb=monitordb->get_db(); + int rc; + //char *query=NULL; + char *query1=NULL; + query1=(char *)"INSERT OR IGNORE INTO mysql_server_aws_aurora_check_status VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"; + sqlite3_stmt *statement1=NULL; + rc=sqlite3_prepare_v2(mondb, query1, -1, &statement1, 0); + assert(rc==SQLITE_OK); + pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); + monitordb->execute((char *)"DELETE FROM mysql_server_aws_aurora_check_status"); + std::map::iterator it2; + AWS_Aurora_monitor_node *node=NULL; + for (it2=GloMyMon->AWS_Aurora_Hosts_Map.begin(); it2!=GloMyMon->AWS_Aurora_Hosts_Map.end(); ++it2) { + std::string s=it2->first; + node=it2->second; + std::size_t found=s.find_last_of(":"); + std::string host=s.substr(0,found); + std::string port=s.substr(found+1); + AWS_Aurora_status_entry * aase = node->last_entry(); + char *error_msg = NULL; + if (aase && aase->start_time) { + if (aase->error) { + error_msg = 
aase->error; + } + } + char lut[30]; + struct tm __tm_info; + localtime_r(&node->last_checked_at, &__tm_info); + strftime(lut, 25, "%Y-%m-%d %H:%M:%S", &__tm_info); + +/* + int i; + for (i=0; ilast_entries[i]; + if (aase && aase->start_time) { + if ( aase->host_statuses->size() ) { + for (std::vector::iterator it3 = aase->host_statuses->begin(); it3!=aase->host_statuses->end(); ++it3) { + AWS_Aurora_replica_host_status_entry *hse = *it3; + if (hse) { +*/ + rc=sqlite3_bind_int64(statement1, 1, node->writer_hostgroup); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement1, 2, host.c_str(), -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement1, 3, atoi(port.c_str())); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement1, 4, lut, -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement1, 5, node->num_checks_tot ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement1, 6, node->num_checks_ok ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement1, 7, error_msg , -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + SAFE_SQLITE3_STEP2(statement1); + rc=sqlite3_clear_bindings(statement1); assert(rc==SQLITE_OK); + rc=sqlite3_reset(statement1); assert(rc==SQLITE_OK); +/* + } + } + } else { + rc=sqlite3_bind_text(statement2, 1, host.c_str(), -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement2, 2, atoi(port.c_str())); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement2, 3, aase->start_time ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement2, 4, aase->check_time ); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement2, 5, aase->error , -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + SAFE_SQLITE3_STEP2(statement2); + rc=sqlite3_clear_bindings(statement2); assert(rc==SQLITE_OK); + rc=sqlite3_reset(statement2); assert(rc==SQLITE_OK); + } + } + } +*/ + } + sqlite3_finalize(statement1); + pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); +} + +/* +void 
MySQL_Monitor::gdb_dump___monitor_mysql_server_aws_aurora_log(char *hostname) { + fprintf(stderr,"gdb_dump___monitor_mysql_server_aws_aurora_log\n"); + std::map::iterator it2; + AWS_Aurora_monitor_node *node=NULL; + for (it2=GloMyMon->AWS_Aurora_Hosts_Map.begin(); it2!=GloMyMon->AWS_Aurora_Hosts_Map.end(); ++it2) { + std::string s=it2->first; + node=it2->second; + std::size_t found=s.find_last_of(":"); + std::string host=s.substr(0,found); + std::string port=s.substr(found+1); + int i; + for (i=0; ilast_entries[i]; + if (aase && aase->start_time) { + if ( aase->host_statuses->size() ) { + for (std::vector::iterator it3 = aase->host_statuses->begin(); it3!=aase->host_statuses->end(); ++it3) { + AWS_Aurora_replica_host_status_entry *hse = *it3; + if (hse) { + if (hostname == NULL || (hostname && ( (strcmp(hostname,host.c_str())==0) || (strcmp(hostname,hse->server_id)==0)) )) { + fprintf(stderr,"%s:%d %llu %llu %s %s %s %s %f %f\n", host.c_str(), atoi(port.c_str()), aase->start_time, aase->check_time, aase->error, hse->server_id,hse->session_id, hse->last_update_timestamp, hse->replica_lag_ms , hse->cpu); + } + } + } + } else { + if (hostname == NULL || (hostname && strcmp(hostname,host.c_str())==0) ) { + fprintf(stderr,"%s:%d %llu %llu %s\n", host.c_str(), atoi(port.c_str()), aase->start_time, aase->check_time, aase->error); + } + } + } + } + } +} +*/ + AWS_Aurora_monitor_node::AWS_Aurora_monitor_node(char *_a, int _p, int _whg) { addr=NULL; if (_a) { @@ -2993,9 +3166,13 @@ AWS_Aurora_monitor_node::AWS_Aurora_monitor_node(char *_a, int _p, int _whg) { writer_hostgroup=_whg; int i; for (i=0;istart_time=0; + //last_entries[i]->check_time=0; } + num_checks_tot = 0; + num_checks_ok = 0; + last_checked_at = 0; } AWS_Aurora_monitor_node::~AWS_Aurora_monitor_node() { @@ -3004,6 +3181,447 @@ AWS_Aurora_monitor_node::~AWS_Aurora_monitor_node() { } } +bool AWS_Aurora_monitor_node::add_entry(AWS_Aurora_status_entry *ase) { + bool ret=false; + if (idx_last_entry==-1) ret=true; + 
int prev_last_entry=idx_last_entry; + idx_last_entry++; + if (idx_last_entry>=AWS_Aurora_Nentries) { + idx_last_entry=0; + } + if (last_entries[idx_last_entry]) { + AWS_Aurora_status_entry *old = last_entries[idx_last_entry]; + delete old; + } + last_entries[idx_last_entry] = ase; + num_checks_tot++; + if (ase->error == NULL) { + num_checks_ok++; + } + last_checked_at = time(NULL); + return ret; // for now ignored +} + +void * monitor_AWS_Aurora_thread(void *arg); + + +typedef struct _host_def_t { + char *host; + int port; + int use_ssl; +} host_def_t; + +static void shuffle_hosts(host_def_t *array, size_t n) { + char tmp[sizeof(host_def_t)]; + char *arr = (char *)array; + size_t stride = sizeof(host_def_t) * sizeof(char); + + if (n > 1) { + size_t i; + for (i = 0; i < n - 1 ; ++i) { + size_t rnd = (size_t) fastrand(); + size_t j = i + rnd / (0x7FFF / (n - i) + 1); + memcpy(tmp, arr + j * stride, sizeof(host_def_t)); + memcpy(arr + j * stride, arr + i * stride, sizeof(host_def_t)); + memcpy(arr + i * stride, tmp, sizeof(host_def_t)); + } + } +} + +void * monitor_AWS_Aurora_thread_HG(void *arg) { + unsigned int wHG = *(unsigned int *)arg; + unsigned int rHG = 0; + unsigned int num_hosts = 0; + unsigned int cur_host_idx = 0; + unsigned int max_lag_ms = 0; + unsigned int check_interval_ms = 0; + unsigned int check_timeout_ms = 0; + //unsigned int i = 0; + proxy_info("Started Monitor thread for AWS Aurora writer HG %u\n", wHG); + + unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version; + MySQL_Thread * mysql_thr = new MySQL_Thread(); + mysql_thr->curtime=monotonic_time(); + MySQL_Monitor__thread_MySQL_Thread_Variables_version=GloMTH->get_global_version(); + mysql_thr->refresh_variables(); + if (!GloMTH) return NULL; // quick exit during shutdown/restart + + uint64_t initial_raw_checksum = 0; + + // this is a static array of the latest reads + unsigned int ase_idx = 0; + AWS_Aurora_status_entry *lasts_ase[N_L_ASE]; + for (unsigned int i=0; 
iaws_aurora_mutex); + initial_raw_checksum = GloMyMon->AWS_Aurora_Hosts_resultset_checksum; + // count the number of hosts + for (std::vector::iterator it = GloMyMon->AWS_Aurora_Hosts_resultset->rows.begin() ; it != GloMyMon->AWS_Aurora_Hosts_resultset->rows.end(); ++it) { + SQLite3_row *r=*it; + if (atoi(r->fields[0]) == wHG) { + num_hosts++; + if (max_lag_ms == 0) { + max_lag_ms = atoi(r->fields[5]); + } + if (check_interval_ms == 0) { + check_interval_ms = atoi(r->fields[5]); + } + if (check_timeout_ms == 0) { + check_timeout_ms = atoi(r->fields[7]); + } + if (rHG == 0) { + rHG = atoi(r->fields[1]); + } + } + } + host_def_t *hpa = (host_def_t *)malloc(sizeof(host_def_t)*num_hosts); + for (std::vector::iterator it = GloMyMon->AWS_Aurora_Hosts_resultset->rows.begin() ; it != GloMyMon->AWS_Aurora_Hosts_resultset->rows.end(); ++it) { + SQLite3_row *r=*it; + if (atoi(r->fields[0]) == wHG) { + hpa[cur_host_idx].host = strdup(r->fields[2]); + hpa[cur_host_idx].port = atoi(r->fields[3]); + hpa[cur_host_idx].use_ssl = atoi(r->fields[4]); + cur_host_idx++; + } + } + pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); + + bool exit_now = false; + unsigned long long t1 = 0; + //unsigned long long t2 = 0; + unsigned long long next_loop_at = 0; + + bool crc = false; + + uint64_t current_raw_checksum = 0; + size_t rnd; + bool found_pingable_host = false; + bool rc_ping = false; + MySQL_Monitor_State_Data *mmsd = NULL; + + t1 = monotonic_time(); + unsigned long long start_time=t1; + + while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true && exit_now==false) { + + unsigned int glover; + t1=monotonic_time(); + + //proxy_info("Looping Monitor thread for AWS Aurora writer HG %u\n", wHG); + + if (!GloMTH) { + //proxy_info("Stopping Monitor thread for AWS Aurora writer HG %u\n", wHG); + goto __exit_monitor_AWS_Aurora_thread_HG_now; + return NULL; // quick exit during shutdown/restart + } + + // if variables has changed, triggers new checks + 
glover=GloMTH->get_global_version(); + if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) { + MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover; + mysql_thr->refresh_variables(); + next_loop_at=0; + } + + pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); + current_raw_checksum = GloMyMon->AWS_Aurora_Hosts_resultset_checksum; + pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); + + if (current_raw_checksum != initial_raw_checksum) { + // the content of AWS_Aurora_Hosts_resultset has changed. Exit + exit_now=true; + break; + } + //fprintf(stderr,"%u : %llu %llu\n", wHG, t1, next_loop_at); + if (t1 < next_loop_at) { + unsigned long long st=0; + st=next_loop_at-t1; + if (st > 50000) { + st = 50000; + } + usleep(st); + //proxy_info("Looping Monitor thread for AWS Aurora writer HG %u\n", wHG); + continue; + } + //proxy_info("Running check AWS Aurora writer HG %u\n", wHG); + found_pingable_host = false; + + rc_ping = false; + // pick a random host + rnd = (size_t) rand(); + rnd %= num_hosts; + rc_ping = GloMyMon->server_responds_to_ping(hpa[rnd].host, hpa[rnd].port); + //proxy_info("Looping Monitor thread for AWS Aurora writer HG %u\n", wHG); +#ifdef TEST_AURORA + if (rand() % 100 < 30) { + // we randomly fail 30% of the requests + rc_ping = false; + } +#endif // TEST_AURORA + if (rc_ping) { + found_pingable_host = true; + cur_host_idx = rnd; + } else { + // the randomly picked host didn't work work + shuffle_hosts(hpa,num_hosts); + for (unsigned int i=0; (found_pingable_host == false && iserver_responds_to_ping(hpa[i].host, hpa[i].port); + if (rc_ping) { + found_pingable_host = true; + cur_host_idx = i; + } + } + } + +#ifdef TEST_AURORA + if (rand() % 200 == 0) { + // we randomly fail 0.5% of the requests + found_pingable_host = false; + } +#endif // TEST_AURORA + + if (found_pingable_host == false) { + proxy_error("No node is pingable for AWS Aurora cluster with writer HG %u\n", wHG); + next_loop_at = t1 + check_interval_ms * 1000; + continue; 
+ } + if (rand() % 1000 == 0) { // suppress 99.9% of the output, too verbose + proxy_info("Running check for AWS Aurora writer HG %u on %s:%d\n", wHG , hpa[cur_host_idx].host, hpa[cur_host_idx].port); + } + mmsd = NULL; + mmsd = new MySQL_Monitor_State_Data(hpa[cur_host_idx].host, hpa[cur_host_idx].port, NULL, hpa[cur_host_idx].use_ssl); + mmsd->writer_hostgroup = wHG; + mmsd->aws_aurora_check_timeout_ms = check_timeout_ms; + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + //unsigned long long start_time=mysql_thr->curtime; + start_time=t1; + + + mmsd->t1=start_time; + + crc=false; + if (mmsd->mysql==NULL) { // we don't have a connection, let's create it + bool rc; + rc=mmsd->create_new_connection(); + crc=true; + if (rc==false) { + unsigned long long now=monotonic_time(); + char * new_error = (char *)malloc(50+strlen(mmsd->mysql_error_msg)); + sprintf(new_error,"timeout or error in creating new connection: %s",mmsd->mysql_error_msg); + free(mmsd->mysql_error_msg); + mmsd->mysql_error_msg = new_error; + proxy_error("Error on AWS Aurora check for %s:%d after %lldms. Unable to create a connection. If the server is overload, increase mysql-monitor_connect_timeout. 
Error: %s.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000, new_error); + goto __exit_monitor_aws_aurora_HG_thread; + } + } + + mmsd->t1=monotonic_time(); + mmsd->interr=0; // reset the value +#ifdef TEST_AURORA + mmsd->async_exit_status = mysql_query_start(&mmsd->interr, mmsd->mysql, "SELECT SERVER_ID, SESSION_ID, LAST_UPDATE_TIMESTAMP, REPLICA_LAG_IN_MILLISECONDS, CPU FROM REPLICA_HOST_STATUS ORDER BY SERVER_ID"); +#else + mmsd->async_exit_status = mysql_query_start(&mmsd->interr, mmsd->mysql, "SELECT SERVER_ID, SESSION_ID, LAST_UPDATE_TIMESTAMP, REPLICA_LAG_IN_MILLISECONDS, CPU FROM INFORMATION_SCHEMA.REPLICA_HOST_STATUS ORDER BY SERVER_ID"); +#endif // TEST_AURORA + while (mmsd->async_exit_status) { + mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); + unsigned long long now=monotonic_time(); + if (now > mmsd->t1 + mmsd->aws_aurora_check_timeout_ms * 1000) { + mmsd->mysql_error_msg=strdup("timeout check"); + proxy_error("Timeout on AWS Aurora health check for %s:%d after %lldms. If the server is overload, increase mysql_aws_aurora_hostgroups.check_timeout_ms\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); + goto __exit_monitor_aws_aurora_HG_thread; + } + if (GloMyMon->shutdown==true) { + goto __fast_exit_monitor_aws_aurora_HG_thread; // exit immediately + } + if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { + mmsd->async_exit_status=mysql_query_cont(&mmsd->interr, mmsd->mysql, mmsd->async_exit_status); + } + } + mmsd->async_exit_status=mysql_store_result_start(&mmsd->result,mmsd->mysql); + while (mmsd->async_exit_status) { + mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); + unsigned long long now=monotonic_time(); + if (now > mmsd->t1 + mmsd->aws_aurora_check_timeout_ms * 1000) { + mmsd->mysql_error_msg=strdup("timeout check"); + proxy_error("Timeout on AWS Aurora health check for %s:%d after %lldms. 
If the server is overload, increase mysql_aws_aurora_hostgroups.check_timeout_ms\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); + goto __exit_monitor_aws_aurora_HG_thread; + } + if (GloMyMon->shutdown==true) { + goto __fast_exit_monitor_aws_aurora_HG_thread; // exit immediately + } + if ((mmsd->async_exit_status & MYSQL_WAIT_TIMEOUT) == 0) { + mmsd->async_exit_status=mysql_store_result_cont(&mmsd->result, mmsd->mysql, mmsd->async_exit_status); + } + } + if (mmsd->interr) { // check failed + mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); + } + +__exit_monitor_aws_aurora_HG_thread: + mmsd->t2=monotonic_time(); + next_loop_at = t1 + (check_interval_ms * 1000); + if (mmsd->t2 > t1) { + next_loop_at -= (mmsd->t2 - t1); + } + { + // TODO : complete this + char buf[128]; + char *s=NULL; + int l=strlen(mmsd->hostname); + if (l<110) { + s=buf; + } else { + s=(char *)malloc(l+16); + } + sprintf(s,"%s:%d",mmsd->hostname,mmsd->port); + AWS_Aurora_status_entry *ase = new AWS_Aurora_status_entry(mmsd->t1, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); + AWS_Aurora_status_entry *ase_l = new AWS_Aurora_status_entry(mmsd->t1, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); + if (mmsd->interr == 0 && mmsd->result) { + int num_fields=0; + int num_rows=0; + num_fields = mysql_num_fields(mmsd->result); + if (num_fields!=5) { + proxy_error("Incorrect number of fields, please report a bug\n"); + } else { + MYSQL_ROW row; + while ((row = mysql_fetch_row(mmsd->result))) { + AWS_Aurora_replica_host_status_entry *arhse = new AWS_Aurora_replica_host_status_entry(row[0], row[1], row[2], row[3], row[4]); + ase->add_host_status(arhse); + AWS_Aurora_replica_host_status_entry *arhse_l = new AWS_Aurora_replica_host_status_entry(row[0], row[1], row[2], row[3], row[4]); + ase_l->add_host_status(arhse_l); + } + } + mysql_free_result(mmsd->result); + mmsd->result=NULL; + } +//__end_process_aws_aurora_result: + if (mmsd->mysql_error_msg) { + } + unsigned long long time_now=realtime_time(); + 
time_now=time_now-(mmsd->t2 - start_time); + pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); + //auto it = + // TODO : complete this + std::map::iterator it2; + it2 = GloMyMon->AWS_Aurora_Hosts_Map.find(s); + AWS_Aurora_monitor_node *node=NULL; + if (it2!=GloMyMon->AWS_Aurora_Hosts_Map.end()) { + node=it2->second; + node->add_entry(ase); + } else { + node = new AWS_Aurora_monitor_node(mmsd->hostname,mmsd->port,mmsd->writer_hostgroup); + node->add_entry(ase); + GloMyMon->AWS_Aurora_Hosts_Map.insert(std::make_pair(s,node)); + } + // clean up + if (l<110) { + } else { + free(s); + } + pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); + if (lasts_ase[ase_idx]) { + AWS_Aurora_status_entry * l_ase = lasts_ase[ase_idx]; + delete l_ase; + } + lasts_ase[ase_idx] = ase_l; + GloMyMon->evaluate_aws_aurora_results(wHG, rHG, &lasts_ase[0], ase_idx, max_lag_ms); + // remember that we call evaluate_aws_aurora_results() + // *before* shifting ase_idx + ase_idx++; + if (ase_idx == N_L_ASE) { + ase_idx = 0; + } + } + if (mmsd->interr || mmsd->async_exit_status) { // check failed + } else { + if (crc==false) { + if (mmsd->mysql) { + GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); + mmsd->mysql=NULL; + } + } + } +__fast_exit_monitor_aws_aurora_HG_thread: + if (mmsd->mysql) { + // if we reached here we didn't put the connection back + if (mmsd->mysql_error_msg || mmsd->async_exit_status) { + mysql_close(mmsd->mysql); // if we reached here we should destroy it + mmsd->mysql=NULL; + } else { + if (crc) { + bool rc=mmsd->set_wait_timeout(); + if (rc) { + GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); + } else { + mysql_close(mmsd->mysql); // set_wait_timeout failed + } + mmsd->mysql=NULL; + } else { // really not sure how we reached here, drop it + mysql_close(mmsd->mysql); + mmsd->mysql=NULL; + } + } + } +__exit_monitor_AWS_Aurora_thread_HG_now: + if (mmsd) { + delete (mmsd); + mmsd = NULL; + for (unsigned int i=0; 
iwriter_hostgroup=atoi(r->fields[0]); + mmsd->writer_is_also_reader=atoi(r->fields[4]); + mmsd->max_transactions_behind=atoi(r->fields[5]); + mmsd->mondb=monitordb; + WorkItem* item; + item=new WorkItem(mmsd,monitor_AWS_Aurora_thread); + GloMyMon->queue.add(item); + usleep(us); +*/ +// } + +/* + for + for (std::vector::iterator it = Galera_Hosts_resultset->rows.begin() ; it != Galera_Hosts_resultset->rows.end(); ++it) { + + } + SQLite3_row *r=*it; + bool rc_ping = true; + rc_ping = server_responds_to_ping(r->fields[1],atoi(r->fields[2])); + if (rc_ping) { // only if server is responding to pings + MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[1],atoi(r->fields[2]), NULL, atoi(r->fields[3])); + mmsd->writer_hostgroup=atoi(r->fields[0]); + mmsd->writer_is_also_reader=atoi(r->fields[4]); + mmsd->max_transactions_behind=atoi(r->fields[5]); + mmsd->mondb=monitordb; + +*/ + } + + free(hpa); + if (mysql_thr) { + delete mysql_thr; + mysql_thr=NULL; + } + for (unsigned int i=0; i next time at + // when empty, a new map is populated + // when next_loop_at = 0 , the tables is emptied so to be populated again + + unsigned int *hgs_array = NULL; + pthread_t *pthreads_array = NULL; + unsigned int hgs_num = 0; + while (GloMyMon->shutdown==false && mysql_thread___monitor_enabled==true) { unsigned int glover; t1=monotonic_time(); if (!GloMTH) return NULL; // quick exit during shutdown/restart + + // if variables has changed, triggers new checks glover=GloMTH->get_global_version(); if (MySQL_Monitor__thread_MySQL_Thread_Variables_version < glover ) { MySQL_Monitor__thread_MySQL_Thread_Variables_version=glover; @@ -3031,20 +3661,81 @@ void * MySQL_Monitor::monitor_aws_aurora() { next_loop_at=0; } + // if list of servers or HG or options has changed, triggers new checks + pthread_mutex_lock(&aws_aurora_mutex); + uint64_t new_raw_checksum = AWS_Aurora_Hosts_resultset->raw_checksum(); + pthread_mutex_unlock(&aws_aurora_mutex); + if (new_raw_checksum != 
last_raw_checksum) { + proxy_info("Detected new/changed definition for AWS Aurora monitoring\n"); + next_loop_at = 0; + last_raw_checksum = new_raw_checksum; + if (pthreads_array) { + // wait all threads to terminate + for (unsigned int i=0; i < hgs_num; i++) { + pthread_join(pthreads_array[i], NULL); + proxy_info("Stopped Monitor thread for AWS Aurora writer HG %u\n", hgs_array[i]); + } + free(pthreads_array); + free(hgs_array); + } + hgs_num = 0; + pthread_mutex_lock(&aws_aurora_mutex); + // scan all the writer HGs + unsigned int num_rows = AWS_Aurora_Hosts_resultset->rows_count; + if (num_rows) { + unsigned int *tmp_hgs_array = (unsigned int *)malloc(sizeof(unsigned int)*num_rows); + for (std::vector::iterator it = AWS_Aurora_Hosts_resultset->rows.begin() ; it != AWS_Aurora_Hosts_resultset->rows.end(); ++it) { + SQLite3_row *r=*it; + int wHG = atoi(r->fields[0]); + bool found = false; + // very simple search. Far from optimal, but assuming very few HGs it is fast enough + for (unsigned int i=0; i < hgs_num; i++) { + if (tmp_hgs_array[i] == wHG) { + found = true; + } + } + if (found == false) { + // new wHG found + tmp_hgs_array[hgs_num]=wHG; + hgs_num++; + } + } + proxy_info("Activating Monitoring of %u AWS Aurora clusters\n", hgs_num); + hgs_array = (unsigned int *)malloc(sizeof(unsigned int)*hgs_num); + pthreads_array = (pthread_t *)malloc(sizeof(pthread_t)*hgs_num); + for (unsigned int i=0; i < hgs_num; i++) { + hgs_array[i] = tmp_hgs_array[i]; + proxy_info("Starting Monitor thread for AWS Aurora writer HG %u\n", hgs_array[i]); + if (pthread_create(&pthreads_array[i], NULL, monitor_AWS_Aurora_thread_HG, &hgs_array[i]) != 0) { + proxy_error("Thread creation\n"); + assert(0); + } + } + free(tmp_hgs_array); + } + pthread_mutex_unlock(&aws_aurora_mutex); + } +/* if (t1 < next_loop_at) { goto __sleep_monitor_aws_aurora; } + + if (next_loop_at == 0) { + // free the queue + + } + next_loop_at=t1+1000*mysql_thread___monitor_galera_healthcheck_interval; 
pthread_mutex_lock(&aws_aurora_mutex); - if (Galera_Hosts_resultset==NULL) { + if (AWS_Aurora_Hosts_resultset==NULL) { goto __end_monitor_aws_aurora_loop; } else { - if (Galera_Hosts_resultset->rows_count==0) { + if (AWS_Aurora_Hosts_resultset->rows_count==0) { goto __end_monitor_aws_aurora_loop; } int us=100; - if (Galera_Hosts_resultset->rows_count) { + if (AWS_Aurora_Hosts_resultset->rows_count) { us=mysql_thread___monitor_read_only_interval/2/Galera_Hosts_resultset->rows_count; } for (std::vector::iterator it = Galera_Hosts_resultset->rows.begin() ; it != Galera_Hosts_resultset->rows.end(); ++it) { @@ -3058,7 +3749,7 @@ void * MySQL_Monitor::monitor_aws_aurora() { mmsd->max_transactions_behind=atoi(r->fields[5]); mmsd->mondb=monitordb; WorkItem* item; - item=new WorkItem(mmsd,monitor_aws_aurora_thread); + item=new WorkItem(mmsd,monitor_AWS_Aurora_thread); GloMyMon->queue.add(item); usleep(us); } @@ -3074,17 +3765,18 @@ __end_monitor_aws_aurora_loop: if (mysql_thread___monitor_enabled==true) { } - __sleep_monitor_aws_aurora: t2=monotonic_time(); if (t2 500000) { - st = 500000; + if (st > 200000) { + st = 200000; } usleep(st); } +*/ + usleep(10000); } if (mysql_thr) { delete mysql_thr; @@ -3098,7 +3790,6 @@ __sleep_monitor_aws_aurora: } void * monitor_AWS_Aurora_thread(void *arg) { -// FIXME: still referring to GALERA and not AURORA MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); @@ -3122,29 +3813,24 @@ void * monitor_AWS_Aurora_thread(void *arg) { sprintf(new_error,"timeout or error in creating new connection: %s",mmsd->mysql_error_msg); free(mmsd->mysql_error_msg); mmsd->mysql_error_msg = new_error; - proxy_error("Error on AWS Aurora check for %s:%d after %lldms. Unable to create a connection. If the server is overload, increase mysql-monitor_galera_healthcheck_timeout. 
Error: %s.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000, new_error); + proxy_error("Error on AWS Aurora check for %s:%d after %lldms. Unable to create a connection. If the server is overload, increase mysql-monitor_connect_timeout. Error: %s.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000, new_error); goto __exit_monitor_aws_aurora_thread; } } mmsd->t1=monotonic_time(); mmsd->interr=0; // reset the value - { - char *sv = mmsd->mysql->server_version; - if (strncmp(sv,(char *)"5.7",3)==0 || strncmp(sv,(char *)"8",1)==0) { - // the backend is either MySQL 5.7 or MySQL 8 : INFORMATION_SCHEMA.GLOBAL_STATUS is deprecated - mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT (SELECT VARIABLE_VALUE FROM performance_schema.global_status WHERE VARIABLE_NAME='WSREP_LOCAL_STATE') wsrep_local_state, @@read_only read_only, (SELECT VARIABLE_VALUE FROM performance_schema.global_status WHERE VARIABLE_NAME='WSREP_LOCAL_RECV_QUEUE') wsrep_local_recv_queue , @@wsrep_desync wsrep_desync, @@wsrep_reject_queries wsrep_reject_queries, @@wsrep_sst_donor_rejects_queries wsrep_sst_donor_rejects_queries, (SELECT VARIABLE_VALUE FROM performance_schema.global_status WHERE VARIABLE_NAME='WSREP_CLUSTER_STATUS') wsrep_cluster_status"); - } else { - // any other version - mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT (SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='WSREP_LOCAL_STATE') wsrep_local_state, @@read_only read_only, (SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='WSREP_LOCAL_RECV_QUEUE') wsrep_local_recv_queue , @@wsrep_desync wsrep_desync, @@wsrep_reject_queries wsrep_reject_queries, @@wsrep_sst_donor_rejects_queries wsrep_sst_donor_rejects_queries, (SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='WSREP_CLUSTER_STATUS') wsrep_cluster_status"); - } - } +#ifdef TEST_AURORA + mmsd->async_exit_status = 
mysql_query_start(&mmsd->interr, mmsd->mysql, "SELECT SERVER_ID, SESSION_ID, LAST_UPDATE_TIMESTAMP, REPLICA_LAG_IN_MILLISECONDS, CPU FROM REPLICA_HOST_STATUS ORDER BY SERVER_ID"); +#else + mmsd->async_exit_status = mysql_query_start(&mmsd->interr, mmsd->mysql, "SELECT SERVER_ID, SESSION_ID, LAST_UPDATE_TIMESTAMP, REPLICA_LAG_IN_MILLISECONDS, CPU FROM INFORMATION_SCHEMA.REPLICA_HOST_STATUS ORDER BY SERVER_ID"); +#endif // TEST_AURORA while (mmsd->async_exit_status) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); - if (now > mmsd->t1 + mysql_thread___monitor_galera_healthcheck_timeout * 1000) { + if (now > mmsd->t1 + mmsd->aws_aurora_check_timeout_ms * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); - proxy_error("Timeout on Galera health check for %s:%d after %lldms. If the server is overload, increase mysql-monitor_galera_healthcheck_timeout. Assuming wsrep_cluster_status is NOT Primary\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); + proxy_error("Timeout on AWS Aurora health check for %s:%d after %lldms. If the server is overload, increase mysql_aws_aurora_hostgroups.check_timeout_ms\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); goto __exit_monitor_aws_aurora_thread; } if (GloMyMon->shutdown==true) { @@ -3158,9 +3844,9 @@ void * monitor_AWS_Aurora_thread(void *arg) { while (mmsd->async_exit_status) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); - if (now > mmsd->t1 + mysql_thread___monitor_galera_healthcheck_timeout * 1000) { + if (now > mmsd->t1 + mmsd->aws_aurora_check_timeout_ms * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); - proxy_error("Timeout on Galera health check for %s:%d after %lldms. If the server is overload, increase mysql-monitor_galera_healthcheck_timeout. 
Assuming wsrep_local_state is NOT 4 and read_only=YES\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); + proxy_error("Timeout on AWS Aurora health check for %s:%d after %lldms. If the server is overload, increase mysql_aws_aurora_hostgroups.check_timeout_ms\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); goto __exit_monitor_aws_aurora_thread; } if (GloMyMon->shutdown==true) { @@ -3170,7 +3856,7 @@ void * monitor_AWS_Aurora_thread(void *arg) { mmsd->async_exit_status=mysql_store_result_cont(&mmsd->result, mmsd->mysql, mmsd->async_exit_status); } } - if (mmsd->interr) { // ping failed + if (mmsd->interr) { // check failed mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); } @@ -3187,62 +3873,24 @@ __exit_monitor_aws_aurora_thread: s=(char *)malloc(l+16); } sprintf(s,"%s:%d",mmsd->hostname,mmsd->port); - bool primary_partition = false; - bool read_only=true; - bool wsrep_desync = true; - int wsrep_local_state = 0; - bool wsrep_reject_queries = true; - bool wsrep_sst_donor_rejects_queries = true; - long long wsrep_local_recv_queue=0; + AWS_Aurora_status_entry *ase = new AWS_Aurora_status_entry(mmsd->t1, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); if (mmsd->interr == 0 && mmsd->result) { int num_fields=0; int num_rows=0; num_fields = mysql_num_fields(mmsd->result); - if (num_fields!=7) { + if (num_fields!=5) { proxy_error("Incorrect number of fields, please report a bug\n"); - goto __end_process_aws_aurora_result; - } - num_rows = mysql_num_rows(mmsd->result); - if (num_rows!=1) { - proxy_error("Incorrect number of rows, please report a bug\n"); - goto __end_process_aws_aurora_result; - } - MYSQL_ROW row=mysql_fetch_row(mmsd->result); - if (row[0]) { - wsrep_local_state = atoi(row[0]); - } - if (row[1]) { - if (!strcasecmp(row[1],"NO") || !strcasecmp(row[1],"OFF") || !strcasecmp(row[1],"0")) { - read_only=false; - } - } - if (row[2]) { - wsrep_local_recv_queue = atoll(row[2]); - } - if (row[3]) { - if (!strcasecmp(row[3],"NO") || !strcasecmp(row[3],"OFF") 
|| !strcasecmp(row[3],"0")) { - wsrep_desync = false; - } - } - if (row[4]) { - if (!strcasecmp(row[4],"NONE")) { - wsrep_reject_queries = false; - } - } - if (row[5]) { - if (!strcasecmp(row[5],"NO") || !strcasecmp(row[5],"OFF") || !strcasecmp(row[5],"0")) { - wsrep_sst_donor_rejects_queries = false; - } - } - if (row[6]) { - if (!strcasecmp(row[6],"Primary")) { - primary_partition = true; + } else { + MYSQL_ROW row; + while ((row = mysql_fetch_row(mmsd->result))) { + AWS_Aurora_replica_host_status_entry *arhse = new AWS_Aurora_replica_host_status_entry(row[0], row[1], row[2], row[3], row[4]); + ase->add_host_status(arhse); } } mysql_free_result(mmsd->result); mmsd->result=NULL; } -__end_process_aws_aurora_result: +//__end_process_aws_aurora_result: if (mmsd->mysql_error_msg) { } unsigned long long time_now=realtime_time(); @@ -3255,14 +3903,14 @@ __end_process_aws_aurora_result: AWS_Aurora_monitor_node *node=NULL; if (it2!=GloMyMon->AWS_Aurora_Hosts_Map.end()) { node=it2->second; - node->add_entry(time_now, (mmsd->mysql_error_msg ? 0 : mmsd->t2-mmsd->t1) , wsrep_local_recv_queue, primary_partition, read_only, wsrep_local_state, wsrep_desync, wsrep_reject_queries, wsrep_sst_donor_rejects_queries, mmsd->mysql_error_msg); + node->add_entry(ase); } else { node = new AWS_Aurora_monitor_node(mmsd->hostname,mmsd->port,mmsd->writer_hostgroup); - node->add_entry(time_now, (mmsd->mysql_error_msg ? 
0 : mmsd->t2-mmsd->t1) , wsrep_local_recv_queue, primary_partition, read_only, wsrep_local_state, wsrep_desync, wsrep_reject_queries, wsrep_sst_donor_rejects_queries, mmsd->mysql_error_msg); + node->add_entry(ase); GloMyMon->AWS_Aurora_Hosts_Map.insert(std::make_pair(s,node)); } pthread_mutex_unlock(&GloMyMon->aws_aurora_mutex); - +/* // NOTE: we update MyHGM outside the mutex aws_aurora_mutex if (mmsd->mysql_error_msg) { // there was an error checking the status of the server, surely we need to reconfigure GR MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg); @@ -3302,7 +3950,7 @@ __end_process_aws_aurora_result: } } } - +*/ // clean up if (l<110) { } else { @@ -3342,3 +3990,122 @@ __fast_exit_monitor_aws_aurora_thread: delete mysql_thr; return NULL; } + +void MySQL_Monitor::evaluate_aws_aurora_results(unsigned int wHG, unsigned int rHG, AWS_Aurora_status_entry **lasts_ase, unsigned int ase_idx, unsigned int max_latency_ms) { + unsigned int i = 0; +#ifdef TEST_AURORA + bool verbose = false; + unsigned int action_yes = 0; + unsigned int action_no = 0; + unsigned int enabling = 0; + unsigned int disabling = 0; + if (rand() % 500 == 0) { + verbose = true; + bool ev = false; + if (rand() % 1000 == 0) { + ev = true; + } + for (i=0; i < N_L_ASE; i++) { + AWS_Aurora_status_entry *aase = lasts_ase[i]; + if (ev == true || i == ase_idx) { + if (aase && aase->start_time) { + if ( aase->host_statuses->size() ) { + for (std::vector::iterator it3 = aase->host_statuses->begin(); it3!=aase->host_statuses->end(); ++it3) { + AWS_Aurora_replica_host_status_entry *hse = *it3; + if (hse) { + fprintf(stderr,"%s %s %s %f %f\n", hse->server_id, hse->session_id, hse->last_update_timestamp, hse->replica_lag_ms , hse->cpu); + } + } + } + } + } + } + } +#endif // TEST_AURORA + unsigned int prev_ase_idx = ase_idx; + if (prev_ase_idx == 0) prev_ase_idx = N_L_ASE; + prev_ase_idx--; + AWS_Aurora_status_entry *aase = lasts_ase[ase_idx]; + 
AWS_Aurora_status_entry *prev_aase = lasts_ase[prev_ase_idx]; + if (aase && aase->start_time) { + if ( aase->host_statuses->size() ) { + for (std::vector::iterator it3 = aase->host_statuses->begin(); it3!=aase->host_statuses->end(); ++it3) { + AWS_Aurora_replica_host_status_entry *hse = *it3; + bool run_action = true; + bool enable = true; + bool is_writer = false; + bool rla_rc = true; + if (hse->replica_lag_ms > max_latency_ms) { + enable = false; + } + if (strcmp(hse->session_id,"MASTER_SESSION_ID")==0) { + is_writer = true; + } + // we also try to determine if a change needs to be made + if (prev_aase && prev_aase->start_time) { + if ( prev_aase->host_statuses->size() ) { + for (std::vector::iterator it4 = prev_aase->host_statuses->begin(); it4!=prev_aase->host_statuses->end(); ++it4) { + AWS_Aurora_replica_host_status_entry *prev_hse = *it4; + if (strcmp(prev_hse->server_id,hse->server_id)==0) { + bool prev_enabled = true; + if (prev_hse->replica_lag_ms > max_latency_ms) { + prev_enabled = false; + } + if (prev_enabled == enable) { + // the previous status should be the same + // do not run any action + run_action = false; + } + } + } + } + } + if (run_action) { +#ifdef TEST_AURORA + action_yes++; + (enable ? 
enabling++ : disabling++); + rla_rc = MyHGM->aws_aurora_replication_lag_action(wHG, rHG, hse->server_id, 3306, hse->replica_lag_ms, enable, is_writer, verbose); +#else + rla_rc = MyHGM->aws_aurora_replication_lag_action(wHG, rHG, hse->server_id, 3306, hse->replica_lag_ms, enable, is_writer); +#endif // TEST_AURORA +#ifdef TEST_AURORA + } else { + action_no++; +#endif // TEST_AURORA + rla_rc = MyHGM->aws_aurora_replication_lag_action(wHG, rHG, hse->server_id, 3306, hse->replica_lag_ms, enable, is_writer); + } + //if (is_writer == true && rla_rc == false) { + if (rla_rc == false) { + if (is_writer == true) { + // the server is not configured as a writer +#ifdef TEST_AURORA + proxy_info("Calling update_aws_aurora_set_writer for %s\n", hse->server_id); +#endif // TEST_AURORA + MyHGM->update_aws_aurora_set_writer(wHG, rHG, hse->server_id, 3306); + time_t __timer; + char lut[30]; + struct tm __tm_info; + time(&__timer); + localtime_r(&__timer, &__tm_info); + strftime(lut, 25, "%Y-%m-%d %H:%M:%S", &__tm_info); + char *q1 = (char *)"INSERT INTO mysql_server_aws_aurora_failovers VALUES (%d, '%s', '%s')"; + char *q2 = (char *)malloc(strlen(q1)+strlen(lut)+strlen(hse->server_id)); + sprintf(q2, q1, wHG, hse->server_id, lut); + monitordb->execute(q2); + free(q2); + } else { +#ifdef TEST_AURORA + proxy_info("Calling update_aws_aurora_set_reader for %s\n", hse->server_id); +#endif // TEST_AURORA + MyHGM->update_aws_aurora_set_reader(wHG, rHG, hse->server_id, 3306); + } + } + } + } + } +#ifdef TEST_AURORA + if (verbose) { + proxy_info("replication_lag_actions: YES=%u , NO=%u , enabling=%u , disabling=%u\n", action_yes, action_no, enabling, disabling); + } +#endif // TEST_AURORA +} diff --git a/lib/MySQL_Protocol.cpp b/lib/MySQL_Protocol.cpp index 80e4e478b..4655b9188 100644 --- a/lib/MySQL_Protocol.cpp +++ b/lib/MySQL_Protocol.cpp @@ -1626,7 +1626,11 @@ __do_auth: if ( ((*myds)->sess->session_type == PROXYSQL_SESSION_ADMIN) || - ((*myds)->sess->session_type == 
PROXYSQL_SESSION_STATS) + ((*myds)->sess->session_type == PROXYSQL_SESSION_STATS) +#ifdef TEST_AURORA + || + ((*myds)->sess->session_type == PROXYSQL_SESSION_SQLITE) +#endif // TEST_AURORA ) { if (strcmp((const char *)user,mysql_thread___monitor_username)==0) { proxy_scramble(reply, (*myds)->myconn->scramble_buff, mysql_thread___monitor_password); @@ -1707,7 +1711,7 @@ __do_auth: } } else { if (auth_plugin_id == 1) { - if (session_type == PROXYSQL_SESSION_MYSQL || session_type == PROXYSQL_SESSION_SQLITE || PROXYSQL_SESSION_ADMIN || PROXYSQL_SESSION_STATS) { + if (session_type == PROXYSQL_SESSION_MYSQL || session_type == PROXYSQL_SESSION_SQLITE || session_type == PROXYSQL_SESSION_ADMIN || session_type == PROXYSQL_SESSION_STATS) { ret=proxy_scramble_sha1((char *)pass,(*myds)->myconn->scramble_buff,password+1, reply); if (ret) { if (sha1_pass==NULL) { @@ -1720,7 +1724,7 @@ __do_auth: } } } else { // mysql_clear_password - if (session_type == PROXYSQL_SESSION_MYSQL || session_type == PROXYSQL_SESSION_SQLITE || PROXYSQL_SESSION_ADMIN || PROXYSQL_SESSION_STATS) { + if (session_type == PROXYSQL_SESSION_MYSQL || session_type == PROXYSQL_SESSION_SQLITE || session_type == PROXYSQL_SESSION_ADMIN || session_type == PROXYSQL_SESSION_STATS) { /* char sha1_2[SHA_DIGEST_LENGTH+1]; sha1_2[SHA_DIGEST_LENGTH]='\0'; diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index 8dd4e98f7..375169f86 100644 --- a/lib/MySQL_Session.cpp +++ b/lib/MySQL_Session.cpp @@ -3583,8 +3583,11 @@ void MySQL_Session::handler___status_CONNECTING_CLIENT___STATE_SERVER_HANDSHAKE( (handshake_response_return == true) && ( - //(default_hostgroup<0 && ( session_type == PROXYSQL_SESSION_ADMIN || session_type == PROXYSQL_SESSION_STATS || session_type == PROXYSQL_SESSION_SQLITE) ) +#ifdef TEST_AURORA + (default_hostgroup<0 && ( session_type == PROXYSQL_SESSION_ADMIN || session_type == PROXYSQL_SESSION_STATS || session_type == PROXYSQL_SESSION_SQLITE) ) +#else (default_hostgroup<0 && ( session_type == 
PROXYSQL_SESSION_ADMIN || session_type == PROXYSQL_SESSION_STATS) ) +#endif // TEST_AURORA || (default_hostgroup == 0 && session_type == PROXYSQL_SESSION_CLICKHOUSE) || @@ -3624,8 +3627,12 @@ void MySQL_Session::handler___status_CONNECTING_CLIENT___STATE_SERVER_HANDSHAKE( //if (session_type == PROXYSQL_SESSION_MYSQL || session_type == PROXYSQL_SESSION_CLICKHOUSE) { client_authenticated=true; switch (session_type) { - case PROXYSQL_SESSION_MYSQL: case PROXYSQL_SESSION_SQLITE: +#ifdef TEST_AURORA + free_users=1; + break; +#endif // TEST_AURORA + case PROXYSQL_SESSION_MYSQL: if (ldap_ctx==NULL) { free_users=GloMyAuth->increase_frontend_user_connections(client_myds->myconn->userinfo->username, &used_users); } else { diff --git a/lib/ProxySQL_Admin.cpp b/lib/ProxySQL_Admin.cpp index dbb60ce2a..44aa29f90 100644 --- a/lib/ProxySQL_Admin.cpp +++ b/lib/ProxySQL_Admin.cpp @@ -343,11 +343,11 @@ static int http_handler(void *cls, struct MHD_Connection *connection, const char // AWS Aurora -#define ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS_V2_1_0a "CREATE TABLE mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 1000 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 500 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 500 AND check_timeout_ms <= 3000) DEFAULT 1000 , comment VARCHAR , UNIQUE (reader_hostgroup))" +#define ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS_V2_1_0a "CREATE TABLE mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT 
NULL DEFAULT 1 , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 10 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 100 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 80 AND check_timeout_ms <= 3000) DEFAULT 800 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1)) NOT NULL DEFAULT 0 , new_reader_weight INT CHECK (new_reader_weight >= 0 AND new_reader_weight <=10000000) NOT NULL DEFAULT 1 , comment VARCHAR , UNIQUE (reader_hostgroup))" #define ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS_V2_1_0a -#define ADMIN_SQLITE_TABLE_RUNTIME_MYSQL_AWS_AURORA_HOSTGROUPS "CREATE TABLE runtime_mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 1000 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 500 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 500 AND check_timeout_ms <= 3000) DEFAULT 1000 , comment VARCHAR , UNIQUE (reader_hostgroup))" +#define ADMIN_SQLITE_TABLE_RUNTIME_MYSQL_AWS_AURORA_HOSTGROUPS "CREATE TABLE runtime_mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 10 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 100 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 80 AND check_timeout_ms <= 3000) 
DEFAULT 800 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1)) NOT NULL DEFAULT 0 , new_reader_weight INT CHECK (new_reader_weight >= 0 AND new_reader_weight <=10000000) NOT NULL DEFAULT 1 , comment VARCHAR , UNIQUE (reader_hostgroup))" @@ -2065,6 +2065,7 @@ bool ProxySQL_Admin::GenericRefreshStatistics(const char *query_no_space, unsign bool monitor_mysql_server_galera_log=false; bool monitor_mysql_server_aws_aurora_log=false; + bool monitor_mysql_server_aws_aurora_check_status=false; bool stats_proxysql_servers_checksums = false; bool stats_proxysql_servers_metrics = false; @@ -2172,6 +2173,9 @@ bool ProxySQL_Admin::GenericRefreshStatistics(const char *query_no_space, unsign if (strstr(query_no_space,"mysql_server_aws_aurora_log")) { monitor_mysql_server_aws_aurora_log=true; refresh=true; } + if (strstr(query_no_space,"mysql_server_aws_aurora_check_status")) { + monitor_mysql_server_aws_aurora_check_status=true; refresh=true; + } // if (stats_mysql_processlist || stats_mysql_connection_pool || stats_mysql_query_digest || stats_mysql_query_digest_reset) { if (refresh==true) { pthread_mutex_lock(&admin_mutex); @@ -2284,6 +2288,11 @@ bool ProxySQL_Admin::GenericRefreshStatistics(const char *query_no_space, unsign GloMyMon->populate_monitor_mysql_server_aws_aurora_log(); } } + if (monitor_mysql_server_aws_aurora_check_status) { + if (GloMyMon) { + GloMyMon->populate_monitor_mysql_server_aws_aurora_check_status(); + } + } pthread_mutex_unlock(&admin_mutex); } if ( @@ -7761,9 +7770,9 @@ void ProxySQL_Admin::save_mysql_servers_runtime_to_database(bool _runtime) { sqlite3 *mydb3=admindb->get_db(); char *query=NULL; if (_runtime) { - query=(char *)"INSERT INTO runtime_mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"; + query=(char *)"INSERT INTO 
runtime_mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,writer_is_also_reader,new_reader_weight,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)"; } else { - query=(char *)"INSERT INTO mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"; + query=(char *)"INSERT INTO mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,writer_is_also_reader,new_reader_weight,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)"; } rc=sqlite3_prepare_v2(mydb3, query, -1, &statement, 0); assert(rc==SQLITE_OK); @@ -7776,7 +7785,9 @@ void ProxySQL_Admin::save_mysql_servers_runtime_to_database(bool _runtime) { rc=sqlite3_bind_int64(statement, 4, atoi(r->fields[3])); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 5, atoi(r->fields[4])); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 6, atoi(r->fields[5])); assert(rc==SQLITE_OK); - rc=sqlite3_bind_text(statement, 7, r->fields[6], -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 7, atoi(r->fields[6])); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 8, atoi(r->fields[7])); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement, 9, r->fields[8], -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); SAFE_SQLITE3_STEP2(statement); rc=sqlite3_clear_bindings(statement); assert(rc==SQLITE_OK); @@ -8687,7 +8698,7 @@ int ProxySQL_Admin::Read_MySQL_Servers_from_configfile() { if (root.exists("mysql_aws_aurora_hostgroups")==true) { const Setting &mysql_aws_aurora_hostgroups = root["mysql_aws_aurora_hostgroups"]; int count = mysql_aws_aurora_hostgroups.getLength(); - char *q=(char *)"INSERT OR REPLACE INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, comment) VALUES (%d, %d, %d, %d, %d, 
%d, '%s')"; + char *q=(char *)"INSERT OR REPLACE INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (%d, %d, %d, %d, %d, %d, %d, %d, '%s')"; for (i=0; i< count; i++) { const Setting &line = mysql_aws_aurora_hostgroups[i]; int writer_hostgroup; @@ -8696,18 +8707,21 @@ int ProxySQL_Admin::Read_MySQL_Servers_from_configfile() { int max_lag_ms; int check_interval_ms; int check_timeout_ms; - int max_transactions_behind; + int writer_is_also_reader; + int new_reader_weight; std::string comment=""; if (line.lookupValue("writer_hostgroup", writer_hostgroup)==false) continue; if (line.lookupValue("reader_hostgroup", reader_hostgroup)==false) continue; if (line.lookupValue("max_lag_ms", max_lag_ms)==false) max_lag_ms=600000; if (line.lookupValue("check_interval_ms", check_interval_ms)==false) check_interval_ms=1000; if (line.lookupValue("check_timeout_ms", check_timeout_ms)==false) check_timeout_ms=1000; + if (line.lookupValue("writer_is_also_reader", writer_is_also_reader)==false) writer_is_also_reader=0; + if (line.lookupValue("new_reader_weight", new_reader_weight)==false) new_reader_weight=0; line.lookupValue("comment", comment); char *o1=strdup(comment.c_str()); char *o=escape_string_single_quotes(o1, false); - char *query=(char *)malloc(strlen(q)+strlen(o)+128); // 128 vs sizeof(int)*8 - sprintf(query,q, writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, o); + char *query=(char *)malloc(strlen(q)+strlen(o)+256); // 128 vs sizeof(int)*8 + sprintf(query,q, writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, o); //fprintf(stderr, "%s\n", query); admindb->execute(query); if (o!=o1) free(o); @@ -9645,3 +9659,44 @@ void ProxySQL_Admin::stats___mysql_prepared_statements_info() { statsdb->execute("COMMIT"); delete 
resultset; } + +#ifdef TEST_AURORA +void ProxySQL_Admin::enable_aurora_testing() { + proxy_info("Admin is enabling AWS Aurora Testing using SQLite3 Server and HGs 1271 and 1272\n"); + sqlite3_stmt *statement=NULL; + sqlite3 *mydb3=admindb->get_db(); + unsigned int num_aurora_servers = GloSQLite3Server->num_aurora_servers[0]; + int rc; + mysql_servers_wrlock(); + admindb->execute("DELETE FROM mysql_servers"); + char *query=(char *)"INSERT INTO mysql_servers (hostgroup_id,hostname,use_ssl,comment) VALUES (?1, ?2, ?3, ?4)"; + rc=sqlite3_prepare_v2(mydb3, query, -1, &statement, 0); + assert(rc==SQLITE_OK); + for (unsigned int j=1; j<4; j++) { + proxy_info("Admin is enabling AWS Aurora Testing using SQLite3 Server and HGs 127%d and 127%d\n" , j*2-1 , j*2); + for (unsigned int i=0; iexecute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1271, 1272, 1, 25, 120, 90, 1, 1, 'Automated Aurora Testing Cluster 1')"); + admindb->execute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1273, 1274, 1, 25, 120, 90, 0, 1, 'Automated Aurora Testing Cluster 2')"); + admindb->execute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1275, 1276, 1, 25, 120, 90, 0, 2, 'Automated Aurora Testing Cluster 3')"); + load_mysql_servers_to_runtime(); + mysql_servers_wrunlock(); + admindb->execute("UPDATE global_variables SET variable_value=2000 WHERE variable_name='mysql-monitor_ping_interval'"); + admindb->execute("UPDATE global_variables SET variable_value=1000 WHERE variable_name='mysql-monitor_ping_timeout'"); + load_mysql_variables_to_runtime(); + 
admindb->execute("INSERT INTO mysql_users (username,password,default_hostgroup) VALUES ('aurora1','pass1',1271), ('aurora2','pass2',1273), ('aurora3','pass3',1275)"); + init_users(); +} +#endif // TEST_AURORA diff --git a/lib/ProxySQL_HTTP_Server.cpp b/lib/ProxySQL_HTTP_Server.cpp index d0f1637da..cd939f8b2 100644 --- a/lib/ProxySQL_HTTP_Server.cpp +++ b/lib/ProxySQL_HTTP_Server.cpp @@ -125,7 +125,7 @@ static char *generate_home() { html.append("Uptime = "); { unsigned long long t1=monotonic_time(); - char buf1[30]; + char buf1[40]; unsigned long long uptime = (t1-GloVars.global.start_time)/1000/1000; unsigned long long days = uptime / 86400; unsigned long long hours = (uptime - days*86400)/3600; diff --git a/lib/SQLite3_Server.cpp b/lib/SQLite3_Server.cpp index 36c61231e..ca5479715 100644 --- a/lib/SQLite3_Server.cpp +++ b/lib/SQLite3_Server.cpp @@ -118,8 +118,8 @@ typedef struct _ifaces_desc_t { char **mysql_ifaces; } ifaces_desc_t; -#define MAX_IFACES 8 -#define MAX_SQLITE3SERVER_LISTENERS 16 +#define MAX_IFACES 32 +#define MAX_SQLITE3SERVER_LISTENERS 32 class ifaces_desc { public: @@ -246,6 +246,38 @@ void SQLite3_Server_session_handler(MySQL_Session *sess, void *_pa, PtrSize_t *p memcpy(query,(char *)pkt->ptr+sizeof(mysql_hdr)+1,query_length-1); query[query_length-1]=0; +#ifdef TEST_AURORA + if (sess->client_myds->proxy_addr.addr == NULL) { + struct sockaddr addr; + socklen_t addr_len=sizeof(struct sockaddr); + memset(&addr,0,addr_len); + int rc; + rc=getsockname(sess->client_myds->fd, &addr, &addr_len); + if (rc==0) { + char buf[512]; + switch (addr.sa_family) { + case AF_INET: { + struct sockaddr_in *ipv4 = (struct sockaddr_in *)&addr; + inet_ntop(addr.sa_family, &ipv4->sin_addr, buf, INET_ADDRSTRLEN); + sess->client_myds->proxy_addr.addr = strdup(buf); + } + break; + case AF_INET6: { + struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)&addr; + inet_ntop(addr.sa_family, &ipv6->sin6_addr, buf, INET6_ADDRSTRLEN); + sess->client_myds->proxy_addr.addr = 
strdup(buf); + } + break; + default: + sess->client_myds->proxy_addr.addr = strdup("unknown"); + break; + } + } else { + sess->client_myds->proxy_addr.addr = strdup("unknown"); + } + } +#endif // TEST_AURORA + char *query_no_space=(char *)l_alloc(query_length); memcpy(query_no_space,query,query_length); @@ -260,7 +292,13 @@ void SQLite3_Server_session_handler(MySQL_Session *sess, void *_pa, PtrSize_t *p if (query_no_space_length==SELECT_VERSION_COMMENT_LEN) { if (!strncasecmp(SELECT_VERSION_COMMENT, query_no_space, query_no_space_length)) { l_free(query_length,query); - query=l_strdup("SELECT '(ProxySQL Admin Module)'"); +#ifdef TEST_AURORA + char *a = (char *)"SELECT '(ProxySQL Aurora Test Server) - %s'"; + query = (char *)malloc(strlen(a)+strlen(sess->client_myds->proxy_addr.addr)); + sprintf(query,a,sess->client_myds->proxy_addr.addr); +#else + query=l_strdup("SELECT '(ProxySQL SQLite3 Server)'"); +#endif // TEST_AURORA query_length=strlen(query)+1; goto __run_query; } @@ -436,8 +474,27 @@ __end_show_commands: __run_query: if (run_query) { +#ifdef TEST_AURORA + if (strncasecmp("SELECT",query_no_space,6)==0) { + if (strstr(query_no_space,(char *)"REPLICA_HOST_STATUS")) { + pthread_mutex_lock(&GloSQLite3Server->aurora_mutex); + GloSQLite3Server->populate_aws_aurora_table(sess); + } + } +#endif // TEST_AURORA SQLite3_Session *sqlite_sess = (SQLite3_Session *)sess->thread->gen_args; sqlite_sess->sessdb->execute_statement(query, &error , &cols , &affected_rows , &resultset); +#ifdef TEST_AURORA + if (strncasecmp("SELECT",query_no_space,6)==0) { + if (strstr(query_no_space,(char *)"REPLICA_HOST_STATUS")) { + pthread_mutex_unlock(&GloSQLite3Server->aurora_mutex); + if (rand() % 100 == 0) { + // randomly add some latency on 1% of the traffic + sleep(2); + } + } + } +#endif // TEST_AURORA sess->SQLite3_to_MySQL(resultset, error, affected_rows, &sess->client_myds->myprot); delete resultset; } @@ -639,11 +696,16 @@ __end_while_pool: #else #define DEB "" #endif /* DEBUG */ 
-#define PROXYSQL_SQLITE3_SERVER_VERSION "0.7.0625" DEB +#define PROXYSQL_SQLITE3_SERVER_VERSION "1.9.0218" DEB SQLite3_Server::~SQLite3_Server() { delete sessdb; sessdb = NULL; + +#ifdef TEST_AURORA + drop_tables_defs(tables_defs_aurora); + delete tables_defs_aurora; +#endif // TEST_AURORA }; SQLite3_Server::SQLite3_Server() { @@ -664,9 +726,117 @@ SQLite3_Server::SQLite3_Server() { sessdb = new SQLite3DB(); sessdb->open((char *)"file:mem_sqlitedb?mode=memory&cache=shared", SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX); - variables.mysql_ifaces=strdup("127.0.0.1:6030"); variables.read_only=false; + +#ifdef TEST_AURORA + pthread_mutex_init(&aurora_mutex,NULL); + unsigned int nas = time(NULL); + nas = nas % 3; // range + nas += 5; // min + max_num_aurora_servers = 10; // hypothetical maximum number of nodes + string s = ""; + for (unsigned int j=1; j<4; j++) { + cur_aurora_writer[j-1] = 0; + num_aurora_servers[j-1] = nas; + for (unsigned int i=11; iexecute("DELETE FROM REPLICA_HOST_STATUS"); + sqlite3_stmt *statement=NULL; + sqlite3 *mydb3=sessdb->get_db(); + int rc; + char *query=(char *)"INSERT INTO REPLICA_HOST_STATUS VALUES (?1, ?2, ?3, ?4, ?5)"; + rc=sqlite3_prepare_v2(mydb3, query, -1, &statement, 0); + assert(rc==SQLITE_OK); + time_t __timer; + char lut[30]; + struct tm __tm_info; + time(&__timer); + localtime_r(&__timer, &__tm_info); + strftime(lut, 25, "%Y-%m-%d %H:%M:%S", &__tm_info); + string myip = string(sess->client_myds->proxy_addr.addr); + string clu_id_s = myip.substr(6,1); + unsigned int cluster_id = atoi(clu_id_s.c_str()); + cluster_id--; + if (rand() % 200 == 0) { + // simulate a failover + cur_aurora_writer[cluster_id] = rand() % num_aurora_servers[cluster_id]; + proxy_info("Simulating a failover for AWS Aurora cluster %d , HGs (%d:%d)\n", cluster_id, 1270 + cluster_id*2+1 , 1270 + cluster_id*2+2); + } + for (unsigned int i=0; i *tables_defs, const char *table_name, const char *table_def) { + table_def_t *td = new 
table_def_t; + td->table_name=strdup(table_name); + td->table_def=strdup(table_def); + tables_defs->push_back(td); +}; + +void SQLite3_Server::check_and_build_standard_tables(SQLite3DB *db, std::vector *tables_defs) { + table_def_t *td; + db->execute("PRAGMA foreign_keys = OFF"); + for (std::vector::iterator it=tables_defs->begin(); it!=tables_defs->end(); ++it) { + td=*it; + db->check_and_build_table(td->table_name, td->table_def); + } + db->execute("PRAGMA foreign_keys = ON"); +}; + +void SQLite3_Server::drop_tables_defs(std::vector *tables_defs) { + table_def_t *td; + while (!tables_defs->empty()) { + td=tables_defs->back(); + free(td->table_name); + td->table_name=NULL; + free(td->table_def); + td->table_def=NULL; + tables_defs->pop_back(); + delete td; + } }; +#endif // TEST_AURORA void SQLite3_Server::wrlock() { pthread_rwlock_wrlock(&rwlock); @@ -684,6 +854,14 @@ void SQLite3_Server::print_version() { bool SQLite3_Server::init() { cpu_timer cpt; +#ifdef TEST_AURORA + tables_defs_aurora = new std::vector; + insert_into_tables_defs(tables_defs_aurora, + (const char *)"REPLICA_HOST_STATUS", + (const char *)"CREATE TABLE REPLICA_HOST_STATUS (SERVER_ID VARCHAR NOT NULL, SESSION_ID VARCHAR NOT NULL, CPU REAL NOT NULL, LAST_UPDATE_TIMESTAMP VARCHAR NOT NULL, REPLICA_LAG_IN_MILLISECONDS REAL NOT NULL)"); + check_and_build_standard_tables(sessdb, tables_defs_aurora); + GloAdmin->enable_aurora_testing(); +#endif // TEST_AURORA child_func[0]=child_mysql; main_shutdown=0; main_poll_nfds=0; diff --git a/src/Makefile b/src/Makefile index 243d718d9..5636a31ce 100644 --- a/src/Makefile +++ b/src/Makefile @@ -132,5 +132,5 @@ $(LIBPROXYSQLAR): default: $(EXECUTABLE) clean: - rm -f *.pid $(ODIR)/*.o *~ core $(EXECUTABLE) + rm -f *.pid $(ODIR)/*.o *~ core perf.data* heaptrack.proxysql.* $(EXECUTABLE) diff --git a/src/main.cpp b/src/main.cpp index 5ede651f8..9b35e0c27 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1661,7 +1661,9 @@ finish: #ifdef RUNNING_ON_VALGRIND if 
(RUNNING_ON_VALGRIND==0) { - dlclose(__mysql_ldap_auth); + if (__mysql_ldap_auth) { + dlclose(__mysql_ldap_auth); + } } #endif return 0; From 0c232bf04b04e0d1ad7a4d7820b6089e7e9a2b77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Thu, 28 Feb 2019 17:13:37 +1100 Subject: [PATCH 03/20] Do not kill Session if a backend failed This new algorithm makes the backend connection fail instead of killing the session This is an improvement over issue #1085 --- include/MySQL_Session.h | 1 + lib/MySQL_Session.cpp | 17 +++++++++++++++++ lib/MySQL_Thread.cpp | 10 ++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/include/MySQL_Session.h b/include/MySQL_Session.h index ef3ebb967..c8be69073 100644 --- a/include/MySQL_Session.h +++ b/include/MySQL_Session.h @@ -214,6 +214,7 @@ class MySQL_Session void MySQL_Stmt_Result_to_MySQL_wire(MYSQL_STMT *stmt, MySQL_Connection *myconn); unsigned int NumActiveTransactions(); bool HasOfflineBackends(); + bool SetEventInOfflineBackends(); int FindOneActiveTransaction(); unsigned long long IdleTime(); diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index 375169f86..024f25c66 100644 --- a/lib/MySQL_Session.cpp +++ b/lib/MySQL_Session.cpp @@ -4752,6 +4752,23 @@ bool MySQL_Session::HasOfflineBackends() { return ret; } +bool MySQL_Session::SetEventInOfflineBackends() { + bool ret=false; + if (mybes==0) return ret; + MySQL_Backend *_mybe; + unsigned int i; + for (i=0; i < mybes->len; i++) { + _mybe=(MySQL_Backend *)mybes->index(i); + if (_mybe->server_myds) + if (_mybe->server_myds->myconn) + if (_mybe->server_myds->myconn->IsServerOffline()) { + _mybe->server_myds->revents|=POLLIN; + ret = true; + } + } + return ret; +} + int MySQL_Session::FindOneActiveTransaction() { int ret=-1; if (mybes==0) return ret; diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index 4703a3bbe..999ed4aeb 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -3551,8 +3551,14 @@ void 
MySQL_Thread::process_all_sessions() { } if (servers_table_version_current != servers_table_version_previous) { // bug fix for #1085 // Immediatelly kill all client connections using an OFFLINE node - if (sess->HasOfflineBackends()) { - sess->killed=true; + //if (sess->HasOfflineBackends()) { + // sess->killed=true; + //} + // Search for connections that should be terminated, and simulate data in them + // the following 2 lines of code replace the previous 2 lines + // instead of killing the sessions, fails the backend connections + if (sess->SetEventInOfflineBackends()) { + sess->to_process=1; } } } From b36f5d785a0f421e843156dbfbd6ba9df1fc3c91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Thu, 21 Mar 2019 01:40:59 +1100 Subject: [PATCH 04/20] Second commit to support Aurora It adds port and endpoint_address in mysql_aws_aurora_hostgroups --- include/MySQL_HostGroups_Manager.h | 17 ++- lib/MySQL_HostGroups_Manager.cpp | 209 +++++++++++++++++++++-------- lib/MySQL_Monitor.cpp | 20 +-- lib/ProxySQL_Admin.cpp | 37 +++-- lib/SQLite3_Server.cpp | 8 +- 5 files changed, 204 insertions(+), 87 deletions(-) diff --git a/include/MySQL_HostGroups_Manager.h b/include/MySQL_HostGroups_Manager.h index 2230f2f15..f84f09837 100644 --- a/include/MySQL_HostGroups_Manager.h +++ b/include/MySQL_HostGroups_Manager.h @@ -36,7 +36,7 @@ #define MYHGM_MYSQL_GALERA_HOSTGROUPS "CREATE TABLE mysql_galera_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , backup_writer_hostgroup INT CHECK (backup_writer_hostgroup>=0 AND backup_writer_hostgroup<>writer_hostgroup) NOT NULL , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND backup_writer_hostgroup<>reader_hostgroup AND reader_hostgroup>0) , offline_hostgroup INT NOT NULL CHECK (offline_hostgroup<>writer_hostgroup AND offline_hostgroup<>reader_hostgroup AND backup_writer_hostgroup<>offline_hostgroup AND offline_hostgroup>=0) , active INT CHECK (active IN (0,1)) 
NOT NULL DEFAULT 1 , max_writers INT NOT NULL CHECK (max_writers >= 0) DEFAULT 1 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1,2)) NOT NULL DEFAULT 0 , max_transactions_behind INT CHECK (max_transactions_behind>=0) NOT NULL DEFAULT 0 , comment VARCHAR , UNIQUE (reader_hostgroup) , UNIQUE (offline_hostgroup) , UNIQUE (backup_writer_hostgroup))" -#define MYHGM_MYSQL_AWS_AURORA_HOSTGROUPS "CREATE TABLE mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 10 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 100 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 80 AND check_timeout_ms <= 3000) DEFAULT 800 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1)) NOT NULL DEFAULT 0 , new_reader_weight INT CHECK (new_reader_weight >= 0 AND new_reader_weight <=10000000) NOT NULL DEFAULT 1 , comment VARCHAR , UNIQUE (reader_hostgroup))" +#define MYHGM_MYSQL_AWS_AURORA_HOSTGROUPS "CREATE TABLE mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , aurora_port INT NOT NUlL DEFAULT 3306 , endpoint_address VARCHAR NOT NULL DEFAULT '' , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 10 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 100 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 80 AND check_timeout_ms <= 3000) DEFAULT 800 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1)) NOT NULL 
DEFAULT 0 , new_reader_weight INT CHECK (new_reader_weight >= 0 AND new_reader_weight <=10000000) NOT NULL DEFAULT 1 , comment VARCHAR , UNIQUE (reader_hostgroup))" typedef std::unordered_map umap_mysql_errors; @@ -435,6 +435,7 @@ class AWS_Aurora_Info { public: int writer_hostgroup; int reader_hostgroup; + int aurora_port; int max_lag_ms; int check_interval_ms; int check_timeout_ms; @@ -442,11 +443,12 @@ class AWS_Aurora_Info { int new_reader_weight; // TODO // add intermediary status value, for example the last check time + char * endpoint_address; char * comment; bool active; bool __active; - AWS_Aurora_Info(int w, int r, int ml, int ci, int ct, bool _a, int wiar, int nrw, char *c); - bool update(int r, int ml, int ci, int ct, bool _a, int wiar, int nrw, char *c); + AWS_Aurora_Info(int w, int r, int _port, char *_end_addr, int ml, int ci, int ct, bool _a, int wiar, int nrw, char *c); + bool update(int r, int _port, char *_end_addr, int ml, int ci, int ct, bool _a, int wiar, int nrw, char *c); ~AWS_Aurora_Info(); }; @@ -593,9 +595,12 @@ class MySQL_HostGroups_Manager { // FIXME : add action functions for AWS Aurora //void aws_aurora_replication_lag_action(int _whid, int _rhid, char *address, unsigned int port, float current_replication_lag, bool enable, bool verbose=true); - bool aws_aurora_replication_lag_action(int _whid, int _rhid, char *address, unsigned int port, unsigned int current_replication_lag_us, bool enable, bool is_writer, bool verbose=true); - void update_aws_aurora_set_writer(int _whid, int _rhid, char *address, unsigned int port, bool verbose=true); - void update_aws_aurora_set_reader(int _whid, int _rhid, char *_hostname, int _port); + //bool aws_aurora_replication_lag_action(int _whid, int _rhid, char *address, unsigned int port, unsigned int current_replication_lag_us, bool enable, bool is_writer, bool verbose=true); + //void update_aws_aurora_set_writer(int _whid, int _rhid, char *address, unsigned int port, bool verbose=true); + //void 
update_aws_aurora_set_reader(int _whid, int _rhid, char *_hostname, int _port); + bool aws_aurora_replication_lag_action(int _whid, int _rhid, char *server_id, unsigned int current_replication_lag_us, bool enable, bool is_writer, bool verbose=true); + void update_aws_aurora_set_writer(int _whid, int _rhid, char *server_id, bool verbose=true); + void update_aws_aurora_set_reader(int _whid, int _rhid, char *server_id); SQLite3_result * get_stats_mysql_gtid_executed(); void generate_mysql_gtid_executed_tables(); diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index fd91fea9a..2a415afc5 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -2098,7 +2098,7 @@ SQLite3_result * MySQL_HostGroups_Manager::dump_table_mysql_aws_aurora_hostgroup int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; - char *query=(char *)"SELECT writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,writer_is_also_reader,new_reader_weight,comment FROM mysql_aws_aurora_hostgroups"; + char *query=(char *)"SELECT writer_hostgroup,reader_hostgroup,active,aurora_port,endpoint_address,max_lag_ms,check_interval_ms,check_timeout_ms,writer_is_also_reader,new_reader_weight,comment FROM mysql_aws_aurora_hostgroups"; proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 4, "%s\n", query); mydb->execute_statement(query, &error , &cols , &affected_rows , &resultset); wrunlock(); @@ -4733,7 +4733,7 @@ SQLite3_result * MySQL_HostGroups_Manager::get_mysql_errors(bool reset) { return result; } -AWS_Aurora_Info::AWS_Aurora_Info(int w, int r, int ml, int ci, int ct, bool _a, int wiar, int nrw, char *c) { +AWS_Aurora_Info::AWS_Aurora_Info(int w, int r, int _port, char *_end_addr, int ml, int ci, int ct, bool _a, int wiar, int nrw, char *c) { comment=NULL; if (c) { comment=strdup(c); @@ -4748,6 +4748,8 @@ AWS_Aurora_Info::AWS_Aurora_Info(int w, int r, int ml, int ci, int ct, bool _a, active=_a; __active=true; 
//need_converge=true; + aurora_port = _port; + endpoint_address = strdup(_end_addr); } AWS_Aurora_Info::~AWS_Aurora_Info() { @@ -4755,9 +4757,13 @@ AWS_Aurora_Info::~AWS_Aurora_Info() { free(comment); comment=NULL; } + if (endpoint_address) { + free(endpoint_address); + endpoint_address=NULL; + } } -bool AWS_Aurora_Info::update(int r, int ml, int ci, int ct, bool _a, int wiar, int nrw, char *c) { +bool AWS_Aurora_Info::update(int r, int _port, char *_end_addr, int ml, int ci, int ct, bool _a, int wiar, int nrw, char *c) { bool ret=false; __active=true; if (reader_hostgroup!=r) { @@ -4788,6 +4794,28 @@ bool AWS_Aurora_Info::update(int r, int ml, int ci, int ct, bool _a, int wiar, i active=_a; ret=true; } + if (aurora_port != _port) { + aurora_port = _port; + ret = true; + } + if (endpoint_address) { + if (_end_addr) { + if (strcmp(endpoint_address,_end_addr)) { + free(endpoint_address); + endpoint_address = strdup(_end_addr); + ret = true; + } + } else { + free(endpoint_address); + endpoint_address=NULL; + ret = true; + } + } else { + if (_end_addr) { + endpoint_address=strdup(_end_addr); + ret = true; + } + } // for comment we don't change return value if (comment) { if (c) { @@ -4814,7 +4842,7 @@ void MySQL_HostGroups_Manager::generate_mysql_aws_aurora_hostgroups_table() { int rc; sqlite3_stmt *statement=NULL; sqlite3 *mydb3=mydb->get_db(); - char *query=(char *)"INSERT INTO mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,writer_is_also_reader,new_reader_weight,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)"; + char *query=(char *)"INSERT INTO mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,aurora_port,endpoint_address,max_lag_ms,check_interval_ms,check_timeout_ms,writer_is_also_reader,new_reader_weight,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)"; rc=sqlite3_prepare_v2(mydb3, query, -1, &statement, 0); assert(rc==SQLITE_OK); proxy_info("New 
mysql_aws_aurora_hostgroups table\n"); @@ -4829,21 +4857,24 @@ void MySQL_HostGroups_Manager::generate_mysql_aws_aurora_hostgroups_table() { int writer_hostgroup=atoi(r->fields[0]); int reader_hostgroup=atoi(r->fields[1]); int active=atoi(r->fields[2]); - int max_lag_ms = atoi(r->fields[3]); - int check_interval_ms = atoi(r->fields[4]); - int check_timeout_ms = atoi(r->fields[5]); - int writer_is_also_reader = atoi(r->fields[6]); - int new_reader_weight = atoi(r->fields[7]); - proxy_info("Loading AWS Aurora info for (%d,%d,%s,%d,%d,%d,\"%s\")\n", writer_hostgroup,reader_hostgroup,(active ? "on" : "off"),max_lag_ms,check_interval_ms,check_timeout_ms,r->fields[6]); + int aurora_port = atoi(r->fields[3]); + int max_lag_ms = atoi(r->fields[5]); + int check_interval_ms = atoi(r->fields[6]); + int check_timeout_ms = atoi(r->fields[7]); + int writer_is_also_reader = atoi(r->fields[8]); + int new_reader_weight = atoi(r->fields[9]); + proxy_info("Loading AWS Aurora info for (%d,%d,%s,%d,\"%s\",%d,%d,%d,\"%s\")\n", writer_hostgroup,reader_hostgroup,(active ? 
"on" : "off"),aurora_port,r->fields[4],max_lag_ms,check_interval_ms,check_timeout_ms,r->fields[10]); rc=sqlite3_bind_int64(statement, 1, writer_hostgroup); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 2, reader_hostgroup); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 3, active); assert(rc==SQLITE_OK); - rc=sqlite3_bind_int64(statement, 4, max_lag_ms); assert(rc==SQLITE_OK); - rc=sqlite3_bind_int64(statement, 5, check_interval_ms); assert(rc==SQLITE_OK); - rc=sqlite3_bind_int64(statement, 6, check_timeout_ms); assert(rc==SQLITE_OK); - rc=sqlite3_bind_int64(statement, 7, writer_is_also_reader); assert(rc==SQLITE_OK); - rc=sqlite3_bind_int64(statement, 8, new_reader_weight); assert(rc==SQLITE_OK); - rc=sqlite3_bind_text(statement, 9, r->fields[8], -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 4, aurora_port); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement, 5, r->fields[4], -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 6, max_lag_ms); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 7, check_interval_ms); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 8, check_timeout_ms); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 9, writer_is_also_reader); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 10, new_reader_weight); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement, 11, r->fields[10], -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); SAFE_SQLITE3_STEP2(statement); rc=sqlite3_clear_bindings(statement); assert(rc==SQLITE_OK); @@ -4854,12 +4885,12 @@ void MySQL_HostGroups_Manager::generate_mysql_aws_aurora_hostgroups_table() { if (it2!=AWS_Aurora_Info_Map.end()) { info=it2->second; bool changed=false; - changed=info->update(reader_hostgroup, max_lag_ms, check_interval_ms, check_timeout_ms, (bool)active, writer_is_also_reader, new_reader_weight, r->fields[8]); + changed=info->update(reader_hostgroup, aurora_port, r->fields[4], 
max_lag_ms, check_interval_ms, check_timeout_ms, (bool)active, writer_is_also_reader, new_reader_weight, r->fields[10]); if (changed) { //info->need_converge=true; } } else { - info=new AWS_Aurora_Info(writer_hostgroup, reader_hostgroup, max_lag_ms, check_interval_ms, check_timeout_ms, (bool)active, writer_is_also_reader, new_reader_weight, r->fields[8]); + info=new AWS_Aurora_Info(writer_hostgroup, reader_hostgroup, aurora_port, r->fields[4], max_lag_ms, check_interval_ms, check_timeout_ms, (bool)active, writer_is_also_reader, new_reader_weight, r->fields[10]); //info->need_converge=true; AWS_Aurora_Info_Map.insert(AWS_Aurora_Info_Map.begin(), std::pair(writer_hostgroup,info)); } @@ -4907,13 +4938,32 @@ void MySQL_HostGroups_Manager::generate_mysql_aws_aurora_hostgroups_table() { //void MySQL_HostGroups_Manager::aws_aurora_replication_lag_action(int _whid, int _rhid, char *address, unsigned int port, float current_replication_lag, bool enable, bool verbose) { // this function returns false is the server is in the wrong HG -bool MySQL_HostGroups_Manager::aws_aurora_replication_lag_action(int _whid, int _rhid, char *address, unsigned int port, unsigned int current_replication_lag_us, bool enable, bool is_writer, bool verbose) { +bool MySQL_HostGroups_Manager::aws_aurora_replication_lag_action(int _whid, int _rhid, char *_server_id, unsigned int current_replication_lag_us, bool enable, bool is_writer, bool verbose) { bool ret = false; // return false by default bool reader_found_in_whg = false; if (is_writer) { // if the server is a writer, we will set ret back to true once found ret = false; } + unsigned port = 3306; + char *endpoint_address = strdup((char *)""); + { + pthread_mutex_lock(&AWS_Aurora_Info_mutex); + std::map::iterator it2; + it2 = AWS_Aurora_Info_Map.find(_whid); + AWS_Aurora_Info *info=NULL; + if (it2!=AWS_Aurora_Info_Map.end()) { + info=it2->second; + if (info->endpoint_address) { + free(endpoint_address); + endpoint_address = 
strdup(info->endpoint_address); + } + port = info->aurora_port; + } + pthread_mutex_unlock(&AWS_Aurora_Info_mutex); + } + char *address = (char *)malloc(strlen(_server_id)+strlen(endpoint_address)+1); + sprintf(address,"%s%s",_server_id,endpoint_address); GloAdmin->mysql_servers_wrlock(); wrlock(); int i,j; @@ -4977,11 +5027,13 @@ bool MySQL_HostGroups_Manager::aws_aurora_replication_lag_action(int _whid, int ret = false; } } + free(address); + free(endpoint_address); return ret; } // FIXME: complete this!! -void MySQL_HostGroups_Manager::update_aws_aurora_set_writer(int _whid, int _rhid, char *_hostname, unsigned int _port, bool verbose) { +void MySQL_HostGroups_Manager::update_aws_aurora_set_writer(int _whid, int _rhid, char *_server_id, bool verbose) { int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; @@ -4989,25 +5041,46 @@ void MySQL_HostGroups_Manager::update_aws_aurora_set_writer(int _whid, int _rhid char *q=NULL; char *error=NULL; //q=(char *)"SELECT hostgroup_id FROM mysql_servers JOIN mysql_galera_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup OR hostgroup_id=backup_writer_hostgroup OR hostgroup_id=offline_hostgroup WHERE hostname='%s' AND port=%d AND status<>3"; - q=(char *)"SELECT hostgroup_id FROM mysql_servers JOIN mysql_aws_aurora_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup WHERE hostname='%s' AND port=%d AND status<>3"; - query=(char *)malloc(strlen(q)+strlen(_hostname)+1024*1024); - sprintf(query,q,_hostname,_port); - mydb->execute_statement(query, &error, &cols , &affected_rows , &resultset); - if (error) { - free(error); - error=NULL; - } - //free(query); + q=(char *)"SELECT hostgroup_id FROM mysql_servers JOIN mysql_aws_aurora_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup WHERE hostname='%s%s' AND port=%d AND status<>3"; int writer_is_also_reader=0; int new_reader_weight = 0; bool found_writer=false; bool found_reader=false; int 
_writer_hostgroup = _whid; + int aurora_port = 3306; + char *endpoint_address = strdup((char *)""); int read_HG=-1; - //bool need_converge=false; - //bool need_converge=true; + { + pthread_mutex_lock(&AWS_Aurora_Info_mutex); + std::map::iterator it2; + it2 = AWS_Aurora_Info_Map.find(_writer_hostgroup); + AWS_Aurora_Info *info=NULL; + if (it2!=AWS_Aurora_Info_Map.end()) { + info=it2->second; + writer_is_also_reader=info->writer_is_also_reader; + new_reader_weight = info->new_reader_weight; + read_HG = info->reader_hostgroup; + if (info->endpoint_address) { + free(endpoint_address); + endpoint_address = strdup(info->endpoint_address); + } + aurora_port = info->aurora_port; + } + pthread_mutex_unlock(&AWS_Aurora_Info_mutex); + } + + query=(char *)malloc(strlen(q)+strlen(_server_id)+strlen(endpoint_address)+1024*1024); + sprintf(query, q, _server_id, endpoint_address, aurora_port); + mydb->execute_statement(query, &error, &cols , &affected_rows , &resultset); + if (error) { + free(error); + error=NULL; + } + //free(query); + if (resultset) { +/* // let's get info about this cluster pthread_mutex_lock(&AWS_Aurora_Info_mutex); std::map::iterator it2; @@ -5023,7 +5096,7 @@ void MySQL_HostGroups_Manager::update_aws_aurora_set_writer(int _whid, int _rhid //max_writers = info->max_writers; } pthread_mutex_unlock(&AWS_Aurora_Info_mutex); - +*/ if (resultset->rows_count) { for (std::vector::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) { SQLite3_row *r=*it; @@ -5087,22 +5160,22 @@ void MySQL_HostGroups_Manager::update_aws_aurora_set_writer(int _whid, int _rhid q=(char *)"INSERT INTO mysql_servers_incoming SELECT hostgroup_id, hostname, port, gtid_port, weight, status, compression, max_connections, max_replication_lag, use_ssl, max_latency_ms, comment FROM mysql_servers WHERE hostgroup_id<>%d"; sprintf(query,q,_writer_hostgroup); mydb->execute(query); - q=(char *)"INSERT INTO mysql_servers_incoming SELECT hostgroup_id, hostname, port, gtid_port, 
weight, status, compression, max_connections, max_replication_lag, use_ssl, max_latency_ms, comment FROM mysql_servers WHERE hostgroup_id=%d AND hostname='%s' AND port=%d"; - sprintf(query,q,_writer_hostgroup,_hostname,_port); + q=(char *)"INSERT INTO mysql_servers_incoming SELECT hostgroup_id, hostname, port, gtid_port, weight, status, compression, max_connections, max_replication_lag, use_ssl, max_latency_ms, comment FROM mysql_servers WHERE hostgroup_id=%d AND hostname='%s%s' AND port=%d"; + sprintf(query, q, _writer_hostgroup, _server_id, endpoint_address, aurora_port); mydb->execute(query); - q=(char *)"UPDATE OR IGNORE mysql_servers_incoming SET hostgroup_id=%d WHERE hostname='%s' AND port=%d AND hostgroup_id<>%d"; + q=(char *)"UPDATE OR IGNORE mysql_servers_incoming SET hostgroup_id=%d WHERE hostname='%s%s' AND port=%d AND hostgroup_id<>%d"; //query=(char *)malloc(strlen(q)+strlen(_hostname)+1024); // increased this buffer as it is used for other queries too - sprintf(query,q,_writer_hostgroup,_hostname,_port,_writer_hostgroup); + sprintf(query, q, _writer_hostgroup, _server_id, endpoint_address, aurora_port, _writer_hostgroup); mydb->execute(query); //free(query); - q=(char *)"DELETE FROM mysql_servers_incoming WHERE hostname='%s' AND port=%d AND hostgroup_id<>%d"; + q=(char *)"DELETE FROM mysql_servers_incoming WHERE hostname='%s%s' AND port=%d AND hostgroup_id<>%d"; //query=(char *)malloc(strlen(q)+strlen(_hostname)+64); - sprintf(query,q,_hostname,_port,_writer_hostgroup); + sprintf(query, q, _server_id, endpoint_address, aurora_port, _writer_hostgroup); mydb->execute(query); //free(query); - q=(char *)"UPDATE mysql_servers_incoming SET status=0 WHERE hostname='%s' AND port=%d AND hostgroup_id=%d"; + q=(char *)"UPDATE mysql_servers_incoming SET status=0 WHERE hostname='%s%s' AND port=%d AND hostgroup_id=%d"; //query=(char *)malloc(strlen(q)+strlen(_hostname)+64); - sprintf(query,q,_hostname,_port,_writer_hostgroup); + sprintf(query, q, _server_id, 
endpoint_address, aurora_port, _writer_hostgroup); mydb->execute(query); // we need to move the old writer into the reader HG @@ -5115,13 +5188,12 @@ void MySQL_HostGroups_Manager::update_aws_aurora_set_writer(int _whid, int _rhid //free(query); if (writer_is_also_reader && read_HG>=0) { - q=(char *)"INSERT OR IGNORE INTO mysql_servers_incoming (hostgroup_id,hostname,port,gtid_port,status,weight,compression,max_connections,max_replication_lag,use_ssl,max_latency_ms,comment) SELECT %d,hostname,port,gtid_port,status,weight,compression,max_connections,max_replication_lag,use_ssl,max_latency_ms,comment FROM mysql_servers_incoming WHERE hostgroup_id=%d AND hostname='%s' AND port=%d"; - sprintf(query,q,read_HG,_writer_hostgroup,_hostname,_port); + q=(char *)"INSERT OR IGNORE INTO mysql_servers_incoming (hostgroup_id,hostname,port,gtid_port,status,weight,compression,max_connections,max_replication_lag,use_ssl,max_latency_ms,comment) SELECT %d,hostname,port,gtid_port,status,weight,compression,max_connections,max_replication_lag,use_ssl,max_latency_ms,comment FROM mysql_servers_incoming WHERE hostgroup_id=%d AND hostname='%s%s' AND port=%d"; + sprintf(query, q, read_HG, _writer_hostgroup, _server_id, endpoint_address, aurora_port); mydb->execute(query); - q = (char *)"UPDATE mysql_servers_incoming SET weight=%d WHERE hostgroup_id=%d AND hostname='%s' AND port=%d"; - sprintf(query, q, new_reader_weight, read_HG, _hostname, _port); + q = (char *)"UPDATE mysql_servers_incoming SET weight=%d WHERE hostgroup_id=%d AND hostname='%s%s' AND port=%d"; + sprintf(query, q, new_reader_weight, read_HG, _server_id, endpoint_address, aurora_port); } - //converge_galera_config(_writer_hostgroup); uint64_t checksum_current = 0; uint64_t checksum_incoming = 0; { @@ -5160,7 +5232,7 @@ void MySQL_HostGroups_Manager::update_aws_aurora_set_writer(int _whid, int _rhid free(query); } if (checksum_incoming!=checksum_current) { - proxy_warning("AWS Aurora: setting host %s:%d as writer\n", _hostname, 
_port); + proxy_warning("AWS Aurora: setting host %s%s:%d as writer\n", _server_id, endpoint_address, aurora_port); commit(); wrlock(); /* @@ -5190,7 +5262,7 @@ void MySQL_HostGroups_Manager::update_aws_aurora_set_writer(int _whid, int _rhid wrunlock(); } else { if (GloMTH->variables.hostgroup_manager_verbose > 1) { - proxy_warning("AWS Aurora: skipping setting node %s:%d from hostgroup %d as writer because won't change the list of ONLINE nodes in writer hostgroup\n", _hostname, _port, _writer_hostgroup); + proxy_warning("AWS Aurora: skipping setting node %s%s:%d from hostgroup %d as writer because won't change the list of ONLINE nodes in writer hostgroup\n", _server_id, endpoint_address, aurora_port, _writer_hostgroup); } } GloAdmin->mysql_servers_wrunlock(); @@ -5205,9 +5277,10 @@ void MySQL_HostGroups_Manager::update_aws_aurora_set_writer(int _whid, int _rhid if (query) { free(query); } + free(endpoint_address); } -void MySQL_HostGroups_Manager::update_aws_aurora_set_reader(int _whid, int _rhid, char *_hostname, int _port) { +void MySQL_HostGroups_Manager::update_aws_aurora_set_reader(int _whid, int _rhid, char *_server_id) { int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; @@ -5215,9 +5288,26 @@ void MySQL_HostGroups_Manager::update_aws_aurora_set_reader(int _whid, int _rhid char *q=NULL; char *error=NULL; int _writer_hostgroup = _whid; - q=(char *)"SELECT hostgroup_id FROM mysql_servers JOIN mysql_aws_aurora_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup WHERE hostname='%s' AND port=%d AND status<>3"; - query=(char *)malloc(strlen(q)+strlen(_hostname)+32); - sprintf(query,q,_hostname,_port); + int aurora_port = 3306; + char *endpoint_address = strdup((char *)""); + { + pthread_mutex_lock(&AWS_Aurora_Info_mutex); + std::map::iterator it2; + it2 = AWS_Aurora_Info_Map.find(_writer_hostgroup); + AWS_Aurora_Info *info=NULL; + if (it2!=AWS_Aurora_Info_Map.end()) { + info=it2->second; + if (info->endpoint_address) { + 
free(endpoint_address); + endpoint_address = strdup(info->endpoint_address); + } + aurora_port = info->aurora_port; + } + pthread_mutex_unlock(&AWS_Aurora_Info_mutex); + } + q=(char *)"SELECT hostgroup_id FROM mysql_servers JOIN mysql_aws_aurora_hostgroups ON hostgroup_id=writer_hostgroup OR hostgroup_id=reader_hostgroup WHERE hostname='%s%s' AND port=%d AND status<>3"; + query=(char *)malloc(strlen(q)+strlen(_server_id)+strlen(endpoint_address)+32); + sprintf(query, q, _server_id, endpoint_address, aurora_port); mydb->execute_statement(query, &error, &cols , &affected_rows , &resultset); if (error) { free(error); @@ -5226,22 +5316,22 @@ void MySQL_HostGroups_Manager::update_aws_aurora_set_reader(int _whid, int _rhid free(query); if (resultset) { // we lock only if needed if (resultset->rows_count) { - proxy_warning("AWS Aurora: setting host %s:%d (part of cluster with writer_hostgroup=%d) in a reader, moving from writer_hostgroup %d to reader_hostgroup %d\n", _hostname, _port, _whid, _whid, _rhid); + proxy_warning("AWS Aurora: setting host %s%s:%d (part of cluster with writer_hostgroup=%d) in a reader, moving from writer_hostgroup %d to reader_hostgroup %d\n", _server_id, endpoint_address, aurora_port, _whid, _whid, _rhid); GloAdmin->mysql_servers_wrlock(); mydb->execute("DELETE FROM mysql_servers_incoming"); mydb->execute("INSERT INTO mysql_servers_incoming SELECT hostgroup_id, hostname, port, gtid_port, weight, status, compression, max_connections, max_replication_lag, use_ssl, max_latency_ms, comment FROM mysql_servers"); - q=(char *)"UPDATE OR IGNORE mysql_servers_incoming SET hostgroup_id=%d WHERE hostname='%s' AND port=%d AND hostgroup_id<>%d"; - query=(char *)malloc(strlen(q)+strlen(_hostname)+512); - sprintf(query,q,_rhid,_hostname,_port,_rhid); + q=(char *)"UPDATE OR IGNORE mysql_servers_incoming SET hostgroup_id=%d WHERE hostname='%s%s' AND port=%d AND hostgroup_id<>%d"; + query=(char *)malloc(strlen(q)+strlen(_server_id)+strlen(endpoint_address)+512); + 
sprintf(query, q, _rhid, _server_id, endpoint_address, aurora_port, _rhid); mydb->execute(query); //free(query); - q=(char *)"DELETE FROM mysql_servers_incoming WHERE hostname='%s' AND port=%d AND hostgroup_id<>%d"; + q=(char *)"DELETE FROM mysql_servers_incoming WHERE hostname='%s%s' AND port=%d AND hostgroup_id<>%d"; //query=(char *)malloc(strlen(q)+strlen(_hostname)+64); - sprintf(query,q,_hostname,_port,_rhid); + sprintf(query, q, _server_id, endpoint_address, aurora_port, _rhid); mydb->execute(query); //free(query); - q=(char *)"UPDATE mysql_servers_incoming SET status=0 WHERE hostname='%s' AND port=%d AND hostgroup_id=%d"; - sprintf(query,q,_hostname,_port,_rhid); + q=(char *)"UPDATE mysql_servers_incoming SET status=0 WHERE hostname='%s%s' AND port=%d AND hostgroup_id=%d"; + sprintf(query, q, _server_id, endpoint_address, aurora_port, _rhid); mydb->execute(query); //free(query); //converge_galera_config(_writer_hostgroup); @@ -5287,6 +5377,7 @@ void MySQL_HostGroups_Manager::update_aws_aurora_set_reader(int _whid, int _rhid delete resultset; resultset=NULL; } + free(endpoint_address); } diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 0259cfd36..fcdbea14c 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -3477,8 +3477,12 @@ __exit_monitor_aws_aurora_HG_thread: s=(char *)malloc(l+16); } sprintf(s,"%s:%d",mmsd->hostname,mmsd->port); - AWS_Aurora_status_entry *ase = new AWS_Aurora_status_entry(mmsd->t1, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); - AWS_Aurora_status_entry *ase_l = new AWS_Aurora_status_entry(mmsd->t1, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); + unsigned long long time_now=realtime_time(); + time_now=time_now-(mmsd->t2 - start_time); + //AWS_Aurora_status_entry *ase = new AWS_Aurora_status_entry(mmsd->t1, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); + //AWS_Aurora_status_entry *ase_l = new AWS_Aurora_status_entry(mmsd->t1, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); + AWS_Aurora_status_entry *ase = new 
AWS_Aurora_status_entry(time_now, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); + AWS_Aurora_status_entry *ase_l = new AWS_Aurora_status_entry(time_now, mmsd->t2-mmsd->t1, mmsd->mysql_error_msg); if (mmsd->interr == 0 && mmsd->result) { int num_fields=0; int num_rows=0; @@ -3500,8 +3504,6 @@ __exit_monitor_aws_aurora_HG_thread: //__end_process_aws_aurora_result: if (mmsd->mysql_error_msg) { } - unsigned long long time_now=realtime_time(); - time_now=time_now-(mmsd->t2 - start_time); pthread_mutex_lock(&GloMyMon->aws_aurora_mutex); //auto it = // TODO : complete this @@ -4064,15 +4066,15 @@ void MySQL_Monitor::evaluate_aws_aurora_results(unsigned int wHG, unsigned int r #ifdef TEST_AURORA action_yes++; (enable ? enabling++ : disabling++); - rla_rc = MyHGM->aws_aurora_replication_lag_action(wHG, rHG, hse->server_id, 3306, hse->replica_lag_ms, enable, is_writer, verbose); + rla_rc = MyHGM->aws_aurora_replication_lag_action(wHG, rHG, hse->server_id, hse->replica_lag_ms, enable, is_writer, verbose); #else - rla_rc = MyHGM->aws_aurora_replication_lag_action(wHG, rHG, hse->server_id, 3306, hse->replica_lag_ms, enable, is_writer); + rla_rc = MyHGM->aws_aurora_replication_lag_action(wHG, rHG, hse->server_id, hse->replica_lag_ms, enable, is_writer); #endif // TEST_AURORA #ifdef TEST_AURORA } else { action_no++; #endif // TEST_AURORA - rla_rc = MyHGM->aws_aurora_replication_lag_action(wHG, rHG, hse->server_id, 3306, hse->replica_lag_ms, enable, is_writer); + rla_rc = MyHGM->aws_aurora_replication_lag_action(wHG, rHG, hse->server_id, hse->replica_lag_ms, enable, is_writer); } //if (is_writer == true && rla_rc == false) { if (rla_rc == false) { @@ -4081,7 +4083,7 @@ void MySQL_Monitor::evaluate_aws_aurora_results(unsigned int wHG, unsigned int r #ifdef TEST_AURORA proxy_info("Calling update_aws_aurora_set_writer for %s\n", hse->server_id); #endif // TEST_AURORA - MyHGM->update_aws_aurora_set_writer(wHG, rHG, hse->server_id, 3306); + MyHGM->update_aws_aurora_set_writer(wHG, rHG, 
hse->server_id); time_t __timer; char lut[30]; struct tm __tm_info; @@ -4097,7 +4099,7 @@ void MySQL_Monitor::evaluate_aws_aurora_results(unsigned int wHG, unsigned int r #ifdef TEST_AURORA proxy_info("Calling update_aws_aurora_set_reader for %s\n", hse->server_id); #endif // TEST_AURORA - MyHGM->update_aws_aurora_set_reader(wHG, rHG, hse->server_id, 3306); + MyHGM->update_aws_aurora_set_reader(wHG, rHG, hse->server_id); } } } diff --git a/lib/ProxySQL_Admin.cpp b/lib/ProxySQL_Admin.cpp index 44aa29f90..832c893bf 100644 --- a/lib/ProxySQL_Admin.cpp +++ b/lib/ProxySQL_Admin.cpp @@ -343,11 +343,11 @@ static int http_handler(void *cls, struct MHD_Connection *connection, const char // AWS Aurora -#define ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS_V2_1_0a "CREATE TABLE mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 10 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 100 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 80 AND check_timeout_ms <= 3000) DEFAULT 800 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1)) NOT NULL DEFAULT 0 , new_reader_weight INT CHECK (new_reader_weight >= 0 AND new_reader_weight <=10000000) NOT NULL DEFAULT 1 , comment VARCHAR , UNIQUE (reader_hostgroup))" +#define ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS_V2_1_0a "CREATE TABLE mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , aurora_port INT NOT NUlL DEFAULT 3306 , endpoint_address VARCHAR NOT NULL DEFAULT '' , 
max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 10 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 100 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 80 AND check_timeout_ms <= 3000) DEFAULT 800 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1)) NOT NULL DEFAULT 0 , new_reader_weight INT CHECK (new_reader_weight >= 0 AND new_reader_weight <=10000000) NOT NULL DEFAULT 1 , comment VARCHAR , UNIQUE (reader_hostgroup))" #define ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS ADMIN_SQLITE_TABLE_MYSQL_AWS_AURORA_HOSTGROUPS_V2_1_0a -#define ADMIN_SQLITE_TABLE_RUNTIME_MYSQL_AWS_AURORA_HOSTGROUPS "CREATE TABLE runtime_mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , max_lag_ms INT NOT NULL CHECK (max_lag_ms>= 10 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 100 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 80 AND check_timeout_ms <= 3000) DEFAULT 800 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1)) NOT NULL DEFAULT 0 , new_reader_weight INT CHECK (new_reader_weight >= 0 AND new_reader_weight <=10000000) NOT NULL DEFAULT 1 , comment VARCHAR , UNIQUE (reader_hostgroup))" +#define ADMIN_SQLITE_TABLE_RUNTIME_MYSQL_AWS_AURORA_HOSTGROUPS "CREATE TABLE runtime_mysql_aws_aurora_hostgroups (writer_hostgroup INT CHECK (writer_hostgroup>=0) NOT NULL PRIMARY KEY , reader_hostgroup INT NOT NULL CHECK (reader_hostgroup<>writer_hostgroup AND reader_hostgroup>0) , active INT CHECK (active IN (0,1)) NOT NULL DEFAULT 1 , aurora_port INT NOT NUlL DEFAULT 3306 , endpoint_address VARCHAR NOT NULL DEFAULT '' , max_lag_ms INT NOT NULL CHECK 
(max_lag_ms>= 10 AND max_lag_ms <= 600000) DEFAULT 600000 , check_interval_ms INT NOT NULL CHECK (check_interval_ms >= 100 AND check_interval_ms <= 600000) DEFAULT 1000 , check_timeout_ms INT NOT NULL CHECK (check_timeout_ms >= 80 AND check_timeout_ms <= 3000) DEFAULT 800 , writer_is_also_reader INT CHECK (writer_is_also_reader IN (0,1)) NOT NULL DEFAULT 0 , new_reader_weight INT CHECK (new_reader_weight >= 0 AND new_reader_weight <=10000000) NOT NULL DEFAULT 1 , comment VARCHAR , UNIQUE (reader_hostgroup))" @@ -7770,9 +7770,9 @@ void ProxySQL_Admin::save_mysql_servers_runtime_to_database(bool _runtime) { sqlite3 *mydb3=admindb->get_db(); char *query=NULL; if (_runtime) { - query=(char *)"INSERT INTO runtime_mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,writer_is_also_reader,new_reader_weight,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)"; + query=(char *)"INSERT INTO runtime_mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,aurora_port,endpoint_address,max_lag_ms,check_interval_ms,check_timeout_ms,writer_is_also_reader,new_reader_weight,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)"; } else { - query=(char *)"INSERT INTO mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,max_lag_ms,check_interval_ms,check_timeout_ms,writer_is_also_reader,new_reader_weight,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)"; + query=(char *)"INSERT INTO mysql_aws_aurora_hostgroups(writer_hostgroup,reader_hostgroup,active,aurora_port,endpoint_address,max_lag_ms,check_interval_ms,check_timeout_ms,writer_is_also_reader,new_reader_weight,comment) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)"; } rc=sqlite3_prepare_v2(mydb3, query, -1, &statement, 0); assert(rc==SQLITE_OK); @@ -7783,11 +7783,13 @@ void ProxySQL_Admin::save_mysql_servers_runtime_to_database(bool _runtime) { rc=sqlite3_bind_int64(statement, 2, atoi(r->fields[1])); 
assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 3, atoi(r->fields[2])); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 4, atoi(r->fields[3])); assert(rc==SQLITE_OK); - rc=sqlite3_bind_int64(statement, 5, atoi(r->fields[4])); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement, 5, r->fields[4], -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 6, atoi(r->fields[5])); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 7, atoi(r->fields[6])); assert(rc==SQLITE_OK); rc=sqlite3_bind_int64(statement, 8, atoi(r->fields[7])); assert(rc==SQLITE_OK); - rc=sqlite3_bind_text(statement, 9, r->fields[8], -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 9, atoi(r->fields[8])); assert(rc==SQLITE_OK); + rc=sqlite3_bind_int64(statement, 10, atoi(r->fields[9])); assert(rc==SQLITE_OK); + rc=sqlite3_bind_text(statement, 11, r->fields[10], -1, SQLITE_TRANSIENT); assert(rc==SQLITE_OK); SAFE_SQLITE3_STEP2(statement); rc=sqlite3_clear_bindings(statement); assert(rc==SQLITE_OK); @@ -8698,34 +8700,42 @@ int ProxySQL_Admin::Read_MySQL_Servers_from_configfile() { if (root.exists("mysql_aws_aurora_hostgroups")==true) { const Setting &mysql_aws_aurora_hostgroups = root["mysql_aws_aurora_hostgroups"]; int count = mysql_aws_aurora_hostgroups.getLength(); - char *q=(char *)"INSERT OR REPLACE INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (%d, %d, %d, %d, %d, %d, %d, %d, '%s')"; + char *q=(char *)"INSERT OR REPLACE INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, aurora_port, endpoint_address, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (%d, %d, %d, %d, '%s', %d, %d, %d, %d, %d, '%s')"; for (i=0; i< count; i++) { const Setting &line = mysql_aws_aurora_hostgroups[i]; int writer_hostgroup; 
int reader_hostgroup; int active=1; // default + int aurora_port; int max_lag_ms; int check_interval_ms; int check_timeout_ms; int writer_is_also_reader; int new_reader_weight; std::string comment=""; + std::string endpoint_address=""; if (line.lookupValue("writer_hostgroup", writer_hostgroup)==false) continue; if (line.lookupValue("reader_hostgroup", reader_hostgroup)==false) continue; + if (line.lookupValue("aurora_port", aurora_port)==false) aurora_port=3306; if (line.lookupValue("max_lag_ms", max_lag_ms)==false) max_lag_ms=600000; if (line.lookupValue("check_interval_ms", check_interval_ms)==false) check_interval_ms=1000; if (line.lookupValue("check_timeout_ms", check_timeout_ms)==false) check_timeout_ms=1000; if (line.lookupValue("writer_is_also_reader", writer_is_also_reader)==false) writer_is_also_reader=0; if (line.lookupValue("new_reader_weight", new_reader_weight)==false) new_reader_weight=0; line.lookupValue("comment", comment); + line.lookupValue("endpoint_address", endpoint_address); char *o1=strdup(comment.c_str()); char *o=escape_string_single_quotes(o1, false); - char *query=(char *)malloc(strlen(q)+strlen(o)+256); // 128 vs sizeof(int)*8 - sprintf(query,q, writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, o); + char *p1=strdup(endpoint_address.c_str()); + char *p=escape_string_single_quotes(p1, false); + char *query=(char *)malloc(strlen(q)+strlen(o)+strlen(p)+256); // 128 vs sizeof(int)*8 + sprintf(query,q, writer_hostgroup, reader_hostgroup, active, aurora_port, p, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, o); //fprintf(stderr, "%s\n", query); admindb->execute(query); if (o!=o1) free(o); free(o1); + if (p!=p1) free(p); + free(p1); free(query); rows++; } @@ -9675,7 +9685,12 @@ void ProxySQL_Admin::enable_aurora_testing() { for (unsigned int j=1; j<4; j++) { proxy_info("Admin is enabling AWS Aurora Testing using SQLite3
Server and HGs 127%d and 127%d\n" , j*2-1 , j*2); for (unsigned int i=0; iexecute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1271, 1272, 1, 25, 120, 90, 1, 1, 'Automated Aurora Testing Cluster 1')"); + admindb->execute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, endpoint_address, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1271, 1272, 1, '.aws-test.com', 25, 120, 90, 1, 1, 'Automated Aurora Testing Cluster 1')"); admindb->execute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1273, 1274, 1, 25, 120, 90, 0, 1, 'Automated Aurora Testing Cluster 2')"); admindb->execute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1275, 1276, 1, 25, 120, 90, 0, 2, 'Automated Aurora Testing Cluster 3')"); load_mysql_servers_to_runtime(); diff --git a/lib/SQLite3_Server.cpp b/lib/SQLite3_Server.cpp index ca5479715..8cad43163 100644 --- a/lib/SQLite3_Server.cpp +++ b/lib/SQLite3_Server.cpp @@ -778,8 +778,12 @@ void SQLite3_Server::populate_aws_aurora_table(MySQL_Session *sess) { proxy_info("Simulating a failover for AWS Aurora cluster %d , HGs (%d:%d)\n", cluster_id, 1270 + cluster_id*2+1 , 1270 + cluster_id*2+2); } for (unsigned int i=0; i Date: Thu, 21 Mar 2019 03:28:42 +1100 Subject: [PATCH 05/20] Adding support for max_lag_ms in query --- include/MySQL_HostGroups_Manager.h | 6 +-- include/MySQL_Thread.h | 2 +- include/query_processor.h | 4 +- lib/MySQL_HostGroups_Manager.cpp | 66 ++++++++++++++++++++++-------- lib/MySQL_Session.cpp | 8 ++-- 
lib/MySQL_Thread.cpp | 7 +++- lib/Query_Processor.cpp | 6 +++ 7 files changed, 73 insertions(+), 26 deletions(-) diff --git a/include/MySQL_HostGroups_Manager.h b/include/MySQL_HostGroups_Manager.h index f84f09837..2d7685c3b 100644 --- a/include/MySQL_HostGroups_Manager.h +++ b/include/MySQL_HostGroups_Manager.h @@ -384,7 +384,7 @@ class MyHGC { // MySQL Host Group Container MySrvList *mysrvs; MyHGC(int); ~MyHGC(); - MySrvC *get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid); + MySrvC *get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_lag_ms); }; class Group_Replication_Info { @@ -565,7 +565,7 @@ class MySQL_HostGroups_Manager { void MyConn_add_to_pool(MySQL_Connection *); - MySQL_Connection * get_MyConn_from_pool(unsigned int hid, MySQL_Session *sess, bool ff, char * gtid_uuid, uint64_t gtid_trxid); + MySQL_Connection * get_MyConn_from_pool(unsigned int hid, MySQL_Session *sess, bool ff, char * gtid_uuid, uint64_t gtid_trxid, int max_lag_ms); void drop_all_idle_connections(); int get_multiple_idle_connections(int, unsigned long long, MySQL_Connection **, int); @@ -598,7 +598,7 @@ class MySQL_HostGroups_Manager { //bool aws_aurora_replication_lag_action(int _whid, int _rhid, char *address, unsigned int port, unsigned int current_replication_lag_us, bool enable, bool is_writer, bool verbose=true); //void update_aws_aurora_set_writer(int _whid, int _rhid, char *address, unsigned int port, bool verbose=true); //void update_aws_aurora_set_reader(int _whid, int _rhid, char *_hostname, int _port); - bool aws_aurora_replication_lag_action(int _whid, int _rhid, char *server_id, unsigned int current_replication_lag_us, bool enable, bool is_writer, bool verbose=true); + bool aws_aurora_replication_lag_action(int _whid, int _rhid, char *server_id, float current_replication_lag_ms, bool enable, bool is_writer, bool verbose=true); void update_aws_aurora_set_writer(int _whid, int _rhid, char *server_id, bool verbose=true); void 
update_aws_aurora_set_reader(int _whid, int _rhid, char *server_id); diff --git a/include/MySQL_Thread.h b/include/MySQL_Thread.h index 769a98881..b6fef27c3 100644 --- a/include/MySQL_Thread.h +++ b/include/MySQL_Thread.h @@ -274,7 +274,7 @@ class MySQL_Thread void unregister_session_connection_handler(int idx, bool _new=false); void listener_handle_new_connection(MySQL_Data_Stream *myds, unsigned int n); void Get_Memory_Stats(); - MySQL_Connection * get_MyConn_local(unsigned int, MySQL_Session *sess, char *gtid_uuid, uint64_t gtid_trxid); + MySQL_Connection * get_MyConn_local(unsigned int, MySQL_Session *sess, char *gtid_uuid, uint64_t gtid_trxid, int max_lag_ms); void push_MyConn_local(MySQL_Connection *); void return_local_connections(); void Scan_Sessions_to_Kill(PtrArray *mysess); diff --git a/include/query_processor.h b/include/query_processor.h index f6d16cd0a..3940f224b 100644 --- a/include/query_processor.h +++ b/include/query_processor.h @@ -72,8 +72,9 @@ class Query_Processor_Output { int sticky_conn; int multiplex; int gtid_from_hostgroup; + int max_lag_ms; int log; - char *comment; // #643 + char *comment; // #643 std::string *new_query; void * operator new(size_t size) { return l_alloc(size); @@ -104,6 +105,7 @@ class Query_Processor_Output { sticky_conn=-1; multiplex=-1; gtid_from_hostgroup=-1; + max_lag_ms=-1; log=-1; new_query=NULL; error_msg=NULL; diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index 2a415afc5..3268ec577 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -2190,7 +2190,7 @@ void MySQL_HostGroups_Manager::push_MyConn_to_pool_array(MySQL_Connection **ca, wrunlock(); } -MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid) { +MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_lag_ms) { MySrvC *mysrvc=NULL; unsigned int j; unsigned int sum=0; @@ -2209,8 +2209,15 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, 
uint64_t gtid_trxid) { TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); } } else { - sum+=mysrvc->weight; - TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); + if (max_lag_ms >= 0) { + if (max_lag_ms <= mysrvc->aws_aurora_current_lag_us/1000) { + sum+=mysrvc->weight; + TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); + } + } else { + sum+=mysrvc->weight; + TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); + } } } } @@ -2248,8 +2255,15 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid) { TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); } } else { - sum+=mysrvc->weight; - TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); + if (max_lag_ms >= 0) { + if (max_lag_ms <= mysrvc->aws_aurora_current_lag_us/1000) { + sum+=mysrvc->weight; + TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); + } + } else { + sum+=mysrvc->weight; + TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); + } } } } @@ -2284,8 +2298,15 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid) { TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); } } else { - sum+=mysrvc->weight; - TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); + if (max_lag_ms >= 0) { + if (max_lag_ms <= mysrvc->aws_aurora_current_lag_us/1000) { + sum+=mysrvc->weight; + TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); + } + } else { + sum+=mysrvc->weight; + TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); + } } } } @@ -2314,8 +2335,15 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid) { New_TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); } } else { - New_sum+=mysrvc->weight; - New_TotalUsedConn+=len; + if (max_lag_ms >= 0) { + if (max_lag_ms <= mysrvc->aws_aurora_current_lag_us/1000) { + New_sum+=mysrvc->weight; + New_TotalUsedConn+=len; + } + } else { + New_sum+=mysrvc->weight; + New_TotalUsedConn+=len; + } } } } @@ -2350,7 +2378,13 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, 
uint64_t gtid_trxid) { TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); } } else { - New_sum+=mysrvc->weight; + if (max_lag_ms >= 0) { + if (max_lag_ms <= mysrvc->aws_aurora_current_lag_us/1000) { + New_sum+=mysrvc->weight; + } + } else { + New_sum+=mysrvc->weight; + } } if (k<=New_sum) { proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 7, "Returning MySrvC %p, server %s:%d\n", mysrvc, mysrvc->address, mysrvc->port); @@ -2447,12 +2481,12 @@ MySQL_Connection * MySrvConnList::get_random_MyConn(MySQL_Session *sess, bool ff return NULL; // never reach here } -MySQL_Connection * MySQL_HostGroups_Manager::get_MyConn_from_pool(unsigned int _hid, MySQL_Session *sess, bool ff, char * gtid_uuid, uint64_t gtid_trxid) { +MySQL_Connection * MySQL_HostGroups_Manager::get_MyConn_from_pool(unsigned int _hid, MySQL_Session *sess, bool ff, char * gtid_uuid, uint64_t gtid_trxid, int max_lag_ms) { MySQL_Connection * conn=NULL; wrlock(); status.myconnpoll_get++; MyHGC *myhgc=MyHGC_lookup(_hid); - MySrvC *mysrvc=myhgc->get_random_MySrvC(gtid_uuid, gtid_trxid); + MySrvC *mysrvc=myhgc->get_random_MySrvC(gtid_uuid, gtid_trxid, max_lag_ms); if (mysrvc) { // a MySrvC exists. 
If not, we return NULL = no targets conn=mysrvc->ConnectionsFree->get_random_MyConn(sess, ff); if (conn) { @@ -4938,7 +4972,7 @@ void MySQL_HostGroups_Manager::generate_mysql_aws_aurora_hostgroups_table() { //void MySQL_HostGroups_Manager::aws_aurora_replication_lag_action(int _whid, int _rhid, char *address, unsigned int port, float current_replication_lag, bool enable, bool verbose) { // this function returns false is the server is in the wrong HG -bool MySQL_HostGroups_Manager::aws_aurora_replication_lag_action(int _whid, int _rhid, char *_server_id, unsigned int current_replication_lag_us, bool enable, bool is_writer, bool verbose) { +bool MySQL_HostGroups_Manager::aws_aurora_replication_lag_action(int _whid, int _rhid, char *_server_id, float current_replication_lag_ms, bool enable, bool is_writer, bool verbose) { bool ret = false; // return false by default bool reader_found_in_whg = false; if (is_writer) { @@ -4977,19 +5011,19 @@ bool MySQL_HostGroups_Manager::aws_aurora_replication_lag_action(int _whid, int if (enable==false) { if (mysrvc->status == MYSQL_SERVER_STATUS_ONLINE) { if (verbose) { - proxy_warning("Shunning server %s:%d from HG %u with replication lag of %f microseconds\n", address, port, myhgc->hid, current_replication_lag_us); + proxy_warning("Shunning server %s:%d from HG %u with replication lag of %f milliseconds\n", address, port, myhgc->hid, current_replication_lag_ms); } mysrvc->status = MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG; } } else { if (mysrvc->status == MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) { if (verbose) { - proxy_warning("Re-enabling server %s:%d from HG %u with replication lag of %f microseconds\n", address, port, myhgc->hid, current_replication_lag_us); + proxy_warning("Re-enabling server %s:%d from HG %u with replication lag of %f milliseconds\n", address, port, myhgc->hid, current_replication_lag_ms); } mysrvc->status = MYSQL_SERVER_STATUS_ONLINE; } } - mysrvc->aws_aurora_current_lag_us =
current_replication_lag_us; + mysrvc->aws_aurora_current_lag_us = current_replication_lag_ms * 1000; if (mysrvc->status == MYSQL_SERVER_STATUS_ONLINE || mysrvc->status == MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) { // we perform check only if ONLINE or lagging if (ret) { diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index 024f25c66..3ed46f345 100644 --- a/lib/MySQL_Session.cpp +++ b/lib/MySQL_Session.cpp @@ -4508,16 +4508,16 @@ void MySQL_Session::handler___client_DSS_QUERY_SENT___server_DSS_NOT_INITIALIZED } } uuid[n]='\0'; - mc=thread->get_MyConn_local(mybe->hostgroup_id, this, uuid, trxid); + mc=thread->get_MyConn_local(mybe->hostgroup_id, this, uuid, trxid, -1); } else { - mc=thread->get_MyConn_local(mybe->hostgroup_id, this, NULL, 0); + mc=thread->get_MyConn_local(mybe->hostgroup_id, this, NULL, 0, qpo->max_lag_ms); } } if (mc==NULL) { if (trxid) { - mc=MyHGM->get_MyConn_from_pool(mybe->hostgroup_id, this, session_fast_forward, uuid, trxid); + mc=MyHGM->get_MyConn_from_pool(mybe->hostgroup_id, this, session_fast_forward, uuid, trxid, qpo->max_lag_ms); } else { - mc=MyHGM->get_MyConn_from_pool(mybe->hostgroup_id, this, session_fast_forward, NULL, 0); + mc=MyHGM->get_MyConn_from_pool(mybe->hostgroup_id, this, session_fast_forward, NULL, 0, qpo->max_lag_ms); } } else { thread->status_variables.ConnPool_get_conn_immediate++; diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index 999ed4aeb..c533c341e 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -5071,7 +5071,7 @@ void MySQL_Thread::Get_Memory_Stats() { } -MySQL_Connection * MySQL_Thread::get_MyConn_local(unsigned int _hid, MySQL_Session *sess, char *gtid_uuid, uint64_t gtid_trxid) { +MySQL_Connection * MySQL_Thread::get_MyConn_local(unsigned int _hid, MySQL_Session *sess, char *gtid_uuid, uint64_t gtid_trxid, int max_lag_ms) { unsigned int i; unsigned int bc = 0; // best candidate bool pcf = false; // possible candidate found @@ -5125,6 +5125,11 @@ MySQL_Connection * 
MySQL_Thread::get_MyConn_local(unsigned int _hid, MySQL_Sessi } else { // c=(MySQL_Connection *)cached_connections->remove_index_fast(i); + if (max_lag_ms >= 0) { + if (max_lag_ms >= (c->parent->aws_aurora_current_lag_us / 1000)) { + continue; + } + } if (pcf == false) { bc = i; pcf = true; diff --git a/lib/Query_Processor.cpp b/lib/Query_Processor.cpp index e053800f1..0fedf4d38 100644 --- a/lib/Query_Processor.cpp +++ b/lib/Query_Processor.cpp @@ -1946,6 +1946,12 @@ bool Query_Processor::query_parser_first_comment(Query_Processor_Output *qpo, ch qpo->mirror_hostgroup=t; } } + if (!strcasecmp(key,"max_lag_ms")) { + if (c >= '0' && c <= '9') { // it is a digit + int t=atoi(value); + qpo->max_lag_ms = t; + } + } } proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 5, "Variables in comment %s , key=%s , value=%s\n", token, key, value); From b74181eeea94bac03f180866d1abd4a63e8e7429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Thu, 21 Mar 2019 13:05:03 +1100 Subject: [PATCH 06/20] Added aws_aurora_replicas_skipped_during_query aws_aurora_replicas_skipped_during_query is a status variable for better monitoring behavior due to replication lag in AWS Aurora Also fixed lag computing in connection pool --- include/MySQL_HostGroups_Manager.h | 2 +- include/MySQL_Thread.h | 2 ++ lib/MySQL_HostGroups_Manager.cpp | 16 +++++++++------- lib/MySQL_Thread.cpp | 23 ++++++++++++++++++++++- 4 files changed, 34 insertions(+), 9 deletions(-) diff --git a/include/MySQL_HostGroups_Manager.h b/include/MySQL_HostGroups_Manager.h index 2d7685c3b..c933e8726 100644 --- a/include/MySQL_HostGroups_Manager.h +++ b/include/MySQL_HostGroups_Manager.h @@ -384,7 +384,7 @@ class MyHGC { // MySQL Host Group Container MySrvList *mysrvs; MyHGC(int); ~MyHGC(); - MySrvC *get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_lag_ms); + MySrvC *get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_lag_ms, MySQL_Session *sess); }; class Group_Replication_Info { 
diff --git a/include/MySQL_Thread.h b/include/MySQL_Thread.h index b6fef27c3..ceea8e688 100644 --- a/include/MySQL_Thread.h +++ b/include/MySQL_Thread.h @@ -247,6 +247,7 @@ class MySQL_Thread unsigned long long unexpected_packet; unsigned long long killed_connections; unsigned long long killed_queries; + unsigned long long aws_aurora_replicas_skipped_during_query; unsigned int active_transactions; } status_variables; @@ -513,6 +514,7 @@ class MySQL_Threads_Handler unsigned long long get_max_connect_timeout(); unsigned long long get_unexpected_com_quit(); unsigned long long get_unexpected_packet(); + unsigned long long get_aws_aurora_replicas_skipped_during_query(); unsigned long long get_killed_connections(); unsigned long long get_killed_queries(); iface_info *MLM_find_iface_from_fd(int fd) { diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index 3268ec577..bdbe9d2a2 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -2190,7 +2190,7 @@ void MySQL_HostGroups_Manager::push_MyConn_to_pool_array(MySQL_Connection **ca, wrunlock(); } -MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_lag_ms) { +MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_lag_ms, MySQL_Session *sess) { MySrvC *mysrvc=NULL; unsigned int j; unsigned int sum=0; @@ -2210,9 +2210,11 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ } } else { if (max_lag_ms >= 0) { - if (max_lag_ms <= mysrvc->aws_aurora_current_lag_us/1000) { + if (max_lag_ms >= mysrvc->aws_aurora_current_lag_us/1000) { sum+=mysrvc->weight; TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); + } else { + sess->thread->status_variables.aws_aurora_replicas_skipped_during_query++; } } else { sum+=mysrvc->weight; @@ -2256,7 +2258,7 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ } } else { if (max_lag_ms >= 0) { - if (max_lag_ms <= 
mysrvc->aws_aurora_current_lag_us/1000) { + if (max_lag_ms >= mysrvc->aws_aurora_current_lag_us/1000) { sum+=mysrvc->weight; TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); } @@ -2299,7 +2301,7 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ } } else { if (max_lag_ms >= 0) { - if (max_lag_ms <= mysrvc->aws_aurora_current_lag_us/1000) { + if (max_lag_ms >= mysrvc->aws_aurora_current_lag_us/1000) { sum+=mysrvc->weight; TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); } @@ -2336,7 +2338,7 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ } } else { if (max_lag_ms >= 0) { - if (max_lag_ms <= mysrvc->aws_aurora_current_lag_us/1000) { + if (max_lag_ms >= mysrvc->aws_aurora_current_lag_us/1000) { New_sum+=mysrvc->weight; New_TotalUsedConn+=len; } @@ -2379,7 +2381,7 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ } } else { if (max_lag_ms >= 0) { - if (max_lag_ms <= mysrvc->aws_aurora_current_lag_us/1000) { + if (max_lag_ms >= mysrvc->aws_aurora_current_lag_us/1000) { New_sum+=mysrvc->weight; } } else { @@ -2486,7 +2488,7 @@ MySQL_Connection * MySQL_HostGroups_Manager::get_MyConn_from_pool(unsigned int _ wrlock(); status.myconnpoll_get++; MyHGC *myhgc=MyHGC_lookup(_hid); - MySrvC *mysrvc=myhgc->get_random_MySrvC(gtid_uuid, gtid_trxid, max_lag_ms); + MySrvC *mysrvc=myhgc->get_random_MySrvC(gtid_uuid, gtid_trxid, max_lag_ms, sess); if (mysrvc) { // a MySrvC exists. 
If not, we return NULL = no targets conn=mysrvc->ConnectionsFree->get_random_MyConn(sess, ff); if (conn) { diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index c533c341e..38c71f9ac 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -3813,6 +3813,7 @@ MySQL_Thread::MySQL_Thread() { status_variables.unexpected_packet = 0; status_variables.killed_connections = 0; status_variables.killed_queries = 0; + status_variables.aws_aurora_replicas_skipped_during_query = 0; match_regexes=NULL; @@ -4321,6 +4322,12 @@ SQLite3_result * MySQL_Threads_Handler::SQL3_GlobalStatus(bool _memory) { pta[1]=buf; result->add_row(pta); } + { // AWS Aurora replicas skipped during query + pta[0]=(char *)"aws_aurora_replicas_skipped_during_query"; + sprintf(buf,"%llu",get_aws_aurora_replicas_skipped_during_query()); + pta[1]=buf; + result->add_row(pta); + } { // killed connections pta[0]=(char *)"mysql_killed_backend_connections"; sprintf(buf,"%llu",get_killed_connections()); @@ -5126,7 +5133,8 @@ MySQL_Connection * MySQL_Thread::get_MyConn_local(unsigned int _hid, MySQL_Sessi // c=(MySQL_Connection *)cached_connections->remove_index_fast(i); if (max_lag_ms >= 0) { - if (max_lag_ms >= (c->parent->aws_aurora_current_lag_us / 1000)) { + if (max_lag_ms < (c->parent->aws_aurora_current_lag_us / 1000)) { + status_variables.aws_aurora_replicas_skipped_during_query++; continue; } } @@ -5289,6 +5297,19 @@ unsigned long long MySQL_Threads_Handler::get_unexpected_packet() { return q; } +unsigned long long MySQL_Threads_Handler::get_aws_aurora_replicas_skipped_during_query() { + unsigned long long q=0; + unsigned int i; + for (i=0;istatus_variables.aws_aurora_replicas_skipped_during_query,0); + } + } + return q; +} + unsigned long long MySQL_Threads_Handler::get_killed_connections() { unsigned long long q=0; unsigned int i; From 16913fdd78d2606dd51c4805966dd6f5096c3b51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Thu, 21 Mar 2019 18:34:51 +1100 Subject: 
[PATCH 07/20] Optimizing get_random_MySrvC() --- lib/MySQL_HostGroups_Manager.cpp | 133 ++++++++++++++++++++++++++++++- lib/MySQL_Session.cpp | 6 +- lib/SQLite3_Server.cpp | 3 +- 3 files changed, 137 insertions(+), 5 deletions(-) diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index bdbe9d2a2..c352787a4 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -10,6 +10,14 @@ #include "ev.h" + +#define USE_MYSRVC_ARRAY + +#ifdef USE_MYSRVC_ARRAY +static unsigned long long array_mysrvc_total = 0; +static unsigned long long array_mysrvc_cands = 0; +#endif // USE_MYSRVC_ARRAY + #define SAFE_SQLITE3_STEP(_stmt) do {\ do {\ rc=sqlite3_step(_stmt);\ @@ -2196,6 +2204,22 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ unsigned int sum=0; unsigned int TotalUsedConn=0; unsigned int l=mysrvs->cnt(); +#ifdef USE_MYSRVC_ARRAY +#ifdef TEST_AURORA + unsigned long long a1 = array_mysrvc_total/10000; + array_mysrvc_total += l; + unsigned long long a2 = array_mysrvc_total/10000; + if (a2 > a1) { + fprintf(stderr, "Total: %llu, Candidates: %llu\n", array_mysrvc_total-l, array_mysrvc_cands); + } +#endif // TEST_AURORA + MySrvC *mysrvcCandidates_static[32]; + MySrvC **mysrvcCandidates = mysrvcCandidates_static; + unsigned int num_candidates = 0; + if (l>32) { + mysrvcCandidates = (MySrvC **)malloc(sizeof(MySrvC *)*l); + } +#endif // USE_MYSRVC_ARRAY if (l) { //int j=0; for (j=0; jgtid_exists(mysrvc, gtid_uuid, gtid_trxid)) { sum+=mysrvc->weight; TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); +#ifdef USE_MYSRVC_ARRAY + mysrvcCandidates[num_candidates]=mysrvc; + num_candidates++; +#endif // USE_MYSRVC_ARRAY } } else { if (max_lag_ms >= 0) { if (max_lag_ms >= mysrvc->aws_aurora_current_lag_us/1000) { sum+=mysrvc->weight; TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); +#ifdef USE_MYSRVC_ARRAY + mysrvcCandidates[num_candidates]=mysrvc; + num_candidates++; +#endif // 
USE_MYSRVC_ARRAY } else { sess->thread->status_variables.aws_aurora_replicas_skipped_during_query++; } } else { sum+=mysrvc->weight; TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); +#ifdef USE_MYSRVC_ARRAY + mysrvcCandidates[num_candidates]=mysrvc; + num_candidates++; +#endif // USE_MYSRVC_ARRAY } } } @@ -2255,16 +2291,28 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ if (MyHGM->gtid_exists(mysrvc, gtid_uuid, gtid_trxid)) { sum+=mysrvc->weight; TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); +#ifdef USE_MYSRVC_ARRAY + mysrvcCandidates[num_candidates]=mysrvc; + num_candidates++; +#endif // USE_MYSRVC_ARRAY } } else { if (max_lag_ms >= 0) { if (max_lag_ms >= mysrvc->aws_aurora_current_lag_us/1000) { sum+=mysrvc->weight; TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); +#ifdef USE_MYSRVC_ARRAY + mysrvcCandidates[num_candidates]=mysrvc; + num_candidates++; +#endif // USE_MYSRVC_ARRAY } } else { sum+=mysrvc->weight; TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); +#ifdef USE_MYSRVC_ARRAY + mysrvcCandidates[num_candidates]=mysrvc; + num_candidates++; +#endif // USE_MYSRVC_ARRAY } } } @@ -2298,16 +2346,28 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ if (MyHGM->gtid_exists(mysrvc, gtid_uuid, gtid_trxid)) { sum+=mysrvc->weight; TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); +#ifdef USE_MYSRVC_ARRAY + mysrvcCandidates[num_candidates]=mysrvc; + num_candidates++; +#endif // USE_MYSRVC_ARRAY } } else { if (max_lag_ms >= 0) { if (max_lag_ms >= mysrvc->aws_aurora_current_lag_us/1000) { sum+=mysrvc->weight; TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); +#ifdef USE_MYSRVC_ARRAY + mysrvcCandidates[num_candidates]=mysrvc; + num_candidates++; +#endif // USE_MYSRVC_ARRAY } } else { sum+=mysrvc->weight; TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); +#ifdef USE_MYSRVC_ARRAY + mysrvcCandidates[num_candidates]=mysrvc; + num_candidates++; +#endif // 
USE_MYSRVC_ARRAY } } } @@ -2317,6 +2377,12 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ } if (sum==0) { proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 7, "Returning MySrvC NULL because no backend ONLINE or with weight\n"); +#ifdef USE_MYSRVC_ARRAY + if (l>32) { + free(mysrvcCandidates); + } + array_mysrvc_cands += num_candidates; +#endif // USE_MYSRVC_ARRAY return NULL; // if we reach here, we couldn't find any target } @@ -2324,13 +2390,27 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ unsigned int New_TotalUsedConn=0; // we will now scan again to ignore overloaded servers +#ifdef USE_MYSRVC_ARRAY + for (j=0; jidx(j); if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE) { // consider this server only if ONLINE +#endif // USE_MYSRVC_ARRAY unsigned int len=mysrvc->ConnectionsUsed->conns_length(); +#ifdef USE_MYSRVC_ARRAY +#else + if (len < mysrvc->max_connections) { // consider this server only if didn't reach max_connections if ( mysrvc->current_latency_us < ( mysrvc->max_latency_us ? 
mysrvc->max_latency_us : mysql_thread___default_max_latency_ms*1000 ) ) { // consider the host only if not too far +#endif // USE_MYSRVC_ARRAY if ((len * sum) <= (TotalUsedConn * mysrvc->weight * 1.5 + 1)) { + +#ifdef USE_MYSRVC_ARRAY + New_sum+=mysrvc->weight; + New_TotalUsedConn+=len; +#else if (gtid_trxid) { if (MyHGM->gtid_exists(mysrvc, gtid_uuid, gtid_trxid)) { New_sum+=mysrvc->weight; @@ -2347,14 +2427,33 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ New_TotalUsedConn+=len; } } +#endif // USE_MYSRVC_ARRAY +#ifdef USE_MYSRVC_ARRAY + } else { + // remove the candidate + if (j+1 < num_candidates) { + mysrvcCandidates[j] = mysrvcCandidates[num_candidates-1]; + } + j--; + num_candidates--; +#endif // USE_MYSRVC_ARRAY } +#ifdef USE_MYSRVC_ARRAY +#else } } } +#endif // USE_MYSRVC_ARRAY } if (New_sum==0) { proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 7, "Returning MySrvC NULL because no backend ONLINE or with weight\n"); +#ifdef USE_MYSRVC_ARRAY + if (l>32) { + free(mysrvcCandidates); + } + array_mysrvc_cands += num_candidates; +#endif // USE_MYSRVC_ARRAY return NULL; // if we reach here, we couldn't find any target } @@ -2364,9 +2463,13 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ } else { k=fastrand()%New_sum; } - k++; + k++; New_sum=0; +#ifdef USE_MYSRVC_ARRAY + for (j=0; jidx(j); if (mysrvc->status==MYSQL_SERVER_STATUS_ONLINE) { // consider this server only if ONLINE @@ -2374,10 +2477,14 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ if (len < mysrvc->max_connections) { // consider this server only if didn't reach max_connections if ( mysrvc->current_latency_us < ( mysrvc->max_latency_us ? 
mysrvc->max_latency_us : mysql_thread___default_max_latency_ms*1000 ) ) { // consider the host only if not too far if ((len * sum) <= (TotalUsedConn * mysrvc->weight * 1.5 + 1)) { +#endif // USE_MYSRVC_ARRAY +#ifdef USE_MYSRVC_ARRAY + New_sum+=mysrvc->weight; +#else if (gtid_trxid) { if (MyHGM->gtid_exists(mysrvc, gtid_uuid, gtid_trxid)) { New_sum+=mysrvc->weight; - TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); + //TotalUsedConn+=mysrvc->ConnectionsUsed->conns_length(); // this line is a bug } } else { if (max_lag_ms >= 0) { @@ -2388,17 +2495,33 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ New_sum+=mysrvc->weight; } } +#endif // USE_MYSRVC_ARRAY if (k<=New_sum) { proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 7, "Returning MySrvC %p, server %s:%d\n", mysrvc, mysrvc->address, mysrvc->port); +#ifdef USE_MYSRVC_ARRAY + if (l>32) { + free(mysrvcCandidates); + } + array_mysrvc_cands += num_candidates; +#endif // USE_MYSRVC_ARRAY return mysrvc; } +#ifdef USE_MYSRVC_ARRAY +#else } } } } +#endif // USE_MYSRVC_ARRAY } } proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 7, "Returning MySrvC NULL\n"); +#ifdef USE_MYSRVC_ARRAY + if (l>32) { + free(mysrvcCandidates); + } + array_mysrvc_cands += num_candidates; +#endif // USE_MYSRVC_ARRAY return NULL; // if we reach here, we couldn't find any target } @@ -2488,7 +2611,11 @@ MySQL_Connection * MySQL_HostGroups_Manager::get_MyConn_from_pool(unsigned int _ wrlock(); status.myconnpoll_get++; MyHGC *myhgc=MyHGC_lookup(_hid); - MySrvC *mysrvc=myhgc->get_random_MySrvC(gtid_uuid, gtid_trxid, max_lag_ms, sess); + MySrvC *mysrvc = NULL; +#ifdef TEST_AURORA + for (int i=0; i<10; i++) +#endif // TEST_AURORA + mysrvc = myhgc->get_random_MySrvC(gtid_uuid, gtid_trxid, max_lag_ms, sess); if (mysrvc) { // a MySrvC exists. 
If not, we return NULL = no targets conn=mysrvc->ConnectionsFree->get_random_MyConn(sess, ff); if (conn) { diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index 3ed46f345..acb9032d7 100644 --- a/lib/MySQL_Session.cpp +++ b/lib/MySQL_Session.cpp @@ -3044,7 +3044,11 @@ handler_again: myds->max_connect_time=thread->curtime+mysql_thread___connect_timeout_server_max*1000; } bool retry_conn=false; - proxy_error("Detected an offline server during query: %s, %d\n", myconn->parent->address, myconn->parent->port); + if (myconn->server_status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) { + proxy_error("Detected a lagging server during query: %s, %d\n", myconn->parent->address, myconn->parent->port); + } else { + proxy_error("Detected an offline server during query: %s, %d\n", myconn->parent->address, myconn->parent->port); + } if (myds->query_retries_on_failure > 0) { myds->query_retries_on_failure--; if ((myds->myconn->reusable==true) && myds->myconn->IsActiveTransaction()==false && myds->myconn->MultiplexDisabled()==false) { diff --git a/lib/SQLite3_Server.cpp b/lib/SQLite3_Server.cpp index 8cad43163..ae7358438 100644 --- a/lib/SQLite3_Server.cpp +++ b/lib/SQLite3_Server.cpp @@ -772,7 +772,8 @@ void SQLite3_Server::populate_aws_aurora_table(MySQL_Session *sess) { string clu_id_s = myip.substr(6,1); unsigned int cluster_id = atoi(clu_id_s.c_str()); cluster_id--; - if (rand() % 200 == 0) { + //if (rand() % 200 == 0) { + if (rand() % 20000 == 0) { // simulate a failover cur_aurora_writer[cluster_id] = rand() % num_aurora_servers[cluster_id]; proxy_info("Simulating a failover for AWS Aurora cluster %d , HGs (%d:%d)\n", cluster_id, 1270 + cluster_id*2+1 , 1270 + cluster_id*2+2); From 8469801e740828e20887f3ff96f2d09fe5bba75c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Thu, 21 Mar 2019 21:33:00 +1100 Subject: [PATCH 08/20] Implemented first prototype of latency awareness Configurable using mysql-min_num_servers_lantency_awareness --- 
include/MySQL_Thread.h | 4 +++ lib/MySQL_HostGroups_Manager.cpp | 45 ++++++++++++++++++++++++++++++++ lib/MySQL_Thread.cpp | 39 +++++++++++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/include/MySQL_Thread.h b/include/MySQL_Thread.h index ceea8e688..2b00e357c 100644 --- a/include/MySQL_Thread.h +++ b/include/MySQL_Thread.h @@ -239,6 +239,7 @@ class MySQL_Thread unsigned long long ConnPool_get_conn_immediate; unsigned long long ConnPool_get_conn_success; unsigned long long ConnPool_get_conn_failure; + unsigned long long ConnPool_get_conn_latency_awareness; unsigned long long gtid_binlog_collected; unsigned long long gtid_session_collected; unsigned long long generated_pkt_err; @@ -252,6 +253,7 @@ class MySQL_Thread } status_variables; struct { + int min_num_servers_lantency_awareness; bool stats_time_backend_query; bool stats_time_query_processor; bool query_cache_stores_empty_result; @@ -440,6 +442,7 @@ class MySQL_Threads_Handler char * ssl_p2s_key; char * ssl_p2s_cipher; int query_cache_size_MB; + int min_num_servers_lantency_awareness; bool stats_time_backend_query; bool stats_time_query_processor; bool query_cache_stores_empty_result; @@ -510,6 +513,7 @@ class MySQL_Threads_Handler unsigned long long get_ConnPool_get_conn_immediate(); unsigned long long get_ConnPool_get_conn_success(); unsigned long long get_ConnPool_get_conn_failure(); + unsigned long long get_ConnPool_get_conn_latency_awareness(); unsigned long long get_generated_pkt_err(); unsigned long long get_max_connect_timeout(); unsigned long long get_unexpected_com_quit(); diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index c352787a4..491809f24 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -2446,6 +2446,7 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ #endif // USE_MYSRVC_ARRAY } + if (New_sum==0) { proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 7, "Returning MySrvC NULL because 
no backend ONLINE or with weight\n"); #ifdef USE_MYSRVC_ARRAY @@ -2457,6 +2458,50 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ return NULL; // if we reach here, we couldn't find any target } +#ifdef USE_MYSRVC_ARRAY + // latency awareness algorithm is enabled only when compiled with USE_MYSRVC_ARRAY + if (sess->thread->variables.min_num_servers_lantency_awareness) { + if (num_candidates >= sess->thread->variables.min_num_servers_lantency_awareness) { + unsigned int servers_with_latency = 0; + unsigned int total_latency_us = 0; + // scan and verify that all servers have some latency + for (j=0; jcurrent_latency_us) { + servers_with_latency++; + total_latency_us += mysrvc->current_latency_us; + } + } + if (servers_with_latency == num_candidates) { + // all servers have some latency. + // That is good. If any server have no latency, something is wrong + // and we will skip this algorithm + sess->thread->status_variables.ConnPool_get_conn_latency_awareness++; + unsigned int avg_latency_us = 0; + avg_latency_us = total_latency_us/num_candidates; + for (j=0; jcurrent_latency_us > avg_latency_us) { + // remove the candidate + if (j+1 < num_candidates) { + mysrvcCandidates[j] = mysrvcCandidates[num_candidates-1]; + } + j--; + num_candidates--; + } + } + // we scan again to adjust weight + New_sum = 0; + for (j=0; jweight; + } + } + } + } +#endif // USE_MYSRVC_ARRAY + + unsigned int k; if (New_sum > 32768) { k=rand()%New_sum; diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index 38c71f9ac..894a326e9 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -321,6 +321,7 @@ static char * mysql_thread_variables_names[]= { (char *)"default_sql_mode", (char *)"default_time_zone", (char *)"connpoll_reset_queue_length", + (char *)"min_num_servers_lantency_awareness", (char *)"stats_time_backend_query", (char *)"stats_time_query_processor", (char *)"query_cache_stores_empty_result", @@ -441,6 +442,7 @@ 
MySQL_Threads_Handler::MySQL_Threads_Handler() { variables.query_digests_normalize_digest_text=false; variables.query_digests_track_hostname=false; variables.connpoll_reset_queue_length = 50; + variables.min_num_servers_lantency_awareness = 1000; variables.stats_time_backend_query=false; variables.stats_time_query_processor=false; variables.query_cache_stores_empty_result=true; @@ -682,6 +684,7 @@ VALGRIND_DISABLE_ERROR_REPORTING; #ifdef IDLE_THREADS if (!strcasecmp(name,"session_idle_ms")) return (int)variables.session_idle_ms; #endif // IDLE_THREADS + if (!strcasecmp(name,"min_num_servers_lantency_awareness")) return variables.min_num_servers_lantency_awareness; if (!strcasecmp(name,"stats_time_backend_query")) return (int)variables.stats_time_backend_query; if (!strcasecmp(name,"stats_time_query_processor")) return (int)variables.stats_time_query_processor; if (!strcasecmp(name,"sessions_sort")) return (int)variables.sessions_sort; @@ -766,6 +769,7 @@ VALGRIND_DISABLE_ERROR_REPORTING; if (!strcasecmp(name,"query_digests_normalize_digest_text")) return (int)variables.query_digests_normalize_digest_text; if (!strcasecmp(name,"query_digests_track_hostname")) return (int)variables.query_digests_track_hostname; if (!strcasecmp(name,"connpoll_reset_queue_length")) return (int)variables.connpoll_reset_queue_length; + if (!strcasecmp(name,"min_num_servers_lantency_awareness")) return (int)variables.min_num_servers_lantency_awareness; if (!strcasecmp(name,"stats_time_backend_query")) return (int)variables.stats_time_backend_query; if (!strcasecmp(name,"stats_time_query_processor")) return (int)variables.stats_time_query_processor; if (!strcasecmp(name,"query_cache_stores_empty_result")) return (int)variables.query_cache_stores_empty_result; @@ -1139,6 +1143,10 @@ VALGRIND_DISABLE_ERROR_REPORTING; sprintf(intbuf,"%d",variables.poll_timeout_on_failure); return strdup(intbuf); } + if (!strcasecmp(name,"min_num_servers_lantency_awareness")) { + 
sprintf(intbuf,"%d",variables.min_num_servers_lantency_awareness); + return strdup(intbuf); + } if (!strcasecmp(name,"threads")) { sprintf(intbuf,"%d", (num_threads ? num_threads : DEFAULT_NUM_THREADS)); return strdup(intbuf); @@ -2059,6 +2067,15 @@ bool MySQL_Threads_Handler::set_variable(char *name, char *value) { // this is t return false; } } + if (!strcasecmp(name,"min_num_servers_lantency_awareness")) { + int intv=atoi(value); + if (intv >= 0 && intv <= 10000) { + variables.min_num_servers_lantency_awareness=intv; + return true; + } else { + return false; + } + } if (!strcasecmp(name,"default_charset")) { if (vallen) { MARIADB_CHARSET_INFO * c=proxysql_find_charset_name(value); @@ -3730,6 +3747,7 @@ void MySQL_Thread::refresh_variables() { mysql_thread___query_digests_lowercase=(bool)GloMTH->get_variable_int((char *)"query_digests_lowercase"); mysql_thread___query_digests_normalize_digest_text=(bool)GloMTH->get_variable_int((char *)"query_digests_normalize_digest_text"); mysql_thread___query_digests_track_hostname=(bool)GloMTH->get_variable_int((char *)"query_digests_track_hostname"); + variables.min_num_servers_lantency_awareness=GloMTH->get_variable_int((char *)"min_num_servers_lantency_awareness"); variables.stats_time_backend_query=(bool)GloMTH->get_variable_int((char *)"stats_time_backend_query"); variables.stats_time_query_processor=(bool)GloMTH->get_variable_int((char *)"stats_time_query_processor"); variables.query_cache_stores_empty_result=(bool)GloMTH->get_variable_int((char *)"query_cache_stores_empty_result"); @@ -3805,6 +3823,7 @@ MySQL_Thread::MySQL_Thread() { status_variables.ConnPool_get_conn_immediate=0; status_variables.ConnPool_get_conn_success=0; status_variables.ConnPool_get_conn_failure=0; + status_variables.ConnPool_get_conn_latency_awareness=0; status_variables.active_transactions=0; status_variables.gtid_session_collected = 0; status_variables.generated_pkt_err = 0; @@ -3817,6 +3836,7 @@ MySQL_Thread::MySQL_Thread() { 
match_regexes=NULL; + variables.min_num_servers_lantency_awareness = 1000; variables.stats_time_backend_query=false; variables.stats_time_query_processor=false; variables.query_cache_stores_empty_result=true; @@ -4280,6 +4300,12 @@ SQLite3_result * MySQL_Threads_Handler::SQL3_GlobalStatus(bool _memory) { result->add_row(pta); } } + { // ConnPool_get_conn_latency_awareness + pta[0]=(char *)"ConnPool_get_conn_latency_awareness"; + sprintf(buf,"%llu",get_ConnPool_get_conn_latency_awareness()); + pta[1]=buf; + result->add_row(pta); + } { // ConnPool_get_conn_immediate pta[0]=(char *)"ConnPool_get_conn_immediate"; sprintf(buf,"%llu",get_ConnPool_get_conn_immediate()); @@ -5206,6 +5232,19 @@ void MySQL_Thread::return_local_connections() { } } +unsigned long long MySQL_Threads_Handler::get_ConnPool_get_conn_latency_awareness() { + unsigned long long q=0; + unsigned int i; + for (i=0;istatus_variables.ConnPool_get_conn_latency_awareness,0); + } + } + return q; +} + unsigned long long MySQL_Threads_Handler::get_ConnPool_get_conn_immediate() { unsigned long long q=0; unsigned int i; From 6aac86bebcc00ad3caa22f8260545a154f875c17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Fri, 22 Mar 2019 00:23:50 +1100 Subject: [PATCH 09/20] Changed the way max_lag_ms in query is computed It now remembers if the query is waiting for long time --- include/query_processor.h | 2 +- lib/MySQL_Session.cpp | 28 +++++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/include/query_processor.h b/include/query_processor.h index 3940f224b..a2748c322 100644 --- a/include/query_processor.h +++ b/include/query_processor.h @@ -72,7 +72,7 @@ class Query_Processor_Output { int sticky_conn; int multiplex; int gtid_from_hostgroup; - int max_lag_ms; + long long max_lag_ms; int log; char *comment; // #643 std::string *new_query; diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index acb9032d7..adbed28ef 100644 --- a/lib/MySQL_Session.cpp 
+++ b/lib/MySQL_Session.cpp @@ -4491,6 +4491,19 @@ void MySQL_Session::handler___client_DSS_QUERY_SENT___server_DSS_NOT_INITIALIZED MySQL_Backend * _gtid_from_backend = NULL; char uuid[64]; uint64_t trxid = 0; + unsigned long long now_us = 0; + if (qpo->max_lag_ms >= 0) { + if (qpo->max_lag_ms > 360000) { // this is an absolute time, we convert it to relative + if (now_us == 0) { + now_us = realtime_time(); + } + long long now_ms = now_us/1000; + qpo->max_lag_ms -= now_ms; + if (qpo->max_lag_ms < 0) { + qpo->max_lag_ms = -1; // time expired + } + } + } if (session_fast_forward == false) { if (qpo->gtid_from_hostgroup >= 0) { _gtid_from_backend = find_backend(qpo->gtid_from_hostgroup); @@ -4514,14 +4527,14 @@ void MySQL_Session::handler___client_DSS_QUERY_SENT___server_DSS_NOT_INITIALIZED uuid[n]='\0'; mc=thread->get_MyConn_local(mybe->hostgroup_id, this, uuid, trxid, -1); } else { - mc=thread->get_MyConn_local(mybe->hostgroup_id, this, NULL, 0, qpo->max_lag_ms); + mc=thread->get_MyConn_local(mybe->hostgroup_id, this, NULL, 0, (int)qpo->max_lag_ms); } } if (mc==NULL) { if (trxid) { - mc=MyHGM->get_MyConn_from_pool(mybe->hostgroup_id, this, session_fast_forward, uuid, trxid, qpo->max_lag_ms); + mc=MyHGM->get_MyConn_from_pool(mybe->hostgroup_id, this, session_fast_forward, uuid, trxid, -1); } else { - mc=MyHGM->get_MyConn_from_pool(mybe->hostgroup_id, this, session_fast_forward, NULL, 0, qpo->max_lag_ms); + mc=MyHGM->get_MyConn_from_pool(mybe->hostgroup_id, this, session_fast_forward, NULL, 0, (int)qpo->max_lag_ms); } } else { thread->status_variables.ConnPool_get_conn_immediate++; @@ -4532,6 +4545,15 @@ void MySQL_Session::handler___client_DSS_QUERY_SENT___server_DSS_NOT_INITIALIZED } else { thread->status_variables.ConnPool_get_conn_failure++; } + if (qpo->max_lag_ms >= 0) { + if (qpo->max_lag_ms <= 360000) { // this is a relative time , we convert it to absolute + if (now_us == 0) { + now_us = realtime_time(); + } + long long now_ms = now_us/1000; + qpo->max_lag_ms 
+= now_ms; + } + } proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION, 5, "Sess=%p -- server_myds=%p -- MySQL_Connection %p\n", this, mybe->server_myds, mybe->server_myds->myconn); if (mybe->server_myds->myconn==NULL) { // we couldn't get a connection for whatever reason, ex: no backends, or too busy From e20acb9a1c32aeb06dfffb96919e157b1e55eaf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Fri, 22 Mar 2019 00:54:36 +1100 Subject: [PATCH 10/20] Adding more status variables Useful for AWS Aurora, but not only --- include/MySQL_Thread.h | 6 +++++ lib/MySQL_Session.cpp | 5 ++++ lib/MySQL_Thread.cpp | 60 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/include/MySQL_Thread.h b/include/MySQL_Thread.h index 2b00e357c..69863efab 100644 --- a/include/MySQL_Thread.h +++ b/include/MySQL_Thread.h @@ -227,6 +227,7 @@ class MySQL_Thread unsigned long long queries; unsigned long long queries_slow; unsigned long long queries_gtid; + unsigned long long queries_with_max_lag_ms; unsigned long long queries_backends_bytes_sent; unsigned long long queries_backends_bytes_recv; unsigned long long queries_frontends_bytes_sent; @@ -244,6 +245,8 @@ class MySQL_Thread unsigned long long gtid_session_collected; unsigned long long generated_pkt_err; unsigned long long max_connect_timeout_err; + unsigned long long backend_lagging_during_query; + unsigned long long backend_offline_during_query; unsigned long long unexpected_com_quit; unsigned long long unexpected_packet; unsigned long long killed_connections; @@ -519,6 +522,9 @@ class MySQL_Threads_Handler unsigned long long get_unexpected_com_quit(); unsigned long long get_unexpected_packet(); unsigned long long get_aws_aurora_replicas_skipped_during_query(); + unsigned long long get_backend_lagging_during_query(); + unsigned long long get_backend_offline_during_query(); + unsigned long long get_queries_with_max_lag_ms(); unsigned long long get_killed_connections(); unsigned long long 
get_killed_queries(); iface_info *MLM_find_iface_from_fd(int fd) { diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index adbed28ef..7096cbb92 100644 --- a/lib/MySQL_Session.cpp +++ b/lib/MySQL_Session.cpp @@ -2293,6 +2293,9 @@ __get_pkts_from_client: clock_gettime(CLOCK_THREAD_CPUTIME_ID,&begint); } qpo=GloQPro->process_mysql_query(this,pkt.ptr,pkt.size,&CurrentQuery); + if (qpo->max_lag_ms >= 0) { + thread->status_variables.queries_with_max_lag_ms++; + } if (thread->variables.stats_time_query_processor) { clock_gettime(CLOCK_THREAD_CPUTIME_ID,&endt); thread->status_variables.query_processor_time=thread->status_variables.query_processor_time + @@ -3045,8 +3048,10 @@ handler_again: } bool retry_conn=false; if (myconn->server_status==MYSQL_SERVER_STATUS_SHUNNED_REPLICATION_LAG) { + thread->status_variables.backend_lagging_during_query++; proxy_error("Detected a lagging server during query: %s, %d\n", myconn->parent->address, myconn->parent->port); } else { + thread->status_variables.backend_offline_during_query++; proxy_error("Detected an offline server during query: %s, %d\n", myconn->parent->address, myconn->parent->port); } if (myds->query_retries_on_failure > 0) { diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index 894a326e9..e00fbdac2 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -3828,6 +3828,9 @@ MySQL_Thread::MySQL_Thread() { status_variables.gtid_session_collected = 0; status_variables.generated_pkt_err = 0; status_variables.max_connect_timeout_err = 0; + status_variables.backend_lagging_during_query = 0; + status_variables.backend_offline_during_query = 0; + status_variables.queries_with_max_lag_ms = 0; status_variables.unexpected_com_quit = 0; status_variables.unexpected_packet = 0; status_variables.killed_connections = 0; @@ -4336,6 +4339,24 @@ SQLite3_result * MySQL_Threads_Handler::SQL3_GlobalStatus(bool _memory) { pta[1]=buf; result->add_row(pta); } + { // backend_lagging_during_query + pta[0]=(char 
*)"backend_lagging_during_query"; + sprintf(buf,"%llu",get_backend_lagging_during_query()); + pta[1]=buf; + result->add_row(pta); + } + { // backend_offline_during_query + pta[0]=(char *)"backend_offline_during_query"; + sprintf(buf,"%llu",get_backend_offline_during_query()); + pta[1]=buf; + result->add_row(pta); + } + { // queries_with_max_lag_ms + pta[0]=(char *)"queries_with_max_lag_ms"; + sprintf(buf,"%llu",get_queries_with_max_lag_ms()); + pta[1]=buf; + result->add_row(pta); + } { // Unexpected COM_QUIT pta[0]=(char *)"mysql_unexpected_frontend_com_quit"; sprintf(buf,"%llu",get_unexpected_com_quit()); @@ -5297,6 +5318,45 @@ unsigned long long MySQL_Threads_Handler::get_generated_pkt_err() { return q; } +unsigned long long MySQL_Threads_Handler::get_backend_lagging_during_query() { + unsigned long long q=0; + unsigned int i; + for (i=0;istatus_variables.backend_lagging_during_query,0); + } + } + return q; +} + +unsigned long long MySQL_Threads_Handler::get_backend_offline_during_query() { + unsigned long long q=0; + unsigned int i; + for (i=0;istatus_variables.backend_offline_during_query,0); + } + } + return q; +} + +unsigned long long MySQL_Threads_Handler::get_queries_with_max_lag_ms() { + unsigned long long q=0; + unsigned int i; + for (i=0;istatus_variables.queries_with_max_lag_ms,0); + } + } + return q; +} + unsigned long long MySQL_Threads_Handler::get_max_connect_timeout() { unsigned long long q=0; unsigned int i; From e853e052d4fb4d9e1dc83a3b78673078c5ea2a0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Fri, 22 Mar 2019 10:56:00 +1100 Subject: [PATCH 11/20] Fixed typo microseconds to milliseconds --- include/MySQL_Monitor.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/MySQL_Monitor.hpp b/include/MySQL_Monitor.hpp index da7ef3b0d..641900400 100644 --- a/include/MySQL_Monitor.hpp +++ b/include/MySQL_Monitor.hpp @@ -25,7 +25,7 @@ //#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_LOG "CREATE 
TABLE mysql_server_aws_aurora_log (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_start_us INT NOT NULL DEFAULT 0 , success_time_us INT DEFAULT 0 , error VARCHAR , SERVER_ID VARCHAR NOT NULL DEFAULT '' , SESSION_ID VARCHAR NOT NULL DEFAULT '' , LAST_UPDATE_TIMESTAMP VARCHAR NOT NULL DEFAULT '' , replica_lag_in_microseconds INT NOT NULL DEFAULT 0 , CPU INT NOT NULL DEFAULT 0 , PRIMARY KEY (hostname, port, time_start_us, SERVER_ID))" -#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_LOG "CREATE TABLE mysql_server_aws_aurora_log (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_start_us INT NOT NULL DEFAULT 0 , success_time_us INT DEFAULT 0 , error VARCHAR , SERVER_ID VARCHAR NOT NULL DEFAULT '' , SESSION_ID VARCHAR , LAST_UPDATE_TIMESTAMP VARCHAR , replica_lag_in_microseconds INT NOT NULL DEFAULT 0 , CPU INT NOT NULL DEFAULT 0 , PRIMARY KEY (hostname, port, time_start_us, SERVER_ID))" +#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_LOG "CREATE TABLE mysql_server_aws_aurora_log (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_start_us INT NOT NULL DEFAULT 0 , success_time_us INT DEFAULT 0 , error VARCHAR , SERVER_ID VARCHAR NOT NULL DEFAULT '' , SESSION_ID VARCHAR , LAST_UPDATE_TIMESTAMP VARCHAR , replica_lag_in_milliseconds INT NOT NULL DEFAULT 0 , CPU INT NOT NULL DEFAULT 0 , PRIMARY KEY (hostname, port, time_start_us, SERVER_ID))" #define MONITOR_SQLITE_TABLE_MYSQL_SERVER_AWS_AURORA_CHECK_STATUS "CREATE TABLE mysql_server_aws_aurora_check_status (writer_hostgroup INT NOT NULL , hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , last_checked_at VARCHAR , checks_tot INT NOT NULL DEFAULT 0 , checks_ok INT NOT NULL DEFAULT 0 , last_error VARCHAR , PRIMARY KEY (writer_hostgroup, hostname, port))" From 006c042c0b191d3387efb152435b691ddac3c0b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Wed, 10 Apr 2019 22:46:49 +1000 Subject: [PATCH 12/20] Adding more protection in 
monitoring --- include/MySQL_Monitor.hpp | 2 +- lib/MySQL_HostGroups_Manager.cpp | 1 + lib/MySQL_Monitor.cpp | 24 +++++++++++++++++++++++- lib/ProxySQL_Admin.cpp | 13 +++++++++++-- lib/SQLite3_Server.cpp | 10 ++++++++++ src/main.cpp | 2 ++ 6 files changed, 48 insertions(+), 4 deletions(-) diff --git a/include/MySQL_Monitor.hpp b/include/MySQL_Monitor.hpp index 641900400..8a6d5b3d4 100644 --- a/include/MySQL_Monitor.hpp +++ b/include/MySQL_Monitor.hpp @@ -5,7 +5,6 @@ #include "thread.h" #include "wqueue.h" - //#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_CONNECT "CREATE TABLE mysql_server_connect (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_since INT NOT NULL DEFAULT 0 , time_until INT NOT NULL DEFAULT 0 , connect_success_count INT NOT NULL DEFAULT 0 , connect_success_first INT NOT NULL DEFAULT 0 , connect_success_last INT NOT NULL DEFAULT 0 , connect_success_time_min INT NOT NULL DEFAULT 0 , connect_success_time_max INT NOT NULL DEFAULT 0 , connect_success_time_total INT NOT NULL DEFAULT 0 , connect_failure_count INT NOT NULL DEFAULT 0 , connect_failure_first INT NOT NULL DEFAULT 0 , connect_failure_last INT NOT NULL DEFAULT 0 , PRIMARY KEY (hostname, port))" //#define MONITOR_SQLITE_TABLE_MYSQL_SERVER_PING "CREATE TABLE mysql_server_ping (hostname VARCHAR NOT NULL , port INT NOT NULL DEFAULT 3306 , time_since INT NOT NULL DEFAULT 0 , time_until INT NOT NULL DEFAULT 0 , ping_success_count INT NOT NULL DEFAULT 0 , ping_success_first INT NOT NULL DEFAULT 0, ping_success_last INT NOT NULL DEFAULT 0 , ping_success_time_min INT NOT NULL DEFAULT 0 , ping_success_time_max INT NOT NULL DEFAULT 0 , ping_success_time_total INT NOT NULL DEFAULT 0 , ping_failure_count INT NOT NULL DEFAULT 0 , ping_failure_first INT NOT NULL DEFAULT 0 , ping_failure_last INT NOT NULL DEFAULT 0 , PRIMARY KEY (hostname, port))" @@ -245,6 +244,7 @@ class MySQL_Monitor { wqueue queue; MySQL_Monitor_Connection_Pool *My_Conn_Pool; bool shutdown; + pthread_mutex_t mon_en_mutex; bool 
monitor_enabled; SQLite3DB *admindb; // internal database SQLite3DB *monitordb; // internal database diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index 0d8520e84..81597a476 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -959,6 +959,7 @@ MySQL_HostGroups_Manager::MySQL_HostGroups_Manager() { pthread_mutex_init(&readonly_mutex, NULL); pthread_mutex_init(&Group_Replication_Info_mutex, NULL); pthread_mutex_init(&Galera_Info_mutex, NULL); + pthread_mutex_init(&AWS_Aurora_Info_mutex, NULL); #ifdef MHM_PTHREAD_MUTEX pthread_mutex_init(&lock, NULL); #else diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 9a2aca82d..45a43c9d9 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -75,7 +75,11 @@ class ConsumerThread : public Thread { return NULL; } if (item->routine) { // NULL is allowed, do nothing for it - if (GloMyMon->monitor_enabled==true) { + + pthread_mutex_lock(&GloMyMon->mon_en_mutex); + bool me = GloMyMon->monitor_enabled; + pthread_mutex_unlock(&GloMyMon->mon_en_mutex); + if (me) { item->routine((void *)item->mmsd); } } @@ -460,6 +464,7 @@ void MySQL_Monitor::check_and_build_standard_tables(SQLite3DB *db, std::vectorcurtime=monotonic_time(); @@ -1180,6 +1188,7 @@ __fast_exit_monitor_group_replication_thread: } void * monitor_galera_thread(void *arg) { + mysql_close(mysql_init(NULL)); MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); @@ -1427,12 +1436,17 @@ __fast_exit_monitor_galera_thread: } void * monitor_replication_lag_thread(void *arg) { + mysql_close(mysql_init(NULL)); MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg; if (!GloMTH) return NULL; // quick exit during shutdown/restart MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); mysql_thr->refresh_variables(); +#ifdef DEBUG + MYSQL *mysqlcopy = NULL; +#endif // 
DEBUG + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); unsigned long long start_time=mysql_thr->curtime; @@ -1453,6 +1467,10 @@ void * monitor_replication_lag_thread(void *arg) { } } +#ifdef DEBUG + mysqlcopy = mmsd->mysql; +#endif // DEBUG + mmsd->t1=monotonic_time(); mmsd->interr=0; // reset the value if (percona_heartbeat_table) { @@ -2482,6 +2500,7 @@ void * MySQL_Monitor::run() { unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); + pthread_mutex_init(&mon_en_mutex,NULL); MySQL_Monitor__thread_MySQL_Thread_Variables_version=GloMTH->get_global_version(); mysql_thr->refresh_variables(); //if (!GloMTH) return NULL; // quick exit during shutdown/restart @@ -2559,7 +2578,9 @@ __monitor_run: } } } + pthread_mutex_lock(&mon_en_mutex); monitor_enabled=mysql_thread___monitor_enabled; + pthread_mutex_unlock(&mon_en_mutex); if ( rand()%5 == 0) { // purge once in a while My_Conn_Pool->purge_idle_connections(); } @@ -3812,6 +3833,7 @@ __sleep_monitor_aws_aurora: } void * monitor_AWS_Aurora_thread(void *arg) { + mysql_close(mysql_init(NULL)); MySQL_Monitor_State_Data *mmsd=(MySQL_Monitor_State_Data *)arg; MySQL_Thread * mysql_thr = new MySQL_Thread(); mysql_thr->curtime=monotonic_time(); diff --git a/lib/ProxySQL_Admin.cpp b/lib/ProxySQL_Admin.cpp index 09f2e92f6..b1036be53 100644 --- a/lib/ProxySQL_Admin.cpp +++ b/lib/ProxySQL_Admin.cpp @@ -9709,10 +9709,19 @@ void ProxySQL_Admin::enable_aurora_testing() { admindb->execute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, endpoint_address, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1271, 1272, 1, '.aws-test.com', 25, 120, 90, 1, 1, 'Automated Aurora Testing Cluster 1')"); admindb->execute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, 
check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1273, 1274, 1, 25, 120, 90, 0, 1, 'Automated Aurora Testing Cluster 2')"); admindb->execute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1275, 1276, 1, 25, 120, 90, 0, 2, 'Automated Aurora Testing Cluster 3')"); + admindb->execute("UPDATE mysql_aws_aurora_hostgroups SET active=0"); + admindb->execute("update mysql_servers set max_replication_lag=20"); load_mysql_servers_to_runtime(); mysql_servers_wrunlock(); - admindb->execute("UPDATE global_variables SET variable_value=2000 WHERE variable_name='mysql-monitor_ping_interval'"); - admindb->execute("UPDATE global_variables SET variable_value=1000 WHERE variable_name='mysql-monitor_ping_timeout'"); + //admindb->execute("UPDATE global_variables SET variable_value=3000 WHERE variable_name='mysql-monitor_ping_interval'"); + //admindb->execute("UPDATE global_variables SET variable_value=1500 WHERE variable_name='mysql-monitor_ping_timeout'"); + //admindb->execute("UPDATE global_variables SET variable_value=3000 WHERE variable_name='mysql-monitor_replication_lag_interval'"); + //admindb->execute("UPDATE global_variables SET variable_value=1500 WHERE variable_name='mysql-monitor_replication_lag_timeout'"); + admindb->execute("UPDATE global_variables SET variable_value=200 WHERE variable_name='mysql-monitor_ping_interval'"); + admindb->execute("UPDATE global_variables SET variable_value=100 WHERE variable_name='mysql-monitor_ping_timeout'"); + admindb->execute("UPDATE global_variables SET variable_value=200 WHERE variable_name='mysql-monitor_replication_lag_interval'"); + admindb->execute("UPDATE global_variables SET variable_value=100 WHERE variable_name='mysql-monitor_replication_lag_timeout'"); + admindb->execute("UPDATE global_variables SET variable_value='percona.heartbeat' WHERE 
variable_name='mysql-monitor_replication_lag_use_percona_heartbeat'"); load_mysql_variables_to_runtime(); admindb->execute("INSERT INTO mysql_users (username,password,default_hostgroup) VALUES ('aurora1','pass1',1271), ('aurora2','pass2',1273), ('aurora3','pass3',1275)"); init_users(); diff --git a/lib/SQLite3_Server.cpp b/lib/SQLite3_Server.cpp index adff8148b..3648c3a14 100644 --- a/lib/SQLite3_Server.cpp +++ b/lib/SQLite3_Server.cpp @@ -480,6 +480,10 @@ __run_query: pthread_mutex_lock(&GloSQLite3Server->aurora_mutex); GloSQLite3Server->populate_aws_aurora_table(sess); } + if (strstr(query_no_space,(char *)"Seconds_Behind_Master")) { + free(query); + query = strdup((char *)"SELECT 19 as Seconds_Behind_Master"); + } } #endif // TEST_AURORA SQLite3_Session *sqlite_sess = (SQLite3_Session *)sess->thread->gen_args; @@ -493,6 +497,12 @@ __run_query: sleep(2); } } + if (strstr(query_no_space,(char *)"Seconds_Behind_Master")) { + if (rand() % 10 == 0) { + // randomly add some latency on 10% of the traffic + sleep(2); + } + } } #endif // TEST_AURORA sess->SQLite3_to_MySQL(resultset, error, affected_rows, &sess->client_myds->myprot); diff --git a/src/main.cpp b/src/main.cpp index db765bb73..2143b2bc1 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1384,6 +1384,8 @@ bool ProxySQL_daemonize_phase3() { int main(int argc, const char * argv[]) { { + MYSQL *my = mysql_init(NULL); + mysql_close(my); // cpu_timer t; ProxySQL_Main_init(); #ifdef DEBUG From 6e03099b46273fecc517ac90bc10ea9c4ebc9222 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Thu, 11 Apr 2019 10:35:37 +1000 Subject: [PATCH 13/20] Further error handling in Monitor --- lib/MySQL_Monitor.cpp | 75 ++++++++++++++++++++++++++++-------------- lib/ProxySQL_Admin.cpp | 4 +-- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 3b52424f9..e8e6ea0f3 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -161,7 +161,7 @@ 
private: // std::map, std::vector > my_connections; PtrArray *servers; public: - MYSQL * get_connection(char *hostname, int port); + MYSQL * get_connection(char *hostname, int port, MySQL_Monitor_State_Data *mmsd); void put_connection(char *hostname, int port, MYSQL *my); // void purge_idle_connections(); MySQL_Monitor_Connection_Pool() { @@ -169,15 +169,16 @@ public: conns = new PtrArray(); pthread_mutex_init(&m2, NULL); }; - void conn_register(MYSQL *my) { + void conn_register(MySQL_Monitor_State_Data *mmsd) { std::lock_guard lock(mutex); + MYSQL *my = mmsd->mysql; pthread_mutex_lock(&m2); for (unsigned int i=0; ilen; i++) { MYSQL *my1 = (MYSQL *)conns->index(i); assert(my!=my1); assert(my->net.fd!=my1->net.fd); } - fprintf(stderr,"Registering MYSQL with FD %d\n", my->net.fd); + fprintf(stderr,"Registering MYSQL with FD %d from mmsd %p and MYSQL %p\n", my->net.fd, mmsd, mmsd->mysql); conns->add(my); pthread_mutex_unlock(&m2); }; @@ -197,7 +198,7 @@ public: }; }; -MYSQL * MySQL_Monitor_Connection_Pool::get_connection(char *hostname, int port) { +MYSQL * MySQL_Monitor_Connection_Pool::get_connection(char *hostname, int port, MySQL_Monitor_State_Data *mmsd) { std::lock_guard lock(mutex); pthread_mutex_lock(&m2); MYSQL *my = NULL; @@ -222,6 +223,13 @@ MYSQL * MySQL_Monitor_Connection_Pool::get_connection(char *hostname, int port) assert(my!=my1); assert(my->net.fd!=my1->net.fd); } + for (unsigned int l=0; llen; l++) { + MYSQL *my1 = (MYSQL *)conns->index(l); + assert(my!=my1); + assert(my->net.fd!=my1->net.fd); + } + fprintf(stderr,"Registering MYSQL with FD %d from mmsd %p and MYSQL %p\n", my->net.fd, mmsd, my); + conns->add(my); } pthread_mutex_unlock(&m2); return my; @@ -240,15 +248,32 @@ void MySQL_Monitor_Connection_Pool::put_connection(char *hostname, int port, MYS MonMySrvC *srv = (MonMySrvC *)servers->index(i); if (srv->port == port && strcmp(hostname,srv->address)==0) { srv->conns->add(my); - pthread_mutex_unlock(&m2); - return; + for (unsigned int j=0; 
jlen; j++) { + MYSQL *my1 = (MYSQL *)conns->index(j); + if (my1 == my) { + conns->remove_index_fast(j); + fprintf(stderr,"Un-registering MYSQL with FD %d\n", my->net.fd); + pthread_mutex_unlock(&m2); + return; + } + } + assert(0); // it didn't register it } } // if no server was found MonMySrvC *srv = new MonMySrvC(hostname,port); srv->conns->add(my); servers->add(srv); - pthread_mutex_unlock(&m2); + for (unsigned int j=0; jlen; j++) { + MYSQL *my1 = (MYSQL *)conns->index(j); + if (my1 == my) { + conns->remove_index_fast(j); + fprintf(stderr,"Un-registering MYSQL with FD %d\n", my->net.fd); + pthread_mutex_unlock(&m2); + return; + } + } + assert(0); } /* @@ -633,7 +658,7 @@ void * monitor_ping_thread(void *arg) { mysql_thr->refresh_variables(); bool ping_success = false; - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; mmsd->t1=start_time; @@ -642,14 +667,14 @@ void * monitor_ping_thread(void *arg) { bool rc; rc=mmsd->create_new_connection(); if (mmsd->mysql) { - GloMyMon->My_Conn_Pool->conn_register(mmsd->mysql); + GloMyMon->My_Conn_Pool->conn_register(mmsd); } crc=true; if (rc==false) { goto __exit_monitor_ping_thread; } } else { - GloMyMon->My_Conn_Pool->conn_register(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_register(mmsd); } mmsd->t1=monotonic_time(); @@ -675,7 +700,7 @@ void * monitor_ping_thread(void *arg) { } else { if (crc==false) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); - GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); mmsd->mysql=NULL; } } @@ -723,7 +748,7 @@ __fast_exit_monitor_ping_thread: bool rc=mmsd->set_wait_timeout(); if (rc) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); - GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); + 
//GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); } else { GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); mysql_close(mmsd->mysql); // set_wait_timeout failed @@ -828,7 +853,7 @@ void * monitor_read_only_thread(void *arg) { mysql_thr->curtime=monotonic_time(); mysql_thr->refresh_variables(); - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; bool read_only_success = false; @@ -1056,7 +1081,7 @@ void * monitor_group_replication_thread(void *arg) { mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; @@ -1321,7 +1346,7 @@ void * monitor_galera_thread(void *arg) { mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; @@ -1583,7 +1608,7 @@ void * monitor_replication_lag_thread(void *arg) { MYSQL *mysqlcopy = NULL; #endif // DEBUG - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; bool replication_lag_success = false; @@ -1598,14 +1623,14 @@ void * monitor_replication_lag_thread(void *arg) { bool rc; rc=mmsd->create_new_connection(); if (mmsd->mysql) { - GloMyMon->My_Conn_Pool->conn_register(mmsd->mysql); + GloMyMon->My_Conn_Pool->conn_register(mmsd); } crc=true; if (rc==false) { goto 
__fast_exit_monitor_replication_lag_thread; } } else { - GloMyMon->My_Conn_Pool->conn_register(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_register(mmsd); } #ifdef DEBUG @@ -1672,7 +1697,7 @@ void * monitor_replication_lag_thread(void *arg) { } } else { if (crc==false) { - GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); mmsd->mysql=NULL; } @@ -1752,7 +1777,7 @@ __exit_monitor_replication_lag_thread: } } - if (mmsd->interr) { // check failed + if (mmsd->interr || mmsd->mysql_error_msg) { // check failed if (mmsd->mysql) { GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); mysql_close(mmsd->mysql); @@ -1761,7 +1786,7 @@ __exit_monitor_replication_lag_thread: } else { if (mmsd->mysql) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); - GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); mmsd->mysql=NULL; } } @@ -1776,7 +1801,7 @@ __fast_exit_monitor_replication_lag_thread: bool rc=mmsd->set_wait_timeout(); if (rc) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); - GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); + //GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); } else { GloMyMon->My_Conn_Pool->conn_unregister(mmsd->mysql); mysql_close(mmsd->mysql); // set_wait_timeout failed @@ -3598,7 +3623,7 @@ void * monitor_AWS_Aurora_thread_HG(void *arg) { mmsd = new MySQL_Monitor_State_Data(hpa[cur_host_idx].host, hpa[cur_host_idx].port, NULL, hpa[cur_host_idx].use_ssl); mmsd->writer_hostgroup = wHG; mmsd->aws_aurora_check_timeout_ms = check_timeout_ms; - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); //unsigned long long start_time=mysql_thr->curtime; start_time=t1; @@ -3989,7 
+4014,7 @@ __sleep_monitor_aws_aurora: } for (unsigned int i=0;iqueue.add(item); + GloMyMon->queue->add(item); } return NULL; } @@ -4002,7 +4027,7 @@ void * monitor_AWS_Aurora_thread(void *arg) { mysql_thr->refresh_variables(); if (!GloMTH) return NULL; // quick exit during shutdown/restart - mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port); + mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; diff --git a/lib/ProxySQL_Admin.cpp b/lib/ProxySQL_Admin.cpp index b1036be53..faf3c0a86 100644 --- a/lib/ProxySQL_Admin.cpp +++ b/lib/ProxySQL_Admin.cpp @@ -9718,9 +9718,9 @@ void ProxySQL_Admin::enable_aurora_testing() { //admindb->execute("UPDATE global_variables SET variable_value=3000 WHERE variable_name='mysql-monitor_replication_lag_interval'"); //admindb->execute("UPDATE global_variables SET variable_value=1500 WHERE variable_name='mysql-monitor_replication_lag_timeout'"); admindb->execute("UPDATE global_variables SET variable_value=200 WHERE variable_name='mysql-monitor_ping_interval'"); - admindb->execute("UPDATE global_variables SET variable_value=100 WHERE variable_name='mysql-monitor_ping_timeout'"); + admindb->execute("UPDATE global_variables SET variable_value=3000 WHERE variable_name='mysql-monitor_ping_timeout'"); admindb->execute("UPDATE global_variables SET variable_value=200 WHERE variable_name='mysql-monitor_replication_lag_interval'"); - admindb->execute("UPDATE global_variables SET variable_value=100 WHERE variable_name='mysql-monitor_replication_lag_timeout'"); + admindb->execute("UPDATE global_variables SET variable_value=3000 WHERE variable_name='mysql-monitor_replication_lag_timeout'"); admindb->execute("UPDATE global_variables SET variable_value='percona.heartbeat' WHERE variable_name='mysql-monitor_replication_lag_use_percona_heartbeat'"); load_mysql_variables_to_runtime(); admindb->execute("INSERT INTO mysql_users 
(username,password,default_hostgroup) VALUES ('aurora1','pass1',1271), ('aurora2','pass2',1273), ('aurora3','pass3',1275)"); From 89713295c28fa417168d63ac0537f8367e686b77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Wed, 17 Apr 2019 16:20:51 +1000 Subject: [PATCH 14/20] Fix compiling bug --- lib/MySQL_Monitor.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 675ab4935..d7d6e97dc 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -178,7 +178,6 @@ public: std::lock_guard lock(mutex); MYSQL *my = mmsd->mysql; pthread_mutex_lock(&m2); - MYSQL *my = mmsd->mysql; for (unsigned int i=0; ilen; i++) { MYSQL *my1 = (MYSQL *)conns->index(i); assert(my!=my1); From 7b6058e535ad0153058590714143a917bd017a8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Wed, 17 Apr 2019 17:06:04 +1000 Subject: [PATCH 15/20] Missing unlock --- lib/MySQL_Monitor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index d7d6e97dc..84fda0179 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -2945,6 +2945,7 @@ __monitor_run: } pthread_mutex_lock(&mon_en_mutex); monitor_enabled=mysql_thread___monitor_enabled; + pthread_mutex_unlock(&mon_en_mutex); if ( rand()%10 == 0) { // purge once in a while My_Conn_Pool->purge_some_connections(); } From b12caa1d8c00c83377b4eeee655c50e70f10e993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Fri, 26 Apr 2019 12:03:12 +1000 Subject: [PATCH 16/20] Adding support for automated testing of Galera Using SQLite3 Server as backend --- Makefile | 22 ++++++ include/SQLite3_Server.h | 14 +++- include/proxysql_admin.h | 4 ++ lib/MySQL_Monitor.cpp | 36 ++++++++-- lib/MySQL_Protocol.cpp | 4 +- lib/MySQL_Session.cpp | 8 +-- lib/ProxySQL_Admin.cpp | 56 +++++++++++++-- lib/SQLite3_Server.cpp | 145 ++++++++++++++++++++++++++++++++++----- 8 files changed, 257 insertions(+), 
32 deletions(-) diff --git a/Makefile b/Makefile index 4e6b67752..3d71e182b 100644 --- a/Makefile +++ b/Makefile @@ -45,6 +45,12 @@ debug: build_deps_debug build_lib_debug build_src_debug .PHONY: testaurora testaurora: build_deps_debug build_lib_testaurora build_src_testaurora +.PHONY: testgalera +testgalera: build_deps_debug build_lib_testgalera build_src_testgalera + +.PHONY: testall +testall: build_deps_debug build_lib_testall build_src_testall + .PHONY: clickhouse clickhouse: build_deps_clickhouse build_lib_clickhouse build_src_clickhouse @@ -80,6 +86,22 @@ build_src_testaurora: build_deps build_lib_testaurora build_lib_testaurora: build_deps_debug cd lib && OPTZ="${O0} -ggdb -DDEBUG -DTEST_AURORA" CC=${CC} CXX=${CXX} ${MAKE} +.PHONY: build_src_testgalera +build_src_testgalera: build_deps build_lib_testgalera + cd src && OPTZ="${O0} -ggdb -DDEBUG -DTEST_GALERA" CC=${CC} CXX=${CXX} ${MAKE} + +.PHONY: build_lib_testgalera +build_lib_testgalera: build_deps_debug + cd lib && OPTZ="${O0} -ggdb -DDEBUG -DTEST_GALERA" CC=${CC} CXX=${CXX} ${MAKE} + +.PHONY: build_src_testall +build_src_testall: build_deps build_lib_testall + cd src && OPTZ="${O0} -ggdb -DDEBUG -DTEST_AURORA -DTEST_GALERA" CC=${CC} CXX=${CXX} ${MAKE} + +.PHONY: build_lib_testall +build_lib_testall: build_deps_debug + cd lib && OPTZ="${O0} -ggdb -DDEBUG -DTEST_AURORA -DTEST_GALERA" CC=${CC} CXX=${CXX} ${MAKE} + .PHONY: build_src_debug build_src_debug: build_deps build_lib_debug cd src && OPTZ="${O0} -ggdb -DDEBUG" CC=${CC} CXX=${CXX} ${MAKE} diff --git a/include/SQLite3_Server.h b/include/SQLite3_Server.h index 7a83918bc..60f413254 100644 --- a/include/SQLite3_Server.h +++ b/include/SQLite3_Server.h @@ -39,10 +39,15 @@ class SQLite3_Server { } variables; #ifdef TEST_AURORA std::vector *tables_defs_aurora; +#endif // TEST_AURORA +#ifdef TEST_GALERA + std::vector *tables_defs_galera; +#endif // TEST_GALERA +#if defined(TEST_AURORA) || defined(TEST_GALERA) void insert_into_tables_defs(std::vector *, const 
char *table_name, const char *table_def); void drop_tables_defs(std::vector *tables_defs); void check_and_build_standard_tables(SQLite3DB *db, std::vector *tables_defs); -#endif // TEST_AURORA +#endif // TEST_AURORA || TEST_GALERA public: #ifdef TEST_AURORA unsigned int cur_aurora_writer[3]; @@ -51,6 +56,13 @@ class SQLite3_Server { pthread_mutex_t aurora_mutex; void populate_aws_aurora_table(MySQL_Session *sess); #endif // TEST_AURORA +#ifdef TEST_GALERA + //unsigned int cur_aurora_writer[3]; + unsigned int num_galera_servers[3]; + unsigned int max_num_galera_servers; + pthread_mutex_t galera_mutex; + void populate_galera_table(MySQL_Session *sess); +#endif // TEST_GALERA SQLite3_Server(); ~SQLite3_Server(); char **get_variables_list(); diff --git a/include/proxysql_admin.h b/include/proxysql_admin.h index 7a40d52c0..0e4854316 100644 --- a/include/proxysql_admin.h +++ b/include/proxysql_admin.h @@ -315,5 +315,9 @@ class ProxySQL_Admin { void enable_aurora_testing(); #endif // TEST_AURORA +#ifdef TEST_GALERA + void enable_galera_testing(); +#endif // TEST_GALERA + }; #endif /* __CLASS_PROXYSQL_ADMIN_H */ diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index 84fda0179..a04bf6408 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -846,9 +846,9 @@ bool MySQL_Monitor_State_Data::set_wait_timeout() { if (mysql_thread___monitor_wait_timeout==false) { return true; } -#ifdef TEST_AURORA +#if defined(TEST_AURORA) || defined(TEST_GALERA) return true; -#endif // TEST_AURORA +#endif // TEST_AURORA || TEST_GALERA bool ret=false; char *query=NULL; char *qt=(char *)"SET wait_timeout=%d"; @@ -1471,6 +1471,9 @@ void * monitor_galera_thread(void *arg) { mmsd->mysql=GloMyMon->My_Conn_Pool->get_connection(mmsd->hostname, mmsd->port, mmsd); unsigned long long start_time=mysql_thr->curtime; +#ifdef DEBUG + MYSQL *mysqlcopy = NULL; +#endif // DEBUG mmsd->t1=start_time; @@ -1493,9 +1496,20 @@ void * monitor_galera_thread(void *arg) { } } +#ifdef DEBUG + mysqlcopy 
= mmsd->mysql; +#endif // DEBUG + mmsd->t1=monotonic_time(); mmsd->interr=0; // reset the value { +#ifdef TEST_GALERA + char *q1 = (char *)"SELECT wsrep_local_state , read_only , wsrep_local_recv_queue , wsrep_desync , wsrep_reject_queries , wsrep_sst_donor_rejects_queries , wsrep_cluster_status FROM HOST_STATUS_GALERA WHERE hostgroup_id=%d AND hostname='%s' AND port=%d"; + char *q2 = (char *)malloc(strlen(q1)+strlen(mmsd->hostname)+32); + sprintf(q2,q1, mmsd->writer_hostgroup, mmsd->hostname, mmsd->port); + mmsd->async_exit_status = mysql_query_start(&mmsd->interr, mmsd->mysql, q2); + free(q2); +#else char *sv = mmsd->mysql->server_version; if (strncmp(sv,(char *)"5.7",3)==0 || strncmp(sv,(char *)"8",1)==0) { // the backend is either MySQL 5.7 or MySQL 8 : INFORMATION_SCHEMA.GLOBAL_STATUS is deprecated @@ -1504,11 +1518,12 @@ void * monitor_galera_thread(void *arg) { // any other version mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT (SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='WSREP_LOCAL_STATE') wsrep_local_state, @@read_only read_only, (SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='WSREP_LOCAL_RECV_QUEUE') wsrep_local_recv_queue , @@wsrep_desync wsrep_desync, @@wsrep_reject_queries wsrep_reject_queries, @@wsrep_sst_donor_rejects_queries wsrep_sst_donor_rejects_queries, (SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME='WSREP_CLUSTER_STATUS') wsrep_cluster_status"); } +#endif // TEST_GALERA } while (mmsd->async_exit_status) { mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status); unsigned long long now=monotonic_time(); - if (now > mmsd->t1 + mysql_thread___monitor_galera_healthcheck_timeout * 1000) { + if (now > mmsd->t1 + mysql_thread___monitor_galera_healthcheck_timeout * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); proxy_error("Timeout on Galera health check for %s:%d after %lldms. 
If the server is overload, increase mysql-monitor_galera_healthcheck_timeout. Assuming wsrep_cluster_status is NOT Primary\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); goto __exit_monitor_galera_thread; @@ -3812,6 +3827,9 @@ void * monitor_AWS_Aurora_thread_HG(void *arg) { if (mmsd->mysql==NULL) { // we don't have a connection, let's create it bool rc; rc=mmsd->create_new_connection(); + if (mmsd->mysql) { + GloMyMon->My_Conn_Pool->conn_register(mmsd); + } crc=true; if (rc==false) { unsigned long long now=monotonic_time(); @@ -3955,7 +3973,13 @@ __exit_monitor_aws_aurora_HG_thread: __fast_exit_monitor_aws_aurora_HG_thread: if (mmsd->mysql) { // if we reached here we didn't put the connection back - if (mmsd->mysql_error_msg || mmsd->async_exit_status) { + if (mmsd->mysql_error_msg) { +#ifdef DEBUG + proxy_error("Error after %dms: server %s:%d , mmsd %p , MYSQL %p , FD %d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); + GloMyMon->My_Conn_Pool->conn_unregister(mmsd); +#else + proxy_error("Error after %dms on server %s:%d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd->hostname, mmsd->port, mmsd->mysql_error_msg); +#endif // DEBUG mysql_close(mmsd->mysql); // if we reached here we should destroy it mmsd->mysql=NULL; } else { @@ -3964,10 +3988,14 @@ __fast_exit_monitor_aws_aurora_HG_thread: if (rc) { GloMyMon->My_Conn_Pool->put_connection(mmsd->hostname,mmsd->port,mmsd->mysql); } else { + proxy_error("Error after %dms: mmsd %p , MYSQL %p , FD %d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); + GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); // set_wait_timeout failed } mmsd->mysql=NULL; } else { // really not sure how we reached here, drop it + proxy_error("Error after %dms: mmsd %p , MYSQL %p , FD %d : %s\n", (mmsd->t2-mmsd->t1)/1000, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); + 
GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); mmsd->mysql=NULL; } diff --git a/lib/MySQL_Protocol.cpp b/lib/MySQL_Protocol.cpp index 0274d1111..6e662f429 100644 --- a/lib/MySQL_Protocol.cpp +++ b/lib/MySQL_Protocol.cpp @@ -1643,10 +1643,10 @@ __do_auth: ((*myds)->sess->session_type == PROXYSQL_SESSION_ADMIN) || ((*myds)->sess->session_type == PROXYSQL_SESSION_STATS) -#ifdef TEST_AURORA +#if defined(TEST_AURORA) || defined(TEST_GALERA) || ((*myds)->sess->session_type == PROXYSQL_SESSION_SQLITE) -#endif // TEST_AURORA +#endif // TEST_AURORA || TEST_GALERA ) { if (strcmp((const char *)user,mysql_thread___monitor_username)==0) { proxy_scramble(reply, (*myds)->myconn->scramble_buff, mysql_thread___monitor_password); diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index 2ad838677..fb9e2bf74 100644 --- a/lib/MySQL_Session.cpp +++ b/lib/MySQL_Session.cpp @@ -3867,11 +3867,11 @@ void MySQL_Session::handler___status_CONNECTING_CLIENT___STATE_SERVER_HANDSHAKE( (handshake_response_return == true) && ( -#ifdef TEST_AURORA +#if defined(TEST_AURORA) || defined(TEST_GALERA) (default_hostgroup<0 && ( session_type == PROXYSQL_SESSION_ADMIN || session_type == PROXYSQL_SESSION_STATS || session_type == PROXYSQL_SESSION_SQLITE) ) #else (default_hostgroup<0 && ( session_type == PROXYSQL_SESSION_ADMIN || session_type == PROXYSQL_SESSION_STATS) ) -#endif // TEST_AURORA +#endif // TEST_AURORA || TEST_GALERA || (default_hostgroup == 0 && session_type == PROXYSQL_SESSION_CLICKHOUSE) || @@ -3916,10 +3916,10 @@ void MySQL_Session::handler___status_CONNECTING_CLIENT___STATE_SERVER_HANDSHAKE( client_authenticated=true; switch (session_type) { case PROXYSQL_SESSION_SQLITE: -#ifdef TEST_AURORA +#if defined(TEST_AURORA) || defined(TEST_GALERA) free_users=1; break; -#endif // TEST_AURORA +#endif // TEST_AURORA || TEST_GALERA case PROXYSQL_SESSION_MYSQL: if (ldap_ctx==NULL) { 
free_users=GloMyAuth->increase_frontend_user_connections(client_myds->myconn->userinfo->username, &used_users); diff --git a/lib/ProxySQL_Admin.cpp b/lib/ProxySQL_Admin.cpp index 22722f70a..9ac1178ff 100644 --- a/lib/ProxySQL_Admin.cpp +++ b/lib/ProxySQL_Admin.cpp @@ -9903,15 +9903,61 @@ void ProxySQL_Admin::stats___mysql_prepared_statements_info() { delete resultset; } +#ifdef TEST_GALERA +void ProxySQL_Admin::enable_galera_testing() { + proxy_info("Admin is enabling Galera Testing using SQLite3 Server and HGs from 2271 and 2290\n"); + sqlite3_stmt *statement=NULL; + sqlite3 *mydb3=admindb->get_db(); + unsigned int num_galera_servers = GloSQLite3Server->num_galera_servers[0]; + int rc; + mysql_servers_wrlock(); + admindb->execute("DELETE FROM mysql_servers WHERE hostgroup_id BETWEEN 2271 AND 2300"); + char *query=(char *)"INSERT INTO mysql_servers (hostgroup_id,hostname,use_ssl,comment) VALUES (?1, ?2, ?3, ?4)"; + rc=sqlite3_prepare_v2(mydb3, query, -1, &statement, 0); + assert(rc==SQLITE_OK); + for (unsigned int j=1; j<4; j++) { + proxy_info("Admin is enabling Galera Testing using SQLite3 Server and writer_HG %d\n" , 2260+j*10+1); + for (unsigned int i=0; iexecute("INSERT INTO mysql_galera_hostgroups (writer_hostgroup, backup_writer_hostgroup, reader_hostgroup, offline_hostgroup, active, max_writers, writer_is_also_reader, max_transactions_behind, comment) VALUES (2271, 2272, 2273, 2274, 0, 1, 1, 0, 'Automated Galera Testing Cluster 1')"); + admindb->execute("INSERT INTO mysql_galera_hostgroups (writer_hostgroup, backup_writer_hostgroup, reader_hostgroup, offline_hostgroup, active, max_writers, writer_is_also_reader, max_transactions_behind, comment) VALUES (2281, 2282, 2283, 2284, 0, 1, 1, 0, 'Automated Galera Testing Cluster 2')"); + admindb->execute("INSERT INTO mysql_galera_hostgroups (writer_hostgroup, backup_writer_hostgroup, reader_hostgroup, offline_hostgroup, active, max_writers, writer_is_also_reader, max_transactions_behind, comment) VALUES (2291, 
2292, 2293, 2294, 0, 1, 1, 0, 'Automated Galera Testing Cluster 3')"); + admindb->execute("UPDATE mysql_galera_hostgroups SET active=1"); + //admindb->execute("update mysql_servers set max_replication_lag=20"); + load_mysql_servers_to_runtime(); + mysql_servers_wrunlock(); + admindb->execute("UPDATE global_variables SET variable_value=200 WHERE variable_name='mysql-monitor_ping_interval'"); + admindb->execute("UPDATE global_variables SET variable_value=3000 WHERE variable_name='mysql-monitor_ping_timeout'"); + admindb->execute("UPDATE global_variables SET variable_value=200 WHERE variable_name='mysql-monitor_replication_lag_interval'"); + admindb->execute("UPDATE global_variables SET variable_value=3000 WHERE variable_name='mysql-monitor_replication_lag_timeout'"); + admindb->execute("UPDATE global_variables SET variable_value='percona.heartbeat' WHERE variable_name='mysql-monitor_replication_lag_use_percona_heartbeat'"); + load_mysql_variables_to_runtime(); + admindb->execute("INSERT INTO mysql_users (username,password,default_hostgroup) VALUES ('galera1','pass1',2271), ('galera2','pass2',2281), ('galera','pass3',2291)"); + init_users(); +} +#endif // TEST_GALERA #ifdef TEST_AURORA void ProxySQL_Admin::enable_aurora_testing() { - proxy_info("Admin is enabling AWS Aurora Testing using SQLite3 Server and HGs 1271 and 1272\n"); + proxy_info("Admin is enabling AWS Aurora Testing using SQLite3 Server and HGs from 1271 to 1276\n"); sqlite3_stmt *statement=NULL; sqlite3 *mydb3=admindb->get_db(); - unsigned int num_aurora_servers = GloSQLite3Server->num_aurora_servers[0]; + unsigned int num_aurora_servers = GloSQLite3Server->num_aurora_servers[0]; int rc; mysql_servers_wrlock(); - admindb->execute("DELETE FROM mysql_servers"); + admindb->execute("DELETE FROM mysql_servers WHERE hostgroup_id BETWEEN 1271 AND 1276"); char *query=(char *)"INSERT INTO mysql_servers (hostgroup_id,hostname,use_ssl,comment) VALUES (?1, ?2, ?3, ?4)"; rc=sqlite3_prepare_v2(mydb3, query, -1, 
&statement, 0); assert(rc==SQLITE_OK); @@ -9939,8 +9985,8 @@ void ProxySQL_Admin::enable_aurora_testing() { admindb->execute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, endpoint_address, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1271, 1272, 1, '.aws-test.com', 25, 120, 90, 1, 1, 'Automated Aurora Testing Cluster 1')"); admindb->execute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1273, 1274, 1, 25, 120, 90, 0, 1, 'Automated Aurora Testing Cluster 2')"); admindb->execute("INSERT INTO mysql_aws_aurora_hostgroups (writer_hostgroup, reader_hostgroup, active, max_lag_ms, check_interval_ms, check_timeout_ms, writer_is_also_reader, new_reader_weight, comment) VALUES (1275, 1276, 1, 25, 120, 90, 0, 2, 'Automated Aurora Testing Cluster 3')"); - admindb->execute("UPDATE mysql_aws_aurora_hostgroups SET active=0"); - admindb->execute("update mysql_servers set max_replication_lag=20"); + admindb->execute("UPDATE mysql_aws_aurora_hostgroups SET active=1"); + //admindb->execute("update mysql_servers set max_replication_lag=20"); load_mysql_servers_to_runtime(); mysql_servers_wrunlock(); //admindb->execute("UPDATE global_variables SET variable_value=3000 WHERE variable_name='mysql-monitor_ping_interval'"); diff --git a/lib/SQLite3_Server.cpp b/lib/SQLite3_Server.cpp index 3648c3a14..a6df17b19 100644 --- a/lib/SQLite3_Server.cpp +++ b/lib/SQLite3_Server.cpp @@ -118,8 +118,8 @@ typedef struct _ifaces_desc_t { char **mysql_ifaces; } ifaces_desc_t; -#define MAX_IFACES 32 -#define MAX_SQLITE3SERVER_LISTENERS 32 +#define MAX_IFACES 128 +#define MAX_SQLITE3SERVER_LISTENERS 128 class ifaces_desc { public: @@ -246,7 +246,7 @@ void SQLite3_Server_session_handler(MySQL_Session *sess, void *_pa, PtrSize_t *p memcpy(query,(char 
*)pkt->ptr+sizeof(mysql_hdr)+1,query_length-1); query[query_length-1]=0; -#ifdef TEST_AURORA +#if defined(TEST_AURORA) || defined(TEST_GALERA) if (sess->client_myds->proxy_addr.addr == NULL) { struct sockaddr addr; socklen_t addr_len=sizeof(struct sockaddr); @@ -276,7 +276,7 @@ void SQLite3_Server_session_handler(MySQL_Session *sess, void *_pa, PtrSize_t *p sess->client_myds->proxy_addr.addr = strdup("unknown"); } } -#endif // TEST_AURORA +#endif // TEST_AURORA || TEST_GALERA char *query_no_space=(char *)l_alloc(query_length); memcpy(query_no_space,query,query_length); @@ -292,13 +292,13 @@ void SQLite3_Server_session_handler(MySQL_Session *sess, void *_pa, PtrSize_t *p if (query_no_space_length==SELECT_VERSION_COMMENT_LEN) { if (!strncasecmp(SELECT_VERSION_COMMENT, query_no_space, query_no_space_length)) { l_free(query_length,query); -#ifdef TEST_AURORA - char *a = (char *)"SELECT '(ProxySQL Aurora Test Server) - %s'"; +#if defined(TEST_AURORA) || defined(TEST_GALERA) + char *a = (char *)"SELECT '(ProxySQL Automated Test Server) - %s'"; query = (char *)malloc(strlen(a)+strlen(sess->client_myds->proxy_addr.addr)); sprintf(query,a,sess->client_myds->proxy_addr.addr); #else query=l_strdup("SELECT '(ProxySQL SQLite3 Server)'"); -#endif // TEST_AURORA +#endif // TEST_AURORA || TEST_GALERA query_length=strlen(query)+1; goto __run_query; } @@ -474,22 +474,33 @@ __end_show_commands: __run_query: if (run_query) { -#ifdef TEST_AURORA +#if defined(TEST_AURORA) || defined(TEST_GALERA) if (strncasecmp("SELECT",query_no_space,6)==0) { +#ifdef TEST_AURORA if (strstr(query_no_space,(char *)"REPLICA_HOST_STATUS")) { pthread_mutex_lock(&GloSQLite3Server->aurora_mutex); GloSQLite3Server->populate_aws_aurora_table(sess); } +#endif // TEST_AURORA +#ifdef TEST_GALERA + if (strstr(query_no_space,(char *)"HOST_STATUS_GALERA")) { + pthread_mutex_lock(&GloSQLite3Server->galera_mutex); + GloSQLite3Server->populate_galera_table(sess); + } +#endif // TEST_GALERA if 
(strstr(query_no_space,(char *)"Seconds_Behind_Master")) { free(query); - query = strdup((char *)"SELECT 19 as Seconds_Behind_Master"); + char *a = (char *)"SELECT %d as Seconds_Behind_Master"; + query = (char *)malloc(strlen(a)+4); + sprintf(query,a,rand()%30+10); } } -#endif // TEST_AURORA +#endif // TEST_AURORA || TEST_GALERA SQLite3_Session *sqlite_sess = (SQLite3_Session *)sess->thread->gen_args; sqlite_sess->sessdb->execute_statement(query, &error , &cols , &affected_rows , &resultset); -#ifdef TEST_AURORA +#if defined(TEST_AURORA) || defined(TEST_GALERA) if (strncasecmp("SELECT",query_no_space,6)==0) { +#ifdef TEST_AURORA if (strstr(query_no_space,(char *)"REPLICA_HOST_STATUS")) { pthread_mutex_unlock(&GloSQLite3Server->aurora_mutex); if (rand() % 100 == 0) { @@ -497,6 +508,19 @@ __run_query: sleep(2); } } +#endif // TEST_AURORA +#ifdef TEST_GALERA + if (strstr(query_no_space,(char *)"HOST_STATUS_GALERA")) { + pthread_mutex_unlock(&GloSQLite3Server->galera_mutex); + if (resultset->rows_count == 0) { + PROXY_TRACE(); + } + if (rand() % 100 == 0) { + // randomly add some latency on 1% of the traffic + sleep(2); + } + } +#endif // TEST_GALERA if (strstr(query_no_space,(char *)"Seconds_Behind_Master")) { if (rand() % 10 == 0) { // randomly add some latency on 10% of the traffic @@ -504,7 +528,7 @@ __run_query: } } } -#endif // TEST_AURORA +#endif // TEST_AURORA || TEST_GALERA sess->SQLite3_to_MySQL(resultset, error, affected_rows, &sess->client_myds->myprot); delete resultset; } @@ -712,6 +736,11 @@ SQLite3_Server::~SQLite3_Server() { delete sessdb; sessdb = NULL; +#ifdef TEST_GALERA + drop_tables_defs(tables_defs_galera); + delete tables_defs_galera; +#endif // TEST_GALERA + #ifdef TEST_AURORA drop_tables_defs(tables_defs_aurora); delete tables_defs_aurora; @@ -738,13 +767,14 @@ SQLite3_Server::SQLite3_Server() { variables.read_only=false; +#if defined(TEST_AURORA) || defined(TEST_GALERA) + string s = ""; #ifdef TEST_AURORA 
pthread_mutex_init(&aurora_mutex,NULL); unsigned int nas = time(NULL); nas = nas % 3; // range nas += 5; // min max_num_aurora_servers = 10; // hypothetical maximum number of nodes - string s = ""; for (unsigned int j=1; j<4; j++) { cur_aurora_writer[j-1] = 0; num_aurora_servers[j-1] = nas; @@ -755,14 +785,85 @@ SQLite3_Server::SQLite3_Server() { } } } +#endif // TEST_AURORA +#if defined(TEST_AURORA) && defined(TEST_GALERA) + s += ";"; +#endif // TEST_AURORA || TEST_GALERA +#ifdef TEST_GALERA + pthread_mutex_init(&galera_mutex,NULL); + unsigned int ngs = time(NULL); + ngs = ngs % 3; // range + ngs += 5; // min + max_num_galera_servers = 10; // hypothetical maximum number of nodes + for (unsigned int j=1; j<4; j++) { + //cur_aurora_writer[j-1] = 0; + num_galera_servers[j-1] = ngs; + for (unsigned int i=11; iexecute("BEGIN TRANSACTION"); + char *error=NULL; + int cols=0; + int affected_rows=0; + SQLite3_result *resultset=NULL; + sqlite3 *mydb3=sessdb->get_db(); + string myip = string(sess->client_myds->proxy_addr.addr); + string clu_id_s = myip.substr(6,1); + unsigned int cluster_id = atoi(clu_id_s.c_str()); + cluster_id--; + int hg_id = 2270+(cluster_id*10)+1; + char buf[1024]; + sprintf(buf, (char *)"SELECT * FROM HOST_STATUS_GALERA WHERE hostgroup_id = %d LIMIT 1", hg_id); + sessdb->execute_statement(buf, &error , &cols , &affected_rows , &resultset); + if (resultset->rows_count==0) { + //sessdb->execute("DELETE FROM HOST_STATUS_GALERA"); + sqlite3_stmt *statement=NULL; + int rc; + char *query=(char *)"INSERT INTO HOST_STATUS_GALERA VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)"; + rc=sqlite3_prepare_v2(mydb3, query, -1, &statement, 0); + assert(rc==SQLITE_OK); + for (unsigned int i=0; iexecute("COMMIT"); +} +#endif // TEST_GALERA + +#ifdef TEST_AURORA void SQLite3_Server::populate_aws_aurora_table(MySQL_Session *sess) { // this function needs to be called with lock on mutex aurora_mutex already acquired sessdb->execute("DELETE FROM REPLICA_HOST_STATUS"); @@ 
-821,7 +922,10 @@ void SQLite3_Server::populate_aws_aurora_table(MySQL_Session *sess) { } sqlite3_finalize(statement); } +#endif // TEST_AURORA + +#if defined(TEST_AURORA) || defined(TEST_GALERA) void SQLite3_Server::insert_into_tables_defs(std::vector *tables_defs, const char *table_name, const char *table_def) { table_def_t *td = new table_def_t; td->table_name=strdup(table_name); @@ -851,7 +955,7 @@ void SQLite3_Server::drop_tables_defs(std::vector *tables_defs) { delete td; } }; -#endif // TEST_AURORA +#endif // TEST_AURORA || TEST_GALERA void SQLite3_Server::wrlock() { pthread_rwlock_wrlock(&rwlock); @@ -877,6 +981,15 @@ bool SQLite3_Server::init() { check_and_build_standard_tables(sessdb, tables_defs_aurora); GloAdmin->enable_aurora_testing(); #endif // TEST_AURORA +#ifdef TEST_GALERA + tables_defs_galera = new std::vector; + insert_into_tables_defs(tables_defs_galera, + (const char *)"HOST_STATUS_GALERA", + (const char *)"CREATE TABLE HOST_STATUS_GALERA (hostgroup_id INT NOT NULL , hostname VARCHAR NOT NULL , port INT NOT NULL , wsrep_local_state VARCHAR , read_only VARCHAR , wsrep_local_recv_queue VARCHAR , wsrep_desync VARCHAR , wsrep_reject_queries VARCHAR , wsrep_sst_donor_rejects_queries VARCHAR , wsrep_cluster_status VARCHAR , PRIMARY KEY (hostgroup_id, hostname, port))"); + check_and_build_standard_tables(sessdb, tables_defs_galera); + GloAdmin->enable_galera_testing(); +#endif // TEST_GALERA + child_func[0]=child_mysql; main_shutdown=0; main_poll_nfds=0; From 407da6adfae52e660d526405fff9ec0bd8719d1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Fri, 26 Apr 2019 12:03:58 +1000 Subject: [PATCH 17/20] Removing a debugging line --- lib/MySQL_Monitor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index a04bf6408..ea31b7563 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -1860,7 +1860,9 @@ void * monitor_replication_lag_thread(void *arg) { if (mmsd->interr) 
{ // replication lag check failed mmsd->mysql_error_msg=strdup(mysql_error(mmsd->mysql)); unsigned long long now=monotonic_time(); +#ifdef DEBUG proxy_error("Error after %dms: mmsd %p , MYSQL %p , FD %d : %s\n", (now-mmsd->t1)/1000, mmsd, mmsd->mysql, mmsd->mysql->net.fd, mmsd->mysql_error_msg); +#endif // DEBUG if (mmsd->mysql) { GloMyMon->My_Conn_Pool->conn_unregister(mmsd); mysql_close(mmsd->mysql); From 570d0283f58684462df85775bb17349c55c8c69b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Mon, 29 Apr 2019 20:26:57 +1000 Subject: [PATCH 18/20] Added variable monitor_galera_healthcheck_max_timeout_count Further testing required --- include/MySQL_Thread.h | 1 + include/proxysql_structs.h | 2 ++ lib/MySQL_Monitor.cpp | 58 ++++++++++++++++++++++++++++++++++++-- lib/MySQL_Thread.cpp | 17 +++++++++++ 4 files changed, 76 insertions(+), 2 deletions(-) diff --git a/include/MySQL_Thread.h b/include/MySQL_Thread.h index 4c04e083d..e33fa1cfa 100644 --- a/include/MySQL_Thread.h +++ b/include/MySQL_Thread.h @@ -353,6 +353,7 @@ class MySQL_Threads_Handler int monitor_groupreplication_healthcheck_timeout; int monitor_galera_healthcheck_interval; int monitor_galera_healthcheck_timeout; + int monitor_galera_healthcheck_max_timeout_count; int monitor_query_interval; int monitor_query_timeout; int monitor_slave_lag_when_null; diff --git a/include/proxysql_structs.h b/include/proxysql_structs.h index 66b4bf94e..b4dcac512 100644 --- a/include/proxysql_structs.h +++ b/include/proxysql_structs.h @@ -690,6 +690,7 @@ __thread int mysql_thread___monitor_groupreplication_healthcheck_interval; __thread int mysql_thread___monitor_groupreplication_healthcheck_timeout; __thread int mysql_thread___monitor_galera_healthcheck_interval; __thread int mysql_thread___monitor_galera_healthcheck_timeout; +__thread int mysql_thread___monitor_galera_healthcheck_max_timeout_count; __thread int mysql_thread___monitor_query_interval; __thread int 
mysql_thread___monitor_query_timeout; __thread int mysql_thread___monitor_slave_lag_when_null; @@ -814,6 +815,7 @@ extern __thread int mysql_thread___monitor_groupreplication_healthcheck_interval extern __thread int mysql_thread___monitor_groupreplication_healthcheck_timeout; extern __thread int mysql_thread___monitor_galera_healthcheck_interval; extern __thread int mysql_thread___monitor_galera_healthcheck_timeout; +extern __thread int mysql_thread___monitor_galera_healthcheck_max_timeout_count; extern __thread int mysql_thread___monitor_query_interval; extern __thread int mysql_thread___monitor_query_timeout; extern __thread int mysql_thread___monitor_slave_lag_when_null; diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index ea31b7563..a4a8a9c5f 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -1586,6 +1586,7 @@ __exit_monitor_galera_thread: bool wsrep_reject_queries = true; bool wsrep_sst_donor_rejects_queries = true; long long wsrep_local_recv_queue=0; + int num_timeouts = 0; MYSQL_FIELD * fields=NULL; if (mmsd->interr == 0 && mmsd->result) { int num_fields=0; @@ -1665,12 +1666,65 @@ __end_process_galera_result: //node->add_entry(time_now, (mmsd->mysql_error_msg ? 0 : mmsd->t2-mmsd->t1) , transactions_behind,viable_candidate,read_only,mmsd->mysql_error_msg); node->add_entry(time_now, (mmsd->mysql_error_msg ? 0 : mmsd->t2-mmsd->t1) , wsrep_local_recv_queue, primary_partition, read_only, wsrep_local_state, wsrep_desync, wsrep_reject_queries, wsrep_sst_donor_rejects_queries, mmsd->mysql_error_msg); GloMyMon->Galera_Hosts_Map.insert(std::make_pair(s,node)); + if (mmsd->mysql_error_msg) { + if (strncasecmp(mmsd->mysql_error_msg, (char *)"timeout", 7) == 0) { + // it was a timeout . 
Let's count the number of consecutive timeouts + int max_num_timeout = 10; + if (mysql_thread___monitor_galera_healthcheck_max_timeout_count < max_num_timeout) { + max_num_timeout = mysql_thread___monitor_galera_healthcheck_max_timeout_count; + } + unsigned long long start_times[max_num_timeout]; + bool timeouts[max_num_timeout]; + for (int i=0; ilast_entries[i].start_time) { + int smallidx = 0; + for (int j=0; jlast_entries[i].start_time) { + start_times[j] = node->last_entries[i].start_time; + timeouts[j] = false; + if (node->last_entries[i].error) { + if (strncasecmp(node->last_entries[i].error, (char *)"timeout", 7) == 0) { + timeouts[j] = true; + } + } + } + } + } + } + for (int i=0; igalera_mutex); // NOTE: we update MyHGM outside the mutex galera_mutex - if (mmsd->mysql_error_msg) { // there was an error checking the status of the server, surely we need to reconfigure GR - MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg); + if (mmsd->mysql_error_msg) { // there was an error checking the status of the server, surely we need to reconfigure Galera + if (num_timeouts == 0) { + // it wasn't a timeout, reconfigure immediately + MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg); + } else { + // it was a timeout. Check if we are having consecutive timeout + if (num_timeouts == mysql_thread___monitor_galera_healthcheck_max_timeout_count) { + proxy_error("Server %s:%d missed %d read_only checks. 
Assuming read_only=1\n", mmsd->hostname, mmsd->port, num_timeouts); + MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg); + } else { + // not enough timeout + } + } } else { if (fields) { // if we didn't get any error, but fileds is NULL, we are likely hitting bug #1994 if (primary_partition == false || wsrep_desync == true || wsrep_local_state!=4) { diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index eb39c4cfe..c729f6597 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -245,6 +245,7 @@ static char * mysql_thread_variables_names[]= { (char *)"monitor_groupreplication_healthcheck_timeout", (char *)"monitor_galera_healthcheck_interval", (char *)"monitor_galera_healthcheck_timeout", + (char *)"monitor_galera_healthcheck_max_timeout_count", (char *)"monitor_username", (char *)"monitor_password", (char *)"monitor_replication_lag_use_percona_heartbeat", @@ -376,6 +377,7 @@ MySQL_Threads_Handler::MySQL_Threads_Handler() { variables.monitor_groupreplication_healthcheck_timeout=800; variables.monitor_galera_healthcheck_interval=5000; variables.monitor_galera_healthcheck_timeout=800; + variables.monitor_galera_healthcheck_max_timeout_count=3; variables.monitor_query_interval=60000; variables.monitor_query_timeout=100; variables.monitor_slave_lag_when_null=60; @@ -654,6 +656,7 @@ int MySQL_Threads_Handler::get_variable_int(const char *name) { if (!strcmp(name,"monitor_groupreplication_healthcheck_timeout")) return (int)variables.monitor_groupreplication_healthcheck_timeout; if (!strcmp(name,"monitor_galera_healthcheck_interval")) return (int)variables.monitor_galera_healthcheck_interval; if (!strcmp(name,"monitor_galera_healthcheck_timeout")) return (int)variables.monitor_galera_healthcheck_timeout; + if (!strcmp(name,"monitor_galera_healthcheck_max_timeout_count")) return (int)variables.monitor_galera_healthcheck_max_timeout_count; } if (a == 'p' || a == 'P') { if 
(!strcmp(name,"monitor_ping_interval")) return (int)variables.monitor_ping_interval; @@ -951,6 +954,10 @@ char * MySQL_Threads_Handler::get_variable(char *name) { // this is the public f sprintf(intbuf,"%d",variables.monitor_galera_healthcheck_timeout); return strdup(intbuf); } + if (!strcasecmp(name,"monitor_galera_healthcheck_max_timeout_count")) { + sprintf(intbuf,"%d",variables.monitor_galera_healthcheck_max_timeout_count); + return strdup(intbuf); + } if (!strcasecmp(name,"monitor_query_interval")) { sprintf(intbuf,"%d",variables.monitor_query_interval); return strdup(intbuf); @@ -1460,6 +1467,15 @@ bool MySQL_Threads_Handler::set_variable(char *name, char *value) { // this is t return false; } } + if (!strcasecmp(name,"monitor_galera_healthcheck_max_timeout_count")) { + int intv=atoi(value); + if (intv >= 1 && intv <= 10) { + variables.monitor_galera_healthcheck_max_timeout_count=intv; + return true; + } else { + return false; + } + } if (!strcasecmp(name,"monitor_query_interval")) { int intv=atoi(value); if (intv >= 100 && intv <= 7*24*3600*1000) { @@ -3764,6 +3780,7 @@ void MySQL_Thread::refresh_variables() { mysql_thread___monitor_groupreplication_healthcheck_timeout=GloMTH->get_variable_int((char *)"monitor_groupreplication_healthcheck_timeout"); mysql_thread___monitor_galera_healthcheck_interval=GloMTH->get_variable_int((char *)"monitor_galera_healthcheck_interval"); mysql_thread___monitor_galera_healthcheck_timeout=GloMTH->get_variable_int((char *)"monitor_galera_healthcheck_timeout"); + mysql_thread___monitor_galera_healthcheck_max_timeout_count=GloMTH->get_variable_int((char *)"monitor_galera_healthcheck_max_timeout_count"); mysql_thread___monitor_query_interval=GloMTH->get_variable_int((char *)"monitor_query_interval"); mysql_thread___monitor_query_timeout=GloMTH->get_variable_int((char *)"monitor_query_timeout"); mysql_thread___monitor_slave_lag_when_null=GloMTH->get_variable_int((char *)"monitor_slave_lag_when_null"); From 
01be4ced0cc2fff4833e2bb9986f10faa386d68c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Fri, 3 May 2019 18:36:21 +1000 Subject: [PATCH 19/20] Temporary disable AWS Aurora module --- lib/ProxySQL_Admin.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/ProxySQL_Admin.cpp b/lib/ProxySQL_Admin.cpp index 9ac1178ff..d10a0efb3 100644 --- a/lib/ProxySQL_Admin.cpp +++ b/lib/ProxySQL_Admin.cpp @@ -8170,7 +8170,11 @@ void ProxySQL_Admin::load_mysql_servers_to_runtime() { if (resultset) delete resultset; resultset=NULL; +#ifdef AURORA_TEST // temporary enabled only for testing purpose query=(char *)"SELECT a.* FROM mysql_aws_aurora_hostgroups a LEFT JOIN mysql_aws_aurora_hostgroups b ON (a.writer_hostgroup=b.reader_hostgroup) WHERE b.reader_hostgroup IS NULL"; +#else + query=(char *)"SELECT a.* FROM mysql_aws_aurora_hostgroups a WHERE 1=0"; +#endif proxy_debug(PROXY_DEBUG_ADMIN, 4, "%s\n", query); admindb->execute_statement(query, &error , &cols , &affected_rows , &resultset_aws_aurora); if (error) { @@ -8179,7 +8183,6 @@ void ProxySQL_Admin::load_mysql_servers_to_runtime() { // Pass the resultset to MyHGM MyHGM->set_incoming_aws_aurora_hostgroups(resultset_aws_aurora); } - // commit all the changes MyHGM->commit(); From 817e20c222c6bc8e1e1198f73bf470b8379c2412 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Fri, 3 May 2019 22:30:48 +1000 Subject: [PATCH 20/20] Improvement on Galera and HGM - generate_mysql_servers_table() is now less verbose - update_galera_set_writer() is protected by a mutex - update_galera_set_writer() set status to online - fixed wording in monitor_galera_thread() - corrected the computing of consecutive timeout in monitor_galera_thread() --- lib/MySQL_HostGroups_Manager.cpp | 23 +++++++++- lib/MySQL_Monitor.cpp | 74 ++++++++++++++++---------------- lib/ProxySQL_Admin.cpp | 3 ++ lib/SQLite3_Server.cpp | 4 +- 4 files changed, 63 insertions(+), 41 deletions(-) diff --git 
a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index 1eaf6ebe3..ea5aaa61f 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -10,6 +10,7 @@ #include "ev.h" +#include #define USE_MYSRVC_ARRAY @@ -1805,12 +1806,25 @@ void MySQL_HostGroups_Manager::generate_mysql_servers_table(int *_onlyhg) { int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; - mydb->execute_statement((char *)"SELECT * FROM mysql_servers", &error , &cols , &affected_rows , &resultset); + if (_onlyhg==NULL) { + mydb->execute_statement((char *)"SELECT hostgroup_id hid, hostname, port, gtid_port gtid, weight, status, compression cmp, max_connections max_conns, max_replication_lag max_lag, use_ssl ssl, max_latency_ms max_lat, comment, mem_pointer FROM mysql_servers", &error , &cols , &affected_rows , &resultset); + } else { + int hidonly=*_onlyhg; + char *q1 = (char *)malloc(256); + sprintf(q1,"SELECT hostgroup_id hid, hostname, port, gtid_port gtid, weight, status, compression cmp, max_connections max_conns, max_replication_lag max_lag, use_ssl ssl, max_latency_ms max_lat, comment, mem_pointer FROM mysql_servers WHERE hostgroup_id=%d" , hidonly); + mydb->execute_statement(q1, &error , &cols , &affected_rows , &resultset); + free(q1); + } if (error) { proxy_error("Error on read from mysql_servers : %s\n", error); } else { if (resultset) { - proxy_info("Dumping mysql_servers\n"); + if (_onlyhg==NULL) { + proxy_info("Dumping mysql_servers: ALL\n"); + } else { + int hidonly=*_onlyhg; + proxy_info("Dumping mysql_servers: HG %d\n", hidonly); + } resultset->dump_to_stderr(); } } @@ -4376,6 +4390,8 @@ void MySQL_HostGroups_Manager::update_galera_set_read_only(char *_hostname, int } void MySQL_HostGroups_Manager::update_galera_set_writer(char *_hostname, int _port, int _writer_hostgroup) { + std::mutex local_mutex; + std::lock_guard lock(local_mutex); int cols=0; int affected_rows=0; SQLite3_result *resultset=NULL; @@ -4476,6 +4492,9 @@ 
void MySQL_HostGroups_Manager::update_galera_set_writer(char *_hostname, int _po query=(char *)malloc(strlen(q)+strlen(_hostname)+1024); // increased this buffer as it is used for other queries too sprintf(query,q,_writer_hostgroup,_hostname,_port,_writer_hostgroup); mydb->execute(query); + q=(char *)"UPDATE mysql_servers_incoming SET status=0 WHERE hostname='%s' AND port=%d AND hostgroup_id=%d"; + sprintf(query,q,_hostname,_port,_writer_hostgroup); + mydb->execute(query); //free(query); q=(char *)"DELETE FROM mysql_servers_incoming WHERE hostname='%s' AND port=%d AND hostgroup_id<>%d"; //query=(char *)malloc(strlen(q)+strlen(_hostname)+64); diff --git a/lib/MySQL_Monitor.cpp b/lib/MySQL_Monitor.cpp index a4a8a9c5f..62092082a 100644 --- a/lib/MySQL_Monitor.cpp +++ b/lib/MySQL_Monitor.cpp @@ -1525,7 +1525,7 @@ void * monitor_galera_thread(void *arg) { unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mysql_thread___monitor_galera_healthcheck_timeout * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); - proxy_error("Timeout on Galera health check for %s:%d after %lldms. If the server is overload, increase mysql-monitor_galera_healthcheck_timeout. Assuming wsrep_cluster_status is NOT Primary\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); + proxy_error("Timeout on Galera health check for %s:%d after %lldms. If the server is overload, increase mysql-monitor_galera_healthcheck_timeout.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); goto __exit_monitor_galera_thread; } if (GloMyMon->shutdown==true) { @@ -1541,7 +1541,7 @@ void * monitor_galera_thread(void *arg) { unsigned long long now=monotonic_time(); if (now > mmsd->t1 + mysql_thread___monitor_galera_healthcheck_timeout * 1000) { mmsd->mysql_error_msg=strdup("timeout check"); - proxy_error("Timeout on Galera health check for %s:%d after %lldms. If the server is overload, increase mysql-monitor_galera_healthcheck_timeout. 
Assuming wsrep_local_state is NOT 4 and read_only=YES\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); + proxy_error("Timeout on Galera health check for %s:%d after %lldms. If the server is overload, increase mysql-monitor_galera_healthcheck_timeout.\n", mmsd->hostname, mmsd->port, (now-mmsd->t1)/1000); goto __exit_monitor_galera_thread; } if (GloMyMon->shutdown==true) { @@ -1666,45 +1666,45 @@ __end_process_galera_result: //node->add_entry(time_now, (mmsd->mysql_error_msg ? 0 : mmsd->t2-mmsd->t1) , transactions_behind,viable_candidate,read_only,mmsd->mysql_error_msg); node->add_entry(time_now, (mmsd->mysql_error_msg ? 0 : mmsd->t2-mmsd->t1) , wsrep_local_recv_queue, primary_partition, read_only, wsrep_local_state, wsrep_desync, wsrep_reject_queries, wsrep_sst_donor_rejects_queries, mmsd->mysql_error_msg); GloMyMon->Galera_Hosts_Map.insert(std::make_pair(s,node)); - if (mmsd->mysql_error_msg) { - if (strncasecmp(mmsd->mysql_error_msg, (char *)"timeout", 7) == 0) { - // it was a timeout . Let's count the number of consecutive timeouts - int max_num_timeout = 10; - if (mysql_thread___monitor_galera_healthcheck_max_timeout_count < max_num_timeout) { - max_num_timeout = mysql_thread___monitor_galera_healthcheck_max_timeout_count; - } - unsigned long long start_times[max_num_timeout]; - bool timeouts[max_num_timeout]; - for (int i=0; ilast_entries[i].start_time) { - int smallidx = 0; - for (int j=0; jmysql_error_msg) { + if (strncasecmp(mmsd->mysql_error_msg, (char *)"timeout", 7) == 0) { + // it was a timeout . 
Let's count the number of consecutive timeouts + int max_num_timeout = 10; + if (mysql_thread___monitor_galera_healthcheck_max_timeout_count < max_num_timeout) { + max_num_timeout = mysql_thread___monitor_galera_healthcheck_max_timeout_count; + } + unsigned long long start_times[max_num_timeout]; + bool timeouts[max_num_timeout]; + for (int i=0; ilast_entries[i].start_time) { + int smallidx = 0; + for (int j=0; jlast_entries[i].start_time) { - start_times[j] = node->last_entries[i].start_time; - timeouts[j] = false; - if (node->last_entries[i].error) { - if (strncasecmp(node->last_entries[i].error, (char *)"timeout", 7) == 0) { - timeouts[j] = true; - } - } + } + } + if (start_times[smallidx] < node->last_entries[i].start_time) { + start_times[smallidx] = node->last_entries[i].start_time; + timeouts[smallidx] = false; + if (node->last_entries[i].error) { + if (strncasecmp(node->last_entries[i].error, (char *)"timeout", 7) == 0) { + timeouts[smallidx] = true; } } } } - for (int i=0; ihostname, mmsd->port, num_timeouts); + proxy_error("Server %s:%d missed %d Galera checks. 
Assuming offline\n", mmsd->hostname, mmsd->port, num_timeouts); MyHGM->update_galera_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg); } else { // not enough timeout diff --git a/lib/ProxySQL_Admin.cpp b/lib/ProxySQL_Admin.cpp index d10a0efb3..926e82f12 100644 --- a/lib/ProxySQL_Admin.cpp +++ b/lib/ProxySQL_Admin.cpp @@ -6684,6 +6684,9 @@ void ProxySQL_Admin::__insert_or_replace_maintable_select_disktable() { admindb->execute("INSERT OR REPLACE INTO main.mysql_ldap_mapping SELECT * FROM disk.mysql_ldap_mapping"); } admindb->execute("PRAGMA foreign_keys = ON"); +#if defined(TEST_AURORA) || defined(TEST_GALERA) + admindb->execute("DELETE FROM mysql_servers WHERE gtid_port > 0"); // temporary disable add GTID checks +#endif } void ProxySQL_Admin::__delete_disktable() { diff --git a/lib/SQLite3_Server.cpp b/lib/SQLite3_Server.cpp index a6df17b19..4f1ee4026 100644 --- a/lib/SQLite3_Server.cpp +++ b/lib/SQLite3_Server.cpp @@ -515,8 +515,8 @@ __run_query: if (resultset->rows_count == 0) { PROXY_TRACE(); } - if (rand() % 100 == 0) { - // randomly add some latency on 1% of the traffic + if (rand() % 20 == 0) { + // randomly add some latency on 5% of the traffic sleep(2); } }