#include "../deps/json/json.hpp"
using json = nlohmann::json;
#define PROXYJSON

#include "MySQL_HostGroups_Manager.h"

#ifdef TEST_AURORA
static unsigned long long array_mysrvc_total = 0;
static unsigned long long array_mysrvc_cands = 0;
#endif // TEST_AURORA

extern MySQL_Threads_Handler *GloMTH;

/**
 * @brief Pick one server of this hostgroup using a weighted random draw.
 *
 * The function works in several passes over the servers of the hostgroup:
 *  1. Every ONLINE server that is below its max_connections, within the
 *     allowed latency and that satisfies the GTID / max-lag filter becomes a
 *     candidate. Automatically SHUNNED servers whose recovery time elapsed are
 *     set back ONLINE and considered immediately.
 *  2. If max_lag_ms > 0 and the session asks for it, candidates reporting
 *     aws_aurora_current_lag_us == 0 are dropped (attempt to skip the writer).
 *  3. If the total weight is still 0, automatically shunned servers are
 *     brought back using a recovery time reduced to 10%.
 *  4. Optionally, candidates slower than the average latency are dropped.
 *  5. One of the remaining candidates is drawn with probability proportional
 *     to its weight.
 *
 * @param gtid_uuid   GTID UUID passed to MyHGM->gtid_exists(); only used when
 *                    gtid_trxid is non-zero.
 * @param gtid_trxid  When non-zero, only servers for which
 *                    MyHGM->gtid_exists() returns true are candidates.
 * @param max_lag_ms  When >= 0 (and gtid_trxid is 0), only servers whose
 *                    aws_aurora_current_lag_us/1000 does not exceed this value
 *                    are candidates.
 * @param sess        Session on whose thread the status counters and tuning
 *                    variables are read/updated. May be NULL, in which case
 *                    the session dependent steps are skipped.
 * @return The selected server, or NULL if no server is eligible.
 */
MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_lag_ms, MySQL_Session *sess) {
	MySrvC *mysrvc=NULL;
	unsigned int j;
	unsigned int sum=0;           // total weight of the current candidates
	unsigned int TotalUsedConn=0; // only consumed by the (disabled) overload filter below
	unsigned int l=mysrvs->cnt();
	static time_t last_hg_log = 0;
#ifdef TEST_AURORA
	unsigned long long a1 = array_mysrvc_total/10000;
	array_mysrvc_total += l;
	unsigned long long a2 = array_mysrvc_total/10000;
	if (a2 > a1) {
		fprintf(stderr, "Total: %llu, Candidates: %llu\n", array_mysrvc_total-l, array_mysrvc_cands);
	}
#endif // TEST_AURORA
	// candidates live on the stack unless the hostgroup has more than 32 servers
	MySrvC *mysrvcCandidates_static[32];
	MySrvC **mysrvcCandidates = mysrvcCandidates_static;
	unsigned int num_candidates = 0;
	bool max_connections_reached = false;
	if (l>32) {
		mysrvcCandidates = (MySrvC **)malloc(sizeof(MySrvC *)*l);
	}

	// Common epilogue: release the heap array (if any), update test counters
	// and hand back the result.
	auto finish = [&](MySrvC *result) -> MySrvC * {
		if (l>32) {
			free(mysrvcCandidates);
		}
#ifdef TEST_AURORA
		array_mysrvc_cands += num_candidates;
#endif // TEST_AURORA
		return result;
	};

	// Append a server to the candidate list and account for its weight.
	auto add_candidate = [&](MySrvC *srv) {
		sum+=srv->weight;
		TotalUsedConn+=srv->ConnectionsUsed->conns_length();
		mysrvcCandidates[num_candidates]=srv;
		num_candidates++;
	};

	// Apply the latency, GTID and max-lag filters to a server and add it to the
	// candidates if it passes. When count_lag_skips is set, a server rejected
	// only because of its Aurora lag bumps the per-thread "replicas skipped"
	// counter.
	auto consider = [&](MySrvC *srv, bool count_lag_skips) {
		// consider the host only if not too far
		if (!(srv->current_latency_us < (srv->max_latency_us ? srv->max_latency_us : mysql_thread___default_max_latency_ms*1000))) {
			return;
		}
		if (gtid_trxid) {
			if (MyHGM->gtid_exists(srv, gtid_uuid, gtid_trxid)) {
				add_candidate(srv);
			}
		} else if (max_lag_ms >= 0) {
			if ((unsigned int)max_lag_ms >= srv->aws_aurora_current_lag_us / 1000) {
				add_candidate(srv);
			} else if (count_lag_skips && sess) {
				// sess may be NULL (see the latency awareness check below)
				sess->thread->status_variables.stvar[st_var_aws_aurora_replicas_skipped_during_query]++;
			}
		} else {
			add_candidate(srv);
		}
	};

	if (l) {
		for (j=0; j<l; j++) {
			mysrvc=mysrvs->idx(j);
			if (mysrvc->get_status() == MYSQL_SERVER_STATUS_ONLINE) { // consider this server only if ONLINE
				if (mysrvc->myhgc->num_online_servers.load(std::memory_order_relaxed) <= mysrvc->myhgc->attributes.max_num_online_servers) { // number of online servers in HG is within configured range
					if (mysrvc->ConnectionsUsed->conns_length() < mysrvc->max_connections) { // consider this server only if didn't reach max_connections
						consider(mysrvc, true);
					} else {
						max_connections_reached = true;
					}
				} else {
					mysrvc->myhgc->log_num_online_server_count_error();
				}
			} else if (mysrvc->get_status() == MYSQL_SERVER_STATUS_SHUNNED) {
				// try to recover shunned servers
				if (mysrvc->shunned_automatic && mysql_thread___shun_recovery_time_sec) {
					time_t t;
					t=time(NULL);
					// we do all these changes without locking . We assume the server is not used from long
					// even if the server is still in used and any of the follow command fails it is not critical
					// because this is only an attempt to recover a server that is probably dead anyway

					// the next few lines of code try to solve issue #530
					int max_wait_sec = ( mysql_thread___shun_recovery_time_sec * 1000 >= mysql_thread___connect_timeout_server_max ? mysql_thread___connect_timeout_server_max/1000 - 1 : mysql_thread___shun_recovery_time_sec );
					if (max_wait_sec < 1) { // min wait time should be at least 1 second
						max_wait_sec = 1;
					}
					if (t > mysrvc->time_last_detected_error && (t - mysrvc->time_last_detected_error) > max_wait_sec) {
						if (
							(mysrvc->shunned_and_kill_all_connections==false) // it is safe to bring it back online
							||
							(mysrvc->shunned_and_kill_all_connections==true && mysrvc->ConnectionsUsed->conns_length()==0 && mysrvc->ConnectionsFree->conns_length()==0) // if shunned_and_kill_all_connections is set, ensure all connections are already dropped
						) {
#ifdef DEBUG
							if (GloMTH->variables.hostgroup_manager_verbose >= 3) {
								proxy_info("Unshunning server %s:%d.\n", mysrvc->address, mysrvc->port);
							}
#endif
							mysrvc->set_status(MYSQL_SERVER_STATUS_ONLINE);
							mysrvc->shunned_automatic=false;
							mysrvc->shunned_and_kill_all_connections=false;
							mysrvc->connect_ERR_at_time_last_detected_error=0;
							mysrvc->time_last_detected_error=0;
							// note: the following function scans all the hostgroups.
							// This is ok for now because we only have a global mutex.
							// If one day we implement a mutex per hostgroup (unlikely,
							// but possible), this must be taken into consideration
							if (mysql_thread___unshun_algorithm == 1) {
								MyHGM->unshun_server_all_hostgroups(mysrvc->address, mysrvc->port, t, max_wait_sec, &mysrvc->myhgc->hid);
							}
							// if a server is taken back online, consider it immediately
							consider(mysrvc, false);
						}
					}
				}
			}
		}

		if (max_lag_ms > 0 && sess) { // we are using AWS Aurora, as this logic is implemented only here
			unsigned int min_num_replicas = sess->thread->variables.aurora_max_lag_ms_only_read_from_replicas;
			if (min_num_replicas && num_candidates >= min_num_replicas) { // there are at least N replicas
				// we try to remove the writer
				unsigned int total_aws_aurora_current_lag_us=0;
				for (j=0; j<num_candidates; j++) {
					total_aws_aurora_current_lag_us += mysrvcCandidates[j]->aws_aurora_current_lag_us;
				}
				if (total_aws_aurora_current_lag_us) { // we are just double checking that we don't have all servers with aws_aurora_current_lag_us==0
					j=0;
					while (j<num_candidates) {
						mysrvc = mysrvcCandidates[j];
						if (mysrvc->aws_aurora_current_lag_us==0) {
							sum-=mysrvc->weight;
							TotalUsedConn-=mysrvc->ConnectionsUsed->conns_length();
							if (j < num_candidates-1) {
								mysrvcCandidates[j]=mysrvcCandidates[num_candidates-1];
							}
							num_candidates--;
							// do not advance: slot j now holds a candidate that
							// has not been examined yet
						} else {
							j++;
						}
					}
				}
			}
		}

		if (sum==0) {
			// per issue #531 , we try a desperate attempt to bring back online any shunned server
			// we do this lowering the maximum wait time to 10%
			// most of the follow code is copied from few lines above
			time_t t;
			t=time(NULL);
			int max_wait_sec = ( mysql_thread___shun_recovery_time_sec * 1000 >= mysql_thread___connect_timeout_server_max ? mysql_thread___connect_timeout_server_max/10000 - 1 : mysql_thread___shun_recovery_time_sec/10 );
			if (max_wait_sec < 1) { // min wait time should be at least 1 second
				max_wait_sec = 1;
			}
			if (t - last_hg_log > 1) { // log this at most once per second to avoid spamming the logs
				last_hg_log = time(NULL);

				if (gtid_trxid) {
					// gtid_trxid is unsigned 64 bit: print it as such
					proxy_error("Hostgroup %u has no servers ready for GTID '%s:%llu'. Waiting for replication...\n", hid, gtid_uuid, static_cast<unsigned long long>(gtid_trxid));
				} else {
					proxy_error("Hostgroup %u has no servers available%s! Checking servers shunned for more than %u second%s\n", hid,
						(max_connections_reached ? " or max_connections reached for all servers" : ""), max_wait_sec, max_wait_sec == 1 ? "" : "s");
				}
			}
			for (j=0; j<l; j++) {
				mysrvc=mysrvs->idx(j);
				if (mysrvc->get_status() == MYSQL_SERVER_STATUS_SHUNNED && mysrvc->shunned_automatic == true) {
					if ((t - mysrvc->time_last_detected_error) > max_wait_sec) {
						mysrvc->set_status(MYSQL_SERVER_STATUS_ONLINE);
						mysrvc->shunned_automatic=false;
						mysrvc->connect_ERR_at_time_last_detected_error=0;
						mysrvc->time_last_detected_error=0;
						// if a server is taken back online, consider it immediately
						consider(mysrvc, false);
					}
				}
			}
		}

		if (sum==0) {
			proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 7, "Returning MySrvC NULL because no backend ONLINE or with weight\n");
			return finish(NULL); // if we reach here, we couldn't find any target
		}

/*
		unsigned int New_sum=0;
		unsigned int New_TotalUsedConn=0;
		// we will now scan again to ignore overloaded servers
		for (j=0; j<num_candidates; j++) {
			mysrvc = mysrvcCandidates[j];
			unsigned int len=mysrvc->ConnectionsUsed->conns_length();
			if ((len * sum) <= (TotalUsedConn * mysrvc->weight * 1.5 + 1)) {
				New_sum+=mysrvc->weight;
				New_TotalUsedConn+=len;
			} else {
				// remove the candidate
				if (j+1 < num_candidates) {
					mysrvcCandidates[j] = mysrvcCandidates[num_candidates-1];
				}
				j--;
				num_candidates--;
			}
		}
*/

		unsigned int New_sum=sum;

		// latency awareness algorithm is enabled only when compiled with USE_MYSRVC_ARRAY
		if (sess && sess->thread->variables.min_num_servers_lantency_awareness) {
			if ((int) num_candidates >= sess->thread->variables.min_num_servers_lantency_awareness) {
				unsigned int servers_with_latency = 0;
				unsigned int total_latency_us = 0;
				// scan and verify that all servers have some latency
				for (j=0; j<num_candidates; j++) {
					mysrvc = mysrvcCandidates[j];
					if (mysrvc->current_latency_us) {
						servers_with_latency++;
						total_latency_us += mysrvc->current_latency_us;
					}
				}
				if (servers_with_latency == num_candidates) {
					// all servers have some latency.
					// That is good. If any server have no latency, something is wrong
					// and we will skip this algorithm
					sess->thread->status_variables.stvar[st_var_ConnPool_get_conn_latency_awareness]++;
					unsigned int avg_latency_us = total_latency_us/num_candidates;
					j=0;
					while (j<num_candidates) {
						mysrvc = mysrvcCandidates[j];
						if (mysrvc->current_latency_us > avg_latency_us) {
							// remove the candidate and re-examine the one swapped into slot j
							if (j+1 < num_candidates) {
								mysrvcCandidates[j] = mysrvcCandidates[num_candidates-1];
							}
							num_candidates--;
						} else {
							j++;
						}
					}
					// we scan again to adjust weight
					New_sum = 0;
					for (j=0; j<num_candidates; j++) {
						New_sum+=mysrvcCandidates[j]->weight;
					}
				}
			}
		}

		// The filters above may leave only candidates with weight 0: bail out
		// instead of computing a modulo by zero in the draw below.
		if (New_sum==0) {
			proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 7, "Returning MySrvC NULL because no backend ONLINE or with weight\n");
			return finish(NULL); // if we reach here, we couldn't find any target
		}

		// draw k in [1, New_sum] and walk the cumulative weights until k is covered
		unsigned int k;
		if (New_sum > 32768) {
			k=rand()%New_sum;
		} else {
			k=fastrand()%New_sum;
		}
		k++;
		New_sum=0;

		for (j=0; j<num_candidates; j++) {
			mysrvc = mysrvcCandidates[j];
			New_sum+=mysrvc->weight;
			if (k<=New_sum) {
				proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 7, "Returning MySrvC %p, server %s:%d\n", mysrvc, mysrvc->address, mysrvc->port);
				return finish(mysrvc);
			}
		}
	} else {
		time_t t = time(NULL);

		if (t - last_hg_log > 1) {
			last_hg_log = time(NULL);
			proxy_error("Hostgroup %u has no servers available!\n", hid);
		}
	}
	proxy_debug(PROXY_DEBUG_MYSQL_CONNPOOL, 7, "Returning MySrvC NULL\n");
	return finish(NULL); // if we reach here, we couldn't find any target
}