Merge pull request #3774 from sysown/v2.x-test_unshun_failures

Fix test 'test_unshun_algorithm-t' and variable 'mysql-connect_retries_delay'
pull/3794/head
René Cannaò 4 years ago committed by GitHub
commit 7dc82cd1bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -2811,6 +2811,11 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_
||
(mysrvc->shunned_and_kill_all_connections==true && mysrvc->ConnectionsUsed->conns_length()==0 && mysrvc->ConnectionsFree->conns_length()==0) // if shunned_and_kill_all_connections is set, ensure all connections are already dropped
) {
#ifdef DEBUG
if (GloMTH->variables.hostgroup_manager_verbose >= 3) {
proxy_info("Unshunning server %s:%d.\n", mysrvc->address, mysrvc->port);
}
#endif
mysrvc->status=MYSQL_SERVER_STATUS_ONLINE;
mysrvc->shunned_automatic=false;
mysrvc->shunned_and_kill_all_connections=false;

@ -2698,6 +2698,9 @@ bool MySQL_Session::handler_again___status_CONNECTING_SERVER(int *_rc) {
}
}
// NOTE-connect_retries_delay: This check alone is not enough for imposing
// 'mysql_thread___connect_retries_delay'. In case of 'async_connect' failing, 'pause_until' should also
// be set to 'mysql_thread___connect_retries_delay'. Complementary NOTE below.
if (mybe->server_myds->myconn==NULL) {
pause_until=thread->curtime+mysql_thread___connect_retries_delay*1000;
*_rc=1;
@ -2768,6 +2771,14 @@ bool MySQL_Session::handler_again___status_CONNECTING_SERVER(int *_rc) {
PROXY_TRACE();
}
myds->destroy_MySQL_Connection_From_Pool(false);
// NOTE-connect_retries_delay: In case of failure to connect, if
// 'mysql_thread___connect_retries_delay' is set, we impose a delay in the session
// processing via 'pause_until'. Complementary NOTE above.
if (mysql_thread___connect_retries_delay) {
pause_until=thread->curtime+mysql_thread___connect_retries_delay*1000;
set_status(CONNECTING_SERVER);
return false;
}
NEXT_IMMEDIATE_NEW(CONNECTING_SERVER);
} else {
__exit_handler_again___status_CONNECTING_SERVER_with_err:

@ -0,0 +1,118 @@
/**
* @file test_mysql_connect_retries_delay-t.cpp
* @brief This test verifies the behavior for 'mysql-connect_retries_delay'.
* @details For doing this check, the test performs the following actions:
* 1. Disable monitoring and configure ProxySQL with a non-existing server in it's own hostgroup.
* 2. Configure 'mysql-connect_retries_delay'.
* 3. Create a connection against ProxySQL and issue a query against the non-existing server.
* 4. Check that the execution time of the query till the error matches 'mysql-connect_retries_delay'.
* 5. Repeat the previous 3 points for several values.
*/
#include <algorithm>
#include <chrono>
#include <string>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <vector>
#include <tuple>
#include <mysql.h>
#include <mysql/mysqld_error.h>
#include "tap.h"
#include "command_line.h"
#include "utils.h"
#include "proxysql_utils.h"
using std::string;
using std::vector;
typedef std::chrono::high_resolution_clock hrc;
const vector<uint32_t> test_retry_delays { 1000, 2000, 3000 };
const double DUR_EPSILON = 1;
int main() {
CommandLine cl;
plan(test_retry_delays.size());
if (cl.getEnv()) {
diag("Failed to get the required environmental variables.");
return EXIT_FAILURE;
}
MYSQL* proxysql_admin = mysql_init(NULL);
MYSQL* proxysql = mysql_init(NULL);
// Initialize connections
if (!proxysql_admin) {
fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql_admin));
return EXIT_FAILURE;
}
if (!proxysql) {
fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql));
return EXIT_FAILURE;
}
if (!mysql_real_connect(proxysql_admin, cl.host, cl.admin_username, cl.admin_password, NULL, cl.admin_port, NULL, 0)) {
fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql_admin));
return EXIT_FAILURE;
}
if (!mysql_real_connect(proxysql, cl.host, cl.username, cl.password, NULL, cl.port, NULL, 0)) {
fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql));
return EXIT_FAILURE;
}
// Global config
MYSQL_QUERY(proxysql_admin, "SET mysql-monitor_enabled=0");
MYSQL_QUERY(proxysql_admin, "SET mysql-poll_timeout=100");
MYSQL_QUERY(proxysql_admin, "SET mysql-connect_retries_on_failure=3");
MYSQL_QUERY(proxysql_admin, "SET mysql-connect_timeout_server_max=20000");
// Cleanup servers and configure just a non-existing server
MYSQL_QUERY(proxysql_admin, "DELETE FROM mysql_servers");
MYSQL_QUERY(proxysql_admin, "INSERT INTO mysql_servers (hostgroup_id,hostname,port,max_replication_lag,max_connections,comment) VALUES (10,'127.0.0.1',13330,180,500,'mysql_not_here')");
// Load the new config
MYSQL_QUERY(proxysql_admin, "LOAD MYSQL VARIABLES TO RUNTIME");
MYSQL_QUERY(proxysql_admin, "LOAD MYSQL SERVERS TO RUNTIME");
for (uint32_t retry_delay : test_retry_delays) {
string connect_retries_query {};
string_format("SET mysql-connect_retries_delay=%d", connect_retries_query, retry_delay);
// Load the new connect retry config
MYSQL_QUERY(proxysql_admin, connect_retries_query.c_str());
MYSQL_QUERY(proxysql_admin, "LOAD MYSQL VARIABLES TO RUNTIME");
// Issue a query and check the timing
std::chrono::nanoseconds dur;
hrc::time_point start;
hrc::time_point end;
start = hrc::now();
{
mysql_query(proxysql, "DO /* hostgroup=10 */ 1");
}
end = hrc::now();
dur = end - start;
double dur_s = dur.count() / pow(10,9);
double exp_dur = retry_delay * 3 / 1000.0;
ok(
dur_s > (exp_dur - DUR_EPSILON) && dur_s < (exp_dur + DUR_EPSILON),
"Test duration matches the expected duration: { act_dur: %lf, exp_dur: %lf }", dur_s, exp_dur
);
}
mysql_close(proxysql);
mysql_close(proxysql_admin);
return exit_status();
}

@ -14,6 +14,47 @@
* 3. All the servers are 'SHUNNED' using 'PROXYSQL_SIMULATOR' command.
* 4. Each of the servers placed in the individual hostgroups is 'UNSHUNNED', checking the proper behavior
* related to the other servers depending on 'mysql-unshun_algorithm'.
*
* CI-FAILURES-NOTES:
* This test received some changes after being detected that was provoking some failures in the CI.
* The circumstances of these failures are summarized here together with the issue resolution as a reminder
* of the design.
*
* PREVIOUS-DESIGN:
* In the previous design the test set the following variables:
*
* ```
* "SET mysql-shun_on_failures=5"
* "SET mysql-connect_retries_on_failure=0"
* ```
*
* These values were chosen to avoid connection retrying when the connection error takes place.
* For making sure that the server was properly UNSHUNNED, two queries were issued, with a two second delay
* between them. The second query should only produce one error, thus not going over the threshold of at
* least '2' errors within the same second to set the server as SHUNNED again.
*
* FLAW:
* The assumption that the second query cannot produce a second error is FALSE. The second query can produce
* two errors under certain timing conditions. If the call to 'mysql_real_connect_start' doesn't immediately
* return, because the underlying socket haven't been yet signaled, then 'MySQL_Session::handler' will
* return, this will result into:
*
* 1. 'MySQL_Data_Stream' receiving the socket event in next poll, the event will be processed, but because the
* server isn't present a error will be generated.
* 2. The session will later be processed, and due to the server still not being present, a second error will
* be generated, resulting into the server being SHUNNED.
*
* SOLUTION:
* Test now uses the following values for the variables:
*
* ```
* "SET mysql-shun_on_failures=3"
* "SET mysql-connect_retries_on_failure=3"
* "SET mysql-connect_retries_delay=1000"
* ```
*
* This will avoid any accidental SHUNNING due to the previously described situation, and due to
* 'connect_retries_delay' will prevent more than one connection attempt per second.
*/
#include <cstring>
@ -58,14 +99,19 @@ int shunn_all_servers(MYSQL* proxysql_admin) {
return EXIT_SUCCESS;
}
/**
* @brief Issues a simple 'DO 1' query to the target hostgroup with the intention of UNSHUNNING the server
* present in that hostgroup.
* @param proxysql_mysql An already opened connection to ProxySQL.
* @param i The hostgroup to which the query should be issued.
* @return Since query errors are ignored, because the query is supposed to fail, EXIT_SUCCESS is always
* returned.
*/
int wakup_target_server(MYSQL* proxysql_mysql, uint32_t i) {
std::string t_simple_do_query { "DO /* ;hostgroup=%d */ 1" };
std::string simple_do_query {};
string_format(t_simple_do_query, simple_do_query, i);
mysql_query(proxysql_mysql, simple_do_query.c_str());
diag("%s: running query: %s", tap_curtime().c_str(), simple_do_query.c_str());
sleep(SHUN_RECOVERY_TIME * 2);
mysql_query(proxysql_mysql, simple_do_query.c_str());
diag("%s: running query: %s", tap_curtime().c_str(), simple_do_query.c_str());
@ -217,18 +263,41 @@ int test_proxysql_simulator_error(MYSQL* proxysql_admin) {
return EXIT_SUCCESS;
}
/**
* @brief Configures the relevant 'mysql' variables for achieving the test desired UNSHUNNING behavior.
* @details The values set for these variables prevent the target server to be SHUNNED after being UNSHUNNED
* by the query issued by 'wakup_target_server', in case the query produces two simultaneous errors instead
* of just one. Why this is a possibility is described on the details sections in the file DOC.
* @param proxysql_admin An already opened connection to ProxySQL Admin.
* @return EXIT_SUCCESS in case of success, EXIT_FAILURE otherwise.
*/
int configure_mysql_shunning_variables(MYSQL* proxysql_admin) {
MYSQL_QUERY(proxysql_admin, "SET mysql-shun_on_failures=3");
diag("%s: Line:%d running admin query: SET mysql-shun_on_failures=3", tap_curtime().c_str(), __LINE__);
MYSQL_QUERY(proxysql_admin, "SET mysql-connect_retries_on_failure=3");
diag("%s: Line:%d running admin query: SET mysql-connect_retries_on_failure=3", tap_curtime().c_str(), __LINE__);
MYSQL_QUERY(proxysql_admin, "SET mysql-connect_retries_delay=1000");
diag("%s: Line:%d running admin query: SET mysql-connect_retries_delay=1000", tap_curtime().c_str(), __LINE__);
return EXIT_SUCCESS;
}
int test_unshun_algorithm_behavior(MYSQL* proxysql_mysql, MYSQL* proxysql_admin) {
// Configure Admin variables with lower thresholds
MYSQL_QUERY(proxysql_admin, "SET mysql-shun_recovery_time_sec=1");
diag("%s: Line:%d running admin query: SET mysql-shun_recovery_time_sec=1", tap_curtime().c_str(), __LINE__);
// Set verbosity up for extra information in ProxySQL log
MYSQL_QUERY(proxysql_admin, "SET mysql-hostgroup_manager_verbose=3");
diag("%s: Line:%d running admin query: SET mysql-hostgroup_manager_verbose=3", tap_curtime().c_str(), __LINE__);
// NOTE: The following varible value is set here just as a reminder. This change isn't properly propagated
// to the 'error setting operation' since this is performed from 'ProxySQL_Admin' thread when
// 'PROXYSQL_SIMULATOR' command is received. Because of this, it's in 'PROXYSQL_SIMULATOR' command impl in
// 'ProxySQL_Admin' where this variable value is updated before setting the error.
MYSQL_QUERY(proxysql_admin, "SET mysql-shun_on_failures=5");
MYSQL_QUERY(proxysql_admin, "SET mysql-connect_retries_on_failure=0");
// Configure the relevant variables for the desired UNSHUNNING behavior
if (configure_mysql_shunning_variables(proxysql_admin)) {
return EXIT_FAILURE;
}
MYSQL_QUERY(proxysql_admin, "LOAD MYSQL VARIABLES TO RUNTIME");
// Cleanup the servers and create a good number of hostgroups
MYSQL_QUERY(proxysql_admin, "DELETE FROM mysql_servers");
@ -267,6 +336,9 @@ int test_unshun_algorithm_behavior(MYSQL* proxysql_mysql, MYSQL* proxysql_admin)
// Check that only server from first hostgroup is 'SHUNNED'
bool unexp_row_value = server_status_checker(proxysql_admin, "ONLINE", "SHUNNED", i);
ok(unexp_row_value == false, "Server from first hg was set 'ONLINE' while others remained 'SHUNNED'");
if (tests_failed()) {
return exit_status();
}
}
}
@ -284,6 +356,9 @@ int test_unshun_algorithm_behavior(MYSQL* proxysql_mysql, MYSQL* proxysql_admin)
bool unexp_row_value = server_status_checker(proxysql_admin, "ONLINE", "ONLINE", i);
ok(unexp_row_value == false, "Server from first hg was set 'ONLINE' while others remained 'ONLINE'");
diag(""); // empty line
if (tests_failed()) {
return exit_status();
}
}
}
@ -310,6 +385,9 @@ int test_unshun_algorithm_behavior(MYSQL* proxysql_mysql, MYSQL* proxysql_admin)
bool unexp_row_value = server_status_checker(proxysql_admin, "ONLINE", "SHUNNED", i);
ok(unexp_row_value == false, "Server from first hg was set 'ONLINE' while others remained 'SHUNNED'");
diag(""); // empty line
if (tests_failed()) {
return exit_status();
}
}
}
@ -338,7 +416,7 @@ int main(int argc, char** argv) {
return EXIT_FAILURE;
}
if (!mysql_real_connect(proxysql_admin, cl.host, cl.admin_username, cl.admin_password, NULL, cl.admin_port, NULL, 0)) {
fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql_mysql));
fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql_admin));
return EXIT_FAILURE;
}

Loading…
Cancel
Save