From de5e437bc65bbbde7f211ccdd7cbb3be3411707b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Mon, 24 Jan 2022 17:34:03 +0100 Subject: [PATCH 1/7] Add exit conditions on failure for 'test_unshun_algorithm-t' --- test/tap/tests/test_unshun_algorithm-t.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/tap/tests/test_unshun_algorithm-t.cpp b/test/tap/tests/test_unshun_algorithm-t.cpp index 541af2056..abd17de7b 100644 --- a/test/tap/tests/test_unshun_algorithm-t.cpp +++ b/test/tap/tests/test_unshun_algorithm-t.cpp @@ -267,6 +267,9 @@ int test_unshun_algorithm_behavior(MYSQL* proxysql_mysql, MYSQL* proxysql_admin) // Check that only server from first hostgroup is 'SHUNNED' bool unexp_row_value = server_status_checker(proxysql_admin, "ONLINE", "SHUNNED", i); ok(unexp_row_value == false, "Server from first hg was set 'ONLINE' while others remained 'SHUNNED'"); + if (tests_failed()) { + return exit_status(); + } } } @@ -284,6 +287,9 @@ int test_unshun_algorithm_behavior(MYSQL* proxysql_mysql, MYSQL* proxysql_admin) bool unexp_row_value = server_status_checker(proxysql_admin, "ONLINE", "ONLINE", i); ok(unexp_row_value == false, "Server from first hg was set 'ONLINE' while others remained 'ONLINE'"); diag(""); // empty line + if (tests_failed()) { + return exit_status(); + } } } @@ -310,6 +316,9 @@ int test_unshun_algorithm_behavior(MYSQL* proxysql_mysql, MYSQL* proxysql_admin) bool unexp_row_value = server_status_checker(proxysql_admin, "ONLINE", "SHUNNED", i); ok(unexp_row_value == false, "Server from first hg was set 'ONLINE' while others remained 'SHUNNED'"); diag(""); // empty line + if (tests_failed()) { + return exit_status(); + } } } From c3c1a50d91ce3fa51f94bdb12a1ec63f6d7b4fae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Wed, 2 Feb 2022 14:37:16 +0100 Subject: [PATCH 2/7] Add extra logging info for troubleshooting SHUNNING errors --- lib/MySQL_HostGroups_Manager.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index e033d2647..332769046 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -907,10 +907,22 @@ void MySrvC::connect_error(int err_num, bool get_mutex) { default: break; } + +#ifdef DEBUG + if (GloMTH->variables.hostgroup_manager_verbose >= 3) { + proxy_info( + "{ MySrvC: %p, connect_ERR: %d, connect_ERR_at_time_last_detected_error: %d }\n", + this, connect_ERR, connect_ERR_at_time_last_detected_error + ); + } +#endif + time_t t=time(NULL); if (t > time_last_detected_error) { time_last_detected_error=t; connect_ERR_at_time_last_detected_error=1; + proxy_error("Connection error for server %s:%d\n", address, port); + print_backtrace(); } else { if (t < time_last_detected_error) { // time_last_detected_error is in the future @@ -934,6 +946,7 @@ void MySrvC::connect_error(int err_num, bool get_mutex) { MyHGM->wrunlock(); if (_shu) { proxy_error("Shunning server %s:%d with %u errors/sec. Shunning for %u seconds\n", address, port, connect_ERR_at_time_last_detected_error , mysql_thread___shun_recovery_time_sec); + print_backtrace(); } } } @@ -2811,6 +2824,12 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ || (mysrvc->shunned_and_kill_all_connections==true && mysrvc->ConnectionsUsed->conns_length()==0 && mysrvc->ConnectionsFree->conns_length()==0) // if shunned_and_kill_all_connections is set, ensure all connections are already dropped ) { +#ifdef DEBUG + if (GloMTH->variables.hostgroup_manager_verbose >= 3) { + proxy_info("Unshunning server %s:%d.\n", mysrvc->address, mysrvc->port); + print_backtrace(); + } +#endif mysrvc->status=MYSQL_SERVER_STATUS_ONLINE; mysrvc->shunned_automatic=false; mysrvc->shunned_and_kill_all_connections=false; From b24459cd9a89a9796c9cfce1f1ee96ac1c4c12bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Wed, 2 Feb 2022 19:30:48 +0100 Subject: [PATCH 3/7] Remove extra logging info for troubleshooting SHUNNING errors --- lib/MySQL_HostGroups_Manager.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/lib/MySQL_HostGroups_Manager.cpp b/lib/MySQL_HostGroups_Manager.cpp index 332769046..180c7e4dd 100644 --- a/lib/MySQL_HostGroups_Manager.cpp +++ b/lib/MySQL_HostGroups_Manager.cpp @@ -907,22 +907,10 @@ void MySrvC::connect_error(int err_num, bool get_mutex) { default: break; } - -#ifdef DEBUG - if (GloMTH->variables.hostgroup_manager_verbose >= 3) { - proxy_info( - "{ MySrvC: %p, connect_ERR: %d, connect_ERR_at_time_last_detected_error: %d }\n", - this, connect_ERR, connect_ERR_at_time_last_detected_error - ); - } -#endif - time_t t=time(NULL); if (t > time_last_detected_error) { time_last_detected_error=t; connect_ERR_at_time_last_detected_error=1; - proxy_error("Connection error for server %s:%d\n", address, port); - print_backtrace(); } else { if (t < time_last_detected_error) { // time_last_detected_error is in the future @@ -946,7 +934,6 @@ void MySrvC::connect_error(int err_num, bool get_mutex) { MyHGM->wrunlock(); if (_shu) { proxy_error("Shunning server %s:%d with %u errors/sec. Shunning for %u seconds\n", address, port, connect_ERR_at_time_last_detected_error , mysql_thread___shun_recovery_time_sec); - print_backtrace(); } } } @@ -2827,7 +2814,6 @@ MySrvC *MyHGC::get_random_MySrvC(char * gtid_uuid, uint64_t gtid_trxid, int max_ #ifdef DEBUG if (GloMTH->variables.hostgroup_manager_verbose >= 3) { proxy_info("Unshunning server %s:%d.\n", mysrvc->address, mysrvc->port); - print_backtrace(); } #endif mysrvc->status=MYSQL_SERVER_STATUS_ONLINE; From 3a19f717ac4e469cd4f52d1b6619dab9a7a4e50b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Wed, 2 Feb 2022 19:32:57 +0100 Subject: [PATCH 4/7] Fix 'mysql-connect_retries_delay' not being honored --- lib/MySQL_Session.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index 5b0bd9b52..88c550574 100644 --- a/lib/MySQL_Session.cpp +++ b/lib/MySQL_Session.cpp @@ -2698,6 +2698,9 @@ bool MySQL_Session::handler_again___status_CONNECTING_SERVER(int *_rc) { } } + // NOTE-connect_retries_delay: This check alone is not enough for imposing + // 'mysql_thread___connect_retries_delay'. In case of 'async_connect' failing, 'pause_until' should also + // be set to 'mysql_thread___connect_retries_delay'. Complementary NOTE below. if (mybe->server_myds->myconn==NULL) { pause_until=thread->curtime+mysql_thread___connect_retries_delay*1000; *_rc=1; @@ -2768,6 +2771,14 @@ bool MySQL_Session::handler_again___status_CONNECTING_SERVER(int *_rc) { PROXY_TRACE(); } myds->destroy_MySQL_Connection_From_Pool(false); + // NOTE-connect_retries_delay: In case of failure to connect, if + // 'mysql_thread___connect_retries_delay' is set, we impose a delay in the session + // processing via 'pause_until'. Complementary NOTE above. + if (mysql_thread___connect_retries_delay) { + pause_until=thread->curtime+mysql_thread___connect_retries_delay*1000; + set_status(CONNECTING_SERVER); + return false; + } NEXT_IMMEDIATE_NEW(CONNECTING_SERVER); } else { __exit_handler_again___status_CONNECTING_SERVER_with_err: From cfdb9caf1675a90c4dcf06be781d36c1540c7979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Thu, 3 Feb 2022 10:45:23 +0100 Subject: [PATCH 5/7] Fix 'test_unshun_algorithm' design and improve doc --- test/tap/tests/test_unshun_algorithm-t.cpp | 87 +++++++++++++++++++--- 1 file changed, 78 insertions(+), 9 deletions(-) diff --git a/test/tap/tests/test_unshun_algorithm-t.cpp b/test/tap/tests/test_unshun_algorithm-t.cpp index abd17de7b..dc11ce637 100644 --- a/test/tap/tests/test_unshun_algorithm-t.cpp +++ b/test/tap/tests/test_unshun_algorithm-t.cpp @@ -14,6 +14,47 @@ * 3. All the servers are 'SHUNNED' using 'PROXYSQL_SIMULATOR' command. * 4. Each of the servers placed in the individual hostgroups is 'UNSHUNNED', checking the proper behavior * related to the other servers depending on 'mysql-unshun_algorithm'. + * + * CI-FAILURES-NOTES: + * This test received some changes after being detected that was provoking some failures in the CI. + * The circumstances of these failures are summarized here together with the issue resolution as a reminder + * of the design. + * + * PREVIOUS-DESIGN: + * In the previous design the test set the following variables: + * + * ``` + * "SET mysql-shun_on_failures=5" + * "SET mysql-connect_retries_on_failure=0" + * ``` + * + * These values were chosen to avoid connection retrying when the connection error takes place. + * For making sure that the server was properly UNSHUNNED, two queries were issued, with a two second delay + * between them. The second query should only produce one error, thus not going over the threshold of at + * least '2' errors within the same second to set the server as SHUNNED again. + * + * FLAW: + * The assumption that the second query cannot produce a second error is FALSE. The second query can produce + * two errors under certain timing conditions. If the call to 'mysql_real_connect_start' doesn't immediately + * return, because the underlying socket haven't been yet signaled, then 'MySQL_Session::handler' will + * return, this will result into: + * + * 1. 'MySQL_Data_Stream' receiving the socket event in next poll, the event will be processed, but because the + * server isn't present a error will be generated. + * 2. The session will later be processed, and due to the server still not being present, a second error will + * be generated, resulting into the server being SHUNNED. + * + * SOLUTION: + * Test now uses the following values for the variables: + * + * ``` + * "SET mysql-shun_on_failures=3" + * "SET mysql-connect_retries_on_failure=3" + * "SET mysql-connect_retries_delay=1000" + * ``` + * + * This will avoid any accidental SHUNNING due to the previously described situation, and due to + * 'connect_retries_delay' will prevent more than one connection attempt per second. */ #include @@ -58,14 +99,19 @@ int shunn_all_servers(MYSQL* proxysql_admin) { return EXIT_SUCCESS; } +/** + * @brief Issues a simple 'DO 1' query to the target hostgroup with the intention of UNSHUNNING the server + * present in that hostgroup. + * @param proxysql_mysql An already opened connection to ProxySQL. + * @param i The hostgroup to which the query should be issued. + * @return Since query errors are ignored, because the query is supposed to fail, EXIT_SUCCESS is always + * returned. + */ int wakup_target_server(MYSQL* proxysql_mysql, uint32_t i) { std::string t_simple_do_query { "DO /* ;hostgroup=%d */ 1" }; std::string simple_do_query {}; string_format(t_simple_do_query, simple_do_query, i); - mysql_query(proxysql_mysql, simple_do_query.c_str()); - diag("%s: running query: %s", tap_curtime().c_str(), simple_do_query.c_str()); - sleep(SHUN_RECOVERY_TIME * 2); mysql_query(proxysql_mysql, simple_do_query.c_str()); diag("%s: running query: %s", tap_curtime().c_str(), simple_do_query.c_str()); @@ -217,18 +263,41 @@ int test_proxysql_simulator_error(MYSQL* proxysql_admin) { return EXIT_SUCCESS; } +/** + * @brief Configures the relevant 'mysql' variables for achieving the test desired UNSHUNNING behavior. + * @details The values set for these variables prevent the target server to be SHUNNED after being UNSHUNNED + * by the query issued by 'wakup_target_server', in case the query produces two simultaneous errors instead + * of just one. Why this is a possibility is described on the details sections in the file DOC. + * @param proxysql_admin An already opened connection to ProxySQL Admin. + * @return EXIT_SUCCESS in case of success, EXIT_FAILURE otherwise. + */ +int configure_mysql_shunning_variables(MYSQL* proxysql_admin) { + MYSQL_QUERY(proxysql_admin, "SET mysql-shun_on_failures=3"); + diag("%s: Line:%d running admin query: SET mysql-shun_on_failures=3", tap_curtime().c_str(), __LINE__); + + MYSQL_QUERY(proxysql_admin, "SET mysql-connect_retries_on_failure=3"); + diag("%s: Line:%d running admin query: SET mysql-connect_retries_on_failure=3", tap_curtime().c_str(), __LINE__); + + MYSQL_QUERY(proxysql_admin, "SET mysql-connect_retries_delay=1000"); + diag("%s: Line:%d running admin query: SET mysql-connect_retries_delay=1000", tap_curtime().c_str(), __LINE__); + + return EXIT_SUCCESS; +} + int test_unshun_algorithm_behavior(MYSQL* proxysql_mysql, MYSQL* proxysql_admin) { // Configure Admin variables with lower thresholds MYSQL_QUERY(proxysql_admin, "SET mysql-shun_recovery_time_sec=1"); diag("%s: Line:%d running admin query: SET mysql-shun_recovery_time_sec=1", tap_curtime().c_str(), __LINE__); + + // Set verbosity up for extra information in ProxySQL log MYSQL_QUERY(proxysql_admin, "SET mysql-hostgroup_manager_verbose=3"); diag("%s: Line:%d running admin query: SET mysql-hostgroup_manager_verbose=3", tap_curtime().c_str(), __LINE__); - // NOTE: The following varible value is set here just as a reminder. This change isn't properly propagated - // to the 'error setting operation' since this is performed from 'ProxySQL_Admin' thread when - // 'PROXYSQL_SIMULATOR' command is received. Because of this, it's in 'PROXYSQL_SIMULATOR' command impl in - // 'ProxySQL_Admin' where this variable value is updated before setting the error. - MYSQL_QUERY(proxysql_admin, "SET mysql-shun_on_failures=5"); - MYSQL_QUERY(proxysql_admin, "SET mysql-connect_retries_on_failure=0"); + + // Configure the relevant variables for the desired UNSHUNNING behavior + if (configure_mysql_shunning_variables(proxysql_admin)) { + return EXIT_FAILURE; + } + MYSQL_QUERY(proxysql_admin, "LOAD MYSQL VARIABLES TO RUNTIME"); // Cleanup the servers and create a good number of hostgroups MYSQL_QUERY(proxysql_admin, "DELETE FROM mysql_servers"); From f2a6bcb646872936d43043e0555f626d1b2590b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Thu, 3 Feb 2022 10:46:09 +0100 Subject: [PATCH 6/7] Add new test for 'mysql-connect_retries_delay' --- .../test_mysql_connect_retries_delay-t.cpp | 118 ++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 test/tap/tests/test_mysql_connect_retries_delay-t.cpp diff --git a/test/tap/tests/test_mysql_connect_retries_delay-t.cpp b/test/tap/tests/test_mysql_connect_retries_delay-t.cpp new file mode 100644 index 000000000..e08596bb9 --- /dev/null +++ b/test/tap/tests/test_mysql_connect_retries_delay-t.cpp @@ -0,0 +1,118 @@ +/** + * @file test_mysql_connect_retries_delay-t.cpp + * @brief This test verifies the behavior for 'mysql-connect_retries_delay'. + * @details For doing this check, the test performs the following actions: + * 1. Disable monitoring and configure ProxySQL with a non-existing server in it's own hostgroup. + * 2. Configure 'mysql-connect_retries_delay'. + * 3. Create a connection against ProxySQL and issue a query against the non-existing server. + * 4. Check that the execution time of the query till the error matches 'mysql-connect_retries_delay'. + * 5. Repeat the previous 3 points for several values. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "tap.h" +#include "command_line.h" +#include "utils.h" +#include "proxysql_utils.h" + +using std::string; +using std::vector; + +typedef std::chrono::high_resolution_clock hrc; + +const vector test_retry_delays { 1000, 2000, 3000 }; +const double DUR_EPSILON = 1; + +int main() { + CommandLine cl; + + plan(test_retry_delays.size()); + + if (cl.getEnv()) { + diag("Failed to get the required environmental variables."); + return EXIT_FAILURE; + } + + MYSQL* proxysql_admin = mysql_init(NULL); + MYSQL* proxysql = mysql_init(NULL); + + // Initialize connections + if (!proxysql_admin) { + fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql_admin)); + return EXIT_FAILURE; + } + if (!proxysql) { + fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql)); + return EXIT_FAILURE; + } + + if (!mysql_real_connect(proxysql_admin, cl.host, cl.admin_username, cl.admin_password, NULL, cl.admin_port, NULL, 0)) { + fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql_admin)); + return EXIT_FAILURE; + } + if (!mysql_real_connect(proxysql, cl.host, cl.username, cl.password, NULL, cl.port, NULL, 0)) { + fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql)); + return EXIT_FAILURE; + } + + // Global config + MYSQL_QUERY(proxysql_admin, "SET mysql-monitor_enabled=0"); + MYSQL_QUERY(proxysql_admin, "SET mysql-poll_timeout=100"); + MYSQL_QUERY(proxysql_admin, "SET mysql-connect_retries_on_failure=3"); + MYSQL_QUERY(proxysql_admin, "SET mysql-connect_timeout_server_max=20000"); + + // Cleanup servers and configure just a non-existing server + MYSQL_QUERY(proxysql_admin, "DELETE FROM mysql_servers"); + MYSQL_QUERY(proxysql_admin, "INSERT INTO mysql_servers (hostgroup_id,hostname,port,max_replication_lag,max_connections,comment) VALUES (10,'127.0.0.1',13330,180,500,'mysql_not_here')"); + + // Load the new config + MYSQL_QUERY(proxysql_admin, "LOAD MYSQL VARIABLES TO RUNTIME"); + MYSQL_QUERY(proxysql_admin, "LOAD MYSQL SERVERS TO RUNTIME"); + + for (uint32_t retry_delay : test_retry_delays) { + string connect_retries_query {}; + string_format("SET mysql-connect_retries_delay=%d", connect_retries_query, retry_delay); + + // Load the new connect retry config + MYSQL_QUERY(proxysql_admin, connect_retries_query.c_str()); + MYSQL_QUERY(proxysql_admin, "LOAD MYSQL VARIABLES TO RUNTIME"); + + // Issue a query and check the timing + std::chrono::nanoseconds dur; + hrc::time_point start; + hrc::time_point end; + + start = hrc::now(); + + { + mysql_query(proxysql, "DO /* hostgroup=10 */ 1"); + } + + end = hrc::now(); + dur = end - start; + + double dur_s = dur.count() / pow(10,9); + double exp_dur = retry_delay * 3 / 1000.0; + + ok( + dur_s > (exp_dur - DUR_EPSILON) && dur_s < (exp_dur + DUR_EPSILON), + "Test duration matches the expected duration: { act_dur: %lf, exp_dur: %lf }", dur_s, exp_dur + ); + } + + mysql_close(proxysql); + mysql_close(proxysql_admin); + + return exit_status(); +} From 0b63c74e80a283278c44ba0157924aeeec7b6389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Thu, 3 Feb 2022 12:31:14 +0100 Subject: [PATCH 7/7] Minor typo --- test/tap/tests/test_unshun_algorithm-t.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tap/tests/test_unshun_algorithm-t.cpp b/test/tap/tests/test_unshun_algorithm-t.cpp index dc11ce637..99275f008 100644 --- a/test/tap/tests/test_unshun_algorithm-t.cpp +++ b/test/tap/tests/test_unshun_algorithm-t.cpp @@ -416,7 +416,7 @@ int main(int argc, char** argv) { return EXIT_FAILURE; } if (!mysql_real_connect(proxysql_admin, cl.host, cl.admin_username, cl.admin_password, NULL, cl.admin_port, NULL, 0)) { - fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql_mysql)); + fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql_admin)); return EXIT_FAILURE; }