From d7c4a1a94c626f40f483bcf0e50535bafbc4e3ca Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Sun, 12 Apr 2026 17:08:33 +0000 Subject: [PATCH] fix(tests): resolve flaky failures in pgsql-copy_from_stdin_session_parameter-t and test_dns_cache-t 1. pgsql-copy_from_stdin_session_parameter-t: race condition in log file reading The test's check_logs_for_command() performed a single stream-based read via get_matching_lines() to find the "Switching back to Normal mode" log line. When ProxySQL hadn't flushed the log entry yet (observed window as tight as ~23 microseconds between PQgetResult returning and the check), the read would miss the line, causing a false test failure. The log line was always present in the final file, confirming this is a timing issue. Fix: Replace the single-shot read with a retry loop (up to 10 attempts, 10ms each = 100ms total). If all stream-based retries fail, fall back to get_matching_lines_from_filename() which opens a fresh file handle, avoiding any stream position issues entirely. Add a global g_proxysql_log_path to support the filename-based fallback. 2. test_dns_cache-t: spurious dns_cache_queried counter increment in Step 9 Step 9 disables the DNS cache refresh interval (set to 0) and expects dns_cache_queried to remain unchanged (equal_to). However, INVALID_DOMAIN was still in mysql_servers from Step 8, so when "DO 1" queries were routed through hostgroup 999, the connection routing layer called MySQL_Monitor::dns_lookup() -> DNS_Cache::lookup(), incrementing the counter by exactly 1 regardless of the refresh interval setting. Fix: Before the measurement window in Step 9, replace INVALID_DOMAIN with 0.0.0.0 in mysql_servers. Since 0.0.0.0 is a valid IP address, dns_lookup() returns immediately without consulting the DNS cache (early return at MySQL_Monitor.cpp:6675), so the counter stays unchanged as expected. Both fixes verified: each test passes in isolation against legacy infra. --- ...ql-copy_from_stdin_session_parameter-t.cpp | 21 +++++++++++++++++-- test/tap/tests/test_dns_cache-t.cpp | 5 +++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/test/tap/tests/pgsql-copy_from_stdin_session_parameter-t.cpp b/test/tap/tests/pgsql-copy_from_stdin_session_parameter-t.cpp index 2d5a54e6a..3b5ca7c54 100644 --- a/test/tap/tests/pgsql-copy_from_stdin_session_parameter-t.cpp +++ b/test/tap/tests/pgsql-copy_from_stdin_session_parameter-t.cpp @@ -16,6 +16,7 @@ CommandLine cl; constexpr unsigned int MAX_ITERATION = 10; +std::string g_proxysql_log_path {}; using PGConnPtr = std::unique_ptr; @@ -103,8 +104,23 @@ bool sendCopyData(PGconn* conn, const char* data, int size, bool last) { } bool check_logs_for_command(std::fstream& f_proxysql_log, const std::string& command_regex) { - const auto& [_, cmd_lines] { get_matching_lines(f_proxysql_log, command_regex) }; - return cmd_lines.empty() ? false : true; + // Retry with small delays to handle the race between ProxySQL writing + // the log line and the test reading it. The stream-based approach + // (get_matching_lines) can miss a line that hasn't been flushed yet. + constexpr int max_retries = 10; + constexpr useconds_t retry_delay_us = 10000; // 10ms + for (int i = 0; i < max_retries; i++) { + // First try the stream-based check (fast path, no filesystem hit) + const auto& [_, cmd_lines] { get_matching_lines(f_proxysql_log, command_regex) }; + if (!cmd_lines.empty()) { + return true; + } + // Stream didn't find it yet — wait and retry from the file + usleep(retry_delay_us); + } + // Final attempt: read directly from file to avoid any stream position issues + const auto& [_, cmd_lines] { get_matching_lines_from_filename(g_proxysql_log_path, command_regex, false, 0) }; + return !cmd_lines.empty(); } bool setupTestTable(PGconn* conn) { @@ -262,6 +278,7 @@ int main(int argc, char** argv) { return exit_status(); std::string f_path{ get_env("REGULAR_INFRA_DATADIR") + "/proxysql.log" }; + g_proxysql_log_path = f_path; std::fstream f_proxysql_log{}; int of_err = open_file_and_seek_end(f_path, f_proxysql_log); diff --git a/test/tap/tests/test_dns_cache-t.cpp b/test/tap/tests/test_dns_cache-t.cpp index 9ea4d9de9..c3e178a56 100644 --- a/test/tap/tests/test_dns_cache-t.cpp +++ b/test/tap/tests/test_dns_cache-t.cpp @@ -260,6 +260,11 @@ int main(int argc, char** argv) { //disable dns cache EXECUTE_QUERY("SET mysql-monitor_local_dns_cache_refresh_interval=0", proxysql_admin, false), EXECUTE_QUERY("LOAD MYSQL VARIABLES TO RUNTIME", proxysql_admin, false), + // Replace INVALID_DOMAIN with an IP address so that routing + // lookups don't trigger DNS queries during the measurement window + EXECUTE_QUERY("DELETE FROM mysql_servers WHERE hostname='INVALID_DOMAIN'", proxysql_admin, false), + EXECUTE_QUERY("INSERT INTO mysql_servers (hostgroup_id,hostname,port,max_replication_lag,max_connections,comment) VALUES (999,'0.0.0.0',7861,0,1000,'dummy mysql server')", proxysql_admin, false), + EXECUTE_QUERY("LOAD MYSQL SERVERS TO RUNTIME", proxysql_admin, false), DELAY_SEC(2), UPDATE_PREV_METRICS(proxysql_admin), LOOP_FUNC(EXECUTE_QUERY("DO 1", proxysql, true), 2),