From 86e245397296025421948cbae465b8e29cbbadaf Mon Sep 17 00:00:00 2001 From: Rahim Kanji Date: Wed, 3 Sep 2025 15:34:06 +0500 Subject: [PATCH 1/3] Updated SET parsing regex pattern to allow optional spaces after commas in values --- lib/PgSQL_Set_Stmt_Parser.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/lib/PgSQL_Set_Stmt_Parser.cpp b/lib/PgSQL_Set_Stmt_Parser.cpp index 159924dc2..3e41bcf41 100644 --- a/lib/PgSQL_Set_Stmt_Parser.cpp +++ b/lib/PgSQL_Set_Stmt_Parser.cpp @@ -69,13 +69,9 @@ void PgSQL_Set_Stmt_Parser::generateRE_parse1v2() { proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str()); #endif // DEBUG - //const std::string pattern = "(?:(?PSESSION|LOCAL)\\s+)?(?:(?P[^=\\s][^=;]*?)\\s*(?:=|TO)\\s*(?P[^;]+)|(?PTIME\\s+ZONE|NAMES|SCHEMA|AUTHORIZATION|TRANSACTION\\s+ISOLATION\\s+LEVEL|CHARACTERISTICS\\s+AS\\s+TRANSACTION\\s+ISOLATION\\s+LEVEL)\\s+(?P[^;]+))\\s*;?\\s*"; - // Function Call: Check if Group 3 is populated. // Literal: Check if Group 4 is populated. - //const std::string pattern = "(?:(SESSION|LOCAL)\\s+)?((?:\\S+(?:\\s+\\S+)*?))(?:\\s+(?:TO|=)\\s+|\\s+)(?:(\\w+\\s*\\([^)]*\\))|((?:'(?:''|[^'])*'|-?\\d+(?:\\.\\d+)?(?:[eE][+-]?\\d+)?|t|true|f|false|on|off|default|\\S+)))\\s*;?"; - //const std::string pattern = "(?:(SESSION|LOCAL)\\s+)?((?:\\S+(?:\\s+\\S+)*?))(?:\\s*(?:TO|=)\\s*|\\s+)(?:(\\w+\\s*\\([^)]*\\))|((?:'(?:''|[^'])*'|-?\\d+(?:\\.\\d+)?(?:[eE][+-]?\\d+)?|true|t|1|yes|false|f|0|no|on|off|default|\\S+)))\\s*;?"; - const std::string pattern = R"((?:(SESSION)\s+)?((?:\S+(?:\s+\S+)*?))(?:\s*(?:TO|=)\s*|\s+)(?:(\w+\s*\([^)]*\))|((?:'(?:''|[^'])*'|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|[^;]+)))\s*;?)"; + const std::string pattern = R"((?:(SESSION)\s+)?((?:TIME\s+ZONE|TRANSACTION\s+ISOLATION\s+LEVEL|XML\s+OPTION|(?:(?:[^\s=]{1,4}|[^\s=]{6,}|(?:[^lL][^\s=]{4}|[lL][^oO][^\s=]{3}|[lL][oO][^cC][^\s=]{2}|[lL][oO][cC][^aA][^\s=]|[lL][oO][cC][aA][^lL])))))(?:\s*=\s*|\s+TO\s+|\s+)()('(?:''|[^'])*'|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|[^;()]+)\s*;?)"; #ifdef DEBUG VALGRIND_DISABLE_ERROR_REPORTING; @@ -91,6 +87,12 @@ VALGRIND_DISABLE_ERROR_REPORTING; parse1v2_pattern = pattern; parse1v2_re = new re2::RE2(parse1v2_pattern, *parse1v2_opt2); + + if (!parse1v2_re->ok()) { + proxy_error("Error in RE2 regex pattern: %s\n", parse1v2_re->error().c_str()); + assert(false); + } + parse1v2_init = true; } From bbbb73f8ee08d728b51b50a87f613cce45dc7bb6 Mon Sep 17 00:00:00 2001 From: Rahim Kanji Date: Thu, 4 Sep 2025 15:28:48 +0500 Subject: [PATCH 2/3] Added TAP test --- test/tap/tests/pgsql-set_statement_test-t.cpp | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 test/tap/tests/pgsql-set_statement_test-t.cpp diff --git a/test/tap/tests/pgsql-set_statement_test-t.cpp b/test/tap/tests/pgsql-set_statement_test-t.cpp new file mode 100644 index 000000000..09e0f916b --- /dev/null +++ b/test/tap/tests/pgsql-set_statement_test-t.cpp @@ -0,0 +1,155 @@ +/** + * @file pgsql-set_statement_test-t.cpp + * @brief Intention: not to test every PostgreSQL variable, but to ensure different forms of SET statements + * are parsed correctly and not wrongly locked on a hostgroup. Covers common syntaxes (`=`, TO, + * multi-word params, aliases) and checks that unsupported forms like LOCAL or function-style values + * trigger the expected hostgroup lock behavior. + */ + +#include +#include +#include +#include +#include +#include "libpq-fe.h" +#include "command_line.h" +#include "tap.h" +#include "utils.h" + +CommandLine cl; + +using PGConnPtr = std::unique_ptr; + +enum ConnType { + ADMIN, + BACKEND +}; + +PGConnPtr createNewConnection(ConnType conn_type, const std::string& options = "", bool with_ssl = false) { + + const char* host = (conn_type == BACKEND) ? cl.pgsql_host : cl.pgsql_admin_host; + int port = (conn_type == BACKEND) ? cl.pgsql_port : cl.pgsql_admin_port; + const char* username = (conn_type == BACKEND) ? cl.pgsql_root_username : cl.admin_username; + const char* password = (conn_type == BACKEND) ? cl.pgsql_root_password : cl.admin_password; + + std::stringstream ss; + + ss << "host=" << host << " port=" << port; + ss << " user=" << username << " password=" << password; + ss << (with_ssl ? " sslmode=require" : " sslmode=disable"); + + if (options.empty() == false) { + ss << " options='" << options << "'"; + } + + PGconn* conn = PQconnectdb(ss.str().c_str()); + if (PQstatus(conn) != CONNECTION_OK) { + fprintf(stderr, "Connection failed to '%s': %s", (conn_type == BACKEND ? "Backend" : "Admin"), PQerrorMessage(conn)); + PQfinish(conn); + return PGConnPtr(nullptr, &PQfinish); + } + return PGConnPtr(conn, &PQfinish); +} + +struct TestCase { + std::string sql; + bool should_not_lock_on_hostgroup; + std::string description; +}; + +std::fstream f_proxysql_log{}; + +bool check_logs_for_command(const std::string& command_regex) { + const auto& [_, cmd_lines] { get_matching_lines(f_proxysql_log, command_regex) }; + return !cmd_lines.empty(); +} + +bool run_set_statement(const std::string& stmt, ConnType type = BACKEND) { + PGConnPtr conn = createNewConnection(type); + if (!conn) return false; + + PGresult* res = PQexec(conn.get(), stmt.c_str()); + if (PQresultStatus(res) != PGRES_COMMAND_OK) { + PQclear(res); + return false; + } + PQclear(res); + + return check_logs_for_command(".*\\[WARNING\\] Unable to parse unknown SET query from client.*") == false; +} + +int main(int argc, char** argv) { + + std::vector tests = { + // Standard param/value + {"SET datestyle = 'ISO, MDY';", true, "datestyle with ="}, + {"SET datestyle TO 'ISO,MDY';", true, "datestyle with TO"}, + {"SET standard_conforming_strings TO on;", true, "boolean ON"}, + {"SET enable_seqscan = off;", true, "boolean OFF"}, + {"SET SESSION datestyle = 'ISO, DMY';", true, "SESSION prefix"}, + + // TIME ZONE + {"SET TIME ZONE 'UTC';", true, "TIME ZONE UTC"}, + {"SET TIME ZONE DEFAULT;", true, "TIME ZONE DEFAULT"}, + {"SET TIME ZONE -7;", true, "TIME ZONE numeric offset"}, + {"SET TIME ZONE INTERVAL '+02:30' HOUR TO MINUTE;", true, "TIME ZONE interval"}, + + // TRANSACTION ISOLATION LEVEL + {"SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;", false, "TX ISOLATION READ UNCOMMITTED"}, + {"SET TRANSACTION ISOLATION LEVEL READ COMMITTED;", false, "TX ISOLATION READ COMMITTED"}, + {"SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;", false, "TX ISOLATION REPEATABLE READ"}, + {"SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;", false, "TX ISOLATION SERIALIZABLE"}, + + // XML OPTION + {"SET XML OPTION DOCUMENT;", false, "XML OPTION DOCUMENT"}, + {"SET XML OPTION CONTENT;", false, "XML OPTION CONTENT"}, + + // SESSION AUTHORIZATION + {"SET SESSION AUTHORIZATION DEFAULT;", false, "SESSION AUTHORIZATION DEFAULT"}, + + // ROLE + {"SET ROLE NONE;", false, "ROLE NONE"}, + + // SCHEMA + {"SET SCHEMA 'pg_catalog';", false, "SCHEMA valid"}, + + // NAMES + {"SET NAMES SQL_ASCII;", true, "NAMES SQL_ASCII"}, + {"SET NAMES UTF8;", true, "NAMES UTF8"}, + + // SEED + {"SET SEED 0.5;", false, "SEED 0.5"}, + {"SET SEED 0;", false, "SEED 0"}, + {"SET SEED 1;", false, "SEED 1"}, + {"SET SEED 1.5;", false, "SEED out of range"}, + + // Failure cases + {"SET ALL TO DEFAULT;", false, "ALL should fail"}, + {"SET LOCAL datestyle TO 'ISO,MDY';", false, "LOCAL should fail"}, + {"SET search_path TO current_schemas(true);", false, "function value should fail"}, + {"SET datestyle = ;", false, "missing value"} + }; + + plan(tests.size()); + + if (cl.getEnv()) + return exit_status(); + + std::string f_path{ get_env("REGULAR_INFRA_DATADIR") + "/proxysql.log" }; + + int of_err = open_file_and_seek_end(f_path, f_proxysql_log); + if (of_err != EXIT_SUCCESS) { + return exit_status(); + } + + for (const auto& t : tests) { + f_proxysql_log.clear(f_proxysql_log.rdstate() & ~std::ios_base::failbit); + f_proxysql_log.seekg(f_proxysql_log.tellg()); + bool result = run_set_statement(t.sql); + ok(result == t.should_not_lock_on_hostgroup, "%s", t.description.c_str()); + usleep(10000); + } + + f_proxysql_log.close(); + return exit_status(); +} From e2915738105145a482b165fe7f20d60b5d76a264 Mon Sep 17 00:00:00 2001 From: Rahim Kanji Date: Thu, 4 Sep 2025 18:12:59 +0500 Subject: [PATCH 3/3] Improved regex pattern --- lib/PgSQL_Set_Stmt_Parser.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/PgSQL_Set_Stmt_Parser.cpp b/lib/PgSQL_Set_Stmt_Parser.cpp index 3e41bcf41..a81d44f78 100644 --- a/lib/PgSQL_Set_Stmt_Parser.cpp +++ b/lib/PgSQL_Set_Stmt_Parser.cpp @@ -71,7 +71,7 @@ void PgSQL_Set_Stmt_Parser::generateRE_parse1v2() { // Function Call: Check if Group 3 is populated. // Literal: Check if Group 4 is populated. - const std::string pattern = R"((?:(SESSION)\s+)?((?:TIME\s+ZONE|TRANSACTION\s+ISOLATION\s+LEVEL|XML\s+OPTION|(?:(?:[^\s=]{1,4}|[^\s=]{6,}|(?:[^lL][^\s=]{4}|[lL][^oO][^\s=]{3}|[lL][oO][^cC][^\s=]{2}|[lL][oO][cC][^aA][^\s=]|[lL][oO][cC][aA][^lL])))))(?:\s*=\s*|\s+TO\s+|\s+)()('(?:''|[^'])*'|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|[^;()]+)\s*;?)"; + const std::string pattern = R"((?:(SESSION)\s+)?((?:TIME\s+ZONE|TRANSACTION\s+ISOLATION\s+LEVEL|XML\s+OPTION|(?:(?:[^\s=]{1,4}|[^\s=]{6,}|(?:[^lL][^\s=]{4}|[lL][^oO][^\s=]{3}|[lL][oO][^cC][^\s=]{2}|[lL][oO][cC][^aA][^\s=]|[lL][oO][cC][aA][^lL])))))(?:\s*=\s*|\s+TO\s+|\s+)(?:([A-Za-z_][\w$\.]*)\s*\(\s*('(?:''|[^'])*'|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|[^();]+?)\s*\)|('(?:''|[^'])*'|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|[^;()]+))\s*;?)"; #ifdef DEBUG VALGRIND_DISABLE_ERROR_REPORTING; @@ -114,13 +114,13 @@ std::map> PgSQL_Set_Stmt_Parser::parse1v2() VALGRIND_ENABLE_ERROR_REPORTING; #endif // DEBUG std::string var; - std::string scope, param_name, param_val, param_val_func; + std::string scope, param_name, param_val_func, param_val_func_args, param_val; re2::StringPiece input(query); - while (re2::RE2::Consume(&input, *parse1v2_re, &scope, ¶m_name, ¶m_val_func, ¶m_val)) { + while (re2::RE2::Consume(&input, *parse1v2_re, &scope, ¶m_name, ¶m_val_func, ¶m_val_func_args, ¶m_val)) { // FIXME: verify if we reached end of query. Did we parse everything? std::vector op; #ifdef DEBUG - proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "SET parsing: scope='%s' , parameter name='%s' , parameter value='%s' parameter_value_func='%s'\n", scope.c_str(), param_name.c_str(), param_val.c_str(), param_val_func.c_str()); + proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "SET parsing: scope='%s', parameter name='%s' , parameter value='%s' , parameter_value_func='%s' , parameter_value_func_args='%s'\n", scope.c_str(), param_name.c_str(), param_val.c_str(), param_val_func.c_str(), param_val_func_args.c_str()); #endif // DEBUG std::string key;