From 178f679fad6db3c8834af334969bce21f38a0156 Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Mon, 23 Feb 2026 00:25:16 +0000 Subject: [PATCH] fix: Handle optimizer hints /*+ */ correctly in query tokenizers The query tokenizers for both MySQL and PostgreSQL did not correctly handle optimizer hint comments in the format /*+ ... */. When parsing queries like `/*+ hostgroup=1000 */ SELECT 1`, the '+' character was incorrectly included in the extracted first comment content, resulting in the parsed key being '+hostgroup' instead of 'hostgroup'. This caused the query_processor_first_comment_parsing variable (modes 1 and 3) to not work correctly when using optimizer hint syntax. Changes: - c_tokenizer.cpp: Detect both /*! and /*+ comment formats - pgsql_tokenizer.cpp: Detect /*+ comment format - issue5384-t.cpp: Re-enable tests 2 and 3 (previously skipped) - pgsql-issue5384-t.cpp: Re-enable tests 2 and 3, add hostgroup 1000 setup Fixes #5413 (MySQL tokenizer) Fixes #5414 (PostgreSQL tokenizer) --- lib/c_tokenizer.cpp | 7 +- lib/pgsql_tokenizer.cpp | 14 +++- test/tap/tests/issue5384-t.cpp | 118 +++++++++++++++++---------- test/tap/tests/pgsql-issue5384-t.cpp | 64 +++++++++++++-- 4 files changed, 147 insertions(+), 56 deletions(-) diff --git a/lib/c_tokenizer.cpp b/lib/c_tokenizer.cpp index 9e696406d..da7cb11b5 100644 --- a/lib/c_tokenizer.cpp +++ b/lib/c_tokenizer.cpp @@ -595,13 +595,14 @@ enum p_st process_cmnt_type_1(const options* opts, shared_st* shared_st, cmnt_ty enum p_st next_st = st_cmnt_type_1; const char* res_final_pos = shared_st->res_init_pos + shared_st->d_max_len; - // initial mark "/*|/*!" detection + // initial mark "/*|/*!|/*+" detection // comments are not copied by while processed, boundary checks should rely on 'q_cur_pos' and 'q_len'. if (shared_st->q_cur_pos <= (shared_st->q_len-2) && *shared_st->q == '/' && *(shared_st->q+1) == '*') { c_t_1_st->cur_cmd_cmnt_len = 0; // check length before accessing beyond 'q_cur_pos + 1' - if (shared_st->q_cur_pos != (shared_st->q_len-2) && *(shared_st->q+2) == '!') { + // Handle both /*! (MySQL version hints) and /*+ (optimizer hints) + if (shared_st->q_cur_pos != (shared_st->q_len-2) && (*(shared_st->q+2) == '!' || *(shared_st->q+2) == '+')) { c_t_1_st->is_cmd = 1; } @@ -613,7 +614,7 @@ enum p_st process_cmnt_type_1(const options* opts, shared_st* shared_st, cmnt_ty c_t_1_st->cur_cmd_cmnt_len++; } - // discard processed "/*" or "/*!" + // discard processed "/*" or "/*!" or "/*+" shared_st->q += 2 + c_t_1_st->is_cmd; shared_st->q_cur_pos += 2 + c_t_1_st->is_cmd; diff --git a/lib/pgsql_tokenizer.cpp b/lib/pgsql_tokenizer.cpp index 13cb201e2..d58292016 100644 --- a/lib/pgsql_tokenizer.cpp +++ b/lib/pgsql_tokenizer.cpp @@ -652,7 +652,7 @@ enum p_st process_cmnt_type_1(const options* opts, shared_st* shared_st, cmnt_ty // comments are not copied by while processed, boundary checks should rely on 'q_cur_pos' and 'q_len'. if (shared_st->q_cur_pos <= (shared_st->q_len-2) && *shared_st->q == '/' && *(shared_st->q+1) == '*') { - if (c_t_1_st->nest_level == 0) + if (c_t_1_st->nest_level == 0) c_t_1_st->cur_cmd_cmnt_len = 0; // Increment nesting level /* @@ -677,9 +677,15 @@ enum p_st process_cmnt_type_1(const options* opts, shared_st* shared_st, cmnt_ty c_t_1_st->fst_cmnt_len += 2; } - // discard processed "/*" - shared_st->q += 2; - shared_st->q_cur_pos += 2; + // check for optimizer hint marker /*+ + int is_optimizer_hint = 0; + if (shared_st->q_cur_pos <= (shared_st->q_len-3) && *(shared_st->q+2) == '+') { + is_optimizer_hint = 1; + } + + // discard processed "/*" or "/*+" + shared_st->q += 2 + is_optimizer_hint; + shared_st->q_cur_pos += 2 + is_optimizer_hint; // v1_crashing_payload_04 if (shared_st->q_cur_pos >= shared_st->q_len - 1) { diff --git a/test/tap/tests/issue5384-t.cpp b/test/tap/tests/issue5384-t.cpp index 7d381025d..5e163766f 100644 --- a/test/tap/tests/issue5384-t.cpp +++ b/test/tap/tests/issue5384-t.cpp @@ -22,6 +22,16 @@ int main(int, char**) { plan(3); + diag("=== MySQL query_processor_first_comment_parsing Test ==="); + diag("This test verifies the mysql-query_processor_first_comment_parsing variable"); + diag("which controls when comment hints (like /*+ hostgroup=N */) are parsed:"); + diag(" - Mode 1: Parse comments BEFORE query rules are applied"); + diag(" - Mode 2: Parse comments AFTER query rules are applied (default)"); + diag(" - Mode 3: Parse comments both BEFORE and AFTER rules"); + diag("The test uses a query rule that strips comments, then verifies that"); + diag("the hostgroup hint is still correctly parsed when using mode 1 or 3."); + diag("======================================================="); + MYSQL* admin = init_mysql_conn(cl.host, cl.admin_port, cl.admin_username, cl.admin_password); if (!admin) { return exit_status(); @@ -99,52 +109,78 @@ int main(int, char**) { } diag(" ========== Test 2: New behavior (parsed before rules) =========="); - // TODO: This test currently fails - mysql-query_processor_first_comment_parsing=1 - // should parse comments BEFORE rules are applied, but the feature appears - // to not be working correctly. Skipping until the issue is resolved. - skip(1, "mysql-query_processor_first_comment_parsing=1 feature not working correctly"); - - /* - const char *q_set_var2 = "SET mysql-query_processor_first_comment_parsing = 1"; - diag("Running on Admin: %s", q_set_var2); - MYSQL_QUERY_T(admin, q_set_var2); - - diag("Running on Admin: %s", q_load_vars); - MYSQL_QUERY_T(admin, q_load_vars); - - diag("Running on Admin: %s", q_truncate); - MYSQL_QUERY_T(admin, q_truncate); - - diag("Running on Proxy: %s", query); - MYSQL_QUERY_T(proxy, query); - proxy_res = mysql_store_result(proxy); - if (proxy_res) mysql_free_result(proxy_res); - - diag("Running on Admin: %s", q_stats); - if (mysql_query_t(admin, q_stats)) { - fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(admin)); - return exit_status(); - } - res = mysql_store_result(admin); - if (res) { - MYSQL_ROW row = mysql_fetch_row(res); - if (row) { - int hg = atoi(row[0]); - ok(hg == 1000, "Comment SHOULD have been parsed BEFORE it was stripped by rule. hg=%d", hg); + { + const char *q_set_var2 = "SET mysql-query_processor_first_comment_parsing = 1"; + diag("Running on Admin: %s", q_set_var2); + MYSQL_QUERY_T(admin, q_set_var2); + + diag("Running on Admin: %s", q_load_vars); + MYSQL_QUERY_T(admin, q_load_vars); + + diag("Running on Admin: %s", q_truncate); + MYSQL_QUERY_T(admin, q_truncate); + + diag("Running on Proxy: %s", query); + MYSQL_QUERY_T(proxy, query); + proxy_res = mysql_store_result(proxy); + if (proxy_res) mysql_free_result(proxy_res); + + diag("Running on Admin: %s", q_stats); + if (mysql_query_t(admin, q_stats)) { + fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(admin)); + return exit_status(); + } + res = mysql_store_result(admin); + if (res) { + MYSQL_ROW row = mysql_fetch_row(res); + if (row) { + int hg = atoi(row[0]); + ok(hg == 1000, "Comment SHOULD have been parsed BEFORE it was stripped by rule. hg=%d", hg); + } else { + ok(0, "Failed to find query in stats (Test 2)"); + } + mysql_free_result(res); } else { - ok(0, "Failed to find query in stats (Test 2)"); + ok(0, "mysql_store_result returned NULL (Test 2)"); } - mysql_free_result(res); - } else { - ok(0, "mysql_store_result returned NULL (Test 2)"); } - */ diag(" ========== Test 3: Both passes (mode 3) =========="); - // TODO: This test currently fails - mysql-query_processor_first_comment_parsing=3 - // should parse comments in both passes (before and after rules), but the feature - // appears to not be working correctly. Skipping until the issue is resolved. - skip(1, "mysql-query_processor_first_comment_parsing=3 feature not working correctly"); + { + const char *q_set_var3 = "SET mysql-query_processor_first_comment_parsing = 3"; + diag("Running on Admin: %s", q_set_var3); + MYSQL_QUERY_T(admin, q_set_var3); + + diag("Running on Admin: %s", q_load_vars); + MYSQL_QUERY_T(admin, q_load_vars); + + diag("Running on Admin: %s", q_truncate); + MYSQL_QUERY_T(admin, q_truncate); + + diag("Running on Proxy: %s", query); + MYSQL_QUERY_T(proxy, query); + proxy_res = mysql_store_result(proxy); + if (proxy_res) mysql_free_result(proxy_res); + + diag("Running on Admin: %s", q_stats); + if (mysql_query_t(admin, q_stats)) { + fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(admin)); + return exit_status(); + } + res = mysql_store_result(admin); + if (res) { + MYSQL_ROW row = mysql_fetch_row(res); + if (row) { + int hg = atoi(row[0]); + ok(hg == 1000, "Comment SHOULD have been parsed (mode 3 parses before rules). hg=%d", hg); + } else { + ok(0, "Failed to find query in stats (Test 3)"); + } + mysql_free_result(res); + } else { + ok(0, "mysql_store_result returned NULL (Test 3)"); + } + } // Teardown: restore defaults diag("Teardown: restoring defaults"); diff --git a/test/tap/tests/pgsql-issue5384-t.cpp b/test/tap/tests/pgsql-issue5384-t.cpp index f87422a15..df2e65c48 100644 --- a/test/tap/tests/pgsql-issue5384-t.cpp +++ b/test/tap/tests/pgsql-issue5384-t.cpp @@ -82,6 +82,18 @@ int main(int argc, char** argv) { plan(3); + diag("=== PostgreSQL query_processor_first_comment_parsing Test ==="); + diag("This test verifies the pgsql-query_processor_first_comment_parsing variable"); + diag("which controls when comment hints (like /*+ hostgroup=N */) are parsed:"); + diag(" - Mode 1: Parse comments BEFORE query rules are applied"); + diag(" - Mode 2: Parse comments AFTER query rules are applied (default)"); + diag(" - Mode 3: Parse comments both BEFORE and AFTER rules"); + diag("Note: PostgreSQL doesn't have native optimizer hints like MySQL's /*+ */,"); + diag("but ProxySQL supports the same hint syntax for consistency across protocols."); + diag("The test uses a query rule that strips comments, then verifies that"); + diag("the hostgroup hint is still correctly parsed when using mode 1 or 3."); + diag("============================================================="); + PGConnPtr admin = createNewConnection(ADMIN); if (!admin) { BAIL_OUT("Failed to connect to admin interface"); @@ -94,6 +106,16 @@ int main(int argc, char** argv) { return exit_status(); } + // Setup: Create hostgroup 1000 with a backend server for testing comment routing + diag("Setup: Creating hostgroup 1000 with backend server"); + { + std::stringstream ss; + ss << "INSERT OR REPLACE INTO pgsql_servers (hostgroup_id, hostname, port) VALUES (1000, '" + << cl.pgsql_server_host << "', " << cl.pgsql_server_port << ")"; + PQclear(PQexec(admin.get(), ss.str().c_str())); + PQclear(PQexec(admin.get(), "LOAD PGSQL SERVERS TO RUNTIME")); + } + diag(" ========== Test 1: Default behavior (parsed after rules) =========="); PQclear(PQexec(admin.get(), "DELETE FROM pgsql_query_rules")); @@ -119,16 +141,40 @@ int main(int argc, char** argv) { } diag(" ========== Test 2: New behavior (parsed before rules) =========="); - // TODO: This test currently fails - pgsql-query_processor_first_comment_parsing=1 - // should parse comments BEFORE rules are applied, but the feature appears - // to not be working correctly. Skipping until the issue is resolved. - skip(1, "pgsql-query_processor_first_comment_parsing=1 feature not working correctly"); + { + PQclear(PQexec(admin.get(), "SET pgsql-query_processor_first_comment_parsing = 1")); + PQclear(PQexec(admin.get(), "LOAD PGSQL VARIABLES TO RUNTIME")); + + PQclear(PQexec(admin.get(), "TRUNCATE stats_pgsql_query_digest")); + + diag("Running on Proxy: %s", query); + PQclear(PQexec(proxy.get(), query)); + + int hg = get_query_hg_from_stats(admin.get(), "SELECT ?"); + if (hg != -1) { + ok(hg == 1000, "Comment SHOULD have been parsed BEFORE it was stripped by rule. hg=%d", hg); + } else { + ok(0, "Failed to find query in stats (Test 2)"); + } + } diag(" ========== Test 3: Both passes (mode 3) =========="); - // TODO: This test currently fails - pgsql-query_processor_first_comment_parsing=3 - // should parse comments in both passes (before and after rules), but the feature - // appears to not be working correctly. Skipping until the issue is resolved. - skip(1, "pgsql-query_processor_first_comment_parsing=3 feature not working correctly"); + { + PQclear(PQexec(admin.get(), "SET pgsql-query_processor_first_comment_parsing = 3")); + PQclear(PQexec(admin.get(), "LOAD PGSQL VARIABLES TO RUNTIME")); + + PQclear(PQexec(admin.get(), "TRUNCATE stats_pgsql_query_digest")); + + diag("Running on Proxy: %s", query); + PQclear(PQexec(proxy.get(), query)); + + int hg = get_query_hg_from_stats(admin.get(), "SELECT ?"); + if (hg != -1) { + ok(hg == 1000, "Comment SHOULD have been parsed (mode 3 parses before rules). hg=%d", hg); + } else { + ok(0, "Failed to find query in stats (Test 3)"); + } + } // Teardown diag("Teardown: restoring defaults"); @@ -136,6 +182,8 @@ int main(int argc, char** argv) { PQclear(PQexec(admin.get(), "LOAD PGSQL QUERY RULES TO RUNTIME")); PQclear(PQexec(admin.get(), "SET pgsql-query_processor_first_comment_parsing = 2")); PQclear(PQexec(admin.get(), "LOAD PGSQL VARIABLES TO RUNTIME")); + PQclear(PQexec(admin.get(), "DELETE FROM pgsql_servers WHERE hostgroup_id=1000")); + PQclear(PQexec(admin.get(), "LOAD PGSQL SERVERS TO RUNTIME")); return exit_status(); }