From 35b2dd593d63eb533b9d3a063b3aadcae1833c14 Mon Sep 17 00:00:00 2001 From: Rahim Kanji Date: Wed, 5 Feb 2025 14:51:14 +0500 Subject: [PATCH] Separate implementation of SET statement parser for MySQL and PostgreSQL --- include/MySQL_Thread.h | 4 +- include/PgSQL_Thread.h | 4 +- include/set_parser.h | 56 ---- lib/Makefile | 5 +- lib/MySQL_Session.cpp | 8 +- lib/MySQL_Thread.cpp | 2 +- lib/PgSQL_Session.cpp | 6 +- lib/PgSQL_Thread.cpp | 2 +- lib/set_parser.cpp | 667 ----------------------------------------- src/main.cpp | 2 +- 10 files changed, 17 insertions(+), 739 deletions(-) delete mode 100644 include/set_parser.h delete mode 100644 lib/set_parser.cpp diff --git a/include/MySQL_Thread.h b/include/MySQL_Thread.h index d41f58c72..7a5b0c0fa 100644 --- a/include/MySQL_Thread.h +++ b/include/MySQL_Thread.h @@ -14,7 +14,7 @@ #include "prometheus_helpers.h" -#include "set_parser.h" +#include "MySQL_Set_Stmt_Parser.h" /* #define MIN_POLL_LEN 8 @@ -192,7 +192,7 @@ class __attribute__((aligned(64))) MySQL_Thread : public Base_Thread pthread_mutex_t thread_mutex; // if set_parser_algorithm == 2 , a single thr_SetParser is used - SetParser *thr_SetParser; + MySQL_Set_Stmt_Parser* thr_SetParser; MySQL_Thread(); ~MySQL_Thread(); diff --git a/include/PgSQL_Thread.h b/include/PgSQL_Thread.h index 64abef7ab..2d67de5e5 100644 --- a/include/PgSQL_Thread.h +++ b/include/PgSQL_Thread.h @@ -15,7 +15,7 @@ #include "prometheus_helpers.h" -#include "set_parser.h" +#include "PgSQL_Set_Stmt_Parser.h" enum class AUTHENTICATION_METHOD { NO_PASSWORD, @@ -244,7 +244,7 @@ public: pthread_mutex_t thread_mutex; // if set_parser_algorithm == 2 , a single thr_SetParser is used - SetParser* thr_SetParser; + PgSQL_Set_Stmt_Parser *thr_SetParser; /** * @brief Default constructor for the PgSQL_Thread class. diff --git a/include/set_parser.h b/include/set_parser.h deleted file mode 100644 index 68d55bf21..000000000 --- a/include/set_parser.h +++ /dev/null @@ -1,56 +0,0 @@ -#ifndef __CLASS_SET_PARSER_H -#define __CLASS_SET_PARSER_H -#include -#include -#include - -#include "re2/re2.h" -#include "re2/regexp.h" - -//#define PARSERDEBUG - -template -class SetParser { - private: - // parse1v2 variables used for compile the RE only once - bool parse1v2_init; - re2::RE2::Options * parse1v2_opt2; - re2::RE2 * parse1v2_re; - std::string parse1v2_pattern; - std::string query; -#ifdef PARSERDEBUG - int verbosity; - public: - SetParser(std::string q, int verb = 0); -#else - public: - SetParser(std::string q); -#endif - // set_query() allows to change the query associated to a SetParser. - // This allow to parse multiple queries using just a single SetParser. - // At the moment this makes sense only when using parse1v2() because it - // allows to compile the regular expression only once - void set_query(const std::string& q); - // First implementation of the general parser - // It uses a single complex RE pattern that is hardcoded - std::map> parse1(); - // Second implementation of the general parser . - // It uses a RE pattern that is built at runtime . - // The final pattern used by parse1v2() is a lot longer than the one used by parse1() - // making it very difficult to read, but the code generating it should be clear - std::map> parse1v2(); - void generateRE_parse1v2(); - // First implemenation of the parser for TRANSACTION ISOLATION LEVEL and TRANSACTION READ/WRITE - std::map> parse2(); - std::string parse_character_set(); - std::string parse_USE_query(std::string& errmsg); - std::string remove_comments(const std::string& q); -#ifdef DEBUG - // built-in testing - void test_parse_USE_query(); -#endif // DEBUG - ~SetParser(); -}; - - -#endif /* __CLASS_SET_PARSER_H */ diff --git a/lib/Makefile b/lib/Makefile index 5328a23eb..94fb8ebf7 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -169,7 +169,7 @@ MYCXXFLAGS := $(STDCPP) $(MYCFLAGS) $(PSQLCH) $(ENABLE_EPOLL) default: libproxysql.a .PHONY: default -_OBJ_CXX := ProxySQL_GloVars.oo network.oo debug.oo configfile.oo Query_Cache.oo SpookyV2.oo MySQL_Authentication.oo gen_utils.oo sqlite3db.oo mysql_connection.oo MySQL_HostGroups_Manager.oo mysql_data_stream.oo MySQL_Thread.oo MySQL_Session.oo MySQL_Protocol.oo mysql_backend.oo Query_Processor.oo MySQL_Query_Processor.oo PgSQL_Query_Processor.oo ProxySQL_Admin.oo ProxySQL_Config.oo ProxySQL_Restapi.oo MySQL_Monitor.oo MySQL_Logger.oo thread.oo MySQL_PreparedStatement.oo ProxySQL_Cluster.oo ClickHouse_Authentication.oo ClickHouse_Server.oo ProxySQL_Statistics.oo Chart_bundle_js.oo ProxySQL_HTTP_Server.oo ProxySQL_RESTAPI_Server.oo font-awesome.min.css.oo main-bundle.min.css.oo set_parser.oo MySQL_Variables.oo c_tokenizer.oo proxysql_utils.oo proxysql_coredump.oo proxysql_sslkeylog.oo \ +_OBJ_CXX := ProxySQL_GloVars.oo network.oo debug.oo configfile.oo Query_Cache.oo SpookyV2.oo MySQL_Authentication.oo gen_utils.oo sqlite3db.oo mysql_connection.oo MySQL_HostGroups_Manager.oo mysql_data_stream.oo MySQL_Thread.oo MySQL_Session.oo MySQL_Protocol.oo mysql_backend.oo Query_Processor.oo MySQL_Query_Processor.oo PgSQL_Query_Processor.oo ProxySQL_Admin.oo ProxySQL_Config.oo ProxySQL_Restapi.oo MySQL_Monitor.oo MySQL_Logger.oo thread.oo MySQL_PreparedStatement.oo ProxySQL_Cluster.oo ClickHouse_Authentication.oo ClickHouse_Server.oo ProxySQL_Statistics.oo Chart_bundle_js.oo ProxySQL_HTTP_Server.oo ProxySQL_RESTAPI_Server.oo font-awesome.min.css.oo main-bundle.min.css.oo MySQL_Variables.oo c_tokenizer.oo proxysql_utils.oo proxysql_coredump.oo proxysql_sslkeylog.oo \ sha256crypt.oo \ BaseSrvList.oo BaseHGC.oo Base_HostGroups_Manager.oo \ QP_rule_text.oo QP_query_digest_stats.oo \ @@ -181,7 +181,8 @@ _OBJ_CXX := ProxySQL_GloVars.oo network.oo debug.oo configfile.oo Query_Cache.oo proxy_protocol_info.oo \ proxysql_find_charset.oo ProxySQL_Poll.oo \ PgSQL_Protocol.oo PgSQL_Thread.oo PgSQL_Data_Stream.oo PgSQL_Session.oo PgSQL_Variables.oo PgSQL_HostGroups_Manager.oo PgSQL_Connection.oo PgSQL_Backend.oo PgSQL_Logger.oo PgSQL_Authentication.oo PgSQL_Error_Helper.oo \ - MySQL_Query_Cache.oo PgSQL_Query_Cache.oo PgSQL_Monitor.oo + MySQL_Query_Cache.oo PgSQL_Query_Cache.oo PgSQL_Monitor.oo \ + MySQL_Set_Stmt_Parser.oo PgSQL_Set_Stmt_Parser.oo OBJ_CXX := $(patsubst %,$(ODIR)/%,$(_OBJ_CXX)) HEADERS := ../include/*.h ../include/*.hpp diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index f458a4e33..0a6a24307 100644 --- a/lib/MySQL_Session.cpp +++ b/lib/MySQL_Session.cpp @@ -5714,7 +5714,7 @@ void MySQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C if (session_type == PROXYSQL_SESSION_MYSQL) { __sync_fetch_and_add(&MyHGM->status.frontend_use_db, 1); string nq=string((char *)pkt->ptr+sizeof(mysql_hdr)+1,pkt->size-sizeof(mysql_hdr)-1); - SetParser parser(nq); + MySQL_Set_Stmt_Parser parser(nq); string errmsg = ""; string schemaname = parser.parse_USE_query(errmsg); if (schemaname != "") { @@ -5992,7 +5992,7 @@ bool MySQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C ) { proxy_debug(PROXY_DEBUG_MYSQL_COM, 5, "Parsing SET command %s\n", nq.c_str()); proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 5, "Parsing SET command = %s\n", nq.c_str()); - SetParser parser(nq); + MySQL_Set_Stmt_Parser parser(nq); std::map> set = {}; if (mysql_thread___set_parser_algorithm == 1) { // legacy behavior set = parser.parse1(); @@ -6545,7 +6545,7 @@ bool MySQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C } } } else if (match_regexes && match_regexes[2]->match(dig)) { - SetParser parser(nq); + MySQL_Set_Stmt_Parser parser(nq); std::map> set = parser.parse2(); for(auto it = std::begin(set); it != std::end(set); ++it) { @@ -6610,7 +6610,7 @@ bool MySQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C } } } else if (match_regexes && match_regexes[3]->match(dig)) { - SetParser parser(nq); + MySQL_Set_Stmt_Parser parser(nq); std::string charset = parser.parse_character_set(); const MARIADB_CHARSET_INFO * c; if (!charset.empty()) { diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index 6a0adb3d8..e1fb5b26f 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -2951,7 +2951,7 @@ bool MySQL_Thread::init() { mypolls.add(POLLIN, pipefd[0], NULL, 0); assert(i==0); - thr_SetParser = new SetParser(""); + thr_SetParser = new MySQL_Set_Stmt_Parser(""); match_regexes=(Session_Regex **)malloc(sizeof(Session_Regex *)*4); // match_regexes[0]=new Session_Regex((char *)"^SET (|SESSION |@@|@@session.)SQL_LOG_BIN( *)(:|)=( *)"); match_regexes[0] = NULL; // NOTE: historically we used match_regexes[0] for SET SQL_LOG_BIN . Not anymore diff --git a/lib/PgSQL_Session.cpp b/lib/PgSQL_Session.cpp index 002fef307..2efd25cb7 100644 --- a/lib/PgSQL_Session.cpp +++ b/lib/PgSQL_Session.cpp @@ -4836,7 +4836,7 @@ bool PgSQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C { proxy_debug(PROXY_DEBUG_MYSQL_COM, 5, "Parsing SET command %s\n", nq.c_str()); proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 5, "Parsing SET command = %s\n", nq.c_str()); - SetParser parser(nq); + PgSQL_Set_Stmt_Parser parser(nq); std::map> set = {}; std::vector> param_status = {}; bool send_param_status = false; @@ -5130,7 +5130,7 @@ bool PgSQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C } /* TODO else if (match_regexes && match_regexes[2]->match(dig)) { - SetParser parser(nq); + PgSQL_Set_Stmt_Parser parser(nq); std::map> set = parser.parse2(); for (auto it = std::begin(set); it != std::end(set); ++it) { @@ -5197,7 +5197,7 @@ bool PgSQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C }*/ else if (match_regexes && match_regexes[3]->match(dig)) { std::vector> param_status; - SetParser parser(nq); + PgSQL_Set_Stmt_Parser parser(nq); std::string charset = parser.parse_character_set(); int charset_encoding = -1; if (!charset.empty()) { diff --git a/lib/PgSQL_Thread.cpp b/lib/PgSQL_Thread.cpp index 10e730ac5..a2d9cdc0e 100644 --- a/lib/PgSQL_Thread.cpp +++ b/lib/PgSQL_Thread.cpp @@ -2842,7 +2842,7 @@ bool PgSQL_Thread::init() { mypolls.add(POLLIN, pipefd[0], NULL, 0); assert(i == 0); - thr_SetParser = new SetParser(""); + thr_SetParser = new PgSQL_Set_Stmt_Parser(""); match_regexes = (Session_Regex**)malloc(sizeof(Session_Regex*) * 4); // match_regexes[0]=new Session_Regex((char *)"^SET (|SESSION |@@|@@session.)SQL_LOG_BIN( *)(:|)=( *)"); match_regexes[0] = NULL; // NOTE: historically we used match_regexes[0] for SET SQL_LOG_BIN . Not anymore diff --git a/lib/set_parser.cpp b/lib/set_parser.cpp deleted file mode 100644 index c63f4dc28..000000000 --- a/lib/set_parser.cpp +++ /dev/null @@ -1,667 +0,0 @@ -#include "set_parser.h" -#include "gen_utils.h" -#include -#include -#include -#include -#include // for std::pair -//#ifdef PARSERDEBUG -#include -//#endif - -#ifdef DEBUG -//#define VALGRIND_ENABLE_ERROR_REPORTING -//#define VALGRIND_DISABLE_ERROR_REPORTING -#include "valgrind.h" -#else -#define VALGRIND_ENABLE_ERROR_REPORTING -#define VALGRIND_DISABLE_ERROR_REPORTING -#endif // DEBUG - -using namespace std; - -#define MULTI_STATEMENTS_USE "Unable to parse multi-statements command with USE statement" - -static void remove_quotes(string& v) { - if (v.length() > 2) { - char firstChar = v[0]; - char lastChar = v[v.length()-1]; - if (firstChar == lastChar) { - if (firstChar == '\'' || firstChar == '"' || firstChar == '`') { - v.erase(v.length()-1, 1); - v.erase(0, 1); - } - } - } -} - -#ifdef PARSERDEBUG -SetParser::SetParser(std::string nq, int verb) { - verbosity = verb; -#else -template -SetParser::SetParser(std::string nq) { -#endif - parse1v2_init = false; - set_query(nq); -} - -template -SetParser::~SetParser() { - if (parse1v2_init == true) { - delete parse1v2_opt2; - delete parse1v2_re; - } -} - -template -void SetParser::set_query(const std::string& nq) { - int query_no_space_length = nq.length(); - char *query_no_space=(char *)malloc(query_no_space_length+1); - memcpy(query_no_space,nq.c_str(),query_no_space_length); - query_no_space[query_no_space_length]='\0'; - query_no_space_length=remove_spaces(query_no_space); - query = std::string(query_no_space); - free(query_no_space); -} - - -#define QUOTES "(?:'|\"|`)?" -#define SPACES " *" -#define NAMES "(NAMES)" -#define NAME_VALUE "((?:\\w|\\d)+)" - -#define SESSION_P1 "(?:|SESSION +|@@|@@session.|@@local.)" -#define VAR_P1 "`?(@\\w+|\\w+)`?" - -// added (?:[\\w]+=(?:on|off)|,)+ for optimizer_switch -#define VAR_VALUE_P1_1 "(?:\\()*(?:SELECT)*(?: )*(?:CONCAT\\()*(?:(?:(?: )*REPLACE|IFNULL|CONCAT)\\()+(?: )*(?:NULL|@OLD_SQL_MODE|@@SQL_MODE),(?:(?:'|\\w|,| |\"|\\))+(?:\\))*)(?:\\))" -#define VAR_VALUE_P1_2 "|(?:NULL)" -#define VAR_VALUE_P1_3 "|(?:[\\w]+=(?:on|off)|,)+" -#define VAR_VALUE_P1_4 "|(?:[@\\w/\\d:\\+\\-]|,)+" -#define VAR_VALUE_P1_5 "|(?:(?:'{1}|\"{1})(?:)(?:'{1}|\"{1}))" -#define VAR_VALUE_P1_6 "|(?: )+" -#define VAR_VALUE_P1 "(" VAR_VALUE_P1_1 VAR_VALUE_P1_2 VAR_VALUE_P1_3 VAR_VALUE_P1_4 VAR_VALUE_P1_5 VAR_VALUE_P1_6 ")" - -template -std::map> SetParser::parse1() { -#ifdef DEBUG - proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str()); -#endif // DEBUG - re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); - opt2->set_case_sensitive(false); - opt2->set_longest_match(false); - - re2::RE2 re0("^\\s*SET\\s+", *opt2); - re2::RE2::Replace(&query, re0, ""); - re2::RE2 re1("(\\s|;)+$", *opt2); // remove trailing spaces and semicolon - re2::RE2::Replace(&query, re1, ""); - - std::map> result; - - const std::string pattern="(?:" NAMES SPACES QUOTES NAME_VALUE QUOTES "(?: +COLLATE +" QUOTES NAME_VALUE QUOTES "|)" "|" SESSION_P1 VAR_P1 SPACES "(?:|:)=" SPACES QUOTES VAR_VALUE_P1 QUOTES ") *,? *"; -#ifdef DEBUG -VALGRIND_DISABLE_ERROR_REPORTING; -#endif // DEBUG - re2::RE2 re(pattern, *opt2); -#ifdef DEBUG -VALGRIND_ENABLE_ERROR_REPORTING; -#endif // DEBUG - std::string var; - std::string value1, value2, value3, value4, value5; - re2::StringPiece input(query); - while (re2::RE2::Consume(&input, re, &value1, &value2, &value3, &value4, &value5)) { - std::vector op; -#ifdef DEBUG - proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "SET parsing: v1='%s' , v2='%s' , v3='%s' , v4='%s' , v5='%s'\n", value1.c_str(), value2.c_str(), value3.c_str(), value4.c_str(), value5.c_str()); -#endif // DEBUG - std::string key; - if (value1 != "") { - // NAMES - key = value1; - op.push_back(value2); - if (value3 != "") { - op.push_back(value3); - } - } else if (value4 != "") { - // VARIABLE - if (strcasecmp("transaction_isolation", value4.c_str()) == 0) { - value4 = "tx_isolation"; - } else if (strcasecmp("transaction_read_only", value4.c_str()) == 0) { - value4 = "tx_read_only"; - } - size_t pos = value5.find_last_not_of(" \n\r\t,"); - if (pos != value5.npos) { - value5.erase(pos+1); - } - key = value4; - if (value5 == "''" || value5 == "\"\"") { - op.push_back(""); - } else { - op.push_back(value5); - } - } - - std::transform(key.begin(), key.end(), key.begin(), ::tolower); - result[key] = op; - } - if (input.size() != 0) { - result = {}; - } - delete opt2; - return result; -} - -template -void SetParser::generateRE_parse1v2() { - vector quote_symbol = {"\"", "'", "`"}; - vector var_patterns = {}; - { - // this block needs to be added at the very beginning, otherwise REPLACE|IFNULL|CONCAT may be considered simple words - // sw0 matches: - // - single word, quoted or not quoted - // - variable name , with double @ (session variable) or single @ (user defiend variable) - // - strings that includes words, spaces and commas - // - single quote string - string sw0 = "(?:\\w+|\"[\\w, ]+\"|\'[\\w, ]+\'|@(?:|@)\\w+|\'\')"; - string mw0 = "(?:" + sw0 + "(?: *, *" + sw0 + ")*)"; // multiple words, separated by comma and random spaces - string fww = "(?:(?:REPLACE|IFNULL|CONCAT)\\( *" + mw0 + "\\))"; // functions REPLACE|IFNULL|CONCAT having argument multiple words - string rfww2 = "(?:(?:REPLACE|IFNULL|CONCAT)\\( *" + fww + " *, *" + mw0 + "\\))"; //functions REPLACE|IFNULL|CONCAT calling the same functions - string rfww3 = "(?:(?:REPLACE|IFNULL|CONCAT)\\( *" + rfww2 + " *, *" + mw0 + "\\))"; //functions REPLACE|IFNULL|CONCAT calling the same functions - string rfww4 = "(?:(?:REPLACE|IFNULL|CONCAT)\\( *" + rfww3 + " *, *" + mw0 + "\\))"; //functions REPLACE|IFNULL|CONCAT calling the same functions - // all the above function allows space after the open parenthesis - string Selfww = "(?:\\(SELECT *" + fww + "\\))"; // for calls like SET sql_mode=(SELECT CONCAT(@@sql_mode, ',PIPES_AS_CONCAT,NO_ENGINE_SUBSTITUTION')); - // FIXME: add error handling in case rfww4 is removed -#ifdef PARSERDEBUG - if (verbosity > 0) { - cout << fww << endl; - cout << rfww2 << endl; - cout << rfww3 << endl; - cout << rfww4 << endl; - cout << Selfww << endl; - } -#endif - var_patterns.push_back(rfww4); // add first function calling function , otherwise functions will be considered simple names - var_patterns.push_back(rfww3); // add first function calling function , otherwise functions will be considered simple names - var_patterns.push_back(rfww2); // add first function calling function - var_patterns.push_back(fww); - var_patterns.push_back(Selfww); - } - - string vp = "NULL"; // NULL - var_patterns.push_back(vp); - - { - string vp0 = "(?:\\w|\\d)+"; // single word with letters and digits , for example utf8mb4 and latin1 - string vp2 = "(?:" + vp0 + "(?:-" + vp0 + ")*)"; // multiple words (letters and digits) separated by dash, WITHOUT any spaces between words . Used also for transaction isolation - var_patterns.push_back(vp2); - for (auto it = quote_symbol.begin(); it != quote_symbol.end(); it++) { - string s = *it + vp2 + *it; - var_patterns.push_back(s); // add with quote - } - } - - vp = "\\w+(?:,\\w+)+"; // multiple words separated by commas, WITHOUT any spaces between words - // NOTE: we do not use multiple words without quotes - for (auto it = quote_symbol.begin(); it != quote_symbol.end(); it++) { - string s = *it + vp + *it; - var_patterns.push_back(s); // add with quote - } - - // regex for optimizer_switch - { - string v1 = "(?:on|off)"; // on|off - string v2 = "\\w+=" + v1; // "\\w+=(?:on|off)" , example: index_merge_sort_union=on - string v3 = v2 + "(?:," + v2 + ")*"; // "\\w+=(?:on|off)(?:,\\w+=(?:on|off))*" - // example index_merge=on,index_merge_union=on,index_merge_sort_union=off - // note: spaces are not allowed - // NOTE: the whole set of flags must be quoted - for (auto it = quote_symbol.begin(); it != quote_symbol.end(); it++) { - string s = *it + v3 + *it; - var_patterns.push_back(s); // add with quote - } - } - - -// DO NOT REMOVE THIS COMMENTED CODE -// It helps understanding how a regex was built - -// vp = "\\d+"; // a number integer N1 -// var_patterns.push_back(vp); -// vp = "\\d+\\.\\d+"; // a decimal N2 -// var_patterns.push_back(vp); -// vp = "\\d+(?:|\\.\\d+)"; // an integer or decimal N3 , merge of N1 and N2 -// var_patterns.push_back(vp); - -// vp = " *(?:\\+|\\-) *\\d+"; // a signed number integer with spaces before and after the sign . N4 = sign + N1 -// var_patterns.push_back(vp); -// vp = " *(?:\\+|\\-) *\\d+\\.\\d+"; // a signed decimal with spaces before and after the sign . N5 = sign + N2 -// var_patterns.push_back(vp); - -// vp = " *(?:\\+|\\-) *\\d+(?:|\\.\\d+)"; // a signed integer or decimal , N6 = N4 + N5 -// var_patterns.push_back(vp); - - vp = "(?:| *(?:\\+|\\-) *)\\d+(?:|\\.\\d+)"; // a signed or unsigned integer or decimal , N7 = merge of N3 and N6 - var_patterns.push_back(vp); - - { - // time_zone in numeric format: - // - +/- sign - // 1 or 2 digits - // : - // 2 digits - string tzd = "(?:(?:\\+|\\-)(?:|\\d)\\d:\\d\\d)"; - // time_zone in string format: - // word / word - string tzw = "(?:\\w+/\\w+)"; - vp = "(?:" + tzd + "|" + tzw + ")"; // time_zone in numeric and string format - } - for (auto it = quote_symbol.begin(); it != quote_symbol.end(); it++) { - string s = *it + vp + *it; - var_patterns.push_back(s); // add with quote - } - - // add just variable name, for example SET time_zone = @old_time_zone - vp = "(?:@(?:|@)\\w+)"; - var_patterns.push_back(vp); - - - // add empty strings , with optional spaces - for (auto it = quote_symbol.begin(); it != quote_symbol.end(); it++) { - string s = *it + " *" + *it; - var_patterns.push_back(s); // add with quote - } - - string var_value = "("; - for (auto it = var_patterns.begin(); it != var_patterns.end(); it++) { - string s = "(?:" + *it + ")"; - auto it2 = it; - it2++; - if (it2 != var_patterns.end()) - s += "|"; - var_value += s; - } - var_value += ")"; - -#ifdef DEBUG - proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str()); -#endif // DEBUG - parse1v2_opt2 = new re2::RE2::Options(RE2::Quiet); - parse1v2_opt2->set_case_sensitive(false); - parse1v2_opt2->set_longest_match(false); - - string var_1_0 = "(?:@\\w+|\\w+)"; // @name|name - string var_1 = "(" + var_1_0 + "|`" + var_1_0 + "`)"; // var_1_0|`var_1_0` - var_1 = SESSION_P1 + var_1; - - string charset_name = "(?:(?:\\w|\\d)+)"; - string name_value = "("; - for (auto it = quote_symbol.begin(); it != quote_symbol.end(); it++) { - string s = "(?:" + *it + charset_name + *it + ")"; - s += "|"; - name_value += s; - } - name_value += charset_name; // without quotes - name_value += ")"; - -#ifdef PARSERDEBUG - if (verbosity > 0) { - cout << var_value << endl; - cout << name_value << endl; - } -#endif - - std::string pattern; - if constexpr (std::is_same_v) { - pattern = "(?:" NAMES SPACES + name_value + "(?: +COLLATE +" + name_value + "|)" "|" + var_1 + SPACES "(?:|:)=" SPACES + var_value + ") *,? *"; - } else if constexpr (std::is_same_v) { - pattern = "(?:" NAMES SPACES + name_value + "(?: +COLLATE +" + name_value + "|)" "|" + var_1 + SPACES "(?:|:)(?:TO|=)" SPACES + var_value + ") *,? *"; - } else { - assert(0); - } - -#ifdef DEBUG -VALGRIND_DISABLE_ERROR_REPORTING; -#endif // DEBUG -#ifdef PARSERDEBUG - if (verbosity > 0) { - cout << pattern << endl; - } -#endif - parse1v2_pattern = pattern; - parse1v2_re = new re2::RE2(parse1v2_pattern, *parse1v2_opt2); - parse1v2_init = true; -} - -template -std::map> SetParser::parse1v2() { - - std::map> result = {}; - - if (parse1v2_init == false) { - generateRE_parse1v2(); - } - - re2::RE2 re0("^\\s*SET\\s+", *parse1v2_opt2); - re2::RE2::Replace(&query, re0, ""); - re2::RE2 re1("(\\s|;)+$", *parse1v2_opt2); // remove trailing spaces and semicolon - re2::RE2::Replace(&query, re1, ""); - -#ifdef DEBUG -VALGRIND_ENABLE_ERROR_REPORTING; -#endif // DEBUG - std::string var; - std::string value1, value2, value3, value4, value5; - re2::StringPiece input(query); - while (re2::RE2::Consume(&input, *parse1v2_re, &value1, &value2, &value3, &value4, &value5)) { - // FIXME: verify if we reached end of query. Did we parse everything? - std::vector op; -#ifdef DEBUG - proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "SET parsing: v1='%s' , v2='%s' , v3='%s' , v4='%s' , v5='%s'\n", value1.c_str(), value2.c_str(), value3.c_str(), value4.c_str(), value5.c_str()); -#endif // DEBUG - std::string key; - if (value1 != "") { - // NAMES - key = value1; - remove_quotes(value2); - op.push_back(value2); - if (value3 != "") { - remove_quotes(value3); - op.push_back(value3); - } - } else if (value4 != "") { - // VARIABLE - remove_quotes(value4); - if (strcasecmp("transaction_isolation", value4.c_str()) == 0) { - value4 = "tx_isolation"; - } else if (strcasecmp("transaction_read_only", value4.c_str()) == 0) { - value4 = "tx_read_only"; - } - size_t pos = value5.find_last_not_of(" \n\r\t,"); - if (pos != value5.npos) { - value5.erase(pos+1); - } - key = value4; - if (value5 == "''" || value5 == "\"\"") { - op.push_back(""); - } else { - remove_quotes(value5); - op.push_back(value5); - } - } - - std::transform(key.begin(), key.end(), key.begin(), ::tolower); - result[key] = op; - } - if (input.size() != 0) { -#ifdef PARSERDEBUG - if (verbosity > 0) { - cout << "Failed to parse: " << input << endl; - } -#endif - result = {}; - } - //delete opt2; - return result; -} - -template -std::map> SetParser::parse2() { - -#ifdef DEBUG - proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str()); -#endif // DEBUG - re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); - opt2->set_case_sensitive(false); - opt2->set_longest_match(false); - - re2::RE2 re0("^\\s*SET\\s+", *opt2); - re2::RE2::Replace(&query, re0, ""); - - std::map> result; - - // Regex used: - // SET(?: +)(|SESSION +)TRANSACTION(?: +)(?:(?:(ISOLATION(?: +)LEVEL)(?: +)(REPEATABLE(?: +)READ|READ(?: +)COMMITTED|READ(?: +)UNCOMMITTED|SERIALIZABLE))|(?:(READ)(?: +)(WRITE|ONLY))) - const std::string pattern="(|SESSION) *TRANSACTION(?: +)(?:(?:(ISOLATION(?: +)LEVEL)(?: +)(REPEATABLE(?: +)READ|READ(?: +)COMMITTED|READ(?: +)UNCOMMITTED|SERIALIZABLE))|(?:(READ)(?: +)(WRITE|ONLY)))"; - re2::RE2 re(pattern, *opt2); - std::string var; - std::string value1, value2, value3, value4, value5; - re2::StringPiece input(query); - while (re2::RE2::Consume(&input, re, &value1, &value2, &value3, &value4, &value5)) { - std::vector op; -#ifdef DEBUG - proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "SET parsing: v1='%s' , v2='%s' , v3='%s' , v4='%s' , v5='%s'\n", value1.c_str(), value2.c_str(), value3.c_str(), value4.c_str(), value5.c_str()); -#endif // DEBUG - std::string key; - //if (value1 != "") { // session is specified - if (value2 != "") { // isolation level - key = value1 + ":" + value2; - std::transform(value3.begin(), value3.end(), value3.begin(), ::toupper); - op.push_back(value3); - } else { - key = value1 + ":" + value4; - std::transform(value5.begin(), value5.end(), value5.begin(), ::toupper); - op.push_back(value5); - } - //} - std::transform(key.begin(), key.end(), key.begin(), ::tolower); - result[key] = op; - } - - delete opt2; - return result; -} - -template<> -std::string SetParser::parse_character_set() { -#ifdef DEBUG - proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str()); -#endif // DEBUG - re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); - opt2->set_case_sensitive(false); - opt2->set_longest_match(false); - - re2::RE2 re0("^\\s*SET\\s+", *opt2); - re2::RE2::Replace(&query, re0, ""); - - std::map> result; - - const std::string pattern="((charset)|(character +set))(?: )(?:'?)([^'|\\s]*)(?:'?)"; - re2::RE2 re(pattern, *opt2); - std::string var; - std::string value1, value2, value3, value4; - re2::StringPiece input(query); - re2::RE2::Consume(&input, re, &value1, &value2, &value3, &value4); - - delete opt2; - return value4; -} - -template<> -std::string SetParser::parse_character_set() { -#ifdef DEBUG - proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str()); -#endif // DEBUG - re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); - opt2->set_case_sensitive(false); - opt2->set_longest_match(false); - - re2::RE2 re0("^\\s*SET\\s+", *opt2); - re2::RE2::Replace(&query, re0, ""); - - std::map> result; - const std::string pattern = "(client_encoding|names)\\s*(=|TO)\\s*['\"]?([A-Z_0-9]+)['\"]?"; - re2::RE2 re(pattern, *opt2); - std::string var; - std::string value1, value2, value3; - re2::StringPiece input(query); - re2::RE2::Consume(&input, re, &value1, &value2, &value3); - - delete opt2; - return value3; -} - -template -std::string SetParser::parse_USE_query(std::string& errmsg) { -#ifdef DEBUG - proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str()); -#endif // DEBUG - - re2::RE2::Options opt2(RE2::Quiet); - opt2.set_case_sensitive(false); - opt2.set_longest_match(false); - - std::string dbname = remove_comments(query); - size_t pos = dbname.find_last_not_of(" ;"); - if (pos != dbname.npos) { - dbname.erase(pos + 1); // remove trailing spaces and semicolumns - } - re2::RE2 re0("^\\s*", opt2); - re2::RE2::Replace(&dbname, re0, ""); - if (dbname.size() >= 4) { - if ( - strncasecmp(dbname.c_str(), "USE ",4) == 0 - || - strncasecmp(dbname.c_str(), "USE`",4) == 0 - ) { - re2::RE2 re1("^USE\\s*", opt2); - re2::RE2::Replace(&dbname, re1, ""); - re2::RE2 re2("\\s*$", opt2); - re2::RE2::Replace(&dbname, re2, ""); - if (dbname[0] == '`') { - if (dbname.length() > 2) { - if (dbname[dbname.length()-1] == '`') { - // Remove the first character - dbname.erase(0, 1); - // Remove the last character - dbname.erase(dbname.length() - 1); - } - } - } - } else { - dbname = ""; - } - } else { - dbname = ""; - } - - if (dbname.find_first_of(';') != std::string::npos) { - errmsg = MULTI_STATEMENTS_USE; - dbname = ""; - } - - return dbname; -} - -template -std::string SetParser::remove_comments(const std::string& q) { - std::string result = ""; - bool in_multiline_comment = false; - - for (size_t i = 0; i < query.size(); ++i) { - char current_char = query[i]; - - // Check for multiline comment start - if (current_char == '/' && i + 1 < query.size() && query[i + 1] == '*') { - in_multiline_comment = true; - i++; // Skip the '*' - continue; - } - - // Check for multiline comment end - if (in_multiline_comment && current_char == '*' && i + 1 < query.size() && query[i + 1] == '/') { - in_multiline_comment = false; - i++; // Skip the '/' - continue; - } - - // Skip characters inside multiline comment - if (in_multiline_comment) { - continue; - } - - // Check for single-line comments - if (current_char == '#' || (current_char == '-' && i + 1 < query.size() && query[i + 1] == '-')) { - // Skip until the end of the line - while (i < query.size() && query[i] != '\n') { - i++; - } - continue; - } - - // Append the character to the result if it's not a comment - result += current_char; - } - - return result; -} - - -#ifdef DEBUG -template -void SetParser::test_parse_USE_query() { - - // Define vector of pairs (query, expected dbname) - std::vector> testCases = { - {"USE my_database", "my_database"}, // Basic Case - {"USE my_database", "my_database"}, // Basic Case - {"USE my_database ", "my_database"}, // Basic Case - {"/* comment */USE dbname /* comment */", "dbname"}, // With Comments - {"/* comment */ USE dbname", "dbname"}, // With Comments - {"USE dbname /* comment */", "dbname"}, // With Comments - {"/* comment */USE `dbname` /* comment */", "dbname"}, // With backtick - {"/* comment */USE `dbname`/* comment */", "dbname"}, // With backtick - {"/* comment */USE`dbname` /* comment */", "dbname"}, // With backtick - {"/* comment */USE `dbname`/* comment */", "dbname"}, // With backtick - {"/* comment\nmultiline comment */USE dbname /* comment */", "dbname"}, // Multiline Comment - {"/* comment */USE dbname # comment", "dbname"}, // Hash Comment - {"/* comment */USE dbname -- comment", "dbname"}, // Double Dash Comment - {"/* comment */USE dbname # comment", "dbname"}, // Hash Comment - {"/* comment */USE dbname -- comment", "dbname"}, // Double Dash Comment - {"USE dbname # comment", "dbname"}, // Hash Comment - {"USE dbname -- comment", "dbname"}, // Double Dash Comment - {"SELECT * FROM my_table", ""}, // No match - {"/*+ placeholder_comment */ USE test_use_comment", "test_use_comment"}, - - {"USE /*+ placeholder_comment */ `test_use_comment-a1`", "test_use_comment-a1"}, - {"USE /*+ placeholder_comment */ `test_use_comment_1`", "test_use_comment_1"}, - {"USE/*+ placeholder_comment */ `test_use_comment_2`", "test_use_comment_2"}, - {"USE /*+ placeholder_comment */`test_use_comment_3`", "test_use_comment_3"}, - {"USE /*+ placeholder_comment */ test_use_comment_4", "test_use_comment_4"}, - {"USE/*+ placeholder_comment */ test_use_comment_5", "test_use_comment_5"}, - {"USE /*+ placeholder_comment */test_use_comment_6", "test_use_comment_6"}, - {"USE /*+ placeholder_comment */ `test_use_comment-1`", "test_use_comment-1"}, - {"use my_database", "my_database"}, - {"/* comment */ use dbname -- comment", "dbname"}, - {"/* comment\nmultiline comment */USE dbname /* comment\nmultiline comment */", "dbname"}, // Multiline Comment - - {"USE/*+ placeholder_comment */ `test_use_comment-2`", "test_use_comment-2"}, - {"USE /*+ placeholder_comment */`test_use_comment-3`", "test_use_comment-3"}, - {"/*+ placeholder_comment */USE `test_use_comment-4`", "test_use_comment-4"}, - {"USE/*+ placeholder_comment */`test_use_comment-5`", "test_use_comment-5"}, - {"/* comment */USE`test_use_comment-6`", "test_use_comment-6"}, - {"USE`test_use_comment-7`", "test_use_comment-7"}, - {"USE test_use_comment-7 ;", "test_use_comment-7"}, - {"USE`test_use_comment-2` ; ", "test_use_comment-2"}, - {"USE`test_use_comment-2` ; -- comment", "test_use_comment-2"}, - {"USE test_use_comment-7 /* comment */ ; ", "test_use_comment-7"}, - {"USE /* comment */ test_use_comment-7 ; ", "test_use_comment-7"}, - {"USE dbame ; SELECT 1", ""}, - }; - - // Run tests for each pair - for (const auto& p : testCases) { - set_query(p.first); - std::string errmsg = ""; - std::string dbname = parse_USE_query(errmsg); - if (dbname != p.second) { - // we call parse_USE_query() again just to make it easier to create a breakpoint - std::string s = parse_USE_query(errmsg); - assert(s == p.second); - } - } -} -#endif // DEBUG - -template class SetParser; -template class SetParser; diff --git a/src/main.cpp b/src/main.cpp index a005db5e3..081dd8580 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -2325,7 +2325,7 @@ int main(int argc, const char * argv[]) { #ifdef DEBUG { // Automated testing - SetParser parser(""); + MySQL_Set_Stmt_Parser parser(""); parser.test_parse_USE_query(); } #endif // DEBUG