From 131d2e8792e151cfdf8073418c6e1bf3a5d1688c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Tue, 27 Jun 2023 08:12:50 +0000 Subject: [PATCH] Initial rewriting of SetParser --- .gitignore | 1 + include/set_parser.h | 7 + lib/set_parser.cpp | 318 ++++++++++++++++++++++++++--- test/tap/tests/Makefile | 8 + test/tap/tests/setparser_test.cpp | 2 +- test/tap/tests/setparser_test2.cpp | 2 +- test/tap/tests/setparser_test3.cpp | 309 ++++++++++++++++++++++++++++ 7 files changed, 614 insertions(+), 33 deletions(-) create mode 100644 test/tap/tests/setparser_test3.cpp diff --git a/.gitignore b/.gitignore index 124d43e2b..339f9ba76 100644 --- a/.gitignore +++ b/.gitignore @@ -117,6 +117,7 @@ test/tap/tests/galera_1_timeout_count test/tap/tests/galera_2_timeout_no_count test/tap/tests/setparser_test test/tap/tests/setparser_test2 +test/tap/tests/setparser_test3 test/tap/tests/reg_test_3504-change_user_libmariadb_helper test/tap/tests/reg_test_3504-change_user_libmysql_helper test/tap/tests/generate_set_session_csv diff --git a/include/set_parser.h b/include/set_parser.h index 5f8a8b1da..3ee7a6643 100644 --- a/include/set_parser.h +++ b/include/set_parser.h @@ -7,9 +7,16 @@ class SetParser { private: std::string query; +#ifdef PARSERDEBUG + int verbosity; + public: + SetParser(std::string q, int verb = 0); +#else public: SetParser(std::string q); +#endif std::map> parse1(); + std::map> parse1v2(); std::map> parse2(); std::string parse_character_set(); }; diff --git a/lib/set_parser.cpp b/lib/set_parser.cpp index 1fff9f6d2..6416efe09 100644 --- a/lib/set_parser.cpp +++ b/lib/set_parser.cpp @@ -5,8 +5,32 @@ #include #include #include +#ifdef PARSERDEBUG +#include +#endif +using namespace std; + + +static void remove_quotes(string& v) { + if (v.length() > 2) { + char firstChar = v[0]; + char lastChar = v[v.length()-1]; + if (firstChar == lastChar) { + if (firstChar == '\'' || firstChar == '"') { + v.erase(v.length()-1, 1); + v.erase(0, 1); + } + } + } +} 
+ +#ifdef PARSERDEBUG +SetParser::SetParser(std::string nq, int verb) { + verbosity = verb; +#else SetParser::SetParser(std::string nq) { +#endif int query_no_space_length = nq.length(); char *query_no_space=(char *)malloc(query_no_space_length+1); memcpy(query_no_space,nq.c_str(),query_no_space_length); @@ -21,6 +45,20 @@ SetParser::SetParser(std::string nq) { #define NAMES "(NAMES)" #define NAME_VALUE "((?:\\w|\\d)+)" +#define SESSION_P1 "(?:|SESSION +|@@|@@session.|@@local.)" +#define VAR_P1 "`?(@\\w+|\\w+)`?" +//#define VAR_VALUE "((?:[\\w/\\d:\\+\\-]|,)+)" +//#define VAR_VALUE "((?:CONCAT\\((?:(REPLACE|CONCAT)\\()+@@sql_mode,(?:(?:'|\\w|,| |\"|\\))+(?:\\)))|(?:[@\\w/\\d:\\+\\-]|,)+|(?:)))" + +// added (?:[\\w]+=(?:on|off)|,)+ for optimizer_switch +#define VAR_VALUE_P1_1 "(?:\\()*(?:SELECT)*(?: )*(?:CONCAT\\()*(?:(?:(?: )*REPLACE|IFNULL|CONCAT)\\()+(?: )*(?:NULL|@OLD_SQL_MODE|@@SQL_MODE),(?:(?:'|\\w|,| |\"|\\))+(?:\\))*)(?:\\))" +#define VAR_VALUE_P1_2 "|(?:NULL)" +#define VAR_VALUE_P1_3 "|(?:[\\w]+=(?:on|off)|,)+" +#define VAR_VALUE_P1_4 "|(?:[@\\w/\\d:\\+\\-]|,)+" +#define VAR_VALUE_P1_5 "|(?:(?:'{1}|\"{1})(?:)(?:'{1}|\"{1}))" +#define VAR_VALUE_P1_6 "|(?: )+" +#define VAR_VALUE_P1 "(" VAR_VALUE_P1_1 VAR_VALUE_P1_2 VAR_VALUE_P1_3 VAR_VALUE_P1_4 VAR_VALUE_P1_5 VAR_VALUE_P1_6 ")" + std::map> SetParser::parse1() { proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str()); @@ -33,12 +71,45 @@ std::map> SetParser::parse1() { std::map> result; -#define SESSION_P1 "(?:|SESSION +|@@|@@session.|@@local.)" -#define VAR_P1 "`?(@\\w+|\\w+)`?" -//#define VAR_VALUE "((?:[\\w/\\d:\\+\\-]|,)+)" -//#define VAR_VALUE "((?:CONCAT\\((?:(REPLACE|CONCAT)\\()+@@sql_mode,(?:(?:'|\\w|,| |\"|\\))+(?:\\)))|(?:[@\\w/\\d:\\+\\-]|,)+|(?:)))" + const std::string pattern="(?:" NAMES SPACES QUOTES NAME_VALUE QUOTES "(?: +COLLATE +" QUOTES NAME_VALUE QUOTES "|)" "|" SESSION_P1 VAR_P1 SPACES "(?:|:)=" SPACES QUOTES VAR_VALUE_P1 QUOTES ") *,? 
*"; +VALGRIND_DISABLE_ERROR_REPORTING; + re2::RE2 re(pattern, *opt2); +VALGRIND_ENABLE_ERROR_REPORTING; + std::string var; + std::string value1, value2, value3, value4, value5; + re2::StringPiece input(query); + while (re2::RE2::Consume(&input, re, &value1, &value2, &value3, &value4, &value5)) { + std::vector op; +#ifdef DEBUG + proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "SET parsing: v1='%s' , v2='%s' , v3='%s' , v4='%s' , v5='%s'\n", value1.c_str(), value2.c_str(), value3.c_str(), value4.c_str(), value5.c_str()); +#endif // DEBUG + std::string key; + if (value1 != "") { + // NAMES + key = value1; + op.push_back(value2); + if (value3 != "") { + op.push_back(value3); + } + } else if (value4 != "") { + // VARIABLE + value5.erase(value5.find_last_not_of(" \n\r\t,")+1); + key = value4; + if (value5 == "''" || value5 == "\"\"") { + op.push_back(""); + } else { + op.push_back(value5); + } + } -// added (?:[\\w]+=(?:on|off)|,)+ for optimizer_switch + std::transform(key.begin(), key.end(), key.begin(), ::tolower); + result[key] = op; + } + delete opt2; + return result; +} + +/* #define VAR_VALUE_P1_1 "(?:\\()*(?:SELECT)*(?: )*(?:CONCAT\\()*(?:(?:(?: )*REPLACE|IFNULL|CONCAT)\\()+(?: )*(?:NULL|@OLD_SQL_MODE|@@SQL_MODE),(?:(?:'|\\w|,| |\"|\\))+(?:\\))*)(?:\\))" #define VAR_VALUE_P1_2 "|(?:NULL)" #define VAR_VALUE_P1_3 "|(?:[\\w]+=(?:on|off)|,)+" @@ -46,43 +117,228 @@ std::map> SetParser::parse1() { #define VAR_VALUE_P1_5 "|(?:(?:'{1}|\"{1})(?:)(?:'{1}|\"{1}))" #define VAR_VALUE_P1_6 "|(?: )+" #define VAR_VALUE_P1 "(" VAR_VALUE_P1_1 VAR_VALUE_P1_2 VAR_VALUE_P1_3 VAR_VALUE_P1_4 VAR_VALUE_P1_5 VAR_VALUE_P1_6 ")" +*/ - const std::string pattern="(?:" NAMES SPACES QUOTES NAME_VALUE QUOTES "(?: +COLLATE +" QUOTES NAME_VALUE QUOTES "|)" "|" SESSION_P1 VAR_P1 SPACES "(?:|:)=" SPACES QUOTES VAR_VALUE_P1 QUOTES ") *,? 
*"; + +std::map> SetParser::parse1v2() { + + + vector quote_symbol = {"\"", "'", "`"}; + + vector var_patterns = {}; + + { + // this block needs to be added at the very beginning, otherwise REPLACE|IFNULL|CONCAT may be considered simple words + // sw0 matches: + // - single word, quoted or not quoted + // - variable name , with double @ (session variable) or single @ (user defiend variable) + // - strings that includes words, spaces and commas + // - single quote string + string sw0 = "(?:\\w+|\"[\\w, ]+\"|\'[\\w, ]+\'|@(?:|@)\\w+|\'\')"; + string mw0 = "(?:" + sw0 + "(?: *, *" + sw0 + ")*)"; // multiple words, separated by comma and random spaces + string fww = "(?:(?:REPLACE|IFNULL|CONCAT)\\( *" + mw0 + "\\))"; // functions REPLACE|IFNULL|CONCAT having argument multiple words + string rfww2 = "(?:(?:REPLACE|IFNULL|CONCAT)\\( *" + fww + " *, *" + mw0 + "\\))"; //functions REPLACE|IFNULL|CONCAT calling the same functions + string rfww3 = "(?:(?:REPLACE|IFNULL|CONCAT)\\( *" + rfww2 + " *, *" + mw0 + "\\))"; //functions REPLACE|IFNULL|CONCAT calling the same functions + string rfww4 = "(?:(?:REPLACE|IFNULL|CONCAT)\\( *" + rfww3 + " *, *" + mw0 + "\\))"; //functions REPLACE|IFNULL|CONCAT calling the same functions + // all the above function allows space after the open parenthesis + string Selfww = "(?:\\(SELECT *" + fww + "\\))"; // for calls like SET sql_mode=(SELECT CONCAT(@@sql_mode, ',PIPES_AS_CONCAT,NO_ENGINE_SUBSTITUTION')); + // FIXME: add error handling in case rfww4 is removed +#ifdef PARSERDEBUG + if (verbosity > 0) { + cout << fww << endl; + cout << rfww2 << endl; + cout << rfww3 << endl; + cout << rfww4 << endl; + cout << Selfww << endl; + } +#endif + var_patterns.push_back(rfww4); // add first function calling function , otherwise functions will be considered simple names + var_patterns.push_back(rfww3); // add first function calling function , otherwise functions will be considered simple names + var_patterns.push_back(rfww2); // add first function 
calling function + var_patterns.push_back(fww); + var_patterns.push_back(Selfww); + } + + string vp = "NULL"; // NULL + var_patterns.push_back(vp); + vp = "\\w+"; // single word + var_patterns.push_back(vp); + for (auto it = quote_symbol.begin(); it != quote_symbol.end(); it++) { + string s = *it + vp + *it; + var_patterns.push_back(s); // add with quote + } + + vp = "\\w+(?:,\\w+)+"; // multiple words separated by commas, WITHOUT any spaces between words + // NOTE: we do not use multiple words without quotes + for (auto it = quote_symbol.begin(); it != quote_symbol.end(); it++) { + string s = *it + vp + *it; + var_patterns.push_back(s); // add with quote + } + + + + +// DO NOT REMOVE THIS COMMENTED CODE +// It helps understanding how a regex was built + +// vp = "\\d+"; // a number integer N1 +// var_patterns.push_back(vp); +// vp = "\\d+\\.\\d+"; // a decimal N2 +// var_patterns.push_back(vp); +// vp = "\\d+(?:|\\.\\d+)"; // an integer or decimal N3 , merge of N1 and N2 +// var_patterns.push_back(vp); + +// vp = " *(?:\\+|\\-) *\\d+"; // a signed number integer with spaces before and after the sign . N4 = sign + N1 +// var_patterns.push_back(vp); +// vp = " *(?:\\+|\\-) *\\d+\\.\\d+"; // a signed decimal with spaces before and after the sign . 
N5 = sign + N2 +// var_patterns.push_back(vp); + +// vp = " *(?:\\+|\\-) *\\d+(?:|\\.\\d+)"; // a signed integer or decimal , N6 = N4 + N5 +// var_patterns.push_back(vp); + + vp = "(?:| *(?:\\+|\\-) *)\\d+(?:|\\.\\d+)"; // a signed or unsigned integer or decimal , N7 = merge of N3 and N6 + var_patterns.push_back(vp); + + + { + // time_zone in numeric format: + // - +/- sign + // 1 or 2 digits + // : + // 2 digits + string tzd = "(?:(?:\\+|\\-)(?:|\\d)\\d:\\d\\d)"; + // time_zone in string format: + // word / word + string tzw = "(?:\\w+/\\w+)"; + vp = "(?:" + tzd + "|" + tzw + ")"; // time_zone in numeric and string format + } + for (auto it = quote_symbol.begin(); it != quote_symbol.end(); it++) { + string s = *it + vp + *it; + var_patterns.push_back(s); // add with quote + } + + // add just variable name, for example SET time_zone = @old_time_zone + vp = "(?:@(?:|@)\\w+)"; + var_patterns.push_back(vp); + + + // add empty strings , with optional spaces + for (auto it = quote_symbol.begin(); it != quote_symbol.end(); it++) { + string s = *it + " *" + *it; + var_patterns.push_back(s); // add with quote + } + + + + string var_value = "("; + for (auto it = var_patterns.begin(); it != var_patterns.end(); it++) { + string s = "(?:" + *it + ")"; + auto it2 = it; + it2++; + if (it2 != var_patterns.end()) + s += "|"; + var_value += s; + } + var_value += ")"; + + + proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str()); + re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); + opt2->set_case_sensitive(false); + opt2->set_longest_match(false); + + re2::RE2 re0("^\\s*SET\\s+", *opt2); + re2::RE2::Replace(&query, re0, ""); + + std::map> result = {}; + + + string var_1_0 = "(?:@\\w+|\\w+)"; // @name|name + string var_1 = "(" + var_1_0 + "|`" + var_1_0 + "`)"; // var_1_0|`var_1_0` + var_1 = SESSION_P1 + var_1; + + string charset_name = "(?:(?:\\w|\\d)+)"; + string name_value = "("; + for (auto it = quote_symbol.begin(); it != 
quote_symbol.end(); it++) { + string s = "(?:" + *it + charset_name + *it + ")"; + //auto it2 = it; + //it2++; + //if (it2 != quote_symbol.end()) + s += "|"; + name_value += s; + } + name_value += charset_name; // without quotes + name_value += ")"; + +#ifdef PARSERDEBUG + if (verbosity > 0) { + cout << query << endl; + cout << var_value << endl; + cout << name_value << endl; + } +#endif + +#ifdef PARSERDEBUG +// delete opt2; +// return result; +#endif + +/* +#define QUOTES "(?:'|\"|`)?" +#define SPACES " *" +#define NAMES "(NAMES)" +#define NAME_VALUE "((?:\\w|\\d)+)" +*/ + + + //const std::string pattern="(?:" NAMES SPACES QUOTES NAME_VALUE QUOTES "(?: +COLLATE +" QUOTES NAME_VALUE QUOTES "|)" "|" SESSION_P1 VAR_P1 SPACES "(?:|:)=" SPACES QUOTES VAR_VALUE_P1 QUOTES ") *,? *"; + const std::string pattern="(?:" NAMES SPACES + name_value + "(?: +COLLATE +" + name_value + "|)" "|" + var_1 + SPACES "(?:|:)=" SPACES + var_value + ") *,? *"; + //const std::string pattern=var_1 + SPACES "(?:|:)=" SPACES + var_value; VALGRIND_DISABLE_ERROR_REPORTING; +#ifdef PARSERDEBUG + if (verbosity > 0) { + cout << pattern << endl; + } +#endif re2::RE2 re(pattern, *opt2); VALGRIND_ENABLE_ERROR_REPORTING; std::string var; std::string value1, value2, value3, value4, value5; re2::StringPiece input(query); while (re2::RE2::Consume(&input, re, &value1, &value2, &value3, &value4, &value5)) { - std::vector op; + // FIXME: verify if we reached end of query. Did we parse everything? 
+ std::vector op; #ifdef DEBUG - proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "SET parsing: v1='%s' , v2='%s' , v3='%s' , v4='%s' , v5='%s'\n", value1.c_str(), value2.c_str(), value3.c_str(), value4.c_str(), value5.c_str()); + proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "SET parsing: v1='%s' , v2='%s' , v3='%s' , v4='%s' , v5='%s'\n", value1.c_str(), value2.c_str(), value3.c_str(), value4.c_str(), value5.c_str()); #endif // DEBUG - std::string key; - if (value1 != "") { - // NAMES - key = value1; - op.push_back(value2); - if (value3 != "") { - op.push_back(value3); - } - } else if (value4 != "") { - // VARIABLE - value5.erase(value5.find_last_not_of(" \n\r\t,")+1); - key = value4; - if (value5 == "''" || value5 == "\"\"") { - op.push_back(""); - } else { - op.push_back(value5); - } - } - - std::transform(key.begin(), key.end(), key.begin(), ::tolower); - result[key] = op; - } + std::string key; + if (value1 != "") { + // NAMES + key = value1; + remove_quotes(value2); + op.push_back(value2); + if (value3 != "") { + remove_quotes(value3); + op.push_back(value3); + } + } else if (value4 != "") { + // VARIABLE + value5.erase(value5.find_last_not_of(" \n\r\t,")+1); + key = value4; + if (value5 == "''" || value5 == "\"\"") { + op.push_back(""); + } else { + remove_quotes(value5); + op.push_back(value5); + } + } + + std::transform(key.begin(), key.end(), key.begin(), ::tolower); + result[key] = op; + } + delete opt2; - return result; + return result; } diff --git a/test/tap/tests/Makefile b/test/tap/tests/Makefile index 6b70589e3..88381d088 100644 --- a/test/tap/tests/Makefile +++ b/test/tap/tests/Makefile @@ -128,6 +128,7 @@ debug: tests tests: tests-cpp tests-php tests-py \ setparser_test reg_test_3504-change_user_libmariadb_helper reg_test_3504-change_user_libmysql_helper \ setparser_test2 setparser_test2-t \ + setparser_test3 setparser_test3-t \ set_testing-240.csv test_clickhouse_server_libmysql-t reg_test_stmt_resultset_err_no_rows_libmysql-t \ 
prepare_statement_err3024_libmysql-t prepare_statement_err3024_async-t reg_test_mariadb_stmt_store_result_libmysql-t \ reg_test_mariadb_stmt_store_result_async-t @@ -191,6 +192,13 @@ setparser_test2-t: setparser_test2 setparser_test2: setparser_test2.cpp $(TAP_LIBDIR)/libtap.a $(LDIR)/set_parser.cpp $(LIBPROXYSQLAR) $(LIBCOREDUMPERAR) g++ -DDEBUG setparser_test2.cpp $(LDIR)/set_parser.cpp $(INCLUDEDIRS) $(LDIRS) $(OPT) -std=c++11 -lproxysql $(MYLIBS) -ltap -ldl -lpthread $(WASAN) $(LIBCOREDUMPERAR) -o setparser_test2 -DGITVERSION=\"$(GIT_VERSION)\" +setparser_test3-t: setparser_test3 + rm setparser_test3-t || true + ln -s setparser_test3 setparser_test3-t + +setparser_test3: setparser_test3.cpp $(TAP_LIBDIR)/libtap.a $(LDIR)/set_parser.cpp $(LIBPROXYSQLAR) $(LIBCOREDUMPERAR) + g++ -DDEBUG -DPARSERDEBUG setparser_test3.cpp $(LDIR)/set_parser.cpp $(INCLUDEDIRS) $(LDIRS) $(OPT) -std=c++11 -lproxysql $(MYLIBS) -ltap -ldl -lpthread $(WASAN) $(LIBCOREDUMPERAR) -o setparser_test3 -DGITVERSION=\"$(GIT_VERSION)\" + reg_test_3504-change_user_libmariadb_helper: reg_test_3504-change_user_helper.cpp $(CXX) -DDEBUG reg_test_3504-change_user_helper.cpp $(INCLUDEDIRS) $(LDIRS) $(OPT) $(MYLIBS) -lpthread -ldl -std=c++11 -ltap $(STATIC_LIBS) -o reg_test_3504-change_user_libmariadb_helper -DGITVERSION=\"$(GIT_VERSION)\" diff --git a/test/tap/tests/setparser_test.cpp b/test/tap/tests/setparser_test.cpp index b2be4cc6f..8bc210ec4 100644 --- a/test/tap/tests/setparser_test.cpp +++ b/test/tap/tests/setparser_test.cpp @@ -224,7 +224,7 @@ TEST(TestParse, SET_VARIOUS) { static Test multiple[] = { { "SET time_zone = 'Europe/Paris', sql_mode = 'TRADITIONAL'", { Expected("time_zone", {"Europe/Paris"}), Expected("sql_mode", {"TRADITIONAL"}) } }, { "SET time_zone = 'Europe/Paris', sql_mode = IFNULL(NULL,\"STRICT_TRANS_TABLES\")", { Expected("time_zone", {"Europe/Paris"}), Expected("sql_mode", {"IFNULL(NULL,\"STRICT_TRANS_TABLES\")"}) } }, - { "SET sql_mode = 'TRADITIONAL', NAMES 'utf8 COLLATE 
'unicode_ci'", { Expected("sql_mode", {"TRADITIONAL"}), Expected("names", {"utf8", "unicode_ci"}) } }, + { "SET sql_mode = 'TRADITIONAL', NAMES 'utf8 COLLATE 'unicode_ci'", { Expected("sql_mode", {"TRADITIONAL"}), Expected("names", {"utf8", "unicode_ci"}) } }, // FIXME: typo { "SET @@SESSION.sql_mode = CONCAT(CONCAT(@@sql_mode, ',STRICT_ALL_TABLES'), ',NO_AUTO_VALUE_ON_ZERO'), @@SESSION.sql_auto_is_null = 0, @@SESSION.wait_timeout = 2147483", { Expected("sql_mode", {"CONCAT(CONCAT(@@sql_mode, ',STRICT_ALL_TABLES'), ',NO_AUTO_VALUE_ON_ZERO')"}), Expected("sql_auto_is_null", {"0"}), Expected("wait_timeout", {"2147483"}) } }, diff --git a/test/tap/tests/setparser_test2.cpp b/test/tap/tests/setparser_test2.cpp index fe5dd6358..a02465083 100644 --- a/test/tap/tests/setparser_test2.cpp +++ b/test/tap/tests/setparser_test2.cpp @@ -230,7 +230,7 @@ static Test various[] = { static Test multiple[] = { { "SET time_zone = 'Europe/Paris', sql_mode = 'TRADITIONAL'", { Expected("time_zone", {"Europe/Paris"}), Expected("sql_mode", {"TRADITIONAL"}) } }, { "SET time_zone = 'Europe/Paris', sql_mode = IFNULL(NULL,\"STRICT_TRANS_TABLES\")", { Expected("time_zone", {"Europe/Paris"}), Expected("sql_mode", {"IFNULL(NULL,\"STRICT_TRANS_TABLES\")"}) } }, - { "SET sql_mode = 'TRADITIONAL', NAMES 'utf8 COLLATE 'unicode_ci'", { Expected("sql_mode", {"TRADITIONAL"}), Expected("names", {"utf8", "unicode_ci"}) } }, + { "SET sql_mode = 'TRADITIONAL', NAMES 'utf8 COLLATE 'unicode_ci'", { Expected("sql_mode", {"TRADITIONAL"}), Expected("names", {"utf8", "unicode_ci"}) } }, // FIXME: typo { "SET @@SESSION.sql_mode = CONCAT(CONCAT(@@sql_mode, ',STRICT_ALL_TABLES'), ',NO_AUTO_VALUE_ON_ZERO'), @@SESSION.sql_auto_is_null = 0, @@SESSION.wait_timeout = 2147483", { Expected("sql_mode", {"CONCAT(CONCAT(@@sql_mode, ',STRICT_ALL_TABLES'), ',NO_AUTO_VALUE_ON_ZERO')"}), Expected("sql_auto_is_null", {"0"}), Expected("wait_timeout", {"2147483"}) } }, diff --git a/test/tap/tests/setparser_test3.cpp 
b/test/tap/tests/setparser_test3.cpp new file mode 100644 index 000000000..5996aa031 --- /dev/null +++ b/test/tap/tests/setparser_test3.cpp @@ -0,0 +1,309 @@ +/** + * @file setparser_test.cpp + * @brief Test file for unit testing 'SetParser' type, responsible of parsing + * non-trivial 'SET' statements. This test is executed via the wrapper tap test + * 'setparser_test-t'. + * This file is an extension of ../../set_parser_test/setparsertest.cpp + */ + +// NOTE: Avoids the definition of 'global_variables glovars' in 'proxysql_structs.h' +#define PROXYSQL_EXTERN +// NOTE: Avoids definition of 'proxy_sqlite3_*' functions as 'extern' +#define MAIN_PROXY_SQLITE3 + +#include "command_line.h" +#include "tap.h" + +#include + +#include "re2/re2.h" +#include "re2/regexp.h" +#include "util/test.h" +#include "set_parser.h" +#include +#include +#include +#include +#include + +// ******************************************************************************************* +/** + * TODO: This should be fixed once we have improved include hierarchy. All the following + * includes are required to avoid the following linker error related to 'GloMyLdapAuth': + * + * ``` + * /usr/bin/ld: ../../../lib/libproxysql.a(ProxySQL_GloVars.oo): in function `ProxySQL_GlobalVariables::generate_global_checksum()': + * /home/javjarfer/Projects/proxysql_v2.2.0/lib/ProxySQL_GloVars.cpp:374: undefined reference to `GloMyLdapAuth' + * ``` + * + * For now we just declare it locally to avoid the linking error. 
+ */ +#include "openssl/ssl.h" +#include "mysql.h" +#include "proxysql_structs.h" +#include "sqlite3db.h" +#include "MySQL_LDAP_Authentication.hpp" + +using namespace std; + +MySQL_LDAP_Authentication *GloMyLdapAuth = nullptr; +// ****************************************************************************************** + +bool iequals(const std::string& a, const std::string& b) +{ + unsigned int sz = a.size(); + if (b.size() != sz) + return false; + for (unsigned int i = 0; i < sz; ++i) + if (tolower(a[i]) != tolower(b[i])) + return false; + return true; +} + + +void printMap(const std::string query, std::map> map) { + std::cout << "Query: " << query << endl; + for (const auto& entry : map) { + std::cout << " - Key: " << entry.first << endl; + + for (const auto& value : entry.second) { + std::cout << " + Value: " << value << endl; + } + } +} + + +struct Expected { + const char* var; + std::vector values; + Expected(const char* var, std::vector values): var(var), values(values){}; +}; + +struct Test { + const char* query; + std::vector results; +}; + +static Test sql_mode[] = { + { "SET @@sql_mode = 'TRADITIONAL'", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET SESSION sql_mode = 'TRADITIONAL'", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET @@session.sql_mode = 'TRADITIONAL'", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET @@local.sql_mode = 'TRADITIONAL'", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET sql_mode = 'TRADITIONAL'", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET SQL_MODE ='TRADITIONAL'", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET SQL_MODE = \"TRADITIONAL\"", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "SET SQL_MODE = TRADITIONAL", { Expected("sql_mode", {"TRADITIONAL"}) } }, + { "set sql_mode = IFNULL(NULL,\"STRICT_TRANS_TABLES\")", { Expected("sql_mode", {"IFNULL(NULL,\"STRICT_TRANS_TABLES\")"}) } }, + { "set sql_mode = IFNULL(NULL,'STRICT_TRANS_TABLES')", { Expected("sql_mode", 
{"IFNULL(NULL,'STRICT_TRANS_TABLES')"}) } }, + { "SET @@SESSION.sql_mode = CONCAT(CONCAT(@@sql_mode, ', STRICT_ALL_TABLES'), ', NO_AUTO_VALUE_ON_ZERO')", { Expected("sql_mode", {"CONCAT(CONCAT(@@sql_mode, ', STRICT_ALL_TABLES'), ', NO_AUTO_VALUE_ON_ZERO')"}) } }, + { "SET @@LOCAL.sql_mode = CONCAT(CONCAT(@@sql_mode, ', STRICT_ALL_TABLES'), ', NO_AUTO_VALUE_ON_ZERO')", { Expected("sql_mode", {"CONCAT(CONCAT(@@sql_mode, ', STRICT_ALL_TABLES'), ', NO_AUTO_VALUE_ON_ZERO')"}) } }, + { "set session sql_mode = 'ONLY_FULL_GROUP_BY'" , { Expected("sql_mode", {"ONLY_FULL_GROUP_BY"}) } }, + { "SET sql_mode = 'NO_ZERO_DATE,STRICT_ALL_TABLES,ONLY_FULL_GROUP_BY'" , { Expected("sql_mode", {"NO_ZERO_DATE,STRICT_ALL_TABLES,ONLY_FULL_GROUP_BY"}) } }, + { "SET @@sql_mode = CONCAT(@@sql_mode, ',', 'ONLY_FULL_GROUP_BY')" , { Expected("sql_mode", {"CONCAT(@@sql_mode, ',', 'ONLY_FULL_GROUP_BY')"}) } }, + { "SET @@sql_mode = REPLACE(REPLACE(REPLACE(@@sql_mode, 'ONLY_FULL_GROUP_BY,', ''),',ONLY_FULL_GROUP_BY', ''),'ONLY_FULL_GROUP_BY', '')" , { Expected("sql_mode", {"REPLACE(REPLACE(REPLACE(@@sql_mode, 'ONLY_FULL_GROUP_BY,', ''),',ONLY_FULL_GROUP_BY', ''),'ONLY_FULL_GROUP_BY', '')"}) } }, + { "SET @@sql_mode = REPLACE( REPLACE( REPLACE( @@sql_mode, 'ONLY_FULL_GROUP_BY,', ''),',ONLY_FULL_GROUP_BY', ''),'ONLY_FULL_GROUP_BY', '')" , { Expected("sql_mode", {"REPLACE( REPLACE( REPLACE( @@sql_mode, 'ONLY_FULL_GROUP_BY,', ''),',ONLY_FULL_GROUP_BY', ''),'ONLY_FULL_GROUP_BY', '')"}) } }, +// { "SET @@SESSION.sql_mode = CONCAT(CONCAT(@@sql_mode, ', STRICT_ALL_TABLES'), ', NO_AUTO_VALUE_ON_ZERO')", { Expected("sql_mode", {"CONCAT(CONCAT(@@sql_mode, ', STRICT_ALL_TABLES'), ', NO_AUTO_VALUE_ON_ZERO')"}) } }, + { "SET SQL_MODE=IFNULL(@@sql_mode,'')", { Expected("sql_mode", { "IFNULL(@@sql_mode,'')" } ) } }, + { "SET SQL_MODE=IFNULL(@old_sql_mode,'')", { Expected("sql_mode", { "IFNULL(@old_sql_mode,'')" } ) } }, + { "SET SQL_MODE=IFNULL(@OLD_SQL_MODE,'')", { Expected("sql_mode", { 
"IFNULL(@OLD_SQL_MODE,'')" } ) } }, + // Complex queries involving 'SELECT' and surrounding parenthesis should be parsed properly + { "SET sql_mode=(SELECT CONCAT(@@sql_mode, ',PIPES_AS_CONCAT,NO_ENGINE_SUBSTITUTION'))", { Expected("sql_mode", { "(SELECT CONCAT(@@sql_mode, ',PIPES_AS_CONCAT,NO_ENGINE_SUBSTITUTION'))" } ) } }, + { "SET sql_mode=(SELECT CONCAT(@@sql_mode, ',PIPES_AS_CONCAT,NO_ENGINE_SUBSTITUTION')), time_zone = '+00:00', NAMES utf8mb4 COLLATE utf8mb4_unicode_ci", + { + Expected("sql_mode", { "(SELECT CONCAT(@@sql_mode, ',PIPES_AS_CONCAT,NO_ENGINE_SUBSTITUTION'))" } ), + Expected("time_zone", { "+00:00" } ), + Expected("names", {"utf8mb4", "utf8mb4_unicode_ci"} ) + } + }, + // Empty set of 'sql_mode' should result into an empty value + { "SET sql_mode=''", { Expected("sql_mode", { "" } ) } }, + // Invalid 'non-matching' versions of 'sql_mode' should result into 'non-matching' + { "SET sql_mode=(SELECT CONCA(@@sql_mode, ',PIPES_AS_CONCAT,NO_ENGINE_SUBSTITUTION'))", {} }, +// { "SET sql_mode=(SELECT CONCAT(@sql_mode, ',PIPES_AS_CONCAT,NO_ENGINE_SUBSTITUTION'))", {} }, // parse1v2 SHOULD process it + { "SET sql_mode=(SELECT CONCAT(@@sql_mode, ',PIPES_AS_CONCAT[,NO_ENGINE_SUBSTITUTION'))", {} }, + { "SET sql_mode=(SELCT CONCAT(@@sql_mode, ',PIPES_AS_CONCAT[,NO_ENGINE_SUBSTITUTION'))", {} } +}; + +void TestParse(const Test* tests, int ntests, const std::string& title) { + for (int i = 0; i < ntests; i++) { + std::map> data; + for(auto it = std::begin(tests[i].results); it != std::end(tests[i].results); ++it) { + data[it->var] = it->values; + } + + SetParser parser(tests[i].query, 1); + //std::map> result = parser.parse1(); + std::map> result = parser.parse1v2(); + + cout << endl; + printMap("result", result); + cout << endl; + printMap("expected", data); + cout << endl; + + CHECK_EQ(result.size(), data.size()); + ok(result.size() == data.size() , "Sizes match: %lu, %lu" , result.size() , data.size()); + CHECK(std::equal(std::begin(result), 
std::end(result), std::begin(data))); + ok(std::equal(std::begin(result), std::end(result), std::begin(data)) == true, "Elements match"); + } +} + +/* +TEST(TestParse, SET_SQL_MODE) { + TestParse(sql_mode, arraysize(sql_mode), "sql_mode"); +} +*/ +static Test time_zone[] = { + { "SET @@time_zone = 'Europe/Paris'", { Expected("time_zone", {"Europe/Paris"}) } }, + { "SET @@time_zone = '+00:00'", { Expected("time_zone", {"+00:00"}) } }, + { "SET @@time_zone = \"Europe/Paris\"", { Expected("time_zone", {"Europe/Paris"}) } }, + { "SET @@time_zone = \"+00:00\"", { Expected("time_zone", {"+00:00"}) } }, + { "SET @@time_zone = @OLD_TIME_ZONE", { Expected("time_zone", {"@OLD_TIME_ZONE"}) } }, + { "SET @@TIME_ZONE = @OLD_TIME_ZONE", { Expected("time_zone", {"@OLD_TIME_ZONE"}) } }, +}; + +/* +TEST(TestParse, SET_TIME_ZONE) { + TestParse(time_zone, arraysize(time_zone), "time_zone"); +} +*/ + +static Test session_track_gtids[] = { + { "SET @@session_track_gtids = OFF", { Expected("session_track_gtids", {"OFF"}) } }, + { "SET @@session_track_gtids = OWN_GTID", { Expected("session_track_gtids", {"OWN_GTID"}) } }, + { "SET @@SESSION.session_track_gtids = OWN_GTID", { Expected("session_track_gtids", {"OWN_GTID"}) } }, + { "SET @@LOCAL.session_track_gtids = OWN_GTID", { Expected("session_track_gtids", {"OWN_GTID"}) } }, + { "SET SESSION session_track_gtids = OWN_GTID", { Expected("session_track_gtids", {"OWN_GTID"}) } }, + { "SET @@session_track_gtids = ALL_GTIDS", { Expected("session_track_gtids", {"ALL_GTIDS"}) } }, + { "SET @@SESSION.session_track_gtids = ALL_GTIDS", { Expected("session_track_gtids", {"ALL_GTIDS"}) } }, + { "SET @@LOCAL.session_track_gtids = ALL_GTIDS", { Expected("session_track_gtids", {"ALL_GTIDS"}) } }, + { "SET SESSION session_track_gtids = ALL_GTIDS", { Expected("session_track_gtids", {"ALL_GTIDS"}) } }, +}; + +/* +TEST(TestParse, SET_SESSION_TRACK_GTIDS) { + TestParse(session_track_gtids, arraysize(session_track_gtids), "session_track_gtids"); +} +*/ + 
+// Cases for `character_set_results`: bare, `@@`, `@@session.`/`@@local.` and
+// `SET session` spellings. The expected key never carries the scope
+// qualifier, and the value keeps the case used in the statement.
+static Test character_set_results[] = {
+  { "SET @@character_set_results = utf8", { Expected("character_set_results", {"utf8"}) } },
+  { "SET @@character_set_results = NULL", { Expected("character_set_results", {"NULL"}) } },
+  { "SET character_set_results = NULL", { Expected("character_set_results", {"NULL"}) } },
+  { "SET @@session.character_set_results = NULL", { Expected("character_set_results", {"NULL"}) } },
+  { "SET @@local.character_set_results = NULL", { Expected("character_set_results", {"NULL"}) } },
+  { "SET session character_set_results = NULL", { Expected("character_set_results", {"NULL"}) } },
+};
+
+// Cases for `SET NAMES`: unquoted, single-quoted and double-quoted charset
+// (quotes are not part of the expected value), plus the COLLATE form, which
+// is expected to yield two values under the single key "names".
+static Test names[] = {
+  { "SET NAMES utf8", { Expected("names", {"utf8"}) } },
+  { "SET NAMES 'utf8'", { Expected("names", {"utf8"}) } },
+  { "SET NAMES \"utf8\"", { Expected("names", {"utf8"}) } },
+  { "SET NAMES utf8 COLLATE unicode_ci", { Expected("names", {"utf8", "unicode_ci"}) } },
+};
+
+// Single-variable cases for sql_select_limit, sql_auto_is_null and
+// sql_safe_updates. Variable names written in upper case in the statement
+// are expected back in lower case; values (DEFAULT, numbers, ON/OFF,
+// @user_variable references) are expected verbatim.
+static Test various[] = {
+  { "SET @@SESSION.SQL_SELECT_LIMIT= DEFAULT", { Expected("sql_select_limit", {"DEFAULT"}) } },
+  { "SET @@LOCAL.SQL_SELECT_LIMIT= DEFAULT", { Expected("sql_select_limit", {"DEFAULT"}) } },
+  { "SET @@SQL_SELECT_LIMIT= DEFAULT", { Expected("sql_select_limit", {"DEFAULT"}) } },
+  { "SET SESSION SQL_SELECT_LIMIT = DEFAULT", { Expected("sql_select_limit", {"DEFAULT"}) } },
+  { "SET @@SESSION.SQL_SELECT_LIMIT= 1234", { Expected("sql_select_limit", {"1234"}) } },
+  { "SET @@LOCAL.SQL_SELECT_LIMIT= 1234", { Expected("sql_select_limit", {"1234"}) } },
+  { "SET @@SQL_SELECT_LIMIT= 1234", { Expected("sql_select_limit", {"1234"}) } },
+  { "SET SESSION SQL_SELECT_LIMIT = 1234", { Expected("sql_select_limit", {"1234"}) } },
+  // NOTE(review): the next two entries repeat the SESSION/LOCAL `= 1234`
+  // cases above; they are kept so the planned test count stays the same.
+  { "SET @@SESSION.SQL_SELECT_LIMIT= 1234", { Expected("sql_select_limit", {"1234"}) } },
+  { "SET @@LOCAL.SQL_SELECT_LIMIT= 1234", { Expected("sql_select_limit", {"1234"}) } },
+  { "SET @@SESSION.SQL_SELECT_LIMIT= @old_sql_select_limit", { Expected("sql_select_limit", {"@old_sql_select_limit"}) } },
+  { "SET @@LOCAL.SQL_SELECT_LIMIT= @old_sql_select_limit", { Expected("sql_select_limit", {"@old_sql_select_limit"}) } },
+  { "SET SQL_SELECT_LIMIT= @old_sql_select_limit", { Expected("sql_select_limit", {"@old_sql_select_limit"}) } },
+  { "SET @@SESSION.sql_auto_is_null = 0", { Expected("sql_auto_is_null", {"0"}) } },
+  { "SET @@LOCAL.sql_auto_is_null = 0", { Expected("sql_auto_is_null", {"0"}) } },
+  { "SET SESSION sql_auto_is_null = 1", { Expected("sql_auto_is_null", {"1"}) } },
+  { "SET sql_auto_is_null = OFF", { Expected("sql_auto_is_null", {"OFF"}) } },
+  { "SET @@sql_auto_is_null = ON", { Expected("sql_auto_is_null", {"ON"}) } },
+  { "SET @@SESSION.sql_safe_updates = 0", { Expected("sql_safe_updates", {"0"}) } },
+  { "SET @@LOCAL.sql_safe_updates = 0", { Expected("sql_safe_updates", {"0"}) } },
+  { "SET SESSION sql_safe_updates = 1", { Expected("sql_safe_updates", {"1"}) } },
+  { "SET SQL_SAFE_UPDATES = OFF", { Expected("sql_safe_updates", {"OFF"}) } },
+  { "SET @@sql_safe_updates = ON", { Expected("sql_safe_updates", {"ON"}) } },
+};
+
+// Statements that assign several variables at once. Each entry lists one
+// Expected per assignment; function-call values (CONCAT/REPLACE/IFNULL,
+// including their embedded commas and quotes) are expected back verbatim.
+static Test multiple[] = {
+  {
+    "SET time_zone = 'Europe/Paris', sql_mode = 'TRADITIONAL'",
+    {
+      Expected("time_zone", {"Europe/Paris"}),
+      Expected("sql_mode", {"TRADITIONAL"}),
+    }
+  },
+  {
+    "SET time_zone = 'Europe/Paris', sql_mode = IFNULL(NULL,\"STRICT_TRANS_TABLES\")",
+    {
+      Expected("time_zone", {"Europe/Paris"}),
+      Expected("sql_mode", {"IFNULL(NULL,\"STRICT_TRANS_TABLES\")"}),
+    }
+  },
+  // Disabled: the quote after `utf8` is unbalanced in this statement.
+  // FIXME: this should return an error
+  //{ "SET sql_mode = 'TRADITIONAL', NAMES 'utf8 COLLATE 'unicode_ci'", { Expected("sql_mode", {"TRADITIONAL"}), Expected("names", {"utf8", "unicode_ci"}) } },
+  {
+    "SET sql_mode = 'TRADITIONAL', NAMES 'utf8' COLLATE 'unicode_ci'",
+    {
+      Expected("sql_mode", {"TRADITIONAL"}),
+      Expected("names", {"utf8", "unicode_ci"}),
+    }
+  },
+  {
+    "SET @@SESSION.sql_mode = CONCAT(CONCAT(@@sql_mode, ',STRICT_ALL_TABLES'), ',NO_AUTO_VALUE_ON_ZERO'), @@SESSION.sql_auto_is_null = 0, @@SESSION.wait_timeout = 2147483",
+    {
+      Expected("sql_mode", {"CONCAT(CONCAT(@@sql_mode, ',STRICT_ALL_TABLES'), ',NO_AUTO_VALUE_ON_ZERO')"}),
+      Expected("sql_auto_is_null", {"0"}),
+      Expected("wait_timeout", {"2147483"}),
+    }
+  },
+  {
+    "SET @@LOCAL.sql_mode = CONCAT(CONCAT(@@sql_mode, ',STRICT_ALL_TABLES'), ',NO_AUTO_VALUE_ON_ZERO'), @@SESSION.sql_auto_is_null = 0, @@SESSION.wait_timeout = 2147483",
+    {
+      Expected("sql_mode", {"CONCAT(CONCAT(@@sql_mode, ',STRICT_ALL_TABLES'), ',NO_AUTO_VALUE_ON_ZERO')"}),
+      Expected("sql_auto_is_null", {"0"}),
+      Expected("wait_timeout", {"2147483"}),
+    }
+  },
+  {
+    "set autocommit=1, sql_mode = concat(@@sql_mode,',STRICT_TRANS_TABLES')",
+    {
+      Expected("autocommit", {"1"}),
+      Expected("sql_mode", {"concat(@@sql_mode,',STRICT_TRANS_TABLES')"}),
+    }
+  },
+  {
+    "SET NAMES utf8, @@SESSION.sql_mode = CONCAT(REPLACE(REPLACE(REPLACE(@@sql_mode, 'STRICT_TRANS_TABLES', ''), 'STRICT_ALL_TABLES', ''), 'TRADITIONAL', ''), ',NO_AUTO_VALUE_ON_ZERO'), @@SESSION.sql_auto_is_null = 0, @@SESSION.wait_timeout = 3600",
+    {
+      Expected("names", {"utf8"}),
+      Expected("sql_mode", {"CONCAT(REPLACE(REPLACE(REPLACE(@@sql_mode, 'STRICT_TRANS_TABLES', ''), 'STRICT_ALL_TABLES', ''), 'TRADITIONAL', ''), ',NO_AUTO_VALUE_ON_ZERO')"}),
+      Expected("sql_auto_is_null", {"0"}),
+      Expected("wait_timeout", {"3600"}),
+    }
+  },
+  {
+    "SET NAMES utf8, @@LOCAL.sql_mode = CONCAT(REPLACE(REPLACE(REPLACE(@@sql_mode, 'STRICT_TRANS_TABLES', ''), 'STRICT_ALL_TABLES', ''), 'TRADITIONAL', ''), ',NO_AUTO_VALUE_ON_ZERO'), @@LOCAL.sql_auto_is_null = 0, @@LOCAL.wait_timeout = 3600",
+    {
+      Expected("names", {"utf8"}),
+      Expected("sql_mode", {"CONCAT(REPLACE(REPLACE(REPLACE(@@sql_mode, 'STRICT_TRANS_TABLES', ''), 'STRICT_ALL_TABLES', ''), 'TRADITIONAL', ''), ',NO_AUTO_VALUE_ON_ZERO')"}),
+      Expected("sql_auto_is_null", {"0"}),
+      Expected("wait_timeout", {"3600"}),
+    }
+  },
+  {
+    "set autocommit=1, session_track_schema=1, sql_mode = concat(@@sql_mode,',STRICT_TRANS_TABLES'), @@SESSION.net_write_timeout=7200",
+    {
+      Expected("autocommit", {"1"}),
+      Expected("session_track_schema", {"1"}),
+      Expected("sql_mode", {"concat(@@sql_mode,',STRICT_TRANS_TABLES')"}),
+      Expected("net_write_timeout", {"7200"}),
+    }
+  },
+  {
+    "set autocommit=1, session_track_schema=1, sql_mode = concat(@@sql_mode,',STRICT_TRANS_TABLES'), @@LOCAL.net_write_timeout=7200",
+    {
+      Expected("autocommit", {"1"}),
+      Expected("session_track_schema", {"1"}),
+      Expected("sql_mode", {"concat(@@sql_mode,',STRICT_TRANS_TABLES')"}),
+      Expected("net_write_timeout", {"7200"}),
+    }
+  },
+  // Multiple set queries involving 'NULL' values should be properly parsed with and without spaces
+  {
+    "set character_set_results=null, names latin7, character_set_client='utf8mb4'",
+    {
+      Expected("character_set_results", { "null" } ),
+      Expected("names", { "latin7" } ),
+      Expected("character_set_client", { "utf8mb4" } ),
+    }
+  },
+  {
+    "SET character_set_results=NULL, NAMES latin7, character_set_client='utf8mb4'",
+    {
+      Expected("character_set_results", { "NULL" } ),
+      Expected("names", { "latin7" } ),
+      Expected("character_set_client", { "utf8mb4" } ),
+    }
+  },
+  {
+    "set character_set_results=null,names latin7,character_set_client='utf8mb4'",
+    {
+      Expected("character_set_results", { "null" } ),
+      Expected("names", { "latin7" } ),
+      Expected("character_set_client", { "utf8mb4" } ),
+    }
+  },
+  {
+    "SET character_set_results=NULL,NAMES latin7,character_set_client='utf8mb4'",
+    {
+      Expected("character_set_results", { "NULL" } ),
+      Expected("names", { "latin7" } ),
+      Expected("character_set_client", { "utf8mb4" } ),
+    }
+  },
+};
+
+
+// Test driver: announces the TAP plan, then runs every case table through
+// TestParse in a fixed order and returns the TAP exit status.
+int main(int argc, char** argv) {
+  CommandLine cl;
+
+  // Stop before planning anything when getEnv() reports non-zero.
+  // NOTE(review): assumes non-zero means the environment could not be read - confirm.
+  if (cl.getEnv()) {
+    return exit_status();
+  }
+
+  // Total number of table entries across all suites.
+  unsigned int total_cases =
+    arraysize(sql_mode) +
+    arraysize(time_zone) +
+    arraysize(session_track_gtids) +
+    arraysize(character_set_results) +
+    arraysize(names) +
+    arraysize(various) +
+    arraysize(multiple);
+
+  // Two planned checks per entry.
+  // NOTE(review): presumably TestParse reports two results per case - confirm.
+  plan(total_cases * 2);
+
+  TestParse(sql_mode, arraysize(sql_mode), "sql_mode");
+  TestParse(time_zone, arraysize(time_zone), "time_zone");
+  TestParse(session_track_gtids, arraysize(session_track_gtids), "session_track_gtids");
+  TestParse(character_set_results, arraysize(character_set_results), "character_set_results");
+  TestParse(names, arraysize(names), "names");
+  TestParse(various, arraysize(various), "various");
+  TestParse(multiple, arraysize(multiple), "multiple");
+
+  return exit_status();
+}