diff --git a/include/set_parser.h b/include/set_parser.h index 3ee7a6643..36391d15a 100644 --- a/include/set_parser.h +++ b/include/set_parser.h @@ -4,8 +4,17 @@ #include #include +#include "re2/re2.h" +#include "re2/regexp.h" + + class SetParser { private: + // parse1v2 variables used to compile the RE only once + bool parse1v2_init; + re2::RE2::Options * parse1v2_opt2; + re2::RE2 * parse1v2_re; + std::string parse1v2_pattern; std::string query; #ifdef PARSERDEBUG int verbosity; @@ -15,10 +24,24 @@ class SetParser { public: SetParser(std::string q); #endif + // set_query() changes the query associated with a SetParser. + // This allows parsing multiple queries using just a single SetParser. + // At the moment this makes sense only when using parse1v2() because it + // allows the regular expression to be compiled only once + void set_query(const std::string& q); + // First implementation of the general parser + // It uses a single complex RE pattern that is hardcoded std::map> parse1(); + // Second implementation of the general parser. + // It uses a RE pattern that is built at runtime.
+ // The final pattern used by parse1v2() is a lot longer than the one used by parse1() + // making it very difficult to read, but the code generating it should be clear std::map> parse1v2(); + void generateRE_parse1v2(); + // First implementation of the parser for TRANSACTION ISOLATION LEVEL and TRANSACTION READ/WRITE std::map> parse2(); std::string parse_character_set(); + ~SetParser(); }; diff --git a/lib/set_parser.cpp b/lib/set_parser.cpp index 6416efe09..73e072536 100644 --- a/lib/set_parser.cpp +++ b/lib/set_parser.cpp @@ -1,6 +1,4 @@ #include "set_parser.h" -#include "re2/re2.h" -#include "re2/regexp.h" #include "gen_utils.h" #include #include @@ -31,6 +29,18 @@ SetParser::SetParser(std::string nq, int verb) { #else SetParser::SetParser(std::string nq) { #endif + parse1v2_init = false; + set_query(nq); +} + +SetParser::~SetParser() { + if (parse1v2_init == true) { + delete parse1v2_opt2; + delete parse1v2_re; + } +} + +void SetParser::set_query(const std::string& nq) { int query_no_space_length = nq.length(); char *query_no_space=(char *)malloc(query_no_space_length+1); memcpy(query_no_space,nq.c_str(),query_no_space_length); @@ -40,6 +50,7 @@ SetParser::SetParser(std::string nq) { free(query_no_space); } + #define QUOTES "(?:'|\"|`)?"
#define SPACES " *" #define NAMES "(NAMES)" @@ -119,14 +130,10 @@ VALGRIND_ENABLE_ERROR_REPORTING; #define VAR_VALUE_P1 "(" VAR_VALUE_P1_1 VAR_VALUE_P1_2 VAR_VALUE_P1_3 VAR_VALUE_P1_4 VAR_VALUE_P1_5 VAR_VALUE_P1_6 ")" */ - -std::map> SetParser::parse1v2() { - +void SetParser::generateRE_parse1v2() { vector quote_symbol = {"\"", "'", "`"}; - vector var_patterns = {}; - { // this block needs to be added at the very beginning, otherwise REPLACE|IFNULL|CONCAT may be considered simple words // sw0 matches: @@ -243,14 +250,11 @@ std::map> SetParser::parse1v2() { proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str()); - re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet); - opt2->set_case_sensitive(false); - opt2->set_longest_match(false); + parse1v2_opt2 = new re2::RE2::Options(RE2::Quiet); + parse1v2_opt2->set_case_sensitive(false); + parse1v2_opt2->set_longest_match(false); - re2::RE2 re0("^\\s*SET\\s+", *opt2); - re2::RE2::Replace(&query, re0, ""); - std::map> result = {}; string var_1_0 = "(?:@\\w+|\\w+)"; // @name|name @@ -272,7 +276,6 @@ std::map> SetParser::parse1v2() { #ifdef PARSERDEBUG if (verbosity > 0) { - cout << query << endl; cout << var_value << endl; cout << name_value << endl; } @@ -300,12 +303,27 @@ VALGRIND_DISABLE_ERROR_REPORTING; cout << pattern << endl; } #endif - re2::RE2 re(pattern, *opt2); + //re2::RE2 re(pattern, *opt2); + parse1v2_pattern = pattern; + parse1v2_re = new re2::RE2(parse1v2_pattern, *parse1v2_opt2); + parse1v2_init = true; +} + +std::map> SetParser::parse1v2() { + + std::map> result = {}; + + if (parse1v2_init == false) { + generateRE_parse1v2(); + } + + re2::RE2 re0("^\\s*SET\\s+", *parse1v2_opt2); + re2::RE2::Replace(&query, re0, ""); VALGRIND_ENABLE_ERROR_REPORTING; std::string var; std::string value1, value2, value3, value4, value5; re2::StringPiece input(query); - while (re2::RE2::Consume(&input, re, &value1, &value2, &value3, &value4, &value5)) { + while (re2::RE2::Consume(&input, 
*parse1v2_re, &value1, &value2, &value3, &value4, &value5)) { // FIXME: verify if we reached end of query. Did we parse everything? std::vector op; #ifdef DEBUG @@ -337,7 +355,7 @@ VALGRIND_ENABLE_ERROR_REPORTING; result[key] = op; } - delete opt2; + //delete opt2; return result; } diff --git a/test/tap/tests/setparser_test3.cpp b/test/tap/tests/setparser_test3.cpp index cf649ee53..20ced5f33 100644 --- a/test/tap/tests/setparser_test3.cpp +++ b/test/tap/tests/setparser_test3.cpp @@ -6,6 +6,8 @@ #include "setparser_test_common.h" +SetParser *parser = NULL; + void TestParse(const Test* tests, int ntests, const std::string& title) { for (int i = 0; i < ntests; i++) { std::map> data; @@ -13,9 +15,12 @@ void TestParse(const Test* tests, int ntests, const std::string& title) { data[it->var] = it->values; } - SetParser parser(tests[i].query, 1); + //SetParser parser(tests[i].query, 1); //std::map> result = parser.parse1(); - std::map> result = parser.parse1v2(); + //std::map> result = parser.parse1v2(); + + parser->set_query(tests[i].query); + std::map> result = parser->parse1v2(); cout << endl; printMap("result", result); @@ -43,6 +48,7 @@ int main(int argc, char** argv) { p += arraysize(Set1_v2); p *= 2; plan(p); + parser = new SetParser("", 1); TestParse(sql_mode, arraysize(sql_mode), "sql_mode"); TestParse(time_zone, arraysize(time_zone), "time_zone"); TestParse(session_track_gtids, arraysize(session_track_gtids), "session_track_gtids");