Make SetParser reusable

Added set_query() to change the query.
parse1v2() is now able to do the following only once:
- generate the pattern
- compile the regex
pull/4274/head
René Cannaò 3 years ago
parent b167adc5af
commit dcd4a4f72f

@ -4,8 +4,17 @@
#include <map>
#include <vector>
#include "re2/re2.h"
#include "re2/regexp.h"
class SetParser {
private:
// parse1v2 variables used to compile the RE only once
bool parse1v2_init;
re2::RE2::Options * parse1v2_opt2;
re2::RE2 * parse1v2_re;
std::string parse1v2_pattern;
std::string query;
#ifdef PARSERDEBUG
int verbosity;
@ -15,10 +24,24 @@ class SetParser {
public:
SetParser(std::string q);
#endif
// set_query() changes the query associated with a SetParser.
// This allows parsing multiple queries using just a single SetParser.
// At the moment this makes sense only when using parse1v2() because it
// allows the regular expression to be compiled only once
void set_query(const std::string& q);
// First implementation of the general parser
// It uses a single complex RE pattern that is hardcoded
std::map<std::string, std::vector<std::string>> parse1();
// Second implementation of the general parser.
// It uses a RE pattern that is built at runtime.
// The final pattern used by parse1v2() is a lot longer than the one used by parse1()
// making it very difficult to read, but the code generating it should be clear
std::map<std::string, std::vector<std::string>> parse1v2();
void generateRE_parse1v2();
// First implementation of the parser for TRANSACTION ISOLATION LEVEL and TRANSACTION READ/WRITE
std::map<std::string, std::vector<std::string>> parse2();
std::string parse_character_set();
~SetParser();
};

@ -1,6 +1,4 @@
#include "set_parser.h"
#include "re2/re2.h"
#include "re2/regexp.h"
#include "gen_utils.h"
#include <string>
#include <vector>
@ -31,6 +29,18 @@ SetParser::SetParser(std::string nq, int verb) {
#else
SetParser::SetParser(std::string nq) {
#endif
// The parse1v2() regex has not been generated yet. The destructor checks this
// flag before freeing parse1v2_opt2 / parse1v2_re, so it must start as false.
parse1v2_init = false;
// Associate the initial query with this parser; set_query() can replace it later.
set_query(nq);
}
SetParser::~SetParser() {
// parse1v2_opt2 and parse1v2_re are only allocated once generateRE_parse1v2()
// has run and raised parse1v2_init; until then there is nothing to release.
if (!parse1v2_init) {
return;
}
delete parse1v2_opt2;
delete parse1v2_re;
}
void SetParser::set_query(const std::string& nq) {
int query_no_space_length = nq.length();
char *query_no_space=(char *)malloc(query_no_space_length+1);
memcpy(query_no_space,nq.c_str(),query_no_space_length);
@ -40,6 +50,7 @@ SetParser::SetParser(std::string nq) {
free(query_no_space);
}
#define QUOTES "(?:'|\"|`)?"
#define SPACES " *"
#define NAMES "(NAMES)"
@ -119,14 +130,10 @@ VALGRIND_ENABLE_ERROR_REPORTING;
#define VAR_VALUE_P1 "(" VAR_VALUE_P1_1 VAR_VALUE_P1_2 VAR_VALUE_P1_3 VAR_VALUE_P1_4 VAR_VALUE_P1_5 VAR_VALUE_P1_6 ")"
*/
std::map<std::string,std::vector<std::string>> SetParser::parse1v2() {
void SetParser::generateRE_parse1v2() {
vector<string> quote_symbol = {"\"", "'", "`"};
vector<string> var_patterns = {};
{
// this block needs to be added at the very beginning, otherwise REPLACE|IFNULL|CONCAT may be considered simple words
// sw0 matches:
@ -243,14 +250,11 @@ std::map<std::string,std::vector<std::string>> SetParser::parse1v2() {
proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str());
re2::RE2::Options *opt2=new re2::RE2::Options(RE2::Quiet);
opt2->set_case_sensitive(false);
opt2->set_longest_match(false);
parse1v2_opt2 = new re2::RE2::Options(RE2::Quiet);
parse1v2_opt2->set_case_sensitive(false);
parse1v2_opt2->set_longest_match(false);
re2::RE2 re0("^\\s*SET\\s+", *opt2);
re2::RE2::Replace(&query, re0, "");
std::map<std::string,std::vector<std::string>> result = {};
string var_1_0 = "(?:@\\w+|\\w+)"; // @name|name
@ -272,7 +276,6 @@ std::map<std::string,std::vector<std::string>> SetParser::parse1v2() {
#ifdef PARSERDEBUG
if (verbosity > 0) {
cout << query << endl;
cout << var_value << endl;
cout << name_value << endl;
}
@ -300,12 +303,27 @@ VALGRIND_DISABLE_ERROR_REPORTING;
cout << pattern << endl;
}
#endif
re2::RE2 re(pattern, *opt2);
//re2::RE2 re(pattern, *opt2);
parse1v2_pattern = pattern;
parse1v2_re = new re2::RE2(parse1v2_pattern, *parse1v2_opt2);
parse1v2_init = true;
}
std::map<std::string,std::vector<std::string>> SetParser::parse1v2() {
std::map<std::string,std::vector<std::string>> result = {};
if (parse1v2_init == false) {
generateRE_parse1v2();
}
re2::RE2 re0("^\\s*SET\\s+", *parse1v2_opt2);
re2::RE2::Replace(&query, re0, "");
VALGRIND_ENABLE_ERROR_REPORTING;
std::string var;
std::string value1, value2, value3, value4, value5;
re2::StringPiece input(query);
while (re2::RE2::Consume(&input, re, &value1, &value2, &value3, &value4, &value5)) {
while (re2::RE2::Consume(&input, *parse1v2_re, &value1, &value2, &value3, &value4, &value5)) {
// FIXME: verify if we reached end of query. Did we parse everything?
std::vector<std::string> op;
#ifdef DEBUG
@ -337,7 +355,7 @@ VALGRIND_ENABLE_ERROR_REPORTING;
result[key] = op;
}
delete opt2;
//delete opt2;
return result;
}

@ -6,6 +6,8 @@
#include "setparser_test_common.h"
SetParser *parser = NULL;
void TestParse(const Test* tests, int ntests, const std::string& title) {
for (int i = 0; i < ntests; i++) {
std::map<std::string, std::vector<std::string>> data;
@ -13,9 +15,12 @@ void TestParse(const Test* tests, int ntests, const std::string& title) {
data[it->var] = it->values;
}
SetParser parser(tests[i].query, 1);
//SetParser parser(tests[i].query, 1);
//std::map<std::string, std::vector<std::string>> result = parser.parse1();
std::map<std::string, std::vector<std::string>> result = parser.parse1v2();
//std::map<std::string, std::vector<std::string>> result = parser.parse1v2();
parser->set_query(tests[i].query);
std::map<std::string, std::vector<std::string>> result = parser->parse1v2();
cout << endl;
printMap("result", result);
@ -43,6 +48,7 @@ int main(int argc, char** argv) {
p += arraysize(Set1_v2);
p *= 2;
plan(p);
parser = new SetParser("", 1);
TestParse(sql_mode, arraysize(sql_mode), "sql_mode");
TestParse(time_zone, arraysize(time_zone), "time_zone");
TestParse(session_track_gtids, arraysize(session_track_gtids), "session_track_gtids");

Loading…
Cancel
Save