You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
proxysql/lib/PgSQL_Set_Stmt_Parser.cpp

271 lines
8.7 KiB

#include "PgSQL_Set_Stmt_Parser.h"
#include "gen_utils.h"
#include <string>
#include <vector>
#include <map>
#include <cassert>
#include <utility> // for std::pair
//#ifdef PARSERDEBUG
#include <iostream>
//#endif
#ifdef DEBUG
//#define VALGRIND_ENABLE_ERROR_REPORTING
//#define VALGRIND_DISABLE_ERROR_REPORTING
#include "valgrind.h"
#else
#define VALGRIND_ENABLE_ERROR_REPORTING
#define VALGRIND_DISABLE_ERROR_REPORTING
#endif // DEBUG
using namespace std;
#define MULTI_STATEMENTS_USE "Unable to parse multi-statements command with USE statement"
/**
 * @brief Strip one pair of matching enclosing quotes from a string, in place.
 *
 * The string is changed only when it is longer than two characters and its
 * first and last characters are the same quote character: single quote,
 * double quote or backtick. Shorter strings, including the bare pairs ''
 * and "", are left untouched.
 *
 * @param v String to unquote; modified in place.
 */
static void remove_quotes(string& v) {
	if (v.length() <= 2) {
		return;
	}
	const char quote = v.front();
	if (quote != v.back()) {
		return;
	}
	switch (quote) {
	case '\'':
	case '"':
	case '`':
		// Drop the closing quote first, then the opening one.
		v.pop_back();
		v.erase(v.begin());
		break;
	default:
		break;
	}
}
/**
 * @brief Construct a parser for the given statement text.
 *
 * Marks the parse1v2() regex as not yet built and hands the text to
 * set_query(). When compiled with PARSERDEBUG the constructor takes an
 * additional verbosity level that is stored in the `verbosity` member.
 *
 * @param nq   Statement text to parse.
 * @param verb (PARSERDEBUG builds only) verbosity level.
 */
#ifdef PARSERDEBUG
PgSQL_Set_Stmt_Parser::PgSQL_Set_Stmt_Parser(std::string nq, int verb) {
#else
PgSQL_Set_Stmt_Parser::PgSQL_Set_Stmt_Parser(std::string nq) {
#endif
#ifdef PARSERDEBUG
	this->verbosity = verb;
#endif
	// The regex objects are created lazily, on first use.
	this->parse1v2_init = false;
	this->set_query(nq);
}
/**
 * @brief Release the regex objects, if they were ever created.
 *
 * parse1v2_opt2 and parse1v2_re are only allocated once parse1v2_init has
 * been set, so they are deleted only in that case.
 */
PgSQL_Set_Stmt_Parser::~PgSQL_Set_Stmt_Parser() {
	if (!parse1v2_init) {
		return; // nothing was allocated
	}
	delete parse1v2_opt2;
	delete parse1v2_re;
}
/**
 * @brief Store a space-normalized copy of the statement in `query`.
 *
 * The text is copied into a scratch buffer, passed to remove_spaces() and the
 * resulting NUL-terminated string is assigned to the `query` member.
 *
 * A std::string is used as the scratch buffer so that no manual
 * malloc()/free() pairing is needed and an allocation failure is reported as
 * std::bad_alloc instead of a NULL pointer being written through.
 *
 * NOTE(review): remove_spaces() is declared outside this file; it is assumed
 * to rewrite the NUL-terminated buffer in place without growing it - confirm
 * against gen_utils.
 *
 * @param nq Statement text to normalize and store.
 */
void PgSQL_Set_Stmt_Parser::set_query(const std::string& nq) {
	std::string scratch(nq);
	// &scratch[0] is a writable, NUL-terminated buffer of scratch.size() + 1 bytes.
	remove_spaces(&scratch[0]);
	// Re-read as a C string: the normalized text ends at the first NUL, which
	// may now sit before scratch.size().
	query.assign(scratch.c_str());
}
void PgSQL_Set_Stmt_Parser::generateRE_parse1v2() {
#ifdef DEBUG
proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str());
#endif // DEBUG
//const std::string pattern = "(?:(?P<scope>SESSION|LOCAL)\\s+)?(?:(?P<parameter>[^=\\s][^=;]*?)\\s*(?:=|TO)\\s*(?P<value>[^;]+)|(?P<parameter_kw>TIME\\s+ZONE|NAMES|SCHEMA|AUTHORIZATION|TRANSACTION\\s+ISOLATION\\s+LEVEL|CHARACTERISTICS\\s+AS\\s+TRANSACTION\\s+ISOLATION\\s+LEVEL)\\s+(?P<value_kw>[^;]+))\\s*;?\\s*";
// Function Call: Check if Group 3 is populated.
// Literal: Check if Group 4 is populated.
//const std::string pattern = "(?:(SESSION|LOCAL)\\s+)?((?:\\S+(?:\\s+\\S+)*?))(?:\\s+(?:TO|=)\\s+|\\s+)(?:(\\w+\\s*\\([^)]*\\))|((?:'(?:''|[^'])*'|-?\\d+(?:\\.\\d+)?(?:[eE][+-]?\\d+)?|t|true|f|false|on|off|default|\\S+)))\\s*;?";
//const std::string pattern = "(?:(SESSION|LOCAL)\\s+)?((?:\\S+(?:\\s+\\S+)*?))(?:\\s*(?:TO|=)\\s*|\\s+)(?:(\\w+\\s*\\([^)]*\\))|((?:'(?:''|[^'])*'|-?\\d+(?:\\.\\d+)?(?:[eE][+-]?\\d+)?|true|t|1|yes|false|f|0|no|on|off|default|\\S+)))\\s*;?";
const std::string pattern = R"((?:(SESSION)\s+)?((?:\S+(?:\s+\S+)*?))(?:\s*(?:TO|=)\s*|\s+)(?:(\w+\s*\([^)]*\))|((?:'(?:''|[^'])*'|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|[^;]+)))\s*;?)";
#ifdef DEBUG
VALGRIND_DISABLE_ERROR_REPORTING;
#endif // DEBUG
#ifdef PARSERDEBUG
if (verbosity > 0) {
cout << pattern << endl;
}
#endif
parse1v2_opt2 = new re2::RE2::Options(RE2::Quiet);
parse1v2_opt2->set_case_sensitive(false);
parse1v2_opt2->set_longest_match(false);
parse1v2_pattern = pattern;
parse1v2_re = new re2::RE2(parse1v2_pattern, *parse1v2_opt2);
parse1v2_init = true;
}
/**
 * @brief Parse the stored statement as a list of `name = value` assignments.
 *
 * The leading `SET` keyword and any trailing whitespace/semicolons are removed
 * from the `query` member (the member itself is modified). The remainder is
 * then consumed item by item with the regex built by generateRE_parse1v2().
 *
 * For every item:
 *  - if the value was captured as a function call, parsing is abandoned and an
 *    empty map is returned;
 *  - items with an empty name or an empty value are skipped;
 *  - enclosing quotes are stripped from the name and the value, trailing
 *    whitespace and commas are trimmed from the value, and the bare pairs ''
 *    and "" are stored as an empty string;
 *  - the name is lower-cased and used as the map key. A later item with the
 *    same key overwrites the earlier one.
 *
 * The captured scope keyword is only logged; it is not part of the result.
 *
 * @return Map from lower-cased parameter name to a one-element vector holding
 *         its value, or an empty map when a function-call value was seen or
 *         when part of the input could not be consumed.
 */
std::map<std::string,std::vector<std::string>> PgSQL_Set_Stmt_Parser::parse1v2() {
	std::map<std::string,std::vector<std::string>> result = {};
	if (parse1v2_init == false) {
		generateRE_parse1v2();
	}
	re2::RE2 re0("^\\s*SET\\s+", *parse1v2_opt2);
	re2::RE2::Replace(&query, re0, "");
	re2::RE2 re1("(\\s|;)+$", *parse1v2_opt2); // remove trailing spaces and semicolon
	re2::RE2::Replace(&query, re1, "");
#ifdef DEBUG
	// Counterpart of the VALGRIND_DISABLE_ERROR_REPORTING issued while the regex is built.
	VALGRIND_ENABLE_ERROR_REPORTING;
#endif // DEBUG
	std::string scope, param_name, param_val, param_val_func;
	re2::StringPiece input(query);
	while (re2::RE2::Consume(&input, *parse1v2_re, &scope, &param_name, &param_val_func, &param_val)) {
#ifdef DEBUG
		proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "SET parsing: scope='%s' , parameter name='%s' , parameter value='%s' parameter_value_func='%s'\n", scope.c_str(), param_name.c_str(), param_val.c_str(), param_val_func.c_str());
#endif // DEBUG
		// Function-call values are not supported: reject the whole statement.
		if (param_val_func.empty() == false) return {};
		if (param_name.empty() || param_val.empty()) {
			continue;
		}
		std::string key = param_name;
		remove_quotes(key);
		// Trim trailing whitespace and list separators from the value.
		size_t pos = param_val.find_last_not_of(" \n\r\t,");
		if (pos != param_val.npos) {
			param_val.erase(pos+1);
		}
		std::vector<std::string> op;
		if (param_val == "''" || param_val == "\"\"") {
			// remove_quotes() leaves two-character strings alone, so map the
			// empty quoted literal to an empty value explicitly.
			op.push_back("");
		} else {
			remove_quotes(param_val);
			op.push_back(param_val);
		}
		// tolower() requires a value representable as unsigned char; passing a
		// negative plain char (bytes >= 0x80) is undefined behaviour.
		std::transform(key.begin(), key.end(), key.begin(),
			[](unsigned char c) { return static_cast<char>(::tolower(c)); });
		result[key] = op;
	}
	// Anything left over means the statement was not fully understood.
	if (input.size() != 0) {
#ifdef PARSERDEBUG
		if (verbosity > 0) {
			cout << "Failed to parse: " << input << endl;
		}
#endif
		result = {};
	}
	return result;
}
/**
 * @brief Parse `SET [SESSION] TRANSACTION ...` characteristics.
 *
 * The leading `SET` keyword is removed from the `query` member (the member
 * itself is modified). The remainder is matched, case-insensitively, against
 * either
 *   `[SESSION] TRANSACTION ISOLATION LEVEL <level>` or
 *   `[SESSION] TRANSACTION READ WRITE|ONLY`.
 *
 * For each match the key is `<scope>:<characteristic>` in lower case, where
 * <scope> is the captured SESSION keyword or empty, and <characteristic> is
 * the captured `ISOLATION LEVEL` or `READ` text. The value is the captured
 * level or access mode in upper case.
 *
 * The regex options object lives on the stack so it cannot leak if any
 * operation in between throws.
 *
 * @return Map from key to a one-element vector holding the value; empty when
 *         nothing matched.
 */
std::map<std::string,std::vector<std::string>> PgSQL_Set_Stmt_Parser::parse2() {
#ifdef DEBUG
	proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str());
#endif // DEBUG
	re2::RE2::Options opt2(RE2::Quiet);
	opt2.set_case_sensitive(false);
	opt2.set_longest_match(false);
	re2::RE2 re0("^\\s*SET\\s+", opt2);
	re2::RE2::Replace(&query, re0, "");
	std::map<std::string,std::vector<std::string>> result;
	// Capture groups: 1 = scope, 2 = "ISOLATION LEVEL", 3 = isolation level,
	// 4 = "READ", 5 = access mode (WRITE|ONLY).
	const std::string pattern="(|SESSION) *TRANSACTION(?: +)(?:(?:(ISOLATION(?: +)LEVEL)(?: +)(REPEATABLE(?: +)READ|READ(?: +)COMMITTED|READ(?: +)UNCOMMITTED|SERIALIZABLE))|(?:(READ)(?: +)(WRITE|ONLY)))";
	re2::RE2 re(pattern, opt2);
	std::string value1, value2, value3, value4, value5;
	re2::StringPiece input(query);
	// The <cctype> functions require a value representable as unsigned char;
	// passing a negative plain char is undefined behaviour.
	const auto to_upper = [](unsigned char c) { return static_cast<char>(::toupper(c)); };
	const auto to_lower = [](unsigned char c) { return static_cast<char>(::tolower(c)); };
	while (re2::RE2::Consume(&input, re, &value1, &value2, &value3, &value4, &value5)) {
		std::vector<std::string> op;
#ifdef DEBUG
		proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "SET parsing: v1='%s' , v2='%s' , v3='%s' , v4='%s' , v5='%s'\n", value1.c_str(), value2.c_str(), value3.c_str(), value4.c_str(), value5.c_str());
#endif // DEBUG
		std::string key;
		if (value2 != "") { // isolation level
			key = value1 + ":" + value2;
			std::transform(value3.begin(), value3.end(), value3.begin(), to_upper);
			op.push_back(value3);
		} else { // READ WRITE | READ ONLY
			key = value1 + ":" + value4;
			std::transform(value5.begin(), value5.end(), value5.begin(), to_upper);
			op.push_back(value5);
		}
		std::transform(key.begin(), key.end(), key.begin(), to_lower);
		result[key] = op;
	}
	return result;
}
#if 0
/**
 * @brief (Disabled) Extract the encoding name from a client_encoding / names assignment.
 *
 * Removes the leading `SET` keyword from the `query` member, then matches
 * `client_encoding|names`, followed by `=` or `TO`, followed by an optionally
 * quoted name made of letters, digits and underscores (case-insensitive).
 *
 * @return The captured name, or an empty string when the pattern does not match.
 */
std::string PgSQL_Set_Stmt_Parser::parse_character_set() {
#ifdef DEBUG
	proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str());
#endif // DEBUG
	re2::RE2::Options opts(RE2::Quiet);
	opts.set_case_sensitive(false);
	opts.set_longest_match(false);

	re2::RE2 strip_set("^\\s*SET\\s+", opts);
	re2::RE2::Replace(&query, strip_set, "");

	const std::string pattern = "(client_encoding|names)\\s*(=|TO)\\s*['\"]?([A-Z_0-9]+)['\"]?";
	re2::RE2 re(pattern, opts);

	// Only the third capture (the encoding name) is returned.
	std::string name_kw, assign_op, encoding;
	re2::StringPiece input(query);
	re2::RE2::Consume(&input, re, &name_kw, &assign_op, &encoding);
	return encoding;
}
#endif
/**
 * @brief Return a copy of @p q with comments removed.
 *
 * Recognized comments:
 *  - block comments delimited by the two-character sequences slash-star and
 *    star-slash (not nested);
 *  - single-line comments starting with `#` or `--`, running up to the next
 *    newline. The terminating newline is dropped together with the comment.
 *
 * The scan is purely character based: comment markers that appear inside
 * quoted literals are treated as comments too.
 *
 * The input is taken from the parameter @p q; the `query` member is neither
 * read nor modified.
 *
 * @param q Text to strip comments from.
 * @return The text with all recognized comments removed.
 */
std::string PgSQL_Set_Stmt_Parser::remove_comments(const std::string& q) {
	std::string result = "";
	bool in_multiline_comment = false;
	const size_t len = q.size();
	for (size_t i = 0; i < len; ++i) {
		const char current_char = q[i];
		const bool has_next = (i + 1 < len);
		// Check for multiline comment start
		if (current_char == '/' && has_next && q[i + 1] == '*') {
			in_multiline_comment = true;
			i++; // Skip the '*'
			continue;
		}
		// Check for multiline comment end
		if (in_multiline_comment && current_char == '*' && has_next && q[i + 1] == '/') {
			in_multiline_comment = false;
			i++; // Skip the '/'
			continue;
		}
		// Skip characters inside multiline comment
		if (in_multiline_comment) {
			continue;
		}
		// Check for single-line comments
		if (current_char == '#' || (current_char == '-' && has_next && q[i + 1] == '-')) {
			// Advance to the newline (or end of input); the loop increment
			// then steps past the newline itself.
			while (i < len && q[i] != '\n') {
				i++;
			}
			continue;
		}
		// Append the character to the result if it's not a comment
		result += current_char;
	}
	return result;
}