Better parsing of USE queries sent with COM_QUERY #4598

pull/4605/head
Rene Cannao 2 years ago
parent 01d1fc9828
commit 95fa15cdf9

@ -42,6 +42,11 @@ class SetParser {
// First implementation of the parser for TRANSACTION ISOLATION LEVEL and TRANSACTION READ/WRITE
std::map<std::string, std::vector<std::string>> parse2();
std::string parse_character_set();
std::string parse_USE_query();
#ifdef DEBUG
// built-in testing
void test_parse_USE_query();
#endif // DEBUG
~SetParser();
};

@ -6138,30 +6138,27 @@ void MySQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C
if (session_type == PROXYSQL_SESSION_MYSQL) {
__sync_fetch_and_add(&MyHGM->status.frontend_use_db, 1);
string nq=string((char *)pkt->ptr+sizeof(mysql_hdr)+1,pkt->size-sizeof(mysql_hdr)-1);
RE2::GlobalReplace(&nq,(char *)"(?U)/\\*.*\\*/",(char *)" ");
char *sn_tmp = (char *)nq.c_str();
while (sn_tmp < ( nq.c_str() + nq.length() - 4 ) && *sn_tmp == ' ')
sn_tmp++;
//char *schemaname=strdup(nq.c_str()+4);
char *schemaname=strdup(sn_tmp+3);
char *schemanameptr=trim_spaces_and_quotes_in_place(schemaname);
// handle cases like "USE `schemaname`
if(schemanameptr[0]=='`' && schemanameptr[strlen(schemanameptr)-1]=='`') {
schemanameptr[strlen(schemanameptr)-1]='\0';
schemanameptr++;
}
client_myds->myconn->userinfo->set_schemaname(schemanameptr,strlen(schemanameptr));
free(schemaname);
if (mirror==false) {
SetParser parser(nq);
string schemaname = parser.parse_USE_query();
if (schemaname != "") {
client_myds->myconn->userinfo->set_schemaname((char *)schemaname.c_str(),schemaname.length());
if (mirror==false) {
RequestEnd(NULL);
}
l_free(pkt->size,pkt->ptr);
client_myds->setDSS_STATE_QUERY_SENT_NET();
unsigned int nTrx=NumActiveTransactions();
uint16_t setStatus = (nTrx ? SERVER_STATUS_IN_TRANS : 0 );
if (autocommit) setStatus |= SERVER_STATUS_AUTOCOMMIT;
client_myds->myprot.generate_pkt_OK(true,NULL,NULL,1,0,0,setStatus,0,NULL);
GloMyLogger->log_audit_entry(PROXYSQL_MYSQL_INITDB, this, NULL);
} else {
l_free(pkt->size,pkt->ptr);
client_myds->setDSS_STATE_QUERY_SENT_NET();
std::string msg = "Unable to parse: " + nq;
client_myds->myprot.generate_pkt_ERR(true,NULL,NULL,client_myds->pkt_sid+1,1148,(char *)"42000", msg.c_str());
RequestEnd(NULL);
}
l_free(pkt->size,pkt->ptr);
client_myds->setDSS_STATE_QUERY_SENT_NET();
unsigned int nTrx=NumActiveTransactions();
uint16_t setStatus = (nTrx ? SERVER_STATUS_IN_TRANS : 0 );
if (autocommit) setStatus |= SERVER_STATUS_AUTOCOMMIT;
client_myds->myprot.generate_pkt_OK(true,NULL,NULL,1,0,0,setStatus,0,NULL);
GloMyLogger->log_audit_entry(PROXYSQL_MYSQL_INITDB, this, NULL);
client_myds->DSS=STATE_SLEEP;
} else {
l_free(pkt->size,pkt->ptr);

@ -3,10 +3,14 @@
#include <string>
#include <vector>
#include <map>
#include <cassert>
#include <utility> // for std::pair
#ifdef PARSERDEBUG
#include <iostream>
#endif
#include "pcrecpp.h"
using namespace std;
@ -506,3 +510,117 @@ std::string SetParser::parse_character_set() {
return value4;
}
// Extracts the database name from a `USE <dbname>` statement held in `query`.
//
// Accepted shape (case-insensitive):
//   [whitespace] [/* comment */] USE <name> [whitespace] [trailing comment] [whitespace]
// where <name> is either
//   * a backtick-quoted identifier, optionally glued to USE ("USE`db`"), or
//   * a bare identifier separated from USE by at least one whitespace character.
// The identifier itself is limited to ASCII letters, digits and underscores.
// A trailing comment may be a C-style comment, a `#` comment or a `-- ` comment.
//
// Returns the database name, or an empty string when the query does not have
// the shape above (callers treat the empty string as "unable to parse").
std::string SetParser::parse_USE_query() {
#ifdef DEBUG
    proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str());
#endif // DEBUG

    std::string pattern = "";

    // Step 1: optional leading whitespace and one optional C-style comment.
    // The comment may span several lines; that relies on DOTALL being enabled
    // below so that `.` also matches newline characters.
    pattern += R"(^\s*(?:\/\*.*\*\/\s*)?)";

    // Step 2: the USE keyword itself (matched case-insensitively).
    pattern += R"(USE)";

    // Step 3: the database name, as one of two explicit alternatives:
    //   - zero or more spaces, then the name wrapped in a balanced pair of
    //     backticks (capture group 1);
    //   - one or more spaces, then the bare name (capture group 2).
    // Spelling the alternatives out rejects unbalanced quoting such as
    // "USE `db" and keyword-glued input such as "USEdb" or "USER".
    pattern += R"((?:\s*`([a-zA-Z0-9_]+)`|\s+([a-zA-Z0-9_]+)))";

    // Optional whitespace after the name.
    pattern += R"(\s*)";

    // Step 4: one optional trailing comment: C-style, `#...` or `-- ...`.
    // The single-line forms run up to the end of the input.
    pattern += R"((?:\/\*.*\*\/|#[^\n]*$|-- [^\n]*$)?)";

    // Step 5: optional trailing whitespace, then end of input.
    pattern += R"(\s*$)";

    pcrecpp::RE_Options opt;
    opt.set_caseless(true);
    // Without DOTALL `.` stops at '\n' and multi-line comments never match.
    opt.set_dotall(true);
    pcrecpp::RE re(pattern, opt);

    std::string quoted_name;
    std::string bare_name;
    if (!re.FullMatch(query, &quoted_name, &bare_name)) {
        return "";
    }
    // Exactly one alternative took part in the match; the other capture is empty.
    return quoted_name.empty() ? bare_name : quoted_name;
}
#ifdef DEBUG
// Built-in self test for parse_USE_query(), compiled only in DEBUG builds.
// Feeds a fixed table of queries through the parser and asserts that each
// one yields the expected database name. Note that it overwrites the query
// currently held by this parser instance via set_query().
void SetParser::test_parse_USE_query() {
    // Each entry is (input query, expected database name). An empty expected
    // name means the parser must return an empty string for that input.
    const std::vector<std::pair<std::string, std::string>> expectations = {
        {"USE my_database", "my_database"}, // plain statement
        {"USE my_database", "my_database"}, // plain statement
        {"USE my_database ", "my_database"}, // plain statement, trailing space
        {"/* comment */USE dbname /* comment */", "dbname"}, // comments on both sides
        {"/* comment */ USE dbname", "dbname"}, // leading comment only
        {"USE dbname /* comment */", "dbname"}, // trailing comment only
        {"/* comment */USE `dbname` /* comment */", "dbname"}, // backtick-quoted name
        {"/* comment */USE `dbname`/* comment */", "dbname"}, // backtick-quoted name
        {"/* comment */USE`dbname` /* comment */", "dbname"}, // backtick-quoted name
        {"/* comment */USE `dbname`/* comment */", "dbname"}, // backtick-quoted name
        {"/* comment\nmultiline comment */USE dbname /* comment */", "dbname"}, // comment spanning two lines
        {"/* comment */USE dbname # comment", "dbname"}, // trailing '#' comment
        {"/* comment */USE dbname -- comment", "dbname"}, // trailing '-- ' comment
        {"/* comment */USE dbname # comment", "dbname"}, // trailing '#' comment
        {"/* comment */USE dbname -- comment", "dbname"}, // trailing '-- ' comment
        {"USE dbname # comment", "dbname"}, // trailing '#' comment
        {"USE dbname -- comment", "dbname"}, // trailing '-- ' comment
        {"SELECT * FROM my_table", ""}, // not a USE statement
    };

    for (auto it = expectations.begin(); it != expectations.end(); ++it) {
        const std::string& input = it->first;
        const std::string& expected = it->second;

        set_query(input);
        if (parse_USE_query() == expected) {
            continue;
        }

        // Mismatch: parse once more before asserting so that a debugger
        // breakpoint placed here can step into the failing call.
        const std::string reparsed = parse_USE_query();
        assert(reparsed == expected);
    }
}
#endif // DEBUG

@ -1976,6 +1976,13 @@ int main(int argc, const char * argv[]) {
// std::cerr << "Main init phase0 completed in ";
#endif
}
#ifdef DEBUG
{
// Automated testing
SetParser parser("");
parser.test_parse_USE_query();
}
#endif // DEBUG
{
cpu_timer t;
ProxySQL_Main_process_global_variables(argc, argv);

Loading…
Cancel
Save