From 95fa15cdf99e7a984a9b0b6339fa0b3dcbf801df Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Mon, 12 Aug 2024 20:25:22 +0000 Subject: [PATCH] Better parsing of USE queries sent with COM_QUERY #4598 --- include/set_parser.h | 5 ++ lib/MySQL_Session.cpp | 41 +++++++-------- lib/set_parser.cpp | 118 ++++++++++++++++++++++++++++++++++++++++++ src/main.cpp | 7 +++ 4 files changed, 149 insertions(+), 22 deletions(-) diff --git a/include/set_parser.h b/include/set_parser.h index 542186834..ac783f06b 100644 --- a/include/set_parser.h +++ b/include/set_parser.h @@ -42,6 +42,11 @@ class SetParser { // First implemenation of the parser for TRANSACTION ISOLATION LEVEL and TRANSACTION READ/WRITE std::map> parse2(); std::string parse_character_set(); + std::string parse_USE_query(); +#ifdef DEBUG + // built-in testing + void test_parse_USE_query(); +#endif // DEBUG ~SetParser(); }; diff --git a/lib/MySQL_Session.cpp b/lib/MySQL_Session.cpp index b40d92253..42930c9b9 100644 --- a/lib/MySQL_Session.cpp +++ b/lib/MySQL_Session.cpp @@ -6138,30 +6138,27 @@ void MySQL_Session::handler___status_WAITING_CLIENT_DATA___STATE_SLEEP___MYSQL_C if (session_type == PROXYSQL_SESSION_MYSQL) { __sync_fetch_and_add(&MyHGM->status.frontend_use_db, 1); string nq=string((char *)pkt->ptr+sizeof(mysql_hdr)+1,pkt->size-sizeof(mysql_hdr)-1); - RE2::GlobalReplace(&nq,(char *)"(?U)/\\*.*\\*/",(char *)" "); - char *sn_tmp = (char *)nq.c_str(); - while (sn_tmp < ( nq.c_str() + nq.length() - 4 ) && *sn_tmp == ' ') - sn_tmp++; - //char *schemaname=strdup(nq.c_str()+4); - char *schemaname=strdup(sn_tmp+3); - char *schemanameptr=trim_spaces_and_quotes_in_place(schemaname); - // handle cases like "USE `schemaname` - if(schemanameptr[0]=='`' && schemanameptr[strlen(schemanameptr)-1]=='`') { - schemanameptr[strlen(schemanameptr)-1]='\0'; - schemanameptr++; - } - client_myds->myconn->userinfo->set_schemaname(schemanameptr,strlen(schemanameptr)); - free(schemaname); - if (mirror==false) { + SetParser parser(nq); + 
string schemaname = parser.parse_USE_query(); + if (schemaname != "") { + client_myds->myconn->userinfo->set_schemaname((char *)schemaname.c_str(),schemaname.length()); + if (mirror==false) { + RequestEnd(NULL); + } + l_free(pkt->size,pkt->ptr); + client_myds->setDSS_STATE_QUERY_SENT_NET(); + unsigned int nTrx=NumActiveTransactions(); + uint16_t setStatus = (nTrx ? SERVER_STATUS_IN_TRANS : 0 ); + if (autocommit) setStatus |= SERVER_STATUS_AUTOCOMMIT; + client_myds->myprot.generate_pkt_OK(true,NULL,NULL,1,0,0,setStatus,0,NULL); + GloMyLogger->log_audit_entry(PROXYSQL_MYSQL_INITDB, this, NULL); + } else { + l_free(pkt->size,pkt->ptr); + client_myds->setDSS_STATE_QUERY_SENT_NET(); + std::string msg = "Unable to parse: " + nq; + client_myds->myprot.generate_pkt_ERR(true,NULL,NULL,client_myds->pkt_sid+1,1148,(char *)"42000", msg.c_str()); RequestEnd(NULL); } - l_free(pkt->size,pkt->ptr); - client_myds->setDSS_STATE_QUERY_SENT_NET(); - unsigned int nTrx=NumActiveTransactions(); - uint16_t setStatus = (nTrx ? SERVER_STATUS_IN_TRANS : 0 ); - if (autocommit) setStatus |= SERVER_STATUS_AUTOCOMMIT; - client_myds->myprot.generate_pkt_OK(true,NULL,NULL,1,0,0,setStatus,0,NULL); - GloMyLogger->log_audit_entry(PROXYSQL_MYSQL_INITDB, this, NULL); client_myds->DSS=STATE_SLEEP; } else { l_free(pkt->size,pkt->ptr); diff --git a/lib/set_parser.cpp b/lib/set_parser.cpp index 521808d64..80370b253 100644 --- a/lib/set_parser.cpp +++ b/lib/set_parser.cpp @@ -3,10 +3,14 @@ #include #include #include +#include +#include // for std::pair #ifdef PARSERDEBUG #include #endif +#include "pcrecpp.h" + using namespace std; @@ -506,3 +510,117 @@ std::string SetParser::parse_character_set() { return value4; } +std::string SetParser::parse_USE_query() { +#ifdef DEBUG + proxy_debug(PROXY_DEBUG_MYSQL_QUERY_PROCESSOR, 4, "Parsing query %s\n", query.c_str()); +#endif // DEBUG + + +std::string pattern = ""; + + + +/* +// Step 1: Match for an optional multiline comment at the beginning of the input string. 
+ * `^` : Matches the beginning of the string. + * `\s*` : Matches zero or more whitespace characters (spaces, tabs, newlines). + * `(?: ... )?` : Non-capturing group, made optional by `?`. + * `\/\*.*\*\/` : Matches a multiline C-style comment + * `\s*` : Matches zero or more whitespace characters. +*/ + pattern += R"(^\s*(?:\/\*.*\*\/\s*)?)"; + +/* +// Step 2: This part matches the "USE" keyword followed by zero or more spaces. + * `USE`: Matches the literal string "USE". + * `\s*`: Matches zero or more whitespace characters. +*/ + pattern += R"(USE\s*)"; + +/* +// Step 3: Match the optional backtick, optional space, and the dbname + * `\s+` : Matches one or more whitespace characters. + * `|` : Or operator. + * `` ` `` : Matches a backtick character literally. + * `(`|\s+)?` : Optional first capturing group: a backtick or one or more whitespace characters. `(?:`)?` : Non-capturing group matching an optional opening backtick. + * `([a-zA-Z0-9_]+)` : Matches one or more alphanumeric characters or underscores (captures the dbname). + * `\1?` : Optional backreference to the first capturing group, i.e. to whatever that group matched (a backtick or whitespace). + + This part handles the following: + * Optional space or backtick : It matches either one or more spaces (`\s+`) or a backtick (`) after "USE". This allows for the optional space when the dbname is wrapped in backticks. + * Optional opening backtick : It matches an optional opening backtick. + * Capturing the dbname : It captures the actual dbname, which can consist of alphanumeric characters and underscores. + * Optional closing backtick : It matches a closing backtick when the first capturing group matched the opening backtick. NOTE(review): because `\1?` is optional, an opening backtick without a closing one (e.g. USE `db) is also accepted. 
+*/ + pattern += R"((`|\s+)?(?:`)?([a-zA-Z0-9_]+)\1?)"; +// add an optional space + pattern += R"(\s*)"; + +/* +// Step 4: Match the optional second comment + * `(?: ... )?` : Non-capturing group, made optional by `?`. + * `\/\*.*\*\/` : Matches a multiline C-style comment. + * `#[^\n]*$` : Matches a single-line comment starting with `#` (until the end of the line). 
+ * `-- [^\n]*$` : Matches a single-line comment starting with `-- ` (until the end of the line). +*/ + pattern += R"((?:\/\*.*\*\/|#[^\n]*$|-- [^\n]*$)?)"; + +/* +// Step 5: Match the end of the line + * `\s*`: Matches zero or more whitespace characters. + * `$` : Matches the end of the string. +*/ + pattern += R"(\s*$)"; + + + std::string dbname = ""; + std::string opening_quote; + + pcrecpp::RE_Options opt; + opt.set_caseless(true); + pcrecpp::RE re(pattern, opt); + std::string sp(query); + re.FullMatch(sp, &opening_quote, &dbname); + + return dbname; +} + + +#ifdef DEBUG +void SetParser::test_parse_USE_query() { + + // Define vector of pairs (query, expected dbname) + std::vector> testCases = { + {"USE my_database", "my_database"}, // Basic Case + {"USE my_database", "my_database"}, // Basic Case + {"USE my_database ", "my_database"}, // Basic Case + {"/* comment */USE dbname /* comment */", "dbname"}, // With Comments + {"/* comment */ USE dbname", "dbname"}, // With Comments + {"USE dbname /* comment */", "dbname"}, // With Comments + {"/* comment */USE `dbname` /* comment */", "dbname"}, // With backtick + {"/* comment */USE `dbname`/* comment */", "dbname"}, // With backtick + {"/* comment */USE`dbname` /* comment */", "dbname"}, // With backtick + {"/* comment */USE `dbname`/* comment */", "dbname"}, // With backtick + {"/* comment\nmultiline comment */USE dbname /* comment */", "dbname"}, // Multiline Comment + {"/* comment */USE dbname # comment", "dbname"}, // Hash Comment + {"/* comment */USE dbname -- comment", "dbname"}, // Double Dash Comment + {"/* comment */USE dbname # comment", "dbname"}, // Hash Comment + {"/* comment */USE dbname -- comment", "dbname"}, // Double Dash Comment + {"USE dbname # comment", "dbname"}, // Hash Comment + {"USE dbname -- comment", "dbname"}, // Double Dash Comment + {"SELECT * FROM my_table", ""}, // No match + }; + + // Run tests for each pair + for (const auto& p : testCases) { + set_query(p.first); + std::string 
dbname = parse_USE_query(); + if (dbname != p.second) { + // we call parse_USE_query() again just to make it easier to create a breakpoint + std::string s = parse_USE_query(); + assert(s == p.second); + } + } + +} +#endif // DEBUG diff --git a/src/main.cpp b/src/main.cpp index c30a6bd99..d98743a60 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1976,6 +1976,13 @@ int main(int argc, const char * argv[]) { // std::cerr << "Main init phase0 completed in "; #endif } +#ifdef DEBUG + { + // Automated testing + SetParser parser(""); + parser.test_parse_USE_query(); + } +#endif // DEBUG { cpu_timer t; ProxySQL_Main_process_global_variables(argc, argv);