diff --git a/include/MySQL_Thread.h b/include/MySQL_Thread.h index 64779b64d..d6ab55b60 100644 --- a/include/MySQL_Thread.h +++ b/include/MySQL_Thread.h @@ -426,6 +426,7 @@ class MySQL_Threads_Handler bool query_digests_no_digits; bool query_digests_normalize_digest_text; bool query_digests_track_hostname; + int query_digests_grouping_limit; bool default_reconnect; bool have_compress; bool have_ssl; diff --git a/include/proxysql_structs.h b/include/proxysql_structs.h index b2df3b8d0..9013ecec2 100644 --- a/include/proxysql_structs.h +++ b/include/proxysql_structs.h @@ -758,6 +758,7 @@ __thread bool mysql_thread___sessions_sort; __thread bool mysql_thread___kill_backend_connection_when_disconnect; __thread bool mysql_thread___client_session_track_gtid; __thread char * mysql_thread___default_variables[SQL_NAME_LAST]; +__thread int mysql_thread___query_digests_grouping_limit; /* variables used for Query Cache */ __thread int mysql_thread___query_cache_size_MB; @@ -903,6 +904,7 @@ extern __thread bool mysql_thread___sessions_sort; extern __thread bool mysql_thread___kill_backend_connection_when_disconnect; extern __thread bool mysql_thread___client_session_track_gtid; extern __thread char * mysql_thread___default_variables[SQL_NAME_LAST]; +extern __thread int mysql_thread___query_digests_grouping_limit; /* variables used for Query Cache */ extern __thread int mysql_thread___query_cache_size_MB; diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index 4021811b6..e77a0c612 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -476,6 +476,7 @@ static char * mysql_thread_variables_names[]= { (char *)"threshold_resultset_size", (char *)"query_digests_max_digest_length", (char *)"query_digests_max_query_length", + (char *)"query_digests_grouping_limit", (char *)"wait_timeout", (char *)"throttle_max_bytes_per_second_to_client", (char *)"throttle_ratio_server_to_client", @@ -1121,6 +1122,7 @@ MySQL_Threads_Handler::MySQL_Threads_Handler() { #ifdef DEBUG variables.session_debug=true; #endif /*debug */ + variables.query_digests_grouping_limit = 3; // status variables status_variables.mirror_sessions_current=0; __global_MySQL_Thread_Variables_version=1; @@ -1463,6 +1465,7 @@ int MySQL_Threads_Handler::get_variable_int(const char *name) { if (!strcmp(name,"query_digests_normalize_digest_text")) return (int)variables.query_digests_normalize_digest_text; if (!strcmp(name,"query_digests_replace_null")) return (int)variables.query_digests_replace_null; if (!strcmp(name,"query_digests_track_hostname")) return (int)variables.query_digests_track_hostname; + if (!strcmp(name,"query_digests_grouping_limit")) return (int)variables.query_digests_grouping_limit; } if (name[6]=='p') { if (!strcmp(name,"query_processor_iterations")) return (int)variables.query_processor_iterations; @@ -1855,6 +1858,10 @@ char * MySQL_Threads_Handler::get_variable(char *name) { // this is the public f sprintf(intbuf,"%d",variables.query_digests_max_query_length); return strdup(intbuf); } + if (!strcasecmp(name,"query_digests_grouping_limit")) { + sprintf(intbuf,"%d",variables.query_digests_grouping_limit); + return strdup(intbuf); + } if (!strcasecmp(name,"wait_timeout")) { sprintf(intbuf,"%d",variables.wait_timeout); return strdup(intbuf); @@ -2440,6 +2447,15 @@ bool MySQL_Threads_Handler::set_variable(char *name, const char *value) { // thi return false; } } + if (!strcasecmp(name,"query_digests_grouping_limit")) { + int intv=atoi(value); + if (intv >= 1 && intv <= 2089) { + variables.query_digests_grouping_limit=intv; + return true; + } else { + return false; + } + } if (!strcasecmp(name,"wait_timeout")) { int intv=atoi(value); if (intv >= 0 && intv <= 20*24*3600*1000) { @@ -4833,6 +4849,7 @@ void MySQL_Thread::refresh_variables() { mysql_thread___query_digests_no_digits=(bool)GloMTH->get_variable_int((char *)"query_digests_no_digits"); mysql_thread___query_digests_normalize_digest_text=(bool)GloMTH->get_variable_int((char *)"query_digests_normalize_digest_text"); mysql_thread___query_digests_track_hostname=(bool)GloMTH->get_variable_int((char *)"query_digests_track_hostname"); + mysql_thread___query_digests_grouping_limit=(int)GloMTH->get_variable_int((char *)"query_digests_grouping_limit"); variables.min_num_servers_lantency_awareness=GloMTH->get_variable_int((char *)"min_num_servers_lantency_awareness"); variables.aurora_max_lag_ms_only_read_from_replicas=GloMTH->get_variable_int((char *)"aurora_max_lag_ms_only_read_from_replicas"); variables.stats_time_backend_query=(bool)GloMTH->get_variable_int((char *)"stats_time_backend_query"); diff --git a/lib/c_tokenizer.c b/lib/c_tokenizer.c index fdf4f81b3..55ff8172a 100644 --- a/lib/c_tokenizer.c +++ b/lib/c_tokenizer.c @@ -14,6 +14,7 @@ extern __thread int mysql_thread___query_digests_max_query_length; extern __thread bool mysql_thread___query_digests_lowercase; extern __thread bool mysql_thread___query_digests_replace_null; extern __thread bool mysql_thread___query_digests_no_digits; +extern __thread bool mysql_thread___query_digests_grouping_limit; void tokenizer(tokenizer_t *result, const char* s, const char* delimiters, int empties ) { @@ -180,6 +181,21 @@ static char is_digit_string(char *f, char *t) return 1; } +static inline char is_arithmetic_op(char op) { + if (op == '+') { + return 1; + } else if (op == '-') { + return 1; + } else if (op == '*') { + return 1; + } else if (op == '/') { + return 1; + } else if (op == '%') { + return 1; + } else { + return 0; + } +} char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comment, char *buf){ int i = 0; @@ -212,6 +228,12 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme bool lowercase=0; bool replace_null=0; bool replace_number=0; + + char grouping_digest=0; + char grouping_limit_exceeded=0; + int grouping_count=0; + int grouping_lim = mysql_thread___query_digests_grouping_limit; + lowercase=mysql_thread___query_digests_lowercase; replace_null = mysql_thread___query_digests_replace_null; replace_number = mysql_thread___query_digests_no_digits; @@ -231,7 +253,7 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme { ccl=0; flag = 1; - if (*(s+1)=='!') + if (i != (len-1) && *(s+1)=='!') cmd=1; } @@ -242,16 +264,16 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme } // comment type 3 - start with '--' - else if(prev_char == '-' && *s == '-' && ((*(s+1)==' ') || (*(s+1)=='\n') || (*(s+1)=='\r') || (*(s+1)=='\t') )) + else if(i != (len-1) && prev_char == '-' && *s == '-' && ((*(s+1)==' ') || (*(s+1)=='\n') || (*(s+1)=='\r') || (*(s+1)=='\t') )) { flag = 3; } - else if (*s == '-') { - if (prev_char != '-' && i!=(len-1) && ((*(s+1)=='-'))) { + else if (i != (len-1) && *s == '-' && (*(s+1)=='-')) { + if (prev_char != '-') { flag = 3; } - else if (i==0 && ((*(s+1)=='-'))) { + else if (i==0) { flag = 3; } } @@ -306,6 +328,41 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme } } } + { + char* p = p_r - 2; + // supress spaces before aritmetic operators + if (p >= r && is_space_char(prev_char) && is_arithmetic_op(*s)) { + if (*p == '?') { + prev_char = *s; + --p_r; + *p_r++ = *s; + s++; + i++; + continue; + } + } + // supress spaces before and after commas + if (p >= r && is_space_char(prev_char) && ((*s == ',') || (*p == ','))) { + --p_r; + // only copy the comma if we are not grouping a query + if (!grouping_limit_exceeded) { + *p_r++ = *s; + } + prev_char = ','; + s++; + i++; + continue; + } + // supress spaces before closing brakets + if (p >= r && (*s == ')')) { + prev_char = *s; + --p_r; + *p_r++ = *s; + s++; + i++; + continue; + } + } if (replace_null) { if (*s == 'n' || *s == 'N') { // we search for NULL , #2171 if (i && is_token_char(prev_char)) { @@ -515,11 +572,24 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme p_r--; } } + if ( _p >= r && is_space_char(*(_p + 2))) { + if ( _p >= r && ( *(_p+1) == '-' || *(_p+1) == '+' || *(_p+1) == '*' || *(_p+1) == '/' || *(_p+1) == '%' || *(_p+1) == ',')) { + p_r--; + } + } *p_r++ = '?'; i++; continue; } + // is float + if (*s == '.' || *s == 'e' || ((*s == '+' || *s == '-') && prev_char == 'e')) { + prev_char = *s; + i++; + s++; + continue; + } + // token char or last char if(is_token_char(*s) || len == i+1) { @@ -528,6 +598,7 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme char *_p = p_r_t; _p-=3; p_r = p_r_t; + // remove symbol and keep parenthesis or comma if ( _p >= r && ( *(_p+2) == '-' || *(_p+2) == '+') ) { if ( ( *(_p+1) == ',' ) || ( *(_p+1) == '(' ) || @@ -536,7 +607,31 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme p_r--; } } - *p_r++ = '?'; + // check if we are in a grouping candidate query_digest + if (*(_p + 2) == '(' || *_p == '(') { + grouping_digest = 1; + } + // Remove spaces before number + if ( _p >= r && is_space_char(*(_p + 2))) { + // A point can be found prior to a number in case of query grouping + if ( _p >= r && ( *(_p+1) == '-' || *(_p+1) == '+' || *(_p+1) == '*' || *(_p+1) == '/' || *(_p+1) == '%' || *(_p+1) == ',' || *(_p+1) == '.')) { + p_r--; + } + } + if (grouping_count < grouping_lim) { + *p_r++ = '?'; + if (grouping_digest) { + grouping_count++; + } + } else { + if (!grouping_limit_exceeded) { + *p_r++ = '.'; + *p_r++ = '.'; + *p_r++ = '.'; + + grouping_limit_exceeded=1; + } + } if(len == i+1) { if(is_token_char(*s)) @@ -544,8 +639,6 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme i++; continue; } - - } flag = 0; } @@ -556,10 +649,20 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme // COPY CHAR // ================================================= // convert every space char to ' ' + if (*s != ',' && !is_digit_char(*s) && !is_space_char(*s)) { + grouping_digest = 0; + grouping_count = 0; + grouping_limit_exceeded = 0; + } + if (lowercase==0) { - *p_r++ = !is_space_char(*s) ? *s : ' '; + if (!grouping_digest || !grouping_limit_exceeded || !(*s == ',')) { + *p_r++ = !is_space_char(*s) ? *s : ' '; + } } else { - *p_r++ = !is_space_char(*s) ? (tolower(*s)) : ' '; + if (!grouping_digest || !grouping_limit_exceeded || !(*s == ',')) { + *p_r++ = !is_space_char(*s) ? (tolower(*s)) : ' '; + } } prev_char = *s++; diff --git a/test/tap/tests/Makefile b/test/tap/tests/Makefile index b2ed2d733..1cab6ed7e 100644 --- a/test/tap/tests/Makefile +++ b/test/tap/tests/Makefile @@ -100,4 +100,6 @@ galera_2_timeout_no_count: galera_2_timeout_no_count.cpp $(TAP_LIBDIR)/libtap.a aurora: aurora.cpp $(TAP_LIBDIR)/libtap.a g++ -DTEST_AURORA -DDEBUG aurora.cpp ../tap/SQLite3_Server.cpp $(INCLUDEDIRS) $(LDIRS) $(OPT) -std=c++11 $(OBJ) $(MYLIBS) -ltap -ldl $(STATIC_LIBS) -o aurora -DGITVERSION=\"$(GIT_VERSION)\" - \ No newline at end of file + +test_tokenizer-t: test_tokenizer-t.cpp $(TAP_LIBDIR)/libtap.a + g++ test_tokenizer-t.cpp $(INCLUDEDIRS) $(LDIRS) $(OPT) -std=c++11 $(MYLIBS) -lproxysql -ltap -Wl,--no-as-needed -ldl -lpthread -o test_tokenizer-t -DGITVERSION=\"$(GIT_VERSION)\" diff --git a/test/tap/tests/test_tokenizer-t.cpp b/test/tap/tests/test_tokenizer-t.cpp new file mode 100644 index 000000000..39b3bc40f --- /dev/null +++ b/test/tap/tests/test_tokenizer-t.cpp @@ -0,0 +1,179 @@ +#include +#include +#include +#include +#include + +#include "proxysql.h" + +#include "tap.h" +#include "command_line.h" + +#include "utils.h" + +#include + +__thread int mysql_thread___query_digests_max_query_length = 65000; +__thread bool mysql_thread___query_digests_lowercase = false; +__thread bool mysql_thread___query_digests_replace_null = false; +__thread bool mysql_thread___query_digests_no_digits = false; +__thread int mysql_thread___query_digests_grouping_limit = 3; + +const std::vector queries { + // floats + "select 1.1", + "select 1192.1102", + "select 99.1929", + // exponentials + "select 1.1e9", + "select 1.1e+9", + "select 1.1e-9", + // operators + "select 1 +1", + "select 1+ 1", + "select 1- 1", + "select 1 -1", + "select 1* 1", + "select 1 *1", + "select 1/ 1", + "select 1 /1", + "select 1% 1", + "select 1 %1", + // operators and commas + "select 1+ 1, 1 -1, 1 * 1 , 1/1 , 100 % 3", + "SELECT * FROM t t1, t t2 ,t t3,t t4 LIMIT 0", + // strings + "select * from t where t = \"foo\"", + "select \"1+ 1, 1 -1, 1 * 1 , 1/1 , 100 % 3\"", + // not modified + "select * fromt t", + // test query_digest reduction + "SELECT * FROM tablename WHERE id IN (1,2,3,4,5,6,7,8,9,10)", + "SELECT * FROM tablename WHERE id IN (1,2,3,4)", + // invalid request grouping + "SELECT * tablename where id IN (1,2,3,4,5,6,7,8, AND j in (1,2,3,4,5,6 and k=1" +}; + +const std::vector exp_results { + // floats + "select ?", + "select ?", + "select ?", + // exponentials + "select ?", + "select ?", + "select ?", + // operators + "select ?+?", + "select ?+?", + "select ?-?", + "select ?-?", + "select ?*?", + "select ?*?", + "select ?/?", + "select ?/?", + "select ?%?", + "select ?%?", + // operators and commas + "select ?+?,?-?,?*?,?/?,?%?", + "SELECT * FROM t t1,t t2,t t3,t t4 LIMIT ?", + // strings + "select * from t where t = ?", + "select ?", + // not modified + "select * fromt t", + // test query_digest reduction + "SELECT * FROM tablename WHERE id IN (?,?,?,...)", + "SELECT * FROM tablename WHERE id IN (?,?,?,...)", + // invalid request grouping + "SELECT * tablename where id IN (?,?,?,... AND j in (?,?,?,... and k=?" +}; + +const std::vector queries_grouping { + // test query_digest reduction + "SELECT * FROM tablename WHERE id IN (1,2, 3,4 ,5 ,6,7,8,9,10)", + // invalid request grouping + "SELECT * tablename where id IN (1,2,3,4,5 , 6,7,8, AND j in (1, 2,3, 4 ,5,6,7,8,9 and k=1", + "SELECT (1.1, 1, 2, 13, 4.81, 12) FROM db.table", + "SELECT (1.1, 1.12934 , 21.32 , 91, 91, 12.93 ) FROM db.table2", + "SELECT (1.1, 1.12934 , 21.32 , 91.2 , 91, 12 ) FROM db.table7", + "SELECT (1.1, 1.12934, 21.32, 391,2381,28.493,1283 ) FROM db.table2", + "SELECT (1.1, 1.12934, 21.32 , 91, 91, 12.1 ) FROM db.table3" +}; + +const std::vector exp_queries_grouping { + // test query_digest reduction + "SELECT * FROM tablename WHERE id IN (?,...)", + // invalid request grouping + "SELECT * tablename where id IN (?,... AND j in (?,... and k=?", + "SELECT (?,...) FROM db.table", + "SELECT (?,...) FROM db.table2", + "SELECT (?,...) FROM db.table7", + "SELECT (?,...) FROM db.table2", + "SELECT (?,...) FROM db.table3" +}; + +std::string replace_str(const std::string& str, const std::string& match, const std::string& repl) { + if(match.empty()) { + return str; + } + + std::string result = str; + size_t start_pos = 0; + + while((start_pos = result.find(match, start_pos)) != std::string::npos) { + result.replace(start_pos, match.length(), repl); + start_pos += repl.length(); + } + + return result; +} + +std::string increase_mark_num(const std::string query, uint32_t num) { + std::string result = query; + std::string marks = ""; + + for (uint32_t i = 0; i < num - 1; i++) { + marks += "?,"; + } + marks += "?,..."; + + result = replace_str(result, "?,...", marks); + + return result; +} + +int main(int argc, char** argv) { + if (queries.size() != exp_results.size()) { + ok(0, "queries and exp_results sizes doesn't match"); + return exit_status(); + } + + char buf[QUERY_DIGEST_BUF]; + + for (size_t i = 0; i < queries.size(); i++) { + const auto& query = queries[i]; + const auto& exp_res = exp_results[i]; + + char* c_res = mysql_query_digest_and_first_comment(const_cast(query.c_str()), query.length(), NULL, buf); + std::string result(c_res); + + ok(result == exp_res, "Digest should be equal to exp result: '%s' == '%s'", result.c_str(), exp_res.c_str()); + } + + for (size_t i = 0; i < queries_grouping.size(); i++) { + for (int j = 1; j <= 5; j++) { + mysql_thread___query_digests_grouping_limit = j; + + const auto& query = queries_grouping[i]; + const auto& exp_res = increase_mark_num(exp_queries_grouping[i], j); + + char* c_res = mysql_query_digest_and_first_comment(const_cast(query.c_str()), query.length(), NULL, buf); + std::string result(c_res); + + ok(result == exp_res, "Grouping digest should be equal to exp result: '%s' == '%s'", result.c_str(), exp_res.c_str()); + } + } + + return exit_status(); +}