From 2436df4b2442713ccacbe7217e199806c0dd943f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Sat, 25 Apr 2020 12:44:35 +0200 Subject: [PATCH 01/10] Added support for FRs 2660, 2661, 2662 and 2663 --- lib/c_tokenizer.c | 66 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 5 deletions(-) diff --git a/lib/c_tokenizer.c b/lib/c_tokenizer.c index fdf4f81b3..8ace1dc43 100644 --- a/lib/c_tokenizer.c +++ b/lib/c_tokenizer.c @@ -180,6 +180,21 @@ static char is_digit_string(char *f, char *t) return 1; } +inline char is_arithmetic_op(char op) { + if (op == '+') { + return 1; + } else if (op == '-') { + return 1; + } else if (op == '*') { + return 1; + } else if (op == '/') { + return 1; + } else if (op == '%') { + return 1; + } else { + return 0; + } +} char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comment, char *buf){ int i = 0; @@ -247,11 +262,11 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme flag = 3; } - else if (*s == '-') { - if (prev_char != '-' && i!=(len-1) && ((*(s+1)=='-'))) { + else if (*s == '-' && ((*(s+1)=='-'))) { + if (prev_char != '-' && i!=(len-1)) { flag = 3; } - else if (i==0 && ((*(s+1)=='-'))) { + else if (i==0) { flag = 3; } } @@ -306,6 +321,29 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme } } } + { + char* p = p_r - 2; + // supress spaces before aritmetic operators + if (p >= r && is_space_char(prev_char) && is_arithmetic_op(*s)) { + if (*p == '?') { + prev_char = *s; + --p_r; + *p_r++ = *s; + s++; + i++; + continue; + } + } + // supress spaces before and after commas + if (p >= r && is_space_char(prev_char) && ((*s == ',') || (*p == ','))) { + prev_char = ','; + --p_r; + *p_r++ = *s; + s++; + i++; + continue; + } + } if (replace_null) { if (*s == 'n' || *s == 'N') { // we search for NULL , #2171 if (i && is_token_char(prev_char)) { @@ -515,11 +553,24 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme p_r--; } } + if ( _p >= r && is_space_char(*(_p + 2))) { + if ( _p >= r && ( *(_p+1) == '-' || *(_p+1) == '+' || *(_p+1) == '*' || *(_p+1) == '/' || *(_p+1) == '%' || *(_p+1) == ',')) { + p_r--; + } + } *p_r++ = '?'; i++; continue; } + // is float + if (*s == '.' || *s == 'e' || ((*s == '+' || *s == '-') && prev_char == 'e')) { + prev_char = *s; + i++; + s++; + continue; + } + // token char or last char if(is_token_char(*s) || len == i+1) { @@ -528,6 +579,7 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme char *_p = p_r_t; _p-=3; p_r = p_r_t; + // remove symbol and keep parenthesis or comma if ( _p >= r && ( *(_p+2) == '-' || *(_p+2) == '+') ) { if ( ( *(_p+1) == ',' ) || ( *(_p+1) == '(' ) || @@ -536,6 +588,12 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme p_r--; } } + // Remove spaces before number + if ( _p >= r && is_space_char(*(_p + 2))) { + if ( _p >= r && ( *(_p+1) == '-' || *(_p+1) == '+' || *(_p+1) == '*' || *(_p+1) == '/' || *(_p+1) == '%' || *(_p+1) == ',')) { + p_r--; + } + } *p_r++ = '?'; if(len == i+1) { @@ -544,8 +602,6 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme i++; continue; } - - } flag = 0; } From 21650254838a2724b32ddee3587823aff647245b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Sat, 25 Apr 2020 12:50:40 +0200 Subject: [PATCH 02/10] Added unit test file for tokenizer functions --- test/tap/tests/Makefile | 9 ++- test/tap/tests/test_tokenizer-t.cpp | 98 +++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 test/tap/tests/test_tokenizer-t.cpp diff --git a/test/tap/tests/Makefile b/test/tap/tests/Makefile index 43124fcde..692a5bdfd 100644 --- a/test/tap/tests/Makefile +++ b/test/tap/tests/Makefile @@ -75,13 +75,13 @@ all: tests .PHONY: clean clean: - rm -f basic-t set_character_set-t charset_unsigned_int-t select_config_file-t sqlite3-t galera_1_timeout_count galera_2_timeout_no_count aurora test_set_character_results-t test_ps_large_result-t set_testing-t test_firewall-t || true + rm -f basic-t set_character_set-t charset_unsigned_int-t select_config_file-t sqlite3-t galera_1_timeout_count galera_2_timeout_no_count aurora test_set_character_results-t test_ps_large_result-t set_testing-t test_firewall-t test_tokenizer-t|| true OPT=-O2 -SRC=basic-t.cpp set_character_set-t.cpp charset_unsigned_int-t.cpp select_config_file-t.cpp sqlite3-t.cpp galera_1_timeout_count.cpp galera_2_timeout_no_count.cpp aurora.cpp test_set_character_results-t.cpp test_ps_large_result-t.cpp set_testing-t.cpp test_firewall-t.cpp +SRC=basic-t.cpp set_character_set-t.cpp charset_unsigned_int-t.cpp select_config_file-t.cpp sqlite3-t.cpp galera_1_timeout_count.cpp galera_2_timeout_no_count.cpp aurora.cpp test_set_character_results-t.cpp test_ps_large_result-t.cpp set_testing-t.cpp test_firewall-t.cpp test_tokenizer-t.cpp -tests: basic-t set_character_set-t charset_unsigned_int-t select_config_file-t sqlite3-t test_set_character_results-t test_ps_large_result-t set_testing-t test_firewall-t +tests: basic-t set_character_set-t charset_unsigned_int-t select_config_file-t sqlite3-t test_set_character_results-t test_ps_large_result-t set_testing-t test_firewall-t test_tokenizer-t testgalera: galera_1_timeout_count galera_2_timeout_no_count testaurora: aurora @@ -120,3 +120,6 @@ aurora: aurora.cpp $(TAP_LIBDIR)/libtap.a set_testing-t: set_testing-t.cpp $(TAP_LIBDIR)/libtap.a g++ set_testing-t.cpp -Wall $(INCLUDEDIRS) $(LDIRS) $(OPT) -std=c++11 $(MYLIBS) -ltap -pthread -O0 -ggdb -ldl -lssl -lcrypto -o set_testing-t + +test_tokenizer-t: test_tokenizer-t.cpp $(TAP_LIBDIR)/libtap.a + g++ test_tokenizer-t.cpp $(INCLUDEDIRS) $(LDIRS) $(OPT) -std=c++11 $(MYLIBS) -lproxysql -ltap -Wl,--no-as-needed -ldl -lpthread -o test_tokenizer-t diff --git a/test/tap/tests/test_tokenizer-t.cpp b/test/tap/tests/test_tokenizer-t.cpp new file mode 100644 index 000000000..2fb2a87f2 --- /dev/null +++ b/test/tap/tests/test_tokenizer-t.cpp @@ -0,0 +1,98 @@ +#include +#include +#include + +#include "proxysql.h" + +#include "tap.h" +#include "command_line.h" + +#include "utils.h" + +#include + +__thread int mysql_thread___query_digests_max_query_length = 65000; +__thread bool mysql_thread___query_digests_lowercase = false; +__thread bool mysql_thread___query_digests_replace_null = false; +__thread bool mysql_thread___query_digests_no_digits = false; + +const std::vector queries { + // floats + "select 1.1", + "select 1192.1102", + "select 99.1929", + // exponentials + "select 1.1e9", + "select 1.1e+9", + "select 1.1e-9", + // operators + "select 1 +1", + "select 1+ 1", + "select 1- 1", + "select 1 -1", + "select 1* 1", + "select 1 *1", + "select 1/ 1", + "select 1 /1", + "select 1% 1", + "select 1 %1", + // operators and commas + "select 1+ 1, 1 -1, 1 * 1 , 1/1 , 100 % 3", + "SELECT * FROM t t1, t t2 ,t t3,t t4 LIMIT 0", + // strings + "select * from t where t = \"foo\"", + "select \"1+ 1, 1 -1, 1 * 1 , 1/1 , 100 % 3\"", + // not modified + "select * fromt t" +}; + +const std::vector exp_results { + // floats + "select ?", + "select ?", + "select ?", + // exponentials + "select ?", + "select ?", + "select ?", + // operators + "select ?+?", + "select ?+?", + "select ?-?", + "select ?-?", + "select ?*?", + "select ?*?", + "select ?/?", + "select ?/?", + "select ?%?", + "select ?%?", + // operators and commas + "select ?+?,?-?,?*?,?/?,?%?", + "SELECT * FROM t t1,t t2,t t3,t t4 LIMIT ?", + // strings + "select * from t where t = ?", + "select ?", + // not modified + "select * fromt t" +}; + +int main(int argc, char** argv) { + if (queries.size() != exp_results.size()) { + ok(0, "queries and exp_results sizes doesn't match"); + return exit_status(); + } + + char buf[QUERY_DIGEST_BUF]; + + for (size_t i = 0; i < queries.size(); i++) { + const auto& query = queries[i]; + const auto& exp_res = exp_results[i]; + + char* c_res = mysql_query_digest_and_first_comment(const_cast(query.c_str()), query.length(), NULL, buf); + std::string result(c_res); + + ok(result == exp_res, "result isn't equal to exp result: '%s' != '%s'", result.c_str(), exp_res.c_str()); + } + + return exit_status(); +} \ No newline at end of file From 5f9fa3ec72cc3a97e2bab9337388ab132de28f5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Sat, 25 Apr 2020 20:27:12 +0200 Subject: [PATCH 03/10] Adding static in is_arithmetic_op --- lib/c_tokenizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/c_tokenizer.c b/lib/c_tokenizer.c index 8ace1dc43..2053c5fba 100644 --- a/lib/c_tokenizer.c +++ b/lib/c_tokenizer.c @@ -180,7 +180,7 @@ static char is_digit_string(char *f, char *t) return 1; } -inline char is_arithmetic_op(char op) { +static inline char is_arithmetic_op(char op) { if (op == '+') { return 1; } else if (op == '-') { From 65e55a4b3545a9b48dfeae88791dc7eb52a01963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Mon, 27 Apr 2020 12:56:00 +0200 Subject: [PATCH 04/10] Added len checks to avoid possible buffer overflows --- lib/c_tokenizer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/c_tokenizer.c b/lib/c_tokenizer.c index 2053c5fba..2f5b8b497 100644 --- a/lib/c_tokenizer.c +++ b/lib/c_tokenizer.c @@ -246,7 +246,7 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme { ccl=0; flag = 1; - if (*(s+1)=='!') + if (i != (len-1) && *(s+1)=='!') cmd=1; } @@ -257,13 +257,13 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme } // comment type 3 - start with '--' - else if(prev_char == '-' && *s == '-' && ((*(s+1)==' ') || (*(s+1)=='\n') || (*(s+1)=='\r') || (*(s+1)=='\t') )) + else if(i != (len-1) && prev_char == '-' && *s == '-' && ((*(s+1)==' ') || (*(s+1)=='\n') || (*(s+1)=='\r') || (*(s+1)=='\t') )) { flag = 3; } - else if (*s == '-' && ((*(s+1)=='-'))) { - if (prev_char != '-' && i!=(len-1)) { + else if (i != (len-1) && *s == '-' && (*(s+1)=='-')) { + if (prev_char != '-') { flag = 3; } else if (i==0) { From f82f239808b2db715d063dd45b6b3b7f464a0dd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Thu, 30 Apr 2020 16:03:38 +0200 Subject: [PATCH 05/10] Added support for grouping queries in 'stats_mysql_query_digest' --- include/MySQL_Thread.h | 1 + include/proxysql_structs.h | 2 ++ lib/MySQL_Thread.cpp | 17 +++++++++++++ lib/c_tokenizer.c | 52 +++++++++++++++++++++++++++++++++++--- 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/include/MySQL_Thread.h b/include/MySQL_Thread.h index f5d38b8b7..b82d193e7 100644 --- a/include/MySQL_Thread.h +++ b/include/MySQL_Thread.h @@ -316,6 +316,7 @@ class MySQL_Threads_Handler bool query_digests_no_digits; bool query_digests_normalize_digest_text; bool query_digests_track_hostname; + int query_digests_grouping_limit; bool default_reconnect; bool have_compress; bool have_ssl; diff --git a/include/proxysql_structs.h b/include/proxysql_structs.h index 97f8e1e1c..ffbbc41d0 100644 --- a/include/proxysql_structs.h +++ b/include/proxysql_structs.h @@ -758,6 +758,7 @@ __thread bool mysql_thread___sessions_sort; __thread bool mysql_thread___kill_backend_connection_when_disconnect; __thread bool mysql_thread___client_session_track_gtid; __thread char * mysql_thread___default_variables[SQL_NAME_LAST]; +__thread int mysql_thread___query_digests_grouping_limit; /* variables used for Query Cache */ __thread int mysql_thread___query_cache_size_MB; @@ -903,6 +904,7 @@ extern __thread bool mysql_thread___sessions_sort; extern __thread bool mysql_thread___kill_backend_connection_when_disconnect; extern __thread bool mysql_thread___client_session_track_gtid; extern __thread char * mysql_thread___default_variables[SQL_NAME_LAST]; +extern __thread int mysql_thread___query_digests_grouping_limit; /* variables used for Query Cache */ extern __thread int mysql_thread___query_cache_size_MB; diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index 60b6a1fe8..db368d277 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -426,6 +426,7 @@ static char * mysql_thread_variables_names[]= { (char *)"threshold_resultset_size", (char *)"query_digests_max_digest_length", (char *)"query_digests_max_query_length", + (char *)"query_digests_grouping_limit", (char *)"wait_timeout", (char *)"throttle_max_bytes_per_second_to_client", (char *)"throttle_ratio_server_to_client", @@ -649,6 +650,7 @@ MySQL_Threads_Handler::MySQL_Threads_Handler() { #ifdef DEBUG variables.session_debug=true; #endif /*debug */ + variables.query_digests_grouping_limit = 3; // status variables status_variables.mirror_sessions_current=0; __global_MySQL_Thread_Variables_version=1; @@ -987,6 +989,7 @@ int MySQL_Threads_Handler::get_variable_int(const char *name) { if (!strcmp(name,"query_digests_normalize_digest_text")) return (int)variables.query_digests_normalize_digest_text; if (!strcmp(name,"query_digests_replace_null")) return (int)variables.query_digests_replace_null; if (!strcmp(name,"query_digests_track_hostname")) return (int)variables.query_digests_track_hostname; + if (!strcmp(name,"query_digests_grouping_limit")) return (int)variables.query_digests_grouping_limit; } if (name[6]=='p') { if (!strcmp(name,"query_processor_iterations")) return (int)variables.query_processor_iterations; @@ -1379,6 +1382,10 @@ char * MySQL_Threads_Handler::get_variable(char *name) { // this is the public f sprintf(intbuf,"%d",variables.query_digests_max_query_length); return strdup(intbuf); } + if (!strcasecmp(name,"query_digests_grouping_limit")) { + sprintf(intbuf,"%d",variables.query_digests_grouping_limit); + return strdup(intbuf); + } if (!strcasecmp(name,"wait_timeout")) { sprintf(intbuf,"%d",variables.wait_timeout); return strdup(intbuf); @@ -1964,6 +1971,15 @@ bool MySQL_Threads_Handler::set_variable(char *name, const char *value) { // thi return false; } } + if (!strcasecmp(name,"query_digests_grouping_limit")) { + int intv=atoi(value); + if (intv >= 1 && intv <= 2089) { + variables.query_digests_grouping_limit=intv; + return true; + } else { + return false; + } + } if (!strcasecmp(name,"wait_timeout")) { int intv=atoi(value); if (intv >= 0 && intv <= 20*24*3600*1000) { @@ -4478,6 +4494,7 @@ void MySQL_Thread::refresh_variables() { mysql_thread___query_digests_no_digits=(bool)GloMTH->get_variable_int((char *)"query_digests_no_digits"); mysql_thread___query_digests_normalize_digest_text=(bool)GloMTH->get_variable_int((char *)"query_digests_normalize_digest_text"); mysql_thread___query_digests_track_hostname=(bool)GloMTH->get_variable_int((char *)"query_digests_track_hostname"); + mysql_thread___query_digests_grouping_limit=(int)GloMTH->get_variable_int((char *)"query_digests_grouping_limit"); variables.min_num_servers_lantency_awareness=GloMTH->get_variable_int((char *)"min_num_servers_lantency_awareness"); variables.aurora_max_lag_ms_only_read_from_replicas=GloMTH->get_variable_int((char *)"aurora_max_lag_ms_only_read_from_replicas"); variables.stats_time_backend_query=(bool)GloMTH->get_variable_int((char *)"stats_time_backend_query"); diff --git a/lib/c_tokenizer.c b/lib/c_tokenizer.c index 2f5b8b497..a00de3597 100644 --- a/lib/c_tokenizer.c +++ b/lib/c_tokenizer.c @@ -14,6 +14,7 @@ extern __thread int mysql_thread___query_digests_max_query_length; extern __thread bool mysql_thread___query_digests_lowercase; extern __thread bool mysql_thread___query_digests_replace_null; extern __thread bool mysql_thread___query_digests_no_digits; +extern __thread bool mysql_thread___query_digests_grouping_limit; void tokenizer(tokenizer_t *result, const char* s, const char* delimiters, int empties ) { @@ -227,6 +228,12 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme bool lowercase=0; bool replace_null=0; bool replace_number=0; + + char grouping_digest=0; + char grouping_limit_exceeded=0; + int grouping_count=0; + int grouping_lim = mysql_thread___query_digests_grouping_limit; + lowercase=mysql_thread___query_digests_lowercase; replace_null = mysql_thread___query_digests_replace_null; replace_number = mysql_thread___query_digests_no_digits; @@ -343,6 +350,15 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme i++; continue; } + // supress spaces before closing brakets + if (p >= r && (*s == ')')) { + prev_char = *s; + --p_r; + *p_r++ = *s; + s++; + i++; + continue; + } } if (replace_null) { if (*s == 'n' || *s == 'N') { // we search for NULL , #2171 @@ -588,13 +604,31 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme p_r--; } } + // check if we are in a grouping candidate query_digest + if (*(_p + 2) == '(' || *_p == '(') { + grouping_digest = 1; + } // Remove spaces before number if ( _p >= r && is_space_char(*(_p + 2))) { - if ( _p >= r && ( *(_p+1) == '-' || *(_p+1) == '+' || *(_p+1) == '*' || *(_p+1) == '/' || *(_p+1) == '%' || *(_p+1) == ',')) { + // A point can be found prior to a number in case of query grouping + if ( _p >= r && ( *(_p+1) == '-' || *(_p+1) == '+' || *(_p+1) == '*' || *(_p+1) == '/' || *(_p+1) == '%' || *(_p+1) == ',' || *(_p+1) == '.')) { p_r--; } } - *p_r++ = '?'; + if (grouping_count < grouping_lim) { + *p_r++ = '?'; + if (grouping_digest) { + grouping_count++; + } + } else { + if (!grouping_limit_exceeded) { + *p_r++ = '.'; + *p_r++ = '.'; + *p_r++ = '.'; + + grouping_limit_exceeded=1; + } + } if(len == i+1) { if(is_token_char(*s)) @@ -612,10 +646,20 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme // COPY CHAR // ================================================= // convert every space char to ' ' + if (*s != ',' && !is_digit_char(*s) && !is_space_char(*s)) { + grouping_digest = 0; + grouping_count = 0; + grouping_limit_exceeded = 0; + } + if (lowercase==0) { - *p_r++ = !is_space_char(*s) ? *s : ' '; + if (!grouping_digest || !grouping_limit_exceeded || !(*s == ',')) { + *p_r++ = !is_space_char(*s) ? *s : ' '; + } } else { - *p_r++ = !is_space_char(*s) ? (tolower(*s)) : ' '; + if (!grouping_digest || !grouping_limit_exceeded || !(*s == ',')) { + *p_r++ = !is_space_char(*s) ? (tolower(*s)) : ' '; + } } prev_char = *s++; From 123ae1374151a4b77ba432e80ca702476ea9df66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Thu, 30 Apr 2020 16:04:17 +0200 Subject: [PATCH 06/10] Added tests for new grouping feature for 'stats_mysql_query_digest' --- test/tap/tests/test_tokenizer-t.cpp | 49 +++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/test/tap/tests/test_tokenizer-t.cpp b/test/tap/tests/test_tokenizer-t.cpp index 2fb2a87f2..8d039e1e0 100644 --- a/test/tap/tests/test_tokenizer-t.cpp +++ b/test/tap/tests/test_tokenizer-t.cpp @@ -15,6 +15,7 @@ __thread int mysql_thread___query_digests_max_query_length = 65000; __thread bool mysql_thread___query_digests_lowercase = false; __thread bool mysql_thread___query_digests_replace_null = false; __thread bool mysql_thread___query_digests_no_digits = false; +__thread int mysql_thread___query_digests_grouping_limit = 3; const std::vector queries { // floats @@ -43,7 +44,12 @@ const std::vector queries { "select * from t where t = \"foo\"", "select \"1+ 1, 1 -1, 1 * 1 , 1/1 , 100 % 3\"", // not modified - "select * fromt t" + "select * fromt t", + // test query_digest reduction + "SELECT * FROM tablename WHERE id IN (1,2,3,4,5,6,7,8,9,10)", + "SELECT * FROM tablename WHERE id IN (1,2,3,4)", + // invalid request grouping + "SELECT * tablename where id IN (1,2,3,4,5,6,7,8, AND j in (1,2,3,4,5,6 and k=1" }; const std::vector exp_results { @@ -73,7 +79,34 @@ const std::vector exp_results { "select * from t where t = ?", "select ?", // not modified - "select * fromt t" + "select * fromt t", + // test query_digest reduction + "SELECT * FROM tablename WHERE id IN (?,?,?,...)", + "SELECT * FROM tablename WHERE id IN (?,?,?,...)", + // invalid request grouping + "SELECT * tablename where id IN (?,?,?,... AND j in (?,?,?,... and k=?" +}; + +const std::vector queries_grouping { + // test query_digest reduction + "SELECT * FROM tablename WHERE id IN (1,2,3,4,5,6,7,8,9,10)", + "SELECT * FROM tablename WHERE id IN (1,2,3,4)", + // invalid request grouping + "SELECT * tablename where id IN (1,2,3,4,5,6,7,8, AND j in (1,2,3,4,5,6 and k=1", + "SELECT (1.1, 1, 2) FROM db.table", + "SELECT (1.1, 1.12934, 21.32 ) FROM db.table2", + "SELECT (1.1, 1.12934, 21.32, 39123 ) FROM db.table2", +}; + +const std::vector exp_queries_grouping { + // test query_digest reduction + "SELECT * FROM tablename WHERE id IN (?,?,?,...)", + "SELECT * FROM tablename WHERE id IN (?,?,?,...)", + // invalid request grouping + "SELECT * tablename where id IN (?,?,?,... AND j in (?,?,?,... and k=?", + "SELECT (?,?,?) FROM db.table", + "SELECT (?,?,?) FROM db.table2", + "SELECT (?,?,?,...) FROM db.table2", }; int main(int argc, char** argv) { @@ -91,7 +124,17 @@ int main(int argc, char** argv) { char* c_res = mysql_query_digest_and_first_comment(const_cast(query.c_str()), query.length(), NULL, buf); std::string result(c_res); - ok(result == exp_res, "result isn't equal to exp result: '%s' != '%s'", result.c_str(), exp_res.c_str()); + ok(result == exp_res, "Digest should be equal to exp result: '%s' == '%s'", result.c_str(), exp_res.c_str()); + } + + for (size_t i = 0; i < queries_grouping.size(); i++) { + const auto& query = queries_grouping[i]; + const auto& exp_res = exp_queries_grouping[i]; + + char* c_res = mysql_query_digest_and_first_comment(const_cast(query.c_str()), query.length(), NULL, buf); + std::string result(c_res); + + ok(result == exp_res, "Grouping digest should be equal to exp result: '%s' == '%s'", result.c_str(), exp_res.c_str()); } return exit_status(); From 10c789e90948da565ebef887eb33f1c1f527b6c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 8 May 2020 11:07:17 +0200 Subject: [PATCH 07/10] Added GITVERSION flag for new test file building --- test/tap/tests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tap/tests/Makefile b/test/tap/tests/Makefile index 833a3deb4..840068bb5 100644 --- a/test/tap/tests/Makefile +++ b/test/tap/tests/Makefile @@ -100,4 +100,4 @@ aurora: aurora.cpp $(TAP_LIBDIR)/libtap.a g++ -DTEST_AURORA -DDEBUG aurora.cpp ../tap/SQLite3_Server.cpp $(INCLUDEDIRS) $(LDIRS) $(OPT) -std=c++11 $(OBJ) $(MYLIBS) -ltap -ldl $(STATIC_LIBS) -o aurora -DGITVERSION=\"$(GIT_VERSION)\" test_tokenizer-t: test_tokenizer-t.cpp $(TAP_LIBDIR)/libtap.a - g++ test_tokenizer-t.cpp $(INCLUDEDIRS) $(LDIRS) $(OPT) -std=c++11 $(MYLIBS) -lproxysql -ltap -Wl,--no-as-needed -ldl -lpthread -o test_tokenizer-t + g++ test_tokenizer-t.cpp $(INCLUDEDIRS) $(LDIRS) $(OPT) -std=c++11 $(MYLIBS) -lproxysql -ltap -Wl,--no-as-needed -ldl -lpthread -o test_tokenizer-t -DGITVERSION=\"$(GIT_VERSION)\" From 24e300cf56677e4a91e954dbbf9b1c9d8bd69059 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 8 May 2020 11:08:25 +0200 Subject: [PATCH 08/10] Added new line at the end of file --- test/tap/tests/test_tokenizer-t.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tap/tests/test_tokenizer-t.cpp b/test/tap/tests/test_tokenizer-t.cpp index 8d039e1e0..61f11224a 100644 --- a/test/tap/tests/test_tokenizer-t.cpp +++ b/test/tap/tests/test_tokenizer-t.cpp @@ -138,4 +138,4 @@ int main(int argc, char** argv) { } return exit_status(); -} \ No newline at end of file +} From aa132d200d66c644daff61e6a4e9ccfedbc30dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 8 May 2020 11:11:55 +0200 Subject: [PATCH 09/10] Improved tokenizer tests with new cases and variable grouping size --- test/tap/tests/test_tokenizer-t.cpp | 76 +++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 19 deletions(-) diff --git a/test/tap/tests/test_tokenizer-t.cpp b/test/tap/tests/test_tokenizer-t.cpp index 61f11224a..39b3bc40f 100644 --- a/test/tap/tests/test_tokenizer-t.cpp +++ b/test/tap/tests/test_tokenizer-t.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include "proxysql.h" @@ -89,26 +91,58 @@ const std::vector exp_results { const std::vector queries_grouping { // test query_digest reduction - "SELECT * FROM tablename WHERE id IN (1,2,3,4,5,6,7,8,9,10)", - "SELECT * FROM tablename WHERE id IN (1,2,3,4)", + "SELECT * FROM tablename WHERE id IN (1,2, 3,4 ,5 ,6,7,8,9,10)", // invalid request grouping - "SELECT * tablename where id IN (1,2,3,4,5,6,7,8, AND j in (1,2,3,4,5,6 and k=1", - "SELECT (1.1, 1, 2) FROM db.table", - "SELECT (1.1, 1.12934, 21.32 ) FROM db.table2", - "SELECT (1.1, 1.12934, 21.32, 39123 ) FROM db.table2", + "SELECT * tablename where id IN (1,2,3,4,5 , 6,7,8, AND j in (1, 2,3, 4 ,5,6,7,8,9 and k=1", + "SELECT (1.1, 1, 2, 13, 4.81, 12) FROM db.table", + "SELECT (1.1, 1.12934 , 21.32 , 91, 91, 12.93 ) FROM db.table2", + "SELECT (1.1, 1.12934 , 21.32 , 91.2 , 91, 12 ) FROM db.table7", + "SELECT (1.1, 1.12934, 21.32, 391,2381,28.493,1283 ) FROM db.table2", + "SELECT (1.1, 1.12934, 21.32 , 91, 91, 12.1 ) FROM db.table3" }; const std::vector exp_queries_grouping { // test query_digest reduction - "SELECT * FROM tablename WHERE id IN (?,?,?,...)", - "SELECT * FROM tablename WHERE id IN (?,?,?,...)", + "SELECT * FROM tablename WHERE id IN (?,...)", // invalid request grouping - "SELECT * tablename where id IN (?,?,?,... AND j in (?,?,?,... and k=?", - "SELECT (?,?,?) FROM db.table", - "SELECT (?,?,?) FROM db.table2", - "SELECT (?,?,?,...) FROM db.table2", + "SELECT * tablename where id IN (?,... AND j in (?,... and k=?", + "SELECT (?,...) FROM db.table", + "SELECT (?,...) FROM db.table2", + "SELECT (?,...) FROM db.table7", + "SELECT (?,...) FROM db.table2", + "SELECT (?,...) FROM db.table3" }; +std::string replace_str(const std::string& str, const std::string& match, const std::string& repl) { + if(match.empty()) { + return str; + } + + std::string result = str; + size_t start_pos = 0; + + while((start_pos = result.find(match, start_pos)) != std::string::npos) { + result.replace(start_pos, match.length(), repl); + start_pos += repl.length(); + } + + return result; +} + +std::string increase_mark_num(const std::string query, uint32_t num) { + std::string result = query; + std::string marks = ""; + + for (uint32_t i = 0; i < num - 1; i++) { + marks += "?,"; + } + marks += "?,..."; + + result = replace_str(result, "?,...", marks); + + return result; +} + int main(int argc, char** argv) { if (queries.size() != exp_results.size()) { ok(0, "queries and exp_results sizes doesn't match"); @@ -128,13 +162,17 @@ int main(int argc, char** argv) { } for (size_t i = 0; i < queries_grouping.size(); i++) { - const auto& query = queries_grouping[i]; - const auto& exp_res = exp_queries_grouping[i]; - - char* c_res = mysql_query_digest_and_first_comment(const_cast(query.c_str()), query.length(), NULL, buf); - std::string result(c_res); - - ok(result == exp_res, "Grouping digest should be equal to exp result: '%s' == '%s'", result.c_str(), exp_res.c_str()); + for (int j = 1; j <= 5; j++) { + mysql_thread___query_digests_grouping_limit = j; + + const auto& query = queries_grouping[i]; + const auto& exp_res = increase_mark_num(exp_queries_grouping[i], j); + + char* c_res = mysql_query_digest_and_first_comment(const_cast(query.c_str()), query.length(), NULL, buf); + std::string result(c_res); + + ok(result == exp_res, "Grouping digest should be equal to exp result: '%s' == '%s'", result.c_str(), exp_res.c_str()); + } } return exit_status(); From 06ef68023a9024e74ae0c14d90231492a0a48778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 8 May 2020 11:16:01 +0200 Subject: [PATCH 10/10] Avoid copying extra commas in case of parsing a 'query grouping' --- lib/c_tokenizer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/c_tokenizer.c b/lib/c_tokenizer.c index a00de3597..55ff8172a 100644 --- a/lib/c_tokenizer.c +++ b/lib/c_tokenizer.c @@ -343,9 +343,12 @@ char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comme } // supress spaces before and after commas if (p >= r && is_space_char(prev_char) && ((*s == ',') || (*p == ','))) { - prev_char = ','; --p_r; - *p_r++ = *s; + // only copy the comma if we are not grouping a query + if (!grouping_limit_exceeded) { + *p_r++ = *s; + } + prev_char = ','; s++; i++; continue;