From 35ad2f68c26738c8e452c282dbb3aa9b2a993d47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 4 Feb 2022 16:40:32 +0100 Subject: [PATCH] Implement new variable 'mysql-query_digests_keep_comment' --- include/MySQL_Thread.h | 1 + include/proxysql_structs.h | 2 ++ lib/MySQL_Thread.cpp | 4 +++ lib/c_tokenizer.cpp | 58 ++++++++++++++++++++++++++++++-------- 4 files changed, 53 insertions(+), 12 deletions(-) diff --git a/include/MySQL_Thread.h b/include/MySQL_Thread.h index 008457c3d..0e060ea68 100644 --- a/include/MySQL_Thread.h +++ b/include/MySQL_Thread.h @@ -494,6 +494,7 @@ class MySQL_Threads_Handler bool query_digests_no_digits; bool query_digests_normalize_digest_text; bool query_digests_track_hostname; + bool query_digests_keep_comment; int query_digests_grouping_limit; int query_digests_groups_grouping_limit; bool default_reconnect; diff --git a/include/proxysql_structs.h b/include/proxysql_structs.h index 7780ee96f..22d935e43 100644 --- a/include/proxysql_structs.h +++ b/include/proxysql_structs.h @@ -816,6 +816,7 @@ __thread bool mysql_thread___query_digests_replace_null; __thread bool mysql_thread___query_digests_no_digits; __thread bool mysql_thread___query_digests_normalize_digest_text; __thread bool mysql_thread___query_digests_track_hostname; +__thread bool mysql_thread___query_digests_keep_comment; __thread int mysql_thread___query_digests_max_digest_length; __thread int mysql_thread___query_digests_max_query_length; __thread int mysql_thread___show_processlist_extended; @@ -976,6 +977,7 @@ extern __thread bool mysql_thread___query_digests_no_digits; extern __thread bool mysql_thread___query_digests_replace_null; extern __thread bool mysql_thread___query_digests_normalize_digest_text; extern __thread bool mysql_thread___query_digests_track_hostname; +extern __thread bool mysql_thread___query_digests_keep_comment; extern __thread int mysql_thread___query_digests_max_digest_length; extern __thread int mysql_thread___query_digests_max_query_length; extern __thread int mysql_thread___show_processlist_extended; diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index 16d2eee5d..bc15192f1 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -539,6 +539,7 @@ static char * mysql_thread_variables_names[]= { (char *)"query_digests_no_digits", (char *)"query_digests_normalize_digest_text", (char *)"query_digests_track_hostname", + (char *)"query_digests_keep_comment", (char *)"servers_stats", (char *)"default_reconnect", #ifdef DEBUG @@ -1156,6 +1157,7 @@ MySQL_Threads_Handler::MySQL_Threads_Handler() { variables.query_digests_no_digits=false; variables.query_digests_normalize_digest_text=false; variables.query_digests_track_hostname=false; + variables.query_digests_keep_comment=false; variables.connpoll_reset_queue_length = 50; variables.min_num_servers_lantency_awareness = 1000; variables.aurora_max_lag_ms_only_read_from_replicas = 2; @@ -2094,6 +2096,7 @@ char ** MySQL_Threads_Handler::get_variables_list() { VariablesPointers_bool["query_digests_no_digits"] = make_tuple(&variables.query_digests_no_digits, false); VariablesPointers_bool["query_digests_normalize_digest_text"] = make_tuple(&variables.query_digests_normalize_digest_text, false); VariablesPointers_bool["query_digests_track_hostname"] = make_tuple(&variables.query_digests_track_hostname, false); + VariablesPointers_bool["query_digests_keep_comment"] = make_tuple(&variables.query_digests_keep_comment, false); VariablesPointers_bool["servers_stats"] = make_tuple(&variables.servers_stats, false); VariablesPointers_bool["sessions_sort"] = make_tuple(&variables.sessions_sort, false); VariablesPointers_bool["stats_time_backend_query"] = make_tuple(&variables.stats_time_backend_query, false); @@ -4002,6 +4005,7 @@ void MySQL_Thread::refresh_variables() { mysql_thread___query_digests_track_hostname=(bool)GloMTH->get_variable_int((char *)"query_digests_track_hostname"); mysql_thread___query_digests_grouping_limit=(int)GloMTH->get_variable_int((char *)"query_digests_grouping_limit"); mysql_thread___query_digests_groups_grouping_limit=(int)GloMTH->get_variable_int((char *)"query_digests_groups_grouping_limit"); + mysql_thread___query_digests_keep_comment=(bool)GloMTH->get_variable_int((char *)"query_digests_keep_comment"); variables.min_num_servers_lantency_awareness=GloMTH->get_variable_int((char *)"min_num_servers_lantency_awareness"); variables.aurora_max_lag_ms_only_read_from_replicas=GloMTH->get_variable_int((char *)"aurora_max_lag_ms_only_read_from_replicas"); variables.stats_time_backend_query=(bool)GloMTH->get_variable_int((char *)"stats_time_backend_query"); diff --git a/lib/c_tokenizer.cpp b/lib/c_tokenizer.cpp index 96ff51f75..21249d0c0 100644 --- a/lib/c_tokenizer.cpp +++ b/lib/c_tokenizer.cpp @@ -16,6 +16,7 @@ extern __thread bool mysql_thread___query_digests_replace_null; extern __thread bool mysql_thread___query_digests_no_digits; extern __thread bool mysql_thread___query_digests_grouping_limit; extern __thread bool mysql_thread___query_digests_groups_grouping_limit; +extern __thread bool mysql_thread___query_digests_keep_comment; void tokenizer(tokenizer_t *result, const char* s, const char* delimiters, int empties ) { @@ -825,6 +826,7 @@ typedef struct options { bool lowercase; bool replace_null; bool replace_number; + bool keep_comment; int grouping_limit; int groups_grouping_limit; } options; @@ -841,6 +843,7 @@ static inline void get_options(struct options* opts) { opts->replace_number = mysql_thread___query_digests_no_digits; opts->grouping_limit = mysql_thread___query_digests_grouping_limit; opts->groups_grouping_limit = mysql_thread___query_digests_groups_grouping_limit; + opts->keep_comment = mysql_thread___query_digests_keep_comment; } /** @@ -1116,8 +1119,9 @@ char cur_cmd_cmnt[FIRST_COMMENT_MAX_LENGTH]; * - 'st_no_mark_found' if the comment has completed to be parsed. */ static __attribute__((always_inline)) inline -enum p_st process_cmnt_type_1(shared_st* shared_st, cmnt_type_1_st* c_t_1_st, char** fst_cmnt) { +enum p_st process_cmnt_type_1(options* opts, shared_st* shared_st, cmnt_type_1_st* c_t_1_st, char** fst_cmnt) { enum p_st next_st = st_cmnt_type_1; + const char* res_final_pos = shared_st->res_init_pos + shared_st->d_max_len; // initial mark "/*|/*!" detection if (*shared_st->q == '/' && *(shared_st->q+1) == '*') { @@ -1128,6 +1132,14 @@ enum p_st process_cmnt_type_1(shared_st* shared_st, cmnt_type_1_st* c_t_1_st, ch c_t_1_st->is_cmd = 1; } + // copy the initial mark "/*" if comment preserving is enabled + if (opts->keep_comment) { + cur_cmd_cmnt[c_t_1_st->cur_cmd_cmnt_len] = *(shared_st->q); + c_t_1_st->cur_cmd_cmnt_len++; + cur_cmd_cmnt[c_t_1_st->cur_cmd_cmnt_len] = *(shared_st->q + 1); + c_t_1_st->cur_cmd_cmnt_len++; + } + // discard processed "/*" or "/*!" shared_st->q += 2 + c_t_1_st->is_cmd; shared_st->q_cur_pos += 2 + c_t_1_st->is_cmd; @@ -1139,7 +1151,7 @@ enum p_st process_cmnt_type_1(shared_st* shared_st, cmnt_type_1_st* c_t_1_st, ch // { // we are parsing a "/*!" comment - if (c_t_1_st->is_cmd) { + if (c_t_1_st->is_cmd || (c_t_1_st->is_cmd == false && opts->keep_comment)) { // copy the char into 'cur_cmd_cmnt' if (c_t_1_st->cur_cmd_cmnt_len < FIRST_COMMENT_MAX_LENGTH-1) { cur_cmd_cmnt[c_t_1_st->cur_cmd_cmnt_len] = *shared_st->q; @@ -1178,12 +1190,14 @@ enum p_st process_cmnt_type_1(shared_st* shared_st, cmnt_type_1_st* c_t_1_st, ch // comment type 1 - /* .. */ if (shared_st->prev_char == '*' && *shared_st->q == '/') { - if (c_t_1_st->is_cmd) { + if (c_t_1_st->is_cmd || (c_t_1_st->is_cmd == false && opts->keep_comment)) { cur_cmd_cmnt[c_t_1_st->cur_cmd_cmnt_len]=0; if (c_t_1_st->cur_cmd_cmnt_len >= 2) { // we are not interested into copying the final '*/' for the comment - c_t_1_st->cur_cmd_cmnt_len -= 2; + if (opts->keep_comment == false) { + c_t_1_st->cur_cmd_cmnt_len -= 2; + } cur_cmd_cmnt[c_t_1_st->cur_cmd_cmnt_len] = 0; // counter for the lenght of the cmd comment annotation, with format `/*!12345 ... */`. @@ -1208,12 +1222,23 @@ enum p_st process_cmnt_type_1(shared_st* shared_st, cmnt_type_1_st* c_t_1_st, ch // copy the cmd comment minus the annotation and the marks if (end) { // check if the comment to be copied is going to fit in the target buffer - const char* res_final_pos = shared_st->res_init_pos + shared_st->d_max_len - 1; int res_free_space = res_final_pos - shared_st->res_cur_pos; - int comment_size = c_t_1_st->cur_cmd_cmnt_len - cmnt_annot_len; + int comment_size = 0; + + if (opts->keep_comment) { + comment_size = c_t_1_st->cur_cmd_cmnt_len; + } else { + comment_size = c_t_1_st->cur_cmd_cmnt_len - cmnt_annot_len; + } + int copy_length = res_free_space > comment_size ? comment_size : res_free_space; - memcpy(shared_st->res_cur_pos, cur_cmd_cmnt + cmnt_annot_len, copy_length); + if (opts->keep_comment) { + memcpy(shared_st->res_cur_pos, cur_cmd_cmnt, copy_length); + } else { + memcpy(shared_st->res_cur_pos, cur_cmd_cmnt + cmnt_annot_len, copy_length); + } + shared_st->res_cur_pos += copy_length; // TODO: Check if the copy can be prevented as in the outer check for non-cmd comments @@ -1233,11 +1258,14 @@ enum p_st process_cmnt_type_1(shared_st* shared_st, cmnt_type_1_st* c_t_1_st, ch // TODO: Related to previous TODO. Remember this is a relatively new change in the current code // not at the beginning and previous char is not ' ' if ( - shared_st->res_init_pos != shared_st->res_cur_pos && + shared_st->res_init_pos != shared_st->res_cur_pos && shared_st->res_cur_pos != res_final_pos && *shared_st->res_cur_pos != ' ' && *(shared_st->res_cur_pos-1) != ' ' ) { *shared_st->res_cur_pos++ = ' '; - } else if (shared_st->res_init_pos != shared_st->res_cur_pos && *shared_st->res_cur_pos == ' ') { + } else if ( + shared_st->res_init_pos != shared_st->res_cur_pos && shared_st->res_cur_pos != res_final_pos && + *shared_st->res_cur_pos == ' ' + ) { shared_st->res_cur_pos++; } @@ -1743,7 +1771,7 @@ void stage_1_parsing(shared_st* shared_st, stage_1_st* stage_1_st, options* opts if (cur_st == st_cmnt_type_1) { // by default, we don't copy the next char for comments shared_st->copy_next_char = 0; - cur_st = process_cmnt_type_1(shared_st, cmnt_type_1_st, fst_cmnt); + cur_st = process_cmnt_type_1(opts, shared_st, cmnt_type_1_st, fst_cmnt); if (cur_st == st_no_mark_found) { shared_st->copy_next_char = 1; continue; @@ -1862,7 +1890,13 @@ void stage_2_parsing(shared_st* shared_st, stage_1_st* stage_1_st, stage_2_st* s if (lc == '(' || rc == ')') { shared_st->res_cur_pos++; } else if ((is_arithmetic_op(lc) && rc == '?') || lc == ',' || rc == ',') { - shared_st->res_cur_pos++; + char llc = *(shared_st->res_cur_pos-2); + + if (opts->keep_comment && (llc == '*' && lc == '/')) { + *shared_st->res_pre_pos++ = *shared_st->res_cur_pos++; + } else { + shared_st->res_cur_pos++; + } } else if (is_arithmetic_op(rc) && lc == '?' && is_token_char(lc)) { shared_st->res_cur_pos++; } else { @@ -2898,7 +2932,7 @@ char* mysql_query_digest_and_first_comment_one_it(char* q, int q_len, char** fst if (cur_st == st_cmnt_type_1) { // by default, we don't copy the next char for comments shared_st.copy_next_char = 0; - cur_st = process_cmnt_type_1(&shared_st, &c_t_1_st, fst_cmnt); + cur_st = process_cmnt_type_1(&opts, &shared_st, &c_t_1_st, fst_cmnt); if (cur_st == st_no_mark_found) { shared_st.copy_next_char = 1; continue;