From 35ad2f68c26738c8e452c282dbb3aa9b2a993d47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?=
 <jaramago.fernandez.javier@gmail.com>
Date: Fri, 4 Feb 2022 16:40:32 +0100
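Subject: [PATCH] Implement new variable 'mysql-query_digests_keep_comment'

Adds the boolean thread variable 'mysql-query_digests_keep_comment'
(default: false). When enabled, 'process_cmnt_type_1' also buffers plain
'/* ... */' comments (previously only '/*! ... */' ones were buffered) and
copies them, opening and closing marks included, into the result buffer.
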
---
 include/MySQL_Thread.h     |  1 +
 include/proxysql_structs.h |  2 ++
 lib/MySQL_Thread.cpp       |  4 +++
 lib/c_tokenizer.cpp        | 58 ++++++++++++++++++++++++++++++--------
 4 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/include/MySQL_Thread.h b/include/MySQL_Thread.h
index 008457c3d..0e060ea68 100644
--- a/include/MySQL_Thread.h
+++ b/include/MySQL_Thread.h
@@ -494,6 +494,7 @@ class MySQL_Threads_Handler
 		bool query_digests_no_digits;
 		bool query_digests_normalize_digest_text;
 		bool query_digests_track_hostname;
+		bool query_digests_keep_comment;
 		int query_digests_grouping_limit;
 		int query_digests_groups_grouping_limit;
 		bool default_reconnect;
diff --git a/include/proxysql_structs.h b/include/proxysql_structs.h
index 7780ee96f..22d935e43 100644
--- a/include/proxysql_structs.h
+++ b/include/proxysql_structs.h
@@ -816,6 +816,7 @@ __thread bool mysql_thread___query_digests_replace_null;
 __thread bool mysql_thread___query_digests_no_digits;
 __thread bool mysql_thread___query_digests_normalize_digest_text;
 __thread bool mysql_thread___query_digests_track_hostname;
+__thread bool mysql_thread___query_digests_keep_comment;
 __thread int mysql_thread___query_digests_max_digest_length;
 __thread int mysql_thread___query_digests_max_query_length;
 __thread int mysql_thread___show_processlist_extended;
@@ -976,6 +977,7 @@ extern __thread bool mysql_thread___query_digests_no_digits;
 extern __thread bool mysql_thread___query_digests_replace_null;
 extern __thread bool mysql_thread___query_digests_normalize_digest_text;
 extern __thread bool mysql_thread___query_digests_track_hostname;
+extern __thread bool mysql_thread___query_digests_keep_comment;
 extern __thread int mysql_thread___query_digests_max_digest_length;
 extern __thread int mysql_thread___query_digests_max_query_length;
 extern __thread int mysql_thread___show_processlist_extended;
diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp
index 16d2eee5d..bc15192f1 100644
--- a/lib/MySQL_Thread.cpp
+++ b/lib/MySQL_Thread.cpp
@@ -539,6 +539,7 @@ static char * mysql_thread_variables_names[]= {
 	(char *)"query_digests_no_digits",
 	(char *)"query_digests_normalize_digest_text",
 	(char *)"query_digests_track_hostname",
+	(char *)"query_digests_keep_comment",
 	(char *)"servers_stats",
 	(char *)"default_reconnect",
 #ifdef DEBUG
@@ -1156,6 +1157,7 @@ MySQL_Threads_Handler::MySQL_Threads_Handler() {
 	variables.query_digests_no_digits=false;
 	variables.query_digests_normalize_digest_text=false;
 	variables.query_digests_track_hostname=false;
+	variables.query_digests_keep_comment=false;
 	variables.connpoll_reset_queue_length = 50;
 	variables.min_num_servers_lantency_awareness = 1000;
 	variables.aurora_max_lag_ms_only_read_from_replicas = 2;
@@ -2094,6 +2096,7 @@ char ** MySQL_Threads_Handler::get_variables_list() {
 		VariablesPointers_bool["query_digests_no_digits"]         = make_tuple(&variables.query_digests_no_digits,         false);
 		VariablesPointers_bool["query_digests_normalize_digest_text"] = make_tuple(&variables.query_digests_normalize_digest_text, false);
 		VariablesPointers_bool["query_digests_track_hostname"]    = make_tuple(&variables.query_digests_track_hostname,    false);
+		VariablesPointers_bool["query_digests_keep_comment"]  = make_tuple(&variables.query_digests_keep_comment,  false);
 		VariablesPointers_bool["servers_stats"]                   = make_tuple(&variables.servers_stats,                   false);
 		VariablesPointers_bool["sessions_sort"]                   = make_tuple(&variables.sessions_sort,                   false);
 		VariablesPointers_bool["stats_time_backend_query"]        = make_tuple(&variables.stats_time_backend_query,        false);
@@ -4002,6 +4005,7 @@ void MySQL_Thread::refresh_variables() {
 	mysql_thread___query_digests_track_hostname=(bool)GloMTH->get_variable_int((char *)"query_digests_track_hostname");
 	mysql_thread___query_digests_grouping_limit=(int)GloMTH->get_variable_int((char *)"query_digests_grouping_limit");
 	mysql_thread___query_digests_groups_grouping_limit=(int)GloMTH->get_variable_int((char *)"query_digests_groups_grouping_limit");
+	mysql_thread___query_digests_keep_comment=(bool)GloMTH->get_variable_int((char *)"query_digests_keep_comment");
 	variables.min_num_servers_lantency_awareness=GloMTH->get_variable_int((char *)"min_num_servers_lantency_awareness");
 	variables.aurora_max_lag_ms_only_read_from_replicas=GloMTH->get_variable_int((char *)"aurora_max_lag_ms_only_read_from_replicas");
 	variables.stats_time_backend_query=(bool)GloMTH->get_variable_int((char *)"stats_time_backend_query");
diff --git a/lib/c_tokenizer.cpp b/lib/c_tokenizer.cpp
index 96ff51f75..21249d0c0 100644
--- a/lib/c_tokenizer.cpp
+++ b/lib/c_tokenizer.cpp
@@ -16,6 +16,7 @@ extern __thread bool mysql_thread___query_digests_replace_null;
 extern __thread bool mysql_thread___query_digests_no_digits;
 extern __thread bool mysql_thread___query_digests_grouping_limit;
 extern __thread bool mysql_thread___query_digests_groups_grouping_limit;
+extern __thread bool mysql_thread___query_digests_keep_comment;
 
 void tokenizer(tokenizer_t *result, const char* s, const char* delimiters, int empties )
 {
@@ -825,6 +826,7 @@ typedef struct options {
 	bool lowercase;
 	bool replace_null;
 	bool replace_number;
+	bool keep_comment;
 	int grouping_limit;
 	int groups_grouping_limit;
 } options;
@@ -841,6 +843,7 @@ static inline void get_options(struct options* opts) {
 	opts->replace_number = mysql_thread___query_digests_no_digits;
 	opts->grouping_limit = mysql_thread___query_digests_grouping_limit;
 	opts->groups_grouping_limit = mysql_thread___query_digests_groups_grouping_limit;
+	opts->keep_comment = mysql_thread___query_digests_keep_comment;
 }
 
 /**
@@ -1116,8 +1119,9 @@ char cur_cmd_cmnt[FIRST_COMMENT_MAX_LENGTH];
  *   - 'st_no_mark_found' if the comment has completed to be parsed.
  */
 static __attribute__((always_inline)) inline
-enum p_st process_cmnt_type_1(shared_st* shared_st, cmnt_type_1_st* c_t_1_st, char** fst_cmnt) {
+enum p_st process_cmnt_type_1(options* opts, shared_st* shared_st, cmnt_type_1_st* c_t_1_st, char** fst_cmnt) {
 	enum p_st next_st = st_cmnt_type_1;
+	const char* res_final_pos = shared_st->res_init_pos + shared_st->d_max_len;
 
 	// initial mark "/*|/*!" detection
 	if (*shared_st->q == '/' && *(shared_st->q+1) == '*') {
@@ -1128,6 +1132,14 @@ enum p_st process_cmnt_type_1(shared_st* shared_st, cmnt_type_1_st* c_t_1_st, ch
 			c_t_1_st->is_cmd = 1;
 		}
 
+		// copy the initial mark "/*" if comment preserving is enabled
+		if (opts->keep_comment) {
+			cur_cmd_cmnt[c_t_1_st->cur_cmd_cmnt_len] = *(shared_st->q);
+			c_t_1_st->cur_cmd_cmnt_len++;
+			cur_cmd_cmnt[c_t_1_st->cur_cmd_cmnt_len] = *(shared_st->q + 1);
+			c_t_1_st->cur_cmd_cmnt_len++;
+		}
+
 		// discard processed "/*" or "/*!"
 		shared_st->q += 2 + c_t_1_st->is_cmd;
 		shared_st->q_cur_pos += 2 + c_t_1_st->is_cmd;
@@ -1139,7 +1151,7 @@ enum p_st process_cmnt_type_1(shared_st* shared_st, cmnt_type_1_st* c_t_1_st, ch
 //  {
 
 	// we are parsing a "/*!" comment
-	if (c_t_1_st->is_cmd) {
+	if (c_t_1_st->is_cmd || (c_t_1_st->is_cmd == false && opts->keep_comment)) {
 		// copy the char into 'cur_cmd_cmnt'
 		if (c_t_1_st->cur_cmd_cmnt_len < FIRST_COMMENT_MAX_LENGTH-1) {
 			cur_cmd_cmnt[c_t_1_st->cur_cmd_cmnt_len] = *shared_st->q;
@@ -1178,12 +1190,14 @@ enum p_st process_cmnt_type_1(shared_st* shared_st, cmnt_type_1_st* c_t_1_st, ch
 
 	// comment type 1 - /* .. */
 	if (shared_st->prev_char == '*' && *shared_st->q == '/') {
-		if (c_t_1_st->is_cmd) {
+		if (c_t_1_st->is_cmd || (c_t_1_st->is_cmd == false && opts->keep_comment)) {
 			cur_cmd_cmnt[c_t_1_st->cur_cmd_cmnt_len]=0;
 
 			if (c_t_1_st->cur_cmd_cmnt_len >= 2) {
 				// we are not interested into copying the final '*/' for the comment
-				c_t_1_st->cur_cmd_cmnt_len -= 2;
+				if (opts->keep_comment == false) {
+					c_t_1_st->cur_cmd_cmnt_len -= 2;
+				}
 
 				cur_cmd_cmnt[c_t_1_st->cur_cmd_cmnt_len] = 0;
 				// counter for the lenght of the cmd comment annotation, with format `/*!12345 ... */`.
@@ -1208,12 +1222,23 @@ enum p_st process_cmnt_type_1(shared_st* shared_st, cmnt_type_1_st* c_t_1_st, ch
 				// copy the cmd comment minus the annotation and the marks
 				if (end) {
 					// check if the comment to be copied is going to fit in the target buffer
-					const char* res_final_pos = shared_st->res_init_pos + shared_st->d_max_len - 1;
 					int res_free_space = res_final_pos - shared_st->res_cur_pos;
-					int comment_size = c_t_1_st->cur_cmd_cmnt_len - cmnt_annot_len;
+					int comment_size = 0;
+
+					if (opts->keep_comment) {
+						comment_size = c_t_1_st->cur_cmd_cmnt_len;
+					} else {
+						comment_size = c_t_1_st->cur_cmd_cmnt_len - cmnt_annot_len;
+					}
+
 					int copy_length = res_free_space > comment_size ? comment_size : res_free_space;
 
-					memcpy(shared_st->res_cur_pos, cur_cmd_cmnt + cmnt_annot_len, copy_length);
+					if (opts->keep_comment) {
+						memcpy(shared_st->res_cur_pos, cur_cmd_cmnt, copy_length);
+					} else {
+						memcpy(shared_st->res_cur_pos, cur_cmd_cmnt + cmnt_annot_len, copy_length);
+					}
+
 					shared_st->res_cur_pos += copy_length;
 
 					// TODO: Check if the copy can be prevented as in the outer check for non-cmd comments
@@ -1233,11 +1258,14 @@ enum p_st process_cmnt_type_1(shared_st* shared_st, cmnt_type_1_st* c_t_1_st, ch
 		// TODO: Related to previous TODO. Remember this is a relatively new change in the current code
 		// not at the beginning and previous char is not ' '
 		if (
-			shared_st->res_init_pos != shared_st->res_cur_pos &&
+			shared_st->res_init_pos != shared_st->res_cur_pos && shared_st->res_cur_pos != res_final_pos &&
 			*shared_st->res_cur_pos != ' ' && *(shared_st->res_cur_pos-1) != ' '
 		) {
 			*shared_st->res_cur_pos++ = ' ';
-		} else if (shared_st->res_init_pos != shared_st->res_cur_pos && *shared_st->res_cur_pos == ' ') {
+		} else if (
+			shared_st->res_init_pos != shared_st->res_cur_pos && shared_st->res_cur_pos != res_final_pos &&
+			*shared_st->res_cur_pos == ' '
+		) {
 			shared_st->res_cur_pos++;
 		}
 
@@ -1743,7 +1771,7 @@ void stage_1_parsing(shared_st* shared_st, stage_1_st* stage_1_st, options* opts
 			if (cur_st == st_cmnt_type_1) {
 				// by default, we don't copy the next char for comments
 				shared_st->copy_next_char = 0;
-				cur_st = process_cmnt_type_1(shared_st, cmnt_type_1_st, fst_cmnt);
+				cur_st = process_cmnt_type_1(opts, shared_st, cmnt_type_1_st, fst_cmnt);
 				if (cur_st == st_no_mark_found) {
 					shared_st->copy_next_char = 1;
 					continue;
@@ -1862,7 +1890,13 @@ void stage_2_parsing(shared_st* shared_st, stage_1_st* stage_1_st, stage_2_st* s
 			if (lc == '(' || rc == ')') {
 				shared_st->res_cur_pos++;
 			} else if ((is_arithmetic_op(lc) && rc == '?') || lc == ',' || rc == ',') {
-				shared_st->res_cur_pos++;
+				char llc = *(shared_st->res_cur_pos-2);
+
+				if (opts->keep_comment && (llc == '*' && lc == '/')) {
+					*shared_st->res_pre_pos++ = *shared_st->res_cur_pos++;
+				} else {
+					shared_st->res_cur_pos++;
+				}
 			} else if (is_arithmetic_op(rc) && lc == '?' && is_token_char(lc)) {
 				shared_st->res_cur_pos++;
 			} else {
@@ -2898,7 +2932,7 @@ char* mysql_query_digest_and_first_comment_one_it(char* q, int q_len, char** fst
 			if (cur_st == st_cmnt_type_1) {
 				// by default, we don't copy the next char for comments
 				shared_st.copy_next_char = 0;
-				cur_st = process_cmnt_type_1(&shared_st, &c_t_1_st, fst_cmnt);
+				cur_st = process_cmnt_type_1(&opts, &shared_st, &c_t_1_st, fst_cmnt);
 				if (cur_st == st_no_mark_found) {
 					shared_st.copy_next_char = 1;
 					continue;