From f16f9f95c1eafb3dbfbbb19c916da1816183fcf5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?=
 <jaramago.fernandez.javier@gmail.com>
Date: Fri, 4 Feb 2022 16:51:39 +0100
Subject: [PATCH] Add support for 'mysql-query_digests_keep_comment' to test

Test also received several fixes:
 - All fields supported for 'mz' tests payloads are now optional
 - Addressed several memory leaks
 - Payload file updated with new cases and docs
---
 .../test_mysql_query_digests_stages-t.cpp     | 66 +++++++++++++++++--
 .../regular_tokenizer_digests.hjson           | 64 ++++++++++++++++++
 2 files changed, 126 insertions(+), 4 deletions(-)

diff --git a/test/tap/tests/test_mysql_query_digests_stages-t.cpp b/test/tap/tests/test_mysql_query_digests_stages-t.cpp
index 69684bba5..421561aaf 100644
--- a/test/tap/tests/test_mysql_query_digests_stages-t.cpp
+++ b/test/tap/tests/test_mysql_query_digests_stages-t.cpp
@@ -46,6 +46,7 @@ __thread int mysql_thread___query_digests_max_query_length = 65000;
 __thread bool mysql_thread___query_digests_lowercase = false;
 __thread bool mysql_thread___query_digests_replace_null = true;
 __thread bool mysql_thread___query_digests_no_digits = false;
+__thread bool mysql_thread___query_digests_keep_comment = false;
 __thread int mysql_thread___query_digests_grouping_limit = 3;
 __thread int mysql_thread___query_digests_groups_grouping_limit = 1;
 
@@ -203,14 +204,25 @@ void process_mz_test_def(const nlohmann::json& test_def, const char* c_query, co
 		}
 
 		for (const nlohmann::json& mz_test_def : mz_tests_defs) {
-			int digest_max_size = mz_test_def.at("digest_max_size");
-			int grouping_limit = mz_test_def.at("grouping_limit");
-			int groups_grouping_limit = mz_test_def.at("groups_grouping_limit");
-			int replace_digits = 0;
 			string exp_digest {};
+
+			int digest_max_size = 2048;
+			int grouping_limit = 3;
+			int groups_grouping_limit = 0;
+			int replace_digits = 0;
 			bool no_digest = true;
 			int lowercase = 0;
+			bool keep_comment = false;
 
+			if (mz_test_def.contains("digest_max_size")) {
+				digest_max_size = mz_test_def.at("digest_max_size");
+			}
+			if (mz_test_def.contains("grouping_limit")) {
+				grouping_limit = mz_test_def.at("grouping_limit");
+			}
+			if (mz_test_def.contains("groups_grouping_limit")) {
+				groups_grouping_limit = mz_test_def.at("groups_grouping_limit");
+			}
 			if (mz_test_def.contains("replace_digits")) {
 				replace_digits = mz_test_def.at("replace_digits");
 			}
@@ -221,6 +233,9 @@ void process_mz_test_def(const nlohmann::json& test_def, const char* c_query, co
 			if (mz_test_def.contains("lowercase")) {
 				lowercase = mz_test_def.at("lowercase");
 			}
+			if (mz_test_def.contains("keep_comment")) {
+				keep_comment = mz_test_def.at("keep_comment");
+			}
 
 			int backup_digest_max_length = mysql_thread___query_digests_max_query_length;
 			mysql_thread___query_digests_max_query_length = digest_max_size;
@@ -232,6 +247,8 @@ void process_mz_test_def(const nlohmann::json& test_def, const char* c_query, co
 			mysql_thread___query_digests_no_digits = replace_digits;
 			int lowercase_backup = mysql_thread___query_digests_lowercase;
 			mysql_thread___query_digests_lowercase = lowercase_backup;
+			int keep_comment_backup = mysql_thread___query_digests_keep_comment;
+			mysql_thread___query_digests_keep_comment = keep_comment;
 
 			char* c_res = mysql_query_digest_and_first_comment_2(c_query, query.size(), &first_comment,
 					((query.size() < QUERY_DIGEST_BUF) ? buf : NULL));
@@ -250,6 +267,16 @@ void process_mz_test_def(const nlohmann::json& test_def, const char* c_query, co
 			mysql_thread___query_digests_groups_grouping_limit = backup_groups_grouping_limit;
 			mysql_thread___query_digests_no_digits = no_digits_backup;
 			mysql_thread___query_digests_lowercase = lowercase_backup;
+			mysql_thread___query_digests_keep_comment = keep_comment_backup;
+
+			if (query.size() >= QUERY_DIGEST_BUF) {
+				free(c_res);
+			}
+
+			if (first_comment != NULL) {
+				free(first_comment);
+				first_comment = NULL;
+			}
 		}
 	}
 }
@@ -272,6 +299,11 @@ int process_digest_test(const nlohmann::json& test_def) {
 			"Digest should be equal to exp result for 'STAGE 1' parsing:\n * Query: `%s`,\n * Act: `%s`,\n * Exp: `%s`",
 			query.c_str(), stage_1_res.c_str(), digest_stage_1.c_str()
 		);
+
+		if (first_comment != NULL) {
+			free(first_comment);
+			first_comment = NULL;
+		}
 	}
 	if (test_def.contains("s2")) {
 		std::string digest_stage_2 { test_def.at("s2") };
@@ -285,6 +317,11 @@ int process_digest_test(const nlohmann::json& test_def) {
 				"Digest should be equal to exp result for 'STAGE 2' parsing:\n * Query: `%s`,\n * Act: `%s`,\n * Exp: `%s`",
 				query.c_str(), stage_2_res.c_str(), digest_stage_2.c_str()
 			);
+
+			if (first_comment != NULL) {
+				free(first_comment);
+				first_comment = NULL;
+			}
 		}
 	}
 	if (test_def.contains("s3")) {
@@ -303,6 +340,11 @@ int process_digest_test(const nlohmann::json& test_def) {
 			);
 
 			mysql_thread___query_digests_groups_grouping_limit = backup_groups_grouping_limit;
+
+			if (first_comment != NULL) {
+				free(first_comment);
+				first_comment = NULL;
+			}
 		}
 	}
 	if (test_def.contains("s4")) {
@@ -317,6 +359,11 @@ int process_digest_test(const nlohmann::json& test_def) {
 				"Digest should be equal to exp result for 'STAGE 4' parsing:\n * Query: `%s`,\n * Act: `%s`,\n * Exp: `%s`",
 				query.c_str(), stage_4_res.c_str(), digest_stage_4.c_str()
 			);
+
+			if (first_comment != NULL) {
+				free(first_comment);
+				first_comment = NULL;
+			}
 		}
 	}
 	if (test_def.contains("dr")) {
@@ -335,6 +382,11 @@ int process_digest_test(const nlohmann::json& test_def) {
 			);
 
 			mysql_thread___query_digests_no_digits = no_digits_backup;
+
+			if (first_comment != NULL) {
+				free(first_comment);
+				first_comment = NULL;
+			}
 		}
 	}
 	if (test_def.contains("mz")) {
@@ -364,6 +416,8 @@ void process_crashing_tests(CommandLine& cl, const nlohmann::json& test_defs) {
 
 		process_mz_test_def(test_def, c_query, query);
 		ok(true, "Crashing test execution finished without a crash");
+
+		free(c_query);
 	}
 }
 
@@ -652,6 +706,10 @@ void process_grouping_tests(uint32_t max_groups) {
 						failed_cases.push_back({query, parsing_res, exp_result, m, n});
 					}
 
+					if (query.size() >= QUERY_DIGEST_BUF) {
+						free(c_res);
+					}
+
 					free(c_query);
 				}
 			}
diff --git a/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson b/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson
index 6eff7e44f..14d6387c6 100644
--- a/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson
+++ b/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson
@@ -688,6 +688,70 @@
 			}
 		]
 	},
+	{
+		"q": [
+			"SELECT /* keep_comment */ 1",
+			"SELECT /* keep_comment */ +1",
+			"SELECT /* keep_comment */ 1.123",
+			"SELECT /* keep_comment */ 0x100",
+			"SELECT /* keep_comment */ 1.1E-9",
+			"SELECT /* keep_comment */ 1.1E+9",
+			"SELECT /* keep_comment */ 0x100"
+		],
+		"mz": [
+			{
+				"digest_max_size": 100,
+				"keep_comment": true,
+				"digest": "SELECT /* keep_comment */ ?"
+			}
+		]
+	},
+	{
+		"q": [
+			"SELECT /* long_comment 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 */ 1"
+		],
+		"mz": [
+			{
+				"digest_max_size": 110,
+				"keep_comment": true,
+				"digest": "SELECT /* long_comment 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
+				// Two chars are lost in this case, but it's nothing critical
+			}
+		]
+	},
+	{
+		"q": [
+			"SELECT /* long_comment 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 */ 1"
+		],
+		"mz": [
+			{
+				// The final digest size is 135, but 137 chars are placing the final '*/' without issues. This is expected as the final position is never used
+				// for comment copying purposes in the state machine. Thus the '*/' is not copy, but there is room for the number replacement '?'.
+				"digest_max_size": 135,
+				"keep_comment": true,
+				"digest": "SELECT /* long_comment 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 *"
+			},
+			{
+				// The final digest size is 135, but 137 chars are placing the final '*/' without issues. This is expected as the final position is never used
+				// for comment copying purposes in the state machine. Because of this, just the '*' is copied in this case.
+				"digest_max_size": 136,
+				"keep_comment": true,
+				"digest": "SELECT /* long_comment 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 */"
+			},
+			{
+				// The final digest size is 136, but 137 chars are placing the final '*/' without issues. This is expected as the final position is never used
+				// for comment copying purposes in the state machine. Thanks to the extra char in the buffer the digest contains the terminator.
+				"digest_max_size": 137,
+				"keep_comment": true,
+				"digest": "SELECT /* long_comment 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 */"
+			},
+			{
+				"digest_max_size": 138,
+				"keep_comment": true,
+				"digest": "SELECT /* long_comment 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 */ ?"
+			}
+		]
+	},
 	{
 		"q": "create /* foo_comment*/table table_10_utf8_4 (\n`pk` int primary key,\n`col_bigint_undef_signed` bigint ,\n`col_bigint_undef_unsigned` bigint unsigned ,\n`col_bigint_key_signed` bigint ,\n`col_bigint_key_unsigned` bigint unsigned ,\n`col_float_undef_signed` float ,\n`col_float_undef_unsigned` float unsigned ,\n`col_float_key_signed` float ,\n`col_float_key_unsigned` float unsigned ,\n`col_double_undef_signed` double ,\n`col_double_undef_unsigned` double unsigned ,\n`col_double_key_signed` double ,\n`col_double_key_unsigned` double unsigned ,\n`col_decimal(40, 20)_undef_signed` decimal(40, 20) ,\n`col_decimal(40, 20)_undef_unsigned` decimal(40, 20) unsigned ,\n`col_decimal(40, 20)_key_signed` decimal(40, 20)  ,\n`col_decimal(40, 20)_key_unsigned` decimal(40, 20) unsigned ,\n`col_char(20)_undef_signed` char(20)  ,\n`col_char(20)_key_signed` char(20) ,\n`col_varchar(20)_undef_signed` varchar(20)  ,\n`col_varchar(20)_key_signed` varchar(20) ,\n`col_enum_undef_signed` enum('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z') ,\n`col_enum_key_signed` enum('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z') ,\nkey (`col_bigint_key_signed`),\nkey (`col_bigint_key_unsigned`),\nkey (`col_float_key_signed`),\nkey (`col_float_key_unsigned`),\nkey (`col_double_key_signed`),\nkey (`col_double_key_unsigned`),\nkey (`col_decimal(40, 20)_key_signed`),\nkey (`col_decimal(40, 20)_key_unsigned`),\nkey (`col_char(20)_key_signed`),\nkey (`col_varchar(20)_key_signed`),\nkey (`col_enum_key_signed`)\n) character set utf8 \npartition by hash(pk)\npartitions 4",
 		"mz": [