From f16f9f95c1eafb3dbfbbb19c916da1816183fcf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 4 Feb 2022 16:51:39 +0100 Subject: [PATCH] Add support for 'mysql-query_digests_keep_comment' to test Test also received several fixes: - All fields supported for 'mz' tests payloads are now optional - Addressed several memory leaks - Payload file updated with new cases and docs --- .../test_mysql_query_digests_stages-t.cpp | 66 +++++++++++++++++-- .../regular_tokenizer_digests.hjson | 64 ++++++++++++++++++ 2 files changed, 126 insertions(+), 4 deletions(-) diff --git a/test/tap/tests/test_mysql_query_digests_stages-t.cpp b/test/tap/tests/test_mysql_query_digests_stages-t.cpp index 69684bba5..421561aaf 100644 --- a/test/tap/tests/test_mysql_query_digests_stages-t.cpp +++ b/test/tap/tests/test_mysql_query_digests_stages-t.cpp @@ -46,6 +46,7 @@ __thread int mysql_thread___query_digests_max_query_length = 65000; __thread bool mysql_thread___query_digests_lowercase = false; __thread bool mysql_thread___query_digests_replace_null = true; __thread bool mysql_thread___query_digests_no_digits = false; +__thread bool mysql_thread___query_digests_keep_comment = false; __thread int mysql_thread___query_digests_grouping_limit = 3; __thread int mysql_thread___query_digests_groups_grouping_limit = 1; @@ -203,14 +204,25 @@ void process_mz_test_def(const nlohmann::json& test_def, const char* c_query, co } for (const nlohmann::json& mz_test_def : mz_tests_defs) { - int digest_max_size = mz_test_def.at("digest_max_size"); - int grouping_limit = mz_test_def.at("grouping_limit"); - int groups_grouping_limit = mz_test_def.at("groups_grouping_limit"); - int replace_digits = 0; string exp_digest {}; + + int digest_max_size = 2048; + int grouping_limit = 3; + int groups_grouping_limit = 0; + int replace_digits = 0; bool no_digest = true; int lowercase = 0; + bool keep_comment = false; + if (mz_test_def.contains("digest_max_size")) { + digest_max_size = mz_test_def.at("digest_max_size"); + } + if (mz_test_def.contains("grouping_limit")) { + grouping_limit = mz_test_def.at("grouping_limit"); + } + if (mz_test_def.contains("groups_grouping_limit")) { + groups_grouping_limit = mz_test_def.at("groups_grouping_limit"); + } if (mz_test_def.contains("replace_digits")) { replace_digits = mz_test_def.at("replace_digits"); } @@ -221,6 +233,9 @@ void process_mz_test_def(const nlohmann::json& test_def, const char* c_query, co if (mz_test_def.contains("lowercase")) { lowercase = mz_test_def.at("lowercase"); } + if (mz_test_def.contains("keep_comment")) { + keep_comment = mz_test_def.at("keep_comment"); + } int backup_digest_max_length = mysql_thread___query_digests_max_query_length; mysql_thread___query_digests_max_query_length = digest_max_size; @@ -232,6 +247,8 @@ void process_mz_test_def(const nlohmann::json& test_def, const char* c_query, co mysql_thread___query_digests_no_digits = replace_digits; int lowercase_backup = mysql_thread___query_digests_lowercase; mysql_thread___query_digests_lowercase = lowercase_backup; + int keep_comment_backup = mysql_thread___query_digests_keep_comment; + mysql_thread___query_digests_keep_comment = keep_comment; char* c_res = mysql_query_digest_and_first_comment_2(c_query, query.size(), &first_comment, ((query.size() < QUERY_DIGEST_BUF) ? buf : NULL)); @@ -250,6 +267,16 @@ void process_mz_test_def(const nlohmann::json& test_def, const char* c_query, co mysql_thread___query_digests_groups_grouping_limit = backup_groups_grouping_limit; mysql_thread___query_digests_no_digits = no_digits_backup; mysql_thread___query_digests_lowercase = lowercase_backup; + mysql_thread___query_digests_keep_comment = keep_comment_backup; + + if (query.size() >= QUERY_DIGEST_BUF) { + free(c_res); + } + + if (first_comment != NULL) { + free(first_comment); + first_comment = NULL; + } } } } @@ -272,6 +299,11 @@ int process_digest_test(const nlohmann::json& test_def) { "Digest should be equal to exp result for 'STAGE 1' parsing:\n * Query: `%s`,\n * Act: `%s`,\n * Exp: `%s`", query.c_str(), stage_1_res.c_str(), digest_stage_1.c_str() ); + + if (first_comment != NULL) { + free(first_comment); + first_comment = NULL; + } } if (test_def.contains("s2")) { std::string digest_stage_2 { test_def.at("s2") }; @@ -285,6 +317,11 @@ int process_digest_test(const nlohmann::json& test_def) { "Digest should be equal to exp result for 'STAGE 2' parsing:\n * Query: `%s`,\n * Act: `%s`,\n * Exp: `%s`", query.c_str(), stage_2_res.c_str(), digest_stage_2.c_str() ); + + if (first_comment != NULL) { + free(first_comment); + first_comment = NULL; + } } } if (test_def.contains("s3")) { @@ -303,6 +340,11 @@ int process_digest_test(const nlohmann::json& test_def) { ); mysql_thread___query_digests_groups_grouping_limit = backup_groups_grouping_limit; + + if (first_comment != NULL) { + free(first_comment); + first_comment = NULL; + } } } if (test_def.contains("s4")) { @@ -317,6 +359,11 @@ int process_digest_test(const nlohmann::json& test_def) { "Digest should be equal to exp result for 'STAGE 4' parsing:\n * Query: `%s`,\n * Act: `%s`,\n * Exp: `%s`", query.c_str(), stage_4_res.c_str(), digest_stage_4.c_str() ); + + if (first_comment != NULL) { + free(first_comment); + first_comment = NULL; + } } } if (test_def.contains("dr")) { @@ -335,6 +382,11 @@ int process_digest_test(const nlohmann::json& test_def) { ); mysql_thread___query_digests_no_digits = no_digits_backup; + + if (first_comment != NULL) { + free(first_comment); + first_comment = NULL; + } } } if (test_def.contains("mz")) { @@ -364,6 +416,8 @@ void process_crashing_tests(CommandLine& cl, const nlohmann::json& test_defs) { process_mz_test_def(test_def, c_query, query); ok(true, "Crashing test execution finished without a crash"); + + free(c_query); } } @@ -652,6 +706,10 @@ void process_grouping_tests(uint32_t max_groups) { failed_cases.push_back({query, parsing_res, exp_result, m, n}); } + if (query.size() >= QUERY_DIGEST_BUF) { + free(c_res); + } + free(c_query); } } diff --git a/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson b/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson index 6eff7e44f..14d6387c6 100644 --- a/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson +++ b/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson @@ -688,6 +688,70 @@ } ] }, + { + "q": [ + "SELECT /* keep_comment */ 1", + "SELECT /* keep_comment */ +1", + "SELECT /* keep_comment */ 1.123", + "SELECT /* keep_comment */ 0x100", + "SELECT /* keep_comment */ 1.1E-9", + "SELECT /* keep_comment */ 1.1E+9", + "SELECT /* keep_comment */ 0x100" + ], + "mz": [ + { + "digest_max_size": 100, + "keep_comment": true, + "digest": "SELECT /* keep_comment */ ?" + } + ] + }, + { + "q": [ + "SELECT /* long_comment 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 */ 1" + ], + "mz": [ + { + "digest_max_size": 110, + "keep_comment": true, + "digest": "SELECT /* long_comment 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + // Two chars are lost in this case, but it's nothing critical + } + ] + }, + { + "q": [ + "SELECT /* long_comment 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 */ 1" + ], + "mz": [ + { + // The final digest size is 135, but 137 chars are placing the final '*/' without issues. This is expected as the final position is never used + // for comment copying purposes in the state machine. Thus the '*/' is not copy, but there is room for the number replacement '?'. + "digest_max_size": 135, + "keep_comment": true, + "digest": "SELECT /* long_comment 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 *" + }, + { + // The final digest size is 135, but 137 chars are placing the final '*/' without issues. This is expected as the final position is never used + // for comment copying purposes in the state machine. Because of this, just the '*' is copied in this case. + "digest_max_size": 136, + "keep_comment": true, + "digest": "SELECT /* long_comment 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 */" + }, + { + // The final digest size is 136, but 137 chars are placing the final '*/' without issues. This is expected as the final position is never used + // for comment copying purposes in the state machine. Thanks to the extra char in the buffer the digest contains the terminator. + "digest_max_size": 137, + "keep_comment": true, + "digest": "SELECT /* long_comment 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 */" + }, + { + "digest_max_size": 138, + "keep_comment": true, + "digest": "SELECT /* long_comment 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 */ ?" + } + ] + }, { "q": "create /* foo_comment*/table table_10_utf8_4 (\n`pk` int primary key,\n`col_bigint_undef_signed` bigint ,\n`col_bigint_undef_unsigned` bigint unsigned ,\n`col_bigint_key_signed` bigint ,\n`col_bigint_key_unsigned` bigint unsigned ,\n`col_float_undef_signed` float ,\n`col_float_undef_unsigned` float unsigned ,\n`col_float_key_signed` float ,\n`col_float_key_unsigned` float unsigned ,\n`col_double_undef_signed` double ,\n`col_double_undef_unsigned` double unsigned ,\n`col_double_key_signed` double ,\n`col_double_key_unsigned` double unsigned ,\n`col_decimal(40, 20)_undef_signed` decimal(40, 20) ,\n`col_decimal(40, 20)_undef_unsigned` decimal(40, 20) unsigned ,\n`col_decimal(40, 20)_key_signed` decimal(40, 20) ,\n`col_decimal(40, 20)_key_unsigned` decimal(40, 20) unsigned ,\n`col_char(20)_undef_signed` char(20) ,\n`col_char(20)_key_signed` char(20) ,\n`col_varchar(20)_undef_signed` varchar(20) ,\n`col_varchar(20)_key_signed` varchar(20) ,\n`col_enum_undef_signed` enum('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z') ,\n`col_enum_key_signed` enum('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z') ,\nkey (`col_bigint_key_signed`),\nkey (`col_bigint_key_unsigned`),\nkey (`col_float_key_signed`),\nkey (`col_float_key_unsigned`),\nkey (`col_double_key_signed`),\nkey (`col_double_key_unsigned`),\nkey (`col_decimal(40, 20)_key_signed`),\nkey (`col_decimal(40, 20)_key_unsigned`),\nkey (`col_char(20)_key_signed`),\nkey (`col_varchar(20)_key_signed`),\nkey (`col_enum_key_signed`)\n) character set utf8 \npartition by hash(pk)\npartitions 4", "mz": [