diff --git a/lib/c_tokenizer.cpp b/lib/c_tokenizer.cpp index 9fc5a4c73..5281be3dc 100644 --- a/lib/c_tokenizer.cpp +++ b/lib/c_tokenizer.cpp @@ -447,7 +447,8 @@ enum p_st get_next_st(const options* opts, struct shared_st* shared_st) { // cmnt type 1 - start with '/*' if( // v1_crashing_payload_05 - shared_st->q_cur_pos < (shared_st->d_max_len-1) && *shared_st->q == '/' && *(shared_st->q+1) == '*' + shared_st->q_cur_pos < (shared_st->q_len - 2) && + *shared_st->q == '/' && *(shared_st->q+1) == '*' ) { st = st_cmnt_type_1; } @@ -458,7 +459,7 @@ enum p_st get_next_st(const options* opts, struct shared_st* shared_st) { // cmnt type 3 - start with '--' else if ( // shared_st->query isn't over, need to check next character - shared_st->q_cur_pos < (shared_st->d_max_len-2) && + shared_st->q_cur_pos < (shared_st->q_len - 2) && // found starting pattern '-- ' (space is required) *shared_st->q == '-' && *(shared_st->q+1) == '-' && is_space_char(*(shared_st->q+2)) ) { @@ -778,14 +779,9 @@ enum p_st process_cmnt_type_2(shared_st* shared_st) { if (*shared_st->q == '#' && shared_st->q_cur_pos <= (shared_st->q_len - 2)) { shared_st->q += 1; shared_st->q_cur_pos += 1; - - if (shared_st->q_cur_pos == (shared_st->q_len - 2)) { - next_state = st_no_mark_found; - return next_state; - } } - if (*shared_st->q == '\n' || *shared_st->q == '\r' || (shared_st->q_cur_pos == shared_st->q_len - 1)) { + if (*shared_st->q == '\n' || *shared_st->q == '\r' || (shared_st->q_cur_pos >= shared_st->q_len - 1)) { next_state = st_no_mark_found; shared_st->prev_char = ' '; @@ -818,14 +814,9 @@ enum p_st process_cmnt_type_3(shared_st* shared_st) { ) { shared_st->q += 3; shared_st->q_cur_pos += 3; - - if (shared_st->q_cur_pos == (shared_st->q_len - 4)) { - next_state = st_no_mark_found; - return next_state; - } } - if (*shared_st->q == '\n' || *shared_st->q == '\r' || (shared_st->q_cur_pos == shared_st->q_len - 1)) { + if (*shared_st->q == '\n' || *shared_st->q == '\r' || (shared_st->q_cur_pos >= 
shared_st->q_len - 1)) { next_state = st_no_mark_found; shared_st->prev_char = ' '; diff --git a/test/tap/tests/test_mysql_query_digests_stages-t.cpp b/test/tap/tests/test_mysql_query_digests_stages-t.cpp index 910d6227d..4cfa2bb52 100644 --- a/test/tap/tests/test_mysql_query_digests_stages-t.cpp +++ b/test/tap/tests/test_mysql_query_digests_stages-t.cpp @@ -232,6 +232,7 @@ void process_mz_test_def(const nlohmann::json& test_def, const char* c_query, co bool no_digest = true; int lowercase = 0; bool keep_comment = false; + bool replace_null = true; if (mz_test_def.contains("digest_max_size")) { digest_max_size = mz_test_def.at("digest_max_size"); @@ -255,6 +256,9 @@ void process_mz_test_def(const nlohmann::json& test_def, const char* c_query, co if (mz_test_def.contains("keep_comment")) { keep_comment = mz_test_def.at("keep_comment"); } + if (mz_test_def.contains("replace_null")) { + replace_null = mz_test_def.at("replace_null"); + } int backup_digest_max_length = mysql_thread___query_digests_max_query_length; mysql_thread___query_digests_max_query_length = digest_max_size; @@ -266,6 +270,8 @@ void process_mz_test_def(const nlohmann::json& test_def, const char* c_query, co mysql_thread___query_digests_no_digits = replace_digits; int lowercase_backup = mysql_thread___query_digests_lowercase; mysql_thread___query_digests_lowercase = lowercase_backup; + int replace_null_backup = mysql_thread___query_digests_replace_null; + mysql_thread___query_digests_replace_null = replace_null; int keep_comment_backup = mysql_thread___query_digests_keep_comment; mysql_thread___query_digests_keep_comment = keep_comment; @@ -287,6 +293,7 @@ void process_mz_test_def(const nlohmann::json& test_def, const char* c_query, co mysql_thread___query_digests_no_digits = no_digits_backup; mysql_thread___query_digests_lowercase = lowercase_backup; mysql_thread___query_digests_keep_comment = keep_comment_backup; + mysql_thread___query_digests_replace_null = replace_null_backup; if (query.size() >= 
QUERY_DIGEST_BUF) { free(c_res); @@ -772,9 +779,10 @@ int main(int argc, char** argv) { bool exec_grouping_tests = true; bool exec_regular_tests = true; std::string tests_filter_str {}; + std::string digests_file_arg {}; // check parameters for test filtering - if (argc == 2) { + if (argc >= 2) { tests_filter_str = argv[1]; if (tests_filter_str.find("crashing") == std::string::npos) { @@ -786,9 +794,15 @@ int main(int argc, char** argv) { if (tests_filter_str.find("regular") == std::string::npos) { exec_regular_tests = false; } + + if (argc == 3) { + digests_file_arg = argv[2]; + } } - const string digests_filepath { string(cl.workdir) + DIGESTS_TEST_FILENAME }; + const string digests_filepath { + digests_file_arg.empty() ? string(cl.workdir) + DIGESTS_TEST_FILENAME : digests_file_arg + }; const string crashing_payloads { string(cl.workdir) + "tokenizer_payloads/crashing_payloads.hjson" }; uint32_t max_groups = 10; diff --git a/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson b/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson index 77dc7aa59..2843350d3 100644 --- a/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson +++ b/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson @@ -24,7 +24,10 @@ "# random_comment\n select 1.1 #final_comment \n ", "-- random_comment\n select 1.1 -- final_comment\n ", "-- random_comment\n select 1.1-- final_comment \n", - "-- random_comment\n select 1.1 -- final_comment \n " + // NOTE: Reg check for last `-- ` comment with '4' chars + "select 1 -- aaaa", + // NOTE: Reg check for last `# ` comment with '1' chars + "select 1 # a" ], "s1": "select ?", "s2": "select ?", @@ -954,6 +957,49 @@ } ] }, + { + // Correct compression of final comment '--' with query exceeding 'query_digests_max_query_length' + "q": [ + "insert into table (field1, field2, field3, field4, field5, field6) values ('00000000000000000000000000', null, '11111111111111111111111111', 'STOP', 1745624748961, 
'00000000000000000000000000') on duplicate key update field4 = 'STOP', field5 = 1745624748961, field6 = '11111111111111111111111111' -- cmt: OOOOOOOOOOOOOOOOOOOOOOOOOO:NNNNNNNNNNNNNNNNNNNNNNNNNN:N" + ], + "mz": [ + { + "digest_max_size": 299, + "grouping_limit": 3, + "replace_null": false, + "groups_grouping_limit": 10, + "digest": "insert into table (field1,field2,field3,field4,field5,field6) values (?,null,?,?,?,?) on duplicate key update field4 = ?,field5 = ?,field6 = ?" + } + ] + }, + { + // Correct compression of final comment '--' with query exceeding 'query_digests_max_query_length' + "q": [ + "SELECT '01JSQMJPDJEF' -- cmt: foo" + ], + "mz": [ + { + "digest_max_size": 25, + "grouping_limit": 3, + "groups_grouping_limit": 10, + "digest": "SELECT ?" + } + ] + }, + { + // Correct compression of final comment '/* */' with query exceeding 'query_digests_max_query_length' + "q": [ + "SELECT '01JSQMJPDJEF' /* cmt: foo */" + ], + "mz": [ + { + "digest_max_size": 24, + "grouping_limit": 3, + "groups_grouping_limit": 10, + "digest": "SELECT ?" + } + ] + }, { // digest_corner_cases_1.hjson - Testing the compression limits of number parsing when buffer is exceeded "q": [