/**
 * @file pgsql_tokenizer_unit-t.cpp
 * @brief Unit tests for the PostgreSQL SQL tokenizer/digest functions in lib/pgsql_tokenizer.cpp.
 *
 * Tests the pure parsing functions:
 *   - pgsql_query_digest_and_first_comment() / pgsql_query_digest_and_first_comment_2()
 *   - pgsql_query_digest_first_stage()
 *   - pgsql_query_strip_comments()
 *
 * These tests mirror c_tokenizer_unit-t.cpp but cover PgSQL-specific features:
 *   - Dollar-quoted strings ($$...$$ and $tag$...$tag$)
 *   - PgSQL-style type casts (::typename)
 *   - Double-quoted identifiers (preserved, not replaced)
 *   - Boolean literal replacement (TRUE/FALSE -> ?)
 *   - ARRAY literal replacement
 *   - Nested block comments
 *   - Prefix-typed string literals (E'...', B'...', X'...', U&'...')
 *   - -- line comments (no space required after --)
 */

#include "tap.h"
#include "test_globals.h"
#include "test_init.h"
#include "proxysql.h"
#include "c_tokenizer.h"

#include <cstdlib>
#include <cstring>
#include <string>

// Thread-local variables used by pgsql_query_digest_first_stage and
// pgsql_query_digest_and_first_comment_2 (which call get_pgsql_options internally).
extern __thread int pgsql_thread___query_digests_max_query_length;
extern __thread bool pgsql_thread___query_digests_lowercase;
extern __thread bool pgsql_thread___query_digests_replace_null;
extern __thread bool pgsql_thread___query_digests_no_digits;
extern __thread int pgsql_thread___query_digests_grouping_limit;
extern __thread int pgsql_thread___query_digests_groups_grouping_limit;
extern __thread bool pgsql_thread___query_digests_keep_comment;

/// Size of the stack buffer handed to every digest call in this file.
static constexpr int DIGEST_BUF_SIZE = 2048;

/**
 * @brief Value returned by the digest helpers when the tokenizer hands back NULL.
 *
 * Chosen so that it matches none of the exact or substring expectations used by
 * the tests below: a NULL digest therefore fails the assertion that consumed it
 * instead of invoking undefined behaviour in the std::string constructor.
 */
static const char* const NULL_DIGEST_SENTINEL = "<digest-unavailable>";

/**
 * @brief Set thread-local digest options to sensible defaults for testing.
 */
static void setup_digest_defaults() {
	pgsql_thread___query_digests_max_query_length = 2048;
	pgsql_thread___query_digests_lowercase = true;
	pgsql_thread___query_digests_replace_null = true;
	pgsql_thread___query_digests_no_digits = true;
	pgsql_thread___query_digests_grouping_limit = 3;
	pgsql_thread___query_digests_groups_grouping_limit = 0;
	pgsql_thread___query_digests_keep_comment = false;
}

/**
 * @brief Build a fully initialised options struct for the explicit-options API.
 *
 * The struct is value-initialised first so that any member not assigned below
 * is zero rather than indeterminate.
 */
static options make_options(
	bool lowercase, bool replace_null, bool no_digits, int grouping_limit,
	int groups_grouping_limit, bool keep_comment, int max_query_length
) {
	options opts{};
	opts.lowercase = lowercase;
	opts.replace_null = replace_null;
	opts.replace_number = no_digits;
	opts.grouping_limit = grouping_limit;
	opts.groups_grouping_limit = groups_grouping_limit;
	opts.keep_comment = keep_comment;
	opts.max_query_length = max_query_length;
	return opts;
}

/**
 * @brief Copy a tokenizer result into a std::string, tolerating NULL.
 *
 * @param digest Pointer returned by one of the digest functions (may be NULL).
 * @return The digest text, or NULL_DIGEST_SENTINEL when @p digest is NULL.
 */
static std::string digest_to_string(const char* digest) {
	return digest != nullptr ? std::string(digest) : std::string(NULL_DIGEST_SENTINEL);
}

// ============================================================================
// Helper: call pgsql_query_digest_and_first_comment with explicit options
// ============================================================================

/**
 * @brief Convenience wrapper that digests a query using the explicit options API.
 *
 * Passes a zeroed stack buffer of DIGEST_BUF_SIZE bytes to the tokenizer and
 * copies whatever pointer it returns into a std::string. Any first-comment
 * string handed back by the tokenizer is released with free().
 *
 * NOTE(review): the returned pointer is assumed to be the supplied buffer; if
 * the tokenizer can return separately allocated memory (e.g. for oversized
 * queries) that memory is not released here — confirm against the library.
 *
 * @param query NUL-terminated SQL text.
 * @return The digest as a std::string.
 */
static std::string digest_query(const char* query, bool lowercase = true, bool replace_null = true,
		bool no_digits = true, int grouping_limit = 3, int groups_grouping_limit = 0,
		bool keep_comment = false, int max_query_length = 2048) {
	char buf[DIGEST_BUF_SIZE] = {0};
	char* first_comment = nullptr;
	options opts = make_options(
		lowercase, replace_null, no_digits, grouping_limit,
		groups_grouping_limit, keep_comment, max_query_length
	);

	const int q_len = static_cast<int>(strlen(query));
	char* result = pgsql_query_digest_and_first_comment(query, q_len, &first_comment, buf, &opts);

	std::string ret = digest_to_string(result);
	free(first_comment); // free(NULL) is a no-op
	return ret;
}

/**
 * @brief Digest query via the thread-local wrapper (pgsql_query_digest_and_first_comment_2).
 *
 * Relies on the thread-local options set by setup_digest_defaults().
 */
static std::string digest_query_2(const char* query) {
	char buf[DIGEST_BUF_SIZE] = {0};
	char* first_comment = nullptr;

	const int q_len = static_cast<int>(strlen(query));
	char* result = pgsql_query_digest_and_first_comment_2(query, q_len, &first_comment, buf);

	std::string ret = digest_to_string(result);
	free(first_comment); // free(NULL) is a no-op
	return ret;
}

/**
 * @brief First-stage digest via the thread-local wrapper.
 *
 * Relies on the thread-local options set by setup_digest_defaults().
 */
static std::string digest_first_stage(const char* query) {
	char buf[DIGEST_BUF_SIZE] = {0};
	char* first_comment = nullptr;

	const int q_len = static_cast<int>(strlen(query));
	char* result = pgsql_query_digest_first_stage(query, q_len, &first_comment, buf);

	std::string ret = digest_to_string(result);
	free(first_comment); // free(NULL) is a no-op
	return ret;
}

// ============================================================================
// 1. Basic digest — number/literal replacement
// ============================================================================

static void test_digest_simple_select() {
	std::string d = digest_query("SELECT * FROM users WHERE id=1");
	ok(d == "select * from users where id=?", "digest: simple SELECT with integer literal replaced");
}

static void test_digest_insert_values() {
	std::string d = digest_query("INSERT INTO orders VALUES(1,'test',3.14)");
	ok(d == "insert into orders values(?,?,?)", "digest: INSERT with multiple literal types replaced");
}

static void test_digest_string_literals() {
	std::string d = digest_query("SELECT * FROM t WHERE name='alice'");
	ok(d == "select * from t where name=?", "digest: single-quoted string literal replaced with ?");
}

static void test_digest_float_literal() {
	std::string d = digest_query("SELECT * FROM t WHERE val=3.14159");
	ok(d == "select * from t where val=?", "digest: float literal replaced with ?");
}

static void test_digest_negative_number() {
	std::string d = digest_query("SELECT * FROM t WHERE val=-42");
	ok(d == "select * from t where val=?", "digest: negative number replaced with ?");
}

static void test_digest_hex_literal() {
	std::string d = digest_query("SELECT * FROM t WHERE id=0xFF");
	ok(d == "select * from t where id=?", "digest: hex literal replaced with ?");
}

static void test_digest_null_replacement() {
	std::string d = digest_query("SELECT * FROM t WHERE val=NULL");
	ok(d == "select * from t where val=?", "digest: NULL replaced with ? when replace_null=true");
}

static void test_digest_null_no_replacement() {
	std::string d = digest_query("SELECT * FROM t WHERE val=NULL", /* lowercase */ true, /* replace_null */ false);
	ok(d.find("null") != std::string::npos, "digest: NULL preserved when replace_null=false");
}

static void test_digest_scientific_notation() {
	std::string d = digest_query("SELECT * FROM t WHERE val=1E10");
	ok(d == "select * from t where val=?", "digest: scientific notation replaced with ?");
}

// ============================================================================
// 2. PgSQL-specific: Dollar-quoted strings
// ============================================================================

static void test_digest_dollar_quote_empty_tag() {
	std::string d = digest_query("SELECT $$hello world$$");
	ok(d == "select ?", "digest: $$...$$ dollar-quoted string replaced with ?");
}

static void test_digest_dollar_quote_with_tag() {
	std::string d = digest_query("SELECT $tag$some text$tag$");
	ok(d == "select ?", "digest: $tag$...$tag$ dollar-quoted string replaced with ?");
}

static void test_digest_dollar_quote_in_function() {
	std::string d = digest_query("CREATE FUNCTION foo() RETURNS void AS $body$BEGIN RETURN; END;$body$ LANGUAGE plpgsql");
	ok(d == "create function foo() returns void as ? language plpgsql", "digest: dollar-quoted function body replaced with ?");
}

static void test_digest_dollar_quote_with_special_chars() {
	// Dollar-quoted strings can contain single quotes without escaping
	std::string d = digest_query("SELECT $$it's a test$$");
	ok(d == "select ?", "digest: dollar-quoted string with embedded single quote replaced");
}

// ============================================================================
// 3. PgSQL-specific: Type casts (::typename)
// ============================================================================

static void test_digest_typecast_simple() {
	std::string d = digest_query("SELECT 1::int");
	ok(d == "select ?", "digest: number with ::int typecast, both replaced");
}

static void test_digest_typecast_varchar() {
	std::string d = digest_query("SELECT 'hello'::varchar");
	ok(d == "select ?", "digest: string with ::varchar typecast replaced");
}

static void test_digest_typecast_with_modifier() {
	std::string d = digest_query("SELECT 'hello'::varchar(255)");
	ok(d == "select ?", "digest: typecast with modifier ::varchar(255) handled");
}

static void test_digest_typecast_array() {
	std::string d = digest_query("SELECT '{1,2,3}'::int[]");
	ok(d == "select ?", "digest: typecast with array brackets ::int[] handled");
}

static void test_digest_typecast_in_where() {
	std::string d = digest_query("SELECT * FROM t WHERE col::text = 'foo'");
	// Typecast ::text is stripped; the exact spacing may vary
	ok(d.find("col") != std::string::npos && d.find("::") == std::string::npos && d.find("?") != std::string::npos,
		"digest: ::text typecast in WHERE clause stripped");
}

static void test_digest_typecast_quoted() {
	std::string d = digest_query("SELECT 1::\"my type\"");
	ok(d == "select ?", "digest: typecast with quoted type name handled");
}

// ============================================================================
// 4. PgSQL-specific: Double-quoted identifiers (preserved, NOT replaced)
// ============================================================================

static void test_digest_double_quoted_identifier() {
	std::string d = digest_query("SELECT * FROM \"MyTable\" WHERE \"myCol\"=1");
	// Double-quoted identifiers are preserved (not replaced with ?) but may be lowercased
	ok(d.find("mytable") != std::string::npos || d.find("MyTable") != std::string::npos,
		"digest: double-quoted identifier is preserved (not replaced with ?)");
	ok(d.find("?") != std::string::npos, "digest: literal after double-quoted column is still replaced with ?");
}

static void test_digest_double_quoted_case_preserved() {
	// Double-quoted identifiers: content is copied to digest (not replaced)
	std::string d = digest_query("SELECT \"MixedCase\" FROM t");
	// The identifier content should appear in the digest (possibly lowercased)
	ok(d.find("mixedcase") != std::string::npos || d.find("MixedCase") != std::string::npos,
		"digest: double-quoted identifier content appears in output");
}

// ============================================================================
// 5. Comment handling
// ============================================================================

static void test_digest_block_comment_stripped() {
	std::string d = digest_query("/* comment */ SELECT 1");
	ok(d == "select ?", "digest: block comment stripped");
}

static void test_digest_inline_comment() {
	std::string d = digest_query("SELECT 1 -- inline comment\n");
	ok(d == "select ?", "digest: -- inline comment stripped");
}

static void test_digest_inline_comment_no_space() {
	// PgSQL: -- starts a comment even without a space after --
	std::string d = digest_query("SELECT 1 --inline\n");
	ok(d == "select ?", "digest: --inline (no space) comment stripped");
}

static void test_digest_nested_block_comment() {
	// PgSQL supports nested /* */ comments
	std::string d = digest_query("/* outer /* inner */ still comment */ SELECT 1");
	ok(d == "select ?", "digest: nested block comments handled");
}

/**
 * @brief With keep_comment=true the leading block comment must be handed back
 *        through the first_comment out-parameter.
 *
 * Always emits exactly two assertions so the plan count stays stable whether or
 * not a comment was returned.
 */
static void test_digest_first_comment_extracted() {
	char buf[DIGEST_BUF_SIZE] = {0};
	char* first_comment = nullptr;
	options opts = make_options(
		/* lowercase */ true, /* replace_null */ true, /* no_digits */ true,
		/* grouping_limit */ 3, /* groups_grouping_limit */ 0,
		/* keep_comment */ true, /* max_query_length */ 2048
	);

	const char* query = "/* my_comment */ SELECT 1";
	pgsql_query_digest_and_first_comment(query, static_cast<int>(strlen(query)), &first_comment, buf, &opts);

	ok(first_comment != nullptr, "digest: first_comment is not NULL when keep_comment=true");
	if (first_comment) {
		ok(strstr(first_comment, "my_comment") != nullptr, "digest: first_comment contains 'my_comment'");
		free(first_comment);
	} else {
		ok(false, "digest: first_comment contains 'my_comment' (skipped, was NULL)");
	}
}

// ============================================================================
// 6. Lowercase
// ============================================================================

static void test_digest_lowercase() {
	std::string d = digest_query("SELECT * FROM Users", /* lowercase */ true);
	ok(d == "select * from users", "digest: keywords and identifiers lowercased");
}

static void test_digest_no_lowercase() {
	std::string d = digest_query("SELECT * FROM Users", /* lowercase */ false);
	ok(d == "SELECT * FROM Users", "digest: case preserved when lowercase=false");
}

// ============================================================================
// 7. Grouping (IN-list collapsing)
// ============================================================================

static void test_digest_grouping_limit() {
	std::string d = digest_query("SELECT * FROM t WHERE id IN (1,2,3,4,5)",
		/* lowercase */ true, /* replace_null */ true, /* no_digits */ true, /* grouping_limit */ 2);
	ok(d.find("...") != std::string::npos, "digest: grouping_limit=2 collapses excess IN-list values to '...'");
}

static void test_digest_in_clause() {
	std::string d = digest_query("SELECT * FROM t WHERE id IN (1,2,3)");
	ok(d == "select * from t where id in (?,?,?)", "digest: IN clause literals replaced");
}

static void test_digest_in_clause_grouping() {
	std::string d = digest_query("SELECT * FROM t WHERE id IN (1,2,3,4,5)",
		/* lowercase */ true, /* replace_null */ true, /* no_digits */ true, /* grouping_limit */ 3);
	ok(d.find("...") != std::string::npos, "digest: IN clause with grouping_limit collapses to '...'");
}

// ============================================================================
// 8. PgSQL-specific: Boolean literal replacement
// ============================================================================

static void test_digest_boolean_true() {
	std::string d = digest_query("SELECT * FROM t WHERE active=TRUE");
	ok(d == "select * from t where active=?", "digest: TRUE replaced with ?");
}

static void test_digest_boolean_false() {
	std::string d = digest_query("SELECT * FROM t WHERE active=FALSE");
	ok(d == "select * from t where active=?", "digest: FALSE replaced with ?");
}

static void test_digest_boolean_case_insensitive() {
	std::string d = digest_query("SELECT * FROM t WHERE a=true AND b=False");
	ok(d == "select * from t where a=? and b=?", "digest: boolean literals case-insensitive replacement");
}

// ============================================================================
// 9. PgSQL-specific: ARRAY literal replacement
// ============================================================================

static void test_digest_array_literal() {
	std::string d = digest_query("SELECT * FROM t WHERE col=ARRAY[1,2,3]");
	ok(d == "select * from t where col=?", "digest: ARRAY[1,2,3] replaced with ?");
}

static void test_digest_array_literal_strings() {
	std::string d = digest_query("INSERT INTO t VALUES(ARRAY['a','b','c'])");
	ok(d == "insert into t values(?)", "digest: ARRAY with string elements replaced with ?");
}

// ============================================================================
// 10. PgSQL-specific: Prefix-typed string literals
// ============================================================================

static void test_digest_escape_string() {
	std::string d = digest_query("SELECT E'hello\\nworld'");
	ok(d == "select ?", "digest: E'...' escape string replaced with ?");
}

static void test_digest_bit_string() {
	std::string d = digest_query("SELECT B'10101'");
	ok(d == "select ?", "digest: B'...' bit string replaced with ?");
}

static void test_digest_hex_string() {
	std::string d = digest_query("SELECT X'DEADBEEF'");
	ok(d == "select ?", "digest: X'...' hex string replaced with ?");
}

static void test_digest_unicode_string() {
	std::string d = digest_query("SELECT U&'d\\0061t\\+000061'");
	ok(d == "select ?", "digest: U&'...' unicode string replaced with ?");
}

// ============================================================================
// 11. First-stage and wrapper tests
// ============================================================================

static void test_digest_first_stage_basic() {
	std::string d = digest_first_stage("SELECT * FROM t WHERE id=42");
	ok(d == "select * from t where id=?", "first_stage: basic SELECT with literal replacement");
}

static void test_digest_first_stage_string() {
	std::string d = digest_first_stage("SELECT * FROM t WHERE name='test'");
	ok(d == "select * from t where name=?", "first_stage: string literal replaced");
}

static void test_digest_first_stage_dollar_quote() {
	std::string d = digest_first_stage("SELECT $$test$$");
	ok(d == "select ?", "first_stage: dollar-quoted string replaced");
}

static void test_digest_2_basic() {
	std::string d = digest_query_2("SELECT * FROM t WHERE id=1 AND name='foo'");
	ok(d == "select * from t where id=? and name=?", "digest_2: multiple literal types replaced via thread-local wrapper");
}

static void test_digest_2_with_typecast() {
	std::string d = digest_query_2("SELECT * FROM t WHERE id=1::bigint");
	ok(d == "select * from t where id=?", "digest_2: typecast handled via thread-local wrapper");
}

// ============================================================================
// 12. pgsql_query_strip_comments
// ============================================================================

// Each strip_comments test emits a fixed number of assertions on both the
// success and the NULL-result path so the plan count stays stable.

static void test_strip_comments_block() {
	const char* query = "/* comment */ SELECT 1";
	char* input = strdup(query);
	char* result = pgsql_query_strip_comments(input, static_cast<int>(strlen(input)), true);
	ok(result != nullptr, "strip_comments: result is not NULL");
	if (result) {
		ok(strstr(result, "comment") == nullptr, "strip_comments: block comment removed");
		ok(strstr(result, "select") != nullptr, "strip_comments: SELECT keyword preserved (lowercased)");
		free(result);
	} else {
		ok(false, "strip_comments: block comment removed (skipped)");
		ok(false, "strip_comments: SELECT keyword preserved (skipped)");
	}
	free(input);
}

static void test_strip_comments_inline() {
	const char* query = "SELECT 1 -- inline\n";
	char* input = strdup(query);
	char* result = pgsql_query_strip_comments(input, static_cast<int>(strlen(input)), false);
	ok(result != nullptr, "strip_comments inline: result is not NULL");
	if (result) {
		ok(strstr(result, "inline") == nullptr, "strip_comments inline: -- comment removed");
		free(result);
	} else {
		ok(false, "strip_comments inline: -- comment removed (skipped)");
	}
	free(input);
}

static void test_strip_comments_no_lowercase() {
	const char* query = "/* rem */ SELECT 1";
	char* input = strdup(query);
	char* result = pgsql_query_strip_comments(input, static_cast<int>(strlen(input)), false);
	ok(result != nullptr, "strip_comments no_lc: result is not NULL");
	if (result) {
		ok(strstr(result, "SELECT") != nullptr, "strip_comments no_lc: case preserved when lowercase=false");
		free(result);
	} else {
		ok(false, "strip_comments no_lc: case preserved (skipped)");
	}
	free(input);
}

// ============================================================================
// 13. Edge cases
// ============================================================================

static void test_digest_empty_query() {
	std::string d = digest_query("");
	ok(d.empty(), "digest: empty query produces empty digest");
}

static void test_digest_whitespace_only() {
	std::string d = digest_query(" ");
	ok(d.empty() || d == " ", "digest: whitespace-only query produces empty or single-space digest");
}

static void test_digest_multiple_spaces() {
	// The input deliberately contains runs of spaces between tokens; without
	// them this test would not exercise whitespace collapsing at all.
	std::string d = digest_query("SELECT   *   FROM   t");
	ok(d == "select * from t", "digest: multiple spaces collapsed to single space");
}

static void test_digest_escaped_single_quote() {
	std::string d = digest_query("SELECT * FROM t WHERE name='it''s'");
	ok(d == "select * from t where name=?", "digest: escaped single quote ('') inside string handled");
}

static void test_digest_backslash_escape_in_string() {
	std::string d = digest_query("SELECT * FROM t WHERE name='hello\\nworld'");
	ok(d == "select * from t where name=?", "digest: backslash escape inside string handled");
}

static void test_digest_multiple_statements() {
	std::string d = digest_query("SELECT 1; SELECT 2");
	// Should handle the semicolon as a token separator
	ok(d.find("select") != std::string::npos, "digest: multiple statements handled without crash");
}

static void test_digest_complex_query() {
	std::string d = digest_query(
		"SELECT u.id, u.name FROM users u "
		"JOIN orders o ON u.id = o.user_id "
		"WHERE o.amount > 100 AND u.status = 'active' "
		"ORDER BY o.created_at DESC LIMIT 10"
	);
	ok(d.find("select") == 0, "digest: complex JOIN query digested without crash");
	ok(d.find("?") != std::string::npos, "digest: complex query has literals replaced");
}

static void test_digest_comment_between_tokens() {
	std::string d = digest_query("SELECT/*comment*/1");
	ok(d == "select ?", "digest: comment between tokens (no spaces) handled");
}

static void test_digest_only_comment() {
	std::string d = digest_query("/* just a comment */");
	ok(d.empty() || d == " ", "digest: query that is only a comment produces empty/space digest");
}

// ============================================================================
// Main
// ============================================================================

/**
 * @brief Test driver: declares the TAP plan, initialises the minimal test
 *        environment, runs every test group, and returns the TAP exit status.
 *
 * The per-call trailing comments give the number of ok() assertions each test
 * emits; their sum (plus the init check) must equal the value passed to plan().
 */
int main() {
	plan(65);

	int rc = test_init_minimal();
	ok(rc == 0, "test_init_minimal() succeeds");

	// Set thread-local variables for functions that read them
	setup_digest_defaults();

	// 1. Basic digest — number/literal replacement (9 tests)
	test_digest_simple_select();                    // 1
	test_digest_insert_values();                    // 1
	test_digest_string_literals();                  // 1
	test_digest_float_literal();                    // 1
	test_digest_negative_number();                  // 1
	test_digest_hex_literal();                      // 1
	test_digest_null_replacement();                 // 1
	test_digest_null_no_replacement();              // 1
	test_digest_scientific_notation();              // 1

	// 2. Dollar-quoted strings (4 tests)
	test_digest_dollar_quote_empty_tag();           // 1
	test_digest_dollar_quote_with_tag();            // 1
	test_digest_dollar_quote_in_function();         // 1
	test_digest_dollar_quote_with_special_chars();  // 1

	// 3. Type casts (6 tests)
	test_digest_typecast_simple();                  // 1
	test_digest_typecast_varchar();                 // 1
	test_digest_typecast_with_modifier();           // 1
	test_digest_typecast_array();                   // 1
	test_digest_typecast_in_where();                // 1
	test_digest_typecast_quoted();                  // 1

	// 4. Double-quoted identifiers (3 tests)
	test_digest_double_quoted_identifier();         // 2
	test_digest_double_quoted_case_preserved();     // 1

	// 5. Comment handling (6 tests)
	test_digest_block_comment_stripped();           // 1
	test_digest_inline_comment();                   // 1
	test_digest_inline_comment_no_space();          // 1
	test_digest_nested_block_comment();             // 1
	test_digest_first_comment_extracted();          // 2

	// 6. Lowercase (2 tests)
	test_digest_lowercase();                        // 1
	test_digest_no_lowercase();                     // 1

	// 7. Grouping (3 tests)
	test_digest_grouping_limit();                   // 1
	test_digest_in_clause();                        // 1
	test_digest_in_clause_grouping();               // 1

	// 8. Boolean literals (3 tests)
	test_digest_boolean_true();                     // 1
	test_digest_boolean_false();                    // 1
	test_digest_boolean_case_insensitive();         // 1

	// 9. ARRAY literals (2 tests)
	test_digest_array_literal();                    // 1
	test_digest_array_literal_strings();            // 1

	// 10. Prefix-typed strings (4 tests)
	test_digest_escape_string();                    // 1
	test_digest_bit_string();                       // 1
	test_digest_hex_string();                       // 1
	test_digest_unicode_string();                   // 1

	// 11. First-stage and wrapper (5 tests)
	test_digest_first_stage_basic();                // 1
	test_digest_first_stage_string();               // 1
	test_digest_first_stage_dollar_quote();         // 1
	test_digest_2_basic();                          // 1
	test_digest_2_with_typecast();                  // 1

	// 12. strip_comments (7 tests)
	test_strip_comments_block();                    // 3
	test_strip_comments_inline();                   // 2
	test_strip_comments_no_lowercase();             // 2

	// 13. Edge cases (10 tests)
	test_digest_empty_query();                      // 1
	test_digest_whitespace_only();                  // 1
	test_digest_multiple_spaces();                  // 1
	test_digest_escaped_single_quote();             // 1
	test_digest_backslash_escape_in_string();       // 1
	test_digest_multiple_statements();              // 1
	test_digest_complex_query();                    // 2
	test_digest_comment_between_tokens();           // 1
	test_digest_only_comment();                     // 1

	// Total: 1 + 9 + 4 + 6 + 3 + 6 + 2 + 3 + 3 + 2 + 4 + 5 + 7 + 10 = 65

	test_cleanup_minimal();

	return exit_status();
}