diff --git a/lib/pgsql_tokenizer.cpp b/lib/pgsql_tokenizer.cpp index 13cb201e2..20778bb93 100644 --- a/lib/pgsql_tokenizer.cpp +++ b/lib/pgsql_tokenizer.cpp @@ -1245,8 +1245,6 @@ enum p_st process_replace_null(shared_st* shared_st, const options* opts) { static __attribute__((always_inline)) inline enum p_st process_pg_typecast(shared_st* s, pg_typecast_st* tc) { - enum p_st next = st_pg_typecast; - // On entering state if (!tc->started) { tc->started = true; @@ -1307,11 +1305,27 @@ enum p_st process_pg_typecast(shared_st* s, pg_typecast_st* tc) c = (s->q_cur_pos < s->q_len) ? *s->q : '\0'; } - // Skip any whitespace - while (s->q_cur_pos < s->q_len && is_space_char(c)) { - s->q++; - s->q_cur_pos++; - c = (s->q_cur_pos < s->q_len) ? *s->q : '\0'; + // Skip whitespace, but only if it's followed by a modifier or + // array bracket (e.g. `::int (10)`, `::int []`). If the + // whitespace is just a separator before the next token (e.g. + // `::int FROM ...`), preserve it so the dispatcher sees the + // space and emits it between the typecast and the following + // token in the output. Without this lookahead the typecast + // handler used to consume the trailing space and produced + // digests like `count(*)from x` (issue #5755). + if (s->q_cur_pos < s->q_len && is_space_char(c)) { + int saved_pos = s->q_cur_pos; + const char* saved_q = s->q; + while (s->q_cur_pos < s->q_len && is_space_char(c)) { + s->q++; + s->q_cur_pos++; + c = (s->q_cur_pos < s->q_len) ? *s->q : '\0'; + } + if (c != '(' && c != '[') { + s->q = saved_q; + s->q_cur_pos = saved_pos; + c = (s->q_cur_pos < s->q_len) ? *s->q : '\0'; + } } // Handle type modifiers (parentheses with parameters) @@ -1375,18 +1389,21 @@ enum p_st process_pg_typecast(shared_st* s, pg_typecast_st* tc) } } - // End of type name? 
Now check if we're at a delimiter - if (s->q_cur_pos >= s->q_len || - is_space_char(c) || - c == ')' || c == '(' || c == ';' || c == ',' || - c == '+' || c == '-' || c == '*' || c == '/' || - c == '=' || c == '<' || c == '>' || c == '@' || - c == ']' || c == '[') { - // Exit state - return st_no_mark_found; - } - - return next; + // All typecast consumption (type name + optional whitespace + + // modifiers + array brackets) is complete by this point, so the + // state must always exit. The previous attempt to detect "end of + // typecast" via an enumerated delimiter list dropped through to + // `return next` for any character not in that list (e.g. the 'F' + // of `FROM` after `::INT FROM "Inventory"`), which kept the + // dispatcher in `st_pg_typecast`, advanced the cursor by one extra + // char per outer-loop iteration, and silently swallowed the rest + // of the query. See issue #5755. + // + // Also reset `started` so a subsequent `::cast` later in the same + // query (e.g. `SELECT 1::INT, 2::TEXT`) re-enters cleanly via the + // entry block at the top of this function. + tc->started = false; + return st_no_mark_found; } /** diff --git a/test/tap/tests/unit/pgsql_tokenizer_unit-t.cpp b/test/tap/tests/unit/pgsql_tokenizer_unit-t.cpp index 98738330e..284c713a7 100644 --- a/test/tap/tests/unit/pgsql_tokenizer_unit-t.cpp +++ b/test/tap/tests/unit/pgsql_tokenizer_unit-t.cpp @@ -261,6 +261,47 @@ static void test_digest_typecast_quoted() { "digest: typecast with quoted type name handled"); } +// Regression test for issue #5755: a typecast in the middle of a query +// must not swallow the rest of the query. The bug was in +// process_pg_typecast() which, after consuming `::TYPENAME`, sometimes +// returned `st_pg_typecast` (instead of `st_no_mark_found`) when the +// next character wasn't in an enumerated delimiter list. 
The +// dispatcher would then advance one extra char per iteration AND keep +// re-entering the typecast handler, eating subsequent tokens until end +// of input. +static void test_digest_typecast_followed_by_clause() { + // Pre-fix output: "select count(*)" — everything after `::INT` was + // silently dropped. Post-fix the FROM/WHERE clause must survive. + std::string d = digest_query( + "SELECT COUNT(*)::INT FROM \"Inventory\" AS i WHERE i.\"TenantId\"=$1"); + ok(d.find("from") != std::string::npos && + d.find("where") != std::string::npos, + "digest #5755: typecast does not swallow following FROM/WHERE clause"); + ok(d.find("inventory") != std::string::npos || + d.find("Inventory") != std::string::npos, + "digest #5755: quoted identifier after typecast is preserved"); +} + +static void test_digest_typecast_then_identifier() { + // Bisected minimal repro: identifier directly after `::TYPENAME `. The table name is searched only past `from`, because `select` and `int` already contain a `t`. + std::string d = digest_query("SELECT a::int FROM t"); + ok(d.find("from") != std::string::npos && d.find("t", d.find("from") + 4) != std::string::npos, + "digest #5755: bare identifier after typecast survives"); +} + +// Regression test for the per-call `tc->started` reset: multiple casts +// in the same query must each re-enter the typecast handler cleanly. +// Without the reset, the second `::cast` would skip the `::`-skip +// branch and start parsing the type name from the wrong offset. The table name is searched only past `from`, because `select`, `int` and `text` already contain a `t`. +static void test_digest_typecast_multiple_in_same_query() { + std::string d = digest_query("SELECT 1::int, 2::text FROM t"); + ok(d.find("from") != std::string::npos && d.find("t", d.find("from") + 4) != std::string::npos, + "digest #5755: query with multiple typecasts preserves trailing FROM"); + // Both literals must be replaced with `?`, so the first and the last `?` must sit at different offsets. + ok(d.find("?") != std::string::npos && d.find("?") != d.rfind("?"), + "digest #5755: literal replacement still happens before each typecast"); +} + // ============================================================================ // 4. 
PgSQL-specific: Double-quoted identifiers (preserved, NOT replaced) // ============================================================================ @@ -609,7 +650,7 @@ static void test_digest_only_comment() { // ============================================================================ int main() { - plan(65); + plan(70); int rc = test_init_minimal(); ok(rc == 0, "test_init_minimal() succeeds"); @@ -633,13 +674,16 @@ int main() { test_digest_dollar_quote_in_function(); // 1 test_digest_dollar_quote_with_special_chars(); // 1 - // 3. Type casts (6 tests) + // 3. Type casts (11 tests) test_digest_typecast_simple(); // 1 test_digest_typecast_varchar(); // 1 test_digest_typecast_with_modifier(); // 1 test_digest_typecast_array(); // 1 test_digest_typecast_in_where(); // 1 test_digest_typecast_quoted(); // 1 + test_digest_typecast_followed_by_clause(); // 2 (issue #5755) + test_digest_typecast_then_identifier(); // 1 (issue #5755) + test_digest_typecast_multiple_in_same_query(); // 2 (issue #5755) // 4. Double-quoted identifiers (3 tests) test_digest_double_quoted_identifier(); // 2