From e82bcb391b3a19318999b7a32820d28a9eee57cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Sun, 22 Mar 2026 12:00:14 +0100 Subject: [PATCH 1/3] Extract PgSQL error classification logic (Phase 3.10, #5498) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New: include/PgSQLErrorClassifier.h, lib/PgSQLErrorClassifier.cpp Functions: - classify_pgsql_error(): classifies by SQLSTATE class — connection (08), transaction rollback (40), resources (53) = retryable; operator intervention (57), system error (58) = fatal; all others (syntax 42, constraints 23, data 22) = report to client - pgsql_can_retry_error(): checks retry conditions (retries left, not in transaction — PgSQL transactions are atomic) --- include/PgSQLErrorClassifier.h | 57 ++++++++++++++++++++++++++++++++++ lib/Makefile | 1 + lib/PgSQLErrorClassifier.cpp | 54 ++++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 include/PgSQLErrorClassifier.h create mode 100644 lib/PgSQLErrorClassifier.cpp diff --git a/include/PgSQLErrorClassifier.h b/include/PgSQLErrorClassifier.h new file mode 100644 index 000000000..1be6b1e6e --- /dev/null +++ b/include/PgSQLErrorClassifier.h @@ -0,0 +1,57 @@ +/** + * @file PgSQLErrorClassifier.h + * @brief Pure PgSQL error classification for retry decisions. + * + * Classifies PostgreSQL SQLSTATE error codes by class to determine + * if a query error is retryable or fatal. + * + * @see Phase 3.10 (GitHub issue #5498) + */ + +#ifndef PGSQL_ERROR_CLASSIFIER_H +#define PGSQL_ERROR_CLASSIFIER_H + +/** + * @brief Action to take after a PgSQL backend error. + */ +enum PgSQLErrorAction { + PGSQL_ERROR_REPORT_TO_CLIENT, ///< Send error to client, no retry. + PGSQL_ERROR_RETRY, ///< Retryable error (connection/server). + PGSQL_ERROR_FATAL ///< Fatal server state (shutdown/crash). +}; + +/** + * @brief Classify a PgSQL SQLSTATE error code for retry eligibility. + * + * SQLSTATE classes (first 2 chars): + * - "08" (connection exception): retryable + * - "40" (transaction rollback, including serialization failure): retryable + * - "53" (insufficient resources, e.g. too_many_connections): retryable + * - "57" (operator intervention, e.g. admin_shutdown): fatal + * - "58" (system error, e.g. crash_shutdown): fatal + * - All others (syntax, constraint, etc.): report to client + * + * @param sqlstate 5-character SQLSTATE string (e.g., "08006", "42P01"). + * @return PgSQLErrorAction indicating what to do. + */ +PgSQLErrorAction classify_pgsql_error(const char *sqlstate); + +/** + * @brief Check if a PgSQL error is retryable given session conditions. + * + * Even if the error class is retryable, retry is blocked when: + * - In an active transaction (PgSQL transactions are atomic) + * - No retries remaining + * + * @param action Result of classify_pgsql_error(). + * @param retries_remaining Number of retries left. + * @param in_transaction Whether a transaction is in progress. + * @return true if the error can be retried. + */ +bool pgsql_can_retry_error( + PgSQLErrorAction action, + int retries_remaining, + bool in_transaction +); + +#endif // PGSQL_ERROR_CLASSIFIER_H diff --git a/lib/Makefile b/lib/Makefile index 7af5cd6ef..96d284fe5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -108,6 +108,7 @@ _OBJ_CXX := ProxySQL_GloVars.oo network.oo debug.oo configfile.oo Query_Cache.oo MonitorHealthDecision.oo \ TransactionState.oo \ HostgroupRouting.oo \ + PgSQLErrorClassifier.oo \ proxy_sqlite3_symbols.oo # TSDB object files diff --git a/lib/PgSQLErrorClassifier.cpp b/lib/PgSQLErrorClassifier.cpp new file mode 100644 index 000000000..e6279e369 --- /dev/null +++ b/lib/PgSQLErrorClassifier.cpp @@ -0,0 +1,54 @@ +/** + * @file PgSQLErrorClassifier.cpp + * @brief Implementation of PgSQL error classification. + * + * @see PgSQLErrorClassifier.h + * @see Phase 3.10 (GitHub issue #5498) + */ + +#include "PgSQLErrorClassifier.h" +#include + +PgSQLErrorAction classify_pgsql_error(const char *sqlstate) { + if (sqlstate == nullptr || strlen(sqlstate) < 2) { + return PGSQL_ERROR_REPORT_TO_CLIENT; + } + + // Classify by SQLSTATE class (first 2 characters) + char cls[3] = {sqlstate[0], sqlstate[1], '\0'}; + + // Connection exceptions — retryable + if (strcmp(cls, "08") == 0) return PGSQL_ERROR_RETRY; + + // Transaction rollback (serialization failure, deadlock) — retryable + if (strcmp(cls, "40") == 0) return PGSQL_ERROR_RETRY; + + // Insufficient resources (too many connections) — retryable + if (strcmp(cls, "53") == 0) return PGSQL_ERROR_RETRY; + + // Operator intervention (admin shutdown, crash) — fatal + if (strcmp(cls, "57") == 0) return PGSQL_ERROR_FATAL; + + // System error (I/O error, crash shutdown) — fatal + if (strcmp(cls, "58") == 0) return PGSQL_ERROR_FATAL; + + // Everything else (syntax, constraints, data, etc.) — report to client + return PGSQL_ERROR_REPORT_TO_CLIENT; +} + +bool pgsql_can_retry_error( + PgSQLErrorAction action, + int retries_remaining, + bool in_transaction) +{ + if (action != PGSQL_ERROR_RETRY) { + return false; + } + if (retries_remaining <= 0) { + return false; + } + if (in_transaction) { + return false; // PgSQL transactions are atomic, can't retry mid-txn + } + return true; +} From a066763ee9f4d7893b512955ff8fd419f956d3aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Sun, 22 Mar 2026 12:00:14 +0100 Subject: [PATCH 2/3] Add PgSQL error classifier unit tests (Phase 3.10, #5498) 25 test cases: connection errors (08xxx), transaction errors (40xxx), resource errors (53xxx), fatal errors (57xxx/58xxx), non-retryable (syntax/constraint/data), edge cases (null/empty), retry conditions (retries, in-transaction, non-retryable, fatal). --- test/tap/tests/unit/Makefile | 3 +- .../unit/pgsql_error_classifier_unit-t.cpp | 99 +++++++++++++++++++ 2 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 test/tap/tests/unit/pgsql_error_classifier_unit-t.cpp diff --git a/test/tap/tests/unit/Makefile b/test/tap/tests/unit/Makefile index 999f3c30c..5cd780988 100644 --- a/test/tap/tests/unit/Makefile +++ b/test/tap/tests/unit/Makefile @@ -235,7 +235,8 @@ UNIT_TESTS := smoke_test-t query_cache_unit-t query_processor_unit-t \ protocol_unit-t auth_unit-t connection_pool_unit-t \ rule_matching_unit-t hostgroups_unit-t monitor_health_unit-t \ hostgroup_routing_unit-t \ - transaction_state_unit-t + transaction_state_unit-t \ + pgsql_error_classifier_unit-t .PHONY: all all: $(UNIT_TESTS) diff --git a/test/tap/tests/unit/pgsql_error_classifier_unit-t.cpp b/test/tap/tests/unit/pgsql_error_classifier_unit-t.cpp new file mode 100644 index 000000000..7d0299096 --- /dev/null +++ b/test/tap/tests/unit/pgsql_error_classifier_unit-t.cpp @@ -0,0 +1,99 @@ +/** + * @file pgsql_error_classifier_unit-t.cpp + * @brief Unit tests for PgSQL error classification. + * + * @see Phase 3.10 (GitHub issue #5498) + */ + +#include "tap.h" +#include "test_globals.h" +#include "test_init.h" +#include "proxysql.h" +#include "PgSQLErrorClassifier.h" + +static void test_connection_errors() { + ok(classify_pgsql_error("08000") == PGSQL_ERROR_RETRY, + "08000 (connection exception): retryable"); + ok(classify_pgsql_error("08003") == PGSQL_ERROR_RETRY, + "08003 (connection does not exist): retryable"); + ok(classify_pgsql_error("08006") == PGSQL_ERROR_RETRY, + "08006 (connection failure): retryable"); +} + +static void test_transaction_errors() { + ok(classify_pgsql_error("40001") == PGSQL_ERROR_RETRY, + "40001 (serialization failure): retryable"); + ok(classify_pgsql_error("40P01") == PGSQL_ERROR_RETRY, + "40P01 (deadlock detected): retryable"); +} + +static void test_resource_errors() { + ok(classify_pgsql_error("53000") == PGSQL_ERROR_RETRY, + "53000 (insufficient resources): retryable"); + ok(classify_pgsql_error("53300") == PGSQL_ERROR_RETRY, + "53300 (too many connections): retryable"); +} + +static void test_fatal_errors() { + ok(classify_pgsql_error("57000") == PGSQL_ERROR_FATAL, + "57000 (operator intervention): fatal"); + ok(classify_pgsql_error("57P01") == PGSQL_ERROR_FATAL, + "57P01 (admin shutdown): fatal"); + ok(classify_pgsql_error("57P02") == PGSQL_ERROR_FATAL, + "57P02 (crash shutdown): fatal"); + ok(classify_pgsql_error("58000") == PGSQL_ERROR_FATAL, + "58000 (system error): fatal"); +} + +static void test_non_retryable_errors() { + ok(classify_pgsql_error("42601") == PGSQL_ERROR_REPORT_TO_CLIENT, + "42601 (syntax error): report"); + ok(classify_pgsql_error("42P01") == PGSQL_ERROR_REPORT_TO_CLIENT, + "42P01 (undefined table): report"); + ok(classify_pgsql_error("23505") == PGSQL_ERROR_REPORT_TO_CLIENT, + "23505 (unique violation): report"); + ok(classify_pgsql_error("23503") == PGSQL_ERROR_REPORT_TO_CLIENT, + "23503 (foreign key violation): report"); + ok(classify_pgsql_error("22001") == PGSQL_ERROR_REPORT_TO_CLIENT, + "22001 (string data right truncation): report"); +} + +static void test_edge_cases() { + ok(classify_pgsql_error(nullptr) == PGSQL_ERROR_REPORT_TO_CLIENT, + "null sqlstate: report"); + ok(classify_pgsql_error("") == PGSQL_ERROR_REPORT_TO_CLIENT, + "empty sqlstate: report"); + ok(classify_pgsql_error("0") == PGSQL_ERROR_REPORT_TO_CLIENT, + "single char sqlstate: report"); +} + +static void test_retry_conditions() { + ok(pgsql_can_retry_error(PGSQL_ERROR_RETRY, 3, false) == true, + "can retry: retryable + retries left + no txn"); + ok(pgsql_can_retry_error(PGSQL_ERROR_RETRY, 0, false) == false, + "no retry: no retries left"); + ok(pgsql_can_retry_error(PGSQL_ERROR_RETRY, 3, true) == false, + "no retry: in transaction"); + ok(pgsql_can_retry_error(PGSQL_ERROR_REPORT_TO_CLIENT, 3, false) == false, + "no retry: non-retryable error"); + ok(pgsql_can_retry_error(PGSQL_ERROR_FATAL, 3, false) == false, + "no retry: fatal error"); +} + +int main() { + plan(25); + int rc = test_init_minimal(); + ok(rc == 0, "test_init_minimal() succeeds"); + + test_connection_errors(); // 3 + test_transaction_errors(); // 2 + test_resource_errors(); // 2 + test_fatal_errors(); // 4 + test_non_retryable_errors(); // 5 + test_edge_cases(); // 3 + test_retry_conditions(); // 5 + // Total: 1+3+2+2+4+5+3+5 = 25 + + test_cleanup_minimal(); + return exit_status(); +} From f5ace60850e4e48ccbc006c76894df00b3b3f7f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Canna=C3=B2?= Date: Sun, 22 Mar 2026 12:51:03 +0100 Subject: [PATCH 3/3] Address review on PgSQL error classifier (PR #5514) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix: 57014 (query_canceled) is now classified as REPORT_TO_CLIENT instead of FATAL (it's a normal cancellation, not a server crash) - Fix docstring: crash_shutdown is 57P02 (class 57), not class 58; class 58 is system/I/O errors - Add test for 57014 exception (+1 test, plan 25→26) --- include/PgSQLErrorClassifier.h | 5 +++-- lib/PgSQLErrorClassifier.cpp | 12 +++++++++--- .../tap/tests/unit/pgsql_error_classifier_unit-t.cpp | 7 +++++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/PgSQLErrorClassifier.h b/include/PgSQLErrorClassifier.h index 1be6b1e6e..471ddce40 100644 --- a/include/PgSQLErrorClassifier.h +++ b/include/PgSQLErrorClassifier.h @@ -27,8 +27,9 @@ enum PgSQLErrorAction { * - "08" (connection exception): retryable * - "40" (transaction rollback, including serialization failure): retryable * - "53" (insufficient resources, e.g. too_many_connections): retryable - * - "57" (operator intervention, e.g. admin_shutdown): fatal - * - "58" (system error, e.g. crash_shutdown): fatal + * - "57" (operator intervention, e.g. admin_shutdown, crash_shutdown): fatal + * Exception: "57014" (query_canceled) is non-fatal + * - "58" (system error, e.g. I/O error): fatal * - All others (syntax, constraint, etc.): report to client * * @param sqlstate 5-character SQLSTATE string (e.g., "08006", "42P01"). diff --git a/lib/PgSQLErrorClassifier.cpp b/lib/PgSQLErrorClassifier.cpp index e6279e369..cd48afcce 100644 --- a/lib/PgSQLErrorClassifier.cpp +++ b/lib/PgSQLErrorClassifier.cpp @@ -26,10 +26,16 @@ PgSQLErrorAction classify_pgsql_error(const char *sqlstate) { // Insufficient resources (too many connections) — retryable if (strcmp(cls, "53") == 0) return PGSQL_ERROR_RETRY; - // Operator intervention (admin shutdown, crash) — fatal - if (strcmp(cls, "57") == 0) return PGSQL_ERROR_FATAL; + // Operator intervention — mostly fatal, except query_canceled + if (strcmp(cls, "57") == 0) { + // 57014 = query_canceled — not fatal, report to client + if (strlen(sqlstate) >= 5 && strncmp(sqlstate, "57014", 5) == 0) { + return PGSQL_ERROR_REPORT_TO_CLIENT; + } + return PGSQL_ERROR_FATAL; // admin_shutdown, crash_shutdown, etc. + } - // System error (I/O error, crash shutdown) — fatal + // System error (I/O error) — fatal if (strcmp(cls, "58") == 0) return PGSQL_ERROR_FATAL; // Everything else (syntax, constraints, data, etc.) — report to client diff --git a/test/tap/tests/unit/pgsql_error_classifier_unit-t.cpp b/test/tap/tests/unit/pgsql_error_classifier_unit-t.cpp index 7d0299096..396492415 100644 --- a/test/tap/tests/unit/pgsql_error_classifier_unit-t.cpp +++ b/test/tap/tests/unit/pgsql_error_classifier_unit-t.cpp @@ -43,6 +43,9 @@ static void test_fatal_errors() { "57P02 (crash shutdown): fatal"); ok(classify_pgsql_error("58000") == PGSQL_ERROR_FATAL, "58000 (system error): fatal"); + // 57014 is an exception — query_canceled is NOT fatal + ok(classify_pgsql_error("57014") == PGSQL_ERROR_REPORT_TO_CLIENT, + "57014 (query canceled): not fatal, report to client"); } static void test_non_retryable_errors() { @@ -81,7 +84,7 @@ static void test_retry_conditions() { } int main() { - plan(25); + plan(26); int rc = test_init_minimal(); ok(rc == 0, "test_init_minimal() succeeds"); @@ -92,7 +95,7 @@ int main() { test_non_retryable_errors(); // 5 test_edge_cases(); // 3 test_retry_conditions(); // 5 - // Total: 1+3+2+2+4+5+3+5 = 25 + // Total: 1+3+2+2+5+5+3+5 = 26 test_cleanup_minimal(); return exit_status();