From 8f38b8a577fdf213a7a34b894773edc4f293399e Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Fri, 16 Jan 2026 18:38:13 +0000 Subject: [PATCH] feat: Add exponential backoff retry for transient LLM failures This commit adds configurable retry logic with exponential backoff for NL2SQL LLM API calls. Changes: - Add retry configuration to NL2SQLRequest (max_retries, retry_backoff_ms, retry_multiplier, retry_max_backoff_ms) - Add is_retryable_error() to identify retryable HTTP/CURL errors - Add sleep_with_jitter() for exponential backoff with 10% jitter - Add call_generic_openai_with_retry() wrapper - Add call_generic_anthropic_with_retry() wrapper - Update NL2SQL_Converter::convert() to use retry wrappers Default retry behavior: - 3 retries with 1000ms initial backoff - 2.0x multiplier, 30000ms max backoff - Retries on empty responses (transient failures) Part of: Phase 3 of NL2SQL improvement plan --- include/NL2SQL_Converter.h | 21 +++- lib/LLM_Clients.cpp | 210 +++++++++++++++++++++++++++++++++++++ lib/NL2SQL_Converter.cpp | 8 +- 3 files changed, 236 insertions(+), 3 deletions(-) diff --git a/include/NL2SQL_Converter.h b/include/NL2SQL_Converter.h index 5b306e299..f0e408a9b 100644 --- a/include/NL2SQL_Converter.h +++ b/include/NL2SQL_Converter.h @@ -88,7 +88,15 @@ struct NL2SQLRequest { // Request tracking for correlation and debugging std::string request_id; ///< Unique ID for this request (UUID-like) - NL2SQLRequest() : max_latency_ms(0), allow_cache(true) { + // Retry configuration for transient failures + int max_retries; ///< Maximum retry attempts (default: 3) + int retry_backoff_ms; ///< Initial backoff in ms (default: 1000) + double retry_multiplier; ///< Backoff multiplier (default: 2.0) + int retry_max_backoff_ms; ///< Maximum backoff in ms (default: 30000) + + NL2SQLRequest() : max_latency_ms(0), allow_cache(true), + max_retries(3), retry_backoff_ms(1000), + retry_multiplier(2.0), retry_max_backoff_ms(30000) { // Generate UUID-like request ID for 
correlation char uuid[64]; snprintf(uuid, sizeof(uuid), "%08lx-%04x-%04x-%04x-%012lx", @@ -205,6 +213,17 @@ private: std::string call_generic_anthropic(const std::string& prompt, const std::string& model, const std::string& url, const char* key, const std::string& req_id = ""); + // Retry wrapper methods + std::string call_generic_openai_with_retry(const std::string& prompt, const std::string& model, + const std::string& url, const char* key, + const std::string& req_id, + int max_retries, int initial_backoff_ms, + double backoff_multiplier, int max_backoff_ms); + std::string call_generic_anthropic_with_retry(const std::string& prompt, const std::string& model, + const std::string& url, const char* key, + const std::string& req_id, + int max_retries, int initial_backoff_ms, + double backoff_multiplier, int max_backoff_ms); NL2SQLResult check_vector_cache(const NL2SQLRequest& req); void store_in_vector_cache(const NL2SQLRequest& req, const NL2SQLResult& result); std::string get_schema_context(const std::vector& tables); diff --git a/lib/LLM_Clients.cpp b/lib/LLM_Clients.cpp index e83d1d45d..232a11a7d 100644 --- a/lib/LLM_Clients.cpp +++ b/lib/LLM_Clients.cpp @@ -106,6 +106,66 @@ static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* use return totalSize; } +// ============================================================================ +// Retry Logic Helper Functions +// ============================================================================ + +/** + * @brief Check if an error is retryable based on HTTP status code + * + * Determines whether a failed LLM API call should be retried based on: + * - HTTP status codes (408 timeout, 429 rate limit, 5xx server errors) + * - CURL error codes (network failures, timeouts) + * + * @param http_status_code HTTP status code from response + * @param curl_code libcurl error code + * @return true if error is retryable, false otherwise + */ +static bool is_retryable_error(int http_status_code, CURLcode 
curl_code) {
+	// Retry on specific HTTP status codes
+	if (http_status_code == 408 ||  // Request Timeout
+	    http_status_code == 429 ||  // Too Many Requests (rate limit)
+	    http_status_code == 500 ||  // Internal Server Error
+	    http_status_code == 502 ||  // Bad Gateway
+	    http_status_code == 503 ||  // Service Unavailable
+	    http_status_code == 504) {  // Gateway Timeout
+		return true;
+	}
+
+	// Retry on specific curl errors (network issues, timeouts)
+	if (curl_code == CURLE_OPERATION_TIMEDOUT ||
+	    curl_code == CURLE_COULDNT_CONNECT ||
+	    curl_code == CURLE_READ_ERROR ||
+	    curl_code == CURLE_RECV_ERROR) {
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * @brief Sleep with exponential backoff and jitter
+ *
+ * Implements exponential backoff with jitter to prevent thundering herd
+ * problem when multiple requests retry simultaneously.
+ *
+ * @param base_delay_ms Base delay in milliseconds
+ * @param jitter_factor Jitter as fraction of base delay (default 0.1 = 10%)
+ */
+static void sleep_with_jitter(int base_delay_ms, double jitter_factor = 0.1) {
+	// Add random jitter to prevent synchronized retries
+	int jitter_ms = static_cast<int>(base_delay_ms * jitter_factor);
+	int random_jitter = (jitter_ms > 0) ? ((rand() % (2 * jitter_ms)) - jitter_ms) : 0;
+
+	int total_delay_ms = base_delay_ms + random_jitter;
+	if (total_delay_ms < 0) total_delay_ms = 0;
+
+	struct timespec ts;
+	ts.tv_sec = total_delay_ms / 1000;
+	ts.tv_nsec = (total_delay_ms % 1000) * 1000000;
+	nanosleep(&ts, NULL);
+}
+
 // ============================================================================
 // HTTP Client implementations for different LLM providers
 // ============================================================================
@@ -452,3 +512,153 @@ std::string NL2SQL_Converter::call_generic_anthropic(const std::string& prompt,
 		return "";
 	}
 }
+
+// ============================================================================
+// Retry Wrapper Functions
+//
============================================================================ + +/** + * @brief Call OpenAI-compatible API with retry logic + * + * Wrapper around call_generic_openai() that implements: + * - Exponential backoff with jitter + * - Retry on empty responses (transient failures) + * - Configurable max retries and backoff parameters + * + * @param prompt The prompt to send to the API + * @param model Model name to use + * @param url Full API endpoint URL + * @param key API key (can be NULL for local endpoints) + * @param req_id Request ID for correlation + * @param max_retries Maximum number of retry attempts + * @param initial_backoff_ms Initial backoff delay in milliseconds + * @param backoff_multiplier Multiplier for exponential backoff + * @param max_backoff_ms Maximum backoff delay in milliseconds + * @return Generated SQL or empty string if all retries fail + */ +std::string NL2SQL_Converter::call_generic_openai_with_retry( + const std::string& prompt, + const std::string& model, + const std::string& url, + const char* key, + const std::string& req_id, + int max_retries, + int initial_backoff_ms, + double backoff_multiplier, + int max_backoff_ms) +{ + int attempt = 0; + int current_backoff_ms = initial_backoff_ms; + + while (attempt <= max_retries) { + // Call the base function (attempt 0 is the first try) + std::string result = call_generic_openai(prompt, model, url, key, req_id); + + // If we got a successful response, return it + if (!result.empty()) { + if (attempt > 0) { + proxy_info("NL2SQL [%s]: Request succeeded after %d retries\n", + req_id.c_str(), attempt); + } + return result; + } + + // If this was our last attempt, give up + if (attempt == max_retries) { + proxy_error("NL2SQL [%s]: Request failed after %d attempts. 
Max retries reached.\n",
+				req_id.c_str(), attempt + 1);
+			return "";
+		}
+
+		// Log retry attempt
+		proxy_warning("NL2SQL [%s]: Empty response, retrying in %dms (attempt %d/%d)\n",
+			req_id.c_str(), current_backoff_ms, attempt + 1, max_retries + 1);
+
+		// Sleep with exponential backoff and jitter
+		sleep_with_jitter(current_backoff_ms);
+
+		// Increase backoff for next attempt
+		current_backoff_ms = static_cast<int>(current_backoff_ms * backoff_multiplier);
+		if (current_backoff_ms > max_backoff_ms) {
+			current_backoff_ms = max_backoff_ms;
+		}
+
+		attempt++;
+	}
+
+	// Should not reach here, but handle gracefully
+	return "";
+}
+
+/**
+ * @brief Call Anthropic-compatible API with retry logic
+ *
+ * Wrapper around call_generic_anthropic() that implements:
+ * - Exponential backoff with jitter
+ * - Retry on empty responses (transient failures)
+ * - Configurable max retries and backoff parameters
+ *
+ * @param prompt The prompt to send to the API
+ * @param model Model name to use
+ * @param url Full API endpoint URL
+ * @param key API key (required for Anthropic)
+ * @param req_id Request ID for correlation
+ * @param max_retries Maximum number of retry attempts
+ * @param initial_backoff_ms Initial backoff delay in milliseconds
+ * @param backoff_multiplier Multiplier for exponential backoff
+ * @param max_backoff_ms Maximum backoff delay in milliseconds
+ * @return Generated SQL or empty string if all retries fail
+ */
+std::string NL2SQL_Converter::call_generic_anthropic_with_retry(
+	const std::string& prompt,
+	const std::string& model,
+	const std::string& url,
+	const char* key,
+	const std::string& req_id,
+	int max_retries,
+	int initial_backoff_ms,
+	double backoff_multiplier,
+	int max_backoff_ms)
+{
+	int attempt = 0;
+	int current_backoff_ms = initial_backoff_ms;
+
+	while (attempt <= max_retries) {
+		// Call the base function (attempt 0 is the first try)
+		std::string result = call_generic_anthropic(prompt, model, url, key, req_id);
+
+		// If we got a
successful response, return it
+		if (!result.empty()) {
+			if (attempt > 0) {
+				proxy_info("NL2SQL [%s]: Request succeeded after %d retries\n",
+					req_id.c_str(), attempt);
+			}
+			return result;
+		}
+
+		// If this was our last attempt, give up
+		if (attempt == max_retries) {
+			proxy_error("NL2SQL [%s]: Request failed after %d attempts. Max retries reached.\n",
+				req_id.c_str(), attempt + 1);
+			return "";
+		}
+
+		// Log retry attempt
+		proxy_warning("NL2SQL [%s]: Empty response, retrying in %dms (attempt %d/%d)\n",
+			req_id.c_str(), current_backoff_ms, attempt + 1, max_retries + 1);
+
+		// Sleep with exponential backoff and jitter
+		sleep_with_jitter(current_backoff_ms);
+
+		// Increase backoff for next attempt
+		current_backoff_ms = static_cast<int>(current_backoff_ms * backoff_multiplier);
+		if (current_backoff_ms > max_backoff_ms) {
+			current_backoff_ms = max_backoff_ms;
+		}
+
+		attempt++;
+	}
+
+	// Should not reach here, but handle gracefully
+	return "";
+}
diff --git a/lib/NL2SQL_Converter.cpp b/lib/NL2SQL_Converter.cpp
index ca9d8ad18..7659dbfbe 100644
--- a/lib/NL2SQL_Converter.cpp
+++ b/lib/NL2SQL_Converter.cpp
@@ -677,7 +677,9 @@ NL2SQLResult NL2SQL_Converter::convert(const NL2SQLRequest& req) {
 			? config.provider_url : "http://localhost:11434/v1/chat/completions";
 		model = config.provider_model ? config.provider_model : "llama3.2";
-		raw_sql = call_generic_openai(prompt, model, url, key, req.request_id);
+		raw_sql = call_generic_openai_with_retry(prompt, model, url, key, req.request_id,
+			req.max_retries, req.retry_backoff_ms,
+			req.retry_multiplier, req.retry_max_backoff_ms);
 		result.explanation = "Generated by OpenAI-compatible provider (" + std::string(model) + ")";
 		result.provider_used = "openai";
 		break;
@@ -687,7 +689,9 @@ NL2SQLResult NL2SQL_Converter::convert(const NL2SQLRequest& req) {
 			? config.provider_url : "https://api.anthropic.com/v1/messages";
 		model = config.provider_model ?
config.provider_model : "claude-3-haiku"; - raw_sql = call_generic_anthropic(prompt, model, url, key, req.request_id); + raw_sql = call_generic_anthropic_with_retry(prompt, model, url, key, req.request_id, + req.max_retries, req.retry_backoff_ms, + req.retry_multiplier, req.retry_max_backoff_ms); result.explanation = "Generated by Anthropic-compatible provider (" + std::string(model) + ")"; result.provider_used = "anthropic"; break;