From 8f38b8a577fdf213a7a34b894773edc4f293399e Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Fri, 16 Jan 2026 18:38:13 +0000 Subject: [PATCH] feat: Add exponential backoff retry for transient LLM failures This commit adds configurable retry logic with exponential backoff for NL2SQL LLM API calls. Changes: - Add retry configuration to NL2SQLRequest (max_retries, retry_backoff_ms, retry_multiplier, retry_max_backoff_ms) - Add is_retryable_error() to identify retryable HTTP/CURL errors - Add sleep_with_jitter() for exponential backoff with 10% jitter - Add call_generic_openai_with_retry() wrapper - Add call_generic_anthropic_with_retry() wrapper - Update NL2SQL_Converter::convert() to use retry wrappers Default retry behavior: - 3 retries with 1000ms initial backoff - 2.0x multiplier, 30000ms max backoff - Retries on empty responses (transient failures) Part of: Phase 3 of NL2SQL improvement plan --- include/NL2SQL_Converter.h | 21 +++- lib/LLM_Clients.cpp | 210 +++++++++++++++++++++++++++++++++++++ lib/NL2SQL_Converter.cpp | 8 +- 3 files changed, 236 insertions(+), 3 deletions(-) diff --git a/include/NL2SQL_Converter.h b/include/NL2SQL_Converter.h index 5b306e299..f0e408a9b 100644 --- a/include/NL2SQL_Converter.h +++ b/include/NL2SQL_Converter.h @@ -88,7 +88,15 @@ struct NL2SQLRequest { // Request tracking for correlation and debugging std::string request_id; ///< Unique ID for this request (UUID-like) - NL2SQLRequest() : max_latency_ms(0), allow_cache(true) { + // Retry configuration for transient failures + int max_retries; ///< Maximum retry attempts (default: 3) + int retry_backoff_ms; ///< Initial backoff in ms (default: 1000) + double retry_multiplier; ///< Backoff multiplier (default: 2.0) + int retry_max_backoff_ms; ///< Maximum backoff in ms (default: 30000) + + NL2SQLRequest() : max_latency_ms(0), allow_cache(true), + max_retries(3), retry_backoff_ms(1000), + retry_multiplier(2.0), retry_max_backoff_ms(30000) { // Generate UUID-like request ID for 
correlation char uuid[64]; snprintf(uuid, sizeof(uuid), "%08lx-%04x-%04x-%04x-%012lx", @@ -205,6 +213,17 @@ private: std::string call_generic_anthropic(const std::string& prompt, const std::string& model, const std::string& url, const char* key, const std::string& req_id = ""); + // Retry wrapper methods + std::string call_generic_openai_with_retry(const std::string& prompt, const std::string& model, + const std::string& url, const char* key, + const std::string& req_id, + int max_retries, int initial_backoff_ms, + double backoff_multiplier, int max_backoff_ms); + std::string call_generic_anthropic_with_retry(const std::string& prompt, const std::string& model, + const std::string& url, const char* key, + const std::string& req_id, + int max_retries, int initial_backoff_ms, + double backoff_multiplier, int max_backoff_ms); NL2SQLResult check_vector_cache(const NL2SQLRequest& req); void store_in_vector_cache(const NL2SQLRequest& req, const NL2SQLResult& result); std::string get_schema_context(const std::vector& tables); diff --git a/lib/LLM_Clients.cpp b/lib/LLM_Clients.cpp index e83d1d45d..232a11a7d 100644 --- a/lib/LLM_Clients.cpp +++ b/lib/LLM_Clients.cpp @@ -106,6 +106,66 @@ static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* use return totalSize; } +// ============================================================================ +// Retry Logic Helper Functions +// ============================================================================ + +/** + * @brief Check if an error is retryable based on HTTP status code + * + * Determines whether a failed LLM API call should be retried based on: + * - HTTP status codes (408 timeout, 429 rate limit, 5xx server errors) + * - CURL error codes (network failures, timeouts) + * + * @param http_status_code HTTP status code from response + * @param curl_code libcurl error code + * @return true if error is retryable, false otherwise + */ +static bool is_retryable_error(int http_status_code, CURLcode 
curl_code) {
+	// Retry on specific HTTP status codes
+	if (http_status_code == 408 ||  // Request Timeout
+	    http_status_code == 429 ||  // Too Many Requests (rate limit)
+	    http_status_code == 500 ||  // Internal Server Error
+	    http_status_code == 502 ||  // Bad Gateway
+	    http_status_code == 503 ||  // Service Unavailable
+	    http_status_code == 504) {  // Gateway Timeout
+		return true;
+	}
+
+	// Retry on specific curl errors (network issues, timeouts)
+	if (curl_code == CURLE_OPERATION_TIMEDOUT ||
+	    curl_code == CURLE_COULDNT_CONNECT ||
+	    curl_code == CURLE_READ_ERROR ||
+	    curl_code == CURLE_RECV_ERROR) {
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * @brief Sleep with exponential backoff and jitter
+ *
+ * Implements exponential backoff with jitter to prevent thundering herd
+ * problem when multiple requests retry simultaneously.
+ *
+ * @param base_delay_ms Base delay in milliseconds
+ * @param jitter_factor Jitter as fraction of base delay (default 0.1 = 10%)
+ */
+static void sleep_with_jitter(int base_delay_ms, double jitter_factor = 0.1) {
+	// Add random jitter to prevent synchronized retries
+	int jitter_ms = static_cast<int>(base_delay_ms * jitter_factor);
+	int random_jitter = (jitter_ms > 0) ? ((rand() % (2 * jitter_ms)) - jitter_ms) : 0;
+
+	int total_delay_ms = base_delay_ms + random_jitter;
+	if (total_delay_ms < 0) total_delay_ms = 0;
+
+	struct timespec ts;
+	ts.tv_sec = total_delay_ms / 1000;
+	ts.tv_nsec = (total_delay_ms % 1000) * 1000000;
+	nanosleep(&ts, NULL);
+}
+
 // ============================================================================
 // HTTP Client implementations for different LLM providers
 // ============================================================================
@@ -452,3 +512,153 @@ std::string NL2SQL_Converter::call_generic_anthropic(const std::string& prompt,
 		return "";
 	}
 }
+
+// ============================================================================
+// Retry Wrapper Functions
+//
============================================================================ + +/** + * @brief Call OpenAI-compatible API with retry logic + * + * Wrapper around call_generic_openai() that implements: + * - Exponential backoff with jitter + * - Retry on empty responses (transient failures) + * - Configurable max retries and backoff parameters + * + * @param prompt The prompt to send to the API + * @param model Model name to use + * @param url Full API endpoint URL + * @param key API key (can be NULL for local endpoints) + * @param req_id Request ID for correlation + * @param max_retries Maximum number of retry attempts + * @param initial_backoff_ms Initial backoff delay in milliseconds + * @param backoff_multiplier Multiplier for exponential backoff + * @param max_backoff_ms Maximum backoff delay in milliseconds + * @return Generated SQL or empty string if all retries fail + */ +std::string NL2SQL_Converter::call_generic_openai_with_retry( + const std::string& prompt, + const std::string& model, + const std::string& url, + const char* key, + const std::string& req_id, + int max_retries, + int initial_backoff_ms, + double backoff_multiplier, + int max_backoff_ms) +{ + int attempt = 0; + int current_backoff_ms = initial_backoff_ms; + + while (attempt <= max_retries) { + // Call the base function (attempt 0 is the first try) + std::string result = call_generic_openai(prompt, model, url, key, req_id); + + // If we got a successful response, return it + if (!result.empty()) { + if (attempt > 0) { + proxy_info("NL2SQL [%s]: Request succeeded after %d retries\n", + req_id.c_str(), attempt); + } + return result; + } + + // If this was our last attempt, give up + if (attempt == max_retries) { + proxy_error("NL2SQL [%s]: Request failed after %d attempts. 
Max retries reached.\n",
+				req_id.c_str(), attempt + 1);
+			return "";
+		}
+
+		// Log retry attempt
+		proxy_warning("NL2SQL [%s]: Empty response, retrying in %dms (attempt %d/%d)\n",
+			req_id.c_str(), current_backoff_ms, attempt + 1, max_retries + 1);
+
+		// Sleep with exponential backoff and jitter
+		sleep_with_jitter(current_backoff_ms);
+
+		// Increase backoff for next attempt
+		current_backoff_ms = static_cast<int>(current_backoff_ms * backoff_multiplier);
+		if (current_backoff_ms > max_backoff_ms) {
+			current_backoff_ms = max_backoff_ms;
+		}
+
+		attempt++;
+	}
+
+	// Should not reach here, but handle gracefully
+	return "";
+}
+
+/**
+ * @brief Call Anthropic-compatible API with retry logic
+ *
+ * Wrapper around call_generic_anthropic() that implements:
+ * - Exponential backoff with jitter
+ * - Retry on empty responses (transient failures)
+ * - Configurable max retries and backoff parameters
+ *
+ * @param prompt The prompt to send to the API
+ * @param model Model name to use
+ * @param url Full API endpoint URL
+ * @param key API key (required for Anthropic)
+ * @param req_id Request ID for correlation
+ * @param max_retries Maximum number of retry attempts
+ * @param initial_backoff_ms Initial backoff delay in milliseconds
+ * @param backoff_multiplier Multiplier for exponential backoff
+ * @param max_backoff_ms Maximum backoff delay in milliseconds
+ * @return Generated SQL or empty string if all retries fail
+ */
+std::string NL2SQL_Converter::call_generic_anthropic_with_retry(
+	const std::string& prompt,
+	const std::string& model,
+	const std::string& url,
+	const char* key,
+	const std::string& req_id,
+	int max_retries,
+	int initial_backoff_ms,
+	double backoff_multiplier,
+	int max_backoff_ms)
+{
+	int attempt = 0;
+	int current_backoff_ms = initial_backoff_ms;
+
+	while (attempt <= max_retries) {
+		// Call the base function (attempt 0 is the first try)
+		std::string result = call_generic_anthropic(prompt, model, url, key, req_id);
+
+		// If we got a
successful response, return it
+		if (!result.empty()) {
+			if (attempt > 0) {
+				proxy_info("NL2SQL [%s]: Request succeeded after %d retries\n",
+					req_id.c_str(), attempt);
+			}
+			return result;
+		}
+
+		// If this was our last attempt, give up
+		if (attempt == max_retries) {
+			proxy_error("NL2SQL [%s]: Request failed after %d attempts. Max retries reached.\n",
+				req_id.c_str(), attempt + 1);
+			return "";
+		}
+
+		// Log retry attempt
+		proxy_warning("NL2SQL [%s]: Empty response, retrying in %dms (attempt %d/%d)\n",
+			req_id.c_str(), current_backoff_ms, attempt + 1, max_retries + 1);
+
+		// Sleep with exponential backoff and jitter
+		sleep_with_jitter(current_backoff_ms);
+
+		// Increase backoff for next attempt
+		current_backoff_ms = static_cast<int>(current_backoff_ms * backoff_multiplier);
+		if (current_backoff_ms > max_backoff_ms) {
+			current_backoff_ms = max_backoff_ms;
+		}
+
+		attempt++;
+	}
+
+	// Should not reach here, but handle gracefully
+	return "";
+}
diff --git a/lib/NL2SQL_Converter.cpp b/lib/NL2SQL_Converter.cpp
index ca9d8ad18..7659dbfbe 100644
--- a/lib/NL2SQL_Converter.cpp
+++ b/lib/NL2SQL_Converter.cpp
@@ -677,7 +677,9 @@ NL2SQLResult NL2SQL_Converter::convert(const NL2SQLRequest& req) {
 			? config.provider_url : "http://localhost:11434/v1/chat/completions";
 		model = config.provider_model ? config.provider_model : "llama3.2";
-		raw_sql = call_generic_openai(prompt, model, url, key, req.request_id);
+		raw_sql = call_generic_openai_with_retry(prompt, model, url, key, req.request_id,
+			req.max_retries, req.retry_backoff_ms,
+			req.retry_multiplier, req.retry_max_backoff_ms);
 		result.explanation = "Generated by OpenAI-compatible provider (" + std::string(model) + ")";
 		result.provider_used = "openai";
 		break;
@@ -687,7 +689,9 @@ NL2SQLResult NL2SQL_Converter::convert(const NL2SQLRequest& req) {
 			? config.provider_url : "https://api.anthropic.com/v1/messages";
 		model = config.provider_model ?
config.provider_model : "claude-3-haiku"; - raw_sql = call_generic_anthropic(prompt, model, url, key, req.request_id); + raw_sql = call_generic_anthropic_with_retry(prompt, model, url, key, req.request_id, + req.max_retries, req.retry_backoff_ms, + req.retry_multiplier, req.retry_max_backoff_ms); result.explanation = "Generated by Anthropic-compatible provider (" + std::string(model) + ")"; result.provider_used = "anthropic"; break;