/**
* @file LLM_Bridge.h
* @brief Generic LLM Bridge for ProxySQL
*
* The LLM_Bridge class provides a generic interface to Large Language Models
* using multiple LLM providers with hybrid deployment and vector-based
* semantic caching.
*
* Key Features:
* - Multi-provider LLM support (local + generic cloud)
* - Semantic similarity caching using sqlite-vec
* - Generic prompt handling (not SQL-specific)
* - Configurable model selection based on latency/budget
* - Generic provider support (OpenAI-compatible, Anthropic-compatible)
*
* @date 2025-01-17
* @version 1.0.0
*
* Example Usage:
* @code
* LLMRequest req;
* req.prompt = "Summarize this data...";
* LLMResult result = bridge->process(req);
* std::cout << result.text_response << std::endl;
* @endcode
*/
#ifndef __CLASS_LLM_BRIDGE_H
#define __CLASS_LLM_BRIDGE_H
#ifdef PROXYSQLGENAI
#define LLM_BRIDGE_VERSION "1.0.0"
#include "proxysql.h"
#include <string>
#include <vector>
#include <cstdint> // int64_t in LLMResult
#include <cstdio>  // snprintf() for request ID generation
#include <cstdlib> // rand()
// Forward declarations
class SQLite3DB;
/**
* @brief Result structure for LLM bridge processing
*
* Contains the LLM text response along with metadata including
* cache status, error details, and performance timing.
*
* @note When errors occur, error_code, error_details, and http_status_code
* provide diagnostic information for troubleshooting.
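*
* Example (illustrative sketch; assumes a bridge instance and a request
* built as in the other examples in this header):
* @code
* LLMResult res = bridge->process(req);
* if (!res.error_code.empty()) {
*     fprintf(stderr, "LLM error [%s] via %s (HTTP %d): %s\n",
*             res.error_code.c_str(), res.provider_used.c_str(),
*             res.http_status_code, res.error_details.c_str());
* } else {
*     printf("%s (cached=%d, total=%d ms)\n", res.text_response.c_str(),
*            (int)res.cached, res.total_time_ms);
* }
* @endcode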
*/
struct LLMResult {
std::string text_response; ///< LLM-generated text response
std::string explanation; ///< Which model generated this
bool cached; ///< True if from semantic cache
int64_t cache_id; ///< Cache entry ID for tracking
// Error details - populated when processing fails
std::string error_code; ///< Structured error code (e.g., "ERR_API_KEY_MISSING")
std::string error_details; ///< Detailed error context with query, provider, URL
int http_status_code; ///< HTTP status code if applicable (0 if N/A)
std::string provider_used; ///< Which provider was attempted
// Performance timing information
int total_time_ms; ///< Total processing time in milliseconds
int cache_lookup_time_ms; ///< Cache lookup time in milliseconds
int cache_store_time_ms; ///< Cache store time in milliseconds
int llm_call_time_ms; ///< LLM call time in milliseconds
bool cache_hit; ///< True if cache was hit
LLMResult() : cached(false), cache_id(0), http_status_code(0),
total_time_ms(0), cache_lookup_time_ms(0), cache_store_time_ms(0),
llm_call_time_ms(0), cache_hit(false) {}
};
/**
* @brief Request structure for LLM bridge processing
*
* Contains the prompt text and context for LLM processing.
*
* @note If max_latency_ms is set and < 500ms, the system will prefer
* local Ollama regardless of provider preference.
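*
* With the default retry fields, transient failures are retried with an
* exponential backoff derived from retry_backoff_ms and retry_multiplier
* (roughly 1000 ms, 2000 ms, 4000 ms), capped at retry_max_backoff_ms.
*
* Example (illustrative values, not recommended defaults):
* @code
* LLMRequest req;
* req.prompt = "Summarize the slow query log for schema 'sbtest'";
* req.schema_name = "sbtest";
* req.max_latency_ms = 400;   // < 500ms: local Ollama is preferred
* req.max_retries = 2;        // tighter retry budget than the default 3
* req.retry_backoff_ms = 500; // first retry after ~500 ms
* @endcode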
*/
struct LLMRequest {
std::string prompt; ///< Prompt text for LLM
std::string system_message; ///< Optional system role message
std::string schema_name; ///< Optional schema/database context
int max_latency_ms; ///< Max acceptable latency (ms)
bool allow_cache; ///< Enable semantic cache lookup
// Request tracking for correlation and debugging
std::string request_id; ///< Unique ID for this request (UUID-like)
// Retry configuration for transient failures
int max_retries; ///< Maximum retry attempts (default: 3)
int retry_backoff_ms; ///< Initial backoff in ms (default: 1000)
double retry_multiplier; ///< Backoff multiplier (default: 2.0)
int retry_max_backoff_ms; ///< Maximum backoff in ms (default: 30000)
LLMRequest() : max_latency_ms(0), allow_cache(true),
max_retries(3), retry_backoff_ms(1000),
retry_multiplier(2.0), retry_max_backoff_ms(30000) {
// Generate a UUID-like request ID for correlation (non-cryptographic).
// The final 48-bit field combines two rand() calls, since a single
// rand() typically yields at most 31 bits of randomness.
char uuid[64];
unsigned long node = (((unsigned long)rand() << 16) ^ (unsigned long)rand()) & 0xffffffffffffUL;
snprintf(uuid, sizeof(uuid), "%08lx-%04x-%04x-%04x-%012lx",
(unsigned long)rand(), (unsigned)rand() & 0xffff,
(unsigned)rand() & 0xffff, (unsigned)rand() & 0xffff,
node);
request_id = uuid;
}
};
/**
* @brief Error codes for LLM bridge processing
*
* Structured error codes that provide machine-readable error information
* for programmatic handling and user-friendly error messages.
*
* Each code maps to a stable string name (see llm_error_code_to_string())
* and can be used for:
* - Conditional logic (switch on error type)
* - Logging and monitoring
* - User error messages
*
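* Example (illustrative sketch of a caller classifying a code; the helper
* name log_llm_error is hypothetical):
* @code
* void log_llm_error(LLMErrorCode code) {
*     switch (code) {
*         case LLMErrorCode::ERR_TIMEOUT:
*         case LLMErrorCode::ERR_RATE_LIMITED:
*         case LLMErrorCode::ERR_SERVER_ERROR:
*             // transient conditions: a retry with backoff is reasonable
*             break;
*         default:
*             fprintf(stderr, "LLM bridge error: %s\n",
*                     llm_error_code_to_string(code));
*     }
* }
* @endcode
*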
* @see llm_error_code_to_string()
*/
enum class LLMErrorCode {
SUCCESS = 0, ///< No error
ERR_API_KEY_MISSING, ///< API key not configured
ERR_API_KEY_INVALID, ///< API key format is invalid
ERR_TIMEOUT, ///< Request timed out
ERR_CONNECTION_FAILED, ///< Network connection failed
ERR_RATE_LIMITED, ///< Rate limited by provider (HTTP 429)
ERR_SERVER_ERROR, ///< Server error (HTTP 5xx)
ERR_EMPTY_RESPONSE, ///< Empty response from LLM
ERR_INVALID_RESPONSE, ///< Malformed response from LLM
ERR_VALIDATION_FAILED, ///< Input validation failed
ERR_UNKNOWN_PROVIDER, ///< Invalid provider name
ERR_REQUEST_TOO_LARGE ///< Request exceeds size limit
};
/**
* @brief Convert error code enum to string representation
*
* Returns the string representation of an error code for logging
* and display purposes.
*
* @param code The error code to convert
* @return String representation of the error code
*/
const char* llm_error_code_to_string(LLMErrorCode code);
/**
* @brief Model provider format types for LLM bridge
*
* Defines the API format to use for generic providers:
* - GENERIC_OPENAI: Any OpenAI-compatible endpoint (including Ollama)
* - GENERIC_ANTHROPIC: Any Anthropic-compatible endpoint
* - FALLBACK_ERROR: No model available (error state)
*
* @note For all providers, URL and API key are configured via variables.
* Ollama can be used via its OpenAI-compatible endpoint at /v1/chat/completions.
*
* @note Missing API keys will result in error (no automatic fallback).
*/
enum class ModelProvider {
GENERIC_OPENAI, ///< Any OpenAI-compatible endpoint (configurable URL)
GENERIC_ANTHROPIC, ///< Any Anthropic-compatible endpoint (configurable URL)
FALLBACK_ERROR ///< No model available (error state)
};
/**
* @brief Generic LLM Bridge class
*
* Processes prompts using LLMs with hybrid local/cloud model support
* and vector cache.
*
* Architecture:
* - Vector cache for semantic similarity (sqlite-vec)
* - Model selection based on latency/budget
* - Generic HTTP client (libcurl) supporting multiple API formats
* - Generic prompt handling (not tied to SQL)
*
* Configuration Variables:
* - genai_llm_provider: "ollama", "openai", or "anthropic"
* - genai_llm_provider_url: Custom endpoint URL (for generic providers)
* - genai_llm_provider_model: Model name
* - genai_llm_provider_key: API key (optional for local)
*
* Thread Safety:
* - This class is NOT thread-safe by itself
* - External locking must be provided by AI_Features_Manager
*
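* Typical lifecycle (sketch; in ProxySQL the sequencing and the external
* locking are handled by AI_Features_Manager):
* @code
* LLM_Bridge* bridge = new LLM_Bridge();
* bridge->set_vector_db(db);          // SQLite3DB* provided by the caller
* if (bridge->init() == 0) {
*     LLMRequest req;
*     req.prompt = "Summarize this data...";
*     LLMResult res = bridge->process(req);
* }
* bridge->close();
* delete bridge;
* @endcode
*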
* @see AI_Features_Manager, LLMRequest, LLMResult
*/
class LLM_Bridge {
private:
struct {
bool enabled;
char* provider; ///< "ollama", "openai", or "anthropic"
char* provider_url; ///< Generic endpoint URL
char* provider_model; ///< Model name
char* provider_key; ///< API key
int cache_similarity_threshold;
int timeout_ms;
} config;
SQLite3DB* vector_db;
// Internal methods
std::string build_prompt(const LLMRequest& req);
std::string call_generic_openai(const std::string& prompt, const std::string& model,
const std::string& url, const char* key,
const std::string& req_id = "");
std::string call_generic_anthropic(const std::string& prompt, const std::string& model,
const std::string& url, const char* key,
const std::string& req_id = "");
// Retry wrapper methods
std::string call_generic_openai_with_retry(const std::string& prompt, const std::string& model,
const std::string& url, const char* key,
const std::string& req_id,
int max_retries, int initial_backoff_ms,
double backoff_multiplier, int max_backoff_ms);
std::string call_generic_anthropic_with_retry(const std::string& prompt, const std::string& model,
const std::string& url, const char* key,
const std::string& req_id,
int max_retries, int initial_backoff_ms,
double backoff_multiplier, int max_backoff_ms);
LLMResult check_cache(const LLMRequest& req); ///< Look up a semantically similar cached response
void store_in_cache(const LLMRequest& req, const LLMResult& result); ///< Persist a response with its embedding
ModelProvider select_model(const LLMRequest& req); ///< Choose API format from config and latency budget
std::vector<float> get_text_embedding(const std::string& text); ///< Embedding used for semantic cache matching
public:
/**
* @brief Constructor - initializes with default configuration
*
* Sets up default values:
* - provider: "openai"
* - provider_url: "http://localhost:11434/v1/chat/completions" (Ollama default)
* - provider_model: "llama3.2"
* - cache_similarity_threshold: 85
* - timeout_ms: 30000
*/
LLM_Bridge();
/**
* @brief Destructor - frees allocated resources
*/
~LLM_Bridge();
/**
* @brief Initialize the LLM bridge
*
* Initializes vector DB connection and validates configuration.
* The vector_db will be provided by AI_Features_Manager.
*
* @return 0 on success, non-zero on failure
*/
int init();
/**
* @brief Shutdown the LLM bridge
*
* Closes vector DB connection and cleans up resources.
*/
void close();
/**
* @brief Set the vector database for caching
*
* Sets the vector database instance for semantic similarity caching.
* Called by AI_Features_Manager during initialization.
*
* @param db Pointer to SQLite3DB instance
*/
void set_vector_db(SQLite3DB* db) { vector_db = db; }
/**
* @brief Update configuration from AI_Features_Manager
*
* Copies configuration variables from AI_Features_Manager to internal config.
* This is called by AI_Features_Manager when variables change.
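*
* Example (illustrative values matching the documented defaults: a local
* Ollama instance reached through its OpenAI-compatible endpoint):
* @code
* bridge->update_config("openai",
*                       "http://localhost:11434/v1/chat/completions",
*                       "llama3.2",
*                       "",      // API key is optional for local providers
*                       85,      // cache_similarity_threshold
*                       30000);  // timeout_ms
* @endcode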
*/
void update_config(const char* provider, const char* provider_url, const char* provider_model,
const char* provider_key, int cache_threshold, int timeout);
/**
* @brief Process a prompt using the LLM
*
* This is the main entry point for LLM bridge processing. The flow is:
* 1. Check vector cache for semantically similar prompts
* 2. Build prompt with optional system message
* 3. Select appropriate model (Ollama or generic provider)
* 4. Call LLM API
* 5. Parse response
* 6. Store in vector cache for future use
*
* @param req LLM request containing prompt and context
* @return LLMResult with text response and metadata
*
* @note This is a synchronous blocking call. For non-blocking behavior,
* use the async interface via MySQL_Session.
*
* Example:
* @code
* LLMRequest req;
* req.prompt = "Explain this query: SELECT * FROM users";
* req.allow_cache = true;
* LLMResult result = bridge.process(req);
* std::cout << result.text_response << std::endl;
* @endcode
*/
LLMResult process(const LLMRequest& req);
/**
* @brief Clear the vector cache
*
* Removes all cached LLM responses from the vector database.
* This is useful for testing or when context changes significantly.
*/
void clear_cache();
/**
* @brief Get cache statistics
*
* Returns JSON string with cache metrics:
* - entries: Total number of cached responses
* - hits: Number of cache hits
* - misses: Number of cache misses
*
* @return JSON string with cache statistics
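*
* Example output (illustrative values; exact formatting may differ):
* @code
* {"entries": 128, "hits": 542, "misses": 97}
* @endcode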
*/
std::string get_cache_stats();
};
#endif /* PROXYSQLGENAI */
#endif // __CLASS_LLM_BRIDGE_H