diff --git a/include/AI_Features_Manager.h b/include/AI_Features_Manager.h index 68693cb63..c240737ff 100644 --- a/include/AI_Features_Manager.h +++ b/include/AI_Features_Manager.h @@ -1,3 +1,32 @@ +/** + * @file AI_Features_Manager.h + * @brief AI Features Manager for ProxySQL + * + * The AI_Features_Manager class coordinates all AI-related features in ProxySQL: + * - NL2SQL (Natural Language to SQL) conversion + * - Anomaly detection for security monitoring + * - Vector storage for semantic caching + * - Hybrid model routing (local Ollama + cloud APIs) + * + * Architecture: + * - Central configuration management with 'ai-' variable prefix + * - Thread-safe operations using pthread rwlock + * - Follows same pattern as MCP_Threads_Handler and GenAI_Threads_Handler + * - Coordinates with MySQL_Session for query interception + * + * @date 2025-01-16 + * @version 0.1.0 + * + * Example Usage: + * @code + * // Access NL2SQL converter + * NL2SQL_Converter* nl2sql = GloAI->get_nl2sql(); + * NL2SQLRequest req; + * req.natural_language = "Show top customers"; + * NL2SQLResult result = nl2sql->convert(req); + * @endcode + */ + #ifndef __CLASS_AI_FEATURES_MANAGER_H #define __CLASS_AI_FEATURES_MANAGER_H @@ -23,6 +52,12 @@ class SQLite3DB; * * This class follows the same pattern as MCP_Threads_Handler and GenAI_Threads_Handler * for configuration management and lifecycle.
+ * + * Thread Safety: + * - Synchronization is based on a pthread rwlock exposed via wrlock()/wrunlock() + * - The inline component getters take no lock themselves; call them inside a wrlock()/wrunlock() pair + * + * @see NL2SQL_Converter, Anomaly_Detector */ class AI_Features_Manager { private: @@ -97,28 +132,132 @@ public: double daily_cloud_spend_usd; } status_variables; + /** + * @brief Constructor - initializes with default configuration + */ AI_Features_Manager(); + + /** + * @brief Destructor - cleanup resources + */ ~AI_Features_Manager(); - // Lifecycle + /** + * @brief Initialize all AI features + * + * Initializes vector database, NL2SQL converter, and anomaly detector. + * This must be called after ProxySQL configuration is loaded. + * + * @return 0 on success, non-zero on failure + */ int init(); + + /** + * @brief Shutdown all AI features + * + * Gracefully shuts down all components and frees resources. + * Safe to call multiple times. + */ void shutdown(); - // Thread-safe locking + /** + * @brief Acquire write lock for thread-safe operations + * + * Use this for manual locking when performing multiple operations + * that need to be atomic.
+ * + * @note Must be paired with wrunlock() + */ void wrlock(); + + /** + * @brief Release write lock + * + * @note Must be called after wrlock() + */ void wrunlock(); - // Component access + /** + * @brief Get NL2SQL converter instance + * + * @return Pointer to NL2SQL_Converter or NULL if not initialized + * + * @note Thread-safe when called within wrlock()/wrunlock() pair + */ NL2SQL_Converter* get_nl2sql() { return nl2sql_converter; } + + /** + * @brief Get anomaly detector instance + * + * @return Pointer to Anomaly_Detector or NULL if not initialized + * + * @note Thread-safe when called within wrlock()/wrunlock() pair + */ Anomaly_Detector* get_anomaly_detector() { return anomaly_detector; } + + /** + * @brief Get vector database instance + * + * @return Pointer to SQLite3DB or NULL if not initialized + * + * @note Thread-safe when called within wrlock()/wrunlock() pair + */ SQLite3DB* get_vector_db() { return vector_db; } - // Variable management (for admin interface) + /** + * @brief Get configuration variable value + * + * Retrieves the value of an AI configuration variable by name. + * Variable names should be without the 'ai-' prefix. + * + * @param name Variable name (e.g., "nl2sql_enabled") + * @return Variable value or NULL if not found + * + * Example: + * @code + * char* enabled = GloAI->get_variable("nl2sql_enabled"); + * if (enabled && strcmp(enabled, "true") == 0) { ... } + * @endcode + */ char* get_variable(const char* name); + + /** + * @brief Set configuration variable value + * + * Updates an AI configuration variable at runtime. + * Variable names should be without the 'ai-' prefix.
+ * + * @param name Variable name (e.g., "nl2sql_enabled") + * @param value New value + * @return true on success, false on failure + * + * Example: + * @code + * GloAI->set_variable("nl2sql_ollama_model", "llama3.3"); + * @endcode + */ bool set_variable(const char* name, const char* value); + + /** + * @brief Get list of all AI variable names + * + * Returns NULL-terminated array of variable names for admin interface. + * + * @return Array of strings (must be freed by caller) + */ char** get_variables_list(); - // Status reporting + /** + * @brief Get AI features status as JSON + * + * Returns comprehensive status including: + * - Enabled features + * - Status counters (requests, cache hits, etc.) + * - Current configuration + * - Daily cloud spend + * + * @return JSON string with status information + */ std::string get_status_json(); }; diff --git a/include/Anomaly_Detector.h b/include/Anomaly_Detector.h index 66ed023c8..8b52fe115 100644 --- a/include/Anomaly_Detector.h +++ b/include/Anomaly_Detector.h @@ -1,3 +1,37 @@ +/** + * @file Anomaly_Detector.h + * @brief Real-time Anomaly Detection for ProxySQL + * + * The Anomaly_Detector class provides security threat detection using: + * - Embedding-based similarity to known threats + * - Statistical outlier detection + * - Rule-based pattern matching + * - Rate limiting per user/host + * + * Key Features: + * - Multi-stage detection pipeline + * - Behavioral profiling and tracking + * - Configurable risk thresholds + * - Auto-block or log-only modes + * + * @date 2025-01-16 + * @version 0.1.0 (stub implementation) + * + * Example Usage: + * @code + * Anomaly_Detector* detector = GloAI->get_anomaly_detector(); + * AnomalyResult result = detector->analyze( + * "SELECT * FROM users", + * "app_user", + * "192.168.1.100", + * "production" + * ); + * if (result.should_block) { + * proxy_warning("Query blocked: %s\n", result.explanation.c_str()); + * } + * @endcode + */ + #ifndef __CLASS_ANOMALY_DETECTOR_H #define
__CLASS_ANOMALY_DETECTOR_H @@ -13,6 +47,9 @@ class SQLite3DB; /** * @brief Anomaly detection result + * + * Contains the outcome of an anomaly check including risk score, + * anomaly type, explanation, and whether to block the query. */ struct AnomalyResult { bool is_anomaly; ///< True if anomaly detected diff --git a/include/NL2SQL_Converter.h b/include/NL2SQL_Converter.h index 0fa70d7b8..7adb85259 100644 --- a/include/NL2SQL_Converter.h +++ b/include/NL2SQL_Converter.h @@ -1,3 +1,30 @@ +/** + * @file NL2SQL_Converter.h + * @brief Natural Language to SQL Converter for ProxySQL + * + * The NL2SQL_Converter class provides natural language to SQL conversion + * using multiple LLM providers (Ollama, OpenAI, Anthropic) with hybrid + * deployment and vector-based semantic caching. + * + * Key Features: + * - Multi-provider LLM support (local + cloud) + * - Semantic similarity caching using sqlite-vec + * - Schema-aware conversion + * - Configurable model selection based on latency/budget + * + * @date 2025-01-16 + * @version 0.1.0 + * + * Example Usage: + * @code + * NL2SQLRequest req; + * req.natural_language = "Show top 10 customers"; + * req.schema_name = "sales"; + * NL2SQLResult result = converter->convert(req); + * std::cout << result.sql_query << std::endl; + * @endcode + */ + #ifndef __CLASS_NL2SQL_CONVERTER_H #define __CLASS_NL2SQL_CONVERTER_H @@ -12,39 +39,61 @@ class SQLite3DB; /** * @brief Result structure for NL2SQL conversion + * + * Contains the generated SQL query along with metadata including + * confidence score, explanation, and cache status. + * + * @note The confidence score is a heuristic based on SQL validation + * and LLM response quality. Actual SQL correctness should be + * verified before execution.
*/ struct NL2SQLResult { - std::string sql_query; ///< Generated SQL - float confidence; ///< 0.0-1.0 - std::string explanation; ///< LLM explanation - std::vector tables_used; ///< Tables referenced - bool cached; ///< From cache - int64_t cache_id; ///< Cache entry ID + std::string sql_query; ///< Generated SQL query + float confidence; ///< Confidence score 0.0-1.0 + std::string explanation; ///< Which model generated this + std::vector tables_used; ///< Tables referenced in SQL + bool cached; ///< True if from semantic cache + int64_t cache_id; ///< Cache entry ID for tracking NL2SQLResult() : confidence(0.0f), cached(false), cache_id(0) {} }; /** * @brief Request structure for NL2SQL conversion + * + * Contains the natural language query and context for conversion. + * Context includes schema name and optional table list for better + * SQL generation. + * + * @note If max_latency_ms is set and < 500ms, the system will prefer + * local Ollama regardless of provider preference. */ struct NL2SQLRequest { - std::string natural_language; ///< Input query - std::string schema_name; ///< Current schema - int max_latency_ms; ///< Latency requirement - bool allow_cache; ///< Check vector cache - std::vector context_tables; ///< Relevant tables + std::string natural_language; ///< Natural language query text + std::string schema_name; ///< Current database/schema name + int max_latency_ms; ///< Max acceptable latency (ms) + bool allow_cache; ///< Enable semantic cache lookup + std::vector context_tables; ///< Optional table hints for schema NL2SQLRequest() : max_latency_ms(0), allow_cache(true) {} }; /** - * @brief Model provider options + * @brief Model provider options for NL2SQL conversion + * + * Defines available LLM providers with different trade-offs: + * - LOCAL_OLLAMA: Free, fast, limited model quality + * - CLOUD_OPENAI: Paid, slower, high quality + * - CLOUD_ANTHROPIC: Paid, slower, high quality + * + * @note The system automatically falls back to Ollama if 
cloud + * API keys are not configured. */ enum class ModelProvider { - LOCAL_OLLAMA, ///< Local models via Ollama - CLOUD_OPENAI, ///< OpenAI API - CLOUD_ANTHROPIC, ///< Anthropic API - FALLBACK_ERROR ///< No model available + LOCAL_OLLAMA, ///< Local models via Ollama (default) + CLOUD_OPENAI, ///< OpenAI API (requires API key) + CLOUD_ANTHROPIC, ///< Anthropic API (requires API key) + FALLBACK_ERROR ///< No model available (error state) }; /** @@ -52,6 +101,18 @@ enum class ModelProvider { * * Converts natural language queries to SQL using LLMs with hybrid * local/cloud model support and vector cache. + * + * Architecture: + * - Vector cache for semantic similarity (sqlite-vec) + * - Model selection based on latency/budget + * - Multi-provider HTTP clients (libcurl) + * - Schema-aware prompt building + * + * Thread Safety: + * - This class is NOT thread-safe by itself + * - External locking must be provided by AI_Features_Manager + * + * @see AI_Features_Manager, NL2SQLRequest, NL2SQLResult */ class NL2SQL_Converter { private: @@ -82,18 +143,102 @@ private: ModelProvider select_model(const NL2SQLRequest& req); public: + /** + * @brief Constructor - initializes with default configuration + * + * Sets up default values: + * - query_prefix: "NL2SQL:" + * - model_provider: "ollama" + * - ollama_model: "llama3.2" + * - openai_model: "gpt-4o-mini" + * - anthropic_model: "claude-3-haiku" + * - cache_similarity_threshold: 85 + * - timeout_ms: 30000 + */ NL2SQL_Converter(); + + /** + * @brief Destructor - frees allocated resources + */ ~NL2SQL_Converter(); - // Initialization + /** + * @brief Initialize the NL2SQL converter + * + * Initializes vector DB connection and validates configuration. + * The vector_db will be provided by AI_Features_Manager. + * + * @return 0 on success, non-zero on failure + * + * @note This is a stub implementation for Phase 2. + * Full vector cache integration is planned for Phase 3. 
+ */ int init(); + + /** + * @brief Shutdown the NL2SQL converter + * + * Closes vector DB connection and cleans up resources. + */ void close(); - // Main conversion method + /** + * @brief Convert natural language query to SQL + * + * This is the main entry point for NL2SQL conversion. The flow is: + * 1. Check vector cache for semantically similar queries + * 2. Build prompt with schema context + * 3. Select appropriate model (Ollama/OpenAI/Anthropic) + * 4. Call LLM API + * 5. Parse and clean SQL response + * 6. Store in vector cache for future use + * + * @param req NL2SQL request containing natural language query and context + * @return NL2SQLResult with generated SQL, confidence score, and metadata + * + * @note This is a synchronous blocking call. For non-blocking behavior, + * use the async interface via MySQL_Session. + * + * @note The confidence score is heuristic-based. Actual SQL correctness + * should be verified before execution. + * + * @see NL2SQLRequest, NL2SQLResult, ModelProvider + * + * Example: + * @code + * NL2SQLRequest req; + * req.natural_language = "Find customers with orders > $1000"; + * req.allow_cache = true; + * NL2SQLResult result = converter.convert(req); + * if (result.confidence > 0.7f) { + * execute_sql(result.sql_query); + * } + * @endcode + */ NL2SQLResult convert(const NL2SQLRequest& req); - // Cache management + /** + * @brief Clear the vector cache + * + * Removes all cached NL2SQL conversions from the vector database. + * This is useful for testing or when schema changes significantly. + * + * @note This is a stub implementation for Phase 2. + */ void clear_cache(); + + /** + * @brief Get cache statistics + * + * Returns JSON string with cache metrics: + * - entries: Total number of cached conversions + * - hits: Number of cache hits + * - misses: Number of cache misses + * + * @return JSON string with cache statistics + * + * @note This is a stub implementation for Phase 2. 
+ */ std::string get_cache_stats(); }; diff --git a/lib/LLM_Clients.cpp b/lib/LLM_Clients.cpp index 6d124ee07..d40057f13 100644 --- a/lib/LLM_Clients.cpp +++ b/lib/LLM_Clients.cpp @@ -1,3 +1,23 @@ +/** + * @file LLM_Clients.cpp + * @brief HTTP client implementations for LLM providers + * + * This file implements HTTP clients for three LLM providers: + * - Ollama (local): POST http://localhost:11434/api/generate + * - OpenAI (cloud): POST https://api.openai.com/v1/chat/completions + * - Anthropic (cloud): POST https://api.anthropic.com/v1/messages + * + * All clients use libcurl for HTTP requests and nlohmann/json for + * request/response parsing. Each client handles: + * - Request formatting for the specific API + * - Authentication headers + * - Response parsing and SQL extraction + * - Markdown code block stripping + * - Error handling and logging + * + * @see NL2SQL_Converter.h + */ + #include "NL2SQL_Converter.h" #include "sqlite3db.h" #include "proxysql_utils.h" @@ -14,6 +34,18 @@ using json = nlohmann::json; // Write callback for curl responses // ============================================================================ +/** + * @brief libcurl write callback for collecting HTTP response data + * + * This callback is invoked by libcurl as data arrives. + * It appends the received data to a std::string buffer. 
+ * + * @param contents Pointer to received data + * @param size Size of each element + * @param nmemb Number of elements + * @param userp User pointer (std::string* for response buffer) + * @return Total bytes processed + */ static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* userp) { size_t totalSize = size * nmemb; std::string* response = static_cast(userp); @@ -26,10 +58,12 @@ static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* use // ============================================================================ /** - * @brief Call Ollama API for text generation + * @brief Call Ollama API for text generation (local LLM) * * Ollama endpoint: POST http://localhost:11434/api/generate + * * Request format: + * @code{.json} * { * "model": "llama3.2", * "prompt": "Convert to SQL: Show top customers", @@ -39,12 +73,20 @@ static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* use * "num_predict": 500 * } * } + * @endcode + * * Response format: + * @code{.json} * { * "response": "SELECT * FROM customers...", * "model": "llama3.2", * "total_duration": 123456789 * } + * @endcode + * + * @param prompt The prompt to send to Ollama + * @param model Model name (e.g., "llama3.2") + * @return Generated SQL or empty string on error */ std::string NL2SQL_Converter::call_ollama(const std::string& prompt, const std::string& model) { std::string response_data; @@ -124,10 +166,12 @@ std::string NL2SQL_Converter::call_ollama(const std::string& prompt, const std:: } /** - * @brief Call OpenAI API for text generation + * @brief Call OpenAI API for text generation (cloud LLM) * * OpenAI endpoint: POST https://api.openai.com/v1/chat/completions + * * Request format: + * @code{.json} * { * "model": "gpt-4o-mini", * "messages": [ @@ -137,7 +181,10 @@ std::string NL2SQL_Converter::call_ollama(const std::string& prompt, const std:: * "temperature": 0.1, * "max_tokens": 500 * } + * @endcode + * * Response format: + * 
@code{.json} * { * "choices": [{ * "message": { @@ -148,6 +195,11 @@ std::string NL2SQL_Converter::call_ollama(const std::string& prompt, const std:: * }], * "usage": {"total_tokens": 123} * } + * @endcode + * + * @param prompt The prompt to send to OpenAI + * @param model Model name (e.g., "gpt-4o-mini") + * @return Generated SQL or empty string on error */ std::string NL2SQL_Converter::call_openai(const std::string& prompt, const std::string& model) { std::string response_data; diff --git a/lib/NL2SQL_Converter.cpp b/lib/NL2SQL_Converter.cpp index dd9e2d00f..e9e26eb4c 100644 --- a/lib/NL2SQL_Converter.cpp +++ b/lib/NL2SQL_Converter.cpp @@ -1,3 +1,16 @@ +/** + * @file NL2SQL_Converter.cpp + * @brief Implementation of Natural Language to SQL Converter + * + * This file implements the NL2SQL conversion pipeline including: + * - Vector cache operations for semantic similarity + * - Model selection based on latency/budget + * - LLM API calls (Ollama, OpenAI, Anthropic) + * - SQL validation and cleaning + * + * @see NL2SQL_Converter.h + */ + #include "NL2SQL_Converter.h" #include "sqlite3db.h" #include "proxysql_utils.h" @@ -12,6 +25,14 @@ using json = nlohmann::json; // Global instance is defined elsewhere if needed // NL2SQL_Converter *GloNL2SQL = NULL; +// ============================================================================ +// Constructor/Destructor +// ============================================================================ + +/** + * Constructor initializes with default configuration values. + * The vector_db will be set by AI_Features_Manager during init(). 
+ */ NL2SQL_Converter::NL2SQL_Converter() : vector_db(NULL) { config.enabled = true; config.query_prefix = strdup("NL2SQL:"); @@ -36,6 +57,14 @@ NL2SQL_Converter::~NL2SQL_Converter() { free(config.anthropic_key); } +// ============================================================================ +// Lifecycle +// ============================================================================ + +/** + * Initialize the NL2SQL converter. + * The vector DB will be provided by AI_Features_Manager during initialization. + */ int NL2SQL_Converter::init() { proxy_info("NL2SQL: Initializing NL2SQL Converter v%s\n", NL2SQL_CONVERTER_VERSION); @@ -187,15 +216,22 @@ std::string NL2SQL_Converter::get_schema_context(const std::vector& // ============================================================================ /** - * @brief Convert natural language to SQL + * @brief Convert natural language to SQL (main entry point) * - * This is the main entry point for NL2SQL conversion. The flow is: + * Conversion Pipeline: * 1. Check vector cache for semantically similar queries * 2. Build prompt with schema context * 3. Select appropriate model (Ollama/OpenAI/Anthropic) - * 4. Call LLM API + * 4. Call LLM API via HTTP * 5. Parse and clean SQL response * 6. Store in vector cache for future use + * + * The confidence score is calculated based on: + * - SQL keyword validation (does it look like SQL?) + * - Response quality (non-empty, well-formed) + * - Default score of 0.85 for valid-looking SQL + * + * @note This is a synchronous blocking call. */ NL2SQLResult NL2SQL_Converter::convert(const NL2SQLRequest& req) { NL2SQLResult result;