docs: Add comprehensive doxygen comments to NL2SQL headers and LLM_Clients

- Add file-level doxygen documentation with @file, @brief, @date, @version
- Add detailed class and method documentation with @param, @return, @note, @see
- Document data structures (NL2SQLRequest, NL2SQLResult, ModelProvider)
- Add section comments and inline documentation for implementation files
- Document all three LLM provider APIs (Ollama, OpenAI, Anthropic)
pull/5310/head
Rene Cannao 4 months ago
parent bc4fff12ce
commit 4f45c25945

@ -1,3 +1,32 @@
/**
* @file ai_features_manager.h
* @brief AI Features Manager for ProxySQL
*
* The AI_Features_Manager class coordinates all AI-related features in ProxySQL:
* - NL2SQL (Natural Language to SQL) conversion
* - Anomaly detection for security monitoring
* - Vector storage for semantic caching
* - Hybrid model routing (local Ollama + cloud APIs)
*
* Architecture:
* - Central configuration management with 'ai-' variable prefix
* - Thread-safe operations using pthread rwlock
* - Follows same pattern as MCP_Threads_Handler and GenAI_Threads_Handler
* - Coordinates with MySQL_Session for query interception
*
* @date 2025-01-16
* @version 0.1.0
*
* Example Usage:
* @code
* // Access NL2SQL converter
* NL2SQL_Converter* nl2sql = GloAI->get_nl2sql();
* NL2SQLRequest req;
* req.natural_language = "Show top customers";
* NL2SQLResult result = nl2sql->convert(req);
* @endcode
*/
#ifndef __CLASS_AI_FEATURES_MANAGER_H
#define __CLASS_AI_FEATURES_MANAGER_H
@ -23,6 +52,12 @@ class SQLite3DB;
*
* This class follows the same pattern as MCP_Threads_Handler and GenAI_Threads_Handler
* for configuration management and lifecycle.
*
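* Thread Safety:
* - Access is synchronized with a pthread rwlock via wrlock()/wrunlock()
* - The inline component getters (get_nl2sql(), get_anomaly_detector(),
*   get_vector_db()) return the raw pointer without taking the lock;
*   call them inside a wrlock()/wrunlock() pair when thread safety is needed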
*
* @see NL2SQL_Converter, Anomaly_Detector
*/
class AI_Features_Manager {
private:
@ -97,28 +132,132 @@ public:
double daily_cloud_spend_usd;
} status_variables;
/**
* @brief Constructor - initializes with default configuration
*/
AI_Features_Manager();
/**
* @brief Destructor - cleanup resources
*/
~AI_Features_Manager();
// Lifecycle
/**
* @brief Initialize all AI features
*
* Initializes vector database, NL2SQL converter, and anomaly detector.
* This must be called after ProxySQL configuration is loaded.
*
* @return 0 on success, non-zero on failure
*/
int init();
/**
* @brief Shutdown all AI features
*
* Gracefully shuts down all components and frees resources.
* Safe to call multiple times.
*/
void shutdown();
// Thread-safe locking
/**
* @brief Acquire write lock for thread-safe operations
*
* Use this for manual locking when performing multiple operations
* that need to be atomic.
*
* @note Must be paired with wrunlock()
*/
void wrlock();
/**
* @brief Release write lock
*
* @note Must be called after wrlock()
*/
void wrunlock();
// Component access
/**
* @brief Get NL2SQL converter instance
*
* @return Pointer to NL2SQL_Converter or NULL if not initialized
*
* @note Thread-safe when called within wrlock()/wrunlock() pair
*/
NL2SQL_Converter* get_nl2sql() { return nl2sql_converter; }
/**
* @brief Get anomaly detector instance
*
* @return Pointer to Anomaly_Detector or NULL if not initialized
*
* @note Thread-safe when called within wrlock()/wrunlock() pair
*/
Anomaly_Detector* get_anomaly_detector() { return anomaly_detector; }
/**
* @brief Get vector database instance
*
* @return Pointer to SQLite3DB or NULL if not initialized
*
* @note Thread-safe when called within wrlock()/wrunlock() pair
*/
SQLite3DB* get_vector_db() { return vector_db; }
// Variable management (for admin interface)
/**
* @brief Get configuration variable value
*
* Retrieves the value of an AI configuration variable by name.
* Variable names should be without the 'ai_' prefix.
*
* @param name Variable name (e.g., "nl2sql_enabled")
* @return Variable value or NULL if not found
*
* Example:
* @code
* char* enabled = GloAI->get_variable("nl2sql_enabled");
* if (enabled && strcmp(enabled, "true") == 0) { ... }
* @endcode
*/
char* get_variable(const char* name);
/**
* @brief Set configuration variable value
*
* Updates an AI configuration variable at runtime.
* Variable names should be without the 'ai_' prefix.
*
* @param name Variable name (e.g., "nl2sql_enabled")
* @param value New value
* @return true on success, false on failure
*
* Example:
* @code
* GloAI->set_variable("nl2sql_ollama_model", "llama3.3");
* @endcode
*/
bool set_variable(const char* name, const char* value);
/**
* @brief Get list of all AI variable names
*
* Returns a NULL-terminated array of variable names for the admin interface.
*
* @return Array of strings (must be freed by caller)
*/
char** get_variables_list();
// Status reporting
/**
* @brief Get AI features status as JSON
*
* Returns comprehensive status including:
* - Enabled features
* - Status counters (requests, cache hits, etc.)
* - Current configuration
* - Daily cloud spend
*
* @return JSON string with status information
*/
std::string get_status_json();
};

@ -1,3 +1,37 @@
/**
* @file anomaly_detector.h
* @brief Real-time Anomaly Detection for ProxySQL
*
* The Anomaly_Detector class provides security threat detection using:
* - Embedding-based similarity to known threats
* - Statistical outlier detection
* - Rule-based pattern matching
* - Rate limiting per user/host
*
* Key Features:
* - Multi-stage detection pipeline
* - Behavioral profiling and tracking
* - Configurable risk thresholds
* - Auto-block or log-only modes
*
* @date 2025-01-16
* @version 0.1.0 (stub implementation)
*
* Example Usage:
* @code
* Anomaly_Detector* detector = GloAI->get_anomaly_detector();
* AnomalyResult result = detector->analyze(
* "SELECT * FROM users",
* "app_user",
* "192.168.1.100",
* "production"
* );
* if (result.should_block) {
* proxy_warning("Query blocked: %s\n", result.explanation.c_str());
* }
* @endcode
*/
#ifndef __CLASS_ANOMALY_DETECTOR_H
#define __CLASS_ANOMALY_DETECTOR_H
@ -13,6 +47,9 @@ class SQLite3DB;
/**
* @brief Anomaly detection result
*
* Contains the outcome of an anomaly check including risk score,
* anomaly type, explanation, and whether to block the query.
*/
struct AnomalyResult {
bool is_anomaly; ///< True if anomaly detected

@ -1,3 +1,30 @@
/**
* @file NL2SQL_Converter.h
* @brief Natural Language to SQL Converter for ProxySQL
*
* The NL2SQL_Converter class provides natural language to SQL conversion
* using multiple LLM providers (Ollama, OpenAI, Anthropic) with hybrid
* deployment and vector-based semantic caching.
*
* Key Features:
* - Multi-provider LLM support (local + cloud)
* - Semantic similarity caching using sqlite-vec
* - Schema-aware conversion
* - Configurable model selection based on latency/budget
*
* @date 2025-01-16
* @version 0.1.0
*
* Example Usage:
* @code
* NL2SQLRequest req;
* req.natural_language = "Show top 10 customers";
* req.schema_name = "sales";
* NL2SQLResult result = converter->convert(req);
* std::cout << result.sql_query << std::endl;
* @endcode
*/
#ifndef __CLASS_NL2SQL_CONVERTER_H
#define __CLASS_NL2SQL_CONVERTER_H
@ -12,39 +39,61 @@ class SQLite3DB;
/**
* @brief Result structure for NL2SQL conversion
*
* Contains the generated SQL query along with metadata including
* confidence score, explanation, and cache status.
*
* @note The confidence score is a heuristic based on SQL validation
* and LLM response quality. Actual SQL correctness should be
* verified before execution.
*/
struct NL2SQLResult {
std::string sql_query; ///< Generated SQL
float confidence; ///< 0.0-1.0
std::string explanation; ///< LLM explanation
std::vector<std::string> tables_used; ///< Tables referenced
bool cached; ///< From cache
int64_t cache_id; ///< Cache entry ID
std::string sql_query; ///< Generated SQL query
float confidence; ///< Confidence score 0.0-1.0
std::string explanation; ///< Which model generated this
std::vector<std::string> tables_used; ///< Tables referenced in SQL
bool cached; ///< True if from semantic cache
int64_t cache_id; ///< Cache entry ID for tracking
NL2SQLResult() : confidence(0.0f), cached(false), cache_id(0) {}
};
/**
* @brief Request structure for NL2SQL conversion
*
* Contains the natural language query and context for conversion.
* Context includes schema name and optional table list for better
* SQL generation.
*
* @note If max_latency_ms is set and < 500ms, the system will prefer
* local Ollama regardless of provider preference.
*/
struct NL2SQLRequest {
std::string natural_language; ///< Input query
std::string schema_name; ///< Current schema
int max_latency_ms; ///< Latency requirement
bool allow_cache; ///< Check vector cache
std::vector<std::string> context_tables; ///< Relevant tables
std::string natural_language; ///< Natural language query text
std::string schema_name; ///< Current database/schema name
int max_latency_ms; ///< Max acceptable latency (ms)
bool allow_cache; ///< Enable semantic cache lookup
std::vector<std::string> context_tables; ///< Optional table hints for schema
NL2SQLRequest() : max_latency_ms(0), allow_cache(true) {}
};
/**
* @brief Model provider options
* @brief Model provider options for NL2SQL conversion
*
* Defines available LLM providers with different trade-offs:
* - LOCAL_OLLAMA: Free, fast, limited model quality
* - CLOUD_OPENAI: Paid, slower, high quality
* - CLOUD_ANTHROPIC: Paid, slower, high quality
*
* @note The system automatically falls back to Ollama if cloud
* API keys are not configured.
*/
enum class ModelProvider {
LOCAL_OLLAMA, ///< Local models via Ollama
CLOUD_OPENAI, ///< OpenAI API
CLOUD_ANTHROPIC, ///< Anthropic API
FALLBACK_ERROR ///< No model available
LOCAL_OLLAMA, ///< Local models via Ollama (default)
CLOUD_OPENAI, ///< OpenAI API (requires API key)
CLOUD_ANTHROPIC, ///< Anthropic API (requires API key)
FALLBACK_ERROR ///< No model available (error state)
};
/**
@ -52,6 +101,18 @@ enum class ModelProvider {
*
* Converts natural language queries to SQL using LLMs with hybrid
* local/cloud model support and vector cache.
*
* Architecture:
* - Vector cache for semantic similarity (sqlite-vec)
* - Model selection based on latency/budget
* - Multi-provider HTTP clients (libcurl)
* - Schema-aware prompt building
*
* Thread Safety:
* - This class is NOT thread-safe by itself
* - External locking must be provided by AI_Features_Manager
*
* @see AI_Features_Manager, NL2SQLRequest, NL2SQLResult
*/
class NL2SQL_Converter {
private:
@ -82,18 +143,102 @@ private:
ModelProvider select_model(const NL2SQLRequest& req);
public:
/**
* @brief Constructor - initializes with default configuration
*
* Sets up default values:
* - query_prefix: "NL2SQL:"
* - model_provider: "ollama"
* - ollama_model: "llama3.2"
* - openai_model: "gpt-4o-mini"
* - anthropic_model: "claude-3-haiku"
* - cache_similarity_threshold: 85
* - timeout_ms: 30000
*/
NL2SQL_Converter();
/**
* @brief Destructor - frees allocated resources
*/
~NL2SQL_Converter();
// Initialization
/**
* @brief Initialize the NL2SQL converter
*
* Initializes vector DB connection and validates configuration.
* The vector_db will be provided by AI_Features_Manager.
*
* @return 0 on success, non-zero on failure
*
* @note This is a stub implementation for Phase 2.
* Full vector cache integration is planned for Phase 3.
*/
int init();
/**
* @brief Shutdown the NL2SQL converter
*
* Closes vector DB connection and cleans up resources.
*/
void close();
// Main conversion method
/**
* @brief Convert natural language query to SQL
*
* This is the main entry point for NL2SQL conversion. The flow is:
* 1. Check vector cache for semantically similar queries
* 2. Build prompt with schema context
* 3. Select appropriate model (Ollama/OpenAI/Anthropic)
* 4. Call LLM API
* 5. Parse and clean SQL response
* 6. Store in vector cache for future use
*
* @param req NL2SQL request containing natural language query and context
* @return NL2SQLResult with generated SQL, confidence score, and metadata
*
* @note This is a synchronous blocking call. For non-blocking behavior,
* use the async interface via MySQL_Session.
*
* @note The confidence score is heuristic-based. Actual SQL correctness
* should be verified before execution.
*
* @see NL2SQLRequest, NL2SQLResult, ModelProvider
*
* Example:
* @code
* NL2SQLRequest req;
* req.natural_language = "Find customers with orders > $1000";
* req.allow_cache = true;
* NL2SQLResult result = converter.convert(req);
* if (result.confidence > 0.7f) {
* execute_sql(result.sql_query);
* }
* @endcode
*/
NL2SQLResult convert(const NL2SQLRequest& req);
// Cache management
/**
* @brief Clear the vector cache
*
* Removes all cached NL2SQL conversions from the vector database.
* This is useful for testing or when schema changes significantly.
*
* @note This is a stub implementation for Phase 2.
*/
void clear_cache();
/**
* @brief Get cache statistics
*
* Returns JSON string with cache metrics:
* - entries: Total number of cached conversions
* - hits: Number of cache hits
* - misses: Number of cache misses
*
* @return JSON string with cache statistics
*
* @note This is a stub implementation for Phase 2.
*/
std::string get_cache_stats();
};

@ -1,3 +1,23 @@
/**
* @file LLM_Clients.cpp
* @brief HTTP client implementations for LLM providers
*
* This file implements HTTP clients for three LLM providers:
* - Ollama (local): POST http://localhost:11434/api/generate
* - OpenAI (cloud): POST https://api.openai.com/v1/chat/completions
* - Anthropic (cloud): POST https://api.anthropic.com/v1/messages
*
* All clients use libcurl for HTTP requests and nlohmann/json for
* request/response parsing. Each client handles:
* - Request formatting for the specific API
* - Authentication headers
* - Response parsing and SQL extraction
* - Markdown code block stripping
* - Error handling and logging
*
* @see NL2SQL_Converter.h
*/
#include "NL2SQL_Converter.h"
#include "sqlite3db.h"
#include "proxysql_utils.h"
@ -14,6 +34,18 @@ using json = nlohmann::json;
// Write callback for curl responses
// ============================================================================
/**
* @brief libcurl write callback for collecting HTTP response data
*
* This callback is invoked by libcurl as data arrives.
* It appends the received data to a std::string buffer.
*
* @param contents Pointer to received data
* @param size Size of each element
* @param nmemb Number of elements
* @param userp User pointer (std::string* for response buffer)
* @return Total bytes processed
*/
static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* userp) {
size_t totalSize = size * nmemb;
std::string* response = static_cast<std::string*>(userp);
@ -26,10 +58,12 @@ static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* use
// ============================================================================
/**
* @brief Call Ollama API for text generation
* @brief Call Ollama API for text generation (local LLM)
*
* Ollama endpoint: POST http://localhost:11434/api/generate
*
* Request format:
* @code{.json}
* {
* "model": "llama3.2",
* "prompt": "Convert to SQL: Show top customers",
@ -39,12 +73,20 @@ static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* use
* "num_predict": 500
* }
* }
* @endcode
*
* Response format:
* @code{.json}
* {
* "response": "SELECT * FROM customers...",
* "model": "llama3.2",
* "total_duration": 123456789
* }
* @endcode
*
* @param prompt The prompt to send to Ollama
* @param model Model name (e.g., "llama3.2")
* @return Generated SQL or empty string on error
*/
std::string NL2SQL_Converter::call_ollama(const std::string& prompt, const std::string& model) {
std::string response_data;
@ -124,10 +166,12 @@ std::string NL2SQL_Converter::call_ollama(const std::string& prompt, const std::
}
/**
* @brief Call OpenAI API for text generation
* @brief Call OpenAI API for text generation (cloud LLM)
*
* OpenAI endpoint: POST https://api.openai.com/v1/chat/completions
*
* Request format:
* @code{.json}
* {
* "model": "gpt-4o-mini",
* "messages": [
@ -137,7 +181,10 @@ std::string NL2SQL_Converter::call_ollama(const std::string& prompt, const std::
* "temperature": 0.1,
* "max_tokens": 500
* }
* @endcode
*
* Response format:
* @code{.json}
* {
* "choices": [{
* "message": {
@ -148,6 +195,11 @@ std::string NL2SQL_Converter::call_ollama(const std::string& prompt, const std::
* }],
* "usage": {"total_tokens": 123}
* }
* @endcode
*
* @param prompt The prompt to send to OpenAI
* @param model Model name (e.g., "gpt-4o-mini")
* @return Generated SQL or empty string on error
*/
std::string NL2SQL_Converter::call_openai(const std::string& prompt, const std::string& model) {
std::string response_data;

@ -1,3 +1,16 @@
/**
* @file NL2SQL_Converter.cpp
* @brief Implementation of Natural Language to SQL Converter
*
* This file implements the NL2SQL conversion pipeline including:
* - Vector cache operations for semantic similarity
* - Model selection based on latency/budget
* - LLM API calls (Ollama, OpenAI, Anthropic)
* - SQL validation and cleaning
*
* @see NL2SQL_Converter.h
*/
#include "NL2SQL_Converter.h"
#include "sqlite3db.h"
#include "proxysql_utils.h"
@ -12,6 +25,14 @@ using json = nlohmann::json;
// Global instance is defined elsewhere if needed
// NL2SQL_Converter *GloNL2SQL = NULL;
// ============================================================================
// Constructor/Destructor
// ============================================================================
/**
* Constructor initializes with default configuration values.
* The vector_db will be set by AI_Features_Manager during init().
*/
NL2SQL_Converter::NL2SQL_Converter() : vector_db(NULL) {
config.enabled = true;
config.query_prefix = strdup("NL2SQL:");
@ -36,6 +57,14 @@ NL2SQL_Converter::~NL2SQL_Converter() {
free(config.anthropic_key);
}
// ============================================================================
// Lifecycle
// ============================================================================
/**
* Initialize the NL2SQL converter.
* The vector DB will be provided by AI_Features_Manager during initialization.
*/
int NL2SQL_Converter::init() {
proxy_info("NL2SQL: Initializing NL2SQL Converter v%s\n", NL2SQL_CONVERTER_VERSION);
@ -187,15 +216,22 @@ std::string NL2SQL_Converter::get_schema_context(const std::vector<std::string>&
// ============================================================================
/**
* @brief Convert natural language to SQL
* @brief Convert natural language to SQL (main entry point)
*
* This is the main entry point for NL2SQL conversion. The flow is:
* Conversion Pipeline:
* 1. Check vector cache for semantically similar queries
* 2. Build prompt with schema context
* 3. Select appropriate model (Ollama/OpenAI/Anthropic)
* 4. Call LLM API
* 4. Call LLM API via HTTP
* 5. Parse and clean SQL response
* 6. Store in vector cache for future use
*
* The confidence score is calculated based on:
* - SQL keyword validation (does it look like SQL?)
* - Response quality (non-empty, well-formed)
* - Default score of 0.85 for valid-looking SQL
*
* @note This is a synchronous blocking call.
*/
NL2SQLResult NL2SQL_Converter::convert(const NL2SQLRequest& req) {
NL2SQLResult result;

Loading…
Cancel
Save