mirror of https://github.com/sysown/proxysql
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
450 lines
17 KiB
450 lines
17 KiB
#ifndef __CLASS_GENAI_THREAD_H
|
|
#define __CLASS_GENAI_THREAD_H
|
|
|
|
#ifdef PROXYSQLGENAI
|
|
|
|
#include "proxysql.h"
|
|
#include <vector>
|
|
#include <queue>
|
|
#include <mutex>
|
|
#include <condition_variable>
|
|
#include <atomic>
|
|
#include <unordered_set>
|
|
#include <thread>
|
|
#include <sys/socket.h>
|
|
|
|
// <sys/epoll.h> is only available on Linux. The guard used to be
// `#ifdef epoll_create1`, but epoll_create1 is a function (not a preprocessor
// macro) in glibc/musl, so that condition was never true and the header was
// never pulled in from here. Test the platform instead.
#ifdef __linux__
#include <sys/epoll.h>
#endif
|
|
|
|
#include "curl/curl.h"
|
|
|
|
#define GENAI_THREAD_VERSION "0.1.0"
|
|
|
|
/**
 * @brief Kinds of work a GenAI request can ask for.
 *
 * The underlying type is fixed to uint32_t and every enumerator has an
 * explicit value, so the numbers stay stable when they are stored in the
 * uint32_t `operation` fields used elsewhere in this header.
 */
enum GenAI_Operation : uint32_t {
	/// Generate embeddings for documents.
	GENAI_OP_EMBEDDING = 0,
	/// Rerank documents by relevance to a query.
	GENAI_OP_RERANK = 1,
	/// Autonomous JSON query processing (embed / rerank / document_from_sql).
	GENAI_OP_JSON = 2,
	/// Generic LLM bridge processing.
	GENAI_OP_LLM = 3
};
|
|
|
|
/**
 * @brief Non-owning view of one document's text.
 *
 * The struct only stores a pointer and a byte length; the buffer itself
 * belongs to the caller and has to outlive every use of this object.
 */
struct GenAI_Document {
	/// Start of the document text (owned by the caller, never freed here).
	const char* text;
	/// Length of the text in bytes.
	size_t text_size;

	/// Build an empty document: null pointer, zero length.
	GenAI_Document() : GenAI_Document(nullptr, 0) {}

	/**
	 * @brief Wrap an existing buffer.
	 * @param t Pointer to the text.
	 * @param s Size of the text in bytes.
	 */
	GenAI_Document(const char* t, size_t s) : text{t}, text_size{s} {}
};
|
|
|
|
/**
 * @brief Move-only container for a batch of embedding vectors.
 *
 * The destructor and the move operations are only declared here; their
 * definitions live outside this header.
 * NOTE(review): presumably the destructor releases `data` — confirm against
 * the implementation before freeing the buffer manually.
 */
struct GenAI_EmbeddingResult {
	/// Pointer to the embedding values.
	float* data;
	/// Number of floats in one embedding.
	size_t embedding_size;
	/// Number of embeddings held.
	size_t count;

	/// Start out empty: no buffer, zero sizes.
	GenAI_EmbeddingResult() : data{nullptr}, embedding_size{0}, count{0} {}
	~GenAI_EmbeddingResult();

	// Copying is disabled.
	GenAI_EmbeddingResult(const GenAI_EmbeddingResult&) = delete;
	GenAI_EmbeddingResult& operator=(const GenAI_EmbeddingResult&) = delete;

	// Moving is allowed and declared non-throwing.
	GenAI_EmbeddingResult(GenAI_EmbeddingResult&& other) noexcept;
	GenAI_EmbeddingResult& operator=(GenAI_EmbeddingResult&& other) noexcept;
};
|
|
|
|
/**
 * @brief One entry of a rerank response.
 */
struct GenAI_RerankResult {
	/// Index of the document in the original input list.
	uint32_t index;
	/// Relevance score assigned to that document.
	float score;
};
|
|
|
|
/**
|
|
* @brief Rerank result array structure
|
|
*/
|
|
struct GenAI_RerankResultArray {
|
|
GenAI_RerankResult* data; ///< Pointer to result array
|
|
size_t count; ///< Number of results
|
|
|
|
GenAI_RerankResultArray() : data(nullptr), count(0) {}
|
|
~GenAI_RerankResultArray();
|
|
|
|
// Disable copy
|
|
GenAI_RerankResultArray(const GenAI_RerankResultArray&) = delete;
|
|
GenAI_RerankResultArray& operator=(const GenAI_RerankResultArray&) = delete;
|
|
|
|
// Move semantics
|
|
GenAI_RerankResultArray(GenAI_RerankResultArray&& other) noexcept;
|
|
GenAI_RerankResultArray& operator=(GenAI_RerankResultArray&& other) noexcept;
|
|
};
|
|
|
|
/**
|
|
* @brief Request structure for internal queue
|
|
*/
|
|
struct GenAI_Request {
|
|
int client_fd; ///< Client file descriptor
|
|
uint64_t request_id; ///< Request ID
|
|
uint32_t operation; ///< Operation type
|
|
std::string query; ///< Query for rerank (empty for embedding)
|
|
uint32_t top_n; ///< Top N results for rerank
|
|
std::vector<GenAI_Document> documents; ///< Documents to process
|
|
std::string json_query; ///< Raw JSON query from client (for autonomous processing)
|
|
};
|
|
|
|
/**
 * @brief Fixed-size header that precedes each request on the socketpair.
 *
 * MySQL_Session writes this header, followed by the JSON query, to its end
 * of a socketpair when it issues an asynchronous GenAI request. The header
 * carries the metadata needed to process the request without blocking the
 * MySQL thread.
 *
 * Sequence of a request:
 *   1. MySQL_Session creates a socketpair().
 *   2. MySQL_Session sends GenAI_RequestHeader + JSON query through its fd.
 *   3. The GenAI listener picks the data up from the socketpair via epoll.
 *   4. A GenAI worker handles the request (blocking curl in the worker thread).
 *   5. The worker sends GenAI_ResponseHeader + JSON result back over the socketpair.
 *   6. MySQL_Session is notified of the response via epoll.
 *
 * Field order and widths define the byte layout of the header; do not
 * reorder or resize members.
 *
 * @see GenAI_ResponseHeader
 */
struct GenAI_RequestHeader {
	/// Correlation ID chosen by the client to pair requests with responses.
	uint64_t request_id;
	/// Requested operation, one of the GenAI_Operation values.
	uint32_t operation;
	/// Byte length of the JSON query following the header (0 when there is none).
	uint32_t query_len;
	/// Reserved for future use; must be 0.
	uint32_t flags;
	/// Rerank only: maximum number of results to return (0 = all).
	uint32_t top_n;
};
|
|
|
|
/**
 * @brief Fixed-size header that precedes each response on the socketpair.
 *
 * A GenAI worker writes this header back to MySQL_Session once processing
 * has finished. It carries the status and result metadata and is followed
 * by the JSON result payload.
 *
 * On the wire:
 *   - GenAI_ResponseHeader (this structure)
 *   - JSON result data (result_len bytes, only when result_len > 0)
 *
 * Field order and widths define the byte layout of the header; do not
 * reorder or resize members.
 * NOTE(review): on ABIs that align uint64_t to 8 bytes the compiler places
 * 4 padding bytes before `result_ptr`; presumably both ends share the same
 * build so this is harmless — confirm if the layout ever crosses processes.
 *
 * @see GenAI_RequestHeader
 */
struct GenAI_ResponseHeader {
	/// Copy of the client's request ID, used to match the response to its request.
	uint64_t request_id;
	/// 0 on success, any value > 0 signals an error.
	uint32_t status_code;
	/// Byte length of the JSON result payload following the header.
	uint32_t result_len;
	/// Time the GenAI worker spent on the request, in milliseconds.
	uint32_t processing_time_ms;
	/// Reserved for future shared-memory optimizations; must be 0.
	uint64_t result_ptr;
	/// Number of results in the response (e.g. embeddings or rerank entries).
	uint32_t result_count;
	/// Reserved for future use; must be 0.
	uint32_t reserved;
};
|
|
|
|
/**
|
|
* @brief GenAI Threads Handler class for managing GenAI module
|
|
*
|
|
* This class handles the GenAI module's configuration variables, lifecycle,
|
|
* and provides embedding and reranking functionality via external services.
|
|
*/
|
|
class GenAI_Threads_Handler
|
|
{
|
|
private:
|
|
std::atomic<int> shutdown_;
|
|
pthread_rwlock_t rwlock;
|
|
|
|
// Threading components
|
|
std::vector<pthread_t> worker_threads_;
|
|
std::thread listener_thread_;
|
|
std::queue<GenAI_Request> request_queue_;
|
|
std::mutex queue_mutex_;
|
|
std::condition_variable queue_cv_;
|
|
std::unordered_set<int> client_fds_;
|
|
std::mutex clients_mutex_;
|
|
|
|
// epoll for async I/O
|
|
int epoll_fd_;
|
|
int event_fd_;
|
|
|
|
// Worker methods
|
|
void worker_loop(int worker_id);
|
|
void listener_loop();
|
|
|
|
// HTTP client methods
|
|
GenAI_EmbeddingResult call_llama_embedding(const std::string& text);
|
|
GenAI_EmbeddingResult call_llama_batch_embedding(const std::vector<std::string>& texts);
|
|
GenAI_RerankResultArray call_llama_rerank(const std::string& query,
|
|
const std::vector<std::string>& texts,
|
|
uint32_t top_n);
|
|
static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* userp);
|
|
|
|
public:
|
|
/**
|
|
* @brief Structure holding GenAI module configuration variables
|
|
*/
|
|
struct {
|
|
// Thread configuration
|
|
int genai_threads; ///< Number of worker threads (default: 4)
|
|
|
|
// Service endpoints
|
|
char* genai_embedding_uri; ///< URI for embedding service (default: http://127.0.0.1:8013/embedding)
|
|
char* genai_rerank_uri; ///< URI for reranking service (default: http://127.0.0.1:8012/rerank)
|
|
char* genai_embedding_model; ///< Embedding model name (default: empty)
|
|
|
|
// Timeouts (in milliseconds)
|
|
int genai_embedding_timeout_ms; ///< Timeout for embedding requests (default: 30000)
|
|
int genai_rerank_timeout_ms; ///< Timeout for reranking requests (default: 30000)
|
|
|
|
// AI Features master switches
|
|
bool genai_enabled; ///< Master enable for all AI features (default: false)
|
|
bool genai_llm_enabled; ///< Enable LLM bridge feature (default: false)
|
|
bool genai_anomaly_enabled; ///< Enable anomaly detection (default: false)
|
|
|
|
// LLM bridge configuration
|
|
char* genai_llm_provider; ///< Provider format: "openai" or "anthropic" (default: "openai")
|
|
char* genai_llm_provider_url; ///< LLM endpoint URL (default: http://localhost:11434/v1/chat/completions)
|
|
char* genai_llm_provider_model; ///< Model name (default: "llama3.2")
|
|
char* genai_llm_provider_key; ///< API key (default: NULL)
|
|
int genai_llm_cache_similarity_threshold; ///< Semantic cache threshold 0-100 (default: 85)
|
|
int genai_llm_cache_enabled; ///< Enable semantic cache (default: true)
|
|
int genai_llm_timeout_ms; ///< LLM request timeout in ms (default: 30000)
|
|
|
|
// Anomaly detection configuration
|
|
int genai_anomaly_risk_threshold; ///< Risk score threshold for blocking 0-100 (default: 70)
|
|
int genai_anomaly_similarity_threshold; ///< Similarity threshold 0-100 (default: 80)
|
|
int genai_anomaly_rate_limit; ///< Max queries per minute (default: 100)
|
|
bool genai_anomaly_auto_block; ///< Auto-block suspicious queries (default: true)
|
|
bool genai_anomaly_log_only; ///< Log-only mode (default: false)
|
|
|
|
// Hybrid model routing
|
|
bool genai_prefer_local_models; ///< Prefer local Ollama over cloud (default: true)
|
|
double genai_daily_budget_usd; ///< Daily cloud spend limit (default: 10.0)
|
|
int genai_max_cloud_requests_per_hour; ///< Cloud API rate limit (default: 100)
|
|
|
|
// Vector storage configuration
|
|
char* genai_vector_db_path; ///< Vector database file path (default: /var/lib/proxysql/ai_features.db)
|
|
int genai_vector_dimension; ///< Embedding dimension (default: 1536)
|
|
|
|
// RAG configuration
|
|
bool genai_rag_enabled; ///< Enable RAG features (default: false)
|
|
int genai_rag_k_max; ///< Maximum k for search results (default: 50)
|
|
int genai_rag_candidates_max; ///< Maximum candidates for hybrid search (default: 500)
|
|
int genai_rag_query_max_bytes; ///< Maximum query length in bytes (default: 8192)
|
|
int genai_rag_response_max_bytes; ///< Maximum response size in bytes (default: 5000000)
|
|
int genai_rag_timeout_ms; ///< RAG operation timeout in ms (default: 2000)
|
|
} variables;
|
|
|
|
struct {
|
|
int threads_initialized = 0;
|
|
int active_requests = 0;
|
|
int completed_requests = 0;
|
|
int failed_requests = 0;
|
|
} status_variables;
|
|
|
|
unsigned int num_threads;
|
|
|
|
/**
|
|
* @brief Default constructor for GenAI_Threads_Handler
|
|
*/
|
|
GenAI_Threads_Handler();
|
|
|
|
/**
|
|
* @brief Destructor for GenAI_Threads_Handler
|
|
*/
|
|
~GenAI_Threads_Handler();
|
|
|
|
/**
|
|
* @brief Initialize the GenAI module
|
|
*
|
|
* Starts worker threads and listener for processing requests.
|
|
*
|
|
* @param num Number of threads (uses genai_threads variable if 0)
|
|
* @param stack Stack size for threads (unused, reserved)
|
|
*/
|
|
void init(unsigned int num = 0, size_t stack = 0);
|
|
|
|
/**
|
|
* @brief Shutdown the GenAI module
|
|
*
|
|
* Stops all threads and cleans up resources.
|
|
*/
|
|
void shutdown();
|
|
|
|
/**
|
|
* @brief Acquire write lock on variables
|
|
*/
|
|
void wrlock();
|
|
|
|
/**
|
|
* @brief Release write lock on variables
|
|
*/
|
|
void wrunlock();
|
|
|
|
/**
|
|
* @brief Get the value of a variable as a string
|
|
*
|
|
* @param name The name of the variable (without 'genai-' prefix)
|
|
* @return Dynamically allocated string with the value, or NULL if not found
|
|
*/
|
|
char* get_variable(char* name);
|
|
|
|
/**
|
|
* @brief Set the value of a variable
|
|
*
|
|
* @param name The name of the variable (without 'genai-' prefix)
|
|
* @param value The new value to set
|
|
* @return true if successful, false if variable not found or value invalid
|
|
*/
|
|
bool set_variable(char* name, const char* value);
|
|
|
|
/**
|
|
* @brief Get a list of all variable names
|
|
*
|
|
* @return Dynamically allocated array of strings, terminated by NULL
|
|
*/
|
|
char** get_variables_list();
|
|
|
|
/**
|
|
* @brief Check if a variable exists
|
|
*
|
|
* @param name The name of the variable to check
|
|
* @return true if the variable exists, false otherwise
|
|
*/
|
|
bool has_variable(const char* name);
|
|
|
|
/**
|
|
* @brief Print the version information
|
|
*/
|
|
void print_version();
|
|
|
|
/**
|
|
* @brief Register a client file descriptor with GenAI module for async communication
|
|
*
|
|
* Registers the GenAI side of a socketpair with the GenAI epoll instance.
|
|
* This allows the GenAI listener to receive requests from MySQL sessions asynchronously.
|
|
*
|
|
* Usage flow:
|
|
* 1. MySQL_Session creates socketpair(fds)
|
|
* 2. MySQL_Session keeps fds[0] for reading responses
|
|
* 3. MySQL_Session calls register_client(fds[1]) to register GenAI side
|
|
* 4. GenAI listener adds fds[1] to its epoll for reading requests
|
|
* 5. When request is received, it's queued to worker threads
|
|
*
|
|
* @param client_fd The GenAI side file descriptor from socketpair (typically fds[1])
|
|
* @return true if successfully registered and added to epoll, false on error
|
|
*
|
|
* @see unregister_client()
|
|
*/
|
|
bool register_client(int client_fd);
|
|
|
|
/**
|
|
* @brief Unregister a client file descriptor from GenAI module
|
|
*
|
|
* Removes a previously registered client fd from the GenAI epoll instance
|
|
* and closes the connection. Called when a MySQL session ends or an error occurs.
|
|
*
|
|
* @param client_fd The GenAI side file descriptor to remove
|
|
*
|
|
* @see register_client()
|
|
*/
|
|
void unregister_client(int client_fd);
|
|
|
|
/**
|
|
* @brief Get current queue depth (number of pending requests)
|
|
*
|
|
* @return Number of requests in the queue
|
|
*/
|
|
size_t get_queue_size();
|
|
|
|
// Public API methods for embedding and reranking
|
|
// These methods can be called directly without going through socket pairs
|
|
|
|
/**
|
|
* @brief Generate embeddings for multiple documents
|
|
*
|
|
* Sends the documents to the embedding service (configured via genai_embedding_uri)
|
|
* and returns the resulting embedding vectors. This method blocks until the
|
|
* embedding service responds (typically 10-100ms per document depending on model size).
|
|
*
|
|
* For async non-blocking behavior, use the socketpair-based async API via
|
|
* MySQL_Session's GENAI: query handler instead.
|
|
*
|
|
* @param documents Vector of document texts to embed (each can be up to several KB)
|
|
* @return GenAI_EmbeddingResult containing all embeddings with metadata.
|
|
* The caller takes ownership of the returned data and must free it.
|
|
* On error, returns an empty result (data==nullptr || count==0).
|
|
*
|
|
* @note This is a BLOCKING call. For async operation, use GENAI: queries through MySQL_Session.
|
|
* @see rerank_documents(), process_json_query()
|
|
*/
|
|
GenAI_EmbeddingResult embed_documents(const std::vector<std::string>& documents);
|
|
|
|
/**
|
|
* @brief Rerank documents based on query relevance
|
|
*
|
|
* Sends the query and documents to the reranking service (configured via genai_rerank_uri)
|
|
* and returns the documents sorted by relevance to the query. This method blocks
|
|
* until the reranking service responds (typically 20-50ms for most models).
|
|
*
|
|
* For async non-blocking behavior, use the socketpair-based async API via
|
|
* MySQL_Session's GENAI: query handler instead.
|
|
*
|
|
* @param query Query string to rerank against (e.g., search query, user question)
|
|
* @param documents Vector of document texts to rerank (typically search results or candidates)
|
|
* @param top_n Maximum number of top results to return (0 = return all sorted results)
|
|
* @return GenAI_RerankResultArray containing results sorted by relevance.
|
|
* Each result includes the original document index and a relevance score.
|
|
* The caller takes ownership of the returned data and must free it.
|
|
* On error, returns an empty result (data==nullptr || count==0).
|
|
*
|
|
* @note This is a BLOCKING call. For async operation, use GENAI: queries through MySQL_Session.
|
|
* @see embed_documents(), process_json_query()
|
|
*/
|
|
GenAI_RerankResultArray rerank_documents(const std::string& query,
|
|
const std::vector<std::string>& documents,
|
|
uint32_t top_n = 0);
|
|
|
|
/**
|
|
* @brief Process JSON query autonomously (handles embed/rerank/document_from_sql)
|
|
*
|
|
* This method processes JSON queries that describe embedding or reranking operations.
|
|
* It autonomously parses the JSON, determines the operation type, and routes to the
|
|
* appropriate handler. This is the main entry point for the async GENAI: query syntax.
|
|
*
|
|
* Supported query formats:
|
|
* - {"type": "embed", "documents": ["doc1", "doc2", ...]}
|
|
* - {"type": "rerank", "query": "...", "documents": [...], "top_n": 5}
|
|
* - {"type": "rerank", "query": "...", "document_from_sql": {"query": "SELECT ..."}}
|
|
*
|
|
* The response format is a JSON object with "columns" and "rows" arrays:
|
|
* - {"columns": ["col1", "col2"], "rows": [["val1", "val2"], ...]}
|
|
* - Error responses: {"error": "error message"}
|
|
*
|
|
* @param json_query JSON query string from client (must be valid JSON)
|
|
* @return JSON string result with columns and rows formatted for MySQL resultset.
|
|
* Returns empty string on error.
|
|
*
|
|
* @note This method is called from worker threads as part of async request processing.
|
|
* The blocking HTTP calls occur in the worker thread, not the MySQL thread.
|
|
*
|
|
* @see embed_documents(), rerank_documents()
|
|
*/
|
|
std::string process_json_query(const std::string& json_query);
|
|
};
|
|
|
|
// Global instance of the GenAI Threads Handler
|
|
extern GenAI_Threads_Handler *GloGATH;
|
|
|
|
#endif /* PROXYSQLGENAI */
|
|
|
|
#endif // __CLASS_GENAI_THREAD_H
|