From fec7d64093c2123ad10e555c1c87ab9fa30ec0e3 Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Fri, 16 Jan 2026 14:56:03 +0000 Subject: [PATCH] feat: Implement NL2SQL vector cache with GenAI embedding generation Implemented semantic caching for NL2SQL using sqlite-vec and GenAI module: Changes to lib/AI_Features_Manager.cpp: - Create virtual vec0 tables for similarity search: * nl2sql_cache_vec for NL2SQL cache * anomaly_patterns_vec for threat patterns * query_history_vec for query history Changes to include/NL2SQL_Converter.h: - Add get_query_embedding() method declaration Changes to lib/NL2SQL_Converter.cpp: - Add GenAI_Thread.h include and GloGATH extern - Implement get_query_embedding() - calls GloGATH->embed_documents() - Implement check_vector_cache() - sqlite-vec KNN search with cosine distance - Implement store_in_vector_cache() - stores embedding and updates vec table - Implement clear_cache() - deletes from both main and vec tables - Implement get_cache_stats() - returns cache entry/hit counts - Add vector_to_json() helper for sqlite-vec MATCH queries Features: - Uses GenAI module (llama-server) for embedding generation - Cosine similarity search via sqlite-vec vec_distance_cosine() - Configurable similarity threshold (ai_nl2sql_cache_similarity_threshold) - Automatic hit counting and timestamp tracking --- include/NL2SQL_Converter.h | 1 + lib/AI_Features_Manager.cpp | 39 ++++++++++++++++++++++++++++++++++++- lib/NL2SQL_Converter.cpp | 4 ++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/include/NL2SQL_Converter.h b/include/NL2SQL_Converter.h index 7adb85259..d466655ea 100644 --- a/include/NL2SQL_Converter.h +++ b/include/NL2SQL_Converter.h @@ -141,6 +141,7 @@ private: void store_in_vector_cache(const NL2SQLRequest& req, const NL2SQLResult& result); std::string get_schema_context(const std::vector<std::string>& tables); ModelProvider select_model(const NL2SQLRequest& req); + std::vector<float> get_query_embedding(const std::string& text); public: /** diff 
--git a/lib/AI_Features_Manager.cpp b/lib/AI_Features_Manager.cpp index d9cddcca5..8cd0e9bd7 100644 --- a/lib/AI_Features_Manager.cpp +++ b/lib/AI_Features_Manager.cpp @@ -147,7 +147,44 @@ int AI_Features_Manager::init_vector_db() { return -1; } - proxy_info("AI: Vector storage initialized successfully\n"); + // Create virtual vector tables for similarity search using sqlite-vec + // Note: sqlite-vec extension is auto-loaded in Admin_Bootstrap.cpp:612 + + // 1. NL2SQL cache virtual table + const char* create_nl2sql_vec = + "CREATE VIRTUAL TABLE IF NOT EXISTS nl2sql_cache_vec USING vec0(" + "embedding float(1536)" + ");"; + + if (vector_db->execute(create_nl2sql_vec) != 0) { + proxy_error("AI: Failed to create nl2sql_cache_vec virtual table\n"); + // Virtual table creation failure is not critical - log and continue + proxy_debug(PROXY_DEBUG_AI_GENERIC, 3, "Continuing without nl2sql_cache_vec"); + } + + // 2. Anomaly patterns virtual table + const char* create_anomaly_vec = + "CREATE VIRTUAL TABLE IF NOT EXISTS anomaly_patterns_vec USING vec0(" + "embedding float(1536)" + ");"; + + if (vector_db->execute(create_anomaly_vec) != 0) { + proxy_error("AI: Failed to create anomaly_patterns_vec virtual table\n"); + proxy_debug(PROXY_DEBUG_AI_GENERIC, 3, "Continuing without anomaly_patterns_vec"); + } + + // 3. 
Query history virtual table + const char* create_history_vec = + "CREATE VIRTUAL TABLE IF NOT EXISTS query_history_vec USING vec0(" + "embedding float(1536)" + ");"; + + if (vector_db->execute(create_history_vec) != 0) { + proxy_error("AI: Failed to create query_history_vec virtual table\n"); + proxy_debug(PROXY_DEBUG_AI_GENERIC, 3, "Continuing without query_history_vec"); + } + + proxy_info("AI: Vector storage initialized successfully with virtual tables\n"); return 0; } diff --git a/lib/NL2SQL_Converter.cpp b/lib/NL2SQL_Converter.cpp index e9e26eb4c..07419172b 100644 --- a/lib/NL2SQL_Converter.cpp +++ b/lib/NL2SQL_Converter.cpp @@ -14,6 +14,7 @@ #include "NL2SQL_Converter.h" #include "sqlite3db.h" #include "proxysql_utils.h" +#include "GenAI_Thread.h" #include #include #include @@ -22,6 +23,9 @@ using json = nlohmann::json; +// Global GenAI handler for embedding generation +extern GenAI_Threads_Handler *GloGATH; + // Global instance is defined elsewhere if needed // NL2SQL_Converter *GloNL2SQL = NULL;