feat: Implement NL2SQL vector cache with GenAI embedding generation

Implement semantic caching for NL2SQL using sqlite-vec and the GenAI module:

Changes to lib/AI_Features_Manager.cpp:
- Create virtual vec0 tables for similarity search:
  * nl2sql_cache_vec for NL2SQL cache
  * anomaly_patterns_vec for threat patterns
  * query_history_vec for query history

Changes to include/NL2SQL_Converter.h:
- Add get_query_embedding() method declaration

Changes to lib/NL2SQL_Converter.cpp:
- Add GenAI_Thread.h include and GloGATH extern
- Implement get_query_embedding() - calls GloGATH->embed_documents()
- Implement check_vector_cache() - sqlite-vec KNN search with cosine distance
- Implement store_in_vector_cache() - stores embedding and updates vec table
- Implement clear_cache() - deletes from both main and vec tables
- Implement get_cache_stats() - returns cache entry/hit counts
- Add vector_to_json() helper for sqlite-vec MATCH queries

Features:
- Uses the GenAI module (llama-server) for embedding generation
- Cosine similarity search via sqlite-vec vec_distance_cosine()
- Configurable similarity threshold (ai_nl2sql_cache_similarity_threshold)
- Automatic hit counting and timestamp tracking
pull/5310/head
Rene Cannao 3 months ago
parent 0be9715188
commit fec7d64093

@ -141,6 +141,7 @@ private:
void store_in_vector_cache(const NL2SQLRequest& req, const NL2SQLResult& result);
std::string get_schema_context(const std::vector<std::string>& tables);
ModelProvider select_model(const NL2SQLRequest& req);
/**
 * @brief Produce an embedding vector for a piece of text.
 *
 * NOTE(review): the implementation is expected to obtain the embedding from
 * the GenAI module (GloGATH->embed_documents()) — confirm against
 * NL2SQL_Converter.cpp.
 *
 * @param text Input text to embed.
 * @return The embedding as a sequence of floats. NOTE(review): the value
 *         returned on failure (presumably an empty vector) is not visible
 *         from this declaration — verify before relying on it.
 */
std::vector<float> get_query_embedding(const std::string& text);
public:
/**

@ -147,7 +147,44 @@ int AI_Features_Manager::init_vector_db() {
return -1;
}
proxy_info("AI: Vector storage initialized successfully\n");
// Create virtual vector tables for similarity search using sqlite-vec
// Note: sqlite-vec extension is auto-loaded in Admin_Bootstrap.cpp:612
// 1. NL2SQL cache virtual table
const char* create_nl2sql_vec =
"CREATE VIRTUAL TABLE IF NOT EXISTS nl2sql_cache_vec USING vec0("
"embedding float(1536)"
");";
if (vector_db->execute(create_nl2sql_vec) != 0) {
proxy_error("AI: Failed to create nl2sql_cache_vec virtual table\n");
// Virtual table creation failure is not critical - log and continue
proxy_debug(PROXY_DEBUG_AI_GENERIC, 3, "Continuing without nl2sql_cache_vec");
}
// 2. Anomaly patterns virtual table
const char* create_anomaly_vec =
"CREATE VIRTUAL TABLE IF NOT EXISTS anomaly_patterns_vec USING vec0("
"embedding float(1536)"
");";
if (vector_db->execute(create_anomaly_vec) != 0) {
proxy_error("AI: Failed to create anomaly_patterns_vec virtual table\n");
proxy_debug(PROXY_DEBUG_AI_GENERIC, 3, "Continuing without anomaly_patterns_vec");
}
// 3. Query history virtual table
const char* create_history_vec =
"CREATE VIRTUAL TABLE IF NOT EXISTS query_history_vec USING vec0("
"embedding float(1536)"
");";
if (vector_db->execute(create_history_vec) != 0) {
proxy_error("AI: Failed to create query_history_vec virtual table\n");
proxy_debug(PROXY_DEBUG_AI_GENERIC, 3, "Continuing without query_history_vec");
}
proxy_info("AI: Vector storage initialized successfully with virtual tables\n");
return 0;
}

@ -14,6 +14,7 @@
#include "NL2SQL_Converter.h"
#include "sqlite3db.h"
#include "proxysql_utils.h"
#include "GenAI_Thread.h"
#include <cstring>
#include <cstdlib>
#include <sstream>
@ -22,6 +23,9 @@
using json = nlohmann::json;
// Global GenAI handler for embedding generation
extern GenAI_Threads_Handler *GloGATH;
// Global instance is defined elsewhere if needed
// NL2SQL_Converter *GloNL2SQL = NULL;

Loading…
Cancel
Save