#ifdef PROXYSQLGENAI

#include "AI_Features_Manager.h"
#include "GenAI_Thread.h"
#include "LLM_Bridge.h"
#include "Anomaly_Detector.h"
#include "sqlite3db.h"
#include "proxysql_utils.h"

// NOTE(review): the four system header names below were missing from the
// reviewed copy of this file and were restored from the functions used here
// (memset/strdup, free, stat/mkdir, dirname) - confirm against upstream.
#include <cstring>
#include <cstdlib>
#include <sys/stat.h>
#include <libgen.h> // for dirname
#include <cerrno>   // for errno / EEXIST
#include <string>

// Global instance is defined in src/main.cpp
extern AI_Features_Manager *GloAI;

// GenAI module - configuration is now managed here
extern GenAI_Threads_Handler *GloGATH;

// Forward declaration to avoid header ordering issues
class ProxySQL_Admin;
extern ProxySQL_Admin *GloAdmin;

namespace {

/**
 * @brief One DDL statement executed while bootstrapping the vector database.
 *
 * `name` and `kind` are only used to build log messages
 * ("AI: Failed to create <name> <kind>").
 */
struct SchemaStep {
	const char* name;  ///< Object name, e.g. "llm_cache"
	const char* kind;  ///< "table", "index", "virtual table" or "view"
	bool required;     ///< true: failure aborts initialization; false: log and continue
	const char* sql;   ///< Statement passed to SQLite3DB::execute()
};

/**
 * @brief Create @p dir and any missing parent directories.
 *
 * Replaces the former `system("mkdir -p ...")` call so that the configured
 * path is never interpreted by a shell. Directories are created with mode
 * 0777, which the process umask then restricts.
 *
 * @param dir Directory path; components that already exist are skipped.
 * @return 0 on success, otherwise the errno value of the failing mkdir().
 */
int create_directory_tree(const std::string& dir) {
	if (dir.empty()) {
		return ENOENT;
	}
	std::string::size_type sep = 0;
	do {
		// Start searching after the previous separator (and after a leading '/').
		sep = dir.find('/', sep + 1);
		const std::string prefix = dir.substr(0, sep);
		if (mkdir(prefix.c_str(), 0777) != 0 && errno != EEXIST) {
			return errno;
		}
	} while (sep != std::string::npos);
	return 0;
}

} // namespace

/**
 * @brief Construct the manager with every component unset.
 *
 * Initializes the rwlock and zeroes the status counters. Nothing is opened or
 * allocated until init() is called.
 */
AI_Features_Manager::AI_Features_Manager()
	: shutdown_(0), llm_bridge(NULL), anomaly_detector(NULL), vector_db(NULL)
{
	pthread_rwlock_init(&rwlock, NULL);

	// Initialize status counters
	memset(&status_variables, 0, sizeof(status_variables));

	// Note: Configuration is now managed by GenAI module (GloGATH)
	// All genai-* variables are accessible via GloGATH->get_variable()
}

/**
 * @brief Shut down all components (if not already done) and destroy the rwlock.
 */
AI_Features_Manager::~AI_Features_Manager() {
	shutdown();

	// Note: Configuration strings are owned by GenAI module, not freed here
	pthread_rwlock_destroy(&rwlock);
}

/**
 * @brief Open the vector database and create its schema.
 *
 * Steps:
 *  1. Make sure the directory containing `genai_vector_db_path` exists.
 *  2. Open (or create) the SQLite database and enable extension loading.
 *  3. Execute the schema statements in order. Required objects (plain tables
 *     and indexes) abort initialization on failure; optional objects (vec0 /
 *     fts5 virtual tables and the debugging view) only log and continue.
 *
 * On any failure `vector_db` is released and reset to NULL, so a failed
 * initialization never leaves a half-built database handle behind.
 *
 * @return 0 on success, -1 on failure.
 */
int AI_Features_Manager::init_vector_db() {
	// Guard the configuration before the first dereference; the path is passed
	// to strdup() and "%s" below, neither of which accepts NULL.
	if (!GloGATH || !GloGATH->variables.genai_vector_db_path) {
		proxy_error("AI: Vector database path is not configured\n");
		return -1;
	}
	const char* db_path = GloGATH->variables.genai_vector_db_path;

	proxy_info("AI: Initializing vector storage at %s\n", db_path);

	// Ensure directory exists. dirname() may modify its argument, so it works
	// on a private copy of the path.
	char* path_copy = strdup(db_path);
	if (!path_copy) {
		proxy_error("AI: Failed to allocate memory for path copy in init_vector_db\n");
		return -1;
	}
	const std::string db_dir(dirname(path_copy));
	free(path_copy);

	struct stat st;
	if (stat(db_dir.c_str(), &st) != 0) {
		// Best effort, like the previous `mkdir -p`: if this fails, the open()
		// below fails too and reports the error to the caller.
		const int mkdir_err = create_directory_tree(db_dir);
		if (mkdir_err != 0) {
			proxy_error("AI: Failed to create directory %s: %s\n", db_dir.c_str(), strerror(mkdir_err));
		}
	}

	vector_db = new SQLite3DB();

	// open() is handed a private, writable copy of the path rather than the
	// configuration string itself. Using std::string (instead of a fixed
	// 512-byte buffer) avoids silently truncating long paths.
	std::string open_path(db_path);
	const int rc = vector_db->open(&open_path[0], SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE);
	if (rc != SQLITE_OK) {
		proxy_error("AI: Failed to open vector database: %s\n", db_path);
		close_vector_db();
		return -1;
	}

	// Enable SQLite extensions for vector_db
	// Once enabled, SQLite loads extensions such as vec0 and rembed automatically.
	// Refer - Admin_Bootstrap.cpp:590
	(*proxy_sqlite3_enable_load_extension)(vector_db->get_db(), 1);

	// rag_vec_chunks uses the vector dimension configured in the GenAI module,
	// so its statement has to be assembled at runtime.
	const int vector_dimension = GloGATH->variables.genai_vector_dimension;
	const std::string create_rag_vec_chunks_sql =
		"CREATE VIRTUAL TABLE IF NOT EXISTS rag_vec_chunks USING vec0("
		"embedding float[" + std::to_string(vector_dimension) + "], "
		"chunk_id TEXT, "
		"doc_id TEXT, "
		"source_id INTEGER, "
		"updated_at INTEGER"
		");";

	// Schema statements, executed strictly in this order (indexes and the view
	// depend on the tables created before them).
	const SchemaStep schema[] = {
		// ---- LLM cache / anomaly detection / query history ----
		{ "llm_cache", "table", true,
			"CREATE TABLE IF NOT EXISTS llm_cache ("
			"id INTEGER PRIMARY KEY AUTOINCREMENT , "
			"prompt TEXT NOT NULL , "
			"response TEXT NOT NULL , "
			"system_message TEXT , "
			"embedding BLOB , "
			"hit_count INTEGER DEFAULT 0 , "
			"last_hit INTEGER , "
			"created_at INTEGER DEFAULT (strftime('%s' , 'now'))"
			");" },
		{ "anomaly_patterns", "table", true,
			"CREATE TABLE IF NOT EXISTS anomaly_patterns ("
			"id INTEGER PRIMARY KEY AUTOINCREMENT , "
			"pattern_name TEXT , "
			"pattern_type TEXT , "   // 'sql_injection', 'dos', 'privilege_escalation'
			"query_example TEXT , "
			"embedding BLOB , "
			"severity INTEGER , "    // 1-10
			"created_at INTEGER DEFAULT (strftime('%s' , 'now'))"
			");" },
		{ "query_history", "table", true,
			"CREATE TABLE IF NOT EXISTS query_history ("
			"id INTEGER PRIMARY KEY AUTOINCREMENT , "
			"prompt TEXT NOT NULL , "
			"response TEXT , "
			"embedding BLOB , "
			"execution_time_ms INTEGER , "
			"success BOOLEAN , "
			"timestamp INTEGER DEFAULT (strftime('%s' , 'now'))"
			");" },

		// ---- Virtual vector tables for similarity search using sqlite-vec ----
		// Note: sqlite-vec extension is auto-loaded in Admin_Bootstrap.cpp:612
		// Failure here is not critical: log and continue.
		{ "llm_cache_vec", "virtual table", false,
			"CREATE VIRTUAL TABLE IF NOT EXISTS llm_cache_vec USING vec0("
			"embedding float[1536]"
			");" },
		{ "anomaly_patterns_vec", "virtual table", false,
			"CREATE VIRTUAL TABLE IF NOT EXISTS anomaly_patterns_vec USING vec0("
			"embedding float[1536]"
			");" },
		{ "query_history_vec", "virtual table", false,
			"CREATE VIRTUAL TABLE IF NOT EXISTS query_history_vec USING vec0("
			"embedding float[1536]"
			");" },

		// ---- RAG tables for Retrieval-Augmented Generation ----
		// rag_sources: control plane for ingestion configuration
		{ "rag_sources", "table", true,
			"CREATE TABLE IF NOT EXISTS rag_sources ("
			"source_id INTEGER PRIMARY KEY, "
			"name TEXT NOT NULL UNIQUE, "
			"enabled INTEGER NOT NULL DEFAULT 1, "
			"backend_type TEXT NOT NULL, "
			"backend_host TEXT NOT NULL, "
			"backend_port INTEGER NOT NULL, "
			"backend_user TEXT NOT NULL, "
			"backend_pass TEXT NOT NULL, "
			"backend_db TEXT NOT NULL, "
			"table_name TEXT NOT NULL, "
			"pk_column TEXT NOT NULL, "
			"where_sql TEXT, "
			"doc_map_json TEXT NOT NULL, "
			"chunking_json TEXT NOT NULL, "
			"embedding_json TEXT, "
			"created_at INTEGER NOT NULL DEFAULT (unixepoch()), "
			"updated_at INTEGER NOT NULL DEFAULT (unixepoch())"
			");" },
		{ "idx_rag_sources_enabled", "index", true,
			"CREATE INDEX IF NOT EXISTS idx_rag_sources_enabled ON rag_sources(enabled);" },
		{ "idx_rag_sources_backend", "index", true,
			"CREATE INDEX IF NOT EXISTS idx_rag_sources_backend ON rag_sources(backend_type, backend_host, backend_port, backend_db, table_name);" },

		// rag_documents: canonical documents
		{ "rag_documents", "table", true,
			"CREATE TABLE IF NOT EXISTS rag_documents ("
			"doc_id TEXT PRIMARY KEY, "
			"source_id INTEGER NOT NULL REFERENCES rag_sources(source_id), "
			"source_name TEXT NOT NULL, "
			"pk_json TEXT NOT NULL, "
			"title TEXT, "
			"body TEXT, "
			"metadata_json TEXT NOT NULL DEFAULT '{}', "
			"updated_at INTEGER NOT NULL DEFAULT (unixepoch()), "
			"deleted INTEGER NOT NULL DEFAULT 0"
			");" },
		{ "idx_rag_documents_source_updated", "index", true,
			"CREATE INDEX IF NOT EXISTS idx_rag_documents_source_updated ON rag_documents(source_id, updated_at);" },
		{ "idx_rag_documents_source_deleted", "index", true,
			"CREATE INDEX IF NOT EXISTS idx_rag_documents_source_deleted ON rag_documents(source_id, deleted);" },

		// rag_chunks: chunked content
		{ "rag_chunks", "table", true,
			"CREATE TABLE IF NOT EXISTS rag_chunks ("
			"chunk_id TEXT PRIMARY KEY, "
			"doc_id TEXT NOT NULL REFERENCES rag_documents(doc_id), "
			"source_id INTEGER NOT NULL REFERENCES rag_sources(source_id), "
			"chunk_index INTEGER NOT NULL, "
			"title TEXT, "
			"body TEXT NOT NULL, "
			"metadata_json TEXT NOT NULL DEFAULT '{}', "
			"updated_at INTEGER NOT NULL DEFAULT (unixepoch()), "
			"deleted INTEGER NOT NULL DEFAULT 0"
			");" },
		{ "uq_rag_chunks_doc_idx", "index", true,
			"CREATE UNIQUE INDEX IF NOT EXISTS uq_rag_chunks_doc_idx ON rag_chunks(doc_id, chunk_index);" },
		{ "idx_rag_chunks_source_doc", "index", true,
			"CREATE INDEX IF NOT EXISTS idx_rag_chunks_source_doc ON rag_chunks(source_id, doc_id);" },
		{ "idx_rag_chunks_deleted", "index", true,
			"CREATE INDEX IF NOT EXISTS idx_rag_chunks_deleted ON rag_chunks(deleted);" },

		// rag_fts_chunks: FTS5 full-text index over chunk title/body
		{ "rag_fts_chunks", "virtual table", false,
			"CREATE VIRTUAL TABLE IF NOT EXISTS rag_fts_chunks USING fts5("
			"chunk_id UNINDEXED, "
			"title, "
			"body, "
			"tokenize = 'unicode61'"
			");" },

		// rag_vec_chunks: sqlite3-vec index with the configured dimension
		{ "rag_vec_chunks", "virtual table", false, create_rag_vec_chunks_sql.c_str() },

		// rag_chunk_view: convenience view for debugging
		{ "rag_chunk_view", "view", false,
			"CREATE VIEW IF NOT EXISTS rag_chunk_view AS "
			"SELECT "
			"c.chunk_id, "
			"c.doc_id, "
			"c.source_id, "
			"d.source_name, "
			"d.pk_json, "
			"COALESCE(c.title, d.title) AS title, "
			"c.body, "
			"d.metadata_json AS doc_metadata_json, "
			"c.metadata_json AS chunk_metadata_json, "
			"c.updated_at "
			"FROM rag_chunks c "
			"JOIN rag_documents d ON d.doc_id = c.doc_id "
			"WHERE c.deleted = 0 AND d.deleted = 0;" },

		// rag_sync_state: sync state placeholder for later incremental ingestion
		{ "rag_sync_state", "table", true,
			"CREATE TABLE IF NOT EXISTS rag_sync_state ("
			"source_id INTEGER PRIMARY KEY REFERENCES rag_sources(source_id), "
			"mode TEXT NOT NULL DEFAULT 'poll', "
			"cursor_json TEXT NOT NULL DEFAULT '{}', "
			"last_ok_at INTEGER, "
			"last_error TEXT"
			");" },
	};

	for (const SchemaStep& step : schema) {
		if (vector_db->execute(step.sql)) {
			continue;
		}
		proxy_error("AI: Failed to create %s %s\n", step.name, step.kind);
		if (step.required) {
			// Match the open-failure path: never leave a partially
			// initialized database attached to the manager.
			close_vector_db();
			return -1;
		}
		proxy_debug(PROXY_DEBUG_GENAI, 3, "Continuing without %s", step.name);
	}

	proxy_info("AI: Vector storage initialized successfully with virtual tables\n");
	return 0;
}

/**
 * @brief Create and initialize the LLM bridge when `genai_llm_enabled` is set.
 *
 * The bridge receives the current `vector_db` handle and the LLM provider
 * settings read from the GenAI module.
 *
 * @return 0 on success or when the bridge is disabled, -1 if
 *         LLM_Bridge::init() fails (the bridge is deleted in that case).
 */
int AI_Features_Manager::init_llm_bridge() {
	if (!GloGATH->variables.genai_llm_enabled) {
		proxy_info("AI: LLM bridge disabled , skipping initialization\n");
		return 0;
	}

	proxy_info("AI: Initializing LLM Bridge\n");

	llm_bridge = new LLM_Bridge();

	// Set vector database
	llm_bridge->set_vector_db(vector_db);

	// Update config with current variables from GenAI module
	llm_bridge->update_config(
		GloGATH->variables.genai_llm_provider,
		GloGATH->variables.genai_llm_provider_url,
		GloGATH->variables.genai_llm_provider_model,
		GloGATH->variables.genai_llm_provider_key,
		GloGATH->variables.genai_llm_cache_similarity_threshold,
		GloGATH->variables.genai_llm_timeout_ms
	);

	if (llm_bridge->init() != 0) {
		proxy_error("AI: Failed to initialize LLM Bridge\n");
		delete llm_bridge;
		llm_bridge = NULL;
		return -1;
	}

	proxy_info("AI: LLM Bridge initialized\n");
	return 0;
}

/**
 * @brief Create and initialize the anomaly detector when
 *        `genai_anomaly_enabled` is set.
 *
 * @return 0 on success or when anomaly detection is disabled, -1 if
 *         Anomaly_Detector::init() fails (the detector is deleted in that case).
 */
int AI_Features_Manager::init_anomaly_detector() {
	if (!GloGATH->variables.genai_anomaly_enabled) {
		proxy_info("AI: Anomaly detection disabled , skipping initialization\n");
		return 0;
	}

	proxy_info("AI: Initializing Anomaly Detector\n");

	anomaly_detector = new Anomaly_Detector();
	if (anomaly_detector->init() != 0) {
		proxy_error("AI: Failed to initialize Anomaly Detector\n");
		delete anomaly_detector;
		anomaly_detector = NULL;
		return -1;
	}

	proxy_info("AI: Anomaly Detector initialized\n");
	return 0;
}

/**
 * @brief Delete the vector database object, if any, and reset the pointer.
 */
void AI_Features_Manager::close_vector_db() {
	if (vector_db) {
		delete vector_db;
		vector_db = NULL;
	}
}

/**
 * @brief Close and delete the LLM bridge, if any, and reset the pointer.
 */
void AI_Features_Manager::close_llm_bridge() {
	if (llm_bridge) {
		llm_bridge->close();
		delete llm_bridge;
		llm_bridge = NULL;
	}
}

/**
 * @brief Close and delete the anomaly detector, if any, and reset the pointer.
 */
void AI_Features_Manager::close_anomaly_detector() {
	if (anomaly_detector) {
		anomaly_detector->close();
		delete anomaly_detector;
		anomaly_detector = NULL;
	}
}

/**
 * @brief Initialize all AI features in dependency order.
 *
 * Does nothing (and succeeds) when the GenAI module is absent or
 * `genai_enabled` is off. Otherwise initializes vector storage first, then the
 * LLM bridge, then the anomaly detector, stopping at the first failure.
 *
 * @return 0 on success or when AI features are disabled, -1 on failure.
 */
int AI_Features_Manager::init() {
	proxy_info("AI: Initializing AI Features Manager v%s\n", AI_FEATURES_MANAGER_VERSION);

	if (!GloGATH || !GloGATH->variables.genai_enabled) {
		proxy_info("AI: AI features disabled by configuration\n");
		return 0;
	}

	// Initialize vector storage first (needed by both LLM bridge and Anomaly Detector)
	if (init_vector_db() != 0) {
		proxy_error("AI: Failed to initialize vector storage\n");
		return -1;
	}

	// Initialize LLM bridge
	if (init_llm_bridge() != 0) {
		proxy_error("AI: Failed to initialize LLM bridge\n");
		return -1;
	}

	// Initialize Anomaly Detector
	if (init_anomaly_detector() != 0) {
		proxy_error("AI: Failed to initialize Anomaly Detector\n");
		return -1;
	}

	proxy_info("AI: AI Features Manager initialized successfully\n");
	return 0;
}

/**
 * @brief Release all components; later calls are no-ops.
 *
 * The LLM bridge and anomaly detector are closed before the vector database.
 */
void AI_Features_Manager::shutdown() {
	if (shutdown_) return;
	shutdown_ = 1;

	proxy_info("AI: Shutting down AI Features Manager\n");

	close_llm_bridge();
	close_anomaly_detector();
	close_vector_db();

	proxy_info("AI: AI Features Manager shutdown complete\n");
}

/** @brief Acquire the manager's rwlock for writing. */
void AI_Features_Manager::wrlock() {
	pthread_rwlock_wrlock(&rwlock);
}

/** @brief Release the manager's rwlock. */
void AI_Features_Manager::wrunlock() {
	pthread_rwlock_unlock(&rwlock);
}

// Note: Configuration get/set methods have been removed - they are now
// handled by the GenAI module (GloGATH). Use GloGATH->get_variable()
// and GloGATH->set_variable() for configuration access.

std::string AI_Features_Manager::get_status_json() { char buf[2048]; snprintf(buf, sizeof(buf), "{" "\"version\": \"%s\" , " "\"llm\": {" "\"total_requests\": %llu , " "\"cache_hits\": %llu , " "\"local_calls\": %llu , " "\"cloud_calls\": %llu , " "\"total_response_time_ms\": %llu , " "\"cache_total_lookup_time_ms\": %llu , " "\"cache_total_store_time_ms\": %llu , " "\"cache_lookups\": %llu , " "\"cache_stores\": %llu , " "\"cache_misses\": %llu" "} , " "\"anomaly\": {" "\"total_checks\": %llu , " "\"blocked\": %llu , " "\"flagged\": %llu" "} , " "\"spend\": {" "\"daily_usd\": %.2f" "}" "}", AI_FEATURES_MANAGER_VERSION, status_variables.llm_total_requests, status_variables.llm_cache_hits, status_variables.llm_local_model_calls, status_variables.llm_cloud_model_calls, status_variables.llm_total_response_time_ms, status_variables.llm_cache_total_lookup_time_ms, status_variables.llm_cache_total_store_time_ms, status_variables.llm_cache_lookups, status_variables.llm_cache_stores, status_variables.llm_cache_misses, status_variables.anomaly_total_checks, status_variables.anomaly_blocked_queries, status_variables.anomaly_flagged_queries, status_variables.daily_cloud_spend_usd ); return std::string(buf); } #endif /* PROXYSQLGENAI */