#ifdef PROXYSQLGENAI
/**
 * @file Anomaly_Detector.cpp
 * @brief Implementation of Real-time Anomaly Detection for ProxySQL
 *
 * Implements multi-stage anomaly detection pipeline:
 * 1. SQL Injection Pattern Detection
 * 2. Query Normalization and Pattern Matching
 * 3. Rate Limiting per User/Host
 * 4. Statistical Outlier Detection
 * 5. Embedding-based Threat Similarity
 *
 * @see Anomaly_Detector.h
 */

#include "Anomaly_Detector.h"
#include "sqlite3db.h"
#include "proxysql_utils.h"
#include "GenAI_Thread.h"
#include "cpp.h"
// NOTE(review): the names of the seven system headers were missing from the
// copy of this file under review. The list below is reconstructed from the
// standard-library facilities this translation unit uses - confirm it against
// version control before merging.
#include <cstring>
#include <cstdlib>
#include <sstream>
#include <algorithm>
#include <regex>
#include <ctime>
#include <cmath>

// JSON library
#include "../deps/json/json.hpp"
using json = nlohmann::json;
#define PROXYJSON

// Global GenAI handler for embedding generation
extern GenAI_Threads_Handler *GloGATH;

// ============================================================================
// Constants
// ============================================================================

// SQL Injection Patterns (regex-based)
//
// NULL-terminated. Each entry is compiled as a case-insensitive std::regex and
// searched in the raw query text. The array index is part of the reported rule
// name ("injection_pattern_<index>"), so entries must not be reordered.
static const char* SQL_INJECTION_PATTERNS[] = {
    // Quote sequences. Matches any pair of quote characters on one line, so
    // every query that contains a string literal contributes one match.
    "('|\").*?('|\")",
    // Tautology: OR <x> = <x>, e.g. OR 1=1, OR 'a'='a', OR (1=1).
    // The previous pattern required the keyword twice around '=' and therefore
    // could not match the plain "OR 1=1" it was documented to detect.
    "\\bor\\b\\s*\\(?\\s*(['\"]?)(\\w+)\\1\\s*=\\s*\\1\\2\\1(?!\\w)",
    // Tautology: AND <x> = <x>, e.g. AND 1=1 (same correction as above).
    "\\band\\b\\s*\\(?\\s*(['\"]?)(\\w+)\\1\\s*=\\s*\\1\\2\\1(?!\\w)",
    "union.*select",      // UNION SELECT
    "drop.*table",        // DROP TABLE
    "exec.*xp_",          // SQL Server exec
    ";.*--",              // Comment injection
    "/\\*.*\\*/",         // Block comments
    "concat\\(",          // CONCAT based attacks
    "char\\(",            // CHAR based attacks
    "0x[0-9a-f]+",        // Hex encoded
    NULL
};

// Suspicious Keywords
//
// NULL-terminated. Matched as plain substrings of the lower-cased query; the
// array index is part of the reported rule name ("suspicious_keyword_<index>").
static const char* SUSPICIOUS_KEYWORDS[] = {
    "sleep(",
    "waitfor delay",
    "benchmark(",
    "pg_sleep",
    "load_file",
    "into outfile",
    "dumpfile",
    "script>",
    "javascript:",
    "onerror=",
    "onload=",
    NULL
};

// Thresholds
#define DEFAULT_RATE_LIMIT 100           // queries per minute
#define DEFAULT_RISK_THRESHOLD 70        // 0-100
#define DEFAULT_SIMILARITY_THRESHOLD 85  // 0-100
#define USER_STATS_WINDOW 3600           // 1 hour in seconds
#define MAX_RECENT_QUERIES 100

// ============================================================================
// Constructor/Destructor
============================================================================ Anomaly_Detector::Anomaly_Detector() : vector_db(NULL) { config.enabled = true; config.risk_threshold = DEFAULT_RISK_THRESHOLD; config.similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD; config.rate_limit = DEFAULT_RATE_LIMIT; config.auto_block = true; config.log_only = false; } Anomaly_Detector::~Anomaly_Detector() { close(); } // ============================================================================ // Initialization // ============================================================================ /** * @brief Initialize the anomaly detector * * Sets up the vector database connection and loads any * pre-configured threat patterns from storage. */ int Anomaly_Detector::init() { proxy_info("Anomaly: Initializing Anomaly Detector v%s\n", ANOMALY_DETECTOR_VERSION); // Vector DB will be provided by AI_Features_Manager // For now, we'll work without it for basic pattern detection proxy_info("Anomaly: Anomaly Detector initialized with %zu injection patterns\n", sizeof(SQL_INJECTION_PATTERNS) / sizeof(SQL_INJECTION_PATTERNS[0]) - 1); return 0; } /** * @brief Close and cleanup resources */ void Anomaly_Detector::close() { // Clear user statistics clear_user_statistics(); proxy_info("Anomaly: Anomaly Detector closed\n"); } // ============================================================================ // Query Normalization // ============================================================================ /** * @brief Normalize SQL query for pattern matching * * Normalization steps: * 1. Convert to lowercase * 2. Remove extra whitespace * 3. Replace string literals with placeholders * 4. Replace numeric literals with placeholders * 5. 
Remove comments * * @param query Original SQL query * @return Normalized query pattern */ std::string Anomaly_Detector::normalize_query(const std::string& query) { std::string normalized = query; // Convert to lowercase std::transform(normalized.begin(), normalized.end(), normalized.begin(), ::tolower); // Remove SQL comments std::regex comment_regex("--.*?$|/\\*.*?\\*/", std::regex::multiline); normalized = std::regex_replace(normalized, comment_regex, ""); // Replace string literals with placeholder std::regex string_regex("'[^']*'|\"[^\"]*\""); normalized = std::regex_replace(normalized, string_regex, "?"); // Replace numeric literals with placeholder std::regex numeric_regex("\\b\\d+\\b"); normalized = std::regex_replace(normalized, numeric_regex, "N"); // Normalize whitespace std::regex whitespace_regex("\\s+"); normalized = std::regex_replace(normalized, whitespace_regex, " "); // Trim leading/trailing whitespace normalized.erase(0, normalized.find_first_not_of(" \t\n\r")); normalized.erase(normalized.find_last_not_of(" \t\n\r") + 1); return normalized; } // ============================================================================ // SQL Injection Detection // ============================================================================ /** * @brief Check for SQL injection patterns * * Uses regex-based pattern matching to detect common SQL injection * attack vectors including: * - Tautologies (OR 1=1) * - Union-based injection * - Comment-based injection * - Stacked queries * - String/character encoding attacks * * @param query SQL query to check * @return AnomalyResult with injection details */ AnomalyResult Anomaly_Detector::check_sql_injection(const std::string& query) { AnomalyResult result; result.is_anomaly = false; result.risk_score = 0.0f; result.anomaly_type = "sql_injection"; result.should_block = false; try { std::string query_lower = query; std::transform(query_lower.begin(), query_lower.end(), query_lower.begin(), ::tolower); // Check each 
injection pattern int pattern_matches = 0; for (int i = 0; SQL_INJECTION_PATTERNS[i] != NULL; i++) { std::regex pattern(SQL_INJECTION_PATTERNS[i], std::regex::icase); if (std::regex_search(query, pattern)) { pattern_matches++; result.matched_rules.push_back(std::string("injection_pattern_") + std::to_string(i)); } } // Check suspicious keywords for (int i = 0; SUSPICIOUS_KEYWORDS[i] != NULL; i++) { if (query_lower.find(SUSPICIOUS_KEYWORDS[i]) != std::string::npos) { pattern_matches++; result.matched_rules.push_back(std::string("suspicious_keyword_") + std::to_string(i)); } } // Calculate risk score based on pattern matches if (pattern_matches > 0) { result.is_anomaly = true; result.risk_score = std::min(1.0f, pattern_matches * 0.3f); std::ostringstream explanation; explanation << "SQL injection patterns detected: " << pattern_matches << " matches"; result.explanation = explanation.str(); // Auto-block if high risk and auto-block enabled if (result.risk_score >= config.risk_threshold / 100.0f && config.auto_block) { result.should_block = true; } proxy_debug(PROXY_DEBUG_ANOMALY, 3, "Anomaly: SQL injection detected in query: %s (risk: %.2f)\n", query.c_str(), result.risk_score); } } catch (const std::regex_error& e) { proxy_error("Anomaly: Regex error in injection check: %s\n", e.what()); } catch (const std::exception& e) { proxy_error("Anomaly: Error in injection check: %s\n", e.what()); } return result; } // ============================================================================ // Rate Limiting // ============================================================================ /** * @brief Check rate limiting per user/host * * Tracks the number of queries per user/host within a time window * to detect potential DoS attacks or brute force attempts. 
* * @param user Username * @param client_host Client IP address * @return AnomalyResult with rate limit details */ AnomalyResult Anomaly_Detector::check_rate_limiting(const std::string& user, const std::string& client_host) { AnomalyResult result; result.is_anomaly = false; result.risk_score = 0.0f; result.anomaly_type = "rate_limit"; result.should_block = false; if (!config.enabled) { return result; } // Get current time uint64_t current_time = (uint64_t)time(NULL); std::string key = user + "@" + client_host; // Get or create user stats UserStats& stats = user_statistics[key]; // Check if we're within the time window if (current_time - stats.last_query_time > USER_STATS_WINDOW) { // Window expired, reset counter stats.query_count = 0; stats.recent_queries.clear(); } // Increment query count stats.query_count++; stats.last_query_time = current_time; // Check if rate limit exceeded if (stats.query_count > (uint64_t)config.rate_limit) { result.is_anomaly = true; // Risk score increases with excess queries float excess_ratio = (float)(stats.query_count - config.rate_limit) / config.rate_limit; result.risk_score = std::min(1.0f, 0.5f + excess_ratio); std::ostringstream explanation; explanation << "Rate limit exceeded: " << stats.query_count << " queries per " << USER_STATS_WINDOW << " seconds (limit: " << config.rate_limit << ")"; result.explanation = explanation.str(); result.matched_rules.push_back("rate_limit_exceeded"); if (config.auto_block) { result.should_block = true; } proxy_warning("Anomaly: Rate limit exceeded for %s: %lu queries\n", key.c_str(), stats.query_count); } return result; } // ============================================================================ // Statistical Anomaly Detection // ============================================================================ /** * @brief Detect statistical anomalies in query behavior * * Analyzes query patterns to detect unusual behavior such as: * - Abnormally large result sets * - Unexpected execution times 
* - Queries affecting many rows * - Unusual query patterns for the user * * @param fp Query fingerprint * @return AnomalyResult with statistical anomaly details */ AnomalyResult Anomaly_Detector::check_statistical_anomaly(const QueryFingerprint& fp) { AnomalyResult result; result.is_anomaly = false; result.risk_score = 0.0f; result.anomaly_type = "statistical"; result.should_block = false; if (!config.enabled) { return result; } std::string key = fp.user + "@" + fp.client_host; UserStats& stats = user_statistics[key]; // Calculate some basic statistics uint64_t avg_queries = 10; // Default baseline float z_score = 0.0f; if (stats.query_count > avg_queries * 3) { // Query count is more than 3 standard deviations above mean result.is_anomaly = true; z_score = (float)(stats.query_count - avg_queries) / avg_queries; result.risk_score = std::min(1.0f, z_score / 5.0f); // Normalize std::ostringstream explanation; explanation << "Unusually high query rate: " << stats.query_count << " queries (baseline: " << avg_queries << ")"; result.explanation = explanation.str(); result.matched_rules.push_back("high_query_rate"); proxy_debug(PROXY_DEBUG_ANOMALY, 3, "Anomaly: Statistical anomaly for %s: z-score=%.2f\n", key.c_str(), z_score); } // Check for abnormal execution time or rows affected if (fp.execution_time_ms > 5000) { // 5 seconds result.is_anomaly = true; result.risk_score = std::max(result.risk_score, 0.3f); if (!result.explanation.empty()) { result.explanation += "; "; } result.explanation += "Long execution time detected"; result.matched_rules.push_back("long_execution_time"); } if (fp.affected_rows > 10000) { result.is_anomaly = true; result.risk_score = std::max(result.risk_score, 0.2f); if (!result.explanation.empty()) { result.explanation += "; "; } result.explanation += "Large result set detected"; result.matched_rules.push_back("large_result_set"); } return result; } // ============================================================================ // 
Embedding-based Similarity Detection // ============================================================================ /** * @brief Check embedding-based similarity to known threats * * Compares the query embedding to embeddings of known malicious queries * stored in the vector database. This can detect novel attacks that * don't match explicit patterns. * * @param query SQL query * @param embedding Query vector embedding (if available) * @return AnomalyResult with similarity details */ AnomalyResult Anomaly_Detector::check_embedding_similarity(const std::string& query, const std::vector& embedding) { AnomalyResult result; result.is_anomaly = false; result.risk_score = 0.0f; result.anomaly_type = "embedding_similarity"; result.should_block = false; if (!config.enabled || !vector_db) { // Can't do embedding check without vector DB return result; } // If embedding not provided, generate it std::vector query_embedding = embedding; if (query_embedding.empty()) { query_embedding = get_query_embedding(query); } if (query_embedding.empty()) { return result; } // Convert embedding to JSON for sqlite-vec MATCH std::string embedding_json = "["; for (size_t i = 0; i < query_embedding.size(); i++) { if (i > 0) embedding_json += ","; embedding_json += std::to_string(query_embedding[i]); } embedding_json += "]"; // Calculate distance threshold from similarity // Similarity 0-100 -> Distance 0-2 (cosine distance: 0=similar, 2=dissimilar) float distance_threshold = 2.0f - (config.similarity_threshold / 50.0f); // Search for similar threat patterns char search[1024]; snprintf(search, sizeof(search), "SELECT p.pattern_name, p.pattern_type, p.severity, " " vec_distance_cosine(v.embedding, '%s') as distance " "FROM anomaly_patterns p " "JOIN anomaly_patterns_vec v ON p.id = v.rowid " "WHERE v.embedding MATCH '%s' " "AND distance < %f " "ORDER BY distance " "LIMIT 5", embedding_json.c_str(), embedding_json.c_str(), distance_threshold); // Execute search sqlite3* db = vector_db->get_db(); 
sqlite3_stmt* stmt = NULL; int rc = (*proxy_sqlite3_prepare_v2)(db, search, -1, &stmt, NULL); if (rc != SQLITE_OK) { proxy_debug(PROXY_DEBUG_ANOMALY, 3, "Embedding search prepare failed: %s", (*proxy_sqlite3_errmsg)(db)); return result; } // Check if any threat patterns matched rc = (*proxy_sqlite3_step)(stmt); if (rc == SQLITE_ROW) { // Found similar threat pattern result.is_anomaly = true; // Extract pattern info const char* pattern_name = reinterpret_cast((*proxy_sqlite3_column_text)(stmt, 0)); const char* pattern_type = reinterpret_cast((*proxy_sqlite3_column_text)(stmt, 1)); int severity = (*proxy_sqlite3_column_int)(stmt, 2); double distance = (*proxy_sqlite3_column_double)(stmt, 3); // Calculate risk score based on severity and similarity // - Base score from severity (1-10) -> 0.1-1.0 // - Boost by similarity (lower distance = higher risk) result.risk_score = (severity / 10.0f) * (1.0f - (distance / 2.0f)); // Set anomaly type result.anomaly_type = "embedding_similarity"; // Build explanation char explanation[512]; snprintf(explanation, sizeof(explanation), "Query similar to known threat pattern '%s' (type: %s, severity: %d, distance: %.2f)", pattern_name ? pattern_name : "unknown", pattern_type ? pattern_type : "unknown", severity, distance); result.explanation = explanation; // Add matched pattern to rules if (pattern_name) { result.matched_rules.push_back(std::string("pattern:") + pattern_name); } // Determine if should block result.should_block = (result.risk_score > (config.risk_threshold / 100.0f)); proxy_info("Anomaly: Embedding similarity detected (pattern: %s, score: %.2f)\n", pattern_name ? pattern_name : "unknown", result.risk_score); } (*proxy_sqlite3_finalize)(stmt); proxy_debug(PROXY_DEBUG_ANOMALY, 3, "Anomaly: Embedding similarity check performed\n"); return result; } /** * @brief Get vector embedding for a query * * Generates a vector representation of the query using a sentence * transformer or similar embedding model. 
* * Uses the GenAI module (GloGATH) for embedding generation via llama-server. * * @param query SQL query * @return Vector embedding (empty if not available) */ std::vector Anomaly_Detector::get_query_embedding(const std::string& query) { if (!GloGATH) { proxy_debug(PROXY_DEBUG_ANOMALY, 3, "GenAI handler not available for embedding"); return {}; } // Normalize query first for better embedding quality std::string normalized = normalize_query(query); // Generate embedding using GenAI GenAI_EmbeddingResult result = GloGATH->embed_documents({normalized}); if (!result.data || result.count == 0) { proxy_debug(PROXY_DEBUG_ANOMALY, 3, "Failed to generate embedding"); return {}; } // Convert to std::vector std::vector embedding(result.data, result.data + result.embedding_size); // Free the result data (GenAI allocates with malloc) if (result.data) { free(result.data); } proxy_debug(PROXY_DEBUG_ANOMALY, 3, "Generated embedding with %zu dimensions", embedding.size()); return embedding; } // ============================================================================ // User Statistics Management // ============================================================================ /** * @brief Update user statistics with query fingerprint * * Tracks user behavior for statistical anomaly detection. 
* * @param fp Query fingerprint */ void Anomaly_Detector::update_user_statistics(const QueryFingerprint& fp) { if (!config.enabled) { return; } std::string key = fp.user + "@" + fp.client_host; UserStats& stats = user_statistics[key]; // Add to recent queries stats.recent_queries.push_back(fp.query_pattern); // Keep only recent queries if (stats.recent_queries.size() > MAX_RECENT_QUERIES) { stats.recent_queries.erase(stats.recent_queries.begin()); } stats.last_query_time = fp.timestamp; stats.query_count++; // Cleanup old entries periodically static int cleanup_counter = 0; if (++cleanup_counter % 1000 == 0) { uint64_t current_time = (uint64_t)time(NULL); auto it = user_statistics.begin(); while (it != user_statistics.end()) { if (current_time - it->second.last_query_time > USER_STATS_WINDOW * 2) { it = user_statistics.erase(it); } else { ++it; } } } } // ============================================================================ // Main Analysis Method // ============================================================================ /** * @brief Main entry point for anomaly detection * * Runs the multi-stage detection pipeline: * 1. SQL Injection Pattern Detection * 2. Rate Limiting Check * 3. Statistical Anomaly Detection * 4. 
Embedding Similarity Check (if vector DB available) * * @param query SQL query to analyze * @param user Username * @param client_host Client IP address * @param schema Database schema name * @return AnomalyResult with combined analysis */ AnomalyResult Anomaly_Detector::analyze(const std::string& query, const std::string& user, const std::string& client_host, const std::string& schema) { AnomalyResult combined_result; combined_result.is_anomaly = false; combined_result.risk_score = 0.0f; combined_result.should_block = false; if (!config.enabled) { return combined_result; } proxy_debug(PROXY_DEBUG_ANOMALY, 3, "Anomaly: Analyzing query from %s@%s\n", user.c_str(), client_host.c_str()); // Run all detection stages AnomalyResult injection_result = check_sql_injection(query); AnomalyResult rate_result = check_rate_limiting(user, client_host); // Build fingerprint for statistical analysis QueryFingerprint fp; fp.query_pattern = normalize_query(query); fp.user = user; fp.client_host = client_host; fp.schema = schema; fp.timestamp = (uint64_t)time(NULL); AnomalyResult stat_result = check_statistical_anomaly(fp); // Embedding similarity (optional) std::vector embedding; AnomalyResult embed_result = check_embedding_similarity(query, embedding); // Combine results combined_result.is_anomaly = injection_result.is_anomaly || rate_result.is_anomaly || stat_result.is_anomaly || embed_result.is_anomaly; // Take maximum risk score combined_result.risk_score = std::max({injection_result.risk_score, rate_result.risk_score, stat_result.risk_score, embed_result.risk_score}); // Combine explanations std::vector explanations; if (!injection_result.explanation.empty()) { explanations.push_back(injection_result.explanation); } if (!rate_result.explanation.empty()) { explanations.push_back(rate_result.explanation); } if (!stat_result.explanation.empty()) { explanations.push_back(stat_result.explanation); } if (!embed_result.explanation.empty()) { 
explanations.push_back(embed_result.explanation); } if (!explanations.empty()) { combined_result.explanation = explanations[0]; for (size_t i = 1; i < explanations.size(); i++) { combined_result.explanation += "; " + explanations[i]; } } // Combine matched rules combined_result.matched_rules = injection_result.matched_rules; combined_result.matched_rules.insert(combined_result.matched_rules.end(), rate_result.matched_rules.begin(), rate_result.matched_rules.end()); combined_result.matched_rules.insert(combined_result.matched_rules.end(), stat_result.matched_rules.begin(), stat_result.matched_rules.end()); combined_result.matched_rules.insert(combined_result.matched_rules.end(), embed_result.matched_rules.begin(), embed_result.matched_rules.end()); // Determine if should block combined_result.should_block = injection_result.should_block || rate_result.should_block || (combined_result.risk_score >= config.risk_threshold / 100.0f && config.auto_block); // Update user statistics update_user_statistics(fp); // Log anomaly if detected if (combined_result.is_anomaly) { if (config.log_only) { proxy_warning("Anomaly: Detected (log-only mode): %s (risk: %.2f)\n", combined_result.explanation.c_str(), combined_result.risk_score); } else if (combined_result.should_block) { proxy_error("Anomaly: BLOCKED: %s (risk: %.2f)\n", combined_result.explanation.c_str(), combined_result.risk_score); } else { proxy_warning("Anomaly: Detected: %s (risk: %.2f)\n", combined_result.explanation.c_str(), combined_result.risk_score); } } return combined_result; } // ============================================================================ // Threat Pattern Management // ============================================================================ /** * @brief Add a threat pattern to the database * * @param pattern_name Human-readable name * @param query_example Example query * @param pattern_type Type of threat (injection, flooding, etc.) 
* @param severity Severity level (0-100) * @return Pattern ID or -1 on error */ int Anomaly_Detector::add_threat_pattern(const std::string& pattern_name, const std::string& query_example, const std::string& pattern_type, int severity) { proxy_info("Anomaly: Adding threat pattern: %s (type: %s, severity: %d)\n", pattern_name.c_str(), pattern_type.c_str(), severity); if (!vector_db) { proxy_error("Anomaly: Cannot add pattern - no vector DB\n"); return -1; } // Generate embedding for the query example std::vector embedding = get_query_embedding(query_example); if (embedding.empty()) { proxy_error("Anomaly: Failed to generate embedding for threat pattern\n"); return -1; } // Insert into main table with embedding BLOB sqlite3* db = vector_db->get_db(); sqlite3_stmt* stmt = NULL; const char* insert = "INSERT INTO anomaly_patterns " "(pattern_name, pattern_type, query_example, embedding, severity) " "VALUES (?, ?, ?, ?, ?)"; int rc = (*proxy_sqlite3_prepare_v2)(db, insert, -1, &stmt, NULL); if (rc != SQLITE_OK) { proxy_error("Anomaly: Failed to prepare pattern insert: %s\n", (*proxy_sqlite3_errmsg)(db)); return -1; } // Bind values (*proxy_sqlite3_bind_text)(stmt, 1, pattern_name.c_str(), -1, SQLITE_TRANSIENT); (*proxy_sqlite3_bind_text)(stmt, 2, pattern_type.c_str(), -1, SQLITE_TRANSIENT); (*proxy_sqlite3_bind_text)(stmt, 3, query_example.c_str(), -1, SQLITE_TRANSIENT); (*proxy_sqlite3_bind_blob)(stmt, 4, embedding.data(), embedding.size() * sizeof(float), SQLITE_TRANSIENT); (*proxy_sqlite3_bind_int)(stmt, 5, severity); // Execute insert rc = (*proxy_sqlite3_step)(stmt); if (rc != SQLITE_DONE) { proxy_error("Anomaly: Failed to insert pattern: %s\n", (*proxy_sqlite3_errmsg)(db)); (*proxy_sqlite3_finalize)(stmt); return -1; } (*proxy_sqlite3_finalize)(stmt); // Get the inserted rowid sqlite3_int64 rowid = (*proxy_sqlite3_last_insert_rowid)(db); // Update virtual table (sqlite-vec needs explicit rowid insertion) char update_vec[256]; snprintf(update_vec, sizeof(update_vec), 
"INSERT INTO anomaly_patterns_vec(rowid) VALUES (%lld)", rowid); char* err = NULL; rc = (*proxy_sqlite3_exec)(db, update_vec, NULL, NULL, &err); if (rc != SQLITE_OK) { proxy_error("Anomaly: Failed to update vec table: %s\n", err ? err : "unknown"); if (err) (*proxy_sqlite3_free)(err); return -1; } proxy_info("Anomaly: Added threat pattern '%s' (id: %lld)\n", pattern_name.c_str(), rowid); return (int)rowid; } /** * @brief List all threat patterns * * @return JSON array of threat patterns */ std::string Anomaly_Detector::list_threat_patterns() { if (!vector_db) { return "[]"; } json patterns = json::array(); sqlite3* db = vector_db->get_db(); const char* query = "SELECT id, pattern_name, pattern_type, query_example, severity, created_at " "FROM anomaly_patterns ORDER BY severity DESC"; sqlite3_stmt* stmt = NULL; int rc = (*proxy_sqlite3_prepare_v2)(db, query, -1, &stmt, NULL); if (rc != SQLITE_OK) { proxy_error("Anomaly: Failed to query threat patterns: %s\n", (*proxy_sqlite3_errmsg)(db)); return "[]"; } while ((*proxy_sqlite3_step)(stmt) == SQLITE_ROW) { json pattern; pattern["id"] = (*proxy_sqlite3_column_int64)(stmt, 0); const char* name = reinterpret_cast((*proxy_sqlite3_column_text)(stmt, 1)); const char* type = reinterpret_cast((*proxy_sqlite3_column_text)(stmt, 2)); const char* example = reinterpret_cast((*proxy_sqlite3_column_text)(stmt, 3)); pattern["pattern_name"] = name ? name : ""; pattern["pattern_type"] = type ? type : ""; pattern["query_example"] = example ? 
example : ""; pattern["severity"] = (*proxy_sqlite3_column_int)(stmt, 4); pattern["created_at"] = (*proxy_sqlite3_column_int64)(stmt, 5); patterns.push_back(pattern); } (*proxy_sqlite3_finalize)(stmt); return patterns.dump(); } /** * @brief Remove a threat pattern * * @param pattern_id Pattern ID to remove * @return true if removed, false otherwise */ bool Anomaly_Detector::remove_threat_pattern(int pattern_id) { proxy_info("Anomaly: Removing threat pattern: %d\n", pattern_id); if (!vector_db) { proxy_error("Anomaly: Cannot remove pattern - no vector DB\n"); return false; } sqlite3* db = vector_db->get_db(); // First, remove from virtual table char del_vec[256]; snprintf(del_vec, sizeof(del_vec), "DELETE FROM anomaly_patterns_vec WHERE rowid = %d", pattern_id); char* err = NULL; int rc = (*proxy_sqlite3_exec)(db, del_vec, NULL, NULL, &err); if (rc != SQLITE_OK) { proxy_error("Anomaly: Failed to delete from vec table: %s\n", err ? err : "unknown"); if (err) (*proxy_sqlite3_free)(err); return false; } // Then, remove from main table snprintf(del_vec, sizeof(del_vec), "DELETE FROM anomaly_patterns WHERE id = %d", pattern_id); rc = (*proxy_sqlite3_exec)(db, del_vec, NULL, NULL, &err); if (rc != SQLITE_OK) { proxy_error("Anomaly: Failed to delete pattern: %s\n", err ? 
err : "unknown"); if (err) (*proxy_sqlite3_free)(err); return false; } proxy_info("Anomaly: Removed threat pattern %d\n", pattern_id); return true; } // ============================================================================ // Statistics and Monitoring // ============================================================================ /** * @brief Get anomaly detection statistics * * @return JSON string with statistics */ std::string Anomaly_Detector::get_statistics() { json stats; stats["users_tracked"] = user_statistics.size(); stats["config"] = { {"enabled", config.enabled}, {"risk_threshold", config.risk_threshold}, {"similarity_threshold", config.similarity_threshold}, {"rate_limit", config.rate_limit}, {"auto_block", config.auto_block}, {"log_only", config.log_only} }; // Count total queries uint64_t total_queries = 0; for (const auto& entry : user_statistics) { total_queries += entry.second.query_count; } stats["total_queries_tracked"] = total_queries; // Count threat patterns if (vector_db) { sqlite3* db = vector_db->get_db(); const char* count_query = "SELECT COUNT(*) FROM anomaly_patterns"; sqlite3_stmt* stmt = NULL; int rc = (*proxy_sqlite3_prepare_v2)(db, count_query, -1, &stmt, NULL); if (rc == SQLITE_OK) { rc = (*proxy_sqlite3_step)(stmt); if (rc == SQLITE_ROW) { stats["threat_patterns_count"] = (*proxy_sqlite3_column_int)(stmt, 0); } (*proxy_sqlite3_finalize)(stmt); } // Count by pattern type const char* type_query = "SELECT pattern_type, COUNT(*) FROM anomaly_patterns GROUP BY pattern_type"; rc = (*proxy_sqlite3_prepare_v2)(db, type_query, -1, &stmt, NULL); if (rc == SQLITE_OK) { json by_type = json::object(); while ((*proxy_sqlite3_step)(stmt) == SQLITE_ROW) { const char* type = reinterpret_cast((*proxy_sqlite3_column_text)(stmt, 0)); int count = (*proxy_sqlite3_column_int)(stmt, 1); if (type) { by_type[type] = count; } } (*proxy_sqlite3_finalize)(stmt); stats["threat_patterns_by_type"] = by_type; } } return stats.dump(); } /** * @brief Clear all 
user statistics */ void Anomaly_Detector::clear_user_statistics() { size_t count = user_statistics.size(); user_statistics.clear(); proxy_info("Anomaly: Cleared statistics for %zu users\n", count); } #endif /* PROXYSQLGENAI */