You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
proxysql/lib/Anomaly_Detector.cpp

958 lines
31 KiB

#ifdef PROXYSQLGENAI
/**
* @file Anomaly_Detector.cpp
* @brief Implementation of Real-time Anomaly Detection for ProxySQL
*
* Implements multi-stage anomaly detection pipeline:
* 1. SQL Injection Pattern Detection
* 2. Query Normalization and Pattern Matching
* 3. Rate Limiting per User/Host
* 4. Statistical Outlier Detection
* 5. Embedding-based Threat Similarity
*
* @see Anomaly_Detector.h
*/
#include "Anomaly_Detector.h"
#include "sqlite3db.h"
#include "proxysql_utils.h"
#include "GenAI_Thread.h"
#include "cpp.h"
#include <cstring>
#include <cstdlib>
#include <sstream>
#include <algorithm>
#include <regex>
#include <ctime>
#include <cmath>
// JSON library
#include "../deps/json/json.hpp"
using json = nlohmann::json;
#define PROXYJSON
// Global GenAI handler for embedding generation
extern GenAI_Threads_Handler *GloGATH;
// ============================================================================
// Constants
// ============================================================================
// SQL injection patterns (regular expressions).
// check_sql_injection() compiles each entry with std::regex::icase and searches
// the raw query with it. The array is NULL-terminated, and the position of an
// entry is part of the reported rule name ("injection_pattern_<index>"), so
// inserting or reordering entries changes the rule identifiers.
// NOTE(review): several entries are very broad (see per-entry notes) and will
// match many ordinary application queries — confirm this is intended.
static const char* SQL_INJECTION_PATTERNS[] = {
"('|\").*?('|\")", // 0: a quote character followed later by another quote, i.e. any quoted literal
"\\bor\\b.*=.*\\bor\\b", // 1: two OR keywords with '=' between them (a lone "OR 1=1" does NOT match)
"\\band\\b.*=.*\\band\\b", // 2: two AND keywords with '=' between them (a lone "AND 1=1" does NOT match)
"union.*select", // 3: UNION ... SELECT
"drop.*table", // 4: DROP ... TABLE
"exec.*xp_", // 5: EXEC ... xp_ (SQL Server extended procedures)
";.*--", // 6: statement separator followed by a line comment
"/\\*.*\\*/", // 7: block comment opened and closed on the same line
"concat\\(", // 8: CONCAT( call
"char\\(", // 9: CHAR( call
"0x[0-9a-f]+", // 10: hexadecimal literal
NULL
};
// Suspicious keywords.
// Matched by check_sql_injection() as plain substrings of the lowercased query;
// reported as "suspicious_keyword_<index>". NULL-terminated.
static const char* SUSPICIOUS_KEYWORDS[] = {
"sleep(", "waitfor delay", "benchmark(", "pg_sleep",
"load_file", "into outfile", "dumpfile",
"script>", "javascript:", "onerror=", "onload=",
NULL
};
// Thresholds
// NOTE(review): DEFAULT_RATE_LIMIT is compared against a per user@host counter
// that check_rate_limiting() resets only after USER_STATS_WINDOW seconds without
// a query, so it is not a strict "per minute" limit — confirm the intended unit.
#define DEFAULT_RATE_LIMIT 100 // queries allowed before the rate check fires
#define DEFAULT_RISK_THRESHOLD 70 // 0-100, divided by 100 before comparing with risk scores
#define DEFAULT_SIMILARITY_THRESHOLD 85 // 0-100
#define USER_STATS_WINDOW 3600 // 1 hour in seconds
#define MAX_RECENT_QUERIES 100 // cap on the per-user recent query pattern list
// ============================================================================
// Constructor/Destructor
// ============================================================================
/**
 * @brief Construct a detector with the built-in default configuration.
 *
 * No vector database is attached at construction time; detection is enabled,
 * auto-blocking is on and log-only mode is off.
 */
Anomaly_Detector::Anomaly_Detector() : vector_db(nullptr) {
    // Numeric thresholds come from the DEFAULT_* constants of this file.
    config.rate_limit = DEFAULT_RATE_LIMIT;
    config.similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;
    config.risk_threshold = DEFAULT_RISK_THRESHOLD;

    // Behaviour switches.
    config.log_only = false;
    config.auto_block = true;
    config.enabled = true;
}
/**
 * @brief Destroy the detector; all cleanup is delegated to close().
 */
Anomaly_Detector::~Anomaly_Detector() {
    this->close();
}
// ============================================================================
// Initialization
// ============================================================================
/**
 * @brief Initialize the anomaly detector
 *
 * Logs the detector version and the number of built-in injection patterns.
 * No vector database is opened here; as the original comments state, it is
 * expected to be provided by AI_Features_Manager, and basic pattern detection
 * works without it.
 *
 * @return Always 0.
 */
int Anomaly_Detector::init() {
    // The pattern table ends with a NULL sentinel, which is not a real pattern.
    const size_t table_slots = sizeof(SQL_INJECTION_PATTERNS) / sizeof(SQL_INJECTION_PATTERNS[0]);
    const size_t pattern_total = table_slots - 1;

    proxy_info("Anomaly: Initializing Anomaly Detector v%s\n", ANOMALY_DETECTOR_VERSION);
    proxy_info("Anomaly: Anomaly Detector initialized with %zu injection patterns\n",
               pattern_total);
    return 0;
}
/**
* @brief Close and cleanup resources
*/
void Anomaly_Detector::close() {
// Clear user statistics
clear_user_statistics();
proxy_info("Anomaly: Anomaly Detector closed\n");
}
// ============================================================================
// Query Normalization
// ============================================================================
/**
 * @brief Normalize SQL query for pattern matching
 *
 * Normalization steps, in the order they are applied:
 * 1. Convert to lowercase
 * 2. Remove comments ("-- ..." to end of line, and block comments, including
 *    block comments that span several lines)
 * 3. Replace quoted string literals with "?"
 * 4. Replace standalone integer literals with "N"
 * 5. Collapse whitespace runs to a single space and trim both ends
 *
 * Comments are stripped before string literals are recognised, so comment
 * markers inside a string literal are treated as comments.
 *
 * @param query Original SQL query
 * @return Normalized query pattern
 */
std::string Anomaly_Detector::normalize_query(const std::string& query) {
    // std::regex construction is expensive, so each expression is compiled once.
    // Function-local statics are initialised thread-safely, and a const
    // std::regex may be used for matching from several threads.
    // "[\s\S]" is used inside block comments because "." never matches a line
    // terminator in the ECMAScript grammar.
    static const std::regex comment_regex("--.*?$|/\\*[\\s\\S]*?\\*/", std::regex::multiline);
    static const std::regex string_regex("'[^']*'|\"[^\"]*\"");
    static const std::regex numeric_regex("\\b\\d+\\b");
    static const std::regex whitespace_regex("\\s+");

    std::string normalized = query;

    // Lowercase. tolower() has undefined behaviour for negative char values,
    // so every byte is routed through unsigned char first.
    std::transform(normalized.begin(), normalized.end(), normalized.begin(),
                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });

    normalized = std::regex_replace(normalized, comment_regex, "");
    normalized = std::regex_replace(normalized, string_regex, "?");
    normalized = std::regex_replace(normalized, numeric_regex, "N");
    normalized = std::regex_replace(normalized, whitespace_regex, " ");

    // Trim leading/trailing whitespace; an all-whitespace string becomes empty.
    const std::string::size_type first = normalized.find_first_not_of(" \t\n\r");
    if (first == std::string::npos) {
        return std::string();
    }
    const std::string::size_type last = normalized.find_last_not_of(" \t\n\r");
    return normalized.substr(first, last - first + 1);
}
// ============================================================================
// SQL Injection Detection
// ============================================================================
/**
 * @brief Check for SQL injection patterns
 *
 * Searches the query with every regular expression in SQL_INJECTION_PATTERNS
 * (case-insensitive) and looks for every SUSPICIOUS_KEYWORDS entry as a
 * substring of the lowercased query. Each hit adds a rule name
 * ("injection_pattern_<index>" / "suspicious_keyword_<index>") and 0.3 to the
 * risk score, which is capped at 1.0.
 *
 * The result is flagged for blocking when the risk score reaches
 * config.risk_threshold / 100 and config.auto_block is set.
 *
 * Regex and other standard exceptions are logged and swallowed; in that case
 * the result reports no anomaly.
 *
 * @param query SQL query to check
 * @return AnomalyResult with injection details
 */
AnomalyResult Anomaly_Detector::check_sql_injection(const std::string& query) {
    AnomalyResult result;
    result.is_anomaly = false;
    result.risk_score = 0.0f;
    result.anomaly_type = "sql_injection";
    result.should_block = false;

    try {
        // Compile the pattern table once instead of on every query: regex
        // construction dominates the cost of this check. Vector index == table
        // index, so rule names stay stable. If compilation throws, the static
        // stays uninitialised and is retried on the next call.
        static const std::vector<std::regex> compiled_patterns = [] {
            std::vector<std::regex> compiled;
            for (size_t i = 0; SQL_INJECTION_PATTERNS[i] != NULL; i++) {
                compiled.emplace_back(SQL_INJECTION_PATTERNS[i],
                                      std::regex::icase | std::regex::optimize);
            }
            return compiled;
        }();

        // Keywords are stored lowercase, so compare against a lowercased copy.
        // tolower() is undefined for negative char values, hence unsigned char.
        std::string query_lower = query;
        std::transform(query_lower.begin(), query_lower.end(), query_lower.begin(),
                       [](unsigned char c) { return static_cast<char>(::tolower(c)); });

        int pattern_matches = 0;

        // Check each injection pattern
        for (size_t i = 0; i < compiled_patterns.size(); i++) {
            if (std::regex_search(query, compiled_patterns[i])) {
                pattern_matches++;
                result.matched_rules.push_back(std::string("injection_pattern_") + std::to_string(i));
            }
        }

        // Check suspicious keywords
        for (int i = 0; SUSPICIOUS_KEYWORDS[i] != NULL; i++) {
            if (query_lower.find(SUSPICIOUS_KEYWORDS[i]) != std::string::npos) {
                pattern_matches++;
                result.matched_rules.push_back(std::string("suspicious_keyword_") + std::to_string(i));
            }
        }

        // Calculate risk score based on pattern matches
        if (pattern_matches > 0) {
            result.is_anomaly = true;
            result.risk_score = std::min(1.0f, pattern_matches * 0.3f);

            std::ostringstream explanation;
            explanation << "SQL injection patterns detected: " << pattern_matches << " matches";
            result.explanation = explanation.str();

            // Auto-block if high risk and auto-block enabled
            if (result.risk_score >= config.risk_threshold / 100.0f && config.auto_block) {
                result.should_block = true;
            }

            proxy_debug(PROXY_DEBUG_ANOMALY, 3,
                        "Anomaly: SQL injection detected in query: %s (risk: %.2f)\n",
                        query.c_str(), result.risk_score);
        }
    } catch (const std::regex_error& e) {
        proxy_error("Anomaly: Regex error in injection check: %s\n", e.what());
    } catch (const std::exception& e) {
        proxy_error("Anomaly: Error in injection check: %s\n", e.what());
    }
    return result;
}
// ============================================================================
// Rate Limiting
// ============================================================================
/**
 * @brief Check rate limiting per user/host
 *
 * Keeps one counter per "user@host" key. The counter is reset when more than
 * USER_STATS_WINDOW seconds have passed since that key's previous query, then
 * incremented; the result is an anomaly once the counter exceeds
 * config.rate_limit. The risk score starts at 0.5 and grows with the excess
 * ratio, capped at 1.0. Blocking is requested when config.auto_block is set.
 *
 * NOTE(review): because last_query_time is refreshed on every call, the counter
 * of a continuously active client is never reset — this is an inactivity
 * timeout rather than a sliding/fixed window. Fixing it needs a window-start
 * field in UserStats (declared outside this file).
 * NOTE(review): analyze() calls this function and then update_user_statistics(),
 * which increments query_count again, so each analyzed query is counted twice —
 * confirm which of the two is meant to own the counter.
 *
 * @param user Username
 * @param client_host Client IP address
 * @return AnomalyResult with rate limit details
 */
AnomalyResult Anomaly_Detector::check_rate_limiting(const std::string& user,
        const std::string& client_host) {
    AnomalyResult result;
    result.is_anomaly = false;
    result.risk_score = 0.0f;
    result.anomaly_type = "rate_limit";
    result.should_block = false;
    if (!config.enabled) {
        return result;
    }

    // Get current time
    uint64_t current_time = (uint64_t)time(NULL);
    std::string key = user + "@" + client_host;

    // Get or create user stats
    UserStats& stats = user_statistics[key];

    // Reset the counter after a long enough gap since the previous query
    if (current_time - stats.last_query_time > USER_STATS_WINDOW) {
        stats.query_count = 0;
        stats.recent_queries.clear();
    }

    // Increment query count
    stats.query_count++;
    stats.last_query_time = current_time;

    // Check if rate limit exceeded
    if (stats.query_count > (uint64_t)config.rate_limit) {
        result.is_anomaly = true;

        // Risk score increases with excess queries
        float excess_ratio = (float)(stats.query_count - config.rate_limit) / config.rate_limit;
        result.risk_score = std::min(1.0f, 0.5f + excess_ratio);

        std::ostringstream explanation;
        explanation << "Rate limit exceeded: " << stats.query_count
                    << " queries per " << USER_STATS_WINDOW << " seconds (limit: "
                    << config.rate_limit << ")";
        result.explanation = explanation.str();
        result.matched_rules.push_back("rate_limit_exceeded");

        if (config.auto_block) {
            result.should_block = true;
        }

        // The counter is compared as a 64-bit value; "%lu" is only correct where
        // that type is unsigned long, so print it portably as unsigned long long.
        proxy_warning("Anomaly: Rate limit exceeded for %s: %llu queries\n",
                      key.c_str(), static_cast<unsigned long long>(stats.query_count));
    }
    return result;
}
// ============================================================================
// Statistical Anomaly Detection
// ============================================================================
/**
 * @brief Detect statistical anomalies in query behavior
 *
 * Applies three fixed heuristics to the fingerprint and to the statistics
 * already tracked for "user@client_host":
 * - tracked query count above 3x a hard-coded baseline of 10 queries
 * - fp.execution_time_ms above 5000
 * - fp.affected_rows above 10000
 *
 * The query-count heuristic is a simple ratio against the baseline; no mean or
 * standard deviation is computed. This function never requests blocking on its
 * own (should_block stays false).
 *
 * NOTE(review): analyze() in this file does not assign execution_time_ms or
 * affected_rows before calling this function — confirm where they are set.
 *
 * @param fp Query fingerprint
 * @return AnomalyResult with statistical anomaly details
 */
AnomalyResult Anomaly_Detector::check_statistical_anomaly(const QueryFingerprint& fp) {
    AnomalyResult result;
    result.is_anomaly = false;
    result.risk_score = 0.0f;
    result.anomaly_type = "statistical";
    result.should_block = false;
    if (!config.enabled) {
        return result;
    }

    std::string key = fp.user + "@" + fp.client_host;

    // Read-only lookup: operator[] would insert an empty entry for every key
    // that is merely checked, growing the map as a side effect of a query.
    uint64_t observed_queries = 0;
    auto tracked = user_statistics.find(key);
    if (tracked != user_statistics.end()) {
        observed_queries = tracked->second.query_count;
    }

    const uint64_t avg_queries = 10; // Default baseline
    float excess_ratio = 0.0f;
    if (observed_queries > avg_queries * 3) {
        // Query count is more than three times the baseline
        result.is_anomaly = true;
        excess_ratio = (float)(observed_queries - avg_queries) / avg_queries;
        result.risk_score = std::min(1.0f, excess_ratio / 5.0f); // Normalize

        std::ostringstream explanation;
        explanation << "Unusually high query rate: " << observed_queries
                    << " queries (baseline: " << avg_queries << ")";
        result.explanation = explanation.str();
        result.matched_rules.push_back("high_query_rate");

        proxy_debug(PROXY_DEBUG_ANOMALY, 3,
                    "Anomaly: Statistical anomaly for %s: z-score=%.2f\n",
                    key.c_str(), excess_ratio);
    }

    // Check for abnormal execution time or rows affected
    if (fp.execution_time_ms > 5000) { // 5 seconds
        result.is_anomaly = true;
        result.risk_score = std::max(result.risk_score, 0.3f);
        if (!result.explanation.empty()) {
            result.explanation += "; ";
        }
        result.explanation += "Long execution time detected";
        result.matched_rules.push_back("long_execution_time");
    }
    if (fp.affected_rows > 10000) {
        result.is_anomaly = true;
        result.risk_score = std::max(result.risk_score, 0.2f);
        if (!result.explanation.empty()) {
            result.explanation += "; ";
        }
        result.explanation += "Large result set detected";
        result.matched_rules.push_back("large_result_set");
    }
    return result;
}
// ============================================================================
// Embedding-based Similarity Detection
// ============================================================================
/**
 * @brief Check embedding-based similarity to known threats
 *
 * Compares the query embedding with the embeddings of known threat patterns
 * through a cosine-distance search on anomaly_patterns_vec joined with
 * anomaly_patterns. Only the closest matching row is evaluated.
 *
 * The embedding is passed to SQLite as a bound parameter. Embedding JSON is
 * typically several kilobytes, so formatting it into a fixed-size SQL buffer
 * truncates the statement and makes the search fail.
 *
 * Returns a non-anomalous result when the detector is disabled, no vector DB
 * is attached, no embedding can be obtained, or the search cannot be run.
 *
 * @param query SQL query
 * @param embedding Query vector embedding (if empty, one is generated)
 * @return AnomalyResult with similarity details
 */
AnomalyResult Anomaly_Detector::check_embedding_similarity(const std::string& query,
        const std::vector<float>& embedding) {
    AnomalyResult result;
    result.is_anomaly = false;
    result.risk_score = 0.0f;
    result.anomaly_type = "embedding_similarity";
    result.should_block = false;
    if (!config.enabled || !vector_db) {
        // Can't do embedding check without vector DB
        return result;
    }

    // Use the caller's embedding when provided; otherwise generate one.
    std::vector<float> generated;
    const std::vector<float>* query_embedding = &embedding;
    if (embedding.empty()) {
        generated = get_query_embedding(query);
        query_embedding = &generated;
    }
    if (query_embedding->empty()) {
        return result;
    }

    // Serialize the embedding as a JSON array for sqlite-vec
    std::string embedding_json = "[";
    for (size_t i = 0; i < query_embedding->size(); i++) {
        if (i > 0) embedding_json += ",";
        embedding_json += std::to_string((*query_embedding)[i]);
    }
    embedding_json += "]";

    // Similarity 0-100 -> Distance 0-2 (cosine distance: 0=similar, 2=dissimilar)
    float distance_threshold = 2.0f - (config.similarity_threshold / 50.0f);

    // Only the small numeric threshold is formatted into the SQL text; the
    // embedding is bound once as ?1 and referenced twice.
    char search[512];
    snprintf(search, sizeof(search),
             "SELECT p.pattern_name, p.pattern_type, p.severity, "
             " vec_distance_cosine(v.embedding, ?1) as distance "
             "FROM anomaly_patterns p "
             "JOIN anomaly_patterns_vec v ON p.id = v.rowid "
             "WHERE v.embedding MATCH ?1 "
             "AND distance < %f "
             "ORDER BY distance "
             "LIMIT 5",
             distance_threshold);

    sqlite3* db = vector_db->get_db();
    sqlite3_stmt* stmt = NULL;
    int rc = (*proxy_sqlite3_prepare_v2)(db, search, -1, &stmt, NULL);
    if (rc != SQLITE_OK) {
        proxy_debug(PROXY_DEBUG_ANOMALY, 3, "Embedding search prepare failed: %s\n", (*proxy_sqlite3_errmsg)(db));
        return result;
    }
    rc = (*proxy_sqlite3_bind_text)(stmt, 1, embedding_json.c_str(), -1, SQLITE_TRANSIENT);
    if (rc != SQLITE_OK) {
        proxy_debug(PROXY_DEBUG_ANOMALY, 3, "Embedding search bind failed: %s\n", (*proxy_sqlite3_errmsg)(db));
        (*proxy_sqlite3_finalize)(stmt);
        return result;
    }

    // Rows are ordered by distance, so the first row is the closest pattern
    rc = (*proxy_sqlite3_step)(stmt);
    if (rc == SQLITE_ROW) {
        result.is_anomaly = true;

        const char* pattern_name = reinterpret_cast<const char*>((*proxy_sqlite3_column_text)(stmt, 0));
        const char* pattern_type = reinterpret_cast<const char*>((*proxy_sqlite3_column_text)(stmt, 1));
        int severity = (*proxy_sqlite3_column_int)(stmt, 2);
        double distance = (*proxy_sqlite3_column_double)(stmt, 3);

        // Risk = severity/10 scaled by similarity (lower distance = higher risk).
        // NOTE(review): this assumes severity is on a 1-10 scale, while
        // add_threat_pattern() documents 0-100 — confirm. The clamp keeps the
        // score in [0,1] either way.
        const float raw_score = static_cast<float>((severity / 10.0f) * (1.0f - (distance / 2.0f)));
        result.risk_score = std::min(1.0f, std::max(0.0f, raw_score));

        char explanation[512];
        snprintf(explanation, sizeof(explanation),
                 "Query similar to known threat pattern '%s' (type: %s, severity: %d, distance: %.2f)",
                 pattern_name ? pattern_name : "unknown",
                 pattern_type ? pattern_type : "unknown",
                 severity, distance);
        result.explanation = explanation;

        if (pattern_name) {
            result.matched_rules.push_back(std::string("pattern:") + pattern_name);
        }

        // Same blocking rule as check_sql_injection(): threshold reached AND
        // auto-block enabled.
        result.should_block = (result.risk_score >= (config.risk_threshold / 100.0f)) && config.auto_block;

        proxy_info("Anomaly: Embedding similarity detected (pattern: %s, score: %.2f)\n",
                   pattern_name ? pattern_name : "unknown", result.risk_score);
    } else if (rc != SQLITE_DONE) {
        proxy_debug(PROXY_DEBUG_ANOMALY, 3, "Embedding search failed: %s\n", (*proxy_sqlite3_errmsg)(db));
    }
    (*proxy_sqlite3_finalize)(stmt);

    proxy_debug(PROXY_DEBUG_ANOMALY, 3,
                "Anomaly: Embedding similarity check performed\n");
    return result;
}
/**
 * @brief Get vector embedding for a query
 *
 * Normalizes the query and asks the global GenAI handler (GloGATH) for its
 * embedding. The first result.embedding_size floats of the returned buffer
 * are copied into the vector that is returned.
 *
 * @param query SQL query
 * @return Vector embedding (empty if GloGATH is unavailable or no embedding
 *         was produced)
 */
std::vector<float> Anomaly_Detector::get_query_embedding(const std::string& query) {
    if (!GloGATH) {
        proxy_debug(PROXY_DEBUG_ANOMALY, 3, "GenAI handler not available for embedding\n");
        return {};
    }

    // Normalize query first for better embedding quality
    std::string normalized = normalize_query(query);

    // Generate embedding using GenAI
    GenAI_EmbeddingResult result = GloGATH->embed_documents({normalized});
    if (!result.data || result.count == 0) {
        proxy_debug(PROXY_DEBUG_ANOMALY, 3, "Failed to generate embedding\n");
        return {};
    }

    // Copy the single embedding that was requested
    std::vector<float> embedding(result.data, result.data + result.embedding_size);

    // Free the result data (GenAI allocates with malloc) and clear the pointer
    // so the result object no longer refers to released memory.
    // NOTE(review): if GenAI_EmbeddingResult releases `data` in its destructor,
    // this free() is redundant and the allocator may not match — confirm
    // ownership in GenAI_Thread.h.
    free(result.data);
    result.data = NULL;

    proxy_debug(PROXY_DEBUG_ANOMALY, 3, "Generated embedding with %zu dimensions\n", embedding.size());
    return embedding;
}
// ============================================================================
// User Statistics Management
// ============================================================================
/**
 * @brief Update user statistics with query fingerprint
 *
 * Appends the fingerprint's query pattern to the "user@client_host" entry
 * (keeping at most MAX_RECENT_QUERIES patterns), records fp.timestamp as the
 * last query time and increments the query counter. Every 1000th call also
 * drops entries whose last query is older than 2 * USER_STATS_WINDOW seconds.
 *
 * NOTE(review): check_rate_limiting() increments the same query_count, and
 * analyze() calls both functions for every query, so analyzed queries are
 * counted twice — confirm which function should own the counter.
 *
 * @param fp Query fingerprint
 */
void Anomaly_Detector::update_user_statistics(const QueryFingerprint& fp) {
    if (!config.enabled) {
        return;
    }

    std::string key = fp.user + "@" + fp.client_host;
    UserStats& stats = user_statistics[key];

    // Add to recent queries, dropping the oldest one when over the cap
    stats.recent_queries.push_back(fp.query_pattern);
    if (stats.recent_queries.size() > MAX_RECENT_QUERIES) {
        stats.recent_queries.erase(stats.recent_queries.begin());
    }
    stats.last_query_time = fp.timestamp;
    stats.query_count++;

    // Cleanup old entries periodically. The counter is reset at the threshold
    // instead of growing forever: an ever-incremented signed int eventually
    // overflows, which is undefined behaviour.
    // `stats` is not used past this point, so erasing entries here is safe.
    static unsigned int cleanup_counter = 0;
    if (++cleanup_counter >= 1000) {
        cleanup_counter = 0;
        uint64_t current_time = (uint64_t)time(NULL);
        auto it = user_statistics.begin();
        while (it != user_statistics.end()) {
            if (current_time - it->second.last_query_time > USER_STATS_WINDOW * 2) {
                it = user_statistics.erase(it);
            } else {
                ++it;
            }
        }
    }
}
// ============================================================================
// Main Analysis Method
// ============================================================================
/**
 * @brief Main entry point for anomaly detection
 *
 * Runs the multi-stage detection pipeline:
 * 1. SQL Injection Pattern Detection
 * 2. Rate Limiting Check
 * 3. Statistical Anomaly Detection
 * 4. Embedding Similarity Check (a no-op when no vector DB is attached)
 *
 * The combined result is an anomaly if any stage reported one; its risk score
 * is the maximum of the stage scores, and explanations and matched rules are
 * concatenated in stage order. Blocking is requested when the injection or
 * rate stage requests it, or when the combined risk reaches
 * config.risk_threshold / 100 with config.auto_block set — but never in
 * log-only mode, where anomalies are only logged.
 *
 * User statistics are updated after the checks have run.
 *
 * @param query SQL query to analyze
 * @param user Username
 * @param client_host Client IP address
 * @param schema Database schema name
 * @return AnomalyResult with combined analysis
 */
AnomalyResult Anomaly_Detector::analyze(const std::string& query, const std::string& user,
        const std::string& client_host, const std::string& schema) {
    AnomalyResult combined_result;
    combined_result.is_anomaly = false;
    combined_result.risk_score = 0.0f;
    combined_result.should_block = false;
    if (!config.enabled) {
        return combined_result;
    }

    proxy_debug(PROXY_DEBUG_ANOMALY, 3,
                "Anomaly: Analyzing query from %s@%s\n",
                user.c_str(), client_host.c_str());

    // Run all detection stages
    AnomalyResult injection_result = check_sql_injection(query);
    AnomalyResult rate_result = check_rate_limiting(user, client_host);

    // Build fingerprint for statistical analysis. Value-initialization makes
    // sure fields that are not assigned below (execution_time_ms and
    // affected_rows are read by check_statistical_anomaly()) do not hold
    // indeterminate values.
    QueryFingerprint fp{};
    fp.query_pattern = normalize_query(query);
    fp.user = user;
    fp.client_host = client_host;
    fp.schema = schema;
    fp.timestamp = (uint64_t)time(NULL);
    AnomalyResult stat_result = check_statistical_anomaly(fp);

    // Embedding similarity (optional); an empty embedding lets the stage
    // generate one itself
    std::vector<float> embedding;
    AnomalyResult embed_result = check_embedding_similarity(query, embedding);

    // Fold the stages, in pipeline order, into the combined result
    const AnomalyResult* stages[] = { &injection_result, &rate_result, &stat_result, &embed_result };
    for (const AnomalyResult* stage : stages) {
        combined_result.is_anomaly = combined_result.is_anomaly || stage->is_anomaly;
        combined_result.risk_score = std::max(combined_result.risk_score, stage->risk_score);
        if (!stage->explanation.empty()) {
            if (!combined_result.explanation.empty()) {
                combined_result.explanation += "; ";
            }
            combined_result.explanation += stage->explanation;
        }
        combined_result.matched_rules.insert(combined_result.matched_rules.end(),
                                             stage->matched_rules.begin(),
                                             stage->matched_rules.end());
    }

    // Determine if should block. Log-only mode must never block: the logging
    // below reports such anomalies as "log-only", so the returned flag has to
    // agree with it.
    const bool wants_block = injection_result.should_block ||
                             rate_result.should_block ||
                             (combined_result.risk_score >= config.risk_threshold / 100.0f && config.auto_block);
    combined_result.should_block = wants_block && !config.log_only;

    // Update user statistics
    update_user_statistics(fp);

    // Log anomaly if detected
    if (combined_result.is_anomaly) {
        if (config.log_only) {
            proxy_warning("Anomaly: Detected (log-only mode): %s (risk: %.2f)\n",
                          combined_result.explanation.c_str(), combined_result.risk_score);
        } else if (combined_result.should_block) {
            proxy_error("Anomaly: BLOCKED: %s (risk: %.2f)\n",
                        combined_result.explanation.c_str(), combined_result.risk_score);
        } else {
            proxy_warning("Anomaly: Detected: %s (risk: %.2f)\n",
                          combined_result.explanation.c_str(), combined_result.risk_score);
        }
    }
    return combined_result;
}
// ============================================================================
// Threat Pattern Management
// ============================================================================
/**
 * @brief Add a threat pattern to the database
 *
 * Generates an embedding for the example query, inserts the pattern into
 * anomaly_patterns and then inserts the same embedding into the
 * anomaly_patterns_vec virtual table under the same rowid, which is where
 * check_embedding_similarity() searches. If the second insert fails, the row
 * just added to anomaly_patterns is deleted again (best effort) so the two
 * tables do not drift apart.
 *
 * @param pattern_name Human-readable name
 * @param query_example Example query
 * @param pattern_type Type of threat (injection, flooding, etc.)
 * @param severity Severity level. NOTE(review): previously documented as
 *        0-100, while check_embedding_similarity() divides it by 10 as if it
 *        were on a 1-10 scale — confirm the intended range.
 * @return Pattern ID or -1 on error
 */
int Anomaly_Detector::add_threat_pattern(const std::string& pattern_name,
        const std::string& query_example,
        const std::string& pattern_type,
        int severity) {
    proxy_info("Anomaly: Adding threat pattern: %s (type: %s, severity: %d)\n",
               pattern_name.c_str(), pattern_type.c_str(), severity);
    if (!vector_db) {
        proxy_error("Anomaly: Cannot add pattern - no vector DB\n");
        return -1;
    }

    // Generate embedding for the query example
    std::vector<float> embedding = get_query_embedding(query_example);
    if (embedding.empty()) {
        proxy_error("Anomaly: Failed to generate embedding for threat pattern\n");
        return -1;
    }
    const int embedding_bytes = static_cast<int>(embedding.size() * sizeof(float));

    // Insert into main table with embedding BLOB
    sqlite3* db = vector_db->get_db();
    sqlite3_stmt* stmt = NULL;
    const char* insert = "INSERT INTO anomaly_patterns "
                         "(pattern_name, pattern_type, query_example, embedding, severity) "
                         "VALUES (?, ?, ?, ?, ?)";
    int rc = (*proxy_sqlite3_prepare_v2)(db, insert, -1, &stmt, NULL);
    if (rc != SQLITE_OK) {
        proxy_error("Anomaly: Failed to prepare pattern insert: %s\n", (*proxy_sqlite3_errmsg)(db));
        return -1;
    }

    // Bind values, stopping at the first failure
    rc = (*proxy_sqlite3_bind_text)(stmt, 1, pattern_name.c_str(), -1, SQLITE_TRANSIENT);
    if (rc == SQLITE_OK) rc = (*proxy_sqlite3_bind_text)(stmt, 2, pattern_type.c_str(), -1, SQLITE_TRANSIENT);
    if (rc == SQLITE_OK) rc = (*proxy_sqlite3_bind_text)(stmt, 3, query_example.c_str(), -1, SQLITE_TRANSIENT);
    if (rc == SQLITE_OK) rc = (*proxy_sqlite3_bind_blob)(stmt, 4, embedding.data(), embedding_bytes, SQLITE_TRANSIENT);
    if (rc == SQLITE_OK) rc = (*proxy_sqlite3_bind_int)(stmt, 5, severity);
    if (rc != SQLITE_OK) {
        proxy_error("Anomaly: Failed to insert pattern: %s\n", (*proxy_sqlite3_errmsg)(db));
        (*proxy_sqlite3_finalize)(stmt);
        return -1;
    }

    // Execute insert
    rc = (*proxy_sqlite3_step)(stmt);
    if (rc != SQLITE_DONE) {
        proxy_error("Anomaly: Failed to insert pattern: %s\n", (*proxy_sqlite3_errmsg)(db));
        (*proxy_sqlite3_finalize)(stmt);
        return -1;
    }
    (*proxy_sqlite3_finalize)(stmt);

    // Get the inserted rowid
    sqlite3_int64 rowid = (*proxy_sqlite3_last_insert_rowid)(db);

    // Mirror the embedding into the sqlite-vec virtual table under the same
    // rowid. The vector has to be supplied with the row; a rowid on its own
    // leaves nothing for the similarity search to match against.
    char insert_vec[160];
    snprintf(insert_vec, sizeof(insert_vec),
             "INSERT INTO anomaly_patterns_vec(rowid, embedding) VALUES (%lld, ?1)",
             static_cast<long long>(rowid));
    stmt = NULL;
    rc = (*proxy_sqlite3_prepare_v2)(db, insert_vec, -1, &stmt, NULL);
    if (rc == SQLITE_OK) {
        rc = (*proxy_sqlite3_bind_blob)(stmt, 1, embedding.data(), embedding_bytes, SQLITE_TRANSIENT);
        if (rc == SQLITE_OK) {
            rc = (*proxy_sqlite3_step)(stmt);
        }
    }
    if (rc != SQLITE_DONE) {
        // Log before finalizing, while the connection still holds this error
        proxy_error("Anomaly: Failed to update vec table: %s\n", (*proxy_sqlite3_errmsg)(db));
        if (stmt) (*proxy_sqlite3_finalize)(stmt);

        // Best effort: remove the row that now has no vector counterpart
        char undo[128];
        snprintf(undo, sizeof(undo), "DELETE FROM anomaly_patterns WHERE id = %lld",
                 static_cast<long long>(rowid));
        char* err = NULL;
        if ((*proxy_sqlite3_exec)(db, undo, NULL, NULL, &err) != SQLITE_OK) {
            proxy_error("Anomaly: Failed to delete pattern: %s\n", err ? err : "unknown");
        }
        if (err) (*proxy_sqlite3_free)(err);
        return -1;
    }
    (*proxy_sqlite3_finalize)(stmt);

    proxy_info("Anomaly: Added threat pattern '%s' (id: %lld)\n", pattern_name.c_str(),
               static_cast<long long>(rowid));
    return (int)rowid;
}
/**
* @brief List all threat patterns
*
* @return JSON array of threat patterns
*/
std::string Anomaly_Detector::list_threat_patterns() {
if (!vector_db) {
return "[]";
}
json patterns = json::array();
sqlite3* db = vector_db->get_db();
const char* query = "SELECT id, pattern_name, pattern_type, query_example, severity, created_at "
"FROM anomaly_patterns ORDER BY severity DESC";
sqlite3_stmt* stmt = NULL;
int rc = (*proxy_sqlite3_prepare_v2)(db, query, -1, &stmt, NULL);
if (rc != SQLITE_OK) {
proxy_error("Anomaly: Failed to query threat patterns: %s\n", (*proxy_sqlite3_errmsg)(db));
return "[]";
}
while ((*proxy_sqlite3_step)(stmt) == SQLITE_ROW) {
json pattern;
pattern["id"] = (*proxy_sqlite3_column_int64)(stmt, 0);
const char* name = reinterpret_cast<const char*>((*proxy_sqlite3_column_text)(stmt, 1));
const char* type = reinterpret_cast<const char*>((*proxy_sqlite3_column_text)(stmt, 2));
const char* example = reinterpret_cast<const char*>((*proxy_sqlite3_column_text)(stmt, 3));
pattern["pattern_name"] = name ? name : "";
pattern["pattern_type"] = type ? type : "";
pattern["query_example"] = example ? example : "";
pattern["severity"] = (*proxy_sqlite3_column_int)(stmt, 4);
pattern["created_at"] = (*proxy_sqlite3_column_int64)(stmt, 5);
patterns.push_back(pattern);
}
(*proxy_sqlite3_finalize)(stmt);
return patterns.dump();
}
/**
* @brief Remove a threat pattern
*
* @param pattern_id Pattern ID to remove
* @return true if removed, false otherwise
*/
bool Anomaly_Detector::remove_threat_pattern(int pattern_id) {
proxy_info("Anomaly: Removing threat pattern: %d\n", pattern_id);
if (!vector_db) {
proxy_error("Anomaly: Cannot remove pattern - no vector DB\n");
return false;
}
sqlite3* db = vector_db->get_db();
// First, remove from virtual table
char del_vec[256];
snprintf(del_vec, sizeof(del_vec), "DELETE FROM anomaly_patterns_vec WHERE rowid = %d", pattern_id);
char* err = NULL;
int rc = (*proxy_sqlite3_exec)(db, del_vec, NULL, NULL, &err);
if (rc != SQLITE_OK) {
proxy_error("Anomaly: Failed to delete from vec table: %s\n", err ? err : "unknown");
if (err) (*proxy_sqlite3_free)(err);
return false;
}
// Then, remove from main table
snprintf(del_vec, sizeof(del_vec), "DELETE FROM anomaly_patterns WHERE id = %d", pattern_id);
rc = (*proxy_sqlite3_exec)(db, del_vec, NULL, NULL, &err);
if (rc != SQLITE_OK) {
proxy_error("Anomaly: Failed to delete pattern: %s\n", err ? err : "unknown");
if (err) (*proxy_sqlite3_free)(err);
return false;
}
proxy_info("Anomaly: Removed threat pattern %d\n", pattern_id);
return true;
}
// ============================================================================
// Statistics and Monitoring
// ============================================================================
/**
* @brief Get anomaly detection statistics
*
* @return JSON string with statistics
*/
std::string Anomaly_Detector::get_statistics() {
json stats;
stats["users_tracked"] = user_statistics.size();
stats["config"] = {
{"enabled", config.enabled},
{"risk_threshold", config.risk_threshold},
{"similarity_threshold", config.similarity_threshold},
{"rate_limit", config.rate_limit},
{"auto_block", config.auto_block},
{"log_only", config.log_only}
};
// Count total queries
uint64_t total_queries = 0;
for (const auto& entry : user_statistics) {
total_queries += entry.second.query_count;
}
stats["total_queries_tracked"] = total_queries;
// Count threat patterns
if (vector_db) {
sqlite3* db = vector_db->get_db();
const char* count_query = "SELECT COUNT(*) FROM anomaly_patterns";
sqlite3_stmt* stmt = NULL;
int rc = (*proxy_sqlite3_prepare_v2)(db, count_query, -1, &stmt, NULL);
if (rc == SQLITE_OK) {
rc = (*proxy_sqlite3_step)(stmt);
if (rc == SQLITE_ROW) {
stats["threat_patterns_count"] = (*proxy_sqlite3_column_int)(stmt, 0);
}
(*proxy_sqlite3_finalize)(stmt);
}
// Count by pattern type
const char* type_query = "SELECT pattern_type, COUNT(*) FROM anomaly_patterns GROUP BY pattern_type";
rc = (*proxy_sqlite3_prepare_v2)(db, type_query, -1, &stmt, NULL);
if (rc == SQLITE_OK) {
json by_type = json::object();
while ((*proxy_sqlite3_step)(stmt) == SQLITE_ROW) {
const char* type = reinterpret_cast<const char*>((*proxy_sqlite3_column_text)(stmt, 0));
int count = (*proxy_sqlite3_column_int)(stmt, 1);
if (type) {
by_type[type] = count;
}
}
(*proxy_sqlite3_finalize)(stmt);
stats["threat_patterns_by_type"] = by_type;
}
}
return stats.dump();
}
/**
 * @brief Clear all user statistics
 *
 * Empties the per user@host statistics map and logs how many entries were
 * dropped.
 */
void Anomaly_Detector::clear_user_statistics() {
    const size_t dropped_entries = user_statistics.size();
    user_statistics.clear();
    proxy_info("Anomaly: Cleared statistics for %zu users\n", dropped_entries);
}
#endif /* PROXYSQLGENAI */