// Mirror of https://github.com/sysown/proxysql
#ifndef CLASS_DISCOVERY_SCHEMA_H
|
|
#define CLASS_DISCOVERY_SCHEMA_H
|
|
|
|
#ifdef PROXYSQLGENAI
|
|
|
|
#include "sqlite3db.h"

#include <pthread.h>

#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "json.hpp"
|
|
|
|
/**
 * @brief MCP query rule structure
 *
 * Action is inferred from rule properties:
 * - if error_msg != NULL → block
 * - if replace_pattern != NULL → rewrite
 * - if timeout_ms > 0 → timeout
 * - otherwise → allow
 *
 * Note: 'hits' is only for in-memory tracking, not persisted to the table.
 *
 * Note: this struct declares no destructor, so none of the char* fields nor
 * regex_engine are released here; whoever populates a rule is responsible
 * for releasing them.
 */
struct MCP_Query_Rule {
	int rule_id = 0;
	bool active = false;
	char *username = nullptr;
	char *target_id = nullptr;
	char *schemaname = nullptr;
	char *tool_name = nullptr;
	char *match_pattern = nullptr;
	bool negate_match_pattern = false;
	int re_modifiers = 1;            // bitmask: 1=CASELESS (the default)
	int flagIN = 0;
	int flagOUT = 0;
	char *replace_pattern = nullptr;
	int timeout_ms = 0;
	char *error_msg = nullptr;
	char *ok_msg = nullptr;
	bool log = false;
	bool apply = true;               // defaults to true, unlike the other flags
	char *comment = nullptr;
	uint64_t hits = 0;               // in-memory only, not persisted to table
	void* regex_engine = nullptr;    // compiled regex (RE2)

	// Kept user-provided (rather than "= default") so the type stays a
	// non-aggregate, exactly as before; all defaults come from the
	// member initializers above.
	MCP_Query_Rule() {}
};
|
|
|
|
/**
 * @brief MCP query digest statistics
 *
 * Accumulates per-digest counters: number of samples, first/last timestamp,
 * and sum/min/max of the recorded durations.
 */
struct MCP_Query_Digest_Stats {
	std::string tool_name;
	int run_id;
	uint64_t digest;
	std::string digest_text;
	unsigned int count_star;         // number of samples recorded via add_timing()
	time_t first_seen;
	time_t last_seen;
	unsigned long long sum_time;
	unsigned long long min_time;
	unsigned long long max_time;

	MCP_Query_Digest_Stats() : run_id(-1), digest(0), count_star(0),
		first_seen(0), last_seen(0),
		sum_time(0), min_time(0), max_time(0) {}

	/**
	 * @brief Record one sample
	 *
	 * @param duration_us Duration of the sample (microseconds, per the name)
	 * @param timestamp Time at which the sample was observed
	 */
	void add_timing(unsigned long long duration_us, time_t timestamp) {
		// Decide "first sample" from the counter instead of treating
		// min_time == 0 as "unset": a genuine 0 duration must remain the
		// minimum and not be overwritten by a later, larger sample.
		const bool first_sample = (count_star == 0);
		count_star++;
		sum_time += duration_us;
		if (first_sample || duration_us < min_time) min_time = duration_us;
		if (duration_us > max_time) max_time = duration_us;
		if (first_seen == 0) first_seen = timestamp;
		last_seen = timestamp;
	}
};
|
|
|
|
/**
 * @brief MCP query processor output
 *
 * This structure collects all possible actions from matching MCP query rules.
 * A single rule can perform multiple actions simultaneously (rewrite + timeout + block).
 * Actions are inferred from rule properties:
 * - if error_msg != NULL → block
 * - if replace_pattern != NULL → rewrite
 * - if timeout_ms > 0 → timeout
 * - if OK_msg != NULL → return OK message
 *
 * The calling code checks these fields and performs the appropriate actions.
 *
 * Ownership: destroy() (and therefore the destructor) releases new_query with
 * delete and error_msg / OK_msg with free(), so values stored in these fields
 * must be allocated with new and malloc()-compatible allocators respectively.
 * Copies are deep, so two objects never share the same buffers.
 */
struct MCP_Query_Processor_Output {
	std::string *new_query;   // Rewritten query (released by destroy(); do not delete separately)
	int timeout_ms;           // Query timeout in milliseconds (-1 = not set)
	char *error_msg;          // Error message to return (NULL = not set)
	char *OK_msg;             // OK message to return (NULL = not set)
	int log;                  // Whether to log this query (-1 = not set, 0 = no, 1 = yes)
	int next_query_flagIN;    // Flag for next query (-1 = not set)

	/**
	 * @brief Reset every field to its "not set" value
	 *
	 * Does not release anything: calling it on an object that still holds
	 * buffers drops those pointers without freeing them.
	 */
	void init() {
		new_query = NULL;
		timeout_ms = -1;
		error_msg = NULL;
		OK_msg = NULL;
		log = -1;
		next_query_flagIN = -1;
	}

	/**
	 * @brief Release the held buffers and null the pointers
	 *
	 * Safe to call more than once. Scalar fields are left untouched.
	 */
	void destroy() {
		if (new_query) {
			delete new_query;
			new_query = NULL;
		}
		if (error_msg) {
			free(error_msg);
			error_msg = NULL;
		}
		if (OK_msg) {
			free(OK_msg);
			OK_msg = NULL;
		}
	}

	MCP_Query_Processor_Output() {
		init();
	}

	// Deep copy: the implicit member-wise copy would leave two objects
	// pointing at the same buffers and both destructors would release them.
	MCP_Query_Processor_Output(const MCP_Query_Processor_Output& other) {
		init();
		copy_from(other);
	}

	MCP_Query_Processor_Output(MCP_Query_Processor_Output&& other) noexcept {
		init();
		steal_from(other);
	}

	MCP_Query_Processor_Output& operator=(const MCP_Query_Processor_Output& other) {
		if (this != &other) {
			// Build the copy first so *this is untouched if allocation throws.
			MCP_Query_Processor_Output tmp(other);
			destroy();
			steal_from(tmp);
		}
		return *this;
	}

	MCP_Query_Processor_Output& operator=(MCP_Query_Processor_Output&& other) noexcept {
		if (this != &other) {
			destroy();
			steal_from(other);
		}
		return *this;
	}

	~MCP_Query_Processor_Output() {
		destroy();
	}

private:
	// Duplicate a C string with malloc() so destroy() can free() it.
	// Returns NULL for a NULL input or if allocation fails.
	static char* dup_cstr(const char* src) {
		if (src == NULL) return NULL;
		const size_t len = std::strlen(src) + 1;
		char* copy = static_cast<char*>(std::malloc(len));
		if (copy != NULL) std::memcpy(copy, src, len);
		return copy;
	}

	// Deep-copy every field from other. Expects *this to hold no buffers.
	void copy_from(const MCP_Query_Processor_Output& other) {
		// The only throwing allocation goes first, before anything else is held.
		new_query = other.new_query ? new std::string(*other.new_query) : NULL;
		error_msg = dup_cstr(other.error_msg);
		OK_msg = dup_cstr(other.OK_msg);
		timeout_ms = other.timeout_ms;
		log = other.log;
		next_query_flagIN = other.next_query_flagIN;
	}

	// Take over other's buffers and values, leaving other in the init() state.
	// Expects *this to hold no buffers.
	void steal_from(MCP_Query_Processor_Output& other) {
		new_query = other.new_query;
		error_msg = other.error_msg;
		OK_msg = other.OK_msg;
		timeout_ms = other.timeout_ms;
		log = other.log;
		next_query_flagIN = other.next_query_flagIN;
		other.init();
	}
};
|
|
|
|
/**
|
|
* @brief Two-Phase Discovery Catalog Schema Manager
|
|
*
|
|
* This class manages a comprehensive SQLite catalog for database discovery with two layers:
|
|
* 1. Deterministic Layer: Static metadata harvested from MySQL INFORMATION_SCHEMA
|
|
* 2. LLM Agent Layer: Semantic interpretations generated by LLM agents
|
|
*
|
|
* Schema separates deterministic metadata (runs, objects, columns, indexes, fks)
|
|
* from LLM-generated semantics (summaries, domains, metrics, question templates).
|
|
*/
|
|
class Discovery_Schema {
|
|
private:
|
|
SQLite3DB* db;
|
|
std::string db_path;
|
|
|
|
// MCP query rules management
|
|
std::vector<MCP_Query_Rule*> mcp_query_rules;
|
|
pthread_rwlock_t mcp_rules_lock;
|
|
volatile unsigned int mcp_rules_version;
|
|
|
|
// MCP query digest statistics
|
|
std::unordered_map<std::string, std::unordered_map<uint64_t, void*>> mcp_digest_umap;
|
|
pthread_rwlock_t mcp_digest_rwlock;
|
|
|
|
/**
|
|
* @brief Initialize catalog schema with all tables
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int init_schema();
|
|
|
|
/**
|
|
* @brief Create deterministic layer tables
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int create_deterministic_tables();
|
|
|
|
/**
|
|
* @brief Create LLM agent layer tables
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int create_llm_tables();
|
|
|
|
/**
|
|
* @brief Create FTS5 indexes
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int create_fts_tables();
|
|
|
|
public:
|
|
/**
|
|
* @brief Constructor
|
|
* @param path Path to the catalog database file
|
|
*/
|
|
Discovery_Schema(const std::string& path);
|
|
|
|
/**
|
|
* @brief Destructor
|
|
*/
|
|
~Discovery_Schema();
|
|
|
|
/**
|
|
* @brief Initialize the catalog database
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int init();
|
|
|
|
/**
|
|
* @brief Close the catalog database
|
|
*/
|
|
void close();
|
|
|
|
/**
|
|
* @brief Resolve schema name or run_id to a run_id within a target scope
|
|
*
|
|
* If input is a numeric run_id, returns it as-is.
|
|
* If input is a schema name, finds the latest run_id for that schema.
|
|
*
|
|
* @param target_id Required target scope identifier
|
|
* @param run_id_or_schema Either a numeric run_id or a schema name
|
|
* @return run_id on success, -1 if schema not found
|
|
*/
|
|
int resolve_run_id(const std::string& target_id, const std::string& run_id_or_schema);
|
|
|
|
/**
|
|
* @brief Create a new discovery run
|
|
*
|
|
* @param target_id Logical target identifier that produced this run
|
|
* @param protocol Backend protocol for this run (mysql|pgsql)
|
|
* @param source_dsn Data source identifier (e.g., "mysql://host:port/")
|
|
* @param server_version Backend server version string
|
|
* @param notes Optional notes for this run
|
|
* @return run_id on success, -1 on error
|
|
*/
|
|
int create_run(
|
|
const std::string& target_id,
|
|
const std::string& protocol,
|
|
const std::string& source_dsn,
|
|
const std::string& server_version,
|
|
const std::string& notes = ""
|
|
);
|
|
|
|
/**
|
|
* @brief Finish a discovery run
|
|
*
|
|
* @param run_id The run ID to finish
|
|
* @param notes Optional completion notes
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int finish_run(int run_id, const std::string& notes = "");
|
|
|
|
/**
|
|
* @brief Get run ID info
|
|
*
|
|
* @param run_id The run ID
|
|
* @return JSON string with run info
|
|
*/
|
|
std::string get_run_info(int run_id);
|
|
|
|
/**
|
|
* @brief Create a new LLM agent run bound to a deterministic run
|
|
*
|
|
* @param run_id The deterministic run ID
|
|
* @param model_name Model name (e.g., "claude-3.5-sonnet")
|
|
* @param prompt_hash Optional hash of system prompt
|
|
* @param budget_json Optional budget JSON
|
|
* @return agent_run_id on success, -1 on error
|
|
*/
|
|
int create_agent_run(
|
|
int run_id,
|
|
const std::string& model_name,
|
|
const std::string& prompt_hash = "",
|
|
const std::string& budget_json = ""
|
|
);
|
|
|
|
/**
|
|
* @brief Finish an agent run
|
|
*
|
|
* @param agent_run_id The agent run ID
|
|
* @param status Status: "success" or "failed"
|
|
* @param error Optional error message
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int finish_agent_run(
|
|
int agent_run_id,
|
|
const std::string& status,
|
|
const std::string& error = ""
|
|
);
|
|
|
|
/**
|
|
* @brief Get the last (most recent) agent_run_id for a given run_id
|
|
*
|
|
* @param run_id Run ID
|
|
* @return agent_run_id on success, 0 if no agent runs exist for this run_id
|
|
*/
|
|
int get_last_agent_run_id(int run_id);
|
|
|
|
/**
|
|
* @brief Insert a schema
|
|
*
|
|
* @param run_id Run ID
|
|
* @param schema_name Schema/database name
|
|
* @param charset Character set
|
|
* @param collation Collation
|
|
* @return schema_id on success, -1 on error
|
|
*/
|
|
int insert_schema(
|
|
int run_id,
|
|
const std::string& schema_name,
|
|
const std::string& charset = "",
|
|
const std::string& collation = ""
|
|
);
|
|
|
|
/**
|
|
* @brief Insert an object (table/view/routine/trigger)
|
|
*
|
|
* @param run_id Run ID
|
|
* @param schema_name Schema name
|
|
* @param object_name Object name
|
|
* @param object_type Object type (table/view/routine/trigger)
|
|
* @param engine Storage engine (for tables)
|
|
* @param table_rows_est Estimated row count
|
|
* @param data_length Data length in bytes
|
|
* @param index_length Index length in bytes
|
|
* @param create_time Creation time
|
|
* @param update_time Last update time
|
|
* @param object_comment Object comment
|
|
* @param definition_sql Definition SQL (for views/routines)
|
|
* @return object_id on success, -1 on error
|
|
*/
|
|
int insert_object(
|
|
int run_id,
|
|
const std::string& schema_name,
|
|
const std::string& object_name,
|
|
const std::string& object_type,
|
|
const std::string& engine = "",
|
|
long table_rows_est = 0,
|
|
long data_length = 0,
|
|
long index_length = 0,
|
|
const std::string& create_time = "",
|
|
const std::string& update_time = "",
|
|
const std::string& object_comment = "",
|
|
const std::string& definition_sql = ""
|
|
);
|
|
|
|
/**
|
|
* @brief Insert a column
|
|
*
|
|
* @param object_id Object ID
|
|
* @param ordinal_pos Ordinal position
|
|
* @param column_name Column name
|
|
* @param data_type Data type
|
|
* @param column_type Full column type
|
|
* @param is_nullable Is nullable (0/1)
|
|
* @param column_default Default value
|
|
* @param extra Extra info (auto_increment, etc.)
|
|
* @param charset Character set
|
|
* @param collation Collation
|
|
* @param column_comment Column comment
|
|
* @param is_pk Is primary key (0/1)
|
|
* @param is_unique Is unique (0/1)
|
|
* @param is_indexed Is indexed (0/1)
|
|
* @param is_time Is time type (0/1)
|
|
* @param is_id_like Is ID-like name (0/1)
|
|
* @return column_id on success, -1 on error
|
|
*/
|
|
int insert_column(
|
|
int object_id,
|
|
int ordinal_pos,
|
|
const std::string& column_name,
|
|
const std::string& data_type,
|
|
const std::string& column_type = "",
|
|
int is_nullable = 1,
|
|
const std::string& column_default = "",
|
|
const std::string& extra = "",
|
|
const std::string& charset = "",
|
|
const std::string& collation = "",
|
|
const std::string& column_comment = "",
|
|
int is_pk = 0,
|
|
int is_unique = 0,
|
|
int is_indexed = 0,
|
|
int is_time = 0,
|
|
int is_id_like = 0
|
|
);
|
|
|
|
/**
|
|
* @brief Insert an index
|
|
*
|
|
* @param object_id Object ID
|
|
* @param index_name Index name
|
|
* @param is_unique Is unique (0/1)
|
|
* @param is_primary Is primary key (0/1)
|
|
* @param index_type Index type (BTREE/HASH/FULLTEXT)
|
|
* @param cardinality Cardinality
|
|
* @return index_id on success, -1 on error
|
|
*/
|
|
int insert_index(
|
|
int object_id,
|
|
const std::string& index_name,
|
|
int is_unique = 0,
|
|
int is_primary = 0,
|
|
const std::string& index_type = "",
|
|
long cardinality = 0
|
|
);
|
|
|
|
/**
|
|
* @brief Insert an index column
|
|
*
|
|
* @param index_id Index ID
|
|
* @param seq_in_index Sequence in index
|
|
* @param column_name Column name
|
|
* @param sub_part Sub-part length
|
|
* @param collation Collation (A/D)
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int insert_index_column(
|
|
int index_id,
|
|
int seq_in_index,
|
|
const std::string& column_name,
|
|
int sub_part = 0,
|
|
const std::string& collation = "A"
|
|
);
|
|
|
|
/**
|
|
* @brief Insert a foreign key
|
|
*
|
|
* @param run_id Run ID
|
|
* @param child_object_id Child object ID
|
|
* @param fk_name FK name
|
|
* @param parent_schema_name Parent schema name
|
|
* @param parent_object_name Parent object name
|
|
* @param on_update ON UPDATE rule
|
|
* @param on_delete ON DELETE rule
|
|
* @return fk_id on success, -1 on error
|
|
*/
|
|
int insert_foreign_key(
|
|
int run_id,
|
|
int child_object_id,
|
|
const std::string& fk_name,
|
|
const std::string& parent_schema_name,
|
|
const std::string& parent_object_name,
|
|
const std::string& on_update = "",
|
|
const std::string& on_delete = ""
|
|
);
|
|
|
|
/**
|
|
* @brief Insert a foreign key column
|
|
*
|
|
* @param fk_id FK ID
|
|
* @param seq Sequence number
|
|
* @param child_column Child column name
|
|
* @param parent_column Parent column name
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int insert_foreign_key_column(
|
|
int fk_id,
|
|
int seq,
|
|
const std::string& child_column,
|
|
const std::string& parent_column
|
|
);
|
|
|
|
/**
|
|
* @brief Update object derived flags
|
|
*
|
|
* Updates has_primary_key, has_foreign_keys, has_time_column flags
|
|
* based on actual data in columns, indexes, foreign_keys tables.
|
|
*
|
|
* @param run_id Run ID
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int update_object_flags(int run_id);
|
|
|
|
/**
|
|
* @brief Insert or update a profile
|
|
*
|
|
* @param run_id Run ID
|
|
* @param object_id Object ID
|
|
* @param profile_kind Profile kind (table_quick, column, time_range, etc.)
|
|
* @param profile_json Profile data as JSON string
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int upsert_profile(
|
|
int run_id,
|
|
int object_id,
|
|
const std::string& profile_kind,
|
|
const std::string& profile_json
|
|
);
|
|
|
|
/**
|
|
* @brief Rebuild FTS index for a run
|
|
*
|
|
* Deletes and rebuilds the fts_objects index for all objects in a run.
|
|
*
|
|
* @param run_id Run ID
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int rebuild_fts_index(int run_id);
|
|
|
|
/**
|
|
* @brief Full-text search over objects
|
|
*
|
|
* @param run_id Run ID
|
|
* @param query FTS5 query
|
|
* @param limit Max results
|
|
* @param object_type Optional filter by object type
|
|
* @param schema_name Optional filter by schema name
|
|
* @return JSON array of matching objects
|
|
*/
|
|
std::string fts_search(
|
|
int run_id,
|
|
const std::string& query,
|
|
int limit = 25,
|
|
const std::string& object_type = "",
|
|
const std::string& schema_name = ""
|
|
);
|
|
|
|
/**
|
|
* @brief Get object by ID or key
|
|
*
|
|
* @param run_id Run ID
|
|
* @param object_id Object ID (optional)
|
|
* @param schema_name Schema name (if using object_key)
|
|
* @param object_name Object name (if using object_key)
|
|
* @param include_definition Include view/routine definitions
|
|
* @param include_profiles Include profile data
|
|
* @return JSON string with object details
|
|
*/
|
|
std::string get_object(
|
|
int run_id,
|
|
int object_id = -1,
|
|
const std::string& schema_name = "",
|
|
const std::string& object_name = "",
|
|
bool include_definition = false,
|
|
bool include_profiles = true
|
|
);
|
|
|
|
/**
|
|
* @brief List objects with pagination
|
|
*
|
|
* @param run_id Run ID
|
|
* @param schema_name Optional schema filter
|
|
* @param object_type Optional object type filter
|
|
* @param order_by Order by field (name/rows_est_desc/size_desc)
|
|
* @param page_size Page size
|
|
* @param page_token Page token (empty for first page)
|
|
* @return JSON string with results and next page token
|
|
*/
|
|
std::string list_objects(
|
|
int run_id,
|
|
const std::string& schema_name = "",
|
|
const std::string& object_type = "",
|
|
const std::string& order_by = "name",
|
|
int page_size = 50,
|
|
const std::string& page_token = ""
|
|
);
|
|
|
|
/**
|
|
* @brief Get relationships for an object
|
|
*
|
|
* Returns foreign keys, view dependencies, and inferred relationships.
|
|
*
|
|
* @param run_id Run ID
|
|
* @param object_id Object ID
|
|
* @param include_inferred Include LLM-inferred relationships
|
|
* @param min_confidence Minimum confidence for inferred relationships
|
|
* @return JSON string with relationships
|
|
*/
|
|
std::string get_relationships(
|
|
int run_id,
|
|
int object_id,
|
|
bool include_inferred = true,
|
|
double min_confidence = 0.0
|
|
);
|
|
|
|
/**
|
|
* @brief Append an agent event
|
|
*
|
|
* @param agent_run_id Agent run ID
|
|
* @param event_type Event type (tool_call/tool_result/note/decision)
|
|
* @param payload_json Event payload as JSON string
|
|
* @return event_id on success, -1 on error
|
|
*/
|
|
int append_agent_event(
|
|
int agent_run_id,
|
|
const std::string& event_type,
|
|
const std::string& payload_json
|
|
);
|
|
|
|
/**
|
|
* @brief Upsert an LLM object summary
|
|
*
|
|
* @param agent_run_id Agent run ID
|
|
* @param run_id Deterministic run ID
|
|
* @param object_id Object ID
|
|
* @param summary_json Summary data as JSON string
|
|
* @param confidence Confidence score (0.0-1.0)
|
|
* @param status Status (draft/validated/stable)
|
|
* @param sources_json Optional sources evidence
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int upsert_llm_summary(
|
|
int agent_run_id,
|
|
int run_id,
|
|
int object_id,
|
|
const std::string& summary_json,
|
|
double confidence = 0.5,
|
|
const std::string& status = "draft",
|
|
const std::string& sources_json = ""
|
|
);
|
|
|
|
/**
|
|
* @brief Get LLM summary for an object
|
|
*
|
|
* @param run_id Run ID
|
|
* @param object_id Object ID
|
|
* @param agent_run_id Optional specific agent run ID
|
|
* @param latest Get latest summary across all agent runs
|
|
* @return JSON string with summary or null
|
|
*/
|
|
std::string get_llm_summary(
|
|
int run_id,
|
|
int object_id,
|
|
int agent_run_id = -1,
|
|
bool latest = true
|
|
);
|
|
|
|
/**
|
|
* @brief Upsert an LLM-inferred relationship
|
|
*
|
|
* @param agent_run_id Agent run ID
|
|
* @param run_id Deterministic run ID
|
|
* @param child_object_id Child object ID
|
|
* @param child_column Child column name
|
|
* @param parent_object_id Parent object ID
|
|
* @param parent_column Parent column name
|
|
* @param rel_type Relationship type (fk_like/bridge/polymorphic/etc)
|
|
* @param confidence Confidence score
|
|
* @param evidence_json Evidence JSON string
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int upsert_llm_relationship(
|
|
int agent_run_id,
|
|
int run_id,
|
|
int child_object_id,
|
|
const std::string& child_column,
|
|
int parent_object_id,
|
|
const std::string& parent_column,
|
|
const std::string& rel_type = "fk_like",
|
|
double confidence = 0.6,
|
|
const std::string& evidence_json = ""
|
|
);
|
|
|
|
/**
|
|
* @brief Upsert a domain
|
|
*
|
|
* @param agent_run_id Agent run ID
|
|
* @param run_id Deterministic run ID
|
|
* @param domain_key Domain key (e.g., "billing", "sales")
|
|
* @param title Domain title
|
|
* @param description Domain description
|
|
* @param confidence Confidence score
|
|
* @return domain_id on success, -1 on error
|
|
*/
|
|
int upsert_llm_domain(
|
|
int agent_run_id,
|
|
int run_id,
|
|
const std::string& domain_key,
|
|
const std::string& title = "",
|
|
const std::string& description = "",
|
|
double confidence = 0.6
|
|
);
|
|
|
|
/**
|
|
* @brief Set domain members
|
|
*
|
|
* Replaces all members of a domain with the provided list.
|
|
*
|
|
* @param agent_run_id Agent run ID
|
|
* @param run_id Deterministic run ID
|
|
* @param domain_key Domain key
|
|
* @param members_json Members JSON array with object_id, role, confidence
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int set_domain_members(
|
|
int agent_run_id,
|
|
int run_id,
|
|
const std::string& domain_key,
|
|
const std::string& members_json
|
|
);
|
|
|
|
/**
|
|
* @brief Upsert a metric
|
|
*
|
|
* @param agent_run_id Agent run ID
|
|
* @param run_id Deterministic run ID
|
|
* @param metric_key Metric key (e.g., "orders.count")
|
|
* @param title Metric title
|
|
* @param description Metric description
|
|
* @param domain_key Optional domain key
|
|
* @param grain Grain (day/order/customer/etc)
|
|
* @param unit Unit (USD/count/ms/etc)
|
|
* @param sql_template Optional SQL template
|
|
* @param depends_json Optional dependencies JSON
|
|
* @param confidence Confidence score
|
|
* @return metric_id on success, -1 on error
|
|
*/
|
|
int upsert_llm_metric(
|
|
int agent_run_id,
|
|
int run_id,
|
|
const std::string& metric_key,
|
|
const std::string& title,
|
|
const std::string& description = "",
|
|
const std::string& domain_key = "",
|
|
const std::string& grain = "",
|
|
const std::string& unit = "",
|
|
const std::string& sql_template = "",
|
|
const std::string& depends_json = "",
|
|
double confidence = 0.6
|
|
);
|
|
|
|
/**
|
|
* @brief Add a question template
|
|
*
|
|
* @param agent_run_id Agent run ID
|
|
* @param run_id Deterministic run ID
|
|
* @param title Template title
|
|
* @param question_nl Natural language question
|
|
* @param template_json Query plan template JSON
|
|
* @param example_sql Optional example SQL
|
|
* @param related_objects JSON array of related object names (tables/views)
|
|
* @param confidence Confidence score
|
|
* @return template_id on success, -1 on error
|
|
*/
|
|
int add_question_template(
|
|
int agent_run_id,
|
|
int run_id,
|
|
const std::string& title,
|
|
const std::string& question_nl,
|
|
const std::string& template_json,
|
|
const std::string& example_sql = "",
|
|
const std::string& related_objects = "",
|
|
double confidence = 0.6
|
|
);
|
|
|
|
/**
|
|
* @brief Add an LLM note
|
|
*
|
|
* @param agent_run_id Agent run ID
|
|
* @param run_id Deterministic run ID
|
|
* @param scope Note scope (global/schema/object/domain)
|
|
* @param object_id Optional object ID
|
|
* @param domain_key Optional domain key
|
|
* @param title Note title
|
|
* @param body Note body
|
|
* @param tags_json Optional tags JSON array
|
|
* @return note_id on success, -1 on error
|
|
*/
|
|
int add_llm_note(
|
|
int agent_run_id,
|
|
int run_id,
|
|
const std::string& scope,
|
|
int object_id = -1,
|
|
const std::string& domain_key = "",
|
|
const std::string& title = "",
|
|
const std::string& body = "",
|
|
const std::string& tags_json = ""
|
|
);
|
|
|
|
/**
|
|
* @brief Full-text search over LLM artifacts
|
|
*
|
|
* @param run_id Run ID
|
|
* @param query FTS query (empty to list all)
|
|
* @param limit Max results
|
|
* @param include_objects Include full object details for question templates
|
|
* @return JSON array of matching LLM artifacts with example_sql and related_objects
|
|
*/
|
|
std::string fts_search_llm(
|
|
int run_id,
|
|
const std::string& query,
|
|
int limit = 25,
|
|
bool include_objects = false
|
|
);
|
|
|
|
/**
|
|
* @brief Log an LLM search query
|
|
*
|
|
* @param run_id Run ID
|
|
* @param query Search query string
|
|
* @param lmt Result limit
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int log_llm_search(
|
|
int run_id,
|
|
const std::string& query,
|
|
int lmt = 25
|
|
);
|
|
|
|
/**
|
|
* @brief Log RAG FTS search operation
|
|
*
|
|
* Logs RAG full-text search operations to the rag_search_log table.
|
|
* Similar to log_llm_search() but for RAG searches.
|
|
*
|
|
* @param query Search query string
|
|
* @param k Number of results requested
|
|
* @param filters JSON string of filters applied (empty if none)
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int log_rag_search_fts(
|
|
const std::string& query,
|
|
int k,
|
|
const std::string& filters = ""
|
|
);
|
|
|
|
/**
|
|
* @brief Log MCP tool invocation via /mcp/query/ endpoint
|
|
* @param tool_name Name of the tool that was called
|
|
* @param schema Schema name (empty if not applicable)
|
|
* @param run_id Run ID (0 or -1 if not applicable)
|
|
* @param start_time Start monotonic time (microseconds)
|
|
* @param execution_time Execution duration (microseconds)
|
|
* @param error Error message (empty if success)
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int log_query_tool_call(
|
|
const std::string& tool_name,
|
|
const std::string& schema,
|
|
int run_id,
|
|
unsigned long long start_time,
|
|
unsigned long long execution_time,
|
|
const std::string& error
|
|
);
|
|
|
|
/**
|
|
* @brief Get database handle for direct access
|
|
* @return SQLite3DB pointer
|
|
*/
|
|
SQLite3DB* get_db() { return db; }
|
|
|
|
/**
|
|
* @brief Get the database file path
|
|
* @return Database file path
|
|
*/
|
|
std::string get_db_path() const { return db_path; }
|
|
|
|
// ============================================================
|
|
// MCP QUERY RULES
|
|
// ============================================================
|
|
|
|
/**
|
|
* @brief Load MCP query rules from SQLite
|
|
*/
|
|
void load_mcp_query_rules(SQLite3_result* resultset);
|
|
|
|
/**
|
|
* @brief Evaluate MCP query rules for a tool invocation
|
|
* @return MCP_Query_Processor_Output object populated with actions from matching rules
|
|
* Caller is responsible for destroying the returned object.
|
|
*/
|
|
MCP_Query_Processor_Output* evaluate_mcp_query_rules(
|
|
const std::string& tool_name,
|
|
const std::string& username,
|
|
const std::string& target_id,
|
|
const std::string& schemaname,
|
|
const nlohmann::json& arguments,
|
|
const std::string& original_query
|
|
);
|
|
|
|
/**
|
|
* @brief Get current MCP query rules as resultset
|
|
*/
|
|
SQLite3_result* get_mcp_query_rules();
|
|
|
|
/**
|
|
* @brief Get stats for MCP query rules (hits per rule)
|
|
*/
|
|
SQLite3_result* get_stats_mcp_query_rules();
|
|
|
|
// ============================================================
|
|
// MCP QUERY DIGEST
|
|
// ============================================================
|
|
|
|
/**
|
|
* @brief Update MCP query digest statistics
|
|
*/
|
|
void update_mcp_query_digest(
|
|
const std::string& tool_name,
|
|
int run_id,
|
|
uint64_t digest,
|
|
const std::string& digest_text,
|
|
unsigned long long duration_us,
|
|
time_t timestamp
|
|
);
|
|
|
|
/**
|
|
* @brief Get MCP query digest statistics
|
|
* @param reset If true, reset stats after retrieval
|
|
*/
|
|
SQLite3_result* get_mcp_query_digest(bool reset = false);
|
|
|
|
/**
|
|
* @brief Compute MCP query digest hash using SpookyHash
|
|
*/
|
|
static uint64_t compute_mcp_digest(
|
|
const std::string& tool_name,
|
|
const nlohmann::json& arguments
|
|
);
|
|
|
|
/**
|
|
* @brief Fingerprint MCP query arguments (replace literals with ?)
|
|
*/
|
|
static std::string fingerprint_mcp_args(const nlohmann::json& arguments);
|
|
};
|
|
|
|
#endif /* PROXYSQLGENAI */
|
|
|
|
#endif /* CLASS_DISCOVERY_SCHEMA_H */
|