#ifndef CLASS_MYSQL_TOOL_HANDLER_H #define CLASS_MYSQL_TOOL_HANDLER_H #include "MySQL_Catalog.h" #include "cpp.h" #include #include #include #include #include // Forward declaration for MYSQL (mysql.h is included via proxysql.h/cpp.h) typedef struct st_mysql MYSQL; /** * @brief MySQL Tool Handler for LLM Database Exploration * * This class provides tools for an LLM to safely explore a MySQL database: * - Discovery tools (list_schemas, list_tables, describe_table) * - Profiling tools (table_profile, column_profile) * - Sampling tools (sample_rows, sample_distinct) * - Query tools (run_sql_readonly, explain_sql) * - Relationship tools (suggest_joins, find_reference_candidates) * - Catalog tools (external memory for LLM discoveries) */ class MySQL_Tool_Handler { private: // Connection configuration std::vector mysql_hosts; ///< List of MySQL host addresses std::vector mysql_ports; ///< List of MySQL port numbers std::string mysql_user; ///< MySQL username for authentication std::string mysql_password; ///< MySQL password for authentication std::string mysql_schema; ///< Default schema/database name // Connection pool /** * @brief Represents a single MySQL connection in the pool * * Contains the MYSQL handle, connection details, and availability status. */ struct MySQLConnection { MYSQL* mysql; ///< MySQL connection handle (NULL if not connected) std::string host; ///< Host address for this connection int port; ///< Port number for this connection bool in_use; ///< True if connection is currently checked out }; std::vector connection_pool; ///< Pool of MySQL connections pthread_mutex_t pool_lock; ///< Mutex protecting connection pool access int pool_size; ///< Number of connections in the pool // Catalog for LLM memory MySQL_Catalog* catalog; ///< SQLite catalog for LLM discoveries // Query guardrails int max_rows; ///< Maximum rows to return (default 200) int timeout_ms; ///< Query timeout in milliseconds (default 2000) bool allow_select_star; ///< Allow SELECT * without LIMIT (default false) /** * @brief Initialize connection pool to backend MySQL servers * @return 0 on success, -1 on error */ int init_connection_pool(); /** * @brief Get a connection from the pool * @return Pointer to MYSQL connection, or NULL if none available */ MYSQL* get_connection(); /** * @brief Return a connection to the pool * @param mysql The MYSQL connection to return */ void return_connection(MYSQL* mysql); /** * @brief Execute a query and return results as JSON * @param query SQL query to execute * @return JSON with results or error */ std::string execute_query(const std::string& query); /** * @brief Validate SQL is read-only * @param query SQL to validate * @return true if safe, false otherwise */ bool validate_readonly_query(const std::string& query); /** * @brief Check if SQL contains dangerous keywords * @param query SQL to check * @return true if dangerous, false otherwise */ bool is_dangerous_query(const std::string& query); /** * @brief Sanitize SQL to prevent injection * @param query SQL to sanitize * @return Sanitized query */ std::string sanitize_query(const std::string& query); public: /** * @brief Constructor * @param hosts Comma-separated list of MySQL hosts * @param ports Comma-separated list of MySQL ports * @param user MySQL username * @param password MySQL password * @param schema Default schema/database * @param catalog_path Path to catalog database */ MySQL_Tool_Handler( const std::string& hosts, const std::string& ports, const std::string& user, const std::string& password, const std::string& schema, const std::string& catalog_path ); /** * @brief Destructor */ ~MySQL_Tool_Handler(); /** * @brief Initialize the tool handler * @return 0 on success, -1 on error */ int init(); /** * @brief Close connections and cleanup */ void close(); // ========== Inventory Tools ========== /** * @brief List available schemas/databases * @param page_token Pagination token (optional) * @param page_size Page size (default 50) * @return JSON array of schemas with metadata */ std::string list_schemas(const std::string& page_token = "", int page_size = 50); /** * @brief List tables in a schema * @param schema Schema name (empty for all schemas) * @param page_token Pagination token (optional) * @param page_size Page size (default 50) * @param name_filter Optional name pattern filter * @return JSON array of tables with size estimates */ std::string list_tables( const std::string& schema = "", const std::string& page_token = "", int page_size = 50, const std::string& name_filter = "" ); // ========== Structure Tools ========== /** * @brief Get detailed table schema * @param schema Schema name * @param table Table name * @return JSON with columns, types, keys, indexes */ std::string describe_table(const std::string& schema, const std::string& table); /** * @brief Get constraints (FK, unique, etc.) * @param schema Schema name * @param table Table name (empty for all tables in schema) * @return JSON array of constraints */ std::string get_constraints(const std::string& schema, const std::string& table = ""); /** * @brief Get view definition * @param schema Schema name * @param view View name * @return JSON with view details */ std::string describe_view(const std::string& schema, const std::string& view); // ========== Profiling Tools ========== /** * @brief Get quick table profile * @param schema Schema name * @param table Table name * @param mode Profile mode ("quick" or "full") * @return JSON with table statistics */ std::string table_profile( const std::string& schema, const std::string& table, const std::string& mode = "quick" ); /** * @brief Get column profile (distinct values, nulls, etc.) * @param schema Schema name * @param table Table name * @param column Column name * @param max_top_values Max distinct values to return (default 20) * @return JSON with column statistics */ std::string column_profile( const std::string& schema, const std::string& table, const std::string& column, int max_top_values = 20 ); // ========== Sampling Tools ========== /** * @brief Sample rows from a table (with hard cap) * @param schema Schema name * @param table Table name * @param columns Optional comma-separated column list * @param where Optional WHERE clause * @param order_by Optional ORDER BY clause * @param limit Max rows (hard cap default 20) * @return JSON array of rows */ std::string sample_rows( const std::string& schema, const std::string& table, const std::string& columns = "", const std::string& where = "", const std::string& order_by = "", int limit = 20 ); /** * @brief Sample distinct values from a column * @param schema Schema name * @param table Table name * @param column Column name * @param where Optional WHERE clause * @param limit Max distinct values (default 50) * @return JSON array of distinct values */ std::string sample_distinct( const std::string& schema, const std::string& table, const std::string& column, const std::string& where = "", int limit = 50 ); // ========== Query Tools ========== /** * @brief Execute read-only SQL with guardrails * @param sql SQL query * @param max_rows Max rows (enforced, default 200) * @param timeout_sec Timeout in seconds (enforced, default 2) * @return JSON with query results or error */ std::string run_sql_readonly( const std::string& sql, int max_rows = 200, int timeout_sec = 2 ); /** * @brief Explain a query (EXPLAIN/EXPLAIN ANALYZE) * @param sql SQL query to explain * @return JSON with execution plan */ std::string explain_sql(const std::string& sql); // ========== Relationship Inference Tools ========== /** * @brief Suggest joins between two tables (heuristic-based) * @param schema Schema name * @param table_a First table * @param table_b Second table (empty for auto-detect) * @param max_candidates Max suggestions (default 5) * @return JSON array of join candidates with confidence */ std::string suggest_joins( const std::string& schema, const std::string& table_a, const std::string& table_b = "", int max_candidates = 5 ); /** * @brief Find tables referenced by a column (e.g., orders.customer_id) * @param schema Schema name * @param table Table name * @param column Column name * @param max_tables Max results (default 50) * @return JSON array of candidate references */ std::string find_reference_candidates( const std::string& schema, const std::string& table, const std::string& column, int max_tables = 50 ); // ========== Catalog Tools (LLM Memory) ========== /** * @brief Upsert catalog entry * @param kind Entry kind * @param key Unique key * @param document JSON document * @param schema Schema name (empty for all schemas) * @param tags Comma-separated tags * @param links Comma-separated links * @return JSON result */ std::string catalog_upsert( const std::string& schema, const std::string& kind, const std::string& key, const std::string& document, const std::string& tags = "", const std::string& links = "" ); /** * @brief Get catalog entry * @param schema Schema name (empty for all schemas) * @param kind Entry kind * @param key Unique key * @return JSON document or error */ std::string catalog_get(const std::string& schema, const std::string& kind, const std::string& key); /** * @brief Search catalog * @param schema Schema name (empty for all schemas) * @param query Search query * @param kind Optional kind filter * @param tags Optional tag filter * @param limit Max results (default 20) * @param offset Pagination offset (default 0) * @return JSON array of matching entries */ std::string catalog_search( const std::string& schema, const std::string& query, const std::string& kind = "", const std::string& tags = "", int limit = 20, int offset = 0 ); /** * @brief List catalog entries * @param schema Schema name (empty for all schemas) * @param kind Optional kind filter * @param limit Max results per page (default 50) * @param offset Pagination offset (default 0) * @return JSON with total count and results array */ std::string catalog_list( const std::string& schema = "", const std::string& kind = "", int limit = 50, int offset = 0 ); /** * @brief Merge catalog entries * @param keys JSON array of keys to merge * @param target_key Target key for merged entry * @param kind Kind for merged entry (default "domain") * @param instructions Optional instructions * @return JSON result */ std::string catalog_merge( const std::string& keys, const std::string& target_key, const std::string& kind = "domain", const std::string& instructions = "" ); /** * @brief Delete catalog entry * @param schema Schema name (empty for all schemas) * @param kind Entry kind * @param key Unique key * @return JSON result */ std::string catalog_delete(const std::string& schema, const std::string& kind, const std::string& key); }; #endif /* CLASS_MYSQL_TOOL_HANDLER_H */