mirror of https://github.com/sysown/proxysql
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
421 lines
10 KiB
421 lines
10 KiB
#ifndef CLASS_STATIC_HARVESTER_H
|
|
#define CLASS_STATIC_HARVESTER_H
|
|
|
|
#include "Discovery_Schema.h"
|
|
#include "cpp.h"
|
|
#include <string>
|
|
#include <vector>
|
|
#include <memory>
|
|
#include <pthread.h>
|
|
|
|
// Forward declaration for MYSQL
|
|
typedef struct st_mysql MYSQL;
|
|
|
|
/**
 * @brief Static Metadata Harvester from MySQL INFORMATION_SCHEMA
 *
 * This class performs deterministic metadata extraction from MySQL's
 * INFORMATION_SCHEMA and stores it in a Discovery_Schema catalog.
 *
 * Harvest stages:
 * 1. Schemas/Databases
 * 2. Objects (tables/views/routines/triggers)
 * 3. Columns with derived hints (is_time, is_id_like)
 * 4. Indexes and index columns
 * 5. Foreign keys and FK columns
 * 6. View definitions
 * 7. Quick profiles (metadata-based analysis)
 * 8. FTS5 index rebuild
 */
|
|
class Static_Harvester {
|
|
private:
|
|
// MySQL connection
|
|
std::string mysql_host;
|
|
int mysql_port;
|
|
std::string mysql_user;
|
|
std::string mysql_password;
|
|
std::string mysql_schema; // Default schema (can be empty)
|
|
MYSQL* mysql_conn;
|
|
pthread_mutex_t conn_lock; ///< Mutex protecting MySQL connection
|
|
|
|
// Discovery schema
|
|
Discovery_Schema* catalog;
|
|
|
|
// Current run state
|
|
int current_run_id;
|
|
std::string source_dsn;
|
|
std::string mysql_version;
|
|
|
|
// Internal helper methods
|
|
|
|
/**
|
|
* @brief Connect to MySQL server
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int connect_mysql();
|
|
|
|
/**
|
|
* @brief Disconnect from MySQL server
|
|
*/
|
|
void disconnect_mysql();
|
|
|
|
/**
|
|
* @brief Execute query and return results
|
|
* @param query SQL query
|
|
* @param results Output: vector of result rows
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int execute_query(const std::string& query, std::vector<std::vector<std::string>>& results);
|
|
|
|
/**
|
|
* @brief Get MySQL version
|
|
* @return MySQL version string
|
|
*/
|
|
std::string get_mysql_version();
|
|
|
|
/**
|
|
* @brief Check if data type is a time type
|
|
* @param data_type Data type string
|
|
* @return true if time type, false otherwise
|
|
*/
|
|
static bool is_time_type(const std::string& data_type);
|
|
|
|
/**
|
|
* @brief Check if column name is ID-like
|
|
* @param column_name Column name
|
|
* @return true if ID-like, false otherwise
|
|
*/
|
|
static bool is_id_like_name(const std::string& column_name);
|
|
|
|
/**
|
|
* @brief Validate schema name for safe use in SQL queries
|
|
*
|
|
* Validates that a schema name contains only safe characters
|
|
* (alphanumeric, underscore, dollar sign) to prevent SQL injection
|
|
* when used in string concatenation for INFORMATION_SCHEMA queries.
|
|
*
|
|
* @param name Schema name to validate
|
|
* @return true if safe to use, false otherwise
|
|
*/
|
|
static bool is_valid_schema_name(const std::string& name);
|
|
|
|
/**
|
|
* @brief Escape a string for safe use in SQL queries
|
|
*
|
|
* Escapes single quotes by doubling them to prevent SQL injection
|
|
* when strings are used in string concatenation for SQL queries.
|
|
*
|
|
* @param str String to escape
|
|
* @return Escaped string with single quotes doubled
|
|
*/
|
|
static std::string escape_sql_string(const std::string& str);
|
|
|
|
public:
|
|
/**
|
|
* @brief Constructor
|
|
*
|
|
* @param host MySQL host address
|
|
* @param port MySQL port
|
|
* @param user MySQL username
|
|
* @param password MySQL password
|
|
* @param schema Default schema (empty for all schemas)
|
|
* @param catalog_path Path to catalog database
|
|
*/
|
|
Static_Harvester(
|
|
const std::string& host,
|
|
int port,
|
|
const std::string& user,
|
|
const std::string& password,
|
|
const std::string& schema,
|
|
const std::string& catalog_path
|
|
);
|
|
|
|
/**
|
|
* @brief Destructor
|
|
*/
|
|
~Static_Harvester();
|
|
|
|
/**
|
|
* @brief Initialize the harvester
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int init();
|
|
|
|
/**
|
|
* @brief Close connections and cleanup
|
|
*/
|
|
void close();
|
|
|
|
/**
|
|
* @brief Start a new discovery run
|
|
*
|
|
* Creates a new run entry in the catalog and stores run_id.
|
|
*
|
|
* @param notes Optional notes for this run
|
|
* @return run_id on success, -1 on error
|
|
*/
|
|
int start_run(const std::string& notes = "");
|
|
|
|
/**
|
|
* @brief Finish the current discovery run
|
|
*
|
|
* Updates the run entry with finish timestamp and notes.
|
|
*
|
|
* @param notes Optional completion notes
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int finish_run(const std::string& notes = "");
|
|
|
|
/**
|
|
* @brief Get the current run ID
|
|
* @return Current run_id, or -1 if no active run
|
|
*/
|
|
int get_run_id() const { return current_run_id; }
|
|
|
|
// ========== Harvest Stages ==========
|
|
|
|
/**
|
|
* @brief Harvest schemas/databases
|
|
*
|
|
* Queries information_schema.SCHEMATA and inserts into catalog.
|
|
*
|
|
* @param only_schema Optional filter for single schema
|
|
* @return Number of schemas harvested, or -1 on error
|
|
*/
|
|
int harvest_schemas(const std::string& only_schema = "");
|
|
|
|
/**
|
|
* @brief Harvest objects (tables/views/routines/triggers)
|
|
*
|
|
* Queries information_schema.TABLES and ROUTINES.
|
|
* Also harvests view definitions.
|
|
*
|
|
* @param only_schema Optional filter for single schema
|
|
* @return Number of objects harvested, or -1 on error
|
|
*/
|
|
int harvest_objects(const std::string& only_schema = "");
|
|
|
|
/**
|
|
* @brief Harvest columns with derived hints
|
|
*
|
|
* Queries information_schema.COLUMNS and computes:
|
|
* - is_time: date/datetime/timestamp/time/year
|
|
* - is_id_like: column_name REGEXP '(^id$|_id$)'
|
|
*
|
|
* @param only_schema Optional filter for single schema
|
|
* @return Number of columns harvested, or -1 on error
|
|
*/
|
|
int harvest_columns(const std::string& only_schema = "");
|
|
|
|
/**
|
|
* @brief Harvest indexes and index columns
|
|
*
|
|
* Queries information_schema.STATISTICS.
|
|
* Marks is_pk, is_unique, is_indexed on columns.
|
|
*
|
|
* @param only_schema Optional filter for single schema
|
|
* @return Number of indexes harvested, or -1 on error
|
|
*/
|
|
int harvest_indexes(const std::string& only_schema = "");
|
|
|
|
/**
|
|
* @brief Harvest foreign keys
|
|
*
|
|
* Queries information_schema.KEY_COLUMN_USAGE and
|
|
* REFERENTIAL_CONSTRAINTS.
|
|
*
|
|
* @param only_schema Optional filter for single schema
|
|
* @return Number of foreign keys harvested, or -1 on error
|
|
*/
|
|
int harvest_foreign_keys(const std::string& only_schema = "");
|
|
|
|
/**
|
|
* @brief Harvest view definitions
|
|
*
|
|
* Queries information_schema.VIEWS and stores VIEW_DEFINITION.
|
|
*
|
|
* @param only_schema Optional filter for single schema
|
|
* @return Number of views updated, or -1 on error
|
|
*/
|
|
int harvest_view_definitions(const std::string& only_schema = "");
|
|
|
|
/**
|
|
* @brief Build quick profiles (metadata-only analysis)
|
|
*
|
|
* Analyzes metadata to derive:
|
|
* - guessed_kind: log/event, fact, entity, unknown
|
|
* - rows_est, size_bytes, engine
|
|
* - has_primary_key, has_foreign_keys, has_time_column
|
|
*
|
|
* Stores as 'table_quick' profile.
|
|
*
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int build_quick_profiles();
|
|
|
|
/**
|
|
* @brief Rebuild FTS5 index for current run
|
|
*
|
|
* Deletes and rebuilds fts_objects index.
|
|
*
|
|
* @return 0 on success, -1 on error
|
|
*/
|
|
int rebuild_fts_index();
|
|
|
|
/**
|
|
* @brief Run full harvest (all stages)
|
|
*
|
|
* Executes all harvest stages in order:
|
|
* 1. Start run
|
|
* 2. Harvest schemas
|
|
* 3. Harvest objects
|
|
* 4. Harvest columns
|
|
* 5. Harvest indexes
|
|
* 6. Harvest foreign keys
|
|
* 7. Build quick profiles
|
|
* 8. Rebuild FTS index
|
|
* 9. Finish run
|
|
*
|
|
* @param only_schema Optional filter for single schema
|
|
* @param notes Optional run notes
|
|
* @return run_id on success, -1 on error
|
|
*/
|
|
int run_full_harvest(const std::string& only_schema = "", const std::string& notes = "");
|
|
|
|
/**
|
|
* @brief Get harvest statistics
|
|
*
|
|
* Returns counts of harvested objects for the current run.
|
|
*
|
|
* @return JSON string with statistics
|
|
*/
|
|
std::string get_harvest_stats();
|
|
|
|
/**
|
|
* @brief Get harvest statistics for a specific run
|
|
*
|
|
* Returns counts of harvested objects for the specified run_id.
|
|
*
|
|
* @param run_id The run ID to get stats for
|
|
* @return JSON string with statistics
|
|
*/
|
|
std::string get_harvest_stats(int run_id);
|
|
|
|
// ========== Data Structures for Query Results ==========
|
|
|
|
/**
|
|
* @brief Schema row structure
|
|
*/
|
|
struct SchemaRow {
|
|
std::string schema_name;
|
|
std::string charset;
|
|
std::string collation;
|
|
};
|
|
|
|
/**
|
|
* @brief Object row structure
|
|
*/
|
|
struct ObjectRow {
|
|
std::string schema_name;
|
|
std::string object_name;
|
|
std::string object_type;
|
|
std::string engine;
|
|
long table_rows_est;
|
|
long data_length;
|
|
long index_length;
|
|
std::string create_time;
|
|
std::string update_time;
|
|
std::string object_comment;
|
|
std::string definition_sql;
|
|
};
|
|
|
|
/**
|
|
* @brief Column row structure
|
|
*/
|
|
struct ColumnRow {
|
|
std::string schema_name;
|
|
std::string object_name;
|
|
int ordinal_pos;
|
|
std::string column_name;
|
|
std::string data_type;
|
|
std::string column_type;
|
|
int is_nullable;
|
|
std::string column_default;
|
|
std::string extra;
|
|
std::string charset;
|
|
std::string collation;
|
|
std::string column_comment;
|
|
};
|
|
|
|
/**
|
|
* @brief Index row structure
|
|
*/
|
|
struct IndexRow {
|
|
std::string schema_name;
|
|
std::string object_name;
|
|
std::string index_name;
|
|
int is_unique;
|
|
std::string index_type;
|
|
int seq_in_index;
|
|
std::string column_name;
|
|
int sub_part;
|
|
std::string collation;
|
|
long cardinality;
|
|
};
|
|
|
|
/**
|
|
* @brief Foreign key row structure
|
|
*/
|
|
struct FKRow {
|
|
std::string child_schema;
|
|
std::string child_table;
|
|
std::string fk_name;
|
|
std::string child_column;
|
|
std::string parent_schema;
|
|
std::string parent_table;
|
|
std::string parent_column;
|
|
int seq;
|
|
std::string on_update;
|
|
std::string on_delete;
|
|
};
|
|
|
|
// ========== Helper Query Methods (for testing) ==========
|
|
|
|
/**
|
|
* @brief Fetch schemas from MySQL
|
|
* @param filter Optional schema name filter
|
|
* @return Vector of SchemaRow
|
|
*/
|
|
std::vector<SchemaRow> fetch_schemas(const std::string& filter = "");
|
|
|
|
/**
|
|
* @brief Fetch tables/views from MySQL
|
|
* @param filter Optional schema name filter
|
|
* @return Vector of ObjectRow
|
|
*/
|
|
std::vector<ObjectRow> fetch_tables_views(const std::string& filter = "");
|
|
|
|
/**
|
|
* @brief Fetch columns from MySQL
|
|
* @param filter Optional schema name filter
|
|
* @return Vector of ColumnRow
|
|
*/
|
|
std::vector<ColumnRow> fetch_columns(const std::string& filter = "");
|
|
|
|
/**
|
|
* @brief Fetch indexes from MySQL
|
|
* @param filter Optional schema name filter
|
|
* @return Vector of IndexRow
|
|
*/
|
|
std::vector<IndexRow> fetch_indexes(const std::string& filter = "");
|
|
|
|
/**
|
|
* @brief Fetch foreign keys from MySQL
|
|
* @param filter Optional schema name filter
|
|
* @return Vector of FKRow
|
|
*/
|
|
std::vector<FKRow> fetch_foreign_keys(const std::string& filter = "");
|
|
};
|
|
|
|
#endif /* CLASS_STATIC_HARVESTER_H */
|