From 6a788e48c4dc6d213d2e872e78e341d4df5f261f Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Mon, 16 Feb 2026 23:45:19 +0000 Subject: [PATCH] mcp: make /mcp/query self-healing when targets/backends appear after startup Problem addressed: - MCP query endpoint could stay unusable with 'Tool Handler not initialized' after restart/reload flows. - This was triggered when Query_Tool_Handler could not build an executable pool at init time (for example profiles loaded before ONLINE servers), leaving runtime commands like 'LOAD MCP QUERY RULES FROM MEMORY' blocked behind a NULL query tool handler. - Users had to manually toggle MCP enablement to recover, which is the opposite of expected self-healing behavior. What this commit changes: 1) Query_Tool_Handler pool init is now resilient and idempotent - init_connection_pool() now starts with close() so reinitialization fully resets stale mysql/pgsql pool state before rebuilding from runtime profile + server tables. - If no executable targets are available, init_connection_pool() now returns success with a warning instead of hard failure. This allows the query tool handler (and /mcp/query endpoint) to stay initialized even before backends are ready. 2) Lazy auto-rebuild on first query usage - get_connection() and get_pgsql_connection() now: - refresh target registry before resolution, - attempt to use an existing pooled connection, - if unavailable, trigger a full pool rebuild (init_connection_pool()) and retry once. - This provides automatic recovery when hostgroups/servers/profiles are loaded or changed after MCP startup, without requiring manual MCP disable/enable. 3) Admin runtime rule load path attempts MCP recovery - load_mcp_query_rules_to_runtime() now detects NULL query tool handler and calls load_mcp_server() once before failing. - This turns a hard, immediate admin error into a self-recovery attempt consistent with MCP runtime semantics. 
Behavioral impact: - /mcp/query endpoint remains online even when there are temporarily zero executable targets. - As soon as compatible runtime targets/backends exist, run_sql_readonly/explain_sql can recover automatically on demand. - LOAD MCP QUERY RULES TO/FROM RUNTIME no longer fails immediately on first NULL handler condition; it retries after MCP server recovery. Validation performed: - Recompiled modified objects successfully: - lib/obj/Query_Tool_Handler.oo - lib/obj/ProxySQL_Admin.oo - Full TAP runtime test execution is not possible in this sandbox due to blocked local TCP socket creation; validation should be run in the normal test environment where ProxySQL/MySQL/PGSQL are reachable. --- lib/ProxySQL_Admin.cpp | 9 ++++- lib/Query_Tool_Handler.cpp | 82 +++++++++++++++++++++++++++++--------- 2 files changed, 72 insertions(+), 19 deletions(-) diff --git a/lib/ProxySQL_Admin.cpp b/lib/ProxySQL_Admin.cpp index fdbdbbec5..4543a5359 100644 --- a/lib/ProxySQL_Admin.cpp +++ b/lib/ProxySQL_Admin.cpp @@ -8001,7 +8001,14 @@ char* ProxySQL_Admin::load_mcp_query_rules_to_runtime() { if (!GloMCPH) return (char*)"MCP Handler not started: command impossible to run"; Query_Tool_Handler* qth = GloMCPH->query_tool_handler; - if (!qth) return (char*)"Query Tool Handler not initialized"; + if (!qth) { + proxy_warning("MCP query rules load requested but Query Tool Handler is NULL, attempting MCP server self-recovery\n"); + load_mcp_server(); + qth = GloMCPH->query_tool_handler; + if (!qth) { + return (char*)"Query Tool Handler not initialized"; + } + } // Get the discovery schema catalog Discovery_Schema* catalog = qth->get_catalog(); diff --git a/lib/Query_Tool_Handler.cpp b/lib/Query_Tool_Handler.cpp index 16a6e8406..20e3bd2cb 100644 --- a/lib/Query_Tool_Handler.cpp +++ b/lib/Query_Tool_Handler.cpp @@ -321,6 +321,8 @@ void Query_Tool_Handler::close() { } int Query_Tool_Handler::init_connection_pool() { + // Ensure re-initialization is idempotent when topology/auth changes at 
runtime. + close(); refresh_target_registry(); pthread_mutex_lock(&pool_lock); @@ -428,8 +430,8 @@ int Query_Tool_Handler::init_connection_pool() { pthread_mutex_unlock(&pool_lock); if ((pool_size + pg_pool_size) == 0) { - proxy_error("Query_Tool_Handler: No executable targets available\n"); - return -1; + proxy_warning("Query_Tool_Handler: No executable targets available yet (handler remains initialized)\n"); + return 0; } proxy_info( @@ -570,45 +572,89 @@ const Query_Tool_Handler::QueryTarget* Query_Tool_Handler::resolve_target(const } void* Query_Tool_Handler::get_connection(const std::string& target_id) { + const auto find_available_connection = [&](const std::string& resolved_target, const std::string& expected_auth_profile_id) -> void* { + pthread_mutex_lock(&pool_lock); + for (auto& conn : connection_pool) { + if (!conn.in_use && conn.target_id == resolved_target && conn.auth_profile_id == expected_auth_profile_id) { + conn.in_use = true; + void* mysql_ptr = conn.mysql; + pthread_mutex_unlock(&pool_lock); + return mysql_ptr; + } + } + pthread_mutex_unlock(&pool_lock); + return NULL; + }; + + refresh_target_registry(); const std::string resolved_target = target_id.empty() ? default_target_id : target_id; const QueryTarget* target = resolve_target(resolved_target); - if (target == NULL) { + if (target == NULL || !target->executable) { + proxy_error("Query_Tool_Handler: target '%s' is unknown or not executable\n", resolved_target.c_str()); return NULL; } - pthread_mutex_lock(&pool_lock); + void* mysql_ptr = find_available_connection(resolved_target, target->auth_profile_id); + if (mysql_ptr) { + return mysql_ptr; + } - for (auto& conn : connection_pool) { - if (!conn.in_use && conn.target_id == resolved_target && conn.auth_profile_id == target->auth_profile_id) { - conn.in_use = true; - pthread_mutex_unlock(&pool_lock); - return conn.mysql; + // Self-heal path: runtime targets/backends may have changed after handler startup. 
+ if (init_connection_pool() == 0) { + refresh_target_registry(); + const QueryTarget* refreshed_target = resolve_target(resolved_target); + if (refreshed_target && refreshed_target->executable) { + mysql_ptr = find_available_connection(resolved_target, refreshed_target->auth_profile_id); + if (mysql_ptr) { + return mysql_ptr; + } } } - pthread_mutex_unlock(&pool_lock); proxy_error("Query_Tool_Handler: No available connection for target '%s'\n", resolved_target.c_str()); return NULL; } void* Query_Tool_Handler::get_pgsql_connection(const std::string& target_id) { + const auto find_available_pg_connection = [&](const std::string& resolved_target, const std::string& expected_auth_profile_id) -> void* { + pthread_mutex_lock(&pool_lock); + for (auto& conn : pgsql_connection_pool) { + if (!conn.in_use && conn.target_id == resolved_target && conn.auth_profile_id == expected_auth_profile_id) { + conn.in_use = true; + void* pgconn_ptr = conn.pgconn; + pthread_mutex_unlock(&pool_lock); + return pgconn_ptr; + } + } + pthread_mutex_unlock(&pool_lock); + return NULL; + }; + + refresh_target_registry(); const std::string resolved_target = target_id.empty() ? default_target_id : target_id; const QueryTarget* target = resolve_target(resolved_target); - if (target == NULL) { + if (target == NULL || !target->executable) { + proxy_error("Query_Tool_Handler: target '%s' is unknown or not executable\n", resolved_target.c_str()); return NULL; } - pthread_mutex_lock(&pool_lock); + void* pgconn_ptr = find_available_pg_connection(resolved_target, target->auth_profile_id); + if (pgconn_ptr) { + return pgconn_ptr; + } - for (auto& conn : pgsql_connection_pool) { - if (!conn.in_use && conn.target_id == resolved_target && conn.auth_profile_id == target->auth_profile_id) { - conn.in_use = true; - pthread_mutex_unlock(&pool_lock); - return conn.pgconn; + // Self-heal path: runtime targets/backends may have changed after handler startup. 
+ if (init_connection_pool() == 0) { + refresh_target_registry(); + const QueryTarget* refreshed_target = resolve_target(resolved_target); + if (refreshed_target && refreshed_target->executable) { + pgconn_ptr = find_available_pg_connection(resolved_target, refreshed_target->auth_profile_id); + if (pgconn_ptr) { + return pgconn_ptr; + } } } - pthread_mutex_unlock(&pool_lock); proxy_error("Query_Tool_Handler: No available pgsql connection for target '%s'\n", resolved_target.c_str()); return NULL; }