mcp: make /mcp/query self-healing when targets/backends appear after startup

Problem addressed:
- MCP query endpoint could stay unusable with 'Tool Handler not initialized' after restart/reload flows.
- This was triggered when Query_Tool_Handler could not build an executable pool at init time (for example profiles loaded before ONLINE servers), leaving runtime commands like 'LOAD MCP QUERY RULES FROM MEMORY' blocked behind a NULL query tool handler.
- Users had to manually toggle MCP enablement to recover, which is the opposite of expected self-healing behavior.

What this commit changes:

1) Query_Tool_Handler pool init is now resilient and idempotent
- init_connection_pool() now starts with close() so reinitialization fully resets stale mysql/pgsql pool state before rebuilding from runtime profile + server tables.
- If no executable targets are available, init_connection_pool() now returns success with a warning instead of hard failure. This allows the query tool handler (and /mcp/query endpoint) to stay initialized even before backends are ready.

2) Lazy auto-rebuild on first query usage
- get_connection() and get_pgsql_connection() now:
  - refresh target registry before resolution,
  - attempt to use an existing pooled connection,
  - if unavailable, trigger a full pool rebuild (init_connection_pool()) and retry once.
- This provides automatic recovery when hostgroups/servers/profiles are loaded or changed after MCP startup, without requiring manual MCP disable/enable.

3) Admin runtime rule load path attempts MCP recovery
- load_mcp_query_rules_to_runtime() now detects NULL query tool handler and calls load_mcp_server() once before failing.
- This turns a hard, immediate admin error into a self-recovery attempt consistent with MCP runtime semantics.

Behavioral impact:
- /mcp/query endpoint remains online even when there are temporarily zero executable targets.
- As soon as compatible runtime targets/backends exist, run_sql_readonly/explain_sql can recover automatically on demand.
- LOAD MCP QUERY RULES TO/FROM RUNTIME no longer fails immediately on first NULL handler condition; it retries after MCP server recovery.

Validation performed:
- Recompiled modified objects successfully:
  - lib/obj/Query_Tool_Handler.oo
  - lib/obj/ProxySQL_Admin.oo
- Full TAP runtime test execution is not possible in this sandbox due to blocked local TCP socket creation; validation should be run in the normal test environment where ProxySQL/MySQL/PGSQL are reachable.
2 months ago · 6a788e48c4
parent 998bd82387
commit 6a788e48c4
2 changed files with 72 additions and 19 deletions
--- a/lib/ProxySQL_Admin.cpp
+++ b/lib/ProxySQL_Admin.cpp
@ -8001,7 +8001,14 @@ char* ProxySQL_Admin::load_mcp_query_rules_to_runtime() {

 	if (!GloMCPH) return (char*)"MCP Handler not started: command impossible to run";
 	Query_Tool_Handler* qth = GloMCPH->query_tool_handler;
-	if (!qth) return (char*)"Query Tool Handler not initialized";
+	if (!qth) {
+		proxy_warning("MCP query rules load requested but Query Tool Handler is NULL, attempting MCP server self-recovery\n");
+		load_mcp_server();
+		qth = GloMCPH->query_tool_handler;
+		if (!qth) {
+			return (char*)"Query Tool Handler not initialized";
+		}
+	}

 	// Get the discovery schema catalog
 	Discovery_Schema* catalog = qth->get_catalog();
--- a/lib/Query_Tool_Handler.cpp
+++ b/lib/Query_Tool_Handler.cpp
@ -321,6 +321,8 @@ void Query_Tool_Handler::close() {
 }

 int Query_Tool_Handler::init_connection_pool() {
+	// Ensure re-initialization is idempotent when topology/auth changes at runtime.
+	close();
 	refresh_target_registry();

 	pthread_mutex_lock(&pool_lock);
@ -428,8 +430,8 @@ int Query_Tool_Handler::init_connection_pool() {

 	pthread_mutex_unlock(&pool_lock);
 	if ((pool_size + pg_pool_size) == 0) {
-		proxy_error("Query_Tool_Handler: No executable targets available\n");
-		return -1;
+		proxy_warning("Query_Tool_Handler: No executable targets available yet (handler remains initialized)\n");
+		return 0;
 	}

 	proxy_info(
@ -570,45 +572,89 @@ const Query_Tool_Handler::QueryTarget* Query_Tool_Handler::resolve_target(const
 }

 void* Query_Tool_Handler::get_connection(const std::string& target_id) {
+	const auto find_available_connection = [&](const std::string& resolved_target, const std::string& expected_auth_profile_id) -> void* {
+		pthread_mutex_lock(&pool_lock);
+		for (auto& conn : connection_pool) {
+			if (!conn.in_use && conn.target_id == resolved_target && conn.auth_profile_id == expected_auth_profile_id) {
+				conn.in_use = true;
+				void* mysql_ptr = conn.mysql;
+				pthread_mutex_unlock(&pool_lock);
+				return mysql_ptr;
+			}
+		}
+		pthread_mutex_unlock(&pool_lock);
+		return NULL;
+	};
+
+	refresh_target_registry();
 	const std::string resolved_target = target_id.empty() ? default_target_id : target_id;
 	const QueryTarget* target = resolve_target(resolved_target);
-	if (target == NULL) {
+	if (target == NULL || !target->executable) {
+		proxy_error("Query_Tool_Handler: target '%s' is unknown or not executable\n", resolved_target.c_str());
 		return NULL;
 	}

-	pthread_mutex_lock(&pool_lock);
+	void* mysql_ptr = find_available_connection(resolved_target, target->auth_profile_id);
+	if (mysql_ptr) {
+		return mysql_ptr;
+	}

-	for (auto& conn : connection_pool) {
-		if (!conn.in_use && conn.target_id == resolved_target && conn.auth_profile_id == target->auth_profile_id) {
-			conn.in_use = true;
-			pthread_mutex_unlock(&pool_lock);
-			return conn.mysql;
+	// Self-heal path: runtime targets/backends may have changed after handler startup.
+	if (init_connection_pool() == 0) {
+		refresh_target_registry();
+		const QueryTarget* refreshed_target = resolve_target(resolved_target);
+		if (refreshed_target && refreshed_target->executable) {
+			mysql_ptr = find_available_connection(resolved_target, refreshed_target->auth_profile_id);
+			if (mysql_ptr) {
+				return mysql_ptr;
+			}
 		}
 	}

-	pthread_mutex_unlock(&pool_lock);
 	proxy_error("Query_Tool_Handler: No available connection for target '%s'\n", resolved_target.c_str());
 	return NULL;
 }

 void* Query_Tool_Handler::get_pgsql_connection(const std::string& target_id) {
+	const auto find_available_pg_connection = [&](const std::string& resolved_target, const std::string& expected_auth_profile_id) -> void* {
+		pthread_mutex_lock(&pool_lock);
+		for (auto& conn : pgsql_connection_pool) {
+			if (!conn.in_use && conn.target_id == resolved_target && conn.auth_profile_id == expected_auth_profile_id) {
+				conn.in_use = true;
+				void* pgconn_ptr = conn.pgconn;
+				pthread_mutex_unlock(&pool_lock);
+				return pgconn_ptr;
+			}
+		}
+		pthread_mutex_unlock(&pool_lock);
+		return NULL;
+	};
+
+	refresh_target_registry();
 	const std::string resolved_target = target_id.empty() ? default_target_id : target_id;
 	const QueryTarget* target = resolve_target(resolved_target);
-	if (target == NULL) {
+	if (target == NULL || !target->executable) {
+		proxy_error("Query_Tool_Handler: target '%s' is unknown or not executable\n", resolved_target.c_str());
 		return NULL;
 	}

-	pthread_mutex_lock(&pool_lock);
+	void* pgconn_ptr = find_available_pg_connection(resolved_target, target->auth_profile_id);
+	if (pgconn_ptr) {
+		return pgconn_ptr;
+	}

-	for (auto& conn : pgsql_connection_pool) {
-		if (!conn.in_use && conn.target_id == resolved_target && conn.auth_profile_id == target->auth_profile_id) {
-			conn.in_use = true;
-			pthread_mutex_unlock(&pool_lock);
-			return conn.pgconn;
+	// Self-heal path: runtime targets/backends may have changed after handler startup.
+	if (init_connection_pool() == 0) {
+		refresh_target_registry();
+		const QueryTarget* refreshed_target = resolve_target(resolved_target);
+		if (refreshed_target && refreshed_target->executable) {
+			pgconn_ptr = find_available_pg_connection(resolved_target, refreshed_target->auth_profile_id);
+			if (pgconn_ptr) {
+				return pgconn_ptr;
+			}
 		}
 	}

-	pthread_mutex_unlock(&pool_lock);
 	proxy_error("Query_Tool_Handler: No available pgsql connection for target '%s'\n", resolved_target.c_str());
 	return NULL;
 }