#!/usr/bin/env python3
"""
Two-Phase Database Discovery

The Agent (via Claude Code) performs both phases:
1. Calls discovery.run_static to trigger ProxySQL's static harvest
2. Performs LLM semantic analysis using catalog data

This script is a wrapper that launches Claude Code with the prompts.
"""

import argparse
import json
import os
import subprocess
import sys

# Script directory
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Prompt templates, looked up in SCRIPT_DIR/prompts.
SYSTEM_PROMPT_FILE = "two_phase_discovery_prompt.md"
USER_PROMPT_FILE = "two_phase_user_prompt.md"

# Endpoint for the static harvest when PROXYSQL_MCP_ENDPOINT is not set.
DEFAULT_MCP_ENDPOINT = "https://127.0.0.1:6071/mcp/query"
HARVEST_TIMEOUT_SECONDS = 30

# Tokens in the user prompt template that are replaced with run-time values.
# NOTE(review): all four tokens are empty strings in the file as reviewed.
# They presumably have to match markers inside prompts/two_phase_user_prompt.md;
# restore the real tokens there. Until then the substitutions are skipped,
# because str.replace("", value) would insert value between every character.
RUN_ID_PLACEHOLDER = ""
TARGET_ID_PLACEHOLDER = ""
MODEL_PLACEHOLDER = ""
SCHEMA_PLACEHOLDER = ""


def _prompt_path(filename):
    """Return the absolute path of a prompt file shipped next to this script."""
    return os.path.join(SCRIPT_DIR, "prompts", filename)


def load_prompt(filename):
    """Load prompt from file.

    Args:
        filename: Name of a file inside the "prompts" directory that sits
            next to this script.

    Returns:
        The file content as a string.

    Raises:
        FileNotFoundError: If the prompt file does not exist.
    """
    with open(_prompt_path(filename), "r", encoding="utf-8") as f:
        return f.read()


def _build_parser():
    """Build the command-line parser for the discovery wrapper."""
    parser = argparse.ArgumentParser(
        description="Two-Phase Database Discovery using Claude Code",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Discover the 'test' schema
  %(prog)s --mcp-config mcp_config.json --target-id tap_mysql_default --schema test

  # Discovery specific schema
  %(prog)s --mcp-config mcp_config.json --target-id tap_mysql_default --schema sales

  # Discovery specific schema (REQUIRED)
  %(prog)s --mcp-config mcp_config.json --target-id tap_pgsql_default --schema public

  # With custom model
  %(prog)s --mcp-config mcp_config.json --target-id tap_mysql_default --schema sales --model claude-3-opus-20240229
        """
    )
    parser.add_argument(
        "--mcp-config",
        required=True,
        help="Path to MCP server configuration JSON"
    )
    parser.add_argument(
        "--schema",
        required=True,
        help="Schema/database to discover (REQUIRED)"
    )
    parser.add_argument(
        "--target-id",
        required=True,
        help="MCP target_id to use for static harvest and catalog/LLM tools (REQUIRED)"
    )
    # The model name is substituted into the user prompt and echoed in the
    # banner; it is not passed to the claude command line.
    parser.add_argument(
        "--model",
        default="claude-3.5-sonnet",
        help="Claude model to use (default: claude-3.5-sonnet)"
    )
    parser.add_argument(
        "--catalog-path",
        default="mcp_catalog.db",
        help="Path to SQLite catalog database (default: mcp_catalog.db)"
    )
    parser.add_argument(
        "--run-id",
        type=int,
        help="Run ID from Phase 1 static harvest (required if not using auto-fetch)"
    )
    parser.add_argument(
        "--output",
        help="Optional: Path to save discovery summary (DEPRECATED - all data in catalog)"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be done without executing"
    )
    # Accepted so existing invocations keep working; the launcher passes the
    # corresponding claude flag unconditionally (see _build_claude_command).
    parser.add_argument(
        "--dangerously-skip-permissions",
        action="store_true",
        help="Bypass all permission checks (currently always enabled: "
             "required for headless MCP operation; use only in trusted environments)"
    )
    parser.add_argument(
        "--mcp-only",
        dest="mcp_only",
        action="store_true",
        default=True,
        help="Restrict to MCP tools only (disable Bash/Edit/Write - default: True)"
    )
    # Without this switch the default-True flag above could never be disabled.
    parser.add_argument(
        "--no-mcp-only",
        dest="mcp_only",
        action="store_false",
        help="Do not restrict Claude Code to MCP tools"
    )
    return parser


def _extract_run_id(response_text):
    """Return the run_id found in a discovery.run_static reply, or None.

    The reply is a JSON-RPC document whose result.content[0].text is itself a
    JSON document carrying "run_id". Malformed input raises ValueError,
    LookupError, TypeError or AttributeError; the caller reports those.
    """
    response = json.loads(response_text)
    result = response.get("result") if isinstance(response, dict) else None
    content = result.get("content") if isinstance(result, dict) else None
    if not content:
        return None
    harvest_data = json.loads(content[0]["text"])
    return harvest_data.get("run_id")


def _fetch_run_id(target_id, schema):
    """Trigger the static harvest through curl and return its run_id, or None.

    Failures are reported on stderr and turned into None so that the caller
    can print its own guidance.
    """
    endpoint = os.getenv("PROXYSQL_MCP_ENDPOINT", DEFAULT_MCP_ENDPOINT)
    harvest_query = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": "discovery.run_static",
            "arguments": {
                "target_id": target_id,
                "schema_filter": schema
            }
        }
    }
    # Stays empty when curl never produced output (not installed, timed out),
    # so the handler below never touches an unbound name.
    stdout = ""
    try:
        # -k skips TLS certificate verification.
        # NOTE(review): presumably because the endpoint uses a self-signed
        # certificate - confirm before pointing this at a remote host.
        completed = subprocess.run(
            ["curl", "-k", "-s", "-X", "POST", endpoint,
             "-H", "Content-Type: application/json",
             "-d", json.dumps(harvest_query)],
            capture_output=True, text=True, timeout=HARVEST_TIMEOUT_SECONDS
        )
        stdout = completed.stdout
        return _extract_run_id(stdout)
    except (OSError, subprocess.SubprocessError, ValueError, LookupError,
            TypeError, AttributeError) as e:
        print(f"Warning: Could not fetch latest run_id: {e}", file=sys.stderr)
        if stdout:
            print(f"Debug: {stdout[:500]}", file=sys.stderr)
        return None


def _fill_placeholders(template, substitutions):
    """Replace each (token, value) pair in template, skipping empty tokens."""
    for token, value in substitutions:
        if token:
            template = template.replace(token, value)
    return template


def _claude_cli_available():
    """Return True when "claude --version" runs and exits with status 0."""
    try:
        probe = subprocess.run(
            ["claude", "--version"],
            capture_output=True, text=True, timeout=5
        )
    except (OSError, subprocess.TimeoutExpired):
        return False
    return probe.returncode == 0


def _build_claude_command(mcp_config, system_prompt_path, mcp_only):
    """Assemble the non-interactive claude invocation."""
    claude_cmd = [
        "claude",
        "--mcp-config", mcp_config,
        # --system-prompt expects the prompt text itself; a path has to be
        # passed with --system-prompt-file.
        "--system-prompt-file", system_prompt_path,
        "--print",  # Non-interactive mode
        # Always skip permission prompts for headless MCP operation.
        # The permission-mode dontAsk doesn't work correctly with MCP tools.
        "--dangerously-skip-permissions",
    ]
    # Restrict to MCP tools only (disable Bash/Edit/Write) to enforce NO FILES rule
    if mcp_only:
        claude_cmd.extend(["--allowed-tools", ""])  # Empty string = disable all built-in tools
    return claude_cmd


def main(argv=None):
    """Run the two-phase discovery wrapper.

    Args:
        argv: Optional list of command-line arguments. When None, argparse
            reads sys.argv[1:].

    Returns:
        0 after a dry run. Every other path leaves through SystemExit.

    Raises:
        SystemExit: With status 1 when the run_id cannot be determined, the
            prompt files are missing or the claude CLI is unavailable;
            otherwise with the exit status of the claude process.
    """
    args = _build_parser().parse_args(argv)

    # Determine run_id: an explicit --run-id wins, otherwise trigger the harvest.
    if args.run_id is not None:
        run_id = args.run_id
    else:
        run_id = _fetch_run_id(args.target_id, args.schema)

    if run_id is None:
        print("Error: Could not determine run_id.", file=sys.stderr)
        print("Either:", file=sys.stderr)
        print(" 1. Run: ./static_harvest.sh --target-id TARGET_ID --schema SCHEMA first",
              file=sys.stderr)
        print(" 2. Or use: ./two_phase_discovery.py --run-id RUN_ID "
              "--target-id TARGET_ID --schema SCHEMA", file=sys.stderr)
        sys.exit(1)

    print(f"[*] Using run_id: {run_id} for target_id: {args.target_id}")

    # Load prompts
    try:
        system_prompt = load_prompt(SYSTEM_PROMPT_FILE)
        user_prompt = load_prompt(USER_PROMPT_FILE)
    except FileNotFoundError as e:
        print(f"Error: Could not load prompt files: {e}", file=sys.stderr)
        print(f"Make sure prompts are in: {os.path.join(SCRIPT_DIR, 'prompts')}", file=sys.stderr)
        sys.exit(1)

    # Replace placeholders in user prompt
    schema_filter = args.schema if args.schema else "all schemas"
    substitutions = (
        (RUN_ID_PLACEHOLDER, str(run_id)),
        (TARGET_ID_PLACEHOLDER, args.target_id),
        (MODEL_PLACEHOLDER, args.model),
        (SCHEMA_PLACEHOLDER, schema_filter),
    )
    user_prompt = _fill_placeholders(user_prompt, substitutions)
    if not all(token for token, _ in substitutions):
        print("Warning: one or more prompt placeholder tokens are empty; "
              "those substitutions were skipped.", file=sys.stderr)

    # Dry run mode
    if args.dry_run:
        print("[DRY RUN] Two-Phase Database Discovery")
        print(f" MCP Config: {args.mcp_config}")
        print(f" Schema: {schema_filter}")
        print(f" Target ID: {args.target_id}")
        print(f" Model: {args.model}")
        print(f" Catalog Path: {args.catalog_path}")
        print()
        print("System prompt:")
        print(" " + "\n ".join(system_prompt.split("\n")[:10]))
        print(" ...")
        print()
        print("User prompt:")
        print(" " + "\n ".join(user_prompt.split("\n")[:10]))
        print(" ...")
        return 0

    # Check if claude command is available
    if not _claude_cli_available():
        print("Error: 'claude' command not found. Please install Claude Code CLI.", file=sys.stderr)
        print(" Visit: https://claude.ai/download", file=sys.stderr)
        sys.exit(1)

    # Launch Claude Code with the prompts
    print("[*] Launching Claude Code for two-phase discovery...")
    print(f" Schema: {schema_filter}")
    print(f" Target ID: {args.target_id}")
    print(f" Model: {args.model}")
    print(f" Catalog: {args.catalog_path}")
    print(f" MCP Config: {args.mcp_config}")
    print()

    # The system prompt is used unmodified, so the file on disk is handed to
    # claude directly; no temporary copies are needed.
    claude_cmd = _build_claude_command(
        args.mcp_config, _prompt_path(SYSTEM_PROMPT_FILE), args.mcp_only
    )

    # Pass prompt via stdin since it can be very long
    result = subprocess.run(claude_cmd, input=user_prompt, encoding="utf-8")
    sys.exit(result.returncode)


if __name__ == "__main__":
    sys.exit(main())