mirror of https://github.com/sysown/proxysql
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
257 lines
8.7 KiB
257 lines
8.7 KiB
#!/usr/bin/env python3
|
|
"""
|
|
Two-Phase Database Discovery
|
|
|
|
The Agent (via Claude Code) performs both phases:
|
|
1. Calls discovery.run_static to trigger ProxySQL's static harvest
|
|
2. Performs LLM semantic analysis using catalog data
|
|
|
|
This script is a wrapper that launches Claude Code with the prompts.
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
import json
|
|
import os
|
|
import subprocess
|
|
|
|
# Directory containing this script; prompt files are resolved relative to it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))


def load_prompt(filename):
    """Return the text of a prompt file stored in the ``prompts`` directory.

    Args:
        filename: Name of the file inside ``<SCRIPT_DIR>/prompts``.

    Returns:
        The complete file contents as a string.

    Raises:
        FileNotFoundError: If the prompt file does not exist.
    """
    path = os.path.join(SCRIPT_DIR, "prompts", filename)
    # Pin the encoding: without it open() falls back to the locale's default,
    # which mis-decodes non-ASCII prompt text on non-UTF-8 platforms.
    with open(path, "r", encoding="utf-8") as f:
        return f.read()
|
|
|
|
|
|
# Help epilog shown after the option list (RawDescriptionHelpFormatter keeps
# the line breaks). "%(prog)s" is expanded by argparse to the program name.
_EPILOG = """
Examples:
# Discovery all schemas
%(prog)s --mcp-config mcp_config.json --target-id tap_mysql_default --schema test

# Discovery specific schema
%(prog)s --mcp-config mcp_config.json --target-id tap_mysql_default --schema sales

# Discovery specific schema (REQUIRED)
%(prog)s --mcp-config mcp_config.json --target-id tap_pgsql_default --schema public

# With custom model
%(prog)s --mcp-config mcp_config.json --target-id tap_mysql_default --schema sales --model claude-3-opus-20240229
"""


def _build_parser():
    """Build the command-line parser for the two-phase discovery wrapper."""
    parser = argparse.ArgumentParser(
        description="Two-Phase Database Discovery using Claude Code",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_EPILOG,
    )
    parser.add_argument(
        "--mcp-config",
        required=True,
        help="Path to MCP server configuration JSON",
    )
    parser.add_argument(
        "--schema",
        required=True,
        help="Schema/database to discover (REQUIRED)",
    )
    parser.add_argument(
        "--target-id",
        required=True,
        help="MCP target_id to use for static harvest and catalog/LLM tools (REQUIRED)",
    )
    # NOTE(review): --model is only substituted into the user prompt; it is
    # never passed to the `claude` command line. Confirm that is intended.
    parser.add_argument(
        "--model",
        default="claude-3.5-sonnet",
        help="Claude model to use (default: claude-3.5-sonnet)",
    )
    parser.add_argument(
        "--catalog-path",
        default="mcp_catalog.db",
        help="Path to SQLite catalog database (default: mcp_catalog.db)",
    )
    parser.add_argument(
        "--run-id",
        type=int,
        help="Run ID from Phase 1 static harvest (required if not using auto-fetch)",
    )
    parser.add_argument(
        "--output",
        help="Optional: Path to save discovery summary (DEPRECATED - all data in catalog)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be done without executing",
    )
    # Accepted for compatibility; main() always passes the flag to claude.
    parser.add_argument(
        "--dangerously-skip-permissions",
        action="store_true",
        help="Bypass all permission checks (use only in trusted environments)",
    )
    # NOTE(review): store_true combined with default=True means this option
    # can never be False from the command line.
    parser.add_argument(
        "--mcp-only",
        action="store_true",
        default=True,
        help="Restrict to MCP tools only (disable Bash/Edit/Write - default: True)",
    )
    return parser


def _fetch_run_id(target_id, schema):
    """Trigger ``discovery.run_static`` over MCP and return the resulting run_id.

    The JSON-RPC request is POSTed with ``curl`` to the endpoint named by the
    ``PROXYSQL_MCP_ENDPOINT`` environment variable (falling back to the local
    default). This is best effort: any failure is reported on stderr and
    ``None`` is returned instead of raising.

    Args:
        target_id: MCP target_id sent as the ``target_id`` tool argument.
        schema: Schema name sent as the ``schema_filter`` tool argument.

    Returns:
        The ``run_id`` value from the tool response, or ``None`` if it could
        not be obtained.
    """
    endpoint = os.getenv("PROXYSQL_MCP_ENDPOINT", "https://127.0.0.1:6071/mcp/query")
    harvest_query = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": "discovery.run_static",
            "arguments": {
                "target_id": target_id,
                "schema_filter": schema,
            },
        },
    }
    # Initialised before the try so the error handler can always print it,
    # even when curl is missing or times out and no result object exists.
    raw_output = ""
    try:
        completed = subprocess.run(
            ["curl", "-k", "-s", "-X", "POST", endpoint,
             "-H", "Content-Type: application/json",
             "-d", json.dumps(harvest_query)],
            capture_output=True, text=True, timeout=30
        )
        raw_output = completed.stdout
        response = json.loads(raw_output)
        if response.get("result") and response["result"].get("content"):
            # The tool result is itself a JSON document in the first content item.
            content = response["result"]["content"][0]["text"]
            return json.loads(content).get("run_id")
        return None
    except Exception as e:
        # Deliberately broad: a failed auto-fetch must fall through to the
        # "could not determine run_id" guidance rather than a traceback.
        print(f"Warning: Could not fetch latest run_id: {e}", file=sys.stderr)
        print(f"Debug: {raw_output[:500]}", file=sys.stderr)
        return None


def _claude_available():
    """Return True if ``claude --version`` runs and exits with status 0."""
    try:
        probe = subprocess.run(
            ["claude", "--version"],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (FileNotFoundError, subprocess.TimeoutExpired):
        return False
    return probe.returncode == 0


def main():
    """Run the two-phase discovery wrapper.

    Parses the command line, determines the static-harvest run_id (from
    ``--run-id`` or by calling ``discovery.run_static``), fills the prompt
    templates, and launches the ``claude`` CLI with the user prompt on stdin.

    Returns:
        0 in ``--dry-run`` mode. In every other path the process exits via
        ``sys.exit`` (1 on setup errors, otherwise claude's return code).
    """
    args = _build_parser().parse_args()

    # Determine run_id: an explicit --run-id wins, otherwise run the harvest.
    if args.run_id:
        run_id = args.run_id
    else:
        run_id = _fetch_run_id(args.target_id, args.schema)

    if not run_id:
        print("Error: Could not determine run_id.", file=sys.stderr)
        print("Either:", file=sys.stderr)
        print(" 1. Run: ./static_harvest.sh --target-id <target_id> --schema <your_schema> first", file=sys.stderr)
        print(" 2. Or use: ./two_phase_discovery.py --run-id <run_id> --target-id <target_id> --schema <schema>", file=sys.stderr)
        sys.exit(1)

    print(f"[*] Using run_id: {run_id} for target_id: {args.target_id}")

    # Load prompts
    try:
        system_prompt = load_prompt("two_phase_discovery_prompt.md")
        user_prompt = load_prompt("two_phase_user_prompt.md")
    except FileNotFoundError as e:
        print(f"Error: Could not load prompt files: {e}", file=sys.stderr)
        print(f"Make sure prompts are in: {os.path.join(SCRIPT_DIR, 'prompts')}", file=sys.stderr)
        sys.exit(1)

    # Replace placeholders in user prompt
    schema_filter = args.schema if args.schema else "all schemas"
    user_prompt = user_prompt.replace("<USE_THE_PROVIDED_RUN_ID>", str(run_id))
    user_prompt = user_prompt.replace("<TARGET_ID>", args.target_id)
    user_prompt = user_prompt.replace("<MODEL_NAME_HERE>", args.model)
    user_prompt = user_prompt.replace("<SCHEMA_FILTER>", schema_filter)

    # Dry run mode: show the configuration and the first 10 lines of each prompt.
    if args.dry_run:
        print("[DRY RUN] Two-Phase Database Discovery")
        print(f" MCP Config: {args.mcp_config}")
        print(f" Schema: {schema_filter}")
        print(f" Target ID: {args.target_id}")
        print(f" Model: {args.model}")
        print(f" Catalog Path: {args.catalog_path}")
        print()
        print("System prompt:")
        print(" " + "\n ".join(system_prompt.split("\n")[:10]))
        print(" ...")
        print()
        print("User prompt:")
        print(" " + "\n ".join(user_prompt.split("\n")[:10]))
        print(" ...")
        return 0

    # Check if claude command is available
    if not _claude_available():
        print("Error: 'claude' command not found. Please install Claude Code CLI.", file=sys.stderr)
        print(" Visit: https://claude.ai/download", file=sys.stderr)
        sys.exit(1)

    # Launch Claude Code with the prompts
    print("[*] Launching Claude Code for two-phase discovery...")
    print(f" Schema: {schema_filter}")
    print(f" Target ID: {args.target_id}")
    print(f" Model: {args.model}")
    print(f" Catalog: {args.catalog_path}")
    print(f" MCP Config: {args.mcp_config}")
    print()

    # Create temporary files for prompts
    import tempfile

    # Paths are recorded as soon as each file exists so the finally clause can
    # remove whatever was created, even if a later step fails.
    system_path = None
    user_path = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".md", delete=False, encoding="utf-8"
        ) as system_file:
            system_path = system_file.name
            system_file.write(system_prompt)

        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".md", delete=False, encoding="utf-8"
        ) as user_file:
            user_path = user_file.name
            user_file.write(user_prompt)

        # Build claude command
        # Pass prompt via stdin since it can be very long
        # NOTE(review): --system-prompt receives a file *path* here; confirm
        # the installed CLI reads it as a path and not as literal prompt text.
        claude_cmd = [
            "claude",
            "--mcp-config", args.mcp_config,
            "--system-prompt", system_path,
            "--print",  # Non-interactive mode
        ]

        # Add permission mode - always use dangerously-skip-permissions for headless MCP operation
        # The permission-mode dontAsk doesn't work correctly with MCP tools
        claude_cmd.extend(["--dangerously-skip-permissions"])

        # Restrict to MCP tools only (disable Bash/Edit/Write) to enforce NO FILES rule
        if args.mcp_only:
            claude_cmd.extend(["--allowed-tools", ""])  # Empty string = disable all built-in tools

        # Execute claude with prompt via stdin
        with open(user_path, "r", encoding="utf-8") as user_file:
            result = subprocess.run(claude_cmd, stdin=user_file)
        sys.exit(result.returncode)
    finally:
        # Clean up temporary files; a failed unlink must not mask the exit status.
        for temp_path in (system_path, user_path):
            if temp_path is None:
                continue
            try:
                os.unlink(temp_path)
            except OSError:
                pass


if __name__ == "__main__":
    sys.exit(main())
|