#!/bin/bash
#
# @file test_nl2sql_e2e.sh
# @brief End-to-end NL2SQL testing with live LLMs
#
# Tests complete workflow from natural language to executed SQL
#
# Prerequisites:
#   - Running ProxySQL with NL2SQL enabled
#   - Ollama running on localhost:11434 (or configured LLM)
#   - Test database schema
#
# Usage:
#   ./test_nl2sql_e2e.sh [--mock|--live]
#
# Environment (all optional; defaults in parentheses):
#   PROXYSQL_ADMIN_HOST      (127.0.0.1)
#   PROXYSQL_ADMIN_PORT      (6032)
#   PROXYSQL_ADMIN_USER      (admin)
#   PROXYSQL_ADMIN_PASSWORD  (admin)
#   PROXYSQL_HOST            (127.0.0.1)
#   PROXYSQL_PORT            (6033)
#   PROXYSQL_USER            (root)
#   PROXYSQL_PASSWORD        (empty)
#   TEST_SCHEMA              (test_nl2sql)
#   OLLAMA_ENDPOINT          (localhost:11434)
#
# Exit status:
#   0 if no test failed, 1 if at least one test failed, 2 on bad usage.
#
# @date 2025-01-16

set -euo pipefail

# ============================================================================
# Configuration
# ============================================================================

PROXYSQL_ADMIN_HOST=${PROXYSQL_ADMIN_HOST:-127.0.0.1}
PROXYSQL_ADMIN_PORT=${PROXYSQL_ADMIN_PORT:-6032}
PROXYSQL_ADMIN_USER=${PROXYSQL_ADMIN_USER:-admin}
PROXYSQL_ADMIN_PASSWORD=${PROXYSQL_ADMIN_PASSWORD:-admin}
PROXYSQL_HOST=${PROXYSQL_HOST:-127.0.0.1}
PROXYSQL_PORT=${PROXYSQL_PORT:-6033}
# shellcheck disable=SC2034 # not read anywhere in this script yet
PROXYSQL_USER=${PROXYSQL_USER:-root}
# shellcheck disable=SC2034 # not read anywhere in this script yet
PROXYSQL_PASSWORD=${PROXYSQL_PASSWORD:-}
TEST_SCHEMA=${TEST_SCHEMA:-test_nl2sql}
OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-localhost:11434}
LLM_MODE=${1:---live} # --mock or --live

# Color output
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

# Test counters
TOTAL=0
PASSED=0
FAILED=0
SKIPPED=0

# ============================================================================
# Helper Functions
# ============================================================================

#
# @brief Print section header
# @param $1 Section name
#
print_section() {
    echo -e "\n${BLUE}========================================${NC}"
    echo -e "${BLUE}$1${NC}"
    echo -e "${BLUE}========================================${NC}\n"
}

#
# @brief Produce placeholder SQL for a natural-language query (mock mode)
#
# The first matching keyword group wins, so a query containing both "Show"
# and "with" yields the plain SELECT placeholder. The placeholders contain
# no table or column names.
#
# @param $1 NL2SQL query
# @return Placeholder SQL on stdout
#
mock_sql() {
    case "$1" in
        *SELECT* | *select* | *Show* | *show*) echo "SELECT * FROM" ;;
        *WHERE* | *where* | *Find* | *find*) echo "SELECT * FROM WHERE" ;;
        *JOIN* | *join* | *with*) echo "SELECT * FROM JOIN" ;;
        *COUNT* | *count* | *Count*) echo "SELECT COUNT(*) FROM" ;;
        *) echo "SELECT" ;;
    esac
}

#
# @brief Run a single test
#
# Increments TOTAL and then either PASSED or FAILED. Because the script runs
# under `set -e`, callers that want to continue after a failure must guard
# the call (e.g. `run_test ... || true`).
#
# @param $1 Test name
# @param $2 NL2SQL query
# @param $3 Expected SQL pattern (extended regex, matched case-insensitively)
# @return 0 if test passes, 1 if fails
#
run_test() {
    local test_name="$1"
    local nl2sql_query="$2"
    local expected_pattern="$3"
    local sql=""

    TOTAL=$((TOTAL + 1))

    echo -e "${YELLOW}Test $TOTAL: $test_name${NC}"
    echo " Query: $nl2sql_query"

    # For now, we'll use mock responses since NL2SQL is not fully integrated
    # In Phase 2, this will execute real NL2SQL queries
    if [[ "$LLM_MODE" == "--mock" ]]; then
        sql=$(mock_sql "$nl2sql_query")
    else
        # Live mode is not yet implemented; a fixed placeholder is checked.
        sql="SELECT"
    fi

    echo " Generated: $sql"

    # Check if expected pattern exists
    if grep -qiE -- "$expected_pattern" <<<"$sql"; then
        echo -e " ${GREEN}PASSED${NC}"
        PASSED=$((PASSED + 1))
        return 0
    fi

    echo -e " ${RED}FAILED: Expected pattern '$expected_pattern' not found${NC}"
    FAILED=$((FAILED + 1))
    return 1
}

#
# @brief Execute MySQL command against the ProxySQL admin interface
#
# Deliberately best-effort: client errors are discarded and the function
# always returns 0, so a failing statement never aborts the script.
#
# @param $1 Query to execute
#
mysql_exec() {
    mysql -h "$PROXYSQL_ADMIN_HOST" -P "$PROXYSQL_ADMIN_PORT" \
        -u "$PROXYSQL_ADMIN_USER" --password="$PROXYSQL_ADMIN_PASSWORD" \
        -e "$1" 2>/dev/null || true
}

#
# @brief Setup test schema
#
# Issues the CREATE/INSERT statements through mysql_exec, which ignores
# errors; the final message is printed regardless of whether they succeeded.
#
setup_schema() {
    print_section "Setting Up Test Schema"

    # Create test database via admin
    mysql_exec "CREATE DATABASE IF NOT EXISTS $TEST_SCHEMA"

    # Create test tables
    mysql_exec "CREATE TABLE IF NOT EXISTS $TEST_SCHEMA.customers (
        id INT PRIMARY KEY AUTO_INCREMENT,
        name VARCHAR(100),
        country VARCHAR(50),
        created_at DATE
    )"

    mysql_exec "CREATE TABLE IF NOT EXISTS $TEST_SCHEMA.orders (
        id INT PRIMARY KEY AUTO_INCREMENT,
        customer_id INT,
        total DECIMAL(10,2),
        status VARCHAR(20),
        FOREIGN KEY (customer_id) REFERENCES $TEST_SCHEMA.customers(id)
    )"

    # Insert test data
    mysql_exec "INSERT INTO $TEST_SCHEMA.customers (name, country, created_at) VALUES
        ('Alice', 'USA', '2024-01-01'),
        ('Bob', 'UK', '2024-02-01'),
        ('Charlie', 'USA', '2024-03-01')
        ON DUPLICATE KEY UPDATE name=name"

    mysql_exec "INSERT INTO $TEST_SCHEMA.orders (customer_id, total, status) VALUES
        (1, 100.00, 'completed'),
        (2, 200.00, 'pending'),
        (3, 150.00, 'completed')
        ON DUPLICATE KEY UPDATE total=total"

    echo -e "${GREEN}Test schema created${NC}"
}

#
# @brief Configure LLM mode
#
# Applies the same admin setting in both modes; in live mode additionally
# probes the Ollama HTTP endpoint and warns (without failing) if the probe
# does not succeed.
#
configure_llm() {
    print_section "LLM Configuration: $LLM_MODE"

    mysql_exec "SET mysql-have_sql_injection='false'"

    if [[ "$LLM_MODE" == "--mock" ]]; then
        echo -e "${GREEN}Using mocked LLM responses${NC}"
        return 0
    fi

    echo -e "${GREEN}Using live LLM (ensure Ollama is running)${NC}"

    # Check Ollama connectivity; the timeout keeps an unresponsive host from
    # hanging the run.
    if curl -s --max-time 5 "http://${OLLAMA_ENDPOINT}/api/tags" >/dev/null 2>&1; then
        echo -e "${GREEN}Ollama is accessible${NC}"
    else
        echo -e "${YELLOW}Warning: Ollama may not be running on ${OLLAMA_ENDPOINT}${NC}"
    fi
}

# ============================================================================
# Test Cases
# ============================================================================

#
# @brief Run every end-to-end test case
#
# Cases are stored as consecutive (name, query, expected pattern) triples.
# Each run_test call is guarded so that a failing case is recorded in FAILED
# instead of terminating the script via `set -e`.
#
run_e2e_tests() {
    print_section "Running End-to-End NL2SQL Tests"

    local -a cases=(
        # Test 1: Simple SELECT
        "Simple SELECT all customers"
        "NL2SQL: Show all customers"
        "SELECT.*customers"

        # Test 2: SELECT with WHERE
        "SELECT with condition"
        "NL2SQL: Find customers from USA"
        "SELECT.*WHERE"

        # Test 3: JOIN query
        "JOIN customers and orders"
        "NL2SQL: Show customer names with their order amounts"
        "SELECT.*JOIN"

        # Test 4: Aggregation
        "COUNT aggregation"
        "NL2SQL: Count customers by country"
        "COUNT.*GROUP BY"

        # Test 5: Sorting
        "ORDER BY"
        "NL2SQL: Show orders sorted by total amount"
        "SELECT.*ORDER BY"

        # Test 6: Complex query
        "Complex aggregation"
        "NL2SQL: What is the average order total per country?"
        "AVG"

        # Test 7: Date handling
        "Date filtering"
        "NL2SQL: Find customers created in 2024"
        "2024"

        # Test 8: Subquery (may fail with simple models)
        "Subquery"
        "NL2SQL: Find customers with orders above average"
        "SELECT"
    )

    local i
    for ((i = 0; i < ${#cases[@]}; i += 3)); do
        run_test "${cases[i]}" "${cases[i + 1]}" "${cases[i + 2]}" || true
    done
}

# ============================================================================
# Results Summary
# ============================================================================

#
# @brief Print counters and the integer pass rate
# @return 0 if FAILED is zero, 1 otherwise
#
print_summary() {
    print_section "Test Summary"

    echo "Total tests: $TOTAL"
    echo -e "Passed: ${GREEN}$PASSED${NC}"
    echo -e "Failed: ${RED}$FAILED${NC}"
    echo -e "Skipped: ${YELLOW}$SKIPPED${NC}"

    local pass_rate=0
    if ((TOTAL > 0)); then
        pass_rate=$((PASSED * 100 / TOTAL))
    fi
    echo "Pass rate: $pass_rate%"

    if ((FAILED == 0)); then
        echo -e "\n${GREEN}All tests passed!${NC}"
        return 0
    fi

    echo -e "\n${RED}Some tests failed${NC}"
    return 1
}

# ============================================================================
# Main
# ============================================================================

#
# @brief Entry point: validate the mode, set up, run the tests, summarize
# @return Status of print_summary (0 when no test failed, 1 otherwise)
#
main() {
    # Reject anything other than the two documented modes instead of
    # silently treating it as live mode.
    case "$LLM_MODE" in
        --mock | --live) ;;
        *)
            echo "Usage: $0 [--mock|--live]" >&2
            exit 2
            ;;
    esac

    print_section "NL2SQL End-to-End Testing"

    echo "Configuration:"
    echo " ProxySQL: $PROXYSQL_HOST:$PROXYSQL_PORT"
    echo " Admin: $PROXYSQL_ADMIN_HOST:$PROXYSQL_ADMIN_PORT"
    echo " Schema: $TEST_SCHEMA"
    echo " LLM Mode: $LLM_MODE"

    # Setup
    setup_schema
    configure_llm

    # Run tests
    run_e2e_tests

    # Summary
    print_summary
}

# Run main
main "$@"