You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
proxysql/scripts/mcp/test_nl2sql_e2e.sh

298 lines
7.9 KiB

#!/bin/bash
#
# @file test_nl2sql_e2e.sh
# @brief End-to-end NL2SQL testing with live LLMs
#
# Tests complete workflow from natural language to executed SQL
#
# Prerequisites:
# - Running ProxySQL with NL2SQL enabled
# - Ollama running on localhost:11434 (or configured LLM)
# - Test database schema
#
# Usage:
# ./test_nl2sql_e2e.sh [--mock|--live]
#
# @date 2025-01-16
# Abort the script as soon as an unguarded command fails.
set -o errexit
# ============================================================================
# Configuration
# ============================================================================
# Connection and schema settings: a non-empty value already present in the
# environment is kept, otherwise the default shown here is assigned.
: "${PROXYSQL_ADMIN_HOST:=127.0.0.1}"
: "${PROXYSQL_ADMIN_PORT:=6032}"
: "${PROXYSQL_HOST:=127.0.0.1}"
: "${PROXYSQL_PORT:=6033}"
: "${PROXYSQL_USER:=root}"
: "${PROXYSQL_PASSWORD:=}"
: "${TEST_SCHEMA:=test_nl2sql}"
# The first positional argument selects the LLM mode: --mock or --live.
LLM_MODE="${1:---live}"
# ANSI color sequences, stored unexpanded (the helpers print them via echo -e).
RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[1;33m' BLUE='\033[0;34m'
NC='\033[0m' # resets the color
# Running test tallies.
TOTAL=0 PASSED=0 FAILED=0 SKIPPED=0
# ============================================================================
# Helper Functions
# ============================================================================
#
# @brief Print a section title framed by two separator rules
# @param $1 Section name
#
# Emits a blank line, a rule, the title, a rule, and a trailing blank line,
# all wrapped in the BLUE/NC color sequences.
#
print_section() {
  local rule="${BLUE}========================================${NC}"

  # %b expands backslash escapes in the argument the same way echo -e does.
  printf '%b\n' "\n${rule}"
  printf '%b\n' "${BLUE}$1${NC}"
  printf '%b\n' "${rule}\n"
}
#
# @brief Run a single test
# @param $1 Test name
# @param $2 NL2SQL query
# @param $3 Expected SQL pattern (extended regex, matched case-insensitively)
# @return 0 if test passes, 1 if fails
#
# Reads LLM_MODE and the color variables; increments TOTAL and then either
# PASSED or FAILED. Because a failed test returns 1, callers running under
# `set -e` must guard the call if they want to keep going.
#
run_test() {
  local test_name="$1"
  local nl2sql_query="$2"
  local expected_pattern="$3"
  local sql=""

  TOTAL=$((TOTAL + 1))
  echo -e "${YELLOW}Test $TOTAL: $test_name${NC}"
  echo " Query: $nl2sql_query"

  # For now, we'll use mock responses since NL2SQL is not fully integrated
  # In Phase 2, this will execute real NL2SQL queries
  if [ "$LLM_MODE" = "--mock" ]; then
    # Generate mock SQL from keywords in the query; the first matching
    # keyword group wins, so the order of these branches matters.
    if [[ "$nl2sql_query" =~ "SELECT"|"select"|"Show"|"show" ]]; then
      sql="SELECT * FROM"
    elif [[ "$nl2sql_query" =~ "WHERE"|"where"|"Find"|"find" ]]; then
      sql="SELECT * FROM WHERE"
    elif [[ "$nl2sql_query" =~ "JOIN"|"join"|"with" ]]; then
      sql="SELECT * FROM JOIN"
    elif [[ "$nl2sql_query" =~ "COUNT"|"count"|"Count" ]]; then
      sql="SELECT COUNT(*) FROM"
    else
      sql="SELECT"
    fi
  else
    # Live mode is not yet implemented: a fixed placeholder stands in for
    # the SQL a real NL2SQL call would return.
    sql="SELECT"
  fi
  echo " Generated: $sql"

  # Check if expected pattern exists. Passing the pattern through -e keeps a
  # pattern that starts with '-' from being parsed as a grep option.
  if grep -qiE -e "$expected_pattern" <<<"$sql"; then
    echo -e " ${GREEN}PASSED${NC}"
    PASSED=$((PASSED + 1))
    return 0
  else
    echo -e " ${RED}FAILED: Expected pattern '$expected_pattern' not found${NC}"
    FAILED=$((FAILED + 1))
    return 1
  fi
}
#
# @brief Execute a SQL statement through the mysql client
# @param $1 Query to execute
#
# Connects to PROXYSQL_ADMIN_HOST:PROXYSQL_ADMIN_PORT. Credentials are taken
# from PROXYSQL_ADMIN_USER / PROXYSQL_ADMIN_PASSWORD and default to
# admin/admin when those are unset or empty.
#
# Best-effort by design: the client's stderr is discarded and the function
# always returns 0, so a failing statement never aborts a `set -e` script.
#
mysql_exec() {
  local admin_user="${PROXYSQL_ADMIN_USER:-admin}"
  local admin_password="${PROXYSQL_ADMIN_PASSWORD:-admin}"

  # Every expansion is quoted so values containing spaces or glob characters
  # reach mysql as single arguments.
  mysql -h "$PROXYSQL_ADMIN_HOST" -P "$PROXYSQL_ADMIN_PORT" \
    -u "$admin_user" -p"$admin_password" \
    -e "$1" 2>/dev/null || true
}
#
# @brief Setup test schema
#
# Creates the TEST_SCHEMA database with its customers and orders tables and
# seeds three rows into each, issuing every statement through mysql_exec.
#
# The seed rows carry explicit primary-key values. Without them the
# AUTO_INCREMENT column always produces a fresh id, ON DUPLICATE KEY UPDATE
# never fires, and every rerun appends duplicate rows (while the orders keep
# pointing at customer ids 1..3). With explicit ids a rerun is a no-op.
#
setup_schema() {
  print_section "Setting Up Test Schema"

  # Create test database via admin
  mysql_exec "CREATE DATABASE IF NOT EXISTS $TEST_SCHEMA"

  # Create test tables
  mysql_exec "CREATE TABLE IF NOT EXISTS $TEST_SCHEMA.customers (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(100),
    country VARCHAR(50),
    created_at DATE
  )"
  mysql_exec "CREATE TABLE IF NOT EXISTS $TEST_SCHEMA.orders (
    id INT PRIMARY KEY AUTO_INCREMENT,
    customer_id INT,
    total DECIMAL(10,2),
    status VARCHAR(20),
    FOREIGN KEY (customer_id) REFERENCES $TEST_SCHEMA.customers(id)
  )"

  # Insert test data (idempotent thanks to the explicit ids)
  mysql_exec "INSERT INTO $TEST_SCHEMA.customers (id, name, country, created_at) VALUES
    (1, 'Alice', 'USA', '2024-01-01'),
    (2, 'Bob', 'UK', '2024-02-01'),
    (3, 'Charlie', 'USA', '2024-03-01')
  ON DUPLICATE KEY UPDATE name=name"
  mysql_exec "INSERT INTO $TEST_SCHEMA.orders (id, customer_id, total, status) VALUES
    (1, 1, 100.00, 'completed'),
    (2, 2, 200.00, 'pending'),
    (3, 3, 150.00, 'completed')
  ON DUPLICATE KEY UPDATE total=total"

  echo -e "${GREEN}Test schema created${NC}"
}
#
# @brief Configure LLM mode
#
# Prints the selected LLM_MODE and issues the same admin SET statement in
# both modes. In live mode it also probes the local Ollama HTTP endpoint and
# reports whether it answered; an unreachable endpoint is only a warning.
#
configure_llm() {
  print_section "LLM Configuration: $LLM_MODE"

  # Both modes apply the identical setting, so it is issued once up front.
  # NOTE(review): what this variable controls is not evident from this file.
  mysql_exec "SET mysql-have_sql_injection='false'" 2>/dev/null || true

  if [ "$LLM_MODE" = "--mock" ]; then
    echo -e "${GREEN}Using mocked LLM responses${NC}"
    return 0
  fi

  echo -e "${GREEN}Using live LLM (ensure Ollama is running)${NC}"
  # Check Ollama connectivity. --max-time bounds the probe so an endpoint
  # that accepts the connection but never answers cannot hang the script.
  if curl -s --max-time 5 http://localhost:11434/api/tags > /dev/null 2>&1; then
    echo -e "${GREEN}Ollama is accessible${NC}"
  else
    echo -e "${YELLOW}Warning: Ollama may not be running on localhost:11434${NC}"
  fi
}
# ============================================================================
# Test Cases
# ============================================================================
#
# @brief Run every end-to-end NL2SQL test case
#
# Each case passes run_test a name, a natural-language query and the regex
# the generated SQL must match. run_test returns 1 for a failed case, and the
# script runs under `set -e`, so every call is guarded with `|| true`:
# otherwise the first failure would abort the script before the remaining
# cases and the summary run. Pass/fail tallies are kept by run_test.
#
run_e2e_tests() {
  print_section "Running End-to-End NL2SQL Tests"

  # Test 1: Simple SELECT
  run_test \
    "Simple SELECT all customers" \
    "NL2SQL: Show all customers" \
    "SELECT.*customers" || true

  # Test 2: SELECT with WHERE
  run_test \
    "SELECT with condition" \
    "NL2SQL: Find customers from USA" \
    "SELECT.*WHERE" || true

  # Test 3: JOIN query
  run_test \
    "JOIN customers and orders" \
    "NL2SQL: Show customer names with their order amounts" \
    "SELECT.*JOIN" || true

  # Test 4: Aggregation
  run_test \
    "COUNT aggregation" \
    "NL2SQL: Count customers by country" \
    "COUNT.*GROUP BY" || true

  # Test 5: Sorting
  run_test \
    "ORDER BY" \
    "NL2SQL: Show orders sorted by total amount" \
    "SELECT.*ORDER BY" || true

  # Test 6: Complex query
  run_test \
    "Complex aggregation" \
    "NL2SQL: What is the average order total per country?" \
    "AVG" || true

  # Test 7: Date handling
  run_test \
    "Date filtering" \
    "NL2SQL: Find customers created in 2024" \
    "2024" || true

  # Test 8: Subquery (may fail with simple models)
  run_test \
    "Subquery" \
    "NL2SQL: Find customers with orders above average" \
    "SELECT" || true
}
# ============================================================================
# Results Summary
# ============================================================================
#
# @brief Print the test tallies and the integer pass rate
# @return 0 when FAILED is zero, 1 otherwise
#
print_summary() {
  local pass_rate=0

  print_section "Test Summary"
  printf '%s\n' "Total tests: $TOTAL"
  printf '%b\n' "Passed: ${GREEN}$PASSED${NC}"
  printf '%b\n' "Failed: ${RED}$FAILED${NC}"
  printf '%b\n' "Skipped: ${YELLOW}$SKIPPED${NC}"

  # Only divide when at least one test ran; the rate stays 0 otherwise.
  [ "$TOTAL" -gt 0 ] && pass_rate=$((100 * PASSED / TOTAL))
  printf 'Pass rate: %s%%\n' "$pass_rate"

  if [ "$FAILED" -eq 0 ]; then
    printf '%b\n' "\n${GREEN}All tests passed!${NC}"
    return 0
  fi

  printf '%b\n' "\n${RED}Some tests failed${NC}"
  return 1
}
# ============================================================================
# Main
# ============================================================================
#
# @brief Script driver: show the configuration, prepare, test, summarize
# @return Exit status of print_summary
#
main() {
  local step

  print_section "NL2SQL End-to-End Testing"
  printf '%s\n' \
    "Configuration:" \
    " ProxySQL: $PROXYSQL_HOST:$PROXYSQL_PORT" \
    " Admin: $PROXYSQL_ADMIN_HOST:$PROXYSQL_ADMIN_PORT" \
    " Schema: $TEST_SCHEMA" \
    " LLM Mode: $LLM_MODE"

  # Setup steps first, then the test cases, in this fixed order.
  for step in setup_schema configure_llm run_e2e_tests; do
    "$step"
  done

  # The summary runs last so its status becomes main's status.
  print_summary
}
# Script entry point: forward all command-line arguments to main. main does
# not read them itself; LLM_MODE was already taken from the script's first
# argument in the configuration section.
main "$@"