mirror of https://github.com/sysown/proxysql
New scripts for running multiple TAP groups in parallel:
- run-multi-group.bash: Orchestrates parallel execution of multiple TAP groups
* RUN_ID: Links all groups in a single test run (e.g., commit SHA)
* TAP_GROUPS: Space-separated list of groups to run
* PARALLEL_JOBS: Limit concurrent groups (default: unlimited)
* TIMEOUT_MINUTES: Per-group timeout (default: 60)
* EXIT_ON_FIRST_FAIL: Stop on first failure (default: 0)
* AUTO_CLEANUP: Destroy successful groups automatically (default: 0)
* SKIP_CLUSTER_START: Skip ProxySQL cluster init (default: 0)
- destroy-multi-group.bash: Bulk cleanup for a specific RUN_ID
* Destroys all infrastructures matching *-{RUN_ID}
* Can target specific groups or auto-discover from log directories
Each group gets fully isolated infrastructure:
- INFRA_ID: {TAP_GROUP}-{RUN_ID}
- Network: {INFRA_ID}_backend
- Per-group logs in ci_infra_logs/{INFRA_ID}/
Fixes #5463
pull/5484/head
parent
679d758e24
commit
472e3644f7
@ -0,0 +1,145 @@
|
||||
#!/bin/bash
set -euo pipefail
#
# Destroy Multiple TAP Groups for a Specific RUN_ID
#
# Usage:
#   RUN_ID="abc123" ./destroy-multi-group.bash
#
# Optional:
#   TAP_GROUPS="group1 group2"  # Only destroy specific groups for this RUN_ID
#   FORCE=1                     # Skip confirmation
#
# For every selected group this exports INFRA_ID="<group>-<RUN_ID>" and
# TAP_GROUP="<group>", then runs stop-proxysql-isolated.bash followed by
# destroy-infras.bash from this script's directory. Cleanup is best-effort:
# the script exits 0 even when individual groups fail to destroy.
#

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

# Configuration
export WORKSPACE="${WORKSPACE:-${REPO_ROOT}}"
RUN_ID="${RUN_ID:-}"
TAP_GROUPS="${TAP_GROUPS:-}"
FORCE="${FORCE:-0}"

# Validate required variables
if [ -z "${RUN_ID}" ]; then
  echo "ERROR: RUN_ID is not set."
  echo "Usage: RUN_ID=<id> [TAP_GROUPS='group1 group2'] ${0}"
  echo ""
  echo "This will destroy all infrastructures matching '*-${RUN_ID}'"
  exit 1
fi

# Determine which groups to destroy.
# The list is deliberately not stored in a variable named GROUPS: bash reserves
# that name for the caller's numeric group IDs and does not honour assignments
# to it, so the requested TAP groups would never be seen.
declare -a GROUP_LIST=()
if [ -n "${TAP_GROUPS}" ]; then
  # Use specified groups
  read -ra GROUP_LIST <<< "${TAP_GROUPS}"
  MODE="specific"
else
  # Auto-discover groups by finding log directories matching *-${RUN_ID}
  MODE="auto"
  LOGS_PATH="${WORKSPACE}/ci_infra_logs"
  if [ -d "${LOGS_PATH}" ]; then
    for dir in "${LOGS_PATH}"/*; do
      if [ ! -d "${dir}" ]; then
        continue
      fi
      dir_name="$(basename "${dir}")"
      # multi-group-<RUN_ID> is the aggregated results directory (removed
      # further down), not the log directory of a TAP group.
      if [ "${dir_name}" = "multi-group-${RUN_ID}" ]; then
        continue
      fi
      if [[ "${dir_name}" == *"-${RUN_ID}" ]]; then
        # Extract group name from INFRA_ID; the suffix is quoted so RUN_ID is
        # matched literally rather than as a glob pattern.
        GROUP_LIST+=("${dir_name%"-${RUN_ID}"}")
      fi
    done
  fi
fi

TOTAL_GROUPS=${#GROUP_LIST[@]}

if [ "${TOTAL_GROUPS}" -eq 0 ]; then
  echo "No groups found to destroy for RUN_ID: ${RUN_ID}"
  echo "Usage: RUN_ID=<id> ${0}"
  exit 0
fi

echo "=========================================="
echo "Destroy Multiple TAP Groups"
echo "=========================================="
echo "RUN_ID: ${RUN_ID}"
echo "MODE: ${MODE}"
echo "GROUPS: ${GROUP_LIST[*]}"
echo "=========================================="

# Confirmation prompt (unless FORCE=1)
if [ "${FORCE}" -eq 0 ]; then
  echo ""
  echo "This will destroy the following ${TOTAL_GROUPS} infrastructure(s):"
  for group in "${GROUP_LIST[@]}"; do
    echo " - ${group}-${RUN_ID}"
  done
  echo ""
  # read fails at EOF (e.g. stdin is not a terminal); treat that as "no"
  # instead of letting set -e terminate the script without a message.
  REPLY=""
  read -p "Are you sure? [y/N] " -n 1 -r || REPLY=""
  echo ""
  if [[ ! "${REPLY}" =~ ^[Yy]$ ]]; then
    echo "Aborted."
    exit 0
  fi
fi

# Track results
declare -A DESTROY_RESULTS
SUCCESS_COUNT=0
FAIL_COUNT=0

# Destroy each group's infrastructure
for group in "${GROUP_LIST[@]}"; do
  infra_id="${group}-${RUN_ID}"
  echo ""
  echo ">>> Destroying: ${group} (INFRA_ID: ${infra_id})"

  # The helper scripts are driven through these exported variables.
  export INFRA_ID="${infra_id}"
  export TAP_GROUP="${group}"

  # First stop ProxySQL; a failure here is tolerated (best-effort).
  if "${SCRIPT_DIR}/stop-proxysql-isolated.bash" >/dev/null 2>&1; then
    echo " Stopped ProxySQL"
  fi

  # Then destroy backends
  if "${SCRIPT_DIR}/destroy-infras.bash" >/dev/null 2>&1; then
    echo " ✓ ${group} destroyed"
    DESTROY_RESULTS["${group}"]=0
    SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
  else
    echo " ✗ ${group} failed to destroy (may already be cleaned up)"
    DESTROY_RESULTS["${group}"]=1
    FAIL_COUNT=$((FAIL_COUNT + 1))
  fi
done

# Also clean up the multi-group results directory if it exists
MULTI_GROUP_DIR="${WORKSPACE}/ci_infra_logs/multi-group-${RUN_ID}"
if [ -d "${MULTI_GROUP_DIR}" ]; then
  echo ""
  echo ">>> Removing multi-group results directory: ${MULTI_GROUP_DIR}"
  # :? aborts instead of expanding to an empty path
  rm -rf -- "${MULTI_GROUP_DIR:?}"
  echo " ✓ Results directory removed"
fi

# Summary
echo ""
echo "=========================================="
echo " DESTROY SUMMARY "
echo "=========================================="
echo "TOTAL: ${TOTAL_GROUPS}"
echo "SUCCESS: ${SUCCESS_COUNT}"
echo "FAILED: ${FAIL_COUNT}"
echo "=========================================="

if [ "${FAIL_COUNT}" -eq 0 ]; then
  echo ">>> All groups destroyed successfully"
  exit 0
else
  echo ">>> Some groups failed to destroy (may already be cleaned up)"
  exit 0 # Exit 0 since cleanup is best-effort
fi
|
||||
@ -0,0 +1,287 @@
|
||||
#!/bin/bash
set -euo pipefail
#
# Run Multiple TAP Groups in Parallel
#
# Usage:
#   RUN_ID="abc123" \
#   TAP_GROUPS="legacy-g1 legacy-g2 ai-g1 mysql84-g1" \
#   ./run-multi-group.bash
#
# Optional environment variables:
#   PARALLEL_JOBS=4        # Max parallel groups (default: unlimited)
#   TIMEOUT_MINUTES=60     # Hard timeout per group (default: 60)
#   EXIT_ON_FIRST_FAIL=0   # Stop on first failure (default: 0)
#   AUTO_CLEANUP=0         # Auto cleanup successful groups (default: 0)
#   SKIP_CLUSTER_START=1   # Skip ProxySQL cluster initialization (default: 0)
#

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

# Configuration
export WORKSPACE="${WORKSPACE:-${REPO_ROOT}}"
RUN_ID="${RUN_ID:-$(date +%s)}" # falls back to the current epoch second
TAP_GROUPS="${TAP_GROUPS:-}"
PARALLEL_JOBS="${PARALLEL_JOBS:-0}" # 0 = unlimited
TIMEOUT_MINUTES="${TIMEOUT_MINUTES:-60}"
EXIT_ON_FIRST_FAIL="${EXIT_ON_FIRST_FAIL:-0}"
AUTO_CLEANUP="${AUTO_CLEANUP:-0}"
SKIP_CLUSTER_START="${SKIP_CLUSTER_START:-0}"

# Validate required variables
if [ -z "${TAP_GROUPS}" ]; then
  echo "ERROR: TAP_GROUPS is not set."
  echo "Usage: RUN_ID=<id> TAP_GROUPS='group1 group2' ${0}"
  exit 1
fi

# Convert TAP_GROUPS to array.
# The array must not be called GROUPS: bash reserves that name for the
# caller's numeric group IDs and does not honour assignments to it, so the
# requested TAP groups would never end up in the list.
declare -a TAP_GROUP_LIST=()
read -ra TAP_GROUP_LIST <<< "${TAP_GROUPS}"
TOTAL_GROUPS=${#TAP_GROUP_LIST[@]}

# Catches a TAP_GROUPS value that contains only whitespace.
if [ "${TOTAL_GROUPS}" -eq 0 ]; then
  echo "ERROR: No TAP groups specified."
  exit 1
fi

echo "=========================================="
echo "Parallel TAP Group Execution"
echo "=========================================="
echo "RUN_ID: ${RUN_ID}"
echo "TAP_GROUPS: ${TAP_GROUPS}"
echo "PARALLEL_JOBS: ${PARALLEL_JOBS}"
echo "TIMEOUT_MINUTES: ${TIMEOUT_MINUTES}"
echo "EXIT_ON_FIRST_FAIL: ${EXIT_ON_FIRST_FAIL}"
echo "AUTO_CLEANUP: ${AUTO_CLEANUP}"
echo "SKIP_CLUSTER_START: ${SKIP_CLUSTER_START}"
echo "=========================================="

# Create results directory (per-group .log and .result files are written here)
RESULTS_DIR="${WORKSPACE}/ci_infra_logs/multi-group-${RUN_ID}"
mkdir -p "${RESULTS_DIR}"

# Arrays to track job PIDs and their associated groups
declare -a JOB_PIDS=()
declare -A GROUP_FOR_PID
declare -A PID_FOR_GROUP
declare -A EXIT_CODES
declare -A START_TIMES
declare -A END_TIMES
|
||||
|
||||
# Signal handler for interrupted runs: sends SIGTERM to every PID recorded in
# JOB_PIDS, waits two seconds, sends SIGKILL to the same PIDs, then exits
# with status 130.
cleanup_on_interrupt() {
  local sig job_pid

  printf '\n'
  printf '%s\n' ">>> INTERRUPT received - cleaning up running jobs..."

  for sig in TERM KILL; do
    for job_pid in "${JOB_PIDS[@]}"; do
      # The job may already be gone; that is not an error here.
      kill -s "${sig}" "${job_pid}" 2>/dev/null || true
    done
    if [[ "${sig}" == TERM ]]; then
      sleep 2 # grace period before escalating to SIGKILL
    fi
  done

  exit 130
}
trap cleanup_on_interrupt INT TERM
|
||||
|
||||
# Function to run a single group
#
# Sets up the infrastructure for one TAP group and runs its tests under a
# hard timeout, appending all output to a per-group log file.
#
# Globals (read):     RUN_ID, RESULTS_DIR, SCRIPT_DIR, WORKSPACE,
#                     TIMEOUT_MINUTES, SKIP_CLUSTER_START, AUTO_CLEANUP
# Globals (exported): INFRA_ID, TAP_GROUP
# Arguments:          $1 - TAP group name
# Outputs:            progress lines on stdout (also appended to the log);
#                     writes ${RESULTS_DIR}/<group>.log and
#                     ${RESULTS_DIR}/<group>.result
# Returns:            0 on success, otherwise the non-zero status reported by
#                     timeout (124 when the time limit was hit)
run_single_group() {
  local group="${1}"
  local infra_id="${group}-${RUN_ID}"
  local log_file="${RESULTS_DIR}/${group}.log"
  local start_time end_time duration
  local exit_code=0

  start_time=$(date +%s)
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] STARTING: ${group} (INFRA_ID: ${infra_id})" | tee -a "${log_file}"

  # Export variables for the child process
  export INFRA_ID="${infra_id}"
  export TAP_GROUP="${group}"

  # Run ensure-infras + run-tests-isolated with timeout
  # Note: We don't run cleanup here - let the user decide when to destroy
  #
  # The child script is a fixed, single-quoted text; paths reach it as
  # positional arguments and settings through the environment. Nothing is
  # spliced into the script source, so values containing quotes cannot break
  # it and each timestamp is taken when its line actually runs.
  #
  # The status is captured with "|| exit_code=$?". Reading $? inside the
  # then-branch of "if ! cmd" yields the status of the negation, which is
  # always 0, and would record every failure and timeout as a success.
  WORKSPACE="${WORKSPACE}" SKIP_CLUSTER_START="${SKIP_CLUSTER_START}" \
    timeout "${TIMEOUT_MINUTES}m" bash -c '
      set -euo pipefail
      script_dir="$1"
      log_file="$2"
      stamp() { date "+%Y-%m-%d %H:%M:%S"; }

      echo "[$(stamp)] Setting up infrastructure..." | tee -a "${log_file}"
      if ! "${script_dir}/ensure-infras.bash" >> "${log_file}" 2>&1; then
        echo "[$(stamp)] ERROR: Failed to set up infrastructure" | tee -a "${log_file}"
        exit 1
      fi

      echo "[$(stamp)] Running tests..." | tee -a "${log_file}"
      if ! "${script_dir}/run-tests-isolated.bash" >> "${log_file}" 2>&1; then
        echo "[$(stamp)] ERROR: Tests failed" | tee -a "${log_file}"
        exit 1
      fi

      echo "[$(stamp)] Tests completed successfully" | tee -a "${log_file}"
    ' run-single-group "${SCRIPT_DIR}" "${log_file}" || exit_code=$?

  if [ "${exit_code}" -eq 0 ]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] SUCCESS: ${group}" | tee -a "${log_file}"
  elif [ "${exit_code}" -eq 124 ]; then
    # 124 is the status timeout(1) reports when the time limit was reached
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] TIMEOUT: ${group} after ${TIMEOUT_MINUTES} minutes" | tee -a "${log_file}"
  else
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] FAILED: ${group} (exit code: ${exit_code})" | tee -a "${log_file}"
  fi

  end_time=$(date +%s)
  duration=$((end_time - start_time))

  # Write result file (KEY=VALUE lines; the summary reads DURATION from it)
  cat > "${RESULTS_DIR}/${group}.result" << EOF
GROUP=${group}
INFRA_ID=${infra_id}
EXIT_CODE=${exit_code}
DURATION=${duration}
START_TIME=${start_time}
END_TIME=${end_time}
LOG_FILE=${log_file}
EOF

  # Auto-cleanup successful runs if enabled; cleanup failures are ignored
  if [ "${exit_code}" -eq 0 ] && [ "${AUTO_CLEANUP}" -eq 1 ]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Auto-cleanup: ${group}" | tee -a "${log_file}"
    INFRA_ID="${infra_id}" TAP_GROUP="${group}" "${SCRIPT_DIR}/stop-proxysql-isolated.bash" >> "${log_file}" 2>&1 || true
    INFRA_ID="${infra_id}" TAP_GROUP="${group}" "${SCRIPT_DIR}/destroy-infras.bash" >> "${log_file}" 2>&1 || true
  fi

  return "${exit_code}"
}
|
||||
|
||||
# Main execution
#
# Launches run_single_group for every requested TAP group as a background
# job (at most PARALLEL_JOBS at a time when that is > 0), waits for all of
# them, prints a per-group summary and exits 0 only if every group passed.

# Parse the group list under a name of our own. bash reserves GROUPS for the
# caller's numeric group IDs and does not honour assignments to it, so looping
# over "${GROUPS[@]}" would never visit the requested TAP groups.
declare -a TAP_GROUP_LIST=()
read -ra TAP_GROUP_LIST <<< "${TAP_GROUPS}"
TOTAL_GROUPS=${#TAP_GROUP_LIST[@]}

echo ">>> Starting parallel execution of ${TOTAL_GROUPS} groups..."
START_TIME=$(date +%s)

# Track overall status
OVERALL_FAILED=0
JOBS_RUNNING=0

# Launch jobs
for group in "${TAP_GROUP_LIST[@]}"; do
  # If PARALLEL_JOBS is set and we're at the limit, wait for a job to finish.
  # This happens before the EXIT_ON_FIRST_FAIL check so that a failure
  # observed here can stop the launch of the remaining groups.
  if [ "${PARALLEL_JOBS}" -gt 0 ] && [ "${JOBS_RUNNING}" -ge "${PARALLEL_JOBS}" ]; then
    echo ">>> Waiting for a job to finish (max ${PARALLEL_JOBS} parallel)..."
    # wait -n returns the status of the job that just finished
    if ! wait -n; then
      OVERALL_FAILED=1
    fi
    JOBS_RUNNING=$((JOBS_RUNNING - 1))
  fi

  # Check if we should stop due to previous failure
  if [ "${EXIT_ON_FIRST_FAIL}" -eq 1 ] && [ "${OVERALL_FAILED}" -ne 0 ]; then
    echo ">>> Skipping ${group} due to previous failure"
    continue
  fi

  # Start the job
  echo ">>> Launching: ${group}"
  run_single_group "${group}" &
  local_pid=$!
  JOB_PIDS+=("${local_pid}")
  GROUP_FOR_PID["${local_pid}"]="${group}"
  PID_FOR_GROUP["${group}"]="${local_pid}"
  JOBS_RUNNING=$((JOBS_RUNNING + 1))
done

# Wait for all jobs to complete and record each group's exit status
echo ">>> Waiting for all jobs to complete..."
for pid in "${JOB_PIDS[@]}"; do
  group="${GROUP_FOR_PID[${pid}]}"
  job_status=0
  wait "${pid}" || job_status=$?
  EXIT_CODES["${group}"]="${job_status}"
  if [ "${job_status}" -ne 0 ]; then
    OVERALL_FAILED=1
  fi
done

END_TIME=$(date +%s)
TOTAL_DURATION=$((END_TIME - START_TIME))

# Generate summary report
echo ""
echo "=========================================="
echo " EXECUTION SUMMARY "
echo "=========================================="
printf "%-25s %10s %12s\n" "Group" "Duration" "Status"
echo "------------------------------------------"

PASS_COUNT=0
FAIL_COUNT=0
TIMEOUT_COUNT=0

for group in "${TAP_GROUP_LIST[@]}"; do
  # Groups that were never launched have no recorded status; count them as failed.
  exit_code="${EXIT_CODES[${group}]:-1}"
  result_file="${RESULTS_DIR}/${group}.result"

  if [ -f "${result_file}" ]; then
    duration=$(grep "^DURATION=" "${result_file}" | cut -d= -f2)
    duration_min=$((duration / 60))
    duration_sec=$((duration % 60))
    duration_str="${duration_min}m${duration_sec}s"
  else
    duration_str="N/A"
  fi

  if [ "${exit_code}" -eq 0 ]; then
    status="✓ PASS"
    PASS_COUNT=$((PASS_COUNT + 1))
  elif [ "${exit_code}" -eq 124 ]; then
    status="✗ TIMEOUT"
    TIMEOUT_COUNT=$((TIMEOUT_COUNT + 1))
  else
    status="✗ FAIL"
    FAIL_COUNT=$((FAIL_COUNT + 1))
  fi

  printf "%-25s %10s %12s\n" "${group}" "${duration_str}" "${status}"
done

echo "------------------------------------------"
echo "TOTAL TIME: $((TOTAL_DURATION / 60))m$((TOTAL_DURATION % 60))s"
echo "=========================================="

# Summary of results
echo ""
echo "PASSED: ${PASS_COUNT}/${TOTAL_GROUPS}"
echo "FAILED: ${FAIL_COUNT}/${TOTAL_GROUPS}"
echo "TIMEOUT: ${TIMEOUT_COUNT}/${TOTAL_GROUPS}"
echo ""
echo "Results directory: ${RESULTS_DIR}"
echo ""

# Print log locations for failed groups
if [ "${FAIL_COUNT}" -gt 0 ] || [ "${TIMEOUT_COUNT}" -gt 0 ]; then
  echo "Failed/Timed out group logs:"
  for group in "${TAP_GROUP_LIST[@]}"; do
    exit_code="${EXIT_CODES[${group}]:-1}"
    if [ "${exit_code}" -ne 0 ]; then
      echo " ${group}: ${RESULTS_DIR}/${group}.log"
    fi
  done
  echo ""
fi

# Exit with appropriate code
if [ "${OVERALL_FAILED}" -eq 0 ]; then
  echo ">>> All groups passed!"
  exit 0
else
  echo ">>> Some groups failed. Check logs above."
  exit 1
fi
|
||||
Loading…
Reference in new issue