mirror of https://github.com/sysown/proxysql
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
442 lines
15 KiB
442 lines
15 KiB
#!/bin/bash
set -euo pipefail
#
# Run Multiple TAP Groups in Parallel
#
# Usage:
#   RUN_ID="abc123" \
#   TAP_GROUPS="legacy-g1 legacy-g2 ai-g1 mysql84-g1" \
#   ./run-multi-group.bash
#
# Optional environment variables:
#   PARALLEL_JOBS=4        # Max parallel groups (default: 2; 0 = unlimited)
#   TIMEOUT_MINUTES=60     # Hard timeout per group (default: 60)
#   EXIT_ON_FIRST_FAIL=0   # Stop on first failure (default: 0)
#   AUTO_CLEANUP=0         # Auto cleanup successful groups (default: 0)
#   SKIP_CLUSTER_START=1   # Skip ProxySQL cluster initialization (default: 0)
#   COVERAGE=1             # Enable code coverage collection (default: 0)
#   TAP_USE_NOISE=1        # Enable noise injection for race condition testing (default: 0)
#   STAGGER_DELAY=5        # Seconds between group startups (default: 5)
#
# Coverage notes:
# - Requires ProxySQL to be compiled with COVERAGE=1 (adds --coverage flags)
# - Requires fastcov and genhtml to be available in containers
# - Coverage is collected regardless of test success/failure/timeout
# - Individual reports: ci_infra_logs/{INFRA_ID}/coverage-report/
# - Combined report: ci_infra_logs/multi-group-{RUN_ID}/coverage-report/
#

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

# Configuration: environment overrides win; the values below are defaults.
export WORKSPACE="${WORKSPACE:-${REPO_ROOT}}"
RUN_ID="${RUN_ID:-$(date +%s)}"
TAP_GROUPS="${TAP_GROUPS:-}"
# NOTE: the header used to claim "default: unlimited" here, but the code
# default has always been 2; the comment was fixed, not the behavior.
PARALLEL_JOBS="${PARALLEL_JOBS:-2}"   # 0 disables the throttle (unlimited)
TIMEOUT_MINUTES="${TIMEOUT_MINUTES:-60}"
EXIT_ON_FIRST_FAIL="${EXIT_ON_FIRST_FAIL:-0}"
AUTO_CLEANUP="${AUTO_CLEANUP:-0}"
SKIP_CLUSTER_START="${SKIP_CLUSTER_START:-0}"
COVERAGE="${COVERAGE:-0}"
TAP_USE_NOISE="${TAP_USE_NOISE:-0}"
|
|
|
# TAP_GROUPS is mandatory: without it there is nothing to run.
if [ -z "${TAP_GROUPS}" ]; then
    echo "ERROR: TAP_GROUPS is not set."
    echo "Usage: RUN_ID=<id> TAP_GROUPS='group1 group2' ${0}"
    exit 1
fi

# Count the whitespace-separated group names.
TOTAL_GROUPS=0
for _group in ${TAP_GROUPS}; do
    TOTAL_GROUPS=$((TOTAL_GROUPS + 1))
done

if [ "${TOTAL_GROUPS}" -eq 0 ]; then
    echo "ERROR: No TAP groups specified."
    exit 1
fi

# Print the effective configuration banner.
cat <<EOF
==========================================
Parallel TAP Group Execution
==========================================
RUN_ID: ${RUN_ID}
TAP_GROUPS: ${TAP_GROUPS}
PARALLEL_JOBS: ${PARALLEL_JOBS}
TIMEOUT_MINUTES: ${TIMEOUT_MINUTES}
EXIT_ON_FIRST_FAIL: ${EXIT_ON_FIRST_FAIL}
AUTO_CLEANUP: ${AUTO_CLEANUP}
SKIP_CLUSTER_START: ${SKIP_CLUSTER_START}
COVERAGE: ${COVERAGE}
TAP_USE_NOISE: ${TAP_USE_NOISE}
==========================================
EOF

# Per-run directory for logs and .result files.
RESULTS_DIR="${WORKSPACE}/ci_infra_logs/multi-group-${RUN_ID}"
mkdir -p "${RESULTS_DIR}"

# Bookkeeping for background jobs: pid list plus pid<->group maps and
# per-group timing/exit-code records.
declare -a JOB_PIDS=()
declare -A GROUP_FOR_PID
declare -A PID_FOR_GROUP
declare -A EXIT_CODES
declare -A START_TIMES
declare -A END_TIMES
|
|
|
# On INT/TERM: ask every launched job to terminate, give it two seconds,
# then force-kill whatever is left, and exit with the conventional 130.
cleanup_on_interrupt() {
    echo ""
    echo ">>> INTERRUPT received - cleaning up running jobs..."
    local sig pid
    for sig in TERM KILL; do
        for pid in "${JOB_PIDS[@]}"; do
            kill "-${sig}" "${pid}" 2>/dev/null || true
        done
        # Grace period between the polite round and the forceful one.
        if [ "${sig}" = "TERM" ]; then
            sleep 2
        fi
    done
    exit 130
}
trap cleanup_on_interrupt INT TERM
|
|
|
#######################################
# Run one TAP group end-to-end: stagger startup, set up its isolated
# infrastructure, run its tests under a hard timeout, record the outcome,
# and optionally auto-clean on success.
# Globals:   RUN_ID, RESULTS_DIR, SCRIPT_DIR, TIMEOUT_MINUTES, AUTO_CLEANUP,
#            SKIP_CLUSTER_START, COVERAGE, TAP_USE_NOISE (read)
# Arguments: $1 - TAP group name; $2 - zero-based launch index (stagger)
# Outputs:   progress to ${RESULTS_DIR}/<group>.log; result summary to
#            ${RESULTS_DIR}/<group>.result
# Returns:   0 on success, 124 on timeout, otherwise the test exit code.
#######################################
run_single_group() {
    local group="${1}"
    local group_index="${2}"
    local infra_id="${group}-${RUN_ID}"
    local log_file="${RESULTS_DIR}/${group}.log"
    local start_time end_time duration

    # Stagger infrastructure startup to avoid resource contention when many
    # groups launch at once: group N waits N * STAGGER_DELAY seconds.
    local STAGGER_DELAY="${STAGGER_DELAY:-5}"
    local delay=$((group_index * STAGGER_DELAY))
    if [ "${delay}" -gt 0 ]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] ${group}: Waiting ${delay}s to stagger startup (index=${group_index}, delay=${STAGGER_DELAY}s per group)..." | tee -a "${log_file}"
        sleep "${delay}"
    fi

    start_time=$(date +%s)
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] STARTING: ${group} (INFRA_ID: ${infra_id})" | tee -a "${log_file}"

    # Export once for the child scripts (ensure-infras / run-tests-isolated).
    # (The original exported INFRA_ID/TAP_GROUP twice; deduplicated.)
    export INFRA_ID="${infra_id}"
    export TAP_GROUP="${group}"
    export SKIP_CLUSTER_START="${SKIP_CLUSTER_START}"
    export COVERAGE="${COVERAGE}"
    export TAP_USE_NOISE="${TAP_USE_NOISE}"

    # Build the inner script in a temp file so `timeout` controls the whole
    # process. BUGFIX: the heredoc delimiter is now quoted, so $(date ...)
    # inside expands at RUN time — the unquoted original froze every
    # timestamp at generation time. Paths come in via LOG_FILE/SCRIPT_DIR
    # env vars passed on the command line below.
    local inner_script
    inner_script=$(mktemp)
    cat > "${inner_script}" <<'INNERSCRIPT'
#!/bin/bash
set -e

# Setup infrastructure
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Setting up infrastructure..." >> "${LOG_FILE}" 2>&1
if ! "${SCRIPT_DIR}/ensure-infras.bash" >> "${LOG_FILE}" 2>&1; then
    # BUGFIX: under set -e the original's `if [ $? -ne 0 ]` was dead code.
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: Failed to set up infrastructure" >> "${LOG_FILE}" 2>&1
    exit 1
fi

# Run tests
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Running tests..." >> "${LOG_FILE}" 2>&1
test_exit=0
# BUGFIX: capture with `|| test_exit=$?`; the original `cmd; test_exit=$?`
# never reached the assignment on failure because of set -e.
"${SCRIPT_DIR}/run-tests-isolated.bash" >> "${LOG_FILE}" 2>&1 || test_exit=$?
if [ "${test_exit}" -ne 0 ]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] WARNING: Tests failed with exit code ${test_exit}" >> "${LOG_FILE}" 2>&1
    exit "${test_exit}"
fi
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Tests completed successfully" >> "${LOG_FILE}" 2>&1
INNERSCRIPT

    # Run with a hard timeout. BUGFIX: capture the status with `||` — the
    # original `timeout ...; cmd_exit_code=$?` aborted this (set -e) subshell
    # on any failure, so failed/timed-out groups never wrote their .result
    # file, never logged FAILED, and leaked the temp script.
    local cmd_exit_code=0
    chmod +x "${inner_script}"
    LOG_FILE="${log_file}" SCRIPT_DIR="${SCRIPT_DIR}" \
        timeout "${TIMEOUT_MINUTES}m" bash "${inner_script}" || cmd_exit_code=$?
    rm -f "${inner_script}"

    # Classify the outcome (GNU timeout reports 124 on timeout).
    local exit_code=0
    if [ "${cmd_exit_code}" -eq 0 ]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] SUCCESS: ${group}" | tee -a "${log_file}"
        exit_code=0
    elif [ "${cmd_exit_code}" -eq 124 ]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] TIMEOUT: ${group} after ${TIMEOUT_MINUTES} minutes" | tee -a "${log_file}"
        exit_code=124
    else
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] FAILED: ${group} (exit code: ${cmd_exit_code})" | tee -a "${log_file}"
        exit_code="${cmd_exit_code}"
    fi

    end_time=$(date +%s)
    duration=$((end_time - start_time))

    # Persist the machine-readable result for the summary/collection phases.
    cat > "${RESULTS_DIR}/${group}.result" << EOF
GROUP=${group}
INFRA_ID=${infra_id}
EXIT_CODE=${exit_code}
DURATION=${duration}
START_TIME=${start_time}
END_TIME=${end_time}
LOG_FILE=${log_file}
EOF

    # Auto-cleanup successful runs if enabled (best-effort: failures here
    # must not change the group's recorded outcome).
    if [ "${exit_code}" -eq 0 ] && [ "${AUTO_CLEANUP}" -eq 1 ]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Auto-cleanup: ${group}" | tee -a "${log_file}"
        INFRA_ID="${infra_id}" TAP_GROUP="${group}" "${SCRIPT_DIR}/stop-proxysql-isolated.bash" >> "${log_file}" 2>&1 || true
        INFRA_ID="${infra_id}" TAP_GROUP="${group}" "${SCRIPT_DIR}/destroy-infras.bash" >> "${log_file}" 2>&1 || true
    fi

    return "${exit_code}"
}
|
|
|
|
# Main execution: launch one background job per group, throttled to
# PARALLEL_JOBS concurrent jobs (0 = unlimited).
echo ">>> Starting parallel execution of ${TOTAL_GROUPS} groups..."
START_TIME=$(date +%s)

# Track overall status
OVERALL_FAILED=0
JOBS_RUNNING=0
GROUP_INDEX=0

for group in ${TAP_GROUPS}; do
    # Stop launching new groups once a failure has been observed.
    if [ "${EXIT_ON_FIRST_FAIL}" -eq 1 ] && [ "${OVERALL_FAILED}" -ne 0 ]; then
        echo ">>> Skipping ${group} due to previous failure"
        continue
    fi

    # At the concurrency limit: block until any running job finishes.
    if [ "${PARALLEL_JOBS}" -gt 0 ] && [ "${JOBS_RUNNING}" -ge "${PARALLEL_JOBS}" ]; then
        echo ">>> Waiting for a job to finish (max ${PARALLEL_JOBS} parallel)..."
        # BUGFIX: record the reaped job's failure instead of `|| true` —
        # otherwise OVERALL_FAILED is never set during the launch phase and
        # EXIT_ON_FIRST_FAIL above is a no-op.
        wait -n || OVERALL_FAILED=1
        JOBS_RUNNING=$((JOBS_RUNNING - 1))
    fi

    # Start the job and remember its pid<->group association.
    echo ">>> Launching: ${group}"
    run_single_group "${group}" "${GROUP_INDEX}" &
    job_pid=$!
    JOB_PIDS+=("${job_pid}")
    GROUP_FOR_PID["${job_pid}"]="${group}"
    PID_FOR_GROUP["${group}"]="${job_pid}"
    JOBS_RUNNING=$((JOBS_RUNNING + 1))
    GROUP_INDEX=$((GROUP_INDEX + 1))
done
|
|
|
|
# Wait for every job and record per-group exit codes.
echo ">>> Waiting for all jobs to complete..."
for pid in "${JOB_PIDS[@]}"; do
    group="${GROUP_FOR_PID[${pid}]}"
    rc=0
    wait "${pid}" || rc=$?
    # BUGFIX: a job already reaped by `wait -n` in the launch throttle makes
    # `wait <pid>` return 127 ("not a child"); recover the real exit code
    # from the group's .result file instead of mis-recording 127.
    if [ "${rc}" -eq 127 ] && [ -f "${RESULTS_DIR}/${group}.result" ]; then
        rc="$(grep '^EXIT_CODE=' "${RESULTS_DIR}/${group}.result" | cut -d= -f2 || true)"
        rc="${rc:-1}"
    fi
    EXIT_CODES["${group}"]="${rc}"
    if [ "${rc}" -ne 0 ]; then
        OVERALL_FAILED=1
    fi
done

END_TIME=$(date +%s)
TOTAL_DURATION=$((END_TIME - START_TIME))
|
|
|
# Generate the per-group summary table.
echo ""
echo "=========================================="
echo " EXECUTION SUMMARY "
echo "=========================================="
printf "%-25s %10s %12s\n" "Group" "Duration" "Status"
echo "------------------------------------------"

for group in ${TAP_GROUPS}; do
    # Groups with no recorded code (e.g. skipped by EXIT_ON_FIRST_FAIL)
    # default to failure.
    exit_code="${EXIT_CODES[${group}]:-1}"
    result_file="${RESULTS_DIR}/${group}.result"

    if [ -f "${result_file}" ]; then
        # BUGFIX: `|| true` + default guard — under set -eo pipefail a
        # result file missing its DURATION line previously aborted the whole
        # summary, and an empty value broke the arithmetic below.
        duration=$(grep "^DURATION=" "${result_file}" | cut -d= -f2 || true)
        duration="${duration:-0}"
        duration_min=$((duration / 60))
        duration_sec=$((duration % 60))
        duration_str="${duration_min}m${duration_sec}s"
    else
        duration_str="N/A"
    fi

    if [ "${exit_code}" -eq 0 ]; then
        status="✓ PASS"
    elif [ "${exit_code}" -eq 124 ]; then
        status="✗ TIMEOUT"
    else
        status="✗ FAIL"
    fi

    printf "%-25s %10s %12s\n" "${group}" "${duration_str}" "${status}"
done

echo "------------------------------------------"
echo "TOTAL TIME: $((TOTAL_DURATION / 60))m$((TOTAL_DURATION % 60))s"
echo "=========================================="
|
|
|
|
# Tally pass/fail/timeout counts across all groups; a group with no
# recorded exit code counts as failed.
PASS_COUNT=0
FAIL_COUNT=0
TIMEOUT_COUNT=0

for group in ${TAP_GROUPS}; do
    case "${EXIT_CODES[${group}]:-1}" in
        0)   PASS_COUNT=$((PASS_COUNT + 1)) ;;
        124) TIMEOUT_COUNT=$((TIMEOUT_COUNT + 1)) ;;
        *)   FAIL_COUNT=$((FAIL_COUNT + 1)) ;;
    esac
done

echo ""
echo "PASSED: ${PASS_COUNT}/${TOTAL_GROUPS}"
echo "FAILED: ${FAIL_COUNT}/${TOTAL_GROUPS}"
echo "TIMEOUT: ${TIMEOUT_COUNT}/${TOTAL_GROUPS}"
echo ""
echo "Results directory: ${RESULTS_DIR}"
echo ""
|
|
|
|
# Point the user at the log files of every group that did not pass.
if [ $((FAIL_COUNT + TIMEOUT_COUNT)) -gt 0 ]; then
    echo "Failed/Timed out group logs:"
    for group in ${TAP_GROUPS}; do
        if [ "${EXIT_CODES[${group}]:-1}" -ne 0 ]; then
            echo " ${group}: ${RESULTS_DIR}/${group}.log"
        fi
    done
    echo ""
fi
|
|
|
|
# Combine coverage reports if coverage mode is enabled.
# Gathers each group's .info files from ci_infra_logs/<group>-<RUN_ID>/,
# merges them inside a container (fastcov preferred, lcov fallback), and
# renders a combined HTML report with genhtml.
if [ "${COVERAGE}" -eq 1 ]; then
    echo ""
    echo "=========================================="
    echo " COMBINING COVERAGE REPORTS "
    echo "=========================================="

    COMBINED_COVERAGE_DIR="${RESULTS_DIR}/coverage-report"
    mkdir -p "${COMBINED_COVERAGE_DIR}"

    # Find all individual coverage info files from each group.
    # COVERAGE_FILES is a deliberately space-separated string: it is passed
    # into the container via -e and word-split there.
    COVERAGE_FILES=""
    for group in ${TAP_GROUPS}; do
        infra_id="${group}-${RUN_ID}"
        group_coverage_dir="${WORKSPACE}/ci_infra_logs/${infra_id}/coverage-report"
        if [ -d "${group_coverage_dir}" ]; then
            for info_file in "${group_coverage_dir}"/*.info; do
                # [ -f ] filters out the literal unmatched glob pattern.
                if [ -f "${info_file}" ]; then
                    COVERAGE_FILES="${COVERAGE_FILES} ${info_file}"
                    echo ">>> Found coverage: ${info_file}"
                fi
            done
        fi
    done

    if [ -n "${COVERAGE_FILES}" ]; then
        COMBINED_INFO="${COMBINED_COVERAGE_DIR}/combined-coverage.info"
        echo ">>> Combining coverage reports into: ${COMBINED_INFO}"

        # Run coverage combination in container (tools may not be on host).
        # The single-quoted script expands nothing on the host; ${...} refs
        # inside resolve to the -e environment variables in the container.
        # NOTE(review): ${COVERAGE_FILES} is intentionally unquoted inside
        # the container script so it word-splits into separate file args —
        # assumes no coverage paths contain spaces; confirm.
        docker run --rm \
            -v "${WORKSPACE}:${WORKSPACE}" \
            -e COVERAGE_FILES="${COVERAGE_FILES}" \
            -e COMBINED_INFO="${COMBINED_INFO}" \
            proxysql-ci-base:latest \
            bash -c '
            set -e
            if command -v fastcov >/dev/null 2>&1; then
            fastcov -b -l -C ${COVERAGE_FILES} -o "${COMBINED_INFO}" 2>&1 || {
            echo ">>> WARNING: fastcov combine failed, trying lcov..."
            if command -v lcov >/dev/null 2>&1; then
            FIRST_FILE=true
            for info_file in ${COVERAGE_FILES}; do
            if [ "${FIRST_FILE}" = true ]; then
            cp "${info_file}" "${COMBINED_INFO}"
            FIRST_FILE=false
            else
            lcov -a "${COMBINED_INFO}" -a "${info_file}" -o "${COMBINED_INFO}".tmp 2>/dev/null && \
            mv "${COMBINED_INFO}".tmp "${COMBINED_INFO}"
            fi
            done
            fi
            }
            elif command -v lcov >/dev/null 2>&1; then
            FIRST_FILE=true
            for info_file in ${COVERAGE_FILES}; do
            if [ "${FIRST_FILE}" = true ]; then
            cp "${info_file}" "${COMBINED_INFO}"
            FIRST_FILE=false
            else
            lcov -a "${COMBINED_INFO}" -a "${info_file}" -o "${COMBINED_INFO}".tmp 2>/dev/null && \
            mv "${COMBINED_INFO}".tmp "${COMBINED_INFO}"
            fi
            done
            else
            echo ">>> ERROR: Neither fastcov nor lcov available"
            exit 1
            fi
            ' || echo ">>> WARNING: Coverage combination failed"

        # Success is judged by the artifact existing, not by docker's exit
        # status (which is swallowed by the || echo above).
        if [ -f "${COMBINED_INFO}" ]; then
            echo ">>> Combined coverage report: ${COMBINED_INFO}"

            # Generate HTML report from the merged .info file.
            echo ">>> Generating HTML coverage report..."
            COMBINED_HTML="${COMBINED_COVERAGE_DIR}/html"
            mkdir -p "${COMBINED_HTML}"

            # NOTE(review): this docker run has no `|| ...` guard — under
            # set -e a docker-level failure (not a genhtml failure, which is
            # caught inside) aborts the whole script here; confirm intended.
            docker run --rm \
                -v "${WORKSPACE}:${WORKSPACE}" \
                -e COMBINED_INFO="${COMBINED_INFO}" \
                -e COMBINED_HTML="${COMBINED_HTML}" \
                proxysql-ci-base:latest \
                bash -c '
                if command -v genhtml >/dev/null 2>&1; then
                genhtml --branch-coverage --ignore-errors negative,source --synthesize-missing "${COMBINED_INFO}" --output-directory "${COMBINED_HTML}" 2>&1 || \
                echo ">>> WARNING: HTML generation failed"
                else
                echo ">>> WARNING: genhtml not available"
                fi
                '

            if [ -f "${COMBINED_HTML}/index.html" ]; then
                echo ">>> Combined HTML report: ${COMBINED_HTML}/index.html"
            fi
        else
            echo ">>> WARNING: Failed to generate combined coverage report"
        fi
    else
        echo ">>> No coverage files found to combine"
    fi
    echo ""
fi
|
|
|
|
# Final status: non-zero exit when any group failed or timed out.
if [ "${OVERALL_FAILED}" -ne 0 ]; then
    echo ">>> Some groups failed. Check logs above."
    exit 1
fi
echo ">>> All groups passed!"
exit 0
|