From 86e86c8ce38231b4b93763a767c8d392f14032e8 Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Sat, 21 Mar 2026 22:56:03 +0000 Subject: [PATCH] Fix parallel coverage collection and improve multi-group runner - Fix code coverage race condition: Each ProxySQL instance now writes .gcda files to its own directory using GCOV_PREFIX, preventing parallel groups from overwriting each other's coverage data - Add TAP_USE_NOISE documentation for noise injection testing - Replace random stagger delay with sequential delay (STAGGER_DELAY) to guarantee proper spacing between group startups - Update README with multi-group runner, coverage, and noise injection docs --- test/infra/README.md | 75 ++++++++++++++++++- test/infra/control/env-isolated.bash | 9 +++ test/infra/control/run-multi-group.bash | 26 ++++--- test/infra/control/run-tests-isolated.bash | 24 ++++++ .../control/start-proxysql-isolated.bash | 7 +- 5 files changed, 130 insertions(+), 11 deletions(-) diff --git a/test/infra/README.md b/test/infra/README.md index 2b424f9e9..a159d43e5 100644 --- a/test/infra/README.md +++ b/test/infra/README.md @@ -117,6 +117,8 @@ The `docker-compose-init.bash` scripts implement a strict **non-destructive poli | `TEST_PY_TAP_INCL` | (none) | Filter tests within the group (regex matching test names). | | `SKIP_CLUSTER_START`| `0` | Set to `1` to bypass starting additional ProxySQL nodes. | | `PROXY_DATA_DIR_HOST`| (dynamic) | Host path for ProxySQL persistent data. | +| `COVERAGE` | `0` | Enable code coverage collection. | +| `TAP_USE_NOISE` | `0` | Enable noise injection for race condition testing. | --- @@ -191,7 +193,78 @@ export PROXYSQL_CLUSTER_NODES=3 --- -## 8. Troubleshooting +## 8. Multi-Group Parallel Execution + +The `run-multi-group.bash` script enables running multiple TAP groups in parallel with proper isolation and resource management. 
+ +### Basic Usage +```bash +RUN_ID="test-$(date +%s)" \ +TAP_GROUPS="legacy-g1 legacy-g2 legacy-g3 mysql84-g1" \ +./test/infra/control/run-multi-group.bash +``` + +### Configuration Options + +| Variable | Default | Description | +| :--- | :--- | :--- | +| `RUN_ID` | (timestamp) | Unique identifier for the multi-group run. | +| `TAP_GROUPS` | (required) | Space-separated list of TAP groups to run. | +| `PARALLEL_JOBS` | `2` | Maximum number of groups running in parallel. | +| `TIMEOUT_MINUTES` | `60` | Hard timeout per group in minutes. | +| `EXIT_ON_FIRST_FAIL` | `0` | Stop all groups on first failure if set to `1`. | +| `AUTO_CLEANUP` | `0` | Automatically clean up successful groups if set to `1`. | +| `STAGGER_DELAY` | `5` | Seconds between group startups to prevent resource contention. | +| `COVERAGE` | `0` | Enable code coverage collection if set to `1`. | +| `TAP_USE_NOISE` | `0` | Enable noise injection for race condition testing if set to `1`. | + +### Output Location +- Individual group logs: `ci_infra_logs/multi-group-{RUN_ID}/{group}.log` +- Combined coverage report (only when `COVERAGE=1`): `ci_infra_logs/multi-group-{RUN_ID}/coverage-report/` + +--- + +## 9. Code Coverage Collection + +When running tests with `COVERAGE=1`, the system collects code coverage data from ProxySQL. + +### Requirements +- ProxySQL must be compiled with `COVERAGE=1` (adds `--coverage` flags) +- `fastcov` and `genhtml` must be available in the `proxysql-ci-base` container + +### Usage +```bash +# Single group with coverage +COVERAGE=1 ./test/infra/control/run-tests-isolated.bash + +# Multi-group with coverage (reports are combined) +COVERAGE=1 RUN_ID="cov-$(date +%s)" TAP_GROUPS="legacy-g1 legacy-g2" ./test/infra/control/run-multi-group.bash +``` + +### Output +- Individual reports: `ci_infra_logs/{INFRA_ID}/coverage-report/` +- Combined report: `ci_infra_logs/multi-group-{RUN_ID}/coverage-report/` + +--- + +## 10. 
Noise Injection Testing + +Noise injection helps detect race conditions and deadlocks by introducing random delays and stress during test execution. + +### Usage +```bash +# Enable noise injection for a single group +TAP_USE_NOISE=1 ./test/infra/control/run-tests-isolated.bash + +# Enable noise injection for multi-group run +TAP_USE_NOISE=1 RUN_ID="noise-$(date +%s)" TAP_GROUPS="legacy-g1 legacy-g2" ./test/infra/control/run-multi-group.bash +``` + +For more details, see `test/tap/NOISE_TESTING.md`. + +--- + +## 11. Troubleshooting * **"Directory Not Empty"**: Run `./test/infra/control/stop-proxysql-isolated.bash` with the same `INFRA_ID` to cleanup, or manually delete the folder in `ci_infra_logs/`. * **Permission Denied**: The system uses `sudo` for log directory management. Ensure your user has sudo privileges. diff --git a/test/infra/control/env-isolated.bash b/test/infra/control/env-isolated.bash index 88f174909..096da6388 100755 --- a/test/infra/control/env-isolated.bash +++ b/test/infra/control/env-isolated.bash @@ -96,6 +96,12 @@ export TEST_PY_TAP_DUMP_RUNTIME="${TEST_PY_TAP_DUMP_RUNTIME:-1}" export TEST_PY_TAP_DUMP_STATS="${TEST_PY_TAP_DUMP_STATS:-1}" export TEST_TAP_TIMEOUT="${TEST_TAP_TIMEOUT:-0}" +# Noise injection for race condition testing +# When enabled, tests that support noise injection will introduce random delays +# and stress to help detect race conditions and deadlocks +# See test/tap/NOISE_TESTING.md for more details +export TAP_USE_NOISE="${TAP_USE_NOISE:-0}" + # TAP test filtering export TEST_PY_TAP_INCL="${TEST_PY_TAP_INCL:-}" export TEST_PY_TAP_EXCL="${TEST_PY_TAP_EXCL:-reg_test_3273_ssl_con-t}" @@ -106,3 +112,6 @@ export TEST_PY_TAPINT_EXCL="${TEST_PY_TAPINT_EXCL:-}" export TAP_REG_TEST_3549_AUTOCOMMIT_TRACKING___MYSQL_SERVER_HOSTGROUP=1300 echo ">>> Isolated Environment Loaded (INFRA_ID: ${INFRA_ID})" +if [ "${TAP_USE_NOISE}" = "1" ] || [ "${TAP_USE_NOISE}" = "true" ]; then + echo ">>> Noise Injection ENABLED - tests will introduce 
random delays for race condition testing" +fi diff --git a/test/infra/control/run-multi-group.bash b/test/infra/control/run-multi-group.bash index 228de08fb..18df22339 100755 --- a/test/infra/control/run-multi-group.bash +++ b/test/infra/control/run-multi-group.bash @@ -14,6 +14,8 @@ set -euo pipefail # AUTO_CLEANUP=0 # Auto cleanup successful groups (default: 0) # SKIP_CLUSTER_START=1 # Skip ProxySQL cluster initialization (default: 0) # COVERAGE=1 # Enable code coverage collection (default: 0) +# TAP_USE_NOISE=1 # Enable noise injection for race condition testing (default: 0) +# STAGGER_DELAY=5 # Seconds between group startups (default: 5) # # Coverage notes: # - Requires ProxySQL to be compiled with COVERAGE=1 (adds --coverage flags) @@ -36,6 +38,7 @@ EXIT_ON_FIRST_FAIL="${EXIT_ON_FIRST_FAIL:-0}" AUTO_CLEANUP="${AUTO_CLEANUP:-0}" SKIP_CLUSTER_START="${SKIP_CLUSTER_START:-0}" COVERAGE="${COVERAGE:-0}" +TAP_USE_NOISE="${TAP_USE_NOISE:-0}" # Validate required variables if [ -z "${TAP_GROUPS}" ]; then @@ -66,6 +69,7 @@ echo "EXIT_ON_FIRST_FAIL: ${EXIT_ON_FIRST_FAIL}" echo "AUTO_CLEANUP: ${AUTO_CLEANUP}" echo "SKIP_CLUSTER_START: ${SKIP_CLUSTER_START}" echo "COVERAGE: ${COVERAGE}" +echo "TAP_USE_NOISE: ${TAP_USE_NOISE}" echo "==========================================" # Create results directory @@ -98,19 +102,20 @@ trap cleanup_on_interrupt INT TERM # Function to run a single group run_single_group() { local group="${1}" + local group_index="${2}" local infra_id="${group}-${RUN_ID}" local log_file="${RESULTS_DIR}/${group}.log" local start_time end_time duration - # Add random delay to stagger infrastructure startup + # Sequential delay to stagger infrastructure startup # This prevents resource contention when running multiple groups in parallel - # Use group name hash + random to ensure both unique and unpredictable delays - local group_hash=$(echo -n "${group}" | cksum | cut -d' ' -f1) - local base_delay=$((group_hash % 15)) # 0-14 seconds based on group name - 
local random_delay=$((RANDOM % 10 + 1)) # 1-10 seconds random - local delay=$((base_delay + random_delay)) # Total: 1-24 seconds - echo "[$(date '+%Y-%m-%d %H:%M:%S')] ${group}: Waiting ${delay}s to stagger startup (base=${base_delay}s + random=${random_delay}s)..." | tee -a "${log_file}" - sleep "${delay}" + # Each group starts STAGGER_DELAY seconds after the previous one + local STAGGER_DELAY="${STAGGER_DELAY:-5}" + local delay=$((group_index * STAGGER_DELAY)) + if [ "${delay}" -gt 0 ]; then + echo "[$(date '+%Y-%m-%d %H:%M:%S')] ${group}: Waiting ${delay}s to stagger startup (index=${group_index}, delay=${STAGGER_DELAY}s per group)..." | tee -a "${log_file}" + sleep "${delay}" + fi start_time=$(date +%s) echo "[$(date '+%Y-%m-%d %H:%M:%S')] STARTING: ${group} (INFRA_ID: ${infra_id})" | tee -a "${log_file}" @@ -130,6 +135,7 @@ run_single_group() { export TAP_GROUP="${group}" export SKIP_CLUSTER_START="${SKIP_CLUSTER_START}" export COVERAGE="${COVERAGE}" + export TAP_USE_NOISE="${TAP_USE_NOISE}" timeout "${TIMEOUT_MINUTES}m" bash <>> Launching: ${group}" - run_single_group "${group}" & + run_single_group "${group}" "${GROUP_INDEX}" & local_pid=$! 
JOB_PIDS+=("${local_pid}") GROUP_FOR_PID["${local_pid}"]="${group}" PID_FOR_GROUP["${group}"]="${local_pid}" JOBS_RUNNING=$((JOBS_RUNNING + 1)) + GROUP_INDEX=$((GROUP_INDEX + 1)) done # Wait for all jobs to complete diff --git a/test/infra/control/run-tests-isolated.bash b/test/infra/control/run-tests-isolated.bash index 438510e67..7b262379a 100755 --- a/test/infra/control/run-tests-isolated.bash +++ b/test/infra/control/run-tests-isolated.bash @@ -11,6 +11,13 @@ set -o pipefail # # Optional environment variables: # COVERAGE=1 # Enable code coverage collection (default: 0) +# TAP_USE_NOISE=1 # Enable noise injection for race condition testing (default: 0) +# +# Noise injection notes: +# - When enabled, tests that support noise injection will introduce random delays +# and stress to help detect race conditions and deadlocks +# - Tests check `cl.use_noise` and adjust their behavior accordingly +# - See test/tap/NOISE_TESTING.md for more details # # Coverage notes: # - Requires ProxySQL to be compiled with COVERAGE=1 (adds --coverage flags) @@ -143,6 +150,7 @@ docker run \ -e SCRIPT_DIR="${SCRIPT_DIR}" \ -e MYSQL_BINLOG_BIN="${MYSQL_BINLOG_BIN}" \ -e BINLOG_READER_BIN="${BINLOG_READER_BIN}" \ + -e TAP_USE_NOISE="${TAP_USE_NOISE:-0}" \ proxysql-ci-base:latest \ /bin/bash -c " set -e @@ -152,6 +160,22 @@ docker run \ local exit_code=\$? if [ \"\${COVERAGE_MODE}\" = \"1\" ]; then echo \">>> Collecting code coverage data (exit code was: \${exit_code})...\" + + # Copy .gcda files from group-specific gcov directory to workspace + # This merges coverage data from this group's ProxySQL instance + if [ -d \"/gcov\" ] && [ \"\$(ls -A /gcov 2>/dev/null)\" ]; then + echo \">>> Merging coverage data from /gcov to workspace...\" + # Find and copy all .gcda files, preserving directory structure + cd /gcov && find . 
-name '*.gcda' -type f | while read gcda; do + # Remove leading ./ and get the target path + target=\"\${WORKSPACE}/\${gcda#./}\" + target_dir=\"\$(dirname \"\$target\")\" + mkdir -p \"\$target_dir\" + cp -f \"\$gcda\" \"\$target\" + done + echo \">>> Coverage data merged successfully\" + fi + if command -v fastcov >/dev/null 2>&1; then mkdir -p \"\${COVERAGE_REPORT_DIR}\" local coverage_file=\"\${COVERAGE_REPORT_DIR}/\${INFRA_ID}.info\" diff --git a/test/infra/control/start-proxysql-isolated.bash b/test/infra/control/start-proxysql-isolated.bash index fdb1bd7bb..357284dd3 100755 --- a/test/infra/control/start-proxysql-isolated.bash +++ b/test/infra/control/start-proxysql-isolated.bash @@ -21,6 +21,11 @@ INFRA_LOGS_PATH="${WORKSPACE}/ci_infra_logs" PROXY_DATA_DIR="${INFRA_LOGS_PATH}/${INFRA_ID}/proxysql" GENERIC_CONFIG="${SCRIPT_DIR}/proxysql-ci.cnf" +# Coverage data directory (separate per INFRA_ID to avoid parallel write conflicts) +# GCOV_PREFIX redirects .gcda files to a separate directory for each ProxySQL instance +COVERAGE_DATA_DIR="${INFRA_LOGS_PATH}/${INFRA_ID}/gcov" +mkdir -p "${COVERAGE_DATA_DIR}" + echo ">>> Setting up isolated network: ${NETWORK_NAME}" docker network inspect ${NETWORK_NAME} >/dev/null 2>&1 || docker network create ${NETWORK_NAME} @@ -32,7 +37,7 @@ $SUDO rm -f "${PROXY_DATA_DIR}/proxysql.db" "${PROXY_DATA_DIR}"/*.pem docker rm -f "${PROXY_CONTAINER}" >/dev/null 2>&1 || true echo ">>> Starting ProxySQL container: ${PROXY_CONTAINER}" -docker run -d --name "${PROXY_CONTAINER}" --hostname "proxysql" --network "${NETWORK_NAME}" --network-alias "proxysql" -v "${WORKSPACE}/src/proxysql:/usr/bin/proxysql" -v "${GENERIC_CONFIG}:/etc/proxysql.cnf" -v "${PROXY_DATA_DIR}:/var/lib/proxysql" proxysql-ci-base:latest /bin/bash -c "/usr/bin/proxysql --idle-threads --clickhouse-server --sqlite3-server -f -c /etc/proxysql.cnf -D /var/lib/proxysql 2>&1 | tee /var/lib/proxysql/proxysql.log" +docker run -d --name "${PROXY_CONTAINER}" --hostname "proxysql" 
--network "${NETWORK_NAME}" --network-alias "proxysql" -v "${WORKSPACE}/src/proxysql:/usr/bin/proxysql" -v "${GENERIC_CONFIG}:/etc/proxysql.cnf" -v "${PROXY_DATA_DIR}:/var/lib/proxysql" -v "${COVERAGE_DATA_DIR}:/gcov" -e GCOV_PREFIX="/gcov" -e GCOV_PREFIX_STRIP="3" proxysql-ci-base:latest /bin/bash -c "/usr/bin/proxysql --idle-threads --clickhouse-server --sqlite3-server -f -c /etc/proxysql.cnf -D /var/lib/proxysql 2>&1 | tee /var/lib/proxysql/proxysql.log" if [ -f /.dockerenv ]; then RUNNER_ID=$(hostname)