From 6909011b67638927cd36e164706dbcb515242a89 Mon Sep 17 00:00:00 2001 From: Rahim Kanji Date: Tue, 26 May 2026 15:59:30 +0500 Subject: [PATCH] perf(partition): promote longest-waiting B session via max_connect_time fold Under sustained contention ProcessAllSessions_Partition leaves the B (CONNECTING_SERVER) band unordered, so the pass can serve newer waiters ahead of older ones until the older ones hit connect_timeout_server_max and abort. Track the oldest waiter during the classifier's is_B arm (no second pass) and swap it to running_end. Key on max_connect_time, which the is_B test already loads -- the min-compare is a register compare with no extra memory access, and the smallest value is the waiter closest to the timeout abort. Gated by |B| > PARTITION_FAIRNESS_MIN_B (=4) and partition_active. --- include/Base_Thread.h | 1 + lib/Base_Thread.cpp | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/include/Base_Thread.h b/include/Base_Thread.h index 255b19b18..de8198a31 100644 --- a/include/Base_Thread.h +++ b/include/Base_Thread.h @@ -66,6 +66,7 @@ public: // Below this attempt count a tick carries no signal: gate state and // streak are left untouched. Avoids "2/2 NULL = 100% stressed" noise. static constexpr unsigned int PARTITION_GATE_MIN_ATTEMPTS = 4; + static constexpr unsigned int PARTITION_FAIRNESS_MIN_B = 4; // Called by sessions inside this worker at the get_MyConn_from_pool() // call site to feed the gate. 
diff --git a/lib/Base_Thread.cpp b/lib/Base_Thread.cpp index e5820eafc..4c1806717 100644 --- a/lib/Base_Thread.cpp +++ b/lib/Base_Thread.cpp @@ -285,16 +285,22 @@ void Base_Thread::ProcessAllSessions_Partition() { size_t running_end = 0; size_t idle_begin = mysql_sessions->len; size_t idx = 0; + size_t oldest_idx = SIZE_MAX; + unsigned long long oldest_mct = UINT64_MAX; while (idx < idle_begin) { S* s = static_cast(mysql_sessions->index(idx)); const bool has_be = (s->mybe && s->mybe->server_myds); - const bool is_B = has_be && (s->mybe->server_myds->max_connect_time != 0); + const unsigned long long mct = has_be ? s->mybe->server_myds->max_connect_time : 0ULL; + const bool is_B = (mct != 0); const bool is_A = !is_B && has_be && (s->mybe->server_myds->myconn != nullptr) && (s->status != WAITING_CLIENT_DATA); if (is_A) { if (idx != running_end) { + // Keep the tracked oldest valid: the B session at running_end is + // about to be swapped to idx. + if (oldest_idx == running_end) oldest_idx = idx; void* p = mysql_sessions->pdata[idx]; mysql_sessions->pdata[idx] = mysql_sessions->pdata[running_end]; mysql_sessions->pdata[running_end] = p; @@ -302,6 +308,14 @@ void Base_Thread::ProcessAllSessions_Partition() { ++running_end; ++idx; } else if (is_B) { + // Key on max_connect_time: it was already loaded for the is_B test + // above, so this is a register compare with no extra memory access. + // Smallest max_connect_time == earliest connect start == the session + // closest to the connect_timeout_server_max abort. + if (mct < oldest_mct) { + oldest_mct = mct; + oldest_idx = idx; + } ++idx; } else { --idle_begin; @@ -313,6 +327,16 @@ void Base_Thread::ProcessAllSessions_Partition() { // do NOT advance idx - re-examine the swapped-in element test } } + + // Promote the longest-waiting B session (smallest max_connect_time) to + // running_end so the CONNECTING_SERVER pass serves it first. Gated by a + // minimum B-band size to avoid churn on tiny bands. 
+ if (idle_begin > running_end + PARTITION_FAIRNESS_MIN_B + && oldest_idx != SIZE_MAX && oldest_idx != running_end) { + void* p = mysql_sessions->pdata[running_end]; + mysql_sessions->pdata[running_end] = mysql_sessions->pdata[oldest_idx]; + mysql_sessions->pdata[oldest_idx] = p; + } } // this function was inline in MySQL_Thread::run()