mirror of https://github.com/sysown/proxysql
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
502 lines
18 KiB
502 lines
18 KiB
#include "MySQL_Passthrough_Auth_Cache.h"
|
|
|
|
#include "gen_utils.h"
|
|
|
|
#include "re2/re2.h"
|
|
|
|
#include <cstdio>
|
|
|
|
/**
 * @brief Construct an empty cache.
 *
 * Every statistics counter and the in-flight probe gauge start at zero,
 * no username pattern is compiled yet, and the three synchronization
 * primitives are initialized with default attributes.
 */
MySQL_Passthrough_Auth_Cache::MySQL_Passthrough_Auth_Cache()
	: inflight_probes(0)
	, stat_probes_attempted(0)
	, stat_probes_ok(0)
	, stat_probes_failed_credentials(0)
	, stat_probes_failed_transport(0)
	, stat_lockouts_user(0)
	, stat_lockouts_ip(0)
	, stat_inflight_cap_rejects(0)
	, stat_cache_hits(0)
	, stat_cache_invalidations(0)
	, compiled_pattern(nullptr)
{
	/* Guards the credential map (entries). */
	pthread_rwlock_init(&lock, nullptr);
	/* Guards the two failure-tracking maps. */
	pthread_mutex_init(&failure_lock, nullptr);
	/* Guards the cached compiled pattern and its source string. */
	pthread_rwlock_init(&pattern_lock, nullptr);
}
|
|
|
|
/**
 * @brief Tear the cache down.
 *
 * Each protected structure is emptied while holding its own lock, after
 * which that lock is released and destroyed. The three groups (cache
 * entries, failure maps, compiled pattern) are handled one after the
 * other and never hold more than one lock at a time.
 */
MySQL_Passthrough_Auth_Cache::~MySQL_Passthrough_Auth_Cache() {
	/* Cached credentials. */
	pthread_rwlock_wrlock(&lock);
	entries.clear();
	pthread_rwlock_unlock(&lock);
	pthread_rwlock_destroy(&lock);

	/* Failure-tracking state, per user and per IP. */
	pthread_mutex_lock(&failure_lock);
	failures_by_user.clear();
	failures_by_ip.clear();
	pthread_mutex_unlock(&failure_lock);
	pthread_mutex_destroy(&failure_lock);

	/* Compiled allowlist pattern. Deleting a null pointer is a no-op,
	 * so no explicit null check is needed. */
	pthread_rwlock_wrlock(&pattern_lock);
	delete compiled_pattern;
	compiled_pattern = nullptr;
	compiled_pattern_str.clear();
	pthread_rwlock_unlock(&pattern_lock);
	pthread_rwlock_destroy(&pattern_lock);
}
|
|
|
|
bool MySQL_Passthrough_Auth_Cache::lookup(
|
|
const std::string& username, std::string& out_cleartext, uint32_t ttl_s
|
|
) {
|
|
/*
|
|
* Reader fast-path: cache HIT and entry not expired.
|
|
*
|
|
* The cache is read on every passthrough-eligible client connect,
|
|
* so this is the dominant code path on a busy proxy. Take the
|
|
* read lock, observe the entry, copy out the cleartext, drop the
|
|
* lock. Multiple concurrent connects share the read lock and
|
|
* don't serialize.
|
|
*
|
|
* If the entry is missing, we still hold only the read lock --
|
|
* just return miss. If the entry IS present but expired under
|
|
* the TTL, we need to evict it (mutates the map) which requires
|
|
* the write lock; release the read lock and fall through to the
|
|
* slow path below.
|
|
*/
|
|
{
|
|
pthread_rwlock_rdlock(&lock);
|
|
auto it = entries.find(username);
|
|
if (it == entries.end()) {
|
|
pthread_rwlock_unlock(&lock);
|
|
return false;
|
|
}
|
|
bool expired = false;
|
|
if (ttl_s > 0) {
|
|
const uint64_t now_us = monotonic_time();
|
|
const uint64_t age_us = now_us - it->second.learned_at_us;
|
|
if (age_us > static_cast<uint64_t>(ttl_s) * 1000000ULL) {
|
|
expired = true;
|
|
}
|
|
}
|
|
if (!expired) {
|
|
out_cleartext = it->second.cleartext_password;
|
|
pthread_rwlock_unlock(&lock);
|
|
return true;
|
|
}
|
|
pthread_rwlock_unlock(&lock);
|
|
}
|
|
|
|
/*
|
|
* Writer slow-path: the entry was expired. Re-check under the
|
|
* write lock (between releasing rdlock and acquiring wrlock,
|
|
* another thread might have already evicted it, or even inserted
|
|
* a fresh one). Standard double-checked-locking pattern.
|
|
*/
|
|
pthread_rwlock_wrlock(&lock);
|
|
auto it = entries.find(username);
|
|
if (it == entries.end()) {
|
|
/* Another thread evicted it; treat as miss. */
|
|
pthread_rwlock_unlock(&lock);
|
|
return false;
|
|
}
|
|
if (ttl_s > 0) {
|
|
const uint64_t now_us = monotonic_time();
|
|
const uint64_t age_us = now_us - it->second.learned_at_us;
|
|
if (age_us > static_cast<uint64_t>(ttl_s) * 1000000ULL) {
|
|
entries.erase(it);
|
|
pthread_rwlock_unlock(&lock);
|
|
return false;
|
|
}
|
|
}
|
|
/* The entry got refreshed by another thread while we were upgrading;
|
|
* return its cleartext as a hit. */
|
|
out_cleartext = it->second.cleartext_password;
|
|
pthread_rwlock_unlock(&lock);
|
|
return true;
|
|
}
|
|
|
|
void MySQL_Passthrough_Auth_Cache::insert(
|
|
const std::string& username, const std::string& cleartext, int hostgroup_probed
|
|
) {
|
|
pthread_rwlock_wrlock(&lock);
|
|
entry_t& e = entries[username];
|
|
e.cleartext_password = cleartext;
|
|
e.learned_at_us = monotonic_time();
|
|
e.hostgroup_probed = hostgroup_probed;
|
|
pthread_rwlock_unlock(&lock);
|
|
}
|
|
|
|
bool MySQL_Passthrough_Auth_Cache::evict(const std::string& username) {
|
|
pthread_rwlock_wrlock(&lock);
|
|
const bool removed = (entries.erase(username) > 0);
|
|
pthread_rwlock_unlock(&lock);
|
|
return removed;
|
|
}
|
|
|
|
/**
 * @brief Drop every cached entry while holding the write lock.
 */
void MySQL_Passthrough_Auth_Cache::clear() {
	pthread_rwlock_wrlock(&lock);
	{
		entries.clear();
	}
	pthread_rwlock_unlock(&lock);
}
|
|
|
|
size_t MySQL_Passthrough_Auth_Cache::size() const {
|
|
pthread_rwlock_rdlock(&lock);
|
|
const size_t n = entries.size();
|
|
pthread_rwlock_unlock(&lock);
|
|
return n;
|
|
}
|
|
|
|
std::vector<passthrough_entry_view> MySQL_Passthrough_Auth_Cache::snapshot() const {
|
|
std::vector<passthrough_entry_view> out;
|
|
pthread_rwlock_rdlock(&lock);
|
|
out.reserve(entries.size());
|
|
for (const auto& kv : entries) {
|
|
passthrough_entry_view v;
|
|
v.username = kv.first;
|
|
v.learned_at_us = kv.second.learned_at_us;
|
|
v.hostgroup_probed = kv.second.hostgroup_probed;
|
|
out.push_back(std::move(v));
|
|
}
|
|
pthread_rwlock_unlock(&lock);
|
|
return out;
|
|
}
|
|
|
|
bool MySQL_Passthrough_Auth_Cache::try_acquire_inflight(int max_inflight) {
|
|
if (max_inflight <= 0) {
|
|
// 0 or negative means "no cap"; succeed without bookkeeping.
|
|
// Practically the variable is bounded to [1, 10000] by the
|
|
// VariablesPointers_int registration, but be defensive.
|
|
inflight_probes.fetch_add(1, std::memory_order_relaxed);
|
|
return true;
|
|
}
|
|
int prev = inflight_probes.fetch_add(1, std::memory_order_relaxed);
|
|
if (prev >= max_inflight) {
|
|
inflight_probes.fetch_sub(1, std::memory_order_relaxed);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
 * @brief Give back one in-flight probe slot (relaxed atomic decrement).
 *
 * No underflow check is performed here.
 */
void MySQL_Passthrough_Auth_Cache::release_inflight() {
	inflight_probes.fetch_sub(1, std::memory_order_relaxed);
}
|
|
|
|
int MySQL_Passthrough_Auth_Cache::inflight() const {
|
|
return inflight_probes.load(std::memory_order_relaxed);
|
|
}
|
|
|
|
namespace {

/* Shape shared by the per-user and per-IP failure maps: key -> failure
 * timestamps in microseconds. */
using failure_map_t = std::unordered_map<std::string, std::deque<uint64_t>>;

/**
 * @brief Drop leading timestamps that have aged out of the window.
 *
 * A timestamp @c t is dropped while <tt>t + window_us < now_us</tt>;
 * a timestamp sitting exactly on the window boundary is kept. Only the
 * front of the deque is examined, so pruning stops at the first
 * timestamp that is still inside the window.
 *
 * Caller holds failure_lock.
 *
 * @return Number of timestamps remaining in @p dq.
 */
size_t prune_and_count(std::deque<uint64_t>& dq, uint64_t now_us, uint64_t window_us) {
	auto first_kept = dq.begin();
	while (first_kept != dq.end() && *first_kept + window_us < now_us) {
		++first_kept;
	}
	dq.erase(dq.begin(), first_kept);
	return dq.size();
}

/**
 * @brief Erase the map entry behind @p it when its deque is empty.
 *
 * Keeps keys whose timestamps have all been pruned from lingering in
 * the map, so the map size tracks the keys that still have recorded
 * failures rather than every key ever seen. Passing @c m.end() is a
 * no-op.
 *
 * Caller holds failure_lock.
 */
void erase_if_empty(
	std::unordered_map<std::string, std::deque<uint64_t>>& m,
	std::unordered_map<std::string, std::deque<uint64_t>>::iterator it
) {
	if (it == m.end()) {
		return;
	}
	if (!it->second.empty()) {
		return;
	}
	m.erase(it);
}

// The cap on the failure-map size is operator-tunable; see the max_keys
// parameter of record_failure and the global variable
// mysql-passthrough_auth_failure_map_cap (default 100000).

/**
 * @brief Shrink @p m: remove every empty-deque entry, then the entry
 *        whose oldest timestamp is the smallest.
 *
 * One linear pass over the map both collects the iterators of empty
 * deques and finds the non-empty deque with the smallest front().
 * Erasing is deferred until after the pass so that neither the walk nor
 * the chosen victim is invalidated while iterating.
 *
 * The victim is removed unconditionally. This function does not know
 * the cap, so it cannot tell whether removing the empty entries alone
 * was enough; it may therefore evict one live entry more than strictly
 * needed. If the only non-empty deque is the one the caller has just
 * appended to, that entry is the victim.
 *
 * NOTE(review): as far as this file shows, would_lockout_* erases a key
 * as soon as pruning empties it and record_failure always appends right
 * after creating a key, so empty deques look rare in practice -- confirm
 * against other writers of the maps.
 *
 * Cost is O(N) in the map size. Caller holds failure_lock.
 */
void evict_oldest(
	std::unordered_map<std::string, std::deque<uint64_t>>& m
) {
	std::vector<failure_map_t::iterator> zombies;
	failure_map_t::iterator victim = m.end();
	uint64_t victim_ts = UINT64_MAX;

	for (failure_map_t::iterator cur = m.begin(); cur != m.end(); ++cur) {
		const std::deque<uint64_t>& stamps = cur->second;
		if (stamps.empty()) {
			zombies.push_back(cur);
			continue;
		}
		/* Strict '<' keeps the first candidate on ties. */
		if (stamps.front() < victim_ts) {
			victim_ts = stamps.front();
			victim = cur;
		}
	}

	/* Reclaim the empty entries. Erasing from an unordered_map only
	 * invalidates the erased iterator, so `victim` stays usable. */
	for (const failure_map_t::iterator& zombie : zombies) {
		m.erase(zombie);
	}

	/* Drop the oldest live entry, if there was one. */
	if (victim != m.end()) {
		m.erase(victim);
	}
}

} // anonymous namespace
|
|
|
|
bool MySQL_Passthrough_Auth_Cache::would_lockout_user(
|
|
const std::string& username, int max_failures, uint32_t window_s
|
|
) const {
|
|
if (max_failures <= 0 || window_s == 0 || username.empty()) return false;
|
|
const uint64_t now_us = monotonic_time();
|
|
const uint64_t window_us = static_cast<uint64_t>(window_s) * 1000000ULL;
|
|
pthread_mutex_lock(&failure_lock);
|
|
auto it = failures_by_user.find(username);
|
|
bool lockout = false;
|
|
if (it != failures_by_user.end()) {
|
|
lockout = prune_and_count(it->second, now_us, window_us)
|
|
>= static_cast<size_t>(max_failures);
|
|
/* Reclaim the map entry if the prune left an empty deque -- bounds
|
|
* unconditional map growth from churn (spec §7.2 / B8 follow-up). */
|
|
erase_if_empty(failures_by_user, it);
|
|
}
|
|
pthread_mutex_unlock(&failure_lock);
|
|
return lockout;
|
|
}
|
|
|
|
bool MySQL_Passthrough_Auth_Cache::would_lockout_ip(
|
|
const std::string& ip, int max_failures, uint32_t window_s
|
|
) const {
|
|
if (max_failures <= 0 || window_s == 0 || ip.empty()) return false;
|
|
const uint64_t now_us = monotonic_time();
|
|
const uint64_t window_us = static_cast<uint64_t>(window_s) * 1000000ULL;
|
|
pthread_mutex_lock(&failure_lock);
|
|
auto it = failures_by_ip.find(ip);
|
|
bool lockout = false;
|
|
if (it != failures_by_ip.end()) {
|
|
lockout = prune_and_count(it->second, now_us, window_us)
|
|
>= static_cast<size_t>(max_failures);
|
|
erase_if_empty(failures_by_ip, it);
|
|
}
|
|
pthread_mutex_unlock(&failure_lock);
|
|
return lockout;
|
|
}
|
|
|
|
void MySQL_Passthrough_Auth_Cache::record_failure(
|
|
const std::string& username, const std::string& ip, int max_keys
|
|
) {
|
|
const uint64_t now_us = monotonic_time();
|
|
const size_t cap =
|
|
max_keys > 0 ? static_cast<size_t>(max_keys) : SIZE_MAX;
|
|
pthread_mutex_lock(&failure_lock);
|
|
if (!username.empty()) {
|
|
failures_by_user[username].push_back(now_us);
|
|
/* Defense-in-depth: if attacker is churning usernames faster than
|
|
* the window expires, evict the oldest entry to keep memory
|
|
* bounded. We lose lockout state for one historical user; the
|
|
* alternative is unbounded growth. The cap is operator-tunable
|
|
* via mysql-passthrough_auth_failure_map_cap. */
|
|
if (failures_by_user.size() > cap) {
|
|
evict_oldest(failures_by_user);
|
|
}
|
|
}
|
|
if (!ip.empty()) {
|
|
failures_by_ip[ip].push_back(now_us);
|
|
if (failures_by_ip.size() > cap) {
|
|
evict_oldest(failures_by_ip);
|
|
}
|
|
}
|
|
pthread_mutex_unlock(&failure_lock);
|
|
}
|
|
|
|
bool MySQL_Passthrough_Auth_Cache::username_allowed(
|
|
const std::string& username, const std::string& pattern
|
|
) {
|
|
/**
|
|
* @brief Spec §7.1 username allowlist (re2 FullMatch).
|
|
*
|
|
* An empty pattern means "allow every username" -- this matches the
|
|
* variable's default (mysql-passthrough_auth_username_pattern="") and
|
|
* preserves the pre-fix behavior for operators who haven't opted in.
|
|
*/
|
|
if (pattern.empty()) return true;
|
|
|
|
/**
|
|
* @brief Reader fast-path.
|
|
*
|
|
* Steady state: the pattern hasn't changed since the last call. Take
|
|
* the read lock, observe the cached compiled regex, run FullMatch
|
|
* (which is documented thread-safe on a const RE2), drop the lock.
|
|
* Concurrent probes don't serialize through a mutex -- they share
|
|
* the read lock. This is the dominant path because operators set
|
|
* the pattern infrequently relative to connect rate.
|
|
*/
|
|
{
|
|
pthread_rwlock_rdlock(&pattern_lock);
|
|
if (compiled_pattern != NULL && pattern == compiled_pattern_str) {
|
|
const bool ok = compiled_pattern->ok()
|
|
&& re2::RE2::FullMatch(username, *compiled_pattern);
|
|
pthread_rwlock_unlock(&pattern_lock);
|
|
return ok;
|
|
}
|
|
pthread_rwlock_unlock(&pattern_lock);
|
|
}
|
|
|
|
/**
|
|
* @brief Writer slow-path -- compile (or recompile) under the write lock.
|
|
*
|
|
* The pattern string didn't match the cached one, so we need a new
|
|
* compiled RE2. Acquire the write lock. Re-check under the write lock
|
|
* (between the rdlock drop and wrlock acquire another thread may
|
|
* have already done the compile we want), and only do the alloc /
|
|
* destroy / assign work if we're still the one who needs to.
|
|
*
|
|
* RE2::Quiet suppresses log spam on operator-supplied bad regexes;
|
|
* we discover the bad-ness via ok() below and fail-safe deny.
|
|
*/
|
|
pthread_rwlock_wrlock(&pattern_lock);
|
|
if (compiled_pattern == NULL || pattern != compiled_pattern_str) {
|
|
if (compiled_pattern) {
|
|
delete compiled_pattern;
|
|
compiled_pattern = NULL;
|
|
}
|
|
re2::RE2::Options opts(re2::RE2::Quiet);
|
|
opts.set_case_sensitive(true);
|
|
compiled_pattern = new re2::RE2(pattern, opts);
|
|
compiled_pattern_str = pattern;
|
|
}
|
|
|
|
/**
|
|
* @brief Fail safe on bad regex.
|
|
*
|
|
* If the operator supplies a regex that doesn't compile (typo, unsupported
|
|
* syntax, ...), RE2::ok() returns false. Treat that as a deny-all rather
|
|
* than allow-all: a misconfigured allowlist must NOT default to permitting
|
|
* every username (which would re-open the unknown-user surface that the
|
|
* pattern exists to gate).
|
|
*/
|
|
const bool ok = compiled_pattern->ok()
|
|
&& re2::RE2::FullMatch(username, *compiled_pattern);
|
|
|
|
pthread_rwlock_unlock(&pattern_lock);
|
|
return ok;
|
|
}
|
|
|
|
/** @brief Add one to the probes_attempted counter (relaxed atomic add). */
void MySQL_Passthrough_Auth_Cache::bump_probes_attempted()
{
	stat_probes_attempted.fetch_add(1, std::memory_order_relaxed);
}
|
|
/** @brief Add one to the probes_ok counter (relaxed atomic add). */
void MySQL_Passthrough_Auth_Cache::bump_probes_ok()
{
	stat_probes_ok.fetch_add(1, std::memory_order_relaxed);
}
|
|
/** @brief Add one to the probes_failed_credentials counter (relaxed atomic add). */
void MySQL_Passthrough_Auth_Cache::bump_probes_failed_credentials()
{
	stat_probes_failed_credentials.fetch_add(1, std::memory_order_relaxed);
}
|
|
/** @brief Add one to the probes_failed_transport counter (relaxed atomic add). */
void MySQL_Passthrough_Auth_Cache::bump_probes_failed_transport()
{
	stat_probes_failed_transport.fetch_add(1, std::memory_order_relaxed);
}
|
|
/** @brief Add one to the lockouts_user counter (relaxed atomic add). */
void MySQL_Passthrough_Auth_Cache::bump_lockouts_user()
{
	stat_lockouts_user.fetch_add(1, std::memory_order_relaxed);
}
|
|
/** @brief Add one to the lockouts_ip counter (relaxed atomic add). */
void MySQL_Passthrough_Auth_Cache::bump_lockouts_ip()
{
	stat_lockouts_ip.fetch_add(1, std::memory_order_relaxed);
}
|
|
/** @brief Add one to the inflight_cap_rejects counter (relaxed atomic add). */
void MySQL_Passthrough_Auth_Cache::bump_inflight_cap_rejects()
{
	stat_inflight_cap_rejects.fetch_add(1, std::memory_order_relaxed);
}
|
|
/** @brief Add one to the cache_hits counter (relaxed atomic add). */
void MySQL_Passthrough_Auth_Cache::bump_cache_hits()
{
	stat_cache_hits.fetch_add(1, std::memory_order_relaxed);
}
|
|
/** @brief Add one to the cache_invalidations counter (relaxed atomic add). */
void MySQL_Passthrough_Auth_Cache::bump_cache_invalidations()
{
	stat_cache_invalidations.fetch_add(1, std::memory_order_relaxed);
}
|
|
|
|
std::vector<MySQL_Passthrough_Auth_Cache::metric_kv>
|
|
MySQL_Passthrough_Auth_Cache::metrics_snapshot() const {
|
|
/**
|
|
* @brief Order matters here: this is the order @c
|
|
* stats_mysql_passthrough_auth_metrics returns to admin clients.
|
|
* Counters first (monotonic-since-startup), gauges last
|
|
* (current-state). All values are read with relaxed memory
|
|
* ordering -- stats are advisory and don't need to synchronize
|
|
* with the increments.
|
|
*/
|
|
std::vector<metric_kv> out;
|
|
out.reserve(11);
|
|
out.push_back({ "probes_attempted", stat_probes_attempted.load(std::memory_order_relaxed) });
|
|
out.push_back({ "probes_ok", stat_probes_ok.load(std::memory_order_relaxed) });
|
|
out.push_back({ "probes_failed_credentials", stat_probes_failed_credentials.load(std::memory_order_relaxed) });
|
|
out.push_back({ "probes_failed_transport", stat_probes_failed_transport.load(std::memory_order_relaxed) });
|
|
out.push_back({ "lockouts_user", stat_lockouts_user.load(std::memory_order_relaxed) });
|
|
out.push_back({ "lockouts_ip", stat_lockouts_ip.load(std::memory_order_relaxed) });
|
|
out.push_back({ "inflight_cap_rejects", stat_inflight_cap_rejects.load(std::memory_order_relaxed) });
|
|
out.push_back({ "cache_hits", stat_cache_hits.load(std::memory_order_relaxed) });
|
|
out.push_back({ "cache_invalidations", stat_cache_invalidations.load(std::memory_order_relaxed) });
|
|
/* Current-state gauges. Use the public accessors so the locking
|
|
* lives in one place. */
|
|
out.push_back({ "inflight_probes", static_cast<uint64_t>(inflight()) });
|
|
out.push_back({ "cache_entries", static_cast<uint64_t>(size()) });
|
|
return out;
|
|
}
|
|
|
|
/**
 * @brief Write the module name and its revision string to stderr.
 *
 * The revision comes from the MYSQL_PASSTHROUGH_AUTH_CACHE_VERSION
 * macro, which is pasted into the format string at compile time.
 */
void MySQL_Passthrough_Auth_Cache::print_version()
{
	std::fprintf(
		stderr,
		"MySQL_Passthrough_Auth_Cache rev. " MYSQL_PASSTHROUGH_AUTH_CACHE_VERSION "\n"
	);
}
|