/*
 * xoshiro128++ fast PRNG helpers for include/gen_utils.h.
 *
 * Placement: this section belongs immediately after the `#endif` that closes
 * the `def_fastrand` guard and immediately before `class PtrArray`.
 */

/**
 * @brief Thread-local state for the xoshiro128++ PRNG.
 *
 * s[0..3] hold the 128-bit generator state. Because the array is
 * thread_local, every thread owns an independent sequence and no
 * synchronization is needed.
 *
 * NOTE(review): `s`, `seeded` and `rotl` are very short names for file-scope
 * identifiers defined in a header; they are kept unchanged so that existing
 * users keep compiling.
 */
static thread_local uint32_t s[4];

/**
 * @brief Thread-local flag: non-zero once this thread's state has been seeded.
 *
 * The state is seeded lazily, on the first draw made by each thread.
 */
static thread_local uint8_t seeded = 0;

/**
 * @brief Advance a splitmix64 generator by one step.
 *
 * Used only to expand the 64-bit seed material into the 128-bit xoshiro
 * state, so that all four state words are derived from well-mixed output
 * instead of being copies or complements of one another.
 *
 * @param state In/out 64-bit splitmix64 state; incremented on every call.
 * @return The next 64-bit splitmix64 output.
 */
static inline uint64_t xoshiro_splitmix64(uint64_t *state) {
    uint64_t z = (*state += 0x9e3779b97f4a7c15ULL);
    z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL;
    z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL;
    return z ^ (z >> 31);
}

/**
 * @brief Initialize the calling thread's PRNG state.
 *
 * Combines a clock reading with the calling thread's identifier into a
 * 64-bit value, then draws two splitmix64 outputs from it to fill the four
 * 32-bit state words. Guarantees the resulting state is not all zeros, which
 * is the one state xoshiro can never leave.
 *
 * Important:
 * - Uses CLOCK_MONOTONIC when the platform exposes it, so wall-clock
 *   adjustments do not influence the seed; otherwise falls back to the C11
 *   timespec_get() wall-clock reading.
 * - If the clock cannot be read, the time contribution is zero and the seed
 *   depends on the thread identifier only.
 * - Not cryptographically secure. Do not use for security-sensitive code.
 */
static void init_seed(void) {
    /* Zero-initialized so a failed clock read never leaves garbage behind. */
    struct timespec ts = {0, 0};

#if defined(CLOCK_MONOTONIC)
    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        ts.tv_sec = 0;
        ts.tv_nsec = 0;
    }
#elif defined(TIME_UTC)
    if (timespec_get(&ts, TIME_UTC) != TIME_UTC) {
        ts.tv_sec = 0;
        ts.tv_nsec = 0;
    }
#endif

    const uint64_t now = ((uint64_t)ts.tv_nsec) ^ ((uint64_t)ts.tv_sec << 32);
    const uint64_t tid = (uint64_t)(uintptr_t)pthread_self();

    /* Two independent 64-bit draws give all 128 state bits their own mixing. */
    uint64_t mixer = now ^ tid;
    const uint64_t lo = xoshiro_splitmix64(&mixer);
    const uint64_t hi = xoshiro_splitmix64(&mixer);

    s[0] = (uint32_t)lo;
    s[1] = (uint32_t)(lo >> 32);
    s[2] = (uint32_t)hi;
    s[3] = (uint32_t)(hi >> 32);

    /* The all-zero state is a fixed point of the generator; never start there. */
    if ((s[0] | s[1] | s[2] | s[3]) == 0) {
        s[0] = 1;
    }

    seeded = 1;
}

/**
 * @brief Rotate a 32-bit value left.
 *
 * Both shift counts are masked to 0..31, so k == 0 (and any multiple of 32)
 * is well defined and returns x unchanged instead of shifting by the full
 * width of the type.
 *
 * @param x Value to rotate.
 * @param k Rotation amount in bits; only the low five bits are used.
 * @return x rotated left by (k mod 32) bits.
 */
static inline uint32_t rotl(uint32_t x, int k) {
    const unsigned n = (unsigned)k & 31u;
    return (x << n) | (x >> ((32u - n) & 31u));
}

/**
 * @brief xoshiro128++ PRNG step.
 *
 * Output function of the "++" variant: result = rotl(s0 + s3, 7) + s0.
 * The state is then advanced with xor/shift operations and one rotation.
 * The calling thread's state is seeded on first use.
 *
 * Thread safety:
 * - Operates only on thread-local state; no locks are taken.
 *
 * @return A 32-bit pseudo-random number.
 */
static uint32_t xoshiro128_plus_plus(void) {
    if (!seeded) {
        init_seed();
    }

    const uint32_t result = rotl(s[0] + s[3], 7) + s[0];
    const uint32_t t = s[1] << 9;

    s[2] ^= s[0];
    s[3] ^= s[1];
    s[1] ^= s[2];
    s[0] ^= s[3];
    s[2] ^= t;
    s[3] = rotl(s[3], 11);

    return result;
}

/**
 * @brief Fast, non-cryptographic random number generator.
 *
 * Convenience wrapper over xoshiro128_plus_plus(). On the first call made by
 * a thread, that thread's generator is seeded.
 *
 * @return A 32-bit pseudo-random number.
 */
static inline uint32_t rand_fast(void) {
    return xoshiro128_plus_plus();
}