diff --git a/CHANGELOG.md b/CHANGELOG.md index a6381ff7c0..1c6ea84bd8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,24 @@ Canonical reference for changes, improvements, and bugfixes for Boundary. ## Next +### Bug Fixes + +* Fix a dead lock issue where the controller could get stuck with all of its + available database connections being stuck in `idle in transaction`. + If a controller is configured to have a `max_open_connections`, and was under + sufficient load in the form of requests from workers interacting with + sessions, like in the form of authorizing new session connections, the + controller could get stuck after consuming all of the database connections, + leaving them in the `idle in transaction` state. This was due to a + combination of issues, including the lack of a request timeout for worker to + controller grpc requests, and the session repository attempting to use a + separate database connection to retrieve a kms.Wrapper after already starting + a database transaction. The fixes move these kms operations outside of the + transaction and set a max request duration for the grpc requests based on + the cluster's listener configuration. + ([PR](https://github.com/hashicorp/boundary/pull/4803) and + [PR](https://github.com/hashicorp/boundary/pull/4805)) + ## 0.16.0 (2024/04/30) ### New and Improved diff --git a/internal/daemon/controller/interceptor.go b/internal/daemon/controller/interceptor.go index 136aabc524..dc521ef163 100644 --- a/internal/daemon/controller/interceptor.go +++ b/internal/daemon/controller/interceptor.go @@ -9,6 +9,7 @@ import ( "net/http" "reflect" "runtime/debug" + "time" grpc_recovery "github.com/grpc-ecosystem/go-grpc-middleware/recovery" "github.com/hashicorp/boundary/globals" @@ -496,6 +497,15 @@ func eventsResponseInterceptor( } } +func requestMaxDurationInterceptor(_ context.Context, maxRequestDuration time.Duration) grpc.UnaryServerInterceptor { + const op = "controller.requestMaxDurationInterceptor" + return func(interceptorCtx context.Context, req any, _ *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) { + withTimeout, cancel := context.WithTimeout(interceptorCtx, maxRequestDuration) + defer cancel() + return handler(withTimeout, req) + } +} + func workerRequestInfoInterceptor(ctx context.Context, eventer *event.Eventer) (grpc.UnaryServerInterceptor, error) { const op = "worker.requestInfoInterceptor" if eventer == nil { diff --git a/internal/daemon/controller/listeners.go b/internal/daemon/controller/listeners.go index f6411ac2e3..d6982ffa35 100644 --- a/internal/daemon/controller/listeners.go +++ b/internal/daemon/controller/listeners.go @@ -226,6 +226,7 @@ func (c *Controller) configureForCluster(ln *base.ServerListener) (func(), error grpc.UnaryInterceptor( grpc_middleware.ChainUnaryServer( workerReqInterceptor, + requestMaxDurationInterceptor(c.baseContext, ln.Config.MaxRequestDuration), eventsRequestInterceptor(c.baseContext), // before we get started, send the required events with the request eventsResponseInterceptor(c.baseContext), // as we finish, send the required events with the response ),