From 99a16dafc2a82beaacb4ae5c6294c12b18b0055b Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Wed, 11 May 2022 17:02:36 -0400 Subject: [PATCH] Leave cluster listener up when bad data is sent (#2073) Leave cluster listener up when bad data is sent Removing the ALPN muxer exposed the underlying listener directly to the gRPC server, which means that a quirk of the ALPN muxer (not surfacing errors on accept) turned into the gRPC server receiving errors on accept, and it turns out if the error does not satisfy a special temporary interface the listener will be closed. The obviously better thing to do would be to let the listener close itself if there was truly a fatal error rather than relying on undocumented interfaces, but here we are. --- CHANGELOG.md | 23 +++++++++++------ .../controller/intercepting_listener.go | 25 ++++++++++++++++--- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e062fa7459..d57df1eb19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,31 +4,39 @@ Canonical reference for changes, improvements, and bugfixes for Boundary. ## Next +### Bug Fixes + +* controller: Do not shut down cluster listener when it receives an invalid + packet ([Issue](https://github.com/hashicorp/boundary/issues/2072), + [PR](https://github.com/hashicorp/boundary/pull/2073)) + ## 0.8.0 (2022/05/03) ### New and Improved -* metrics: provide metrics for controllers and workers -* controller: new health endpoint ([PR](https://github.com/hashicorp/boundary/pull/1882)). -* Improve response time for listing sessions and targets. +* metrics: Provide metrics for controllers and workers +* controller: Add health endpoint ([PR](https://github.com/hashicorp/boundary/pull/1882)) +* controller: Improve response time for listing sessions and targets. [PR](https://github.com/hashicorp/boundary/pull/2049) -* ui: Add support for worker filters in targets. -* ui: Add manual refresh button in sessions list. +* ui: Add support for worker filters in targets +* ui: Add manual refresh button in sessions list ### Bug Fixes + * worker: create new error to prevent `event.newError: missing error: invalid parameter` and handle session cancel with no TOFU token ([Issue](https://github.com/hashicorp/boundary/issues/1902), [PR](https://github.com/hashicorp/boundary/pull/1929)) * controller: Reconcile DEKs with existing scopes ([Issue](https://github.com/hashicorp/boundary/issues/1856), [PR](https://github.com/hashicorp/boundary/pull/1976)) -* Fix for retrieving sessions that could result in incomplete results when +* controller: Fix for retrieving sessions that could result in incomplete results when there is a large number (10k+) of sessions. [PR](https://github.com/hashicorp/boundary/pull/2049) * session: update session state trigger to prevent transitions to invalid states ([Issue](https://github.com/hashicorp/boundary/issues/2040), [PR](https://github.com/hashicorp/boundary/pull/2046)) - + ## 0.7.6 (2022/03/15) ### Bug Fixes + * sessions: Sessions and session connections have been refactored to better isolate transactions and prevent resource contention that caused deadlocks. ([Issue](https://github.com/hashicorp/boundary/issues/1812), @@ -53,6 +61,7 @@ to better isolate transactions and prevent resource contention that caused deadl * ui: Add support for dynamic host catalog. AWS and Azure plugin-based CRUD operations. ### Bug Fixes + * targets: Specifying a plugin based host id when authorizing a session now works. ([PR](https://github.com/hashicorp/boundary/pull/1853)) * targets: DNS names are now properly parsed when selecting an endpoint diff --git a/internal/daemon/controller/intercepting_listener.go b/internal/daemon/controller/intercepting_listener.go index 7cece060b6..52f5c6463a 100644 --- a/internal/daemon/controller/intercepting_listener.go +++ b/internal/daemon/controller/intercepting_listener.go @@ -9,6 +9,23 @@ import ( "github.com/hashicorp/boundary/internal/observability/event" ) +// tempError is an error that satisfies the temporary error interface that is +// internally used by gRPC to determine whether an error should cause a listener +// to die. Any error that isn't an accept error is wrapped in this since one +// connection failing TLS wise doesn't mean we don't want to accept any more... +type tempError struct { + error +} + +// newTempError is a "temporary" error +func newTempError(inner error) tempError { + return tempError{error: inner} +} + +func (t tempError) Temporary() bool { + return true +} + // interceptingListener allows us to validate the nonce from a connection before // handing it off to the gRPC server. It is expected that the first thing a // connection sends after successful TLS validation is the nonce that was @@ -53,7 +70,7 @@ func (m *interceptingListener) Accept() (net.Conn, error) { event.WriteError(context.TODO(), op, err, event.WithInfoMsg("error closing worker connection")) } } - return nil, err + return nil, newTempError(err) } nonce := make([]byte, 20) @@ -62,20 +79,20 @@ func (m *interceptingListener) Accept() (net.Conn, error) { if err := conn.Close(); err != nil { event.WriteError(ctx, op, err, event.WithInfoMsg("error closing worker connection")) } - return nil, fmt.Errorf("error reading nonce from connection: %w", err) + return nil, newTempError(fmt.Errorf("error reading nonce from connection: %w", err)) } if read != len(nonce) { if err := conn.Close(); err != nil { event.WriteError(ctx, op, err, event.WithInfoMsg("error closing worker connection")) } - return nil, fmt.Errorf("error reading nonce from worker, expected %d bytes, got %d", 20, read) + return nil, newTempError(fmt.Errorf("error reading nonce from worker, expected %d bytes, got %d", 20, read)) } workerInfoRaw, found := m.c.workerAuthCache.Load(string(nonce)) if !found { if err := conn.Close(); err != nil { event.WriteError(ctx, op, err, event.WithInfoMsg("error closing worker connection")) } - return nil, errors.New("did not find valid nonce for incoming worker") + return nil, newTempError(errors.New("did not find valid nonce for incoming worker")) } workerInfo := workerInfoRaw.(*workerAuthEntry) workerInfo.conn = conn