mirror of https://github.com/hashicorp/boundary
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
396 lines
13 KiB
396 lines
13 KiB
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: BUSL-1.1
|
|
|
|
package sequential
|
|
|
|
import (
|
|
"context"
|
|
"net"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/hashicorp/boundary/api/targets"
|
|
"github.com/hashicorp/boundary/internal/cmd/config"
|
|
"github.com/hashicorp/boundary/internal/daemon/controller"
|
|
"github.com/hashicorp/boundary/internal/daemon/worker"
|
|
"github.com/hashicorp/boundary/internal/server"
|
|
"github.com/hashicorp/boundary/internal/session"
|
|
"github.com/hashicorp/boundary/internal/tests/helper"
|
|
"github.com/hashicorp/dawdle"
|
|
"github.com/hashicorp/go-hclog"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// timeoutBurdenType details our "burden cases" for the session
// cleanup tests.
//
// There are two burden cases:
//
// * default: This case simulates normal default operation where both
// worker and controller generally are timing out connections at
// generally the same interval. In reality, this is not necessarily
// going to be the case, but it's hard to test individual cases when
// both settings are the same.
//
// * worker: This case assumes the worker is the source of truth for
// controller state. Here, the controller's grace period is
// increased to a high factor over the default to ensure that the
// worker is managing the lifecycle of a connection and will properly
// unclaim it closed once the connection resumes, ensuring the
// connection is marked as closed on the worker.
type timeoutBurdenType string
|
|
|
|
const (
	// timeoutBurdenTypeDefault: worker and controller time out
	// connections at generally the same interval.
	timeoutBurdenTypeDefault timeoutBurdenType = "default"
	// timeoutBurdenTypeWorker: the worker is the source of truth for
	// connection state; the controller's grace period is lengthened.
	timeoutBurdenTypeWorker timeoutBurdenType = "worker"
)
|
|
|
|
var timeoutBurdenCases = []timeoutBurdenType{timeoutBurdenTypeDefault, timeoutBurdenTypeWorker}
|
|
|
|
func controllerGracePeriod(ty timeoutBurdenType) time.Duration {
|
|
if ty == timeoutBurdenTypeWorker {
|
|
return helper.DefaultControllerRPCGracePeriod * 10
|
|
}
|
|
|
|
return helper.DefaultControllerRPCGracePeriod
|
|
}
|
|
|
|
// TestSessionCleanup is the main test for session cleanup, and
|
|
// dispatches to the individual subtests.
|
|
func TestSessionCleanup(t *testing.T) {
|
|
t.Run("default/single_controller", testWorkerSessionCleanupSingle("default"))
|
|
t.Run("default/multi_controller", testWorkerSessionCleanupMulti("default"))
|
|
t.Run("worker/single_controller", testWorkerSessionCleanupSingle("worker"))
|
|
t.Run("worker/multi_controller", testWorkerSessionCleanupMulti("worker"))
|
|
}
|
|
|
|
func testWorkerSessionCleanupSingle(burdenCase timeoutBurdenType) func(t *testing.T) {
|
|
const op = "cluster.testWorkerSessionCleanupSingle"
|
|
return func(t *testing.T) {
|
|
require := require.New(t)
|
|
// This prevents us from running tests in parallel.
|
|
server.TestUseCommunityFilterWorkersFn(t)
|
|
logger := hclog.New(&hclog.LoggerOptions{
|
|
Name: t.Name(),
|
|
Level: hclog.Trace,
|
|
})
|
|
|
|
conf, err := config.DevController()
|
|
require.NoError(err)
|
|
|
|
pl, err := net.Listen("tcp", "[::1]:0")
|
|
require.NoError(err)
|
|
c1 := controller.NewTestController(t, &controller.TestControllerOpts{
|
|
Config: conf,
|
|
InitialResourcesSuffix: "1234567890",
|
|
Logger: logger.Named("c1"),
|
|
PublicClusterAddr: pl.Addr().String(),
|
|
WorkerRPCGracePeriod: controllerGracePeriod(burdenCase),
|
|
// Run the scheduler more often to speed up cleanup of orphaned connections
|
|
SchedulerRunJobInterval: time.Second,
|
|
})
|
|
|
|
helper.ExpectWorkers(t, c1)
|
|
|
|
// Wire up the testing proxies
|
|
require.Len(c1.ClusterAddrs(), 1)
|
|
proxy, err := dawdle.NewProxy("tcp", "", c1.ClusterAddrs()[0],
|
|
dawdle.WithListener(pl),
|
|
dawdle.WithRbufSize(256),
|
|
dawdle.WithWbufSize(256),
|
|
)
|
|
require.NoError(err)
|
|
t.Cleanup(func() {
|
|
_ = proxy.Close()
|
|
})
|
|
require.NotEmpty(t, proxy.ListenerAddr())
|
|
|
|
w1 := worker.NewTestWorker(t, &worker.TestWorkerOpts{
|
|
WorkerAuthKms: c1.Config().WorkerAuthKms,
|
|
InitialUpstreams: []string{proxy.ListenerAddr()},
|
|
Logger: logger.Named("w1"),
|
|
SuccessfulControllerRPCGracePeriodDuration: helper.DefaultControllerRPCGracePeriod,
|
|
WorkerRPCInterval: time.Second,
|
|
})
|
|
|
|
helper.ExpectWorkers(t, c1, w1)
|
|
|
|
// Use an independent context for test things that take a context so
|
|
// that we aren't tied to any timeouts in the controller, etc. This
|
|
// can interfere with some of the test operations.
|
|
ctx := context.Background()
|
|
|
|
// Connect target
|
|
client := c1.Client()
|
|
client.SetToken(c1.Token().Token)
|
|
tcl := targets.NewClient(client)
|
|
tgt, err := tcl.Read(ctx, "ttcp_1234567890")
|
|
require.NoError(err)
|
|
require.NotNil(tgt)
|
|
|
|
// Create test server, update default port on target
|
|
ts := helper.NewTestTcpServer(t)
|
|
require.NotNil(t, ts)
|
|
t.Cleanup(ts.Close)
|
|
tgt, err = tcl.Update(ctx, tgt.Item.Id, tgt.Item.Version, targets.WithTcpTargetDefaultPort(ts.Port()), targets.WithSessionConnectionLimit(-1))
|
|
require.NoError(err)
|
|
require.NotNil(tgt)
|
|
|
|
// Authorize and connect
|
|
sess := helper.NewTestSession(ctx, t, tcl, "ttcp_1234567890")
|
|
sConn := sess.Connect(ctx, t)
|
|
|
|
// Run initial send/receive test, make sure things are working
|
|
t.Log("running initial send/recv test")
|
|
sConn.TestSendRecvAll(t)
|
|
|
|
// Wait for a session info to be sent to the server, so the controller has
|
|
// at least one record of the connection.
|
|
w1.Worker().TestWaitForNextSuccessfulSessionInfoUpdate(t)
|
|
|
|
// Kill the link
|
|
t.Log("pausing controller/worker link")
|
|
proxy.Pause()
|
|
|
|
// Wait for failure connection state (depends on burden case)
|
|
switch burdenCase {
|
|
case timeoutBurdenTypeWorker:
|
|
// Wait on worker, then check controller
|
|
sess.ExpectConnectionStateOnWorker(ctx, t, w1, session.StatusClosed)
|
|
sess.ExpectConnectionStateOnController(ctx, t, c1.Controller().ConnectionRepoFn, session.StatusConnected)
|
|
|
|
default:
|
|
// Should be closed on both worker and controller. Wait on
|
|
// worker then check controller.
|
|
sess.ExpectConnectionStateOnWorker(ctx, t, w1, session.StatusClosed)
|
|
sess.ExpectConnectionStateOnController(ctx, t, c1.Controller().ConnectionRepoFn, session.StatusClosed)
|
|
}
|
|
|
|
sConn.TestSendRecvFail(t)
|
|
|
|
// Resume the connection, and reconnect.
|
|
t.Log("resuming controller/worker link")
|
|
proxy.Resume()
|
|
helper.ExpectWorkers(t, c1, w1)
|
|
|
|
// Do something post-reconnect depending on burden case. Note in
|
|
// the default case, both worker and controller should be
|
|
// relatively in sync, so we don't worry about these
|
|
// post-reconnection assertions.
|
|
switch burdenCase {
|
|
case timeoutBurdenTypeWorker:
|
|
// If we are expecting the worker to be the source of truth of
|
|
// a connection status, ensure that our old session's
|
|
// connections are actually closed now that the worker is
|
|
// properly reporting in again.
|
|
sess.ExpectConnectionStateOnController(ctx, t, c1.Controller().ConnectionRepoFn, session.StatusClosed)
|
|
}
|
|
|
|
// Proceed with new connection test
|
|
t.Log("connecting to new session after resuming controller/worker link")
|
|
sess = helper.NewTestSession(ctx, t, tcl, "ttcp_1234567890") // re-assign, other connection will close in t.Cleanup()
|
|
sConn = sess.Connect(ctx, t)
|
|
sConn.TestSendRecvAll(t)
|
|
}
|
|
}
|
|
|
|
func testWorkerSessionCleanupMulti(burdenCase timeoutBurdenType) func(t *testing.T) {
|
|
const op = "cluster.testWorkerSessionCleanupMulti"
|
|
return func(t *testing.T) {
|
|
// This prevents us from running tests in parallel.
|
|
server.TestUseCommunityFilterWorkersFn(t)
|
|
require := require.New(t)
|
|
logger := hclog.New(&hclog.LoggerOptions{
|
|
Name: t.Name(),
|
|
Level: hclog.Trace,
|
|
})
|
|
|
|
// ******************
|
|
// ** Controller 1 **
|
|
// ******************
|
|
conf1, err := config.DevController()
|
|
require.NoError(err)
|
|
|
|
pl1, err := net.Listen("tcp", "[::1]:0")
|
|
require.NoError(err)
|
|
c1 := controller.NewTestController(t, &controller.TestControllerOpts{
|
|
Config: conf1,
|
|
InitialResourcesSuffix: "1234567890",
|
|
Logger: logger.Named("c1"),
|
|
PublicClusterAddr: pl1.Addr().String(),
|
|
WorkerRPCGracePeriod: controllerGracePeriod(burdenCase),
|
|
})
|
|
|
|
// ******************
|
|
// ** Controller 2 **
|
|
// ******************
|
|
pl2, err := net.Listen("tcp", "[::1]:0")
|
|
require.NoError(err)
|
|
c2 := c1.AddClusterControllerMember(t, &controller.TestControllerOpts{
|
|
Logger: logger.Named("c2"),
|
|
PublicClusterAddr: pl2.Addr().String(),
|
|
WorkerRPCGracePeriod: controllerGracePeriod(burdenCase),
|
|
})
|
|
|
|
wg := new(sync.WaitGroup)
|
|
wg.Add(2)
|
|
go func() {
|
|
defer wg.Done()
|
|
helper.ExpectWorkers(t, c1)
|
|
}()
|
|
go func() {
|
|
defer wg.Done()
|
|
helper.ExpectWorkers(t, c2)
|
|
}()
|
|
wg.Wait()
|
|
|
|
// *************
|
|
// ** Proxy 1 **
|
|
// *************
|
|
require.Len(c1.ClusterAddrs(), 1)
|
|
p1, err := dawdle.NewProxy("tcp", "", c1.ClusterAddrs()[0],
|
|
dawdle.WithListener(pl1),
|
|
dawdle.WithRbufSize(256),
|
|
dawdle.WithWbufSize(256),
|
|
)
|
|
require.NoError(err)
|
|
t.Cleanup(func() {
|
|
_ = p1.Close()
|
|
})
|
|
require.NotEmpty(t, p1.ListenerAddr())
|
|
|
|
// *************
|
|
// ** Proxy 2 **
|
|
// *************
|
|
require.Len(c2.ClusterAddrs(), 1)
|
|
p2, err := dawdle.NewProxy("tcp", "", c2.ClusterAddrs()[0],
|
|
dawdle.WithListener(pl2),
|
|
dawdle.WithRbufSize(256),
|
|
dawdle.WithWbufSize(256),
|
|
)
|
|
require.NoError(err)
|
|
t.Cleanup(func() {
|
|
_ = p2.Close()
|
|
})
|
|
require.NotEmpty(t, p2.ListenerAddr())
|
|
|
|
// ************
|
|
// ** Worker **
|
|
// ************
|
|
w1 := worker.NewTestWorker(t, &worker.TestWorkerOpts{
|
|
WorkerAuthKms: c1.Config().WorkerAuthKms,
|
|
InitialUpstreams: []string{p1.ListenerAddr(), p2.ListenerAddr()},
|
|
Logger: logger.Named("w1"),
|
|
SuccessfulControllerRPCGracePeriodDuration: helper.DefaultControllerRPCGracePeriod,
|
|
WorkerRPCInterval: time.Second,
|
|
})
|
|
|
|
wg.Add(2)
|
|
go func() {
|
|
defer wg.Done()
|
|
helper.ExpectWorkers(t, c1, w1)
|
|
}()
|
|
go func() {
|
|
defer wg.Done()
|
|
helper.ExpectWorkers(t, c2, w1)
|
|
}()
|
|
wg.Wait()
|
|
|
|
// Use an independent context for test things that take a context so
|
|
// that we aren't tied to any timeouts in the controller, etc. This
|
|
// can interfere with some of the test operations.
|
|
ctx := context.Background()
|
|
|
|
// Connect target
|
|
client := c1.Client()
|
|
client.SetToken(c1.Token().Token)
|
|
tcl := targets.NewClient(client)
|
|
tgt, err := tcl.Read(ctx, "ttcp_1234567890")
|
|
require.NoError(err)
|
|
require.NotNil(tgt)
|
|
|
|
// Create test server, update default port on target
|
|
ts := helper.NewTestTcpServer(t)
|
|
require.NotNil(ts)
|
|
t.Cleanup(ts.Close)
|
|
tgt, err = tcl.Update(ctx, tgt.Item.Id, tgt.Item.Version, targets.WithTcpTargetDefaultPort(ts.Port()), targets.WithSessionConnectionLimit(-1))
|
|
require.NoError(err)
|
|
require.NotNil(tgt)
|
|
|
|
// Authorize and connect
|
|
sess := helper.NewTestSession(ctx, t, tcl, "ttcp_1234567890")
|
|
sConn := sess.Connect(ctx, t)
|
|
|
|
// Run initial send/receive test, make sure things are working
|
|
t.Log("running initial send/recv test")
|
|
sConn.TestSendRecvAll(t)
|
|
|
|
// Wait for a session info to be sent to the server, so the controller has
|
|
// at least one record of the connection.
|
|
w1.Worker().TestWaitForNextSuccessfulSessionInfoUpdate(t)
|
|
|
|
// Kill connection to first controller, and run test again, should
|
|
// pass, deferring to other controller. Wait for the next
|
|
// successful status report to ensure this.
|
|
t.Log("pausing link to controller #1")
|
|
p1.Pause()
|
|
w1.Worker().TestWaitForNextSuccessfulSessionInfoUpdate(t)
|
|
sConn.TestSendRecvAll(t)
|
|
|
|
// Resume first controller, pause second. This one should work too.
|
|
t.Log("pausing link to controller #2, resuming #1")
|
|
p1.Resume()
|
|
p2.Pause()
|
|
w1.Worker().TestWaitForNextSuccessfulSessionInfoUpdate(t)
|
|
|
|
// Kill the first controller connection again. This one should fail
|
|
// due to lack of any connection.
|
|
t.Log("pausing link to controller #1 again, both connections should be offline")
|
|
p1.Pause()
|
|
|
|
// Wait for failure connection state (depends on burden case)
|
|
switch burdenCase {
|
|
case timeoutBurdenTypeWorker:
|
|
// Wait on worker, then check controller
|
|
sess.ExpectConnectionStateOnWorker(ctx, t, w1, session.StatusClosed)
|
|
sess.ExpectConnectionStateOnController(ctx, t, c1.Controller().ConnectionRepoFn, session.StatusConnected)
|
|
|
|
default:
|
|
// Should be closed on both worker and controller. Wait on
|
|
// worker then check controller.
|
|
sess.ExpectConnectionStateOnWorker(ctx, t, w1, session.StatusClosed)
|
|
sess.ExpectConnectionStateOnController(ctx, t, c1.Controller().ConnectionRepoFn, session.StatusClosed)
|
|
}
|
|
|
|
// Run send/receive test again to check expected connection-level
|
|
// behavior
|
|
sConn.TestSendRecvFail(t)
|
|
|
|
// Finally resume both, try again. Should behave as per normal.
|
|
t.Log("resuming connections to both controllers")
|
|
p1.Resume()
|
|
p2.Resume()
|
|
w1.Worker().TestWaitForNextSuccessfulSessionInfoUpdate(t)
|
|
|
|
// Do something post-reconnect depending on burden case. Note in
|
|
// the default case, both worker and controller should be
|
|
// relatively in sync, so we don't worry about these
|
|
// post-reconnection assertions.
|
|
switch burdenCase {
|
|
case timeoutBurdenTypeWorker:
|
|
// If we are expecting the worker to be the source of truth of
|
|
// a connection status, ensure that our old session's
|
|
// connections are actually closed now that the worker is
|
|
// properly reporting in again.
|
|
sess.ExpectConnectionStateOnController(ctx, t, c1.Controller().ConnectionRepoFn, session.StatusClosed)
|
|
}
|
|
|
|
// Proceed with new connection test
|
|
t.Log("connecting to new session after resuming controller/worker link")
|
|
sess = helper.NewTestSession(ctx, t, tcl, "ttcp_1234567890") // re-assign, other connection will close in t.Cleanup()
|
|
sConn = sess.Connect(ctx, t)
|
|
sConn.TestSendRecvAll(t)
|
|
}
|
|
}
|