// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package worker

import (
	"context"
	"fmt"
	"net"
	"os"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/hashicorp/boundary/internal/cmd/base"
	"github.com/hashicorp/boundary/internal/cmd/config"
	"github.com/hashicorp/boundary/internal/db"
	"github.com/hashicorp/boundary/internal/event"
	pbs "github.com/hashicorp/boundary/internal/gen/controller/servers/services"
	"github.com/hashicorp/boundary/internal/server"
	"github.com/hashicorp/boundary/internal/server/store"
	"github.com/hashicorp/boundary/internal/types/scope"
	"github.com/hashicorp/go-hclog"
	wrapping "github.com/hashicorp/go-kms-wrapping/v2"
	"github.com/hashicorp/go-kms-wrapping/v2/extras/multi"
	"github.com/hashicorp/nodeenrollment/types"
	"github.com/mr-tron/base58"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/stretchr/testify/require"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
	"google.golang.org/protobuf/proto"
)

// TestWorker wraps a base.Server and Worker to provide a
// fully-programmatic worker for tests. Error checking (for instance, for
// valid config) is not stringent at the moment.
type TestWorker struct {
	b              *base.Server
	w              *Worker
	t              testing.TB
	addrs          []string // The addresses the worker proxies are listening on
	ctx            context.Context
	cancel         context.CancelFunc
	name           string
	shutdownDoneCh chan struct{}
	shutdownOnce   *sync.Once
}

// Worker returns the underlying worker.
func (tw *TestWorker) Worker() *Worker {
	return tw.w
}

// Config returns the underlying worker's configuration.
func (tw *TestWorker) Config() *Config {
	return tw.w.conf
}

// Context returns the context backing the test worker.
func (tw *TestWorker) Context() context.Context {
	return tw.ctx
}

// Cancel cancels the test worker's context.
func (tw *TestWorker) Cancel() {
	tw.cancel()
}

// Name returns the worker's name.
func (tw *TestWorker) Name() string {
	return tw.name
}

// UpstreamAddrs returns the upstream addresses calculated from the worker's
// last successful routing info report.
func (tw *TestWorker) UpstreamAddrs() []string {
	lastRoutingInfo := tw.w.LastRoutingInfoSuccess()
	return lastRoutingInfo.GetCalculatedUpstreamAddresses()
}

// ProxyAddrs returns the addresses of the worker's proxy listeners.
func (tw *TestWorker) ProxyAddrs() []string {
	if tw.addrs != nil {
		return tw.addrs
	}
	for _, listener := range tw.b.Listeners {
		if listener.Config.Purpose[0] == "proxy" {
			tcpAddr, ok := listener.ProxyListener.Addr().(*net.TCPAddr)
			if !ok {
				tw.t.Fatal("could not parse address as a TCP addr")
			}
			addr := net.JoinHostPort(tcpAddr.IP.String(), fmt.Sprintf("%d", tcpAddr.Port))
			tw.addrs = append(tw.addrs, addr)
		}
	}
	return tw.addrs
}

// TestSessionInfo provides detail about a particular session from
// the worker's local session state. This detail is a point-in-time
// snapshot of what's in sessionInfoMap for a particular session, and
// may not contain all of the information that is contained within
// it, or the underlying ConnInfoMap. Only details that are really
// important to testing are passed along.
type TestSessionInfo struct {
	Id     string
	Status pbs.SESSIONSTATUS

	// Connections is indexed by connection ID, which is also included
	// within TestConnectionInfo for convenience.
	Connections map[string]TestConnectionInfo
}

// TestConnectionInfo provides detail about a particular connection
// as a part of TestSessionInfo. See that struct for details about
// the purpose of this data and how it's gathered.
type TestConnectionInfo struct {
	Id        string
	Status    pbs.CONNECTIONSTATUS
	CloseTime time.Time
}

// LookupSession returns session info from the worker's local session
// state.
//
// The return boolean will be true if the session was found, false if
// it wasn't.
//
// See TestSessionInfo for details on how to use this info.
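//
// A minimal usage sketch (the session ID below is hypothetical; real tests
// would use an ID obtained from an established session):
//
//	if info, ok := tw.LookupSession("s_1234567890"); ok {
//		for _, conn := range info.Connections {
//			t.Logf("conn %s status=%s closed=%s", conn.Id, conn.Status, conn.CloseTime)
//		}
//	}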
func (tw *TestWorker) LookupSession(id string) (TestSessionInfo, bool) {
	var result TestSessionInfo
	sess := tw.w.sessionManager.Get(id)
	if sess == nil {
		return TestSessionInfo{}, false
	}

	conns := make(map[string]TestConnectionInfo)
	for _, conn := range sess.GetLocalConnections() {
		conns[conn.Id] = TestConnectionInfo{
			Id:        conn.Id,
			Status:    conn.Status,
			CloseTime: conn.CloseTime,
		}
	}

	result.Id = sess.GetId()
	result.Status = sess.GetStatus()
	result.Connections = conns

	return result, true
}

// Shutdown runs any cleanup functions; be sure to run this after your test is
// done.
func (tw *TestWorker) Shutdown() {
	tw.shutdownOnce.Do(func() {
		if tw.b != nil {
			close(tw.b.ShutdownCh)
		}

		tw.cancel()

		if tw.w != nil {
			if err := tw.w.Shutdown(); err != nil {
				tw.t.Error(err)
			}
		}
		if tw.b != nil {
			if err := tw.b.RunShutdownFuncs(); err != nil {
				tw.t.Error(err)
			}
		}

		close(tw.shutdownDoneCh)
	})
}

// TestWorkerOpts contains options for configuring a TestWorker.
type TestWorkerOpts struct {
	// Config; if not provided a dev one will be created
	Config *config.Config

	// Sets initial upstream addresses
	InitialUpstreams []string

	// If true, the worker will not be started
	DisableAutoStart bool

	// The worker auth KMS to use, or one will be created
	WorkerAuthKms wrapping.Wrapper

	// The downstream worker auth KMS to use, or one will be created
	DownstreamWorkerAuthKms *multi.PooledWrapper

	// The worker credential storage KMS to use, or one will be created
	WorkerAuthStorageKms wrapping.Wrapper

	// The location of the worker's auth storage
	WorkerAuthStoragePath string

	// The location of the worker's recording storage
	WorkerRecordingStoragePath string

	// The interval between each respective worker RPC invocation.
	// This sets the interval for SessionInfo, RoutingInfo and Statistics.
	WorkerRPCInterval time.Duration

	// The name to use for the worker, otherwise one will be randomly
	// generated, unless provided in a non-nil Config
	Name string

	// The logger to use, or one will be created
	Logger hclog.Logger

	// The registerer to use for registering all the collectors. Nil means
	// no metrics are registered.
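	// In tests it's common (though not required) to pass an isolated registry
	// such as prometheus.NewRegistry() so collectors registered by one test
	// worker don't collide with those registered by another.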
	PrometheusRegisterer prometheus.Registerer

	// The amount of time to wait before marking connections as closed when a
	// connection cannot be made back to the controller
	SuccessfulControllerRPCGracePeriodDuration time.Duration

	// Overrides worker's nonceFn, for cases where we want to have control
	// over the nonce we send to the Controller
	NonceFn randFn

	// If set, override the normal auth rotation period
	AuthRotationPeriod time.Duration

	// Toggle worker auth debugging
	WorkerAuthDebuggingEnabled *atomic.Bool

	// Enable audit events
	EnableAuditEvents bool

	// Enable system events
	EnableSysEvents bool

	// Enable observation events
	EnableObservationEvents bool

	// Enable IPv6
	EnableIPv6 bool

	// Enable error events
	EnableErrorEvents bool
}

// NewTestWorker creates a TestWorker using the provided options and, unless
// DisableAutoStart is set, starts it. Setup errors are fatal to the supplied
// testing.TB.
func NewTestWorker(t testing.TB, opts *TestWorkerOpts) *TestWorker {
	const op = "worker.NewTestWorker"
	ctx, cancel := context.WithCancel(context.Background())

	tw := &TestWorker{
		t:              t,
		ctx:            ctx,
		cancel:         cancel,
		shutdownDoneCh: make(chan struct{}),
		shutdownOnce:   new(sync.Once),
	}
	t.Cleanup(tw.Shutdown)

	if opts == nil {
		opts = new(TestWorkerOpts)
	}

	// Base server
	tw.b = base.NewServer(nil)
	tw.b.WorkerAuthDebuggingEnabled = opts.WorkerAuthDebuggingEnabled
	tw.b.Command = &base.Command{
		Context:    ctx,
		ShutdownCh: make(chan struct{}),
	}

	// Get dev config, or use a provided one
	var err error
	if opts.Config == nil {
		var configOpts []config.Option
		configOpts = append(configOpts, config.WithAuditEventsEnabled(opts.EnableAuditEvents))
		configOpts = append(configOpts, config.WithSysEventsEnabled(opts.EnableSysEvents))
		configOpts = append(configOpts, config.WithObservationsEnabled(opts.EnableObservationEvents))
		configOpts = append(configOpts, config.WithIPv6Enabled(opts.EnableIPv6))
		configOpts = append(configOpts, config.TestWithErrorEventsEnabled(t, opts.EnableErrorEvents))
		opts.Config, err = config.DevWorker(configOpts...)
		if err != nil {
			t.Fatal(err)
		}
		if opts.Name != "" {
			opts.Config.Worker.Name = opts.Name
		}
		if opts.WorkerRPCInterval > 0 {
			opts.Config.Worker.TestWorkerRPCInterval = opts.WorkerRPCInterval
		}
	}

	if len(opts.InitialUpstreams) > 0 {
		opts.Config.Worker.InitialUpstreams = opts.InitialUpstreams
	}

	// Start a logger
	tw.b.Logger = opts.Logger
	if tw.b.Logger == nil {
		tw.b.Logger = hclog.New(&hclog.LoggerOptions{
			Level: hclog.Trace,
		})
	}

	tw.b.PrometheusRegisterer = opts.PrometheusRegisterer

	if opts.Config.Worker == nil {
		opts.Config.Worker = &config.Worker{
			Name: opts.Name,
		}
	}
	if opts.WorkerAuthStoragePath != "" {
		opts.Config.Worker.AuthStoragePath = opts.WorkerAuthStoragePath
	}
	if opts.WorkerRecordingStoragePath != "" {
		opts.Config.Worker.RecordingStoragePath = opts.WorkerRecordingStoragePath
	}
	tw.b.EnabledPlugins = append(tw.b.EnabledPlugins, base.EnabledPluginLoopback)
	tw.name = opts.Config.Worker.Name

	if opts.SuccessfulControllerRPCGracePeriodDuration != 0 {
		opts.Config.Worker.SuccessfulControllerRPCGracePeriodDuration = opts.SuccessfulControllerRPCGracePeriodDuration
	}

	serverName, err := os.Hostname()
	if err != nil {
		t.Fatal(err)
	}
	serverName = fmt.Sprintf("%s/worker", serverName)
	if err := tw.b.SetupEventing(tw.b.Context, tw.b.Logger, tw.b.StderrLock, serverName, base.WithEventerConfig(opts.Config.Eventing)); err != nil {
		t.Fatal(err)
	}

	// Set up KMSes
	if err := tw.b.SetupKMSes(tw.b.Context, nil, opts.Config); err != nil {
		t.Fatal(err)
	}
	if opts.WorkerAuthKms != nil {
		tw.b.WorkerAuthKms = opts.WorkerAuthKms
	}
	if opts.WorkerAuthStorageKms != nil {
		tw.b.WorkerAuthStorageKms = opts.WorkerAuthStorageKms
	}
	if opts.DownstreamWorkerAuthKms != nil {
		tw.b.DownstreamWorkerAuthKms = opts.DownstreamWorkerAuthKms
	}

	// Ensure the listeners use random port allocation
	for _, listener := range opts.Config.Listeners {
		listener.RandomPort = true
	}
	if err := tw.b.SetupListeners(nil, opts.Config.SharedConfig, []string{"proxy"}); err != nil {
		t.Fatal(err)
	}
	if err := tw.b.SetupWorkerPublicAddress(opts.Config, ""); err != nil {
		t.Fatal(err)
	}

	conf := &Config{
		RawConfig: opts.Config,
		Server:    tw.b,
	}

	tw.w, err = New(ctx, conf)
	if err != nil {
		t.Fatal(err)
	}
	tw.w.TestOverrideAuthRotationPeriod = opts.AuthRotationPeriod
	if opts.NonceFn != nil {
		tw.w.nonceFn = opts.NonceFn
	}

	// The real server functions will listen for shutdown cues and act so mimic
	// that here, and ensure that channels get drained
	go func() {
		for {
			select {
			case <-tw.b.ShutdownCh:
				tw.Shutdown()
			case <-tw.b.ServerSideShutdownCh:
				tw.Shutdown()
			case <-tw.shutdownDoneCh:
				return
			}
		}
	}()

	if !opts.DisableAutoStart {
		if err := tw.w.Start(); err != nil {
			t.Fatal(err)
		}
	}

	return tw
}

// AddClusterWorkerMember creates another worker that shares the receiver's
// KMSes and upstream addresses, effectively adding it to the same cluster.
func (tw *TestWorker) AddClusterWorkerMember(t testing.TB, opts *TestWorkerOpts) *TestWorker {
	const op = "worker.(TestWorker).AddClusterWorkerMember"
	if opts == nil {
		opts = new(TestWorkerOpts)
	}
	nextOpts := &TestWorkerOpts{
		WorkerAuthKms:           tw.w.conf.WorkerAuthKms,
		DownstreamWorkerAuthKms: tw.w.conf.DownstreamWorkerAuthKms,
		WorkerAuthStorageKms:    tw.w.conf.WorkerAuthStorageKms,
		Name:                    opts.Name,
		InitialUpstreams:        tw.UpstreamAddrs(),
		Logger:                  tw.w.conf.Logger,
		SuccessfulControllerRPCGracePeriodDuration: opts.SuccessfulControllerRPCGracePeriodDuration,
		WorkerAuthDebuggingEnabled:                 tw.w.conf.WorkerAuthDebuggingEnabled,
	}
	if nextOpts.Name == "" {
		var err error
		nextOpts.Name, err = db.NewPublicId(context.Background(), "w")
		if err != nil {
			t.Fatal(err)
		}
		nextOpts.Name = strings.ToLower(nextOpts.Name)
		event.WriteSysEvent(context.TODO(), op, "worker name generated", "name", nextOpts.Name)
	}
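	// NewTestWorker registers its own t.Cleanup shutdown, so the new member
	// needs no extra teardown beyond what the test framework already does.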
	return NewTestWorker(t, nextOpts)
}

// NewAuthorizedPkiTestWorker creates a new test worker with the provided
// upstreams and creates it in the provided repo as an authorized worker. It
// returns the TestWorker and its Boundary ID.
func NewAuthorizedPkiTestWorker(t *testing.T, repo *server.Repository, name string, upstreams []string, opt ...config.Option) (*TestWorker, string) {
	t.Helper()
	logger := hclog.New(&hclog.LoggerOptions{
		Level: hclog.Trace,
	})
	wcfg, err := config.DevWorker()
	require.NoError(t, err)
	wcfg.Worker.Name = ""
	wcfg.Worker.InitialUpstreams = upstreams
	w := NewTestWorker(t, &TestWorkerOpts{
		InitialUpstreams: upstreams,
		Logger:           logger.Named(name),
		Config:           wcfg,
	})
	t.Cleanup(w.Shutdown)

	// Perform initial authentication of worker to controller
	reqBytes, err := base58.FastBase58Decoding(w.Worker().WorkerAuthRegistrationRequest)
	require.NoError(t, err)

	// Decode the proto into the request and create the worker
	pkiWorkerReq := new(types.FetchNodeCredentialsRequest)
	require.NoError(t, proto.Unmarshal(reqBytes, pkiWorkerReq))

	wr, err := repo.CreateWorker(context.Background(), &server.Worker{
		Worker: &store.Worker{
			Name:    name,
			ScopeId: scope.Global.String(),
		},
	}, server.WithFetchNodeCredentialsRequest(pkiWorkerReq))
	require.NoError(t, err)
	return w, wr.GetPublicId()
}

// mockServerCoordinationService is meant to stand in for a controller when
// testing the methods defined by the server coordination service. It allows
// applying assertions and specifying the return values of gRPC methods by
// overriding the corresponding function fields.
type mockServerCoordinationService struct {
	pbs.UnimplementedServerCoordinationServiceServer

	nextReqAssert         func(*pbs.StatusRequest) (*pbs.StatusResponse, error)
	nextStatisticAssert   func(*pbs.StatisticsRequest) (*pbs.StatisticsResponse, error)
	nextSessionInfoAssert func(*pbs.SessionInfoRequest) (*pbs.SessionInfoResponse, error)
}

func (m mockServerCoordinationService) Status(ctx context.Context, req *pbs.StatusRequest) (*pbs.StatusResponse, error) {
	if m.nextReqAssert != nil {
		return m.nextReqAssert(req)
	}
	return nil, status.Error(codes.Unavailable, "Status not implemented")
}

func (m mockServerCoordinationService) Statistics(ctx context.Context, req *pbs.StatisticsRequest) (*pbs.StatisticsResponse, error) {
	if m.nextStatisticAssert != nil {
		return m.nextStatisticAssert(req)
	}
	return nil, status.Error(codes.Unavailable, "Statistics not implemented")
}

func (m mockServerCoordinationService) SessionInfo(ctx context.Context, req *pbs.SessionInfoRequest) (*pbs.SessionInfoResponse, error) {
	if m.nextSessionInfoAssert != nil {
		return m.nextSessionInfoAssert(req)
	}
	return nil, status.Error(codes.Unavailable, "SessionInfo not implemented")
}

var _ pbs.ServerCoordinationServiceServer = (*mockServerCoordinationService)(nil)

// TestWaitForNextSuccessfulSessionInfoUpdate waits for the next successful
// session info report. It's used by tests in place of a more opaque and
// possibly unnecessarily long sleep for things like initial controller
// check-in, etc.
//
// The timeout is aligned with the worker's session info grace period.
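//
// A typical call site, assuming tw is a TestWorker already connected to a
// running test controller:
//
//	tw.Worker().TestWaitForNextSuccessfulSessionInfoUpdate(t)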
func (w *Worker) TestWaitForNextSuccessfulSessionInfoUpdate(t testing.TB) {
	t.Helper()
	const op = "worker.(Worker).WaitForNextSuccessfulSessionInfoUpdate"
	waitStart := time.Now()
	ctx, cancel := context.WithTimeout(w.baseContext, time.Duration(w.successfulSessionInfoGracePeriod.Load()))
	defer cancel()
	t.Log("waiting for next session info report to controller")
	for {
		select {
		case <-time.After(time.Second):
			// pass
		case <-ctx.Done():
			t.Error("error waiting for next session info report to controller")
			return
		}
		si := w.lastSessionInfoSuccess.Load().(*lastSessionInfo)
		if si != nil && si.LastSuccessfulRequestTime.After(waitStart) {
			break
		}
	}
	t.Log("next worker session info update sent successfully")
}

// TestWaitForNextSuccessfulStatisticsUpdate waits for the next successful
// statistics report. It's used by tests in place of a more opaque and
// possibly unnecessarily long sleep for things like initial controller
// check-in, etc.
//
// The timeout is aligned with twice the worker's statistics timeout duration.
func (w *Worker) TestWaitForNextSuccessfulStatisticsUpdate(t testing.TB) {
	t.Helper()
	const op = "worker.(Worker).WaitForNextSuccessfulStatisticsUpdate"
	waitStart := time.Now()
	ctx, cancel := context.WithTimeout(w.baseContext, time.Duration(2*w.statisticsCallTimeoutDuration.Load()))
	defer cancel()
	t.Log("waiting for next statistics report to controller")
	for {
		select {
		case <-time.After(time.Second):
			// pass
		case <-ctx.Done():
			t.Error("error waiting for next statistics report to controller")
			return
		}
		si := w.lastStatisticsSuccess.Load().(*lastStatistics)
		if si != nil && si.LastSuccessfulRequestTime.After(waitStart) {
			break
		}
	}
	t.Log("next worker statistics update sent successfully")
}
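
// A rough end-to-end sketch of how these helpers are typically wired together.
// The controller side (tc, its Config() and ClusterAddrs() accessors) comes
// from the controller package's test helpers and is assumed here rather than
// defined in this file:
//
//	tc := controller.NewTestController(t, nil)
//	tw := NewTestWorker(t, &TestWorkerOpts{
//		WorkerAuthKms:    tc.Config().WorkerAuthKms,
//		InitialUpstreams: tc.ClusterAddrs(),
//	})
//	tw.Worker().TestWaitForNextSuccessfulSessionInfoUpdate(t)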