Mark expired Vault token as expired when Vault is unreachable (#6235)

* add shutdown function to TestVaultServer

* implement shutdown function in TestVaultServer

* stop renewing vault token when token has already expired

* Add comment

* clean up error checks

* modify credential_vault_token_renewal_revocation_view to include expiration_time

* include TokenExpirationTime in clientStore

* add test to validate that unexpired tokens are not set as expired

* add token status validation

* add expiration time validation to tests

* add small comment

* remove unnecessary renew from test

* fix token translation uses wrong time field and add tests

* add more token status validation

* fix lint

* move migration to 103

* remove t.Parallel() comments

* remove unused setup code

* use atomic for testvault.stopped

implement atomic read/write for testvault

* remove file created by merge conflict

* make gen
rand-read-reverting
Sorawis Nilparuk (Bo) 1 month ago committed by GitHub
parent 7ff2af4708
commit 42e682e120
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -169,6 +169,12 @@ func (r *TokenRenewalJob) Run(ctx context.Context, _ time.Duration) error {
return nil
}
func isForbiddenError(err error) bool {
var respErr *vault.ResponseError
ok := errors.As(err, &respErr)
return ok && respErr.StatusCode == http.StatusForbidden
}
func (r *TokenRenewalJob) renewToken(ctx context.Context, s *clientStore) error {
const op = "vault.(TokenRenewalJob).renewToken"
databaseWrapper, err := r.kms.GetWrapper(ctx, s.ProjectId, kms.KeyPurposeDatabase)
@ -190,33 +196,34 @@ func (r *TokenRenewalJob) renewToken(ctx context.Context, s *clientStore) error
return errors.Wrap(ctx, err, op)
}
var respErr *vault.ResponseError
renewedToken, err := vc.renewToken(ctx)
if ok := errors.As(err, &respErr); ok && respErr.StatusCode == http.StatusForbidden {
if err != nil {
// Vault returned a 403 when attempting a renew self, the token is either expired
// or malformed. Set status to "expired" so credentials created with token can be
// cleaned up.
query, values := token.updateStatusQuery(ExpiredToken)
numRows, err := r.writer.Exec(ctx, query, values)
if err != nil {
return errors.Wrap(ctx, err, op)
}
if numRows != 1 {
return errors.New(ctx, errors.Unknown, op, "token expired but failed to update repo")
}
if s.TokenStatus == string(CurrentToken) {
event.WriteSysEvent(ctx, op, "Vault credential store current token has expired", "credential store id", s.PublicId)
}
// Also, check if the token has already expired based on time to avoid attempting
// to renew the expired token against an Vault server that may no longer exist.
if isForbiddenError(err) || time.Now().After(token.ExpirationTime.AsTime()) {
query, values := token.updateStatusQuery(ExpiredToken)
numRows, err := r.writer.Exec(ctx, query, values)
if err != nil {
return errors.Wrap(ctx, err, op)
}
if numRows != 1 {
return errors.New(ctx, errors.Unknown, op, "token expired but failed to update repo")
}
if s.TokenStatus == string(CurrentToken) {
event.WriteSysEvent(ctx, op, "Vault credential store current token has expired", "credential store id", s.PublicId)
}
// Set credentials associated with this token to expired as Vault will already cascade delete them
_, err = r.writer.Exec(ctx, updateCredentialStatusByTokenQuery, []any{ExpiredCredential, token.TokenHmac})
if err != nil {
return errors.Wrap(ctx, err, op, errors.WithMsg("error updating credentials to revoked after revoking token"))
// Set credentials associated with this token to expired as Vault will already cascade delete them
_, err = r.writer.Exec(ctx, updateCredentialStatusByTokenQuery, []any{ExpiredCredential, token.TokenHmac})
if err != nil {
return errors.Wrap(ctx, err, op, errors.WithMsg("error updating credentials to revoked after revoking token"))
}
// exit early as we mark the token as expired
return nil
}
return nil
}
if err != nil {
return errors.Wrap(ctx, err, op, errors.WithMsg("unable to renew vault token"))
}

@ -155,7 +155,6 @@ func testVaultCred(t *testing.T,
}
func TestNewTokenRenewalJob(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
conn, _ := db.TestSetup(t, "postgres")
rw := db.New(conn)
wrapper := db.TestWrapper(t)
@ -239,7 +238,6 @@ func TestNewTokenRenewalJob(t *testing.T) {
}
func TestTokenRenewalJob_RunLimits(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
conn, _ := db.TestSetup(t, "postgres")
rw := db.New(conn)
@ -325,7 +323,6 @@ func TestTokenRenewalJob_RunLimits(t *testing.T) {
}
func TestTokenRenewalJob_Run(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
assert, require := assert.New(t), require.New(t)
@ -435,7 +432,6 @@ func TestTokenRenewalJob_Run(t *testing.T) {
}
func TestTokenRenewalJob_RunExpired(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
assert, require := assert.New(t), require.New(t)
@ -500,9 +496,119 @@ func TestTokenRenewalJob_RunExpired(t *testing.T) {
require.Nil(cs)
}
func TestTokenRenewalJob_NextRunIn(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
// TestTokenRenewalJob_Run_VaultUnreachableTemporarily tests that tokens are not marked
// as expired when Vault is unreachable temporarily.
func TestTokenRenewalJob_Run_VaultUnreachableTemporarily(t *testing.T) {
ctx := context.Background()
assert, require := assert.New(t), require.New(t)
conn, _ := db.TestSetup(t, "postgres")
rw := db.New(conn)
wrapper := db.TestWrapper(t)
kmsCache := kms.TestKms(t, conn, wrapper)
sche := scheduler.TestScheduler(t, conn, wrapper, scheduler.WithRunJobsInterval(time.Second))
_, prj := iam.TestScopes(t, iam.TestRepo(t, conn, wrapper))
v := NewTestVaultServer(t)
_, ct := v.CreateToken(t, WithTokenPeriod(time.Second*300))
in, err := NewCredentialStore(prj.GetPublicId(), v.Addr, []byte(ct))
assert.NoError(err)
require.NotNil(in)
r, err := newTokenRenewalJob(ctx, rw, rw, kmsCache)
require.NoError(err)
err = sche.RegisterJob(ctx, r)
require.NoError(err)
repo, err := NewRepository(ctx, rw, rw, kmsCache, sche)
require.NoError(err)
cs, err := repo.CreateCredentialStore(ctx, in)
require.NoError(err)
tokenBeforeRenew := allocToken()
require.NoError(rw.LookupWhere(ctx, &tokenBeforeRenew, "store_id = ?", []any{cs.GetPublicId()}))
assert.True(time.Now().Before(tokenBeforeRenew.ExpirationTime.AsTime()))
assert.Equal(string(CurrentToken), tokenBeforeRenew.Status)
// Shutdown Vault server to make vault unreachable
v.Shutdown(t)
// Renewal will fail because Vault is unreachable but job does not return an error
err = r.Run(ctx, 0)
require.NoError(err)
// Verify token was not expired in repo
tokenAfterFailedRenew := allocToken()
require.NoError(rw.LookupWhere(ctx, &tokenAfterFailedRenew, "store_id = ?", []any{cs.GetPublicId()}))
// expiration time is still in the future and token should still be 'current'
assert.True(time.Now().Before(tokenAfterFailedRenew.ExpirationTime.AsTime()))
// expiration time should remain the same since renewal failed
assert.Equal(tokenBeforeRenew.ExpirationTime, tokenAfterFailedRenew.ExpirationTime)
assert.Equal(string(CurrentToken), tokenAfterFailedRenew.Status)
}
// TestTokenRenewalJob_RunExpired_VaultUnreachable tests token renewal logic when the Vault server becomes unreachable.
// The job should stop attempting to renew the token once the token is expired since the renewal will never
// be successful and there is no guarantee that Vault server will ever be reachable again.
func TestTokenRenewalJob_RunExpired_VaultUnreachablePermanently(t *testing.T) {
ctx := context.Background()
assert, require := assert.New(t), require.New(t)
conn, _ := db.TestSetup(t, "postgres")
rw := db.New(conn)
wrapper := db.TestWrapper(t)
kmsCache := kms.TestKms(t, conn, wrapper)
sche := scheduler.TestScheduler(t, conn, wrapper, scheduler.WithRunJobsInterval(time.Second))
_, prj := iam.TestScopes(t, iam.TestRepo(t, conn, wrapper))
v := NewTestVaultServer(t)
// Create 2s token so it expires in vault before we can renew it
_, ct := v.CreateToken(t, WithTokenPeriod(time.Second*2))
in, err := NewCredentialStore(prj.GetPublicId(), v.Addr, []byte(ct))
assert.NoError(err)
require.NotNil(in)
r, err := newTokenRenewalJob(ctx, rw, rw, kmsCache)
require.NoError(err)
err = sche.RegisterJob(ctx, r)
require.NoError(err)
repo, err := NewRepository(ctx, rw, rw, kmsCache, sche)
require.NoError(err)
cs, err := repo.CreateCredentialStore(ctx, in)
require.NoError(err)
tokenBeforeRenew := allocToken()
require.NoError(rw.LookupWhere(ctx, &tokenBeforeRenew, "store_id = ?", []any{cs.GetPublicId()}))
// expiration time is in the future
assert.True(tokenBeforeRenew.ExpirationTime.AsTime().After(time.Now()))
assert.Equal(string(CurrentToken), tokenBeforeRenew.Status)
err = r.Run(ctx, 0)
require.NoError(err)
tokenAfterSuccessfulRenew := allocToken()
require.NoError(rw.LookupWhere(ctx, &tokenAfterSuccessfulRenew, "store_id = ?", []any{cs.GetPublicId()}))
// successful renewal should have updated expiration time
assert.True(tokenAfterSuccessfulRenew.ExpirationTime.AsTime().After(tokenBeforeRenew.ExpirationTime.AsTime()))
assert.Equal(string(CurrentToken), tokenAfterSuccessfulRenew.Status)
// Shutdown Vault server to make vault unreachable
v.Shutdown(t)
// Sleep to move clock and expire token
time.Sleep(time.Second * 2)
// Renewal should fail, job does not return an error when renewal fails (emits error event)
err = r.Run(ctx, 0)
require.NoError(err)
// token should be marked as expired in the repo since the expiration time has passed
tokenAfterFailedRenew := allocToken()
require.NoError(rw.LookupWhere(ctx, &tokenAfterFailedRenew, "store_id = ?", []any{cs.GetPublicId()}))
assert.Equal(string(ExpiredToken), tokenAfterFailedRenew.Status)
}
func TestTokenRenewalJob_NextRunIn(t *testing.T) {
ctx := context.Background()
conn, _ := db.TestSetup(t, "postgres")
@ -629,7 +735,6 @@ func TestTokenRenewalJob_NextRunIn(t *testing.T) {
}
func TestNewTokenRevocationJob(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
conn, _ := db.TestSetup(t, "postgres")
rw := db.New(conn)
wrapper := db.TestWrapper(t)
@ -713,8 +818,6 @@ func TestNewTokenRevocationJob(t *testing.T) {
}
func TestTokenRevocationJob_RunLimits(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
conn, _ := db.TestSetup(t, "postgres")
@ -803,7 +906,6 @@ func TestTokenRevocationJob_RunLimits(t *testing.T) {
}
func TestTokenRevocationJob_Run(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
assert, require := assert.New(t), require.New(t)
@ -942,7 +1044,6 @@ func TestTokenRevocationJob_Run(t *testing.T) {
}
func TestNewCredentialRenewalJob(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
conn, _ := db.TestSetup(t, "postgres")
rw := db.New(conn)
wrapper := db.TestWrapper(t)
@ -1026,7 +1127,6 @@ func TestNewCredentialRenewalJob(t *testing.T) {
}
func TestCredentialRenewalJob_RunLimits(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
conn, _ := db.TestSetup(t, "postgres")
rw := db.New(conn)
@ -1142,7 +1242,6 @@ func TestCredentialRenewalJob_RunLimits(t *testing.T) {
}
func TestCredentialRenewalJob_Run(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
assert, require := assert.New(t), require.New(t)
@ -1253,7 +1352,6 @@ func TestCredentialRenewalJob_Run(t *testing.T) {
}
func TestCredentialRenewalJob_RunExpired(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
assert, require := assert.New(t), require.New(t)
@ -1333,7 +1431,6 @@ func TestCredentialRenewalJob_RunExpired(t *testing.T) {
}
func TestCredentialRenewalJob_NextRunIn(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
conn, _ := db.TestSetup(t, "postgres")
rw := db.New(conn)
@ -1491,7 +1588,6 @@ func TestCredentialRenewalJob_NextRunIn(t *testing.T) {
}
func TestNewCredentialRevocationJob(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
conn, _ := db.TestSetup(t, "postgres")
rw := db.New(conn)
wrapper := db.TestWrapper(t)
@ -1575,7 +1671,6 @@ func TestNewCredentialRevocationJob(t *testing.T) {
}
func TestCredentialRevocationJob_RunLimits(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
conn, _ := db.TestSetup(t, "postgres")
rw := db.New(conn)
@ -1691,7 +1786,6 @@ func TestCredentialRevocationJob_RunLimits(t *testing.T) {
}
func TestCredentialRevocationJob_Run(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
assert, require := assert.New(t), require.New(t)
@ -1786,7 +1880,6 @@ func TestCredentialRevocationJob_Run(t *testing.T) {
}
func TestCredentialRevocationJob_RunDeleted(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
assert, require := assert.New(t), require.New(t)
@ -1896,7 +1989,6 @@ func TestCredentialRevocationJob_RunDeleted(t *testing.T) {
}
func TestNewCredentialStoreCleanupJob(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
conn, _ := db.TestSetup(t, "postgres")
rw := db.New(conn)
wrapper := db.TestWrapper(t)
@ -1980,7 +2072,6 @@ func TestNewCredentialStoreCleanupJob(t *testing.T) {
}
func TestCredentialStoreCleanupJob_Run(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
assert, require := assert.New(t), require.New(t)
@ -2133,7 +2224,6 @@ func TestCredentialStoreCleanupJob_Run(t *testing.T) {
}
func TestNewCredentialCleanupJob(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
conn, _ := db.TestSetup(t, "postgres")
rw := db.New(conn)
@ -2220,7 +2310,6 @@ func TestVaultJobsCorrelationId(t *testing.T) {
}
func TestCredentialCleanupJob_Run(t *testing.T) {
// t.Parallel() - this was causing test failures, investigate before un-commenting
ctx := context.Background()
assert, require := assert.New(t), require.New(t)

@ -43,25 +43,26 @@ func (r *Repository) lookupClientStore(ctx context.Context, publicId string) (*c
// a Vault client. If the Vault token for the store is expired all token data will be null
// other than the status of expired.
type clientStore struct {
PublicId string `gorm:"primary_key"`
ProjectId string
DeleteTime *timestamp.Timestamp
VaultAddress string
Namespace string
CaCert []byte
TlsServerName string
TlsSkipVerify bool
WorkerFilter string
TokenHmac []byte
Token TokenSecret
CtToken []byte
TokenRenewalTime *timestamp.Timestamp
TokenKeyId string
TokenStatus string
ClientCert []byte
ClientKeyId string
ClientKey KeySecret
CtClientKey []byte
PublicId string `gorm:"primary_key"`
ProjectId string
DeleteTime *timestamp.Timestamp
VaultAddress string
Namespace string
CaCert []byte
TlsServerName string
TlsSkipVerify bool
WorkerFilter string
TokenHmac []byte
Token TokenSecret
CtToken []byte
TokenRenewalTime *timestamp.Timestamp
TokenKeyId string
TokenStatus string
TokenExpirationTime *timestamp.Timestamp
ClientCert []byte
ClientKeyId string
ClientKey KeySecret
CtClientKey []byte
}
func allocClientStore() *clientStore {
@ -98,7 +99,7 @@ func (ps *clientStore) token() *Token {
tk.Status = ps.TokenStatus
tk.CtToken = ps.CtToken
tk.KeyId = ps.TokenKeyId
tk.ExpirationTime = ps.TokenExpirationTime
return tk
}

@ -50,8 +50,6 @@ func init() {
}
}
func gotDocker(t testing.TB) {}
func gotNewServer(t testing.TB, opt ...TestOption) *TestVaultServer {
const (
serverTlsTemplate = `{
@ -189,9 +187,17 @@ func gotNewServer(t testing.TB, opt ...TestOption) *TestVaultServer {
resource, err := pool.RunWithOptions(dockerOptions)
require.NoError(err)
server.Shutdown = func(t *testing.T) {
cleanupResource(t, pool, resource)
server.stopped.Store(true)
}
if !opts.skipCleanup {
t.Cleanup(func() {
cleanupResource(t, pool, resource)
if !server.stopped.Load() {
cleanupResource(t, pool, resource)
}
})
}
server.vaultContainer = resource

@ -18,6 +18,7 @@ import (
"net/http"
"path"
"strings"
"sync/atomic"
"testing"
"time"
@ -1196,6 +1197,9 @@ type TestVaultServer struct {
vaultContainer any
ldapContainer any
postgresContainer any
stopped atomic.Bool
Shutdown func(t *testing.T)
}
// NewTestVaultServer creates and returns a TestVaultServer. Some Vault

@ -0,0 +1,52 @@
-- Copyright (c) HashiCorp, Inc.
-- SPDX-License-Identifier: BUSL-1.1
-- public.credential_vault_token_renewal_revocation source
begin;
drop view credential_vault_token_renewal_revocation;
create view credential_vault_token_renewal_revocation as
with
tokens as (
select token, -- encrypted
token_hmac,
store_id,
-- renewal time is the midpoint between the last renewal time and the expiration time
last_renewal_time + (expiration_time - last_renewal_time) / 2 as renewal_time,
key_id,
status,
expiration_time
from credential_vault_token
where status in ('current', 'maintaining', 'revoke')
)
select store.public_id as public_id,
store.project_id as project_id,
store.vault_address as vault_address,
store.namespace as namespace,
store.ca_cert as ca_cert,
store.tls_server_name as tls_server_name,
store.tls_skip_verify as tls_skip_verify,
store.worker_filter as worker_filter,
store.delete_time as delete_time,
token.token as ct_token, -- encrypted
token.token_hmac as token_hmac,
token.renewal_time as token_renewal_time,
token.key_id as token_key_id,
token.status as token_status,
token.expiration_time as token_expiration_time,
cert.certificate as client_cert,
cert.certificate_key as ct_client_key, -- encrypted
cert.key_id as client_key_id
from credential_vault_store store
join tokens token
on store.public_id = token.store_id
left join credential_vault_client_certificate cert
on store.public_id = cert.store_id;
comment on view credential_vault_token_renewal_revocation is
'credential_vault_token_renewal_revocation is a view where each row contains a credential store and the credential store''s data needed to connect to Vault. '
'The view returns a separate row for each active token in Vault (current, maintaining and revoke tokens); this view should only be used for token renewal and revocation. '
'Each row may contain encrypted data. This view should not be used to retrieve data which will be returned external to boundary.';
commit;
Loading…
Cancel
Save