From 26a0303b38acb5312d10da448acd270fa8a2c237 Mon Sep 17 00:00:00 2001 From: Louis Ruch Date: Fri, 25 Jun 2021 13:30:03 -0700 Subject: [PATCH] Add sentinel and sanitize package (#1353) * Add sentinel migrations * Add sentinel and sanitize package --- internal/credential/vault/credential.go | 9 +- internal/credential/vault/credential_test.go | 3 +- internal/db/domains_test.go | 85 +++++-------------- internal/db/sanitize/doc.go | 3 + internal/db/sanitize/sanitize.go | 35 ++++++++ internal/db/sanitize/sanitize_test.go | 28 ++++++ .../postgres/10/07_sentinels.up.sql | 36 ++++++++ internal/db/schema/postgres_migration.gen.go | 36 +++++++- internal/db/sentinel/doc.go | 11 +++ internal/db/sentinel/sentinel.go | 31 +++++++ internal/db/sentinel/sentinel_test.go | 41 +++++++++ 11 files changed, 249 insertions(+), 69 deletions(-) create mode 100644 internal/db/sanitize/doc.go create mode 100644 internal/db/sanitize/sanitize.go create mode 100644 internal/db/sanitize/sanitize_test.go create mode 100644 internal/db/schema/migrations/postgres/10/07_sentinels.up.sql create mode 100644 internal/db/sentinel/doc.go create mode 100644 internal/db/sentinel/sentinel.go create mode 100644 internal/db/sentinel/sentinel_test.go diff --git a/internal/credential/vault/credential.go b/internal/credential/vault/credential.go index 9b06f65c1a..a3a9c02edb 100644 --- a/internal/credential/vault/credential.go +++ b/internal/credential/vault/credential.go @@ -5,6 +5,8 @@ import ( "github.com/hashicorp/boundary/internal/credential" "github.com/hashicorp/boundary/internal/credential/vault/store" + "github.com/hashicorp/boundary/internal/db/sanitize" + "github.com/hashicorp/boundary/internal/db/sentinel" "github.com/hashicorp/boundary/internal/errors" "github.com/hashicorp/boundary/internal/oplog" "google.golang.org/protobuf/proto" @@ -30,10 +32,6 @@ const ( // ExpiredCredential represents a credential that expired. This is a terminal // status. It does not transition to RevokedCredential. ExpiredCredential CredentialStatus = "expired" -) - -const ( - externalIdSentinel = "\ufffenone" // UnknownCredentialStatus represents a credential that has an unknown // status. @@ -60,8 +58,9 @@ func newCredential(libraryId, sessionId, externalId string, tokenHmac []byte, ex } status := string(ActiveCredential) + externalId = sanitize.String(externalId) if externalId == "" { - externalId = externalIdSentinel + externalId = sentinel.ExternalIdNone status = string(UnknownCredentialStatus) } diff --git a/internal/credential/vault/credential_test.go b/internal/credential/vault/credential_test.go index d7519b6390..280598dc52 100644 --- a/internal/credential/vault/credential_test.go +++ b/internal/credential/vault/credential_test.go @@ -8,6 +8,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/hashicorp/boundary/internal/credential/vault/store" "github.com/hashicorp/boundary/internal/db" + "github.com/hashicorp/boundary/internal/db/sentinel" "github.com/hashicorp/boundary/internal/db/timestamp" "github.com/hashicorp/boundary/internal/iam" temp "github.com/hashicorp/boundary/internal/session" @@ -110,7 +111,7 @@ func TestCredential_New(t *testing.T) { Credential: &store.Credential{ LibraryId: lib.GetPublicId(), SessionId: session.GetPublicId(), - ExternalId: "\ufffenone", + ExternalId: sentinel.ExternalIdNone, TokenHmac: token.GetTokenHmac(), Status: string(UnknownCredentialStatus), }, diff --git a/internal/db/domains_test.go b/internal/db/domains_test.go index 0673fa1dd8..be5b5b6a56 100644 --- a/internal/db/domains_test.go +++ b/internal/db/domains_test.go @@ -927,15 +927,21 @@ returning id; value string wantErr bool }{ - {"normal", "\ufffefoo", false}, + {"normal", "\ufffefoo\uffff", false}, {"normal non-sentinel", "foo", false}, + {"empty-with-sentinel", "\ufffe\uffff", false}, {"trailing sentinel", "\ufffefoo\ufffe", false}, - {"sentinel with space before word", "\ufffe foo", false}, - {"sentinel with empty string", "\ufffe ", true}, - {"multiple sentinels with empty string", "\ufffe\ufffe ", true}, - {"multiple sentinels", "\ufffe\ufffefoo", false}, + {"only start sentinel", "\ufffe", true}, + {"only end sentinel", "\uffff", true}, + {"sentinel with space before word", "\ufffe foo\uffff", false}, + {"sentinel with space after word", "\ufffefoo \uffff", false}, + {"start sentinel with empty string", "\ufffe ", true}, + {"multiple sentinels with empty string", "\ufffe\ufffe \uffff\uffff", false}, + {"multiple sentinels", "\ufffe\ufffefoo\uffff\uffff", false}, + {"multiple sentinels inverted", "\uffff\ufffffoo\ufffe\ufffe", false}, {"sentinel space sentinel space string", "\ufffe \ufffe foo ", false}, - {"empty string", " ", true}, + {"only spaces string", " ", true}, + {"empty string", "", true}, } for _, tt := range tests { tt := tt @@ -967,15 +973,18 @@ select wt_is_sentinel($1); value string want bool }{ - {"normal", "\ufffefoo", true}, + {"normal", "\ufffefoo\uffff", true}, {"non-sentinel", "foo", false}, - {"trailing sentinel", "\ufffefoo\ufffe", true}, - {"sentinel with space before word", "\ufffe foo", true}, - {"sentinel with empty string", "\ufffe ", false}, - {"multiple sentinels with empty string", "\ufffe\ufffe ", false}, - {"multiple sentinels", "\ufffe\ufffefoo", true}, - {"sentinel space sentinel space string", "\ufffe \ufffe foo ", true}, - {"empty string", " ", false}, + {"only start sentinel", "\ufffefoo", false}, + {"only end sentinel", "foo\uffff", false}, + {"trailing start sentinel", "\ufffefoo\ufffe", false}, + {"leading end sentinel", "\ufffffoo\uffff", false}, + {"sentinel with space before word", "\ufffe foo\uffff", true}, + {"sentinel with empty string", "\ufffe \uffff", true}, + {"multiple start sentinels with empty string", "\ufffe\ufffe \uffff", true}, + {"multiple start sentinels", "\ufffe\ufffefoo\uffff", true}, + {"only spaces", " ", false}, + {"empty string", "", false}, } for _, tt := range tests { tt := tt @@ -998,54 +1007,6 @@ select wt_is_sentinel($1); } } -func TestDomain_wt_to_sentinel(t *testing.T) { - const ( - query = ` -select wt_to_sentinel($1); -` - ) - - conn, _ := TestSetup(t, "postgres") - db := conn.DB() - - tests := []struct { - name string - value string - want string - }{ - {"V", "foo", "\ufffefoo"}, - {"space V", " foo", "\ufffefoo"}, - {"V space", "foo ", "\ufffefoo"}, - {"space space V", " foo ", "\ufffefoo"}, - {"sentinel V", "\ufffefoo", "\ufffefoo"}, - {"sentinel space V", "\ufffe foo", "\ufffefoo"}, - {"sentinel V space", "\ufffefoo ", "\ufffefoo"}, - {"sentinel sentinel V", "\ufffe\ufffefoo", "\ufffefoo"}, - {"sentinel sentinel space V", "\ufffe\ufffe foo", "\ufffefoo"}, - {"sentinel sentinel V space", "\ufffe\ufffefoo ", "\ufffefoo"}, - {"sentinel space sentinel space V space", "\ufffe \ufffe foo ", "\ufffefoo"}, - } - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - assert, require := assert.New(t), require.New(t) - t.Logf("query value: %q", tt.value) - - var got string - rows, err := db.Query(query, tt.value) - require.NoError(err) - defer rows.Close() - - require.True(rows.Next()) - require.NoError(rows.Scan(&got)) - assert.Equal(tt.want, got) - - require.False(rows.Next()) - require.NoError(rows.Err()) - }) - } -} - func TestDomain_not_null_columns_func(t *testing.T) { const ( createTable = ` diff --git a/internal/db/sanitize/doc.go b/internal/db/sanitize/doc.go new file mode 100644 index 0000000000..570aa40824 --- /dev/null +++ b/internal/db/sanitize/doc.go @@ -0,0 +1,3 @@ +// Package sanitize contains a set of functions that sanitizes input received from external +// systems before being persisted in the database. +package sanitize diff --git a/internal/db/sanitize/sanitize.go b/internal/db/sanitize/sanitize.go new file mode 100644 index 0000000000..6777cb0764 --- /dev/null +++ b/internal/db/sanitize/sanitize.go @@ -0,0 +1,35 @@ +package sanitize + +import ( + "unicode" + + "github.com/hashicorp/boundary/internal/db/sentinel" +) + +// String sanitizes s by replacing all invalid unicode characters as well as the sentinel +// start character U+FFFE and sentinel end character U+FFFF with the Unicode +// replacement character U+FFFD. +// +// According to the Unicode standard: "If a noncharacter is received in open interchange, +// an application is not required to interpret it in any way. It is good practice, however, +// to recognize it as a noncharacter and to take appropriate action, such as replacing it +// with U+FFFD replacement character." +// See https://www.unicode.org/versions/Unicode13.0.0/ch23.pdf#G12612. +func String(s string) string { + out := make([]rune, 0, len(s)) + + // For a string, the range clause will return the index and the rune at the index of + // the string. If the iteration encounters an invalid UTF-8 sequence, the rune value + // returned will be 0xFFFD, the Unicode replacement character. + // See https://golang.org/ref/spec#For_statements. + for _, r := range s { + switch r { + case sentinel.Start, sentinel.End: + // The range clause does not replace the sentinel start and end characters. + out = append(out, unicode.ReplacementChar) + default: + out = append(out, r) + } + } + return string(out) +} diff --git a/internal/db/sanitize/sanitize_test.go b/internal/db/sanitize/sanitize_test.go new file mode 100644 index 0000000000..50c969ef9c --- /dev/null +++ b/internal/db/sanitize/sanitize_test.go @@ -0,0 +1,28 @@ +package sanitize + +import "testing" + +func TestString(t *testing.T) { + tests := []struct { + name string + s string + want string + }{ + {"no-special", "string", "string"}, + {"spaces", "string string", "string string"}, + {"leading-sentinel-start", "\ufffestring", "\ufffdstring"}, + {"mixed", "\ufffe\uffffstring\ufffestring\uffff", "\ufffd\ufffdstring\ufffdstring\ufffd"}, + {"only-sentinels", "\ufffe\uffff\ufffe\uffff", "\ufffd\ufffd\ufffd\ufffd"}, + {"empty-string", "", ""}, + {"with-invalid-utf8", "\xff\xfe", "\ufffd\ufffd"}, + {"with-invalid-utf8-and-sentinels", "\xce\ufffe\ufffd\xcc", "\ufffd\ufffd\ufffd\ufffd"}, + {"with-invalid-utf8-mixed", "\xcefoo\xccbar\uffffzoo", "\ufffdfoo\ufffdbar\ufffdzoo"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := String(tt.s); got != tt.want { + t.Errorf("String() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/internal/db/schema/migrations/postgres/10/07_sentinels.up.sql b/internal/db/schema/migrations/postgres/10/07_sentinels.up.sql new file mode 100644 index 0000000000..10f82eccf2 --- /dev/null +++ b/internal/db/schema/migrations/postgres/10/07_sentinels.up.sql @@ -0,0 +1,36 @@ +begin; + +update credential_vault_credential + set external_id = concat(external_id, u&'\ffff') + where wt_is_sentinel(external_id) + and not starts_with(reverse(external_id), u&'\ffff'); + +alter domain wt_sentinel + drop constraint wt_sentinel_not_valid; + +drop function wt_is_sentinel; + +create function wt_is_sentinel(string text) + returns bool +as $$ +select starts_with(string, u&'\fffe') and starts_with(reverse(string), u&'\ffff'); +$$ language sql + immutable + returns null on null input; +comment on function wt_is_sentinel is + 'wt_is_sentinel returns true if string is a sentinel value'; + +alter domain wt_sentinel + add constraint wt_sentinel_not_valid + check( + wt_is_sentinel(value) + or + length(trim(trailing u&'\ffff' from trim(leading u&'\fffe ' from value))) > 0 + ); + +comment on domain wt_sentinel is + 'A non-empty string with a Unicode prefix of U+FFFE and suffix of U+FFFF to indicate it is a sentinel value'; + +drop function wt_to_sentinel; -- wt_to_sentinel is not needed, dropping and not re-creating + +commit; diff --git a/internal/db/schema/postgres_migration.gen.go b/internal/db/schema/postgres_migration.gen.go index b575cfa0e0..ac5b625b4f 100644 --- a/internal/db/schema/postgres_migration.gen.go +++ b/internal/db/schema/postgres_migration.gen.go @@ -4,7 +4,7 @@ package schema func init() { migrationStates["postgres"] = migrationState{ - binarySchemaVersion: 10006, + binarySchemaVersion: 10007, upMigrations: map[int][]byte{ 1: []byte(` create domain wt_public_id as text @@ -6041,6 +6041,40 @@ create table session_credential_dynamic ( $$ language plpgsql; create trigger revoke_credentials after insert on session_state for each row execute procedure revoke_credentials(); +`), + 10007: []byte(` +update credential_vault_credential + set external_id = concat(external_id, u&'\ffff') + where wt_is_sentinel(external_id) + and not starts_with(reverse(external_id), u&'\ffff'); + +alter domain wt_sentinel + drop constraint wt_sentinel_not_valid; + +drop function wt_is_sentinel; + +create function wt_is_sentinel(string text) + returns bool +as $$ +select starts_with(string, u&'\fffe') and starts_with(reverse(string), u&'\ffff'); +$$ language sql + immutable + returns null on null input; +comment on function wt_is_sentinel is + 'wt_is_sentinel returns true if string is a sentinel value'; + +alter domain wt_sentinel + add constraint wt_sentinel_not_valid + check( + wt_is_sentinel(value) + or + length(trim(trailing u&'\ffff' from trim(leading u&'\fffe ' from value))) > 0 + ); + +comment on domain wt_sentinel is + 'A non-empty string with a Unicode prefix of U+FFFE and suffix of U+FFFF to indicate it is a sentinel value'; + +drop function wt_to_sentinel; -- wt_to_sentinel is not needed, dropping and not re-creating `), 2001: []byte(` -- log_migration entries represent logs generated during migrations diff --git a/internal/db/sentinel/doc.go b/internal/db/sentinel/doc.go new file mode 100644 index 0000000000..83d7c5db05 --- /dev/null +++ b/internal/db/sentinel/doc.go @@ -0,0 +1,11 @@ +// Package sentinel allows for the use of Unicode non-characters to distinguish between +// Boundary defined sentinels and values provided by external systems. +// +// All sentinel values are prefixed with the sentinel start character U+FFFE and suffixed +// with the sentinel end character U+FFFF. Any string that starts with U+FFFE and ends with +// U+FFFF is a valid sentinel and reserved for use within Boundary. +// +// U+FFFE and U+FFFF are special non-characters reserved for internal use in the Unicode +// standard. +// See https://www.unicode.org/versions/Unicode13.0.0/ch23.pdf#G12612. +package sentinel diff --git a/internal/db/sentinel/sentinel.go b/internal/db/sentinel/sentinel.go new file mode 100644 index 0000000000..93ba2a7a75 --- /dev/null +++ b/internal/db/sentinel/sentinel.go @@ -0,0 +1,31 @@ +package sentinel + +const ( + // Start is the Unicode special non-character U+FFFE, and the prefix included in all + // Boundary defined sentinel values. + Start = '\ufffe' + + // End is the Unicode special non-character U+FFFF, and the suffix included in all + // Boundary defined sentinel values. + End = '\uffff' +) + +const ( + // ExternalIdNone is a Boundary sentinel indicating that no id was provided by an + // external system. + ExternalIdNone = "\ufffenone\uffff" +) + +// Is returns true if s is a valid sentinel. +func Is(s string) bool { + // A valid sentinel must be at least 6 bytes in length, 3 bytes for '\ufffe' and 3 + // bytes for '\uffff'. + if len(s) < 6 { + return false + } + sr := []rune(s) + if sr[0] == Start && sr[len(sr)-1] == End { + return true + } + return false +} diff --git a/internal/db/sentinel/sentinel_test.go b/internal/db/sentinel/sentinel_test.go new file mode 100644 index 0000000000..1eeab66eaf --- /dev/null +++ b/internal/db/sentinel/sentinel_test.go @@ -0,0 +1,41 @@ +package sentinel + +import ( + "testing" +) + +func TestIs(t *testing.T) { + tests := []struct { + name string + s string + want bool + }{ + {"normal", "\ufffefoo\uffff", true}, + {"non-sentinel", "foo", false}, + {"trailing and leading start sentinel", "\ufffefoo\ufffe", false}, + {"trailing and leading end sentinel", "\uffffoo\uffff", false}, + {"only start sentinel with string", "\ufffefoo", false}, + {"only end sentinel with string", "foo\uffff", false}, + {"only end sentinel", "\uffff", false}, + {"only start sentinel", "\ufffe", false}, + {"sentinel with space before word", "\ufffe foo\uffff", true}, + {"sentinel with only spaces", "\ufffe \uffff", true}, + {"sentinel with empty string", "\ufffe\uffff", true}, + {"multiple start sentinels with empty string", "\ufffe\ufffe \uffff", true}, + {"multiple start sentinels", "\ufffe\ufffefoo\uffff", true}, + {"start sentinel space start sentinel space string", "\ufffe \ufffe foo \uffff", true}, + {"sentinel with space after word", "\ufffefoo \uffff", true}, + {"multiple end sentinels with empty string", "\ufffe \uffff\uffff\uffff", true}, + {"multiple end sentinels", "\ufffefoo\uffff\uffff\uffff", true}, + {"string space end sentinel space end sentinel", "\ufffefoo \uffff \uffff", true}, + {"only spaces", " ", false}, + {"empty string", "", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := Is(tt.s); got != tt.want { + t.Errorf("Is() = %v, want %v", got, tt.want) + } + }) + } +}