mirror of https://github.com/hashicorp/boundary
Add sentinel and sanitize package (#1353)
* Add sentinel migrations
* Add sentinel and sanitize package
pull/1359/head
parent
28f48df026
commit
26a0303b38
@ -0,0 +1,3 @@
|
||||
// Package sanitize contains a set of functions that sanitizes input received from external
|
||||
// systems before being persisted in the database.
|
||||
package sanitize
|
||||
@ -0,0 +1,35 @@
|
||||
package sanitize
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
|
||||
"github.com/hashicorp/boundary/internal/db/sentinel"
|
||||
)
|
||||
|
||||
// String sanitizes s by replacing all invalid unicode characters as well as the sentinel
|
||||
// start character U+FFFE and sentinel end character U+FFFF with the Unicode
|
||||
// replacement character U+FFFD.
|
||||
//
|
||||
// According to the Unicode standard: "If a noncharacter is received in open interchange,
|
||||
// an application is not required to interpret it in any way. It is good practice, however,
|
||||
// to recognize it as a noncharacter and to take appropriate action, such as replacing it
|
||||
// with U+FFFD replacement character."
|
||||
// See https://www.unicode.org/versions/Unicode13.0.0/ch23.pdf#G12612.
|
||||
func String(s string) string {
|
||||
out := make([]rune, 0, len(s))
|
||||
|
||||
// For a string, the range clause will return the index and the rune at the index of
|
||||
// the string. If the iteration encounters an invalid UTF-8 sequence, the rune value
|
||||
// returned will be 0xFFFD, the Unicode replacement character.
|
||||
// See https://golang.org/ref/spec#For_statements.
|
||||
for _, r := range s {
|
||||
switch r {
|
||||
case sentinel.Start, sentinel.End:
|
||||
// The range clause does not replace the sentinel start and end characters.
|
||||
out = append(out, unicode.ReplacementChar)
|
||||
default:
|
||||
out = append(out, r)
|
||||
}
|
||||
}
|
||||
return string(out)
|
||||
}
|
||||
@ -0,0 +1,28 @@
|
||||
package sanitize
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestString(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
s string
|
||||
want string
|
||||
}{
|
||||
{"no-special", "string", "string"},
|
||||
{"spaces", "string string", "string string"},
|
||||
{"leading-sentinel-start", "\ufffestring", "\ufffdstring"},
|
||||
{"mixed", "\ufffe\uffffstring\ufffestring\uffff", "\ufffd\ufffdstring\ufffdstring\ufffd"},
|
||||
{"only-sentinels", "\ufffe\uffff\ufffe\uffff", "\ufffd\ufffd\ufffd\ufffd"},
|
||||
{"empty-string", "", ""},
|
||||
{"with-invalid-utf8", "\xff\xfe", "\ufffd\ufffd"},
|
||||
{"with-invalid-utf8-and-sentinels", "\xce\ufffe\ufffd\xcc", "\ufffd\ufffd\ufffd\ufffd"},
|
||||
{"with-invalid-utf8-mixed", "\xcefoo\xccbar\uffffzoo", "\ufffdfoo\ufffdbar\ufffdzoo"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := String(tt.s); got != tt.want {
|
||||
t.Errorf("String() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,36 @@
|
||||
begin;

  -- Append the sentinel end character U+FFFF to every external_id that the
  -- existing wt_is_sentinel function accepts but that does not yet end with
  -- U+FFFF, so the rows satisfy the stricter definition created below.
  update credential_vault_credential
     set external_id = concat(external_id, u&'\ffff')
   where wt_is_sentinel(external_id)
     and not starts_with(reverse(external_id), u&'\ffff');

  -- The constraint is removed before the function is replaced and is added
  -- back further down, where it calls the new wt_is_sentinel.
  -- NOTE(review): presumably the existing constraint depends on the old
  -- function, which is why it is dropped first; confirm against the
  -- migration that created it.
  alter domain wt_sentinel
    drop constraint wt_sentinel_not_valid;

  drop function wt_is_sentinel;

  -- A sentinel must now start with U+FFFE and end with U+FFFF. The suffix is
  -- tested by checking that the reversed string starts with U+FFFF.
  create function wt_is_sentinel(string text)
    returns bool
  as $$
    select starts_with(string, u&'\fffe') and starts_with(reverse(string), u&'\ffff');
  $$ language sql
     immutable
     returns null on null input;
  comment on function wt_is_sentinel is
    'wt_is_sentinel returns true if string is a sentinel value';

  -- A value is accepted if it is a sentinel, or if something remains after
  -- stripping leading U+FFFE and space characters and trailing U+FFFF
  -- characters.
  alter domain wt_sentinel
    add constraint wt_sentinel_not_valid
      check(
        wt_is_sentinel(value)
        or
        length(trim(trailing u&'\ffff' from trim(leading u&'\fffe ' from value))) > 0
      );

  comment on domain wt_sentinel is
    'A non-empty string with a Unicode prefix of U+FFFE and suffix of U+FFFF to indicate it is a sentinel value';

  drop function wt_to_sentinel; -- wt_to_sentinel is not needed, dropping and not re-creating

commit;
|
||||
@ -0,0 +1,11 @@
|
||||
// Package sentinel allows for the use of Unicode non-characters to distinguish between
|
||||
// Boundary defined sentinels and values provided by external systems.
|
||||
//
|
||||
// All sentinel values are prefixed with the sentinel start character U+FFFE and suffixed
|
||||
// with the sentinel end character U+FFFF. Any string that starts with U+FFFE and ends with
|
||||
// U+FFFF is a valid sentinel and reserved for use within Boundary.
|
||||
//
|
||||
// U+FFFE and U+FFFF are special non-characters reserved for internal use in the Unicode
|
||||
// standard.
|
||||
// See https://www.unicode.org/versions/Unicode13.0.0/ch23.pdf#G12612.
|
||||
package sentinel
|
||||
@ -0,0 +1,31 @@
|
||||
package sentinel
|
||||
|
||||
const (
	// Start is the Unicode special non-character U+FFFE, and the prefix included in all
	// Boundary defined sentinel values.
	Start = '\ufffe'

	// End is the Unicode special non-character U+FFFF, and the suffix included in all
	// Boundary defined sentinel values.
	End = '\uffff'
)

const (
	// ExternalIdNone is a Boundary sentinel indicating that no id was provided by an
	// external system.
	ExternalIdNone = "\ufffenone\uffff"
)

// Is reports whether s is a valid sentinel, that is, whether its first
// character is Start (U+FFFE) and its last character is End (U+FFFF).
func Is(s string) bool {
	// The UTF-8 encodings of Start and End, 3 bytes each. Comparing encoded
	// bytes gives the same answer as comparing the first and last decoded
	// runes: a valid multi-byte sequence cannot extend across the lead byte of
	// another sequence, and only the shortest encoding decodes to these code
	// points. This avoids converting the whole string to a rune slice.
	const (
		prefix = string(rune(Start))
		suffix = string(rune(End))
	)

	// A valid sentinel must be long enough to hold both markers without them
	// overlapping; this also keeps the slicing below in range.
	if len(s) < len(prefix)+len(suffix) {
		return false
	}
	return s[:len(prefix)] == prefix && s[len(s)-len(suffix):] == suffix
}
|
||||
@ -0,0 +1,41 @@
|
||||
package sentinel
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestIs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
s string
|
||||
want bool
|
||||
}{
|
||||
{"normal", "\ufffefoo\uffff", true},
|
||||
{"non-sentinel", "foo", false},
|
||||
{"trailing and leading start sentinel", "\ufffefoo\ufffe", false},
|
||||
{"trailing and leading end sentinel", "\uffffoo\uffff", false},
|
||||
{"only start sentinel with string", "\ufffefoo", false},
|
||||
{"only end sentinel with string", "foo\uffff", false},
|
||||
{"only end sentinel", "\uffff", false},
|
||||
{"only start sentinel", "\ufffe", false},
|
||||
{"sentinel with space before word", "\ufffe foo\uffff", true},
|
||||
{"sentinel with only spaces", "\ufffe \uffff", true},
|
||||
{"sentinel with empty string", "\ufffe\uffff", true},
|
||||
{"multiple start sentinels with empty string", "\ufffe\ufffe \uffff", true},
|
||||
{"multiple start sentinels", "\ufffe\ufffefoo\uffff", true},
|
||||
{"start sentinel space start sentinel space string", "\ufffe \ufffe foo \uffff", true},
|
||||
{"sentinel with space after word", "\ufffefoo \uffff", true},
|
||||
{"multiple end sentinels with empty string", "\ufffe \uffff\uffff\uffff", true},
|
||||
{"multiple end sentinels", "\ufffefoo\uffff\uffff\uffff", true},
|
||||
{"string space end sentinel space end sentinel", "\ufffefoo \uffff \uffff", true},
|
||||
{"only spaces", " ", false},
|
||||
{"empty string", "", false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := Is(tt.s); got != tt.want {
|
||||
t.Errorf("Is() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Loading…
Reference in new issue