From fcdcf117f0edb077d1b1ee4f9a267396f2f7774a Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sun, 11 Jan 2015 12:38:45 -0800 Subject: [PATCH] config/lang: initial work --- config/lang/ast.go | 1 + config/lang/ast/ast.go | 12 + config/lang/ast/concat.go | 7 + config/lang/ast/literal.go | 8 + config/lang/ast/variable_access.go | 6 + config/lang/lang.y | 59 +++++ config/lang/lex.go | 176 +++++++++++++++ config/lang/lex_test.go | 65 ++++++ config/lang/parse.go | 32 +++ config/lang/parse_test.go | 68 ++++++ config/lang/token.go | 1 + config/lang/y.go | 351 +++++++++++++++++++++++++++++ config/lang/y.output | 85 +++++++ 13 files changed, 871 insertions(+) create mode 100644 config/lang/ast.go create mode 100644 config/lang/ast/ast.go create mode 100644 config/lang/ast/concat.go create mode 100644 config/lang/ast/literal.go create mode 100644 config/lang/ast/variable_access.go create mode 100644 config/lang/lang.y create mode 100644 config/lang/lex.go create mode 100644 config/lang/lex_test.go create mode 100644 config/lang/parse.go create mode 100644 config/lang/parse_test.go create mode 100644 config/lang/token.go create mode 100644 config/lang/y.go create mode 100644 config/lang/y.output diff --git a/config/lang/ast.go b/config/lang/ast.go new file mode 100644 index 0000000000..48e87f869d --- /dev/null +++ b/config/lang/ast.go @@ -0,0 +1 @@ +package lang diff --git a/config/lang/ast/ast.go b/config/lang/ast/ast.go new file mode 100644 index 0000000000..3bb748ccc1 --- /dev/null +++ b/config/lang/ast/ast.go @@ -0,0 +1,12 @@ +package ast + +// Node is the interface that all AST nodes must implement. +type Node interface{} + +// Type is the type of a literal. +type Type uint + +const ( + TypeInvalid Type = 1 << iota + TypeString +) diff --git a/config/lang/ast/concat.go b/config/lang/ast/concat.go new file mode 100644 index 0000000000..3a3374ccc5 --- /dev/null +++ b/config/lang/ast/concat.go @@ -0,0 +1,7 @@ +package ast + +// Concat represents a node where the result of two or more expressions are +// concatenated. The result of all expressions must be a string. +type Concat struct { + Exprs []Node +} diff --git a/config/lang/ast/literal.go b/config/lang/ast/literal.go new file mode 100644 index 0000000000..f82ad4f9cf --- /dev/null +++ b/config/lang/ast/literal.go @@ -0,0 +1,8 @@ +package ast + +// LiteralNode represents a single literal value, such as "foo" or +// 42 or 3.14159. Based on the Type, the Value can be safely cast. +type LiteralNode struct { + Value interface{} + Type Type +} diff --git a/config/lang/ast/variable_access.go b/config/lang/ast/variable_access.go new file mode 100644 index 0000000000..bf22ce356f --- /dev/null +++ b/config/lang/ast/variable_access.go @@ -0,0 +1,6 @@ +package ast + +// VariableAccess represents a variable access. +type VariableAccess struct { + Name string +} diff --git a/config/lang/lang.y b/config/lang/lang.y new file mode 100644 index 0000000000..52db6ee343 --- /dev/null +++ b/config/lang/lang.y @@ -0,0 +1,59 @@ +// This is the yacc input for creating the parser for interpolation +// expressions in Go. To build it, just run `go generate` on this +// package, as the lexer has the go generate pragma within it. + +%{ +package lang + +import ( + "github.com/hashicorp/terraform/config/lang/ast" +) + +%} + +%union { + node ast.Node + str string +} + +%token STRING IDENTIFIER PROGRAM_BRACKET_LEFT PROGRAM_BRACKET_RIGHT + +%type expr interpolation literal + +%% + +top: + literal + { + parserResult = $1 + } +| literal interpolation + { + parserResult = &ast.Concat{ + Exprs: []ast.Node{$1, $2}, + } + } + +interpolation: + PROGRAM_BRACKET_LEFT expr PROGRAM_BRACKET_RIGHT + { + $$ = $2 + } + +expr: + IDENTIFIER + { + $$ = &ast.VariableAccess{Name: $1} + } +| literal + { + $$ = $1 + } + +literal: + STRING + { + $$ = &ast.LiteralNode{Value: $1, Type: ast.TypeString} + } + +%% diff --git a/config/lang/lex.go b/config/lang/lex.go new file mode 100644 index 0000000000..6f0705c84d --- /dev/null +++ b/config/lang/lex.go @@ -0,0 +1,176 @@ +package lang + +import ( + "bytes" + "fmt" + "unicode" + "unicode/utf8" +) + +//go:generate go tool yacc -p parser lang.y + +// The parser expects the lexer to return 0 on EOF. +const lexEOF = 0 + +// The parser uses the type Lex as a lexer. It must provide +// the methods Lex(*SymType) int and Error(string). +type parserLex struct { + Err error + Input string + + interpolationDepth int + pos int + width int +} + +// The parser calls this method to get each new token. +func (x *parserLex) Lex(yylval *parserSymType) int { + for { + c := x.next() + if c == lexEOF { + return lexEOF + } + + // Are we starting an interpolation? + if c == '$' && x.peek() == '{' { + x.next() + x.interpolationDepth++ + return PROGRAM_BRACKET_LEFT + } + + // If we see a double quote and we're in an interpolation, then + // we are lexing a string. + if c == '"' && x.interpolationDepth > 0 { + return x.lexString(yylval, true) + } + + switch c { + case '}': + x.interpolationDepth-- + return PROGRAM_BRACKET_RIGHT + default: + x.backup() + if x.interpolationDepth > 0 { + // We're within an interpolation. + return x.lexId(yylval) + } else { + // We're just a normal string that isn't part of any + // interpolation yet. + return x.lexString(yylval, false) + } + } + } +} + +func (x *parserLex) lexId(yylval *parserSymType) int { + var b bytes.Buffer + for { + c := x.next() + if c == lexEOF { + break + } + + // If this isn't a character we want in an ID, return out. + // One day we should make this a regexp. + if c != '_' && + c != '-' && + c != '.' && + c != '*' && + !unicode.IsLetter(c) && + !unicode.IsNumber(c) { + x.backup() + break + } + + if _, err := b.WriteRune(c); err != nil { + x.Error(err.Error()) + return lexEOF + } + } + + yylval.str = b.String() + return IDENTIFIER +} + +func (x *parserLex) lexString(yylval *parserSymType, quoted bool) int { + var b bytes.Buffer + for { + c := x.next() + if c == lexEOF { + break + } + + // Behavior is a bit different if we're lexing within a quoted string. + if quoted { + // If its a double quote, we've reached the end of the string + if c == '"' { + break + } + + // Let's check to see if we're escaping anything. + if c == '\\' { + switch n := x.next(); n { + case '\\': + fallthrough + case '"': + c = n + case 'n': + c = '\n' + default: + x.backup() + } + } + } + + // If we hit a '}' and we're in a program, then end it. + if c == '}' && x.interpolationDepth > 0 { + x.backup() + break + } + + // If we hit a dollar sign, then check if we're starting + // another interpolation. If so, then we're done. + if c == '$' && x.peek() == '{' { + x.backup() + break + } + + if _, err := b.WriteRune(c); err != nil { + x.Error(err.Error()) + return lexEOF + } + } + + yylval.str = b.String() + return STRING +} + +// Return the next rune for the lexer. +func (x *parserLex) next() rune { + if int(x.pos) >= len(x.Input) { + x.width = 0 + return lexEOF + } + + r, w := utf8.DecodeRuneInString(x.Input[x.pos:]) + x.width = w + x.pos += x.width + return r +} + +// peek returns but does not consume the next rune in the input +func (x *parserLex) peek() rune { + r := x.next() + x.backup() + return r +} + +// backup steps back one rune. Can only be called once per next. +func (x *parserLex) backup() { + x.pos -= x.width +} + +// The parser calls this method on a parse error. +func (x *parserLex) Error(s string) { + x.Err = fmt.Errorf("parse error: %s", s) +} diff --git a/config/lang/lex_test.go b/config/lang/lex_test.go new file mode 100644 index 0000000000..ab5a18b9b9 --- /dev/null +++ b/config/lang/lex_test.go @@ -0,0 +1,65 @@ +package lang + +import ( + "reflect" + "testing" +) + +func TestLex(t *testing.T) { + cases := []struct { + Input string + Output []int + }{ + { + "foo", + []int{STRING, lexEOF}, + }, + + { + "foo$bar", + []int{STRING, lexEOF}, + }, + + { + "foo ${bar}", + []int{STRING, PROGRAM_BRACKET_LEFT, IDENTIFIER, PROGRAM_BRACKET_RIGHT, lexEOF}, + }, + + { + "foo ${\"bar\"}", + []int{STRING, PROGRAM_BRACKET_LEFT, STRING, PROGRAM_BRACKET_RIGHT, lexEOF}, + }, + } + + for _, tc := range cases { + l := &parserLex{Input: tc.Input} + var actual []int + for { + token := l.Lex(new(parserSymType)) + actual = append(actual, token) + + if token == lexEOF { + break + } + + // Be careful against what are probably infinite loops + if len(actual) > 100 { + t.Fatalf("Input:%s\n\nExausted.", tc.Input) + } + } + + if !reflect.DeepEqual(actual, tc.Output) { + t.Fatalf( + "Input: %s\n\nBad: %#v\n\nExpected: %#v", + tc.Input, actual, tc.Output) + } + } +} + +/* OTHERS: + +foo ${var.foo} +bar ${"hello"} +foo ${concat("foo ${var.bar}", var.baz)} + +*/ diff --git a/config/lang/parse.go b/config/lang/parse.go new file mode 100644 index 0000000000..8ece590afd --- /dev/null +++ b/config/lang/parse.go @@ -0,0 +1,32 @@ +package lang + +import ( + "sync" + + "github.com/hashicorp/terraform/config/lang/ast" +) + +var parserErrors []error +var parserLock sync.Mutex +var parserResult ast.Node + +// Parse parses the given program and returns an executable AST tree. +func Parse(v string) (ast.Node, error) { + // Unfortunately due to the way that goyacc generated parsers are + // formatted, we can only do a single parse at a time without a lot + // of extra work. In the future we can remove this limitation. + parserLock.Lock() + defer parserLock.Unlock() + + // Reset our globals + parserErrors = nil + parserResult = nil + + // Create the lexer + lex := &parserLex{Input: v} + + // Parse! + parserParse(lex) + + return parserResult, lex.Err +} diff --git a/config/lang/parse_test.go b/config/lang/parse_test.go new file mode 100644 index 0000000000..38507582a8 --- /dev/null +++ b/config/lang/parse_test.go @@ -0,0 +1,68 @@ +package lang + +import ( + "reflect" + "testing" + + "github.com/hashicorp/terraform/config/lang/ast" +) + +func TestParse(t *testing.T) { + cases := []struct { + Input string + Error bool + Result ast.Node + }{ + { + "foo", + false, + &ast.LiteralNode{ + Value: "foo", + Type: ast.TypeString, + }, + }, + + { + "foo ${var.bar}", + false, + &ast.Concat{ + Exprs: []ast.Node{ + &ast.LiteralNode{ + Value: "foo ", + Type: ast.TypeString, + }, + &ast.VariableAccess{ + Name: "var.bar", + }, + }, + }, + }, + + { + "foo ${\"bar\"}", + false, + &ast.Concat{ + Exprs: []ast.Node{ + &ast.LiteralNode{ + Value: "foo ", + Type: ast.TypeString, + }, + &ast.LiteralNode{ + Value: "bar", + Type: ast.TypeString, + }, + }, + }, + }, + } + + for _, tc := range cases { + actual, err := Parse(tc.Input) + if (err != nil) != tc.Error { + t.Fatalf("Error: %s\n\nInput: %s", err, tc.Input) + } + if !reflect.DeepEqual(actual, tc.Result) { + t.Fatalf("Bad: %#v\n\nInput: %s", actual, tc.Input) + } + } +} diff --git a/config/lang/token.go b/config/lang/token.go new file mode 100644 index 0000000000..48e87f869d --- /dev/null +++ b/config/lang/token.go @@ -0,0 +1 @@ +package lang diff --git a/config/lang/y.go b/config/lang/y.go new file mode 100644 index 0000000000..a2b2e81fdc --- /dev/null +++ b/config/lang/y.go @@ -0,0 +1,351 @@ +//line lang.y:6 +package lang + +import __yyfmt__ "fmt" + +//line lang.y:6 +import ( + "github.com/hashicorp/terraform/config/lang/ast" +) + +//line lang.y:14 +type parserSymType struct { + yys int + node ast.Node + str string +} + +const STRING = 57346 +const IDENTIFIER = 57347 +const PROGRAM_BRACKET_LEFT = 57348 +const PROGRAM_BRACKET_RIGHT = 57349 + +var parserToknames = []string{ + "STRING", + "IDENTIFIER", + "PROGRAM_BRACKET_LEFT", + "PROGRAM_BRACKET_RIGHT", +} +var parserStatenames = []string{} + +const parserEofCode = 1 +const parserErrCode = 2 +const parserMaxDepth = 200 + +//line lang.y:59 + +//line yacctab:1 +var parserExca = []int{ + -1, 1, + 1, -1, + -2, 0, +} + +const parserNprod = 7 +const parserPrivate = 57344 + +var parserTokenNames []string +var parserStates []string + +const parserLast = 10 + +var parserAct = []int{ + + 9, 3, 7, 2, 5, 3, 1, 4, 6, 8, +} +var parserPact = []int{ + + 1, -1000, -2, -1000, -1000, -3, -7, -1000, -1000, -1000, +} +var parserPgo = []int{ + + 0, 8, 7, 3, 6, +} +var parserR1 = []int{ + + 0, 4, 4, 2, 1, 1, 3, +} +var parserR2 = []int{ + + 0, 1, 2, 3, 1, 1, 1, +} +var parserChk = []int{ + + -1000, -4, -3, 4, -2, 6, -1, 5, -3, 7, +} +var parserDef = []int{ + + 0, -2, 1, 6, 2, 0, 0, 4, 5, 3, +} +var parserTok1 = []int{ + + 1, +} +var parserTok2 = []int{ + + 2, 3, 4, 5, 6, 7, +} +var parserTok3 = []int{ + 0, +} + +//line yaccpar:1 + +/* parser for yacc output */ + +var parserDebug = 0 + +type parserLexer interface { + Lex(lval *parserSymType) int + Error(s string) +} + +const parserFlag = -1000 + +func parserTokname(c int) string { + // 4 is TOKSTART above + if c >= 4 && c-4 < len(parserToknames) { + if parserToknames[c-4] != "" { + return parserToknames[c-4] + } + } + return __yyfmt__.Sprintf("tok-%v", c) +} + +func parserStatname(s int) string { + if s >= 0 && s < len(parserStatenames) { + if parserStatenames[s] != "" { + return parserStatenames[s] + } + } + return __yyfmt__.Sprintf("state-%v", s) +} + +func parserlex1(lex parserLexer, lval *parserSymType) int { + c := 0 + char := lex.Lex(lval) + if char <= 0 { + c = parserTok1[0] + goto out + } + if char < len(parserTok1) { + c = parserTok1[char] + goto out + } + if char >= parserPrivate { + if char < parserPrivate+len(parserTok2) { + c = parserTok2[char-parserPrivate] + goto out + } + } + for i := 0; i < len(parserTok3); i += 2 { + c = parserTok3[i+0] + if c == char { + c = parserTok3[i+1] + goto out + } + } + +out: + if c == 0 { + c = parserTok2[1] /* unknown char */ + } + if parserDebug >= 3 { + __yyfmt__.Printf("lex %s(%d)\n", parserTokname(c), uint(char)) + } + return c +} + +func parserParse(parserlex parserLexer) int { + var parsern int + var parserlval parserSymType + var parserVAL parserSymType + parserS := make([]parserSymType, parserMaxDepth) + + Nerrs := 0 /* number of errors */ + Errflag := 0 /* error recovery flag */ + parserstate := 0 + parserchar := -1 + parserp := -1 + goto parserstack + +ret0: + return 0 + +ret1: + return 1 + +parserstack: + /* put a state and value onto the stack */ + if parserDebug >= 4 { + __yyfmt__.Printf("char %v in %v\n", parserTokname(parserchar), parserStatname(parserstate)) + } + + parserp++ + if parserp >= len(parserS) { + nyys := make([]parserSymType, len(parserS)*2) + copy(nyys, parserS) + parserS = nyys + } + parserS[parserp] = parserVAL + parserS[parserp].yys = parserstate + +parsernewstate: + parsern = parserPact[parserstate] + if parsern <= parserFlag { + goto parserdefault /* simple state */ + } + if parserchar < 0 { + parserchar = parserlex1(parserlex, &parserlval) + } + parsern += parserchar + if parsern < 0 || parsern >= parserLast { + goto parserdefault + } + parsern = parserAct[parsern] + if parserChk[parsern] == parserchar { /* valid shift */ + parserchar = -1 + parserVAL = parserlval + parserstate = parsern + if Errflag > 0 { + Errflag-- + } + goto parserstack + } + +parserdefault: + /* default state action */ + parsern = parserDef[parserstate] + if parsern == -2 { + if parserchar < 0 { + parserchar = parserlex1(parserlex, &parserlval) + } + + /* look through exception table */ + xi := 0 + for { + if parserExca[xi+0] == -1 && parserExca[xi+1] == parserstate { + break + } + xi += 2 + } + for xi += 2; ; xi += 2 { + parsern = parserExca[xi+0] + if parsern < 0 || parsern == parserchar { + break + } + } + parsern = parserExca[xi+1] + if parsern < 0 { + goto ret0 + } + } + if parsern == 0 { + /* error ... attempt to resume parsing */ + switch Errflag { + case 0: /* brand new error */ + parserlex.Error("syntax error") + Nerrs++ + if parserDebug >= 1 { + __yyfmt__.Printf("%s", parserStatname(parserstate)) + __yyfmt__.Printf(" saw %s\n", parserTokname(parserchar)) + } + fallthrough + + case 1, 2: /* incompletely recovered error ... try again */ + Errflag = 3 + + /* find a state where "error" is a legal shift action */ + for parserp >= 0 { + parsern = parserPact[parserS[parserp].yys] + parserErrCode + if parsern >= 0 && parsern < parserLast { + parserstate = parserAct[parsern] /* simulate a shift of "error" */ + if parserChk[parserstate] == parserErrCode { + goto parserstack + } + } + + /* the current p has no shift on "error", pop stack */ + if parserDebug >= 2 { + __yyfmt__.Printf("error recovery pops state %d\n", parserS[parserp].yys) + } + parserp-- + } + /* there is no state on the stack with an error shift ... abort */ + goto ret1 + + case 3: /* no shift yet; clobber input char */ + if parserDebug >= 2 { + __yyfmt__.Printf("error recovery discards %s\n", parserTokname(parserchar)) + } + if parserchar == parserEofCode { + goto ret1 + } + parserchar = -1 + goto parsernewstate /* try again in the same state */ + } + } + + /* reduction by production parsern */ + if parserDebug >= 2 { + __yyfmt__.Printf("reduce %v in:\n\t%v\n", parsern, parserStatname(parserstate)) + } + + parsernt := parsern + parserpt := parserp + _ = parserpt // guard against "declared and not used" + + parserp -= parserR2[parsern] + parserVAL = parserS[parserp+1] + + /* consult goto table to find next state */ + parsern = parserR1[parsern] + parserg := parserPgo[parsern] + parserj := parserg + parserS[parserp].yys + 1 + + if parserj >= parserLast { + parserstate = parserAct[parserg] + } else { + parserstate = parserAct[parserj] + if parserChk[parserstate] != -parsern { + parserstate = parserAct[parserg] + } + } + // dummy call; replaced with literal code + switch parsernt { + + case 1: + //line lang.y:27 + { + parserResult = parserS[parserpt-0].node + } + case 2: + //line lang.y:31 + { + parserResult = &ast.Concat{ + Exprs: []ast.Node{parserS[parserpt-1].node, parserS[parserpt-0].node}, + } + } + case 3: + //line lang.y:39 + { + parserVAL.node = parserS[parserpt-1].node + } + case 4: + //line lang.y:45 + { + parserVAL.node = &ast.VariableAccess{Name: parserS[parserpt-0].str} + } + case 5: + //line lang.y:49 + { + parserVAL.node = parserS[parserpt-0].node + } + case 6: + //line lang.y:55 + { + parserVAL.node = &ast.LiteralNode{Value: parserS[parserpt-0].str, Type: ast.TypeString} + } + } + goto parserstack /* stack new state and value */ +} diff --git a/config/lang/y.output b/config/lang/y.output new file mode 100644 index 0000000000..1fe8babd45 --- /dev/null +++ b/config/lang/y.output @@ -0,0 +1,85 @@ + +state 0 + $accept: .top $end + + STRING shift 3 + . error + + literal goto 2 + top goto 1 + +state 1 + $accept: top.$end + + $end accept + . error + + +state 2 + top: literal. (1) + top: literal.interpolation + + PROGRAM_BRACKET_LEFT shift 5 + . reduce 1 (src line 25) + + interpolation goto 4 + +state 3 + literal: STRING. (6) + + . reduce 6 (src line 53) + + +state 4 + top: literal interpolation. (2) + + . reduce 2 (src line 30) + + +state 5 + interpolation: PROGRAM_BRACKET_LEFT.expr PROGRAM_BRACKET_RIGHT + + STRING shift 3 + IDENTIFIER shift 7 + . error + + expr goto 6 + literal goto 8 + +state 6 + interpolation: PROGRAM_BRACKET_LEFT expr.PROGRAM_BRACKET_RIGHT + + PROGRAM_BRACKET_RIGHT shift 9 + . error + + +state 7 + expr: IDENTIFIER. (4) + + . reduce 4 (src line 43) + + +state 8 + expr: literal. (5) + + . reduce 5 (src line 48) + + +state 9 + interpolation: PROGRAM_BRACKET_LEFT expr PROGRAM_BRACKET_RIGHT. (3) + + . reduce 3 (src line 37) + + +7 terminals, 5 nonterminals +7 grammar rules, 10/2000 states +0 shift/reduce, 0 reduce/reduce conflicts reported +54 working sets used +memory: parser 5/30000 +1 extra closures +5 shift entries, 1 exceptions +5 goto entries +0 entries saved by goto default +Optimizer space used: output 10/30000 +10 table entries, 0 zero +maximum spread: 7, maximum offset: 7