From 1ff5a838a48cf9e012b59e40e463752f0ebeddec Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sun, 11 Jan 2015 13:59:24 -0800 Subject: [PATCH] config/lang: can parse nested interpolations --- config/lang/ast/concat.go | 8 ++ config/lang/ast/literal.go | 8 ++ config/lang/lang.y | 36 ++++++-- config/lang/lex.go | 90 ++++++++++++++++++-- config/lang/lex_test.go | 34 ++++++++ config/lang/parse_test.go | 74 ++++++++++++++++ config/lang/y.go | 103 ++++++++++++++--------- config/lang/y.output | 167 ++++++++++++++++++++++--------------- 8 files changed, 397 insertions(+), 123 deletions(-) diff --git a/config/lang/ast/concat.go b/config/lang/ast/concat.go index 3a3374ccc5..c88e3de5a1 100644 --- a/config/lang/ast/concat.go +++ b/config/lang/ast/concat.go @@ -1,7 +1,15 @@ package ast +import ( + "fmt" +) + // Concat represents a node where the result of two or more expressions are // concatenated. The result of all expressions must be a string. type Concat struct { Exprs []Node } + +func (n *Concat) GoString() string { + return fmt.Sprintf("*%#v", *n) +} diff --git a/config/lang/ast/literal.go b/config/lang/ast/literal.go index f82ad4f9cf..3477a5dbde 100644 --- a/config/lang/ast/literal.go +++ b/config/lang/ast/literal.go @@ -1,8 +1,16 @@ package ast +import ( + "fmt" +) + // LiteralNode represents a single literal value, such as "foo" or // 42 or 3.14159. Based on the Type, the Value can be safely cast. type LiteralNode struct { Value interface{} Type Type } + +func (n *LiteralNode) GoString() string { + return fmt.Sprintf("*%#v", *n) +} diff --git a/config/lang/lang.y b/config/lang/lang.y index f346799fd5..4ea84f456c 100644 --- a/config/lang/lang.y +++ b/config/lang/lang.y @@ -18,29 +18,49 @@ import ( } %token STRING IDENTIFIER PROGRAM_BRACKET_LEFT PROGRAM_BRACKET_RIGHT +%token PROGRAM_STRING_START PROGRAM_STRING_END %token PAREN_LEFT PAREN_RIGHT COMMA -%type expr interpolation literal +%type expr interpolation literal literalModeTop literalModeValue %type args %% top: - literal + literalModeTop { parserResult = $1 } -| interpolation + +literalModeTop: + literalModeValue { - parserResult = $1 + $$ = $1 } -| literal interpolation +| literalModeTop literalModeValue { - parserResult = &ast.Concat{ - Exprs: []ast.Node{$1, $2}, + var result []ast.Node + if c, ok := $1.(*ast.Concat); ok { + result = append(c.Exprs, $2) + } else { + result = []ast.Node{$1, $2} + } + + $$ = &ast.Concat{ + Exprs: result, } } +literalModeValue: + literal + { + $$ = $1 + } +| interpolation + { + $$ = $1 + } + interpolation: PROGRAM_BRACKET_LEFT expr PROGRAM_BRACKET_RIGHT { @@ -48,7 +68,7 @@ interpolation: } expr: - literal + literalModeTop { $$ = $1 } diff --git a/config/lang/lex.go b/config/lang/lex.go index 365001199b..ff35d8ca59 100644 --- a/config/lang/lex.go +++ b/config/lang/lex.go @@ -18,13 +18,43 @@ type parserLex struct { Err error Input string + mode parserMode interpolationDepth int pos int width int } +// parserMode keeps track of what mode we're in for the parser. We have +// two modes: literal and interpolation. Literal mode is when strings +// don't have to be quoted, and interpolations are defined as ${foo}. +// Interpolation mode means that strings have to be quoted and unquoted +// things are identifiers, such as foo("bar"). +type parserMode uint8 + +const ( + parserModeInvalid parserMode = 0 + parserModeLiteral = 1 << iota + parserModeInterpolation +) + // The parser calls this method to get each new token. func (x *parserLex) Lex(yylval *parserSymType) int { + if x.mode == parserModeInvalid { + x.mode = parserModeLiteral + } + + switch x.mode { + case parserModeLiteral: + return x.lexModeLiteral(yylval) + case parserModeInterpolation: + return x.lexModeInterpolation(yylval) + default: + x.Error(fmt.Sprintf("Unknown parse mode: %s", x.mode)) + return lexEOF + } +} + +func (x *parserLex) lexModeLiteral(yylval *parserSymType) int { for { c := x.next() if c == lexEOF { @@ -35,14 +65,36 @@ func (x *parserLex) Lex(yylval *parserSymType) int { if c == '$' && x.peek() == '{' { x.next() x.interpolationDepth++ + x.mode = parserModeInterpolation return PROGRAM_BRACKET_LEFT } - if x.interpolationDepth == 0 { - // We're just a normal string that isn't part of any - // interpolation yet. - x.backup() - return x.lexString(yylval, false) + // We're just a normal string that isn't part of any interpolation yet. + x.backup() + result, terminated := x.lexString(yylval, x.interpolationDepth > 0) + + // If the string terminated and we're within an interpolation already + // then that means that we finished a nested string, so pop + // back out to interpolation mode. + if terminated && x.interpolationDepth > 0 { + x.mode = parserModeInterpolation + + // If the string is empty, just skip it. We're still in + // an interpolation so we do this to avoid empty nodes. + if yylval.str == "" { + return x.Lex(yylval) + } + } + + return result + } +} + +func (x *parserLex) lexModeInterpolation(yylval *parserSymType) int { + for { + c := x.next() + if c == lexEOF { + return lexEOF } // Ignore all whitespace @@ -53,12 +105,26 @@ func (x *parserLex) Lex(yylval *parserSymType) int { // If we see a double quote and we're in an interpolation, then // we are lexing a string. if c == '"' { - return x.lexString(yylval, true) + result, terminated := x.lexString(yylval, true) + if !terminated { + // The string didn't end, which means that we're in the + // middle of starting another interpolation. + x.mode = parserModeLiteral + + // If the string is empty and we're starting an interpolation, + // then just skip it to avoid empty string AST nodes + if yylval.str == "" { + return x.Lex(yylval) + } + } + + return result } switch c { case '}': x.interpolationDepth-- + x.mode = parserModeLiteral return PROGRAM_BRACKET_RIGHT case '(': return PAREN_LEFT @@ -103,11 +169,16 @@ func (x *parserLex) lexId(yylval *parserSymType) int { return IDENTIFIER } -func (x *parserLex) lexString(yylval *parserSymType, quoted bool) int { +func (x *parserLex) lexString(yylval *parserSymType, quoted bool) (int, bool) { var b bytes.Buffer + terminated := false for { c := x.next() if c == lexEOF { + if quoted { + x.Error("unterminated string") + } + break } @@ -115,6 +186,7 @@ func (x *parserLex) lexString(yylval *parserSymType, quoted bool) int { if quoted { // If its a double quote, we've reached the end of the string if c == '"' { + terminated = true break } @@ -148,12 +220,12 @@ func (x *parserLex) lexString(yylval *parserSymType, quoted bool) int { if _, err := b.WriteRune(c); err != nil { x.Error(err.Error()) - return lexEOF + return lexEOF, false } } yylval.str = b.String() - return STRING + return STRING, terminated } // Return the next rune for the lexer. diff --git a/config/lang/lex_test.go b/config/lang/lex_test.go index 3546bc8859..ce6a369606 100644 --- a/config/lang/lex_test.go +++ b/config/lang/lex_test.go @@ -45,6 +45,40 @@ func TestLex(t *testing.T) { PAREN_RIGHT, PROGRAM_BRACKET_RIGHT, lexEOF}, }, + + { + "${bar(inner(baz))}", + []int{PROGRAM_BRACKET_LEFT, + IDENTIFIER, PAREN_LEFT, + IDENTIFIER, PAREN_LEFT, + IDENTIFIER, + PAREN_RIGHT, PAREN_RIGHT, + PROGRAM_BRACKET_RIGHT, lexEOF}, + }, + + { + "foo ${foo.bar.baz}", + []int{STRING, PROGRAM_BRACKET_LEFT, IDENTIFIER, PROGRAM_BRACKET_RIGHT, lexEOF}, + }, + + { + "foo ${foo.bar.*.baz}", + []int{STRING, PROGRAM_BRACKET_LEFT, IDENTIFIER, PROGRAM_BRACKET_RIGHT, lexEOF}, + }, + + { + "foo ${foo(\"baz\")}", + []int{STRING, PROGRAM_BRACKET_LEFT, + IDENTIFIER, PAREN_LEFT, STRING, PAREN_RIGHT, + PROGRAM_BRACKET_RIGHT, lexEOF}, + }, + + { + `foo ${"${var.foo}"}`, + []int{STRING, PROGRAM_BRACKET_LEFT, + PROGRAM_BRACKET_LEFT, IDENTIFIER, PROGRAM_BRACKET_RIGHT, + PROGRAM_BRACKET_RIGHT, lexEOF}, + }, } for _, tc := range cases { diff --git a/config/lang/parse_test.go b/config/lang/parse_test.go index 7e7114f573..a635be8587 100644 --- a/config/lang/parse_test.go +++ b/config/lang/parse_test.go @@ -38,6 +38,26 @@ func TestParse(t *testing.T) { }, }, + { + "foo ${var.bar} baz", + false, + &ast.Concat{ + Exprs: []ast.Node{ + &ast.LiteralNode{ + Value: "foo ", + Type: ast.TypeString, + }, + &ast.VariableAccess{ + Name: "var.bar", + }, + &ast.LiteralNode{ + Value: " baz", + Type: ast.TypeString, + }, + }, + }, + }, + { "foo ${\"bar\"}", false, @@ -83,6 +103,60 @@ func TestParse(t *testing.T) { }, }, }, + + { + "${foo(bar(baz))}", + false, + &ast.Call{ + Func: "foo", + Args: []ast.Node{ + &ast.Call{ + Func: "bar", + Args: []ast.Node{ + &ast.VariableAccess{ + Name: "baz", + }, + }, + }, + }, + }, + }, + + { + `foo ${"bar ${baz}"}`, + false, + &ast.Concat{ + Exprs: []ast.Node{ + &ast.LiteralNode{ + Value: "foo ", + Type: ast.TypeString, + }, + &ast.Concat{ + Exprs: []ast.Node{ + &ast.LiteralNode{ + Value: "bar ", + Type: ast.TypeString, + }, + &ast.VariableAccess{ + Name: "baz", + }, + }, + }, + }, + }, + }, + + { + `foo ${bar ${baz}}`, + true, + nil, + }, + + { + `foo ${${baz}}`, + true, + nil, + }, } for _, tc := range cases { diff --git a/config/lang/y.go b/config/lang/y.go index a0ef3d1054..173c1208c9 100644 --- a/config/lang/y.go +++ b/config/lang/y.go @@ -20,15 +20,19 @@ const STRING = 57346 const IDENTIFIER = 57347 const PROGRAM_BRACKET_LEFT = 57348 const PROGRAM_BRACKET_RIGHT = 57349 -const PAREN_LEFT = 57350 -const PAREN_RIGHT = 57351 -const COMMA = 57352 +const PROGRAM_STRING_START = 57350 +const PROGRAM_STRING_END = 57351 +const PAREN_LEFT = 57352 +const PAREN_RIGHT = 57353 +const COMMA = 57354 var parserToknames = []string{ "STRING", "IDENTIFIER", "PROGRAM_BRACKET_LEFT", "PROGRAM_BRACKET_RIGHT", + "PROGRAM_STRING_START", + "PROGRAM_STRING_END", "PAREN_LEFT", "PAREN_RIGHT", "COMMA", @@ -39,7 +43,7 @@ const parserEofCode = 1 const parserErrCode = 2 const parserMaxDepth = 200 -//line lang.y:83 +//line lang.y:103 //line yacctab:1 var parserExca = []int{ @@ -48,47 +52,48 @@ var parserExca = []int{ -2, 0, } -const parserNprod = 12 +const parserNprod = 14 const parserPrivate = 57344 var parserTokenNames []string var parserStates []string -const parserLast = 18 +const parserLast = 21 var parserAct = []int{ - 7, 14, 15, 11, 10, 4, 5, 5, 4, 9, - 3, 1, 13, 6, 8, 2, 16, 12, + 9, 16, 17, 13, 3, 12, 1, 8, 6, 11, + 7, 6, 14, 7, 15, 8, 10, 2, 18, 4, + 5, } var parserPact = []int{ - 1, -1000, 0, -1000, -1000, 4, -1000, -3, -1000, -5, - -1000, 4, -8, -1000, -1000, 4, -1000, + 7, -1000, 7, -1000, -1000, -1000, -1000, 4, -1000, -2, + 7, -7, -1000, 4, -10, -1000, -1000, 4, -1000, } var parserPgo = []int{ - 0, 0, 10, 14, 17, 11, + 0, 0, 20, 19, 16, 4, 12, 6, } var parserR1 = []int{ - 0, 5, 5, 5, 2, 1, 1, 1, 4, 4, - 4, 3, + 0, 7, 4, 4, 5, 5, 2, 1, 1, 1, + 6, 6, 6, 3, } var parserR2 = []int{ - 0, 1, 1, 2, 3, 1, 1, 4, 0, 3, - 1, 1, + 0, 1, 1, 2, 1, 1, 3, 1, 1, 4, + 0, 3, 1, 1, } var parserChk = []int{ - -1000, -5, -3, -2, 4, 6, -2, -1, -3, 5, - 7, 8, -4, -1, 9, 10, -1, + -1000, -7, -4, -5, -3, -2, 4, 6, -5, -1, + -4, 5, 7, 10, -6, -1, 11, 12, -1, } var parserDef = []int{ - 0, -2, 1, 2, 11, 0, 3, 0, 5, 6, - 4, 8, 0, 10, 7, 0, 9, + 0, -2, 1, 2, 4, 5, 13, 0, 3, 0, + 7, 8, 6, 10, 0, 12, 9, 0, 11, } var parserTok1 = []int{ @@ -96,7 +101,8 @@ var parserTok1 = []int{ } var parserTok2 = []int{ - 2, 3, 4, 5, 6, 7, 8, 9, 10, + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, } var parserTok3 = []int{ 0, @@ -328,59 +334,76 @@ parserdefault: switch parsernt { case 1: - //line lang.y:30 + //line lang.y:31 { parserResult = parserS[parserpt-0].node } case 2: - //line lang.y:34 + //line lang.y:37 { - parserResult = parserS[parserpt-0].node + parserVAL.node = parserS[parserpt-0].node } case 3: - //line lang.y:38 + //line lang.y:41 { - parserResult = &ast.Concat{ - Exprs: []ast.Node{parserS[parserpt-1].node, parserS[parserpt-0].node}, + var result []ast.Node + if c, ok := parserS[parserpt-1].node.(*ast.Concat); ok { + result = append(c.Exprs, parserS[parserpt-0].node) + } else { + result = []ast.Node{parserS[parserpt-1].node, parserS[parserpt-0].node} + } + + parserVAL.node = &ast.Concat{ + Exprs: result, } } case 4: - //line lang.y:46 + //line lang.y:56 { - parserVAL.node = parserS[parserpt-1].node + parserVAL.node = parserS[parserpt-0].node } case 5: - //line lang.y:52 + //line lang.y:60 { parserVAL.node = parserS[parserpt-0].node } case 6: - //line lang.y:56 + //line lang.y:66 { - parserVAL.node = &ast.VariableAccess{Name: parserS[parserpt-0].str} + parserVAL.node = parserS[parserpt-1].node } case 7: - //line lang.y:60 + //line lang.y:72 { - parserVAL.node = &ast.Call{Func: parserS[parserpt-3].str, Args: parserS[parserpt-1].nodeList} + parserVAL.node = parserS[parserpt-0].node } case 8: - //line lang.y:65 + //line lang.y:76 { - parserVAL.nodeList = nil + parserVAL.node = &ast.VariableAccess{Name: parserS[parserpt-0].str} } case 9: - //line lang.y:69 + //line lang.y:80 { - parserVAL.nodeList = append(parserS[parserpt-2].nodeList, parserS[parserpt-0].node) + parserVAL.node = &ast.Call{Func: parserS[parserpt-3].str, Args: parserS[parserpt-1].nodeList} } case 10: - //line lang.y:73 + //line lang.y:85 { - parserVAL.nodeList = append(parserVAL.nodeList, parserS[parserpt-0].node) + parserVAL.nodeList = nil } case 11: - //line lang.y:79 + //line lang.y:89 + { + parserVAL.nodeList = append(parserS[parserpt-2].nodeList, parserS[parserpt-0].node) + } + case 12: + //line lang.y:93 + { + parserVAL.nodeList = append(parserVAL.nodeList, parserS[parserpt-0].node) + } + case 13: + //line lang.y:99 { parserVAL.node = &ast.LiteralNode{Value: parserS[parserpt-0].str, Type: ast.TypeString} } diff --git a/config/lang/y.output b/config/lang/y.output index f098e493df..b17ca3ae59 100644 --- a/config/lang/y.output +++ b/config/lang/y.output @@ -2,12 +2,14 @@ state 0 $accept: .top $end - STRING shift 4 - PROGRAM_BRACKET_LEFT shift 5 + STRING shift 6 + PROGRAM_BRACKET_LEFT shift 7 . error - interpolation goto 3 - literal goto 2 + interpolation goto 5 + literal goto 4 + literalModeTop goto 2 + literalModeValue goto 3 top goto 1 state 1 @@ -18,127 +20,160 @@ state 1 state 2 - top: literal. (1) - top: literal.interpolation + top: literalModeTop. (1) + literalModeTop: literalModeTop.literalModeValue - PROGRAM_BRACKET_LEFT shift 5 - . reduce 1 (src line 28) + STRING shift 6 + PROGRAM_BRACKET_LEFT shift 7 + . reduce 1 (src line 29) - interpolation goto 6 + interpolation goto 5 + literal goto 4 + literalModeValue goto 8 state 3 - top: interpolation. (2) + literalModeTop: literalModeValue. (2) - . reduce 2 (src line 33) + . reduce 2 (src line 35) state 4 - literal: STRING. (11) + literalModeValue: literal. (4) - . reduce 11 (src line 77) + . reduce 4 (src line 54) state 5 - interpolation: PROGRAM_BRACKET_LEFT.expr PROGRAM_BRACKET_RIGHT + literalModeValue: interpolation. (5) - STRING shift 4 - IDENTIFIER shift 9 - . error + . reduce 5 (src line 59) - expr goto 7 - literal goto 8 state 6 - top: literal interpolation. (3) + literal: STRING. (13) - . reduce 3 (src line 37) + . reduce 13 (src line 97) state 7 - interpolation: PROGRAM_BRACKET_LEFT expr.PROGRAM_BRACKET_RIGHT + interpolation: PROGRAM_BRACKET_LEFT.expr PROGRAM_BRACKET_RIGHT - PROGRAM_BRACKET_RIGHT shift 10 + STRING shift 6 + IDENTIFIER shift 11 + PROGRAM_BRACKET_LEFT shift 7 . error + expr goto 9 + interpolation goto 5 + literal goto 4 + literalModeTop goto 10 + literalModeValue goto 3 state 8 - expr: literal. (5) + literalModeTop: literalModeTop literalModeValue. (3) - . reduce 5 (src line 50) + . reduce 3 (src line 40) state 9 - expr: IDENTIFIER. (6) - expr: IDENTIFIER.PAREN_LEFT args PAREN_RIGHT + interpolation: PROGRAM_BRACKET_LEFT expr.PROGRAM_BRACKET_RIGHT - PAREN_LEFT shift 11 - . reduce 6 (src line 55) + PROGRAM_BRACKET_RIGHT shift 12 + . error state 10 - interpolation: PROGRAM_BRACKET_LEFT expr PROGRAM_BRACKET_RIGHT. (4) + literalModeTop: literalModeTop.literalModeValue + expr: literalModeTop. (7) - . reduce 4 (src line 44) + STRING shift 6 + PROGRAM_BRACKET_LEFT shift 7 + . reduce 7 (src line 70) + interpolation goto 5 + literal goto 4 + literalModeValue goto 8 state 11 - expr: IDENTIFIER PAREN_LEFT.args PAREN_RIGHT - args: . (8) + expr: IDENTIFIER. (8) + expr: IDENTIFIER.PAREN_LEFT args PAREN_RIGHT - STRING shift 4 - IDENTIFIER shift 9 - . reduce 8 (src line 64) + PAREN_LEFT shift 13 + . reduce 8 (src line 75) - expr goto 13 - literal goto 8 - args goto 12 state 12 - expr: IDENTIFIER PAREN_LEFT args.PAREN_RIGHT - args: args.COMMA expr + interpolation: PROGRAM_BRACKET_LEFT expr PROGRAM_BRACKET_RIGHT. (6) - PAREN_RIGHT shift 14 - COMMA shift 15 - . error + . reduce 6 (src line 64) state 13 - args: expr. (10) + expr: IDENTIFIER PAREN_LEFT.args PAREN_RIGHT + args: . (10) - . reduce 10 (src line 72) + STRING shift 6 + IDENTIFIER shift 11 + PROGRAM_BRACKET_LEFT shift 7 + . reduce 10 (src line 84) + expr goto 15 + interpolation goto 5 + literal goto 4 + literalModeTop goto 10 + literalModeValue goto 3 + args goto 14 state 14 - expr: IDENTIFIER PAREN_LEFT args PAREN_RIGHT. (7) + expr: IDENTIFIER PAREN_LEFT args.PAREN_RIGHT + args: args.COMMA expr - . reduce 7 (src line 59) + PAREN_RIGHT shift 16 + COMMA shift 17 + . error state 15 + args: expr. (12) + + . reduce 12 (src line 92) + + +state 16 + expr: IDENTIFIER PAREN_LEFT args PAREN_RIGHT. (9) + + . reduce 9 (src line 79) + + +state 17 args: args COMMA.expr - STRING shift 4 - IDENTIFIER shift 9 + STRING shift 6 + IDENTIFIER shift 11 + PROGRAM_BRACKET_LEFT shift 7 . error - expr goto 16 - literal goto 8 + expr goto 18 + interpolation goto 5 + literal goto 4 + literalModeTop goto 10 + literalModeValue goto 3 -state 16 - args: args COMMA expr. (9) +state 18 + args: args COMMA expr. (11) - . reduce 9 (src line 68) + . reduce 11 (src line 88) -10 terminals, 6 nonterminals -12 grammar rules, 17/2000 states +12 terminals, 8 nonterminals +14 grammar rules, 19/2000 states 0 shift/reduce, 0 reduce/reduce conflicts reported -55 working sets used -memory: parser 11/30000 -6 extra closures -13 shift entries, 1 exceptions -9 goto entries -2 entries saved by goto default -Optimizer space used: output 18/30000 -18 table entries, 0 zero -maximum spread: 10, maximum offset: 15 +57 working sets used +memory: parser 25/30000 +14 extra closures +19 shift entries, 1 exceptions +12 goto entries +15 entries saved by goto default +Optimizer space used: output 21/30000 +21 table entries, 0 zero +maximum spread: 12, maximum offset: 17