github.com/franklinhu/terraform@v0.6.9-0.20151202232446-81f7fb1e6f9e/config/lang/lex.go (about)

     1  package lang
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"strconv"
     7  	"unicode"
     8  	"unicode/utf8"
     9  
    10  	"github.com/hashicorp/terraform/config/lang/ast"
    11  )
    12  
    13  //go:generate go tool yacc -p parser lang.y
    14  
// lexEOF is the value the lexer reports at end of input; the generated
// parser expects the lexer to return 0 on EOF. The rune reader next() also
// returns it once no input remains.
const lexEOF = 0
    17  
    18  // The parser uses the type <prefix>Lex as a lexer.  It must provide
    19  // the methods Lex(*<prefix>SymType) int and Error(string).
    20  type parserLex struct {
    21  	Err   error
    22  	Input string
    23  
    24  	mode               parserMode
    25  	interpolationDepth int
    26  	pos                int
    27  	width              int
    28  	col, line          int
    29  	lastLine           int
    30  	astPos             *ast.Pos
    31  }
    32  
// parserToken is the token yielded to the parser. The value can be
// determined within the parser type based on the enum value returned
// from Lex.
type parserToken struct {
	// Value is the token payload. The lexer in this file stores a string
	// for STRING and IDENTIFIER tokens, an int for INTEGER, a float64 for
	// FLOAT and one of the ast.ArithmeticOp* constants for ARITH_OP.
	Value interface{}
	// Pos is the source position of the token. Lex fills it in after
	// lexing whenever its Column is still zero.
	Pos   ast.Pos
}
    40  
    41  // parserMode keeps track of what mode we're in for the parser. We have
    42  // two modes: literal and interpolation. Literal mode is when strings
    43  // don't have to be quoted, and interpolations are defined as ${foo}.
    44  // Interpolation mode means that strings have to be quoted and unquoted
    45  // things are identifiers, such as foo("bar").
    46  type parserMode uint8
    47  
    48  const (
    49  	parserModeInvalid parserMode = 0
    50  	parserModeLiteral            = 1 << iota
    51  	parserModeInterpolation
    52  )
    53  
    54  // The parser calls this method to get each new token.
    55  func (x *parserLex) Lex(yylval *parserSymType) int {
    56  	// We always start in literal mode, since programs don't start
    57  	// in an interpolation. ex. "foo ${bar}" vs "bar" (and assuming interp.)
    58  	if x.mode == parserModeInvalid {
    59  		x.mode = parserModeLiteral
    60  	}
    61  
    62  	// Defer an update to set the proper column/line we read the next token.
    63  	defer func() {
    64  		if yylval.token != nil && yylval.token.Pos.Column == 0 {
    65  			yylval.token.Pos = *x.astPos
    66  		}
    67  	}()
    68  
    69  	x.astPos = nil
    70  	return x.lex(yylval)
    71  }
    72  
    73  func (x *parserLex) lex(yylval *parserSymType) int {
    74  	switch x.mode {
    75  	case parserModeLiteral:
    76  		return x.lexModeLiteral(yylval)
    77  	case parserModeInterpolation:
    78  		return x.lexModeInterpolation(yylval)
    79  	default:
    80  		x.Error(fmt.Sprintf("Unknown parse mode: %d", x.mode))
    81  		return lexEOF
    82  	}
    83  }
    84  
    85  func (x *parserLex) lexModeLiteral(yylval *parserSymType) int {
    86  	for {
    87  		c := x.next()
    88  		if c == lexEOF {
    89  			return lexEOF
    90  		}
    91  
    92  		// Are we starting an interpolation?
    93  		if c == '$' && x.peek() == '{' {
    94  			x.next()
    95  			x.interpolationDepth++
    96  			x.mode = parserModeInterpolation
    97  			return PROGRAM_BRACKET_LEFT
    98  		}
    99  
   100  		// We're just a normal string that isn't part of any interpolation yet.
   101  		x.backup()
   102  		result, terminated := x.lexString(yylval, x.interpolationDepth > 0)
   103  
   104  		// If the string terminated and we're within an interpolation already
   105  		// then that means that we finished a nested string, so pop
   106  		// back out to interpolation mode.
   107  		if terminated && x.interpolationDepth > 0 {
   108  			x.mode = parserModeInterpolation
   109  
   110  			// If the string is empty, just skip it. We're still in
   111  			// an interpolation so we do this to avoid empty nodes.
   112  			if yylval.token.Value.(string) == "" {
   113  				return x.lex(yylval)
   114  			}
   115  		}
   116  
   117  		return result
   118  	}
   119  }
   120  
   121  func (x *parserLex) lexModeInterpolation(yylval *parserSymType) int {
   122  	for {
   123  		c := x.next()
   124  		if c == lexEOF {
   125  			return lexEOF
   126  		}
   127  
   128  		// Ignore all whitespace
   129  		if unicode.IsSpace(c) {
   130  			continue
   131  		}
   132  
   133  		// If we see a double quote then we're lexing a string since
   134  		// we're in interpolation mode.
   135  		if c == '"' {
   136  			result, terminated := x.lexString(yylval, true)
   137  			if !terminated {
   138  				// The string didn't end, which means that we're in the
   139  				// middle of starting another interpolation.
   140  				x.mode = parserModeLiteral
   141  
   142  				// If the string is empty and we're starting an interpolation,
   143  				// then just skip it to avoid empty string AST nodes
   144  				if yylval.token.Value.(string) == "" {
   145  					return x.lex(yylval)
   146  				}
   147  			}
   148  
   149  			return result
   150  		}
   151  
   152  		// If we are seeing a number, it is the start of a number. Lex it.
   153  		if c >= '0' && c <= '9' {
   154  			x.backup()
   155  			return x.lexNumber(yylval)
   156  		}
   157  
   158  		switch c {
   159  		case '}':
   160  			// '}' means we ended the interpolation. Pop back into
   161  			// literal mode and reduce our interpolation depth.
   162  			x.interpolationDepth--
   163  			x.mode = parserModeLiteral
   164  			return PROGRAM_BRACKET_RIGHT
   165  		case '(':
   166  			return PAREN_LEFT
   167  		case ')':
   168  			return PAREN_RIGHT
   169  		case ',':
   170  			return COMMA
   171  		case '+':
   172  			yylval.token = &parserToken{Value: ast.ArithmeticOpAdd}
   173  			return ARITH_OP
   174  		case '-':
   175  			yylval.token = &parserToken{Value: ast.ArithmeticOpSub}
   176  			return ARITH_OP
   177  		case '*':
   178  			yylval.token = &parserToken{Value: ast.ArithmeticOpMul}
   179  			return ARITH_OP
   180  		case '/':
   181  			yylval.token = &parserToken{Value: ast.ArithmeticOpDiv}
   182  			return ARITH_OP
   183  		case '%':
   184  			yylval.token = &parserToken{Value: ast.ArithmeticOpMod}
   185  			return ARITH_OP
   186  		default:
   187  			x.backup()
   188  			return x.lexId(yylval)
   189  		}
   190  	}
   191  }
   192  
   193  func (x *parserLex) lexId(yylval *parserSymType) int {
   194  	var b bytes.Buffer
   195  	var last rune
   196  	for {
   197  		c := x.next()
   198  		if c == lexEOF {
   199  			break
   200  		}
   201  
   202  		// We only allow * after a '.' for resource splast: type.name.*.id
   203  		// Otherwise, its probably multiplication.
   204  		if c == '*' && last != '.' {
   205  			x.backup()
   206  			break
   207  		}
   208  
   209  		// If this isn't a character we want in an ID, return out.
   210  		// One day we should make this a regexp.
   211  		if c != '_' &&
   212  			c != '-' &&
   213  			c != '.' &&
   214  			c != '*' &&
   215  			!unicode.IsLetter(c) &&
   216  			!unicode.IsNumber(c) {
   217  			x.backup()
   218  			break
   219  		}
   220  
   221  		if _, err := b.WriteRune(c); err != nil {
   222  			x.Error(err.Error())
   223  			return lexEOF
   224  		}
   225  
   226  		last = c
   227  	}
   228  
   229  	yylval.token = &parserToken{Value: b.String()}
   230  	return IDENTIFIER
   231  }
   232  
   233  // lexNumber lexes out a number: an integer or a float.
   234  func (x *parserLex) lexNumber(yylval *parserSymType) int {
   235  	var b bytes.Buffer
   236  	gotPeriod := false
   237  	for {
   238  		c := x.next()
   239  		if c == lexEOF {
   240  			break
   241  		}
   242  
   243  		// If we see a period, we might be getting a float..
   244  		if c == '.' {
   245  			// If we've already seen a period, then ignore it, and
   246  			// exit. This will probably result in a syntax error later.
   247  			if gotPeriod {
   248  				x.backup()
   249  				break
   250  			}
   251  
   252  			gotPeriod = true
   253  		} else if c < '0' || c > '9' {
   254  			// If we're not seeing a number, then also exit.
   255  			x.backup()
   256  			break
   257  		}
   258  
   259  		if _, err := b.WriteRune(c); err != nil {
   260  			x.Error(fmt.Sprintf("internal error: %s", err))
   261  			return lexEOF
   262  		}
   263  	}
   264  
   265  	// If we didn't see a period, it is an int
   266  	if !gotPeriod {
   267  		v, err := strconv.ParseInt(b.String(), 0, 0)
   268  		if err != nil {
   269  			x.Error(fmt.Sprintf("expected number: %s", err))
   270  			return lexEOF
   271  		}
   272  
   273  		yylval.token = &parserToken{Value: int(v)}
   274  		return INTEGER
   275  	}
   276  
   277  	// If we did see a period, it is a float
   278  	f, err := strconv.ParseFloat(b.String(), 64)
   279  	if err != nil {
   280  		x.Error(fmt.Sprintf("expected float: %s", err))
   281  		return lexEOF
   282  	}
   283  
   284  	yylval.token = &parserToken{Value: f}
   285  	return FLOAT
   286  }
   287  
   288  func (x *parserLex) lexString(yylval *parserSymType, quoted bool) (int, bool) {
   289  	var b bytes.Buffer
   290  	terminated := false
   291  	for {
   292  		c := x.next()
   293  		if c == lexEOF {
   294  			if quoted {
   295  				x.Error("unterminated string")
   296  			}
   297  
   298  			break
   299  		}
   300  
   301  		// Behavior is a bit different if we're lexing within a quoted string.
   302  		if quoted {
   303  			// If its a double quote, we've reached the end of the string
   304  			if c == '"' {
   305  				terminated = true
   306  				break
   307  			}
   308  
   309  			// Let's check to see if we're escaping anything.
   310  			if c == '\\' {
   311  				switch n := x.next(); n {
   312  				case '\\', '"':
   313  					c = n
   314  				case 'n':
   315  					c = '\n'
   316  				default:
   317  					x.backup()
   318  				}
   319  			}
   320  		}
   321  
   322  		// If we hit a dollar sign, then check if we're starting
   323  		// another interpolation. If so, then we're done.
   324  		if c == '$' {
   325  			n := x.peek()
   326  
   327  			// If it is '{', then we're starting another interpolation
   328  			if n == '{' {
   329  				x.backup()
   330  				break
   331  			}
   332  
   333  			// If it is '$', then we're escaping a dollar sign
   334  			if n == '$' {
   335  				x.next()
   336  			}
   337  		}
   338  
   339  		if _, err := b.WriteRune(c); err != nil {
   340  			x.Error(err.Error())
   341  			return lexEOF, false
   342  		}
   343  	}
   344  
   345  	yylval.token = &parserToken{Value: b.String()}
   346  	return STRING, terminated
   347  }
   348  
   349  // Return the next rune for the lexer.
   350  func (x *parserLex) next() rune {
   351  	if int(x.pos) >= len(x.Input) {
   352  		x.width = 0
   353  		return lexEOF
   354  	}
   355  
   356  	r, w := utf8.DecodeRuneInString(x.Input[x.pos:])
   357  	x.width = w
   358  	x.pos += x.width
   359  
   360  	if x.line == 0 {
   361  		x.line = 1
   362  		x.col = 1
   363  	} else {
   364  		x.col += 1
   365  	}
   366  
   367  	if r == '\n' {
   368  		x.lastLine = x.col
   369  		x.line += 1
   370  		x.col = 1
   371  	}
   372  
   373  	if x.astPos == nil {
   374  		x.astPos = &ast.Pos{Column: x.col, Line: x.line}
   375  	}
   376  
   377  	return r
   378  }
   379  
   380  // peek returns but does not consume the next rune in the input
   381  func (x *parserLex) peek() rune {
   382  	r := x.next()
   383  	x.backup()
   384  	return r
   385  }
   386  
   387  // backup steps back one rune. Can only be called once per next.
   388  func (x *parserLex) backup() {
   389  	x.pos -= x.width
   390  	x.col -= 1
   391  
   392  	// If we are at column 0, we're backing up across a line boundary
   393  	// so we need to be careful to get the proper value.
   394  	if x.col == 0 {
   395  		x.col = x.lastLine
   396  		x.line -= 1
   397  	}
   398  }
   399  
   400  // The parser calls this method on a parse error.
   401  func (x *parserLex) Error(s string) {
   402  	x.Err = fmt.Errorf("parse error: %s", s)
   403  }