github.com/franklinhu/terraform@v0.6.9-0.20151202232446-81f7fb1e6f9e/config/lang/lex.go (about) 1 package lang 2 3 import ( 4 "bytes" 5 "fmt" 6 "strconv" 7 "unicode" 8 "unicode/utf8" 9 10 "github.com/hashicorp/terraform/config/lang/ast" 11 ) 12 13 //go:generate go tool yacc -p parser lang.y 14 15 // The parser expects the lexer to return 0 on EOF. 16 const lexEOF = 0 17 18 // The parser uses the type <prefix>Lex as a lexer. It must provide 19 // the methods Lex(*<prefix>SymType) int and Error(string). 20 type parserLex struct { 21 Err error 22 Input string 23 24 mode parserMode 25 interpolationDepth int 26 pos int 27 width int 28 col, line int 29 lastLine int 30 astPos *ast.Pos 31 } 32 33 // parserToken is the token yielded to the parser. The value can be 34 // determined within the parser type based on the enum value returned 35 // from Lex. 36 type parserToken struct { 37 Value interface{} 38 Pos ast.Pos 39 } 40 41 // parserMode keeps track of what mode we're in for the parser. We have 42 // two modes: literal and interpolation. Literal mode is when strings 43 // don't have to be quoted, and interpolations are defined as ${foo}. 44 // Interpolation mode means that strings have to be quoted and unquoted 45 // things are identifiers, such as foo("bar"). 46 type parserMode uint8 47 48 const ( 49 parserModeInvalid parserMode = 0 50 parserModeLiteral = 1 << iota 51 parserModeInterpolation 52 ) 53 54 // The parser calls this method to get each new token. 55 func (x *parserLex) Lex(yylval *parserSymType) int { 56 // We always start in literal mode, since programs don't start 57 // in an interpolation. ex. "foo ${bar}" vs "bar" (and assuming interp.) 58 if x.mode == parserModeInvalid { 59 x.mode = parserModeLiteral 60 } 61 62 // Defer an update to set the proper column/line we read the next token. 63 defer func() { 64 if yylval.token != nil && yylval.token.Pos.Column == 0 { 65 yylval.token.Pos = *x.astPos 66 } 67 }() 68 69 x.astPos = nil 70 return x.lex(yylval) 71 } 72 73 func (x *parserLex) lex(yylval *parserSymType) int { 74 switch x.mode { 75 case parserModeLiteral: 76 return x.lexModeLiteral(yylval) 77 case parserModeInterpolation: 78 return x.lexModeInterpolation(yylval) 79 default: 80 x.Error(fmt.Sprintf("Unknown parse mode: %d", x.mode)) 81 return lexEOF 82 } 83 } 84 85 func (x *parserLex) lexModeLiteral(yylval *parserSymType) int { 86 for { 87 c := x.next() 88 if c == lexEOF { 89 return lexEOF 90 } 91 92 // Are we starting an interpolation? 93 if c == '$' && x.peek() == '{' { 94 x.next() 95 x.interpolationDepth++ 96 x.mode = parserModeInterpolation 97 return PROGRAM_BRACKET_LEFT 98 } 99 100 // We're just a normal string that isn't part of any interpolation yet. 101 x.backup() 102 result, terminated := x.lexString(yylval, x.interpolationDepth > 0) 103 104 // If the string terminated and we're within an interpolation already 105 // then that means that we finished a nested string, so pop 106 // back out to interpolation mode. 107 if terminated && x.interpolationDepth > 0 { 108 x.mode = parserModeInterpolation 109 110 // If the string is empty, just skip it. We're still in 111 // an interpolation so we do this to avoid empty nodes. 112 if yylval.token.Value.(string) == "" { 113 return x.lex(yylval) 114 } 115 } 116 117 return result 118 } 119 } 120 121 func (x *parserLex) lexModeInterpolation(yylval *parserSymType) int { 122 for { 123 c := x.next() 124 if c == lexEOF { 125 return lexEOF 126 } 127 128 // Ignore all whitespace 129 if unicode.IsSpace(c) { 130 continue 131 } 132 133 // If we see a double quote then we're lexing a string since 134 // we're in interpolation mode. 135 if c == '"' { 136 result, terminated := x.lexString(yylval, true) 137 if !terminated { 138 // The string didn't end, which means that we're in the 139 // middle of starting another interpolation. 140 x.mode = parserModeLiteral 141 142 // If the string is empty and we're starting an interpolation, 143 // then just skip it to avoid empty string AST nodes 144 if yylval.token.Value.(string) == "" { 145 return x.lex(yylval) 146 } 147 } 148 149 return result 150 } 151 152 // If we are seeing a number, it is the start of a number. Lex it. 153 if c >= '0' && c <= '9' { 154 x.backup() 155 return x.lexNumber(yylval) 156 } 157 158 switch c { 159 case '}': 160 // '}' means we ended the interpolation. Pop back into 161 // literal mode and reduce our interpolation depth. 162 x.interpolationDepth-- 163 x.mode = parserModeLiteral 164 return PROGRAM_BRACKET_RIGHT 165 case '(': 166 return PAREN_LEFT 167 case ')': 168 return PAREN_RIGHT 169 case ',': 170 return COMMA 171 case '+': 172 yylval.token = &parserToken{Value: ast.ArithmeticOpAdd} 173 return ARITH_OP 174 case '-': 175 yylval.token = &parserToken{Value: ast.ArithmeticOpSub} 176 return ARITH_OP 177 case '*': 178 yylval.token = &parserToken{Value: ast.ArithmeticOpMul} 179 return ARITH_OP 180 case '/': 181 yylval.token = &parserToken{Value: ast.ArithmeticOpDiv} 182 return ARITH_OP 183 case '%': 184 yylval.token = &parserToken{Value: ast.ArithmeticOpMod} 185 return ARITH_OP 186 default: 187 x.backup() 188 return x.lexId(yylval) 189 } 190 } 191 } 192 193 func (x *parserLex) lexId(yylval *parserSymType) int { 194 var b bytes.Buffer 195 var last rune 196 for { 197 c := x.next() 198 if c == lexEOF { 199 break 200 } 201 202 // We only allow * after a '.' for resource splast: type.name.*.id 203 // Otherwise, its probably multiplication. 204 if c == '*' && last != '.' { 205 x.backup() 206 break 207 } 208 209 // If this isn't a character we want in an ID, return out. 210 // One day we should make this a regexp. 211 if c != '_' && 212 c != '-' && 213 c != '.' && 214 c != '*' && 215 !unicode.IsLetter(c) && 216 !unicode.IsNumber(c) { 217 x.backup() 218 break 219 } 220 221 if _, err := b.WriteRune(c); err != nil { 222 x.Error(err.Error()) 223 return lexEOF 224 } 225 226 last = c 227 } 228 229 yylval.token = &parserToken{Value: b.String()} 230 return IDENTIFIER 231 } 232 233 // lexNumber lexes out a number: an integer or a float. 234 func (x *parserLex) lexNumber(yylval *parserSymType) int { 235 var b bytes.Buffer 236 gotPeriod := false 237 for { 238 c := x.next() 239 if c == lexEOF { 240 break 241 } 242 243 // If we see a period, we might be getting a float.. 244 if c == '.' { 245 // If we've already seen a period, then ignore it, and 246 // exit. This will probably result in a syntax error later. 247 if gotPeriod { 248 x.backup() 249 break 250 } 251 252 gotPeriod = true 253 } else if c < '0' || c > '9' { 254 // If we're not seeing a number, then also exit. 255 x.backup() 256 break 257 } 258 259 if _, err := b.WriteRune(c); err != nil { 260 x.Error(fmt.Sprintf("internal error: %s", err)) 261 return lexEOF 262 } 263 } 264 265 // If we didn't see a period, it is an int 266 if !gotPeriod { 267 v, err := strconv.ParseInt(b.String(), 0, 0) 268 if err != nil { 269 x.Error(fmt.Sprintf("expected number: %s", err)) 270 return lexEOF 271 } 272 273 yylval.token = &parserToken{Value: int(v)} 274 return INTEGER 275 } 276 277 // If we did see a period, it is a float 278 f, err := strconv.ParseFloat(b.String(), 64) 279 if err != nil { 280 x.Error(fmt.Sprintf("expected float: %s", err)) 281 return lexEOF 282 } 283 284 yylval.token = &parserToken{Value: f} 285 return FLOAT 286 } 287 288 func (x *parserLex) lexString(yylval *parserSymType, quoted bool) (int, bool) { 289 var b bytes.Buffer 290 terminated := false 291 for { 292 c := x.next() 293 if c == lexEOF { 294 if quoted { 295 x.Error("unterminated string") 296 } 297 298 break 299 } 300 301 // Behavior is a bit different if we're lexing within a quoted string. 302 if quoted { 303 // If its a double quote, we've reached the end of the string 304 if c == '"' { 305 terminated = true 306 break 307 } 308 309 // Let's check to see if we're escaping anything. 310 if c == '\\' { 311 switch n := x.next(); n { 312 case '\\', '"': 313 c = n 314 case 'n': 315 c = '\n' 316 default: 317 x.backup() 318 } 319 } 320 } 321 322 // If we hit a dollar sign, then check if we're starting 323 // another interpolation. If so, then we're done. 324 if c == '$' { 325 n := x.peek() 326 327 // If it is '{', then we're starting another interpolation 328 if n == '{' { 329 x.backup() 330 break 331 } 332 333 // If it is '$', then we're escaping a dollar sign 334 if n == '$' { 335 x.next() 336 } 337 } 338 339 if _, err := b.WriteRune(c); err != nil { 340 x.Error(err.Error()) 341 return lexEOF, false 342 } 343 } 344 345 yylval.token = &parserToken{Value: b.String()} 346 return STRING, terminated 347 } 348 349 // Return the next rune for the lexer. 350 func (x *parserLex) next() rune { 351 if int(x.pos) >= len(x.Input) { 352 x.width = 0 353 return lexEOF 354 } 355 356 r, w := utf8.DecodeRuneInString(x.Input[x.pos:]) 357 x.width = w 358 x.pos += x.width 359 360 if x.line == 0 { 361 x.line = 1 362 x.col = 1 363 } else { 364 x.col += 1 365 } 366 367 if r == '\n' { 368 x.lastLine = x.col 369 x.line += 1 370 x.col = 1 371 } 372 373 if x.astPos == nil { 374 x.astPos = &ast.Pos{Column: x.col, Line: x.line} 375 } 376 377 return r 378 } 379 380 // peek returns but does not consume the next rune in the input 381 func (x *parserLex) peek() rune { 382 r := x.next() 383 x.backup() 384 return r 385 } 386 387 // backup steps back one rune. Can only be called once per next. 388 func (x *parserLex) backup() { 389 x.pos -= x.width 390 x.col -= 1 391 392 // If we are at column 0, we're backing up across a line boundary 393 // so we need to be careful to get the proper value. 394 if x.col == 0 { 395 x.col = x.lastLine 396 x.line -= 1 397 } 398 } 399 400 // The parser calls this method on a parse error. 401 func (x *parserLex) Error(s string) { 402 x.Err = fmt.Errorf("parse error: %s", s) 403 }