github.com/nicgrayson/terraform@v0.4.3-0.20150415203910-c4de50829380/config/lang/lex.go (about) 1 package lang 2 3 import ( 4 "bytes" 5 "fmt" 6 "strconv" 7 "unicode" 8 "unicode/utf8" 9 10 "github.com/hashicorp/terraform/config/lang/ast" 11 ) 12 13 //go:generate go tool yacc -p parser lang.y 14 15 // The parser expects the lexer to return 0 on EOF. 16 const lexEOF = 0 17 18 // The parser uses the type <prefix>Lex as a lexer. It must provide 19 // the methods Lex(*<prefix>SymType) int and Error(string). 20 type parserLex struct { 21 Err error 22 Input string 23 24 mode parserMode 25 interpolationDepth int 26 pos int 27 width int 28 col, line int 29 lastLine int 30 astPos *ast.Pos 31 } 32 33 // parserToken is the token yielded to the parser. The value can be 34 // determined within the parser type based on the enum value returned 35 // from Lex. 36 type parserToken struct { 37 Value interface{} 38 Pos ast.Pos 39 } 40 41 // parserMode keeps track of what mode we're in for the parser. We have 42 // two modes: literal and interpolation. Literal mode is when strings 43 // don't have to be quoted, and interpolations are defined as ${foo}. 44 // Interpolation mode means that strings have to be quoted and unquoted 45 // things are identifiers, such as foo("bar"). 46 type parserMode uint8 47 48 const ( 49 parserModeInvalid parserMode = 0 50 parserModeLiteral = 1 << iota 51 parserModeInterpolation 52 ) 53 54 // The parser calls this method to get each new token. 55 func (x *parserLex) Lex(yylval *parserSymType) int { 56 // We always start in literal mode, since programs don't start 57 // in an interpolation. ex. "foo ${bar}" vs "bar" (and assuming interp.) 58 if x.mode == parserModeInvalid { 59 x.mode = parserModeLiteral 60 } 61 62 // Defer an update to set the proper column/line we read the next token. 63 defer func() { 64 if yylval.token != nil && yylval.token.Pos.Column == 0 { 65 yylval.token.Pos = *x.astPos 66 } 67 }() 68 69 x.astPos = nil 70 return x.lex(yylval) 71 } 72 73 func (x *parserLex) lex(yylval *parserSymType) int { 74 switch x.mode { 75 case parserModeLiteral: 76 return x.lexModeLiteral(yylval) 77 case parserModeInterpolation: 78 return x.lexModeInterpolation(yylval) 79 default: 80 x.Error(fmt.Sprintf("Unknown parse mode: %d", x.mode)) 81 return lexEOF 82 } 83 } 84 85 func (x *parserLex) lexModeLiteral(yylval *parserSymType) int { 86 for { 87 c := x.next() 88 if c == lexEOF { 89 return lexEOF 90 } 91 92 // Are we starting an interpolation? 93 if c == '$' && x.peek() == '{' { 94 x.next() 95 x.interpolationDepth++ 96 x.mode = parserModeInterpolation 97 return PROGRAM_BRACKET_LEFT 98 } 99 100 // We're just a normal string that isn't part of any interpolation yet. 101 x.backup() 102 result, terminated := x.lexString(yylval, x.interpolationDepth > 0) 103 104 // If the string terminated and we're within an interpolation already 105 // then that means that we finished a nested string, so pop 106 // back out to interpolation mode. 107 if terminated && x.interpolationDepth > 0 { 108 x.mode = parserModeInterpolation 109 110 // If the string is empty, just skip it. We're still in 111 // an interpolation so we do this to avoid empty nodes. 112 if yylval.token.Value.(string) == "" { 113 return x.lex(yylval) 114 } 115 } 116 117 return result 118 } 119 } 120 121 func (x *parserLex) lexModeInterpolation(yylval *parserSymType) int { 122 for { 123 c := x.next() 124 if c == lexEOF { 125 return lexEOF 126 } 127 128 // Ignore all whitespace 129 if unicode.IsSpace(c) { 130 continue 131 } 132 133 // If we see a double quote then we're lexing a string since 134 // we're in interpolation mode. 135 if c == '"' { 136 result, terminated := x.lexString(yylval, true) 137 if !terminated { 138 // The string didn't end, which means that we're in the 139 // middle of starting another interpolation. 140 x.mode = parserModeLiteral 141 142 // If the string is empty and we're starting an interpolation, 143 // then just skip it to avoid empty string AST nodes 144 if yylval.token.Value.(string) == "" { 145 return x.lex(yylval) 146 } 147 } 148 149 return result 150 } 151 152 // If we are seeing a number, it is the start of a number. Lex it. 153 if c >= '0' && c <= '9' { 154 x.backup() 155 return x.lexNumber(yylval) 156 } 157 158 switch c { 159 case '}': 160 // '}' means we ended the interpolation. Pop back into 161 // literal mode and reduce our interpolation depth. 162 x.interpolationDepth-- 163 x.mode = parserModeLiteral 164 return PROGRAM_BRACKET_RIGHT 165 case '(': 166 return PAREN_LEFT 167 case ')': 168 return PAREN_RIGHT 169 case ',': 170 return COMMA 171 case '+': 172 yylval.token = &parserToken{Value: ast.ArithmeticOpAdd} 173 return ARITH_OP 174 case '-': 175 yylval.token = &parserToken{Value: ast.ArithmeticOpSub} 176 return ARITH_OP 177 case '*': 178 yylval.token = &parserToken{Value: ast.ArithmeticOpMul} 179 return ARITH_OP 180 case '/': 181 yylval.token = &parserToken{Value: ast.ArithmeticOpDiv} 182 return ARITH_OP 183 case '%': 184 yylval.token = &parserToken{Value: ast.ArithmeticOpMod} 185 return ARITH_OP 186 default: 187 x.backup() 188 return x.lexId(yylval) 189 } 190 } 191 } 192 193 func (x *parserLex) lexId(yylval *parserSymType) int { 194 var b bytes.Buffer 195 for { 196 c := x.next() 197 if c == lexEOF { 198 break 199 } 200 201 // If this isn't a character we want in an ID, return out. 202 // One day we should make this a regexp. 203 if c != '_' && 204 c != '-' && 205 c != '.' && 206 c != '*' && 207 !unicode.IsLetter(c) && 208 !unicode.IsNumber(c) { 209 x.backup() 210 break 211 } 212 213 if _, err := b.WriteRune(c); err != nil { 214 x.Error(err.Error()) 215 return lexEOF 216 } 217 } 218 219 yylval.token = &parserToken{Value: b.String()} 220 return IDENTIFIER 221 } 222 223 // lexNumber lexes out a number: an integer or a float. 224 func (x *parserLex) lexNumber(yylval *parserSymType) int { 225 var b bytes.Buffer 226 gotPeriod := false 227 for { 228 c := x.next() 229 if c == lexEOF { 230 break 231 } 232 233 // If we see a period, we might be getting a float.. 234 if c == '.' { 235 // If we've already seen a period, then ignore it, and 236 // exit. This will probably result in a syntax error later. 237 if gotPeriod { 238 x.backup() 239 break 240 } 241 242 gotPeriod = true 243 } else if c < '0' || c > '9' { 244 // If we're not seeing a number, then also exit. 245 x.backup() 246 break 247 } 248 249 if _, err := b.WriteRune(c); err != nil { 250 x.Error(fmt.Sprintf("internal error: %s", err)) 251 return lexEOF 252 } 253 } 254 255 // If we didn't see a period, it is an int 256 if !gotPeriod { 257 v, err := strconv.ParseInt(b.String(), 0, 0) 258 if err != nil { 259 x.Error(fmt.Sprintf("expected number: %s", err)) 260 return lexEOF 261 } 262 263 yylval.token = &parserToken{Value: int(v)} 264 return INTEGER 265 } 266 267 // If we did see a period, it is a float 268 f, err := strconv.ParseFloat(b.String(), 64) 269 if err != nil { 270 x.Error(fmt.Sprintf("expected float: %s", err)) 271 return lexEOF 272 } 273 274 yylval.token = &parserToken{Value: f} 275 return FLOAT 276 } 277 278 func (x *parserLex) lexString(yylval *parserSymType, quoted bool) (int, bool) { 279 var b bytes.Buffer 280 terminated := false 281 for { 282 c := x.next() 283 if c == lexEOF { 284 if quoted { 285 x.Error("unterminated string") 286 } 287 288 break 289 } 290 291 // Behavior is a bit different if we're lexing within a quoted string. 292 if quoted { 293 // If its a double quote, we've reached the end of the string 294 if c == '"' { 295 terminated = true 296 break 297 } 298 299 // Let's check to see if we're escaping anything. 300 if c == '\\' { 301 switch n := x.next(); n { 302 case '\\': 303 fallthrough 304 case '"': 305 c = n 306 case 'n': 307 c = '\n' 308 default: 309 x.backup() 310 } 311 } 312 } 313 314 // If we hit a dollar sign, then check if we're starting 315 // another interpolation. If so, then we're done. 316 if c == '$' { 317 n := x.peek() 318 319 // If it is '{', then we're starting another interpolation 320 if n == '{' { 321 x.backup() 322 break 323 } 324 325 // If it is '$', then we're escaping a dollar sign 326 if n == '$' { 327 x.next() 328 } 329 } 330 331 if _, err := b.WriteRune(c); err != nil { 332 x.Error(err.Error()) 333 return lexEOF, false 334 } 335 } 336 337 yylval.token = &parserToken{Value: b.String()} 338 return STRING, terminated 339 } 340 341 // Return the next rune for the lexer. 342 func (x *parserLex) next() rune { 343 if int(x.pos) >= len(x.Input) { 344 x.width = 0 345 return lexEOF 346 } 347 348 r, w := utf8.DecodeRuneInString(x.Input[x.pos:]) 349 x.width = w 350 x.pos += x.width 351 352 if x.line == 0 { 353 x.line = 1 354 x.col = 1 355 } else { 356 x.col += 1 357 } 358 359 if r == '\n' { 360 x.lastLine = x.col 361 x.line += 1 362 x.col = 1 363 } 364 365 if x.astPos == nil { 366 x.astPos = &ast.Pos{Column: x.col, Line: x.line} 367 } 368 369 return r 370 } 371 372 // peek returns but does not consume the next rune in the input 373 func (x *parserLex) peek() rune { 374 r := x.next() 375 x.backup() 376 return r 377 } 378 379 // backup steps back one rune. Can only be called once per next. 380 func (x *parserLex) backup() { 381 x.pos -= x.width 382 x.col -= 1 383 384 // If we are at column 0, we're backing up across a line boundary 385 // so we need to be careful to get the proper value. 386 if x.col == 0 { 387 x.col = x.lastLine 388 x.line -= 1 389 } 390 } 391 392 // The parser calls this method on a parse error. 393 func (x *parserLex) Error(s string) { 394 x.Err = fmt.Errorf("parse error: %s", s) 395 }