github.com/arvindram03/terraform@v0.3.7-0.20150212015210-408f838db36d/config/lang/lex.go (about) 1 package lang 2 3 import ( 4 "bytes" 5 "fmt" 6 "strconv" 7 "unicode" 8 "unicode/utf8" 9 10 "github.com/hashicorp/terraform/config/lang/ast" 11 ) 12 13 //go:generate go tool yacc -p parser lang.y 14 15 // The parser expects the lexer to return 0 on EOF. 16 const lexEOF = 0 17 18 // The parser uses the type <prefix>Lex as a lexer. It must provide 19 // the methods Lex(*<prefix>SymType) int and Error(string). 20 type parserLex struct { 21 Err error 22 Input string 23 24 mode parserMode 25 interpolationDepth int 26 pos int 27 width int 28 col, line int 29 lastLine int 30 astPos *ast.Pos 31 } 32 33 // parserToken is the token yielded to the parser. The value can be 34 // determined within the parser type based on the enum value returned 35 // from Lex. 36 type parserToken struct { 37 Value interface{} 38 Pos ast.Pos 39 } 40 41 // parserMode keeps track of what mode we're in for the parser. We have 42 // two modes: literal and interpolation. Literal mode is when strings 43 // don't have to be quoted, and interpolations are defined as ${foo}. 44 // Interpolation mode means that strings have to be quoted and unquoted 45 // things are identifiers, such as foo("bar"). 46 type parserMode uint8 47 48 const ( 49 parserModeInvalid parserMode = 0 50 parserModeLiteral = 1 << iota 51 parserModeInterpolation 52 ) 53 54 // The parser calls this method to get each new token. 55 func (x *parserLex) Lex(yylval *parserSymType) int { 56 // We always start in literal mode, since programs don't start 57 // in an interpolation. ex. "foo ${bar}" vs "bar" (and assuming interp.) 58 if x.mode == parserModeInvalid { 59 x.mode = parserModeLiteral 60 } 61 62 // Defer an update to set the proper column/line we read the next token. 63 defer func() { 64 if yylval.token != nil && yylval.token.Pos.Column == 0 { 65 yylval.token.Pos = *x.astPos 66 } 67 }() 68 69 x.astPos = nil 70 return x.lex(yylval) 71 } 72 73 func (x *parserLex) lex(yylval *parserSymType) int { 74 switch x.mode { 75 case parserModeLiteral: 76 return x.lexModeLiteral(yylval) 77 case parserModeInterpolation: 78 return x.lexModeInterpolation(yylval) 79 default: 80 x.Error(fmt.Sprintf("Unknown parse mode: %d", x.mode)) 81 return lexEOF 82 } 83 } 84 85 func (x *parserLex) lexModeLiteral(yylval *parserSymType) int { 86 for { 87 c := x.next() 88 if c == lexEOF { 89 return lexEOF 90 } 91 92 // Are we starting an interpolation? 93 if c == '$' && x.peek() == '{' { 94 x.next() 95 x.interpolationDepth++ 96 x.mode = parserModeInterpolation 97 return PROGRAM_BRACKET_LEFT 98 } 99 100 // We're just a normal string that isn't part of any interpolation yet. 101 x.backup() 102 result, terminated := x.lexString(yylval, x.interpolationDepth > 0) 103 104 // If the string terminated and we're within an interpolation already 105 // then that means that we finished a nested string, so pop 106 // back out to interpolation mode. 107 if terminated && x.interpolationDepth > 0 { 108 x.mode = parserModeInterpolation 109 110 // If the string is empty, just skip it. We're still in 111 // an interpolation so we do this to avoid empty nodes. 112 if yylval.token.Value.(string) == "" { 113 return x.lex(yylval) 114 } 115 } 116 117 return result 118 } 119 } 120 121 func (x *parserLex) lexModeInterpolation(yylval *parserSymType) int { 122 for { 123 c := x.next() 124 if c == lexEOF { 125 return lexEOF 126 } 127 128 // Ignore all whitespace 129 if unicode.IsSpace(c) { 130 continue 131 } 132 133 // If we see a double quote then we're lexing a string since 134 // we're in interpolation mode. 135 if c == '"' { 136 result, terminated := x.lexString(yylval, true) 137 if !terminated { 138 // The string didn't end, which means that we're in the 139 // middle of starting another interpolation. 140 x.mode = parserModeLiteral 141 142 // If the string is empty and we're starting an interpolation, 143 // then just skip it to avoid empty string AST nodes 144 if yylval.token.Value.(string) == "" { 145 return x.lex(yylval) 146 } 147 } 148 149 return result 150 } 151 152 // If we are seeing a number, it is the start of a number. Lex it. 153 if c >= '0' && c <= '9' { 154 x.backup() 155 return x.lexNumber(yylval) 156 } 157 158 switch c { 159 case '}': 160 // '}' means we ended the interpolation. Pop back into 161 // literal mode and reduce our interpolation depth. 162 x.interpolationDepth-- 163 x.mode = parserModeLiteral 164 return PROGRAM_BRACKET_RIGHT 165 case '(': 166 return PAREN_LEFT 167 case ')': 168 return PAREN_RIGHT 169 case ',': 170 return COMMA 171 default: 172 x.backup() 173 return x.lexId(yylval) 174 } 175 } 176 } 177 178 func (x *parserLex) lexId(yylval *parserSymType) int { 179 var b bytes.Buffer 180 for { 181 c := x.next() 182 if c == lexEOF { 183 break 184 } 185 186 // If this isn't a character we want in an ID, return out. 187 // One day we should make this a regexp. 188 if c != '_' && 189 c != '-' && 190 c != '.' && 191 c != '*' && 192 !unicode.IsLetter(c) && 193 !unicode.IsNumber(c) { 194 x.backup() 195 break 196 } 197 198 if _, err := b.WriteRune(c); err != nil { 199 x.Error(err.Error()) 200 return lexEOF 201 } 202 } 203 204 yylval.token = &parserToken{Value: b.String()} 205 return IDENTIFIER 206 } 207 208 // lexNumber lexes out a number: an integer or a float. 209 func (x *parserLex) lexNumber(yylval *parserSymType) int { 210 var b bytes.Buffer 211 gotPeriod := false 212 for { 213 c := x.next() 214 if c == lexEOF { 215 break 216 } 217 218 // If we see a period, we might be getting a float.. 219 if c == '.' { 220 // If we've already seen a period, then ignore it, and 221 // exit. This will probably result in a syntax error later. 222 if gotPeriod { 223 x.backup() 224 break 225 } 226 227 gotPeriod = true 228 } else if c < '0' || c > '9' { 229 // If we're not seeing a number, then also exit. 230 x.backup() 231 break 232 } 233 234 if _, err := b.WriteRune(c); err != nil { 235 x.Error(fmt.Sprintf("internal error: %s", err)) 236 return lexEOF 237 } 238 } 239 240 // If we didn't see a period, it is an int 241 if !gotPeriod { 242 v, err := strconv.ParseInt(b.String(), 0, 0) 243 if err != nil { 244 x.Error(fmt.Sprintf("expected number: %s", err)) 245 return lexEOF 246 } 247 248 yylval.token = &parserToken{Value: int(v)} 249 return INTEGER 250 } 251 252 // If we did see a period, it is a float 253 f, err := strconv.ParseFloat(b.String(), 64) 254 if err != nil { 255 x.Error(fmt.Sprintf("expected float: %s", err)) 256 return lexEOF 257 } 258 259 yylval.token = &parserToken{Value: f} 260 return FLOAT 261 } 262 263 func (x *parserLex) lexString(yylval *parserSymType, quoted bool) (int, bool) { 264 var b bytes.Buffer 265 terminated := false 266 for { 267 c := x.next() 268 if c == lexEOF { 269 if quoted { 270 x.Error("unterminated string") 271 } 272 273 break 274 } 275 276 // Behavior is a bit different if we're lexing within a quoted string. 277 if quoted { 278 // If its a double quote, we've reached the end of the string 279 if c == '"' { 280 terminated = true 281 break 282 } 283 284 // Let's check to see if we're escaping anything. 285 if c == '\\' { 286 switch n := x.next(); n { 287 case '\\': 288 fallthrough 289 case '"': 290 c = n 291 case 'n': 292 c = '\n' 293 default: 294 x.backup() 295 } 296 } 297 } 298 299 // If we hit a dollar sign, then check if we're starting 300 // another interpolation. If so, then we're done. 301 if c == '$' { 302 n := x.peek() 303 304 // If it is '{', then we're starting another interpolation 305 if n == '{' { 306 x.backup() 307 break 308 } 309 310 // If it is '$', then we're escaping a dollar sign 311 if n == '$' { 312 x.next() 313 } 314 } 315 316 if _, err := b.WriteRune(c); err != nil { 317 x.Error(err.Error()) 318 return lexEOF, false 319 } 320 } 321 322 yylval.token = &parserToken{Value: b.String()} 323 return STRING, terminated 324 } 325 326 // Return the next rune for the lexer. 327 func (x *parserLex) next() rune { 328 if int(x.pos) >= len(x.Input) { 329 x.width = 0 330 return lexEOF 331 } 332 333 r, w := utf8.DecodeRuneInString(x.Input[x.pos:]) 334 x.width = w 335 x.pos += x.width 336 337 if x.line == 0 { 338 x.line = 1 339 x.col = 1 340 } else { 341 x.col += 1 342 } 343 344 if r == '\n' { 345 x.lastLine = x.col 346 x.line += 1 347 x.col = 1 348 } 349 350 if x.astPos == nil { 351 x.astPos = &ast.Pos{Column: x.col, Line: x.line} 352 } 353 354 return r 355 } 356 357 // peek returns but does not consume the next rune in the input 358 func (x *parserLex) peek() rune { 359 r := x.next() 360 x.backup() 361 return r 362 } 363 364 // backup steps back one rune. Can only be called once per next. 365 func (x *parserLex) backup() { 366 x.pos -= x.width 367 x.col -= 1 368 369 // If we are at column 0, we're backing up across a line boundary 370 // so we need to be careful to get the proper value. 371 if x.col == 0 { 372 x.col = x.lastLine 373 x.line -= 1 374 } 375 } 376 377 // The parser calls this method on a parse error. 378 func (x *parserLex) Error(s string) { 379 x.Err = fmt.Errorf("parse error: %s", s) 380 }