github.com/tiagovtristao/plz@v13.4.0+incompatible/src/parse/asp/grammar_parse.go (about) 1 package asp 2 3 import ( 4 "io" 5 "reflect" 6 "strconv" 7 "strings" 8 ) 9 10 // keywords are the list of reserved keywords in the language. They can't be assigned to. 11 // Not all of these have meaning in the build language (and many never will), but they are 12 // reserved in Python and for practical reasons it is useful to remain a subset of Python. 13 var keywords = map[string]struct{}{ 14 "False": {}, 15 "None": {}, 16 "True": {}, 17 "and": {}, 18 "as": {}, 19 "assert": {}, 20 "break": {}, 21 "class": {}, 22 "continue": {}, 23 "def": {}, 24 "del": {}, 25 "elif": {}, 26 "else": {}, 27 "except": {}, 28 "finally": {}, 29 "for": {}, 30 "from": {}, 31 "global": {}, 32 "if": {}, 33 "import": {}, 34 "in": {}, 35 "is": {}, 36 "lambda": {}, 37 "nonlocal": {}, 38 "not": {}, 39 "or": {}, 40 "pass": {}, 41 "raise": {}, 42 "return": {}, 43 "try": {}, 44 "while": {}, 45 "with": {}, 46 "yield": {}, 47 } 48 49 type parser struct { 50 l *lex 51 endPos Position 52 } 53 54 // parseFileInput is the only external entry point to this class, it parses a file into a FileInput structure. 55 func parseFileInput(r io.Reader) (input *FileInput, err error) { 56 // The rest of the parser functions signal unhappiness by panicking, we 57 // recover any such failures here and convert to an error. 58 defer func() { 59 if r := recover(); r != nil { 60 err = r.(error) 61 } 62 }() 63 64 p := &parser{l: newLexer(r)} 65 input = &FileInput{} 66 for tok := p.l.Peek(); tok.Type != EOF; tok = p.l.Peek() { 67 input.Statements = append(input.Statements, p.parseStatement()) 68 } 69 return input, nil 70 } 71 72 func (p *parser) assert(condition bool, pos Token, message string, args ...interface{}) { 73 if !condition { 74 p.fail(pos, message, args...) 75 } 76 } 77 78 func (p *parser) assertTokenType(tok Token, expectedType rune) { 79 if tok.Type != expectedType { 80 p.fail(tok, "unexpected token %s, expected %s", tok, reverseSymbol(expectedType)) 81 } 82 } 83 84 func (p *parser) next(expectedType rune) Token { 85 tok := p.l.Next() 86 p.assertTokenType(tok, expectedType) 87 return tok 88 } 89 90 func (p *parser) nextv(expectedValue string) Token { 91 tok := p.l.Next() 92 if tok.Value != expectedValue { 93 p.fail(tok, "unexpected token %s, expected %s", tok, expectedValue) 94 } 95 return tok 96 } 97 98 func (p *parser) optional(option rune) bool { 99 if tok := p.l.Peek(); tok.Type == option { 100 p.l.Next() 101 return true 102 } 103 return false 104 } 105 106 func (p *parser) optionalv(option string) bool { 107 if tok := p.l.Peek(); tok.Value == option { 108 p.l.Next() 109 return true 110 } 111 return false 112 } 113 114 func (p *parser) anythingBut(r rune) bool { 115 return p.l.Peek().Type != r 116 } 117 118 func (p *parser) oneof(expectedTypes ...rune) Token { 119 tok := p.l.Next() 120 for _, t := range expectedTypes { 121 if tok.Type == t { 122 return tok 123 } 124 } 125 p.fail(tok, "unexpected token %s, expected one of %s", tok.Value, strings.Join(reverseSymbols(expectedTypes), " ")) 126 return Token{} 127 } 128 129 func (p *parser) oneofval(expectedValues ...string) Token { 130 tok := p.l.Next() 131 for _, v := range expectedValues { 132 if tok.Value == v { 133 return tok 134 } 135 } 136 p.fail(tok, "unexpected token %s, expected one of %s", tok.Value, strings.Join(expectedValues, ", ")) 137 return Token{} 138 } 139 140 func (p *parser) fail(pos Token, message string, args ...interface{}) { 141 fail(pos.Pos, message, args...) 142 } 143 144 func (p *parser) parseStatement() *Statement { 145 s := &Statement{} 146 tok := p.l.Peek() 147 s.Pos = tok.Pos 148 149 switch tok.Value { 150 case "pass": 151 s.Pass = true 152 p.endPos = p.l.Next().EndPos() 153 p.next(EOL) 154 case "continue": 155 s.Continue = true 156 p.endPos = p.l.Next().EndPos() 157 p.next(EOL) 158 case "def": 159 s.FuncDef = p.parseFuncDef() 160 case "for": 161 s.For = p.parseFor() 162 case "if": 163 s.If = p.parseIf() 164 case "return": 165 p.endPos = p.l.Next().EndPos() 166 s.Return = p.parseReturn() 167 case "raise": 168 p.l.Next() 169 s.Raise = p.parseExpression() 170 p.next(EOL) 171 case "assert": 172 p.initField(&s.Assert) 173 p.l.Next() 174 s.Assert.Expr = p.parseExpression() 175 if p.optional(',') { 176 tok := p.next(String) 177 s.Assert.Message = tok.Value 178 p.endPos = tok.EndPos() 179 } 180 p.next(EOL) 181 default: 182 if tok.Type == Ident { 183 s.Ident = p.parseIdentStatement() 184 } else { 185 s.Literal = p.parseExpression() 186 } 187 p.next(EOL) 188 } 189 s.EndPos = p.endPos 190 return s 191 } 192 193 func (p *parser) parseStatements() []*Statement { 194 stmts := []*Statement{} 195 for p.anythingBut(Unindent) { 196 stmts = append(stmts, p.parseStatement()) 197 } 198 p.next(Unindent) 199 return stmts 200 } 201 202 func (p *parser) parseReturn() *ReturnStatement { 203 r := &ReturnStatement{} 204 for p.anythingBut(EOL) { 205 r.Values = append(r.Values, p.parseExpression()) 206 if !p.optional(',') { 207 break 208 } 209 } 210 p.next(EOL) 211 return r 212 } 213 214 func (p *parser) parseFuncDef() *FuncDef { 215 p.nextv("def") 216 fd := &FuncDef{ 217 Name: p.next(Ident).Value, 218 } 219 if strings.HasPrefix(fd.Name, "_") { 220 fd.IsPrivate = true 221 } 222 p.next('(') 223 for p.anythingBut(')') { 224 fd.Arguments = append(fd.Arguments, p.parseArgument()) 225 if !p.optional(',') { 226 break 227 } 228 } 229 p.next(')') 230 231 if tok := p.l.Peek(); tok.Value == "-" { 232 p.next('-') 233 p.next('>') 234 235 tok := p.oneofval("bool", "str", "int", "list", "dict", "function", "config") 236 fd.Return = tok.Value 237 } 238 239 // Get the position for the end of function defition header 240 fd.EoDef = p.next(':').Pos 241 242 p.next(EOL) 243 if tok := p.l.Peek(); tok.Type == String { 244 fd.Docstring = tok.Value 245 // endPos being set here, this is for when function only contains docstring 246 p.endPos = p.l.Next().EndPos() 247 p.next(EOL) 248 } 249 250 fd.Statements = p.parseStatements() 251 252 return fd 253 } 254 255 func (p *parser) parseArgument() Argument { 256 a := Argument{ 257 Name: p.next(Ident).Value, 258 } 259 // indicate an argument is private if it is prefixed with "_" 260 if strings.HasPrefix(a.Name, "_") { 261 a.IsPrivate = true 262 } 263 if tok := p.l.Peek(); tok.Type == ',' || tok.Type == ')' { 264 return a 265 } 266 tok := p.oneof(':', '&', '=') 267 if tok.Type == ':' { 268 // Type annotations 269 for { 270 tok = p.oneofval("bool", "str", "int", "list", "dict", "function", "config") 271 a.Type = append(a.Type, tok.Value) 272 if !p.optional('|') { 273 break 274 } 275 } 276 if tok := p.l.Peek(); tok.Type == ',' || tok.Type == ')' { 277 return a 278 } 279 tok = p.oneof('&', '=') 280 } 281 if tok.Type == '&' { 282 // Argument aliases 283 for { 284 tok = p.next(Ident) 285 a.Aliases = append(a.Aliases, tok.Value) 286 if !p.optional('&') { 287 break 288 } 289 } 290 if tok := p.l.Peek(); tok.Type == ',' || tok.Type == ')' { 291 return a 292 } 293 tok = p.next('=') 294 } 295 // Default value 296 a.Value = p.parseExpression() 297 return a 298 } 299 300 func (p *parser) parseIf() *IfStatement { 301 p.nextv("if") 302 i := &IfStatement{} 303 p.parseExpressionInPlace(&i.Condition) 304 p.next(':') 305 p.next(EOL) 306 i.Statements = p.parseStatements() 307 308 for p.optionalv("elif") { 309 elif := &i.Elif[p.newElement(&i.Elif)] 310 p.parseExpressionInPlace(&elif.Condition) 311 p.next(':') 312 p.next(EOL) 313 elif.Statements = p.parseStatements() 314 } 315 if p.optionalv("else") { 316 p.next(':') 317 p.next(EOL) 318 i.ElseStatements = p.parseStatements() 319 } 320 321 return i 322 } 323 324 // newElement is a nasty little hack to allow extending slices of types that we can't readily name. 325 // This is added in preference to having to break everything out to separately named types. 326 func (p *parser) newElement(x interface{}) int { 327 v := reflect.ValueOf(x).Elem() 328 v.Set(reflect.Append(v, reflect.Zero(v.Type().Elem()))) 329 return v.Len() - 1 330 } 331 332 // initField is a similar little hack for initialising non-slice fields. 333 func (p *parser) initField(x interface{}) { 334 v := reflect.ValueOf(x).Elem() 335 v.Set(reflect.New(v.Type().Elem())) 336 } 337 338 func (p *parser) parseFor() *ForStatement { 339 f := &ForStatement{} 340 p.nextv("for") 341 f.Names = p.parseIdentList() 342 p.nextv("in") 343 p.parseExpressionInPlace(&f.Expr) 344 p.next(':') 345 p.next(EOL) 346 f.Statements = p.parseStatements() 347 348 return f 349 } 350 351 // TODO: could ret last token here 352 func (p *parser) parseIdentList() []string { 353 ret := []string{p.next(Ident).Value} // First one is compulsory 354 for tok := p.l.Peek(); tok.Type == ','; tok = p.l.Peek() { 355 p.l.Next() 356 ret = append(ret, p.next(Ident).Value) 357 } 358 return ret 359 } 360 361 func (p *parser) parseExpression() *Expression { 362 e := p.parseUnconditionalExpression() 363 p.parseInlineIf(e) 364 e.EndPos = p.endPos 365 return e 366 } 367 368 func (p *parser) parseExpressionInPlace(e *Expression) { 369 e.Pos = p.l.Peek().Pos 370 p.parseUnconditionalExpressionInPlace(e) 371 p.parseInlineIf(e) 372 e.EndPos = p.endPos 373 } 374 375 func (p *parser) parseInlineIf(e *Expression) { 376 if p.optionalv("if") { 377 e.If = &InlineIf{Condition: p.parseExpression()} 378 p.nextv("else") 379 e.If.Else = p.parseExpression() 380 } 381 } 382 383 func (p *parser) parseUnconditionalExpression() *Expression { 384 e := &Expression{Pos: p.l.Peek().Pos} 385 p.parseUnconditionalExpressionInPlace(e) 386 return e 387 } 388 389 func (p *parser) parseUnconditionalExpressionInPlace(e *Expression) { 390 if tok := p.l.Peek(); tok.Type == '-' || tok.Value == "not" { 391 p.l.Next() 392 var valueExp *ValueExpression 393 valueExp = p.parseValueExpression() 394 e.UnaryOp = &UnaryOp{ 395 Op: tok.Value, 396 Expr: *valueExp, 397 } 398 } else { 399 e.Val = p.parseValueExpression() 400 } 401 tok := p.l.Peek() 402 if tok.Value == "not" { 403 // Hack for "not in" which needs an extra token. 404 p.l.Next() 405 tok = p.l.Peek() 406 p.assert(tok.Value == "in", tok, "expected 'in', not %s", tok.Value) 407 tok.Value = "not in" 408 p.endPos = tok.EndPos() 409 } 410 if op, present := operators[tok.Value]; present { 411 tok = p.l.Next() 412 o := &e.Op[p.newElement(&e.Op)] 413 o.Op = op 414 o.Expr = p.parseUnconditionalExpression() 415 if len(o.Expr.Op) > 0 { 416 if op := o.Expr.Op[0].Op; op == And || op == Or || op == Is { 417 // Hoist logical operator back up here to fix precedence. This is a bit of a hack and 418 // might not be perfect in all cases... 419 e.Op = append(e.Op, o.Expr.Op...) 420 o.Expr.Op = nil 421 } 422 } 423 p.l.Peek() 424 } 425 } 426 427 func (p *parser) parseValueExpression() *ValueExpression { 428 ve := &ValueExpression{} 429 tok := p.l.Peek() 430 431 if tok.Type == String { 432 if tok.Value[0] == 'f' { 433 ve.FString = p.parseFString() 434 } else { 435 ve.String = tok.Value 436 p.endPos = p.l.Next().EndPos() 437 } 438 } else if tok.Type == Int { 439 p.assert(len(tok.Value) < 19, tok, "int literal is too large: %s", tok) 440 p.initField(&ve.Int) 441 i, err := strconv.Atoi(tok.Value) 442 p.assert(err == nil, tok, "invalid int value %s", tok) // Theoretically the lexer shouldn't have fed us this... 443 ve.Int.Int = i 444 p.endPos = p.l.Next().EndPos() 445 } else if tok.Value == "False" || tok.Value == "True" || tok.Value == "None" { 446 ve.Bool = tok.Value 447 p.endPos = p.l.Next().EndPos() 448 } else if tok.Type == '[' { 449 ve.List = p.parseList('[', ']') 450 } else if tok.Type == '(' { 451 ve.Tuple = p.parseList('(', ')') 452 } else if tok.Type == '{' { 453 ve.Dict = p.parseDict() 454 } else if tok.Value == "lambda" { 455 ve.Lambda = p.parseLambda() 456 } else if tok.Type == Ident { 457 ve.Ident = p.parseIdentExpr() 458 p.endPos = ve.Ident.EndPos 459 } else { 460 p.fail(tok, "Unexpected token %s", tok) 461 } 462 463 tok = p.l.Peek() 464 if tok.Type == '[' { 465 ve.Slice = p.parseSlice() 466 tok = p.l.Peek() 467 } 468 if p.optional('.') { 469 ve.Property = p.parseIdentExpr() 470 p.endPos = ve.Property.EndPos 471 } else if p.optional('(') { 472 ve.Call = p.parseCall() 473 } 474 return ve 475 } 476 477 func (p *parser) parseIdentStatement() *IdentStatement { 478 tok := p.l.Peek() 479 i := &IdentStatement{ 480 Name: p.next(Ident).Value, 481 } 482 _, reserved := keywords[i.Name] 483 p.assert(!reserved, tok, "Cannot operate on keyword or constant %s", i.Name) 484 tok = p.l.Next() 485 switch tok.Type { 486 case ',': 487 p.initField(&i.Unpack) 488 i.Unpack.Names = p.parseIdentList() 489 p.next('=') 490 i.Unpack.Expr = p.parseExpression() 491 case '[': 492 p.initField(&i.Index) 493 i.Index.Expr = p.parseExpression() 494 p.endPos = p.next(']').EndPos() 495 if tok := p.oneofval("=", "+="); tok.Type == '=' { 496 i.Index.Assign = p.parseExpression() 497 } else { 498 i.Index.AugAssign = p.parseExpression() 499 } 500 case '.': 501 p.initField(&i.Action) 502 i.Action.Property = p.parseIdentExpr() 503 p.endPos = i.Action.Property.EndPos 504 case '(': 505 p.initField(&i.Action) 506 i.Action.Call = p.parseCall() 507 case '=': 508 p.initField(&i.Action) 509 i.Action.Assign = p.parseExpression() 510 default: 511 p.assert(tok.Value == "+=", tok, "Unexpected token %s, expected one of , [ . ( = +=", tok) 512 p.initField(&i.Action) 513 i.Action.AugAssign = p.parseExpression() 514 } 515 return i 516 } 517 518 func (p *parser) parseIdentExpr() *IdentExpr { 519 //var endPos Position 520 identTok := p.next(Ident) 521 ie := &IdentExpr{ 522 Name: identTok.Value, 523 Pos: identTok.Pos, 524 } 525 for tok := p.l.Peek(); tok.Type == '.' || tok.Type == '('; tok = p.l.Peek() { 526 tok := p.l.Next() 527 action := &ie.Action[p.newElement(&ie.Action)] 528 if tok.Type == '.' { 529 action.Property = p.parseIdentExpr() 530 ie.EndPos = action.Property.EndPos 531 } else { 532 action.Call = p.parseCall() 533 ie.EndPos = p.endPos 534 } 535 } 536 537 // In case the Ident is a variable name, we assign the endPos to the end of current token. 538 // see test_data/unary_op.build 539 if ie.EndPos.Column == 0 { 540 ie.EndPos = identTok.EndPos() 541 } 542 return ie 543 } 544 545 func (p *parser) parseCall() *Call { 546 // The leading ( has already been consumed (because that fits better at the various call sites) 547 c := &Call{} 548 names := map[string]bool{} 549 for tok := p.l.Peek(); tok.Type != ')'; tok = p.l.Peek() { 550 arg := CallArgument{} 551 if tok.Type == Ident && p.l.AssignFollows() { 552 // Named argument. 553 arg.Pos = tok.Pos 554 arg.Name = tok.Value 555 p.next(Ident) 556 p.next('=') 557 p.assert(!names[arg.Name], tok, "Repeated argument %s", arg.Name) 558 names[arg.Name] = true 559 } 560 p.parseExpressionInPlace(&arg.Value) 561 c.Arguments = append(c.Arguments, arg) 562 if !p.optional(',') { 563 break 564 } 565 } 566 p.endPos = p.next(')').EndPos() 567 return c 568 } 569 570 func (p *parser) parseList(opening, closing rune) *List { 571 l := &List{} 572 p.next(opening) 573 for tok := p.l.Peek(); tok.Type != closing; tok = p.l.Peek() { 574 l.Values = append(l.Values, p.parseExpression()) 575 if !p.optional(',') { 576 break 577 } 578 } 579 if tok := p.l.Peek(); tok.Value == "for" { 580 p.assert(len(l.Values) == 1, tok, "Must have exactly 1 item in a list comprehension") 581 l.Comprehension = p.parseComprehension() 582 } 583 p.endPos = p.next(closing).EndPos() 584 return l 585 } 586 587 func (p *parser) parseDict() *Dict { 588 d := &Dict{} 589 p.next('{') 590 for tok := p.l.Peek(); tok.Type != '}'; tok = p.l.Peek() { 591 di := &DictItem{} 592 p.parseExpressionInPlace(&di.Key) 593 p.next(':') 594 p.parseExpressionInPlace(&di.Value) 595 d.Items = append(d.Items, di) 596 if !p.optional(',') { 597 break 598 } 599 } 600 if tok := p.l.Peek(); tok.Value == "for" { 601 p.assert(len(d.Items) == 1, tok, "Must have exactly 1 key:value pair in a dict comprehension") 602 d.Comprehension = p.parseComprehension() 603 } 604 p.endPos = p.next('}').EndPos() 605 return d 606 } 607 608 func (p *parser) parseSlice() *Slice { 609 s := &Slice{} 610 p.next('[') 611 if p.optional(':') { 612 s.Colon = ":" 613 } else if !p.optional(':') { 614 s.Start = p.parseExpression() 615 if p.optional(':') { 616 s.Colon = ":" 617 } 618 } 619 if nextType := p.l.Peek().Type; nextType == ']' { 620 p.endPos = p.l.Next().EndPos() 621 return s 622 } 623 s.End = p.parseExpression() 624 p.endPos = p.next(']').EndPos() 625 return s 626 } 627 628 func (p *parser) parseComprehension() *Comprehension { 629 c := &Comprehension{} 630 p.nextv("for") 631 c.Names = p.parseIdentList() 632 p.nextv("in") 633 c.Expr = p.parseUnconditionalExpression() 634 if p.optionalv("for") { 635 p.initField(&c.Second) 636 c.Second.Names = p.parseIdentList() 637 p.nextv("in") 638 c.Second.Expr = p.parseUnconditionalExpression() 639 } 640 if p.optionalv("if") { 641 c.If = p.parseUnconditionalExpression() 642 } 643 return c 644 } 645 646 func (p *parser) parseLambda() *Lambda { 647 l := &Lambda{} 648 p.nextv("lambda") 649 for tok := p.l.Peek(); tok.Type == Ident; tok = p.l.Peek() { 650 p.l.Next() 651 arg := Argument{Name: tok.Value} 652 if p.optional('=') { 653 arg.Value = p.parseExpression() 654 } 655 l.Arguments = append(l.Arguments, arg) 656 if !p.optional(',') { 657 break 658 } 659 } 660 p.next(':') 661 p.parseExpressionInPlace(&l.Expr) 662 return l 663 } 664 665 func (p *parser) parseFString() *FString { 666 f := &FString{} 667 tok := p.next(String) 668 s := tok.Value[2 : len(tok.Value)-1] // Strip preceding f" and trailing " 669 p.endPos = tok.EndPos() 670 tok.Pos.Column++ // track position in case of error 671 for idx := strings.IndexByte(s, '{'); idx != -1; idx = strings.IndexByte(s, '{') { 672 v := &f.Vars[p.newElement(&f.Vars)] 673 v.Prefix = s[:idx] 674 s = s[idx+1:] 675 tok.Pos.Column += idx + 1 676 idx = strings.IndexByte(s, '}') 677 p.assert(idx != -1, tok, "Unterminated brace in fstring") 678 if varname := s[:idx]; strings.HasPrefix(varname, "CONFIG.") { 679 v.Config = strings.TrimPrefix(varname, "CONFIG.") 680 } else { 681 v.Var = varname 682 } 683 s = s[idx+1:] 684 tok.Pos.Column += idx + 1 685 } 686 f.Suffix = s 687 688 return f 689 }