github.com/lab47/exprcore@v0.0.0-20210525052339-fb7d6bd9331e/syntax/parse.go (about) 1 // Copyright 2017 The Bazel Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package syntax 6 7 // This file defines a recursive-descent parser for exprcore. 8 // The LL(1) grammar of exprcore and the names of many productions follow Python 2.7. 9 // 10 // TODO(adonovan): use syntax.Error more systematically throughout the 11 // package. Verify that error positions are correct using the 12 // chunkedfile mechanism. 13 14 import ( 15 "log" 16 "strings" 17 "unicode" 18 ) 19 20 // Enable this flag to print the token stream and log.Fatal on the first error. 21 const debug = false 22 23 // A Mode value is a set of flags (or 0) that controls optional parser functionality. 24 type Mode uint 25 26 const ( 27 RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments 28 ) 29 30 // Parse parses the input data and returns the corresponding parse tree. 31 // 32 // If src != nil, ParseFile parses the source from src and the filename 33 // is only used when recording position information. 34 // The type of the argument for the src parameter must be string, 35 // []byte, or io.Reader. 36 // If src == nil, ParseFile parses the file specified by filename. 37 func Parse(filename string, src interface{}, mode Mode) (f *File, err error) { 38 in, err := newScanner(filename, src, mode&RetainComments != 0) 39 if err != nil { 40 return nil, err 41 } 42 p := parser{in: in} 43 defer p.in.recover(&err) 44 45 p.nextToken() // read first lookahead token 46 f = p.parseFile() 47 if f != nil { 48 f.Path = filename 49 } 50 p.assignComments(f) 51 return f, nil 52 } 53 54 // ParseCompoundStmt parses a single compound statement: 55 // a blank line, a def, for, while, or if statement, or a 56 // semicolon-separated list of simple statements followed 57 // by a newline. These are the units on which the REPL operates. 58 // ParseCompoundStmt does not consume any following input. 59 // The parser calls the readline function each 60 // time it needs a new line of input. 61 func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) { 62 in, err := newScanner(filename, readline, false) 63 if err != nil { 64 return nil, err 65 } 66 67 p := parser{in: in} 68 defer p.in.recover(&err) 69 70 p.nextToken() // read first lookahead token 71 72 var stmts []Stmt 73 switch p.tok { 74 case DEF, IF, FOR, WHILE: 75 stmts = p.parseStmt(stmts) 76 case NEWLINE: 77 // blank line 78 default: 79 stmts = append(stmts, p.parseStmt2()) 80 // stmts = p.parseSimpleStmt(stmts, false) 81 // Require but don't consume newline, to avoid blocking again. 82 // if p.tok != SEMI { 83 // p.in.errorf(p.in.pos, "invalid syntax") 84 // } 85 } 86 87 return &File{Path: filename, Stmts: stmts}, nil 88 } 89 90 // ParseExpr parses a exprcore expression. 91 // A comma-separated list of expressions is parsed as a tuple. 92 // See Parse for explanation of parameters. 93 func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) { 94 in, err := newScanner(filename, src, mode&RetainComments != 0) 95 if err != nil { 96 return nil, err 97 } 98 p := parser{in: in} 99 defer p.in.recover(&err) 100 101 p.nextToken() // read first lookahead token 102 103 // Use parseExpr, not parseTest, to permit an unparenthesized tuple. 104 expr = p.parseExpr(false) 105 106 // A following newline (e.g. "f()\n") appears outside any brackets, 107 // on a non-blank line, and thus results in a NEWLINE token. 108 if p.tok == SEMI { 109 p.nextToken() 110 } 111 112 if p.tok != EOF { 113 p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok) 114 } 115 p.assignComments(expr) 116 return expr, nil 117 } 118 119 type parser struct { 120 in *scanner 121 tok Token 122 tokval tokenValue 123 } 124 125 // nextToken advances the scanner and returns the position of the 126 // previous token. 127 func (p *parser) nextToken() Position { 128 oldpos := p.tokval.pos 129 p.tok = p.in.nextToken(&p.tokval) 130 // enable to see the token stream 131 if debug { 132 log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos) 133 } 134 return oldpos 135 } 136 137 func (p *parser) expectSemi() { 138 if p.tok != RBRACE && p.tok != RPAREN { 139 switch p.tok { 140 case SEMI: 141 p.nextToken() 142 default: 143 p.in.errorf(p.in.pos, "got %#v, expected SEMI", p.tok) 144 } 145 } 146 } 147 148 // file_input = (NEWLINE | stmt)* EOF 149 func (p *parser) parseFile() *File { 150 var stmts []Stmt 151 for p.tok != EOF { 152 stmts = append(stmts, p.parseStmt2()) 153 } 154 return &File{Stmts: stmts} 155 } 156 157 func (p *parser) parseStmt(stmts []Stmt) []Stmt { 158 if p.tok == DEF { 159 return append(stmts, p.parseDefStmt()) 160 } else if p.tok == IF { 161 return append(stmts, p.parseIfStmt()) 162 } else if p.tok == FOR { 163 return append(stmts, p.parseForStmt()) 164 } else if p.tok == WHILE { 165 return append(stmts, p.parseWhileStmt()) 166 } 167 return p.parseSimpleStmt(stmts, false) 168 } 169 170 func (p *parser) parseStmt2() Stmt { 171 var stmt Stmt 172 173 switch p.tok { 174 case DEF: 175 stmt = p.parseDefStmt() 176 case IF: 177 stmt = p.parseIfStmt() 178 case FOR: 179 stmt = p.parseForStmt() 180 case WHILE: 181 stmt = p.parseWhileStmt() 182 case RETURN: 183 pos := p.nextToken() // consume RETURN 184 var result Expr 185 if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI { 186 result = p.parseExpr(false) 187 } 188 stmt = &ReturnStmt{Return: pos, Result: result} 189 190 case BREAK, CONTINUE, PASS: 191 tok := p.tok 192 pos := p.nextToken() // consume it 193 stmt = &BranchStmt{Token: tok, TokenPos: pos} 194 195 case LOAD: 196 stmt = p.parseLoadStmt() 197 198 case IMPORT: 199 stmt = p.parseImportStmt() 200 201 default: 202 // Assignment 203 x := p.parseExpr(false) 204 switch p.tok { 205 case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ: 206 op := p.tok 207 pos := p.nextToken() // consume op 208 rhs := p.parseExpr(false) 209 stmt = &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs} 210 211 default: 212 // Expression statement (e.g. function call, doc string). 213 stmt = &ExprStmt{X: x} 214 } 215 } 216 217 p.expectSemi() 218 219 return stmt 220 } 221 222 func (p *parser) parseShell() *ShellExpr { 223 shellpos := p.nextToken() // consume SHELL 224 val := p.tokval.string 225 226 raw := p.tokval.raw 227 str := &Literal{Token: STRING, TokenPos: shellpos, Raw: raw, Value: val} 228 229 return &ShellExpr{ 230 Shell: shellpos, 231 Content: []Expr{str}, 232 } 233 } 234 235 func (p *parser) parseDynamicShell() *ShellExpr { 236 shellpos := p.nextToken() // consume SHELL 237 238 var content []Expr 239 240 val := p.tokval.string 241 raw := p.tokval.raw 242 str := &Literal{Token: STRING, TokenPos: shellpos, Raw: raw, Value: val} 243 content = append(content, str) 244 245 for p.tok != DSHELL_END { 246 switch p.tok { 247 case DSHELL_PART: 248 val := p.tokval.string 249 raw := p.tokval.raw 250 str := &Literal{Token: STRING, TokenPos: shellpos, Raw: raw, Value: val} 251 content = append(content, str) 252 p.nextToken() 253 default: 254 content = append(content, p.parseExpr(false)) 255 } 256 } 257 258 p.consume(DSHELL_END) 259 260 return &ShellExpr{ 261 Shell: shellpos, 262 Content: content, 263 } 264 } 265 266 func (p *parser) parseDefStmt() *DefStmt { 267 defpos := p.nextToken() // consume DEF 268 id := p.parseIdent() 269 p.consume(LPAREN) 270 params := p.parseParams() 271 p.consume(RPAREN) 272 body := p.parseSuite() 273 return &DefStmt{ 274 Def: defpos, 275 Name: id, 276 Params: params, 277 Body: body, 278 } 279 } 280 281 func (p *parser) parseIfStmt() Stmt { 282 ec := exprContext{} 283 ifpos := p.nextToken() // consume IF 284 cond := p.parseTest(ec) 285 body := p.parseSuite() 286 ifStmt := &IfStmt{ 287 If: ifpos, 288 Cond: cond, 289 True: body, 290 } 291 tail := ifStmt 292 for p.tok == ELIF { 293 elifpos := p.nextToken() // consume ELIF 294 cond := p.parseTest(ec) 295 body := p.parseSuite() 296 elif := &IfStmt{ 297 If: elifpos, 298 Cond: cond, 299 True: body, 300 } 301 tail.ElsePos = elifpos 302 tail.False = []Stmt{elif} 303 tail = elif 304 } 305 if p.tok == ELSE { 306 tail.ElsePos = p.nextToken() // consume ELSE 307 tail.False = p.parseSuite() 308 } 309 return ifStmt 310 } 311 312 func (p *parser) parseForStmt() Stmt { 313 forpos := p.nextToken() // consume FOR 314 vars := p.parseForLoopVariables() 315 p.consume(IN) 316 x := p.parseExpr(false) 317 body := p.parseSuite() 318 return &ForStmt{ 319 For: forpos, 320 Vars: vars, 321 X: x, 322 Body: body, 323 } 324 } 325 326 func (p *parser) parseWhileStmt() Stmt { 327 ec := exprContext{} 328 whilepos := p.nextToken() // consume WHILE 329 cond := p.parseTest(ec) 330 body := p.parseSuite() 331 return &WhileStmt{ 332 While: whilepos, 333 Cond: cond, 334 Body: body, 335 } 336 } 337 338 // Equivalent to 'exprlist' production in Python grammar. 339 // 340 // loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA? 341 func (p *parser) parseForLoopVariables() Expr { 342 ec := exprContext{} 343 // Avoid parseExpr because it would consume the IN token 344 // following x in "for x in y: ...". 345 v := p.parsePrimaryWithSuffix(ec) 346 if p.tok != COMMA { 347 return v 348 } 349 350 list := []Expr{v} 351 for p.tok == COMMA { 352 p.nextToken() 353 if terminatesExprList(p.tok) { 354 break 355 } 356 list = append(list, p.parsePrimaryWithSuffix(ec)) 357 } 358 return &TupleExpr{List: list} 359 } 360 361 // simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE 362 // In REPL mode, it does not consume the NEWLINE. 363 func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt { 364 for { 365 stmts = append(stmts, p.parseSmallStmt()) 366 p.expectSemi() 367 if p.tok != SEMI { 368 break 369 } 370 p.nextToken() // consume SEMI 371 if p.tok == RBRACE || p.tok == NEWLINE || p.tok == EOF { 372 break 373 } 374 } 375 // EOF without NEWLINE occurs in `if x: pass`, for example. 376 if p.tok != EOF && consumeNL { 377 p.consume(NEWLINE) 378 } 379 380 return stmts 381 } 382 383 // small_stmt = RETURN expr? 384 // | PASS | BREAK | CONTINUE 385 // | LOAD ... 386 // | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign 387 // | expr 388 func (p *parser) parseSmallStmt() Stmt { 389 switch p.tok { 390 case RETURN: 391 pos := p.nextToken() // consume RETURN 392 var result Expr 393 if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI { 394 result = p.parseExpr(false) 395 } 396 return &ReturnStmt{Return: pos, Result: result} 397 398 case BREAK, CONTINUE, PASS: 399 tok := p.tok 400 pos := p.nextToken() // consume it 401 return &BranchStmt{Token: tok, TokenPos: pos} 402 403 case LOAD: 404 return p.parseLoadStmt() 405 } 406 407 // Assignment 408 x := p.parseExpr(false) 409 switch p.tok { 410 case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ: 411 op := p.tok 412 pos := p.nextToken() // consume op 413 rhs := p.parseExpr(false) 414 return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs} 415 } 416 417 // Expression statement (e.g. function call, doc string). 418 return &ExprStmt{X: x} 419 } 420 421 // stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')' 422 func (p *parser) parseLoadStmt() *LoadStmt { 423 ec := exprContext{} 424 loadPos := p.nextToken() // consume LOAD 425 lparen := p.consume(LPAREN) 426 427 if p.tok != STRING { 428 p.in.errorf(p.in.pos, "first operand of load statement must be a string literal") 429 } 430 module := p.parsePrimary(ec).(*Literal) 431 432 var from, to []*Ident 433 for p.tok != RPAREN && p.tok != EOF { 434 p.consume(COMMA) 435 if p.tok == RPAREN { 436 break // allow trailing comma 437 } 438 switch p.tok { 439 case STRING: 440 // load("module", "id") 441 // To name is same as original. 442 lit := p.parsePrimary(ec).(*Literal) 443 id := &Ident{ 444 NamePos: lit.TokenPos.add(`"`), 445 Name: lit.Value.(string), 446 } 447 to = append(to, id) 448 from = append(from, id) 449 450 case IDENT: 451 // load("module", to="from") 452 id := p.parseIdent() 453 to = append(to, id) 454 if p.tok != EQ { 455 p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name) 456 } 457 p.consume(EQ) 458 if p.tok != STRING { 459 p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name) 460 } 461 lit := p.parsePrimary(ec).(*Literal) 462 from = append(from, &Ident{ 463 NamePos: lit.TokenPos.add(`"`), 464 Name: lit.Value.(string), 465 }) 466 467 case RPAREN: 468 p.in.errorf(p.in.pos, "trailing comma in load statement") 469 470 default: 471 p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok) 472 } 473 } 474 rparen := p.consume(RPAREN) 475 476 if len(to) == 0 { 477 p.in.errorf(lparen, "load statement must import at least 1 symbol") 478 } 479 return &LoadStmt{ 480 Load: loadPos, 481 Module: module, 482 To: to, 483 From: from, 484 Rparen: rparen, 485 } 486 } 487 488 // import ruby using version="2.7.2" as r 489 // stmt = IMPORT (STRING | IDENTIFIER) (COMMA (STRING | IDENTIFER))* 490 func (p *parser) parseImportStmt() *ImportStmt { 491 ec := exprContext{} 492 loadPos := p.nextToken() // consume IMPORT 493 494 var imports []*ImportPackage 495 496 var rparen Position 497 498 for { 499 500 var namespace, name *Literal 501 502 switch p.tok { 503 case STRING: 504 name = p.parsePrimary(ec).(*Literal) 505 case IDENT: 506 id := p.parsePrimary(ec).(*Ident) 507 508 name = &Literal{ 509 Token: STRING, 510 TokenPos: id.NamePos, 511 Raw: id.Name, 512 Value: id.Name, 513 } 514 default: 515 p.in.errorf(p.in.pos, "package/namespace must be string or identifer: %v", p.tok) 516 } 517 518 if p.tok == DOT { 519 namespace = name 520 p.consume(DOT) 521 522 switch p.tok { 523 case STRING: 524 name = p.parsePrimary(ec).(*Literal) 525 case IDENT: 526 id := p.parsePrimary(ec).(*Ident) 527 528 name = &Literal{ 529 Token: STRING, 530 TokenPos: id.NamePos, 531 Raw: id.Name, 532 Value: id.Name, 533 } 534 default: 535 p.in.errorf(p.in.pos, "package must be string or identifer: %v", p.tok) 536 } 537 } 538 539 var ( 540 as *Ident 541 args []*BinaryExpr 542 ) 543 544 if p.tok == USING { 545 p.consume(USING) 546 547 args = p.parseImportArgs(ec) 548 } 549 550 if p.tok == AS { 551 p.consume(AS) 552 if p.tok != IDENT { 553 p.in.errorf(p.in.pos, "binding of package must be identifer") 554 } 555 556 as = p.parsePrimary(ec).(*Ident) 557 } else { 558 notLN := func(r rune) bool { 559 return !unicode.IsLetter(r) && !unicode.IsNumber(r) 560 } 561 562 s := strings.TrimRightFunc(name.Value.(string), notLN) 563 564 idx := strings.LastIndexFunc(s, notLN) 565 566 bind := s 567 if idx != -1 { 568 bind = s[idx+1:] 569 } 570 571 as = &Ident{ 572 NamePos: name.TokenPos, 573 Name: bind, 574 } 575 } 576 577 imports = append(imports, &ImportPackage{ 578 Namespace: namespace, 579 PackageName: name, 580 BindingName: as, 581 Args: args, 582 }) 583 584 if p.tok == COMMA { 585 p.consume(COMMA) 586 continue 587 } 588 589 if p.tok == SEMI { 590 rparen = p.tokval.pos 591 break 592 } 593 } 594 595 return &ImportStmt{ 596 Load: loadPos, 597 Imports: imports, 598 Rparen: rparen, 599 } 600 } 601 602 // suite is typically what follows a COLON (e.g. after DEF or FOR). 603 // suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT 604 func (p *parser) parseSuiteOld() []Stmt { 605 p.consume(LBRACE) 606 var stmts []Stmt 607 for p.tok != RBRACE && p.tok != EOF { 608 stmts = p.parseStmt(stmts) 609 if p.tok == SEMI { 610 p.consume(SEMI) 611 } 612 } 613 p.consume(RBRACE) 614 return stmts 615 } 616 617 func (p *parser) parseSuite() []Stmt { 618 p.consume(LBRACE) 619 var stmts []Stmt 620 for p.tok != RBRACE && p.tok != EOF { 621 stmts = append(stmts, p.parseStmt2()) 622 } 623 p.consume(RBRACE) 624 return stmts 625 } 626 627 func (p *parser) parseIdent() *Ident { 628 if p.tok != IDENT { 629 p.in.error(p.in.pos, "not an identifier") 630 } 631 id := &Ident{ 632 NamePos: p.tokval.pos, 633 Name: p.tokval.raw, 634 } 635 p.nextToken() 636 return id 637 } 638 639 func (p *parser) consume(tokens ...Token) Position { 640 var ok bool 641 642 for _, t := range tokens { 643 if p.tok == t { 644 ok = true 645 break 646 } 647 } 648 649 if !ok { 650 if len(tokens) == 1 { 651 p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, tokens[0]) 652 } else { 653 var strs []string 654 for _, t := range tokens { 655 strs = append(strs, t.String()) 656 } 657 658 p.in.errorf(p.in.pos, "got %#v, want one of: '%s'", p.tok, strings.Join(strs, "' '")) 659 } 660 } 661 662 return p.nextToken() 663 } 664 665 // params = (param COMMA)* param COMMA? 666 // | 667 // 668 // param = IDENT 669 // | IDENT EQ test 670 // | STAR 671 // | STAR IDENT 672 // | STARSTAR IDENT 673 // 674 // parseParams parses a parameter list. The resulting expressions are of the form: 675 // 676 // *Ident x 677 // *Binary{Op: EQ, X: *Ident, Y: Expr} x=y 678 // *Unary{Op: STAR} * 679 // *Unary{Op: STAR, X: *Ident} *args 680 // *Unary{Op: STARSTAR, X: *Ident} **kwargs 681 func (p *parser) parseParams() []Expr { 682 ec := exprContext{} 683 var params []Expr 684 for p.tok != RPAREN && p.tok != COLON && p.tok != EOF { 685 if len(params) > 0 { 686 p.consume(COMMA) 687 } 688 if p.tok == RPAREN { 689 break 690 } 691 692 // * or *args or **kwargs 693 if p.tok == STAR || p.tok == STARSTAR { 694 op := p.tok 695 pos := p.nextToken() 696 var x Expr 697 if op == STARSTAR || p.tok == IDENT { 698 x = p.parseIdent() 699 } 700 params = append(params, &UnaryExpr{ 701 OpPos: pos, 702 Op: op, 703 X: x, 704 }) 705 continue 706 } 707 708 // IDENT 709 // IDENT = test 710 id := p.parseIdent() 711 if p.tok == EQ { // default value 712 eq := p.nextToken() 713 dflt := p.parseTest(ec) 714 params = append(params, &BinaryExpr{ 715 X: id, 716 OpPos: eq, 717 Op: EQ, 718 Y: dflt, 719 }) 720 continue 721 } 722 723 params = append(params, id) 724 } 725 return params 726 } 727 728 // parseExpr parses an expression, possible consisting of a 729 // comma-separated list of 'test' expressions. 730 // 731 // In many cases we must use parseTest to avoid ambiguity such as 732 // f(x, y) vs. f((x, y)). 733 func (p *parser) parseExpr(inParens bool) Expr { 734 ec := exprContext{} 735 x := p.parseTest(ec) 736 if p.tok != COMMA { 737 return x 738 } 739 740 // tuple 741 exprs := p.parseExprs([]Expr{x}, exprContext{allowTrailingComma: inParens}) 742 return &TupleExpr{List: exprs} 743 } 744 745 type exprContext struct { 746 allowTrailingComma bool 747 allowParams bool 748 } 749 750 // parseExprs parses a comma-separated list of expressions, starting with the comma. 751 // It is used to parse tuples and list elements. 752 // expr_list = (',' expr)* ','? 753 func (p *parser) parseExprs(exprs []Expr, ec exprContext) []Expr { 754 for p.tok == COMMA { 755 pos := p.nextToken() 756 if terminatesExprList(p.tok) { 757 if !ec.allowTrailingComma { 758 p.in.error(pos, "unparenthesized tuple with trailing comma") 759 } 760 break 761 } 762 exprs = append(exprs, p.parseTest(ec)) 763 } 764 return exprs 765 } 766 767 // parseTest parses a 'test', a single-component expression. 768 func (p *parser) parseTest(ec exprContext) Expr { 769 if p.tok == LAMBDA { 770 return p.parseLambda(ec, true) 771 } 772 773 x := p.parseTestPrec(ec, 0) 774 775 // conditional expression (t IF cond ELSE f) 776 if p.tok == IF { 777 ifpos := p.nextToken() 778 cond := p.parseTestPrec(ec, 0) 779 if p.tok != ELSE { 780 p.in.error(ifpos, "conditional expression without else clause") 781 } 782 elsepos := p.nextToken() 783 else_ := p.parseTest(ec) 784 return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_} 785 } 786 787 return x 788 } 789 790 // parseTestNoCond parses a a single-component expression without 791 // consuming a trailing 'if expr else expr'. 792 func (p *parser) parseTestNoCond(ec exprContext) Expr { 793 if p.tok == LAMBDA { 794 return p.parseLambda(ec, false) 795 } 796 return p.parseTestPrec(ec, 0) 797 } 798 799 // parseLambda parses a lambda expression. 800 // The allowCond flag allows the body to be an 'a if b else c' conditional. 801 func (p *parser) parseLambda(ec exprContext, allowCond bool) Expr { 802 lambda := p.nextToken() 803 var params []Expr 804 if p.tok != COLON { 805 params = p.parseParams() 806 } 807 p.consume(COLON) 808 809 var body Expr 810 if allowCond { 811 body = p.parseTest(ec) 812 } else { 813 body = p.parseTestNoCond(ec) 814 } 815 816 return &LambdaExpr{ 817 Lambda: lambda, 818 Params: params, 819 Body: body, 820 } 821 } 822 823 func (p *parser) skipWS() { 824 for p.tok == NEWLINE || p.tok == INDENT || p.tok == OUTDENT { 825 p.nextToken() 826 } 827 } 828 829 func (p *parser) parseTestPrec(ec exprContext, prec int) Expr { 830 if prec >= len(preclevels) { 831 return p.parsePrimaryWithSuffix(ec) 832 } 833 834 // expr = NOT expr 835 if p.tok == NOT && prec == int(precedence[NOT]) { 836 pos := p.nextToken() 837 x := p.parseTestPrec(ec, prec) 838 return &UnaryExpr{ 839 OpPos: pos, 840 Op: NOT, 841 X: x, 842 } 843 } 844 845 return p.parseBinopExpr(ec, prec) 846 } 847 848 // expr = test (OP test)* 849 // Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing. 850 func (p *parser) parseBinopExpr(ec exprContext, prec int) Expr { 851 x := p.parseTestPrec(ec, prec+1) 852 for first := true; ; first = false { 853 if p.tok == NOT { 854 p.nextToken() // consume NOT 855 // In this context, NOT must be followed by IN. 856 // Replace NOT IN by a single NOT_IN token. 857 if p.tok != IN { 858 p.in.errorf(p.in.pos, "got %#v, want in", p.tok) 859 } 860 p.tok = NOT_IN 861 } 862 863 // Binary operator of specified precedence? 864 opprec := int(precedence[p.tok]) 865 if opprec < prec { 866 return x 867 } 868 869 // Comparisons are non-associative. 870 if !first && opprec == int(precedence[EQL]) { 871 p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)", 872 x.(*BinaryExpr).Op, p.tok) 873 } 874 875 op := p.tok 876 pos := p.nextToken() 877 y := p.parseTestPrec(ec, opprec+1) 878 x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y} 879 } 880 } 881 882 // precedence maps each operator to its precedence (0-7), or -1 for other tokens. 883 var precedence [maxToken]int8 884 885 // preclevels groups operators of equal precedence. 886 // Comparisons are nonassociative; other binary operators associate to the left. 887 // Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary. 888 // See https://github.com/google/exprcore-go/blob/master/doc/spec.md#binary-operators 889 var preclevels = [...][]Token{ 890 {OR}, // or 891 {AND}, // and 892 {NOT}, // not (unary) 893 {EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in 894 {PIPE}, // | 895 {CIRCUMFLEX}, // ^ 896 {AMP}, // & 897 {LTLT, GTGT}, // << >> 898 {MINUS, PLUS}, // - 899 {STAR, PERCENT, SLASH, SLASHSLASH}, // * % / // 900 } 901 902 func init() { 903 // populate precedence table 904 for i := range precedence { 905 precedence[i] = -1 906 } 907 for level, tokens := range preclevels { 908 for _, tok := range tokens { 909 precedence[tok] = int8(level) 910 } 911 } 912 } 913 914 // primary_with_suffix = primary 915 // | primary '.' IDENT 916 // | primary slice_suffix 917 // | primary call_suffix 918 func (p *parser) parsePrimaryWithSuffix(ec exprContext) Expr { 919 x := p.parsePrimary(ec) 920 for { 921 switch p.tok { 922 case DOT: 923 dot := p.nextToken() 924 id := p.parseIdent() 925 x = &DotExpr{Dot: dot, X: x, Name: id} 926 case LBRACK: 927 x = p.parseSliceSuffix(ec, x) 928 case LPAREN: 929 x = p.parseCallSuffix(ec, x) 930 default: 931 return x 932 } 933 } 934 } 935 936 // slice_suffix = '[' expr? ':' expr? ':' expr? ']' 937 func (p *parser) parseSliceSuffix(ec exprContext, x Expr) Expr { 938 lbrack := p.nextToken() 939 var lo, hi, step Expr 940 if p.tok != COLON { 941 y := p.parseExpr(false) 942 943 // index x[y] 944 if p.tok == RBRACK { 945 rbrack := p.nextToken() 946 return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack} 947 } 948 949 lo = y 950 } 951 952 // slice or substring x[lo:hi:step] 953 if p.tok == COLON { 954 p.nextToken() 955 if p.tok != COLON && p.tok != RBRACK { 956 hi = p.parseTest(ec) 957 } 958 } 959 if p.tok == COLON { 960 p.nextToken() 961 if p.tok != RBRACK { 962 step = p.parseTest(ec) 963 } 964 } 965 rbrack := p.consume(RBRACK) 966 return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack} 967 } 968 969 // call_suffix = '(' arg_list? ')' 970 func (p *parser) parseCallSuffix(ec exprContext, fn Expr) Expr { 971 lparen := p.consume(LPAREN) 972 var rparen Position 973 var args []Expr 974 975 if p.tok == RPAREN { 976 rparen = p.nextToken() 977 } else { 978 args = p.parseArgs(ec) 979 rparen = p.consume(RPAREN) 980 } 981 return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen} 982 } 983 984 // parseArgs parses a list of actual parameter values (arguments). 985 // It mirrors the structure of parseParams. 986 // arg_list = ((arg COMMA)* arg COMMA?)? 987 func (p *parser) parseArgs(ec exprContext) []Expr { 988 var args []Expr 989 for p.tok != RPAREN && p.tok != EOF { 990 if len(args) > 0 { 991 p.consume(COMMA, SEMI) 992 } 993 if p.tok == RPAREN { 994 break 995 } 996 997 // *args or **kwargs 998 if p.tok == STAR || p.tok == STARSTAR { 999 op := p.tok 1000 pos := p.nextToken() 1001 x := p.parseTest(ec) 1002 args = append(args, &UnaryExpr{ 1003 OpPos: pos, 1004 Op: op, 1005 X: x, 1006 }) 1007 continue 1008 } 1009 1010 var arg Expr 1011 1012 if p.tok == DEF { 1013 x := p.parseDefStmt() 1014 1015 arg = &BinaryExpr{ 1016 X: x.Name, 1017 OpPos: x.Def, 1018 Op: EQ, 1019 Y: &LambdaExpr{ 1020 Lambda: x.Def, 1021 Params: x.Params, 1022 Stmts: x.Body, 1023 }, 1024 } 1025 } else { 1026 // We use a different strategy from Bazel here to stay within LL(1). 1027 // Instead of looking ahead two tokens (IDENT, COLON) we parse 1028 // 'test = test' then check that the first was an IDENT. 1029 x := p.parseTest(ec) 1030 1031 if p.tok == COLON { 1032 // name: value 1033 if _, ok := x.(*Ident); !ok { 1034 p.in.errorf(p.in.pos, "keyword argument must have form name:expr") 1035 } 1036 eq := p.nextToken() 1037 y := p.parseTest(ec) 1038 arg = &BinaryExpr{ 1039 X: x, 1040 OpPos: eq, 1041 Op: EQ, 1042 Y: y, 1043 } 1044 } else { 1045 arg = x 1046 } 1047 } 1048 1049 args = append(args, arg) 1050 } 1051 return args 1052 } 1053 1054 // parseArgs parses a list of actual parameter values (arguments). 1055 // It mirrors the structure of parseParams. 1056 // arg_list = ((arg COMMA)* arg COMMA?)? 1057 func (p *parser) parseImportArgs(ec exprContext) []*BinaryExpr { 1058 var args []*BinaryExpr 1059 for p.tok != RPAREN && p.tok != EOF { 1060 if len(args) > 0 { 1061 if p.tok == SEMI { 1062 break 1063 } 1064 1065 p.consume(COMMA) 1066 } 1067 1068 var arg *BinaryExpr 1069 1070 // We use a different strategy from Bazel here to stay within LL(1). 1071 // Instead of looking ahead two tokens (IDENT, COLON) we parse 1072 // 'test = test' then check that the first was an IDENT. 1073 x := p.parseTest(ec) 1074 1075 if p.tok == COLON { 1076 // name: value 1077 if _, ok := x.(*Ident); !ok { 1078 p.in.errorf(p.in.pos, "keyword argument must have form name:expr") 1079 } 1080 eq := p.nextToken() 1081 y := p.parseTest(ec) 1082 arg = &BinaryExpr{ 1083 X: x, 1084 OpPos: eq, 1085 Op: EQ, 1086 Y: y, 1087 } 1088 } else { 1089 p.in.errorf(p.in.pos, "import only accepts keyword arguments") 1090 } 1091 1092 args = append(args, arg) 1093 } 1094 1095 return args 1096 } 1097 1098 // primary = IDENT 1099 // | INT | FLOAT 1100 // | STRING 1101 // | '[' ... // list literal or comprehension 1102 // | '{' ... // dict literal or comprehension 1103 // | '(' ... // tuple or parenthesized expression 1104 // | ('-'|'+'|'~') primary_with_suffix 1105 func (p *parser) parsePrimary(ec exprContext) Expr { 1106 p.skipWS() 1107 1108 switch p.tok { 1109 case IDENT: 1110 id := p.parseIdent() 1111 1112 if p.tok == ARROW { 1113 pos := p.nextToken() 1114 return p.parseArrow(pos, []Expr{id}) 1115 } 1116 1117 return id 1118 case INT, FLOAT, STRING: 1119 var val interface{} 1120 tok := p.tok 1121 switch tok { 1122 case INT: 1123 if p.tokval.bigInt != nil { 1124 val = p.tokval.bigInt 1125 } else { 1126 val = p.tokval.int 1127 } 1128 case FLOAT: 1129 val = p.tokval.float 1130 case STRING: 1131 val = p.tokval.string 1132 } 1133 raw := p.tokval.raw 1134 pos := p.nextToken() 1135 return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val} 1136 1137 case LBRACK: 1138 return p.parseList() 1139 1140 case LBRACE: 1141 return p.parseProto(ec) 1142 1143 case PERCENT_BRACE: 1144 return p.parseDict(ec) 1145 1146 case STARSTAR: 1147 if !ec.allowParams { 1148 p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok) 1149 } 1150 1151 op := p.tok 1152 pos := p.nextToken() 1153 x := p.parseIdent() 1154 1155 return &UnaryExpr{ 1156 OpPos: pos, 1157 Op: op, 1158 X: x, 1159 } 1160 case STAR: 1161 if !ec.allowParams { 1162 p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok) 1163 } 1164 1165 op := p.tok 1166 pos := p.nextToken() 1167 x := p.parseIdent() 1168 1169 return &UnaryExpr{ 1170 OpPos: pos, 1171 Op: op, 1172 X: x, 1173 } 1174 1175 case LPAREN: 1176 lparen := p.nextToken() 1177 if p.tok == RPAREN { 1178 // empty tuple 1179 rparen := p.nextToken() 1180 1181 if p.tok == ARROW { 1182 pos := p.nextToken() 1183 return p.parseArrow(pos, nil) 1184 } 1185 return &TupleExpr{Lparen: lparen, Rparen: rparen} 1186 } 1187 // e := p.parseExpr(true) // allow trailing comma 1188 ec := exprContext{ 1189 allowParams: true, 1190 allowTrailingComma: true, 1191 } 1192 1193 e := p.parseTest(ec) 1194 exprs := []Expr{e} 1195 1196 if p.tok == COMMA { 1197 // tuple 1198 exprs = p.parseExprs(exprs, ec) 1199 e = &TupleExpr{List: exprs} 1200 } 1201 1202 rparen := p.consume(RPAREN) 1203 1204 if p.tok == ARROW { 1205 pos := p.nextToken() 1206 1207 return p.parseArrow(pos, exprs) 1208 } else { 1209 return &ParenExpr{ 1210 Lparen: lparen, 1211 X: e, 1212 Rparen: rparen, 1213 } 1214 } 1215 1216 case ARROW: 1217 pos := p.nextToken() 1218 return p.parseArrow(pos, nil) 1219 1220 case MINUS, PLUS, TILDE: // unary 1221 tok := p.tok 1222 pos := p.nextToken() 1223 x := p.parsePrimaryWithSuffix(ec) 1224 return &UnaryExpr{ 1225 OpPos: pos, 1226 Op: tok, 1227 X: x, 1228 } 1229 case AT: 1230 pos := p.nextToken() 1231 ident := p.parseIdent() 1232 1233 return &AtExpr{ 1234 OpPos: pos, 1235 Name: ident.Name, 1236 NamePos: ident.NamePos, 1237 } 1238 case SHELL: 1239 return p.parseShell() 1240 case DSHELL_START: 1241 return p.parseDynamicShell() 1242 } 1243 1244 p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok) 1245 panic("unreachable") 1246 } 1247 1248 func (p *parser) parseArrowSuite() []Stmt { 1249 if p.tok == LBRACE { 1250 p.nextToken() // consume LBRACE 1251 // p.consume(INDENT) 1252 var stmts []Stmt 1253 for p.tok != RBRACE && p.tok != EOF { 1254 stmts = append(stmts, p.parseStmt2()) 1255 } 1256 p.consume(RBRACE) 1257 return stmts 1258 } 1259 1260 body := p.parseTest(exprContext{}) 1261 return []Stmt{&ExprStmt{X: body}} 1262 } 1263 1264 func (p *parser) parseArrowStmt(stmts []Stmt) []Stmt { 1265 if p.tok == DEF { 1266 return append(stmts, p.parseDefStmt()) 1267 } else if p.tok == IF { 1268 return append(stmts, p.parseIfStmt()) 1269 } else if p.tok == FOR { 1270 return append(stmts, p.parseForStmt()) 1271 } else if p.tok == WHILE { 1272 return append(stmts, p.parseWhileStmt()) 1273 } 1274 1275 return p.parseSimpleStmt(stmts, false) 1276 } 1277 1278 func (p *parser) parseArrow(pos Position, exprArgs []Expr) Expr { 1279 body := p.parseArrowSuite() 1280 1281 // Add a return to the end. If the final statement is also 1282 // an expression, then return it's value. Otherwise return None. 1283 1284 last := len(body) - 1 1285 1286 start, _ := body[last].Span() 1287 if expr, ok := body[last].(*ExprStmt); ok { 1288 body[last] = &ReturnStmt{Return: start, Result: expr.X} 1289 } else { 1290 body = append(body, &ReturnStmt{Return: start}) 1291 } 1292 1293 return &LambdaExpr{ 1294 Lambda: pos, 1295 Params: exprArgs, 1296 Stmts: body, 1297 } 1298 } 1299 1300 // list = '[' ']' 1301 // | '[' expr ']' 1302 // | '[' expr expr_list ']' 1303 // | '[' expr (FOR loop_variables IN expr)+ ']' 1304 func (p *parser) parseList() Expr { 1305 ec := exprContext{} 1306 lbrack := p.nextToken() 1307 if p.tok == RBRACK { 1308 // empty List 1309 rbrack := p.nextToken() 1310 return &ListExpr{Lbrack: lbrack, Rbrack: rbrack} 1311 } 1312 1313 x := p.parseTest(ec) 1314 1315 if p.tok == FOR { 1316 // list comprehension 1317 return p.parseComprehensionSuffix(lbrack, x, RBRACK) 1318 } 1319 1320 exprs := []Expr{x} 1321 if p.tok == COMMA { 1322 // multi-item list literal 1323 ec := exprContext{ 1324 allowTrailingComma: true, 1325 } 1326 exprs = p.parseExprs(exprs, ec) // allow trailing comma 1327 } 1328 1329 rbrack := p.consume(RBRACK) 1330 return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack} 1331 } 1332 1333 // proto = '{' '}' 1334 // | '{' proto_entry_list '}' 1335 func (p *parser) parseProto(ec exprContext) Expr { 1336 lbrace := p.nextToken() 1337 if p.tok == RBRACE { 1338 // empty dict 1339 rbrace := p.nextToken() 1340 return &ProtoExpr{Lbrace: lbrace, Rbrace: rbrace} 1341 } 1342 1343 x := p.parseProtoEntry(ec) 1344 1345 entries := []Stmt{x} 1346 for p.tok == SEMI { 1347 p.nextToken() 1348 if p.tok == RBRACE { 1349 break 1350 } 1351 entries = append(entries, p.parseProtoEntry(ec)) 1352 } 1353 1354 rbrace := p.consume(RBRACE) 1355 return &ProtoExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace} 1356 } 1357 1358 // dict_entry = test ':' test 1359 func (p *parser) parseProtoEntry(ec exprContext) *ProtoEntry { 1360 if p.tok == DEF { 1361 de := p.parseDefStmt() 1362 return &ProtoEntry{Key: de.Name, Colon: de.Name.NamePos, Value: de} 1363 } 1364 1365 k := p.parseIdent() 1366 colon := p.consume(COLON) 1367 v := p.parseTest(ec) 1368 return &ProtoEntry{Key: k, Colon: colon, Value: &ExprStmt{X: v}} 1369 } 1370 1371 // dict = '%{' '}' 1372 // | '%{' dict_entry_list '}' 1373 // | '%{' dict_entry FOR loop_variables IN expr '}' 1374 func (p *parser) parseDict(ec exprContext) Expr { 1375 lbrace := p.nextToken() 1376 if p.tok == RBRACE { 1377 // empty dict 1378 rbrace := p.nextToken() 1379 return &DictExpr{Lbrace: lbrace, Rbrace: rbrace} 1380 } 1381 1382 x := p.parseDictEntry(ec) 1383 1384 if p.tok == FOR { 1385 // dict comprehension 1386 return p.parseComprehensionSuffix(lbrace, x, RBRACE) 1387 } 1388 1389 entries := []Expr{x} 1390 for p.tok == COMMA { 1391 p.nextToken() 1392 if p.tok == RBRACE { 1393 break 1394 } 1395 entries = append(entries, p.parseDictEntry(ec)) 1396 } 1397 1398 rbrace := p.consume(RBRACE) 1399 return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace} 1400 } 1401 1402 // dict_entry = test ':' test 1403 func (p *parser) parseDictEntry(ec exprContext) *DictEntry { 1404 k := p.parseTest(ec) 1405 colon := p.consume(COLON) 1406 v := p.parseTest(ec) 1407 return &DictEntry{Key: k, Colon: colon, Value: v} 1408 } 1409 1410 // comp_suffix = FOR loopvars IN expr comp_suffix 1411 // | IF expr comp_suffix 1412 // | ']' or ')' (end) 1413 // 1414 // There can be multiple FOR/IF clauses; the first is always a FOR. 1415 func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr { 1416 ec := exprContext{} 1417 var clauses []Node 1418 for p.tok != endBrace { 1419 if p.tok == FOR { 1420 pos := p.nextToken() 1421 vars := p.parseForLoopVariables() 1422 in := p.consume(IN) 1423 // Following Python 3, the operand of IN cannot be: 1424 // - a conditional expression ('x if y else z'), 1425 // due to conflicts in Python grammar 1426 // ('if' is used by the comprehension); 1427 // - a lambda expression 1428 // - an unparenthesized tuple. 1429 x := p.parseTestPrec(ec, 0) 1430 clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x}) 1431 } else if p.tok == IF { 1432 pos := p.nextToken() 1433 cond := p.parseTestNoCond(ec) 1434 clauses = append(clauses, &IfClause{If: pos, Cond: cond}) 1435 } else if p.tok == SEMI { 1436 // indicates a semi before a final brace 1437 p.nextToken() 1438 break 1439 } else { 1440 p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace) 1441 } 1442 } 1443 rbrace := p.nextToken() 1444 1445 return &Comprehension{ 1446 Curly: endBrace == RBRACE, 1447 Lbrack: lbrace, 1448 Body: body, 1449 Clauses: clauses, 1450 Rbrack: rbrace, 1451 } 1452 } 1453 1454 func terminatesExprList(tok Token) bool { 1455 switch tok { 1456 case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI: 1457 return true 1458 } 1459 return false 1460 } 1461 1462 // Comment assignment. 1463 // We build two lists of all subnodes, preorder and postorder. 1464 // The preorder list is ordered by start location, with outer nodes first. 1465 // The postorder list is ordered by end location, with outer nodes last. 1466 // We use the preorder list to assign each whole-line comment to the syntax 1467 // immediately following it, and we use the postorder list to assign each 1468 // end-of-line comment to the syntax immediately preceding it. 1469 1470 // flattenAST returns the list of AST nodes, both in prefix order and in postfix 1471 // order. 1472 func flattenAST(root Node) (pre, post []Node) { 1473 stack := []Node{} 1474 Walk(root, func(n Node) bool { 1475 if n != nil { 1476 pre = append(pre, n) 1477 stack = append(stack, n) 1478 } else { 1479 post = append(post, stack[len(stack)-1]) 1480 stack = stack[:len(stack)-1] 1481 } 1482 return true 1483 }) 1484 return pre, post 1485 } 1486 1487 // assignComments attaches comments to nearby syntax. 1488 func (p *parser) assignComments(n Node) { 1489 // Leave early if there are no comments 1490 if len(p.in.lineComments)+len(p.in.suffixComments) == 0 { 1491 return 1492 } 1493 1494 pre, post := flattenAST(n) 1495 1496 // Assign line comments to syntax immediately following. 1497 line := p.in.lineComments 1498 for _, x := range pre { 1499 start, _ := x.Span() 1500 1501 switch x.(type) { 1502 case *File: 1503 continue 1504 } 1505 1506 for len(line) > 0 && !start.isBefore(line[0].Start) { 1507 x.AllocComments() 1508 x.Comments().Before = append(x.Comments().Before, line[0]) 1509 line = line[1:] 1510 } 1511 } 1512 1513 // Remaining line comments go at end of file. 1514 if len(line) > 0 { 1515 n.AllocComments() 1516 n.Comments().After = append(n.Comments().After, line...) 1517 } 1518 1519 // Assign suffix comments to syntax immediately before. 1520 suffix := p.in.suffixComments 1521 for i := len(post) - 1; i >= 0; i-- { 1522 x := post[i] 1523 1524 // Do not assign suffix comments to file 1525 switch x.(type) { 1526 case *File: 1527 continue 1528 } 1529 1530 _, end := x.Span() 1531 if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) { 1532 x.AllocComments() 1533 x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1]) 1534 suffix = suffix[:len(suffix)-1] 1535 } 1536 } 1537 }