// Source: github.com/k14s/starlark-go@v0.0.0-20200720175618-3a5c849cc368/syntax/parse.go

// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package syntax

// This file defines a recursive-descent parser for Starlark.
// The LL(1) grammar of Starlark and the names of many productions follow Python 2.7.
//
// TODO(adonovan): use syntax.Error more systematically throughout the
// package.  Verify that error positions are correct using the
// chunkedfile mechanism.

import "log"

// Enable this flag to print the token stream and log.Fatal on the first error.
// In this file the flag only gates the token trace printed by nextToken;
// NOTE(review): the log.Fatal behaviour presumably lives in the scanner — confirm.
const debug = false

// A Mode value is a set of flags (or 0) that controls optional parser functionality.
// Flags are independent bits and may be OR-ed together.
type Mode uint

const (
	RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments
	BlockScanner   Mode = 1 << iota // use if/end syntax instead of indent
)

// Parse parses the input data and returns the corresponding parse tree.
//
// If src != nil, Parse parses the source from src and the filename
// is only used when recording position information.
// The type of the argument for the src parameter must be string,
// []byte, or io.Reader.
// If src == nil, Parse parses the file specified by filename.
//
// The mode flags select optional behaviour: RetainComments keeps comments
// in the tree, and BlockScanner wraps the scanner in a block scanner.
34 func Parse(filename string, src interface{}, mode Mode) (f *File, err error) { 35 in, err := newScanner(filename, src, mode&RetainComments != 0) 36 if err != nil { 37 return nil, err 38 } 39 var inScanner scannerInterface = in 40 if (mode & BlockScanner) == BlockScanner { 41 inScanner = newBlockScanner(in) 42 } 43 p := parser{in: inScanner} 44 defer p.in.recover(&err) 45 46 p.nextToken() // read first lookahead token 47 f = p.parseFile() 48 if f != nil { 49 f.Path = filename 50 } 51 p.assignComments(f) 52 return f, nil 53 } 54 55 // ParseCompoundStmt parses a single compound statement: 56 // a blank line, a def, for, while, or if statement, or a 57 // semicolon-separated list of simple statements followed 58 // by a newline. These are the units on which the REPL operates. 59 // ParseCompoundStmt does not consume any following input. 60 // The parser calls the readline function each 61 // time it needs a new line of input. 62 func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) { 63 in, err := newScanner(filename, readline, false) 64 if err != nil { 65 return nil, err 66 } 67 68 p := parser{in: newBlockScanner(in)} 69 defer p.in.recover(&err) 70 71 p.nextToken() // read first lookahead token 72 73 var stmts []Stmt 74 switch p.tok { 75 case DEF, IF, FOR, WHILE: 76 stmts = p.parseStmt(stmts) 77 case NEWLINE: 78 // blank line 79 default: 80 stmts = p.parseSimpleStmt(stmts, false) 81 // Require but don't consume newline, to avoid blocking again. 82 if p.tok != NEWLINE { 83 p.in.errorf(p.in.getPos(), "invalid syntax") 84 } 85 } 86 87 return &File{Path: filename, Stmts: stmts}, nil 88 } 89 90 // ParseExpr parses a Starlark expression. 91 // A comma-separated list of expressions is parsed as a tuple. 92 // See Parse for explanation of parameters. 
93 func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) { 94 in, err := newScanner(filename, src, mode&RetainComments != 0) 95 if err != nil { 96 return nil, err 97 } 98 p := parser{in: newBlockScanner(in)} 99 defer p.in.recover(&err) 100 101 p.nextToken() // read first lookahead token 102 103 // Use parseExpr, not parseTest, to permit an unparenthesized tuple. 104 expr = p.parseExpr(false) 105 106 // A following newline (e.g. "f()\n") appears outside any brackets, 107 // on a non-blank line, and thus results in a NEWLINE token. 108 if p.tok == NEWLINE { 109 p.nextToken() 110 } 111 112 if p.tok != EOF { 113 p.in.errorf(p.in.getPos(), "got %#v after expression, want EOF", p.tok) 114 } 115 p.assignComments(expr) 116 return expr, nil 117 } 118 119 type parser struct { 120 in scannerInterface 121 tok Token 122 tokval tokenValue 123 } 124 125 // nextToken advances the scanner and returns the position of the 126 // previous token. 127 func (p *parser) nextToken() Position { 128 oldpos := p.tokval.pos 129 p.tok = p.in.nextToken(&p.tokval) 130 // enable to see the token stream 131 if debug { 132 log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos) 133 } 134 return oldpos 135 } 136 137 // file_input = (NEWLINE | stmt)* EOF 138 func (p *parser) parseFile() *File { 139 var stmts []Stmt 140 for p.tok != EOF { 141 if p.tok == NEWLINE { 142 p.nextToken() 143 continue 144 } 145 stmts = p.parseStmt(stmts) 146 } 147 return &File{Stmts: stmts} 148 } 149 150 func (p *parser) parseStmt(stmts []Stmt) []Stmt { 151 if p.tok == DEF { 152 return append(stmts, p.parseDefStmt()) 153 } else if p.tok == IF { 154 return append(stmts, p.parseIfStmt()) 155 } else if p.tok == FOR { 156 return append(stmts, p.parseForStmt()) 157 } else if p.tok == WHILE { 158 return append(stmts, p.parseWhileStmt()) 159 } 160 return p.parseSimpleStmt(stmts, true) 161 } 162 163 func (p *parser) parseDefStmt() Stmt { 164 defpos := p.nextToken() // consume DEF 165 id := p.parseIdent() 
166 p.consume(LPAREN) 167 params := p.parseParams() 168 p.consume(RPAREN) 169 p.consume(COLON) 170 body := p.parseSuite() 171 return &DefStmt{ 172 Def: defpos, 173 Name: id, 174 Params: params, 175 Body: body, 176 } 177 } 178 179 func (p *parser) parseIfStmt() Stmt { 180 ifpos := p.nextToken() // consume IF 181 cond := p.parseTest() 182 p.consume(COLON) 183 body := p.parseSuite() 184 ifStmt := &IfStmt{ 185 If: ifpos, 186 Cond: cond, 187 True: body, 188 } 189 tail := ifStmt 190 for p.tok == ELIF { 191 elifpos := p.nextToken() // consume ELIF 192 cond := p.parseTest() 193 p.consume(COLON) 194 body := p.parseSuite() 195 elif := &IfStmt{ 196 If: elifpos, 197 Cond: cond, 198 True: body, 199 } 200 tail.ElsePos = elifpos 201 tail.False = []Stmt{elif} 202 tail = elif 203 } 204 if p.tok == ELSE { 205 tail.ElsePos = p.nextToken() // consume ELSE 206 p.consume(COLON) 207 tail.False = p.parseSuite() 208 } 209 return ifStmt 210 } 211 212 func (p *parser) parseForStmt() Stmt { 213 forpos := p.nextToken() // consume FOR 214 vars := p.parseForLoopVariables() 215 p.consume(IN) 216 x := p.parseExpr(false) 217 p.consume(COLON) 218 body := p.parseSuite() 219 return &ForStmt{ 220 For: forpos, 221 Vars: vars, 222 X: x, 223 Body: body, 224 } 225 } 226 227 func (p *parser) parseWhileStmt() Stmt { 228 whilepos := p.nextToken() // consume WHILE 229 cond := p.parseTest() 230 p.consume(COLON) 231 body := p.parseSuite() 232 return &WhileStmt{ 233 While: whilepos, 234 Cond: cond, 235 Body: body, 236 } 237 } 238 239 // Equivalent to 'exprlist' production in Python grammar. 240 // 241 // loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA? 242 func (p *parser) parseForLoopVariables() Expr { 243 // Avoid parseExpr because it would consume the IN token 244 // following x in "for x in y: ...". 
245 v := p.parsePrimaryWithSuffix() 246 if p.tok != COMMA { 247 return v 248 } 249 250 list := []Expr{v} 251 for p.tok == COMMA { 252 p.nextToken() 253 if terminatesExprList(p.tok) { 254 break 255 } 256 list = append(list, p.parsePrimaryWithSuffix()) 257 } 258 return &TupleExpr{List: list} 259 } 260 261 // simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE 262 // In REPL mode, it does not consume the NEWLINE. 263 func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt { 264 for { 265 stmts = append(stmts, p.parseSmallStmt()) 266 if p.tok != SEMI { 267 break 268 } 269 p.nextToken() // consume SEMI 270 if p.tok == NEWLINE || p.tok == EOF { 271 break 272 } 273 } 274 // EOF without NEWLINE occurs in `if x: pass`, for example. 275 if p.tok != EOF && consumeNL { 276 p.consume(NEWLINE) 277 } 278 279 return stmts 280 } 281 282 // small_stmt = RETURN expr? 283 // | PASS | BREAK | CONTINUE 284 // | LOAD ... 285 // | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign 286 // | expr 287 func (p *parser) parseSmallStmt() Stmt { 288 switch p.tok { 289 case RETURN: 290 pos := p.nextToken() // consume RETURN 291 var result Expr 292 if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI { 293 result = p.parseExpr(false) 294 } 295 return &ReturnStmt{Return: pos, Result: result} 296 297 case BREAK, CONTINUE, PASS: 298 tok := p.tok 299 pos := p.nextToken() // consume it 300 return &BranchStmt{Token: tok, TokenPos: pos} 301 302 case LOAD: 303 return p.parseLoadStmt() 304 } 305 306 // Assignment 307 x := p.parseExpr(false) 308 switch p.tok { 309 case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ: 310 op := p.tok 311 pos := p.nextToken() // consume op 312 rhs := p.parseExpr(false) 313 return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs} 314 } 315 316 // Expression statement (e.g. function call, doc string). 
317 return &ExprStmt{X: x} 318 } 319 320 // stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')' 321 func (p *parser) parseLoadStmt() *LoadStmt { 322 loadPos := p.nextToken() // consume LOAD 323 lparen := p.consume(LPAREN) 324 325 if p.tok != STRING { 326 p.in.errorf(p.in.getPos(), "first operand of load statement must be a string literal") 327 } 328 module := p.parsePrimary().(*Literal) 329 330 var from, to []*Ident 331 for p.tok != RPAREN && p.tok != EOF { 332 p.consume(COMMA) 333 if p.tok == RPAREN { 334 break // allow trailing comma 335 } 336 switch p.tok { 337 case STRING: 338 // load("module", "id") 339 // To name is same as original. 340 lit := p.parsePrimary().(*Literal) 341 id := &Ident{ 342 NamePos: lit.TokenPos.add(`"`), 343 Name: lit.Value.(string), 344 } 345 to = append(to, id) 346 from = append(from, id) 347 348 case IDENT: 349 // load("module", to="from") 350 id := p.parseIdent() 351 to = append(to, id) 352 if p.tok != EQ { 353 p.in.errorf(p.in.getPos(), `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name) 354 } 355 p.consume(EQ) 356 if p.tok != STRING { 357 p.in.errorf(p.in.getPos(), `original name of loaded symbol must be quoted: %s="originalname"`, id.Name) 358 } 359 lit := p.parsePrimary().(*Literal) 360 from = append(from, &Ident{ 361 NamePos: lit.TokenPos.add(`"`), 362 Name: lit.Value.(string), 363 }) 364 365 case RPAREN: 366 p.in.errorf(p.in.getPos(), "trailing comma in load statement") 367 368 default: 369 p.in.errorf(p.in.getPos(), `load operand must be "name" or localname="name" (got %#v)`, p.tok) 370 } 371 } 372 rparen := p.consume(RPAREN) 373 374 if len(to) == 0 { 375 p.in.errorf(lparen, "load statement must import at least 1 symbol") 376 } 377 return &LoadStmt{ 378 Load: loadPos, 379 Module: module, 380 To: to, 381 From: from, 382 Rparen: rparen, 383 } 384 } 385 386 // suite is typically what follows a COLON (e.g. after DEF or FOR). 
387 // suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT 388 func (p *parser) parseSuite() []Stmt { 389 if p.tok == NEWLINE { 390 p.nextToken() // consume NEWLINE 391 p.consume(INDENT) 392 var stmts []Stmt 393 for p.tok != OUTDENT && p.tok != EOF { 394 stmts = p.parseStmt(stmts) 395 } 396 p.consume(OUTDENT) 397 return stmts 398 } 399 400 return p.parseSimpleStmt(nil, true) 401 } 402 403 func (p *parser) parseIdent() *Ident { 404 if p.tok != IDENT { 405 for _, v := range keywordToken { 406 if p.tok == v { 407 p.in.errorf(p.in.getPos(), "use of reserved keyword '%s' is not allowed (expected identifier)", p.tokval.raw) 408 } 409 } 410 p.in.errorf(p.in.getPos(), "not an identifier") 411 } 412 id := &Ident{ 413 NamePos: p.tokval.pos, 414 Name: p.tokval.raw, 415 } 416 p.nextToken() 417 return id 418 } 419 420 func (p *parser) consume(t Token) Position { 421 if p.tok != t { 422 p.in.errorf(p.in.getPos(), "got %#v, want %#v", p.tok, t) 423 } 424 return p.nextToken() 425 } 426 427 // params = (param COMMA)* param 428 // | 429 // 430 // param = IDENT 431 // | IDENT EQ test 432 // | STAR 433 // | STAR IDENT 434 // | STARSTAR IDENT 435 // 436 // parseParams parses a parameter list. 
The resulting expressions are of the form: 437 // 438 // *Ident x 439 // *Binary{Op: EQ, X: *Ident, Y: Expr} x=y 440 // *Unary{Op: STAR} * 441 // *Unary{Op: STAR, X: *Ident} *args 442 // *Unary{Op: STARSTAR, X: *Ident} **kwargs 443 func (p *parser) parseParams() []Expr { 444 var params []Expr 445 stars := false 446 for p.tok != RPAREN && p.tok != COLON && p.tok != EOF { 447 if len(params) > 0 { 448 p.consume(COMMA) 449 } 450 if p.tok == RPAREN { 451 // list can end with a COMMA if there is neither * nor ** 452 if stars { 453 p.in.errorf(p.in.getPos(), "got %#v, want parameter", p.tok) 454 } 455 break 456 } 457 458 // * or *args or **kwargs 459 if p.tok == STAR || p.tok == STARSTAR { 460 stars = true 461 op := p.tok 462 pos := p.nextToken() 463 var x Expr 464 if op == STARSTAR || p.tok == IDENT { 465 x = p.parseIdent() 466 } 467 params = append(params, &UnaryExpr{ 468 OpPos: pos, 469 Op: op, 470 X: x, 471 }) 472 continue 473 } 474 475 // IDENT 476 // IDENT = test 477 id := p.parseIdent() 478 if p.tok == EQ { // default value 479 eq := p.nextToken() 480 dflt := p.parseTest() 481 params = append(params, &BinaryExpr{ 482 X: id, 483 OpPos: eq, 484 Op: EQ, 485 Y: dflt, 486 }) 487 continue 488 } 489 490 params = append(params, id) 491 } 492 return params 493 } 494 495 // parseExpr parses an expression, possible consisting of a 496 // comma-separated list of 'test' expressions. 497 // 498 // In many cases we must use parseTest to avoid ambiguity such as 499 // f(x, y) vs. f((x, y)). 500 func (p *parser) parseExpr(inParens bool) Expr { 501 x := p.parseTest() 502 if p.tok != COMMA { 503 return x 504 } 505 506 // tuple 507 exprs := p.parseExprs([]Expr{x}, inParens) 508 return &TupleExpr{List: exprs} 509 } 510 511 // parseExprs parses a comma-separated list of expressions, starting with the comma. 512 // It is used to parse tuples and list elements. 513 // expr_list = (',' expr)* ','? 
514 func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr { 515 for p.tok == COMMA { 516 pos := p.nextToken() 517 if terminatesExprList(p.tok) { 518 if !allowTrailingComma { 519 p.in.error(pos, "unparenthesized tuple with trailing comma") 520 } 521 break 522 } 523 exprs = append(exprs, p.parseTest()) 524 } 525 return exprs 526 } 527 528 // parseTest parses a 'test', a single-component expression. 529 func (p *parser) parseTest() Expr { 530 if p.tok == LAMBDA { 531 return p.parseLambda(true) 532 } 533 534 x := p.parseTestPrec(0) 535 536 // conditional expression (t IF cond ELSE f) 537 if p.tok == IF { 538 ifpos := p.nextToken() 539 cond := p.parseTestPrec(0) 540 if p.tok != ELSE { 541 p.in.error(ifpos, "conditional expression without else clause") 542 } 543 elsepos := p.nextToken() 544 else_ := p.parseTest() 545 return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_} 546 } 547 548 return x 549 } 550 551 // parseTestNoCond parses a a single-component expression without 552 // consuming a trailing 'if expr else expr'. 553 func (p *parser) parseTestNoCond() Expr { 554 if p.tok == LAMBDA { 555 return p.parseLambda(false) 556 } 557 return p.parseTestPrec(0) 558 } 559 560 // parseLambda parses a lambda expression. 561 // The allowCond flag allows the body to be an 'a if b else c' conditional. 
562 func (p *parser) parseLambda(allowCond bool) Expr { 563 lambda := p.nextToken() 564 var params []Expr 565 if p.tok != COLON { 566 params = p.parseParams() 567 } 568 p.consume(COLON) 569 570 var body Expr 571 if allowCond { 572 body = p.parseTest() 573 } else { 574 body = p.parseTestNoCond() 575 } 576 577 return &LambdaExpr{ 578 Lambda: lambda, 579 Params: params, 580 Body: body, 581 } 582 } 583 584 func (p *parser) parseTestPrec(prec int) Expr { 585 if prec >= len(preclevels) { 586 return p.parsePrimaryWithSuffix() 587 } 588 589 // expr = NOT expr 590 if p.tok == NOT && prec == int(precedence[NOT]) { 591 pos := p.nextToken() 592 x := p.parseTestPrec(prec) 593 return &UnaryExpr{ 594 OpPos: pos, 595 Op: NOT, 596 X: x, 597 } 598 } 599 600 return p.parseBinopExpr(prec) 601 } 602 603 // expr = test (OP test)* 604 // Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing. 605 func (p *parser) parseBinopExpr(prec int) Expr { 606 x := p.parseTestPrec(prec + 1) 607 for first := true; ; first = false { 608 if p.tok == NOT { 609 p.nextToken() // consume NOT 610 // In this context, NOT must be followed by IN. 611 // Replace NOT IN by a single NOT_IN token. 612 if p.tok != IN { 613 p.in.errorf(p.in.getPos(), "got %#v, want in", p.tok) 614 } 615 p.tok = NOT_IN 616 } 617 618 // Binary operator of specified precedence? 619 opprec := int(precedence[p.tok]) 620 if opprec < prec { 621 return x 622 } 623 624 // Comparisons are non-associative. 625 if !first && opprec == int(precedence[EQL]) { 626 p.in.errorf(p.in.getPos(), "%s does not associate with %s (use parens)", 627 x.(*BinaryExpr).Op, p.tok) 628 } 629 630 op := p.tok 631 pos := p.nextToken() 632 y := p.parseTestPrec(opprec + 1) 633 x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y} 634 } 635 } 636 637 // precedence maps each operator to its precedence (0-7), or -1 for other tokens. 638 var precedence [maxToken]int8 639 640 // preclevels groups operators of equal precedence. 
// Levels are listed from lowest precedence (index 0) to highest.
// Comparisons are nonassociative; other binary operators associate to the left.
// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary.
// See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators
var preclevels = [...][]Token{
	{OR},                                   // or
	{AND},                                  // and
	{NOT},                                  // not (unary)
	{EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in
	{PIPE},                                 // |
	{CIRCUMFLEX},                           // ^
	{AMP},                                  // &
	{LTLT, GTGT},                           // << >>
	{MINUS, PLUS},                          // - +
	{STAR, PERCENT, SLASH, SLASHSLASH},     // * % / //
}

// init fills in the precedence table from preclevels:
// every token starts at -1 and each listed operator gets its level index.
func init() {
	// populate precedence table
	for i := range precedence {
		precedence[i] = -1
	}
	for level, tokens := range preclevels {
		for _, tok := range tokens {
			precedence[tok] = int8(level)
		}
	}
}

// parsePrimaryWithSuffix parses a primary expression followed by any
// number of attribute selections, index/slice operations, and calls,
// applied left to right.
//
// primary_with_suffix = primary
//                     | primary '.' IDENT
//                     | primary slice_suffix
//                     | primary call_suffix
func (p *parser) parsePrimaryWithSuffix() Expr {
	x := p.parsePrimary()
	for {
		switch p.tok {
		case DOT:
			dot := p.nextToken()
			id := p.parseIdent()
			x = &DotExpr{Dot: dot, X: x, Name: id}
		case LBRACK:
			x = p.parseSliceSuffix(x)
		case LPAREN:
			x = p.parseCallSuffix(x)
		default:
			return x
		}
	}
}

// parseSliceSuffix parses the bracketed suffix applied to x.
// The current token must be LBRACK. The result is an IndexExpr for
// x[y] and a SliceExpr for any form containing a colon.
//
// slice_suffix = '[' expr? ':' expr?  ':' expr? ']'
func (p *parser) parseSliceSuffix(x Expr) Expr {
	lbrack := p.nextToken() // consume LBRACK
	var lo, hi, step Expr
	if p.tok != COLON {
		y := p.parseExpr(false)

		// index x[y]
		if p.tok == RBRACK {
			rbrack := p.nextToken()
			return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack}
		}

		lo = y
	}

	// slice or substring x[lo:hi:step]
	if p.tok == COLON {
		p.nextToken()
		if p.tok != COLON && p.tok != RBRACK {
			hi = p.parseTest()
		}
	}
	if p.tok == COLON {
		p.nextToken()
		if p.tok != RBRACK {
			step = p.parseTest()
		}
	}
	rbrack := p.consume(RBRACK)
	return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack}
}

// parseCallSuffix parses the argument list of a call to fn.
//
// call_suffix = '(' arg_list? ')'
func (p *parser) parseCallSuffix(fn Expr) Expr {
	lparen := p.consume(LPAREN)
	var rparen Position
	var args []Expr
	if p.tok == RPAREN {
		rparen = p.nextToken()
	} else {
		args = p.parseArgs()
		rparen = p.consume(RPAREN)
	}
	return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen}
}

// parseArgs parses a list of actual parameter values (arguments).
// It mirrors the structure of parseParams.
// Positional arguments are returned as plain expressions, name=value
// arguments as BinaryExpr{Op: EQ}, and *args / **kwargs as UnaryExpr.
//
// arg_list = ((arg COMMA)* arg COMMA?)?
func (p *parser) parseArgs() []Expr {
	var args []Expr
	stars := false // whether a * or ** argument has been seen
	for p.tok != RPAREN && p.tok != EOF {
		if len(args) > 0 {
			p.consume(COMMA)
		}
		if p.tok == RPAREN {
			// list can end with a COMMA if there is neither * nor **
			if stars {
				p.in.errorf(p.in.getPos(), `got %#v, want argument`, p.tok)
			}
			break
		}

		// *args or **kwargs
		if p.tok == STAR || p.tok == STARSTAR {
			stars = true
			op := p.tok
			pos := p.nextToken()
			x := p.parseTest()
			args = append(args, &UnaryExpr{
				OpPos: pos,
				Op:    op,
				X:     x,
			})
			continue
		}

		// We use a different strategy from Bazel here to stay within LL(1).
		// Instead of looking ahead two tokens (IDENT, EQ) we parse
		// 'test = test' then check that the first was an IDENT.
		x := p.parseTest()

		if p.tok == EQ {
			// name = value
			if _, ok := x.(*Ident); !ok {
				p.in.errorf(p.in.getPos(), "keyword argument must have form name=expr")
			}
			eq := p.nextToken()
			y := p.parseTest()
			x = &BinaryExpr{
				X:     x,
				OpPos: eq,
				Op:    EQ,
				Y:     y,
			}
		}

		args = append(args, x)
	}
	return args
}

// parsePrimary parses a primary expression, dispatching on the
// current token. Any token that cannot start a primary is an error.
//
// primary = IDENT
//         | INT | FLOAT
//         | STRING
//         | '[' ...                    // list literal or comprehension
//         | '{' ...                    // dict literal or comprehension
//         | '(' ...                    // tuple or parenthesized expression
//         | ('-'|'+'|'~') primary_with_suffix
func (p *parser) parsePrimary() Expr {
	switch p.tok {
	case IDENT:
		return p.parseIdent()

	case INT, FLOAT, STRING:
		var val interface{}
		tok := p.tok
		switch tok {
		case INT:
			// The scanner sets bigInt when it has a big-integer value
			// for the literal; otherwise the value is in tokval.int.
			if p.tokval.bigInt != nil {
				val = p.tokval.bigInt
			} else {
				val = p.tokval.int
			}
		case FLOAT:
			val = p.tokval.float
		case STRING:
			val = p.tokval.string
		}
		raw := p.tokval.raw // capture before nextToken overwrites tokval
		pos := p.nextToken()
		return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val}

	case LBRACK:
		return p.parseList()

	case LBRACE:
		return p.parseDict()

	case LPAREN:
		lparen := p.nextToken()
		if p.tok == RPAREN {
			// empty tuple
			rparen := p.nextToken()
			return &TupleExpr{Lparen: lparen, Rparen: rparen}
		}
		e := p.parseExpr(true) // allow trailing comma
		rparen := p.consume(RPAREN)
		return &ParenExpr{
			Lparen: lparen,
			X:      e,
			Rparen: rparen,
		}

	case MINUS, PLUS, TILDE: // unary
		tok := p.tok
		pos := p.nextToken()
		x := p.parsePrimaryWithSuffix()
		return &UnaryExpr{
			OpPos: pos,
			Op:    tok,
			X:     x,
		}
	}
	p.in.errorf(p.in.getPos(), "got %#v, want primary expression", p.tok)
	// NOTE(review): errorf is expected not to return (presumably it panics
	// and the panic is handled by the deferred recover in the Parse* entry
	// points) — confirm against the scanner.
	panic("unreachable")
}

// parseList parses a list literal or list comprehension.
// The current token must be LBRACK.
//
// list = '[' ']'
//      | '[' expr ']'
//      | '[' expr expr_list ']'
//      | '[' expr (FOR loop_variables IN expr)+ ']'
func (p *parser) parseList() Expr {
	lbrack := p.nextToken() // consume LBRACK
	if p.tok == RBRACK {
		// empty List
		rbrack := p.nextToken()
		return &ListExpr{Lbrack: lbrack, Rbrack: rbrack}
	}

	x := p.parseTest()

	if p.tok == FOR {
		// list comprehension
		return p.parseComprehensionSuffix(lbrack, x, RBRACK)
	}

	exprs := []Expr{x}
	if p.tok == COMMA {
		// multi-item list literal
		exprs = p.parseExprs(exprs, true) // allow trailing comma
	}

	rbrack := p.consume(RBRACK)
	return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack}
}

// parseDict parses a dict literal or dict comprehension.
// The current token must be LBRACE.
//
// dict = '{' '}'
//      | '{' dict_entry_list '}'
//      | '{' dict_entry FOR loop_variables IN expr '}'
func (p *parser) parseDict() Expr {
	lbrace := p.nextToken() // consume LBRACE
	if p.tok == RBRACE {
		// empty dict
		rbrace := p.nextToken()
		return &DictExpr{Lbrace: lbrace, Rbrace: rbrace}
	}

	x := p.parseDictEntry()

	if p.tok == FOR {
		// dict comprehension
		return p.parseComprehensionSuffix(lbrace, x, RBRACE)
	}

	entries := []Expr{x}
	for p.tok == COMMA {
		p.nextToken()
		if p.tok == RBRACE {
			break // allow trailing comma
		}
		entries = append(entries, p.parseDictEntry())
	}

	rbrace := p.consume(RBRACE)
	return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace}
}

// parseDictEntry parses a single key: value pair.
//
// dict_entry = test ':' test
func (p *parser) parseDictEntry() *DictEntry {
	k := p.parseTest()
	colon := p.consume(COLON)
	v := p.parseTest()
	return &DictEntry{Key: k, Colon: colon, Value: v}
}

// comp_suffix = FOR loopvars IN expr comp_suffix
//             | IF expr comp_suffix
//             | ']' or '}' (end)
//
// There can be multiple FOR/IF clauses; the first is always a FOR.
934 func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr { 935 var clauses []Node 936 for p.tok != endBrace { 937 if p.tok == FOR { 938 pos := p.nextToken() 939 vars := p.parseForLoopVariables() 940 in := p.consume(IN) 941 // Following Python 3, the operand of IN cannot be: 942 // - a conditional expression ('x if y else z'), 943 // due to conflicts in Python grammar 944 // ('if' is used by the comprehension); 945 // - a lambda expression 946 // - an unparenthesized tuple. 947 x := p.parseTestPrec(0) 948 clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x}) 949 } else if p.tok == IF { 950 pos := p.nextToken() 951 cond := p.parseTestNoCond() 952 clauses = append(clauses, &IfClause{If: pos, Cond: cond}) 953 } else { 954 p.in.errorf(p.in.getPos(), "got %#v, want '%s', for, or if", p.tok, endBrace) 955 } 956 } 957 rbrace := p.nextToken() 958 959 return &Comprehension{ 960 Curly: endBrace == RBRACE, 961 Lbrack: lbrace, 962 Body: body, 963 Clauses: clauses, 964 Rbrack: rbrace, 965 } 966 } 967 968 func terminatesExprList(tok Token) bool { 969 switch tok { 970 case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI: 971 return true 972 } 973 return false 974 } 975 976 // Comment assignment. 977 // We build two lists of all subnodes, preorder and postorder. 978 // The preorder list is ordered by start location, with outer nodes first. 979 // The postorder list is ordered by end location, with outer nodes last. 980 // We use the preorder list to assign each whole-line comment to the syntax 981 // immediately following it, and we use the postorder list to assign each 982 // end-of-line comment to the syntax immediately preceding it. 983 984 // flattenAST returns the list of AST nodes, both in prefix order and in postfix 985 // order. 
986 func flattenAST(root Node) (pre, post []Node) { 987 stack := []Node{} 988 Walk(root, func(n Node) bool { 989 if n != nil { 990 pre = append(pre, n) 991 stack = append(stack, n) 992 } else { 993 post = append(post, stack[len(stack)-1]) 994 stack = stack[:len(stack)-1] 995 } 996 return true 997 }) 998 return pre, post 999 } 1000 1001 // assignComments attaches comments to nearby syntax. 1002 func (p *parser) assignComments(n Node) { 1003 // Leave early if there are no comments 1004 if len(p.in.getLineComments())+len(p.in.getSuffixComments()) == 0 { 1005 return 1006 } 1007 1008 pre, post := flattenAST(n) 1009 1010 // Assign line comments to syntax immediately following. 1011 line := p.in.getLineComments() 1012 for _, x := range pre { 1013 start, _ := x.Span() 1014 1015 switch x.(type) { 1016 case *File: 1017 continue 1018 } 1019 1020 for len(line) > 0 && !start.isBefore(line[0].Start) { 1021 x.AllocComments() 1022 x.Comments().Before = append(x.Comments().Before, line[0]) 1023 line = line[1:] 1024 } 1025 } 1026 1027 // Remaining line comments go at end of file. 1028 if len(line) > 0 { 1029 n.AllocComments() 1030 n.Comments().After = append(n.Comments().After, line...) 1031 } 1032 1033 // Assign suffix comments to syntax immediately before. 1034 suffix := p.in.getSuffixComments() 1035 for i := len(post) - 1; i >= 0; i-- { 1036 x := post[i] 1037 1038 // Do not assign suffix comments to file 1039 switch x.(type) { 1040 case *File: 1041 continue 1042 } 1043 1044 _, end := x.Span() 1045 if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) { 1046 x.AllocComments() 1047 x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1]) 1048 suffix = suffix[:len(suffix)-1] 1049 } 1050 } 1051 }