go.starlark.net@v0.0.0-20231101134539-556fd59b42f6/syntax/parse.go (about) 1 // Copyright 2017 The Bazel Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package syntax 6 7 // This file defines a recursive-descent parser for Starlark. 8 // The LL(1) grammar of Starlark and the names of many productions follow Python 2.7. 9 // 10 // TODO(adonovan): use syntax.Error more systematically throughout the 11 // package. Verify that error positions are correct using the 12 // chunkedfile mechanism. 13 14 import "log" 15 16 // Enable this flag to print the token stream and log.Fatal on the first error. 17 const debug = false 18 19 // A Mode value is a set of flags (or 0) that controls optional parser functionality. 20 type Mode uint 21 22 const ( 23 RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments 24 ) 25 26 // Parse calls the Parse method of LegacyFileOptions(). 27 // Deprecated: relies on legacy global variables. 28 func Parse(filename string, src interface{}, mode Mode) (f *File, err error) { 29 return LegacyFileOptions().Parse(filename, src, mode) 30 } 31 32 // Parse parses the input data and returns the corresponding parse tree. 33 // 34 // If src != nil, Parse parses the source from src and the filename 35 // is only used when recording position information. 36 // The type of the argument for the src parameter must be string, 37 // []byte, io.Reader, or FilePortion. 38 // If src == nil, Parse parses the file specified by filename. 39 func (opts *FileOptions) Parse(filename string, src interface{}, mode Mode) (f *File, err error) { 40 in, err := newScanner(filename, src, mode&RetainComments != 0) 41 if err != nil { 42 return nil, err 43 } 44 p := parser{options: opts, in: in} 45 defer p.in.recover(&err) 46 47 p.nextToken() // read first lookahead token 48 f = p.parseFile() 49 if f != nil { 50 f.Path = filename 51 } 52 p.assignComments(f) 53 return f, nil 54 } 55 56 // ParseCompoundStmt calls the ParseCompoundStmt method of LegacyFileOptions(). 57 // Deprecated: relies on legacy global variables. 58 func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) { 59 return LegacyFileOptions().ParseCompoundStmt(filename, readline) 60 } 61 62 // ParseCompoundStmt parses a single compound statement: 63 // a blank line, a def, for, while, or if statement, or a 64 // semicolon-separated list of simple statements followed 65 // by a newline. These are the units on which the REPL operates. 66 // ParseCompoundStmt does not consume any following input. 67 // The parser calls the readline function each 68 // time it needs a new line of input. 69 func (opts *FileOptions) ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) { 70 in, err := newScanner(filename, readline, false) 71 if err != nil { 72 return nil, err 73 } 74 75 p := parser{options: opts, in: in} 76 defer p.in.recover(&err) 77 78 p.nextToken() // read first lookahead token 79 80 var stmts []Stmt 81 switch p.tok { 82 case DEF, IF, FOR, WHILE: 83 stmts = p.parseStmt(stmts) 84 case NEWLINE: 85 // blank line 86 default: 87 stmts = p.parseSimpleStmt(stmts, false) 88 // Require but don't consume newline, to avoid blocking again. 89 if p.tok != NEWLINE { 90 p.in.errorf(p.in.pos, "invalid syntax") 91 } 92 } 93 94 return &File{Options: opts, Path: filename, Stmts: stmts}, nil 95 } 96 97 // ParseExpr calls the ParseExpr method of LegacyFileOptions(). 98 // Deprecated: relies on legacy global variables. 99 func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) { 100 return LegacyFileOptions().ParseExpr(filename, src, mode) 101 } 102 103 // ParseExpr parses a Starlark expression. 104 // A comma-separated list of expressions is parsed as a tuple. 105 // See Parse for explanation of parameters. 106 func (opts *FileOptions) ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) { 107 in, err := newScanner(filename, src, mode&RetainComments != 0) 108 if err != nil { 109 return nil, err 110 } 111 p := parser{options: opts, in: in} 112 defer p.in.recover(&err) 113 114 p.nextToken() // read first lookahead token 115 116 // Use parseExpr, not parseTest, to permit an unparenthesized tuple. 117 expr = p.parseExpr(false) 118 119 // A following newline (e.g. "f()\n") appears outside any brackets, 120 // on a non-blank line, and thus results in a NEWLINE token. 121 if p.tok == NEWLINE { 122 p.nextToken() 123 } 124 125 if p.tok != EOF { 126 p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok) 127 } 128 p.assignComments(expr) 129 return expr, nil 130 } 131 132 type parser struct { 133 options *FileOptions 134 in *scanner 135 tok Token 136 tokval tokenValue 137 } 138 139 // nextToken advances the scanner and returns the position of the 140 // previous token. 141 func (p *parser) nextToken() Position { 142 oldpos := p.tokval.pos 143 p.tok = p.in.nextToken(&p.tokval) 144 // enable to see the token stream 145 if debug { 146 log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos) 147 } 148 return oldpos 149 } 150 151 // file_input = (NEWLINE | stmt)* EOF 152 func (p *parser) parseFile() *File { 153 var stmts []Stmt 154 for p.tok != EOF { 155 if p.tok == NEWLINE { 156 p.nextToken() 157 continue 158 } 159 stmts = p.parseStmt(stmts) 160 } 161 return &File{Options: p.options, Stmts: stmts} 162 } 163 164 func (p *parser) parseStmt(stmts []Stmt) []Stmt { 165 if p.tok == DEF { 166 return append(stmts, p.parseDefStmt()) 167 } else if p.tok == IF { 168 return append(stmts, p.parseIfStmt()) 169 } else if p.tok == FOR { 170 return append(stmts, p.parseForStmt()) 171 } else if p.tok == WHILE { 172 return append(stmts, p.parseWhileStmt()) 173 } 174 return p.parseSimpleStmt(stmts, true) 175 } 176 177 func (p *parser) parseDefStmt() Stmt { 178 defpos := p.nextToken() // consume DEF 179 id := p.parseIdent() 180 lparen := p.consume(LPAREN) 181 params := p.parseParams() 182 rparen := p.consume(RPAREN) 183 p.consume(COLON) 184 body := p.parseSuite() 185 return &DefStmt{ 186 Def: defpos, 187 Name: id, 188 Lparen: lparen, 189 Params: params, 190 Rparen: rparen, 191 Body: body, 192 } 193 } 194 195 func (p *parser) parseIfStmt() Stmt { 196 ifpos := p.nextToken() // consume IF 197 cond := p.parseTest() 198 p.consume(COLON) 199 body := p.parseSuite() 200 ifStmt := &IfStmt{ 201 If: ifpos, 202 Cond: cond, 203 True: body, 204 } 205 tail := ifStmt 206 for p.tok == ELIF { 207 elifpos := p.nextToken() // consume ELIF 208 cond := p.parseTest() 209 p.consume(COLON) 210 body := p.parseSuite() 211 elif := &IfStmt{ 212 If: elifpos, 213 Cond: cond, 214 True: body, 215 } 216 tail.ElsePos = elifpos 217 tail.False = []Stmt{elif} 218 tail = elif 219 } 220 if p.tok == ELSE { 221 tail.ElsePos = p.nextToken() // consume ELSE 222 p.consume(COLON) 223 tail.False = p.parseSuite() 224 } 225 return ifStmt 226 } 227 228 func (p *parser) parseForStmt() Stmt { 229 forpos := p.nextToken() // consume FOR 230 vars := p.parseForLoopVariables() 231 p.consume(IN) 232 x := p.parseExpr(false) 233 p.consume(COLON) 234 body := p.parseSuite() 235 return &ForStmt{ 236 For: forpos, 237 Vars: vars, 238 X: x, 239 Body: body, 240 } 241 } 242 243 func (p *parser) parseWhileStmt() Stmt { 244 whilepos := p.nextToken() // consume WHILE 245 cond := p.parseTest() 246 p.consume(COLON) 247 body := p.parseSuite() 248 return &WhileStmt{ 249 While: whilepos, 250 Cond: cond, 251 Body: body, 252 } 253 } 254 255 // Equivalent to 'exprlist' production in Python grammar. 256 // 257 // loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA? 258 func (p *parser) parseForLoopVariables() Expr { 259 // Avoid parseExpr because it would consume the IN token 260 // following x in "for x in y: ...". 261 v := p.parsePrimaryWithSuffix() 262 if p.tok != COMMA { 263 return v 264 } 265 266 list := []Expr{v} 267 for p.tok == COMMA { 268 p.nextToken() 269 if terminatesExprList(p.tok) { 270 break 271 } 272 list = append(list, p.parsePrimaryWithSuffix()) 273 } 274 return &TupleExpr{List: list} 275 } 276 277 // simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE 278 // In REPL mode, it does not consume the NEWLINE. 279 func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt { 280 for { 281 stmts = append(stmts, p.parseSmallStmt()) 282 if p.tok != SEMI { 283 break 284 } 285 p.nextToken() // consume SEMI 286 if p.tok == NEWLINE || p.tok == EOF { 287 break 288 } 289 } 290 // EOF without NEWLINE occurs in `if x: pass`, for example. 291 if p.tok != EOF && consumeNL { 292 p.consume(NEWLINE) 293 } 294 295 return stmts 296 } 297 298 // small_stmt = RETURN expr? 299 // 300 // | PASS | BREAK | CONTINUE 301 // | LOAD ... 302 // | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign 303 // | expr 304 func (p *parser) parseSmallStmt() Stmt { 305 switch p.tok { 306 case RETURN: 307 pos := p.nextToken() // consume RETURN 308 var result Expr 309 if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI { 310 result = p.parseExpr(false) 311 } 312 return &ReturnStmt{Return: pos, Result: result} 313 314 case BREAK, CONTINUE, PASS: 315 tok := p.tok 316 pos := p.nextToken() // consume it 317 return &BranchStmt{Token: tok, TokenPos: pos} 318 319 case LOAD: 320 return p.parseLoadStmt() 321 } 322 323 // Assignment 324 x := p.parseExpr(false) 325 switch p.tok { 326 case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ: 327 op := p.tok 328 pos := p.nextToken() // consume op 329 rhs := p.parseExpr(false) 330 return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs} 331 } 332 333 // Expression statement (e.g. function call, doc string). 334 return &ExprStmt{X: x} 335 } 336 337 // stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')' 338 func (p *parser) parseLoadStmt() *LoadStmt { 339 loadPos := p.nextToken() // consume LOAD 340 lparen := p.consume(LPAREN) 341 342 if p.tok != STRING { 343 p.in.errorf(p.in.pos, "first operand of load statement must be a string literal") 344 } 345 module := p.parsePrimary().(*Literal) 346 347 var from, to []*Ident 348 for p.tok != RPAREN && p.tok != EOF { 349 p.consume(COMMA) 350 if p.tok == RPAREN { 351 break // allow trailing comma 352 } 353 switch p.tok { 354 case STRING: 355 // load("module", "id") 356 // To name is same as original. 357 lit := p.parsePrimary().(*Literal) 358 id := &Ident{ 359 NamePos: lit.TokenPos.add(`"`), 360 Name: lit.Value.(string), 361 } 362 to = append(to, id) 363 from = append(from, id) 364 365 case IDENT: 366 // load("module", to="from") 367 id := p.parseIdent() 368 to = append(to, id) 369 if p.tok != EQ { 370 p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name) 371 } 372 p.consume(EQ) 373 if p.tok != STRING { 374 p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name) 375 } 376 lit := p.parsePrimary().(*Literal) 377 from = append(from, &Ident{ 378 NamePos: lit.TokenPos.add(`"`), 379 Name: lit.Value.(string), 380 }) 381 382 case RPAREN: 383 p.in.errorf(p.in.pos, "trailing comma in load statement") 384 385 default: 386 p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok) 387 } 388 } 389 rparen := p.consume(RPAREN) 390 391 if len(to) == 0 { 392 p.in.errorf(lparen, "load statement must import at least 1 symbol") 393 } 394 return &LoadStmt{ 395 Load: loadPos, 396 Module: module, 397 To: to, 398 From: from, 399 Rparen: rparen, 400 } 401 } 402 403 // suite is typically what follows a COLON (e.g. after DEF or FOR). 404 // suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT 405 func (p *parser) parseSuite() []Stmt { 406 if p.tok == NEWLINE { 407 p.nextToken() // consume NEWLINE 408 p.consume(INDENT) 409 var stmts []Stmt 410 for p.tok != OUTDENT && p.tok != EOF { 411 stmts = p.parseStmt(stmts) 412 } 413 p.consume(OUTDENT) 414 return stmts 415 } 416 417 return p.parseSimpleStmt(nil, true) 418 } 419 420 func (p *parser) parseIdent() *Ident { 421 if p.tok != IDENT { 422 p.in.error(p.in.pos, "not an identifier") 423 } 424 id := &Ident{ 425 NamePos: p.tokval.pos, 426 Name: p.tokval.raw, 427 } 428 p.nextToken() 429 return id 430 } 431 432 func (p *parser) consume(t Token) Position { 433 if p.tok != t { 434 p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t) 435 } 436 return p.nextToken() 437 } 438 439 // params = (param COMMA)* param COMMA? 440 // 441 // | 442 // 443 // param = IDENT 444 // 445 // | IDENT EQ test 446 // | STAR 447 // | STAR IDENT 448 // | STARSTAR IDENT 449 // 450 // parseParams parses a parameter list. The resulting expressions are of the form: 451 // 452 // *Ident x 453 // *Binary{Op: EQ, X: *Ident, Y: Expr} x=y 454 // *Unary{Op: STAR} * 455 // *Unary{Op: STAR, X: *Ident} *args 456 // *Unary{Op: STARSTAR, X: *Ident} **kwargs 457 func (p *parser) parseParams() []Expr { 458 var params []Expr 459 for p.tok != RPAREN && p.tok != COLON && p.tok != EOF { 460 if len(params) > 0 { 461 p.consume(COMMA) 462 } 463 if p.tok == RPAREN { 464 break 465 } 466 467 // * or *args or **kwargs 468 if p.tok == STAR || p.tok == STARSTAR { 469 op := p.tok 470 pos := p.nextToken() 471 var x Expr 472 if op == STARSTAR || p.tok == IDENT { 473 x = p.parseIdent() 474 } 475 params = append(params, &UnaryExpr{ 476 OpPos: pos, 477 Op: op, 478 X: x, 479 }) 480 continue 481 } 482 483 // IDENT 484 // IDENT = test 485 id := p.parseIdent() 486 if p.tok == EQ { // default value 487 eq := p.nextToken() 488 dflt := p.parseTest() 489 params = append(params, &BinaryExpr{ 490 X: id, 491 OpPos: eq, 492 Op: EQ, 493 Y: dflt, 494 }) 495 continue 496 } 497 498 params = append(params, id) 499 } 500 return params 501 } 502 503 // parseExpr parses an expression, possible consisting of a 504 // comma-separated list of 'test' expressions. 505 // 506 // In many cases we must use parseTest to avoid ambiguity such as 507 // f(x, y) vs. f((x, y)). 508 func (p *parser) parseExpr(inParens bool) Expr { 509 x := p.parseTest() 510 if p.tok != COMMA { 511 return x 512 } 513 514 // tuple 515 exprs := p.parseExprs([]Expr{x}, inParens) 516 return &TupleExpr{List: exprs} 517 } 518 519 // parseExprs parses a comma-separated list of expressions, starting with the comma. 520 // It is used to parse tuples and list elements. 521 // expr_list = (',' expr)* ','? 522 func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr { 523 for p.tok == COMMA { 524 pos := p.nextToken() 525 if terminatesExprList(p.tok) { 526 if !allowTrailingComma { 527 p.in.error(pos, "unparenthesized tuple with trailing comma") 528 } 529 break 530 } 531 exprs = append(exprs, p.parseTest()) 532 } 533 return exprs 534 } 535 536 // parseTest parses a 'test', a single-component expression. 537 func (p *parser) parseTest() Expr { 538 if p.tok == LAMBDA { 539 return p.parseLambda(true) 540 } 541 542 x := p.parseTestPrec(0) 543 544 // conditional expression (t IF cond ELSE f) 545 if p.tok == IF { 546 ifpos := p.nextToken() 547 cond := p.parseTestPrec(0) 548 if p.tok != ELSE { 549 p.in.error(ifpos, "conditional expression without else clause") 550 } 551 elsepos := p.nextToken() 552 else_ := p.parseTest() 553 return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_} 554 } 555 556 return x 557 } 558 559 // parseTestNoCond parses a a single-component expression without 560 // consuming a trailing 'if expr else expr'. 561 func (p *parser) parseTestNoCond() Expr { 562 if p.tok == LAMBDA { 563 return p.parseLambda(false) 564 } 565 return p.parseTestPrec(0) 566 } 567 568 // parseLambda parses a lambda expression. 569 // The allowCond flag allows the body to be an 'a if b else c' conditional. 570 func (p *parser) parseLambda(allowCond bool) Expr { 571 lambda := p.nextToken() 572 var params []Expr 573 if p.tok != COLON { 574 params = p.parseParams() 575 } 576 p.consume(COLON) 577 578 var body Expr 579 if allowCond { 580 body = p.parseTest() 581 } else { 582 body = p.parseTestNoCond() 583 } 584 585 return &LambdaExpr{ 586 Lambda: lambda, 587 Params: params, 588 Body: body, 589 } 590 } 591 592 func (p *parser) parseTestPrec(prec int) Expr { 593 if prec >= len(preclevels) { 594 return p.parsePrimaryWithSuffix() 595 } 596 597 // expr = NOT expr 598 if p.tok == NOT && prec == int(precedence[NOT]) { 599 pos := p.nextToken() 600 x := p.parseTestPrec(prec) 601 return &UnaryExpr{ 602 OpPos: pos, 603 Op: NOT, 604 X: x, 605 } 606 } 607 608 return p.parseBinopExpr(prec) 609 } 610 611 // expr = test (OP test)* 612 // Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing. 613 func (p *parser) parseBinopExpr(prec int) Expr { 614 x := p.parseTestPrec(prec + 1) 615 for first := true; ; first = false { 616 if p.tok == NOT { 617 p.nextToken() // consume NOT 618 // In this context, NOT must be followed by IN. 619 // Replace NOT IN by a single NOT_IN token. 620 if p.tok != IN { 621 p.in.errorf(p.in.pos, "got %#v, want in", p.tok) 622 } 623 p.tok = NOT_IN 624 } 625 626 // Binary operator of specified precedence? 627 opprec := int(precedence[p.tok]) 628 if opprec < prec { 629 return x 630 } 631 632 // Comparisons are non-associative. 633 if !first && opprec == int(precedence[EQL]) { 634 p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)", 635 x.(*BinaryExpr).Op, p.tok) 636 } 637 638 op := p.tok 639 pos := p.nextToken() 640 y := p.parseTestPrec(opprec + 1) 641 x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y} 642 } 643 } 644 645 // precedence maps each operator to its precedence (0-7), or -1 for other tokens. 646 var precedence [maxToken]int8 647 648 // preclevels groups operators of equal precedence. 649 // Comparisons are nonassociative; other binary operators associate to the left. 650 // Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary. 651 // See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators 652 var preclevels = [...][]Token{ 653 {OR}, // or 654 {AND}, // and 655 {NOT}, // not (unary) 656 {EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in 657 {PIPE}, // | 658 {CIRCUMFLEX}, // ^ 659 {AMP}, // & 660 {LTLT, GTGT}, // << >> 661 {MINUS, PLUS}, // - 662 {STAR, PERCENT, SLASH, SLASHSLASH}, // * % / // 663 } 664 665 func init() { 666 // populate precedence table 667 for i := range precedence { 668 precedence[i] = -1 669 } 670 for level, tokens := range preclevels { 671 for _, tok := range tokens { 672 precedence[tok] = int8(level) 673 } 674 } 675 } 676 677 // primary_with_suffix = primary 678 // 679 // | primary '.' IDENT 680 // | primary slice_suffix 681 // | primary call_suffix 682 func (p *parser) parsePrimaryWithSuffix() Expr { 683 x := p.parsePrimary() 684 for { 685 switch p.tok { 686 case DOT: 687 dot := p.nextToken() 688 id := p.parseIdent() 689 x = &DotExpr{Dot: dot, X: x, Name: id} 690 case LBRACK: 691 x = p.parseSliceSuffix(x) 692 case LPAREN: 693 x = p.parseCallSuffix(x) 694 default: 695 return x 696 } 697 } 698 } 699 700 // slice_suffix = '[' expr? ':' expr? ':' expr? ']' 701 func (p *parser) parseSliceSuffix(x Expr) Expr { 702 lbrack := p.nextToken() 703 var lo, hi, step Expr 704 if p.tok != COLON { 705 y := p.parseExpr(false) 706 707 // index x[y] 708 if p.tok == RBRACK { 709 rbrack := p.nextToken() 710 return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack} 711 } 712 713 lo = y 714 } 715 716 // slice or substring x[lo:hi:step] 717 if p.tok == COLON { 718 p.nextToken() 719 if p.tok != COLON && p.tok != RBRACK { 720 hi = p.parseTest() 721 } 722 } 723 if p.tok == COLON { 724 p.nextToken() 725 if p.tok != RBRACK { 726 step = p.parseTest() 727 } 728 } 729 rbrack := p.consume(RBRACK) 730 return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack} 731 } 732 733 // call_suffix = '(' arg_list? ')' 734 func (p *parser) parseCallSuffix(fn Expr) Expr { 735 lparen := p.consume(LPAREN) 736 var rparen Position 737 var args []Expr 738 if p.tok == RPAREN { 739 rparen = p.nextToken() 740 } else { 741 args = p.parseArgs() 742 rparen = p.consume(RPAREN) 743 } 744 return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen} 745 } 746 747 // parseArgs parses a list of actual parameter values (arguments). 748 // It mirrors the structure of parseParams. 749 // arg_list = ((arg COMMA)* arg COMMA?)? 750 func (p *parser) parseArgs() []Expr { 751 var args []Expr 752 for p.tok != RPAREN && p.tok != EOF { 753 if len(args) > 0 { 754 p.consume(COMMA) 755 } 756 if p.tok == RPAREN { 757 break 758 } 759 760 // *args or **kwargs 761 if p.tok == STAR || p.tok == STARSTAR { 762 op := p.tok 763 pos := p.nextToken() 764 x := p.parseTest() 765 args = append(args, &UnaryExpr{ 766 OpPos: pos, 767 Op: op, 768 X: x, 769 }) 770 continue 771 } 772 773 // We use a different strategy from Bazel here to stay within LL(1). 774 // Instead of looking ahead two tokens (IDENT, EQ) we parse 775 // 'test = test' then check that the first was an IDENT. 776 x := p.parseTest() 777 778 if p.tok == EQ { 779 // name = value 780 if _, ok := x.(*Ident); !ok { 781 p.in.errorf(p.in.pos, "keyword argument must have form name=expr") 782 } 783 eq := p.nextToken() 784 y := p.parseTest() 785 x = &BinaryExpr{ 786 X: x, 787 OpPos: eq, 788 Op: EQ, 789 Y: y, 790 } 791 } 792 793 args = append(args, x) 794 } 795 return args 796 } 797 798 // primary = IDENT 799 // 800 // | INT | FLOAT | STRING | BYTES 801 // | '[' ... // list literal or comprehension 802 // | '{' ... // dict literal or comprehension 803 // | '(' ... // tuple or parenthesized expression 804 // | ('-'|'+'|'~') primary_with_suffix 805 func (p *parser) parsePrimary() Expr { 806 switch p.tok { 807 case IDENT: 808 return p.parseIdent() 809 810 case INT, FLOAT, STRING, BYTES: 811 var val interface{} 812 tok := p.tok 813 switch tok { 814 case INT: 815 if p.tokval.bigInt != nil { 816 val = p.tokval.bigInt 817 } else { 818 val = p.tokval.int 819 } 820 case FLOAT: 821 val = p.tokval.float 822 case STRING, BYTES: 823 val = p.tokval.string 824 } 825 raw := p.tokval.raw 826 pos := p.nextToken() 827 return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val} 828 829 case LBRACK: 830 return p.parseList() 831 832 case LBRACE: 833 return p.parseDict() 834 835 case LPAREN: 836 lparen := p.nextToken() 837 if p.tok == RPAREN { 838 // empty tuple 839 rparen := p.nextToken() 840 return &TupleExpr{Lparen: lparen, Rparen: rparen} 841 } 842 e := p.parseExpr(true) // allow trailing comma 843 rparen := p.consume(RPAREN) 844 return &ParenExpr{ 845 Lparen: lparen, 846 X: e, 847 Rparen: rparen, 848 } 849 850 case MINUS, PLUS, TILDE: // unary 851 tok := p.tok 852 pos := p.nextToken() 853 x := p.parsePrimaryWithSuffix() 854 return &UnaryExpr{ 855 OpPos: pos, 856 Op: tok, 857 X: x, 858 } 859 } 860 p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok) 861 panic("unreachable") 862 } 863 864 // list = '[' ']' 865 // 866 // | '[' expr ']' 867 // | '[' expr expr_list ']' 868 // | '[' expr (FOR loop_variables IN expr)+ ']' 869 func (p *parser) parseList() Expr { 870 lbrack := p.nextToken() 871 if p.tok == RBRACK { 872 // empty List 873 rbrack := p.nextToken() 874 return &ListExpr{Lbrack: lbrack, Rbrack: rbrack} 875 } 876 877 x := p.parseTest() 878 879 if p.tok == FOR { 880 // list comprehension 881 return p.parseComprehensionSuffix(lbrack, x, RBRACK) 882 } 883 884 exprs := []Expr{x} 885 if p.tok == COMMA { 886 // multi-item list literal 887 exprs = p.parseExprs(exprs, true) // allow trailing comma 888 } 889 890 rbrack := p.consume(RBRACK) 891 return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack} 892 } 893 894 // dict = '{' '}' 895 // 896 // | '{' dict_entry_list '}' 897 // | '{' dict_entry FOR loop_variables IN expr '}' 898 func (p *parser) parseDict() Expr { 899 lbrace := p.nextToken() 900 if p.tok == RBRACE { 901 // empty dict 902 rbrace := p.nextToken() 903 return &DictExpr{Lbrace: lbrace, Rbrace: rbrace} 904 } 905 906 x := p.parseDictEntry() 907 908 if p.tok == FOR { 909 // dict comprehension 910 return p.parseComprehensionSuffix(lbrace, x, RBRACE) 911 } 912 913 entries := []Expr{x} 914 for p.tok == COMMA { 915 p.nextToken() 916 if p.tok == RBRACE { 917 break 918 } 919 entries = append(entries, p.parseDictEntry()) 920 } 921 922 rbrace := p.consume(RBRACE) 923 return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace} 924 } 925 926 // dict_entry = test ':' test 927 func (p *parser) parseDictEntry() *DictEntry { 928 k := p.parseTest() 929 colon := p.consume(COLON) 930 v := p.parseTest() 931 return &DictEntry{Key: k, Colon: colon, Value: v} 932 } 933 934 // comp_suffix = FOR loopvars IN expr comp_suffix 935 // 936 // | IF expr comp_suffix 937 // | ']' or ')' (end) 938 // 939 // There can be multiple FOR/IF clauses; the first is always a FOR. 940 func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr { 941 var clauses []Node 942 for p.tok != endBrace { 943 if p.tok == FOR { 944 pos := p.nextToken() 945 vars := p.parseForLoopVariables() 946 in := p.consume(IN) 947 // Following Python 3, the operand of IN cannot be: 948 // - a conditional expression ('x if y else z'), 949 // due to conflicts in Python grammar 950 // ('if' is used by the comprehension); 951 // - a lambda expression 952 // - an unparenthesized tuple. 953 x := p.parseTestPrec(0) 954 clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x}) 955 } else if p.tok == IF { 956 pos := p.nextToken() 957 cond := p.parseTestNoCond() 958 clauses = append(clauses, &IfClause{If: pos, Cond: cond}) 959 } else { 960 p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace) 961 } 962 } 963 rbrace := p.nextToken() 964 965 return &Comprehension{ 966 Curly: endBrace == RBRACE, 967 Lbrack: lbrace, 968 Body: body, 969 Clauses: clauses, 970 Rbrack: rbrace, 971 } 972 } 973 974 func terminatesExprList(tok Token) bool { 975 switch tok { 976 case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI: 977 return true 978 } 979 return false 980 } 981 982 // Comment assignment. 983 // We build two lists of all subnodes, preorder and postorder. 984 // The preorder list is ordered by start location, with outer nodes first. 985 // The postorder list is ordered by end location, with outer nodes last. 986 // We use the preorder list to assign each whole-line comment to the syntax 987 // immediately following it, and we use the postorder list to assign each 988 // end-of-line comment to the syntax immediately preceding it. 989 990 // flattenAST returns the list of AST nodes, both in prefix order and in postfix 991 // order. 992 func flattenAST(root Node) (pre, post []Node) { 993 stack := []Node{} 994 Walk(root, func(n Node) bool { 995 if n != nil { 996 pre = append(pre, n) 997 stack = append(stack, n) 998 } else { 999 post = append(post, stack[len(stack)-1]) 1000 stack = stack[:len(stack)-1] 1001 } 1002 return true 1003 }) 1004 return pre, post 1005 } 1006 1007 // assignComments attaches comments to nearby syntax. 1008 func (p *parser) assignComments(n Node) { 1009 // Leave early if there are no comments 1010 if len(p.in.lineComments)+len(p.in.suffixComments) == 0 { 1011 return 1012 } 1013 1014 pre, post := flattenAST(n) 1015 1016 // Assign line comments to syntax immediately following. 1017 line := p.in.lineComments 1018 for _, x := range pre { 1019 start, _ := x.Span() 1020 1021 switch x.(type) { 1022 case *File: 1023 continue 1024 } 1025 1026 for len(line) > 0 && !start.isBefore(line[0].Start) { 1027 x.AllocComments() 1028 x.Comments().Before = append(x.Comments().Before, line[0]) 1029 line = line[1:] 1030 } 1031 } 1032 1033 // Remaining line comments go at end of file. 1034 if len(line) > 0 { 1035 n.AllocComments() 1036 n.Comments().After = append(n.Comments().After, line...) 1037 } 1038 1039 // Assign suffix comments to syntax immediately before. 1040 suffix := p.in.suffixComments 1041 for i := len(post) - 1; i >= 0; i-- { 1042 x := post[i] 1043 1044 // Do not assign suffix comments to file 1045 switch x.(type) { 1046 case *File: 1047 continue 1048 } 1049 1050 _, end := x.Span() 1051 if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) { 1052 x.AllocComments() 1053 x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1]) 1054 suffix = suffix[:len(suffix)-1] 1055 } 1056 } 1057 }