github.com/google/skylark@v0.0.0-20181101142754-a5f7082aabed/syntax/parse.go (about) 1 // Copyright 2017 The Bazel Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package syntax 6 7 // This file defines a recursive-descent parser for Skylark. 8 // The LL(1) grammar of Skylark and the names of many productions follow Python 2.7. 9 // 10 // TODO(adonovan): use syntax.Error more systematically throughout the 11 // package. Verify that error positions are correct using the 12 // chunkedfile mechanism. 13 14 import log "log" 15 16 // Enable this flag to print the token stream and log.Fatal on the first error. 17 const debug = false 18 19 // A Mode value is a set of flags (or 0) that controls optional parser functionality. 20 type Mode uint 21 22 const ( 23 RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments 24 ) 25 26 // Parse parses the input data and returns the corresponding parse tree. 27 // 28 // If src != nil, ParseFile parses the source from src and the filename 29 // is only used when recording position information. 30 // The type of the argument for the src parameter must be string, 31 // []byte, or io.Reader. 32 // If src == nil, ParseFile parses the file specified by filename. 33 func Parse(filename string, src interface{}, mode Mode) (f *File, err error) { 34 in, err := newScanner(filename, src, mode&RetainComments != 0) 35 if err != nil { 36 return nil, err 37 } 38 p := parser{in: in} 39 defer p.in.recover(&err) 40 41 p.nextToken() // read first lookahead token 42 f = p.parseFile() 43 if f != nil { 44 f.Path = filename 45 } 46 p.assignComments(f) 47 return f, nil 48 } 49 50 // ParseExpr parses a Skylark expression. 51 // See Parse for explanation of parameters. 
52 func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) { 53 in, err := newScanner(filename, src, mode&RetainComments != 0) 54 if err != nil { 55 return nil, err 56 } 57 p := parser{in: in} 58 defer p.in.recover(&err) 59 60 p.nextToken() // read first lookahead token 61 expr = p.parseTest() 62 63 // A following newline (e.g. "f()\n") appears outside any brackets, 64 // on a non-blank line, and thus results in a NEWLINE token. 65 if p.tok == NEWLINE { 66 p.nextToken() 67 } 68 69 if p.tok != EOF { 70 p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok) 71 } 72 p.assignComments(expr) 73 return expr, nil 74 } 75 76 type parser struct { 77 in *scanner 78 tok Token 79 tokval tokenValue 80 } 81 82 // nextToken advances the scanner and returns the position of the 83 // previous token. 84 func (p *parser) nextToken() Position { 85 oldpos := p.tokval.pos 86 p.tok = p.in.nextToken(&p.tokval) 87 // enable to see the token stream 88 if debug { 89 log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos) 90 } 91 return oldpos 92 } 93 94 // file_input = (NEWLINE | stmt)* EOF 95 func (p *parser) parseFile() *File { 96 var stmts []Stmt 97 for p.tok != EOF { 98 if p.tok == NEWLINE { 99 p.nextToken() 100 continue 101 } 102 stmts = p.parseStmt(stmts) 103 } 104 return &File{Stmts: stmts} 105 } 106 107 func (p *parser) parseStmt(stmts []Stmt) []Stmt { 108 if p.tok == DEF { 109 return append(stmts, p.parseDefStmt()) 110 } else if p.tok == IF { 111 return append(stmts, p.parseIfStmt()) 112 } else if p.tok == FOR { 113 return append(stmts, p.parseForStmt()) 114 } 115 return p.parseSimpleStmt(stmts) 116 } 117 118 func (p *parser) parseDefStmt() Stmt { 119 defpos := p.nextToken() // consume DEF 120 id := p.parseIdent() 121 p.consume(LPAREN) 122 params := p.parseParams() 123 p.consume(RPAREN) 124 p.consume(COLON) 125 body := p.parseSuite() 126 return &DefStmt{ 127 Def: defpos, 128 Name: id, 129 Function: Function{ 130 StartPos: defpos, 131 Params: 
params, 132 Body: body, 133 }, 134 } 135 } 136 137 func (p *parser) parseIfStmt() Stmt { 138 ifpos := p.nextToken() // consume IF 139 cond := p.parseTest() 140 p.consume(COLON) 141 body := p.parseSuite() 142 ifStmt := &IfStmt{ 143 If: ifpos, 144 Cond: cond, 145 True: body, 146 } 147 tail := ifStmt 148 for p.tok == ELIF { 149 elifpos := p.nextToken() // consume ELIF 150 cond := p.parseTest() 151 p.consume(COLON) 152 body := p.parseSuite() 153 elif := &IfStmt{ 154 If: elifpos, 155 Cond: cond, 156 True: body, 157 } 158 tail.ElsePos = elifpos 159 tail.False = []Stmt{elif} 160 tail = elif 161 } 162 if p.tok == ELSE { 163 tail.ElsePos = p.nextToken() // consume ELSE 164 p.consume(COLON) 165 tail.False = p.parseSuite() 166 } 167 return ifStmt 168 } 169 170 func (p *parser) parseForStmt() Stmt { 171 forpos := p.nextToken() // consume FOR 172 vars := p.parseForLoopVariables() 173 p.consume(IN) 174 x := p.parseExpr(false) 175 p.consume(COLON) 176 body := p.parseSuite() 177 return &ForStmt{ 178 For: forpos, 179 Vars: vars, 180 X: x, 181 Body: body, 182 } 183 } 184 185 // Equivalent to 'exprlist' production in Python grammar. 186 // 187 // loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA? 188 func (p *parser) parseForLoopVariables() Expr { 189 // Avoid parseExpr because it would consume the IN token 190 // following x in "for x in y: ...". 191 v := p.parsePrimaryWithSuffix() 192 if p.tok != COMMA { 193 return v 194 } 195 196 list := []Expr{v} 197 for p.tok == COMMA { 198 p.nextToken() 199 if terminatesExprList(p.tok) { 200 break 201 } 202 list = append(list, p.parsePrimaryWithSuffix()) 203 } 204 return &TupleExpr{List: list} 205 } 206 207 // simple_stmt = small_stmt (SEMI small_stmt)* SEMI? 
NEWLINE 208 func (p *parser) parseSimpleStmt(stmts []Stmt) []Stmt { 209 for { 210 stmts = append(stmts, p.parseSmallStmt()) 211 if p.tok != SEMI { 212 break 213 } 214 p.nextToken() // consume SEMI 215 if p.tok == NEWLINE || p.tok == EOF { 216 break 217 } 218 } 219 // EOF without NEWLINE occurs in `if x: pass`, for example. 220 if p.tok != EOF { 221 p.consume(NEWLINE) 222 } 223 return stmts 224 } 225 226 // small_stmt = RETURN expr? 227 // | PASS | BREAK | CONTINUE 228 // | LOAD ... 229 // | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign 230 // | expr 231 func (p *parser) parseSmallStmt() Stmt { 232 switch p.tok { 233 case RETURN: 234 pos := p.nextToken() // consume RETURN 235 var result Expr 236 if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI { 237 result = p.parseExpr(false) 238 } 239 return &ReturnStmt{Return: pos, Result: result} 240 241 case BREAK, CONTINUE, PASS: 242 tok := p.tok 243 pos := p.nextToken() // consume it 244 return &BranchStmt{Token: tok, TokenPos: pos} 245 246 case LOAD: 247 return p.parseLoadStmt() 248 } 249 250 // Assignment 251 x := p.parseExpr(false) 252 switch p.tok { 253 case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ: 254 op := p.tok 255 pos := p.nextToken() // consume op 256 rhs := p.parseExpr(false) 257 return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs} 258 } 259 260 // Expression statement (e.g. function call, doc string). 261 return &ExprStmt{X: x} 262 } 263 264 // stmt = LOAD '(' STRING {',' (IDENT '=')? 
STRING} [','] ')' 265 func (p *parser) parseLoadStmt() *LoadStmt { 266 loadPos := p.nextToken() // consume LOAD 267 lparen := p.consume(LPAREN) 268 269 if p.tok != STRING { 270 p.in.errorf(p.in.pos, "first operand of load statement must be a string literal") 271 } 272 module := p.parsePrimary().(*Literal) 273 274 var from, to []*Ident 275 for p.tok != RPAREN && p.tok != EOF { 276 p.consume(COMMA) 277 if p.tok == RPAREN { 278 break // allow trailing comma 279 } 280 switch p.tok { 281 case STRING: 282 // load("module", "id") 283 // To name is same as original. 284 lit := p.parsePrimary().(*Literal) 285 id := &Ident{ 286 NamePos: lit.TokenPos.add(`"`), 287 Name: lit.Value.(string), 288 } 289 to = append(to, id) 290 from = append(from, id) 291 292 case IDENT: 293 // load("module", to="from") 294 id := p.parseIdent() 295 to = append(to, id) 296 if p.tok != EQ { 297 p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name) 298 } 299 p.consume(EQ) 300 if p.tok != STRING { 301 p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name) 302 } 303 lit := p.parsePrimary().(*Literal) 304 from = append(from, &Ident{ 305 NamePos: lit.TokenPos.add(`"`), 306 Name: lit.Value.(string), 307 }) 308 309 case RPAREN: 310 p.in.errorf(p.in.pos, "trailing comma in load statement") 311 312 default: 313 p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok) 314 } 315 } 316 rparen := p.consume(RPAREN) 317 318 if len(to) == 0 { 319 p.in.errorf(lparen, "load statement must import at least 1 symbol") 320 } 321 return &LoadStmt{ 322 Load: loadPos, 323 Module: module, 324 To: to, 325 From: from, 326 Rparen: rparen, 327 } 328 } 329 330 // suite is typically what follows a COLON (e.g. after DEF or FOR). 
331 // suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT 332 func (p *parser) parseSuite() []Stmt { 333 if p.tok == NEWLINE { 334 p.nextToken() // consume NEWLINE 335 p.consume(INDENT) 336 var stmts []Stmt 337 for p.tok != OUTDENT && p.tok != EOF { 338 stmts = p.parseStmt(stmts) 339 } 340 p.consume(OUTDENT) 341 return stmts 342 } 343 344 return p.parseSimpleStmt(nil) 345 } 346 347 func (p *parser) parseIdent() *Ident { 348 if p.tok != IDENT { 349 p.in.error(p.in.pos, "not an identifier") 350 } 351 id := &Ident{ 352 NamePos: p.tokval.pos, 353 Name: p.tokval.raw, 354 } 355 p.nextToken() 356 return id 357 } 358 359 func (p *parser) consume(t Token) Position { 360 if p.tok != t { 361 p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t) 362 } 363 return p.nextToken() 364 } 365 366 // params = (param COMMA)* param 367 // | 368 // 369 // param = IDENT 370 // | IDENT EQ test 371 // | STAR IDENT 372 // | STARSTAR IDENT 373 // 374 // parseParams parses a parameter list. The resulting expressions are of the form: 375 // 376 // *Ident 377 // *Binary{Op: EQ, X: *Ident, Y: Expr} 378 // *Unary{Op: STAR, X: *Ident} 379 // *Unary{Op: STARSTAR, X: *Ident} 380 func (p *parser) parseParams() []Expr { 381 var params []Expr 382 stars := false 383 for p.tok != RPAREN && p.tok != COLON && p.tok != EOF { 384 if len(params) > 0 { 385 p.consume(COMMA) 386 } 387 if p.tok == RPAREN { 388 // list can end with a COMMA if there is neither * nor ** 389 if stars { 390 p.in.errorf(p.in.pos, "got %#v, want parameter", p.tok) 391 } 392 break 393 } 394 395 // *args 396 if p.tok == STAR { 397 stars = true 398 pos := p.nextToken() 399 id := p.parseIdent() 400 params = append(params, &UnaryExpr{ 401 OpPos: pos, 402 Op: STAR, 403 X: id, 404 }) 405 continue 406 } 407 408 // **kwargs 409 if p.tok == STARSTAR { 410 stars = true 411 pos := p.nextToken() 412 id := p.parseIdent() 413 params = append(params, &UnaryExpr{ 414 OpPos: pos, 415 Op: STARSTAR, 416 X: id, 417 }) 418 continue 419 } 420 421 // IDENT 422 
// IDENT = test 423 id := p.parseIdent() 424 if p.tok == EQ { // default value 425 eq := p.nextToken() 426 dflt := p.parseTest() 427 params = append(params, &BinaryExpr{ 428 X: id, 429 OpPos: eq, 430 Op: EQ, 431 Y: dflt, 432 }) 433 continue 434 } 435 436 params = append(params, id) 437 } 438 return params 439 } 440 441 // parseExpr parses an expression, possible consisting of a 442 // comma-separated list of 'test' expressions. 443 // 444 // In many cases we must use parseTest to avoid ambiguity such as 445 // f(x, y) vs. f((x, y)). 446 func (p *parser) parseExpr(inParens bool) Expr { 447 x := p.parseTest() 448 if p.tok != COMMA { 449 return x 450 } 451 452 // tuple 453 exprs := p.parseExprs([]Expr{x}, inParens) 454 return &TupleExpr{List: exprs} 455 } 456 457 // parseExprs parses a comma-separated list of expressions, starting with the comma. 458 // It is used to parse tuples and list elements. 459 // expr_list = (',' expr)* ','? 460 func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr { 461 for p.tok == COMMA { 462 pos := p.nextToken() 463 if terminatesExprList(p.tok) { 464 if !allowTrailingComma { 465 p.in.error(pos, "unparenthesized tuple with trailing comma") 466 } 467 break 468 } 469 exprs = append(exprs, p.parseTest()) 470 } 471 return exprs 472 } 473 474 // parseTest parses a 'test', a single-component expression. 
475 func (p *parser) parseTest() Expr { 476 if p.tok == LAMBDA { 477 return p.parseLambda(true) 478 } 479 480 x := p.parseTestPrec(0) 481 482 // conditional expression (t IF cond ELSE f) 483 if p.tok == IF { 484 ifpos := p.nextToken() 485 cond := p.parseTestPrec(0) 486 if p.tok != ELSE { 487 p.in.error(ifpos, "conditional expression without else clause") 488 } 489 elsepos := p.nextToken() 490 else_ := p.parseTest() 491 return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_} 492 } 493 494 return x 495 } 496 497 // parseTestNoCond parses a a single-component expression without 498 // consuming a trailing 'if expr else expr'. 499 func (p *parser) parseTestNoCond() Expr { 500 if p.tok == LAMBDA { 501 return p.parseLambda(false) 502 } 503 return p.parseTestPrec(0) 504 } 505 506 // parseLambda parses a lambda expression. 507 // The allowCond flag allows the body to be an 'a if b else c' conditional. 508 func (p *parser) parseLambda(allowCond bool) Expr { 509 lambda := p.nextToken() 510 var params []Expr 511 if p.tok != COLON { 512 params = p.parseParams() 513 } 514 p.consume(COLON) 515 516 var body Expr 517 if allowCond { 518 body = p.parseTest() 519 } else { 520 body = p.parseTestNoCond() 521 } 522 523 return &LambdaExpr{ 524 Lambda: lambda, 525 Function: Function{ 526 StartPos: lambda, 527 Params: params, 528 Body: []Stmt{&ReturnStmt{Result: body}}, 529 }, 530 } 531 } 532 533 func (p *parser) parseTestPrec(prec int) Expr { 534 if prec >= len(preclevels) { 535 return p.parsePrimaryWithSuffix() 536 } 537 538 // expr = NOT expr 539 if p.tok == NOT && prec == int(precedence[NOT]) { 540 pos := p.nextToken() 541 x := p.parseTestPrec(prec) 542 return &UnaryExpr{ 543 OpPos: pos, 544 Op: NOT, 545 X: x, 546 } 547 } 548 549 return p.parseBinopExpr(prec) 550 } 551 552 // expr = test (OP test)* 553 // Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing. 
func (p *parser) parseBinopExpr(prec int) Expr {
	// The left operand is anything that binds more tightly than prec.
	x := p.parseTestPrec(prec + 1)
	// first is true only on the first iteration; it is used below to
	// detect a chain of comparisons.
	for first := true; ; first = false {
		if p.tok == NOT {
			p.nextToken() // consume NOT
			// In this context, NOT must be followed by IN.
			// Replace NOT IN by a single NOT_IN token.
			if p.tok != IN {
				p.in.errorf(p.in.pos, "got %#v, want in", p.tok)
			}
			// Overwrite the lookahead in place so that the rest of the
			// loop sees "not in" as one operator.
			p.tok = NOT_IN
		}

		// Binary operator of specified precedence?
		// Tokens that are not operators have precedence -1 (see init),
		// so they also end the expression here.
		opprec := int(precedence[p.tok])
		if opprec < prec {
			return x
		}

		// Comparisons are non-associative.
		// When !first, x is the BinaryExpr built by the previous
		// iteration, which is what the type assertion relies on.
		if !first && opprec == int(precedence[EQL]) {
			p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)",
				x.(*BinaryExpr).Op, p.tok)
		}

		op := p.tok
		pos := p.nextToken()
		// Parsing the right operand one level tighter makes operators
		// of equal precedence group to the left.
		y := p.parseTestPrec(opprec + 1)
		x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
	}
}

// precedence maps each operator to its precedence (0-9, the operator's
// index in preclevels), or -1 for other tokens. It is filled in by init.
var precedence [maxToken]int8

// preclevels groups operators of equal precedence, lowest precedence first.
// Comparisons are nonassociative; other binary operators associate to the left.
// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary.
// See https://github.com/google/skylark/blob/master/doc/spec.md#binary-operators
var preclevels = [...][]Token{
	{OR},                                   // or
	{AND},                                  // and
	{NOT},                                  // not (unary)
	{EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in
	{PIPE},                                 // |
	{CIRCUMFLEX},                           // ^
	{AMP},                                  // &
	{LTLT, GTGT},                           // << >>
	{MINUS, PLUS},                          // - +
	{STAR, PERCENT, SLASH, SLASHSLASH},     // * % / //
}

// init derives the precedence table from preclevels.
func init() {
	// populate precedence table
	// Start with -1 everywhere so that non-operator tokens are recognizable.
	for i := range precedence {
		precedence[i] = -1
	}
	// Then give each listed operator the index of its level.
	for level, tokens := range preclevels {
		for _, tok := range tokens {
			precedence[tok] = int8(level)
		}
	}
}

// primary_with_suffix = primary
//                     | primary '.'
IDENT 620 // | primary slice_suffix 621 // | primary call_suffix 622 func (p *parser) parsePrimaryWithSuffix() Expr { 623 x := p.parsePrimary() 624 for { 625 switch p.tok { 626 case DOT: 627 dot := p.nextToken() 628 id := p.parseIdent() 629 x = &DotExpr{Dot: dot, X: x, Name: id} 630 case LBRACK: 631 x = p.parseSliceSuffix(x) 632 case LPAREN: 633 x = p.parseCallSuffix(x) 634 default: 635 return x 636 } 637 } 638 } 639 640 // slice_suffix = '[' expr? ':' expr? ':' expr? ']' 641 func (p *parser) parseSliceSuffix(x Expr) Expr { 642 lbrack := p.nextToken() 643 var lo, hi, step Expr 644 if p.tok != COLON { 645 y := p.parseExpr(false) 646 647 // index x[y] 648 if p.tok == RBRACK { 649 rbrack := p.nextToken() 650 return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack} 651 } 652 653 lo = y 654 } 655 656 // slice or substring x[lo:hi:step] 657 if p.tok == COLON { 658 p.nextToken() 659 if p.tok != COLON && p.tok != RBRACK { 660 hi = p.parseTest() 661 } 662 } 663 if p.tok == COLON { 664 p.nextToken() 665 if p.tok != RBRACK { 666 step = p.parseTest() 667 } 668 } 669 rbrack := p.consume(RBRACK) 670 return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack} 671 } 672 673 // call_suffix = '(' arg_list? ')' 674 func (p *parser) parseCallSuffix(fn Expr) Expr { 675 lparen := p.consume(LPAREN) 676 var rparen Position 677 var args []Expr 678 if p.tok == RPAREN { 679 rparen = p.nextToken() 680 } else { 681 args = p.parseArgs() 682 rparen = p.consume(RPAREN) 683 } 684 return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen} 685 } 686 687 // parseArgs parses a list of actual parameter values (arguments). 688 // It mirrors the structure of parseParams. 689 // arg_list = ((arg COMMA)* arg COMMA?)? 
690 func (p *parser) parseArgs() []Expr { 691 var args []Expr 692 stars := false 693 for p.tok != RPAREN && p.tok != EOF { 694 if len(args) > 0 { 695 p.consume(COMMA) 696 } 697 if p.tok == RPAREN { 698 // list can end with a COMMA if there is neither * nor ** 699 if stars { 700 p.in.errorf(p.in.pos, `got %#v, want argument`, p.tok) 701 } 702 break 703 } 704 705 // *args 706 if p.tok == STAR { 707 stars = true 708 pos := p.nextToken() 709 x := p.parseTest() 710 args = append(args, &UnaryExpr{ 711 OpPos: pos, 712 Op: STAR, 713 X: x, 714 }) 715 continue 716 } 717 718 // **kwargs 719 if p.tok == STARSTAR { 720 stars = true 721 pos := p.nextToken() 722 x := p.parseTest() 723 args = append(args, &UnaryExpr{ 724 OpPos: pos, 725 Op: STARSTAR, 726 X: x, 727 }) 728 continue 729 } 730 731 // We use a different strategy from Bazel here to stay within LL(1). 732 // Instead of looking ahead two tokens (IDENT, EQ) we parse 733 // 'test = test' then check that the first was an IDENT. 734 x := p.parseTest() 735 736 if p.tok == EQ { 737 // name = value 738 if _, ok := x.(*Ident); !ok { 739 p.in.errorf(p.in.pos, "keyword argument must have form name=expr") 740 } 741 eq := p.nextToken() 742 y := p.parseTest() 743 x = &BinaryExpr{ 744 X: x, 745 OpPos: eq, 746 Op: EQ, 747 Y: y, 748 } 749 } 750 751 args = append(args, x) 752 } 753 return args 754 } 755 756 // primary = IDENT 757 // | INT | FLOAT 758 // | STRING 759 // | '[' ... // list literal or comprehension 760 // | '{' ... // dict literal or comprehension 761 // | '(' ... 
// tuple or parenthesized expression 762 // | ('-'|'+'|'~') primary_with_suffix 763 func (p *parser) parsePrimary() Expr { 764 switch p.tok { 765 case IDENT: 766 return p.parseIdent() 767 768 case INT, FLOAT, STRING: 769 var val interface{} 770 tok := p.tok 771 switch tok { 772 case INT: 773 if p.tokval.bigInt != nil { 774 val = p.tokval.bigInt 775 } else { 776 val = p.tokval.int 777 } 778 case FLOAT: 779 val = p.tokval.float 780 case STRING: 781 val = p.tokval.string 782 } 783 raw := p.tokval.raw 784 pos := p.nextToken() 785 return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val} 786 787 case LBRACK: 788 return p.parseList() 789 790 case LBRACE: 791 return p.parseDict() 792 793 case LPAREN: 794 lparen := p.nextToken() 795 if p.tok == RPAREN { 796 // empty tuple 797 rparen := p.nextToken() 798 return &TupleExpr{Lparen: lparen, Rparen: rparen} 799 } 800 e := p.parseExpr(true) // allow trailing comma 801 rparen := p.consume(RPAREN) 802 return &ParenExpr{ 803 Lparen: lparen, 804 X: e, 805 Rparen: rparen, 806 } 807 808 case MINUS, PLUS, TILDE: // unary 809 tok := p.tok 810 pos := p.nextToken() 811 x := p.parsePrimaryWithSuffix() 812 return &UnaryExpr{ 813 OpPos: pos, 814 Op: tok, 815 X: x, 816 } 817 } 818 p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok) 819 panic("unreachable") 820 } 821 822 // list = '[' ']' 823 // | '[' expr ']' 824 // | '[' expr expr_list ']' 825 // | '[' expr (FOR loop_variables IN expr)+ ']' 826 func (p *parser) parseList() Expr { 827 lbrack := p.nextToken() 828 if p.tok == RBRACK { 829 // empty List 830 rbrack := p.nextToken() 831 return &ListExpr{Lbrack: lbrack, Rbrack: rbrack} 832 } 833 834 x := p.parseTest() 835 836 if p.tok == FOR { 837 // list comprehension 838 return p.parseComprehensionSuffix(lbrack, x, RBRACK) 839 } 840 841 exprs := []Expr{x} 842 if p.tok == COMMA { 843 // multi-item list literal 844 exprs = p.parseExprs(exprs, true) // allow trailing comma 845 } 846 847 rbrack := p.consume(RBRACK) 848 return 
&ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack} 849 } 850 851 // dict = '{' '}' 852 // | '{' dict_entry_list '}' 853 // | '{' dict_entry FOR loop_variables IN expr '}' 854 func (p *parser) parseDict() Expr { 855 lbrace := p.nextToken() 856 if p.tok == RBRACE { 857 // empty dict 858 rbrace := p.nextToken() 859 return &DictExpr{Lbrace: lbrace, Rbrace: rbrace} 860 } 861 862 x := p.parseDictEntry() 863 864 if p.tok == FOR { 865 // dict comprehension 866 return p.parseComprehensionSuffix(lbrace, x, RBRACE) 867 } 868 869 entries := []Expr{x} 870 for p.tok == COMMA { 871 p.nextToken() 872 if p.tok == RBRACE { 873 break 874 } 875 entries = append(entries, p.parseDictEntry()) 876 } 877 878 rbrace := p.consume(RBRACE) 879 return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace} 880 } 881 882 // dict_entry = test ':' test 883 func (p *parser) parseDictEntry() *DictEntry { 884 k := p.parseTest() 885 colon := p.consume(COLON) 886 v := p.parseTest() 887 return &DictEntry{Key: k, Colon: colon, Value: v} 888 } 889 890 // comp_suffix = FOR loopvars IN expr comp_suffix 891 // | IF expr comp_suffix 892 // | ']' or ')' (end) 893 // 894 // There can be multiple FOR/IF clauses; the first is always a FOR. 895 func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr { 896 var clauses []Node 897 for p.tok != endBrace { 898 if p.tok == FOR { 899 pos := p.nextToken() 900 vars := p.parseForLoopVariables() 901 in := p.consume(IN) 902 // Following Python 3, the operand of IN cannot be: 903 // - a conditional expression ('x if y else z'), 904 // due to conflicts in Python grammar 905 // ('if' is used by the comprehension); 906 // - a lambda expression 907 // - an unparenthesized tuple. 
908 x := p.parseTestPrec(0) 909 clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x}) 910 } else if p.tok == IF { 911 pos := p.nextToken() 912 cond := p.parseTestNoCond() 913 clauses = append(clauses, &IfClause{If: pos, Cond: cond}) 914 } else { 915 p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace) 916 } 917 } 918 rbrace := p.nextToken() 919 920 return &Comprehension{ 921 Curly: endBrace == RBRACE, 922 Lbrack: lbrace, 923 Body: body, 924 Clauses: clauses, 925 Rbrack: rbrace, 926 } 927 } 928 929 func terminatesExprList(tok Token) bool { 930 switch tok { 931 case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI: 932 return true 933 } 934 return false 935 } 936 937 // Comment assignment. 938 // We build two lists of all subnodes, preorder and postorder. 939 // The preorder list is ordered by start location, with outer nodes first. 940 // The postorder list is ordered by end location, with outer nodes last. 941 // We use the preorder list to assign each whole-line comment to the syntax 942 // immediately following it, and we use the postorder list to assign each 943 // end-of-line comment to the syntax immediately preceding it. 944 945 // flattenAST returns the list of AST nodes, both in prefix order and in postfix 946 // order. 947 func flattenAST(root Node) (pre, post []Node) { 948 stack := []Node{} 949 Walk(root, func(n Node) bool { 950 if n != nil { 951 pre = append(pre, n) 952 stack = append(stack, n) 953 } else { 954 post = append(post, stack[len(stack)-1]) 955 stack = stack[:len(stack)-1] 956 } 957 return true 958 }) 959 return pre, post 960 } 961 962 // assignComments attaches comments to nearby syntax. 963 func (p *parser) assignComments(n Node) { 964 // Leave early if there are no comments 965 if len(p.in.lineComments)+len(p.in.suffixComments) == 0 { 966 return 967 } 968 969 pre, post := flattenAST(n) 970 971 // Assign line comments to syntax immediately following. 
972 line := p.in.lineComments 973 for _, x := range pre { 974 start, _ := x.Span() 975 976 switch x.(type) { 977 case *File: 978 continue 979 } 980 981 for len(line) > 0 && !start.isBefore(line[0].Start) { 982 x.AllocComments() 983 x.Comments().Before = append(x.Comments().Before, line[0]) 984 line = line[1:] 985 } 986 } 987 988 // Remaining line comments go at end of file. 989 if len(line) > 0 { 990 n.AllocComments() 991 n.Comments().After = append(n.Comments().After, line...) 992 } 993 994 // Assign suffix comments to syntax immediately before. 995 suffix := p.in.suffixComments 996 for i := len(post) - 1; i >= 0; i-- { 997 x := post[i] 998 999 // Do not assign suffix comments to file 1000 switch x.(type) { 1001 case *File: 1002 continue 1003 } 1004 1005 _, end := x.Span() 1006 if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) { 1007 x.AllocComments() 1008 x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1]) 1009 suffix = suffix[:len(suffix)-1] 1010 } 1011 } 1012 }