github.com/k14s/starlark-go@v0.0.0-20200720175618-3a5c849cc368/syntax/parse.go (about)

     1  // Copyright 2017 The Bazel Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  // This file defines a recursive-descent parser for Starlark.
     8  // The LL(1) grammar of Starlark and the names of many productions follow Python 2.7.
     9  //
    10  // TODO(adonovan): use syntax.Error more systematically throughout the
    11  // package.  Verify that error positions are correct using the
    12  // chunkedfile mechanism.
    13  
    14  import "log"
    15  
// debug is a compile-time switch for tracing the parser. When true,
// nextToken logs every token it reads together with its position.
// The original note adds that the first error is then reported via
// log.Fatal; that path is not in this file (presumably it lives in the
// scanner's error handling).
const debug = false
    18  
// A Mode value is a set of flags (or 0) that controls optional parser functionality.
// Flags are single bits and may be combined with bitwise OR.
type Mode uint

// Parser mode flags. Each constant is 1 << iota, so RetainComments is
// bit 0 and BlockScanner is bit 1.
const (
	RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments
	BlockScanner   Mode = 1 << iota // use if/end syntax instead of indent
)
    26  
    27  // Parse parses the input data and returns the corresponding parse tree.
    28  //
    29  // If src != nil, ParseFile parses the source from src and the filename
    30  // is only used when recording position information.
    31  // The type of the argument for the src parameter must be string,
    32  // []byte, or io.Reader.
    33  // If src == nil, ParseFile parses the file specified by filename.
    34  func Parse(filename string, src interface{}, mode Mode) (f *File, err error) {
    35  	in, err := newScanner(filename, src, mode&RetainComments != 0)
    36  	if err != nil {
    37  		return nil, err
    38  	}
    39  	var inScanner scannerInterface = in
    40  	if (mode & BlockScanner) == BlockScanner {
    41  		inScanner = newBlockScanner(in)
    42  	}
    43  	p := parser{in: inScanner}
    44  	defer p.in.recover(&err)
    45  
    46  	p.nextToken() // read first lookahead token
    47  	f = p.parseFile()
    48  	if f != nil {
    49  		f.Path = filename
    50  	}
    51  	p.assignComments(f)
    52  	return f, nil
    53  }
    54  
    55  // ParseCompoundStmt parses a single compound statement:
    56  // a blank line, a def, for, while, or if statement, or a
    57  // semicolon-separated list of simple statements followed
    58  // by a newline. These are the units on which the REPL operates.
    59  // ParseCompoundStmt does not consume any following input.
    60  // The parser calls the readline function each
    61  // time it needs a new line of input.
    62  func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) {
    63  	in, err := newScanner(filename, readline, false)
    64  	if err != nil {
    65  		return nil, err
    66  	}
    67  
    68  	p := parser{in: newBlockScanner(in)}
    69  	defer p.in.recover(&err)
    70  
    71  	p.nextToken() // read first lookahead token
    72  
    73  	var stmts []Stmt
    74  	switch p.tok {
    75  	case DEF, IF, FOR, WHILE:
    76  		stmts = p.parseStmt(stmts)
    77  	case NEWLINE:
    78  		// blank line
    79  	default:
    80  		stmts = p.parseSimpleStmt(stmts, false)
    81  		// Require but don't consume newline, to avoid blocking again.
    82  		if p.tok != NEWLINE {
    83  			p.in.errorf(p.in.getPos(), "invalid syntax")
    84  		}
    85  	}
    86  
    87  	return &File{Path: filename, Stmts: stmts}, nil
    88  }
    89  
    90  // ParseExpr parses a Starlark expression.
    91  // A comma-separated list of expressions is parsed as a tuple.
    92  // See Parse for explanation of parameters.
    93  func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) {
    94  	in, err := newScanner(filename, src, mode&RetainComments != 0)
    95  	if err != nil {
    96  		return nil, err
    97  	}
    98  	p := parser{in: newBlockScanner(in)}
    99  	defer p.in.recover(&err)
   100  
   101  	p.nextToken() // read first lookahead token
   102  
   103  	// Use parseExpr, not parseTest, to permit an unparenthesized tuple.
   104  	expr = p.parseExpr(false)
   105  
   106  	// A following newline (e.g. "f()\n") appears outside any brackets,
   107  	// on a non-blank line, and thus results in a NEWLINE token.
   108  	if p.tok == NEWLINE {
   109  		p.nextToken()
   110  	}
   111  
   112  	if p.tok != EOF {
   113  		p.in.errorf(p.in.getPos(), "got %#v after expression, want EOF", p.tok)
   114  	}
   115  	p.assignComments(expr)
   116  	return expr, nil
   117  }
   118  
// parser holds the state of the recursive-descent parser: a token
// source plus a single token of lookahead.
type parser struct {
	in     scannerInterface // token source; also used to report errors and to fetch collected comments
	tok    Token            // current lookahead token, updated by nextToken
	tokval tokenValue       // value and position data for tok, filled in by in.nextToken
}
   124  
   125  // nextToken advances the scanner and returns the position of the
   126  // previous token.
   127  func (p *parser) nextToken() Position {
   128  	oldpos := p.tokval.pos
   129  	p.tok = p.in.nextToken(&p.tokval)
   130  	// enable to see the token stream
   131  	if debug {
   132  		log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos)
   133  	}
   134  	return oldpos
   135  }
   136  
   137  // file_input = (NEWLINE | stmt)* EOF
   138  func (p *parser) parseFile() *File {
   139  	var stmts []Stmt
   140  	for p.tok != EOF {
   141  		if p.tok == NEWLINE {
   142  			p.nextToken()
   143  			continue
   144  		}
   145  		stmts = p.parseStmt(stmts)
   146  	}
   147  	return &File{Stmts: stmts}
   148  }
   149  
   150  func (p *parser) parseStmt(stmts []Stmt) []Stmt {
   151  	if p.tok == DEF {
   152  		return append(stmts, p.parseDefStmt())
   153  	} else if p.tok == IF {
   154  		return append(stmts, p.parseIfStmt())
   155  	} else if p.tok == FOR {
   156  		return append(stmts, p.parseForStmt())
   157  	} else if p.tok == WHILE {
   158  		return append(stmts, p.parseWhileStmt())
   159  	}
   160  	return p.parseSimpleStmt(stmts, true)
   161  }
   162  
   163  func (p *parser) parseDefStmt() Stmt {
   164  	defpos := p.nextToken() // consume DEF
   165  	id := p.parseIdent()
   166  	p.consume(LPAREN)
   167  	params := p.parseParams()
   168  	p.consume(RPAREN)
   169  	p.consume(COLON)
   170  	body := p.parseSuite()
   171  	return &DefStmt{
   172  		Def:    defpos,
   173  		Name:   id,
   174  		Params: params,
   175  		Body:   body,
   176  	}
   177  }
   178  
   179  func (p *parser) parseIfStmt() Stmt {
   180  	ifpos := p.nextToken() // consume IF
   181  	cond := p.parseTest()
   182  	p.consume(COLON)
   183  	body := p.parseSuite()
   184  	ifStmt := &IfStmt{
   185  		If:   ifpos,
   186  		Cond: cond,
   187  		True: body,
   188  	}
   189  	tail := ifStmt
   190  	for p.tok == ELIF {
   191  		elifpos := p.nextToken() // consume ELIF
   192  		cond := p.parseTest()
   193  		p.consume(COLON)
   194  		body := p.parseSuite()
   195  		elif := &IfStmt{
   196  			If:   elifpos,
   197  			Cond: cond,
   198  			True: body,
   199  		}
   200  		tail.ElsePos = elifpos
   201  		tail.False = []Stmt{elif}
   202  		tail = elif
   203  	}
   204  	if p.tok == ELSE {
   205  		tail.ElsePos = p.nextToken() // consume ELSE
   206  		p.consume(COLON)
   207  		tail.False = p.parseSuite()
   208  	}
   209  	return ifStmt
   210  }
   211  
   212  func (p *parser) parseForStmt() Stmt {
   213  	forpos := p.nextToken() // consume FOR
   214  	vars := p.parseForLoopVariables()
   215  	p.consume(IN)
   216  	x := p.parseExpr(false)
   217  	p.consume(COLON)
   218  	body := p.parseSuite()
   219  	return &ForStmt{
   220  		For:  forpos,
   221  		Vars: vars,
   222  		X:    x,
   223  		Body: body,
   224  	}
   225  }
   226  
   227  func (p *parser) parseWhileStmt() Stmt {
   228  	whilepos := p.nextToken() // consume WHILE
   229  	cond := p.parseTest()
   230  	p.consume(COLON)
   231  	body := p.parseSuite()
   232  	return &WhileStmt{
   233  		While: whilepos,
   234  		Cond:  cond,
   235  		Body:  body,
   236  	}
   237  }
   238  
   239  // Equivalent to 'exprlist' production in Python grammar.
   240  //
   241  // loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA?
   242  func (p *parser) parseForLoopVariables() Expr {
   243  	// Avoid parseExpr because it would consume the IN token
   244  	// following x in "for x in y: ...".
   245  	v := p.parsePrimaryWithSuffix()
   246  	if p.tok != COMMA {
   247  		return v
   248  	}
   249  
   250  	list := []Expr{v}
   251  	for p.tok == COMMA {
   252  		p.nextToken()
   253  		if terminatesExprList(p.tok) {
   254  			break
   255  		}
   256  		list = append(list, p.parsePrimaryWithSuffix())
   257  	}
   258  	return &TupleExpr{List: list}
   259  }
   260  
   261  // simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE
   262  // In REPL mode, it does not consume the NEWLINE.
   263  func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt {
   264  	for {
   265  		stmts = append(stmts, p.parseSmallStmt())
   266  		if p.tok != SEMI {
   267  			break
   268  		}
   269  		p.nextToken() // consume SEMI
   270  		if p.tok == NEWLINE || p.tok == EOF {
   271  			break
   272  		}
   273  	}
   274  	// EOF without NEWLINE occurs in `if x: pass`, for example.
   275  	if p.tok != EOF && consumeNL {
   276  		p.consume(NEWLINE)
   277  	}
   278  
   279  	return stmts
   280  }
   281  
   282  // small_stmt = RETURN expr?
   283  //            | PASS | BREAK | CONTINUE
   284  //            | LOAD ...
   285  //            | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr   // assign
   286  //            | expr
   287  func (p *parser) parseSmallStmt() Stmt {
   288  	switch p.tok {
   289  	case RETURN:
   290  		pos := p.nextToken() // consume RETURN
   291  		var result Expr
   292  		if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI {
   293  			result = p.parseExpr(false)
   294  		}
   295  		return &ReturnStmt{Return: pos, Result: result}
   296  
   297  	case BREAK, CONTINUE, PASS:
   298  		tok := p.tok
   299  		pos := p.nextToken() // consume it
   300  		return &BranchStmt{Token: tok, TokenPos: pos}
   301  
   302  	case LOAD:
   303  		return p.parseLoadStmt()
   304  	}
   305  
   306  	// Assignment
   307  	x := p.parseExpr(false)
   308  	switch p.tok {
   309  	case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ:
   310  		op := p.tok
   311  		pos := p.nextToken() // consume op
   312  		rhs := p.parseExpr(false)
   313  		return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs}
   314  	}
   315  
   316  	// Expression statement (e.g. function call, doc string).
   317  	return &ExprStmt{X: x}
   318  }
   319  
   320  // stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')'
   321  func (p *parser) parseLoadStmt() *LoadStmt {
   322  	loadPos := p.nextToken() // consume LOAD
   323  	lparen := p.consume(LPAREN)
   324  
   325  	if p.tok != STRING {
   326  		p.in.errorf(p.in.getPos(), "first operand of load statement must be a string literal")
   327  	}
   328  	module := p.parsePrimary().(*Literal)
   329  
   330  	var from, to []*Ident
   331  	for p.tok != RPAREN && p.tok != EOF {
   332  		p.consume(COMMA)
   333  		if p.tok == RPAREN {
   334  			break // allow trailing comma
   335  		}
   336  		switch p.tok {
   337  		case STRING:
   338  			// load("module", "id")
   339  			// To name is same as original.
   340  			lit := p.parsePrimary().(*Literal)
   341  			id := &Ident{
   342  				NamePos: lit.TokenPos.add(`"`),
   343  				Name:    lit.Value.(string),
   344  			}
   345  			to = append(to, id)
   346  			from = append(from, id)
   347  
   348  		case IDENT:
   349  			// load("module", to="from")
   350  			id := p.parseIdent()
   351  			to = append(to, id)
   352  			if p.tok != EQ {
   353  				p.in.errorf(p.in.getPos(), `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name)
   354  			}
   355  			p.consume(EQ)
   356  			if p.tok != STRING {
   357  				p.in.errorf(p.in.getPos(), `original name of loaded symbol must be quoted: %s="originalname"`, id.Name)
   358  			}
   359  			lit := p.parsePrimary().(*Literal)
   360  			from = append(from, &Ident{
   361  				NamePos: lit.TokenPos.add(`"`),
   362  				Name:    lit.Value.(string),
   363  			})
   364  
   365  		case RPAREN:
   366  			p.in.errorf(p.in.getPos(), "trailing comma in load statement")
   367  
   368  		default:
   369  			p.in.errorf(p.in.getPos(), `load operand must be "name" or localname="name" (got %#v)`, p.tok)
   370  		}
   371  	}
   372  	rparen := p.consume(RPAREN)
   373  
   374  	if len(to) == 0 {
   375  		p.in.errorf(lparen, "load statement must import at least 1 symbol")
   376  	}
   377  	return &LoadStmt{
   378  		Load:   loadPos,
   379  		Module: module,
   380  		To:     to,
   381  		From:   from,
   382  		Rparen: rparen,
   383  	}
   384  }
   385  
   386  // suite is typically what follows a COLON (e.g. after DEF or FOR).
   387  // suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT
   388  func (p *parser) parseSuite() []Stmt {
   389  	if p.tok == NEWLINE {
   390  		p.nextToken() // consume NEWLINE
   391  		p.consume(INDENT)
   392  		var stmts []Stmt
   393  		for p.tok != OUTDENT && p.tok != EOF {
   394  			stmts = p.parseStmt(stmts)
   395  		}
   396  		p.consume(OUTDENT)
   397  		return stmts
   398  	}
   399  
   400  	return p.parseSimpleStmt(nil, true)
   401  }
   402  
   403  func (p *parser) parseIdent() *Ident {
   404  	if p.tok != IDENT {
   405  		for _, v := range keywordToken {
   406  			if p.tok == v {
   407  				p.in.errorf(p.in.getPos(), "use of reserved keyword '%s' is not allowed (expected identifier)", p.tokval.raw)
   408  			}
   409  		}
   410  		p.in.errorf(p.in.getPos(), "not an identifier")
   411  	}
   412  	id := &Ident{
   413  		NamePos: p.tokval.pos,
   414  		Name:    p.tokval.raw,
   415  	}
   416  	p.nextToken()
   417  	return id
   418  }
   419  
   420  func (p *parser) consume(t Token) Position {
   421  	if p.tok != t {
   422  		p.in.errorf(p.in.getPos(), "got %#v, want %#v", p.tok, t)
   423  	}
   424  	return p.nextToken()
   425  }
   426  
   427  // params = (param COMMA)* param
   428  //        |
   429  //
   430  // param = IDENT
   431  //       | IDENT EQ test
   432  //       | STAR
   433  //       | STAR IDENT
   434  //       | STARSTAR IDENT
   435  //
   436  // parseParams parses a parameter list.  The resulting expressions are of the form:
   437  //
   438  //      *Ident                                          x
   439  //      *Binary{Op: EQ, X: *Ident, Y: Expr}             x=y
   440  //      *Unary{Op: STAR}                                *
   441  //      *Unary{Op: STAR, X: *Ident}                     *args
   442  //      *Unary{Op: STARSTAR, X: *Ident}                 **kwargs
   443  func (p *parser) parseParams() []Expr {
   444  	var params []Expr
   445  	stars := false
   446  	for p.tok != RPAREN && p.tok != COLON && p.tok != EOF {
   447  		if len(params) > 0 {
   448  			p.consume(COMMA)
   449  		}
   450  		if p.tok == RPAREN {
   451  			// list can end with a COMMA if there is neither * nor **
   452  			if stars {
   453  				p.in.errorf(p.in.getPos(), "got %#v, want parameter", p.tok)
   454  			}
   455  			break
   456  		}
   457  
   458  		// * or *args or **kwargs
   459  		if p.tok == STAR || p.tok == STARSTAR {
   460  			stars = true
   461  			op := p.tok
   462  			pos := p.nextToken()
   463  			var x Expr
   464  			if op == STARSTAR || p.tok == IDENT {
   465  				x = p.parseIdent()
   466  			}
   467  			params = append(params, &UnaryExpr{
   468  				OpPos: pos,
   469  				Op:    op,
   470  				X:     x,
   471  			})
   472  			continue
   473  		}
   474  
   475  		// IDENT
   476  		// IDENT = test
   477  		id := p.parseIdent()
   478  		if p.tok == EQ { // default value
   479  			eq := p.nextToken()
   480  			dflt := p.parseTest()
   481  			params = append(params, &BinaryExpr{
   482  				X:     id,
   483  				OpPos: eq,
   484  				Op:    EQ,
   485  				Y:     dflt,
   486  			})
   487  			continue
   488  		}
   489  
   490  		params = append(params, id)
   491  	}
   492  	return params
   493  }
   494  
   495  // parseExpr parses an expression, possible consisting of a
   496  // comma-separated list of 'test' expressions.
   497  //
   498  // In many cases we must use parseTest to avoid ambiguity such as
   499  // f(x, y) vs. f((x, y)).
   500  func (p *parser) parseExpr(inParens bool) Expr {
   501  	x := p.parseTest()
   502  	if p.tok != COMMA {
   503  		return x
   504  	}
   505  
   506  	// tuple
   507  	exprs := p.parseExprs([]Expr{x}, inParens)
   508  	return &TupleExpr{List: exprs}
   509  }
   510  
   511  // parseExprs parses a comma-separated list of expressions, starting with the comma.
   512  // It is used to parse tuples and list elements.
   513  // expr_list = (',' expr)* ','?
   514  func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr {
   515  	for p.tok == COMMA {
   516  		pos := p.nextToken()
   517  		if terminatesExprList(p.tok) {
   518  			if !allowTrailingComma {
   519  				p.in.error(pos, "unparenthesized tuple with trailing comma")
   520  			}
   521  			break
   522  		}
   523  		exprs = append(exprs, p.parseTest())
   524  	}
   525  	return exprs
   526  }
   527  
   528  // parseTest parses a 'test', a single-component expression.
   529  func (p *parser) parseTest() Expr {
   530  	if p.tok == LAMBDA {
   531  		return p.parseLambda(true)
   532  	}
   533  
   534  	x := p.parseTestPrec(0)
   535  
   536  	// conditional expression (t IF cond ELSE f)
   537  	if p.tok == IF {
   538  		ifpos := p.nextToken()
   539  		cond := p.parseTestPrec(0)
   540  		if p.tok != ELSE {
   541  			p.in.error(ifpos, "conditional expression without else clause")
   542  		}
   543  		elsepos := p.nextToken()
   544  		else_ := p.parseTest()
   545  		return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_}
   546  	}
   547  
   548  	return x
   549  }
   550  
   551  // parseTestNoCond parses a a single-component expression without
   552  // consuming a trailing 'if expr else expr'.
   553  func (p *parser) parseTestNoCond() Expr {
   554  	if p.tok == LAMBDA {
   555  		return p.parseLambda(false)
   556  	}
   557  	return p.parseTestPrec(0)
   558  }
   559  
   560  // parseLambda parses a lambda expression.
   561  // The allowCond flag allows the body to be an 'a if b else c' conditional.
   562  func (p *parser) parseLambda(allowCond bool) Expr {
   563  	lambda := p.nextToken()
   564  	var params []Expr
   565  	if p.tok != COLON {
   566  		params = p.parseParams()
   567  	}
   568  	p.consume(COLON)
   569  
   570  	var body Expr
   571  	if allowCond {
   572  		body = p.parseTest()
   573  	} else {
   574  		body = p.parseTestNoCond()
   575  	}
   576  
   577  	return &LambdaExpr{
   578  		Lambda: lambda,
   579  		Params: params,
   580  		Body:   body,
   581  	}
   582  }
   583  
   584  func (p *parser) parseTestPrec(prec int) Expr {
   585  	if prec >= len(preclevels) {
   586  		return p.parsePrimaryWithSuffix()
   587  	}
   588  
   589  	// expr = NOT expr
   590  	if p.tok == NOT && prec == int(precedence[NOT]) {
   591  		pos := p.nextToken()
   592  		x := p.parseTestPrec(prec)
   593  		return &UnaryExpr{
   594  			OpPos: pos,
   595  			Op:    NOT,
   596  			X:     x,
   597  		}
   598  	}
   599  
   600  	return p.parseBinopExpr(prec)
   601  }
   602  
   603  // expr = test (OP test)*
   604  // Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing.
   605  func (p *parser) parseBinopExpr(prec int) Expr {
   606  	x := p.parseTestPrec(prec + 1)
   607  	for first := true; ; first = false {
   608  		if p.tok == NOT {
   609  			p.nextToken() // consume NOT
   610  			// In this context, NOT must be followed by IN.
   611  			// Replace NOT IN by a single NOT_IN token.
   612  			if p.tok != IN {
   613  				p.in.errorf(p.in.getPos(), "got %#v, want in", p.tok)
   614  			}
   615  			p.tok = NOT_IN
   616  		}
   617  
   618  		// Binary operator of specified precedence?
   619  		opprec := int(precedence[p.tok])
   620  		if opprec < prec {
   621  			return x
   622  		}
   623  
   624  		// Comparisons are non-associative.
   625  		if !first && opprec == int(precedence[EQL]) {
   626  			p.in.errorf(p.in.getPos(), "%s does not associate with %s (use parens)",
   627  				x.(*BinaryExpr).Op, p.tok)
   628  		}
   629  
   630  		op := p.tok
   631  		pos := p.nextToken()
   632  		y := p.parseTestPrec(opprec + 1)
   633  		x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
   634  	}
   635  }
   636  
// precedence maps each operator token to its precedence, which is the
// index of its group in preclevels (0-9), or -1 for other tokens.
// The table is filled in by init.
var precedence [maxToken]int8

// preclevels groups operators of equal precedence, lowest first.
// Comparisons are nonassociative; other binary operators associate to the left.
// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary.
// See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators
var preclevels = [...][]Token{
	{OR},                                   // or
	{AND},                                  // and
	{NOT},                                  // not (unary)
	{EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in
	{PIPE},                                 // |
	{CIRCUMFLEX},                           // ^
	{AMP},                                  // &
	{LTLT, GTGT},                           // << >>
	{MINUS, PLUS},                          // - +
	{STAR, PERCENT, SLASH, SLASHSLASH},     // * % / //
}
   656  
   657  func init() {
   658  	// populate precedence table
   659  	for i := range precedence {
   660  		precedence[i] = -1
   661  	}
   662  	for level, tokens := range preclevels {
   663  		for _, tok := range tokens {
   664  			precedence[tok] = int8(level)
   665  		}
   666  	}
   667  }
   668  
   669  // primary_with_suffix = primary
   670  //                     | primary '.' IDENT
   671  //                     | primary slice_suffix
   672  //                     | primary call_suffix
   673  func (p *parser) parsePrimaryWithSuffix() Expr {
   674  	x := p.parsePrimary()
   675  	for {
   676  		switch p.tok {
   677  		case DOT:
   678  			dot := p.nextToken()
   679  			id := p.parseIdent()
   680  			x = &DotExpr{Dot: dot, X: x, Name: id}
   681  		case LBRACK:
   682  			x = p.parseSliceSuffix(x)
   683  		case LPAREN:
   684  			x = p.parseCallSuffix(x)
   685  		default:
   686  			return x
   687  		}
   688  	}
   689  }
   690  
   691  // slice_suffix = '[' expr? ':' expr?  ':' expr? ']'
   692  func (p *parser) parseSliceSuffix(x Expr) Expr {
   693  	lbrack := p.nextToken()
   694  	var lo, hi, step Expr
   695  	if p.tok != COLON {
   696  		y := p.parseExpr(false)
   697  
   698  		// index x[y]
   699  		if p.tok == RBRACK {
   700  			rbrack := p.nextToken()
   701  			return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack}
   702  		}
   703  
   704  		lo = y
   705  	}
   706  
   707  	// slice or substring x[lo:hi:step]
   708  	if p.tok == COLON {
   709  		p.nextToken()
   710  		if p.tok != COLON && p.tok != RBRACK {
   711  			hi = p.parseTest()
   712  		}
   713  	}
   714  	if p.tok == COLON {
   715  		p.nextToken()
   716  		if p.tok != RBRACK {
   717  			step = p.parseTest()
   718  		}
   719  	}
   720  	rbrack := p.consume(RBRACK)
   721  	return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack}
   722  }
   723  
   724  // call_suffix = '(' arg_list? ')'
   725  func (p *parser) parseCallSuffix(fn Expr) Expr {
   726  	lparen := p.consume(LPAREN)
   727  	var rparen Position
   728  	var args []Expr
   729  	if p.tok == RPAREN {
   730  		rparen = p.nextToken()
   731  	} else {
   732  		args = p.parseArgs()
   733  		rparen = p.consume(RPAREN)
   734  	}
   735  	return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen}
   736  }
   737  
   738  // parseArgs parses a list of actual parameter values (arguments).
   739  // It mirrors the structure of parseParams.
   740  // arg_list = ((arg COMMA)* arg COMMA?)?
   741  func (p *parser) parseArgs() []Expr {
   742  	var args []Expr
   743  	stars := false
   744  	for p.tok != RPAREN && p.tok != EOF {
   745  		if len(args) > 0 {
   746  			p.consume(COMMA)
   747  		}
   748  		if p.tok == RPAREN {
   749  			// list can end with a COMMA if there is neither * nor **
   750  			if stars {
   751  				p.in.errorf(p.in.getPos(), `got %#v, want argument`, p.tok)
   752  			}
   753  			break
   754  		}
   755  
   756  		// *args or **kwargs
   757  		if p.tok == STAR || p.tok == STARSTAR {
   758  			stars = true
   759  			op := p.tok
   760  			pos := p.nextToken()
   761  			x := p.parseTest()
   762  			args = append(args, &UnaryExpr{
   763  				OpPos: pos,
   764  				Op:    op,
   765  				X:     x,
   766  			})
   767  			continue
   768  		}
   769  
   770  		// We use a different strategy from Bazel here to stay within LL(1).
   771  		// Instead of looking ahead two tokens (IDENT, EQ) we parse
   772  		// 'test = test' then check that the first was an IDENT.
   773  		x := p.parseTest()
   774  
   775  		if p.tok == EQ {
   776  			// name = value
   777  			if _, ok := x.(*Ident); !ok {
   778  				p.in.errorf(p.in.getPos(), "keyword argument must have form name=expr")
   779  			}
   780  			eq := p.nextToken()
   781  			y := p.parseTest()
   782  			x = &BinaryExpr{
   783  				X:     x,
   784  				OpPos: eq,
   785  				Op:    EQ,
   786  				Y:     y,
   787  			}
   788  		}
   789  
   790  		args = append(args, x)
   791  	}
   792  	return args
   793  }
   794  
   795  //  primary = IDENT
   796  //          | INT | FLOAT
   797  //          | STRING
   798  //          | '[' ...                    // list literal or comprehension
   799  //          | '{' ...                    // dict literal or comprehension
   800  //          | '(' ...                    // tuple or parenthesized expression
   801  //          | ('-'|'+'|'~') primary_with_suffix
   802  func (p *parser) parsePrimary() Expr {
   803  	switch p.tok {
   804  	case IDENT:
   805  		return p.parseIdent()
   806  
   807  	case INT, FLOAT, STRING:
   808  		var val interface{}
   809  		tok := p.tok
   810  		switch tok {
   811  		case INT:
   812  			if p.tokval.bigInt != nil {
   813  				val = p.tokval.bigInt
   814  			} else {
   815  				val = p.tokval.int
   816  			}
   817  		case FLOAT:
   818  			val = p.tokval.float
   819  		case STRING:
   820  			val = p.tokval.string
   821  		}
   822  		raw := p.tokval.raw
   823  		pos := p.nextToken()
   824  		return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val}
   825  
   826  	case LBRACK:
   827  		return p.parseList()
   828  
   829  	case LBRACE:
   830  		return p.parseDict()
   831  
   832  	case LPAREN:
   833  		lparen := p.nextToken()
   834  		if p.tok == RPAREN {
   835  			// empty tuple
   836  			rparen := p.nextToken()
   837  			return &TupleExpr{Lparen: lparen, Rparen: rparen}
   838  		}
   839  		e := p.parseExpr(true) // allow trailing comma
   840  		rparen := p.consume(RPAREN)
   841  		return &ParenExpr{
   842  			Lparen: lparen,
   843  			X:      e,
   844  			Rparen: rparen,
   845  		}
   846  
   847  	case MINUS, PLUS, TILDE: // unary
   848  		tok := p.tok
   849  		pos := p.nextToken()
   850  		x := p.parsePrimaryWithSuffix()
   851  		return &UnaryExpr{
   852  			OpPos: pos,
   853  			Op:    tok,
   854  			X:     x,
   855  		}
   856  	}
   857  	p.in.errorf(p.in.getPos(), "got %#v, want primary expression", p.tok)
   858  	panic("unreachable")
   859  }
   860  
   861  // list = '[' ']'
   862  //      | '[' expr ']'
   863  //      | '[' expr expr_list ']'
   864  //      | '[' expr (FOR loop_variables IN expr)+ ']'
   865  func (p *parser) parseList() Expr {
   866  	lbrack := p.nextToken()
   867  	if p.tok == RBRACK {
   868  		// empty List
   869  		rbrack := p.nextToken()
   870  		return &ListExpr{Lbrack: lbrack, Rbrack: rbrack}
   871  	}
   872  
   873  	x := p.parseTest()
   874  
   875  	if p.tok == FOR {
   876  		// list comprehension
   877  		return p.parseComprehensionSuffix(lbrack, x, RBRACK)
   878  	}
   879  
   880  	exprs := []Expr{x}
   881  	if p.tok == COMMA {
   882  		// multi-item list literal
   883  		exprs = p.parseExprs(exprs, true) // allow trailing comma
   884  	}
   885  
   886  	rbrack := p.consume(RBRACK)
   887  	return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack}
   888  }
   889  
   890  // dict = '{' '}'
   891  //      | '{' dict_entry_list '}'
   892  //      | '{' dict_entry FOR loop_variables IN expr '}'
   893  func (p *parser) parseDict() Expr {
   894  	lbrace := p.nextToken()
   895  	if p.tok == RBRACE {
   896  		// empty dict
   897  		rbrace := p.nextToken()
   898  		return &DictExpr{Lbrace: lbrace, Rbrace: rbrace}
   899  	}
   900  
   901  	x := p.parseDictEntry()
   902  
   903  	if p.tok == FOR {
   904  		// dict comprehension
   905  		return p.parseComprehensionSuffix(lbrace, x, RBRACE)
   906  	}
   907  
   908  	entries := []Expr{x}
   909  	for p.tok == COMMA {
   910  		p.nextToken()
   911  		if p.tok == RBRACE {
   912  			break
   913  		}
   914  		entries = append(entries, p.parseDictEntry())
   915  	}
   916  
   917  	rbrace := p.consume(RBRACE)
   918  	return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace}
   919  }
   920  
   921  // dict_entry = test ':' test
   922  func (p *parser) parseDictEntry() *DictEntry {
   923  	k := p.parseTest()
   924  	colon := p.consume(COLON)
   925  	v := p.parseTest()
   926  	return &DictEntry{Key: k, Colon: colon, Value: v}
   927  }
   928  
   929  // comp_suffix = FOR loopvars IN expr comp_suffix
   930  //             | IF expr comp_suffix
   931  //             | ']'  or  ')'                              (end)
   932  //
   933  // There can be multiple FOR/IF clauses; the first is always a FOR.
   934  func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr {
   935  	var clauses []Node
   936  	for p.tok != endBrace {
   937  		if p.tok == FOR {
   938  			pos := p.nextToken()
   939  			vars := p.parseForLoopVariables()
   940  			in := p.consume(IN)
   941  			// Following Python 3, the operand of IN cannot be:
   942  			// - a conditional expression ('x if y else z'),
   943  			//   due to conflicts in Python grammar
   944  			//  ('if' is used by the comprehension);
   945  			// - a lambda expression
   946  			// - an unparenthesized tuple.
   947  			x := p.parseTestPrec(0)
   948  			clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x})
   949  		} else if p.tok == IF {
   950  			pos := p.nextToken()
   951  			cond := p.parseTestNoCond()
   952  			clauses = append(clauses, &IfClause{If: pos, Cond: cond})
   953  		} else {
   954  			p.in.errorf(p.in.getPos(), "got %#v, want '%s', for, or if", p.tok, endBrace)
   955  		}
   956  	}
   957  	rbrace := p.nextToken()
   958  
   959  	return &Comprehension{
   960  		Curly:   endBrace == RBRACE,
   961  		Lbrack:  lbrace,
   962  		Body:    body,
   963  		Clauses: clauses,
   964  		Rbrack:  rbrace,
   965  	}
   966  }
   967  
   968  func terminatesExprList(tok Token) bool {
   969  	switch tok {
   970  	case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI:
   971  		return true
   972  	}
   973  	return false
   974  }
   975  
   976  // Comment assignment.
   977  // We build two lists of all subnodes, preorder and postorder.
   978  // The preorder list is ordered by start location, with outer nodes first.
   979  // The postorder list is ordered by end location, with outer nodes last.
   980  // We use the preorder list to assign each whole-line comment to the syntax
   981  // immediately following it, and we use the postorder list to assign each
   982  // end-of-line comment to the syntax immediately preceding it.
   983  
   984  // flattenAST returns the list of AST nodes, both in prefix order and in postfix
   985  // order.
   986  func flattenAST(root Node) (pre, post []Node) {
   987  	stack := []Node{}
   988  	Walk(root, func(n Node) bool {
   989  		if n != nil {
   990  			pre = append(pre, n)
   991  			stack = append(stack, n)
   992  		} else {
   993  			post = append(post, stack[len(stack)-1])
   994  			stack = stack[:len(stack)-1]
   995  		}
   996  		return true
   997  	})
   998  	return pre, post
   999  }
  1000  
  1001  // assignComments attaches comments to nearby syntax.
  1002  func (p *parser) assignComments(n Node) {
  1003  	// Leave early if there are no comments
  1004  	if len(p.in.getLineComments())+len(p.in.getSuffixComments()) == 0 {
  1005  		return
  1006  	}
  1007  
  1008  	pre, post := flattenAST(n)
  1009  
  1010  	// Assign line comments to syntax immediately following.
  1011  	line := p.in.getLineComments()
  1012  	for _, x := range pre {
  1013  		start, _ := x.Span()
  1014  
  1015  		switch x.(type) {
  1016  		case *File:
  1017  			continue
  1018  		}
  1019  
  1020  		for len(line) > 0 && !start.isBefore(line[0].Start) {
  1021  			x.AllocComments()
  1022  			x.Comments().Before = append(x.Comments().Before, line[0])
  1023  			line = line[1:]
  1024  		}
  1025  	}
  1026  
  1027  	// Remaining line comments go at end of file.
  1028  	if len(line) > 0 {
  1029  		n.AllocComments()
  1030  		n.Comments().After = append(n.Comments().After, line...)
  1031  	}
  1032  
  1033  	// Assign suffix comments to syntax immediately before.
  1034  	suffix := p.in.getSuffixComments()
  1035  	for i := len(post) - 1; i >= 0; i-- {
  1036  		x := post[i]
  1037  
  1038  		// Do not assign suffix comments to file
  1039  		switch x.(type) {
  1040  		case *File:
  1041  			continue
  1042  		}
  1043  
  1044  		_, end := x.Span()
  1045  		if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) {
  1046  			x.AllocComments()
  1047  			x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1])
  1048  			suffix = suffix[:len(suffix)-1]
  1049  		}
  1050  	}
  1051  }