go.starlark.net@v0.0.0-20231101134539-556fd59b42f6/syntax/parse.go (about)

     1  // Copyright 2017 The Bazel Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  // This file defines a recursive-descent parser for Starlark.
     8  // The LL(1) grammar of Starlark and the names of many productions follow Python 2.7.
     9  //
    10  // TODO(adonovan): use syntax.Error more systematically throughout the
    11  // package.  Verify that error positions are correct using the
    12  // chunkedfile mechanism.
    13  
    14  import "log"
    15  
// debug gates the token-stream trace emitted by parser.nextToken.
// Enable this flag to print the token stream and log.Fatal on the first error.
const debug = false

// A Mode value is a set of flags (or 0) that controls optional parser functionality.
// Callers in this file test a flag with a bitwise AND, e.g. mode&RetainComments != 0.
type Mode uint

// Parser mode flags; each occupies one bit of a Mode value.
const (
	RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments
)
    25  
    26  // Parse calls the Parse method of LegacyFileOptions().
    27  // Deprecated: relies on legacy global variables.
    28  func Parse(filename string, src interface{}, mode Mode) (f *File, err error) {
    29  	return LegacyFileOptions().Parse(filename, src, mode)
    30  }
    31  
    32  // Parse parses the input data and returns the corresponding parse tree.
    33  //
    34  // If src != nil, Parse parses the source from src and the filename
    35  // is only used when recording position information.
    36  // The type of the argument for the src parameter must be string,
    37  // []byte, io.Reader, or FilePortion.
    38  // If src == nil, Parse parses the file specified by filename.
    39  func (opts *FileOptions) Parse(filename string, src interface{}, mode Mode) (f *File, err error) {
    40  	in, err := newScanner(filename, src, mode&RetainComments != 0)
    41  	if err != nil {
    42  		return nil, err
    43  	}
    44  	p := parser{options: opts, in: in}
    45  	defer p.in.recover(&err)
    46  
    47  	p.nextToken() // read first lookahead token
    48  	f = p.parseFile()
    49  	if f != nil {
    50  		f.Path = filename
    51  	}
    52  	p.assignComments(f)
    53  	return f, nil
    54  }
    55  
    56  // ParseCompoundStmt calls the ParseCompoundStmt method of LegacyFileOptions().
    57  // Deprecated: relies on legacy global variables.
    58  func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) {
    59  	return LegacyFileOptions().ParseCompoundStmt(filename, readline)
    60  }
    61  
    62  // ParseCompoundStmt parses a single compound statement:
    63  // a blank line, a def, for, while, or if statement, or a
    64  // semicolon-separated list of simple statements followed
    65  // by a newline. These are the units on which the REPL operates.
    66  // ParseCompoundStmt does not consume any following input.
    67  // The parser calls the readline function each
    68  // time it needs a new line of input.
    69  func (opts *FileOptions) ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) {
    70  	in, err := newScanner(filename, readline, false)
    71  	if err != nil {
    72  		return nil, err
    73  	}
    74  
    75  	p := parser{options: opts, in: in}
    76  	defer p.in.recover(&err)
    77  
    78  	p.nextToken() // read first lookahead token
    79  
    80  	var stmts []Stmt
    81  	switch p.tok {
    82  	case DEF, IF, FOR, WHILE:
    83  		stmts = p.parseStmt(stmts)
    84  	case NEWLINE:
    85  		// blank line
    86  	default:
    87  		stmts = p.parseSimpleStmt(stmts, false)
    88  		// Require but don't consume newline, to avoid blocking again.
    89  		if p.tok != NEWLINE {
    90  			p.in.errorf(p.in.pos, "invalid syntax")
    91  		}
    92  	}
    93  
    94  	return &File{Options: opts, Path: filename, Stmts: stmts}, nil
    95  }
    96  
    97  // ParseExpr calls the ParseExpr method of LegacyFileOptions().
    98  // Deprecated: relies on legacy global variables.
    99  func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) {
   100  	return LegacyFileOptions().ParseExpr(filename, src, mode)
   101  }
   102  
   103  // ParseExpr parses a Starlark expression.
   104  // A comma-separated list of expressions is parsed as a tuple.
   105  // See Parse for explanation of parameters.
   106  func (opts *FileOptions) ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) {
   107  	in, err := newScanner(filename, src, mode&RetainComments != 0)
   108  	if err != nil {
   109  		return nil, err
   110  	}
   111  	p := parser{options: opts, in: in}
   112  	defer p.in.recover(&err)
   113  
   114  	p.nextToken() // read first lookahead token
   115  
   116  	// Use parseExpr, not parseTest, to permit an unparenthesized tuple.
   117  	expr = p.parseExpr(false)
   118  
   119  	// A following newline (e.g. "f()\n") appears outside any brackets,
   120  	// on a non-blank line, and thus results in a NEWLINE token.
   121  	if p.tok == NEWLINE {
   122  		p.nextToken()
   123  	}
   124  
   125  	if p.tok != EOF {
   126  		p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok)
   127  	}
   128  	p.assignComments(expr)
   129  	return expr, nil
   130  }
   131  
// parser holds the state of one recursive-descent parse: the token source
// and a single token of lookahead.
type parser struct {
	options *FileOptions // recorded in the Options field of each File the parser builds
	in      *scanner     // token source; also used for error reporting and comment lists
	tok     Token        // current lookahead token, set by nextToken
	tokval  tokenValue   // value of tok (position, raw text, literal value), filled in by in.nextToken
}
   138  
   139  // nextToken advances the scanner and returns the position of the
   140  // previous token.
   141  func (p *parser) nextToken() Position {
   142  	oldpos := p.tokval.pos
   143  	p.tok = p.in.nextToken(&p.tokval)
   144  	// enable to see the token stream
   145  	if debug {
   146  		log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos)
   147  	}
   148  	return oldpos
   149  }
   150  
   151  // file_input = (NEWLINE | stmt)* EOF
   152  func (p *parser) parseFile() *File {
   153  	var stmts []Stmt
   154  	for p.tok != EOF {
   155  		if p.tok == NEWLINE {
   156  			p.nextToken()
   157  			continue
   158  		}
   159  		stmts = p.parseStmt(stmts)
   160  	}
   161  	return &File{Options: p.options, Stmts: stmts}
   162  }
   163  
   164  func (p *parser) parseStmt(stmts []Stmt) []Stmt {
   165  	if p.tok == DEF {
   166  		return append(stmts, p.parseDefStmt())
   167  	} else if p.tok == IF {
   168  		return append(stmts, p.parseIfStmt())
   169  	} else if p.tok == FOR {
   170  		return append(stmts, p.parseForStmt())
   171  	} else if p.tok == WHILE {
   172  		return append(stmts, p.parseWhileStmt())
   173  	}
   174  	return p.parseSimpleStmt(stmts, true)
   175  }
   176  
   177  func (p *parser) parseDefStmt() Stmt {
   178  	defpos := p.nextToken() // consume DEF
   179  	id := p.parseIdent()
   180  	lparen := p.consume(LPAREN)
   181  	params := p.parseParams()
   182  	rparen := p.consume(RPAREN)
   183  	p.consume(COLON)
   184  	body := p.parseSuite()
   185  	return &DefStmt{
   186  		Def:    defpos,
   187  		Name:   id,
   188  		Lparen: lparen,
   189  		Params: params,
   190  		Rparen: rparen,
   191  		Body:   body,
   192  	}
   193  }
   194  
   195  func (p *parser) parseIfStmt() Stmt {
   196  	ifpos := p.nextToken() // consume IF
   197  	cond := p.parseTest()
   198  	p.consume(COLON)
   199  	body := p.parseSuite()
   200  	ifStmt := &IfStmt{
   201  		If:   ifpos,
   202  		Cond: cond,
   203  		True: body,
   204  	}
   205  	tail := ifStmt
   206  	for p.tok == ELIF {
   207  		elifpos := p.nextToken() // consume ELIF
   208  		cond := p.parseTest()
   209  		p.consume(COLON)
   210  		body := p.parseSuite()
   211  		elif := &IfStmt{
   212  			If:   elifpos,
   213  			Cond: cond,
   214  			True: body,
   215  		}
   216  		tail.ElsePos = elifpos
   217  		tail.False = []Stmt{elif}
   218  		tail = elif
   219  	}
   220  	if p.tok == ELSE {
   221  		tail.ElsePos = p.nextToken() // consume ELSE
   222  		p.consume(COLON)
   223  		tail.False = p.parseSuite()
   224  	}
   225  	return ifStmt
   226  }
   227  
   228  func (p *parser) parseForStmt() Stmt {
   229  	forpos := p.nextToken() // consume FOR
   230  	vars := p.parseForLoopVariables()
   231  	p.consume(IN)
   232  	x := p.parseExpr(false)
   233  	p.consume(COLON)
   234  	body := p.parseSuite()
   235  	return &ForStmt{
   236  		For:  forpos,
   237  		Vars: vars,
   238  		X:    x,
   239  		Body: body,
   240  	}
   241  }
   242  
   243  func (p *parser) parseWhileStmt() Stmt {
   244  	whilepos := p.nextToken() // consume WHILE
   245  	cond := p.parseTest()
   246  	p.consume(COLON)
   247  	body := p.parseSuite()
   248  	return &WhileStmt{
   249  		While: whilepos,
   250  		Cond:  cond,
   251  		Body:  body,
   252  	}
   253  }
   254  
   255  // Equivalent to 'exprlist' production in Python grammar.
   256  //
   257  // loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA?
   258  func (p *parser) parseForLoopVariables() Expr {
   259  	// Avoid parseExpr because it would consume the IN token
   260  	// following x in "for x in y: ...".
   261  	v := p.parsePrimaryWithSuffix()
   262  	if p.tok != COMMA {
   263  		return v
   264  	}
   265  
   266  	list := []Expr{v}
   267  	for p.tok == COMMA {
   268  		p.nextToken()
   269  		if terminatesExprList(p.tok) {
   270  			break
   271  		}
   272  		list = append(list, p.parsePrimaryWithSuffix())
   273  	}
   274  	return &TupleExpr{List: list}
   275  }
   276  
   277  // simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE
   278  // In REPL mode, it does not consume the NEWLINE.
   279  func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt {
   280  	for {
   281  		stmts = append(stmts, p.parseSmallStmt())
   282  		if p.tok != SEMI {
   283  			break
   284  		}
   285  		p.nextToken() // consume SEMI
   286  		if p.tok == NEWLINE || p.tok == EOF {
   287  			break
   288  		}
   289  	}
   290  	// EOF without NEWLINE occurs in `if x: pass`, for example.
   291  	if p.tok != EOF && consumeNL {
   292  		p.consume(NEWLINE)
   293  	}
   294  
   295  	return stmts
   296  }
   297  
   298  // small_stmt = RETURN expr?
   299  //
   300  //	| PASS | BREAK | CONTINUE
   301  //	| LOAD ...
   302  //	| expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr   // assign
   303  //	| expr
   304  func (p *parser) parseSmallStmt() Stmt {
   305  	switch p.tok {
   306  	case RETURN:
   307  		pos := p.nextToken() // consume RETURN
   308  		var result Expr
   309  		if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI {
   310  			result = p.parseExpr(false)
   311  		}
   312  		return &ReturnStmt{Return: pos, Result: result}
   313  
   314  	case BREAK, CONTINUE, PASS:
   315  		tok := p.tok
   316  		pos := p.nextToken() // consume it
   317  		return &BranchStmt{Token: tok, TokenPos: pos}
   318  
   319  	case LOAD:
   320  		return p.parseLoadStmt()
   321  	}
   322  
   323  	// Assignment
   324  	x := p.parseExpr(false)
   325  	switch p.tok {
   326  	case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ:
   327  		op := p.tok
   328  		pos := p.nextToken() // consume op
   329  		rhs := p.parseExpr(false)
   330  		return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs}
   331  	}
   332  
   333  	// Expression statement (e.g. function call, doc string).
   334  	return &ExprStmt{X: x}
   335  }
   336  
   337  // stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')'
   338  func (p *parser) parseLoadStmt() *LoadStmt {
   339  	loadPos := p.nextToken() // consume LOAD
   340  	lparen := p.consume(LPAREN)
   341  
   342  	if p.tok != STRING {
   343  		p.in.errorf(p.in.pos, "first operand of load statement must be a string literal")
   344  	}
   345  	module := p.parsePrimary().(*Literal)
   346  
   347  	var from, to []*Ident
   348  	for p.tok != RPAREN && p.tok != EOF {
   349  		p.consume(COMMA)
   350  		if p.tok == RPAREN {
   351  			break // allow trailing comma
   352  		}
   353  		switch p.tok {
   354  		case STRING:
   355  			// load("module", "id")
   356  			// To name is same as original.
   357  			lit := p.parsePrimary().(*Literal)
   358  			id := &Ident{
   359  				NamePos: lit.TokenPos.add(`"`),
   360  				Name:    lit.Value.(string),
   361  			}
   362  			to = append(to, id)
   363  			from = append(from, id)
   364  
   365  		case IDENT:
   366  			// load("module", to="from")
   367  			id := p.parseIdent()
   368  			to = append(to, id)
   369  			if p.tok != EQ {
   370  				p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name)
   371  			}
   372  			p.consume(EQ)
   373  			if p.tok != STRING {
   374  				p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name)
   375  			}
   376  			lit := p.parsePrimary().(*Literal)
   377  			from = append(from, &Ident{
   378  				NamePos: lit.TokenPos.add(`"`),
   379  				Name:    lit.Value.(string),
   380  			})
   381  
   382  		case RPAREN:
   383  			p.in.errorf(p.in.pos, "trailing comma in load statement")
   384  
   385  		default:
   386  			p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok)
   387  		}
   388  	}
   389  	rparen := p.consume(RPAREN)
   390  
   391  	if len(to) == 0 {
   392  		p.in.errorf(lparen, "load statement must import at least 1 symbol")
   393  	}
   394  	return &LoadStmt{
   395  		Load:   loadPos,
   396  		Module: module,
   397  		To:     to,
   398  		From:   from,
   399  		Rparen: rparen,
   400  	}
   401  }
   402  
   403  // suite is typically what follows a COLON (e.g. after DEF or FOR).
   404  // suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT
   405  func (p *parser) parseSuite() []Stmt {
   406  	if p.tok == NEWLINE {
   407  		p.nextToken() // consume NEWLINE
   408  		p.consume(INDENT)
   409  		var stmts []Stmt
   410  		for p.tok != OUTDENT && p.tok != EOF {
   411  			stmts = p.parseStmt(stmts)
   412  		}
   413  		p.consume(OUTDENT)
   414  		return stmts
   415  	}
   416  
   417  	return p.parseSimpleStmt(nil, true)
   418  }
   419  
   420  func (p *parser) parseIdent() *Ident {
   421  	if p.tok != IDENT {
   422  		p.in.error(p.in.pos, "not an identifier")
   423  	}
   424  	id := &Ident{
   425  		NamePos: p.tokval.pos,
   426  		Name:    p.tokval.raw,
   427  	}
   428  	p.nextToken()
   429  	return id
   430  }
   431  
   432  func (p *parser) consume(t Token) Position {
   433  	if p.tok != t {
   434  		p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t)
   435  	}
   436  	return p.nextToken()
   437  }
   438  
   439  // params = (param COMMA)* param COMMA?
   440  //
   441  //	|
   442  //
   443  // param = IDENT
   444  //
   445  //	| IDENT EQ test
   446  //	| STAR
   447  //	| STAR IDENT
   448  //	| STARSTAR IDENT
   449  //
   450  // parseParams parses a parameter list.  The resulting expressions are of the form:
   451  //
   452  //	*Ident                                          x
   453  //	*Binary{Op: EQ, X: *Ident, Y: Expr}             x=y
   454  //	*Unary{Op: STAR}                                *
   455  //	*Unary{Op: STAR, X: *Ident}                     *args
   456  //	*Unary{Op: STARSTAR, X: *Ident}                 **kwargs
   457  func (p *parser) parseParams() []Expr {
   458  	var params []Expr
   459  	for p.tok != RPAREN && p.tok != COLON && p.tok != EOF {
   460  		if len(params) > 0 {
   461  			p.consume(COMMA)
   462  		}
   463  		if p.tok == RPAREN {
   464  			break
   465  		}
   466  
   467  		// * or *args or **kwargs
   468  		if p.tok == STAR || p.tok == STARSTAR {
   469  			op := p.tok
   470  			pos := p.nextToken()
   471  			var x Expr
   472  			if op == STARSTAR || p.tok == IDENT {
   473  				x = p.parseIdent()
   474  			}
   475  			params = append(params, &UnaryExpr{
   476  				OpPos: pos,
   477  				Op:    op,
   478  				X:     x,
   479  			})
   480  			continue
   481  		}
   482  
   483  		// IDENT
   484  		// IDENT = test
   485  		id := p.parseIdent()
   486  		if p.tok == EQ { // default value
   487  			eq := p.nextToken()
   488  			dflt := p.parseTest()
   489  			params = append(params, &BinaryExpr{
   490  				X:     id,
   491  				OpPos: eq,
   492  				Op:    EQ,
   493  				Y:     dflt,
   494  			})
   495  			continue
   496  		}
   497  
   498  		params = append(params, id)
   499  	}
   500  	return params
   501  }
   502  
   503  // parseExpr parses an expression, possible consisting of a
   504  // comma-separated list of 'test' expressions.
   505  //
   506  // In many cases we must use parseTest to avoid ambiguity such as
   507  // f(x, y) vs. f((x, y)).
   508  func (p *parser) parseExpr(inParens bool) Expr {
   509  	x := p.parseTest()
   510  	if p.tok != COMMA {
   511  		return x
   512  	}
   513  
   514  	// tuple
   515  	exprs := p.parseExprs([]Expr{x}, inParens)
   516  	return &TupleExpr{List: exprs}
   517  }
   518  
   519  // parseExprs parses a comma-separated list of expressions, starting with the comma.
   520  // It is used to parse tuples and list elements.
   521  // expr_list = (',' expr)* ','?
   522  func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr {
   523  	for p.tok == COMMA {
   524  		pos := p.nextToken()
   525  		if terminatesExprList(p.tok) {
   526  			if !allowTrailingComma {
   527  				p.in.error(pos, "unparenthesized tuple with trailing comma")
   528  			}
   529  			break
   530  		}
   531  		exprs = append(exprs, p.parseTest())
   532  	}
   533  	return exprs
   534  }
   535  
   536  // parseTest parses a 'test', a single-component expression.
   537  func (p *parser) parseTest() Expr {
   538  	if p.tok == LAMBDA {
   539  		return p.parseLambda(true)
   540  	}
   541  
   542  	x := p.parseTestPrec(0)
   543  
   544  	// conditional expression (t IF cond ELSE f)
   545  	if p.tok == IF {
   546  		ifpos := p.nextToken()
   547  		cond := p.parseTestPrec(0)
   548  		if p.tok != ELSE {
   549  			p.in.error(ifpos, "conditional expression without else clause")
   550  		}
   551  		elsepos := p.nextToken()
   552  		else_ := p.parseTest()
   553  		return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_}
   554  	}
   555  
   556  	return x
   557  }
   558  
   559  // parseTestNoCond parses a a single-component expression without
   560  // consuming a trailing 'if expr else expr'.
   561  func (p *parser) parseTestNoCond() Expr {
   562  	if p.tok == LAMBDA {
   563  		return p.parseLambda(false)
   564  	}
   565  	return p.parseTestPrec(0)
   566  }
   567  
   568  // parseLambda parses a lambda expression.
   569  // The allowCond flag allows the body to be an 'a if b else c' conditional.
   570  func (p *parser) parseLambda(allowCond bool) Expr {
   571  	lambda := p.nextToken()
   572  	var params []Expr
   573  	if p.tok != COLON {
   574  		params = p.parseParams()
   575  	}
   576  	p.consume(COLON)
   577  
   578  	var body Expr
   579  	if allowCond {
   580  		body = p.parseTest()
   581  	} else {
   582  		body = p.parseTestNoCond()
   583  	}
   584  
   585  	return &LambdaExpr{
   586  		Lambda: lambda,
   587  		Params: params,
   588  		Body:   body,
   589  	}
   590  }
   591  
   592  func (p *parser) parseTestPrec(prec int) Expr {
   593  	if prec >= len(preclevels) {
   594  		return p.parsePrimaryWithSuffix()
   595  	}
   596  
   597  	// expr = NOT expr
   598  	if p.tok == NOT && prec == int(precedence[NOT]) {
   599  		pos := p.nextToken()
   600  		x := p.parseTestPrec(prec)
   601  		return &UnaryExpr{
   602  			OpPos: pos,
   603  			Op:    NOT,
   604  			X:     x,
   605  		}
   606  	}
   607  
   608  	return p.parseBinopExpr(prec)
   609  }
   610  
   611  // expr = test (OP test)*
   612  // Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing.
   613  func (p *parser) parseBinopExpr(prec int) Expr {
   614  	x := p.parseTestPrec(prec + 1)
   615  	for first := true; ; first = false {
   616  		if p.tok == NOT {
   617  			p.nextToken() // consume NOT
   618  			// In this context, NOT must be followed by IN.
   619  			// Replace NOT IN by a single NOT_IN token.
   620  			if p.tok != IN {
   621  				p.in.errorf(p.in.pos, "got %#v, want in", p.tok)
   622  			}
   623  			p.tok = NOT_IN
   624  		}
   625  
   626  		// Binary operator of specified precedence?
   627  		opprec := int(precedence[p.tok])
   628  		if opprec < prec {
   629  			return x
   630  		}
   631  
   632  		// Comparisons are non-associative.
   633  		if !first && opprec == int(precedence[EQL]) {
   634  			p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)",
   635  				x.(*BinaryExpr).Op, p.tok)
   636  		}
   637  
   638  		op := p.tok
   639  		pos := p.nextToken()
   640  		y := p.parseTestPrec(opprec + 1)
   641  		x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
   642  	}
   643  }
   644  
   645  // precedence maps each operator to its precedence (0-7), or -1 for other tokens.
   646  var precedence [maxToken]int8
   647  
   648  // preclevels groups operators of equal precedence.
   649  // Comparisons are nonassociative; other binary operators associate to the left.
   650  // Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary.
   651  // See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators
   652  var preclevels = [...][]Token{
   653  	{OR},                                   // or
   654  	{AND},                                  // and
   655  	{NOT},                                  // not (unary)
   656  	{EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in
   657  	{PIPE},                                 // |
   658  	{CIRCUMFLEX},                           // ^
   659  	{AMP},                                  // &
   660  	{LTLT, GTGT},                           // << >>
   661  	{MINUS, PLUS},                          // -
   662  	{STAR, PERCENT, SLASH, SLASHSLASH},     // * % / //
   663  }
   664  
   665  func init() {
   666  	// populate precedence table
   667  	for i := range precedence {
   668  		precedence[i] = -1
   669  	}
   670  	for level, tokens := range preclevels {
   671  		for _, tok := range tokens {
   672  			precedence[tok] = int8(level)
   673  		}
   674  	}
   675  }
   676  
   677  // primary_with_suffix = primary
   678  //
   679  //	| primary '.' IDENT
   680  //	| primary slice_suffix
   681  //	| primary call_suffix
   682  func (p *parser) parsePrimaryWithSuffix() Expr {
   683  	x := p.parsePrimary()
   684  	for {
   685  		switch p.tok {
   686  		case DOT:
   687  			dot := p.nextToken()
   688  			id := p.parseIdent()
   689  			x = &DotExpr{Dot: dot, X: x, Name: id}
   690  		case LBRACK:
   691  			x = p.parseSliceSuffix(x)
   692  		case LPAREN:
   693  			x = p.parseCallSuffix(x)
   694  		default:
   695  			return x
   696  		}
   697  	}
   698  }
   699  
   700  // slice_suffix = '[' expr? ':' expr?  ':' expr? ']'
   701  func (p *parser) parseSliceSuffix(x Expr) Expr {
   702  	lbrack := p.nextToken()
   703  	var lo, hi, step Expr
   704  	if p.tok != COLON {
   705  		y := p.parseExpr(false)
   706  
   707  		// index x[y]
   708  		if p.tok == RBRACK {
   709  			rbrack := p.nextToken()
   710  			return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack}
   711  		}
   712  
   713  		lo = y
   714  	}
   715  
   716  	// slice or substring x[lo:hi:step]
   717  	if p.tok == COLON {
   718  		p.nextToken()
   719  		if p.tok != COLON && p.tok != RBRACK {
   720  			hi = p.parseTest()
   721  		}
   722  	}
   723  	if p.tok == COLON {
   724  		p.nextToken()
   725  		if p.tok != RBRACK {
   726  			step = p.parseTest()
   727  		}
   728  	}
   729  	rbrack := p.consume(RBRACK)
   730  	return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack}
   731  }
   732  
   733  // call_suffix = '(' arg_list? ')'
   734  func (p *parser) parseCallSuffix(fn Expr) Expr {
   735  	lparen := p.consume(LPAREN)
   736  	var rparen Position
   737  	var args []Expr
   738  	if p.tok == RPAREN {
   739  		rparen = p.nextToken()
   740  	} else {
   741  		args = p.parseArgs()
   742  		rparen = p.consume(RPAREN)
   743  	}
   744  	return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen}
   745  }
   746  
   747  // parseArgs parses a list of actual parameter values (arguments).
   748  // It mirrors the structure of parseParams.
   749  // arg_list = ((arg COMMA)* arg COMMA?)?
   750  func (p *parser) parseArgs() []Expr {
   751  	var args []Expr
   752  	for p.tok != RPAREN && p.tok != EOF {
   753  		if len(args) > 0 {
   754  			p.consume(COMMA)
   755  		}
   756  		if p.tok == RPAREN {
   757  			break
   758  		}
   759  
   760  		// *args or **kwargs
   761  		if p.tok == STAR || p.tok == STARSTAR {
   762  			op := p.tok
   763  			pos := p.nextToken()
   764  			x := p.parseTest()
   765  			args = append(args, &UnaryExpr{
   766  				OpPos: pos,
   767  				Op:    op,
   768  				X:     x,
   769  			})
   770  			continue
   771  		}
   772  
   773  		// We use a different strategy from Bazel here to stay within LL(1).
   774  		// Instead of looking ahead two tokens (IDENT, EQ) we parse
   775  		// 'test = test' then check that the first was an IDENT.
   776  		x := p.parseTest()
   777  
   778  		if p.tok == EQ {
   779  			// name = value
   780  			if _, ok := x.(*Ident); !ok {
   781  				p.in.errorf(p.in.pos, "keyword argument must have form name=expr")
   782  			}
   783  			eq := p.nextToken()
   784  			y := p.parseTest()
   785  			x = &BinaryExpr{
   786  				X:     x,
   787  				OpPos: eq,
   788  				Op:    EQ,
   789  				Y:     y,
   790  			}
   791  		}
   792  
   793  		args = append(args, x)
   794  	}
   795  	return args
   796  }
   797  
   798  // primary = IDENT
   799  //
   800  //	| INT | FLOAT | STRING | BYTES
   801  //	| '[' ...                    // list literal or comprehension
   802  //	| '{' ...                    // dict literal or comprehension
   803  //	| '(' ...                    // tuple or parenthesized expression
   804  //	| ('-'|'+'|'~') primary_with_suffix
   805  func (p *parser) parsePrimary() Expr {
   806  	switch p.tok {
   807  	case IDENT:
   808  		return p.parseIdent()
   809  
   810  	case INT, FLOAT, STRING, BYTES:
   811  		var val interface{}
   812  		tok := p.tok
   813  		switch tok {
   814  		case INT:
   815  			if p.tokval.bigInt != nil {
   816  				val = p.tokval.bigInt
   817  			} else {
   818  				val = p.tokval.int
   819  			}
   820  		case FLOAT:
   821  			val = p.tokval.float
   822  		case STRING, BYTES:
   823  			val = p.tokval.string
   824  		}
   825  		raw := p.tokval.raw
   826  		pos := p.nextToken()
   827  		return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val}
   828  
   829  	case LBRACK:
   830  		return p.parseList()
   831  
   832  	case LBRACE:
   833  		return p.parseDict()
   834  
   835  	case LPAREN:
   836  		lparen := p.nextToken()
   837  		if p.tok == RPAREN {
   838  			// empty tuple
   839  			rparen := p.nextToken()
   840  			return &TupleExpr{Lparen: lparen, Rparen: rparen}
   841  		}
   842  		e := p.parseExpr(true) // allow trailing comma
   843  		rparen := p.consume(RPAREN)
   844  		return &ParenExpr{
   845  			Lparen: lparen,
   846  			X:      e,
   847  			Rparen: rparen,
   848  		}
   849  
   850  	case MINUS, PLUS, TILDE: // unary
   851  		tok := p.tok
   852  		pos := p.nextToken()
   853  		x := p.parsePrimaryWithSuffix()
   854  		return &UnaryExpr{
   855  			OpPos: pos,
   856  			Op:    tok,
   857  			X:     x,
   858  		}
   859  	}
   860  	p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok)
   861  	panic("unreachable")
   862  }
   863  
   864  // list = '[' ']'
   865  //
   866  //	| '[' expr ']'
   867  //	| '[' expr expr_list ']'
   868  //	| '[' expr (FOR loop_variables IN expr)+ ']'
   869  func (p *parser) parseList() Expr {
   870  	lbrack := p.nextToken()
   871  	if p.tok == RBRACK {
   872  		// empty List
   873  		rbrack := p.nextToken()
   874  		return &ListExpr{Lbrack: lbrack, Rbrack: rbrack}
   875  	}
   876  
   877  	x := p.parseTest()
   878  
   879  	if p.tok == FOR {
   880  		// list comprehension
   881  		return p.parseComprehensionSuffix(lbrack, x, RBRACK)
   882  	}
   883  
   884  	exprs := []Expr{x}
   885  	if p.tok == COMMA {
   886  		// multi-item list literal
   887  		exprs = p.parseExprs(exprs, true) // allow trailing comma
   888  	}
   889  
   890  	rbrack := p.consume(RBRACK)
   891  	return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack}
   892  }
   893  
   894  // dict = '{' '}'
   895  //
   896  //	| '{' dict_entry_list '}'
   897  //	| '{' dict_entry FOR loop_variables IN expr '}'
   898  func (p *parser) parseDict() Expr {
   899  	lbrace := p.nextToken()
   900  	if p.tok == RBRACE {
   901  		// empty dict
   902  		rbrace := p.nextToken()
   903  		return &DictExpr{Lbrace: lbrace, Rbrace: rbrace}
   904  	}
   905  
   906  	x := p.parseDictEntry()
   907  
   908  	if p.tok == FOR {
   909  		// dict comprehension
   910  		return p.parseComprehensionSuffix(lbrace, x, RBRACE)
   911  	}
   912  
   913  	entries := []Expr{x}
   914  	for p.tok == COMMA {
   915  		p.nextToken()
   916  		if p.tok == RBRACE {
   917  			break
   918  		}
   919  		entries = append(entries, p.parseDictEntry())
   920  	}
   921  
   922  	rbrace := p.consume(RBRACE)
   923  	return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace}
   924  }
   925  
   926  // dict_entry = test ':' test
   927  func (p *parser) parseDictEntry() *DictEntry {
   928  	k := p.parseTest()
   929  	colon := p.consume(COLON)
   930  	v := p.parseTest()
   931  	return &DictEntry{Key: k, Colon: colon, Value: v}
   932  }
   933  
   934  // comp_suffix = FOR loopvars IN expr comp_suffix
   935  //
   936  //	| IF expr comp_suffix
   937  //	| ']'  or  ')'                              (end)
   938  //
   939  // There can be multiple FOR/IF clauses; the first is always a FOR.
   940  func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr {
   941  	var clauses []Node
   942  	for p.tok != endBrace {
   943  		if p.tok == FOR {
   944  			pos := p.nextToken()
   945  			vars := p.parseForLoopVariables()
   946  			in := p.consume(IN)
   947  			// Following Python 3, the operand of IN cannot be:
   948  			// - a conditional expression ('x if y else z'),
   949  			//   due to conflicts in Python grammar
   950  			//  ('if' is used by the comprehension);
   951  			// - a lambda expression
   952  			// - an unparenthesized tuple.
   953  			x := p.parseTestPrec(0)
   954  			clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x})
   955  		} else if p.tok == IF {
   956  			pos := p.nextToken()
   957  			cond := p.parseTestNoCond()
   958  			clauses = append(clauses, &IfClause{If: pos, Cond: cond})
   959  		} else {
   960  			p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace)
   961  		}
   962  	}
   963  	rbrace := p.nextToken()
   964  
   965  	return &Comprehension{
   966  		Curly:   endBrace == RBRACE,
   967  		Lbrack:  lbrace,
   968  		Body:    body,
   969  		Clauses: clauses,
   970  		Rbrack:  rbrace,
   971  	}
   972  }
   973  
   974  func terminatesExprList(tok Token) bool {
   975  	switch tok {
   976  	case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI:
   977  		return true
   978  	}
   979  	return false
   980  }
   981  
   982  // Comment assignment.
   983  // We build two lists of all subnodes, preorder and postorder.
   984  // The preorder list is ordered by start location, with outer nodes first.
   985  // The postorder list is ordered by end location, with outer nodes last.
   986  // We use the preorder list to assign each whole-line comment to the syntax
   987  // immediately following it, and we use the postorder list to assign each
   988  // end-of-line comment to the syntax immediately preceding it.
   989  
   990  // flattenAST returns the list of AST nodes, both in prefix order and in postfix
   991  // order.
   992  func flattenAST(root Node) (pre, post []Node) {
   993  	stack := []Node{}
   994  	Walk(root, func(n Node) bool {
   995  		if n != nil {
   996  			pre = append(pre, n)
   997  			stack = append(stack, n)
   998  		} else {
   999  			post = append(post, stack[len(stack)-1])
  1000  			stack = stack[:len(stack)-1]
  1001  		}
  1002  		return true
  1003  	})
  1004  	return pre, post
  1005  }
  1006  
  1007  // assignComments attaches comments to nearby syntax.
  1008  func (p *parser) assignComments(n Node) {
  1009  	// Leave early if there are no comments
  1010  	if len(p.in.lineComments)+len(p.in.suffixComments) == 0 {
  1011  		return
  1012  	}
  1013  
  1014  	pre, post := flattenAST(n)
  1015  
  1016  	// Assign line comments to syntax immediately following.
  1017  	line := p.in.lineComments
  1018  	for _, x := range pre {
  1019  		start, _ := x.Span()
  1020  
  1021  		switch x.(type) {
  1022  		case *File:
  1023  			continue
  1024  		}
  1025  
  1026  		for len(line) > 0 && !start.isBefore(line[0].Start) {
  1027  			x.AllocComments()
  1028  			x.Comments().Before = append(x.Comments().Before, line[0])
  1029  			line = line[1:]
  1030  		}
  1031  	}
  1032  
  1033  	// Remaining line comments go at end of file.
  1034  	if len(line) > 0 {
  1035  		n.AllocComments()
  1036  		n.Comments().After = append(n.Comments().After, line...)
  1037  	}
  1038  
  1039  	// Assign suffix comments to syntax immediately before.
  1040  	suffix := p.in.suffixComments
  1041  	for i := len(post) - 1; i >= 0; i-- {
  1042  		x := post[i]
  1043  
  1044  		// Do not assign suffix comments to file
  1045  		switch x.(type) {
  1046  		case *File:
  1047  			continue
  1048  		}
  1049  
  1050  		_, end := x.Span()
  1051  		if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) {
  1052  			x.AllocComments()
  1053  			x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1])
  1054  			suffix = suffix[:len(suffix)-1]
  1055  		}
  1056  	}
  1057  }