github.com/benhoyt/goawk@v1.8.1/parser/parser.go (about)

     1  // Package parser is an AWK parser and abstract syntax tree.
     2  //
     3  // Use the ParseProgram function to parse an AWK program, and then
     4  // give the result to one of the interp.Exec* functions to execute it.
     5  //
     6  package parser
     7  
     8  import (
     9  	"fmt"
    10  	"io"
    11  	"regexp"
    12  	"strconv"
    13  	"strings"
    14  
    15  	. "github.com/benhoyt/goawk/internal/ast"
    16  	. "github.com/benhoyt/goawk/lexer"
    17  )
    18  
    19  // ParseError (actually *ParseError) is the type of error returned by
    20  // ParseProgram.
    21  type ParseError struct {
    22  	// Source line/column position where the error occurred.
    23  	Position Position
    24  	// Error message.
    25  	Message string
    26  }
    27  
    28  // Error returns a formatted version of the error, including the line
    29  // and column numbers.
    30  func (e *ParseError) Error() string {
    31  	return fmt.Sprintf("parse error at %d:%d: %s", e.Position.Line, e.Position.Column, e.Message)
    32  }
    33  
    34  // ParseConfig lets you specify configuration for the parsing process
    35  // (for example printing type information for debugging).
    36  type ParserConfig struct {
    37  	// Enable printing of type information
    38  	DebugTypes bool
    39  
    40  	// io.Writer to print type information on (for example, os.Stderr)
    41  	DebugWriter io.Writer
    42  
    43  	// Map of named Go functions to allow calling from AWK. See docs
    44  	// on interp.Config.Funcs for details.
    45  	Funcs map[string]interface{}
    46  }
    47  
    48  // ParseProgram parses an entire AWK program, returning the *Program
    49  // abstract syntax tree or a *ParseError on error. "config" describes
    50  // the parser configuration (and is allowed to be nil).
    51  func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) {
    52  	defer func() {
    53  		// The parser uses panic with a *ParseError to signal parsing
    54  		// errors internally, and they're caught here. This
    55  		// significantly simplifies the recursive descent calls as
    56  		// we don't have to check errors everywhere.
    57  		if r := recover(); r != nil {
    58  			// Convert to ParseError or re-panic
    59  			err = r.(*ParseError)
    60  		}
    61  	}()
    62  	lexer := NewLexer(src)
    63  	p := parser{lexer: lexer}
    64  	if config != nil {
    65  		p.debugTypes = config.DebugTypes
    66  		p.debugWriter = config.DebugWriter
    67  		p.nativeFuncs = config.Funcs
    68  	}
    69  	p.initResolve()
    70  	p.next() // initialize p.tok
    71  	return p.program(), nil
    72  }
    73  
    74  // Program is the abstract syntax tree for an entire AWK program.
    75  type Program struct {
    76  	// These fields aren't intended to be used or modified directly,
    77  	// but are exported for the interpreter (Program itself needs to
    78  	// be exported in package "parser", otherwise these could live in
    79  	// "internal/ast".)
    80  	Begin     []Stmts
    81  	Actions   []Action
    82  	End       []Stmts
    83  	Functions []Function
    84  	Scalars   map[string]int
    85  	Arrays    map[string]int
    86  }
    87  
    88  // String returns an indented, pretty-printed version of the parsed
    89  // program.
    90  func (p *Program) String() string {
    91  	parts := []string{}
    92  	for _, ss := range p.Begin {
    93  		parts = append(parts, "BEGIN {\n"+ss.String()+"}")
    94  	}
    95  	for _, a := range p.Actions {
    96  		parts = append(parts, a.String())
    97  	}
    98  	for _, ss := range p.End {
    99  		parts = append(parts, "END {\n"+ss.String()+"}")
   100  	}
   101  	for _, function := range p.Functions {
   102  		parts = append(parts, function.String())
   103  	}
   104  	return strings.Join(parts, "\n\n")
   105  }
   106  
   107  // Parser state
   108  type parser struct {
   109  	// Lexer instance and current token values
   110  	lexer *Lexer
   111  	pos   Position // position of last token (tok)
   112  	tok   Token    // last lexed token
   113  	val   string   // string value of last token (or "")
   114  
   115  	// Parsing state
   116  	inAction  bool   // true if parsing an action (false in BEGIN or END)
   117  	funcName  string // function name if parsing a func, else ""
   118  	loopDepth int    // current loop depth (0 if not in any loops)
   119  
   120  	// Variable tracking and resolving
   121  	locals     map[string]bool                // current function's locals (for determining scope)
   122  	varTypes   map[string]map[string]typeInfo // map of func name to var name to type
   123  	varRefs    []varRef                       // all variable references (usually scalars)
   124  	arrayRefs  []arrayRef                     // all array references
   125  	multiExprs map[*MultiExpr]Position        // tracks comma-separated expressions
   126  
   127  	// Function tracking
   128  	functions   map[string]int // map of function name to index
   129  	userCalls   []userCall     // record calls so we can resolve them later
   130  	nativeFuncs map[string]interface{}
   131  
   132  	// Configuration and debugging
   133  	debugTypes  bool      // show variable types for debugging
   134  	debugWriter io.Writer // where the debug output goes
   135  }
   136  
   137  // Parse an entire AWK program.
   138  func (p *parser) program() *Program {
   139  	prog := &Program{}
   140  	p.optionalNewlines()
   141  	for p.tok != EOF {
   142  		switch p.tok {
   143  		case BEGIN:
   144  			p.next()
   145  			prog.Begin = append(prog.Begin, p.stmtsBrace())
   146  		case END:
   147  			p.next()
   148  			prog.End = append(prog.End, p.stmtsBrace())
   149  		case FUNCTION:
   150  			function := p.function()
   151  			p.addFunction(function.Name, len(prog.Functions))
   152  			prog.Functions = append(prog.Functions, function)
   153  		default:
   154  			p.inAction = true
   155  			// Allow empty pattern, normal pattern, or range pattern
   156  			pattern := []Expr{}
   157  			if !p.matches(LBRACE, EOF) {
   158  				pattern = append(pattern, p.expr())
   159  			}
   160  			if !p.matches(LBRACE, EOF, NEWLINE) {
   161  				p.commaNewlines()
   162  				pattern = append(pattern, p.expr())
   163  			}
   164  			// Or an empty action (equivalent to { print $0 })
   165  			action := Action{pattern, nil}
   166  			if p.tok == LBRACE {
   167  				action.Stmts = p.stmtsBrace()
   168  			}
   169  			prog.Actions = append(prog.Actions, action)
   170  			p.inAction = false
   171  		}
   172  		p.optionalNewlines()
   173  	}
   174  
   175  	p.resolveUserCalls(prog)
   176  	p.resolveVars(prog)
   177  	p.checkMultiExprs()
   178  
   179  	return prog
   180  }
   181  
   182  // Parse a list of statements.
   183  func (p *parser) stmts() Stmts {
   184  	switch p.tok {
   185  	case SEMICOLON:
   186  		// This is so things like this parse correctly:
   187  		// BEGIN { for (i=0; i<10; i++); print "x" }
   188  		p.next()
   189  		return nil
   190  	case LBRACE:
   191  		return p.stmtsBrace()
   192  	default:
   193  		return []Stmt{p.stmt()}
   194  	}
   195  }
   196  
   197  // Parse a list of statements surrounded in {...} braces.
   198  func (p *parser) stmtsBrace() Stmts {
   199  	p.expect(LBRACE)
   200  	p.optionalNewlines()
   201  	ss := []Stmt{}
   202  	for p.tok != RBRACE && p.tok != EOF {
   203  		ss = append(ss, p.stmt())
   204  	}
   205  	p.expect(RBRACE)
   206  	if p.tok == SEMICOLON {
   207  		p.next()
   208  	}
   209  	return ss
   210  }
   211  
   212  // Parse a "simple" statement (eg: allowed in a for loop init clause).
   213  func (p *parser) simpleStmt() Stmt {
   214  	switch p.tok {
   215  	case PRINT, PRINTF:
   216  		op := p.tok
   217  		p.next()
   218  		args := p.exprList(p.printExpr)
   219  		if len(args) == 1 {
   220  			// This allows parens around all the print args
   221  			if m, ok := args[0].(*MultiExpr); ok {
   222  				args = m.Exprs
   223  				p.useMultiExpr(m)
   224  			}
   225  		}
   226  		redirect := ILLEGAL
   227  		var dest Expr
   228  		if p.matches(GREATER, APPEND, PIPE) {
   229  			redirect = p.tok
   230  			p.next()
   231  			dest = p.expr()
   232  		}
   233  		if op == PRINT {
   234  			return &PrintStmt{args, redirect, dest}
   235  		} else {
   236  			if len(args) == 0 {
   237  				panic(p.error("expected printf args, got none"))
   238  			}
   239  			return &PrintfStmt{args, redirect, dest}
   240  		}
   241  	case DELETE:
   242  		p.next()
   243  		ref := p.arrayRef(p.val, p.pos)
   244  		p.expect(NAME)
   245  		var index []Expr
   246  		if p.tok == LBRACKET {
   247  			p.next()
   248  			index = p.exprList(p.expr)
   249  			if len(index) == 0 {
   250  				panic(p.error("expected expression instead of ]"))
   251  			}
   252  			p.expect(RBRACKET)
   253  		}
   254  		return &DeleteStmt{ref, index}
   255  	case IF, FOR, WHILE, DO, BREAK, CONTINUE, NEXT, EXIT, RETURN:
   256  		panic(p.error("expected print/printf, delete, or expression"))
   257  	default:
   258  		return &ExprStmt{p.expr()}
   259  	}
   260  }
   261  
   262  // Parse any top-level statement.
   263  func (p *parser) stmt() Stmt {
   264  	for p.matches(SEMICOLON, NEWLINE) {
   265  		p.next()
   266  	}
   267  	var s Stmt
   268  	switch p.tok {
   269  	case IF:
   270  		p.next()
   271  		p.expect(LPAREN)
   272  		cond := p.expr()
   273  		p.expect(RPAREN)
   274  		p.optionalNewlines()
   275  		body := p.stmts()
   276  		p.optionalNewlines()
   277  		var elseBody Stmts
   278  		if p.tok == ELSE {
   279  			p.next()
   280  			p.optionalNewlines()
   281  			elseBody = p.stmts()
   282  		}
   283  		s = &IfStmt{cond, body, elseBody}
   284  	case FOR:
   285  		// Parse for statement, either "for in" or C-like for loop.
   286  		//
   287  		//     FOR LPAREN NAME IN NAME RPAREN NEWLINE* stmts |
   288  		//     FOR LPAREN [simpleStmt] SEMICOLON NEWLINE*
   289  		//                [expr] SEMICOLON NEWLINE*
   290  		//                [simpleStmt] RPAREN NEWLINE* stmts
   291  		//
   292  		p.next()
   293  		p.expect(LPAREN)
   294  		var pre Stmt
   295  		if p.tok != SEMICOLON {
   296  			pre = p.simpleStmt()
   297  		}
   298  		if pre != nil && p.tok == RPAREN {
   299  			// Match: for (var in array) body
   300  			p.next()
   301  			p.optionalNewlines()
   302  			exprStmt, ok := pre.(*ExprStmt)
   303  			if !ok {
   304  				panic(p.error("expected 'for (var in array) ...'"))
   305  			}
   306  			inExpr, ok := (exprStmt.Expr).(*InExpr)
   307  			if !ok {
   308  				panic(p.error("expected 'for (var in array) ...'"))
   309  			}
   310  			if len(inExpr.Index) != 1 {
   311  				panic(p.error("expected 'for (var in array) ...'"))
   312  			}
   313  			varExpr, ok := (inExpr.Index[0]).(*VarExpr)
   314  			if !ok {
   315  				panic(p.error("expected 'for (var in array) ...'"))
   316  			}
   317  			body := p.loopStmts()
   318  			s = &ForInStmt{varExpr, inExpr.Array, body}
   319  		} else {
   320  			// Match: for ([pre]; [cond]; [post]) body
   321  			p.expect(SEMICOLON)
   322  			p.optionalNewlines()
   323  			var cond Expr
   324  			if p.tok != SEMICOLON {
   325  				cond = p.expr()
   326  			}
   327  			p.expect(SEMICOLON)
   328  			p.optionalNewlines()
   329  			var post Stmt
   330  			if p.tok != RPAREN {
   331  				post = p.simpleStmt()
   332  			}
   333  			p.expect(RPAREN)
   334  			p.optionalNewlines()
   335  			body := p.loopStmts()
   336  			s = &ForStmt{pre, cond, post, body}
   337  		}
   338  	case WHILE:
   339  		p.next()
   340  		p.expect(LPAREN)
   341  		cond := p.expr()
   342  		p.expect(RPAREN)
   343  		p.optionalNewlines()
   344  		body := p.loopStmts()
   345  		s = &WhileStmt{cond, body}
   346  	case DO:
   347  		p.next()
   348  		p.optionalNewlines()
   349  		body := p.loopStmts()
   350  		p.expect(WHILE)
   351  		p.expect(LPAREN)
   352  		cond := p.expr()
   353  		p.expect(RPAREN)
   354  		s = &DoWhileStmt{body, cond}
   355  	case BREAK:
   356  		if p.loopDepth == 0 {
   357  			panic(p.error("break must be inside a loop body"))
   358  		}
   359  		p.next()
   360  		s = &BreakStmt{}
   361  	case CONTINUE:
   362  		if p.loopDepth == 0 {
   363  			panic(p.error("continue must be inside a loop body"))
   364  		}
   365  		p.next()
   366  		s = &ContinueStmt{}
   367  	case NEXT:
   368  		if !p.inAction {
   369  			panic(p.error("next can't be in BEGIN or END"))
   370  		}
   371  		p.next()
   372  		s = &NextStmt{}
   373  	case EXIT:
   374  		p.next()
   375  		var status Expr
   376  		if !p.matches(NEWLINE, SEMICOLON, RBRACE) {
   377  			status = p.expr()
   378  		}
   379  		s = &ExitStmt{status}
   380  	case RETURN:
   381  		if p.funcName == "" {
   382  			panic(p.error("return must be inside a function"))
   383  		}
   384  		p.next()
   385  		var value Expr
   386  		if !p.matches(NEWLINE, SEMICOLON, RBRACE) {
   387  			value = p.expr()
   388  		}
   389  		s = &ReturnStmt{value}
   390  	case LBRACE:
   391  		body := p.stmtsBrace()
   392  		s = &BlockStmt{body}
   393  	default:
   394  		s = p.simpleStmt()
   395  	}
   396  	for p.matches(NEWLINE, SEMICOLON) {
   397  		p.next()
   398  	}
   399  	return s
   400  }
   401  
   402  // Same as stmts(), but tracks that we're in a loop (as break and
   403  // continue can only occur inside a loop).
   404  func (p *parser) loopStmts() Stmts {
   405  	p.loopDepth++
   406  	ss := p.stmts()
   407  	p.loopDepth--
   408  	return ss
   409  }
   410  
   411  // Parse a function definition and body. As it goes, this resolves
   412  // the local variable indexes and tracks which parameters are array
   413  // parameters.
   414  func (p *parser) function() Function {
   415  	if p.funcName != "" {
   416  		// Should never actually get here (FUNCTION token is only
   417  		// handled at the top level), but just in case.
   418  		panic(p.error("can't nest functions"))
   419  	}
   420  	p.next()
   421  	name := p.val
   422  	if _, ok := p.functions[name]; ok {
   423  		panic(p.error("function %q already defined", name))
   424  	}
   425  	p.expect(NAME)
   426  	p.expect(LPAREN)
   427  	first := true
   428  	params := make([]string, 0, 7) // pre-allocate some to reduce allocations
   429  	p.locals = make(map[string]bool, 7)
   430  	for p.tok != RPAREN {
   431  		if !first {
   432  			p.commaNewlines()
   433  		}
   434  		first = false
   435  		param := p.val
   436  		if param == name {
   437  			panic(p.error("can't use function name as parameter name"))
   438  		}
   439  		if p.locals[param] {
   440  			panic(p.error("duplicate parameter name %q", param))
   441  		}
   442  		p.expect(NAME)
   443  		params = append(params, param)
   444  		p.locals[param] = true
   445  	}
   446  	p.expect(RPAREN)
   447  	p.optionalNewlines()
   448  
   449  	// Parse the body
   450  	p.startFunction(name, params)
   451  	body := p.stmtsBrace()
   452  	p.stopFunction()
   453  	p.locals = nil
   454  
   455  	return Function{name, params, nil, body}
   456  }
   457  
   458  // Parse expressions separated by commas: args to print[f] or user
   459  // function call, or multi-dimensional index.
   460  func (p *parser) exprList(parse func() Expr) []Expr {
   461  	exprs := []Expr{}
   462  	first := true
   463  	for !p.matches(NEWLINE, SEMICOLON, RBRACE, RBRACKET, RPAREN, GREATER, PIPE, APPEND) {
   464  		if !first {
   465  			p.commaNewlines()
   466  		}
   467  		first = false
   468  		exprs = append(exprs, parse())
   469  	}
   470  	return exprs
   471  }
   472  
   473  // Here's where things get slightly interesting: only certain
   474  // expression types are allowed in print/printf statements,
   475  // presumably so `print a, b > "file"` is a file redirect instead of
   476  // a greater-than comparison. So we kind of have two ways to recurse
   477  // down here: expr(), which parses all expressions, and printExpr(),
   478  // which skips PIPE GETLINE and GREATER expressions.
   479  
   480  // Parse a single expression.
   481  func (p *parser) expr() Expr      { return p.getLine() }
   482  func (p *parser) printExpr() Expr { return p._assign(p.printCond) }
   483  
   484  // Parse an "expr | getline [var]" expression:
   485  //
   486  //     assign [PIPE GETLINE [NAME]]
   487  //
   488  func (p *parser) getLine() Expr {
   489  	expr := p._assign(p.cond)
   490  	if p.tok == PIPE {
   491  		p.next()
   492  		p.expect(GETLINE)
   493  		var varExpr *VarExpr
   494  		if p.tok == NAME {
   495  			varExpr = p.varRef(p.val, p.pos)
   496  			p.next()
   497  		}
   498  		return &GetlineExpr{expr, varExpr, nil}
   499  	}
   500  	return expr
   501  }
   502  
   503  // Parse an = assignment expression:
   504  //
   505  //     lvalue [assign_op assign]
   506  //
   507  // An lvalue is a variable name, an array[expr] index expression, or
   508  // an $expr field expression.
   509  //
   510  func (p *parser) _assign(higher func() Expr) Expr {
   511  	expr := higher()
   512  	if IsLValue(expr) && p.matches(ASSIGN, ADD_ASSIGN, DIV_ASSIGN,
   513  		MOD_ASSIGN, MUL_ASSIGN, POW_ASSIGN, SUB_ASSIGN) {
   514  		op := p.tok
   515  		p.next()
   516  		right := p._assign(higher)
   517  		switch op {
   518  		case ASSIGN:
   519  			return &AssignExpr{expr, right}
   520  		case ADD_ASSIGN:
   521  			op = ADD
   522  		case DIV_ASSIGN:
   523  			op = DIV
   524  		case MOD_ASSIGN:
   525  			op = MOD
   526  		case MUL_ASSIGN:
   527  			op = MUL
   528  		case POW_ASSIGN:
   529  			op = POW
   530  		case SUB_ASSIGN:
   531  			op = SUB
   532  		}
   533  		return &AugAssignExpr{expr, op, right}
   534  	}
   535  	return expr
   536  }
   537  
   538  // Parse a ?: conditional expression:
   539  //
   540  //     or [QUESTION NEWLINE* cond COLON NEWLINE* cond]
   541  //
   542  func (p *parser) cond() Expr      { return p._cond(p.or) }
   543  func (p *parser) printCond() Expr { return p._cond(p.printOr) }
   544  
   545  func (p *parser) _cond(higher func() Expr) Expr {
   546  	expr := higher()
   547  	if p.tok == QUESTION {
   548  		p.next()
   549  		p.optionalNewlines()
   550  		t := p.expr()
   551  		p.expect(COLON)
   552  		p.optionalNewlines()
   553  		f := p.expr()
   554  		return &CondExpr{expr, t, f}
   555  	}
   556  	return expr
   557  }
   558  
   559  // Parse an || or expresion:
   560  //
   561  //     and [OR NEWLINE* and] [OR NEWLINE* and] ...
   562  //
   563  func (p *parser) or() Expr      { return p.binaryLeft(p.and, true, OR) }
   564  func (p *parser) printOr() Expr { return p.binaryLeft(p.printAnd, true, OR) }
   565  
   566  // Parse an && and expresion:
   567  //
   568  //     in [AND NEWLINE* in] [AND NEWLINE* in] ...
   569  //
   570  func (p *parser) and() Expr      { return p.binaryLeft(p.in, true, AND) }
   571  func (p *parser) printAnd() Expr { return p.binaryLeft(p.printIn, true, AND) }
   572  
   573  // Parse an "in" expression:
   574  //
   575  //     match [IN NAME] [IN NAME] ...
   576  //
   577  func (p *parser) in() Expr      { return p._in(p.match) }
   578  func (p *parser) printIn() Expr { return p._in(p.printMatch) }
   579  
   580  func (p *parser) _in(higher func() Expr) Expr {
   581  	expr := higher()
   582  	for p.tok == IN {
   583  		p.next()
   584  		ref := p.arrayRef(p.val, p.pos)
   585  		p.expect(NAME)
   586  		expr = &InExpr{[]Expr{expr}, ref}
   587  	}
   588  	return expr
   589  }
   590  
   591  // Parse a ~ match expression:
   592  //
   593  //     compare [MATCH|NOT_MATCH compare]
   594  //
   595  func (p *parser) match() Expr      { return p._match(p.compare) }
   596  func (p *parser) printMatch() Expr { return p._match(p.printCompare) }
   597  
   598  func (p *parser) _match(higher func() Expr) Expr {
   599  	expr := higher()
   600  	if p.matches(MATCH, NOT_MATCH) {
   601  		op := p.tok
   602  		p.next()
   603  		right := p.regexStr(higher) // Not match() as these aren't associative
   604  		return &BinaryExpr{expr, op, right}
   605  	}
   606  	return expr
   607  }
   608  
   609  // Parse a comparison expression:
   610  //
   611  //     concat [EQUALS|NOT_EQUALS|LESS|LTE|GREATER|GTE concat]
   612  //
   613  func (p *parser) compare() Expr      { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE, GREATER) }
   614  func (p *parser) printCompare() Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE) }
   615  
   616  func (p *parser) _compare(ops ...Token) Expr {
   617  	expr := p.concat()
   618  	if p.matches(ops...) {
   619  		op := p.tok
   620  		p.next()
   621  		right := p.concat() // Not compare() as these aren't associative
   622  		return &BinaryExpr{expr, op, right}
   623  	}
   624  	return expr
   625  }
   626  
   627  func (p *parser) concat() Expr {
   628  	expr := p.add()
   629  	for p.matches(DOLLAR, NOT, NAME, NUMBER, STRING, LPAREN) ||
   630  		(p.tok >= FIRST_FUNC && p.tok <= LAST_FUNC) {
   631  		right := p.add()
   632  		expr = &BinaryExpr{expr, CONCAT, right}
   633  	}
   634  	return expr
   635  }
   636  
   637  func (p *parser) add() Expr {
   638  	return p.binaryLeft(p.mul, false, ADD, SUB)
   639  }
   640  
   641  func (p *parser) mul() Expr {
   642  	return p.binaryLeft(p.pow, false, MUL, DIV, MOD)
   643  }
   644  
   645  func (p *parser) pow() Expr {
   646  	// Note that pow (expr ^ expr) is right-associative
   647  	expr := p.preIncr()
   648  	if p.tok == POW {
   649  		p.next()
   650  		right := p.pow()
   651  		return &BinaryExpr{expr, POW, right}
   652  	}
   653  	return expr
   654  }
   655  
   656  func (p *parser) preIncr() Expr {
   657  	if p.tok == INCR || p.tok == DECR {
   658  		op := p.tok
   659  		p.next()
   660  		expr := p.preIncr()
   661  		if !IsLValue(expr) {
   662  			panic(p.error("expected lvalue after ++ or --"))
   663  		}
   664  		return &IncrExpr{expr, op, true}
   665  	}
   666  	return p.postIncr()
   667  }
   668  
   669  func (p *parser) postIncr() Expr {
   670  	expr := p.primary()
   671  	if p.tok == INCR || p.tok == DECR {
   672  		if !IsLValue(expr) {
   673  			panic(p.error("expected lvalue before ++ or --"))
   674  		}
   675  		op := p.tok
   676  		p.next()
   677  		return &IncrExpr{expr, op, false}
   678  	}
   679  	return expr
   680  }
   681  
   682  func (p *parser) primary() Expr {
   683  	switch p.tok {
   684  	case NUMBER:
   685  		// AWK allows forms like "1.5e", but ParseFloat doesn't
   686  		s := strings.TrimRight(p.val, "eE")
   687  		n, _ := strconv.ParseFloat(s, 64)
   688  		p.next()
   689  		return &NumExpr{n}
   690  	case STRING:
   691  		s := p.val
   692  		p.next()
   693  		return &StrExpr{s}
   694  	case DIV, DIV_ASSIGN:
   695  		// If we get to DIV or DIV_ASSIGN as a primary expression,
   696  		// it's actually a regex.
   697  		regex := p.nextRegex()
   698  		return &RegExpr{regex}
   699  	case DOLLAR:
   700  		p.next()
   701  		return &FieldExpr{p.primary()}
   702  	case NOT, ADD, SUB:
   703  		op := p.tok
   704  		p.next()
   705  		return &UnaryExpr{op, p.pow()}
   706  	case NAME:
   707  		name := p.val
   708  		namePos := p.pos
   709  		p.next()
   710  		if p.tok == LBRACKET {
   711  			// a[x] or a[x, y] array index expression
   712  			p.next()
   713  			index := p.exprList(p.expr)
   714  			if len(index) == 0 {
   715  				panic(p.error("expected expression instead of ]"))
   716  			}
   717  			p.expect(RBRACKET)
   718  			return &IndexExpr{p.arrayRef(name, namePos), index}
   719  		} else if p.tok == LPAREN && !p.lexer.HadSpace() {
   720  			if p.locals[name] {
   721  				panic(p.error("can't call local variable %q as function", name))
   722  			}
   723  			// Grammar requires no space between function name and
   724  			// left paren for user function calls, hence the funky
   725  			// lexer.HadSpace() method.
   726  			return p.userCall(name, namePos)
   727  		}
   728  		return p.varRef(name, namePos)
   729  	case LPAREN:
   730  		parenPos := p.pos
   731  		p.next()
   732  		exprs := p.exprList(p.expr)
   733  		switch len(exprs) {
   734  		case 0:
   735  			panic(p.error("expected expression, not %s", p.tok))
   736  		case 1:
   737  			p.expect(RPAREN)
   738  			return exprs[0]
   739  		default:
   740  			// Multi-dimensional array "in" requires parens around index
   741  			p.expect(RPAREN)
   742  			if p.tok == IN {
   743  				p.next()
   744  				ref := p.arrayRef(p.val, p.pos)
   745  				p.expect(NAME)
   746  				return &InExpr{exprs, ref}
   747  			}
   748  			// MultiExpr is used as a pseudo-expression for print[f] parsing.
   749  			return p.multiExpr(exprs, parenPos)
   750  		}
   751  	case GETLINE:
   752  		p.next()
   753  		var varExpr *VarExpr
   754  		if p.tok == NAME {
   755  			varExpr = p.varRef(p.val, p.pos)
   756  			p.next()
   757  		}
   758  		var file Expr
   759  		if p.tok == LESS {
   760  			p.next()
   761  			file = p.expr()
   762  		}
   763  		return &GetlineExpr{nil, varExpr, file}
   764  	// Below is the parsing of all the builtin function calls. We
   765  	// could unify these but several of them have special handling
   766  	// (array/lvalue/regex params, optional arguments, etc), and
   767  	// doing it this way means we can check more at parse time.
   768  	case F_SUB, F_GSUB:
   769  		op := p.tok
   770  		p.next()
   771  		p.expect(LPAREN)
   772  		regex := p.regexStr(p.expr)
   773  		p.commaNewlines()
   774  		repl := p.expr()
   775  		args := []Expr{regex, repl}
   776  		if p.tok == COMMA {
   777  			p.commaNewlines()
   778  			in := p.expr()
   779  			if !IsLValue(in) {
   780  				panic(p.error("3rd arg to sub/gsub must be lvalue"))
   781  			}
   782  			args = append(args, in)
   783  		}
   784  		p.expect(RPAREN)
   785  		return &CallExpr{op, args}
   786  	case F_SPLIT:
   787  		p.next()
   788  		p.expect(LPAREN)
   789  		str := p.expr()
   790  		p.commaNewlines()
   791  		ref := p.arrayRef(p.val, p.pos)
   792  		p.expect(NAME)
   793  		args := []Expr{str, ref}
   794  		if p.tok == COMMA {
   795  			p.commaNewlines()
   796  			args = append(args, p.regexStr(p.expr))
   797  		}
   798  		p.expect(RPAREN)
   799  		return &CallExpr{F_SPLIT, args}
   800  	case F_MATCH:
   801  		p.next()
   802  		p.expect(LPAREN)
   803  		str := p.expr()
   804  		p.commaNewlines()
   805  		regex := p.regexStr(p.expr)
   806  		p.expect(RPAREN)
   807  		return &CallExpr{F_MATCH, []Expr{str, regex}}
   808  	case F_RAND:
   809  		p.next()
   810  		p.expect(LPAREN)
   811  		p.expect(RPAREN)
   812  		return &CallExpr{F_RAND, nil}
   813  	case F_SRAND:
   814  		p.next()
   815  		p.expect(LPAREN)
   816  		var args []Expr
   817  		if p.tok != RPAREN {
   818  			args = append(args, p.expr())
   819  		}
   820  		p.expect(RPAREN)
   821  		return &CallExpr{F_SRAND, args}
   822  	case F_LENGTH:
   823  		p.next()
   824  		var args []Expr
   825  		// AWK quirk: "length" is allowed to be called without parens
   826  		if p.tok == LPAREN {
   827  			p.next()
   828  			if p.tok != RPAREN {
   829  				args = append(args, p.expr())
   830  			}
   831  			p.expect(RPAREN)
   832  		}
   833  		return &CallExpr{F_LENGTH, args}
   834  	case F_SUBSTR:
   835  		p.next()
   836  		p.expect(LPAREN)
   837  		str := p.expr()
   838  		p.commaNewlines()
   839  		start := p.expr()
   840  		args := []Expr{str, start}
   841  		if p.tok == COMMA {
   842  			p.commaNewlines()
   843  			args = append(args, p.expr())
   844  		}
   845  		p.expect(RPAREN)
   846  		return &CallExpr{F_SUBSTR, args}
   847  	case F_SPRINTF:
   848  		p.next()
   849  		p.expect(LPAREN)
   850  		args := []Expr{p.expr()}
   851  		for p.tok == COMMA {
   852  			p.commaNewlines()
   853  			args = append(args, p.expr())
   854  		}
   855  		p.expect(RPAREN)
   856  		return &CallExpr{F_SPRINTF, args}
   857  	case F_FFLUSH:
   858  		p.next()
   859  		p.expect(LPAREN)
   860  		var args []Expr
   861  		if p.tok != RPAREN {
   862  			args = append(args, p.expr())
   863  		}
   864  		p.expect(RPAREN)
   865  		return &CallExpr{F_FFLUSH, args}
   866  	case F_COS, F_SIN, F_EXP, F_LOG, F_SQRT, F_INT, F_TOLOWER, F_TOUPPER, F_SYSTEM, F_CLOSE:
   867  		// Simple 1-argument functions
   868  		op := p.tok
   869  		p.next()
   870  		p.expect(LPAREN)
   871  		arg := p.expr()
   872  		p.expect(RPAREN)
   873  		return &CallExpr{op, []Expr{arg}}
   874  	case F_ATAN2, F_INDEX:
   875  		// Simple 2-argument functions
   876  		op := p.tok
   877  		p.next()
   878  		p.expect(LPAREN)
   879  		arg1 := p.expr()
   880  		p.commaNewlines()
   881  		arg2 := p.expr()
   882  		p.expect(RPAREN)
   883  		return &CallExpr{op, []Expr{arg1, arg2}}
   884  	default:
   885  		panic(p.error("expected expression instead of %s", p.tok))
   886  	}
   887  }
   888  
   889  // Parse /.../ regex or generic expression:
   890  //
   891  //     REGEX | expr
   892  //
   893  func (p *parser) regexStr(parse func() Expr) Expr {
   894  	if p.matches(DIV, DIV_ASSIGN) {
   895  		regex := p.nextRegex()
   896  		return &StrExpr{regex}
   897  	}
   898  	return parse()
   899  }
   900  
   901  // Parse left-associative binary operator. Allow newlines after
   902  // operator if allowNewline is true.
   903  //
   904  //     parse [op parse] [op parse] ...
   905  //
   906  func (p *parser) binaryLeft(higher func() Expr, allowNewline bool, ops ...Token) Expr {
   907  	expr := higher()
   908  	for p.matches(ops...) {
   909  		op := p.tok
   910  		p.next()
   911  		if allowNewline {
   912  			p.optionalNewlines()
   913  		}
   914  		right := higher()
   915  		expr = &BinaryExpr{expr, op, right}
   916  	}
   917  	return expr
   918  }
   919  
   920  // Parse comma followed by optional newlines:
   921  //
   922  //     COMMA NEWLINE*
   923  //
   924  func (p *parser) commaNewlines() {
   925  	p.expect(COMMA)
   926  	p.optionalNewlines()
   927  }
   928  
   929  // Parse zero or more optional newlines:
   930  //
   931  //    [NEWLINE] [NEWLINE] ...
   932  //
   933  func (p *parser) optionalNewlines() {
   934  	for p.tok == NEWLINE {
   935  		p.next()
   936  	}
   937  }
   938  
   939  // Parse next token into p.tok (and set p.pos and p.val).
   940  func (p *parser) next() {
   941  	p.pos, p.tok, p.val = p.lexer.Scan()
   942  	if p.tok == ILLEGAL {
   943  		panic(p.error("%s", p.val))
   944  	}
   945  }
   946  
   947  // Parse next regex and return it (must only be called after DIV or
   948  // DIV_ASSIGN token).
   949  func (p *parser) nextRegex() string {
   950  	p.pos, p.tok, p.val = p.lexer.ScanRegex()
   951  	if p.tok == ILLEGAL {
   952  		panic(p.error("%s", p.val))
   953  	}
   954  	regex := p.val
   955  	_, err := regexp.Compile(regex)
   956  	if err != nil {
   957  		panic(p.error("%v", err))
   958  	}
   959  	p.next()
   960  	return regex
   961  }
   962  
   963  // Ensure current token is tok, and parse next token into p.tok.
   964  func (p *parser) expect(tok Token) {
   965  	if p.tok != tok {
   966  		panic(p.error("expected %s instead of %s", tok, p.tok))
   967  	}
   968  	p.next()
   969  }
   970  
   971  // Return true iff current token matches one of the given operators,
   972  // but don't parse next token.
   973  func (p *parser) matches(operators ...Token) bool {
   974  	for _, operator := range operators {
   975  		if p.tok == operator {
   976  			return true
   977  		}
   978  	}
   979  	return false
   980  }
   981  
   982  // Format given string and args with Sprintf and return *ParseError
   983  // with that message and the current position.
   984  func (p *parser) error(format string, args ...interface{}) error {
   985  	message := fmt.Sprintf(format, args...)
   986  	return &ParseError{p.pos, message}
   987  }
   988  
   989  // Parse call to a user-defined function (and record call site for
   990  // resolving later).
   991  func (p *parser) userCall(name string, pos Position) *UserCallExpr {
   992  	p.expect(LPAREN)
   993  	args := []Expr{}
   994  	i := 0
   995  	for !p.matches(NEWLINE, RPAREN) {
   996  		if i > 0 {
   997  			p.commaNewlines()
   998  		}
   999  		arg := p.expr()
  1000  		p.processUserCallArg(name, arg, i)
  1001  		args = append(args, arg)
  1002  		i++
  1003  	}
  1004  	p.expect(RPAREN)
  1005  	call := &UserCallExpr{false, -1, name, args} // index is resolved later
  1006  	p.recordUserCall(call, pos)
  1007  	return call
  1008  }