github.com/benhoyt/goawk@v1.8.1/parser/parser.go (about) 1 // Package parser is an AWK parser and abstract syntax tree. 2 // 3 // Use the ParseProgram function to parse an AWK program, and then 4 // give the result to one of the interp.Exec* functions to execute it. 5 // 6 package parser 7 8 import ( 9 "fmt" 10 "io" 11 "regexp" 12 "strconv" 13 "strings" 14 15 . "github.com/benhoyt/goawk/internal/ast" 16 . "github.com/benhoyt/goawk/lexer" 17 ) 18 19 // ParseError (actually *ParseError) is the type of error returned by 20 // ParseProgram. 21 type ParseError struct { 22 // Source line/column position where the error occurred. 23 Position Position 24 // Error message. 25 Message string 26 } 27 28 // Error returns a formatted version of the error, including the line 29 // and column numbers. 30 func (e *ParseError) Error() string { 31 return fmt.Sprintf("parse error at %d:%d: %s", e.Position.Line, e.Position.Column, e.Message) 32 } 33 34 // ParseConfig lets you specify configuration for the parsing process 35 // (for example printing type information for debugging). 36 type ParserConfig struct { 37 // Enable printing of type information 38 DebugTypes bool 39 40 // io.Writer to print type information on (for example, os.Stderr) 41 DebugWriter io.Writer 42 43 // Map of named Go functions to allow calling from AWK. See docs 44 // on interp.Config.Funcs for details. 45 Funcs map[string]interface{} 46 } 47 48 // ParseProgram parses an entire AWK program, returning the *Program 49 // abstract syntax tree or a *ParseError on error. "config" describes 50 // the parser configuration (and is allowed to be nil). 51 func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { 52 defer func() { 53 // The parser uses panic with a *ParseError to signal parsing 54 // errors internally, and they're caught here. This 55 // significantly simplifies the recursive descent calls as 56 // we don't have to check errors everywhere. 57 if r := recover(); r != nil { 58 // Convert to ParseError or re-panic 59 err = r.(*ParseError) 60 } 61 }() 62 lexer := NewLexer(src) 63 p := parser{lexer: lexer} 64 if config != nil { 65 p.debugTypes = config.DebugTypes 66 p.debugWriter = config.DebugWriter 67 p.nativeFuncs = config.Funcs 68 } 69 p.initResolve() 70 p.next() // initialize p.tok 71 return p.program(), nil 72 } 73 74 // Program is the abstract syntax tree for an entire AWK program. 75 type Program struct { 76 // These fields aren't intended to be used or modified directly, 77 // but are exported for the interpreter (Program itself needs to 78 // be exported in package "parser", otherwise these could live in 79 // "internal/ast".) 80 Begin []Stmts 81 Actions []Action 82 End []Stmts 83 Functions []Function 84 Scalars map[string]int 85 Arrays map[string]int 86 } 87 88 // String returns an indented, pretty-printed version of the parsed 89 // program. 90 func (p *Program) String() string { 91 parts := []string{} 92 for _, ss := range p.Begin { 93 parts = append(parts, "BEGIN {\n"+ss.String()+"}") 94 } 95 for _, a := range p.Actions { 96 parts = append(parts, a.String()) 97 } 98 for _, ss := range p.End { 99 parts = append(parts, "END {\n"+ss.String()+"}") 100 } 101 for _, function := range p.Functions { 102 parts = append(parts, function.String()) 103 } 104 return strings.Join(parts, "\n\n") 105 } 106 107 // Parser state 108 type parser struct { 109 // Lexer instance and current token values 110 lexer *Lexer 111 pos Position // position of last token (tok) 112 tok Token // last lexed token 113 val string // string value of last token (or "") 114 115 // Parsing state 116 inAction bool // true if parsing an action (false in BEGIN or END) 117 funcName string // function name if parsing a func, else "" 118 loopDepth int // current loop depth (0 if not in any loops) 119 120 // Variable tracking and resolving 121 locals map[string]bool // current function's locals (for determining scope) 122 varTypes map[string]map[string]typeInfo // map of func name to var name to type 123 varRefs []varRef // all variable references (usually scalars) 124 arrayRefs []arrayRef // all array references 125 multiExprs map[*MultiExpr]Position // tracks comma-separated expressions 126 127 // Function tracking 128 functions map[string]int // map of function name to index 129 userCalls []userCall // record calls so we can resolve them later 130 nativeFuncs map[string]interface{} 131 132 // Configuration and debugging 133 debugTypes bool // show variable types for debugging 134 debugWriter io.Writer // where the debug output goes 135 } 136 137 // Parse an entire AWK program. 138 func (p *parser) program() *Program { 139 prog := &Program{} 140 p.optionalNewlines() 141 for p.tok != EOF { 142 switch p.tok { 143 case BEGIN: 144 p.next() 145 prog.Begin = append(prog.Begin, p.stmtsBrace()) 146 case END: 147 p.next() 148 prog.End = append(prog.End, p.stmtsBrace()) 149 case FUNCTION: 150 function := p.function() 151 p.addFunction(function.Name, len(prog.Functions)) 152 prog.Functions = append(prog.Functions, function) 153 default: 154 p.inAction = true 155 // Allow empty pattern, normal pattern, or range pattern 156 pattern := []Expr{} 157 if !p.matches(LBRACE, EOF) { 158 pattern = append(pattern, p.expr()) 159 } 160 if !p.matches(LBRACE, EOF, NEWLINE) { 161 p.commaNewlines() 162 pattern = append(pattern, p.expr()) 163 } 164 // Or an empty action (equivalent to { print $0 }) 165 action := Action{pattern, nil} 166 if p.tok == LBRACE { 167 action.Stmts = p.stmtsBrace() 168 } 169 prog.Actions = append(prog.Actions, action) 170 p.inAction = false 171 } 172 p.optionalNewlines() 173 } 174 175 p.resolveUserCalls(prog) 176 p.resolveVars(prog) 177 p.checkMultiExprs() 178 179 return prog 180 } 181 182 // Parse a list of statements. 183 func (p *parser) stmts() Stmts { 184 switch p.tok { 185 case SEMICOLON: 186 // This is so things like this parse correctly: 187 // BEGIN { for (i=0; i<10; i++); print "x" } 188 p.next() 189 return nil 190 case LBRACE: 191 return p.stmtsBrace() 192 default: 193 return []Stmt{p.stmt()} 194 } 195 } 196 197 // Parse a list of statements surrounded in {...} braces. 198 func (p *parser) stmtsBrace() Stmts { 199 p.expect(LBRACE) 200 p.optionalNewlines() 201 ss := []Stmt{} 202 for p.tok != RBRACE && p.tok != EOF { 203 ss = append(ss, p.stmt()) 204 } 205 p.expect(RBRACE) 206 if p.tok == SEMICOLON { 207 p.next() 208 } 209 return ss 210 } 211 212 // Parse a "simple" statement (eg: allowed in a for loop init clause). 213 func (p *parser) simpleStmt() Stmt { 214 switch p.tok { 215 case PRINT, PRINTF: 216 op := p.tok 217 p.next() 218 args := p.exprList(p.printExpr) 219 if len(args) == 1 { 220 // This allows parens around all the print args 221 if m, ok := args[0].(*MultiExpr); ok { 222 args = m.Exprs 223 p.useMultiExpr(m) 224 } 225 } 226 redirect := ILLEGAL 227 var dest Expr 228 if p.matches(GREATER, APPEND, PIPE) { 229 redirect = p.tok 230 p.next() 231 dest = p.expr() 232 } 233 if op == PRINT { 234 return &PrintStmt{args, redirect, dest} 235 } else { 236 if len(args) == 0 { 237 panic(p.error("expected printf args, got none")) 238 } 239 return &PrintfStmt{args, redirect, dest} 240 } 241 case DELETE: 242 p.next() 243 ref := p.arrayRef(p.val, p.pos) 244 p.expect(NAME) 245 var index []Expr 246 if p.tok == LBRACKET { 247 p.next() 248 index = p.exprList(p.expr) 249 if len(index) == 0 { 250 panic(p.error("expected expression instead of ]")) 251 } 252 p.expect(RBRACKET) 253 } 254 return &DeleteStmt{ref, index} 255 case IF, FOR, WHILE, DO, BREAK, CONTINUE, NEXT, EXIT, RETURN: 256 panic(p.error("expected print/printf, delete, or expression")) 257 default: 258 return &ExprStmt{p.expr()} 259 } 260 } 261 262 // Parse any top-level statement. 263 func (p *parser) stmt() Stmt { 264 for p.matches(SEMICOLON, NEWLINE) { 265 p.next() 266 } 267 var s Stmt 268 switch p.tok { 269 case IF: 270 p.next() 271 p.expect(LPAREN) 272 cond := p.expr() 273 p.expect(RPAREN) 274 p.optionalNewlines() 275 body := p.stmts() 276 p.optionalNewlines() 277 var elseBody Stmts 278 if p.tok == ELSE { 279 p.next() 280 p.optionalNewlines() 281 elseBody = p.stmts() 282 } 283 s = &IfStmt{cond, body, elseBody} 284 case FOR: 285 // Parse for statement, either "for in" or C-like for loop. 286 // 287 // FOR LPAREN NAME IN NAME RPAREN NEWLINE* stmts | 288 // FOR LPAREN [simpleStmt] SEMICOLON NEWLINE* 289 // [expr] SEMICOLON NEWLINE* 290 // [simpleStmt] RPAREN NEWLINE* stmts 291 // 292 p.next() 293 p.expect(LPAREN) 294 var pre Stmt 295 if p.tok != SEMICOLON { 296 pre = p.simpleStmt() 297 } 298 if pre != nil && p.tok == RPAREN { 299 // Match: for (var in array) body 300 p.next() 301 p.optionalNewlines() 302 exprStmt, ok := pre.(*ExprStmt) 303 if !ok { 304 panic(p.error("expected 'for (var in array) ...'")) 305 } 306 inExpr, ok := (exprStmt.Expr).(*InExpr) 307 if !ok { 308 panic(p.error("expected 'for (var in array) ...'")) 309 } 310 if len(inExpr.Index) != 1 { 311 panic(p.error("expected 'for (var in array) ...'")) 312 } 313 varExpr, ok := (inExpr.Index[0]).(*VarExpr) 314 if !ok { 315 panic(p.error("expected 'for (var in array) ...'")) 316 } 317 body := p.loopStmts() 318 s = &ForInStmt{varExpr, inExpr.Array, body} 319 } else { 320 // Match: for ([pre]; [cond]; [post]) body 321 p.expect(SEMICOLON) 322 p.optionalNewlines() 323 var cond Expr 324 if p.tok != SEMICOLON { 325 cond = p.expr() 326 } 327 p.expect(SEMICOLON) 328 p.optionalNewlines() 329 var post Stmt 330 if p.tok != RPAREN { 331 post = p.simpleStmt() 332 } 333 p.expect(RPAREN) 334 p.optionalNewlines() 335 body := p.loopStmts() 336 s = &ForStmt{pre, cond, post, body} 337 } 338 case WHILE: 339 p.next() 340 p.expect(LPAREN) 341 cond := p.expr() 342 p.expect(RPAREN) 343 p.optionalNewlines() 344 body := p.loopStmts() 345 s = &WhileStmt{cond, body} 346 case DO: 347 p.next() 348 p.optionalNewlines() 349 body := p.loopStmts() 350 p.expect(WHILE) 351 p.expect(LPAREN) 352 cond := p.expr() 353 p.expect(RPAREN) 354 s = &DoWhileStmt{body, cond} 355 case BREAK: 356 if p.loopDepth == 0 { 357 panic(p.error("break must be inside a loop body")) 358 } 359 p.next() 360 s = &BreakStmt{} 361 case CONTINUE: 362 if p.loopDepth == 0 { 363 panic(p.error("continue must be inside a loop body")) 364 } 365 p.next() 366 s = &ContinueStmt{} 367 case NEXT: 368 if !p.inAction { 369 panic(p.error("next can't be in BEGIN or END")) 370 } 371 p.next() 372 s = &NextStmt{} 373 case EXIT: 374 p.next() 375 var status Expr 376 if !p.matches(NEWLINE, SEMICOLON, RBRACE) { 377 status = p.expr() 378 } 379 s = &ExitStmt{status} 380 case RETURN: 381 if p.funcName == "" { 382 panic(p.error("return must be inside a function")) 383 } 384 p.next() 385 var value Expr 386 if !p.matches(NEWLINE, SEMICOLON, RBRACE) { 387 value = p.expr() 388 } 389 s = &ReturnStmt{value} 390 case LBRACE: 391 body := p.stmtsBrace() 392 s = &BlockStmt{body} 393 default: 394 s = p.simpleStmt() 395 } 396 for p.matches(NEWLINE, SEMICOLON) { 397 p.next() 398 } 399 return s 400 } 401 402 // Same as stmts(), but tracks that we're in a loop (as break and 403 // continue can only occur inside a loop). 404 func (p *parser) loopStmts() Stmts { 405 p.loopDepth++ 406 ss := p.stmts() 407 p.loopDepth-- 408 return ss 409 } 410 411 // Parse a function definition and body. As it goes, this resolves 412 // the local variable indexes and tracks which parameters are array 413 // parameters. 414 func (p *parser) function() Function { 415 if p.funcName != "" { 416 // Should never actually get here (FUNCTION token is only 417 // handled at the top level), but just in case. 418 panic(p.error("can't nest functions")) 419 } 420 p.next() 421 name := p.val 422 if _, ok := p.functions[name]; ok { 423 panic(p.error("function %q already defined", name)) 424 } 425 p.expect(NAME) 426 p.expect(LPAREN) 427 first := true 428 params := make([]string, 0, 7) // pre-allocate some to reduce allocations 429 p.locals = make(map[string]bool, 7) 430 for p.tok != RPAREN { 431 if !first { 432 p.commaNewlines() 433 } 434 first = false 435 param := p.val 436 if param == name { 437 panic(p.error("can't use function name as parameter name")) 438 } 439 if p.locals[param] { 440 panic(p.error("duplicate parameter name %q", param)) 441 } 442 p.expect(NAME) 443 params = append(params, param) 444 p.locals[param] = true 445 } 446 p.expect(RPAREN) 447 p.optionalNewlines() 448 449 // Parse the body 450 p.startFunction(name, params) 451 body := p.stmtsBrace() 452 p.stopFunction() 453 p.locals = nil 454 455 return Function{name, params, nil, body} 456 } 457 458 // Parse expressions separated by commas: args to print[f] or user 459 // function call, or multi-dimensional index. 460 func (p *parser) exprList(parse func() Expr) []Expr { 461 exprs := []Expr{} 462 first := true 463 for !p.matches(NEWLINE, SEMICOLON, RBRACE, RBRACKET, RPAREN, GREATER, PIPE, APPEND) { 464 if !first { 465 p.commaNewlines() 466 } 467 first = false 468 exprs = append(exprs, parse()) 469 } 470 return exprs 471 } 472 473 // Here's where things get slightly interesting: only certain 474 // expression types are allowed in print/printf statements, 475 // presumably so `print a, b > "file"` is a file redirect instead of 476 // a greater-than comparison. So we kind of have two ways to recurse 477 // down here: expr(), which parses all expressions, and printExpr(), 478 // which skips PIPE GETLINE and GREATER expressions. 479 480 // Parse a single expression. 481 func (p *parser) expr() Expr { return p.getLine() } 482 func (p *parser) printExpr() Expr { return p._assign(p.printCond) } 483 484 // Parse an "expr | getline [var]" expression: 485 // 486 // assign [PIPE GETLINE [NAME]] 487 // 488 func (p *parser) getLine() Expr { 489 expr := p._assign(p.cond) 490 if p.tok == PIPE { 491 p.next() 492 p.expect(GETLINE) 493 var varExpr *VarExpr 494 if p.tok == NAME { 495 varExpr = p.varRef(p.val, p.pos) 496 p.next() 497 } 498 return &GetlineExpr{expr, varExpr, nil} 499 } 500 return expr 501 } 502 503 // Parse an = assignment expression: 504 // 505 // lvalue [assign_op assign] 506 // 507 // An lvalue is a variable name, an array[expr] index expression, or 508 // an $expr field expression. 509 // 510 func (p *parser) _assign(higher func() Expr) Expr { 511 expr := higher() 512 if IsLValue(expr) && p.matches(ASSIGN, ADD_ASSIGN, DIV_ASSIGN, 513 MOD_ASSIGN, MUL_ASSIGN, POW_ASSIGN, SUB_ASSIGN) { 514 op := p.tok 515 p.next() 516 right := p._assign(higher) 517 switch op { 518 case ASSIGN: 519 return &AssignExpr{expr, right} 520 case ADD_ASSIGN: 521 op = ADD 522 case DIV_ASSIGN: 523 op = DIV 524 case MOD_ASSIGN: 525 op = MOD 526 case MUL_ASSIGN: 527 op = MUL 528 case POW_ASSIGN: 529 op = POW 530 case SUB_ASSIGN: 531 op = SUB 532 } 533 return &AugAssignExpr{expr, op, right} 534 } 535 return expr 536 } 537 538 // Parse a ?: conditional expression: 539 // 540 // or [QUESTION NEWLINE* cond COLON NEWLINE* cond] 541 // 542 func (p *parser) cond() Expr { return p._cond(p.or) } 543 func (p *parser) printCond() Expr { return p._cond(p.printOr) } 544 545 func (p *parser) _cond(higher func() Expr) Expr { 546 expr := higher() 547 if p.tok == QUESTION { 548 p.next() 549 p.optionalNewlines() 550 t := p.expr() 551 p.expect(COLON) 552 p.optionalNewlines() 553 f := p.expr() 554 return &CondExpr{expr, t, f} 555 } 556 return expr 557 } 558 559 // Parse an || or expresion: 560 // 561 // and [OR NEWLINE* and] [OR NEWLINE* and] ... 562 // 563 func (p *parser) or() Expr { return p.binaryLeft(p.and, true, OR) } 564 func (p *parser) printOr() Expr { return p.binaryLeft(p.printAnd, true, OR) } 565 566 // Parse an && and expresion: 567 // 568 // in [AND NEWLINE* in] [AND NEWLINE* in] ... 569 // 570 func (p *parser) and() Expr { return p.binaryLeft(p.in, true, AND) } 571 func (p *parser) printAnd() Expr { return p.binaryLeft(p.printIn, true, AND) } 572 573 // Parse an "in" expression: 574 // 575 // match [IN NAME] [IN NAME] ... 576 // 577 func (p *parser) in() Expr { return p._in(p.match) } 578 func (p *parser) printIn() Expr { return p._in(p.printMatch) } 579 580 func (p *parser) _in(higher func() Expr) Expr { 581 expr := higher() 582 for p.tok == IN { 583 p.next() 584 ref := p.arrayRef(p.val, p.pos) 585 p.expect(NAME) 586 expr = &InExpr{[]Expr{expr}, ref} 587 } 588 return expr 589 } 590 591 // Parse a ~ match expression: 592 // 593 // compare [MATCH|NOT_MATCH compare] 594 // 595 func (p *parser) match() Expr { return p._match(p.compare) } 596 func (p *parser) printMatch() Expr { return p._match(p.printCompare) } 597 598 func (p *parser) _match(higher func() Expr) Expr { 599 expr := higher() 600 if p.matches(MATCH, NOT_MATCH) { 601 op := p.tok 602 p.next() 603 right := p.regexStr(higher) // Not match() as these aren't associative 604 return &BinaryExpr{expr, op, right} 605 } 606 return expr 607 } 608 609 // Parse a comparison expression: 610 // 611 // concat [EQUALS|NOT_EQUALS|LESS|LTE|GREATER|GTE concat] 612 // 613 func (p *parser) compare() Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE, GREATER) } 614 func (p *parser) printCompare() Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE) } 615 616 func (p *parser) _compare(ops ...Token) Expr { 617 expr := p.concat() 618 if p.matches(ops...) { 619 op := p.tok 620 p.next() 621 right := p.concat() // Not compare() as these aren't associative 622 return &BinaryExpr{expr, op, right} 623 } 624 return expr 625 } 626 627 func (p *parser) concat() Expr { 628 expr := p.add() 629 for p.matches(DOLLAR, NOT, NAME, NUMBER, STRING, LPAREN) || 630 (p.tok >= FIRST_FUNC && p.tok <= LAST_FUNC) { 631 right := p.add() 632 expr = &BinaryExpr{expr, CONCAT, right} 633 } 634 return expr 635 } 636 637 func (p *parser) add() Expr { 638 return p.binaryLeft(p.mul, false, ADD, SUB) 639 } 640 641 func (p *parser) mul() Expr { 642 return p.binaryLeft(p.pow, false, MUL, DIV, MOD) 643 } 644 645 func (p *parser) pow() Expr { 646 // Note that pow (expr ^ expr) is right-associative 647 expr := p.preIncr() 648 if p.tok == POW { 649 p.next() 650 right := p.pow() 651 return &BinaryExpr{expr, POW, right} 652 } 653 return expr 654 } 655 656 func (p *parser) preIncr() Expr { 657 if p.tok == INCR || p.tok == DECR { 658 op := p.tok 659 p.next() 660 expr := p.preIncr() 661 if !IsLValue(expr) { 662 panic(p.error("expected lvalue after ++ or --")) 663 } 664 return &IncrExpr{expr, op, true} 665 } 666 return p.postIncr() 667 } 668 669 func (p *parser) postIncr() Expr { 670 expr := p.primary() 671 if p.tok == INCR || p.tok == DECR { 672 if !IsLValue(expr) { 673 panic(p.error("expected lvalue before ++ or --")) 674 } 675 op := p.tok 676 p.next() 677 return &IncrExpr{expr, op, false} 678 } 679 return expr 680 } 681 682 func (p *parser) primary() Expr { 683 switch p.tok { 684 case NUMBER: 685 // AWK allows forms like "1.5e", but ParseFloat doesn't 686 s := strings.TrimRight(p.val, "eE") 687 n, _ := strconv.ParseFloat(s, 64) 688 p.next() 689 return &NumExpr{n} 690 case STRING: 691 s := p.val 692 p.next() 693 return &StrExpr{s} 694 case DIV, DIV_ASSIGN: 695 // If we get to DIV or DIV_ASSIGN as a primary expression, 696 // it's actually a regex. 697 regex := p.nextRegex() 698 return &RegExpr{regex} 699 case DOLLAR: 700 p.next() 701 return &FieldExpr{p.primary()} 702 case NOT, ADD, SUB: 703 op := p.tok 704 p.next() 705 return &UnaryExpr{op, p.pow()} 706 case NAME: 707 name := p.val 708 namePos := p.pos 709 p.next() 710 if p.tok == LBRACKET { 711 // a[x] or a[x, y] array index expression 712 p.next() 713 index := p.exprList(p.expr) 714 if len(index) == 0 { 715 panic(p.error("expected expression instead of ]")) 716 } 717 p.expect(RBRACKET) 718 return &IndexExpr{p.arrayRef(name, namePos), index} 719 } else if p.tok == LPAREN && !p.lexer.HadSpace() { 720 if p.locals[name] { 721 panic(p.error("can't call local variable %q as function", name)) 722 } 723 // Grammar requires no space between function name and 724 // left paren for user function calls, hence the funky 725 // lexer.HadSpace() method. 726 return p.userCall(name, namePos) 727 } 728 return p.varRef(name, namePos) 729 case LPAREN: 730 parenPos := p.pos 731 p.next() 732 exprs := p.exprList(p.expr) 733 switch len(exprs) { 734 case 0: 735 panic(p.error("expected expression, not %s", p.tok)) 736 case 1: 737 p.expect(RPAREN) 738 return exprs[0] 739 default: 740 // Multi-dimensional array "in" requires parens around index 741 p.expect(RPAREN) 742 if p.tok == IN { 743 p.next() 744 ref := p.arrayRef(p.val, p.pos) 745 p.expect(NAME) 746 return &InExpr{exprs, ref} 747 } 748 // MultiExpr is used as a pseudo-expression for print[f] parsing. 749 return p.multiExpr(exprs, parenPos) 750 } 751 case GETLINE: 752 p.next() 753 var varExpr *VarExpr 754 if p.tok == NAME { 755 varExpr = p.varRef(p.val, p.pos) 756 p.next() 757 } 758 var file Expr 759 if p.tok == LESS { 760 p.next() 761 file = p.expr() 762 } 763 return &GetlineExpr{nil, varExpr, file} 764 // Below is the parsing of all the builtin function calls. We 765 // could unify these but several of them have special handling 766 // (array/lvalue/regex params, optional arguments, etc), and 767 // doing it this way means we can check more at parse time. 768 case F_SUB, F_GSUB: 769 op := p.tok 770 p.next() 771 p.expect(LPAREN) 772 regex := p.regexStr(p.expr) 773 p.commaNewlines() 774 repl := p.expr() 775 args := []Expr{regex, repl} 776 if p.tok == COMMA { 777 p.commaNewlines() 778 in := p.expr() 779 if !IsLValue(in) { 780 panic(p.error("3rd arg to sub/gsub must be lvalue")) 781 } 782 args = append(args, in) 783 } 784 p.expect(RPAREN) 785 return &CallExpr{op, args} 786 case F_SPLIT: 787 p.next() 788 p.expect(LPAREN) 789 str := p.expr() 790 p.commaNewlines() 791 ref := p.arrayRef(p.val, p.pos) 792 p.expect(NAME) 793 args := []Expr{str, ref} 794 if p.tok == COMMA { 795 p.commaNewlines() 796 args = append(args, p.regexStr(p.expr)) 797 } 798 p.expect(RPAREN) 799 return &CallExpr{F_SPLIT, args} 800 case F_MATCH: 801 p.next() 802 p.expect(LPAREN) 803 str := p.expr() 804 p.commaNewlines() 805 regex := p.regexStr(p.expr) 806 p.expect(RPAREN) 807 return &CallExpr{F_MATCH, []Expr{str, regex}} 808 case F_RAND: 809 p.next() 810 p.expect(LPAREN) 811 p.expect(RPAREN) 812 return &CallExpr{F_RAND, nil} 813 case F_SRAND: 814 p.next() 815 p.expect(LPAREN) 816 var args []Expr 817 if p.tok != RPAREN { 818 args = append(args, p.expr()) 819 } 820 p.expect(RPAREN) 821 return &CallExpr{F_SRAND, args} 822 case F_LENGTH: 823 p.next() 824 var args []Expr 825 // AWK quirk: "length" is allowed to be called without parens 826 if p.tok == LPAREN { 827 p.next() 828 if p.tok != RPAREN { 829 args = append(args, p.expr()) 830 } 831 p.expect(RPAREN) 832 } 833 return &CallExpr{F_LENGTH, args} 834 case F_SUBSTR: 835 p.next() 836 p.expect(LPAREN) 837 str := p.expr() 838 p.commaNewlines() 839 start := p.expr() 840 args := []Expr{str, start} 841 if p.tok == COMMA { 842 p.commaNewlines() 843 args = append(args, p.expr()) 844 } 845 p.expect(RPAREN) 846 return &CallExpr{F_SUBSTR, args} 847 case F_SPRINTF: 848 p.next() 849 p.expect(LPAREN) 850 args := []Expr{p.expr()} 851 for p.tok == COMMA { 852 p.commaNewlines() 853 args = append(args, p.expr()) 854 } 855 p.expect(RPAREN) 856 return &CallExpr{F_SPRINTF, args} 857 case F_FFLUSH: 858 p.next() 859 p.expect(LPAREN) 860 var args []Expr 861 if p.tok != RPAREN { 862 args = append(args, p.expr()) 863 } 864 p.expect(RPAREN) 865 return &CallExpr{F_FFLUSH, args} 866 case F_COS, F_SIN, F_EXP, F_LOG, F_SQRT, F_INT, F_TOLOWER, F_TOUPPER, F_SYSTEM, F_CLOSE: 867 // Simple 1-argument functions 868 op := p.tok 869 p.next() 870 p.expect(LPAREN) 871 arg := p.expr() 872 p.expect(RPAREN) 873 return &CallExpr{op, []Expr{arg}} 874 case F_ATAN2, F_INDEX: 875 // Simple 2-argument functions 876 op := p.tok 877 p.next() 878 p.expect(LPAREN) 879 arg1 := p.expr() 880 p.commaNewlines() 881 arg2 := p.expr() 882 p.expect(RPAREN) 883 return &CallExpr{op, []Expr{arg1, arg2}} 884 default: 885 panic(p.error("expected expression instead of %s", p.tok)) 886 } 887 } 888 889 // Parse /.../ regex or generic expression: 890 // 891 // REGEX | expr 892 // 893 func (p *parser) regexStr(parse func() Expr) Expr { 894 if p.matches(DIV, DIV_ASSIGN) { 895 regex := p.nextRegex() 896 return &StrExpr{regex} 897 } 898 return parse() 899 } 900 901 // Parse left-associative binary operator. Allow newlines after 902 // operator if allowNewline is true. 903 // 904 // parse [op parse] [op parse] ... 905 // 906 func (p *parser) binaryLeft(higher func() Expr, allowNewline bool, ops ...Token) Expr { 907 expr := higher() 908 for p.matches(ops...) { 909 op := p.tok 910 p.next() 911 if allowNewline { 912 p.optionalNewlines() 913 } 914 right := higher() 915 expr = &BinaryExpr{expr, op, right} 916 } 917 return expr 918 } 919 920 // Parse comma followed by optional newlines: 921 // 922 // COMMA NEWLINE* 923 // 924 func (p *parser) commaNewlines() { 925 p.expect(COMMA) 926 p.optionalNewlines() 927 } 928 929 // Parse zero or more optional newlines: 930 // 931 // [NEWLINE] [NEWLINE] ... 932 // 933 func (p *parser) optionalNewlines() { 934 for p.tok == NEWLINE { 935 p.next() 936 } 937 } 938 939 // Parse next token into p.tok (and set p.pos and p.val). 940 func (p *parser) next() { 941 p.pos, p.tok, p.val = p.lexer.Scan() 942 if p.tok == ILLEGAL { 943 panic(p.error("%s", p.val)) 944 } 945 } 946 947 // Parse next regex and return it (must only be called after DIV or 948 // DIV_ASSIGN token). 949 func (p *parser) nextRegex() string { 950 p.pos, p.tok, p.val = p.lexer.ScanRegex() 951 if p.tok == ILLEGAL { 952 panic(p.error("%s", p.val)) 953 } 954 regex := p.val 955 _, err := regexp.Compile(regex) 956 if err != nil { 957 panic(p.error("%v", err)) 958 } 959 p.next() 960 return regex 961 } 962 963 // Ensure current token is tok, and parse next token into p.tok. 964 func (p *parser) expect(tok Token) { 965 if p.tok != tok { 966 panic(p.error("expected %s instead of %s", tok, p.tok)) 967 } 968 p.next() 969 } 970 971 // Return true iff current token matches one of the given operators, 972 // but don't parse next token. 973 func (p *parser) matches(operators ...Token) bool { 974 for _, operator := range operators { 975 if p.tok == operator { 976 return true 977 } 978 } 979 return false 980 } 981 982 // Format given string and args with Sprintf and return *ParseError 983 // with that message and the current position. 984 func (p *parser) error(format string, args ...interface{}) error { 985 message := fmt.Sprintf(format, args...) 986 return &ParseError{p.pos, message} 987 } 988 989 // Parse call to a user-defined function (and record call site for 990 // resolving later). 991 func (p *parser) userCall(name string, pos Position) *UserCallExpr { 992 p.expect(LPAREN) 993 args := []Expr{} 994 i := 0 995 for !p.matches(NEWLINE, RPAREN) { 996 if i > 0 { 997 p.commaNewlines() 998 } 999 arg := p.expr() 1000 p.processUserCallArg(name, arg, i) 1001 args = append(args, arg) 1002 i++ 1003 } 1004 p.expect(RPAREN) 1005 call := &UserCallExpr{false, -1, name, args} // index is resolved later 1006 p.recordUserCall(call, pos) 1007 return call 1008 }