github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/optgen/lang/parser.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package lang 12 13 import ( 14 "fmt" 15 "io" 16 "os" 17 "strconv" 18 "strings" 19 ) 20 21 // FileResolver is used by the parser to abstract the opening and reading of 22 // input files. Callers of the parser can override the default behavior 23 // (os.Open) in order to open files in some other way (e.g. for testing). 24 type FileResolver func(name string) (io.Reader, error) 25 26 // Parser parses Optgen language input files and builds an abstract syntax tree 27 // (AST) from them. Typically the Optgen compiler invokes the parser and then 28 // performs semantic checks on the resulting AST. For more details on the 29 // Optgen language syntax, see the Syntax section of docs.go. 30 type Parser struct { 31 files []string 32 file int 33 r io.Reader 34 s *Scanner 35 src SourceLoc 36 saveSrc SourceLoc 37 errors []error 38 39 // comments accumulates contiguous comments as they are scanned. 40 comments CommentsExpr 41 42 // resolver is invoked to open the input files provided to the parser. 43 resolver FileResolver 44 45 // unscanned is true if the last token was unscanned (i.e. put back to be 46 // reparsed). 47 unscanned bool 48 49 // exprs is tracks top-level expressions (including comments) in order. 50 exprs []Expr 51 52 // exprComments maps expressions to comments. 53 exprComments map[Expr]CommentsExpr 54 } 55 56 // NewParser constructs a new instance of the Optgen parser, with the specified 57 // list of file paths as its input files. The Parse method must be called in 58 // order to parse the input files. 59 func NewParser(files ...string) *Parser { 60 p := &Parser{ 61 files: files, 62 exprComments: make(map[Expr]CommentsExpr), 63 } 64 65 // By default, resolve file names by a call to os.Open. 66 p.resolver = func(name string) (io.Reader, error) { 67 return os.Open(name) 68 } 69 70 return p 71 } 72 73 // SetFileResolver overrides the default method of opening input files. The 74 // default resolver will use os.Open to open input files from disk. Callers 75 // can use this method to open input files in some other way. 76 func (p *Parser) SetFileResolver(resolver FileResolver) { 77 p.resolver = resolver 78 } 79 80 // Parse parses the input files and returns the root expression of the AST. If 81 // there are parse errors, then Parse returns nil, and the errors are returned 82 // by the Errors function. 83 func (p *Parser) Parse() *RootExpr { 84 root := p.parseRoot() 85 86 // Ensure that all open files have been closed. 87 p.closeScanner() 88 89 if p.errors != nil { 90 return nil 91 } 92 return root 93 } 94 95 // Errors returns the collection of errors that occurred during parsing. If no 96 // errors occurred, then Errors returns nil. 97 func (p *Parser) Errors() []error { 98 return p.errors 99 } 100 101 // Exprs returns the top-level expressions (defines, rules, comments) in the 102 // order in which they were encountered. 103 func (p *Parser) Exprs() []Expr { 104 return p.exprs 105 } 106 107 // GetComments returns the comments associated with e. 108 func (p *Parser) GetComments(e Expr) CommentsExpr { 109 return p.exprComments[e] 110 } 111 112 func (p *Parser) getComments() CommentsExpr { 113 comments := p.comments 114 p.comments = nil 115 return comments 116 } 117 118 func (p *Parser) setComments(e Expr, comments CommentsExpr) { 119 if len(comments) > 0 { 120 p.exprComments[e] = comments 121 } 122 } 123 124 func (p *Parser) hasComments() bool { 125 return len(p.comments) > 0 126 } 127 128 func (p *Parser) appendComments() { 129 comments := p.getComments() 130 if len(comments) > 0 { 131 p.exprs = append(p.exprs, &comments) 132 } 133 } 134 135 // root = tags (define | rule) 136 func (p *Parser) parseRoot() *RootExpr { 137 rootOp := &RootExpr{} 138 139 // Ensure the scanner has been created over the first file. 140 if p.s == nil { 141 // If no files to parse, then return empty root expression. 142 if len(p.files) == 0 { 143 return rootOp 144 } 145 146 if !p.openScanner() { 147 return nil 148 } 149 } 150 151 for { 152 var tags TagsExpr 153 var comments CommentsExpr 154 155 tok := p.scan() 156 src := p.src 157 158 switch tok { 159 case EOF: 160 return rootOp 161 162 case LBRACKET: 163 p.unscan() 164 165 comments = p.getComments() 166 tags = p.parseTags() 167 if tags == nil { 168 p.tryRecover() 169 break 170 } 171 172 if p.scan() != IDENT { 173 p.unscan() 174 175 rule := p.parseRule(comments, tags, src) 176 if rule == nil { 177 p.tryRecover() 178 break 179 } 180 p.setComments(rule, comments) 181 182 rootOp.Rules = append(rootOp.Rules, rule) 183 p.exprs = append(p.exprs, rule) 184 break 185 } 186 187 fallthrough 188 189 case IDENT: 190 // Only define identifier is allowed at the top level. 191 if !p.isDefineIdent() { 192 p.addExpectedTokenErr("define statement") 193 p.tryRecover() 194 break 195 } 196 // If there was no tag, we need to check for comments. 197 if len(comments) == 0 { 198 comments = p.getComments() 199 } 200 201 p.unscan() 202 203 define := p.parseDefine(comments, tags, src) 204 if define == nil { 205 p.tryRecover() 206 break 207 } 208 p.setComments(define, comments) 209 210 rootOp.Defines = append(rootOp.Defines, define) 211 p.exprs = append(p.exprs, define) 212 213 default: 214 p.addExpectedTokenErr("define statement or rule") 215 p.tryRecover() 216 } 217 } 218 } 219 220 // define = 'define' define-name '{' define-field* '}' 221 func (p *Parser) parseDefine(comments CommentsExpr, tags TagsExpr, src SourceLoc) *DefineExpr { 222 if !p.scanToken(IDENT, "define statement") || p.s.Literal() != "define" { 223 return nil 224 } 225 226 if !p.scanToken(IDENT, "define name") { 227 return nil 228 } 229 230 name := p.s.Literal() 231 define := &DefineExpr{Src: &src, Comments: comments, Name: StringExpr(name), Tags: tags} 232 233 if !p.scanToken(LBRACE, "'{'") { 234 return nil 235 } 236 237 for { 238 if p.scan() == RBRACE { 239 if len(p.comments) > 0 { 240 p.addErr(fmt.Sprintf("comments not allowed before closing }: %v", p.comments)) 241 return nil 242 } 243 return define 244 } 245 p.unscan() 246 247 defineField := p.parseDefineField() 248 if defineField == nil { 249 return nil 250 } 251 252 define.Fields = append(define.Fields, defineField) 253 } 254 } 255 256 // define-field = field-name field-type 257 func (p *Parser) parseDefineField() *DefineFieldExpr { 258 if !p.scanToken(IDENT, "define field name") { 259 return nil 260 } 261 262 src := p.src 263 name := p.s.Literal() 264 265 if !p.scanToken(IDENT, "define field type") { 266 return nil 267 } 268 269 typ := p.s.Literal() 270 271 field := &DefineFieldExpr{ 272 Src: &src, 273 Name: StringExpr(name), 274 Comments: p.getComments(), 275 Type: StringExpr(typ), 276 } 277 p.setComments(field, field.Comments) 278 return field 279 } 280 281 // rule = match '=>' replace 282 func (p *Parser) parseRule(comments CommentsExpr, tags TagsExpr, src SourceLoc) *RuleExpr { 283 match := p.parseMatch() 284 if match == nil { 285 return nil 286 } 287 288 if !p.scanToken(ARROW, "'=>'") { 289 return nil 290 } 291 if p.hasComments() { 292 p.addErr("comments not allowed before =>") 293 return nil 294 } 295 296 replace := p.parseReplace() 297 if replace == nil { 298 return nil 299 } 300 301 return &RuleExpr{ 302 Src: &src, 303 Name: StringExpr(tags[0]), 304 Comments: comments, 305 Tags: tags[1:], 306 Match: match.(*FuncExpr), 307 Replace: replace, 308 } 309 } 310 311 // match = func 312 func (p *Parser) parseMatch() Expr { 313 if !p.scanToken(LPAREN, "match pattern") { 314 return nil 315 } 316 comments := p.getComments() 317 p.unscan() 318 f := p.parseFunc() 319 p.setComments(f, comments) 320 return f 321 } 322 323 // replace = func | ref 324 func (p *Parser) parseReplace() Expr { 325 tok := p.scan() 326 comments := p.getComments() 327 var e Expr 328 switch tok { 329 case LPAREN: 330 p.unscan() 331 e = p.parseFunc() 332 333 case DOLLAR: 334 p.unscan() 335 e = p.parseRef() 336 337 default: 338 p.addExpectedTokenErr("replace pattern") 339 return nil 340 } 341 p.setComments(e, comments) 342 return e 343 } 344 345 // func = '(' func-name arg* ')' 346 func (p *Parser) parseFunc() Expr { 347 if p.scan() != LPAREN { 348 panic("caller should have checked for left parenthesis") 349 } 350 351 src := p.src 352 name := p.parseFuncName() 353 if name == nil { 354 return nil 355 } 356 357 fn := &FuncExpr{Src: &src, Name: name} 358 for { 359 if p.scan() == RPAREN { 360 if p.hasComments() { 361 p.addErr("comments not allowed before )") 362 return nil 363 } 364 return fn 365 } 366 367 p.unscan() 368 comments := p.getComments() 369 arg := p.parseArg() 370 if arg == nil { 371 return nil 372 } 373 p.setComments(arg, comments) 374 375 fn.Args = append(fn.Args, arg) 376 } 377 } 378 379 // func-name = names | func 380 func (p *Parser) parseFuncName() Expr { 381 tok := p.scan() 382 comments := p.getComments() 383 var e Expr 384 switch tok { 385 case IDENT: 386 p.unscan() 387 e = p.parseNames() 388 389 case LPAREN: 390 // Constructed name. 391 p.unscan() 392 e = p.parseFunc() 393 394 default: 395 p.addExpectedTokenErr("name") 396 return nil 397 } 398 p.setComments(e, comments) 399 return e 400 } 401 402 // names = name ('|' name)* 403 func (p *Parser) parseNames() Expr { 404 var names NamesExpr 405 for { 406 if !p.scanToken(IDENT, "name") { 407 return nil 408 } 409 410 names = append(names, NameExpr(p.s.Literal())) 411 412 if p.scan() != PIPE { 413 p.unscan() 414 return &names 415 } 416 } 417 } 418 419 // match-child = bind | ref | match-and 420 func (p *Parser) parseArg() Expr { 421 tok := p.scan() 422 p.unscan() 423 424 if tok == DOLLAR { 425 return p.parseBindOrRef() 426 } 427 428 return p.parseAnd() 429 } 430 431 // bind = '$' label ':' and 432 // ref = '$' label 433 func (p *Parser) parseBindOrRef() Expr { 434 if p.scan() != DOLLAR { 435 panic("caller should have checked for dollar") 436 } 437 438 src := p.src 439 440 if !p.scanToken(IDENT, "label") { 441 return nil 442 } 443 444 label := StringExpr(p.s.Literal()) 445 446 if p.scan() != COLON { 447 p.unscan() 448 return &RefExpr{Src: &src, Label: label} 449 } 450 451 target := p.parseAnd() 452 if target == nil { 453 return nil 454 } 455 return &BindExpr{Src: &src, Label: label, Target: target} 456 } 457 458 // and = expr ('&' and) 459 func (p *Parser) parseAnd() Expr { 460 src := p.peekNextSource() 461 462 left := p.parseExpr() 463 if left == nil { 464 return nil 465 } 466 467 if p.scan() != AMPERSAND { 468 p.unscan() 469 return left 470 } 471 472 right := p.parseAnd() 473 if right == nil { 474 return nil 475 } 476 return &AndExpr{Src: src, Left: left, Right: right} 477 } 478 479 // expr = func | not | list | any | name | STRING | NUMBER 480 func (p *Parser) parseExpr() Expr { 481 tok := p.scan() 482 comments := p.getComments() 483 var e Expr 484 switch tok { 485 case LPAREN: 486 p.unscan() 487 e = p.parseFunc() 488 489 case CARET: 490 p.unscan() 491 e = p.parseNot() 492 493 case LBRACKET: 494 p.unscan() 495 e = p.parseList() 496 497 case ASTERISK: 498 src := p.src 499 e = &AnyExpr{Src: &src} 500 501 case IDENT: 502 name := NameExpr(p.s.Literal()) 503 e = &name 504 505 case STRING: 506 p.unscan() 507 e = p.parseString() 508 509 case NUMBER: 510 p.unscan() 511 e = p.parseNumber() 512 513 default: 514 p.addExpectedTokenErr("expression") 515 return nil 516 } 517 p.setComments(e, comments) 518 return e 519 } 520 521 // not = '^' expr 522 func (p *Parser) parseNot() Expr { 523 if p.scan() != CARET { 524 panic("caller should have checked for caret") 525 } 526 527 src := p.src 528 529 input := p.parseExpr() 530 if input == nil { 531 return nil 532 } 533 return &NotExpr{Src: &src, Input: input} 534 } 535 536 // list = '[' list-child* ']' 537 func (p *Parser) parseList() Expr { 538 if p.scan() != LBRACKET { 539 panic("caller should have checked for left bracket") 540 } 541 542 src := p.src 543 544 list := &ListExpr{Src: &src} 545 for { 546 if p.scan() == RBRACKET { 547 if p.hasComments() { 548 p.addErr("comments not allowed before ]") 549 return nil 550 } 551 return list 552 } 553 554 p.unscan() 555 item := p.parseListChild() 556 if item == nil { 557 return nil 558 } 559 560 list.Items = append(list.Items, item) 561 } 562 } 563 564 // list-child = list-any | arg 565 func (p *Parser) parseListChild() Expr { 566 tok := p.scan() 567 comments := p.getComments() 568 var e Expr 569 if tok == ELLIPSES { 570 src := p.src 571 e = &ListAnyExpr{Src: &src} 572 } else { 573 p.unscan() 574 e = p.parseArg() 575 } 576 p.setComments(e, comments) 577 return e 578 } 579 580 // ref = '$' label 581 func (p *Parser) parseRef() *RefExpr { 582 if p.scan() != DOLLAR { 583 panic("caller should have checked for dollar") 584 } 585 586 src := p.src 587 588 if !p.scanToken(IDENT, "label") { 589 return nil 590 } 591 592 return &RefExpr{Src: &src, Label: StringExpr(p.s.Literal())} 593 } 594 595 // tags = '[' IDENT (',' IDENT)* ']' 596 func (p *Parser) parseTags() TagsExpr { 597 var tags TagsExpr 598 599 if p.scan() != LBRACKET { 600 panic("caller should have checked for left bracket") 601 } 602 603 for { 604 if !p.scanToken(IDENT, "tag name") { 605 return nil 606 } 607 608 tags = append(tags, TagExpr(p.s.Literal())) 609 610 if p.scan() == RBRACKET { 611 if p.hasComments() { 612 p.addErr("comments not allowed before ]") 613 return nil 614 } 615 return tags 616 } 617 618 p.unscan() 619 if !p.scanToken(COMMA, "comma") { 620 return nil 621 } 622 } 623 } 624 625 func (p *Parser) parseString() *StringExpr { 626 if p.scan() != STRING { 627 panic("caller should have checked for literal string") 628 } 629 630 // Strip quotes. 631 s := p.s.Literal() 632 s = s[1 : len(s)-1] 633 634 e := StringExpr(s) 635 return &e 636 } 637 638 func (p *Parser) parseNumber() *NumberExpr { 639 if p.scan() != NUMBER { 640 panic("caller should have checked for numeric literal") 641 } 642 643 // Convert token literal to int64 value. 644 i, err := strconv.ParseInt(p.s.Literal(), 10, 64) 645 if err != nil { 646 p.addErr(err.Error()) 647 return nil 648 } 649 650 e := NumberExpr(i) 651 return &e 652 } 653 654 // peekNextSource returns the source information for the next token, but 655 // without actually consuming that token. 656 func (p *Parser) peekNextSource() *SourceLoc { 657 p.scan() 658 src := p.src 659 p.unscan() 660 661 // Don't directly take address of p.src, or the parser won't be 662 // eligible for GC due to that reference. 663 return &src 664 } 665 666 // scanToken scans the next token. If it does not have the expected token type, 667 // then scanToken records an error and returns false. Otherwise, it returns 668 // true. 669 func (p *Parser) scanToken(expected Token, desc string) bool { 670 if p.scan() != expected { 671 p.addExpectedTokenErr(desc) 672 return false 673 } 674 675 return true 676 } 677 678 // scan returns the next non-whitespace, non-comment token from the underlying 679 // scanner. If a token has been unscanned then read that instead. 680 func (p *Parser) scan() Token { 681 // If we have a token in the buffer, then return it. 682 if p.unscanned { 683 // Restore saved current token, and save previous token. 684 p.src, p.saveSrc = p.saveSrc, p.src 685 p.unscanned = false 686 return p.s.Token() 687 } 688 689 // Read the next token from the scanner. 690 for { 691 // Set source location of current token and save previous in case 692 // unscan is called. 693 p.saveSrc = p.src 694 p.src.Line, p.src.Pos = p.s.LineLoc() 695 696 tok := p.s.Scan() 697 switch tok { 698 case EOF: 699 p.appendComments() 700 701 // Reached end of current file, so try to open next file. 702 if p.file+1 >= len(p.files) { 703 // No more files to parse. 704 return EOF 705 } 706 p.file++ 707 708 if !p.openScanner() { 709 // Error opening file, don't try to recover. 710 return EOF 711 } 712 713 case ERROR: 714 // Error encountered while scanning. 715 p.addErr(p.s.Literal()) 716 return ERROR 717 718 case COMMENT: 719 p.comments = append(p.comments, CommentExpr(p.s.Literal())) 720 721 case WHITESPACE: 722 if strings.Count(p.s.Literal(), "\n") > 1 { 723 p.appendComments() 724 } 725 726 default: 727 return tok 728 } 729 } 730 } 731 732 // unscan pushes the previously read token back onto the buffer. 733 func (p *Parser) unscan() { 734 if p.unscanned { 735 panic("unscan was already called") 736 } 737 738 // Save current token and make previous token the current token. 739 p.src, p.saveSrc = p.saveSrc, p.src 740 p.unscanned = true 741 } 742 743 // openScanner attempts to open a scanner and reader over the next input file. 744 // If it succeeds, then it stores the reader and scanner over the file and 745 // returns true. If it fails, then it stores the error in p.err and returns 746 // false. 747 func (p *Parser) openScanner() bool { 748 r, err := p.resolver(p.files[p.file]) 749 if err != nil { 750 p.errors = append(p.errors, err) 751 return false 752 } 753 754 // Close any previous scanner and open a new one. 755 p.closeScanner() 756 p.r = r 757 p.s = NewScanner(r) 758 p.src.File = p.files[p.file] 759 return true 760 } 761 762 // closeScanner ensures that the current scanner and reader is closed. 763 func (p *Parser) closeScanner() { 764 if p.s != nil { 765 // If the reader has a Close method, call it. 766 closer, ok := p.r.(io.Closer) 767 if ok { 768 closer.Close() 769 } 770 p.r = nil 771 p.s = nil 772 } 773 } 774 775 // addExpectedTokenErr is used when the parser encounters an unexpected token. 776 // The desc argument describes what the parser expected instead of the current 777 // unexpected token. 778 func (p *Parser) addExpectedTokenErr(desc string) { 779 if p.s.Token() == EOF { 780 p.addErr(fmt.Sprintf("expected %s, found EOF", desc)) 781 } else { 782 p.addErr(fmt.Sprintf("expected %s, found '%s'", desc, p.s.Literal())) 783 } 784 } 785 786 // addErr wraps the given error text with file, line, and position context 787 // information. 788 func (p *Parser) addErr(text string) { 789 err := fmt.Errorf("%s: %s", p.src, text) 790 p.errors = append(p.errors, err) 791 } 792 793 // tryRecover attempts to recover from a parse error in order to continue 794 // reporting additional errors. 795 func (p *Parser) tryRecover() { 796 // Scan ahead, looking for top-level tokens that might allow the parser to 797 // recover enough to report further errors. 798 for { 799 tok := p.scan() 800 switch tok { 801 case EOF, ERROR: 802 // Terminate scan. 803 return 804 805 case LBRACKET, IDENT: 806 // Look for define identifier and left bracket tokens at start of 807 // line, as those are usually good recovery points. 808 if p.src.Pos == 0 { 809 if tok == LBRACKET || p.isDefineIdent() { 810 p.unscan() 811 } 812 return 813 } 814 } 815 } 816 } 817 818 func (p *Parser) isDefineIdent() bool { 819 return p.s.Token() == IDENT && p.s.Literal() == "define" 820 }