github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/optgen/lang/parser.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package lang
    12  
    13  import (
    14  	"fmt"
    15  	"io"
    16  	"os"
    17  	"strconv"
    18  	"strings"
    19  )
    20  
// FileResolver is used by the parser to abstract the opening and reading of
// input files. Given the name of an input file, it returns a reader over that
// file's contents, or an error if the file could not be opened. Callers of the
// parser can override the default behavior (os.Open) in order to open files in
// some other way (e.g. for testing). If the returned reader also implements
// io.Closer, the parser closes it once it is done with the file.
type FileResolver func(name string) (io.Reader, error)
    25  
// Parser parses Optgen language input files and builds an abstract syntax tree
// (AST) from them. Typically the Optgen compiler invokes the parser and then
// performs semantic checks on the resulting AST. For more details on the
// Optgen language syntax, see the Syntax section of docs.go.
type Parser struct {
	// files is the list of input file names, and file is the index into
	// that list of the file currently being scanned.
	files   []string
	file    int
	// r is the reader over the current input file and s is the scanner that
	// tokenizes it. Both are nil when no file is open.
	r       io.Reader
	s       *Scanner
	// src is the source location of the current token. saveSrc holds the
	// location that src is swapped with when a token is unscanned or
	// rescanned.
	src     SourceLoc
	saveSrc SourceLoc
	// errors accumulates the errors encountered while opening and parsing
	// the input files.
	errors  []error

	// comments accumulates contiguous comments as they are scanned.
	comments CommentsExpr

	// resolver is invoked to open the input files provided to the parser.
	resolver FileResolver

	// unscanned is true if the last token was unscanned (i.e. put back to be
	// reparsed).
	unscanned bool

	// exprs tracks top-level expressions (including comments) in the order
	// in which they were encountered.
	exprs []Expr

	// exprComments maps expressions to the comments that precede them.
	exprComments map[Expr]CommentsExpr
}
    55  
    56  // NewParser constructs a new instance of the Optgen parser, with the specified
    57  // list of file paths as its input files. The Parse method must be called in
    58  // order to parse the input files.
    59  func NewParser(files ...string) *Parser {
    60  	p := &Parser{
    61  		files:        files,
    62  		exprComments: make(map[Expr]CommentsExpr),
    63  	}
    64  
    65  	// By default, resolve file names by a call to os.Open.
    66  	p.resolver = func(name string) (io.Reader, error) {
    67  		return os.Open(name)
    68  	}
    69  
    70  	return p
    71  }
    72  
    73  // SetFileResolver overrides the default method of opening input files. The
    74  // default resolver will use os.Open to open input files from disk. Callers
    75  // can use this method to open input files in some other way.
    76  func (p *Parser) SetFileResolver(resolver FileResolver) {
    77  	p.resolver = resolver
    78  }
    79  
    80  // Parse parses the input files and returns the root expression of the AST. If
    81  // there are parse errors, then Parse returns nil, and the errors are returned
    82  // by the Errors function.
    83  func (p *Parser) Parse() *RootExpr {
    84  	root := p.parseRoot()
    85  
    86  	// Ensure that all open files have been closed.
    87  	p.closeScanner()
    88  
    89  	if p.errors != nil {
    90  		return nil
    91  	}
    92  	return root
    93  }
    94  
// Errors returns the collection of errors that occurred during parsing,
// including errors from opening input files, in the order they were recorded.
// If no errors occurred, then Errors returns nil.
func (p *Parser) Errors() []error {
	return p.errors
}
   100  
// Exprs returns the top-level expressions (defines, rules, and stand-alone
// comment blocks) in the order in which they were encountered. The returned
// slice is the parser's own slice, not a copy.
func (p *Parser) Exprs() []Expr {
	return p.exprs
}
   106  
   107  // GetComments returns the comments associated with e.
   108  func (p *Parser) GetComments(e Expr) CommentsExpr {
   109  	return p.exprComments[e]
   110  }
   111  
   112  func (p *Parser) getComments() CommentsExpr {
   113  	comments := p.comments
   114  	p.comments = nil
   115  	return comments
   116  }
   117  
   118  func (p *Parser) setComments(e Expr, comments CommentsExpr) {
   119  	if len(comments) > 0 {
   120  		p.exprComments[e] = comments
   121  	}
   122  }
   123  
   124  func (p *Parser) hasComments() bool {
   125  	return len(p.comments) > 0
   126  }
   127  
   128  func (p *Parser) appendComments() {
   129  	comments := p.getComments()
   130  	if len(comments) > 0 {
   131  		p.exprs = append(p.exprs, &comments)
   132  	}
   133  }
   134  
// parseRoot parses a sequence of top-level statements until the scanner
// reports EOF, collecting them into a single RootExpr:
//
//	root = (tags? define | tags rule)*
//
// A statement that begins with '[' is a tagged define if the token following
// the tags is an identifier, and a rule otherwise. A statement that begins
// with an identifier must be an untagged define. When a statement fails to
// parse, an error has been recorded and tryRecover is used to skip ahead so
// that further errors can still be reported.
//
// parseRoot returns an empty RootExpr if there are no input files, and nil if
// the first input file cannot be opened.
func (p *Parser) parseRoot() *RootExpr {
	rootOp := &RootExpr{}

	// Ensure the scanner has been created over the first file.
	if p.s == nil {
		// If no files to parse, then return empty root expression.
		if len(p.files) == 0 {
			return rootOp
		}

		if !p.openScanner() {
			return nil
		}
	}

	for {
		var tags TagsExpr
		var comments CommentsExpr

		// src is the location of the statement's first token; it becomes the
		// source location of the resulting define or rule.
		tok := p.scan()
		src := p.src

		switch tok {
		case EOF:
			return rootOp

		case LBRACKET:
			// Put back the '[' so that parseTags can consume it.
			p.unscan()

			// Claim comments that precede the tags before parsing them.
			comments = p.getComments()
			tags = p.parseTags()
			if tags == nil {
				p.tryRecover()
				break
			}

			// Tags followed by anything other than an identifier introduce a
			// rule; tags followed by an identifier introduce a define.
			if p.scan() != IDENT {
				p.unscan()

				rule := p.parseRule(comments, tags, src)
				if rule == nil {
					p.tryRecover()
					break
				}
				p.setComments(rule, comments)

				rootOp.Rules = append(rootOp.Rules, rule)
				p.exprs = append(p.exprs, rule)
				break
			}

			// The identifier is still the current token; handle it exactly as
			// an untagged define would be handled.
			fallthrough

		case IDENT:
			// Only define identifier is allowed at the top level.
			if !p.isDefineIdent() {
				p.addExpectedTokenErr("define statement")
				p.tryRecover()
				break
			}
			// If there was no tag, we need to check for comments.
			if len(comments) == 0 {
				comments = p.getComments()
			}

			// Put back the "define" keyword so that parseDefine can consume it.
			p.unscan()

			define := p.parseDefine(comments, tags, src)
			if define == nil {
				p.tryRecover()
				break
			}
			p.setComments(define, comments)

			rootOp.Defines = append(rootOp.Defines, define)
			p.exprs = append(p.exprs, define)

		default:
			p.addExpectedTokenErr("define statement or rule")
			p.tryRecover()
		}
	}
}
   219  
   220  // define = 'define' define-name '{' define-field* '}'
   221  func (p *Parser) parseDefine(comments CommentsExpr, tags TagsExpr, src SourceLoc) *DefineExpr {
   222  	if !p.scanToken(IDENT, "define statement") || p.s.Literal() != "define" {
   223  		return nil
   224  	}
   225  
   226  	if !p.scanToken(IDENT, "define name") {
   227  		return nil
   228  	}
   229  
   230  	name := p.s.Literal()
   231  	define := &DefineExpr{Src: &src, Comments: comments, Name: StringExpr(name), Tags: tags}
   232  
   233  	if !p.scanToken(LBRACE, "'{'") {
   234  		return nil
   235  	}
   236  
   237  	for {
   238  		if p.scan() == RBRACE {
   239  			if len(p.comments) > 0 {
   240  				p.addErr(fmt.Sprintf("comments not allowed before closing }: %v", p.comments))
   241  				return nil
   242  			}
   243  			return define
   244  		}
   245  		p.unscan()
   246  
   247  		defineField := p.parseDefineField()
   248  		if defineField == nil {
   249  			return nil
   250  		}
   251  
   252  		define.Fields = append(define.Fields, defineField)
   253  	}
   254  }
   255  
   256  // define-field = field-name field-type
   257  func (p *Parser) parseDefineField() *DefineFieldExpr {
   258  	if !p.scanToken(IDENT, "define field name") {
   259  		return nil
   260  	}
   261  
   262  	src := p.src
   263  	name := p.s.Literal()
   264  
   265  	if !p.scanToken(IDENT, "define field type") {
   266  		return nil
   267  	}
   268  
   269  	typ := p.s.Literal()
   270  
   271  	field := &DefineFieldExpr{
   272  		Src:      &src,
   273  		Name:     StringExpr(name),
   274  		Comments: p.getComments(),
   275  		Type:     StringExpr(typ),
   276  	}
   277  	p.setComments(field, field.Comments)
   278  	return field
   279  }
   280  
   281  // rule = match '=>' replace
   282  func (p *Parser) parseRule(comments CommentsExpr, tags TagsExpr, src SourceLoc) *RuleExpr {
   283  	match := p.parseMatch()
   284  	if match == nil {
   285  		return nil
   286  	}
   287  
   288  	if !p.scanToken(ARROW, "'=>'") {
   289  		return nil
   290  	}
   291  	if p.hasComments() {
   292  		p.addErr("comments not allowed before =>")
   293  		return nil
   294  	}
   295  
   296  	replace := p.parseReplace()
   297  	if replace == nil {
   298  		return nil
   299  	}
   300  
   301  	return &RuleExpr{
   302  		Src:      &src,
   303  		Name:     StringExpr(tags[0]),
   304  		Comments: comments,
   305  		Tags:     tags[1:],
   306  		Match:    match.(*FuncExpr),
   307  		Replace:  replace,
   308  	}
   309  }
   310  
   311  // match = func
   312  func (p *Parser) parseMatch() Expr {
   313  	if !p.scanToken(LPAREN, "match pattern") {
   314  		return nil
   315  	}
   316  	comments := p.getComments()
   317  	p.unscan()
   318  	f := p.parseFunc()
   319  	p.setComments(f, comments)
   320  	return f
   321  }
   322  
   323  // replace = func | ref
   324  func (p *Parser) parseReplace() Expr {
   325  	tok := p.scan()
   326  	comments := p.getComments()
   327  	var e Expr
   328  	switch tok {
   329  	case LPAREN:
   330  		p.unscan()
   331  		e = p.parseFunc()
   332  
   333  	case DOLLAR:
   334  		p.unscan()
   335  		e = p.parseRef()
   336  
   337  	default:
   338  		p.addExpectedTokenErr("replace pattern")
   339  		return nil
   340  	}
   341  	p.setComments(e, comments)
   342  	return e
   343  }
   344  
   345  // func = '(' func-name arg* ')'
   346  func (p *Parser) parseFunc() Expr {
   347  	if p.scan() != LPAREN {
   348  		panic("caller should have checked for left parenthesis")
   349  	}
   350  
   351  	src := p.src
   352  	name := p.parseFuncName()
   353  	if name == nil {
   354  		return nil
   355  	}
   356  
   357  	fn := &FuncExpr{Src: &src, Name: name}
   358  	for {
   359  		if p.scan() == RPAREN {
   360  			if p.hasComments() {
   361  				p.addErr("comments not allowed before )")
   362  				return nil
   363  			}
   364  			return fn
   365  		}
   366  
   367  		p.unscan()
   368  		comments := p.getComments()
   369  		arg := p.parseArg()
   370  		if arg == nil {
   371  			return nil
   372  		}
   373  		p.setComments(arg, comments)
   374  
   375  		fn.Args = append(fn.Args, arg)
   376  	}
   377  }
   378  
   379  // func-name = names | func
   380  func (p *Parser) parseFuncName() Expr {
   381  	tok := p.scan()
   382  	comments := p.getComments()
   383  	var e Expr
   384  	switch tok {
   385  	case IDENT:
   386  		p.unscan()
   387  		e = p.parseNames()
   388  
   389  	case LPAREN:
   390  		// Constructed name.
   391  		p.unscan()
   392  		e = p.parseFunc()
   393  
   394  	default:
   395  		p.addExpectedTokenErr("name")
   396  		return nil
   397  	}
   398  	p.setComments(e, comments)
   399  	return e
   400  }
   401  
   402  // names = name ('|' name)*
   403  func (p *Parser) parseNames() Expr {
   404  	var names NamesExpr
   405  	for {
   406  		if !p.scanToken(IDENT, "name") {
   407  			return nil
   408  		}
   409  
   410  		names = append(names, NameExpr(p.s.Literal()))
   411  
   412  		if p.scan() != PIPE {
   413  			p.unscan()
   414  			return &names
   415  		}
   416  	}
   417  }
   418  
   419  // match-child = bind | ref | match-and
   420  func (p *Parser) parseArg() Expr {
   421  	tok := p.scan()
   422  	p.unscan()
   423  
   424  	if tok == DOLLAR {
   425  		return p.parseBindOrRef()
   426  	}
   427  
   428  	return p.parseAnd()
   429  }
   430  
   431  // bind = '$' label ':' and
   432  // ref  = '$' label
   433  func (p *Parser) parseBindOrRef() Expr {
   434  	if p.scan() != DOLLAR {
   435  		panic("caller should have checked for dollar")
   436  	}
   437  
   438  	src := p.src
   439  
   440  	if !p.scanToken(IDENT, "label") {
   441  		return nil
   442  	}
   443  
   444  	label := StringExpr(p.s.Literal())
   445  
   446  	if p.scan() != COLON {
   447  		p.unscan()
   448  		return &RefExpr{Src: &src, Label: label}
   449  	}
   450  
   451  	target := p.parseAnd()
   452  	if target == nil {
   453  		return nil
   454  	}
   455  	return &BindExpr{Src: &src, Label: label, Target: target}
   456  }
   457  
   458  // and = expr ('&' and)
   459  func (p *Parser) parseAnd() Expr {
   460  	src := p.peekNextSource()
   461  
   462  	left := p.parseExpr()
   463  	if left == nil {
   464  		return nil
   465  	}
   466  
   467  	if p.scan() != AMPERSAND {
   468  		p.unscan()
   469  		return left
   470  	}
   471  
   472  	right := p.parseAnd()
   473  	if right == nil {
   474  		return nil
   475  	}
   476  	return &AndExpr{Src: src, Left: left, Right: right}
   477  }
   478  
   479  // expr = func | not | list | any | name | STRING | NUMBER
   480  func (p *Parser) parseExpr() Expr {
   481  	tok := p.scan()
   482  	comments := p.getComments()
   483  	var e Expr
   484  	switch tok {
   485  	case LPAREN:
   486  		p.unscan()
   487  		e = p.parseFunc()
   488  
   489  	case CARET:
   490  		p.unscan()
   491  		e = p.parseNot()
   492  
   493  	case LBRACKET:
   494  		p.unscan()
   495  		e = p.parseList()
   496  
   497  	case ASTERISK:
   498  		src := p.src
   499  		e = &AnyExpr{Src: &src}
   500  
   501  	case IDENT:
   502  		name := NameExpr(p.s.Literal())
   503  		e = &name
   504  
   505  	case STRING:
   506  		p.unscan()
   507  		e = p.parseString()
   508  
   509  	case NUMBER:
   510  		p.unscan()
   511  		e = p.parseNumber()
   512  
   513  	default:
   514  		p.addExpectedTokenErr("expression")
   515  		return nil
   516  	}
   517  	p.setComments(e, comments)
   518  	return e
   519  }
   520  
   521  // not = '^' expr
   522  func (p *Parser) parseNot() Expr {
   523  	if p.scan() != CARET {
   524  		panic("caller should have checked for caret")
   525  	}
   526  
   527  	src := p.src
   528  
   529  	input := p.parseExpr()
   530  	if input == nil {
   531  		return nil
   532  	}
   533  	return &NotExpr{Src: &src, Input: input}
   534  }
   535  
   536  // list = '[' list-child* ']'
   537  func (p *Parser) parseList() Expr {
   538  	if p.scan() != LBRACKET {
   539  		panic("caller should have checked for left bracket")
   540  	}
   541  
   542  	src := p.src
   543  
   544  	list := &ListExpr{Src: &src}
   545  	for {
   546  		if p.scan() == RBRACKET {
   547  			if p.hasComments() {
   548  				p.addErr("comments not allowed before ]")
   549  				return nil
   550  			}
   551  			return list
   552  		}
   553  
   554  		p.unscan()
   555  		item := p.parseListChild()
   556  		if item == nil {
   557  			return nil
   558  		}
   559  
   560  		list.Items = append(list.Items, item)
   561  	}
   562  }
   563  
   564  // list-child = list-any | arg
   565  func (p *Parser) parseListChild() Expr {
   566  	tok := p.scan()
   567  	comments := p.getComments()
   568  	var e Expr
   569  	if tok == ELLIPSES {
   570  		src := p.src
   571  		e = &ListAnyExpr{Src: &src}
   572  	} else {
   573  		p.unscan()
   574  		e = p.parseArg()
   575  	}
   576  	p.setComments(e, comments)
   577  	return e
   578  }
   579  
   580  // ref = '$' label
   581  func (p *Parser) parseRef() *RefExpr {
   582  	if p.scan() != DOLLAR {
   583  		panic("caller should have checked for dollar")
   584  	}
   585  
   586  	src := p.src
   587  
   588  	if !p.scanToken(IDENT, "label") {
   589  		return nil
   590  	}
   591  
   592  	return &RefExpr{Src: &src, Label: StringExpr(p.s.Literal())}
   593  }
   594  
   595  // tags = '[' IDENT (',' IDENT)* ']'
   596  func (p *Parser) parseTags() TagsExpr {
   597  	var tags TagsExpr
   598  
   599  	if p.scan() != LBRACKET {
   600  		panic("caller should have checked for left bracket")
   601  	}
   602  
   603  	for {
   604  		if !p.scanToken(IDENT, "tag name") {
   605  			return nil
   606  		}
   607  
   608  		tags = append(tags, TagExpr(p.s.Literal()))
   609  
   610  		if p.scan() == RBRACKET {
   611  			if p.hasComments() {
   612  				p.addErr("comments not allowed before ]")
   613  				return nil
   614  			}
   615  			return tags
   616  		}
   617  
   618  		p.unscan()
   619  		if !p.scanToken(COMMA, "comma") {
   620  			return nil
   621  		}
   622  	}
   623  }
   624  
   625  func (p *Parser) parseString() *StringExpr {
   626  	if p.scan() != STRING {
   627  		panic("caller should have checked for literal string")
   628  	}
   629  
   630  	// Strip quotes.
   631  	s := p.s.Literal()
   632  	s = s[1 : len(s)-1]
   633  
   634  	e := StringExpr(s)
   635  	return &e
   636  }
   637  
   638  func (p *Parser) parseNumber() *NumberExpr {
   639  	if p.scan() != NUMBER {
   640  		panic("caller should have checked for numeric literal")
   641  	}
   642  
   643  	// Convert token literal to int64 value.
   644  	i, err := strconv.ParseInt(p.s.Literal(), 10, 64)
   645  	if err != nil {
   646  		p.addErr(err.Error())
   647  		return nil
   648  	}
   649  
   650  	e := NumberExpr(i)
   651  	return &e
   652  }
   653  
   654  // peekNextSource returns the source information for the next token, but
   655  // without actually consuming that token.
   656  func (p *Parser) peekNextSource() *SourceLoc {
   657  	p.scan()
   658  	src := p.src
   659  	p.unscan()
   660  
   661  	// Don't directly take address of p.src, or the parser won't be
   662  	// eligible for GC due to that reference.
   663  	return &src
   664  }
   665  
   666  // scanToken scans the next token. If it does not have the expected token type,
   667  // then scanToken records an error and returns false. Otherwise, it returns
   668  // true.
   669  func (p *Parser) scanToken(expected Token, desc string) bool {
   670  	if p.scan() != expected {
   671  		p.addExpectedTokenErr(desc)
   672  		return false
   673  	}
   674  
   675  	return true
   676  }
   677  
// scan returns the next non-whitespace, non-comment token from the underlying
// scanner. If a token has been unscanned then read that instead.
//
// While skipping over trivia, scan also maintains the parser's comment state:
// comment tokens are accumulated in p.comments, and whitespace that contains
// more than one newline flushes the accumulated comments to the list of
// top-level expressions. When the current file is exhausted, scan moves on to
// the next input file, and returns EOF only once there are no more files or
// the next file cannot be opened. On a scanning error, the error is recorded
// and ERROR is returned.
func (p *Parser) scan() Token {
	// If we have a token in the buffer, then return it.
	if p.unscanned {
		// Restore saved current token, and save previous token.
		p.src, p.saveSrc = p.saveSrc, p.src
		p.unscanned = false
		return p.s.Token()
	}

	// Read the next token from the scanner.
	for {
		// Set source location of current token and save previous in case
		// unscan is called.
		p.saveSrc = p.src
		p.src.Line, p.src.Pos = p.s.LineLoc()

		tok := p.s.Scan()
		switch tok {
		case EOF:
			// Comments still pending at the end of a file do not precede any
			// expression, so record them as a stand-alone comment block.
			p.appendComments()

			// Reached end of current file, so try to open next file.
			if p.file+1 >= len(p.files) {
				// No more files to parse.
				return EOF
			}
			p.file++

			if !p.openScanner() {
				// Error opening file, don't try to recover.
				return EOF
			}

		case ERROR:
			// Error encountered while scanning.
			p.addErr(p.s.Literal())
			return ERROR

		case COMMENT:
			// Accumulate the comment until an expression claims it.
			p.comments = append(p.comments, CommentExpr(p.s.Literal()))

		case WHITESPACE:
			// More than one newline means the comments scanned so far are
			// followed by a blank line, so they form a stand-alone comment
			// block rather than belonging to the next expression.
			if strings.Count(p.s.Literal(), "\n") > 1 {
				p.appendComments()
			}

		default:
			return tok
		}
	}
}
   731  
   732  // unscan pushes the previously read token back onto the buffer.
   733  func (p *Parser) unscan() {
   734  	if p.unscanned {
   735  		panic("unscan was already called")
   736  	}
   737  
   738  	// Save current token and make previous token the current token.
   739  	p.src, p.saveSrc = p.saveSrc, p.src
   740  	p.unscanned = true
   741  }
   742  
   743  // openScanner attempts to open a scanner and reader over the next input file.
   744  // If it succeeds, then it stores the reader and scanner over the file and
   745  // returns true. If it fails, then it stores the error in p.err and returns
   746  // false.
   747  func (p *Parser) openScanner() bool {
   748  	r, err := p.resolver(p.files[p.file])
   749  	if err != nil {
   750  		p.errors = append(p.errors, err)
   751  		return false
   752  	}
   753  
   754  	// Close any previous scanner and open a new one.
   755  	p.closeScanner()
   756  	p.r = r
   757  	p.s = NewScanner(r)
   758  	p.src.File = p.files[p.file]
   759  	return true
   760  }
   761  
   762  // closeScanner ensures that the current scanner and reader is closed.
   763  func (p *Parser) closeScanner() {
   764  	if p.s != nil {
   765  		// If the reader has a Close method, call it.
   766  		closer, ok := p.r.(io.Closer)
   767  		if ok {
   768  			closer.Close()
   769  		}
   770  		p.r = nil
   771  		p.s = nil
   772  	}
   773  }
   774  
   775  // addExpectedTokenErr is used when the parser encounters an unexpected token.
   776  // The desc argument describes what the parser expected instead of the current
   777  // unexpected token.
   778  func (p *Parser) addExpectedTokenErr(desc string) {
   779  	if p.s.Token() == EOF {
   780  		p.addErr(fmt.Sprintf("expected %s, found EOF", desc))
   781  	} else {
   782  		p.addErr(fmt.Sprintf("expected %s, found '%s'", desc, p.s.Literal()))
   783  	}
   784  }
   785  
   786  // addErr wraps the given error text with file, line, and position context
   787  // information.
   788  func (p *Parser) addErr(text string) {
   789  	err := fmt.Errorf("%s: %s", p.src, text)
   790  	p.errors = append(p.errors, err)
   791  }
   792  
   793  // tryRecover attempts to recover from a parse error in order to continue
   794  // reporting additional errors.
   795  func (p *Parser) tryRecover() {
   796  	// Scan ahead, looking for top-level tokens that might allow the parser to
   797  	// recover enough to report further errors.
   798  	for {
   799  		tok := p.scan()
   800  		switch tok {
   801  		case EOF, ERROR:
   802  			// Terminate scan.
   803  			return
   804  
   805  		case LBRACKET, IDENT:
   806  			// Look for define identifier and left bracket tokens at start of
   807  			// line, as those are usually good recovery points.
   808  			if p.src.Pos == 0 {
   809  				if tok == LBRACKET || p.isDefineIdent() {
   810  					p.unscan()
   811  				}
   812  				return
   813  			}
   814  		}
   815  	}
   816  }
   817  
   818  func (p *Parser) isDefineIdent() bool {
   819  	return p.s.Token() == IDENT && p.s.Literal() == "define"
   820  }