github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/ast/parser.go (about)

     1  // Copyright 2017 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package ast
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"os"
    10  	"path/filepath"
    11  	"strconv"
    12  	"strings"
    13  )
    14  
    15  // Parse parses sys description into AST and returns top-level nodes.
    16  // If any errors are encountered, returns nil.
    17  func Parse(data []byte, filename string, errorHandler ErrorHandler) *Description {
    18  	p := &parser{s: newScanner(data, filename, errorHandler)}
    19  	prevNewLine, prevComment := false, false
    20  	var top []Node
    21  	for p.next(); p.tok != tokEOF; {
    22  		decl := p.parseTopRecover()
    23  		if decl == nil {
    24  			continue
    25  		}
    26  		// Add new lines around structs, remove duplicate new lines.
    27  		if _, ok := decl.(*NewLine); ok && prevNewLine {
    28  			continue
    29  		}
    30  		if str, ok := decl.(*Struct); ok && !prevNewLine && !prevComment {
    31  			top = append(top, &NewLine{Pos: str.Pos})
    32  		}
    33  		top = append(top, decl)
    34  		if str, ok := decl.(*Struct); ok {
    35  			decl = &NewLine{Pos: str.Pos}
    36  			top = append(top, decl)
    37  		}
    38  		_, prevNewLine = decl.(*NewLine)
    39  		_, prevComment = decl.(*Comment)
    40  	}
    41  	if prevNewLine {
    42  		top = top[:len(top)-1]
    43  	}
    44  	if !p.s.Ok() {
    45  		return nil
    46  	}
    47  	return &Description{top}
    48  }
    49  
    50  func ParseGlob(glob string, errorHandler ErrorHandler) *Description {
    51  	if errorHandler == nil {
    52  		errorHandler = LoggingHandler
    53  	}
    54  	files, err := filepath.Glob(glob)
    55  	if err != nil {
    56  		errorHandler(Pos{}, fmt.Sprintf("failed to find input files: %v", err))
    57  		return nil
    58  	}
    59  	if len(files) == 0 {
    60  		errorHandler(Pos{}, fmt.Sprintf("no files matched by glob %q", glob))
    61  		return nil
    62  	}
    63  	desc := &Description{}
    64  	for _, f := range files {
    65  		data, err := os.ReadFile(f)
    66  		if err != nil {
    67  			errorHandler(Pos{}, fmt.Sprintf("failed to read input file: %v", err))
    68  			return nil
    69  		}
    70  		desc1 := Parse(data, f, errorHandler)
    71  		if desc1 == nil {
    72  			desc = nil
    73  		}
    74  		if desc != nil {
    75  			desc.Nodes = append(desc.Nodes, desc1.Nodes...)
    76  		}
    77  	}
    78  	return desc
    79  }
    80  
// parser is a recursive-descent parser with one token of lookahead.
// The lookahead token is refreshed from the scanner by next.
type parser struct {
	// s is the scanner that produces tokens and collects errors.
	s *scanner

	// Current token:
	tok token  // kind of the current token
	lit string // literal text of the current token, as returned by the scanner
	pos Pos    // position of the current token, as returned by the scanner
}
    89  
// errSkipLine is the sentinel panic value used for error recovery: expect (and
// parseTop, on an illegal token) panics with it, and parseTopRecover recovers
// from it by skipping parsing till the next NEWLINE.
var errSkipLine = errors.New("")
    92  
    93  func (p *parser) parseTopRecover() Node {
    94  	defer func() {
    95  		switch err := recover(); err {
    96  		case nil:
    97  		case errSkipLine:
    98  			// Try to recover by consuming everything until next NEWLINE.
    99  			for p.tok != tokNewLine && p.tok != tokEOF {
   100  				p.next()
   101  			}
   102  			p.tryConsume(tokNewLine)
   103  		default:
   104  			panic(err)
   105  		}
   106  	}()
   107  	decl := p.parseTop()
   108  	if decl == nil {
   109  		panic("not reachable")
   110  	}
   111  	p.consume(tokNewLine)
   112  	return decl
   113  }
   114  
   115  func (p *parser) parseTop() Node {
   116  	switch p.tok {
   117  	case tokNewLine:
   118  		return &NewLine{Pos: p.pos}
   119  	case tokComment:
   120  		return p.parseComment()
   121  	case tokDefine:
   122  		return p.parseDefine()
   123  	case tokInclude:
   124  		return p.parseInclude()
   125  	case tokIncdir:
   126  		return p.parseIncdir()
   127  	case tokResource:
   128  		return p.parseResource()
   129  	case tokIdent:
   130  		name := p.parseIdent()
   131  		switch name.Name {
   132  		case "meta":
   133  			return p.parseMeta()
   134  		case "type":
   135  			return p.parseTypeDef()
   136  		}
   137  		switch p.tok {
   138  		case tokLParen:
   139  			return p.parseCall(name)
   140  		case tokLBrace, tokLBrack:
   141  			return p.parseStruct(name)
   142  		case tokEq:
   143  			return p.parseFlags(name)
   144  		default:
   145  			p.expect(tokLParen, tokLBrace, tokLBrack, tokEq)
   146  		}
   147  	case tokIllegal:
   148  		// Scanner has already producer an error for this one.
   149  		panic(errSkipLine)
   150  	default:
   151  		p.expect(tokComment, tokDefine, tokInclude, tokResource, tokIdent)
   152  	}
   153  	panic("not reachable")
   154  }
   155  
   156  func (p *parser) next() {
   157  	p.tok, p.lit, p.pos = p.s.Scan()
   158  }
   159  
   160  func (p *parser) consume(tok token) {
   161  	p.expect(tok)
   162  	p.next()
   163  }
   164  
   165  func (p *parser) tryConsume(tok token) bool {
   166  	if p.tok != tok {
   167  		return false
   168  	}
   169  	p.next()
   170  	return true
   171  }
   172  
   173  func (p *parser) expect(tokens ...token) {
   174  	for _, tok := range tokens {
   175  		if p.tok == tok {
   176  			return
   177  		}
   178  	}
   179  	var str []string
   180  	for _, tok := range tokens {
   181  		str = append(str, tok.String())
   182  	}
   183  	p.s.Error(p.pos, fmt.Sprintf("unexpected %v, expecting %v", p.tok, strings.Join(str, ", ")))
   184  	panic(errSkipLine)
   185  }
   186  
   187  func (p *parser) parseComment() *Comment {
   188  	c := &Comment{
   189  		Pos:  p.pos,
   190  		Text: p.lit,
   191  	}
   192  	p.consume(tokComment)
   193  	return c
   194  }
   195  
   196  func (p *parser) parseMeta() *Meta {
   197  	return &Meta{
   198  		Pos:   p.pos,
   199  		Value: p.parseType(),
   200  	}
   201  }
   202  
   203  func (p *parser) parseDefine() *Define {
   204  	pos0 := p.pos
   205  	p.consume(tokDefine)
   206  	name := p.parseIdent()
   207  	p.expect(tokInt, tokIdent, tokCExpr)
   208  	var val *Int
   209  	if p.tok == tokCExpr {
   210  		val = p.parseCExpr()
   211  	} else {
   212  		val = p.parseInt()
   213  	}
   214  	return &Define{
   215  		Pos:   pos0,
   216  		Name:  name,
   217  		Value: val,
   218  	}
   219  }
   220  
   221  func (p *parser) parseInclude() *Include {
   222  	pos0 := p.pos
   223  	p.consume(tokInclude)
   224  	return &Include{
   225  		Pos:  pos0,
   226  		File: p.parseString(),
   227  	}
   228  }
   229  
   230  func (p *parser) parseIncdir() *Incdir {
   231  	pos0 := p.pos
   232  	p.consume(tokIncdir)
   233  	return &Incdir{
   234  		Pos: pos0,
   235  		Dir: p.parseString(),
   236  	}
   237  }
   238  
   239  func (p *parser) parseResource() *Resource {
   240  	pos0 := p.pos
   241  	p.consume(tokResource)
   242  	name := p.parseIdent()
   243  	p.consume(tokLBrack)
   244  	base := p.parseType()
   245  	p.consume(tokRBrack)
   246  	var values []*Int
   247  	if p.tryConsume(tokColon) {
   248  		values = append(values, p.parseInt())
   249  		for p.tryConsume(tokComma) {
   250  			values = append(values, p.parseInt())
   251  		}
   252  	}
   253  	return &Resource{
   254  		Pos:    pos0,
   255  		Name:   name,
   256  		Base:   base,
   257  		Values: values,
   258  	}
   259  }
   260  
   261  func (p *parser) parseTypeDef() *TypeDef {
   262  	pos0 := p.pos
   263  	name := p.parseIdent()
   264  	var typ *Type
   265  	var str *Struct
   266  	var args []*Ident
   267  	p.expect(tokLBrack, tokIdent)
   268  	if p.tryConsume(tokLBrack) {
   269  		args = append(args, p.parseIdent())
   270  		for p.tryConsume(tokComma) {
   271  			args = append(args, p.parseIdent())
   272  		}
   273  		p.consume(tokRBrack)
   274  		if p.tok == tokLBrace || p.tok == tokLBrack {
   275  			emptyName := &Ident{
   276  				Pos:  pos0,
   277  				Name: "",
   278  			}
   279  			str = p.parseStruct(emptyName)
   280  		} else {
   281  			typ = p.parseType()
   282  		}
   283  	} else {
   284  		typ = p.parseType()
   285  	}
   286  	return &TypeDef{
   287  		Pos:    pos0,
   288  		Name:   name,
   289  		Args:   args,
   290  		Type:   typ,
   291  		Struct: str,
   292  	}
   293  }
   294  
   295  func (p *parser) parseCall(name *Ident) *Call {
   296  	c := &Call{
   297  		Pos:      name.Pos,
   298  		Name:     name,
   299  		CallName: callName(name.Name),
   300  	}
   301  	p.consume(tokLParen)
   302  	for p.tok != tokRParen {
   303  		c.Args = append(c.Args, p.parseField(false))
   304  		p.expect(tokComma, tokRParen)
   305  		p.tryConsume(tokComma)
   306  	}
   307  	p.consume(tokRParen)
   308  	if p.tok != tokNewLine && p.tok != tokLParen {
   309  		c.Ret = p.parseType()
   310  	}
   311  	if p.tryConsume(tokLParen) {
   312  		c.Attrs = append(c.Attrs, p.parseType())
   313  		for p.tryConsume(tokComma) {
   314  			c.Attrs = append(c.Attrs, p.parseType())
   315  		}
   316  		p.consume(tokRParen)
   317  	}
   318  	return c
   319  }
   320  
   321  func callName(s string) string {
   322  	pos := strings.IndexByte(s, '$')
   323  	if pos == -1 {
   324  		return s
   325  	}
   326  	return s[:pos]
   327  }
   328  
   329  func (p *parser) parseFlags(name *Ident) Node {
   330  	p.consume(tokEq)
   331  	switch p.tok {
   332  	case tokInt, tokIdent:
   333  		return p.parseIntFlags(name)
   334  	case tokString, tokStringHex:
   335  		return p.parseStrFlags(name)
   336  	default:
   337  		p.expect(tokInt, tokIdent, tokString)
   338  		return nil
   339  	}
   340  }
   341  
   342  func (p *parser) parseIntFlags(name *Ident) *IntFlags {
   343  	values := []*Int{p.parseInt()}
   344  	for p.tryConsume(tokComma) {
   345  		values = append(values, p.parseInt())
   346  	}
   347  	return &IntFlags{
   348  		Pos:    name.Pos,
   349  		Name:   name,
   350  		Values: values,
   351  	}
   352  }
   353  
   354  func (p *parser) parseStrFlags(name *Ident) *StrFlags {
   355  	values := []*String{p.parseString()}
   356  	for p.tryConsume(tokComma) {
   357  		values = append(values, p.parseString())
   358  	}
   359  	return &StrFlags{
   360  		Pos:    name.Pos,
   361  		Name:   name,
   362  		Values: values,
   363  	}
   364  }
   365  
   366  func (p *parser) parseStruct(name *Ident) *Struct {
   367  	str := &Struct{
   368  		Pos:  name.Pos,
   369  		Name: name,
   370  	}
   371  	closing := tokRBrace
   372  	if p.tok == tokLBrack {
   373  		str.IsUnion = true
   374  		closing = tokRBrack
   375  	}
   376  	p.next()
   377  	p.consume(tokNewLine)
   378  	for {
   379  		newBlock := false
   380  		for p.tok == tokNewLine {
   381  			newBlock = true
   382  			p.next()
   383  		}
   384  		comments := p.parseCommentBlock()
   385  		if p.tryConsume(closing) {
   386  			str.Comments = comments
   387  			break
   388  		}
   389  		fld := p.parseField(true)
   390  		fld.NewBlock = newBlock
   391  		fld.Comments = comments
   392  		str.Fields = append(str.Fields, fld)
   393  		p.consume(tokNewLine)
   394  	}
   395  	if p.tryConsume(tokLBrack) {
   396  		str.Attrs = append(str.Attrs, p.parseType())
   397  		for p.tryConsume(tokComma) {
   398  			str.Attrs = append(str.Attrs, p.parseType())
   399  		}
   400  		p.consume(tokRBrack)
   401  	}
   402  	return str
   403  }
   404  
   405  func (p *parser) parseCommentBlock() []*Comment {
   406  	var comments []*Comment
   407  	for p.tok == tokComment {
   408  		comments = append(comments, p.parseComment())
   409  		p.consume(tokNewLine)
   410  		for p.tryConsume(tokNewLine) {
   411  		}
   412  	}
   413  	return comments
   414  }
   415  
   416  func (p *parser) parseField(parseAttrs bool) *Field {
   417  	name := p.parseIdent()
   418  
   419  	field := &Field{
   420  		Pos:  name.Pos,
   421  		Name: name,
   422  		Type: p.parseType(),
   423  	}
   424  
   425  	if parseAttrs && p.tryConsume(tokLParen) {
   426  		field.Attrs = append(field.Attrs, p.parseType())
   427  		for p.tryConsume(tokComma) {
   428  			field.Attrs = append(field.Attrs, p.parseType())
   429  		}
   430  		p.consume(tokRParen)
   431  	}
   432  
   433  	return field
   434  }
   435  
// operatorInfo describes a binary operator token: the AST operator it maps to
// and the priority level at which parseBinaryExpr handles it.
type operatorInfo struct {
	op   Operator
	prio int
}

// maxOperatorPrio is the largest prio used in binaryOperators; parseBinaryExpr
// falls through to parseExprFactor for any level above it.
const maxOperatorPrio = 1

// The highest priority is 0.
// Level 0 is the outermost level of the recursion in parseBinaryExpr: operands
// of a level-N operator are parsed at level N+1, so operators with a larger
// prio bind tighter than operators with a smaller one.
var binaryOperators = map[token]operatorInfo{
	tokCmpEq:  {op: OperatorCompareEq, prio: 0},
	tokCmpNeq: {op: OperatorCompareNeq, prio: 0},
	tokBinAnd: {op: OperatorBinaryAnd, prio: 1},
}
   449  
   450  // Parse out a single Type object, which can either be a plain object or an expression.
   451  // For now, only expressions constructed via '(', ')', "==", "!=", '&' are supported.
   452  func (p *parser) parseType() *Type {
   453  	return p.parseBinaryExpr(0)
   454  }
   455  
   456  func (p *parser) parseBinaryExpr(expectPrio int) *Type {
   457  	if expectPrio > maxOperatorPrio {
   458  		return p.parseExprFactor()
   459  	}
   460  	lastPos := p.pos
   461  	curr := p.parseBinaryExpr(expectPrio + 1)
   462  	for {
   463  		info, ok := binaryOperators[p.tok]
   464  		if !ok || info.prio != expectPrio {
   465  			return curr
   466  		}
   467  		p.consume(p.tok)
   468  		curr = &Type{
   469  			Pos: lastPos,
   470  			Expression: &BinaryExpression{
   471  				Pos:      p.pos,
   472  				Operator: info.op,
   473  				Left:     curr,
   474  				Right:    p.parseBinaryExpr(expectPrio + 1),
   475  			},
   476  		}
   477  		lastPos = p.pos
   478  	}
   479  }
   480  
   481  func (p *parser) parseExprFactor() *Type {
   482  	if p.tok == tokLParen {
   483  		p.consume(tokLParen)
   484  		ret := p.parseBinaryExpr(0)
   485  		p.consume(tokRParen)
   486  		return ret
   487  	}
   488  	arg := &Type{
   489  		Pos: p.pos,
   490  	}
   491  	allowColon := false
   492  	switch p.tok {
   493  	case tokInt:
   494  		allowColon = true
   495  		arg.Value, arg.ValueFmt = p.parseIntValue()
   496  	case tokIdent:
   497  		allowColon = true
   498  		arg.Ident = p.lit
   499  	case tokString, tokStringHex:
   500  		arg.String = p.lit
   501  		arg.HasString = true
   502  		arg.StringFmt = strTokToFmt(p.tok)
   503  	default:
   504  		p.expect(tokInt, tokIdent, tokString)
   505  	}
   506  	p.next()
   507  	if allowColon {
   508  		for p.tryConsume(tokColon) {
   509  			col := &Type{
   510  				Pos: p.pos,
   511  			}
   512  			switch p.tok {
   513  			case tokInt:
   514  				col.Value, col.ValueFmt = p.parseIntValue()
   515  			case tokIdent:
   516  				col.Ident = p.lit
   517  			default:
   518  				p.expect(tokInt, tokIdent)
   519  			}
   520  			arg.Colon = append(arg.Colon, col)
   521  			p.next()
   522  		}
   523  	}
   524  	arg.Args = p.parseTypeList()
   525  	return arg
   526  }
   527  
   528  func (p *parser) parseTypeList() []*Type {
   529  	var args []*Type
   530  	if p.tryConsume(tokLBrack) {
   531  		args = append(args, p.parseType())
   532  		for p.tryConsume(tokComma) {
   533  			args = append(args, p.parseType())
   534  		}
   535  		p.consume(tokRBrack)
   536  	}
   537  	return args
   538  }
   539  
   540  func (p *parser) parseIdent() *Ident {
   541  	p.expect(tokIdent)
   542  	ident := &Ident{
   543  		Pos:  p.pos,
   544  		Name: p.lit,
   545  	}
   546  	p.next()
   547  	return ident
   548  }
   549  
   550  func (p *parser) parseString() *String {
   551  	p.expect(tokString, tokStringHex, tokIdent)
   552  	str := &String{
   553  		Pos:   p.pos,
   554  		Value: p.lit,
   555  		Fmt:   strTokToFmt(p.tok),
   556  	}
   557  	p.next()
   558  	return str
   559  }
   560  
   561  func strTokToFmt(tok token) StrFmt {
   562  	switch tok {
   563  	case tokString:
   564  		return StrFmtRaw
   565  	case tokStringHex:
   566  		return StrFmtHex
   567  	case tokIdent:
   568  		return StrFmtIdent
   569  	default:
   570  		panic("bad string token")
   571  	}
   572  }
   573  
   574  func (p *parser) parseInt() *Int {
   575  	i := &Int{
   576  		Pos: p.pos,
   577  	}
   578  	switch p.tok {
   579  	case tokInt:
   580  		i.Value, i.ValueFmt = p.parseIntValue()
   581  	case tokIdent:
   582  		i.Ident = p.lit
   583  	default:
   584  		p.expect(tokInt, tokIdent)
   585  	}
   586  	p.next()
   587  	return i
   588  }
   589  
   590  func (p *parser) parseIntValue() (uint64, IntFmt) {
   591  	if p.lit[0] == '\'' {
   592  		return uint64(p.lit[1]), IntFmtChar
   593  	}
   594  	if v, err := strconv.ParseUint(p.lit, 10, 64); err == nil {
   595  		return v, IntFmtDec
   596  	}
   597  	if v, err := strconv.ParseInt(p.lit, 10, 64); err == nil {
   598  		return uint64(v), IntFmtNeg
   599  	}
   600  	if len(p.lit) > 2 && p.lit[0] == '0' && p.lit[1] == 'x' {
   601  		if v, err := strconv.ParseUint(p.lit[2:], 16, 64); err == nil {
   602  			return v, IntFmtHex
   603  		}
   604  	}
   605  	panic(fmt.Sprintf("scanner returned bad integer %q", p.lit))
   606  }
   607  
   608  func (p *parser) parseCExpr() *Int {
   609  	i := &Int{
   610  		Pos:   p.pos,
   611  		CExpr: p.lit,
   612  	}
   613  	p.consume(tokCExpr)
   614  	return i
   615  }