github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/ast/parser.go (about) 1 // Copyright 2017 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package ast 5 6 import ( 7 "errors" 8 "fmt" 9 "os" 10 "path/filepath" 11 "strconv" 12 "strings" 13 ) 14 15 // Parse parses sys description into AST and returns top-level nodes. 16 // If any errors are encountered, returns nil. 17 func Parse(data []byte, filename string, errorHandler ErrorHandler) *Description { 18 p := &parser{s: newScanner(data, filename, errorHandler)} 19 prevNewLine, prevComment := false, false 20 var top []Node 21 for p.next(); p.tok != tokEOF; { 22 decl := p.parseTopRecover() 23 if decl == nil { 24 continue 25 } 26 // Add new lines around structs, remove duplicate new lines. 27 if _, ok := decl.(*NewLine); ok && prevNewLine { 28 continue 29 } 30 if str, ok := decl.(*Struct); ok && !prevNewLine && !prevComment { 31 top = append(top, &NewLine{Pos: str.Pos}) 32 } 33 top = append(top, decl) 34 if str, ok := decl.(*Struct); ok { 35 decl = &NewLine{Pos: str.Pos} 36 top = append(top, decl) 37 } 38 _, prevNewLine = decl.(*NewLine) 39 _, prevComment = decl.(*Comment) 40 } 41 if prevNewLine { 42 top = top[:len(top)-1] 43 } 44 if !p.s.Ok() { 45 return nil 46 } 47 return &Description{top} 48 } 49 50 func ParseGlob(glob string, errorHandler ErrorHandler) *Description { 51 if errorHandler == nil { 52 errorHandler = LoggingHandler 53 } 54 files, err := filepath.Glob(glob) 55 if err != nil { 56 errorHandler(Pos{}, fmt.Sprintf("failed to find input files: %v", err)) 57 return nil 58 } 59 if len(files) == 0 { 60 errorHandler(Pos{}, fmt.Sprintf("no files matched by glob %q", glob)) 61 return nil 62 } 63 desc := &Description{} 64 for _, f := range files { 65 data, err := os.ReadFile(f) 66 if err != nil { 67 errorHandler(Pos{}, fmt.Sprintf("failed to read input file: %v", err)) 68 return nil 69 } 70 desc1 := Parse(data, f, errorHandler) 71 if desc1 == nil { 72 desc = nil 73 } 74 if desc != nil { 75 desc.Nodes = append(desc.Nodes, desc1.Nodes...) 76 } 77 } 78 return desc 79 } 80 81 type parser struct { 82 s *scanner 83 84 // Current token: 85 tok token 86 lit string 87 pos Pos 88 } 89 90 // Skip parsing till the next NEWLINE, for error recovery. 91 var errSkipLine = errors.New("") 92 93 func (p *parser) parseTopRecover() Node { 94 defer func() { 95 switch err := recover(); err { 96 case nil: 97 case errSkipLine: 98 // Try to recover by consuming everything until next NEWLINE. 99 for p.tok != tokNewLine && p.tok != tokEOF { 100 p.next() 101 } 102 p.tryConsume(tokNewLine) 103 default: 104 panic(err) 105 } 106 }() 107 decl := p.parseTop() 108 if decl == nil { 109 panic("not reachable") 110 } 111 p.consume(tokNewLine) 112 return decl 113 } 114 115 func (p *parser) parseTop() Node { 116 switch p.tok { 117 case tokNewLine: 118 return &NewLine{Pos: p.pos} 119 case tokComment: 120 return p.parseComment() 121 case tokDefine: 122 return p.parseDefine() 123 case tokInclude: 124 return p.parseInclude() 125 case tokIncdir: 126 return p.parseIncdir() 127 case tokResource: 128 return p.parseResource() 129 case tokIdent: 130 name := p.parseIdent() 131 switch name.Name { 132 case "meta": 133 return p.parseMeta() 134 case "type": 135 return p.parseTypeDef() 136 } 137 switch p.tok { 138 case tokLParen: 139 return p.parseCall(name) 140 case tokLBrace, tokLBrack: 141 return p.parseStruct(name) 142 case tokEq: 143 return p.parseFlags(name) 144 default: 145 p.expect(tokLParen, tokLBrace, tokLBrack, tokEq) 146 } 147 case tokIllegal: 148 // Scanner has already producer an error for this one. 149 panic(errSkipLine) 150 default: 151 p.expect(tokComment, tokDefine, tokInclude, tokResource, tokIdent) 152 } 153 panic("not reachable") 154 } 155 156 func (p *parser) next() { 157 p.tok, p.lit, p.pos = p.s.Scan() 158 } 159 160 func (p *parser) consume(tok token) { 161 p.expect(tok) 162 p.next() 163 } 164 165 func (p *parser) tryConsume(tok token) bool { 166 if p.tok != tok { 167 return false 168 } 169 p.next() 170 return true 171 } 172 173 func (p *parser) expect(tokens ...token) { 174 for _, tok := range tokens { 175 if p.tok == tok { 176 return 177 } 178 } 179 var str []string 180 for _, tok := range tokens { 181 str = append(str, tok.String()) 182 } 183 p.s.Error(p.pos, fmt.Sprintf("unexpected %v, expecting %v", p.tok, strings.Join(str, ", "))) 184 panic(errSkipLine) 185 } 186 187 func (p *parser) parseComment() *Comment { 188 c := &Comment{ 189 Pos: p.pos, 190 Text: p.lit, 191 } 192 p.consume(tokComment) 193 return c 194 } 195 196 func (p *parser) parseMeta() *Meta { 197 return &Meta{ 198 Pos: p.pos, 199 Value: p.parseType(), 200 } 201 } 202 203 func (p *parser) parseDefine() *Define { 204 pos0 := p.pos 205 p.consume(tokDefine) 206 name := p.parseIdent() 207 p.expect(tokInt, tokIdent, tokCExpr) 208 var val *Int 209 if p.tok == tokCExpr { 210 val = p.parseCExpr() 211 } else { 212 val = p.parseInt() 213 } 214 return &Define{ 215 Pos: pos0, 216 Name: name, 217 Value: val, 218 } 219 } 220 221 func (p *parser) parseInclude() *Include { 222 pos0 := p.pos 223 p.consume(tokInclude) 224 return &Include{ 225 Pos: pos0, 226 File: p.parseString(), 227 } 228 } 229 230 func (p *parser) parseIncdir() *Incdir { 231 pos0 := p.pos 232 p.consume(tokIncdir) 233 return &Incdir{ 234 Pos: pos0, 235 Dir: p.parseString(), 236 } 237 } 238 239 func (p *parser) parseResource() *Resource { 240 pos0 := p.pos 241 p.consume(tokResource) 242 name := p.parseIdent() 243 p.consume(tokLBrack) 244 base := p.parseType() 245 p.consume(tokRBrack) 246 var values []*Int 247 if p.tryConsume(tokColon) { 248 values = append(values, p.parseInt()) 249 for p.tryConsume(tokComma) { 250 values = append(values, p.parseInt()) 251 } 252 } 253 return &Resource{ 254 Pos: pos0, 255 Name: name, 256 Base: base, 257 Values: values, 258 } 259 } 260 261 func (p *parser) parseTypeDef() *TypeDef { 262 pos0 := p.pos 263 name := p.parseIdent() 264 var typ *Type 265 var str *Struct 266 var args []*Ident 267 p.expect(tokLBrack, tokIdent) 268 if p.tryConsume(tokLBrack) { 269 args = append(args, p.parseIdent()) 270 for p.tryConsume(tokComma) { 271 args = append(args, p.parseIdent()) 272 } 273 p.consume(tokRBrack) 274 if p.tok == tokLBrace || p.tok == tokLBrack { 275 emptyName := &Ident{ 276 Pos: pos0, 277 Name: "", 278 } 279 str = p.parseStruct(emptyName) 280 } else { 281 typ = p.parseType() 282 } 283 } else { 284 typ = p.parseType() 285 } 286 return &TypeDef{ 287 Pos: pos0, 288 Name: name, 289 Args: args, 290 Type: typ, 291 Struct: str, 292 } 293 } 294 295 func (p *parser) parseCall(name *Ident) *Call { 296 c := &Call{ 297 Pos: name.Pos, 298 Name: name, 299 CallName: callName(name.Name), 300 } 301 p.consume(tokLParen) 302 for p.tok != tokRParen { 303 c.Args = append(c.Args, p.parseField(false)) 304 p.expect(tokComma, tokRParen) 305 p.tryConsume(tokComma) 306 } 307 p.consume(tokRParen) 308 if p.tok != tokNewLine && p.tok != tokLParen { 309 c.Ret = p.parseType() 310 } 311 if p.tryConsume(tokLParen) { 312 c.Attrs = append(c.Attrs, p.parseType()) 313 for p.tryConsume(tokComma) { 314 c.Attrs = append(c.Attrs, p.parseType()) 315 } 316 p.consume(tokRParen) 317 } 318 return c 319 } 320 321 func callName(s string) string { 322 pos := strings.IndexByte(s, '$') 323 if pos == -1 { 324 return s 325 } 326 return s[:pos] 327 } 328 329 func (p *parser) parseFlags(name *Ident) Node { 330 p.consume(tokEq) 331 switch p.tok { 332 case tokInt, tokIdent: 333 return p.parseIntFlags(name) 334 case tokString, tokStringHex: 335 return p.parseStrFlags(name) 336 default: 337 p.expect(tokInt, tokIdent, tokString) 338 return nil 339 } 340 } 341 342 func (p *parser) parseIntFlags(name *Ident) *IntFlags { 343 values := []*Int{p.parseInt()} 344 for p.tryConsume(tokComma) { 345 values = append(values, p.parseInt()) 346 } 347 return &IntFlags{ 348 Pos: name.Pos, 349 Name: name, 350 Values: values, 351 } 352 } 353 354 func (p *parser) parseStrFlags(name *Ident) *StrFlags { 355 values := []*String{p.parseString()} 356 for p.tryConsume(tokComma) { 357 values = append(values, p.parseString()) 358 } 359 return &StrFlags{ 360 Pos: name.Pos, 361 Name: name, 362 Values: values, 363 } 364 } 365 366 func (p *parser) parseStruct(name *Ident) *Struct { 367 str := &Struct{ 368 Pos: name.Pos, 369 Name: name, 370 } 371 closing := tokRBrace 372 if p.tok == tokLBrack { 373 str.IsUnion = true 374 closing = tokRBrack 375 } 376 p.next() 377 p.consume(tokNewLine) 378 for { 379 newBlock := false 380 for p.tok == tokNewLine { 381 newBlock = true 382 p.next() 383 } 384 comments := p.parseCommentBlock() 385 if p.tryConsume(closing) { 386 str.Comments = comments 387 break 388 } 389 fld := p.parseField(true) 390 fld.NewBlock = newBlock 391 fld.Comments = comments 392 str.Fields = append(str.Fields, fld) 393 p.consume(tokNewLine) 394 } 395 if p.tryConsume(tokLBrack) { 396 str.Attrs = append(str.Attrs, p.parseType()) 397 for p.tryConsume(tokComma) { 398 str.Attrs = append(str.Attrs, p.parseType()) 399 } 400 p.consume(tokRBrack) 401 } 402 return str 403 } 404 405 func (p *parser) parseCommentBlock() []*Comment { 406 var comments []*Comment 407 for p.tok == tokComment { 408 comments = append(comments, p.parseComment()) 409 p.consume(tokNewLine) 410 for p.tryConsume(tokNewLine) { 411 } 412 } 413 return comments 414 } 415 416 func (p *parser) parseField(parseAttrs bool) *Field { 417 name := p.parseIdent() 418 419 field := &Field{ 420 Pos: name.Pos, 421 Name: name, 422 Type: p.parseType(), 423 } 424 425 if parseAttrs && p.tryConsume(tokLParen) { 426 field.Attrs = append(field.Attrs, p.parseType()) 427 for p.tryConsume(tokComma) { 428 field.Attrs = append(field.Attrs, p.parseType()) 429 } 430 p.consume(tokRParen) 431 } 432 433 return field 434 } 435 436 type operatorInfo struct { 437 op Operator 438 prio int 439 } 440 441 const maxOperatorPrio = 1 442 443 // The highest priority is 0. 444 var binaryOperators = map[token]operatorInfo{ 445 tokCmpEq: {op: OperatorCompareEq, prio: 0}, 446 tokCmpNeq: {op: OperatorCompareNeq, prio: 0}, 447 tokBinAnd: {op: OperatorBinaryAnd, prio: 1}, 448 } 449 450 // Parse out a single Type object, which can either be a plain object or an expression. 451 // For now, only expressions constructed via '(', ')', "==", "!=", '&' are supported. 452 func (p *parser) parseType() *Type { 453 return p.parseBinaryExpr(0) 454 } 455 456 func (p *parser) parseBinaryExpr(expectPrio int) *Type { 457 if expectPrio > maxOperatorPrio { 458 return p.parseExprFactor() 459 } 460 lastPos := p.pos 461 curr := p.parseBinaryExpr(expectPrio + 1) 462 for { 463 info, ok := binaryOperators[p.tok] 464 if !ok || info.prio != expectPrio { 465 return curr 466 } 467 p.consume(p.tok) 468 curr = &Type{ 469 Pos: lastPos, 470 Expression: &BinaryExpression{ 471 Pos: p.pos, 472 Operator: info.op, 473 Left: curr, 474 Right: p.parseBinaryExpr(expectPrio + 1), 475 }, 476 } 477 lastPos = p.pos 478 } 479 } 480 481 func (p *parser) parseExprFactor() *Type { 482 if p.tok == tokLParen { 483 p.consume(tokLParen) 484 ret := p.parseBinaryExpr(0) 485 p.consume(tokRParen) 486 return ret 487 } 488 arg := &Type{ 489 Pos: p.pos, 490 } 491 allowColon := false 492 switch p.tok { 493 case tokInt: 494 allowColon = true 495 arg.Value, arg.ValueFmt = p.parseIntValue() 496 case tokIdent: 497 allowColon = true 498 arg.Ident = p.lit 499 case tokString, tokStringHex: 500 arg.String = p.lit 501 arg.HasString = true 502 arg.StringFmt = strTokToFmt(p.tok) 503 default: 504 p.expect(tokInt, tokIdent, tokString) 505 } 506 p.next() 507 if allowColon { 508 for p.tryConsume(tokColon) { 509 col := &Type{ 510 Pos: p.pos, 511 } 512 switch p.tok { 513 case tokInt: 514 col.Value, col.ValueFmt = p.parseIntValue() 515 case tokIdent: 516 col.Ident = p.lit 517 default: 518 p.expect(tokInt, tokIdent) 519 } 520 arg.Colon = append(arg.Colon, col) 521 p.next() 522 } 523 } 524 arg.Args = p.parseTypeList() 525 return arg 526 } 527 528 func (p *parser) parseTypeList() []*Type { 529 var args []*Type 530 if p.tryConsume(tokLBrack) { 531 args = append(args, p.parseType()) 532 for p.tryConsume(tokComma) { 533 args = append(args, p.parseType()) 534 } 535 p.consume(tokRBrack) 536 } 537 return args 538 } 539 540 func (p *parser) parseIdent() *Ident { 541 p.expect(tokIdent) 542 ident := &Ident{ 543 Pos: p.pos, 544 Name: p.lit, 545 } 546 p.next() 547 return ident 548 } 549 550 func (p *parser) parseString() *String { 551 p.expect(tokString, tokStringHex, tokIdent) 552 str := &String{ 553 Pos: p.pos, 554 Value: p.lit, 555 Fmt: strTokToFmt(p.tok), 556 } 557 p.next() 558 return str 559 } 560 561 func strTokToFmt(tok token) StrFmt { 562 switch tok { 563 case tokString: 564 return StrFmtRaw 565 case tokStringHex: 566 return StrFmtHex 567 case tokIdent: 568 return StrFmtIdent 569 default: 570 panic("bad string token") 571 } 572 } 573 574 func (p *parser) parseInt() *Int { 575 i := &Int{ 576 Pos: p.pos, 577 } 578 switch p.tok { 579 case tokInt: 580 i.Value, i.ValueFmt = p.parseIntValue() 581 case tokIdent: 582 i.Ident = p.lit 583 default: 584 p.expect(tokInt, tokIdent) 585 } 586 p.next() 587 return i 588 } 589 590 func (p *parser) parseIntValue() (uint64, IntFmt) { 591 if p.lit[0] == '\'' { 592 return uint64(p.lit[1]), IntFmtChar 593 } 594 if v, err := strconv.ParseUint(p.lit, 10, 64); err == nil { 595 return v, IntFmtDec 596 } 597 if v, err := strconv.ParseInt(p.lit, 10, 64); err == nil { 598 return uint64(v), IntFmtNeg 599 } 600 if len(p.lit) > 2 && p.lit[0] == '0' && p.lit[1] == 'x' { 601 if v, err := strconv.ParseUint(p.lit[2:], 16, 64); err == nil { 602 return v, IntFmtHex 603 } 604 } 605 panic(fmt.Sprintf("scanner returned bad integer %q", p.lit)) 606 } 607 608 func (p *parser) parseCExpr() *Int { 609 i := &Int{ 610 Pos: p.pos, 611 CExpr: p.lit, 612 } 613 p.consume(tokCExpr) 614 return i 615 }