go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/mqlc/parser/parser.go (about) 1 // Copyright (c) Mondoo, Inc. 2 // SPDX-License-Identifier: BUSL-1.1 3 4 package parser 5 6 import ( 7 "bytes" 8 "errors" 9 "math" 10 "regexp" 11 "strconv" 12 "strings" 13 14 "github.com/alecthomas/participle/lexer" 15 ) 16 17 var mqlLexer lexer.Definition 18 19 var ( 20 Ident rune 21 Float rune 22 Int rune 23 String rune 24 Comment rune 25 Regex rune 26 Op rune 27 CallType rune 28 ) 29 30 var tokenNames map[rune]string 31 32 func init() { 33 mqlLexer = lexer.Must(lexer.Regexp(`(\s+)` + 34 `|(?P<Ident>[a-zA-Z$_][a-zA-Z0-9_]*)` + 35 `|(?P<Float>[-+]?\d*\.\d+([eE][-+]?\d+)?)` + 36 `|(?P<Int>[-+]?\d+([eE][-+]?\d+)?)` + 37 `|(?P<String>'[^']*'|"[^"]*")` + 38 `|(?P<Comment>(//|#)[^\n]*(\n|\z))` + 39 `|(?P<Regex>/([^\\/]+|\\.)+/[msi]*)` + 40 `|(?P<Op>[-+*/%,:.=<>!|&~;])` + 41 `|(?P<Call>[(){}\[\]])`, 42 )) 43 44 syms := mqlLexer.Symbols() 45 46 Ident = syms["Ident"] 47 Float = syms["Float"] 48 Int = syms["Int"] 49 String = syms["String"] 50 Comment = syms["Comment"] 51 Regex = syms["Regex"] 52 Op = syms["Op"] 53 CallType = syms["Call"] 54 55 tokenNames = map[rune]string{ 56 Ident: "identifier", 57 Float: "float", 58 Int: "number", 59 String: "string", 60 Comment: "comment", 61 Regex: "regex", 62 Op: "operator", 63 } 64 } 65 66 // ErrIncomplete points to an incomplete query. 67 type ErrIncomplete struct { 68 missing string 69 pos lexer.Position 70 // Indent is a hint for how far we are indented given 71 // strict formatting and using only tabs 72 Indent int 73 } 74 75 func (e *ErrIncomplete) Error() string { 76 return "incomplete query, missing " + e.missing + " at " + e.pos.String() 77 } 78 79 // ErrIncorrect indicates an incorrect symbol was found in a query. 80 // For example: when users close an opening '(' with a ']' 81 type ErrIncorrect struct { 82 expected string 83 got string 84 pos lexer.Position 85 } 86 87 func (e *ErrIncorrect) Error() string { 88 return "expected " + e.expected + ", got '" + e.got + "' at " + e.pos.String() 89 } 90 91 var blockCall string = "{}" 92 93 // Expression at the root of mqlc 94 type Expression struct { 95 Operand *Operand `json:",omitempty"` 96 Operations []*Operation `json:",omitempty"` 97 } 98 99 // IsEmpty expression returns true if we don't contain any action (e.g. comment-only expressions) 100 func (x *Expression) IsEmpty() bool { 101 return len(x.Operations) == 0 && (x.Operand == nil || (x.Operand.Value == nil && len(x.Operand.Calls) == 0 && len(x.Operand.Block) == 0)) 102 } 103 104 // Operation has an operator and an operand 105 type Operation struct { 106 Operator Operator 107 Operand *Operand `json:",omitempty"` 108 } 109 110 // Operand is anything that produces a value 111 type Operand struct { 112 Comments string `json:",omitempty"` 113 Value *Value `json:",omitempty"` 114 Calls []*Call `json:",omitempty"` 115 Block []*Expression `json:",omitempty"` 116 } 117 118 // Value representation 119 type Value struct { 120 Bool *bool `json:",omitempty"` 121 String *string `json:",omitempty"` 122 Int *int64 `json:",omitempty"` 123 Float *float64 `json:",omitempty"` 124 Regex *string `json:",omitempty"` 125 Array []*Expression `json:",omitempty"` 126 Map map[string]*Expression `json:",omitempty"` 127 Ident *string `json:",omitempty"` 128 } 129 130 // Call to a value 131 type Call struct { 132 Comments string `json:",omitempty"` 133 Ident *string `json:",omitempty"` 134 Function []*Arg `json:",omitempty"` 135 Accessor *Expression `json:",omitempty"` 136 } 137 138 // Arg is a call argument 139 type Arg struct { 140 Name string 141 Value *Expression 142 } 143 144 // AST holds the parsed syntax tree 145 type AST struct { 146 Expressions []*Expression 147 } 148 149 var ( 150 trueBool bool = true 151 falseBool bool = false 152 neverRef = "Never" 153 154 trueValue = Value{Bool: &trueBool} 155 falseValue = Value{Bool: &falseBool} 156 nilValue = Value{} 157 nanRef = math.NaN() 158 nanValue = Value{Float: &nanRef} 159 infinityRef = math.Inf(1) 160 infinityValue = Value{Float: &infinityRef} 161 neverValue = Value{Ident: &neverRef} 162 ) 163 164 type parser struct { 165 token lexer.Token 166 nextTokens []lexer.Token 167 lex lexer.Lexer 168 comments bytes.Buffer 169 // indent indicates optimal indentation given strict formatting 170 // and using only tabs 171 indent int 172 } 173 174 // expected generates an error string based on the expected type/field 175 // and the actual value 176 func (p *parser) expected(typ string, in string) error { 177 name := tokenNames[p.token.Type] 178 if name == "" { 179 name = "token" 180 } 181 return p.error("expected "+typ+", got "+name+" \""+p.token.Value+"\"", in) 182 } 183 184 func (p *parser) error(msg string, in string) error { 185 return errors.New(msg + " at " + p.token.Pos.String() + " in function " + in) 186 } 187 188 func (p *parser) errorMsg(msg string) error { 189 return errors.New(msg + " at " + p.token.Pos.String()) 190 } 191 192 // nextToken loads the next token into p.token 193 func (p *parser) nextToken() error { 194 if p.nextTokens == nil { 195 var err error 196 197 for { 198 p.token, err = p.lex.Next() 199 if err != nil { 200 return err 201 } 202 if p.token.Type != Comment { 203 break 204 } 205 206 p.parseComment() 207 } 208 209 return nil 210 } 211 212 p.token = p.nextTokens[0] 213 if len(p.nextTokens) == 1 { 214 p.nextTokens = nil 215 } else { 216 p.nextTokens = p.nextTokens[1:] 217 } 218 219 return nil 220 } 221 222 func (p *parser) parseComment() { 223 // we only need the comment's body 224 if p.token.Value[0] == '#' { 225 if len(p.token.Value) != 1 && p.token.Value[1] == ' ' { 226 p.comments.WriteString(strings.TrimRight(p.token.Value[2:], " \t")) 227 } else { 228 p.comments.WriteString(strings.TrimRight(p.token.Value[1:], " \t")) 229 } 230 } else { 231 if len(p.token.Value) != 2 && p.token.Value[2] == ' ' { 232 p.comments.WriteString(strings.TrimRight(p.token.Value[3:], " \t")) 233 } else { 234 p.comments.WriteString(strings.TrimRight(p.token.Value[2:], " \t")) 235 } 236 } 237 } 238 239 func (p *parser) flushComments() string { 240 if p.comments.Len() == 0 { 241 return "" 242 } 243 244 res := p.comments.String() 245 p.comments.Reset() 246 return res 247 } 248 249 // rewind pushes the current token back on the stack and replaces it iwth the given token 250 func (p *parser) rewind(token lexer.Token) { 251 p.nextTokens = append(p.nextTokens, p.token) 252 p.token = token 253 } 254 255 var ( 256 reUnescape = regexp.MustCompile("\\\\.") 257 unescapeMap = map[string]string{ 258 "\\n": "\n", 259 "\\t": "\t", 260 "\\v": "\v", 261 "\\b": "\b", 262 "\\f": "\f", 263 "\\0": "\x00", 264 } 265 ) 266 267 func (p *parser) token2string() string { 268 v := p.token.Value 269 vv := v[1 : len(v)-1] 270 271 if v[0] == '\'' { 272 return vv 273 } 274 275 vv = reUnescape.ReplaceAllStringFunc(vv, func(match string) string { 276 if found := unescapeMap[match]; found != "" { 277 return found 278 } 279 return string(match[1]) 280 }) 281 return vv 282 } 283 284 func (p *parser) parseValue() (*Value, error) { 285 switch p.token.Type { 286 case Ident: 287 switch p.token.Value { 288 case "true": 289 return &trueValue, nil 290 case "false": 291 return &falseValue, nil 292 case "null": 293 return &nilValue, nil 294 case "NaN": 295 return &nanValue, nil 296 case "Infinity": 297 return &infinityValue, nil 298 case "Never": 299 return &neverValue, nil 300 default: 301 v := p.token.Value 302 return &Value{Ident: &v}, nil 303 } 304 305 case Float: 306 v, err := strconv.ParseFloat(p.token.Value, 64) 307 if err != nil { 308 return nil, p.errorMsg("failed to parse float: " + err.Error()) 309 } 310 return &Value{Float: &v}, nil 311 312 case Int: 313 var v int64 314 var err error 315 if p.token.Value[0] == '0' { 316 v, err = strconv.ParseInt(p.token.Value, 8, 64) 317 } else { 318 v, err = strconv.ParseInt(p.token.Value, 10, 64) 319 } 320 321 if err != nil { 322 return nil, p.errorMsg("failed to parse integer: " + err.Error()) 323 } 324 return &Value{Int: &v}, nil 325 326 case String: 327 vv := p.token2string() 328 return &Value{String: &vv}, nil 329 330 case Regex: 331 v := p.token.Value 332 333 reEnd := len(v) - 1 334 for ; reEnd > 1; reEnd-- { 335 if v[reEnd] == '/' { 336 break 337 } 338 } 339 340 // TODO: handling of escape sequences 341 vv := v[1:reEnd] 342 mods := v[reEnd+1:] 343 344 if mods != "" { 345 vv = "(?" + mods + ")" + vv 346 } 347 348 return &Value{Regex: &vv}, nil 349 350 } 351 return nil, nil 352 } 353 354 func (p *parser) parseArg() (*Arg, error) { 355 res := Arg{} 356 357 if p.token.Type == Ident { 358 name := p.token 359 p.nextToken() 360 361 if p.token.Value == ":" { 362 p.nextToken() 363 res.Name = name.Value 364 } else { 365 p.rewind(name) 366 } 367 } 368 369 exp, err := p.parseExpression() 370 if err != nil { 371 return nil, err 372 } 373 if exp == nil { 374 if res.Name != "" { 375 return nil, p.expected("argument", "parseArgument") 376 } 377 return nil, nil 378 } 379 res.Value = exp 380 return &res, nil 381 } 382 383 func (p *parser) parseArray() (*Value, error) { 384 res := Value{Array: []*Expression{}} 385 386 p.nextToken() 387 if p.token.Value == "]" { 388 return &res, nil 389 } 390 391 for { 392 exp, err := p.parseExpression() 393 if exp == nil { 394 return nil, p.expected("expression", "parseOperand-array") 395 } 396 if err != nil { 397 return nil, err 398 } 399 res.Array = append(res.Array, exp) 400 401 if p.token.Value == "]" { 402 break 403 } 404 if p.token.Value != "," { 405 return nil, p.expected(", or ]", "parseOperand") 406 } 407 408 p.nextToken() 409 410 // catch trailing commas, ie: [a, b, c, ] 411 if p.token.Value == "]" { 412 break 413 } 414 } 415 416 return &res, nil 417 } 418 419 func (p *parser) parseMap() (*Value, error) { 420 res := Value{ 421 Map: map[string]*Expression{}, 422 } 423 424 p.nextToken() 425 if p.token.Value == "}" { 426 return &res, nil 427 } 428 429 for { 430 var key string 431 432 switch p.token.Type { 433 case String: 434 key = p.token2string() 435 case Ident: 436 key = p.token.Value 437 default: 438 return nil, p.expected("string", "map key") 439 } 440 441 p.nextToken() 442 if p.token.Value != ":" || p.token.Type != Op { 443 return nil, p.expected(":", "after map key") 444 } 445 446 p.nextToken() 447 exp, err := p.parseExpression() 448 if exp == nil { 449 return nil, p.expected("expression", "parseOperand-map") 450 } 451 if err != nil { 452 return nil, err 453 } 454 res.Map[key] = exp 455 456 if p.token.Value == "}" { 457 break 458 } 459 if p.token.Value != "," { 460 return nil, p.expected(", or }", "parseOperand") 461 } 462 463 p.nextToken() 464 465 // catch trailing commas, ie: {a: 1,} 466 if p.token.Value == "}" { 467 break 468 } 469 } 470 471 return &res, nil 472 } 473 474 // parseOperand and return the operand, and true if the operand is standalone 475 func (p *parser) parseOperand() (*Operand, bool, error) { 476 // operand: value [ call | accessor | '.' ident ]+ [ block ] 477 value, err := p.parseValue() 478 if err != nil { 479 return nil, false, err 480 } 481 if value == nil { 482 // arrays 483 if p.token.Value == "[" { 484 value, err = p.parseArray() 485 if err != nil { 486 return nil, false, err 487 } 488 } 489 490 // maps 491 if p.token.Value == "{" { 492 value, err = p.parseMap() 493 if err != nil { 494 return nil, false, err 495 } 496 } 497 498 // glob all fields of a resource 499 // ie: resource { * } 500 if p.token.Value == "*" { 501 p.nextToken() 502 star := "*" 503 return &Operand{ 504 Value: &Value{ 505 Ident: &star, 506 }, 507 }, true, nil 508 } 509 } 510 511 if value == nil { 512 return nil, false, nil 513 } 514 515 if value.Ident != nil && *value.Ident == "return" { 516 p.nextToken() 517 return &Operand{Value: value}, true, nil 518 } 519 520 res := Operand{ 521 Comments: p.flushComments(), 522 Value: value, 523 } 524 p.nextToken() 525 526 for { 527 switch p.token.Value { 528 case ".": 529 p.nextToken() 530 531 // everything else must be an identifier 532 if p.token.Type != Ident { 533 v := "." 534 res.Calls = append(res.Calls, &Call{Ident: &v}) 535 536 if p.token.EOF() { 537 p.indent++ 538 return &res, false, &ErrIncomplete{missing: "identifier after '.'", pos: p.token.Pos, Indent: p.indent} 539 } 540 541 return &res, false, p.errorMsg("missing field accessor") 542 } 543 544 v := p.token.Value 545 res.Calls = append(res.Calls, &Call{ 546 Ident: &v, 547 Comments: p.flushComments(), 548 }) 549 p.nextToken() 550 551 case "(": 552 p.indent++ 553 p.nextToken() 554 args := []*Arg{} 555 556 for { 557 arg, err := p.parseArg() 558 if err != nil { 559 return nil, false, err 560 } 561 if arg == nil { 562 break 563 } 564 args = append(args, arg) 565 566 if p.token.Value == "," { 567 p.nextToken() 568 } 569 } 570 571 if p.token.Value != ")" { 572 if p.token.EOF() { 573 return nil, false, &ErrIncomplete{missing: "closing ')'", pos: p.token.Pos, Indent: p.indent} 574 } 575 return nil, false, &ErrIncorrect{expected: "closing ')'", got: p.token.Value, pos: p.token.Pos} 576 } 577 578 p.indent-- 579 res.Calls = append(res.Calls, &Call{Function: args}) 580 p.nextToken() 581 582 case "[": 583 p.indent++ 584 p.nextToken() 585 586 exp, err := p.parseExpression() 587 if err != nil { 588 return nil, false, err 589 } 590 591 if p.token.Value != "]" { 592 if p.token.EOF() { 593 return nil, false, &ErrIncomplete{missing: "closing ']'", pos: p.token.Pos, Indent: p.indent} 594 } 595 return nil, false, &ErrIncorrect{expected: "closing ']'", got: p.token.Value, pos: p.token.Pos} 596 } 597 598 p.indent-- 599 if exp == nil { 600 return nil, false, p.errorMsg("missing value inside of `[]`") 601 } 602 res.Calls = append(res.Calls, &Call{ 603 Accessor: exp, 604 }) 605 p.nextToken() 606 607 case "{": 608 p.indent++ 609 if res.Value.Ident != nil && *res.Value.Ident == "switch" { 610 p.nextToken() 611 612 for { 613 ident := p.token.Value 614 if ident == "}" { 615 break 616 } 617 618 if ident != "case" && ident != "default" { 619 return nil, false, errors.New("expected `case` or `default` statements in `switch` call, got `" + ident + "`") 620 } 621 p.nextToken() 622 623 if ident == "case" { 624 exp, err := p.parseExpression() 625 if err != nil { 626 return nil, false, err 627 } 628 if exp == nil { 629 return nil, false, errors.New("missing expression after `case` statement") 630 } 631 if err = exp.processOperators(); err != nil { 632 return nil, false, err 633 } 634 if exp == nil || (exp.Operand == nil && exp.Operations == nil) { 635 return nil, false, errors.New("missing expression after `case` statement") 636 } 637 res.Block = append(res.Block, exp) 638 } else { 639 // we still need to add the empty condition block 640 res.Block = append(res.Block, nil) 641 } 642 643 if p.token.Value != ":" { 644 return nil, false, errors.New("expected `:` in `" + ident + "` statement") 645 } 646 p.nextToken() 647 648 block := Expression{ 649 Operand: &Operand{ 650 Value: &Value{ 651 Ident: &blockCall, 652 }, 653 }, 654 } 655 656 for { 657 exp, err := p.parseExpression() 658 if err != nil { 659 return nil, false, err 660 } 661 if exp == nil || (exp.Operand == nil && exp.Operations == nil) { 662 break 663 } 664 block.Operand.Block = append(block.Operand.Block, exp) 665 if p.token.Value == "case" || p.token.Value == "default" { 666 break 667 } 668 } 669 670 if len(block.Operand.Block) == 0 { 671 return nil, false, errors.New("expected block following `" + ident + "` statement") 672 } 673 res.Block = append(res.Block, &block) 674 } 675 676 p.nextToken() 677 continue 678 } 679 680 p.nextToken() 681 block := []*Expression{} 682 683 for { 684 exp, err := p.parseExpression() 685 if err != nil { 686 return nil, false, err 687 } 688 if exp == nil || (exp.Operand == nil && exp.Operations == nil) { 689 break 690 } 691 block = append(block, exp) 692 } 693 694 res.Block = block 695 696 if p.token.Value != "}" { 697 if p.token.EOF() { 698 return &res, false, &ErrIncomplete{missing: "closing '}'", pos: p.token.Pos, Indent: p.indent} 699 } 700 return &res, false, &ErrIncorrect{expected: "closing '}'", got: p.token.Value, pos: p.token.Pos} 701 } 702 703 p.indent-- 704 p.nextToken() 705 706 default: 707 return &res, false, nil 708 } 709 } 710 } 711 712 func (p *parser) parseOperation() (*Operation, error) { 713 if p.token.Type != Op { 714 return nil, nil 715 } 716 717 res := Operation{} 718 switch p.token.Value { 719 case ";": 720 return nil, nil 721 case ":": 722 return nil, nil 723 case "&": 724 p.nextToken() 725 if p.token.Value == "&" { 726 res.Operator = OpAnd 727 p.nextToken() 728 } else { 729 return nil, p.expected("&&", "parseOperation") 730 } 731 case "|": 732 p.nextToken() 733 if p.token.Value == "|" { 734 res.Operator = OpOr 735 p.nextToken() 736 } else { 737 return nil, p.expected("||", "parseOperation") 738 } 739 case "=": 740 p.nextToken() 741 if p.token.Value == "=" { 742 res.Operator = OpEqual 743 p.nextToken() 744 } else if p.token.Value == "~" { 745 res.Operator = OpCmp 746 p.nextToken() 747 } else { 748 res.Operator = OpAssignment 749 } 750 case "!": 751 p.nextToken() 752 if p.token.Value == "=" { 753 res.Operator = OpNotEqual 754 p.nextToken() 755 } else if p.token.Value == "~" { 756 res.Operator = OpNotCmp 757 p.nextToken() 758 } else { 759 return nil, p.expected("!= or !~", "parseOperation") 760 } 761 case "<": 762 p.nextToken() 763 if p.token.Value == "=" { 764 res.Operator = OpSmallerEqual 765 p.nextToken() 766 } else { 767 res.Operator = OpSmaller 768 } 769 case ">": 770 p.nextToken() 771 if p.token.Value == "=" { 772 res.Operator = OpGreaterEqual 773 p.nextToken() 774 } else { 775 res.Operator = OpGreater 776 } 777 case "+": 778 res.Operator = OpAdd 779 p.nextToken() 780 case "-": 781 res.Operator = OpSubtract 782 p.nextToken() 783 case "*": 784 res.Operator = OpMultiply 785 p.nextToken() 786 case "/": 787 res.Operator = OpDivide 788 p.nextToken() 789 case "%": 790 res.Operator = OpRemainder 791 p.nextToken() 792 default: 793 return nil, errors.New("found unexpected operation '" + p.token.Value + "'") 794 } 795 796 op, _, err := p.parseOperand() 797 if err != nil { 798 return nil, err 799 } 800 if op == nil { 801 return nil, p.expected("operand", "parseOperation") 802 } 803 804 res.Operand = op 805 return &res, nil 806 } 807 808 func (p *parser) flushExpression() *Expression { 809 if p.comments.Len() == 0 { 810 return nil 811 } 812 813 return &Expression{ 814 Operand: &Operand{ 815 Comments: p.flushComments(), 816 }, 817 } 818 } 819 820 func (p *parser) parseExpression() (*Expression, error) { 821 if p.token.EOF() { 822 return p.flushExpression(), nil 823 } 824 825 res := Expression{} 826 var err error 827 var standalone bool 828 829 // expression: operand [ op operand ]+ 830 res.Operand, standalone, err = p.parseOperand() 831 if err != nil { 832 return &res, err 833 } 834 if standalone { 835 return &res, err 836 } 837 838 var operation *Operation 839 for { 840 if p.token.Value == "," { 841 break 842 } 843 844 operation, err = p.parseOperation() 845 if operation == nil { 846 break 847 } 848 res.Operations = append(res.Operations, operation) 849 } 850 851 if res.Operand == nil && res.Operations == nil { 852 return p.flushExpression(), err 853 } 854 855 for p.token.Value == ";" { 856 p.nextToken() 857 } 858 859 return &res, err 860 } 861 862 // Parse an input string into an AST 863 func Parse(input string) (*AST, error) { 864 lex, err := mqlLexer.Lex(strings.NewReader(input)) 865 if err != nil { 866 return nil, err 867 } 868 res := AST{} 869 870 thisParser := parser{ 871 lex: lex, 872 } 873 874 err = thisParser.nextToken() 875 if err != nil { 876 return nil, err 877 } 878 if thisParser.token.EOF() { 879 return &res, nil 880 } 881 882 var exp *Expression 883 for { 884 exp, err = thisParser.parseExpression() 885 if exp == nil { 886 break 887 } 888 889 res.Expressions = append(res.Expressions, exp) 890 if err != nil { 891 break 892 } 893 894 if thisParser.token.Value == ";" { 895 err = thisParser.nextToken() 896 if err != nil { 897 return &res, err 898 } 899 } 900 901 if thisParser.token.Value != "" && thisParser.token.Type == CallType && thisParser.token.Value != "[" && thisParser.token.Value != "{" { 902 return &res, errors.New("mismatched symbol '" + thisParser.token.Value + "' at the end of expression") 903 } 904 } 905 906 return &res, err 907 } 908 909 // Lex the input mqlc string to a list of tokens 910 func Lex(input string) ([]lexer.Token, error) { 911 res := []lexer.Token{} 912 lex, err := mqlLexer.Lex(strings.NewReader(input)) 913 if err != nil { 914 return res, err 915 } 916 917 token, err := lex.Next() 918 if err != nil { 919 return res, err 920 } 921 922 for !token.EOF() { 923 if token.Type != Comment { 924 res = append(res, token) 925 } 926 927 token, err = lex.Next() 928 if err != nil { 929 return res, err 930 } 931 } 932 return res, nil 933 }