github.com/rajeev159/opa@v0.45.0/ast/parser.go (about) 1 // Copyright 2020 The OPA Authors. All rights reserved. 2 // Use of this source code is governed by an Apache2 3 // license that can be found in the LICENSE file. 4 5 package ast 6 7 import ( 8 "bytes" 9 "encoding/json" 10 "fmt" 11 "io" 12 "math/big" 13 "net/url" 14 "regexp" 15 "sort" 16 "strconv" 17 "strings" 18 19 "gopkg.in/yaml.v3" 20 21 "github.com/open-policy-agent/opa/ast/internal/scanner" 22 "github.com/open-policy-agent/opa/ast/internal/tokens" 23 "github.com/open-policy-agent/opa/ast/location" 24 ) 25 26 // Note: This state is kept isolated from the parser so that we 27 // can do efficient shallow copies of these values when doing a 28 // save() and restore(). 29 type state struct { 30 s *scanner.Scanner 31 lastEnd int 32 skippedNL bool 33 tok tokens.Token 34 tokEnd int 35 lit string 36 loc Location 37 errors Errors 38 hints []string 39 comments []*Comment 40 wildcard int 41 } 42 43 func (s *state) String() string { 44 return fmt.Sprintf("<s: %v, tok: %v, lit: %q, loc: %v, errors: %d, comments: %d>", s.s, s.tok, s.lit, s.loc, len(s.errors), len(s.comments)) 45 } 46 47 func (s *state) Loc() *location.Location { 48 cpy := s.loc 49 return &cpy 50 } 51 52 func (s *state) Text(offset, end int) []byte { 53 bs := s.s.Bytes() 54 if offset >= 0 && offset < len(bs) { 55 if end >= offset && end <= len(bs) { 56 return bs[offset:end] 57 } 58 } 59 return nil 60 } 61 62 // Parser is used to parse Rego statements. 
63 type Parser struct { 64 r io.Reader 65 s *state 66 po ParserOptions 67 cache parsedTermCache 68 } 69 70 type parsedTermCacheItem struct { 71 t *Term 72 post *state // post is the post-state that's restored on a cache-hit 73 offset int 74 next *parsedTermCacheItem 75 } 76 77 type parsedTermCache struct { 78 m *parsedTermCacheItem 79 } 80 81 func (c parsedTermCache) String() string { 82 s := strings.Builder{} 83 s.WriteRune('{') 84 var e *parsedTermCacheItem 85 for e = c.m; e != nil; e = e.next { 86 fmt.Fprintf(&s, "%v", e) 87 } 88 s.WriteRune('}') 89 return s.String() 90 } 91 92 func (e *parsedTermCacheItem) String() string { 93 return fmt.Sprintf("<%d:%v>", e.offset, e.t) 94 } 95 96 // ParserOptions defines the options for parsing Rego statements. 97 type ParserOptions struct { 98 Capabilities *Capabilities 99 ProcessAnnotation bool 100 AllFutureKeywords bool 101 FutureKeywords []string 102 unreleasedKeywords bool // TODO(sr): cleanup 103 } 104 105 // NewParser creates and initializes a Parser. 106 func NewParser() *Parser { 107 p := &Parser{ 108 s: &state{}, 109 po: ParserOptions{}, 110 } 111 return p 112 } 113 114 // WithFilename provides the filename for Location details 115 // on parsed statements. 116 func (p *Parser) WithFilename(filename string) *Parser { 117 p.s.loc.File = filename 118 return p 119 } 120 121 // WithReader provides the io.Reader that the parser will 122 // use as its source. 123 func (p *Parser) WithReader(r io.Reader) *Parser { 124 p.r = r 125 return p 126 } 127 128 // WithProcessAnnotation enables or disables the processing of 129 // annotations by the Parser 130 func (p *Parser) WithProcessAnnotation(processAnnotation bool) *Parser { 131 p.po.ProcessAnnotation = processAnnotation 132 return p 133 } 134 135 // WithFutureKeywords enables "future" keywords, i.e., keywords that can 136 // be imported via 137 // 138 // import future.keywords.kw 139 // import future.keywords.other 140 // 141 // but in a more direct way. 
The equivalent of this import would be 142 // 143 // WithFutureKeywords("kw", "other") 144 func (p *Parser) WithFutureKeywords(kws ...string) *Parser { 145 p.po.FutureKeywords = kws 146 return p 147 } 148 149 // WithAllFutureKeywords enables all "future" keywords, i.e., the 150 // ParserOption equivalent of 151 // 152 // import future.keywords 153 func (p *Parser) WithAllFutureKeywords(yes bool) *Parser { 154 p.po.AllFutureKeywords = yes 155 return p 156 } 157 158 // withUnreleasedKeywords allows using keywords that haven't surfaced 159 // as future keywords (see above) yet, but have tests that require 160 // them to be parsed 161 func (p *Parser) withUnreleasedKeywords(yes bool) *Parser { 162 p.po.unreleasedKeywords = yes 163 return p 164 } 165 166 // WithCapabilities sets the capabilities structure on the parser. 167 func (p *Parser) WithCapabilities(c *Capabilities) *Parser { 168 p.po.Capabilities = c 169 return p 170 } 171 172 func (p *Parser) parsedTermCacheLookup() (*Term, *state) { 173 l := p.s.loc.Offset 174 // stop comparing once the cached offsets are lower than l 175 for h := p.cache.m; h != nil && h.offset >= l; h = h.next { 176 if h.offset == l { 177 return h.t, h.post 178 } 179 } 180 return nil, nil 181 } 182 183 func (p *Parser) parsedTermCachePush(t *Term, s0 *state) { 184 s1 := p.save() 185 o0 := s0.loc.Offset 186 entry := parsedTermCacheItem{t: t, post: s1, offset: o0} 187 188 // find the first one whose offset is smaller than ours 189 var e *parsedTermCacheItem 190 for e = p.cache.m; e != nil; e = e.next { 191 if e.offset < o0 { 192 break 193 } 194 } 195 entry.next = e 196 p.cache.m = &entry 197 } 198 199 // futureParser returns a shallow copy of `p` with an empty 200 // cache, and a scanner that knows all future keywords. 201 // It's used to present hints in errors, when statements would 202 // only parse successfully if some future keyword is enabled. 
203 func (p *Parser) futureParser() *Parser { 204 q := *p 205 q.s = p.save() 206 q.s.s = p.s.s.WithKeywords(futureKeywords) 207 q.cache = parsedTermCache{} 208 return &q 209 } 210 211 // presentParser returns a shallow copy of `p` with an empty 212 // cache, and a scanner that knows none of the future keywords. 213 // It is used to successfully parse keyword imports, like 214 // 215 // import future.keywords.in 216 // 217 // even when the parser has already been informed about the 218 // future keyword "in". This parser won't error out because 219 // "in" is an identifier. 220 func (p *Parser) presentParser() (*Parser, map[string]tokens.Token) { 221 var cpy map[string]tokens.Token 222 q := *p 223 q.s = p.save() 224 q.s.s, cpy = p.s.s.WithoutKeywords(futureKeywords) 225 q.cache = parsedTermCache{} 226 return &q, cpy 227 } 228 229 // Parse will read the Rego source and parse statements and 230 // comments as they are found. Any errors encountered while 231 // parsing will be accumulated and returned as a list of Errors. 
func (p *Parser) Parse() ([]Statement, []*Comment, Errors) {

	// Default to the current version's capabilities when none were supplied.
	if p.po.Capabilities == nil {
		p.po.Capabilities = CapabilitiesForThisVersion()
	}

	// Restrict the future-keyword universe to what the capabilities allow;
	// an unknown keyword in the capabilities is a hard error.
	allowedFutureKeywords := map[string]tokens.Token{}

	for _, kw := range p.po.Capabilities.FutureKeywords {
		var ok bool
		allowedFutureKeywords[kw], ok = futureKeywords[kw]
		if !ok {
			return nil, nil, Errors{
				&Error{
					Code:     ParseErr,
					Message:  fmt.Sprintf("illegal capabilities: unknown keyword: %v", kw),
					Location: nil,
				},
			}
		}
	}

	var err error
	p.s.s, err = scanner.New(p.r)
	if err != nil {
		return nil, nil, Errors{
			&Error{
				Code:     ParseErr,
				Message:  err.Error(),
				Location: nil,
			},
		}
	}

	// Select which future keywords the scanner should recognize: either all
	// allowed ones, or exactly the ones named in the parser options.
	selected := map[string]tokens.Token{}
	if p.po.AllFutureKeywords {
		for kw, tok := range allowedFutureKeywords {
			selected[kw] = tok
		}
	} else {
		for _, kw := range p.po.FutureKeywords {
			tok, ok := allowedFutureKeywords[kw]
			if !ok {
				return nil, nil, Errors{
					&Error{
						Code:     ParseErr,
						Message:  fmt.Sprintf("unknown future keyword: %v", kw),
						Location: nil,
					},
				}
			}
			selected[kw] = tok
		}
	}
	p.s.s = p.s.s.WithKeywords(selected)

	// read the first token to initialize the parser
	p.scan()

	var stmts []Statement

	// Read from the scanner until the last token is reached or no statements
	// can be parsed. Attempt to parse package statements, import statements,
	// rule statements, and then body/query statements (in that order). If a
	// statement cannot be parsed, restore the parser state before trying the
	// next type of statement. If a statement can be parsed, continue from that
	// point trying to parse packages, imports, etc. in the same order.
	for p.s.tok != tokens.EOF {

		s := p.save()

		if pkg := p.parsePackage(); pkg != nil {
			stmts = append(stmts, pkg)
			continue
		} else if len(p.s.errors) > 0 {
			break
		}

		p.restore(s)
		s = p.save()

		if imp := p.parseImport(); imp != nil {
			// A `future.keywords...` import changes which keywords the
			// scanner recognizes from here on.
			if FutureRootDocument.Equal(imp.Path.Value.(Ref)[0]) {
				p.futureImport(imp, allowedFutureKeywords)
			}
			stmts = append(stmts, imp)
			continue
		} else if len(p.s.errors) > 0 {
			break
		}

		p.restore(s)
		s = p.save()

		if rules := p.parseRules(); rules != nil {
			for i := range rules {
				stmts = append(stmts, rules[i])
			}
			continue
		} else if len(p.s.errors) > 0 {
			break
		}

		p.restore(s)

		if body := p.parseQuery(true, tokens.EOF); body != nil {
			stmts = append(stmts, body)
			continue
		}

		break
	}

	if p.po.ProcessAnnotation {
		stmts = p.parseAnnotations(stmts)
	}

	return stmts, p.s.comments, p.s.errors
}

// parseAnnotations parses METADATA comment blocks collected during scanning
// and appends the resulting Annotations statements to stmts. Annotation
// parse errors are recorded on the parser's error list.
func (p *Parser) parseAnnotations(stmts []Statement) []Statement {

	annotStmts, errs := parseAnnotations(p.s.comments)
	for _, err := range errs {
		p.error(err.Location, err.Message)
	}

	for _, annotStmt := range annotStmts {
		stmts = append(stmts, annotStmt)
	}

	return stmts
}

// parseAnnotations groups consecutive comments that start with a METADATA
// hint into metadata blocks and parses each block into an *Annotations.
func parseAnnotations(comments []*Comment) ([]*Annotations, Errors) {

	var hint = []byte("METADATA")
	var curr *metadataParser
	var blocks []*metadataParser

	for i := 0; i < len(comments); i++ {
		if curr != nil {
			// A comment continues the current block only if it is on the
			// very next row and starts in column 1.
			if comments[i].Location.Row == comments[i-1].Location.Row+1 && comments[i].Location.Col == 1 {
				curr.Append(comments[i])
				continue
			}
			curr = nil
		}
		if bytes.HasPrefix(bytes.TrimSpace(comments[i].Text), hint) {
			curr = newMetadataParser(comments[i].Location)
			blocks = append(blocks, curr)
		}
	}

	var stmts []*Annotations
	var errs Errors
	for _, b := range blocks {
		a, err := b.Parse()
		if err != nil {
			errs = append(errs, &Error{
				Code:     ParseErr,
				Message:  err.Error(),
				Location: b.loc,
			})
		} else {
			stmts = append(stmts, a)
		}
	}

	return stmts, errs
}

// parsePackage parses a `package` statement; it returns nil (recording an
// error where appropriate) when the current tokens do not form one.
func (p *Parser) parsePackage() *Package {

	var pkg Package
	pkg.SetLoc(p.s.Loc())

	if p.s.tok != tokens.Package {
		return nil
	}

	p.scan()
	if p.s.tok != tokens.Ident {
		p.illegalToken()
		return nil
	}

	term := p.parseTerm()

	if term != nil {
		switch v := term.Value.(type) {
		case Var:
			// Single-segment package name: data.<name>
			pkg.Path = Ref{
				DefaultRootDocument.Copy().SetLocation(term.Location),
				StringTerm(string(v)).SetLocation(term.Location),
			}
		case Ref:
			// Multi-segment package path: head must be a var, the rest strings.
			pkg.Path = make(Ref, len(v)+1)
			pkg.Path[0] = DefaultRootDocument.Copy().SetLocation(v[0].Location)
			first, ok := v[0].Value.(Var)
			if !ok {
				p.errorf(v[0].Location, "unexpected %v token: expecting var", TypeName(v[0].Value))
				return nil
			}
			pkg.Path[1] = StringTerm(string(first)).SetLocation(v[0].Location)
			for i := 2; i < len(pkg.Path); i++ {
				switch v[i-1].Value.(type) {
				case String:
					pkg.Path[i] = v[i-1]
				default:
					p.errorf(v[i-1].Location, "unexpected %v token: expecting string", TypeName(v[i-1].Value))
					return nil
				}
			}
		default:
			p.illegalToken()
			return nil
		}
	}

	if pkg.Path == nil {
		if len(p.s.errors) == 0 {
			p.error(p.s.Loc(), "expected path")
		}
		return nil
	}

	return &pkg
}

// parseImport parses an `import` statement, including the optional
// `as <alias>` suffix; returns nil on failure.
func (p *Parser) parseImport() *Import {

	var imp Import
	imp.SetLoc(p.s.Loc())

	if p.s.tok != tokens.Import {
		return nil
	}

	p.scan()
	if p.s.tok != tokens.Ident {
		p.error(p.s.Loc(), "expected ident")
		return nil
	}
	// Parse the path with a parser that treats future keywords as plain
	// identifiers, so that `import future.keywords.in` parses.
	q, prev := p.presentParser()
	term := q.parseTerm()
	if term != nil {
		switch v := term.Value.(type) {
		case Var:
			imp.Path = RefTerm(term).SetLocation(term.Location)
		case Ref:
			for i := 1; i < len(v); i++ {
				if _, ok := v[i].Value.(String); !ok {
					p.errorf(v[i].Location, "unexpected %v token: expecting string", TypeName(v[i].Value))
					return nil
				}
			}
			imp.Path = term
		}
	}
	// keep advanced parser state, reset known keywords
	p.s = q.s
	p.s.s = q.s.s.WithKeywords(prev)

	if imp.Path == nil {
		p.error(p.s.Loc(), "expected path")
		return nil
	}

	path := imp.Path.Value.(Ref)

	if !RootDocumentNames.Contains(path[0]) && !FutureRootDocument.Equal(path[0]) {
		p.errorf(imp.Path.Location, "unexpected import path, must begin with one of: %v, got: %v",
			RootDocumentNames.Union(NewSet(FutureRootDocument)),
			path[0])
		return nil
	}

	if p.s.tok == tokens.As {
		p.scan()

		if p.s.tok != tokens.Ident {
			p.illegal("expected var")
			return nil
		}

		if alias := p.parseTerm(); alias != nil {
			v, ok := alias.Value.(Var)
			if ok {
				imp.Alias = v
				return &imp
			}
		}
		p.illegal("expected var")
		return nil
	}

	return &imp
}

// parseRules parses a rule definition (including `default` rules, `if`
// bodies, `contains` heads, `else` chains, and chained bodies), returning
// one *Rule per chained body, or nil on failure.
func (p *Parser) parseRules() []*Rule {

	var rule Rule
	rule.SetLoc(p.s.Loc())

	if p.s.tok == tokens.Default {
		p.scan()
		rule.Default = true
	}

	if p.s.tok != tokens.Ident {
		return nil
	}

	usesContains := false
	if rule.Head, usesContains = p.parseHead(rule.Default); rule.Head == nil {
		return nil
	}

	if rule.Default {
		if !p.validateDefaultRuleValue(&rule) {
			return nil
		}

		// Default rules get an implicit `true` body.
		rule.Body = NewBody(NewExpr(BooleanTerm(true).SetLocation(rule.Location)).SetLocation(rule.Location))
		return []*Rule{&rule}
	}

	hasIf := false
	if p.s.tok == tokens.If {
		hasIf = true
	}

	if hasIf && !usesContains && rule.Head.Key != nil && rule.Head.Value == nil {
		p.illegal("invalid for partial set rule %s (use `contains`)", rule.Head.Name)
		return nil
	}

	switch {
	case hasIf:
		p.scan()
		s := p.save()
		if expr := p.parseLiteral(); expr != nil {
			// NOTE(sr): set literals are never false or undefined, so parsing this as
			//  p if { true }
			//       ^^^^^^^^ set of one element, `true`
			// isn't valid.
			isSetLiteral := false
			if t, ok := expr.Terms.(*Term); ok {
				_, isSetLiteral = t.Value.(Set)
			}
			// expr.Term is []*Term or Every
			if !isSetLiteral {
				rule.Body.Append(expr)
				break
			}
		}

		// parsing as literal didn't work out, expect '{ BODY }'
		p.restore(s)
		fallthrough

	case p.s.tok == tokens.LBrace:
		p.scan()
		if rule.Body = p.parseBody(tokens.RBrace); rule.Body == nil {
			return nil
		}
		p.scan()

	case usesContains:
		// A bodyless `contains` head gets an implicit `true` body.
		rule.Body = NewBody(NewExpr(BooleanTerm(true).SetLocation(rule.Location)).SetLocation(rule.Location))
		return []*Rule{&rule}

	default:
		return nil
	}

	if p.s.tok == tokens.Else {

		if rule.Head.Key != nil {
			p.error(p.s.Loc(), "else keyword cannot be used on partial rules")
			return nil
		}

		if rule.Else = p.parseElse(rule.Head); rule.Else == nil {
			return nil
		}
	}

	rule.Location.Text = p.s.Text(rule.Location.Offset, p.s.lastEnd)

	var rules []*Rule

	rules = append(rules, &rule)

	// Additional `{ ... }` blocks after the rule are chained bodies that
	// share the first rule's head.
	for p.s.tok == tokens.LBrace {

		if rule.Else != nil {
			p.error(p.s.Loc(), "expected else keyword")
			return nil
		}

		loc := p.s.Loc()

		p.scan()
		var next Rule

		if next.Body = p.parseBody(tokens.RBrace); next.Body == nil {
			return nil
		}
		p.scan()

		loc.Text = p.s.Text(loc.Offset, p.s.lastEnd)
		next.SetLoc(loc)

		// Chained rule head's keep the original
		// rule's head AST but have their location
		// set to the rule body.
		next.Head = rule.Head.Copy()
		setLocRecursive(next.Head, loc)

		rules = append(rules, &next)
	}

	return rules
}

// parseElse parses an `else` branch for a rule with the given head,
// recursing for further `else` branches.
func (p *Parser) parseElse(head *Head) *Rule {

	var rule Rule
	rule.SetLoc(p.s.Loc())

	rule.Head = head.Copy()
	rule.Head.SetLoc(p.s.Loc())

	defer func() {
		rule.Location.Text = p.s.Text(rule.Location.Offset, p.s.lastEnd)
	}()

	p.scan()

	switch p.s.tok {
	case tokens.LBrace:
		// `else { ... }` — implicit value `true`.
		rule.Head.Value = BooleanTerm(true)
	case tokens.Assign, tokens.Unify:
		p.scan()
		rule.Head.Value = p.parseTermInfixCall()
		if rule.Head.Value == nil {
			return nil
		}
		rule.Head.Location.Text = p.s.Text(rule.Head.Location.Offset, p.s.lastEnd)
	default:
		p.illegal("expected else value term or rule body")
		return nil
	}

	if p.s.tok != tokens.LBrace {
		// `else := x` without a body — implicit `true` body.
		rule.Body = NewBody(NewExpr(BooleanTerm(true)))
		setLocRecursive(rule.Body, rule.Location)
		return &rule
	}

	p.scan()

	if rule.Body = p.parseBody(tokens.RBrace); rule.Body == nil {
		return nil
	}

	p.scan()

	if p.s.tok == tokens.Else {
		if rule.Else = p.parseElse(head); rule.Else == nil {
			return nil
		}
	}
	return &rule
}

// parseHead parses a rule head (name, optional argument list, optional
// key, optional value). The boolean result reports whether the head used
// the `contains` keyword.
func (p *Parser) parseHead(defaultRule bool) (*Head, bool) {

	var head Head
	head.SetLoc(p.s.Loc())

	defer func() {
		head.Location.Text = p.s.Text(head.Location.Offset, p.s.lastEnd)
	}()

	if term := p.parseVar(); term != nil {
		head.Name = term.Value.(Var)
	} else {
		p.illegal("expected rule head name")
	}

	p.scan()

	if p.s.tok == tokens.LParen {
		// Function head: name(args...)
		p.scan()
		if p.s.tok != tokens.RParen {
			head.Args = p.parseTermList(tokens.RParen, nil)
			if head.Args == nil {
				return nil, false
			}
		}
		p.scan()

		if p.s.tok == tokens.LBrack {
			return nil, false
		}
	}

	if p.s.tok == tokens.LBrack {
		// Partial rule head: name[key]
		p.scan()
		head.Key = p.parseTermInfixCall()
		if head.Key == nil {
			p.illegal("expected rule key term (e.g., %s[<VALUE>] { ... })", head.Name)
		}
		if p.s.tok != tokens.RBrack {
			if _, ok := futureKeywords[head.Name.String()]; ok {
				p.hint("`import future.keywords.%[1]s` for '%[1]s' keyword", head.Name.String())
			}
			p.illegal("non-terminated rule key")
		}
		p.scan()
	}

	switch p.s.tok {
	case tokens.Contains:
		p.scan()
		head.Key = p.parseTermInfixCall()
		if head.Key == nil {
			p.illegal("expected rule key term (e.g., %s contains <VALUE> { ... })", head.Name)
		}

		return &head, true
	case tokens.Unify:
		p.scan()
		head.Value = p.parseTermInfixCall()
		if head.Value == nil {
			p.illegal("expected rule value term (e.g., %s[%s] = <VALUE> { ... })", head.Name, head.Key)
		}

	case tokens.Assign:
		s := p.save()
		p.scan()
		head.Assign = true
		head.Value = p.parseTermInfixCall()
		if head.Value == nil {
			// Restore so the error points at the `:=` site, and pick the
			// message that matches the head shape.
			p.restore(s)
			switch {
			case len(head.Args) > 0:
				p.illegal("expected function value term (e.g., %s(...) := <VALUE> { ... })", head.Name)
			case head.Key != nil:
				p.illegal("expected partial rule value term (e.g., %s[...] := <VALUE> { ... })", head.Name)
			case defaultRule:
				p.illegal("expected default rule value term (e.g., default %s := <VALUE>)", head.Name)
			default:
				p.illegal("expected rule value term (e.g., %s := <VALUE> { ... })", head.Name)
			}
		}
	}

	if head.Value == nil && head.Key == nil {
		// Complete rule with no explicit value defaults to `true`.
		head.Value = BooleanTerm(true).SetLocation(head.Location)
	}

	return &head, false
}

// parseBody parses a rule body terminated by the given token.
func (p *Parser) parseBody(end tokens.Token) Body {
	return p.parseQuery(false, end)
}

// parseQuery parses a sequence of literals separated by semicolons or
// newlines, up to the given end token.
func (p *Parser) parseQuery(requireSemi bool, end tokens.Token) Body {
	body := Body{}

	if p.s.tok == end {
		p.error(p.s.Loc(), "found empty body")
		return nil
	}

	for {

		expr := p.parseLiteral()
		if expr == nil {
			return nil
		}

		body.Append(expr)

		if p.s.tok == tokens.Semicolon {
			p.scan()
			continue
		}

		if p.s.tok == end || requireSemi {
			return body
		}

		if !p.s.skippedNL {
			// If there was already an error then don't pile this one on
			if len(p.s.errors) == 0 {
				p.illegal(`expected \n or %s or %s`, tokens.Semicolon, end)
			}
			return nil
		}
	}
}

// parseLiteral parses a single (possibly negated) literal: a `some`
// declaration, an `every` expression, or an ordinary expression with
// optional `with` modifiers.
func (p *Parser) parseLiteral() (expr *Expr) {

	offset := p.s.loc.Offset
	loc := p.s.Loc()

	defer func() {
		if expr != nil {
			loc.Text = p.s.Text(offset, p.s.lastEnd)
			expr.SetLoc(loc)
		}
	}()

	var negated bool
	if p.s.tok == tokens.Not {
		p.scan()
		negated = true
	}

	switch p.s.tok {
	case tokens.Some:
		if negated {
			p.illegal("illegal negation of 'some'")
			return nil
		}
		return p.parseSome()
	case tokens.Every:
		if negated {
			p.illegal("illegal negation of 'every'")
			return nil
		}
		return p.parseEvery()
	default:
		s := p.save()
		expr := p.parseExpr()
		if expr != nil {
			expr.Negated = negated
			if p.s.tok == tokens.With {
				if expr.With = p.parseWith(); expr.With == nil {
					return nil
				}
			}
			// If we find a plain `every` identifier, attempt to parse an every expression,
			// add hint if it succeeds.
			if term, ok := expr.Terms.(*Term); ok && Var("every").Equal(term.Value) {
				var hint bool
				t := p.save()
				p.restore(s)
				if expr := p.futureParser().parseEvery(); expr != nil {
					_, hint = expr.Terms.(*Every)
				}
				p.restore(t)
				if hint {
					p.hint("`import future.keywords.every` for `every x in xs { ... }` expressions")
				}
			}
			return expr
		}
		return nil
	}
}

// parseWith parses one or more `with <target> as <value>` modifiers.
func (p *Parser) parseWith() []*With {

	withs := []*With{}

	for {

		with := With{
			Location: p.s.Loc(),
		}
		p.scan()

		if p.s.tok != tokens.Ident {
			p.illegal("expected ident")
			return nil
		}

		with.Target = p.parseTerm()
		if with.Target == nil {
			return nil
		}

		switch with.Target.Value.(type) {
		case Ref, Var:
			break
		default:
			p.illegal("expected with target path")
		}

		if p.s.tok != tokens.As {
			p.illegal("expected as keyword")
			return nil
		}

		p.scan()

		if with.Value = p.parseTermInfixCall(); with.Value == nil {
			return nil
		}

		with.Location.Text = p.s.Text(with.Location.Offset, p.s.lastEnd)

		withs = append(withs, &with)

		if p.s.tok != tokens.With {
			break
		}
	}

	return withs
}

// parseSome parses a `some` declaration, either the membership form
// (`some x in xs`, `some k, v in xs`) or the plain variable-list form.
func (p *Parser) parseSome() *Expr {

	decl := &SomeDecl{}
	decl.SetLoc(p.s.Loc())

	// Attempt to parse "some x in xs", which will end up in
	//   SomeDecl{Symbols: ["member(x, xs)"]}
	s := p.save()
	p.scan()
	if term := p.parseTermInfixCall(); term != nil {
		if call, ok := term.Value.(Call); ok {
			switch call[0].String() {
			case Member.Name:
				if len(call) != 3 {
					p.illegal("illegal domain")
					return nil
				}
			case MemberWithKey.Name:
				if len(call) != 4 {
					p.illegal("illegal domain")
					return nil
				}
			default:
				p.illegal("expected `x in xs` or `x, y in xs` expression")
				return nil
			}

			decl.Symbols = []*Term{term}
			expr := NewExpr(decl).SetLocation(decl.Location)
			if p.s.tok == tokens.With {
				if expr.With = p.parseWith(); expr.With == nil {
					return nil
				}
			}
			return expr
		}
	}

	p.restore(s)
	s = p.save() // new copy for later
	var hint bool
	p.scan()
	// Re-try with all future keywords enabled, purely to decide whether to
	// hint that `import future.keywords.in` would make this parse.
	if term := p.futureParser().parseTermInfixCall(); term != nil {
		if call, ok := term.Value.(Call); ok {
			switch call[0].String() {
			case Member.Name, MemberWithKey.Name:
				hint = true
			}
		}
	}

	// go on as before, it's `some x[...]` or illegal
	p.restore(s)
	if hint {
		p.hint("`import future.keywords.in` for `some x in xs` expressions")
	}

	for { // collecting var args

		p.scan()

		if p.s.tok != tokens.Ident {
			p.illegal("expected var")
			return nil
		}

		decl.Symbols = append(decl.Symbols, p.parseVar())

		p.scan()

		if p.s.tok != tokens.Comma {
			break
		}
	}

	return NewExpr(decl).SetLocation(decl.Location)
}

// parseEvery parses an `every x[, y] in xs { ... }` expression.
func (p *Parser) parseEvery() *Expr {
	qb := &Every{}
	qb.SetLoc(p.s.Loc())

	// TODO(sr): We'd get more accurate error messages if we didn't rely on
	// parseTermInfixCall here, but parsed "var [, var] in term" manually.
	p.scan()
	term := p.parseTermInfixCall()
	if term == nil {
		return nil
	}
	call, ok := term.Value.(Call)
	if !ok {
		p.illegal("expected `x[, y] in xs { ... }` expression")
		return nil
	}
	switch call[0].String() {
	case Member.Name: // x in xs
		if len(call) != 3 {
			p.illegal("illegal domain")
			return nil
		}
		qb.Value = call[1]
		qb.Domain = call[2]
	case MemberWithKey.Name: // k, v in xs
		if len(call) != 4 {
			p.illegal("illegal domain")
			return nil
		}
		qb.Key = call[1]
		qb.Value = call[2]
		qb.Domain = call[3]
		if _, ok := qb.Key.Value.(Var); !ok {
			p.illegal("expected key to be a variable")
			return nil
		}
	default:
		p.illegal("expected `x[, y] in xs { ... }` expression")
		return nil
	}
	if _, ok := qb.Value.Value.(Var); !ok {
		p.illegal("expected value to be a variable")
		return nil
	}
	if p.s.tok == tokens.LBrace { // every x in xs { ... }
		p.scan()
		body := p.parseBody(tokens.RBrace)
		if body == nil {
			return nil
		}
		p.scan()
		qb.Body = body
		expr := NewExpr(qb).SetLocation(qb.Location)

		if p.s.tok == tokens.With {
			if expr.With = p.parseWith(); expr.With == nil {
				return nil
			}
		}
		return expr
	}

	p.illegal("missing body")
	return nil
}

// parseExpr parses a single expression: either `lhs := rhs` / `lhs = rhs`,
// or a bare term/call.
func (p *Parser) parseExpr() *Expr {

	lhs := p.parseTermInfixCall()
	if lhs == nil {
		return nil
	}

	if op := p.parseTermOp(tokens.Assign, tokens.Unify); op != nil {
		if rhs := p.parseTermInfixCall(); rhs != nil {
			return NewExpr([]*Term{op, lhs, rhs})
		}
		return nil
	}

	// NOTE(tsandall): the top-level call term is converted to an expr because
	// the evaluator does not support the call term type (nested calls are
	// rewritten by the compiler.)
	if call, ok := lhs.Value.(Call); ok {
		return NewExpr([]*Term(call))
	}

	return NewExpr(lhs)
}

// parseTermInfixCall consumes the next term from the input and returns it.
// If a term cannot be parsed the return value is nil and error will be recorded.
// The scanner will be advanced to the next token before returning.
// By starting out with infix relations (==, !=, <, etc) and further calling the
// other binary operators (|, &, arithmetics), it constitutes the binding
// precedence.
func (p *Parser) parseTermInfixCall() *Term {
	return p.parseTermIn(nil, true, p.s.loc.Offset)
}

// parseTermInfixCallInList is like parseTermInfixCall but disables the
// `k, v in xs` form (commas separate list elements in that context).
func (p *Parser) parseTermInfixCallInList() *Term {
	return p.parseTermIn(nil, false, p.s.loc.Offset)
}

func (p *Parser) parseTermIn(lhs *Term, keyVal bool, offset int) *Term {
	// NOTE(sr): `in` is a bit special: besides `lhs in rhs`, it also
	// supports `key, val in rhs`, so it can have an optional second lhs.
	// `keyVal` triggers if we attempt to parse a second lhs argument (`mhs`).
	if lhs == nil {
		lhs = p.parseTermRelation(nil, offset)
	}
	if lhs != nil {
		if keyVal && p.s.tok == tokens.Comma { // second "lhs", or "middle hand side"
			s := p.save()
			p.scan()
			if mhs := p.parseTermRelation(nil, offset); mhs != nil {
				if op := p.parseTermOpName(MemberWithKey.Ref(), tokens.In); op != nil {
					if rhs := p.parseTermRelation(nil, p.s.loc.Offset); rhs != nil {
						call := p.setLoc(CallTerm(op, lhs, mhs, rhs), lhs.Location, offset, p.s.lastEnd)
						switch p.s.tok {
						case tokens.In:
							// Left-associative chaining: (k, v in xs) in ys
							return p.parseTermIn(call, keyVal, offset)
						default:
							return call
						}
					}
				}
			}
			// The key/value form didn't parse; back out and try plain `in`.
			p.restore(s)
		}
		if op := p.parseTermOpName(Member.Ref(), tokens.In); op != nil {
			if rhs := p.parseTermRelation(nil, p.s.loc.Offset); rhs != nil {
				call := p.setLoc(CallTerm(op, lhs, rhs), lhs.Location, offset, p.s.lastEnd)
				switch p.s.tok {
				case tokens.In:
					return p.parseTermIn(call, keyVal, offset)
				default:
					return call
				}
			}
		}
	}
	return lhs
}

// parseTermRelation parses comparison operators (==, !=, <, >, <=, >=),
// chaining left-associatively.
func (p *Parser) parseTermRelation(lhs *Term, offset int) *Term {
	if lhs == nil {
		lhs = p.parseTermOr(nil, offset)
	}
	if lhs != nil {
		if op := p.parseTermOp(tokens.Equal, tokens.Neq, tokens.Lt, tokens.Gt, tokens.Lte, tokens.Gte); op != nil {
			if rhs := p.parseTermOr(nil, p.s.loc.Offset); rhs != nil {
				call := p.setLoc(CallTerm(op, lhs, rhs), lhs.Location, offset, p.s.lastEnd)
				switch p.s.tok {
				case tokens.Equal, tokens.Neq, tokens.Lt, tokens.Gt, tokens.Lte, tokens.Gte:
					return p.parseTermRelation(call, offset)
				default:
					return call
				}
			}
		}
	}
	return lhs
}

// parseTermOr parses the set-union operator `|`, chaining left-associatively.
func (p *Parser) parseTermOr(lhs *Term, offset int) *Term {
	if lhs == nil {
		lhs = p.parseTermAnd(nil, offset)
	}
	if lhs != nil {
		if op := p.parseTermOp(tokens.Or); op != nil {
			if rhs := p.parseTermAnd(nil, p.s.loc.Offset); rhs != nil {
				call := p.setLoc(CallTerm(op, lhs, rhs), lhs.Location, offset, p.s.lastEnd)
				switch p.s.tok {
				case tokens.Or:
					return p.parseTermOr(call, offset)
				default:
					return call
				}
			}
		}
		return lhs
	}
	return nil
}

// parseTermAnd parses the set-intersection operator `&`, chaining
// left-associatively.
func (p *Parser) parseTermAnd(lhs *Term, offset int) *Term {
	if lhs == nil {
		lhs = p.parseTermArith(nil, offset)
	}
	if lhs != nil {
		if op := p.parseTermOp(tokens.And); op != nil {
			if rhs := p.parseTermArith(nil, p.s.loc.Offset); rhs != nil {
				call := p.setLoc(CallTerm(op, lhs, rhs), lhs.Location, offset, p.s.lastEnd)
				switch p.s.tok {
				case tokens.And:
					return p.parseTermAnd(call, offset)
				default:
					return call
				}
			}
		}
		return lhs
	}
	return nil
}

// parseTermArith parses + and -, chaining left-associatively.
func (p *Parser) parseTermArith(lhs *Term, offset int) *Term {
	if lhs == nil {
		lhs = p.parseTermFactor(nil, offset)
	}
	if lhs != nil {
		if op := p.parseTermOp(tokens.Add, tokens.Sub); op != nil {
			if rhs := p.parseTermFactor(nil, p.s.loc.Offset); rhs != nil {
				call := p.setLoc(CallTerm(op, lhs, rhs), lhs.Location, offset, p.s.lastEnd)
				switch p.s.tok {
				case tokens.Add, tokens.Sub:
					return p.parseTermArith(call, offset)
				default:
					return call
				}
			}
		}
	}
	return lhs
}

// parseTermFactor parses *, /, and %, chaining left-associatively.
func (p *Parser) parseTermFactor(lhs *Term, offset int) *Term {
	if lhs == nil {
		lhs = p.parseTerm()
	}
	if lhs != nil {
		if op := p.parseTermOp(tokens.Mul, tokens.Quo, tokens.Rem); op != nil {
			if rhs := p.parseTerm(); rhs != nil {
				call := p.setLoc(CallTerm(op, lhs, rhs), lhs.Location, offset, p.s.lastEnd)
				switch p.s.tok {
				case tokens.Mul, tokens.Quo, tokens.Rem:
					return p.parseTermFactor(call, offset)
				default:
					return call
				}
			}
		}
	}
	return lhs
}

// parseTerm parses a primary term (scalar, var, collection, or
// parenthesized expression), consulting the parsed-term cache first.
func (p *Parser) parseTerm() *Term {
	if term, s := p.parsedTermCacheLookup(); s != nil {
		p.restore(s)
		return term
	}
	s0 := p.save()

	var term *Term
	switch p.s.tok {
	case tokens.Null:
		term = NullTerm().SetLocation(p.s.Loc())
	case tokens.True:
		term = BooleanTerm(true).SetLocation(p.s.Loc())
	case tokens.False:
		term = BooleanTerm(false).SetLocation(p.s.Loc())
	case tokens.Sub, tokens.Dot, tokens.Number:
		term = p.parseNumber()
	case tokens.String:
		term = p.parseString()
	case tokens.Ident, tokens.Contains: // NOTE(sr): contains anywhere BUT in rule heads gets no special treatment
		term = p.parseVar()
	case tokens.LBrack:
		term = p.parseArray()
	case tokens.LBrace:
		term = p.parseSetOrObject()
	case tokens.LParen:
		offset := p.s.loc.Offset
		p.scan()
		if r := p.parseTermInfixCall(); r != nil {
			if p.s.tok == tokens.RParen {
				r.Location.Text = p.s.Text(offset, p.s.tokEnd)
				term = r
			} else {
				p.error(p.s.Loc(), "non-terminated expression")
			}
		}
	default:
		p.illegalToken()
	}

	term = p.parseTermFinish(term)
	p.parsedTermCachePush(term, s0)
	return term
}

// parseTermFinish handles what follows a primary term: a ref suffix
// (call, dot, bracket) or nothing, promoting bare root-document vars
// to refs.
func (p *Parser) parseTermFinish(head *Term) *Term {
	if head == nil {
		return nil
	}
	offset := p.s.loc.Offset
	p.scanWS()
	switch p.s.tok {
	case tokens.LParen, tokens.Dot, tokens.LBrack:
		return p.parseRef(head, offset)
	case tokens.Whitespace:
		p.scan()
		fallthrough
	default:
		if _, ok := head.Value.(Var); ok && RootDocumentNames.Contains(head) {
			return RefTerm(head).SetLocation(head.Location)
		}
		return head
	}
}

// parseNumber parses a (possibly signed, possibly fractional) number
// literal, rejecting malformed or pathologically large values.
func (p *Parser) parseNumber() *Term {
	var prefix string
	loc := p.s.Loc()
	if p.s.tok == tokens.Sub {
		prefix = "-"
		p.scan()
		switch p.s.tok {
		case tokens.Number, tokens.Dot:
			break
		default:
			p.illegal("expected number")
			return nil
		}
	}
	if p.s.tok == tokens.Dot {
		prefix += "."
		p.scan()
		if p.s.tok != tokens.Number {
			p.illegal("expected number")
			return nil
		}
	}

	// Check for multiple leading 0's, parsed by math/big.Float.Parse as decimal 0:
	// https://golang.org/pkg/math/big/#Float.Parse
	if ((len(prefix) != 0 && prefix[0] == '-') || len(prefix) == 0) &&
		len(p.s.lit) > 1 && p.s.lit[0] == '0' && p.s.lit[1] == '0' {
		p.illegal("expected number")
		return nil
	}

	// Ensure that the number is valid
	s := prefix + p.s.lit
	f, ok := new(big.Float).SetString(s)
	if !ok {
		p.illegal("invalid float")
		return nil
	}

	// Put limit on size of exponent to prevent non-linear cost of String()
	// function on big.Float from causing denial of service: https://github.com/golang/go/issues/11068
	//
	// n == sign * mantissa * 2^exp
	// 0.5 <= mantissa < 1.0
	//
	// The limit is arbitrary.
	exp := f.MantExp(nil)
	if exp > 1e5 || exp < -1e5 || f.IsInf() { // +/- inf, exp is 0
		p.error(p.s.Loc(), "number too big")
		return nil
	}

	// Note: Use the original string, do *not* round trip from
	// the big.Float as it can cause precision loss.
	r := NumberTerm(json.Number(s)).SetLocation(loc)
	return r
}

// parseString parses a quoted (JSON-escaped) or raw (backtick) string
// literal.
func (p *Parser) parseString() *Term {
	if p.s.lit[0] == '"' {
		var s string
		err := json.Unmarshal([]byte(p.s.lit), &s)
		if err != nil {
			p.errorf(p.s.Loc(), "illegal string literal: %s", p.s.lit)
			return nil
		}
		term := StringTerm(s).SetLocation(p.s.Loc())
		return term
	}
	return p.parseRawString()
}

// parseRawString strips the surrounding backticks from a raw string
// literal; no escape processing is performed.
func (p *Parser) parseRawString() *Term {
	if len(p.s.lit) < 2 {
		return nil
	}
	term := StringTerm(p.s.lit[1 : len(p.s.lit)-1]).SetLocation(p.s.Loc())
	return term
}

// this is the name to use for instantiating an empty set, e.g., `set()`.
var setConstructor = RefTerm(VarTerm("set"))

// parseCall parses a call term's argument list, having already parsed the
// operator; the location of the result is fixed up on return.
// NOTE(review): this function continues beyond the end of this chunk.
func (p *Parser) parseCall(operator *Term, offset int) (term *Term) {

	loc := operator.Location
	var end int

	defer func() {
		p.setLoc(term, loc, offset, end)
	}()

	p.scan() // steps over '('

	if p.s.tok == tokens.RParen { // no args, i.e. set() or any.func()
		end = p.s.tokEnd
		p.scanWS()
		if operator.Equal(setConstructor) {
			return SetTerm()
		}
		return CallTerm(operator)
	}

	if r := p.parseTermList(tokens.RParen, []*Term{operator}); r != nil {
		end = p.s.tokEnd
		p.scanWS()
		return CallTerm(r...)
1449 } 1450 1451 return nil 1452 } 1453 1454 func (p *Parser) parseRef(head *Term, offset int) (term *Term) { 1455 1456 loc := head.Location 1457 var end int 1458 1459 defer func() { 1460 p.setLoc(term, loc, offset, end) 1461 }() 1462 1463 switch h := head.Value.(type) { 1464 case Var, *Array, Object, Set, *ArrayComprehension, *ObjectComprehension, *SetComprehension, Call: 1465 // ok 1466 default: 1467 p.errorf(loc, "illegal ref (head cannot be %v)", TypeName(h)) 1468 } 1469 1470 ref := []*Term{head} 1471 1472 for { 1473 switch p.s.tok { 1474 case tokens.Dot: 1475 p.scanWS() 1476 if p.s.tok != tokens.Ident { 1477 p.illegal("expected %v", tokens.Ident) 1478 return nil 1479 } 1480 ref = append(ref, StringTerm(p.s.lit).SetLocation(p.s.Loc())) 1481 p.scanWS() 1482 case tokens.LParen: 1483 term = p.parseCall(p.setLoc(RefTerm(ref...), loc, offset, p.s.loc.Offset), offset) 1484 if term != nil { 1485 switch p.s.tok { 1486 case tokens.Whitespace: 1487 p.scan() 1488 end = p.s.lastEnd 1489 return term 1490 case tokens.Dot, tokens.LBrack: 1491 term = p.parseRef(term, offset) 1492 } 1493 } 1494 end = p.s.tokEnd 1495 return term 1496 case tokens.LBrack: 1497 p.scan() 1498 if term := p.parseTermInfixCall(); term != nil { 1499 if p.s.tok != tokens.RBrack { 1500 p.illegal("expected %v", tokens.LBrack) 1501 return nil 1502 } 1503 ref = append(ref, term) 1504 p.scanWS() 1505 } else { 1506 return nil 1507 } 1508 case tokens.Whitespace: 1509 end = p.s.lastEnd 1510 p.scan() 1511 return RefTerm(ref...) 1512 default: 1513 end = p.s.lastEnd 1514 return RefTerm(ref...) 
1515 } 1516 } 1517 } 1518 1519 func (p *Parser) parseArray() (term *Term) { 1520 1521 loc := p.s.Loc() 1522 offset := p.s.loc.Offset 1523 1524 defer func() { 1525 p.setLoc(term, loc, offset, p.s.tokEnd) 1526 }() 1527 1528 p.scan() 1529 1530 if p.s.tok == tokens.RBrack { 1531 return ArrayTerm() 1532 } 1533 1534 potentialComprehension := true 1535 1536 // Skip leading commas, eg [, x, y] 1537 // Supported for backwards compatibility. In the future 1538 // we should make this a parse error. 1539 if p.s.tok == tokens.Comma { 1540 potentialComprehension = false 1541 p.scan() 1542 } 1543 1544 s := p.save() 1545 1546 // NOTE(tsandall): The parser cannot attempt a relational term here because 1547 // of ambiguity around comprehensions. For example, given: 1548 // 1549 // {1 | 1} 1550 // 1551 // Does this represent a set comprehension or a set containing binary OR 1552 // call? We resolve the ambiguity by prioritizing comprehensions. 1553 head := p.parseTerm() 1554 1555 if head == nil { 1556 return nil 1557 } 1558 1559 switch p.s.tok { 1560 case tokens.RBrack: 1561 return ArrayTerm(head) 1562 case tokens.Comma: 1563 p.scan() 1564 if terms := p.parseTermList(tokens.RBrack, []*Term{head}); terms != nil { 1565 return NewTerm(NewArray(terms...)) 1566 } 1567 return nil 1568 case tokens.Or: 1569 if potentialComprehension { 1570 // Try to parse as if it is an array comprehension 1571 p.scan() 1572 if body := p.parseBody(tokens.RBrack); body != nil { 1573 return ArrayComprehensionTerm(head, body) 1574 } 1575 if p.s.tok != tokens.Comma { 1576 return nil 1577 } 1578 } 1579 // fall back to parsing as a normal array definition 1580 } 1581 1582 p.restore(s) 1583 1584 if terms := p.parseTermList(tokens.RBrack, nil); terms != nil { 1585 return NewTerm(NewArray(terms...)) 1586 } 1587 return nil 1588 } 1589 1590 func (p *Parser) parseSetOrObject() (term *Term) { 1591 loc := p.s.Loc() 1592 offset := p.s.loc.Offset 1593 1594 defer func() { 1595 p.setLoc(term, loc, offset, p.s.tokEnd) 1596 }() 
1597 1598 p.scan() 1599 1600 if p.s.tok == tokens.RBrace { 1601 return ObjectTerm() 1602 } 1603 1604 potentialComprehension := true 1605 1606 // Skip leading commas, eg {, x, y} 1607 // Supported for backwards compatibility. In the future 1608 // we should make this a parse error. 1609 if p.s.tok == tokens.Comma { 1610 potentialComprehension = false 1611 p.scan() 1612 } 1613 1614 s := p.save() 1615 1616 // Try parsing just a single term first to give comprehensions higher 1617 // priority to "or" calls in ambiguous situations. Eg: { a | b } 1618 // will be a set comprehension. 1619 // 1620 // Note: We don't know yet if it is a set or object being defined. 1621 head := p.parseTerm() 1622 if head == nil { 1623 return nil 1624 } 1625 1626 switch p.s.tok { 1627 case tokens.Or: 1628 if potentialComprehension { 1629 return p.parseSet(s, head, potentialComprehension) 1630 } 1631 case tokens.RBrace, tokens.Comma: 1632 return p.parseSet(s, head, potentialComprehension) 1633 case tokens.Colon: 1634 return p.parseObject(head, potentialComprehension) 1635 } 1636 1637 p.restore(s) 1638 1639 head = p.parseTermInfixCallInList() 1640 if head == nil { 1641 return nil 1642 } 1643 1644 switch p.s.tok { 1645 case tokens.RBrace, tokens.Comma: 1646 return p.parseSet(s, head, false) 1647 case tokens.Colon: 1648 // It still might be an object comprehension, eg { a+1: b | ... } 1649 return p.parseObject(head, potentialComprehension) 1650 } 1651 1652 p.illegal("non-terminated set") 1653 return nil 1654 } 1655 1656 func (p *Parser) parseSet(s *state, head *Term, potentialComprehension bool) *Term { 1657 switch p.s.tok { 1658 case tokens.RBrace: 1659 return SetTerm(head) 1660 case tokens.Comma: 1661 p.scan() 1662 if terms := p.parseTermList(tokens.RBrace, []*Term{head}); terms != nil { 1663 return SetTerm(terms...) 
1664 } 1665 case tokens.Or: 1666 if potentialComprehension { 1667 // Try to parse as if it is a set comprehension 1668 p.scan() 1669 if body := p.parseBody(tokens.RBrace); body != nil { 1670 return SetComprehensionTerm(head, body) 1671 } 1672 if p.s.tok != tokens.Comma { 1673 return nil 1674 } 1675 } 1676 // Fall back to parsing as normal set definition 1677 p.restore(s) 1678 if terms := p.parseTermList(tokens.RBrace, nil); terms != nil { 1679 return SetTerm(terms...) 1680 } 1681 } 1682 return nil 1683 } 1684 1685 func (p *Parser) parseObject(k *Term, potentialComprehension bool) *Term { 1686 // NOTE(tsandall): Assumption: this function is called after parsing the key 1687 // of the head element and then receiving a colon token from the scanner. 1688 // Advance beyond the colon and attempt to parse an object. 1689 if p.s.tok != tokens.Colon { 1690 panic("expected colon") 1691 } 1692 p.scan() 1693 1694 s := p.save() 1695 1696 // NOTE(sr): We first try to parse the value as a term (`v`), and see 1697 // if we can parse `{ x: v | ...}` as a comprehension. 1698 // However, if we encounter either a Comma or an RBace, it cannot be 1699 // parsed as a comprehension -- so we save double work further down 1700 // where `parseObjectFinish(k, v, false)` would only exercise the 1701 // same code paths once more. 
1702 v := p.parseTerm() 1703 if v == nil { 1704 return nil 1705 } 1706 1707 potentialRelation := true 1708 if potentialComprehension { 1709 switch p.s.tok { 1710 case tokens.RBrace, tokens.Comma: 1711 potentialRelation = false 1712 fallthrough 1713 case tokens.Or: 1714 if term := p.parseObjectFinish(k, v, true); term != nil { 1715 return term 1716 } 1717 } 1718 } 1719 1720 p.restore(s) 1721 1722 if potentialRelation { 1723 v := p.parseTermInfixCallInList() 1724 if v == nil { 1725 return nil 1726 } 1727 1728 switch p.s.tok { 1729 case tokens.RBrace, tokens.Comma: 1730 return p.parseObjectFinish(k, v, false) 1731 } 1732 } 1733 1734 p.illegal("non-terminated object") 1735 return nil 1736 } 1737 1738 func (p *Parser) parseObjectFinish(key, val *Term, potentialComprehension bool) *Term { 1739 switch p.s.tok { 1740 case tokens.RBrace: 1741 return ObjectTerm([2]*Term{key, val}) 1742 case tokens.Or: 1743 if potentialComprehension { 1744 p.scan() 1745 if body := p.parseBody(tokens.RBrace); body != nil { 1746 return ObjectComprehensionTerm(key, val, body) 1747 } 1748 } else { 1749 p.illegal("non-terminated object") 1750 } 1751 case tokens.Comma: 1752 p.scan() 1753 if r := p.parseTermPairList(tokens.RBrace, [][2]*Term{{key, val}}); r != nil { 1754 return ObjectTerm(r...) 
1755 } 1756 } 1757 return nil 1758 } 1759 1760 func (p *Parser) parseTermList(end tokens.Token, r []*Term) []*Term { 1761 if p.s.tok == end { 1762 return r 1763 } 1764 for { 1765 term := p.parseTermInfixCallInList() 1766 if term != nil { 1767 r = append(r, term) 1768 switch p.s.tok { 1769 case end: 1770 return r 1771 case tokens.Comma: 1772 p.scan() 1773 if p.s.tok == end { 1774 return r 1775 } 1776 continue 1777 default: 1778 p.illegal(fmt.Sprintf("expected %q or %q", tokens.Comma, end)) 1779 return nil 1780 } 1781 } 1782 return nil 1783 } 1784 } 1785 1786 func (p *Parser) parseTermPairList(end tokens.Token, r [][2]*Term) [][2]*Term { 1787 if p.s.tok == end { 1788 return r 1789 } 1790 for { 1791 key := p.parseTermInfixCallInList() 1792 if key != nil { 1793 switch p.s.tok { 1794 case tokens.Colon: 1795 p.scan() 1796 if val := p.parseTermInfixCallInList(); val != nil { 1797 r = append(r, [2]*Term{key, val}) 1798 switch p.s.tok { 1799 case end: 1800 return r 1801 case tokens.Comma: 1802 p.scan() 1803 if p.s.tok == end { 1804 return r 1805 } 1806 continue 1807 default: 1808 p.illegal(fmt.Sprintf("expected %q or %q", tokens.Comma, end)) 1809 return nil 1810 } 1811 } 1812 default: 1813 p.illegal(fmt.Sprintf("expected %q", tokens.Colon)) 1814 return nil 1815 } 1816 } 1817 return nil 1818 } 1819 } 1820 1821 func (p *Parser) parseTermOp(values ...tokens.Token) *Term { 1822 for i := range values { 1823 if p.s.tok == values[i] { 1824 r := RefTerm(VarTerm(fmt.Sprint(p.s.tok)).SetLocation(p.s.Loc())).SetLocation(p.s.Loc()) 1825 p.scan() 1826 return r 1827 } 1828 } 1829 return nil 1830 } 1831 1832 func (p *Parser) parseTermOpName(ref Ref, values ...tokens.Token) *Term { 1833 for i := range values { 1834 if p.s.tok == values[i] { 1835 for _, r := range ref { 1836 r.SetLocation(p.s.Loc()) 1837 } 1838 t := RefTerm(ref...) 
1839 t.SetLocation(p.s.Loc()) 1840 p.scan() 1841 return t 1842 } 1843 } 1844 return nil 1845 } 1846 1847 func (p *Parser) parseVar() *Term { 1848 1849 s := p.s.lit 1850 1851 term := VarTerm(s).SetLocation(p.s.Loc()) 1852 1853 // Update wildcard values with unique identifiers 1854 if term.Equal(Wildcard) { 1855 term.Value = Var(p.genwildcard()) 1856 } 1857 1858 return term 1859 } 1860 1861 func (p *Parser) genwildcard() string { 1862 c := p.s.wildcard 1863 p.s.wildcard++ 1864 return fmt.Sprintf("%v%d", WildcardPrefix, c) 1865 } 1866 1867 func (p *Parser) error(loc *location.Location, reason string) { 1868 p.errorf(loc, reason) 1869 } 1870 1871 func (p *Parser) errorf(loc *location.Location, f string, a ...interface{}) { 1872 msg := strings.Builder{} 1873 fmt.Fprintf(&msg, f, a...) 1874 1875 switch len(p.s.hints) { 1876 case 0: // nothing to do 1877 case 1: 1878 msg.WriteString(" (hint: ") 1879 msg.WriteString(p.s.hints[0]) 1880 msg.WriteRune(')') 1881 default: 1882 msg.WriteString(" (hints: ") 1883 for i, h := range p.s.hints { 1884 if i > 0 { 1885 msg.WriteString(", ") 1886 } 1887 msg.WriteString(h) 1888 } 1889 msg.WriteRune(')') 1890 } 1891 1892 p.s.errors = append(p.s.errors, &Error{ 1893 Code: ParseErr, 1894 Message: msg.String(), 1895 Location: loc, 1896 Details: newParserErrorDetail(p.s.s.Bytes(), loc.Offset), 1897 }) 1898 p.s.hints = nil 1899 } 1900 1901 func (p *Parser) hint(f string, a ...interface{}) { 1902 p.s.hints = append(p.s.hints, fmt.Sprintf(f, a...)) 1903 } 1904 1905 func (p *Parser) illegal(note string, a ...interface{}) { 1906 tok := p.s.tok.String() 1907 1908 if p.s.tok == tokens.Illegal { 1909 p.errorf(p.s.Loc(), "illegal token") 1910 return 1911 } 1912 1913 tokType := "token" 1914 if tokens.IsKeyword(p.s.tok) { 1915 tokType = "keyword" 1916 } 1917 if _, ok := futureKeywords[p.s.tok.String()]; ok { 1918 tokType = "keyword" 1919 } 1920 1921 note = fmt.Sprintf(note, a...) 
1922 if len(note) > 0 { 1923 p.errorf(p.s.Loc(), "unexpected %s %s: %s", tok, tokType, note) 1924 } else { 1925 p.errorf(p.s.Loc(), "unexpected %s %s", tok, tokType) 1926 } 1927 } 1928 1929 func (p *Parser) illegalToken() { 1930 p.illegal("") 1931 } 1932 1933 func (p *Parser) scan() { 1934 p.doScan(true) 1935 } 1936 1937 func (p *Parser) scanWS() { 1938 p.doScan(false) 1939 } 1940 1941 func (p *Parser) doScan(skipws bool) { 1942 1943 // NOTE(tsandall): the last position is used to compute the "text" field for 1944 // complex AST nodes. Whitespace never affects the last position of an AST 1945 // node so do not update it when scanning. 1946 if p.s.tok != tokens.Whitespace { 1947 p.s.lastEnd = p.s.tokEnd 1948 p.s.skippedNL = false 1949 } 1950 1951 var errs []scanner.Error 1952 for { 1953 var pos scanner.Position 1954 p.s.tok, pos, p.s.lit, errs = p.s.s.Scan() 1955 1956 p.s.tokEnd = pos.End 1957 p.s.loc.Row = pos.Row 1958 p.s.loc.Col = pos.Col 1959 p.s.loc.Offset = pos.Offset 1960 p.s.loc.Text = p.s.Text(pos.Offset, pos.End) 1961 1962 for _, err := range errs { 1963 p.error(p.s.Loc(), err.Message) 1964 } 1965 1966 if len(errs) > 0 { 1967 p.s.tok = tokens.Illegal 1968 } 1969 1970 if p.s.tok == tokens.Whitespace { 1971 if p.s.lit == "\n" { 1972 p.s.skippedNL = true 1973 } 1974 if skipws { 1975 continue 1976 } 1977 } 1978 1979 if p.s.tok != tokens.Comment { 1980 break 1981 } 1982 1983 // For backwards compatibility leave a nil 1984 // Text value if there is no text rather than 1985 // an empty string. 
1986 var commentText []byte 1987 if len(p.s.lit) > 1 { 1988 commentText = []byte(p.s.lit[1:]) 1989 } 1990 comment := NewComment(commentText) 1991 comment.SetLoc(p.s.Loc()) 1992 p.s.comments = append(p.s.comments, comment) 1993 } 1994 } 1995 1996 func (p *Parser) save() *state { 1997 cpy := *p.s 1998 s := *cpy.s 1999 cpy.s = &s 2000 return &cpy 2001 } 2002 2003 func (p *Parser) restore(s *state) { 2004 p.s = s 2005 } 2006 2007 func setLocRecursive(x interface{}, loc *location.Location) { 2008 NewGenericVisitor(func(x interface{}) bool { 2009 if node, ok := x.(Node); ok { 2010 node.SetLoc(loc) 2011 } 2012 return false 2013 }).Walk(x) 2014 } 2015 2016 func (p *Parser) setLoc(term *Term, loc *location.Location, offset, end int) *Term { 2017 if term != nil { 2018 cpy := *loc 2019 term.Location = &cpy 2020 term.Location.Text = p.s.Text(offset, end) 2021 } 2022 return term 2023 } 2024 2025 func (p *Parser) validateDefaultRuleValue(rule *Rule) bool { 2026 if rule.Head.Value == nil { 2027 p.error(rule.Loc(), "illegal default rule (must have a value)") 2028 return false 2029 } 2030 2031 valid := true 2032 vis := NewGenericVisitor(func(x interface{}) bool { 2033 switch x.(type) { 2034 case *ArrayComprehension, *ObjectComprehension, *SetComprehension: // skip closures 2035 return true 2036 case Ref, Var, Call: 2037 p.error(rule.Loc(), fmt.Sprintf("illegal default rule (value cannot contain %v)", TypeName(x))) 2038 valid = false 2039 return true 2040 } 2041 return false 2042 }) 2043 2044 vis.Walk(rule.Head.Value.Value) 2045 return valid 2046 } 2047 2048 // We explicitly use yaml unmarshalling, to accommodate for the '_' in 'related_resources', 2049 // which isn't handled properly by json for some reason. 
// rawAnnotation is the YAML-level shape of a METADATA block before
// conversion into an *Annotations value.
type rawAnnotation struct {
	Scope            string                 `yaml:"scope"`
	Title            string                 `yaml:"title"`
	Description      string                 `yaml:"description"`
	Organizations    []string               `yaml:"organizations"`
	RelatedResources []interface{}          `yaml:"related_resources"`
	Authors          []interface{}          `yaml:"authors"`
	Schemas          []rawSchemaAnnotation  `yaml:"schemas"`
	Custom           map[string]interface{} `yaml:"custom"`
}

// rawSchemaAnnotation is a single-entry map of document ref -> schema
// (either a schema ref string or an inline definition).
type rawSchemaAnnotation map[string]interface{}

// metadataParser accumulates comment lines of a METADATA block and parses
// them as YAML.
type metadataParser struct {
	buf      *bytes.Buffer // concatenated comment text, newline-separated
	comments []*Comment    // original comments, used to map YAML errors back to locations
	loc      *location.Location
}

// newMetadataParser returns a metadataParser anchored at loc.
func newMetadataParser(loc *Location) *metadataParser {
	return &metadataParser{loc: loc, buf: bytes.NewBuffer(nil)}
}

// Append adds one comment line to the buffer, dropping a single leading
// space so indentation-sensitive YAML parses correctly.
func (b *metadataParser) Append(c *Comment) {
	b.buf.Write(bytes.TrimPrefix(c.Text, []byte(" ")))
	b.buf.WriteByte('\n')
	b.comments = append(b.comments, c)
}

// yamlLineErrRegex extracts the line number from yaml.v3 error messages so
// errors can be re-anchored to the offending comment's location.
var yamlLineErrRegex = regexp.MustCompile(`^yaml: line ([[:digit:]]+):`)

// Parse unmarshals the accumulated METADATA block and converts it into an
// *Annotations value, validating related resources, schemas, authors, and
// custom entries. On YAML errors, the parser's location is moved to the
// comment matching the reported line when possible.
func (b *metadataParser) Parse() (*Annotations, error) {

	var raw rawAnnotation

	if len(bytes.TrimSpace(b.buf.Bytes())) == 0 {
		return nil, fmt.Errorf("expected METADATA block, found whitespace")
	}

	if err := yaml.Unmarshal(b.buf.Bytes(), &raw); err != nil {
		match := yamlLineErrRegex.FindStringSubmatch(err.Error())
		if len(match) == 2 {
			n, err2 := strconv.Atoi(match[1])
			if err2 == nil {
				index := n - 1 // line numbering is 1-based so subtract one from row
				if index >= len(b.comments) {
					// Reported line is past the block; point at the last comment.
					b.loc = b.comments[len(b.comments)-1].Location
				} else {
					b.loc = b.comments[index].Location
				}
			}
		}
		return nil, err
	}

	var result Annotations
	result.Scope = raw.Scope
	result.Title = raw.Title
	result.Description = raw.Description
	result.Organizations = raw.Organizations

	for _, v := range raw.RelatedResources {
		rr, err := parseRelatedResource(v)
		if err != nil {
			return nil, fmt.Errorf("invalid related-resource definition %s: %w", v, err)
		}
		result.RelatedResources = append(result.RelatedResources, rr)
	}

	for _, pair := range raw.Schemas {
		k, v := unwrapPair(pair)

		var a SchemaAnnotation
		var err error

		a.Path, err = ParseRef(k)
		if err != nil {
			return nil, fmt.Errorf("invalid document reference")
		}

		switch v := v.(type) {
		case string:
			a.Schema, err = parseSchemaRef(v)
			if err != nil {
				return nil, err
			}
		case map[interface{}]interface{}:
			w, err := convertYAMLMapKeyTypes(v, nil)
			if err != nil {
				return nil, fmt.Errorf("invalid schema definition: %w", err)
			}
			a.Definition = &w
		default:
			return nil, fmt.Errorf("invalid schema declaration for path %q", k)
		}

		result.Schemas = append(result.Schemas, &a)
	}

	for _, v := range raw.Authors {
		author, err := parseAuthor(v)
		if err != nil {
			return nil, fmt.Errorf("invalid author definition %s: %w", v, err)
		}
		result.Authors = append(result.Authors, author)
	}

	result.Custom = make(map[string]interface{})
	for k, v := range raw.Custom {
		val, err := convertYAMLMapKeyTypes(v, nil)
		if err != nil {
			return nil, err
		}
		result.Custom[k] = val
	}

	result.Location = b.loc
	return &result, nil
}

// unwrapPair returns the (sole) key/value pair of a single-entry map; if
// the map has multiple entries, an arbitrary one is returned.
func unwrapPair(pair map[string]interface{}) (k string, v interface{}) {
	for k, v = range pair {
	}
	return
}

var errInvalidSchemaRef = fmt.Errorf("invalid schema reference")

// NOTE(tsandall): 'schema' is not registered as a root because it's not
// supported by the compiler or evaluator today. Once we fix that, we can remove
// this function.
// parseSchemaRef parses s as a ref rooted at the schema document
// (`schema` or `schema.x...`), returning errInvalidSchemaRef otherwise.
func parseSchemaRef(s string) (Ref, error) {

	term, err := ParseTerm(s)
	if err == nil {
		switch v := term.Value.(type) {
		case Var:
			if term.Equal(SchemaRootDocument) {
				return SchemaRootRef.Copy(), nil
			}
		case Ref:
			if v.HasPrefix(SchemaRootRef) {
				return v, nil
			}
		}
	}

	return nil, errInvalidSchemaRef
}

// parseRelatedResource converts a related-resource entry — either a URL
// string or a map with "ref" and optional "description" keys — into a
// *RelatedResourceAnnotation.
func parseRelatedResource(rr interface{}) (*RelatedResourceAnnotation, error) {
	rr, err := convertYAMLMapKeyTypes(rr, nil)
	if err != nil {
		return nil, err
	}

	switch rr := rr.(type) {
	case string:
		if len(rr) > 0 {
			u, err := url.Parse(rr)
			if err != nil {
				return nil, err
			}
			return &RelatedResourceAnnotation{Ref: *u}, nil
		}
		return nil, fmt.Errorf("ref URL may not be empty string")
	case map[string]interface{}:
		description := strings.TrimSpace(getSafeString(rr, "description"))
		ref := strings.TrimSpace(getSafeString(rr, "ref"))
		if len(ref) > 0 {
			u, err := url.Parse(ref)
			if err != nil {
				return nil, err
			}
			return &RelatedResourceAnnotation{Description: description, Ref: *u}, nil
		}
		return nil, fmt.Errorf("'ref' value required in object")
	}

	return nil, fmt.Errorf("invalid value type, must be string or map")
}

// parseAuthor converts an author entry — either a free-form string or a map
// with "name"/"email" keys — into an *AuthorAnnotation.
func parseAuthor(a interface{}) (*AuthorAnnotation, error) {
	a, err := convertYAMLMapKeyTypes(a, nil)
	if err != nil {
		return nil, err
	}

	switch a := a.(type) {
	case string:
		return parseAuthorString(a)
	case map[string]interface{}:
		name := strings.TrimSpace(getSafeString(a, "name"))
		email := strings.TrimSpace(getSafeString(a, "email"))
		if len(name) > 0 || len(email) > 0 {
			return &AuthorAnnotation{name, email}, nil
		}
		return nil, fmt.Errorf("'name' and/or 'email' values required in object")
	}

	return nil, fmt.Errorf("invalid value type, must be string or map")
}

// getSafeString returns m[k] if present and a string, else "".
func getSafeString(m map[string]interface{}, k string) string {
	if v, found := m[k]; found {
		if s, ok := v.(string); ok {
			return s
		}
	}
	return ""
}

const emailPrefix = "<"
const emailSuffix = ">"

// parseAuthorString parses a string into an AuthorAnnotation. If the last word of the input string is enclosed within <>,
// it is extracted as the author's email. The email may not contain whitespace, as it then will be interpreted as
// multiple words.
func parseAuthorString(s string) (*AuthorAnnotation, error) {
	parts := strings.Fields(s)

	if len(parts) == 0 {
		return nil, fmt.Errorf("author is an empty string")
	}

	namePartCount := len(parts)
	trailing := parts[namePartCount-1]
	var email string
	if len(trailing) >= len(emailPrefix)+len(emailSuffix) && strings.HasPrefix(trailing, emailPrefix) &&
		strings.HasSuffix(trailing, emailSuffix) {
		// Strip the surrounding angle brackets to obtain the address.
		email = trailing[len(emailPrefix):]
		email = email[0 : len(email)-len(emailSuffix)]
		namePartCount = namePartCount - 1
	}

	name := strings.Join(parts[0:namePartCount], " ")

	return &AuthorAnnotation{Name: name, Email: email}, nil
}

// convertYAMLMapKeyTypes recursively converts yaml.v3's
// map[interface{}]interface{} values into map[string]interface{},
// returning an error (with the offending path) for non-string keys.
func convertYAMLMapKeyTypes(x interface{}, path []string) (interface{}, error) {
	var err error
	switch x := x.(type) {
	case map[interface{}]interface{}:
		result := make(map[string]interface{}, len(x))
		for k, v := range x {
			str, ok := k.(string)
			if !ok {
				return nil, fmt.Errorf("invalid map key type(s): %v", strings.Join(path, "/"))
			}
			result[str], err = convertYAMLMapKeyTypes(v, append(path, str))
			if err != nil {
				return nil, err
			}
		}
		return result, nil
	case []interface{}:
		for i := range x {
			x[i], err = convertYAMLMapKeyTypes(x[i], append(path, fmt.Sprintf("%d", i)))
			if err != nil {
				return nil, err
			}
		}
		return x, nil
	default:
		return x, nil
	}
}

// futureKeywords is the source of truth for future keywords that will
// eventually become standard keywords inside of Rego.
var futureKeywords = map[string]tokens.Token{
	"in":       tokens.In,
	"every":    tokens.Every,
	"contains": tokens.Contains,
	"if":       tokens.If,
}

// futureImport validates a `future.keywords[...]` import and registers the
// imported keyword(s) with the scanner. Importing `future.keywords` enables
// all allowed keywords; `future.keywords.x` enables only x.
func (p *Parser) futureImport(imp *Import, allowedFutureKeywords map[string]tokens.Token) {
	path := imp.Path.Value.(Ref)

	if len(path) == 1 || !path[1].Equal(StringTerm("keywords")) {
		p.errorf(imp.Path.Location, "invalid import, must be `future.keywords`")
		return
	}

	if imp.Alias != "" {
		p.errorf(imp.Path.Location, "`future` imports cannot be aliased")
		return
	}

	kwds := make([]string, 0, len(allowedFutureKeywords))
	for k := range allowedFutureKeywords {
		kwds = append(kwds, k)
	}

	switch len(path) {
	case 2: // all keywords imported, nothing to do
	case 3: // one keyword imported
		kw, ok := path[2].Value.(String)
		if !ok {
			p.errorf(imp.Path.Location, "invalid import, must be `future.keywords.x`, e.g. `import future.keywords.in`")
			return
		}
		keyword := string(kw)
		_, ok = allowedFutureKeywords[keyword]
		if !ok {
			sort.Strings(kwds) // so the error message is stable
			p.errorf(imp.Path.Location, "unexpected keyword, must be one of %v", kwds)
			return
		}

		kwds = []string{keyword} // overwrite
	}
	for _, kw := range kwds {
		p.s.s.AddKeyword(kw, allowedFutureKeywords[kw])
	}
}