// Source: go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/aip/filter_parser.go

// Copyright 2022 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package aip contains utilities used to comply with API Improvement
// Proposals (AIPs) from https://google.aip.dev/. This includes
// an AIP-160 filter parser and AIP-132 order by clause parser.
package aip

// This file contains a lexer and parser for AIP-160 filter expressions.
// The EBNF is at https://google.aip.dev/assets/misc/ebnf-filtering.txt
// The function call syntax is not supported which simplifies the parser.
//
// Implemented EBNF (in terms of lexer tokens):
// filter: [expression];
// expression: sequence {WS AND WS sequence};
// sequence: factor {WS factor};
// factor: term {WS OR WS term};
// term: [NEGATE] simple;
// simple: restriction | composite;
// restriction: comparable [COMPARATOR arg];
// comparable: member;
// member: (TEXT | STRING) {DOT TEXT};
// composite: LPAREN expression RPAREN;
// arg: comparable | composite;
//
// TODO(mwarton): Redo whitespace handling. There are still some cases (like "- 30")
// which are accepted as valid instead of being rejected.

import (
	"fmt"
	"regexp"
	"strconv"
	"strings"
)

// Token kinds produced by the lexer.
const (
	kindComparator = "COMPARATOR"
	kindNegate     = "NEGATE"
	kindAnd        = "AND"
	kindOr         = "OR"
	kindDot        = "DOT"
	kindLParen     = "LPAREN"
	kindRParen     = "RPAREN"
	kindComma      = "COMMA"
	kindString     = "STRING"
	kindText       = "TEXT"
	kindEnd        = "END"
)

// lexerRegexp has one capture group for each kind of token that can be lexed,
// in the order of the kind consts above. There are two groups for kindNegate
// ("NOT" followed by whitespace, and "-") to handle whitespace correctly.
//
// The whole alternation is wrapped in a non-capturing group so that the
// leading ^ anchors every alternative, not just the first one. Without that,
// input whose first character starts no token (e.g. a lone "!") would match
// further along the string while the lexer consumed bytes from the front.
var lexerRegexp = regexp.MustCompile(`^(?:(<=|>=|!=|<|>|=|\:)|(NOT\s)|(-)|(AND\s)|(OR\s)|(\.)|(\()|(\))|(,)|("(?:[^"\\]|\\.)*")|([^\s\.,<>=!:\(\)]+))`)

// lexerGroupKinds maps each capture group index of lexerRegexp to the token
// kind it produces. Index 0 (the whole match) is unused.
//
// keyword marks the groups whose pattern ends in \s: that trailing whitespace
// byte is required so that e.g. "NOTother" lexes as a single TEXT token, and
// it must be stripped from the token value.
var lexerGroupKinds = [...]struct {
	kind    string
	keyword bool
}{
	1:  {kind: kindComparator},
	2:  {kind: kindNegate, keyword: true},
	3:  {kind: kindNegate},
	4:  {kind: kindAnd, keyword: true},
	5:  {kind: kindOr, keyword: true},
	6:  {kind: kindDot},
	7:  {kind: kindLParen},
	8:  {kind: kindRParen},
	9:  {kind: kindComma},
	10: {kind: kindString},
	11: {kind: kindText},
}

// token is a single lexed token: its kind (one of the kind* constants) and
// the text that produced it.
type token struct {
	kind  string
	value string
}

// filterLexer splits a filter string into tokens, with one token of
// lookahead available through Peek.
type filterLexer struct {
	// input is the part of the filter that has not been lexed yet.
	input string
	// next is the token returned by the last Peek that has not been
	// consumed by Next yet, or nil.
	next *token
}

// NewLexer returns a lexer positioned at the start of input.
func NewLexer(input string) *filterLexer {
	return &filterLexer{input: input}
}

// Peek returns the next token without consuming it. Repeated calls return the
// same token until Next is called.
func (l *filterLexer) Peek() (*token, error) {
	if l.next == nil {
		t, err := l.Next()
		if err != nil {
			return nil, err
		}
		l.next = t
	}
	return l.next, nil
}

// Next consumes and returns the next token. At the end of the input it
// returns a token of kind kindEnd. It returns an error if the remaining input
// does not start with a valid token.
func (l *filterLexer) Next() (*token, error) {
	// Hand out the token buffered by Peek, if any.
	if l.next != nil {
		t := l.next
		l.next = nil
		return t, nil
	}

	l.input = strings.TrimLeft(l.input, " \t\r\n")
	if l.input == "" {
		return &token{kind: kindEnd}, nil
	}

	matches := lexerRegexp.FindStringSubmatch(l.input)
	if matches == nil {
		return nil, fmt.Errorf("error: unable to lex token from %q", l.input)
	}
	l.input = l.input[len(matches[0]):]

	// Exactly one alternative of the regexp matched; find its group.
	for i := 1; i < len(lexerGroupKinds); i++ {
		value := matches[i]
		if value == "" {
			continue
		}
		group := lexerGroupKinds[i]
		if group.keyword {
			// Drop the single trailing whitespace byte matched by \s.
			value = value[:len(value)-1]
		}
		return &token{kind: group.kind, value: value}, nil
	}
	return nil, fmt.Errorf("error: unhandled lexer regexp match %q", matches[0])
}

// AST Nodes. These are based on the EBNF at https://google.aip.dev/assets/misc/ebnf-filtering.txt
// Note that the syntax for functions is not currently supported.

// Filter, possibly empty
type Filter struct {
	Expression *Expression // Optional, may be nil.
}

// String returns a debug representation of the filter and its children.
func (v *Filter) String() string {
	var s strings.Builder
	s.WriteString("filter{")
	if v.Expression != nil {
		s.WriteString(v.Expression.String())
	}
	s.WriteString("}")
	return s.String()
}

// Expressions may either be a conjunction (AND) of sequences or a simple
// sequence.
//
// Note, the AND is case-sensitive.
//
// Example: `a b AND c AND d`
//
// The expression `(a b) AND c AND d` is equivalent to the example.
type Expression struct {
	// Sequences are always joined by an AND operator
	Sequences []*Sequence
}

// String returns a debug representation of the expression: its sequences,
// comma separated.
func (v *Expression) String() string {
	var s strings.Builder
	s.WriteString("expression{")
	for i, c := range v.Sequences {
		if i > 0 {
			s.WriteString(",")
		}
		if c != nil {
			s.WriteString(c.String())
		}
	}
	s.WriteString("}")
	return s.String()
}

// Sequence is composed of one or more whitespace (WS) separated factors.
//
// A sequence expresses a logical relationship between 'factors' where
// the ranking of a filter result may be scored according to the number
// factors that match and other such criteria as the proximity of factors
// to each other within a document.
//
// When filters are used with exact match semantics rather than fuzzy
// match semantics, a sequence is equivalent to AND.
//
// Example: `New York Giants OR Yankees`
//
// The expression `New York (Giants OR Yankees)` is equivalent to the
// example.
type Sequence struct {
	// Factors are always joined by an (implicit) AND operator
	Factors []*Factor
}

// String returns a debug representation of the sequence: its factors, comma
// separated.
func (v *Sequence) String() string {
	var s strings.Builder
	s.WriteString("sequence{")
	for i, c := range v.Factors {
		if i > 0 {
			s.WriteString(",")
		}
		if c != nil {
			s.WriteString(c.String())
		}
	}
	s.WriteString("}")
	return s.String()
}

// Factors may either be a disjunction (OR) of terms or a simple term.
//
// Note, the OR is case-sensitive.
//
// Example: `a < 10 OR a >= 100`
type Factor struct {
	// Terms are always joined by an OR operator
	Terms []*Term
}

// String returns a debug representation of the factor: its terms, comma
// separated.
func (v *Factor) String() string {
	var s strings.Builder
	s.WriteString("factor{")
	for i, c := range v.Terms {
		if i > 0 {
			s.WriteString(",")
		}
		if c != nil {
			s.WriteString(c.String())
		}
	}
	s.WriteString("}")
	return s.String()
}

// Terms may either be unary or simple expressions.
//
// Unary expressions negate the simple expression, either mathematically `-`
// or logically `NOT`. The negation styles may be used interchangeably.
//
// Note, the `NOT` is case-sensitive and must be followed by at least one
// whitespace (WS).
//
// Examples:
// * logical not     : `NOT (a OR b)`
// * alternative not : `-file:".java"`
// * negation        : `-30`
type Term struct {
	Negated bool
	Simple  *Simple
}

// String returns a debug representation of the term. A negated term is
// prefixed with "-" regardless of which negation style was parsed.
func (v *Term) String() string {
	var s strings.Builder
	s.WriteString("term{")
	if v.Negated {
		s.WriteString("-")
	}
	if v.Simple != nil {
		s.WriteString(v.Simple.String())
	}
	s.WriteString("}")
	return s.String()
}

// Simple expressions may either be a restriction or a nested (composite)
// expression.
type Simple struct {
	Restriction *Restriction
	// Composite is a parenthesized expression, commonly used to group
	// terms or clarify operator precedence.
	//
	// Example: `(msg.endsWith('world') AND retries < 10)`
	Composite *Expression
}

// String returns a debug representation of whichever of Restriction and
// Composite is set.
func (v *Simple) String() string {
	var s strings.Builder
	s.WriteString("simple{")
	if v.Restriction != nil {
		s.WriteString(v.Restriction.String())
	}
	if v.Restriction != nil && v.Composite != nil {
		s.WriteString(",")
	}
	if v.Composite != nil {
		s.WriteString(v.Composite.String())
	}
	s.WriteString("}")
	return s.String()
}

// Restrictions express a relationship between a comparable value and a
// single argument. When the restriction only specifies a comparable
// without an operator, this is a global restriction.
//
// Note, restrictions are not whitespace sensitive.
//
// Examples:
// * equality         : `package=com.google`
// * inequality       : `msg != 'hello'`
// * greater than     : `1 > 0`
// * greater or equal : `2.5 >= 2.4`
// * less than        : `yesterday < request.time`
// * less or equal    : `experiment.rollout <= cohort(request.user)`
// * has              : `map:key`
// * global           : `prod`
//
// In addition to the global, equality, and ordering operators, filters
// also support the has (`:`) operator. The has operator is unique in
// that it can test for presence or value based on the proto3 type of
// the `comparable` value. The has operator is useful for validating the
// structure and contents of complex values.
type Restriction struct {
	Comparable *Comparable
	// Comparators supported by list filters: <=, <, >=, >, !=, =, :
	Comparator string
	Arg        *Arg
}

// String returns a debug representation of the restriction. The comparator
// and argument are omitted when they are not set.
func (v *Restriction) String() string {
	var s strings.Builder
	s.WriteString("restriction{")
	if v.Comparable != nil {
		s.WriteString(v.Comparable.String())
	}
	if v.Comparator != "" {
		s.WriteString(",")
		s.WriteString(strconv.Quote(v.Comparator))
	}
	if v.Arg != nil {
		s.WriteString(",")
		s.WriteString(v.Arg.String())
	}
	s.WriteString("}")
	return s.String()
}

// Arg is the right-hand side of a restriction: either a comparable or a
// composite.
type Arg struct {
	Comparable *Comparable
	// Composite is a parenthesized expression, commonly used to group
	// terms or clarify operator precedence.
	//
	// Example: `(msg.endsWith('world') AND retries < 10)`
	Composite *Expression
}

// String returns a debug representation of whichever of Comparable and
// Composite is set.
func (v *Arg) String() string {
	var s strings.Builder
	s.WriteString("arg{")
	if v.Comparable != nil {
		s.WriteString(v.Comparable.String())
	}
	if v.Comparable != nil && v.Composite != nil {
		s.WriteString(",")
	}
	if v.Composite != nil {
		s.WriteString(v.Composite.String())
	}
	s.WriteString("}")
	return s.String()
}

// Comparable may either be a member or function. As functions are not currently supported, it is always a member.
type Comparable struct {
	Member *Member
}

// String returns a debug representation of the comparable.
func (v *Comparable) String() string {
	var s strings.Builder
	s.WriteString("comparable{")
	if v.Member != nil {
		s.WriteString(v.Member.String())
	}
	s.WriteString("}")
	return s.String()
}

// Member expressions are either value or DOT qualified field references.
//
// Example: `expr.type_map.1.type`
type Member struct {
	Value  string
	Fields []string
}

// String returns a debug representation of the member: the quoted value,
// followed by the quoted fields in their own braces when there are any.
func (v *Member) String() string {
	var s strings.Builder
	s.WriteString("member{")
	s.WriteString(strconv.Quote(v.Value))
	if len(v.Fields) > 0 {
		s.WriteString(", {")
		for i, c := range v.Fields {
			if i > 0 {
				s.WriteString(",")
			}
			s.WriteString(strconv.Quote(c))
		}
		// Close the field list only when it was opened, so that the
		// output stays balanced for members without fields.
		s.WriteString("}")
	}
	s.WriteString("}")
	return s.String()
}

// Parse an AIP-160 filter string into an AST.
//
// An empty (or whitespace-only) filter yields a Filter with a nil Expression.
// On error the returned filter is nil.
func ParseFilter(filter string) (*Filter, error) {
	return newParser(filter).filter()
}

// parser is a recursive descent parser over the tokens of a filterLexer.
//
// Each grammar method returns (nil, nil) when the upcoming token cannot start
// the production, without consuming input, so that the caller can try an
// alternative.
type parser struct {
	lexer filterLexer
}

// newParser returns a parser positioned at the start of input.
func newParser(input string) *parser {
	return &parser{lexer: *NewLexer(input)}
}

// expect consumes the next token, which must be of the given kind; otherwise
// an error is returned and the token is left unconsumed.
func (p *parser) expect(kind string) error {
	t, err := p.lexer.Peek()
	if err != nil {
		return err
	}
	if t.kind != kind {
		return fmt.Errorf("expected %s but got %s(%q)", kind, t.kind, t.value)
	}
	_, err = p.lexer.Next()
	return err
}

// accept consumes and returns the next token if it is of the given kind.
// Otherwise it returns nil and leaves the token unconsumed.
func (p *parser) accept(kind string) (*token, error) {
	t, err := p.lexer.Peek()
	if err != nil {
		return nil, err
	}
	if t.kind != kind {
		return nil, nil
	}
	return p.lexer.Next()
}

// filter parses `filter: [expression];` and requires that the whole input is
// consumed.
func (p *parser) filter() (*Filter, error) {
	end, err := p.accept(kindEnd)
	if err != nil {
		return nil, err
	}
	if end != nil {
		return &Filter{}, nil
	}
	e, err := p.expression()
	if err != nil {
		return nil, err
	}
	if err := p.expect(kindEnd); err != nil {
		return nil, err
	}
	return &Filter{Expression: e}, nil
}

// expression parses `expression: sequence {WS AND WS sequence};`.
func (p *parser) expression() (*Expression, error) {
	first, err := p.sequence()
	if err != nil {
		return nil, err
	}
	if first == nil {
		return nil, nil
	}
	e := &Expression{Sequences: []*Sequence{first}}
	for {
		and, err := p.accept(kindAnd)
		if err != nil {
			return nil, err
		}
		if and == nil {
			return e, nil
		}
		s, err := p.sequence()
		if err != nil {
			return nil, err
		}
		if s == nil {
			return nil, fmt.Errorf("expected sequence after AND")
		}
		e.Sequences = append(e.Sequences, s)
	}
}

// sequence parses `sequence: factor {WS factor};`.
func (p *parser) sequence() (*Sequence, error) {
	s := &Sequence{}
	for {
		f, err := p.factor()
		if err != nil {
			return nil, err
		}
		if f == nil {
			break
		}
		s.Factors = append(s.Factors, f)
	}
	if len(s.Factors) == 0 {
		return nil, nil
	}
	return s, nil
}

// factor parses `factor: term {WS OR WS term};`.
func (p *parser) factor() (*Factor, error) {
	first, err := p.term()
	if err != nil {
		return nil, err
	}
	if first == nil {
		return nil, nil
	}
	f := &Factor{Terms: []*Term{first}}
	for {
		or, err := p.accept(kindOr)
		if err != nil {
			return nil, err
		}
		if or == nil {
			return f, nil
		}
		t, err := p.term()
		if err != nil {
			return nil, err
		}
		if t == nil {
			return nil, fmt.Errorf("expected term after OR")
		}
		f.Terms = append(f.Terms, t)
	}
}

// term parses `term: [NEGATE] simple;`.
func (p *parser) term() (*Term, error) {
	n, err := p.accept(kindNegate)
	if err != nil {
		return nil, err
	}
	s, err := p.simple()
	if err != nil {
		return nil, err
	}
	if s == nil {
		if n != nil {
			// The negation was already consumed, so this cannot be
			// reported as "no term here".
			return nil, fmt.Errorf("expected simple term after negation %q", n.value)
		}
		return nil, nil
	}
	return &Term{Negated: n != nil, Simple: s}, nil
}

// simple parses `simple: restriction | composite;`.
func (p *parser) simple() (*Simple, error) {
	r, err := p.restriction()
	if err != nil {
		return nil, err
	}
	if r != nil {
		return &Simple{Restriction: r}, nil
	}
	c, err := p.composite()
	if err != nil {
		return nil, err
	}
	if c != nil {
		return &Simple{Composite: c}, nil
	}
	return nil, nil
}

// restriction parses `restriction: comparable [COMPARATOR arg];`.
func (p *parser) restriction() (*Restriction, error) {
	comparable, err := p.comparable()
	if err != nil {
		return nil, err
	}
	if comparable == nil {
		return nil, nil
	}
	comparator, err := p.accept(kindComparator)
	if err != nil {
		return nil, err
	}
	if comparator == nil {
		// Global restriction: a comparable on its own.
		return &Restriction{Comparable: comparable}, nil
	}
	arg, err := p.arg()
	if err != nil {
		return nil, err
	}
	if arg == nil {
		return nil, fmt.Errorf("expected arg after %s", comparator.value)
	}
	return &Restriction{Comparable: comparable, Comparator: comparator.value, Arg: arg}, nil
}

// comparable parses `comparable: member;`.
func (p *parser) comparable() (*Comparable, error) {
	m, err := p.member()
	if err != nil {
		return nil, err
	}
	if m == nil {
		return nil, nil
	}
	return &Comparable{Member: m}, nil
}

// member parses `member: (TEXT | STRING) {DOT TEXT};`.
//
// A STRING member is unquoted with strconv.Unquote and is returned without
// looking for DOT qualified fields.
func (p *parser) member() (*Member, error) {
	str, err := p.accept(kindString)
	if err != nil {
		return nil, err
	}
	if str != nil {
		value, err := strconv.Unquote(str.value)
		if err != nil {
			return nil, fmt.Errorf("error unquoting string: %w", err)
		}
		return &Member{Value: value}, nil
	}

	text, err := p.accept(kindText)
	if err != nil {
		return nil, err
	}
	if text == nil {
		return nil, nil
	}
	m := &Member{Value: text.value}
	for {
		dot, err := p.accept(kindDot)
		if err != nil {
			return nil, err
		}
		if dot == nil {
			return m, nil
		}
		f, err := p.accept(kindText)
		if err != nil {
			return nil, err
		}
		if f == nil {
			return nil, fmt.Errorf("expected field name after '.'")
		}
		m.Fields = append(m.Fields, f.value)
	}
}

// composite parses `composite: LPAREN expression RPAREN;` and returns the
// parenthesized expression.
func (p *parser) composite() (*Expression, error) {
	lparen, err := p.accept(kindLParen)
	if err != nil {
		return nil, err
	}
	if lparen == nil {
		return nil, nil
	}
	e, err := p.expression()
	if err != nil {
		return nil, err
	}
	if e == nil {
		return nil, fmt.Errorf("expected expression")
	}
	if err := p.expect(kindRParen); err != nil {
		return nil, err
	}
	return e, nil
}

// arg parses `arg: comparable | composite;`.
func (p *parser) arg() (*Arg, error) {
	comparable, err := p.comparable()
	if err != nil {
		return nil, err
	}
	if comparable != nil {
		return &Arg{Comparable: comparable}, nil
	}
	composite, err := p.composite()
	if err != nil {
		return nil, err
	}
	if composite != nil {
		return &Arg{Composite: composite}, nil
	}
	return nil, nil
}