go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/aip/filter_parser.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package aip contains utilities used to comply with API Improvement
    16  // Proposals (AIPs) from https://google.aip.dev/. This includes
    17  // an AIP-160 filter parser and AIP-132 order by clause parser.
    18  package aip
    19  
    20  // This file contains a lexer and parser for AIP-160 filter expressions.
    21  // The EBNF is at https://google.aip.dev/assets/misc/ebnf-filtering.txt
    22  // The function call syntax is not supported which simplifies the parser.
    23  //
    24  // Implemented EBNF (in terms of lexer tokens):
    25  // filter: [expression];
    26  // expression: sequence {WS AND WS sequence};
    27  // sequence: factor {WS factor};
    28  // factor: term {WS OR WS term};
    29  // term: [NEGATE] simple;
    30  // simple: restriction | composite;
    31  // restriction: comparable [COMPARATOR arg];
    32  // comparable: member;
    33  // member: STRING | (TEXT {DOT TEXT});
    34  // composite: LPAREN expression RPAREN;
    35  // arg: comparable | composite;
    36  //
    37  // TODO(mwarton): Redo whitespace handling.  There are still some cases (like "- 30")
    38  // 				  which are accepted as valid instead of being rejected.
    39  import (
    40  	"fmt"
    41  	"regexp"
    42  	"strconv"
    43  	"strings"
    44  )
    45  
    46  const (
    47  	kindComparator = "COMPARATOR"
    48  	kindNegate     = "NEGATE"
    49  	kindAnd        = "AND"
    50  	kindOr         = "OR"
    51  	kindDot        = "DOT"
    52  	kindLParen     = "LPAREN"
    53  	kindRParen     = "RPAREN"
    54  	kindComma      = "COMMA"
    55  	kindString     = "STRING"
    56  	kindText       = "TEXT"
    57  	kindEnd        = "END"
    58  )
    59  
    60  // lexerRegexp has one group for each kind of token that can be lexed, in the order of the kind consts above. There are two cases for kindNegate to handle whitespace correctly.
    61  var lexerRegexp = regexp.MustCompile(`^(<=|>=|!=|<|>|=|\:)|(NOT\s)|(-)|(AND\s)|(OR\s)|(\.)|(\()|(\))|(,)|("(?:[^"\\]|\\.)*")|([^\s\.,<>=!:\(\)]+)`)
    62  
    63  type token struct {
    64  	kind  string
    65  	value string
    66  }
    67  
    68  type filterLexer struct {
    69  	input string
    70  	next  *token
    71  }
    72  
    73  func NewLexer(input string) *filterLexer {
    74  	return &filterLexer{input: input}
    75  }
    76  
    77  func (l *filterLexer) Peek() (*token, error) {
    78  	if l.next == nil {
    79  		var err error
    80  		l.next, err = l.Next()
    81  		if err != nil {
    82  			return nil, err
    83  		}
    84  	}
    85  	return l.next, nil
    86  }
    87  
    88  func (l *filterLexer) Next() (*token, error) {
    89  	if l.next != nil {
    90  		next := l.next
    91  		l.next = nil
    92  		return next, nil
    93  	}
    94  	l.next = nil
    95  	l.input = strings.TrimLeft(l.input, " \t\r\n")
    96  	if l.input == "" {
    97  		return &token{kind: kindEnd}, nil
    98  	}
    99  	matches := lexerRegexp.FindStringSubmatch(l.input)
   100  	if matches == nil {
   101  		return nil, fmt.Errorf("error: unable to lex token from %q", l.input)
   102  	}
   103  	l.input = l.input[len(matches[0]):]
   104  	if matches[1] != "" {
   105  		return &token{kind: kindComparator, value: matches[1]}, nil
   106  	}
   107  	if matches[2] != "" {
   108  		// Needs to be fixed up to compensate for the trailing \s in the match which prevents
   109  		// matching "NOTother" as a negated "other".
   110  		length := len(matches[2])
   111  		return &token{kind: kindNegate, value: matches[2][:length-1]}, nil
   112  	}
   113  	if matches[3] != "" {
   114  		return &token{kind: kindNegate, value: matches[3]}, nil
   115  	}
   116  	if matches[4] != "" {
   117  		// Needs to be fixed up to compensate for the trailing \s in the match which prevents
   118  		// matching "ANDother" as a "AND" "other".
   119  		length := len(matches[4])
   120  		return &token{kind: kindAnd, value: matches[4][:length-1]}, nil
   121  	}
   122  	if matches[5] != "" {
   123  		// Needs to be fixed up to compensate for the trailing \s in the match which prevents
   124  		// matching "ORother" as a "OR" "other".
   125  		length := len(matches[5])
   126  		return &token{kind: kindOr, value: matches[5][:length-1]}, nil
   127  	}
   128  	if matches[6] != "" {
   129  		return &token{kind: kindDot, value: matches[6]}, nil
   130  	}
   131  	if matches[7] != "" {
   132  		return &token{kind: kindLParen, value: matches[7]}, nil
   133  	}
   134  	if matches[8] != "" {
   135  		return &token{kind: kindRParen, value: matches[8]}, nil
   136  	}
   137  	if matches[9] != "" {
   138  		return &token{kind: kindComma, value: matches[9]}, nil
   139  	}
   140  	if matches[10] != "" {
   141  		return &token{kind: kindString, value: matches[10]}, nil
   142  	}
   143  	if matches[11] != "" {
   144  		return &token{kind: kindText, value: matches[11]}, nil
   145  	}
   146  	return nil, fmt.Errorf("error: unhandled lexer regexp match %q", matches[0])
   147  }
   148  
   149  // AST Nodes.  These are based on the EBNF at https://google.aip.dev/assets/misc/ebnf-filtering.txt
   150  // Note that the syntax for functions is not currently supported.
   151  
   152  // Filter, possibly empty
   153  type Filter struct {
   154  	Expression *Expression // Optional, may be nil.
   155  }
   156  
   157  func (v *Filter) String() string {
   158  	var s strings.Builder
   159  	s.WriteString("filter{")
   160  	if v.Expression != nil {
   161  		s.WriteString(v.Expression.String())
   162  	}
   163  	s.WriteString("}")
   164  	return s.String()
   165  }
   166  
   167  // Expressions may either be a conjunction (AND) of sequences or a simple
   168  // sequence.
   169  //
   170  // Note, the AND is case-sensitive.
   171  //
   172  // Example: `a b AND c AND d`
   173  //
   174  // The expression `(a b) AND c AND d` is equivalent to the example.
   175  type Expression struct {
   176  	// Sequences are always joined by an AND operator
   177  	Sequences []*Sequence
   178  }
   179  
   180  func (v *Expression) String() string {
   181  	var s strings.Builder
   182  	s.WriteString("expression{")
   183  	for i, c := range v.Sequences {
   184  		if i > 0 {
   185  			s.WriteString(",")
   186  		}
   187  		if c != nil {
   188  			s.WriteString(c.String())
   189  		}
   190  	}
   191  	s.WriteString("}")
   192  	return s.String()
   193  }
   194  
   195  // Sequence is composed of one or more whitespace (WS) separated factors.
   196  //
   197  // A sequence expresses a logical relationship between 'factors' where
   198  // the ranking of a filter result may be scored according to the number
   199  // factors that match and other such criteria as the proximity of factors
   200  // to each other within a document.
   201  //
   202  // When filters are used with exact match semantics rather than fuzzy
   203  // match semantics, a sequence is equivalent to AND.
   204  //
   205  // Example: `New York Giants OR Yankees`
   206  //
   207  // The expression `New York (Giants OR Yankees)` is equivalent to the
   208  // example.
   209  type Sequence struct {
   210  	// Factors are always joined by an (implicit) AND operator
   211  	Factors []*Factor
   212  }
   213  
   214  func (v *Sequence) String() string {
   215  	var s strings.Builder
   216  	s.WriteString("sequence{")
   217  	for i, c := range v.Factors {
   218  		if i > 0 {
   219  			s.WriteString(",")
   220  		}
   221  		if c != nil {
   222  			s.WriteString(c.String())
   223  		}
   224  	}
   225  	s.WriteString("}")
   226  	return s.String()
   227  }
   228  
   229  // Factors may either be a disjunction (OR) of terms or a simple term.
   230  //
   231  // Note, the OR is case-sensitive.
   232  //
   233  // Example: `a < 10 OR a >= 100`
   234  type Factor struct {
   235  	// Terms are always joined by an OR operator
   236  	Terms []*Term
   237  }
   238  
   239  func (v *Factor) String() string {
   240  	var s strings.Builder
   241  	s.WriteString("factor{")
   242  	for i, c := range v.Terms {
   243  		if i > 0 {
   244  			s.WriteString(",")
   245  		}
   246  		if c != nil {
   247  			s.WriteString(c.String())
   248  		}
   249  	}
   250  	s.WriteString("}")
   251  	return s.String()
   252  }
   253  
   254  // Terms may either be unary or simple expressions.
   255  //
   256  // Unary expressions negate the simple expression, either mathematically `-`
   257  // or logically `NOT`. The negation styles may be used interchangeably.
   258  //
   259  // Note, the `NOT` is case-sensitive and must be followed by at least one
   260  // whitespace (WS).
   261  //
   262  // Examples:
   263  // * logical not     : `NOT (a OR b)`
   264  // * alternative not : `-file:".java"`
   265  // * negation        : `-30`
   266  type Term struct {
   267  	Negated bool
   268  	Simple  *Simple
   269  }
   270  
   271  func (v *Term) String() string {
   272  	var s strings.Builder
   273  	s.WriteString("term{")
   274  	if v.Negated {
   275  		s.WriteString("-")
   276  	}
   277  	if v.Simple != nil {
   278  		s.WriteString(v.Simple.String())
   279  	}
   280  	s.WriteString("}")
   281  	return s.String()
   282  }
   283  
   284  // Simple expressions may either be a restriction or a nested (composite)
   285  // expression.
   286  type Simple struct {
   287  	Restriction *Restriction
   288  	// Composite is a parenthesized expression, commonly used to group
   289  	// terms or clarify operator precedence.
   290  	//
   291  	// Example: `(msg.endsWith('world') AND retries < 10)`
   292  	Composite *Expression
   293  }
   294  
   295  func (v *Simple) String() string {
   296  	var s strings.Builder
   297  	s.WriteString("simple{")
   298  	if v.Restriction != nil {
   299  		s.WriteString(v.Restriction.String())
   300  	}
   301  	if v.Restriction != nil && v.Composite != nil {
   302  		s.WriteString(",")
   303  	}
   304  	if v.Composite != nil {
   305  		s.WriteString(v.Composite.String())
   306  	}
   307  	s.WriteString("}")
   308  	return s.String()
   309  }
   310  
   311  // Restrictions express a relationship between a comparable value and a
   312  // single argument. When the restriction only specifies a comparable
   313  // without an operator, this is a global restriction.
   314  //
   315  // Note, restrictions are not whitespace sensitive.
   316  //
   317  // Examples:
   318  // * equality         : `package=com.google`
   319  // * inequality       : `msg != 'hello'`
   320  // * greater than     : `1 > 0`
   321  // * greater or equal : `2.5 >= 2.4`
   322  // * less than        : `yesterday < request.time`
   323  // * less or equal    : `experiment.rollout <= cohort(request.user)`
   324  // * has              : `map:key`
   325  // * global           : `prod`
   326  //
   327  // In addition to the global, equality, and ordering operators, filters
   328  // also support the has (`:`) operator. The has operator is unique in
   329  // that it can test for presence or value based on the proto3 type of
   330  // the `comparable` value. The has operator is useful for validating the
   331  // structure and contents of complex values.
   332  type Restriction struct {
   333  	Comparable *Comparable
   334  	// Comparators supported by list filters: <=, <. >=, >, !=, =, :
   335  	Comparator string
   336  	Arg        *Arg
   337  }
   338  
   339  func (v *Restriction) String() string {
   340  	var s strings.Builder
   341  	s.WriteString("restriction{")
   342  	if v.Comparable != nil {
   343  		s.WriteString(v.Comparable.String())
   344  	}
   345  	if v.Comparator != "" {
   346  		s.WriteString(",")
   347  		s.WriteString(strconv.Quote(v.Comparator))
   348  	}
   349  	if v.Arg != nil {
   350  		s.WriteString(",")
   351  		s.WriteString(v.Arg.String())
   352  	}
   353  	s.WriteString("}")
   354  	return s.String()
   355  }
   356  
   357  type Arg struct {
   358  	Comparable *Comparable
   359  	// Composite is a parenthesized expression, commonly used to group
   360  	// terms or clarify operator precedence.
   361  	//
   362  	// Example: `(msg.endsWith('world') AND retries < 10)`
   363  	Composite *Expression
   364  }
   365  
   366  func (v *Arg) String() string {
   367  	var s strings.Builder
   368  	s.WriteString("arg{")
   369  	if v.Comparable != nil {
   370  		s.WriteString(v.Comparable.String())
   371  	}
   372  	if v.Comparable != nil && v.Composite != nil {
   373  		s.WriteString(",")
   374  	}
   375  	if v.Composite != nil {
   376  		s.WriteString(v.Composite.String())
   377  	}
   378  	s.WriteString("}")
   379  	return s.String()
   380  }
   381  
   382  // Comparable may either be a member or function.  As functions are not currently supported, it is always a member.
   383  type Comparable struct {
   384  	Member *Member
   385  }
   386  
   387  func (v *Comparable) String() string {
   388  	var s strings.Builder
   389  	s.WriteString("comparable{")
   390  	if v.Member != nil {
   391  		s.WriteString(v.Member.String())
   392  	}
   393  	s.WriteString("}")
   394  	return s.String()
   395  }
   396  
   397  // Member expressions are either value or DOT qualified field references.
   398  //
   399  // Example: `expr.type_map.1.type`
   400  type Member struct {
   401  	Value  string
   402  	Fields []string
   403  }
   404  
   405  func (v *Member) String() string {
   406  	var s strings.Builder
   407  	s.WriteString("member{")
   408  	s.Write([]byte(strconv.Quote(v.Value)))
   409  	if len(v.Fields) > 0 {
   410  		s.WriteString(", {")
   411  	}
   412  	for i, c := range v.Fields {
   413  		if i > 0 {
   414  			s.WriteString(",")
   415  		}
   416  		s.WriteString(strconv.Quote(c))
   417  	}
   418  	s.WriteString("}}")
   419  	return s.String()
   420  }
   421  
   422  // Parse an AIP-160 filter string into an AST.
   423  func ParseFilter(filter string) (*Filter, error) {
   424  	return newParser(filter).filter()
   425  }
   426  
   427  type parser struct {
   428  	lexer filterLexer
   429  }
   430  
   431  func newParser(input string) *parser {
   432  	return &parser{lexer: *NewLexer(input)}
   433  }
   434  
   435  func (p *parser) expect(kind string) error {
   436  	t, err := p.lexer.Peek()
   437  	if err != nil {
   438  		return err
   439  	}
   440  	if t.kind != kind {
   441  		return fmt.Errorf("expected %s but got %s(%q)", kind, t.kind, t.value)
   442  	}
   443  	_, err = p.lexer.Next()
   444  	return err
   445  }
   446  
   447  func (p *parser) accept(kind string) (*token, error) {
   448  	t, err := p.lexer.Peek()
   449  	if err != nil {
   450  		return nil, err
   451  	}
   452  	if t.kind != kind {
   453  		return nil, nil
   454  	}
   455  	return p.lexer.Next()
   456  }
   457  
   458  func (p *parser) filter() (*Filter, error) {
   459  	t, err := p.accept(kindEnd)
   460  	if err != nil {
   461  		return nil, err
   462  	}
   463  	if t != nil {
   464  		return &Filter{}, nil
   465  	}
   466  	e, err := p.expression()
   467  	if err != nil {
   468  		return nil, err
   469  	}
   470  	return &Filter{Expression: e}, p.expect(kindEnd)
   471  }
   472  
   473  func (p *parser) expression() (*Expression, error) {
   474  	s, err := p.sequence()
   475  	if err != nil {
   476  		return nil, err
   477  	}
   478  	if s == nil {
   479  		return nil, nil
   480  	}
   481  	e := &Expression{}
   482  	e.Sequences = append(e.Sequences, s)
   483  	for {
   484  		and, err := p.accept(kindAnd)
   485  		if err != nil {
   486  			return nil, err
   487  		}
   488  		if and == nil {
   489  			break
   490  		}
   491  		s, err := p.sequence()
   492  		if err != nil {
   493  			return nil, err
   494  		}
   495  		if s == nil {
   496  			return nil, fmt.Errorf("expected sequence after AND")
   497  		}
   498  		e.Sequences = append(e.Sequences, s)
   499  	}
   500  	return e, nil
   501  }
   502  
   503  func (p *parser) sequence() (*Sequence, error) {
   504  	s := &Sequence{}
   505  	for {
   506  		f, err := p.factor()
   507  		if err != nil {
   508  			return nil, err
   509  		}
   510  		if f == nil {
   511  			break
   512  		}
   513  		s.Factors = append(s.Factors, f)
   514  	}
   515  	if len(s.Factors) == 0 {
   516  		return nil, nil
   517  	}
   518  	return s, nil
   519  }
   520  
   521  func (p *parser) factor() (*Factor, error) {
   522  	t, err := p.term()
   523  	if err != nil {
   524  		return nil, err
   525  	}
   526  	if t == nil {
   527  		return nil, nil
   528  	}
   529  	f := &Factor{}
   530  	f.Terms = append(f.Terms, t)
   531  	for {
   532  		or, err := p.accept(kindOr)
   533  		if err != nil {
   534  			return nil, err
   535  		}
   536  		if or == nil {
   537  			break
   538  		}
   539  		t, err := p.term()
   540  		if err != nil {
   541  			return nil, err
   542  		}
   543  		if t == nil {
   544  			return nil, fmt.Errorf("expected sequence after AND")
   545  		}
   546  		f.Terms = append(f.Terms, t)
   547  	}
   548  	return f, nil
   549  }
   550  
   551  func (p *parser) term() (*Term, error) {
   552  	n, err := p.accept(kindNegate)
   553  	if err != nil {
   554  		return nil, err
   555  	}
   556  	s, err := p.simple()
   557  	if err != nil {
   558  		return nil, err
   559  	}
   560  	if s == nil {
   561  		if n != nil {
   562  			return nil, fmt.Errorf("expected simple term after negation %q", n.value)
   563  		}
   564  		return nil, nil
   565  	}
   566  	return &Term{Negated: n != nil, Simple: s}, nil
   567  }
   568  
   569  func (p *parser) simple() (*Simple, error) {
   570  	r, err := p.restriction()
   571  	if err != nil {
   572  		return nil, err
   573  	}
   574  	if r != nil {
   575  		return &Simple{Restriction: r}, nil
   576  	}
   577  	c, err := p.composite()
   578  	if err != nil {
   579  		return nil, err
   580  	}
   581  	if c != nil {
   582  		return &Simple{Composite: c}, nil
   583  	}
   584  	return nil, nil
   585  }
   586  
   587  func (p *parser) restriction() (*Restriction, error) {
   588  	comparable, err := p.comparable()
   589  	if err != nil {
   590  		return nil, err
   591  	}
   592  	if comparable == nil {
   593  		return nil, nil
   594  	}
   595  	comparator, err := p.accept(kindComparator)
   596  	if err != nil {
   597  		return nil, err
   598  	}
   599  	if comparator == nil {
   600  		return &Restriction{Comparable: comparable}, nil
   601  	}
   602  	arg, err := p.arg()
   603  	if err != nil {
   604  		return nil, err
   605  	}
   606  	if arg == nil {
   607  		return nil, fmt.Errorf("expected arg after %s", comparator.value)
   608  	}
   609  	return &Restriction{Comparable: comparable, Comparator: comparator.value, Arg: arg}, nil
   610  }
   611  
   612  func (p *parser) comparable() (*Comparable, error) {
   613  	m, err := p.member()
   614  	if err != nil {
   615  		return nil, err
   616  	}
   617  	if m == nil {
   618  		return nil, nil
   619  	}
   620  	return &Comparable{Member: m}, nil
   621  }
   622  
   623  func (p *parser) member() (*Member, error) {
   624  	v, err := p.accept(kindString)
   625  	if err != nil {
   626  		return nil, err
   627  	}
   628  	if v != nil {
   629  		v.value, err = strconv.Unquote(v.value)
   630  		if err != nil {
   631  			return nil, fmt.Errorf("error unquoting string: %w", err)
   632  		}
   633  		return &Member{Value: v.value}, nil
   634  	}
   635  
   636  	v, err = p.accept(kindText)
   637  	if err != nil {
   638  		return nil, err
   639  	}
   640  	if v == nil {
   641  		return nil, nil
   642  	}
   643  	m := &Member{Value: v.value}
   644  	for {
   645  		dot, err := p.accept(kindDot)
   646  		if err != nil {
   647  			return nil, err
   648  		}
   649  		if dot == nil {
   650  			break
   651  		}
   652  		f, err := p.accept(kindText)
   653  		if err != nil {
   654  			return nil, err
   655  		}
   656  		if f == nil {
   657  			return nil, fmt.Errorf("expected field name after '.'")
   658  		}
   659  		m.Fields = append(m.Fields, f.value)
   660  	}
   661  	return m, nil
   662  }
   663  
   664  func (p *parser) composite() (*Expression, error) {
   665  	lparen, err := p.accept(kindLParen)
   666  	if err != nil {
   667  		return nil, err
   668  	}
   669  	if lparen == nil {
   670  		return nil, nil
   671  	}
   672  	e, err := p.expression()
   673  	if err != nil {
   674  		return nil, err
   675  	}
   676  	if e == nil {
   677  		return nil, fmt.Errorf("expected expression")
   678  	}
   679  	return e, p.expect(kindRParen)
   680  }
   681  
   682  func (p *parser) arg() (*Arg, error) {
   683  	comparable, err := p.comparable()
   684  	if err != nil {
   685  		return nil, err
   686  	}
   687  	if comparable != nil {
   688  		return &Arg{Comparable: comparable}, nil
   689  	}
   690  	composite, err := p.composite()
   691  	if err != nil {
   692  		return nil, err
   693  	}
   694  	if composite != nil {
   695  		return &Arg{Composite: composite}, nil
   696  	}
   697  	return nil, nil
   698  }