go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/rules/lang/lang.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package lang parses failure association rule predicates. The predicate
    16  // syntax defined here is intended to be a subset of BigQuery Standard SQL's
    17  // Expression syntax, with the same semantics. This provides a few benefits:
    18  //   - Well-known and understood syntax and semantics.
    19  //   - Ability to leverage existing high-quality documentation to communicate
    20  //     language concepts to end-users.
    21  //   - Simplified debugging of LUCI Analysis (by allowing direct copy-paste of
    22  //     expressions into BigQuery to verify clustering is correct).
    23  //   - Possibility of using BigQuery as an execution engine in future.
    24  //
    25  // Rules permitted by this package look similar to:
    26  //
    27  //	reason LIKE "% exited with code 5 %" AND NOT
    28  //	  ( test = "arc.Boot" OR test = "arc.StartStop" )
    29  //
    30  // The grammar for the language in Extended Backus-Naur form follows. The
    31  // top-level production rule is BoolExpr.
    32  //
    33  // BoolExpr = BoolTerm , ( "OR" , BoolTerm )* ;
    34  // BoolTerm = BoolFactor , ( "AND" , BoolFactor )* ;
    35  // BoolFactor = [ "NOT" ] BoolPrimary ;
    36  // BoolPrimary = BoolItem | BoolPredicate ;
    37  // BoolItem = BoolConst | "(" , BoolExpr , ")" | BoolFunc ;
    38  // BoolConst = "TRUE" | "FALSE" ;
    39  // BoolFunc = Identifier , "(" , StringExpr , ( "," , StringExpr )* , ")" ;
    40  // BoolPredicate = StringExpr , BoolTest ;
    41  // BoolTest = CompPredicate | NegatablePredicate ;
    42  // CompPredicate = Operator , StringExpr ;
    43  // Operator = "!=" | "<>" | "=" ;
    44  // NegatablePredicate = [ "NOT" ] , ( InPredicate | LikePredicate ) ;
    45  // InPredicate = "IN" , "(" , StringExpr , ( "," , StringExpr )* , ")" ;
    46  // LikePredicate = "LIKE" , String ;
    47  // StringExpr = String | Identifier ;
    48  //
    49  // Where:
    50  // - Identifier represents the production rule for identifiers.
    51  // - String is the production rule for a double-quoted string literal.
    52  // The precise definitions of which are omitted here but found in the
    53  // implementation.
    54  package lang
    55  
    56  import (
    57  	"bytes"
    58  	"fmt"
    59  	"io"
    60  	"regexp"
    61  	"strings"
    62  
    63  	participle "github.com/alecthomas/participle/v2"
    64  	"github.com/alecthomas/participle/v2/lexer"
    65  
    66  	"go.chromium.org/luci/common/errors"
    67  
    68  	"go.chromium.org/luci/analysis/internal/clustering"
    69  )
    70  
    71  type validator struct {
    72  	errors []error
    73  }
    74  
    75  func newValidator() *validator {
    76  	return &validator{}
    77  }
    78  
    79  // ReportError reports a validation error.
    80  func (v *validator) reportError(err error) {
    81  	v.errors = append(v.errors, err)
    82  }
    83  
    84  // Error returns all validation errors that were encountered.
    85  func (v *validator) error() error {
    86  	if len(v.errors) > 0 {
    87  		return errors.NewMultiError(v.errors...)
    88  	}
    89  	return nil
    90  }
    91  
    92  type failure *clustering.Failure
    93  type boolEval func(failure) bool
    94  type stringEval func(failure) string
    95  type predicateEval func(failure, string) bool
    96  
    97  // Expr represents a predicate for a failure association rule.
    98  type Expr struct {
    99  	expr *boolExpr
   100  	eval boolEval
   101  }
   102  
   103  // String returns the predicate as a string, with normalised formatting.
   104  func (e *Expr) String() string {
   105  	var buf bytes.Buffer
   106  	e.expr.format(&buf)
   107  	return buf.String()
   108  }
   109  
   110  // Evaluate evaluates the given expression, using the given values
   111  // for variables used in the expression.
   112  func (e *Expr) Evaluate(failure *clustering.Failure) bool {
   113  	return e.eval(failure)
   114  }
   115  
   116  type boolExpr struct {
   117  	Terms []*boolTerm `parser:"@@ ( 'OR' @@ )*"`
   118  }
   119  
   120  func (e *boolExpr) format(w io.Writer) {
   121  	for i, t := range e.Terms {
   122  		if i > 0 {
   123  			io.WriteString(w, " OR ")
   124  		}
   125  		t.format(w)
   126  	}
   127  }
   128  
   129  func (e *boolExpr) evaluator(v *validator) boolEval {
   130  	var termEvals []boolEval
   131  	for _, t := range e.Terms {
   132  		termEvals = append(termEvals, t.evaluator(v))
   133  	}
   134  	if len(termEvals) == 1 {
   135  		return termEvals[0]
   136  	}
   137  	return func(f failure) bool {
   138  		for _, termEval := range termEvals {
   139  			if termEval(f) {
   140  				return true
   141  			}
   142  		}
   143  		return false
   144  	}
   145  }
   146  
   147  type boolTerm struct {
   148  	Factors []*boolFactor `parser:"@@ ( 'AND' @@ )*"`
   149  }
   150  
   151  func (t *boolTerm) format(w io.Writer) {
   152  	for i, f := range t.Factors {
   153  		if i > 0 {
   154  			io.WriteString(w, " AND ")
   155  		}
   156  		f.format(w)
   157  	}
   158  }
   159  
   160  func (t *boolTerm) evaluator(v *validator) boolEval {
   161  	var factorEvals []boolEval
   162  	for _, f := range t.Factors {
   163  		factorEvals = append(factorEvals, f.evaluator(v))
   164  	}
   165  	if len(factorEvals) == 1 {
   166  		return factorEvals[0]
   167  	}
   168  	return func(f failure) bool {
   169  		for _, factorEval := range factorEvals {
   170  			if !factorEval(f) {
   171  				return false
   172  			}
   173  		}
   174  		return true
   175  	}
   176  }
   177  
   178  type boolFactor struct {
   179  	Not     bool         `parser:"( @'NOT' )?"`
   180  	Primary *boolPrimary `parser:"@@"`
   181  }
   182  
   183  func (f *boolFactor) format(w io.Writer) {
   184  	if f.Not {
   185  		io.WriteString(w, "NOT ")
   186  	}
   187  	f.Primary.format(w)
   188  }
   189  
   190  func (f *boolFactor) evaluator(v *validator) boolEval {
   191  	predicate := f.Primary.evaluator(v)
   192  	if f.Not {
   193  		return func(f failure) bool {
   194  			return !predicate(f)
   195  		}
   196  	}
   197  	return predicate
   198  }
   199  
   200  type boolPrimary struct {
   201  	Item *boolItem      `parser:"@@"`
   202  	Test *boolPredicate `parser:"| @@"`
   203  }
   204  
   205  func (p *boolPrimary) format(w io.Writer) {
   206  	if p.Item != nil {
   207  		p.Item.format(w)
   208  	}
   209  	if p.Test != nil {
   210  		p.Test.format(w)
   211  	}
   212  }
   213  
   214  func (p *boolPrimary) evaluator(v *validator) boolEval {
   215  	if p.Item != nil {
   216  		return p.Item.evaluator(v)
   217  	}
   218  	return p.Test.evaluator(v)
   219  }
   220  
   221  type boolItem struct {
   222  	Const *boolConst    `parser:"@@"`
   223  	Expr  *boolExpr     `parser:"| '(' @@ ')'"`
   224  	Func  *boolFunction `parser:"| @@"`
   225  }
   226  
   227  func (i *boolItem) format(w io.Writer) {
   228  	if i.Const != nil {
   229  		i.Const.format(w)
   230  	}
   231  	if i.Expr != nil {
   232  		io.WriteString(w, "(")
   233  		i.Expr.format(w)
   234  		io.WriteString(w, ")")
   235  	}
   236  	if i.Func != nil {
   237  		i.Func.format(w)
   238  	}
   239  }
   240  
   241  func (p *boolItem) evaluator(v *validator) boolEval {
   242  	if p.Const != nil {
   243  		return p.Const.evaluator(v)
   244  	}
   245  	if p.Expr != nil {
   246  		return p.Expr.evaluator(v)
   247  	}
   248  	if p.Func != nil {
   249  		return p.Func.evaluator(v)
   250  	}
   251  	return nil
   252  }
   253  
   254  type boolConst struct {
   255  	Value string `parser:"@( 'TRUE' | 'FALSE' )"`
   256  }
   257  
   258  func (c *boolConst) format(w io.Writer) {
   259  	io.WriteString(w, c.Value)
   260  }
   261  
   262  func (c *boolConst) evaluator(v *validator) boolEval {
   263  	value := c.Value == "TRUE"
   264  	return func(f failure) bool {
   265  		return value
   266  	}
   267  }
   268  
   269  type boolFunction struct {
   270  	Function string        `parser:"@Ident"`
   271  	Args     []*stringExpr `parser:"'(' @@ ( ',' @@ )* ')'"`
   272  }
   273  
   274  func (f *boolFunction) format(w io.Writer) {
   275  	io.WriteString(w, f.Function)
   276  	io.WriteString(w, "(")
   277  	for i, arg := range f.Args {
   278  		if i > 0 {
   279  			io.WriteString(w, ", ")
   280  		}
   281  		arg.format(w)
   282  	}
   283  	io.WriteString(w, ")")
   284  }
   285  
   286  func (f *boolFunction) evaluator(v *validator) boolEval {
   287  	switch strings.ToLower(f.Function) {
   288  	case "regexp_contains":
   289  		if len(f.Args) != 2 {
   290  			v.reportError(fmt.Errorf("invalid number of arguments to REGEXP_CONTAINS: got %v, want 2", len(f.Args)))
   291  			return nil
   292  		}
   293  		valueEval := f.Args[0].evaluator(v)
   294  		pattern, ok := f.Args[1].asConstant(v)
   295  		if !ok {
   296  			// For efficiency reasons, we require the second argument to be a
   297  			// constant so that we can pre-compile the regular expression.
   298  			v.reportError(fmt.Errorf("expected second argument to REGEXP_CONTAINS to be a constant pattern"))
   299  			return nil
   300  		}
   301  		re, err := regexp.Compile(pattern)
   302  		if err != nil {
   303  			v.reportError(fmt.Errorf("invalid regular expression %q", pattern))
   304  			return nil
   305  		}
   306  
   307  		return func(f failure) bool {
   308  			value := valueEval(f)
   309  			return re.MatchString(value)
   310  		}
   311  	default:
   312  		v.reportError(fmt.Errorf("undefined function: %q", f.Function))
   313  		return nil
   314  	}
   315  }
   316  
   317  type boolPredicate struct {
   318  	Value *stringExpr `parser:"@@"`
   319  	Test  *boolTest   `parser:"@@"`
   320  }
   321  
   322  func (t *boolPredicate) format(w io.Writer) {
   323  	t.Value.format(w)
   324  	t.Test.format(w)
   325  }
   326  
   327  func (t *boolPredicate) evaluator(v *validator) boolEval {
   328  	value := t.Value.evaluator(v)
   329  	test := t.Test.evaluator(v)
   330  	return func(f failure) bool {
   331  		return test(f, value(f))
   332  	}
   333  }
   334  
   335  type boolTest struct {
   336  	Comp      *compPredicate      `parser:"@@"`
   337  	Negatable *negatablePredicate `parser:"| @@"`
   338  }
   339  
   340  func (t *boolTest) format(w io.Writer) {
   341  	if t.Comp != nil {
   342  		t.Comp.format(w)
   343  	}
   344  	if t.Negatable != nil {
   345  		t.Negatable.format(w)
   346  	}
   347  }
   348  
   349  func (t *boolTest) evaluator(v *validator) predicateEval {
   350  	if t.Comp != nil {
   351  		return t.Comp.evaluator(v)
   352  	}
   353  	return t.Negatable.evaluator(v)
   354  }
   355  
   356  type negatablePredicate struct {
   357  	Not  bool           `parser:"( @'NOT' )?"`
   358  	In   *inPredicate   `parser:"( @@"`
   359  	Like *likePredicate `parser:"| @@ )"`
   360  }
   361  
   362  func (p *negatablePredicate) format(w io.Writer) {
   363  	if p.Not {
   364  		io.WriteString(w, " NOT")
   365  	}
   366  	if p.In != nil {
   367  		p.In.format(w)
   368  	}
   369  	if p.Like != nil {
   370  		p.Like.format(w)
   371  	}
   372  }
   373  
   374  func (p *negatablePredicate) evaluator(v *validator) predicateEval {
   375  	var predicate predicateEval
   376  	if p.In != nil {
   377  		predicate = p.In.evaluator(v)
   378  	}
   379  	if p.Like != nil {
   380  		predicate = p.Like.evaluator(v)
   381  	}
   382  	if p.Not {
   383  		return func(f failure, s string) bool {
   384  			return !predicate(f, s)
   385  		}
   386  	}
   387  	return predicate
   388  }
   389  
   390  type compPredicate struct {
   391  	Op    string      `parser:"@( '=' | '!=' | '<>' )"`
   392  	Value *stringExpr `parser:"@@"`
   393  }
   394  
   395  func (p *compPredicate) format(w io.Writer) {
   396  	fmt.Fprintf(w, " %s ", p.Op)
   397  	p.Value.format(w)
   398  }
   399  
   400  func (p *compPredicate) evaluator(v *validator) predicateEval {
   401  	val := p.Value.evaluator(v)
   402  	switch p.Op {
   403  	case "=":
   404  		return func(f failure, s string) bool {
   405  			return s == val(f)
   406  		}
   407  	case "!=", "<>":
   408  		return func(f failure, s string) bool {
   409  			return s != val(f)
   410  		}
   411  	default:
   412  		panic("invalid op")
   413  	}
   414  }
   415  
   416  type inPredicate struct {
   417  	List []*stringExpr `parser:"'IN' '(' @@ ( ',' @@ )* ')'"`
   418  }
   419  
   420  func (p *inPredicate) format(w io.Writer) {
   421  	io.WriteString(w, " IN (")
   422  	for i, v := range p.List {
   423  		if i > 0 {
   424  			io.WriteString(w, ", ")
   425  		}
   426  		v.format(w)
   427  	}
   428  	io.WriteString(w, ")")
   429  }
   430  
   431  func (p *inPredicate) evaluator(v *validator) predicateEval {
   432  	var list []stringEval
   433  	for _, item := range p.List {
   434  		list = append(list, item.evaluator(v))
   435  	}
   436  	return func(f failure, s string) bool {
   437  		for _, item := range list {
   438  			if item(f) == s {
   439  				return true
   440  			}
   441  		}
   442  		return false
   443  	}
   444  }
   445  
   446  type likePredicate struct {
   447  	Pattern *string `parser:"'LIKE' @String"`
   448  }
   449  
   450  func (p *likePredicate) format(w io.Writer) {
   451  	io.WriteString(w, " LIKE ")
   452  	io.WriteString(w, *p.Pattern)
   453  }
   454  
   455  func (p *likePredicate) evaluator(v *validator) predicateEval {
   456  	likePattern, err := unescapeStringLiteral(*p.Pattern)
   457  	if err != nil {
   458  		v.reportError(err)
   459  		return nil
   460  	}
   461  
   462  	// Rewrite the LIKE syntax in terms of a regular expression syntax.
   463  	regexpPattern, err := likePatternToRegexp(likePattern)
   464  	if err != nil {
   465  		v.reportError(err)
   466  		return nil
   467  	}
   468  
   469  	re, err := regexp.Compile(regexpPattern)
   470  	if err != nil {
   471  		v.reportError(fmt.Errorf("invalid LIKE expression: %s", likePattern))
   472  		return nil
   473  	}
   474  	return func(f failure, s string) bool {
   475  		return re.MatchString(s)
   476  	}
   477  }
   478  
   479  type stringExpr struct {
   480  	Literal *string `parser:"@String"`
   481  	Ident   *string `parser:"| @Ident"`
   482  }
   483  
   484  func (e *stringExpr) format(w io.Writer) {
   485  	if e.Literal != nil {
   486  		io.WriteString(w, *e.Literal)
   487  	}
   488  	if e.Ident != nil {
   489  		io.WriteString(w, *e.Ident)
   490  	}
   491  }
   492  
   493  // asConstant attempts to evaluate stringExpr as a compile-time constant.
   494  // Returns the string value (assuming it is valid and constant) and
   495  // whether it is a constant.
   496  func (e *stringExpr) asConstant(v *validator) (value string, ok bool) {
   497  	if e.Literal != nil {
   498  		literal, err := unescapeStringLiteral(*e.Literal)
   499  		if err != nil {
   500  			v.reportError(err)
   501  			return "", true
   502  		}
   503  		return literal, true
   504  	}
   505  	return "", false
   506  }
   507  
   508  func (e *stringExpr) evaluator(v *validator) stringEval {
   509  	if e.Literal != nil {
   510  		literal, err := unescapeStringLiteral(*e.Literal)
   511  		if err != nil {
   512  			v.reportError(err)
   513  			return nil
   514  		}
   515  		return func(f failure) string { return literal }
   516  	}
   517  	if e.Ident != nil {
   518  		varName := *e.Ident
   519  		var accessor func(c *clustering.Failure) string
   520  		switch varName {
   521  		case "test":
   522  			accessor = func(f *clustering.Failure) string {
   523  				return f.TestID
   524  			}
   525  		case "reason":
   526  			accessor = func(f *clustering.Failure) string {
   527  				return f.Reason.GetPrimaryErrorMessage()
   528  			}
   529  		default:
   530  			v.reportError(fmt.Errorf("undeclared identifier %q", varName))
   531  		}
   532  		return func(f failure) string { return accessor(f) }
   533  	}
   534  	return nil
   535  }
   536  
   537  var (
   538  	lex = lexer.MustSimple([]lexer.SimpleRule{
   539  		{Name: "whitespace", Pattern: `\s+`},
   540  		{Name: "Keyword", Pattern: `(?i)(TRUE|FALSE|AND|OR|NOT|LIKE|IN)\b`},
   541  		{Name: "Ident", Pattern: `([a-zA-Z_][a-zA-Z0-9_]*)\b`},
   542  		{Name: "String", Pattern: stringLiteralPattern},
   543  		{Name: "Operators", Pattern: `!=|<>|[,()=]`},
   544  	})
   545  
   546  	parser = participle.MustBuild[boolExpr](
   547  		participle.Lexer(lex),
   548  		participle.Upper("Keyword"),
   549  		participle.Map(lowerMapper, "Ident"),
   550  		participle.CaseInsensitive("Keyword"))
   551  )
   552  
   553  func lowerMapper(token lexer.Token) (lexer.Token, error) {
   554  	token.Value = strings.ToLower(token.Value)
   555  	return token, nil
   556  }
   557  
   558  // Parse parses a failure association rule from the specified text.
   559  // idents is the set of identifiers that are recognised by the application.
   560  func Parse(text string) (*Expr, error) {
   561  	expr, err := parser.ParseString("", text)
   562  	if err != nil {
   563  		return nil, errors.Annotate(err, "syntax error").Err()
   564  	}
   565  
   566  	v := newValidator()
   567  	eval := expr.evaluator(v)
   568  	if err := v.error(); err != nil {
   569  		return nil, err
   570  	}
   571  	return &Expr{
   572  		expr: expr,
   573  		eval: eval,
   574  	}, nil
   575  }