go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/rules/lang/lang.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package lang parses failure association rule predicates. The predicate 16 // syntax defined here is intended to be a subset of BigQuery Standard SQL's 17 // Expression syntax, with the same semantics. This provides a few benefits: 18 // - Well-known and understood syntax and semantics. 19 // - Ability to leverage existing high-quality documentation to communicate 20 // language concepts to end-users. 21 // - Simplified debugging of LUCI Analysis (by allowing direct copy-paste of 22 // expressions into BigQuery to verify clustering is correct). 23 // - Possibility of using BigQuery as an execution engine in future. 24 // 25 // Rules permitted by this package look similar to: 26 // 27 // reason LIKE "% exited with code 5 %" AND NOT 28 // ( test = "arc.Boot" OR test = "arc.StartStop" ) 29 // 30 // The grammar for the language in Extended Backus-Naur form follows. The 31 // top-level production rule is BoolExpr. 
32 // 33 // BoolExpr = BoolTerm , ( "OR" , BoolTerm )* ; 34 // BoolTerm = BoolFactor , ( "AND" , BoolFactor )* ; 35 // BoolFactor = [ "NOT" ] BoolPrimary ; 36 // BoolPrimary = BoolItem | BoolPredicate ; 37 // BoolItem = BoolConst | "(" , BoolExpr , ")" | BoolFunc ; 38 // BoolConst = "TRUE" | "FALSE" ; 39 // BoolFunc = Identifier , "(" , StringExpr , ( "," , StringExpr )* , ")" ; 40 // BoolPredicate = StringExpr , BoolTest ; 41 // BoolTest = CompPredicate | NegatablePredicate ; 42 // CompPredicate = Operator , StringExpr ; 43 // Operator = "!=" | "<>" | "=" 44 // NegatablePredicate = [ "NOT" ] , ( InPredicate | LikePredicate ) ; 45 // InPredicate = "IN" , "(" , StringExpr , ( "," , StringExpr )* , ")" ; 46 // LikePredicate = "LIKE" , String ; 47 // StringExpr = String | Identifier ; 48 // 49 // Where: 50 // - Identifier represents the production rule for identifiers. 51 // - String is the production rule for a double-quoted string literal. 52 // The precise definitions of which are omitted here but found in the 53 // implementation. 54 package lang 55 56 import ( 57 "bytes" 58 "fmt" 59 "io" 60 "regexp" 61 "strings" 62 63 participle "github.com/alecthomas/participle/v2" 64 "github.com/alecthomas/participle/v2/lexer" 65 66 "go.chromium.org/luci/common/errors" 67 68 "go.chromium.org/luci/analysis/internal/clustering" 69 ) 70 71 type validator struct { 72 errors []error 73 } 74 75 func newValidator() *validator { 76 return &validator{} 77 } 78 79 // ReportError reports a validation error. 80 func (v *validator) reportError(err error) { 81 v.errors = append(v.errors, err) 82 } 83 84 // Error returns all validation errors that were encountered. 85 func (v *validator) error() error { 86 if len(v.errors) > 0 { 87 return errors.NewMultiError(v.errors...) 
88 } 89 return nil 90 } 91 92 type failure *clustering.Failure 93 type boolEval func(failure) bool 94 type stringEval func(failure) string 95 type predicateEval func(failure, string) bool 96 97 // Expr represents a predicate for a failure association rule. 98 type Expr struct { 99 expr *boolExpr 100 eval boolEval 101 } 102 103 // String returns the predicate as a string, with normalised formatting. 104 func (e *Expr) String() string { 105 var buf bytes.Buffer 106 e.expr.format(&buf) 107 return buf.String() 108 } 109 110 // Evaluate evaluates the given expression, using the given values 111 // for variables used in the expression. 112 func (e *Expr) Evaluate(failure *clustering.Failure) bool { 113 return e.eval(failure) 114 } 115 116 type boolExpr struct { 117 Terms []*boolTerm `parser:"@@ ( 'OR' @@ )*"` 118 } 119 120 func (e *boolExpr) format(w io.Writer) { 121 for i, t := range e.Terms { 122 if i > 0 { 123 io.WriteString(w, " OR ") 124 } 125 t.format(w) 126 } 127 } 128 129 func (e *boolExpr) evaluator(v *validator) boolEval { 130 var termEvals []boolEval 131 for _, t := range e.Terms { 132 termEvals = append(termEvals, t.evaluator(v)) 133 } 134 if len(termEvals) == 1 { 135 return termEvals[0] 136 } 137 return func(f failure) bool { 138 for _, termEval := range termEvals { 139 if termEval(f) { 140 return true 141 } 142 } 143 return false 144 } 145 } 146 147 type boolTerm struct { 148 Factors []*boolFactor `parser:"@@ ( 'AND' @@ )*"` 149 } 150 151 func (t *boolTerm) format(w io.Writer) { 152 for i, f := range t.Factors { 153 if i > 0 { 154 io.WriteString(w, " AND ") 155 } 156 f.format(w) 157 } 158 } 159 160 func (t *boolTerm) evaluator(v *validator) boolEval { 161 var factorEvals []boolEval 162 for _, f := range t.Factors { 163 factorEvals = append(factorEvals, f.evaluator(v)) 164 } 165 if len(factorEvals) == 1 { 166 return factorEvals[0] 167 } 168 return func(f failure) bool { 169 for _, factorEval := range factorEvals { 170 if !factorEval(f) { 171 return false 172 
} 173 } 174 return true 175 } 176 } 177 178 type boolFactor struct { 179 Not bool `parser:"( @'NOT' )?"` 180 Primary *boolPrimary `parser:"@@"` 181 } 182 183 func (f *boolFactor) format(w io.Writer) { 184 if f.Not { 185 io.WriteString(w, "NOT ") 186 } 187 f.Primary.format(w) 188 } 189 190 func (f *boolFactor) evaluator(v *validator) boolEval { 191 predicate := f.Primary.evaluator(v) 192 if f.Not { 193 return func(f failure) bool { 194 return !predicate(f) 195 } 196 } 197 return predicate 198 } 199 200 type boolPrimary struct { 201 Item *boolItem `parser:"@@"` 202 Test *boolPredicate `parser:"| @@"` 203 } 204 205 func (p *boolPrimary) format(w io.Writer) { 206 if p.Item != nil { 207 p.Item.format(w) 208 } 209 if p.Test != nil { 210 p.Test.format(w) 211 } 212 } 213 214 func (p *boolPrimary) evaluator(v *validator) boolEval { 215 if p.Item != nil { 216 return p.Item.evaluator(v) 217 } 218 return p.Test.evaluator(v) 219 } 220 221 type boolItem struct { 222 Const *boolConst `parser:"@@"` 223 Expr *boolExpr `parser:"| '(' @@ ')'"` 224 Func *boolFunction `parser:"| @@"` 225 } 226 227 func (i *boolItem) format(w io.Writer) { 228 if i.Const != nil { 229 i.Const.format(w) 230 } 231 if i.Expr != nil { 232 io.WriteString(w, "(") 233 i.Expr.format(w) 234 io.WriteString(w, ")") 235 } 236 if i.Func != nil { 237 i.Func.format(w) 238 } 239 } 240 241 func (p *boolItem) evaluator(v *validator) boolEval { 242 if p.Const != nil { 243 return p.Const.evaluator(v) 244 } 245 if p.Expr != nil { 246 return p.Expr.evaluator(v) 247 } 248 if p.Func != nil { 249 return p.Func.evaluator(v) 250 } 251 return nil 252 } 253 254 type boolConst struct { 255 Value string `parser:"@( 'TRUE' | 'FALSE' )"` 256 } 257 258 func (c *boolConst) format(w io.Writer) { 259 io.WriteString(w, c.Value) 260 } 261 262 func (c *boolConst) evaluator(v *validator) boolEval { 263 value := c.Value == "TRUE" 264 return func(f failure) bool { 265 return value 266 } 267 } 268 269 type boolFunction struct { 270 Function string 
`parser:"@Ident"` 271 Args []*stringExpr `parser:"'(' @@ ( ',' @@ )* ')'"` 272 } 273 274 func (f *boolFunction) format(w io.Writer) { 275 io.WriteString(w, f.Function) 276 io.WriteString(w, "(") 277 for i, arg := range f.Args { 278 if i > 0 { 279 io.WriteString(w, ", ") 280 } 281 arg.format(w) 282 } 283 io.WriteString(w, ")") 284 } 285 286 func (f *boolFunction) evaluator(v *validator) boolEval { 287 switch strings.ToLower(f.Function) { 288 case "regexp_contains": 289 if len(f.Args) != 2 { 290 v.reportError(fmt.Errorf("invalid number of arguments to REGEXP_CONTAINS: got %v, want 2", len(f.Args))) 291 return nil 292 } 293 valueEval := f.Args[0].evaluator(v) 294 pattern, ok := f.Args[1].asConstant(v) 295 if !ok { 296 // For efficiency reasons, we require the second argument to be a 297 // constant so that we can pre-compile the regular expression. 298 v.reportError(fmt.Errorf("expected second argument to REGEXP_CONTAINS to be a constant pattern")) 299 return nil 300 } 301 re, err := regexp.Compile(pattern) 302 if err != nil { 303 v.reportError(fmt.Errorf("invalid regular expression %q", pattern)) 304 return nil 305 } 306 307 return func(f failure) bool { 308 value := valueEval(f) 309 return re.MatchString(value) 310 } 311 default: 312 v.reportError(fmt.Errorf("undefined function: %q", f.Function)) 313 return nil 314 } 315 } 316 317 type boolPredicate struct { 318 Value *stringExpr `parser:"@@"` 319 Test *boolTest `parser:"@@"` 320 } 321 322 func (t *boolPredicate) format(w io.Writer) { 323 t.Value.format(w) 324 t.Test.format(w) 325 } 326 327 func (t *boolPredicate) evaluator(v *validator) boolEval { 328 value := t.Value.evaluator(v) 329 test := t.Test.evaluator(v) 330 return func(f failure) bool { 331 return test(f, value(f)) 332 } 333 } 334 335 type boolTest struct { 336 Comp *compPredicate `parser:"@@"` 337 Negatable *negatablePredicate `parser:"| @@"` 338 } 339 340 func (t *boolTest) format(w io.Writer) { 341 if t.Comp != nil { 342 t.Comp.format(w) 343 } 344 if 
t.Negatable != nil { 345 t.Negatable.format(w) 346 } 347 } 348 349 func (t *boolTest) evaluator(v *validator) predicateEval { 350 if t.Comp != nil { 351 return t.Comp.evaluator(v) 352 } 353 return t.Negatable.evaluator(v) 354 } 355 356 type negatablePredicate struct { 357 Not bool `parser:"( @'NOT' )?"` 358 In *inPredicate `parser:"( @@"` 359 Like *likePredicate `parser:"| @@ )"` 360 } 361 362 func (p *negatablePredicate) format(w io.Writer) { 363 if p.Not { 364 io.WriteString(w, " NOT") 365 } 366 if p.In != nil { 367 p.In.format(w) 368 } 369 if p.Like != nil { 370 p.Like.format(w) 371 } 372 } 373 374 func (p *negatablePredicate) evaluator(v *validator) predicateEval { 375 var predicate predicateEval 376 if p.In != nil { 377 predicate = p.In.evaluator(v) 378 } 379 if p.Like != nil { 380 predicate = p.Like.evaluator(v) 381 } 382 if p.Not { 383 return func(f failure, s string) bool { 384 return !predicate(f, s) 385 } 386 } 387 return predicate 388 } 389 390 type compPredicate struct { 391 Op string `parser:"@( '=' | '!=' | '<>' )"` 392 Value *stringExpr `parser:"@@"` 393 } 394 395 func (p *compPredicate) format(w io.Writer) { 396 fmt.Fprintf(w, " %s ", p.Op) 397 p.Value.format(w) 398 } 399 400 func (p *compPredicate) evaluator(v *validator) predicateEval { 401 val := p.Value.evaluator(v) 402 switch p.Op { 403 case "=": 404 return func(f failure, s string) bool { 405 return s == val(f) 406 } 407 case "!=", "<>": 408 return func(f failure, s string) bool { 409 return s != val(f) 410 } 411 default: 412 panic("invalid op") 413 } 414 } 415 416 type inPredicate struct { 417 List []*stringExpr `parser:"'IN' '(' @@ ( ',' @@ )* ')'"` 418 } 419 420 func (p *inPredicate) format(w io.Writer) { 421 io.WriteString(w, " IN (") 422 for i, v := range p.List { 423 if i > 0 { 424 io.WriteString(w, ", ") 425 } 426 v.format(w) 427 } 428 io.WriteString(w, ")") 429 } 430 431 func (p *inPredicate) evaluator(v *validator) predicateEval { 432 var list []stringEval 433 for _, item := range 
p.List { 434 list = append(list, item.evaluator(v)) 435 } 436 return func(f failure, s string) bool { 437 for _, item := range list { 438 if item(f) == s { 439 return true 440 } 441 } 442 return false 443 } 444 } 445 446 type likePredicate struct { 447 Pattern *string `parser:"'LIKE' @String"` 448 } 449 450 func (p *likePredicate) format(w io.Writer) { 451 io.WriteString(w, " LIKE ") 452 io.WriteString(w, *p.Pattern) 453 } 454 455 func (p *likePredicate) evaluator(v *validator) predicateEval { 456 likePattern, err := unescapeStringLiteral(*p.Pattern) 457 if err != nil { 458 v.reportError(err) 459 return nil 460 } 461 462 // Rewrite the LIKE syntax in terms of a regular expression syntax. 463 regexpPattern, err := likePatternToRegexp(likePattern) 464 if err != nil { 465 v.reportError(err) 466 return nil 467 } 468 469 re, err := regexp.Compile(regexpPattern) 470 if err != nil { 471 v.reportError(fmt.Errorf("invalid LIKE expression: %s", likePattern)) 472 return nil 473 } 474 return func(f failure, s string) bool { 475 return re.MatchString(s) 476 } 477 } 478 479 type stringExpr struct { 480 Literal *string `parser:"@String"` 481 Ident *string `parser:"| @Ident"` 482 } 483 484 func (e *stringExpr) format(w io.Writer) { 485 if e.Literal != nil { 486 io.WriteString(w, *e.Literal) 487 } 488 if e.Ident != nil { 489 io.WriteString(w, *e.Ident) 490 } 491 } 492 493 // asConstant attempts to evaluate stringExpr as a compile-time constant. 494 // Returns the string value (assuming it is valid and constant) and 495 // whether it is a constant. 
496 func (e *stringExpr) asConstant(v *validator) (value string, ok bool) { 497 if e.Literal != nil { 498 literal, err := unescapeStringLiteral(*e.Literal) 499 if err != nil { 500 v.reportError(err) 501 return "", true 502 } 503 return literal, true 504 } 505 return "", false 506 } 507 508 func (e *stringExpr) evaluator(v *validator) stringEval { 509 if e.Literal != nil { 510 literal, err := unescapeStringLiteral(*e.Literal) 511 if err != nil { 512 v.reportError(err) 513 return nil 514 } 515 return func(f failure) string { return literal } 516 } 517 if e.Ident != nil { 518 varName := *e.Ident 519 var accessor func(c *clustering.Failure) string 520 switch varName { 521 case "test": 522 accessor = func(f *clustering.Failure) string { 523 return f.TestID 524 } 525 case "reason": 526 accessor = func(f *clustering.Failure) string { 527 return f.Reason.GetPrimaryErrorMessage() 528 } 529 default: 530 v.reportError(fmt.Errorf("undeclared identifier %q", varName)) 531 } 532 return func(f failure) string { return accessor(f) } 533 } 534 return nil 535 } 536 537 var ( 538 lex = lexer.MustSimple([]lexer.SimpleRule{ 539 {Name: "whitespace", Pattern: `\s+`}, 540 {Name: "Keyword", Pattern: `(?i)(TRUE|FALSE|AND|OR|NOT|LIKE|IN)\b`}, 541 {Name: "Ident", Pattern: `([a-zA-Z_][a-zA-Z0-9_]*)\b`}, 542 {Name: "String", Pattern: stringLiteralPattern}, 543 {Name: "Operators", Pattern: `!=|<>|[,()=]`}, 544 }) 545 546 parser = participle.MustBuild[boolExpr]( 547 participle.Lexer(lex), 548 participle.Upper("Keyword"), 549 participle.Map(lowerMapper, "Ident"), 550 participle.CaseInsensitive("Keyword")) 551 ) 552 553 func lowerMapper(token lexer.Token) (lexer.Token, error) { 554 token.Value = strings.ToLower(token.Value) 555 return token, nil 556 } 557 558 // Parse parses a failure association rule from the specified text. 559 // idents is the set of identifiers that are recognised by the application. 
560 func Parse(text string) (*Expr, error) { 561 expr, err := parser.ParseString("", text) 562 if err != nil { 563 return nil, errors.Annotate(err, "syntax error").Err() 564 } 565 566 v := newValidator() 567 eval := expr.evaluator(v) 568 if err := v.error(); err != nil { 569 return nil, err 570 } 571 return &Expr{ 572 expr: expr, 573 eval: eval, 574 }, nil 575 }