golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/watchflakes/internal/script/script.go (about) 1 // Copyright 2022 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package script implements a simple classification scripting language. 6 // A script is a sequence of rules of the form “action <- pattern”, 7 // meaning send results matching pattern to the named action. 8 package script 9 10 import ( 11 "fmt" 12 "regexp" 13 "strconv" 14 "strings" 15 "unicode/utf8" 16 ) 17 18 // A Script is a sequence of Action <- Pattern rules. 19 type Script struct { 20 File string 21 Rules []*Rule 22 } 23 24 // A Rule is a single Action <- Pattern rule. 25 type Rule struct { 26 Action string // "skip", "post", and so on 27 Pattern Expr // pattern expression 28 } 29 30 // Action returns the action specified by the script for the given record. 31 func (s *Script) Action(record Record) string { 32 for _, r := range s.Rules { 33 if r.Pattern.Match(record) { 34 return r.Action 35 } 36 } 37 return "" 38 } 39 40 // A Record is a set of key:value pairs. 41 type Record map[string]string 42 43 // An Expr is a pattern expression that can evaluate itself on a Record. 44 // The underlying concrete type is *CmpExpr, *AndExpr, *OrExpr, *NotExpr, or *RegExpr. 45 type Expr interface { 46 // String returns the syntax for the pattern. 47 String() string 48 49 // Match reports whether the pattern matches the record. 50 Match(record Record) bool 51 } 52 53 // A CmpExpr is an Expr for a string comparison. 54 type CmpExpr struct { 55 Field string 56 Op string 57 Literal string 58 } 59 60 func (x *CmpExpr) Match(record Record) bool { 61 f := record[x.Field] 62 l := x.Literal 63 switch x.Op { 64 case "==": 65 return f == l 66 case "!=": 67 return f != l 68 case "<": 69 return f < l 70 case "<=": 71 return f <= l 72 case ">": 73 return f > l 74 case ">=": 75 return f >= l 76 } 77 return false 78 } 79 80 func (x *CmpExpr) String() string { 81 s := strconv.Quote(x.Literal) 82 if x.Field == "" { 83 return s 84 } 85 return x.Field + " " + x.Op + " " + s 86 } 87 88 func cmp(field, op, literal string) Expr { return &CmpExpr{field, op, literal} } 89 90 // A RegExpr is an Expr for a regular expression test. 91 type RegExpr struct { 92 Field string 93 Not bool 94 Regexp *regexp.Regexp 95 } 96 97 func (x *RegExpr) Match(record Record) bool { 98 ok := x.Regexp.MatchString(record[x.Field]) 99 if x.Not { 100 return !ok 101 } 102 return ok 103 } 104 105 func (x *RegExpr) String() string { 106 s := x.Regexp.String() 107 s = "`" + strings.ReplaceAll(s, "`", `\x60`) + "`" 108 if x.Field == "" { 109 return s 110 } 111 op := " ~ " 112 if x.Not { 113 op = " !~ " 114 } 115 return x.Field + op + s 116 } 117 118 func regx(field string, not bool, re *regexp.Regexp) Expr { return &RegExpr{field, not, re} } 119 func regcomp(s string) (*regexp.Regexp, error) { 120 return regexp.Compile("(?m)" + s) 121 } 122 123 // A NotExpr represents the expression !X (the negation of X). 124 type NotExpr struct { 125 X Expr 126 } 127 128 func (x *NotExpr) Match(record Record) bool { 129 return !x.X.Match(record) 130 } 131 132 func (x *NotExpr) String() string { 133 return "!(" + x.X.String() + ")" 134 } 135 136 func not(x Expr) Expr { return &NotExpr{x} } 137 138 // An AndExpr represents the expression X && Y. 139 type AndExpr struct { 140 X, Y Expr 141 } 142 143 func (x *AndExpr) Match(record Record) bool { 144 return x.X.Match(record) && x.Y.Match(record) 145 } 146 147 func (x *AndExpr) String() string { 148 return andArg(x.X) + " && " + andArg(x.Y) 149 } 150 151 func andArg(x Expr) string { 152 s := x.String() 153 if _, ok := x.(*OrExpr); ok { 154 s = "(" + s + ")" 155 } 156 return s 157 } 158 159 func and(x, y Expr) Expr { 160 return &AndExpr{x, y} 161 } 162 163 // An OrExpr represents the expression X || Y. 164 type OrExpr struct { 165 X, Y Expr 166 } 167 168 func (x *OrExpr) Match(record Record) bool { 169 return x.X.Match(record) || x.Y.Match(record) 170 } 171 172 func (x *OrExpr) String() string { 173 return orArg(x.X) + " || " + orArg(x.Y) 174 } 175 176 func orArg(x Expr) string { 177 s := x.String() 178 if _, ok := x.(*AndExpr); ok { 179 s = "(" + s + ")" 180 } 181 return s 182 } 183 184 func or(x, y Expr) Expr { 185 return &OrExpr{x, y} 186 } 187 188 // A SyntaxError reports a syntax error in a parsed match expression. 189 type SyntaxError struct { 190 File string // input file 191 Line int // line number where error was detected (1-indexed) 192 Offset int // byte offset in line where error was detected (1-indexed) 193 Err string // description of error 194 } 195 196 func (e *SyntaxError) Error() string { 197 if e.Offset == 0 { 198 return fmt.Sprintf("%s:%d: %s", e.File, e.Line, e.Err) 199 } 200 return fmt.Sprintf("%s:%d.%d: %s", e.File, e.Line, e.Offset, e.Err) 201 } 202 203 // A parser holds state for parsing a build expression. 204 type parser struct { 205 file string // input file, for errors 206 s string // input string 207 i int // next read location in s 208 fields map[string]bool // known input fields for comparisons 209 210 tok string // last token read; "`", "\"", "a" for backquoted regexp, literal string, identifier 211 lit string // text of backquoted regexp, literal string, or identifier 212 pos int // position (start) of last token 213 } 214 215 // Parse parses text as a script, 216 // returning the parsed form and any parse errors found. 217 // (The parser attempts to recover after parse errors by starting over 218 // at the next newline, so multiple parse errors are possible.) 219 // The file argument is used for reporting the file name in errors 220 // and in the Script's File field; 221 // Parse does not read from the file itself. 222 func Parse(file, text string, fields []string) (*Script, []*SyntaxError) { 223 p := &parser{ 224 file: file, 225 s: text, 226 } 227 p.fields = make(map[string]bool) 228 for _, f := range fields { 229 p.fields[f] = true 230 } 231 var s Script 232 s.File = file 233 var errs []*SyntaxError 234 for { 235 r, err := p.parseRule() 236 if err != nil { 237 errs = append(errs, err.(*SyntaxError)) 238 i := strings.Index(p.s[p.i:], "\n") 239 if i < 0 { 240 break 241 } 242 p.i += i + 1 243 continue 244 } 245 if r == nil { 246 break 247 } 248 s.Rules = append(s.Rules, r) 249 } 250 return &s, errs 251 } 252 253 // parseRule parses a single rule from a script. 254 // On entry, the next input token has not been lexed. 255 // On exit, the next input token has been lexed and is in p.tok. 256 // If there is an error, it is guaranteed to be a *SyntaxError. 257 // parseRule returns nil, nil at end of file. 258 func (p *parser) parseRule() (x *Rule, err error) { 259 defer func() { 260 if e := recover(); e != nil { 261 if e, ok := e.(*SyntaxError); ok { 262 err = e 263 return 264 } 265 panic(e) // unreachable unless parser has a bug 266 } 267 }() 268 269 x = p.rule() 270 if p.tok != "" && p.tok != "\n" { 271 p.unexpected() 272 } 273 return x, nil 274 } 275 276 // unexpected reports a parse error due to an unexpected token 277 func (p *parser) unexpected() { 278 what := p.tok 279 switch what { 280 case "a": 281 what = "identifier " + p.lit 282 case "\"": 283 what = "quoted string " + p.lit 284 case "`": 285 what = "backquoted string " + p.lit 286 case "\n": 287 what = "end of line" 288 case "": 289 what = "end of script" 290 } 291 p.parseError("unexpected " + what) 292 } 293 294 // rule parses a single rule. 295 // On entry, the next input token has not yet been lexed. 296 // On exit, the next input token has been lexed and is in p.tok. 297 // If there is no next rule (the script has been read in its entirety), rule returns nil. 298 func (p *parser) rule() *Rule { 299 p.lex() 300 for p.tok == "\n" { 301 p.lex() 302 } 303 if p.tok == "" { 304 return nil 305 } 306 if p.tok != "a" { 307 p.unexpected() 308 } 309 action := p.lit 310 p.lex() 311 if p.tok != "<-" { 312 p.unexpected() 313 } 314 return &Rule{Action: action, Pattern: p.or()} 315 } 316 317 // or parses a sequence of || expressions. 318 // On entry, the next input token has not yet been lexed. 319 // On exit, the next input token has been lexed and is in p.tok. 320 func (p *parser) or() Expr { 321 x := p.and() 322 for p.tok == "||" { 323 x = or(x, p.and()) 324 } 325 return x 326 } 327 328 // and parses a sequence of && expressions. 329 // On entry, the next input token has not yet been lexed. 330 // On exit, the next input token has been lexed and is in p.tok. 331 func (p *parser) and() Expr { 332 x := p.cmp() 333 for p.tok == "&&" { 334 x = and(x, p.cmp()) 335 } 336 return x 337 } 338 339 // cmp parses a comparison expression or atom. 340 // On entry, the next input token has not been lexed. 341 // On exit, the next input token has been lexed and is in p.tok. 342 func (p *parser) cmp() Expr { 343 p.lex() 344 switch p.tok { 345 default: 346 p.unexpected() 347 case "!": 348 p.lex() 349 return not(p.atom()) 350 case "(", "\"", "`": 351 return p.atom() 352 case "a": 353 // comparison 354 field := p.lit 355 if !p.fields[field] { 356 p.parseError("unknown field " + field) 357 } 358 p.lex() 359 switch p.tok { 360 default: 361 p.unexpected() 362 case "==", "!=", "<", "<=", ">", ">=": 363 op := p.tok 364 p.lex() 365 if p.tok != "\"" { 366 p.parseError(op + " requires quoted string") 367 } 368 s := p.lit 369 p.lex() 370 return cmp(field, op, s) 371 case "~", "!~": 372 op := p.tok 373 p.lex() 374 if p.tok != "`" { 375 p.parseError(op + " requires backquoted regexp") 376 } 377 re, err := regcomp(p.lit) 378 if err != nil { 379 p.parseError("invalid regexp: " + err.Error()) 380 } 381 p.lex() 382 return regx(field, op == "!~", re) 383 } 384 } 385 panic("unreachable") 386 } 387 388 // atom parses a regexp or string comparison or a parenthesized expression. 389 // On entry, the next input token HAS been lexed. 390 // On exit, the next input token has been lexed and is in p.tok. 391 func (p *parser) atom() Expr { 392 // first token already in p.tok 393 switch p.tok { 394 default: 395 p.unexpected() 396 397 case "(": 398 defer func() { 399 if e := recover(); e != nil { 400 if e, ok := e.(*SyntaxError); ok && e.Err == "unexpected end of expression" { 401 e.Err = "missing close paren" 402 } 403 panic(e) 404 } 405 }() 406 x := p.or() 407 if p.tok != ")" { 408 p.parseError("missing close paren") 409 } 410 p.lex() 411 return x 412 413 case "`": 414 re, err := regcomp(p.lit) 415 if err != nil { 416 p.parseError("invalid regexp: " + err.Error()) 417 } 418 p.lex() 419 return regx("", false, re) 420 } 421 panic("unreachable") 422 } 423 424 // lex finds and consumes the next token in the input stream. 425 // On return, p.tok is set to the token text 426 // and p.pos records the byte offset of the start of the token in the input stream. 427 // If lex reaches the end of the input, p.tok is set to the empty string. 428 // For any other syntax error, lex panics with a SyntaxError. 429 func (p *parser) lex() { 430 Top: 431 for p.i < len(p.s) && (p.s[p.i] == ' ' || p.s[p.i] == '\t') { 432 p.i++ 433 } 434 if p.i >= len(p.s) { 435 p.tok = "" 436 p.pos = p.i 437 return 438 } 439 switch p.s[p.i] { 440 case '#': 441 // line comment 442 for p.i < len(p.s) && p.s[p.i] != '\n' { 443 p.i++ 444 } 445 goto Top 446 case '\n': 447 // like in Go, not a line ending if it follows a continuation token. 448 switch p.tok { 449 case "(", "&&", "||", "==", "!=", "~", "!~", "!", "<-": 450 p.i++ 451 goto Top 452 } 453 p.pos = p.i 454 p.i++ 455 p.tok = p.s[p.pos:p.i] 456 return 457 case '<': // <-, <= 458 p.pos = p.i 459 p.i++ 460 if p.i < len(p.s) && (p.s[p.i] == '-' || p.s[p.i] == '=') { 461 p.i++ 462 } 463 p.tok = p.s[p.pos:p.i] 464 return 465 case '!', '>': // ! != > >= 466 p.pos = p.i 467 p.i++ 468 if p.i < len(p.s) && p.s[p.i] == '=' { 469 p.i++ 470 } 471 p.tok = p.s[p.pos:p.i] 472 return 473 case '(', ')', '~': // ( ) ~ 474 p.pos = p.i 475 p.i++ 476 p.tok = p.s[p.pos:p.i] 477 return 478 case '&', '|', '=': // && || == 479 if p.i+1 >= len(p.s) || p.s[p.i+1] != p.s[p.i] { 480 p.lexError("invalid syntax at " + string(rune(p.s[p.i]))) 481 } 482 p.pos = p.i 483 p.i += 2 484 p.tok = p.s[p.pos:p.i] 485 return 486 case '`': 487 j := p.i + 1 488 for j < len(p.s) && p.s[j] != '`' { 489 if p.s[j] == '\n' { 490 p.lexError("newline in backquoted regexp") 491 } 492 j++ 493 } 494 if j >= len(p.s) { 495 p.lexError("unterminated backquoted regexp") 496 } 497 p.pos = p.i 498 p.i = j + 1 499 p.tok = "`" 500 p.lit = p.s[p.pos+1 : j] 501 return 502 case '"': 503 j := p.i + 1 504 for j < len(p.s) && p.s[j] != '"' { 505 if p.s[j] == '\n' { 506 p.lexError("newline in quoted string") 507 } 508 if p.s[j] == '\\' { 509 j++ 510 } 511 j++ 512 } 513 if j >= len(p.s) { 514 p.lexError("unterminated quoted string") 515 } 516 s, err := strconv.Unquote(p.s[p.i : j+1]) 517 if err != nil { 518 p.lexError("invalid quoted string: " + err.Error()) 519 } 520 p.pos = p.i 521 p.i = j + 1 522 p.tok = "\"" 523 p.lit = s 524 return 525 case '\'': 526 p.lexError("single-quoted strings not allowed") 527 } 528 529 // ascii name 530 if isalpha(p.s[p.i]) { 531 j := p.i 532 for j < len(p.s) && isalnum(p.s[j]) { 533 j++ 534 } 535 p.pos = p.i 536 p.i = j 537 p.tok = "a" 538 p.lit = p.s[p.pos:p.i] 539 return 540 } 541 542 c, _ := utf8.DecodeRuneInString(p.s[p.i:]) 543 p.lexError(fmt.Sprintf("invalid syntax at %q (U+%04x)", c, c)) 544 } 545 546 // lexError reports a lex error with the given error text. 547 func (p *parser) lexError(err string) { 548 p.errorAt(p.i, err) 549 } 550 551 // parseError reports a parse error with the given error text. 552 // (A parse error differs from a lex error in which parser position 553 // the error is attributed to.) 554 func (p *parser) parseError(err string) { 555 p.errorAt(p.pos, err) 556 } 557 558 // errorAt reports a syntax error at the given position. 559 func (p *parser) errorAt(pos int, err string) { 560 line := 1 + strings.Count(p.s[:pos], "\n") 561 i := pos - strings.LastIndex(p.s[:pos], "\n") 562 panic(&SyntaxError{File: p.file, Line: line, Offset: i, Err: err}) 563 } 564 565 // isalpha reports whether c is an ASCII alphabetic or _. 566 func isalpha(c byte) bool { 567 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || c == '_' 568 } 569 570 // isalnum reports whether c is an ASCII alphanumeric or _. 571 func isalnum(c byte) bool { 572 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' 573 }