golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/watchflakes/internal/script/script.go (about)

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package script implements a simple classification scripting language.
     6  // A script is a sequence of rules of the form “action <- pattern”,
     7  // meaning send results matching pattern to the named action.
     8  package script
     9  
    10  import (
    11  	"fmt"
    12  	"regexp"
    13  	"strconv"
    14  	"strings"
    15  	"unicode/utf8"
    16  )
    17  
// A Script is a sequence of Action <- Pattern rules.
// Rules are consulted in order by the Action method.
type Script struct {
	File  string  // file name given to Parse; Parse does not read the file
	Rules []*Rule // rules, in the order they were parsed
}
    23  
// A Rule is a single Action <- Pattern rule:
// records matching Pattern are sent to the named Action.
type Rule struct {
	Action  string // "skip", "post", and so on
	Pattern Expr   // pattern expression
}
    29  
    30  // Action returns the action specified by the script for the given record.
    31  func (s *Script) Action(record Record) string {
    32  	for _, r := range s.Rules {
    33  		if r.Pattern.Match(record) {
    34  			return r.Action
    35  		}
    36  	}
    37  	return ""
    38  }
    39  
// A Record is a set of key:value pairs.
// The expression matchers in this package look up fields by indexing
// the map directly, so a key that is absent reads as the empty string.
type Record map[string]string
    42  
// An Expr is a pattern expression that can evaluate itself on a Record.
// The underlying concrete type is *CmpExpr, *AndExpr, *OrExpr, *NotExpr, or *RegExpr.
type Expr interface {
	// String returns the syntax for the pattern.
	String() string

	// Match reports whether the pattern matches the record.
	// It returns true on a match and false otherwise.
	Match(record Record) bool
}
    52  
// A CmpExpr is an Expr for a string comparison
// of the form Field Op "Literal".
type CmpExpr struct {
	Field   string // record key whose value is compared
	Op      string // "==", "!=", "<", "<=", ">", or ">="; any other Op never matches
	Literal string // string the field's value is compared against
}
    59  
    60  func (x *CmpExpr) Match(record Record) bool {
    61  	f := record[x.Field]
    62  	l := x.Literal
    63  	switch x.Op {
    64  	case "==":
    65  		return f == l
    66  	case "!=":
    67  		return f != l
    68  	case "<":
    69  		return f < l
    70  	case "<=":
    71  		return f <= l
    72  	case ">":
    73  		return f > l
    74  	case ">=":
    75  		return f >= l
    76  	}
    77  	return false
    78  }
    79  
    80  func (x *CmpExpr) String() string {
    81  	s := strconv.Quote(x.Literal)
    82  	if x.Field == "" {
    83  		return s
    84  	}
    85  	return x.Field + " " + x.Op + " " + s
    86  }
    87  
    88  func cmp(field, op, literal string) Expr { return &CmpExpr{field, op, literal} }
    89  
// A RegExpr is an Expr for a regular expression test
// of the form Field ~ `Regexp` or Field !~ `Regexp`.
type RegExpr struct {
	Field  string         // record key whose value is tested
	Not    bool           // if true, the expression matches when Regexp does not
	Regexp *regexp.Regexp // compiled regular expression
}
    96  
    97  func (x *RegExpr) Match(record Record) bool {
    98  	ok := x.Regexp.MatchString(record[x.Field])
    99  	if x.Not {
   100  		return !ok
   101  	}
   102  	return ok
   103  }
   104  
   105  func (x *RegExpr) String() string {
   106  	s := x.Regexp.String()
   107  	s = "`" + strings.ReplaceAll(s, "`", `\x60`) + "`"
   108  	if x.Field == "" {
   109  		return s
   110  	}
   111  	op := " ~ "
   112  	if x.Not {
   113  		op = " !~ "
   114  	}
   115  	return x.Field + op + s
   116  }
   117  
   118  func regx(field string, not bool, re *regexp.Regexp) Expr { return &RegExpr{field, not, re} }
   119  func regcomp(s string) (*regexp.Regexp, error) {
   120  	return regexp.Compile("(?m)" + s)
   121  }
   122  
// A NotExpr represents the expression !X (the negation of X).
type NotExpr struct {
	X Expr // operand being negated
}
   127  
   128  func (x *NotExpr) Match(record Record) bool {
   129  	return !x.X.Match(record)
   130  }
   131  
   132  func (x *NotExpr) String() string {
   133  	return "!(" + x.X.String() + ")"
   134  }
   135  
   136  func not(x Expr) Expr { return &NotExpr{x} }
   137  
// An AndExpr represents the expression X && Y.
type AndExpr struct {
	X, Y Expr // left and right operands; Y is evaluated only if X matches
}
   142  
   143  func (x *AndExpr) Match(record Record) bool {
   144  	return x.X.Match(record) && x.Y.Match(record)
   145  }
   146  
   147  func (x *AndExpr) String() string {
   148  	return andArg(x.X) + " && " + andArg(x.Y)
   149  }
   150  
   151  func andArg(x Expr) string {
   152  	s := x.String()
   153  	if _, ok := x.(*OrExpr); ok {
   154  		s = "(" + s + ")"
   155  	}
   156  	return s
   157  }
   158  
   159  func and(x, y Expr) Expr {
   160  	return &AndExpr{x, y}
   161  }
   162  
// An OrExpr represents the expression X || Y.
type OrExpr struct {
	X, Y Expr // left and right operands; Y is evaluated only if X does not match
}
   167  
   168  func (x *OrExpr) Match(record Record) bool {
   169  	return x.X.Match(record) || x.Y.Match(record)
   170  }
   171  
   172  func (x *OrExpr) String() string {
   173  	return orArg(x.X) + " || " + orArg(x.Y)
   174  }
   175  
   176  func orArg(x Expr) string {
   177  	s := x.String()
   178  	if _, ok := x.(*AndExpr); ok {
   179  		s = "(" + s + ")"
   180  	}
   181  	return s
   182  }
   183  
   184  func or(x, y Expr) Expr {
   185  	return &OrExpr{x, y}
   186  }
   187  
   188  // A SyntaxError reports a syntax error in a parsed match expression.
   189  type SyntaxError struct {
   190  	File   string // input file
   191  	Line   int    // line number where error was detected (1-indexed)
   192  	Offset int    // byte offset in line where error was detected (1-indexed)
   193  	Err    string // description of error
   194  }
   195  
   196  func (e *SyntaxError) Error() string {
   197  	if e.Offset == 0 {
   198  		return fmt.Sprintf("%s:%d: %s", e.File, e.Line, e.Err)
   199  	}
   200  	return fmt.Sprintf("%s:%d.%d: %s", e.File, e.Line, e.Offset, e.Err)
   201  }
   202  
// A parser holds state for parsing a script.
// Syntax errors are reported by panicking with a *SyntaxError,
// which parseRule recovers and returns as an ordinary error.
type parser struct {
	file   string          // input file, for errors
	s      string          // input string
	i      int             // next read location in s
	fields map[string]bool // known input fields for comparisons

	// tok is the last token read: "`", "\"", "a" for backquoted regexp,
	// literal string, identifier; "\n" for end of line; "" for end of input;
	// otherwise the text of the operator or punctuation itself.
	tok string
	lit string // text of backquoted regexp, literal string, or identifier
	pos int    // position (start) of last token
}
   214  
   215  // Parse parses text as a script,
   216  // returning the parsed form and any parse errors found.
   217  // (The parser attempts to recover after parse errors by starting over
   218  // at the next newline, so multiple parse errors are possible.)
   219  // The file argument is used for reporting the file name in errors
   220  // and in the Script's File field;
   221  // Parse does not read from the file itself.
   222  func Parse(file, text string, fields []string) (*Script, []*SyntaxError) {
   223  	p := &parser{
   224  		file: file,
   225  		s:    text,
   226  	}
   227  	p.fields = make(map[string]bool)
   228  	for _, f := range fields {
   229  		p.fields[f] = true
   230  	}
   231  	var s Script
   232  	s.File = file
   233  	var errs []*SyntaxError
   234  	for {
   235  		r, err := p.parseRule()
   236  		if err != nil {
   237  			errs = append(errs, err.(*SyntaxError))
   238  			i := strings.Index(p.s[p.i:], "\n")
   239  			if i < 0 {
   240  				break
   241  			}
   242  			p.i += i + 1
   243  			continue
   244  		}
   245  		if r == nil {
   246  			break
   247  		}
   248  		s.Rules = append(s.Rules, r)
   249  	}
   250  	return &s, errs
   251  }
   252  
   253  // parseRule parses a single rule from a script.
   254  // On entry, the next input token has not been lexed.
   255  // On exit, the next input token has been lexed and is in p.tok.
   256  // If there is an error, it is guaranteed to be a *SyntaxError.
   257  // parseRule returns nil, nil at end of file.
   258  func (p *parser) parseRule() (x *Rule, err error) {
   259  	defer func() {
   260  		if e := recover(); e != nil {
   261  			if e, ok := e.(*SyntaxError); ok {
   262  				err = e
   263  				return
   264  			}
   265  			panic(e) // unreachable unless parser has a bug
   266  		}
   267  	}()
   268  
   269  	x = p.rule()
   270  	if p.tok != "" && p.tok != "\n" {
   271  		p.unexpected()
   272  	}
   273  	return x, nil
   274  }
   275  
   276  // unexpected reports a parse error due to an unexpected token
   277  func (p *parser) unexpected() {
   278  	what := p.tok
   279  	switch what {
   280  	case "a":
   281  		what = "identifier " + p.lit
   282  	case "\"":
   283  		what = "quoted string " + p.lit
   284  	case "`":
   285  		what = "backquoted string " + p.lit
   286  	case "\n":
   287  		what = "end of line"
   288  	case "":
   289  		what = "end of script"
   290  	}
   291  	p.parseError("unexpected " + what)
   292  }
   293  
   294  // rule parses a single rule.
   295  // On entry, the next input token has not yet been lexed.
   296  // On exit, the next input token has been lexed and is in p.tok.
   297  // If there is no next rule (the script has been read in its entirety), rule returns nil.
   298  func (p *parser) rule() *Rule {
   299  	p.lex()
   300  	for p.tok == "\n" {
   301  		p.lex()
   302  	}
   303  	if p.tok == "" {
   304  		return nil
   305  	}
   306  	if p.tok != "a" {
   307  		p.unexpected()
   308  	}
   309  	action := p.lit
   310  	p.lex()
   311  	if p.tok != "<-" {
   312  		p.unexpected()
   313  	}
   314  	return &Rule{Action: action, Pattern: p.or()}
   315  }
   316  
   317  // or parses a sequence of || expressions.
   318  // On entry, the next input token has not yet been lexed.
   319  // On exit, the next input token has been lexed and is in p.tok.
   320  func (p *parser) or() Expr {
   321  	x := p.and()
   322  	for p.tok == "||" {
   323  		x = or(x, p.and())
   324  	}
   325  	return x
   326  }
   327  
   328  // and parses a sequence of && expressions.
   329  // On entry, the next input token has not yet been lexed.
   330  // On exit, the next input token has been lexed and is in p.tok.
   331  func (p *parser) and() Expr {
   332  	x := p.cmp()
   333  	for p.tok == "&&" {
   334  		x = and(x, p.cmp())
   335  	}
   336  	return x
   337  }
   338  
   339  // cmp parses a comparison expression or atom.
   340  // On entry, the next input token has not been lexed.
   341  // On exit, the next input token has been lexed and is in p.tok.
   342  func (p *parser) cmp() Expr {
   343  	p.lex()
   344  	switch p.tok {
   345  	default:
   346  		p.unexpected()
   347  	case "!":
   348  		p.lex()
   349  		return not(p.atom())
   350  	case "(", "\"", "`":
   351  		return p.atom()
   352  	case "a":
   353  		// comparison
   354  		field := p.lit
   355  		if !p.fields[field] {
   356  			p.parseError("unknown field " + field)
   357  		}
   358  		p.lex()
   359  		switch p.tok {
   360  		default:
   361  			p.unexpected()
   362  		case "==", "!=", "<", "<=", ">", ">=":
   363  			op := p.tok
   364  			p.lex()
   365  			if p.tok != "\"" {
   366  				p.parseError(op + " requires quoted string")
   367  			}
   368  			s := p.lit
   369  			p.lex()
   370  			return cmp(field, op, s)
   371  		case "~", "!~":
   372  			op := p.tok
   373  			p.lex()
   374  			if p.tok != "`" {
   375  				p.parseError(op + " requires backquoted regexp")
   376  			}
   377  			re, err := regcomp(p.lit)
   378  			if err != nil {
   379  				p.parseError("invalid regexp: " + err.Error())
   380  			}
   381  			p.lex()
   382  			return regx(field, op == "!~", re)
   383  		}
   384  	}
   385  	panic("unreachable")
   386  }
   387  
   388  // atom parses a regexp or string comparison or a parenthesized expression.
   389  // On entry, the next input token HAS been lexed.
   390  // On exit, the next input token has been lexed and is in p.tok.
   391  func (p *parser) atom() Expr {
   392  	// first token already in p.tok
   393  	switch p.tok {
   394  	default:
   395  		p.unexpected()
   396  
   397  	case "(":
   398  		defer func() {
   399  			if e := recover(); e != nil {
   400  				if e, ok := e.(*SyntaxError); ok && e.Err == "unexpected end of expression" {
   401  					e.Err = "missing close paren"
   402  				}
   403  				panic(e)
   404  			}
   405  		}()
   406  		x := p.or()
   407  		if p.tok != ")" {
   408  			p.parseError("missing close paren")
   409  		}
   410  		p.lex()
   411  		return x
   412  
   413  	case "`":
   414  		re, err := regcomp(p.lit)
   415  		if err != nil {
   416  			p.parseError("invalid regexp: " + err.Error())
   417  		}
   418  		p.lex()
   419  		return regx("", false, re)
   420  	}
   421  	panic("unreachable")
   422  }
   423  
   424  // lex finds and consumes the next token in the input stream.
   425  // On return, p.tok is set to the token text
   426  // and p.pos records the byte offset of the start of the token in the input stream.
   427  // If lex reaches the end of the input, p.tok is set to the empty string.
   428  // For any other syntax error, lex panics with a SyntaxError.
   429  func (p *parser) lex() {
   430  Top:
   431  	for p.i < len(p.s) && (p.s[p.i] == ' ' || p.s[p.i] == '\t') {
   432  		p.i++
   433  	}
   434  	if p.i >= len(p.s) {
   435  		p.tok = ""
   436  		p.pos = p.i
   437  		return
   438  	}
   439  	switch p.s[p.i] {
   440  	case '#':
   441  		// line comment
   442  		for p.i < len(p.s) && p.s[p.i] != '\n' {
   443  			p.i++
   444  		}
   445  		goto Top
   446  	case '\n':
   447  		// like in Go, not a line ending if it follows a continuation token.
   448  		switch p.tok {
   449  		case "(", "&&", "||", "==", "!=", "~", "!~", "!", "<-":
   450  			p.i++
   451  			goto Top
   452  		}
   453  		p.pos = p.i
   454  		p.i++
   455  		p.tok = p.s[p.pos:p.i]
   456  		return
   457  	case '<': // <-, <=
   458  		p.pos = p.i
   459  		p.i++
   460  		if p.i < len(p.s) && (p.s[p.i] == '-' || p.s[p.i] == '=') {
   461  			p.i++
   462  		}
   463  		p.tok = p.s[p.pos:p.i]
   464  		return
   465  	case '!', '>': // ! != > >=
   466  		p.pos = p.i
   467  		p.i++
   468  		if p.i < len(p.s) && p.s[p.i] == '=' {
   469  			p.i++
   470  		}
   471  		p.tok = p.s[p.pos:p.i]
   472  		return
   473  	case '(', ')', '~': // ( ) ~
   474  		p.pos = p.i
   475  		p.i++
   476  		p.tok = p.s[p.pos:p.i]
   477  		return
   478  	case '&', '|', '=': // && || ==
   479  		if p.i+1 >= len(p.s) || p.s[p.i+1] != p.s[p.i] {
   480  			p.lexError("invalid syntax at " + string(rune(p.s[p.i])))
   481  		}
   482  		p.pos = p.i
   483  		p.i += 2
   484  		p.tok = p.s[p.pos:p.i]
   485  		return
   486  	case '`':
   487  		j := p.i + 1
   488  		for j < len(p.s) && p.s[j] != '`' {
   489  			if p.s[j] == '\n' {
   490  				p.lexError("newline in backquoted regexp")
   491  			}
   492  			j++
   493  		}
   494  		if j >= len(p.s) {
   495  			p.lexError("unterminated backquoted regexp")
   496  		}
   497  		p.pos = p.i
   498  		p.i = j + 1
   499  		p.tok = "`"
   500  		p.lit = p.s[p.pos+1 : j]
   501  		return
   502  	case '"':
   503  		j := p.i + 1
   504  		for j < len(p.s) && p.s[j] != '"' {
   505  			if p.s[j] == '\n' {
   506  				p.lexError("newline in quoted string")
   507  			}
   508  			if p.s[j] == '\\' {
   509  				j++
   510  			}
   511  			j++
   512  		}
   513  		if j >= len(p.s) {
   514  			p.lexError("unterminated quoted string")
   515  		}
   516  		s, err := strconv.Unquote(p.s[p.i : j+1])
   517  		if err != nil {
   518  			p.lexError("invalid quoted string: " + err.Error())
   519  		}
   520  		p.pos = p.i
   521  		p.i = j + 1
   522  		p.tok = "\""
   523  		p.lit = s
   524  		return
   525  	case '\'':
   526  		p.lexError("single-quoted strings not allowed")
   527  	}
   528  
   529  	// ascii name
   530  	if isalpha(p.s[p.i]) {
   531  		j := p.i
   532  		for j < len(p.s) && isalnum(p.s[j]) {
   533  			j++
   534  		}
   535  		p.pos = p.i
   536  		p.i = j
   537  		p.tok = "a"
   538  		p.lit = p.s[p.pos:p.i]
   539  		return
   540  	}
   541  
   542  	c, _ := utf8.DecodeRuneInString(p.s[p.i:])
   543  	p.lexError(fmt.Sprintf("invalid syntax at %q (U+%04x)", c, c))
   544  }
   545  
   546  // lexError reports a lex error with the given error text.
   547  func (p *parser) lexError(err string) {
   548  	p.errorAt(p.i, err)
   549  }
   550  
   551  // parseError reports a parse error with the given error text.
   552  // (A parse error differs from a lex error in which parser position
   553  // the error is attributed to.)
   554  func (p *parser) parseError(err string) {
   555  	p.errorAt(p.pos, err)
   556  }
   557  
   558  // errorAt reports a syntax error at the given position.
   559  func (p *parser) errorAt(pos int, err string) {
   560  	line := 1 + strings.Count(p.s[:pos], "\n")
   561  	i := pos - strings.LastIndex(p.s[:pos], "\n")
   562  	panic(&SyntaxError{File: p.file, Line: line, Offset: i, Err: err})
   563  }
   564  
   565  // isalpha reports whether c is an ASCII alphabetic or _.
   566  func isalpha(c byte) bool {
   567  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || c == '_'
   568  }
   569  
   570  // isalnum reports whether c is an ASCII alphanumeric or _.
   571  func isalnum(c byte) bool {
   572  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_'
   573  }