go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/data/text/sequence/pattern.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package sequence implements matching for sequences of strings.
    16  //
    17  // The primary way to use this package is by first making a Pattern:
    18  //
    19  //	pat, err := NewPattern("^", "/(cat|bear)/", "...", "hello")
    20  //
    21  // Then you can use `pat` to match sequences of strings like:
    22  //
    23  //	pat.In("cat", "says", "hello", "friend") // true
    24  //	pat.In("bear", "hello")                  // true
    25  //	pat.In("dog", "hello")                   // false
    26  //	pat.In("extra", "cat", "hello")          // false
    27  //
    28  // See NewPattern for the types of tokens supported.
    29  //
    30  // You can also manually assemble a Pattern from Matchers (including the special
    31  // Ellipsis and Edge Matchers in this package), but it's anticipated that this
    32  // will be overly verbose for tests (where we expect this package will see the
    33  // most use).
    34  package sequence
    35  
    36  import (
    37  	"regexp"
    38  	"strings"
    39  
    40  	"go.chromium.org/luci/common/errors"
    41  )
    42  
// Pattern is an ordered group of Matchers which can be matched against a
// string sequence.
//
// A Pattern is usually built from string tokens with NewPattern, but it can
// also be assembled directly from Matchers, including this package's special
// Ellipsis and Edge Matchers.
type Pattern []Matcher
    46  
    47  // NewPattern returns a Pattern from a series of tokens.
    48  //
    49  // Tokens can be:
    50  //   - "/a regex/" - A regular expression surrounded by slashes.
    51  //   - "..." - An Ellipsis which matches any number of sequence entries.
    52  //   - "^" at index 0 - Zero-width matches at the beginning of the sequence.
    53  //   - "$" at index -1 - Zero-width matches at the end of the sequence.
    54  //   - "=string" - Literally match anything after the "=". Allows escaping
    55  //     special strings, e.g. "=/regex/", "=...", "=^", "=$", "==something".
    56  //   - "any other string" - Literally match without escaping.
    57  func NewPattern(patternTokens ...string) (Pattern, error) {
    58  	if len(patternTokens) == 0 {
    59  		return nil, nil
    60  	}
    61  	ret := make(Pattern, len(patternTokens))
    62  
    63  	prevEllipsis := false
    64  	for i, p := range patternTokens {
    65  		if strings.HasPrefix(p, "=") {
    66  			ret[i] = LiteralMatcher(p[1:])
    67  		} else if strings.HasPrefix(p, "/") && strings.HasSuffix(p, "/") {
    68  			pat, err := regexp.Compile(p[1 : len(p)-1])
    69  			if err != nil {
    70  				return nil, errors.Annotate(err, "invalid regexp (i=%d)", i).Err()
    71  			}
    72  			ret[i] = RegexpMatcher{pat}
    73  		} else if p == "..." {
    74  			ret[i] = Ellipsis
    75  		} else if p == "^" {
    76  			if i != 0 {
    77  				return nil, errors.Reason("cannot use `^` for Edge except at beginning (i=%d)", i).Err()
    78  			}
    79  			ret[i] = Edge
    80  		} else if p == "$" {
    81  			if i != len(patternTokens)-1 {
    82  				return nil, errors.Reason("cannot use `$` for Edge except at end (i=%d)", i).Err()
    83  			}
    84  			ret[i] = Edge
    85  		} else {
    86  			ret[i] = LiteralMatcher(p)
    87  		}
    88  		if ret[i] == Ellipsis {
    89  			if prevEllipsis {
    90  				return nil, errors.Reason("cannot have multiple Ellipsis in a row (i=%d)", i).Err()
    91  			}
    92  			prevEllipsis = true
    93  		} else {
    94  			prevEllipsis = false
    95  		}
    96  	}
    97  
    98  	return ret, nil
    99  }