// Copyright 2022 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package sequence implements matching for sequences of strings.
//
// The primary way to use this package is by first making a Pattern:
//
//	pat, err := NewPattern("^", "/(cat|bear)/", "...", "hello")
//
// Then you can use `pat` to match sequences of strings like:
//
//	pat.In("cat", "says", "hello", "friend") // true
//	pat.In("bear", "hello")                  // true
//	pat.In("dog", "hello")                   // false
//	pat.In("extra", "cat", "hello")          // false
//
// See NewPattern for the types of tokens supported.
//
// You can also manually assemble a Pattern from Matchers (including the special
// Ellipsis and Edge Matchers in this package), but it's anticipated that this
// will be overly verbose for tests (where we expect this package will see the
// most use).
package sequence

import (
	"regexp"
	"strings"

	"go.chromium.org/luci/common/errors"
)

// Pattern is a group of Matchers which can be matched against a string
// sequence.
//
// A Pattern is an ordered slice: build one from string tokens with NewPattern,
// or assemble it directly from Matcher values.
type Pattern []Matcher

// NewPattern returns a Pattern from a series of tokens.
//
// Tokens can be:
//   - "/a regex/" - A regular expression surrounded by slashes.
//   - "..." - An Ellipsis which matches any number of sequence entries.
52 // - "^" at index 0 - Zero-width matches at the beginning of the sequence. 53 // - "$" at index -1 - Zero-width matches at the end of the sequence. 54 // - "=string" - Literally match anything after the "=". Allows escaping 55 // special strings, e.g. "=/regex/", "=...", "=^", "=$", "==something". 56 // - "any other string" - Literally match without escaping. 57 func NewPattern(patternTokens ...string) (Pattern, error) { 58 if len(patternTokens) == 0 { 59 return nil, nil 60 } 61 ret := make(Pattern, len(patternTokens)) 62 63 prevEllipsis := false 64 for i, p := range patternTokens { 65 if strings.HasPrefix(p, "=") { 66 ret[i] = LiteralMatcher(p[1:]) 67 } else if strings.HasPrefix(p, "/") && strings.HasSuffix(p, "/") { 68 pat, err := regexp.Compile(p[1 : len(p)-1]) 69 if err != nil { 70 return nil, errors.Annotate(err, "invalid regexp (i=%d)", i).Err() 71 } 72 ret[i] = RegexpMatcher{pat} 73 } else if p == "..." { 74 ret[i] = Ellipsis 75 } else if p == "^" { 76 if i != 0 { 77 return nil, errors.Reason("cannot use `^` for Edge except at beginning (i=%d)", i).Err() 78 } 79 ret[i] = Edge 80 } else if p == "$" { 81 if i != len(patternTokens)-1 { 82 return nil, errors.Reason("cannot use `$` for Edge except at end (i=%d)", i).Err() 83 } 84 ret[i] = Edge 85 } else { 86 ret[i] = LiteralMatcher(p) 87 } 88 if ret[i] == Ellipsis { 89 if prevEllipsis { 90 return nil, errors.Reason("cannot have multiple Ellipsis in a row (i=%d)", i).Err() 91 } 92 prevEllipsis = true 93 } else { 94 prevEllipsis = false 95 } 96 } 97 98 return ret, nil 99 }