go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/data/text/sequence/in.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sequence
    16  
    17  import (
    18  	"go.chromium.org/luci/common/errors"
    19  )
    20  
    21  // In checks this pattern against the given sequence.
    22  //
    23  // If this Pattern is malformed, panics; If you created Pattern with NewPattern
    24  // and it didn't return an error, the Pattern is not malformed and this method
    25  // will not panic.
    26  //
    27  // By default In matches the pattern sequence anywhere; You can constrain
    28  // this by setting Edge at the start or end of the pattern.
    29  //
    30  // Examples:
    31  //
    32  //	// true
    33  //	NewPattern("foo").In("foo", "bar", "baz")
    34  //	NewPattern("/a/").In("foo", "bar", "baz")
    35  //	NewPattern("foo", "bar").In("foo", "bar", "baz")
    36  //	NewPattern("/o$/", "bar", "/^b/").In("foo", "bar", "baz")
    37  //	NewPattern("foo", "...", "bar").In("foo", "a", "b", "bar")
    38  //	NewPattern("foo", "...", "bar").In("foo", "bar")
    39  //	NewPattern("^", "bar", "baz").In("bar", "baz", "foo")
    40  //
    41  //	// false
    42  //	NewPattern("^", "bar", "baz").In("foo", "bar", "baz")
    43  func (p Pattern) In(seq ...string) bool {
    44  	if len(p) == 0 {
    45  		return true
    46  	}
    47  
    48  	// Ellipsis and Edge have a minimum width of 0, everything else has a match
    49  	// size of 1 slot. Do a pass over matchers to calculate the number of slots
    50  	// required to match `p[i:]`.
    51  	minSlotsCount := 0
    52  	minSlots := make([]int, len(p))
    53  	prevEllipsis := false
    54  	for i, matcher := range p {
    55  		isEdge := matcher == Edge
    56  		isEllipsis := matcher == Ellipsis
    57  		if isEdge && i > 0 && i < len(p)-1 {
    58  			panic(errors.Reason("cannot have Edge in the middle of a Pattern (i=%d)", i).Err())
    59  		}
    60  		if isEllipsis {
    61  			if prevEllipsis {
    62  				panic(errors.Reason("cannot have multiple Ellipsis in a row (i=%d)", i).Err())
    63  			}
    64  			prevEllipsis = true
    65  		} else {
    66  			prevEllipsis = false
    67  		}
    68  		if !isEllipsis && !isEdge {
    69  			minSlotsCount++
    70  		}
    71  		minSlots[len(minSlots)-1-i] = minSlotsCount
    72  	}
    73  	// If p looked like ['a', 'b', ..., 'c'], minSlots now looks like:
    74  	// [3, 2, 1, 1]
    75  
    76  	var cachedMatchesSeq func(pOffset, seqOffset int) bool
    77  
    78  	matchesPattern := func(pOffset, seqOffset int) bool {
    79  		maxSlot := len(seq) - minSlots[pOffset]
    80  		if maxSlot < seqOffset {
    81  			return false
    82  		}
    83  
    84  		for seqIdx := seqOffset; seqIdx < maxSlot+1; seqIdx++ {
    85  			numMatched := 0
    86  			matches := true
    87  			for matcherIdx := pOffset; matcherIdx < len(p); matcherIdx++ {
    88  				matcher := p[matcherIdx]
    89  
    90  				if matcher == Edge {
    91  					// edge is a 0-width match if the current sequence item is the start
    92  					// or end of the sequence.
    93  					//
    94  					// Note that we compare against len(seq) rather than len(seq)-1,
    95  					// because a 1-length sequence would, in fact, have seqIdx==0 and
    96  					// numMatched==1.
    97  					if (seqIdx+numMatched) == 0 || (seqIdx+numMatched) == len(seq) {
    98  						continue
    99  					}
   100  
   101  					// If the edge doesn't match, there is no use in trying to match it at
   102  					// other positions.
   103  					return false
   104  				}
   105  
   106  				// If this is Ellipsis we consume it, and try matching the rest of the
   107  				// matchers against the rest of the sequence at every offset.
   108  				if matcher == Ellipsis {
   109  					for startIdx := seqOffset + numMatched; startIdx < len(seq)-minSlots[matcherIdx+1]; startIdx++ {
   110  						if cachedMatchesSeq(matcherIdx+1, startIdx) {
   111  							return true
   112  						}
   113  					}
   114  					return false
   115  				}
   116  
   117  				if !matcher.Matches(seq[seqIdx+numMatched]) {
   118  					matches = false
   119  					break
   120  				}
   121  				numMatched++
   122  			}
   123  			if matches {
   124  				return true
   125  			}
   126  		}
   127  		return false
   128  	}
   129  
   130  	// Since we have Ellipsis which can match any number of positions, including
   131  	// zero, we memoize _matches_seq to avoid doing duplicate checks. This caps
   132  	// the runtime of this matcher at O(len(matchers) * len(seq)); Otherwise this
   133  	// would be quadratic on seq.
   134  	type cacheKey struct {
   135  		pOffset   int
   136  		seqOffset int
   137  	}
   138  	cache := map[cacheKey]bool{}
   139  
   140  	cachedMatchesSeq = func(pOffset, seqOffset int) bool {
   141  		key := cacheKey{pOffset, seqOffset}
   142  		if ret, ok := cache[key]; ok {
   143  			return ret
   144  		}
   145  		ret := matchesPattern(pOffset, seqOffset)
   146  		cache[key] = ret
   147  		return ret
   148  	}
   149  
   150  	return cachedMatchesSeq(0, 0)
   151  }