github.com/m3db/m3@v1.5.0/src/m3ninx/index/segment/fst/regexp/regexp.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  // Adapted from: https://raw.githubusercontent.com/blevesearch/bleve/master/index/scorch/segment/regexp.go
    22  
    23  package regexp
    24  
    25  import (
    26  	"regexp/syntax"
    27  
    28  	vregexp "github.com/m3dbx/vellum/regexp"
    29  )
    30  
    31  // ParseRegexp parses the provided regexp pattern into an equivalent matching automaton, and
    32  // corresponding keys to bound prefix beginning and end during the FST search.
    33  func ParseRegexp(pattern string) (a *vregexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
    34  	parsed, err := syntax.Parse(pattern, syntax.Perl)
    35  	if err != nil {
    36  		return nil, nil, nil, err
    37  	}
    38  	return ParsedRegexp(pattern, parsed)
    39  }
    40  
    41  // ParsedRegexp uses the pre-parsed regexp pattern and creates an equivalent matching automaton, and
    42  // corresponding keys to bound prefix beginning and end during the FST search.
    43  func ParsedRegexp(pattern string, parsed *syntax.Regexp) (a *vregexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
    44  	re, err := vregexp.NewParsedWithLimit(pattern, parsed, vregexp.DefaultLimit())
    45  	if err != nil {
    46  		return nil, nil, nil, err
    47  	}
    48  
    49  	prefix := LiteralPrefix(parsed)
    50  	if prefix != "" {
    51  		prefixBeg := []byte(prefix)
    52  		prefixEnd := IncrementBytes(prefixBeg)
    53  		return re, prefixBeg, prefixEnd, nil
    54  	}
    55  
    56  	return re, nil, nil, nil
    57  }
    58  
    59  // LiteralPrefix returns the literal prefix given the parse tree for a regexp
    60  func LiteralPrefix(s *syntax.Regexp) string {
    61  	// traverse the left-most branch in the parse tree as long as the
    62  	// node represents a concatenation
    63  	for s != nil && s.Op == syntax.OpConcat {
    64  		if len(s.Sub) < 1 {
    65  			return ""
    66  		}
    67  
    68  		s = s.Sub[0]
    69  	}
    70  
    71  	if s.Op == syntax.OpLiteral {
    72  		return string(s.Rune)
    73  	}
    74  
    75  	return "" // no literal prefix
    76  }
    77  
    78  // IncrementBytes increments the provided bytes to the next word boundary.
    79  func IncrementBytes(in []byte) []byte {
    80  	rv := make([]byte, len(in))
    81  	copy(rv, in)
    82  	for i := len(rv) - 1; i >= 0; i-- {
    83  		rv[i] = rv[i] + 1
    84  		if rv[i] != 0 {
    85  			return rv // didn't overflow, so stop
    86  		}
    87  	}
    88  	return nil // overflowed
    89  }