github.com/m3db/m3@v1.5.0/src/m3ninx/index/segment/fst/regexp/regexp.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 // Adapted from: https://raw.githubusercontent.com/blevesearch/bleve/master/index/scorch/segment/regexp.go 22 23 package regexp 24 25 import ( 26 "regexp/syntax" 27 28 vregexp "github.com/m3dbx/vellum/regexp" 29 ) 30 31 // ParseRegexp parses the provided regexp pattern into an equivalent matching automaton, and 32 // corresponding keys to bound prefix beginning and end during the FST search. 33 func ParseRegexp(pattern string) (a *vregexp.Regexp, prefixBeg, prefixEnd []byte, err error) { 34 parsed, err := syntax.Parse(pattern, syntax.Perl) 35 if err != nil { 36 return nil, nil, nil, err 37 } 38 return ParsedRegexp(pattern, parsed) 39 } 40 41 // ParsedRegexp uses the pre-parsed regexp pattern and creates an equivalent matching automaton, and 42 // corresponding keys to bound prefix beginning and end during the FST search. 43 func ParsedRegexp(pattern string, parsed *syntax.Regexp) (a *vregexp.Regexp, prefixBeg, prefixEnd []byte, err error) { 44 re, err := vregexp.NewParsedWithLimit(pattern, parsed, vregexp.DefaultLimit()) 45 if err != nil { 46 return nil, nil, nil, err 47 } 48 49 prefix := LiteralPrefix(parsed) 50 if prefix != "" { 51 prefixBeg := []byte(prefix) 52 prefixEnd := IncrementBytes(prefixBeg) 53 return re, prefixBeg, prefixEnd, nil 54 } 55 56 return re, nil, nil, nil 57 } 58 59 // LiteralPrefix returns the literal prefix given the parse tree for a regexp 60 func LiteralPrefix(s *syntax.Regexp) string { 61 // traverse the left-most branch in the parse tree as long as the 62 // node represents a concatenation 63 for s != nil && s.Op == syntax.OpConcat { 64 if len(s.Sub) < 1 { 65 return "" 66 } 67 68 s = s.Sub[0] 69 } 70 71 if s.Op == syntax.OpLiteral { 72 return string(s.Rune) 73 } 74 75 return "" // no literal prefix 76 } 77 78 // IncrementBytes increments the provided bytes to the next word boundary. 79 func IncrementBytes(in []byte) []byte { 80 rv := make([]byte, len(in)) 81 copy(rv, in) 82 for i := len(rv) - 1; i >= 0; i-- { 83 rv[i] = rv[i] + 1 84 if rv[i] != 0 { 85 return rv // didn't overflow, so stop 86 } 87 } 88 return nil // overflowed 89 }