github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/scrape/labels/regexp.go (about) 1 // Copyright 2020 The Prometheus Authors 2 // Copyright 2021 The Pyroscope Authors 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 package labels 17 18 import ( 19 "regexp" 20 "regexp/syntax" 21 "strings" 22 ) 23 24 type FastRegexMatcher struct { 25 re *regexp.Regexp 26 prefix string 27 suffix string 28 contains string 29 } 30 31 func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { 32 re, err := regexp.Compile("^(?:" + v + ")$") 33 if err != nil { 34 return nil, err 35 } 36 37 parsed, err := syntax.Parse(v, syntax.Perl) 38 if err != nil { 39 return nil, err 40 } 41 42 m := &FastRegexMatcher{ 43 re: re, 44 } 45 46 if parsed.Op == syntax.OpConcat { 47 m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed) 48 } 49 50 return m, nil 51 } 52 53 func (m *FastRegexMatcher) MatchString(s string) bool { 54 if m.prefix != "" && !strings.HasPrefix(s, m.prefix) { 55 return false 56 } 57 if m.suffix != "" && !strings.HasSuffix(s, m.suffix) { 58 return false 59 } 60 if m.contains != "" && !strings.Contains(s, m.contains) { 61 return false 62 } 63 return m.re.MatchString(s) 64 } 65 66 func (m *FastRegexMatcher) GetRegexString() string { 67 return m.re.String() 68 } 69 70 // optimizeConcatRegex returns literal prefix/suffix text that can be safely 71 // checked against the label value before running the regexp matcher. 
// The results are derived only from r's direct sub-expressions (r.Sub), after
// dropping a leading syntax.OpBeginText and a trailing syntax.OpEndText:
// prefix and suffix are the first and last remaining sub-expression when they
// are case-sensitive literals, and contains is the first case-sensitive
// literal strictly between those two. Any result is empty when no such literal
// exists. When a single sub-expression remains, it serves as both prefix and
// suffix.
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) {
	sub := r.Sub

	// The matcher is compiled with its own begin/end anchors, so an explicit
	// anchor at either edge of the concatenation adds nothing and would only
	// hide the literal next to it.
	if len(sub) > 0 && sub[0].Op == syntax.OpBeginText {
		sub = sub[1:]
	}
	if len(sub) > 0 && sub[len(sub)-1].Op == syntax.OpEndText {
		sub = sub[:len(sub)-1]
	}
	if len(sub) == 0 {
		return "", "", ""
	}

	// Case-folded literals are skipped: the callers compare the hints with
	// exact string functions, which would wrongly reject other casings.
	isExactLiteral := func(re *syntax.Regexp) bool {
		return re.Op == syntax.OpLiteral && re.Flags&syntax.FoldCase == 0
	}

	first, last := sub[0], sub[len(sub)-1]
	if isExactLiteral(first) {
		prefix = string(first.Rune)
	}
	if isExactLiteral(last) {
		suffix = string(last.Rune)
	}

	// Only the first interior literal is kept, to keep the fast path simple.
	// An index loop is used on purpose: slicing sub[1:len(sub)-1] would panic
	// when a single sub-expression remains.
	for i := 1; i < len(sub)-1; i++ {
		if isExactLiteral(sub[i]) {
			contains = string(sub[i].Rune)
			break
		}
	}

	return prefix, suffix, contains
}