github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/scrape/labels/regexp.go (about)

     1  // Copyright 2020 The Prometheus Authors
     2  // Copyright 2021 The Pyroscope Authors
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  // http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package labels
    17  
    18  import (
    19  	"regexp"
    20  	"regexp/syntax"
    21  	"strings"
    22  )
    23  
    24  type FastRegexMatcher struct {
    25  	re       *regexp.Regexp
    26  	prefix   string
    27  	suffix   string
    28  	contains string
    29  }
    30  
    31  func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
    32  	re, err := regexp.Compile("^(?:" + v + ")$")
    33  	if err != nil {
    34  		return nil, err
    35  	}
    36  
    37  	parsed, err := syntax.Parse(v, syntax.Perl)
    38  	if err != nil {
    39  		return nil, err
    40  	}
    41  
    42  	m := &FastRegexMatcher{
    43  		re: re,
    44  	}
    45  
    46  	if parsed.Op == syntax.OpConcat {
    47  		m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
    48  	}
    49  
    50  	return m, nil
    51  }
    52  
    53  func (m *FastRegexMatcher) MatchString(s string) bool {
    54  	if m.prefix != "" && !strings.HasPrefix(s, m.prefix) {
    55  		return false
    56  	}
    57  	if m.suffix != "" && !strings.HasSuffix(s, m.suffix) {
    58  		return false
    59  	}
    60  	if m.contains != "" && !strings.Contains(s, m.contains) {
    61  		return false
    62  	}
    63  	return m.re.MatchString(s)
    64  }
    65  
    66  func (m *FastRegexMatcher) GetRegexString() string {
    67  	return m.re.String()
    68  }
    69  
    70  // optimizeConcatRegex returns literal prefix/suffix text that can be safely
    71  // checked against the label value before running the regexp matcher.
    72  func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) {
    73  	sub := r.Sub
    74  
    75  	// We can safely remove begin and end text matchers respectively
    76  	// at the beginning and end of the regexp.
    77  	if len(sub) > 0 && sub[0].Op == syntax.OpBeginText {
    78  		sub = sub[1:]
    79  	}
    80  	if len(sub) > 0 && sub[len(sub)-1].Op == syntax.OpEndText {
    81  		sub = sub[:len(sub)-1]
    82  	}
    83  
    84  	if len(sub) == 0 {
    85  		return prefix, suffix, contains
    86  	}
    87  
    88  	// Given Prometheus regex matchers are always anchored to the begin/end
    89  	// of the text, if the first/last operations are literals, we can safely
    90  	// treat them as prefix/suffix.
    91  	if sub[0].Op == syntax.OpLiteral && (sub[0].Flags&syntax.FoldCase) == 0 {
    92  		prefix = string(sub[0].Rune)
    93  	}
    94  	if last := len(sub) - 1; sub[last].Op == syntax.OpLiteral && (sub[last].Flags&syntax.FoldCase) == 0 {
    95  		suffix = string(sub[last].Rune)
    96  	}
    97  
    98  	// If contains any literal which is not a prefix/suffix, we keep the
    99  	// 1st one. We do not keep the whole list of literals to simplify the
   100  	// fast path.
   101  	for i := 1; i < len(sub)-1; i++ {
   102  		if sub[i].Op == syntax.OpLiteral && (sub[i].Flags&syntax.FoldCase) == 0 {
   103  			contains = string(sub[i].Rune)
   104  			break
   105  		}
   106  	}
   107  
   108  	return prefix, suffix, contains
   109  }