github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/x/regexp/empty_matcher.go (about)

     1  // Copyright (c) 2023  Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  // Package regexp contains regexp processing related utilities.
    22  package regexp
    23  
    24  import (
    25  	"regexp"
    26  	"regexp/syntax"
    27  
    28  	"github.com/m3db/m3/src/m3ninx/index"
    29  )
    30  
    31  // MatchesEmptyValue returns true if the given regexp would match an empty value.
    32  func MatchesEmptyValue(expr []byte) (bool, error) {
    33  	parsed, err := syntax.Parse(string(expr), syntax.Perl)
    34  	if err != nil {
    35  		return false, err //nolint:propagate_error
    36  	}
    37  
    38  	switch matchesEmptyValueAnalytically(parsed) {
    39  	case yes:
    40  		return true, nil
    41  	case no:
    42  		return false, nil
    43  	default: // unknown - only now we resort to compilation and actual attempt to match the regexp
    44  		return matchesEmptyValueEmpirically(parsed)
    45  	}
    46  }
    47  
    48  func matchesEmptyValueAnalytically(r *syntax.Regexp) threeValuedLogic {
    49  	switch r.Op {
    50  	case syntax.OpEmptyMatch:
    51  		return yes
    52  
    53  	case syntax.OpLiteral:
    54  		if len(r.Rune) == 0 {
    55  			return yes
    56  		}
    57  		return no
    58  
    59  	case syntax.OpCharClass:
    60  		return no
    61  
    62  	case syntax.OpStar:
    63  		return yes
    64  
    65  	case syntax.OpCapture, syntax.OpPlus:
    66  		return matchesEmptyValueAnalytically(r.Sub[0])
    67  
    68  	case syntax.OpConcat:
    69  		var res = yes
    70  		for _, s := range r.Sub {
    71  			if m := matchesEmptyValueAnalytically(s); m == no {
    72  				return no
    73  			} else if m == unknown {
    74  				res = unknown
    75  			}
    76  		}
    77  		return res
    78  
    79  	case syntax.OpAlternate:
    80  		var res = no
    81  		for _, s := range r.Sub {
    82  			if m := matchesEmptyValueAnalytically(s); m == yes {
    83  				return yes
    84  			} else if m == unknown {
    85  				res = unknown
    86  			}
    87  		}
    88  		return res
    89  
    90  	default:
    91  		// If we even hit this case then we should fall back to
    92  		// compiling and running the regexp against an empty string.
    93  		return unknown
    94  	}
    95  }
    96  
    97  // matchesEmptyValueEmpirically follows the logic of index.CompileRegex(expr).
    98  func matchesEmptyValueEmpirically(r *syntax.Regexp) (bool, error) {
    99  	unanchored, err := index.EnsureRegexpUnanchored(r)
   100  	if err != nil {
   101  		return false, err //nolint:propagate_error
   102  	}
   103  
   104  	anchored := index.EnsureRegexpAnchored(unanchored)
   105  
   106  	return regexp.Match(anchored.String(), nil)
   107  }
   108  
   109  type threeValuedLogic uint8
   110  
   111  const (
   112  	no threeValuedLogic = iota
   113  	yes
   114  	unknown
   115  )