github.com/fufuok/utils@v1.0.10/xjson/match/match.go (about)

     1  // Package match provides a simple pattern matcher with unicode support.
     2  package match
     3  
     4  import (
     5  	"unicode/utf8"
     6  )
     7  
     8  // Match returns true if str matches pattern. This is a very
     9  // simple wildcard match where '*' matches on any number characters
    10  // and '?' matches on any one character.
    11  //
    12  // pattern:
    13  //
    14  //	{ term }
    15  //
    16  // term:
    17  //
    18  //	'*'         matches any sequence of non-Separator characters
    19  //	'?'         matches any single non-Separator character
    20  //	c           matches character c (c != '*', '?', '\\')
    21  //	'\\' c      matches character c
    22  func Match(str, pattern string) bool {
    23  	if pattern == "*" {
    24  		return true
    25  	}
    26  	return match(str, pattern, 0, nil, -1) == rMatch
    27  }
    28  
    29  // MatchLimit is the same as Match but will limit the complexity of the match
    30  // operation. This is to avoid long running matches, specifically to avoid ReDos
    31  // attacks from arbritary inputs.
    32  //
    33  // How it works:
    34  // The underlying match routine is recursive and may call itself when it
    35  // encounters a sandwiched wildcard pattern, such as: `user:*:name`.
    36  // Everytime it calls itself a counter is incremented.
    37  // The operation is stopped when counter > maxcomp*len(str).
    38  func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) {
    39  	if pattern == "*" {
    40  		return true, false
    41  	}
    42  	counter := 0
    43  	r := match(str, pattern, len(str), &counter, maxcomp)
    44  	if r == rStop {
    45  		return false, true
    46  	}
    47  	return r == rMatch, false
    48  }
    49  
    50  type result int
    51  
    52  const (
    53  	rNoMatch result = iota
    54  	rMatch
    55  	rStop
    56  )
    57  
    58  func match(str, pat string, slen int, counter *int, maxcomp int) result {
    59  	// check complexity limit
    60  	if maxcomp > -1 {
    61  		if *counter > slen*maxcomp {
    62  			return rStop
    63  		}
    64  		*counter++
    65  	}
    66  
    67  	for len(pat) > 0 {
    68  		var wild bool
    69  		pc, ps := rune(pat[0]), 1
    70  		if pc > 0x7f {
    71  			pc, ps = utf8.DecodeRuneInString(pat)
    72  		}
    73  		var sc rune
    74  		var ss int
    75  		if len(str) > 0 {
    76  			sc, ss = rune(str[0]), 1
    77  			if sc > 0x7f {
    78  				sc, ss = utf8.DecodeRuneInString(str)
    79  			}
    80  		}
    81  		switch pc {
    82  		case '?':
    83  			if ss == 0 {
    84  				return rNoMatch
    85  			}
    86  		case '*':
    87  			// Ignore repeating stars.
    88  			for len(pat) > 1 && pat[1] == '*' {
    89  				pat = pat[1:]
    90  			}
    91  
    92  			// If this star is the last character then it must be a match.
    93  			if len(pat) == 1 {
    94  				return rMatch
    95  			}
    96  
    97  			// Match and trim any non-wildcard suffix characters.
    98  			var ok bool
    99  			str, pat, ok = matchTrimSuffix(str, pat)
   100  			if !ok {
   101  				return rNoMatch
   102  			}
   103  
   104  			// Check for single star again.
   105  			if len(pat) == 1 {
   106  				return rMatch
   107  			}
   108  
   109  			// Perform recursive wildcard search.
   110  			r := match(str, pat[1:], slen, counter, maxcomp)
   111  			if r != rNoMatch {
   112  				return r
   113  			}
   114  			if len(str) == 0 {
   115  				return rNoMatch
   116  			}
   117  			wild = true
   118  		default:
   119  			if ss == 0 {
   120  				return rNoMatch
   121  			}
   122  			if pc == '\\' {
   123  				pat = pat[ps:]
   124  				pc, ps = utf8.DecodeRuneInString(pat)
   125  				if ps == 0 {
   126  					return rNoMatch
   127  				}
   128  			}
   129  			if sc != pc {
   130  				return rNoMatch
   131  			}
   132  		}
   133  		str = str[ss:]
   134  		if !wild {
   135  			pat = pat[ps:]
   136  		}
   137  	}
   138  	if len(str) == 0 {
   139  		return rMatch
   140  	}
   141  	return rNoMatch
   142  }
   143  
   144  // matchTrimSuffix matches and trims any non-wildcard suffix characters.
   145  // Returns the trimed string and pattern.
   146  //
   147  // This is called because the pattern contains extra data after the wildcard
   148  // star. Here we compare any suffix characters in the pattern to the suffix of
   149  // the target string. Basically a reverse match that stops when a wildcard
   150  // character is reached. This is a little trickier than a forward match because
   151  // we need to evaluate an escaped character in reverse.
   152  //
   153  // Any matched characters will be trimmed from both the target
   154  // string and the pattern.
   155  func matchTrimSuffix(str, pat string) (string, string, bool) {
   156  	// It's expected that the pattern has at least two bytes and the first byte
   157  	// is a wildcard star '*'
   158  	match := true
   159  	for len(str) > 0 && len(pat) > 1 {
   160  		pc, ps := utf8.DecodeLastRuneInString(pat)
   161  		var esc bool
   162  		for i := 0; ; i++ {
   163  			if pat[len(pat)-ps-i-1] != '\\' {
   164  				if i&1 == 1 {
   165  					esc = true
   166  					ps++
   167  				}
   168  				break
   169  			}
   170  		}
   171  		if pc == '*' && !esc {
   172  			match = true
   173  			break
   174  		}
   175  		sc, ss := utf8.DecodeLastRuneInString(str)
   176  		if !((pc == '?' && !esc) || pc == sc) {
   177  			match = false
   178  			break
   179  		}
   180  		str = str[:len(str)-ss]
   181  		pat = pat[:len(pat)-ps]
   182  	}
   183  	return str, pat, match
   184  }
   185  
   186  var maxRuneBytes = [...]byte{244, 143, 191, 191}
   187  
   188  // Allowable parses the pattern and determines the minimum and maximum allowable
   189  // values that the pattern can represent.
   190  // When the max cannot be determined, 'true' will be returned
   191  // for infinite.
   192  func Allowable(pattern string) (min, max string) {
   193  	if pattern == "" || pattern[0] == '*' {
   194  		return "", ""
   195  	}
   196  
   197  	minb := make([]byte, 0, len(pattern))
   198  	maxb := make([]byte, 0, len(pattern))
   199  	var wild bool
   200  	for i := 0; i < len(pattern); i++ {
   201  		if pattern[i] == '*' {
   202  			wild = true
   203  			break
   204  		}
   205  		if pattern[i] == '?' {
   206  			minb = append(minb, 0)
   207  			maxb = append(maxb, maxRuneBytes[:]...)
   208  		} else {
   209  			minb = append(minb, pattern[i])
   210  			maxb = append(maxb, pattern[i])
   211  		}
   212  	}
   213  	if wild {
   214  		r, n := utf8.DecodeLastRune(maxb)
   215  		if r != utf8.RuneError {
   216  			if r < utf8.MaxRune {
   217  				r++
   218  				if r > 0x7f {
   219  					b := make([]byte, 4)
   220  					nn := utf8.EncodeRune(b, r)
   221  					maxb = append(maxb[:len(maxb)-n], b[:nn]...)
   222  				} else {
   223  					maxb = append(maxb[:len(maxb)-n], byte(r))
   224  				}
   225  			}
   226  		}
   227  	}
   228  	return string(minb), string(maxb)
   229  }
   230  
   231  // IsPattern returns true if the string is a pattern.
   232  func IsPattern(str string) bool {
   233  	for i := 0; i < len(str); i++ {
   234  		if str[i] == '*' || str[i] == '?' {
   235  			return true
   236  		}
   237  	}
   238  	return false
   239  }