github.com/zhangdapeng520/zdpgo_json@v0.1.5/match/match.go (about)

     1  // Package match provides a simple pattern matcher with unicode support.
     2  package match
     3  
     4  import (
     5  	"unicode/utf8"
     6  )
     7  
     8  // Match returns true if str matches pattern. This is a very
     9  // simple wildcard match where '*' matches on any number characters
    10  // and '?' matches on any one character.
    11  //
    12  // pattern:
    13  // 	{ term }
    14  // term:
    15  // 	'*'         matches any sequence of non-Separator characters
    16  // 	'?'         matches any single non-Separator character
    17  // 	c           matches character c (c != '*', '?', '\\')
    18  // 	'\\' c      matches character c
    19  //
    20  func Match(str, pattern string) bool {
    21  	if pattern == "*" {
    22  		return true
    23  	}
    24  	return match(str, pattern, 0, nil, -1) == rMatch
    25  }
    26  
    27  // MatchLimit is the same as Match but will limit the complexity of the match
    28  // operation. This is to avoid long running matches, specifically to avoid ReDos
    29  // attacks from arbritary inputs.
    30  //
    31  // How it works:
    32  // The underlying match routine is recursive and may call itself when it
    33  // encounters a sandwiched wildcard pattern, such as: `user:*:name`.
    34  // Everytime it calls itself a counter is incremented.
    35  // The operation is stopped when counter > maxcomp*len(str).
    36  func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) {
    37  	if pattern == "*" {
    38  		return true, false
    39  	}
    40  	counter := 0
    41  	r := match(str, pattern, len(str), &counter, maxcomp)
    42  	if r == rStop {
    43  		return false, true
    44  	}
    45  	return r == rMatch, false
    46  }
    47  
    48  type result int
    49  
    50  const (
    51  	rNoMatch result = iota
    52  	rMatch
    53  	rStop
    54  )
    55  
    56  func match(str, pat string, slen int, counter *int, maxcomp int) result {
    57  	// check complexity limit
    58  	if maxcomp > -1 {
    59  		if *counter > slen*maxcomp {
    60  			return rStop
    61  		}
    62  		*counter++
    63  	}
    64  
    65  	for len(pat) > 0 {
    66  		var wild bool
    67  		pc, ps := rune(pat[0]), 1
    68  		if pc > 0x7f {
    69  			pc, ps = utf8.DecodeRuneInString(pat)
    70  		}
    71  		var sc rune
    72  		var ss int
    73  		if len(str) > 0 {
    74  			sc, ss = rune(str[0]), 1
    75  			if sc > 0x7f {
    76  				sc, ss = utf8.DecodeRuneInString(str)
    77  			}
    78  		}
    79  		switch pc {
    80  		case '?':
    81  			if ss == 0 {
    82  				return rNoMatch
    83  			}
    84  		case '*':
    85  			// Ignore repeating stars.
    86  			for len(pat) > 1 && pat[1] == '*' {
    87  				pat = pat[1:]
    88  			}
    89  
    90  			// If this star is the last character then it must be a match.
    91  			if len(pat) == 1 {
    92  				return rMatch
    93  			}
    94  
    95  			// Match and trim any non-wildcard suffix characters.
    96  			var ok bool
    97  			str, pat, ok = matchTrimSuffix(str, pat)
    98  			if !ok {
    99  				return rNoMatch
   100  			}
   101  
   102  			// Check for single star again.
   103  			if len(pat) == 1 {
   104  				return rMatch
   105  			}
   106  
   107  			// Perform recursive wildcard search.
   108  			r := match(str, pat[1:], slen, counter, maxcomp)
   109  			if r != rNoMatch {
   110  				return r
   111  			}
   112  			if len(str) == 0 {
   113  				return rNoMatch
   114  			}
   115  			wild = true
   116  		default:
   117  			if ss == 0 {
   118  				return rNoMatch
   119  			}
   120  			if pc == '\\' {
   121  				pat = pat[ps:]
   122  				pc, ps = utf8.DecodeRuneInString(pat)
   123  				if ps == 0 {
   124  					return rNoMatch
   125  				}
   126  			}
   127  			if sc != pc {
   128  				return rNoMatch
   129  			}
   130  		}
   131  		str = str[ss:]
   132  		if !wild {
   133  			pat = pat[ps:]
   134  		}
   135  	}
   136  	if len(str) == 0 {
   137  		return rMatch
   138  	}
   139  	return rNoMatch
   140  }
   141  
   142  // matchTrimSuffix matches and trims any non-wildcard suffix characters.
   143  // Returns the trimed string and pattern.
   144  //
   145  // This is called because the pattern contains extra data after the wildcard
   146  // star. Here we compare any suffix characters in the pattern to the suffix of
   147  // the target string. Basically a reverse match that stops when a wildcard
   148  // character is reached. This is a little trickier than a forward match because
   149  // we need to evaluate an escaped character in reverse.
   150  //
   151  // Any matched characters will be trimmed from both the target
   152  // string and the pattern.
   153  func matchTrimSuffix(str, pat string) (string, string, bool) {
   154  	// It's expected that the pattern has at least two bytes and the first byte
   155  	// is a wildcard star '*'
   156  	match := true
   157  	for len(str) > 0 && len(pat) > 1 {
   158  		pc, ps := utf8.DecodeLastRuneInString(pat)
   159  		var esc bool
   160  		for i := 0; ; i++ {
   161  			if pat[len(pat)-ps-i-1] != '\\' {
   162  				if i&1 == 1 {
   163  					esc = true
   164  					ps++
   165  				}
   166  				break
   167  			}
   168  		}
   169  		if pc == '*' && !esc {
   170  			match = true
   171  			break
   172  		}
   173  		sc, ss := utf8.DecodeLastRuneInString(str)
   174  		if !((pc == '?' && !esc) || pc == sc) {
   175  			match = false
   176  			break
   177  		}
   178  		str = str[:len(str)-ss]
   179  		pat = pat[:len(pat)-ps]
   180  	}
   181  	return str, pat, match
   182  }
   183  
   184  var maxRuneBytes = [...]byte{244, 143, 191, 191}
   185  
   186  // Allowable parses the pattern and determines the minimum and maximum allowable
   187  // values that the pattern can represent.
   188  // When the max cannot be determined, 'true' will be returned
   189  // for infinite.
   190  func Allowable(pattern string) (min, max string) {
   191  	if pattern == "" || pattern[0] == '*' {
   192  		return "", ""
   193  	}
   194  
   195  	minb := make([]byte, 0, len(pattern))
   196  	maxb := make([]byte, 0, len(pattern))
   197  	var wild bool
   198  	for i := 0; i < len(pattern); i++ {
   199  		if pattern[i] == '*' {
   200  			wild = true
   201  			break
   202  		}
   203  		if pattern[i] == '?' {
   204  			minb = append(minb, 0)
   205  			maxb = append(maxb, maxRuneBytes[:]...)
   206  		} else {
   207  			minb = append(minb, pattern[i])
   208  			maxb = append(maxb, pattern[i])
   209  		}
   210  	}
   211  	if wild {
   212  		r, n := utf8.DecodeLastRune(maxb)
   213  		if r != utf8.RuneError {
   214  			if r < utf8.MaxRune {
   215  				r++
   216  				if r > 0x7f {
   217  					b := make([]byte, 4)
   218  					nn := utf8.EncodeRune(b, r)
   219  					maxb = append(maxb[:len(maxb)-n], b[:nn]...)
   220  				} else {
   221  					maxb = append(maxb[:len(maxb)-n], byte(r))
   222  				}
   223  			}
   224  		}
   225  	}
   226  	return string(minb), string(maxb)
   227  }
   228  
   229  // IsPattern returns true if the string is a pattern.
   230  func IsPattern(str string) bool {
   231  	for i := 0; i < len(str); i++ {
   232  		if str[i] == '*' || str[i] == '?' {
   233  			return true
   234  		}
   235  	}
   236  	return false
   237  }