github.com/fufuok/utils@v1.0.10/xjson/match/match.go (about) 1 // Package match provides a simple pattern matcher with unicode support. 2 package match 3 4 import ( 5 "unicode/utf8" 6 ) 7 8 // Match returns true if str matches pattern. This is a very 9 // simple wildcard match where '*' matches on any number characters 10 // and '?' matches on any one character. 11 // 12 // pattern: 13 // 14 // { term } 15 // 16 // term: 17 // 18 // '*' matches any sequence of non-Separator characters 19 // '?' matches any single non-Separator character 20 // c matches character c (c != '*', '?', '\\') 21 // '\\' c matches character c 22 func Match(str, pattern string) bool { 23 if pattern == "*" { 24 return true 25 } 26 return match(str, pattern, 0, nil, -1) == rMatch 27 } 28 29 // MatchLimit is the same as Match but will limit the complexity of the match 30 // operation. This is to avoid long running matches, specifically to avoid ReDos 31 // attacks from arbritary inputs. 32 // 33 // How it works: 34 // The underlying match routine is recursive and may call itself when it 35 // encounters a sandwiched wildcard pattern, such as: `user:*:name`. 36 // Everytime it calls itself a counter is incremented. 37 // The operation is stopped when counter > maxcomp*len(str). 
func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) {
	// A lone star matches everything; no work to limit.
	if pattern == "*" {
		return true, false
	}
	// The counter is shared by every recursive call made for this match.
	counter := 0
	r := match(str, pattern, len(str), &counter, maxcomp)
	if r == rStop {
		return false, true
	}
	return r == rMatch, false
}

// result is the outcome of a single call to match.
type result int

const (
	// rNoMatch means the string does not match the pattern.
	rNoMatch result = iota
	// rMatch means the string matches the pattern.
	rMatch
	// rStop means the complexity limit was exceeded before an answer was found.
	rStop
)

// match reports whether str matches pat.
//
// slen, counter and maxcomp implement the complexity limit: when maxcomp is
// greater than -1, every call first checks whether *counter has exceeded
// slen*maxcomp (returning rStop if so) and then increments *counter. When
// maxcomp is -1 or lower the limit is disabled and counter is never
// dereferenced, so it may be nil.
func match(str, pat string, slen int, counter *int, maxcomp int) result {
	// check complexity limit
	if maxcomp > -1 {
		if *counter > slen*maxcomp {
			return rStop
		}
		*counter++
	}

	for len(pat) > 0 {
		var wild bool

		// Decode the next pattern rune, with a fast path for ASCII.
		pc, ps := rune(pat[0]), 1
		if pc > 0x7f {
			pc, ps = utf8.DecodeRuneInString(pat)
		}

		// Decode the next string rune; ss stays 0 when str is exhausted.
		var sc rune
		var ss int
		if len(str) > 0 {
			sc, ss = rune(str[0]), 1
			if sc > 0x7f {
				sc, ss = utf8.DecodeRuneInString(str)
			}
		}

		switch pc {
		case '?':
			// Needs exactly one character to consume.
			if ss == 0 {
				return rNoMatch
			}
		case '*':
			// Ignore repeating stars.
			for len(pat) > 1 && pat[1] == '*' {
				pat = pat[1:]
			}

			// If this star is the last character then it must be a match.
			if len(pat) == 1 {
				return rMatch
			}

			// Match and trim any non-wildcard suffix characters.
			var ok bool
			str, pat, ok = matchTrimSuffix(str, pat)
			if !ok {
				return rNoMatch
			}

			// Check for single star again.
			if len(pat) == 1 {
				return rMatch
			}

			// Perform recursive wildcard search.
			r := match(str, pat[1:], slen, counter, maxcomp)
			if r != rNoMatch {
				return r
			}
			if len(str) == 0 {
				return rNoMatch
			}
			// Let the star absorb one more character of str and retry with
			// the same pattern on the next iteration.
			wild = true
		default:
			if ss == 0 {
				return rNoMatch
			}
			if pc == '\\' {
				// Escape: the following rune is matched literally. A
				// trailing backslash has nothing to escape and cannot match.
				pat = pat[ps:]
				pc, ps = utf8.DecodeRuneInString(pat)
				if ps == 0 {
					return rNoMatch
				}
			}
			if sc != pc {
				return rNoMatch
			}
		}
		str = str[ss:]
		if !wild {
			pat = pat[ps:]
		}
	}
	// The pattern is exhausted; it is a match only if the string is too.
	if len(str) == 0 {
		return rMatch
	}
	return rNoMatch
}

// matchTrimSuffix matches and trims any non-wildcard suffix characters.
// Returns the trimmed string and pattern, and false when a suffix character
// of the pattern does not match the corresponding character of the string.
//
// This is called because the pattern contains extra data after the wildcard
// star. Here we compare any suffix characters in the pattern to the suffix of
// the target string. Basically a reverse match that stops when a wildcard
// character is reached. This is a little trickier than a forward match because
// we need to evaluate an escaped character in reverse.
//
// Any matched characters will be trimmed from both the target
// string and the pattern.
func matchTrimSuffix(str, pat string) (string, string, bool) {
	// It's expected that the pattern has at least two bytes and the first byte
	// is a wildcard star '*'
	for len(str) > 0 && len(pat) > 1 {
		pc, ps := utf8.DecodeLastRuneInString(pat)

		// Count the backslashes immediately before the rune. An odd count
		// means the rune is escaped, in which case the escaping backslash is
		// trimmed together with it. The scan always terminates because the
		// first byte of pat is '*'.
		var esc bool
		for i := 0; ; i++ {
			if pat[len(pat)-ps-i-1] != '\\' {
				if i&1 == 1 {
					esc = true
					ps++
				}
				break
			}
		}

		// An unescaped star ends the literal suffix.
		if pc == '*' && !esc {
			break
		}

		sc, ss := utf8.DecodeLastRuneInString(str)
		if !((pc == '?' && !esc) || pc == sc) {
			return str, pat, false
		}
		str = str[:len(str)-ss]
		pat = pat[:len(pat)-ps]
	}
	return str, pat, true
}

// maxRuneBytes is the UTF-8 encoding of utf8.MaxRune (U+10FFFF).
var maxRuneBytes = [...]byte{244, 143, 191, 191}

// Allowable parses the pattern and determines the minimum and maximum allowable
// values that the pattern can represent.
//
// Only the part of the pattern before the first unescaped '*' is considered.
// Within it, each unescaped '?' contributes a zero byte to min and the
// encoding of utf8.MaxRune to max, an escaped character ('\\' c) contributes
// c itself, and any other byte is copied to both. When an unescaped '*' is
// present, the last rune of max is incremented by one (provided it is valid
// and below utf8.MaxRune) so that max sorts after the strings sharing that
// prefix.
//
// When the pattern is empty or begins with '*', no bounds can be determined
// and two empty strings are returned.
func Allowable(pattern string) (min, max string) {
	if pattern == "" || pattern[0] == '*' {
		return "", ""
	}

	minb := make([]byte, 0, len(pattern))
	maxb := make([]byte, 0, len(pattern))
	var wild bool
scan:
	for i := 0; i < len(pattern); i++ {
		c := pattern[i]
		switch {
		case c == '\\' && i+1 < len(pattern):
			// Escaped character: the next byte is literal, even when it is
			// '*' or '?'. A trailing backslash falls through to the default
			// case and is kept as-is.
			i++
			minb = append(minb, pattern[i])
			maxb = append(maxb, pattern[i])
		case c == '*':
			wild = true
			break scan
		case c == '?':
			minb = append(minb, 0)
			maxb = append(maxb, maxRuneBytes[:]...)
		default:
			minb = append(minb, c)
			maxb = append(maxb, c)
		}
	}
	if wild {
		r, n := utf8.DecodeLastRune(maxb)
		if r != utf8.RuneError && r < utf8.MaxRune {
			r++
			if r > 0x7f {
				b := make([]byte, 4)
				nn := utf8.EncodeRune(b, r)
				maxb = append(maxb[:len(maxb)-n], b[:nn]...)
			} else {
				maxb = append(maxb[:len(maxb)-n], byte(r))
			}
		}
	}
	return string(minb), string(maxb)
}

// IsPattern returns true if the string is a pattern, that is, if it contains
// a '*' or '?' byte anywhere. Escaping is not taken into account.
func IsPattern(str string) bool {
	for i := 0; i < len(str); i++ {
		if str[i] == '*' || str[i] == '?' {
			return true
		}
	}
	return false
}