github.com/AndrewDeryabin/doublestar/v4@v4.0.0-20230123132908-d9476b7d41be/match.go (about) 1 package doublestar 2 3 import ( 4 "path/filepath" 5 "unicode/utf8" 6 ) 7 8 // Match reports whether name matches the shell pattern. 9 // The pattern syntax is: 10 // 11 // pattern: 12 // { term } 13 // term: 14 // '*' matches any sequence of non-path-separators 15 // '/**/' matches zero or more directories 16 // '?' matches any single non-path-separator character 17 // '[' [ '^' '!' ] { character-range } ']' 18 // character class (must be non-empty) 19 // starting with `^` or `!` negates the class 20 // '{' { term } [ ',' { term } ... ] '}' 21 // alternatives 22 // c matches character c (c != '*', '?', '\\', '[') 23 // '\\' c matches character c 24 // 25 // character-range: 26 // c matches character c (c != '\\', '-', ']') 27 // '\\' c matches character c 28 // lo '-' hi matches character c for lo <= c <= hi 29 // 30 // Match returns true if `name` matches the file name `pattern`. `name` and 31 // `pattern` are split on forward slash (`/`) characters and may be relative or 32 // absolute. 33 // 34 // Match requires pattern to match all of name, not just a substring. 35 // The only possible returned error is ErrBadPattern, when pattern 36 // is malformed. 37 // 38 // A doublestar (`**`) should appear surrounded by path separators such as 39 // `/**/`. A mid-pattern doublestar (`**`) behaves like bash's globstar 40 // option: a pattern such as `path/to/**.txt` would return the same results as 41 // `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`. 42 // 43 // Note: this is meant as a drop-in replacement for path.Match() which 44 // always uses '/' as the path separator. If you want to support systems 45 // which use a different path separator (such as Windows), what you want 46 // is PathMatch(). Alternatively, you can run filepath.ToSlash() on both 47 // pattern and name and then use this function. 48 // 49 // Note: users should _not_ count on the returned error, 50 // doublestar.ErrBadPattern, being equal to path.ErrBadPattern. 51 // 52 func Match(pattern, name string) (bool, error) { 53 return matchWithSeparator(pattern, name, '/', true) 54 } 55 56 // PathMatch returns true if `name` matches the file name `pattern`. The 57 // difference between Match and PathMatch is that PathMatch will automatically 58 // use your system's path separator to split `name` and `pattern`. On systems 59 // where the path separator is `'\'`, escaping will be disabled. 60 // 61 // Note: this is meant as a drop-in replacement for filepath.Match(). It 62 // assumes that both `pattern` and `name` are using the system's path 63 // separator. If you can't be sure of that, use filepath.ToSlash() on both 64 // `pattern` and `name`, and then use the Match() function instead. 65 // 66 func PathMatch(pattern, name string) (bool, error) { 67 return matchWithSeparator(pattern, name, filepath.Separator, true) 68 } 69 70 // MatchWithSeparator returns true if `name` matches the file name `pattern` 71 // using the specified rune as separator. 72 func MatchWithSeparator(pattern, name string, separator rune) (bool, error) { 73 return matchWithSeparator(pattern, name, separator, true) 74 } 75 76 func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) { 77 return doMatchWithSeparator(pattern, name, separator, validate, -1, -1, -1, -1, 0, 0) 78 } 79 80 func doMatchWithSeparator(pattern, name string, separator rune, validate bool, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, patIdx, nameIdx int) (matched bool, err error) { 81 patLen := len(pattern) 82 nameLen := len(name) 83 startOfSegment := true 84 MATCH: 85 for nameIdx < nameLen { 86 if patIdx < patLen { 87 switch pattern[patIdx] { 88 case '*': 89 if patIdx++; patIdx < patLen && pattern[patIdx] == '*' { 90 // doublestar - must begin with a path separator, otherwise we'll 91 // treat it like a single star like bash 92 patIdx++ 93 if startOfSegment { 94 if patIdx >= patLen { 95 // pattern ends in `/**`: return true 96 return true, nil 97 } 98 99 // doublestar must also end with a path separator, otherwise we're 100 // just going to treat the doublestar as a single star like bash 101 patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) 102 if patRune == separator { 103 patIdx += patRuneLen 104 105 doublestarPatternBacktrack = patIdx 106 doublestarNameBacktrack = nameIdx 107 starPatternBacktrack = -1 108 starNameBacktrack = -1 109 continue 110 } 111 } 112 } 113 startOfSegment = false 114 115 starPatternBacktrack = patIdx 116 starNameBacktrack = nameIdx 117 continue 118 119 case '?': 120 startOfSegment = false 121 nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) 122 if nameRune == separator { 123 // `?` cannot match the separator 124 break 125 } 126 127 patIdx++ 128 nameIdx += nameRuneLen 129 continue 130 131 case '[': 132 startOfSegment = false 133 if patIdx++; patIdx >= patLen { 134 // class didn't end 135 return false, ErrBadPattern 136 } 137 nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) 138 139 matched := false 140 negate := pattern[patIdx] == '!' || pattern[patIdx] == '^' 141 if negate { 142 patIdx++ 143 } 144 145 if patIdx >= patLen || pattern[patIdx] == ']' { 146 // class didn't end or empty character class 147 return false, ErrBadPattern 148 } 149 150 last := utf8.MaxRune 151 for patIdx < patLen && pattern[patIdx] != ']' { 152 patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) 153 patIdx += patRuneLen 154 155 // match a range 156 if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' { 157 if pattern[patIdx] == '\\' { 158 // next character is escaped 159 patIdx++ 160 } 161 patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:]) 162 patIdx += patRuneLen 163 164 if last <= nameRune && nameRune <= patRune { 165 matched = true 166 break 167 } 168 169 // didn't match range - reset `last` 170 last = utf8.MaxRune 171 continue 172 } 173 174 // not a range - check if the next rune is escaped 175 if patRune == '\\' { 176 patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:]) 177 patIdx += patRuneLen 178 } 179 180 // check if the rune matches 181 if patRune == nameRune { 182 matched = true 183 break 184 } 185 186 // no matches yet 187 last = patRune 188 } 189 190 if matched == negate { 191 // failed to match - if we reached the end of the pattern, that means 192 // we never found a closing `]` 193 if patIdx >= patLen { 194 return false, ErrBadPattern 195 } 196 break 197 } 198 199 closingIdx := indexUnescapedByte(pattern[patIdx:], ']', true) 200 if closingIdx == -1 { 201 // no closing `]` 202 return false, ErrBadPattern 203 } 204 205 patIdx += closingIdx + 1 206 nameIdx += nameRuneLen 207 continue 208 209 case '{': 210 startOfSegment = false 211 beforeIdx := patIdx 212 patIdx++ 213 closingIdx := indexMatchedClosingAlt(pattern[patIdx:], separator != '\\') 214 if closingIdx == -1 { 215 // no closing `}` 216 return false, ErrBadPattern 217 } 218 closingIdx += patIdx 219 220 for { 221 commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\') 222 if commaIdx == -1 { 223 break 224 } 225 commaIdx += patIdx 226 227 result, err := doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:commaIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx) 228 if result || err != nil { 229 return result, err 230 } 231 232 patIdx = commaIdx + 1 233 } 234 return doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:closingIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx) 235 236 case '\\': 237 if separator != '\\' { 238 // next rune is "escaped" in the pattern - literal match 239 if patIdx++; patIdx >= patLen { 240 // pattern ended 241 return false, ErrBadPattern 242 } 243 } 244 fallthrough 245 246 default: 247 patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:]) 248 nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) 249 if patRune != nameRune { 250 if separator != '\\' && patIdx > 0 && pattern[patIdx-1] == '\\' { 251 // if this rune was meant to be escaped, we need to move patIdx 252 // back to the backslash before backtracking or validating below 253 patIdx-- 254 } 255 break 256 } 257 258 patIdx += patRuneLen 259 nameIdx += nameRuneLen 260 startOfSegment = patRune == separator 261 continue 262 } 263 } 264 265 if starPatternBacktrack >= 0 { 266 // `*` backtrack, but only if the `name` rune isn't the separator 267 nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:]) 268 if nameRune != separator { 269 starNameBacktrack += nameRuneLen 270 patIdx = starPatternBacktrack 271 nameIdx = starNameBacktrack 272 startOfSegment = false 273 continue 274 } 275 } 276 277 if doublestarPatternBacktrack >= 0 { 278 // `**` backtrack, advance `name` past next separator 279 nameIdx = doublestarNameBacktrack 280 for nameIdx < nameLen { 281 nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:]) 282 nameIdx += nameRuneLen 283 if nameRune == separator { 284 doublestarNameBacktrack = nameIdx 285 patIdx = doublestarPatternBacktrack 286 startOfSegment = true 287 continue MATCH 288 } 289 } 290 } 291 292 if validate && patIdx < patLen && !ValidateWithSeparator(pattern[patIdx:], separator) { 293 return false, ErrBadPattern 294 } 295 return false, nil 296 } 297 298 if nameIdx < nameLen { 299 // we reached the end of `pattern` before the end of `name` 300 return false, nil 301 } 302 303 // we've reached the end of `name`; we've successfully matched if we've also 304 // reached the end of `pattern`, or if the rest of `pattern` can match a 305 // zero-length string 306 return isZeroLengthPattern(pattern[patIdx:], separator) 307 } 308 309 func isZeroLengthPattern(pattern string, separator rune) (ret bool, err error) { 310 // `/**` is a special case - a pattern such as `path/to/a/**` *should* match 311 // `path/to/a` because `a` might be a directory 312 if pattern == "" || pattern == "*" || pattern == "**" || pattern == string(separator)+"**" { 313 return true, nil 314 } 315 316 if pattern[0] == '{' { 317 closingIdx := indexMatchedClosingAlt(pattern[1:], separator != '\\') 318 if closingIdx == -1 { 319 // no closing '}' 320 return false, ErrBadPattern 321 } 322 closingIdx += 1 323 324 patIdx := 1 325 for { 326 commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\') 327 if commaIdx == -1 { 328 break 329 } 330 commaIdx += patIdx 331 332 ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], separator) 333 if ret || err != nil { 334 return 335 } 336 337 patIdx = commaIdx + 1 338 } 339 return isZeroLengthPattern(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], separator) 340 } 341 342 // no luck - validate the rest of the pattern 343 if !ValidateWithSeparator(pattern, separator) { 344 return false, ErrBadPattern 345 } 346 return false, nil 347 } 348 349 // Finds the index of the first unescaped byte `c`, or negative 1. 350 func indexUnescapedByte(s string, c byte, allowEscaping bool) int { 351 l := len(s) 352 for i := 0; i < l; i++ { 353 if allowEscaping && s[i] == '\\' { 354 // skip next byte 355 i++ 356 } else if s[i] == c { 357 return i 358 } 359 } 360 return -1 361 } 362 363 // Assuming the byte before the beginning of `s` is an opening `{`, this 364 // function will find the index of the matching `}`. That is, it'll skip over 365 // any nested `{}` and account for escaping 366 func indexMatchedClosingAlt(s string, allowEscaping bool) int { 367 alts := 1 368 l := len(s) 369 for i := 0; i < l; i++ { 370 if allowEscaping && s[i] == '\\' { 371 // skip next byte 372 i++ 373 } else if s[i] == '{' { 374 alts++ 375 } else if s[i] == '}' { 376 if alts--; alts == 0 { 377 return i 378 } 379 } 380 } 381 return -1 382 }