github.com/AndrewDeryabin/doublestar/v4@v4.0.0-20230123132908-d9476b7d41be/globwalk.go (about) 1 package doublestar 2 3 import ( 4 "errors" 5 "io/fs" 6 "path" 7 "path/filepath" 8 "strings" 9 ) 10 11 // If returned from GlobWalkFunc, will cause GlobWalk to skip the current 12 // directory. In other words, if the current path is a directory, GlobWalk will 13 // not recurse into it. Otherwise, GlobWalk will skip the rest of the current 14 // directory. 15 var SkipDir = fs.SkipDir 16 17 // Callback function for GlobWalk(). If the function returns an error, GlobWalk 18 // will end immediately and return the same error. 19 type GlobWalkFunc func(path string, d fs.DirEntry) error 20 21 // GlobWalk calls the callback function `fn` for every file matching pattern. 22 // The syntax of pattern is the same as in Match() and the behavior is the same 23 // as Glob(), with regard to limitations (such as patterns containing `/./`, 24 // `/../`, or starting with `/`). The pattern may describe hierarchical names 25 // such as usr/*/bin/ed. 26 // 27 // GlobWalk may have a small performance benefit over Glob if you do not need a 28 // slice of matches because it can avoid allocating memory for the matches. 29 // Additionally, GlobWalk gives you access to the `fs.DirEntry` objects for 30 // each match, and lets you quit early by returning a non-nil error from your 31 // callback function. Like `io/fs.WalkDir`, if your callback returns `SkipDir`, 32 // GlobWalk will skip the current directory. This means that if the current 33 // path _is_ a directory, GlobWalk will not recurse into it. If the current 34 // path is not a directory, the rest of the parent directory will be skipped. 35 // 36 // GlobWalk ignores file system errors such as I/O errors reading directories 37 // by default. GlobWalk may return ErrBadPattern, reporting that the pattern is 38 // malformed. 39 // 40 // To enable aborting on I/O errors, the WithFailOnIOErrors option can be 41 // passed. 42 // 43 // Additionally, if the callback function `fn` returns an error, GlobWalk will 44 // exit immediately and return that error. 45 // 46 // Like Glob(), this function assumes that your pattern uses `/` as the path 47 // separator even if that's not correct for your OS (like Windows). If you 48 // aren't sure if that's the case, you can use filepath.ToSlash() on your 49 // pattern before calling GlobWalk(). 50 // 51 // Note: users should _not_ count on the returned error, 52 // doublestar.ErrBadPattern, being equal to path.ErrBadPattern. 53 // 54 func GlobWalk(fsys fs.FS, pattern string, fn GlobWalkFunc, opts ...GlobOption) error { 55 if !ValidatePattern(pattern) { 56 return ErrBadPattern 57 } 58 59 g := newGlob(opts...) 60 return g.doGlobWalk(fsys, pattern, true, true, fn) 61 } 62 63 // Actually execute GlobWalk 64 // - firstSegment is true if we're in the first segment of the pattern, ie, 65 // the right-most part where we can match files. If it's false, we're 66 // somewhere in the middle (or at the beginning) and can only match 67 // directories since there are path segments above us. 68 // - beforeMeta is true if we're exploring segments before any meta 69 // characters, ie, in a pattern such as `path/to/file*.txt`, the `path/to/` 70 // bit does not contain any meta characters. 71 func (g *glob) doGlobWalk(fsys fs.FS, pattern string, firstSegment, beforeMeta bool, fn GlobWalkFunc) error { 72 patternStart := indexMeta(pattern) 73 if patternStart == -1 { 74 // pattern doesn't contain any meta characters - does a file matching the 75 // pattern exist? 76 // The pattern may contain escaped wildcard characters for an exact path match. 77 path := unescapeMeta(pattern) 78 info, pathExists, err := g.exists(fsys, path, beforeMeta) 79 if pathExists && (!firstSegment || !g.filesOnly || !info.IsDir()) { 80 err = fn(path, dirEntryFromFileInfo(info)) 81 if err == SkipDir { 82 err = nil 83 } 84 } 85 return err 86 } 87 88 dir := "." 89 splitIdx := lastIndexSlashOrAlt(pattern) 90 if splitIdx != -1 { 91 if pattern[splitIdx] == '}' { 92 openingIdx := indexMatchedOpeningAlt(pattern[:splitIdx]) 93 if openingIdx == -1 { 94 // if there's no matching opening index, technically Match() will treat 95 // an unmatched `}` as nothing special, so... we will, too! 96 splitIdx = lastIndexSlash(pattern[:splitIdx]) 97 if splitIdx != -1 { 98 dir = pattern[:splitIdx] 99 pattern = pattern[splitIdx+1:] 100 } 101 } else { 102 // otherwise, we have to handle the alts: 103 return g.globAltsWalk(fsys, pattern, openingIdx, splitIdx, firstSegment, beforeMeta, fn) 104 } 105 } else { 106 dir = pattern[:splitIdx] 107 pattern = pattern[splitIdx+1:] 108 } 109 } 110 111 // if `splitIdx` is less than `patternStart`, we know `dir` has no meta 112 // characters. They would be equal if they are both -1, which means `dir` 113 // will be ".", and we know that doesn't have meta characters either. 114 if splitIdx <= patternStart { 115 return g.globDirWalk(fsys, dir, pattern, firstSegment, beforeMeta, fn) 116 } 117 118 return g.doGlobWalk(fsys, dir, false, beforeMeta, func(p string, d fs.DirEntry) error { 119 if err := g.globDirWalk(fsys, p, pattern, firstSegment, false, fn); err != nil { 120 return err 121 } 122 return nil 123 }) 124 } 125 126 // handle alts in the glob pattern - `openingIdx` and `closingIdx` are the 127 // indexes of `{` and `}`, respectively 128 func (g *glob) globAltsWalk(fsys fs.FS, pattern string, openingIdx, closingIdx int, firstSegment, beforeMeta bool, fn GlobWalkFunc) (err error) { 129 var matches []DirEntryWithFullPath 130 startIdx := 0 131 afterIdx := closingIdx + 1 132 splitIdx := lastIndexSlashOrAlt(pattern[:openingIdx]) 133 if splitIdx == -1 || pattern[splitIdx] == '}' { 134 // no common prefix 135 matches, err = g.doGlobAltsWalk(fsys, "", pattern, startIdx, openingIdx, closingIdx, afterIdx, firstSegment, beforeMeta, matches) 136 if err != nil { 137 return 138 } 139 } else { 140 // our alts have a common prefix that we can process first 141 startIdx = splitIdx + 1 142 innerBeforeMeta := beforeMeta && !hasMetaExceptAlts(pattern[:splitIdx]) 143 err = g.doGlobWalk(fsys, pattern[:splitIdx], false, beforeMeta, func(p string, d fs.DirEntry) (e error) { 144 matches, e = g.doGlobAltsWalk(fsys, p, pattern, startIdx, openingIdx, closingIdx, afterIdx, firstSegment, innerBeforeMeta, matches) 145 return e 146 }) 147 if err != nil { 148 return 149 } 150 } 151 152 skip := "" 153 for _, m := range matches { 154 if skip != "" { 155 // Because matches are sorted, we know that descendants of the skipped 156 // item must come immediately after the skipped item. If we find an item 157 // that does not have a prefix matching the skipped item, we know we're 158 // done skipping. I'm using strings.HasPrefix here because 159 // filepath.HasPrefix has been marked deprecated (and just calls 160 // strings.HasPrefix anyway). The reason it's deprecated is because it 161 // doesn't handle case-insensitive paths, nor does it guarantee that the 162 // prefix is actually a parent directory. Neither is an issue here: the 163 // paths come from the system so their cases will match, and we guarantee 164 // a parent directory by appending a slash to the prefix. 165 // 166 // NOTE: m.Path will always use slashes as path separators. 167 if strings.HasPrefix(m.Path, skip) { 168 continue 169 } 170 skip = "" 171 } 172 if err = fn(m.Path, m.Entry); err != nil { 173 if err == SkipDir { 174 isDir, err := g.isDir(fsys, "", m.Path, m.Entry) 175 if err != nil { 176 return err 177 } 178 if isDir { 179 // append a slash to guarantee `skip` will be treated as a parent dir 180 skip = m.Path + "/" 181 } else { 182 // Dir() calls Clean() which calls FromSlash(), so we need to convert 183 // back to slashes 184 skip = filepath.ToSlash(filepath.Dir(m.Path)) + "/" 185 } 186 err = nil 187 continue 188 } 189 return 190 } 191 } 192 193 return 194 } 195 196 // runs actual matching for alts 197 func (g *glob) doGlobAltsWalk(fsys fs.FS, d, pattern string, startIdx, openingIdx, closingIdx, afterIdx int, firstSegment, beforeMeta bool, m []DirEntryWithFullPath) (matches []DirEntryWithFullPath, err error) { 198 matches = m 199 matchesLen := len(m) 200 patIdx := openingIdx + 1 201 for patIdx < closingIdx { 202 nextIdx := indexNextAlt(pattern[patIdx:closingIdx], true) 203 if nextIdx == -1 { 204 nextIdx = closingIdx 205 } else { 206 nextIdx += patIdx 207 } 208 209 alt := buildAlt(d, pattern, startIdx, openingIdx, patIdx, nextIdx, afterIdx) 210 err = g.doGlobWalk(fsys, alt, firstSegment, beforeMeta, func(p string, d fs.DirEntry) error { 211 // insertion sort, ignoring dups 212 insertIdx := matchesLen 213 for insertIdx > 0 && matches[insertIdx-1].Path > p { 214 insertIdx-- 215 } 216 if insertIdx > 0 && matches[insertIdx-1].Path == p { 217 // dup 218 return nil 219 } 220 221 // append to grow the slice, then insert 222 entry := DirEntryWithFullPath{d, p} 223 matches = append(matches, entry) 224 for i := matchesLen; i > insertIdx; i-- { 225 matches[i] = matches[i-1] 226 } 227 matches[insertIdx] = entry 228 matchesLen++ 229 230 return nil 231 }) 232 if err != nil { 233 return 234 } 235 236 patIdx = nextIdx + 1 237 } 238 239 return 240 } 241 242 func (g *glob) globDirWalk(fsys fs.FS, dir, pattern string, canMatchFiles, beforeMeta bool, fn GlobWalkFunc) (e error) { 243 if pattern == "" { 244 if !canMatchFiles || !g.filesOnly { 245 // pattern can be an empty string if the original pattern ended in a 246 // slash, in which case, we should just return dir, but only if it 247 // actually exists and it's a directory (or a symlink to a directory) 248 info, isDir, err := g.isPathDir(fsys, dir, beforeMeta) 249 if err != nil { 250 return err 251 } 252 if isDir { 253 e = fn(dir, dirEntryFromFileInfo(info)) 254 if e == SkipDir { 255 e = nil 256 } 257 } 258 } 259 return 260 } 261 262 if pattern == "**" { 263 // `**` can match *this* dir 264 info, dirExists, err := g.exists(fsys, dir, beforeMeta) 265 if err != nil { 266 return err 267 } 268 if !dirExists || !info.IsDir() { 269 return nil 270 } 271 if !canMatchFiles || !g.filesOnly { 272 if e = fn(dir, dirEntryFromFileInfo(info)); e != nil { 273 if e == SkipDir { 274 e = nil 275 } 276 return 277 } 278 } 279 return g.globDoubleStarWalk(fsys, dir, canMatchFiles, fn) 280 } 281 282 dirs, err := fs.ReadDir(fsys, dir) 283 if err != nil { 284 if errors.Is(err, fs.ErrNotExist) { 285 return g.handlePatternNotExist(beforeMeta) 286 } 287 return g.forwardErrIfFailOnIOErrors(err) 288 } 289 290 var matched bool 291 for _, info := range dirs { 292 name := info.Name() 293 matched, e = matchWithSeparator(pattern, name, '/', false) 294 if e != nil { 295 return 296 } 297 if matched { 298 matched = canMatchFiles 299 if !matched || g.filesOnly { 300 matched, e = g.isDir(fsys, dir, name, info) 301 if e != nil { 302 return e 303 } 304 if canMatchFiles { 305 // if we're here, it's because g.filesOnly 306 // is set and we don't want directories 307 matched = !matched 308 } 309 } 310 if matched { 311 if e = fn(path.Join(dir, name), info); e != nil { 312 if e == SkipDir { 313 e = nil 314 } 315 return 316 } 317 } 318 } 319 } 320 321 return 322 } 323 324 // recursively walk files/directories in a directory 325 func (g *glob) globDoubleStarWalk(fsys fs.FS, dir string, canMatchFiles bool, fn GlobWalkFunc) (e error) { 326 dirs, err := fs.ReadDir(fsys, dir) 327 if err != nil { 328 if errors.Is(err, fs.ErrNotExist) { 329 // This function is only ever called after we know the top-most directory 330 // exists, so, if we ever get here, we know we'll never return 331 // ErrPatternNotExist. 332 return nil 333 } 334 return g.forwardErrIfFailOnIOErrors(err) 335 } 336 337 for _, info := range dirs { 338 name := info.Name() 339 isDir, err := g.isDir(fsys, dir, name, info) 340 if err != nil { 341 return err 342 } 343 344 if isDir { 345 p := path.Join(dir, name) 346 if !canMatchFiles || !g.filesOnly { 347 // `**` can match *this* dir, so add it 348 if e = fn(p, info); e != nil { 349 if e == SkipDir { 350 e = nil 351 continue 352 } 353 return 354 } 355 } 356 if e = g.globDoubleStarWalk(fsys, p, canMatchFiles, fn); e != nil { 357 return 358 } 359 } else if canMatchFiles { 360 if e = fn(path.Join(dir, name), info); e != nil { 361 if e == SkipDir { 362 e = nil 363 } 364 return 365 } 366 } 367 } 368 369 return 370 } 371 372 type DirEntryFromFileInfo struct { 373 fi fs.FileInfo 374 } 375 376 func (d *DirEntryFromFileInfo) Name() string { 377 return d.fi.Name() 378 } 379 380 func (d *DirEntryFromFileInfo) IsDir() bool { 381 return d.fi.IsDir() 382 } 383 384 func (d *DirEntryFromFileInfo) Type() fs.FileMode { 385 return d.fi.Mode().Type() 386 } 387 388 func (d *DirEntryFromFileInfo) Info() (fs.FileInfo, error) { 389 return d.fi, nil 390 } 391 392 func dirEntryFromFileInfo(fi fs.FileInfo) fs.DirEntry { 393 return &DirEntryFromFileInfo{fi} 394 } 395 396 type DirEntryWithFullPath struct { 397 Entry fs.DirEntry 398 Path string 399 } 400 401 func hasMetaExceptAlts(s string) bool { 402 var c byte 403 l := len(s) 404 for i := 0; i < l; i++ { 405 c = s[i] 406 if c == '*' || c == '?' || c == '[' { 407 return true 408 } else if c == '\\' { 409 // skip next byte 410 i++ 411 } 412 } 413 return false 414 }