github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/walk/walk.go (about) 1 // Package walk walks directories 2 package walk 3 4 import ( 5 "context" 6 "errors" 7 "fmt" 8 "path" 9 "sort" 10 "strings" 11 "sync" 12 "time" 13 14 "github.com/rclone/rclone/fs" 15 "github.com/rclone/rclone/fs/dirtree" 16 "github.com/rclone/rclone/fs/filter" 17 "github.com/rclone/rclone/fs/list" 18 ) 19 20 // ErrorSkipDir is used as a return value from Walk to indicate that the 21 // directory named in the call is to be skipped. It is not returned as 22 // an error by any function. 23 var ErrorSkipDir = errors.New("skip this directory") 24 25 // ErrorCantListR is returned by WalkR if the underlying Fs isn't 26 // capable of doing a recursive listing. 27 var ErrorCantListR = errors.New("recursive directory listing not available") 28 29 // Func is the type of the function called for directory 30 // visited by Walk. The path argument contains remote path to the directory. 31 // 32 // If there was a problem walking to directory named by path, the 33 // incoming error will describe the problem and the function can 34 // decide how to handle that error (and Walk will not descend into 35 // that directory). If an error is returned, processing stops. The 36 // sole exception is when the function returns the special value 37 // ErrorSkipDir. If the function returns ErrorSkipDir, Walk skips the 38 // directory's contents entirely. 39 type Func func(path string, entries fs.DirEntries, err error) error 40 41 // Walk lists the directory. 42 // 43 // If includeAll is not set it will use the filters defined. 44 // 45 // If maxLevel is < 0 then it will recurse indefinitely, else it will 46 // only do maxLevel levels. 47 // 48 // It calls fn for each tranche of DirEntries read. 49 // 50 // Note that fn will not be called concurrently whereas the directory 51 // listing will proceed concurrently. 52 // 53 // Parent directories are always listed before their children. 54 // 55 // This is implemented by WalkR if Config.UseListR is true 56 // and f supports it and level > 1, or WalkN otherwise. 57 // 58 // If --files-from and --no-traverse is set then a DirTree will be 59 // constructed with just those files in and then walked with WalkR 60 // 61 // Note: this will flag filter-aware backends! 62 // 63 // NB (f, path) to be replaced by fs.Dir at some point 64 func Walk(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func) error { 65 ci := fs.GetConfig(ctx) 66 fi := filter.GetConfig(ctx) 67 ctx = filter.SetUseFilter(ctx, f.Features().FilterAware && !includeAll) // make filter-aware backends constrain List 68 if ci.NoTraverse && fi.HaveFilesFrom() { 69 return walkR(ctx, f, path, includeAll, maxLevel, fn, fi.MakeListR(ctx, f.NewObject)) 70 } 71 // FIXME should this just be maxLevel < 0 - why the maxLevel > 1 72 if (maxLevel < 0 || maxLevel > 1) && ci.UseListR && f.Features().ListR != nil { 73 return walkListR(ctx, f, path, includeAll, maxLevel, fn) 74 } 75 return walkListDirSorted(ctx, f, path, includeAll, maxLevel, fn) 76 } 77 78 // ListType is uses to choose which combination of files or directories is requires 79 type ListType byte 80 81 // Types of listing for ListR 82 const ( 83 ListObjects ListType = 1 << iota // list objects only 84 ListDirs // list dirs only 85 ListAll = ListObjects | ListDirs // list files and dirs 86 ) 87 88 // Objects returns true if the list type specifies objects 89 func (l ListType) Objects() bool { 90 return (l & ListObjects) != 0 91 } 92 93 // Dirs returns true if the list type specifies dirs 94 func (l ListType) Dirs() bool { 95 return (l & ListDirs) != 0 96 } 97 98 // Filter in (inplace) to only contain the type of list entry required 99 func (l ListType) Filter(in *fs.DirEntries) { 100 if l == ListAll { 101 return 102 } 103 out := (*in)[:0] 104 for _, entry := range *in { 105 switch entry.(type) { 106 case fs.Object: 107 if l.Objects() { 108 out = append(out, entry) 109 } 110 case fs.Directory: 111 if l.Dirs() { 112 out = append(out, entry) 113 } 114 default: 115 fs.Errorf(nil, "Unknown object type %T", entry) 116 } 117 } 118 *in = out 119 } 120 121 // ListR lists the directory recursively. 122 // 123 // If includeAll is not set it will use the filters defined. 124 // 125 // If maxLevel is < 0 then it will recurse indefinitely, else it will 126 // only do maxLevel levels. 127 // 128 // If synthesizeDirs is set then for bucket-based remotes it will 129 // synthesize directories from the file structure. This uses extra 130 // memory so don't set this if you don't need directories, likewise do 131 // set this if you are interested in directories. 132 // 133 // It calls fn for each tranche of DirEntries read. Note that these 134 // don't necessarily represent a directory 135 // 136 // Note that fn will not be called concurrently whereas the directory 137 // listing will proceed concurrently. 138 // 139 // Directories are not listed in any particular order so you can't 140 // rely on parents coming before children or alphabetical ordering 141 // 142 // This is implemented by using ListR on the backend if possible and 143 // efficient, otherwise by Walk. 144 // 145 // Note: this will flag filter-aware backends 146 // 147 // NB (f, path) to be replaced by fs.Dir at some point 148 func ListR(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, listType ListType, fn fs.ListRCallback) error { 149 fi := filter.GetConfig(ctx) 150 // FIXME disable this with --no-fast-list ??? `--disable ListR` will do it... 151 doListR := f.Features().ListR 152 153 // Can't use ListR if... 154 if doListR == nil || // ...no ListR 155 fi.HaveFilesFrom() || // ...using --files-from 156 maxLevel >= 0 || // ...using bounded recursion 157 len(fi.Opt.ExcludeFile) > 0 || // ...using --exclude-file 158 fi.UsesDirectoryFilters() { // ...using any directory filters 159 return listRwalk(ctx, f, path, includeAll, maxLevel, listType, fn) 160 } 161 ctx = filter.SetUseFilter(ctx, f.Features().FilterAware && !includeAll) // make filter-aware backends constrain List 162 return listR(ctx, f, path, includeAll, listType, fn, doListR, listType.Dirs() && f.Features().BucketBased) 163 } 164 165 // listRwalk walks the file tree for ListR using Walk 166 // Note: this will flag filter-aware backends (via Walk) 167 func listRwalk(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, listType ListType, fn fs.ListRCallback) error { 168 var listErr error 169 walkErr := Walk(ctx, f, path, includeAll, maxLevel, func(path string, entries fs.DirEntries, err error) error { 170 // Carry on listing but return the error at the end 171 if err != nil { 172 listErr = err 173 err = fs.CountError(err) 174 fs.Errorf(path, "error listing: %v", err) 175 return nil 176 } 177 listType.Filter(&entries) 178 return fn(entries) 179 }) 180 if listErr != nil { 181 return listErr 182 } 183 return walkErr 184 } 185 186 // dirMap keeps track of directories made for bucket-based remotes. 187 // true => directory has been sent 188 // false => directory has been seen but not sent 189 type dirMap struct { 190 mu sync.Mutex 191 m map[string]bool 192 root string 193 } 194 195 // make a new dirMap 196 func newDirMap(root string) *dirMap { 197 return &dirMap{ 198 m: make(map[string]bool), 199 root: root, 200 } 201 } 202 203 // add adds a directory and parents with sent 204 func (dm *dirMap) add(dir string, sent bool) { 205 for { 206 if dir == dm.root || dir == "" { 207 return 208 } 209 currentSent, found := dm.m[dir] 210 if found { 211 // If it has been sent already then nothing more to do 212 if currentSent { 213 return 214 } 215 // If not sent already don't override 216 if !sent { 217 return 218 } 219 // currentSent == false && sent == true so needs overriding 220 } 221 dm.m[dir] = sent 222 // Add parents in as unsent 223 dir = parentDir(dir) 224 sent = false 225 } 226 } 227 228 // parentDir finds the parent directory of path 229 func parentDir(entryPath string) string { 230 dirPath := path.Dir(entryPath) 231 if dirPath == "." { 232 dirPath = "" 233 } 234 return dirPath 235 } 236 237 // add all the directories in entries and their parents to the dirMap 238 func (dm *dirMap) addEntries(entries fs.DirEntries) error { 239 dm.mu.Lock() 240 defer dm.mu.Unlock() 241 for _, entry := range entries { 242 switch x := entry.(type) { 243 case fs.Object: 244 dm.add(parentDir(x.Remote()), false) 245 case fs.Directory: 246 dm.add(x.Remote(), true) 247 default: 248 return fmt.Errorf("unknown object type %T", entry) 249 } 250 } 251 return nil 252 } 253 254 // send any missing parents to fn 255 func (dm *dirMap) sendEntries(fn fs.ListRCallback) (err error) { 256 // Count the strings first so we allocate the minimum memory 257 n := 0 258 for _, sent := range dm.m { 259 if !sent { 260 n++ 261 } 262 } 263 if n == 0 { 264 return nil 265 } 266 dirs := make([]string, 0, n) 267 // Fill the dirs up then sort it 268 for dir, sent := range dm.m { 269 if !sent { 270 dirs = append(dirs, dir) 271 } 272 } 273 sort.Strings(dirs) 274 // Now convert to bulkier Dir in batches and send 275 now := time.Now() 276 list := NewListRHelper(fn) 277 for _, dir := range dirs { 278 err = list.Add(fs.NewDir(dir, now)) 279 if err != nil { 280 return err 281 } 282 } 283 return list.Flush() 284 } 285 286 // listR walks the file tree using ListR 287 func listR(ctx context.Context, f fs.Fs, path string, includeAll bool, listType ListType, fn fs.ListRCallback, doListR fs.ListRFn, synthesizeDirs bool) error { 288 fi := filter.GetConfig(ctx) 289 includeDirectory := fi.IncludeDirectory(ctx, f) 290 if !includeAll { 291 includeAll = fi.InActive() 292 } 293 var dm *dirMap 294 if synthesizeDirs { 295 dm = newDirMap(path) 296 } 297 var mu sync.Mutex 298 err := doListR(ctx, path, func(entries fs.DirEntries) (err error) { 299 if synthesizeDirs { 300 err = dm.addEntries(entries) 301 if err != nil { 302 return err 303 } 304 } 305 listType.Filter(&entries) 306 if !includeAll { 307 filteredEntries := entries[:0] 308 for _, entry := range entries { 309 var include bool 310 switch x := entry.(type) { 311 case fs.Object: 312 include = fi.IncludeObject(ctx, x) 313 case fs.Directory: 314 include, err = includeDirectory(x.Remote()) 315 if err != nil { 316 return err 317 } 318 default: 319 return fmt.Errorf("unknown object type %T", entry) 320 } 321 if include { 322 filteredEntries = append(filteredEntries, entry) 323 } 324 } 325 entries = filteredEntries 326 } 327 mu.Lock() 328 defer mu.Unlock() 329 return fn(entries) 330 }) 331 if err != nil { 332 return err 333 } 334 if synthesizeDirs { 335 err = dm.sendEntries(fn) 336 if err != nil { 337 return err 338 } 339 } 340 return nil 341 } 342 343 // walkListDirSorted lists the directory. 344 // 345 // It implements Walk using non recursive directory listing. 346 func walkListDirSorted(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func) error { 347 return walk(ctx, f, path, includeAll, maxLevel, fn, list.DirSorted) 348 } 349 350 // walkListR lists the directory. 351 // 352 // It implements Walk using recursive directory listing if 353 // available, or returns ErrorCantListR if not. 354 func walkListR(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func) error { 355 listR := f.Features().ListR 356 if listR == nil { 357 return ErrorCantListR 358 } 359 return walkR(ctx, f, path, includeAll, maxLevel, fn, listR) 360 } 361 362 type listDirFunc func(ctx context.Context, fs fs.Fs, includeAll bool, dir string) (entries fs.DirEntries, err error) 363 364 func walk(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func, listDir listDirFunc) error { 365 var ( 366 wg sync.WaitGroup // sync closing of go routines 367 traversing sync.WaitGroup // running directory traversals 368 doClose sync.Once // close the channel once 369 mu sync.Mutex // stop fn being called concurrently 370 ci = fs.GetConfig(ctx) // current config 371 ) 372 // listJob describe a directory listing that needs to be done 373 type listJob struct { 374 remote string 375 depth int 376 } 377 378 in := make(chan listJob, ci.Checkers) 379 errs := make(chan error, 1) 380 quit := make(chan struct{}) 381 closeQuit := func() { 382 doClose.Do(func() { 383 close(quit) 384 go func() { 385 for range in { 386 traversing.Done() 387 } 388 }() 389 }) 390 } 391 for i := 0; i < ci.Checkers; i++ { 392 wg.Add(1) 393 go func() { 394 defer wg.Done() 395 for { 396 select { 397 case job, ok := <-in: 398 if !ok { 399 return 400 } 401 entries, err := listDir(ctx, f, includeAll, job.remote) 402 var jobs []listJob 403 if err == nil && job.depth != 0 { 404 entries.ForDir(func(dir fs.Directory) { 405 // Recurse for the directory 406 jobs = append(jobs, listJob{ 407 remote: dir.Remote(), 408 depth: job.depth - 1, 409 }) 410 }) 411 } 412 mu.Lock() 413 err = fn(job.remote, entries, err) 414 mu.Unlock() 415 // NB once we have passed entries to fn we mustn't touch it again 416 if err != nil && err != ErrorSkipDir { 417 traversing.Done() 418 err = fs.CountError(err) 419 fs.Errorf(job.remote, "error listing: %v", err) 420 closeQuit() 421 // Send error to error channel if space 422 select { 423 case errs <- err: 424 default: 425 } 426 continue 427 } 428 if err == nil && len(jobs) > 0 { 429 traversing.Add(len(jobs)) 430 go func() { 431 // Now we have traversed this directory, send these 432 // jobs off for traversal in the background 433 for _, newJob := range jobs { 434 in <- newJob 435 } 436 }() 437 } 438 traversing.Done() 439 case <-quit: 440 return 441 } 442 } 443 }() 444 } 445 // Start the process 446 traversing.Add(1) 447 in <- listJob{ 448 remote: path, 449 depth: maxLevel - 1, 450 } 451 traversing.Wait() 452 close(in) 453 wg.Wait() 454 close(errs) 455 // return the first error returned or nil 456 return <-errs 457 } 458 459 func walkRDirTree(ctx context.Context, f fs.Fs, startPath string, includeAll bool, maxLevel int, listR fs.ListRFn) (dirtree.DirTree, error) { 460 fi := filter.GetConfig(ctx) 461 dirs := dirtree.New() 462 // Entries can come in arbitrary order. We use toPrune to keep 463 // all directories to exclude later. 464 toPrune := make(map[string]bool) 465 includeDirectory := fi.IncludeDirectory(ctx, f) 466 var mu sync.Mutex 467 err := listR(ctx, startPath, func(entries fs.DirEntries) error { 468 mu.Lock() 469 defer mu.Unlock() 470 for _, entry := range entries { 471 slashes := strings.Count(entry.Remote(), "/") 472 excluded := true 473 switch x := entry.(type) { 474 case fs.Object: 475 // Make sure we don't delete excluded files if not required 476 if includeAll || fi.IncludeObject(ctx, x) { 477 if maxLevel < 0 || slashes <= maxLevel-1 { 478 dirs.Add(x) 479 excluded = false 480 } 481 } 482 // Make sure we include any parent directories of excluded objects 483 if excluded { 484 dirPath := parentDir(x.Remote()) 485 slashes-- 486 if maxLevel >= 0 { 487 for ; slashes > maxLevel-1; slashes-- { 488 dirPath = parentDir(dirPath) 489 } 490 } 491 inc, err := includeDirectory(dirPath) 492 if err != nil { 493 return err 494 } 495 if inc || includeAll { 496 // If the directory doesn't exist already, create it 497 _, obj := dirs.Find(dirPath) 498 if obj == nil { 499 dirs.AddDir(fs.NewDir(dirPath, time.Now())) 500 } 501 } 502 } 503 // Check if we need to prune a directory later. 504 if !includeAll && len(fi.Opt.ExcludeFile) > 0 { 505 basename := path.Base(x.Remote()) 506 for _, excludeFile := range fi.Opt.ExcludeFile { 507 if basename == excludeFile { 508 excludeDir := parentDir(x.Remote()) 509 toPrune[excludeDir] = true 510 } 511 } 512 } 513 case fs.Directory: 514 inc, err := includeDirectory(x.Remote()) 515 if err != nil { 516 return err 517 } 518 if includeAll || inc { 519 if maxLevel < 0 || slashes <= maxLevel-1 { 520 if slashes == maxLevel-1 { 521 // Just add the object if at maxLevel 522 dirs.Add(x) 523 } else { 524 dirs.AddDir(x) 525 } 526 } 527 } 528 default: 529 return fmt.Errorf("unknown object type %T", entry) 530 } 531 } 532 return nil 533 }) 534 if err != nil { 535 return nil, err 536 } 537 dirs.CheckParents(startPath) 538 if len(dirs) == 0 { 539 dirs[startPath] = nil 540 } 541 err = dirs.Prune(toPrune) 542 if err != nil { 543 return nil, err 544 } 545 dirs.Sort() 546 return dirs, nil 547 } 548 549 // Create a DirTree using List 550 func walkNDirTree(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, listDir listDirFunc) (dirtree.DirTree, error) { 551 dirs := make(dirtree.DirTree) 552 fn := func(dirPath string, entries fs.DirEntries, err error) error { 553 if err == nil { 554 dirs[dirPath] = entries 555 } 556 return err 557 } 558 err := walk(ctx, f, path, includeAll, maxLevel, fn, listDir) 559 if err != nil { 560 return nil, err 561 } 562 return dirs, nil 563 } 564 565 // NewDirTree returns a DirTree filled with the directory listing 566 // using the parameters supplied. 567 // 568 // If includeAll is not set it will use the filters defined. 569 // 570 // If maxLevel is < 0 then it will recurse indefinitely, else it will 571 // only do maxLevel levels. 572 // 573 // This is implemented by WalkR if f supports ListR and level > 1, or 574 // WalkN otherwise. 575 // 576 // If --files-from and --no-traverse is set then a DirTree will be 577 // constructed with just those files in. 578 // 579 // NB (f, path) to be replaced by fs.Dir at some point 580 func NewDirTree(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int) (dirtree.DirTree, error) { 581 ci := fs.GetConfig(ctx) 582 fi := filter.GetConfig(ctx) 583 // if --no-traverse and --files-from build DirTree just from files 584 if ci.NoTraverse && fi.HaveFilesFrom() { 585 return walkRDirTree(ctx, f, path, includeAll, maxLevel, fi.MakeListR(ctx, f.NewObject)) 586 } 587 // if have ListR; and recursing; and not using --files-from; then build a DirTree with ListR 588 if ListR := f.Features().ListR; (maxLevel < 0 || maxLevel > 1) && ListR != nil && !fi.HaveFilesFrom() { 589 return walkRDirTree(ctx, f, path, includeAll, maxLevel, ListR) 590 } 591 // otherwise just use List 592 return walkNDirTree(ctx, f, path, includeAll, maxLevel, list.DirSorted) 593 } 594 595 func walkR(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func, listR fs.ListRFn) error { 596 dirs, err := walkRDirTree(ctx, f, path, includeAll, maxLevel, listR) 597 if err != nil { 598 return err 599 } 600 skipping := false 601 skipPrefix := "" 602 emptyDir := fs.DirEntries{} 603 for _, dirPath := range dirs.Dirs() { 604 if skipping { 605 // Skip over directories as required 606 if strings.HasPrefix(dirPath, skipPrefix) { 607 continue 608 } 609 skipping = false 610 } 611 entries := dirs[dirPath] 612 if entries == nil { 613 entries = emptyDir 614 } 615 err = fn(dirPath, entries, nil) 616 if err == ErrorSkipDir { 617 skipping = true 618 skipPrefix = dirPath 619 if skipPrefix != "" { 620 skipPrefix += "/" 621 } 622 } else if err != nil { 623 return err 624 } 625 } 626 return nil 627 } 628 629 // GetAll runs ListR getting all the results 630 func GetAll(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int) (objs []fs.Object, dirs []fs.Directory, err error) { 631 err = ListR(ctx, f, path, includeAll, maxLevel, ListAll, func(entries fs.DirEntries) error { 632 for _, entry := range entries { 633 switch x := entry.(type) { 634 case fs.Object: 635 objs = append(objs, x) 636 case fs.Directory: 637 dirs = append(dirs, x) 638 } 639 } 640 return nil 641 }) 642 return 643 } 644 645 // ListRHelper is used in the implementation of ListR to accumulate DirEntries 646 type ListRHelper struct { 647 callback fs.ListRCallback 648 entries fs.DirEntries 649 } 650 651 // NewListRHelper should be called from ListR with the callback passed in 652 func NewListRHelper(callback fs.ListRCallback) *ListRHelper { 653 return &ListRHelper{ 654 callback: callback, 655 } 656 } 657 658 // send sends the stored entries to the callback if there are >= max 659 // entries. 660 func (lh *ListRHelper) send(max int) (err error) { 661 if len(lh.entries) >= max { 662 err = lh.callback(lh.entries) 663 lh.entries = lh.entries[:0] 664 } 665 return err 666 } 667 668 // Add an entry to the stored entries and send them if there are more 669 // than a certain amount 670 func (lh *ListRHelper) Add(entry fs.DirEntry) error { 671 if entry == nil { 672 return nil 673 } 674 lh.entries = append(lh.entries, entry) 675 return lh.send(100) 676 } 677 678 // Flush the stored entries (if any) sending them to the callback 679 func (lh *ListRHelper) Flush() error { 680 return lh.send(1) 681 }