github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/fs/walk/walk.go (about) 1 // Package walk walks directories 2 package walk 3 4 import ( 5 "context" 6 "path" 7 "sort" 8 "strings" 9 "sync" 10 "time" 11 12 "github.com/pkg/errors" 13 "github.com/rclone/rclone/fs" 14 "github.com/rclone/rclone/fs/dirtree" 15 "github.com/rclone/rclone/fs/filter" 16 "github.com/rclone/rclone/fs/list" 17 ) 18 19 // ErrorSkipDir is used as a return value from Walk to indicate that the 20 // directory named in the call is to be skipped. It is not returned as 21 // an error by any function. 22 var ErrorSkipDir = errors.New("skip this directory") 23 24 // ErrorCantListR is returned by WalkR if the underlying Fs isn't 25 // capable of doing a recursive listing. 26 var ErrorCantListR = errors.New("recursive directory listing not available") 27 28 // Func is the type of the function called for directory 29 // visited by Walk. The path argument contains remote path to the directory. 30 // 31 // If there was a problem walking to directory named by path, the 32 // incoming error will describe the problem and the function can 33 // decide how to handle that error (and Walk will not descend into 34 // that directory). If an error is returned, processing stops. The 35 // sole exception is when the function returns the special value 36 // ErrorSkipDir. If the function returns ErrorSkipDir, Walk skips the 37 // directory's contents entirely. 38 type Func func(path string, entries fs.DirEntries, err error) error 39 40 // Walk lists the directory. 41 // 42 // If includeAll is not set it will use the filters defined. 43 // 44 // If maxLevel is < 0 then it will recurse indefinitely, else it will 45 // only do maxLevel levels. 46 // 47 // It calls fn for each tranche of DirEntries read. 48 // 49 // Note that fn will not be called concurrently whereas the directory 50 // listing will proceed concurrently. 
51 // 52 // Parent directories are always listed before their children 53 // 54 // This is implemented by WalkR if Config.UseListR is true 55 // and f supports it and level > 1, or WalkN otherwise. 56 // 57 // If --files-from and --no-traverse is set then a DirTree will be 58 // constructed with just those files in and then walked with WalkR 59 // 60 // NB (f, path) to be replaced by fs.Dir at some point 61 func Walk(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func) error { 62 if fs.Config.NoTraverse && filter.Active.HaveFilesFrom() { 63 return walkR(ctx, f, path, includeAll, maxLevel, fn, filter.Active.MakeListR(ctx, f.NewObject)) 64 } 65 // FIXME should this just be maxLevel < 0 - why the maxLevel > 1 66 if (maxLevel < 0 || maxLevel > 1) && fs.Config.UseListR && f.Features().ListR != nil { 67 return walkListR(ctx, f, path, includeAll, maxLevel, fn) 68 } 69 return walkListDirSorted(ctx, f, path, includeAll, maxLevel, fn) 70 } 71 72 // ListType is uses to choose which combination of files or directories is requires 73 type ListType byte 74 75 // Types of listing for ListR 76 const ( 77 ListObjects ListType = 1 << iota // list objects only 78 ListDirs // list dirs only 79 ListAll = ListObjects | ListDirs // list files and dirs 80 ) 81 82 // Objects returns true if the list type specifies objects 83 func (l ListType) Objects() bool { 84 return (l & ListObjects) != 0 85 } 86 87 // Dirs returns true if the list type specifies dirs 88 func (l ListType) Dirs() bool { 89 return (l & ListDirs) != 0 90 } 91 92 // Filter in (inplace) to only contain the type of list entry required 93 func (l ListType) Filter(in *fs.DirEntries) { 94 if l == ListAll { 95 return 96 } 97 out := (*in)[:0] 98 for _, entry := range *in { 99 switch entry.(type) { 100 case fs.Object: 101 if l.Objects() { 102 out = append(out, entry) 103 } 104 case fs.Directory: 105 if l.Dirs() { 106 out = append(out, entry) 107 } 108 default: 109 fs.Errorf(nil, "Unknown object type 
%T", entry) 110 } 111 } 112 *in = out 113 } 114 115 // ListR lists the directory recursively. 116 // 117 // If includeAll is not set it will use the filters defined. 118 // 119 // If maxLevel is < 0 then it will recurse indefinitely, else it will 120 // only do maxLevel levels. 121 // 122 // If synthesizeDirs is set then for bucket based remotes it will 123 // synthesize directories from the file structure. This uses extra 124 // memory so don't set this if you don't need directories, likewise do 125 // set this if you are interested in directories. 126 // 127 // It calls fn for each tranche of DirEntries read. Note that these 128 // don't necessarily represent a directory 129 // 130 // Note that fn will not be called concurrently whereas the directory 131 // listing will proceed concurrently. 132 // 133 // Directories are not listed in any particular order so you can't 134 // rely on parents coming before children or alphabetical ordering 135 // 136 // This is implemented by using ListR on the backend if possible and 137 // efficient, otherwise by Walk. 138 // 139 // NB (f, path) to be replaced by fs.Dir at some point 140 func ListR(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, listType ListType, fn fs.ListRCallback) error { 141 // FIXME disable this with --no-fast-list ??? `--disable ListR` will do it... 142 doListR := f.Features().ListR 143 144 // Can't use ListR if... 
145 if doListR == nil || // ...no ListR 146 filter.Active.HaveFilesFrom() || // ...using --files-from 147 maxLevel >= 0 || // ...using bounded recursion 148 len(filter.Active.Opt.ExcludeFile) > 0 || // ...using --exclude-file 149 filter.Active.UsesDirectoryFilters() { // ...using any directory filters 150 return listRwalk(ctx, f, path, includeAll, maxLevel, listType, fn) 151 } 152 return listR(ctx, f, path, includeAll, listType, fn, doListR, listType.Dirs() && f.Features().BucketBased) 153 } 154 155 // listRwalk walks the file tree for ListR using Walk 156 func listRwalk(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, listType ListType, fn fs.ListRCallback) error { 157 var listErr error 158 walkErr := Walk(ctx, f, path, includeAll, maxLevel, func(path string, entries fs.DirEntries, err error) error { 159 // Carry on listing but return the error at the end 160 if err != nil { 161 listErr = err 162 err = fs.CountError(err) 163 fs.Errorf(path, "error listing: %v", err) 164 return nil 165 } 166 listType.Filter(&entries) 167 return fn(entries) 168 }) 169 if listErr != nil { 170 return listErr 171 } 172 return walkErr 173 } 174 175 // dirMap keeps track of directories made for bucket based remotes. 
176 // true => directory has been sent 177 // false => directory has been seen but not sent 178 type dirMap struct { 179 mu sync.Mutex 180 m map[string]bool 181 root string 182 } 183 184 // make a new dirMap 185 func newDirMap(root string) *dirMap { 186 return &dirMap{ 187 m: make(map[string]bool), 188 root: root, 189 } 190 } 191 192 // add adds a directory and parents with sent 193 func (dm *dirMap) add(dir string, sent bool) { 194 for { 195 if dir == dm.root || dir == "" { 196 return 197 } 198 currentSent, found := dm.m[dir] 199 if found { 200 // If it has been sent already then nothing more to do 201 if currentSent { 202 return 203 } 204 // If not sent already don't override 205 if !sent { 206 return 207 } 208 // currenSent == false && sent == true so needs overriding 209 } 210 dm.m[dir] = sent 211 // Add parents in as unsent 212 dir = parentDir(dir) 213 sent = false 214 } 215 } 216 217 // parentDir finds the parent directory of path 218 func parentDir(entryPath string) string { 219 dirPath := path.Dir(entryPath) 220 if dirPath == "." 
{ 221 dirPath = "" 222 } 223 return dirPath 224 } 225 226 // add all the directories in entries and their parents to the dirMap 227 func (dm *dirMap) addEntries(entries fs.DirEntries) error { 228 dm.mu.Lock() 229 defer dm.mu.Unlock() 230 for _, entry := range entries { 231 switch x := entry.(type) { 232 case fs.Object: 233 dm.add(parentDir(x.Remote()), false) 234 case fs.Directory: 235 dm.add(x.Remote(), true) 236 default: 237 return errors.Errorf("unknown object type %T", entry) 238 } 239 } 240 return nil 241 } 242 243 // send any missing parents to fn 244 func (dm *dirMap) sendEntries(fn fs.ListRCallback) (err error) { 245 // Count the strings first so we allocate the minimum memory 246 n := 0 247 for _, sent := range dm.m { 248 if !sent { 249 n++ 250 } 251 } 252 if n == 0 { 253 return nil 254 } 255 dirs := make([]string, 0, n) 256 // Fill the dirs up then sort it 257 for dir, sent := range dm.m { 258 if !sent { 259 dirs = append(dirs, dir) 260 } 261 } 262 sort.Strings(dirs) 263 // Now convert to bulkier Dir in batches and send 264 now := time.Now() 265 list := NewListRHelper(fn) 266 for _, dir := range dirs { 267 err = list.Add(fs.NewDir(dir, now)) 268 if err != nil { 269 return err 270 } 271 } 272 return list.Flush() 273 } 274 275 // listR walks the file tree using ListR 276 func listR(ctx context.Context, f fs.Fs, path string, includeAll bool, listType ListType, fn fs.ListRCallback, doListR fs.ListRFn, synthesizeDirs bool) error { 277 includeDirectory := filter.Active.IncludeDirectory(ctx, f) 278 if !includeAll { 279 includeAll = filter.Active.InActive() 280 } 281 var dm *dirMap 282 if synthesizeDirs { 283 dm = newDirMap(path) 284 } 285 var mu sync.Mutex 286 err := doListR(ctx, path, func(entries fs.DirEntries) (err error) { 287 if synthesizeDirs { 288 err = dm.addEntries(entries) 289 if err != nil { 290 return err 291 } 292 } 293 listType.Filter(&entries) 294 if !includeAll { 295 filteredEntries := entries[:0] 296 for _, entry := range entries { 297 var 
include bool 298 switch x := entry.(type) { 299 case fs.Object: 300 include = filter.Active.IncludeObject(ctx, x) 301 case fs.Directory: 302 include, err = includeDirectory(x.Remote()) 303 if err != nil { 304 return err 305 } 306 default: 307 return errors.Errorf("unknown object type %T", entry) 308 } 309 if include { 310 filteredEntries = append(filteredEntries, entry) 311 } else { 312 fs.Debugf(entry, "Excluded from sync (and deletion)") 313 } 314 } 315 entries = filteredEntries 316 } 317 mu.Lock() 318 defer mu.Unlock() 319 return fn(entries) 320 }) 321 if err != nil { 322 return err 323 } 324 if synthesizeDirs { 325 err = dm.sendEntries(fn) 326 if err != nil { 327 return err 328 } 329 } 330 return nil 331 } 332 333 // walkListDirSorted lists the directory. 334 // 335 // It implements Walk using non recursive directory listing. 336 func walkListDirSorted(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func) error { 337 return walk(ctx, f, path, includeAll, maxLevel, fn, list.DirSorted) 338 } 339 340 // walkListR lists the directory. 341 // 342 // It implements Walk using recursive directory listing if 343 // available, or returns ErrorCantListR if not. 
func walkListR(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func) error {
	listR := f.Features().ListR
	if listR == nil {
		return ErrorCantListR
	}
	return walkR(ctx, f, path, includeAll, maxLevel, fn, listR)
}

// listDirFunc is the type of the function walk uses to list the single
// directory dir on an Fs, returning its entries or an error.
type listDirFunc func(ctx context.Context, fs fs.Fs, includeAll bool, dir string) (entries fs.DirEntries, err error)

// walk implements Walk by listing one directory at a time with listDir.
//
// fs.Config.Checkers worker goroutines read listJobs from a channel,
// list the directory and call fn with the result while holding a mutex
// so fn is never called concurrently. Sub-directories found are queued
// as new jobs unless the job's depth is 0; the root job starts at
// depth maxLevel-1.
//
// If fn returns ErrorSkipDir the sub-directories of that directory are
// not queued. If fn returns any other error the walk is shut down, and
// the first such error recorded is returned. Otherwise nil is returned.
func walk(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func, listDir listDirFunc) error {
	var (
		wg         sync.WaitGroup // sync closing of go routines
		traversing sync.WaitGroup // running directory traversals
		doClose    sync.Once      // close the channel once
		mu         sync.Mutex     // stop fn being called concurrently
	)
	// listJob describes a directory listing that needs to be done
	type listJob struct {
		remote string // directory to list
		depth  int    // levels still to descend below remote; 0 means don't recurse
	}

	in := make(chan listJob, fs.Config.Checkers)
	errs := make(chan error, 1) // holds at most the first error
	quit := make(chan struct{})
	// closeQuit tells the workers to stop. It also starts a goroutine
	// which drains the jobs still arriving on in, marking each as done
	// so that traversing.Wait() below can return.
	closeQuit := func() {
		doClose.Do(func() {
			close(quit)
			go func() {
				for range in {
					traversing.Done()
				}
			}()
		})
	}
	for i := 0; i < fs.Config.Checkers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for {
				select {
				case job, ok := <-in:
					if !ok {
						// in was closed: the walk is finished
						return
					}
					entries, err := listDir(ctx, f, includeAll, job.remote)
					// Work out the sub-directories to list before
					// entries is handed over to fn
					var jobs []listJob
					if err == nil && job.depth != 0 {
						// NOTE(review): a root depth of -1 (maxLevel == 0)
						// never equals 0 here either, so it appears to
						// recurse like maxLevel < 0 - confirm intended
						entries.ForDir(func(dir fs.Directory) {
							// Recurse for the directory
							jobs = append(jobs, listJob{
								remote: dir.Remote(),
								depth:  job.depth - 1,
							})
						})
					}
					mu.Lock()
					err = fn(job.remote, entries, err)
					mu.Unlock()
					// NB once we have passed entries to fn we mustn't touch it again
					if err != nil && err != ErrorSkipDir {
						// This job is finished, so mark it done before
						// shutting the walk down
						traversing.Done()
						err = fs.CountError(err)
						fs.Errorf(job.remote, "error listing: %v", err)
						closeQuit()
						// Send error to error channel if space
						select {
						case errs <- err:
						default:
						}
						continue
					}
					// ErrorSkipDir leaves err != nil here, so the
					// sub-directories are not queued
					if err == nil && len(jobs) > 0 {
						// Add the new jobs before this job is marked done
						// so the count can't drop to zero in between
						traversing.Add(len(jobs))
						go func() {
							// Now we have traversed this directory, send these
							// jobs off for traversal in the background
							for _, newJob := range jobs {
								in <- newJob
							}
						}()
					}
					traversing.Done()
				case <-quit:
					return
				}
			}
		}()
	}
	// Start the process
	traversing.Add(1)
	in <- listJob{
		remote: path,
		depth:  maxLevel - 1,
	}
	// Wait for every queued job to be accounted for, then stop the
	// workers (and the drain goroutine, if any) by closing in
	traversing.Wait()
	close(in)
	wg.Wait()
	close(errs)
	// return the first error returned or nil
	// (receiving from the closed, empty channel gives nil)
	return <-errs
}

// walkRDirTree creates a DirTree from a recursive listing of startPath
// made with listR.
//
// Objects and directories are filtered with the active filters unless
// includeAll is set, and limited to maxLevel levels unless maxLevel is
// < 0. Directories holding the --exclude-file marker are pruned
// afterwards. The tree is sorted before it is returned.
func walkRDirTree(ctx context.Context, f fs.Fs, startPath string, includeAll bool, maxLevel int, listR fs.ListRFn) (dirtree.DirTree, error) {
	dirs := dirtree.New()
	// Entries can come in arbitrary order. We use toPrune to keep
	// all directories to exclude later.
	toPrune := make(map[string]bool)
	includeDirectory := filter.Active.IncludeDirectory(ctx, f)
	var mu sync.Mutex // serialises the callback, which writes to dirs and toPrune
	err := listR(ctx, startPath, func(entries fs.DirEntries) error {
		mu.Lock()
		defer mu.Unlock()
		for _, entry := range entries {
			// The number of slashes in the remote is used as its level.
			// NOTE(review): this counts the whole remote rather than the
			// part below startPath - confirm callers only bound maxLevel
			// when this is what they want
			slashes := strings.Count(entry.Remote(), "/")
			switch x := entry.(type) {
			case fs.Object:
				// Make sure we don't delete excluded files if not required
				if includeAll || filter.Active.IncludeObject(ctx, x) {
					if maxLevel < 0 || slashes <= maxLevel-1 {
						dirs.Add(x)
					} else {
						// Make sure we include any parent directories of excluded objects
						// by walking up to the ancestor at the deepest allowed level
						dirPath := x.Remote()
						for ; slashes > maxLevel-1; slashes-- {
							dirPath = parentDir(dirPath)
						}
						dirs.CheckParent(startPath, dirPath)
					}
				} else {
					fs.Debugf(x, "Excluded from sync (and deletion)")
				}
				// Check if we need to prune a directory later.
				// A file named after --exclude-file marks its directory.
				if !includeAll && len(filter.Active.Opt.ExcludeFile) > 0 {
					basename := path.Base(x.Remote())
					if basename == filter.Active.Opt.ExcludeFile {
						excludeDir := parentDir(x.Remote())
						toPrune[excludeDir] = true
						fs.Debugf(basename, "Excluded from sync (and deletion) based on exclude file")
					}
				}
			case fs.Directory:
				inc, err := includeDirectory(x.Remote())
				if err != nil {
					return err
				}
				if includeAll || inc {
					// Directories below maxLevel are silently left out
					if maxLevel < 0 || slashes <= maxLevel-1 {
						if slashes == maxLevel-1 {
							// Just add the object if at maxLevel
							dirs.Add(x)
						} else {
							dirs.AddDir(x)
						}
					}
				} else {
					fs.Debugf(x, "Excluded from sync (and deletion)")
				}
			default:
				return errors.Errorf("unknown object type %T", entry)
			}
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	dirs.CheckParents(startPath)
	// Make sure an empty listing still has an entry for startPath
	if len(dirs) == 0 {
		dirs[startPath] = nil
	}
	err = dirs.Prune(toPrune)
	if err != nil {
		return nil, err
	}
	dirs.Sort()
	return dirs, nil
}

// walkNDirTree creates a DirTree by walking path with listDir, one
// directory at a time.
//
// The entries of each directory listed without error are stored under
// its path. The first listing error stops the walk and is returned in
// place of the tree.
func walkNDirTree(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, listDir listDirFunc) (dirtree.DirTree, error) {
	dirs := make(dirtree.DirTree)
	// walk never calls fn concurrently so dirs needs no locking here
	fn := func(dirPath string, entries fs.DirEntries, err error) error {
		if err == nil {
			dirs[dirPath] = entries
		}
		return err
	}
	err := walk(ctx, f, path, includeAll, maxLevel, fn, listDir)
	if err != nil {
		return nil, err
	}
	return dirs, nil
}

// NewDirTree returns a DirTree filled with the directory listing
// using the parameters supplied.
//
// If includeAll is not set it will use the filters defined.
//
// If maxLevel is < 0 then it will recurse indefinitely, else it will
// only do maxLevel levels.
//
// This is implemented by WalkR if f supports ListR and level > 1, or
// WalkN otherwise.
550 // 551 // If --files-from and --no-traverse is set then a DirTree will be 552 // constructed with just those files in. 553 // 554 // NB (f, path) to be replaced by fs.Dir at some point 555 func NewDirTree(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int) (dirtree.DirTree, error) { 556 // if --no-traverse and --files-from build DirTree just from files 557 if fs.Config.NoTraverse && filter.Active.HaveFilesFrom() { 558 return walkRDirTree(ctx, f, path, includeAll, maxLevel, filter.Active.MakeListR(ctx, f.NewObject)) 559 } 560 // if have ListR; and recursing; and not using --files-from; then build a DirTree with ListR 561 if ListR := f.Features().ListR; (maxLevel < 0 || maxLevel > 1) && ListR != nil && !filter.Active.HaveFilesFrom() { 562 return walkRDirTree(ctx, f, path, includeAll, maxLevel, ListR) 563 } 564 // otherwise just use List 565 return walkNDirTree(ctx, f, path, includeAll, maxLevel, list.DirSorted) 566 } 567 568 func walkR(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func, listR fs.ListRFn) error { 569 dirs, err := walkRDirTree(ctx, f, path, includeAll, maxLevel, listR) 570 if err != nil { 571 return err 572 } 573 skipping := false 574 skipPrefix := "" 575 emptyDir := fs.DirEntries{} 576 for _, dirPath := range dirs.Dirs() { 577 if skipping { 578 // Skip over directories as required 579 if strings.HasPrefix(dirPath, skipPrefix) { 580 continue 581 } 582 skipping = false 583 } 584 entries := dirs[dirPath] 585 if entries == nil { 586 entries = emptyDir 587 } 588 err = fn(dirPath, entries, nil) 589 if err == ErrorSkipDir { 590 skipping = true 591 skipPrefix = dirPath 592 if skipPrefix != "" { 593 skipPrefix += "/" 594 } 595 } else if err != nil { 596 return err 597 } 598 } 599 return nil 600 } 601 602 // GetAll runs ListR getting all the results 603 func GetAll(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int) (objs []fs.Object, dirs []fs.Directory, err error) { 604 err = 
ListR(ctx, f, path, includeAll, maxLevel, ListAll, func(entries fs.DirEntries) error { 605 for _, entry := range entries { 606 switch x := entry.(type) { 607 case fs.Object: 608 objs = append(objs, x) 609 case fs.Directory: 610 dirs = append(dirs, x) 611 } 612 } 613 return nil 614 }) 615 return 616 } 617 618 // ListRHelper is used in the implementation of ListR to accumulate DirEntries 619 type ListRHelper struct { 620 callback fs.ListRCallback 621 entries fs.DirEntries 622 } 623 624 // NewListRHelper should be called from ListR with the callback passed in 625 func NewListRHelper(callback fs.ListRCallback) *ListRHelper { 626 return &ListRHelper{ 627 callback: callback, 628 } 629 } 630 631 // send sends the stored entries to the callback if there are >= max 632 // entries. 633 func (lh *ListRHelper) send(max int) (err error) { 634 if len(lh.entries) >= max { 635 err = lh.callback(lh.entries) 636 lh.entries = lh.entries[:0] 637 } 638 return err 639 } 640 641 // Add an entry to the stored entries and send them if there are more 642 // than a certain amount 643 func (lh *ListRHelper) Add(entry fs.DirEntry) error { 644 if entry == nil { 645 return nil 646 } 647 lh.entries = append(lh.entries, entry) 648 return lh.send(100) 649 } 650 651 // Flush the stored entries (if any) sending them to the callback 652 func (lh *ListRHelper) Flush() error { 653 return lh.send(1) 654 }