github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/fs/walk/walk.go (about)

     1  // Package walk walks directories
     2  package walk
     3  
     4  import (
     5  	"context"
     6  	"path"
     7  	"sort"
     8  	"strings"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/pkg/errors"
    13  	"github.com/rclone/rclone/fs"
    14  	"github.com/rclone/rclone/fs/dirtree"
    15  	"github.com/rclone/rclone/fs/filter"
    16  	"github.com/rclone/rclone/fs/list"
    17  )
    18  
// ErrorSkipDir is a sentinel that a Func may return to tell Walk that
// the contents of the directory it was just called for should be
// skipped. Walk treats it as a control signal rather than a failure.
var ErrorSkipDir = errors.New("skip this directory")
    23  
// ErrorCantListR is returned by walkListR when the Fs does not provide
// a ListR feature, i.e. it cannot do a recursive listing.
var ErrorCantListR = errors.New("recursive directory listing not available")
    27  
// Func is the type of the function called for each directory visited
// by Walk. The path argument contains the remote path to the directory
// and entries holds the listing of that directory.
//
// If there was a problem listing the directory named by path, the
// incoming error will describe the problem and the function can
// decide how to handle that error (and Walk will not descend into
// that directory). If an error is returned, processing stops. The
// sole exception is when the function returns the special value
// ErrorSkipDir. If the function returns ErrorSkipDir, Walk skips the
// directory's contents entirely.
type Func func(path string, entries fs.DirEntries, err error) error
    39  
    40  // Walk lists the directory.
    41  //
    42  // If includeAll is not set it will use the filters defined.
    43  //
    44  // If maxLevel is < 0 then it will recurse indefinitely, else it will
    45  // only do maxLevel levels.
    46  //
    47  // It calls fn for each tranche of DirEntries read.
    48  //
    49  // Note that fn will not be called concurrently whereas the directory
    50  // listing will proceed concurrently.
    51  //
    52  // Parent directories are always listed before their children
    53  //
    54  // This is implemented by WalkR if Config.UseListR is true
    55  // and f supports it and level > 1, or WalkN otherwise.
    56  //
    57  // If --files-from and --no-traverse is set then a DirTree will be
    58  // constructed with just those files in and then walked with WalkR
    59  //
    60  // NB (f, path) to be replaced by fs.Dir at some point
    61  func Walk(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func) error {
    62  	if fs.Config.NoTraverse && filter.Active.HaveFilesFrom() {
    63  		return walkR(ctx, f, path, includeAll, maxLevel, fn, filter.Active.MakeListR(ctx, f.NewObject))
    64  	}
    65  	// FIXME should this just be maxLevel < 0 - why the maxLevel > 1
    66  	if (maxLevel < 0 || maxLevel > 1) && fs.Config.UseListR && f.Features().ListR != nil {
    67  		return walkListR(ctx, f, path, includeAll, maxLevel, fn)
    68  	}
    69  	return walkListDirSorted(ctx, f, path, includeAll, maxLevel, fn)
    70  }
    71  
    72  // ListType is uses to choose which combination of files or directories is requires
    73  type ListType byte
    74  
    75  // Types of listing for ListR
    76  const (
    77  	ListObjects ListType                 = 1 << iota // list objects only
    78  	ListDirs                                         // list dirs only
    79  	ListAll     = ListObjects | ListDirs             // list files and dirs
    80  )
    81  
    82  // Objects returns true if the list type specifies objects
    83  func (l ListType) Objects() bool {
    84  	return (l & ListObjects) != 0
    85  }
    86  
    87  // Dirs returns true if the list type specifies dirs
    88  func (l ListType) Dirs() bool {
    89  	return (l & ListDirs) != 0
    90  }
    91  
    92  // Filter in (inplace) to only contain the type of list entry required
    93  func (l ListType) Filter(in *fs.DirEntries) {
    94  	if l == ListAll {
    95  		return
    96  	}
    97  	out := (*in)[:0]
    98  	for _, entry := range *in {
    99  		switch entry.(type) {
   100  		case fs.Object:
   101  			if l.Objects() {
   102  				out = append(out, entry)
   103  			}
   104  		case fs.Directory:
   105  			if l.Dirs() {
   106  				out = append(out, entry)
   107  			}
   108  		default:
   109  			fs.Errorf(nil, "Unknown object type %T", entry)
   110  		}
   111  	}
   112  	*in = out
   113  }
   114  
   115  // ListR lists the directory recursively.
   116  //
   117  // If includeAll is not set it will use the filters defined.
   118  //
   119  // If maxLevel is < 0 then it will recurse indefinitely, else it will
   120  // only do maxLevel levels.
   121  //
   122  // If synthesizeDirs is set then for bucket based remotes it will
   123  // synthesize directories from the file structure.  This uses extra
   124  // memory so don't set this if you don't need directories, likewise do
   125  // set this if you are interested in directories.
   126  //
   127  // It calls fn for each tranche of DirEntries read. Note that these
   128  // don't necessarily represent a directory
   129  //
   130  // Note that fn will not be called concurrently whereas the directory
   131  // listing will proceed concurrently.
   132  //
   133  // Directories are not listed in any particular order so you can't
   134  // rely on parents coming before children or alphabetical ordering
   135  //
   136  // This is implemented by using ListR on the backend if possible and
   137  // efficient, otherwise by Walk.
   138  //
   139  // NB (f, path) to be replaced by fs.Dir at some point
   140  func ListR(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, listType ListType, fn fs.ListRCallback) error {
   141  	// FIXME disable this with --no-fast-list ??? `--disable ListR` will do it...
   142  	doListR := f.Features().ListR
   143  
   144  	// Can't use ListR if...
   145  	if doListR == nil || // ...no ListR
   146  		filter.Active.HaveFilesFrom() || // ...using --files-from
   147  		maxLevel >= 0 || // ...using bounded recursion
   148  		len(filter.Active.Opt.ExcludeFile) > 0 || // ...using --exclude-file
   149  		filter.Active.UsesDirectoryFilters() { // ...using any directory filters
   150  		return listRwalk(ctx, f, path, includeAll, maxLevel, listType, fn)
   151  	}
   152  	return listR(ctx, f, path, includeAll, listType, fn, doListR, listType.Dirs() && f.Features().BucketBased)
   153  }
   154  
   155  // listRwalk walks the file tree for ListR using Walk
   156  func listRwalk(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, listType ListType, fn fs.ListRCallback) error {
   157  	var listErr error
   158  	walkErr := Walk(ctx, f, path, includeAll, maxLevel, func(path string, entries fs.DirEntries, err error) error {
   159  		// Carry on listing but return the error at the end
   160  		if err != nil {
   161  			listErr = err
   162  			err = fs.CountError(err)
   163  			fs.Errorf(path, "error listing: %v", err)
   164  			return nil
   165  		}
   166  		listType.Filter(&entries)
   167  		return fn(entries)
   168  	})
   169  	if listErr != nil {
   170  		return listErr
   171  	}
   172  	return walkErr
   173  }
   174  
   175  // dirMap keeps track of directories made for bucket based remotes.
   176  // true => directory has been sent
   177  // false => directory has been seen but not sent
   178  type dirMap struct {
   179  	mu   sync.Mutex
   180  	m    map[string]bool
   181  	root string
   182  }
   183  
   184  // make a new dirMap
   185  func newDirMap(root string) *dirMap {
   186  	return &dirMap{
   187  		m:    make(map[string]bool),
   188  		root: root,
   189  	}
   190  }
   191  
   192  // add adds a directory and parents with sent
   193  func (dm *dirMap) add(dir string, sent bool) {
   194  	for {
   195  		if dir == dm.root || dir == "" {
   196  			return
   197  		}
   198  		currentSent, found := dm.m[dir]
   199  		if found {
   200  			// If it has been sent already then nothing more to do
   201  			if currentSent {
   202  				return
   203  			}
   204  			// If not sent already don't override
   205  			if !sent {
   206  				return
   207  			}
   208  			// currenSent == false && sent == true so needs overriding
   209  		}
   210  		dm.m[dir] = sent
   211  		// Add parents in as unsent
   212  		dir = parentDir(dir)
   213  		sent = false
   214  	}
   215  }
   216  
   217  // parentDir finds the parent directory of path
   218  func parentDir(entryPath string) string {
   219  	dirPath := path.Dir(entryPath)
   220  	if dirPath == "." {
   221  		dirPath = ""
   222  	}
   223  	return dirPath
   224  }
   225  
   226  // add all the directories in entries and their parents to the dirMap
   227  func (dm *dirMap) addEntries(entries fs.DirEntries) error {
   228  	dm.mu.Lock()
   229  	defer dm.mu.Unlock()
   230  	for _, entry := range entries {
   231  		switch x := entry.(type) {
   232  		case fs.Object:
   233  			dm.add(parentDir(x.Remote()), false)
   234  		case fs.Directory:
   235  			dm.add(x.Remote(), true)
   236  		default:
   237  			return errors.Errorf("unknown object type %T", entry)
   238  		}
   239  	}
   240  	return nil
   241  }
   242  
   243  // send any missing parents to fn
   244  func (dm *dirMap) sendEntries(fn fs.ListRCallback) (err error) {
   245  	// Count the strings first so we allocate the minimum memory
   246  	n := 0
   247  	for _, sent := range dm.m {
   248  		if !sent {
   249  			n++
   250  		}
   251  	}
   252  	if n == 0 {
   253  		return nil
   254  	}
   255  	dirs := make([]string, 0, n)
   256  	// Fill the dirs up then sort it
   257  	for dir, sent := range dm.m {
   258  		if !sent {
   259  			dirs = append(dirs, dir)
   260  		}
   261  	}
   262  	sort.Strings(dirs)
   263  	// Now convert to bulkier Dir in batches and send
   264  	now := time.Now()
   265  	list := NewListRHelper(fn)
   266  	for _, dir := range dirs {
   267  		err = list.Add(fs.NewDir(dir, now))
   268  		if err != nil {
   269  			return err
   270  		}
   271  	}
   272  	return list.Flush()
   273  }
   274  
   275  // listR walks the file tree using ListR
   276  func listR(ctx context.Context, f fs.Fs, path string, includeAll bool, listType ListType, fn fs.ListRCallback, doListR fs.ListRFn, synthesizeDirs bool) error {
   277  	includeDirectory := filter.Active.IncludeDirectory(ctx, f)
   278  	if !includeAll {
   279  		includeAll = filter.Active.InActive()
   280  	}
   281  	var dm *dirMap
   282  	if synthesizeDirs {
   283  		dm = newDirMap(path)
   284  	}
   285  	var mu sync.Mutex
   286  	err := doListR(ctx, path, func(entries fs.DirEntries) (err error) {
   287  		if synthesizeDirs {
   288  			err = dm.addEntries(entries)
   289  			if err != nil {
   290  				return err
   291  			}
   292  		}
   293  		listType.Filter(&entries)
   294  		if !includeAll {
   295  			filteredEntries := entries[:0]
   296  			for _, entry := range entries {
   297  				var include bool
   298  				switch x := entry.(type) {
   299  				case fs.Object:
   300  					include = filter.Active.IncludeObject(ctx, x)
   301  				case fs.Directory:
   302  					include, err = includeDirectory(x.Remote())
   303  					if err != nil {
   304  						return err
   305  					}
   306  				default:
   307  					return errors.Errorf("unknown object type %T", entry)
   308  				}
   309  				if include {
   310  					filteredEntries = append(filteredEntries, entry)
   311  				} else {
   312  					fs.Debugf(entry, "Excluded from sync (and deletion)")
   313  				}
   314  			}
   315  			entries = filteredEntries
   316  		}
   317  		mu.Lock()
   318  		defer mu.Unlock()
   319  		return fn(entries)
   320  	})
   321  	if err != nil {
   322  		return err
   323  	}
   324  	if synthesizeDirs {
   325  		err = dm.sendEntries(fn)
   326  		if err != nil {
   327  			return err
   328  		}
   329  	}
   330  	return nil
   331  }
   332  
// walkListDirSorted lists the directory.
//
// It implements Walk using non recursive directory listing, by
// running walk with list.DirSorted as the per-directory lister.
func walkListDirSorted(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func) error {
	return walk(ctx, f, path, includeAll, maxLevel, fn, list.DirSorted)
}
   339  
   340  // walkListR lists the directory.
   341  //
   342  // It implements Walk using recursive directory listing if
   343  // available, or returns ErrorCantListR if not.
   344  func walkListR(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func) error {
   345  	listR := f.Features().ListR
   346  	if listR == nil {
   347  		return ErrorCantListR
   348  	}
   349  	return walkR(ctx, f, path, includeAll, maxLevel, fn, listR)
   350  }
   351  
// listDirFunc is the signature of the function walk uses to list a
// single directory dir of fs. list.DirSorted is passed as one.
type listDirFunc func(ctx context.Context, fs fs.Fs, includeAll bool, dir string) (entries fs.DirEntries, err error)
   353  
// walk lists the tree below path one directory at a time using
// listDir, calling fn for each directory listed.
//
// fs.Config.Checkers worker goroutines read listing jobs from a
// channel. Each job lists one directory, passes the result (or the
// listing error) to fn while holding a mutex so fn is never called
// concurrently, and then queues a job for each sub directory.
//
// The root job starts with depth maxLevel-1 and each level of
// children gets one less; children are only queued while the depth
// of the job is non zero.
//
// If fn returns an error other than ErrorSkipDir the error is counted
// and logged, the workers are told to quit and the first such error is
// returned. If fn returns ErrorSkipDir the children of that directory
// are not queued. Returns nil if no error was recorded.
func walk(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func, listDir listDirFunc) error {
	var (
		wg         sync.WaitGroup // sync closing of go routines
		traversing sync.WaitGroup // running directory traversals
		doClose    sync.Once      // close the channel once
		mu         sync.Mutex     // stop fn being called concurrently
	)
	// listJob describe a directory listing that needs to be done
	type listJob struct {
		remote string
		depth  int
	}

	in := make(chan listJob, fs.Config.Checkers)
	// errs has room for one error only - later errors are dropped
	errs := make(chan error, 1)
	quit := make(chan struct{})
	// closeQuit signals the workers to stop (at most once) and starts
	// a goroutine which discards any jobs still arriving on in, marking
	// each as done so that traversing.Wait() below can return.
	closeQuit := func() {
		doClose.Do(func() {
			close(quit)
			go func() {
				for range in {
					traversing.Done()
				}
			}()
		})
	}
	for i := 0; i < fs.Config.Checkers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for {
				select {
				case job, ok := <-in:
					if !ok {
						// in was closed - no more work
						return
					}
					entries, err := listDir(ctx, f, includeAll, job.remote)
					// Work out the child jobs before fn sees entries
					var jobs []listJob
					if err == nil && job.depth != 0 {
						entries.ForDir(func(dir fs.Directory) {
							// Recurse for the directory
							jobs = append(jobs, listJob{
								remote: dir.Remote(),
								depth:  job.depth - 1,
							})
						})
					}
					mu.Lock()
					err = fn(job.remote, entries, err)
					mu.Unlock()
					// NB once we have passed entries to fn we mustn't touch it again
					if err != nil && err != ErrorSkipDir {
						// This job is finished and its children are never queued
						traversing.Done()
						err = fs.CountError(err)
						fs.Errorf(job.remote, "error listing: %v", err)
						closeQuit()
						// Send error to error channel if space
						select {
						case errs <- err:
						default:
						}
						continue
					}
					// ErrorSkipDir leaves err != nil here so the children are not queued
					if err == nil && len(jobs) > 0 {
						// Account for the children before this job is marked done
						traversing.Add(len(jobs))
						go func() {
							// Now we have traversed this directory, send these
							// jobs off for traversal in the background
							for _, newJob := range jobs {
								in <- newJob
							}
						}()
					}
					traversing.Done()
				case <-quit:
					return
				}
			}
		}()
	}
	// Start the process
	traversing.Add(1)
	in <- listJob{
		remote: path,
		depth:  maxLevel - 1,
	}
	// Wait for every queued job to be processed or discarded, then
	// shut the workers down
	traversing.Wait()
	close(in)
	wg.Wait()
	close(errs)
	// return the first error returned or nil
	return <-errs
}
   447  
// walkRDirTree builds a DirTree for startPath from a recursive listing
// made with listR.
//
// Objects and directories are filtered unless includeAll is set.
// Depth is judged by the number of "/" in the entry's remote: if
// maxLevel >= 0, entries with more than maxLevel-1 slashes are not
// added. Directories containing the --exclude-file file are collected
// and pruned from the tree once the listing is complete. The returned
// tree has had its parents checked and has been sorted.
//
// NOTE(review): the slash count is taken on the full remote rather
// than relative to startPath - confirm this is intended when startPath
// is not the root.
func walkRDirTree(ctx context.Context, f fs.Fs, startPath string, includeAll bool, maxLevel int, listR fs.ListRFn) (dirtree.DirTree, error) {
	dirs := dirtree.New()
	// Entries can come in arbitrary order. We use toPrune to keep
	// all directories to exclude later.
	toPrune := make(map[string]bool)
	includeDirectory := filter.Active.IncludeDirectory(ctx, f)
	// mu serialises the callback as it updates dirs and toPrune
	var mu sync.Mutex
	err := listR(ctx, startPath, func(entries fs.DirEntries) error {
		mu.Lock()
		defer mu.Unlock()
		for _, entry := range entries {
			// slashes is used as the depth of the entry
			slashes := strings.Count(entry.Remote(), "/")
			switch x := entry.(type) {
			case fs.Object:
				// Make sure we don't delete excluded files if not required
				if includeAll || filter.Active.IncludeObject(ctx, x) {
					if maxLevel < 0 || slashes <= maxLevel-1 {
						dirs.Add(x)
					} else {
						// Make sure we include any parent directories of excluded objects
						dirPath := x.Remote()
						// Strip path components until the path has maxLevel-1 slashes
						for ; slashes > maxLevel-1; slashes-- {
							dirPath = parentDir(dirPath)
						}
						dirs.CheckParent(startPath, dirPath)
					}
				} else {
					fs.Debugf(x, "Excluded from sync (and deletion)")
				}
				// Check if we need to prune a directory later.
				if !includeAll && len(filter.Active.Opt.ExcludeFile) > 0 {
					basename := path.Base(x.Remote())
					if basename == filter.Active.Opt.ExcludeFile {
						excludeDir := parentDir(x.Remote())
						toPrune[excludeDir] = true
						fs.Debugf(basename, "Excluded from sync (and deletion) based on exclude file")
					}
				}
			case fs.Directory:
				// The directory filter is consulted (and its error
				// returned) even when includeAll is set
				inc, err := includeDirectory(x.Remote())
				if err != nil {
					return err
				}
				if includeAll || inc {
					if maxLevel < 0 || slashes <= maxLevel-1 {
						if slashes == maxLevel-1 {
							// Just add the object if at maxLevel
							dirs.Add(x)
						} else {
							dirs.AddDir(x)
						}
					}
				} else {
					fs.Debugf(x, "Excluded from sync (and deletion)")
				}
			default:
				return errors.Errorf("unknown object type %T", entry)
			}
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	dirs.CheckParents(startPath)
	// Make sure an empty listing still has an entry for startPath
	if len(dirs) == 0 {
		dirs[startPath] = nil
	}
	err = dirs.Prune(toPrune)
	if err != nil {
		return nil, err
	}
	dirs.Sort()
	return dirs, nil
}
   523  
   524  // Create a DirTree using List
   525  func walkNDirTree(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, listDir listDirFunc) (dirtree.DirTree, error) {
   526  	dirs := make(dirtree.DirTree)
   527  	fn := func(dirPath string, entries fs.DirEntries, err error) error {
   528  		if err == nil {
   529  			dirs[dirPath] = entries
   530  		}
   531  		return err
   532  	}
   533  	err := walk(ctx, f, path, includeAll, maxLevel, fn, listDir)
   534  	if err != nil {
   535  		return nil, err
   536  	}
   537  	return dirs, nil
   538  }
   539  
   540  // NewDirTree returns a DirTree filled with the directory listing
   541  // using the parameters supplied.
   542  //
   543  // If includeAll is not set it will use the filters defined.
   544  //
   545  // If maxLevel is < 0 then it will recurse indefinitely, else it will
   546  // only do maxLevel levels.
   547  //
   548  // This is implemented by WalkR if f supports ListR and level > 1, or
   549  // WalkN otherwise.
   550  //
   551  // If --files-from and --no-traverse is set then a DirTree will be
   552  // constructed with just those files in.
   553  //
   554  // NB (f, path) to be replaced by fs.Dir at some point
   555  func NewDirTree(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int) (dirtree.DirTree, error) {
   556  	// if --no-traverse and --files-from build DirTree just from files
   557  	if fs.Config.NoTraverse && filter.Active.HaveFilesFrom() {
   558  		return walkRDirTree(ctx, f, path, includeAll, maxLevel, filter.Active.MakeListR(ctx, f.NewObject))
   559  	}
   560  	// if have ListR; and recursing; and not using --files-from; then build a DirTree with ListR
   561  	if ListR := f.Features().ListR; (maxLevel < 0 || maxLevel > 1) && ListR != nil && !filter.Active.HaveFilesFrom() {
   562  		return walkRDirTree(ctx, f, path, includeAll, maxLevel, ListR)
   563  	}
   564  	// otherwise just use List
   565  	return walkNDirTree(ctx, f, path, includeAll, maxLevel, list.DirSorted)
   566  }
   567  
   568  func walkR(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int, fn Func, listR fs.ListRFn) error {
   569  	dirs, err := walkRDirTree(ctx, f, path, includeAll, maxLevel, listR)
   570  	if err != nil {
   571  		return err
   572  	}
   573  	skipping := false
   574  	skipPrefix := ""
   575  	emptyDir := fs.DirEntries{}
   576  	for _, dirPath := range dirs.Dirs() {
   577  		if skipping {
   578  			// Skip over directories as required
   579  			if strings.HasPrefix(dirPath, skipPrefix) {
   580  				continue
   581  			}
   582  			skipping = false
   583  		}
   584  		entries := dirs[dirPath]
   585  		if entries == nil {
   586  			entries = emptyDir
   587  		}
   588  		err = fn(dirPath, entries, nil)
   589  		if err == ErrorSkipDir {
   590  			skipping = true
   591  			skipPrefix = dirPath
   592  			if skipPrefix != "" {
   593  				skipPrefix += "/"
   594  			}
   595  		} else if err != nil {
   596  			return err
   597  		}
   598  	}
   599  	return nil
   600  }
   601  
   602  // GetAll runs ListR getting all the results
   603  func GetAll(ctx context.Context, f fs.Fs, path string, includeAll bool, maxLevel int) (objs []fs.Object, dirs []fs.Directory, err error) {
   604  	err = ListR(ctx, f, path, includeAll, maxLevel, ListAll, func(entries fs.DirEntries) error {
   605  		for _, entry := range entries {
   606  			switch x := entry.(type) {
   607  			case fs.Object:
   608  				objs = append(objs, x)
   609  			case fs.Directory:
   610  				dirs = append(dirs, x)
   611  			}
   612  		}
   613  		return nil
   614  	})
   615  	return
   616  }
   617  
// ListRHelper is used in the implementation of ListR to accumulate
// DirEntries and pass them to a callback in batches.
type ListRHelper struct {
	callback fs.ListRCallback // receives each batch of entries
	entries  fs.DirEntries    // entries accumulated but not yet sent
}
   623  
   624  // NewListRHelper should be called from ListR with the callback passed in
   625  func NewListRHelper(callback fs.ListRCallback) *ListRHelper {
   626  	return &ListRHelper{
   627  		callback: callback,
   628  	}
   629  }
   630  
   631  // send sends the stored entries to the callback if there are >= max
   632  // entries.
   633  func (lh *ListRHelper) send(max int) (err error) {
   634  	if len(lh.entries) >= max {
   635  		err = lh.callback(lh.entries)
   636  		lh.entries = lh.entries[:0]
   637  	}
   638  	return err
   639  }
   640  
   641  // Add an entry to the stored entries and send them if there are more
   642  // than a certain amount
   643  func (lh *ListRHelper) Add(entry fs.DirEntry) error {
   644  	if entry == nil {
   645  		return nil
   646  	}
   647  	lh.entries = append(lh.entries, entry)
   648  	return lh.send(100)
   649  }
   650  
// Flush sends the stored entries (if any) to the callback, returning
// the callback's error. It does nothing if no entries are stored.
func (lh *ListRHelper) Flush() error {
	return lh.send(1)
}
   654  }