github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/fs/march/march.go (about)

     1  // Package march traverses two directories in lock step
     2  package march
     3  
     4  import (
     5  	"context"
     6  	"path"
     7  	"sort"
     8  	"strings"
     9  	"sync"
    10  
    11  	"github.com/pkg/errors"
    12  
    13  	"github.com/rclone/rclone/fs"
    14  	"github.com/rclone/rclone/fs/dirtree"
    15  	"github.com/rclone/rclone/fs/filter"
    16  	"github.com/rclone/rclone/fs/list"
    17  	"github.com/rclone/rclone/fs/walk"
    18  	"golang.org/x/text/unicode/norm"
    19  )
    20  
// March holds the data used to traverse two Fs simultaneously,
// calling Callback for each match
type March struct {
	// parameters
	Ctx                    context.Context // context for background goroutines
	Fdst                   fs.Fs           // destination Fs
	Fsrc                   fs.Fs           // source Fs
	Dir                    string          // directory the traversal starts from in both Fs
	NoTraverse             bool            // don't list the destination; look up each source object in it instead
	SrcIncludeAll          bool            // passed as includeAll when listing the source (presumably bypasses filters - confirm against fs/list)
	DstIncludeAll          bool            // passed as includeAll when listing the destination (presumably bypasses filters - confirm against fs/list)
	Callback               Marcher         // object to call with results
	NoCheckDest            bool            // transfer all objects regardless without checking dst
	NoUnicodeNormalization bool            // don't normalize unicode characters in filenames
	// internal state
	srcListDir listDirFn          // function to call to list a directory in the src
	dstListDir listDirFn          // function to call to list a directory in the dst (left unset when NoTraverse is true)
	transforms []matchTransformFn // name transforms applied before comparing src and dst names
}
    40  
// Marcher is called on each match
//
// For every method the returned recurse value is consulted by the
// march: a follow-up listing job for the entry is only queued when
// recurse is true and the remaining depth allows it.
type Marcher interface {
	// SrcOnly is called for a DirEntry found only in the source
	SrcOnly(src fs.DirEntry) (recurse bool)
	// DstOnly is called for a DirEntry found only in the destination
	DstOnly(dst fs.DirEntry) (recurse bool)
	// Match is called for a DirEntry found both in the source and destination
	Match(ctx context.Context, dst, src fs.DirEntry) (recurse bool)
}
    50  
    51  // init sets up a march over opt.Fsrc, and opt.Fdst calling back callback for each match
    52  func (m *March) init() {
    53  	m.srcListDir = m.makeListDir(m.Fsrc, m.SrcIncludeAll)
    54  	if !m.NoTraverse {
    55  		m.dstListDir = m.makeListDir(m.Fdst, m.DstIncludeAll)
    56  	}
    57  	// Now create the matching transform
    58  	// ..normalise the UTF8 first
    59  	if !m.NoUnicodeNormalization {
    60  		m.transforms = append(m.transforms, norm.NFC.String)
    61  	}
    62  	// ..if destination is caseInsensitive then make it lower case
    63  	// case Insensitive | src | dst | lower case compare |
    64  	//                  | No  | No  | No                 |
    65  	//                  | Yes | No  | No                 |
    66  	//                  | No  | Yes | Yes                |
    67  	//                  | Yes | Yes | Yes                |
    68  	if m.Fdst.Features().CaseInsensitive || fs.Config.IgnoreCaseSync {
    69  		m.transforms = append(m.transforms, strings.ToLower)
    70  	}
    71  }
    72  
// listDirFn lists the directory dir, returning its entries or an
// error. The concrete implementations are built by makeListDir.
type listDirFn func(dir string) (entries fs.DirEntries, err error)
    75  
    76  // makeListDir makes constructs a listing function for the given fs
    77  // and includeAll flags for marching through the file system.
    78  func (m *March) makeListDir(f fs.Fs, includeAll bool) listDirFn {
    79  	if !(fs.Config.UseListR && f.Features().ListR != nil) && // !--fast-list active and
    80  		!(fs.Config.NoTraverse && filter.Active.HaveFilesFrom()) { // !(--files-from and --no-traverse)
    81  		return func(dir string) (entries fs.DirEntries, err error) {
    82  			return list.DirSorted(m.Ctx, f, includeAll, dir)
    83  		}
    84  	}
    85  
    86  	// This returns a closure for use when --fast-list is active or for when
    87  	// --files-from and --no-traverse is set
    88  	var (
    89  		mu      sync.Mutex
    90  		started bool
    91  		dirs    dirtree.DirTree
    92  		dirsErr error
    93  	)
    94  	return func(dir string) (entries fs.DirEntries, err error) {
    95  		mu.Lock()
    96  		defer mu.Unlock()
    97  		if !started {
    98  			dirs, dirsErr = walk.NewDirTree(m.Ctx, f, m.Dir, includeAll, fs.Config.MaxDepth)
    99  			started = true
   100  		}
   101  		if dirsErr != nil {
   102  			return nil, dirsErr
   103  		}
   104  		entries, ok := dirs[dir]
   105  		if !ok {
   106  			err = fs.ErrorDirNotFound
   107  		} else {
   108  			delete(dirs, dir)
   109  		}
   110  		return entries, err
   111  	}
   112  }
   113  
// listDirJob describe a directory listing that needs to be done
type listDirJob struct {
	srcRemote string // directory to list in the source
	dstRemote string // directory to list in the destination
	srcDepth  int    // remaining source levels; sub directories are only queued while this is > 0
	dstDepth  int    // remaining destination levels; sub directories are only queued while this is > 0
	noSrc     bool   // if set the source is not listed for this job
	noDst     bool   // if set the destination is not listed for this job
}
   123  
   124  // Run starts the matching process off
   125  func (m *March) Run() error {
   126  	m.init()
   127  
   128  	srcDepth := fs.Config.MaxDepth
   129  	if srcDepth < 0 {
   130  		srcDepth = fs.MaxLevel
   131  	}
   132  	dstDepth := srcDepth
   133  	if filter.Active.Opt.DeleteExcluded {
   134  		dstDepth = fs.MaxLevel
   135  	}
   136  
   137  	var mu sync.Mutex // Protects vars below
   138  	var jobError error
   139  	var errCount int
   140  
   141  	// Start some directory listing go routines
   142  	var wg sync.WaitGroup         // sync closing of go routines
   143  	var traversing sync.WaitGroup // running directory traversals
   144  	in := make(chan listDirJob, fs.Config.Checkers)
   145  	for i := 0; i < fs.Config.Checkers; i++ {
   146  		wg.Add(1)
   147  		go func() {
   148  			defer wg.Done()
   149  			for {
   150  				select {
   151  				case <-m.Ctx.Done():
   152  					return
   153  				case job, ok := <-in:
   154  					if !ok {
   155  						return
   156  					}
   157  					jobs, err := m.processJob(job)
   158  					if err != nil {
   159  						mu.Lock()
   160  						// Keep reference only to the first encountered error
   161  						if jobError == nil {
   162  							jobError = err
   163  						}
   164  						errCount++
   165  						mu.Unlock()
   166  					}
   167  					if len(jobs) > 0 {
   168  						traversing.Add(len(jobs))
   169  						go func() {
   170  							// Now we have traversed this directory, send these
   171  							// jobs off for traversal in the background
   172  							for _, newJob := range jobs {
   173  								select {
   174  								case <-m.Ctx.Done():
   175  									// discard job if finishing
   176  									traversing.Done()
   177  								case in <- newJob:
   178  								}
   179  							}
   180  						}()
   181  					}
   182  					traversing.Done()
   183  				}
   184  			}
   185  		}()
   186  	}
   187  
   188  	// Start the process
   189  	traversing.Add(1)
   190  	in <- listDirJob{
   191  		srcRemote: m.Dir,
   192  		srcDepth:  srcDepth - 1,
   193  		dstRemote: m.Dir,
   194  		dstDepth:  dstDepth - 1,
   195  		noDst:     m.NoCheckDest,
   196  	}
   197  	go func() {
   198  		// when the context is cancelled discard the remaining jobs
   199  		<-m.Ctx.Done()
   200  		for range in {
   201  			traversing.Done()
   202  		}
   203  	}()
   204  	traversing.Wait()
   205  	close(in)
   206  	wg.Wait()
   207  
   208  	if errCount > 1 {
   209  		return errors.Wrapf(jobError, "march failed with %d error(s): first error", errCount)
   210  	}
   211  	return jobError
   212  }
   213  
   214  // Check to see if the context has been cancelled
   215  func (m *March) aborting() bool {
   216  	select {
   217  	case <-m.Ctx.Done():
   218  		return true
   219  	default:
   220  	}
   221  	return false
   222  }
   223  
// matchEntry is an entry plus transformed name
type matchEntry struct {
	entry fs.DirEntry // the directory entry itself
	leaf  string      // last path element of entry.Remote(), untransformed
	name  string      // leaf after all the match transforms have been applied
}
   230  
// matchEntries is a slice of matchEntry values. It implements
// sort.Interface, ordering by (name, leaf, remote).
type matchEntries []matchEntry
   233  
   234  // Len is part of sort.Interface.
   235  func (es matchEntries) Len() int { return len(es) }
   236  
   237  // Swap is part of sort.Interface.
   238  func (es matchEntries) Swap(i, j int) { es[i], es[j] = es[j], es[i] }
   239  
   240  // Less is part of sort.Interface.
   241  //
   242  // Compare in order (name, leaf, remote)
   243  func (es matchEntries) Less(i, j int) bool {
   244  	ei, ej := &es[i], &es[j]
   245  	if ei.name == ej.name {
   246  		if ei.leaf == ej.leaf {
   247  			return fs.CompareDirEntries(ei.entry, ej.entry) < 0
   248  		}
   249  		return ei.leaf < ej.leaf
   250  	}
   251  	return ei.name < ej.name
   252  }
   253  
   254  // Sort the directory entries by (name, leaf, remote)
   255  //
   256  // We use a stable sort here just in case there are
   257  // duplicates. Assuming the remote delivers the entries in a
   258  // consistent order, this will give the best user experience
   259  // in syncing as it will use the first entry for the sync
   260  // comparison.
   261  func (es matchEntries) sort() {
   262  	sort.Stable(es)
   263  }
   264  
   265  // make a matchEntries from a newMatch entries
   266  func newMatchEntries(entries fs.DirEntries, transforms []matchTransformFn) matchEntries {
   267  	es := make(matchEntries, len(entries))
   268  	for i := range es {
   269  		es[i].entry = entries[i]
   270  		name := path.Base(entries[i].Remote())
   271  		es[i].leaf = name
   272  		for _, transform := range transforms {
   273  			name = transform(name)
   274  		}
   275  		es[i].name = name
   276  	}
   277  	es.sort()
   278  	return es
   279  }
   280  
// matchPair is a matched pair of direntries returned by matchListings
type matchPair struct {
	src, dst fs.DirEntry // the source entry and the destination entry it was matched with
}
   285  
// matchTransformFn converts a name into a form which is used for
// comparison in matchListings. The transforms installed by init are
// unicode NFC normalisation and lower casing.
type matchTransformFn func(name string) string
   289  
// Process the two listings, matching up the items in the two slices
// using the transform function on each name first.
//
// Into srcOnly go Entries which only exist in the srcList
// Into dstOnly go Entries which only exist in the dstList
// Into matches go matchPair's of src and dst which have the same name
//
// Both listings are sorted by transformed name before being walked in
// lock step. Adjacent entries with the same transformed name and the
// same entry type are treated as duplicates: the later ones are
// logged and ignored.
//
// This checks for duplicates and checks the list is sorted. It
// panics if a listing is found to be out of order.
func matchListings(srcListEntries, dstListEntries fs.DirEntries, transforms []matchTransformFn) (srcOnly fs.DirEntries, dstOnly fs.DirEntries, matches []matchPair) {
	srcList := newMatchEntries(srcListEntries, transforms)
	dstList := newMatchEntries(dstListEntries, transforms)

	// Both indices advance on every iteration. Whenever one side has
	// to be looked at again, its index is decremented first so that
	// the increment in the post statement leaves it unchanged.
	for iSrc, iDst := 0, 0; ; iSrc, iDst = iSrc+1, iDst+1 {
		var src, dst fs.DirEntry
		var srcName, dstName string
		if iSrc < len(srcList) {
			src = srcList[iSrc].entry
			srcName = srcList[iSrc].name
		}
		if iDst < len(dstList) {
			dst = dstList[iDst].entry
			dstName = dstList[iDst].name
		}
		// Both listings are exhausted - finished
		if src == nil && dst == nil {
			break
		}
		if src != nil && iSrc > 0 {
			prev := srcList[iSrc-1].entry
			prevName := srcList[iSrc-1].name
			if srcName == prevName && fs.DirEntryType(prev) == fs.DirEntryType(src) {
				fs.Logf(src, "Duplicate %s found in source - ignoring", fs.DirEntryType(src))
				iDst-- // ignore the src and retry the dst
				continue
			} else if srcName < prevName {
				// this should never happen since we sort the listings
				panic("Out of order listing in source")
			}
		}
		if dst != nil && iDst > 0 {
			prev := dstList[iDst-1].entry
			prevName := dstList[iDst-1].name
			if dstName == prevName && fs.DirEntryType(dst) == fs.DirEntryType(prev) {
				fs.Logf(dst, "Duplicate %s found in destination - ignoring", fs.DirEntryType(dst))
				iSrc-- // ignore the dst and retry the src
				continue
			} else if dstName < prevName {
				// this should never happen since we sort the listings
				panic("Out of order listing in destination")
			}
		}
		if src != nil && dst != nil {
			// we can't use CompareDirEntries because srcName, dstName could
			// be different than src.Remote() or dst.Remote()
			srcType := fs.DirEntryType(src)
			dstType := fs.DirEntryType(dst)
			if srcName > dstName || (srcName == dstName && srcType > dstType) {
				// dst sorts first: emit it alone and retry this src next time
				src = nil
				iSrc--
			} else if srcName < dstName || (srcName == dstName && srcType < dstType) {
				// src sorts first: emit it alone and retry this dst next time
				dst = nil
				iDst--
			}
		}
		// Debugf(nil, "src = %v, dst = %v", src, dst)
		switch {
		case src == nil && dst == nil:
			// do nothing
		case src == nil:
			dstOnly = append(dstOnly, dst)
		case dst == nil:
			srcOnly = append(srcOnly, src)
		default:
			matches = append(matches, matchPair{src: src, dst: dst})
		}
	}
	return
}
   367  
   368  // processJob processes a listDirJob listing the source and
   369  // destination directories, comparing them and returning a slice of
   370  // more jobs
   371  //
   372  // returns errors using processError
   373  func (m *March) processJob(job listDirJob) ([]listDirJob, error) {
   374  	var (
   375  		jobs                   []listDirJob
   376  		srcList, dstList       fs.DirEntries
   377  		srcListErr, dstListErr error
   378  		wg                     sync.WaitGroup
   379  	)
   380  
   381  	// List the src and dst directories
   382  	if !job.noSrc {
   383  		wg.Add(1)
   384  		go func() {
   385  			defer wg.Done()
   386  			srcList, srcListErr = m.srcListDir(job.srcRemote)
   387  		}()
   388  	}
   389  	if !m.NoTraverse && !job.noDst {
   390  		wg.Add(1)
   391  		go func() {
   392  			defer wg.Done()
   393  			dstList, dstListErr = m.dstListDir(job.dstRemote)
   394  		}()
   395  	}
   396  
   397  	// Wait for listings to complete and report errors
   398  	wg.Wait()
   399  	if srcListErr != nil {
   400  		fs.Errorf(job.srcRemote, "error reading source directory: %v", srcListErr)
   401  		srcListErr = fs.CountError(srcListErr)
   402  		return nil, srcListErr
   403  	}
   404  	if dstListErr == fs.ErrorDirNotFound {
   405  		// Copy the stuff anyway
   406  	} else if dstListErr != nil {
   407  		fs.Errorf(job.dstRemote, "error reading destination directory: %v", dstListErr)
   408  		dstListErr = fs.CountError(dstListErr)
   409  		return nil, dstListErr
   410  	}
   411  
   412  	// If NoTraverse is set, then try to find a matching object
   413  	// for each item in the srcList
   414  	if m.NoTraverse && !m.NoCheckDest {
   415  		for _, src := range srcList {
   416  			if srcObj, ok := src.(fs.Object); ok {
   417  				leaf := path.Base(srcObj.Remote())
   418  				dstObj, err := m.Fdst.NewObject(m.Ctx, path.Join(job.dstRemote, leaf))
   419  				if err == nil {
   420  					dstList = append(dstList, dstObj)
   421  				}
   422  			}
   423  		}
   424  	}
   425  
   426  	// Work out what to do and do it
   427  	srcOnly, dstOnly, matches := matchListings(srcList, dstList, m.transforms)
   428  	for _, src := range srcOnly {
   429  		if m.aborting() {
   430  			return nil, m.Ctx.Err()
   431  		}
   432  		recurse := m.Callback.SrcOnly(src)
   433  		if recurse && job.srcDepth > 0 {
   434  			jobs = append(jobs, listDirJob{
   435  				srcRemote: src.Remote(),
   436  				dstRemote: src.Remote(),
   437  				srcDepth:  job.srcDepth - 1,
   438  				noDst:     true,
   439  			})
   440  		}
   441  
   442  	}
   443  	for _, dst := range dstOnly {
   444  		if m.aborting() {
   445  			return nil, m.Ctx.Err()
   446  		}
   447  		recurse := m.Callback.DstOnly(dst)
   448  		if recurse && job.dstDepth > 0 {
   449  			jobs = append(jobs, listDirJob{
   450  				srcRemote: dst.Remote(),
   451  				dstRemote: dst.Remote(),
   452  				dstDepth:  job.dstDepth - 1,
   453  				noSrc:     true,
   454  			})
   455  		}
   456  	}
   457  	for _, match := range matches {
   458  		if m.aborting() {
   459  			return nil, m.Ctx.Err()
   460  		}
   461  		recurse := m.Callback.Match(m.Ctx, match.dst, match.src)
   462  		if recurse && job.srcDepth > 0 && job.dstDepth > 0 {
   463  			jobs = append(jobs, listDirJob{
   464  				srcRemote: match.src.Remote(),
   465  				dstRemote: match.dst.Remote(),
   466  				srcDepth:  job.srcDepth - 1,
   467  				dstDepth:  job.dstDepth - 1,
   468  			})
   469  		}
   470  	}
   471  	return jobs, nil
   472  }