github.com/ncw/rclone@v1.48.1-0.20190724201158-a35aa1360e3e/fs/march/march.go (about)

     1  // Package march traverses two directories in lock step
     2  package march
     3  
     4  import (
     5  	"context"
     6  	"path"
     7  	"sort"
     8  	"strings"
     9  	"sync"
    10  
    11  	"github.com/pkg/errors"
    12  
    13  	"github.com/ncw/rclone/fs"
    14  	"github.com/ncw/rclone/fs/dirtree"
    15  	"github.com/ncw/rclone/fs/filter"
    16  	"github.com/ncw/rclone/fs/list"
    17  	"github.com/ncw/rclone/fs/walk"
    18  	"golang.org/x/text/unicode/norm"
    19  )
    20  
// March holds the data used to traverse two Fs simultaneously,
// calling Callback for each match
//
// Fill in the exported fields and then call Run. The unexported
// fields are set up by init, which Run calls first.
type March struct {
	// parameters
	Ctx           context.Context // context for background goroutines
	Fdst          fs.Fs           // destination Fs
	Fsrc          fs.Fs           // source Fs
	Dir           string          // directory the traversal starts from in both Fs
	NoTraverse    bool            // don't traverse the destination
	SrcIncludeAll bool            // passed as includeAll when listing the src (presumably disables filtering - confirm against fs/list)
	DstIncludeAll bool            // passed as includeAll when listing the destination (presumably disables filtering - confirm against fs/list)
	Callback      Marcher         // object to call with results
	// internal state
	srcListDir listDirFn          // function to call to list a directory in the src
	dstListDir listDirFn          // function to call to list a directory in the dst; left nil when NoTraverse is set
	transforms []matchTransformFn // applied in order to each leaf name before names are compared
}
    38  
// Marcher is called on each match
//
// Each method returns recurse. processJob only queues a further
// directory listing for the entry when recurse is true and the
// remaining depth allows it.
//
// The methods are called from the worker goroutines started by Run,
// so they may be called concurrently when more than one worker is
// running.
type Marcher interface {
	// SrcOnly is called for a DirEntry found only in the source
	SrcOnly(src fs.DirEntry) (recurse bool)
	// DstOnly is called for a DirEntry found only in the destination
	DstOnly(dst fs.DirEntry) (recurse bool)
	// Match is called for a DirEntry found both in the source and destination
	Match(ctx context.Context, dst, src fs.DirEntry) (recurse bool)
}
    48  
    49  // init sets up a march over opt.Fsrc, and opt.Fdst calling back callback for each match
    50  func (m *March) init() {
    51  	m.srcListDir = m.makeListDir(m.Fsrc, m.SrcIncludeAll)
    52  	if !m.NoTraverse {
    53  		m.dstListDir = m.makeListDir(m.Fdst, m.DstIncludeAll)
    54  	}
    55  	// Now create the matching transform
    56  	// ..normalise the UTF8 first
    57  	m.transforms = append(m.transforms, norm.NFC.String)
    58  	// ..if destination is caseInsensitive then make it lower case
    59  	// case Insensitive | src | dst | lower case compare |
    60  	//                  | No  | No  | No                 |
    61  	//                  | Yes | No  | No                 |
    62  	//                  | No  | Yes | Yes                |
    63  	//                  | Yes | Yes | Yes                |
    64  	if m.Fdst.Features().CaseInsensitive || fs.Config.IgnoreCaseSync {
    65  		m.transforms = append(m.transforms, strings.ToLower)
    66  	}
    67  }
    68  
// listDirFn lists the directory dir, returning its entries or an
// error. Instances are created by makeListDir.
type listDirFn func(dir string) (entries fs.DirEntries, err error)
    71  
    72  // makeListDir makes a listing function for the given fs and includeAll flags
    73  func (m *March) makeListDir(f fs.Fs, includeAll bool) listDirFn {
    74  	if (!fs.Config.UseListR || f.Features().ListR == nil) && !filter.Active.HaveFilesFrom() {
    75  		return func(dir string) (entries fs.DirEntries, err error) {
    76  			return list.DirSorted(m.Ctx, f, includeAll, dir)
    77  		}
    78  	}
    79  	var (
    80  		mu      sync.Mutex
    81  		started bool
    82  		dirs    dirtree.DirTree
    83  		dirsErr error
    84  	)
    85  	return func(dir string) (entries fs.DirEntries, err error) {
    86  		mu.Lock()
    87  		defer mu.Unlock()
    88  		if !started {
    89  			dirs, dirsErr = walk.NewDirTree(m.Ctx, f, m.Dir, includeAll, fs.Config.MaxDepth)
    90  			started = true
    91  		}
    92  		if dirsErr != nil {
    93  			return nil, dirsErr
    94  		}
    95  		entries, ok := dirs[dir]
    96  		if !ok {
    97  			err = fs.ErrorDirNotFound
    98  		} else {
    99  			delete(dirs, dir)
   100  		}
   101  		return entries, err
   102  	}
   103  }
   104  
// listDirJob describes a directory listing that needs to be done
//
// Jobs are created by Run (for the root) and by processJob (for
// sub directories) and are consumed by processJob.
type listDirJob struct {
	srcRemote string // directory to list in the source
	dstRemote string // directory to list in the destination
	srcDepth  int    // levels of source recursion left; processJob recurses only while this is > 0
	dstDepth  int    // levels of destination recursion left; processJob recurses only while this is > 0
	noSrc     bool   // if set the source is not listed for this job
	noDst     bool   // if set the destination is not listed for this job
}
   114  
   115  // Run starts the matching process off
   116  func (m *March) Run() error {
   117  	m.init()
   118  
   119  	srcDepth := fs.Config.MaxDepth
   120  	if srcDepth < 0 {
   121  		srcDepth = fs.MaxLevel
   122  	}
   123  	dstDepth := srcDepth
   124  	if filter.Active.Opt.DeleteExcluded {
   125  		dstDepth = fs.MaxLevel
   126  	}
   127  
   128  	var mu sync.Mutex // Protects vars below
   129  	var jobError error
   130  	var errCount int
   131  
   132  	// Start some directory listing go routines
   133  	var wg sync.WaitGroup         // sync closing of go routines
   134  	var traversing sync.WaitGroup // running directory traversals
   135  	in := make(chan listDirJob, fs.Config.Checkers)
   136  	for i := 0; i < fs.Config.Checkers; i++ {
   137  		wg.Add(1)
   138  		go func() {
   139  			defer wg.Done()
   140  			for {
   141  				select {
   142  				case <-m.Ctx.Done():
   143  					return
   144  				case job, ok := <-in:
   145  					if !ok {
   146  						return
   147  					}
   148  					jobs, err := m.processJob(job)
   149  					if err != nil {
   150  						mu.Lock()
   151  						// Keep reference only to the first encountered error
   152  						if jobError == nil {
   153  							jobError = err
   154  						}
   155  						errCount++
   156  						mu.Unlock()
   157  					}
   158  					if len(jobs) > 0 {
   159  						traversing.Add(len(jobs))
   160  						go func() {
   161  							// Now we have traversed this directory, send these
   162  							// jobs off for traversal in the background
   163  							for _, newJob := range jobs {
   164  								select {
   165  								case <-m.Ctx.Done():
   166  									// discard job if finishing
   167  									traversing.Done()
   168  								case in <- newJob:
   169  								}
   170  							}
   171  						}()
   172  					}
   173  					traversing.Done()
   174  				}
   175  			}
   176  		}()
   177  	}
   178  
   179  	// Start the process
   180  	traversing.Add(1)
   181  	in <- listDirJob{
   182  		srcRemote: m.Dir,
   183  		srcDepth:  srcDepth - 1,
   184  		dstRemote: m.Dir,
   185  		dstDepth:  dstDepth - 1,
   186  	}
   187  	go func() {
   188  		// when the context is cancelled discard the remaining jobs
   189  		<-m.Ctx.Done()
   190  		for range in {
   191  			traversing.Done()
   192  		}
   193  	}()
   194  	traversing.Wait()
   195  	close(in)
   196  	wg.Wait()
   197  
   198  	if errCount > 1 {
   199  		return errors.Wrapf(jobError, "march failed with %d error(s): first error", errCount)
   200  	}
   201  	return jobError
   202  }
   203  
   204  // Check to see if the context has been cancelled
   205  func (m *March) aborting() bool {
   206  	select {
   207  	case <-m.Ctx.Done():
   208  		return true
   209  	default:
   210  	}
   211  	return false
   212  }
   213  
// matchEntry is an entry plus transformed name
type matchEntry struct {
	entry fs.DirEntry // the directory entry itself
	leaf  string      // last path element of entry.Remote(), untransformed
	name  string      // leaf after all the transforms have been applied - used for matching
}
   220  
// matchEntries is a slice of matchEntry. It implements
// sort.Interface, ordering by (name, leaf, remote).
type matchEntries []matchEntry
   223  
   224  // Len is part of sort.Interface.
   225  func (es matchEntries) Len() int { return len(es) }
   226  
   227  // Swap is part of sort.Interface.
   228  func (es matchEntries) Swap(i, j int) { es[i], es[j] = es[j], es[i] }
   229  
   230  // Less is part of sort.Interface.
   231  //
   232  // Compare in order (name, leaf, remote)
   233  func (es matchEntries) Less(i, j int) bool {
   234  	ei, ej := &es[i], &es[j]
   235  	if ei.name == ej.name {
   236  		if ei.leaf == ej.leaf {
   237  			return fs.CompareDirEntries(ei.entry, ej.entry) < 0
   238  		}
   239  		return ei.leaf < ej.leaf
   240  	}
   241  	return ei.name < ej.name
   242  }
   243  
   244  // Sort the directory entries by (name, leaf, remote)
   245  //
   246  // We use a stable sort here just in case there are
   247  // duplicates. Assuming the remote delivers the entries in a
   248  // consistent order, this will give the best user experience
   249  // in syncing as it will use the first entry for the sync
   250  // comparison.
   251  func (es matchEntries) sort() {
   252  	sort.Stable(es)
   253  }
   254  
   255  // make a matchEntries from a newMatch entries
   256  func newMatchEntries(entries fs.DirEntries, transforms []matchTransformFn) matchEntries {
   257  	es := make(matchEntries, len(entries))
   258  	for i := range es {
   259  		es[i].entry = entries[i]
   260  		name := path.Base(entries[i].Remote())
   261  		es[i].leaf = name
   262  		for _, transform := range transforms {
   263  			name = transform(name)
   264  		}
   265  		es[i].name = name
   266  	}
   267  	es.sort()
   268  	return es
   269  }
   270  
// matchPair is a matched pair of direntries returned by matchListings
//
// src and dst had the same transformed name and the same
// fs.DirEntryType when they were paired up.
type matchPair struct {
	src, dst fs.DirEntry
}
   275  
// matchTransformFn converts a name into a form which is used for
// comparison in matchListings.
//
// The transforms set up by init are norm.NFC.String and, optionally,
// strings.ToLower.
type matchTransformFn func(name string) string
   279  
// Process the two listings, matching up the items in the two slices
// using the transform functions on each name first.
//
// Into srcOnly go Entries which only exist in the srcList
// Into dstOnly go Entries which only exist in the dstList
// Into matches go matchPair's of src and dst which have the same
// transformed name and the same fs.DirEntryType
//
// Both listings are converted with newMatchEntries (which sorts them)
// and then walked in lock step.
//
// This checks for duplicates and checks the list is sorted. An entry
// with the same transformed name and type as the entry before it in
// the same listing is logged and ignored. It panics if either sorted
// listing is found to be out of order.
func matchListings(srcListEntries, dstListEntries fs.DirEntries, transforms []matchTransformFn) (srcOnly fs.DirEntries, dstOnly fs.DirEntries, matches []matchPair) {
	srcList := newMatchEntries(srcListEntries, transforms)
	dstList := newMatchEntries(dstListEntries, transforms)

	// Both indexes are advanced at the end of every iteration. When
	// only one side should move on, the other index is decremented in
	// the loop body so that the increment leaves it where it was.
	for iSrc, iDst := 0, 0; ; iSrc, iDst = iSrc+1, iDst+1 {
		var src, dst fs.DirEntry
		var srcName, dstName string
		if iSrc < len(srcList) {
			src = srcList[iSrc].entry
			srcName = srcList[iSrc].name
		}
		if iDst < len(dstList) {
			dst = dstList[iDst].entry
			dstName = dstList[iDst].name
		}
		// Both listings exhausted - all done
		if src == nil && dst == nil {
			break
		}
		// Check the source entry against the one before it
		if src != nil && iSrc > 0 {
			prev := srcList[iSrc-1].entry
			prevName := srcList[iSrc-1].name
			if srcName == prevName && fs.DirEntryType(prev) == fs.DirEntryType(src) {
				fs.Logf(src, "Duplicate %s found in source - ignoring", fs.DirEntryType(src))
				iDst-- // ignore the src and retry the dst
				continue
			} else if srcName < prevName {
				// this should never happen since we sort the listings
				panic("Out of order listing in source")
			}
		}
		// Check the destination entry against the one before it
		if dst != nil && iDst > 0 {
			prev := dstList[iDst-1].entry
			prevName := dstList[iDst-1].name
			if dstName == prevName && fs.DirEntryType(dst) == fs.DirEntryType(prev) {
				fs.Logf(dst, "Duplicate %s found in destination - ignoring", fs.DirEntryType(dst))
				iSrc-- // ignore the dst and retry the src
				continue
			} else if dstName < prevName {
				// this should never happen since we sort the listings
				panic("Out of order listing in destination")
			}
		}
		// With an entry on both sides, work out whether they match
		// and if not which one comes first. The later one is held
		// back for the next iteration.
		if src != nil && dst != nil {
			// we can't use CompareDirEntries because srcName, dstName could
			// be different than src.Remote() or dst.Remote()
			srcType := fs.DirEntryType(src)
			dstType := fs.DirEntryType(dst)
			if srcName > dstName || (srcName == dstName && srcType > dstType) {
				src = nil
				iSrc--
			} else if srcName < dstName || (srcName == dstName && srcType < dstType) {
				dst = nil
				iDst--
			}
		}
		// Debugf(nil, "src = %v, dst = %v", src, dst)
		// File whatever is left into the right result
		switch {
		case src == nil && dst == nil:
			// do nothing
		case src == nil:
			dstOnly = append(dstOnly, dst)
		case dst == nil:
			srcOnly = append(srcOnly, src)
		default:
			matches = append(matches, matchPair{src: src, dst: dst})
		}
	}
	return
}
   357  
   358  // processJob processes a listDirJob listing the source and
   359  // destination directories, comparing them and returning a slice of
   360  // more jobs
   361  //
   362  // returns errors using processError
   363  func (m *March) processJob(job listDirJob) ([]listDirJob, error) {
   364  	var (
   365  		jobs                   []listDirJob
   366  		srcList, dstList       fs.DirEntries
   367  		srcListErr, dstListErr error
   368  		wg                     sync.WaitGroup
   369  	)
   370  
   371  	// List the src and dst directories
   372  	if !job.noSrc {
   373  		wg.Add(1)
   374  		go func() {
   375  			defer wg.Done()
   376  			srcList, srcListErr = m.srcListDir(job.srcRemote)
   377  		}()
   378  	}
   379  	if !m.NoTraverse && !job.noDst {
   380  		wg.Add(1)
   381  		go func() {
   382  			defer wg.Done()
   383  			dstList, dstListErr = m.dstListDir(job.dstRemote)
   384  		}()
   385  	}
   386  
   387  	// Wait for listings to complete and report errors
   388  	wg.Wait()
   389  	if srcListErr != nil {
   390  		fs.Errorf(job.srcRemote, "error reading source directory: %v", srcListErr)
   391  		fs.CountError(srcListErr)
   392  		return nil, srcListErr
   393  	}
   394  	if dstListErr == fs.ErrorDirNotFound {
   395  		// Copy the stuff anyway
   396  	} else if dstListErr != nil {
   397  		fs.Errorf(job.dstRemote, "error reading destination directory: %v", dstListErr)
   398  		fs.CountError(dstListErr)
   399  		return nil, dstListErr
   400  	}
   401  
   402  	// If NoTraverse is set, then try to find a matching object
   403  	// for each item in the srcList
   404  	if m.NoTraverse {
   405  		for _, src := range srcList {
   406  			if srcObj, ok := src.(fs.Object); ok {
   407  				leaf := path.Base(srcObj.Remote())
   408  				dstObj, err := m.Fdst.NewObject(m.Ctx, path.Join(job.dstRemote, leaf))
   409  				if err == nil {
   410  					dstList = append(dstList, dstObj)
   411  				}
   412  			}
   413  		}
   414  	}
   415  
   416  	// Work out what to do and do it
   417  	srcOnly, dstOnly, matches := matchListings(srcList, dstList, m.transforms)
   418  	for _, src := range srcOnly {
   419  		if m.aborting() {
   420  			return nil, m.Ctx.Err()
   421  		}
   422  		recurse := m.Callback.SrcOnly(src)
   423  		if recurse && job.srcDepth > 0 {
   424  			jobs = append(jobs, listDirJob{
   425  				srcRemote: src.Remote(),
   426  				srcDepth:  job.srcDepth - 1,
   427  				noDst:     true,
   428  			})
   429  		}
   430  
   431  	}
   432  	for _, dst := range dstOnly {
   433  		if m.aborting() {
   434  			return nil, m.Ctx.Err()
   435  		}
   436  		recurse := m.Callback.DstOnly(dst)
   437  		if recurse && job.dstDepth > 0 {
   438  			jobs = append(jobs, listDirJob{
   439  				dstRemote: dst.Remote(),
   440  				dstDepth:  job.dstDepth - 1,
   441  				noSrc:     true,
   442  			})
   443  		}
   444  	}
   445  	for _, match := range matches {
   446  		if m.aborting() {
   447  			return nil, m.Ctx.Err()
   448  		}
   449  		recurse := m.Callback.Match(m.Ctx, match.dst, match.src)
   450  		if recurse && job.srcDepth > 0 && job.dstDepth > 0 {
   451  			jobs = append(jobs, listDirJob{
   452  				srcRemote: match.src.Remote(),
   453  				dstRemote: match.dst.Remote(),
   454  				srcDepth:  job.srcDepth - 1,
   455  				dstDepth:  job.dstDepth - 1,
   456  			})
   457  		}
   458  	}
   459  	return jobs, nil
   460  }