github.com/divyam234/rclone@v1.64.1/fs/march/march.go (about)

     1  // Package march traverses two directories in lock step
     2  package march
     3  
     4  import (
     5  	"context"
     6  	"fmt"
     7  	"path"
     8  	"sort"
     9  	"strings"
    10  	"sync"
    11  
    12  	"github.com/divyam234/rclone/fs"
    13  	"github.com/divyam234/rclone/fs/dirtree"
    14  	"github.com/divyam234/rclone/fs/filter"
    15  	"github.com/divyam234/rclone/fs/list"
    16  	"github.com/divyam234/rclone/fs/walk"
    17  	"golang.org/x/text/unicode/norm"
    18  )
    19  
// March holds the data used to traverse two Fs simultaneously,
// calling Callback for each match
type March struct {
	// parameters
	Ctx                    context.Context // context for background goroutines
	Fdst                   fs.Fs           // dest Fs
	Fsrc                   fs.Fs           // source Fs
	Dir                    string          // directory
	NoTraverse             bool            // don't traverse the destination
	SrcIncludeAll          bool            // list the src with includeAll set (presumably bypassing the filters - confirm against list.DirSorted)
	DstIncludeAll          bool            // list the destination with includeAll set (presumably bypassing the filters - confirm against list.DirSorted)
	Callback               Marcher         // object to call with results
	NoCheckDest            bool            // transfer all objects regardless without checking dst
	NoUnicodeNormalization bool            // don't normalize unicode characters in filenames
	// internal state
	srcListDir listDirFn // function to call to list a directory in the src
	dstListDir listDirFn // function to call to list a directory in the dst
	// name transforms applied in order to each leaf name before the
	// source and destination listings are compared
	transforms []matchTransformFn
}
    39  
// Marcher is called on each match
//
// Each method returns recurse: when it is true and the remaining
// depth allows it, the entry is queued to be listed in turn.
type Marcher interface {
	// SrcOnly is called for a DirEntry found only in the source
	SrcOnly(src fs.DirEntry) (recurse bool)
	// DstOnly is called for a DirEntry found only in the destination
	DstOnly(dst fs.DirEntry) (recurse bool)
	// Match is called for a DirEntry found both in the source and destination
	Match(ctx context.Context, dst, src fs.DirEntry) (recurse bool)
}
    49  
    50  // init sets up a march over opt.Fsrc, and opt.Fdst calling back callback for each match
    51  // Note: this will flag filter-aware backends on the source side
    52  func (m *March) init(ctx context.Context) {
    53  	ci := fs.GetConfig(ctx)
    54  	m.srcListDir = m.makeListDir(ctx, m.Fsrc, m.SrcIncludeAll)
    55  	if !m.NoTraverse {
    56  		m.dstListDir = m.makeListDir(ctx, m.Fdst, m.DstIncludeAll)
    57  	}
    58  	// Now create the matching transform
    59  	// ..normalise the UTF8 first
    60  	if !m.NoUnicodeNormalization {
    61  		m.transforms = append(m.transforms, norm.NFC.String)
    62  	}
    63  	// ..if destination is caseInsensitive then make it lower case
    64  	// case Insensitive | src | dst | lower case compare |
    65  	//                  | No  | No  | No                 |
    66  	//                  | Yes | No  | No                 |
    67  	//                  | No  | Yes | Yes                |
    68  	//                  | Yes | Yes | Yes                |
    69  	if m.Fdst.Features().CaseInsensitive || ci.IgnoreCaseSync {
    70  		m.transforms = append(m.transforms, strings.ToLower)
    71  	}
    72  }
    73  
// listDirFn lists the directory dir, returning its entries or an
// error. Values of this type are built by makeListDir.
type listDirFn func(dir string) (entries fs.DirEntries, err error)
    76  
    77  // makeListDir makes constructs a listing function for the given fs
    78  // and includeAll flags for marching through the file system.
    79  // Note: this will optionally flag filter-aware backends!
    80  func (m *March) makeListDir(ctx context.Context, f fs.Fs, includeAll bool) listDirFn {
    81  	ci := fs.GetConfig(ctx)
    82  	fi := filter.GetConfig(ctx)
    83  	if !(ci.UseListR && f.Features().ListR != nil) && // !--fast-list active and
    84  		!(ci.NoTraverse && fi.HaveFilesFrom()) { // !(--files-from and --no-traverse)
    85  		return func(dir string) (entries fs.DirEntries, err error) {
    86  			dirCtx := filter.SetUseFilter(m.Ctx, f.Features().FilterAware && !includeAll) // make filter-aware backends constrain List
    87  			return list.DirSorted(dirCtx, f, includeAll, dir)
    88  		}
    89  	}
    90  
    91  	// This returns a closure for use when --fast-list is active or for when
    92  	// --files-from and --no-traverse is set
    93  	var (
    94  		mu      sync.Mutex
    95  		started bool
    96  		dirs    dirtree.DirTree
    97  		dirsErr error
    98  	)
    99  	return func(dir string) (entries fs.DirEntries, err error) {
   100  		mu.Lock()
   101  		defer mu.Unlock()
   102  		if !started {
   103  			dirCtx := filter.SetUseFilter(m.Ctx, f.Features().FilterAware && !includeAll) // make filter-aware backends constrain List
   104  			dirs, dirsErr = walk.NewDirTree(dirCtx, f, m.Dir, includeAll, ci.MaxDepth)
   105  			started = true
   106  		}
   107  		if dirsErr != nil {
   108  			return nil, dirsErr
   109  		}
   110  		entries, ok := dirs[dir]
   111  		if !ok {
   112  			err = fs.ErrorDirNotFound
   113  		} else {
   114  			delete(dirs, dir)
   115  		}
   116  		return entries, err
   117  	}
   118  }
   119  
// listDirJob describes a directory listing that needs to be done
type listDirJob struct {
	srcRemote string // directory to list in the source
	dstRemote string // directory to list in the destination
	srcDepth  int    // levels left to descend in the source; sub directories are only queued while this is > 0
	dstDepth  int    // levels left to descend in the destination; sub directories are only queued while this is > 0
	noSrc     bool   // don't list the source (set for directories found only in the destination)
	noDst     bool   // don't list the destination (set for directories found only in the source, or when NoCheckDest is set)
}
   129  
   130  // Run starts the matching process off
   131  func (m *March) Run(ctx context.Context) error {
   132  	ci := fs.GetConfig(ctx)
   133  	fi := filter.GetConfig(ctx)
   134  	m.init(ctx)
   135  
   136  	srcDepth := ci.MaxDepth
   137  	if srcDepth < 0 {
   138  		srcDepth = fs.MaxLevel
   139  	}
   140  	dstDepth := srcDepth
   141  	if fi.Opt.DeleteExcluded {
   142  		dstDepth = fs.MaxLevel
   143  	}
   144  
   145  	var mu sync.Mutex // Protects vars below
   146  	var jobError error
   147  	var errCount int
   148  
   149  	// Start some directory listing go routines
   150  	var wg sync.WaitGroup         // sync closing of go routines
   151  	var traversing sync.WaitGroup // running directory traversals
   152  	checkers := ci.Checkers
   153  	in := make(chan listDirJob, checkers)
   154  	for i := 0; i < checkers; i++ {
   155  		wg.Add(1)
   156  		go func() {
   157  			defer wg.Done()
   158  			for {
   159  				select {
   160  				case <-m.Ctx.Done():
   161  					return
   162  				case job, ok := <-in:
   163  					if !ok {
   164  						return
   165  					}
   166  					jobs, err := m.processJob(job)
   167  					if err != nil {
   168  						mu.Lock()
   169  						// Keep reference only to the first encountered error
   170  						if jobError == nil {
   171  							jobError = err
   172  						}
   173  						errCount++
   174  						mu.Unlock()
   175  					}
   176  					if len(jobs) > 0 {
   177  						traversing.Add(len(jobs))
   178  						go func() {
   179  							// Now we have traversed this directory, send these
   180  							// jobs off for traversal in the background
   181  							for _, newJob := range jobs {
   182  								select {
   183  								case <-m.Ctx.Done():
   184  									// discard job if finishing
   185  									traversing.Done()
   186  								case in <- newJob:
   187  								}
   188  							}
   189  						}()
   190  					}
   191  					traversing.Done()
   192  				}
   193  			}
   194  		}()
   195  	}
   196  
   197  	// Start the process
   198  	traversing.Add(1)
   199  	in <- listDirJob{
   200  		srcRemote: m.Dir,
   201  		srcDepth:  srcDepth - 1,
   202  		dstRemote: m.Dir,
   203  		dstDepth:  dstDepth - 1,
   204  		noDst:     m.NoCheckDest,
   205  	}
   206  	go func() {
   207  		// when the context is cancelled discard the remaining jobs
   208  		<-m.Ctx.Done()
   209  		for range in {
   210  			traversing.Done()
   211  		}
   212  	}()
   213  	traversing.Wait()
   214  	close(in)
   215  	wg.Wait()
   216  
   217  	if errCount > 1 {
   218  		return fmt.Errorf("march failed with %d error(s): first error: %w", errCount, jobError)
   219  	}
   220  	return jobError
   221  }
   222  
   223  // Check to see if the context has been cancelled
   224  func (m *March) aborting() bool {
   225  	select {
   226  	case <-m.Ctx.Done():
   227  		return true
   228  	default:
   229  	}
   230  	return false
   231  }
   232  
// matchEntry is an entry plus transformed name
type matchEntry struct {
	entry fs.DirEntry // the directory entry itself
	leaf  string      // last path element of entry.Remote(), untransformed
	name  string      // leaf after all the transforms have been applied - used for matching
}
   239  
   240  // matchEntries contains many matchEntry~s
   241  type matchEntries []matchEntry
   242  
   243  // Len is part of sort.Interface.
   244  func (es matchEntries) Len() int { return len(es) }
   245  
   246  // Swap is part of sort.Interface.
   247  func (es matchEntries) Swap(i, j int) { es[i], es[j] = es[j], es[i] }
   248  
   249  // Less is part of sort.Interface.
   250  //
   251  // Compare in order (name, leaf, remote)
   252  func (es matchEntries) Less(i, j int) bool {
   253  	ei, ej := &es[i], &es[j]
   254  	if ei.name == ej.name {
   255  		if ei.leaf == ej.leaf {
   256  			return fs.CompareDirEntries(ei.entry, ej.entry) < 0
   257  		}
   258  		return ei.leaf < ej.leaf
   259  	}
   260  	return ei.name < ej.name
   261  }
   262  
   263  // Sort the directory entries by (name, leaf, remote)
   264  //
   265  // We use a stable sort here just in case there are
   266  // duplicates. Assuming the remote delivers the entries in a
   267  // consistent order, this will give the best user experience
   268  // in syncing as it will use the first entry for the sync
   269  // comparison.
   270  func (es matchEntries) sort() {
   271  	sort.Stable(es)
   272  }
   273  
   274  // make a matchEntries from a newMatch entries
   275  func newMatchEntries(entries fs.DirEntries, transforms []matchTransformFn) matchEntries {
   276  	es := make(matchEntries, len(entries))
   277  	for i := range es {
   278  		es[i].entry = entries[i]
   279  		name := path.Base(entries[i].Remote())
   280  		es[i].leaf = name
   281  		for _, transform := range transforms {
   282  			name = transform(name)
   283  		}
   284  		es[i].name = name
   285  	}
   286  	es.sort()
   287  	return es
   288  }
   289  
// matchPair is a matched pair of direntries returned by matchListings
//
// src and dst have the same transformed name and the same entry type.
type matchPair struct {
	src, dst fs.DirEntry
}
   294  
// matchTransformFn converts a name into a form which is used for
// comparison in matchListings.
//
// The transforms in use are chained, each receiving the output of the
// previous one.
type matchTransformFn func(name string) string
   298  
   299  // Process the two listings, matching up the items in the two slices
   300  // using the transform function on each name first.
   301  //
   302  // Into srcOnly go Entries which only exist in the srcList
   303  // Into dstOnly go Entries which only exist in the dstList
   304  // Into matches go matchPair's of src and dst which have the same name
   305  //
   306  // This checks for duplicates and checks the list is sorted.
   307  func matchListings(srcListEntries, dstListEntries fs.DirEntries, transforms []matchTransformFn) (srcOnly fs.DirEntries, dstOnly fs.DirEntries, matches []matchPair) {
   308  	srcList := newMatchEntries(srcListEntries, transforms)
   309  	dstList := newMatchEntries(dstListEntries, transforms)
   310  
   311  	for iSrc, iDst := 0, 0; ; iSrc, iDst = iSrc+1, iDst+1 {
   312  		var src, dst fs.DirEntry
   313  		var srcName, dstName string
   314  		if iSrc < len(srcList) {
   315  			src = srcList[iSrc].entry
   316  			srcName = srcList[iSrc].name
   317  		}
   318  		if iDst < len(dstList) {
   319  			dst = dstList[iDst].entry
   320  			dstName = dstList[iDst].name
   321  		}
   322  		if src == nil && dst == nil {
   323  			break
   324  		}
   325  		if src != nil && iSrc > 0 {
   326  			prev := srcList[iSrc-1].entry
   327  			prevName := srcList[iSrc-1].name
   328  			if srcName == prevName && fs.DirEntryType(prev) == fs.DirEntryType(src) {
   329  				fs.Logf(src, "Duplicate %s found in source - ignoring", fs.DirEntryType(src))
   330  				iDst-- // ignore the src and retry the dst
   331  				continue
   332  			} else if srcName < prevName {
   333  				// this should never happen since we sort the listings
   334  				panic("Out of order listing in source")
   335  			}
   336  		}
   337  		if dst != nil && iDst > 0 {
   338  			prev := dstList[iDst-1].entry
   339  			prevName := dstList[iDst-1].name
   340  			if dstName == prevName && fs.DirEntryType(dst) == fs.DirEntryType(prev) {
   341  				fs.Logf(dst, "Duplicate %s found in destination - ignoring", fs.DirEntryType(dst))
   342  				iSrc-- // ignore the dst and retry the src
   343  				continue
   344  			} else if dstName < prevName {
   345  				// this should never happen since we sort the listings
   346  				panic("Out of order listing in destination")
   347  			}
   348  		}
   349  		if src != nil && dst != nil {
   350  			// we can't use CompareDirEntries because srcName, dstName could
   351  			// be different then src.Remote() or dst.Remote()
   352  			srcType := fs.DirEntryType(src)
   353  			dstType := fs.DirEntryType(dst)
   354  			if srcName > dstName || (srcName == dstName && srcType > dstType) {
   355  				src = nil
   356  				iSrc--
   357  			} else if srcName < dstName || (srcName == dstName && srcType < dstType) {
   358  				dst = nil
   359  				iDst--
   360  			}
   361  		}
   362  		// Debugf(nil, "src = %v, dst = %v", src, dst)
   363  		switch {
   364  		case src == nil && dst == nil:
   365  			// do nothing
   366  		case src == nil:
   367  			dstOnly = append(dstOnly, dst)
   368  		case dst == nil:
   369  			srcOnly = append(srcOnly, src)
   370  		default:
   371  			matches = append(matches, matchPair{src: src, dst: dst})
   372  		}
   373  	}
   374  	return
   375  }
   376  
   377  // processJob processes a listDirJob listing the source and
   378  // destination directories, comparing them and returning a slice of
   379  // more jobs
   380  //
   381  // returns errors using processError
   382  func (m *March) processJob(job listDirJob) ([]listDirJob, error) {
   383  	var (
   384  		jobs                   []listDirJob
   385  		srcList, dstList       fs.DirEntries
   386  		srcListErr, dstListErr error
   387  		wg                     sync.WaitGroup
   388  		mu                     sync.Mutex
   389  	)
   390  
   391  	// List the src and dst directories
   392  	if !job.noSrc {
   393  		wg.Add(1)
   394  		go func() {
   395  			defer wg.Done()
   396  			srcList, srcListErr = m.srcListDir(job.srcRemote)
   397  		}()
   398  	}
   399  	if !m.NoTraverse && !job.noDst {
   400  		wg.Add(1)
   401  		go func() {
   402  			defer wg.Done()
   403  			dstList, dstListErr = m.dstListDir(job.dstRemote)
   404  		}()
   405  	}
   406  
   407  	// Wait for listings to complete and report errors
   408  	wg.Wait()
   409  	if srcListErr != nil {
   410  		if job.srcRemote != "" {
   411  			fs.Errorf(job.srcRemote, "error reading source directory: %v", srcListErr)
   412  		} else {
   413  			fs.Errorf(m.Fsrc, "error reading source root directory: %v", srcListErr)
   414  		}
   415  		srcListErr = fs.CountError(srcListErr)
   416  		return nil, srcListErr
   417  	}
   418  	if dstListErr == fs.ErrorDirNotFound {
   419  		// Copy the stuff anyway
   420  	} else if dstListErr != nil {
   421  		if job.dstRemote != "" {
   422  			fs.Errorf(job.dstRemote, "error reading destination directory: %v", dstListErr)
   423  		} else {
   424  			fs.Errorf(m.Fdst, "error reading destination root directory: %v", dstListErr)
   425  		}
   426  		dstListErr = fs.CountError(dstListErr)
   427  		return nil, dstListErr
   428  	}
   429  
   430  	// If NoTraverse is set, then try to find a matching object
   431  	// for each item in the srcList to head dst object
   432  	ci := fs.GetConfig(m.Ctx)
   433  	limiter := make(chan struct{}, ci.Checkers)
   434  	if m.NoTraverse && !m.NoCheckDest {
   435  		for _, src := range srcList {
   436  			wg.Add(1)
   437  			limiter <- struct{}{}
   438  			go func(limiter chan struct{}, src fs.DirEntry) {
   439  				defer wg.Done()
   440  				if srcObj, ok := src.(fs.Object); ok {
   441  					leaf := path.Base(srcObj.Remote())
   442  					dstObj, err := m.Fdst.NewObject(m.Ctx, path.Join(job.dstRemote, leaf))
   443  					if err == nil {
   444  						mu.Lock()
   445  						dstList = append(dstList, dstObj)
   446  						mu.Unlock()
   447  					}
   448  				}
   449  				<-limiter
   450  			}(limiter, src)
   451  		}
   452  		wg.Wait()
   453  	}
   454  
   455  	// Work out what to do and do it
   456  	srcOnly, dstOnly, matches := matchListings(srcList, dstList, m.transforms)
   457  	for _, src := range srcOnly {
   458  		if m.aborting() {
   459  			return nil, m.Ctx.Err()
   460  		}
   461  		recurse := m.Callback.SrcOnly(src)
   462  		if recurse && job.srcDepth > 0 {
   463  			jobs = append(jobs, listDirJob{
   464  				srcRemote: src.Remote(),
   465  				dstRemote: src.Remote(),
   466  				srcDepth:  job.srcDepth - 1,
   467  				noDst:     true,
   468  			})
   469  		}
   470  
   471  	}
   472  	for _, dst := range dstOnly {
   473  		if m.aborting() {
   474  			return nil, m.Ctx.Err()
   475  		}
   476  		recurse := m.Callback.DstOnly(dst)
   477  		if recurse && job.dstDepth > 0 {
   478  			jobs = append(jobs, listDirJob{
   479  				srcRemote: dst.Remote(),
   480  				dstRemote: dst.Remote(),
   481  				dstDepth:  job.dstDepth - 1,
   482  				noSrc:     true,
   483  			})
   484  		}
   485  	}
   486  	for _, match := range matches {
   487  		if m.aborting() {
   488  			return nil, m.Ctx.Err()
   489  		}
   490  		recurse := m.Callback.Match(m.Ctx, match.dst, match.src)
   491  		if recurse && job.srcDepth > 0 && job.dstDepth > 0 {
   492  			jobs = append(jobs, listDirJob{
   493  				srcRemote: match.src.Remote(),
   494  				dstRemote: match.dst.Remote(),
   495  				srcDepth:  job.srcDepth - 1,
   496  				dstDepth:  job.dstDepth - 1,
   497  			})
   498  		}
   499  	}
   500  	return jobs, nil
   501  }