github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/march/march.go (about)

     1  // Package march traverses two directories in lock step
     2  package march
     3  
     4  import (
     5  	"context"
     6  	"fmt"
     7  	"path"
     8  	"sort"
     9  	"strings"
    10  	"sync"
    11  
    12  	"github.com/rclone/rclone/fs"
    13  	"github.com/rclone/rclone/fs/dirtree"
    14  	"github.com/rclone/rclone/fs/filter"
    15  	"github.com/rclone/rclone/fs/list"
    16  	"github.com/rclone/rclone/fs/walk"
    17  	"golang.org/x/text/unicode/norm"
    18  )
    19  
    20  // March holds the data used to traverse two Fs simultaneously,
    21  // calling Callback for each match
        //
        // Set the parameter fields and then call Run. The internal state is
        // filled in by init, which Run calls.
    22  type March struct {
    23  	// parameters
    24  	Ctx                    context.Context // context for the worker goroutines, the listings and the NewObject lookups
    25  	Fdst                   fs.Fs           // destination Fs
    26  	Fsrc                   fs.Fs           // source Fs
    27  	Dir                    string          // directory the march starts from in both Fsrc and Fdst
    28  	NoTraverse             bool            // don't list the destination, look each source object up with NewObject instead
    29  	SrcIncludeAll          bool            // passed as includeAll when listing the src, also stops filter-aware listing being requested
    30  	DstIncludeAll          bool            // passed as includeAll when listing the destination, also stops filter-aware listing being requested
    31  	Callback               Marcher         // object to call with results
    32  	NoCheckDest            bool            // transfer all objects regardless without checking dst
    33  	NoUnicodeNormalization bool            // don't normalize unicode characters in filenames
    34  	// internal state
    35  	srcListDir listDirFn // function to call to list a directory in the src
    36  	dstListDir listDirFn // function to call to list a directory in the dst, left unset when NoTraverse is set
    37  	transforms []matchTransformFn // applied in order to each leaf name before names are compared
    38  	limiter    chan struct{} // make sure we don't do too many operations at once
    39  }
    40  
    41  // Marcher is called on each match
        //
        // Each method returns recurse. When recurse is true, and the depth
        // limits of the march allow it, the entry is queued to be listed in
        // turn.
    42  type Marcher interface {
    43  	// SrcOnly is called for a DirEntry found only in the source
    44  	SrcOnly(src fs.DirEntry) (recurse bool)
    45  	// DstOnly is called for a DirEntry found only in the destination
    46  	DstOnly(dst fs.DirEntry) (recurse bool)
    47  	// Match is called for a DirEntry found both in the source and destination
        	//
        	// Note that the destination entry is passed before the source entry.
    48  	Match(ctx context.Context, dst, src fs.DirEntry) (recurse bool)
    49  }
    50  
    51  // init sets up a march over opt.Fsrc, and opt.Fdst calling back callback for each match
    52  // Note: this will flag filter-aware backends on the source side
    53  func (m *March) init(ctx context.Context) {
    54  	ci := fs.GetConfig(ctx)
    55  	m.srcListDir = m.makeListDir(ctx, m.Fsrc, m.SrcIncludeAll)
    56  	if !m.NoTraverse {
    57  		m.dstListDir = m.makeListDir(ctx, m.Fdst, m.DstIncludeAll)
    58  	}
    59  	// Now create the matching transform
    60  	// ..normalise the UTF8 first
    61  	if !m.NoUnicodeNormalization {
    62  		m.transforms = append(m.transforms, norm.NFC.String)
    63  	}
    64  	// ..if destination is caseInsensitive then make it lower case
    65  	// case Insensitive | src | dst | lower case compare |
    66  	//                  | No  | No  | No                 |
    67  	//                  | Yes | No  | No                 |
    68  	//                  | No  | Yes | Yes                |
    69  	//                  | Yes | Yes | Yes                |
    70  	if m.Fdst.Features().CaseInsensitive || ci.IgnoreCaseSync {
    71  		m.transforms = append(m.transforms, strings.ToLower)
    72  	}
    73  	// Limit parallelism for operations
    74  	m.limiter = make(chan struct{}, ci.Checkers)
    75  }
    76  
    77  // listDirFn lists the directory dir, returning its entries or an error
    78  type listDirFn func(dir string) (entries fs.DirEntries, err error)
    79  
    80  // makeListDir makes constructs a listing function for the given fs
    81  // and includeAll flags for marching through the file system.
    82  // Note: this will optionally flag filter-aware backends!
    83  func (m *March) makeListDir(ctx context.Context, f fs.Fs, includeAll bool) listDirFn {
    84  	ci := fs.GetConfig(ctx)
    85  	fi := filter.GetConfig(ctx)
    86  	if !(ci.UseListR && f.Features().ListR != nil) && // !--fast-list active and
    87  		!(ci.NoTraverse && fi.HaveFilesFrom()) { // !(--files-from and --no-traverse)
    88  		return func(dir string) (entries fs.DirEntries, err error) {
    89  			dirCtx := filter.SetUseFilter(m.Ctx, f.Features().FilterAware && !includeAll) // make filter-aware backends constrain List
    90  			return list.DirSorted(dirCtx, f, includeAll, dir)
    91  		}
    92  	}
    93  
    94  	// This returns a closure for use when --fast-list is active or for when
    95  	// --files-from and --no-traverse is set
    96  	var (
    97  		mu      sync.Mutex
    98  		started bool
    99  		dirs    dirtree.DirTree
   100  		dirsErr error
   101  	)
   102  	return func(dir string) (entries fs.DirEntries, err error) {
   103  		mu.Lock()
   104  		defer mu.Unlock()
   105  		if !started {
   106  			dirCtx := filter.SetUseFilter(m.Ctx, f.Features().FilterAware && !includeAll) // make filter-aware backends constrain List
   107  			dirs, dirsErr = walk.NewDirTree(dirCtx, f, m.Dir, includeAll, ci.MaxDepth)
   108  			started = true
   109  		}
   110  		if dirsErr != nil {
   111  			return nil, dirsErr
   112  		}
   113  		entries, ok := dirs[dir]
   114  		if !ok {
   115  			err = fs.ErrorDirNotFound
   116  		} else {
   117  			delete(dirs, dir)
   118  		}
   119  		return entries, err
   120  	}
   121  }
   122  
   123  // listDirJob describes a directory listing that needs to be done
   124  type listDirJob struct {
   125  	srcRemote string // directory to list in the src
   126  	dstRemote string // directory to list in the dst
   127  	srcDepth  int    // src directories are only recursed into while this is > 0
   128  	dstDepth  int    // dst directories are only recursed into while this is > 0
   129  	noSrc     bool   // if set the src is not listed
   130  	noDst     bool   // if set the dst is not listed
   131  }
   132  
   133  // Run starts the matching process off
   134  func (m *March) Run(ctx context.Context) error {
   135  	ci := fs.GetConfig(ctx)
   136  	fi := filter.GetConfig(ctx)
   137  	m.init(ctx)
   138  
   139  	srcDepth := ci.MaxDepth
   140  	if srcDepth < 0 {
   141  		srcDepth = fs.MaxLevel
   142  	}
   143  	dstDepth := srcDepth
   144  	if fi.Opt.DeleteExcluded {
   145  		dstDepth = fs.MaxLevel
   146  	}
   147  
   148  	var mu sync.Mutex // Protects vars below
   149  	var jobError error
   150  	var errCount int
   151  
   152  	// Start some directory listing go routines
   153  	var wg sync.WaitGroup         // sync closing of go routines
   154  	var traversing sync.WaitGroup // running directory traversals
   155  	checkers := ci.Checkers
   156  	in := make(chan listDirJob, checkers)
   157  	for i := 0; i < checkers; i++ {
   158  		wg.Add(1)
   159  		go func() {
   160  			defer wg.Done()
   161  			for {
   162  				select {
   163  				case <-m.Ctx.Done():
   164  					return
   165  				case job, ok := <-in:
   166  					if !ok {
   167  						return
   168  					}
   169  					jobs, err := m.processJob(job)
   170  					if err != nil {
   171  						mu.Lock()
   172  						// Keep reference only to the first encountered error
   173  						if jobError == nil {
   174  							jobError = err
   175  						}
   176  						errCount++
   177  						mu.Unlock()
   178  					}
   179  					if len(jobs) > 0 {
   180  						traversing.Add(len(jobs))
   181  						go func() {
   182  							// Now we have traversed this directory, send these
   183  							// jobs off for traversal in the background
   184  							for _, newJob := range jobs {
   185  								select {
   186  								case <-m.Ctx.Done():
   187  									// discard job if finishing
   188  									traversing.Done()
   189  								case in <- newJob:
   190  								}
   191  							}
   192  						}()
   193  					}
   194  					traversing.Done()
   195  				}
   196  			}
   197  		}()
   198  	}
   199  
   200  	// Start the process
   201  	traversing.Add(1)
   202  	in <- listDirJob{
   203  		srcRemote: m.Dir,
   204  		srcDepth:  srcDepth - 1,
   205  		dstRemote: m.Dir,
   206  		dstDepth:  dstDepth - 1,
   207  		noDst:     m.NoCheckDest,
   208  	}
   209  	go func() {
   210  		// when the context is cancelled discard the remaining jobs
   211  		<-m.Ctx.Done()
   212  		for range in {
   213  			traversing.Done()
   214  		}
   215  	}()
   216  	traversing.Wait()
   217  	close(in)
   218  	wg.Wait()
   219  
   220  	if errCount > 1 {
   221  		return fmt.Errorf("march failed with %d error(s): first error: %w", errCount, jobError)
   222  	}
   223  	return jobError
   224  }
   225  
   226  // Check to see if the context has been cancelled
   227  func (m *March) aborting() bool {
   228  	select {
   229  	case <-m.Ctx.Done():
   230  		return true
   231  	default:
   232  	}
   233  	return false
   234  }
   235  
   236  // matchEntry is an entry plus transformed name
   237  type matchEntry struct {
   238  	entry fs.DirEntry // the directory entry itself
   239  	leaf  string      // last path element of entry.Remote(), before any transforms
   240  	name  string      // leaf after the transforms have been applied, used for matching
   241  }
   242  
   243  // matchEntries contains many matchEntry~s
   244  type matchEntries []matchEntry
   245  
   246  // Len is part of sort.Interface.
   247  func (es matchEntries) Len() int { return len(es) }
   248  
   249  // Swap is part of sort.Interface.
   250  func (es matchEntries) Swap(i, j int) { es[i], es[j] = es[j], es[i] }
   251  
   252  // Less is part of sort.Interface.
   253  //
   254  // Compare in order (name, leaf, remote)
   255  func (es matchEntries) Less(i, j int) bool {
   256  	ei, ej := &es[i], &es[j]
   257  	if ei.name == ej.name {
   258  		if ei.leaf == ej.leaf {
   259  			return fs.CompareDirEntries(ei.entry, ej.entry) < 0
   260  		}
   261  		return ei.leaf < ej.leaf
   262  	}
   263  	return ei.name < ej.name
   264  }
   265  
   266  // sort the directory entries in place into the order defined by
        // Less, that is by (name, leaf, remote)
   267  //
   268  // We use a stable sort here just in case there are
   269  // duplicates. Assuming the remote delivers the entries in a
   270  // consistent order, this will give the best user experience
   271  // in syncing as it will use the first entry for the sync
   272  // comparison.
   273  func (es matchEntries) sort() {
   274  	sort.Stable(es)
   275  }
   276  
   277  // make a matchEntries from a newMatch entries
   278  func newMatchEntries(entries fs.DirEntries, transforms []matchTransformFn) matchEntries {
   279  	es := make(matchEntries, len(entries))
   280  	for i := range es {
   281  		es[i].entry = entries[i]
   282  		name := path.Base(entries[i].Remote())
   283  		es[i].leaf = name
   284  		for _, transform := range transforms {
   285  			name = transform(name)
   286  		}
   287  		es[i].name = name
   288  	}
   289  	es.sort()
   290  	return es
   291  }
   292  
   293  // matchPair is a matched pair of direntries returned by matchListings
        //
        // src comes from the source listing and dst from the destination listing
   294  type matchPair struct {
   295  	src, dst fs.DirEntry
   296  }
   297  
   298  // matchTransformFn converts a name into a form which is used for
   299  // comparison in matchListings. It is applied to the leaf name of an
        // entry only, not to its full path.
   300  type matchTransformFn func(name string) string
   301  
   302  // matchListings processes the two listings, matching up the items in
   303  // the two slices after applying the transforms to each leaf name.
   304  //
   305  // Into srcOnly go Entries which only exist in the srcList
   306  // Into dstOnly go Entries which only exist in the dstList
   307  // Into matches go matchPair's of src and dst which have the same
        // transformed name and the same fs.DirEntryType
   308  //
   309  // Both listings are sorted here by newMatchEntries. An entry with the
        // same transformed name and type as the one before it in its listing
        // is logged as a duplicate and ignored. This panics if a listing is
        // found to be out of order after sorting.
   310  func matchListings(srcListEntries, dstListEntries fs.DirEntries, transforms []matchTransformFn) (srcOnly fs.DirEntries, dstOnly fs.DirEntries, matches []matchPair) {
   311  	srcList := newMatchEntries(srcListEntries, transforms)
   312  	dstList := newMatchEntries(dstListEntries, transforms)
   313  
        	// Both indexes advance on every iteration. Whenever only one side
        	// is consumed the index of the other side is decremented first so
        	// that the same entry is looked at again next time round.
   314  	for iSrc, iDst := 0, 0; ; iSrc, iDst = iSrc+1, iDst+1 {
   315  		var src, dst fs.DirEntry
   316  		var srcName, dstName string
        		// src or dst stay nil once the matching listing is exhausted
   317  		if iSrc < len(srcList) {
   318  			src = srcList[iSrc].entry
   319  			srcName = srcList[iSrc].name
   320  		}
   321  		if iDst < len(dstList) {
   322  			dst = dstList[iDst].entry
   323  			dstName = dstList[iDst].name
   324  		}
   325  		if src == nil && dst == nil {
   326  			break
   327  		}
        		// Skip a src entry which duplicates the previous src entry
   328  		if src != nil && iSrc > 0 {
   329  			prev := srcList[iSrc-1].entry
   330  			prevName := srcList[iSrc-1].name
   331  			if srcName == prevName && fs.DirEntryType(prev) == fs.DirEntryType(src) {
   332  				fs.Logf(src, "Duplicate %s found in source - ignoring", fs.DirEntryType(src))
   333  				iDst-- // ignore the src and retry the dst
   334  				continue
   335  			} else if srcName < prevName {
   336  				// this should never happen since we sort the listings
   337  				panic("Out of order listing in source")
   338  			}
   339  		}
        		// Skip a dst entry which duplicates the previous dst entry
   340  		if dst != nil && iDst > 0 {
   341  			prev := dstList[iDst-1].entry
   342  			prevName := dstList[iDst-1].name
   343  			if dstName == prevName && fs.DirEntryType(dst) == fs.DirEntryType(prev) {
   344  				fs.Logf(dst, "Duplicate %s found in destination - ignoring", fs.DirEntryType(dst))
   345  				iSrc-- // ignore the dst and retry the src
   346  				continue
   347  			} else if dstName < prevName {
   348  				// this should never happen since we sort the listings
   349  				panic("Out of order listing in destination")
   350  			}
   351  		}
   352  		if src != nil && dst != nil {
   353  			// we can't use CompareDirEntries because srcName, dstName could
   354  			// be different than src.Remote() or dst.Remote()
        			//
        			// Drop whichever side sorts later, by name then by type, so
        			// that only the earlier entry is emitted this time round
   355  			srcType := fs.DirEntryType(src)
   356  			dstType := fs.DirEntryType(dst)
   357  			if srcName > dstName || (srcName == dstName && srcType > dstType) {
   358  				src = nil
   359  				iSrc--
   360  			} else if srcName < dstName || (srcName == dstName && srcType < dstType) {
   361  				dst = nil
   362  				iDst--
   363  			}
   364  		}
   365  		// Debugf(nil, "src = %v, dst = %v", src, dst)
   366  		switch {
   367  		case src == nil && dst == nil:
   368  			// do nothing
   369  		case src == nil:
   370  			dstOnly = append(dstOnly, dst)
   371  		case dst == nil:
   372  			srcOnly = append(srcOnly, src)
   373  		default:
   374  			matches = append(matches, matchPair{src: src, dst: dst})
   375  		}
   376  	}
   377  	return
   378  }
   379  
   380  // processJob processes a listDirJob listing the source and
   381  // destination directories, comparing them and returning a slice of
   382  // more jobs
   383  //
   384  // returns errors using processError
   385  func (m *March) processJob(job listDirJob) ([]listDirJob, error) {
   386  	var (
   387  		jobs                   []listDirJob
   388  		srcList, dstList       fs.DirEntries
   389  		srcListErr, dstListErr error
   390  		wg                     sync.WaitGroup
   391  		mu                     sync.Mutex
   392  	)
   393  
   394  	// List the src and dst directories
   395  	if !job.noSrc {
   396  		wg.Add(1)
   397  		go func() {
   398  			defer wg.Done()
   399  			srcList, srcListErr = m.srcListDir(job.srcRemote)
   400  		}()
   401  	}
   402  	if !m.NoTraverse && !job.noDst {
   403  		wg.Add(1)
   404  		go func() {
   405  			defer wg.Done()
   406  			dstList, dstListErr = m.dstListDir(job.dstRemote)
   407  		}()
   408  	}
   409  
   410  	// Wait for listings to complete and report errors
   411  	wg.Wait()
   412  	if srcListErr != nil {
   413  		if job.srcRemote != "" {
   414  			fs.Errorf(job.srcRemote, "error reading source directory: %v", srcListErr)
   415  		} else {
   416  			fs.Errorf(m.Fsrc, "error reading source root directory: %v", srcListErr)
   417  		}
   418  		srcListErr = fs.CountError(srcListErr)
   419  		return nil, srcListErr
   420  	}
   421  	if dstListErr == fs.ErrorDirNotFound {
   422  		// Copy the stuff anyway
   423  	} else if dstListErr != nil {
   424  		if job.dstRemote != "" {
   425  			fs.Errorf(job.dstRemote, "error reading destination directory: %v", dstListErr)
   426  		} else {
   427  			fs.Errorf(m.Fdst, "error reading destination root directory: %v", dstListErr)
   428  		}
   429  		dstListErr = fs.CountError(dstListErr)
   430  		return nil, dstListErr
   431  	}
   432  
   433  	// If NoTraverse is set, then try to find a matching object
   434  	// for each item in the srcList to head dst object
   435  	if m.NoTraverse && !m.NoCheckDest {
   436  		for _, src := range srcList {
   437  			wg.Add(1)
   438  			m.limiter <- struct{}{}
   439  			go func(src fs.DirEntry) {
   440  				defer wg.Done()
   441  				if srcObj, ok := src.(fs.Object); ok {
   442  					leaf := path.Base(srcObj.Remote())
   443  					dstObj, err := m.Fdst.NewObject(m.Ctx, path.Join(job.dstRemote, leaf))
   444  					if err == nil {
   445  						mu.Lock()
   446  						dstList = append(dstList, dstObj)
   447  						mu.Unlock()
   448  					}
   449  				}
   450  				<-m.limiter
   451  			}(src)
   452  		}
   453  		wg.Wait()
   454  	}
   455  
   456  	// Work out what to do and do it
   457  	srcOnly, dstOnly, matches := matchListings(srcList, dstList, m.transforms)
   458  	for _, src := range srcOnly {
   459  		if m.aborting() {
   460  			return nil, m.Ctx.Err()
   461  		}
   462  		recurse := m.Callback.SrcOnly(src)
   463  		if recurse && job.srcDepth > 0 {
   464  			jobs = append(jobs, listDirJob{
   465  				srcRemote: src.Remote(),
   466  				dstRemote: src.Remote(),
   467  				srcDepth:  job.srcDepth - 1,
   468  				noDst:     true,
   469  			})
   470  		}
   471  
   472  	}
   473  	for _, dst := range dstOnly {
   474  		if m.aborting() {
   475  			return nil, m.Ctx.Err()
   476  		}
   477  		recurse := m.Callback.DstOnly(dst)
   478  		if recurse && job.dstDepth > 0 {
   479  			jobs = append(jobs, listDirJob{
   480  				srcRemote: dst.Remote(),
   481  				dstRemote: dst.Remote(),
   482  				dstDepth:  job.dstDepth - 1,
   483  				noSrc:     true,
   484  			})
   485  		}
   486  	}
   487  	for _, match := range matches {
   488  		if m.aborting() {
   489  			return nil, m.Ctx.Err()
   490  		}
   491  		recurse := m.Callback.Match(m.Ctx, match.dst, match.src)
   492  		if recurse && job.srcDepth > 0 && job.dstDepth > 0 {
   493  			jobs = append(jobs, listDirJob{
   494  				srcRemote: match.src.Remote(),
   495  				dstRemote: match.dst.Remote(),
   496  				srcDepth:  job.srcDepth - 1,
   497  				dstDepth:  job.dstDepth - 1,
   498  			})
   499  		}
   500  	}
   501  	return jobs, nil
   502  }