github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/march/march.go (about) 1 // Package march traverses two directories in lock step 2 package march 3 4 import ( 5 "context" 6 "fmt" 7 "path" 8 "sort" 9 "strings" 10 "sync" 11 12 "github.com/rclone/rclone/fs" 13 "github.com/rclone/rclone/fs/dirtree" 14 "github.com/rclone/rclone/fs/filter" 15 "github.com/rclone/rclone/fs/list" 16 "github.com/rclone/rclone/fs/walk" 17 "golang.org/x/text/unicode/norm" 18 ) 19 20 // March holds the data used to traverse two Fs simultaneously, 21 // calling Callback for each match 22 type March struct { 23 // parameters 24 Ctx context.Context // context for background goroutines 25 Fdst fs.Fs // source Fs 26 Fsrc fs.Fs // dest Fs 27 Dir string // directory 28 NoTraverse bool // don't traverse the destination 29 SrcIncludeAll bool // don't include all files in the src 30 DstIncludeAll bool // don't include all files in the destination 31 Callback Marcher // object to call with results 32 NoCheckDest bool // transfer all objects regardless without checking dst 33 NoUnicodeNormalization bool // don't normalize unicode characters in filenames 34 // internal state 35 srcListDir listDirFn // function to call to list a directory in the src 36 dstListDir listDirFn // function to call to list a directory in the dst 37 transforms []matchTransformFn 38 limiter chan struct{} // make sure we don't do too many operations at once 39 } 40 41 // Marcher is called on each match 42 type Marcher interface { 43 // SrcOnly is called for a DirEntry found only in the source 44 SrcOnly(src fs.DirEntry) (recurse bool) 45 // DstOnly is called for a DirEntry found only in the destination 46 DstOnly(dst fs.DirEntry) (recurse bool) 47 // Match is called for a DirEntry found both in the source and destination 48 Match(ctx context.Context, dst, src fs.DirEntry) (recurse bool) 49 } 50 51 // init sets up a march over opt.Fsrc, and opt.Fdst calling back callback for each match 52 // Note: this will flag filter-aware backends on the source side 53 func (m *March) init(ctx context.Context) { 54 ci := fs.GetConfig(ctx) 55 m.srcListDir = m.makeListDir(ctx, m.Fsrc, m.SrcIncludeAll) 56 if !m.NoTraverse { 57 m.dstListDir = m.makeListDir(ctx, m.Fdst, m.DstIncludeAll) 58 } 59 // Now create the matching transform 60 // ..normalise the UTF8 first 61 if !m.NoUnicodeNormalization { 62 m.transforms = append(m.transforms, norm.NFC.String) 63 } 64 // ..if destination is caseInsensitive then make it lower case 65 // case Insensitive | src | dst | lower case compare | 66 // | No | No | No | 67 // | Yes | No | No | 68 // | No | Yes | Yes | 69 // | Yes | Yes | Yes | 70 if m.Fdst.Features().CaseInsensitive || ci.IgnoreCaseSync { 71 m.transforms = append(m.transforms, strings.ToLower) 72 } 73 // Limit parallelism for operations 74 m.limiter = make(chan struct{}, ci.Checkers) 75 } 76 77 // list a directory into entries, err 78 type listDirFn func(dir string) (entries fs.DirEntries, err error) 79 80 // makeListDir makes constructs a listing function for the given fs 81 // and includeAll flags for marching through the file system. 82 // Note: this will optionally flag filter-aware backends! 83 func (m *March) makeListDir(ctx context.Context, f fs.Fs, includeAll bool) listDirFn { 84 ci := fs.GetConfig(ctx) 85 fi := filter.GetConfig(ctx) 86 if !(ci.UseListR && f.Features().ListR != nil) && // !--fast-list active and 87 !(ci.NoTraverse && fi.HaveFilesFrom()) { // !(--files-from and --no-traverse) 88 return func(dir string) (entries fs.DirEntries, err error) { 89 dirCtx := filter.SetUseFilter(m.Ctx, f.Features().FilterAware && !includeAll) // make filter-aware backends constrain List 90 return list.DirSorted(dirCtx, f, includeAll, dir) 91 } 92 } 93 94 // This returns a closure for use when --fast-list is active or for when 95 // --files-from and --no-traverse is set 96 var ( 97 mu sync.Mutex 98 started bool 99 dirs dirtree.DirTree 100 dirsErr error 101 ) 102 return func(dir string) (entries fs.DirEntries, err error) { 103 mu.Lock() 104 defer mu.Unlock() 105 if !started { 106 dirCtx := filter.SetUseFilter(m.Ctx, f.Features().FilterAware && !includeAll) // make filter-aware backends constrain List 107 dirs, dirsErr = walk.NewDirTree(dirCtx, f, m.Dir, includeAll, ci.MaxDepth) 108 started = true 109 } 110 if dirsErr != nil { 111 return nil, dirsErr 112 } 113 entries, ok := dirs[dir] 114 if !ok { 115 err = fs.ErrorDirNotFound 116 } else { 117 delete(dirs, dir) 118 } 119 return entries, err 120 } 121 } 122 123 // listDirJob describe a directory listing that needs to be done 124 type listDirJob struct { 125 srcRemote string 126 dstRemote string 127 srcDepth int 128 dstDepth int 129 noSrc bool 130 noDst bool 131 } 132 133 // Run starts the matching process off 134 func (m *March) Run(ctx context.Context) error { 135 ci := fs.GetConfig(ctx) 136 fi := filter.GetConfig(ctx) 137 m.init(ctx) 138 139 srcDepth := ci.MaxDepth 140 if srcDepth < 0 { 141 srcDepth = fs.MaxLevel 142 } 143 dstDepth := srcDepth 144 if fi.Opt.DeleteExcluded { 145 dstDepth = fs.MaxLevel 146 } 147 148 var mu sync.Mutex // Protects vars below 149 var jobError error 150 var errCount int 151 152 // Start some directory listing go routines 153 var wg sync.WaitGroup // sync closing of go routines 154 var traversing sync.WaitGroup // running directory traversals 155 checkers := ci.Checkers 156 in := make(chan listDirJob, checkers) 157 for i := 0; i < checkers; i++ { 158 wg.Add(1) 159 go func() { 160 defer wg.Done() 161 for { 162 select { 163 case <-m.Ctx.Done(): 164 return 165 case job, ok := <-in: 166 if !ok { 167 return 168 } 169 jobs, err := m.processJob(job) 170 if err != nil { 171 mu.Lock() 172 // Keep reference only to the first encountered error 173 if jobError == nil { 174 jobError = err 175 } 176 errCount++ 177 mu.Unlock() 178 } 179 if len(jobs) > 0 { 180 traversing.Add(len(jobs)) 181 go func() { 182 // Now we have traversed this directory, send these 183 // jobs off for traversal in the background 184 for _, newJob := range jobs { 185 select { 186 case <-m.Ctx.Done(): 187 // discard job if finishing 188 traversing.Done() 189 case in <- newJob: 190 } 191 } 192 }() 193 } 194 traversing.Done() 195 } 196 } 197 }() 198 } 199 200 // Start the process 201 traversing.Add(1) 202 in <- listDirJob{ 203 srcRemote: m.Dir, 204 srcDepth: srcDepth - 1, 205 dstRemote: m.Dir, 206 dstDepth: dstDepth - 1, 207 noDst: m.NoCheckDest, 208 } 209 go func() { 210 // when the context is cancelled discard the remaining jobs 211 <-m.Ctx.Done() 212 for range in { 213 traversing.Done() 214 } 215 }() 216 traversing.Wait() 217 close(in) 218 wg.Wait() 219 220 if errCount > 1 { 221 return fmt.Errorf("march failed with %d error(s): first error: %w", errCount, jobError) 222 } 223 return jobError 224 } 225 226 // Check to see if the context has been cancelled 227 func (m *March) aborting() bool { 228 select { 229 case <-m.Ctx.Done(): 230 return true 231 default: 232 } 233 return false 234 } 235 236 // matchEntry is an entry plus transformed name 237 type matchEntry struct { 238 entry fs.DirEntry 239 leaf string 240 name string 241 } 242 243 // matchEntries contains many matchEntry~s 244 type matchEntries []matchEntry 245 246 // Len is part of sort.Interface. 247 func (es matchEntries) Len() int { return len(es) } 248 249 // Swap is part of sort.Interface. 250 func (es matchEntries) Swap(i, j int) { es[i], es[j] = es[j], es[i] } 251 252 // Less is part of sort.Interface. 253 // 254 // Compare in order (name, leaf, remote) 255 func (es matchEntries) Less(i, j int) bool { 256 ei, ej := &es[i], &es[j] 257 if ei.name == ej.name { 258 if ei.leaf == ej.leaf { 259 return fs.CompareDirEntries(ei.entry, ej.entry) < 0 260 } 261 return ei.leaf < ej.leaf 262 } 263 return ei.name < ej.name 264 } 265 266 // Sort the directory entries by (name, leaf, remote) 267 // 268 // We use a stable sort here just in case there are 269 // duplicates. Assuming the remote delivers the entries in a 270 // consistent order, this will give the best user experience 271 // in syncing as it will use the first entry for the sync 272 // comparison. 273 func (es matchEntries) sort() { 274 sort.Stable(es) 275 } 276 277 // make a matchEntries from a newMatch entries 278 func newMatchEntries(entries fs.DirEntries, transforms []matchTransformFn) matchEntries { 279 es := make(matchEntries, len(entries)) 280 for i := range es { 281 es[i].entry = entries[i] 282 name := path.Base(entries[i].Remote()) 283 es[i].leaf = name 284 for _, transform := range transforms { 285 name = transform(name) 286 } 287 es[i].name = name 288 } 289 es.sort() 290 return es 291 } 292 293 // matchPair is a matched pair of direntries returned by matchListings 294 type matchPair struct { 295 src, dst fs.DirEntry 296 } 297 298 // matchTransformFn converts a name into a form which is used for 299 // comparison in matchListings. 300 type matchTransformFn func(name string) string 301 302 // Process the two listings, matching up the items in the two slices 303 // using the transform function on each name first. 304 // 305 // Into srcOnly go Entries which only exist in the srcList 306 // Into dstOnly go Entries which only exist in the dstList 307 // Into matches go matchPair's of src and dst which have the same name 308 // 309 // This checks for duplicates and checks the list is sorted. 310 func matchListings(srcListEntries, dstListEntries fs.DirEntries, transforms []matchTransformFn) (srcOnly fs.DirEntries, dstOnly fs.DirEntries, matches []matchPair) { 311 srcList := newMatchEntries(srcListEntries, transforms) 312 dstList := newMatchEntries(dstListEntries, transforms) 313 314 for iSrc, iDst := 0, 0; ; iSrc, iDst = iSrc+1, iDst+1 { 315 var src, dst fs.DirEntry 316 var srcName, dstName string 317 if iSrc < len(srcList) { 318 src = srcList[iSrc].entry 319 srcName = srcList[iSrc].name 320 } 321 if iDst < len(dstList) { 322 dst = dstList[iDst].entry 323 dstName = dstList[iDst].name 324 } 325 if src == nil && dst == nil { 326 break 327 } 328 if src != nil && iSrc > 0 { 329 prev := srcList[iSrc-1].entry 330 prevName := srcList[iSrc-1].name 331 if srcName == prevName && fs.DirEntryType(prev) == fs.DirEntryType(src) { 332 fs.Logf(src, "Duplicate %s found in source - ignoring", fs.DirEntryType(src)) 333 iDst-- // ignore the src and retry the dst 334 continue 335 } else if srcName < prevName { 336 // this should never happen since we sort the listings 337 panic("Out of order listing in source") 338 } 339 } 340 if dst != nil && iDst > 0 { 341 prev := dstList[iDst-1].entry 342 prevName := dstList[iDst-1].name 343 if dstName == prevName && fs.DirEntryType(dst) == fs.DirEntryType(prev) { 344 fs.Logf(dst, "Duplicate %s found in destination - ignoring", fs.DirEntryType(dst)) 345 iSrc-- // ignore the dst and retry the src 346 continue 347 } else if dstName < prevName { 348 // this should never happen since we sort the listings 349 panic("Out of order listing in destination") 350 } 351 } 352 if src != nil && dst != nil { 353 // we can't use CompareDirEntries because srcName, dstName could 354 // be different then src.Remote() or dst.Remote() 355 srcType := fs.DirEntryType(src) 356 dstType := fs.DirEntryType(dst) 357 if srcName > dstName || (srcName == dstName && srcType > dstType) { 358 src = nil 359 iSrc-- 360 } else if srcName < dstName || (srcName == dstName && srcType < dstType) { 361 dst = nil 362 iDst-- 363 } 364 } 365 // Debugf(nil, "src = %v, dst = %v", src, dst) 366 switch { 367 case src == nil && dst == nil: 368 // do nothing 369 case src == nil: 370 dstOnly = append(dstOnly, dst) 371 case dst == nil: 372 srcOnly = append(srcOnly, src) 373 default: 374 matches = append(matches, matchPair{src: src, dst: dst}) 375 } 376 } 377 return 378 } 379 380 // processJob processes a listDirJob listing the source and 381 // destination directories, comparing them and returning a slice of 382 // more jobs 383 // 384 // returns errors using processError 385 func (m *March) processJob(job listDirJob) ([]listDirJob, error) { 386 var ( 387 jobs []listDirJob 388 srcList, dstList fs.DirEntries 389 srcListErr, dstListErr error 390 wg sync.WaitGroup 391 mu sync.Mutex 392 ) 393 394 // List the src and dst directories 395 if !job.noSrc { 396 wg.Add(1) 397 go func() { 398 defer wg.Done() 399 srcList, srcListErr = m.srcListDir(job.srcRemote) 400 }() 401 } 402 if !m.NoTraverse && !job.noDst { 403 wg.Add(1) 404 go func() { 405 defer wg.Done() 406 dstList, dstListErr = m.dstListDir(job.dstRemote) 407 }() 408 } 409 410 // Wait for listings to complete and report errors 411 wg.Wait() 412 if srcListErr != nil { 413 if job.srcRemote != "" { 414 fs.Errorf(job.srcRemote, "error reading source directory: %v", srcListErr) 415 } else { 416 fs.Errorf(m.Fsrc, "error reading source root directory: %v", srcListErr) 417 } 418 srcListErr = fs.CountError(srcListErr) 419 return nil, srcListErr 420 } 421 if dstListErr == fs.ErrorDirNotFound { 422 // Copy the stuff anyway 423 } else if dstListErr != nil { 424 if job.dstRemote != "" { 425 fs.Errorf(job.dstRemote, "error reading destination directory: %v", dstListErr) 426 } else { 427 fs.Errorf(m.Fdst, "error reading destination root directory: %v", dstListErr) 428 } 429 dstListErr = fs.CountError(dstListErr) 430 return nil, dstListErr 431 } 432 433 // If NoTraverse is set, then try to find a matching object 434 // for each item in the srcList to head dst object 435 if m.NoTraverse && !m.NoCheckDest { 436 for _, src := range srcList { 437 wg.Add(1) 438 m.limiter <- struct{}{} 439 go func(src fs.DirEntry) { 440 defer wg.Done() 441 if srcObj, ok := src.(fs.Object); ok { 442 leaf := path.Base(srcObj.Remote()) 443 dstObj, err := m.Fdst.NewObject(m.Ctx, path.Join(job.dstRemote, leaf)) 444 if err == nil { 445 mu.Lock() 446 dstList = append(dstList, dstObj) 447 mu.Unlock() 448 } 449 } 450 <-m.limiter 451 }(src) 452 } 453 wg.Wait() 454 } 455 456 // Work out what to do and do it 457 srcOnly, dstOnly, matches := matchListings(srcList, dstList, m.transforms) 458 for _, src := range srcOnly { 459 if m.aborting() { 460 return nil, m.Ctx.Err() 461 } 462 recurse := m.Callback.SrcOnly(src) 463 if recurse && job.srcDepth > 0 { 464 jobs = append(jobs, listDirJob{ 465 srcRemote: src.Remote(), 466 dstRemote: src.Remote(), 467 srcDepth: job.srcDepth - 1, 468 noDst: true, 469 }) 470 } 471 472 } 473 for _, dst := range dstOnly { 474 if m.aborting() { 475 return nil, m.Ctx.Err() 476 } 477 recurse := m.Callback.DstOnly(dst) 478 if recurse && job.dstDepth > 0 { 479 jobs = append(jobs, listDirJob{ 480 srcRemote: dst.Remote(), 481 dstRemote: dst.Remote(), 482 dstDepth: job.dstDepth - 1, 483 noSrc: true, 484 }) 485 } 486 } 487 for _, match := range matches { 488 if m.aborting() { 489 return nil, m.Ctx.Err() 490 } 491 recurse := m.Callback.Match(m.Ctx, match.dst, match.src) 492 if recurse && job.srcDepth > 0 && job.dstDepth > 0 { 493 jobs = append(jobs, listDirJob{ 494 srcRemote: match.src.Remote(), 495 dstRemote: match.dst.Remote(), 496 srcDepth: job.srcDepth - 1, 497 dstDepth: job.dstDepth - 1, 498 }) 499 } 500 } 501 return jobs, nil 502 }