github.com/divyam234/rclone@v1.64.1/fs/march/march.go (about) 1 // Package march traverses two directories in lock step 2 package march 3 4 import ( 5 "context" 6 "fmt" 7 "path" 8 "sort" 9 "strings" 10 "sync" 11 12 "github.com/divyam234/rclone/fs" 13 "github.com/divyam234/rclone/fs/dirtree" 14 "github.com/divyam234/rclone/fs/filter" 15 "github.com/divyam234/rclone/fs/list" 16 "github.com/divyam234/rclone/fs/walk" 17 "golang.org/x/text/unicode/norm" 18 ) 19 20 // March holds the data used to traverse two Fs simultaneously, 21 // calling Callback for each match 22 type March struct { 23 // parameters 24 Ctx context.Context // context for background goroutines 25 Fdst fs.Fs // source Fs 26 Fsrc fs.Fs // dest Fs 27 Dir string // directory 28 NoTraverse bool // don't traverse the destination 29 SrcIncludeAll bool // don't include all files in the src 30 DstIncludeAll bool // don't include all files in the destination 31 Callback Marcher // object to call with results 32 NoCheckDest bool // transfer all objects regardless without checking dst 33 NoUnicodeNormalization bool // don't normalize unicode characters in filenames 34 // internal state 35 srcListDir listDirFn // function to call to list a directory in the src 36 dstListDir listDirFn // function to call to list a directory in the dst 37 transforms []matchTransformFn 38 } 39 40 // Marcher is called on each match 41 type Marcher interface { 42 // SrcOnly is called for a DirEntry found only in the source 43 SrcOnly(src fs.DirEntry) (recurse bool) 44 // DstOnly is called for a DirEntry found only in the destination 45 DstOnly(dst fs.DirEntry) (recurse bool) 46 // Match is called for a DirEntry found both in the source and destination 47 Match(ctx context.Context, dst, src fs.DirEntry) (recurse bool) 48 } 49 50 // init sets up a march over opt.Fsrc, and opt.Fdst calling back callback for each match 51 // Note: this will flag filter-aware backends on the source side 52 func (m *March) init(ctx context.Context) { 53 ci := fs.GetConfig(ctx) 54 m.srcListDir = m.makeListDir(ctx, m.Fsrc, m.SrcIncludeAll) 55 if !m.NoTraverse { 56 m.dstListDir = m.makeListDir(ctx, m.Fdst, m.DstIncludeAll) 57 } 58 // Now create the matching transform 59 // ..normalise the UTF8 first 60 if !m.NoUnicodeNormalization { 61 m.transforms = append(m.transforms, norm.NFC.String) 62 } 63 // ..if destination is caseInsensitive then make it lower case 64 // case Insensitive | src | dst | lower case compare | 65 // | No | No | No | 66 // | Yes | No | No | 67 // | No | Yes | Yes | 68 // | Yes | Yes | Yes | 69 if m.Fdst.Features().CaseInsensitive || ci.IgnoreCaseSync { 70 m.transforms = append(m.transforms, strings.ToLower) 71 } 72 } 73 74 // list a directory into entries, err 75 type listDirFn func(dir string) (entries fs.DirEntries, err error) 76 77 // makeListDir makes constructs a listing function for the given fs 78 // and includeAll flags for marching through the file system. 79 // Note: this will optionally flag filter-aware backends! 80 func (m *March) makeListDir(ctx context.Context, f fs.Fs, includeAll bool) listDirFn { 81 ci := fs.GetConfig(ctx) 82 fi := filter.GetConfig(ctx) 83 if !(ci.UseListR && f.Features().ListR != nil) && // !--fast-list active and 84 !(ci.NoTraverse && fi.HaveFilesFrom()) { // !(--files-from and --no-traverse) 85 return func(dir string) (entries fs.DirEntries, err error) { 86 dirCtx := filter.SetUseFilter(m.Ctx, f.Features().FilterAware && !includeAll) // make filter-aware backends constrain List 87 return list.DirSorted(dirCtx, f, includeAll, dir) 88 } 89 } 90 91 // This returns a closure for use when --fast-list is active or for when 92 // --files-from and --no-traverse is set 93 var ( 94 mu sync.Mutex 95 started bool 96 dirs dirtree.DirTree 97 dirsErr error 98 ) 99 return func(dir string) (entries fs.DirEntries, err error) { 100 mu.Lock() 101 defer mu.Unlock() 102 if !started { 103 dirCtx := filter.SetUseFilter(m.Ctx, f.Features().FilterAware && !includeAll) // make filter-aware backends constrain List 104 dirs, dirsErr = walk.NewDirTree(dirCtx, f, m.Dir, includeAll, ci.MaxDepth) 105 started = true 106 } 107 if dirsErr != nil { 108 return nil, dirsErr 109 } 110 entries, ok := dirs[dir] 111 if !ok { 112 err = fs.ErrorDirNotFound 113 } else { 114 delete(dirs, dir) 115 } 116 return entries, err 117 } 118 } 119 120 // listDirJob describe a directory listing that needs to be done 121 type listDirJob struct { 122 srcRemote string 123 dstRemote string 124 srcDepth int 125 dstDepth int 126 noSrc bool 127 noDst bool 128 } 129 130 // Run starts the matching process off 131 func (m *March) Run(ctx context.Context) error { 132 ci := fs.GetConfig(ctx) 133 fi := filter.GetConfig(ctx) 134 m.init(ctx) 135 136 srcDepth := ci.MaxDepth 137 if srcDepth < 0 { 138 srcDepth = fs.MaxLevel 139 } 140 dstDepth := srcDepth 141 if fi.Opt.DeleteExcluded { 142 dstDepth = fs.MaxLevel 143 } 144 145 var mu sync.Mutex // Protects vars below 146 var jobError error 147 var errCount int 148 149 // Start some directory listing go routines 150 var wg sync.WaitGroup // sync closing of go routines 151 var traversing sync.WaitGroup // running directory traversals 152 checkers := ci.Checkers 153 in := make(chan listDirJob, checkers) 154 for i := 0; i < checkers; i++ { 155 wg.Add(1) 156 go func() { 157 defer wg.Done() 158 for { 159 select { 160 case <-m.Ctx.Done(): 161 return 162 case job, ok := <-in: 163 if !ok { 164 return 165 } 166 jobs, err := m.processJob(job) 167 if err != nil { 168 mu.Lock() 169 // Keep reference only to the first encountered error 170 if jobError == nil { 171 jobError = err 172 } 173 errCount++ 174 mu.Unlock() 175 } 176 if len(jobs) > 0 { 177 traversing.Add(len(jobs)) 178 go func() { 179 // Now we have traversed this directory, send these 180 // jobs off for traversal in the background 181 for _, newJob := range jobs { 182 select { 183 case <-m.Ctx.Done(): 184 // discard job if finishing 185 traversing.Done() 186 case in <- newJob: 187 } 188 } 189 }() 190 } 191 traversing.Done() 192 } 193 } 194 }() 195 } 196 197 // Start the process 198 traversing.Add(1) 199 in <- listDirJob{ 200 srcRemote: m.Dir, 201 srcDepth: srcDepth - 1, 202 dstRemote: m.Dir, 203 dstDepth: dstDepth - 1, 204 noDst: m.NoCheckDest, 205 } 206 go func() { 207 // when the context is cancelled discard the remaining jobs 208 <-m.Ctx.Done() 209 for range in { 210 traversing.Done() 211 } 212 }() 213 traversing.Wait() 214 close(in) 215 wg.Wait() 216 217 if errCount > 1 { 218 return fmt.Errorf("march failed with %d error(s): first error: %w", errCount, jobError) 219 } 220 return jobError 221 } 222 223 // Check to see if the context has been cancelled 224 func (m *March) aborting() bool { 225 select { 226 case <-m.Ctx.Done(): 227 return true 228 default: 229 } 230 return false 231 } 232 233 // matchEntry is an entry plus transformed name 234 type matchEntry struct { 235 entry fs.DirEntry 236 leaf string 237 name string 238 } 239 240 // matchEntries contains many matchEntry~s 241 type matchEntries []matchEntry 242 243 // Len is part of sort.Interface. 244 func (es matchEntries) Len() int { return len(es) } 245 246 // Swap is part of sort.Interface. 247 func (es matchEntries) Swap(i, j int) { es[i], es[j] = es[j], es[i] } 248 249 // Less is part of sort.Interface. 250 // 251 // Compare in order (name, leaf, remote) 252 func (es matchEntries) Less(i, j int) bool { 253 ei, ej := &es[i], &es[j] 254 if ei.name == ej.name { 255 if ei.leaf == ej.leaf { 256 return fs.CompareDirEntries(ei.entry, ej.entry) < 0 257 } 258 return ei.leaf < ej.leaf 259 } 260 return ei.name < ej.name 261 } 262 263 // Sort the directory entries by (name, leaf, remote) 264 // 265 // We use a stable sort here just in case there are 266 // duplicates. Assuming the remote delivers the entries in a 267 // consistent order, this will give the best user experience 268 // in syncing as it will use the first entry for the sync 269 // comparison. 270 func (es matchEntries) sort() { 271 sort.Stable(es) 272 } 273 274 // make a matchEntries from a newMatch entries 275 func newMatchEntries(entries fs.DirEntries, transforms []matchTransformFn) matchEntries { 276 es := make(matchEntries, len(entries)) 277 for i := range es { 278 es[i].entry = entries[i] 279 name := path.Base(entries[i].Remote()) 280 es[i].leaf = name 281 for _, transform := range transforms { 282 name = transform(name) 283 } 284 es[i].name = name 285 } 286 es.sort() 287 return es 288 } 289 290 // matchPair is a matched pair of direntries returned by matchListings 291 type matchPair struct { 292 src, dst fs.DirEntry 293 } 294 295 // matchTransformFn converts a name into a form which is used for 296 // comparison in matchListings. 297 type matchTransformFn func(name string) string 298 299 // Process the two listings, matching up the items in the two slices 300 // using the transform function on each name first. 301 // 302 // Into srcOnly go Entries which only exist in the srcList 303 // Into dstOnly go Entries which only exist in the dstList 304 // Into matches go matchPair's of src and dst which have the same name 305 // 306 // This checks for duplicates and checks the list is sorted. 307 func matchListings(srcListEntries, dstListEntries fs.DirEntries, transforms []matchTransformFn) (srcOnly fs.DirEntries, dstOnly fs.DirEntries, matches []matchPair) { 308 srcList := newMatchEntries(srcListEntries, transforms) 309 dstList := newMatchEntries(dstListEntries, transforms) 310 311 for iSrc, iDst := 0, 0; ; iSrc, iDst = iSrc+1, iDst+1 { 312 var src, dst fs.DirEntry 313 var srcName, dstName string 314 if iSrc < len(srcList) { 315 src = srcList[iSrc].entry 316 srcName = srcList[iSrc].name 317 } 318 if iDst < len(dstList) { 319 dst = dstList[iDst].entry 320 dstName = dstList[iDst].name 321 } 322 if src == nil && dst == nil { 323 break 324 } 325 if src != nil && iSrc > 0 { 326 prev := srcList[iSrc-1].entry 327 prevName := srcList[iSrc-1].name 328 if srcName == prevName && fs.DirEntryType(prev) == fs.DirEntryType(src) { 329 fs.Logf(src, "Duplicate %s found in source - ignoring", fs.DirEntryType(src)) 330 iDst-- // ignore the src and retry the dst 331 continue 332 } else if srcName < prevName { 333 // this should never happen since we sort the listings 334 panic("Out of order listing in source") 335 } 336 } 337 if dst != nil && iDst > 0 { 338 prev := dstList[iDst-1].entry 339 prevName := dstList[iDst-1].name 340 if dstName == prevName && fs.DirEntryType(dst) == fs.DirEntryType(prev) { 341 fs.Logf(dst, "Duplicate %s found in destination - ignoring", fs.DirEntryType(dst)) 342 iSrc-- // ignore the dst and retry the src 343 continue 344 } else if dstName < prevName { 345 // this should never happen since we sort the listings 346 panic("Out of order listing in destination") 347 } 348 } 349 if src != nil && dst != nil { 350 // we can't use CompareDirEntries because srcName, dstName could 351 // be different then src.Remote() or dst.Remote() 352 srcType := fs.DirEntryType(src) 353 dstType := fs.DirEntryType(dst) 354 if srcName > dstName || (srcName == dstName && srcType > dstType) { 355 src = nil 356 iSrc-- 357 } else if srcName < dstName || (srcName == dstName && srcType < dstType) { 358 dst = nil 359 iDst-- 360 } 361 } 362 // Debugf(nil, "src = %v, dst = %v", src, dst) 363 switch { 364 case src == nil && dst == nil: 365 // do nothing 366 case src == nil: 367 dstOnly = append(dstOnly, dst) 368 case dst == nil: 369 srcOnly = append(srcOnly, src) 370 default: 371 matches = append(matches, matchPair{src: src, dst: dst}) 372 } 373 } 374 return 375 } 376 377 // processJob processes a listDirJob listing the source and 378 // destination directories, comparing them and returning a slice of 379 // more jobs 380 // 381 // returns errors using processError 382 func (m *March) processJob(job listDirJob) ([]listDirJob, error) { 383 var ( 384 jobs []listDirJob 385 srcList, dstList fs.DirEntries 386 srcListErr, dstListErr error 387 wg sync.WaitGroup 388 mu sync.Mutex 389 ) 390 391 // List the src and dst directories 392 if !job.noSrc { 393 wg.Add(1) 394 go func() { 395 defer wg.Done() 396 srcList, srcListErr = m.srcListDir(job.srcRemote) 397 }() 398 } 399 if !m.NoTraverse && !job.noDst { 400 wg.Add(1) 401 go func() { 402 defer wg.Done() 403 dstList, dstListErr = m.dstListDir(job.dstRemote) 404 }() 405 } 406 407 // Wait for listings to complete and report errors 408 wg.Wait() 409 if srcListErr != nil { 410 if job.srcRemote != "" { 411 fs.Errorf(job.srcRemote, "error reading source directory: %v", srcListErr) 412 } else { 413 fs.Errorf(m.Fsrc, "error reading source root directory: %v", srcListErr) 414 } 415 srcListErr = fs.CountError(srcListErr) 416 return nil, srcListErr 417 } 418 if dstListErr == fs.ErrorDirNotFound { 419 // Copy the stuff anyway 420 } else if dstListErr != nil { 421 if job.dstRemote != "" { 422 fs.Errorf(job.dstRemote, "error reading destination directory: %v", dstListErr) 423 } else { 424 fs.Errorf(m.Fdst, "error reading destination root directory: %v", dstListErr) 425 } 426 dstListErr = fs.CountError(dstListErr) 427 return nil, dstListErr 428 } 429 430 // If NoTraverse is set, then try to find a matching object 431 // for each item in the srcList to head dst object 432 ci := fs.GetConfig(m.Ctx) 433 limiter := make(chan struct{}, ci.Checkers) 434 if m.NoTraverse && !m.NoCheckDest { 435 for _, src := range srcList { 436 wg.Add(1) 437 limiter <- struct{}{} 438 go func(limiter chan struct{}, src fs.DirEntry) { 439 defer wg.Done() 440 if srcObj, ok := src.(fs.Object); ok { 441 leaf := path.Base(srcObj.Remote()) 442 dstObj, err := m.Fdst.NewObject(m.Ctx, path.Join(job.dstRemote, leaf)) 443 if err == nil { 444 mu.Lock() 445 dstList = append(dstList, dstObj) 446 mu.Unlock() 447 } 448 } 449 <-limiter 450 }(limiter, src) 451 } 452 wg.Wait() 453 } 454 455 // Work out what to do and do it 456 srcOnly, dstOnly, matches := matchListings(srcList, dstList, m.transforms) 457 for _, src := range srcOnly { 458 if m.aborting() { 459 return nil, m.Ctx.Err() 460 } 461 recurse := m.Callback.SrcOnly(src) 462 if recurse && job.srcDepth > 0 { 463 jobs = append(jobs, listDirJob{ 464 srcRemote: src.Remote(), 465 dstRemote: src.Remote(), 466 srcDepth: job.srcDepth - 1, 467 noDst: true, 468 }) 469 } 470 471 } 472 for _, dst := range dstOnly { 473 if m.aborting() { 474 return nil, m.Ctx.Err() 475 } 476 recurse := m.Callback.DstOnly(dst) 477 if recurse && job.dstDepth > 0 { 478 jobs = append(jobs, listDirJob{ 479 srcRemote: dst.Remote(), 480 dstRemote: dst.Remote(), 481 dstDepth: job.dstDepth - 1, 482 noSrc: true, 483 }) 484 } 485 } 486 for _, match := range matches { 487 if m.aborting() { 488 return nil, m.Ctx.Err() 489 } 490 recurse := m.Callback.Match(m.Ctx, match.dst, match.src) 491 if recurse && job.srcDepth > 0 && job.dstDepth > 0 { 492 jobs = append(jobs, listDirJob{ 493 srcRemote: match.src.Remote(), 494 dstRemote: match.dst.Remote(), 495 srcDepth: job.srcDepth - 1, 496 dstDepth: job.dstDepth - 1, 497 }) 498 } 499 } 500 return jobs, nil 501 }