github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/fs/march/march.go (about) 1 // Package march traverses two directories in lock step 2 package march 3 4 import ( 5 "context" 6 "path" 7 "sort" 8 "strings" 9 "sync" 10 11 "github.com/pkg/errors" 12 13 "github.com/rclone/rclone/fs" 14 "github.com/rclone/rclone/fs/dirtree" 15 "github.com/rclone/rclone/fs/filter" 16 "github.com/rclone/rclone/fs/list" 17 "github.com/rclone/rclone/fs/walk" 18 "golang.org/x/text/unicode/norm" 19 ) 20 21 // March holds the data used to traverse two Fs simultaneously, 22 // calling Callback for each match 23 type March struct { 24 // parameters 25 Ctx context.Context // context for background goroutines 26 Fdst fs.Fs // source Fs 27 Fsrc fs.Fs // dest Fs 28 Dir string // directory 29 NoTraverse bool // don't traverse the destination 30 SrcIncludeAll bool // don't include all files in the src 31 DstIncludeAll bool // don't include all files in the destination 32 Callback Marcher // object to call with results 33 NoCheckDest bool // transfer all objects regardless without checking dst 34 NoUnicodeNormalization bool // don't normalize unicode characters in filenames 35 // internal state 36 srcListDir listDirFn // function to call to list a directory in the src 37 dstListDir listDirFn // function to call to list a directory in the dst 38 transforms []matchTransformFn 39 } 40 41 // Marcher is called on each match 42 type Marcher interface { 43 // SrcOnly is called for a DirEntry found only in the source 44 SrcOnly(src fs.DirEntry) (recurse bool) 45 // DstOnly is called for a DirEntry found only in the destination 46 DstOnly(dst fs.DirEntry) (recurse bool) 47 // Match is called for a DirEntry found both in the source and destination 48 Match(ctx context.Context, dst, src fs.DirEntry) (recurse bool) 49 } 50 51 // init sets up a march over opt.Fsrc, and opt.Fdst calling back callback for each match 52 func (m *March) init() { 53 m.srcListDir = m.makeListDir(m.Fsrc, m.SrcIncludeAll) 54 if !m.NoTraverse { 55 m.dstListDir = m.makeListDir(m.Fdst, m.DstIncludeAll) 56 } 57 // Now create the matching transform 58 // ..normalise the UTF8 first 59 if !m.NoUnicodeNormalization { 60 m.transforms = append(m.transforms, norm.NFC.String) 61 } 62 // ..if destination is caseInsensitive then make it lower case 63 // case Insensitive | src | dst | lower case compare | 64 // | No | No | No | 65 // | Yes | No | No | 66 // | No | Yes | Yes | 67 // | Yes | Yes | Yes | 68 if m.Fdst.Features().CaseInsensitive || fs.Config.IgnoreCaseSync { 69 m.transforms = append(m.transforms, strings.ToLower) 70 } 71 } 72 73 // list a directory into entries, err 74 type listDirFn func(dir string) (entries fs.DirEntries, err error) 75 76 // makeListDir makes constructs a listing function for the given fs 77 // and includeAll flags for marching through the file system. 78 func (m *March) makeListDir(f fs.Fs, includeAll bool) listDirFn { 79 if !(fs.Config.UseListR && f.Features().ListR != nil) && // !--fast-list active and 80 !(fs.Config.NoTraverse && filter.Active.HaveFilesFrom()) { // !(--files-from and --no-traverse) 81 return func(dir string) (entries fs.DirEntries, err error) { 82 return list.DirSorted(m.Ctx, f, includeAll, dir) 83 } 84 } 85 86 // This returns a closure for use when --fast-list is active or for when 87 // --files-from and --no-traverse is set 88 var ( 89 mu sync.Mutex 90 started bool 91 dirs dirtree.DirTree 92 dirsErr error 93 ) 94 return func(dir string) (entries fs.DirEntries, err error) { 95 mu.Lock() 96 defer mu.Unlock() 97 if !started { 98 dirs, dirsErr = walk.NewDirTree(m.Ctx, f, m.Dir, includeAll, fs.Config.MaxDepth) 99 started = true 100 } 101 if dirsErr != nil { 102 return nil, dirsErr 103 } 104 entries, ok := dirs[dir] 105 if !ok { 106 err = fs.ErrorDirNotFound 107 } else { 108 delete(dirs, dir) 109 } 110 return entries, err 111 } 112 } 113 114 // listDirJob describe a directory listing that needs to be done 115 type listDirJob struct { 116 srcRemote string 117 dstRemote string 118 srcDepth int 119 dstDepth int 120 noSrc bool 121 noDst bool 122 } 123 124 // Run starts the matching process off 125 func (m *March) Run() error { 126 m.init() 127 128 srcDepth := fs.Config.MaxDepth 129 if srcDepth < 0 { 130 srcDepth = fs.MaxLevel 131 } 132 dstDepth := srcDepth 133 if filter.Active.Opt.DeleteExcluded { 134 dstDepth = fs.MaxLevel 135 } 136 137 var mu sync.Mutex // Protects vars below 138 var jobError error 139 var errCount int 140 141 // Start some directory listing go routines 142 var wg sync.WaitGroup // sync closing of go routines 143 var traversing sync.WaitGroup // running directory traversals 144 in := make(chan listDirJob, fs.Config.Checkers) 145 for i := 0; i < fs.Config.Checkers; i++ { 146 wg.Add(1) 147 go func() { 148 defer wg.Done() 149 for { 150 select { 151 case <-m.Ctx.Done(): 152 return 153 case job, ok := <-in: 154 if !ok { 155 return 156 } 157 jobs, err := m.processJob(job) 158 if err != nil { 159 mu.Lock() 160 // Keep reference only to the first encountered error 161 if jobError == nil { 162 jobError = err 163 } 164 errCount++ 165 mu.Unlock() 166 } 167 if len(jobs) > 0 { 168 traversing.Add(len(jobs)) 169 go func() { 170 // Now we have traversed this directory, send these 171 // jobs off for traversal in the background 172 for _, newJob := range jobs { 173 select { 174 case <-m.Ctx.Done(): 175 // discard job if finishing 176 traversing.Done() 177 case in <- newJob: 178 } 179 } 180 }() 181 } 182 traversing.Done() 183 } 184 } 185 }() 186 } 187 188 // Start the process 189 traversing.Add(1) 190 in <- listDirJob{ 191 srcRemote: m.Dir, 192 srcDepth: srcDepth - 1, 193 dstRemote: m.Dir, 194 dstDepth: dstDepth - 1, 195 noDst: m.NoCheckDest, 196 } 197 go func() { 198 // when the context is cancelled discard the remaining jobs 199 <-m.Ctx.Done() 200 for range in { 201 traversing.Done() 202 } 203 }() 204 traversing.Wait() 205 close(in) 206 wg.Wait() 207 208 if errCount > 1 { 209 return errors.Wrapf(jobError, "march failed with %d error(s): first error", errCount) 210 } 211 return jobError 212 } 213 214 // Check to see if the context has been cancelled 215 func (m *March) aborting() bool { 216 select { 217 case <-m.Ctx.Done(): 218 return true 219 default: 220 } 221 return false 222 } 223 224 // matchEntry is an entry plus transformed name 225 type matchEntry struct { 226 entry fs.DirEntry 227 leaf string 228 name string 229 } 230 231 // matchEntries contains many matchEntry~s 232 type matchEntries []matchEntry 233 234 // Len is part of sort.Interface. 235 func (es matchEntries) Len() int { return len(es) } 236 237 // Swap is part of sort.Interface. 238 func (es matchEntries) Swap(i, j int) { es[i], es[j] = es[j], es[i] } 239 240 // Less is part of sort.Interface. 241 // 242 // Compare in order (name, leaf, remote) 243 func (es matchEntries) Less(i, j int) bool { 244 ei, ej := &es[i], &es[j] 245 if ei.name == ej.name { 246 if ei.leaf == ej.leaf { 247 return fs.CompareDirEntries(ei.entry, ej.entry) < 0 248 } 249 return ei.leaf < ej.leaf 250 } 251 return ei.name < ej.name 252 } 253 254 // Sort the directory entries by (name, leaf, remote) 255 // 256 // We use a stable sort here just in case there are 257 // duplicates. Assuming the remote delivers the entries in a 258 // consistent order, this will give the best user experience 259 // in syncing as it will use the first entry for the sync 260 // comparison. 261 func (es matchEntries) sort() { 262 sort.Stable(es) 263 } 264 265 // make a matchEntries from a newMatch entries 266 func newMatchEntries(entries fs.DirEntries, transforms []matchTransformFn) matchEntries { 267 es := make(matchEntries, len(entries)) 268 for i := range es { 269 es[i].entry = entries[i] 270 name := path.Base(entries[i].Remote()) 271 es[i].leaf = name 272 for _, transform := range transforms { 273 name = transform(name) 274 } 275 es[i].name = name 276 } 277 es.sort() 278 return es 279 } 280 281 // matchPair is a matched pair of direntries returned by matchListings 282 type matchPair struct { 283 src, dst fs.DirEntry 284 } 285 286 // matchTransformFn converts a name into a form which is used for 287 // comparison in matchListings. 288 type matchTransformFn func(name string) string 289 290 // Process the two listings, matching up the items in the two slices 291 // using the transform function on each name first. 292 // 293 // Into srcOnly go Entries which only exist in the srcList 294 // Into dstOnly go Entries which only exist in the dstList 295 // Into matches go matchPair's of src and dst which have the same name 296 // 297 // This checks for duplicates and checks the list is sorted. 298 func matchListings(srcListEntries, dstListEntries fs.DirEntries, transforms []matchTransformFn) (srcOnly fs.DirEntries, dstOnly fs.DirEntries, matches []matchPair) { 299 srcList := newMatchEntries(srcListEntries, transforms) 300 dstList := newMatchEntries(dstListEntries, transforms) 301 302 for iSrc, iDst := 0, 0; ; iSrc, iDst = iSrc+1, iDst+1 { 303 var src, dst fs.DirEntry 304 var srcName, dstName string 305 if iSrc < len(srcList) { 306 src = srcList[iSrc].entry 307 srcName = srcList[iSrc].name 308 } 309 if iDst < len(dstList) { 310 dst = dstList[iDst].entry 311 dstName = dstList[iDst].name 312 } 313 if src == nil && dst == nil { 314 break 315 } 316 if src != nil && iSrc > 0 { 317 prev := srcList[iSrc-1].entry 318 prevName := srcList[iSrc-1].name 319 if srcName == prevName && fs.DirEntryType(prev) == fs.DirEntryType(src) { 320 fs.Logf(src, "Duplicate %s found in source - ignoring", fs.DirEntryType(src)) 321 iDst-- // ignore the src and retry the dst 322 continue 323 } else if srcName < prevName { 324 // this should never happen since we sort the listings 325 panic("Out of order listing in source") 326 } 327 } 328 if dst != nil && iDst > 0 { 329 prev := dstList[iDst-1].entry 330 prevName := dstList[iDst-1].name 331 if dstName == prevName && fs.DirEntryType(dst) == fs.DirEntryType(prev) { 332 fs.Logf(dst, "Duplicate %s found in destination - ignoring", fs.DirEntryType(dst)) 333 iSrc-- // ignore the dst and retry the src 334 continue 335 } else if dstName < prevName { 336 // this should never happen since we sort the listings 337 panic("Out of order listing in destination") 338 } 339 } 340 if src != nil && dst != nil { 341 // we can't use CompareDirEntries because srcName, dstName could 342 // be different then src.Remote() or dst.Remote() 343 srcType := fs.DirEntryType(src) 344 dstType := fs.DirEntryType(dst) 345 if srcName > dstName || (srcName == dstName && srcType > dstType) { 346 src = nil 347 iSrc-- 348 } else if srcName < dstName || (srcName == dstName && srcType < dstType) { 349 dst = nil 350 iDst-- 351 } 352 } 353 // Debugf(nil, "src = %v, dst = %v", src, dst) 354 switch { 355 case src == nil && dst == nil: 356 // do nothing 357 case src == nil: 358 dstOnly = append(dstOnly, dst) 359 case dst == nil: 360 srcOnly = append(srcOnly, src) 361 default: 362 matches = append(matches, matchPair{src: src, dst: dst}) 363 } 364 } 365 return 366 } 367 368 // processJob processes a listDirJob listing the source and 369 // destination directories, comparing them and returning a slice of 370 // more jobs 371 // 372 // returns errors using processError 373 func (m *March) processJob(job listDirJob) ([]listDirJob, error) { 374 var ( 375 jobs []listDirJob 376 srcList, dstList fs.DirEntries 377 srcListErr, dstListErr error 378 wg sync.WaitGroup 379 ) 380 381 // List the src and dst directories 382 if !job.noSrc { 383 wg.Add(1) 384 go func() { 385 defer wg.Done() 386 srcList, srcListErr = m.srcListDir(job.srcRemote) 387 }() 388 } 389 if !m.NoTraverse && !job.noDst { 390 wg.Add(1) 391 go func() { 392 defer wg.Done() 393 dstList, dstListErr = m.dstListDir(job.dstRemote) 394 }() 395 } 396 397 // Wait for listings to complete and report errors 398 wg.Wait() 399 if srcListErr != nil { 400 fs.Errorf(job.srcRemote, "error reading source directory: %v", srcListErr) 401 srcListErr = fs.CountError(srcListErr) 402 return nil, srcListErr 403 } 404 if dstListErr == fs.ErrorDirNotFound { 405 // Copy the stuff anyway 406 } else if dstListErr != nil { 407 fs.Errorf(job.dstRemote, "error reading destination directory: %v", dstListErr) 408 dstListErr = fs.CountError(dstListErr) 409 return nil, dstListErr 410 } 411 412 // If NoTraverse is set, then try to find a matching object 413 // for each item in the srcList 414 if m.NoTraverse && !m.NoCheckDest { 415 for _, src := range srcList { 416 if srcObj, ok := src.(fs.Object); ok { 417 leaf := path.Base(srcObj.Remote()) 418 dstObj, err := m.Fdst.NewObject(m.Ctx, path.Join(job.dstRemote, leaf)) 419 if err == nil { 420 dstList = append(dstList, dstObj) 421 } 422 } 423 } 424 } 425 426 // Work out what to do and do it 427 srcOnly, dstOnly, matches := matchListings(srcList, dstList, m.transforms) 428 for _, src := range srcOnly { 429 if m.aborting() { 430 return nil, m.Ctx.Err() 431 } 432 recurse := m.Callback.SrcOnly(src) 433 if recurse && job.srcDepth > 0 { 434 jobs = append(jobs, listDirJob{ 435 srcRemote: src.Remote(), 436 dstRemote: src.Remote(), 437 srcDepth: job.srcDepth - 1, 438 noDst: true, 439 }) 440 } 441 442 } 443 for _, dst := range dstOnly { 444 if m.aborting() { 445 return nil, m.Ctx.Err() 446 } 447 recurse := m.Callback.DstOnly(dst) 448 if recurse && job.dstDepth > 0 { 449 jobs = append(jobs, listDirJob{ 450 srcRemote: dst.Remote(), 451 dstRemote: dst.Remote(), 452 dstDepth: job.dstDepth - 1, 453 noSrc: true, 454 }) 455 } 456 } 457 for _, match := range matches { 458 if m.aborting() { 459 return nil, m.Ctx.Err() 460 } 461 recurse := m.Callback.Match(m.Ctx, match.dst, match.src) 462 if recurse && job.srcDepth > 0 && job.dstDepth > 0 { 463 jobs = append(jobs, listDirJob{ 464 srcRemote: match.src.Remote(), 465 dstRemote: match.dst.Remote(), 466 srcDepth: job.srcDepth - 1, 467 dstDepth: job.dstDepth - 1, 468 }) 469 } 470 } 471 return jobs, nil 472 }