github.com/ncw/rclone@v1.48.1-0.20190724201158-a35aa1360e3e/fs/march/march.go (about) 1 // Package march traverses two directories in lock step 2 package march 3 4 import ( 5 "context" 6 "path" 7 "sort" 8 "strings" 9 "sync" 10 11 "github.com/pkg/errors" 12 13 "github.com/ncw/rclone/fs" 14 "github.com/ncw/rclone/fs/dirtree" 15 "github.com/ncw/rclone/fs/filter" 16 "github.com/ncw/rclone/fs/list" 17 "github.com/ncw/rclone/fs/walk" 18 "golang.org/x/text/unicode/norm" 19 ) 20 21 // March holds the data used to traverse two Fs simultaneously, 22 // calling Callback for each match 23 type March struct { 24 // parameters 25 Ctx context.Context // context for background goroutines 26 Fdst fs.Fs // source Fs 27 Fsrc fs.Fs // dest Fs 28 Dir string // directory 29 NoTraverse bool // don't traverse the destination 30 SrcIncludeAll bool // don't include all files in the src 31 DstIncludeAll bool // don't include all files in the destination 32 Callback Marcher // object to call with results 33 // internal state 34 srcListDir listDirFn // function to call to list a directory in the src 35 dstListDir listDirFn // function to call to list a directory in the dst 36 transforms []matchTransformFn 37 } 38 39 // Marcher is called on each match 40 type Marcher interface { 41 // SrcOnly is called for a DirEntry found only in the source 42 SrcOnly(src fs.DirEntry) (recurse bool) 43 // DstOnly is called for a DirEntry found only in the destination 44 DstOnly(dst fs.DirEntry) (recurse bool) 45 // Match is called for a DirEntry found both in the source and destination 46 Match(ctx context.Context, dst, src fs.DirEntry) (recurse bool) 47 } 48 49 // init sets up a march over opt.Fsrc, and opt.Fdst calling back callback for each match 50 func (m *March) init() { 51 m.srcListDir = m.makeListDir(m.Fsrc, m.SrcIncludeAll) 52 if !m.NoTraverse { 53 m.dstListDir = m.makeListDir(m.Fdst, m.DstIncludeAll) 54 } 55 // Now create the matching transform 56 // ..normalise the UTF8 first 57 m.transforms = append(m.transforms, norm.NFC.String) 58 // ..if destination is caseInsensitive then make it lower case 59 // case Insensitive | src | dst | lower case compare | 60 // | No | No | No | 61 // | Yes | No | No | 62 // | No | Yes | Yes | 63 // | Yes | Yes | Yes | 64 if m.Fdst.Features().CaseInsensitive || fs.Config.IgnoreCaseSync { 65 m.transforms = append(m.transforms, strings.ToLower) 66 } 67 } 68 69 // list a directory into entries, err 70 type listDirFn func(dir string) (entries fs.DirEntries, err error) 71 72 // makeListDir makes a listing function for the given fs and includeAll flags 73 func (m *March) makeListDir(f fs.Fs, includeAll bool) listDirFn { 74 if (!fs.Config.UseListR || f.Features().ListR == nil) && !filter.Active.HaveFilesFrom() { 75 return func(dir string) (entries fs.DirEntries, err error) { 76 return list.DirSorted(m.Ctx, f, includeAll, dir) 77 } 78 } 79 var ( 80 mu sync.Mutex 81 started bool 82 dirs dirtree.DirTree 83 dirsErr error 84 ) 85 return func(dir string) (entries fs.DirEntries, err error) { 86 mu.Lock() 87 defer mu.Unlock() 88 if !started { 89 dirs, dirsErr = walk.NewDirTree(m.Ctx, f, m.Dir, includeAll, fs.Config.MaxDepth) 90 started = true 91 } 92 if dirsErr != nil { 93 return nil, dirsErr 94 } 95 entries, ok := dirs[dir] 96 if !ok { 97 err = fs.ErrorDirNotFound 98 } else { 99 delete(dirs, dir) 100 } 101 return entries, err 102 } 103 } 104 105 // listDirJob describe a directory listing that needs to be done 106 type listDirJob struct { 107 srcRemote string 108 dstRemote string 109 srcDepth int 110 dstDepth int 111 noSrc bool 112 noDst bool 113 } 114 115 // Run starts the matching process off 116 func (m *March) Run() error { 117 m.init() 118 119 srcDepth := fs.Config.MaxDepth 120 if srcDepth < 0 { 121 srcDepth = fs.MaxLevel 122 } 123 dstDepth := srcDepth 124 if filter.Active.Opt.DeleteExcluded { 125 dstDepth = fs.MaxLevel 126 } 127 128 var mu sync.Mutex // Protects vars below 129 var jobError error 130 var errCount int 131 132 // Start some directory listing go routines 133 var wg sync.WaitGroup // sync closing of go routines 134 var traversing sync.WaitGroup // running directory traversals 135 in := make(chan listDirJob, fs.Config.Checkers) 136 for i := 0; i < fs.Config.Checkers; i++ { 137 wg.Add(1) 138 go func() { 139 defer wg.Done() 140 for { 141 select { 142 case <-m.Ctx.Done(): 143 return 144 case job, ok := <-in: 145 if !ok { 146 return 147 } 148 jobs, err := m.processJob(job) 149 if err != nil { 150 mu.Lock() 151 // Keep reference only to the first encountered error 152 if jobError == nil { 153 jobError = err 154 } 155 errCount++ 156 mu.Unlock() 157 } 158 if len(jobs) > 0 { 159 traversing.Add(len(jobs)) 160 go func() { 161 // Now we have traversed this directory, send these 162 // jobs off for traversal in the background 163 for _, newJob := range jobs { 164 select { 165 case <-m.Ctx.Done(): 166 // discard job if finishing 167 traversing.Done() 168 case in <- newJob: 169 } 170 } 171 }() 172 } 173 traversing.Done() 174 } 175 } 176 }() 177 } 178 179 // Start the process 180 traversing.Add(1) 181 in <- listDirJob{ 182 srcRemote: m.Dir, 183 srcDepth: srcDepth - 1, 184 dstRemote: m.Dir, 185 dstDepth: dstDepth - 1, 186 } 187 go func() { 188 // when the context is cancelled discard the remaining jobs 189 <-m.Ctx.Done() 190 for range in { 191 traversing.Done() 192 } 193 }() 194 traversing.Wait() 195 close(in) 196 wg.Wait() 197 198 if errCount > 1 { 199 return errors.Wrapf(jobError, "march failed with %d error(s): first error", errCount) 200 } 201 return jobError 202 } 203 204 // Check to see if the context has been cancelled 205 func (m *March) aborting() bool { 206 select { 207 case <-m.Ctx.Done(): 208 return true 209 default: 210 } 211 return false 212 } 213 214 // matchEntry is an entry plus transformed name 215 type matchEntry struct { 216 entry fs.DirEntry 217 leaf string 218 name string 219 } 220 221 // matchEntries contains many matchEntry~s 222 type matchEntries []matchEntry 223 224 // Len is part of sort.Interface. 225 func (es matchEntries) Len() int { return len(es) } 226 227 // Swap is part of sort.Interface. 228 func (es matchEntries) Swap(i, j int) { es[i], es[j] = es[j], es[i] } 229 230 // Less is part of sort.Interface. 231 // 232 // Compare in order (name, leaf, remote) 233 func (es matchEntries) Less(i, j int) bool { 234 ei, ej := &es[i], &es[j] 235 if ei.name == ej.name { 236 if ei.leaf == ej.leaf { 237 return fs.CompareDirEntries(ei.entry, ej.entry) < 0 238 } 239 return ei.leaf < ej.leaf 240 } 241 return ei.name < ej.name 242 } 243 244 // Sort the directory entries by (name, leaf, remote) 245 // 246 // We use a stable sort here just in case there are 247 // duplicates. Assuming the remote delivers the entries in a 248 // consistent order, this will give the best user experience 249 // in syncing as it will use the first entry for the sync 250 // comparison. 251 func (es matchEntries) sort() { 252 sort.Stable(es) 253 } 254 255 // make a matchEntries from a newMatch entries 256 func newMatchEntries(entries fs.DirEntries, transforms []matchTransformFn) matchEntries { 257 es := make(matchEntries, len(entries)) 258 for i := range es { 259 es[i].entry = entries[i] 260 name := path.Base(entries[i].Remote()) 261 es[i].leaf = name 262 for _, transform := range transforms { 263 name = transform(name) 264 } 265 es[i].name = name 266 } 267 es.sort() 268 return es 269 } 270 271 // matchPair is a matched pair of direntries returned by matchListings 272 type matchPair struct { 273 src, dst fs.DirEntry 274 } 275 276 // matchTransformFn converts a name into a form which is used for 277 // comparison in matchListings. 278 type matchTransformFn func(name string) string 279 280 // Process the two listings, matching up the items in the two slices 281 // using the transform function on each name first. 282 // 283 // Into srcOnly go Entries which only exist in the srcList 284 // Into dstOnly go Entries which only exist in the dstList 285 // Into matches go matchPair's of src and dst which have the same name 286 // 287 // This checks for duplicates and checks the list is sorted. 288 func matchListings(srcListEntries, dstListEntries fs.DirEntries, transforms []matchTransformFn) (srcOnly fs.DirEntries, dstOnly fs.DirEntries, matches []matchPair) { 289 srcList := newMatchEntries(srcListEntries, transforms) 290 dstList := newMatchEntries(dstListEntries, transforms) 291 292 for iSrc, iDst := 0, 0; ; iSrc, iDst = iSrc+1, iDst+1 { 293 var src, dst fs.DirEntry 294 var srcName, dstName string 295 if iSrc < len(srcList) { 296 src = srcList[iSrc].entry 297 srcName = srcList[iSrc].name 298 } 299 if iDst < len(dstList) { 300 dst = dstList[iDst].entry 301 dstName = dstList[iDst].name 302 } 303 if src == nil && dst == nil { 304 break 305 } 306 if src != nil && iSrc > 0 { 307 prev := srcList[iSrc-1].entry 308 prevName := srcList[iSrc-1].name 309 if srcName == prevName && fs.DirEntryType(prev) == fs.DirEntryType(src) { 310 fs.Logf(src, "Duplicate %s found in source - ignoring", fs.DirEntryType(src)) 311 iDst-- // ignore the src and retry the dst 312 continue 313 } else if srcName < prevName { 314 // this should never happen since we sort the listings 315 panic("Out of order listing in source") 316 } 317 } 318 if dst != nil && iDst > 0 { 319 prev := dstList[iDst-1].entry 320 prevName := dstList[iDst-1].name 321 if dstName == prevName && fs.DirEntryType(dst) == fs.DirEntryType(prev) { 322 fs.Logf(dst, "Duplicate %s found in destination - ignoring", fs.DirEntryType(dst)) 323 iSrc-- // ignore the dst and retry the src 324 continue 325 } else if dstName < prevName { 326 // this should never happen since we sort the listings 327 panic("Out of order listing in destination") 328 } 329 } 330 if src != nil && dst != nil { 331 // we can't use CompareDirEntries because srcName, dstName could 332 // be different then src.Remote() or dst.Remote() 333 srcType := fs.DirEntryType(src) 334 dstType := fs.DirEntryType(dst) 335 if srcName > dstName || (srcName == dstName && srcType > dstType) { 336 src = nil 337 iSrc-- 338 } else if srcName < dstName || (srcName == dstName && srcType < dstType) { 339 dst = nil 340 iDst-- 341 } 342 } 343 // Debugf(nil, "src = %v, dst = %v", src, dst) 344 switch { 345 case src == nil && dst == nil: 346 // do nothing 347 case src == nil: 348 dstOnly = append(dstOnly, dst) 349 case dst == nil: 350 srcOnly = append(srcOnly, src) 351 default: 352 matches = append(matches, matchPair{src: src, dst: dst}) 353 } 354 } 355 return 356 } 357 358 // processJob processes a listDirJob listing the source and 359 // destination directories, comparing them and returning a slice of 360 // more jobs 361 // 362 // returns errors using processError 363 func (m *March) processJob(job listDirJob) ([]listDirJob, error) { 364 var ( 365 jobs []listDirJob 366 srcList, dstList fs.DirEntries 367 srcListErr, dstListErr error 368 wg sync.WaitGroup 369 ) 370 371 // List the src and dst directories 372 if !job.noSrc { 373 wg.Add(1) 374 go func() { 375 defer wg.Done() 376 srcList, srcListErr = m.srcListDir(job.srcRemote) 377 }() 378 } 379 if !m.NoTraverse && !job.noDst { 380 wg.Add(1) 381 go func() { 382 defer wg.Done() 383 dstList, dstListErr = m.dstListDir(job.dstRemote) 384 }() 385 } 386 387 // Wait for listings to complete and report errors 388 wg.Wait() 389 if srcListErr != nil { 390 fs.Errorf(job.srcRemote, "error reading source directory: %v", srcListErr) 391 fs.CountError(srcListErr) 392 return nil, srcListErr 393 } 394 if dstListErr == fs.ErrorDirNotFound { 395 // Copy the stuff anyway 396 } else if dstListErr != nil { 397 fs.Errorf(job.dstRemote, "error reading destination directory: %v", dstListErr) 398 fs.CountError(dstListErr) 399 return nil, dstListErr 400 } 401 402 // If NoTraverse is set, then try to find a matching object 403 // for each item in the srcList 404 if m.NoTraverse { 405 for _, src := range srcList { 406 if srcObj, ok := src.(fs.Object); ok { 407 leaf := path.Base(srcObj.Remote()) 408 dstObj, err := m.Fdst.NewObject(m.Ctx, path.Join(job.dstRemote, leaf)) 409 if err == nil { 410 dstList = append(dstList, dstObj) 411 } 412 } 413 } 414 } 415 416 // Work out what to do and do it 417 srcOnly, dstOnly, matches := matchListings(srcList, dstList, m.transforms) 418 for _, src := range srcOnly { 419 if m.aborting() { 420 return nil, m.Ctx.Err() 421 } 422 recurse := m.Callback.SrcOnly(src) 423 if recurse && job.srcDepth > 0 { 424 jobs = append(jobs, listDirJob{ 425 srcRemote: src.Remote(), 426 srcDepth: job.srcDepth - 1, 427 noDst: true, 428 }) 429 } 430 431 } 432 for _, dst := range dstOnly { 433 if m.aborting() { 434 return nil, m.Ctx.Err() 435 } 436 recurse := m.Callback.DstOnly(dst) 437 if recurse && job.dstDepth > 0 { 438 jobs = append(jobs, listDirJob{ 439 dstRemote: dst.Remote(), 440 dstDepth: job.dstDepth - 1, 441 noSrc: true, 442 }) 443 } 444 } 445 for _, match := range matches { 446 if m.aborting() { 447 return nil, m.Ctx.Err() 448 } 449 recurse := m.Callback.Match(m.Ctx, match.dst, match.src) 450 if recurse && job.srcDepth > 0 && job.dstDepth > 0 { 451 jobs = append(jobs, listDirJob{ 452 srcRemote: match.src.Remote(), 453 dstRemote: match.dst.Remote(), 454 srcDepth: job.srcDepth - 1, 455 dstDepth: job.dstDepth - 1, 456 }) 457 } 458 } 459 return jobs, nil 460 }