// github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/operations/check.go

package operations

import (
	"bufio"
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"regexp"
	"strings"
	"sync"
	"sync/atomic"

	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/filter"
	"github.com/rclone/rclone/fs/fserrors"
	"github.com/rclone/rclone/fs/hash"
	"github.com/rclone/rclone/fs/march"
	"github.com/rclone/rclone/lib/readers"
	"golang.org/x/text/unicode/norm"
)

// checkFn is the type of the checking function used in CheckFn()
//
// It should check the two objects (a, b) and return whether they differ
// and whether the hash could not be checked.
//
// If there are differences then this should Errorf the difference and
// the reason but return with err = nil. It should not CountError in
// this case.
type checkFn func(ctx context.Context, a, b fs.Object) (differ bool, noHash bool, err error)

// CheckOpt contains options for the Check functions
type CheckOpt struct {
	Fdst, Fsrc   fs.Fs     // fses to check
	Check        checkFn   // function to use for checking
	OneWay       bool      // one way only?
	Combined     io.Writer // a file with file names with leading sigils
	MissingOnSrc io.Writer // files only in the destination
	MissingOnDst io.Writer // files only in the source
	Match        io.Writer // matching files
	Differ       io.Writer // differing files
	Error        io.Writer // files with errors of some kind
}

// checkMarch is used to march over two Fses in the same way as
// sync/copy
type checkMarch struct {
	ioMu            sync.Mutex
	wg              sync.WaitGroup
	tokens          chan struct{}
	differences     atomic.Int32
	noHashes        atomic.Int32
	srcFilesMissing atomic.Int32
	dstFilesMissing atomic.Int32
	matches         atomic.Int32
	opt             CheckOpt
}

// report outputs the fileName to out if required and to the combined log
func (c *checkMarch) report(o fs.DirEntry, out io.Writer, sigil rune) {
	c.reportFilename(o.String(), out, sigil)
}

func (c *checkMarch) reportFilename(filename string, out io.Writer, sigil rune) {
	if out != nil {
		SyncFprintf(out, "%s\n", filename)
	}
	if c.opt.Combined != nil {
		SyncFprintf(c.opt.Combined, "%c %s\n", sigil, filename)
	}
}

// DstOnly is called for an object which is in the destination only
func (c *checkMarch) DstOnly(dst fs.DirEntry) (recurse bool) {
	switch dst.(type) {
	case fs.Object:
		if c.opt.OneWay {
			return false
		}
		err := fmt.Errorf("file not in %v", c.opt.Fsrc)
		fs.Errorf(dst, "%v", err)
		_ = fs.CountError(err)
		c.differences.Add(1)
		c.srcFilesMissing.Add(1)
		c.report(dst, c.opt.MissingOnSrc, '-')
	case fs.Directory:
		// Do the same thing to the entire contents of the directory
		if c.opt.OneWay {
			return false
		}
		return true
	default:
		panic("Bad object in DirEntries")
	}
	return false
}

// SrcOnly is called for an object which is in the source only
func (c *checkMarch) SrcOnly(src fs.DirEntry) (recurse bool) {
	switch src.(type) {
	case fs.Object:
		err := fmt.Errorf("file not in %v", c.opt.Fdst)
		fs.Errorf(src, "%v", err)
		_ = fs.CountError(err)
		c.differences.Add(1)
		c.dstFilesMissing.Add(1)
		c.report(src, c.opt.MissingOnDst, '+')
	case fs.Directory:
		// Do the same thing to the entire contents of the directory
		return true
	default:
		panic("Bad object in DirEntries")
	}
	return false
}
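
// newCheckOptBuffers is an illustrative sketch only (not part of the original
// file): it shows one way a caller could wire the CheckOpt writers so that the
// combined report and the per-category reports are captured in memory. The
// sigils written to Combined are those used by report() above: '-' missing on
// source, '+' missing on destination, '*' differing, '=' matching, '!' error.
// The function name and the choice of buffers are assumptions for the example.
func newCheckOptBuffers(fdst, fsrc fs.Fs) (opt CheckOpt, combined, differ *bytes.Buffer) {
	combined = new(bytes.Buffer)
	differ = new(bytes.Buffer)
	opt = CheckOpt{
		Fdst:     fdst,
		Fsrc:     fsrc,
		Combined: combined, // one "<sigil> <name>" line per file seen
		Differ:   differ,   // names of files whose contents differ
	}
	return opt, combined, differ
}
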
// check to see if two objects are identical using the check function
func (c *checkMarch) checkIdentical(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {
	ci := fs.GetConfig(ctx)
	tr := accounting.Stats(ctx).NewCheckingTransfer(src, "checking")
	defer func() {
		tr.Done(ctx, err)
	}()
	if sizeDiffers(ctx, src, dst) {
		err = fmt.Errorf("sizes differ")
		fs.Errorf(src, "%v", err)
		return true, false, nil
	}
	if ci.SizeOnly {
		return false, false, nil
	}
	return c.opt.Check(ctx, dst, src)
}

// Match is called when src and dst are both present, so check src against dst
func (c *checkMarch) Match(ctx context.Context, dst, src fs.DirEntry) (recurse bool) {
	switch srcX := src.(type) {
	case fs.Object:
		dstX, ok := dst.(fs.Object)
		if ok {
			if SkipDestructive(ctx, src, "check") {
				return false
			}
			c.wg.Add(1)
			c.tokens <- struct{}{} // put a token to limit concurrency
			go func() {
				defer func() {
					<-c.tokens // get the token back to free up a slot
					c.wg.Done()
				}()
				differ, noHash, err := c.checkIdentical(ctx, dstX, srcX)
				if err != nil {
					fs.Errorf(src, "%v", err)
					_ = fs.CountError(err)
					c.report(src, c.opt.Error, '!')
				} else if differ {
					c.differences.Add(1)
					err := errors.New("files differ")
					// the checkFn has already logged the reason
					_ = fs.CountError(err)
					c.report(src, c.opt.Differ, '*')
				} else {
					c.matches.Add(1)
					c.report(src, c.opt.Match, '=')
					if noHash {
						c.noHashes.Add(1)
						fs.Debugf(dstX, "OK - could not check hash")
					} else {
						fs.Debugf(dstX, "OK")
					}
				}
			}()
		} else {
			err := fmt.Errorf("is file on %v but directory on %v", c.opt.Fsrc, c.opt.Fdst)
			fs.Errorf(src, "%v", err)
			_ = fs.CountError(err)
			c.differences.Add(1)
			c.dstFilesMissing.Add(1)
			c.report(src, c.opt.MissingOnDst, '+')
		}
	case fs.Directory:
		// Do the same thing to the entire contents of the directory
		_, ok := dst.(fs.Directory)
		if ok {
			return true
		}
		err := fmt.Errorf("is file on %v but directory on %v", c.opt.Fdst, c.opt.Fsrc)
		fs.Errorf(dst, "%v", err)
		_ = fs.CountError(err)
		c.differences.Add(1)
		c.srcFilesMissing.Add(1)
		c.report(dst, c.opt.MissingOnSrc, '-')

	default:
		panic("Bad object in DirEntries")
	}
	return false
}
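
// runLimited is a small illustrative sketch (not in the original source) of
// the concurrency pattern Match uses above: a buffered channel of empty
// structs acts as a semaphore bounding the number of in-flight checks, and a
// WaitGroup lets the caller wait for the stragglers. The function name and
// parameters are assumptions for the example.
func runLimited(wg *sync.WaitGroup, tokens chan struct{}, job func()) {
	wg.Add(1)
	tokens <- struct{}{} // acquire a slot; blocks while the channel is full
	go func() {
		defer func() {
			<-tokens // release the slot
			wg.Done()
		}()
		job()
	}()
}
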
// CheckFn checks the files in fsrc and fdst according to Size and
// hash, using checkFunction on each file to check the hashes.
//
// checkFunction sees if dst and src are identical: it returns true if
// differences were found, and also whether the hash could not be checked.
func CheckFn(ctx context.Context, opt *CheckOpt) error {
	ci := fs.GetConfig(ctx)
	if opt.Check == nil {
		return errors.New("internal error: nil check function")
	}
	c := &checkMarch{
		tokens: make(chan struct{}, ci.Checkers),
		opt:    *opt,
	}

	// set up a march over fdst and fsrc
	m := &march.March{
		Ctx:                    ctx,
		Fdst:                   c.opt.Fdst,
		Fsrc:                   c.opt.Fsrc,
		Dir:                    "",
		Callback:               c,
		NoTraverse:             ci.NoTraverse,
		NoUnicodeNormalization: ci.NoUnicodeNormalization,
	}
	fs.Debugf(c.opt.Fdst, "Waiting for checks to finish")
	err := m.Run(ctx)
	c.wg.Wait() // wait for background go-routines

	return c.reportResults(ctx, err)
}

func (c *checkMarch) reportResults(ctx context.Context, err error) error {
	if c.dstFilesMissing.Load() > 0 {
		fs.Logf(c.opt.Fdst, "%d files missing", c.dstFilesMissing.Load())
	}
	if c.srcFilesMissing.Load() > 0 {
		entity := "files"
		if c.opt.Fsrc == nil {
			entity = "hashes"
		}
		fs.Logf(c.opt.Fsrc, "%d %s missing", c.srcFilesMissing.Load(), entity)
	}

	fs.Logf(c.opt.Fdst, "%d differences found", accounting.Stats(ctx).GetErrors())
	if errs := accounting.Stats(ctx).GetErrors(); errs > 0 {
		fs.Logf(c.opt.Fdst, "%d errors while checking", errs)
	}
	if c.noHashes.Load() > 0 {
		fs.Logf(c.opt.Fdst, "%d hashes could not be checked", c.noHashes.Load())
	}
	if c.matches.Load() > 0 {
		fs.Logf(c.opt.Fdst, "%d matching files", c.matches.Load())
	}
	if err != nil {
		return err
	}
	if c.differences.Load() > 0 {
		// Return an already counted error so we don't double count this error too
		err = fserrors.FsError(fmt.Errorf("%d differences found", c.differences.Load()))
		fserrors.Count(err)
		return err
	}
	return nil
}

// Check the files in fsrc and fdst according to Size and hash
func Check(ctx context.Context, opt *CheckOpt) error {
	optCopy := *opt
	optCopy.Check = func(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {
		same, ht, err := CheckHashes(ctx, src, dst)
		if err != nil {
			return true, false, err
		}
		if ht == hash.None {
			return false, true, nil
		}
		if !same {
			err = fmt.Errorf("%v differ", ht)
			fs.Errorf(src, "%v", err)
			return true, false, nil
		}
		return false, false, nil
	}

	return CheckFn(ctx, &optCopy)
}
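
// checkRemotes is a minimal usage sketch (not part of the original file)
// showing how Check might be driven from calling code: fill in a CheckOpt
// with the two file systems and run it, treating a returned error as "the
// remotes differ or the check could not be completed". The function name and
// the os.Stdout choice for Combined are assumptions for the example.
func checkRemotes(ctx context.Context, fsrc, fdst fs.Fs) error {
	opt := CheckOpt{
		Fsrc:     fsrc,
		Fdst:     fdst,
		OneWay:   false,     // also report files that are only in fdst
		Combined: os.Stdout, // "<sigil> <name>" per file, as in report()
	}
	return Check(ctx, &opt)
}
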
// CheckEqualReaders checks to see if in1 and in2 have the same
// content when read.
//
// it returns true if differences were found
func CheckEqualReaders(in1, in2 io.Reader) (differ bool, err error) {
	const bufSize = 64 * 1024
	buf1 := make([]byte, bufSize)
	buf2 := make([]byte, bufSize)
	for {
		n1, err1 := readers.ReadFill(in1, buf1)
		n2, err2 := readers.ReadFill(in2, buf2)
		// check errors
		if err1 != nil && err1 != io.EOF {
			return true, err1
		} else if err2 != nil && err2 != io.EOF {
			return true, err2
		}
		// err1 and err2 are nil or io.EOF here
		// process the data
		if n1 != n2 || !bytes.Equal(buf1[:n1], buf2[:n2]) {
			return true, nil
		}
		// if both streams have finished then we have finished
		if err1 == io.EOF && err2 == io.EOF {
			break
		}
	}
	return false, nil
}

// CheckIdenticalDownload checks to see if dst and src are identical
// by reading all their bytes if necessary.
//
// it returns true if differences were found
func CheckIdenticalDownload(ctx context.Context, dst, src fs.Object) (differ bool, err error) {
	ci := fs.GetConfig(ctx)
	err = Retry(ctx, src, ci.LowLevelRetries, func() error {
		differ, err = checkIdenticalDownload(ctx, dst, src)
		return err
	})
	return differ, err
}

// Does the work for CheckIdenticalDownload
func checkIdenticalDownload(ctx context.Context, dst, src fs.Object) (differ bool, err error) {
	var in1, in2 io.ReadCloser
	in1, err = Open(ctx, dst)
	if err != nil {
		return true, fmt.Errorf("failed to open %q: %w", dst, err)
	}
	tr1 := accounting.Stats(ctx).NewTransfer(dst, nil)
	defer func() {
		tr1.Done(ctx, nil) // error handling is done by the caller
	}()
	in1 = tr1.Account(ctx, in1).WithBuffer() // account and buffer the transfer

	in2, err = Open(ctx, src)
	if err != nil {
		return true, fmt.Errorf("failed to open %q: %w", src, err)
	}
	tr2 := accounting.Stats(ctx).NewTransfer(src, nil)
	defer func() {
		tr2.Done(ctx, nil) // error handling is done by the caller
	}()
	in2 = tr2.Account(ctx, in2).WithBuffer() // account and buffer the transfer

	// Assign to the named return values before the deferred Done calls run.
	differ, err = CheckEqualReaders(in1, in2)
	return
}

// CheckDownload checks the files in fsrc and fdst according to Size
// and the actual contents of the files.
func CheckDownload(ctx context.Context, opt *CheckOpt) error {
	optCopy := *opt
	optCopy.Check = func(ctx context.Context, a, b fs.Object) (differ bool, noHash bool, err error) {
		differ, err = CheckIdenticalDownload(ctx, a, b)
		if err != nil {
			return true, true, fmt.Errorf("failed to download: %w", err)
		}
		return differ, false, nil
	}
	return CheckFn(ctx, &optCopy)
}

// ApplyTransforms handles --no-unicode-normalization and --ignore-case-sync for CheckSum
// so that it matches the behavior of Check (where it's handled by March)
func ApplyTransforms(ctx context.Context, s string) string {
	ci := fs.GetConfig(ctx)
	return ToNormal(s, !ci.NoUnicodeNormalization, ci.IgnoreCaseSync)
}
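
// equalStrings is an illustrative sketch (not in the original source) of
// CheckEqualReaders on in-memory data: it reports whether two strings have
// identical bytes, ignoring the error since strings.Reader only ever returns
// io.EOF. The function name is an assumption for the example.
func equalStrings(a, b string) bool {
	differ, _ := CheckEqualReaders(strings.NewReader(a), strings.NewReader(b))
	return !differ
}
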
// ToNormal normalizes case and unicode form and returns the transformed string.
// It is similar to ApplyTransforms but does not use a context.
// If normUnicode == true, s will be transformed to NFC.
// If normCase == true, s will be transformed to lowercase.
// If both are true, both transformations will be performed.
func ToNormal(s string, normUnicode, normCase bool) string {
	if normUnicode {
		s = norm.NFC.String(s)
	}
	if normCase {
		s = strings.ToLower(s)
	}
	return s
}

// CheckSum checks filesystem hashes against a SUM file
func CheckSum(ctx context.Context, fsrc, fsum fs.Fs, sumFile string, hashType hash.Type, opt *CheckOpt, download bool) error {
	var options CheckOpt
	if opt != nil {
		options = *opt
	} else {
		// default options for hashsum -c
		options.Combined = os.Stdout
	}
	// CheckSum treats Fsrc and Fdst specially:
	options.Fsrc = nil  // no file system here, corresponds to the sum list
	options.Fdst = fsrc // denotes the file system to check
	opt = &options      // override supplied argument

	if !download && (hashType == hash.None || !opt.Fdst.Hashes().Contains(hashType)) {
		return fmt.Errorf("%s: hash type is not supported by file system: %s", hashType, opt.Fdst)
	}

	if sumFile == "" {
		return fmt.Errorf("not a sum file: %s", fsum)
	}
	sumObj, err := fsum.NewObject(ctx, sumFile)
	if err != nil {
		return fmt.Errorf("cannot open sum file: %w", err)
	}
	hashes, err := ParseSumFile(ctx, sumObj)
	if err != nil {
		return fmt.Errorf("failed to parse sum file: %w", err)
	}

	ci := fs.GetConfig(ctx)
	c := &checkMarch{
		tokens: make(chan struct{}, ci.Checkers),
		opt:    *opt,
	}
	lastErr := ListFn(ctx, opt.Fdst, func(obj fs.Object) {
		c.checkSum(ctx, obj, download, hashes, hashType)
	})
	c.wg.Wait() // wait for background go-routines

	// take a census of unconsumed sums
	fi := filter.GetConfig(ctx)
	for filename, hash := range hashes {
		if hash == "" { // the sum has been successfully consumed
			continue
		}
		if !fi.IncludeRemote(filename) { // the file was filtered out
			continue
		}
		// filesystem missed the file, sum wasn't consumed
		err := fmt.Errorf("file not in %v", opt.Fdst)
		fs.Errorf(filename, "%v", err)
		_ = fs.CountError(err)
		if lastErr == nil {
			lastErr = err
		}
		c.dstFilesMissing.Add(1)
		c.reportFilename(filename, opt.MissingOnDst, '+')
	}

	return c.reportResults(ctx, lastErr)
}
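
// verifySHA1Sums is a minimal sketch (not part of the original file) of how
// CheckSum could be invoked, roughly the operation behind checksum-style
// commands: fsum holds the SUM file, fsrc is the file system whose contents
// are verified, and passing a nil opt selects the default "hashsum -c" style
// output on stdout. The function name and the "sums.sha1" file name are
// assumptions for the example.
func verifySHA1Sums(ctx context.Context, fsrc, fsum fs.Fs) error {
	return CheckSum(ctx, fsrc, fsum, "sums.sha1", hash.SHA1, nil, false)
}
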
// checkSum checks a single object against the golden hashes
func (c *checkMarch) checkSum(ctx context.Context, obj fs.Object, download bool, hashes HashSums, hashType hash.Type) {
	normalizedRemote := ApplyTransforms(ctx, obj.Remote())
	c.ioMu.Lock()
	sumHash, sumFound := hashes[normalizedRemote]
	hashes[normalizedRemote] = "" // mark sum as consumed
	c.ioMu.Unlock()

	if !sumFound && c.opt.OneWay {
		return
	}

	var err error
	tr := accounting.Stats(ctx).NewCheckingTransfer(obj, "hashing")
	defer func() {
		tr.Done(ctx, err)
	}()

	if !sumFound {
		err = errors.New("sum not found")
		_ = fs.CountError(err)
		fs.Errorf(obj, "%v", err)
		c.differences.Add(1)
		c.srcFilesMissing.Add(1)
		c.report(obj, c.opt.MissingOnSrc, '-')
		return
	}

	if !download {
		var objHash string
		objHash, err = obj.Hash(ctx, hashType)
		c.matchSum(ctx, sumHash, objHash, obj, err, hashType)
		return
	}

	c.wg.Add(1)
	c.tokens <- struct{}{} // put a token to limit concurrency
	go func() {
		var (
			objHash string
			err     error
			in      io.ReadCloser
		)
		defer func() {
			c.matchSum(ctx, sumHash, objHash, obj, err, hashType)
			<-c.tokens // get the token back to free up a slot
			c.wg.Done()
		}()
		if in, err = Open(ctx, obj); err != nil {
			return
		}
		tr := accounting.Stats(ctx).NewTransfer(obj, nil)
		in = tr.Account(ctx, in).WithBuffer() // account and buffer the transfer
		defer func() {
			tr.Done(ctx, nil) // will close the stream
		}()
		hashVals, err2 := hash.StreamTypes(in, hash.NewHashSet(hashType))
		if err2 != nil {
			err = err2 // pass to matchSum
			return
		}
		objHash = hashVals[hashType]
	}()
}

// matchSum sums up the results of hashsum matching for an object
func (c *checkMarch) matchSum(ctx context.Context, sumHash, objHash string, obj fs.Object, err error, hashType hash.Type) {
	switch {
	case err != nil:
		_ = fs.CountError(err)
		fs.Errorf(obj, "Failed to calculate hash: %v", err)
		c.report(obj, c.opt.Error, '!')
	case sumHash == "":
		err = errors.New("duplicate file")
		_ = fs.CountError(err)
		fs.Errorf(obj, "%v", err)
		c.report(obj, c.opt.Error, '!')
	case objHash == "":
		fs.Debugf(nil, "%v = %s (sum)", hashType, sumHash)
		fs.Debugf(obj, "%v - could not check hash (%v)", hashType, c.opt.Fdst)
		c.noHashes.Add(1)
		c.matches.Add(1)
		c.report(obj, c.opt.Match, '=')
	case objHash == sumHash:
		fs.Debugf(obj, "%v = %s OK", hashType, sumHash)
		c.matches.Add(1)
		c.report(obj, c.opt.Match, '=')
	default:
		err = errors.New("files differ")
		_ = fs.CountError(err)
		fs.Debugf(nil, "%v = %s (sum)", hashType, sumHash)
		fs.Debugf(obj, "%v = %s (%v)", hashType, objHash, c.opt.Fdst)
		fs.Errorf(obj, "%v", err)
		c.differences.Add(1)
		c.report(obj, c.opt.Differ, '*')
	}
}

// HashSums represents a parsed SUM file
type HashSums map[string]string

// ParseSumFile parses a hash SUM file and returns hashes as a map
func ParseSumFile(ctx context.Context, sumFile fs.Object) (HashSums, error) {
	rd, err := Open(ctx, sumFile)
	if err != nil {
		return nil, err
	}
	parser := bufio.NewReader(rd)

	const maxWarn = 3
	numWarn := 0

	re := regexp.MustCompile(`^([^ ]+) [ *](.+)$`)
	hashes := HashSums{}
	for lineNo := 0; true; lineNo++ {
		lineBytes, _, err := parser.ReadLine()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		line := string(lineBytes)
		if line == "" {
			continue
		}

		fields := re.FindStringSubmatch(ApplyTransforms(ctx, line))
		if fields == nil {
			numWarn++
			if numWarn <= maxWarn {
				fs.Logf(sumFile, "improperly formatted checksum line %d", lineNo)
			}
			continue
		}

		sum, file := fields[1], fields[2]
		if hashes[file] != "" {
			numWarn++
			if numWarn <= maxWarn {
				fs.Logf(sumFile, "duplicate file on checksum line %d", lineNo)
			}
			continue
		}

		// We've standardised on lower case checksums in rclone internals.
		hashes[file] = strings.ToLower(sum)
	}

	if numWarn > maxWarn {
		fs.Logf(sumFile, "%d warning(s) suppressed...", numWarn-maxWarn)
	}
	if err = rd.Close(); err != nil {
		return nil, err
	}
	return hashes, nil
}
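
// parseSumLine is an illustrative sketch (not in the original source) of the
// line format ParseSumFile accepts: a hash, one space, then a space (text
// mode) or '*' (binary mode), then the file name, as produced by tools such
// as md5sum and sha1sum. It reuses the same regexp as ParseSumFile above.
// The function name is an assumption for the example.
func parseSumLine(line string) (sum, file string, ok bool) {
	fields := regexp.MustCompile(`^([^ ]+) [ *](.+)$`).FindStringSubmatch(line)
	if fields == nil {
		return "", "", false
	}
	// lower-case the hash, matching what ParseSumFile stores internally
	return strings.ToLower(fields[1]), fields[2], true
}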