// github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/backend/chunker/chunker.go

// Package chunker provides wrappers for Fs and Object which split large files in chunks
package chunker

import (
	"bytes"
	"context"
	"crypto/md5"
	"crypto/sha1"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	gohash "hash"
	"io"
	"math/rand"
	"path"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/cache"
	"github.com/rclone/rclone/fs/config/configmap"
	"github.com/rclone/rclone/fs/config/configstruct"
	"github.com/rclone/rclone/fs/fspath"
	"github.com/rclone/rclone/fs/hash"
	"github.com/rclone/rclone/fs/operations"
	"github.com/rclone/rclone/lib/encoder"
)

// Chunker's composite files have one or more chunks
// and an optional metadata object. If present, the
// meta object is named after the original file.
//
// The only supported metadata format is simplejson atm.
// It supports only per-file meta objects that are rudimentary,
// used mostly for consistency checks (lazily for performance reasons).
// Other formats can be developed that use an external meta store
// free of these limitations, but this needs some support from
// rclone core (e.g. metadata store interfaces).
//
// The following types of chunks are supported:
// data and control, active and temporary.
// Chunk type is identified by matching the chunk file name
// against the chunk name format configured by the user and the
// transaction style being used.
//
// Both data and control chunks can be either temporary (aka hidden)
// or active (non-temporary aka normal aka permanent).
// An operation creates temporary chunks while it runs.
// Upon completion it removes temporary chunks and leaves active ones.
//
// Temporary chunks have a special hardcoded suffix in addition
// to the configured name pattern.
// The temporary suffix includes a so-called transaction identifier
// (abbreviated as `xactID` below), a generic non-negative base-36 "number"
// used by parallel operations to share a composite object.
// Chunker also accepts the longer decimal temporary suffix (obsolete),
// which is transparently converted to the new format. In its maximum
// length of 13 decimals it makes a 7-digit base-36 number.
//
// When transactions is set to the norename style, data chunks will
// keep their temporary chunk names (with the transaction identifier
// suffix). To distinguish them from temporary chunks, the txn field
// of the metadata file is set to match the transaction identifier of
// the data chunks.
//
// Chunker can tell data chunks from control chunks by the characters
// located in the "hash placeholder" position of configured format.
// Data chunks have decimal digits there.
// Control chunks have in that position a short lowercase alphanumeric
// string (starting with a letter) prefixed by an underscore.
//
// Metadata format v1 does not define any control chunk types,
// they are currently ignored aka reserved.
// In future they can be used to implement resumable uploads etc.
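//
// For illustration only (the file names below are hypothetical, derived
// from the default `*.rclone_chunk.###` format with start_from=1):
//
//	video.avi                                   - meta object (or a plain file if meta format is "none")
//	video.avi.rclone_chunk.001                  - active data chunk #1
//	video.avi.rclone_chunk.002                  - active data chunk #2
//	video.avi.rclone_chunk.001_0q2w3e           - temporary data chunk with xactID "0q2w3e"
//	video.avi.rclone_chunk._meta                - control chunk of (reserved) type "meta"
//	video.avi.rclone_chunk.001..tmp_0123456789  - obsolete decimal temporary suffix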
const (
	ctrlTypeRegStr   = `[a-z][a-z0-9]{2,6}`
	tempSuffixFormat = `_%04s`
	tempSuffixRegStr = `_([0-9a-z]{4,9})`
	tempSuffixRegOld = `\.\.tmp_([0-9]{10,13})`
)

var (
	// regular expressions to validate control type and temporary suffix
	ctrlTypeRegexp   = regexp.MustCompile(`^` + ctrlTypeRegStr + `$`)
	tempSuffixRegexp = regexp.MustCompile(`^` + tempSuffixRegStr + `$`)
)

// Normally metadata is a small piece of JSON (about 100-300 bytes).
// The size of valid metadata must never exceed this limit.
// Current maximum provides reasonable room for future extensions.
//
// Please refrain from increasing it, this can cause old rclone versions
// to fail, or worse, treat meta object as a normal file (see NewObject).
// If more room is needed please bump metadata version forcing previous
// releases to ask for upgrade, and offload extra info to a control chunk.
//
// And still chunker's primary function is to chunk large files
// rather than serve as a generic metadata container.
const (
	maxMetadataSize        = 1023
	maxMetadataSizeWritten = 255
)

// Current/highest supported metadata format.
const metadataVersion = 2

// optimizeFirstChunk enables the following optimization in the Put:
// If a single chunk is expected, put the first chunk using the
// base target name instead of a temporary name, thus avoiding
// extra rename operation.
// Warning: this optimization is not transaction safe.
const optimizeFirstChunk = false

// revealHidden is a stub until chunker lands the `reveal hidden` option.
const revealHidden = false

// Prevent memory overflow due to specially crafted chunk name
const maxSafeChunkNumber = 10000000

// Number of attempts to find unique transaction identifier
const maxTransactionProbes = 100

// standard chunker errors
var (
	ErrChunkOverflow = errors.New("chunk number overflow")
	ErrMetaTooBig    = errors.New("metadata is too big")
	ErrMetaUnknown   = errors.New("unknown metadata, please upgrade rclone")
)

// variants of baseMove's parameter delMode
const (
	delNever  = 0 // don't delete, just move
	delAlways = 1 // delete destination before moving
	delFailed = 2 // move, then delete and try again if failed
)
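
// Illustrative examples (not from the original source) of strings accepted
// by the regular expressions above:
//
//	ctrlTypeRegStr:   "meta", "blkinfo" (a letter followed by 2-6 alphanumerics)
//	tempSuffixRegStr: "_0q2w3e" (underscore plus 4-9 base-36 digits)
//	tempSuffixRegOld: "..tmp_1234567890" (obsolete, 10-13 decimal digits)
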
// Register with Fs
func init() {
	fs.Register(&fs.RegInfo{
		Name:        "chunker",
		Description: "Transparently chunk/split large files",
		NewFs:       NewFs,
		Options: []fs.Option{{
			Name:     "remote",
			Required: true,
			Help: `Remote to chunk/unchunk.

Normally should contain a ':' and a path, e.g. "myremote:path/to/dir",
"myremote:bucket" or maybe "myremote:" (not recommended).`,
		}, {
			Name:     "chunk_size",
			Advanced: false,
			Default:  fs.SizeSuffix(2147483648), // 2 GiB
			Help:     `Files larger than chunk size will be split in chunks.`,
		}, {
			Name:     "name_format",
			Advanced: true,
			Hide:     fs.OptionHideCommandLine,
			Default:  `*.rclone_chunk.###`,
			Help: `String format of chunk file names.

The two placeholders are: base file name (*) and chunk number (#...).
There must be one and only one asterisk and one or more consecutive hash characters.
If the chunk number has fewer digits than the number of hashes, it is left-padded by zeros.
If there are more digits in the number, they are left as is.
Possible chunk files are ignored if their name does not match given format.`,
		}, {
			Name:     "start_from",
			Advanced: true,
			Hide:     fs.OptionHideCommandLine,
			Default:  1,
			Help: `Minimum valid chunk number. Usually 0 or 1.

By default chunk numbers start from 1.`,
		}, {
			Name:     "meta_format",
			Advanced: true,
			Hide:     fs.OptionHideCommandLine,
			Default:  "simplejson",
			Help: `Format of the metadata object or "none".

By default "simplejson".
Metadata is a small JSON file named after the composite file.`,
			Examples: []fs.OptionExample{{
				Value: "none",
				Help: `Do not use metadata files at all.
Requires hash type "none".`,
			}, {
				Value: "simplejson",
				Help: `Simple JSON supports hash sums and chunk validation.

It has the following fields: ver, size, nchunks, md5, sha1.`,
			}},
		}, {
			Name:     "hash_type",
			Advanced: false,
			Default:  "md5",
			Help: `Choose how chunker handles hash sums.

All modes but "none" require metadata.`,
			Examples: []fs.OptionExample{{
				Value: "none",
				Help: `Pass any hash supported by wrapped remote for non-chunked files.
Return nothing otherwise.`,
			}, {
				Value: "md5",
				Help:  `MD5 for composite files.`,
			}, {
				Value: "sha1",
				Help:  `SHA1 for composite files.`,
			}, {
				Value: "md5all",
				Help:  `MD5 for all files.`,
			}, {
				Value: "sha1all",
				Help:  `SHA1 for all files.`,
			}, {
				Value: "md5quick",
				Help: `Copying a file to chunker will request MD5 from the source,
falling back to SHA1 if unsupported.`,
			}, {
				Value: "sha1quick",
				Help:  `Similar to "md5quick" but prefers SHA1 over MD5.`,
			}},
		}, {
			Name:     "fail_hard",
			Advanced: true,
			Default:  false,
			Help:     `Choose how chunker should handle files with missing or invalid chunks.`,
			Examples: []fs.OptionExample{
				{
					Value: "true",
					Help:  "Report errors and abort current command.",
				}, {
					Value: "false",
					Help:  "Warn user, skip incomplete file and proceed.",
				},
			},
		}, {
			Name:     "transactions",
			Advanced: true,
			Default:  "rename",
			Help:     `Choose how chunker should handle temporary files during transactions.`,
			Hide:     fs.OptionHideCommandLine,
			Examples: []fs.OptionExample{
				{
					Value: "rename",
					Help:  "Rename temporary files after a successful transaction.",
				}, {
					Value: "norename",
					Help: `Leave temporary file names and write transaction ID to metadata file.
Metadata is required for no rename transactions (meta format cannot be "none").
If you are using norename transactions you should be careful not to downgrade Rclone
as older versions of Rclone don't support this transaction style and will misinterpret
files manipulated by norename transactions.
This method is EXPERIMENTAL, don't use on production systems.`,
				}, {
					Value: "auto",
					Help: `Rename or norename will be used depending on capabilities of the backend.
If meta format is set to "none", rename transactions will always be used.
This method is EXPERIMENTAL, don't use on production systems.`,
				},
			},
		}},
	})
}
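
// An illustrative rclone.conf entry for this backend (the remote name and
// wrapped remote below are hypothetical; the other values are the defaults
// registered above):
//
//	[mychunks]
//	type = chunker
//	remote = mydrive:bucket
//	chunk_size = 2Gi
//	hash_type = md5
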
// NewFs constructs an Fs from the path, container:path
func NewFs(ctx context.Context, name, rpath string, m configmap.Mapper) (fs.Fs, error) {
	// Parse config into Options struct
	opt := new(Options)
	err := configstruct.Set(m, opt)
	if err != nil {
		return nil, err
	}
	if opt.StartFrom < 0 {
		return nil, errors.New("start_from must be non-negative")
	}

	remote := opt.Remote
	if strings.HasPrefix(remote, name+":") {
		return nil, errors.New("can't point remote at itself - check the value of the remote setting")
	}

	baseName, basePath, err := fspath.SplitFs(remote)
	if err != nil {
		return nil, fmt.Errorf("failed to parse remote %q to wrap: %w", remote, err)
	}
	// Look for a file first
	remotePath := fspath.JoinRootPath(basePath, rpath)
	baseFs, err := cache.Get(ctx, baseName+remotePath)
	if err != fs.ErrorIsFile && err != nil {
		return nil, fmt.Errorf("failed to make remote %q to wrap: %w", baseName+remotePath, err)
	}
	if !operations.CanServerSideMove(baseFs) {
		return nil, errors.New("can't use chunker on a backend which doesn't support server-side move or copy")
	}

	f := &Fs{
		base: baseFs,
		name: name,
		root: rpath,
		opt:  *opt,
	}
	f.dirSort = true // processEntries requires that meta objects come before their data chunks atm.

	if err := f.configure(opt.NameFormat, opt.MetaFormat, opt.HashType, opt.Transactions); err != nil {
		return nil, err
	}

	// Handle the tricky case detected by FsMkdir/FsPutFiles/FsIsFile
	// when `rpath` points to a composite multi-chunk file without metadata,
	// i.e. `rpath` does not exist in the wrapped remote, but chunker
	// detects a composite file because it finds the first chunk!
	// (yet can't satisfy fstest.CheckListing, will ignore)
	if err == nil && !f.useMeta {
		firstChunkPath := f.makeChunkName(remotePath, 0, "", "")
		newBase, testErr := cache.Get(ctx, baseName+firstChunkPath)
		if testErr == fs.ErrorIsFile {
			f.base = newBase
			err = testErr
		}
	}
	cache.PinUntilFinalized(f.base, f)

	// Correct root if definitely pointing to a file
	if err == fs.ErrorIsFile {
		f.root = path.Dir(f.root)
		if f.root == "." || f.root == "/" {
			f.root = ""
		}
	}

	// Note 1: the features here are ones we could support, and they are
	// ANDed with the ones from wrappedFs.
	// Note 2: features.Fill() points features.PutStream to our PutStream,
	// but features.Mask() will nullify it if wrappedFs does not have it.
	f.features = (&fs.Features{
		CaseInsensitive:          true,
		DuplicateFiles:           true,
		ReadMimeType:             false, // Object.MimeType not supported
		WriteMimeType:            true,
		BucketBased:              true,
		CanHaveEmptyDirectories:  true,
		ServerSideAcrossConfigs:  true,
		ReadDirMetadata:          true,
		WriteDirMetadata:         true,
		WriteDirSetModTime:       true,
		UserDirMetadata:          true,
		DirModTimeUpdatesOnWrite: true,
	}).Fill(ctx, f).Mask(ctx, baseFs).WrapsFs(f, baseFs)

	f.features.Disable("ListR") // Recursive listing may cause chunker to skip files

	return f, err
}

// Options defines the configuration for this backend
type Options struct {
	Remote       string        `config:"remote"`
	ChunkSize    fs.SizeSuffix `config:"chunk_size"`
	NameFormat   string        `config:"name_format"`
	StartFrom    int           `config:"start_from"`
	MetaFormat   string        `config:"meta_format"`
	HashType     string        `config:"hash_type"`
	FailHard     bool          `config:"fail_hard"`
	Transactions string        `config:"transactions"`
}

// Fs represents a wrapped fs.Fs
type Fs struct {
	name         string
	root         string
	base         fs.Fs          // remote wrapped by chunker overlay
	wrapper      fs.Fs          // wrapper is used by SetWrapper
	useMeta      bool           // false if metadata format is 'none'
	useMD5       bool           // mutually exclusive with useSHA1
	useSHA1      bool           // mutually exclusive with useMD5
	hashFallback bool           // allows fallback from MD5 to SHA1 and vice versa
	hashAll      bool           // hash all files, mutually exclusive with hashFallback
	dataNameFmt  string         // name format of data chunks
	ctrlNameFmt  string         // name format of control chunks
	nameRegexp   *regexp.Regexp // regular expression to match chunk names
	xactIDRand   *rand.Rand     // generator of random transaction identifiers
	xactIDMutex  sync.Mutex     // mutex for the source of randomness
	opt          Options        // copy of Options
	features     *fs.Features   // optional features
	dirSort      bool           // reserved for future, ignored
	useNoRename  bool           // can be set with the transactions option
}

// configure sets up chunker for given name format, meta format and hash type.
// It also seeds the source of random transaction identifiers.
// configure must be called only from NewFs or by unit tests.
func (f *Fs) configure(nameFormat, metaFormat, hashType, transactionMode string) error {
	if err := f.setChunkNameFormat(nameFormat); err != nil {
		return fmt.Errorf("invalid name format '%s': %w", nameFormat, err)
	}
	if err := f.setMetaFormat(metaFormat); err != nil {
		return err
	}
	if err := f.setHashType(hashType); err != nil {
		return err
	}
	if err := f.setTransactionMode(transactionMode); err != nil {
		return err
	}

	randomSeed := time.Now().UnixNano()
	f.xactIDRand = rand.New(rand.NewSource(randomSeed))

	return nil
}

func (f *Fs) setMetaFormat(metaFormat string) error {
	switch metaFormat {
	case "none":
		f.useMeta = false
	case "simplejson":
		f.useMeta = true
	default:
		return fmt.Errorf("unsupported meta format '%s'", metaFormat)
	}
	return nil
}
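
// A minimal usage sketch of configure (illustrative, mirroring what a unit
// test might do; the argument values are the backend defaults, and baseFs is
// a hypothetical wrapped remote):
//
//	f := &Fs{base: baseFs}
//	if err := f.configure(`*.rclone_chunk.###`, "simplejson", "md5", "rename"); err != nil {
//		// invalid name format, meta format, hash type or transaction mode
//	}
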
// setHashType
// must be called *after* setMetaFormat.
//
// In the "All" mode chunker will force metadata on all files
// if the wrapped remote can't provide given hashsum.
func (f *Fs) setHashType(hashType string) error {
	f.useMD5 = false
	f.useSHA1 = false
	f.hashFallback = false
	f.hashAll = false
	requireMetaHash := true

	switch hashType {
	case "none":
		requireMetaHash = false
	case "md5":
		f.useMD5 = true
	case "sha1":
		f.useSHA1 = true
	case "md5quick":
		f.useMD5 = true
		f.hashFallback = true
	case "sha1quick":
		f.useSHA1 = true
		f.hashFallback = true
	case "md5all":
		f.useMD5 = true
		f.hashAll = !f.base.Hashes().Contains(hash.MD5) || f.base.Features().SlowHash
	case "sha1all":
		f.useSHA1 = true
		f.hashAll = !f.base.Hashes().Contains(hash.SHA1) || f.base.Features().SlowHash
	default:
		return fmt.Errorf("unsupported hash type '%s'", hashType)
	}
	if requireMetaHash && !f.useMeta {
		return fmt.Errorf("hash type '%s' requires compatible meta format", hashType)
	}
	return nil
}

func (f *Fs) setTransactionMode(transactionMode string) error {
	switch transactionMode {
	case "rename":
		f.useNoRename = false
	case "norename":
		if !f.useMeta {
			return errors.New("incompatible transaction options")
		}
		f.useNoRename = true
	case "auto":
		f.useNoRename = !f.CanQuickRename()
		if f.useNoRename && !f.useMeta {
			f.useNoRename = false
			return errors.New("using norename transactions requires metadata")
		}
	default:
		return fmt.Errorf("unsupported transaction mode '%s'", transactionMode)
	}
	return nil
}
// setChunkNameFormat converts pattern based chunk name format
// into Printf format and Regular expressions for data and
// control chunks.
func (f *Fs) setChunkNameFormat(pattern string) error {
	// validate pattern
	if strings.Count(pattern, "*") != 1 {
		return errors.New("pattern must have exactly one asterisk (*)")
	}
	numDigits := strings.Count(pattern, "#")
	if numDigits < 1 {
		return errors.New("pattern must have a hash character (#)")
	}
	if strings.Index(pattern, "*") > strings.Index(pattern, "#") {
		return errors.New("asterisk (*) in pattern must come before hashes (#)")
	}
	if ok, _ := regexp.MatchString("^[^#]*[#]+[^#]*$", pattern); !ok {
		return errors.New("hashes (#) in pattern must be consecutive")
	}
	if dir, _ := path.Split(pattern); dir != "" {
		return errors.New("directory separator prohibited")
	}
	if pattern[0] != '*' {
		return errors.New("pattern must start with asterisk") // to be lifted later
	}

	// craft a unified regular expression for all types of chunks
	reHashes := regexp.MustCompile("[#]+")
	reDigits := "[0-9]+"
	if numDigits > 1 {
		reDigits = fmt.Sprintf("[0-9]{%d,}", numDigits)
	}
	reDataOrCtrl := fmt.Sprintf("(?:(%s)|_(%s))", reDigits, ctrlTypeRegStr)

	// this must be non-greedy or else it could eat up temporary suffix
	const mainNameRegStr = "(.+?)"

	strRegex := regexp.QuoteMeta(pattern)
	strRegex = reHashes.ReplaceAllLiteralString(strRegex, reDataOrCtrl)
	strRegex = strings.ReplaceAll(strRegex, "\\*", mainNameRegStr)
	strRegex = fmt.Sprintf("^%s(?:%s|%s)?$", strRegex, tempSuffixRegStr, tempSuffixRegOld)
	f.nameRegexp = regexp.MustCompile(strRegex)

	// craft printf formats for active data/control chunks
	fmtDigits := "%d"
	if numDigits > 1 {
		fmtDigits = fmt.Sprintf("%%0%dd", numDigits)
	}
	strFmt := strings.ReplaceAll(pattern, "%", "%%")
	strFmt = strings.Replace(strFmt, "*", "%s", 1)
	f.dataNameFmt = reHashes.ReplaceAllLiteralString(strFmt, fmtDigits)
	f.ctrlNameFmt = reHashes.ReplaceAllLiteralString(strFmt, "_%s")
	return nil
}

// makeChunkName produces chunk name (or path) for a given file.
//
// filePath can be name, relative or absolute path of main file.
//
// chunkNo must be a zero based index of data chunk.
// Negative chunkNo e.g. -1 indicates a control chunk.
// ctrlType is type of control chunk (must be valid).
// ctrlType must be "" for data chunks.
//
// xactID is a transaction identifier. Empty xactID denotes active chunk,
// otherwise temporary chunk name is produced.
func (f *Fs) makeChunkName(filePath string, chunkNo int, ctrlType, xactID string) string {
	dir, parentName := path.Split(filePath)
	var name, tempSuffix string
	switch {
	case chunkNo >= 0 && ctrlType == "":
		name = fmt.Sprintf(f.dataNameFmt, parentName, chunkNo+f.opt.StartFrom)
	case chunkNo < 0 && ctrlTypeRegexp.MatchString(ctrlType):
		name = fmt.Sprintf(f.ctrlNameFmt, parentName, ctrlType)
	default:
		panic("makeChunkName: invalid argument") // must not produce something we can't consume
	}
	if xactID != "" {
		tempSuffix = fmt.Sprintf(tempSuffixFormat, xactID)
		if !tempSuffixRegexp.MatchString(tempSuffix) {
			panic("makeChunkName: invalid argument")
		}
	}
	return dir + name + tempSuffix
}

// parseChunkName checks whether given file path belongs to
// a chunk and extracts chunk name parts.
//
// filePath can be name, relative or absolute path of a file.
//
// Returned parentPath is path of the composite file owning the chunk.
// It's a non-empty string if valid chunk name is detected
// or "" if it's not a chunk.
// Other returned values depend on detected chunk type:
// data or control, active or temporary:
//
//	data chunk - the returned chunkNo is non-negative and ctrlType is ""
//	control chunk - the chunkNo is -1 and ctrlType is a non-empty string
//	active chunk - the returned xactID is ""
//	temporary chunk - the xactID is a non-empty string
func (f *Fs) parseChunkName(filePath string) (parentPath string, chunkNo int, ctrlType, xactID string) {
	dir, name := path.Split(filePath)
	match := f.nameRegexp.FindStringSubmatch(name)
	if match == nil || match[1] == "" {
		return "", -1, "", ""
	}
	var err error

	chunkNo = -1
	if match[2] != "" {
		if chunkNo, err = strconv.Atoi(match[2]); err != nil {
			chunkNo = -1
		}
		if chunkNo -= f.opt.StartFrom; chunkNo < 0 {
			fs.Infof(f, "invalid data chunk number in file %q", name)
			return "", -1, "", ""
		}
	}

	if match[4] != "" {
		xactID = match[4]
	}
	if match[5] != "" {
		// old-style temporary suffix
		number, err := strconv.ParseInt(match[5], 10, 64)
		if err != nil || number < 0 {
			fs.Infof(f, "invalid old-style transaction number in file %q", name)
			return "", -1, "", ""
		}
		// convert old-style transaction number to base-36 transaction ID
		xactID = fmt.Sprintf(tempSuffixFormat, strconv.FormatInt(number, 36))
		xactID = xactID[1:] // strip leading underscore
	}

	parentPath = dir + match[1]
	ctrlType = match[3]
	return
}
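
// An illustrative round trip through the two helpers above, assuming the
// default format `*.rclone_chunk.###` and start_from=1 (file name hypothetical):
//
//	name := f.makeChunkName("dir/video.avi", 0, "", "")
//	// name == "dir/video.avi.rclone_chunk.001"
//	parent, chunkNo, ctrlType, xactID := f.parseChunkName(name)
//	// parent == "dir/video.avi", chunkNo == 0, ctrlType == "", xactID == ""
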
// forbidChunk prints error message or raises error if file is chunk.
// First argument sets log prefix, use `false` to suppress message.
func (f *Fs) forbidChunk(o interface{}, filePath string) error {
	if parentPath, _, _, _ := f.parseChunkName(filePath); parentPath != "" {
		if f.opt.FailHard {
			return fmt.Errorf("chunk overlap with %q", parentPath)
		}
		if boolVal, isBool := o.(bool); !isBool || boolVal {
			fs.Errorf(o, "chunk overlap with %q", parentPath)
		}
	}
	return nil
}

// newXactID produces a sufficiently random transaction identifier.
//
// The temporary suffix mask allows identifiers consisting of 4-9
// base-36 digits (i.e. digits 0-9 or lowercase letters a-z).
// The identifiers must be unique between transactions running on
// the single file in parallel.
//
// Currently the function produces 6-character identifiers.
// Together with underscore this makes a 7-character temporary suffix.
//
// The first 4 characters isolate groups of transactions by time intervals.
// The maximum length of interval is base-36 "zzzz" i.e. 1,679,615 seconds.
// The function rather takes a maximum prime closest to this number
// (see https://primes.utm.edu) as the interval length to better safeguard
// against repeating pseudo-random sequences in cases when rclone is
// invoked from a periodic scheduler like unix cron.
// Thus, the interval is slightly more than 19 days 10 hours 33 minutes.
//
// The remaining 2 base-36 digits (in the range from 0 to 1295 inclusive)
// are taken from the local random source.
// This provides about 0.1% collision probability for two parallel
// operations started at the same second and working on the same file.
//
// Non-empty filePath argument enables probing for existing temporary chunk
// to further eliminate collisions.
func (f *Fs) newXactID(ctx context.Context, filePath string) (xactID string, err error) {
	const closestPrimeZzzzSeconds = 1679609
	const maxTwoBase36Digits = 1295

	unixSec := time.Now().Unix()
	if unixSec < 0 {
		unixSec = -unixSec // unlikely but the number must be positive
	}
	circleSec := unixSec % closestPrimeZzzzSeconds
	first4chars := strconv.FormatInt(circleSec, 36)

	for tries := 0; tries < maxTransactionProbes; tries++ {
		f.xactIDMutex.Lock()
		randomness := f.xactIDRand.Int63n(maxTwoBase36Digits + 1)
		f.xactIDMutex.Unlock()

		last2chars := strconv.FormatInt(randomness, 36)
		xactID = fmt.Sprintf("%04s%02s", first4chars, last2chars)

		if filePath == "" {
			return
		}
		probeChunk := f.makeChunkName(filePath, 0, "", xactID)
		_, probeErr := f.base.NewObject(ctx, probeChunk)
		if probeErr != nil {
			return
		}
	}

	return "", fmt.Errorf("can't setup transaction for %s", filePath)
}
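
// A worked example of the identifier layout (clock and random values
// hypothetical): unixSec = 1700000000 gives
// circleSec = 1700000000 % 1679609 = 235692, which is "51v0" in base-36;
// a random draw of 1000 is "rs" in base-36, so xactID = "51v0rs" and the
// probe chunk would be named e.g. "video.avi.rclone_chunk.001_51v0rs"
// under the default name format.
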
// List the objects and directories in dir into entries.
// The entries can be returned in any order but should be
// for a complete directory.
//
// dir should be "" to list the root, and should not have
// trailing slashes.
//
// This should return ErrDirNotFound if the directory isn't found.
//
// Commands normally clean up all temporary chunks in case of a failure.
// However, if rclone dies unexpectedly, it can leave behind a bunch of
// hidden temporary chunks. List and its underlying chunkEntries()
// silently skip all temporary chunks in the directory. It's okay if
// they belong to an unfinished command running in parallel.
//
// However, there is no way to discover dead temporary chunks atm.
// As a workaround users can use `purge` to forcibly remove the whole
// directory together with dead chunks.
// In future a flag named like `--chunker-list-hidden` may be added to
// rclone that will tell List to reveal hidden chunks.
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
	entries, err = f.base.List(ctx, dir)
	if err != nil {
		return nil, err
	}
	return f.processEntries(ctx, entries, dir)
}

// ListR lists the objects and directories of the Fs starting
// from dir recursively into out.
//
// dir should be "" to start from the root, and should not
// have trailing slashes.
//
// This should return ErrDirNotFound if the directory isn't
// found.
//
// It should call callback for each tranche of entries read.
// These need not be returned in any particular order. If
// callback returns an error then the listing will stop
// immediately.
//
// Don't implement this unless you have a more efficient way
// of listing recursively than doing a directory traversal.
func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
	do := f.base.Features().ListR
	return do(ctx, dir, func(entries fs.DirEntries) error {
		newEntries, err := f.processEntries(ctx, entries, dir)
		if err != nil {
			return err
		}
		return callback(newEntries)
	})
}
// processEntries assembles chunk entries into composite entries
func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirPath string) (newEntries fs.DirEntries, err error) {
	var sortedEntries fs.DirEntries
	if f.dirSort {
		// sort entries so that meta objects go before their chunks
		sortedEntries = make(fs.DirEntries, len(origEntries))
		copy(sortedEntries, origEntries)
		sort.Sort(sortedEntries)
	} else {
		sortedEntries = origEntries
	}

	byRemote := make(map[string]*Object)
	badEntry := make(map[string]bool)
	isSubdir := make(map[string]bool)
	txnByRemote := map[string]string{}

	var tempEntries fs.DirEntries
	for _, dirOrObject := range sortedEntries {
		switch entry := dirOrObject.(type) {
		case fs.Object:
			remote := entry.Remote()
			mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(remote)
			if mainRemote == "" {
				// this is meta object or standalone file
				object := f.newObject("", entry, nil)
				byRemote[remote] = object
				tempEntries = append(tempEntries, object)
				if f.useNoRename {
					txnByRemote[remote], err = object.readXactID(ctx)
					if err != nil {
						return nil, err
					}
				}
				break
			}
			// this is some kind of chunk
			// meta object should have been created above if present
			mainObject := byRemote[mainRemote]
			isSpecial := xactID != txnByRemote[mainRemote] || ctrlType != ""
			if mainObject == nil && f.useMeta && !isSpecial {
				fs.Debugf(f, "skip orphan data chunk %q", remote)
				break
			}
			if mainObject == nil && !f.useMeta {
				// this is the "nometa" case
				// create dummy chunked object without metadata
				mainObject = f.newObject(mainRemote, nil, nil)
				byRemote[mainRemote] = mainObject
				if !badEntry[mainRemote] {
					tempEntries = append(tempEntries, mainObject)
				}
			}
			if isSpecial {
				if revealHidden {
					fs.Infof(f, "ignore non-data chunk %q", remote)
				}
				// need to read metadata to ensure actual object type
				// no need to read if meta object is too big or absent,
				// use the fact that before calling validate()
				// the `size` field caches meta object size, if any
				if f.useMeta && mainObject != nil && mainObject.size <= maxMetadataSize {
					mainObject.unsure = true
				}
				break
			}
			if err := mainObject.addChunk(entry, chunkNo); err != nil {
				if f.opt.FailHard {
					return nil, err
				}
				badEntry[mainRemote] = true
			}
		case fs.Directory:
			isSubdir[entry.Remote()] = true
			wrapDir := fs.NewDirWrapper(entry.Remote(), entry)
			tempEntries = append(tempEntries, wrapDir)
		default:
			if f.opt.FailHard {
				return nil, fmt.Errorf("unknown object type %T", entry)
			}
			fs.Debugf(f, "unknown object type %T", entry)
		}
	}

	for _, entry := range tempEntries {
		if object, ok := entry.(*Object); ok {
			remote := object.Remote()
			if isSubdir[remote] {
				if f.opt.FailHard {
					return nil, fmt.Errorf("%q is both meta object and directory", remote)
				}
				badEntry[remote] = true // fall thru
			}
			if badEntry[remote] {
				fs.Debugf(f, "invalid directory entry %q", remote)
				continue
			}
			if err := object.validate(); err != nil {
				if f.opt.FailHard {
					return nil, err
				}
				fs.Debugf(f, "invalid chunks in object %q", remote)
				continue
			}
		}
		newEntries = append(newEntries, entry)
	}

	if f.dirSort {
		sort.Sort(newEntries)
	}
	return newEntries, nil
}
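
// Illustrative effect of processEntries (file names hypothetical, default
// name format assumed): a base listing of
//
//	video.avi                          (meta object)
//	video.avi.rclone_chunk.001
//	video.avi.rclone_chunk.002
//	video.avi.rclone_chunk.001_51v0rs  (temporary, skipped)
//
// collapses into a single composite entry "video.avi" whose size is the
// sum of the two active data chunks.
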
// NewObject finds the Object at remote.
//
// Please note that every NewObject invocation will scan the whole directory.
// Using here something like fs.DirCache might improve performance
// (yet making the logic more complex).
//
// Note that chunker prefers analyzing file names rather than reading
// the content of meta object assuming that directory scans are fast
// but opening even a small file can be slow on some backends.
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
	return f.scanObject(ctx, remote, false)
}

// scanObject is like NewObject with optional quick scan mode.
// The quick mode avoids directory requests other than `List`,
// ignores non-chunked objects and skips chunk size checks.
func (f *Fs) scanObject(ctx context.Context, remote string, quickScan bool) (fs.Object, error) {
	if err := f.forbidChunk(false, remote); err != nil {
		return nil, fmt.Errorf("can't access: %w", err)
	}

	var (
		o             *Object
		baseObj       fs.Object
		currentXactID string
		err           error
		sameMain      bool
	)

	if f.useMeta {
		baseObj, err = f.base.NewObject(ctx, remote)
		if err != nil {
			return nil, err
		}
		remote = baseObj.Remote()

		// Chunker's meta object cannot be large and maxMetadataSize acts
		// as a hard limit. Anything larger than that is treated as a
		// non-chunked file without even checking its contents, so it's
		// paramount to prevent metadata from exceeding the maximum size.
		// Anything smaller is additionally checked for format.
		o = f.newObject("", baseObj, nil)
		if o.size > maxMetadataSize {
			return o, nil
		}
	} else {
		// Metadata is disabled, hence this is either a multi-chunk
		// composite file without meta object or a non-chunked file.
		// Create an empty wrapper here, scan directory to determine
		// which case it is and postpone reading if it's the latter one.
		o = f.newObject(remote, nil, nil)
	}

	// If the object is small, it's probably a meta object.
	// However, composite file must have data chunks besides it.
	// Scan directory for possible data chunks now and decide later on.
	dir := path.Dir(strings.TrimRight(remote, "/"))
	if dir == "." {
		dir = ""
	}
	entries, err := f.base.List(ctx, dir)
	switch err {
	case nil:
		// OK, fall thru
	case fs.ErrorDirNotFound:
		entries = nil
	default:
		return nil, fmt.Errorf("can't detect composite file: %w", err)
	}

	if f.useNoRename {
		currentXactID, err = o.readXactID(ctx)
		if err != nil {
			return nil, err
		}
	}
	caseInsensitive := f.features.CaseInsensitive

	for _, dirOrObject := range entries {
		entry, ok := dirOrObject.(fs.Object)
		if !ok {
			continue
		}
		entryRemote := entry.Remote()
		if !caseInsensitive && !strings.Contains(entryRemote, remote) {
			continue // bypass regexp to save cpu
		}
		mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(entryRemote)
		if mainRemote == "" {
			continue // skip non-chunks
		}
		if caseInsensitive {
			sameMain = strings.EqualFold(mainRemote, remote)
			if sameMain && f.base.Features().IsLocal {
				// on local, make sure the EqualFold still holds true when accounting for encoding.
				// sometimes paths with special characters will only normalize the same way in Standard Encoding.
				sameMain = strings.EqualFold(encoder.OS.FromStandardPath(mainRemote), encoder.OS.FromStandardPath(remote))
			}
		} else {
			sameMain = mainRemote == remote
		}
		if !sameMain {
			continue // skip alien chunks
		}
		if ctrlType != "" || xactID != currentXactID {
			if f.useMeta {
				// temporary/control chunk calls for lazy metadata read
				o.unsure = true
			}
			continue
		}
		// fs.Debugf(f, "%q belongs to %q as chunk %d", entryRemote, mainRemote, chunkNo)
		if err := o.addChunk(entry, chunkNo); err != nil {
			return nil, err
		}
	}

	if o.main == nil && len(o.chunks) == 0 {
		// Scanning hasn't found data chunks with conforming names.
		if f.useMeta || quickScan {
			// Metadata is required but absent and there are no chunks.
			return nil, fs.ErrorObjectNotFound
		}

		// Data chunks are not found and metadata is disabled.
		// Thus, we are in the "latter case" from above.
		// Let's try the postponed reading of a non-chunked file and add it
		// as a single chunk to the empty composite wrapper created above
		// with nil metadata.
		baseObj, err = f.base.NewObject(ctx, remote)
		if err == nil {
			err = o.addChunk(baseObj, 0)
		}
		if err != nil {
			return nil, err
		}
	}

	// This is either a composite object with metadata or a non-chunked
	// file without metadata. Validate it and update the total data size.
	// As an optimization, skip metadata reading here - we will call
	// readMetadata lazily when needed (reading can be expensive).
	if !quickScan {
		if err := o.validate(); err != nil {
			return nil, err
		}
	}
	return o, nil
}
// readMetadata reads composite object metadata and caches results,
// in case of critical errors metadata is not cached.
// Returns ErrMetaUnknown if an unsupported metadata format is detected.
// If object is not chunked but marked by List or NewObject for recheck,
// readMetadata will attempt to parse object as composite with fallback
// to non-chunked representation if the attempt fails.
func (o *Object) readMetadata(ctx context.Context) error {
	// return quickly if metadata is absent or has been already cached
	if !o.f.useMeta {
		o.isFull = true
	}
	if o.isFull {
		return nil
	}
	if !o.isComposite() && !o.unsure {
		// this for sure is a non-chunked standalone file
		o.isFull = true
		return nil
	}

	// validate metadata
	metaObject := o.main
	if metaObject.Size() > maxMetadataSize {
		if o.unsure {
			// this is not metadata but a foreign object
			o.unsure = false
			o.chunks = nil  // make isComposite return false
			o.isFull = true // cache results
			return nil
		}
		return ErrMetaTooBig
	}

	// size is within limits, perform consistency checks
	reader, err := metaObject.Open(ctx)
	if err != nil {
		return err
	}
	metadata, err := io.ReadAll(reader)
	_ = reader.Close() // ensure file handle is freed on windows
	if err != nil {
		return err
	}

	switch o.f.opt.MetaFormat {
	case "simplejson":
		metaInfo, madeByChunker, err := unmarshalSimpleJSON(ctx, metaObject, metadata)
		if o.unsure {
			o.unsure = false
			if !madeByChunker {
				// this is not metadata but a foreign object
				o.chunks = nil  // make isComposite return false
				o.isFull = true // cache results
				return nil
			}
		}
		switch err {
		case nil:
			// fall thru
		case ErrMetaTooBig, ErrMetaUnknown:
			return err // return these errors unwrapped for unit tests
		default:
			return fmt.Errorf("invalid metadata: %w", err)
		}
		if o.size != metaInfo.Size() || len(o.chunks) != metaInfo.nChunks {
			return errors.New("metadata doesn't match file size")
		}
		o.md5 = metaInfo.md5
		o.sha1 = metaInfo.sha1
		o.xactID = metaInfo.xactID
	}

	o.isFull = true // cache results
	o.xIDCached = true
	return nil
}

// readXactID returns the transaction ID stored in the passed metadata object
func (o *Object) readXactID(ctx context.Context) (xactID string, err error) {
	// if xactID has already been read and cached return it now
	if o.xIDCached {
		return o.xactID, nil
	}
	// Avoid reading metadata for backends that don't use xactID to identify permanent chunks
	if !o.f.useNoRename {
		return "", errors.New("readXactID requires norename transactions")
	}
	if o.main == nil {
		return "", errors.New("readXactID requires valid metaobject")
	}
	if o.main.Size() > maxMetadataSize {
		return "", nil // this was likely not a metadata object, return empty xactID but don't throw error
	}
	reader, err := o.main.Open(ctx)
	if err != nil {
		return "", err
	}
	data, err := io.ReadAll(reader)
	_ = reader.Close() // ensure file handle is freed on windows
	if err != nil {
		return "", err
	}

	switch o.f.opt.MetaFormat {
	case "simplejson":
		if len(data) > maxMetadataSizeWritten {
			return "", nil // this was likely not a metadata object, return empty xactID but don't throw error
		}
		var metadata metaSimpleJSON
		err = json.Unmarshal(data, &metadata)
		if err != nil {
			return "", nil // this was likely not a metadata object, return empty xactID but don't throw error
		}
		xactID = metadata.XactID
	}
	o.xactID = xactID
	o.xIDCached = true
	return xactID, nil
}
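
// A sketch of what a simplejson meta object holds, per the fields listed in
// the option help ("ver, size, nchunks, md5, sha1") plus the txn field
// described in the package comment. The concrete values below are
// illustrative assumptions, not taken from the source:
//
//	{"ver":2,"size":4294967296,"nchunks":2,"md5":"9e107d9d...","txn":"51v0rs"}
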
// put implements Put, PutStream, PutUnchecked, Update
func (f *Fs) put(
	ctx context.Context, in io.Reader, src fs.ObjectInfo, remote string, options []fs.OpenOption,
	basePut putFn, action string, target fs.Object,
) (obj fs.Object, err error) {
	// Perform consistency checks
	if err := f.forbidChunk(src, remote); err != nil {
		return nil, fmt.Errorf("%s refused: %w", action, err)
	}
	if target == nil {
		// Get target object with a quick directory scan
		// skip metadata check if target object does not exist.
		// ignore not-chunked objects, skip chunk size checks.
		if obj, err := f.scanObject(ctx, remote, true); err == nil {
			target = obj
		}
	}
	if target != nil {
		obj := target.(*Object)
		if err := obj.readMetadata(ctx); err == ErrMetaUnknown {
			// refuse to update a file of unsupported format
			return nil, fmt.Errorf("refusing to %s: %w", action, err)
		}
	}

	// Prepare to upload
	c := f.newChunkingReader(src)
	wrapIn := c.wrapStream(ctx, in, src)

	var metaObject fs.Object
	defer func() {
		if err != nil {
			c.rollback(ctx, metaObject)
		}
	}()

	baseRemote := remote
	xactID, errXact := f.newXactID(ctx, baseRemote)
	if errXact != nil {
		return nil, errXact
	}

	// Transfer chunks data
	for c.chunkNo = 0; !c.done; c.chunkNo++ {
		if c.chunkNo > maxSafeChunkNumber {
			return nil, ErrChunkOverflow
		}

		tempRemote := f.makeChunkName(baseRemote, c.chunkNo, "", xactID)
		size := c.sizeLeft
		if size > c.chunkSize {
			size = c.chunkSize
		}
		savedReadCount := c.readCount

		// If a single chunk is expected, avoid the extra rename operation
		chunkRemote := tempRemote
		if c.expectSingle && c.chunkNo == 0 && optimizeFirstChunk {
			chunkRemote = baseRemote
		}
		info := f.wrapInfo(src, chunkRemote, size)

		// Refill chunkLimit and let basePut repeatedly call chunkingReader.Read()
		c.chunkLimit = c.chunkSize
		// TODO: handle range/limit options
		chunk, errChunk := basePut(ctx, wrapIn, info, options...)
		if errChunk != nil {
			return nil, errChunk
		}

		if size > 0 && c.readCount == savedReadCount && c.expectSingle {
			// basePut returned success but didn't call chunkingReader's Read.
			// This is possible if wrapped remote has performed the put by hash
			// because chunker bridges Hash from source for non-chunked files.
			// Hence, force Read here to update accounting and hashsums.
			if err := c.dummyRead(wrapIn, size); err != nil {
				return nil, err
			}
		}
		if c.sizeLeft == 0 && !c.done {
			// The file has been apparently put by hash, force completion.
			c.done = true
		}

		// Expected a single chunk but more to come, so name it as usual.
		if !c.done && chunkRemote != tempRemote {
			fs.Infof(chunk, "Expected single chunk, got more")
			chunkMoved, errMove := f.baseMove(ctx, chunk, tempRemote, delFailed)
			if errMove != nil {
				silentlyRemove(ctx, chunk)
				return nil, errMove
			}
			chunk = chunkMoved
		}

		// Wrapped remote may or may not have seen EOF from chunking reader,
		// e.g. the box multi-uploader reads exactly the chunk size specified
		// and skips the "EOF" read. Hence, switch to next limit here.
		if !(c.chunkLimit == 0 || c.chunkLimit == c.chunkSize || c.sizeTotal == -1 || c.done) {
			silentlyRemove(ctx, chunk)
			return nil, fmt.Errorf("destination ignored %d data bytes", c.chunkLimit)
		}
		c.chunkLimit = c.chunkSize

		c.chunks = append(c.chunks, chunk)
	}

	// Validate uploaded size
	if c.sizeTotal != -1 && c.readCount != c.sizeTotal {
		return nil, fmt.Errorf("incorrect upload size %d != %d", c.readCount, c.sizeTotal)
	}

	// Check for input that looks like valid metadata
	needMeta := len(c.chunks) > 1
	if c.readCount <= maxMetadataSize && len(c.chunks) == 1 {
		_, madeByChunker, _ := unmarshalSimpleJSON(ctx, c.chunks[0], c.smallHead)
		needMeta = madeByChunker
	}

	// Finalize small object as non-chunked.
	// This can be bypassed, and single chunk with metadata will be
	// created if forced by consistent hashing or due to unsafe input.
	if !needMeta && !f.hashAll && f.useMeta {
		// If previous object was chunked, remove its chunks
		f.removeOldChunks(ctx, baseRemote)

		// Rename single data chunk in place
		chunk := c.chunks[0]
		if chunk.Remote() != baseRemote {
			chunkMoved, errMove := f.baseMove(ctx, chunk, baseRemote, delAlways)
			if errMove != nil {
				silentlyRemove(ctx, chunk)
				return nil, errMove
			}
			chunk = chunkMoved
		}

		return f.newObject("", chunk, nil), nil
	}

	// Validate total size of data chunks
	var sizeTotal int64
	for _, chunk := range c.chunks {
		sizeTotal += chunk.Size()
	}
	if sizeTotal != c.readCount {
		return nil, fmt.Errorf("incorrect chunks size %d != %d", sizeTotal, c.readCount)
	}

	// If previous object was chunked, remove its chunks
	f.removeOldChunks(ctx, baseRemote)

	if !f.useNoRename {
		// The transaction suffix will be removed for backends with quick rename operations
		for chunkNo, chunk := range c.chunks {
			chunkRemote := f.makeChunkName(baseRemote, chunkNo, "", "")
			chunkMoved, errMove := f.baseMove(ctx, chunk, chunkRemote, delFailed)
			if errMove != nil {
				return nil, errMove
			}
			c.chunks[chunkNo] = chunkMoved
		}
		xactID = ""
	}

	if !f.useMeta {
		// Remove stale metadata, if any
		oldMeta, errOldMeta := f.base.NewObject(ctx, baseRemote)
		if errOldMeta == nil {
			silentlyRemove(ctx, oldMeta)
		}

		o := f.newObject(baseRemote, nil, c.chunks)
		o.size = sizeTotal
		return o, nil
	}

	// Update meta object
	var metadata []byte
	switch f.opt.MetaFormat {
	case "simplejson":
		c.updateHashes()
		metadata, err = marshalSimpleJSON(ctx, sizeTotal, len(c.chunks), c.md5, c.sha1, xactID)
	}
	if err == nil {
		metaInfo := f.wrapInfo(src, baseRemote, int64(len(metadata)))
		metaObject, err = basePut(ctx, bytes.NewReader(metadata), metaInfo)
	}
	if err != nil {
		return nil, err
	}

	o := f.newObject("", metaObject, c.chunks)
	o.size = sizeTotal
	o.xactID = xactID
	return o, nil
}

type putFn func(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error)
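
// To make the upload loop above concrete (numbers illustrative): with the
// default chunk_size of 2 GiB, a 5 GiB source is written as three temporary
// chunks of 2 GiB, 2 GiB and 1 GiB; in "rename" mode they are then moved to
// their active names and, with meta format "simplejson", a small meta object
// is written under the base name.
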
type chunkingReader struct {
	baseReader   io.Reader
	sizeTotal    int64
	sizeLeft     int64
	readCount    int64
	chunkSize    int64
	chunkLimit   int64
	chunkNo      int
	err          error
	done         bool
	chunks       []fs.Object
	expectSingle bool
	smallHead    []byte
	fs           *Fs
	hasher       gohash.Hash
	md5          string
	sha1         string
}

func (f *Fs) newChunkingReader(src fs.ObjectInfo) *chunkingReader {
	c := &chunkingReader{
		fs:        f,
		chunkSize: int64(f.opt.ChunkSize),
		sizeTotal: src.Size(),
	}
	c.chunkLimit = c.chunkSize
	c.sizeLeft = c.sizeTotal
	c.expectSingle = c.sizeTotal >= 0 && c.sizeTotal <= c.chunkSize
	return c
}

func (c *chunkingReader) wrapStream(ctx context.Context, in io.Reader, src fs.ObjectInfo) io.Reader {
	baseIn, wrapBack := accounting.UnWrap(in)

	switch {
	case c.fs.useMD5:
		srcObj := fs.UnWrapObjectInfo(src)
		if srcObj != nil && srcObj.Fs().Features().SlowHash {
			fs.Debugf(src, "skip slow MD5 on source file, hashing in-transit")
			c.hasher = md5.New()
			break
		}
		if c.md5, _ = src.Hash(ctx, hash.MD5); c.md5 == "" {
			if c.fs.hashFallback {
				c.sha1, _ = src.Hash(ctx, hash.SHA1)
			} else {
				c.hasher = md5.New()
			}
		}
	case c.fs.useSHA1:
		srcObj := fs.UnWrapObjectInfo(src)
		if srcObj != nil && srcObj.Fs().Features().SlowHash {
			fs.Debugf(src, "skip slow SHA1 on source file, hashing in-transit")
			c.hasher = sha1.New()
			break
		}
		if c.sha1, _ = src.Hash(ctx, hash.SHA1); c.sha1 == "" {
			if c.fs.hashFallback {
				c.md5, _ = src.Hash(ctx, hash.MD5)
			} else {
				c.hasher = sha1.New()
			}
		}
	}

	if c.hasher != nil {
		baseIn = io.TeeReader(baseIn, c.hasher)
	}
	c.baseReader = baseIn
	return wrapBack(c)
}

func (c *chunkingReader) updateHashes() {
	if c.hasher == nil {
		return
	}
	switch {
	case c.fs.useMD5:
		c.md5 = hex.EncodeToString(c.hasher.Sum(nil))
	case c.fs.useSHA1:
		c.sha1 = hex.EncodeToString(c.hasher.Sum(nil))
	}
}

// Note: Read is not called if wrapped remote performs put by hash.
func (c *chunkingReader) Read(buf []byte) (bytesRead int, err error) {
	if c.chunkLimit <= 0 {
		// Chunk complete - switch to next one.
		// Note #1:
		// We might not get here because some remotes (e.g. box multi-uploader)
		// read the specified size exactly and skip the concluding EOF Read.
		// Then a check in the put loop will kick in.
		// Note #2:
		// The crypt backend after receiving EOF here will call Read again
		// and we must insist on returning EOF, so we postpone refilling
		// chunkLimit to the main loop.
		return 0, io.EOF
	}
	if int64(len(buf)) > c.chunkLimit {
		buf = buf[0:c.chunkLimit]
	}
	bytesRead, err = c.baseReader.Read(buf)
	if err != nil && err != io.EOF {
		c.err = err
		c.done = true
		return
	}
	c.accountBytes(int64(bytesRead))
	if c.chunkNo == 0 && c.expectSingle && bytesRead > 0 && c.readCount <= maxMetadataSize {
		c.smallHead = append(c.smallHead, buf[:bytesRead]...)
	}
	if bytesRead == 0 && c.sizeLeft == 0 {
		err = io.EOF // Force EOF when no data left.
	}
	if err == io.EOF {
		c.done = true
	}
	return
}

func (c *chunkingReader) accountBytes(bytesRead int64) {
	c.readCount += bytesRead
	c.chunkLimit -= bytesRead
	if c.sizeLeft != -1 {
		c.sizeLeft -= bytesRead
	}
}
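
// How the chunkLimit gate plays out in practice (sizes illustrative):
// with chunkSize = 100 and a 250-byte source, the first basePut drains Read
// until chunkLimit hits 0 and Read answers io.EOF (chunk 1 = 100 bytes);
// the put loop refills chunkLimit for chunk 2 (100 bytes) and chunk 3
// (50 bytes), after which sizeLeft == 0 forces the real EOF and sets done.
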
// dummyRead updates accounting, hashsums, etc. by simulating reads
func (c *chunkingReader) dummyRead(in io.Reader, size int64) error {
	if c.hasher == nil && c.readCount+size > maxMetadataSize {
		c.accountBytes(size)
		return nil
	}
	const bufLen = 1048576 // 1 MiB
	buf := make([]byte, bufLen)
	for size > 0 {
		n := size
		if n > bufLen {
			n = bufLen
		}
		if _, err := io.ReadFull(in, buf[0:n]); err != nil {
			return err
		}
		size -= n
	}
	return nil
}

// rollback removes uploaded temporary chunks
func (c *chunkingReader) rollback(ctx context.Context, metaObject fs.Object) {
	if metaObject != nil {
		c.chunks = append(c.chunks, metaObject)
	}
	for _, chunk := range c.chunks {
		if err := chunk.Remove(ctx); err != nil {
			fs.Errorf(chunk, "Failed to remove temporary chunk: %v", err)
		}
	}
}

func (f *Fs) removeOldChunks(ctx context.Context, remote string) {
	oldFsObject, err := f.NewObject(ctx, remote)
	if err == nil {
		oldObject := oldFsObject.(*Object)
		for _, chunk := range oldObject.chunks {
			if err := chunk.Remove(ctx); err != nil {
				fs.Errorf(chunk, "Failed to remove old chunk: %v", err)
			}
		}
	}
}

// Put into the remote path with the given modTime and size.
//
// May create the object even if it returns an error - if so
// will return the object and the error, otherwise will return
// nil and the error
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
	return f.put(ctx, in, src, src.Remote(), options, f.base.Put, "put", nil)
}

// PutStream uploads to the remote path with the modTime given of indeterminate size
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
	return f.put(ctx, in, src, src.Remote(), options, f.base.Features().PutStream, "upload", nil)
}

// Update in to the object with the modTime given of the given size
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
	basePut := o.f.base.Put
	if src.Size() < 0 {
		basePut = o.f.base.Features().PutStream
		if basePut == nil {
			return errors.New("wrapped file system does not support streaming uploads")
		}
	}
	oNew, err := o.f.put(ctx, in, src, o.Remote(), options, basePut, "update", o)
	if err == nil {
		*o = *oNew.(*Object)
	}
	return err
}

// PutUnchecked uploads the object
//
// This will create a duplicate if we upload a new file without
// checking to see if there is one already - use Put() for that.
func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
	do := f.base.Features().PutUnchecked
	if do == nil {
		return nil, errors.New("can't PutUnchecked")
	}
	// TODO: handle range/limit options and really chunk stream here!
	o, err := do(ctx, in, f.wrapInfo(src, "", -1))
	if err != nil {
		return nil, err
	}
	return f.newObject("", o, nil), nil
}
// Hashes returns the supported hash sets.
// Chunker advertises a hash type if and only if it can be calculated
// for files of any size, non-chunked or composite.
func (f *Fs) Hashes() hash.Set {
	// composites AND no fallback AND (chunker OR wrapped Fs will hash all non-chunked's)
	if f.useMD5 && !f.hashFallback && (f.hashAll || f.base.Hashes().Contains(hash.MD5)) {
		return hash.NewHashSet(hash.MD5)
	}
	if f.useSHA1 && !f.hashFallback && (f.hashAll || f.base.Hashes().Contains(hash.SHA1)) {
		return hash.NewHashSet(hash.SHA1)
	}
	return hash.NewHashSet() // can't provide strong guarantees
}

// Mkdir makes the directory (container, bucket)
//
// Shouldn't return an error if it already exists
func (f *Fs) Mkdir(ctx context.Context, dir string) error {
	if err := f.forbidChunk(dir, dir); err != nil {
		return fmt.Errorf("can't mkdir: %w", err)
	}
	return f.base.Mkdir(ctx, dir)
}

// MkdirMetadata makes the root directory of the Fs object
func (f *Fs) MkdirMetadata(ctx context.Context, dir string, metadata fs.Metadata) (fs.Directory, error) {
	if do := f.base.Features().MkdirMetadata; do != nil {
		return do(ctx, dir, metadata)
	}
	return nil, fs.ErrorNotImplemented
}

// Rmdir removes the directory (container, bucket) if empty
//
// Return an error if it doesn't exist or isn't empty
func (f *Fs) Rmdir(ctx context.Context, dir string) error {
	return f.base.Rmdir(ctx, dir)
}

// Purge all files in the directory
//
// Implement this if you have a way of deleting all the files
// quicker than just running Remove() on the result of List()
//
// Return an error if it doesn't exist.
//
// This command will chain to `purge` from wrapped remote.
// As a result it removes not only composite chunker files with their
// active chunks but also all hidden temporary chunks in the directory.
func (f *Fs) Purge(ctx context.Context, dir string) error {
	do := f.base.Features().Purge
	if do == nil {
		return fs.ErrorCantPurge
	}
	return do(ctx, dir)
}

// Remove an object (chunks and metadata, if any)
//
// Remove deletes only active chunks of the composite object.
// It does not try to look for temporary chunks because they could belong
// to another command modifying this composite file in parallel.
//
// Commands normally clean up all temporary chunks in case of a failure.
// However, if rclone dies unexpectedly, it can leave hidden temporary
// chunks, which cannot be discovered using the `list` command.
// Remove does not try to search for such chunks or to delete them.
// Sometimes this can lead to strange results e.g. when `list` shows that
// directory is empty but `rmdir` refuses to remove it because on the
// level of wrapped remote it's actually *not* empty.
// As a workaround users can use `purge` to forcibly remove it.
//
// In future, a flag `--chunker-delete-hidden` may be added which tells
// Remove to search directory for hidden chunks and remove them too
// (at the risk of breaking parallel commands).
//
// Remove is the only operation allowed on the composite files with
// invalid or future metadata format.
// We don't let user copy/move/update unsupported composite files.
// Let's at least let her get rid of them, just complain loudly.
// Remove an object (chunks and metadata, if any)
//
// Remove deletes only active chunks of the composite object.
// It does not try to look for temporary chunks because they could belong
// to another command modifying this composite file in parallel.
//
// Commands normally clean up all temporary chunks in case of a failure.
// However, if rclone dies unexpectedly, it can leave hidden temporary
// chunks, which cannot be discovered using the `list` command.
// Remove does not try to search for such chunks or to delete them.
// Sometimes this can lead to strange results, e.g. when `list` shows that
// a directory is empty but `rmdir` refuses to remove it because on the
// level of the wrapped remote it's actually *not* empty.
// As a workaround users can use `purge` to forcibly remove it.
//
// In future, a flag `--chunker-delete-hidden` may be added which tells
// Remove to search the directory for hidden chunks and remove them too
// (at the risk of breaking parallel commands).
//
// Remove is the only operation allowed on composite files with
// invalid or future metadata format.
// We don't let the user copy/move/update unsupported composite files,
// but let's at least let them get rid of such files, just complain loudly.
//
// This can litter the directory with orphan chunks of unsupported types,
// but as long as we remove the meta object, even future releases will
// treat the composite file as removed and refuse to act upon it.
//
// Disclaimer: corruption can still happen if an unsupported file is removed
// and then recreated with the same name.
// Unsupported control chunks will get picked up again by a more recent
// rclone version with unexpected results. This can be helped by
// the `delete hidden` flag above, and at least the user has been warned.
func (o *Object) Remove(ctx context.Context) (err error) {
	if err := o.f.forbidChunk(o, o.Remote()); err != nil {
		// operations.Move can still call Remove if chunker's Move refuses
		// to corrupt file in hard mode. Hence, refuse to Remove, too.
		return fmt.Errorf("refuse to corrupt: %w", err)
	}
	if err := o.readMetadata(ctx); err == ErrMetaUnknown {
		// Proceed but warn user that unexpected things can happen.
		fs.Errorf(o, "Removing a file with unsupported metadata: %v", err)
	}

	// Remove non-chunked file or meta object of a composite file.
	if o.main != nil {
		err = o.main.Remove(ctx)
	}

	// Remove only active data chunks, ignore any temporary chunks that
	// might be created in parallel by other transactions.
	for _, chunk := range o.chunks {
		chunkErr := chunk.Remove(ctx)
		if err == nil {
			err = chunkErr
		}
	}

	// There are no known control chunks to remove atm.
	return err
}
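// Illustrative sketch, not part of the original source: when a crashed run
// leaves hidden temporary chunks behind and `rmdir` refuses to delete a
// seemingly empty directory, chaining to purge (as the comments above
// suggest) removes the directory together with the hidden chunks. The
// helper name examplePurgeLeftovers is hypothetical.
func examplePurgeLeftovers(ctx context.Context, f *Fs, dir string) error {
	return operations.Purge(ctx, f, dir)
}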
// copyOrMove implements copy or move
func (f *Fs) copyOrMove(ctx context.Context, o *Object, remote string, do copyMoveFn, md5, sha1, opName string) (fs.Object, error) {
	if err := f.forbidChunk(o, remote); err != nil {
		return nil, fmt.Errorf("can't %s: %w", opName, err)
	}
	if err := o.readMetadata(ctx); err != nil {
		// Refuse to copy/move composite files with invalid or future
		// metadata format which might involve unsupported chunk types.
		return nil, fmt.Errorf("can't %s this file: %w", opName, err)
	}
	if !o.isComposite() {
		fs.Debugf(o, "%s non-chunked object...", opName)
		oResult, err := do(ctx, o.mainChunk(), remote) // chain operation to a single wrapped chunk
		if err != nil {
			return nil, err
		}
		return f.newObject("", oResult, nil), nil
	}

	fs.Debugf(o, "%s %d data chunks...", opName, len(o.chunks))
	mainRemote := o.remote
	var newChunks []fs.Object
	var err error

	// Copy/move active data chunks.
	// Ignore possible temporary chunks being created by parallel operations.
	for _, chunk := range o.chunks {
		chunkRemote := chunk.Remote()
		if !strings.HasPrefix(chunkRemote, mainRemote) {
			err = fmt.Errorf("invalid chunk name %q", chunkRemote)
			break
		}
		chunkSuffix := chunkRemote[len(mainRemote):]
		chunkResult, chunkErr := do(ctx, chunk, remote+chunkSuffix)
		if chunkErr != nil {
			// assign through the outer err (don't shadow it with `:=`)
			// so the rollback below sees the failure
			err = chunkErr
			break
		}
		newChunks = append(newChunks, chunkResult)
	}

	// Copy or move old metadata.
	// There are no known control chunks to move/copy atm.
	var metaObject fs.Object
	if err == nil && o.main != nil {
		metaObject, err = do(ctx, o.main, remote)
	}
	if err != nil {
		for _, chunk := range newChunks {
			silentlyRemove(ctx, chunk)
		}
		return nil, err
	}

	// Create wrapping object, calculate and validate total size
	newObj := f.newObject(remote, metaObject, newChunks)
	err = newObj.validate()
	if err != nil {
		silentlyRemove(ctx, newObj)
		return nil, err
	}

	// Update metadata
	var metadata []byte
	switch f.opt.MetaFormat {
	case "simplejson":
		metadata, err = marshalSimpleJSON(ctx, newObj.size, len(newChunks), md5, sha1, o.xactID)
		if err == nil {
			metaInfo := f.wrapInfo(metaObject, "", int64(len(metadata)))
			err = newObj.main.Update(ctx, bytes.NewReader(metadata), metaInfo)
		}
	case "none":
		if newObj.main != nil {
			err = newObj.main.Remove(ctx)
		}
	}

	// Return the composite object
	if err != nil {
		silentlyRemove(ctx, newObj)
		return nil, err
	}
	return newObj, nil
}

type copyMoveFn func(context.Context, fs.Object, string) (fs.Object, error)

func (f *Fs) okForServerSide(ctx context.Context, src fs.Object, opName string) (obj *Object, md5, sha1 string, ok bool) {
	var diff string
	obj, ok = src.(*Object)

	switch {
	case !ok:
		diff = "remote types"
	case !operations.SameConfig(f.base, obj.f.base):
		diff = "wrapped remotes"
	case f.opt.ChunkSize != obj.f.opt.ChunkSize:
		diff = "chunk sizes"
	case f.opt.NameFormat != obj.f.opt.NameFormat:
		diff = "chunk name formats"
	case f.opt.StartFrom != obj.f.opt.StartFrom:
		diff = "chunk numbering"
	case f.opt.MetaFormat != obj.f.opt.MetaFormat:
		diff = "meta formats"
	}
	if diff != "" {
		fs.Debugf(src, "Can't %s - different %s", opName, diff)
		ok = false
		return
	}

	if obj.unsure {
		// ensure object is composite if need to re-read metadata
		_ = obj.readMetadata(ctx)
	}
	requireMetaHash := obj.isComposite() && f.opt.MetaFormat == "simplejson"
	if !requireMetaHash && !f.hashAll {
		ok = true // hash is not required for metadata
		return
	}

	switch {
	case f.useMD5:
		md5, _ = obj.Hash(ctx, hash.MD5)
		ok = md5 != ""
		if !ok && f.hashFallback {
			sha1, _ = obj.Hash(ctx, hash.SHA1)
			ok = sha1 != ""
		}
	case f.useSHA1:
		sha1, _ = obj.Hash(ctx, hash.SHA1)
		ok = sha1 != ""
		if !ok && f.hashFallback {
			md5, _ = obj.Hash(ctx, hash.MD5)
			ok = md5 != ""
		}
	default:
		ok = false
	}
	if !ok {
		fs.Debugf(src, "Can't %s - required hash not found", opName)
	}
	return
}
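// Illustrative sketch, not part of the original source: a caller-side view
// of the server-side gate above. Copy (below) refuses with fs.ErrorCantCopy
// when okForServerSide finds mismatched chunker configs or a missing hash,
// and callers are expected to fall back to a streamed copy. The helper name
// exampleCopyWithFallback is hypothetical.
func exampleCopyWithFallback(ctx context.Context, f *Fs, src fs.Object, remote string) (fs.Object, error) {
	dst, err := f.Copy(ctx, src, remote)
	if errors.Is(err, fs.ErrorCantCopy) {
		// a real caller would stream the data here instead
		return nil, fmt.Errorf("server-side copy not possible: %w", err)
	}
	return dst, err
}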
// Copy src to this remote using server-side copy operations.
//
// This is stored with the remote path given.
//
// It returns the destination Object and a possible error.
//
// Will only be called if src.Fs().Name() == f.Name()
//
// If it isn't possible then return fs.ErrorCantCopy
func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
	baseCopy := f.base.Features().Copy
	if baseCopy == nil {
		return nil, fs.ErrorCantCopy
	}
	obj, md5, sha1, ok := f.okForServerSide(ctx, src, "copy")
	if !ok {
		return nil, fs.ErrorCantCopy
	}
	return f.copyOrMove(ctx, obj, remote, baseCopy, md5, sha1, "copy")
}

// Move src to this remote using server-side move operations.
//
// This is stored with the remote path given.
//
// It returns the destination Object and a possible error.
//
// Will only be called if src.Fs().Name() == f.Name()
//
// If it isn't possible then return fs.ErrorCantMove
func (f *Fs) Move(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
	baseMove := func(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
		return f.baseMove(ctx, src, remote, delNever)
	}
	obj, md5, sha1, ok := f.okForServerSide(ctx, src, "move")
	if !ok {
		return nil, fs.ErrorCantMove
	}
	return f.copyOrMove(ctx, obj, remote, baseMove, md5, sha1, "move")
}

// baseMove chains to the wrapped Move or simulates it by Copy+Delete
func (f *Fs) baseMove(ctx context.Context, src fs.Object, remote string, delMode int) (fs.Object, error) {
	var (
		dest fs.Object
		err  error
	)
	switch delMode {
	case delAlways:
		dest, err = f.base.NewObject(ctx, remote)
	case delFailed:
		dest, err = operations.Move(ctx, f.base, nil, remote, src)
		if err == nil {
			return dest, err
		}
		dest, err = f.base.NewObject(ctx, remote)
	case delNever:
		// fall through, the default
	}
	if err != nil {
		dest = nil
	}
	return operations.Move(ctx, f.base, dest, remote, src)
}

// DirMove moves src, srcRemote to this remote at dstRemote
// using server-side move operations.
//
// Will only be called if src.Fs().Name() == f.Name()
//
// If it isn't possible then return fs.ErrorCantDirMove
//
// If destination exists then return fs.ErrorDirExists
func (f *Fs) DirMove(ctx context.Context, src fs.Fs, srcRemote, dstRemote string) error {
	do := f.base.Features().DirMove
	if do == nil {
		return fs.ErrorCantDirMove
	}
	srcFs, ok := src.(*Fs)
	if !ok {
		fs.Debugf(srcFs, "Can't move directory - not same remote type")
		return fs.ErrorCantDirMove
	}
	return do(ctx, srcFs.base, srcRemote, dstRemote)
}

// DirSetModTime sets the directory modtime for dir
func (f *Fs) DirSetModTime(ctx context.Context, dir string, modTime time.Time) error {
	if do := f.base.Features().DirSetModTime; do != nil {
		return do(ctx, dir, modTime)
	}
	return fs.ErrorNotImplemented
}
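// Illustrative sketch, not part of the original source: the delMode table
// near the top of the file in action. With delFailed, baseMove first tries
// a plain move of a wrapped (base remote) object and only deletes an
// existing destination and retries when that move fails. The helper name
// exampleForceMove is hypothetical.
func exampleForceMove(ctx context.Context, f *Fs, baseSrc fs.Object, remote string) (fs.Object, error) {
	return f.baseMove(ctx, baseSrc, remote, delFailed)
}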
func (f *Fs) CleanUp(ctx context.Context) error {
	do := f.base.Features().CleanUp
	if do == nil {
		return errors.New("not supported by underlying remote")
	}
	return do(ctx)
}

// About gets quota information from the Fs
func (f *Fs) About(ctx context.Context) (*fs.Usage, error) {
	do := f.base.Features().About
	if do == nil {
		return nil, errors.New("not supported by underlying remote")
	}
	return do(ctx)
}

// UnWrap returns the Fs that this Fs is wrapping
func (f *Fs) UnWrap() fs.Fs {
	return f.base
}

// WrapFs returns the Fs that is wrapping this Fs
func (f *Fs) WrapFs() fs.Fs {
	return f.wrapper
}

// SetWrapper sets the Fs that is wrapping this Fs
func (f *Fs) SetWrapper(wrapper fs.Fs) {
	f.wrapper = wrapper
}

// ChangeNotify calls the passed function with a path
// that has had changes. If the implementation
// uses polling, it should adhere to the given interval.
//
// Data chunk names are replaced with the name of the composite file.
// Temporary and control chunks are ignored.
func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryType), pollIntervalChan <-chan time.Duration) {
	do := f.base.Features().ChangeNotify
	if do == nil {
		return
	}
	wrappedNotifyFunc := func(path string, entryType fs.EntryType) {
		// fs.Debugf(f, "ChangeNotify: path %q entryType %d", path, entryType)
		if entryType == fs.EntryObject {
			mainPath, _, _, xactID := f.parseChunkName(path)
			metaXactID := ""
			if f.useNoRename {
				metaObject, _ := f.base.NewObject(ctx, mainPath)
				dummyObject := f.newObject("", metaObject, nil)
				metaXactID, _ = dummyObject.readXactID(ctx)
			}
			if mainPath != "" && xactID == metaXactID {
				path = mainPath
			}
		}
		notifyFunc(path, entryType)
	}
	do(ctx, wrappedNotifyFunc, pollIntervalChan)
}
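// Illustrative sketch, not part of the original source: the wrapped
// callback receives composite file names rather than chunk names, so a
// change to a data chunk of "dir/big.bin" is reported as "dir/big.bin".
// The helper name exampleWatch is hypothetical; the buffered channel keeps
// the interval send from blocking if the wrapped remote does not poll.
func exampleWatch(ctx context.Context, f *Fs) {
	pollInterval := make(chan time.Duration, 1)
	f.ChangeNotify(ctx, func(path string, entryType fs.EntryType) {
		fs.Infof(nil, "changed: %q (entry type %d)", path, entryType)
	}, pollInterval)
	pollInterval <- time.Minute // ask a polling implementation to poll every minute
}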
func (f *Fs) Shutdown(ctx context.Context) error {
	do := f.base.Features().Shutdown
	if do == nil {
		return nil
	}
	return do(ctx)
}

// Object represents a composite file wrapping one or more data chunks
type Object struct {
	remote    string
	main      fs.Object   // meta object if file is composite, or wrapped non-chunked file, nil if meta format is 'none'
	chunks    []fs.Object // active data chunks if file is composite, or wrapped file as a single chunk if meta format is 'none'
	size      int64       // cached total size of chunks in a composite file or -1 for non-chunked files
	isFull    bool        // true if metadata has been read
	xIDCached bool        // true if xactID has been read
	unsure    bool        // true if need to read metadata to detect object type
	xactID    string      // transaction ID for "norename" or empty string for "renamed" chunks
	md5       string
	sha1      string
	f         *Fs
}

func (o *Object) addChunk(chunk fs.Object, chunkNo int) error {
	if chunkNo < 0 {
		return fmt.Errorf("invalid chunk number %d", chunkNo+o.f.opt.StartFrom)
	}
	if chunkNo == len(o.chunks) {
		o.chunks = append(o.chunks, chunk)
		return nil
	}
	if chunkNo > maxSafeChunkNumber {
		return ErrChunkOverflow
	}
	if chunkNo > len(o.chunks) {
		newChunks := make([]fs.Object, chunkNo+1, (chunkNo+1)*2)
		copy(newChunks, o.chunks)
		o.chunks = newChunks
	}
	if o.chunks[chunkNo] != nil {
		return fmt.Errorf("duplicate chunk number %d", chunkNo+o.f.opt.StartFrom)
	}
	o.chunks[chunkNo] = chunk
	return nil
}

// validate verifies the object internals and updates total size
func (o *Object) validate() error {
	if !o.isComposite() {
		_ = o.mainChunk() // verify that single wrapped chunk exists
		return nil
	}

	metaObject := o.main // this file is composite - o.main refers to meta object (or nil if meta format is 'none')
	if metaObject != nil && metaObject.Size() > maxMetadataSize {
		// metadata of a chunked file must be a tiny piece of json
		o.size = -1
		return fmt.Errorf("%q metadata is too large", o.remote)
	}

	var totalSize int64
	for _, chunk := range o.chunks {
		if chunk == nil {
			o.size = -1
			return fmt.Errorf("%q has missing chunks", o)
		}
		totalSize += chunk.Size()
	}
	o.size = totalSize // cache the total data size
	return nil
}

func (f *Fs) newObject(remote string, main fs.Object, chunks []fs.Object) *Object {
	var size int64 = -1
	if main != nil {
		size = main.Size()
		if remote == "" {
			remote = main.Remote()
		}
	}
	return &Object{
		remote: remote,
		main:   main,
		size:   size,
		f:      f,
		chunks: chunks,
	}
}
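// Illustrative sketch, not part of the original source: how scanning code
// assembles a composite object. Chunks are added by zero-based index in any
// order; validate then rejects gaps and caches the total size. The helper
// name exampleAssemble is hypothetical.
func exampleAssemble(f *Fs, meta fs.Object, chunks []fs.Object) (*Object, error) {
	o := f.newObject("", meta, nil)
	for i, chunk := range chunks {
		if err := o.addChunk(chunk, i); err != nil {
			return nil, err
		}
	}
	if err := o.validate(); err != nil {
		return nil, err
	}
	return o, nil
}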
// mainChunk returns:
// - a wrapped object for non-chunked files
// - meta object for chunked files with metadata
// - first chunk for chunked files without metadata
// Never returns nil.
func (o *Object) mainChunk() fs.Object {
	if o.main != nil {
		return o.main // meta object or non-chunked wrapped file
	}
	if o.chunks != nil {
		return o.chunks[0] // first chunk of a chunked composite file
	}
	panic("invalid chunked object") // very unlikely
}

func (o *Object) isComposite() bool {
	return o.chunks != nil
}

// Fs returns read only access to the Fs that this object is part of
func (o *Object) Fs() fs.Info {
	return o.f
}

// Return a string version
func (o *Object) String() string {
	if o == nil {
		return "<nil>"
	}
	return o.remote
}

// Remote returns the remote path
func (o *Object) Remote() string {
	return o.remote
}

// Size returns the size of the file
func (o *Object) Size() int64 {
	if o.isComposite() {
		return o.size // total size of data chunks in a composite file
	}
	return o.mainChunk().Size() // size of wrapped non-chunked file
}

// Storable returns whether object is storable
func (o *Object) Storable() bool {
	return o.mainChunk().Storable()
}

// ModTime returns the modification time of the file
func (o *Object) ModTime(ctx context.Context) time.Time {
	return o.mainChunk().ModTime(ctx)
}

// SetModTime sets the modification time of the file
func (o *Object) SetModTime(ctx context.Context, mtime time.Time) error {
	if err := o.readMetadata(ctx); err != nil {
		return err // refuse to act on unsupported format
	}
	return o.mainChunk().SetModTime(ctx, mtime)
}

// Hash returns the selected checksum of the file.
// If no checksum is available it returns "".
//
// Hash won't fail with an `unsupported` error but returns an empty
// hash string if a particular hashsum type is not supported.
//
// Hash takes the hashsum from metadata if available or requests it
// from the wrapped remote for non-chunked files.
// Metadata (if meta format is not 'none') is by default kept
// only for composite files. In the "All" hashing mode chunker
// will force metadata on all files if a particular hashsum type
// is not supported by the wrapped remote.
//
// Note that Hash prefers the wrapped hashsum for a non-chunked
// file, then tries to read it from metadata. This in theory
// handles the unusual case when a small file has been tampered with
// at the level of the wrapped remote but chunker is unaware of that.
func (o *Object) Hash(ctx context.Context, hashType hash.Type) (string, error) {
	if err := o.readMetadata(ctx); err != nil {
		return "", err // valid metadata is required to get hash, abort
	}
	if !o.isComposite() {
		// First, chain to the wrapped non-chunked file if possible.
		if value, err := o.mainChunk().Hash(ctx, hashType); err == nil && value != "" {
			return value, nil
		}
	}

	// Try hash from metadata if the file is composite or if wrapped remote fails.
	switch hashType {
	case hash.MD5:
		if o.md5 == "" {
			return "", nil
		}
		return o.md5, nil
	case hash.SHA1:
		if o.sha1 == "" {
			return "", nil
		}
		return o.sha1, nil
	default:
		return "", hash.ErrUnsupported
	}
}

// UnWrap returns the wrapped Object
func (o *Object) UnWrap() fs.Object {
	return o.mainChunk()
}
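// Illustrative sketch, not part of the original source: Hash returns an
// empty string with a nil error when a checksum is simply unknown, so
// callers must treat "" as "no checksum" rather than as a mismatch. The
// helper name exampleChecksum is hypothetical.
func exampleChecksum(ctx context.Context, o *Object) (string, error) {
	sum, err := o.Hash(ctx, hash.MD5)
	if err != nil {
		return "", err // e.g. invalid or future metadata format
	}
	if sum == "" {
		return "", errors.New("md5 is not known for this object")
	}
	return sum, nil
}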
// Open opens the file for read. Call Close() on the returned io.ReadCloser
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.ReadCloser, err error) {
	if err := o.readMetadata(ctx); err != nil {
		// refuse to open unsupported format
		return nil, fmt.Errorf("can't open: %w", err)
	}
	if !o.isComposite() {
		return o.mainChunk().Open(ctx, options...) // chain to wrapped non-chunked file
	}

	var openOptions []fs.OpenOption
	var offset, limit int64 = 0, -1

	for _, option := range options {
		switch opt := option.(type) {
		case *fs.SeekOption:
			offset = opt.Offset
		case *fs.RangeOption:
			offset, limit = opt.Decode(o.size)
		default:
			// pass Options on to the wrapped open, if appropriate
			openOptions = append(openOptions, option)
		}
	}

	if offset < 0 {
		return nil, errors.New("invalid offset")
	}
	if limit < 0 {
		limit = o.size - offset
	}

	return o.newLinearReader(ctx, offset, limit, openOptions)
}

// linearReader opens and reads file chunks sequentially, without read-ahead
type linearReader struct {
	ctx     context.Context
	chunks  []fs.Object
	options []fs.OpenOption
	limit   int64
	count   int64
	pos     int
	reader  io.ReadCloser
	err     error
}

func (o *Object) newLinearReader(ctx context.Context, offset, limit int64, options []fs.OpenOption) (io.ReadCloser, error) {
	r := &linearReader{
		ctx:     ctx,
		chunks:  o.chunks,
		options: options,
		limit:   limit,
	}

	// skip to chunk for given offset
	err := io.EOF
	for offset >= 0 && err != nil {
		offset, err = r.nextChunk(offset)
	}
	if err == nil || err == io.EOF {
		r.err = err
		return r, nil
	}
	return nil, err
}
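// Illustrative sketch, not part of the original source: a range read on a
// composite object goes through the linear reader, which opens only the
// chunks overlapping the range and requests matching sub-ranges from the
// wrapped remote. The helper name exampleReadRange is hypothetical.
func exampleReadRange(ctx context.Context, o *Object, offset, count int64) ([]byte, error) {
	rc, err := o.Open(ctx, &fs.RangeOption{Start: offset, End: offset + count - 1})
	if err != nil {
		return nil, err
	}
	defer func() { _ = rc.Close() }()
	return io.ReadAll(rc)
}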
func (r *linearReader) nextChunk(offset int64) (int64, error) {
	if r.err != nil {
		return -1, r.err
	}
	if r.pos >= len(r.chunks) || r.limit <= 0 || offset < 0 {
		return -1, io.EOF
	}

	chunk := r.chunks[r.pos]
	count := chunk.Size()
	r.pos++

	if offset >= count {
		return offset - count, io.EOF
	}
	count -= offset
	if r.limit < count {
		count = r.limit
	}
	options := append(r.options, &fs.RangeOption{Start: offset, End: offset + count - 1})

	if err := r.Close(); err != nil {
		return -1, err
	}

	reader, err := chunk.Open(r.ctx, options...)
	if err != nil {
		return -1, err
	}

	r.reader = reader
	r.count = count
	return offset, nil
}

func (r *linearReader) Read(p []byte) (n int, err error) {
	if r.err != nil {
		return 0, r.err
	}
	if r.limit <= 0 {
		r.err = io.EOF
		return 0, io.EOF
	}

	for r.count <= 0 {
		// current chunk has been read completely or its size is zero
		off, err := r.nextChunk(0)
		if off < 0 {
			r.err = err
			return 0, err
		}
	}

	n, err = r.reader.Read(p)
	if err == nil || err == io.EOF {
		r.count -= int64(n)
		r.limit -= int64(n)
		if r.limit > 0 {
			err = nil // more data to read
		}
	}
	r.err = err
	return
}

func (r *linearReader) Close() (err error) {
	if r.reader != nil {
		err = r.reader.Close()
		r.reader = nil
	}
	return
}

// ObjectInfo describes a wrapped fs.ObjectInfo for being the source
type ObjectInfo struct {
	src     fs.ObjectInfo
	fs      *Fs
	nChunks int    // number of data chunks
	xactID  string // transaction ID for "norename" or empty string for "renamed" chunks
	size    int64  // overrides source size by the total size of data chunks
	remote  string // overrides remote name
	md5     string // overrides MD5 checksum
	sha1    string // overrides SHA1 checksum
}

func (f *Fs) wrapInfo(src fs.ObjectInfo, newRemote string, totalSize int64) *ObjectInfo {
	return &ObjectInfo{
		src:    src,
		fs:     f,
		size:   totalSize,
		remote: newRemote,
	}
}

// Fs returns read only access to the Fs that this object is part of
func (oi *ObjectInfo) Fs() fs.Info {
	if oi.fs == nil {
		panic("stub ObjectInfo")
	}
	return oi.fs
}

// String returns string representation
func (oi *ObjectInfo) String() string {
	return oi.src.String()
}

// Storable returns whether object is storable
func (oi *ObjectInfo) Storable() bool {
	return oi.src.Storable()
}

// Remote returns the remote path
func (oi *ObjectInfo) Remote() string {
	if oi.remote != "" {
		return oi.remote
	}
	return oi.src.Remote()
}

// Size returns the size of the file
func (oi *ObjectInfo) Size() int64 {
	if oi.size != -1 {
		return oi.size
	}
	return oi.src.Size()
}

// ModTime returns the modification time
func (oi *ObjectInfo) ModTime(ctx context.Context) time.Time {
	return oi.src.ModTime(ctx)
}
// Hash returns the selected checksum of the wrapped file.
// It returns "" if no checksum is available or if this
// info doesn't wrap the complete file.
func (oi *ObjectInfo) Hash(ctx context.Context, hashType hash.Type) (string, error) {
	var errUnsupported error
	switch hashType {
	case hash.MD5:
		if oi.md5 != "" {
			return oi.md5, nil
		}
	case hash.SHA1:
		if oi.sha1 != "" {
			return oi.sha1, nil
		}
	default:
		errUnsupported = hash.ErrUnsupported
	}
	if oi.Size() != oi.src.Size() {
		// fail if this info wraps only a part of the file
		return "", errUnsupported
	}
	// chain to full source if possible
	value, err := oi.src.Hash(ctx, hashType)
	if err == hash.ErrUnsupported {
		return "", errUnsupported
	}
	return value, err
}

// ID returns the ID of the Object if known, or "" if not
func (o *Object) ID() string {
	if doer, ok := o.mainChunk().(fs.IDer); ok {
		return doer.ID()
	}
	return ""
}

// Meta format `simplejson`
type metaSimpleJSON struct {
	// required core fields
	Version  *int   `json:"ver"`
	Size     *int64 `json:"size"`    // total size of data chunks
	ChunkNum *int   `json:"nchunks"` // number of data chunks
	// optional extra fields
	MD5    string `json:"md5,omitempty"`
	SHA1   string `json:"sha1,omitempty"`
	XactID string `json:"txn,omitempty"` // transaction ID for norename transactions
}

// marshalSimpleJSON
//
// Current implementation creates metadata in three cases:
// - for files larger than chunk size
// - if file contents can be mistaken as meta object
// - if consistent hashing is On but wrapped remote can't provide given hash
func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1, xactID string) ([]byte, error) {
	version := metadataVersion
	if xactID == "" && version == 2 {
		version = 1
	}
	metadata := metaSimpleJSON{
		// required core fields
		Version:  &version,
		Size:     &size,
		ChunkNum: &nChunks,
		// optional extra fields
		MD5:    md5,
		SHA1:   sha1,
		XactID: xactID,
	}
	data, err := json.Marshal(&metadata)
	if err == nil && data != nil && len(data) >= maxMetadataSizeWritten {
		// be a nitpicker, never produce something you can't consume
		return nil, errors.New("metadata can't be this big, please report to rclone developers")
	}
	return data, err
}
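// Illustrative sketch, not part of the original source: the simplejson wire
// format for a hypothetical two-chunk composite of 4096 bytes uploaded in a
// norename transaction with the made-up xactID "0abc". The version stays at
// 2 because the xactID field is present.
func exampleMetadata(ctx context.Context) {
	if data, err := marshalSimpleJSON(ctx, 4096, 2, "", "", "0abc"); err == nil {
		fmt.Println(string(data))
		// prints: {"ver":2,"size":4096,"nchunks":2,"txn":"0abc"}
	}
}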
// unmarshalSimpleJSON parses metadata.
//
// On error it returns a flag telling whether the input was produced by an
// incompatible version of rclone or wasn't metadata at all.
// Only metadata format versions 1 and 2 are supported atm.
// Future releases will transparently migrate older metadata objects.
// A new format will have a higher version number and cannot be correctly
// handled by the current implementation.
// The version check below will then explicitly ask the user to upgrade rclone.
func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte) (info *ObjectInfo, madeByChunker bool, err error) {
	// Be strict about JSON format
	// to reduce possibility that a random small file resembles metadata.
	if len(data) > maxMetadataSizeWritten {
		return nil, false, ErrMetaTooBig
	}
	if data == nil || len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' {
		return nil, false, errors.New("invalid json")
	}
	var metadata metaSimpleJSON
	err = json.Unmarshal(data, &metadata)
	if err != nil {
		return nil, false, err
	}
	// Basic fields are strictly required
	// to reduce possibility that a random small file resembles metadata.
	if metadata.Version == nil || metadata.Size == nil || metadata.ChunkNum == nil {
		return nil, false, errors.New("missing required field")
	}
	// Perform strict checks, avoid corruption of future metadata formats.
	if *metadata.Version < 1 {
		return nil, false, errors.New("wrong version")
	}
	if *metadata.Size < 0 {
		return nil, false, errors.New("negative file size")
	}
	if *metadata.ChunkNum < 0 {
		return nil, false, errors.New("negative number of chunks")
	}
	if *metadata.ChunkNum > maxSafeChunkNumber {
		return nil, true, ErrChunkOverflow // produced by incompatible version of rclone
	}
	if metadata.MD5 != "" {
		_, err = hex.DecodeString(metadata.MD5)
		if len(metadata.MD5) != 32 || err != nil {
			return nil, false, errors.New("wrong md5 hash")
		}
	}
	if metadata.SHA1 != "" {
		_, err = hex.DecodeString(metadata.SHA1)
		if len(metadata.SHA1) != 40 || err != nil {
			return nil, false, errors.New("wrong sha1 hash")
		}
	}
	// ChunkNum is allowed to be 0 in future versions
	if *metadata.ChunkNum < 1 && *metadata.Version <= metadataVersion {
		return nil, false, errors.New("wrong number of chunks")
	}
	// Non-strict mode also accepts future metadata versions
	if *metadata.Version > metadataVersion {
		return nil, true, ErrMetaUnknown // produced by incompatible version of rclone
	}

	var nilFs *Fs // nil object triggers appropriate type method
	info = nilFs.wrapInfo(metaObject, "", *metadata.Size)
	info.nChunks = *metadata.ChunkNum
	info.md5 = metadata.MD5
	info.sha1 = metadata.SHA1
	info.xactID = metadata.XactID
	return info, true, nil
}

func silentlyRemove(ctx context.Context, o fs.Object) {
	_ = o.Remove(ctx) // ignore error
}

// Name of the remote (as passed into NewFs)
func (f *Fs) Name() string {
	return f.name
}

// Root of the remote (as passed into NewFs)
func (f *Fs) Root() string {
	return f.root
}

// Features returns the optional features of this Fs
func (f *Fs) Features() *fs.Features {
	return f.features
}

// String returns a description of the FS
func (f *Fs) String() string {
	return fmt.Sprintf("Chunked '%s:%s'", f.name, f.root)
}

// Precision returns the precision of this Fs
func (f *Fs) Precision() time.Duration {
	return f.base.Precision()
}

// CanQuickRename returns true if the Fs supports a quick rename operation
func (f *Fs) CanQuickRename() bool {
	return f.base.Features().Move != nil
}
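// Illustrative sketch, not part of the original source: how the flags from
// unmarshalSimpleJSON separate "not metadata at all" from "metadata written
// by an incompatible rclone". The helper name exampleProbeMeta is
// hypothetical.
func exampleProbeMeta(ctx context.Context, metaObject fs.Object, data []byte) string {
	info, madeByChunker, err := unmarshalSimpleJSON(ctx, metaObject, data)
	switch {
	case err == nil:
		return fmt.Sprintf("composite of %d chunks, %d bytes total", info.nChunks, info.size)
	case madeByChunker:
		return "chunker metadata from an incompatible rclone version, please upgrade"
	default:
		return "not chunker metadata, treat as an ordinary file"
	}
}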
// Check the interfaces are satisfied
var (
	_ fs.Fs              = (*Fs)(nil)
	_ fs.Purger          = (*Fs)(nil)
	_ fs.Copier          = (*Fs)(nil)
	_ fs.Mover           = (*Fs)(nil)
	_ fs.DirMover        = (*Fs)(nil)
	_ fs.DirSetModTimer  = (*Fs)(nil)
	_ fs.MkdirMetadataer = (*Fs)(nil)
	_ fs.PutUncheckeder  = (*Fs)(nil)
	_ fs.PutStreamer     = (*Fs)(nil)
	_ fs.CleanUpper      = (*Fs)(nil)
	_ fs.UnWrapper       = (*Fs)(nil)
	_ fs.ListRer         = (*Fs)(nil)
	_ fs.Abouter         = (*Fs)(nil)
	_ fs.Wrapper         = (*Fs)(nil)
	_ fs.ChangeNotifier  = (*Fs)(nil)
	_ fs.Shutdowner      = (*Fs)(nil)
	_ fs.ObjectInfo      = (*ObjectInfo)(nil)
	_ fs.Object          = (*Object)(nil)
	_ fs.ObjectUnWrapper = (*Object)(nil)
	_ fs.IDer            = (*Object)(nil)
)