github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/backend/chunker/chunker.go

// Package chunker provides wrappers for Fs and Object which split large files in chunks
package chunker

import (
	"bytes"
	"context"
	"crypto/md5"
	"crypto/sha1"
	"encoding/hex"
	"encoding/json"
	"fmt"
	gohash "hash"
	"io"
	"io/ioutil"
	"math/rand"
	"path"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/pkg/errors"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/config/configmap"
	"github.com/rclone/rclone/fs/config/configstruct"
	"github.com/rclone/rclone/fs/fspath"
	"github.com/rclone/rclone/fs/hash"
	"github.com/rclone/rclone/fs/operations"
)

//
// A chunker composite file consists of one or more chunks
// and an optional metadata object. If present, the meta
// object is named after the original file.
//
// The only supported metadata format at the moment is simplejson.
// It supports only rudimentary per-file meta objects,
// used mostly for consistency checks (lazily, for performance reasons).
// Other formats can be developed that use an external meta store
// free of these limitations, but this needs some support from
// rclone core (eg. metadata store interfaces).
//
// The following types of chunks are supported:
// data and control, active and temporary.
// Chunk type is identified by matching the chunk file name
// against the chunk name format configured by the user.
//
// Both data and control chunks can be either temporary (aka hidden)
// or active (non-temporary aka normal aka permanent).
// An operation creates temporary chunks while it runs.
// On completion it removes temporary chunks and leaves active ones.
//
// Temporary chunks have a special hardcoded suffix in addition
// to the configured name pattern.
// The temporary suffix includes a so-called transaction identifier
// (abbreviated as `xactID` below), a generic non-negative base-36 "number"
// used by parallel operations to share a composite object.
// Chunker also accepts the longer decimal temporary suffix (obsolete),
// which is transparently converted to the new format. In its maximum
// length of 13 decimals it makes a 7-digit base-36 number.
//
// Chunker can tell data chunks from control chunks by the characters
// located in the "hash placeholder" position of the configured format.
// Data chunks have decimal digits there.
// Control chunks have in that position a short lowercase alphanumeric
// string (starting with a letter) prefixed by an underscore.
//
// Metadata format v1 does not define any control chunk types;
// they are currently ignored aka reserved.
// In future they can be used to implement resumable uploads etc.
//
const (
	ctrlTypeRegStr   = `[a-z][a-z0-9]{2,6}`
	tempSuffixFormat = `_%04s`
	tempSuffixRegStr = `_([0-9a-z]{4,9})`
	tempSuffixRegOld = `\.\.tmp_([0-9]{10,13})`
)

var (
	// regular expressions to validate control type and temporary suffix
	ctrlTypeRegexp   = regexp.MustCompile(`^` + ctrlTypeRegStr + `$`)
	tempSuffixRegexp = regexp.MustCompile(`^` + tempSuffixRegStr + `$`)
)

// Normally metadata is a small piece of JSON (about 100-300 bytes).
// The size of valid metadata must never exceed this limit.
// The current maximum provides reasonable room for future extensions.
//
// Please refrain from increasing it, as this can cause old rclone versions
// to fail, or worse, treat the meta object as a normal file (see NewObject).
// If more room is needed, please bump the metadata version, forcing previous
// releases to ask for an upgrade, and offload the extra info to a control chunk.
//
// After all, chunker's primary function is to chunk large files
// rather than serve as a generic metadata container.
const maxMetadataSize = 255

// Current/highest supported metadata format.
const metadataVersion = 1

// optimizeFirstChunk enables the following optimization in the Put:
// If a single chunk is expected, put the first chunk using the
// base target name instead of a temporary name, thus avoiding
// an extra rename operation.
// Warning: this optimization is not transaction safe.
const optimizeFirstChunk = false

// revealHidden is a stub until chunker lands the `reveal hidden` option.
const revealHidden = false

// Prevent memory overflow due to specially crafted chunk name
const maxSafeChunkNumber = 10000000

// Number of attempts to find unique transaction identifier
const maxTransactionProbes = 100

// standard chunker errors
var (
	ErrChunkOverflow = errors.New("chunk number overflow")
)

// variants of baseMove's parameter delMode
const (
	delNever  = 0 // don't delete, just move
	delAlways = 1 // delete destination before moving
	delFailed = 2 // move, then delete and try again if failed
)

// Register with Fs
func init() {
	fs.Register(&fs.RegInfo{
		Name:        "chunker",
		Description: "Transparently chunk/split large files",
		NewFs:       NewFs,
		Options: []fs.Option{{
			Name:     "remote",
			Required: true,
			Help: `Remote to chunk/unchunk.
Normally should contain a ':' and a path, eg "myremote:path/to/dir",
"myremote:bucket" or maybe "myremote:" (not recommended).`,
		}, {
			Name:     "chunk_size",
			Advanced: false,
			Default:  fs.SizeSuffix(2147483648), // 2GB
			Help:     `Files larger than chunk size will be split in chunks.`,
		}, {
			Name:     "name_format",
			Advanced: true,
			Default:  `*.rclone_chunk.###`,
			Help: `String format of chunk file names.
The two placeholders are: base file name (*) and chunk number (#...).
There must be one and only one asterisk and one or more consecutive hash characters.
If the chunk number has fewer digits than the number of hashes, it is left-padded by zeros.
If there are more digits in the number, they are left as is.
Possible chunk files are ignored if their name does not match the given format.`,
		}, {
			Name:     "start_from",
			Advanced: true,
			Default:  1,
			Help: `Minimum valid chunk number. Usually 0 or 1.
By default chunk numbers start from 1.`,
		}, {
			Name:     "meta_format",
			Advanced: true,
			Default:  "simplejson",
			Help: `Format of the metadata object or "none". By default "simplejson".
Metadata is a small JSON file named after the composite file.`,
			Examples: []fs.OptionExample{{
				Value: "none",
				Help:  `Do not use metadata files at all. Requires hash type "none".`,
			}, {
				Value: "simplejson",
				Help: `Simple JSON supports hash sums and chunk validation.
It has the following fields: ver, size, nchunks, md5, sha1.`,
			}},
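			// An illustrative simplejson meta object (the values are
			// examples, not verbatim output of this backend):
			//
			//	{"ver":1,"size":4294967296,"nchunks":2,"md5":"...","sha1":"..."}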
		}, {
			Name:     "hash_type",
			Advanced: false,
			Default:  "md5",
			Help: `Choose how chunker handles hash sums.
All modes but "none" require metadata.`,
			Examples: []fs.OptionExample{{
				Value: "none",
				Help:  `Pass any hash supported by wrapped remote for non-chunked files, return nothing otherwise`,
			}, {
				Value: "md5",
				Help:  `MD5 for composite files`,
			}, {
				Value: "sha1",
				Help:  `SHA1 for composite files`,
			}, {
				Value: "md5all",
				Help:  `MD5 for all files`,
			}, {
				Value: "sha1all",
				Help:  `SHA1 for all files`,
			}, {
				Value: "md5quick",
				Help:  `Copying a file to chunker will request MD5 from the source falling back to SHA1 if unsupported`,
			}, {
				Value: "sha1quick",
				Help:  `Similar to "md5quick" but prefers SHA1 over MD5`,
			}},
		}, {
			Name:     "fail_hard",
			Advanced: true,
			Default:  false,
			Help:     `Choose how chunker should handle files with missing or invalid chunks.`,
			Examples: []fs.OptionExample{
				{
					Value: "true",
					Help:  "Report errors and abort current command.",
				}, {
					Value: "false",
					Help:  "Warn user, skip incomplete file and proceed.",
				},
			},
		}},
	})
}

// NewFs constructs an Fs from the path, container:path
func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
	// Parse config into Options struct
	opt := new(Options)
	err := configstruct.Set(m, opt)
	if err != nil {
		return nil, err
	}
	if opt.StartFrom < 0 {
		return nil, errors.New("start_from must be non-negative")
	}

	remote := opt.Remote
	if strings.HasPrefix(remote, name+":") {
		return nil, errors.New("can't point remote at itself - check the value of the remote setting")
	}

	baseInfo, baseName, basePath, baseConfig, err := fs.ConfigFs(remote)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse remote %q to wrap", remote)
	}
	// Look for a file first
	remotePath := fspath.JoinRootPath(basePath, rpath)
	baseFs, err := baseInfo.NewFs(baseName, remotePath, baseConfig)
	if err != fs.ErrorIsFile && err != nil {
		return nil, errors.Wrapf(err, "failed to make remote %s:%q to wrap", baseName, remotePath)
	}
	if !operations.CanServerSideMove(baseFs) {
		return nil, errors.New("can't use chunker on a backend which doesn't support server side move or copy")
	}

	f := &Fs{
		base: baseFs,
		name: name,
		root: rpath,
		opt:  *opt,
	}
	f.dirSort = true // processEntries requires that meta Objects precede data chunks atm.

	if err := f.configure(opt.NameFormat, opt.MetaFormat, opt.HashType); err != nil {
		return nil, err
	}

	// Handle the tricky case detected by FsMkdir/FsPutFiles/FsIsFile
	// when `rpath` points to a composite multi-chunk file without metadata,
	// i.e. `rpath` does not exist in the wrapped remote, but chunker
	// detects a composite file because it finds the first chunk!
	// (yet can't satisfy fstest.CheckListing, will ignore)
	if err == nil && !f.useMeta && strings.Contains(rpath, "/") {
		firstChunkPath := f.makeChunkName(remotePath, 0, "", "")
		_, testErr := baseInfo.NewFs(baseName, firstChunkPath, baseConfig)
		if testErr == fs.ErrorIsFile {
			err = testErr
		}
	}

	// Note 1: the features here are ones we could support, and they are
	// ANDed with the ones from wrappedFs.
	// Note 2: features.Fill() points features.PutStream to our PutStream,
	// but features.Mask() will nullify it if wrappedFs does not have it.
	f.features = (&fs.Features{
		CaseInsensitive:         true,
		DuplicateFiles:          true,
		ReadMimeType:            true,
		WriteMimeType:           true,
		BucketBased:             true,
		CanHaveEmptyDirectories: true,
		ServerSideAcrossConfigs: true,
	}).Fill(f).Mask(baseFs).WrapsFs(f, baseFs)

	return f, err
}

// Options defines the configuration for this backend
type Options struct {
	Remote     string        `config:"remote"`
	ChunkSize  fs.SizeSuffix `config:"chunk_size"`
	NameFormat string        `config:"name_format"`
	StartFrom  int           `config:"start_from"`
	MetaFormat string        `config:"meta_format"`
	HashType   string        `config:"hash_type"`
	FailHard   bool          `config:"fail_hard"`
}

// Fs represents a wrapped fs.Fs
type Fs struct {
	name         string
	root         string
	base         fs.Fs          // remote wrapped by chunker overlay
	wrapper      fs.Fs          // wrapper is used by SetWrapper
	useMeta      bool           // false if metadata format is 'none'
	useMD5       bool           // mutually exclusive with useSHA1
	useSHA1      bool           // mutually exclusive with useMD5
	hashFallback bool           // allows fallback from MD5 to SHA1 and vice versa
	hashAll      bool           // hash all files, mutually exclusive with hashFallback
	dataNameFmt  string         // name format of data chunks
	ctrlNameFmt  string         // name format of control chunks
	nameRegexp   *regexp.Regexp // regular expression to match chunk names
	xactIDRand   *rand.Rand     // generator of random transaction identifiers
	xactIDMutex  sync.Mutex     // mutex for the source of randomness
	opt          Options        // copy of Options
	features     *fs.Features   // optional features
	dirSort      bool           // reserved for future, ignored
}

// configure sets up chunker for given name format, meta format and hash type.
// It also seeds the source of random transaction identifiers.
// configure must be called only from NewFs or by unit tests.
func (f *Fs) configure(nameFormat, metaFormat, hashType string) error {
	if err := f.setChunkNameFormat(nameFormat); err != nil {
		return errors.Wrapf(err, "invalid name format '%s'", nameFormat)
	}
	if err := f.setMetaFormat(metaFormat); err != nil {
		return err
	}
	if err := f.setHashType(hashType); err != nil {
		return err
	}

	randomSeed := time.Now().UnixNano()
	f.xactIDRand = rand.New(rand.NewSource(randomSeed))

	return nil
}

func (f *Fs) setMetaFormat(metaFormat string) error {
	switch metaFormat {
	case "none":
		f.useMeta = false
	case "simplejson":
		f.useMeta = true
	default:
		return fmt.Errorf("unsupported meta format '%s'", metaFormat)
	}
	return nil
}
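// Summary of the hash modes handled by setHashType below (paraphrasing
// the hash_type option help above):
//
//	none      pass through hashes for non-chunked files only
//	md5/sha1  keep the sum of composite files in metadata
//	*quick    take the sum from the source, fall back to the other type
//	*all      force metadata on every file when the base remote lacks the hash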
// setHashType must be called *after* setMetaFormat.
//
// In the "All" mode chunker will force metadata on all files
// if the wrapped remote can't provide the given hashsum.
func (f *Fs) setHashType(hashType string) error {
	f.useMD5 = false
	f.useSHA1 = false
	f.hashFallback = false
	f.hashAll = false
	requireMetaHash := true

	switch hashType {
	case "none":
		requireMetaHash = false
	case "md5":
		f.useMD5 = true
	case "sha1":
		f.useSHA1 = true
	case "md5quick":
		f.useMD5 = true
		f.hashFallback = true
	case "sha1quick":
		f.useSHA1 = true
		f.hashFallback = true
	case "md5all":
		f.useMD5 = true
		f.hashAll = !f.base.Hashes().Contains(hash.MD5)
	case "sha1all":
		f.useSHA1 = true
		f.hashAll = !f.base.Hashes().Contains(hash.SHA1)
	default:
		return fmt.Errorf("unsupported hash type '%s'", hashType)
	}
	if requireMetaHash && !f.useMeta {
		return fmt.Errorf("hash type '%s' requires compatible meta format", hashType)
	}
	return nil
}

// setChunkNameFormat converts a pattern based chunk name format
// into Printf formats and regular expressions for data and
// control chunks.
func (f *Fs) setChunkNameFormat(pattern string) error {
	// validate pattern
	if strings.Count(pattern, "*") != 1 {
		return errors.New("pattern must have exactly one asterisk (*)")
	}
	numDigits := strings.Count(pattern, "#")
	if numDigits < 1 {
		return errors.New("pattern must have a hash character (#)")
	}
	if strings.Index(pattern, "*") > strings.Index(pattern, "#") {
		return errors.New("asterisk (*) in pattern must come before hashes (#)")
	}
	if ok, _ := regexp.MatchString("^[^#]*[#]+[^#]*$", pattern); !ok {
		return errors.New("hashes (#) in pattern must be consecutive")
	}
	if dir, _ := path.Split(pattern); dir != "" {
		return errors.New("directory separator prohibited")
	}
	if pattern[0] != '*' {
		return errors.New("pattern must start with asterisk") // to be lifted later
	}

	// craft a unified regular expression for all types of chunks
	reHashes := regexp.MustCompile("[#]+")
	reDigits := "[0-9]+"
	if numDigits > 1 {
		reDigits = fmt.Sprintf("[0-9]{%d,}", numDigits)
	}
	reDataOrCtrl := fmt.Sprintf("(?:(%s)|_(%s))", reDigits, ctrlTypeRegStr)

	// this must be non-greedy or else it could eat up the temporary suffix
	const mainNameRegStr = "(.+?)"

	strRegex := regexp.QuoteMeta(pattern)
	strRegex = reHashes.ReplaceAllLiteralString(strRegex, reDataOrCtrl)
	strRegex = strings.Replace(strRegex, "\\*", mainNameRegStr, -1)
	strRegex = fmt.Sprintf("^%s(?:%s|%s)?$", strRegex, tempSuffixRegStr, tempSuffixRegOld)
	f.nameRegexp = regexp.MustCompile(strRegex)

	// craft printf formats for active data/control chunks
	fmtDigits := "%d"
	if numDigits > 1 {
		fmtDigits = fmt.Sprintf("%%0%dd", numDigits)
	}
	strFmt := strings.Replace(pattern, "%", "%%", -1)
	strFmt = strings.Replace(strFmt, "*", "%s", 1)
	f.dataNameFmt = reHashes.ReplaceAllLiteralString(strFmt, fmtDigits)
	f.ctrlNameFmt = reHashes.ReplaceAllLiteralString(strFmt, "_%s")
	return nil
}

// makeChunkName produces a chunk name (or path) for a given file.
//
// filePath can be a name, relative or absolute path of the main file.
//
// chunkNo must be a zero based index of a data chunk.
// Negative chunkNo eg. -1 indicates a control chunk.
// ctrlType is the type of a control chunk (must be valid).
// ctrlType must be "" for data chunks.
//
// xactID is a transaction identifier. Empty xactID denotes an active chunk,
// otherwise a temporary chunk name is produced.
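//
// Illustrative results with the default name format `*.rclone_chunk.###`
// and start_from=1 ("meta" below is a hypothetical control type, since
// metadata format v1 defines none):
//
//	makeChunkName("dir/file", 0, "", "")       -> "dir/file.rclone_chunk.001"
//	makeChunkName("dir/file", 9, "", "")       -> "dir/file.rclone_chunk.010"
//	makeChunkName("dir/file", -1, "meta", "")  -> "dir/file.rclone_chunk._meta"
//	makeChunkName("dir/file", 0, "", "0z3f5q") -> "dir/file.rclone_chunk.001_0z3f5q"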
//
func (f *Fs) makeChunkName(filePath string, chunkNo int, ctrlType, xactID string) string {
	dir, parentName := path.Split(filePath)
	var name, tempSuffix string
	switch {
	case chunkNo >= 0 && ctrlType == "":
		name = fmt.Sprintf(f.dataNameFmt, parentName, chunkNo+f.opt.StartFrom)
	case chunkNo < 0 && ctrlTypeRegexp.MatchString(ctrlType):
		name = fmt.Sprintf(f.ctrlNameFmt, parentName, ctrlType)
	default:
		panic("makeChunkName: invalid argument") // must not produce something we can't consume
	}
	if xactID != "" {
		tempSuffix = fmt.Sprintf(tempSuffixFormat, xactID)
		if !tempSuffixRegexp.MatchString(tempSuffix) {
			panic("makeChunkName: invalid argument")
		}
	}
	return dir + name + tempSuffix
}

// parseChunkName checks whether a given file path belongs to
// a chunk and extracts the chunk name parts.
//
// filePath can be a name, relative or absolute path of a file.
//
// The returned parentPath is the path of the composite file owning the chunk.
// It's a non-empty string if a valid chunk name is detected
// or "" if it's not a chunk.
// Other returned values depend on the detected chunk type:
// data or control, active or temporary:
//
//	data chunk      - the returned chunkNo is non-negative and ctrlType is ""
//	control chunk   - the chunkNo is -1 and ctrlType is a non-empty string
//	active chunk    - the returned xactID is ""
//	temporary chunk - the xactID is a non-empty string
func (f *Fs) parseChunkName(filePath string) (parentPath string, chunkNo int, ctrlType, xactID string) {
	dir, name := path.Split(filePath)
	match := f.nameRegexp.FindStringSubmatch(name)
	if match == nil || match[1] == "" {
		return "", -1, "", ""
	}
	var err error

	chunkNo = -1
	if match[2] != "" {
		if chunkNo, err = strconv.Atoi(match[2]); err != nil {
			chunkNo = -1
		}
		if chunkNo -= f.opt.StartFrom; chunkNo < 0 {
			fs.Infof(f, "invalid data chunk number in file %q", name)
			return "", -1, "", ""
		}
	}

	if match[4] != "" {
		xactID = match[4]
	}
	if match[5] != "" {
		// old-style temporary suffix
		number, err := strconv.ParseInt(match[5], 10, 64)
		if err != nil || number < 0 {
			fs.Infof(f, "invalid old-style transaction number in file %q", name)
			return "", -1, "", ""
		}
		// convert old-style transaction number to base-36 transaction ID
		xactID = fmt.Sprintf(tempSuffixFormat, strconv.FormatInt(number, 36))
		xactID = xactID[1:] // strip leading underscore
	}

	parentPath = dir + match[1]
	ctrlType = match[3]
	return
}

// forbidChunk prints an error message or raises an error if the file is a chunk.
// The first argument sets the log prefix, use `false` to suppress the message.
func (f *Fs) forbidChunk(o interface{}, filePath string) error {
	if parentPath, _, _, _ := f.parseChunkName(filePath); parentPath != "" {
		if f.opt.FailHard {
			return fmt.Errorf("chunk overlap with %q", parentPath)
		}
		if boolVal, isBool := o.(bool); !isBool || boolVal {
			fs.Errorf(o, "chunk overlap with %q", parentPath)
		}
	}
	return nil
}

// newXactID produces a sufficiently random transaction identifier.
//
// The temporary suffix mask allows identifiers consisting of 4-9
// base-36 digits (ie. digits 0-9 or lowercase letters a-z).
// The identifiers must be unique between transactions running on
// the single file in parallel.
//
// Currently the function produces 6-character identifiers.
// Together with the underscore this makes a 7-character temporary suffix.
//
// The first 4 characters isolate groups of transactions by time intervals.
// The maximum length of an interval is base-36 "zzzz" ie. 1,679,615 seconds.
// The function rather takes the maximum prime closest to this number
// (see https://primes.utm.edu) as the interval length to better safeguard
// against repeating pseudo-random sequences in cases when rclone is
// invoked from a periodic scheduler like unix cron.
// Thus, the interval is slightly more than 19 days 10 hours 33 minutes.
//
// The remaining 2 base-36 digits (in the range from 0 to 1295 inclusive)
// are taken from the local random source.
// This provides about 0.1% collision probability for two parallel
// operations started at the same second and working on the same file.
//
// A non-empty filePath argument enables probing for an existing temporary
// chunk to further eliminate collisions.
func (f *Fs) newXactID(ctx context.Context, filePath string) (xactID string, err error) {
	const closestPrimeZzzzSeconds = 1679609
	const maxTwoBase36Digits = 1295

	unixSec := time.Now().Unix()
	if unixSec < 0 {
		unixSec = -unixSec // unlikely but the number must be positive
	}
	circleSec := unixSec % closestPrimeZzzzSeconds
	first4chars := strconv.FormatInt(circleSec, 36)

	for tries := 0; tries < maxTransactionProbes; tries++ {
		f.xactIDMutex.Lock()
		randomness := f.xactIDRand.Int63n(maxTwoBase36Digits + 1)
		f.xactIDMutex.Unlock()

		last2chars := strconv.FormatInt(randomness, 36)
		xactID = fmt.Sprintf("%04s%02s", first4chars, last2chars)

		if filePath == "" {
			return
		}
		probeChunk := f.makeChunkName(filePath, 0, "", xactID)
		_, probeErr := f.base.NewObject(ctx, probeChunk)
		if probeErr != nil {
			return
		}
	}

	return "", fmt.Errorf("can't setup transaction for %s", filePath)
}

// List the objects and directories in dir into entries.
// The entries can be returned in any order but should be
// for a complete directory.
//
// dir should be "" to list the root, and should not have
// trailing slashes.
//
// This should return ErrDirNotFound if the directory isn't found.
//
// Commands normally clean up all temporary chunks in case of a failure.
// However, if rclone dies unexpectedly, it can leave behind a bunch of
// hidden temporary chunks. List and its underlying chunkEntries()
// silently skip all temporary chunks in the directory. It's okay if
// they belong to an unfinished command running in parallel.
//
// However, there is no way to discover dead temporary chunks atm.
// As a workaround users can use `purge` to forcibly remove the whole
// directory together with dead chunks.
// In future a flag named like `--chunker-list-hidden` may be added to
// rclone that will tell List to reveal hidden chunks.
//
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
	entries, err = f.base.List(ctx, dir)
	if err != nil {
		return nil, err
	}
	return f.processEntries(ctx, entries, dir)
}
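// An illustrative listing (assuming the default name format and
// simplejson metadata): if the wrapped remote contains
//
//	file.txt                    (meta object, ~100 bytes)
//	file.txt.rclone_chunk.001
//	file.txt.rclone_chunk.002
//
// then List returns a single composite entry "file.txt" whose size is
// the sum of the two data chunks.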
// ListR lists the objects and directories of the Fs starting
// from dir recursively into out.
//
// dir should be "" to start from the root, and should not
// have trailing slashes.
//
// This should return ErrDirNotFound if the directory isn't
// found.
//
// It should call callback for each tranche of entries read.
// These need not be returned in any particular order. If
// callback returns an error then the listing will stop
// immediately.
//
// Don't implement this unless you have a more efficient way
// of listing recursively than doing a directory traversal.
func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
	do := f.base.Features().ListR
	return do(ctx, dir, func(entries fs.DirEntries) error {
		newEntries, err := f.processEntries(ctx, entries, dir)
		if err != nil {
			return err
		}
		return callback(newEntries)
	})
}

// processEntries assembles chunk entries into composite entries
func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirPath string) (newEntries fs.DirEntries, err error) {
	var sortedEntries fs.DirEntries
	if f.dirSort {
		// sort entries so that meta objects go before their chunks
		sortedEntries = make(fs.DirEntries, len(origEntries))
		copy(sortedEntries, origEntries)
		sort.Sort(sortedEntries)
	} else {
		sortedEntries = origEntries
	}

	byRemote := make(map[string]*Object)
	badEntry := make(map[string]bool)
	isSubdir := make(map[string]bool)

	var tempEntries fs.DirEntries
	for _, dirOrObject := range sortedEntries {
		switch entry := dirOrObject.(type) {
		case fs.Object:
			remote := entry.Remote()
			if mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(remote); mainRemote != "" {
				if xactID != "" {
					if revealHidden {
						fs.Infof(f, "ignore temporary chunk %q", remote)
					}
					break
				}
				if ctrlType != "" {
					if revealHidden {
						fs.Infof(f, "ignore control chunk %q", remote)
					}
					break
				}
				mainObject := byRemote[mainRemote]
				if mainObject == nil && f.useMeta {
					fs.Debugf(f, "skip chunk %q without meta object", remote)
					break
				}
				if mainObject == nil {
					// useMeta is false - create chunked object without metadata
					mainObject = f.newObject(mainRemote, nil, nil)
					byRemote[mainRemote] = mainObject
					if !badEntry[mainRemote] {
						tempEntries = append(tempEntries, mainObject)
					}
				}
				if err := mainObject.addChunk(entry, chunkNo); err != nil {
					if f.opt.FailHard {
						return nil, err
					}
					badEntry[mainRemote] = true
				}
				break
			}
			object := f.newObject("", entry, nil)
			byRemote[remote] = object
			tempEntries = append(tempEntries, object)
		case fs.Directory:
			isSubdir[entry.Remote()] = true
			wrapDir := fs.NewDirCopy(ctx, entry)
			wrapDir.SetRemote(entry.Remote())
			tempEntries = append(tempEntries, wrapDir)
		default:
			if f.opt.FailHard {
				return nil, fmt.Errorf("Unknown object type %T", entry)
			}
			fs.Debugf(f, "unknown object type %T", entry)
		}
	}

	for _, entry := range tempEntries {
		if object, ok := entry.(*Object); ok {
			remote := object.Remote()
			if isSubdir[remote] {
				if f.opt.FailHard {
					return nil, fmt.Errorf("%q is both meta object and directory", remote)
				}
				badEntry[remote] = true // fall thru
			}
			if badEntry[remote] {
				fs.Debugf(f, "invalid directory entry %q", remote)
				continue
			}
			if err := object.validate(); err != nil {
				if f.opt.FailHard {
					return nil, err
				}
				fs.Debugf(f, "invalid chunks in object %q", remote)
				continue
			}
		}
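		// the entry survived all checks above: it's a healthy composite
		// object, a plain non-chunked file, or a directory - keep it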
		newEntries = append(newEntries, entry)
	}

	if f.dirSort {
		sort.Sort(newEntries)
	}
	return newEntries, nil
}

// NewObject finds the Object at remote.
//
// Please note that every NewObject invocation will scan the whole directory.
// Using here something like fs.DirCache might improve performance
// (yet making the logic more complex).
//
// Note that chunker prefers analyzing file names rather than reading
// the content of the meta object, assuming that directory scans are fast
// but opening even a small file can be slow on some backends.
//
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
	if err := f.forbidChunk(false, remote); err != nil {
		return nil, errors.Wrap(err, "can't access")
	}

	var (
		o       *Object
		baseObj fs.Object
		err     error
	)

	if f.useMeta {
		baseObj, err = f.base.NewObject(ctx, remote)
		if err != nil {
			return nil, err
		}
		remote = baseObj.Remote()

		// Chunker's meta object cannot be large and maxMetadataSize acts
		// as a hard limit. Anything larger than that is treated as a
		// non-chunked file without even checking its contents, so it's
		// paramount to prevent metadata from exceeding the maximum size.
		o = f.newObject("", baseObj, nil)
		if o.size > maxMetadataSize {
			return o, nil
		}
	} else {
		// Metadata is disabled, hence this is either a multi-chunk
		// composite file without meta object or a non-chunked file.
		// Create an empty wrapper here, scan directory to determine
		// which case it is and postpone reading if it's the latter one.
		o = f.newObject(remote, nil, nil)
	}

	// If the object is small, it's probably a meta object.
	// However, a composite file must have data chunks besides it.
	// Scan directory for possible data chunks now and decide later on.
	dir := path.Dir(strings.TrimRight(remote, "/"))
	if dir == "." {
		dir = ""
	}
	entries, err := f.base.List(ctx, dir)
	switch err {
	case nil:
		// OK, fall thru
	case fs.ErrorDirNotFound:
		entries = nil
	default:
		return nil, errors.Wrap(err, "can't detect composite file")
	}

	for _, dirOrObject := range entries {
		entry, ok := dirOrObject.(fs.Object)
		if !ok {
			continue
		}
		entryRemote := entry.Remote()
		if !strings.Contains(entryRemote, remote) {
			continue // bypass regexp to save cpu
		}
		mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(entryRemote)
		if mainRemote == "" || mainRemote != remote || ctrlType != "" || xactID != "" {
			continue // skip non-conforming, temporary and control chunks
		}
		//fs.Debugf(f, "%q belongs to %q as chunk %d", entryRemote, mainRemote, chunkNo)
		if err := o.addChunk(entry, chunkNo); err != nil {
			return nil, err
		}
	}

	if o.main == nil && (o.chunks == nil || len(o.chunks) == 0) {
		// Scanning hasn't found data chunks with conforming names.
		if f.useMeta {
			// Metadata is required but absent and there are no chunks.
			return nil, fs.ErrorObjectNotFound
		}

		// Data chunks are not found and metadata is disabled.
		// Thus, we are in the "latter case" from above.
		// Let's try the postponed reading of a non-chunked file and add it
		// as a single chunk to the empty composite wrapper created above
		// with nil metadata.
		baseObj, err = f.base.NewObject(ctx, remote)
		if err == nil {
			err = o.addChunk(baseObj, 0)
		}
		if err != nil {
			return nil, err
		}
	}

	// This is either a composite object with metadata or a non-chunked
	// file without metadata. Validate it and update the total data size.
	// As an optimization, skip metadata reading here - we will call
	// readMetadata lazily when needed (reading can be expensive).
	if err := o.validate(); err != nil {
		return nil, err
	}
	return o, nil
}

func (o *Object) readMetadata(ctx context.Context) error {
	if o.isFull {
		return nil
	}
	if !o.isComposite() || !o.f.useMeta {
		o.isFull = true
		return nil
	}

	// validate metadata
	metaObject := o.main
	reader, err := metaObject.Open(ctx)
	if err != nil {
		return err
	}
	metadata, err := ioutil.ReadAll(reader)
	_ = reader.Close() // ensure file handle is freed on windows
	if err != nil {
		return err
	}

	switch o.f.opt.MetaFormat {
	case "simplejson":
		metaInfo, err := unmarshalSimpleJSON(ctx, metaObject, metadata, true)
		if err != nil {
			return errors.Wrap(err, "invalid metadata")
		}
		if o.size != metaInfo.Size() || len(o.chunks) != metaInfo.nChunks {
			return errors.New("metadata doesn't match file size")
		}
		o.md5 = metaInfo.md5
		o.sha1 = metaInfo.sha1
	}

	o.isFull = true
	return nil
}

// put implements Put, PutStream, PutUnchecked, Update
func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote string, options []fs.OpenOption, basePut putFn) (obj fs.Object, err error) {
	c := f.newChunkingReader(src)
	wrapIn := c.wrapStream(ctx, in, src)

	var metaObject fs.Object
	defer func() {
		if err != nil {
			c.rollback(ctx, metaObject)
		}
	}()

	baseRemote := remote
	xactID, errXact := f.newXactID(ctx, baseRemote)
	if errXact != nil {
		return nil, errXact
	}

	// Transfer chunks data
	for c.chunkNo = 0; !c.done; c.chunkNo++ {
		if c.chunkNo > maxSafeChunkNumber {
			return nil, ErrChunkOverflow
		}

		tempRemote := f.makeChunkName(baseRemote, c.chunkNo, "", xactID)
		size := c.sizeLeft
		if size > c.chunkSize {
			size = c.chunkSize
		}
		savedReadCount := c.readCount

		// If a single chunk is expected, avoid the extra rename operation
		chunkRemote := tempRemote
		if c.expectSingle && c.chunkNo == 0 && optimizeFirstChunk {
			chunkRemote = baseRemote
		}
		info := f.wrapInfo(src, chunkRemote, size)

		// TODO: handle range/limit options
		chunk, errChunk := basePut(ctx, wrapIn, info, options...)
		if errChunk != nil {
			return nil, errChunk
		}

		if size > 0 && c.readCount == savedReadCount && c.expectSingle {
			// basePut returned success but didn't call chunkingReader's Read.
			// This is possible if wrapped remote has performed the put by hash
			// because chunker bridges Hash from source for non-chunked files.
			// Hence, force Read here to update accounting and hashsums.
			if err := c.dummyRead(wrapIn, size); err != nil {
				return nil, err
			}
		}
		if c.sizeLeft == 0 && !c.done {
			// The file has been apparently put by hash, force completion.
			c.done = true
		}

		// Expected a single chunk but more to come, so name it as usual.
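		// (chunkRemote differs from tempRemote only when optimizeFirstChunk
		// put the first chunk under the base name, so with the optimization
		// disabled this rename never triggers.)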
		if !c.done && chunkRemote != tempRemote {
			fs.Infof(chunk, "Expected single chunk, got more")
			chunkMoved, errMove := f.baseMove(ctx, chunk, tempRemote, delFailed)
			if errMove != nil {
				silentlyRemove(ctx, chunk)
				return nil, errMove
			}
			chunk = chunkMoved
		}

		// Wrapped remote may or may not have seen EOF from chunking reader,
		// eg. the box multi-uploader reads exactly the chunk size specified
		// and skips the "EOF" read. Hence, switch to next limit here.
		if !(c.chunkLimit == 0 || c.chunkLimit == c.chunkSize || c.sizeTotal == -1 || c.done) {
			silentlyRemove(ctx, chunk)
			return nil, fmt.Errorf("Destination ignored %d data bytes", c.chunkLimit)
		}
		c.chunkLimit = c.chunkSize

		c.chunks = append(c.chunks, chunk)
	}

	// Validate uploaded size
	if c.sizeTotal != -1 && c.readCount != c.sizeTotal {
		return nil, fmt.Errorf("Incorrect upload size %d != %d", c.readCount, c.sizeTotal)
	}

	// Check for input that looks like valid metadata
	needMeta := len(c.chunks) > 1
	if c.readCount <= maxMetadataSize && len(c.chunks) == 1 {
		_, err := unmarshalSimpleJSON(ctx, c.chunks[0], c.smallHead, false)
		needMeta = err == nil
	}

	// Finalize small object as non-chunked.
	// This can be bypassed, and single chunk with metadata will be
	// created if forced by consistent hashing or due to unsafe input.
	if !needMeta && !f.hashAll && f.useMeta {
		// If previous object was chunked, remove its chunks
		f.removeOldChunks(ctx, baseRemote)

		// Rename single data chunk in place
		chunk := c.chunks[0]
		if chunk.Remote() != baseRemote {
			chunkMoved, errMove := f.baseMove(ctx, chunk, baseRemote, delAlways)
			if errMove != nil {
				silentlyRemove(ctx, chunk)
				return nil, errMove
			}
			chunk = chunkMoved
		}

		return f.newObject("", chunk, nil), nil
	}

	// Validate total size of data chunks
	var sizeTotal int64
	for _, chunk := range c.chunks {
		sizeTotal += chunk.Size()
	}
	if sizeTotal != c.readCount {
		return nil, fmt.Errorf("Incorrect chunks size %d != %d", sizeTotal, c.readCount)
	}

	// If previous object was chunked, remove its chunks
	f.removeOldChunks(ctx, baseRemote)

	// Rename data chunks from temporary to final names
	for chunkNo, chunk := range c.chunks {
		chunkRemote := f.makeChunkName(baseRemote, chunkNo, "", "")
		chunkMoved, errMove := f.baseMove(ctx, chunk, chunkRemote, delFailed)
		if errMove != nil {
			return nil, errMove
		}
		c.chunks[chunkNo] = chunkMoved
	}

	if !f.useMeta {
		// Remove stale metadata, if any
		oldMeta, errOldMeta := f.base.NewObject(ctx, baseRemote)
		if errOldMeta == nil {
			silentlyRemove(ctx, oldMeta)
		}

		o := f.newObject(baseRemote, nil, c.chunks)
		o.size = sizeTotal
		return o, nil
	}

	// Update meta object
	var metadata []byte
	switch f.opt.MetaFormat {
	case "simplejson":
		c.updateHashes()
		metadata, err = marshalSimpleJSON(ctx, sizeTotal, len(c.chunks), c.md5, c.sha1)
	}
	if err == nil {
		metaInfo := f.wrapInfo(src, baseRemote, int64(len(metadata)))
		metaObject, err = basePut(ctx, bytes.NewReader(metadata), metaInfo)
	}
	if err != nil {
		return nil, err
	}

	o := f.newObject("", metaObject, c.chunks)
	o.size = sizeTotal
	return o, nil
}
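// An illustrative timeline of put for a 5 GiB source with the default
// 2 GiB chunk size and simplejson metadata (names assume the default
// `*.rclone_chunk.###` format; the transaction ID is an example):
//
//	file.bin.rclone_chunk.001_4g2c5b   <- first 2 GiB (temporary)
//	file.bin.rclone_chunk.002_4g2c5b   <- next 2 GiB  (temporary)
//	file.bin.rclone_chunk.003_4g2c5b   <- last 1 GiB  (temporary)
//	... temporary chunks renamed to file.bin.rclone_chunk.00N ...
//	file.bin                           <- small meta object written last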
type putFn func(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error)

type chunkingReader struct {
	baseReader   io.Reader
	sizeTotal    int64
	sizeLeft     int64
	readCount    int64
	chunkSize    int64
	chunkLimit   int64
	chunkNo      int
	err          error
	done         bool
	chunks       []fs.Object
	expectSingle bool
	smallHead    []byte
	fs           *Fs
	hasher       gohash.Hash
	md5          string
	sha1         string
}

func (f *Fs) newChunkingReader(src fs.ObjectInfo) *chunkingReader {
	c := &chunkingReader{
		fs:        f,
		chunkSize: int64(f.opt.ChunkSize),
		sizeTotal: src.Size(),
	}
	c.chunkLimit = c.chunkSize
	c.sizeLeft = c.sizeTotal
	c.expectSingle = c.sizeTotal >= 0 && c.sizeTotal <= c.chunkSize
	return c
}

func (c *chunkingReader) wrapStream(ctx context.Context, in io.Reader, src fs.ObjectInfo) io.Reader {
	baseIn, wrapBack := accounting.UnWrap(in)

	switch {
	case c.fs.useMD5:
		if c.md5, _ = src.Hash(ctx, hash.MD5); c.md5 == "" {
			if c.fs.hashFallback {
				c.sha1, _ = src.Hash(ctx, hash.SHA1)
			} else {
				c.hasher = md5.New()
			}
		}
	case c.fs.useSHA1:
		if c.sha1, _ = src.Hash(ctx, hash.SHA1); c.sha1 == "" {
			if c.fs.hashFallback {
				c.md5, _ = src.Hash(ctx, hash.MD5)
			} else {
				c.hasher = sha1.New()
			}
		}
	}

	if c.hasher != nil {
		baseIn = io.TeeReader(baseIn, c.hasher)
	}
	c.baseReader = baseIn
	return wrapBack(c)
}

func (c *chunkingReader) updateHashes() {
	if c.hasher == nil {
		return
	}
	switch {
	case c.fs.useMD5:
		c.md5 = hex.EncodeToString(c.hasher.Sum(nil))
	case c.fs.useSHA1:
		c.sha1 = hex.EncodeToString(c.hasher.Sum(nil))
	}
}

// Note: Read is not called if wrapped remote performs put by hash.
func (c *chunkingReader) Read(buf []byte) (bytesRead int, err error) {
	if c.chunkLimit <= 0 {
		// Chunk complete - switch to next one.
		// We might not get here because some remotes (eg. box multi-uploader)
		// read the specified size exactly and skip the concluding EOF Read.
		// Then a check in the put loop will kick in.
		c.chunkLimit = c.chunkSize
		return 0, io.EOF
	}
	if int64(len(buf)) > c.chunkLimit {
		buf = buf[0:c.chunkLimit]
	}
	bytesRead, err = c.baseReader.Read(buf)
	if err != nil && err != io.EOF {
		c.err = err
		c.done = true
		return
	}
	c.accountBytes(int64(bytesRead))
	if c.chunkNo == 0 && c.expectSingle && bytesRead > 0 && c.readCount <= maxMetadataSize {
		c.smallHead = append(c.smallHead, buf[:bytesRead]...)
	}
	if bytesRead == 0 && c.sizeLeft == 0 {
		err = io.EOF // Force EOF when no data left.
	}
	if err == io.EOF {
		c.done = true
	}
	return
}
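// An illustrative Read sequence for chunkSize=4 and a 10-byte source
// (with a buffer larger than the chunk): the wrapped remote sees three
// complete "files" of 4, 4 and 2 bytes, each terminated by io.EOF:
//
//	Read -> 4 bytes     (chunkLimit drops to 0)
//	Read -> 0, io.EOF   (limit reset to chunkSize, next basePut starts)
//	Read -> 4 bytes
//	Read -> 0, io.EOF
//	Read -> 2 bytes     (sizeLeft reaches 0)
//	Read -> 0, io.EOF   (done is set, the put loop stops)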
func (c *chunkingReader) accountBytes(bytesRead int64) {
	c.readCount += bytesRead
	c.chunkLimit -= bytesRead
	if c.sizeLeft != -1 {
		c.sizeLeft -= bytesRead
	}
}

// dummyRead updates accounting, hashsums etc by simulating reads
func (c *chunkingReader) dummyRead(in io.Reader, size int64) error {
	if c.hasher == nil && c.readCount+size > maxMetadataSize {
		c.accountBytes(size)
		return nil
	}
	const bufLen = 1048576 // 1MB
	buf := make([]byte, bufLen)
	for size > 0 {
		n := size
		if n > bufLen {
			n = bufLen
		}
		if _, err := io.ReadFull(in, buf[0:n]); err != nil {
			return err
		}
		size -= n
	}
	return nil
}

// rollback removes uploaded temporary chunks
func (c *chunkingReader) rollback(ctx context.Context, metaObject fs.Object) {
	if metaObject != nil {
		c.chunks = append(c.chunks, metaObject)
	}
	for _, chunk := range c.chunks {
		if err := chunk.Remove(ctx); err != nil {
			fs.Errorf(chunk, "Failed to remove temporary chunk: %v", err)
		}
	}
}

func (f *Fs) removeOldChunks(ctx context.Context, remote string) {
	oldFsObject, err := f.NewObject(ctx, remote)
	if err == nil {
		oldObject := oldFsObject.(*Object)
		for _, chunk := range oldObject.chunks {
			if err := chunk.Remove(ctx); err != nil {
				fs.Errorf(chunk, "Failed to remove old chunk: %v", err)
			}
		}
	}
}

// Put into the remote path with the given modTime and size.
//
// May create the object even if it returns an error - if so
// will return the object and the error, otherwise will return
// nil and the error
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
	if err := f.forbidChunk(src, src.Remote()); err != nil {
		return nil, errors.Wrap(err, "refusing to put")
	}
	return f.put(ctx, in, src, src.Remote(), options, f.base.Put)
}

// PutStream uploads to the remote path with the modTime given of indeterminate size
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
	if err := f.forbidChunk(src, src.Remote()); err != nil {
		return nil, errors.Wrap(err, "refusing to upload")
	}
	return f.put(ctx, in, src, src.Remote(), options, f.base.Features().PutStream)
}

// Update in to the object with the modTime given of the given size
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
	if err := o.f.forbidChunk(o, o.Remote()); err != nil {
		return errors.Wrap(err, "update refused")
	}
	if err := o.readMetadata(ctx); err != nil {
		// refuse to update a file of unsupported format
		return errors.Wrap(err, "refusing to update")
	}
	basePut := o.f.base.Put
	if src.Size() < 0 {
		basePut = o.f.base.Features().PutStream
		if basePut == nil {
			return errors.New("wrapped file system does not support streaming uploads")
		}
	}
	oNew, err := o.f.put(ctx, in, src, o.Remote(), options, basePut)
	if err == nil {
		*o = *oNew.(*Object)
	}
	return err
}
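// Illustrative use of the upload entry points above from the command
// line (assuming a configured remote named "chunked" that wraps
// another remote):
//
//	rclone copy /media/big.iso chunked:backup
//	cat stream.bin | rclone rcat chunked:backup/stream.bin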
// PutUnchecked uploads the object
//
// This will create a duplicate if we upload a new file without
// checking to see if there is one already - use Put() for that.
func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
	do := f.base.Features().PutUnchecked
	if do == nil {
		return nil, errors.New("can't PutUnchecked")
	}
	// TODO: handle range/limit options and really chunk stream here!
	o, err := do(ctx, in, f.wrapInfo(src, "", -1))
	if err != nil {
		return nil, err
	}
	return f.newObject("", o, nil), nil
}

// Hashes returns the supported hash sets.
// Chunker advertises a hash type if and only if it can be calculated
// for files of any size, non-chunked or composite.
func (f *Fs) Hashes() hash.Set {
	// composites AND no fallback AND (chunker OR wrapped Fs will hash all non-chunked's)
	if f.useMD5 && !f.hashFallback && (f.hashAll || f.base.Hashes().Contains(hash.MD5)) {
		return hash.NewHashSet(hash.MD5)
	}
	if f.useSHA1 && !f.hashFallback && (f.hashAll || f.base.Hashes().Contains(hash.SHA1)) {
		return hash.NewHashSet(hash.SHA1)
	}
	return hash.NewHashSet() // can't provide strong guarantees
}

// Mkdir makes the directory (container, bucket)
//
// Shouldn't return an error if it already exists
func (f *Fs) Mkdir(ctx context.Context, dir string) error {
	if err := f.forbidChunk(dir, dir); err != nil {
		return errors.Wrap(err, "can't mkdir")
	}
	return f.base.Mkdir(ctx, dir)
}

// Rmdir removes the directory (container, bucket) if empty
//
// Return an error if it doesn't exist or isn't empty
func (f *Fs) Rmdir(ctx context.Context, dir string) error {
	return f.base.Rmdir(ctx, dir)
}

// Purge all files in the root and the root directory
//
// Implement this if you have a way of deleting all the files
// quicker than just running Remove() on the result of List()
//
// Return an error if it doesn't exist.
//
// This command will chain to `purge` from wrapped remote.
// As a result it removes not only composite chunker files with their
// active chunks but also all hidden temporary chunks in the directory.
//
func (f *Fs) Purge(ctx context.Context) error {
	do := f.base.Features().Purge
	if do == nil {
		return fs.ErrorCantPurge
	}
	return do(ctx)
}
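// An illustrative failure mode (names assume the default format, the
// transaction ID is an example): after a crash the wrapped remote may
// keep a dead temporary chunk such as
//
//	file.txt.rclone_chunk.001_x7k2p9
//
// which a listing of the chunker remote does not show. Purge removes
// it together with the rest of the directory.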
// Remove an object (chunks and metadata, if any)
//
// Remove deletes only active chunks of the composite object.
// It does not try to look for temporary chunks because they could belong
// to another command modifying this composite file in parallel.
//
// Commands normally clean up all temporary chunks in case of a failure.
// However, if rclone dies unexpectedly, it can leave hidden temporary
// chunks, which cannot be discovered using the `list` command.
// Remove does not try to search for such chunks or to delete them.
// Sometimes this can lead to strange results, eg. when `list` shows that
// a directory is empty but `rmdir` refuses to remove it because on the
// level of the wrapped remote it's actually *not* empty.
// As a workaround users can use `purge` to forcibly remove it.
//
// In future, a flag `--chunker-delete-hidden` may be added which tells
// Remove to search the directory for hidden chunks and remove them too
// (at the risk of breaking parallel commands).
//
// Remove is the only operation allowed on composite files with
// invalid or future metadata format.
// We don't let the user copy/move/update unsupported composite files,
// but let's at least let them get rid of such files, complaining loudly.
//
// This can litter the directory with orphan chunks of unsupported types,
// but as long as we remove the meta object, even future releases will
// treat the composite file as removed and refuse to act upon it.
//
// Disclaimer: corruption can still happen if an unsupported file is
// removed and then recreated with the same name.
// Unsupported control chunks will get re-picked by a more recent
// rclone version with unexpected results. This can be helped by
// the `delete hidden` flag above, or at least the user has been warned.
//
func (o *Object) Remove(ctx context.Context) (err error) {
	if err := o.f.forbidChunk(o, o.Remote()); err != nil {
		// operations.Move can still call Remove if chunker's Move refuses
		// to corrupt file in hard mode. Hence, refuse to Remove, too.
		return errors.Wrap(err, "refuse to corrupt")
	}
	if err := o.readMetadata(ctx); err != nil {
		// Proceed but warn user that unexpected things can happen.
		fs.Errorf(o, "Removing a file with unsupported metadata: %v", err)
	}

	// Remove non-chunked file or meta object of a composite file.
	if o.main != nil {
		err = o.main.Remove(ctx)
	}

	// Remove only active data chunks, ignore any temporary chunks that
	// might probably be created in parallel by other transactions.
	for _, chunk := range o.chunks {
		chunkErr := chunk.Remove(ctx)
		if err == nil {
			err = chunkErr
		}
	}

	// There are no known control chunks to remove atm.
	return err
}

// copyOrMove implements copy or move
func (f *Fs) copyOrMove(ctx context.Context, o *Object, remote string, do copyMoveFn, md5, sha1, opName string) (fs.Object, error) {
	if err := f.forbidChunk(o, remote); err != nil {
		return nil, errors.Wrapf(err, "can't %s", opName)
	}
	if !o.isComposite() {
		fs.Debugf(o, "%s non-chunked object...", opName)
		oResult, err := do(ctx, o.mainChunk(), remote) // chain operation to a single wrapped chunk
		if err != nil {
			return nil, err
		}
		return f.newObject("", oResult, nil), nil
	}
	if err := o.readMetadata(ctx); err != nil {
		// Refuse to copy/move composite files with invalid or future
		// metadata format which might involve unsupported chunk types.
		return nil, errors.Wrapf(err, "can't %s this file", opName)
	}

	fs.Debugf(o, "%s %d data chunks...", opName, len(o.chunks))
	mainRemote := o.remote
	var newChunks []fs.Object
	var err error

	// Copy/move active data chunks.
	// Ignore possible temporary chunks being created by parallel operations.
	for _, chunk := range o.chunks {
		chunkRemote := chunk.Remote()
		if !strings.HasPrefix(chunkRemote, mainRemote) {
			err = fmt.Errorf("invalid chunk name %q", chunkRemote)
			break
		}
		chunkSuffix := chunkRemote[len(mainRemote):]
		chunkResult, errChunk := do(ctx, chunk, remote+chunkSuffix)
		if errChunk != nil {
			err = errChunk // don't shadow err - the error checks below must see it
			break
		}
		newChunks = append(newChunks, chunkResult)
	}

	// Copy or move old metadata.
	// There are no known control chunks to move/copy atm.
	var metaObject fs.Object
	if err == nil && o.main != nil {
		metaObject, err = do(ctx, o.main, remote)
	}
	if err != nil {
		for _, chunk := range newChunks {
			silentlyRemove(ctx, chunk)
		}
		return nil, err
	}

	// Create wrapping object, calculate and validate total size
	newObj := f.newObject(remote, metaObject, newChunks)
	err = newObj.validate()
	if err != nil {
		silentlyRemove(ctx, newObj)
		return nil, err
	}

	// Update metadata
	var metadata []byte
	switch f.opt.MetaFormat {
	case "simplejson":
		metadata, err = marshalSimpleJSON(ctx, newObj.size, len(newChunks), md5, sha1)
		if err == nil {
			metaInfo := f.wrapInfo(metaObject, "", int64(len(metadata)))
			err = newObj.main.Update(ctx, bytes.NewReader(metadata), metaInfo)
		}
	case "none":
		if newObj.main != nil {
			err = newObj.main.Remove(ctx)
		}
	}

	// Return the composite object
	if err != nil {
		silentlyRemove(ctx, newObj)
		return nil, err
	}
	return newObj, nil
}

type copyMoveFn func(context.Context, fs.Object, string) (fs.Object, error)

func (f *Fs) okForServerSide(ctx context.Context, src fs.Object, opName string) (obj *Object, md5, sha1 string, ok bool) {
	var diff string
	obj, ok = src.(*Object)

	switch {
	case !ok:
		diff = "remote types"
	case !operations.SameConfig(f.base, obj.f.base):
		diff = "wrapped remotes"
	case f.opt.ChunkSize != obj.f.opt.ChunkSize:
		diff = "chunk sizes"
	case f.opt.NameFormat != obj.f.opt.NameFormat:
		diff = "chunk name formats"
	case f.opt.MetaFormat != obj.f.opt.MetaFormat:
		diff = "meta formats"
	}
	if diff != "" {
		fs.Debugf(src, "Can't %s - different %s", opName, diff)
		ok = false
		return
	}

	requireMetaHash := obj.isComposite() && f.opt.MetaFormat == "simplejson"
	if !requireMetaHash && !f.hashAll {
		ok = true // hash is not required for metadata
		return
	}

	switch {
	case f.useMD5:
		md5, _ = obj.Hash(ctx, hash.MD5)
		ok = md5 != ""
		if !ok && f.hashFallback {
			sha1, _ = obj.Hash(ctx, hash.SHA1)
			ok = sha1 != ""
		}
	case f.useSHA1:
		sha1, _ = obj.Hash(ctx, hash.SHA1)
		ok = sha1 != ""
		if !ok && f.hashFallback {
			md5, _ = obj.Hash(ctx, hash.MD5)
			ok = md5 != ""
		}
	default:
		ok = false
	}
	if !ok {
		fs.Debugf(src, "Can't %s - required hash not found", opName)
	}
	return
}

// Copy src to this remote using server side copy operations.
//
// This is stored with the remote path given
//
// It returns the destination Object and a possible error
//
// Will only be called if src.Fs().Name() == f.Name()
//
// If it isn't possible then return fs.ErrorCantCopy
func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
	baseCopy := f.base.Features().Copy
	if baseCopy == nil {
		return nil, fs.ErrorCantCopy
	}
	obj, md5, sha1, ok := f.okForServerSide(ctx, src, "copy")
	if !ok {
		return nil, fs.ErrorCantCopy
	}
	return f.copyOrMove(ctx, obj, remote, baseCopy, md5, sha1, "copy")
}
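// An illustrative server-side copy of a composite file (assuming the
// default name format): copying "a.bin" to "b.bin" chains to the
// wrapped remote as
//
//	a.bin.rclone_chunk.001 -> b.bin.rclone_chunk.001
//	a.bin.rclone_chunk.002 -> b.bin.rclone_chunk.002
//	a.bin (meta object)    -> b.bin (meta object, then rewritten in place)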
// Move src to this remote using server side move operations.
//
// This is stored with the remote path given
//
// It returns the destination Object and a possible error
//
// Will only be called if src.Fs().Name() == f.Name()
//
// If it isn't possible then return fs.ErrorCantMove
func (f *Fs) Move(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
	baseMove := func(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
		return f.baseMove(ctx, src, remote, delNever)
	}
	obj, md5, sha1, ok := f.okForServerSide(ctx, src, "move")
	if !ok {
		return nil, fs.ErrorCantMove
	}
	return f.copyOrMove(ctx, obj, remote, baseMove, md5, sha1, "move")
}

// baseMove chains to the wrapped Move or simulates it by Copy+Delete
func (f *Fs) baseMove(ctx context.Context, src fs.Object, remote string, delMode int) (fs.Object, error) {
	var (
		dest fs.Object
		err  error
	)
	switch delMode {
	case delAlways:
		dest, err = f.base.NewObject(ctx, remote)
	case delFailed:
		dest, err = operations.Move(ctx, f.base, nil, remote, src)
		if err == nil {
			return dest, err
		}
		dest, err = f.base.NewObject(ctx, remote)
	case delNever:
		// fall thru, the default
	}
	if err != nil {
		dest = nil
	}
	return operations.Move(ctx, f.base, dest, remote, src)
}

// DirMove moves src, srcRemote to this remote at dstRemote
// using server side move operations.
//
// Will only be called if src.Fs().Name() == f.Name()
//
// If it isn't possible then return fs.ErrorCantDirMove
//
// If destination exists then return fs.ErrorDirExists
func (f *Fs) DirMove(ctx context.Context, src fs.Fs, srcRemote, dstRemote string) error {
	do := f.base.Features().DirMove
	if do == nil {
		return fs.ErrorCantDirMove
	}
	srcFs, ok := src.(*Fs)
	if !ok {
		fs.Debugf(srcFs, "Can't move directory - not same remote type")
		return fs.ErrorCantDirMove
	}
	return do(ctx, srcFs.base, srcRemote, dstRemote)
}

// CleanUp the trash in the Fs
//
// Implement this if you have a way of emptying the trash or
// otherwise cleaning up old versions of files.
func (f *Fs) CleanUp(ctx context.Context) error {
	do := f.base.Features().CleanUp
	if do == nil {
		return errors.New("can't CleanUp")
	}
	return do(ctx)
}

// About gets quota information from the Fs
func (f *Fs) About(ctx context.Context) (*fs.Usage, error) {
	do := f.base.Features().About
	if do == nil {
		return nil, errors.New("About not supported")
	}
	return do(ctx)
}

// UnWrap returns the Fs that this Fs is wrapping
func (f *Fs) UnWrap() fs.Fs {
	return f.base
}

// WrapFs returns the Fs that is wrapping this Fs
func (f *Fs) WrapFs() fs.Fs {
	return f.wrapper
}

// SetWrapper sets the Fs that is wrapping this Fs
func (f *Fs) SetWrapper(wrapper fs.Fs) {
	f.wrapper = wrapper
}

// ChangeNotify calls the passed function with a path
// that has had changes. If the implementation
// uses polling, it should adhere to the given interval.
//
// Replace data chunk names by the name of the composite file.
// Ignore temporary and control chunks.
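// For example, a notification for "dir/file.txt.rclone_chunk.002"
// (with the default name format) is forwarded as "dir/file.txt", while
// a temporary chunk like "dir/file.txt.rclone_chunk.002_x7k2p9" keeps
// its own path.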
// ChangeNotify calls the passed function with a path
// that has had changes. If the implementation
// uses polling, it should adhere to the given interval.
//
// Replace data chunk names by the name of composite file.
// Ignore temporary and control chunks.
func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryType), pollIntervalChan <-chan time.Duration) {
	do := f.base.Features().ChangeNotify
	if do == nil {
		return
	}
	wrappedNotifyFunc := func(path string, entryType fs.EntryType) {
		//fs.Debugf(f, "ChangeNotify: path %q entryType %d", path, entryType)
		if entryType == fs.EntryObject {
			mainPath, _, _, xactID := f.parseChunkName(path)
			if mainPath != "" && xactID == "" {
				path = mainPath
			}
		}
		notifyFunc(path, entryType)
	}
	do(ctx, wrappedNotifyFunc, pollIntervalChan)
}

// Object represents a composite file wrapping one or more data chunks
type Object struct {
	remote string
	main   fs.Object   // meta object if file is composite, or wrapped non-chunked file, nil if meta format is 'none'
	chunks []fs.Object // active data chunks if file is composite, or wrapped file as a single chunk if meta format is 'none'
	size   int64       // cached total size of chunks in a composite file or -1 for non-chunked files
	isFull bool        // true if metadata has been read
	md5    string
	sha1   string
	f      *Fs
}

func (o *Object) addChunk(chunk fs.Object, chunkNo int) error {
	if chunkNo < 0 {
		return fmt.Errorf("invalid chunk number %d", chunkNo+o.f.opt.StartFrom)
	}
	if chunkNo == len(o.chunks) {
		o.chunks = append(o.chunks, chunk)
		return nil
	}
	if chunkNo > maxSafeChunkNumber {
		return ErrChunkOverflow
	}
	if chunkNo > len(o.chunks) {
		newChunks := make([]fs.Object, (chunkNo + 1), (chunkNo+1)*2)
		copy(newChunks, o.chunks)
		o.chunks = newChunks
	}
	o.chunks[chunkNo] = chunk
	return nil
}

// validate verifies the object internals and updates total size
func (o *Object) validate() error {
	if !o.isComposite() {
		_ = o.mainChunk() // verify that single wrapped chunk exists
		return nil
	}

	metaObject := o.main // this file is composite - o.main refers to meta object (or nil if meta format is 'none')
	if metaObject != nil && metaObject.Size() > maxMetadataSize {
		// metadata of a chunked file must be a tiny piece of json
		o.size = -1
		return fmt.Errorf("%q metadata is too large", o.remote)
	}

	var totalSize int64
	for _, chunk := range o.chunks {
		if chunk == nil {
			o.size = -1
			return fmt.Errorf("%q has missing chunks", o)
		}
		totalSize += chunk.Size()
	}
	o.size = totalSize // cache the total data size
	return nil
}

func (f *Fs) newObject(remote string, main fs.Object, chunks []fs.Object) *Object {
	var size int64 = -1
	if main != nil {
		size = main.Size()
		if remote == "" {
			remote = main.Remote()
		}
	}
	return &Object{
		remote: remote,
		main:   main,
		size:   size,
		f:      f,
		chunks: chunks,
	}
}
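
// Editor's sketch (not part of the original source): addChunk tolerates
// chunks discovered out of order. Appends extend the slice, while gaps are
// kept as nil entries and rejected later by validate(). The function and
// variable names are hypothetical.
func exampleAddOutOfOrder(f *Fs, c0, c2 fs.Object) error {
	o := &Object{f: f}
	if err := o.addChunk(c2, 2); err != nil { // grows o.chunks to [nil nil c2]
		return err
	}
	if err := o.addChunk(c0, 0); err != nil { // fills slot 0, slot 1 stays nil
		return err
	}
	return o.validate() // fails with "has missing chunks" until chunk 1 arrives
}
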
// mainChunk returns:
//  - a wrapped object for non-chunked files
//  - meta object for chunked files with metadata
//  - first chunk for chunked files without metadata
// Never returns nil.
func (o *Object) mainChunk() fs.Object {
	if o.main != nil {
		return o.main // meta object or non-chunked wrapped file
	}
	if o.chunks != nil {
		return o.chunks[0] // first chunk of a chunked composite file
	}
	panic("invalid chunked object") // very unlikely
}

func (o *Object) isComposite() bool {
	return o.chunks != nil
}

// Fs returns read only access to the Fs that this object is part of
func (o *Object) Fs() fs.Info {
	return o.f
}

// Return a string version
func (o *Object) String() string {
	if o == nil {
		return "<nil>"
	}
	return o.remote
}

// Remote returns the remote path
func (o *Object) Remote() string {
	return o.remote
}

// Size returns the size of the file
func (o *Object) Size() int64 {
	if o.isComposite() {
		return o.size // total size of data chunks in a composite file
	}
	return o.mainChunk().Size() // size of wrapped non-chunked file
}

// Storable returns whether object is storable
func (o *Object) Storable() bool {
	return o.mainChunk().Storable()
}

// ModTime returns the modification time of the file
func (o *Object) ModTime(ctx context.Context) time.Time {
	return o.mainChunk().ModTime(ctx)
}

// SetModTime sets the modification time of the file
func (o *Object) SetModTime(ctx context.Context, mtime time.Time) error {
	if err := o.readMetadata(ctx); err != nil {
		return err // refuse to act on unsupported format
	}
	return o.mainChunk().SetModTime(ctx, mtime)
}

// Hash returns the selected checksum of the file.
// If no checksum is available it returns "".
//
// For the hash types known to chunker (MD5 and SHA1) Hash won't fail
// with an `unsupported` error but will return an empty hash string
// when a particular hashsum is unavailable.
//
// Hash takes the hashsum from metadata if available, or requests it
// from the wrapped remote for non-chunked files.
// Metadata (if meta format is not 'none') is by default kept
// only for composite files. In the "All" hashing mode chunker
// will force metadata on all files if a particular hashsum type
// is not supported by the wrapped remote.
//
// Note that Hash prefers the wrapped hashsum for a non-chunked
// file, then tries to read it from metadata. This in theory
// handles the unusual case when a small file has been tampered with
// at the level of the wrapped remote without chunker being aware of it.
//
func (o *Object) Hash(ctx context.Context, hashType hash.Type) (string, error) {
	if !o.isComposite() {
		// First, chain to the wrapped non-chunked file if possible.
		if value, err := o.mainChunk().Hash(ctx, hashType); err == nil && value != "" {
			return value, nil
		}
	}
	if err := o.readMetadata(ctx); err != nil {
		return "", err // valid metadata is required to get hash, abort
	}
	// Try hash from metadata if the file is composite or if wrapped remote fails.
	switch hashType {
	case hash.MD5:
		if o.md5 == "" {
			return "", nil
		}
		return o.md5, nil
	case hash.SHA1:
		if o.sha1 == "" {
			return "", nil
		}
		return o.sha1, nil
	default:
		return "", hash.ErrUnsupported
	}
}

// UnWrap returns the wrapped Object
func (o *Object) UnWrap() fs.Object {
	return o.mainChunk()
}
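
// Editor's sketch (not part of the original source): the lookup order that
// Hash implements for a non-chunked file - wrapped remote first, metadata
// second. For hash.MD5 and hash.SHA1 an unknown sum comes back as "" rather
// than hash.ErrUnsupported. The function name is hypothetical.
func exampleHashLookup(ctx context.Context, o *Object) {
	md5sum, err := o.Hash(ctx, hash.MD5)
	switch {
	case err != nil:
		fmt.Println("metadata required but unreadable:", err)
	case md5sum == "":
		fmt.Println("md5 genuinely unknown for this file")
	default:
		fmt.Println("md5:", md5sum)
	}
}
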
// Open opens the file for read. Call Close() on the returned io.ReadCloser
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.ReadCloser, err error) {
	if !o.isComposite() {
		return o.mainChunk().Open(ctx, options...) // chain to wrapped non-chunked file
	}
	if err := o.readMetadata(ctx); err != nil {
		// refuse to open unsupported format
		return nil, errors.Wrap(err, "can't open")
	}

	var openOptions []fs.OpenOption
	var offset, limit int64 = 0, -1

	for _, option := range options {
		switch opt := option.(type) {
		case *fs.SeekOption:
			offset = opt.Offset
		case *fs.RangeOption:
			offset, limit = opt.Decode(o.size)
		default:
			// pass Options on to the wrapped open, if appropriate
			openOptions = append(openOptions, option)
		}
	}

	if offset < 0 {
		return nil, errors.New("invalid offset")
	}
	if limit < 0 {
		limit = o.size - offset
	}

	return o.newLinearReader(ctx, offset, limit, openOptions)
}

// linearReader opens and reads file chunks sequentially, without read-ahead
type linearReader struct {
	ctx     context.Context
	chunks  []fs.Object
	options []fs.OpenOption
	limit   int64
	count   int64
	pos     int
	reader  io.ReadCloser
	err     error
}

func (o *Object) newLinearReader(ctx context.Context, offset, limit int64, options []fs.OpenOption) (io.ReadCloser, error) {
	r := &linearReader{
		ctx:     ctx,
		chunks:  o.chunks,
		options: options,
		limit:   limit,
	}

	// skip to chunk for given offset
	err := io.EOF
	for offset >= 0 && err != nil {
		offset, err = r.nextChunk(offset)
	}
	if err == nil || err == io.EOF {
		r.err = err
		return r, nil
	}
	return nil, err
}
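
// Editor's sketch (not part of the original source): reading an arbitrary
// byte range of a composite file. Open decodes the RangeOption against the
// cached total size and the linear reader skips whole chunks until the
// offset falls inside one. The function name is hypothetical.
func exampleRangeRead(ctx context.Context, o *Object) ([]byte, error) {
	rc, err := o.Open(ctx, &fs.RangeOption{Start: 100, End: 199}) // bytes 100-199
	if err != nil {
		return nil, err
	}
	defer func() { _ = rc.Close() }()
	return ioutil.ReadAll(rc)
}
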
func (r *linearReader) nextChunk(offset int64) (int64, error) {
	if r.err != nil {
		return -1, r.err
	}
	if r.pos >= len(r.chunks) || r.limit <= 0 || offset < 0 {
		return -1, io.EOF
	}

	chunk := r.chunks[r.pos]
	count := chunk.Size()
	r.pos++

	if offset >= count {
		return offset - count, io.EOF
	}
	count -= offset
	if r.limit < count {
		count = r.limit
	}
	options := append(r.options, &fs.RangeOption{Start: offset, End: offset + count - 1})

	if err := r.Close(); err != nil {
		return -1, err
	}

	reader, err := chunk.Open(r.ctx, options...)
	if err != nil {
		return -1, err
	}

	r.reader = reader
	r.count = count
	return offset, nil
}

func (r *linearReader) Read(p []byte) (n int, err error) {
	if r.err != nil {
		return 0, r.err
	}
	if r.limit <= 0 {
		r.err = io.EOF
		return 0, io.EOF
	}

	for r.count <= 0 {
		// current chunk has been read completely or its size is zero
		off, err := r.nextChunk(0)
		if off < 0 {
			r.err = err
			return 0, err
		}
	}

	n, err = r.reader.Read(p)
	if err == nil || err == io.EOF {
		r.count -= int64(n)
		r.limit -= int64(n)
		if r.limit > 0 {
			err = nil // more data to read
		}
	}
	r.err = err
	return
}

func (r *linearReader) Close() (err error) {
	if r.reader != nil {
		err = r.reader.Close()
		r.reader = nil
	}
	return
}

// ObjectInfo describes a wrapped fs.ObjectInfo acting as the upload source
type ObjectInfo struct {
	src     fs.ObjectInfo
	fs      *Fs
	nChunks int    // number of data chunks
	size    int64  // overrides source size by the total size of data chunks
	remote  string // overrides remote name
	md5     string // overrides MD5 checksum
	sha1    string // overrides SHA1 checksum
}

func (f *Fs) wrapInfo(src fs.ObjectInfo, newRemote string, totalSize int64) *ObjectInfo {
	return &ObjectInfo{
		src:    src,
		fs:     f,
		size:   totalSize,
		remote: newRemote,
	}
}

// Fs returns read only access to the Fs that this object is part of
func (oi *ObjectInfo) Fs() fs.Info {
	if oi.fs == nil {
		panic("stub ObjectInfo")
	}
	return oi.fs
}

// String returns string representation
func (oi *ObjectInfo) String() string {
	return oi.src.String()
}

// Storable returns whether object is storable
func (oi *ObjectInfo) Storable() bool {
	return oi.src.Storable()
}

// Remote returns the remote path
func (oi *ObjectInfo) Remote() string {
	if oi.remote != "" {
		return oi.remote
	}
	return oi.src.Remote()
}

// Size returns the size of the file
func (oi *ObjectInfo) Size() int64 {
	if oi.size != -1 {
		return oi.size
	}
	return oi.src.Size()
}

// ModTime returns the modification time
func (oi *ObjectInfo) ModTime(ctx context.Context) time.Time {
	return oi.src.ModTime(ctx)
}
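
// Editor's sketch (not part of the original source): the offset arithmetic
// behind newLinearReader/nextChunk. Whole chunk sizes are subtracted from the
// requested offset until it lands inside a chunk; that chunk is then opened
// with a RangeOption starting at the remainder. The helper is hypothetical.
func exampleSkipChunks(sizes []int64, offset int64) (chunkIndex int, within int64) {
	for i, size := range sizes {
		if offset < size {
			return i, offset // e.g. sizes [4,4,4], offset 6 -> chunk 1, offset 2
		}
		offset -= size
	}
	return len(sizes), offset // offset at or beyond EOF
}
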
// Hash returns the selected checksum of the wrapped file
// It returns "" if no checksum is available or if this
// info doesn't wrap the complete file.
func (oi *ObjectInfo) Hash(ctx context.Context, hashType hash.Type) (string, error) {
	var errUnsupported error
	switch hashType {
	case hash.MD5:
		if oi.md5 != "" {
			return oi.md5, nil
		}
	case hash.SHA1:
		if oi.sha1 != "" {
			return oi.sha1, nil
		}
	default:
		errUnsupported = hash.ErrUnsupported
	}
	if oi.Size() != oi.src.Size() {
		// fail if this info wraps only a part of the file
		return "", errUnsupported
	}
	// chain to full source if possible
	value, err := oi.src.Hash(ctx, hashType)
	if err == hash.ErrUnsupported {
		return "", errUnsupported
	}
	return value, err
}

// ID returns the ID of the Object if known, or "" if not
func (o *Object) ID() string {
	if doer, ok := o.mainChunk().(fs.IDer); ok {
		return doer.ID()
	}
	return ""
}

// Meta format `simplejson`
type metaSimpleJSON struct {
	// required core fields
	Version  *int   `json:"ver"`
	Size     *int64 `json:"size"`    // total size of data chunks
	ChunkNum *int   `json:"nchunks"` // number of data chunks
	// optional extra fields
	MD5  string `json:"md5,omitempty"`
	SHA1 string `json:"sha1,omitempty"`
}

// marshalSimpleJSON
//
// Current implementation creates metadata in three cases:
// - for files larger than chunk size
// - if file contents can be mistaken as meta object
// - if consistent hashing is On but wrapped remote can't provide given hash
//
func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 string) ([]byte, error) {
	version := metadataVersion
	metadata := metaSimpleJSON{
		// required core fields
		Version:  &version,
		Size:     &size,
		ChunkNum: &nChunks,
		// optional extra fields
		MD5:  md5,
		SHA1: sha1,
	}
	data, err := json.Marshal(&metadata)
	if err == nil && data != nil && len(data) >= maxMetadataSize {
		// be a nitpicker, never produce something you can't consume
		return nil, errors.New("metadata can't be this big, please report to rclone developers")
	}
	return data, err
}
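
// Editor's sketch (not part of the original source): typical metadata emitted
// by marshalSimpleJSON for a two-chunk file, roughly
//   {"ver":1,"size":2684354560,"nchunks":2,"md5":"..."}
// which stays comfortably under the 255-byte maxMetadataSize cap. The
// function name and the example values are hypothetical.
func exampleMetadata(ctx context.Context) ([]byte, error) {
	return marshalSimpleJSON(ctx, 2684354560, 2, "9a0364b9e99bb480dd25e1f0284c8555", "")
}
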
// unmarshalSimpleJSON
//
// Only metadata format version 1 is supported atm.
// Future releases will transparently migrate older metadata objects.
// New format will have a higher version number and cannot be correctly
// handled by current implementation.
// The version check below will then explicitly ask user to upgrade rclone.
//
func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte, strictChecks bool) (info *ObjectInfo, err error) {
	// Be strict about JSON format
	// to reduce possibility that a random small file resembles metadata.
	if data != nil && len(data) > maxMetadataSize {
		return nil, errors.New("too big")
	}
	if data == nil || len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' {
		return nil, errors.New("invalid json")
	}
	var metadata metaSimpleJSON
	err = json.Unmarshal(data, &metadata)
	if err != nil {
		return nil, err
	}
	// Basic fields are strictly required
	// to reduce possibility that a random small file resembles metadata.
	if metadata.Version == nil || metadata.Size == nil || metadata.ChunkNum == nil {
		return nil, errors.New("missing required field")
	}
	// Perform strict checks, avoid corruption of future metadata formats.
	if *metadata.Version < 1 {
		return nil, errors.New("wrong version")
	}
	if *metadata.Size < 0 {
		return nil, errors.New("negative file size")
	}
	if *metadata.ChunkNum < 0 {
		return nil, errors.New("negative number of chunks")
	}
	if *metadata.ChunkNum > maxSafeChunkNumber {
		return nil, ErrChunkOverflow
	}
	if metadata.MD5 != "" {
		_, err = hex.DecodeString(metadata.MD5)
		if len(metadata.MD5) != 32 || err != nil {
			return nil, errors.New("wrong md5 hash")
		}
	}
	if metadata.SHA1 != "" {
		_, err = hex.DecodeString(metadata.SHA1)
		if len(metadata.SHA1) != 40 || err != nil {
			return nil, errors.New("wrong sha1 hash")
		}
	}
	// ChunkNum is allowed to be 0 in future versions
	if *metadata.ChunkNum < 1 && *metadata.Version <= metadataVersion {
		return nil, errors.New("wrong number of chunks")
	}
	// Non-strict mode also accepts future metadata versions
	if *metadata.Version > metadataVersion && strictChecks {
		return nil, fmt.Errorf("version %d is not supported, please upgrade rclone", *metadata.Version)
	}

	var nilFs *Fs // a nil receiver is fine here: wrapInfo only stores it in the result
	info = nilFs.wrapInfo(metaObject, "", *metadata.Size)
	info.nChunks = *metadata.ChunkNum
	info.md5 = metadata.MD5
	info.sha1 = metadata.SHA1
	return info, nil
}

func silentlyRemove(ctx context.Context, o fs.Object) {
	_ = o.Remove(ctx) // ignore error
}

// Name of the remote (as passed into NewFs)
func (f *Fs) Name() string {
	return f.name
}

// Root of the remote (as passed into NewFs)
func (f *Fs) Root() string {
	return f.root
}

// Features returns the optional features of this Fs
func (f *Fs) Features() *fs.Features {
	return f.features
}

// String returns a description of the FS
func (f *Fs) String() string {
	return fmt.Sprintf("Chunked '%s:%s'", f.name, f.root)
}

// Precision returns the precision of this Fs
func (f *Fs) Precision() time.Duration {
	return f.base.Precision()
}

// Check the interfaces are satisfied
var (
	_ fs.Fs              = (*Fs)(nil)
	_ fs.Purger          = (*Fs)(nil)
	_ fs.Copier          = (*Fs)(nil)
	_ fs.Mover           = (*Fs)(nil)
	_ fs.DirMover        = (*Fs)(nil)
	_ fs.PutUncheckeder  = (*Fs)(nil)
	_ fs.PutStreamer     = (*Fs)(nil)
	_ fs.CleanUpper      = (*Fs)(nil)
	_ fs.UnWrapper       = (*Fs)(nil)
	_ fs.ListRer         = (*Fs)(nil)
	_ fs.Abouter         = (*Fs)(nil)
	_ fs.Wrapper         = (*Fs)(nil)
	_ fs.ChangeNotifier  = (*Fs)(nil)
	_ fs.ObjectInfo      = (*ObjectInfo)(nil)
	_ fs.Object          = (*Object)(nil)
	_ fs.ObjectUnWrapper = (*Object)(nil)
	_ fs.IDer            = (*Object)(nil)
)
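
// Editor's sketch (not part of the original source): a round trip through the
// simplejson codec defined above. A nil metaObject is acceptable here because
// unmarshalSimpleJSON only stores it in the returned ObjectInfo. The function
// name is hypothetical.
func exampleRoundTrip(ctx context.Context) error {
	data, err := marshalSimpleJSON(ctx, 1024, 1, "", "")
	if err != nil {
		return err
	}
	info, err := unmarshalSimpleJSON(ctx, nil, data, true)
	if err != nil {
		return err
	}
	if info.Size() != 1024 || info.nChunks != 1 {
		return errors.New("round trip mismatch")
	}
	return nil
}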