github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/backend/azureblob/azureblob.go

// Package azureblob provides an interface to the Microsoft Azure blob object storage system

// +build !plan9,!solaris

package azureblob

import (
	"bytes"
	"context"
	"crypto/md5"
	"encoding/base64"
	"encoding/hex"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"path"
	"strings"
	"sync"
	"time"

	"github.com/Azure/azure-pipeline-go/pipeline"
	"github.com/Azure/azure-storage-blob-go/azblob"
	"github.com/pkg/errors"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/config"
	"github.com/rclone/rclone/fs/config/configmap"
	"github.com/rclone/rclone/fs/config/configstruct"
	"github.com/rclone/rclone/fs/fserrors"
	"github.com/rclone/rclone/fs/fshttp"
	"github.com/rclone/rclone/fs/hash"
	"github.com/rclone/rclone/fs/walk"
	"github.com/rclone/rclone/lib/bucket"
	"github.com/rclone/rclone/lib/encoder"
	"github.com/rclone/rclone/lib/pacer"
	"github.com/rclone/rclone/lib/pool"
	"github.com/rclone/rclone/lib/readers"
	"golang.org/x/sync/errgroup"
)

const (
	minSleep              = 10 * time.Millisecond
	maxSleep              = 10 * time.Second
	decayConstant         = 1    // bigger for slower decay, exponential
	maxListChunkSize      = 5000 // number of items to read at once
	modTimeKey            = "mtime"
	timeFormatIn          = time.RFC3339
	timeFormatOut         = "2006-01-02T15:04:05.000000000Z07:00"
	maxTotalParts         = 50000 // in multipart upload
	storageDefaultBaseURL = "blob.core.windows.net"
	// maxUncommittedSize = 9 << 30 // can't upload bigger than this
	defaultChunkSize    = 4 * fs.MebiByte
	maxChunkSize        = 100 * fs.MebiByte
	defaultUploadCutoff = 256 * fs.MebiByte
	maxUploadCutoff     = 256 * fs.MebiByte
	defaultAccessTier   = azblob.AccessTierNone
	maxTryTimeout       = time.Hour * 24 * 365 // max time of an azure web request response window (whether or not data is flowing)
	// Default storage account, key and blob endpoint for emulator support,
	// though it is a base64 key checked in here, it is publicly available secret.
	emulatorAccount      = "devstoreaccount1"
	emulatorAccountKey   = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
	emulatorBlobEndpoint = "http://127.0.0.1:10000/devstoreaccount1"
	memoryPoolFlushTime  = fs.Duration(time.Minute) // flush the cached buffers after this long
	memoryPoolUseMmap    = false
)

// Register with Fs
func init() {
	fs.Register(&fs.RegInfo{
		Name:        "azureblob",
		Description: "Microsoft Azure Blob Storage",
		NewFs:       NewFs,
		Options: []fs.Option{{
			Name: "account",
			Help: "Storage Account Name (leave blank to use SAS URL or Emulator)",
		}, {
			Name: "key",
			Help: "Storage Account Key (leave blank to use SAS URL or Emulator)",
		}, {
			Name: "sas_url",
			Help: "SAS URL for container level access only\n(leave blank if using account/key or Emulator)",
		}, {
			Name:    "use_emulator",
			Help:    "Uses local storage emulator if provided as 'true' (leave blank if using real azure storage endpoint)",
			Default: false,
		}, {
			Name:     "endpoint",
			Help:     "Endpoint for the service\nLeave blank normally.",
			Advanced: true,
		}, {
			Name:     "upload_cutoff",
			Help:     "Cutoff for switching to chunked upload (<= 256MB).",
			Default:  defaultUploadCutoff,
			Advanced: true,
		}, {
			Name: "chunk_size",
			Help: `Upload chunk size (<= 100MB).

Note that this is stored in memory and there may be up to
"--transfers" chunks stored at once in memory.`,
			Default:  defaultChunkSize,
			Advanced: true,
		}, {
			Name: "list_chunk",
			Help: `Size of blob list.

This sets the number of blobs requested in each listing chunk. Default
is the maximum, 5000. "List blobs" requests are permitted 2 minutes
per megabyte to complete. If an operation is taking longer than 2
minutes per megabyte on average, it will time out (
[source](https://docs.microsoft.com/en-us/rest/api/storageservices/setting-timeouts-for-blob-service-operations#exceptions-to-default-timeout-interval)
). This can be used to limit the number of blob items to return, to
avoid the time out.`,
			Default:  maxListChunkSize,
			Advanced: true,
		}, {
			Name: "access_tier",
			Help: `Access tier of blob: hot, cool or archive.

Archived blobs can be restored by setting access tier to hot or
cool. Leave blank if you intend to use the default access tier, which is
set at the account level.

If there is no "access tier" specified, rclone doesn't apply any tier.
rclone performs "Set Tier" operation on blobs while uploading; if objects
are not modified, specifying a new "access tier" will have no effect.
If blobs are in "archive tier" at remote, trying to perform data transfer
operations from remote will not be allowed. User should first restore by
tiering blob to "Hot" or "Cool".`,
			Advanced: true,
		}, {
			Name: "disable_checksum",
			Help: `Don't store MD5 checksum with object metadata.

Normally rclone will calculate the MD5 checksum of the input before
uploading it so it can add it to metadata on the object. This is great
for data integrity checking but can cause long delays for large files
to start uploading.`,
			Default:  false,
			Advanced: true,
		}, {
			Name:     "memory_pool_flush_time",
			Default:  memoryPoolFlushTime,
			Advanced: true,
			Help: `How often internal memory buffer pools will be flushed.
Uploads which require additional buffers (e.g. multipart) will use the memory pool for allocations.
This option controls how often unused buffers will be removed from the pool.`,
		}, {
			Name:     "memory_pool_use_mmap",
			Default:  memoryPoolUseMmap,
			Advanced: true,
			Help:     `Whether to use mmap buffers in internal memory pool.`,
		}, {
			Name:     config.ConfigEncoding,
			Help:     config.ConfigEncodingHelp,
			Advanced: true,
			Default: (encoder.EncodeInvalidUtf8 |
				encoder.EncodeSlash |
				encoder.EncodeCtl |
				encoder.EncodeDel |
				encoder.EncodeBackSlash |
				encoder.EncodeRightPeriod),
		}},
	})
}

// Options defines the configuration for this backend
type Options struct {
	Account             string               `config:"account"`
	Key                 string               `config:"key"`
	Endpoint            string               `config:"endpoint"`
	SASURL              string               `config:"sas_url"`
	UploadCutoff        fs.SizeSuffix        `config:"upload_cutoff"`
	ChunkSize           fs.SizeSuffix        `config:"chunk_size"`
	ListChunkSize       uint                 `config:"list_chunk"`
	AccessTier          string               `config:"access_tier"`
	UseEmulator         bool                 `config:"use_emulator"`
	DisableCheckSum     bool                 `config:"disable_checksum"`
	MemoryPoolFlushTime fs.Duration          `config:"memory_pool_flush_time"`
	MemoryPoolUseMmap   bool                 `config:"memory_pool_use_mmap"`
	Enc                 encoder.MultiEncoder `config:"encoding"`
}

// Fs represents a remote azure server
type Fs struct {
	name          string                          // name of this remote
	root          string                          // the path we are working on if any
	opt           Options                         // parsed config options
	features      *fs.Features                    // optional features
	client        *http.Client                    // http client we are using
	svcURL        *azblob.ServiceURL              // reference to serviceURL
	cntURLcacheMu sync.Mutex                      // mutex to protect cntURLcache
	cntURLcache   map[string]*azblob.ContainerURL // reference to containerURL per container
	rootContainer string                          // container part of root (if any)
	rootDirectory string                          // directory part of root (if any)
	isLimited     bool                            // if limited to one container
	cache         *bucket.Cache                   // cache for container creation status
	pacer         *fs.Pacer                       // To pace and retry the API calls
	uploadToken   *pacer.TokenDispenser           // control concurrency
	pool          *pool.Pool                      // memory pool
}

// Object describes an azure object
type Object struct {
	fs         *Fs                   // what this object is part of
	remote     string                // The remote path
	modTime    time.Time             // The modified time of the object if known
	md5        string                // MD5 hash if known
	size       int64                 // Size of the object
	mimeType   string                // Content-Type of the object
	accessTier azblob.AccessTierType // Blob Access Tier
	meta       map[string]string     // blob metadata
}

// ------------------------------------------------------------

// Name of the remote (as passed into NewFs)
func (f *Fs) Name() string {
	return f.name
}

// Root of the remote (as passed into NewFs)
func (f *Fs) Root() string {
	return f.root
}

// String converts this Fs to a string
func (f *Fs) String() string {
	if f.rootContainer == "" {
		return "Azure root"
	}
	if f.rootDirectory == "" {
		return fmt.Sprintf("Azure container %s", f.rootContainer)
	}
	return fmt.Sprintf("Azure container %s path %s", f.rootContainer, f.rootDirectory)
}

// Features returns the optional features of this Fs
func (f *Fs) Features() *fs.Features {
	return f.features
}

// parsePath parses a remote 'url'
func parsePath(path string) (root string) {
	root = strings.Trim(path, "/")
	return
}

// split returns container and containerPath from the rootRelativePath
// relative to f.root
func (f *Fs) split(rootRelativePath string) (containerName, containerPath string) {
	containerName, containerPath = bucket.Split(path.Join(f.root, rootRelativePath))
	return f.opt.Enc.FromStandardName(containerName), f.opt.Enc.FromStandardPath(containerPath)
}

// split returns container and containerPath from the object
func (o *Object) split() (container, containerPath string) {
	return o.fs.split(o.remote)
}

// validateAccessTier checks if azureblob supports user supplied tier
func validateAccessTier(tier string) bool {
	switch tier {
	case string(azblob.AccessTierHot),
		string(azblob.AccessTierCool),
		string(azblob.AccessTierArchive):
		// valid cases
		return true
	default:
		return false
	}
}

// retryErrorCodes is a slice of error codes that we will retry
var retryErrorCodes = []int{
	401, // Unauthorized (eg "Token has expired")
	408, // Request Timeout
	429, // Rate exceeded.
	500, // Get occasional 500 Internal Server Error
	503, // Service Unavailable
	504, // Gateway Time-out
}

// shouldRetry returns a boolean as to whether this resp and err
// deserve to be retried. It returns the err as a convenience
func (f *Fs) shouldRetry(err error) (bool, error) {
	// FIXME interpret special errors - more to do here
	if storageErr, ok := err.(azblob.StorageError); ok {
		switch storageErr.ServiceCode() {
		case "InvalidBlobOrBlock":
			// These errors happen sometimes in multipart uploads
			// because of block concurrency issues
			return true, err
		}
		statusCode := storageErr.Response().StatusCode
		for _, e := range retryErrorCodes {
			if statusCode == e {
				return true, err
			}
		}
	}
	return fserrors.ShouldRetry(err), err
}

func checkUploadChunkSize(cs fs.SizeSuffix) error {
	const minChunkSize = fs.Byte
	if cs < minChunkSize {
		return errors.Errorf("%s is less than %s", cs, minChunkSize)
	}
	if cs > maxChunkSize {
		return errors.Errorf("%s is greater than %s", cs, maxChunkSize)
	}
	return nil
}

func (f *Fs) setUploadChunkSize(cs fs.SizeSuffix) (old fs.SizeSuffix, err error) {
	err = checkUploadChunkSize(cs)
	if err == nil {
		old, f.opt.ChunkSize = f.opt.ChunkSize, cs
	}
	return
}

func checkUploadCutoff(cs fs.SizeSuffix) error {
	if cs > maxUploadCutoff {
		return errors.Errorf("%v must be less than or equal to %v", cs, maxUploadCutoff)
	}
	return nil
}

func (f *Fs) setUploadCutoff(cs fs.SizeSuffix) (old fs.SizeSuffix, err error) {
	err = checkUploadCutoff(cs)
	if err == nil {
		old, f.opt.UploadCutoff = f.opt.UploadCutoff, cs
	}
	return
}

// httpClientFactory creates a Factory object that sends HTTP requests
// to rclone's http.Client.
//
// copied from azblob.newDefaultHTTPClientFactory
func httpClientFactory(client *http.Client) pipeline.Factory {
	return pipeline.FactoryFunc(func(next pipeline.Policy, po *pipeline.PolicyOptions) pipeline.PolicyFunc {
		return func(ctx context.Context, request pipeline.Request) (pipeline.Response, error) {
			r, err := client.Do(request.WithContext(ctx))
			if err != nil {
				err = pipeline.NewError(err, "HTTP request failed")
			}
			return pipeline.NewHTTPResponse(r), err
		}
	})
}

// newPipeline creates a Pipeline using the specified credentials and options.
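//
// The HTTP sender is replaced with httpClientFactory(f.client) so that
// requests are sent through rclone's fshttp client and therefore honour
// rclone's global HTTP options.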
//
// this code was copied from azblob.NewPipeline
func (f *Fs) newPipeline(c azblob.Credential, o azblob.PipelineOptions) pipeline.Pipeline {
	// Don't log stuff to syslog/Windows Event log
	pipeline.SetForceLogEnabled(false)

	// Closest to API goes first; closest to the wire goes last
	factories := []pipeline.Factory{
		azblob.NewTelemetryPolicyFactory(o.Telemetry),
		azblob.NewUniqueRequestIDPolicyFactory(),
		azblob.NewRetryPolicyFactory(o.Retry),
		c,
		pipeline.MethodFactoryMarker(), // indicates at what stage in the pipeline the method factory is invoked
		azblob.NewRequestLogPolicyFactory(o.RequestLog),
	}
	return pipeline.NewPipeline(factories, pipeline.Options{HTTPSender: httpClientFactory(f.client), Log: o.Log})
}

// setRoot changes the root of the Fs
func (f *Fs) setRoot(root string) {
	f.root = parsePath(root)
	f.rootContainer, f.rootDirectory = bucket.Split(f.root)
}

// NewFs constructs an Fs from the path, container:path
func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) {
	ctx := context.Background()
	// Parse config into Options struct
	opt := new(Options)
	err := configstruct.Set(m, opt)
	if err != nil {
		return nil, err
	}

	err = checkUploadCutoff(opt.UploadCutoff)
	if err != nil {
		return nil, errors.Wrap(err, "azure: upload cutoff")
	}
	err = checkUploadChunkSize(opt.ChunkSize)
	if err != nil {
		return nil, errors.Wrap(err, "azure: chunk size")
	}
	if opt.ListChunkSize > maxListChunkSize {
		return nil, errors.Errorf("azure: blob list size can't be greater than %v - was %v", maxListChunkSize, opt.ListChunkSize)
	}
	if opt.Endpoint == "" {
		opt.Endpoint = storageDefaultBaseURL
	}

	if opt.AccessTier == "" {
		opt.AccessTier = string(defaultAccessTier)
	} else if !validateAccessTier(opt.AccessTier) {
		return nil, errors.Errorf("Azure Blob: Supported access tiers are %s, %s and %s",
			string(azblob.AccessTierHot), string(azblob.AccessTierCool), string(azblob.AccessTierArchive))
	}

	f := &Fs{
		name:        name,
		opt:         *opt,
		pacer:       fs.NewPacer(pacer.NewS3(pacer.MinSleep(minSleep), pacer.MaxSleep(maxSleep), pacer.DecayConstant(decayConstant))),
		uploadToken: pacer.NewTokenDispenser(fs.Config.Transfers),
		client:      fshttp.NewClient(fs.Config),
		cache:       bucket.NewCache(),
		cntURLcache: make(map[string]*azblob.ContainerURL, 1),
		pool: pool.New(
			time.Duration(opt.MemoryPoolFlushTime),
			int(opt.ChunkSize),
			fs.Config.Transfers,
			opt.MemoryPoolUseMmap,
		),
	}
	f.setRoot(root)
	f.features = (&fs.Features{
		ReadMimeType:      true,
		WriteMimeType:     true,
		BucketBased:       true,
		BucketBasedRootOK: true,
		SetTier:           true,
		GetTier:           true,
	}).Fill(f)

	var (
		u          *url.URL
		serviceURL azblob.ServiceURL
	)
	switch {
	case opt.UseEmulator:
		credential, err := azblob.NewSharedKeyCredential(emulatorAccount, emulatorAccountKey)
		if err != nil {
			return nil, errors.Wrapf(err, "Failed to parse credentials")
		}
		u, err = url.Parse(emulatorBlobEndpoint)
		if err != nil {
			return nil, errors.Wrap(err, "failed to make azure storage url from account and endpoint")
		}
		pipeline := f.newPipeline(credential, azblob.PipelineOptions{Retry: azblob.RetryOptions{TryTimeout: maxTryTimeout}})
		serviceURL = azblob.NewServiceURL(*u, pipeline)
	case opt.Account != "" && opt.Key != "":
		credential, err := azblob.NewSharedKeyCredential(opt.Account, opt.Key)
		if err != nil {
			return nil, errors.Wrapf(err, "Failed to parse credentials")
		}

		u, err = url.Parse(fmt.Sprintf("https://%s.%s", opt.Account, opt.Endpoint))
		if err != nil {
			return nil, errors.Wrap(err, "failed to make azure storage url from account and endpoint")
		}
		pipeline := f.newPipeline(credential, azblob.PipelineOptions{Retry: azblob.RetryOptions{TryTimeout: maxTryTimeout}})
		serviceURL = azblob.NewServiceURL(*u, pipeline)
	case opt.SASURL != "":
		u, err = url.Parse(opt.SASURL)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to parse SAS URL")
		}
		// use anonymous credentials in case of sas url
		pipeline := f.newPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{Retry: azblob.RetryOptions{TryTimeout: maxTryTimeout}})
		// Check if we have container level SAS or account level sas
		parts := azblob.NewBlobURLParts(*u)
		if parts.ContainerName != "" {
			if f.rootContainer != "" && parts.ContainerName != f.rootContainer {
				return nil, errors.New("Container name in SAS URL and container provided in command do not match")
			}
			containerURL := azblob.NewContainerURL(*u, pipeline)
			f.cntURLcache[parts.ContainerName] = &containerURL
			f.isLimited = true
		} else {
			serviceURL = azblob.NewServiceURL(*u, pipeline)
		}
	default:
		return nil, errors.New("Need account+key or connectionString or sasURL")
	}
	f.svcURL = &serviceURL

	if f.rootContainer != "" && f.rootDirectory != "" {
		// Check to see if the (container,directory) is actually an existing file
		oldRoot := f.root
		newRoot, leaf := path.Split(oldRoot)
		f.setRoot(newRoot)
		_, err := f.NewObject(ctx, leaf)
		if err != nil {
			if err == fs.ErrorObjectNotFound || err == fs.ErrorNotAFile {
				// File doesn't exist or is a directory so return old f
				f.setRoot(oldRoot)
				return f, nil
			}
			return nil, err
		}
		// return an error with an fs which points to the parent
		return f, fs.ErrorIsFile
	}
	return f, nil
}

// return the container URL for the container passed in
func (f *Fs) cntURL(container string) (containerURL *azblob.ContainerURL) {
	f.cntURLcacheMu.Lock()
	defer f.cntURLcacheMu.Unlock()
	var ok bool
	if containerURL, ok = f.cntURLcache[container]; !ok {
		cntURL := f.svcURL.NewContainerURL(container)
		containerURL = &cntURL
		f.cntURLcache[container] = containerURL
	}
	return containerURL
}

// Return an Object from a path
//
// If it can't be found it returns the error fs.ErrorObjectNotFound.
func (f *Fs) newObjectWithInfo(remote string, info *azblob.BlobItem) (fs.Object, error) {
	o := &Object{
		fs:     f,
		remote: remote,
	}
	if info != nil {
		err := o.decodeMetaDataFromBlob(info)
		if err != nil {
			return nil, err
		}
	} else {
		err := o.readMetaData() // reads info and headers, returning an error
		if err != nil {
			return nil, err
		}
	}
	return o, nil
}

// NewObject finds the Object at remote. If it can't be found
// it returns the error fs.ErrorObjectNotFound.
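//
// The blob's properties and metadata are fetched with a GetProperties
// (HEAD) request via readMetaData.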
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
	return f.newObjectWithInfo(remote, nil)
}

// getBlobReference creates an empty blob reference with no metadata
func (f *Fs) getBlobReference(container, containerPath string) azblob.BlobURL {
	return f.cntURL(container).NewBlobURL(containerPath)
}

// updateMetadataWithModTime adds the modTime passed in to o.meta.
func (o *Object) updateMetadataWithModTime(modTime time.Time) {
	// Make sure o.meta is not nil
	if o.meta == nil {
		o.meta = make(map[string]string, 1)
	}

	// Set modTimeKey in it
	o.meta[modTimeKey] = modTime.Format(timeFormatOut)
}

// Returns whether file is a directory marker or not
func isDirectoryMarker(size int64, metadata azblob.Metadata, remote string) bool {
	// Directory markers are 0 length
	if size == 0 {
		// Note that metadata with hdi_isfolder = true seems to be a
		// de facto standard for marking blobs as directories.
		endsWithSlash := strings.HasSuffix(remote, "/")
		if endsWithSlash || remote == "" || metadata["hdi_isfolder"] == "true" {
			return true
		}
	}
	return false
}

// listFn is called from list to handle an object
type listFn func(remote string, object *azblob.BlobItem, isDirectory bool) error

// list lists the objects into the function supplied from
// the container and root supplied
//
// dir is the starting directory, "" for root
//
// The remote has prefix removed from it and if addContainer is set then
// it adds the container to the start.
func (f *Fs) list(ctx context.Context, container, directory, prefix string, addContainer bool, recurse bool, maxResults uint, fn listFn) error {
	if f.cache.IsDeleted(container) {
		return fs.ErrorDirNotFound
	}
	if prefix != "" {
		prefix += "/"
	}
	if directory != "" {
		directory += "/"
	}
	delimiter := ""
	if !recurse {
		delimiter = "/"
	}

	options := azblob.ListBlobsSegmentOptions{
		Details: azblob.BlobListingDetails{
			Copy:             false,
			Metadata:         true,
			Snapshots:        false,
			UncommittedBlobs: false,
			Deleted:          false,
		},
		Prefix:     directory,
		MaxResults: int32(maxResults),
	}
	for marker := (azblob.Marker{}); marker.NotDone(); {
		var response *azblob.ListBlobsHierarchySegmentResponse
		err := f.pacer.Call(func() (bool, error) {
			var err error
			response, err = f.cntURL(container).ListBlobsHierarchySegment(ctx, marker, delimiter, options)
			return f.shouldRetry(err)
		})

		if err != nil {
			// Check http error code along with service code, current SDK doesn't populate service code correctly sometimes
			if storageErr, ok := err.(azblob.StorageError); ok && (storageErr.ServiceCode() == azblob.ServiceCodeContainerNotFound || storageErr.Response().StatusCode == http.StatusNotFound) {
				return fs.ErrorDirNotFound
			}
			return err
		}
		// Advance marker to next
		marker = response.NextMarker
		for i := range response.Segment.BlobItems {
			file := &response.Segment.BlobItems[i]
			// Finish if file name no longer has prefix
			// if prefix != "" && !strings.HasPrefix(file.Name, prefix) {
			//	return nil
			// }
			remote := f.opt.Enc.ToStandardPath(file.Name)
			if !strings.HasPrefix(remote, prefix) {
				fs.Debugf(f, "Odd name received %q", remote)
				continue
			}
			remote = remote[len(prefix):]
			if isDirectoryMarker(*file.Properties.ContentLength, file.Metadata, remote) {
				continue // skip directory marker
			}
			if addContainer {
				remote = path.Join(container, remote)
			}
			// Send object
			err = fn(remote, file, false)
			if err != nil {
				return err
			}
		}
		// Send the subdirectories
		for _, remote := range response.Segment.BlobPrefixes {
			remote := strings.TrimRight(remote.Name, "/")
			remote = f.opt.Enc.ToStandardPath(remote)
			if !strings.HasPrefix(remote, prefix) {
				fs.Debugf(f, "Odd directory name received %q", remote)
				continue
			}
			remote = remote[len(prefix):]
			if addContainer {
				remote = path.Join(container, remote)
			}
			// Send object
			err = fn(remote, nil, true)
			if err != nil {
				return err
			}
		}
	}
	return nil
}

// Convert a list item into a DirEntry
func (f *Fs) itemToDirEntry(remote string, object *azblob.BlobItem, isDirectory bool) (fs.DirEntry, error) {
	if isDirectory {
		d := fs.NewDir(remote, time.Time{})
		return d, nil
	}
	o, err := f.newObjectWithInfo(remote, object)
	if err != nil {
		return nil, err
	}
	return o, nil
}

// listDir lists a single directory
func (f *Fs) listDir(ctx context.Context, container, directory, prefix string, addContainer bool) (entries fs.DirEntries, err error) {
	err = f.list(ctx, container, directory, prefix, addContainer, false, f.opt.ListChunkSize, func(remote string, object *azblob.BlobItem, isDirectory bool) error {
		entry, err := f.itemToDirEntry(remote, object, isDirectory)
		if err != nil {
			return err
		}
		if entry != nil {
			entries = append(entries, entry)
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	// container must be present if listing succeeded
	f.cache.MarkOK(container)
	return entries, nil
}

// listContainers returns all the containers to out
func (f *Fs) listContainers(ctx context.Context) (entries fs.DirEntries, err error) {
	if f.isLimited {
		f.cntURLcacheMu.Lock()
		for container := range f.cntURLcache {
			d := fs.NewDir(container, time.Time{})
			entries = append(entries, d)
		}
		f.cntURLcacheMu.Unlock()
		return entries, nil
	}
	err = f.listContainersToFn(func(container *azblob.ContainerItem) error {
		d := fs.NewDir(f.opt.Enc.ToStandardName(container.Name), container.Properties.LastModified)
		f.cache.MarkOK(container.Name)
		entries = append(entries, d)
		return nil
	})
	if err != nil {
		return nil, err
	}
	return entries, nil
}

// List the objects and directories in dir into entries. The
// entries can be returned in any order but should be for a
// complete directory.
//
// dir should be "" to list the root, and should not have
// trailing slashes.
//
// This should return ErrDirNotFound if the directory isn't
// found.
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
	container, directory := f.split(dir)
	if container == "" {
		if directory != "" {
			return nil, fs.ErrorListBucketRequired
		}
		return f.listContainers(ctx)
	}
	return f.listDir(ctx, container, directory, f.rootDirectory, f.rootContainer == "")
}

// ListR lists the objects and directories of the Fs starting
// from dir recursively into out.
//
// dir should be "" to start from the root, and should not
// have trailing slashes.
//
// This should return ErrDirNotFound if the directory isn't
// found.
//
// It should call callback for each tranche of entries read.
// These need not be returned in any particular order. If
// callback returns an error then the listing will stop
// immediately.
//
// Don't implement this unless you have a more efficient way
// of listing recursively than doing a directory traversal.
func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
	container, directory := f.split(dir)
	list := walk.NewListRHelper(callback)
	listR := func(container, directory, prefix string, addContainer bool) error {
		return f.list(ctx, container, directory, prefix, addContainer, true, f.opt.ListChunkSize, func(remote string, object *azblob.BlobItem, isDirectory bool) error {
			entry, err := f.itemToDirEntry(remote, object, isDirectory)
			if err != nil {
				return err
			}
			return list.Add(entry)
		})
	}
	if container == "" {
		entries, err := f.listContainers(ctx)
		if err != nil {
			return err
		}
		for _, entry := range entries {
			err = list.Add(entry)
			if err != nil {
				return err
			}
			container := entry.Remote()
			err = listR(container, "", f.rootDirectory, true)
			if err != nil {
				return err
			}
			// container must be present if listing succeeded
			f.cache.MarkOK(container)
		}
	} else {
		err = listR(container, directory, f.rootDirectory, f.rootContainer == "")
		if err != nil {
			return err
		}
		// container must be present if listing succeeded
		f.cache.MarkOK(container)
	}
	return list.Flush()
}

// listContainerFn is called from listContainersToFn to handle a container
type listContainerFn func(*azblob.ContainerItem) error

// listContainersToFn lists the containers to the function supplied
func (f *Fs) listContainersToFn(fn listContainerFn) error {
	params := azblob.ListContainersSegmentOptions{
		MaxResults: int32(f.opt.ListChunkSize),
	}
	ctx := context.Background()
	for marker := (azblob.Marker{}); marker.NotDone(); {
		var response *azblob.ListContainersSegmentResponse
		err := f.pacer.Call(func() (bool, error) {
			var err error
			response, err = f.svcURL.ListContainersSegment(ctx, marker, params)
			return f.shouldRetry(err)
		})
		if err != nil {
			return err
		}

		for i := range response.ContainerItems {
			err = fn(&response.ContainerItems[i])
			if err != nil {
				return err
			}
		}
		marker = response.NextMarker
	}

	return nil
}

// Put the object into the container
//
// Copy the reader in to the new object which is returned
//
// The new object may have been created if an error is returned
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
	// Temporary Object under construction
	fs := &Object{
		fs:     f,
		remote: src.Remote(),
	}
	return fs, fs.Update(ctx, in, src, options...)
}

// PutStream uploads to the remote path with the modTime given of indeterminate size
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
	return f.Put(ctx, in, src, options...)
}

// Mkdir creates the container if it doesn't exist
func (f *Fs) Mkdir(ctx context.Context, dir string) error {
	container, _ := f.split(dir)
	return f.makeContainer(ctx, container)
}

// makeContainer creates the container if it doesn't exist
func (f *Fs) makeContainer(ctx context.Context, container string) error {
	return f.cache.Create(container, func() error {
		// If this is a SAS URL limited to a container then assume it is already created
		if f.isLimited {
			return nil
		}
		// now try to create the container
		return f.pacer.Call(func() (bool, error) {
			_, err := f.cntURL(container).Create(ctx, azblob.Metadata{}, azblob.PublicAccessNone)
			if err != nil {
				if storageErr, ok := err.(azblob.StorageError); ok {
					switch storageErr.ServiceCode() {
					case azblob.ServiceCodeContainerAlreadyExists:
						return false, nil
					case azblob.ServiceCodeContainerBeingDeleted:
						// From https://docs.microsoft.com/en-us/rest/api/storageservices/delete-container
						// When a container is deleted, a container with the same name cannot be created
						// for at least 30 seconds; the container may not be available for more than 30
						// seconds if the service is still processing the request.
						time.Sleep(6 * time.Second) // default 10 retries will be 60 seconds
						f.cache.MarkDeleted(container)
						return true, err
					}
				}
			}
			return f.shouldRetry(err)
		})
	}, nil)
}

// isEmpty checks to see if a given (container, directory) is empty and returns an error if not
func (f *Fs) isEmpty(ctx context.Context, container, directory string) (err error) {
	empty := true
	err = f.list(ctx, container, directory, f.rootDirectory, f.rootContainer == "", true, 1, func(remote string, object *azblob.BlobItem, isDirectory bool) error {
		empty = false
		return nil
	})
	if err != nil {
		return err
	}
	if !empty {
		return fs.ErrorDirectoryNotEmpty
	}
	return nil
}

// deleteContainer deletes the container. It can delete a full
// container so use isEmpty if you don't want that.
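//
// The container's existence is checked with GetProperties first so that
// a missing container is reported as fs.ErrorDirNotFound rather than as
// a delete error.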
func (f *Fs) deleteContainer(ctx context.Context, container string) error {
	return f.cache.Remove(container, func() error {
		options := azblob.ContainerAccessConditions{}
		return f.pacer.Call(func() (bool, error) {
			_, err := f.cntURL(container).GetProperties(ctx, azblob.LeaseAccessConditions{})
			if err == nil {
				_, err = f.cntURL(container).Delete(ctx, options)
			}

			if err != nil {
				// Check http error code along with service code, current SDK doesn't populate service code correctly sometimes
				if storageErr, ok := err.(azblob.StorageError); ok && (storageErr.ServiceCode() == azblob.ServiceCodeContainerNotFound || storageErr.Response().StatusCode == http.StatusNotFound) {
					return false, fs.ErrorDirNotFound
				}

				return f.shouldRetry(err)
			}

			return f.shouldRetry(err)
		})
	})
}

// Rmdir deletes the container if the fs is at the root
//
// Returns an error if it isn't empty
func (f *Fs) Rmdir(ctx context.Context, dir string) error {
	container, directory := f.split(dir)
	if container == "" || directory != "" {
		return nil
	}
	err := f.isEmpty(ctx, container, directory)
	if err != nil {
		return err
	}
	return f.deleteContainer(ctx, container)
}

// Precision of the remote
func (f *Fs) Precision() time.Duration {
	return time.Nanosecond
}

// Hashes returns the supported hash sets.
func (f *Fs) Hashes() hash.Set {
	return hash.Set(hash.MD5)
}

// Purge deletes all the files and directories including the old versions.
func (f *Fs) Purge(ctx context.Context) error {
	dir := "" // forward compat!
	container, directory := f.split(dir)
	if container == "" || directory != "" {
		// Delegate to caller if not root of a container
		return fs.ErrorCantPurge
	}
	return f.deleteContainer(ctx, container)
}

// Copy src to this remote using server side copy operations.
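//
// The copy is started with StartCopyFromURL and then polled once a
// second until the service reports that it is no longer pending.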
//
// This is stored with the remote path given
//
// It returns the destination Object and a possible error
//
// Will only be called if src.Fs().Name() == f.Name()
//
// If it isn't possible then return fs.ErrorCantCopy
func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
	dstContainer, dstPath := f.split(remote)
	err := f.makeContainer(ctx, dstContainer)
	if err != nil {
		return nil, err
	}
	srcObj, ok := src.(*Object)
	if !ok {
		fs.Debugf(src, "Can't copy - not same remote type")
		return nil, fs.ErrorCantCopy
	}
	dstBlobURL := f.getBlobReference(dstContainer, dstPath)
	srcBlobURL := srcObj.getBlobReference()

	source, err := url.Parse(srcBlobURL.String())
	if err != nil {
		return nil, err
	}

	options := azblob.BlobAccessConditions{}
	var startCopy *azblob.BlobStartCopyFromURLResponse

	err = f.pacer.Call(func() (bool, error) {
		startCopy, err = dstBlobURL.StartCopyFromURL(ctx, *source, nil, azblob.ModifiedAccessConditions{}, options)
		return f.shouldRetry(err)
	})
	if err != nil {
		return nil, err
	}

	copyStatus := startCopy.CopyStatus()
	for copyStatus == azblob.CopyStatusPending {
		time.Sleep(1 * time.Second)
		getMetadata, err := dstBlobURL.GetProperties(ctx, options)
		if err != nil {
			return nil, err
		}
		copyStatus = getMetadata.CopyStatus()
	}

	return f.NewObject(ctx, remote)
}

func (f *Fs) getMemoryPool(size int64) *pool.Pool {
	if size == int64(f.opt.ChunkSize) {
		return f.pool
	}

	return pool.New(
		time.Duration(f.opt.MemoryPoolFlushTime),
		int(size),
		fs.Config.Transfers,
		f.opt.MemoryPoolUseMmap,
	)
}

// ------------------------------------------------------------

// Fs returns the parent Fs
func (o *Object) Fs() fs.Info {
	return o.fs
}

// Return a string version
func (o *Object) String() string {
	if o == nil {
		return "<nil>"
	}
	return o.remote
}

// Remote returns the remote path
func (o *Object) Remote() string {
	return o.remote
}

// Hash returns the MD5 of an object returning a lowercase hex string
func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) {
	if t != hash.MD5 {
		return "", hash.ErrUnsupported
	}
	// Convert base64 encoded md5 into lower case hex
	if o.md5 == "" {
		return "", nil
	}
	data, err := base64.StdEncoding.DecodeString(o.md5)
	if err != nil {
		return "", errors.Wrapf(err, "Failed to decode Content-MD5: %q", o.md5)
	}
	return hex.EncodeToString(data), nil
}

// Size returns the size of an object in bytes
func (o *Object) Size() int64 {
	return o.size
}

func (o *Object) setMetadata(metadata azblob.Metadata) {
	if len(metadata) > 0 {
		o.meta = metadata
		if modTime, ok := metadata[modTimeKey]; ok {
			when, err := time.Parse(timeFormatIn, modTime)
			if err != nil {
				fs.Debugf(o, "Couldn't parse %v = %q: %v", modTimeKey, modTime, err)
			}
			o.modTime = when
		}
	} else {
		o.meta = nil
	}
}

// decodeMetaDataFromPropertiesResponse sets the metadata from the data passed in
//
// Sets
//  o.id
//  o.modTime
//  o.size
//  o.md5
//  o.meta
func (o *Object) decodeMetaDataFromPropertiesResponse(info *azblob.BlobGetPropertiesResponse) (err error) {
	metadata := info.NewMetadata()
	size := info.ContentLength()
	if isDirectoryMarker(size, metadata, o.remote) {
		return fs.ErrorNotAFile
	}
	// NOTE - Client library always returns MD5 as base64 decoded string, Object needs to maintain
	// this as base64 encoded string.
	o.md5 = base64.StdEncoding.EncodeToString(info.ContentMD5())
	o.mimeType = info.ContentType()
	o.size = size
	o.modTime = info.LastModified()
	o.accessTier = azblob.AccessTierType(info.AccessTier())
	o.setMetadata(metadata)

	return nil
}

func (o *Object) decodeMetaDataFromBlob(info *azblob.BlobItem) (err error) {
	metadata := info.Metadata
	size := *info.Properties.ContentLength
	if isDirectoryMarker(size, metadata, o.remote) {
		return fs.ErrorNotAFile
	}
	// NOTE - Client library always returns MD5 as base64 decoded string, Object needs to maintain
	// this as base64 encoded string.
	o.md5 = base64.StdEncoding.EncodeToString(info.Properties.ContentMD5)
	o.mimeType = *info.Properties.ContentType
	o.size = size
	o.modTime = info.Properties.LastModified
	o.accessTier = info.Properties.AccessTier
	o.setMetadata(metadata)
	return nil
}

// getBlobReference creates an empty blob reference with no metadata
func (o *Object) getBlobReference() azblob.BlobURL {
	container, directory := o.split()
	return o.fs.getBlobReference(container, directory)
}

// clearMetaData clears enough metadata so readMetaData will re-read it
func (o *Object) clearMetaData() {
	o.modTime = time.Time{}
}

// readMetaData gets the metadata if it hasn't already been fetched
//
// Sets
//  o.id
//  o.modTime
//  o.size
//  o.md5
func (o *Object) readMetaData() (err error) {
	if !o.modTime.IsZero() {
		return nil
	}
	blob := o.getBlobReference()

	// Read the blob properties (this includes the metadata)
	options := azblob.BlobAccessConditions{}
	ctx := context.Background()
	var blobProperties *azblob.BlobGetPropertiesResponse
	err = o.fs.pacer.Call(func() (bool, error) {
		blobProperties, err = blob.GetProperties(ctx, options)
		return o.fs.shouldRetry(err)
	})
	if err != nil {
		// On directories - GetProperties does not work and current SDK does not populate service code correctly hence check regular http response as well
		if storageErr, ok := err.(azblob.StorageError); ok && (storageErr.ServiceCode() == azblob.ServiceCodeBlobNotFound || storageErr.Response().StatusCode == http.StatusNotFound) {
			return fs.ErrorObjectNotFound
		}
		return err
	}

	return o.decodeMetaDataFromPropertiesResponse(blobProperties)
}

// ModTime returns the modification time of the object
//
// It attempts to read the object's mtime and if that isn't present the
// LastModified returned in the http headers
func (o *Object) ModTime(ctx context.Context) (result time.Time) {
	// The error is logged in readMetaData
	_ = o.readMetaData()
	return o.modTime
}

// SetModTime sets the modification time of the local fs object
func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
	// Make sure o.meta is not nil
	if o.meta == nil {
		o.meta = make(map[string]string, 1)
	}
	// Set modTimeKey in it
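	// The time is stored in the blob's metadata under the "mtime" key,
	// formatted with timeFormatOut (RFC3339 with nanosecond precision).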
	o.meta[modTimeKey] = modTime.Format(timeFormatOut)

	blob := o.getBlobReference()
	err := o.fs.pacer.Call(func() (bool, error) {
		_, err := blob.SetMetadata(ctx, o.meta, azblob.BlobAccessConditions{})
		return o.fs.shouldRetry(err)
	})
	if err != nil {
		return err
	}
	o.modTime = modTime
	return nil
}

// Storable returns if this object is storable
func (o *Object) Storable() bool {
	return true
}

// Open an object for read
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) {
	// Offset and Count for range download
	var offset int64
	var count int64
	if o.AccessTier() == azblob.AccessTierArchive {
		return nil, errors.Errorf("Blob in archive tier, you need to set tier to hot or cool first")
	}
	fs.FixRangeOption(options, o.size)
	for _, option := range options {
		switch x := option.(type) {
		case *fs.RangeOption:
			offset, count = x.Decode(o.size)
			if count < 0 {
				count = o.size - offset
			}
		case *fs.SeekOption:
			offset = x.Offset
		default:
			if option.Mandatory() {
				fs.Logf(o, "Unsupported mandatory option: %v", option)
			}
		}
	}
	blob := o.getBlobReference()
	ac := azblob.BlobAccessConditions{}
	var downloadResponse *azblob.DownloadResponse
	err = o.fs.pacer.Call(func() (bool, error) {
		downloadResponse, err = blob.Download(ctx, offset, count, ac, false)
		return o.fs.shouldRetry(err)
	})
	if err != nil {
		return nil, errors.Wrap(err, "failed to open for download")
	}
	in = downloadResponse.Body(azblob.RetryReaderOptions{})
	return in, nil
}

// dontEncode is the characters that do not need percent-encoding
//
// The characters that do not need percent-encoding are a subset of
// the printable ASCII characters: upper-case letters, lower-case
// letters, digits, ".", "_", "-", "/", "~", "!", "$", "'", "(", ")",
// "*", ";", "=", ":", and "@". All other byte values in a UTF-8 encoded
// string must be replaced with "%" and the two-digit hex value of the byte.
const dontEncode = (`abcdefghijklmnopqrstuvwxyz` +
	`ABCDEFGHIJKLMNOPQRSTUVWXYZ` +
	`0123456789` +
	`._-/~!$'()*;=:@`)

// noNeedToEncode is a bitmap of characters which don't need % encoding
var noNeedToEncode [256]bool

func init() {
	for _, c := range dontEncode {
		noNeedToEncode[c] = true
	}
}

// readSeeker joins an io.Reader and an io.Seeker
type readSeeker struct {
	io.Reader
	io.Seeker
}

// increment the slice passed in as LSB binary
func increment(xs []byte) {
	for i, digit := range xs {
		newDigit := digit + 1
		xs[i] = newDigit
		if newDigit >= digit {
			// exit if no carry
			break
		}
	}
}

var warnStreamUpload sync.Once

// uploadMultipart uploads a file using multipart upload
//
// Write a larger blob, using CreateBlockBlob, PutBlock, and PutBlockList.
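//
// Each chunk is staged with StageBlock using a base64 encoded 8 byte
// little-endian counter as its block ID; once all blocks are staged the
// list is committed in order with CommitBlockList, which also sets the
// HTTP headers and metadata.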
func (o *Object) uploadMultipart(ctx context.Context, in io.Reader, size int64, blob *azblob.BlobURL, httpHeaders *azblob.BlobHTTPHeaders) (err error) {
	// Calculate correct chunkSize
	chunkSize := int64(o.fs.opt.ChunkSize)
	totalParts := -1

	// Note that the max size of file is 4.75 TB (100 MB X 50,000
	// blocks) and this is smaller than the max uncommitted block
	// size (9.52 TB) so we do not need to part commit block lists
	// or garbage collect uncommitted blocks.
	//
	// See: https://docs.microsoft.com/en-gb/rest/api/storageservices/put-block

	// size can be -1 here meaning we don't know the size of the incoming file. We use ChunkSize
	// buffers here (default 4MB). With a maximum number of parts (50,000) this will be a file of
	// 195GB which seems like a not too unreasonable limit.
	if size == -1 {
		warnStreamUpload.Do(func() {
			fs.Logf(o, "Streaming uploads using chunk size %v will have maximum file size of %v",
				o.fs.opt.ChunkSize, fs.SizeSuffix(chunkSize*maxTotalParts))
		})
	} else {
		// Adjust partSize until the number of parts is small enough.
		if size/chunkSize >= maxTotalParts {
			// Calculate partition size rounded up to the nearest MB
			chunkSize = (((size / maxTotalParts) >> 20) + 1) << 20
		}
		if chunkSize > int64(maxChunkSize) {
			return errors.Errorf("can't upload as it is too big %v - takes more than %d chunks of %v", fs.SizeSuffix(size), maxTotalParts, fs.SizeSuffix(maxChunkSize))
		}
		totalParts = int(size / chunkSize)
		if size%chunkSize != 0 {
			totalParts++
		}
	}

	fs.Debugf(o, "Multipart upload session started for %d parts of size %v", totalParts, fs.SizeSuffix(chunkSize))

	// unwrap the accounting from the input, we use wrap to put it
	// back on after the buffering
	in, wrap := accounting.UnWrap(in)

	// Upload the chunks
	var (
		g, gCtx       = errgroup.WithContext(ctx)
		remaining     = size                           // remaining size in file for logging only, -1 if size < 0
		position      = int64(0)                       // position in file
		memPool       = o.fs.getMemoryPool(chunkSize)  // pool to get memory from
		finished      = false                          // set when we have read EOF
		blocks        []string                         // list of blocks for finalize
		blockBlobURL  = blob.ToBlockBlobURL()          // Get BlockBlobURL, we will use default pipeline here
		ac            = azblob.LeaseAccessConditions{} // Use default lease access conditions
		binaryBlockID = make([]byte, 8)                // block counter as LSB first 8 bytes
	)
	for part := 0; !finished; part++ {
		// Get a block of memory from the pool and a token which limits concurrency
		o.fs.uploadToken.Get()
		buf := memPool.Get()

		free := func() {
			memPool.Put(buf)       // return the buf
			o.fs.uploadToken.Put() // return the token
		}

		// Fail fast: if any errgroup-managed function has returned an error,
		// gCtx is cancelled and there is no point in uploading the remaining parts.
		if gCtx.Err() != nil {
			free()
			break
		}

		// Read the chunk
		n, err := readers.ReadFill(in, buf) // this can never return 0, nil
		if err == io.EOF {
			if n == 0 { // end if no data
				free()
				break
			}
			finished = true
		} else if err != nil {
			free()
			return errors.Wrap(err, "multipart upload failed to read source")
		}
		buf = buf[:n]

		// increment the blockID and save the blocks for finalize
		increment(binaryBlockID)
		blockID := base64.StdEncoding.EncodeToString(binaryBlockID)
		blocks = append(blocks, blockID)

		// Transfer the chunk
		fs.Debugf(o, "Uploading part %d/%d offset %v/%v part size %v", part+1, totalParts, fs.SizeSuffix(position), fs.SizeSuffix(size), fs.SizeSuffix(chunkSize))
		g.Go(func() (err error) {
			defer free()

			// Upload the block, with MD5 for check
			md5sum := md5.Sum(buf)
			transactionalMD5 := md5sum[:]
			err = o.fs.pacer.Call(func() (bool, error) {
				bufferReader := bytes.NewReader(buf)
				wrappedReader := wrap(bufferReader)
				rs := readSeeker{wrappedReader, bufferReader}
				_, err = blockBlobURL.StageBlock(ctx, blockID, &rs, ac, transactionalMD5)
				return o.fs.shouldRetry(err)
			})
			if err != nil {
				return errors.Wrap(err, "multipart upload failed to upload part")
			}
			return nil
		})

		// ready for next block
		if size >= 0 {
			remaining -= chunkSize
		}
		position += chunkSize
	}
	err = g.Wait()
	if err != nil {
		return err
	}

	// Finalise the upload session
	err = o.fs.pacer.Call(func() (bool, error) {
		_, err := blockBlobURL.CommitBlockList(ctx, blocks, *httpHeaders, o.meta, azblob.BlobAccessConditions{})
		return o.fs.shouldRetry(err)
	})
	if err != nil {
		return errors.Wrap(err, "multipart upload failed to finalize")
	}
	return nil
}

// Update the object with the contents of the io.Reader, modTime and size
//
// The new object may have been created if an error is returned
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (err error) {
	container, _ := o.split()
	err = o.fs.makeContainer(ctx, container)
	if err != nil {
		return err
	}
	size := src.Size()
	// Update Mod time
	o.updateMetadataWithModTime(src.ModTime(ctx))
	if err != nil {
		return err
	}

	blob := o.getBlobReference()
	httpHeaders := azblob.BlobHTTPHeaders{}
	httpHeaders.ContentType = fs.MimeType(ctx, o)
	// Compute the Content-MD5 of the file, for multipart uploads it
	// will be set in PutBlockList API call using the 'x-ms-blob-content-md5' header
	// Note: If multipart, an MD5 checksum will also be computed for each uploaded block
	// in order to validate its integrity during transport
	if !o.fs.opt.DisableCheckSum {
		if sourceMD5, _ := src.Hash(ctx, hash.MD5); sourceMD5 != "" {
			sourceMD5bytes, err := hex.DecodeString(sourceMD5)
			if err == nil {
				httpHeaders.ContentMD5 = sourceMD5bytes
			} else {
				fs.Debugf(o, "Failed to decode %q as MD5: %v", sourceMD5, err)
			}
		}
	}

	putBlobOptions := azblob.UploadStreamToBlockBlobOptions{
		BufferSize:      int(o.fs.opt.ChunkSize),
		MaxBuffers:      4,
		Metadata:        o.meta,
		BlobHTTPHeaders: httpHeaders,
	}
	// FIXME Until https://github.com/Azure/azure-storage-blob-go/pull/75
	// is merged the SDK can't upload a single blob of exactly the chunk
	// size, so upload with a multipart upload to work around.
	// See: https://github.com/rclone/rclone/issues/2653
	multipartUpload := size < 0 || size >= int64(o.fs.opt.UploadCutoff)
	if size == int64(o.fs.opt.ChunkSize) {
		multipartUpload = true
		fs.Debugf(o, "Setting multipart upload for file of chunk size (%d) to work around SDK bug", size)
	}

	// Don't retry, return a retry error instead
	err = o.fs.pacer.CallNoRetry(func() (bool, error) {
		if multipartUpload {
			// If a large file, upload in chunks
			err = o.uploadMultipart(ctx, in, size, &blob, &httpHeaders)
		} else {
			// Write a small blob in one transaction
			blockBlobURL := blob.ToBlockBlobURL()
			_, err = azblob.UploadStreamToBlockBlob(ctx, in, blockBlobURL, putBlobOptions)
		}
		return o.fs.shouldRetry(err)
	})
	if err != nil {
		return err
	}
	// Refresh metadata on object
	o.clearMetaData()
	err = o.readMetaData()
	if err != nil {
		return err
	}

	// If tier is not changed or not specified, do not attempt to invoke `SetBlobTier` operation
	if o.fs.opt.AccessTier == string(defaultAccessTier) || o.fs.opt.AccessTier == string(o.AccessTier()) {
		return nil
	}

	// Now, set blob tier based on configured access tier
	return o.SetTier(o.fs.opt.AccessTier)
}

// Remove an object
func (o *Object) Remove(ctx context.Context) error {
	blob := o.getBlobReference()
	snapShotOptions := azblob.DeleteSnapshotsOptionNone
	ac := azblob.BlobAccessConditions{}
	return o.fs.pacer.Call(func() (bool, error) {
		_, err := blob.Delete(ctx, snapShotOptions, ac)
		return o.fs.shouldRetry(err)
	})
}

// MimeType of an Object if known, "" otherwise
func (o *Object) MimeType(ctx context.Context) string {
	return o.mimeType
}

// AccessTier of an object, default is of type none
func (o *Object) AccessTier() azblob.AccessTierType {
	return o.accessTier
}

// SetTier performs changing object tier
func (o *Object) SetTier(tier string) error {
	if !validateAccessTier(tier) {
		return errors.Errorf("Tier %s not supported by Azure Blob Storage", tier)
	}

	// Check if current tier already matches with desired tier
	if o.GetTier() == tier {
		return nil
	}
	desiredAccessTier := azblob.AccessTierType(tier)
	blob := o.getBlobReference()
	ctx := context.Background()
	err := o.fs.pacer.Call(func() (bool, error) {
		_, err := blob.SetTier(ctx, desiredAccessTier, azblob.LeaseAccessConditions{})
		return o.fs.shouldRetry(err)
	})

	if err != nil {
		return errors.Wrap(err, "Failed to set Blob Tier")
	}

	// Set access tier on local object also, this typically
	// gets updated on get blob properties
	o.accessTier = desiredAccessTier
	fs.Debugf(o, "Successfully changed object tier to %s", tier)

	return nil
}

// GetTier returns object tier in azure as string
func (o *Object) GetTier() string {
	return string(o.accessTier)
}

// Check the interfaces are satisfied
var (
	_ fs.Fs          = &Fs{}
	_ fs.Copier      = &Fs{}
	_ fs.PutStreamer = &Fs{}
	_ fs.Purger      = &Fs{}
	_ fs.ListRer     = &Fs{}
	_ fs.Object      = &Object{}
	_ fs.MimeTyper   = &Object{}
	_ fs.GetTierer   = &Object{}
	_ fs.SetTierer   = &Object{}
)