github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/backend/azureblob/azureblob.go 1 //go:build !plan9 && !solaris && !js 2 3 // Package azureblob provides an interface to the Microsoft Azure blob object storage system 4 package azureblob 5 6 import ( 7 "context" 8 "crypto/md5" 9 "encoding/base64" 10 "encoding/binary" 11 "encoding/hex" 12 "encoding/json" 13 "errors" 14 "fmt" 15 "io" 16 "net/http" 17 "net/url" 18 "os" 19 "path" 20 "sort" 21 "strconv" 22 "strings" 23 "sync" 24 "time" 25 26 "github.com/Azure/azure-sdk-for-go/sdk/azcore" 27 "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" 28 "github.com/Azure/azure-sdk-for-go/sdk/azidentity" 29 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob" 30 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror" 31 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob" 32 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container" 33 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/sas" 34 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/service" 35 "github.com/rclone/rclone/fs" 36 "github.com/rclone/rclone/fs/chunksize" 37 "github.com/rclone/rclone/fs/config" 38 "github.com/rclone/rclone/fs/config/configmap" 39 "github.com/rclone/rclone/fs/config/configstruct" 40 "github.com/rclone/rclone/fs/config/obscure" 41 "github.com/rclone/rclone/fs/fserrors" 42 "github.com/rclone/rclone/fs/fshttp" 43 "github.com/rclone/rclone/fs/hash" 44 "github.com/rclone/rclone/fs/walk" 45 "github.com/rclone/rclone/lib/bucket" 46 "github.com/rclone/rclone/lib/encoder" 47 "github.com/rclone/rclone/lib/env" 48 "github.com/rclone/rclone/lib/multipart" 49 "github.com/rclone/rclone/lib/pacer" 50 ) 51 52 const ( 53 minSleep = 10 * time.Millisecond 54 maxSleep = 10 * time.Second 55 decayConstant = 1 // bigger for slower decay, exponential 56 maxListChunkSize = 5000 // number of items to read at once 57 modTimeKey = "mtime" 58 dirMetaKey = "hdi_isfolder" 59 dirMetaValue = "true" 60 timeFormatIn = time.RFC3339 61 timeFormatOut = "2006-01-02T15:04:05.000000000Z07:00" 62 storageDefaultBaseURL = "blob.core.windows.net" 63 defaultChunkSize = 4 * fs.Mebi 64 defaultAccessTier = blob.AccessTier("") // FIXME AccessTierNone 65 // Default storage account, key and blob endpoint for emulator support, 66 // though it is a base64 key checked in here, it is a publicly available secret. 67 emulatorAccount = "devstoreaccount1" 68 emulatorAccountKey = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" 69 emulatorBlobEndpoint = "http://127.0.0.1:10000/devstoreaccount1" 70 ) 71 72 var ( 73 errCantUpdateArchiveTierBlobs = fserrors.NoRetryError(errors.New("can't update archive tier blob without --azureblob-archive-tier-delete")) 74 75 // Take this when changing or reading metadata. 76 // 77 // It acts as a global metadata lock so we don't bloat Object 78 // with an extra lock that will only very rarely be contended. 79 metadataMu sync.Mutex 80 ) 81 82 // Register with Fs 83 func init() { 84 fs.Register(&fs.RegInfo{ 85 Name: "azureblob", 86 Description: "Microsoft Azure Blob Storage", 87 NewFs: NewFs, 88 Options: []fs.Option{{ 89 Name: "account", 90 Help: `Azure Storage Account Name. 91 92 Set this to the Azure Storage Account Name in use. 93 94 Leave blank to use SAS URL or Emulator, otherwise it needs to be set. 95 96 If this is blank and if env_auth is set it will be read from the 97 environment variable ` + "`AZURE_STORAGE_ACCOUNT_NAME`" + ` if possible. 
98 `, 99 Sensitive: true, 100 }, { 101 Name: "env_auth", 102 Help: `Read credentials from runtime (environment variables, CLI or MSI). 103 104 See the [authentication docs](/azureblob#authentication) for full info.`, 105 Default: false, 106 }, { 107 Name: "key", 108 Help: `Storage Account Shared Key. 109 110 Leave blank to use SAS URL or Emulator.`, 111 Sensitive: true, 112 }, { 113 Name: "sas_url", 114 Help: `SAS URL for container level access only. 115 116 Leave blank if using account/key or Emulator.`, 117 Sensitive: true, 118 }, { 119 Name: "tenant", 120 Help: `ID of the service principal's tenant. Also called its directory ID. 121 122 Set this if using 123 - Service principal with client secret 124 - Service principal with certificate 125 - User with username and password 126 `, 127 Sensitive: true, 128 }, { 129 Name: "client_id", 130 Help: `The ID of the client in use. 131 132 Set this if using 133 - Service principal with client secret 134 - Service principal with certificate 135 - User with username and password 136 `, 137 Sensitive: true, 138 }, { 139 Name: "client_secret", 140 Help: `One of the service principal's client secrets 141 142 Set this if using 143 - Service principal with client secret 144 `, 145 Sensitive: true, 146 }, { 147 Name: "client_certificate_path", 148 Help: `Path to a PEM or PKCS12 certificate file including the private key. 149 150 Set this if using 151 - Service principal with certificate 152 `, 153 }, { 154 Name: "client_certificate_password", 155 Help: `Password for the certificate file (optional). 156 157 Optionally set this if using 158 - Service principal with certificate 159 160 And the certificate has a password. 161 `, 162 IsPassword: true, 163 }, { 164 Name: "client_send_certificate_chain", 165 Help: `Send the certificate chain when using certificate auth. 166 167 Specifies whether an authentication request will include an x5c header 168 to support subject name / issuer based authentication. When set to 169 true, authentication requests include the x5c header. 170 171 Optionally set this if using 172 - Service principal with certificate 173 `, 174 Default: false, 175 Advanced: true, 176 }, { 177 Name: "username", 178 Help: `User name (usually an email address) 179 180 Set this if using 181 - User with username and password 182 `, 183 Advanced: true, 184 Sensitive: true, 185 }, { 186 Name: "password", 187 Help: `The user's password 188 189 Set this if using 190 - User with username and password 191 `, 192 IsPassword: true, 193 Advanced: true, 194 }, { 195 Name: "service_principal_file", 196 Help: `Path to file containing credentials for use with a service principal. 197 198 Leave blank normally. Needed only if you want to use a service principal instead of interactive login. 199 200 $ az ad sp create-for-rbac --name "<name>" \ 201 --role "Storage Blob Data Owner" \ 202 --scopes "/subscriptions/<subscription>/resourceGroups/<resource-group>/providers/Microsoft.Storage/storageAccounts/<storage-account>/blobServices/default/containers/<container>" \ 203 > azure-principal.json 204 205 See ["Create an Azure service principal"](https://docs.microsoft.com/en-us/cli/azure/create-an-azure-service-principal-azure-cli) and ["Assign an Azure role for access to blob data"](https://docs.microsoft.com/en-us/azure/storage/common/storage-auth-aad-rbac-cli) pages for more details. 
206 207 It may be more convenient to put the credentials directly into the 208 rclone config file under the ` + "`client_id`, `tenant` and `client_secret`" + ` 209 keys instead of setting ` + "`service_principal_file`" + `. 210 `, 211 Advanced: true, 212 }, { 213 Name: "use_msi", 214 Help: `Use a managed service identity to authenticate (only works in Azure). 215 216 When true, use a [managed service identity](https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/) 217 to authenticate to Azure Storage instead of a SAS token or account key. 218 219 If the VM(SS) on which this program is running has a system-assigned identity, it will 220 be used by default. If the resource has no system-assigned but exactly one user-assigned identity, 221 the user-assigned identity will be used by default. If the resource has multiple user-assigned 222 identities, the identity to use must be explicitly specified using exactly one of the msi_object_id, 223 msi_client_id, or msi_mi_res_id parameters.`, 224 Default: false, 225 Advanced: true, 226 }, { 227 Name: "msi_object_id", 228 Help: "Object ID of the user-assigned MSI to use, if any.\n\nLeave blank if msi_client_id or msi_mi_res_id specified.", 229 Advanced: true, 230 Sensitive: true, 231 }, { 232 Name: "msi_client_id", 233 Help: "Client ID of the user-assigned MSI to use, if any.\n\nLeave blank if msi_object_id or msi_mi_res_id specified.", 234 Advanced: true, 235 Sensitive: true, 236 }, { 237 Name: "msi_mi_res_id", 238 Help: "Azure resource ID of the user-assigned MSI to use, if any.\n\nLeave blank if msi_client_id or msi_object_id specified.", 239 Advanced: true, 240 Sensitive: true, 241 }, { 242 Name: "use_emulator", 243 Help: "Uses local storage emulator if provided as 'true'.\n\nLeave blank if using real azure storage endpoint.", 244 Default: false, 245 Advanced: true, 246 }, { 247 Name: "endpoint", 248 Help: "Endpoint for the service.\n\nLeave blank normally.", 249 Advanced: true, 250 }, { 251 Name: "upload_cutoff", 252 Help: "Cutoff for switching to chunked upload (<= 256 MiB) (deprecated).", 253 Advanced: true, 254 }, { 255 Name: "chunk_size", 256 Help: `Upload chunk size. 257 258 Note that this is stored in memory and there may be up to 259 "--transfers" * "--azureblob-upload-concurrency" chunks stored at once 260 in memory.`, 261 Default: defaultChunkSize, 262 Advanced: true, 263 }, { 264 Name: "upload_concurrency", 265 Help: `Concurrency for multipart uploads. 266 267 This is the number of chunks of the same file that are uploaded 268 concurrently. 269 270 If you are uploading small numbers of large files over high-speed 271 links and these uploads do not fully utilize your bandwidth, then 272 increasing this may help to speed up the transfers. 273 274 In tests, upload speed increases almost linearly with upload 275 concurrency. For example to fill a gigabit pipe it may be necessary to 276 raise this to 64. Note that this will use more memory. 277 278 Note that chunks are stored in memory and there may be up to 279 "--transfers" * "--azureblob-upload-concurrency" chunks stored at once 280 in memory.`, 281 Default: 16, 282 Advanced: true, 283 }, { 284 Name: "list_chunk", 285 Help: `Size of blob list. 286 287 This sets the number of blobs requested in each listing chunk. Default 288 is the maximum, 5000. "List blobs" requests are permitted 2 minutes 289 per megabyte to complete. 
If an operation is taking longer than 2 290 minutes per megabyte on average, it will time out ( 291 [source](https://docs.microsoft.com/en-us/rest/api/storageservices/setting-timeouts-for-blob-service-operations#exceptions-to-default-timeout-interval) 292 ). This can be used to limit the number of blob items to return, to 293 avoid the time out.`, 294 Default: maxListChunkSize, 295 Advanced: true, 296 }, { 297 Name: "access_tier", 298 Help: `Access tier of blob: hot, cool, cold or archive. 299 300 Archived blobs can be restored by setting access tier to hot, cool or 301 cold. Leave blank if you intend to use the default access tier, which is 302 set at the account level. 303 304 If there is no "access tier" specified, rclone doesn't apply any tier. 305 rclone performs a "Set Tier" operation on blobs while uploading; if objects 306 are not modified, specifying a new "access tier" will have no effect. 307 If blobs are in "archive tier" at the remote, trying to perform data transfer 308 operations from the remote will not be allowed. The user should first restore 309 them by tiering the blob to "Hot", "Cool" or "Cold".`, 310 Advanced: true, 311 }, { 312 Name: "archive_tier_delete", 313 Default: false, 314 Help: fmt.Sprintf(`Delete archive tier blobs before overwriting. 315 316 Archive tier blobs cannot be updated. So without this flag, if you 317 attempt to update an archive tier blob, then rclone will produce the 318 error: 319 320 %v 321 322 With this flag set, before rclone attempts to overwrite an archive 323 tier blob it will delete the existing blob before uploading its 324 replacement. This has the potential for data loss if the upload fails 325 (unlike updating a normal blob) and also may cost more since deleting 326 archive tier blobs early may be chargeable. 327 `, errCantUpdateArchiveTierBlobs), 328 Advanced: true, 329 }, { 330 Name: "disable_checksum", 331 Help: `Don't store MD5 checksum with object metadata. 332 333 Normally rclone will calculate the MD5 checksum of the input before 334 uploading it so it can add it to metadata on the object. This is great 335 for data integrity checking but can cause long delays for large files 336 to start uploading.`, 337 Default: false, 338 Advanced: true, 339 }, { 340 Name: "memory_pool_flush_time", 341 Default: fs.Duration(time.Minute), 342 Advanced: true, 343 Hide: fs.OptionHideBoth, 344 Help: `How often internal memory buffer pools will be flushed. (no longer used)`, 345 }, { 346 Name: "memory_pool_use_mmap", 347 Default: false, 348 Advanced: true, 349 Hide: fs.OptionHideBoth, 350 Help: `Whether to use mmap buffers in internal memory pool. 
(no longer used)`, 351 }, { 352 Name: config.ConfigEncoding, 353 Help: config.ConfigEncodingHelp, 354 Advanced: true, 355 Default: (encoder.EncodeInvalidUtf8 | 356 encoder.EncodeSlash | 357 encoder.EncodeCtl | 358 encoder.EncodeDel | 359 encoder.EncodeBackSlash | 360 encoder.EncodeRightPeriod), 361 }, { 362 Name: "public_access", 363 Help: "Public access level of a container: blob or container.", 364 Default: "", 365 Examples: []fs.OptionExample{ 366 { 367 Value: "", 368 Help: "The container and its blobs can be accessed only with an authorized request.\nIt's a default value.", 369 }, { 370 Value: string(container.PublicAccessTypeBlob), 371 Help: "Blob data within this container can be read via anonymous request.", 372 }, { 373 Value: string(container.PublicAccessTypeContainer), 374 Help: "Allow full public read access for container and blob data.", 375 }, 376 }, 377 Advanced: true, 378 }, { 379 Name: "directory_markers", 380 Default: false, 381 Advanced: true, 382 Help: `Upload an empty object with a trailing slash when a new directory is created 383 384 Empty folders are unsupported for bucket based remotes, this option 385 creates an empty object ending with "/", to persist the folder. 386 387 This object also has the metadata "` + dirMetaKey + ` = ` + dirMetaValue + `" to conform to 388 the Microsoft standard. 389 `, 390 }, { 391 Name: "no_check_container", 392 Help: `If set, don't attempt to check the container exists or create it. 393 394 This can be useful when trying to minimise the number of transactions 395 rclone does if you know the container exists already. 396 `, 397 Default: false, 398 Advanced: true, 399 }, { 400 Name: "no_head_object", 401 Help: `If set, do not do HEAD before GET when getting objects.`, 402 Default: false, 403 Advanced: true, 404 }, { 405 Name: "delete_snapshots", 406 Help: `Set to specify how to deal with snapshots on blob deletion.`, 407 Examples: []fs.OptionExample{ 408 { 409 Value: "", 410 Help: "By default, the delete operation fails if a blob has snapshots", 411 }, { 412 Value: string(blob.DeleteSnapshotsOptionTypeInclude), 413 Help: "Specify 'include' to remove the root blob and all its snapshots", 414 }, { 415 Value: string(blob.DeleteSnapshotsOptionTypeOnly), 416 Help: "Specify 'only' to remove only the snapshots but keep the root blob.", 417 }, 418 }, 419 Default: "", 420 Exclusive: true, 421 Advanced: true, 422 }}, 423 }) 424 } 425 426 // Options defines the configuration for this backend 427 type Options struct { 428 Account string `config:"account"` 429 EnvAuth bool `config:"env_auth"` 430 Key string `config:"key"` 431 SASURL string `config:"sas_url"` 432 Tenant string `config:"tenant"` 433 ClientID string `config:"client_id"` 434 ClientSecret string `config:"client_secret"` 435 ClientCertificatePath string `config:"client_certificate_path"` 436 ClientCertificatePassword string `config:"client_certificate_password"` 437 ClientSendCertificateChain bool `config:"client_send_certificate_chain"` 438 Username string `config:"username"` 439 Password string `config:"password"` 440 ServicePrincipalFile string `config:"service_principal_file"` 441 UseMSI bool `config:"use_msi"` 442 MSIObjectID string `config:"msi_object_id"` 443 MSIClientID string `config:"msi_client_id"` 444 MSIResourceID string `config:"msi_mi_res_id"` 445 Endpoint string `config:"endpoint"` 446 ChunkSize fs.SizeSuffix `config:"chunk_size"` 447 UploadConcurrency int `config:"upload_concurrency"` 448 ListChunkSize uint `config:"list_chunk"` 449 AccessTier string `config:"access_tier"` 
450 ArchiveTierDelete bool `config:"archive_tier_delete"` 451 UseEmulator bool `config:"use_emulator"` 452 DisableCheckSum bool `config:"disable_checksum"` 453 Enc encoder.MultiEncoder `config:"encoding"` 454 PublicAccess string `config:"public_access"` 455 DirectoryMarkers bool `config:"directory_markers"` 456 NoCheckContainer bool `config:"no_check_container"` 457 NoHeadObject bool `config:"no_head_object"` 458 DeleteSnapshots string `config:"delete_snapshots"` 459 } 460 461 // Fs represents a remote azure server 462 type Fs struct { 463 name string // name of this remote 464 root string // the path we are working on if any 465 opt Options // parsed config options 466 ci *fs.ConfigInfo // global config 467 features *fs.Features // optional features 468 cntSVCcacheMu sync.Mutex // mutex to protect cntSVCcache 469 cntSVCcache map[string]*container.Client // reference to containerClient per container 470 svc *service.Client // client to access azblob 471 rootContainer string // container part of root (if any) 472 rootDirectory string // directory part of root (if any) 473 isLimited bool // if limited to one container 474 cache *bucket.Cache // cache for container creation status 475 pacer *fs.Pacer // To pace and retry the API calls 476 uploadToken *pacer.TokenDispenser // control concurrency 477 publicAccess container.PublicAccessType // Container Public Access Level 478 } 479 480 // Object describes an azure object 481 type Object struct { 482 fs *Fs // what this object is part of 483 remote string // The remote path 484 modTime time.Time // The modified time of the object if known 485 md5 string // MD5 hash if known 486 size int64 // Size of the object 487 mimeType string // Content-Type of the object 488 accessTier blob.AccessTier // Blob Access Tier 489 meta map[string]string // blob metadata - take metadataMu when accessing 490 } 491 492 // ------------------------------------------------------------ 493 494 // Name of the remote (as passed into NewFs) 495 func (f *Fs) Name() string { 496 return f.name 497 } 498 499 // Root of the remote (as passed into NewFs) 500 func (f *Fs) Root() string { 501 return f.root 502 } 503 504 // String converts this Fs to a string 505 func (f *Fs) String() string { 506 if f.rootContainer == "" { 507 return "Azure root" 508 } 509 if f.rootDirectory == "" { 510 return fmt.Sprintf("Azure container %s", f.rootContainer) 511 } 512 return fmt.Sprintf("Azure container %s path %s", f.rootContainer, f.rootDirectory) 513 } 514 515 // Features returns the optional features of this Fs 516 func (f *Fs) Features() *fs.Features { 517 return f.features 518 } 519 520 // parsePath parses a remote 'url' 521 func parsePath(path string) (root string) { 522 root = strings.Trim(path, "/") 523 return 524 } 525 526 // split returns container and containerPath from the rootRelativePath 527 // relative to f.root 528 func (f *Fs) split(rootRelativePath string) (containerName, containerPath string) { 529 containerName, containerPath = bucket.Split(bucket.Join(f.root, rootRelativePath)) 530 return f.opt.Enc.FromStandardName(containerName), f.opt.Enc.FromStandardPath(containerPath) 531 } 532 533 // split returns container and containerPath from the object 534 func (o *Object) split() (container, containerPath string) { 535 return o.fs.split(o.remote) 536 } 537 538 // validateAccessTier checks if azureblob supports user supplied tier 539 func validateAccessTier(tier string) bool { 540 return strings.EqualFold(tier, string(blob.AccessTierHot)) || 541 strings.EqualFold(tier, 
string(blob.AccessTierCool)) || 542 strings.EqualFold(tier, string(blob.AccessTierCold)) || 543 strings.EqualFold(tier, string(blob.AccessTierArchive)) 544 } 545 546 // validatePublicAccess checks if azureblob supports use supplied public access level 547 func validatePublicAccess(publicAccess string) bool { 548 switch publicAccess { 549 case "", 550 string(container.PublicAccessTypeBlob), 551 string(container.PublicAccessTypeContainer): 552 // valid cases 553 return true 554 default: 555 return false 556 } 557 } 558 559 // retryErrorCodes is a slice of error codes that we will retry 560 var retryErrorCodes = []int{ 561 401, // Unauthorized (e.g. "Token has expired") 562 408, // Request Timeout 563 429, // Rate exceeded. 564 500, // Get occasional 500 Internal Server Error 565 503, // Service Unavailable 566 504, // Gateway Time-out 567 } 568 569 // shouldRetry returns a boolean as to whether this resp and err 570 // deserve to be retried. It returns the err as a convenience 571 func (f *Fs) shouldRetry(ctx context.Context, err error) (bool, error) { 572 if fserrors.ContextError(ctx, &err) { 573 return false, err 574 } 575 // FIXME interpret special errors - more to do here 576 if storageErr, ok := err.(*azcore.ResponseError); ok { 577 switch storageErr.ErrorCode { 578 case "InvalidBlobOrBlock": 579 // These errors happen sometimes in multipart uploads 580 // because of block concurrency issues 581 return true, err 582 } 583 statusCode := storageErr.StatusCode 584 for _, e := range retryErrorCodes { 585 if statusCode == e { 586 return true, err 587 } 588 } 589 } 590 return fserrors.ShouldRetry(err), err 591 } 592 593 func checkUploadChunkSize(cs fs.SizeSuffix) error { 594 const minChunkSize = fs.SizeSuffixBase 595 if cs < minChunkSize { 596 return fmt.Errorf("%s is less than %s", cs, minChunkSize) 597 } 598 return nil 599 } 600 601 func (f *Fs) setUploadChunkSize(cs fs.SizeSuffix) (old fs.SizeSuffix, err error) { 602 err = checkUploadChunkSize(cs) 603 if err == nil { 604 old, f.opt.ChunkSize = f.opt.ChunkSize, cs 605 } 606 return 607 } 608 609 type servicePrincipalCredentials struct { 610 AppID string `json:"appId"` 611 Password string `json:"password"` 612 Tenant string `json:"tenant"` 613 } 614 615 // parseServicePrincipalCredentials unmarshals a service principal credentials JSON file as generated by az cli. 616 func parseServicePrincipalCredentials(ctx context.Context, credentialsData []byte) (*servicePrincipalCredentials, error) { 617 var spCredentials servicePrincipalCredentials 618 if err := json.Unmarshal(credentialsData, &spCredentials); err != nil { 619 return nil, fmt.Errorf("error parsing credentials from JSON file: %w", err) 620 } 621 // TODO: support certificate credentials 622 // Validate all fields present 623 if spCredentials.AppID == "" || spCredentials.Password == "" || spCredentials.Tenant == "" { 624 return nil, fmt.Errorf("missing fields in credentials file") 625 } 626 return &spCredentials, nil 627 } 628 629 // setRoot changes the root of the Fs 630 func (f *Fs) setRoot(root string) { 631 f.root = parsePath(root) 632 f.rootContainer, f.rootDirectory = bucket.Split(f.root) 633 } 634 635 // Wrap the http.Transport to satisfy the Transporter interface 636 type transporter struct { 637 http.RoundTripper 638 } 639 640 // Make a new transporter 641 func newTransporter(ctx context.Context) transporter { 642 return transporter{ 643 RoundTripper: fshttp.NewTransport(ctx), 644 } 645 } 646 647 // Do sends the HTTP request and returns the HTTP response or error. 
648 func (tr transporter) Do(req *http.Request) (*http.Response, error) { 649 return tr.RoundTripper.RoundTrip(req) 650 } 651 652 // NewFs constructs an Fs from the path, container:path 653 func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) { 654 // Parse config into Options struct 655 opt := new(Options) 656 err := configstruct.Set(m, opt) 657 if err != nil { 658 return nil, err 659 } 660 661 err = checkUploadChunkSize(opt.ChunkSize) 662 if err != nil { 663 return nil, fmt.Errorf("chunk size: %w", err) 664 } 665 if opt.ListChunkSize > maxListChunkSize { 666 return nil, fmt.Errorf("blob list size can't be greater than %v - was %v", maxListChunkSize, opt.ListChunkSize) 667 } 668 669 if opt.AccessTier == "" { 670 opt.AccessTier = string(defaultAccessTier) 671 } else if !validateAccessTier(opt.AccessTier) { 672 return nil, fmt.Errorf("supported access tiers are %s, %s, %s and %s", 673 string(blob.AccessTierHot), string(blob.AccessTierCool), string(blob.AccessTierCold), string(blob.AccessTierArchive)) 674 } 675 676 if !validatePublicAccess((opt.PublicAccess)) { 677 return nil, fmt.Errorf("supported public access level are %s and %s", 678 string(container.PublicAccessTypeBlob), string(container.PublicAccessTypeContainer)) 679 } 680 681 ci := fs.GetConfig(ctx) 682 f := &Fs{ 683 name: name, 684 opt: *opt, 685 ci: ci, 686 pacer: fs.NewPacer(ctx, pacer.NewS3(pacer.MinSleep(minSleep), pacer.MaxSleep(maxSleep), pacer.DecayConstant(decayConstant))), 687 uploadToken: pacer.NewTokenDispenser(ci.Transfers), 688 cache: bucket.NewCache(), 689 cntSVCcache: make(map[string]*container.Client, 1), 690 } 691 f.publicAccess = container.PublicAccessType(opt.PublicAccess) 692 f.setRoot(root) 693 f.features = (&fs.Features{ 694 ReadMimeType: true, 695 WriteMimeType: true, 696 BucketBased: true, 697 BucketBasedRootOK: true, 698 SetTier: true, 699 GetTier: true, 700 }).Fill(ctx, f) 701 if opt.DirectoryMarkers { 702 f.features.CanHaveEmptyDirectories = true 703 fs.Debugf(f, "Using directory markers") 704 } 705 706 // Client options specifying our own transport 707 policyClientOptions := policy.ClientOptions{ 708 Transport: newTransporter(ctx), 709 } 710 clientOpt := service.ClientOptions{ 711 ClientOptions: policyClientOptions, 712 } 713 714 // Here we auth by setting one of cred, sharedKeyCred or f.svc 715 var ( 716 cred azcore.TokenCredential 717 sharedKeyCred *service.SharedKeyCredential 718 ) 719 switch { 720 case opt.EnvAuth: 721 // Read account from environment if needed 722 if opt.Account == "" { 723 opt.Account, _ = os.LookupEnv("AZURE_STORAGE_ACCOUNT_NAME") 724 } 725 // Read credentials from the environment 726 options := azidentity.DefaultAzureCredentialOptions{ 727 ClientOptions: policyClientOptions, 728 } 729 cred, err = azidentity.NewDefaultAzureCredential(&options) 730 if err != nil { 731 return nil, fmt.Errorf("create azure environment credential failed: %w", err) 732 } 733 case opt.UseEmulator: 734 if opt.Account == "" { 735 opt.Account = emulatorAccount 736 } 737 if opt.Key == "" { 738 opt.Key = emulatorAccountKey 739 } 740 if opt.Endpoint == "" { 741 opt.Endpoint = emulatorBlobEndpoint 742 } 743 sharedKeyCred, err = service.NewSharedKeyCredential(opt.Account, opt.Key) 744 if err != nil { 745 return nil, fmt.Errorf("create new shared key credential for emulator failed: %w", err) 746 } 747 case opt.Account != "" && opt.Key != "": 748 sharedKeyCred, err = service.NewSharedKeyCredential(opt.Account, opt.Key) 749 if err != nil { 750 return nil, fmt.Errorf("create new 
shared key credential failed: %w", err) 751 } 752 case opt.SASURL != "": 753 parts, err := sas.ParseURL(opt.SASURL) 754 if err != nil { 755 return nil, fmt.Errorf("failed to parse SAS URL: %w", err) 756 } 757 endpoint := opt.SASURL 758 containerName := parts.ContainerName 759 // Check if we have container level SAS or account level SAS 760 if containerName != "" { 761 // Container level SAS 762 if f.rootContainer != "" && containerName != f.rootContainer { 763 return nil, fmt.Errorf("container name in SAS URL (%q) and container provided in command (%q) do not match", containerName, f.rootContainer) 764 } 765 // Rewrite the endpoint string to be without the container 766 parts.ContainerName = "" 767 endpoint = parts.String() 768 } 769 f.svc, err = service.NewClientWithNoCredential(endpoint, &clientOpt) 770 if err != nil { 771 return nil, fmt.Errorf("unable to create SAS URL client: %w", err) 772 } 773 // if using Container level SAS put the container client into the cache 774 if containerName != "" { 775 _ = f.cntSVC(containerName) 776 f.isLimited = true 777 } 778 case opt.ClientID != "" && opt.Tenant != "" && opt.ClientSecret != "": 779 // Service principal with client secret 780 options := azidentity.ClientSecretCredentialOptions{ 781 ClientOptions: policyClientOptions, 782 } 783 cred, err = azidentity.NewClientSecretCredential(opt.Tenant, opt.ClientID, opt.ClientSecret, &options) 784 if err != nil { 785 return nil, fmt.Errorf("error creating a client secret credential: %w", err) 786 } 787 case opt.ClientID != "" && opt.Tenant != "" && opt.ClientCertificatePath != "": 788 // Service principal with certificate 789 // 790 // Read the certificate 791 data, err := os.ReadFile(env.ShellExpand(opt.ClientCertificatePath)) 792 if err != nil { 793 return nil, fmt.Errorf("error reading client certificate file: %w", err) 794 } 795 // NewClientCertificateCredential requires at least one *x509.Certificate, and a 796 // crypto.PrivateKey. 797 // 798 // ParseCertificates returns these given certificate data in PEM or PKCS12 format. 799 // It handles common scenarios but has limitations, for example it doesn't load PEM 800 // encrypted private keys. 
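// Illustrative note, not part of the original source: the file given in
// client_certificate_path is typically either a PKCS12 (.pfx/.p12) bundle or a
// PEM file containing both the certificate and an unencrypted private key. A
// PKCS12 file can be converted to PEM with, for example, "openssl pkcs12 -in sp.pfx
// -out sp.pem -nodes" (the filenames here are placeholders).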
801 var password []byte 802 if opt.ClientCertificatePassword != "" { 803 pw, err := obscure.Reveal(opt.ClientCertificatePassword) 804 if err != nil { 805 return nil, fmt.Errorf("certificate password decode failed - did you obscure it?: %w", err) 806 } 807 password = []byte(pw) 808 } 809 certs, key, err := azidentity.ParseCertificates(data, password) 810 if err != nil { 811 return nil, fmt.Errorf("failed to parse client certificate file: %w", err) 812 } 813 options := azidentity.ClientCertificateCredentialOptions{ 814 ClientOptions: policyClientOptions, 815 SendCertificateChain: opt.ClientSendCertificateChain, 816 } 817 cred, err = azidentity.NewClientCertificateCredential( 818 opt.Tenant, opt.ClientID, certs, key, &options, 819 ) 820 if err != nil { 821 return nil, fmt.Errorf("create azure service principal with client certificate credential failed: %w", err) 822 } 823 case opt.ClientID != "" && opt.Tenant != "" && opt.Username != "" && opt.Password != "": 824 // User with username and password 825 options := azidentity.UsernamePasswordCredentialOptions{ 826 ClientOptions: policyClientOptions, 827 } 828 password, err := obscure.Reveal(opt.Password) 829 if err != nil { 830 return nil, fmt.Errorf("user password decode failed - did you obscure it?: %w", err) 831 } 832 cred, err = azidentity.NewUsernamePasswordCredential( 833 opt.Tenant, opt.ClientID, opt.Username, password, &options, 834 ) 835 if err != nil { 836 return nil, fmt.Errorf("authenticate user with password failed: %w", err) 837 } 838 case opt.ServicePrincipalFile != "": 839 // Loading service principal credentials from file. 840 loadedCreds, err := os.ReadFile(env.ShellExpand(opt.ServicePrincipalFile)) 841 if err != nil { 842 return nil, fmt.Errorf("error opening service principal credentials file: %w", err) 843 } 844 parsedCreds, err := parseServicePrincipalCredentials(ctx, loadedCreds) 845 if err != nil { 846 return nil, fmt.Errorf("error parsing service principal credentials file: %w", err) 847 } 848 options := azidentity.ClientSecretCredentialOptions{ 849 ClientOptions: policyClientOptions, 850 } 851 cred, err = azidentity.NewClientSecretCredential(parsedCreds.Tenant, parsedCreds.AppID, parsedCreds.Password, &options) 852 if err != nil { 853 return nil, fmt.Errorf("error creating a client secret credential: %w", err) 854 } 855 case opt.UseMSI: 856 // Specifying a user-assigned identity. Exactly one of the above IDs must be specified. 857 // Validate and ensure exactly one is set. (To do: better validation.) 858 var b2i = map[bool]int{false: 0, true: 1} 859 set := b2i[opt.MSIClientID != ""] + b2i[opt.MSIObjectID != ""] + b2i[opt.MSIResourceID != ""] 860 if set > 1 { 861 return nil, errors.New("more than one user-assigned identity ID is set") 862 } 863 var options azidentity.ManagedIdentityCredentialOptions 864 switch { 865 case opt.MSIClientID != "": 866 options.ID = azidentity.ClientID(opt.MSIClientID) 867 case opt.MSIObjectID != "": 868 // FIXME this doesn't appear to be in the new SDK? 
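// Note, derived from the surrounding cases rather than stated in the original
// source: while object ID lookup is unsupported here, a user-assigned identity
// can still be selected with msi_client_id (azidentity.ClientID) or
// msi_mi_res_id (azidentity.ResourceID).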
869 return nil, fmt.Errorf("MSI object ID is currently unsupported") 870 case opt.MSIResourceID != "": 871 options.ID = azidentity.ResourceID(opt.MSIResourceID) 872 } 873 cred, err = azidentity.NewManagedIdentityCredential(&options) 874 if err != nil { 875 return nil, fmt.Errorf("failed to acquire MSI token: %w", err) 876 } 877 default: 878 return nil, errors.New("no authentication method configured") 879 } 880 881 // Make the client if not already created 882 if f.svc == nil { 883 // Work out what the endpoint is if it is still unset 884 if opt.Endpoint == "" { 885 if opt.Account == "" { 886 return nil, fmt.Errorf("account must be set: can't make service URL") 887 } 888 u, err := url.Parse(fmt.Sprintf("https://%s.%s", opt.Account, storageDefaultBaseURL)) 889 if err != nil { 890 return nil, fmt.Errorf("failed to make azure storage URL from account: %w", err) 891 } 892 opt.Endpoint = u.String() 893 } 894 if sharedKeyCred != nil { 895 // Shared key cred 896 f.svc, err = service.NewClientWithSharedKeyCredential(opt.Endpoint, sharedKeyCred, &clientOpt) 897 if err != nil { 898 return nil, fmt.Errorf("create client with shared key failed: %w", err) 899 } 900 } else if cred != nil { 901 // Azidentity cred 902 f.svc, err = service.NewClient(opt.Endpoint, cred, &clientOpt) 903 if err != nil { 904 return nil, fmt.Errorf("create client failed: %w", err) 905 } 906 } 907 } 908 if f.svc == nil { 909 return nil, fmt.Errorf("internal error: auth failed to make credentials or client") 910 } 911 912 if f.rootContainer != "" && f.rootDirectory != "" { 913 // Check to see if the (container,directory) is actually an existing file 914 oldRoot := f.root 915 newRoot, leaf := path.Split(oldRoot) 916 f.setRoot(newRoot) 917 _, err := f.NewObject(ctx, leaf) 918 if err != nil { 919 if err == fs.ErrorObjectNotFound || err == fs.ErrorNotAFile { 920 // File doesn't exist or is a directory so return old f 921 f.setRoot(oldRoot) 922 return f, nil 923 } 924 return nil, err 925 } 926 // return an error with an fs which points to the parent 927 return f, fs.ErrorIsFile 928 } 929 return f, nil 930 } 931 932 // return the container client for the container passed in 933 func (f *Fs) cntSVC(containerName string) (containerClient *container.Client) { 934 f.cntSVCcacheMu.Lock() 935 defer f.cntSVCcacheMu.Unlock() 936 var ok bool 937 if containerClient, ok = f.cntSVCcache[containerName]; !ok { 938 containerClient = f.svc.NewContainerClient(containerName) 939 f.cntSVCcache[containerName] = containerClient 940 } 941 return containerClient 942 } 943 944 // Return an Object from a path 945 // 946 // If it can't be found it returns the error fs.ErrorObjectNotFound. 947 func (f *Fs) newObjectWithInfo(ctx context.Context, remote string, info *container.BlobItem) (fs.Object, error) { 948 o := &Object{ 949 fs: f, 950 remote: remote, 951 } 952 if info != nil { 953 err := o.decodeMetaDataFromBlob(info) 954 if err != nil { 955 return nil, err 956 } 957 } else if !o.fs.opt.NoHeadObject { 958 err := o.readMetaData(ctx) // reads info and headers, returning an error 959 if err != nil { 960 return nil, err 961 } 962 } 963 return o, nil 964 } 965 966 // NewObject finds the Object at remote. If it can't be found 967 // it returns the error fs.ErrorObjectNotFound. 
968 func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { 969 return f.newObjectWithInfo(ctx, remote, nil) 970 } 971 972 // getBlobSVC creates a blob client 973 func (f *Fs) getBlobSVC(container, containerPath string) *blob.Client { 974 return f.cntSVC(container).NewBlobClient(containerPath) 975 } 976 977 // getBlockBlobSVC creates a block blob client 978 func (f *Fs) getBlockBlobSVC(container, containerPath string) *blockblob.Client { 979 return f.cntSVC(container).NewBlockBlobClient(containerPath) 980 } 981 982 // updateMetadataWithModTime adds the modTime passed in to o.meta. 983 func (o *Object) updateMetadataWithModTime(modTime time.Time) { 984 metadataMu.Lock() 985 defer metadataMu.Unlock() 986 987 // Make sure o.meta is not nil 988 if o.meta == nil { 989 o.meta = make(map[string]string, 1) 990 } 991 992 // Set modTimeKey in it 993 o.meta[modTimeKey] = modTime.Format(timeFormatOut) 994 } 995 996 // Returns whether file is a directory marker or not 997 func isDirectoryMarker(size int64, metadata map[string]*string, remote string) bool { 998 // Directory markers are 0 length 999 if size == 0 { 1000 endsWithSlash := strings.HasSuffix(remote, "/") 1001 if endsWithSlash || remote == "" { 1002 return true 1003 } 1004 // Note that metadata with hdi_isfolder = true seems to be a 1005 // defacto standard for marking blobs as directories. 1006 // Note also that the metadata hasn't been normalised to lower case yet 1007 for k, v := range metadata { 1008 if v != nil && strings.EqualFold(k, dirMetaKey) && *v == dirMetaValue { 1009 return true 1010 } 1011 } 1012 } 1013 return false 1014 } 1015 1016 // listFn is called from list to handle an object 1017 type listFn func(remote string, object *container.BlobItem, isDirectory bool) error 1018 1019 // list lists the objects into the function supplied from 1020 // the container and root supplied 1021 // 1022 // dir is the starting directory, "" for root 1023 // 1024 // The remote has prefix removed from it and if addContainer is set then 1025 // it adds the container to the start. 
1026 func (f *Fs) list(ctx context.Context, containerName, directory, prefix string, addContainer bool, recurse bool, maxResults int32, fn listFn) error { 1027 if f.cache.IsDeleted(containerName) { 1028 return fs.ErrorDirNotFound 1029 } 1030 if prefix != "" { 1031 prefix += "/" 1032 } 1033 if directory != "" { 1034 directory += "/" 1035 } 1036 delimiter := "" 1037 if !recurse { 1038 delimiter = "/" 1039 } 1040 1041 pager := f.cntSVC(containerName).NewListBlobsHierarchyPager(delimiter, &container.ListBlobsHierarchyOptions{ 1042 // Copy, Metadata, Snapshots, UncommittedBlobs, Deleted, Tags, Versions, LegalHold, ImmutabilityPolicy, DeletedWithVersions bool 1043 Include: container.ListBlobsInclude{ 1044 Copy: false, 1045 Metadata: true, 1046 Snapshots: false, 1047 UncommittedBlobs: false, 1048 Deleted: false, 1049 }, 1050 Prefix: &directory, 1051 MaxResults: &maxResults, 1052 }) 1053 foundItems := 0 1054 for pager.More() { 1055 var response container.ListBlobsHierarchyResponse 1056 err := f.pacer.Call(func() (bool, error) { 1057 var err error 1058 response, err = pager.NextPage(ctx) 1059 //response, err = f.srv.ListBlobsHierarchySegment(ctx, marker, delimiter, options) 1060 return f.shouldRetry(ctx, err) 1061 }) 1062 1063 if err != nil { 1064 // Check http error code along with service code, current SDK doesn't populate service code correctly sometimes 1065 if storageErr, ok := err.(*azcore.ResponseError); ok && (storageErr.ErrorCode == string(bloberror.ContainerNotFound) || storageErr.StatusCode == http.StatusNotFound) { 1066 return fs.ErrorDirNotFound 1067 } 1068 return err 1069 } 1070 // Advance marker to next 1071 // marker = response.NextMarker 1072 foundItems += len(response.Segment.BlobItems) 1073 for i := range response.Segment.BlobItems { 1074 file := response.Segment.BlobItems[i] 1075 // Finish if file name no longer has prefix 1076 // if prefix != "" && !strings.HasPrefix(file.Name, prefix) { 1077 // return nil 1078 // } 1079 if file.Name == nil { 1080 fs.Debugf(f, "Nil name received") 1081 continue 1082 } 1083 remote := f.opt.Enc.ToStandardPath(*file.Name) 1084 if !strings.HasPrefix(remote, prefix) { 1085 fs.Debugf(f, "Odd name received %q", remote) 1086 continue 1087 } 1088 isDirectory := isDirectoryMarker(*file.Properties.ContentLength, file.Metadata, remote) 1089 if isDirectory { 1090 // Don't insert the root directory 1091 if remote == directory { 1092 continue 1093 } 1094 // process directory markers as directories 1095 remote = strings.TrimRight(remote, "/") 1096 } 1097 remote = remote[len(prefix):] 1098 if addContainer { 1099 remote = path.Join(containerName, remote) 1100 } 1101 // Send object 1102 err = fn(remote, file, isDirectory) 1103 if err != nil { 1104 return err 1105 } 1106 } 1107 // Send the subdirectories 1108 foundItems += len(response.Segment.BlobPrefixes) 1109 for _, remote := range response.Segment.BlobPrefixes { 1110 if remote.Name == nil { 1111 fs.Debugf(f, "Nil prefix received") 1112 continue 1113 } 1114 remote := strings.TrimRight(*remote.Name, "/") 1115 remote = f.opt.Enc.ToStandardPath(remote) 1116 if !strings.HasPrefix(remote, prefix) { 1117 fs.Debugf(f, "Odd directory name received %q", remote) 1118 continue 1119 } 1120 remote = remote[len(prefix):] 1121 if addContainer { 1122 remote = path.Join(containerName, remote) 1123 } 1124 // Send object 1125 err = fn(remote, nil, true) 1126 if err != nil { 1127 return err 1128 } 1129 } 1130 } 1131 if f.opt.DirectoryMarkers && foundItems == 0 && directory != "" { 1132 // Determine whether the directory exists or 
not by whether it has a marker 1133 _, err := f.readMetaData(ctx, containerName, directory) 1134 if err != nil { 1135 if err == fs.ErrorObjectNotFound { 1136 return fs.ErrorDirNotFound 1137 } 1138 return err 1139 } 1140 } 1141 return nil 1142 } 1143 1144 // Convert a list item into a DirEntry 1145 func (f *Fs) itemToDirEntry(ctx context.Context, remote string, object *container.BlobItem, isDirectory bool) (fs.DirEntry, error) { 1146 if isDirectory { 1147 d := fs.NewDir(remote, time.Time{}) 1148 return d, nil 1149 } 1150 o, err := f.newObjectWithInfo(ctx, remote, object) 1151 if err != nil { 1152 return nil, err 1153 } 1154 return o, nil 1155 } 1156 1157 // Check to see if this is a limited container and the container is not found 1158 func (f *Fs) containerOK(container string) bool { 1159 if !f.isLimited { 1160 return true 1161 } 1162 f.cntSVCcacheMu.Lock() 1163 defer f.cntSVCcacheMu.Unlock() 1164 for limitedContainer := range f.cntSVCcache { 1165 if container == limitedContainer { 1166 return true 1167 } 1168 } 1169 return false 1170 } 1171 1172 // listDir lists a single directory 1173 func (f *Fs) listDir(ctx context.Context, containerName, directory, prefix string, addContainer bool) (entries fs.DirEntries, err error) { 1174 if !f.containerOK(containerName) { 1175 return nil, fs.ErrorDirNotFound 1176 } 1177 err = f.list(ctx, containerName, directory, prefix, addContainer, false, int32(f.opt.ListChunkSize), func(remote string, object *container.BlobItem, isDirectory bool) error { 1178 entry, err := f.itemToDirEntry(ctx, remote, object, isDirectory) 1179 if err != nil { 1180 return err 1181 } 1182 if entry != nil { 1183 entries = append(entries, entry) 1184 } 1185 return nil 1186 }) 1187 if err != nil { 1188 return nil, err 1189 } 1190 // container must be present if listing succeeded 1191 f.cache.MarkOK(containerName) 1192 return entries, nil 1193 } 1194 1195 // listContainers returns all the containers to out 1196 func (f *Fs) listContainers(ctx context.Context) (entries fs.DirEntries, err error) { 1197 if f.isLimited { 1198 f.cntSVCcacheMu.Lock() 1199 for container := range f.cntSVCcache { 1200 d := fs.NewDir(container, time.Time{}) 1201 entries = append(entries, d) 1202 } 1203 f.cntSVCcacheMu.Unlock() 1204 return entries, nil 1205 } 1206 err = f.listContainersToFn(func(Name string, LastModified time.Time) error { 1207 d := fs.NewDir(f.opt.Enc.ToStandardName(Name), LastModified) 1208 f.cache.MarkOK(Name) 1209 entries = append(entries, d) 1210 return nil 1211 }) 1212 if err != nil { 1213 return nil, err 1214 } 1215 return entries, nil 1216 } 1217 1218 // List the objects and directories in dir into entries. The 1219 // entries can be returned in any order but should be for a 1220 // complete directory. 1221 // 1222 // dir should be "" to list the root, and should not have 1223 // trailing slashes. 1224 // 1225 // This should return ErrDirNotFound if the directory isn't 1226 // found. 1227 func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) { 1228 container, directory := f.split(dir) 1229 if container == "" { 1230 if directory != "" { 1231 return nil, fs.ErrorListBucketRequired 1232 } 1233 return f.listContainers(ctx) 1234 } 1235 return f.listDir(ctx, container, directory, f.rootDirectory, f.rootContainer == "") 1236 } 1237 1238 // ListR lists the objects and directories of the Fs starting 1239 // from dir recursively into out. 1240 // 1241 // dir should be "" to start from the root, and should not 1242 // have trailing slashes. 
1243 // 1244 // This should return ErrDirNotFound if the directory isn't 1245 // found. 1246 // 1247 // It should call callback for each tranche of entries read. 1248 // These need not be returned in any particular order. If 1249 // callback returns an error then the listing will stop 1250 // immediately. 1251 // 1252 // Don't implement this unless you have a more efficient way 1253 // of listing recursively that doing a directory traversal. 1254 func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) { 1255 containerName, directory := f.split(dir) 1256 list := walk.NewListRHelper(callback) 1257 listR := func(containerName, directory, prefix string, addContainer bool) error { 1258 return f.list(ctx, containerName, directory, prefix, addContainer, true, int32(f.opt.ListChunkSize), func(remote string, object *container.BlobItem, isDirectory bool) error { 1259 entry, err := f.itemToDirEntry(ctx, remote, object, isDirectory) 1260 if err != nil { 1261 return err 1262 } 1263 return list.Add(entry) 1264 }) 1265 } 1266 if containerName == "" { 1267 entries, err := f.listContainers(ctx) 1268 if err != nil { 1269 return err 1270 } 1271 for _, entry := range entries { 1272 err = list.Add(entry) 1273 if err != nil { 1274 return err 1275 } 1276 container := entry.Remote() 1277 err = listR(container, "", f.rootDirectory, true) 1278 if err != nil { 1279 return err 1280 } 1281 // container must be present if listing succeeded 1282 f.cache.MarkOK(container) 1283 } 1284 } else { 1285 if !f.containerOK(containerName) { 1286 return fs.ErrorDirNotFound 1287 } 1288 err = listR(containerName, directory, f.rootDirectory, f.rootContainer == "") 1289 if err != nil { 1290 return err 1291 } 1292 // container must be present if listing succeeded 1293 f.cache.MarkOK(containerName) 1294 } 1295 return list.Flush() 1296 } 1297 1298 // listContainerFn is called from listContainersToFn to handle a container 1299 type listContainerFn func(Name string, LastModified time.Time) error 1300 1301 // listContainersToFn lists the containers to the function supplied 1302 func (f *Fs) listContainersToFn(fn listContainerFn) error { 1303 max := int32(f.opt.ListChunkSize) 1304 pager := f.svc.NewListContainersPager(&service.ListContainersOptions{ 1305 Include: service.ListContainersInclude{Metadata: true, Deleted: true}, 1306 MaxResults: &max, 1307 }) 1308 ctx := context.Background() 1309 for pager.More() { 1310 var response service.ListContainersResponse 1311 err := f.pacer.Call(func() (bool, error) { 1312 var err error 1313 response, err = pager.NextPage(ctx) 1314 return f.shouldRetry(ctx, err) 1315 }) 1316 if err != nil { 1317 return err 1318 } 1319 1320 for _, cnt := range response.ContainerItems { 1321 if cnt == nil || cnt.Name == nil || cnt.Properties == nil || cnt.Properties.LastModified == nil { 1322 fs.Debugf(f, "nil returned in container info") 1323 } 1324 err = fn(*cnt.Name, *cnt.Properties.LastModified) 1325 if err != nil { 1326 return err 1327 } 1328 } 1329 } 1330 1331 return nil 1332 } 1333 1334 // Put the object into the container 1335 // 1336 // Copy the reader in to the new object which is returned. 1337 // 1338 // The new object may have been created if an error is returned 1339 func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { 1340 // Temporary Object under construction 1341 fs := &Object{ 1342 fs: f, 1343 remote: src.Remote(), 1344 } 1345 return fs, fs.Update(ctx, in, src, options...) 
1346 } 1347 1348 // PutStream uploads to the remote path with the modTime given of indeterminate size 1349 func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { 1350 return f.Put(ctx, in, src, options...) 1351 } 1352 1353 // Create directory marker file and parents 1354 func (f *Fs) createDirectoryMarker(ctx context.Context, container, dir string) error { 1355 if !f.opt.DirectoryMarkers || container == "" { 1356 return nil 1357 } 1358 1359 // Object to be uploaded 1360 o := &Object{ 1361 fs: f, 1362 modTime: time.Now(), 1363 meta: map[string]string{ 1364 dirMetaKey: dirMetaValue, 1365 }, 1366 } 1367 1368 for { 1369 _, containerPath := f.split(dir) 1370 // Don't create the directory marker if it is the bucket or at the very root 1371 if containerPath == "" { 1372 break 1373 } 1374 o.remote = dir + "/" 1375 1376 // Check to see if object already exists 1377 _, err := f.readMetaData(ctx, container, containerPath+"/") 1378 if err == nil { 1379 return nil 1380 } 1381 1382 // Upload it if not 1383 fs.Debugf(o, "Creating directory marker") 1384 content := io.Reader(strings.NewReader("")) 1385 err = o.Update(ctx, content, o) 1386 if err != nil { 1387 return fmt.Errorf("creating directory marker failed: %w", err) 1388 } 1389 1390 // Now check parent directory exists 1391 dir = path.Dir(dir) 1392 if dir == "/" || dir == "." { 1393 break 1394 } 1395 } 1396 1397 return nil 1398 } 1399 1400 // Mkdir creates the container if it doesn't exist 1401 func (f *Fs) Mkdir(ctx context.Context, dir string) error { 1402 container, _ := f.split(dir) 1403 e := f.makeContainer(ctx, container) 1404 if e != nil { 1405 return e 1406 } 1407 return f.createDirectoryMarker(ctx, container, dir) 1408 } 1409 1410 // mkdirParent creates the parent bucket/directory if it doesn't exist 1411 func (f *Fs) mkdirParent(ctx context.Context, remote string) error { 1412 remote = strings.TrimRight(remote, "/") 1413 dir := path.Dir(remote) 1414 if dir == "/" || dir == "." { 1415 dir = "" 1416 } 1417 return f.Mkdir(ctx, dir) 1418 } 1419 1420 // makeContainer creates the container if it doesn't exist 1421 func (f *Fs) makeContainer(ctx context.Context, container string) error { 1422 if f.opt.NoCheckContainer { 1423 return nil 1424 } 1425 return f.cache.Create(container, func() error { 1426 // If this is a SAS URL limited to a container then assume it is already created 1427 if f.isLimited { 1428 return nil 1429 } 1430 opt := service.CreateContainerOptions{ 1431 // Optional. Specifies a user-defined name-value pair associated with the blob. 1432 //Metadata map[string]string 1433 1434 // Optional. Specifies the encryption scope settings to set on the container. 
1435 //CpkScopeInfo *CpkScopeInfo 1436 } 1437 if f.publicAccess != "" { 1438 // Specifies whether data in the container may be accessed publicly and the level of access 1439 opt.Access = &f.publicAccess 1440 } 1441 // now try to create the container 1442 return f.pacer.Call(func() (bool, error) { 1443 _, err := f.svc.CreateContainer(ctx, container, &opt) 1444 if err != nil { 1445 if storageErr, ok := err.(*azcore.ResponseError); ok { 1446 switch bloberror.Code(storageErr.ErrorCode) { 1447 case bloberror.ContainerAlreadyExists: 1448 return false, nil 1449 case bloberror.ContainerBeingDeleted: 1450 // From https://docs.microsoft.com/en-us/rest/api/storageservices/delete-container 1451 // When a container is deleted, a container with the same name cannot be created 1452 // for at least 30 seconds; the container may not be available for more than 30 1453 // seconds if the service is still processing the request. 1454 time.Sleep(6 * time.Second) // default 10 retries will be 60 seconds 1455 f.cache.MarkDeleted(container) 1456 return true, err 1457 case bloberror.AuthorizationFailure: 1458 // Assume that the user does not have permission to 1459 // create the container and carry on anyway. 1460 fs.Debugf(f, "Tried to create container but got %s error - carrying on assuming container exists. Use no_check_container to stop this check..", storageErr.ErrorCode) 1461 return false, nil 1462 } 1463 } 1464 } 1465 return f.shouldRetry(ctx, err) 1466 }) 1467 }, nil) 1468 } 1469 1470 // isEmpty checks to see if a given (container, directory) is empty and returns an error if not 1471 func (f *Fs) isEmpty(ctx context.Context, containerName, directory string) (err error) { 1472 empty := true 1473 err = f.list(ctx, containerName, directory, f.rootDirectory, f.rootContainer == "", true, 1, func(remote string, object *container.BlobItem, isDirectory bool) error { 1474 empty = false 1475 return nil 1476 }) 1477 if err != nil { 1478 return err 1479 } 1480 if !empty { 1481 return fs.ErrorDirectoryNotEmpty 1482 } 1483 return nil 1484 } 1485 1486 // deleteContainer deletes the container. It can delete a full 1487 // container so use isEmpty if you don't want that. 
1488 func (f *Fs) deleteContainer(ctx context.Context, containerName string) error { 1489 return f.cache.Remove(containerName, func() error { 1490 getOptions := container.GetPropertiesOptions{} 1491 delOptions := container.DeleteOptions{} 1492 return f.pacer.Call(func() (bool, error) { 1493 _, err := f.cntSVC(containerName).GetProperties(ctx, &getOptions) 1494 if err == nil { 1495 _, err = f.cntSVC(containerName).Delete(ctx, &delOptions) 1496 } 1497 1498 if err != nil { 1499 // Check http error code along with service code, current SDK doesn't populate service code correctly sometimes 1500 if storageErr, ok := err.(*azcore.ResponseError); ok && (storageErr.ErrorCode == string(bloberror.ContainerNotFound) || storageErr.StatusCode == http.StatusNotFound) { 1501 return false, fs.ErrorDirNotFound 1502 } 1503 1504 return f.shouldRetry(ctx, err) 1505 } 1506 1507 return f.shouldRetry(ctx, err) 1508 }) 1509 }) 1510 } 1511 1512 // Rmdir deletes the container if the fs is at the root 1513 // 1514 // Returns an error if it isn't empty 1515 func (f *Fs) Rmdir(ctx context.Context, dir string) error { 1516 container, directory := f.split(dir) 1517 // Remove directory marker file 1518 if f.opt.DirectoryMarkers && container != "" && directory != "" { 1519 o := &Object{ 1520 fs: f, 1521 remote: dir + "/", 1522 } 1523 fs.Debugf(o, "Removing directory marker") 1524 err := o.Remove(ctx) 1525 if err != nil { 1526 return fmt.Errorf("removing directory marker failed: %w", err) 1527 } 1528 } 1529 if container == "" || directory != "" { 1530 return nil 1531 } 1532 err := f.isEmpty(ctx, container, directory) 1533 if err != nil { 1534 return err 1535 } 1536 return f.deleteContainer(ctx, container) 1537 } 1538 1539 // Precision of the remote 1540 func (f *Fs) Precision() time.Duration { 1541 return time.Nanosecond 1542 } 1543 1544 // Hashes returns the supported hash sets. 1545 func (f *Fs) Hashes() hash.Set { 1546 return hash.Set(hash.MD5) 1547 } 1548 1549 // Purge deletes all the files and directories including the old versions. 1550 func (f *Fs) Purge(ctx context.Context, dir string) error { 1551 container, directory := f.split(dir) 1552 if container == "" { 1553 return errors.New("can't purge from root") 1554 } 1555 if directory != "" { 1556 // Delegate to caller if not root of a container 1557 return fs.ErrorCantPurge 1558 } 1559 return f.deleteContainer(ctx, container) 1560 } 1561 1562 // Copy src to this remote using server-side copy operations. 1563 // 1564 // This is stored with the remote path given. 1565 // 1566 // It returns the destination Object and a possible error. 
1567 // 1568 // Will only be called if src.Fs().Name() == f.Name() 1569 // 1570 // If it isn't possible then return fs.ErrorCantCopy 1571 func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object, error) { 1572 dstContainer, dstPath := f.split(remote) 1573 err := f.mkdirParent(ctx, remote) 1574 if err != nil { 1575 return nil, err 1576 } 1577 srcObj, ok := src.(*Object) 1578 if !ok { 1579 fs.Debugf(src, "Can't copy - not same remote type") 1580 return nil, fs.ErrorCantCopy 1581 } 1582 dstBlobSVC := f.getBlobSVC(dstContainer, dstPath) 1583 srcBlobSVC := srcObj.getBlobSVC() 1584 srcURL := srcBlobSVC.URL() 1585 1586 options := blob.StartCopyFromURLOptions{ 1587 Tier: parseTier(f.opt.AccessTier), 1588 } 1589 var startCopy blob.StartCopyFromURLResponse 1590 err = f.pacer.Call(func() (bool, error) { 1591 startCopy, err = dstBlobSVC.StartCopyFromURL(ctx, srcURL, &options) 1592 return f.shouldRetry(ctx, err) 1593 }) 1594 if err != nil { 1595 return nil, err 1596 } 1597 1598 copyStatus := startCopy.CopyStatus 1599 getOptions := blob.GetPropertiesOptions{} 1600 for copyStatus != nil && string(*copyStatus) == string(container.CopyStatusTypePending) { 1601 time.Sleep(1 * time.Second) 1602 getMetadata, err := dstBlobSVC.GetProperties(ctx, &getOptions) 1603 if err != nil { 1604 return nil, err 1605 } 1606 copyStatus = getMetadata.CopyStatus 1607 } 1608 1609 return f.NewObject(ctx, remote) 1610 } 1611 1612 // ------------------------------------------------------------ 1613 1614 // Fs returns the parent Fs 1615 func (o *Object) Fs() fs.Info { 1616 return o.fs 1617 } 1618 1619 // Return a string version 1620 func (o *Object) String() string { 1621 if o == nil { 1622 return "<nil>" 1623 } 1624 return o.remote 1625 } 1626 1627 // Remote returns the remote path 1628 func (o *Object) Remote() string { 1629 return o.remote 1630 } 1631 1632 // Hash returns the MD5 of an object returning a lowercase hex string 1633 func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) { 1634 if t != hash.MD5 { 1635 return "", hash.ErrUnsupported 1636 } 1637 // Convert base64 encoded md5 into lower case hex 1638 if o.md5 == "" { 1639 return "", nil 1640 } 1641 data, err := base64.StdEncoding.DecodeString(o.md5) 1642 if err != nil { 1643 return "", fmt.Errorf("failed to decode Content-MD5: %q: %w", o.md5, err) 1644 } 1645 return hex.EncodeToString(data), nil 1646 } 1647 1648 // Size returns the size of an object in bytes 1649 func (o *Object) Size() int64 { 1650 return o.size 1651 } 1652 1653 // Set o.metadata from metadata 1654 func (o *Object) setMetadata(metadata map[string]*string) { 1655 metadataMu.Lock() 1656 defer metadataMu.Unlock() 1657 1658 if len(metadata) > 0 { 1659 // Lower case the metadata 1660 o.meta = make(map[string]string, len(metadata)) 1661 for k, v := range metadata { 1662 if v != nil { 1663 o.meta[strings.ToLower(k)] = *v 1664 } 1665 } 1666 // Set o.modTime from metadata if it exists and 1667 // UseServerModTime isn't in use. 
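// Note: modification times are written using timeFormatOut (fixed nanosecond
// precision) and parsed back with timeFormatIn (time.RFC3339); time.Parse accepts
// the extra fractional seconds even though the RFC3339 layout omits them.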
1668 if modTime, ok := o.meta[modTimeKey]; !o.fs.ci.UseServerModTime && ok { 1669 when, err := time.Parse(timeFormatIn, modTime) 1670 if err != nil { 1671 fs.Debugf(o, "Couldn't parse %v = %q: %v", modTimeKey, modTime, err) 1672 } 1673 o.modTime = when 1674 } 1675 } else { 1676 o.meta = nil 1677 } 1678 } 1679 1680 // Get metadata from o.meta 1681 func (o *Object) getMetadata() (metadata map[string]*string) { 1682 metadataMu.Lock() 1683 defer metadataMu.Unlock() 1684 1685 if len(o.meta) == 0 { 1686 return nil 1687 } 1688 metadata = make(map[string]*string, len(o.meta)) 1689 for k, v := range o.meta { 1690 v := v 1691 metadata[k] = &v 1692 } 1693 return metadata 1694 } 1695 1696 // decodeMetaDataFromPropertiesResponse sets the metadata from the data passed in 1697 // 1698 // Sets 1699 // 1700 // o.id 1701 // o.modTime 1702 // o.size 1703 // o.md5 1704 // o.meta 1705 func (o *Object) decodeMetaDataFromPropertiesResponse(info *blob.GetPropertiesResponse) (err error) { 1706 metadata := info.Metadata 1707 var size int64 1708 if info.ContentLength == nil { 1709 size = -1 1710 } else { 1711 size = *info.ContentLength 1712 } 1713 if isDirectoryMarker(size, metadata, o.remote) { 1714 return fs.ErrorNotAFile 1715 } 1716 // NOTE - Client library always returns MD5 as base64 decoded string, Object needs to maintain 1717 // this as base64 encoded string. 1718 o.md5 = base64.StdEncoding.EncodeToString(info.ContentMD5) 1719 if info.ContentType == nil { 1720 o.mimeType = "" 1721 } else { 1722 o.mimeType = *info.ContentType 1723 } 1724 o.size = size 1725 if info.LastModified == nil { 1726 o.modTime = time.Now() 1727 } else { 1728 o.modTime = *info.LastModified 1729 } 1730 if info.AccessTier == nil { 1731 o.accessTier = blob.AccessTier("") 1732 } else { 1733 o.accessTier = blob.AccessTier(*info.AccessTier) 1734 } 1735 o.setMetadata(metadata) 1736 1737 return nil 1738 } 1739 1740 func (o *Object) decodeMetaDataFromDownloadResponse(info *blob.DownloadStreamResponse) (err error) { 1741 metadata := info.Metadata 1742 var size int64 1743 if info.ContentLength == nil { 1744 size = -1 1745 } else { 1746 size = *info.ContentLength 1747 } 1748 if isDirectoryMarker(size, metadata, o.remote) { 1749 return fs.ErrorNotAFile 1750 } 1751 // NOTE - Client library always returns MD5 as base64 decoded string, Object needs to maintain 1752 // this as base64 encoded string. 1753 o.md5 = base64.StdEncoding.EncodeToString(info.ContentMD5) 1754 if info.ContentType == nil { 1755 o.mimeType = "" 1756 } else { 1757 o.mimeType = *info.ContentType 1758 } 1759 o.size = size 1760 if info.LastModified == nil { 1761 o.modTime = time.Now() 1762 } else { 1763 o.modTime = *info.LastModified 1764 } 1765 // FIXME response doesn't appear to have AccessTier in? 
1766 // if info.AccessTier == nil { 1767 // o.accessTier = blob.AccessTier("") 1768 // } else { 1769 // o.accessTier = blob.AccessTier(*info.AccessTier) 1770 // } 1771 o.setMetadata(metadata) 1772 1773 // If it was a Range request, the size is wrong, so correct it 1774 if info.ContentRange != nil { 1775 contentRange := *info.ContentRange 1776 slash := strings.IndexRune(contentRange, '/') 1777 if slash >= 0 { 1778 i, err := strconv.ParseInt(contentRange[slash+1:], 10, 64) 1779 if err == nil { 1780 o.size = i 1781 } else { 1782 fs.Debugf(o, "Failed to parse integer from %q: %v", contentRange, err) 1783 } 1784 } else { 1785 fs.Debugf(o, "Failed to find length in %q", contentRange) 1786 } 1787 } 1788 1789 return nil 1790 } 1791 1792 func (o *Object) decodeMetaDataFromBlob(info *container.BlobItem) (err error) { 1793 if info.Properties == nil { 1794 return errors.New("nil Properties in decodeMetaDataFromBlob") 1795 } 1796 metadata := info.Metadata 1797 var size int64 1798 if info.Properties.ContentLength == nil { 1799 size = -1 1800 } else { 1801 size = *info.Properties.ContentLength 1802 } 1803 if isDirectoryMarker(size, metadata, o.remote) { 1804 return fs.ErrorNotAFile 1805 } 1806 // NOTE - Client library always returns MD5 as base64 decoded string, Object needs to maintain 1807 // this as base64 encoded string. 1808 o.md5 = base64.StdEncoding.EncodeToString(info.Properties.ContentMD5) 1809 if info.Properties.ContentType == nil { 1810 o.mimeType = "" 1811 } else { 1812 o.mimeType = *info.Properties.ContentType 1813 } 1814 o.size = size 1815 if info.Properties.LastModified == nil { 1816 o.modTime = time.Now() 1817 } else { 1818 o.modTime = *info.Properties.LastModified 1819 } 1820 if info.Properties.AccessTier == nil { 1821 o.accessTier = blob.AccessTier("") 1822 } else { 1823 o.accessTier = *info.Properties.AccessTier 1824 } 1825 o.setMetadata(metadata) 1826 1827 return nil 1828 } 1829 1830 // getBlobSVC creates a blob client 1831 func (o *Object) getBlobSVC() *blob.Client { 1832 container, directory := o.split() 1833 return o.fs.getBlobSVC(container, directory) 1834 } 1835 1836 // clearMetaData clears enough metadata so readMetaData will re-read it 1837 func (o *Object) clearMetaData() { 1838 o.modTime = time.Time{} 1839 } 1840 1841 // readMetaData gets the metadata if it hasn't already been fetched 1842 func (f *Fs) readMetaData(ctx context.Context, container, containerPath string) (blobProperties blob.GetPropertiesResponse, err error) { 1843 if !f.containerOK(container) { 1844 return blobProperties, fs.ErrorObjectNotFound 1845 } 1846 blb := f.getBlobSVC(container, containerPath) 1847 1848 // Read the blob properties (which include the metadata) 1849 options := blob.GetPropertiesOptions{} 1850 err = f.pacer.Call(func() (bool, error) { 1851 blobProperties, err = blb.GetProperties(ctx, &options) 1852 return f.shouldRetry(ctx, err) 1853 }) 1854 if err != nil { 1855 // On directories GetProperties does not work, and the current SDK does not populate the service code correctly, hence check the regular http response as well 1856 if storageErr, ok := err.(*azcore.ResponseError); ok && (storageErr.ErrorCode == string(bloberror.BlobNotFound) || storageErr.StatusCode == http.StatusNotFound) { 1857 return blobProperties, fs.ErrorObjectNotFound 1858 } 1859 return blobProperties, err 1860 } 1861 return blobProperties, nil 1862 } 1863 1864 // readMetaData gets the metadata if it hasn't already been fetched 1865 // 1866 // Sets 1867 // 1868 // o.id 1869 // o.modTime 1870 // o.size 1871 // o.md5 1872 func (o *Object)
readMetaData(ctx context.Context) (err error) { 1873 if !o.modTime.IsZero() { 1874 return nil 1875 } 1876 container, containerPath := o.split() 1877 blobProperties, err := o.fs.readMetaData(ctx, container, containerPath) 1878 if err != nil { 1879 return err 1880 } 1881 return o.decodeMetaDataFromPropertiesResponse(&blobProperties) 1882 } 1883 1884 // ModTime returns the modification time of the object 1885 // 1886 // It attempts to read the object's mtime and if that isn't present it falls back to the 1887 // LastModified returned in the http headers 1888 func (o *Object) ModTime(ctx context.Context) (result time.Time) { 1889 // The error is logged in readMetaData 1890 _ = o.readMetaData(ctx) 1891 return o.modTime 1892 } 1893 1894 // SetModTime sets the modification time of the local fs object 1895 func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error { 1896 o.updateMetadataWithModTime(modTime) 1897 1898 blb := o.getBlobSVC() 1899 opt := blob.SetMetadataOptions{} 1900 err := o.fs.pacer.Call(func() (bool, error) { 1901 _, err := blb.SetMetadata(ctx, o.getMetadata(), &opt) 1902 return o.fs.shouldRetry(ctx, err) 1903 }) 1904 if err != nil { 1905 return err 1906 } 1907 o.modTime = modTime 1908 return nil 1909 } 1910 1911 // Storable returns if this object is storable 1912 func (o *Object) Storable() bool { 1913 return true 1914 } 1915 1916 // Open an object for read 1917 func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) { 1918 // Offset and Count for range download 1919 var offset int64 1920 var count int64 1921 if o.AccessTier() == blob.AccessTierArchive { 1922 return nil, fmt.Errorf("blob is in archive tier, you need to set the tier to hot, cool or cold first") 1923 } 1924 fs.FixRangeOption(options, o.size) 1925 for _, option := range options { 1926 switch x := option.(type) { 1927 case *fs.RangeOption: 1928 offset, count = x.Decode(o.size) 1929 if count < 0 { 1930 count = o.size - offset 1931 } 1932 case *fs.SeekOption: 1933 offset = x.Offset 1934 default: 1935 if option.Mandatory() { 1936 fs.Logf(o, "Unsupported mandatory option: %v", option) 1937 } 1938 } 1939 } 1940 blb := o.getBlobSVC() 1941 opt := blob.DownloadStreamOptions{ 1942 // When set to true and specified together with the Range, the service returns the MD5 hash for the range, as long as the 1943 // range is less than or equal to 4 MB in size. 1944 //RangeGetContentMD5 *bool 1945 1946 // Range specifies a range of bytes. The default value is all bytes.
1947 //Range HTTPRange 1948 Range: blob.HTTPRange{ 1949 Offset: offset, 1950 Count: count, 1951 }, 1952 1953 // AccessConditions *AccessConditions 1954 // CpkInfo *CpkInfo 1955 // CpkScopeInfo *CpkScopeInfo 1956 } 1957 var downloadResponse blob.DownloadStreamResponse 1958 err = o.fs.pacer.Call(func() (bool, error) { 1959 downloadResponse, err = blb.DownloadStream(ctx, &opt) 1960 return o.fs.shouldRetry(ctx, err) 1961 }) 1962 if err != nil { 1963 return nil, fmt.Errorf("failed to open for download: %w", err) 1964 } 1965 err = o.decodeMetaDataFromDownloadResponse(&downloadResponse) 1966 if err != nil { 1967 return nil, fmt.Errorf("failed to decode metadata for download: %w", err) 1968 } 1969 return downloadResponse.Body, nil 1970 } 1971 1972 // Converts a string into a pointer to a string 1973 func pString(s string) *string { 1974 return &s 1975 } 1976 1977 // readSeekCloser joins an io.Reader and an io.Seeker and provides a no-op io.Closer 1978 type readSeekCloser struct { 1979 io.Reader 1980 io.Seeker 1981 } 1982 1983 // Close does nothing 1984 func (rs *readSeekCloser) Close() error { 1985 return nil 1986 } 1987 1988 // record chunk number and id for Close 1989 type azBlock struct { 1990 chunkNumber uint64 1991 id string 1992 } 1993 1994 // Implements the fs.ChunkWriter interface 1995 type azChunkWriter struct { 1996 chunkSize int64 1997 size int64 1998 f *Fs 1999 ui uploadInfo 2000 blocksMu sync.Mutex // protects the below 2001 blocks []azBlock // list of blocks for finalize 2002 o *Object 2003 } 2004 2005 // OpenChunkWriter returns the chunk size and a ChunkWriter 2006 // 2007 // Pass in the remote and the src object 2008 // You can also use options to hint at the desired chunk size 2009 func (f *Fs) OpenChunkWriter(ctx context.Context, remote string, src fs.ObjectInfo, options ...fs.OpenOption) (info fs.ChunkWriterInfo, writer fs.ChunkWriter, err error) { 2010 // Temporary Object under construction 2011 o := &Object{ 2012 fs: f, 2013 remote: remote, 2014 } 2015 ui, err := o.prepareUpload(ctx, src, options) 2016 if err != nil { 2017 return info, nil, fmt.Errorf("failed to prepare upload: %w", err) 2018 } 2019 2020 // Calculate correct partSize 2021 partSize := f.opt.ChunkSize 2022 totalParts := -1 2023 size := src.Size() 2024 2025 // Note that the max size of a file is 4.75 TB (100 MB X 50,000 2026 // blocks) and this is smaller than the max uncommitted block 2027 // size (9.52 TB) so we do not need to part commit block lists 2028 // or garbage collect uncommitted blocks. 2029 // 2030 // See: https://docs.microsoft.com/en-gb/rest/api/storageservices/put-block 2031 2032 // size can be -1 here meaning we don't know the size of the incoming file. We use ChunkSize 2033 // buffers here (default 4MB). With a maximum number of parts (50,000) this will be a file of 2034 // 195GB which seems like a not too unreasonable limit.
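// As a worked example with the default 4 MiB chunk size: 50,000 blocks × 4 MiB = 200,000 MiB ≈ 195 GiB, which is where the figure above comes from. Raising --azureblob-chunk-size raises this streaming upload limit proportionally.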
2035 if size == -1 { 2036 warnStreamUpload.Do(func() { 2037 fs.Logf(f, "Streaming uploads using chunk size %v will have maximum file size of %v", 2038 f.opt.ChunkSize, partSize*fs.SizeSuffix(blockblob.MaxBlocks)) 2039 }) 2040 } else { 2041 partSize = chunksize.Calculator(remote, size, blockblob.MaxBlocks, f.opt.ChunkSize) 2042 if partSize > fs.SizeSuffix(blockblob.MaxStageBlockBytes) { 2043 return info, nil, fmt.Errorf("can't upload as it is too big %v - takes more than %d chunks of %v", fs.SizeSuffix(size), fs.SizeSuffix(blockblob.MaxBlocks), fs.SizeSuffix(blockblob.MaxStageBlockBytes)) 2044 } 2045 totalParts = int(fs.SizeSuffix(size) / partSize) 2046 if fs.SizeSuffix(size)%partSize != 0 { 2047 totalParts++ 2048 } 2049 } 2050 2051 fs.Debugf(o, "Multipart upload session started for %d parts of size %v", totalParts, partSize) 2052 2053 chunkWriter := &azChunkWriter{ 2054 chunkSize: int64(partSize), 2055 size: size, 2056 f: f, 2057 ui: ui, 2058 o: o, 2059 } 2060 info = fs.ChunkWriterInfo{ 2061 ChunkSize: int64(partSize), 2062 Concurrency: o.fs.opt.UploadConcurrency, 2063 //LeavePartsOnError: o.fs.opt.LeavePartsOnError, 2064 } 2065 fs.Debugf(o, "open chunk writer: started multipart upload") 2066 return info, chunkWriter, nil 2067 } 2068 2069 // WriteChunk will write the bytes from reader as chunk number chunkNumber, where chunkNumber >= 0 2070 func (w *azChunkWriter) WriteChunk(ctx context.Context, chunkNumber int, reader io.ReadSeeker) (int64, error) { 2071 if chunkNumber < 0 { 2072 err := fmt.Errorf("invalid chunk number provided: %v", chunkNumber) 2073 return -1, err 2074 } 2075 2076 // Upload the block, with an MD5 checksum for the service to check 2077 m := md5.New() 2078 currentChunkSize, err := io.Copy(m, reader) 2079 if err != nil { 2080 return -1, err 2081 } 2082 // If no data read, don't write the chunk 2083 if currentChunkSize == 0 { 2084 return 0, nil 2085 } 2086 md5sum := m.Sum(nil) 2087 transactionalMD5 := md5sum[:] 2088 2089 // encode the chunk number as the blockID and save the blocks for finalize 2090 var binaryBlockID [8]byte // block counter as LSB first 8 bytes 2091 binary.LittleEndian.PutUint64(binaryBlockID[:], uint64(chunkNumber)) 2092 blockID := base64.StdEncoding.EncodeToString(binaryBlockID[:]) 2093 2094 // Save the blockID for the commit 2095 w.blocksMu.Lock() 2096 w.blocks = append(w.blocks, azBlock{ 2097 chunkNumber: uint64(chunkNumber), 2098 id: blockID, 2099 }) 2100 w.blocksMu.Unlock() 2101 2102 err = w.f.pacer.Call(func() (bool, error) { 2103 // rewind the reader on retry and after reading md5 2104 _, err = reader.Seek(0, io.SeekStart) 2105 if err != nil { 2106 return false, err 2107 } 2108 options := blockblob.StageBlockOptions{ 2109 // Specify the transactional md5 for the body, to be validated by the service. 2110 TransactionalValidation: blob.TransferValidationTypeMD5(transactionalMD5), 2111 } 2112 _, err = w.ui.blb.StageBlock(ctx, blockID, &readSeekCloser{Reader: reader, Seeker: reader}, &options) 2113 if err != nil { 2114 if chunkNumber <= 8 { 2115 return w.f.shouldRetry(ctx, err) 2116 } 2117 // retry all chunks once we have done the first few 2118 return true, err 2119 } 2120 return false, nil 2121 }) 2122 if err != nil { 2123 return -1, fmt.Errorf("failed to upload chunk %d with %v bytes: %w", chunkNumber+1, currentChunkSize, err) 2124 } 2125 2126 fs.Debugf(w.o, "multipart upload wrote chunk %d with %v bytes", chunkNumber+1, currentChunkSize) 2127 return currentChunkSize, err 2128 } 2129 2130 // Abort the multipart upload. 2131 // 2132 // FIXME it would be nice to delete uncommitted blocks.
2133 // 2134 // See: https://github.com/rclone/rclone/issues/5583 2135 // 2136 // However there doesn't seem to be an easy way of doing this other than 2137 // by deleting the target. 2138 // 2139 // This means that a failed upload deletes the target which isn't ideal. 2140 // 2141 // Uploading a zero length blob and deleting it will remove the 2142 // uncommitted blocks I think. 2143 // 2144 // Could check to see if a file exists already and if it doesn't then 2145 // create a 0 length file and delete it to flush the uncommitted 2146 // blocks. 2147 // 2148 // This is what azcopy does 2149 // https://github.com/MicrosoftDocs/azure-docs/issues/36347#issuecomment-541457962 2150 func (w *azChunkWriter) Abort(ctx context.Context) error { 2151 fs.Debugf(w.o, "multipart upload aborted (did nothing - see issue #5583)") 2152 return nil 2153 } 2154 2155 // Close and finalise the multipart upload 2156 func (w *azChunkWriter) Close(ctx context.Context) (err error) { 2157 // sort the completed parts by part number 2158 sort.Slice(w.blocks, func(i, j int) bool { 2159 return w.blocks[i].chunkNumber < w.blocks[j].chunkNumber 2160 }) 2161 2162 // Create and check a list of block IDs 2163 blockIDs := make([]string, len(w.blocks)) 2164 for i := range w.blocks { 2165 if w.blocks[i].chunkNumber != uint64(i) { 2166 return fmt.Errorf("internal error: expecting chunkNumber %d but got %d", i, w.blocks[i].chunkNumber) 2167 } 2168 chunkBytes, err := base64.StdEncoding.DecodeString(w.blocks[i].id) 2169 if err != nil { 2170 return fmt.Errorf("internal error: bad block ID: %w", err) 2171 } 2172 chunkNumber := binary.LittleEndian.Uint64(chunkBytes) 2173 if w.blocks[i].chunkNumber != chunkNumber { 2174 return fmt.Errorf("internal error: expecting decoded chunkNumber %d but got %d", w.blocks[i].chunkNumber, chunkNumber) 2175 } 2176 blockIDs[i] = w.blocks[i].id 2177 } 2178 2179 options := blockblob.CommitBlockListOptions{ 2180 Metadata: w.o.getMetadata(), 2181 Tier: parseTier(w.f.opt.AccessTier), 2182 HTTPHeaders: &w.ui.httpHeaders, 2183 } 2184 2185 // Finalise the upload session 2186 err = w.f.pacer.Call(func() (bool, error) { 2187 _, err := w.ui.blb.CommitBlockList(ctx, blockIDs, &options) 2188 return w.f.shouldRetry(ctx, err) 2189 }) 2190 if err != nil { 2191 return fmt.Errorf("failed to complete multipart upload: %w", err) 2192 } 2193 fs.Debugf(w.o, "multipart upload finished") 2194 return err 2195 } 2196 2197 var warnStreamUpload sync.Once 2198 2199 // uploadMultipart uploads a file using multipart upload 2200 // 2201 // Write a larger blob, using CreateBlockBlob, PutBlock, and PutBlockList. 
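// The heavy lifting is done by lib/multipart.UploadMultipart, which calls back into the fs.ChunkWriter returned by OpenChunkWriter above: each chunk is staged as an uncommitted block in WriteChunk and the ordered block list is committed in Close.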
2202 func (o *Object) uploadMultipart(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (ui uploadInfo, err error) { 2203 chunkWriter, err := multipart.UploadMultipart(ctx, src, in, multipart.UploadMultipartOptions{ 2204 Open: o.fs, 2205 OpenOptions: options, 2206 }) 2207 if err != nil { 2208 return ui, err 2209 } 2210 return chunkWriter.(*azChunkWriter).ui, nil 2211 } 2212 2213 // uploadSinglepart uploads a short blob using a single part upload 2214 func (o *Object) uploadSinglepart(ctx context.Context, in io.Reader, size int64, ui uploadInfo) (err error) { 2215 chunkSize := int64(o.fs.opt.ChunkSize) 2216 // fs.Debugf(o, "Single part upload starting of object %d bytes", size) 2217 if size > chunkSize || size < 0 { 2218 return fmt.Errorf("internal error: single part upload size too big %d > %d", size, chunkSize) 2219 } 2220 2221 rw := multipart.NewRW() 2222 defer fs.CheckClose(rw, &err) 2223 2224 n, err := io.CopyN(rw, in, size+1) 2225 if err != nil && err != io.EOF { 2226 return fmt.Errorf("single part upload read failed: %w", err) 2227 } 2228 if n != size { 2229 return fmt.Errorf("single part upload: expecting to read %d bytes but read %d", size, n) 2230 } 2231 2232 rs := &readSeekCloser{Reader: rw, Seeker: rw} 2233 2234 options := blockblob.UploadOptions{ 2235 Metadata: o.getMetadata(), 2236 Tier: parseTier(o.fs.opt.AccessTier), 2237 HTTPHeaders: &ui.httpHeaders, 2238 } 2239 2240 return o.fs.pacer.Call(func() (bool, error) { 2241 // rewind the reader on retry 2242 _, err = rs.Seek(0, io.SeekStart) 2243 if err != nil { 2244 return false, err 2245 } 2246 _, err = ui.blb.Upload(ctx, rs, &options) 2247 return o.fs.shouldRetry(ctx, err) 2248 }) 2249 } 2250 2251 // Info needed for an upload 2252 type uploadInfo struct { 2253 blb *blockblob.Client 2254 httpHeaders blob.HTTPHeaders 2255 isDirMarker bool 2256 } 2257 2258 // Prepare the object for upload 2259 func (o *Object) prepareUpload(ctx context.Context, src fs.ObjectInfo, options []fs.OpenOption) (ui uploadInfo, err error) { 2260 container, containerPath := o.split() 2261 if container == "" || containerPath == "" { 2262 return ui, fmt.Errorf("can't upload to root - need a container") 2263 } 2264 // Create parent dir/bucket if not saving directory marker 2265 metadataMu.Lock() 2266 _, ui.isDirMarker = o.meta[dirMetaKey] 2267 metadataMu.Unlock() 2268 if !ui.isDirMarker { 2269 err = o.fs.mkdirParent(ctx, o.remote) 2270 if err != nil { 2271 return ui, err 2272 } 2273 } 2274 2275 // Update Mod time 2276 o.updateMetadataWithModTime(src.ModTime(ctx)) 2277 if err != nil { 2278 return ui, err 2279 } 2280 2281 // Create the HTTP headers for the upload 2282 ui.httpHeaders = blob.HTTPHeaders{ 2283 BlobContentType: pString(fs.MimeType(ctx, src)), 2284 } 2285 2286 // Compute the Content-MD5 of the file. 
As we stream all uploads it 2287 // will be set in PutBlockList API call using the 'x-ms-blob-content-md5' header 2288 if !o.fs.opt.DisableCheckSum { 2289 if sourceMD5, _ := src.Hash(ctx, hash.MD5); sourceMD5 != "" { 2290 sourceMD5bytes, err := hex.DecodeString(sourceMD5) 2291 if err == nil { 2292 ui.httpHeaders.BlobContentMD5 = sourceMD5bytes 2293 } else { 2294 fs.Debugf(o, "Failed to decode %q as MD5: %v", sourceMD5, err) 2295 } 2296 } 2297 } 2298 2299 // Apply upload options (also allows one to overwrite content-type) 2300 for _, option := range options { 2301 key, value := option.Header() 2302 lowerKey := strings.ToLower(key) 2303 switch lowerKey { 2304 case "": 2305 // ignore 2306 case "cache-control": 2307 ui.httpHeaders.BlobCacheControl = pString(value) 2308 case "content-disposition": 2309 ui.httpHeaders.BlobContentDisposition = pString(value) 2310 case "content-encoding": 2311 ui.httpHeaders.BlobContentEncoding = pString(value) 2312 case "content-language": 2313 ui.httpHeaders.BlobContentLanguage = pString(value) 2314 case "content-type": 2315 ui.httpHeaders.BlobContentType = pString(value) 2316 } 2317 } 2318 2319 ui.blb = o.fs.getBlockBlobSVC(container, containerPath) 2320 return ui, nil 2321 } 2322 2323 // Update the object with the contents of the io.Reader, modTime and size 2324 // 2325 // The new object may have been created if an error is returned 2326 func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (err error) { 2327 if o.accessTier == blob.AccessTierArchive { 2328 if o.fs.opt.ArchiveTierDelete { 2329 fs.Debugf(o, "deleting archive tier blob before updating") 2330 err = o.Remove(ctx) 2331 if err != nil { 2332 return fmt.Errorf("failed to delete archive blob before updating: %w", err) 2333 } 2334 } else { 2335 return errCantUpdateArchiveTierBlobs 2336 } 2337 } 2338 2339 size := src.Size() 2340 multipartUpload := size < 0 || size > int64(o.fs.opt.ChunkSize) 2341 var ui uploadInfo 2342 2343 if multipartUpload { 2344 ui, err = o.uploadMultipart(ctx, in, src, options...) 
2345 } else { 2346 ui, err = o.prepareUpload(ctx, src, options) 2347 if err != nil { 2348 return fmt.Errorf("failed to prepare upload: %w", err) 2349 } 2350 err = o.uploadSinglepart(ctx, in, size, ui) 2351 } 2352 if err != nil { 2353 return err 2354 } 2355 2356 // Refresh metadata on object 2357 if !ui.isDirMarker { 2358 o.clearMetaData() 2359 err = o.readMetaData(ctx) 2360 if err != nil { 2361 return err 2362 } 2363 } 2364 2365 // If tier is not changed or not specified, do not attempt to invoke `SetBlobTier` operation 2366 if o.fs.opt.AccessTier == string(defaultAccessTier) || o.fs.opt.AccessTier == string(o.AccessTier()) { 2367 return nil 2368 } 2369 2370 // Now, set blob tier based on configured access tier 2371 return o.SetTier(o.fs.opt.AccessTier) 2372 } 2373 2374 // Remove an object 2375 func (o *Object) Remove(ctx context.Context) error { 2376 blb := o.getBlobSVC() 2377 opt := blob.DeleteOptions{} 2378 if o.fs.opt.DeleteSnapshots != "" { 2379 action := blob.DeleteSnapshotsOptionType(o.fs.opt.DeleteSnapshots) 2380 opt.DeleteSnapshots = &action 2381 } 2382 return o.fs.pacer.Call(func() (bool, error) { 2383 _, err := blb.Delete(ctx, &opt) 2384 return o.fs.shouldRetry(ctx, err) 2385 }) 2386 } 2387 2388 // MimeType of an Object if known, "" otherwise 2389 func (o *Object) MimeType(ctx context.Context) string { 2390 return o.mimeType 2391 } 2392 2393 // AccessTier of an object, default is of type none 2394 func (o *Object) AccessTier() blob.AccessTier { 2395 return o.accessTier 2396 } 2397 2398 // SetTier performs changing object tier 2399 func (o *Object) SetTier(tier string) error { 2400 if !validateAccessTier(tier) { 2401 return fmt.Errorf("tier %s not supported by Azure Blob Storage", tier) 2402 } 2403 2404 // Check if current tier already matches with desired tier 2405 if o.GetTier() == tier { 2406 return nil 2407 } 2408 desiredAccessTier := blob.AccessTier(tier) 2409 blb := o.getBlobSVC() 2410 ctx := context.Background() 2411 priority := blob.RehydratePriorityStandard 2412 opt := blob.SetTierOptions{ 2413 RehydratePriority: &priority, 2414 } 2415 err := o.fs.pacer.Call(func() (bool, error) { 2416 _, err := blb.SetTier(ctx, desiredAccessTier, &opt) 2417 return o.fs.shouldRetry(ctx, err) 2418 }) 2419 2420 if err != nil { 2421 return fmt.Errorf("failed to set Blob Tier: %w", err) 2422 } 2423 2424 // Set access tier on local object also, this typically 2425 // gets updated on get blob properties 2426 o.accessTier = desiredAccessTier 2427 fs.Debugf(o, "Successfully changed object tier to %s", tier) 2428 2429 return nil 2430 } 2431 2432 // GetTier returns object tier in azure as string 2433 func (o *Object) GetTier() string { 2434 return string(o.accessTier) 2435 } 2436 2437 func parseTier(tier string) *blob.AccessTier { 2438 if tier == "" { 2439 return nil 2440 } 2441 msTier := blob.AccessTier(tier) 2442 return &msTier 2443 } 2444 2445 // Check the interfaces are satisfied 2446 var ( 2447 _ fs.Fs = &Fs{} 2448 _ fs.Copier = &Fs{} 2449 _ fs.PutStreamer = &Fs{} 2450 _ fs.Purger = &Fs{} 2451 _ fs.ListRer = &Fs{} 2452 _ fs.OpenChunkWriter = &Fs{} 2453 _ fs.Object = &Object{} 2454 _ fs.MimeTyper = &Object{} 2455 _ fs.GetTierer = &Object{} 2456 _ fs.SetTierer = &Object{} 2457 )