github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/backend/azureblob/azureblob.go

     1  // Package azureblob provides an interface to the Microsoft Azure blob object storage system
     2  
     3  // +build !plan9,!solaris
     4  
     5  package azureblob
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"crypto/md5"
    11  	"encoding/base64"
    12  	"encoding/hex"
    13  	"fmt"
    14  	"io"
    15  	"net/http"
    16  	"net/url"
    17  	"path"
    18  	"strings"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/Azure/azure-pipeline-go/pipeline"
    23  	"github.com/Azure/azure-storage-blob-go/azblob"
    24  	"github.com/pkg/errors"
    25  	"github.com/rclone/rclone/fs"
    26  	"github.com/rclone/rclone/fs/accounting"
    27  	"github.com/rclone/rclone/fs/config"
    28  	"github.com/rclone/rclone/fs/config/configmap"
    29  	"github.com/rclone/rclone/fs/config/configstruct"
    30  	"github.com/rclone/rclone/fs/fserrors"
    31  	"github.com/rclone/rclone/fs/fshttp"
    32  	"github.com/rclone/rclone/fs/hash"
    33  	"github.com/rclone/rclone/fs/walk"
    34  	"github.com/rclone/rclone/lib/bucket"
    35  	"github.com/rclone/rclone/lib/encoder"
    36  	"github.com/rclone/rclone/lib/pacer"
    37  	"github.com/rclone/rclone/lib/pool"
    38  	"github.com/rclone/rclone/lib/readers"
    39  	"golang.org/x/sync/errgroup"
    40  )
    41  
    42  const (
    43  	minSleep              = 10 * time.Millisecond
    44  	maxSleep              = 10 * time.Second
    45  	decayConstant         = 1    // bigger for slower decay, exponential
    46  	maxListChunkSize      = 5000 // number of items to read at once
    47  	modTimeKey            = "mtime"
    48  	timeFormatIn          = time.RFC3339
    49  	timeFormatOut         = "2006-01-02T15:04:05.000000000Z07:00"
    50  	maxTotalParts         = 50000 // in multipart upload
    51  	storageDefaultBaseURL = "blob.core.windows.net"
    52  	// maxUncommittedSize = 9 << 30 // can't upload bigger than this
    53  	defaultChunkSize    = 4 * fs.MebiByte
    54  	maxChunkSize        = 100 * fs.MebiByte
    55  	defaultUploadCutoff = 256 * fs.MebiByte
    56  	maxUploadCutoff     = 256 * fs.MebiByte
    57  	defaultAccessTier   = azblob.AccessTierNone
    58  	maxTryTimeout       = time.Hour * 24 * 365 // max time of an Azure web request response window (whether or not data is flowing)
    59  	// Default storage account, key and blob endpoint for emulator support.
    60  	// Although the base64 key is checked in here, it is a publicly available secret.
    61  	emulatorAccount      = "devstoreaccount1"
    62  	emulatorAccountKey   = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
    63  	emulatorBlobEndpoint = "http://127.0.0.1:10000/devstoreaccount1"
    64  	memoryPoolFlushTime  = fs.Duration(time.Minute) // flush the cached buffers after this long
    65  	memoryPoolUseMmap    = false
    66  )
    67  
    68  // Register with Fs
    69  func init() {
    70  	fs.Register(&fs.RegInfo{
    71  		Name:        "azureblob",
    72  		Description: "Microsoft Azure Blob Storage",
    73  		NewFs:       NewFs,
    74  		Options: []fs.Option{{
    75  			Name: "account",
    76  			Help: "Storage Account Name (leave blank to use SAS URL or Emulator)",
    77  		}, {
    78  			Name: "key",
    79  			Help: "Storage Account Key (leave blank to use SAS URL or Emulator)",
    80  		}, {
    81  			Name: "sas_url",
    82  			Help: "SAS URL for container level access only\n(leave blank if using account/key or Emulator)",
    83  		}, {
    84  			Name:    "use_emulator",
    85  			Help:    "Use the local storage emulator if set to 'true' (leave blank if using a real Azure storage endpoint)",
    86  			Default: false,
    87  		}, {
    88  			Name:     "endpoint",
    89  			Help:     "Endpoint for the service\nLeave blank normally.",
    90  			Advanced: true,
    91  		}, {
    92  			Name:     "upload_cutoff",
    93  			Help:     "Cutoff for switching to chunked upload (<= 256MB).",
    94  			Default:  defaultUploadCutoff,
    95  			Advanced: true,
    96  		}, {
    97  			Name: "chunk_size",
    98  			Help: `Upload chunk size (<= 100MB).
    99  
   100  Note that this is stored in memory and there may be up to
   101  "--transfers" chunks stored at once in memory.`,
   102  			Default:  defaultChunkSize,
   103  			Advanced: true,
   104  		}, {
   105  			Name: "list_chunk",
   106  			Help: `Size of blob list.
   107  
   108  This sets the number of blobs requested in each listing chunk. Default
   109  is the maximum, 5000. "List blobs" requests are permitted 2 minutes
   110  per megabyte to complete. If an operation is taking longer than 2
   111  minutes per megabyte on average, it will time out (
   112  [source](https://docs.microsoft.com/en-us/rest/api/storageservices/setting-timeouts-for-blob-service-operations#exceptions-to-default-timeout-interval)
   113  ). This can be used to limit the number of blob items returned, to
   114  avoid the timeout.`,
   115  			Default:  maxListChunkSize,
   116  			Advanced: true,
   117  		}, {
   118  			Name: "access_tier",
   119  			Help: `Access tier of blob: hot, cool or archive.
   120  
   121  Archived blobs can be restored by setting the access tier to hot or
   122  cool. Leave blank if you intend to use the default access tier, which
   123  is set at the account level.
   124  
   125  If no "access tier" is specified, rclone doesn't apply any tier.
   126  rclone performs a "Set Tier" operation on blobs while uploading; if the
   127  objects are not modified, setting the "access tier" to a new one will
   128  have no effect. If blobs are in the "archive tier" at the remote, data
   129  transfer operations from the remote will not be allowed. The user should
   130  first restore them by tiering the blobs to "Hot" or "Cool".`,
   131  			Advanced: true,
   132  		}, {
   133  			Name: "disable_checksum",
   134  			Help: `Don't store MD5 checksum with object metadata.
   135  
   136  Normally rclone will calculate the MD5 checksum of the input before
   137  uploading it so it can add it to metadata on the object. This is great
   138  for data integrity checking but can cause long delays for large files
   139  to start uploading.`,
   140  			Default:  false,
   141  			Advanced: true,
   142  		}, {
   143  			Name:     "memory_pool_flush_time",
   144  			Default:  memoryPoolFlushTime,
   145  			Advanced: true,
   146  			Help: `How often internal memory buffer pools will be flushed.
   147  Uploads which require additional buffers (e.g. multipart) will use the memory pool for allocations.
   148  This option controls how often unused buffers will be removed from the pool.`,
   149  		}, {
   150  			Name:     "memory_pool_use_mmap",
   151  			Default:  memoryPoolUseMmap,
   152  			Advanced: true,
   153  			Help:     `Whether to use mmap buffers in internal memory pool.`,
   154  		}, {
   155  			Name:     config.ConfigEncoding,
   156  			Help:     config.ConfigEncodingHelp,
   157  			Advanced: true,
   158  			Default: (encoder.EncodeInvalidUtf8 |
   159  				encoder.EncodeSlash |
   160  				encoder.EncodeCtl |
   161  				encoder.EncodeDel |
   162  				encoder.EncodeBackSlash |
   163  				encoder.EncodeRightPeriod),
   164  		}},
   165  	})
   166  }
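        
        // Purely illustrative (not part of the original source): with the options
        // registered above, a remote created by "rclone config" would end up in
        // rclone.conf looking roughly like this (the remote names and key are
        // placeholders):
        //
        //	[azblob]
        //	type = azureblob
        //	account = myaccount
        //	key = <base64 encoded storage account key>
        //
        // or, when pointing at the local storage emulator:
        //
        //	[azblob-emulator]
        //	type = azureblob
        //	use_emulator = true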
   167  
   168  // Options defines the configuration for this backend
   169  type Options struct {
   170  	Account             string               `config:"account"`
   171  	Key                 string               `config:"key"`
   172  	Endpoint            string               `config:"endpoint"`
   173  	SASURL              string               `config:"sas_url"`
   174  	UploadCutoff        fs.SizeSuffix        `config:"upload_cutoff"`
   175  	ChunkSize           fs.SizeSuffix        `config:"chunk_size"`
   176  	ListChunkSize       uint                 `config:"list_chunk"`
   177  	AccessTier          string               `config:"access_tier"`
   178  	UseEmulator         bool                 `config:"use_emulator"`
   179  	DisableCheckSum     bool                 `config:"disable_checksum"`
   180  	MemoryPoolFlushTime fs.Duration          `config:"memory_pool_flush_time"`
   181  	MemoryPoolUseMmap   bool                 `config:"memory_pool_use_mmap"`
   182  	Enc                 encoder.MultiEncoder `config:"encoding"`
   183  }
   184  
   185  // Fs represents a remote azure server
   186  type Fs struct {
   187  	name          string                          // name of this remote
   188  	root          string                          // the path we are working on if any
   189  	opt           Options                         // parsed config options
   190  	features      *fs.Features                    // optional features
   191  	client        *http.Client                    // http client we are using
   192  	svcURL        *azblob.ServiceURL              // reference to serviceURL
   193  	cntURLcacheMu sync.Mutex                      // mutex to protect cntURLcache
   194  	cntURLcache   map[string]*azblob.ContainerURL // reference to containerURL per container
   195  	rootContainer string                          // container part of root (if any)
   196  	rootDirectory string                          // directory part of root (if any)
   197  	isLimited     bool                            // if limited to one container
   198  	cache         *bucket.Cache                   // cache for container creation status
   199  	pacer         *fs.Pacer                       // To pace and retry the API calls
   200  	uploadToken   *pacer.TokenDispenser           // control concurrency
   201  	pool          *pool.Pool                      // memory pool
   202  }
   203  
   204  // Object describes an azure object
   205  type Object struct {
   206  	fs         *Fs                   // what this object is part of
   207  	remote     string                // The remote path
   208  	modTime    time.Time             // The modified time of the object if known
   209  	md5        string                // MD5 hash if known
   210  	size       int64                 // Size of the object
   211  	mimeType   string                // Content-Type of the object
   212  	accessTier azblob.AccessTierType // Blob Access Tier
   213  	meta       map[string]string     // blob metadata
   214  }
   215  
   216  // ------------------------------------------------------------
   217  
   218  // Name of the remote (as passed into NewFs)
   219  func (f *Fs) Name() string {
   220  	return f.name
   221  }
   222  
   223  // Root of the remote (as passed into NewFs)
   224  func (f *Fs) Root() string {
   225  	return f.root
   226  }
   227  
   228  // String converts this Fs to a string
   229  func (f *Fs) String() string {
   230  	if f.rootContainer == "" {
   231  		return "Azure root"
   232  	}
   233  	if f.rootDirectory == "" {
   234  		return fmt.Sprintf("Azure container %s", f.rootContainer)
   235  	}
   236  	return fmt.Sprintf("Azure container %s path %s", f.rootContainer, f.rootDirectory)
   237  }
   238  
   239  // Features returns the optional features of this Fs
   240  func (f *Fs) Features() *fs.Features {
   241  	return f.features
   242  }
   243  
   244  // parsePath parses a remote 'url'
   245  func parsePath(path string) (root string) {
   246  	root = strings.Trim(path, "/")
   247  	return
   248  }
   249  
   250  // split returns container and containerPath from the rootRelativePath
   251  // relative to f.root
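        //
        // For example (illustrative, not from the original source): with f.root set
        // to "mycontainer/dir", split("file.txt") would return
        // ("mycontainer", "dir/file.txt"), with the configured name/path encoding
        // then applied to each part.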
   252  func (f *Fs) split(rootRelativePath string) (containerName, containerPath string) {
   253  	containerName, containerPath = bucket.Split(path.Join(f.root, rootRelativePath))
   254  	return f.opt.Enc.FromStandardName(containerName), f.opt.Enc.FromStandardPath(containerPath)
   255  }
   256  
   257  // split returns container and containerPath from the object
   258  func (o *Object) split() (container, containerPath string) {
   259  	return o.fs.split(o.remote)
   260  }
   261  
   262  // validateAccessTier checks if azureblob supports user supplied tier
   263  func validateAccessTier(tier string) bool {
   264  	switch tier {
   265  	case string(azblob.AccessTierHot),
   266  		string(azblob.AccessTierCool),
   267  		string(azblob.AccessTierArchive):
   268  		// valid cases
   269  		return true
   270  	default:
   271  		return false
   272  	}
   273  }
   274  
   275  // retryErrorCodes is a slice of error codes that we will retry
   276  var retryErrorCodes = []int{
   277  	401, // Unauthorized (eg "Token has expired")
   278  	408, // Request Timeout
   279  	429, // Rate exceeded.
   280  	500, // Get occasional 500 Internal Server Error
   281  	503, // Service Unavailable
   282  	504, // Gateway Time-out
   283  }
   284  
   285  // shouldRetry returns a boolean as to whether this resp and err
   286  // deserve to be retried.  It returns the err as a convenience
   287  func (f *Fs) shouldRetry(err error) (bool, error) {
   288  	// FIXME interpret special errors - more to do here
   289  	if storageErr, ok := err.(azblob.StorageError); ok {
   290  		switch storageErr.ServiceCode() {
   291  		case "InvalidBlobOrBlock":
   292  			// These errors happen sometimes in multipart uploads
   293  			// because of block concurrency issues
   294  			return true, err
   295  		}
   296  		statusCode := storageErr.Response().StatusCode
   297  		for _, e := range retryErrorCodes {
   298  			if statusCode == e {
   299  				return true, err
   300  			}
   301  		}
   302  	}
   303  	return fserrors.ShouldRetry(err), err
   304  }
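        
        // shouldRetry is normally combined with the pacer, as done throughout this
        // file, so that transient failures are retried with backoff. A minimal
        // sketch (someBlobOperation is a hypothetical call):
        //
        //	err = f.pacer.Call(func() (bool, error) {
        //		_, err := someBlobOperation(ctx)
        //		return f.shouldRetry(err)
        //	})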
   305  
   306  func checkUploadChunkSize(cs fs.SizeSuffix) error {
   307  	const minChunkSize = fs.Byte
   308  	if cs < minChunkSize {
   309  		return errors.Errorf("%s is less than %s", cs, minChunkSize)
   310  	}
   311  	if cs > maxChunkSize {
   312  		return errors.Errorf("%s is greater than %s", cs, maxChunkSize)
   313  	}
   314  	return nil
   315  }
   316  
   317  func (f *Fs) setUploadChunkSize(cs fs.SizeSuffix) (old fs.SizeSuffix, err error) {
   318  	err = checkUploadChunkSize(cs)
   319  	if err == nil {
   320  		old, f.opt.ChunkSize = f.opt.ChunkSize, cs
   321  	}
   322  	return
   323  }
   324  
   325  func checkUploadCutoff(cs fs.SizeSuffix) error {
   326  	if cs > maxUploadCutoff {
   327  		return errors.Errorf("%v must be less than or equal to %v", cs, maxUploadCutoff)
   328  	}
   329  	return nil
   330  }
   331  
   332  func (f *Fs) setUploadCutoff(cs fs.SizeSuffix) (old fs.SizeSuffix, err error) {
   333  	err = checkUploadCutoff(cs)
   334  	if err == nil {
   335  		old, f.opt.UploadCutoff = f.opt.UploadCutoff, cs
   336  	}
   337  	return
   338  }
   339  
   340  // httpClientFactory creates a Factory object that sends HTTP requests
   341  // through rclone's http.Client.
   342  //
   343  // copied from azblob.newDefaultHTTPClientFactory
   344  func httpClientFactory(client *http.Client) pipeline.Factory {
   345  	return pipeline.FactoryFunc(func(next pipeline.Policy, po *pipeline.PolicyOptions) pipeline.PolicyFunc {
   346  		return func(ctx context.Context, request pipeline.Request) (pipeline.Response, error) {
   347  			r, err := client.Do(request.WithContext(ctx))
   348  			if err != nil {
   349  				err = pipeline.NewError(err, "HTTP request failed")
   350  			}
   351  			return pipeline.NewHTTPResponse(r), err
   352  		}
   353  	})
   354  }
   355  
   356  // newPipeline creates a Pipeline using the specified credentials and options.
   357  //
   358  // this code was copied from azblob.NewPipeline
   359  func (f *Fs) newPipeline(c azblob.Credential, o azblob.PipelineOptions) pipeline.Pipeline {
   360  	// Don't log stuff to syslog/Windows Event log
   361  	pipeline.SetForceLogEnabled(false)
   362  
   363  	// Closest to API goes first; closest to the wire goes last
   364  	factories := []pipeline.Factory{
   365  		azblob.NewTelemetryPolicyFactory(o.Telemetry),
   366  		azblob.NewUniqueRequestIDPolicyFactory(),
   367  		azblob.NewRetryPolicyFactory(o.Retry),
   368  		c,
   369  		pipeline.MethodFactoryMarker(), // indicates at what stage in the pipeline the method factory is invoked
   370  		azblob.NewRequestLogPolicyFactory(o.RequestLog),
   371  	}
   372  	return pipeline.NewPipeline(factories, pipeline.Options{HTTPSender: httpClientFactory(f.client), Log: o.Log})
   373  }
   374  
   375  // setRoot changes the root of the Fs
   376  func (f *Fs) setRoot(root string) {
   377  	f.root = parsePath(root)
   378  	f.rootContainer, f.rootDirectory = bucket.Split(f.root)
   379  }
   380  
   381  // NewFs constructs an Fs from the path, container:path
   382  func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) {
   383  	ctx := context.Background()
   384  	// Parse config into Options struct
   385  	opt := new(Options)
   386  	err := configstruct.Set(m, opt)
   387  	if err != nil {
   388  		return nil, err
   389  	}
   390  
   391  	err = checkUploadCutoff(opt.UploadCutoff)
   392  	if err != nil {
   393  		return nil, errors.Wrap(err, "azure: upload cutoff")
   394  	}
   395  	err = checkUploadChunkSize(opt.ChunkSize)
   396  	if err != nil {
   397  		return nil, errors.Wrap(err, "azure: chunk size")
   398  	}
   399  	if opt.ListChunkSize > maxListChunkSize {
   400  		return nil, errors.Errorf("azure: blob list size can't be greater than %v - was %v", maxListChunkSize, opt.ListChunkSize)
   401  	}
   402  	if opt.Endpoint == "" {
   403  		opt.Endpoint = storageDefaultBaseURL
   404  	}
   405  
   406  	if opt.AccessTier == "" {
   407  		opt.AccessTier = string(defaultAccessTier)
   408  	} else if !validateAccessTier(opt.AccessTier) {
   409  		return nil, errors.Errorf("Azure Blob: Supported access tiers are %s, %s and %s",
   410  			string(azblob.AccessTierHot), string(azblob.AccessTierCool), string(azblob.AccessTierArchive))
   411  	}
   412  
   413  	f := &Fs{
   414  		name:        name,
   415  		opt:         *opt,
   416  		pacer:       fs.NewPacer(pacer.NewS3(pacer.MinSleep(minSleep), pacer.MaxSleep(maxSleep), pacer.DecayConstant(decayConstant))),
   417  		uploadToken: pacer.NewTokenDispenser(fs.Config.Transfers),
   418  		client:      fshttp.NewClient(fs.Config),
   419  		cache:       bucket.NewCache(),
   420  		cntURLcache: make(map[string]*azblob.ContainerURL, 1),
   421  		pool: pool.New(
   422  			time.Duration(opt.MemoryPoolFlushTime),
   423  			int(opt.ChunkSize),
   424  			fs.Config.Transfers,
   425  			opt.MemoryPoolUseMmap,
   426  		),
   427  	}
   428  	f.setRoot(root)
   429  	f.features = (&fs.Features{
   430  		ReadMimeType:      true,
   431  		WriteMimeType:     true,
   432  		BucketBased:       true,
   433  		BucketBasedRootOK: true,
   434  		SetTier:           true,
   435  		GetTier:           true,
   436  	}).Fill(f)
   437  
   438  	var (
   439  		u          *url.URL
   440  		serviceURL azblob.ServiceURL
   441  	)
   442  	switch {
   443  	case opt.UseEmulator:
   444  		credential, err := azblob.NewSharedKeyCredential(emulatorAccount, emulatorAccountKey)
   445  		if err != nil {
   446  			return nil, errors.Wrap(err, "failed to parse credentials")
   447  		}
   448  		u, err = url.Parse(emulatorBlobEndpoint)
   449  		if err != nil {
   450  			return nil, errors.Wrap(err, "failed to make azure storage url from account and endpoint")
   451  		}
   452  		pipeline := f.newPipeline(credential, azblob.PipelineOptions{Retry: azblob.RetryOptions{TryTimeout: maxTryTimeout}})
   453  		serviceURL = azblob.NewServiceURL(*u, pipeline)
   454  	case opt.Account != "" && opt.Key != "":
   455  		credential, err := azblob.NewSharedKeyCredential(opt.Account, opt.Key)
   456  		if err != nil {
   457  			return nil, errors.Wrap(err, "failed to parse credentials")
   458  		}
   459  
   460  		u, err = url.Parse(fmt.Sprintf("https://%s.%s", opt.Account, opt.Endpoint))
   461  		if err != nil {
   462  			return nil, errors.Wrap(err, "failed to make azure storage url from account and endpoint")
   463  		}
   464  		pipeline := f.newPipeline(credential, azblob.PipelineOptions{Retry: azblob.RetryOptions{TryTimeout: maxTryTimeout}})
   465  		serviceURL = azblob.NewServiceURL(*u, pipeline)
   466  	case opt.SASURL != "":
   467  		u, err = url.Parse(opt.SASURL)
   468  		if err != nil {
   469  			return nil, errors.Wrap(err, "failed to parse SAS URL")
   470  		}
   471  		// use anonymous credentials in case of SAS URL
   472  		pipeline := f.newPipeline(azblob.NewAnonymousCredential(), azblob.PipelineOptions{Retry: azblob.RetryOptions{TryTimeout: maxTryTimeout}})
   473  		// Check if we have a container level SAS or an account level SAS
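        		// A container level SAS URL typically looks something like this
        		// (illustrative only, not a real URL):
        		//   https://myaccount.blob.core.windows.net/mycontainer?sv=...&sig=...
        		// whereas an account level SAS has no container segment in its path.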
   474  		parts := azblob.NewBlobURLParts(*u)
   475  		if parts.ContainerName != "" {
   476  			if f.rootContainer != "" && parts.ContainerName != f.rootContainer {
   477  				return nil, errors.New("Container name in SAS URL and container provided in command do not match")
   478  			}
   479  			containerURL := azblob.NewContainerURL(*u, pipeline)
   480  			f.cntURLcache[parts.ContainerName] = &containerURL
   481  			f.isLimited = true
   482  		} else {
   483  			serviceURL = azblob.NewServiceURL(*u, pipeline)
   484  		}
   485  	default:
   486  		return nil, errors.New("Need account+key or connectionString or sasURL")
   487  	}
   488  	f.svcURL = &serviceURL
   489  
   490  	if f.rootContainer != "" && f.rootDirectory != "" {
   491  		// Check to see if the (container,directory) is actually an existing file
   492  		oldRoot := f.root
   493  		newRoot, leaf := path.Split(oldRoot)
   494  		f.setRoot(newRoot)
   495  		_, err := f.NewObject(ctx, leaf)
   496  		if err != nil {
   497  			if err == fs.ErrorObjectNotFound || err == fs.ErrorNotAFile {
   498  				// File doesn't exist or is a directory so return old f
   499  				f.setRoot(oldRoot)
   500  				return f, nil
   501  			}
   502  			return nil, err
   503  		}
   504  		// return an error with an fs which points to the parent
   505  		return f, fs.ErrorIsFile
   506  	}
   507  	return f, nil
   508  }
   509  
   510  // return the container URL for the container passed in
   511  func (f *Fs) cntURL(container string) (containerURL *azblob.ContainerURL) {
   512  	f.cntURLcacheMu.Lock()
   513  	defer f.cntURLcacheMu.Unlock()
   514  	var ok bool
   515  	if containerURL, ok = f.cntURLcache[container]; !ok {
   516  		cntURL := f.svcURL.NewContainerURL(container)
   517  		containerURL = &cntURL
   518  		f.cntURLcache[container] = containerURL
   519  	}
   520  	return containerURL
   521  
   522  }
   523  
   524  // Return an Object from a path
   525  //
   526  // If it can't be found it returns the error fs.ErrorObjectNotFound.
   527  func (f *Fs) newObjectWithInfo(remote string, info *azblob.BlobItem) (fs.Object, error) {
   528  	o := &Object{
   529  		fs:     f,
   530  		remote: remote,
   531  	}
   532  	if info != nil {
   533  		err := o.decodeMetaDataFromBlob(info)
   534  		if err != nil {
   535  			return nil, err
   536  		}
   537  	} else {
   538  		err := o.readMetaData() // reads info and headers, returning an error
   539  		if err != nil {
   540  			return nil, err
   541  		}
   542  	}
   543  	return o, nil
   544  }
   545  
   546  // NewObject finds the Object at remote.  If it can't be found
   547  // it returns the error fs.ErrorObjectNotFound.
   548  func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
   549  	return f.newObjectWithInfo(remote, nil)
   550  }
   551  
   552  // getBlobReference creates an empty blob reference with no metadata
   553  func (f *Fs) getBlobReference(container, containerPath string) azblob.BlobURL {
   554  	return f.cntURL(container).NewBlobURL(containerPath)
   555  }
   556  
   557  // updateMetadataWithModTime adds the modTime passed in to o.meta.
   558  func (o *Object) updateMetadataWithModTime(modTime time.Time) {
   559  	// Make sure o.meta is not nil
   560  	if o.meta == nil {
   561  		o.meta = make(map[string]string, 1)
   562  	}
   563  
   564  	// Set modTimeKey in it
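        	// (with timeFormatOut the stored value looks like e.g. "2020-06-26T22:00:27.123456789Z" for a UTC time)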
   565  	o.meta[modTimeKey] = modTime.Format(timeFormatOut)
   566  }
   567  
   568  // Returns whether file is a directory marker or not
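        // (i.e. a zero length blob whose remote ends in "/", is empty, or whose
        // metadata carries hdi_isfolder = "true")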
   569  func isDirectoryMarker(size int64, metadata azblob.Metadata, remote string) bool {
   570  	// Directory markers are 0 length
   571  	if size == 0 {
   572  		// Note that metadata with hdi_isfolder = true seems to be a
   573  		// de facto standard for marking blobs as directories.
   574  		endsWithSlash := strings.HasSuffix(remote, "/")
   575  		if endsWithSlash || remote == "" || metadata["hdi_isfolder"] == "true" {
   576  			return true
   577  		}
   578  
   579  	}
   580  	return false
   581  }
   582  
   583  // listFn is called from list to handle an object
   584  type listFn func(remote string, object *azblob.BlobItem, isDirectory bool) error
   585  
   586  // list lists the objects into the function supplied from
   587  // the container and root supplied
   588  //
   589  // dir is the starting directory, "" for root
   590  //
   591  // The remote has prefix removed from it and if addContainer is set then
   592  // it adds the container to the start.
   593  func (f *Fs) list(ctx context.Context, container, directory, prefix string, addContainer bool, recurse bool, maxResults uint, fn listFn) error {
   594  	if f.cache.IsDeleted(container) {
   595  		return fs.ErrorDirNotFound
   596  	}
   597  	if prefix != "" {
   598  		prefix += "/"
   599  	}
   600  	if directory != "" {
   601  		directory += "/"
   602  	}
   603  	delimiter := ""
   604  	if !recurse {
   605  		delimiter = "/"
   606  	}
   607  
   608  	options := azblob.ListBlobsSegmentOptions{
   609  		Details: azblob.BlobListingDetails{
   610  			Copy:             false,
   611  			Metadata:         true,
   612  			Snapshots:        false,
   613  			UncommittedBlobs: false,
   614  			Deleted:          false,
   615  		},
   616  		Prefix:     directory,
   617  		MaxResults: int32(maxResults),
   618  	}
   619  	for marker := (azblob.Marker{}); marker.NotDone(); {
   620  		var response *azblob.ListBlobsHierarchySegmentResponse
   621  		err := f.pacer.Call(func() (bool, error) {
   622  			var err error
   623  			response, err = f.cntURL(container).ListBlobsHierarchySegment(ctx, marker, delimiter, options)
   624  			return f.shouldRetry(err)
   625  		})
   626  
   627  		if err != nil {
   628  			// Check http error code along with service code, current SDK doesn't populate service code correctly sometimes
   629  			if storageErr, ok := err.(azblob.StorageError); ok && (storageErr.ServiceCode() == azblob.ServiceCodeContainerNotFound || storageErr.Response().StatusCode == http.StatusNotFound) {
   630  				return fs.ErrorDirNotFound
   631  			}
   632  			return err
   633  		}
   634  		// Advance marker to next
   635  		marker = response.NextMarker
   636  		for i := range response.Segment.BlobItems {
   637  			file := &response.Segment.BlobItems[i]
   638  			// Finish if file name no longer has prefix
   639  			// if prefix != "" && !strings.HasPrefix(file.Name, prefix) {
   640  			// 	return nil
   641  			// }
   642  			remote := f.opt.Enc.ToStandardPath(file.Name)
   643  			if !strings.HasPrefix(remote, prefix) {
   644  				fs.Debugf(f, "Odd name received %q", remote)
   645  				continue
   646  			}
   647  			remote = remote[len(prefix):]
   648  			if isDirectoryMarker(*file.Properties.ContentLength, file.Metadata, remote) {
   649  				continue // skip directory marker
   650  			}
   651  			if addContainer {
   652  				remote = path.Join(container, remote)
   653  			}
   654  			// Send object
   655  			err = fn(remote, file, false)
   656  			if err != nil {
   657  				return err
   658  			}
   659  		}
   660  		// Send the subdirectories
   661  		for _, remote := range response.Segment.BlobPrefixes {
   662  			remote := strings.TrimRight(remote.Name, "/")
   663  			remote = f.opt.Enc.ToStandardPath(remote)
   664  			if !strings.HasPrefix(remote, prefix) {
   665  				fs.Debugf(f, "Odd directory name received %q", remote)
   666  				continue
   667  			}
   668  			remote = remote[len(prefix):]
   669  			if addContainer {
   670  				remote = path.Join(container, remote)
   671  			}
   672  			// Send object
   673  			err = fn(remote, nil, true)
   674  			if err != nil {
   675  				return err
   676  			}
   677  		}
   678  	}
   679  	return nil
   680  }
   681  
   682  // Convert a list item into a DirEntry
   683  func (f *Fs) itemToDirEntry(remote string, object *azblob.BlobItem, isDirectory bool) (fs.DirEntry, error) {
   684  	if isDirectory {
   685  		d := fs.NewDir(remote, time.Time{})
   686  		return d, nil
   687  	}
   688  	o, err := f.newObjectWithInfo(remote, object)
   689  	if err != nil {
   690  		return nil, err
   691  	}
   692  	return o, nil
   693  }
   694  
   695  // listDir lists a single directory
   696  func (f *Fs) listDir(ctx context.Context, container, directory, prefix string, addContainer bool) (entries fs.DirEntries, err error) {
   697  	err = f.list(ctx, container, directory, prefix, addContainer, false, f.opt.ListChunkSize, func(remote string, object *azblob.BlobItem, isDirectory bool) error {
   698  		entry, err := f.itemToDirEntry(remote, object, isDirectory)
   699  		if err != nil {
   700  			return err
   701  		}
   702  		if entry != nil {
   703  			entries = append(entries, entry)
   704  		}
   705  		return nil
   706  	})
   707  	if err != nil {
   708  		return nil, err
   709  	}
   710  	// container must be present if listing succeeded
   711  	f.cache.MarkOK(container)
   712  	return entries, nil
   713  }
   714  
   715  // listContainers returns all the containers to out
   716  func (f *Fs) listContainers(ctx context.Context) (entries fs.DirEntries, err error) {
   717  	if f.isLimited {
   718  		f.cntURLcacheMu.Lock()
   719  		for container := range f.cntURLcache {
   720  			d := fs.NewDir(container, time.Time{})
   721  			entries = append(entries, d)
   722  		}
   723  		f.cntURLcacheMu.Unlock()
   724  		return entries, nil
   725  	}
   726  	err = f.listContainersToFn(func(container *azblob.ContainerItem) error {
   727  		d := fs.NewDir(f.opt.Enc.ToStandardName(container.Name), container.Properties.LastModified)
   728  		f.cache.MarkOK(container.Name)
   729  		entries = append(entries, d)
   730  		return nil
   731  	})
   732  	if err != nil {
   733  		return nil, err
   734  	}
   735  	return entries, nil
   736  }
   737  
   738  // List the objects and directories in dir into entries.  The
   739  // entries can be returned in any order but should be for a
   740  // complete directory.
   741  //
   742  // dir should be "" to list the root, and should not have
   743  // trailing slashes.
   744  //
   745  // This should return ErrDirNotFound if the directory isn't
   746  // found.
   747  func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
   748  	container, directory := f.split(dir)
   749  	if container == "" {
   750  		if directory != "" {
   751  			return nil, fs.ErrorListBucketRequired
   752  		}
   753  		return f.listContainers(ctx)
   754  	}
   755  	return f.listDir(ctx, container, directory, f.rootDirectory, f.rootContainer == "")
   756  }
   757  
   758  // ListR lists the objects and directories of the Fs starting
   759  // from dir recursively into out.
   760  //
   761  // dir should be "" to start from the root, and should not
   762  // have trailing slashes.
   763  //
   764  // This should return ErrDirNotFound if the directory isn't
   765  // found.
   766  //
   767  // It should call callback for each tranche of entries read.
   768  // These need not be returned in any particular order.  If
   769  // callback returns an error then the listing will stop
   770  // immediately.
   771  //
   772  // Don't implement this unless you have a more efficient way
   773  // of listing recursively than doing a directory traversal.
   774  func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
   775  	container, directory := f.split(dir)
   776  	list := walk.NewListRHelper(callback)
   777  	listR := func(container, directory, prefix string, addContainer bool) error {
   778  		return f.list(ctx, container, directory, prefix, addContainer, true, f.opt.ListChunkSize, func(remote string, object *azblob.BlobItem, isDirectory bool) error {
   779  			entry, err := f.itemToDirEntry(remote, object, isDirectory)
   780  			if err != nil {
   781  				return err
   782  			}
   783  			return list.Add(entry)
   784  		})
   785  	}
   786  	if container == "" {
   787  		entries, err := f.listContainers(ctx)
   788  		if err != nil {
   789  			return err
   790  		}
   791  		for _, entry := range entries {
   792  			err = list.Add(entry)
   793  			if err != nil {
   794  				return err
   795  			}
   796  			container := entry.Remote()
   797  			err = listR(container, "", f.rootDirectory, true)
   798  			if err != nil {
   799  				return err
   800  			}
   801  			// container must be present if listing succeeded
   802  			f.cache.MarkOK(container)
   803  		}
   804  	} else {
   805  		err = listR(container, directory, f.rootDirectory, f.rootContainer == "")
   806  		if err != nil {
   807  			return err
   808  		}
   809  		// container must be present if listing succeeded
   810  		f.cache.MarkOK(container)
   811  	}
   812  	return list.Flush()
   813  }
   814  
   815  // listContainerFn is called from listContainersToFn to handle a container
   816  type listContainerFn func(*azblob.ContainerItem) error
   817  
   818  // listContainersToFn lists the containers to the function supplied
   819  func (f *Fs) listContainersToFn(fn listContainerFn) error {
   820  	params := azblob.ListContainersSegmentOptions{
   821  		MaxResults: int32(f.opt.ListChunkSize),
   822  	}
   823  	ctx := context.Background()
   824  	for marker := (azblob.Marker{}); marker.NotDone(); {
   825  		var response *azblob.ListContainersSegmentResponse
   826  		err := f.pacer.Call(func() (bool, error) {
   827  			var err error
   828  			response, err = f.svcURL.ListContainersSegment(ctx, marker, params)
   829  			return f.shouldRetry(err)
   830  		})
   831  		if err != nil {
   832  			return err
   833  		}
   834  
   835  		for i := range response.ContainerItems {
   836  			err = fn(&response.ContainerItems[i])
   837  			if err != nil {
   838  				return err
   839  			}
   840  		}
   841  		marker = response.NextMarker
   842  	}
   843  
   844  	return nil
   845  }
   846  
   847  // Put the object into the container
   848  //
   849  // Copy the reader in to the new object which is returned
   850  //
   851  // The new object may have been created if an error is returned
   852  func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
   853  	// Temporary Object under construction
   854  	fs := &Object{
   855  		fs:     f,
   856  		remote: src.Remote(),
   857  	}
   858  	return fs, fs.Update(ctx, in, src, options...)
   859  }
   860  
   861  // PutStream uploads to the remote path with the modTime given of indeterminate size
   862  func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
   863  	return f.Put(ctx, in, src, options...)
   864  }
   865  
   866  // Mkdir creates the container if it doesn't exist
   867  func (f *Fs) Mkdir(ctx context.Context, dir string) error {
   868  	container, _ := f.split(dir)
   869  	return f.makeContainer(ctx, container)
   870  }
   871  
   872  // makeContainer creates the container if it doesn't exist
   873  func (f *Fs) makeContainer(ctx context.Context, container string) error {
   874  	return f.cache.Create(container, func() error {
   875  		// If this is a SAS URL limited to a container then assume it is already created
   876  		if f.isLimited {
   877  			return nil
   878  		}
   879  		// now try to create the container
   880  		return f.pacer.Call(func() (bool, error) {
   881  			_, err := f.cntURL(container).Create(ctx, azblob.Metadata{}, azblob.PublicAccessNone)
   882  			if err != nil {
   883  				if storageErr, ok := err.(azblob.StorageError); ok {
   884  					switch storageErr.ServiceCode() {
   885  					case azblob.ServiceCodeContainerAlreadyExists:
   886  						return false, nil
   887  					case azblob.ServiceCodeContainerBeingDeleted:
   888  						// From https://docs.microsoft.com/en-us/rest/api/storageservices/delete-container
   889  						// When a container is deleted, a container with the same name cannot be created
   890  						// for at least 30 seconds; the container may not be available for more than 30
   891  						// seconds if the service is still processing the request.
   892  						time.Sleep(6 * time.Second) // default 10 retries will be 60 seconds
   893  						f.cache.MarkDeleted(container)
   894  						return true, err
   895  					}
   896  				}
   897  			}
   898  			return f.shouldRetry(err)
   899  		})
   900  	}, nil)
   901  }
   902  
   903  // isEmpty checks to see if a given (container, directory) is empty and returns an error if not
   904  func (f *Fs) isEmpty(ctx context.Context, container, directory string) (err error) {
   905  	empty := true
   906  	err = f.list(ctx, container, directory, f.rootDirectory, f.rootContainer == "", true, 1, func(remote string, object *azblob.BlobItem, isDirectory bool) error {
   907  		empty = false
   908  		return nil
   909  	})
   910  	if err != nil {
   911  		return err
   912  	}
   913  	if !empty {
   914  		return fs.ErrorDirectoryNotEmpty
   915  	}
   916  	return nil
   917  }
   918  
   919  // deleteContainer deletes the container.  It can delete a full
   920  // container so use isEmpty if you don't want that.
   921  func (f *Fs) deleteContainer(ctx context.Context, container string) error {
   922  	return f.cache.Remove(container, func() error {
   923  		options := azblob.ContainerAccessConditions{}
   924  		return f.pacer.Call(func() (bool, error) {
   925  			_, err := f.cntURL(container).GetProperties(ctx, azblob.LeaseAccessConditions{})
   926  			if err == nil {
   927  				_, err = f.cntURL(container).Delete(ctx, options)
   928  			}
   929  
   930  			if err != nil {
   931  				// Check http error code along with service code, current SDK doesn't populate service code correctly sometimes
   932  				if storageErr, ok := err.(azblob.StorageError); ok && (storageErr.ServiceCode() == azblob.ServiceCodeContainerNotFound || storageErr.Response().StatusCode == http.StatusNotFound) {
   933  					return false, fs.ErrorDirNotFound
   934  				}
   935  
   936  				return f.shouldRetry(err)
   937  			}
   938  
   939  			return f.shouldRetry(err)
   940  		})
   941  	})
   942  }
   943  
   944  // Rmdir deletes the container if the fs is at the root
   945  //
   946  // Returns an error if it isn't empty
   947  func (f *Fs) Rmdir(ctx context.Context, dir string) error {
   948  	container, directory := f.split(dir)
   949  	if container == "" || directory != "" {
   950  		return nil
   951  	}
   952  	err := f.isEmpty(ctx, container, directory)
   953  	if err != nil {
   954  		return err
   955  	}
   956  	return f.deleteContainer(ctx, container)
   957  }
   958  
   959  // Precision of the remote
   960  func (f *Fs) Precision() time.Duration {
   961  	return time.Nanosecond
   962  }
   963  
   964  // Hashes returns the supported hash sets.
   965  func (f *Fs) Hashes() hash.Set {
   966  	return hash.Set(hash.MD5)
   967  }
   968  
   969  // Purge deletes all the files and directories including the old versions.
   970  func (f *Fs) Purge(ctx context.Context) error {
   971  	dir := "" // forward compat!
   972  	container, directory := f.split(dir)
   973  	if container == "" || directory != "" {
   974  		// Delegate to caller if not root of a container
   975  		return fs.ErrorCantPurge
   976  	}
   977  	return f.deleteContainer(ctx, container)
   978  }
   979  
   980  // Copy src to this remote using server side copy operations.
   981  //
   982  // This is stored with the remote path given
   983  //
   984  // It returns the destination Object and a possible error
   985  //
   986  // Will only be called if src.Fs().Name() == f.Name()
   987  //
   988  // If it isn't possible then return fs.ErrorCantCopy
   989  func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
   990  	dstContainer, dstPath := f.split(remote)
   991  	err := f.makeContainer(ctx, dstContainer)
   992  	if err != nil {
   993  		return nil, err
   994  	}
   995  	srcObj, ok := src.(*Object)
   996  	if !ok {
   997  		fs.Debugf(src, "Can't copy - not same remote type")
   998  		return nil, fs.ErrorCantCopy
   999  	}
  1000  	dstBlobURL := f.getBlobReference(dstContainer, dstPath)
  1001  	srcBlobURL := srcObj.getBlobReference()
  1002  
  1003  	source, err := url.Parse(srcBlobURL.String())
  1004  	if err != nil {
  1005  		return nil, err
  1006  	}
  1007  
  1008  	options := azblob.BlobAccessConditions{}
  1009  	var startCopy *azblob.BlobStartCopyFromURLResponse
  1010  
  1011  	err = f.pacer.Call(func() (bool, error) {
  1012  		startCopy, err = dstBlobURL.StartCopyFromURL(ctx, *source, nil, azblob.ModifiedAccessConditions{}, options)
  1013  		return f.shouldRetry(err)
  1014  	})
  1015  	if err != nil {
  1016  		return nil, err
  1017  	}
  1018  
  1019  	copyStatus := startCopy.CopyStatus()
  1020  	for copyStatus == azblob.CopyStatusPending {
  1021  		time.Sleep(1 * time.Second)
  1022  		getMetadata, err := dstBlobURL.GetProperties(ctx, options)
  1023  		if err != nil {
  1024  			return nil, err
  1025  		}
  1026  		copyStatus = getMetadata.CopyStatus()
  1027  	}
  1028  
  1029  	return f.NewObject(ctx, remote)
  1030  }
  1031  
  1032  func (f *Fs) getMemoryPool(size int64) *pool.Pool {
  1033  	if size == int64(f.opt.ChunkSize) {
  1034  		return f.pool
  1035  	}
  1036  
  1037  	return pool.New(
  1038  		time.Duration(f.opt.MemoryPoolFlushTime),
  1039  		int(size),
  1040  		fs.Config.Transfers,
  1041  		f.opt.MemoryPoolUseMmap,
  1042  	)
  1043  }
  1044  
  1045  // ------------------------------------------------------------
  1046  
  1047  // Fs returns the parent Fs
  1048  func (o *Object) Fs() fs.Info {
  1049  	return o.fs
  1050  }
  1051  
  1052  // Return a string version
  1053  func (o *Object) String() string {
  1054  	if o == nil {
  1055  		return "<nil>"
  1056  	}
  1057  	return o.remote
  1058  }
  1059  
  1060  // Remote returns the remote path
  1061  func (o *Object) Remote() string {
  1062  	return o.remote
  1063  }
  1064  
  1065  // Hash returns the MD5 of an object returning a lowercase hex string
  1066  func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) {
  1067  	if t != hash.MD5 {
  1068  		return "", hash.ErrUnsupported
  1069  	}
  1070  	// Convert base64 encoded md5 into lower case hex
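        	// e.g. "1B2M2Y8AsgTpgAmY7PhCfg==" (the MD5 of an empty blob) becomes
        	// "d41d8cd98f00b204e9800998ecf8427e"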
  1071  	if o.md5 == "" {
  1072  		return "", nil
  1073  	}
  1074  	data, err := base64.StdEncoding.DecodeString(o.md5)
  1075  	if err != nil {
  1076  		return "", errors.Wrapf(err, "Failed to decode Content-MD5: %q", o.md5)
  1077  	}
  1078  	return hex.EncodeToString(data), nil
  1079  }
  1080  
  1081  // Size returns the size of an object in bytes
  1082  func (o *Object) Size() int64 {
  1083  	return o.size
  1084  }
  1085  
  1086  func (o *Object) setMetadata(metadata azblob.Metadata) {
  1087  	if len(metadata) > 0 {
  1088  		o.meta = metadata
  1089  		if modTime, ok := metadata[modTimeKey]; ok {
  1090  			when, err := time.Parse(timeFormatIn, modTime)
  1091  			if err != nil {
  1092  				fs.Debugf(o, "Couldn't parse %v = %q: %v", modTimeKey, modTime, err)
  1093  			}
  1094  			o.modTime = when
  1095  		}
  1096  	} else {
  1097  		o.meta = nil
  1098  	}
  1099  }
  1100  
  1101  // decodeMetaDataFromPropertiesResponse sets the metadata from the data passed in
  1102  //
  1103  // Sets
  1104  //  o.mimeType
  1105  //  o.modTime
  1106  //  o.size
  1107  //  o.md5
  1108  //  o.meta
  1109  func (o *Object) decodeMetaDataFromPropertiesResponse(info *azblob.BlobGetPropertiesResponse) (err error) {
  1110  	metadata := info.NewMetadata()
  1111  	size := info.ContentLength()
  1112  	if isDirectoryMarker(size, metadata, o.remote) {
  1113  		return fs.ErrorNotAFile
  1114  	}
  1115  	// NOTE - Client library always returns MD5 as base64 decoded string, Object needs to maintain
  1116  	// this as base64 encoded string.
  1117  	o.md5 = base64.StdEncoding.EncodeToString(info.ContentMD5())
  1118  	o.mimeType = info.ContentType()
  1119  	o.size = size
  1120  	o.modTime = info.LastModified()
  1121  	o.accessTier = azblob.AccessTierType(info.AccessTier())
  1122  	o.setMetadata(metadata)
  1123  
  1124  	return nil
  1125  }
  1126  
  1127  func (o *Object) decodeMetaDataFromBlob(info *azblob.BlobItem) (err error) {
  1128  	metadata := info.Metadata
  1129  	size := *info.Properties.ContentLength
  1130  	if isDirectoryMarker(size, metadata, o.remote) {
  1131  		return fs.ErrorNotAFile
  1132  	}
  1133  	// NOTE - Client library always returns MD5 as base64 decoded string, Object needs to maintain
  1134  	// this as base64 encoded string.
  1135  	o.md5 = base64.StdEncoding.EncodeToString(info.Properties.ContentMD5)
  1136  	o.mimeType = *info.Properties.ContentType
  1137  	o.size = size
  1138  	o.modTime = info.Properties.LastModified
  1139  	o.accessTier = info.Properties.AccessTier
  1140  	o.setMetadata(metadata)
  1141  	return nil
  1142  }
  1143  
  1144  // getBlobReference creates an empty blob reference with no metadata
  1145  func (o *Object) getBlobReference() azblob.BlobURL {
  1146  	container, directory := o.split()
  1147  	return o.fs.getBlobReference(container, directory)
  1148  }
  1149  
  1150  // clearMetaData clears enough metadata so readMetaData will re-read it
  1151  func (o *Object) clearMetaData() {
  1152  	o.modTime = time.Time{}
  1153  }
  1154  
  1155  // readMetaData gets the metadata if it hasn't already been fetched
  1156  //
  1157  // Sets
  1158  //  o.mimeType
  1159  //  o.modTime
  1160  //  o.size
  1161  //  o.md5
  1162  func (o *Object) readMetaData() (err error) {
  1163  	if !o.modTime.IsZero() {
  1164  		return nil
  1165  	}
  1166  	blob := o.getBlobReference()
  1167  
  1168  	// Read the blob properties (which include the metadata)
  1169  	options := azblob.BlobAccessConditions{}
  1170  	ctx := context.Background()
  1171  	var blobProperties *azblob.BlobGetPropertiesResponse
  1172  	err = o.fs.pacer.Call(func() (bool, error) {
  1173  		blobProperties, err = blob.GetProperties(ctx, options)
  1174  		return o.fs.shouldRetry(err)
  1175  	})
  1176  	if err != nil {
  1177  		// GetProperties does not work on directories, and the current SDK does not always populate the service code correctly, so check the regular HTTP response as well
  1178  		if storageErr, ok := err.(azblob.StorageError); ok && (storageErr.ServiceCode() == azblob.ServiceCodeBlobNotFound || storageErr.Response().StatusCode == http.StatusNotFound) {
  1179  			return fs.ErrorObjectNotFound
  1180  		}
  1181  		return err
  1182  	}
  1183  
  1184  	return o.decodeMetaDataFromPropertiesResponse(blobProperties)
  1185  }
  1186  
  1187  // ModTime returns the modification time of the object
  1188  //
  1189  // It attempts to read the objects mtime and if that isn't present the
  1190  // LastModified returned in the http headers
  1191  func (o *Object) ModTime(ctx context.Context) (result time.Time) {
  1192  	// The error is logged in readMetaData
  1193  	_ = o.readMetaData()
  1194  	return o.modTime
  1195  }
  1196  
  1197  // SetModTime sets the modification time of the local fs object
  1198  func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
  1199  	// Make sure o.meta is not nil
  1200  	if o.meta == nil {
  1201  		o.meta = make(map[string]string, 1)
  1202  	}
  1203  	// Set modTimeKey in it
  1204  	o.meta[modTimeKey] = modTime.Format(timeFormatOut)
  1205  
  1206  	blob := o.getBlobReference()
  1207  	err := o.fs.pacer.Call(func() (bool, error) {
  1208  		_, err := blob.SetMetadata(ctx, o.meta, azblob.BlobAccessConditions{})
  1209  		return o.fs.shouldRetry(err)
  1210  	})
  1211  	if err != nil {
  1212  		return err
  1213  	}
  1214  	o.modTime = modTime
  1215  	return nil
  1216  }
  1217  
  1218  // Storable returns if this object is storable
  1219  func (o *Object) Storable() bool {
  1220  	return true
  1221  }
  1222  
  1223  // Open an object for read
  1224  func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) {
  1225  	// Offset and Count for range download
  1226  	var offset int64
  1227  	var count int64
  1228  	if o.AccessTier() == azblob.AccessTierArchive {
  1229  		return nil, errors.Errorf("Blob in archive tier, you need to set tier to hot or cool first")
  1230  	}
  1231  	fs.FixRangeOption(options, o.size)
  1232  	for _, option := range options {
  1233  		switch x := option.(type) {
  1234  		case *fs.RangeOption:
  1235  			offset, count = x.Decode(o.size)
  1236  			if count < 0 {
  1237  				count = o.size - offset
  1238  			}
  1239  		case *fs.SeekOption:
  1240  			offset = x.Offset
  1241  		default:
  1242  			if option.Mandatory() {
  1243  				fs.Logf(o, "Unsupported mandatory option: %v", option)
  1244  			}
  1245  		}
  1246  	}
  1247  	blob := o.getBlobReference()
  1248  	ac := azblob.BlobAccessConditions{}
  1249  	var downloadResponse *azblob.DownloadResponse
  1250  	err = o.fs.pacer.Call(func() (bool, error) {
  1251  		downloadResponse, err = blob.Download(ctx, offset, count, ac, false)
  1252  		return o.fs.shouldRetry(err)
  1253  	})
  1254  	if err != nil {
  1255  		return nil, errors.Wrap(err, "failed to open for download")
  1256  	}
  1257  	in = downloadResponse.Body(azblob.RetryReaderOptions{})
  1258  	return in, nil
  1259  }
  1260  
  1261  // dontEncode is the characters that do not need percent-encoding
  1262  //
  1263  // The characters that do not need percent-encoding are a subset of
  1264  // the printable ASCII characters: upper-case letters, lower-case
  1265  // letters, digits, ".", "_", "-", "/", "~", "!", "$", "'", "(", ")",
  1266  // "*", ";", "=", ":", and "@". All other byte values in a UTF-8 must
  1267  // be replaced with "%" and the two-digit hex value of the byte.
  1268  const dontEncode = (`abcdefghijklmnopqrstuvwxyz` +
  1269  	`ABCDEFGHIJKLMNOPQRSTUVWXYZ` +
  1270  	`0123456789` +
  1271  	`._-/~!$'()*;=:@`)
  1272  
  1273  // noNeedToEncode is a bitmap of characters which don't need % encoding
  1274  var noNeedToEncode [256]bool
  1275  
  1276  func init() {
  1277  	for _, c := range dontEncode {
  1278  		noNeedToEncode[c] = true
  1279  	}
  1280  }
  1281  
  1282  // readSeeker joins an io.Reader and an io.Seeker
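        //
        // It lets the accounting-wrapped chunk reader (an io.Reader only) be handed
        // to StageBlock, which wants an io.ReadSeeker: Read calls go through the
        // wrapped reader while Seek calls go to the underlying bytes.Reader (see its
        // use in uploadMultipart below).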
  1283  type readSeeker struct {
  1284  	io.Reader
  1285  	io.Seeker
  1286  }
  1287  
  1288  // increment the slice passed in as LSB binary
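        //
        // For example, treating xs as a little-endian counter:
        //	[0 0 ...] -> [1 0 ...] -> ... -> [255 0 ...] -> [0 1 ...]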
  1289  func increment(xs []byte) {
  1290  	for i, digit := range xs {
  1291  		newDigit := digit + 1
  1292  		xs[i] = newDigit
  1293  		if newDigit >= digit {
  1294  			// exit if no carry
  1295  			break
  1296  		}
  1297  	}
  1298  }
  1299  
  1300  var warnStreamUpload sync.Once
  1301  
  1302  // uploadMultipart uploads a file using multipart upload
  1303  //
  1304  // Write a larger blob in chunks, using StageBlock (Put Block) and CommitBlockList (Put Block List).
  1305  func (o *Object) uploadMultipart(ctx context.Context, in io.Reader, size int64, blob *azblob.BlobURL, httpHeaders *azblob.BlobHTTPHeaders) (err error) {
  1306  	// Calculate correct chunkSize
  1307  	chunkSize := int64(o.fs.opt.ChunkSize)
  1308  	totalParts := -1
  1309  
  1310  	// Note that the max size of a file is 4.75 TB (100 MB X 50,000
  1311  	// blocks) and this is smaller than the max uncommitted block
  1312  	// size (9.52 TB) so we do not need to partially commit block
  1313  	// lists or garbage collect uncommitted blocks.
  1314  	//
  1315  	// See: https://docs.microsoft.com/en-gb/rest/api/storageservices/put-block
  1316  
  1317  	// size can be -1 here meaning we don't know the size of the incoming file.  We use ChunkSize
  1318  	// buffers here (default 4MB). With a maximum number of parts (50,000) this will be a file of
  1319  	// 195GB which seems like a reasonable limit.
  1320  	if size == -1 {
  1321  		warnStreamUpload.Do(func() {
  1322  			fs.Logf(o, "Streaming uploads using chunk size %v will have maximum file size of %v",
  1323  				o.fs.opt.ChunkSize, fs.SizeSuffix(chunkSize*maxTotalParts))
  1324  		})
  1325  	} else {
  1326  		// Adjust partSize until the number of parts is small enough.
  1327  		if size/chunkSize >= maxTotalParts {
  1328  			// Calculate partition size rounded up to the nearest MB
  1329  			chunkSize = (((size / maxTotalParts) >> 20) + 1) << 20
  1330  		}
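        		// For example (illustrative numbers, not from the original source):
        		// a 1 TiB upload with the default 4 MiB chunk size would need 262,144
        		// parts, so chunkSize is raised to size/maxTotalParts rounded up to the
        		// next MiB (21 MiB here), giving just under 50,000 parts.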
  1331  		if chunkSize > int64(maxChunkSize) {
  1332  			return errors.Errorf("can't upload as it is too big %v - takes more than %d chunks of %v", fs.SizeSuffix(size), maxTotalParts, fs.SizeSuffix(chunkSize))
  1333  		}
  1334  		totalParts = int(size / chunkSize)
  1335  		if size%chunkSize != 0 {
  1336  			totalParts++
  1337  		}
  1338  	}
  1339  
  1340  	fs.Debugf(o, "Multipart upload session started for %d parts of size %v", totalParts, fs.SizeSuffix(chunkSize))
  1341  
  1342  	// unwrap the accounting from the input, we use wrap to put it
  1343  	// back on after the buffering
  1344  	in, wrap := accounting.UnWrap(in)
  1345  
  1346  	// Upload the chunks
  1347  	var (
  1348  		g, gCtx       = errgroup.WithContext(ctx)
  1349  		remaining     = size                           // remaining size in file for logging only, -1 if size < 0
  1350  		position      = int64(0)                       // position in file
  1351  		memPool       = o.fs.getMemoryPool(chunkSize)  // pool to get memory from
  1352  		finished      = false                          // set when we have read EOF
  1353  		blocks        []string                         // list of blocks for finalize
  1354  		blockBlobURL  = blob.ToBlockBlobURL()          // Get BlockBlobURL, we will use default pipeline here
  1355  		ac            = azblob.LeaseAccessConditions{} // Use default lease access conditions
  1356  		binaryBlockID = make([]byte, 8)                // block counter as LSB first 8 bytes
  1357  	)
  1358  	for part := 0; !finished; part++ {
  1359  		// Get a block of memory from the pool and a token which limits concurrency
  1360  		o.fs.uploadToken.Get()
  1361  		buf := memPool.Get()
  1362  
  1363  		free := func() {
  1364  			memPool.Put(buf)       // return the buf
  1365  			o.fs.uploadToken.Put() // return the token
  1366  		}
  1367  
  1368  		// Fail fast: if an errgroup managed function returns an error,
  1369  		// gCtx is cancelled and there is no point in uploading the remaining parts.
  1370  		if gCtx.Err() != nil {
  1371  			free()
  1372  			break
  1373  		}
  1374  
  1375  		// Read the chunk
  1376  		n, err := readers.ReadFill(in, buf) // this can never return 0, nil
  1377  		if err == io.EOF {
  1378  			if n == 0 { // end if no data
  1379  				free()
  1380  				break
  1381  			}
  1382  			finished = true
  1383  		} else if err != nil {
  1384  			free()
  1385  			return errors.Wrap(err, "multipart upload failed to read source")
  1386  		}
  1387  		buf = buf[:n]
  1388  
  1389  		// increment the blockID and save the blocks for finalize
  1390  		increment(binaryBlockID)
  1391  		blockID := base64.StdEncoding.EncodeToString(binaryBlockID)
  1392  		blocks = append(blocks, blockID)
  1393  
  1394  		// Transfer the chunk
  1395  		fs.Debugf(o, "Uploading part %d/%d offset %v/%v part size %v", part+1, totalParts, fs.SizeSuffix(position), fs.SizeSuffix(size), fs.SizeSuffix(chunkSize))
  1396  		g.Go(func() (err error) {
  1397  			defer free()
  1398  
  1399  			// Upload the block, with MD5 for check
  1400  			md5sum := md5.Sum(buf)
  1401  			transactionalMD5 := md5sum[:]
  1402  			err = o.fs.pacer.Call(func() (bool, error) {
  1403  				bufferReader := bytes.NewReader(buf)
  1404  				wrappedReader := wrap(bufferReader)
  1405  				rs := readSeeker{wrappedReader, bufferReader}
  1406  				_, err = blockBlobURL.StageBlock(ctx, blockID, &rs, ac, transactionalMD5)
  1407  				return o.fs.shouldRetry(err)
  1408  			})
  1409  			if err != nil {
  1410  				return errors.Wrap(err, "multipart upload failed to upload part")
  1411  			}
  1412  			return nil
  1413  		})
  1414  
  1415  		// ready for next block
  1416  		if size >= 0 {
  1417  			remaining -= chunkSize
  1418  		}
  1419  		position += chunkSize
  1420  	}
  1421  	err = g.Wait()
  1422  	if err != nil {
  1423  		return err
  1424  	}
  1425  
  1426  	// Finalise the upload session
  1427  	err = o.fs.pacer.Call(func() (bool, error) {
  1428  		_, err := blockBlobURL.CommitBlockList(ctx, blocks, *httpHeaders, o.meta, azblob.BlobAccessConditions{})
  1429  		return o.fs.shouldRetry(err)
  1430  	})
  1431  	if err != nil {
  1432  		return errors.Wrap(err, "multipart upload failed to finalize")
  1433  	}
  1434  	return nil
  1435  }
  1436  
  1437  // Update the object with the contents of the io.Reader, modTime and size
  1438  //
  1439  // The new object may have been created if an error is returned
  1440  func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (err error) {
  1441  	container, _ := o.split()
  1442  	err = o.fs.makeContainer(ctx, container)
  1443  	if err != nil {
  1444  		return err
  1445  	}
  1446  	size := src.Size()
  1447  	// Update Mod time
  1448  	o.updateMetadataWithModTime(src.ModTime(ctx))
  1449  	if err != nil {
  1450  		return err
  1451  	}
  1452  
  1453  	blob := o.getBlobReference()
  1454  	httpHeaders := azblob.BlobHTTPHeaders{}
  1455  	httpHeaders.ContentType = fs.MimeType(ctx, o)
  1456  	// Compute the Content-MD5 of the file. For multipart uploads it
  1457  	// will be set in the PutBlockList API call using the 'x-ms-blob-content-md5' header.
  1458  	// Note: If multipart, an MD5 checksum will also be computed for each uploaded block
  1459  	// 		 in order to validate its integrity during transport.
  1460  	if !o.fs.opt.DisableCheckSum {
  1461  		if sourceMD5, _ := src.Hash(ctx, hash.MD5); sourceMD5 != "" {
  1462  			sourceMD5bytes, err := hex.DecodeString(sourceMD5)
  1463  			if err == nil {
  1464  				httpHeaders.ContentMD5 = sourceMD5bytes
  1465  			} else {
  1466  				fs.Debugf(o, "Failed to decode %q as MD5: %v", sourceMD5, err)
  1467  			}
  1468  		}
  1469  	}
  1470  
  1471  	putBlobOptions := azblob.UploadStreamToBlockBlobOptions{
  1472  		BufferSize:      int(o.fs.opt.ChunkSize),
  1473  		MaxBuffers:      4,
  1474  		Metadata:        o.meta,
  1475  		BlobHTTPHeaders: httpHeaders,
  1476  	}
  1477  	// FIXME Until https://github.com/Azure/azure-storage-blob-go/pull/75
  1478  	// is merged the SDK can't upload a single blob of exactly the chunk
  1479  	// size, so upload with a multipart upload to work around.
  1480  	// See: https://github.com/rclone/rclone/issues/2653
  1481  	multipartUpload := size < 0 || size >= int64(o.fs.opt.UploadCutoff)
  1482  	if size == int64(o.fs.opt.ChunkSize) {
  1483  		multipartUpload = true
  1484  		fs.Debugf(o, "Setting multipart upload for file of chunk size (%d) to work around SDK bug", size)
  1485  	}
  1486  
  1487  	// Don't retry, return a retry error instead
  1488  	err = o.fs.pacer.CallNoRetry(func() (bool, error) {
  1489  		if multipartUpload {
  1490  			// If a large file upload in chunks
  1491  			err = o.uploadMultipart(ctx, in, size, &blob, &httpHeaders)
  1492  		} else {
  1493  			// Write a small blob in one transaction
  1494  			blockBlobURL := blob.ToBlockBlobURL()
  1495  			_, err = azblob.UploadStreamToBlockBlob(ctx, in, blockBlobURL, putBlobOptions)
  1496  		}
  1497  		return o.fs.shouldRetry(err)
  1498  	})
  1499  	if err != nil {
  1500  		return err
  1501  	}
  1502  	// Refresh metadata on object
  1503  	o.clearMetaData()
  1504  	err = o.readMetaData()
  1505  	if err != nil {
  1506  		return err
  1507  	}
  1508  
  1509  	// If tier is not changed or not specified, do not attempt to invoke `SetBlobTier` operation
  1510  	if o.fs.opt.AccessTier == string(defaultAccessTier) || o.fs.opt.AccessTier == string(o.AccessTier()) {
  1511  		return nil
  1512  	}
  1513  
  1514  	// Now, set blob tier based on configured access tier
  1515  	return o.SetTier(o.fs.opt.AccessTier)
  1516  }
  1517  
  1518  // Remove an object
  1519  func (o *Object) Remove(ctx context.Context) error {
  1520  	blob := o.getBlobReference()
  1521  	snapShotOptions := azblob.DeleteSnapshotsOptionNone
  1522  	ac := azblob.BlobAccessConditions{}
  1523  	return o.fs.pacer.Call(func() (bool, error) {
  1524  		_, err := blob.Delete(ctx, snapShotOptions, ac)
  1525  		return o.fs.shouldRetry(err)
  1526  	})
  1527  }
  1528  
  1529  // MimeType of an Object if known, "" otherwise
  1530  func (o *Object) MimeType(ctx context.Context) string {
  1531  	return o.mimeType
  1532  }
  1533  
  1534  // AccessTier returns the access tier of an object; the default is AccessTierNone
  1535  func (o *Object) AccessTier() azblob.AccessTierType {
  1536  	return o.accessTier
  1537  }
  1538  
  1539  // SetTier performs changing object tier
  1540  func (o *Object) SetTier(tier string) error {
  1541  	if !validateAccessTier(tier) {
  1542  		return errors.Errorf("Tier %s not supported by Azure Blob Storage", tier)
  1543  	}
  1544  
  1545  	// Check if current tier already matches with desired tier
  1546  	if o.GetTier() == tier {
  1547  		return nil
  1548  	}
  1549  	desiredAccessTier := azblob.AccessTierType(tier)
  1550  	blob := o.getBlobReference()
  1551  	ctx := context.Background()
  1552  	err := o.fs.pacer.Call(func() (bool, error) {
  1553  		_, err := blob.SetTier(ctx, desiredAccessTier, azblob.LeaseAccessConditions{})
  1554  		return o.fs.shouldRetry(err)
  1555  	})
  1556  
  1557  	if err != nil {
  1558  		return errors.Wrap(err, "Failed to set Blob Tier")
  1559  	}
  1560  
  1561  	// Set the access tier on the local object as well; this typically
  1562  	// gets updated when the blob properties are fetched
  1563  	o.accessTier = desiredAccessTier
  1564  	fs.Debugf(o, "Successfully changed object tier to %s", tier)
  1565  
  1566  	return nil
  1567  }
  1568  
  1569  // GetTier returns object tier in azure as string
  1570  func (o *Object) GetTier() string {
  1571  	return string(o.accessTier)
  1572  }
  1573  
  1574  // Check the interfaces are satisfied
  1575  var (
  1576  	_ fs.Fs          = &Fs{}
  1577  	_ fs.Copier      = &Fs{}
  1578  	_ fs.PutStreamer = &Fs{}
  1579  	_ fs.Purger      = &Fs{}
  1580  	_ fs.ListRer     = &Fs{}
  1581  	_ fs.Object      = &Object{}
  1582  	_ fs.MimeTyper   = &Object{}
  1583  	_ fs.GetTierer   = &Object{}
  1584  	_ fs.SetTierer   = &Object{}
  1585  )