github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/backend/azure.go (about)

     1  //go:build azure
     2  
     3  // Package backend contains implementation of various backend providers.
     4  /*
     5   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     6   */
     7  package backend
     8  
     9  // TODO:
    10  // - check a variety of az clients instantiated below, and alternatives
    11  //
    12  // - support alternative authentication methods (currently, NewSharedKeyCredential only)
    13  //   ref: ./storage/azblob@v1.3.0/container/examples_test.go
    14  //
    15  // - [200224] stop using etag as obj. version - see IsImmutableStorageWithVersioningEnabled, blob.VersionID, and:
    16  //   ref: https://learn.microsoft.com/en-us/azure/storage/blobs/versioning-overview#how-blob-versioning-works
    17  
    18  import (
    19  	"context"
    20  	"encoding/hex"
    21  	"errors"
    22  	"io"
    23  	"net/http"
    24  	"os"
    25  	"regexp"
    26  	"strings"
    27  
    28  	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
    29  	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
    30  	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
    31  	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
    32  	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
    33  	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
    34  	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/service"
    35  	"github.com/NVIDIA/aistore/api/apc"
    36  	"github.com/NVIDIA/aistore/cmn"
    37  	"github.com/NVIDIA/aistore/cmn/cos"
    38  	"github.com/NVIDIA/aistore/cmn/debug"
    39  	"github.com/NVIDIA/aistore/cmn/nlog"
    40  	"github.com/NVIDIA/aistore/core"
    41  	"github.com/NVIDIA/aistore/core/meta"
    42  )
    43  
    44  type (
    45  	azbp struct {
    46  		t     core.TargetPut
    47  		creds *azblob.SharedKeyCredential
    48  		u     string
    49  		base
    50  	}
    51  )
    52  
    53  const (
    54  	azDefaultProto = "https://"
    55  	azHost         = ".blob.core.windows.net"
    56  
    57  	azAccNameEnvVar = "AZURE_STORAGE_ACCOUNT"
    58  	azAccKeyEnvVar  = "AZURE_STORAGE_KEY" // a.k.a. AZURE_STORAGE_PRIMARY_ACCOUNT_KEY or AZURE_STORAGE_SECONDARY_ACCOUNT_KEY
    59  
    60  	// ais
    61  	azURLEnvVar   = "AIS_AZURE_URL"
    62  	azProtoEnvVar = "AIS_AZURE_PROTO"
    63  )
    64  
    65  const (
    66  	azErrPrefix = "azure-error["
    67  )
    68  
    69  // parse azure errors
    70  var (
    71  	azCleanErrRegex = regexp.MustCompile(`[^a-zA-Z0-9 ]+`)
    72  )
    73  
    74  // interface guard
    75  var _ core.Backend = (*azbp)(nil)
    76  
    77  func azProto() string {
    78  	proto := os.Getenv(azProtoEnvVar)
    79  	if proto == "" {
    80  		proto = azDefaultProto
    81  	}
    82  	return proto
    83  }
    84  
    85  func azAccName() string { return os.Getenv(azAccNameEnvVar) }
    86  func azAccKey() string  { return os.Getenv(azAccKeyEnvVar) }
    87  
    88  func asEndpoint() string {
    89  	blurl := os.Getenv(azURLEnvVar)
    90  	switch {
    91  	case blurl == "":
    92  		// the default
    93  		return azProto() + azAccName() + azHost
    94  	case strings.HasPrefix(blurl, "http"):
    95  		return blurl
    96  	default:
    97  		if !strings.HasPrefix(blurl, ".") {
    98  			blurl = "." + blurl
    99  		}
   100  		return azProto() + azAccName() + blurl
   101  	}
   102  }
   103  
   104  func NewAzure(t core.TargetPut) (core.Backend, error) {
   105  	blurl := asEndpoint()
   106  
   107  	// NOTE: NewSharedKeyCredential requires account name and its primary or secondary key
   108  	creds, err := azblob.NewSharedKeyCredential(azAccName(), azAccKey())
   109  	if err != nil {
   110  		return nil, cmn.NewErrFailedTo(nil, azErrPrefix+": init]", "credentials", err)
   111  	}
   112  
   113  	return &azbp{
   114  		t:     t,
   115  		creds: creds,
   116  		u:     blurl,
   117  		base:  base{apc.Azure},
   118  	}, nil
   119  }
   120  
   121  // (compare w/ cmn/backend)
   122  func azEncodeEtag(etag azcore.ETag) string { return cmn.UnquoteCEV(string(etag)) }
   123  
   124  func azEncodeChecksum(v []byte) string {
   125  	if len(v) == 0 {
   126  		return ""
   127  	}
   128  	return hex.EncodeToString(v)
   129  }
   130  
   131  //
   132  // format and parse errors
   133  //
   134  
   135  const (
   136  	azErrDesc = "Description"
   137  	azErrResp = "RESPONSE"
   138  	azErrCode = "Code: " // and CODE:
   139  )
   140  
   141  func azureErrorToAISError(azureError error, bck *cmn.Bck, objName string) (int, error) {
   142  	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
   143  		nlog.InfoDepth(1, "begin azure error =========================")
   144  		nlog.InfoDepth(1, azureError)
   145  		nlog.InfoDepth(1, "end azure error ===========================")
   146  	}
   147  
   148  	var stgErr *azcore.ResponseError
   149  	if !errors.As(azureError, &stgErr) {
   150  		return http.StatusInternalServerError, azureError
   151  	}
   152  	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
   153  		nlog.InfoDepth(1, "ErrorCode:", stgErr.ErrorCode, "StatusCode:", stgErr.StatusCode)
   154  	}
   155  
   156  	// NOTE: error-codes documentation seems to be incomplete and/or outdated
   157  	// ref: https://learn.microsoft.com/en-us/rest/api/storageservices/common-rest-api-error-codes
   158  
   159  	switch bloberror.Code(stgErr.ErrorCode) {
   160  	case bloberror.ContainerNotFound:
   161  		return http.StatusNotFound, cmn.NewErrRemoteBckNotFound(bck)
   162  	case bloberror.BlobNotFound:
   163  		return http.StatusNotFound, errors.New(azErrPrefix + "NotFound: " + bck.Cname(objName) + "]")
   164  	case bloberror.InvalidResourceName:
   165  		if objName != "" {
   166  			return http.StatusNotFound, errors.New(azErrPrefix + "NotFound: " + bck.Cname(objName) + "]")
   167  		}
   168  	}
   169  
   170  	// NOTE above
   171  	if objName == "" && bloberror.Code(stgErr.ErrorCode) == bloberror.OutOfRangeInput {
   172  		return http.StatusNotFound, cmn.NewErrRemoteBckNotFound(bck)
   173  	}
   174  
   175  	// azure error is usually a sizeable multi-line text with items including:
   176  	// request ID, authorization, variery of x-ms-* headers, server and user agent, and more
   177  
   178  	var (
   179  		status      = stgErr.StatusCode
   180  		code        string
   181  		description string
   182  		lines       = strings.Split(azureError.Error(), "\n")
   183  	)
   184  	if resp := stgErr.RawResponse; resp != nil {
   185  		resp.Body.Close()
   186  		debug.Assertf(resp.StatusCode == stgErr.StatusCode, "%d vs %d", resp.StatusCode, stgErr.StatusCode) // checking
   187  		status = resp.StatusCode
   188  	}
   189  	for _, line := range lines {
   190  		if strings.HasPrefix(line, azErrDesc) {
   191  			description = azCleanErrRegex.ReplaceAllString(line[len(azErrDesc):], "")
   192  		} else if strings.HasPrefix(line, azErrResp) {
   193  			i := max(0, strings.Index(line, ": "))
   194  			// alternatively, take "^RESPONSE ...: <...>" for description
   195  			description = azCleanErrRegex.ReplaceAllString(line[i:], "")
   196  		}
   197  		if i := strings.Index(line, azErrCode); i > 0 {
   198  			code = azCleanErrRegex.ReplaceAllString(line[i+len(azErrCode):], "")
   199  		} else if i := strings.Index(line, strings.ToUpper(azErrCode)); i > 0 {
   200  			code = azCleanErrRegex.ReplaceAllString(line[i+len(azErrCode):], "")
   201  		}
   202  	}
   203  	if code != "" && description != "" {
   204  		return status, errors.New(azErrPrefix + code + ": " + strings.TrimSpace(description) + "]")
   205  	}
   206  	debug.Assert(false, azureError) // expecting to parse
   207  	return status, azureError
   208  }
   209  
   210  // as core.Backend --------------------------------------------------------------
   211  
   212  //
   213  // HEAD BUCKET
   214  //
   215  
   216  func (azbp *azbp) HeadBucket(ctx context.Context, bck *meta.Bck) (cos.StrKVs, int, error) {
   217  	var (
   218  		cloudBck = bck.RemoteBck()
   219  		cntURL   = azbp.u + "/" + cloudBck.Name
   220  	)
   221  	client, err := container.NewClientWithSharedKeyCredential(cntURL, azbp.creds, nil)
   222  	if err != nil {
   223  		status, err := azureErrorToAISError(err, cloudBck, "")
   224  		return nil, status, err
   225  	}
   226  	resp, err := client.GetProperties(ctx, nil)
   227  	if err != nil {
   228  		status, err := azureErrorToAISError(err, cloudBck, "")
   229  		return nil, status, err
   230  	}
   231  
   232  	bckProps := make(cos.StrKVs, 2)
   233  	bckProps[apc.HdrBackendProvider] = apc.Azure
   234  
   235  	// TODO #200224
   236  	if true || resp.IsImmutableStorageWithVersioningEnabled != nil && *resp.IsImmutableStorageWithVersioningEnabled {
   237  		bckProps[apc.HdrBucketVerEnabled] = "true"
   238  	} else {
   239  		bckProps[apc.HdrBucketVerEnabled] = "false"
   240  	}
   241  	return bckProps, http.StatusOK, nil
   242  }
   243  
   244  //
   245  // LIST OBJECTS
   246  //
   247  
   248  // TODO: support non-recursive (apc.LsNoRecursion) operation, as in:
   249  // $ az storage blob list -c abc --prefix sub/ --delimiter /
   250  // See also: aws.go, gcp.go
   251  func (azbp *azbp) ListObjects(bck *meta.Bck, msg *apc.LsoMsg, lst *cmn.LsoRes) (int, error) {
   252  	msg.PageSize = calcPageSize(msg.PageSize, bck.MaxPageSize())
   253  	var (
   254  		cloudBck = bck.RemoteBck()
   255  		cntURL   = azbp.u + "/" + cloudBck.Name
   256  		num      = int32(msg.PageSize)
   257  		opts     = container.ListBlobsFlatOptions{Prefix: apc.Ptr(msg.Prefix), MaxResults: &num}
   258  	)
   259  	client, err := container.NewClientWithSharedKeyCredential(cntURL, azbp.creds, nil)
   260  	if err != nil {
   261  		return azureErrorToAISError(err, cloudBck, "")
   262  	}
   263  	if cmn.Rom.FastV(4, cos.SmoduleBackend) {
   264  		nlog.Infof("list_objects %s", cloudBck.Name)
   265  	}
   266  	if msg.ContinuationToken != "" {
   267  		opts.Marker = apc.Ptr(msg.ContinuationToken)
   268  	}
   269  
   270  	pager := client.NewListBlobsFlatPager(&opts)
   271  	resp, err := pager.NextPage(context.Background())
   272  	if err != nil {
   273  		return azureErrorToAISError(err, cloudBck, "")
   274  	}
   275  
   276  	var (
   277  		custom     cos.StrKVs
   278  		l          = len(resp.Segment.BlobItems)
   279  		wantCustom = msg.WantProp(apc.GetPropsCustom)
   280  	)
   281  	for i := len(lst.Entries); i < l; i++ {
   282  		lst.Entries = append(lst.Entries, &cmn.LsoEnt{}) // add missing empty
   283  	}
   284  	if wantCustom {
   285  		custom = make(cos.StrKVs, 4) // reuse
   286  	}
   287  	for idx := range resp.Segment.BlobItems {
   288  		var (
   289  			blob  = resp.Segment.BlobItems[idx]
   290  			entry = lst.Entries[idx]
   291  		)
   292  		entry.Name = *blob.Name
   293  		entry.Size = *blob.Properties.ContentLength
   294  		if msg.IsFlagSet(apc.LsNameOnly) || msg.IsFlagSet(apc.LsNameSize) {
   295  			continue
   296  		}
   297  
   298  		entry.Checksum = azEncodeChecksum(blob.Properties.ContentMD5)
   299  
   300  		etag := azEncodeEtag(*blob.Properties.ETag)
   301  		entry.Version = etag // (TODO a the top)
   302  
   303  		// custom
   304  		if wantCustom {
   305  			clear(custom)
   306  			custom[cmn.ETag] = etag
   307  			if !blob.Properties.LastModified.IsZero() {
   308  				custom[cmn.LastModified] = fmtTime(*blob.Properties.LastModified)
   309  			}
   310  			if blob.Properties.ContentType != nil {
   311  				custom[cos.HdrContentType] = *blob.Properties.ContentType
   312  			}
   313  			if blob.VersionID != nil {
   314  				custom[cmn.VersionObjMD] = *blob.VersionID
   315  			}
   316  			entry.Custom = cmn.CustomMD2S(custom)
   317  		}
   318  	}
   319  	lst.Entries = lst.Entries[:l]
   320  
   321  	if resp.NextMarker != nil {
   322  		lst.ContinuationToken = *resp.NextMarker
   323  	}
   324  	if cmn.Rom.FastV(4, cos.SmoduleBackend) {
   325  		nlog.Infof("[list_objects] count %d(marker: %s)", len(lst.Entries), lst.ContinuationToken)
   326  	}
   327  	return 0, nil
   328  }
   329  
   330  //
   331  // LIST BUCKETS
   332  //
   333  
   334  func (azbp *azbp) ListBuckets(cmn.QueryBcks) (bcks cmn.Bcks, _ int, _ error) {
   335  	serviceClient, err := service.NewClientWithSharedKeyCredential(azbp.u, azbp.creds, nil)
   336  	if err != nil {
   337  		status, err := azureErrorToAISError(err, &cmn.Bck{Provider: apc.Azure}, "")
   338  		return nil, status, err
   339  	}
   340  	pager := serviceClient.NewListContainersPager(&service.ListContainersOptions{})
   341  	for pager.More() {
   342  		resp, err := pager.NextPage(context.TODO())
   343  		if err != nil {
   344  			status, err := azureErrorToAISError(err, &cmn.Bck{Provider: apc.Azure}, "")
   345  			return bcks, status, err
   346  		}
   347  		for _, ci := range resp.ContainerItems {
   348  			bcks = append(bcks, cmn.Bck{
   349  				Name:     *ci.Name,
   350  				Provider: apc.Azure,
   351  			})
   352  		}
   353  	}
   354  	if cmn.Rom.FastV(4, cos.SmoduleBackend) {
   355  		nlog.Infof("[list_buckets] count %d", len(bcks))
   356  	}
   357  	return bcks, 0, nil
   358  }
   359  
   360  //
   361  // HEAD OBJECT
   362  //
   363  
   364  func (azbp *azbp) HeadObj(ctx context.Context, lom *core.LOM, _ *http.Request) (*cmn.ObjAttrs, int, error) {
   365  	var (
   366  		cloudBck = lom.Bucket().RemoteBck()
   367  		blURL    = azbp.u + "/" + cloudBck.Name + "/" + lom.ObjName
   368  	)
   369  	client, err := blockblob.NewClientWithSharedKeyCredential(blURL, azbp.creds, nil)
   370  	if err != nil {
   371  		status, err := azureErrorToAISError(err, cloudBck, lom.ObjName)
   372  		return nil, status, err
   373  	}
   374  	resp, err := client.GetProperties(ctx, nil)
   375  	if err != nil {
   376  		status, err := azureErrorToAISError(err, cloudBck, lom.ObjName)
   377  		return nil, status, err
   378  	}
   379  
   380  	debug.Assert(resp.IsCurrentVersion == nil || *resp.IsCurrentVersion, "expecting current/latest/the-only ver")
   381  
   382  	oa := &cmn.ObjAttrs{}
   383  	oa.CustomMD = make(cos.StrKVs, 6)
   384  	oa.SetCustomKey(cmn.SourceObjMD, apc.Azure)
   385  	oa.Size = *resp.ContentLength
   386  
   387  	etag := azEncodeEtag(*resp.ETag)
   388  	oa.SetCustomKey(cmn.ETag, etag)
   389  
   390  	oa.Ver = etag // TODO #200224
   391  
   392  	if md5 := azEncodeChecksum(resp.ContentMD5); md5 != "" {
   393  		oa.SetCustomKey(cmn.MD5ObjMD, md5)
   394  	}
   395  	if v := resp.LastModified; v != nil {
   396  		oa.SetCustomKey(cmn.LastModified, fmtTime(*v))
   397  	}
   398  	if v := resp.ContentType; v != nil {
   399  		// unlike other custom attrs, "Content-Type" is not getting stored w/ LOM
   400  		// - only shown via list-objects and HEAD when not present
   401  		oa.SetCustomKey(cos.HdrContentType, *v)
   402  	}
   403  	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
   404  		nlog.Infof("[head_object] %s", lom)
   405  	}
   406  	return oa, 0, nil
   407  }
   408  
   409  //
   410  // GET OBJECT
   411  //
   412  
   413  func (azbp *azbp) GetObj(ctx context.Context, lom *core.LOM, owt cmn.OWT, _ *http.Request) (int, error) {
   414  	res := azbp.GetObjReader(ctx, lom, 0, 0)
   415  	if res.Err != nil {
   416  		return res.ErrCode, res.Err
   417  	}
   418  	params := allocPutParams(res, owt)
   419  	err := azbp.t.PutObject(lom, params)
   420  	core.FreePutParams(params)
   421  	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
   422  		nlog.Infoln("[get_object]", lom.String(), err)
   423  	}
   424  	return 0, err
   425  }
   426  
   427  func (azbp *azbp) GetObjReader(ctx context.Context, lom *core.LOM, offset, length int64) (res core.GetReaderResult) {
   428  	var (
   429  		cloudBck = lom.Bucket().RemoteBck()
   430  		blURL    = azbp.u + "/" + cloudBck.Name + "/" + lom.ObjName
   431  	)
   432  	client, err := blockblob.NewClientWithSharedKeyCredential(blURL, azbp.creds, nil)
   433  	if err != nil {
   434  		res.ErrCode, res.Err = azureErrorToAISError(err, cloudBck, lom.ObjName)
   435  		return
   436  	}
   437  
   438  	// Get checksum
   439  	respProps, err := client.GetProperties(ctx, nil)
   440  	if err != nil {
   441  		res.ErrCode, res.Err = azureErrorToAISError(err, cloudBck, lom.ObjName)
   442  		return
   443  	}
   444  
   445  	// (0, 0) range indicates "whole object"
   446  	var opts blob.DownloadStreamOptions
   447  	opts.Range.Count = length
   448  	opts.Range.Offset = offset
   449  	resp, err := client.DownloadStream(ctx, &opts)
   450  	if err != nil {
   451  		res.ErrCode, res.Err = azureErrorToAISError(err, cloudBck, lom.ObjName)
   452  		if res.ErrCode == http.StatusRequestedRangeNotSatisfiable {
   453  			res.Err = cmn.NewErrRangeNotSatisfiable(res.Err, nil, 0)
   454  		}
   455  		return res
   456  	}
   457  
   458  	debug.Assert(resp.IsCurrentVersion == nil || *resp.IsCurrentVersion, "expecting current/latest/the-only ver")
   459  	res.Size = *resp.ContentLength
   460  
   461  	if length == 0 {
   462  		// custom metadata
   463  		lom.SetCustomKey(cmn.SourceObjMD, apc.Azure)
   464  		etag := azEncodeEtag(*respProps.ETag)
   465  		lom.SetCustomKey(cmn.ETag, etag)
   466  
   467  		lom.SetVersion(etag) // TODO #200224
   468  
   469  		if md5 := azEncodeChecksum(respProps.ContentMD5); md5 != "" {
   470  			lom.SetCustomKey(cmn.MD5ObjMD, md5)
   471  			res.ExpCksum = cos.NewCksum(cos.ChecksumMD5, md5)
   472  		}
   473  	}
   474  
   475  	res.R = resp.Body
   476  	return res
   477  }
   478  
   479  //
   480  // PUT OBJECT
   481  //
   482  
   483  func (azbp *azbp) PutObj(r io.ReadCloser, lom *core.LOM, _ *http.Request) (int, error) {
   484  	defer cos.Close(r)
   485  
   486  	client, err := azblob.NewClientWithSharedKeyCredential(azbp.u, azbp.creds, nil)
   487  	if err != nil {
   488  		return azureErrorToAISError(err, &cmn.Bck{Provider: apc.Azure}, "")
   489  	}
   490  	cloudBck := lom.Bck().RemoteBck()
   491  
   492  	opts := azblob.UploadStreamOptions{}
   493  	if size := lom.SizeBytes(true); size > cos.MiB {
   494  		opts.Concurrency = int(min((size+cos.MiB-1)/cos.MiB, 8))
   495  	}
   496  
   497  	resp, err := client.UploadStream(context.Background(), cloudBck.Name, lom.ObjName, r, &opts)
   498  	if err != nil {
   499  		return azureErrorToAISError(err, cloudBck, lom.ObjName)
   500  	}
   501  
   502  	etag := azEncodeEtag(*resp.ETag)
   503  	lom.SetCustomKey(cmn.ETag, etag)
   504  
   505  	lom.SetVersion(etag) // TODO #200224
   506  
   507  	if v := resp.LastModified; v != nil {
   508  		lom.SetCustomKey(cmn.LastModified, fmtTime(*v))
   509  	}
   510  	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
   511  		nlog.Infof("[put_object] %s", lom)
   512  	}
   513  	return http.StatusOK, nil
   514  }
   515  
   516  //
   517  // DELETE OBJECT
   518  //
   519  
   520  func (azbp *azbp) DeleteObj(lom *core.LOM) (int, error) {
   521  	client, err := azblob.NewClientWithSharedKeyCredential(azbp.u, azbp.creds, nil)
   522  	if err != nil {
   523  		return azureErrorToAISError(err, &cmn.Bck{Provider: apc.Azure}, "")
   524  	}
   525  	cloudBck := lom.Bck().RemoteBck()
   526  
   527  	_, err = client.DeleteBlob(context.Background(), cloudBck.Name, lom.ObjName, nil)
   528  	if err != nil {
   529  		return azureErrorToAISError(err, cloudBck, lom.ObjName)
   530  	}
   531  	return http.StatusOK, nil
   532  }