github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/backend/azure.go (about) 1 //go:build azure 2 3 // Package backend contains implementation of various backend providers. 4 /* 5 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 6 */ 7 package backend 8 9 // TODO: 10 // - check a variety of az clients instantiated below, and alternatives 11 // 12 // - support alternative authentication methods (currently, NewSharedKeyCredential only) 13 // ref: ./storage/azblob@v1.3.0/container/examples_test.go 14 // 15 // - [200224] stop using etag as obj. version - see IsImmutableStorageWithVersioningEnabled, blob.VersionID, and: 16 // ref: https://learn.microsoft.com/en-us/azure/storage/blobs/versioning-overview#how-blob-versioning-works 17 18 import ( 19 "context" 20 "encoding/hex" 21 "errors" 22 "io" 23 "net/http" 24 "os" 25 "regexp" 26 "strings" 27 28 "github.com/Azure/azure-sdk-for-go/sdk/azcore" 29 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" 30 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob" 31 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror" 32 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob" 33 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container" 34 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/service" 35 "github.com/NVIDIA/aistore/api/apc" 36 "github.com/NVIDIA/aistore/cmn" 37 "github.com/NVIDIA/aistore/cmn/cos" 38 "github.com/NVIDIA/aistore/cmn/debug" 39 "github.com/NVIDIA/aistore/cmn/nlog" 40 "github.com/NVIDIA/aistore/core" 41 "github.com/NVIDIA/aistore/core/meta" 42 ) 43 44 type ( 45 azbp struct { 46 t core.TargetPut 47 creds *azblob.SharedKeyCredential 48 u string 49 base 50 } 51 ) 52 53 const ( 54 azDefaultProto = "https://" 55 azHost = ".blob.core.windows.net" 56 57 azAccNameEnvVar = "AZURE_STORAGE_ACCOUNT" 58 azAccKeyEnvVar = "AZURE_STORAGE_KEY" // a.k.a. AZURE_STORAGE_PRIMARY_ACCOUNT_KEY or AZURE_STORAGE_SECONDARY_ACCOUNT_KEY 59 60 // ais 61 azURLEnvVar = "AIS_AZURE_URL" 62 azProtoEnvVar = "AIS_AZURE_PROTO" 63 ) 64 65 const ( 66 azErrPrefix = "azure-error[" 67 ) 68 69 // parse azure errors 70 var ( 71 azCleanErrRegex = regexp.MustCompile(`[^a-zA-Z0-9 ]+`) 72 ) 73 74 // interface guard 75 var _ core.Backend = (*azbp)(nil) 76 77 func azProto() string { 78 proto := os.Getenv(azProtoEnvVar) 79 if proto == "" { 80 proto = azDefaultProto 81 } 82 return proto 83 } 84 85 func azAccName() string { return os.Getenv(azAccNameEnvVar) } 86 func azAccKey() string { return os.Getenv(azAccKeyEnvVar) } 87 88 func asEndpoint() string { 89 blurl := os.Getenv(azURLEnvVar) 90 switch { 91 case blurl == "": 92 // the default 93 return azProto() + azAccName() + azHost 94 case strings.HasPrefix(blurl, "http"): 95 return blurl 96 default: 97 if !strings.HasPrefix(blurl, ".") { 98 blurl = "." + blurl 99 } 100 return azProto() + azAccName() + blurl 101 } 102 } 103 104 func NewAzure(t core.TargetPut) (core.Backend, error) { 105 blurl := asEndpoint() 106 107 // NOTE: NewSharedKeyCredential requires account name and its primary or secondary key 108 creds, err := azblob.NewSharedKeyCredential(azAccName(), azAccKey()) 109 if err != nil { 110 return nil, cmn.NewErrFailedTo(nil, azErrPrefix+": init]", "credentials", err) 111 } 112 113 return &azbp{ 114 t: t, 115 creds: creds, 116 u: blurl, 117 base: base{apc.Azure}, 118 }, nil 119 } 120 121 // (compare w/ cmn/backend) 122 func azEncodeEtag(etag azcore.ETag) string { return cmn.UnquoteCEV(string(etag)) } 123 124 func azEncodeChecksum(v []byte) string { 125 if len(v) == 0 { 126 return "" 127 } 128 return hex.EncodeToString(v) 129 } 130 131 // 132 // format and parse errors 133 // 134 135 const ( 136 azErrDesc = "Description" 137 azErrResp = "RESPONSE" 138 azErrCode = "Code: " // and CODE: 139 ) 140 141 func azureErrorToAISError(azureError error, bck *cmn.Bck, objName string) (int, error) { 142 if cmn.Rom.FastV(5, cos.SmoduleBackend) { 143 nlog.InfoDepth(1, "begin azure error =========================") 144 nlog.InfoDepth(1, azureError) 145 nlog.InfoDepth(1, "end azure error ===========================") 146 } 147 148 var stgErr *azcore.ResponseError 149 if !errors.As(azureError, &stgErr) { 150 return http.StatusInternalServerError, azureError 151 } 152 if cmn.Rom.FastV(5, cos.SmoduleBackend) { 153 nlog.InfoDepth(1, "ErrorCode:", stgErr.ErrorCode, "StatusCode:", stgErr.StatusCode) 154 } 155 156 // NOTE: error-codes documentation seems to be incomplete and/or outdated 157 // ref: https://learn.microsoft.com/en-us/rest/api/storageservices/common-rest-api-error-codes 158 159 switch bloberror.Code(stgErr.ErrorCode) { 160 case bloberror.ContainerNotFound: 161 return http.StatusNotFound, cmn.NewErrRemoteBckNotFound(bck) 162 case bloberror.BlobNotFound: 163 return http.StatusNotFound, errors.New(azErrPrefix + "NotFound: " + bck.Cname(objName) + "]") 164 case bloberror.InvalidResourceName: 165 if objName != "" { 166 return http.StatusNotFound, errors.New(azErrPrefix + "NotFound: " + bck.Cname(objName) + "]") 167 } 168 } 169 170 // NOTE above 171 if objName == "" && bloberror.Code(stgErr.ErrorCode) == bloberror.OutOfRangeInput { 172 return http.StatusNotFound, cmn.NewErrRemoteBckNotFound(bck) 173 } 174 175 // azure error is usually a sizeable multi-line text with items including: 176 // request ID, authorization, variery of x-ms-* headers, server and user agent, and more 177 178 var ( 179 status = stgErr.StatusCode 180 code string 181 description string 182 lines = strings.Split(azureError.Error(), "\n") 183 ) 184 if resp := stgErr.RawResponse; resp != nil { 185 resp.Body.Close() 186 debug.Assertf(resp.StatusCode == stgErr.StatusCode, "%d vs %d", resp.StatusCode, stgErr.StatusCode) // checking 187 status = resp.StatusCode 188 } 189 for _, line := range lines { 190 if strings.HasPrefix(line, azErrDesc) { 191 description = azCleanErrRegex.ReplaceAllString(line[len(azErrDesc):], "") 192 } else if strings.HasPrefix(line, azErrResp) { 193 i := max(0, strings.Index(line, ": ")) 194 // alternatively, take "^RESPONSE ...: <...>" for description 195 description = azCleanErrRegex.ReplaceAllString(line[i:], "") 196 } 197 if i := strings.Index(line, azErrCode); i > 0 { 198 code = azCleanErrRegex.ReplaceAllString(line[i+len(azErrCode):], "") 199 } else if i := strings.Index(line, strings.ToUpper(azErrCode)); i > 0 { 200 code = azCleanErrRegex.ReplaceAllString(line[i+len(azErrCode):], "") 201 } 202 } 203 if code != "" && description != "" { 204 return status, errors.New(azErrPrefix + code + ": " + strings.TrimSpace(description) + "]") 205 } 206 debug.Assert(false, azureError) // expecting to parse 207 return status, azureError 208 } 209 210 // as core.Backend -------------------------------------------------------------- 211 212 // 213 // HEAD BUCKET 214 // 215 216 func (azbp *azbp) HeadBucket(ctx context.Context, bck *meta.Bck) (cos.StrKVs, int, error) { 217 var ( 218 cloudBck = bck.RemoteBck() 219 cntURL = azbp.u + "/" + cloudBck.Name 220 ) 221 client, err := container.NewClientWithSharedKeyCredential(cntURL, azbp.creds, nil) 222 if err != nil { 223 status, err := azureErrorToAISError(err, cloudBck, "") 224 return nil, status, err 225 } 226 resp, err := client.GetProperties(ctx, nil) 227 if err != nil { 228 status, err := azureErrorToAISError(err, cloudBck, "") 229 return nil, status, err 230 } 231 232 bckProps := make(cos.StrKVs, 2) 233 bckProps[apc.HdrBackendProvider] = apc.Azure 234 235 // TODO #200224 236 if true || resp.IsImmutableStorageWithVersioningEnabled != nil && *resp.IsImmutableStorageWithVersioningEnabled { 237 bckProps[apc.HdrBucketVerEnabled] = "true" 238 } else { 239 bckProps[apc.HdrBucketVerEnabled] = "false" 240 } 241 return bckProps, http.StatusOK, nil 242 } 243 244 // 245 // LIST OBJECTS 246 // 247 248 // TODO: support non-recursive (apc.LsNoRecursion) operation, as in: 249 // $ az storage blob list -c abc --prefix sub/ --delimiter / 250 // See also: aws.go, gcp.go 251 func (azbp *azbp) ListObjects(bck *meta.Bck, msg *apc.LsoMsg, lst *cmn.LsoRes) (int, error) { 252 msg.PageSize = calcPageSize(msg.PageSize, bck.MaxPageSize()) 253 var ( 254 cloudBck = bck.RemoteBck() 255 cntURL = azbp.u + "/" + cloudBck.Name 256 num = int32(msg.PageSize) 257 opts = container.ListBlobsFlatOptions{Prefix: apc.Ptr(msg.Prefix), MaxResults: &num} 258 ) 259 client, err := container.NewClientWithSharedKeyCredential(cntURL, azbp.creds, nil) 260 if err != nil { 261 return azureErrorToAISError(err, cloudBck, "") 262 } 263 if cmn.Rom.FastV(4, cos.SmoduleBackend) { 264 nlog.Infof("list_objects %s", cloudBck.Name) 265 } 266 if msg.ContinuationToken != "" { 267 opts.Marker = apc.Ptr(msg.ContinuationToken) 268 } 269 270 pager := client.NewListBlobsFlatPager(&opts) 271 resp, err := pager.NextPage(context.Background()) 272 if err != nil { 273 return azureErrorToAISError(err, cloudBck, "") 274 } 275 276 var ( 277 custom cos.StrKVs 278 l = len(resp.Segment.BlobItems) 279 wantCustom = msg.WantProp(apc.GetPropsCustom) 280 ) 281 for i := len(lst.Entries); i < l; i++ { 282 lst.Entries = append(lst.Entries, &cmn.LsoEnt{}) // add missing empty 283 } 284 if wantCustom { 285 custom = make(cos.StrKVs, 4) // reuse 286 } 287 for idx := range resp.Segment.BlobItems { 288 var ( 289 blob = resp.Segment.BlobItems[idx] 290 entry = lst.Entries[idx] 291 ) 292 entry.Name = *blob.Name 293 entry.Size = *blob.Properties.ContentLength 294 if msg.IsFlagSet(apc.LsNameOnly) || msg.IsFlagSet(apc.LsNameSize) { 295 continue 296 } 297 298 entry.Checksum = azEncodeChecksum(blob.Properties.ContentMD5) 299 300 etag := azEncodeEtag(*blob.Properties.ETag) 301 entry.Version = etag // (TODO a the top) 302 303 // custom 304 if wantCustom { 305 clear(custom) 306 custom[cmn.ETag] = etag 307 if !blob.Properties.LastModified.IsZero() { 308 custom[cmn.LastModified] = fmtTime(*blob.Properties.LastModified) 309 } 310 if blob.Properties.ContentType != nil { 311 custom[cos.HdrContentType] = *blob.Properties.ContentType 312 } 313 if blob.VersionID != nil { 314 custom[cmn.VersionObjMD] = *blob.VersionID 315 } 316 entry.Custom = cmn.CustomMD2S(custom) 317 } 318 } 319 lst.Entries = lst.Entries[:l] 320 321 if resp.NextMarker != nil { 322 lst.ContinuationToken = *resp.NextMarker 323 } 324 if cmn.Rom.FastV(4, cos.SmoduleBackend) { 325 nlog.Infof("[list_objects] count %d(marker: %s)", len(lst.Entries), lst.ContinuationToken) 326 } 327 return 0, nil 328 } 329 330 // 331 // LIST BUCKETS 332 // 333 334 func (azbp *azbp) ListBuckets(cmn.QueryBcks) (bcks cmn.Bcks, _ int, _ error) { 335 serviceClient, err := service.NewClientWithSharedKeyCredential(azbp.u, azbp.creds, nil) 336 if err != nil { 337 status, err := azureErrorToAISError(err, &cmn.Bck{Provider: apc.Azure}, "") 338 return nil, status, err 339 } 340 pager := serviceClient.NewListContainersPager(&service.ListContainersOptions{}) 341 for pager.More() { 342 resp, err := pager.NextPage(context.TODO()) 343 if err != nil { 344 status, err := azureErrorToAISError(err, &cmn.Bck{Provider: apc.Azure}, "") 345 return bcks, status, err 346 } 347 for _, ci := range resp.ContainerItems { 348 bcks = append(bcks, cmn.Bck{ 349 Name: *ci.Name, 350 Provider: apc.Azure, 351 }) 352 } 353 } 354 if cmn.Rom.FastV(4, cos.SmoduleBackend) { 355 nlog.Infof("[list_buckets] count %d", len(bcks)) 356 } 357 return bcks, 0, nil 358 } 359 360 // 361 // HEAD OBJECT 362 // 363 364 func (azbp *azbp) HeadObj(ctx context.Context, lom *core.LOM, _ *http.Request) (*cmn.ObjAttrs, int, error) { 365 var ( 366 cloudBck = lom.Bucket().RemoteBck() 367 blURL = azbp.u + "/" + cloudBck.Name + "/" + lom.ObjName 368 ) 369 client, err := blockblob.NewClientWithSharedKeyCredential(blURL, azbp.creds, nil) 370 if err != nil { 371 status, err := azureErrorToAISError(err, cloudBck, lom.ObjName) 372 return nil, status, err 373 } 374 resp, err := client.GetProperties(ctx, nil) 375 if err != nil { 376 status, err := azureErrorToAISError(err, cloudBck, lom.ObjName) 377 return nil, status, err 378 } 379 380 debug.Assert(resp.IsCurrentVersion == nil || *resp.IsCurrentVersion, "expecting current/latest/the-only ver") 381 382 oa := &cmn.ObjAttrs{} 383 oa.CustomMD = make(cos.StrKVs, 6) 384 oa.SetCustomKey(cmn.SourceObjMD, apc.Azure) 385 oa.Size = *resp.ContentLength 386 387 etag := azEncodeEtag(*resp.ETag) 388 oa.SetCustomKey(cmn.ETag, etag) 389 390 oa.Ver = etag // TODO #200224 391 392 if md5 := azEncodeChecksum(resp.ContentMD5); md5 != "" { 393 oa.SetCustomKey(cmn.MD5ObjMD, md5) 394 } 395 if v := resp.LastModified; v != nil { 396 oa.SetCustomKey(cmn.LastModified, fmtTime(*v)) 397 } 398 if v := resp.ContentType; v != nil { 399 // unlike other custom attrs, "Content-Type" is not getting stored w/ LOM 400 // - only shown via list-objects and HEAD when not present 401 oa.SetCustomKey(cos.HdrContentType, *v) 402 } 403 if cmn.Rom.FastV(5, cos.SmoduleBackend) { 404 nlog.Infof("[head_object] %s", lom) 405 } 406 return oa, 0, nil 407 } 408 409 // 410 // GET OBJECT 411 // 412 413 func (azbp *azbp) GetObj(ctx context.Context, lom *core.LOM, owt cmn.OWT, _ *http.Request) (int, error) { 414 res := azbp.GetObjReader(ctx, lom, 0, 0) 415 if res.Err != nil { 416 return res.ErrCode, res.Err 417 } 418 params := allocPutParams(res, owt) 419 err := azbp.t.PutObject(lom, params) 420 core.FreePutParams(params) 421 if cmn.Rom.FastV(5, cos.SmoduleBackend) { 422 nlog.Infoln("[get_object]", lom.String(), err) 423 } 424 return 0, err 425 } 426 427 func (azbp *azbp) GetObjReader(ctx context.Context, lom *core.LOM, offset, length int64) (res core.GetReaderResult) { 428 var ( 429 cloudBck = lom.Bucket().RemoteBck() 430 blURL = azbp.u + "/" + cloudBck.Name + "/" + lom.ObjName 431 ) 432 client, err := blockblob.NewClientWithSharedKeyCredential(blURL, azbp.creds, nil) 433 if err != nil { 434 res.ErrCode, res.Err = azureErrorToAISError(err, cloudBck, lom.ObjName) 435 return 436 } 437 438 // Get checksum 439 respProps, err := client.GetProperties(ctx, nil) 440 if err != nil { 441 res.ErrCode, res.Err = azureErrorToAISError(err, cloudBck, lom.ObjName) 442 return 443 } 444 445 // (0, 0) range indicates "whole object" 446 var opts blob.DownloadStreamOptions 447 opts.Range.Count = length 448 opts.Range.Offset = offset 449 resp, err := client.DownloadStream(ctx, &opts) 450 if err != nil { 451 res.ErrCode, res.Err = azureErrorToAISError(err, cloudBck, lom.ObjName) 452 if res.ErrCode == http.StatusRequestedRangeNotSatisfiable { 453 res.Err = cmn.NewErrRangeNotSatisfiable(res.Err, nil, 0) 454 } 455 return res 456 } 457 458 debug.Assert(resp.IsCurrentVersion == nil || *resp.IsCurrentVersion, "expecting current/latest/the-only ver") 459 res.Size = *resp.ContentLength 460 461 if length == 0 { 462 // custom metadata 463 lom.SetCustomKey(cmn.SourceObjMD, apc.Azure) 464 etag := azEncodeEtag(*respProps.ETag) 465 lom.SetCustomKey(cmn.ETag, etag) 466 467 lom.SetVersion(etag) // TODO #200224 468 469 if md5 := azEncodeChecksum(respProps.ContentMD5); md5 != "" { 470 lom.SetCustomKey(cmn.MD5ObjMD, md5) 471 res.ExpCksum = cos.NewCksum(cos.ChecksumMD5, md5) 472 } 473 } 474 475 res.R = resp.Body 476 return res 477 } 478 479 // 480 // PUT OBJECT 481 // 482 483 func (azbp *azbp) PutObj(r io.ReadCloser, lom *core.LOM, _ *http.Request) (int, error) { 484 defer cos.Close(r) 485 486 client, err := azblob.NewClientWithSharedKeyCredential(azbp.u, azbp.creds, nil) 487 if err != nil { 488 return azureErrorToAISError(err, &cmn.Bck{Provider: apc.Azure}, "") 489 } 490 cloudBck := lom.Bck().RemoteBck() 491 492 opts := azblob.UploadStreamOptions{} 493 if size := lom.SizeBytes(true); size > cos.MiB { 494 opts.Concurrency = int(min((size+cos.MiB-1)/cos.MiB, 8)) 495 } 496 497 resp, err := client.UploadStream(context.Background(), cloudBck.Name, lom.ObjName, r, &opts) 498 if err != nil { 499 return azureErrorToAISError(err, cloudBck, lom.ObjName) 500 } 501 502 etag := azEncodeEtag(*resp.ETag) 503 lom.SetCustomKey(cmn.ETag, etag) 504 505 lom.SetVersion(etag) // TODO #200224 506 507 if v := resp.LastModified; v != nil { 508 lom.SetCustomKey(cmn.LastModified, fmtTime(*v)) 509 } 510 if cmn.Rom.FastV(5, cos.SmoduleBackend) { 511 nlog.Infof("[put_object] %s", lom) 512 } 513 return http.StatusOK, nil 514 } 515 516 // 517 // DELETE OBJECT 518 // 519 520 func (azbp *azbp) DeleteObj(lom *core.LOM) (int, error) { 521 client, err := azblob.NewClientWithSharedKeyCredential(azbp.u, azbp.creds, nil) 522 if err != nil { 523 return azureErrorToAISError(err, &cmn.Bck{Provider: apc.Azure}, "") 524 } 525 cloudBck := lom.Bck().RemoteBck() 526 527 _, err = client.DeleteBlob(context.Background(), cloudBck.Name, lom.ObjName, nil) 528 if err != nil { 529 return azureErrorToAISError(err, cloudBck, lom.ObjName) 530 } 531 return http.StatusOK, nil 532 }