github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/backend/gcp.go (about) 1 //go:build gcp 2 3 // Package backend contains implementation of various backend providers. 4 /* 5 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 6 */ 7 package backend 8 9 import ( 10 "context" 11 "errors" 12 "fmt" 13 "io" 14 "net/http" 15 "os" 16 "strings" 17 18 "cloud.google.com/go/storage" 19 "github.com/NVIDIA/aistore/api/apc" 20 "github.com/NVIDIA/aistore/cmn" 21 "github.com/NVIDIA/aistore/cmn/cos" 22 "github.com/NVIDIA/aistore/cmn/nlog" 23 "github.com/NVIDIA/aistore/core" 24 "github.com/NVIDIA/aistore/core/meta" 25 jsoniter "github.com/json-iterator/go" 26 "google.golang.org/api/googleapi" 27 "google.golang.org/api/iterator" 28 "google.golang.org/api/option" 29 htransport "google.golang.org/api/transport/http" 30 ) 31 32 const ( 33 gcpChecksumType = "x-goog-meta-ais-cksum-type" 34 gcpChecksumVal = "x-goog-meta-ais-cksum-val" 35 36 projectIDField = "project_id" 37 projectIDEnvVar = "GOOGLE_CLOUD_PROJECT" 38 credPathEnvVar = "GOOGLE_APPLICATION_CREDENTIALS" //nolint:gosec // false positive G101 39 ) 40 41 type ( 42 gsbp struct { 43 t core.TargetPut 44 projectID string 45 base 46 } 47 ) 48 49 var ( 50 // quoting Google SDK: 51 // "Clients should be reused instead of created as needed. The methods of Client 52 // are safe for concurrent use by multiple goroutines. 53 // The default scope is ScopeFullControl." 54 gcpClient *storage.Client 55 56 // context placeholder 57 gctx context.Context 58 59 // interface guard 60 _ core.Backend = (*gsbp)(nil) 61 ) 62 63 func NewGCP(t core.TargetPut) (bp core.Backend, err error) { 64 var ( 65 projectID string 66 credProjectID = readCredFile() 67 envProjectID = os.Getenv(projectIDEnvVar) 68 ) 69 if credProjectID != "" && envProjectID != "" && credProjectID != envProjectID { 70 err = fmt.Errorf("both %q and %q env vars cannot be defined (and not equal %s)", 71 projectIDEnvVar, credPathEnvVar, projectIDField) 72 return 73 } 74 switch { 75 case credProjectID != "": 76 projectID = credProjectID 77 nlog.Infof("%s: %q (using %q env)", projectIDField, projectID, credPathEnvVar) 78 case envProjectID != "": 79 projectID = envProjectID 80 nlog.Infof("%s: %q (using %q env)", projectIDField, projectID, projectIDEnvVar) 81 default: 82 nlog.Warningln("unauthenticated client") 83 } 84 gsbp := &gsbp{ 85 t: t, 86 projectID: projectID, 87 base: base{apc.GCP}, 88 } 89 bp = gsbp 90 91 gctx = context.Background() 92 gcpClient, err = gsbp.createClient(gctx) 93 return 94 } 95 96 func (gsbp *gsbp) createClient(ctx context.Context) (*storage.Client, error) { 97 opts := []option.ClientOption{option.WithScopes(storage.ScopeFullControl)} 98 if gsbp.projectID == "" { 99 opts = append(opts, option.WithoutAuthentication()) 100 } 101 // create HTTP transport 102 transport, err := htransport.NewTransport(ctx, cmn.NewTransport(cmn.TransportArgs{}), opts...) 103 if err != nil { 104 if strings.Contains(err.Error(), "credentials") { 105 details := fmt.Sprintf("%s Hint: check your %q and %q environment settings for project ID=%q.", 106 err, projectIDEnvVar, credPathEnvVar, gsbp.projectID) 107 return nil, errors.New(details) 108 } 109 return nil, cmn.NewErrFailedTo(nil, "gcp-backend: create", "http transport", err) 110 } 111 opts = append(opts, option.WithHTTPClient(&http.Client{Transport: transport})) 112 // create HTTP client 113 client, err := storage.NewClient(ctx, opts...) 114 if err != nil { 115 return nil, cmn.NewErrFailedTo(nil, "gcp-backend: create", "client", err) 116 } 117 return client, nil 118 } 119 120 // as core.Backend -------------------------------------------------------------- 121 122 // 123 // HEAD BUCKET 124 // 125 126 func (*gsbp) HeadBucket(ctx context.Context, bck *meta.Bck) (bckProps cos.StrKVs, ecode int, err error) { 127 if cmn.Rom.FastV(5, cos.SmoduleBackend) { 128 nlog.Infof("head_bucket %s", bck.Name) 129 } 130 cloudBck := bck.RemoteBck() 131 _, err = gcpClient.Bucket(cloudBck.Name).Attrs(ctx) 132 if err != nil { 133 ecode, err = gcpErrorToAISError(err, cloudBck) 134 return 135 } 136 // 137 // NOTE: return a few assorted fields, specifically to fill-in vendor-specific `cmn.ExtraProps` 138 // 139 bckProps = make(cos.StrKVs) 140 bckProps[apc.HdrBackendProvider] = apc.GCP 141 // GCP always generates a versionid for an object even if versioning is disabled. 142 // So, return that we can detect versionid change on getobj etc 143 bckProps[apc.HdrBucketVerEnabled] = "true" 144 return 145 } 146 147 // 148 // LIST OBJECTS 149 // 150 151 func (*gsbp) ListObjects(bck *meta.Bck, msg *apc.LsoMsg, lst *cmn.LsoRes) (ecode int, err error) { 152 var ( 153 query *storage.Query 154 h = cmn.BackendHelpers.Google 155 cloudBck = bck.RemoteBck() 156 ) 157 msg.PageSize = calcPageSize(msg.PageSize, bck.MaxPageSize()) 158 159 if prefix := msg.Prefix; prefix != "" { 160 var delim string 161 if msg.IsFlagSet(apc.LsNoRecursion) { 162 // NOTE: important to indicate subdirectory with trailing '/' 163 if cos.IsLastB(prefix, '/') { 164 delim = "/" 165 } 166 } 167 query = &storage.Query{Prefix: prefix, Delimiter: delim} 168 } 169 170 var ( 171 it = gcpClient.Bucket(cloudBck.Name).Objects(gctx, query) 172 pager = iterator.NewPager(it, int(msg.PageSize), msg.ContinuationToken) 173 objs = make([]*storage.ObjectAttrs, 0, msg.PageSize) 174 ) 175 nextPageToken, errPage := pager.NextPage(&objs) 176 if errPage != nil { 177 if cmn.Rom.FastV(4, cos.SmoduleBackend) { 178 nlog.Infof("list_objects %s: %v", cloudBck.Name, errPage) 179 } 180 ecode, err = gcpErrorToAISError(errPage, cloudBck) 181 return 182 } 183 184 lst.ContinuationToken = nextPageToken 185 186 var ( 187 custom cos.StrKVs 188 i int 189 l = len(objs) 190 wantCustom = msg.WantProp(apc.GetPropsCustom) 191 ) 192 for j := len(lst.Entries); j < l; j++ { 193 lst.Entries = append(lst.Entries, &cmn.LsoEnt{}) // add missing empty 194 } 195 if wantCustom { 196 custom = make(cos.StrKVs, 3) // reuse 197 } 198 for _, attrs := range objs { 199 if msg.IsFlagSet(apc.LsNoRecursion) { 200 if attrs.Name == "" { 201 entry := lst.Entries[i] 202 entry.Name = attrs.Prefix 203 entry.Flags = apc.EntryIsDir 204 i++ 205 continue 206 } 207 } 208 entry := lst.Entries[i] 209 i++ 210 entry.Name, entry.Size = attrs.Name, attrs.Size 211 if msg.IsFlagSet(apc.LsNameOnly) || msg.IsFlagSet(apc.LsNameSize) { 212 continue 213 } 214 if v, ok := h.EncodeCksum(attrs.MD5); ok { 215 entry.Checksum = v 216 } 217 if v, ok := h.EncodeVersion(attrs.Generation); ok { 218 entry.Version = v 219 } 220 // custom 221 if wantCustom { 222 custom[cmn.ETag], _ = h.EncodeCksum(attrs.Etag) 223 custom[cmn.LastModified] = fmtTime(attrs.Updated) 224 custom[cos.HdrContentType] = attrs.ContentType 225 entry.Custom = cmn.CustomMD2S(custom) 226 } 227 } 228 lst.Entries = lst.Entries[:i] 229 230 if cmn.Rom.FastV(4, cos.SmoduleBackend) { 231 nlog.Infof("[list_objects] count %d", len(lst.Entries)) 232 } 233 return 234 } 235 236 // 237 // LIST BUCKETS 238 // 239 240 func (gsbp *gsbp) ListBuckets(_ cmn.QueryBcks) (bcks cmn.Bcks, ecode int, err error) { 241 if gsbp.projectID == "" { 242 // NOTE: empty `projectID` results in obscure: "googleapi: Error 400: Invalid argument" 243 return nil, http.StatusBadRequest, 244 errors.New("empty project ID: cannot list GCP buckets with no authentication") 245 } 246 bcks = make(cmn.Bcks, 0, 16) 247 it := gcpClient.Buckets(gctx, gsbp.projectID) 248 for { 249 var battrs *storage.BucketAttrs 250 251 battrs, err = it.Next() 252 if err == iterator.Done { 253 err = nil 254 break 255 } 256 if err != nil { 257 ecode, err = gcpErrorToAISError(err, &cmn.Bck{Provider: apc.GCP}) 258 return 259 } 260 bcks = append(bcks, cmn.Bck{ 261 Name: battrs.Name, 262 Provider: apc.GCP, 263 }) 264 if cmn.Rom.FastV(4, cos.SmoduleBackend) { 265 nlog.Infof("[bucket_names] %s: created %v, versioning %t", 266 battrs.Name, battrs.Created, battrs.VersioningEnabled) 267 } 268 } 269 return 270 } 271 272 // 273 // HEAD OBJECT 274 // 275 276 func (*gsbp) HeadObj(ctx context.Context, lom *core.LOM, _ *http.Request) (oa *cmn.ObjAttrs, ecode int, err error) { 277 var ( 278 attrs *storage.ObjectAttrs 279 h = cmn.BackendHelpers.Google 280 cloudBck = lom.Bck().RemoteBck() 281 ) 282 attrs, err = gcpClient.Bucket(cloudBck.Name).Object(lom.ObjName).Attrs(ctx) 283 if err != nil { 284 ecode, err = handleObjectError(ctx, gcpClient, err, cloudBck) 285 return 286 } 287 oa = &cmn.ObjAttrs{} 288 oa.CustomMD = make(cos.StrKVs, 6) 289 oa.SetCustomKey(cmn.SourceObjMD, apc.GCP) 290 oa.Size = attrs.Size 291 if v, ok := h.EncodeVersion(attrs.Generation); ok { 292 oa.SetCustomKey(cmn.VersionObjMD, v) 293 oa.Ver = v 294 } 295 if v, ok := h.EncodeCksum(attrs.MD5); ok { 296 oa.SetCustomKey(cmn.MD5ObjMD, v) 297 } 298 if v, ok := h.EncodeCksum(attrs.CRC32C); ok { 299 oa.SetCustomKey(cmn.CRC32CObjMD, v) 300 } 301 if v, ok := h.EncodeCksum(attrs.Etag); ok { 302 oa.SetCustomKey(cmn.ETag, v) 303 } 304 305 if cksumType, ok := attrs.Metadata[gcpChecksumType]; ok { 306 if cksumValue, ok := attrs.Metadata[gcpChecksumVal]; ok { 307 oa.SetCksum(cksumType, cksumValue) 308 } 309 } 310 311 oa.SetCustomKey(cmn.LastModified, fmtTime(attrs.Updated)) 312 // unlike other custom attrs, "Content-Type" is not getting stored w/ LOM 313 // - only shown via list-objects and HEAD when not present 314 oa.SetCustomKey(cos.HdrContentType, attrs.ContentType) 315 if cmn.Rom.FastV(5, cos.SmoduleBackend) { 316 nlog.Infof("[head_object] %s", cloudBck.Cname(lom.ObjName)) 317 } 318 return 319 } 320 321 // 322 // GET OBJECT 323 // 324 325 func (gsbp *gsbp) GetObj(ctx context.Context, lom *core.LOM, owt cmn.OWT, _ *http.Request) (int, error) { 326 res := gsbp.GetObjReader(ctx, lom, 0, 0) 327 if res.Err != nil { 328 return res.ErrCode, res.Err 329 } 330 params := allocPutParams(res, owt) 331 err := gsbp.t.PutObject(lom, params) 332 core.FreePutParams(params) 333 if cmn.Rom.FastV(5, cos.SmoduleBackend) { 334 nlog.Infoln("[get_object]", lom.String(), err) 335 } 336 return 0, err 337 } 338 339 func (*gsbp) GetObjReader(ctx context.Context, lom *core.LOM, offset, length int64) (res core.GetReaderResult) { 340 var ( 341 attrs *storage.ObjectAttrs 342 rc *storage.Reader 343 cloudBck = lom.Bck().RemoteBck() 344 o = gcpClient.Bucket(cloudBck.Name).Object(lom.ObjName) 345 ) 346 attrs, res.Err = o.Attrs(ctx) 347 if res.Err != nil { 348 res.ErrCode, res.Err = gcpErrorToAISError(res.Err, cloudBck) 349 return res 350 } 351 if length > 0 { 352 rc, res.Err = o.NewRangeReader(ctx, offset, length) 353 if res.Err != nil { 354 if res.ErrCode == http.StatusRequestedRangeNotSatisfiable { 355 res.Err = cmn.NewErrRangeNotSatisfiable(res.Err, nil, 0) 356 } 357 return res 358 } 359 } else { 360 rc, res.Err = o.NewReader(ctx) 361 if res.Err != nil { 362 return res 363 } 364 // custom metadata 365 lom.SetCustomKey(cmn.SourceObjMD, apc.GCP) 366 if cksumType, ok := attrs.Metadata[gcpChecksumType]; ok { 367 if cksumValue, ok := attrs.Metadata[gcpChecksumVal]; ok { 368 lom.SetCksum(cos.NewCksum(cksumType, cksumValue)) 369 } 370 } 371 res.ExpCksum = setCustomGs(lom, attrs) 372 } 373 374 res.Size = rc.Attrs.Size 375 res.R = rc 376 return res 377 } 378 379 func setCustomGs(lom *core.LOM, attrs *storage.ObjectAttrs) (expCksum *cos.Cksum) { 380 h := cmn.BackendHelpers.Google 381 if v, ok := h.EncodeVersion(attrs.Generation); ok { 382 lom.SetVersion(v) 383 lom.SetCustomKey(cmn.VersionObjMD, v) 384 } 385 if v, ok := h.EncodeCksum(attrs.MD5); ok { 386 lom.SetCustomKey(cmn.MD5ObjMD, v) 387 expCksum = cos.NewCksum(cos.ChecksumMD5, v) 388 } 389 if v, ok := h.EncodeCksum(attrs.CRC32C); ok { 390 lom.SetCustomKey(cmn.CRC32CObjMD, v) 391 if expCksum == nil { 392 expCksum = cos.NewCksum(cos.ChecksumCRC32C, v) 393 } 394 } 395 if v, ok := h.EncodeCksum(attrs.Etag); ok { 396 lom.SetCustomKey(cmn.ETag, v) 397 } 398 lom.SetCustomKey(cmn.LastModified, fmtTime(attrs.Updated)) 399 return 400 } 401 402 // 403 // PUT OBJECT 404 // 405 406 func (gsbp *gsbp) PutObj(r io.ReadCloser, lom *core.LOM, _ *http.Request) (ecode int, err error) { 407 var ( 408 attrs *storage.ObjectAttrs 409 written int64 410 cloudBck = lom.Bck().RemoteBck() 411 md = make(cos.StrKVs, 2) 412 gcpObj = gcpClient.Bucket(cloudBck.Name).Object(lom.ObjName) 413 wc = gcpObj.NewWriter(gctx) 414 ) 415 md[gcpChecksumType], md[gcpChecksumVal] = lom.Checksum().Get() 416 417 wc.Metadata = md 418 buf, slab := gsbp.t.PageMM().Alloc() 419 written, err = io.CopyBuffer(wc, r, buf) 420 slab.Free(buf) 421 cos.Close(r) 422 if err != nil { 423 return 424 } 425 if err = wc.Close(); err != nil { 426 ecode, err = gcpErrorToAISError(err, cloudBck) 427 return 428 } 429 attrs, err = gcpObj.Attrs(gctx) 430 if err != nil { 431 ecode, err = handleObjectError(gctx, gcpClient, err, cloudBck) 432 return 433 } 434 _ = setCustomGs(lom, attrs) 435 if cmn.Rom.FastV(5, cos.SmoduleBackend) { 436 nlog.Infof("[put_object] %s, size %d", lom, written) 437 } 438 return 439 } 440 441 // 442 // DELETE OBJECT 443 // 444 445 func (*gsbp) DeleteObj(lom *core.LOM) (ecode int, err error) { 446 var ( 447 cloudBck = lom.Bck().RemoteBck() 448 o = gcpClient.Bucket(cloudBck.Name).Object(lom.ObjName) 449 ) 450 if err = o.Delete(gctx); err != nil { 451 ecode, err = handleObjectError(gctx, gcpClient, err, cloudBck) 452 return 453 } 454 if cmn.Rom.FastV(5, cos.SmoduleBackend) { 455 nlog.Infof("[delete_object] %s", lom) 456 } 457 return 458 } 459 460 // 461 // static helpers 462 // 463 464 func readCredFile() (projectID string) { 465 credFile, err := os.Open(os.Getenv(credPathEnvVar)) 466 if err != nil { 467 return 468 } 469 b, err := io.ReadAll(credFile) 470 credFile.Close() 471 if err != nil { 472 return 473 } 474 projectID, _ = jsoniter.Get(b, projectIDField).GetInterface().(string) 475 return 476 } 477 478 const gcpErrPrefix = "gcp-error" 479 480 func gcpErrorToAISError(gcpError error, bck *cmn.Bck) (int, error) { 481 if cmn.Rom.FastV(5, cos.SmoduleBackend) { 482 nlog.InfoDepth(1, "begin "+gcpErrPrefix+" =========================") 483 nlog.InfoDepth(1, gcpError) 484 nlog.InfoDepth(1, "end "+gcpErrPrefix+" ===========================") 485 } 486 if gcpError == storage.ErrBucketNotExist { 487 return http.StatusNotFound, cmn.NewErrRemoteBckNotFound(bck) 488 } 489 err := _gcpErr(gcpError) 490 if gcpError == storage.ErrObjectNotExist { 491 return http.StatusNotFound, err 492 } 493 apiErr, ok := gcpError.(*googleapi.Error) 494 if !ok { 495 return http.StatusInternalServerError, err 496 } 497 if apiErr.Code == http.StatusForbidden && strings.Contains(apiErr.Error(), "may not exist") { 498 // HACK: "not found or misspelled" vs "service not paid for" (the latter less likely) 499 if cmn.Rom.FastV(4, cos.SmoduleBackend) { 500 nlog.Infoln(err) 501 } 502 return http.StatusNotFound, err 503 } 504 return apiErr.Code, err 505 } 506 507 // (compare w/ _awsErr) 508 func _gcpErr(gcpError error) error { 509 return errors.New(gcpErrPrefix + "[" + gcpError.Error() + "]") 510 } 511 512 func handleObjectError(ctx context.Context, gcpClient *storage.Client, objErr error, bck *cmn.Bck) (int, error) { 513 if objErr != storage.ErrObjectNotExist { 514 return http.StatusBadRequest, _gcpErr(objErr) 515 } 516 517 // Object does not exist but in GCP it doesn't necessarily mean that the bucket does. 518 if _, err := gcpClient.Bucket(bck.Name).Attrs(ctx); err != nil { 519 return gcpErrorToAISError(err, bck) 520 } 521 return http.StatusNotFound, cos.NewErrNotFound(nil, _gcpErr(objErr).Error()) 522 }