github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/backend/gcp.go (about)

     1  //go:build gcp
     2  
     3  // Package backend contains implementation of various backend providers.
     4  /*
     5   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     6   */
     7  package backend
     8  
     9  import (
    10  	"context"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  	"net/http"
    15  	"os"
    16  	"strings"
    17  
    18  	"cloud.google.com/go/storage"
    19  	"github.com/NVIDIA/aistore/api/apc"
    20  	"github.com/NVIDIA/aistore/cmn"
    21  	"github.com/NVIDIA/aistore/cmn/cos"
    22  	"github.com/NVIDIA/aistore/cmn/nlog"
    23  	"github.com/NVIDIA/aistore/core"
    24  	"github.com/NVIDIA/aistore/core/meta"
    25  	jsoniter "github.com/json-iterator/go"
    26  	"google.golang.org/api/googleapi"
    27  	"google.golang.org/api/iterator"
    28  	"google.golang.org/api/option"
    29  	htransport "google.golang.org/api/transport/http"
    30  )
    31  
    32  const (
    33  	gcpChecksumType = "x-goog-meta-ais-cksum-type"
    34  	gcpChecksumVal  = "x-goog-meta-ais-cksum-val"
    35  
    36  	projectIDField  = "project_id"
    37  	projectIDEnvVar = "GOOGLE_CLOUD_PROJECT"
    38  	credPathEnvVar  = "GOOGLE_APPLICATION_CREDENTIALS" //nolint:gosec // false positive G101
    39  )
    40  
    41  type (
    42  	gsbp struct {
    43  		t         core.TargetPut
    44  		projectID string
    45  		base
    46  	}
    47  )
    48  
    49  var (
    50  	// quoting Google SDK:
    51  	//    "Clients should be reused instead of created as needed. The methods of Client
    52  	//     are safe for concurrent use by multiple goroutines.
    53  	//     The default scope is ScopeFullControl."
    54  	gcpClient *storage.Client
    55  
    56  	// context placeholder
    57  	gctx context.Context
    58  
    59  	// interface guard
    60  	_ core.Backend = (*gsbp)(nil)
    61  )
    62  
    63  func NewGCP(t core.TargetPut) (bp core.Backend, err error) {
    64  	var (
    65  		projectID     string
    66  		credProjectID = readCredFile()
    67  		envProjectID  = os.Getenv(projectIDEnvVar)
    68  	)
    69  	if credProjectID != "" && envProjectID != "" && credProjectID != envProjectID {
    70  		err = fmt.Errorf("both %q and %q env vars cannot be defined (and not equal %s)",
    71  			projectIDEnvVar, credPathEnvVar, projectIDField)
    72  		return
    73  	}
    74  	switch {
    75  	case credProjectID != "":
    76  		projectID = credProjectID
    77  		nlog.Infof("%s: %q (using %q env)", projectIDField, projectID, credPathEnvVar)
    78  	case envProjectID != "":
    79  		projectID = envProjectID
    80  		nlog.Infof("%s: %q (using %q env)", projectIDField, projectID, projectIDEnvVar)
    81  	default:
    82  		nlog.Warningln("unauthenticated client")
    83  	}
    84  	gsbp := &gsbp{
    85  		t:         t,
    86  		projectID: projectID,
    87  		base:      base{apc.GCP},
    88  	}
    89  	bp = gsbp
    90  
    91  	gctx = context.Background()
    92  	gcpClient, err = gsbp.createClient(gctx)
    93  	return
    94  }
    95  
    96  func (gsbp *gsbp) createClient(ctx context.Context) (*storage.Client, error) {
    97  	opts := []option.ClientOption{option.WithScopes(storage.ScopeFullControl)}
    98  	if gsbp.projectID == "" {
    99  		opts = append(opts, option.WithoutAuthentication())
   100  	}
   101  	// create HTTP transport
   102  	transport, err := htransport.NewTransport(ctx, cmn.NewTransport(cmn.TransportArgs{}), opts...)
   103  	if err != nil {
   104  		if strings.Contains(err.Error(), "credentials") {
   105  			details := fmt.Sprintf("%s Hint: check your %q and %q environment settings for project ID=%q.",
   106  				err, projectIDEnvVar, credPathEnvVar, gsbp.projectID)
   107  			return nil, errors.New(details)
   108  		}
   109  		return nil, cmn.NewErrFailedTo(nil, "gcp-backend: create", "http transport", err)
   110  	}
   111  	opts = append(opts, option.WithHTTPClient(&http.Client{Transport: transport}))
   112  	// create HTTP client
   113  	client, err := storage.NewClient(ctx, opts...)
   114  	if err != nil {
   115  		return nil, cmn.NewErrFailedTo(nil, "gcp-backend: create", "client", err)
   116  	}
   117  	return client, nil
   118  }
   119  
   120  // as core.Backend --------------------------------------------------------------
   121  
   122  //
   123  // HEAD BUCKET
   124  //
   125  
   126  func (*gsbp) HeadBucket(ctx context.Context, bck *meta.Bck) (bckProps cos.StrKVs, ecode int, err error) {
   127  	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
   128  		nlog.Infof("head_bucket %s", bck.Name)
   129  	}
   130  	cloudBck := bck.RemoteBck()
   131  	_, err = gcpClient.Bucket(cloudBck.Name).Attrs(ctx)
   132  	if err != nil {
   133  		ecode, err = gcpErrorToAISError(err, cloudBck)
   134  		return
   135  	}
   136  	//
   137  	// NOTE: return a few assorted fields, specifically to fill-in vendor-specific `cmn.ExtraProps`
   138  	//
   139  	bckProps = make(cos.StrKVs)
   140  	bckProps[apc.HdrBackendProvider] = apc.GCP
   141  	// GCP always generates a versionid for an object even if versioning is disabled.
   142  	// So, return that we can detect versionid change on getobj etc
   143  	bckProps[apc.HdrBucketVerEnabled] = "true"
   144  	return
   145  }
   146  
   147  //
   148  // LIST OBJECTS
   149  //
   150  
   151  func (*gsbp) ListObjects(bck *meta.Bck, msg *apc.LsoMsg, lst *cmn.LsoRes) (ecode int, err error) {
   152  	var (
   153  		query    *storage.Query
   154  		h        = cmn.BackendHelpers.Google
   155  		cloudBck = bck.RemoteBck()
   156  	)
   157  	msg.PageSize = calcPageSize(msg.PageSize, bck.MaxPageSize())
   158  
   159  	if prefix := msg.Prefix; prefix != "" {
   160  		var delim string
   161  		if msg.IsFlagSet(apc.LsNoRecursion) {
   162  			// NOTE: important to indicate subdirectory with trailing '/'
   163  			if cos.IsLastB(prefix, '/') {
   164  				delim = "/"
   165  			}
   166  		}
   167  		query = &storage.Query{Prefix: prefix, Delimiter: delim}
   168  	}
   169  
   170  	var (
   171  		it    = gcpClient.Bucket(cloudBck.Name).Objects(gctx, query)
   172  		pager = iterator.NewPager(it, int(msg.PageSize), msg.ContinuationToken)
   173  		objs  = make([]*storage.ObjectAttrs, 0, msg.PageSize)
   174  	)
   175  	nextPageToken, errPage := pager.NextPage(&objs)
   176  	if errPage != nil {
   177  		if cmn.Rom.FastV(4, cos.SmoduleBackend) {
   178  			nlog.Infof("list_objects %s: %v", cloudBck.Name, errPage)
   179  		}
   180  		ecode, err = gcpErrorToAISError(errPage, cloudBck)
   181  		return
   182  	}
   183  
   184  	lst.ContinuationToken = nextPageToken
   185  
   186  	var (
   187  		custom     cos.StrKVs
   188  		i          int
   189  		l          = len(objs)
   190  		wantCustom = msg.WantProp(apc.GetPropsCustom)
   191  	)
   192  	for j := len(lst.Entries); j < l; j++ {
   193  		lst.Entries = append(lst.Entries, &cmn.LsoEnt{}) // add missing empty
   194  	}
   195  	if wantCustom {
   196  		custom = make(cos.StrKVs, 3) // reuse
   197  	}
   198  	for _, attrs := range objs {
   199  		if msg.IsFlagSet(apc.LsNoRecursion) {
   200  			if attrs.Name == "" {
   201  				entry := lst.Entries[i]
   202  				entry.Name = attrs.Prefix
   203  				entry.Flags = apc.EntryIsDir
   204  				i++
   205  				continue
   206  			}
   207  		}
   208  		entry := lst.Entries[i]
   209  		i++
   210  		entry.Name, entry.Size = attrs.Name, attrs.Size
   211  		if msg.IsFlagSet(apc.LsNameOnly) || msg.IsFlagSet(apc.LsNameSize) {
   212  			continue
   213  		}
   214  		if v, ok := h.EncodeCksum(attrs.MD5); ok {
   215  			entry.Checksum = v
   216  		}
   217  		if v, ok := h.EncodeVersion(attrs.Generation); ok {
   218  			entry.Version = v
   219  		}
   220  		// custom
   221  		if wantCustom {
   222  			custom[cmn.ETag], _ = h.EncodeCksum(attrs.Etag)
   223  			custom[cmn.LastModified] = fmtTime(attrs.Updated)
   224  			custom[cos.HdrContentType] = attrs.ContentType
   225  			entry.Custom = cmn.CustomMD2S(custom)
   226  		}
   227  	}
   228  	lst.Entries = lst.Entries[:i]
   229  
   230  	if cmn.Rom.FastV(4, cos.SmoduleBackend) {
   231  		nlog.Infof("[list_objects] count %d", len(lst.Entries))
   232  	}
   233  	return
   234  }
   235  
   236  //
   237  // LIST BUCKETS
   238  //
   239  
   240  func (gsbp *gsbp) ListBuckets(_ cmn.QueryBcks) (bcks cmn.Bcks, ecode int, err error) {
   241  	if gsbp.projectID == "" {
   242  		// NOTE: empty `projectID` results in obscure: "googleapi: Error 400: Invalid argument"
   243  		return nil, http.StatusBadRequest,
   244  			errors.New("empty project ID: cannot list GCP buckets with no authentication")
   245  	}
   246  	bcks = make(cmn.Bcks, 0, 16)
   247  	it := gcpClient.Buckets(gctx, gsbp.projectID)
   248  	for {
   249  		var battrs *storage.BucketAttrs
   250  
   251  		battrs, err = it.Next()
   252  		if err == iterator.Done {
   253  			err = nil
   254  			break
   255  		}
   256  		if err != nil {
   257  			ecode, err = gcpErrorToAISError(err, &cmn.Bck{Provider: apc.GCP})
   258  			return
   259  		}
   260  		bcks = append(bcks, cmn.Bck{
   261  			Name:     battrs.Name,
   262  			Provider: apc.GCP,
   263  		})
   264  		if cmn.Rom.FastV(4, cos.SmoduleBackend) {
   265  			nlog.Infof("[bucket_names] %s: created %v, versioning %t",
   266  				battrs.Name, battrs.Created, battrs.VersioningEnabled)
   267  		}
   268  	}
   269  	return
   270  }
   271  
   272  //
   273  // HEAD OBJECT
   274  //
   275  
   276  func (*gsbp) HeadObj(ctx context.Context, lom *core.LOM, _ *http.Request) (oa *cmn.ObjAttrs, ecode int, err error) {
   277  	var (
   278  		attrs    *storage.ObjectAttrs
   279  		h        = cmn.BackendHelpers.Google
   280  		cloudBck = lom.Bck().RemoteBck()
   281  	)
   282  	attrs, err = gcpClient.Bucket(cloudBck.Name).Object(lom.ObjName).Attrs(ctx)
   283  	if err != nil {
   284  		ecode, err = handleObjectError(ctx, gcpClient, err, cloudBck)
   285  		return
   286  	}
   287  	oa = &cmn.ObjAttrs{}
   288  	oa.CustomMD = make(cos.StrKVs, 6)
   289  	oa.SetCustomKey(cmn.SourceObjMD, apc.GCP)
   290  	oa.Size = attrs.Size
   291  	if v, ok := h.EncodeVersion(attrs.Generation); ok {
   292  		oa.SetCustomKey(cmn.VersionObjMD, v)
   293  		oa.Ver = v
   294  	}
   295  	if v, ok := h.EncodeCksum(attrs.MD5); ok {
   296  		oa.SetCustomKey(cmn.MD5ObjMD, v)
   297  	}
   298  	if v, ok := h.EncodeCksum(attrs.CRC32C); ok {
   299  		oa.SetCustomKey(cmn.CRC32CObjMD, v)
   300  	}
   301  	if v, ok := h.EncodeCksum(attrs.Etag); ok {
   302  		oa.SetCustomKey(cmn.ETag, v)
   303  	}
   304  
   305  	if cksumType, ok := attrs.Metadata[gcpChecksumType]; ok {
   306  		if cksumValue, ok := attrs.Metadata[gcpChecksumVal]; ok {
   307  			oa.SetCksum(cksumType, cksumValue)
   308  		}
   309  	}
   310  
   311  	oa.SetCustomKey(cmn.LastModified, fmtTime(attrs.Updated))
   312  	// unlike other custom attrs, "Content-Type" is not getting stored w/ LOM
   313  	// - only shown via list-objects and HEAD when not present
   314  	oa.SetCustomKey(cos.HdrContentType, attrs.ContentType)
   315  	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
   316  		nlog.Infof("[head_object] %s", cloudBck.Cname(lom.ObjName))
   317  	}
   318  	return
   319  }
   320  
   321  //
   322  // GET OBJECT
   323  //
   324  
   325  func (gsbp *gsbp) GetObj(ctx context.Context, lom *core.LOM, owt cmn.OWT, _ *http.Request) (int, error) {
   326  	res := gsbp.GetObjReader(ctx, lom, 0, 0)
   327  	if res.Err != nil {
   328  		return res.ErrCode, res.Err
   329  	}
   330  	params := allocPutParams(res, owt)
   331  	err := gsbp.t.PutObject(lom, params)
   332  	core.FreePutParams(params)
   333  	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
   334  		nlog.Infoln("[get_object]", lom.String(), err)
   335  	}
   336  	return 0, err
   337  }
   338  
   339  func (*gsbp) GetObjReader(ctx context.Context, lom *core.LOM, offset, length int64) (res core.GetReaderResult) {
   340  	var (
   341  		attrs    *storage.ObjectAttrs
   342  		rc       *storage.Reader
   343  		cloudBck = lom.Bck().RemoteBck()
   344  		o        = gcpClient.Bucket(cloudBck.Name).Object(lom.ObjName)
   345  	)
   346  	attrs, res.Err = o.Attrs(ctx)
   347  	if res.Err != nil {
   348  		res.ErrCode, res.Err = gcpErrorToAISError(res.Err, cloudBck)
   349  		return res
   350  	}
   351  	if length > 0 {
   352  		rc, res.Err = o.NewRangeReader(ctx, offset, length)
   353  		if res.Err != nil {
   354  			if res.ErrCode == http.StatusRequestedRangeNotSatisfiable {
   355  				res.Err = cmn.NewErrRangeNotSatisfiable(res.Err, nil, 0)
   356  			}
   357  			return res
   358  		}
   359  	} else {
   360  		rc, res.Err = o.NewReader(ctx)
   361  		if res.Err != nil {
   362  			return res
   363  		}
   364  		// custom metadata
   365  		lom.SetCustomKey(cmn.SourceObjMD, apc.GCP)
   366  		if cksumType, ok := attrs.Metadata[gcpChecksumType]; ok {
   367  			if cksumValue, ok := attrs.Metadata[gcpChecksumVal]; ok {
   368  				lom.SetCksum(cos.NewCksum(cksumType, cksumValue))
   369  			}
   370  		}
   371  		res.ExpCksum = setCustomGs(lom, attrs)
   372  	}
   373  
   374  	res.Size = rc.Attrs.Size
   375  	res.R = rc
   376  	return res
   377  }
   378  
   379  func setCustomGs(lom *core.LOM, attrs *storage.ObjectAttrs) (expCksum *cos.Cksum) {
   380  	h := cmn.BackendHelpers.Google
   381  	if v, ok := h.EncodeVersion(attrs.Generation); ok {
   382  		lom.SetVersion(v)
   383  		lom.SetCustomKey(cmn.VersionObjMD, v)
   384  	}
   385  	if v, ok := h.EncodeCksum(attrs.MD5); ok {
   386  		lom.SetCustomKey(cmn.MD5ObjMD, v)
   387  		expCksum = cos.NewCksum(cos.ChecksumMD5, v)
   388  	}
   389  	if v, ok := h.EncodeCksum(attrs.CRC32C); ok {
   390  		lom.SetCustomKey(cmn.CRC32CObjMD, v)
   391  		if expCksum == nil {
   392  			expCksum = cos.NewCksum(cos.ChecksumCRC32C, v)
   393  		}
   394  	}
   395  	if v, ok := h.EncodeCksum(attrs.Etag); ok {
   396  		lom.SetCustomKey(cmn.ETag, v)
   397  	}
   398  	lom.SetCustomKey(cmn.LastModified, fmtTime(attrs.Updated))
   399  	return
   400  }
   401  
   402  //
   403  // PUT OBJECT
   404  //
   405  
   406  func (gsbp *gsbp) PutObj(r io.ReadCloser, lom *core.LOM, _ *http.Request) (ecode int, err error) {
   407  	var (
   408  		attrs    *storage.ObjectAttrs
   409  		written  int64
   410  		cloudBck = lom.Bck().RemoteBck()
   411  		md       = make(cos.StrKVs, 2)
   412  		gcpObj   = gcpClient.Bucket(cloudBck.Name).Object(lom.ObjName)
   413  		wc       = gcpObj.NewWriter(gctx)
   414  	)
   415  	md[gcpChecksumType], md[gcpChecksumVal] = lom.Checksum().Get()
   416  
   417  	wc.Metadata = md
   418  	buf, slab := gsbp.t.PageMM().Alloc()
   419  	written, err = io.CopyBuffer(wc, r, buf)
   420  	slab.Free(buf)
   421  	cos.Close(r)
   422  	if err != nil {
   423  		return
   424  	}
   425  	if err = wc.Close(); err != nil {
   426  		ecode, err = gcpErrorToAISError(err, cloudBck)
   427  		return
   428  	}
   429  	attrs, err = gcpObj.Attrs(gctx)
   430  	if err != nil {
   431  		ecode, err = handleObjectError(gctx, gcpClient, err, cloudBck)
   432  		return
   433  	}
   434  	_ = setCustomGs(lom, attrs)
   435  	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
   436  		nlog.Infof("[put_object] %s, size %d", lom, written)
   437  	}
   438  	return
   439  }
   440  
   441  //
   442  // DELETE OBJECT
   443  //
   444  
   445  func (*gsbp) DeleteObj(lom *core.LOM) (ecode int, err error) {
   446  	var (
   447  		cloudBck = lom.Bck().RemoteBck()
   448  		o        = gcpClient.Bucket(cloudBck.Name).Object(lom.ObjName)
   449  	)
   450  	if err = o.Delete(gctx); err != nil {
   451  		ecode, err = handleObjectError(gctx, gcpClient, err, cloudBck)
   452  		return
   453  	}
   454  	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
   455  		nlog.Infof("[delete_object] %s", lom)
   456  	}
   457  	return
   458  }
   459  
   460  //
   461  // static helpers
   462  //
   463  
   464  func readCredFile() (projectID string) {
   465  	credFile, err := os.Open(os.Getenv(credPathEnvVar))
   466  	if err != nil {
   467  		return
   468  	}
   469  	b, err := io.ReadAll(credFile)
   470  	credFile.Close()
   471  	if err != nil {
   472  		return
   473  	}
   474  	projectID, _ = jsoniter.Get(b, projectIDField).GetInterface().(string)
   475  	return
   476  }
   477  
   478  const gcpErrPrefix = "gcp-error"
   479  
   480  func gcpErrorToAISError(gcpError error, bck *cmn.Bck) (int, error) {
   481  	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
   482  		nlog.InfoDepth(1, "begin "+gcpErrPrefix+" =========================")
   483  		nlog.InfoDepth(1, gcpError)
   484  		nlog.InfoDepth(1, "end "+gcpErrPrefix+" ===========================")
   485  	}
   486  	if gcpError == storage.ErrBucketNotExist {
   487  		return http.StatusNotFound, cmn.NewErrRemoteBckNotFound(bck)
   488  	}
   489  	err := _gcpErr(gcpError)
   490  	if gcpError == storage.ErrObjectNotExist {
   491  		return http.StatusNotFound, err
   492  	}
   493  	apiErr, ok := gcpError.(*googleapi.Error)
   494  	if !ok {
   495  		return http.StatusInternalServerError, err
   496  	}
   497  	if apiErr.Code == http.StatusForbidden && strings.Contains(apiErr.Error(), "may not exist") {
   498  		// HACK: "not found or misspelled" vs  "service not paid for" (the latter less likely)
   499  		if cmn.Rom.FastV(4, cos.SmoduleBackend) {
   500  			nlog.Infoln(err)
   501  		}
   502  		return http.StatusNotFound, err
   503  	}
   504  	return apiErr.Code, err
   505  }
   506  
   507  // (compare w/ _awsErr)
   508  func _gcpErr(gcpError error) error {
   509  	return errors.New(gcpErrPrefix + "[" + gcpError.Error() + "]")
   510  }
   511  
   512  func handleObjectError(ctx context.Context, gcpClient *storage.Client, objErr error, bck *cmn.Bck) (int, error) {
   513  	if objErr != storage.ErrObjectNotExist {
   514  		return http.StatusBadRequest, _gcpErr(objErr)
   515  	}
   516  
   517  	// Object does not exist but in GCP it doesn't necessarily mean that the bucket does.
   518  	if _, err := gcpClient.Bucket(bck.Name).Attrs(ctx); err != nil {
   519  		return gcpErrorToAISError(err, bck)
   520  	}
   521  	return http.StatusNotFound, cos.NewErrNotFound(nil, _gcpErr(objErr).Error())
   522  }