github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/blobstore/oci.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package blobstore
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"math"
    24  	"net/http"
    25  	"os"
    26  	"path"
    27  
    28  	"github.com/oracle/oci-go-sdk/v65/common"
    29  	"github.com/oracle/oci-go-sdk/v65/objectstorage"
    30  	"golang.org/x/sync/errgroup"
    31  )
    32  
    33  // 10MB part size
    34  const minPartSize = 10 * 1024 * 1024
    35  const defaultPartSize = 5 * minPartSize
    36  const maxPartNum = 10000
    37  const defaultBatchSize = 500 * 1024 * 1024
    38  const defaultConcurrentListeners = 5
    39  
// toUpload is one buffered part of a multipart upload that is waiting to be
// handed to an uploadFunc.
type toUpload struct {
	// b holds the bytes of this part.
	b       []byte
	// partNum is the 1-based part number; uploadParts stores the result of
	// the upload at index partNum-1.
	partNum int
}
    44  
// uploadFunc uploads a single part of a multipart upload. It receives the
// object name, the multipart upload id, the part number, the number of bytes
// in the part and a reader over those bytes, and returns the details that are
// later sent when committing the upload. (*OCIBlobstore).uploadPart is the
// implementation used by OCIBlobstore.
type uploadFunc func(ctx context.Context, objectName, uploadID string, partNumber int, contentLength int64, reader io.Reader) (objectstorage.CommitMultipartUploadPartDetails, error)
    46  
    47  type tempLocalObject struct {
    48  	path string
    49  	f    *os.File
    50  }
    51  
    52  var _ io.ReadCloser = &tempLocalObject{}
    53  
    54  func (t *tempLocalObject) Read(p []byte) (int, error) {
    55  	return t.f.Read(p)
    56  }
    57  
    58  func (t *tempLocalObject) Close() error {
    59  	err := t.f.Close()
    60  	os.Remove(t.path)
    61  	return err
    62  }
    63  
// OCIBlobstore provides an OCI implementation of the Blobstore interface
type OCIBlobstore struct {
	// provider is the OCI configuration provider given to NewOCIBlobstore.
	provider            common.ConfigurationProvider
	// client is the object storage client used for every request.
	client              objectstorage.ObjectStorageClient
	// bucketName is the bucket that holds the blobs.
	bucketName          string
	// namespace is the object storage namespace, looked up through
	// GetNamespace when the blobstore is created.
	namespace           string
	// prefix is joined in front of every key to form the object name.
	prefix              string
	// concurrentListeners limits how many parts of a multipart upload are
	// uploaded at the same time.
	concurrentListeners int
}

// Compile-time check that *OCIBlobstore satisfies Blobstore.
var _ Blobstore = &OCIBlobstore{}
    75  
    76  // NewOCIBlobstore creates a new instance of a OCIBlobstore
    77  func NewOCIBlobstore(ctx context.Context, provider common.ConfigurationProvider, client objectstorage.ObjectStorageClient, bucketName, prefix string) (*OCIBlobstore, error) {
    78  	for len(prefix) > 0 && prefix[0] == '/' {
    79  		prefix = prefix[1:]
    80  	}
    81  
    82  	// Disable timeout to support big file upload/download, default is 60s
    83  	client.HTTPClient = &http.Client{}
    84  
    85  	request := objectstorage.GetNamespaceRequest{}
    86  	r, err := client.GetNamespace(ctx, request)
    87  	if err != nil {
    88  		return nil, err
    89  	}
    90  
    91  	return &OCIBlobstore{provider, client, bucketName, *r.Value, prefix, defaultConcurrentListeners}, nil
    92  }
    93  
    94  func (bs *OCIBlobstore) Path() string {
    95  	return path.Join(bs.bucketName, bs.prefix)
    96  }
    97  
    98  // Exists returns true if a blob exists for the given key, and false if it does not.
    99  // For InMemoryBlobstore instances error should never be returned (though other
   100  // implementations of this interface can)
   101  func (bs *OCIBlobstore) Exists(ctx context.Context, key string) (bool, error) {
   102  	absKey := path.Join(bs.prefix, key)
   103  	_, err := bs.client.HeadObject(ctx, objectstorage.HeadObjectRequest{
   104  		NamespaceName: &bs.namespace,
   105  		BucketName:    &bs.bucketName,
   106  		ObjectName:    &absKey,
   107  	})
   108  	if err == nil {
   109  		return true, nil
   110  	}
   111  	if serr, ok := common.IsServiceError(err); ok {
   112  		// handle not found code
   113  		if serr.GetHTTPStatusCode() == 404 {
   114  			return false, nil
   115  		}
   116  	}
   117  	return false, err
   118  }
   119  
   120  // Get retrieves an io.reader for the portion of a blob specified by br along with its version
   121  func (bs *OCIBlobstore) Get(ctx context.Context, key string, br BlobRange) (io.ReadCloser, string, error) {
   122  	absKey := path.Join(bs.prefix, key)
   123  	req := objectstorage.GetObjectRequest{
   124  		NamespaceName: &bs.namespace,
   125  		BucketName:    &bs.bucketName,
   126  		ObjectName:    &absKey,
   127  	}
   128  
   129  	byteRange := br.asHttpRangeHeader()
   130  	if byteRange != "" {
   131  		req.Range = &byteRange
   132  	}
   133  
   134  	res, err := bs.client.GetObject(ctx, req)
   135  	if err != nil {
   136  		if serr, ok := common.IsServiceError(err); ok {
   137  			// handle not found code
   138  			if serr.GetHTTPStatusCode() == 404 {
   139  				return nil, "", NotFound{"oci://" + path.Join(bs.bucketName, absKey)}
   140  			}
   141  		}
   142  		return nil, "", err
   143  	}
   144  
   145  	// handle negative offset and positive length
   146  	if br.offset < 0 && br.length > 0 {
   147  		lr := io.LimitReader(res.Content, br.length)
   148  		return io.NopCloser(lr), fmtstr(res.ETag), nil
   149  	}
   150  
   151  	return res.Content, fmtstr(res.ETag), nil
   152  }
   153  
   154  // Put sets the blob and the version for a key
   155  func (bs *OCIBlobstore) Put(ctx context.Context, key string, totalSize int64, reader io.Reader) (string, error) {
   156  	return bs.upload(ctx, "", key, totalSize, reader)
   157  }
   158  
   159  // CheckAndPut will check the current version of a blob against an expectedVersion, and if the
   160  // versions match it will update the data and version associated with the key
   161  func (bs *OCIBlobstore) CheckAndPut(ctx context.Context, expectedVersion, key string, totalSize int64, reader io.Reader) (string, error) {
   162  	return bs.upload(ctx, expectedVersion, key, totalSize, reader)
   163  }
   164  
   165  // At the time of this implementation, Oracle Cloud does not provide a way to create composite objects
   166  // via their APIs/SDKs.
   167  func (bs *OCIBlobstore) Concatenate(ctx context.Context, key string, sources []string) (string, error) {
   168  	return "", fmt.Errorf("concatenate is unimplemented on the oci blobstore")
   169  }
   170  
   171  func (bs *OCIBlobstore) upload(ctx context.Context, expectedVersion, key string, totalSize int64, reader io.Reader) (string, error) {
   172  	numParts, _ := getNumPartsAndPartSize(totalSize, defaultPartSize, maxPartNum)
   173  	if totalSize == 0 {
   174  		return "", errors.New("failed to upload to oci blobstore, no data in reader")
   175  	} else if totalSize < minPartSize {
   176  		return bs.checkAndPut(ctx, expectedVersion, key, totalSize, reader)
   177  	} else {
   178  		return bs.multipartUpload(ctx, expectedVersion, key, numParts, totalSize, reader)
   179  	}
   180  }
   181  
   182  func (bs *OCIBlobstore) checkAndPut(ctx context.Context, expectedVersion, key string, contentLength int64, reader io.Reader) (string, error) {
   183  	absKey := path.Join(bs.prefix, key)
   184  
   185  	req := objectstorage.PutObjectRequest{
   186  		NamespaceName: &bs.namespace,
   187  		BucketName:    &bs.bucketName,
   188  		ObjectName:    &absKey,
   189  		ContentLength: &contentLength,
   190  		PutObjectBody: io.NopCloser(reader),
   191  	}
   192  
   193  	if expectedVersion != "" {
   194  		req.IfMatch = &expectedVersion
   195  	} else {
   196  		star := "*"
   197  		req.IfNoneMatch = &star
   198  	}
   199  
   200  	res, err := bs.client.PutObject(ctx, req)
   201  	if err != nil {
   202  		if serr, ok := common.IsServiceError(err); ok {
   203  			if serr.GetHTTPStatusCode() == 412 {
   204  				return "", CheckAndPutError{key, expectedVersion, "unknown (Not supported in OCI implementation)"}
   205  			}
   206  		}
   207  		return "", err
   208  	}
   209  
   210  	return fmtstr(res.ETag), nil
   211  }
   212  
   213  func (bs *OCIBlobstore) multipartUpload(ctx context.Context, expectedVersion, key string, numParts int, uploadSize int64, reader io.Reader) (string, error) {
   214  	absKey := path.Join(bs.prefix, key)
   215  
   216  	startReq := objectstorage.CreateMultipartUploadRequest{
   217  		NamespaceName: &bs.namespace,
   218  		BucketName:    &bs.bucketName,
   219  		CreateMultipartUploadDetails: objectstorage.CreateMultipartUploadDetails{
   220  			Object: &absKey,
   221  		},
   222  	}
   223  
   224  	star := "*"
   225  	if expectedVersion != "" {
   226  		startReq.IfMatch = &expectedVersion
   227  	} else {
   228  		startReq.IfNoneMatch = &star
   229  	}
   230  
   231  	startRes, err := bs.client.CreateMultipartUpload(ctx, startReq)
   232  	if err != nil {
   233  		return "", err
   234  	}
   235  
   236  	parts, err := bs.uploadParts(ctx, absKey, fmtstr(startRes.UploadId), numParts, uploadSize, reader)
   237  	if err != nil {
   238  		// ignore this error
   239  		bs.client.AbortMultipartUpload(ctx, objectstorage.AbortMultipartUploadRequest{
   240  			NamespaceName:   &bs.namespace,
   241  			BucketName:      &bs.bucketName,
   242  			ObjectName:      &absKey,
   243  			UploadId:        startRes.UploadId,
   244  			RequestMetadata: common.RequestMetadata{},
   245  		})
   246  		return "", err
   247  	}
   248  
   249  	commitReq := objectstorage.CommitMultipartUploadRequest{
   250  		NamespaceName:                &bs.namespace,
   251  		BucketName:                   &bs.bucketName,
   252  		ObjectName:                   &absKey,
   253  		UploadId:                     startRes.UploadId,
   254  		CommitMultipartUploadDetails: objectstorage.CommitMultipartUploadDetails{PartsToCommit: parts},
   255  	}
   256  
   257  	if expectedVersion != "" {
   258  		commitReq.IfMatch = &expectedVersion
   259  	} else {
   260  		commitReq.IfNoneMatch = &star
   261  	}
   262  
   263  	commitRes, err := bs.client.CommitMultipartUpload(ctx, commitReq)
   264  	if err != nil {
   265  		return "", err
   266  	}
   267  
   268  	return fmtstr(commitRes.ETag), nil
   269  }
   270  
   271  func (bs *OCIBlobstore) uploadParts(ctx context.Context, objectName, uploadID string, numParts int, totalSize int64, reader io.Reader) ([]objectstorage.CommitMultipartUploadPartDetails, error) {
   272  	return uploadParts(ctx, objectName, uploadID, numParts, bs.concurrentListeners, totalSize, defaultBatchSize, reader, bs.uploadPart)
   273  }
   274  
   275  func (bs *OCIBlobstore) uploadPart(ctx context.Context, objectName, uploadID string, partNumber int, contentLength int64, reader io.Reader) (objectstorage.CommitMultipartUploadPartDetails, error) {
   276  	if objectName == "" {
   277  		return objectstorage.CommitMultipartUploadPartDetails{}, errors.New("object name required to upload part")
   278  	}
   279  
   280  	if uploadID == "" {
   281  		return objectstorage.CommitMultipartUploadPartDetails{}, errors.New("upload id required to upload part")
   282  	}
   283  
   284  	res, err := bs.client.UploadPart(ctx, objectstorage.UploadPartRequest{
   285  		NamespaceName:  &bs.namespace,
   286  		BucketName:     &bs.bucketName,
   287  		ObjectName:     &objectName,
   288  		UploadId:       &uploadID,
   289  		UploadPartNum:  &partNumber,
   290  		ContentLength:  &contentLength,
   291  		UploadPartBody: io.NopCloser(reader),
   292  	})
   293  	if err != nil {
   294  		return objectstorage.CommitMultipartUploadPartDetails{}, err
   295  	}
   296  
   297  	return objectstorage.CommitMultipartUploadPartDetails{
   298  		Etag:    res.ETag,
   299  		PartNum: &partNumber,
   300  	}, nil
   301  }
   302  
   303  func uploadParts(ctx context.Context, objectName, uploadID string, numParts, concurrentListeners int, totalSize, maxBatchSize int64, reader io.Reader, uploadF uploadFunc) ([]objectstorage.CommitMultipartUploadPartDetails, error) {
   304  	completedParts := make([]objectstorage.CommitMultipartUploadPartDetails, numParts)
   305  	partSize := int64(math.Ceil(float64(totalSize) / float64(numParts)))
   306  
   307  	eg, egCtx := errgroup.WithContext(ctx)
   308  	eg.SetLimit(concurrentListeners)
   309  
   310  	batch := make([]*toUpload, 0)
   311  	batchSize := int64(0)
   312  	partNum := 1
   313  
   314  	for {
   315  		if batchSize >= maxBatchSize {
   316  			for _, u := range batch {
   317  				u := u
   318  				eg.Go(func() error {
   319  					cp, err := uploadF(egCtx, objectName, uploadID, u.partNum, int64(len(u.b)), bytes.NewReader(u.b))
   320  					if err != nil {
   321  						return err
   322  					}
   323  					completedParts[u.partNum-1] = cp
   324  					return nil
   325  				})
   326  			}
   327  
   328  			batchSize = 0
   329  			batch = make([]*toUpload, 0)
   330  			continue
   331  		}
   332  
   333  		buf := make([]byte, partSize)
   334  		n, err := reader.Read(buf)
   335  		if err != nil {
   336  			if err == io.EOF {
   337  				break
   338  			}
   339  			return nil, err
   340  		}
   341  
   342  		buf = buf[:n]
   343  		batchSize += int64(n)
   344  		batch = append(batch, &toUpload{
   345  			b:       buf,
   346  			partNum: partNum,
   347  		})
   348  
   349  		partNum++
   350  	}
   351  
   352  	if batchSize > 0 && len(batch) > 0 {
   353  		for _, u := range batch {
   354  			u := u
   355  			eg.Go(func() error {
   356  				cp, err := uploadF(egCtx, objectName, uploadID, u.partNum, int64(len(u.b)), bytes.NewReader(u.b))
   357  				if err != nil {
   358  					return err
   359  				}
   360  				completedParts[u.partNum-1] = cp
   361  				return nil
   362  			})
   363  		}
   364  	}
   365  
   366  	err := eg.Wait()
   367  	if err != nil {
   368  		return nil, err
   369  	}
   370  
   371  	return completedParts, nil
   372  }
   373  
   374  func getNumPartsAndPartSize(totalSize, partSize, maxPartNum int64) (int, int64) {
   375  	ps := int64(math.Ceil(float64(totalSize) / float64(maxPartNum)))
   376  	if ps < partSize {
   377  		numParts := int(math.Ceil(float64(totalSize) / float64(partSize)))
   378  		return numParts, partSize
   379  	}
   380  	numParts := int(math.Ceil(float64(totalSize) / float64(ps)))
   381  	return numParts, ps
   382  }
   383  
   384  func fmtstr(s *string) string {
   385  	if s == nil {
   386  		return ""
   387  	}
   388  	return *s
   389  }