github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/blobstore/gcs.go

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package blobstore

import (
	"context"
	"fmt"
	"io"
	"path"
	"strconv"

	"cloud.google.com/go/storage"
	"github.com/google/uuid"
	"golang.org/x/sync/errgroup"
	"google.golang.org/api/googleapi"
)

const (
	// precondFailCode is the HTTP status code (412 Precondition Failed) that GCS
	// returns when a conditional write's generation precondition is not met.
	precondFailCode = 412

	// composeBatch is the maximum number of source objects a single GCS compose
	// request may reference.
	composeBatch = 32
)

// GCSBlobstore provides a GCS implementation of the Blobstore interface
type GCSBlobstore struct {
	bucket     *storage.BucketHandle
	bucketName string
	prefix     string
}

var _ Blobstore = &GCSBlobstore{}

// NewGCSBlobstore creates a new instance of a GCSBlobstore
func NewGCSBlobstore(gcs *storage.Client, bucketName, prefix string) *GCSBlobstore {
	// Strip any leading slashes so keys join cleanly under the prefix.
	for len(prefix) > 0 && prefix[0] == '/' {
		prefix = prefix[1:]
	}

	bucket := gcs.Bucket(bucketName)
	return &GCSBlobstore{bucket, bucketName, prefix}
}
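
// newExampleGCSBlobstore is an illustrative usage sketch showing how a
// GCSBlobstore might be wired up. It assumes Application Default Credentials
// are available in the environment; the bucket and prefix names below are
// placeholders, not values used by this package.
func newExampleGCSBlobstore(ctx context.Context) (*GCSBlobstore, error) {
	// storage.NewClient picks up credentials from the environment (ADC).
	client, err := storage.NewClient(ctx)
	if err != nil {
		return nil, err
	}
	// A leading slash in the prefix is tolerated; NewGCSBlobstore strips it.
	return NewGCSBlobstore(client, "example-dolt-bucket", "/databases/main"), nil
}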

// Path returns the bucket name joined with the prefix under which this
// blobstore stores its keys.
func (bs *GCSBlobstore) Path() string {
	return path.Join(bs.bucketName, bs.prefix)
}

// Exists returns true if a blob exists for the given key, and false if it does
// not. A non-nil error is returned if the attribute lookup fails for any reason
// other than the object not existing.
func (bs *GCSBlobstore) Exists(ctx context.Context, key string) (bool, error) {
	absKey := path.Join(bs.prefix, key)
	oh := bs.bucket.Object(absKey)
	_, err := oh.Attrs(ctx)

	if err == storage.ErrObjectNotExist {
		return false, nil
	}

	return err == nil, err
}

// Get retrieves an io.ReadCloser for the portion of a blob specified by br
// along with its version
func (bs *GCSBlobstore) Get(ctx context.Context, key string, br BlobRange) (io.ReadCloser, string, error) {
	absKey := path.Join(bs.prefix, key)
	oh := bs.bucket.Object(absKey)
	var reader *storage.Reader
	var err error
	if br.isAllRange() {
		reader, err = oh.NewReader(ctx)
	} else {
		offset, length := br.offset, br.length
		if offset < 0 {
			// A negative offset reads that many bytes from the end of the
			// object; NewRangeReader requires a negative length in that case.
			length = -1
		}
		reader, err = oh.NewRangeReader(ctx, offset, length)
	}

	if err == storage.ErrObjectNotExist {
		return nil, "", NotFound{"gs://" + path.Join(bs.bucketName, absKey)}
	} else if err != nil {
		return nil, "", err
	}

	attrs := reader.Attrs
	generation := attrs.Generation

	return reader, fmtGeneration(generation), nil
}
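
// readVersioned is a hypothetical helper sketched here for illustration: it
// fetches a range of a blob with Get, captures the generation-derived version
// string, and drains the reader. The caller supplies the BlobRange (for
// example, the package's whole-object range).
func readVersioned(ctx context.Context, bs *GCSBlobstore, key string, br BlobRange) ([]byte, string, error) {
	rc, ver, err := bs.Get(ctx, key, br)
	if err != nil {
		return nil, "", err
	}
	defer rc.Close()

	data, err := io.ReadAll(rc)
	if err != nil {
		return nil, "", err
	}
	return data, ver, nil
}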

// writeObj copies reader into writer, closes the writer, and returns the hex
// generation of the newly written object. Close errors are surfaced because the
// upload is not committed (and conditional failures are not reported) until
// Close returns.
func writeObj(writer *storage.Writer, reader io.Reader) (string, error) {
	writeErr, closeErr := func() (writeErr error, closeErr error) {
		defer func() {
			closeErr = writer.Close()
		}()
		_, writeErr = io.Copy(writer, reader)

		return
	}()

	if writeErr != nil {
		return "", writeErr
	} else if closeErr != nil {
		return "", closeErr
	}

	generation := writer.Attrs().Generation

	return fmtGeneration(generation), nil
}

// Put sets the blob and the version for a key. The totalSize argument is
// accepted for interface compatibility; this implementation streams the reader
// and does not use it.
func (bs *GCSBlobstore) Put(ctx context.Context, key string, totalSize int64, reader io.Reader) (string, error) {
	absKey := path.Join(bs.prefix, key)
	oh := bs.bucket.Object(absKey)
	writer := oh.NewWriter(ctx)

	return writeObj(writer, reader)
}

// CheckAndPut will check the current version of a blob against an expectedVersion,
// and if the versions match it will update the data and version associated with
// the key. An empty expectedVersion requires that the key does not exist yet.
func (bs *GCSBlobstore) CheckAndPut(ctx context.Context, expectedVersion, key string, totalSize int64, reader io.Reader) (string, error) {
	absKey := path.Join(bs.prefix, key)
	oh := bs.bucket.Object(absKey)

	var conditionalHandle *storage.ObjectHandle
	if expectedVersion != "" {
		expectedGen, err := strconv.ParseInt(expectedVersion, 16, 64)

		if err != nil {
			panic("Invalid expected Version")
		}

		conditionalHandle = oh.If(storage.Conditions{GenerationMatch: expectedGen})
	} else {
		conditionalHandle = oh.If(storage.Conditions{DoesNotExist: true})
	}

	writer := conditionalHandle.NewWriter(ctx)

	ver, err := writeObj(writer, reader)

	if err != nil {
		apiErr, ok := err.(*googleapi.Error)

		if ok {
			if apiErr.Code == precondFailCode {
				return "", CheckAndPutError{key, expectedVersion, "unknown (Not supported in GCS implementation)"}
			}
		}
	}

	return ver, err
}
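
// createIfAbsent is a hypothetical helper sketched here to illustrate the
// optimistic-concurrency contract above: an empty expectedVersion makes
// CheckAndPut demand that the key does not exist yet, so when several writers
// race, exactly one succeeds and the rest see a CheckAndPutError.
func createIfAbsent(ctx context.Context, bs *GCSBlobstore, key string, size int64, data io.Reader) (created bool, version string, err error) {
	version, err = bs.CheckAndPut(ctx, "", key, size, data)
	if err != nil {
		if _, ok := err.(CheckAndPutError); ok {
			// Another writer created the key first; not a failure of this path.
			return false, "", nil
		}
		return false, "", err
	}
	return true, version, nil
}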

// Concatenate composes the objects named by |sources| into a single object
// stored at |key| and returns the new object's version. GCS limits a single
// compose call to composeBatch sources, so larger inputs are composed in
// batches of temporary objects until one final compose remains.
func (bs *GCSBlobstore) Concatenate(ctx context.Context, key string, sources []string) (string, error) {
	// GCS compose has a batch size limit,
	// recursively compose sources
	for len(sources) > composeBatch {
		// compose subsets of |sources| in batches,
		// store tmp composite objects in |next|
		var next []string
		var batches [][]string
		for len(sources) > 0 {
			k := min(composeBatch, len(sources))
			batches = append(batches, sources[:k])
			next = append(next, uuid.New().String())
			sources = sources[k:]
		}
		// execute compose calls concurrently
		eg, ectx := errgroup.WithContext(ctx)
		for i := 0; i < len(batches); i++ {
			idx := i
			eg.Go(func() (err error) {
				_, err = bs.composeObjects(ectx, next[idx], batches[idx])
				return
			})
		}
		if err := eg.Wait(); err != nil {
			return "", err
		}
		sources = next
	}
	return bs.composeObjects(ctx, key, sources)
}
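
// writeAndConcatenate is a hypothetical helper sketched here for illustration:
// it uploads each part under a temporary UUID key and then uses Concatenate to
// merge them into destKey. It passes -1 for totalSize since the Put above does
// not consult it, and it does not clean up the temporary objects.
func writeAndConcatenate(ctx context.Context, bs *GCSBlobstore, destKey string, parts []io.Reader) (string, error) {
	tmpKeys := make([]string, len(parts))
	for i, part := range parts {
		tmpKeys[i] = uuid.New().String()
		if _, err := bs.Put(ctx, tmpKeys[i], -1, part); err != nil {
			return "", err
		}
	}
	return bs.Concatenate(ctx, destKey, tmpKeys)
}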

// composeObjects composes at most composeBatch source objects into |composite|,
// pinning each source to the generation observed when its attributes are read.
func (bs *GCSBlobstore) composeObjects(ctx context.Context, composite string, sources []string) (gen string, err error) {
	if len(sources) > composeBatch {
		return "", fmt.Errorf("too many objects to compose (%d > %d)", len(sources), composeBatch)
	}

	objects := make([]*storage.ObjectHandle, len(sources))
	eg, ectx := errgroup.WithContext(ctx)
	for i := range objects {
		idx := i
		eg.Go(func() (err error) {
			var a *storage.ObjectAttrs
			oh := bs.bucket.Object(path.Join(bs.prefix, sources[idx]))
			if a, err = oh.Attrs(ectx); err != nil {
				return err
			}
			objects[idx] = oh.Generation(a.Generation)
			return
		})
	}
	if err = eg.Wait(); err != nil {
		return "", err
	}

	// compose |objects| into |c|
	var a *storage.ObjectAttrs
	c := bs.bucket.Object(path.Join(bs.prefix, composite))
	if a, err = c.ComposerFrom(objects...).Run(ctx); err != nil {
		return "", err
	}
	return fmtGeneration(a.Generation), nil
}

// fmtGeneration renders a GCS object generation as the hex string used as a
// blobstore version.
func fmtGeneration(g int64) string {
	return strconv.FormatInt(g, 16)
}

func min(l, r int) (m int) {
	if l < r {
		m = l
	} else {
		m = r
	}
	return
}