github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/blobstore/gcs.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package blobstore 16 17 import ( 18 "context" 19 "fmt" 20 "io" 21 "path" 22 "strconv" 23 24 "cloud.google.com/go/storage" 25 "github.com/google/uuid" 26 "golang.org/x/sync/errgroup" 27 "google.golang.org/api/googleapi" 28 ) 29 30 const ( 31 precondFailCode = 412 32 33 composeBatch = 32 34 ) 35 36 // GCSBlobstore provides a GCS implementation of the Blobstore interface 37 type GCSBlobstore struct { 38 bucket *storage.BucketHandle 39 bucketName string 40 prefix string 41 } 42 43 var _ Blobstore = &GCSBlobstore{} 44 45 // NewGCSBlobstore creates a new instance of a GCSBlobstore 46 func NewGCSBlobstore(gcs *storage.Client, bucketName, prefix string) *GCSBlobstore { 47 for len(prefix) > 0 && prefix[0] == '/' { 48 prefix = prefix[1:] 49 } 50 51 bucket := gcs.Bucket(bucketName) 52 return &GCSBlobstore{bucket, bucketName, prefix} 53 } 54 55 func (bs *GCSBlobstore) Path() string { 56 return path.Join(bs.bucketName, bs.prefix) 57 } 58 59 // Exists returns true if a blob exists for the given key, and false if it does not. 60 // For InMemoryBlobstore instances error should never be returned (though other 61 // implementations of this interface can) 62 func (bs *GCSBlobstore) Exists(ctx context.Context, key string) (bool, error) { 63 absKey := path.Join(bs.prefix, key) 64 oh := bs.bucket.Object(absKey) 65 _, err := oh.Attrs(ctx) 66 67 if err == storage.ErrObjectNotExist { 68 return false, nil 69 } 70 71 return err == nil, err 72 } 73 74 // Get retrieves an io.reader for the portion of a blob specified by br along with 75 // its version 76 func (bs *GCSBlobstore) Get(ctx context.Context, key string, br BlobRange) (io.ReadCloser, string, error) { 77 absKey := path.Join(bs.prefix, key) 78 oh := bs.bucket.Object(absKey) 79 var reader *storage.Reader 80 var err error 81 if br.isAllRange() { 82 reader, err = oh.NewReader(ctx) 83 } else { 84 offset, length := br.offset, br.length 85 if offset < 0 { 86 length = -1 87 } 88 reader, err = oh.NewRangeReader(ctx, offset, length) 89 } 90 91 if err == storage.ErrObjectNotExist { 92 return nil, "", NotFound{"gs://" + path.Join(bs.bucketName, absKey)} 93 } else if err != nil { 94 return nil, "", err 95 } 96 97 attrs := reader.Attrs 98 generation := attrs.Generation 99 100 return reader, fmtGeneration(generation), nil 101 } 102 103 func writeObj(writer *storage.Writer, reader io.Reader) (string, error) { 104 writeErr, closeErr := func() (writeErr error, closeErr error) { 105 defer func() { 106 closeErr = writer.Close() 107 }() 108 _, writeErr = io.Copy(writer, reader) 109 110 return 111 }() 112 113 if writeErr != nil { 114 return "", writeErr 115 } else if closeErr != nil { 116 return "", closeErr 117 } 118 119 generation := writer.Attrs().Generation 120 121 return fmtGeneration(generation), nil 122 } 123 124 // Put sets the blob and the version for a key 125 func (bs *GCSBlobstore) Put(ctx context.Context, key string, totalSize int64, reader io.Reader) (string, error) { 126 absKey := path.Join(bs.prefix, key) 127 oh := bs.bucket.Object(absKey) 128 writer := oh.NewWriter(ctx) 129 130 return writeObj(writer, reader) 131 } 132 133 // CheckAndPut will check the current version of a blob against an expectedVersion, and if the 134 // versions match it will update the data and version associated with the key 135 func (bs *GCSBlobstore) CheckAndPut(ctx context.Context, expectedVersion, key string, totalSize int64, reader io.Reader) (string, error) { 136 absKey := path.Join(bs.prefix, key) 137 oh := bs.bucket.Object(absKey) 138 139 var conditionalHandle *storage.ObjectHandle 140 if expectedVersion != "" { 141 expectedGen, err := strconv.ParseInt(expectedVersion, 16, 64) 142 143 if err != nil { 144 panic("Invalid expected Version") 145 } 146 147 conditionalHandle = oh.If(storage.Conditions{GenerationMatch: expectedGen}) 148 } else { 149 conditionalHandle = oh.If(storage.Conditions{DoesNotExist: true}) 150 } 151 152 writer := conditionalHandle.NewWriter(ctx) 153 154 ver, err := writeObj(writer, reader) 155 156 if err != nil { 157 apiErr, ok := err.(*googleapi.Error) 158 159 if ok { 160 if apiErr.Code == precondFailCode { 161 return "", CheckAndPutError{key, expectedVersion, "unknown (Not supported in GCS implementation)"} 162 } 163 } 164 } 165 166 return ver, err 167 } 168 169 func (bs *GCSBlobstore) Concatenate(ctx context.Context, key string, sources []string) (string, error) { 170 // GCS compose has a batch size limit, 171 // recursively compose sources 172 for len(sources) > composeBatch { 173 // compose subsets of |sources| in batches, 174 // store tmp composite objects in |next| 175 var next []string 176 var batches [][]string 177 for len(sources) > 0 { 178 k := min(composeBatch, len(sources)) 179 batches = append(batches, sources[:k]) 180 next = append(next, uuid.New().String()) 181 sources = sources[k:] 182 } 183 // execute compose calls concurrently 184 eg, ectx := errgroup.WithContext(ctx) 185 for i := 0; i < len(batches); i++ { 186 idx := i 187 eg.Go(func() (err error) { 188 _, err = bs.composeObjects(ectx, next[idx], batches[idx]) 189 return 190 }) 191 } 192 if err := eg.Wait(); err != nil { 193 return "", err 194 } 195 sources = next 196 } 197 return bs.composeObjects(ctx, key, sources) 198 } 199 200 func (bs *GCSBlobstore) composeObjects(ctx context.Context, composite string, sources []string) (gen string, err error) { 201 if len(sources) > composeBatch { 202 return "", fmt.Errorf("too many objects to compose (%d > %d)", len(sources), composeBatch) 203 } 204 205 objects := make([]*storage.ObjectHandle, len(sources)) 206 eg, ectx := errgroup.WithContext(ctx) 207 for i := range objects { 208 idx := i 209 eg.Go(func() (err error) { 210 var a *storage.ObjectAttrs 211 oh := bs.bucket.Object(path.Join(bs.prefix, sources[idx])) 212 if a, err = oh.Attrs(ectx); err != nil { 213 return err 214 } 215 objects[idx] = oh.Generation(a.Generation) 216 return 217 }) 218 } 219 if err = eg.Wait(); err != nil { 220 return "", err 221 } 222 223 // compose |objects| into |c| 224 var a *storage.ObjectAttrs 225 c := bs.bucket.Object(path.Join(bs.prefix, composite)) 226 if a, err = c.ComposerFrom(objects...).Run(ctx); err != nil { 227 return "", err 228 } 229 return fmtGeneration(a.Generation), nil 230 } 231 232 func fmtGeneration(g int64) string { 233 return strconv.FormatInt(g, 16) 234 } 235 236 func min(l, r int) (m int) { 237 if l < r { 238 m = l 239 } else { 240 m = r 241 } 242 return 243 }