github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/blobstore/oci.go (about) 1 // Copyright 2023 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package blobstore 16 17 import ( 18 "bytes" 19 "context" 20 "errors" 21 "fmt" 22 "io" 23 "math" 24 "net/http" 25 "os" 26 "path" 27 28 "github.com/oracle/oci-go-sdk/v65/common" 29 "github.com/oracle/oci-go-sdk/v65/objectstorage" 30 "golang.org/x/sync/errgroup" 31 ) 32 33 // 10MB part size 34 const minPartSize = 10 * 1024 * 1024 35 const defaultPartSize = 5 * minPartSize 36 const maxPartNum = 10000 37 const defaultBatchSize = 500 * 1024 * 1024 38 const defaultConcurrentListeners = 5 39 40 type toUpload struct { 41 b []byte 42 partNum int 43 } 44 45 type uploadFunc func(ctx context.Context, objectName, uploadID string, partNumber int, contentLength int64, reader io.Reader) (objectstorage.CommitMultipartUploadPartDetails, error) 46 47 type tempLocalObject struct { 48 path string 49 f *os.File 50 } 51 52 var _ io.ReadCloser = &tempLocalObject{} 53 54 func (t *tempLocalObject) Read(p []byte) (int, error) { 55 return t.f.Read(p) 56 } 57 58 func (t *tempLocalObject) Close() error { 59 err := t.f.Close() 60 os.Remove(t.path) 61 return err 62 } 63 64 // OCIBlobstore provides an OCI implementation of the Blobstore interface 65 type OCIBlobstore struct { 66 provider common.ConfigurationProvider 67 client objectstorage.ObjectStorageClient 68 bucketName string 69 namespace string 70 prefix string 71 concurrentListeners int 72 } 73 74 var _ Blobstore = &OCIBlobstore{} 75 76 // NewOCIBlobstore creates a new instance of a OCIBlobstore 77 func NewOCIBlobstore(ctx context.Context, provider common.ConfigurationProvider, client objectstorage.ObjectStorageClient, bucketName, prefix string) (*OCIBlobstore, error) { 78 for len(prefix) > 0 && prefix[0] == '/' { 79 prefix = prefix[1:] 80 } 81 82 // Disable timeout to support big file upload/download, default is 60s 83 client.HTTPClient = &http.Client{} 84 85 request := objectstorage.GetNamespaceRequest{} 86 r, err := client.GetNamespace(ctx, request) 87 if err != nil { 88 return nil, err 89 } 90 91 return &OCIBlobstore{provider, client, bucketName, *r.Value, prefix, defaultConcurrentListeners}, nil 92 } 93 94 func (bs *OCIBlobstore) Path() string { 95 return path.Join(bs.bucketName, bs.prefix) 96 } 97 98 // Exists returns true if a blob exists for the given key, and false if it does not. 99 // For InMemoryBlobstore instances error should never be returned (though other 100 // implementations of this interface can) 101 func (bs *OCIBlobstore) Exists(ctx context.Context, key string) (bool, error) { 102 absKey := path.Join(bs.prefix, key) 103 _, err := bs.client.HeadObject(ctx, objectstorage.HeadObjectRequest{ 104 NamespaceName: &bs.namespace, 105 BucketName: &bs.bucketName, 106 ObjectName: &absKey, 107 }) 108 if err == nil { 109 return true, nil 110 } 111 if serr, ok := common.IsServiceError(err); ok { 112 // handle not found code 113 if serr.GetHTTPStatusCode() == 404 { 114 return false, nil 115 } 116 } 117 return false, err 118 } 119 120 // Get retrieves an io.reader for the portion of a blob specified by br along with its version 121 func (bs *OCIBlobstore) Get(ctx context.Context, key string, br BlobRange) (io.ReadCloser, string, error) { 122 absKey := path.Join(bs.prefix, key) 123 req := objectstorage.GetObjectRequest{ 124 NamespaceName: &bs.namespace, 125 BucketName: &bs.bucketName, 126 ObjectName: &absKey, 127 } 128 129 byteRange := br.asHttpRangeHeader() 130 if byteRange != "" { 131 req.Range = &byteRange 132 } 133 134 res, err := bs.client.GetObject(ctx, req) 135 if err != nil { 136 if serr, ok := common.IsServiceError(err); ok { 137 // handle not found code 138 if serr.GetHTTPStatusCode() == 404 { 139 return nil, "", NotFound{"oci://" + path.Join(bs.bucketName, absKey)} 140 } 141 } 142 return nil, "", err 143 } 144 145 // handle negative offset and positive length 146 if br.offset < 0 && br.length > 0 { 147 lr := io.LimitReader(res.Content, br.length) 148 return io.NopCloser(lr), fmtstr(res.ETag), nil 149 } 150 151 return res.Content, fmtstr(res.ETag), nil 152 } 153 154 // Put sets the blob and the version for a key 155 func (bs *OCIBlobstore) Put(ctx context.Context, key string, totalSize int64, reader io.Reader) (string, error) { 156 return bs.upload(ctx, "", key, totalSize, reader) 157 } 158 159 // CheckAndPut will check the current version of a blob against an expectedVersion, and if the 160 // versions match it will update the data and version associated with the key 161 func (bs *OCIBlobstore) CheckAndPut(ctx context.Context, expectedVersion, key string, totalSize int64, reader io.Reader) (string, error) { 162 return bs.upload(ctx, expectedVersion, key, totalSize, reader) 163 } 164 165 // At the time of this implementation, Oracle Cloud does not provide a way to create composite objects 166 // via their APIs/SDKs. 167 func (bs *OCIBlobstore) Concatenate(ctx context.Context, key string, sources []string) (string, error) { 168 return "", fmt.Errorf("concatenate is unimplemented on the oci blobstore") 169 } 170 171 func (bs *OCIBlobstore) upload(ctx context.Context, expectedVersion, key string, totalSize int64, reader io.Reader) (string, error) { 172 numParts, _ := getNumPartsAndPartSize(totalSize, defaultPartSize, maxPartNum) 173 if totalSize == 0 { 174 return "", errors.New("failed to upload to oci blobstore, no data in reader") 175 } else if totalSize < minPartSize { 176 return bs.checkAndPut(ctx, expectedVersion, key, totalSize, reader) 177 } else { 178 return bs.multipartUpload(ctx, expectedVersion, key, numParts, totalSize, reader) 179 } 180 } 181 182 func (bs *OCIBlobstore) checkAndPut(ctx context.Context, expectedVersion, key string, contentLength int64, reader io.Reader) (string, error) { 183 absKey := path.Join(bs.prefix, key) 184 185 req := objectstorage.PutObjectRequest{ 186 NamespaceName: &bs.namespace, 187 BucketName: &bs.bucketName, 188 ObjectName: &absKey, 189 ContentLength: &contentLength, 190 PutObjectBody: io.NopCloser(reader), 191 } 192 193 if expectedVersion != "" { 194 req.IfMatch = &expectedVersion 195 } else { 196 star := "*" 197 req.IfNoneMatch = &star 198 } 199 200 res, err := bs.client.PutObject(ctx, req) 201 if err != nil { 202 if serr, ok := common.IsServiceError(err); ok { 203 if serr.GetHTTPStatusCode() == 412 { 204 return "", CheckAndPutError{key, expectedVersion, "unknown (Not supported in OCI implementation)"} 205 } 206 } 207 return "", err 208 } 209 210 return fmtstr(res.ETag), nil 211 } 212 213 func (bs *OCIBlobstore) multipartUpload(ctx context.Context, expectedVersion, key string, numParts int, uploadSize int64, reader io.Reader) (string, error) { 214 absKey := path.Join(bs.prefix, key) 215 216 startReq := objectstorage.CreateMultipartUploadRequest{ 217 NamespaceName: &bs.namespace, 218 BucketName: &bs.bucketName, 219 CreateMultipartUploadDetails: objectstorage.CreateMultipartUploadDetails{ 220 Object: &absKey, 221 }, 222 } 223 224 star := "*" 225 if expectedVersion != "" { 226 startReq.IfMatch = &expectedVersion 227 } else { 228 startReq.IfNoneMatch = &star 229 } 230 231 startRes, err := bs.client.CreateMultipartUpload(ctx, startReq) 232 if err != nil { 233 return "", err 234 } 235 236 parts, err := bs.uploadParts(ctx, absKey, fmtstr(startRes.UploadId), numParts, uploadSize, reader) 237 if err != nil { 238 // ignore this error 239 bs.client.AbortMultipartUpload(ctx, objectstorage.AbortMultipartUploadRequest{ 240 NamespaceName: &bs.namespace, 241 BucketName: &bs.bucketName, 242 ObjectName: &absKey, 243 UploadId: startRes.UploadId, 244 RequestMetadata: common.RequestMetadata{}, 245 }) 246 return "", err 247 } 248 249 commitReq := objectstorage.CommitMultipartUploadRequest{ 250 NamespaceName: &bs.namespace, 251 BucketName: &bs.bucketName, 252 ObjectName: &absKey, 253 UploadId: startRes.UploadId, 254 CommitMultipartUploadDetails: objectstorage.CommitMultipartUploadDetails{PartsToCommit: parts}, 255 } 256 257 if expectedVersion != "" { 258 commitReq.IfMatch = &expectedVersion 259 } else { 260 commitReq.IfNoneMatch = &star 261 } 262 263 commitRes, err := bs.client.CommitMultipartUpload(ctx, commitReq) 264 if err != nil { 265 return "", err 266 } 267 268 return fmtstr(commitRes.ETag), nil 269 } 270 271 func (bs *OCIBlobstore) uploadParts(ctx context.Context, objectName, uploadID string, numParts int, totalSize int64, reader io.Reader) ([]objectstorage.CommitMultipartUploadPartDetails, error) { 272 return uploadParts(ctx, objectName, uploadID, numParts, bs.concurrentListeners, totalSize, defaultBatchSize, reader, bs.uploadPart) 273 } 274 275 func (bs *OCIBlobstore) uploadPart(ctx context.Context, objectName, uploadID string, partNumber int, contentLength int64, reader io.Reader) (objectstorage.CommitMultipartUploadPartDetails, error) { 276 if objectName == "" { 277 return objectstorage.CommitMultipartUploadPartDetails{}, errors.New("object name required to upload part") 278 } 279 280 if uploadID == "" { 281 return objectstorage.CommitMultipartUploadPartDetails{}, errors.New("upload id required to upload part") 282 } 283 284 res, err := bs.client.UploadPart(ctx, objectstorage.UploadPartRequest{ 285 NamespaceName: &bs.namespace, 286 BucketName: &bs.bucketName, 287 ObjectName: &objectName, 288 UploadId: &uploadID, 289 UploadPartNum: &partNumber, 290 ContentLength: &contentLength, 291 UploadPartBody: io.NopCloser(reader), 292 }) 293 if err != nil { 294 return objectstorage.CommitMultipartUploadPartDetails{}, err 295 } 296 297 return objectstorage.CommitMultipartUploadPartDetails{ 298 Etag: res.ETag, 299 PartNum: &partNumber, 300 }, nil 301 } 302 303 func uploadParts(ctx context.Context, objectName, uploadID string, numParts, concurrentListeners int, totalSize, maxBatchSize int64, reader io.Reader, uploadF uploadFunc) ([]objectstorage.CommitMultipartUploadPartDetails, error) { 304 completedParts := make([]objectstorage.CommitMultipartUploadPartDetails, numParts) 305 partSize := int64(math.Ceil(float64(totalSize) / float64(numParts))) 306 307 eg, egCtx := errgroup.WithContext(ctx) 308 eg.SetLimit(concurrentListeners) 309 310 batch := make([]*toUpload, 0) 311 batchSize := int64(0) 312 partNum := 1 313 314 for { 315 if batchSize >= maxBatchSize { 316 for _, u := range batch { 317 u := u 318 eg.Go(func() error { 319 cp, err := uploadF(egCtx, objectName, uploadID, u.partNum, int64(len(u.b)), bytes.NewReader(u.b)) 320 if err != nil { 321 return err 322 } 323 completedParts[u.partNum-1] = cp 324 return nil 325 }) 326 } 327 328 batchSize = 0 329 batch = make([]*toUpload, 0) 330 continue 331 } 332 333 buf := make([]byte, partSize) 334 n, err := reader.Read(buf) 335 if err != nil { 336 if err == io.EOF { 337 break 338 } 339 return nil, err 340 } 341 342 buf = buf[:n] 343 batchSize += int64(n) 344 batch = append(batch, &toUpload{ 345 b: buf, 346 partNum: partNum, 347 }) 348 349 partNum++ 350 } 351 352 if batchSize > 0 && len(batch) > 0 { 353 for _, u := range batch { 354 u := u 355 eg.Go(func() error { 356 cp, err := uploadF(egCtx, objectName, uploadID, u.partNum, int64(len(u.b)), bytes.NewReader(u.b)) 357 if err != nil { 358 return err 359 } 360 completedParts[u.partNum-1] = cp 361 return nil 362 }) 363 } 364 } 365 366 err := eg.Wait() 367 if err != nil { 368 return nil, err 369 } 370 371 return completedParts, nil 372 } 373 374 func getNumPartsAndPartSize(totalSize, partSize, maxPartNum int64) (int, int64) { 375 ps := int64(math.Ceil(float64(totalSize) / float64(maxPartNum))) 376 if ps < partSize { 377 numParts := int(math.Ceil(float64(totalSize) / float64(partSize))) 378 return numParts, partSize 379 } 380 numParts := int(math.Ceil(float64(totalSize) / float64(ps))) 381 return numParts, ps 382 } 383 384 func fmtstr(s *string) string { 385 if s == nil { 386 return "" 387 } 388 return *s 389 }