github.com/bazelbuild/remote-apis-sdks@v0.0.0-20240425170053-8a36686a6350/go/pkg/client/cas_upload.go

package client

import (
	"context"
	"fmt"
	"strconv"
	"sync"
	"sync/atomic"
	"time"

	"github.com/bazelbuild/remote-apis-sdks/go/pkg/chunker"
	"github.com/bazelbuild/remote-apis-sdks/go/pkg/contextmd"
	"github.com/bazelbuild/remote-apis-sdks/go/pkg/digest"
	"github.com/bazelbuild/remote-apis-sdks/go/pkg/uploadinfo"
	repb "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2"
	log "github.com/golang/glog"
	"github.com/google/uuid"
	"github.com/klauspost/compress/zstd"
	"github.com/pkg/errors"
	"golang.org/x/sync/errgroup"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
	"google.golang.org/protobuf/proto"
)

// MissingBlobs queries the CAS to determine if it has the specified blobs.
// Returns a slice of missing blobs.
func (c *Client) MissingBlobs(ctx context.Context, digests []digest.Digest) ([]digest.Digest, error) {
	var missing []digest.Digest
	var resultMutex sync.Mutex
	batches := c.makeQueryBatches(ctx, digests)
	eg, eCtx := errgroup.WithContext(ctx)
	for i, batch := range batches {
		i, batch := i, batch // https://golang.org/doc/faq#closures_and_goroutines
		eg.Go(func() error {
			if err := c.casUploaders.Acquire(eCtx, 1); err != nil {
				return err
			}
			defer c.casUploaders.Release(1)
			if i%logInterval == 0 {
				contextmd.Infof(ctx, log.Level(3), "%d missing batches left to query", len(batches)-i)
			}
			var batchPb []*repb.Digest
			for _, dg := range batch {
				batchPb = append(batchPb, dg.ToProto())
			}
			req := &repb.FindMissingBlobsRequest{
				InstanceName: c.InstanceName,
				BlobDigests:  batchPb,
			}
			resp, err := c.FindMissingBlobs(eCtx, req)
			if err != nil {
				return err
			}
			resultMutex.Lock()
			for _, d := range resp.MissingBlobDigests {
				missing = append(missing, digest.NewFromProtoUnvalidated(d))
			}
			resultMutex.Unlock()
			if eCtx.Err() != nil {
				return eCtx.Err()
			}
			return nil
		})
	}
	contextmd.Infof(ctx, log.Level(3), "Waiting for remaining query jobs")
	err := eg.Wait()
	contextmd.Infof(ctx, log.Level(3), "Done")
	return missing, err
}
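
// Illustrative only (not upstream API): a caller might use MissingBlobs on its
// own to decide which locally known blobs still need to be read from disk or
// regenerated before uploading. The helper name and the precomputed-digest
// input are hypothetical; a minimal sketch assuming the *Client is already
// dialed and configured elsewhere.
func exampleFilterToMissing(ctx context.Context, c *Client, known []digest.Digest) (map[digest.Digest]bool, error) {
	missing, err := c.MissingBlobs(ctx, known)
	if err != nil {
		return nil, err
	}
	// Index the result for O(1) membership checks by the caller.
	set := make(map[digest.Digest]bool, len(missing))
	for _, dg := range missing {
		set[dg] = true
	}
	return set, nil
}
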
// UploadIfMissing writes the missing blobs from those specified to the CAS.
//
// The blobs are first matched against existing ones and only the missing blobs are written.
// Returns a slice of missing digests that were written and the sum of total bytes moved, which
// may be different from logical bytes moved (i.e. sum of digest sizes) due to compression.
func (c *Client) UploadIfMissing(ctx context.Context, entries ...*uploadinfo.Entry) ([]digest.Digest, int64, error) {
	if c.UnifiedUploads {
		return c.uploadUnified(ctx, entries...)
	}
	return c.uploadNonUnified(ctx, entries...)
}

// WriteBlobs is a proxy method for UploadIfMissing that facilitates specifying a map of
// digest-to-blob. It's intended for use with PackageTree.
// TODO(olaola): rethink the API of this layer:
// * Do we want to allow []byte uploads, or require the user to construct Chunkers?
// * How to consistently distinguish in the API whether to use GetMissing or not?
// * Should BatchWrite be a public method at all?
func (c *Client) WriteBlobs(ctx context.Context, blobs map[digest.Digest][]byte) error {
	var uEntries []*uploadinfo.Entry
	for _, blob := range blobs {
		uEntries = append(uEntries, uploadinfo.EntryFromBlob(blob))
	}
	_, _, err := c.UploadIfMissing(ctx, uEntries...)
	return err
}

// WriteBlob (over)writes a blob to the CAS regardless of whether it already exists.
func (c *Client) WriteBlob(ctx context.Context, blob []byte) (digest.Digest, error) {
	ue := uploadinfo.EntryFromBlob(blob)
	dg := ue.Digest
	if dg.IsEmpty() {
		contextmd.Infof(ctx, log.Level(2), "Skipping upload of empty blob %s", dg)
		return dg, nil
	}
	ch, err := chunker.New(ue, c.shouldCompressEntry(ue), int(c.ChunkMaxSize))
	if err != nil {
		return dg, err
	}
	rscName, err := c.writeRscName(ue)
	if err != nil {
		return dg, err
	}
	_, err = c.writeChunked(ctx, rscName, ch, false, 0)
	return dg, err
}

// WriteProto is a proxy method for WriteBlob that allows specifying a proto to write.
func (c *Client) WriteProto(ctx context.Context, msg proto.Message) (digest.Digest, error) {
	bytes, err := proto.Marshal(msg)
	if err != nil {
		return digest.Empty, err
	}
	return c.WriteBlob(ctx, bytes)
}
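
// Illustrative only (not upstream API): WriteProto is the usual way to store a
// small REAPI message, such as a repb.Command, and obtain the digest that other
// messages reference. The command literal below is a hypothetical placeholder.
func exampleWriteCommand(ctx context.Context, c *Client) (digest.Digest, error) {
	cmd := &repb.Command{
		Arguments: []string{"/bin/echo", "hello"},
	}
	// Marshals cmd and uploads the bytes via WriteBlob, returning their digest.
	return c.WriteProto(ctx, cmd)
}
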
// zstdEncoder is a shared instance that should only be used in stateless mode, i.e. only by calling EncodeAll().
var zstdEncoder, _ = zstd.NewWriter(nil)

// BatchWriteBlobs (over)writes specified blobs to the CAS, regardless of whether they already exist.
//
// The collective size must be below the maximum total size for a batch upload, which
// is about 4 MB (see MaxBatchSize).
// In case multiple errors occur during the blob upload, the last error is returned.
func (c *Client) BatchWriteBlobs(ctx context.Context, blobs map[digest.Digest][]byte) error {
	var reqs []*repb.BatchUpdateBlobsRequest_Request
	var sz int64
	for k, b := range blobs {
		r := &repb.BatchUpdateBlobsRequest_Request{
			Digest: k.ToProto(),
			Data:   b,
		}
		if bool(c.useBatchCompression) && c.shouldCompress(k.Size) {
			r.Data = zstdEncoder.EncodeAll(r.Data, nil)
			r.Compressor = repb.Compressor_ZSTD
			sz += int64(len(r.Data))
		} else {
			sz += int64(k.Size)
		}
		reqs = append(reqs, r)
	}
	if sz > int64(c.MaxBatchSize) {
		return fmt.Errorf("batch update of %d total bytes exceeds maximum of %d", sz, c.MaxBatchSize)
	}
	if len(blobs) > int(c.MaxBatchDigests) {
		return fmt.Errorf("batch update of %d total blobs exceeds maximum of %d", len(blobs), c.MaxBatchDigests)
	}
	opts := c.RPCOpts()
	closure := func() error {
		var resp *repb.BatchUpdateBlobsResponse
		err := c.CallWithTimeout(ctx, "BatchUpdateBlobs", func(ctx context.Context) (e error) {
			resp, e = c.cas.BatchUpdateBlobs(ctx, &repb.BatchUpdateBlobsRequest{
				InstanceName: c.InstanceName,
				Requests:     reqs,
			}, opts...)
			return e
		})
		if err != nil {
			return err
		}

		numErrs, errDg, errMsg := 0, new(repb.Digest), ""
		var failedReqs []*repb.BatchUpdateBlobsRequest_Request
		var retriableError error
		allRetriable := true
		for _, r := range resp.Responses {
			st := status.FromProto(r.Status)
			if st.Code() != codes.OK {
				e := StatusDetailedError(st)
				if c.Retrier.ShouldRetry(e) {
					failedReqs = append(failedReqs, &repb.BatchUpdateBlobsRequest_Request{
						Digest: r.Digest,
						Data:   blobs[digest.NewFromProtoUnvalidated(r.Digest)],
					})
					retriableError = e
				} else {
					allRetriable = false
				}
				numErrs++
				errDg = r.Digest
				errMsg = e.Error()
			}
		}
		reqs = failedReqs
		if numErrs > 0 {
			if allRetriable {
				return retriableError // Retriable errors only, retry the failed requests.
			}
			return fmt.Errorf("uploading blobs as part of a batch resulted in %d failures, including blob %s: %s", numErrs, errDg, errMsg)
		}
		return nil
	}
	return c.Retrier.Do(ctx, closure)
}
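
// Illustrative only (not upstream API): BatchWriteBlobs is appropriate when the
// caller already knows the blobs fit in a single BatchUpdateBlobs call (see the
// MaxBatchSize and MaxBatchDigests checks above); larger or unknown sets should
// go through UploadIfMissing instead, which batches and streams as needed. A
// hypothetical sketch for a few tiny blobs:
func exampleBatchWriteSmallBlobs(ctx context.Context, c *Client, blobs [][]byte) error {
	m := make(map[digest.Digest][]byte, len(blobs))
	for _, b := range blobs {
		m[digest.NewFromBlob(b)] = b
	}
	// Fails fast if the combined payload exceeds the client's batch limits.
	return c.BatchWriteBlobs(ctx, m)
}
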
// ResourceNameWrite generates a valid write resource name.
func (c *Client) ResourceNameWrite(hash string, sizeBytes int64) (string, error) {
	id, err := uuid.NewRandom()
	if err != nil {
		return "", err
	}
	return c.ResourceName("uploads", id.String(), "blobs", hash, strconv.FormatInt(sizeBytes, 10))
}

// ResourceNameCompressedWrite generates a valid write resource name for a compressed blob.
// TODO(rubensf): Converge compressor to proto in https://github.com/bazelbuild/remote-apis/pull/168 once
// that gets merged in.
func (c *Client) ResourceNameCompressedWrite(hash string, sizeBytes int64) (string, error) {
	id, err := uuid.NewRandom()
	if err != nil {
		return "", err
	}
	return c.ResourceName("uploads", id.String(), "compressed-blobs", "zstd", hash, strconv.FormatInt(sizeBytes, 10))
}

func (c *Client) writeRscName(ue *uploadinfo.Entry) (string, error) {
	if c.shouldCompressEntry(ue) {
		return c.ResourceNameCompressedWrite(ue.Digest.Hash, ue.Digest.Size)
	}
	return c.ResourceNameWrite(ue.Digest.Hash, ue.Digest.Size)
}
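
// Illustrative only: the two resource-name helpers above assemble ByteStream
// write paths of the form
//
//	<instance>/uploads/<uuid>/blobs/<hash>/<size>
//	<instance>/uploads/<uuid>/compressed-blobs/zstd/<hash>/<size>
//
// and writeRscName picks between them via shouldCompressEntry. The helper below
// is a hypothetical sketch that surfaces both forms for one entry; it is not
// part of the upstream API.
func exampleWriteResourceNames(c *Client, ue *uploadinfo.Entry) (plain, compressed string, err error) {
	if plain, err = c.ResourceNameWrite(ue.Digest.Hash, ue.Digest.Size); err != nil {
		return "", "", err
	}
	compressed, err = c.ResourceNameCompressedWrite(ue.Digest.Hash, ue.Digest.Size)
	return plain, compressed, err
}
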
type uploadRequest struct {
	ue     *uploadinfo.Entry
	meta   *contextmd.Metadata
	wait   chan<- *uploadResponse
	cancel bool
}

type uploadResponse struct {
	digest     digest.Digest
	bytesMoved int64
	err        error
	missing    bool
}

type uploadState struct {
	ue  *uploadinfo.Entry
	err error

	// mu protects clients and cancel. The fields need protection since they are updated by upload
	// whenever new clients join, and iterated on by updateAndNotify at the end of each upload.
	// It does NOT protect ue or err, because they do not need protection -
	// they are only modified when a state object is created, and by updateAndNotify which is called
	// exactly once for a given state object (this is the whole point of the algorithm).
	mu      sync.Mutex
	clients []chan<- *uploadResponse
	cancel  func()
}

func (c *Client) uploadUnified(ctx context.Context, entries ...*uploadinfo.Entry) ([]digest.Digest, int64, error) {
	contextmd.Infof(ctx, log.Level(2), "Request to upload %d blobs", len(entries))

	if len(entries) == 0 {
		return nil, 0, nil
	}
	meta, err := contextmd.ExtractMetadata(ctx)
	if err != nil {
		return nil, 0, err
	}
	wait := make(chan *uploadResponse, len(entries))
	var dgs []digest.Digest
	dedupDgs := make(map[digest.Digest]bool, len(entries))
	for _, ue := range entries {
		if _, ok := dedupDgs[ue.Digest]; !ok {
			dgs = append(dgs, ue.Digest)
			dedupDgs[ue.Digest] = true
		}
	}
	missing, err := c.MissingBlobs(ctx, dgs)
	if err != nil {
		return nil, 0, err
	}
	missingDgs := make(map[digest.Digest]bool, len(missing))
	for _, dg := range missing {
		missingDgs[dg] = true
	}
	var reqs []*uploadRequest
	for _, ue := range entries {
		if _, ok := missingDgs[ue.Digest]; !ok {
			continue
		}
		if ue.Digest.IsEmpty() {
			contextmd.Infof(ctx, log.Level(2), "Skipping upload of empty entry %s", ue.Digest)
			continue
		}
		if ue.IsVirtualFile() {
			return nil, 0, fmt.Errorf("virtual input with digest %q provided, but does not exist in CAS", ue.Digest)
		}
		req := &uploadRequest{
			ue:   ue,
			meta: meta,
			wait: wait,
		}
		reqs = append(reqs, req)
		select {
		case <-ctx.Done():
			contextmd.Infof(ctx, log.Level(2), "Upload canceled")
			c.cancelPendingRequests(reqs)
			return nil, 0, fmt.Errorf("context cancelled: %w", ctx.Err())
		case c.casUploadRequests <- req:
			continue
		}
	}
	totalBytesMoved := int64(0)
	finalMissing := make([]digest.Digest, 0, len(reqs))
	for i := 0; i < len(reqs); i++ {
		select {
		case <-ctx.Done():
			c.cancelPendingRequests(reqs)
			return nil, 0, fmt.Errorf("context cancelled: %w", ctx.Err())
		case resp := <-wait:
			if resp.err != nil {
				return nil, 0, resp.err
			}
			if resp.missing {
				finalMissing = append(finalMissing, resp.digest)
			}
			totalBytesMoved += resp.bytesMoved
		}
	}
	return finalMissing, totalBytesMoved, nil
}
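
// Illustrative only: with UnifiedUploads enabled, concurrent UploadIfMissing
// calls that mention the same digest are funneled through casUploadRequests
// above, so the shared blob is written at most once while every caller still
// receives its own response. A hypothetical sketch of two callers racing on
// one entry; the helper is not part of the upstream API.
func exampleConcurrentUploadOfSharedEntry(ctx context.Context, c *Client, ue *uploadinfo.Entry) error {
	eg, eCtx := errgroup.WithContext(ctx)
	for i := 0; i < 2; i++ {
		eg.Go(func() error {
			// Both goroutines request the same entry; the actual write is
			// deduplicated by uploadProcessor and upload below.
			_, _, err := c.UploadIfMissing(eCtx, ue)
			return err
		})
	}
	return eg.Wait()
}
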
func (c *Client) uploadProcessor(ctx context.Context) {
	var buffer []*uploadRequest
	ticker := time.NewTicker(time.Duration(c.UnifiedUploadTickDuration))
	for {
		select {
		case req, ok := <-c.casUploadRequests:
			if !ok {
				// Client is exiting. Notify remaining uploads to prevent deadlocks.
				ticker.Stop()
				if buffer != nil {
					for _, r := range buffer {
						r.wait <- &uploadResponse{err: context.Canceled}
					}
				}
				return
			}
			if !req.cancel {
				buffer = append(buffer, req)
				if len(buffer) >= int(c.UnifiedUploadBufferSize) {
					c.upload(ctx, buffer)
					buffer = nil
				}
				continue
			}
			// Cancellation request.
			var newBuffer []*uploadRequest
			for _, r := range buffer {
				if r.ue != req.ue || r.wait != req.wait {
					newBuffer = append(newBuffer, r)
				}
			}
			buffer = newBuffer
			st, ok := c.casUploads[req.ue.Digest]
			if ok {
				st.mu.Lock()
				var remainingClients []chan<- *uploadResponse
				for _, w := range st.clients {
					if w != req.wait {
						remainingClients = append(remainingClients, w)
					}
				}
				st.clients = remainingClients
				if len(st.clients) == 0 {
					log.V(3).Infof("Cancelling Write %v", req.ue.Digest)
					if st.cancel != nil {
						st.cancel()
					}
					delete(c.casUploads, req.ue.Digest)
				}
				st.mu.Unlock()
			}
		case <-ticker.C:
			if buffer != nil {
				c.upload(ctx, buffer)
				buffer = nil
			}
		}
	}
}

func (c *Client) upload(ctx context.Context, reqs []*uploadRequest) {
	// Collect new uploads.
	newStates := make(map[digest.Digest]*uploadState)
	var newUploads []digest.Digest
	var metas []*contextmd.Metadata
	log.V(2).Infof("Upload is processing %d requests", len(reqs))
	for _, req := range reqs {
		dg := req.ue.Digest
		st, ok := c.casUploads[dg]
		if ok {
			st.mu.Lock()
			if len(st.clients) > 0 {
				st.clients = append(st.clients, req.wait)
			} else {
				req.wait <- &uploadResponse{err: st.err, missing: false} // Digest is only needed when missing=true.
			}
			st.mu.Unlock()
		} else {
			st = &uploadState{
				clients: []chan<- *uploadResponse{req.wait},
				ue:      req.ue,
			}
			c.casUploads[dg] = st
			newUploads = append(newUploads, dg)
			metas = append(metas, req.meta)
			newStates[dg] = st
		}
	}

	unifiedMeta := contextmd.MergeMetadata(metas...)
	var err error
	if unifiedMeta.ActionID != "" {
		ctx, err = contextmd.WithMetadata(ctx, unifiedMeta)
	}
	if err != nil {
		for _, st := range newStates {
			updateAndNotify(st, 0, err, false)
		}
		return
	}

	contextmd.Infof(ctx, log.Level(2), "%d new items to store", len(newUploads))
	var batches [][]digest.Digest
	if c.useBatchOps {
		batches = c.makeBatches(ctx, newUploads, true)
	} else {
		contextmd.Infof(ctx, log.Level(2), "Uploading them individually")
		for i := range newUploads {
			contextmd.Infof(ctx, log.Level(3), "Creating single batch of blob %s", newUploads[i])
			batches = append(batches, newUploads[i:i+1])
		}
	}

	for i, batch := range batches {
		i, batch := i, batch // https://golang.org/doc/faq#closures_and_goroutines
		go func() {
			if c.casUploaders.Acquire(ctx, 1) == nil {
				defer c.casUploaders.Release(1)
			}
			if i%logInterval == 0 {
				contextmd.Infof(ctx, log.Level(2), "%d batches left to store", len(batches)-i)
			}
			if len(batch) > 1 {
				contextmd.Infof(ctx, log.Level(3), "Uploading batch of %d blobs", len(batch))
				bchMap := make(map[digest.Digest][]byte)
				totalBytesMap := make(map[digest.Digest]int64)
				for _, dg := range batch {
					st := newStates[dg]
					ch, err := chunker.New(st.ue, false, int(c.ChunkMaxSize))
					if err != nil {
						updateAndNotify(st, 0, err, true)
						continue
					}
					data, err := ch.FullData()
					if err != nil {
						updateAndNotify(st, 0, err, true)
						continue
					}
					bchMap[dg] = data
					totalBytesMap[dg] = int64(len(data))
				}
				err := c.BatchWriteBlobs(ctx, bchMap)
				for dg := range bchMap {
					updateAndNotify(newStates[dg], totalBytesMap[dg], err, true)
				}
			} else {
				contextmd.Infof(ctx, log.Level(3), "Uploading single blob with digest %s", batch[0])
				st := newStates[batch[0]]
				st.mu.Lock()
				if len(st.clients) == 0 { // Already cancelled.
					log.V(3).Infof("Blob upload for digest %s was canceled", batch[0])
					st.mu.Unlock()
					return
				}
				cCtx, cancel := context.WithCancel(ctx)
				st.cancel = cancel
				st.mu.Unlock()
				log.V(3).Infof("Uploading single blob with digest %s", batch[0])
				ch, err := chunker.New(st.ue, c.shouldCompressEntry(st.ue), int(c.ChunkMaxSize))
				if err != nil {
					updateAndNotify(st, 0, err, true)
					return // The state has been notified; don't attempt the write.
				}
				rscName, err := c.writeRscName(st.ue)
				if err != nil {
					updateAndNotify(st, 0, err, true)
					return // The state has been notified; don't attempt the write.
				}
				totalBytes, err := c.writeChunked(cCtx, rscName, ch, false, 0)
				updateAndNotify(st, totalBytes, err, true)
			}
		}()
	}
}
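
// Illustrative only: with UnifiedUploads enabled, cancelling the context passed
// to UploadIfMissing makes uploadUnified enqueue cancellation requests (via
// cancelPendingRequests, defined below); uploadProcessor above then removes
// that caller from the per-digest waiter list and aborts the underlying Write
// only when no other caller is still waiting on the same digest. A hypothetical
// caller-side deadline; the helper is not part of the upstream API.
func exampleUploadWithTimeout(ctx context.Context, c *Client, d time.Duration, entries ...*uploadinfo.Entry) ([]digest.Digest, int64, error) {
	tCtx, cancel := context.WithTimeout(ctx, d)
	defer cancel()
	return c.UploadIfMissing(tCtx, entries...)
}
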
// This function is only used when UnifiedUploads is false. It will be removed
// once UnifiedUploads=true is stable.
func (c *Client) uploadNonUnified(ctx context.Context, data ...*uploadinfo.Entry) ([]digest.Digest, int64, error) {
	var dgs []digest.Digest
	ueList := make(map[digest.Digest]*uploadinfo.Entry)
	for _, ue := range data {
		dg := ue.Digest
		if dg.IsEmpty() {
			contextmd.Infof(ctx, log.Level(2), "Skipping upload of empty blob %s", dg)
			continue
		}
		if _, ok := ueList[dg]; !ok {
			dgs = append(dgs, dg)
			ueList[dg] = ue
		}
	}

	missing, err := c.MissingBlobs(ctx, dgs)
	if err != nil {
		return nil, 0, err
	}
	contextmd.Infof(ctx, log.Level(2), "%d items to store", len(missing))
	var batches [][]digest.Digest
	if c.useBatchOps {
		batches = c.makeBatches(ctx, missing, true)
	} else {
		contextmd.Infof(ctx, log.Level(2), "Uploading them individually")
		for i := range missing {
			contextmd.Infof(ctx, log.Level(3), "Creating single batch of blob %s", missing[i])
			batches = append(batches, missing[i:i+1])
		}
	}

	totalBytesTransferred := int64(0)

	eg, eCtx := errgroup.WithContext(ctx)
	for i, batch := range batches {
		i, batch := i, batch // https://golang.org/doc/faq#closures_and_goroutines
		eg.Go(func() error {
			if err := c.casUploaders.Acquire(eCtx, 1); err != nil {
				return err
			}
			defer c.casUploaders.Release(1)
			if i%logInterval == 0 {
				contextmd.Infof(ctx, log.Level(2), "%d batches left to store", len(batches)-i)
			}
			if len(batch) > 1 {
				contextmd.Infof(ctx, log.Level(3), "Uploading batch of %d blobs", len(batch))
				bchMap := make(map[digest.Digest][]byte)
				for _, dg := range batch {
					ue := ueList[dg]
					ch, err := chunker.New(ue, false, int(c.ChunkMaxSize))
					if err != nil {
						return err
					}

					data, err := ch.FullData()
					if err != nil {
						return err
					}

					if dg.Size != int64(len(data)) {
						return errors.Errorf("blob size changed while uploading, given:%d now:%d for %s", dg.Size, int64(len(data)), ue.Path)
					}

					bchMap[dg] = data
					atomic.AddInt64(&totalBytesTransferred, int64(len(data)))
				}
				if err := c.BatchWriteBlobs(eCtx, bchMap); err != nil {
					return err
				}
			} else {
				contextmd.Infof(ctx, log.Level(3), "Uploading single blob with digest %s", batch[0])
				ue := ueList[batch[0]]
				ch, err := chunker.New(ue, c.shouldCompressEntry(ue), int(c.ChunkMaxSize))
				if err != nil {
					return err
				}
				rscName, err := c.writeRscName(ue)
				if err != nil {
					return err
				}
				written, err := c.writeChunked(eCtx, rscName, ch, false, 0)
				if err != nil {
					return fmt.Errorf("failed to upload %s: %w", ue.Path, err)
				}
				atomic.AddInt64(&totalBytesTransferred, written)
			}
			if eCtx.Err() != nil {
				return eCtx.Err()
			}
			return nil
		})
	}

	contextmd.Infof(ctx, log.Level(2), "Waiting for remaining jobs")
	err = eg.Wait()
	contextmd.Infof(ctx, log.Level(2), "Done")
	if err != nil {
		contextmd.Infof(ctx, log.Level(2), "Upload error: %v", err)
	}

	return missing, totalBytesTransferred, err
}

func (c *Client) cancelPendingRequests(reqs []*uploadRequest) {
	for _, req := range reqs {
		c.casUploadRequests <- &uploadRequest{
			cancel: true,
			ue:     req.ue,
			wait:   req.wait,
		}
	}
}

func updateAndNotify(st *uploadState, bytesMoved int64, err error, missing bool) {
	st.mu.Lock()
	defer st.mu.Unlock()
	st.err = err
	for _, cl := range st.clients {
		cl <- &uploadResponse{
			digest:     st.ue.Digest,
			bytesMoved: bytesMoved,
			missing:    missing,
			err:        err,
		}

		// We only report this data to the first client to prevent double accounting.
		bytesMoved = 0
		missing = false
	}
	st.clients = nil
	st.ue = nil
}
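
// Illustrative only: both upload paths report the bytes actually moved over the
// wire, which can be smaller than the logical size of the missing blobs when
// zstd compression kicks in (and is zero when nothing was missing). A
// hypothetical helper that logs the difference; it is not part of the upstream
// API.
func exampleLogUploadSavings(ctx context.Context, c *Client, entries ...*uploadinfo.Entry) error {
	missing, moved, err := c.UploadIfMissing(ctx, entries...)
	if err != nil {
		return err
	}
	var logical int64
	for _, dg := range missing {
		logical += dg.Size
	}
	log.V(1).Infof("uploaded %d blobs: %d logical bytes, %d bytes moved", len(missing), logical, moved)
	return nil
}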