github.com/bazelbuild/remote-apis-sdks@v0.0.0-20240425170053-8a36686a6350/go/pkg/cas/upload.go

package cas

import (
	"context"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"regexp"
	"runtime/trace"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	log "github.com/golang/glog"
	"github.com/google/uuid"
	"github.com/klauspost/compress/zstd"
	"github.com/pkg/errors"
	"golang.org/x/sync/errgroup"
	"google.golang.org/api/support/bundler"
	"google.golang.org/grpc/status"
	"google.golang.org/protobuf/encoding/protowire"
	"google.golang.org/protobuf/proto"

	"github.com/bazelbuild/remote-apis-sdks/go/pkg/cache"
	"github.com/bazelbuild/remote-apis-sdks/go/pkg/digest"
	"github.com/bazelbuild/remote-apis-sdks/go/pkg/retry"
	repb "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2"
	bspb "google.golang.org/genproto/googleapis/bytestream"
)

// zstdEncoders is a pool of ZStd encoders.
// Clients of this pool must call Close() on the encoder after using the
// encoder.
var zstdEncoders = sync.Pool{
	New: func() interface{} {
		enc, _ := zstd.NewWriter(nil)
		return enc
	},
}

// UploadInput specifies a file or directory to upload.
type UploadInput struct {
	// Path to the file or a directory to upload.
	// Must be absolute.
	Path string

	// Allowlist is a filter for files/directories under Path.
	// If a file is not present in the Allowlist and does not reside in a
	// directory present in the Allowlist, then the file is ignored.
	// This is equivalent to deleting all not-matched files/dirs before
	// uploading.
	//
	// Each path in the Allowlist must be relative to UploadInput.Path.
	//
	// Must be empty if Path points to a regular file.
	Allowlist []string

	// Exclude is a file/dir filter. If Exclude is not nil and the
	// absolute path of a file/dir matches this regexp, then the file/dir is skipped.
	// Forward-slash-separated paths are matched against the regexp, so Exclude
	// does not have to be conditional on the OS.
	// If the Path is a directory, then the filter is evaluated against each file
	// in the subtree.
	// See the ErrSkip comments for more details on the semantics of excluding symlinks.
	Exclude *regexp.Regexp

	cleanPath      string
	cleanAllowlist []string

	// pathInfo is the result of Lstat(UploadInput.Path).
	pathInfo os.FileInfo

	// tree maps from a file/dir path to its digest and a directory node.
	// The path is relative to UploadInput.Path.
	//
	// Once digests are computed successfully, guaranteed to have key ".".
	// If the allowlist is not empty, then it also has a key for each clean
	// allowlisted path, as well as each intermediate directory between the root
	// and an allowlisted dir.
	//
	// The main purpose of this field is an UploadInput-local cache that couldn't
	// be placed in uploader.fsCache because of UploadInput-specific parameters
	// that are hard to incorporate into the cache key, namely the allowlist.
	tree                map[string]*digested
	digestsComputed     chan struct{}
	digestsComputedInit sync.Once
	u                   *uploader
}
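
// An illustrative sketch of constructing an UploadInput from an API consumer's
// perspective (the paths and the regexp below are hypothetical):
//
//	in := &cas.UploadInput{
//		Path:      "/abs/path/to/output/dir",
//		Allowlist: []string{"bin", "gen/protos"},
//		Exclude:   regexp.MustCompile(`/\.git(/|$)`),
//	}
//
// Only files under bin and gen/protos would be considered, and anything whose
// forward-slash-separated absolute path matches Exclude would be skipped.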

// Digest returns the digest computed for a file/dir.
// The relPath is relative to UploadInput.Path. Use "." for the digest of the
// UploadInput.Path itself.
//
// Digest is safe to call only after the channel returned by DigestsComputed()
// is closed.
//
// If the digest is unknown, returns a zero digest and an error which is
// ErrDigestUnknown according to errors.Is.
// If the file is a dangling symlink, then its digest is unknown.
func (in *UploadInput) Digest(relPath string) (digest.Digest, error) {
	if in.cleanPath == "" {
		return digest.Digest{}, errors.Errorf("Digest called too soon")
	}

	relPath = filepath.Clean(relPath)

	// Check if this is the root or one of the intermediate nodes in the partial
	// Merkle tree.
	if dig, ok := in.tree[relPath]; ok {
		return digest.NewFromProtoUnvalidated(dig.digest), nil
	}

	absPath := filepath.Join(in.cleanPath, relPath)

	// TODO(nodir): cache this syscall, perhaps using the filemetadata package.
	info, err := os.Lstat(absPath)
	if err != nil {
		return digest.Digest{}, errors.WithStack(err)
	}

	key := makeFSCacheKey(absPath, info.Mode().IsRegular(), in.Exclude)
	switch val, err, loaded := in.u.fsCache.Load(key); {
	case !loaded:
		return digest.Digest{}, errors.Wrapf(ErrDigestUnknown, "digest not found for %#v", absPath)
	case err != nil:
		return digest.Digest{}, errors.WithStack(err)
	default:
		return digest.NewFromProtoUnvalidated(val.(*digested).digest), nil
	}
}

func (in *UploadInput) ensureDigestsComputedInited() chan struct{} {
	in.digestsComputedInit.Do(func() {
		in.digestsComputed = make(chan struct{})
	})
	return in.digestsComputed
}

// DigestsComputed returns a channel which is closed when all digests, including
// descendants, are computed.
// It is guaranteed to be closed by the time Client.Upload() returns successfully.
//
// DigestsComputed() is always safe to call.
func (in *UploadInput) DigestsComputed() <-chan struct{} {
	return in.ensureDigestsComputedInited()
}
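
// An illustrative sketch of reading digests back once an upload is in flight
// (error handling abbreviated; the variable names are hypothetical):
//
//	<-in.DigestsComputed() // closed once the whole subtree is digested
//	rootDig, err := in.Digest(".")
//	if errors.Is(err, cas.ErrDigestUnknown) {
//		// e.g. the path was a dangling symlink
//	}
//	_ = rootDig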

var oneDot = []string{"."}

// init initializes internal fields.
func (in *UploadInput) init(u *uploader) error {
	in.u = u

	if !filepath.IsAbs(in.Path) {
		return errors.Errorf("%q is not absolute", in.Path)
	}
	in.cleanPath = filepath.Clean(in.Path)

	// Do not use os.Stat() here. We want to know if it is a symlink.
	var err error
	if in.pathInfo, err = os.Lstat(in.cleanPath); err != nil {
		return errors.WithStack(err)
	}

	// Process the allowlist.
	in.tree = make(map[string]*digested, 1+len(in.Allowlist))
	switch {
	case len(in.Allowlist) == 0:
		in.cleanAllowlist = oneDot

	case in.pathInfo.Mode().IsRegular():
		return errors.Errorf("the Allowlist is not supported for regular files")

	default:
		in.cleanAllowlist = make([]string, len(in.Allowlist))
		for i, subPath := range in.Allowlist {
			if filepath.IsAbs(subPath) {
				return errors.Errorf("the allowlisted path %q is not relative", subPath)
			}

			cleanSubPath := filepath.Clean(subPath)
			if cleanSubPath == ".." || strings.HasPrefix(cleanSubPath, parentDirPrefix) {
				return errors.Errorf("the allowlisted path %q is not contained by %q", subPath, in.Path)
			}
			in.cleanAllowlist[i] = cleanSubPath
		}
	}
	return nil
}

// partialMerkleTree ensures that for each node in in.tree, not included by any
// other node, all its ancestors are also present in the tree. For example, if
// the tree contains only "foo/bar" and "foo/baz", then partialMerkleTree adds
// "foo" and ".". The latter is the root.
//
// All tree keys must be clean relative paths.
// Returns prepared *uploadItems that represent the ancestors that were added to
// the tree.
func (in *UploadInput) partialMerkleTree() (added []*uploadItem) {
	// Establish parent->child edges.
	children := map[string]map[string]struct{}{}
	for relPath := range in.tree {
		for relPath != "." {
			parent := dirNameRelFast(relPath)
			if childSet, ok := children[parent]; ok {
				childSet[relPath] = struct{}{}
			} else {
				children[parent] = map[string]struct{}{relPath: {}}
			}
			relPath = parent
		}
	}

	// Add the missing ancestors by traversing in post-order.
	var dfs func(relPath string) proto.Message
	dfs = func(relPath string) proto.Message {
		if dig, ok := in.tree[relPath]; ok {
			return dig.dirEntry
		}

		dir := &repb.Directory{}
		for child := range children[relPath] {
			addDirEntry(dir, dfs(child))
		}

		// Prepare an uploadItem.
		absPath := joinFilePathsFast(in.cleanPath, relPath)
		item := uploadItemFromDirMsg(absPath, dir) // normalizes the dir
		added = append(added, item)

		// Compute a directory entry for the parent.
		node := &repb.DirectoryNode{
			Name:   filepath.Base(absPath),
			Digest: item.Digest,
		}

		in.tree[relPath] = &digested{dirEntry: node, digest: item.Digest}
		return node
	}
	dfs(".")
	return added
}

// TransferStats is upload/download statistics.
type TransferStats struct {
	CacheHits   DigestStat
	CacheMisses DigestStat

	Streamed DigestStat // streamed transfers
	Batched  DigestStat // batched transfers
}

// DigestStat is aggregated statistics over a set of digests.
type DigestStat struct {
	Digests int64 // number of unique digests
	Bytes   int64 // total sum of digest sizes

	// TODO(nodir): add something like TransferBytes, i.e. how much was actually transferred
}

// UploadOptions is optional configuration for the Upload function.
// The default options are the zero value of this struct.
type UploadOptions struct {
	// PreserveSymlinks specifies whether to preserve symlinks or convert them
	// to regular files. This doesn't upload the targets of symlinks; the caller
	// needs to specify the targets explicitly if they are necessary too.
	PreserveSymlinks bool

	// AllowDanglingSymlinks specifies whether to upload dangling links or halt
	// the upload with an error.
	//
	// This field is ignored if PreserveSymlinks is false, which is the default.
	AllowDanglingSymlinks bool

	// Prelude is called for each file/dir to be read and uploaded.
	// If it returns an error which is ErrSkip according to errors.Is, then the
	// file/dir is not processed.
	// If it returns another error, then the upload is halted with that error.
	//
	// Prelude might be called multiple times for the same file if different
	// UploadInputs directly/indirectly refer to the same file, but with different
	// UploadInput.Exclude.
	//
	// Prelude is called from different goroutines.
	Prelude func(absPath string, mode os.FileMode) error
}
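
// An illustrative sketch of UploadOptions with a Prelude that skips temporary
// files (the ".tmp" suffix check is hypothetical):
//
//	opt := cas.UploadOptions{
//		PreserveSymlinks: true,
//		Prelude: func(absPath string, mode os.FileMode) error {
//			if strings.HasSuffix(absPath, ".tmp") {
//				return cas.ErrSkip
//			}
//			return nil
//		},
//	}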

// digested is a result of preprocessing a file/dir.
type digested struct {
	dirEntry proto.Message // FileNode, DirectoryNode or SymlinkNode
	digest   *repb.Digest  // may be nil, e.g. for dangling symlinks
}

var (
	// ErrSkip, when returned by UploadOptions.Prelude, means the file/dir must
	// not be uploaded.
	//
	// Note that if UploadOptions.PreserveSymlinks is true and ErrSkip is
	// returned for a symlink target, but not for the symlink itself, then it may
	// result in a dangling symlink.
	ErrSkip = errors.New("skip file")

	// ErrDigestUnknown indicates that the requested digest is unknown.
	// Use errors.Is instead of a direct equality check.
	ErrDigestUnknown = errors.New("the requested digest is unknown")
)

// UploadResult is the result of a Client.Upload call.
// It provides file/dir digests and statistics.
type UploadResult struct {
	Stats TransferStats
	u     *uploader
}

// Upload uploads all files/directories specified by inputC.
//
// Upload assumes ownership of UploadInputs received from inputC.
// They must not be mutated after sending.
//
// Close inputC to indicate that there are no more files/dirs to upload.
// When inputC is closed, Upload finishes uploading the remaining files/dirs and
// exits successfully.
//
// If ctx is canceled, Upload returns with an error.
func (c *Client) Upload(ctx context.Context, opt UploadOptions, inputC <-chan *UploadInput) (*UploadResult, error) {
	eg, ctx := errgroup.WithContext(ctx)
	// Do not exit until all sub-goroutines exit, to prevent goroutine leaks.
	defer eg.Wait()

	u := &uploader{
		Client:        c,
		UploadOptions: opt,
		eg:            eg,
	}

	// Initialize checkBundler, which checks if a blob is present on the server.
	var wgChecks sync.WaitGroup
	u.checkBundler = bundler.NewBundler(&uploadItem{}, func(items interface{}) {
		wgChecks.Add(1)
		// Handle errors and context cancelation via errgroup.
		eg.Go(func() error {
			defer wgChecks.Done()
			return u.check(ctx, items.([]*uploadItem))
		})
	})
	// Given that all digests are small (no more than 40 bytes), the count limit
	// is the bottleneck.
	// We might run into the request size limits only if we have >100K digests.
	u.checkBundler.BundleCountThreshold = u.Config.FindMissingBlobs.MaxItems

	// Initialize batchBundler, which uploads blobs in batches.
	u.batchBundler = bundler.NewBundler(&repb.BatchUpdateBlobsRequest_Request{}, func(subReq interface{}) {
		// Handle errors and context cancelation via errgroup.
		eg.Go(func() error {
			return u.uploadBatch(ctx, subReq.([]*repb.BatchUpdateBlobsRequest_Request))
		})
	})
	// Limit the sum of sub-request sizes to (maxRequestSize - requestOverhead).
	// Subtract 1KB to be on the safe side.
	u.batchBundler.BundleByteLimit = c.Config.BatchUpdateBlobs.MaxSizeBytes - int(marshalledFieldSize(int64(len(c.InstanceName)))) - 1000
	u.batchBundler.BundleCountThreshold = c.Config.BatchUpdateBlobs.MaxItems

	// Start processing path specs.
	eg.Go(func() error {
		// Before exiting this main goroutine, ensure all the work has been completed.
		// Just waiting for u.eg isn't enough because some work may be temporarily
		// in a bundler.
		defer func() {
			u.wgFS.Wait()
			u.checkBundler.Flush() // only after the FS walk is done
			wgChecks.Wait()        // only after checkBundler is flushed
			u.batchBundler.Flush() // only after wgChecks is done
		}()

		for {
			select {
			case <-ctx.Done():
				return ctx.Err()
			case in, ok := <-inputC:
				if !ok {
					return nil
				}
				log.Infof("start startProcessing %s", in.Path)
				if err := u.startProcessing(ctx, in); err != nil {
					return err
				}
				log.Infof("finish startProcessing %s", in.Path)
			}
		}
	})

	return &UploadResult{Stats: u.stats, u: u}, errors.WithStack(eg.Wait())
}
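
// An illustrative sketch of driving Upload through the channel-based API (the
// client construction, paths and error handling are hypothetical/abbreviated):
//
//	inputC := make(chan *cas.UploadInput)
//	go func() {
//		defer close(inputC) // closing inputC tells Upload there is no more work
//		inputC <- &cas.UploadInput{Path: "/abs/path/to/outputs"}
//	}()
//	res, err := client.Upload(ctx, cas.UploadOptions{}, inputC)
//	if err != nil {
//		// handle the error
//	}
//	_ = res.Stats // cache hits/misses, batched/streamed transfer statistics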

// uploader implements a concurrent multi-stage pipeline to read blobs from the
// file system, check their presence on the server and then upload if necessary.
// Common blobs are deduplicated.
//
// uploader.eg is used to schedule work, while concurrency of individual
// expensive operations is controlled via separate semaphores.
//
// Special care is taken for large files: they are read sequentially, opened
// only once per file, and read with a large IO size.
//
// Note: uploader shouldn't store semaphores/locks that protect global
// resources, such as the file system. They should be stored in the Client instead.
type uploader struct {
	*Client
	UploadOptions
	eg    *errgroup.Group
	stats TransferStats

	// wgFS is used to wait for all FS walking to finish.
	wgFS sync.WaitGroup

	// fsCache contains already-processed files.
	// A key can be produced by makeFSCacheKey.
	// The values are of type *digested.
	fsCache cache.SingleFlight

	// checkBundler bundles digests that need to be checked for presence on the
	// server.
	checkBundler *bundler.Bundler
	seenDigests  sync.Map // TODO: consider making it more global

	// batchBundler bundles blobs that can be uploaded using the BatchUpdateBlobs RPC.
	batchBundler *bundler.Bundler
}

// startProcessing adds the item to the appropriate stage depending on its type.
func (u *uploader) startProcessing(ctx context.Context, in *UploadInput) error {
	if !filepath.IsAbs(in.Path) {
		return errors.Errorf("%q is not absolute", in.Path)
	}

	if err := in.init(u); err != nil {
		return errors.WithStack(err)
	}

	// Schedule a file system walk.
	u.wgFS.Add(1)
	u.eg.Go(func() error {
		defer u.wgFS.Done()

		// Concurrently visit each allowlisted path, and use the results to
		// construct a partial Merkle tree. Note that we are not visiting
		// the entire in.cleanPath, which may be much larger than the union of the
		// allowlisted paths.
		log.Infof("start localEg %s", in.Path)
		localEg, ctx := errgroup.WithContext(ctx)
		var treeMu sync.Mutex
		for _, relPath := range in.cleanAllowlist {
			relPath := relPath
			// Schedule a file system walk.
			localEg.Go(func() error {
				absPath := in.cleanPath
				info := in.pathInfo
				if relPath != "." {
					absPath = joinFilePathsFast(in.cleanPath, relPath)
					var err error
					// TODO(nodir): cache this syscall too.
					if info, err = os.Lstat(absPath); err != nil {
						return errors.WithStack(err)
					}
				}

				switch dig, err := u.visitPath(ctx, absPath, info, in.Exclude); {
				case err != nil:
					return errors.Wrapf(err, "%q", absPath)
				case dig != nil:
					treeMu.Lock()
					in.tree[relPath] = dig
					treeMu.Unlock()
				}
				return nil
			})
		}
		if err := localEg.Wait(); err != nil {
			return errors.WithStack(err)
		}
		log.Infof("done localEg %s", in.Path)
		// At this point, all allowlisted paths are digested, and we only need to
		// compute a partial Merkle tree and upload the implied ancestors.
		for _, item := range in.partialMerkleTree() {
			if err := u.scheduleCheck(ctx, item); err != nil {
				return err
			}
		}

		// The entire tree is digested. Notify the caller.
		close(in.ensureDigestsComputedInited())
		return nil
	})
	return nil
}

// makeFSCacheKey returns a key for u.fsCache.
func makeFSCacheKey(absPath string, isRegularFile bool, pathExclude *regexp.Regexp) interface{} {
	// The structure of the cache key is encapsulated by this function.
	type cacheKey struct {
		AbsPath       string
		ExcludeRegexp string
	}

	key := cacheKey{
		AbsPath: absPath,
	}

	if isRegularFile {
		// This is a regular file.
		// Its digest depends only on the file path (assuming the content didn't change),
		// so the cache key is complete. Just return it.
		return key
	}
	// This is a directory and/or a symlink, so the digest also depends on fs-walk
	// settings. Incorporate those too.

	if pathExclude != nil {
		key.ExcludeRegexp = pathExclude.String()
	}
	return key
}

// visitPath visits the file/dir depending on its type (regular, dir, symlink).
// Visits each file only once.
//
// If the file should be skipped, then returns (nil, nil).
// The returned digested.digest may also be nil if the symlink is dangling.
func (u *uploader) visitPath(ctx context.Context, absPath string, info os.FileInfo, pathExclude *regexp.Regexp) (*digested, error) {
	// First, check if the file passes all filters.
	if pathExclude != nil && pathExclude.MatchString(filepath.ToSlash(absPath)) {
		return nil, nil
	}
	// Call the Prelude only after checking the pathExclude.
	if u.Prelude != nil {
		switch err := u.Prelude(absPath, info.Mode()); {
		case errors.Is(err, ErrSkip):
			return nil, nil
		case err != nil:
			return nil, err
		}
	}

	cacheKey := makeFSCacheKey(absPath, info.Mode().IsRegular(), pathExclude)
	cached, err := u.fsCache.LoadOrStore(cacheKey, func() (interface{}, error) {
		switch {
		case info.Mode()&os.ModeSymlink == os.ModeSymlink:
			return u.visitSymlink(ctx, absPath, pathExclude)

		case info.Mode().IsDir():
			node, err := u.visitDir(ctx, absPath, pathExclude)
			return &digested{dirEntry: node, digest: node.GetDigest()}, err

		case info.Mode().IsRegular():
			// Note: makeFSCacheKey assumes that pathExclude is not used here.
			node, err := u.visitRegularFile(ctx, absPath, info)
			return &digested{dirEntry: node, digest: node.GetDigest()}, err

		// Ignore all unexpected modes (e.g. domain sockets as used by git
		// fsmonitor).
		default:
			return nil, nil
		}
	})
	if err != nil {
		return nil, err
	}
	return cached.(*digested), nil
}

// visitRegularFile computes the hash of a regular file and schedules a presence
// check.
//
// It distinguishes three categories of file sizes:
//   - small: small files are buffered in memory entirely, thus read only once.
//     See also ClientConfig.SmallFileThreshold.
//   - medium: the hash is computed, the file is closed and a presence check is
//     scheduled.
//   - large: the hash is computed, the file is rewound without closing and
//     streamed via ByteStream.
//     If the file is already present on the server, the ByteStream preempts
//     the stream with EOF and WriteResponse.CommittedSize == Digest.Size.
//     Rewinding helps locality: there is no delay between reading the file for
//     the first and the second time.
//     Only one large file is processed at a time because most GCE disks are
//     network disks. Reading many large files concurrently appears to saturate
//     the network and slow down progress.
//     See also ClientConfig.LargeFileThreshold.
func (u *uploader) visitRegularFile(ctx context.Context, absPath string, info os.FileInfo) (*repb.FileNode, error) {
	isLarge := info.Size() >= u.Config.LargeFileThreshold

	// Acquire the large-file semaphore before the file IO semaphore to avoid
	// hogging the latter.
	if isLarge {
		// Read only a few large files at a time.
		if err := u.semLargeFile.Acquire(ctx, 1); err != nil {
			return nil, errors.WithStack(err)
		}
		defer u.semLargeFile.Release(1)
	}

	if err := u.semFileIO.Acquire(ctx, 1); err != nil {
		return nil, err
	}
	defer u.semFileIO.Release(1)

	f, err := u.openFileSource(absPath)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	ret := &repb.FileNode{
		Name:         info.Name(),
		IsExecutable: (info.Mode() & 0100) != 0,
	}

	if info.Size() <= u.Config.SmallFileThreshold {
		// This file is small enough to buffer it entirely.
		contents, err := io.ReadAll(f)
		if err != nil {
			return nil, err
		}
		item := uploadItemFromBlob(absPath, contents)
		ret.Digest = item.Digest
		return ret, u.scheduleCheck(ctx, item)
	}

	// It is a medium or large file.

	tctx, task := trace.NewTask(ctx, "medium or large file")
	defer task.End()
	trace.Log(tctx, "file", info.Name())

	// Compute the hash.
	now := time.Now()
	region := trace.StartRegion(tctx, "digest")
	dig, err := digest.NewFromReader(f)
	region.End()
	if err != nil {
		return nil, errors.Wrapf(err, "failed to compute hash")
	}
	log.Infof("compute digest %s: %s", info.Name(), time.Since(now))
	ret.Digest = dig.ToProto()

	item := &uploadItem{
		Title:  absPath,
		Digest: ret.Digest,
	}

	if isLarge {
		// Large files are special: locality is important - we want to re-read the
		// file ASAP.
		// Also we are not going to use BatchUploads anyway, so we can take
		// advantage of ByteStream's built-in presence check.
		// https://github.com/bazelbuild/remote-apis/blob/0cd22f7b466ced15d7803e8845d08d3e8d2c51bc/build/bazel/remote/execution/v2/remote_execution.proto#L250-L254

		if res, err := u.findMissingBlobs(ctx, []*uploadItem{item}); err != nil {
			return nil, errors.Wrapf(err, "failed to check existence")
		} else if len(res.MissingBlobDigests) == 0 {
			log.Infof("the file already exists. do not upload %s", absPath)
			atomic.AddInt64(&u.stats.CacheHits.Digests, 1)
			atomic.AddInt64(&u.stats.CacheHits.Bytes, ret.Digest.SizeBytes)
			return ret, nil
		}

		item.Open = func() (uploadSource, error) {
			return f, f.SeekStart(0)
		}
		return ret, u.stream(tctx, item, true)
	}

	// Schedule a check and close the file (in defer).
	// item.Open will reopen the file.

	item.Open = func() (uploadSource, error) {
		return u.openFileSource(absPath)
	}
	return ret, u.scheduleCheck(ctx, item)
}

func (u *uploader) openFileSource(absPath string) (uploadSource, error) {
	f, err := os.Open(absPath)
	if err != nil {
		return nil, err
	}
	return newFileSource(f, &u.fileBufReaders), nil
}

// visitDir reads a directory and its descendants. The function blocks until
// each descendant is visited, but the visitation happens concurrently, using
// u.eg.
func (u *uploader) visitDir(ctx context.Context, absPath string, pathExclude *regexp.Regexp) (*repb.DirectoryNode, error) {
	var mu sync.Mutex
	dir := &repb.Directory{}
	var subErr error
	var wgChildren sync.WaitGroup

	// This sub-function exists to avoid holding the semaphore while waiting for
	// children.
	err := func() error {
		if err := u.semFileIO.Acquire(ctx, 1); err != nil {
			return err
		}
		defer u.semFileIO.Release(1)

		f, err := os.Open(absPath)
		if err != nil {
			return err
		}
		defer f.Close()

		// Check the context, since file IO functions don't.
		for ctx.Err() == nil {
			infos, err := f.Readdir(128)
			if err == io.EOF {
				break
			}
			if err != nil {
				return err
			}

			for _, info := range infos {
				info := info
				absChild := joinFilePathsFast(absPath, info.Name())
				wgChildren.Add(1)
				u.wgFS.Add(1)
				u.eg.Go(func() error {
					defer wgChildren.Done()
					defer u.wgFS.Done()
					digested, err := u.visitPath(ctx, absChild, info, pathExclude)
					mu.Lock()
					defer mu.Unlock()

					switch {
					case err != nil:
						subErr = err
						return err
					case digested == nil:
						// This file should be ignored.
						return nil
					}

					addDirEntry(dir, digested.dirEntry)
					return nil
				})
			}
		}
		return nil
	}()
	if err != nil {
		return nil, err
	}

	wgChildren.Wait()
	if subErr != nil {
		return nil, errors.Wrapf(subErr, "failed to read the directory %q entirely", absPath)
	}

	item := uploadItemFromDirMsg(absPath, dir)
	if err := u.scheduleCheck(ctx, item); err != nil {
		return nil, err
	}
	return &repb.DirectoryNode{
		Name:   filepath.Base(absPath),
		Digest: item.Digest,
	}, nil
}

// visitSymlink converts a symlink to a directory node and schedules visitation
// of the target file.
// If u.PreserveSymlinks is true, then returns a SymlinkNode, otherwise
// returns the directory node of the target file.
//
// The returned digested.digest is nil if u.PreserveSymlinks is set.
func (u *uploader) visitSymlink(ctx context.Context, absPath string, pathExclude *regexp.Regexp) (*digested, error) {
	target, err := os.Readlink(absPath)
	if err != nil {
		return nil, errors.Wrapf(err, "os.ReadLink")
	}

	// Determine absolute and relative paths of the target.
	var absTarget, relTarget string
	symlinkDir := filepath.Dir(absPath)
	target = filepath.Clean(target) // target may end with a slash
	if filepath.IsAbs(target) {
		absTarget = target
		if relTarget, err = filepath.Rel(symlinkDir, absTarget); err != nil {
			return nil, err
		}
	} else {
		relTarget = target
		// Note: we can't use joinFilePathsFast here because relTarget may start
		// with "../".
		absTarget = filepath.Join(symlinkDir, relTarget)
	}

	symlinkNode := &repb.SymlinkNode{
		Name:   filepath.Base(absPath),
		Target: filepath.ToSlash(relTarget),
	}

	if u.PreserveSymlinks && u.AllowDanglingSymlinks {
		return &digested{dirEntry: symlinkNode}, nil
	}

	// Need to check the symlink target if AllowDanglingSymlinks is not set.
	targetInfo, err := os.Lstat(absTarget)
	if err != nil {
		return nil, errors.Wrapf(err, "lstat to target of symlink (%s -> %s) has error", absPath, relTarget)
	}

	// TODO: detect symlink cycles if we need to follow symlinks in this case.
	if u.PreserveSymlinks {
		return &digested{dirEntry: symlinkNode}, nil
	}

	return u.visitPath(ctx, absTarget, targetInfo, pathExclude)
}

// uploadItem is a blob to potentially upload.
type uploadItem struct {
	Title  string
	Digest *repb.Digest
	Open   func() (uploadSource, error)
}

func (item *uploadItem) ReadAll() ([]byte, error) {
	r, err := item.Open()
	if err != nil {
		return nil, err
	}
	defer r.Close()
	return io.ReadAll(r)
}

// scheduleCheck schedules a blob presence check on the server. If it fails,
// then the blob is uploaded.
func (u *uploader) scheduleCheck(ctx context.Context, item *uploadItem) error {
	if u.testScheduleCheck != nil {
		return u.testScheduleCheck(ctx, item)
	}

	// Do not check the same digest twice.
	cacheKey := digest.NewFromProtoUnvalidated(item.Digest)
	if _, ok := u.seenDigests.LoadOrStore(cacheKey, struct{}{}); ok {
		return nil
	}
	return u.checkBundler.AddWait(ctx, item, 0)
}

func (u *uploader) findMissingBlobs(ctx context.Context, items []*uploadItem) (res *repb.FindMissingBlobsResponse, err error) {
	if err := u.semFindMissingBlobs.Acquire(ctx, 1); err != nil {
		return nil, errors.WithStack(err)
	}
	defer u.semFindMissingBlobs.Release(1)

	req := &repb.FindMissingBlobsRequest{
		InstanceName: u.InstanceName,
		BlobDigests:  make([]*repb.Digest, len(items)),
	}

	for i, item := range items {
		req.BlobDigests[i] = item.Digest
	}

	err = u.unaryRPC(ctx, &u.Config.FindMissingBlobs, func(ctx context.Context) (err error) {
		res, err = u.cas.FindMissingBlobs(ctx, req)
		return
	})
	return res, err
}

// check checks which items are present on the server, and schedules upload for
// the missing ones.
func (u *uploader) check(ctx context.Context, items []*uploadItem) error {
	res, err := u.findMissingBlobs(ctx, items)
	if err != nil {
		return err
	}
	byDigest := make(map[digest.Digest]*uploadItem, len(items))
	totalBytes := int64(0)
	for _, item := range items {
		byDigest[digest.NewFromProtoUnvalidated(item.Digest)] = item
		totalBytes += item.Digest.SizeBytes
	}

	missingBytes := int64(0)
	for _, d := range res.MissingBlobDigests {
		missingBytes += d.SizeBytes
		item := byDigest[digest.NewFromProtoUnvalidated(d)]
		if err := u.scheduleUpload(ctx, item); err != nil {
			return errors.Wrapf(err, "%q", item.Title)
		}
	}
	atomic.AddInt64(&u.stats.CacheMisses.Digests, int64(len(res.MissingBlobDigests)))
	atomic.AddInt64(&u.stats.CacheMisses.Bytes, missingBytes)
	atomic.AddInt64(&u.stats.CacheHits.Digests, int64(len(items)-len(res.MissingBlobDigests)))
	atomic.AddInt64(&u.stats.CacheHits.Bytes, totalBytes-missingBytes)
	return nil
}

func (u *uploader) scheduleUpload(ctx context.Context, item *uploadItem) error {
	// Check if this blob can be uploaded in a batch.
	if marshalledRequestSize(item.Digest) > int64(u.batchBundler.BundleByteLimit) {
		// There is no way this blob can fit in a batch request.
		u.eg.Go(func() error {
			return errors.Wrap(u.stream(ctx, item, false), item.Title)
		})
		return nil
	}

	// Since this blob is small enough, just read it entirely.
	contents, err := item.ReadAll()
	if err != nil {
		return errors.Wrapf(err, "failed to read the item")
	}
	req := &repb.BatchUpdateBlobsRequest_Request{Digest: item.Digest, Data: contents}
	return u.batchBundler.AddWait(ctx, req, proto.Size(req))
}

// uploadBatch uploads blobs using the BatchUpdateBlobs RPC.
func (u *uploader) uploadBatch(ctx context.Context, reqs []*repb.BatchUpdateBlobsRequest_Request) error {
	if err := u.semBatchUpdateBlobs.Acquire(ctx, 1); err != nil {
		return err
	}
	defer u.semBatchUpdateBlobs.Release(1)

	reqMap := make(map[digest.Digest]*repb.BatchUpdateBlobsRequest_Request, len(reqs))
	for _, r := range reqs {
		reqMap[digest.NewFromProtoUnvalidated(r.Digest)] = r
	}

	req := &repb.BatchUpdateBlobsRequest{
		InstanceName: u.InstanceName,
		Requests:     reqs,
	}
	return u.unaryRPC(ctx, &u.Config.BatchUpdateBlobs, func(ctx context.Context) error {
		res, err := u.cas.BatchUpdateBlobs(ctx, req)
		if err != nil {
			return err
		}

		bytesTransferred := int64(0)
		digestsTransferred := int64(0)
		var retriableErr error
		req.Requests = req.Requests[:0] // reset for the next attempt
		for _, r := range res.Responses {
			if err := status.FromProto(r.Status).Err(); err != nil {
				if !retry.TransientOnly(err) {
					return err
				}
				// This error is retriable. Save it to return later, and
				// save the failed sub-request for the next attempt.
				retriableErr = err
				req.Requests = append(req.Requests, reqMap[digest.NewFromProtoUnvalidated(r.Digest)])
				continue
			}
			bytesTransferred += r.Digest.SizeBytes
			digestsTransferred++
		}
		atomic.AddInt64(&u.stats.Batched.Bytes, bytesTransferred)
		atomic.AddInt64(&u.stats.Batched.Digests, digestsTransferred)
		return retriableErr
	})
}

// stream uploads the item using the ByteStream service.
//
// If the blob is already uploaded, then the function returns quickly and
// without an error.
func (u *uploader) stream(ctx context.Context, item *uploadItem, updateCacheStats bool) error {
	if err := u.semByteStreamWrite.Acquire(ctx, 1); err != nil {
		return err
	}
	defer u.semByteStreamWrite.Release(1)

	ctx, task := trace.NewTask(ctx, "uploader.stream")
	defer task.End()

	log.Infof("start stream upload %s, size %d", item.Title, item.Digest.SizeBytes)
	now := time.Now()
	defer func() {
		log.Infof("finish stream upload %s, size %d: %s", item.Title, item.Digest.SizeBytes, time.Since(now))
	}()

	// Open the item.
	r, err := item.Open()
	if err != nil {
		return err
	}
	defer r.Close()

	rewind := false
	return u.withRetries(ctx, func(ctx context.Context) error {
		// TODO(nodir): add support for resumable uploads.

		// Do not rewind if this is the first attempt.
		if rewind {
			if err := r.SeekStart(0); err != nil {
				return err
			}
		}
		rewind = true

		if u.Config.CompressedBytestreamThreshold < 0 || item.Digest.SizeBytes < u.Config.CompressedBytestreamThreshold {
			// No compression.
			return u.streamFromReader(ctx, r, item.Digest, false, updateCacheStats)
		}

		// Compress using an in-memory pipe. This is mostly to accommodate the fact
		// that the zstd package expects a writer.
		// Note that using io.Pipe() means we buffer only bytes that were not uploaded yet.
		pr, pw := io.Pipe()

		enc := zstdEncoders.Get().(*zstd.Encoder)
		defer func() {
			enc.Close()
			zstdEncoders.Put(enc)
		}()
		enc.Reset(pw)

		// Read from disk and make RPCs concurrently.
		eg, ctx := errgroup.WithContext(ctx)
		eg.Go(func() error {
			switch _, err := enc.ReadFrom(r); {
			case err == io.ErrClosedPipe:
				// The other goroutine exited before we finished encoding.
				// Might be a cache hit or context cancelation.
				// In any case, the other goroutine has the actual error, so return nil
				// here.
				return nil
			case err != nil:
				return errors.Wrapf(err, "failed to read the file/blob")
			}

			if err := enc.Close(); err != nil {
				return errors.Wrapf(err, "failed to close the zstd encoder")
			}
			return pw.Close()
		})
		eg.Go(func() error {
			defer pr.Close()
			return u.streamFromReader(ctx, pr, item.Digest, true, updateCacheStats)
		})
		return eg.Wait()
	})
}

func (u *uploader) streamFromReader(ctx context.Context, r io.Reader, digest *repb.Digest, compressed, updateCacheStats bool) (rerr error) {
	ctx, cancel, withTimeout := withPerCallTimeout(ctx, u.Config.ByteStreamWrite.Timeout)
	defer cancel()

	stream, err := u.byteStream.Write(ctx)
	if err != nil {
		return err
	}
	defer func() {
		if _, err := stream.CloseAndRecv(); rerr == nil && err != io.EOF {
			rerr = err
		}
	}()

	req := &bspb.WriteRequest{}
	instanceSegment := u.InstanceName + "/"
	if instanceSegment == "/" {
		instanceSegment = ""
	}
	uploadID, err := uuid.NewRandom()
	if err != nil {
		return err
	}
	if compressed {
		req.ResourceName = fmt.Sprintf("%suploads/%s/compressed-blobs/zstd/%s/%d", instanceSegment, uploadID.String(), digest.Hash, digest.SizeBytes)
	} else {
		req.ResourceName = fmt.Sprintf("%suploads/%s/blobs/%s/%d", instanceSegment, uploadID.String(), digest.Hash, digest.SizeBytes)
	}

	buf := u.streamBufs.Get().(*[]byte)
	defer u.streamBufs.Put(buf)

chunkLoop:
	for {
		// Before reading, check if the context is canceled.
		if ctx.Err() != nil {
			return ctx.Err()
		}

		// Read the next chunk from the pipe.
		// Use ReadFull to ensure we aren't sending tiny blobs over RPC.
		region := trace.StartRegion(ctx, "ReadFull in streamFromReader")
		n, err := io.ReadFull(r, *buf)
		region.End()
		switch {
		case err == io.EOF || err == io.ErrUnexpectedEOF:
			req.FinishWrite = true
		case err != nil:
			return err
		}
		req.Data = (*buf)[:n] // must limit by `:n` in the ErrUnexpectedEOF case

		// Send the chunk.
		withTimeout(func() {
			trace.WithRegion(ctx, "stream.Send", func() {
				err = stream.Send(req)
			})
		})
		switch {
		case err == io.EOF:
			// The server closed the stream.
			// Most likely the file is already uploaded, see the CommittedSize check below.
			break chunkLoop
		case err != nil:
			return err
		case req.FinishWrite:
			break chunkLoop
		}

		// Prepare the next request.
		req.ResourceName = "" // send the resource name only in the first request
		req.WriteOffset += int64(len(req.Data))
	}

	// Finalize the request.
	switch res, err := stream.CloseAndRecv(); {
	case err != nil:
		return err
	case res.CommittedSize != digest.SizeBytes:
		return fmt.Errorf("unexpected commitSize: got %d, want %d", res.CommittedSize, digest.SizeBytes)
	}

	// Update stats.
	cacheHit := !req.FinishWrite
	if !cacheHit {
		atomic.AddInt64(&u.stats.Streamed.Bytes, digest.SizeBytes)
		atomic.AddInt64(&u.stats.Streamed.Digests, 1)
	}
	if updateCacheStats {
		st := &u.stats.CacheMisses
		if cacheHit {
			st = &u.stats.CacheHits
		}
		atomic.AddInt64(&st.Bytes, digest.SizeBytes)
		atomic.AddInt64(&st.Digests, 1)
	}
	return nil
}
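
// For reference, the resource names constructed above follow the REAPI
// ByteStream write conventions; illustrative (hypothetical) examples:
//
//	myinstance/uploads/3fa85f64-5717-4562-b3fc-2c963f66afa6/blobs/<hash>/1234
//	myinstance/uploads/3fa85f64-5717-4562-b3fc-2c963f66afa6/compressed-blobs/zstd/<hash>/1234
//
// When InstanceName is empty, the leading "<instance>/" segment is omitted.
// The size component is the uncompressed digest size in both cases.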

// uploadItemFromDirMsg creates an upload item for a directory.
// Sorts directory entries.
func uploadItemFromDirMsg(title string, dir *repb.Directory) *uploadItem {
	// Normalize the dir before marshaling, for determinism.
	sort.Slice(dir.Files, func(i, j int) bool {
		return dir.Files[i].Name < dir.Files[j].Name
	})
	sort.Slice(dir.Directories, func(i, j int) bool {
		return dir.Directories[i].Name < dir.Directories[j].Name
	})
	sort.Slice(dir.Symlinks, func(i, j int) bool {
		return dir.Symlinks[i].Name < dir.Symlinks[j].Name
	})

	blob, err := proto.Marshal(dir)
	if err != nil {
		panic(err) // impossible
	}
	return uploadItemFromBlob(title, blob)
}

func uploadItemFromBlob(title string, blob []byte) *uploadItem {
	item := &uploadItem{
		Title:  title,
		Digest: digest.NewFromBlob(blob).ToProto(),
		Open: func() (uploadSource, error) {
			return newByteSliceSource(blob), nil
		},
	}
	if item.Title == "" {
		item.Title = fmt.Sprintf("digest %s/%d", item.Digest.Hash, item.Digest.SizeBytes)
	}
	return item
}

const (
	pathSep         = string(filepath.Separator)
	parentDirPrefix = ".." + pathSep
)

// joinFilePathsFast is a faster version of filepath.Join because it does not
// call filepath.Clean. Assumes the arguments are clean according to the
// filepath.Clean spec.
func joinFilePathsFast(a, b string) string {
	if b == "." {
		return a
	}
	if strings.HasSuffix(a, pathSep) {
		// May happen if a is the root.
		return a + b
	}
	return a + pathSep + b
}

// dirNameRelFast is a faster version of filepath.Dir because it does not call
// filepath.Clean. Assumes the argument is clean and relative.
// Does not work for absolute paths.
func dirNameRelFast(relPath string) string {
	i := strings.LastIndex(relPath, pathSep)
	if i < 0 {
		return "."
	}
	return relPath[:i]
}

func marshalledFieldSize(size int64) int64 {
	return 1 + int64(protowire.SizeVarint(uint64(size))) + size
}

func marshalledRequestSize(d *repb.Digest) int64 {
	// An additional BatchUpdateBlobsRequest_Request includes the Digest and data fields,
	// as well as the message itself. Every field has a 1-byte size tag, followed by
	// the varint field size for variable-sized fields (digest hash and data).
	// Note that the BatchReadBlobsResponse_Response field is similar, but includes
	// an additional Status proto which can theoretically be unlimited in size.
	// We do not account for it here, relying on the Client setting a large (100MB)
	// limit for incoming messages.
	digestSize := marshalledFieldSize(int64(len(d.Hash)))
	if d.SizeBytes > 0 {
		digestSize += 1 + int64(protowire.SizeVarint(uint64(d.SizeBytes)))
	}
	reqSize := marshalledFieldSize(digestSize)
	if d.SizeBytes > 0 {
		reqSize += marshalledFieldSize(int64(d.SizeBytes))
	}
	return marshalledFieldSize(reqSize)
}
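
// A worked (illustrative) example of the estimate above, assuming a SHA-256
// digest (64 hex characters) and a 1,000,000-byte blob:
//
//	digestSize = (1 + 1 + 64) + (1 + 3)       = 70        // hash field + size_bytes field
//	reqSize    = (1 + 1 + 70) + (1 + 3 + 1e6) = 1,000,076 // digest field + data field
//	total      =  1 + 3 + 1,000,076           = 1,000,080 // the whole sub-request message
//
// i.e. the per-blob overhead on top of the data itself is on the order of
// 80 bytes.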

func addDirEntry(dir *repb.Directory, node proto.Message) {
	switch node := node.(type) {
	case *repb.FileNode:
		dir.Files = append(dir.Files, node)
	case *repb.DirectoryNode:
		dir.Directories = append(dir.Directories, node)
	case *repb.SymlinkNode:
		dir.Symlinks = append(dir.Symlinks, node)
	default:
		// This condition is impossible because all functions in this file
		// return one of the three types above.
		panic(fmt.Sprintf("unexpected node type %T", node))
	}
}