github.com/bazelbuild/remote-apis-sdks@v0.0.0-20240425170053-8a36686a6350/go/pkg/cas/upload.go (about)

     1  package cas
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"os"
     8  	"path/filepath"
     9  	"regexp"
    10  	"runtime/trace"
    11  	"sort"
    12  	"strings"
    13  	"sync"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	log "github.com/golang/glog"
    18  	"github.com/google/uuid"
    19  	"github.com/klauspost/compress/zstd"
    20  	"github.com/pkg/errors"
    21  	"golang.org/x/sync/errgroup"
    22  	"google.golang.org/api/support/bundler"
    23  	"google.golang.org/grpc/status"
    24  	"google.golang.org/protobuf/encoding/protowire"
    25  	"google.golang.org/protobuf/proto"
    26  
    27  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/cache"
    28  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/digest"
    29  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/retry"
    30  	repb "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2"
    31  	bspb "google.golang.org/genproto/googleapis/bytestream"
    32  )
    33  
// zstdEncoders is a pool of ZStd encoders.
// Clients of this pool must call Close() on the encoder after using the
// encoder.
var zstdEncoders = sync.Pool{
	New: func() interface{} {
		// No options are passed, so the error is deliberately ignored
		// (NewWriter only fails on invalid options).
		enc, _ := zstd.NewWriter(nil)
		return enc
	},
}
    43  
// UploadInput specifies a file or directory to upload.
type UploadInput struct {
	// Path to the file or a directory to upload.
	// Must be absolute.
	Path string

	// Allowlist is a filter for files/directories under Path.
	// If a file is not present in Allowlist and does not reside in a directory
	// present in the Allowlist, then the file is ignored.
	// This is equivalent to deleting all not-matched files/dirs before
	// uploading.
	//
	// Each path in the Allowlist must be relative to UploadInput.Path.
	//
	// Must be empty if Path points to a regular file.
	Allowlist []string

	// Exclude is a file/dir filter. If Exclude is not nil and the
	// absolute path of a file/dir match this regexp, then the file/dir is skipped.
	// Forward-slash-separated paths are matched against the regexp: PathExclude
	// does not have to be conditional on the OS.
	// If the Path is a directory, then the filter is evaluated against each file
	// in the subtree.
	// See ErrSkip comments for more details on semantics regarding excluding symlinks.
	Exclude *regexp.Regexp

	// cleanPath is filepath.Clean(Path), set by init().
	cleanPath string
	// cleanAllowlist holds the cleaned Allowlist entries; it is oneDot when
	// Allowlist is empty.
	cleanAllowlist []string

	// pathInfo is result of Lstat(UploadInput.Path)
	pathInfo os.FileInfo

	// tree maps from a file/dir path to its digest and a directory node.
	// The path is relative to UploadInput.Path.
	//
	// Once digests are computed successfully, guaranteed to have key ".".
	// If allowlist is not empty, then also has a key for each clean allowlisted
	// path, as well as each intermediate directory between the root and an
	// allowlisted dir.
	//
	// The main purpose of this field is an UploadInput-local cache that couldn't
	// be placed in uploader.fsCache because of UploadInput-specific parameters
	// that are hard to incorporate into the cache key, namely the allowlist.
	tree map[string]*digested
	// digestsComputed is closed once all digests are known; created lazily by
	// ensureDigestsComputedInited (guarded by digestsComputedInit).
	digestsComputed     chan struct{}
	digestsComputedInit sync.Once
	// u is the uploader that owns this input, set by init().
	u *uploader
}
    92  
// Digest returns the digest computed for a file/dir.
// The relPath is relative to UploadInput.Path. Use "." for the digest of the
// UploadInput.Path itself.
//
// Digest is safe to call only after the channel returned by DigestsComputed()
// is closed.
//
// If the digest is unknown, returns (nil, err), where err is ErrDigestUnknown
// according to errors.Is.
// If the file is a dangling symlink, then its digest is unknown.
func (in *UploadInput) Digest(relPath string) (digest.Digest, error) {
	// cleanPath is set by init(); empty means this input was never processed
	// by Upload, so there is nothing to look up yet.
	if in.cleanPath == "" {
		return digest.Digest{}, errors.Errorf("Digest called too soon")
	}

	relPath = filepath.Clean(relPath)

	// Check if this is the root or one of the intermediate nodes in the partial
	// Merkle tree.
	if dig, ok := in.tree[relPath]; ok {
		return digest.NewFromProtoUnvalidated(dig.digest), nil
	}

	absPath := filepath.Join(in.cleanPath, relPath)

	// TODO(nodir): cache this syscall, perhaps using filemetadata package.
	info, err := os.Lstat(absPath)
	if err != nil {
		return digest.Digest{}, errors.WithStack(err)
	}

	// Fall back to the uploader-wide cache of visited files. The key must be
	// built the same way as in visitPath.
	key := makeFSCacheKey(absPath, info.Mode().IsRegular(), in.Exclude)
	switch val, err, loaded := in.u.fsCache.Load(key); {
	case !loaded:
		return digest.Digest{}, errors.Wrapf(ErrDigestUnknown, "digest not found for %#v", absPath)
	case err != nil:
		return digest.Digest{}, errors.WithStack(err)
	default:
		return digest.NewFromProtoUnvalidated(val.(*digested).digest), nil
	}
}
   134  
   135  func (in *UploadInput) ensureDigestsComputedInited() chan struct{} {
   136  	in.digestsComputedInit.Do(func() {
   137  		in.digestsComputed = make(chan struct{})
   138  	})
   139  	return in.digestsComputed
   140  }
   141  
// DigestsComputed returns a channel which is closed when all digests, including
// descendants, are computed.
// It is guaranteed to be closed by the time Client.Upload() returns successfully.
//
// DigestsComputed() is always safe to call.
func (in *UploadInput) DigestsComputed() <-chan struct{} {
	return in.ensureDigestsComputedInited()
}
   150  
   151  var oneDot = []string{"."}
   152  
   153  // init initializes internal fields.
   154  func (in *UploadInput) init(u *uploader) error {
   155  	in.u = u
   156  
   157  	if !filepath.IsAbs(in.Path) {
   158  		return errors.Errorf("%q is not absolute", in.Path)
   159  	}
   160  	in.cleanPath = filepath.Clean(in.Path)
   161  
   162  	// Do not use os.Stat() here. We want to know if it is a symlink.
   163  	var err error
   164  	if in.pathInfo, err = os.Lstat(in.cleanPath); err != nil {
   165  		return errors.WithStack(err)
   166  	}
   167  
   168  	// Process the allowlist.
   169  	in.tree = make(map[string]*digested, 1+len(in.Allowlist))
   170  	switch {
   171  	case len(in.Allowlist) == 0:
   172  		in.cleanAllowlist = oneDot
   173  
   174  	case in.pathInfo.Mode().IsRegular():
   175  		return errors.Errorf("the Allowlist is not supported for regular files")
   176  
   177  	default:
   178  		in.cleanAllowlist = make([]string, len(in.Allowlist))
   179  		for i, subPath := range in.Allowlist {
   180  			if filepath.IsAbs(subPath) {
   181  				return errors.Errorf("the allowlisted path %q is not relative", subPath)
   182  			}
   183  
   184  			cleanSubPath := filepath.Clean(subPath)
   185  			if cleanSubPath == ".." || strings.HasPrefix(cleanSubPath, parentDirPrefix) {
   186  				return errors.Errorf("the allowlisted path %q is not contained by %q", subPath, in.Path)
   187  			}
   188  			in.cleanAllowlist[i] = cleanSubPath
   189  		}
   190  	}
   191  	return nil
   192  }
   193  
// partialMerkleTree ensures that for each node in in.tree, not included by any
// other node, all its ancestors are also present in the tree. For example, if
// the tree contains only "foo/bar" and "foo/baz", then partialMerkleTree adds
// "foo" and ".". The latter is the root.
//
// All tree keys must be clean relative paths.
// Returns prepared *uploadItems that represent the ancestors that were added to
// the tree.
func (in *UploadInput) partialMerkleTree() (added []*uploadItem) {
	// Establish parent->child edges: for every existing node, record it as a
	// child of each of its ancestors up to the root.
	children := map[string]map[string]struct{}{}
	for relPath := range in.tree {
		for relPath != "." {
			parent := dirNameRelFast(relPath)
			if childSet, ok := children[parent]; ok {
				childSet[relPath] = struct{}{}
			} else {
				children[parent] = map[string]struct{}{relPath: {}}
			}
			relPath = parent
		}
	}

	// Add the missing ancestors by traversing in post-order.
	var dfs func(relPath string) proto.Message
	dfs = func(relPath string) proto.Message {
		// Nodes already in the tree (the allowlisted paths) are leaves of this
		// traversal; reuse their precomputed entries.
		if dig, ok := in.tree[relPath]; ok {
			return dig.dirEntry
		}

		// Synthesize a Directory message from the children's entries.
		dir := &repb.Directory{}
		for child := range children[relPath] {
			addDirEntry(dir, dfs(child))
		}

		// Prepare an uploadItem.
		absPath := joinFilePathsFast(in.cleanPath, relPath)
		item := uploadItemFromDirMsg(absPath, dir) // normalizes the dir
		added = append(added, item)

		// Compute a directory entry for the parent.
		node := &repb.DirectoryNode{
			Name:   filepath.Base(absPath),
			Digest: item.Digest,
		}

		// Cache the synthesized node so Digest() can find it later.
		in.tree[relPath] = &digested{dirEntry: node, digest: item.Digest}
		return node
	}
	dfs(".")
	return added
}
   246  
// TransferStats is upload/download statistics.
type TransferStats struct {
	// CacheHits/CacheMisses count blobs found (or not) on the server during
	// presence checks — i.e. the remote cache, not a local one.
	CacheHits   DigestStat
	CacheMisses DigestStat

	Streamed DigestStat // streamed transfers
	Batched  DigestStat // batched transfers
}
   255  
// DigestStat is aggregated statistics over a set of digests.
type DigestStat struct {
	Digests int64 // number of unique digests
	Bytes   int64 // total sum of digest sizes

	// TODO(nodir): add something like TransferBytes, i.e. how much was actually transferred
}
   263  
// UploadOptions is optional configuration for Upload function.
// The default options are the zero value of this struct.
type UploadOptions struct {
	// PreserveSymlinks specifies whether to preserve symlinks or convert them
	// to regular files. This doesn't upload targets of symlinks; the caller
	// needs to specify targets explicitly if those are necessary too.
	PreserveSymlinks bool

	// AllowDanglingSymlinks specifies whether to upload dangling links or halt
	// the upload with an error.
	//
	// This field is ignored if PreserveSymlinks is false, which is the default.
	AllowDanglingSymlinks bool

	// Prelude is called for each file/dir to be read and uploaded.
	// If it returns an error which is ErrSkip according to errors.Is, then the
	// file/dir is not processed.
	// If it returns another error, then the upload is halted with that error.
	//
	// Prelude might be called multiple times for the same file if different
	// UploadInputs directly/indirectly refer to the same file, but with different
	// UploadInput.Exclude.
	//
	// Prelude is called from different goroutines, so it must be safe for
	// concurrent use.
	Prelude func(absPath string, mode os.FileMode) error
}
   290  
// digested is a result of preprocessing a file/dir.
type digested struct {
	dirEntry proto.Message // FileNode, DirectoryNode or SymlinkNode
	digest   *repb.Digest  // may be nil, e.g. for dangling symlinks
}
   296  
var (
	// ErrSkip, when returned by UploadOptions.Prelude, means the file/dir must
	// not be uploaded.
	//
	// Note that if UploadOptions.PreserveSymlinks is true and the ErrSkip is
	// returned for a symlink target, but not the symlink itself, then it may
	// result in a dangling symlink.
	ErrSkip = errors.New("skip file")

	// ErrDigestUnknown indicates that the requested digest is unknown.
	// Use errors.Is instead of direct equality check.
	ErrDigestUnknown = errors.New("the requested digest is unknown")
)
   310  
// UploadResult is the result of a Client.Upload call.
// It provides file/dir digests and statistics.
type UploadResult struct {
	Stats TransferStats
	// u retains the uploader so digests can be resolved after Upload returns.
	u *uploader
}
   317  
// Upload uploads all files/directories specified by inputC.
//
// Upload assumes ownership of UploadInputs received from inputC.
// They must not be mutated after sending.
//
// Close inputC to indicate that there are no more files/dirs to upload.
// When inputC is closed, Upload finishes uploading the remaining files/dirs and
// exits successfully.
//
// If ctx is canceled, the Upload returns with an error.
func (c *Client) Upload(ctx context.Context, opt UploadOptions, inputC <-chan *UploadInput) (*UploadResult, error) {
	eg, ctx := errgroup.WithContext(ctx)
	// Do not exit until all sub-goroutines exit, to prevent goroutine leaks.
	defer eg.Wait()

	u := &uploader{
		Client:        c,
		UploadOptions: opt,
		eg:            eg,
	}

	// Initialize checkBundler, which checks if a blob is present on the server.
	var wgChecks sync.WaitGroup
	u.checkBundler = bundler.NewBundler(&uploadItem{}, func(items interface{}) {
		wgChecks.Add(1)
		// Handle errors and context cancelation via errgroup.
		eg.Go(func() error {
			defer wgChecks.Done()
			return u.check(ctx, items.([]*uploadItem))
		})
	})
	// Given that all digests are small (no more than 40 bytes), the count limit
	// is the bottleneck.
	// We might run into the request size limits only if we have >100K digests.
	u.checkBundler.BundleCountThreshold = u.Config.FindMissingBlobs.MaxItems

	// Initialize batchBundler, which uploads blobs in batches.
	u.batchBundler = bundler.NewBundler(&repb.BatchUpdateBlobsRequest_Request{}, func(subReq interface{}) {
		// Handle errors and context cancelation via errgroup.
		eg.Go(func() error {
			return u.uploadBatch(ctx, subReq.([]*repb.BatchUpdateBlobsRequest_Request))
		})
	})
	// Limit the sum of sub-request sizes to (maxRequestSize - requestOverhead).
	// Subtract 1KB to be on the safe side.
	u.batchBundler.BundleByteLimit = c.Config.BatchUpdateBlobs.MaxSizeBytes - int(marshalledFieldSize(int64(len(c.InstanceName)))) - 1000
	u.batchBundler.BundleCountThreshold = c.Config.BatchUpdateBlobs.MaxItems

	// Start processing path specs.
	eg.Go(func() error {
		// Before exiting this main goroutine, ensure all the work has been completed.
		// Just waiting for u.eg isn't enough because some work may be temporarily
		// in a bundler.
		// The flush/wait order below mirrors the pipeline stages: FS walk ->
		// presence checks -> batched uploads.
		defer func() {
			u.wgFS.Wait()
			u.checkBundler.Flush() // only after FS walk is done.
			wgChecks.Wait()        // only after checkBundler is flushed
			u.batchBundler.Flush() // only after wgChecks is done.
		}()

		for {
			select {
			case <-ctx.Done():
				return ctx.Err()
			case in, ok := <-inputC:
				if !ok {
					// inputC closed: no more inputs; the deferred flushes drain
					// the pipeline.
					return nil
				}
				log.Infof("start startProcessing %s", in.Path)
				if err := u.startProcessing(ctx, in); err != nil {
					return err
				}
				log.Infof("finish startProcessing %s", in.Path)
			}
		}
	})

	return &UploadResult{Stats: u.stats, u: u}, errors.WithStack(eg.Wait())
}
   397  
// uploader implements a concurrent multi-stage pipeline to read blobs from the
// file system, check their presence on the server and then upload if necessary.
// Common blobs are deduplicated.
//
// uploader.eg is used to schedule work, while concurrency of individual
// expensive operations is controlled via separate semaphores.
//
// Special care is taken for large files: they are read sequentially, opened
// only once per file, and read with large IO size.
//
// Note: uploader shouldn't store semaphores/locks that protect global
// resources, such as file system. They should be stored in the Client instead.
type uploader struct {
	*Client
	UploadOptions
	// eg schedules all pipeline goroutines; Upload waits on it before returning.
	eg *errgroup.Group
	// stats fields are updated atomically during the upload.
	stats TransferStats

	// wgFS is used to wait for all FS walking to finish.
	wgFS sync.WaitGroup

	// fsCache contains already-processed files.
	// A key can be produced by makeFSCacheKey.
	// The values are of type *digested.
	fsCache cache.SingleFlight

	// checkBundler bundles digests that need to be checked for presence on the
	// server.
	checkBundler *bundler.Bundler
	// seenDigests deduplicates presence checks per digest.
	seenDigests sync.Map // TODO: consider making it more global

	// batchBundler bundles blobs that can be uploaded using UploadBlobs RPC.
	batchBundler *bundler.Bundler
}
   432  
// startProcessing adds the item to the appropriate stage depending on its type.
// It returns quickly; the actual file-system walk runs on u.eg goroutines.
func (u *uploader) startProcessing(ctx context.Context, in *UploadInput) error {
	if !filepath.IsAbs(in.Path) {
		return errors.Errorf("%q is not absolute", in.Path)
	}

	if err := in.init(u); err != nil {
		return errors.WithStack(err)
	}

	// Schedule a file system walk.
	u.wgFS.Add(1)
	u.eg.Go(func() error {
		defer u.wgFS.Done()

		// Concurrently visit each allowlisted path, and use the results to
		// construct a partial Merkle tree. Note that we are not visiting
		// the entire in.cleanPath, which may be much larger than the union of the
		// allowlisted paths.
		log.Infof("start localEg %s", in.Path)
		localEg, ctx := errgroup.WithContext(ctx)
		var treeMu sync.Mutex
		for _, relPath := range in.cleanAllowlist {
			relPath := relPath // capture per-iteration value for the closure
			// Schedule a file system walk.
			localEg.Go(func() error {
				// The root's FileInfo is already known from init(); for any
				// other allowlisted path, Lstat it here.
				absPath := in.cleanPath
				info := in.pathInfo
				if relPath != "." {
					absPath = joinFilePathsFast(in.cleanPath, relPath)
					var err error
					// TODO(nodir): cache this syscall too.
					if info, err = os.Lstat(absPath); err != nil {
						return errors.WithStack(err)
					}
				}

				switch dig, err := u.visitPath(ctx, absPath, info, in.Exclude); {
				case err != nil:
					return errors.Wrapf(err, "%q", absPath)
				case dig != nil:
					// nil dig means the path was filtered out; otherwise record
					// it in the (shared) tree under the mutex.
					treeMu.Lock()
					in.tree[relPath] = dig
					treeMu.Unlock()
				}
				return nil
			})
		}
		if err := localEg.Wait(); err != nil {
			return errors.WithStack(err)
		}
		log.Infof("done localEg %s", in.Path)
		// At this point, all allowlisted paths are digest'ed, and we only need to
		// compute a partial Merkle tree and upload the implied ancestors.
		for _, item := range in.partialMerkleTree() {
			if err := u.scheduleCheck(ctx, item); err != nil {
				return err
			}
		}

		// The entire tree is digested. Notify the caller.
		close(in.ensureDigestsComputedInited())
		return nil
	})
	return nil
}
   499  
   500  // makeFSCacheKey returns a key for u.fsCache.
   501  func makeFSCacheKey(absPath string, isRegularFile bool, pathExclude *regexp.Regexp) interface{} {
   502  	// The structure of the cache key is incapsulated by this function.
   503  	type cacheKey struct {
   504  		AbsPath       string
   505  		ExcludeRegexp string
   506  	}
   507  
   508  	key := cacheKey{
   509  		AbsPath: absPath,
   510  	}
   511  
   512  	if isRegularFile {
   513  		// This is a regular file.
   514  		// Its digest depends only on the file path (assuming content didn't change),
   515  		// so the cache key is complete. Just return it.
   516  		return key
   517  	}
   518  	// This is a directory and/or a symlink, so the digest also depends on fs-walk
   519  	// settings. Incroporate those too.
   520  
   521  	if pathExclude != nil {
   522  		key.ExcludeRegexp = pathExclude.String()
   523  	}
   524  	return key
   525  }
   526  
   527  // visitPath visits the file/dir depending on its type (regular, dir, symlink).
   528  // Visits each file only once.
   529  //
   530  // If the file should be skipped, then returns (nil, nil).
   531  // The returned digested.digest may also be nil if the symlink is dangling.
   532  func (u *uploader) visitPath(ctx context.Context, absPath string, info os.FileInfo, pathExclude *regexp.Regexp) (*digested, error) {
   533  	// First, check if the file passes all filters.
   534  	if pathExclude != nil && pathExclude.MatchString(filepath.ToSlash(absPath)) {
   535  		return nil, nil
   536  	}
   537  	// Call the Prelude only after checking the pathExclude.
   538  	if u.Prelude != nil {
   539  		switch err := u.Prelude(absPath, info.Mode()); {
   540  		case errors.Is(err, ErrSkip):
   541  			return nil, nil
   542  		case err != nil:
   543  			return nil, err
   544  		}
   545  	}
   546  
   547  	cacheKey := makeFSCacheKey(absPath, info.Mode().IsRegular(), pathExclude)
   548  	cached, err := u.fsCache.LoadOrStore(cacheKey, func() (interface{}, error) {
   549  		switch {
   550  		case info.Mode()&os.ModeSymlink == os.ModeSymlink:
   551  			return u.visitSymlink(ctx, absPath, pathExclude)
   552  
   553  		case info.Mode().IsDir():
   554  			node, err := u.visitDir(ctx, absPath, pathExclude)
   555  			return &digested{dirEntry: node, digest: node.GetDigest()}, err
   556  
   557  		case info.Mode().IsRegular():
   558  			// Note: makeFSCacheKey assumes that pathExclude is not used here.
   559  			node, err := u.visitRegularFile(ctx, absPath, info)
   560  			return &digested{dirEntry: node, digest: node.GetDigest()}, err
   561  
   562  		// Ignore all non-expected modes (e.g. domain sockets as used by git
   563  		// fsmonitor).
   564  		default:
   565  			return nil, nil
   566  		}
   567  	})
   568  	if err != nil {
   569  		return nil, err
   570  	}
   571  	return cached.(*digested), nil
   572  }
   573  
// visitRegularFile computes the hash of a regular file and schedules a presence
// check.
//
// It distinguishes three categories of file sizes:
//   - small: small files are buffered in memory entirely, thus read only once.
//     See also ClientConfig.SmallFileThreshold.
//   - medium: the hash is computed, the file is closed and a presence check is
//     scheduled.
//   - large: the hash is computed, the file is rewinded without closing and
//     streamed via ByteStream.
//     If the file is already present on the server, the ByteStream preempts
//     the stream with EOF and WriteResponse.CommittedSize == Digest.Size.
//     Rewinding helps locality: there is no delay between reading the file for
//     the first and the second times.
//     Only one large file is processed at a time because most GCE disks are
//     network disks. Reading many large files concurrently appears to saturate
//     the network and slows down the progress.
//     See also ClientConfig.LargeFileThreshold.
func (u *uploader) visitRegularFile(ctx context.Context, absPath string, info os.FileInfo) (*repb.FileNode, error) {
	isLarge := info.Size() >= u.Config.LargeFileThreshold

	// Lock the mutex before acquiring a semaphore to avoid hogging the latter.
	if isLarge {
		// Read only a few large files at a time.
		if err := u.semLargeFile.Acquire(ctx, 1); err != nil {
			return nil, errors.WithStack(err)
		}
		defer u.semLargeFile.Release(1)
	}

	if err := u.semFileIO.Acquire(ctx, 1); err != nil {
		return nil, err
	}
	defer u.semFileIO.Release(1)

	f, err := u.openFileSource(absPath)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	ret := &repb.FileNode{
		Name: info.Name(),
		// Any owner-executable bit marks the file executable.
		IsExecutable: (info.Mode() & 0100) != 0,
	}

	if info.Size() <= u.Config.SmallFileThreshold {
		// This file is small enough to buffer it entirely.
		contents, err := io.ReadAll(f)
		if err != nil {
			return nil, err
		}
		item := uploadItemFromBlob(absPath, contents)
		ret.Digest = item.Digest
		return ret, u.scheduleCheck(ctx, item)
	}

	// It is a medium or large file.

	tctx, task := trace.NewTask(ctx, "medium or large file")
	defer task.End()
	trace.Log(tctx, "file", info.Name())

	// Compute the hash.
	now := time.Now()
	region := trace.StartRegion(tctx, "digest")
	dig, err := digest.NewFromReader(f)
	region.End()
	if err != nil {
		return nil, errors.Wrapf(err, "failed to compute hash")
	}
	log.Infof("compute digest %s: %s", info.Name(), time.Since(now))
	ret.Digest = dig.ToProto()

	item := &uploadItem{
		Title:  absPath,
		Digest: ret.Digest,
	}

	if isLarge {
		// Large files are special: locality is important - we want to re-read the
		// file ASAP.
		// Also we are not going to use BatchUploads anyway, so we can take
		// advantage of ByteStream's built-in presence check.
		// https://github.com/bazelbuild/remote-apis/blob/0cd22f7b466ced15d7803e8845d08d3e8d2c51bc/build/bazel/remote/execution/v2/remote_execution.proto#L250-L254

		if res, err := u.findMissingBlobs(ctx, []*uploadItem{item}); err != nil {
			return nil, errors.Wrapf(err, "failed to check existence")
		} else if len(res.MissingBlobDigests) == 0 {
			log.Infof("the file already exists. do not upload %s", absPath)
			atomic.AddInt64(&u.stats.CacheHits.Digests, 1)
			atomic.AddInt64(&u.stats.CacheHits.Bytes, ret.Digest.SizeBytes)
			return ret, nil
		}

		// Reuse the already-open file: rewind it to the start instead of
		// reopening, to preserve read locality.
		item.Open = func() (uploadSource, error) {
			return f, f.SeekStart(0)
		}
		return ret, u.stream(tctx, item, true)
	}

	// Schedule a check and close the file (in defer).
	// item.Open will reopen the file.

	item.Open = func() (uploadSource, error) {
		return u.openFileSource(absPath)
	}
	return ret, u.scheduleCheck(ctx, item)
}
   683  
// openFileSource opens absPath and wraps it in a buffered uploadSource using
// the client's shared buffer pool.
func (u *uploader) openFileSource(absPath string) (uploadSource, error) {
	f, err := os.Open(absPath)
	if err != nil {
		return nil, err
	}
	return newFileSource(f, &u.fileBufReaders), nil
}
   691  
// visitDir reads a directory and its descendants. The function blocks until
// each descendant is visited, but the visitation happens concurrently, using
// u.eg.
func (u *uploader) visitDir(ctx context.Context, absPath string, pathExclude *regexp.Regexp) (*repb.DirectoryNode, error) {
	var mu sync.Mutex // guards dir and subErr below
	dir := &repb.Directory{}
	var subErr error
	var wgChildren sync.WaitGroup

	// This sub-function exists to avoid holding the semaphore while waiting for
	// children.
	err := func() error {
		if err := u.semFileIO.Acquire(ctx, 1); err != nil {
			return err
		}
		defer u.semFileIO.Release(1)

		f, err := os.Open(absPath)
		if err != nil {
			return err
		}
		defer f.Close()

		// Check the context, since file IO functions don't.
		for ctx.Err() == nil {
			// Read directory entries in chunks of 128 to bound memory use.
			infos, err := f.Readdir(128)
			if err == io.EOF {
				break
			}
			if err != nil {
				return err
			}

			for _, info := range infos {
				info := info // capture per-iteration value for the closure
				absChild := joinFilePathsFast(absPath, info.Name())
				wgChildren.Add(1)
				u.wgFS.Add(1)
				u.eg.Go(func() error {
					defer wgChildren.Done()
					defer u.wgFS.Done()
					digested, err := u.visitPath(ctx, absChild, info, pathExclude)
					mu.Lock()
					defer mu.Unlock()

					switch {
					case err != nil:
						// Record the error for the parent (via subErr) and also
						// propagate it to the errgroup.
						subErr = err
						return err
					case digested == nil:
						// This file should be ignored.
						return nil
					}

					addDirEntry(dir, digested.dirEntry)
					return nil
				})
			}
		}
		return nil
	}()
	if err != nil {
		return nil, err
	}

	wgChildren.Wait()
	if subErr != nil {
		return nil, errors.Wrapf(subErr, "failed to read the directory %q entirely", absPath)
	}

	item := uploadItemFromDirMsg(absPath, dir)
	if err := u.scheduleCheck(ctx, item); err != nil {
		return nil, err
	}
	return &repb.DirectoryNode{
		Name:   filepath.Base(absPath),
		Digest: item.Digest,
	}, nil
}
   771  
// visitSymlink converts a symlink to a directory node and schedules visitation
// of the target file.
// If u.PreserveSymlinks is true, then returns a SymlinkNode, otherwise
// returns the directory node of the target file.
//
// The returned digested.digest is nil if u.PreserveSymlinks is set.
func (u *uploader) visitSymlink(ctx context.Context, absPath string, pathExclude *regexp.Regexp) (*digested, error) {
	target, err := os.Readlink(absPath)
	if err != nil {
		return nil, errors.Wrapf(err, "os.ReadLink")
	}

	// Determine absolute and relative paths of the target.
	var absTarget, relTarget string
	symlinkDir := filepath.Dir(absPath)
	target = filepath.Clean(target) // target may end with slash
	if filepath.IsAbs(target) {
		absTarget = target
		if relTarget, err = filepath.Rel(symlinkDir, absTarget); err != nil {
			return nil, err
		}
	} else {
		relTarget = target
		// Note: we can't use joinFilePathsFast here because relTarget may start
		// with "../".
		absTarget = filepath.Join(symlinkDir, relTarget)
	}

	symlinkNode := &repb.SymlinkNode{
		Name:   filepath.Base(absPath),
		Target: filepath.ToSlash(relTarget),
	}

	// Dangling links are allowed: return the node without touching the target.
	if u.PreserveSymlinks && u.AllowDanglingSymlinks {
		return &digested{dirEntry: symlinkNode}, nil
	}

	// Need to check symlink if AllowDanglingSymlinks is not set.
	targetInfo, err := os.Lstat(absTarget)
	if err != nil {
		return nil, errors.Wrapf(err, "lstat to target of symlink (%s -> %s) has error", absPath, relTarget)
	}

	// TODO: detect cycles by symlink if needs to follow symlinks in this case.
	if u.PreserveSymlinks {
		return &digested{dirEntry: symlinkNode}, nil
	}

	// Not preserving symlinks: represent the symlink by its target.
	return u.visitPath(ctx, absTarget, targetInfo, pathExclude)
}
   822  
// uploadItem is a blob to potentially upload.
type uploadItem struct {
	// Title identifies the blob in logs and error messages,
	// e.g. a file path or "digest <hash>/<size>".
	Title  string
	// Digest is the blob's digest in REAPI proto form.
	Digest *repb.Digest
	// Open returns a reader for the blob's contents.
	// The caller is responsible for closing the returned source.
	Open   func() (uploadSource, error)
}
   829  
   830  func (item *uploadItem) ReadAll() ([]byte, error) {
   831  	r, err := item.Open()
   832  	if err != nil {
   833  		return nil, err
   834  	}
   835  	defer r.Close()
   836  	return io.ReadAll(r)
   837  }
   838  
   839  // scheduleCheck schedules a blob presence check on the server. If it fails,
   840  // then the blob is uploaded.
   841  func (u *uploader) scheduleCheck(ctx context.Context, item *uploadItem) error {
   842  	if u.testScheduleCheck != nil {
   843  		return u.testScheduleCheck(ctx, item)
   844  	}
   845  
   846  	// Do not check the same digest twice.
   847  	cacheKey := digest.NewFromProtoUnvalidated(item.Digest)
   848  	if _, ok := u.seenDigests.LoadOrStore(cacheKey, struct{}{}); ok {
   849  		return nil
   850  	}
   851  	return u.checkBundler.AddWait(ctx, item, 0)
   852  }
   853  
   854  func (u *uploader) findMissingBlobs(ctx context.Context, items []*uploadItem) (res *repb.FindMissingBlobsResponse, err error) {
   855  	if err := u.semFindMissingBlobs.Acquire(ctx, 1); err != nil {
   856  		return nil, errors.WithStack(err)
   857  	}
   858  	defer u.semFindMissingBlobs.Release(1)
   859  
   860  	req := &repb.FindMissingBlobsRequest{
   861  		InstanceName: u.InstanceName,
   862  		BlobDigests:  make([]*repb.Digest, len(items)),
   863  	}
   864  
   865  	for i, item := range items {
   866  		req.BlobDigests[i] = item.Digest
   867  	}
   868  
   869  	err = u.unaryRPC(ctx, &u.Config.FindMissingBlobs, func(ctx context.Context) (err error) {
   870  		res, err = u.cas.FindMissingBlobs(ctx, req)
   871  		return
   872  	})
   873  	return res, err
   874  }
   875  
   876  // check checks which items are present on the server, and schedules upload for
   877  // the missing ones.
   878  func (u *uploader) check(ctx context.Context, items []*uploadItem) error {
   879  	res, err := u.findMissingBlobs(ctx, items)
   880  	if err != nil {
   881  		return err
   882  	}
   883  	byDigest := make(map[digest.Digest]*uploadItem, len(items))
   884  	totalBytes := int64(0)
   885  	for _, item := range items {
   886  		byDigest[digest.NewFromProtoUnvalidated(item.Digest)] = item
   887  		totalBytes += item.Digest.SizeBytes
   888  	}
   889  
   890  	missingBytes := int64(0)
   891  	for _, d := range res.MissingBlobDigests {
   892  		missingBytes += d.SizeBytes
   893  		item := byDigest[digest.NewFromProtoUnvalidated(d)]
   894  		if err := u.scheduleUpload(ctx, item); err != nil {
   895  			return errors.Wrapf(err, "%q", item.Title)
   896  		}
   897  	}
   898  	atomic.AddInt64(&u.stats.CacheMisses.Digests, int64(len(res.MissingBlobDigests)))
   899  	atomic.AddInt64(&u.stats.CacheMisses.Bytes, missingBytes)
   900  	atomic.AddInt64(&u.stats.CacheHits.Digests, int64(len(items)-len(res.MissingBlobDigests)))
   901  	atomic.AddInt64(&u.stats.CacheHits.Bytes, totalBytes-missingBytes)
   902  	return nil
   903  }
   904  
   905  func (u *uploader) scheduleUpload(ctx context.Context, item *uploadItem) error {
   906  	// Check if this blob can be uploaded in a batch.
   907  	if marshalledRequestSize(item.Digest) > int64(u.batchBundler.BundleByteLimit) {
   908  		// There is no way this blob can fit in a batch request.
   909  		u.eg.Go(func() error {
   910  			return errors.Wrap(u.stream(ctx, item, false), item.Title)
   911  		})
   912  		return nil
   913  	}
   914  
   915  	// Since this blob is small enough, just read it entirely.
   916  	contents, err := item.ReadAll()
   917  	if err != nil {
   918  		return errors.Wrapf(err, "failed to read the item")
   919  	}
   920  	req := &repb.BatchUpdateBlobsRequest_Request{Digest: item.Digest, Data: contents}
   921  	return u.batchBundler.AddWait(ctx, req, proto.Size(req))
   922  }
   923  
// uploadBatch uploads blobs in using BatchUpdateBlobs RPC.
//
// Concurrency is bounded by u.semBatchUpdateBlobs. When some blobs in the
// batch fail with a transient error, req.Requests is rebuilt in place to
// contain only the failed sub-requests and a retriable error is returned,
// so that unaryRPC's retry loop resends just those blobs.
func (u *uploader) uploadBatch(ctx context.Context, reqs []*repb.BatchUpdateBlobsRequest_Request) error {
	if err := u.semBatchUpdateBlobs.Acquire(ctx, 1); err != nil {
		return err
	}
	defer u.semBatchUpdateBlobs.Release(1)

	// Index sub-requests by digest so failed ones can be re-queued below.
	reqMap := make(map[digest.Digest]*repb.BatchUpdateBlobsRequest_Request, len(reqs))
	for _, r := range reqs {
		reqMap[digest.NewFromProtoUnvalidated(r.Digest)] = r
	}

	req := &repb.BatchUpdateBlobsRequest{
		InstanceName: u.InstanceName,
		Requests:     reqs,
	}
	return u.unaryRPC(ctx, &u.Config.BatchUpdateBlobs, func(ctx context.Context) error {
		res, err := u.cas.BatchUpdateBlobs(ctx, req)
		if err != nil {
			return err
		}

		bytesTransferred := int64(0)
		digestsTransferred := int64(0)
		var retriableErr error
		req.Requests = req.Requests[:0] // reset for the next attempt
		for _, r := range res.Responses {
			// Each response carries a per-blob status; inspect them individually.
			if err := status.FromProto(r.Status).Err(); err != nil {
				if !retry.TransientOnly(err) {
					return err
				}
				// This error is retriable. Save it to return later, and
				// save the failed sub-request for the next attempt.
				retriableErr = err
				req.Requests = append(req.Requests, reqMap[digest.NewFromProtoUnvalidated(r.Digest)])
				continue
			}
			bytesTransferred += r.Digest.SizeBytes
			digestsTransferred++
		}
		// Count successfully uploaded blobs even when others must be retried.
		atomic.AddInt64(&u.stats.Batched.Bytes, bytesTransferred)
		atomic.AddInt64(&u.stats.Batched.Digests, digestsTransferred)
		return retriableErr
	})
}
   969  
// stream uploads the item using ByteStream service.
//
// If the blob is already uploaded, then the function returns quickly and
// without an error.
//
// Blobs at or above u.Config.CompressedBytestreamThreshold are compressed
// with zstd on the fly; smaller blobs (or a negative threshold) are sent
// uncompressed. The whole upload is retried via u.withRetries, rewinding
// the source before each retry.
func (u *uploader) stream(ctx context.Context, item *uploadItem, updateCacheStats bool) error {
	// Bound the number of concurrent ByteStream writes.
	if err := u.semByteStreamWrite.Acquire(ctx, 1); err != nil {
		return err
	}
	defer u.semByteStreamWrite.Release(1)

	ctx, task := trace.NewTask(ctx, "uploader.stream")
	defer task.End()

	log.Infof("start stream upload %s, size %d", item.Title, item.Digest.SizeBytes)
	now := time.Now()
	defer func() {
		log.Infof("finish stream upload %s, size %d: %s", item.Title, item.Digest.SizeBytes, time.Since(now))
	}()

	// Open the item.
	r, err := item.Open()
	if err != nil {
		return err
	}
	defer r.Close()

	rewind := false
	return u.withRetries(ctx, func(ctx context.Context) error {
		// TODO(nodir): add support for resumable uploads.

		// Do not rewind if this is the first attempt.
		if rewind {
			if err := r.SeekStart(0); err != nil {
				return err
			}
		}
		rewind = true

		if u.Config.CompressedBytestreamThreshold < 0 || item.Digest.SizeBytes < u.Config.CompressedBytestreamThreshold {
			// No compression.
			return u.streamFromReader(ctx, r, item.Digest, false, updateCacheStats)
		}

		// Compress using an in-memory pipe. This is mostly to accommodate the fact
		// that zstd package expects a writer.
		// Note that using io.Pipe() means we buffer only bytes that were not uploaded yet.
		pr, pw := io.Pipe()

		// Take a pooled encoder; per the pool contract it must be Close()d
		// before being returned to the pool.
		enc := zstdEncoders.Get().(*zstd.Encoder)
		defer func() {
			enc.Close()
			zstdEncoders.Put(enc)
		}()
		enc.Reset(pw)

		// Read from disk and make RPCs concurrently.
		eg, ctx := errgroup.WithContext(ctx)
		eg.Go(func() error {
			switch _, err := enc.ReadFrom(r); {
			case err == io.ErrClosedPipe:
				// The other goroutine exited before we finished encoding.
				// Might be a cache hit or context cancelation.
				// In any case, the other goroutine has the actual error, so return nil
				// here.
				return nil
			case err != nil:
				return errors.Wrapf(err, "failed to read the file/blob")
			}

			// Flush the encoder, then close the pipe to signal EOF to the
			// uploading goroutine.
			if err := enc.Close(); err != nil {
				return errors.Wrapf(err, "failed to close the zstd encoder")
			}
			return pw.Close()
		})
		eg.Go(func() error {
			defer pr.Close()
			return u.streamFromReader(ctx, pr, item.Digest, true, updateCacheStats)
		})
		return eg.Wait()
	})
}
  1051  
// streamFromReader uploads the contents of r via the ByteStream Write RPC
// under a resource name derived from digest. If compressed is true, the
// compressed-blobs/zstd resource form is used (r must already yield
// zstd-compressed bytes). When updateCacheStats is true, the cache
// hit/miss counters are updated in addition to the streamed counters.
func (u *uploader) streamFromReader(ctx context.Context, r io.Reader, digest *repb.Digest, compressed, updateCacheStats bool) (rerr error) {
	ctx, cancel, withTimeout := withPerCallTimeout(ctx, u.Config.ByteStreamWrite.Timeout)
	defer cancel()

	stream, err := u.byteStream.Write(ctx)
	if err != nil {
		return err
	}
	// Close the stream on early-return paths. On the normal path the explicit
	// CloseAndRecv below runs first; this deferred call then only overrides
	// rerr if rerr is still nil and the close produced a non-EOF error.
	defer func() {
		if _, err := stream.CloseAndRecv(); rerr == nil && err != io.EOF {
			rerr = err
		}
	}()

	req := &bspb.WriteRequest{}
	instanceSegment := u.InstanceName + "/"
	if instanceSegment == "/" {
		instanceSegment = "" // no instance name configured
	}
	// Each upload gets a unique upload ID in its resource name.
	uploadID, err := uuid.NewRandom()
	if err != nil {
		return err
	}
	if compressed {
		req.ResourceName = fmt.Sprintf("%suploads/%s/compressed-blobs/zstd/%s/%d", instanceSegment, uploadID.String(), digest.Hash, digest.SizeBytes)
	} else {
		req.ResourceName = fmt.Sprintf("%suploads/%s/blobs/%s/%d", instanceSegment, uploadID.String(), digest.Hash, digest.SizeBytes)
	}

	// Reuse a pooled chunk buffer for the duration of the upload.
	buf := u.streamBufs.Get().(*[]byte)
	defer u.streamBufs.Put(buf)

chunkLoop:
	for {
		// Before reading, check if the context if canceled.
		if ctx.Err() != nil {
			return ctx.Err()
		}

		// Read the next chunk from the pipe.
		// Use ReadFull to ensure we aren't sending tiny blobs over RPC.
		region := trace.StartRegion(ctx, "ReadFull in streamFromReader")
		n, err := io.ReadFull(r, *buf)
		region.End()
		switch {
		case err == io.EOF || err == io.ErrUnexpectedEOF:
			// Short read: this is the final chunk.
			req.FinishWrite = true
		case err != nil:
			return err
		}
		req.Data = (*buf)[:n] // must limit by `:n` in ErrUnexpectedEOF case

		// Send the chunk.
		withTimeout(func() {
			trace.WithRegion(ctx, "stream.Send", func() {
				err = stream.Send(req)
			})
		})
		switch {
		case err == io.EOF:
			// The server closed the stream.
			// Most likely the file is already uploaded, see the CommittedSize check below.
			break chunkLoop
		case err != nil:
			return err
		case req.FinishWrite:
			break chunkLoop
		}

		// Prepare the next request.
		req.ResourceName = "" // send the resource name only in the first request
		req.WriteOffset += int64(len(req.Data))
	}

	// Finalize the request.
	switch res, err := stream.CloseAndRecv(); {
	case err != nil:
		return err
	case res.CommittedSize != digest.SizeBytes:
		return fmt.Errorf("unexpected commitSize: got %d, want %d", res.CommittedSize, digest.SizeBytes)
	}

	// Update stats.
	// If the server closed the stream before FinishWrite was sent, the blob
	// was already present (see the early-break above): count it as a hit.
	cacheHit := !req.FinishWrite
	if !cacheHit {
		atomic.AddInt64(&u.stats.Streamed.Bytes, digest.SizeBytes)
		atomic.AddInt64(&u.stats.Streamed.Digests, 1)
	}
	if updateCacheStats {
		st := &u.stats.CacheMisses
		if cacheHit {
			st = &u.stats.CacheHits
		}
		atomic.AddInt64(&st.Bytes, digest.SizeBytes)
		atomic.AddInt64(&st.Digests, 1)
	}
	return nil
}
  1150  
  1151  // uploadItemFromDirMsg creates an upload item for a directory.
  1152  // Sorts directory entries.
  1153  func uploadItemFromDirMsg(title string, dir *repb.Directory) *uploadItem {
  1154  	// Normalize the dir before marshaling, for determinism.
  1155  	sort.Slice(dir.Files, func(i, j int) bool {
  1156  		return dir.Files[i].Name < dir.Files[j].Name
  1157  	})
  1158  	sort.Slice(dir.Directories, func(i, j int) bool {
  1159  		return dir.Directories[i].Name < dir.Directories[j].Name
  1160  	})
  1161  	sort.Slice(dir.Symlinks, func(i, j int) bool {
  1162  		return dir.Symlinks[i].Name < dir.Symlinks[j].Name
  1163  	})
  1164  
  1165  	blob, err := proto.Marshal(dir)
  1166  	if err != nil {
  1167  		panic(err) // impossible
  1168  	}
  1169  	return uploadItemFromBlob(title, blob)
  1170  }
  1171  
  1172  func uploadItemFromBlob(title string, blob []byte) *uploadItem {
  1173  	item := &uploadItem{
  1174  		Title:  title,
  1175  		Digest: digest.NewFromBlob(blob).ToProto(),
  1176  		Open: func() (uploadSource, error) {
  1177  			return newByteSliceSource(blob), nil
  1178  		},
  1179  	}
  1180  	if item.Title == "" {
  1181  		item.Title = fmt.Sprintf("digest %s/%d", item.Digest.Hash, item.Digest.SizeBytes)
  1182  	}
  1183  	return item
  1184  }
  1185  
const (
	// pathSep is the OS-specific path separator as a string, used by the
	// fast path helpers below.
	pathSep         = string(filepath.Separator)
	// parentDirPrefix is the "../" prefix (with the OS separator) that marks
	// a relative path escaping its base directory.
	parentDirPrefix = ".." + pathSep
)
  1190  
  1191  // joinFilePathsFast is a faster version of filepath.Join because it does not
  1192  // call filepath.Clean. Assumes arguments are clean according to filepath.Clean specs.
  1193  func joinFilePathsFast(a, b string) string {
  1194  	if b == "." {
  1195  		return a
  1196  	}
  1197  	if strings.HasSuffix(a, pathSep) {
  1198  		// May happen if a is the root.
  1199  		return a + b
  1200  	}
  1201  	return a + pathSep + b
  1202  }
  1203  
  1204  // dirNameRelFast is a faster version of filepath.Dir because it does not call
  1205  // filepath.Clean. Assumes the argument is clean and relative.
  1206  // Does not work for absolute paths.
  1207  func dirNameRelFast(relPath string) string {
  1208  	i := strings.LastIndex(relPath, pathSep)
  1209  	if i < 0 {
  1210  		return "."
  1211  	}
  1212  	return relPath[:i]
  1213  }
  1214  
  1215  func marshalledFieldSize(size int64) int64 {
  1216  	return 1 + int64(protowire.SizeVarint(uint64(size))) + size
  1217  }
  1218  
  1219  func marshalledRequestSize(d *repb.Digest) int64 {
  1220  	// An additional BatchUpdateBlobsRequest_Request includes the Digest and data fields,
  1221  	// as well as the message itself. Every field has a 1-byte size tag, followed by
  1222  	// the varint field size for variable-sized fields (digest hash and data).
  1223  	// Note that the BatchReadBlobsResponse_Response field is similar, but includes
  1224  	// and additional Status proto which can theoretically be unlimited in size.
  1225  	// We do not account for it here, relying on the Client setting a large (100MB)
  1226  	// limit for incoming messages.
  1227  	digestSize := marshalledFieldSize(int64(len(d.Hash)))
  1228  	if d.SizeBytes > 0 {
  1229  		digestSize += 1 + int64(protowire.SizeVarint(uint64(d.SizeBytes)))
  1230  	}
  1231  	reqSize := marshalledFieldSize(digestSize)
  1232  	if d.SizeBytes > 0 {
  1233  		reqSize += marshalledFieldSize(int64(d.SizeBytes))
  1234  	}
  1235  	return marshalledFieldSize(reqSize)
  1236  }
  1237  
  1238  func addDirEntry(dir *repb.Directory, node proto.Message) {
  1239  	switch node := node.(type) {
  1240  	case *repb.FileNode:
  1241  		dir.Files = append(dir.Files, node)
  1242  	case *repb.DirectoryNode:
  1243  		dir.Directories = append(dir.Directories, node)
  1244  	case *repb.SymlinkNode:
  1245  		dir.Symlinks = append(dir.Symlinks, node)
  1246  	default:
  1247  		// This condition is impossible because all functions in this file
  1248  		// return one of the three types above.
  1249  		panic(fmt.Sprintf("unexpected node type %T", node))
  1250  	}
  1251  }