go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cipd/appengine/impl/cas/cas.go (about)

     1  // Copyright 2017 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cas
    16  
    17  import (
    18  	"context"
    19  	"encoding/hex"
    20  	"fmt"
    21  	"io"
    22  	"net/http"
    23  	"strings"
    24  
    25  	"go.opentelemetry.io/otel/trace"
    26  	"google.golang.org/grpc/codes"
    27  	"google.golang.org/grpc/status"
    28  	"google.golang.org/protobuf/proto"
    29  
    30  	"go.chromium.org/luci/common/clock"
    31  	"go.chromium.org/luci/common/errors"
    32  	"go.chromium.org/luci/common/logging"
    33  	"go.chromium.org/luci/common/retry/transient"
    34  	"go.chromium.org/luci/gae/service/datastore"
    35  	"go.chromium.org/luci/grpc/grpcutil"
    36  	"go.chromium.org/luci/server"
    37  	"go.chromium.org/luci/server/auth"
    38  	"go.chromium.org/luci/server/bqlog"
    39  	"go.chromium.org/luci/server/tq"
    40  
    41  	api "go.chromium.org/luci/cipd/api/cipd/v1"
    42  	"go.chromium.org/luci/cipd/appengine/impl/cas/tasks"
    43  	"go.chromium.org/luci/cipd/appengine/impl/cas/upload"
    44  	"go.chromium.org/luci/cipd/appengine/impl/gs"
    45  	"go.chromium.org/luci/cipd/appengine/impl/monitoring"
    46  	"go.chromium.org/luci/cipd/appengine/impl/settings"
    47  	"go.chromium.org/luci/cipd/common"
    48  )
    49  
    50  // readBufferSize is size of a buffer used to read Google Storage files.
    51  //
    52  // Larger values mean fewer Google Storage RPC calls, but more memory usage.
    53  const readBufferSize = 64 * 1024 * 1024
    54  
// StorageServer extends the api.StorageServer RPC interface with some methods
// used internally by other CIPD server modules.
type StorageServer interface {
	api.StorageServer

	// GetReader returns a gs.Reader that can be used to read contents of an
	// object in the storage.
	//
	// Returns grpc errors. In particular NotFound is returned if there's no such
	// object in the storage.
	GetReader(ctx context.Context, ref *api.ObjectRef) (gs.Reader, error)
}
    67  
    68  // Internal returns non-ACLed implementation of StorageService.
    69  //
    70  // It can be used internally by the backend. Assumes ACL checks are already
    71  // done.
    72  //
    73  // Registers some task queue tasks in the given dispatcher and log sinks in
    74  // the given bundler.
    75  func Internal(d *tq.Dispatcher, b *bqlog.Bundler, s *settings.Settings, opts *server.Options) StorageServer {
    76  	impl := &storageImpl{
    77  		tq:             d,
    78  		settings:       s,
    79  		serviceVersion: opts.ImageVersion(),
    80  		processID:      opts.Hostname,
    81  		getGS:          gs.Get,
    82  		getSignedURL:   getSignedURL,
    83  		submitLog:      func(ctx context.Context, entry *api.VerificationLogEntry) { b.Log(ctx, entry) },
    84  	}
    85  	impl.registerTasks()
    86  	b.RegisterSink(bqlog.Sink{
    87  		Prototype: &api.VerificationLogEntry{},
    88  		Table:     "verification",
    89  	})
    90  	return impl
    91  }
    92  
// storageImpl implements api.StorageServer and task queue handlers.
//
// Doesn't do any ACL checks.
type storageImpl struct {
	api.UnimplementedStorageServer

	// tq is the dispatcher where "verify-upload" and "cleanup-upload" task
	// classes are registered and through which such tasks are enqueued.
	tq       *tq.Dispatcher
	// settings supplies Google Storage paths (ObjectPath, TempGSPath).
	settings *settings.Settings

	// For VerificationLogEntry fields.
	serviceVersion string
	processID      string

	// Mocking points for tests. See Internal() for real implementations.
	//
	// getGS returns the Google Storage client to use for the given context.
	getGS        func(ctx context.Context) gs.GoogleStorage
	// getSignedURL produces a signed URL for gsPath; its second return value is
	// reported as the file size metric by GetObjectURL.
	getSignedURL func(ctx context.Context, gsPath, filename string, signer signerFactory, gs gs.GoogleStorage) (string, uint64, error)
	// submitLog records a verification log entry. It may be nil, in which case
	// verifyUploadTask skips logging.
	submitLog    func(ctx context.Context, entry *api.VerificationLogEntry)
}
   111  
   112  // registerTasks adds tasks to the tq Dispatcher.
   113  func (s *storageImpl) registerTasks() {
   114  	// See queue.yaml for "cas-uploads" task queue definition.
   115  	s.tq.RegisterTaskClass(tq.TaskClass{
   116  		ID:        "verify-upload",
   117  		Prototype: &tasks.VerifyUpload{},
   118  		Kind:      tq.Transactional,
   119  		Queue:     "cas-uploads",
   120  		Handler: func(ctx context.Context, m proto.Message) error {
   121  			return s.verifyUploadTask(ctx, m.(*tasks.VerifyUpload))
   122  		},
   123  	})
   124  	s.tq.RegisterTaskClass(tq.TaskClass{
   125  		ID:        "cleanup-upload",
   126  		Prototype: &tasks.CleanupUpload{},
   127  		Kind:      tq.Transactional,
   128  		Queue:     "cas-uploads",
   129  		Handler: func(ctx context.Context, m proto.Message) error {
   130  			return s.cleanupUploadTask(ctx, m.(*tasks.CleanupUpload))
   131  		},
   132  	})
   133  }
   134  
   135  // GetReader is part of StorageServer interface.
   136  func (s *storageImpl) GetReader(ctx context.Context, ref *api.ObjectRef) (r gs.Reader, err error) {
   137  	defer func() { err = grpcutil.GRPCifyAndLogErr(ctx, err) }()
   138  
   139  	if err = common.ValidateObjectRef(ref, common.KnownHash); err != nil {
   140  		return nil, errors.Annotate(err, "bad ref").Err()
   141  	}
   142  
   143  	r, err = s.getGS(ctx).Reader(ctx, s.settings.ObjectPath(ref), 0)
   144  	if err != nil {
   145  		ann := errors.Annotate(err, "can't read the object")
   146  		if gs.StatusCode(err) == http.StatusNotFound {
   147  			ann.Tag(grpcutil.NotFoundTag)
   148  		}
   149  		return nil, ann.Err()
   150  	}
   151  	return r, nil
   152  }
   153  
   154  // GetObjectURL implements the corresponding RPC method, see the proto doc.
   155  func (s *storageImpl) GetObjectURL(ctx context.Context, r *api.GetObjectURLRequest) (resp *api.ObjectURL, err error) {
   156  	defer func() { err = grpcutil.GRPCifyAndLogErr(ctx, err) }()
   157  
   158  	if err := common.ValidateObjectRef(r.Object, common.KnownHash); err != nil {
   159  		return nil, errors.Annotate(err, "bad 'object' field").Err()
   160  	}
   161  
   162  	// Lite validation for Content-Disposition header. As long as the filename
   163  	// doesn't have '"' or '\n', we are constructing a valid header. Let the
   164  	// browser do the rest of the validation however it likes.
   165  	if strings.ContainsAny(r.DownloadFilename, "\"\r\n") {
   166  		return nil, status.Errorf(codes.InvalidArgument, "bad 'download_filename' field, contains one of %q", "\"\r\n")
   167  	}
   168  
   169  	url, size, err := s.getSignedURL(ctx, s.settings.ObjectPath(r.Object), r.DownloadFilename, defaultSigner, s.getGS(ctx))
   170  	if err != nil {
   171  		return nil, errors.Annotate(err, "failed to get signed URL").Err()
   172  	}
   173  	monitoring.FileSize(ctx, size)
   174  	return &api.ObjectURL{SignedUrl: url}, nil
   175  }
   176  
   177  // BeginUpload implements the corresponding RPC method, see the proto doc.
   178  func (s *storageImpl) BeginUpload(ctx context.Context, r *api.BeginUploadRequest) (resp *api.UploadOperation, err error) {
   179  	defer func() { err = grpcutil.GRPCifyAndLogErr(ctx, err) }()
   180  
   181  	// Either Object or HashAlgo should be given. If both are, algos must match.
   182  	var hashAlgo api.HashAlgo
   183  	var hexDigest string
   184  	if r.Object != nil {
   185  		if err := common.ValidateObjectRef(r.Object, common.KnownHash); err != nil {
   186  			return nil, errors.Annotate(err, "bad 'object'").Err()
   187  		}
   188  		if r.HashAlgo != 0 && r.HashAlgo != r.Object.HashAlgo {
   189  			return nil, errors.Reason("'hash_algo' and 'object.hash_algo' do not match").
   190  				Tag(grpcutil.InvalidArgumentTag).Err()
   191  		}
   192  		hashAlgo = r.Object.HashAlgo
   193  		hexDigest = r.Object.HexDigest
   194  	} else if err := common.ValidateHashAlgo(r.HashAlgo); err != nil {
   195  		return nil, errors.Annotate(err, "bad 'hash_algo'").Err()
   196  	} else {
   197  		hashAlgo = r.HashAlgo
   198  	}
   199  
   200  	gs := s.getGS(ctx)
   201  
   202  	// If we know the name of the object being uploaded, check we don't have it
   203  	// in the store already to avoid wasting time uploading it. Note that it is
   204  	// always fine to "overwrite" objects, so if the object appears while the
   205  	// client is still uploading, nothing catastrophic happens, just some time
   206  	// gets wasted.
   207  	if r.Object != nil {
   208  		switch yes, err := gs.Exists(ctx, s.settings.ObjectPath(r.Object)); {
   209  		case err != nil:
   210  			return nil, errors.Annotate(err, "failed to check the object's presence").
   211  				Tag(grpcutil.InternalTag).Err()
   212  		case yes:
   213  			return nil, status.Errorf(codes.AlreadyExists, "the object is already in the store")
   214  		}
   215  	}
   216  
   217  	// Grab new unique ID for the upload operation, it is used in GS filenames.
   218  	opID, err := upload.NewOpID(ctx)
   219  	if err != nil {
   220  		return nil, errors.Annotate(err, "failed to allocate upload operation ID").
   221  			Tag(grpcutil.InternalTag).Err()
   222  	}
   223  
   224  	// Attach HMAC to it, to be returned to the client to make sure clients can't
   225  	// access sessions they don't own. Do it early, to avoid storing stuff in
   226  	// the datastore and GS if WrapOpID fails.
   227  	caller := auth.CurrentIdentity(ctx)
   228  	wrappedOpID, err := upload.WrapOpID(ctx, opID, caller)
   229  	if err != nil {
   230  		return nil, errors.Annotate(err, "failed to HMAC-tag upload operation ID").
   231  			Tag(grpcutil.InternalTag).Err()
   232  	}
   233  
   234  	// GS path to which the client will upload the data. Prefix it with the
   235  	// current timestamp to make bucket listing sorted by time.
   236  	now := clock.Now(ctx)
   237  	tempGSPath := fmt.Sprintf("%s/%d_%d", s.settings.TempGSPath, now.Unix(), opID)
   238  
   239  	// Initiate Google Storage resumable upload session to this path. The returned
   240  	// URL can be accessed unauthenticated. The client will use it directly to
   241  	// upload the data. If left open, the GS session eventually expires, so it's
   242  	// not big deal if we loose it (e.g. due to a crash before returning).
   243  	uploadURL, err := gs.StartUpload(ctx, tempGSPath)
   244  	if err != nil {
   245  		return nil, errors.Annotate(err, "failed to start resumable upload").
   246  			Tag(grpcutil.InternalTag).Err()
   247  	}
   248  
   249  	// Save the operation. It is accessed in FinishUpload.
   250  	op := upload.Operation{
   251  		ID:         opID,
   252  		Status:     api.UploadStatus_UPLOADING,
   253  		TempGSPath: tempGSPath,
   254  		UploadURL:  uploadURL,
   255  		HashAlgo:   hashAlgo,
   256  		HexDigest:  hexDigest, // may be empty, means the server should calculate it
   257  		CreatedBy:  caller,
   258  		CreatedTS:  now.UTC(),
   259  		UpdatedTS:  now.UTC(),
   260  	}
   261  	if err = datastore.Put(ctx, &op); err != nil {
   262  		return nil, errors.Annotate(err, "failed to persist upload operation").
   263  			Tag(grpcutil.InternalTag).Err()
   264  	}
   265  
   266  	return op.ToProto(wrappedOpID), nil
   267  }
   268  
   269  // FinishUpload implements the corresponding RPC method, see the proto doc.
   270  func (s *storageImpl) FinishUpload(ctx context.Context, r *api.FinishUploadRequest) (resp *api.UploadOperation, err error) {
   271  	defer func() { err = grpcutil.GRPCifyAndLogErr(ctx, err) }()
   272  
   273  	if r.ForceHash != nil {
   274  		if err := common.ValidateObjectRef(r.ForceHash, common.KnownHash); err != nil {
   275  			return nil, errors.Annotate(err, "bad 'force_hash' field").Err()
   276  		}
   277  	}
   278  
   279  	// Grab the corresponding operation and inspect its status.
   280  	op, err := fetchOp(ctx, r.UploadOperationId)
   281  	switch {
   282  	case err != nil:
   283  		return nil, err
   284  	case op.Status != api.UploadStatus_UPLOADING:
   285  		// Nothing to do if the operation is already closed or being verified.
   286  		return op.ToProto(r.UploadOperationId), nil
   287  	}
   288  
   289  	// If the forced hash is provided by the (trusted) caller, we are almost done.
   290  	// Just need to move the temp file to its final location based on this hash
   291  	// and close the operation.
   292  	if r.ForceHash != nil {
   293  		mutated, err := s.finishAndForcedHash(ctx, op, r.ForceHash)
   294  		if err != nil {
   295  			return nil, err
   296  		}
   297  		return mutated.ToProto(r.UploadOperationId), nil
   298  	}
   299  
   300  	// Otherwise start the hash verification task, see verifyUploadTask below.
   301  	mutated, err := op.Advance(ctx, func(ctx context.Context, op *upload.Operation) error {
   302  		op.Status = api.UploadStatus_VERIFYING
   303  		return s.tq.AddTask(ctx, &tq.Task{
   304  			Title:   fmt.Sprintf("%d", op.ID),
   305  			Payload: &tasks.VerifyUpload{UploadOperationId: op.ID},
   306  		})
   307  	})
   308  	if err != nil {
   309  		return nil, errors.Annotate(err, "failed to start the verification task").
   310  			Tag(grpcutil.InternalTag).Err()
   311  	}
   312  	return mutated.ToProto(r.UploadOperationId), nil
   313  }
   314  
   315  // CancelUpload implements the corresponding RPC method, see the proto doc.
   316  func (s *storageImpl) CancelUpload(ctx context.Context, r *api.CancelUploadRequest) (resp *api.UploadOperation, err error) {
   317  	defer func() { err = grpcutil.GRPCifyAndLogErr(ctx, err) }()
   318  
   319  	handleOpStatus := func(op *upload.Operation) (*api.UploadOperation, error) {
   320  		if op.Status == api.UploadStatus_ERRORED || op.Status == api.UploadStatus_CANCELED {
   321  			return op.ToProto(r.UploadOperationId), nil
   322  		}
   323  		return nil, errors.Reason("the operation is in state %s and can't be canceled", op.Status).Tag(grpcutil.FailedPreconditionTag).Err()
   324  	}
   325  
   326  	// Grab the corresponding operation and inspect its status.
   327  	op, err := fetchOp(ctx, r.UploadOperationId)
   328  	switch {
   329  	case err != nil:
   330  		return nil, err
   331  	case op.Status != api.UploadStatus_UPLOADING:
   332  		return handleOpStatus(op)
   333  	}
   334  
   335  	// Move the operation to canceled state and launch the TQ task to cleanup.
   336  	mutated, err := op.Advance(ctx, func(ctx context.Context, op *upload.Operation) error {
   337  		op.Status = api.UploadStatus_CANCELED
   338  		return s.tq.AddTask(ctx, &tq.Task{
   339  			Title: fmt.Sprintf("%d", op.ID),
   340  			Payload: &tasks.CleanupUpload{
   341  				UploadOperationId: op.ID,
   342  				UploadUrl:         op.UploadURL,
   343  				PathToCleanup:     op.TempGSPath,
   344  			},
   345  		})
   346  	})
   347  	if err != nil {
   348  		return nil, errors.Annotate(err, "failed to start the cleanup task").
   349  			Tag(grpcutil.InternalTag).Err()
   350  	}
   351  	return handleOpStatus(mutated)
   352  }
   353  
   354  // fethcOp unwraps upload operation ID and fetches upload.Operation entity.
   355  //
   356  // Returns an grpc-tagged error on failure that can be returned to the RPC
   357  // caller right away.
   358  func fetchOp(ctx context.Context, wrappedOpID string) (*upload.Operation, error) {
   359  	opID, err := upload.UnwrapOpID(ctx, wrappedOpID, auth.CurrentIdentity(ctx))
   360  	if err != nil {
   361  		if transient.Tag.In(err) {
   362  			return nil, errors.Annotate(err, "failed to check HMAC on upload_operation_id").Err()
   363  		}
   364  		logging.Infof(ctx, "HMAC check failed - %s", err)
   365  		return nil, errors.Reason("no such upload operation").Tag(grpcutil.NotFoundTag).Err()
   366  	}
   367  
   368  	op := &upload.Operation{ID: opID}
   369  	switch err := datastore.Get(ctx, op); {
   370  	case err == datastore.ErrNoSuchEntity:
   371  		return nil, errors.Reason("no such upload operation").
   372  			Tag(grpcutil.NotFoundTag).Err()
   373  	case err != nil:
   374  		return nil, errors.Annotate(err, "failed to fetch the upload operation").
   375  			Tag(grpcutil.InternalTag).Err()
   376  	}
   377  
   378  	return op, nil
   379  }
   380  
   381  // finishAndForcedHash finalizes uploads that use ForceHash field.
   382  //
   383  // It publishes the object immediately, skipping the verification.
   384  func (s *storageImpl) finishAndForcedHash(ctx context.Context, op *upload.Operation, hash *api.ObjectRef) (*upload.Operation, error) {
   385  	gs := s.getGS(ctx)
   386  
   387  	// Try to move the object into the final location. This may fail
   388  	// transiently, in which case we ask the client to retry, or fatally, in
   389  	// which case we close the upload operation with an error.
   390  	pubErr := gs.Publish(ctx, s.settings.ObjectPath(hash), op.TempGSPath, -1)
   391  	if transient.Tag.In(pubErr) {
   392  		return nil, errors.Annotate(pubErr, "failed to publish the object").
   393  			Tag(grpcutil.InternalTag).Err()
   394  	}
   395  
   396  	// Try to remove the leftover garbage. See maybeDelete doc for possible
   397  	// caveats.
   398  	if err := s.maybeDelete(ctx, gs, op.TempGSPath); err != nil {
   399  		return nil, err
   400  	}
   401  
   402  	// Set the status of the operation based on whether we published the file
   403  	// or not.
   404  	return op.Advance(ctx, func(_ context.Context, op *upload.Operation) error {
   405  		if pubErr != nil {
   406  			op.Status = api.UploadStatus_ERRORED
   407  			op.Error = fmt.Sprintf("Failed to publish the object - %s", pubErr)
   408  		} else {
   409  			op.Status = api.UploadStatus_PUBLISHED
   410  			op.HashAlgo = hash.HashAlgo
   411  			op.HexDigest = hash.HexDigest
   412  		}
   413  		return nil
   414  	})
   415  }
   416  
   417  // verifyUploadTask verifies data uploaded by a user and closes the upload
   418  // operation based on the result.
   419  //
   420  // Returning a transient error here causes the task queue service to retry the
   421  // task.
   422  func (s *storageImpl) verifyUploadTask(ctx context.Context, task *tasks.VerifyUpload) (err error) {
   423  	op := &upload.Operation{ID: task.UploadOperationId}
   424  	switch err := datastore.Get(ctx, op); {
   425  	case err == datastore.ErrNoSuchEntity:
   426  		return errors.Reason("no such upload operation %d", op.ID).Err()
   427  	case err != nil:
   428  		return errors.Annotate(err, "failed to fetch upload operation %d", op.ID).
   429  			Tag(transient.Tag).Err()
   430  	case op.Status != api.UploadStatus_VERIFYING:
   431  		logging.Infof(ctx, "The upload operation %d is not pending verification anymore (status = %s)", op.ID, op.Status)
   432  		return nil
   433  	}
   434  
   435  	gs := s.getGS(ctx)
   436  
   437  	// If the destination file exists already, we are done. This may happen on
   438  	// a task retry or if the file was uploaded concurrently by someone else.
   439  	// Otherwise we still need to verify the temp file, and then move it into
   440  	// the final location.
   441  	if op.HexDigest != "" {
   442  		exists, err := gs.Exists(ctx, s.settings.ObjectPath(&api.ObjectRef{
   443  			HashAlgo:  op.HashAlgo,
   444  			HexDigest: op.HexDigest,
   445  		}))
   446  		switch {
   447  		case err != nil:
   448  			return errors.Annotate(err, "failed to check the presence of the destination file").
   449  				Tag(transient.Tag).Err()
   450  		case exists:
   451  			if err := s.maybeDelete(ctx, gs, op.TempGSPath); err != nil {
   452  				return err
   453  			}
   454  			_, err = op.Advance(ctx, func(_ context.Context, op *upload.Operation) error {
   455  				op.Status = api.UploadStatus_PUBLISHED
   456  				return nil
   457  			})
   458  			return err
   459  		}
   460  	}
   461  
   462  	verifiedHexDigest := "" // set after the successful hash verification below
   463  
   464  	// Log some details about the verification operation.
   465  	logEntry := &api.VerificationLogEntry{
   466  		OperationId:    op.ID,
   467  		InitiatedBy:    string(op.CreatedBy),
   468  		TempGsPath:     op.TempGSPath,
   469  		Submitted:      op.CreatedTS.UnixNano() / 1000,
   470  		Started:        clock.Now(ctx).UnixNano() / 1000,
   471  		ServiceVersion: s.serviceVersion,
   472  		ProcessId:      s.processID,
   473  		TraceId:        trace.SpanContextFromContext(ctx).TraceID().String(),
   474  	}
   475  	if op.HexDigest != "" {
   476  		logEntry.ExpectedInstanceId = common.ObjectRefToInstanceID(&api.ObjectRef{
   477  			HashAlgo:  op.HashAlgo,
   478  			HexDigest: op.HexDigest,
   479  		})
   480  	}
   481  
   482  	submitLog := func(outcome api.UploadStatus, error string) {
   483  		logEntry.Outcome = outcome.String()
   484  		logEntry.Error = error
   485  		logEntry.Finished = clock.Now(ctx).UnixNano() / 1000
   486  
   487  		verificationTimeSec := float64(logEntry.Finished-logEntry.Started) / 1e6
   488  		if verificationTimeSec < 0.001 {
   489  			verificationTimeSec = 0.001
   490  		}
   491  		logEntry.VerificationSpeed = int64(float64(logEntry.FileSize) / verificationTimeSec)
   492  
   493  		if s.submitLog != nil {
   494  			s.submitLog(ctx, logEntry)
   495  		}
   496  	}
   497  
   498  	defer func() {
   499  		if err != nil {
   500  			logging.Errorf(ctx, "Verification error: %s", err)
   501  		}
   502  
   503  		// On transient errors don't touch the temp file or the operation, we need
   504  		// them for retries.
   505  		if transient.Tag.In(err) {
   506  			submitLog(api.UploadStatus_ERRORED, fmt.Sprintf("Transient error: %s", err))
   507  			return
   508  		}
   509  
   510  		// Update the status of the operation based on 'err'. If Advance fails
   511  		// itself, return a transient error to make sure 'verifyUploadTask' is
   512  		// retried.
   513  		advancedOp, opErr := op.Advance(ctx, func(_ context.Context, op *upload.Operation) error {
   514  			if err != nil {
   515  				op.Status = api.UploadStatus_ERRORED
   516  				op.Error = fmt.Sprintf("Verification failed: %s", err)
   517  			} else {
   518  				op.Status = api.UploadStatus_PUBLISHED
   519  				op.HexDigest = verifiedHexDigest
   520  			}
   521  			return nil
   522  		})
   523  		if opErr != nil {
   524  			err = opErr // override the error returned by the task
   525  			submitLog(api.UploadStatus_ERRORED, fmt.Sprintf("Error updating UploadOperation: %s", err))
   526  			return
   527  		}
   528  
   529  		submitLog(advancedOp.Status, advancedOp.Error)
   530  
   531  		// Best effort deletion of the temporary file. We do it here, after updating
   532  		// the operation, to avoid retrying the expensive verification procedure
   533  		// just because Delete is flaky. Having a little garbage in the temporary
   534  		// directory doesn't hurt (it is marked with operation ID and timestamp,
   535  		// so we can always clean it up offline).
   536  		if delErr := gs.Delete(ctx, op.TempGSPath); delErr != nil {
   537  			logging.WithError(delErr).Errorf(ctx,
   538  				"Failed to remove temporary Google Storage file, it is dead garbage now: %s", op.TempGSPath)
   539  		}
   540  	}()
   541  
   542  	hash, err := common.NewHash(op.HashAlgo)
   543  	if err != nil {
   544  		return err
   545  	}
   546  
   547  	// Prepare reading the most recent generation of the uploaded temporary file.
   548  	r, err := gs.Reader(ctx, op.TempGSPath, 0)
   549  	if err != nil {
   550  		return errors.Annotate(err, "failed to start reading Google Storage file").Err()
   551  	}
   552  
   553  	// Pick large buffer to reduce number of Google Storage RPC calls. Don't
   554  	// allocate more than necessary though.
   555  	fileSize := r.Size()
   556  	bufSize := readBufferSize
   557  	if fileSize < int64(bufSize) {
   558  		bufSize = int(fileSize)
   559  	}
   560  	logEntry.FileSize = fileSize
   561  
   562  	// Feed the file to the hasher.
   563  	_, err = io.CopyBuffer(hash, io.NewSectionReader(r, 0, fileSize), make([]byte, bufSize))
   564  	if err != nil {
   565  		return errors.Annotate(err, "failed to read Google Storage file").Err()
   566  	}
   567  	verifiedHexDigest = hex.EncodeToString(hash.Sum(nil))
   568  
   569  	// This should usually match logEntry.ExpectedInstanceId.
   570  	logEntry.VerifiedInstanceId = common.ObjectRefToInstanceID(&api.ObjectRef{
   571  		HashAlgo:  op.HashAlgo,
   572  		HexDigest: verifiedHexDigest,
   573  	})
   574  
   575  	// If we know the expected hash, verify it matches what we have calculated.
   576  	if op.HexDigest != "" && op.HexDigest != verifiedHexDigest {
   577  		return errors.Reason("expected %s to be %s, got %s", op.HashAlgo, op.HexDigest, verifiedHexDigest).Err()
   578  	}
   579  
   580  	// The verification was successful, move the temp file (at the generation we
   581  	// have just verified) to the final location. If the file was modified after
   582  	// we have verified it (has different generation number), Publish fails:
   583  	// clients must not modify uploads after calling FinishUpload, this is
   584  	// sneaky behavior. Regardless of the outcome of this operation, the upload
   585  	// operation is closed in the defer above.
   586  	err = gs.Publish(ctx, s.settings.ObjectPath(&api.ObjectRef{
   587  		HashAlgo:  op.HashAlgo,
   588  		HexDigest: verifiedHexDigest,
   589  	}), op.TempGSPath, r.Generation())
   590  	if err != nil {
   591  		return errors.Annotate(err, "failed to publish the verified file").Err()
   592  	}
   593  	return nil
   594  }
   595  
   596  // cleanupUploadTask is called to clean up after a canceled upload.
   597  //
   598  // Best effort. If the temporary file can't be deleted from GS due to some
   599  // non-transient error, logs the error and ignores it, since retrying won't
   600  // help.
   601  func (s *storageImpl) cleanupUploadTask(ctx context.Context, task *tasks.CleanupUpload) (err error) {
   602  	gs := s.getGS(ctx)
   603  
   604  	if err := gs.CancelUpload(ctx, task.UploadUrl); err != nil {
   605  		if transient.Tag.In(err) {
   606  			return errors.Annotate(err, "transient error when canceling the resumable upload").Err()
   607  		}
   608  		logging.WithError(err).Errorf(ctx, "Failed to cancel resumable upload")
   609  	}
   610  
   611  	if err := gs.Delete(ctx, task.PathToCleanup); err != nil {
   612  		if transient.Tag.In(err) {
   613  			return errors.Annotate(err, "transient error when deleting the temp file").Err()
   614  		}
   615  		logging.WithError(err).Errorf(ctx, "Failed to delete the temp file")
   616  	}
   617  
   618  	return nil
   619  }
   620  
   621  // maybeDelete is called to delete temporary file when finishing an upload.
   622  //
   623  // If this fails transiently, we ask the client (or the task queue) to retry the
   624  // corresponding RPC (by returning transient errors), so the file is deleted
   625  // eventually. It means Publish may be called again too, but it is idempotent,
   626  // so it is fine.
   627  //
   628  // If Delete fails fatally, we are in a tough position, since we did publish the
   629  // file already, so the upload operation is technically successful and marking
   630  // it as failed is a lie. So we log and ignore fatal Delete errors. They should
   631  // not happen anyway.
   632  //
   633  // Thus, this function returns either nil or a transient error.
   634  func (s *storageImpl) maybeDelete(ctx context.Context, gs gs.GoogleStorage, path string) error {
   635  	switch err := gs.Delete(ctx, path); {
   636  	case transient.Tag.In(err):
   637  		return errors.Annotate(err, "transient error when removing temporary Google Storage file").
   638  			Tag(grpcutil.InternalTag).Err()
   639  	case err != nil:
   640  		logging.WithError(err).Errorf(ctx, "Failed to remove temporary Google Storage file, it is dead garbage now: %s", path)
   641  	}
   642  	return nil
   643  }