go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cipd/appengine/impl/cas/cas.go (about) 1 // Copyright 2017 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package cas 16 17 import ( 18 "context" 19 "encoding/hex" 20 "fmt" 21 "io" 22 "net/http" 23 "strings" 24 25 "go.opentelemetry.io/otel/trace" 26 "google.golang.org/grpc/codes" 27 "google.golang.org/grpc/status" 28 "google.golang.org/protobuf/proto" 29 30 "go.chromium.org/luci/common/clock" 31 "go.chromium.org/luci/common/errors" 32 "go.chromium.org/luci/common/logging" 33 "go.chromium.org/luci/common/retry/transient" 34 "go.chromium.org/luci/gae/service/datastore" 35 "go.chromium.org/luci/grpc/grpcutil" 36 "go.chromium.org/luci/server" 37 "go.chromium.org/luci/server/auth" 38 "go.chromium.org/luci/server/bqlog" 39 "go.chromium.org/luci/server/tq" 40 41 api "go.chromium.org/luci/cipd/api/cipd/v1" 42 "go.chromium.org/luci/cipd/appengine/impl/cas/tasks" 43 "go.chromium.org/luci/cipd/appengine/impl/cas/upload" 44 "go.chromium.org/luci/cipd/appengine/impl/gs" 45 "go.chromium.org/luci/cipd/appengine/impl/monitoring" 46 "go.chromium.org/luci/cipd/appengine/impl/settings" 47 "go.chromium.org/luci/cipd/common" 48 ) 49 50 // readBufferSize is size of a buffer used to read Google Storage files. 51 // 52 // Larger values mean fewer Google Storage RPC calls, but more memory usage. 53 const readBufferSize = 64 * 1024 * 1024 54 55 // StorageServer extends StorageServer RPC interface with some methods used 56 // internally by other CIPD server modules. 57 type StorageServer interface { 58 api.StorageServer 59 60 // GetReader returns an io.ReaderAt implementation to read contents of an 61 // object in the storage. 62 // 63 // Returns grpc errors. In particular NotFound is returned if there's no such 64 // object in the storage. 65 GetReader(ctx context.Context, ref *api.ObjectRef) (gs.Reader, error) 66 } 67 68 // Internal returns non-ACLed implementation of StorageService. 69 // 70 // It can be used internally by the backend. Assumes ACL checks are already 71 // done. 72 // 73 // Registers some task queue tasks in the given dispatcher and log sinks in 74 // the given bundler. 75 func Internal(d *tq.Dispatcher, b *bqlog.Bundler, s *settings.Settings, opts *server.Options) StorageServer { 76 impl := &storageImpl{ 77 tq: d, 78 settings: s, 79 serviceVersion: opts.ImageVersion(), 80 processID: opts.Hostname, 81 getGS: gs.Get, 82 getSignedURL: getSignedURL, 83 submitLog: func(ctx context.Context, entry *api.VerificationLogEntry) { b.Log(ctx, entry) }, 84 } 85 impl.registerTasks() 86 b.RegisterSink(bqlog.Sink{ 87 Prototype: &api.VerificationLogEntry{}, 88 Table: "verification", 89 }) 90 return impl 91 } 92 93 // storageImpl implements api.StorageServer and task queue handlers. 94 // 95 // Doesn't do any ACL checks. 96 type storageImpl struct { 97 api.UnimplementedStorageServer 98 99 tq *tq.Dispatcher 100 settings *settings.Settings 101 102 // For VerificationLogEntry fields. 103 serviceVersion string 104 processID string 105 106 // Mocking points for tests. See Internal() for real implementations. 107 getGS func(ctx context.Context) gs.GoogleStorage 108 getSignedURL func(ctx context.Context, gsPath, filename string, signer signerFactory, gs gs.GoogleStorage) (string, uint64, error) 109 submitLog func(ctx context.Context, entry *api.VerificationLogEntry) 110 } 111 112 // registerTasks adds tasks to the tq Dispatcher. 113 func (s *storageImpl) registerTasks() { 114 // See queue.yaml for "cas-uploads" task queue definition. 115 s.tq.RegisterTaskClass(tq.TaskClass{ 116 ID: "verify-upload", 117 Prototype: &tasks.VerifyUpload{}, 118 Kind: tq.Transactional, 119 Queue: "cas-uploads", 120 Handler: func(ctx context.Context, m proto.Message) error { 121 return s.verifyUploadTask(ctx, m.(*tasks.VerifyUpload)) 122 }, 123 }) 124 s.tq.RegisterTaskClass(tq.TaskClass{ 125 ID: "cleanup-upload", 126 Prototype: &tasks.CleanupUpload{}, 127 Kind: tq.Transactional, 128 Queue: "cas-uploads", 129 Handler: func(ctx context.Context, m proto.Message) error { 130 return s.cleanupUploadTask(ctx, m.(*tasks.CleanupUpload)) 131 }, 132 }) 133 } 134 135 // GetReader is part of StorageServer interface. 136 func (s *storageImpl) GetReader(ctx context.Context, ref *api.ObjectRef) (r gs.Reader, err error) { 137 defer func() { err = grpcutil.GRPCifyAndLogErr(ctx, err) }() 138 139 if err = common.ValidateObjectRef(ref, common.KnownHash); err != nil { 140 return nil, errors.Annotate(err, "bad ref").Err() 141 } 142 143 r, err = s.getGS(ctx).Reader(ctx, s.settings.ObjectPath(ref), 0) 144 if err != nil { 145 ann := errors.Annotate(err, "can't read the object") 146 if gs.StatusCode(err) == http.StatusNotFound { 147 ann.Tag(grpcutil.NotFoundTag) 148 } 149 return nil, ann.Err() 150 } 151 return r, nil 152 } 153 154 // GetObjectURL implements the corresponding RPC method, see the proto doc. 155 func (s *storageImpl) GetObjectURL(ctx context.Context, r *api.GetObjectURLRequest) (resp *api.ObjectURL, err error) { 156 defer func() { err = grpcutil.GRPCifyAndLogErr(ctx, err) }() 157 158 if err := common.ValidateObjectRef(r.Object, common.KnownHash); err != nil { 159 return nil, errors.Annotate(err, "bad 'object' field").Err() 160 } 161 162 // Lite validation for Content-Disposition header. As long as the filename 163 // doesn't have '"' or '\n', we are constructing a valid header. Let the 164 // browser do the rest of the validation however it likes. 165 if strings.ContainsAny(r.DownloadFilename, "\"\r\n") { 166 return nil, status.Errorf(codes.InvalidArgument, "bad 'download_filename' field, contains one of %q", "\"\r\n") 167 } 168 169 url, size, err := s.getSignedURL(ctx, s.settings.ObjectPath(r.Object), r.DownloadFilename, defaultSigner, s.getGS(ctx)) 170 if err != nil { 171 return nil, errors.Annotate(err, "failed to get signed URL").Err() 172 } 173 monitoring.FileSize(ctx, size) 174 return &api.ObjectURL{SignedUrl: url}, nil 175 } 176 177 // BeginUpload implements the corresponding RPC method, see the proto doc. 178 func (s *storageImpl) BeginUpload(ctx context.Context, r *api.BeginUploadRequest) (resp *api.UploadOperation, err error) { 179 defer func() { err = grpcutil.GRPCifyAndLogErr(ctx, err) }() 180 181 // Either Object or HashAlgo should be given. If both are, algos must match. 182 var hashAlgo api.HashAlgo 183 var hexDigest string 184 if r.Object != nil { 185 if err := common.ValidateObjectRef(r.Object, common.KnownHash); err != nil { 186 return nil, errors.Annotate(err, "bad 'object'").Err() 187 } 188 if r.HashAlgo != 0 && r.HashAlgo != r.Object.HashAlgo { 189 return nil, errors.Reason("'hash_algo' and 'object.hash_algo' do not match"). 190 Tag(grpcutil.InvalidArgumentTag).Err() 191 } 192 hashAlgo = r.Object.HashAlgo 193 hexDigest = r.Object.HexDigest 194 } else if err := common.ValidateHashAlgo(r.HashAlgo); err != nil { 195 return nil, errors.Annotate(err, "bad 'hash_algo'").Err() 196 } else { 197 hashAlgo = r.HashAlgo 198 } 199 200 gs := s.getGS(ctx) 201 202 // If we know the name of the object being uploaded, check we don't have it 203 // in the store already to avoid wasting time uploading it. Note that it is 204 // always fine to "overwrite" objects, so if the object appears while the 205 // client is still uploading, nothing catastrophic happens, just some time 206 // gets wasted. 207 if r.Object != nil { 208 switch yes, err := gs.Exists(ctx, s.settings.ObjectPath(r.Object)); { 209 case err != nil: 210 return nil, errors.Annotate(err, "failed to check the object's presence"). 211 Tag(grpcutil.InternalTag).Err() 212 case yes: 213 return nil, status.Errorf(codes.AlreadyExists, "the object is already in the store") 214 } 215 } 216 217 // Grab new unique ID for the upload operation, it is used in GS filenames. 218 opID, err := upload.NewOpID(ctx) 219 if err != nil { 220 return nil, errors.Annotate(err, "failed to allocate upload operation ID"). 221 Tag(grpcutil.InternalTag).Err() 222 } 223 224 // Attach HMAC to it, to be returned to the client to make sure clients can't 225 // access sessions they don't own. Do it early, to avoid storing stuff in 226 // the datastore and GS if WrapOpID fails. 227 caller := auth.CurrentIdentity(ctx) 228 wrappedOpID, err := upload.WrapOpID(ctx, opID, caller) 229 if err != nil { 230 return nil, errors.Annotate(err, "failed to HMAC-tag upload operation ID"). 231 Tag(grpcutil.InternalTag).Err() 232 } 233 234 // GS path to which the client will upload the data. Prefix it with the 235 // current timestamp to make bucket listing sorted by time. 236 now := clock.Now(ctx) 237 tempGSPath := fmt.Sprintf("%s/%d_%d", s.settings.TempGSPath, now.Unix(), opID) 238 239 // Initiate Google Storage resumable upload session to this path. The returned 240 // URL can be accessed unauthenticated. The client will use it directly to 241 // upload the data. If left open, the GS session eventually expires, so it's 242 // not big deal if we loose it (e.g. due to a crash before returning). 243 uploadURL, err := gs.StartUpload(ctx, tempGSPath) 244 if err != nil { 245 return nil, errors.Annotate(err, "failed to start resumable upload"). 246 Tag(grpcutil.InternalTag).Err() 247 } 248 249 // Save the operation. It is accessed in FinishUpload. 250 op := upload.Operation{ 251 ID: opID, 252 Status: api.UploadStatus_UPLOADING, 253 TempGSPath: tempGSPath, 254 UploadURL: uploadURL, 255 HashAlgo: hashAlgo, 256 HexDigest: hexDigest, // may be empty, means the server should calculate it 257 CreatedBy: caller, 258 CreatedTS: now.UTC(), 259 UpdatedTS: now.UTC(), 260 } 261 if err = datastore.Put(ctx, &op); err != nil { 262 return nil, errors.Annotate(err, "failed to persist upload operation"). 263 Tag(grpcutil.InternalTag).Err() 264 } 265 266 return op.ToProto(wrappedOpID), nil 267 } 268 269 // FinishUpload implements the corresponding RPC method, see the proto doc. 270 func (s *storageImpl) FinishUpload(ctx context.Context, r *api.FinishUploadRequest) (resp *api.UploadOperation, err error) { 271 defer func() { err = grpcutil.GRPCifyAndLogErr(ctx, err) }() 272 273 if r.ForceHash != nil { 274 if err := common.ValidateObjectRef(r.ForceHash, common.KnownHash); err != nil { 275 return nil, errors.Annotate(err, "bad 'force_hash' field").Err() 276 } 277 } 278 279 // Grab the corresponding operation and inspect its status. 280 op, err := fetchOp(ctx, r.UploadOperationId) 281 switch { 282 case err != nil: 283 return nil, err 284 case op.Status != api.UploadStatus_UPLOADING: 285 // Nothing to do if the operation is already closed or being verified. 286 return op.ToProto(r.UploadOperationId), nil 287 } 288 289 // If the forced hash is provided by the (trusted) caller, we are almost done. 290 // Just need to move the temp file to its final location based on this hash 291 // and close the operation. 292 if r.ForceHash != nil { 293 mutated, err := s.finishAndForcedHash(ctx, op, r.ForceHash) 294 if err != nil { 295 return nil, err 296 } 297 return mutated.ToProto(r.UploadOperationId), nil 298 } 299 300 // Otherwise start the hash verification task, see verifyUploadTask below. 301 mutated, err := op.Advance(ctx, func(ctx context.Context, op *upload.Operation) error { 302 op.Status = api.UploadStatus_VERIFYING 303 return s.tq.AddTask(ctx, &tq.Task{ 304 Title: fmt.Sprintf("%d", op.ID), 305 Payload: &tasks.VerifyUpload{UploadOperationId: op.ID}, 306 }) 307 }) 308 if err != nil { 309 return nil, errors.Annotate(err, "failed to start the verification task"). 310 Tag(grpcutil.InternalTag).Err() 311 } 312 return mutated.ToProto(r.UploadOperationId), nil 313 } 314 315 // CancelUpload implements the corresponding RPC method, see the proto doc. 316 func (s *storageImpl) CancelUpload(ctx context.Context, r *api.CancelUploadRequest) (resp *api.UploadOperation, err error) { 317 defer func() { err = grpcutil.GRPCifyAndLogErr(ctx, err) }() 318 319 handleOpStatus := func(op *upload.Operation) (*api.UploadOperation, error) { 320 if op.Status == api.UploadStatus_ERRORED || op.Status == api.UploadStatus_CANCELED { 321 return op.ToProto(r.UploadOperationId), nil 322 } 323 return nil, errors.Reason("the operation is in state %s and can't be canceled", op.Status).Tag(grpcutil.FailedPreconditionTag).Err() 324 } 325 326 // Grab the corresponding operation and inspect its status. 327 op, err := fetchOp(ctx, r.UploadOperationId) 328 switch { 329 case err != nil: 330 return nil, err 331 case op.Status != api.UploadStatus_UPLOADING: 332 return handleOpStatus(op) 333 } 334 335 // Move the operation to canceled state and launch the TQ task to cleanup. 336 mutated, err := op.Advance(ctx, func(ctx context.Context, op *upload.Operation) error { 337 op.Status = api.UploadStatus_CANCELED 338 return s.tq.AddTask(ctx, &tq.Task{ 339 Title: fmt.Sprintf("%d", op.ID), 340 Payload: &tasks.CleanupUpload{ 341 UploadOperationId: op.ID, 342 UploadUrl: op.UploadURL, 343 PathToCleanup: op.TempGSPath, 344 }, 345 }) 346 }) 347 if err != nil { 348 return nil, errors.Annotate(err, "failed to start the cleanup task"). 349 Tag(grpcutil.InternalTag).Err() 350 } 351 return handleOpStatus(mutated) 352 } 353 354 // fethcOp unwraps upload operation ID and fetches upload.Operation entity. 355 // 356 // Returns an grpc-tagged error on failure that can be returned to the RPC 357 // caller right away. 358 func fetchOp(ctx context.Context, wrappedOpID string) (*upload.Operation, error) { 359 opID, err := upload.UnwrapOpID(ctx, wrappedOpID, auth.CurrentIdentity(ctx)) 360 if err != nil { 361 if transient.Tag.In(err) { 362 return nil, errors.Annotate(err, "failed to check HMAC on upload_operation_id").Err() 363 } 364 logging.Infof(ctx, "HMAC check failed - %s", err) 365 return nil, errors.Reason("no such upload operation").Tag(grpcutil.NotFoundTag).Err() 366 } 367 368 op := &upload.Operation{ID: opID} 369 switch err := datastore.Get(ctx, op); { 370 case err == datastore.ErrNoSuchEntity: 371 return nil, errors.Reason("no such upload operation"). 372 Tag(grpcutil.NotFoundTag).Err() 373 case err != nil: 374 return nil, errors.Annotate(err, "failed to fetch the upload operation"). 375 Tag(grpcutil.InternalTag).Err() 376 } 377 378 return op, nil 379 } 380 381 // finishAndForcedHash finalizes uploads that use ForceHash field. 382 // 383 // It publishes the object immediately, skipping the verification. 384 func (s *storageImpl) finishAndForcedHash(ctx context.Context, op *upload.Operation, hash *api.ObjectRef) (*upload.Operation, error) { 385 gs := s.getGS(ctx) 386 387 // Try to move the object into the final location. This may fail 388 // transiently, in which case we ask the client to retry, or fatally, in 389 // which case we close the upload operation with an error. 390 pubErr := gs.Publish(ctx, s.settings.ObjectPath(hash), op.TempGSPath, -1) 391 if transient.Tag.In(pubErr) { 392 return nil, errors.Annotate(pubErr, "failed to publish the object"). 393 Tag(grpcutil.InternalTag).Err() 394 } 395 396 // Try to remove the leftover garbage. See maybeDelete doc for possible 397 // caveats. 398 if err := s.maybeDelete(ctx, gs, op.TempGSPath); err != nil { 399 return nil, err 400 } 401 402 // Set the status of the operation based on whether we published the file 403 // or not. 404 return op.Advance(ctx, func(_ context.Context, op *upload.Operation) error { 405 if pubErr != nil { 406 op.Status = api.UploadStatus_ERRORED 407 op.Error = fmt.Sprintf("Failed to publish the object - %s", pubErr) 408 } else { 409 op.Status = api.UploadStatus_PUBLISHED 410 op.HashAlgo = hash.HashAlgo 411 op.HexDigest = hash.HexDigest 412 } 413 return nil 414 }) 415 } 416 417 // verifyUploadTask verifies data uploaded by a user and closes the upload 418 // operation based on the result. 419 // 420 // Returning a transient error here causes the task queue service to retry the 421 // task. 422 func (s *storageImpl) verifyUploadTask(ctx context.Context, task *tasks.VerifyUpload) (err error) { 423 op := &upload.Operation{ID: task.UploadOperationId} 424 switch err := datastore.Get(ctx, op); { 425 case err == datastore.ErrNoSuchEntity: 426 return errors.Reason("no such upload operation %d", op.ID).Err() 427 case err != nil: 428 return errors.Annotate(err, "failed to fetch upload operation %d", op.ID). 429 Tag(transient.Tag).Err() 430 case op.Status != api.UploadStatus_VERIFYING: 431 logging.Infof(ctx, "The upload operation %d is not pending verification anymore (status = %s)", op.ID, op.Status) 432 return nil 433 } 434 435 gs := s.getGS(ctx) 436 437 // If the destination file exists already, we are done. This may happen on 438 // a task retry or if the file was uploaded concurrently by someone else. 439 // Otherwise we still need to verify the temp file, and then move it into 440 // the final location. 441 if op.HexDigest != "" { 442 exists, err := gs.Exists(ctx, s.settings.ObjectPath(&api.ObjectRef{ 443 HashAlgo: op.HashAlgo, 444 HexDigest: op.HexDigest, 445 })) 446 switch { 447 case err != nil: 448 return errors.Annotate(err, "failed to check the presence of the destination file"). 449 Tag(transient.Tag).Err() 450 case exists: 451 if err := s.maybeDelete(ctx, gs, op.TempGSPath); err != nil { 452 return err 453 } 454 _, err = op.Advance(ctx, func(_ context.Context, op *upload.Operation) error { 455 op.Status = api.UploadStatus_PUBLISHED 456 return nil 457 }) 458 return err 459 } 460 } 461 462 verifiedHexDigest := "" // set after the successful hash verification below 463 464 // Log some details about the verification operation. 465 logEntry := &api.VerificationLogEntry{ 466 OperationId: op.ID, 467 InitiatedBy: string(op.CreatedBy), 468 TempGsPath: op.TempGSPath, 469 Submitted: op.CreatedTS.UnixNano() / 1000, 470 Started: clock.Now(ctx).UnixNano() / 1000, 471 ServiceVersion: s.serviceVersion, 472 ProcessId: s.processID, 473 TraceId: trace.SpanContextFromContext(ctx).TraceID().String(), 474 } 475 if op.HexDigest != "" { 476 logEntry.ExpectedInstanceId = common.ObjectRefToInstanceID(&api.ObjectRef{ 477 HashAlgo: op.HashAlgo, 478 HexDigest: op.HexDigest, 479 }) 480 } 481 482 submitLog := func(outcome api.UploadStatus, error string) { 483 logEntry.Outcome = outcome.String() 484 logEntry.Error = error 485 logEntry.Finished = clock.Now(ctx).UnixNano() / 1000 486 487 verificationTimeSec := float64(logEntry.Finished-logEntry.Started) / 1e6 488 if verificationTimeSec < 0.001 { 489 verificationTimeSec = 0.001 490 } 491 logEntry.VerificationSpeed = int64(float64(logEntry.FileSize) / verificationTimeSec) 492 493 if s.submitLog != nil { 494 s.submitLog(ctx, logEntry) 495 } 496 } 497 498 defer func() { 499 if err != nil { 500 logging.Errorf(ctx, "Verification error: %s", err) 501 } 502 503 // On transient errors don't touch the temp file or the operation, we need 504 // them for retries. 505 if transient.Tag.In(err) { 506 submitLog(api.UploadStatus_ERRORED, fmt.Sprintf("Transient error: %s", err)) 507 return 508 } 509 510 // Update the status of the operation based on 'err'. If Advance fails 511 // itself, return a transient error to make sure 'verifyUploadTask' is 512 // retried. 513 advancedOp, opErr := op.Advance(ctx, func(_ context.Context, op *upload.Operation) error { 514 if err != nil { 515 op.Status = api.UploadStatus_ERRORED 516 op.Error = fmt.Sprintf("Verification failed: %s", err) 517 } else { 518 op.Status = api.UploadStatus_PUBLISHED 519 op.HexDigest = verifiedHexDigest 520 } 521 return nil 522 }) 523 if opErr != nil { 524 err = opErr // override the error returned by the task 525 submitLog(api.UploadStatus_ERRORED, fmt.Sprintf("Error updating UploadOperation: %s", err)) 526 return 527 } 528 529 submitLog(advancedOp.Status, advancedOp.Error) 530 531 // Best effort deletion of the temporary file. We do it here, after updating 532 // the operation, to avoid retrying the expensive verification procedure 533 // just because Delete is flaky. Having a little garbage in the temporary 534 // directory doesn't hurt (it is marked with operation ID and timestamp, 535 // so we can always clean it up offline). 536 if delErr := gs.Delete(ctx, op.TempGSPath); delErr != nil { 537 logging.WithError(delErr).Errorf(ctx, 538 "Failed to remove temporary Google Storage file, it is dead garbage now: %s", op.TempGSPath) 539 } 540 }() 541 542 hash, err := common.NewHash(op.HashAlgo) 543 if err != nil { 544 return err 545 } 546 547 // Prepare reading the most recent generation of the uploaded temporary file. 548 r, err := gs.Reader(ctx, op.TempGSPath, 0) 549 if err != nil { 550 return errors.Annotate(err, "failed to start reading Google Storage file").Err() 551 } 552 553 // Pick large buffer to reduce number of Google Storage RPC calls. Don't 554 // allocate more than necessary though. 555 fileSize := r.Size() 556 bufSize := readBufferSize 557 if fileSize < int64(bufSize) { 558 bufSize = int(fileSize) 559 } 560 logEntry.FileSize = fileSize 561 562 // Feed the file to the hasher. 563 _, err = io.CopyBuffer(hash, io.NewSectionReader(r, 0, fileSize), make([]byte, bufSize)) 564 if err != nil { 565 return errors.Annotate(err, "failed to read Google Storage file").Err() 566 } 567 verifiedHexDigest = hex.EncodeToString(hash.Sum(nil)) 568 569 // This should usually match logEntry.ExpectedInstanceId. 570 logEntry.VerifiedInstanceId = common.ObjectRefToInstanceID(&api.ObjectRef{ 571 HashAlgo: op.HashAlgo, 572 HexDigest: verifiedHexDigest, 573 }) 574 575 // If we know the expected hash, verify it matches what we have calculated. 576 if op.HexDigest != "" && op.HexDigest != verifiedHexDigest { 577 return errors.Reason("expected %s to be %s, got %s", op.HashAlgo, op.HexDigest, verifiedHexDigest).Err() 578 } 579 580 // The verification was successful, move the temp file (at the generation we 581 // have just verified) to the final location. If the file was modified after 582 // we have verified it (has different generation number), Publish fails: 583 // clients must not modify uploads after calling FinishUpload, this is 584 // sneaky behavior. Regardless of the outcome of this operation, the upload 585 // operation is closed in the defer above. 586 err = gs.Publish(ctx, s.settings.ObjectPath(&api.ObjectRef{ 587 HashAlgo: op.HashAlgo, 588 HexDigest: verifiedHexDigest, 589 }), op.TempGSPath, r.Generation()) 590 if err != nil { 591 return errors.Annotate(err, "failed to publish the verified file").Err() 592 } 593 return nil 594 } 595 596 // cleanupUploadTask is called to clean up after a canceled upload. 597 // 598 // Best effort. If the temporary file can't be deleted from GS due to some 599 // non-transient error, logs the error and ignores it, since retrying won't 600 // help. 601 func (s *storageImpl) cleanupUploadTask(ctx context.Context, task *tasks.CleanupUpload) (err error) { 602 gs := s.getGS(ctx) 603 604 if err := gs.CancelUpload(ctx, task.UploadUrl); err != nil { 605 if transient.Tag.In(err) { 606 return errors.Annotate(err, "transient error when canceling the resumable upload").Err() 607 } 608 logging.WithError(err).Errorf(ctx, "Failed to cancel resumable upload") 609 } 610 611 if err := gs.Delete(ctx, task.PathToCleanup); err != nil { 612 if transient.Tag.In(err) { 613 return errors.Annotate(err, "transient error when deleting the temp file").Err() 614 } 615 logging.WithError(err).Errorf(ctx, "Failed to delete the temp file") 616 } 617 618 return nil 619 } 620 621 // maybeDelete is called to delete temporary file when finishing an upload. 622 // 623 // If this fails transiently, we ask the client (or the task queue) to retry the 624 // corresponding RPC (by returning transient errors), so the file is deleted 625 // eventually. It means Publish may be called again too, but it is idempotent, 626 // so it is fine. 627 // 628 // If Delete fails fatally, we are in a tough position, since we did publish the 629 // file already, so the upload operation is technically successful and marking 630 // it as failed is a lie. So we log and ignore fatal Delete errors. They should 631 // not happen anyway. 632 // 633 // Thus, this function returns either nil or a transient error. 634 func (s *storageImpl) maybeDelete(ctx context.Context, gs gs.GoogleStorage, path string) error { 635 switch err := gs.Delete(ctx, path); { 636 case transient.Tag.In(err): 637 return errors.Annotate(err, "transient error when removing temporary Google Storage file"). 638 Tag(grpcutil.InternalTag).Err() 639 case err != nil: 640 logging.WithError(err).Errorf(ctx, "Failed to remove temporary Google Storage file, it is dead garbage now: %s", path) 641 } 642 return nil 643 }