go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cipd/appengine/impl/gs/retry.go (about) 1 // Copyright 2017 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gs 16 17 import ( 18 "context" 19 "net/http" 20 "time" 21 22 "google.golang.org/api/googleapi" 23 24 "go.chromium.org/luci/common/errors" 25 "go.chromium.org/luci/common/logging" 26 "go.chromium.org/luci/common/retry" 27 "go.chromium.org/luci/common/retry/transient" 28 ) 29 30 var statusCodeTagKey = errors.NewTagKey("Google Storage API Status Code") 31 32 // StatusCode returns HTTP status code embedded inside the annotated error. 33 // 34 // Returns http.StatusOK if err is nil and 0 if the error doesn't have a status 35 // code. 36 func StatusCode(err error) int { 37 if err == nil { 38 return http.StatusOK 39 } 40 if val, ok := errors.TagValueIn(statusCodeTagKey, err); ok { 41 return val.(int) 42 } 43 return 0 44 } 45 46 // StatusCodeTag can be used to attach HTTP status code to the error. 47 // 48 // This code will be available via StatusCode(err) function. 49 func StatusCodeTag(code int) errors.TagValue { 50 return errors.TagValue{Key: statusCodeTagKey, Value: code} 51 } 52 53 // withRetry executes a Google Storage API call, retrying on transient errors. 54 // 55 // If request reached GS, but the service replied with an error, the 56 // corresponding HTTP status code can be extracted from the error via 57 // StatusCode(err). The error is also tagged as transient based on the code: 58 // response with HTTP statuses >=500 and 429 are considered transient errors. 59 // 60 // If the request never reached GS, StatusCode(err) would return 0 and the error 61 // will be tagged as transient. 62 func withRetry(ctx context.Context, call func() error) error { 63 return retry.Retry(ctx, transient.Only(retry.Default), func() error { 64 err := call() 65 if err == nil { 66 return nil 67 } 68 apiErr, _ := err.(*googleapi.Error) 69 if apiErr == nil { 70 // RestartUploadError errors are fatal and should be passed unannotated. 71 if _, ok := err.(*RestartUploadError); ok { 72 return err 73 } 74 return errors.Annotate(err, "failed to call GS").Tag(transient.Tag).Err() 75 } 76 logging.Infof(ctx, "GS replied with HTTP code %d", apiErr.Code) 77 logging.Debugf(ctx, "full response body:\n%s", apiErr.Body) 78 ann := errors.Annotate(err, "GS replied with HTTP code %d", apiErr.Code). 79 Tag(StatusCodeTag(apiErr.Code)) 80 // Retry only on 429 and 5xx responses, according to 81 // https://cloud.google.com/storage/docs/exponential-backoff. 82 if apiErr.Code == 429 || apiErr.Code >= 500 { 83 ann.Tag(transient.Tag) 84 } 85 return ann.Err() 86 }, func(err error, d time.Duration) { 87 logging.WithError(err).Errorf(ctx, "Transient error when accessing GS. Retrying in %s...", d) 88 }) 89 }