github.com/bazelbuild/remote-apis-sdks@v0.0.0-20240425170053-8a36686a6350/go/pkg/retry/retry.go

// Package retry implements retry logic helpers, which can be used to wrap operations that can
// intermittently fail, but can be retried at a higher level.
//
// Consider replacing with https://github.com/eapache/go-resiliency/tree/master/retrier. This would
// have slightly different values for backoff consts.
package retry

import (
	"context"
	stderrors "errors"
	"fmt"
	"math/rand"
	"sync"
	"time"

	log "github.com/golang/glog"
	"github.com/pkg/errors"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

const (
	backoffFactor = 1.3 // backoff increases by this factor on each retry
	backoffRange  = 0.4 // backoff is randomized downwards by this factor
)

// BackoffPolicy describes how to back off when retrying, and how many times to retry.
type BackoffPolicy struct {
	baseDelay, maxDelay time.Duration
	maxAttempts         Attempts // 0 means unlimited
}

// ExponentialBackoff returns an exponential backoff implementation.
//
// Starting from baseDelay, each successive delay grows by a fixed multiplicative factor, never
// exceeding maxDelay. For example, ExponentialBackoff(time.Second, time.Hour, 5) will produce
// delays of roughly: 1s, 1.5s, 2s, 3s, 4s.
//
// Note that delays are randomized, so the exact values are not guaranteed. attempts=0 means
// unlimited attempts. See UnlimitedAttempts.
func ExponentialBackoff(baseDelay, maxDelay time.Duration, attempts Attempts) BackoffPolicy {
	return BackoffPolicy{baseDelay, maxDelay, attempts}
}

// Immediately returns a retrier that retries right away.
func Immediately(attempts Attempts) BackoffPolicy {
	return BackoffPolicy{0, 0, attempts}
}

// Attempts is the number of times to attempt something before giving up. A value of 0 represents
// an effectively unlimited number of attempts, or you can use the equivalent
// retry.UnlimitedAttempts.
type Attempts uint

// UnlimitedAttempts is used to specify no limit to the number of attempts.
const UnlimitedAttempts = Attempts(0)

// ShouldRetry encapsulates the decision of whether an error is retry-able. If an error should not
// be retried, the function must return false.
type ShouldRetry func(error) bool

// Always always retries, regardless of the error.
func Always(error) bool { return true }

// TransientOnly returns true if the error is transient.
// It implements the ShouldRetry type.
func TransientOnly(err error) bool {
	// Retry RPC timeouts. Note that we do *not* retry context cancellations (context.Canceled);
	// if the user wants to back out of the call we should let them.
	if stderrors.Is(err, context.DeadlineExceeded) {
		return true
	}
	s, ok := status.FromError(err)
	if !ok {
		return false
	}
	switch s.Code() {
	case codes.Canceled, codes.Unknown, codes.DeadlineExceeded, codes.Aborted,
		codes.Internal, codes.Unavailable, codes.ResourceExhausted:
		return true
	default:
		return false
	}
}
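// A typical call site composes the pieces above: a ShouldRetry predicate, a BackoffPolicy, and
// the operation to run. A minimal sketch, assuming a hypothetical operation "fetch" that takes a
// context and returns an error (not part of this package):
//
//	err := retry.WithPolicy(ctx, retry.TransientOnly,
//		retry.ExponentialBackoff(100*time.Millisecond, 5*time.Second, 4),
//		func() error { return fetch(ctx) })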
// WithPolicy retries f until either it succeeds, or shouldRetry returns false, or the number of
// retries is capped by the backoff policy. Returns the error returned by the final attempt. If
// the retry budget is exhausted, the error message is annotated to say so.
func WithPolicy(ctx context.Context, shouldRetry ShouldRetry, bp BackoffPolicy, f func() error) error {
	timeAfter, ok := ctx.Value(TimeAfterContextKey).(func(time.Duration) <-chan time.Time)
	if !ok {
		timeAfter = time.After
	}

	for attempts := 0; ; attempts++ {
		err := f()
		if err == nil || !shouldRetry(err) {
			return err
		}

		if log.V(1) {
			// This log depth is custom-tailored to the SDK usage, which always calls the
			// retrier from within client.CallWithTimeout.
			log.InfoDepth(3, fmt.Sprintf("call failed with err=%v, retrying.", err))
		}

		if attempts+1 == int(bp.maxAttempts) {
			// Annotate the error message to indicate that the retry budget was exhausted.
			//
			// This is a little hacky, but a generic status annotation that preserves the status
			// code doesn't exist in gRPC's status library yet, and it's overkill to implement it
			// here for just this.
			if s, ok := status.FromError(err); ok {
				spb := s.Proto()
				spb.Message = fmt.Sprintf("retry budget exhausted (%d attempts): ", bp.maxAttempts) + spb.Message
				return status.ErrorProto(spb)
			}
			return errors.Wrapf(err, "retry budget exhausted (%d attempts)", bp.maxAttempts)
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timeAfter(backoff(bp.baseDelay, bp.maxDelay, attempts)):
		}
	}
}

type timeAfterContextKey struct{}

// TimeAfterContextKey is to be used as a key in the context to provide a value that is compatible
// with time.After. The main purpose is to mock out time.After in tests.
var TimeAfterContextKey = timeAfterContextKey{}

var (
	mu  sync.Mutex
	rng = rand.New(rand.NewSource(time.Now().UnixNano()))
)

// randFloat64 is equivalent to calling rng.Float64, but safe for concurrent use.
func randFloat64() float64 {
	mu.Lock()
	f := rng.Float64()
	mu.Unlock()
	return f
}

// backoff returns a random value in [0, maxDelay] that increases exponentially with the value of
// retries, starting from baseDelay. Set retries to 0 for the first call and increment it with
// each subsequent call.
func backoff(baseDelay, maxDelay time.Duration, retries int) time.Duration {
	backoff, max := float64(baseDelay), float64(maxDelay)
	for backoff < max && retries > 0 {
		backoff = backoff * backoffFactor
		retries--
	}
	if backoff > max {
		backoff = max
	}

	// Randomize backoff delays so that if a cluster of requests start at the same time, they
	// won't operate in lockstep. We just subtract up to 40% so that we obey maxDelay.
	backoff -= backoff * backoffRange * randFloat64()
	if backoff < 0 {
		return 0
	}
	return time.Duration(backoff)
}
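// The TimeAfterContextKey hook above lets tests replace real sleeps. A minimal sketch of how a
// test might inject a fake delay function via the context (illustrative only; "fakeAfter" and
// "op" are hypothetical names, not part of this package):
//
//	fakeAfter := func(time.Duration) <-chan time.Time {
//		ch := make(chan time.Time, 1)
//		ch <- time.Now() // fire immediately so the test never sleeps
//		return ch
//	}
//	ctx := context.WithValue(context.Background(), retry.TimeAfterContextKey, fakeAfter)
//	err := retry.WithPolicy(ctx, retry.Always, retry.ExponentialBackoff(time.Second, time.Minute, 3), op)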