github.com/grailbio/base@v0.0.11/retry/retry.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 // Package retry contains utilities for implementing retry logic. 6 package retry 7 8 import ( 9 "context" 10 "fmt" 11 "math" 12 "math/rand" 13 "reflect" 14 "time" 15 16 "github.com/grailbio/base/errors" 17 ) 18 19 // A Policy is an interface that abstracts retry policies. Typically 20 // users will not call methods directly on a Policy but rather use 21 // the package function retry.Wait. 22 type Policy interface { 23 // Retry tells whether the a new retry should be attempted, 24 // and after how long. 25 Retry(retry int) (bool, time.Duration) 26 } 27 28 // Wait queries the provided policy at the provided retry number and 29 // sleeps until the next try should be attempted. Wait returns an 30 // error if the policy prohibits further tries or if the context was 31 // canceled, or if its deadline would run out while waiting for the 32 // next try. 33 func Wait(ctx context.Context, policy Policy, retry int) error { 34 keepgoing, wait := policy.Retry(retry) 35 if !keepgoing { 36 return errors.E(errors.TooManyTries, fmt.Sprintf("gave up after %d tries", retry)) 37 } 38 if deadline, ok := ctx.Deadline(); ok && time.Until(deadline) < wait { 39 return errors.E(errors.Timeout, "ran out of time while waiting for retry") 40 } 41 select { 42 case <-time.After(wait): 43 return nil 44 case <-ctx.Done(): 45 return ctx.Err() 46 } 47 } 48 49 // WaitForFn uses the above Wait function taking the same policy and retry 50 // number and generalizes it for a use of a function. Just like Wait it 51 // errors in the cases of extra tries, context cancel, or if its deadline 52 // runs out waiting for the next try 53 func WaitForFn(ctx context.Context, policy Policy, fn interface{}, params ...interface{}) (result []reflect.Value) { 54 var out []reflect.Value 55 f := reflect.ValueOf(fn) 56 inputs := make([]reflect.Value, len(params)) 57 for i, in := range params { 58 inputs[i] = reflect.ValueOf(in) 59 } 60 61 // will break out of loop if function doesn't error 62 for retries := 0; ; retries++ { 63 out = f.Call(inputs) 64 if out[len(out)-1].IsNil() { // assumes last output value of function is an error object 65 break 66 } 67 if retryErr := Wait(ctx, policy, retries); retryErr != nil { 68 return out 69 } 70 } 71 72 return out 73 } 74 75 type backoff struct { 76 factor float64 77 initial, max time.Duration 78 } 79 80 // maxInt64Convertible is the maximum float64 that can be converted to an int64 81 // accurately. We use this to prevent overflow when computing the exponential 82 // backoff, which we compute with float64s. It is important that we push it 83 // through float64 then int64 so that we get compilation error if we use a 84 // value that cannot be represented as an int64. This value was produced with: 85 // math.Nextafter(float64(math.MaxInt64), 0) 86 const maxInt64Convertible = int64(float64(9223372036854774784)) 87 88 // MaxBackoffMax is the maximum value that can be passed as max to Backoff. 89 const MaxBackoffMax = time.Duration(maxInt64Convertible) 90 91 // Backoff returns a Policy that initially waits for the amount of 92 // time specified by parameter initial; on each try this value is 93 // multiplied by the provided factor, up to the max duration. 94 func Backoff(initial, max time.Duration, factor float64) Policy { 95 if max > MaxBackoffMax { 96 panic("max > MaxBackoffMax") 97 } 98 return &backoff{ 99 initial: initial, 100 max: max, 101 factor: factor, 102 } 103 } 104 105 func (b *backoff) Retry(retries int) (bool, time.Duration) { 106 if retries < 0 { 107 panic("retries < 0") 108 } 109 nsfloat64 := float64(b.initial) * math.Pow(b.factor, float64(retries)) 110 nsfloat64 = math.Min(nsfloat64, float64(b.max)) 111 return true, time.Duration(int64(nsfloat64)) 112 } 113 114 // BackoffWithTimeout returns a Policy that initially waits for the amount of 115 // time specified by parameter initial; on each try this value is 116 // multiplied by the provided factor, up to the max duration. 117 // After the max duration, the Policy will timeout and return an error. 118 func BackoffWithTimeout(initial, max time.Duration, factor float64) Policy { 119 n := int(math.Floor(math.Log(float64(max/initial))/math.Log(factor))) + 1 120 return MaxRetries(Backoff(initial, max, factor), n) 121 } 122 123 type jitter struct { 124 policy Policy 125 // frac is the fraction of the wait time to "jitter". 126 // Eg: if frac is 0.2, the policy will retain 80% of the wait time 127 // and jitter the remaining 20% 128 frac float64 129 } 130 131 // Jitter returns a policy that jitters 'frac' fraction of the wait times 132 // returned by the provided policy. For example, setting frac to 1.0 and 0.5 133 // will implement "full jitter" and "equal jitter" approaches respectively. 134 // These approaches are describer here: 135 // https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ 136 func Jitter(policy Policy, frac float64) Policy { 137 return &jitter{policy, frac} 138 } 139 140 func (b *jitter) Retry(retries int) (bool, time.Duration) { 141 ok, wait := b.policy.Retry(retries) 142 if wait > 0 { 143 prop := time.Duration(b.frac * float64(wait)) 144 wait = wait - prop + time.Duration(rand.Int63n(prop.Nanoseconds())) 145 } 146 return ok, wait 147 } 148 149 type maxtries struct { 150 policy Policy 151 max int 152 } 153 154 // MaxRetries returns a policy that enforces a maximum number of 155 // attempts. The provided policy is invoked when the current number 156 // of tries is within the permissible limit. If policy is nil, the 157 // returned policy will permit an immediate retry when the number of 158 // tries is within the allowable limits. 159 func MaxRetries(policy Policy, n int) Policy { 160 if n < 1 { 161 panic("retry.MaxRetries: n < 1") 162 } 163 return &maxtries{policy, n - 1} 164 } 165 166 func (m *maxtries) Retry(retries int) (bool, time.Duration) { 167 if retries > m.max { 168 return false, time.Duration(0) 169 } 170 if m.policy != nil { 171 return m.policy.Retry(retries) 172 } 173 return true, time.Duration(0) 174 }