github.com/m3db/m3@v1.5.0/src/x/retry/retry.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package retry 22 23 import ( 24 "errors" 25 "math" 26 "time" 27 28 xerrors "github.com/m3db/m3/src/x/errors" 29 30 "github.com/uber-go/tally" 31 ) 32 33 var ( 34 // ErrWhileConditionFalse is returned when the while condition to a while retry 35 // method evaluates false. 36 ErrWhileConditionFalse = errors.New("retry while condition evaluated to false") 37 ) 38 39 type retrier struct { 40 opts Options 41 initialBackoff time.Duration 42 backoffFactor float64 43 maxBackoff time.Duration 44 maxRetries int 45 forever bool 46 jitter bool 47 rngFn RngFn 48 sleepFn func(t time.Duration) 49 metrics retrierMetrics 50 } 51 52 type retrierMetrics struct { 53 calls tally.Counter 54 attempts tally.Counter 55 success tally.Counter 56 successLatency tally.Histogram 57 errors tally.Counter 58 errorsNotRetryable tally.Counter 59 errorsFinal tally.Counter 60 errorsLatency tally.Histogram 61 retries tally.Counter 62 } 63 64 // NewRetrier creates a new retrier. 65 func NewRetrier(opts Options) Retrier { 66 scope := opts.MetricsScope() 67 errorTags := struct { 68 retryable map[string]string 69 notRetryable map[string]string 70 }{ 71 map[string]string{ 72 "type": "retryable", 73 }, 74 map[string]string{ 75 "type": "not-retryable", 76 }, 77 } 78 79 return &retrier{ 80 opts: opts, 81 initialBackoff: opts.InitialBackoff(), 82 backoffFactor: opts.BackoffFactor(), 83 maxBackoff: opts.MaxBackoff(), 84 maxRetries: opts.MaxRetries(), 85 forever: opts.Forever(), 86 jitter: opts.Jitter(), 87 rngFn: opts.RngFn(), 88 sleepFn: time.Sleep, 89 metrics: retrierMetrics{ 90 calls: scope.Counter("calls"), 91 attempts: scope.Counter("attempts"), 92 success: scope.Counter("success"), 93 successLatency: histogramWithDurationBuckets(scope, "success-latency"), 94 errors: scope.Tagged(errorTags.retryable).Counter("errors"), 95 errorsNotRetryable: scope.Tagged(errorTags.notRetryable).Counter("errors"), 96 errorsFinal: scope.Counter("errors-final"), 97 errorsLatency: histogramWithDurationBuckets(scope, "errors-latency"), 98 retries: scope.Counter("retries"), 99 }, 100 } 101 } 102 103 func (r *retrier) Options() Options { 104 return r.opts 105 } 106 107 func (r *retrier) Attempt(fn Fn) error { 108 return r.attempt(nil, fn) 109 } 110 111 func (r *retrier) AttemptWhile(continueFn ContinueFn, fn Fn) error { 112 return r.attempt(continueFn, fn) 113 } 114 115 func (r *retrier) attempt(continueFn ContinueFn, fn Fn) error { 116 // Always track a call, useful for counting number of total operations. 117 r.metrics.calls.Inc(1) 118 119 attempt := 0 120 121 if continueFn != nil && !continueFn(attempt) { 122 return ErrWhileConditionFalse 123 } 124 125 start := time.Now() 126 err := fn() 127 duration := time.Since(start) 128 r.metrics.attempts.Inc(1) 129 attempt++ 130 if err == nil { 131 r.metrics.successLatency.RecordDuration(duration) 132 r.metrics.success.Inc(1) 133 return nil 134 } 135 r.metrics.errorsLatency.RecordDuration(duration) 136 if xerrors.IsNonRetryableError(err) { 137 r.metrics.errorsNotRetryable.Inc(1) 138 return err 139 } 140 r.metrics.errors.Inc(1) 141 142 for i := 1; r.forever || i <= r.maxRetries; i++ { 143 r.sleepFn(time.Duration(BackoffNanos( 144 i, 145 r.jitter, 146 r.backoffFactor, 147 r.initialBackoff, 148 r.maxBackoff, 149 r.rngFn, 150 ))) 151 152 if continueFn != nil && !continueFn(attempt) { 153 return ErrWhileConditionFalse 154 } 155 156 r.metrics.retries.Inc(1) 157 start := time.Now() 158 err = fn() 159 duration := time.Since(start) 160 r.metrics.attempts.Inc(1) 161 attempt++ 162 if err == nil { 163 r.metrics.successLatency.RecordDuration(duration) 164 r.metrics.success.Inc(1) 165 return nil 166 } 167 r.metrics.errorsLatency.RecordDuration(duration) 168 if xerrors.IsNonRetryableError(err) { 169 r.metrics.errorsNotRetryable.Inc(1) 170 return err 171 } 172 r.metrics.errors.Inc(1) 173 } 174 r.metrics.errorsFinal.Inc(1) 175 176 return err 177 } 178 179 // BackoffNanos calculates the backoff for a retry in nanoseconds. 180 func BackoffNanos( 181 retry int, 182 jitter bool, 183 backoffFactor float64, 184 initialBackoff time.Duration, 185 maxBackoff time.Duration, 186 rngFn RngFn, 187 ) int64 { 188 backoff := initialBackoff.Nanoseconds() 189 if retry >= 1 { 190 backoffFloat64 := float64(backoff) * math.Pow(backoffFactor, float64(retry-1)) 191 // math.Inf is also larger than math.MaxInt64. 192 if backoffFloat64 > math.MaxInt64 { 193 return maxBackoff.Nanoseconds() 194 } 195 backoff = int64(backoffFloat64) 196 } 197 // Validate the value of backoff to make sure Int63n() does not panic. 198 if jitter && backoff >= 2 { 199 half := backoff / 2 200 backoff = half + rngFn(half) 201 } 202 if maxBackoff := maxBackoff.Nanoseconds(); backoff > maxBackoff { 203 backoff = maxBackoff 204 } 205 return backoff 206 } 207 208 // histogramWithDurationBuckets returns a histogram with the standard duration buckets. 209 func histogramWithDurationBuckets(scope tally.Scope, name string) tally.Histogram { 210 sub := scope.Tagged(map[string]string{ 211 // Bump the version if the histogram buckets need to be changed to avoid overlapping buckets 212 // in the same query causing errors. 213 "schema": "v1", 214 }) 215 buckets := append(tally.DurationBuckets{0, time.Millisecond}, 216 tally.MustMakeExponentialDurationBuckets(2*time.Millisecond, 1.5, 30)...) 217 return sub.Histogram(name, buckets) 218 }