go.uber.org/cadence@v1.2.9/internal/common/backoff/retry.go (about) 1 // Copyright (c) 2017 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package backoff 22 23 import ( 24 "context" 25 "errors" 26 "sync" 27 "time" 28 29 s "go.uber.org/cadence/.gen/go/shared" 30 ) 31 32 type ( 33 // Operation to retry 34 Operation func() error 35 36 // IsRetryable handler can be used to exclude certain errors during retry 37 IsRetryable func(error) bool 38 39 // ConcurrentRetrier is used for client-side throttling. It determines whether to 40 // throttle outgoing traffic in case downstream backend server rejects 41 // requests due to out-of-quota or server busy errors. 42 ConcurrentRetrier struct { 43 sync.Mutex 44 retrier Retrier // Backoff retrier 45 failureCount int64 // Number of consecutive failures seen 46 } 47 ) 48 49 // Throttle Sleep if there were failures since the last success call. 50 func (c *ConcurrentRetrier) Throttle() { 51 c.throttleInternal() 52 } 53 54 func (c *ConcurrentRetrier) throttleInternal() time.Duration { 55 next := done 56 57 // Check if we have failure count. 58 c.Lock() 59 if c.failureCount > 0 { 60 next = c.retrier.NextBackOff() 61 } 62 c.Unlock() 63 64 if next != done { 65 time.Sleep(next) 66 } 67 68 return next 69 } 70 71 // Succeeded marks client request succeeded. 72 func (c *ConcurrentRetrier) Succeeded() { 73 defer c.Unlock() 74 c.Lock() 75 c.failureCount = 0 76 c.retrier.Reset() 77 } 78 79 // Failed marks client request failed because backend is busy. 80 func (c *ConcurrentRetrier) Failed() { 81 defer c.Unlock() 82 c.Lock() 83 c.failureCount++ 84 } 85 86 // NewConcurrentRetrier returns an instance of concurrent backoff retrier. 87 func NewConcurrentRetrier(retryPolicy RetryPolicy) *ConcurrentRetrier { 88 retrier := NewRetrier(retryPolicy, SystemClock) 89 return &ConcurrentRetrier{retrier: retrier} 90 } 91 92 // Retry function can be used to wrap any call with retry logic using the passed in policy 93 func Retry(ctx context.Context, operation Operation, policy RetryPolicy, isRetriable IsRetryable) error { 94 var err error 95 var next time.Duration 96 97 r := NewRetrier(policy, SystemClock) 98 Retry_Loop: 99 for { 100 // operation completed successfully. No need to retry. 101 if err = operation(); err == nil { 102 return nil 103 } 104 105 if next = r.NextBackOff(); next == done { 106 return err 107 } 108 109 if !isRetriable(err) { 110 return err 111 } 112 113 retryAfter := ErrRetryableAfter(err) 114 // update the time to wait until the next attempt. 115 // as this is a *minimum*, just add it to the current delay time. 116 // 117 // this could be changed to clamp to retryAfter as a minimum. 118 // this is intentionally *not* done here, so repeated service-busy errors are guaranteed 119 // to generate *increasing* amount of time between requests, and not just send N in a row 120 // with 1 second of delay. duplicates imply "still overloaded", so this will hopefully 121 // help reduce the odds of snowballing. 122 // this is a pretty minor thing though, and it should not cause problems if we change it 123 // to make behavior more predictable. 124 next += retryAfter 125 126 // check if ctx is done 127 if ctx.Err() != nil { 128 return err 129 } 130 131 // wait for the next retry period (or context timeout) 132 if ctxDone := ctx.Done(); ctxDone != nil { 133 // we could check if this is longer than context deadline and immediately fail... 134 // ...but wasting time prevents higher-level retries from trying too early. 135 // this is particularly useful for service-busy, but seems valid for essentially all retried errors. 136 // 137 // this could probably be changed if we get requests for it, but for now it better-protects 138 // the server by preventing "external" retry storms. 139 timer := time.NewTimer(next) 140 select { 141 case <-ctxDone: 142 timer.Stop() 143 return err 144 case <-timer.C: 145 continue Retry_Loop 146 } 147 } 148 149 // ctx is not cancellable 150 time.Sleep(next) 151 } 152 } 153 154 // ErrRetryableAfter returns a minimum delay until the next attempt. 155 // 156 // for most errors this will be 0, and normal backoff logic will determine 157 // the full retry period, but e.g. service busy errors (or any case where the 158 // server knows a "time until it is not useful to retry") are safe to assume 159 // that a literally immediate retry is *not* going to be useful. 160 // 161 // note that this is only a minimum, however. longer delays are assumed to 162 // be equally valid. 163 func ErrRetryableAfter(err error) (retryAfter time.Duration) { 164 if target := (*s.ServiceBusyError)(nil); errors.As(err, &target) { 165 // eventually: return a time-until-retry from the server. 166 // for now though, just ensure at least one second before the next attempt. 167 return time.Second 168 } 169 return 0 170 }