/*
Copyright (c) 2021 Red Hat, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// This file contains the implementation of a transport wrapper that knows how
// to retry requests.

package retry

import (
	"bytes"
	"context"
	"io"
	"math/rand"
	"strings"

	"fmt"
	"net/http"
	"time"

	"github.com/openshift-online/ocm-sdk-go/logging"
)

// Default configuration:
const (
	// DefaultLimit is the maximum number of retries that a new builder starts with.
	DefaultLimit = 2

	// DefaultInterval is the wait before the first retry that a new builder starts with.
	DefaultInterval = 1 * time.Second

	// DefaultJitter is the interval randomization factor that a new builder starts with.
	DefaultJitter = 0.2
)

// TransportWrapperBuilder contains the data and logic needed to create a new retry transport
// wrapper.
type TransportWrapperBuilder struct {
	// logger is the logger that will be handed to the wrapper; it is mandatory.
	logger logging.Logger

	// limit is the maximum number of retries for a request.
	limit int

	// interval is the time to wait before the first retry.
	interval time.Duration

	// jitter is the factor used to randomize the retry intervals.
	jitter float64
}

// TransportWrapper contains the data and logic needed to wrap an HTTP round tripper with another
// one that adds retry capability.
type TransportWrapper struct {
	// logger is the logger passed to the round trippers created by this wrapper.
	logger logging.Logger

	// limit is the maximum number of retries for a request.
	limit int

	// interval is the time to wait before the first retry.
	interval time.Duration

	// jitter is the factor used to randomize the retry intervals.
	jitter float64
}

// roundTripper is a round tripper that adds retry logic.
type roundTripper struct {
	// logger receives the warnings, errors and debug messages generated while retrying.
	logger logging.Logger

	// limit is the maximum number of retries for a request.
	limit int

	// interval is the time to wait before the first retry; it is doubled for each retry.
	interval time.Duration

	// jitter is the factor used to randomize the retry intervals.
	jitter float64

	// transport is the wrapped round tripper that actually sends the requests.
	transport http.RoundTripper
}

// Make sure that we implement the interface:
var _ http.RoundTripper = (*roundTripper)(nil)

// NewTransportWrapper creates a new builder that can then be used to configure and create a new
// retry round tripper. The builder starts with the default limit, interval and jitter; the logger
// has no default and must be set before calling Build.
func NewTransportWrapper() *TransportWrapperBuilder {
	return &TransportWrapperBuilder{
		limit:    DefaultLimit,
		interval: DefaultInterval,
		jitter:   DefaultJitter,
	}
}

// Logger sets the logger that will be used by the wrapper and by the round trippers that it
// creates.
func (b *TransportWrapperBuilder) Logger(value logging.Logger) *TransportWrapperBuilder {
	b.logger = value
	return b
}

// Limit sets the maximum number of retries for a request. When this is zero no retries will be
// performed. The default value is two.
func (b *TransportWrapperBuilder) Limit(value int) *TransportWrapperBuilder {
	b.limit = value
	return b
}

// Interval sets the time to wait before the first retry. The interval time will be doubled for each
// retry. For example, if this is set to one second then the first retry will happen approximately
// one second after the failure of the initial request, the second retry approximately two seconds
// after the failure of the first retry, the third approximately four seconds after the failure of
// the second retry, and so on. The default value is one second.
func (b *TransportWrapperBuilder) Interval(value time.Duration) *TransportWrapperBuilder {
	b.interval = value
	return b
}

// Jitter sets a factor that will be used to randomize the retry intervals. For example, if this is
// set to 0.1 then a random adjustment between -10% and +10% will be done to the interval for each
// retry. This is intended to reduce simultaneous retries by clients when a server starts failing.
// The default value is 0.2.
110 func (b *TransportWrapperBuilder) Jitter(value float64) *TransportWrapperBuilder { 111 b.jitter = value 112 return b 113 } 114 115 // Build uses the information stored in the builder to create a new transport wrapper. 116 func (b *TransportWrapperBuilder) Build(ctx context.Context) (result *TransportWrapper, err error) { 117 // Check parameters: 118 if b.logger == nil { 119 err = fmt.Errorf("logger is mandatory") 120 return 121 } 122 if b.limit < 0 { 123 err = fmt.Errorf( 124 "retry limit %d isn't valid, it should be greater or equal than zero", 125 b.limit, 126 ) 127 return 128 } 129 if b.interval <= 0 { 130 err = fmt.Errorf( 131 "retry interval %s isn't valid, it should be greater than zero", 132 b.interval, 133 ) 134 return 135 } 136 if b.jitter < 0 || b.jitter > 1 { 137 err = fmt.Errorf( 138 "retry jitter %f isn't valid, it should be between zero and one", 139 b.jitter, 140 ) 141 return 142 } 143 144 // Create and populate the object: 145 result = &TransportWrapper{ 146 logger: b.logger, 147 limit: b.limit, 148 interval: b.interval, 149 jitter: b.jitter, 150 } 151 152 return 153 } 154 155 // Wrap creates a new round tripper that wraps the given one and implements the retry logic. 156 func (w *TransportWrapper) Wrap(transport http.RoundTripper) http.RoundTripper { 157 return &roundTripper{ 158 logger: w.logger, 159 limit: w.limit, 160 interval: w.interval, 161 jitter: w.jitter, 162 transport: transport, 163 } 164 } 165 166 // Limit returns the maximum number of retries. 167 func (w *TransportWrapper) Limit() int { 168 return w.limit 169 } 170 171 // Interval returns the initial retry interval. 172 func (w *TransportWrapper) Interval() time.Duration { 173 return w.interval 174 } 175 176 // Jitter returns the retry interval jitter factor. 177 func (w *TransportWrapper) Jitter() float64 { 178 return w.jitter 179 } 180 181 // Close releases all the resources used by the wrapper. 
182 func (w *TransportWrapper) Close() error { 183 return nil 184 } 185 186 // RoundTrip is the implementation of the round tripper interface. 187 func (t *roundTripper) RoundTrip(request *http.Request) (response *http.Response, err error) { 188 // Get the context: 189 ctx := request.Context() 190 191 // If the request has a body then we need to read it fully and copy it in memory, so that we 192 // can later use that copy to retry the request. We also need to restore the old body before 193 // returning because the caller my rely on the type of body that it passed, for example. 194 originalBody := request.Body 195 defer func() { 196 request.Body = originalBody 197 }() 198 var bodyCopy []byte 199 if originalBody != nil { 200 bodyCopy, err = io.ReadAll(originalBody) 201 if err != nil { 202 return 203 } 204 } 205 206 // Try to send the request till it succeeds or else the retry limit is exceeded: 207 attempt := 0 208 for { 209 // If this is not the first attempt then we should wait: 210 if attempt > 0 { 211 t.sleep(ctx, attempt) 212 } 213 214 // Each time that we retry the request we need to rewind the request body: 215 if bodyCopy != nil { 216 request.Body = io.NopCloser(bytes.NewBuffer(bodyCopy)) 217 } 218 219 // Do an attempt, and return inmediately if this is the last one: 220 response, err = t.transport.RoundTrip(request) 221 attempt++ 222 if attempt > t.limit { 223 return 224 } 225 226 // Handle errors without HTTP response: 227 if err != nil { 228 message := err.Error() 229 switch request.Method { 230 case http.MethodGet: 231 // GETs can retry on more types of failures because GET is naturally idempotent, other verbs are not. 232 switch { 233 case strings.Contains(message, "EOF"): 234 // EOF can happen after request bytes are sent. This makes it unsafe to retry on mutating requests, 235 // but ok to retry on idempotent ones. 
236 t.logger.Warn( 237 ctx, 238 "Request for method %s and URL '%s' failed with EOF, "+ 239 "will try again: %v", 240 request.Method, request.URL, err, 241 ) 242 continue 243 case strings.Contains(message, "connection reset by peer"): 244 // "connection reset by peer"" can happen after request bytes are sent. This makes it unsafe to 245 // retry on mutating requests, but ok to retry on idempotent ones. 246 t.logger.Warn( 247 ctx, 248 "Request for method %s and URL '%s' failed with connection "+ 249 "reset by peer, will try again: %v", 250 request.Method, request.URL, err, 251 ) 252 continue 253 } 254 fallthrough // GETS can also retry on all generally retriable errors 255 256 default: 257 switch { 258 case strings.Contains(message, "PROTOCOL_ERROR"): 259 t.logger.Warn( 260 ctx, 261 "Request for method %s and URL '%s' failed with protocol error, "+ 262 "will try again: %v", 263 request.Method, request.URL, err, 264 ) 265 continue 266 case strings.Contains(message, "REFUSED_STREAM"): 267 t.logger.Warn( 268 ctx, 269 "Request for method %s and URL '%s' failed with refused stream, "+ 270 "will try again: %v", 271 request.Method, request.URL, err, 272 ) 273 continue 274 default: 275 // For any other error we just report it to the caller: 276 err = fmt.Errorf("can't send request: %w", err) 277 return 278 } 279 } 280 281 } 282 283 // Handle HTTP responses with error codes: 284 method := request.Method 285 code := response.StatusCode 286 switch { 287 case code == http.StatusServiceUnavailable || code == http.StatusTooManyRequests: 288 // For 429 and 503 we know that the server didn't process the request, so we 289 // can safely retry regardless of the method. 
290 t.logger.Warn( 291 ctx, 292 "Request for method %s and URL '%s' failed with code %d, "+ 293 "will try again", 294 request.Method, request.URL, code, 295 ) 296 err = response.Body.Close() 297 if err != nil { 298 t.logger.Error( 299 ctx, 300 "Failed to close response body for method '%s' and URL '%s'", 301 request.Method, request.URL, 302 ) 303 } 304 continue 305 case code >= 500 && method == http.MethodGet: 306 // For any other 5xx status code we can't be sure if the server processed 307 // the request, so we retry only GET requests, as those don't have side 308 // effects. 309 t.logger.Warn( 310 ctx, 311 "Request for method %s and URL '%s' failed with code %d, "+ 312 "will try again", 313 request.Method, request.URL, code, 314 ) 315 err = response.Body.Close() 316 if err != nil { 317 t.logger.Error( 318 ctx, 319 "Failed to close response body for method '%s' and URL '%s'", 320 request.Method, request.URL, 321 ) 322 } 323 continue 324 default: 325 // For any other status code we can't be sure if the server processed the 326 // request, so we just return the result to the caller. 327 return 328 } 329 } 330 } 331 332 // sleep calculates a retry interval taking into account the configured interval and jitter factor 333 // and then waits that time. 334 func (t *roundTripper) sleep(ctx context.Context, attempt int) { 335 // Start with the configured interval: 336 interval := t.interval 337 338 // Double the interval for each attempt: 339 interval *= 1 << (attempt - 1) 340 341 // Adjust the interval adding or subtracting a random amount. For example, if the jitter 342 // factor given in the configuration is 0.1 will add or sustract up to a 10%. 343 factor := t.jitter * (1 - 2*rand.Float64()) 344 delta := time.Duration(float64(interval) * factor) 345 interval += delta 346 347 // Go sleep for a while: 348 t.logger.Debug(ctx, "Wating %s before next attempt", interval) 349 time.Sleep(interval) 350 }