github.com/openshift-online/ocm-sdk-go@v0.1.473/retry/transport_wrapper.go (about)

     1  /*
     2  Copyright (c) 2021 Red Hat, Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8    http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // This file contains the implementations of a transport wrapper that knows how
    18  // to retry requests.
    19  
    20  package retry
    21  
    22  import (
    23  	"bytes"
    24  	"context"
    25  	"io"
    26  	"math/rand"
    27  	"strings"
    28  
    29  	"fmt"
    30  	"net/http"
    31  	"time"
    32  
    33  	"github.com/openshift-online/ocm-sdk-go/logging"
    34  )
    35  
// Default configuration, used by NewTransportWrapper to initialize the builder:
//
//   - DefaultLimit is the default maximum number of retries for a request.
//   - DefaultInterval is the default time to wait before the first retry.
//   - DefaultJitter is the default factor used to randomize the retry intervals.
const (
	DefaultLimit    = 2
	DefaultInterval = 1 * time.Second
	DefaultJitter   = 0.2
)
    42  
// TransportWrapperBuilder contains the data and logic needed to create a new retry transport
// wrapper. Instances are created with NewTransportWrapper, configured with the Logger, Limit,
// Interval and Jitter methods, and turned into a TransportWrapper with Build, which copies the
// logger, limit, interval and jitter fields into the resulting wrapper.
type TransportWrapperBuilder struct {
	logger   logging.Logger
	limit    int
	interval time.Duration
	jitter   float64
}
    51  
// TransportWrapper contains the data and logic needed to wrap an HTTP round tripper with another
// one that adds retry capability. It holds the logger, the maximum number of retries, the initial
// retry interval and the jitter factor, and hands them to every round tripper created by Wrap.
type TransportWrapper struct {
	logger   logging.Logger
	limit    int
	interval time.Duration
	jitter   float64
}
    60  
// roundTripper is a round tripper that adds retry logic. The logger, limit, interval and jitter
// fields are copied from the TransportWrapper that created it (see Wrap), and transport is the
// wrapped round tripper that actually sends the requests.
type roundTripper struct {
	logger    logging.Logger
	limit     int
	interval  time.Duration
	jitter    float64
	transport http.RoundTripper
}

// Make sure that we implement the interface. This is a compile time check only; the blank
// variable isn't used at run time.
var _ http.RoundTripper = (*roundTripper)(nil)
    72  
    73  // NewTransportWrapper creates a new builder that can then be used to configure and create a new
    74  // retry round tripper.
    75  func NewTransportWrapper() *TransportWrapperBuilder {
    76  	return &TransportWrapperBuilder{
    77  		limit:    DefaultLimit,
    78  		interval: DefaultInterval,
    79  		jitter:   DefaultJitter,
    80  	}
    81  }
    82  
// Logger sets the logger that will be used by the wrapper and by the round trippers that it
// creates. This is mandatory: Build returns an error if no logger has been set. It returns the
// builder itself so that calls can be chained.
func (b *TransportWrapperBuilder) Logger(value logging.Logger) *TransportWrapperBuilder {
	b.logger = value
	return b
}
    89  
// Limit sets the maximum number of retries for a request. When this is zero no retries will be
// performed. The default value is two. Negative values are rejected by Build. It returns the
// builder itself so that calls can be chained.
func (b *TransportWrapperBuilder) Limit(value int) *TransportWrapperBuilder {
	b.limit = value
	return b
}
    96  
// Interval sets the time to wait before the first retry. The interval time will be doubled for each
// retry. For example, if this is set to one second then the first retry will happen approximately
// one second after the failure of the initial request, the second retry will happen approximately
// two seconds after the failure of the first retry, the third will happen approximately four
// seconds after the failure of the second retry, and so on. The default value is one second.
// Values that aren't greater than zero are rejected by Build. It returns the builder itself so
// that calls can be chained.
func (b *TransportWrapperBuilder) Interval(value time.Duration) *TransportWrapperBuilder {
	b.interval = value
	return b
}
   105  
// Jitter sets a factor that will be used to randomize the retry intervals. For example, if this is
// set to 0.1 then a random adjustment between -10% and +10% will be done to the interval for each
// retry. This is intended to reduce simultaneous retries by clients when a server starts failing.
// The default value is 0.2. Values lower than zero or greater than one are rejected by Build. It
// returns the builder itself so that calls can be chained.
func (b *TransportWrapperBuilder) Jitter(value float64) *TransportWrapperBuilder {
	b.jitter = value
	return b
}
   114  
   115  // Build uses the information stored in the builder to create a new transport wrapper.
   116  func (b *TransportWrapperBuilder) Build(ctx context.Context) (result *TransportWrapper, err error) {
   117  	// Check parameters:
   118  	if b.logger == nil {
   119  		err = fmt.Errorf("logger is mandatory")
   120  		return
   121  	}
   122  	if b.limit < 0 {
   123  		err = fmt.Errorf(
   124  			"retry limit %d isn't valid, it should be greater or equal than zero",
   125  			b.limit,
   126  		)
   127  		return
   128  	}
   129  	if b.interval <= 0 {
   130  		err = fmt.Errorf(
   131  			"retry interval %s isn't valid, it should be greater than zero",
   132  			b.interval,
   133  		)
   134  		return
   135  	}
   136  	if b.jitter < 0 || b.jitter > 1 {
   137  		err = fmt.Errorf(
   138  			"retry jitter %f isn't valid, it should be between zero and one",
   139  			b.jitter,
   140  		)
   141  		return
   142  	}
   143  
   144  	// Create and populate the object:
   145  	result = &TransportWrapper{
   146  		logger:   b.logger,
   147  		limit:    b.limit,
   148  		interval: b.interval,
   149  		jitter:   b.jitter,
   150  	}
   151  
   152  	return
   153  }
   154  
   155  // Wrap creates a new round tripper that wraps the given one and implements the retry logic.
   156  func (w *TransportWrapper) Wrap(transport http.RoundTripper) http.RoundTripper {
   157  	return &roundTripper{
   158  		logger:    w.logger,
   159  		limit:     w.limit,
   160  		interval:  w.interval,
   161  		jitter:    w.jitter,
   162  		transport: transport,
   163  	}
   164  }
   165  
// Limit returns the maximum number of retries that the round trippers created by this wrapper
// will perform for a request.
func (w *TransportWrapper) Limit() int {
	return w.limit
}
   170  
// Interval returns the initial retry interval, that is, the base time to wait before the first
// retry, before the jitter factor is applied.
func (w *TransportWrapper) Interval() time.Duration {
	return w.interval
}
   175  
// Jitter returns the retry interval jitter factor, the value used to randomize the time to wait
// before each retry.
func (w *TransportWrapper) Jitter() float64 {
	return w.jitter
}
   180  
// Close releases all the resources used by the wrapper. The current implementation has nothing
// to release, so it does nothing and always returns nil.
func (w *TransportWrapper) Close() error {
	return nil
}
   185  
   186  // RoundTrip is the implementation of the round tripper interface.
   187  func (t *roundTripper) RoundTrip(request *http.Request) (response *http.Response, err error) {
   188  	// Get the context:
   189  	ctx := request.Context()
   190  
   191  	// If the request has a body then we need to read it fully and copy it in memory, so that we
   192  	// can later use that copy to retry the request. We also need to restore the old body before
   193  	// returning because the caller my rely on the type of body that it passed, for example.
   194  	originalBody := request.Body
   195  	defer func() {
   196  		request.Body = originalBody
   197  	}()
   198  	var bodyCopy []byte
   199  	if originalBody != nil {
   200  		bodyCopy, err = io.ReadAll(originalBody)
   201  		if err != nil {
   202  			return
   203  		}
   204  	}
   205  
   206  	// Try to send the request till it succeeds or else the retry limit is exceeded:
   207  	attempt := 0
   208  	for {
   209  		// If this is not the first attempt then we should wait:
   210  		if attempt > 0 {
   211  			t.sleep(ctx, attempt)
   212  		}
   213  
   214  		// Each time that we retry the request we need to rewind the request body:
   215  		if bodyCopy != nil {
   216  			request.Body = io.NopCloser(bytes.NewBuffer(bodyCopy))
   217  		}
   218  
   219  		// Do an attempt, and return inmediately if this is the last one:
   220  		response, err = t.transport.RoundTrip(request)
   221  		attempt++
   222  		if attempt > t.limit {
   223  			return
   224  		}
   225  
   226  		// Handle errors without HTTP response:
   227  		if err != nil {
   228  			message := err.Error()
   229  			switch request.Method {
   230  			case http.MethodGet:
   231  				// GETs can retry on more types of failures because GET is naturally idempotent, other verbs are not.
   232  				switch {
   233  				case strings.Contains(message, "EOF"):
   234  					// EOF can happen after request bytes are sent. This makes it unsafe to retry on mutating requests,
   235  					// but ok to retry on idempotent ones.
   236  					t.logger.Warn(
   237  						ctx,
   238  						"Request for method %s and URL '%s' failed with EOF, "+
   239  							"will try again: %v",
   240  						request.Method, request.URL, err,
   241  					)
   242  					continue
   243  				case strings.Contains(message, "connection reset by peer"):
   244  					// "connection reset by peer"" can happen after request bytes are sent. This makes it unsafe to
   245  					// retry on mutating requests, but ok to retry on idempotent ones.
   246  					t.logger.Warn(
   247  						ctx,
   248  						"Request for method %s and URL '%s' failed with connection "+
   249  							"reset by peer, will try again: %v",
   250  						request.Method, request.URL, err,
   251  					)
   252  					continue
   253  				}
   254  				fallthrough // GETS can also retry on all generally retriable errors
   255  
   256  			default:
   257  				switch {
   258  				case strings.Contains(message, "PROTOCOL_ERROR"):
   259  					t.logger.Warn(
   260  						ctx,
   261  						"Request for method %s and URL '%s' failed with protocol error, "+
   262  							"will try again: %v",
   263  						request.Method, request.URL, err,
   264  					)
   265  					continue
   266  				case strings.Contains(message, "REFUSED_STREAM"):
   267  					t.logger.Warn(
   268  						ctx,
   269  						"Request for method %s and URL '%s' failed with refused stream, "+
   270  							"will try again: %v",
   271  						request.Method, request.URL, err,
   272  					)
   273  					continue
   274  				default:
   275  					// For any other error we just report it to the caller:
   276  					err = fmt.Errorf("can't send request: %w", err)
   277  					return
   278  				}
   279  			}
   280  
   281  		}
   282  
   283  		// Handle HTTP responses with error codes:
   284  		method := request.Method
   285  		code := response.StatusCode
   286  		switch {
   287  		case code == http.StatusServiceUnavailable || code == http.StatusTooManyRequests:
   288  			// For 429 and 503 we know that the server didn't process the request, so we
   289  			// can safely retry regardless of the method.
   290  			t.logger.Warn(
   291  				ctx,
   292  				"Request for method %s and URL '%s' failed with code %d, "+
   293  					"will try again",
   294  				request.Method, request.URL, code,
   295  			)
   296  			err = response.Body.Close()
   297  			if err != nil {
   298  				t.logger.Error(
   299  					ctx,
   300  					"Failed to close response body for method '%s' and URL '%s'",
   301  					request.Method, request.URL,
   302  				)
   303  			}
   304  			continue
   305  		case code >= 500 && method == http.MethodGet:
   306  			// For any other 5xx status code we can't be sure if the server processed
   307  			// the request, so we retry only GET requests, as those don't have side
   308  			// effects.
   309  			t.logger.Warn(
   310  				ctx,
   311  				"Request for method %s and URL '%s' failed with code %d, "+
   312  					"will try again",
   313  				request.Method, request.URL, code,
   314  			)
   315  			err = response.Body.Close()
   316  			if err != nil {
   317  				t.logger.Error(
   318  					ctx,
   319  					"Failed to close response body for method '%s' and URL '%s'",
   320  					request.Method, request.URL,
   321  				)
   322  			}
   323  			continue
   324  		default:
   325  			// For any other status code we can't be sure if the server processed the
   326  			// request, so we just return the result to the caller.
   327  			return
   328  		}
   329  	}
   330  }
   331  
   332  // sleep calculates a retry interval taking into account the configured interval and jitter factor
   333  // and then waits that time.
   334  func (t *roundTripper) sleep(ctx context.Context, attempt int) {
   335  	// Start with the configured interval:
   336  	interval := t.interval
   337  
   338  	// Double the interval for each attempt:
   339  	interval *= 1 << (attempt - 1)
   340  
   341  	// Adjust the interval adding or subtracting a random amount. For example, if the jitter
   342  	// factor given in the configuration is 0.1 will add or sustract up to a 10%.
   343  	factor := t.jitter * (1 - 2*rand.Float64())
   344  	delta := time.Duration(float64(interval) * factor)
   345  	interval += delta
   346  
   347  	// Go sleep for a while:
   348  	t.logger.Debug(ctx, "Wating %s before next attempt", interval)
   349  	time.Sleep(interval)
   350  }