github.com/m3db/m3@v1.5.0/src/x/retry/retry.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package retry
    22  
    23  import (
    24  	"errors"
    25  	"math"
    26  	"time"
    27  
    28  	xerrors "github.com/m3db/m3/src/x/errors"
    29  
    30  	"github.com/uber-go/tally"
    31  )
    32  
    33  var (
    34  	// ErrWhileConditionFalse is returned when the while condition to a while retry
    35  	// method evaluates false.
    36  	ErrWhileConditionFalse = errors.New("retry while condition evaluated to false")
    37  )
    38  
// retrier is the Retrier implementation returned by NewRetrier. Its settings
// are read from Options once, at construction time.
type retrier struct {
	opts           Options               // options passed to NewRetrier; returned by Options()
	initialBackoff time.Duration         // base delay handed to BackoffNanos
	backoffFactor  float64               // growth factor handed to BackoffNanos
	maxBackoff     time.Duration         // upper bound handed to BackoffNanos
	maxRetries     int                   // retries allowed after the first attempt
	forever        bool                  // retry without limit, ignoring maxRetries
	jitter         bool                  // whether BackoffNanos randomizes the delay
	rngFn          RngFn                 // random source handed to BackoffNanos
	sleepFn        func(t time.Duration) // waits out each backoff; NewRetrier sets time.Sleep
	metrics        retrierMetrics        // instruments updated by attempt
}
    51  
// retrierMetrics groups the tally instruments a retrier reports to.
type retrierMetrics struct {
	calls              tally.Counter   // incremented once per attempt() invocation
	attempts           tally.Counter   // incremented for every execution of fn
	success            tally.Counter   // executions of fn that returned nil
	successLatency     tally.Histogram // duration of successful executions of fn
	errors             tally.Counter   // retryable failures ("errors", type=retryable)
	errorsNotRetryable tally.Counter   // non-retryable failures ("errors", type=not-retryable)
	errorsFinal        tally.Counter   // calls still failing once the retry loop ends
	errorsLatency      tally.Histogram // duration of failed executions of fn
	retries            tally.Counter   // executions of fn beyond the first one
}
    63  
    64  // NewRetrier creates a new retrier.
    65  func NewRetrier(opts Options) Retrier {
    66  	scope := opts.MetricsScope()
    67  	errorTags := struct {
    68  		retryable    map[string]string
    69  		notRetryable map[string]string
    70  	}{
    71  		map[string]string{
    72  			"type": "retryable",
    73  		},
    74  		map[string]string{
    75  			"type": "not-retryable",
    76  		},
    77  	}
    78  
    79  	return &retrier{
    80  		opts:           opts,
    81  		initialBackoff: opts.InitialBackoff(),
    82  		backoffFactor:  opts.BackoffFactor(),
    83  		maxBackoff:     opts.MaxBackoff(),
    84  		maxRetries:     opts.MaxRetries(),
    85  		forever:        opts.Forever(),
    86  		jitter:         opts.Jitter(),
    87  		rngFn:          opts.RngFn(),
    88  		sleepFn:        time.Sleep,
    89  		metrics: retrierMetrics{
    90  			calls:              scope.Counter("calls"),
    91  			attempts:           scope.Counter("attempts"),
    92  			success:            scope.Counter("success"),
    93  			successLatency:     histogramWithDurationBuckets(scope, "success-latency"),
    94  			errors:             scope.Tagged(errorTags.retryable).Counter("errors"),
    95  			errorsNotRetryable: scope.Tagged(errorTags.notRetryable).Counter("errors"),
    96  			errorsFinal:        scope.Counter("errors-final"),
    97  			errorsLatency:      histogramWithDurationBuckets(scope, "errors-latency"),
    98  			retries:            scope.Counter("retries"),
    99  		},
   100  	}
   101  }
   102  
// Options returns the options the retrier was created with.
func (r *retrier) Options() Options {
	return r.opts
}
   106  
// Attempt runs fn, retrying it with backoff while it keeps returning retryable
// errors. No continue condition is applied.
func (r *retrier) Attempt(fn Fn) error {
	return r.attempt(nil, fn)
}
   110  
// AttemptWhile runs fn with retries like Attempt, but consults continueFn
// before every execution of fn, including the first one. As soon as
// continueFn reports false, ErrWhileConditionFalse is returned.
func (r *retrier) AttemptWhile(continueFn ContinueFn, fn Fn) error {
	return r.attempt(continueFn, fn)
}
   114  
   115  func (r *retrier) attempt(continueFn ContinueFn, fn Fn) error {
   116  	// Always track a call, useful for counting number of total operations.
   117  	r.metrics.calls.Inc(1)
   118  
   119  	attempt := 0
   120  
   121  	if continueFn != nil && !continueFn(attempt) {
   122  		return ErrWhileConditionFalse
   123  	}
   124  
   125  	start := time.Now()
   126  	err := fn()
   127  	duration := time.Since(start)
   128  	r.metrics.attempts.Inc(1)
   129  	attempt++
   130  	if err == nil {
   131  		r.metrics.successLatency.RecordDuration(duration)
   132  		r.metrics.success.Inc(1)
   133  		return nil
   134  	}
   135  	r.metrics.errorsLatency.RecordDuration(duration)
   136  	if xerrors.IsNonRetryableError(err) {
   137  		r.metrics.errorsNotRetryable.Inc(1)
   138  		return err
   139  	}
   140  	r.metrics.errors.Inc(1)
   141  
   142  	for i := 1; r.forever || i <= r.maxRetries; i++ {
   143  		r.sleepFn(time.Duration(BackoffNanos(
   144  			i,
   145  			r.jitter,
   146  			r.backoffFactor,
   147  			r.initialBackoff,
   148  			r.maxBackoff,
   149  			r.rngFn,
   150  		)))
   151  
   152  		if continueFn != nil && !continueFn(attempt) {
   153  			return ErrWhileConditionFalse
   154  		}
   155  
   156  		r.metrics.retries.Inc(1)
   157  		start := time.Now()
   158  		err = fn()
   159  		duration := time.Since(start)
   160  		r.metrics.attempts.Inc(1)
   161  		attempt++
   162  		if err == nil {
   163  			r.metrics.successLatency.RecordDuration(duration)
   164  			r.metrics.success.Inc(1)
   165  			return nil
   166  		}
   167  		r.metrics.errorsLatency.RecordDuration(duration)
   168  		if xerrors.IsNonRetryableError(err) {
   169  			r.metrics.errorsNotRetryable.Inc(1)
   170  			return err
   171  		}
   172  		r.metrics.errors.Inc(1)
   173  	}
   174  	r.metrics.errorsFinal.Inc(1)
   175  
   176  	return err
   177  }
   178  
   179  // BackoffNanos calculates the backoff for a retry in nanoseconds.
   180  func BackoffNanos(
   181  	retry int,
   182  	jitter bool,
   183  	backoffFactor float64,
   184  	initialBackoff time.Duration,
   185  	maxBackoff time.Duration,
   186  	rngFn RngFn,
   187  ) int64 {
   188  	backoff := initialBackoff.Nanoseconds()
   189  	if retry >= 1 {
   190  		backoffFloat64 := float64(backoff) * math.Pow(backoffFactor, float64(retry-1))
   191  		// math.Inf is also larger than math.MaxInt64.
   192  		if backoffFloat64 > math.MaxInt64 {
   193  			return maxBackoff.Nanoseconds()
   194  		}
   195  		backoff = int64(backoffFloat64)
   196  	}
   197  	// Validate the value of backoff to make sure Int63n() does not panic.
   198  	if jitter && backoff >= 2 {
   199  		half := backoff / 2
   200  		backoff = half + rngFn(half)
   201  	}
   202  	if maxBackoff := maxBackoff.Nanoseconds(); backoff > maxBackoff {
   203  		backoff = maxBackoff
   204  	}
   205  	return backoff
   206  }
   207  
   208  // histogramWithDurationBuckets returns a histogram with the standard duration buckets.
   209  func histogramWithDurationBuckets(scope tally.Scope, name string) tally.Histogram {
   210  	sub := scope.Tagged(map[string]string{
   211  		// Bump the version if the histogram buckets need to be changed to avoid overlapping buckets
   212  		// in the same query causing errors.
   213  		"schema": "v1",
   214  	})
   215  	buckets := append(tally.DurationBuckets{0, time.Millisecond},
   216  		tally.MustMakeExponentialDurationBuckets(2*time.Millisecond, 1.5, 30)...)
   217  	return sub.Histogram(name, buckets)
   218  }