github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/retry/retry.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package retry contains utilities for implementing retry logic.
     6  package retry
     7  
     8  import (
     9  	"context"
    10  	"fmt"
    11  	"math"
    12  	"math/rand"
    13  	"reflect"
    14  	"time"
    15  
    16  	"github.com/Schaudge/grailbase/errors"
    17  )
    18  
    19  // A Policy is an interface that abstracts retry policies. Typically
    20  // users will not call methods directly on a Policy but rather use
    21  // the package function retry.Wait.
    22  type Policy interface {
    23  	// Retry tells whether the a new retry should be attempted,
    24  	// and after how long.
    25  	Retry(retry int) (bool, time.Duration)
    26  }
    27  
    28  // Wait queries the provided policy at the provided retry number and
    29  // sleeps until the next try should be attempted. Wait returns an
    30  // error if the policy prohibits further tries or if the context was
    31  // canceled, or if its deadline would run out while waiting for the
    32  // next try.
    33  func Wait(ctx context.Context, policy Policy, retry int) error {
    34  	keepgoing, wait := policy.Retry(retry)
    35  	if !keepgoing {
    36  		return errors.E(errors.TooManyTries, fmt.Sprintf("gave up after %d tries", retry))
    37  	}
    38  	if deadline, ok := ctx.Deadline(); ok && time.Until(deadline) < wait {
    39  		return errors.E(errors.Timeout, "ran out of time while waiting for retry")
    40  	}
    41  	select {
    42  	case <-time.After(wait):
    43  		return nil
    44  	case <-ctx.Done():
    45  		return ctx.Err()
    46  	}
    47  }
    48  
    49  // WaitForFn uses the above Wait function taking the same policy and retry
    50  // number and generalizes it for a use of a function. Just like Wait it
    51  // errors in the cases of extra tries, context cancel, or if its deadline
    52  // runs out waiting for the next try
    53  func WaitForFn(ctx context.Context, policy Policy, fn interface{}, params ...interface{}) (result []reflect.Value) {
    54  	var out []reflect.Value
    55  	f := reflect.ValueOf(fn)
    56  	inputs := make([]reflect.Value, len(params))
    57  	for i, in := range params {
    58  		inputs[i] = reflect.ValueOf(in)
    59  	}
    60  
    61  	// will break out of loop if function doesn't error
    62  	for retries := 0; ; retries++ {
    63  		out = f.Call(inputs)
    64  		if out[len(out)-1].IsNil() { // assumes last output value of function is an error object
    65  			break
    66  		}
    67  		if retryErr := Wait(ctx, policy, retries); retryErr != nil {
    68  			return out
    69  		}
    70  	}
    71  
    72  	return out
    73  }
    74  
    75  type backoff struct {
    76  	factor       float64
    77  	initial, max time.Duration
    78  }
    79  
    80  // maxInt64Convertible is the maximum float64 that can be converted to an int64
    81  // accurately. We use this to prevent overflow when computing the exponential
    82  // backoff, which we compute with float64s. It is important that we push it
    83  // through float64 then int64 so that we get compilation error if we use a
    84  // value that cannot be represented as an int64. This value was produced with:
    85  //   math.Nextafter(float64(math.MaxInt64), 0)
    86  const maxInt64Convertible = int64(float64(9223372036854774784))
    87  
    88  // MaxBackoffMax is the maximum value that can be passed as max to Backoff.
    89  const MaxBackoffMax = time.Duration(maxInt64Convertible)
    90  
    91  // Backoff returns a Policy that initially waits for the amount of
    92  // time specified by parameter initial; on each try this value is
    93  // multiplied by the provided factor, up to the max duration.
    94  func Backoff(initial, max time.Duration, factor float64) Policy {
    95  	if max > MaxBackoffMax {
    96  		panic("max > MaxBackoffMax")
    97  	}
    98  	return &backoff{
    99  		initial: initial,
   100  		max:     max,
   101  		factor:  factor,
   102  	}
   103  }
   104  
   105  func (b *backoff) Retry(retries int) (bool, time.Duration) {
   106  	if retries < 0 {
   107  		panic("retries < 0")
   108  	}
   109  	nsfloat64 := float64(b.initial) * math.Pow(b.factor, float64(retries))
   110  	nsfloat64 = math.Min(nsfloat64, float64(b.max))
   111  	return true, time.Duration(int64(nsfloat64))
   112  }
   113  
   114  // BackoffWithTimeout returns a Policy that initially waits for the amount of
   115  // time specified by parameter initial; on each try this value is
   116  // multiplied by the provided factor, up to the max duration.
   117  // After the max duration, the Policy will timeout and return an error.
   118  func BackoffWithTimeout(initial, max time.Duration, factor float64) Policy {
   119  	n := int(math.Floor(math.Log(float64(max/initial))/math.Log(factor))) + 1
   120  	return MaxRetries(Backoff(initial, max, factor), n)
   121  }
   122  
   123  type jitter struct {
   124  	policy Policy
   125  	// frac is the fraction of the wait time to "jitter".
   126  	// Eg: if frac is 0.2, the policy will retain 80% of the wait time
   127  	// and jitter the remaining 20%
   128  	frac float64
   129  }
   130  
   131  // Jitter returns a policy that jitters 'frac' fraction of the wait times
   132  // returned  by the provided policy.  For example, setting frac to 1.0 and 0.5
   133  // will implement "full jitter" and "equal jitter" approaches respectively.
   134  // These approaches are describer here:
   135  // https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
   136  func Jitter(policy Policy, frac float64) Policy {
   137  	return &jitter{policy, frac}
   138  }
   139  
   140  func (b *jitter) Retry(retries int) (bool, time.Duration) {
   141  	ok, wait := b.policy.Retry(retries)
   142  	if wait > 0 {
   143  		prop := time.Duration(b.frac * float64(wait))
   144  		wait = wait - prop + time.Duration(rand.Int63n(prop.Nanoseconds()))
   145  	}
   146  	return ok, wait
   147  }
   148  
   149  type maxtries struct {
   150  	policy Policy
   151  	max    int
   152  }
   153  
   154  // MaxRetries returns a policy that enforces a maximum number of
   155  // attempts. The provided policy is invoked when the current number
   156  // of tries is within the permissible limit. If policy is nil, the
   157  // returned policy will permit an immediate retry when the number of
   158  // tries is within the allowable limits.
   159  func MaxRetries(policy Policy, n int) Policy {
   160  	if n < 1 {
   161  		panic("retry.MaxRetries: n < 1")
   162  	}
   163  	return &maxtries{policy, n - 1}
   164  }
   165  
   166  func (m *maxtries) Retry(retries int) (bool, time.Duration) {
   167  	if retries > m.max {
   168  		return false, time.Duration(0)
   169  	}
   170  	if m.policy != nil {
   171  		return m.policy.Retry(retries)
   172  	}
   173  	return true, time.Duration(0)
   174  }