github.com/grafana/pyroscope@v1.18.0/pkg/util/retry/hedged.go (about)

     1  package retry
     2  
     3  import (
     4  	"context"
     5  	"sync"
     6  	"sync/atomic"
     7  	"time"
     8  )
     9  
    10  // Hedged executes Call with a speculative retry after trigger fires
    11  // if it has not returned earlier.
    12  //
    13  // By default, if one of the attempts fails, another one is not canceled.
    14  // In this case the speculative attempt will not start until the trigger fires.
    15  // For more granular control, use FailFast.
    16  type Hedged[T any] struct {
    17  	// The function must be thread-safe because multiple instances may be running
    18  	// concurrently. The function must return as soon as possible after context
    19  	// cancellation, otherwise the speculation makes no sense.
    20  	//
    21  	// The function argument indicates whether this is a speculative retry attempt.
    22  	Call    Call[T]
    23  	Trigger <-chan time.Time
    24  
    25  	// FailFast specifies how a failure is handled. If it is set to true:
    26  	//  - the result received first is returned, regardless of anything.
    27  	//  - if Call fails before the trigger fires, it won't be retried.
    28  	FailFast bool
    29  }
    30  
    31  type Call[T any] func(ctx context.Context, isRetry bool) (T, error)
    32  
    33  func (s Hedged[T]) Do(ctx context.Context) (T, error) {
    34  	attemptCtx, cancel := context.WithCancel(ctx)
    35  	defer cancel()
    36  	var (
    37  		ret    T
    38  		err    error
    39  		failed uint64
    40  
    41  		wg sync.WaitGroup
    42  		do sync.Once
    43  	)
    44  
    45  	attempt := func(isRetry bool) {
    46  		wg.Add(1)
    47  		go func() {
    48  			defer wg.Done()
    49  			attemptRet, attemptErr := s.Call(attemptCtx, isRetry)
    50  			if attemptErr != nil && atomic.SwapUint64(&failed, 1) == 0 && !s.FailFast {
    51  				// This attempt has failed, but not another one. If allowed,
    52  				// we give another attempt a chance. Otherwise, if both ones
    53  				// did fail, or it's not allowed to proceed after the first
    54  				// failure, we store the result with error and cancel any
    55  				// ongoing attempt.
    56  				return
    57  			}
    58  			// If there is an ongoing attempt, it will be cancelled,
    59  			// because we already got the result.
    60  			cancel()
    61  			do.Do(func() {
    62  				ret, err = attemptRet, attemptErr
    63  			})
    64  		}()
    65  	}
    66  
    67  	attempt(false)
    68  	select {
    69  	case <-attemptCtx.Done():
    70  		// Call has returned, or caller cancelled the request.
    71  	case <-s.Trigger:
    72  		attempt(true)
    73  	}
    74  
    75  	wg.Wait()
    76  	return ret, err
    77  }