istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pkg/test/util/retry/retry.go (about)

     1  //  Copyright Istio Authors
     2  //
     3  //  Licensed under the Apache License, Version 2.0 (the "License");
     4  //  you may not use this file except in compliance with the License.
     5  //  You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  //  Unless required by applicable law or agreed to in writing, software
    10  //  distributed under the License is distributed on an "AS IS" BASIS,
    11  //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  //  See the License for the specific language governing permissions and
    13  //  limitations under the License.
    14  
    15  package retry
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"time"
    21  
    22  	"istio.io/istio/pkg/log"
    23  	"istio.io/istio/pkg/test"
    24  )
    25  
// scope is the logging scope, registered under the name "retry", that this
// package uses for its debug output.
var scope = log.RegisterScope("retry", "logs for retries")
    27  
    28  const (
    29  	// DefaultTimeout the default timeout for the entire retry operation
    30  	DefaultTimeout = time.Second * 30
    31  
    32  	// DefaultDelay the default delay between successive retry attempts
    33  	DefaultDelay = time.Millisecond * 10
    34  
    35  	// DefaultConverge the default converge, requiring something to succeed one time
    36  	DefaultConverge = 1
    37  )
    38  
    39  var defaultConfig = config{
    40  	timeout:  DefaultTimeout,
    41  	delay:    DefaultDelay,
    42  	delayMax: DefaultDelay * 16,
    43  	converge: DefaultConverge,
    44  }
    45  
    46  type config struct {
    47  	error       string
    48  	timeout     time.Duration
    49  	delay       time.Duration
    50  	delayMax    time.Duration
    51  	converge    int
    52  	maxAttempts int
    53  }
    54  
    55  // Option for a retry operation.
    56  type Option func(cfg *config)
    57  
    58  // Timeout sets the timeout for the entire retry operation.
    59  func Timeout(timeout time.Duration) Option {
    60  	return func(cfg *config) {
    61  		cfg.timeout = timeout
    62  	}
    63  }
    64  
    65  // Delay sets the delay between successive retry attempts.
    66  func Delay(delay time.Duration) Option {
    67  	return func(cfg *config) {
    68  		cfg.delay = delay
    69  		cfg.delayMax = delay
    70  	}
    71  }
    72  
    73  func BackoffDelay(delay time.Duration) Option {
    74  	return func(cfg *config) {
    75  		cfg.delay = delay
    76  		// Currently, hardcode to 16 backoffs. We can make it configurable if needed
    77  		cfg.delayMax = delay * 16
    78  	}
    79  }
    80  
    81  // Converge sets the number of successes in a row needed to count a success.
    82  // This is useful to avoid the case where tests like `coin.Flip() == HEADS` will always
    83  // return success due to random variance.
    84  func Converge(successes int) Option {
    85  	return func(cfg *config) {
    86  		cfg.converge = successes
    87  	}
    88  }
    89  
    90  // Message defines a more detailed error message to use when failing
    91  func Message(errorMessage string) Option {
    92  	return func(cfg *config) {
    93  		cfg.error = errorMessage
    94  	}
    95  }
    96  
    97  // MaxAttempts allows defining a maximum number of attempts. If unset, only timeout is considered.
    98  func MaxAttempts(attempts int) Option {
    99  	return func(cfg *config) {
   100  		cfg.maxAttempts = attempts
   101  	}
   102  }
   103  
// RetriableFunc is a function that can be retried by UntilComplete.
// It returns the result of the attempt, whether the operation has completed
// (false requests another attempt), and any error encountered.
type RetriableFunc func() (result any, completed bool, err error)
   106  
   107  // UntilSuccess retries the given function until success, timeout, or until the passed-in function returns nil.
   108  func UntilSuccess(fn func() error, options ...Option) error {
   109  	_, e := UntilComplete(func() (any, bool, error) {
   110  		err := fn()
   111  		if err != nil {
   112  			return nil, false, err
   113  		}
   114  
   115  		return nil, true, nil
   116  	}, options...)
   117  
   118  	return e
   119  }
   120  
   121  // UntilSuccessOrFail calls UntilSuccess, and fails t with Fatalf if it ends up returning an error
   122  func UntilSuccessOrFail(t test.Failer, fn func() error, options ...Option) {
   123  	t.Helper()
   124  	err := UntilSuccess(fn, options...)
   125  	if err != nil {
   126  		t.Fatalf("retry.UntilSuccessOrFail: %v", err)
   127  	}
   128  }
   129  
// ErrConditionNotMet is the error Until reports for a failed attempt when no
// custom message was supplied through the Message option.
var ErrConditionNotMet = errors.New("expected condition not met")
   131  
   132  // Until retries the given function until it returns true or hits the timeout
   133  func Until(fn func() bool, options ...Option) error {
   134  	return UntilSuccess(func() error {
   135  		if !fn() {
   136  			return getErrorMessage(options)
   137  		}
   138  		return nil
   139  	}, options...)
   140  }
   141  
   142  // UntilOrFail calls Until, and fails t with Fatalf if it ends up returning an error
   143  func UntilOrFail(t test.Failer, fn func() bool, options ...Option) {
   144  	t.Helper()
   145  	err := Until(fn, options...)
   146  	if err != nil {
   147  		t.Fatalf("retry.UntilOrFail: %v", err)
   148  	}
   149  }
   150  
   151  func getErrorMessage(options []Option) error {
   152  	cfg := defaultConfig
   153  	for _, option := range options {
   154  		option(&cfg)
   155  	}
   156  	if cfg.error == "" {
   157  		return ErrConditionNotMet
   158  	}
   159  	return errors.New(cfg.error)
   160  }
   161  
   162  // UntilComplete retries the given function, until there is a timeout, or until the function indicates that it has completed.
   163  // Once complete, the returned value and error are returned.
   164  func UntilComplete(fn RetriableFunc, options ...Option) (any, error) {
   165  	cfg := defaultConfig
   166  	for _, option := range options {
   167  		option(&cfg)
   168  	}
   169  
   170  	successes := 0
   171  	attempts := 0
   172  	var lasterr error
   173  	to := time.After(cfg.timeout)
   174  	delay := cfg.delay
   175  	for {
   176  		if cfg.maxAttempts > 0 && attempts >= cfg.maxAttempts {
   177  			return nil, fmt.Errorf("hit max attempts %d attempts (last error: %v)", attempts, lasterr)
   178  		}
   179  		select {
   180  		case <-to:
   181  			return nil, fmt.Errorf("timeout while waiting after %d attempts (last error: %v)", attempts, lasterr)
   182  		default:
   183  		}
   184  
   185  		result, completed, err := fn()
   186  		attempts++
   187  		if completed {
   188  			if err == nil {
   189  				successes++
   190  			} else {
   191  				successes = 0
   192  			}
   193  			if successes >= cfg.converge {
   194  				return result, err
   195  			}
   196  
   197  			// Skip delay if we have a success
   198  			continue
   199  		}
   200  		successes = 0
   201  		if err != nil {
   202  			scope.Debugf("encountered an error on attempt %d: %v", attempts, err)
   203  			lasterr = err
   204  		}
   205  
   206  		select {
   207  		case <-to:
   208  			convergeStr := ""
   209  			if cfg.converge > 1 {
   210  				convergeStr = fmt.Sprintf(", %d/%d successes", successes, cfg.converge)
   211  			}
   212  			return nil, fmt.Errorf("timeout while waiting after %d attempts%s (last error: %v)", attempts, convergeStr, lasterr)
   213  		case <-time.After(delay):
   214  			delay *= 2
   215  			if delay > cfg.delayMax {
   216  				delay = cfg.delayMax
   217  			}
   218  		}
   219  	}
   220  }