github.com/cloudwego/kitex@v0.9.0/pkg/retry/failure_retryer.go (about)

     1  /*
     2   * Copyright 2021 CloudWeGo Authors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package retry
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"strconv"
    24  	"strings"
    25  	"sync"
    26  	"sync/atomic"
    27  	"time"
    28  
    29  	"github.com/cloudwego/kitex/pkg/circuitbreak"
    30  	"github.com/cloudwego/kitex/pkg/kerrors"
    31  	"github.com/cloudwego/kitex/pkg/klog"
    32  	"github.com/cloudwego/kitex/pkg/rpcinfo"
    33  )
    34  
    35  func newFailureRetryer(policy Policy, r *ShouldResultRetry, cbC *cbContainer) (Retryer, error) {
    36  	fr := &failureRetryer{specifiedResultRetry: r, cbContainer: cbC}
    37  	if err := fr.UpdatePolicy(policy); err != nil {
    38  		return nil, fmt.Errorf("newfailureRetryer failed, err=%w", err)
    39  	}
    40  	return fr, nil
    41  }
    42  
    43  type failureRetryer struct {
        	// failureRetryer is the Retryer returned by newFailureRetryer; its Type()
        	// is FailureType. UpdatePolicy mutates the fields below under the write
        	// lock of the embedded RWMutex; ShouldRetry, AllowRetry and Dump read
        	// them under the read lock.

        	// enable mirrors Policy.Enable; when false ShouldRetry and AllowRetry refuse.
    44  	enable               bool
        	// policy is the FailurePolicy installed by the last successful UpdatePolicy.
    45  	policy               *FailurePolicy
        	// backOff is the wait strategy applied in ShouldRetry, built by initBackOff.
    46  	backOff              BackOff
        	// cbContainer carries the circuit-breaker control, panel and stat switches used by Do.
    47  	cbContainer          *cbContainer
        	// specifiedResultRetry, when non-nil, overrides policy.ShouldResultRetry
        	// (see setSpecifiedResultRetryIfNeeded).
    48  	specifiedResultRetry *ShouldResultRetry
    49  	sync.RWMutex
        	// errMsg holds the message of the last policy-update problem; Dump exposes it.
    50  	errMsg string
    51  }
    52  
    53  // ShouldRetry implements the Retryer interface.
    54  func (r *failureRetryer) ShouldRetry(ctx context.Context, err error, callTimes int, req interface{}, cbKey string) (string, bool) {
    55  	r.RLock()
    56  	defer r.RUnlock()
    57  	if !r.enable {
    58  		return "", false
    59  	}
    60  	if stop, msg := circuitBreakerStop(ctx, r.policy.StopPolicy, r.cbContainer, req, cbKey); stop {
    61  		return msg, false
    62  	}
    63  	if stop, msg := ddlStop(ctx, r.policy.StopPolicy); stop {
    64  		return msg, false
    65  	}
    66  	r.backOff.Wait(callTimes)
    67  	return "", true
    68  }
    69  
    70  // AllowRetry implements the Retryer interface.
    71  func (r *failureRetryer) AllowRetry(ctx context.Context) (string, bool) {
    72  	r.RLock()
    73  	defer r.RUnlock()
    74  	if !r.enable || r.policy.StopPolicy.MaxRetryTimes == 0 {
    75  		return "", false
    76  	}
    77  	if stop, msg := chainStop(ctx, r.policy.StopPolicy); stop {
    78  		return msg, false
    79  	}
    80  	if stop, msg := ddlStop(ctx, r.policy.StopPolicy); stop {
    81  		return msg, false
    82  	}
    83  	return "", true
    84  }
    85  
    86  // Do implements the Retryer interface.
        //
        // It runs rpcCall at most MaxRetryTimes+1 times: one initial call plus
        // retries. Before every retry it stops if MaxDurationMS (when > 0) has
        // elapsed since the first call started, or if ShouldRetry refuses. A failed
        // call is retried only when isRetryErr accepts its error; a successful call
        // is retried only when the policy's RespRetry asks for it.
        //
        // lastRI is the RPCInfo returned by the most recent rpcCall. recycleRI is
        // true only when the very first call succeeded. If the last permitted
        // attempt fails, err is wrapped with kerrors.ErrRetry. A panic raised after
        // the deferred recover is installed is converted to err by panicToErr;
        // lastRI and recycleRI then keep their zero values.
    87  func (r *failureRetryer) Do(ctx context.Context, rpcCall RPCCallFunc, firstRI rpcinfo.RPCInfo, req interface{}) (lastRI rpcinfo.RPCInfo, recycleRI bool, err error) {
        	// Copy the stop limits into locals under the read lock; the loop below
        	// uses these copies rather than re-reading them from r.policy.
    88  	r.RLock()
    89  	var maxDuration time.Duration
    90  	if r.policy.StopPolicy.MaxDurationMS > 0 {
    91  		maxDuration = time.Duration(r.policy.StopPolicy.MaxDurationMS) * time.Millisecond
    92  	}
    93  	retryTimes := r.policy.StopPolicy.MaxRetryTimes
    94  	r.RUnlock()
    95  
        	// callTimes counts attempts actually made; callCosts collects the
        	// comma-separated cost of each attempt in microseconds.
    96  	var callTimes int32
    97  	var callCosts strings.Builder
    98  	var cRI rpcinfo.RPCInfo
        	// NOTE(review): GetKey runs before the recover below is deferred, so a
        	// panic inside it would not be converted to err — confirm this is intended.
    99  	cbKey, _ := r.cbContainer.cbCtl.GetKey(ctx, req)
   100  	defer func() {
   101  		if panicInfo := recover(); panicInfo != nil {
   102  			err = panicToErr(ctx, panicInfo, firstRI)
   103  		}
   104  	}()
   105  	startTime := time.Now()
        	// i == 0 is the initial call; i >= 1 are retries.
   106  	for i := 0; i <= retryTimes; i++ {
   107  		var resp interface{}
   108  		var callStart time.Time
   109  		if i == 0 {
   110  			callStart = startTime
        			// i > 0 always holds in this branch since i starts at 0 and only grows.
   111  		} else if i > 0 {
        			// The previous error is replaced when the overall time budget is spent.
   112  			if maxDuration > 0 && time.Since(startTime) > maxDuration {
   113  				err = makeRetryErr(ctx, "exceed max duration", callTimes)
   114  				break
   115  			}
        			// ShouldRetry also performs the backoff wait when it allows the retry.
   116  			if msg, ok := r.ShouldRetry(ctx, err, i, req, cbKey); !ok {
   117  				if msg != "" {
        					// Attach the stop reason to the error of the previous attempt.
   118  					appendMsg := fmt.Sprintf("retried %d, %s", i-1, msg)
   119  					appendErrMsg(err, appendMsg)
   120  				}
   121  				break
   122  			}
   123  			callStart = time.Now()
   124  			callCosts.WriteByte(',')
        			// Reset the response-operation flag stored in ctx (if any) to OpNo
        			// before the next attempt.
   125  			if respOp, ok := ctx.Value(CtxRespOp).(*int32); ok {
   126  				atomic.StoreInt32(respOp, OpNo)
   127  			}
   128  		}
   129  		callTimes++
   130  		if r.cbContainer.enablePercentageLimit {
   131  			// record the stat before the call: requests may be slow, and recording early keeps the limiter more accurate
   132  			recordRetryStat(cbKey, r.cbContainer.cbPanel, callTimes)
   133  		}
   134  		cRI, resp, err = rpcCall(ctx, r)
   135  		callCosts.WriteString(strconv.FormatInt(time.Since(callStart).Microseconds(), 10))
   136  
        		// Without the percentage limiter, the call result is reported to the
        		// circuit breaker after the call, and only when cbStat is set.
   137  		if !r.cbContainer.enablePercentageLimit && r.cbContainer.cbStat {
   138  			circuitbreak.RecordStat(ctx, req, nil, err, cbKey, r.cbContainer.cbCtl, r.cbContainer.cbPanel)
   139  		}
   140  		if err == nil {
   141  			if r.policy.IsRespRetryNonNil() && r.policy.ShouldResultRetry.RespRetry(resp, cRI) {
   142  				// the user-specified rule asks to retry on this response
   143  				continue
   144  			}
   145  			break
   146  		} else {
   147  			if i == retryTimes {
   148  				// last permitted attempt failed: stop retrying and wrap the error
   149  				err = kerrors.ErrRetry.WithCause(err)
   150  			} else if !r.isRetryErr(err, cRI) {
   151  				// neither a timeout nor a user-specified retryable error: do not retry
   152  				break
   153  			}
   154  		}
   155  	}
   156  	recordRetryInfo(cRI, callTimes, callCosts.String())
        	// Only a call that succeeded on its first attempt reports recycleRI = true.
   157  	if err == nil && callTimes == 1 {
   158  		return cRI, true, nil
   159  	}
   160  	return cRI, false, err
   161  }
   162  
   163  // UpdatePolicy implements the Retryer interface.
   164  func (r *failureRetryer) UpdatePolicy(rp Policy) (err error) {
   165  	if !rp.Enable {
   166  		r.Lock()
   167  		r.enable = rp.Enable
   168  		r.Unlock()
   169  		return nil
   170  	}
   171  	var errMsg string
   172  	if rp.FailurePolicy == nil || rp.Type != FailureType {
   173  		errMsg = "FailurePolicy is nil or retry type not match, cannot do update in failureRetryer"
   174  		err = errors.New(errMsg)
   175  	}
   176  	rt := rp.FailurePolicy.StopPolicy.MaxRetryTimes
   177  	if errMsg == "" && (rt < 0 || rt > maxFailureRetryTimes) {
   178  		errMsg = fmt.Sprintf("invalid failure MaxRetryTimes[%d]", rt)
   179  		err = errors.New(errMsg)
   180  	}
   181  	if errMsg == "" {
   182  		if e := checkCBErrorRate(&rp.FailurePolicy.StopPolicy.CBPolicy); e != nil {
   183  			rp.FailurePolicy.StopPolicy.CBPolicy.ErrorRate = defaultCBErrRate
   184  			errMsg = fmt.Sprintf("failureRetryer %s, use default %0.2f", e.Error(), defaultCBErrRate)
   185  			klog.Warnf(errMsg)
   186  		}
   187  	}
   188  	r.Lock()
   189  	defer r.Unlock()
   190  	r.enable = rp.Enable
   191  	if err != nil {
   192  		r.errMsg = errMsg
   193  		return err
   194  	}
   195  	r.policy = rp.FailurePolicy
   196  	r.setSpecifiedResultRetryIfNeeded(r.specifiedResultRetry)
   197  	if bo, e := initBackOff(rp.FailurePolicy.BackOffPolicy); e != nil {
   198  		r.errMsg = fmt.Sprintf("failureRetryer update BackOffPolicy failed, err=%s", e.Error())
   199  		klog.Warnf(r.errMsg)
   200  	} else {
   201  		r.backOff = bo
   202  	}
   203  	return nil
   204  }
   205  
   206  // AppendErrMsgIfNeeded implements the Retryer interface.
   207  func (r *failureRetryer) AppendErrMsgIfNeeded(err error, ri rpcinfo.RPCInfo, msg string) {
   208  	if r.isRetryErr(err, ri) {
   209  		// Add additional reason when retry is not applied.
   210  		appendErrMsg(err, msg)
   211  	}
   212  }
   213  
   214  // Prepare implements the Retryer interface.
   215  func (r *failureRetryer) Prepare(ctx context.Context, prevRI, retryRI rpcinfo.RPCInfo) {
   216  	handleRetryInstance(r.policy.RetrySameNode, prevRI, retryRI)
   217  }
   218  
   219  func (r *failureRetryer) isRetryErr(err error, ri rpcinfo.RPCInfo) bool {
   220  	if err == nil {
   221  		return false
   222  	}
   223  	// Logic Notice:
   224  	// some kinds of error cannot be retried, eg: ServiceCircuitBreak.
   225  	// But CircuitBreak has been checked in ShouldRetry, it doesn't need to filter ServiceCircuitBreak.
   226  	// If there are some other specified errors that cannot be retried, it should be filtered here.
   227  
   228  	if r.policy.IsRetryForTimeout() && kerrors.IsTimeoutError(err) {
   229  		return true
   230  	}
   231  	if r.policy.IsErrorRetryNonNil() && r.policy.ShouldResultRetry.ErrorRetry(err, ri) {
   232  		return true
   233  	}
   234  	return false
   235  }
   236  
   237  func initBackOff(policy *BackOffPolicy) (bo BackOff, err error) {
   238  	bo = NoneBackOff
   239  	if policy == nil {
   240  		return
   241  	}
   242  	switch policy.BackOffType {
   243  	case NoneBackOffType:
   244  	case FixedBackOffType:
   245  		if policy.CfgItems == nil {
   246  			return bo, errors.New("invalid FixedBackOff, CfgItems is nil")
   247  		}
   248  		fixMS := policy.CfgItems[FixMSBackOffCfgKey]
   249  		fixMSInt, _ := strconv.Atoi(fmt.Sprintf("%1.0f", fixMS))
   250  		if err = checkFixedBackOff(fixMSInt); err != nil {
   251  			return
   252  		}
   253  		bo = newFixedBackOff(fixMSInt)
   254  	case RandomBackOffType:
   255  		if policy.CfgItems == nil {
   256  			return bo, errors.New("invalid FixedBackOff, CfgItems is nil")
   257  		}
   258  		minMS := policy.CfgItems[MinMSBackOffCfgKey]
   259  		maxMS := policy.CfgItems[MaxMSBackOffCfgKey]
   260  		minMSInt, _ := strconv.Atoi(fmt.Sprintf("%1.0f", minMS))
   261  		maxMSInt, _ := strconv.Atoi(fmt.Sprintf("%1.0f", maxMS))
   262  		if err = checkRandomBackOff(minMSInt, maxMSInt); err != nil {
   263  			return
   264  		}
   265  		bo = newRandomBackOff(minMSInt, maxMSInt)
   266  	default:
   267  		return bo, fmt.Errorf("invalid backoffType=%v", policy.BackOffType)
   268  	}
   269  	return
   270  }
   271  
   272  // Type implements the Retryer interface.
   273  func (r *failureRetryer) Type() Type {
   274  	return FailureType
   275  }
   276  
   277  // Dump implements the Retryer interface.
   278  func (r *failureRetryer) Dump() map[string]interface{} {
   279  	r.RLock()
   280  	defer r.RUnlock()
   281  	dm := make(map[string]interface{})
   282  	dm["enable"] = r.enable
   283  	dm["failure_retry"] = r.policy
   284  	if r.policy != nil {
   285  		dm["specified_result_retry"] = map[string]bool{
   286  			"error_retry": r.policy.IsErrorRetryNonNil(),
   287  			"resp_retry":  r.policy.IsRespRetryNonNil(),
   288  		}
   289  	}
   290  	if r.errMsg != "" {
   291  		dm["errMsg"] = r.errMsg
   292  	}
   293  	return dm
   294  }
   295  
   296  func (r *failureRetryer) setSpecifiedResultRetryIfNeeded(rr *ShouldResultRetry) {
   297  	if rr != nil {
   298  		// save the object specified by client.WithSpecifiedResultRetry(..)
   299  		r.specifiedResultRetry = rr
   300  	}
   301  	if r.policy != nil && r.specifiedResultRetry != nil {
   302  		// The priority of client.WithSpecifiedResultRetry(..) is higher, so always update it
   303  		// NOTE: client.WithSpecifiedResultRetry(..) will always reject a nil object
   304  		r.policy.ShouldResultRetry = r.specifiedResultRetry
   305  	}
   306  }