github.com/cloudwego/kitex@v0.9.0/pkg/retry/retryer.go

     1  /*
     2   * Copyright 2021 CloudWeGo Authors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package retry implements RPC retry
    18  package retry
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/bytedance/gopkg/cloud/circuitbreaker"
    27  
    28  	"github.com/cloudwego/kitex/pkg/circuitbreak"
    29  	"github.com/cloudwego/kitex/pkg/klog"
    30  	"github.com/cloudwego/kitex/pkg/rpcinfo"
    31  )
    32  
    33  // RPCCallFunc is the definition of the wrapped RPC call.
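// Conceptually it has the following shape (for illustration only; the framework builds this
// function internally, and doActualCall is a hypothetical stand-in for the real endpoint call):
//
//	var call RPCCallFunc = func(ctx context.Context, r Retryer) (rpcinfo.RPCInfo, interface{}, error) {
//		return doActualCall(ctx) // hypothetical helper that issues the real RPC
//	}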
    34  type RPCCallFunc func(context.Context, Retryer) (rpcinfo rpcinfo.RPCInfo, resp interface{}, err error)
    35  
    36  // Retryer is the interface that retry implementations must satisfy.
    37  type Retryer interface {
    38  	// AllowRetry checks if the current request satisfies the retry conditions (eg: circuit breaker, retry times == 0, chain stop, ddl).
    39  	// If not, Retryer.Do won't be executed and the reason message is returned.
    40  	// The first call is executed anyway, regardless of whether retry is allowed.
    41  	AllowRetry(ctx context.Context) (msg string, ok bool)
    42  
    43  	// ShouldRetry checks if the retry request can be issued; it is checked inside Retryer.Do.
    44  	// If not, the reason message is returned.
    45  	ShouldRetry(ctx context.Context, err error, callTimes int, req interface{}, cbKey string) (msg string, ok bool)
    46  	UpdatePolicy(policy Policy) error
    47  
    48  	// Do executes the retry policy. recycleRI decides whether the firstRI can be recycled.
    49  	Do(ctx context.Context, rpcCall RPCCallFunc, firstRI rpcinfo.RPCInfo, request interface{}) (lastRI rpcinfo.RPCInfo, recycleRI bool, err error)
    50  	AppendErrMsgIfNeeded(err error, ri rpcinfo.RPCInfo, msg string)
    51  
    52  	// Prepare does whatever is needed before the retry call.
    53  	Prepare(ctx context.Context, prevRI, retryRI rpcinfo.RPCInfo)
    54  	Dump() map[string]interface{}
    55  	Type() Type
    56  }
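
// A Container drives a Retryer roughly in the following order (a simplified sketch of the
// logic in Container.WithRetryIfNeeded below, not an additional API):
//
//	if msg, ok := retryer.AllowRetry(ctx); ok {
//		lastRI, recycleRI, err = retryer.Do(ctx, rpcCall, firstRI, request)
//	} else {
//		_, _, err = rpcCall(ctx, retryer)
//		if err != nil && msg != "" {
//			retryer.AppendErrMsgIfNeeded(err, firstRI, msg)
//		}
//	}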
    57  
    58  // NewRetryContainerWithCB builds a Container that doesn't collect circuit breaker statistics itself but reads the statistic results,
    59  // which is used in the case that the service circuit breaker is already enabled.
    60  // eg:
    61  //
    62  //	cbs := circuitbreak.NewCBSuite(circuitbreak.RPCInfo2Key)
    63  //	retryC := retry.NewRetryContainerWithCB(cbs.ServiceControl(), cbs.ServicePanel())
    64  //	var opts []client.Option
    65  //	opts = append(opts, client.WithRetryContainer(retryC))
    66  //	// enable service circuit breaker
    67  //	opts = append(opts, client.WithMiddleware(cbs.ServiceCBMW()))
    68  func NewRetryContainerWithCB(cc *circuitbreak.Control, cp circuitbreaker.Panel) *Container {
    69  	return NewRetryContainer(WithContainerCBControl(cc), WithContainerCBPanel(cp))
    70  }
    71  
    72  func newCBSuite() *circuitbreak.CBSuite {
    73  	return circuitbreak.NewCBSuite(circuitbreak.RPCInfo2Key)
    74  }
    75  
    76  // NewRetryContainerWithCBStat builds a Container that needs to collect circuit breaker statistics itself,
    77  // which is used in the case that the service CB key is customized.
    78  // eg:
    79  //
    80  //	cbs := circuitbreak.NewCBSuite(YourGenServiceCBKeyFunc)
    81  //	retry.NewRetryContainerWithCBStat(cbs.ServiceControl(), cbs.ServicePanel())
    82  func NewRetryContainerWithCBStat(cc *circuitbreak.Control, cp circuitbreaker.Panel) *Container {
    83  	return NewRetryContainer(WithContainerCBControl(cc), WithContainerCBPanel(cp), WithContainerCBStat())
    84  }
    85  
    86  // NewRetryContainerWithPercentageLimit builds a Container that limits the percentage of retry requests;
    87  // this is the RECOMMENDED initializer if you want to control PRECISELY the percentage of retry requests.
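// eg. (illustrative, mirroring the client options shown in the examples above):
//
//	retryC := retry.NewRetryContainerWithPercentageLimit()
//	var opts []client.Option
//	opts = append(opts, client.WithRetryContainer(retryC))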
    88  func NewRetryContainerWithPercentageLimit() *Container {
    89  	return NewRetryContainer(WithContainerEnablePercentageLimit())
    90  }
    91  
    92  // ContainerOption is used when initializing a Container
    93  type ContainerOption func(rc *Container)
    94  
    95  // WithContainerCBSuite specifies the CBSuite used in the retry circuit breaker;
    96  // the retryer will use its ServiceControl and ServicePanel.
    97  // Its priority is lower than WithContainerCBControl and WithContainerCBPanel.
    98  func WithContainerCBSuite(cbs *circuitbreak.CBSuite) ContainerOption {
    99  	return func(rc *Container) {
   100  		rc.cbContainer.cbSuite = cbs
   101  	}
   102  }
   103  
   104  // WithContainerCBControl specifies the circuitbreak.Control used in the retry circuit breaker.
   105  // It's the user's responsibility to make sure it's paired with the panel.
   106  func WithContainerCBControl(ctrl *circuitbreak.Control) ContainerOption {
   107  	return func(rc *Container) {
   108  		rc.cbContainer.cbCtl = ctrl
   109  	}
   110  }
   111  
   112  // WithContainerCBPanel specifies the circuitbreaker.Panel used in the retry circuit breaker.
   113  // It's the user's responsibility to make sure it's paired with the control.
   114  func WithContainerCBPanel(panel circuitbreaker.Panel) ContainerOption {
   115  	return func(rc *Container) {
   116  		rc.cbContainer.cbPanel = panel
   117  	}
   118  }
   119  
   120  // WithContainerCBStat instructs that circuitbreak.RecordStat is called within the retryer.
   121  func WithContainerCBStat() ContainerOption {
   122  	return func(rc *Container) {
   123  		rc.cbContainer.cbStat = true
   124  	}
   125  }
   126  
   127  // WithContainerEnablePercentageLimit should be called to limit the percentage of retry requests.
   128  func WithContainerEnablePercentageLimit() ContainerOption {
   129  	return func(rc *Container) {
   130  		rc.cbContainer.enablePercentageLimit = true
   131  	}
   132  }
   133  
   134  // NewRetryContainer builds a Container that, by default, builds its own circuit breaker and collects circuit breaker statistics.
   135  // The caller is responsible for calling Container.Close() to release the referenced resources.
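// eg. (illustrative; any of the ContainerOption values above can be combined):
//
//	cbs := circuitbreak.NewCBSuite(circuitbreak.RPCInfo2Key)
//	retryC := retry.NewRetryContainer(retry.WithContainerCBSuite(cbs), retry.WithContainerCBStat())
//	defer retryC.Close()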
   136  func NewRetryContainer(opts ...ContainerOption) *Container {
   137  	rc := &Container{
   138  		cbContainer: &cbContainer{
   139  			cbSuite: nil,
   140  		},
   141  		retryerMap: sync.Map{},
   142  	}
   143  	for _, opt := range opts {
   144  		opt(rc)
   145  	}
   146  
   147  	if rc.cbContainer.enablePercentageLimit {
   148  		// ignore cbSuite/cbCtl/cbPanel options
   149  		rc.cbContainer = &cbContainer{
   150  			enablePercentageLimit: true,
   151  			cbSuite:               newCBSuite(),
   152  		}
   153  	}
   154  
   155  	container := rc.cbContainer
   156  	if container.cbCtl == nil && container.cbPanel == nil {
   157  		if container.cbSuite == nil {
   158  			container.cbSuite = newCBSuite()
   159  			container.cbStat = true
   160  		}
   161  		container.cbCtl = container.cbSuite.ServiceControl()
   162  		container.cbPanel = container.cbSuite.ServicePanel()
   163  	}
   164  	if !container.IsValid() {
   165  		panic("KITEX: invalid container")
   166  	}
   167  	return rc
   168  }
   169  
   170  // Container is a wrapper for Retryer.
   171  type Container struct {
   172  	hasCodeCfg  bool
   173  	retryerMap  sync.Map // <method: retryer>
   174  	cbContainer *cbContainer
   175  	msg         string
   176  	sync.RWMutex
   177  
   178  	// shouldResultRetry is only used with FailureRetry
   179  	shouldResultRetry *ShouldResultRetry
   180  }
   181  
   182  // Recommended usage: NewRetryContainerWithPercentageLimit()
   183  // For more details, refer to the following comments for each field.
   184  type cbContainer struct {
   185  	// In NewRetryContainer, if cbCtl & cbPanel are not set, Kitex will use cbSuite.ServiceControl() and
   186  	// cbSuite.ServicePanel(); If cbSuite is nil, Kitex will create one.
   187  	cbSuite *circuitbreak.CBSuite
   188  
   189  	// It's more recommended to rely on the cbSuite than specifying cbCtl & cbPanel with corresponding options,
   190  	// since cbCtl & cbPanel should be correctly paired, and with the cbSuite, Kitex will ensure it by using the
   191  	// cbSuite.ServiceControl() and cbSuite.ServicePanel().
   192  	cbCtl   *circuitbreak.Control
   193  	cbPanel circuitbreaker.Panel
   194  
   195  	// If cbStat && !enablePercentageLimit, retryer will call `circuitbreak.RecordStat` after rpcCall to record
   196  	// rpc failures/timeouts, for cutting down on the retry requests when the error rate is beyond the threshold.
   197  	cbStat bool
   198  
   199  	// If enabled, Kitex will always create a cbSuite and use its cbCtl & cbPanel, and retryer will call
   200  	// recordRetryStat before rpcCall, to precisely control the percentage of retry requests over all requests.
   201  	enablePercentageLimit bool
   202  }
   203  
   204  // IsValid returns true when both cbCtl & cbPanel are not nil
   205  // It's the user's responsibility to guarantee that cbCtl & cbPanel are correctly paired.
   206  func (c *cbContainer) IsValid() bool {
   207  	return c.cbCtl != nil && c.cbPanel != nil
   208  }
   209  
   210  // InitWithPolicies inits the Retryers with the given method policies.
   211  // Notice: InitWithPolicies is an exported func, so the lock should be acquired inside.
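// eg. (illustrative; BuildFailurePolicy/NewFailurePolicy and BuildBackupRequest/NewBackupPolicy are the
// policy helpers of this package, and "someMethod" is a hypothetical method name):
//
//	err := retryC.InitWithPolicies(map[string]retry.Policy{
//		retry.Wildcard: retry.BuildFailurePolicy(retry.NewFailurePolicy()),
//		"someMethod":   retry.BuildBackupRequest(retry.NewBackupPolicy(20)),
//	})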
   212  func (rc *Container) InitWithPolicies(methodPolicies map[string]Policy) error {
   213  	if methodPolicies == nil {
   214  		return nil
   215  	}
   216  	rc.Lock()
   217  	defer rc.Unlock()
   218  	var inited bool
   219  	for m := range methodPolicies {
   220  		if methodPolicies[m].Enable {
   221  			inited = true
   222  			if _, ok := rc.retryerMap.Load(m); ok {
   223  				// NotifyPolicyChange may happen before
   224  				continue
   225  			}
   226  			if err := rc.initRetryer(m, methodPolicies[m]); err != nil {
   227  				rc.msg = err.Error()
   228  				return err
   229  			}
   230  		}
   231  	}
   232  	rc.hasCodeCfg = inited
   233  	return nil
   234  }
   235  
   236  // DeletePolicy deletes the retryer of the given method.
   237  func (rc *Container) DeletePolicy(method string) {
   238  	rc.Lock()
   239  	defer rc.Unlock()
   240  	rc.msg = ""
   241  	if rc.hasCodeCfg {
   242  		// the priority of the code policy set up by the user is higher than the remote config
   243  		return
   244  	}
   245  	_, ok := rc.retryerMap.Load(method)
   246  	if ok {
   247  		rc.retryerMap.Delete(method)
   248  		rc.msg = fmt.Sprintf("delete retryer[%s] at %s", method, time.Now())
   249  	}
   250  }
   251  
   252  // NotifyPolicyChange receives the policy when it changes.
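// eg. (illustrative; this is usually driven by a remote config module rather than called by hand,
// and "someMethod" is a hypothetical method name):
//
//	retryC.NotifyPolicyChange("someMethod", retry.BuildFailurePolicy(retry.NewFailurePolicy()))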
   253  func (rc *Container) NotifyPolicyChange(method string, p Policy) {
   254  	rc.Lock()
   255  	defer rc.Unlock()
   256  	rc.msg = ""
   257  	if rc.hasCodeCfg {
   258  		// the priority of the code policy set up by the user is higher than the remote config
   259  		return
   260  	}
   261  	r, ok := rc.retryerMap.Load(method)
   262  	if ok && r != nil {
   263  		retryer, ok := r.(Retryer)
   264  		if ok {
   265  			if retryer.Type() == p.Type {
   266  				retryer.UpdatePolicy(p)
   267  				rc.msg = fmt.Sprintf("update retryer[%s-%s] at %s", method, retryer.Type(), time.Now())
   268  				return
   269  			}
   270  			rc.retryerMap.Delete(method)
   271  			rc.msg = fmt.Sprintf("delete retryer[%s-%s] at %s", method, retryer.Type(), time.Now())
   272  		}
   273  	}
   274  	rc.initRetryer(method, p)
   275  }
   276  
   277  // Init builds the Retryers with the code config.
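// eg. (illustrative; Init is normally invoked by the Kitex client when code-level retry options are set):
//
//	err := retryC.Init(map[string]retry.Policy{
//		retry.Wildcard: retry.BuildFailurePolicy(retry.NewFailurePolicy()),
//	}, nil)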
   278  func (rc *Container) Init(mp map[string]Policy, rr *ShouldResultRetry) (err error) {
   279  	// The NotifyPolicyChange func may execute before the Init func,
   280  	// because the retry Container is built before Client init, so NotifyPolicyChange can be triggered first.
   281  	rc.updateRetryer(rr)
   282  	if err = rc.InitWithPolicies(mp); err != nil {
   283  		return fmt.Errorf("NewRetryer in Init failed, err=%w", err)
   284  	}
   285  	return nil
   286  }
   287  
   288  // PrepareRetryContext adds the necessary keys to the context for retry.
   289  // These keys should be added to `ctx` no matter whether there's a need to retry, to avoid sharing the same
   290  // objects with another method call, since `ctx` might be reused in user-defined middlewares.
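// eg. (illustrative; the framework prepares the context before the retry call, so a custom middleware
// only needs to read the markers if it cares about retried requests):
//
//	ctx = retry.PrepareRetryContext(ctx)
//	reqOp := ctx.Value(retry.CtxReqOp) // non-nil after preparation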
   291  func PrepareRetryContext(ctx context.Context) context.Context {
   292  	// reqOp can be used to avoid multiple writes to the request object.
   293  	// If a blocking write is needed, implement a lock based on it (spin-lock for example).
   294  	reqOp := OpNo
   295  	ctx = context.WithValue(ctx, CtxReqOp, &reqOp)
   296  
   297  	// `respOp` is used to avoid concurrent write/read on the response object, especially for backup requests.
   298  	// If `respOp` is modified by one request of this method call, all other requests will skip decoding.
   299  	respOp := OpNo
   300  	ctx = context.WithValue(ctx, CtxRespOp, &respOp)
   301  	return ctx
   302  }
   303  
   304  // WithRetryIfNeeded checks if there is a retryer that can be used and if the current call can retry.
   305  // When the retry condition is satisfied, the retryer is used to make the call.
   306  func (rc *Container) WithRetryIfNeeded(ctx context.Context, callOptRetry *Policy, rpcCall RPCCallFunc, ri rpcinfo.RPCInfo, request interface{}) (lastRI rpcinfo.RPCInfo, recycleRI bool, err error) {
   307  	var retryer Retryer
   308  	if callOptRetry != nil && callOptRetry.Enable {
   309  		// build retryer for call level if retry policy is set up with callopt
   310  		if retryer, err = NewRetryer(*callOptRetry, nil, rc.cbContainer); err != nil {
   311  			klog.Warnf("KITEX: new callopt retryer[%s] failed, err=%v", callOptRetry.Type, err)
   312  		}
   313  	} else {
   314  		retryer = rc.getRetryer(ri)
   315  	}
   316  
   317  	// case 1(default, fast path): no retry policy
   318  	if retryer == nil {
   319  		if _, _, err = rpcCall(ctx, nil); err == nil {
   320  			return ri, true, nil
   321  		}
   322  		return ri, false, err
   323  	}
   324  
   325  	// case 2: retry policy is set up, but the retry conditions are not satisfied, eg: circuit breaker, retry times == 0, chain stop, ddl
   326  	if msg, ok := retryer.AllowRetry(ctx); !ok {
   327  		if _, _, err = rpcCall(ctx, retryer); err == nil {
   328  			return ri, true, err
   329  		}
   330  		if msg != "" {
   331  			retryer.AppendErrMsgIfNeeded(err, ri, msg)
   332  		}
   333  		return ri, false, err
   334  	}
   335  
   336  	// case 3: do rpc call with retry policy
   337  	lastRI, recycleRI, err = retryer.Do(ctx, rpcCall, ri, request)
   338  	return
   339  }
   340  
   341  // NewRetryer builds a retryer with the given policy.
   342  func NewRetryer(p Policy, r *ShouldResultRetry, cbC *cbContainer) (retryer Retryer, err error) {
   343  	// only one retry policy can be enabled at the same time
   344  	if p.Type == BackupType {
   345  		retryer, err = newBackupRetryer(p, cbC)
   346  	} else {
   347  		retryer, err = newFailureRetryer(p, r, cbC)
   348  	}
   349  	return
   350  }
   351  
   352  func (rc *Container) getRetryer(ri rpcinfo.RPCInfo) Retryer {
   353  	// a retryer for the specific method takes priority over the Wildcard one
   354  	r, ok := rc.retryerMap.Load(ri.To().Method())
   355  	if ok {
   356  		return r.(Retryer)
   357  	}
   358  	r, ok = rc.retryerMap.Load(Wildcard)
   359  	if ok {
   360  		return r.(Retryer)
   361  	}
   362  	return nil
   363  }
   364  
   365  // Dump is used to show the current retry policies.
   366  func (rc *Container) Dump() interface{} {
   367  	rc.RLock()
   368  	dm := make(map[string]interface{})
   369  	dm["has_code_cfg"] = rc.hasCodeCfg
   370  	rc.retryerMap.Range(func(key, value interface{}) bool {
   371  		if r, ok := value.(Retryer); ok {
   372  			dm[key.(string)] = r.Dump()
   373  		}
   374  		return true
   375  	})
   376  	if rc.msg != "" {
   377  		dm["msg"] = rc.msg
   378  	}
   379  	rc.RUnlock()
   380  	return dm
   381  }
   382  
   383  func (rc *Container) initRetryer(method string, p Policy) error {
   384  	retryer, err := NewRetryer(p, rc.shouldResultRetry, rc.cbContainer)
   385  	if err != nil {
   386  		errMsg := fmt.Sprintf("new retryer[%s-%s] failed, err=%s, at %s", method, p.Type, err.Error(), time.Now())
   387  		rc.msg = errMsg
   388  		klog.Warnf(errMsg)
   389  		return err
   390  	}
   391  
   392  	rc.retryerMap.Store(method, retryer)
   393  	if p.Enable {
   394  		rc.msg = fmt.Sprintf("new retryer[%s-%s] at %s", method, retryer.Type(), time.Now())
   395  	} else {
   396  		rc.msg = fmt.Sprintf("disable retryer[%s-%s](enable=%t) %s", method, p.Type, p.Enable, time.Now())
   397  	}
   398  	return nil
   399  }
   400  
   401  func (rc *Container) updateRetryer(rr *ShouldResultRetry) {
   402  	rc.Lock()
   403  	defer rc.Unlock()
   404  
   405  	rc.shouldResultRetry = rr
   406  	if rc.shouldResultRetry != nil {
   407  		rc.retryerMap.Range(func(key, value interface{}) bool {
   408  			if fr, ok := value.(*failureRetryer); ok {
   409  				fr.setSpecifiedResultRetryIfNeeded(rc.shouldResultRetry)
   410  			}
   411  			return true
   412  		})
   413  	}
   414  }
   415  
   416  // Close releases all possible resources referenced.
   417  func (rc *Container) Close() (err error) {
   418  	if rc.cbContainer != nil && rc.cbContainer.cbSuite != nil {
   419  		err = rc.cbContainer.cbSuite.Close()
   420  	}
   421  	return
   422  }