github.com/cloudwego/kitex@v0.9.0/pkg/retry/util.go (about)

     1  /*
     2   * Copyright 2021 CloudWeGo Authors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package retry
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"runtime/debug"
    24  	"strconv"
    25  
    26  	"github.com/bytedance/gopkg/cloud/metainfo"
    27  
    28  	"github.com/cloudwego/kitex/pkg/kerrors"
    29  	"github.com/cloudwego/kitex/pkg/klog"
    30  	"github.com/cloudwego/kitex/pkg/rpcinfo"
    31  	"github.com/cloudwego/kitex/pkg/rpcinfo/remoteinfo"
    32  )
    33  
    34  type ctxKey string
    35  
    36  const (
    37  	// TransitKey is transited persistently when the req is a retry call.
    38  	// When a request with this key, the downstream will not do retry(just support in kitex now).
    39  	TransitKey = "RetryReq"
    40  
    41  	// CtxReqOp is used to ignore RPC Request concurrent write
    42  	CtxReqOp ctxKey = "K_REQ_OP"
    43  
    44  	// CtxRespOp is used to ignore RPC Response concurrent write/read.
    45  	CtxRespOp ctxKey = "K_RESP_OP"
    46  
    47  	// Wildcard stands for 'any method' when associated with a retryer.
    48  	Wildcard = "*"
    49  )
    50  
    51  // Req or Resp operation state, just be useful when concurrent write may happen
    52  const (
    53  	OpNo int32 = iota
    54  	OpDoing
    55  	OpDone
    56  )
    57  
// tagValueFirstTry is the retry-count tag value that denotes a first attempt:
// IsLocalRetryRequest uses it as the default for rpcinfo.RetryTag and treats
// any other value as a retry.
var tagValueFirstTry = "0"
    59  
// DDLStopFunc is the signature of a pluggable ddl stop check. It receives the
// call context and the StopPolicy in effect; ddlStop returns its two results
// unchanged.
// NOTE(review): by analogy with chainStop and circuitBreakerStop, the bool
// presumably means "stop retrying" and the string is the reason - confirm
// against the registered implementation.
type DDLStopFunc func(ctx context.Context, policy StopPolicy) (bool, string)
    62  
// ddlStopFunc holds the function installed by RegisterDDLStop. While it is
// nil, ddlStop logs a warning and never stops the retry.
var ddlStopFunc DDLStopFunc
    64  
// RegisterDDLStop registers f as the package-wide ddl stop function consulted
// by ddlStop, replacing any previously registered one.
//
// The assignment is a plain, unsynchronized write to a package-level variable.
// NOTE(review): presumably meant to be called once during initialization,
// before any retry runs - confirm.
func RegisterDDLStop(f DDLStopFunc) {
	ddlStopFunc = f
}
    69  
    70  // If Ingress is turned on in the current node, check whether RPC_PERSIST_DDL_REMAIN_TIME exists,
    71  // if it exists calculate handler time consumed by RPC_PERSIST_INGRESS_START_TIME and current time,
    72  // if handler cost > ddl remain time, then do not execute retry.
    73  func ddlStop(ctx context.Context, policy StopPolicy) (bool, string) {
    74  	if !policy.DDLStop {
    75  		return false, ""
    76  	}
    77  	if ddlStopFunc == nil {
    78  		klog.Warnf("enable ddl stop for retry, but no ddlStopFunc is registered")
    79  		return false, ""
    80  	}
    81  	return ddlStopFunc(ctx, policy)
    82  }
    83  
    84  func chainStop(ctx context.Context, policy StopPolicy) (bool, string) {
    85  	if policy.DisableChainStop {
    86  		return false, ""
    87  	}
    88  	if !IsRemoteRetryRequest(ctx) {
    89  		return false, ""
    90  	}
    91  	return true, "chain stop retry"
    92  }
    93  
    94  func circuitBreakerStop(ctx context.Context, policy StopPolicy, cbC *cbContainer, request interface{}, cbKey string) (bool, string) {
    95  	if cbC.cbCtl == nil || cbC.cbPanel == nil {
    96  		return false, ""
    97  	}
    98  	metricer := cbC.cbPanel.GetMetricer(cbKey)
    99  	errRate := metricer.ErrorRate()
   100  	sample := metricer.Samples()
   101  	if sample < cbMinSample || errRate < policy.CBPolicy.ErrorRate {
   102  		return false, ""
   103  	}
   104  	return true, fmt.Sprintf("retry circuit break, errRate=%0.3f, sample=%d", errRate, sample)
   105  }
   106  
   107  func handleRetryInstance(retrySameNode bool, prevRI, retryRI rpcinfo.RPCInfo) {
   108  	calledInst := remoteinfo.AsRemoteInfo(prevRI.To()).GetInstance()
   109  	if calledInst == nil {
   110  		return
   111  	}
   112  	if retrySameNode {
   113  		remoteinfo.AsRemoteInfo(retryRI.To()).SetInstance(calledInst)
   114  	} else {
   115  		if me := remoteinfo.AsRemoteInfo(retryRI.To()); me != nil {
   116  			me.SetTag(rpcinfo.RetryPrevInstTag, calledInst.Address().String())
   117  		}
   118  	}
   119  }
   120  
   121  func makeRetryErr(ctx context.Context, msg string, callTimes int32) error {
   122  	var ctxErr string
   123  	if ctx.Err() == context.Canceled {
   124  		ctxErr = "context canceled by business."
   125  	}
   126  
   127  	ri := rpcinfo.GetRPCInfo(ctx)
   128  	to := ri.To()
   129  
   130  	errMsg := fmt.Sprintf("retry[%d] failed, %s, to=%s, method=%s", callTimes-1, msg, to.ServiceName(), to.Method())
   131  	target := to.Address()
   132  	if target != nil {
   133  		errMsg = fmt.Sprintf("%s, remote=%s", errMsg, target.String())
   134  	}
   135  	if ctxErr != "" {
   136  		errMsg = fmt.Sprintf("%s, %s", errMsg, ctxErr)
   137  	}
   138  	return kerrors.ErrRetry.WithCause(errors.New(errMsg))
   139  }
   140  
   141  func panicToErr(ctx context.Context, panicInfo interface{}, ri rpcinfo.RPCInfo) error {
   142  	toService, toMethod := "unknown", "unknown"
   143  	if ri != nil {
   144  		toService, toMethod = ri.To().ServiceName(), ri.To().Method()
   145  	}
   146  	err := fmt.Errorf("KITEX: panic in retry, to_service=%s to_method=%s error=%v\nstack=%s",
   147  		toService, toMethod, panicInfo, debug.Stack())
   148  	klog.CtxErrorf(ctx, "%s", err.Error())
   149  	return err
   150  }
   151  
   152  func appendErrMsg(err error, msg string) {
   153  	if e, ok := err.(*kerrors.DetailedError); ok {
   154  		// append no retry reason
   155  		e.WithExtraMsg(msg)
   156  	}
   157  }
   158  
   159  func recordRetryInfo(ri rpcinfo.RPCInfo, callTimes int32, lastCosts string) {
   160  	if callTimes > 1 {
   161  		if re := remoteinfo.AsRemoteInfo(ri.To()); re != nil {
   162  			re.SetTag(rpcinfo.RetryTag, strconv.Itoa(int(callTimes)-1))
   163  			// record last cost
   164  			re.SetTag(rpcinfo.RetryLastCostTag, lastCosts)
   165  		}
   166  	}
   167  }
   168  
   169  // IsLocalRetryRequest checks whether it's a retry request by checking the RetryTag set in rpcinfo
   170  // It's supposed to be used in client middlewares
   171  func IsLocalRetryRequest(ctx context.Context) bool {
   172  	ri := rpcinfo.GetRPCInfo(ctx)
   173  	retryCountStr := ri.To().DefaultTag(rpcinfo.RetryTag, tagValueFirstTry)
   174  	return retryCountStr != tagValueFirstTry
   175  }
   176  
   177  // IsRemoteRetryRequest checks whether it's a retry request by checking the TransitKey in metainfo
   178  // It's supposed to be used in server side (handler/middleware)
   179  func IsRemoteRetryRequest(ctx context.Context) bool {
   180  	_, isRetry := metainfo.GetPersistentValue(ctx, TransitKey)
   181  	return isRetry
   182  }