github.com/cloudwego/kitex@v0.9.0/pkg/retry/util.go

/*
 * Copyright 2021 CloudWeGo Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package retry

import (
	"context"
	"errors"
	"fmt"
	"runtime/debug"
	"strconv"

	"github.com/bytedance/gopkg/cloud/metainfo"

	"github.com/cloudwego/kitex/pkg/kerrors"
	"github.com/cloudwego/kitex/pkg/klog"
	"github.com/cloudwego/kitex/pkg/rpcinfo"
	"github.com/cloudwego/kitex/pkg/rpcinfo/remoteinfo"
)

type ctxKey string

const (
	// TransitKey is transited persistently when the request is a retry call.
	// When a request carries this key, the downstream will not retry (currently only supported in Kitex).
	TransitKey = "RetryReq"

	// CtxReqOp is used to avoid concurrent writes to the RPC request.
	CtxReqOp ctxKey = "K_REQ_OP"

	// CtxRespOp is used to avoid concurrent writes/reads of the RPC response.
	CtxRespOp ctxKey = "K_RESP_OP"

	// Wildcard stands for 'any method' when associated with a retryer.
	Wildcard = "*"
)

// Req or Resp operation states, only useful when concurrent writes may happen.
const (
	OpNo int32 = iota
	OpDoing
	OpDone
)

var tagValueFirstTry = "0"

// DDLStopFunc is the type of the ddlStop function.
type DDLStopFunc func(ctx context.Context, policy StopPolicy) (bool, string)

var ddlStopFunc DDLStopFunc

// RegisterDDLStop registers the DDL stop function.
func RegisterDDLStop(f DDLStopFunc) {
	ddlStopFunc = f
}
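// Illustrative sketch, not part of the kitex source: one way a DDLStopFunc might
// be registered. The real stop function is expected to compare the handler cost
// (derived from RPC_PERSIST_INGRESS_START_TIME and the current time) against
// RPC_PERSIST_DDL_REMAIN_TIME, as described on ddlStop below; this hypothetical
// version falls back to the simpler rule "stop once the request context is done"
// so that it only needs the imports already present in this file.
func exampleRegisterDDLStop() {
	RegisterDDLStop(func(ctx context.Context, policy StopPolicy) (bool, string) {
		if _, ok := metainfo.GetPersistentValue(ctx, "RPC_PERSIST_DDL_REMAIN_TIME"); !ok {
			// No DDL metadata was propagated; let the other stop conditions decide.
			return false, ""
		}
		if ctx.Err() != nil {
			// The deadline has already been reached (or the call was canceled),
			// so another retry attempt cannot finish in time.
			return true, "ddl stop retry: request context already done"
		}
		return false, ""
	})
}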
// If Ingress is enabled on the current node, check whether RPC_PERSIST_DDL_REMAIN_TIME exists;
// if it does, calculate the handler cost from RPC_PERSIST_INGRESS_START_TIME and the current time.
// If the handler cost exceeds the DDL remain time, do not execute the retry.
func ddlStop(ctx context.Context, policy StopPolicy) (bool, string) {
	if !policy.DDLStop {
		return false, ""
	}
	if ddlStopFunc == nil {
		klog.Warnf("enable ddl stop for retry, but no ddlStopFunc is registered")
		return false, ""
	}
	return ddlStopFunc(ctx, policy)
}

func chainStop(ctx context.Context, policy StopPolicy) (bool, string) {
	if policy.DisableChainStop {
		return false, ""
	}
	if !IsRemoteRetryRequest(ctx) {
		return false, ""
	}
	return true, "chain stop retry"
}

func circuitBreakerStop(ctx context.Context, policy StopPolicy, cbC *cbContainer, request interface{}, cbKey string) (bool, string) {
	if cbC.cbCtl == nil || cbC.cbPanel == nil {
		return false, ""
	}
	metricer := cbC.cbPanel.GetMetricer(cbKey)
	errRate := metricer.ErrorRate()
	sample := metricer.Samples()
	if sample < cbMinSample || errRate < policy.CBPolicy.ErrorRate {
		return false, ""
	}
	return true, fmt.Sprintf("retry circuit break, errRate=%0.3f, sample=%d", errRate, sample)
}

func handleRetryInstance(retrySameNode bool, prevRI, retryRI rpcinfo.RPCInfo) {
	calledInst := remoteinfo.AsRemoteInfo(prevRI.To()).GetInstance()
	if calledInst == nil {
		return
	}
	if retrySameNode {
		remoteinfo.AsRemoteInfo(retryRI.To()).SetInstance(calledInst)
	} else {
		if me := remoteinfo.AsRemoteInfo(retryRI.To()); me != nil {
			me.SetTag(rpcinfo.RetryPrevInstTag, calledInst.Address().String())
		}
	}
}

func makeRetryErr(ctx context.Context, msg string, callTimes int32) error {
	var ctxErr string
	if ctx.Err() == context.Canceled {
		ctxErr = "context canceled by business."
	}

	ri := rpcinfo.GetRPCInfo(ctx)
	to := ri.To()

	errMsg := fmt.Sprintf("retry[%d] failed, %s, to=%s, method=%s", callTimes-1, msg, to.ServiceName(), to.Method())
	target := to.Address()
	if target != nil {
		errMsg = fmt.Sprintf("%s, remote=%s", errMsg, target.String())
	}
	if ctxErr != "" {
		errMsg = fmt.Sprintf("%s, %s", errMsg, ctxErr)
	}
	return kerrors.ErrRetry.WithCause(errors.New(errMsg))
}

func panicToErr(ctx context.Context, panicInfo interface{}, ri rpcinfo.RPCInfo) error {
	toService, toMethod := "unknown", "unknown"
	if ri != nil {
		toService, toMethod = ri.To().ServiceName(), ri.To().Method()
	}
	err := fmt.Errorf("KITEX: panic in retry, to_service=%s to_method=%s error=%v\nstack=%s",
		toService, toMethod, panicInfo, debug.Stack())
	klog.CtxErrorf(ctx, "%s", err.Error())
	return err
}

func appendErrMsg(err error, msg string) {
	if e, ok := err.(*kerrors.DetailedError); ok {
		// append the no-retry reason
		e.WithExtraMsg(msg)
	}
}

func recordRetryInfo(ri rpcinfo.RPCInfo, callTimes int32, lastCosts string) {
	if callTimes > 1 {
		if re := remoteinfo.AsRemoteInfo(ri.To()); re != nil {
			re.SetTag(rpcinfo.RetryTag, strconv.Itoa(int(callTimes)-1))
			// record the last cost
			re.SetTag(rpcinfo.RetryLastCostTag, lastCosts)
		}
	}
}

// IsLocalRetryRequest checks whether it's a retry request by checking the RetryTag set in rpcinfo.
// It's supposed to be used in client middlewares.
func IsLocalRetryRequest(ctx context.Context) bool {
	ri := rpcinfo.GetRPCInfo(ctx)
	retryCountStr := ri.To().DefaultTag(rpcinfo.RetryTag, tagValueFirstTry)
	return retryCountStr != tagValueFirstTry
}
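// Illustrative sketch, not part of the kitex source: the intended call site of
// IsLocalRetryRequest is a client-side middleware, where the context already
// carries rpcinfo. There, per-request work that should run only once (the
// hypothetical record callback below) can be skipped for retry attempts.
func exampleSkipWorkOnLocalRetry(ctx context.Context, record func(context.Context)) {
	if IsLocalRetryRequest(ctx) {
		// RetryTag is set on rpcinfo, so this attempt is a local retry; the
		// first attempt has already been recorded.
		return
	}
	record(ctx)
}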
// IsRemoteRetryRequest checks whether it's a retry request by checking the TransitKey in metainfo.
// It's supposed to be used on the server side (handler/middleware).
func IsRemoteRetryRequest(ctx context.Context) bool {
	_, isRetry := metainfo.GetPersistentValue(ctx, TransitKey)
	return isRetry
}
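// Illustrative sketch, not part of the kitex source: the intended call site of
// IsRemoteRetryRequest is a server-side handler or middleware. The check reports
// whether the upstream marked this request as a retry by transiting TransitKey
// through persistent metainfo, e.g. so that the server can tag its own logs or
// metrics; the log line below is only an example use.
func exampleServerSideRetryCheck(ctx context.Context) {
	if IsRemoteRetryRequest(ctx) {
		// The upstream set TransitKey ("RetryReq") as a persistent metainfo
		// value; chain stop will keep this node from retrying downstream.
		klog.CtxInfof(ctx, "request is an upstream retry attempt")
	}
}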