github.com/cloudwego/kitex@v0.9.0/pkg/retry/failure_retryer.go (about) 1 /* 2 * Copyright 2021 CloudWeGo Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package retry 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "strconv" 24 "strings" 25 "sync" 26 "sync/atomic" 27 "time" 28 29 "github.com/cloudwego/kitex/pkg/circuitbreak" 30 "github.com/cloudwego/kitex/pkg/kerrors" 31 "github.com/cloudwego/kitex/pkg/klog" 32 "github.com/cloudwego/kitex/pkg/rpcinfo" 33 ) 34 35 func newFailureRetryer(policy Policy, r *ShouldResultRetry, cbC *cbContainer) (Retryer, error) { 36 fr := &failureRetryer{specifiedResultRetry: r, cbContainer: cbC} 37 if err := fr.UpdatePolicy(policy); err != nil { 38 return nil, fmt.Errorf("newfailureRetryer failed, err=%w", err) 39 } 40 return fr, nil 41 } 42 43 type failureRetryer struct { 44 enable bool 45 policy *FailurePolicy 46 backOff BackOff 47 cbContainer *cbContainer 48 specifiedResultRetry *ShouldResultRetry 49 sync.RWMutex 50 errMsg string 51 } 52 53 // ShouldRetry implements the Retryer interface. 54 func (r *failureRetryer) ShouldRetry(ctx context.Context, err error, callTimes int, req interface{}, cbKey string) (string, bool) { 55 r.RLock() 56 defer r.RUnlock() 57 if !r.enable { 58 return "", false 59 } 60 if stop, msg := circuitBreakerStop(ctx, r.policy.StopPolicy, r.cbContainer, req, cbKey); stop { 61 return msg, false 62 } 63 if stop, msg := ddlStop(ctx, r.policy.StopPolicy); stop { 64 return msg, false 65 } 66 r.backOff.Wait(callTimes) 67 return "", true 68 } 69 70 // AllowRetry implements the Retryer interface. 71 func (r *failureRetryer) AllowRetry(ctx context.Context) (string, bool) { 72 r.RLock() 73 defer r.RUnlock() 74 if !r.enable || r.policy.StopPolicy.MaxRetryTimes == 0 { 75 return "", false 76 } 77 if stop, msg := chainStop(ctx, r.policy.StopPolicy); stop { 78 return msg, false 79 } 80 if stop, msg := ddlStop(ctx, r.policy.StopPolicy); stop { 81 return msg, false 82 } 83 return "", true 84 } 85 86 // Do implement the Retryer interface. 87 func (r *failureRetryer) Do(ctx context.Context, rpcCall RPCCallFunc, firstRI rpcinfo.RPCInfo, req interface{}) (lastRI rpcinfo.RPCInfo, recycleRI bool, err error) { 88 r.RLock() 89 var maxDuration time.Duration 90 if r.policy.StopPolicy.MaxDurationMS > 0 { 91 maxDuration = time.Duration(r.policy.StopPolicy.MaxDurationMS) * time.Millisecond 92 } 93 retryTimes := r.policy.StopPolicy.MaxRetryTimes 94 r.RUnlock() 95 96 var callTimes int32 97 var callCosts strings.Builder 98 var cRI rpcinfo.RPCInfo 99 cbKey, _ := r.cbContainer.cbCtl.GetKey(ctx, req) 100 defer func() { 101 if panicInfo := recover(); panicInfo != nil { 102 err = panicToErr(ctx, panicInfo, firstRI) 103 } 104 }() 105 startTime := time.Now() 106 for i := 0; i <= retryTimes; i++ { 107 var resp interface{} 108 var callStart time.Time 109 if i == 0 { 110 callStart = startTime 111 } else if i > 0 { 112 if maxDuration > 0 && time.Since(startTime) > maxDuration { 113 err = makeRetryErr(ctx, "exceed max duration", callTimes) 114 break 115 } 116 if msg, ok := r.ShouldRetry(ctx, err, i, req, cbKey); !ok { 117 if msg != "" { 118 appendMsg := fmt.Sprintf("retried %d, %s", i-1, msg) 119 appendErrMsg(err, appendMsg) 120 } 121 break 122 } 123 callStart = time.Now() 124 callCosts.WriteByte(',') 125 if respOp, ok := ctx.Value(CtxRespOp).(*int32); ok { 126 atomic.StoreInt32(respOp, OpNo) 127 } 128 } 129 callTimes++ 130 if r.cbContainer.enablePercentageLimit { 131 // record stat before call since requests may be slow, making the limiter more accurate 132 recordRetryStat(cbKey, r.cbContainer.cbPanel, callTimes) 133 } 134 cRI, resp, err = rpcCall(ctx, r) 135 callCosts.WriteString(strconv.FormatInt(time.Since(callStart).Microseconds(), 10)) 136 137 if !r.cbContainer.enablePercentageLimit && r.cbContainer.cbStat { 138 circuitbreak.RecordStat(ctx, req, nil, err, cbKey, r.cbContainer.cbCtl, r.cbContainer.cbPanel) 139 } 140 if err == nil { 141 if r.policy.IsRespRetryNonNil() && r.policy.ShouldResultRetry.RespRetry(resp, cRI) { 142 // user specified resp to do retry 143 continue 144 } 145 break 146 } else { 147 if i == retryTimes { 148 // stop retry then wrap error 149 err = kerrors.ErrRetry.WithCause(err) 150 } else if !r.isRetryErr(err, cRI) { 151 // not timeout or user specified error won't do retry 152 break 153 } 154 } 155 } 156 recordRetryInfo(cRI, callTimes, callCosts.String()) 157 if err == nil && callTimes == 1 { 158 return cRI, true, nil 159 } 160 return cRI, false, err 161 } 162 163 // UpdatePolicy implements the Retryer interface. 164 func (r *failureRetryer) UpdatePolicy(rp Policy) (err error) { 165 if !rp.Enable { 166 r.Lock() 167 r.enable = rp.Enable 168 r.Unlock() 169 return nil 170 } 171 var errMsg string 172 if rp.FailurePolicy == nil || rp.Type != FailureType { 173 errMsg = "FailurePolicy is nil or retry type not match, cannot do update in failureRetryer" 174 err = errors.New(errMsg) 175 } 176 rt := rp.FailurePolicy.StopPolicy.MaxRetryTimes 177 if errMsg == "" && (rt < 0 || rt > maxFailureRetryTimes) { 178 errMsg = fmt.Sprintf("invalid failure MaxRetryTimes[%d]", rt) 179 err = errors.New(errMsg) 180 } 181 if errMsg == "" { 182 if e := checkCBErrorRate(&rp.FailurePolicy.StopPolicy.CBPolicy); e != nil { 183 rp.FailurePolicy.StopPolicy.CBPolicy.ErrorRate = defaultCBErrRate 184 errMsg = fmt.Sprintf("failureRetryer %s, use default %0.2f", e.Error(), defaultCBErrRate) 185 klog.Warnf(errMsg) 186 } 187 } 188 r.Lock() 189 defer r.Unlock() 190 r.enable = rp.Enable 191 if err != nil { 192 r.errMsg = errMsg 193 return err 194 } 195 r.policy = rp.FailurePolicy 196 r.setSpecifiedResultRetryIfNeeded(r.specifiedResultRetry) 197 if bo, e := initBackOff(rp.FailurePolicy.BackOffPolicy); e != nil { 198 r.errMsg = fmt.Sprintf("failureRetryer update BackOffPolicy failed, err=%s", e.Error()) 199 klog.Warnf(r.errMsg) 200 } else { 201 r.backOff = bo 202 } 203 return nil 204 } 205 206 // AppendErrMsgIfNeeded implements the Retryer interface. 207 func (r *failureRetryer) AppendErrMsgIfNeeded(err error, ri rpcinfo.RPCInfo, msg string) { 208 if r.isRetryErr(err, ri) { 209 // Add additional reason when retry is not applied. 210 appendErrMsg(err, msg) 211 } 212 } 213 214 // Prepare implements the Retryer interface. 215 func (r *failureRetryer) Prepare(ctx context.Context, prevRI, retryRI rpcinfo.RPCInfo) { 216 handleRetryInstance(r.policy.RetrySameNode, prevRI, retryRI) 217 } 218 219 func (r *failureRetryer) isRetryErr(err error, ri rpcinfo.RPCInfo) bool { 220 if err == nil { 221 return false 222 } 223 // Logic Notice: 224 // some kinds of error cannot be retried, eg: ServiceCircuitBreak. 225 // But CircuitBreak has been checked in ShouldRetry, it doesn't need to filter ServiceCircuitBreak. 226 // If there are some other specified errors that cannot be retried, it should be filtered here. 227 228 if r.policy.IsRetryForTimeout() && kerrors.IsTimeoutError(err) { 229 return true 230 } 231 if r.policy.IsErrorRetryNonNil() && r.policy.ShouldResultRetry.ErrorRetry(err, ri) { 232 return true 233 } 234 return false 235 } 236 237 func initBackOff(policy *BackOffPolicy) (bo BackOff, err error) { 238 bo = NoneBackOff 239 if policy == nil { 240 return 241 } 242 switch policy.BackOffType { 243 case NoneBackOffType: 244 case FixedBackOffType: 245 if policy.CfgItems == nil { 246 return bo, errors.New("invalid FixedBackOff, CfgItems is nil") 247 } 248 fixMS := policy.CfgItems[FixMSBackOffCfgKey] 249 fixMSInt, _ := strconv.Atoi(fmt.Sprintf("%1.0f", fixMS)) 250 if err = checkFixedBackOff(fixMSInt); err != nil { 251 return 252 } 253 bo = newFixedBackOff(fixMSInt) 254 case RandomBackOffType: 255 if policy.CfgItems == nil { 256 return bo, errors.New("invalid FixedBackOff, CfgItems is nil") 257 } 258 minMS := policy.CfgItems[MinMSBackOffCfgKey] 259 maxMS := policy.CfgItems[MaxMSBackOffCfgKey] 260 minMSInt, _ := strconv.Atoi(fmt.Sprintf("%1.0f", minMS)) 261 maxMSInt, _ := strconv.Atoi(fmt.Sprintf("%1.0f", maxMS)) 262 if err = checkRandomBackOff(minMSInt, maxMSInt); err != nil { 263 return 264 } 265 bo = newRandomBackOff(minMSInt, maxMSInt) 266 default: 267 return bo, fmt.Errorf("invalid backoffType=%v", policy.BackOffType) 268 } 269 return 270 } 271 272 // Type implements the Retryer interface. 273 func (r *failureRetryer) Type() Type { 274 return FailureType 275 } 276 277 // Dump implements the Retryer interface. 278 func (r *failureRetryer) Dump() map[string]interface{} { 279 r.RLock() 280 defer r.RUnlock() 281 dm := make(map[string]interface{}) 282 dm["enable"] = r.enable 283 dm["failure_retry"] = r.policy 284 if r.policy != nil { 285 dm["specified_result_retry"] = map[string]bool{ 286 "error_retry": r.policy.IsErrorRetryNonNil(), 287 "resp_retry": r.policy.IsRespRetryNonNil(), 288 } 289 } 290 if r.errMsg != "" { 291 dm["errMsg"] = r.errMsg 292 } 293 return dm 294 } 295 296 func (r *failureRetryer) setSpecifiedResultRetryIfNeeded(rr *ShouldResultRetry) { 297 if rr != nil { 298 // save the object specified by client.WithSpecifiedResultRetry(..) 299 r.specifiedResultRetry = rr 300 } 301 if r.policy != nil && r.specifiedResultRetry != nil { 302 // The priority of client.WithSpecifiedResultRetry(..) is higher, so always update it 303 // NOTE: client.WithSpecifiedResultRetry(..) will always reject a nil object 304 r.policy.ShouldResultRetry = r.specifiedResultRetry 305 } 306 }