github.com/cloudwego/kitex@v0.9.0/pkg/retry/retryer.go (about)

/*
 * Copyright 2021 CloudWeGo Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Package retry implements rpc retry
package retry

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/bytedance/gopkg/cloud/circuitbreaker"

	"github.com/cloudwego/kitex/pkg/circuitbreak"
	"github.com/cloudwego/kitex/pkg/klog"
	"github.com/cloudwego/kitex/pkg/rpcinfo"
)

// RPCCallFunc is the definition of the wrapped rpc call.
type RPCCallFunc func(context.Context, Retryer) (rpcinfo rpcinfo.RPCInfo, resp interface{}, err error)

// Retryer is the interface that retry implementations must satisfy.
type Retryer interface {
	// AllowRetry checks if the current request satisfies the retry conditions
	// (e.g. circuit breaker open, retry times == 0, chain stop, ddl).
	// If the conditions are not satisfied, Retryer.Do won't be executed and the
	// reason message is returned.
	// The first call is executed anyway, regardless of whether retry is allowed.
	AllowRetry(ctx context.Context) (msg string, ok bool)

	// ShouldRetry checks if a retry request can be issued; it is checked inside Retryer.Do.
	// If the conditions are not satisfied, the reason message is returned.
	ShouldRetry(ctx context.Context, err error, callTimes int, req interface{}, cbKey string) (msg string, ok bool)
	UpdatePolicy(policy Policy) error

	// Do executes the retry policy. recycleRI decides whether the firstRI can be recycled.
	Do(ctx context.Context, rpcCall RPCCallFunc, firstRI rpcinfo.RPCInfo, request interface{}) (lastRI rpcinfo.RPCInfo, recycleRI bool, err error)
	AppendErrMsgIfNeeded(err error, ri rpcinfo.RPCInfo, msg string)

	// Prepare does whatever is needed before the retry call.
	Prepare(ctx context.Context, prevRI, retryRI rpcinfo.RPCInfo)
	Dump() map[string]interface{}
	Type() Type
}

// NewRetryContainerWithCB builds a Container that doesn't record circuit breaker statistics itself
// but reads the existing statistic results.
// It is used when the service circuit breaker is already enabled, so that failures are not counted twice.
// eg:
//
//	cbs := circuitbreak.NewCBSuite(circuitbreak.RPCInfo2Key)
//	retryC := retry.NewRetryContainerWithCB(cbs.ServiceControl(), cbs.ServicePanel())
//	var opts []client.Option
//	opts = append(opts, client.WithRetryContainer(retryC))
//	// enable service circuit breaker
//	opts = append(opts, client.WithMiddleware(cbs.ServiceCBMW()))
func NewRetryContainerWithCB(cc *circuitbreak.Control, cp circuitbreaker.Panel) *Container {
	return NewRetryContainer(WithContainerCBControl(cc), WithContainerCBPanel(cp))
}

func newCBSuite() *circuitbreak.CBSuite {
	return circuitbreak.NewCBSuite(circuitbreak.RPCInfo2Key)
}
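
// Example (illustrative sketch, not part of the original file): combining the
// shared-CBSuite container above with a wildcard failure-retry policy. The
// Policy literal fields and retry.Wildcard come from policy.go of this
// package; the echoservice client is hypothetical.
//
//	cbs := circuitbreak.NewCBSuite(circuitbreak.RPCInfo2Key)
//	retryC := retry.NewRetryContainerWithCB(cbs.ServiceControl(), cbs.ServicePanel())
//	// retry every method on failure, with the default failure policy
//	_ = retryC.InitWithPolicies(map[string]retry.Policy{
//		retry.Wildcard: {Enable: true, Type: retry.FailureType, FailurePolicy: retry.NewFailurePolicy()},
//	})
//	cli := echoservice.MustNewClient("echo",
//		client.WithRetryContainer(retryC),
//		client.WithMiddleware(cbs.ServiceCBMW()))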

// NewRetryContainerWithCBStat builds a Container that records circuit breaker statistics itself.
// It is used when the service CB key is customized.
// eg:
//
//	cbs := circuitbreak.NewCBSuite(YourGenServiceCBKeyFunc)
//	retry.NewRetryContainerWithCBStat(cbs.ServiceControl(), cbs.ServicePanel())
func NewRetryContainerWithCBStat(cc *circuitbreak.Control, cp circuitbreaker.Panel) *Container {
	return NewRetryContainer(WithContainerCBControl(cc), WithContainerCBPanel(cp), WithContainerCBStat())
}

// NewRetryContainerWithPercentageLimit builds a Container that limits the percentage of retry requests.
// This is the RECOMMENDED initializer if you want to control PRECISELY the percentage of retry requests.
func NewRetryContainerWithPercentageLimit() *Container {
	return NewRetryContainer(WithContainerEnablePercentageLimit())
}

// ContainerOption is used when initializing a Container.
type ContainerOption func(rc *Container)

// WithContainerCBSuite specifies the CBSuite used in the retry circuit breaker;
// the retryer will use its ServiceControl and ServicePanel.
// Its priority is lower than WithContainerCBControl and WithContainerCBPanel.
func WithContainerCBSuite(cbs *circuitbreak.CBSuite) ContainerOption {
	return func(rc *Container) {
		rc.cbContainer.cbSuite = cbs
	}
}

// WithContainerCBControl specifies the circuitbreak.Control used in the retry circuit breaker.
// It's the user's responsibility to make sure it's paired with the panel.
func WithContainerCBControl(ctrl *circuitbreak.Control) ContainerOption {
	return func(rc *Container) {
		rc.cbContainer.cbCtl = ctrl
	}
}

// WithContainerCBPanel specifies the circuitbreaker.Panel used in the retry circuit breaker.
// It's the user's responsibility to make sure it's paired with the control.
func WithContainerCBPanel(panel circuitbreaker.Panel) ContainerOption {
	return func(rc *Container) {
		rc.cbContainer.cbPanel = panel
	}
}

// WithContainerCBStat instructs that circuitbreak.RecordStat is called within the retryer.
func WithContainerCBStat() ContainerOption {
	return func(rc *Container) {
		rc.cbContainer.cbStat = true
	}
}

// WithContainerEnablePercentageLimit should be called to limit the percentage of retry requests.
func WithContainerEnablePercentageLimit() ContainerOption {
	return func(rc *Container) {
		rc.cbContainer.enablePercentageLimit = true
	}
}

// NewRetryContainer builds a Container which, by default, creates a circuit breaker suite and records
// circuit breaker statistics.
// The caller is responsible for calling Container.Close() to release the referenced resources.
func NewRetryContainer(opts ...ContainerOption) *Container {
	rc := &Container{
		cbContainer: &cbContainer{
			cbSuite: nil,
		},
		retryerMap: sync.Map{},
	}
	for _, opt := range opts {
		opt(rc)
	}

	if rc.cbContainer.enablePercentageLimit {
		// ignore cbSuite/cbCtl/cbPanel options
		rc.cbContainer = &cbContainer{
			enablePercentageLimit: true,
			cbSuite:               newCBSuite(),
		}
	}

	container := rc.cbContainer
	if container.cbCtl == nil && container.cbPanel == nil {
		if container.cbSuite == nil {
			container.cbSuite = newCBSuite()
			container.cbStat = true
		}
		container.cbCtl = container.cbSuite.ServiceControl()
		container.cbPanel = container.cbSuite.ServicePanel()
	}
	if !container.IsValid() {
		panic("KITEX: invalid container")
	}
	return rc
}
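
// Example (sketch): a customized service CB key function, as assumed by
// NewRetryContainerWithCBStat above. genServiceCBKey is hypothetical; its
// signature follows circuitbreak.GenServiceCBKeyFunc.
//
//	func genServiceCBKey(ri rpcinfo.RPCInfo) string {
//		// one circuit breaker per caller/callee service pair instead of per method
//		return ri.From().ServiceName() + "/" + ri.To().ServiceName()
//	}
//
//	cbs := circuitbreak.NewCBSuite(genServiceCBKey)
//	retryC := retry.NewRetryContainerWithCBStat(cbs.ServiceControl(), cbs.ServicePanel())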

// Container is a wrapper for Retryer.
type Container struct {
	hasCodeCfg  bool
	retryerMap  sync.Map // <method: retryer>
	cbContainer *cbContainer
	msg         string
	sync.RWMutex

	// shouldResultRetry is only used with FailureRetry
	shouldResultRetry *ShouldResultRetry
}

// Recommended usage: NewRetryContainerWithPercentageLimit()
// For more details, refer to the comments for each field below.
type cbContainer struct {
	// In NewRetryContainer, if cbCtl & cbPanel are not set, Kitex will use cbSuite.ServiceControl() and
	// cbSuite.ServicePanel(); if cbSuite is nil, Kitex will create one.
	cbSuite *circuitbreak.CBSuite

	// It's recommended to rely on the cbSuite rather than specifying cbCtl & cbPanel with the corresponding
	// options, since cbCtl & cbPanel must be correctly paired; with the cbSuite, Kitex ensures this by using
	// cbSuite.ServiceControl() and cbSuite.ServicePanel().
	cbCtl   *circuitbreak.Control
	cbPanel circuitbreaker.Panel

	// If cbStat && !enablePercentageLimit, the retryer will call `circuitbreak.RecordStat` after the rpcCall
	// to record rpc failures/timeouts, cutting down the retry requests when the error rate is beyond the threshold.
	cbStat bool

	// If enabled, Kitex will always create a cbSuite and use its cbCtl & cbPanel, and the retryer will call
	// recordRetryStat before the rpcCall, to precisely control the percentage of retry requests over all requests.
	enablePercentageLimit bool
}

// IsValid returns true when both cbCtl & cbPanel are not nil.
// It's the user's responsibility to guarantee that cbCtl & cbPanel are correctly paired.
func (c *cbContainer) IsValid() bool {
	return c.cbCtl != nil && c.cbPanel != nil
}

// InitWithPolicies inits the Retryers with methodPolicies.
// Notice: InitWithPolicies is an exported func, so the lock must be taken inside.
func (rc *Container) InitWithPolicies(methodPolicies map[string]Policy) error {
	if methodPolicies == nil {
		return nil
	}
	rc.Lock()
	defer rc.Unlock()
	var inited bool
	for m := range methodPolicies {
		if methodPolicies[m].Enable {
			inited = true
			if _, ok := rc.retryerMap.Load(m); ok {
				// NotifyPolicyChange may have happened before
				continue
			}
			if err := rc.initRetryer(m, methodPolicies[m]); err != nil {
				rc.msg = err.Error()
				return err
			}
		}
	}
	rc.hasCodeCfg = inited
	return nil
}
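
// Example (sketch): initializing per-method policies in code. The method names
// and the backup delay are hypothetical; the Policy literal fields come from
// policy.go of this package.
//
//	rc := retry.NewRetryContainer()
//	err := rc.InitWithPolicies(map[string]retry.Policy{
//		"Echo":  {Enable: true, Type: retry.FailureType, FailurePolicy: retry.NewFailurePolicy()},
//		"Query": {Enable: true, Type: retry.BackupType, BackupPolicy: retry.NewBackupPolicy(20)},
//	})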

// DeletePolicy deletes the retryer of the given method.
func (rc *Container) DeletePolicy(method string) {
	rc.Lock()
	defer rc.Unlock()
	rc.msg = ""
	if rc.hasCodeCfg {
		// the priority of the code policy set up by the user is higher than the remote config
		return
	}
	_, ok := rc.retryerMap.Load(method)
	if ok {
		rc.retryerMap.Delete(method)
		rc.msg = fmt.Sprintf("delete retryer[%s] at %s", method, time.Now())
	}
}

// NotifyPolicyChange is called to receive a policy when it changes.
func (rc *Container) NotifyPolicyChange(method string, p Policy) {
	rc.Lock()
	defer rc.Unlock()
	rc.msg = ""
	if rc.hasCodeCfg {
		// the priority of the code policy set up by the user is higher than the remote config
		return
	}
	r, ok := rc.retryerMap.Load(method)
	if ok && r != nil {
		retryer, ok := r.(Retryer)
		if ok {
			if retryer.Type() == p.Type {
				retryer.UpdatePolicy(p)
				rc.msg = fmt.Sprintf("update retryer[%s-%s] at %s", method, retryer.Type(), time.Now())
				return
			}
			rc.retryerMap.Delete(method)
			rc.msg = fmt.Sprintf("delete retryer[%s-%s] at %s", method, retryer.Type(), time.Now())
		}
	}
	rc.initRetryer(method, p)
}

// Init builds the Retryers with the code config.
func (rc *Container) Init(mp map[string]Policy, rr *ShouldResultRetry) (err error) {
	// The NotifyPolicyChange func may execute before the Init func, because the retry
	// Container is built before Client init, so NotifyPolicyChange can be triggered first.
	rc.updateRetryer(rr)
	if err = rc.InitWithPolicies(mp); err != nil {
		return fmt.Errorf("NewRetryer in Init failed, err=%w", err)
	}
	return nil
}

// PrepareRetryContext adds the necessary keys to the context for retry.
// These keys should be added to `ctx` whether or not there's a need to retry, to avoid sharing the same
// objects with another method call, since `ctx` might be reused in user-defined middlewares.
func PrepareRetryContext(ctx context.Context) context.Context {
	// reqOp can be used to avoid multiple writes to the request object.
	// If a blocking write is needed, implement a lock based on it (a spin-lock, for example).
	reqOp := OpNo
	ctx = context.WithValue(ctx, CtxReqOp, &reqOp)

	// `respOp` is used to avoid concurrent write/read on the response object, especially for backup requests.
	// If `respOp` is modified by one request of this method call, all other requests will skip decoding.
	respOp := OpNo
	ctx = context.WithValue(ctx, CtxRespOp, &respOp)
	return ctx
}
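
// Example (sketch): applying dynamic policy updates from a remote config
// source. The watcher callback and its wiring are hypothetical; it only relies
// on NotifyPolicyChange and DeletePolicy above. Note that both are no-ops when
// policies were set up in code (hasCodeCfg), since code config takes priority.
//
//	func onRemotePolicyChange(rc *retry.Container, method string, p *retry.Policy) {
//		if p == nil {
//			// the policy was removed from the config center
//			rc.DeletePolicy(method)
//			return
//		}
//		rc.NotifyPolicyChange(method, *p)
//	}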

// WithRetryIfNeeded checks if there is a retryer that can be used and if the current call can retry.
// When the retry condition is satisfied, the retryer is used to make the call.
func (rc *Container) WithRetryIfNeeded(ctx context.Context, callOptRetry *Policy, rpcCall RPCCallFunc, ri rpcinfo.RPCInfo, request interface{}) (lastRI rpcinfo.RPCInfo, recycleRI bool, err error) {
	var retryer Retryer
	if callOptRetry != nil && callOptRetry.Enable {
		// build a call-level retryer if the retry policy is set up with callopt
		if retryer, err = NewRetryer(*callOptRetry, nil, rc.cbContainer); err != nil {
			klog.Warnf("KITEX: new callopt retryer[%s] failed, err=%v", callOptRetry.Type, err)
		}
	} else {
		retryer = rc.getRetryer(ri)
	}

	// case 1 (default, fast path): no retry policy
	if retryer == nil {
		if _, _, err = rpcCall(ctx, nil); err == nil {
			return ri, true, nil
		}
		return ri, false, err
	}

	// case 2: a retry policy is set up, but the retry condition is not satisfied,
	// e.g. circuit breaker open, retry times == 0, chain stop, ddl
	if msg, ok := retryer.AllowRetry(ctx); !ok {
		if _, _, err = rpcCall(ctx, retryer); err == nil {
			return ri, true, err
		}
		if msg != "" {
			retryer.AppendErrMsgIfNeeded(err, ri, msg)
		}
		return ri, false, err
	}

	// case 3: do the rpc call with the retry policy
	lastRI, recycleRI, err = retryer.Do(ctx, rpcCall, ri, request)
	return
}

// NewRetryer builds a retryer with the given policy.
func NewRetryer(p Policy, r *ShouldResultRetry, cbC *cbContainer) (retryer Retryer, err error) {
	// only one retry policy can be enabled at a time
	if p.Type == BackupType {
		retryer, err = newBackupRetryer(p, cbC)
	} else {
		retryer, err = newFailureRetryer(p, r, cbC)
	}
	return
}

func (rc *Container) getRetryer(ri rpcinfo.RPCInfo) Retryer {
	// the retryer for a specific method takes priority
	r, ok := rc.retryerMap.Load(ri.To().Method())
	if ok {
		return r.(Retryer)
	}
	r, ok = rc.retryerMap.Load(Wildcard)
	if ok {
		return r.(Retryer)
	}
	return nil
}

// Dump is used to show the current retry policy.
func (rc *Container) Dump() interface{} {
	rc.RLock()
	dm := make(map[string]interface{})
	dm["has_code_cfg"] = rc.hasCodeCfg
	rc.retryerMap.Range(func(key, value interface{}) bool {
		if r, ok := value.(Retryer); ok {
			dm[key.(string)] = r.Dump()
		}
		return true
	})
	if rc.msg != "" {
		dm["msg"] = rc.msg
	}
	rc.RUnlock()
	return dm
}

func (rc *Container) initRetryer(method string, p Policy) error {
	retryer, err := NewRetryer(p, rc.shouldResultRetry, rc.cbContainer)
	if err != nil {
		errMsg := fmt.Sprintf("new retryer[%s-%s] failed, err=%s, at %s", method, p.Type, err.Error(), time.Now())
		rc.msg = errMsg
		klog.Warnf("%s", errMsg)
		return err
	}

	rc.retryerMap.Store(method, retryer)
	if p.Enable {
		rc.msg = fmt.Sprintf("new retryer[%s-%s] at %s", method, retryer.Type(), time.Now())
	} else {
		rc.msg = fmt.Sprintf("disable retryer[%s-%s](enable=%t) %s", method, p.Type, p.Enable, time.Now())
	}
	return nil
}

func (rc *Container) updateRetryer(rr *ShouldResultRetry) {
	rc.Lock()
	defer rc.Unlock()

	rc.shouldResultRetry = rr
	if rc.shouldResultRetry != nil {
		rc.retryerMap.Range(func(key, value interface{}) bool {
			if fr, ok := value.(*failureRetryer); ok {
				fr.setSpecifiedResultRetryIfNeeded(rc.shouldResultRetry)
			}
			return true
		})
	}
}
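
// Example (sketch): customizing result-based retry via Init. The sentinel
// error is hypothetical, and the ShouldResultRetry field signature is assumed
// from policy.go of this version; only failure retryers consult it.
//
//	rr := &retry.ShouldResultRetry{
//		ErrorRetry: func(err error, ri rpcinfo.RPCInfo) bool {
//			return errors.Is(err, errTransient) // errTransient is a hypothetical sentinel
//		},
//	}
//	err := rc.Init(map[string]retry.Policy{
//		retry.Wildcard: {Enable: true, Type: retry.FailureType, FailurePolicy: retry.NewFailurePolicy()},
//	}, rr)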

// Close releases all the resources referenced by the Container.
func (rc *Container) Close() (err error) {
	if rc.cbContainer != nil && rc.cbContainer.cbSuite != nil {
		err = rc.cbContainer.cbSuite.Close()
	}
	return
}
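
// Example (sketch): the Container owns its CBSuite when none is injected via
// options, so callers should close the container when the client is torn down.
//
//	rc := retry.NewRetryContainer()
//	defer rc.Close() // releases the internally created CBSuite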