go.etcd.io/etcd@v3.3.27+incompatible/clientv3/retry_interceptor.go (about) 1 // Copyright 2016 The etcd Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Based on github.com/grpc-ecosystem/go-grpc-middleware/retry, but modified to support the more 16 // fine grained error checking required by write-at-most-once retry semantics of etcd. 17 18 package clientv3 19 20 import ( 21 "context" 22 "io" 23 "sync" 24 "time" 25 26 "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes" 27 "go.uber.org/zap" 28 "google.golang.org/grpc" 29 "google.golang.org/grpc/codes" 30 "google.golang.org/grpc/metadata" 31 "google.golang.org/grpc/status" 32 ) 33 34 // unaryClientInterceptor returns a new retrying unary client interceptor. 35 // 36 // The default configuration of the interceptor is to not retry *at all*. This behaviour can be 37 // changed through options (e.g. WithMax) on creation of the interceptor or on call (through grpc.CallOptions). 
func (c *Client) unaryClientInterceptor(logger *zap.Logger, optFuncs ...retryOption) grpc.UnaryClientInterceptor {
	intOpts := reuseOrNewWithCallOptions(defaultOptions, optFuncs)
	return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
		// Stamp the outgoing context with the client-version metadata.
		ctx = withVersion(ctx)
		// Split grpc-native call options from clientv3 retry options; per-call
		// retry options override the interceptor-level defaults.
		grpcOpts, retryOpts := filterCallOptions(opts)
		callOpts := reuseOrNewWithCallOptions(intOpts, retryOpts)
		// short circuit for simplicity, and avoiding allocations.
		if callOpts.max == 0 {
			return invoker(ctx, method, req, reply, cc, grpcOpts...)
		}
		var lastErr error
		for attempt := uint(0); attempt < callOpts.max; attempt++ {
			// Back off before each retry (attempt 0 never waits); aborts early
			// with a gRPC status error if ctx is done.
			if err := waitRetryBackoff(ctx, attempt, callOpts); err != nil {
				return err
			}
			logger.Debug(
				"retrying of unary invoker",
				zap.String("target", cc.Target()),
				zap.Uint("attempt", attempt),
			)
			lastErr = invoker(ctx, method, req, reply, cc, grpcOpts...)
			if lastErr == nil {
				return nil
			}
			logger.Warn(
				"retrying of unary invoker failed",
				zap.String("target", cc.Target()),
				zap.Uint("attempt", attempt),
				zap.Error(lastErr),
			)
			if isContextError(lastErr) {
				if ctx.Err() != nil {
					// it's the context deadline or cancellation.
					return lastErr
				}
				// it's the callCtx deadline or cancellation, in which case try again.
				continue
			}
			if callOpts.retryAuth && rpctypes.Error(lastErr) == rpctypes.ErrInvalidAuthToken {
				// The auth token was rejected; fetch a fresh one and retry the call.
				gterr := c.getToken(ctx)
				if gterr != nil {
					logger.Warn(
						"retrying of unary invoker failed to fetch new auth token",
						zap.String("target", cc.Target()),
						zap.Error(gterr),
					)
					return gterr // lastErr must be invalid auth token
				}
				continue
			}
			// Stop retrying unless the error is known to be safe under this
			// call's retry policy (write-at-most-once semantics).
			if !isSafeRetry(c.lg, lastErr, callOpts) {
				return lastErr
			}
		}
		return lastErr
	}
}

// streamClientInterceptor returns a new retrying stream client interceptor for server side streaming calls.
//
// The default configuration of the interceptor is to not retry *at all*. This behaviour can be
// changed through options (e.g. WithMax) on creation of the interceptor or on call (through grpc.CallOptions).
//
// Retry logic is available *only for ServerStreams*, i.e. 1:n streams, as the internal logic needs
// to buffer the messages sent by the client. If retry is enabled on any other streams (ClientStreams,
// BidiStreams), the retry interceptor will fail the call.
func (c *Client) streamClientInterceptor(logger *zap.Logger, optFuncs ...retryOption) grpc.StreamClientInterceptor {
	intOpts := reuseOrNewWithCallOptions(defaultOptions, optFuncs)
	return func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) {
		// Stamp the outgoing context with the client-version metadata.
		ctx = withVersion(ctx)
		// getToken automatically
		// TODO(cfc4n): keep this code block, remove codes about getToken in client.go after pr #12165 merged.
		if c.authTokenBundle != nil {
			// equal to c.Username != "" && c.Password != ""
			err := c.getToken(ctx)
			if err != nil && rpctypes.Error(err) != rpctypes.ErrAuthNotEnabled {
				logger.Error("clientv3/retry_interceptor: getToken failed", zap.Error(err))
				return nil, err
			}
		}
		grpcOpts, retryOpts := filterCallOptions(opts)
		callOpts := reuseOrNewWithCallOptions(intOpts, retryOpts)
		// short circuit for simplicity, and avoiding allocations.
		if callOpts.max == 0 {
			return streamer(ctx, desc, cc, method, grpcOpts...)
		}
		// Retry is only implemented for 1:n server-streaming calls; client
		// streams cannot be replayed safely, so fail them explicitly.
		if desc.ClientStreams {
			return nil, status.Errorf(codes.Unimplemented, "clientv3/retry_interceptor: cannot retry on ClientStreams, set Disable()")
		}
		newStreamer, err := streamer(ctx, desc, cc, method, grpcOpts...)
		if err != nil {
			logger.Error("streamer failed to create ClientStream", zap.Error(err))
			return nil, err // TODO(mwitkow): Maybe dial and transport errors should be retriable?
		}
		// Wrap the stream so a failed RecvMsg can transparently re-establish
		// the stream (via streamerCall) and replay the buffered sends.
		retryingStreamer := &serverStreamingRetryingStream{
			client:       c,
			ClientStream: newStreamer,
			callOpts:     callOpts,
			ctx:          ctx,
			streamerCall: func(ctx context.Context) (grpc.ClientStream, error) {
				return streamer(ctx, desc, cc, method, grpcOpts...)
			},
		}
		return retryingStreamer, nil
	}
}

// type serverStreamingRetryingStream is the implementation of grpc.ClientStream that acts as a
// proxy to the underlying call. If any of the RecvMsg() calls fail, it will try to reestablish
// a new ClientStream according to the retry policy.
type serverStreamingRetryingStream struct {
	grpc.ClientStream
	client        *Client
	bufferedSends []interface{} // messages sent by the client, buffered for replay on retry
	receivedGood  bool          // indicates whether any prior receives were successful
	wasClosedSend bool          // indicates that CloseSend was called
	ctx           context.Context
	callOpts      *options
	streamerCall  func(ctx context.Context) (grpc.ClientStream, error)
	mu            sync.RWMutex
}

// setStream swaps in a newly re-established underlying ClientStream.
func (s *serverStreamingRetryingStream) setStream(clientStream grpc.ClientStream) {
	s.mu.Lock()
	s.ClientStream = clientStream
	s.mu.Unlock()
}

// getStream returns the current underlying ClientStream under the read lock.
func (s *serverStreamingRetryingStream) getStream() grpc.ClientStream {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.ClientStream
}

// SendMsg buffers m so it can be replayed on a re-established stream, then
// forwards it to the current underlying stream.
func (s *serverStreamingRetryingStream) SendMsg(m interface{}) error {
	s.mu.Lock()
	s.bufferedSends = append(s.bufferedSends, m)
	s.mu.Unlock()
	return s.getStream().SendMsg(m)
}

// CloseSend records that the send side was closed (so a re-established stream
// can be closed the same way) and forwards the close.
func (s *serverStreamingRetryingStream) CloseSend() error {
	s.mu.Lock()
	s.wasClosedSend = true
	s.mu.Unlock()
	return s.getStream().CloseSend()
}

func (s *serverStreamingRetryingStream) Header() (metadata.MD, error) {
	return s.getStream().Header()
}

func (s *serverStreamingRetryingStream) Trailer() metadata.MD {
	return s.getStream().Trailer()
}

// RecvMsg receives into m, re-establishing the stream and retrying (up to
// callOpts.max attempts total) whenever receiveMsgAndIndicateRetry deems the
// failure safe to retry.
func (s *serverStreamingRetryingStream) RecvMsg(m interface{}) error {
	attemptRetry, lastErr := s.receiveMsgAndIndicateRetry(m)
	if !attemptRetry {
		return lastErr // success or hard failure
	}

	// We start off from attempt 1, because zeroth was already made on normal SendMsg().
	for attempt := uint(1); attempt < s.callOpts.max; attempt++ {
		if err := waitRetryBackoff(s.ctx, attempt, s.callOpts); err != nil {
			return err
		}
		newStream, err := s.reestablishStreamAndResendBuffer(s.ctx)
		if err != nil {
			s.client.lg.Error("failed reestablishStreamAndResendBuffer", zap.Error(err))
			return err // TODO(mwitkow): Maybe dial and transport errors should be retriable?
		}
		s.setStream(newStream)

		s.client.lg.Warn("retrying RecvMsg", zap.Error(lastErr))
		attemptRetry, lastErr = s.receiveMsgAndIndicateRetry(m)
		if !attemptRetry {
			return lastErr
		}
	}
	return lastErr
}

// receiveMsgAndIndicateRetry performs a single RecvMsg and reports whether
// the caller should retry (true) or surface the result as-is (false).
func (s *serverStreamingRetryingStream) receiveMsgAndIndicateRetry(m interface{}) (bool, error) {
	s.mu.RLock()
	wasGood := s.receivedGood
	s.mu.RUnlock()
	err := s.getStream().RecvMsg(m)
	if err == nil || err == io.EOF {
		// io.EOF is normal stream termination; both mark the stream as having
		// delivered successfully, disabling future retries on it.
		s.mu.Lock()
		s.receivedGood = true
		s.mu.Unlock()
		return false, err
	} else if wasGood {
		// previous RecvMsg in the stream succeeded, no retry logic should interfere
		return false, err
	}
	if isContextError(err) {
		if s.ctx.Err() != nil {
			return false, err
		}
		// it's the callCtx deadline or cancellation, in which case try again.
		return true, err
	}
	if s.callOpts.retryAuth && rpctypes.Error(err) == rpctypes.ErrInvalidAuthToken {
		// The auth token was rejected; refresh it before signalling a retry.
		gterr := s.client.getToken(s.ctx)
		if gterr != nil {
			s.client.lg.Warn("retry failed to fetch new auth token", zap.Error(gterr))
			return false, err // return the original error for simplicity
		}
		return true, err

	}
	return isSafeRetry(s.client.lg, err, s.callOpts), err
}

// reestablishStreamAndResendBuffer opens a fresh stream and replays every
// buffered SendMsg, followed by CloseSend, restoring the pre-failure state.
func (s *serverStreamingRetryingStream) reestablishStreamAndResendBuffer(callCtx context.Context) (grpc.ClientStream, error) {
	s.mu.RLock()
	bufferedSends := s.bufferedSends
	s.mu.RUnlock()
	newStream, err := s.streamerCall(callCtx)
	if err != nil {
		return nil, err
	}
	for _, msg := range bufferedSends {
		if err := newStream.SendMsg(msg); err != nil {
			return nil, err
		}
	}
	if err := newStream.CloseSend(); err != nil {
		return nil, err
	}
	return newStream, nil
}

// waitRetryBackoff sleeps for the backoff duration of the given attempt
// (attempt 0 never waits), returning early with a gRPC status error when
// ctx is done first.
func waitRetryBackoff(ctx context.Context, attempt uint, callOpts *options) error {
	waitTime := time.Duration(0)
	if attempt > 0 {
		waitTime = callOpts.backoffFunc(attempt)
	}
	if waitTime > 0 {
		timer := time.NewTimer(waitTime)
		select {
		case <-ctx.Done():
			timer.Stop()
			return contextErrToGrpcErr(ctx.Err())
		case <-timer.C:
		}
	}
	return nil
}

// isSafeRetry returns "true", if request is safe for retry with the given error.
291 func isSafeRetry(lg *zap.Logger, err error, callOpts *options) bool { 292 if isContextError(err) { 293 return false 294 } 295 switch callOpts.retryPolicy { 296 case repeatable: 297 return isSafeRetryImmutableRPC(err) 298 case nonRepeatable: 299 return isSafeRetryMutableRPC(err) 300 default: 301 lg.Warn("unrecognized retry policy", zap.String("retryPolicy", callOpts.retryPolicy.String())) 302 return false 303 } 304 } 305 306 func isContextError(err error) bool { 307 return grpc.Code(err) == codes.DeadlineExceeded || grpc.Code(err) == codes.Canceled 308 } 309 310 func contextErrToGrpcErr(err error) error { 311 switch err { 312 case context.DeadlineExceeded: 313 return status.Errorf(codes.DeadlineExceeded, err.Error()) 314 case context.Canceled: 315 return status.Errorf(codes.Canceled, err.Error()) 316 default: 317 return status.Errorf(codes.Unknown, err.Error()) 318 } 319 } 320 321 var ( 322 defaultOptions = &options{ 323 retryPolicy: nonRepeatable, 324 max: 0, // disable 325 backoffFunc: backoffLinearWithJitter(50*time.Millisecond /*jitter*/, 0.10), 326 retryAuth: true, 327 } 328 ) 329 330 // backoffFunc denotes a family of functions that control the backoff duration between call retries. 331 // 332 // They are called with an identifier of the attempt, and should return a time the system client should 333 // hold off for. If the time returned is longer than the `context.Context.Deadline` of the request 334 // the deadline of the request takes precedence and the wait will be interrupted before proceeding 335 // with the next iteration. 336 type backoffFunc func(attempt uint) time.Duration 337 338 // withRetryPolicy sets the retry policy of this call. 339 func withRetryPolicy(rp retryPolicy) retryOption { 340 return retryOption{applyFunc: func(o *options) { 341 o.retryPolicy = rp 342 }} 343 } 344 345 // withMax sets the maximum number of retries on this call, or this interceptor. 
346 func withMax(maxRetries uint) retryOption { 347 return retryOption{applyFunc: func(o *options) { 348 o.max = maxRetries 349 }} 350 } 351 352 // WithBackoff sets the `BackoffFunc `used to control time between retries. 353 func withBackoff(bf backoffFunc) retryOption { 354 return retryOption{applyFunc: func(o *options) { 355 o.backoffFunc = bf 356 }} 357 } 358 359 type options struct { 360 retryPolicy retryPolicy 361 max uint 362 backoffFunc backoffFunc 363 retryAuth bool 364 } 365 366 // retryOption is a grpc.CallOption that is local to clientv3's retry interceptor. 367 type retryOption struct { 368 grpc.EmptyCallOption // make sure we implement private after() and before() fields so we don't panic. 369 applyFunc func(opt *options) 370 } 371 372 func reuseOrNewWithCallOptions(opt *options, retryOptions []retryOption) *options { 373 if len(retryOptions) == 0 { 374 return opt 375 } 376 optCopy := &options{} 377 *optCopy = *opt 378 for _, f := range retryOptions { 379 f.applyFunc(optCopy) 380 } 381 return optCopy 382 } 383 384 func filterCallOptions(callOptions []grpc.CallOption) (grpcOptions []grpc.CallOption, retryOptions []retryOption) { 385 for _, opt := range callOptions { 386 if co, ok := opt.(retryOption); ok { 387 retryOptions = append(retryOptions, co) 388 } else { 389 grpcOptions = append(grpcOptions, opt) 390 } 391 } 392 return grpcOptions, retryOptions 393 } 394 395 // BackoffLinearWithJitter waits a set period of time, allowing for jitter (fractional adjustment). 396 // 397 // For example waitBetween=1s and jitter=0.10 can generate waits between 900ms and 1100ms. 398 func backoffLinearWithJitter(waitBetween time.Duration, jitterFraction float64) backoffFunc { 399 return func(attempt uint) time.Duration { 400 return jitterUp(waitBetween, jitterFraction) 401 } 402 }