github.com/KinWaiYuen/client-go/v2@v2.5.4/internal/locate/region_request.go

// Copyright 2021 TiKV Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: The code in this file is based on code from the
// TiDB project, licensed under the Apache License v 2.0
//
// https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/locate/region_request.go
//

// Copyright 2016 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package locate

import (
	"context"
	"fmt"
	"math/rand"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"go.uber.org/zap"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"

	tikverr "github.com/KinWaiYuen/client-go/v2/error"
	"github.com/KinWaiYuen/client-go/v2/internal/client"
	"github.com/KinWaiYuen/client-go/v2/internal/logutil"
	"github.com/KinWaiYuen/client-go/v2/internal/retry"
	"github.com/KinWaiYuen/client-go/v2/kv"
	"github.com/KinWaiYuen/client-go/v2/metrics"
	"github.com/KinWaiYuen/client-go/v2/tikvrpc"
	"github.com/KinWaiYuen/client-go/v2/util"
	"github.com/opentracing/opentracing-go"
	"github.com/pingcap/errors"
	"github.com/pingcap/kvproto/pkg/coprocessor"
	"github.com/pingcap/kvproto/pkg/errorpb"
	"github.com/pingcap/kvproto/pkg/kvrpcpb"
	"github.com/pingcap/kvproto/pkg/metapb"
)

// shuttingDown is a flag to indicate tidb-server is exiting (a Ctrl+C signal
// was received, for example). If this flag is set, the tikv client should not retry on
// network errors because tidb-server expects the tikv client to exit as soon as possible.
var shuttingDown uint32

// StoreShuttingDown atomically stores v into shuttingDown.
func StoreShuttingDown(v uint32) {
	atomic.StoreUint32(&shuttingDown, v)
}

// LoadShuttingDown atomically loads shuttingDown.
func LoadShuttingDown() uint32 {
	return atomic.LoadUint32(&shuttingDown)
}
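// Illustrative sketch (not from the original source): how a server's shutdown
// path might flip the shuttingDown flag so that in-flight senders stop retrying
// on network errors. The signal-channel wiring (sigCh) is an assumption made
// only for demonstration.
func exampleHandleShutdownSignal(sigCh <-chan struct{}) {
	go func() {
		// e.g. sigCh is closed when SIGINT/SIGTERM is received.
		<-sigCh
		// Senders observe LoadShuttingDown() > 0 and give up retrying.
		StoreShuttingDown(1)
	}()
}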
// RegionRequestSender sends KV/Cop requests to a tikv server. It handles network
// errors and some region errors internally.
//
// Typically, a KV/Cop request is bound to a region: all keys involved in the
// request should be located in that region.
// The sending process begins by looking up the address of the target region's
// leader store in the cache, and the request is then sent to the destination
// tikv server over a TCP connection.
// If the region has been updated, which can be caused by a leader transfer,
// region split, region merge, or region balance, the tikv server may not be
// able to process the request and sends back a RegionError.
// RegionRequestSender takes care of errors that are not relevant to the region
// range, such as 'I/O timeout', 'NotLeader', and 'ServerIsBusy'. If it fails to
// send the request to all replicas, a fake region error may be returned. A
// caller that receives this error should retry the request.
//
// For other region errors, since the region range has changed, the request may
// need to be re-split, so we simply return the error to the caller.
type RegionRequestSender struct {
	regionCache       *RegionCache
	client            client.Client
	storeAddr         string
	rpcError          error
	replicaSelector   *replicaSelector
	failStoreIDs      map[uint64]struct{}
	failProxyStoreIDs map[uint64]struct{}
	RegionRequestRuntimeStats
}

// RegionRequestRuntimeStats records the runtime stats of sending region requests.
type RegionRequestRuntimeStats struct {
	Stats map[tikvrpc.CmdType]*RPCRuntimeStats
}

// NewRegionRequestRuntimeStats returns a new RegionRequestRuntimeStats.
func NewRegionRequestRuntimeStats() RegionRequestRuntimeStats {
	return RegionRequestRuntimeStats{
		Stats: make(map[tikvrpc.CmdType]*RPCRuntimeStats),
	}
}

// RPCRuntimeStats records the RPC request count and the time consumed.
type RPCRuntimeStats struct {
	Count int64
	// Total time consumed by sending region requests.
	Consume int64
}

// String implements the fmt.Stringer interface.
func (r *RegionRequestRuntimeStats) String() string {
	var builder strings.Builder
	for k, v := range r.Stats {
		if builder.Len() > 0 {
			builder.WriteByte(',')
		}
		// Appends: fmt.Sprintf("%s:{num_rpc:%v, total_time:%s}", k.String(), v.Count, util.FormatDuration(time.Duration(v.Consume)))
		builder.WriteString(k.String())
		builder.WriteString(":{num_rpc:")
		builder.WriteString(strconv.FormatInt(v.Count, 10))
		builder.WriteString(", total_time:")
		builder.WriteString(util.FormatDuration(time.Duration(v.Consume)))
		builder.WriteString("}")
	}
	return builder.String()
}

// Clone returns a copy of itself.
func (r *RegionRequestRuntimeStats) Clone() RegionRequestRuntimeStats {
	newRs := NewRegionRequestRuntimeStats()
	for cmd, v := range r.Stats {
		newRs.Stats[cmd] = &RPCRuntimeStats{
			Count:   v.Count,
			Consume: v.Consume,
		}
	}
	return newRs
}

// Merge merges other RegionRequestRuntimeStats.
func (r *RegionRequestRuntimeStats) Merge(rs RegionRequestRuntimeStats) {
	for cmd, v := range rs.Stats {
		stat, ok := r.Stats[cmd]
		if !ok {
			r.Stats[cmd] = &RPCRuntimeStats{
				Count:   v.Count,
				Consume: v.Consume,
			}
			continue
		}
		stat.Count += v.Count
		stat.Consume += v.Consume
	}
}

// RecordRegionRequestRuntimeStats records request runtime stats.
func RecordRegionRequestRuntimeStats(stats map[tikvrpc.CmdType]*RPCRuntimeStats, cmd tikvrpc.CmdType, d time.Duration) {
	stat, ok := stats[cmd]
	if !ok {
		stats[cmd] = &RPCRuntimeStats{
			Count:   1,
			Consume: int64(d),
		}
		return
	}
	stat.Count++
	stat.Consume += int64(d)
}
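// Illustrative sketch (not from the original source): how the runtime stats
// above might be recorded and aggregated by a caller. tikvrpc.CmdGet is used
// only as a representative command type; the durations are made up.
func exampleRuntimeStats() string {
	stats := NewRegionRequestRuntimeStats()
	// Record two hypothetical RPCs of the same command type.
	RecordRegionRequestRuntimeStats(stats.Stats, tikvrpc.CmdGet, 2*time.Millisecond)
	RecordRegionRequestRuntimeStats(stats.Stats, tikvrpc.CmdGet, 3*time.Millisecond)
	// Merge a deep copy into an aggregate, e.g. per-request stats into
	// session-level stats.
	total := NewRegionRequestRuntimeStats()
	total.Merge(stats.Clone())
	return total.String() // e.g. "Get:{num_rpc:2, total_time:5ms}"
}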
// NewRegionRequestSender creates a new sender.
func NewRegionRequestSender(regionCache *RegionCache, client client.Client) *RegionRequestSender {
	return &RegionRequestSender{
		regionCache: regionCache,
		client:      client,
	}
}

// GetRegionCache returns the region cache.
func (s *RegionRequestSender) GetRegionCache() *RegionCache {
	return s.regionCache
}

// GetClient returns the RPC client.
func (s *RegionRequestSender) GetClient() client.Client {
	return s.client
}

// SetStoreAddr specifies the dest store address.
func (s *RegionRequestSender) SetStoreAddr(addr string) {
	s.storeAddr = addr
}

// GetStoreAddr returns the dest store address.
func (s *RegionRequestSender) GetStoreAddr() string {
	return s.storeAddr
}

// GetRPCError returns the RPC error.
func (s *RegionRequestSender) GetRPCError() error {
	return s.rpcError
}

// SetRPCError rewrites the RPC error.
func (s *RegionRequestSender) SetRPCError(err error) {
	s.rpcError = err
}

// SendReq sends a request to the tikv server. If it fails to send the request to all replicas,
// a fake region error may be returned. A caller that receives this error should retry the request.
func (s *RegionRequestSender) SendReq(bo *retry.Backoffer, req *tikvrpc.Request, regionID RegionVerID, timeout time.Duration) (*tikvrpc.Response, error) {
	resp, _, err := s.SendReqCtx(bo, req, regionID, timeout, tikvrpc.TiKV)
	return resp, err
}

type replica struct {
	store    *Store
	peer     *metapb.Peer
	epoch    uint32
	attempts int
}

func (r *replica) isEpochStale() bool {
	return r.epoch != atomic.LoadUint32(&r.store.epoch)
}

func (r *replica) isExhausted(maxAttempt int) bool {
	return r.attempts >= maxAttempt
}

type replicaSelector struct {
	regionCache *RegionCache
	region      *Region
	regionStore *regionStore
	replicas    []*replica
	state       selectorState
	// replicas[targetIdx] is the replica handling the request this time
	targetIdx AccessIndex
	// replicas[proxyIdx] is the store used to redirect requests this time
	proxyIdx AccessIndex
}

// selectorState is the interface of states of the replicaSelector.
263 // Here is the main state transition diagram: 264 // 265 // exceeding maxReplicaAttempt 266 // +-------------------+ || RPC failure && unreachable && no forwarding 267 // +-------->+ accessKnownLeader +----------------+ 268 // | +------+------------+ | 269 // | | | 270 // | | RPC failure v 271 // | | && unreachable +-----+-----+ 272 // | | && enable forwarding |tryFollower+------+ 273 // | | +-----------+ | 274 // | leader becomes v | all followers 275 // | reachable +----+-------------+ | are tried 276 // +-----------+accessByKnownProxy| | 277 // ^ +------+-----------+ | 278 // | | +-------+ | 279 // | | RPC failure |backoff+<---+ 280 // | leader becomes v +---+---+ 281 // | reachable +-----+-----+ all proxies are tried ^ 282 // +------------+tryNewProxy+-------------------------+ 283 // +-----------+ 284 type selectorState interface { 285 next(*retry.Backoffer, *replicaSelector) (*RPCContext, error) 286 onSendSuccess(*replicaSelector) 287 onSendFailure(*retry.Backoffer, *replicaSelector, error) 288 onNoLeader(*replicaSelector) 289 } 290 291 type stateChanged struct{} 292 293 func (c stateChanged) Error() string { 294 return "replicaSelector state changed" 295 } 296 297 type stateBase struct{} 298 299 func (s stateBase) next(bo *retry.Backoffer, selector *replicaSelector) (*RPCContext, error) { 300 return nil, nil 301 } 302 303 func (s stateBase) onSendSuccess(selector *replicaSelector) { 304 } 305 306 func (s stateBase) onSendFailure(backoffer *retry.Backoffer, selector *replicaSelector, err error) { 307 } 308 309 func (s stateBase) onNoLeader(selector *replicaSelector) { 310 } 311 312 // accessKnownLeader is the state where we are sending requests 313 // to the leader we suppose to be. 314 // 315 // After attempting maxReplicaAttempt times without success 316 // and without receiving new leader from the responses error, 317 // we should switch to tryFollower state. 318 type accessKnownLeader struct { 319 stateBase 320 leaderIdx AccessIndex 321 } 322 323 func (state *accessKnownLeader) next(bo *retry.Backoffer, selector *replicaSelector) (*RPCContext, error) { 324 leader := selector.replicas[state.leaderIdx] 325 if leader.isExhausted(maxReplicaAttempt) { 326 selector.state = &tryFollower{leaderIdx: state.leaderIdx, lastIdx: state.leaderIdx} 327 return nil, stateChanged{} 328 } 329 selector.targetIdx = state.leaderIdx 330 return selector.buildRPCContext(bo) 331 } 332 333 func (state *accessKnownLeader) onSendFailure(bo *retry.Backoffer, selector *replicaSelector, cause error) { 334 liveness := selector.checkLiveness(bo, selector.targetReplica()) 335 if liveness != reachable && len(selector.replicas) > 1 && selector.regionCache.enableForwarding { 336 selector.state = &accessByKnownProxy{leaderIdx: state.leaderIdx} 337 return 338 } 339 if liveness != reachable || selector.targetReplica().isExhausted(maxReplicaAttempt) { 340 selector.state = &tryFollower{leaderIdx: state.leaderIdx, lastIdx: state.leaderIdx} 341 } 342 if liveness != reachable { 343 selector.invalidateReplicaStore(selector.targetReplica(), cause) 344 } 345 } 346 347 func (state *accessKnownLeader) onNoLeader(selector *replicaSelector) { 348 selector.state = &tryFollower{leaderIdx: state.leaderIdx, lastIdx: state.leaderIdx} 349 } 350 351 // tryFollower is the state where we cannot access the known leader 352 // but still try other replicas in case they have become the leader. 353 // 354 // In this state, a follower that is not tried will be used. If all 355 // followers are tried, we think we have exhausted the replicas. 
356 // On sending failure in this state, if leader info is returned, 357 // the leader will be updated to replicas[0] and give it another chance. 358 type tryFollower struct { 359 stateBase 360 leaderIdx AccessIndex 361 lastIdx AccessIndex 362 } 363 364 func (state *tryFollower) next(bo *retry.Backoffer, selector *replicaSelector) (*RPCContext, error) { 365 var targetReplica *replica 366 // Search replica that is not attempted from the last accessed replica 367 for i := 1; i < len(selector.replicas); i++ { 368 idx := AccessIndex((int(state.lastIdx) + i) % len(selector.replicas)) 369 if idx == state.leaderIdx { 370 continue 371 } 372 targetReplica = selector.replicas[idx] 373 // Each follower is only tried once 374 if !targetReplica.isExhausted(1) { 375 state.lastIdx = idx 376 selector.targetIdx = idx 377 break 378 } 379 } 380 // If all followers are tried and fail, backoff and retry. 381 if selector.targetIdx < 0 { 382 metrics.TiKVReplicaSelectorFailureCounter.WithLabelValues("exhausted").Inc() 383 selector.invalidateRegion() 384 return nil, nil 385 } 386 return selector.buildRPCContext(bo) 387 } 388 389 func (state *tryFollower) onSendSuccess(selector *replicaSelector) { 390 if !selector.regionCache.switchWorkLeaderToPeer(selector.region, selector.targetReplica().peer) { 391 panic("the store must exist") 392 } 393 } 394 395 func (state *tryFollower) onSendFailure(bo *retry.Backoffer, selector *replicaSelector, cause error) { 396 if selector.checkLiveness(bo, selector.targetReplica()) != reachable { 397 selector.invalidateReplicaStore(selector.targetReplica(), cause) 398 } 399 } 400 401 // accessByKnownProxy is the state where we are sending requests through 402 // regionStore.proxyTiKVIdx as a proxy. 403 type accessByKnownProxy struct { 404 stateBase 405 leaderIdx AccessIndex 406 } 407 408 func (state *accessByKnownProxy) next(bo *retry.Backoffer, selector *replicaSelector) (*RPCContext, error) { 409 leader := selector.replicas[state.leaderIdx] 410 if atomic.LoadInt32(&leader.store.unreachable) == 0 { 411 selector.regionStore.unsetProxyStoreIfNeeded(selector.region) 412 selector.state = &accessKnownLeader{leaderIdx: state.leaderIdx} 413 return nil, stateChanged{} 414 } 415 416 if selector.regionStore.proxyTiKVIdx >= 0 { 417 selector.targetIdx = state.leaderIdx 418 selector.proxyIdx = selector.regionStore.proxyTiKVIdx 419 return selector.buildRPCContext(bo) 420 } 421 422 selector.state = &tryNewProxy{leaderIdx: state.leaderIdx} 423 return nil, stateChanged{} 424 } 425 426 func (state *accessByKnownProxy) onSendFailure(bo *retry.Backoffer, selector *replicaSelector, cause error) { 427 selector.state = &tryNewProxy{leaderIdx: state.leaderIdx} 428 if selector.checkLiveness(bo, selector.proxyReplica()) != reachable { 429 selector.invalidateReplicaStore(selector.proxyReplica(), cause) 430 } 431 } 432 433 func (state *accessByKnownProxy) onNoLeader(selector *replicaSelector) { 434 selector.state = &invalidLeader{} 435 } 436 437 // tryNewProxy is the state where we try to find a node from followers as proxy. 
438 type tryNewProxy struct { 439 stateBase 440 leaderIdx AccessIndex 441 } 442 443 func (state *tryNewProxy) next(bo *retry.Backoffer, selector *replicaSelector) (*RPCContext, error) { 444 leader := selector.replicas[state.leaderIdx] 445 if atomic.LoadInt32(&leader.store.unreachable) == 0 { 446 selector.regionStore.unsetProxyStoreIfNeeded(selector.region) 447 selector.state = &accessKnownLeader{leaderIdx: state.leaderIdx} 448 return nil, stateChanged{} 449 } 450 451 candidateNum := 0 452 for idx, replica := range selector.replicas { 453 if state.isCandidate(AccessIndex(idx), replica) { 454 candidateNum++ 455 } 456 } 457 458 // If all followers are tried as a proxy and fail, mark the leader store invalid, then backoff and retry. 459 if candidateNum == 0 { 460 metrics.TiKVReplicaSelectorFailureCounter.WithLabelValues("exhausted").Inc() 461 selector.invalidateReplicaStore(leader, errors.Errorf("all followers are tried as proxy but fail")) 462 selector.region.scheduleReload() 463 return nil, nil 464 } 465 466 // Skip advanceCnt valid candidates to find a proxy peer randomly 467 advanceCnt := rand.Intn(candidateNum) 468 for idx, replica := range selector.replicas { 469 if !state.isCandidate(AccessIndex(idx), replica) { 470 continue 471 } 472 if advanceCnt == 0 { 473 selector.targetIdx = state.leaderIdx 474 selector.proxyIdx = AccessIndex(idx) 475 break 476 } 477 advanceCnt-- 478 } 479 return selector.buildRPCContext(bo) 480 } 481 482 func (state *tryNewProxy) isCandidate(idx AccessIndex, replica *replica) bool { 483 // Try each peer only once 484 return idx != state.leaderIdx && !replica.isExhausted(1) 485 } 486 487 func (state *tryNewProxy) onSendSuccess(selector *replicaSelector) { 488 selector.regionStore.setProxyStoreIdx(selector.region, selector.proxyIdx) 489 } 490 491 func (state *tryNewProxy) onSendFailure(bo *retry.Backoffer, selector *replicaSelector, cause error) { 492 if selector.checkLiveness(bo, selector.proxyReplica()) != reachable { 493 selector.invalidateReplicaStore(selector.proxyReplica(), cause) 494 } 495 } 496 497 func (state *tryNewProxy) onNoLeader(selector *replicaSelector) { 498 selector.state = &invalidLeader{} 499 } 500 501 // accessFollower is the state where we are sending requests to TiKV followers. 502 // If there is no suitable follower, requests will be sent to the leader as a fallback. 503 type accessFollower struct { 504 stateBase 505 // If tryLeader is true, the request can also be sent to the leader. 506 tryLeader bool 507 isGlobalStaleRead bool 508 option storeSelectorOp 509 leaderIdx AccessIndex 510 lastIdx AccessIndex 511 } 512 513 func (state *accessFollower) next(bo *retry.Backoffer, selector *replicaSelector) (*RPCContext, error) { 514 if state.lastIdx < 0 { 515 if state.tryLeader { 516 state.lastIdx = AccessIndex(rand.Intn(len(selector.replicas))) 517 } else { 518 if len(selector.replicas) <= 1 { 519 state.lastIdx = state.leaderIdx 520 } else { 521 // Randomly select a non-leader peer 522 state.lastIdx = AccessIndex(rand.Intn(len(selector.replicas) - 1)) 523 if state.lastIdx >= state.leaderIdx { 524 state.lastIdx++ 525 } 526 } 527 } 528 } else { 529 // Stale Read request will retry the leader or next peer on error, 530 // if txnScope is global, we will only retry the leader by using the WithLeaderOnly option, 531 // if txnScope is local, we will retry both other peers and the leader by the strategy of replicaSelector. 
532 if state.isGlobalStaleRead { 533 WithLeaderOnly()(&state.option) 534 } 535 state.lastIdx++ 536 } 537 538 for i := 0; i < len(selector.replicas) && !state.option.leaderOnly; i++ { 539 idx := AccessIndex((int(state.lastIdx) + i) % len(selector.replicas)) 540 if state.isCandidate(idx, selector.replicas[idx]) { 541 state.lastIdx = idx 542 selector.targetIdx = idx 543 break 544 } 545 } 546 // If there is no candidate, fallback to the leader. 547 if selector.targetIdx < 0 { 548 if len(state.option.labels) > 0 { 549 logutil.BgLogger().Warn("unable to find stores with given labels") 550 } 551 leader := selector.replicas[state.leaderIdx] 552 if leader.isEpochStale() || leader.isExhausted(1) { 553 metrics.TiKVReplicaSelectorFailureCounter.WithLabelValues("exhausted").Inc() 554 selector.invalidateRegion() 555 return nil, nil 556 } 557 state.lastIdx = state.leaderIdx 558 selector.targetIdx = state.leaderIdx 559 } 560 return selector.buildRPCContext(bo) 561 } 562 563 func (state *accessFollower) onSendFailure(bo *retry.Backoffer, selector *replicaSelector, cause error) { 564 if selector.checkLiveness(bo, selector.targetReplica()) != reachable { 565 selector.invalidateReplicaStore(selector.targetReplica(), cause) 566 } 567 } 568 569 func (state *accessFollower) isCandidate(idx AccessIndex, replica *replica) bool { 570 return !replica.isEpochStale() && !replica.isExhausted(1) && 571 // The request can only be sent to the leader. 572 ((state.option.leaderOnly && idx == state.leaderIdx) || 573 // Choose a replica with matched labels. 574 (!state.option.leaderOnly && (state.tryLeader || idx != state.leaderIdx) && replica.store.IsLabelsMatch(state.option.labels))) 575 } 576 577 type invalidStore struct { 578 stateBase 579 } 580 581 func (state *invalidStore) next(_ *retry.Backoffer, _ *replicaSelector) (*RPCContext, error) { 582 metrics.TiKVReplicaSelectorFailureCounter.WithLabelValues("invalidStore").Inc() 583 return nil, nil 584 } 585 586 // TODO(sticnarf): If using request forwarding and the leader is unknown, try other followers 587 // instead of just switching to this state to backoff and retry. 588 type invalidLeader struct { 589 stateBase 590 } 591 592 func (state *invalidLeader) next(_ *retry.Backoffer, _ *replicaSelector) (*RPCContext, error) { 593 metrics.TiKVReplicaSelectorFailureCounter.WithLabelValues("invalidLeader").Inc() 594 return nil, nil 595 } 596 597 // newReplicaSelector creates a replicaSelector which selects replicas according to reqType and opts. 598 // opts is currently only effective for follower read. 
599 func newReplicaSelector(regionCache *RegionCache, regionID RegionVerID, req *tikvrpc.Request, opts ...StoreSelectorOption) (*replicaSelector, error) { 600 cachedRegion := regionCache.GetCachedRegionWithRLock(regionID) 601 if cachedRegion == nil || !cachedRegion.isValid() { 602 return nil, nil 603 } 604 regionStore := cachedRegion.getStore() 605 replicas := make([]*replica, 0, regionStore.accessStoreNum(tiKVOnly)) 606 for _, storeIdx := range regionStore.accessIndex[tiKVOnly] { 607 replicas = append(replicas, &replica{ 608 store: regionStore.stores[storeIdx], 609 peer: cachedRegion.meta.Peers[storeIdx], 610 epoch: regionStore.storeEpochs[storeIdx], 611 attempts: 0, 612 }) 613 } 614 var state selectorState 615 if !req.ReplicaReadType.IsFollowerRead() { 616 if regionCache.enableForwarding && regionStore.proxyTiKVIdx >= 0 { 617 state = &accessByKnownProxy{leaderIdx: regionStore.workTiKVIdx} 618 } else { 619 state = &accessKnownLeader{leaderIdx: regionStore.workTiKVIdx} 620 } 621 } else { 622 option := storeSelectorOp{} 623 for _, op := range opts { 624 op(&option) 625 } 626 state = &accessFollower{ 627 tryLeader: req.ReplicaReadType == kv.ReplicaReadMixed, 628 isGlobalStaleRead: req.IsGlobalStaleRead(), 629 option: option, 630 leaderIdx: regionStore.workTiKVIdx, 631 lastIdx: -1, 632 } 633 } 634 635 return &replicaSelector{ 636 regionCache, 637 cachedRegion, 638 regionStore, 639 replicas, 640 state, 641 -1, 642 -1, 643 }, nil 644 } 645 646 const maxReplicaAttempt = 10 647 648 // next creates the RPCContext of the current candidate replica. 649 // It returns a SendError if runs out of all replicas or the cached region is invalidated. 650 func (s *replicaSelector) next(bo *retry.Backoffer) (rpcCtx *RPCContext, err error) { 651 if !s.region.isValid() { 652 metrics.TiKVReplicaSelectorFailureCounter.WithLabelValues("invalid").Inc() 653 return nil, nil 654 } 655 656 s.targetIdx = -1 657 s.proxyIdx = -1 658 s.refreshRegionStore() 659 for { 660 rpcCtx, err = s.state.next(bo, s) 661 if _, isStateChanged := err.(stateChanged); !isStateChanged { 662 return 663 } 664 } 665 } 666 667 func (s *replicaSelector) targetReplica() *replica { 668 if s.targetIdx >= 0 && int(s.targetIdx) < len(s.replicas) { 669 return s.replicas[s.targetIdx] 670 } 671 return nil 672 } 673 674 func (s *replicaSelector) proxyReplica() *replica { 675 if s.proxyIdx >= 0 && int(s.proxyIdx) < len(s.replicas) { 676 return s.replicas[s.proxyIdx] 677 } 678 return nil 679 } 680 681 func (s *replicaSelector) refreshRegionStore() { 682 oldRegionStore := s.regionStore 683 newRegionStore := s.region.getStore() 684 if oldRegionStore == newRegionStore { 685 return 686 } 687 s.regionStore = newRegionStore 688 689 // In the current implementation, if stores change, the address of it must change. 690 // So we just compare the address here. 691 // When stores change, we mark this replicaSelector as invalid to let the caller 692 // recreate a new replicaSelector. 693 if &oldRegionStore.stores != &newRegionStore.stores { 694 s.state = &invalidStore{} 695 return 696 } 697 698 // If leader has changed, it means a recent request succeeds an RPC 699 // on the new leader. 700 if oldRegionStore.workTiKVIdx != newRegionStore.workTiKVIdx { 701 switch state := s.state.(type) { 702 case *accessFollower: 703 state.leaderIdx = newRegionStore.workTiKVIdx 704 default: 705 // Try the new leader and give it an addition chance if the 706 // request is sent to the leader. 
707 newLeaderIdx := newRegionStore.workTiKVIdx 708 s.state = &accessKnownLeader{leaderIdx: newLeaderIdx} 709 if s.replicas[newLeaderIdx].attempts == maxReplicaAttempt { 710 s.replicas[newLeaderIdx].attempts-- 711 } 712 } 713 } 714 } 715 716 func (s *replicaSelector) buildRPCContext(bo *retry.Backoffer) (*RPCContext, error) { 717 targetReplica, proxyReplica := s.targetReplica(), s.proxyReplica() 718 719 // Backoff and retry if no replica is selected or the selected replica is stale 720 if targetReplica == nil || targetReplica.isEpochStale() || 721 (proxyReplica != nil && proxyReplica.isEpochStale()) { 722 // TODO(youjiali1995): Is it necessary to invalidate the region? 723 metrics.TiKVReplicaSelectorFailureCounter.WithLabelValues("stale_store").Inc() 724 s.invalidateRegion() 725 return nil, nil 726 } 727 728 rpcCtx := &RPCContext{ 729 Region: s.region.VerID(), 730 Meta: s.region.meta, 731 Peer: targetReplica.peer, 732 Store: targetReplica.store, 733 AccessMode: tiKVOnly, 734 TiKVNum: len(s.replicas), 735 } 736 737 // Set leader addr 738 addr, err := s.regionCache.getStoreAddr(bo, s.region, targetReplica.store) 739 if err != nil { 740 return nil, err 741 } 742 if len(addr) == 0 { 743 return nil, nil 744 } 745 rpcCtx.Addr = addr 746 targetReplica.attempts++ 747 748 // Set proxy addr 749 if proxyReplica != nil { 750 addr, err = s.regionCache.getStoreAddr(bo, s.region, proxyReplica.store) 751 if err != nil { 752 return nil, err 753 } 754 if len(addr) == 0 { 755 return nil, nil 756 } 757 rpcCtx.ProxyStore = proxyReplica.store 758 rpcCtx.ProxyAddr = addr 759 proxyReplica.attempts++ 760 } 761 762 return rpcCtx, nil 763 } 764 765 func (s *replicaSelector) onSendFailure(bo *retry.Backoffer, err error) { 766 metrics.RegionCacheCounterWithSendFail.Inc() 767 s.state.onSendFailure(bo, s, err) 768 } 769 770 func (s *replicaSelector) checkLiveness(bo *retry.Backoffer, accessReplica *replica) livenessState { 771 store := accessReplica.store 772 liveness := store.requestLiveness(bo, s.regionCache) 773 // We only check health in loop if forwarding is enabled now. 774 // The restriction might be relaxed if necessary, but the implementation 775 // may be checked carefully again. 776 if liveness != reachable && s.regionCache.enableForwarding { 777 store.startHealthCheckLoopIfNeeded(s.regionCache) 778 } 779 return liveness 780 } 781 782 func (s *replicaSelector) invalidateReplicaStore(replica *replica, cause error) { 783 store := replica.store 784 if atomic.CompareAndSwapUint32(&store.epoch, replica.epoch, replica.epoch+1) { 785 logutil.BgLogger().Info("mark store's regions need be refill", zap.Uint64("id", store.storeID), zap.String("addr", store.addr), zap.Error(cause)) 786 metrics.RegionCacheCounterWithInvalidateStoreRegionsOK.Inc() 787 // schedule a store addr resolve. 788 store.markNeedCheck(s.regionCache.notifyCheckCh) 789 } 790 } 791 792 func (s *replicaSelector) onSendSuccess() { 793 s.state.onSendSuccess(s) 794 } 795 796 func (s *replicaSelector) onNotLeader(bo *retry.Backoffer, ctx *RPCContext, notLeader *errorpb.NotLeader) (shouldRetry bool, err error) { 797 leader := notLeader.GetLeader() 798 if leader == nil { 799 // The region may be during transferring leader. 800 s.state.onNoLeader(s) 801 if err = bo.Backoff(retry.BoRegionScheduling, errors.Errorf("no leader, ctx: %v", ctx)); err != nil { 802 return false, errors.Trace(err) 803 } 804 } else { 805 s.updateLeader(notLeader.GetLeader()) 806 } 807 return true, nil 808 } 809 810 // updateLeader updates the leader of the cached region. 
// If the leader peer isn't found in the region, the region will be invalidated.
func (s *replicaSelector) updateLeader(leader *metapb.Peer) {
	if leader == nil {
		return
	}
	for i, replica := range s.replicas {
		if isSamePeer(replica.peer, leader) {
			if replica.isExhausted(maxReplicaAttempt) {
				// Give the replica one more chance; because each follower is tried only once,
				// this won't result in infinite retries.
				replica.attempts = maxReplicaAttempt - 1
			}
			s.state = &accessKnownLeader{leaderIdx: AccessIndex(i)}
			// Update the workTiKVIdx so that following requests can be sent to the leader immediately.
			if !s.regionCache.switchWorkLeaderToPeer(s.region, leader) {
				panic("the store must exist")
			}
			logutil.BgLogger().Debug("switch region leader to specific leader due to kv return NotLeader",
				zap.Uint64("regionID", s.region.GetID()),
				zap.Uint64("leaderStoreID", leader.GetStoreId()))
			return
		}
	}
	// Invalidate the region since the new leader is not in the cached version.
	s.region.invalidate(StoreNotFound)
}

func (s *replicaSelector) invalidateRegion() {
	if s.region != nil {
		s.region.invalidate(Other)
	}
}

func (s *RegionRequestSender) getRPCContext(
	bo *retry.Backoffer,
	req *tikvrpc.Request,
	regionID RegionVerID,
	et tikvrpc.EndpointType,
	opts ...StoreSelectorOption,
) (*RPCContext, error) {
	switch et {
	case tikvrpc.TiKV:
		if s.replicaSelector == nil {
			selector, err := newReplicaSelector(s.regionCache, regionID, req, opts...)
			if selector == nil || err != nil {
				return nil, err
			}
			s.replicaSelector = selector
		}
		return s.replicaSelector.next(bo)
	case tikvrpc.TiFlash:
		return s.regionCache.GetTiFlashRPCContext(bo, regionID, true)
	case tikvrpc.TiDB:
		return &RPCContext{Addr: s.storeAddr}, nil
	default:
		return nil, errors.Errorf("unsupported storage type: %v", et)
	}
}

func (s *RegionRequestSender) reset() {
	s.replicaSelector = nil
	s.failStoreIDs = nil
	s.failProxyStoreIDs = nil
}

// IsFakeRegionError returns true if err is a fake region error.
func IsFakeRegionError(err *errorpb.Error) bool {
	return err != nil && err.GetEpochNotMatch() != nil && len(err.GetEpochNotMatch().CurrentRegions) == 0
}
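// Illustrative sketch (not from the original source): how a caller might use
// IsFakeRegionError to distinguish the pseudo EpochNotMatch error generated by
// the sender (all replicas failed or the region was missing from the cache)
// from a genuine region error carried in the response.
func exampleCheckFakeRegionError(resp *tikvrpc.Response) (shouldRetry bool, err error) {
	regionErr, err := resp.GetRegionError()
	if err != nil {
		return false, err
	}
	if IsFakeRegionError(regionErr) {
		// The caller should refresh its region information and retry the request.
		return true, nil
	}
	return false, nil
}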
// SendReqCtx sends a request to the tikv server and returns the response and the RPCContext of this RPC.
func (s *RegionRequestSender) SendReqCtx(
	bo *retry.Backoffer,
	req *tikvrpc.Request,
	regionID RegionVerID,
	timeout time.Duration,
	et tikvrpc.EndpointType,
	opts ...StoreSelectorOption,
) (
	resp *tikvrpc.Response,
	rpcCtx *RPCContext,
	err error,
) {
	if span := opentracing.SpanFromContext(bo.GetCtx()); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("regionRequest.SendReqCtx", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		bo.SetCtx(opentracing.ContextWithSpan(bo.GetCtx(), span1))
	}

	if val, err := util.EvalFailpoint("tikvStoreSendReqResult"); err == nil {
		switch val.(string) {
		case "timeout":
			return nil, nil, errors.New("timeout")
		case "GCNotLeader":
			if req.Type == tikvrpc.CmdGC {
				return &tikvrpc.Response{
					Resp: &kvrpcpb.GCResponse{RegionError: &errorpb.Error{NotLeader: &errorpb.NotLeader{}}},
				}, nil, nil
			}
		case "GCServerIsBusy":
			if req.Type == tikvrpc.CmdGC {
				return &tikvrpc.Response{
					Resp: &kvrpcpb.GCResponse{RegionError: &errorpb.Error{ServerIsBusy: &errorpb.ServerIsBusy{}}},
				}, nil, nil
			}
		case "busy":
			return &tikvrpc.Response{
				Resp: &kvrpcpb.GCResponse{RegionError: &errorpb.Error{ServerIsBusy: &errorpb.ServerIsBusy{}}},
			}, nil, nil
		case "requestTiDBStoreError":
			if et == tikvrpc.TiDB {
				return nil, nil, tikverr.ErrTiKVServerTimeout
			}
		case "requestTiFlashError":
			if et == tikvrpc.TiFlash {
				return nil, nil, tikverr.ErrTiFlashServerTimeout
			}
		}
	}

	// If the MaxExecutionDurationMs is not set yet, we set it to be the RPC timeout duration
	// so TiKV can give up requests whose responses TiDB cannot receive due to timeout.
	if req.Context.MaxExecutionDurationMs == 0 {
		req.Context.MaxExecutionDurationMs = uint64(timeout.Milliseconds())
	}

	s.reset()
	tryTimes := 0
	defer func() {
		if tryTimes > 0 {
			metrics.TiKVRequestRetryTimesHistogram.Observe(float64(tryTimes))
		}
	}()
	for {
		if tryTimes > 0 {
			req.IsRetryRequest = true
			if tryTimes%100 == 0 {
				logutil.Logger(bo.GetCtx()).Warn("retry", zap.Uint64("region", regionID.GetID()), zap.Int("times", tryTimes))
			}
		}

		rpcCtx, err = s.getRPCContext(bo, req, regionID, et, opts...)
		if err != nil {
			return nil, nil, err
		}

		if _, err := util.EvalFailpoint("invalidCacheAndRetry"); err == nil {
			// cooperate with tikvclient/setGcResolveMaxBackoff
			if c := bo.GetCtx().Value("injectedBackoff"); c != nil {
				resp, err = tikvrpc.GenRegionErrorResp(req, &errorpb.Error{EpochNotMatch: &errorpb.EpochNotMatch{}})
				return resp, nil, err
			}
		}
		if rpcCtx == nil {
			// TODO(youjiali1995): remove it when using the replica selector for all requests.
			// If the region is not found in cache, it must be out
			// of date and already be cleaned up. We can skip the
			// RPC by returning RegionError directly.

			// TODO: Change the returned error to something like "region missing in cache",
			// and handle this error like EpochNotMatch, which means to re-split the request and retry.
			logutil.Logger(bo.GetCtx()).Debug("throwing pseudo region error due to region not found in cache", zap.Stringer("region", &regionID))
			resp, err = tikvrpc.GenRegionErrorResp(req, &errorpb.Error{EpochNotMatch: &errorpb.EpochNotMatch{}})
			return resp, nil, err
		}

		logutil.Eventf(bo.GetCtx(), "send %s request to region %d at %s", req.Type, regionID.id, rpcCtx.Addr)
		s.storeAddr = rpcCtx.Addr
		var retry bool
		resp, retry, err = s.sendReqToRegion(bo, rpcCtx, req, timeout)
		if err != nil {
			return nil, nil, errors.Trace(err)
		}

		// recheck whether the session/query is killed during the Next()
		boVars := bo.GetVars()
		if boVars != nil && boVars.Killed != nil && atomic.LoadUint32(boVars.Killed) == 1 {
			return nil, nil, tikverr.ErrQueryInterrupted
		}
		if val, err := util.EvalFailpoint("mockRetrySendReqToRegion"); err == nil {
			if val.(bool) {
				retry = true
			}
		}
		if retry {
			tryTimes++
			continue
		}

		var regionErr *errorpb.Error
		regionErr, err = resp.GetRegionError()
		if err != nil {
			return nil, nil, errors.Trace(err)
		}
		if regionErr != nil {
			retry, err = s.onRegionError(bo, rpcCtx, req, regionErr)
			if err != nil {
				return nil, nil, errors.Trace(err)
			}
			if retry {
				tryTimes++
				continue
			}
		} else {
			if s.replicaSelector != nil {
				s.replicaSelector.onSendSuccess()
			}
		}
		return resp, rpcCtx, nil
	}
}

// RPCCancellerCtxKey is the context key used to attach the RPC send cancelFunc collector to a ctx.
type RPCCancellerCtxKey struct{}

// RPCCanceller is the RPC send cancelFunc collector.
type RPCCanceller struct {
	sync.Mutex
	allocID   int
	cancels   map[int]func()
	cancelled bool
}

// NewRPCanceller creates an RPCCanceller in its initial state.
func NewRPCanceller() *RPCCanceller {
	return &RPCCanceller{cancels: make(map[int]func())}
}

// WithCancel generates a new context with a cancel func.
func (h *RPCCanceller) WithCancel(ctx context.Context) (context.Context, func()) {
	nctx, cancel := context.WithCancel(ctx)
	h.Lock()
	if h.cancelled {
		h.Unlock()
		cancel()
		return nctx, func() {}
	}
	id := h.allocID
	h.allocID++
	h.cancels[id] = cancel
	h.Unlock()
	return nctx, func() {
		cancel()
		h.Lock()
		delete(h.cancels, id)
		h.Unlock()
	}
}

// CancelAll cancels all inflight rpc contexts.
func (h *RPCCanceller) CancelAll() {
	h.Lock()
	for _, c := range h.cancels {
		c()
	}
	h.cancelled = true
	h.Unlock()
}

func (s *RegionRequestSender) sendReqToRegion(bo *retry.Backoffer, rpcCtx *RPCContext, req *tikvrpc.Request, timeout time.Duration) (resp *tikvrpc.Response, retry bool, err error) {
	if e := tikvrpc.SetContext(req, rpcCtx.Meta, rpcCtx.Peer); e != nil {
		return nil, false, errors.Trace(e)
	}
	// Check the store limit switch.
	if limit := kv.StoreLimit.Load(); limit > 0 {
		if err := s.getStoreToken(rpcCtx.Store, limit); err != nil {
			return nil, false, err
		}
		defer s.releaseStoreToken(rpcCtx.Store)
	}

	ctx := bo.GetCtx()
	if rawHook := ctx.Value(RPCCancellerCtxKey{}); rawHook != nil {
		var cancel context.CancelFunc
		ctx, cancel = rawHook.(*RPCCanceller).WithCancel(ctx)
		defer cancel()
	}

	// sendToAddr is the first target address that will receive the request.
If proxy is used, sendToAddr will point to 1090 // the proxy that will forward the request to the final target. 1091 sendToAddr := rpcCtx.Addr 1092 if rpcCtx.ProxyStore == nil { 1093 req.ForwardedHost = "" 1094 } else { 1095 req.ForwardedHost = rpcCtx.Addr 1096 sendToAddr = rpcCtx.ProxyAddr 1097 } 1098 1099 var sessionID uint64 1100 if v := bo.GetCtx().Value(util.SessionID); v != nil { 1101 sessionID = v.(uint64) 1102 } 1103 1104 injectFailOnSend := false 1105 if val, e := util.EvalFailpoint("rpcFailOnSend"); e == nil { 1106 inject := true 1107 // Optional filters 1108 if s, ok := val.(string); ok { 1109 if s == "greengc" && !req.IsGreenGCRequest() { 1110 inject = false 1111 } else if s == "write" && !req.IsTxnWriteRequest() { 1112 inject = false 1113 } 1114 } else if sessionID == 0 { 1115 inject = false 1116 } 1117 1118 if inject { 1119 logutil.Logger(ctx).Info("[failpoint] injected RPC error on send", zap.Stringer("type", req.Type), 1120 zap.Stringer("req", req.Req.(fmt.Stringer)), zap.Stringer("ctx", &req.Context)) 1121 injectFailOnSend = true 1122 err = errors.New("injected RPC error on send") 1123 } 1124 } 1125 1126 if !injectFailOnSend { 1127 start := time.Now() 1128 resp, err = s.client.SendRequest(ctx, sendToAddr, req, timeout) 1129 if s.Stats != nil { 1130 RecordRegionRequestRuntimeStats(s.Stats, req.Type, time.Since(start)) 1131 if val, err := util.EvalFailpoint("tikvStoreRespResult"); err == nil { 1132 if val.(bool) { 1133 if req.Type == tikvrpc.CmdCop && bo.GetTotalSleep() == 0 { 1134 return &tikvrpc.Response{ 1135 Resp: &coprocessor.Response{RegionError: &errorpb.Error{EpochNotMatch: &errorpb.EpochNotMatch{}}}, 1136 }, false, nil 1137 } 1138 } 1139 } 1140 } 1141 1142 if val, e := util.EvalFailpoint("rpcFailOnRecv"); e == nil { 1143 inject := true 1144 // Optional filters 1145 if s, ok := val.(string); ok { 1146 if s == "greengc" && !req.IsGreenGCRequest() { 1147 inject = false 1148 } else if s == "write" && !req.IsTxnWriteRequest() { 1149 inject = false 1150 } 1151 } else if sessionID == 0 { 1152 inject = false 1153 } 1154 1155 if inject { 1156 logutil.Logger(ctx).Info("[failpoint] injected RPC error on recv", zap.Stringer("type", req.Type), 1157 zap.Stringer("req", req.Req.(fmt.Stringer)), zap.Stringer("ctx", &req.Context)) 1158 err = errors.New("injected RPC error on recv") 1159 resp = nil 1160 } 1161 } 1162 1163 if val, e := util.EvalFailpoint("rpcContextCancelErr"); e == nil { 1164 if val.(bool) { 1165 ctx1, cancel := context.WithCancel(context.Background()) 1166 cancel() 1167 <-ctx1.Done() 1168 ctx = ctx1 1169 err = ctx.Err() 1170 resp = nil 1171 } 1172 } 1173 } 1174 1175 if rpcCtx.ProxyStore != nil { 1176 fromStore := strconv.FormatUint(rpcCtx.ProxyStore.storeID, 10) 1177 toStore := strconv.FormatUint(rpcCtx.Store.storeID, 10) 1178 result := "ok" 1179 if err != nil { 1180 result = "fail" 1181 } 1182 metrics.TiKVForwardRequestCounter.WithLabelValues(fromStore, toStore, req.Type.String(), result).Inc() 1183 } 1184 1185 if err != nil { 1186 s.rpcError = err 1187 1188 // Because in rpc logic, context.Cancel() will be transferred to rpcContext.Cancel error. For rpcContext cancel, 1189 // we need to retry the request. But for context cancel active, for example, limitExec gets the required rows, 1190 // we shouldn't retry the request, it will go to backoff and hang in retry logic. 
		if ctx.Err() != nil && errors.Cause(ctx.Err()) == context.Canceled {
			return nil, false, errors.Trace(ctx.Err())
		}

		if val, e := util.EvalFailpoint("noRetryOnRpcError"); e == nil {
			if val.(bool) {
				return nil, false, err
			}
		}
		if e := s.onSendFail(bo, rpcCtx, err); e != nil {
			return nil, false, errors.Trace(e)
		}
		return nil, true, nil
	}
	return
}

func (s *RegionRequestSender) getStoreToken(st *Store, limit int64) error {
	// Checking the limit is not thread-safe; we prefer this to avoid a load inside the loop.
	count := st.tokenCount.Load()
	if count < limit {
		// Adding to tokenCount is not thread-safe; we prefer this to avoid a check inside the loop.
		st.tokenCount.Add(1)
		return nil
	}
	metrics.TiKVStoreLimitErrorCounter.WithLabelValues(st.addr, strconv.FormatUint(st.storeID, 10)).Inc()
	return &tikverr.ErrTokenLimit{StoreID: st.storeID}
}

func (s *RegionRequestSender) releaseStoreToken(st *Store) {
	count := st.tokenCount.Load()
	// Decreasing tokenCount is not thread-safe; we prefer this to avoid a check inside the loop.
	if count > 0 {
		st.tokenCount.Sub(1)
		return
	}
	logutil.BgLogger().Warn("release store token failed, count equals to 0")
}

func (s *RegionRequestSender) onSendFail(bo *retry.Backoffer, ctx *RPCContext, err error) error {
	if span := opentracing.SpanFromContext(bo.GetCtx()); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("regionRequest.onSendFail", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		bo.SetCtx(opentracing.ContextWithSpan(bo.GetCtx(), span1))
	}
	// If it failed because the context was cancelled on our side, don't retry.
	if errors.Cause(err) == context.Canceled {
		return errors.Trace(err)
	} else if LoadShuttingDown() > 0 {
		return tikverr.ErrTiDBShuttingDown
	}
	if status.Code(errors.Cause(err)) == codes.Canceled {
		select {
		case <-bo.GetCtx().Done():
			return errors.Trace(err)
		default:
			// If we don't cancel, but the error code is Canceled, it must be from the grpc remote.
			// This may happen when tikv is killed and exiting.
			// Backoff and retry in this case.
			logutil.BgLogger().Warn("receive a grpc cancel signal from remote", zap.Error(err))
		}
	}

	if ctx.Meta != nil {
		if s.replicaSelector != nil {
			s.replicaSelector.onSendFailure(bo, err)
		} else {
			s.regionCache.OnSendFail(bo, ctx, s.NeedReloadRegion(ctx), err)
		}
	}

	// Retry on send request failure when it's not canceled.
	// When a store is not available, the leader of the related region should be elected quickly.
	// TODO: the number of retries should be limited, since the region may be unavailable
	// when some unrecoverable disaster happens.
	if ctx.Store != nil && ctx.Store.storeType == tikvrpc.TiFlash {
		err = bo.Backoff(retry.BoTiFlashRPC, errors.Errorf("send tiflash request error: %v, ctx: %v, try next peer later", err, ctx))
	} else {
		err = bo.Backoff(retry.BoTiKVRPC, errors.Errorf("send tikv request error: %v, ctx: %v, try next peer later", err, ctx))
	}
	return errors.Trace(err)
}

// NeedReloadRegion checks whether all peers have failed to send; if so, the region needs to be reloaded.
1275 func (s *RegionRequestSender) NeedReloadRegion(ctx *RPCContext) (need bool) { 1276 if s.failStoreIDs == nil { 1277 s.failStoreIDs = make(map[uint64]struct{}) 1278 } 1279 if s.failProxyStoreIDs == nil { 1280 s.failProxyStoreIDs = make(map[uint64]struct{}) 1281 } 1282 s.failStoreIDs[ctx.Store.storeID] = struct{}{} 1283 if ctx.ProxyStore != nil { 1284 s.failProxyStoreIDs[ctx.ProxyStore.storeID] = struct{}{} 1285 } 1286 1287 if ctx.AccessMode == tiKVOnly && len(s.failStoreIDs)+len(s.failProxyStoreIDs) >= ctx.TiKVNum { 1288 need = true 1289 } else if ctx.AccessMode == tiFlashOnly && len(s.failStoreIDs) >= len(ctx.Meta.Peers)-ctx.TiKVNum { 1290 need = true 1291 } else if len(s.failStoreIDs)+len(s.failProxyStoreIDs) >= len(ctx.Meta.Peers) { 1292 need = true 1293 } 1294 1295 if need { 1296 s.failStoreIDs = nil 1297 s.failProxyStoreIDs = nil 1298 } 1299 return 1300 } 1301 1302 func regionErrorToLabel(e *errorpb.Error) string { 1303 if e.GetNotLeader() != nil { 1304 return "not_leader" 1305 } else if e.GetRegionNotFound() != nil { 1306 return "region_not_found" 1307 } else if e.GetKeyNotInRegion() != nil { 1308 return "key_not_in_region" 1309 } else if e.GetEpochNotMatch() != nil { 1310 return "epoch_not_match" 1311 } else if e.GetServerIsBusy() != nil { 1312 return "server_is_busy" 1313 } else if e.GetStaleCommand() != nil { 1314 return "stale_command" 1315 } else if e.GetStoreNotMatch() != nil { 1316 return "store_not_match" 1317 } else if e.GetRaftEntryTooLarge() != nil { 1318 return "raft_entry_too_large" 1319 } else if e.GetMaxTimestampNotSynced() != nil { 1320 return "max_timestamp_not_synced" 1321 } else if e.GetReadIndexNotReady() != nil { 1322 return "read_index_not_ready" 1323 } else if e.GetProposalInMergingMode() != nil { 1324 return "proposal_in_merging_mode" 1325 } else if e.GetDataIsNotReady() != nil { 1326 return "data_is_not_ready" 1327 } else if e.GetRegionNotInitialized() != nil { 1328 return "region_not_initialized" 1329 } else if e.GetDiskFull() != nil { 1330 return "disk_full" 1331 } 1332 return "unknown" 1333 } 1334 1335 func (s *RegionRequestSender) onRegionError(bo *retry.Backoffer, ctx *RPCContext, req *tikvrpc.Request, regionErr *errorpb.Error) (shouldRetry bool, err error) { 1336 if span := opentracing.SpanFromContext(bo.GetCtx()); span != nil && span.Tracer() != nil { 1337 span1 := span.Tracer().StartSpan("tikv.onRegionError", opentracing.ChildOf(span.Context())) 1338 defer span1.Finish() 1339 bo.SetCtx(opentracing.ContextWithSpan(bo.GetCtx(), span1)) 1340 } 1341 1342 // NOTE: Please add the region error handler in the same order of errorpb.Error. 1343 metrics.TiKVRegionErrorCounter.WithLabelValues(regionErrorToLabel(regionErr)).Inc() 1344 1345 if notLeader := regionErr.GetNotLeader(); notLeader != nil { 1346 // Retry if error is `NotLeader`. 1347 logutil.BgLogger().Debug("tikv reports `NotLeader` retry later", 1348 zap.String("notLeader", notLeader.String()), 1349 zap.String("ctx", ctx.String())) 1350 1351 if s.replicaSelector != nil { 1352 return s.replicaSelector.onNotLeader(bo, ctx, notLeader) 1353 } else if notLeader.GetLeader() == nil { 1354 // The peer doesn't know who is the current leader. Generally it's because 1355 // the Raft group is in an election, but it's possible that the peer is 1356 // isolated and removed from the Raft group. So it's necessary to reload 1357 // the region from PD. 
1358 s.regionCache.InvalidateCachedRegionWithReason(ctx.Region, NoLeader) 1359 if err = bo.Backoff(retry.BoRegionScheduling, errors.Errorf("not leader: %v, ctx: %v", notLeader, ctx)); err != nil { 1360 return false, errors.Trace(err) 1361 } 1362 return false, nil 1363 } else { 1364 // don't backoff if a new leader is returned. 1365 s.regionCache.UpdateLeader(ctx.Region, notLeader.GetLeader(), ctx.AccessIdx) 1366 return true, nil 1367 } 1368 } 1369 1370 // Retry it when tikv disk full happens. 1371 if diskFull := regionErr.GetDiskFull(); diskFull != nil { 1372 if err = bo.Backoff(retry.BoTiKVDiskFull, errors.Errorf("tikv disk full: %v ctx: %v", diskFull.String(), ctx.String())); err != nil { 1373 retry.BoTiKVDiskFull.SetErrors(errors.Errorf("tikv disk full: %v", diskFull.String())) 1374 return false, nil 1375 } 1376 return true, nil 1377 } 1378 1379 // This peer is removed from the region. Invalidate the region since it's too stale. 1380 if regionErr.GetRegionNotFound() != nil { 1381 s.regionCache.InvalidateCachedRegion(ctx.Region) 1382 return false, nil 1383 } 1384 1385 if regionErr.GetKeyNotInRegion() != nil { 1386 logutil.BgLogger().Debug("tikv reports `KeyNotInRegion`", zap.Stringer("ctx", ctx)) 1387 s.regionCache.InvalidateCachedRegion(ctx.Region) 1388 return false, nil 1389 } 1390 1391 if epochNotMatch := regionErr.GetEpochNotMatch(); epochNotMatch != nil { 1392 logutil.BgLogger().Debug("tikv reports `EpochNotMatch` retry later", 1393 zap.Stringer("EpochNotMatch", epochNotMatch), 1394 zap.Stringer("ctx", ctx)) 1395 retry, err := s.regionCache.OnRegionEpochNotMatch(bo, ctx, epochNotMatch.CurrentRegions) 1396 if !retry && s.replicaSelector != nil { 1397 s.replicaSelector.invalidateRegion() 1398 } 1399 return retry, errors.Trace(err) 1400 } 1401 1402 if regionErr.GetServerIsBusy() != nil { 1403 logutil.BgLogger().Warn("tikv reports `ServerIsBusy` retry later", 1404 zap.String("reason", regionErr.GetServerIsBusy().GetReason()), 1405 zap.Stringer("ctx", ctx)) 1406 if ctx != nil && ctx.Store != nil && ctx.Store.storeType == tikvrpc.TiFlash { 1407 err = bo.Backoff(retry.BoTiFlashServerBusy, errors.Errorf("server is busy, ctx: %v", ctx)) 1408 } else { 1409 err = bo.Backoff(retry.BoTiKVServerBusy, errors.Errorf("server is busy, ctx: %v", ctx)) 1410 } 1411 if err != nil { 1412 return false, errors.Trace(err) 1413 } 1414 return true, nil 1415 } 1416 1417 // StaleCommand error indicates the request is sent to the old leader and its term is changed. 1418 // We can't know whether the request is committed or not, so it's an undetermined error too, 1419 // but we don't handle it now. 1420 if regionErr.GetStaleCommand() != nil { 1421 logutil.BgLogger().Debug("tikv reports `StaleCommand`", zap.Stringer("ctx", ctx)) 1422 if s.replicaSelector != nil { 1423 // Needn't backoff because the new leader should be elected soon 1424 // and the replicaSelector will try the next peer. 
		} else {
			err = bo.Backoff(retry.BoStaleCmd, errors.Errorf("stale command, ctx: %v", ctx))
			if err != nil {
				return false, errors.Trace(err)
			}
		}
		return true, nil
	}

	if storeNotMatch := regionErr.GetStoreNotMatch(); storeNotMatch != nil {
		// store not match
		logutil.BgLogger().Debug("tikv reports `StoreNotMatch` retry later",
			zap.Stringer("storeNotMatch", storeNotMatch),
			zap.Stringer("ctx", ctx))
		ctx.Store.markNeedCheck(s.regionCache.notifyCheckCh)
		s.regionCache.InvalidateCachedRegion(ctx.Region)
		return false, nil
	}

	if regionErr.GetRaftEntryTooLarge() != nil {
		logutil.BgLogger().Warn("tikv reports `RaftEntryTooLarge`", zap.Stringer("ctx", ctx))
		return false, errors.New(regionErr.String())
	}

	if regionErr.GetMaxTimestampNotSynced() != nil {
		logutil.BgLogger().Debug("tikv reports `MaxTimestampNotSynced`", zap.Stringer("ctx", ctx))
		err = bo.Backoff(retry.BoMaxTsNotSynced, errors.Errorf("max timestamp not synced, ctx: %v", ctx))
		if err != nil {
			return false, errors.Trace(err)
		}
		return true, nil
	}

	// A read request may be sent to a peer which has not been initialized yet; we should retry in this case.
	if regionErr.GetRegionNotInitialized() != nil {
		logutil.BgLogger().Debug("tikv reports `RegionNotInitialized` retry later",
			zap.Uint64("store-id", ctx.Store.storeID),
			zap.Uint64("region-id", regionErr.GetRegionNotInitialized().GetRegionId()),
			zap.Stringer("ctx", ctx))
		err = bo.Backoff(retry.BoMaxRegionNotInitialized, errors.Errorf("region not initialized"))
		if err != nil {
			return false, errors.Trace(err)
		}
		return true, nil
	}

	// The read-index can't be handled timely because the region is splitting or merging.
	if regionErr.GetReadIndexNotReady() != nil {
		logutil.BgLogger().Debug("tikv reports `ReadIndexNotReady` retry later",
			zap.Uint64("store-id", ctx.Store.storeID),
			zap.Uint64("region-id", regionErr.GetReadIndexNotReady().GetRegionId()),
			zap.Stringer("ctx", ctx))
		// The region can't provide service until split or merge finished, so backoff.
		err = bo.Backoff(retry.BoRegionScheduling, errors.Errorf("read index not ready, ctx: %v", ctx))
		if err != nil {
			return false, errors.Trace(err)
		}
		return true, nil
	}

	if regionErr.GetProposalInMergingMode() != nil {
		logutil.BgLogger().Debug("tikv reports `ProposalInMergingMode`", zap.Stringer("ctx", ctx))
		// The region is merging and it can't provide service until merge finished, so backoff.
		err = bo.Backoff(retry.BoRegionScheduling, errors.Errorf("region is merging, ctx: %v", ctx))
		if err != nil {
			return false, errors.Trace(err)
		}
		return true, nil
	}

	// A stale read request may be sent to a peer on which the data is not ready yet; we should retry in this case.
	// This error is specific to stale read and the target replica is randomly selected. If the request is sent
	// to the leader, the data must be ready, so we don't backoff here.
1498 if regionErr.GetDataIsNotReady() != nil { 1499 logutil.BgLogger().Warn("tikv reports `DataIsNotReady` retry later", 1500 zap.Uint64("store-id", ctx.Store.storeID), 1501 zap.Uint64("peer-id", regionErr.GetDataIsNotReady().GetPeerId()), 1502 zap.Uint64("region-id", regionErr.GetDataIsNotReady().GetRegionId()), 1503 zap.Uint64("safe-ts", regionErr.GetDataIsNotReady().GetSafeTs()), 1504 zap.Stringer("ctx", ctx)) 1505 err = bo.Backoff(retry.BoMaxDataNotReady, errors.Errorf("data is not ready")) 1506 if err != nil { 1507 return false, errors.Trace(err) 1508 } 1509 return true, nil 1510 } 1511 1512 logutil.BgLogger().Debug("tikv reports region failed", 1513 zap.Stringer("regionErr", regionErr), 1514 zap.Stringer("ctx", ctx)) 1515 1516 if s.replicaSelector != nil { 1517 // Try the next replica. 1518 return true, nil 1519 } 1520 1521 // When the request is sent to TiDB, there is no region in the request, so the region id will be 0. 1522 // So when region id is 0, there is no business with region cache. 1523 if ctx.Region.id != 0 { 1524 s.regionCache.InvalidateCachedRegion(ctx.Region) 1525 } 1526 // For other errors, we only drop cache here. 1527 // Because caller may need to re-split the request. 1528 return false, nil 1529 }
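// Illustrative sketch (not from the original source): a minimal end-to-end use
// of RegionRequestSender from caller code. The backoff budget, the read
// version, and the way cache/rpcClient are obtained are assumptions made only
// for demonstration.
func exampleSendGet(cache *RegionCache, rpcClient client.Client, key []byte) (*tikvrpc.Response, error) {
	bo := retry.NewBackoffer(context.Background(), 20000 /* maxSleep in ms */)
	loc, err := cache.LocateKey(bo, key)
	if err != nil {
		return nil, err
	}
	// Version is a placeholder read timestamp used only for illustration.
	req := tikvrpc.NewRequest(tikvrpc.CmdGet, &kvrpcpb.GetRequest{Key: key, Version: 1})
	sender := NewRegionRequestSender(cache, rpcClient)
	// SendReq retries network errors and retryable region errors internally;
	// a (possibly fake) region error left in the response must still be
	// checked by the caller, e.g. via resp.GetRegionError().
	return sender.SendReq(bo, req, loc.Region, time.Second)
}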