github.com/KinWaiYuen/client-go/v2@v2.5.4/internal/locate/region_cache.go

// Copyright 2021 TiKV Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: The code in this file is based on code from the
// TiDB project, licensed under the Apache License v 2.0
//
// https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/locate/region_cache.go
//

// Copyright 2016 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package locate

import (
	"bytes"
	"context"
	"fmt"
	"math/rand"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"unsafe"

	"github.com/KinWaiYuen/client-go/v2/config"
	"github.com/KinWaiYuen/client-go/v2/internal/client"
	"github.com/KinWaiYuen/client-go/v2/internal/logutil"
	"github.com/KinWaiYuen/client-go/v2/internal/retry"
	"github.com/KinWaiYuen/client-go/v2/kv"
	"github.com/KinWaiYuen/client-go/v2/metrics"
	"github.com/KinWaiYuen/client-go/v2/tikvrpc"
	"github.com/KinWaiYuen/client-go/v2/util"
	"github.com/gogo/protobuf/proto"
	"github.com/google/btree"
	"github.com/opentracing/opentracing-go"
	"github.com/pingcap/errors"
	"github.com/pingcap/kvproto/pkg/metapb"
	"github.com/pingcap/parser/terror"
	pd "github.com/tikv/pd/client"
	atomic2 "go.uber.org/atomic"
	"go.uber.org/zap"
	"golang.org/x/sync/singleflight"
	"google.golang.org/grpc"
	"google.golang.org/grpc/backoff"
	"google.golang.org/grpc/credentials"
	healthpb "google.golang.org/grpc/health/grpc_health_v1"
	"google.golang.org/grpc/keepalive"
)

const (
	btreeDegree               = 32
	invalidatedLastAccessTime = -1
	defaultRegionsPerBatch    = 128
)

// regionCacheTTLSec is the max idle time for regions in the region cache.
var regionCacheTTLSec int64 = 600

// SetRegionCacheTTLSec sets regionCacheTTLSec to t.
func SetRegionCacheTTLSec(t int64) {
	regionCacheTTLSec = t
}

const (
	updated  int32 = iota // region is updated and no need to reload.
	needSync              // need sync new region info.
)
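// A minimal tuning sketch (illustrative, not part of the original file). Since
// this package lives under internal/, outside callers reach this knob through
// the library's exported wrappers; the 30-minute value is an arbitrary example:
//
//	// Keep idle regions cached for 30 minutes instead of the default 600s.
//	SetRegionCacheTTLSec(30 * 60)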
// InvalidReason is the reason why a cached region is invalidated.
// The region cache may take different strategies to handle different reasons.
// For example, when a cached region is invalidated due to no leader, the region
// cache will always access a different peer.
type InvalidReason int32

const (
	// Ok indicates the cached region is valid
	Ok InvalidReason = iota
	// NoLeader indicates it's invalidated due to no leader
	NoLeader
	// RegionNotFound indicates it's invalidated due to region not found in the store
	RegionNotFound
	// EpochNotMatch indicates it's invalidated due to epoch not match
	EpochNotMatch
	// StoreNotFound indicates it's invalidated due to store not found in PD
	StoreNotFound
	// Other indicates it's invalidated due to other reasons, e.g., the store
	// is removed from the cluster, or requests to the store fail to be sent.
	Other
)

// Region presents a kv region
type Region struct {
	meta          *metapb.Region // raw region meta from PD, immutable after init
	store         unsafe.Pointer // points to region store info, see RegionStore
	syncFlag      int32          // region needs to be synced in the next turn
	lastAccess    int64          // last region access time, see checkRegionCacheTTL
	invalidReason InvalidReason  // the reason why the region is invalidated
}

// AccessIndex represents the index in the accessIndex array
type AccessIndex int

// regionStore represents the region's stores info.
// It is stored as an unsafe.Pointer and loaded/replaced atomically as a whole.
type regionStore struct {
	workTiKVIdx    AccessIndex          // points to the current work peer in meta.Peers and the work store in stores (same idx) for a tikv peer
	proxyTiKVIdx   AccessIndex          // points to the tikv peer that can forward requests to the leader. -1 means not using proxy
	workTiFlashIdx int32                // points to the current work peer in meta.Peers and the work store in stores (same idx) for a tiflash peer
	stores         []*Store             // stores in this region
	storeEpochs    []uint32             // snapshots of the stores' epochs, need reload when `storeEpochs[curr] != stores[cur].fail`
	accessIndex    [numAccessMode][]int // AccessMode => idx in stores
}

func (r *regionStore) accessStore(mode accessMode, idx AccessIndex) (int, *Store) {
	sidx := r.accessIndex[mode][idx]
	return sidx, r.stores[sidx]
}

func (r *regionStore) getAccessIndex(mode accessMode, store *Store) AccessIndex {
	for index, sidx := range r.accessIndex[mode] {
		if r.stores[sidx].storeID == store.storeID {
			return AccessIndex(index)
		}
	}
	return -1
}

func (r *regionStore) accessStoreNum(mode accessMode) int {
	return len(r.accessIndex[mode])
}
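// Illustrative note (not in the original source): accessStore resolves an
// AccessIndex through the per-mode indirection table above. For a region whose
// peers live on stores [tikv-1, tiflash-5, tikv-2] (in stores order),
// accessIndex[tiKVOnly] is [0, 2], so accessStore(tiKVOnly, 1) yields
// storeIdx 2 and the *Store for tikv-2.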
// clone clones the region store struct.
func (r *regionStore) clone() *regionStore {
	storeEpochs := make([]uint32, len(r.stores))
	rs := &regionStore{
		workTiFlashIdx: r.workTiFlashIdx,
		proxyTiKVIdx:   r.proxyTiKVIdx,
		workTiKVIdx:    r.workTiKVIdx,
		stores:         r.stores,
		storeEpochs:    storeEpochs,
	}
	copy(storeEpochs, r.storeEpochs)
	for i := 0; i < int(numAccessMode); i++ {
		rs.accessIndex[i] = make([]int, len(r.accessIndex[i]))
		copy(rs.accessIndex[i], r.accessIndex[i])
	}
	return rs
}

// follower returns the next follower store's index.
func (r *regionStore) follower(seed uint32, op *storeSelectorOp) AccessIndex {
	l := uint32(r.accessStoreNum(tiKVOnly))
	if l <= 1 {
		return r.workTiKVIdx
	}

	for retry := l - 1; retry > 0; retry-- {
		followerIdx := AccessIndex(seed % (l - 1))
		if followerIdx >= r.workTiKVIdx {
			followerIdx++
		}
		storeIdx, s := r.accessStore(tiKVOnly, followerIdx)
		if r.storeEpochs[storeIdx] == atomic.LoadUint32(&s.epoch) && r.filterStoreCandidate(followerIdx, op) {
			return followerIdx
		}
		seed++
	}
	return r.workTiKVIdx
}

// kvPeer returns the next leader or follower store's index.
func (r *regionStore) kvPeer(seed uint32, op *storeSelectorOp) AccessIndex {
	if op.leaderOnly {
		return r.workTiKVIdx
	}
	candidates := make([]AccessIndex, 0, r.accessStoreNum(tiKVOnly))
	for i := 0; i < r.accessStoreNum(tiKVOnly); i++ {
		accessIdx := AccessIndex(i)
		storeIdx, s := r.accessStore(tiKVOnly, accessIdx)
		if r.storeEpochs[storeIdx] != atomic.LoadUint32(&s.epoch) || !r.filterStoreCandidate(accessIdx, op) {
			continue
		}
		candidates = append(candidates, accessIdx)
	}
	// If there are no candidates, send to the current workTiKVIdx, which generally is the leader.
	if len(candidates) == 0 {
		return r.workTiKVIdx
	}
	return candidates[seed%uint32(len(candidates))]
}

func (r *regionStore) filterStoreCandidate(aidx AccessIndex, op *storeSelectorOp) bool {
	_, s := r.accessStore(tiKVOnly, aidx)
	// filter out stores with unmatched labels
	return s.IsLabelsMatch(op.labels)
}
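// Illustrative note (not in the original source): the seed keeps follower
// selection stable per request while spreading load across replicas. With
// three TiKV stores and workTiKVIdx == 0, follower maps seeds 0,1,2,3,... to
// access indexes 1,2,1,2,... because the leader slot is skipped by the
// followerIdx++ adjustment, and the seed is advanced on each filtered-out
// candidate.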
// init initializes the region after it is constructed.
func (r *Region) init(bo *retry.Backoffer, c *RegionCache) error {
	// regionStore pulls the used stores from the global store map
	// to avoid acquiring storeMu in later accesses.
	rs := &regionStore{
		workTiKVIdx:    0,
		proxyTiKVIdx:   -1,
		workTiFlashIdx: 0,
		stores:         make([]*Store, 0, len(r.meta.Peers)),
		storeEpochs:    make([]uint32, 0, len(r.meta.Peers)),
	}
	availablePeers := r.meta.GetPeers()[:0]
	for _, p := range r.meta.Peers {
		c.storeMu.RLock()
		store, exists := c.storeMu.stores[p.StoreId]
		c.storeMu.RUnlock()
		if !exists {
			store = c.getStoreByStoreID(p.StoreId)
		}
		addr, err := store.initResolve(bo, c)
		if err != nil {
			return err
		}
		// Filter out the peer on a tombstone store.
		if addr == "" {
			continue
		}
		availablePeers = append(availablePeers, p)
		switch store.storeType {
		case tikvrpc.TiKV:
			rs.accessIndex[tiKVOnly] = append(rs.accessIndex[tiKVOnly], len(rs.stores))
		case tikvrpc.TiFlash:
			rs.accessIndex[tiFlashOnly] = append(rs.accessIndex[tiFlashOnly], len(rs.stores))
		}
		rs.stores = append(rs.stores, store)
		rs.storeEpochs = append(rs.storeEpochs, atomic.LoadUint32(&store.epoch))
	}
	// TODO(youjiali1995): It's possible the region info in PD is stale for now but it can recover.
	// Maybe we need to backoff here.
	if len(availablePeers) == 0 {
		return errors.Errorf("no available peers, region: {%v}", r.meta)
	}
	r.meta.Peers = availablePeers

	atomic.StorePointer(&r.store, unsafe.Pointer(rs))

	// mark the region as accessed at init time.
	r.lastAccess = time.Now().Unix()
	return nil
}

func (r *Region) getStore() (store *regionStore) {
	store = (*regionStore)(atomic.LoadPointer(&r.store))
	return
}

func (r *Region) compareAndSwapStore(oldStore, newStore *regionStore) bool {
	return atomic.CompareAndSwapPointer(&r.store, unsafe.Pointer(oldStore), unsafe.Pointer(newStore))
}

func (r *Region) checkRegionCacheTTL(ts int64) bool {
	// Only consider using a percentage on this failpoint, for example, "2%return"
	if _, err := util.EvalFailpoint("invalidateRegionCache"); err == nil {
		r.invalidate(Other)
	}
	for {
		lastAccess := atomic.LoadInt64(&r.lastAccess)
		if ts-lastAccess > regionCacheTTLSec {
			return false
		}
		if atomic.CompareAndSwapInt64(&r.lastAccess, lastAccess, ts) {
			return true
		}
	}
}

// invalidate invalidates a region; the next lookup will get a null result.
func (r *Region) invalidate(reason InvalidReason) {
	metrics.RegionCacheCounterWithInvalidateRegionFromCacheOK.Inc()
	atomic.StoreInt32((*int32)(&r.invalidReason), int32(reason))
	atomic.StoreInt64(&r.lastAccess, invalidatedLastAccessTime)
}

// scheduleReload schedules a reload of the region on the next LocateKey.
func (r *Region) scheduleReload() {
	oldValue := atomic.LoadInt32(&r.syncFlag)
	if oldValue != updated {
		return
	}
	atomic.CompareAndSwapInt32(&r.syncFlag, oldValue, needSync)
}

// checkNeedReloadAndMarkUpdated returns whether the region needs to be reloaded and marks the region as updated.
func (r *Region) checkNeedReloadAndMarkUpdated() bool {
	oldValue := atomic.LoadInt32(&r.syncFlag)
	if oldValue == updated {
		return false
	}
	return atomic.CompareAndSwapInt32(&r.syncFlag, oldValue, updated)
}

func (r *Region) checkNeedReload() bool {
	v := atomic.LoadInt32(&r.syncFlag)
	return v != updated
}

func (r *Region) isValid() bool {
	return r != nil && !r.checkNeedReload() && r.checkRegionCacheTTL(time.Now().Unix())
}

// RegionCache caches Regions loaded from PD.
type RegionCache struct {
	pdClient         pd.Client
	enableForwarding bool

	mu struct {
		sync.RWMutex                           // mutex protects the cached regions
		regions        map[RegionVerID]*Region // cached regions are organized as a regionVerID-to-region mapping
		latestVersions map[uint64]RegionVerID  // caches the mapping from regionID to its latest RegionVerID
		sorted         *btree.BTree            // cached regions are organized as a sorted key-to-region mapping
	}
	storeMu struct {
		sync.RWMutex
		stores map[uint64]*Store
	}
	notifyCheckCh chan struct{}
	closeCh       chan struct{}

	testingKnobs struct {
		// Replace the requestLiveness function for test purposes. Note that in unit tests, if this is not set,
		// requestLiveness always returns unreachable.
		mockRequestLiveness func(s *Store, bo *retry.Backoffer) livenessState
	}
}
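// A minimal construction sketch (illustrative, not part of the original file).
// It assumes a PD client created via pd.NewClient; error handling is elided:
//
//	pdCli, _ := pd.NewClient([]string{"127.0.0.1:2379"}, pd.SecurityOption{})
//	cache := NewRegionCache(pdCli)
//	defer cache.Close()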
// NewRegionCache creates a RegionCache.
func NewRegionCache(pdClient pd.Client) *RegionCache {
	c := &RegionCache{
		pdClient: pdClient,
	}
	c.mu.regions = make(map[RegionVerID]*Region)
	c.mu.latestVersions = make(map[uint64]RegionVerID)
	c.mu.sorted = btree.New(btreeDegree)
	c.storeMu.stores = make(map[uint64]*Store)
	c.notifyCheckCh = make(chan struct{}, 1)
	c.closeCh = make(chan struct{})
	interval := config.GetGlobalConfig().StoresRefreshInterval
	go c.asyncCheckAndResolveLoop(time.Duration(interval) * time.Second)
	c.enableForwarding = config.GetGlobalConfig().EnableForwarding
	return c
}

// clear clears all cached data in the RegionCache. It's only used in tests.
func (c *RegionCache) clear() {
	c.mu.Lock()
	c.mu.regions = make(map[RegionVerID]*Region)
	c.mu.latestVersions = make(map[uint64]RegionVerID)
	c.mu.sorted = btree.New(btreeDegree)
	c.mu.Unlock()
	c.storeMu.Lock()
	c.storeMu.stores = make(map[uint64]*Store)
	c.storeMu.Unlock()
}

// Close releases the region cache's resources.
func (c *RegionCache) Close() {
	close(c.closeCh)
}

// asyncCheckAndResolveLoop resolves stores that need checking, both on notification and periodically.
func (c *RegionCache) asyncCheckAndResolveLoop(interval time.Duration) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	var needCheckStores []*Store
	for {
		needCheckStores = needCheckStores[:0]
		select {
		case <-c.closeCh:
			return
		case <-c.notifyCheckCh:
			c.checkAndResolve(needCheckStores, func(s *Store) bool {
				return s.getResolveState() == needCheck
			})
		case <-ticker.C:
			// refresh the stores to update labels.
			c.checkAndResolve(needCheckStores, func(s *Store) bool {
				state := s.getResolveState()
				// Only valid stores should be re-resolved. In fact, it's impossible for
				// a deleted store to be in the stores map, which is guaranteed by reResolve().
				return state != unresolved && state != tombstone && state != deleted
			})
		}
	}
}

// checkAndResolve checks and resolves the addresses of failed stores.
// This method isn't thread-safe and should only be used by one goroutine.
func (c *RegionCache) checkAndResolve(needCheckStores []*Store, needCheck func(*Store) bool) {
	defer func() {
		r := recover()
		if r != nil {
			logutil.BgLogger().Error("panic in the checkAndResolve goroutine",
				zap.Reflect("r", r),
				zap.Stack("stack trace"))
		}
	}()

	c.storeMu.RLock()
	for _, store := range c.storeMu.stores {
		if needCheck(store) {
			needCheckStores = append(needCheckStores, store)
		}
	}
	c.storeMu.RUnlock()

	for _, store := range needCheckStores {
		_, err := store.reResolve(c)
		terror.Log(err)
	}
}

// SetRegionCacheStore is used to set a store in the region cache, for testing only.
func (c *RegionCache) SetRegionCacheStore(id uint64, storeType tikvrpc.EndpointType, state uint64, labels []*metapb.StoreLabel) {
	c.storeMu.Lock()
	defer c.storeMu.Unlock()
	c.storeMu.stores[id] = &Store{
		storeID:   id,
		storeType: storeType,
		state:     state,
		labels:    labels,
	}
}

// SetPDClient replaces the pd client, for testing only.
func (c *RegionCache) SetPDClient(client pd.Client) {
	c.pdClient = client
}
// RPCContext contains data that is needed to send an RPC to a region.
type RPCContext struct {
	Region     RegionVerID
	Meta       *metapb.Region
	Peer       *metapb.Peer
	AccessIdx  AccessIndex
	Store      *Store
	Addr       string
	AccessMode accessMode
	ProxyStore *Store // nil means proxy is not used
	ProxyAddr  string // valid when ProxyStore is not nil
	TiKVNum    int    // Number of TiKV nodes among the region's peers. Assuming non-TiKV peers are all TiFlash peers.
}

func (c *RPCContext) String() string {
	var runStoreType string
	if c.Store != nil {
		runStoreType = c.Store.storeType.Name()
	}
	res := fmt.Sprintf("region ID: %d, meta: %s, peer: %s, addr: %s, idx: %d, reqStoreType: %s, runStoreType: %s",
		c.Region.GetID(), c.Meta, c.Peer, c.Addr, c.AccessIdx, c.AccessMode, runStoreType)
	if c.ProxyStore != nil {
		res += fmt.Sprintf(", proxy store id: %d, proxy addr: %s", c.ProxyStore.storeID, c.ProxyStore.addr)
	}
	return res
}

type storeSelectorOp struct {
	leaderOnly bool
	labels     []*metapb.StoreLabel
}

// StoreSelectorOption configures storeSelectorOp.
type StoreSelectorOption func(*storeSelectorOp)

// WithMatchLabels indicates selecting stores with matched labels.
func WithMatchLabels(labels []*metapb.StoreLabel) StoreSelectorOption {
	return func(op *storeSelectorOp) {
		op.labels = append(op.labels, labels...)
	}
}

// WithLeaderOnly indicates selecting the leader store only.
func WithLeaderOnly() StoreSelectorOption {
	return func(op *storeSelectorOp) {
		op.leaderOnly = true
	}
}
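// A selector-option sketch (illustrative, not part of the original file): route
// a follower read to replicas matching a label. The "zone" label key is an
// assumption about how the cluster's stores are labeled:
//
//	labels := []*metapb.StoreLabel{{Key: "zone", Value: "us-west-1"}}
//	rpcCtx, err := cache.GetTiKVRPCContext(bo, loc.Region, kv.ReplicaReadFollower, seed,
//		WithMatchLabels(labels))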
// GetTiKVRPCContext returns RPCContext for a region. If it returns nil, the region
// must be out of date and already dropped from the cache.
func (c *RegionCache) GetTiKVRPCContext(bo *retry.Backoffer, id RegionVerID, replicaRead kv.ReplicaReadType, followerStoreSeed uint32, opts ...StoreSelectorOption) (*RPCContext, error) {
	ts := time.Now().Unix()

	cachedRegion := c.GetCachedRegionWithRLock(id)
	if cachedRegion == nil {
		return nil, nil
	}

	if cachedRegion.checkNeedReload() {
		return nil, nil
	}

	if !cachedRegion.checkRegionCacheTTL(ts) {
		return nil, nil
	}

	regionStore := cachedRegion.getStore()
	var (
		store     *Store
		peer      *metapb.Peer
		storeIdx  int
		accessIdx AccessIndex
	)
	options := &storeSelectorOp{}
	for _, op := range opts {
		op(options)
	}
	isLeaderReq := false
	switch replicaRead {
	case kv.ReplicaReadFollower:
		store, peer, accessIdx, storeIdx = cachedRegion.FollowerStorePeer(regionStore, followerStoreSeed, options)
	case kv.ReplicaReadMixed:
		store, peer, accessIdx, storeIdx = cachedRegion.AnyStorePeer(regionStore, followerStoreSeed, options)
	default:
		isLeaderReq = true
		store, peer, accessIdx, storeIdx = cachedRegion.WorkStorePeer(regionStore)
	}
	addr, err := c.getStoreAddr(bo, cachedRegion, store)
	if err != nil {
		return nil, err
	}
	// enable by `curl -XPUT -d '1*return("[some-addr]")->return("")' http://host:port/tikvclient/injectWrongStoreAddr`
	if val, err := util.EvalFailpoint("injectWrongStoreAddr"); err == nil {
		if a, ok := val.(string); ok && len(a) > 0 {
			addr = a
		}
	}
	if store == nil || len(addr) == 0 {
		// Store not found, region must be out of date.
		cachedRegion.invalidate(StoreNotFound)
		return nil, nil
	}

	storeFailEpoch := atomic.LoadUint32(&store.epoch)
	if storeFailEpoch != regionStore.storeEpochs[storeIdx] {
		cachedRegion.invalidate(Other)
		logutil.BgLogger().Info("invalidate current region, because others failed on same store",
			zap.Uint64("region", id.GetID()),
			zap.String("store", store.addr))
		return nil, nil
	}

	var (
		proxyStore *Store
		proxyAddr  string
	)
	if c.enableForwarding && isLeaderReq {
		if atomic.LoadInt32(&store.unreachable) == 0 {
			regionStore.unsetProxyStoreIfNeeded(cachedRegion)
		} else {
			proxyStore, _, _ = c.getProxyStore(cachedRegion, store, regionStore, accessIdx)
			if proxyStore != nil {
				proxyAddr, err = c.getStoreAddr(bo, cachedRegion, proxyStore)
				if err != nil {
					return nil, err
				}
			}
		}
	}

	return &RPCContext{
		Region:     id,
		Meta:       cachedRegion.meta,
		Peer:       peer,
		AccessIdx:  accessIdx,
		Store:      store,
		Addr:       addr,
		AccessMode: tiKVOnly,
		ProxyStore: proxyStore,
		ProxyAddr:  proxyAddr,
		TiKVNum:    regionStore.accessStoreNum(tiKVOnly),
	}, nil
}

// GetAllValidTiFlashStores returns the store IDs of all valid TiFlash stores; the store ID of currentStore is always the first one.
func (c *RegionCache) GetAllValidTiFlashStores(id RegionVerID, currentStore *Store) []uint64 {
	// set the cap to 2 because usually a TiFlash table will have 2 replicas
	allStores := make([]uint64, 0, 2)
	// make sure the currentStore id is always the first in allStores
	allStores = append(allStores, currentStore.storeID)
	ts := time.Now().Unix()
	cachedRegion := c.GetCachedRegionWithRLock(id)
	if cachedRegion == nil {
		return allStores
	}
	if !cachedRegion.checkRegionCacheTTL(ts) {
		return allStores
	}
	regionStore := cachedRegion.getStore()
	currentIndex := regionStore.getAccessIndex(tiFlashOnly, currentStore)
	if currentIndex == -1 {
		return allStores
	}
	for startOffset := 1; startOffset < regionStore.accessStoreNum(tiFlashOnly); startOffset++ {
		accessIdx := AccessIndex((int(currentIndex) + startOffset) % regionStore.accessStoreNum(tiFlashOnly))
		storeIdx, store := regionStore.accessStore(tiFlashOnly, accessIdx)
		if store.getResolveState() == needCheck {
			continue
		}
		storeFailEpoch := atomic.LoadUint32(&store.epoch)
		if storeFailEpoch != regionStore.storeEpochs[storeIdx] {
			continue
		}
		allStores = append(allStores, store.storeID)
	}
	return allStores
}
// GetTiFlashRPCContext returns the RPCContext for a region that must be accessed via a TiFlash store. If it returns
// nil, the region must be out of date and already dropped from the cache, or no TiFlash store was found.
// `loadBalance` is an option. For MPP and batch cop it is pointless and might cause the failed store to be retried repeatedly.
func (c *RegionCache) GetTiFlashRPCContext(bo *retry.Backoffer, id RegionVerID, loadBalance bool) (*RPCContext, error) {
	ts := time.Now().Unix()

	cachedRegion := c.GetCachedRegionWithRLock(id)
	if cachedRegion == nil {
		return nil, nil
	}
	if !cachedRegion.checkRegionCacheTTL(ts) {
		return nil, nil
	}

	regionStore := cachedRegion.getStore()

	// sIdx is for load balance of the TiFlash store.
	var sIdx int
	if loadBalance {
		sIdx = int(atomic.AddInt32(&regionStore.workTiFlashIdx, 1))
	} else {
		sIdx = int(atomic.LoadInt32(&regionStore.workTiFlashIdx))
	}
	for i := 0; i < regionStore.accessStoreNum(tiFlashOnly); i++ {
		accessIdx := AccessIndex((sIdx + i) % regionStore.accessStoreNum(tiFlashOnly))
		storeIdx, store := regionStore.accessStore(tiFlashOnly, accessIdx)
		addr, err := c.getStoreAddr(bo, cachedRegion, store)
		if err != nil {
			return nil, err
		}
		if len(addr) == 0 {
			cachedRegion.invalidate(StoreNotFound)
			return nil, nil
		}
		if store.getResolveState() == needCheck {
			_, err := store.reResolve(c)
			terror.Log(err)
		}
		atomic.StoreInt32(&regionStore.workTiFlashIdx, int32(accessIdx))
		peer := cachedRegion.meta.Peers[storeIdx]
		storeFailEpoch := atomic.LoadUint32(&store.epoch)
		if storeFailEpoch != regionStore.storeEpochs[storeIdx] {
			cachedRegion.invalidate(Other)
			logutil.BgLogger().Info("invalidate current region, because others failed on same store",
				zap.Uint64("region", id.GetID()),
				zap.String("store", store.addr))
			// TiFlash will always try to find a valid peer, to avoid retrying too many times.
			continue
		}
		return &RPCContext{
			Region:     id,
			Meta:       cachedRegion.meta,
			Peer:       peer,
			AccessIdx:  accessIdx,
			Store:      store,
			Addr:       addr,
			AccessMode: tiFlashOnly,
			TiKVNum:    regionStore.accessStoreNum(tiKVOnly),
		}, nil
	}

	cachedRegion.invalidate(Other)
	return nil, nil
}

// KeyLocation is the region and range that a key is located in.
type KeyLocation struct {
	Region   RegionVerID
	StartKey []byte
	EndKey   []byte
}

// Contains checks if the key is in [StartKey, EndKey).
func (l *KeyLocation) Contains(key []byte) bool {
	return bytes.Compare(l.StartKey, key) <= 0 &&
		(bytes.Compare(key, l.EndKey) < 0 || len(l.EndKey) == 0)
}

// String implements the fmt.Stringer interface.
func (l *KeyLocation) String() string {
	return fmt.Sprintf("region %s,startKey:%s,endKey:%s", l.Region.String(), kv.StrKey(l.StartKey), kv.StrKey(l.EndKey))
}

// LocateKey searches for the region and range that the key is located in.
func (c *RegionCache) LocateKey(bo *retry.Backoffer, key []byte) (*KeyLocation, error) {
	r, err := c.findRegionByKey(bo, key, false)
	if err != nil {
		return nil, err
	}
	return &KeyLocation{
		Region:   r.VerID(),
		StartKey: r.StartKey(),
		EndKey:   r.EndKey(),
	}, nil
}

// LocateEndKey searches for the region and range that the key is located in.
// Unlike LocateKey, the start key of a region is exclusive and the end key is inclusive.
func (c *RegionCache) LocateEndKey(bo *retry.Backoffer, key []byte) (*KeyLocation, error) {
	r, err := c.findRegionByKey(bo, key, true)
	if err != nil {
		return nil, err
	}
	return &KeyLocation{
		Region:   r.VerID(),
		StartKey: r.StartKey(),
		EndKey:   r.EndKey(),
	}, nil
}
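// A lookup sketch (illustrative, not part of the original file): locate a key,
// then use Contains to decide whether a second key can share the same request:
//
//	loc, err := cache.LocateKey(bo, []byte("k1"))
//	if err != nil {
//		return err
//	}
//	if loc.Contains([]byte("k2")) {
//		// k1 and k2 belong to the same region and can be batched together.
//	}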
func (c *RegionCache) findRegionByKey(bo *retry.Backoffer, key []byte, isEndKey bool) (r *Region, err error) {
	r = c.searchCachedRegion(key, isEndKey)
	if r == nil {
		// load the region when it does not exist or is expired.
		lr, err := c.loadRegion(bo, key, isEndKey)
		if err != nil {
			// no region data, return error if failure.
			return nil, err
		}
		logutil.Eventf(bo.GetCtx(), "load region %d from pd, due to cache-miss", lr.GetID())
		r = lr
		c.mu.Lock()
		c.insertRegionToCache(r)
		c.mu.Unlock()
	} else if r.checkNeedReloadAndMarkUpdated() {
		// load the region when it is marked as needing reload.
		lr, err := c.loadRegion(bo, key, isEndKey)
		if err != nil {
			// ignore error and use old region info.
			logutil.Logger(bo.GetCtx()).Error("load region failure",
				zap.ByteString("key", key), zap.Error(err))
		} else {
			logutil.Eventf(bo.GetCtx(), "load region %d from pd, due to need-reload", lr.GetID())
			r = lr
			c.mu.Lock()
			c.insertRegionToCache(r)
			c.mu.Unlock()
		}
	}
	return r, nil
}

// OnSendFailForTiFlash handles the send-request-fail logic for TiFlash.
func (c *RegionCache) OnSendFailForTiFlash(bo *retry.Backoffer, store *Store, region RegionVerID, prev *metapb.Region, scheduleReload bool, err error, skipSwitchPeerLog bool) {
	r := c.GetCachedRegionWithRLock(region)
	if r == nil {
		return
	}

	rs := r.getStore()
	peersNum := len(r.GetMeta().Peers)
	if len(prev.Peers) != peersNum {
		logutil.Logger(bo.GetCtx()).Info("retry and refresh current region after send request fail and up/down stores length changed",
			zap.Stringer("region", &region),
			zap.Bool("needReload", scheduleReload),
			zap.Reflect("oldPeers", prev.Peers),
			zap.Reflect("newPeers", r.GetMeta().Peers),
			zap.Error(err))
		return
	}

	accessMode := tiFlashOnly
	accessIdx := rs.getAccessIndex(accessMode, store)
	if accessIdx == -1 {
		logutil.Logger(bo.GetCtx()).Warn("can not get access index for region " + region.String())
		return
	}
	if err != nil {
		storeIdx, s := rs.accessStore(accessMode, accessIdx)
		c.markRegionNeedBeRefill(s, storeIdx, rs)
	}

	// try the next peer
	rs.switchNextFlashPeer(r, accessIdx)
	// In most scenarios, TiFlash batches all the regions in one TiFlash store into one request. On a send failure,
	// this function is called repeatedly for all the regions. Since one TiFlash store might contain thousands of
	// regions, we need a way to avoid generating too many useless logs.
	if !skipSwitchPeerLog {
		logutil.Logger(bo.GetCtx()).Info("switch region tiflash peer to next due to send request fail",
			zap.Stringer("region", &region),
			zap.Bool("needReload", scheduleReload),
			zap.Error(err))
	}

	// force a region reload after retrying all known peers in the region.
	if scheduleReload {
		r.scheduleReload()
	}
}

func (c *RegionCache) markRegionNeedBeRefill(s *Store, storeIdx int, rs *regionStore) int {
	incEpochStoreIdx := -1
	// invalidate the regions in the store.
	epoch := rs.storeEpochs[storeIdx]
	if atomic.CompareAndSwapUint32(&s.epoch, epoch, epoch+1) {
		logutil.BgLogger().Info("mark store's regions need be refill", zap.String("store", s.addr))
		incEpochStoreIdx = storeIdx
		metrics.RegionCacheCounterWithInvalidateStoreRegionsOK.Inc()
	}
	// schedule a store address resolve.
	s.markNeedCheck(c.notifyCheckCh)
	return incEpochStoreIdx
}
// OnSendFail handles the send-request-fail logic.
func (c *RegionCache) OnSendFail(bo *retry.Backoffer, ctx *RPCContext, scheduleReload bool, err error) {
	metrics.RegionCacheCounterWithSendFail.Inc()
	r := c.GetCachedRegionWithRLock(ctx.Region)
	if r == nil {
		return
	}
	peersNum := len(r.meta.Peers)
	if len(ctx.Meta.Peers) != peersNum {
		logutil.Logger(bo.GetCtx()).Info("retry and refresh current ctx after send request fail and up/down stores length changed",
			zap.Stringer("current", ctx),
			zap.Bool("needReload", scheduleReload),
			zap.Reflect("oldPeers", ctx.Meta.Peers),
			zap.Reflect("newPeers", r.meta.Peers),
			zap.Error(err))
		return
	}

	rs := r.getStore()

	if err != nil {
		storeIdx, s := rs.accessStore(ctx.AccessMode, ctx.AccessIdx)

		// invalidate the regions in the store.
		c.markRegionNeedBeRefill(s, storeIdx, rs)
	}

	// try the next peer to find the new leader.
	if ctx.AccessMode == tiKVOnly {
		rs.switchNextTiKVPeer(r, ctx.AccessIdx)
		logutil.Logger(bo.GetCtx()).Info("switch region peer to next due to send request fail",
			zap.Stringer("current", ctx),
			zap.Bool("needReload", scheduleReload),
			zap.Error(err))
	} else {
		rs.switchNextFlashPeer(r, ctx.AccessIdx)
		logutil.Logger(bo.GetCtx()).Info("switch region tiflash peer to next due to send request fail",
			zap.Stringer("current", ctx),
			zap.Bool("needReload", scheduleReload),
			zap.Error(err))
	}

	// force a region reload after retrying all known peers in the region.
	if scheduleReload {
		r.scheduleReload()
	}
}

// LocateRegionByID searches for the region with the given ID.
func (c *RegionCache) LocateRegionByID(bo *retry.Backoffer, regionID uint64) (*KeyLocation, error) {
	c.mu.RLock()
	r := c.getRegionByIDFromCache(regionID)
	c.mu.RUnlock()
	if r != nil {
		if r.checkNeedReloadAndMarkUpdated() {
			lr, err := c.loadRegionByID(bo, regionID)
			if err != nil {
				// ignore error and use old region info.
				logutil.Logger(bo.GetCtx()).Error("load region failure",
					zap.Uint64("regionID", regionID), zap.Error(err))
			} else {
				r = lr
				c.mu.Lock()
				c.insertRegionToCache(r)
				c.mu.Unlock()
			}
		}
		loc := &KeyLocation{
			Region:   r.VerID(),
			StartKey: r.StartKey(),
			EndKey:   r.EndKey(),
		}
		return loc, nil
	}

	r, err := c.loadRegionByID(bo, regionID)
	if err != nil {
		return nil, errors.Trace(err)
	}

	c.mu.Lock()
	c.insertRegionToCache(r)
	c.mu.Unlock()
	return &KeyLocation{
		Region:   r.VerID(),
		StartKey: r.StartKey(),
		EndKey:   r.EndKey(),
	}, nil
}
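// A lookup-by-ID sketch (illustrative, not part of the original file); the
// region ID would typically come from an error response or scheduling info:
//
//	loc, err := cache.LocateRegionByID(bo, regionID)
//	if err != nil {
//		return err
//	}
//	_ = loc // loc.StartKey and loc.EndKey bound the region's range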
// GroupKeysByRegion separates keys into groups by the Regions they belong to.
// Specially, it also returns the first key's region, which may be used as the
// 'PrimaryLockKey' and should be committed ahead of others.
// filter is used to filter out some unwanted keys.
func (c *RegionCache) GroupKeysByRegion(bo *retry.Backoffer, keys [][]byte, filter func(key, regionStartKey []byte) bool) (map[RegionVerID][][]byte, RegionVerID, error) {
	groups := make(map[RegionVerID][][]byte)
	var first RegionVerID
	var lastLoc *KeyLocation
	for i, k := range keys {
		if lastLoc == nil || !lastLoc.Contains(k) {
			var err error
			lastLoc, err = c.LocateKey(bo, k)
			if err != nil {
				return nil, first, errors.Trace(err)
			}
			if filter != nil && filter(k, lastLoc.StartKey) {
				continue
			}
		}
		id := lastLoc.Region
		if i == 0 {
			first = id
		}
		groups[id] = append(groups[id], k)
	}
	return groups, first, nil
}

// ListRegionIDsInKeyRange lists the IDs of regions in [start_key, end_key].
func (c *RegionCache) ListRegionIDsInKeyRange(bo *retry.Backoffer, startKey, endKey []byte) (regionIDs []uint64, err error) {
	for {
		curRegion, err := c.LocateKey(bo, startKey)
		if err != nil {
			return nil, errors.Trace(err)
		}
		regionIDs = append(regionIDs, curRegion.Region.id)
		if curRegion.Contains(endKey) {
			break
		}
		startKey = curRegion.EndKey
	}
	return regionIDs, nil
}

// LoadRegionsInKeyRange lists regions in [start_key, end_key].
func (c *RegionCache) LoadRegionsInKeyRange(bo *retry.Backoffer, startKey, endKey []byte) (regions []*Region, err error) {
	var batchRegions []*Region
	for {
		batchRegions, err = c.BatchLoadRegionsWithKeyRange(bo, startKey, endKey, defaultRegionsPerBatch)
		if err != nil {
			return nil, errors.Trace(err)
		}
		if len(batchRegions) == 0 {
			// should never happen
			break
		}
		regions = append(regions, batchRegions...)
		endRegion := batchRegions[len(batchRegions)-1]
		if endRegion.ContainsByEnd(endKey) {
			break
		}
		startKey = endRegion.EndKey()
	}
	return
}

// BatchLoadRegionsWithKeyRange loads at most the given number of regions into the RegionCache,
// within the given key range from startKey to endKey. It returns the loaded regions.
func (c *RegionCache) BatchLoadRegionsWithKeyRange(bo *retry.Backoffer, startKey []byte, endKey []byte, count int) (regions []*Region, err error) {
	regions, err = c.scanRegions(bo, startKey, endKey, count)
	if err != nil {
		return
	}
	if len(regions) == 0 {
		err = errors.New("PD returned no region")
		return
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	for _, region := range regions {
		c.insertRegionToCache(region)
	}

	return
}

// BatchLoadRegionsFromKey loads at most the given number of regions into the RegionCache, starting
// from the given startKey. It returns the endKey of the last loaded region. If some of the regions
// have no leader, their entries in the RegionCache will not be updated.
func (c *RegionCache) BatchLoadRegionsFromKey(bo *retry.Backoffer, startKey []byte, count int) ([]byte, error) {
	regions, err := c.BatchLoadRegionsWithKeyRange(bo, startKey, nil, count)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return regions[len(regions)-1].EndKey(), nil
}

// InvalidateCachedRegion removes a cached Region.
func (c *RegionCache) InvalidateCachedRegion(id RegionVerID) {
	c.InvalidateCachedRegionWithReason(id, Other)
}
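// A grouping sketch (illustrative, not part of the original file): split a
// batch of keys by region before issuing one request per region; passing a nil
// filter keeps every key:
//
//	groups, first, err := cache.GroupKeysByRegion(bo, keys, nil)
//	if err != nil {
//		return err
//	}
//	for verID, regionKeys := range groups {
//		// send one request per region; `first` is the primary key's region.
//		_ = verID
//		_ = regionKeys
//	}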
// InvalidateCachedRegionWithReason removes a cached Region with the reason why it's invalidated.
func (c *RegionCache) InvalidateCachedRegionWithReason(id RegionVerID, reason InvalidReason) {
	cachedRegion := c.GetCachedRegionWithRLock(id)
	if cachedRegion == nil {
		return
	}
	cachedRegion.invalidate(reason)
}

// UpdateLeader updates the region cache with newer leader info.
func (c *RegionCache) UpdateLeader(regionID RegionVerID, leader *metapb.Peer, currentPeerIdx AccessIndex) {
	r := c.GetCachedRegionWithRLock(regionID)
	if r == nil {
		logutil.BgLogger().Debug("regionCache: cannot find region when updating leader",
			zap.Uint64("regionID", regionID.GetID()))
		return
	}

	if leader == nil {
		rs := r.getStore()
		rs.switchNextTiKVPeer(r, currentPeerIdx)
		logutil.BgLogger().Info("switch region peer to next due to NotLeader with NULL leader",
			zap.Int("currIdx", int(currentPeerIdx)),
			zap.Uint64("regionID", regionID.GetID()))
		return
	}

	if !c.switchWorkLeaderToPeer(r, leader) {
		logutil.BgLogger().Info("invalidate region cache due to cannot find peer when updating leader",
			zap.Uint64("regionID", regionID.GetID()),
			zap.Int("currIdx", int(currentPeerIdx)),
			zap.Uint64("leaderStoreID", leader.GetStoreId()))
		r.invalidate(StoreNotFound)
	} else {
		logutil.BgLogger().Info("switch region leader to specific leader due to kv return NotLeader",
			zap.Uint64("regionID", regionID.GetID()),
			zap.Int("currIdx", int(currentPeerIdx)),
			zap.Uint64("leaderStoreID", leader.GetStoreId()))
	}
}

// removeVersionFromCache removes a RegionVerID from the cache, and tries to clean up
// both c.mu.regions and c.mu.latestVersions. Note this function is not thread-safe.
func (c *RegionCache) removeVersionFromCache(oldVer RegionVerID, regionID uint64) {
	delete(c.mu.regions, oldVer)
	if ver, ok := c.mu.latestVersions[regionID]; ok && ver.Equals(oldVer) {
		delete(c.mu.latestVersions, regionID)
	}
}
// insertRegionToCache tries to insert the Region into the cache.
// It should be protected by c.mu.Lock().
func (c *RegionCache) insertRegionToCache(cachedRegion *Region) {
	old := c.mu.sorted.ReplaceOrInsert(newBtreeItem(cachedRegion))
	if old != nil {
		store := cachedRegion.getStore()
		oldRegion := old.(*btreeItem).cachedRegion
		oldRegionStore := oldRegion.getStore()
		// TODO(youjiali1995): remove this because the new retry logic can handle this issue.
		//
		// Joint consensus is enabled in v5.0, which makes it possible for a leader to step down as a learner during a conf change.
		// And if hibernate region is enabled, after the leader steps down, there can be a long time during which there is no leader
		// in the region and the leader info in PD is stale, until requests are sent to followers or the hibernate timeout fires.
		// To solve it, one solution is to always try a different peer if the invalid reason of the old cached region is no-leader.
		// There is a small probability that the current peer who reports no-leader becomes the leader and TiDB has to retry once in this case.
		if InvalidReason(atomic.LoadInt32((*int32)(&oldRegion.invalidReason))) == NoLeader {
			store.workTiKVIdx = (oldRegionStore.workTiKVIdx + 1) % AccessIndex(store.accessStoreNum(tiKVOnly))
		}
		// Invalidate the old region in case it's not invalidated and some requests try with the stale region information.
		oldRegion.invalidate(Other)
		// Don't refresh the TiFlash work idx for the region. Otherwise, it will always go to an invalid store that
		// is in the middle of transferring regions.
		store.workTiFlashIdx = atomic.LoadInt32(&oldRegionStore.workTiFlashIdx)
		c.removeVersionFromCache(oldRegion.VerID(), cachedRegion.VerID().id)
	}
	c.mu.regions[cachedRegion.VerID()] = cachedRegion
	newVer := cachedRegion.VerID()
	latest, ok := c.mu.latestVersions[cachedRegion.VerID().id]
	if !ok || latest.GetVer() < newVer.GetVer() || latest.GetConfVer() < newVer.GetConfVer() {
		c.mu.latestVersions[cachedRegion.VerID().id] = newVer
	}
}

// searchCachedRegion finds a region from the cache by key. Like `getCachedRegion`,
// it should be called with c.mu.RLock(), and the returned Region should not be
// used after c.mu is RUnlock()ed.
// If the given key is the end key of the region that you want, you may set the second argument to true. This is useful
// when processing in reverse order.
func (c *RegionCache) searchCachedRegion(key []byte, isEndKey bool) *Region {
	ts := time.Now().Unix()
	var r *Region
	c.mu.RLock()
	c.mu.sorted.DescendLessOrEqual(newBtreeSearchItem(key), func(item btree.Item) bool {
		r = item.(*btreeItem).cachedRegion
		if isEndKey && bytes.Equal(r.StartKey(), key) {
			r = nil     // clear result
			return true // iterate the next item
		}
		if !r.checkRegionCacheTTL(ts) {
			r = nil
			return true
		}
		return false
	})
	c.mu.RUnlock()
	if r != nil && (!isEndKey && r.Contains(key) || isEndKey && r.ContainsByEnd(key)) {
		return r
	}
	return nil
}

// getRegionByIDFromCache tries to get a region by regionID from the cache. Like
// `getCachedRegion`, it should be called with c.mu.RLock(), and the returned
// Region should not be used after c.mu is RUnlock()ed.
func (c *RegionCache) getRegionByIDFromCache(regionID uint64) *Region {
	ts := time.Now().Unix()
	ver, ok := c.mu.latestVersions[regionID]
	if !ok {
		return nil
	}
	latestRegion, ok := c.mu.regions[ver]
	if !ok {
		// should not happen
		logutil.BgLogger().Warn("region version not found",
			zap.Uint64("regionID", regionID), zap.Stringer("version", &ver))
		return nil
	}
	lastAccess := atomic.LoadInt64(&latestRegion.lastAccess)
	if ts-lastAccess > regionCacheTTLSec {
		return nil
	}
	if latestRegion != nil {
		atomic.CompareAndSwapInt64(&latestRegion.lastAccess, atomic.LoadInt64(&latestRegion.lastAccess), ts)
	}
	return latestRegion
}
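// Illustrative note (not in the original source): the btree in c.mu.sorted is
// keyed by region start key, so DescendLessOrEqual visits first the region with
// the greatest start key <= the search key; the Contains/ContainsByEnd check in
// searchCachedRegion then confirms the key actually falls inside that region's
// range, since the cached btree may contain stale or non-adjacent regions.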
// GetStoresByType gets stores by the type `typ`.
// TODO: revise it by get store by closure.
func (c *RegionCache) GetStoresByType(typ tikvrpc.EndpointType) []*Store {
	c.storeMu.Lock()
	defer c.storeMu.Unlock()
	stores := make([]*Store, 0)
	for _, store := range c.storeMu.stores {
		if store.getResolveState() != resolved {
			continue
		}
		if store.storeType == typ {
			// TODO: revise it with store.clone()
			storeLabel := make([]*metapb.StoreLabel, 0)
			for _, label := range store.labels {
				storeLabel = append(storeLabel, &metapb.StoreLabel{
					Key:   label.Key,
					Value: label.Value,
				})
			}
			stores = append(stores, &Store{
				addr:    store.addr,
				storeID: store.storeID,
				labels:  storeLabel,
			})
		}
	}
	return stores
}

func filterUnavailablePeers(region *pd.Region) {
	if len(region.DownPeers) == 0 {
		return
	}
	new := region.Meta.Peers[:0]
	for _, p := range region.Meta.Peers {
		available := true
		for _, downPeer := range region.DownPeers {
			if p.Id == downPeer.Id && p.StoreId == downPeer.StoreId {
				available = false
				break
			}
		}
		if available {
			new = append(new, p)
		}
	}
	region.Meta.Peers = new
}

// loadRegion loads a region from the pd client, and picks the first peer as the leader.
// If the given key is the end key of the region that you want, you may set the second argument to true. This is useful
// when processing in reverse order.
func (c *RegionCache) loadRegion(bo *retry.Backoffer, key []byte, isEndKey bool) (*Region, error) {
	ctx := bo.GetCtx()
	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("loadRegion", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		ctx = opentracing.ContextWithSpan(ctx, span1)
	}

	var backoffErr error
	searchPrev := false
	for {
		if backoffErr != nil {
			err := bo.Backoff(retry.BoPDRPC, backoffErr)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		var reg *pd.Region
		var err error
		if searchPrev {
			reg, err = c.pdClient.GetPrevRegion(ctx, key)
		} else {
			reg, err = c.pdClient.GetRegion(ctx, key)
		}
		if err != nil {
			metrics.RegionCacheCounterWithGetRegionError.Inc()
		} else {
			metrics.RegionCacheCounterWithGetRegionOK.Inc()
		}
		if err != nil {
			if isDecodeError(err) {
				return nil, errors.Errorf("failed to decode region range key, key: %q, err: %v", key, err)
			}
			backoffErr = errors.Errorf("loadRegion from PD failed, key: %q, err: %v", key, err)
			continue
		}
		if reg == nil || reg.Meta == nil {
			backoffErr = errors.Errorf("region not found for key %q", key)
			continue
		}
		filterUnavailablePeers(reg)
		if len(reg.Meta.Peers) == 0 {
			return nil, errors.New("receive Region with no available peer")
		}
		if isEndKey && !searchPrev && bytes.Equal(reg.Meta.StartKey, key) && len(reg.Meta.StartKey) != 0 {
			searchPrev = true
			continue
		}
		region := &Region{meta: reg.Meta}
		err = region.init(bo, c)
		if err != nil {
			return nil, err
		}
		if reg.Leader != nil {
			c.switchWorkLeaderToPeer(region, reg.Leader)
		}
		return region, nil
	}
}
// loadRegionByID loads a region from the pd client, and picks the first peer as the leader.
func (c *RegionCache) loadRegionByID(bo *retry.Backoffer, regionID uint64) (*Region, error) {
	ctx := bo.GetCtx()
	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("loadRegionByID", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		ctx = opentracing.ContextWithSpan(ctx, span1)
	}
	var backoffErr error
	for {
		if backoffErr != nil {
			err := bo.Backoff(retry.BoPDRPC, backoffErr)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		reg, err := c.pdClient.GetRegionByID(ctx, regionID)
		if err != nil {
			metrics.RegionCacheCounterWithGetRegionByIDError.Inc()
		} else {
			metrics.RegionCacheCounterWithGetRegionByIDOK.Inc()
		}
		if err != nil {
			if isDecodeError(err) {
				return nil, errors.Errorf("failed to decode region range key, regionID: %d, err: %v", regionID, err)
			}
			backoffErr = errors.Errorf("loadRegion from PD failed, regionID: %v, err: %v", regionID, err)
			continue
		}
		if reg == nil || reg.Meta == nil {
			return nil, errors.Errorf("region not found for regionID %d", regionID)
		}
		filterUnavailablePeers(reg)
		if len(reg.Meta.Peers) == 0 {
			return nil, errors.New("receive Region with no available peer")
		}
		region := &Region{meta: reg.Meta}
		err = region.init(bo, c)
		if err != nil {
			return nil, err
		}
		if reg.Leader != nil {
			c.switchWorkLeaderToPeer(region, reg.Leader)
		}
		return region, nil
	}
}
// scanRegions scans at most `limit` regions from PD, starting from the region containing `startKey`, in key order.
// Regions with no leader will not be returned.
func (c *RegionCache) scanRegions(bo *retry.Backoffer, startKey, endKey []byte, limit int) ([]*Region, error) {
	if limit == 0 {
		return nil, nil
	}
	ctx := bo.GetCtx()
	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("scanRegions", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		ctx = opentracing.ContextWithSpan(ctx, span1)
	}

	var backoffErr error
	for {
		if backoffErr != nil {
			err := bo.Backoff(retry.BoPDRPC, backoffErr)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		regionsInfo, err := c.pdClient.ScanRegions(ctx, startKey, endKey, limit)
		if err != nil {
			if isDecodeError(err) {
				return nil, errors.Errorf("failed to decode region range key, startKey: %q, limit: %d, err: %v", startKey, limit, err)
			}
			metrics.RegionCacheCounterWithScanRegionsError.Inc()
			backoffErr = errors.Errorf(
				"scanRegion from PD failed, startKey: %q, limit: %d, err: %v",
				startKey,
				limit,
				err)
			continue
		}

		metrics.RegionCacheCounterWithScanRegionsOK.Inc()

		if len(regionsInfo) == 0 {
			return nil, errors.New("PD returned no region")
		}
		regions := make([]*Region, 0, len(regionsInfo))
		for _, r := range regionsInfo {
			region := &Region{meta: r.Meta}
			err := region.init(bo, c)
			if err != nil {
				return nil, err
			}
			leader := r.Leader
			// Leader id = 0 indicates no leader.
			if leader != nil && leader.GetId() != 0 {
				c.switchWorkLeaderToPeer(region, leader)
				regions = append(regions, region)
			}
		}
		if len(regions) == 0 {
			return nil, errors.New("receive Regions with no peer")
		}
		if len(regions) < len(regionsInfo) {
			logutil.Logger(context.Background()).Debug(
				"regionCache: scanRegion finished but some regions has no leader.")
		}
		return regions, nil
	}
}

// GetCachedRegionWithRLock returns the cached region under the read lock.
func (c *RegionCache) GetCachedRegionWithRLock(regionID RegionVerID) (r *Region) {
	c.mu.RLock()
	r = c.mu.regions[regionID]
	c.mu.RUnlock()
	return
}

func (c *RegionCache) getStoreAddr(bo *retry.Backoffer, region *Region, store *Store) (addr string, err error) {
	state := store.getResolveState()
	switch state {
	case resolved, needCheck:
		addr = store.addr
		return
	case unresolved:
		addr, err = store.initResolve(bo, c)
		return
	case deleted:
		addr = c.changeToActiveStore(region, store)
		return
	case tombstone:
		return "", nil
	default:
		panic("unsupported resolve state")
	}
}

func (c *RegionCache) getProxyStore(region *Region, store *Store, rs *regionStore, workStoreIdx AccessIndex) (proxyStore *Store, proxyAccessIdx AccessIndex, proxyStoreIdx int) {
	if !c.enableForwarding || store.storeType != tikvrpc.TiKV || atomic.LoadInt32(&store.unreachable) == 0 {
		return
	}

	if rs.proxyTiKVIdx >= 0 {
		storeIdx, proxyStore := rs.accessStore(tiKVOnly, rs.proxyTiKVIdx)
		return proxyStore, rs.proxyTiKVIdx, storeIdx
	}

	tikvNum := rs.accessStoreNum(tiKVOnly)
	if tikvNum <= 1 {
		return
	}

	// Randomly select a non-leader peer
	first := rand.Intn(tikvNum - 1)
	if first >= int(workStoreIdx) {
		first = (first + 1) % tikvNum
	}

	// If the currently selected peer is not reachable, switch to the next one, until a reachable peer is found or all
	// peers are checked.
	for i := 0; i < tikvNum; i++ {
		index := (i + first) % tikvNum
		// Skip the work store, which is the actual store to be accessed
		if index == int(workStoreIdx) {
			continue
		}
		storeIdx, store := rs.accessStore(tiKVOnly, AccessIndex(index))
		// Skip unreachable stores.
		if atomic.LoadInt32(&store.unreachable) != 0 {
			continue
		}

		rs.setProxyStoreIdx(region, AccessIndex(index))
		return store, AccessIndex(index), storeIdx
	}

	return nil, 0, 0
}
// changeToActiveStore replaces the deleted store in the region with an up-to-date store from the stores map.
// The order is guaranteed by reResolve(), which adds the new store before marking the old store deleted.
func (c *RegionCache) changeToActiveStore(region *Region, store *Store) (addr string) {
	c.storeMu.RLock()
	store = c.storeMu.stores[store.storeID]
	c.storeMu.RUnlock()
	for {
		oldRegionStore := region.getStore()
		newRegionStore := oldRegionStore.clone()
		newRegionStore.stores = make([]*Store, 0, len(oldRegionStore.stores))
		for _, s := range oldRegionStore.stores {
			if s.storeID == store.storeID {
				newRegionStore.stores = append(newRegionStore.stores, store)
			} else {
				newRegionStore.stores = append(newRegionStore.stores, s)
			}
		}
		if region.compareAndSwapStore(oldRegionStore, newRegionStore) {
			break
		}
	}
	addr = store.addr
	return
}

func (c *RegionCache) getStoreByStoreID(storeID uint64) (store *Store) {
	var ok bool
	c.storeMu.Lock()
	store, ok = c.storeMu.stores[storeID]
	if ok {
		c.storeMu.Unlock()
		return
	}
	store = &Store{storeID: storeID}
	c.storeMu.stores[storeID] = store
	c.storeMu.Unlock()
	return
}

func (c *RegionCache) getStoresByLabels(labels []*metapb.StoreLabel) []*Store {
	c.storeMu.RLock()
	defer c.storeMu.RUnlock()
	s := make([]*Store, 0)
	for _, store := range c.storeMu.stores {
		if store.IsLabelsMatch(labels) {
			s = append(s, store)
		}
	}
	return s
}
// OnRegionEpochNotMatch removes the old region and inserts new regions into the cache.
// It returns whether to retry the request, because it's possible that the region epoch is ahead of TiKV's due to slow applying.
func (c *RegionCache) OnRegionEpochNotMatch(bo *retry.Backoffer, ctx *RPCContext, currentRegions []*metapb.Region) (bool, error) {
	if len(currentRegions) == 0 {
		c.InvalidateCachedRegionWithReason(ctx.Region, EpochNotMatch)
		return false, nil
	}

	// Find whether the region epoch in `ctx` is ahead of TiKV's. If so, backoff.
	for _, meta := range currentRegions {
		if meta.GetId() == ctx.Region.id &&
			(meta.GetRegionEpoch().GetConfVer() < ctx.Region.confVer ||
				meta.GetRegionEpoch().GetVersion() < ctx.Region.ver) {
			err := errors.Errorf("region epoch is ahead of tikv. rpc ctx: %+v, currentRegions: %+v", ctx, currentRegions)
			logutil.BgLogger().Info("region epoch is ahead of tikv", zap.Error(err))
			return true, bo.Backoff(retry.BoRegionMiss, err)
		}
	}

	needInvalidateOld := true
	newRegions := make([]*Region, 0, len(currentRegions))
	// If the region epoch is not ahead of TiKV's, replace the region meta in the region cache.
	for _, meta := range currentRegions {
		if _, ok := c.pdClient.(*CodecPDClient); ok {
			var err error
			if meta, err = decodeRegionMetaKeyWithShallowCopy(meta); err != nil {
				return false, errors.Errorf("newRegion's range key is not encoded: %v, %v", meta, err)
			}
		}
		region := &Region{meta: meta}
		err := region.init(bo, c)
		if err != nil {
			return false, err
		}
		var initLeaderStoreID uint64
		if ctx.Store.storeType == tikvrpc.TiFlash {
			initLeaderStoreID = region.findElectableStoreID()
		} else {
			initLeaderStoreID = ctx.Store.storeID
		}
		c.switchWorkLeaderToPeer(region, region.getPeerOnStore(initLeaderStoreID))
		newRegions = append(newRegions, region)
		if ctx.Region == region.VerID() {
			needInvalidateOld = false
		}
	}
	c.mu.Lock()
	for _, region := range newRegions {
		c.insertRegionToCache(region)
	}
	if needInvalidateOld {
		cachedRegion, ok := c.mu.regions[ctx.Region]
		if ok {
			cachedRegion.invalidate(EpochNotMatch)
		}
	}
	c.mu.Unlock()
	return false, nil
}

// PDClient returns the pd.Client in RegionCache.
func (c *RegionCache) PDClient() pd.Client {
	return c.pdClient
}

// GetTiFlashStores returns the information of all tiflash nodes.
func (c *RegionCache) GetTiFlashStores() []*Store {
	c.storeMu.RLock()
	defer c.storeMu.RUnlock()
	var stores []*Store
	for _, s := range c.storeMu.stores {
		if s.storeType == tikvrpc.TiFlash {
			stores = append(stores, s)
		}
	}
	return stores
}

// btreeItem is the BTree's Item that uses []byte for comparison.
type btreeItem struct {
	key          []byte
	cachedRegion *Region
}

func newBtreeItem(cr *Region) *btreeItem {
	return &btreeItem{
		key:          cr.StartKey(),
		cachedRegion: cr,
	}
}

func newBtreeSearchItem(key []byte) *btreeItem {
	return &btreeItem{
		key: key,
	}
}

func (item *btreeItem) Less(other btree.Item) bool {
	return bytes.Compare(item.key, other.(*btreeItem).key) < 0
}

// GetID returns the region's id.
func (r *Region) GetID() uint64 {
	return r.meta.GetId()
}

// GetMeta returns a copy of the region meta.
func (r *Region) GetMeta() *metapb.Region {
	return proto.Clone(r.meta).(*metapb.Region)
}

// GetLeaderPeerID returns the leader peer ID.
func (r *Region) GetLeaderPeerID() uint64 {
	store := r.getStore()
	if int(store.workTiKVIdx) >= store.accessStoreNum(tiKVOnly) {
		return 0
	}
	storeIdx, _ := store.accessStore(tiKVOnly, store.workTiKVIdx)
	return r.meta.Peers[storeIdx].Id
}

// GetLeaderStoreID returns the store ID of the leader peer.
func (r *Region) GetLeaderStoreID() uint64 {
	store := r.getStore()
	if int(store.workTiKVIdx) >= store.accessStoreNum(tiKVOnly) {
		return 0
	}
	storeIdx, _ := store.accessStore(tiKVOnly, store.workTiKVIdx)
	return r.meta.Peers[storeIdx].StoreId
}

func (r *Region) getKvStorePeer(rs *regionStore, aidx AccessIndex) (store *Store, peer *metapb.Peer, accessIdx AccessIndex, storeIdx int) {
	storeIdx, store = rs.accessStore(tiKVOnly, aidx)
	peer = r.meta.Peers[storeIdx]
	accessIdx = aidx
	return
}
// WorkStorePeer returns the current work store with its work peer.
func (r *Region) WorkStorePeer(rs *regionStore) (store *Store, peer *metapb.Peer, accessIdx AccessIndex, storeIdx int) {
	return r.getKvStorePeer(rs, rs.workTiKVIdx)
}

// FollowerStorePeer returns a follower store with its follower peer.
func (r *Region) FollowerStorePeer(rs *regionStore, followerStoreSeed uint32, op *storeSelectorOp) (store *Store, peer *metapb.Peer, accessIdx AccessIndex, storeIdx int) {
	return r.getKvStorePeer(rs, rs.follower(followerStoreSeed, op))
}

// AnyStorePeer returns a leader or follower store with the associated peer.
func (r *Region) AnyStorePeer(rs *regionStore, followerStoreSeed uint32, op *storeSelectorOp) (store *Store, peer *metapb.Peer, accessIdx AccessIndex, storeIdx int) {
	return r.getKvStorePeer(rs, rs.kvPeer(followerStoreSeed, op))
}

// RegionVerID is a unique ID that can identify a Region at a specific version.
type RegionVerID struct {
	id      uint64
	confVer uint64
	ver     uint64
}

// NewRegionVerID creates a region ver id, which is used for invalidating regions.
func NewRegionVerID(id, confVer, ver uint64) RegionVerID {
	return RegionVerID{id, confVer, ver}
}

// GetID returns the id of the region
func (r *RegionVerID) GetID() uint64 {
	return r.id
}

// GetVer returns the version of the region's epoch
func (r *RegionVerID) GetVer() uint64 {
	return r.ver
}

// GetConfVer returns the conf ver of the region's epoch
func (r *RegionVerID) GetConfVer() uint64 {
	return r.confVer
}

// String formats the RegionVerID to string
func (r *RegionVerID) String() string {
	return fmt.Sprintf("{ region id: %v, ver: %v, confVer: %v }", r.id, r.ver, r.confVer)
}

// Equals checks whether the RegionVerID equals to another one
func (r *RegionVerID) Equals(another RegionVerID) bool {
	return r.id == another.id && r.confVer == another.confVer && r.ver == another.ver
}

// VerID returns the Region's RegionVerID.
func (r *Region) VerID() RegionVerID {
	return RegionVerID{
		id:      r.meta.GetId(),
		confVer: r.meta.GetRegionEpoch().GetConfVer(),
		ver:     r.meta.GetRegionEpoch().GetVersion(),
	}
}

// StartKey returns StartKey.
func (r *Region) StartKey() []byte {
	return r.meta.StartKey
}

// EndKey returns EndKey.
func (r *Region) EndKey() []byte {
	return r.meta.EndKey
}
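// RegionVerID values above are plain comparable structs used as cache keys;
// two IDs are equal only if id, confVer and ver all match. Editor's sketch:
//
//	v1 := NewRegionVerID(1, 2, 3)
//	v2 := NewRegionVerID(1, 2, 4)
//	_ = v1.Equals(v2) // false: same region id and confVer, but versions differ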
// switchWorkLeaderToPeer switches the current work store to the one the given
// peer is on. It returns false if no peer in the region matches the given peer.
func (c *RegionCache) switchWorkLeaderToPeer(r *Region, peer *metapb.Peer) (found bool) {
	globalStoreIdx, found := c.getPeerStoreIndex(r, peer)
	if !found {
		return
	}
retry:
	// switch to new leader.
	oldRegionStore := r.getStore()
	var leaderIdx AccessIndex
	for i, gIdx := range oldRegionStore.accessIndex[tiKVOnly] {
		if gIdx == globalStoreIdx {
			leaderIdx = AccessIndex(i)
		}
	}
	if oldRegionStore.workTiKVIdx == leaderIdx {
		return
	}
	newRegionStore := oldRegionStore.clone()
	newRegionStore.workTiKVIdx = leaderIdx
	if !r.compareAndSwapStore(oldRegionStore, newRegionStore) {
		goto retry
	}
	return
}

func (r *regionStore) switchNextFlashPeer(rr *Region, currentPeerIdx AccessIndex) {
	nextIdx := (currentPeerIdx + 1) % AccessIndex(r.accessStoreNum(tiFlashOnly))
	newRegionStore := r.clone()
	newRegionStore.workTiFlashIdx = int32(nextIdx)
	rr.compareAndSwapStore(r, newRegionStore)
}

func (r *regionStore) switchNextTiKVPeer(rr *Region, currentPeerIdx AccessIndex) {
	if r.workTiKVIdx != currentPeerIdx {
		return
	}
	nextIdx := (currentPeerIdx + 1) % AccessIndex(r.accessStoreNum(tiKVOnly))
	newRegionStore := r.clone()
	newRegionStore.workTiKVIdx = nextIdx
	rr.compareAndSwapStore(r, newRegionStore)
}

func (r *regionStore) setProxyStoreIdx(rr *Region, idx AccessIndex) {
	if r.proxyTiKVIdx == idx {
		return
	}

	newRegionStore := r.clone()
	newRegionStore.proxyTiKVIdx = idx
	success := rr.compareAndSwapStore(r, newRegionStore)
	logutil.BgLogger().Debug("try set proxy store index",
		zap.Uint64("region", rr.GetID()),
		zap.Int("index", int(idx)),
		zap.Bool("success", success))
}

func (r *regionStore) unsetProxyStoreIfNeeded(rr *Region) {
	r.setProxyStoreIdx(rr, -1)
}

func (r *Region) findElectableStoreID() uint64 {
	if len(r.meta.Peers) == 0 {
		return 0
	}
	for _, p := range r.meta.Peers {
		if p.Role != metapb.PeerRole_Learner {
			return p.StoreId
		}
	}
	return 0
}

func (r *Region) getPeerOnStore(storeID uint64) *metapb.Peer {
	for _, p := range r.meta.Peers {
		if p.StoreId == storeID {
			return p
		}
	}
	return nil
}

func (c *RegionCache) getPeerStoreIndex(r *Region, peer *metapb.Peer) (idx int, found bool) {
	if len(r.meta.Peers) == 0 || peer == nil {
		return
	}
	for i, p := range r.meta.Peers {
		if isSamePeer(p, peer) {
			idx = i
			found = true
			return
		}
	}
	return
}

// Contains checks whether the key is in the region: startKey <= key < endKey.
// The maximum region's endKey is empty, which matches every key above its start.
func (r *Region) Contains(key []byte) bool {
	return bytes.Compare(r.meta.GetStartKey(), key) <= 0 &&
		(bytes.Compare(key, r.meta.GetEndKey()) < 0 || len(r.meta.GetEndKey()) == 0)
}

// ContainsByEnd checks whether the region contains the greatest key that is
// less than key: startKey < key <= endKey. The maximum region's endKey is empty.
func (r *Region) ContainsByEnd(key []byte) bool {
	return bytes.Compare(r.meta.GetStartKey(), key) < 0 &&
		(bytes.Compare(key, r.meta.GetEndKey()) <= 0 || len(r.meta.GetEndKey()) == 0)
}
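// The two predicates above differ only at the boundaries: Contains treats the
// range as [start, end), ContainsByEnd as (start, end]. Editor's worked example
// for a region r covering ["b", "d"):
//
//	r.Contains([]byte("b"))      // true:  start key is included
//	r.Contains([]byte("d"))      // false: end key is excluded
//	r.ContainsByEnd([]byte("b")) // false: start key is excluded
//	r.ContainsByEnd([]byte("d")) // true:  end key is included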
// Store contains a kv process's address.
type Store struct {
	addr         string               // loaded store address
	saddr        string               // loaded store status address
	storeID      uint64               // store's id
	state        uint64               // unsafe store storeState
	labels       []*metapb.StoreLabel // stored store labels
	resolveMutex sync.Mutex           // protect pd from concurrent init requests
	epoch        uint32               // store fail epoch, see RegionStore.storeEpochs
	storeType    tikvrpc.EndpointType // type of the store
	tokenCount   atomic2.Int64        // used store token count

	// whether the store is unreachable for some reason, in which case requests
	// to the store need to be forwarded by other stores. This is also the flag
	// that a checkUntilHealth goroutine is running for this store.
	// This mechanism is currently only applicable to TiKV stores.
	unreachable      int32
	unreachableSince time.Time
}

type resolveState uint64

const (
	// The store is just created and normally is being resolved.
	// A store in this state will only be resolved by initResolve().
	unresolved resolveState = iota
	// The store is resolved and its address is valid.
	resolved
	// A request failed on this store and it will be re-resolved by asyncCheckAndResolveLoop().
	needCheck
	// The store's address or label has changed and it is marked deleted.
	// A new store struct has replaced it in the RegionCache; call
	// changeToActiveStore() to get the new struct.
	deleted
	// The store is a tombstone. Regions that try to access it should be invalidated.
	tombstone
)

// IsTiFlash returns true if the storeType is TiFlash
func (s *Store) IsTiFlash() bool {
	return s.storeType == tikvrpc.TiFlash
}

// StoreID returns storeID.
func (s *Store) StoreID() uint64 {
	return s.storeID
}
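// A store's resolve state advances roughly as
// unresolved -> resolved -> needCheck -> resolved (or deleted/tombstone),
// always through atomic operations on Store.state. Editor's sketch of reading
// the state safely from another goroutine:
//
//	if s.getResolveState() == tombstone {
//		// the store is gone; regions referencing it must be invalidated
//	}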
// initResolve resolves the address of a store that has never been resolved and
// returns an empty string if the store is a tombstone.
func (s *Store) initResolve(bo *retry.Backoffer, c *RegionCache) (addr string, err error) {
	s.resolveMutex.Lock()
	state := s.getResolveState()
	defer s.resolveMutex.Unlock()
	if state != unresolved {
		if state != tombstone {
			addr = s.addr
		}
		return
	}
	var store *metapb.Store
	for {
		store, err = c.pdClient.GetStore(bo.GetCtx(), s.storeID)
		if err != nil {
			metrics.RegionCacheCounterWithGetStoreError.Inc()
		} else {
			metrics.RegionCacheCounterWithGetStoreOK.Inc()
		}
		if bo.GetCtx().Err() != nil && errors.Cause(bo.GetCtx().Err()) == context.Canceled {
			return
		}
		if err != nil && !isStoreNotFoundError(err) {
			// TODO: more refined handling of PD error statuses.
			err = errors.Errorf("loadStore from PD failed, id: %d, err: %v", s.storeID, err)
			if err = bo.Backoff(retry.BoPDRPC, err); err != nil {
				return
			}
			continue
		}
		// The store is a tombstone.
		if store == nil {
			s.setResolveState(tombstone)
			return "", nil
		}
		addr = store.GetAddress()
		if addr == "" {
			return "", errors.Errorf("empty store(%d) address", s.storeID)
		}
		s.addr = addr
		s.saddr = store.GetStatusAddress()
		s.storeType = tikvrpc.GetStoreTypeByMeta(store)
		s.labels = store.GetLabels()
		// No other goroutine should be changing this store's state concurrently,
		// but we still use changeResolveStateTo for safety.
		s.changeResolveStateTo(unresolved, resolved)
		return s.addr, nil
	}
}

// A quick and dirty solution to find out whether an err is caused by StoreNotFound.
// TODO: A better solution, maybe some error-code based error handling?
func isStoreNotFoundError(err error) bool {
	return strings.Contains(err.Error(), "invalid store ID") && strings.Contains(err.Error(), "not found")
}

// reResolve tries to resolve the addr for a store that needs checking. It returns
// false if the store is in tombstone state or has been deleted.
func (s *Store) reResolve(c *RegionCache) (bool, error) {
	var addr string
	store, err := c.pdClient.GetStore(context.Background(), s.storeID)
	if err != nil {
		metrics.RegionCacheCounterWithGetStoreError.Inc()
	} else {
		metrics.RegionCacheCounterWithGetStoreOK.Inc()
	}
	// `err` here can mean either "load Store from PD failed" or "store not found".
	// If loading the Store from PD succeeded but PD didn't find the store,
	// the error should be handled by the next `if` instead of here.
	if err != nil && !isStoreNotFoundError(err) {
		logutil.BgLogger().Error("loadStore from PD failed", zap.Uint64("id", s.storeID), zap.Error(err))
		// we cannot backoff in the reResolve loop, but can try to check other stores and wait for the next tick.
		return false, err
	}
	if store == nil {
		// The store has been removed in PD, so we should invalidate all regions using the store.
		logutil.BgLogger().Info("invalidate regions in removed store",
			zap.Uint64("store", s.storeID), zap.String("addr", s.addr))
		atomic.AddUint32(&s.epoch, 1)
		s.setResolveState(tombstone)
		metrics.RegionCacheCounterWithInvalidateStoreRegionsOK.Inc()
		return false, nil
	}

	storeType := tikvrpc.GetStoreTypeByMeta(store)
	addr = store.GetAddress()
	if s.addr != addr || !s.IsSameLabels(store.GetLabels()) {
		newStore := &Store{storeID: s.storeID, addr: addr, saddr: store.GetStatusAddress(), storeType: storeType, labels: store.GetLabels(), state: uint64(resolved)}
		c.storeMu.Lock()
		c.storeMu.stores[newStore.storeID] = newStore
		c.storeMu.Unlock()
		s.setResolveState(deleted)
		return false, nil
	}
	s.changeResolveStateTo(needCheck, resolved)
	return true, nil
}

func (s *Store) getResolveState() resolveState {
	var state resolveState
	if s == nil {
		return state
	}
	return resolveState(atomic.LoadUint64(&s.state))
}

func (s *Store) setResolveState(state resolveState) {
	atomic.StoreUint64(&s.state, uint64(state))
}

// changeResolveStateTo changes the store's resolveState from the old state to
// the new one. It returns true if the state is changed successfully (or already
// is the target state), and false if the store's state was changed by someone else.
func (s *Store) changeResolveStateTo(from, to resolveState) bool {
	for {
		state := s.getResolveState()
		if state == to {
			return true
		}
		if state != from {
			return false
		}
		if atomic.CompareAndSwapUint64(&s.state, uint64(from), uint64(to)) {
			return true
		}
	}
}
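// changeResolveStateTo above is a bounded CAS loop: it succeeds if the state
// is already `to`, fails fast if another goroutine moved the state to anything
// other than `from` or `to`, and retries only on raw CAS contention. Editor's
// sketch of a caller:
//
//	if s.changeResolveStateTo(needCheck, resolved) {
//		// the store is (now) resolved
//	} else {
//		// state was neither needCheck nor resolved, e.g. deleted or tombstone
//	}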
// markNeedCheck marks a resolved store so that it will be re-resolved
// asynchronously, to check for store address changes.
func (s *Store) markNeedCheck(notifyCheckCh chan struct{}) {
	if s.changeResolveStateTo(resolved, needCheck) {
		select {
		case notifyCheckCh <- struct{}{}:
		default:
		}
	}
}

// IsSameLabels returns whether the store has the same labels as the target labels.
func (s *Store) IsSameLabels(labels []*metapb.StoreLabel) bool {
	if len(s.labels) != len(labels) {
		return false
	}
	return s.IsLabelsMatch(labels)
}

// IsLabelsMatch returns whether the store's labels match the target labels.
func (s *Store) IsLabelsMatch(labels []*metapb.StoreLabel) bool {
	if len(labels) < 1 {
		return true
	}
	for _, targetLabel := range labels {
		match := false
		for _, label := range s.labels {
			if targetLabel.Key == label.Key && targetLabel.Value == label.Value {
				match = true
				break
			}
		}
		if !match {
			return false
		}
	}
	return true
}

type livenessState uint32

var (
	livenessSf singleflight.Group
	// storeLivenessTimeout is the max duration of resolving liveness of a TiKV instance.
	storeLivenessTimeout time.Duration
)

// SetStoreLivenessTimeout sets storeLivenessTimeout to t.
func SetStoreLivenessTimeout(t time.Duration) {
	storeLivenessTimeout = t
}

// GetStoreLivenessTimeout returns storeLivenessTimeout.
func GetStoreLivenessTimeout() time.Duration {
	return storeLivenessTimeout
}

const (
	unknown livenessState = iota
	reachable
	unreachable
)

func (s *Store) startHealthCheckLoopIfNeeded(c *RegionCache) {
	// This mechanism doesn't support non-TiKV stores currently.
	if s.storeType != tikvrpc.TiKV {
		logutil.BgLogger().Info("[health check] skip running health check loop for non-tikv store",
			zap.Uint64("storeID", s.storeID), zap.String("addr", s.addr))
		return
	}

	// It may already have been started by another goroutine.
	if atomic.CompareAndSwapInt32(&s.unreachable, 0, 1) {
		s.unreachableSince = time.Now()
		go s.checkUntilHealth(c)
	}
}
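// The unreachable flag doubles as a "health-check goroutine running" guard:
// the CAS in startHealthCheckLoopIfNeeded guarantees at most one
// checkUntilHealth loop per store. Editor's sketch of the idempotent start,
// assuming a *RegionCache named cache:
//
//	s.startHealthCheckLoopIfNeeded(cache) // first call spawns the loop
//	s.startHealthCheckLoopIfNeeded(cache) // no-op while the loop is running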
func (s *Store) checkUntilHealth(c *RegionCache) {
	defer atomic.CompareAndSwapInt32(&s.unreachable, 1, 0)

	ticker := time.NewTicker(time.Second)
	lastCheckPDTime := time.Now()

	// TODO(MyonKeminta): Set a more proper ctx here so that it can be interrupted
	// immediately when the RegionCache is shut down.
	ctx := context.Background()
	for {
		select {
		case <-c.closeCh:
			return
		case <-ticker.C:
			if time.Since(lastCheckPDTime) > time.Second*30 {
				lastCheckPDTime = time.Now()

				valid, err := s.reResolve(c)
				if err != nil {
					logutil.BgLogger().Warn("[health check] failed to re-resolve unhealthy store", zap.Error(err))
				} else if !valid {
					logutil.BgLogger().Info("[health check] store meta deleted, stop checking", zap.Uint64("storeID", s.storeID), zap.String("addr", s.addr))
					return
				}
			}

			bo := retry.NewNoopBackoff(ctx)
			l := s.requestLiveness(bo, c)
			if l == reachable {
				logutil.BgLogger().Info("[health check] store became reachable", zap.Uint64("storeID", s.storeID))

				return
			}
		}
	}
}

func (s *Store) requestLiveness(bo *retry.Backoffer, c *RegionCache) (l livenessState) {
	if c != nil && c.testingKnobs.mockRequestLiveness != nil {
		return c.testingKnobs.mockRequestLiveness(s, bo)
	}

	if storeLivenessTimeout == 0 {
		return unreachable
	}

	if s.getResolveState() != resolved {
		l = unknown
		return
	}
	addr := s.addr
	rsCh := livenessSf.DoChan(addr, func() (interface{}, error) {
		return invokeKVStatusAPI(addr, storeLivenessTimeout), nil
	})
	var ctx context.Context
	if bo != nil {
		ctx = bo.GetCtx()
	} else {
		ctx = context.Background()
	}
	select {
	case rs := <-rsCh:
		l = rs.Val.(livenessState)
	case <-ctx.Done():
		l = unknown
		return
	}
	return
}

// GetAddr returns the address of the store
func (s *Store) GetAddr() string {
	return s.addr
}

func invokeKVStatusAPI(addr string, timeout time.Duration) (l livenessState) {
	start := time.Now()
	defer func() {
		if l == reachable {
			metrics.StatusCountWithOK.Inc()
		} else {
			metrics.StatusCountWithError.Inc()
		}
		metrics.TiKVStatusDuration.WithLabelValues(addr).Observe(time.Since(start).Seconds())
	}()
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	conn, cli, err := createKVHealthClient(ctx, addr)
	if err != nil {
		logutil.BgLogger().Info("[health check] create grpc connection failed", zap.String("store", addr), zap.Error(err))
		l = unreachable
		return
	}
	defer func() {
		err := conn.Close()
		if err != nil {
			logutil.BgLogger().Info("[health check] failed to close the grpc connection for health check", zap.String("store", addr), zap.Error(err))
		}
	}()

	req := &healthpb.HealthCheckRequest{}
	resp, err := cli.Check(ctx, req)
	if err != nil {
		logutil.BgLogger().Info("[health check] check health error", zap.String("store", addr), zap.Error(err))
		l = unreachable
		return
	}

	status := resp.GetStatus()
	if status == healthpb.HealthCheckResponse_UNKNOWN {
		logutil.BgLogger().Info("[health check] check health returns unknown", zap.String("store", addr))
		l = unknown
		return
	}

	if status != healthpb.HealthCheckResponse_SERVING {
		logutil.BgLogger().Info("[health check] service not serving", zap.Stringer("status", status))
		l = unreachable
		return
	}

	l = reachable
	return
}
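// requestLiveness above deduplicates concurrent probes per address through a
// singleflight.Group, so N goroutines checking the same store trigger only one
// gRPC health call. Editor's sketch of the sharing (the address and the probe
// closure are hypothetical stand-ins):
//
//	probe := func() (interface{}, error) {
//		return invokeKVStatusAPI("10.0.0.1:20160", storeLivenessTimeout), nil
//	}
//	ch1 := livenessSf.DoChan("10.0.0.1:20160", probe)
//	ch2 := livenessSf.DoChan("10.0.0.1:20160", probe) // joins the in-flight call
//	_, _ = <-ch1, <-ch2                               // both receive the same result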
// Temporarily load the config directly from the global config; it's not a good
// idea to let RegionCache access it, though.
// TODO: Pass the config in a better way, or use the connArray inside the client
// directly rather than creating a new connection.
func createKVHealthClient(ctx context.Context, addr string) (*grpc.ClientConn, healthpb.HealthClient, error) {
	cfg := config.GetGlobalConfig()

	opt := grpc.WithInsecure()
	if len(cfg.Security.ClusterSSLCA) != 0 {
		tlsConfig, err := cfg.Security.ToTLSConfig()
		if err != nil {
			return nil, nil, errors.Trace(err)
		}
		opt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
	}
	keepAlive := cfg.TiKVClient.GrpcKeepAliveTime
	keepAliveTimeout := cfg.TiKVClient.GrpcKeepAliveTimeout
	conn, err := grpc.DialContext(
		ctx,
		addr,
		opt,
		grpc.WithInitialWindowSize(client.GrpcInitialWindowSize),
		grpc.WithInitialConnWindowSize(client.GrpcInitialConnWindowSize),
		grpc.WithConnectParams(grpc.ConnectParams{
			Backoff: backoff.Config{
				BaseDelay:  100 * time.Millisecond, // Default was 1s.
				Multiplier: 1.6,                    // Default
				Jitter:     0.2,                    // Default
				MaxDelay:   3 * time.Second,        // Default was 120s.
			},
			MinConnectTimeout: 5 * time.Second,
		}),
		grpc.WithKeepaliveParams(keepalive.ClientParameters{
			Time:                time.Duration(keepAlive) * time.Second,
			Timeout:             time.Duration(keepAliveTimeout) * time.Second,
			PermitWithoutStream: true,
		}),
	)
	if err != nil {
		return nil, nil, errors.Trace(err)
	}
	cli := healthpb.NewHealthClient(conn)
	return conn, cli, nil
}

func isSamePeer(lhs *metapb.Peer, rhs *metapb.Peer) bool {
	return lhs == rhs || (lhs.GetId() == rhs.GetId() && lhs.GetStoreId() == rhs.GetStoreId())
}
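// isSamePeer intentionally compares by (peer ID, store ID) rather than deep
// equality, so metadata-only differences such as a role change still count as
// the same peer. Editor's sketch:
//
//	a := &metapb.Peer{Id: 7, StoreId: 1, Role: metapb.PeerRole_Voter}
//	b := &metapb.Peer{Id: 7, StoreId: 1, Role: metapb.PeerRole_Learner}
//	_ = isSamePeer(a, b) // true: id and store id match, role is ignored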