github.com/KinWaiYuen/client-go/v2@v2.5.4/internal/locate/region_request3_test.go

// Copyright 2021 TiKV Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: The code in this file is based on code from the
// TiDB project, licensed under the Apache License v 2.0
//
// https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/locate/region_request_test.go
//

// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package locate

import (
	"context"
	"sync/atomic"
	"testing"
	"time"
	"unsafe"

	tikverr "github.com/KinWaiYuen/client-go/v2/error"
	"github.com/KinWaiYuen/client-go/v2/internal/mockstore/mocktikv"
	"github.com/KinWaiYuen/client-go/v2/internal/retry"
	"github.com/KinWaiYuen/client-go/v2/kv"
	"github.com/KinWaiYuen/client-go/v2/oracle"
	"github.com/KinWaiYuen/client-go/v2/tikvrpc"
	"github.com/pingcap/errors"
	"github.com/pingcap/kvproto/pkg/errorpb"
	"github.com/pingcap/kvproto/pkg/kvrpcpb"
	"github.com/pingcap/kvproto/pkg/metapb"
	"github.com/stretchr/testify/suite"
)

func TestRegionRequestToThreeStores(t *testing.T) {
	suite.Run(t, new(testRegionRequestToThreeStoresSuite))
}

type testRegionRequestToThreeStoresSuite struct {
	suite.Suite
	cluster             *mocktikv.Cluster
	storeIDs            []uint64
	peerIDs             []uint64
	regionID            uint64
	leaderPeer          uint64
	cache               *RegionCache
	bo                  *retry.Backoffer
	regionRequestSender *RegionRequestSender
	mvccStore           mocktikv.MVCCStore
}

func (s *testRegionRequestToThreeStoresSuite) SetupTest() {
	s.mvccStore = mocktikv.MustNewMVCCStore()
	s.cluster = mocktikv.NewCluster(s.mvccStore)
	s.storeIDs, s.peerIDs, s.regionID, s.leaderPeer = mocktikv.BootstrapWithMultiStores(s.cluster, 3)
	pdCli := &CodecPDClient{mocktikv.NewPDClient(s.cluster)}
	s.cache = NewRegionCache(pdCli)
	s.bo = retry.NewNoopBackoff(context.Background())
	client := mocktikv.NewRPCClient(s.cluster, s.mvccStore, nil)
	s.regionRequestSender = NewRegionRequestSender(s.cache, client)
}

func (s *testRegionRequestToThreeStoresSuite) TearDownTest() {
	s.cache.Close()
	s.mvccStore.Close()
}

func (s *testRegionRequestToThreeStoresSuite) TestStoreTokenLimit() {
	req := tikvrpc.NewRequest(tikvrpc.CmdPrewrite, &kvrpcpb.PrewriteRequest{}, kvrpcpb.Context{})
	region, err := s.cache.LocateRegionByID(s.bo, s.regionID)
	s.Nil(err)
	s.NotNil(region)
	oldStoreLimit := kv.StoreLimit.Load()
	kv.StoreLimit.Store(500)
	s.cache.getStoreByStoreID(s.storeIDs[0]).tokenCount.Store(500)
	// Because there is only one region in this cluster, regionID maps to its leader.
	resp, err := s.regionRequestSender.SendReq(s.bo, req, region.Region, time.Second)
	s.NotNil(err)
	s.Nil(resp)
	e, ok := errors.Cause(err).(*tikverr.ErrTokenLimit)
	s.True(ok)
	s.Equal(e.StoreID, uint64(1))
	kv.StoreLimit.Store(oldStoreLimit)
}

func (s *testRegionRequestToThreeStoresSuite) TestSwitchPeerWhenNoLeader() {
	var leaderAddr string
	s.regionRequestSender.client = &fnClient{func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) {
		if leaderAddr == "" {
			leaderAddr = addr
		}
		// Return OK when the request switches to a different peer.
		if leaderAddr != addr {
			return &tikvrpc.Response{Resp: &kvrpcpb.RawPutResponse{}}, nil
		}
		return &tikvrpc.Response{Resp: &kvrpcpb.RawPutResponse{
			RegionError: &errorpb.Error{NotLeader: &errorpb.NotLeader{}},
		}}, nil
	}}

	req := tikvrpc.NewRequest(tikvrpc.CmdRawPut, &kvrpcpb.RawPutRequest{
		Key:   []byte("key"),
		Value: []byte("value"),
	})

	bo := retry.NewBackofferWithVars(context.Background(), 5, nil)
	loc, err := s.cache.LocateKey(s.bo, []byte("key"))
	s.Nil(err)
	resp, err := s.regionRequestSender.SendReq(bo, req, loc.Region, time.Second)
	s.Nil(err)
	s.NotNil(resp)
}

func (s *testRegionRequestToThreeStoresSuite) loadAndGetLeaderStore() (*Store, string) {
	region, err := s.regionRequestSender.regionCache.findRegionByKey(s.bo, []byte("a"), false)
	s.Nil(err)
	leaderStore, leaderPeer, _, _ := region.WorkStorePeer(region.getStore())
	s.Equal(leaderPeer.Id, s.leaderPeer)
	leaderAddr, err := s.regionRequestSender.regionCache.getStoreAddr(s.bo, region, leaderStore)
	s.Nil(err)
	return leaderStore, leaderAddr
}

func (s *testRegionRequestToThreeStoresSuite) TestForwarding() {
	s.regionRequestSender.regionCache.enableForwarding = true

	// First get the leader's addr from the region cache.
	leaderStore, leaderAddr := s.loadAndGetLeaderStore()

	bo := retry.NewBackoffer(context.Background(), 10000)

	// Simulate that the leader is network-partitioned but can be accessed by forwarding via a follower.
	innerClient := s.regionRequestSender.client
	s.regionRequestSender.client = &fnClient{fn: func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (*tikvrpc.Response, error) {
		if addr == leaderAddr {
			return nil, errors.New("simulated rpc error")
		}
		// MockTiKV doesn't support forwarding, so simulate forwarding here.
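		// In this simulation, a forwarded request arrives at a follower's address with
		// req.ForwardedHost set to the leader's address; redirecting addr to ForwardedHost
		// stands in for the relay that a real proxying store would perform.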
		if len(req.ForwardedHost) != 0 {
			addr = req.ForwardedHost
		}
		return innerClient.SendRequest(ctx, addr, req, timeout)
	}}
	var storeState uint32 = uint32(unreachable)
	s.regionRequestSender.regionCache.testingKnobs.mockRequestLiveness = func(s *Store, bo *retry.Backoffer) livenessState {
		return livenessState(atomic.LoadUint32(&storeState))
	}

	loc, err := s.regionRequestSender.regionCache.LocateKey(bo, []byte("k"))
	s.Nil(err)
	s.Equal(loc.Region.GetID(), s.regionID)
	req := tikvrpc.NewRequest(tikvrpc.CmdRawPut, &kvrpcpb.RawPutRequest{
		Key:   []byte("k"),
		Value: []byte("v1"),
	})
	resp, ctx, err := s.regionRequestSender.SendReqCtx(bo, req, loc.Region, time.Second, tikvrpc.TiKV)
	s.Nil(err)
	regionErr, err := resp.GetRegionError()
	s.Nil(err)
	s.Nil(regionErr)
	s.Equal(resp.Resp.(*kvrpcpb.RawPutResponse).Error, "")
	s.Equal(ctx.Addr, leaderAddr)
	s.NotNil(ctx.ProxyStore)
	s.NotEqual(ctx.ProxyAddr, leaderAddr)
	s.Nil(err)

	// Simulate recovering to normal.
	s.regionRequestSender.client = innerClient
	atomic.StoreUint32(&storeState, uint32(reachable))
	start := time.Now()
	for {
		if atomic.LoadInt32(&leaderStore.unreachable) == 0 {
			break
		}
		if time.Since(start) > 3*time.Second {
			s.FailNow("store didn't recover to normal in time")
		}
		time.Sleep(time.Millisecond * 200)
	}
	atomic.StoreUint32(&storeState, uint32(unreachable))

	req = tikvrpc.NewRequest(tikvrpc.CmdRawGet, &kvrpcpb.RawGetRequest{Key: []byte("k")})
	resp, ctx, err = s.regionRequestSender.SendReqCtx(bo, req, loc.Region, time.Second, tikvrpc.TiKV)
	s.Nil(err)
	regionErr, err = resp.GetRegionError()
	s.Nil(err)
	s.Nil(regionErr)
	s.Equal(resp.Resp.(*kvrpcpb.RawGetResponse).Value, []byte("v1"))
	s.Nil(ctx.ProxyStore)

	// Simulate the server being down.
	s.regionRequestSender.client = &fnClient{fn: func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (*tikvrpc.Response, error) {
		if addr == leaderAddr || req.ForwardedHost == leaderAddr {
			return nil, errors.New("simulated rpc error")
		}

		// MockTiKV doesn't support forwarding, so simulate forwarding here.
		if len(req.ForwardedHost) != 0 {
			addr = req.ForwardedHost
		}
		return innerClient.SendRequest(ctx, addr, req, timeout)
	}}
	// The leader is changed after a store is down.
	newLeaderPeerID := s.peerIDs[0]
	if newLeaderPeerID == s.leaderPeer {
		newLeaderPeerID = s.peerIDs[1]
	}

	s.NotEqual(newLeaderPeerID, s.leaderPeer)
	s.cluster.ChangeLeader(s.regionID, newLeaderPeerID)

	req = tikvrpc.NewRequest(tikvrpc.CmdRawPut, &kvrpcpb.RawPutRequest{
		Key:   []byte("k"),
		Value: []byte("v2"),
	})
	resp, ctx, err = s.regionRequestSender.SendReqCtx(bo, req, loc.Region, time.Second, tikvrpc.TiKV)
	s.Nil(err)
	regionErr, err = resp.GetRegionError()
	s.Nil(err)
	// After several retries, the region will be marked as needReload.
	// Then SendReqCtx will return a pseudo EpochNotMatch to tell the caller to reload the region.
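	// (hasFakeRegionError in TestSendReqWithReplicaSelector below uses IsFakeRegionError
	// to tell such a pseudo error apart from a genuine epoch mismatch.)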
	s.NotNil(regionErr.EpochNotMatch)
	s.Nil(ctx)
	s.Equal(len(s.regionRequestSender.failStoreIDs), 0)
	s.Equal(len(s.regionRequestSender.failProxyStoreIDs), 0)
	region := s.regionRequestSender.regionCache.GetCachedRegionWithRLock(loc.Region)
	s.NotNil(region)
	s.False(region.isValid())

	loc, err = s.regionRequestSender.regionCache.LocateKey(bo, []byte("k"))
	s.Nil(err)
	req = tikvrpc.NewRequest(tikvrpc.CmdRawPut, &kvrpcpb.RawPutRequest{
		Key:   []byte("k"),
		Value: []byte("v2"),
	})
	resp, ctx, err = s.regionRequestSender.SendReqCtx(bo, req, loc.Region, time.Second, tikvrpc.TiKV)
	s.Nil(err)
	regionErr, err = resp.GetRegionError()
	s.Nil(err)
	s.Nil(regionErr)
	s.Equal(resp.Resp.(*kvrpcpb.RawPutResponse).Error, "")
	// Leader changed
	s.NotEqual(ctx.Store.storeID, leaderStore.storeID)
	s.Nil(ctx.ProxyStore)
}

func refreshEpochs(regionStore *regionStore) {
	for i, store := range regionStore.stores {
		regionStore.storeEpochs[i] = atomic.LoadUint32(&store.epoch)
	}
}

func (s *testRegionRequestToThreeStoresSuite) TestReplicaSelector() {
	regionLoc, err := s.cache.LocateRegionByID(s.bo, s.regionID)
	s.Nil(err)
	s.NotNil(regionLoc)
	region := s.cache.GetCachedRegionWithRLock(regionLoc.Region)
	regionStore := region.getStore()
	req := tikvrpc.NewRequest(tikvrpc.CmdGet, &kvrpcpb.GetRequest{}, kvrpcpb.Context{})

	// Create a fake region and change its leader to the last peer.
	regionStore = regionStore.clone()
	regionStore.workTiKVIdx = AccessIndex(len(regionStore.stores) - 1)
	sidx, _ := regionStore.accessStore(tiKVOnly, regionStore.workTiKVIdx)
	regionStore.stores[sidx].epoch++
	regionStore.storeEpochs[sidx]++
	// Add a TiFlash peer to the region.
	peer := &metapb.Peer{Id: s.cluster.AllocID(), StoreId: s.cluster.AllocID()}
	regionStore.accessIndex[tiFlashOnly] = append(regionStore.accessIndex[tiFlashOnly], len(regionStore.stores))
	regionStore.stores = append(regionStore.stores, &Store{storeID: peer.StoreId, storeType: tikvrpc.TiFlash})
	regionStore.storeEpochs = append(regionStore.storeEpochs, 0)

	region = &Region{
		meta: region.GetMeta(),
	}
	region.lastAccess = time.Now().Unix()
	region.meta.Peers = append(region.meta.Peers, peer)
	atomic.StorePointer(&region.store, unsafe.Pointer(regionStore))

	cache := NewRegionCache(s.cache.pdClient)
	defer cache.Close()
	cache.insertRegionToCache(region)

	// Verify creating the replicaSelector.
	replicaSelector, err := newReplicaSelector(cache, regionLoc.Region, req)
	s.NotNil(replicaSelector)
	s.Nil(err)
	s.Equal(replicaSelector.region, region)
	// Should only contain TiKV stores.
	s.Equal(len(replicaSelector.replicas), regionStore.accessStoreNum(tiKVOnly))
	s.Equal(len(replicaSelector.replicas), len(regionStore.stores)-1)
	s.IsType(&accessKnownLeader{}, replicaSelector.state)

	// Verify that the store matches the peer and epoch.
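	// (These per-replica epoch snapshots are what the accessFollower cases further down
	// rely on to skip replicas whose store epoch has moved on.)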
	for _, replica := range replicaSelector.replicas {
		s.Equal(replica.store.storeID, replica.peer.GetStoreId())
		s.Equal(replica.peer, region.getPeerOnStore(replica.store.storeID))
		s.True(replica.attempts == 0)

		for i, store := range regionStore.stores {
			if replica.store == store {
				s.Equal(replica.epoch, regionStore.storeEpochs[i])
			}
		}
	}

	assertRPCCtxEqual := func(rpcCtx *RPCContext, target *replica, proxy *replica) {
		s.Equal(rpcCtx.Store, target.store)
		s.Equal(rpcCtx.Peer, target.peer)
		s.Equal(rpcCtx.Addr, target.store.addr)
		s.Equal(rpcCtx.AccessMode, tiKVOnly)
		if proxy != nil {
			s.Equal(rpcCtx.ProxyStore, proxy.store)
			s.Equal(rpcCtx.ProxyAddr, proxy.store.addr)
		}
	}

	// Test the accessKnownLeader state.
	s.IsType(&accessKnownLeader{}, replicaSelector.state)
	// Try the leader for maxReplicaAttempt times.
	for i := 1; i <= maxReplicaAttempt; i++ {
		rpcCtx, err := replicaSelector.next(s.bo)
		s.Nil(err)
		assertRPCCtxEqual(rpcCtx, replicaSelector.replicas[regionStore.workTiKVIdx], nil)
		s.IsType(&accessKnownLeader{}, replicaSelector.state)
		s.Equal(replicaSelector.replicas[regionStore.workTiKVIdx].attempts, i)
	}

	// After that it should switch to tryFollower.
	for i := 0; i < len(replicaSelector.replicas)-1; i++ {
		rpcCtx, err := replicaSelector.next(s.bo)
		s.Nil(err)
		state, ok := replicaSelector.state.(*tryFollower)
		s.True(ok)
		s.Equal(regionStore.workTiKVIdx, state.leaderIdx)
		s.NotEqual(state.lastIdx, regionStore.workTiKVIdx)
		s.Equal(replicaSelector.targetIdx, state.lastIdx)
		assertRPCCtxEqual(rpcCtx, replicaSelector.replicas[replicaSelector.targetIdx], nil)
		s.Equal(replicaSelector.targetReplica().attempts, 1)
	}
	// In the tryFollower state, if all replicas have been tried, a nil RPCContext should be returned.
	rpcCtx, err := replicaSelector.next(s.bo)
	s.Nil(err)
	s.Nil(rpcCtx)
	// The region should be invalidated.
	s.False(replicaSelector.region.isValid())

	// Test switching to tryFollower if the leader is unreachable.
	region.lastAccess = time.Now().Unix()
	replicaSelector, err = newReplicaSelector(cache, regionLoc.Region, req)
	s.Nil(err)
	s.NotNil(replicaSelector)
	cache.testingKnobs.mockRequestLiveness = func(s *Store, bo *retry.Backoffer) livenessState {
		return unreachable
	}
	s.IsType(&accessKnownLeader{}, replicaSelector.state)
	_, err = replicaSelector.next(s.bo)
	s.Nil(err)
	replicaSelector.onSendFailure(s.bo, nil)
	rpcCtx, err = replicaSelector.next(s.bo)
	s.NotNil(rpcCtx)
	s.Nil(err)
	s.IsType(&tryFollower{}, replicaSelector.state)
	s.NotEqual(replicaSelector.targetIdx, regionStore.workTiKVIdx)
	assertRPCCtxEqual(rpcCtx, replicaSelector.targetReplica(), nil)
	s.Equal(replicaSelector.targetReplica().attempts, 1)

	// Test switching to tryNewProxy if the leader is unreachable and forwarding is enabled.
	refreshEpochs(regionStore)
	cache.enableForwarding = true
	replicaSelector, err = newReplicaSelector(cache, regionLoc.Region, req)
	s.Nil(err)
	s.NotNil(replicaSelector)
	cache.testingKnobs.mockRequestLiveness = func(s *Store, bo *retry.Backoffer) livenessState {
		return unreachable
	}
	s.IsType(&accessKnownLeader{}, replicaSelector.state)
	_, err = replicaSelector.next(s.bo)
	s.Nil(err)
	replicaSelector.onSendFailure(s.bo, nil)
	rpcCtx, err = replicaSelector.next(s.bo)
	s.NotNil(rpcCtx)
	s.Nil(err)
	state, ok := replicaSelector.state.(*tryNewProxy)
	s.True(ok)
	s.Equal(regionStore.workTiKVIdx, state.leaderIdx)
	s.Equal(AccessIndex(2), replicaSelector.targetIdx)
	s.NotEqual(AccessIndex(2), replicaSelector.proxyIdx)
	assertRPCCtxEqual(rpcCtx, replicaSelector.targetReplica(), replicaSelector.proxyReplica())
	s.Equal(replicaSelector.targetReplica().attempts, 2)
	s.Equal(replicaSelector.proxyReplica().attempts, 1)

	// When the current proxy node fails, it should try another one.
	lastProxy := replicaSelector.proxyIdx
	replicaSelector.onSendFailure(s.bo, nil)
	rpcCtx, err = replicaSelector.next(s.bo)
	s.NotNil(rpcCtx)
	s.Nil(err)
	state, ok = replicaSelector.state.(*tryNewProxy)
	s.True(ok)
	s.Equal(regionStore.workTiKVIdx, state.leaderIdx)
	s.Equal(AccessIndex(2), replicaSelector.targetIdx)
	s.NotEqual(lastProxy, replicaSelector.proxyIdx)
	s.Equal(replicaSelector.targetReplica().attempts, 3)
	s.Equal(replicaSelector.proxyReplica().attempts, 1)

	// Test that the proxy store is saved when forwarding is enabled.
	replicaSelector.onSendSuccess()
	regionStore = region.getStore()
	s.Equal(replicaSelector.proxyIdx, regionStore.proxyTiKVIdx)

	// Test that the initial state is accessByKnownProxy when proxyTiKVIdx is valid.
	refreshEpochs(regionStore)
	cache.enableForwarding = true
	replicaSelector, err = newReplicaSelector(cache, regionLoc.Region, req)
	s.Nil(err)
	s.NotNil(replicaSelector)
	state2, ok := replicaSelector.state.(*accessByKnownProxy)
	s.True(ok)
	s.Equal(regionStore.workTiKVIdx, state2.leaderIdx)
	_, err = replicaSelector.next(s.bo)
	s.Nil(err)
	assertRPCCtxEqual(rpcCtx, replicaSelector.targetReplica(), replicaSelector.proxyReplica())

	// Switch to tryNewProxy if the current proxy is not available.
	replicaSelector.onSendFailure(s.bo, nil)
	s.IsType(&tryNewProxy{}, replicaSelector.state)
	rpcCtx, err = replicaSelector.next(s.bo)
	s.Nil(err)
	assertRPCCtxEqual(rpcCtx, replicaSelector.targetReplica(), replicaSelector.proxyReplica())
	s.Equal(regionStore.workTiKVIdx, state2.leaderIdx)
	s.Equal(AccessIndex(2), replicaSelector.targetIdx)
	s.NotEqual(regionStore.proxyTiKVIdx, replicaSelector.proxyIdx)
	s.Equal(replicaSelector.targetReplica().attempts, 2)
	s.Equal(replicaSelector.proxyReplica().attempts, 1)

	// Test the accessFollower state with the kv.ReplicaReadFollower request type.
	req = tikvrpc.NewReplicaReadRequest(tikvrpc.CmdGet, &kvrpcpb.GetRequest{}, kv.ReplicaReadFollower, nil)
	refreshEpochs(regionStore)
	replicaSelector, err = newReplicaSelector(cache, regionLoc.Region, req)
	s.Nil(err)
	s.NotNil(replicaSelector)
	state3, ok := replicaSelector.state.(*accessFollower)
	s.True(ok)
	s.False(state3.tryLeader)
	s.Equal(regionStore.workTiKVIdx, state3.leaderIdx)
	s.Equal(state3.lastIdx, AccessIndex(-1))

	lastIdx := AccessIndex(-1)
	for i := 0; i < regionStore.accessStoreNum(tiKVOnly)-1; i++ {
		rpcCtx, err := replicaSelector.next(s.bo)
		s.Nil(err)
		// Should switch to the next follower.
		s.NotEqual(lastIdx, state3.lastIdx)
		// Shouldn't access the leader if followers aren't exhausted.
		s.NotEqual(regionStore.workTiKVIdx, state3.lastIdx)
		s.Equal(replicaSelector.targetIdx, state3.lastIdx)
		assertRPCCtxEqual(rpcCtx, replicaSelector.replicas[replicaSelector.targetIdx], nil)
		lastIdx = state3.lastIdx
	}
	// Fall back to the leader once.
	rpcCtx, err = replicaSelector.next(s.bo)
	s.Nil(err)
	s.Equal(regionStore.workTiKVIdx, state3.lastIdx)
	s.Equal(replicaSelector.targetIdx, state3.lastIdx)
	assertRPCCtxEqual(rpcCtx, replicaSelector.replicas[regionStore.workTiKVIdx], nil)
	// All replicas are exhausted.
	rpcCtx, err = replicaSelector.next(s.bo)
	s.Nil(rpcCtx)
	s.Nil(err)

	// Test that the accessFollower state filters out epoch-stale stores.
	region.lastAccess = time.Now().Unix()
	refreshEpochs(regionStore)
	// Mark all followers as stale.
	tiKVNum := regionStore.accessStoreNum(tiKVOnly)
	for i := 1; i < tiKVNum; i++ {
		regionStore.storeEpochs[(regionStore.workTiKVIdx+AccessIndex(i))%AccessIndex(tiKVNum)]++
	}
	replicaSelector, err = newReplicaSelector(cache, regionLoc.Region, req)
	s.NotNil(replicaSelector)
	s.Nil(err)
	state3 = replicaSelector.state.(*accessFollower)
	// Should fall back to the leader immediately.
	rpcCtx, err = replicaSelector.next(s.bo)
	s.Nil(err)
	s.Equal(regionStore.workTiKVIdx, state3.lastIdx)
	s.Equal(replicaSelector.targetIdx, state3.lastIdx)
	assertRPCCtxEqual(rpcCtx, replicaSelector.replicas[regionStore.workTiKVIdx], nil)

	// Test that the accessFollower state filters out stores whose labels don't match.
	region.lastAccess = time.Now().Unix()
	refreshEpochs(regionStore)
	labels := []*metapb.StoreLabel{
		{
			Key:   "a",
			Value: "b",
		},
	}
	regionStore.workTiKVIdx = AccessIndex(0)
	accessIdx := AccessIndex(regionStore.accessStoreNum(tiKVOnly) - 1)
	_, store := regionStore.accessStore(tiKVOnly, accessIdx)
	store.labels = labels
	for i := 0; i < 5; i++ {
		replicaSelector, err = newReplicaSelector(cache, regionLoc.Region, req, WithMatchLabels(labels))
		s.NotNil(replicaSelector)
		s.Nil(err)
		rpcCtx, err = replicaSelector.next(s.bo)
		s.Nil(err)
		assertRPCCtxEqual(rpcCtx, replicaSelector.replicas[accessIdx], nil)
	}

	// Test the accessFollower state with the leaderOnly option.
	region.lastAccess = time.Now().Unix()
	refreshEpochs(regionStore)
	for i := 0; i < 5; i++ {
		replicaSelector, err = newReplicaSelector(cache, regionLoc.Region, req, WithLeaderOnly())
		s.NotNil(replicaSelector)
		s.Nil(err)
		rpcCtx, err = replicaSelector.next(s.bo)
		s.Nil(err)
		// Should always access the leader.
		assertRPCCtxEqual(rpcCtx, replicaSelector.replicas[regionStore.workTiKVIdx], nil)
	}

	// Test the accessFollower state with the kv.ReplicaReadMixed request type.
	region.lastAccess = time.Now().Unix()
	refreshEpochs(regionStore)
	req.ReplicaReadType = kv.ReplicaReadMixed
	replicaSelector, err = newReplicaSelector(cache, regionLoc.Region, req)
	s.NotNil(replicaSelector)
	s.Nil(err)

	// Invalidate the region if the leader is not in the region.
	region.lastAccess = time.Now().Unix()
	replicaSelector.updateLeader(&metapb.Peer{Id: s.cluster.AllocID(), StoreId: s.cluster.AllocID()})
	s.False(region.isValid())
	// Don't try the next replica if the region is invalidated.
	rpcCtx, err = replicaSelector.next(s.bo)
	s.Nil(rpcCtx)
	s.Nil(err)
}

// TODO(youjiali1995): Remove duplicated tests. This test may be duplicated with other
// tests, but it's a dedicated one to test sending requests with the replica selector.
func (s *testRegionRequestToThreeStoresSuite) TestSendReqWithReplicaSelector() {
	req := tikvrpc.NewRequest(tikvrpc.CmdRawPut, &kvrpcpb.RawPutRequest{
		Key:   []byte("key"),
		Value: []byte("value"),
	})
	region, err := s.cache.LocateRegionByID(s.bo, s.regionID)
	s.Nil(err)
	s.NotNil(region)

	reloadRegion := func() {
		s.regionRequestSender.replicaSelector.region.invalidate(Other)
		region, _ = s.cache.LocateRegionByID(s.bo, s.regionID)
	}

	hasFakeRegionError := func(resp *tikvrpc.Response) bool {
		if resp == nil {
			return false
		}
		regionErr, err := resp.GetRegionError()
		if err != nil {
			return false
		}
		return IsFakeRegionError(regionErr)
	}

	// Normal
	bo := retry.NewBackoffer(context.Background(), -1)
	sender := s.regionRequestSender
	resp, err := sender.SendReq(bo, req, region.Region, time.Second)
	s.Nil(err)
	s.NotNil(resp)
	s.True(bo.GetTotalBackoffTimes() == 0)

	// Switch to the next peer due to store failure; the leader is on the next peer.
	bo = retry.NewBackoffer(context.Background(), -1)
	s.cluster.ChangeLeader(s.regionID, s.peerIDs[1])
	s.cluster.StopStore(s.storeIDs[0])
	resp, err = sender.SendReq(bo, req, region.Region, time.Second)
	s.Nil(err)
	s.NotNil(resp)
	s.Equal(sender.replicaSelector.targetIdx, AccessIndex(1))
	s.True(bo.GetTotalBackoffTimes() == 1)
	s.cluster.StartStore(s.storeIDs[0])

	// The leader is updated because of the successful send, so no backoff.
	bo = retry.NewBackoffer(context.Background(), -1)
	resp, err = sender.SendReq(bo, req, region.Region, time.Second)
	s.Nil(err)
	s.NotNil(resp)
	s.Equal(sender.replicaSelector.targetIdx, AccessIndex(1))
	s.True(bo.GetTotalBackoffTimes() == 0)

	// Switch to the next peer due to leader failure, but the new leader is not elected yet.
	// The region will be invalidated because the store epoch changed.
	reloadRegion()
	s.cluster.StopStore(s.storeIDs[1])
	bo = retry.NewBackoffer(context.Background(), -1)
	resp, err = sender.SendReq(bo, req, region.Region, time.Second)
	s.Nil(err)
	s.True(hasFakeRegionError(resp))
	s.Equal(bo.GetTotalBackoffTimes(), 1)
	s.cluster.StartStore(s.storeIDs[1])

	// The leader is changed. No backoff.
	reloadRegion()
	s.cluster.ChangeLeader(s.regionID, s.peerIDs[0])
	bo = retry.NewBackoffer(context.Background(), -1)
	resp, err = sender.SendReq(bo, req, region.Region, time.Second)
	s.Nil(err)
	s.NotNil(resp)
	s.Equal(bo.GetTotalBackoffTimes(), 0)

	// No leader. Back off for each replica and run out of all replicas.
	s.cluster.GiveUpLeader(s.regionID)
	bo = retry.NewBackoffer(context.Background(), -1)
	resp, err = sender.SendReq(bo, req, region.Region, time.Second)
	s.Nil(err)
	s.True(hasFakeRegionError(resp))
	s.Equal(bo.GetTotalBackoffTimes(), 3)
	s.False(sender.replicaSelector.region.isValid())
	s.cluster.ChangeLeader(s.regionID, s.peerIDs[0])

	// The leader store is alive but can't provide service.
	// The region will be invalidated due to running out of all replicas.
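	// Note: with the liveness probe pinned to reachable, a send failure doesn't make the
	// selector give up on the leader early, so the leader ends up being retried up to
	// maxReplicaAttempt times before the two followers, which is why the backoff count
	// asserted below is maxReplicaAttempt+2.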
	s.regionRequestSender.regionCache.testingKnobs.mockRequestLiveness = func(s *Store, bo *retry.Backoffer) livenessState {
		return reachable
	}
	reloadRegion()
	s.cluster.StopStore(s.storeIDs[0])
	bo = retry.NewBackoffer(context.Background(), -1)
	resp, err = sender.SendReq(bo, req, region.Region, time.Second)
	s.Nil(err)
	s.True(hasFakeRegionError(resp))
	s.False(sender.replicaSelector.region.isValid())
	s.Equal(bo.GetTotalBackoffTimes(), maxReplicaAttempt+2)
	s.cluster.StartStore(s.storeIDs[0])

	// Verify that the same replica is retried when meeting ServerIsBusy/MaxTimestampNotSynced/ReadIndexNotReady/ProposalInMergingMode.
	for _, regionErr := range []*errorpb.Error{
		// ServerIsBusy takes too much time to test.
		// {ServerIsBusy: &errorpb.ServerIsBusy{}},
		{MaxTimestampNotSynced: &errorpb.MaxTimestampNotSynced{}},
		{ReadIndexNotReady: &errorpb.ReadIndexNotReady{}},
		{ProposalInMergingMode: &errorpb.ProposalInMergingMode{}},
	} {
		func() {
			oc := sender.client
			defer func() {
				sender.client = oc
			}()
			s.regionRequestSender.client = &fnClient{func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) {
				// Return the specific region error when accessing the leader.
				if addr == s.cluster.GetStore(s.storeIDs[0]).Address {
					return &tikvrpc.Response{Resp: &kvrpcpb.RawPutResponse{RegionError: regionErr}}, nil
				}
				// Return the NotLeader error when accessing followers.
				return &tikvrpc.Response{Resp: &kvrpcpb.RawPutResponse{RegionError: &errorpb.Error{
					NotLeader: &errorpb.NotLeader{
						RegionId: region.Region.id, Leader: &metapb.Peer{Id: s.peerIDs[0], StoreId: s.storeIDs[0]},
					}}}}, nil
			}}
			reloadRegion()
			bo = retry.NewBackoffer(context.Background(), -1)
			resp, err := sender.SendReq(bo, req, region.Region, time.Second)
			s.Nil(err)
			s.True(hasFakeRegionError(resp))
			s.False(sender.replicaSelector.region.isValid())
			s.Equal(bo.GetTotalBackoffTimes(), maxReplicaAttempt+2)
		}()
	}

	// Verify switching to the next peer immediately when meeting StaleCommand.
	reloadRegion()
	func() {
		oc := sender.client
		defer func() {
			sender.client = oc
		}()
		s.regionRequestSender.client = &fnClient{func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) {
			return &tikvrpc.Response{Resp: &kvrpcpb.RawPutResponse{RegionError: &errorpb.Error{StaleCommand: &errorpb.StaleCommand{}}}}, nil
		}}
		reloadRegion()
		bo = retry.NewBackoffer(context.Background(), -1)
		resp, err := sender.SendReq(bo, req, region.Region, time.Second)
		s.Nil(err)
		s.True(hasFakeRegionError(resp))
		s.False(sender.replicaSelector.region.isValid())
		s.Equal(bo.GetTotalBackoffTimes(), 0)
	}()

	// Verify that an unknown region error doesn't invalidate the region by itself.
	reloadRegion()
	func() {
		oc := sender.client
		defer func() {
			sender.client = oc
		}()
		s.regionRequestSender.client = &fnClient{func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) {
			return &tikvrpc.Response{Resp: &kvrpcpb.RawPutResponse{RegionError: &errorpb.Error{Message: ""}}}, nil
		}}
		reloadRegion()
		bo = retry.NewBackoffer(context.Background(), -1)
		resp, err := sender.SendReq(bo, req, region.Region, time.Second)
		s.Nil(err)
		s.True(hasFakeRegionError(resp))
		s.False(sender.replicaSelector.region.isValid())
		s.Equal(bo.GetTotalBackoffTimes(), 0)
	}()

	// Verify that the region is invalidated when meeting StoreNotMatch/RegionNotFound/EpochNotMatch/NotLeader
	// and the leader can't be found in the region.
	for i, regionErr := range []*errorpb.Error{
		{StoreNotMatch: &errorpb.StoreNotMatch{}},
		{RegionNotFound: &errorpb.RegionNotFound{}},
		{EpochNotMatch: &errorpb.EpochNotMatch{}},
		{NotLeader: &errorpb.NotLeader{Leader: &metapb.Peer{}}}} {
		func() {
			oc := sender.client
			defer func() {
				sender.client = oc
			}()
			s.regionRequestSender.client = &fnClient{func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) {
				return &tikvrpc.Response{Resp: &kvrpcpb.RawPutResponse{RegionError: regionErr}}, nil
			}}
			reloadRegion()
			bo = retry.NewBackoffer(context.Background(), -1)
			resp, err := sender.SendReq(bo, req, region.Region, time.Second)

			// Return a sendError when meeting NotLeader and the leader can't be found in the region.
			if i == 3 {
				s.Nil(err)
				s.True(hasFakeRegionError(resp))
			} else {
				s.Nil(err)
				s.NotNil(resp)
				regionErr, _ := resp.GetRegionError()
				s.NotNil(regionErr)
			}
			s.False(sender.replicaSelector.region.isValid())
			s.Equal(bo.GetTotalBackoffTimes(), 0)
		}()
	}

	// Run out of all replicas and then return a send error.
	s.regionRequestSender.regionCache.testingKnobs.mockRequestLiveness = func(s *Store, bo *retry.Backoffer) livenessState {
		return unreachable
	}
	reloadRegion()
	for _, store := range s.storeIDs {
		s.cluster.StopStore(store)
	}
	bo = retry.NewBackoffer(context.Background(), -1)
	resp, err = sender.SendReq(bo, req, region.Region, time.Second)
	s.Nil(err)
	s.True(hasFakeRegionError(resp))
	s.True(bo.GetTotalBackoffTimes() == 3)
	s.False(sender.replicaSelector.region.isValid())
	for _, store := range s.storeIDs {
		s.cluster.StartStore(store)
	}

	// Verify switching to the leader immediately when a stale read request with the global txn scope meets a region error.
	s.cluster.ChangeLeader(region.Region.id, s.peerIDs[0])
	reloadRegion()
	req = tikvrpc.NewRequest(tikvrpc.CmdGet, &kvrpcpb.GetRequest{Key: []byte("key")})
	req.ReadReplicaScope = oracle.GlobalTxnScope
	req.TxnScope = oracle.GlobalTxnScope
	req.EnableStaleRead()
	for i := 0; i < 5; i++ {
		// The request may be sent to the leader directly; we have to distinguish that case.
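		// failureOnFollower records whether a follower was hit first; in that case the
		// retry must have gone to the leader with the leaderOnly option, which is what
		// the state.option.leaderOnly assertion below checks.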
		failureOnFollower := false
		s.regionRequestSender.client = &fnClient{func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) {
			if addr != s.cluster.GetStore(s.storeIDs[0]).Address {
				failureOnFollower = true
				return &tikvrpc.Response{Resp: &kvrpcpb.GetResponse{RegionError: &errorpb.Error{}}}, nil
			}
			return &tikvrpc.Response{Resp: &kvrpcpb.GetResponse{}}, nil
		}}
		sender.SendReq(bo, req, region.Region, time.Second)
		state, ok := sender.replicaSelector.state.(*accessFollower)
		s.True(ok)
		s.True(!failureOnFollower || state.option.leaderOnly)
		totalAttempts := 0
		for idx, replica := range sender.replicaSelector.replicas {
			totalAttempts += replica.attempts
			if idx == int(state.leaderIdx) {
				s.Equal(1, replica.attempts)
			} else {
				s.True(replica.attempts <= 1)
			}
		}
		s.True(totalAttempts <= 2)
	}
}