github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/client_test.go

// Copyright 2014 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

/* Package kvserver_test provides a means of testing store
functionality which depends on a fully-functional KV client. This
cannot be done within the kvserver package because of circular
dependencies.

By convention, tests in package kvserver_test have names of the form
client_*.go.
*/
package kvserver_test

import (
    "context"
    "fmt"
    "math/rand"
    "net"
    "reflect"
    "sort"
    "sync"
    "testing"
    "time"

    "github.com/cenkalti/backoff"
    circuit "github.com/cockroachdb/circuitbreaker"
    "github.com/cockroachdb/cockroach/pkg/base"
    "github.com/cockroachdb/cockroach/pkg/clusterversion"
    "github.com/cockroachdb/cockroach/pkg/config"
    "github.com/cockroachdb/cockroach/pkg/config/zonepb"
    "github.com/cockroachdb/cockroach/pkg/gossip"
    "github.com/cockroachdb/cockroach/pkg/gossip/resolver"
    "github.com/cockroachdb/cockroach/pkg/keys"
    "github.com/cockroachdb/cockroach/pkg/kv"
    "github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
    "github.com/cockroachdb/cockroach/pkg/kv/kvserver"
    "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rditer"
    "github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
    "github.com/cockroachdb/cockroach/pkg/roachpb"
    "github.com/cockroachdb/cockroach/pkg/rpc"
    "github.com/cockroachdb/cockroach/pkg/rpc/nodedialer"
    "github.com/cockroachdb/cockroach/pkg/settings/cluster"
    "github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    "github.com/cockroachdb/cockroach/pkg/storage"
    "github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    "github.com/cockroachdb/cockroach/pkg/testutils"
    "github.com/cockroachdb/cockroach/pkg/util"
    "github.com/cockroachdb/cockroach/pkg/util/hlc"
    "github.com/cockroachdb/cockroach/pkg/util/leaktest"
    "github.com/cockroachdb/cockroach/pkg/util/log"
    "github.com/cockroachdb/cockroach/pkg/util/metric"
    "github.com/cockroachdb/cockroach/pkg/util/netutil"
    "github.com/cockroachdb/cockroach/pkg/util/retry"
    "github.com/cockroachdb/cockroach/pkg/util/stop"
    "github.com/cockroachdb/cockroach/pkg/util/syncutil"
    "github.com/cockroachdb/cockroach/pkg/util/uuid"
    "github.com/cockroachdb/errors"
    "github.com/kr/pretty"
    "github.com/stretchr/testify/require"
    "go.etcd.io/etcd/raft"
    "google.golang.org/grpc"
)

// createTestStore creates a test store using an in-memory
// engine.
func createTestStore(t testing.TB, stopper *stop.Stopper) (*kvserver.Store, *hlc.ManualClock) {
    manual := hlc.NewManualClock(123)
    cfg := kvserver.TestStoreConfig(hlc.NewClock(manual.UnixNano, time.Nanosecond))
    store := createTestStoreWithOpts(t, testStoreOpts{cfg: &cfg}, stopper)
    return store, manual
}
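// exampleSingleStorePutGet is an illustrative sketch of how createTestStore
// is typically used: start a stopper, create a store backed by an in-memory
// engine, and round-trip a value through the store's test sender using the
// putArgs/getArgs helpers defined later in this file. The key "example-key"
// and the value are arbitrary choices for illustration.
func exampleSingleStorePutGet(t *testing.T) {
    ctx := context.Background()
    stopper := stop.NewStopper()
    defer stopper.Stop(ctx)
    store, _ := createTestStore(t, stopper)

    // Write a value directly through the store's sender.
    key := roachpb.Key("example-key")
    if _, pErr := kv.SendWrapped(ctx, store.TestSender(), putArgs(key, []byte("value"))); pErr != nil {
        t.Fatal(pErr)
    }

    // Read it back and verify the bytes.
    resp, pErr := kv.SendWrapped(ctx, store.TestSender(), getArgs(key))
    if pErr != nil {
        t.Fatal(pErr)
    }
    b, err := resp.(*roachpb.GetResponse).Value.GetBytes()
    if err != nil {
        t.Fatal(err)
    }
    if string(b) != "value" {
        t.Fatalf("unexpected value %q", b)
    }
}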
// DEPRECATED. Use createTestStoreWithOpts().
func createTestStoreWithConfig(
    t testing.TB, stopper *stop.Stopper, storeCfg kvserver.StoreConfig,
) *kvserver.Store {
    store := createTestStoreWithOpts(t,
        testStoreOpts{
            cfg: &storeCfg,
        },
        stopper,
    )
    return store
}

// testStoreOpts affords control over aspects of store creation.
type testStoreOpts struct {
    // dontBootstrap, if set, means that the engine will not be bootstrapped.
    dontBootstrap bool
    // dontCreateSystemRanges is relevant only if dontBootstrap is not set.
    // If set, the store will have a single range. If not set, the store will have
    // all the system ranges that are generally created for a cluster at bootstrap.
    dontCreateSystemRanges bool

    cfg *kvserver.StoreConfig
    eng storage.Engine
}

// createTestStoreWithOpts creates a test store using the given engine and clock.
// TestStoreConfig() can be used for creating a config suitable for most
// tests.
func createTestStoreWithOpts(
    t testing.TB, opts testStoreOpts, stopper *stop.Stopper,
) *kvserver.Store {
    var storeCfg kvserver.StoreConfig
    if opts.cfg == nil {
        manual := hlc.NewManualClock(123)
        storeCfg = kvserver.TestStoreConfig(hlc.NewClock(manual.UnixNano, time.Nanosecond))
    } else {
        storeCfg = *opts.cfg
    }
    eng := opts.eng
    if eng == nil {
        eng = storage.NewDefaultInMem()
        stopper.AddCloser(eng)
    }

    tracer := storeCfg.Settings.Tracer
    ac := log.AmbientContext{Tracer: tracer}
    storeCfg.AmbientCtx = ac

    rpcContext := rpc.NewContext(
        ac, &base.Config{Insecure: true}, storeCfg.Clock, stopper, storeCfg.Settings)
    // Ensure that tests using this test context and restart/shut down
    // their servers do not inadvertently start talking to servers from
    // unrelated concurrent tests.
    rpcContext.ClusterID.Set(context.Background(), uuid.MakeV4())
    nodeDesc := &roachpb.NodeDescriptor{
        NodeID:  1,
        Address: util.MakeUnresolvedAddr("tcp", "invalid.invalid:26257"),
    }
    server := rpc.NewServer(rpcContext) // never started
    storeCfg.Gossip = gossip.NewTest(
        nodeDesc.NodeID, rpcContext, server, stopper, metric.NewRegistry(), storeCfg.DefaultZoneConfig,
    )
    storeCfg.ScanMaxIdleTime = 1 * time.Second
    stores := kvserver.NewStores(ac, storeCfg.Clock)

    if err := storeCfg.Gossip.SetNodeDescriptor(nodeDesc); err != nil {
        t.Fatal(err)
    }

    retryOpts := base.DefaultRetryOptions()
    retryOpts.Closer = stopper.ShouldQuiesce()
    distSender := kvcoord.NewDistSender(kvcoord.DistSenderConfig{
        AmbientCtx: ac,
        Clock:      storeCfg.Clock,
        Settings:   storeCfg.Settings,
        RPCContext: rpcContext,
        TestingKnobs: kvcoord.ClientTestingKnobs{
            TransportFactory: kvcoord.SenderTransportFactory(tracer, stores),
        },
        RPCRetryOptions: &retryOpts,
    }, storeCfg.Gossip)

    tcsFactory := kvcoord.NewTxnCoordSenderFactory(
        kvcoord.TxnCoordSenderFactoryConfig{
            AmbientCtx: ac,
            Settings:   storeCfg.Settings,
            Clock:      storeCfg.Clock,
            Stopper:    stopper,
        },
        distSender,
    )
    storeCfg.DB = kv.NewDB(ac, tcsFactory, storeCfg.Clock)
    storeCfg.StorePool = kvserver.NewTestStorePool(storeCfg)
    storeCfg.Transport = kvserver.NewDummyRaftTransport(storeCfg.Settings)
    // TODO(bdarnell): arrange to have the transport closed.
176 ctx := context.Background() 177 if !opts.dontBootstrap { 178 require.NoError(t, kvserver.WriteClusterVersion(ctx, eng, clusterversion.TestingClusterVersion)) 179 if err := kvserver.InitEngine( 180 ctx, eng, roachpb.StoreIdent{NodeID: 1, StoreID: 1}, 181 ); err != nil { 182 t.Fatal(err) 183 } 184 } 185 store := kvserver.NewStore(ctx, storeCfg, eng, nodeDesc) 186 if !opts.dontBootstrap { 187 var kvs []roachpb.KeyValue 188 var splits []roachpb.RKey 189 kvs, tableSplits := sqlbase.MakeMetadataSchema( 190 keys.SystemSQLCodec, storeCfg.DefaultZoneConfig, storeCfg.DefaultSystemZoneConfig, 191 ).GetInitialValues() 192 if !opts.dontCreateSystemRanges { 193 splits = config.StaticSplits() 194 splits = append(splits, tableSplits...) 195 sort.Slice(splits, func(i, j int) bool { 196 return splits[i].Less(splits[j]) 197 }) 198 } 199 err := kvserver.WriteInitialClusterData( 200 ctx, 201 eng, 202 kvs, /* initialValues */ 203 clusterversion.TestingBinaryVersion, 204 1 /* numStores */, splits, storeCfg.Clock.PhysicalNow()) 205 if err != nil { 206 t.Fatal(err) 207 } 208 } 209 if err := store.Start(ctx, stopper); err != nil { 210 t.Fatal(err) 211 } 212 stores.AddStore(store) 213 214 // Connect to gossip and gossip the store's capacity. 215 <-store.Gossip().Connected 216 if err := store.GossipStore(ctx, false /* useCached */); err != nil { 217 t.Fatal(err) 218 } 219 // Wait for the store's single range to have quorum before proceeding. 220 repl := store.LookupReplica(roachpb.RKeyMin) 221 222 // Send a request through the range to make sure everything is warmed up 223 // and works. 224 // NB: it's unclear if this code is necessary. 225 var ba roachpb.BatchRequest 226 get := roachpb.GetRequest{} 227 get.Key = keys.LocalMax 228 ba.Header.Replica = repl.Desc().Replicas().Voters()[0] 229 ba.Header.RangeID = repl.RangeID 230 ba.Add(&get) 231 _, pErr := store.Send(ctx, ba) 232 require.NoError(t, pErr.GoError()) 233 234 // Wait for the system config to be available in gossip. All sorts of things 235 // might not work properly while the system config is not available. 236 testutils.SucceedsSoon(t, func() error { 237 if cfg := store.Gossip().GetSystemConfig(); cfg == nil { 238 return errors.Errorf("system config not available in gossip yet") 239 } 240 return nil 241 }) 242 243 // Make all the initial ranges part of replication queue purgatory. This is 244 // similar to what a real cluster does after bootstrap - we want the initial 245 // ranges to up-replicate as soon as other nodes join. 246 if err := store.ForceReplicationScanAndProcess(); err != nil { 247 t.Fatal(err) 248 } 249 250 return store 251 } 252 253 type multiTestContext struct { 254 t testing.TB 255 storeConfig *kvserver.StoreConfig 256 manualClock *hlc.ManualClock 257 rpcContext *rpc.Context 258 // rpcTestingKnobs are optional configuration for the rpcContext. 259 rpcTestingKnobs rpc.ContextTestingKnobs 260 261 // By default, a multiTestContext starts with a bunch of system ranges, just 262 // like a regular Server after bootstrap. If startWithSingleRange is set, 263 // we'll start with a single range spanning all the key space. The split 264 // queue, if not disabled, might then create other range system ranges. 
265 startWithSingleRange bool 266 267 nodeIDtoAddrMu struct { 268 *syncutil.RWMutex 269 nodeIDtoAddr map[roachpb.NodeID]net.Addr 270 } 271 272 nodeDialer *nodedialer.Dialer 273 transport *kvserver.RaftTransport 274 275 // The per-store clocks slice normally contains aliases of 276 // multiTestContext.clock, but it may be populated before Start() to 277 // use distinct clocks per store. 278 clocks []*hlc.Clock 279 engines []storage.Engine 280 grpcServers []*grpc.Server 281 distSenders []*kvcoord.DistSender 282 dbs []*kv.DB 283 gossips []*gossip.Gossip 284 storePools []*kvserver.StorePool 285 // We use multiple stoppers so we can restart different parts of the 286 // test individually. transportStopper is for 'transport', and the 287 // 'stoppers' slice corresponds to the 'stores'. 288 transportStopper *stop.Stopper 289 engineStoppers []*stop.Stopper 290 291 // The fields below may mutate at runtime so the pointers they contain are 292 // protected by 'mu'. 293 mu *syncutil.RWMutex 294 senders []*kvserver.Stores 295 stores []*kvserver.Store 296 stoppers []*stop.Stopper 297 idents []roachpb.StoreIdent 298 nodeLivenesses []*kvserver.NodeLiveness 299 } 300 301 func (m *multiTestContext) getNodeIDAddress(nodeID roachpb.NodeID) (net.Addr, error) { 302 m.nodeIDtoAddrMu.RLock() 303 addr, ok := m.nodeIDtoAddrMu.nodeIDtoAddr[nodeID] 304 m.nodeIDtoAddrMu.RUnlock() 305 if ok { 306 return addr, nil 307 } 308 return nil, errors.Errorf("unknown peer %d", nodeID) 309 } 310 311 func (m *multiTestContext) Start(t testing.TB, numStores int) { 312 { 313 // Only the fields we nil out below can be injected into m as it 314 // starts up, so fail early if anything else was set (as we'd likely 315 // override it and the test wouldn't get what it wanted). 316 mCopy := *m 317 mCopy.storeConfig = nil 318 mCopy.clocks = nil 319 mCopy.engines = nil 320 mCopy.engineStoppers = nil 321 mCopy.startWithSingleRange = false 322 mCopy.rpcTestingKnobs = rpc.ContextTestingKnobs{} 323 var empty multiTestContext 324 if !reflect.DeepEqual(empty, mCopy) { 325 t.Fatalf("illegal fields set in multiTestContext:\n%s", pretty.Diff(empty, mCopy)) 326 } 327 } 328 329 m.t = t 330 331 m.nodeIDtoAddrMu.RWMutex = &syncutil.RWMutex{} 332 m.mu = &syncutil.RWMutex{} 333 m.stores = make([]*kvserver.Store, numStores) 334 m.storePools = make([]*kvserver.StorePool, numStores) 335 m.distSenders = make([]*kvcoord.DistSender, numStores) 336 m.dbs = make([]*kv.DB, numStores) 337 m.stoppers = make([]*stop.Stopper, numStores) 338 m.senders = make([]*kvserver.Stores, numStores) 339 m.idents = make([]roachpb.StoreIdent, numStores) 340 m.grpcServers = make([]*grpc.Server, numStores) 341 m.gossips = make([]*gossip.Gossip, numStores) 342 m.nodeLivenesses = make([]*kvserver.NodeLiveness, numStores) 343 344 if m.storeConfig != nil && m.storeConfig.Clock != nil { 345 require.Nil(t, m.manualClock, "can't use manual clock; storeConfig.Clock is set") 346 require.Empty(t, m.clocks, "can't populate .clocks; storeConfig.Clock is set") 347 m.clocks = []*hlc.Clock{m.storeConfig.Clock} 348 } else if len(m.clocks) == 0 { 349 if m.manualClock == nil { 350 m.manualClock = hlc.NewManualClock(123) 351 } 352 m.clocks = []*hlc.Clock{hlc.NewClock(m.manualClock.UnixNano, time.Nanosecond)} 353 } 354 355 if m.storeConfig != nil { 356 // Either they're equal, or the left is initially nil (see the golf 357 // above). 
358 m.storeConfig.Clock = m.clocks[0] 359 } 360 361 if m.transportStopper == nil { 362 m.transportStopper = stop.NewStopper() 363 } 364 st := cluster.MakeTestingClusterSettings() 365 if m.rpcContext == nil { 366 m.rpcContext = rpc.NewContextWithTestingKnobs(log.AmbientContext{Tracer: st.Tracer}, &base.Config{Insecure: true}, m.clock(), 367 m.transportStopper, st, m.rpcTestingKnobs) 368 // Ensure that tests using this test context and restart/shut down 369 // their servers do not inadvertently start talking to servers from 370 // unrelated concurrent tests. 371 m.rpcContext.ClusterID.Set(context.Background(), uuid.MakeV4()) 372 // We are sharing the same RPC context for all simulated nodes, so we can't enforce 373 // some of the RPC check validation. 374 m.rpcContext.TestingAllowNamedRPCToAnonymousServer = true 375 376 // Create a breaker which never trips and never backs off to avoid 377 // introducing timing-based flakes. 378 m.rpcContext.BreakerFactory = func() *circuit.Breaker { 379 return circuit.NewBreakerWithOptions(&circuit.Options{ 380 BackOff: &backoff.ZeroBackOff{}, 381 }) 382 } 383 } 384 m.nodeDialer = nodedialer.New(m.rpcContext, m.getNodeIDAddress) 385 m.transport = kvserver.NewRaftTransport( 386 log.AmbientContext{Tracer: st.Tracer}, st, 387 m.nodeDialer, nil, m.transportStopper, 388 ) 389 390 for idx := 0; idx < numStores; idx++ { 391 m.addStore(idx) 392 } 393 394 // Wait for gossip to startup. 395 testutils.SucceedsSoon(t, func() error { 396 for i, g := range m.gossips { 397 if cfg := g.GetSystemConfig(); cfg == nil { 398 return errors.Errorf("system config not available at index %d", i) 399 } 400 } 401 return nil 402 }) 403 } 404 405 func (m *multiTestContext) clock() *hlc.Clock { 406 return m.clocks[0] 407 } 408 409 func (m *multiTestContext) Stop() { 410 done := make(chan struct{}) 411 go func() { 412 defer func() { 413 if r := recover(); r != nil { 414 m.t.Errorf("mtc.Stop() panicked: %+v", r) 415 } 416 }() 417 m.mu.RLock() 418 419 // Quiesce everyone in parallel (before the transport stopper) to avoid 420 // deadlocks. 421 var wg sync.WaitGroup 422 wg.Add(len(m.stoppers)) 423 for _, s := range m.stoppers { 424 go func(s *stop.Stopper) { 425 defer wg.Done() 426 // Some Stoppers may be nil if stopStore has been called 427 // without restartStore. 428 if s != nil { 429 // TODO(tschottdorf): seems like it *should* be possible to 430 // call .Stop() directly, but then stressing essentially 431 // any test (TestRaftAfterRemove is a good example) results 432 // in deadlocks where a task can't finish because of 433 // getting stuck in addWriteCommand. 434 s.Quiesce(context.Background()) 435 } 436 }(s) 437 } 438 m.mu.RUnlock() 439 wg.Wait() 440 441 m.mu.RLock() 442 defer m.mu.RUnlock() 443 for _, stopper := range m.stoppers { 444 if stopper != nil { 445 stopper.Stop(context.Background()) 446 } 447 } 448 m.transportStopper.Stop(context.Background()) 449 450 for _, s := range m.engineStoppers { 451 s.Stop(context.Background()) 452 } 453 close(done) 454 }() 455 456 select { 457 case <-done: 458 case <-time.After(30 * time.Second): 459 // If we've already failed, just attach another failure to the 460 // test, since a timeout during shutdown after a failure is 461 // probably not interesting, and will prevent the display of any 462 // pending t.Error. If we're timing out but the test was otherwise 463 // a success, panic so we see stack traces from other goroutines. 
464 if m.t.Failed() { 465 m.t.Error("timed out during shutdown") 466 } else { 467 panic("timed out during shutdown") 468 } 469 } 470 471 m.mu.RLock() 472 defer m.mu.RUnlock() 473 for _, s := range m.stores { 474 if s != nil { 475 s.AssertInvariants() 476 } 477 } 478 } 479 480 // gossipStores forces each store to gossip its store descriptor and then 481 // blocks until all nodes have received these updated descriptors. 482 func (m *multiTestContext) gossipStores() { 483 timestamps := make(map[string]int64) 484 for i := 0; i < len(m.stores); i++ { 485 <-m.gossips[i].Connected 486 if err := m.stores[i].GossipStore(context.Background(), false /* useCached */); err != nil { 487 m.t.Fatal(err) 488 } 489 infoStatus := m.gossips[i].GetInfoStatus() 490 storeKey := gossip.MakeStoreKey(m.stores[i].Ident.StoreID) 491 timestamps[storeKey] = infoStatus.Infos[storeKey].OrigStamp 492 } 493 // Wait until all stores know about each other. 494 testutils.SucceedsSoon(m.t, func() error { 495 for i := 0; i < len(m.stores); i++ { 496 nodeID := m.stores[i].Ident.NodeID 497 infoStatus := m.gossips[i].GetInfoStatus() 498 for storeKey, timestamp := range timestamps { 499 info, ok := infoStatus.Infos[storeKey] 500 if !ok { 501 return errors.Errorf("node %d does not have a storeDesc for %s yet", nodeID, storeKey) 502 } 503 if info.OrigStamp < timestamp { 504 return errors.Errorf("node %d's storeDesc for %s is not up to date", nodeID, storeKey) 505 } 506 } 507 } 508 return nil 509 }) 510 } 511 512 // initGossipNetwork gossips all store descriptors and waits until all 513 // storePools have received those descriptors. 514 func (m *multiTestContext) initGossipNetwork() { 515 m.gossipStores() 516 testutils.SucceedsSoon(m.t, func() error { 517 for i := 0; i < len(m.stores); i++ { 518 if _, alive, _ := m.storePools[i].GetStoreList(); alive != len(m.stores) { 519 return errors.Errorf("node %d's store pool only has %d alive stores, expected %d", 520 m.stores[i].Ident.NodeID, alive, len(m.stores)) 521 } 522 } 523 return nil 524 }) 525 log.Info(context.Background(), "gossip network initialized") 526 } 527 528 type multiTestContextKVTransport struct { 529 mtc *multiTestContext 530 idx int 531 replicas kvcoord.ReplicaSlice 532 mu struct { 533 syncutil.Mutex 534 pending map[roachpb.ReplicaID]struct{} 535 } 536 } 537 538 func (m *multiTestContext) kvTransportFactory( 539 _ kvcoord.SendOptions, _ *nodedialer.Dialer, replicas kvcoord.ReplicaSlice, 540 ) (kvcoord.Transport, error) { 541 t := &multiTestContextKVTransport{ 542 mtc: m, 543 replicas: replicas, 544 } 545 t.mu.pending = map[roachpb.ReplicaID]struct{}{} 546 return t, nil 547 } 548 549 func (t *multiTestContextKVTransport) String() string { 550 return fmt.Sprintf("%T: replicas=%v, idx=%d", t, t.replicas, t.idx) 551 } 552 553 func (t *multiTestContextKVTransport) IsExhausted() bool { 554 return t.idx == len(t.replicas) 555 } 556 557 func (t *multiTestContextKVTransport) SendNext( 558 ctx context.Context, ba roachpb.BatchRequest, 559 ) (*roachpb.BatchResponse, error) { 560 if ctx.Err() != nil { 561 return nil, errors.Wrap(ctx.Err(), "send context is canceled") 562 } 563 rep := t.replicas[t.idx] 564 t.idx++ 565 t.setPending(rep.ReplicaID, true) 566 567 // Node IDs are assigned in the order the nodes are created by 568 // the multi test context, so we can derive the index for stoppers 569 // and senders by subtracting 1 from the node ID. 
570 nodeIndex := int(rep.NodeID) - 1 571 if log.V(1) { 572 log.Infof(ctx, "SendNext nodeIndex=%d", nodeIndex) 573 } 574 575 // This method crosses store boundaries: it is possible that the 576 // destination store is stopped while the source is still running. 577 // Run the send in a Task on the destination store to simulate what 578 // would happen with real RPCs. 579 t.mtc.mu.RLock() 580 s := t.mtc.stoppers[nodeIndex] 581 sender := t.mtc.senders[nodeIndex] 582 t.mtc.mu.RUnlock() 583 584 if s == nil { 585 t.setPending(rep.ReplicaID, false) 586 return nil, roachpb.NewSendError("store is stopped") 587 } 588 589 // Clone txn of ba args for sending. 590 ba.Replica = rep.ReplicaDescriptor 591 if txn := ba.Txn; txn != nil { 592 ba.Txn = ba.Txn.Clone() 593 } 594 var br *roachpb.BatchResponse 595 var pErr *roachpb.Error 596 if err := s.RunTask(ctx, "mtc send", func(ctx context.Context) { 597 br, pErr = sender.Send(ctx, ba) 598 }); err != nil { 599 pErr = roachpb.NewError(err) 600 } 601 if br == nil { 602 br = &roachpb.BatchResponse{} 603 } 604 if br.Error != nil { 605 panic(roachpb.ErrorUnexpectedlySet(sender, br)) 606 } 607 br.Error = pErr 608 609 // On certain errors, we must expire leases to ensure that the 610 // next attempt has a chance of succeeding. 611 switch tErr := pErr.GetDetail().(type) { 612 case *roachpb.NotLeaseHolderError: 613 if leaseHolder := tErr.LeaseHolder; leaseHolder != nil { 614 t.mtc.mu.RLock() 615 leaseHolderStore := t.mtc.stores[leaseHolder.NodeID-1] 616 t.mtc.mu.RUnlock() 617 if leaseHolderStore == nil { 618 // The lease holder is known but down, so expire its lease. 619 if t.mtc.manualClock != nil { 620 t.mtc.advanceClock(ctx) 621 } 622 } 623 } else { 624 // stores has the range, is *not* the lease holder, but the 625 // lease holder is not known; this can happen if the lease 626 // holder is removed from the group. Move the manual clock 627 // forward in an attempt to expire the lease. 628 if t.mtc.manualClock != nil { 629 t.mtc.advanceClock(ctx) 630 } 631 } 632 } 633 t.setPending(rep.ReplicaID, false) 634 return br, nil 635 } 636 637 func (t *multiTestContextKVTransport) NextInternalClient( 638 ctx context.Context, 639 ) (context.Context, roachpb.InternalClient, error) { 640 panic("unimplemented") 641 } 642 643 func (t *multiTestContextKVTransport) NextReplica() roachpb.ReplicaDescriptor { 644 if t.IsExhausted() { 645 return roachpb.ReplicaDescriptor{} 646 } 647 return t.replicas[t.idx].ReplicaDescriptor 648 } 649 650 func (t *multiTestContextKVTransport) MoveToFront(replica roachpb.ReplicaDescriptor) { 651 t.mu.Lock() 652 defer t.mu.Unlock() 653 if _, ok := t.mu.pending[replica.ReplicaID]; ok { 654 return 655 } 656 for i := range t.replicas { 657 if t.replicas[i].ReplicaDescriptor == replica { 658 if i < t.idx { 659 t.idx-- 660 } 661 // Swap the client representing this replica to the front. 662 t.replicas[i], t.replicas[t.idx] = t.replicas[t.idx], t.replicas[i] 663 return 664 } 665 } 666 } 667 668 func (t *multiTestContextKVTransport) setPending(repID roachpb.ReplicaID, pending bool) { 669 t.mu.Lock() 670 defer t.mu.Unlock() 671 if pending { 672 t.mu.pending[repID] = struct{}{} 673 } else { 674 delete(t.mu.pending, repID) 675 } 676 } 677 678 // rangeDescByAge implements sort.Interface for RangeDescriptor, sorting by the 679 // age of the RangeDescriptor. This is intended to find the most recent version 680 // of the same RangeDescriptor, when multiple versions of it are available. 
681 type rangeDescByAge []*roachpb.RangeDescriptor 682 683 func (rd rangeDescByAge) Len() int { return len(rd) } 684 func (rd rangeDescByAge) Swap(i, j int) { rd[i], rd[j] = rd[j], rd[i] } 685 func (rd rangeDescByAge) Less(i, j int) bool { 686 // "Less" means "older" according to this sort. 687 // A RangeDescriptor version with a higher NextReplicaID is always more recent. 688 if rd[i].NextReplicaID != rd[j].NextReplicaID { 689 return rd[i].NextReplicaID < rd[j].NextReplicaID 690 } 691 // If two RangeDescriptor versions have the same NextReplicaID, then the one 692 // with the fewest replicas is the newest. 693 return len(rd[i].InternalReplicas) > len(rd[j].InternalReplicas) 694 } 695 696 // FirstRange implements the RangeDescriptorDB interface. It returns the range 697 // descriptor which contains roachpb.KeyMin. 698 // 699 // DistSender's implementation of FirstRange() does not work correctly because 700 // the gossip network used by multiTestContext is only partially operational. 701 func (m *multiTestContext) FirstRange() (*roachpb.RangeDescriptor, error) { 702 m.mu.RLock() 703 defer m.mu.RUnlock() 704 var descs []*roachpb.RangeDescriptor 705 for _, str := range m.senders { 706 // Node liveness heartbeats start quickly, sometimes before the first 707 // range would be available here and before we've added all ranges. 708 if str == nil { 709 continue 710 } 711 // Find every version of the RangeDescriptor for the first range by 712 // querying all stores; it may not be present on all stores, but the 713 // current version is guaranteed to be present on one of them as long 714 // as all stores are alive. 715 if err := str.VisitStores(func(s *kvserver.Store) error { 716 firstRng := s.LookupReplica(roachpb.RKeyMin) 717 if firstRng != nil { 718 descs = append(descs, firstRng.Desc()) 719 } 720 return nil 721 }); err != nil { 722 m.t.Fatalf("no error should be possible from this invocation of VisitStores, but found %s", err) 723 } 724 } 725 if len(descs) == 0 { 726 return nil, errors.New("first Range is not present on any live store in the multiTestContext") 727 } 728 // Sort the RangeDescriptor versions by age and return the most recent 729 // version. 
730 sort.Sort(rangeDescByAge(descs)) 731 return descs[len(descs)-1], nil 732 } 733 734 func (m *multiTestContext) makeStoreConfig(i int) kvserver.StoreConfig { 735 var cfg kvserver.StoreConfig 736 if m.storeConfig != nil { 737 cfg = *m.storeConfig 738 cfg.Clock = m.clocks[i] 739 } else { 740 cfg = kvserver.TestStoreConfig(m.clocks[i]) 741 m.storeConfig = &cfg 742 } 743 cfg.NodeDialer = m.nodeDialer 744 cfg.Transport = m.transport 745 cfg.Gossip = m.gossips[i] 746 cfg.TestingKnobs.DisableMergeQueue = true 747 cfg.TestingKnobs.DisableSplitQueue = true 748 cfg.TestingKnobs.ReplicateQueueAcceptsUnsplit = true 749 return cfg 750 } 751 752 var _ kvcoord.RangeDescriptorDB = mtcRangeDescriptorDB{} 753 754 type mtcRangeDescriptorDB struct { 755 *multiTestContext 756 ds **kvcoord.DistSender 757 } 758 759 func (mrdb mtcRangeDescriptorDB) RangeLookup( 760 ctx context.Context, key roachpb.RKey, useReverseScan bool, 761 ) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) { 762 return (*mrdb.ds).RangeLookup(ctx, key, useReverseScan) 763 } 764 765 func (m *multiTestContext) populateDB(idx int, st *cluster.Settings, stopper *stop.Stopper) { 766 retryOpts := base.DefaultRetryOptions() 767 retryOpts.Closer = stopper.ShouldQuiesce() 768 ambient := m.storeConfig.AmbientCtx 769 m.distSenders[idx] = kvcoord.NewDistSender(kvcoord.DistSenderConfig{ 770 AmbientCtx: ambient, 771 Clock: m.clocks[idx], 772 RPCContext: m.rpcContext, 773 RangeDescriptorDB: mtcRangeDescriptorDB{ 774 multiTestContext: m, 775 ds: &m.distSenders[idx], 776 }, 777 Settings: st, 778 TestingKnobs: kvcoord.ClientTestingKnobs{ 779 TransportFactory: m.kvTransportFactory, 780 }, 781 RPCRetryOptions: &retryOpts, 782 }, m.gossips[idx]) 783 tcsFactory := kvcoord.NewTxnCoordSenderFactory( 784 kvcoord.TxnCoordSenderFactoryConfig{ 785 AmbientCtx: ambient, 786 Settings: m.storeConfig.Settings, 787 Clock: m.clocks[idx], 788 Stopper: stopper, 789 }, 790 m.distSenders[idx], 791 ) 792 m.dbs[idx] = kv.NewDB(ambient, tcsFactory, m.clocks[idx]) 793 } 794 795 func (m *multiTestContext) populateStorePool( 796 idx int, cfg kvserver.StoreConfig, nodeLiveness *kvserver.NodeLiveness, 797 ) { 798 m.storePools[idx] = kvserver.NewStorePool( 799 cfg.AmbientCtx, 800 cfg.Settings, 801 m.gossips[idx], 802 m.clocks[idx], 803 nodeLiveness.GetNodeCount, 804 kvserver.MakeStorePoolNodeLivenessFunc(nodeLiveness), 805 /* deterministic */ false, 806 ) 807 } 808 809 // AddStore creates a new store on the same Transport but doesn't create any ranges. 810 func (m *multiTestContext) addStore(idx int) { 811 var clock *hlc.Clock 812 if len(m.clocks) > idx { 813 clock = m.clocks[idx] 814 } else { 815 clock = m.storeConfig.Clock 816 m.clocks = append(m.clocks, clock) 817 } 818 var eng storage.Engine 819 var needBootstrap bool 820 if len(m.engines) > idx { 821 eng = m.engines[idx] 822 _, err := kvserver.ReadStoreIdent(context.Background(), eng) 823 if errors.HasType(err, (*kvserver.NotBootstrappedError)(nil)) { 824 needBootstrap = true 825 } else if err != nil { 826 m.t.Fatal(err) 827 } 828 } else { 829 engineStopper := stop.NewStopper() 830 m.engineStoppers = append(m.engineStoppers, engineStopper) 831 eng = storage.NewDefaultInMem() 832 engineStopper.AddCloser(eng) 833 m.engines = append(m.engines, eng) 834 needBootstrap = true 835 } 836 grpcServer := rpc.NewServer(m.rpcContext) 837 m.grpcServers[idx] = grpcServer 838 kvserver.RegisterMultiRaftServer(grpcServer, m.transport) 839 840 stopper := stop.NewStopper() 841 842 // Give this store the first store as a resolver. 
We don't provide all of the 843 // previous stores as resolvers as doing so can cause delays in bringing the 844 // gossip network up. 845 resolvers := func() []resolver.Resolver { 846 m.nodeIDtoAddrMu.Lock() 847 defer m.nodeIDtoAddrMu.Unlock() 848 addr := m.nodeIDtoAddrMu.nodeIDtoAddr[1] 849 if addr == nil { 850 return nil 851 } 852 r, err := resolver.NewResolverFromAddress(addr) 853 if err != nil { 854 m.t.Fatal(err) 855 } 856 return []resolver.Resolver{r} 857 }() 858 m.gossips[idx] = gossip.NewTest( 859 roachpb.NodeID(idx+1), 860 m.rpcContext, 861 grpcServer, 862 m.transportStopper, 863 metric.NewRegistry(), 864 zonepb.DefaultZoneConfigRef(), 865 ) 866 867 nodeID := roachpb.NodeID(idx + 1) 868 cfg := m.makeStoreConfig(idx) 869 ambient := log.AmbientContext{Tracer: cfg.Settings.Tracer} 870 m.populateDB(idx, cfg.Settings, stopper) 871 nlActive, nlRenewal := cfg.NodeLivenessDurations() 872 m.nodeLivenesses[idx] = kvserver.NewNodeLiveness( 873 ambient, m.clocks[idx], m.dbs[idx], m.gossips[idx], 874 nlActive, nlRenewal, cfg.Settings, metric.TestSampleInterval, 875 ) 876 m.populateStorePool(idx, cfg, m.nodeLivenesses[idx]) 877 cfg.DB = m.dbs[idx] 878 cfg.NodeLiveness = m.nodeLivenesses[idx] 879 cfg.StorePool = m.storePools[idx] 880 881 ctx := context.Background() 882 if needBootstrap { 883 require.NoError(m.t, kvserver.WriteClusterVersion(ctx, eng, clusterversion.TestingClusterVersion)) 884 if err := kvserver.InitEngine(ctx, eng, roachpb.StoreIdent{ 885 NodeID: roachpb.NodeID(idx + 1), 886 StoreID: roachpb.StoreID(idx + 1), 887 }); err != nil { 888 m.t.Fatal(err) 889 } 890 } 891 if needBootstrap && idx == 0 { 892 // Bootstrap the initial range on the first engine. 893 var splits []roachpb.RKey 894 kvs, tableSplits := sqlbase.MakeMetadataSchema( 895 keys.SystemSQLCodec, cfg.DefaultZoneConfig, cfg.DefaultSystemZoneConfig, 896 ).GetInitialValues() 897 if !m.startWithSingleRange { 898 splits = config.StaticSplits() 899 splits = append(splits, tableSplits...) 900 sort.Slice(splits, func(i, j int) bool { 901 return splits[i].Less(splits[j]) 902 }) 903 } 904 err := kvserver.WriteInitialClusterData( 905 ctx, 906 eng, 907 kvs, /* initialValues */ 908 clusterversion.TestingBinaryVersion, 909 len(m.engines), splits, cfg.Clock.PhysicalNow()) 910 if err != nil { 911 m.t.Fatal(err) 912 } 913 } 914 store := kvserver.NewStore(ctx, cfg, eng, &roachpb.NodeDescriptor{NodeID: nodeID}) 915 if err := store.Start(ctx, stopper); err != nil { 916 m.t.Fatal(err) 917 } 918 919 sender := kvserver.NewStores(ambient, clock) 920 sender.AddStore(store) 921 perReplicaServer := kvserver.MakeServer(&roachpb.NodeDescriptor{NodeID: nodeID}, sender) 922 kvserver.RegisterPerReplicaServer(grpcServer, perReplicaServer) 923 924 ln, err := netutil.ListenAndServeGRPC(m.transportStopper, grpcServer, util.TestAddr) 925 if err != nil { 926 m.t.Fatal(err) 927 } 928 m.nodeIDtoAddrMu.Lock() 929 if m.nodeIDtoAddrMu.nodeIDtoAddr == nil { 930 m.nodeIDtoAddrMu.nodeIDtoAddr = make(map[roachpb.NodeID]net.Addr) 931 } 932 _, ok := m.nodeIDtoAddrMu.nodeIDtoAddr[nodeID] 933 if !ok { 934 m.nodeIDtoAddrMu.nodeIDtoAddr[nodeID] = ln.Addr() 935 } 936 m.nodeIDtoAddrMu.Unlock() 937 if ok { 938 m.t.Fatalf("node %d already listening", nodeID) 939 } 940 941 // Add newly created objects to the multiTestContext's collections. 
    // (these must be populated before the store is started so that
    // FirstRange() can find the sender)
    m.mu.Lock()
    m.stores[idx] = store
    m.stoppers[idx] = stopper
    m.senders[idx] = sender
    // Save the store identities for later so we can use them in
    // replication operations even while the store is stopped.
    m.idents[idx] = *store.Ident
    m.mu.Unlock()

    // NB: On Mac OS X, we sporadically see excessively long dialing times (~15s)
    // which cause various trickle down badness in tests. To avoid every test
    // having to worry about such conditions we pre-warm the connection
    // cache. See #8440 for an example of the headaches the long dial times
    // cause.
    if _, err := m.rpcContext.GRPCDialNode(ln.Addr().String(), nodeID,
        rpc.DefaultClass).Connect(ctx); err != nil {
        m.t.Fatal(err)
    }

    m.gossips[idx].Start(ln.Addr(), resolvers)

    if err := m.gossipNodeDesc(m.gossips[idx], nodeID); err != nil {
        m.t.Fatal(err)
    }

    ran := struct {
        sync.Once
        ch chan struct{}
    }{
        ch: make(chan struct{}),
    }
    m.nodeLivenesses[idx].StartHeartbeat(ctx, stopper, m.engines[idx:idx+1], func(ctx context.Context) {
        now := clock.Now()
        if err := store.WriteLastUpTimestamp(ctx, now); err != nil {
            log.Warningf(ctx, "%v", err)
        }
        ran.Do(func() {
            close(ran.ch)
        })
    })

    store.WaitForInit()

    // Wait until we see the first heartbeat by waiting for the callback (which
    // fires *after* the node becomes live).
    <-ran.ch
}

func (m *multiTestContext) nodeDesc(nodeID roachpb.NodeID) *roachpb.NodeDescriptor {
    addr := m.nodeIDtoAddrMu.nodeIDtoAddr[nodeID]
    return &roachpb.NodeDescriptor{
        NodeID:  nodeID,
        Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
    }
}

// gossipNodeDesc adds the node descriptor to the gossip network.
// Mostly makes sure that we don't see a warning per request.
func (m *multiTestContext) gossipNodeDesc(g *gossip.Gossip, nodeID roachpb.NodeID) error {
    return g.SetNodeDescriptor(m.nodeDesc(nodeID))
}
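// exampleGossipStoreDescriptors is an illustrative sketch of how the gossip
// plumbing set up above is typically exercised: after gossipStores returns,
// every node's gossip instance should hold a StoreDescriptor for every store.
// The three-store cluster size is an arbitrary choice for illustration.
func exampleGossipStoreDescriptors(t *testing.T) {
    mtc := &multiTestContext{}
    defer mtc.Stop()
    mtc.Start(t, 3)

    // Force every store to gossip its descriptor and wait for propagation.
    mtc.gossipStores()

    // Node 0's gossip should now contain store 2's descriptor.
    var desc roachpb.StoreDescriptor
    key := gossip.MakeStoreKey(mtc.stores[2].Ident.StoreID)
    if err := mtc.gossips[0].GetInfoProto(key, &desc); err != nil {
        t.Fatal(err)
    }
    if desc.StoreID != mtc.stores[2].Ident.StoreID {
        t.Fatalf("unexpected descriptor %+v", desc)
    }
}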
// stopStore stops a store but leaves the engine intact.
// All stopped stores must be restarted before multiTestContext.Stop is called.
func (m *multiTestContext) stopStore(i int) {
    // Attempting to acquire a write lock here could lead to a situation in
    // which an outstanding Raft proposal would never return due to address
    // resolution calling back into the `multiTestContext` and attempting to
    // acquire a read lock while this write lock is blocked on another read lock
    // held by `SendNext` which in turn is waiting on that Raft proposal:
    //
    // SendNext[hold RLock] -> Raft[want RLock]
    //             ^                /
    //              \              v
    //               stopStore[want Lock]
    //
    // Instead, we only acquire a read lock to fetch the stopper, and are
    // careful not to hold any locks while stopping the stopper.
    m.mu.RLock()
    stopper := m.stoppers[i]
    m.mu.RUnlock()

    stopper.Stop(context.Background())

    m.mu.Lock()
    m.stoppers[i] = nil
    // Break the transport breakers for this node so that messages sent between
    // a store stopping and that store restarting will never remain in-flight in
    // the transport and end up reaching the store. This has been the cause of
    // flakiness in the past.
    m.transport.GetCircuitBreaker(m.idents[i].NodeID, rpc.DefaultClass).Break()
    m.transport.GetCircuitBreaker(m.idents[i].NodeID, rpc.SystemClass).Break()
    m.senders[i].RemoveStore(m.stores[i])
    m.stores[i] = nil
    m.mu.Unlock()
}

// restartStoreWithoutHeartbeat restarts a store previously stopped with
// stopStore. It does not wait for the store to successfully perform a
// heartbeat before returning. This is important for tests where a restarted
// store may not be able to heartbeat immediately.
func (m *multiTestContext) restartStoreWithoutHeartbeat(i int) {
    m.mu.Lock()
    stopper := stop.NewStopper()
    m.stoppers[i] = stopper
    cfg := m.makeStoreConfig(i)
    m.populateDB(i, m.storeConfig.Settings, stopper)
    nlActive, nlRenewal := cfg.NodeLivenessDurations()
    m.nodeLivenesses[i] = kvserver.NewNodeLiveness(
        log.AmbientContext{Tracer: m.storeConfig.Settings.Tracer}, m.clocks[i], m.dbs[i],
        m.gossips[i], nlActive, nlRenewal, cfg.Settings, metric.TestSampleInterval,
    )
    m.populateStorePool(i, cfg, m.nodeLivenesses[i])
    cfg.DB = m.dbs[i]
    cfg.NodeLiveness = m.nodeLivenesses[i]
    cfg.StorePool = m.storePools[i]
    ctx := context.Background()
    store := kvserver.NewStore(ctx, cfg, m.engines[i], &roachpb.NodeDescriptor{NodeID: roachpb.NodeID(i + 1)})
    m.stores[i] = store

    // Need to start the store before adding it so that the store ID is initialized.
    if err := store.Start(ctx, stopper); err != nil {
        m.t.Fatal(err)
    }
    m.senders[i].AddStore(store)
    m.transport.GetCircuitBreaker(m.idents[i].NodeID, rpc.DefaultClass).Reset()
    m.transport.GetCircuitBreaker(m.idents[i].NodeID, rpc.SystemClass).Reset()
    m.mu.Unlock()
    cfg.NodeLiveness.StartHeartbeat(ctx, stopper, m.engines[i:i+1], func(ctx context.Context) {
        now := m.clocks[i].Now()
        if err := store.WriteLastUpTimestamp(ctx, now); err != nil {
            log.Warningf(ctx, "%v", err)
        }
    })
}

// restartStore restarts a store previously stopped with stopStore.
func (m *multiTestContext) restartStore(i int) {
    m.restartStoreWithoutHeartbeat(i)

    // Wait until we see the first heartbeat.
    liveness := m.nodeLivenesses[i]
    testutils.SucceedsSoon(m.t, func() error {
        if live, err := liveness.IsLive(roachpb.NodeID(i + 1)); err != nil || !live {
            return errors.New("node not live")
        }
        return nil
    })
}
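// exampleStopAndRestartStore is an illustrative sketch of the stopStore /
// restartStore workflow above: stop one store, write while it is down, and
// verify after the restart that it catches up via Raft. The key "restart-key"
// and the increments are arbitrary choices for illustration; the expected
// slices follow readIntFromEngines, which reads engines directly (a stopped
// store's engine still holds its last value).
func exampleStopAndRestartStore(t *testing.T) {
    ctx := context.Background()
    mtc := &multiTestContext{startWithSingleRange: true}
    defer mtc.Stop()
    mtc.Start(t, 3)

    const rangeID = roachpb.RangeID(1)
    key := roachpb.Key("restart-key")
    mtc.replicateRange(rangeID, 1, 2)

    // Write an initial value and wait for full replication.
    if _, pErr := kv.SendWrapped(ctx, mtc.stores[0].TestSender(), incrementArgs(key, 5)); pErr != nil {
        t.Fatal(pErr)
    }
    mtc.waitForValues(key, []int64{5, 5, 5})

    // Stop store 2 and write again; its engine keeps the stale value.
    mtc.stopStore(2)
    if _, pErr := kv.SendWrapped(ctx, mtc.stores[0].TestSender(), incrementArgs(key, 7)); pErr != nil {
        t.Fatal(pErr)
    }
    mtc.waitForValues(key, []int64{12, 12, 5})

    // After restarting, store 2 catches up.
    mtc.restartStore(2)
    mtc.waitForValues(key, []int64{12, 12, 12})
}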
func (m *multiTestContext) Store(i int) *kvserver.Store {
    m.mu.RLock()
    defer m.mu.RUnlock()
    return m.stores[i]
}

// findStartKeyLocked returns the start key of the given range.
func (m *multiTestContext) findStartKeyLocked(rangeID roachpb.RangeID) roachpb.RKey {
    // We can use the first store that returns results because the start
    // key never changes.
    for _, s := range m.stores {
        rep, err := s.GetReplica(rangeID)
        if err == nil && rep.IsInitialized() {
            return rep.Desc().StartKey
        }
    }
    m.t.Fatalf("couldn't find range %s on any store", rangeID)
    return nil // unreached, but the compiler can't tell.
}

// findMemberStoreLocked finds a non-stopped Store which is a member
// of the given range.
func (m *multiTestContext) findMemberStoreLocked(desc roachpb.RangeDescriptor) *kvserver.Store {
    for _, s := range m.stores {
        if s == nil {
            // Store is stopped.
            continue
        }
        for _, r := range desc.InternalReplicas {
            if s.StoreID() == r.StoreID {
                return s
            }
        }
    }
    m.t.Fatalf("couldn't find a live member of %s", &desc)
    return nil // unreached, but the compiler can't tell.
}

// restart stops and restarts all stores but leaves the engines intact,
// so the stores should contain the same persistent storage as before.
func (m *multiTestContext) restart() {
    for i := range m.stores {
        m.stopStore(i)
    }
    for i := range m.stores {
        m.restartStore(i)
    }
}
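// exampleFullClusterRestart is an illustrative sketch of the restart helper
// above: all stores are stopped and restarted on their existing engines, so
// previously written data must survive. The key "durable-key" and the cluster
// size are arbitrary choices for illustration.
func exampleFullClusterRestart(t *testing.T) {
    ctx := context.Background()
    mtc := &multiTestContext{startWithSingleRange: true}
    defer mtc.Stop()
    mtc.Start(t, 3)

    const rangeID = roachpb.RangeID(1)
    key := roachpb.Key("durable-key")
    mtc.replicateRange(rangeID, 1, 2)
    if _, pErr := kv.SendWrapped(ctx, mtc.stores[0].TestSender(), incrementArgs(key, 5)); pErr != nil {
        t.Fatal(pErr)
    }
    mtc.waitForValues(key, []int64{5, 5, 5})

    // Stop and restart every store; the engines (and thus the data) survive.
    mtc.restart()
    mtc.waitForValues(key, []int64{5, 5, 5})

    // The restarted cluster keeps accepting writes.
    if _, pErr := kv.SendWrapped(ctx, mtc.stores[0].TestSender(), incrementArgs(key, 2)); pErr != nil {
        t.Fatal(pErr)
    }
    mtc.waitForValues(key, []int64{7, 7, 7})
}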
// changeReplicas performs a ChangeReplicas operation, retrying until the
// destination store has been added or removed. Returns the range's
// NextReplicaID, which is the ID of the newly-added replica if this is an add.
func (m *multiTestContext) changeReplicas(
    startKey roachpb.RKey, dest int, changeType roachpb.ReplicaChangeType,
) (roachpb.ReplicaID, error) {
    ctx := context.Background()

    var alreadyDoneErr string
    switch changeType {
    case roachpb.ADD_REPLICA:
        alreadyDoneErr = "unable to add replica .* which is already present"
    case roachpb.REMOVE_REPLICA:
        alreadyDoneErr = "unable to remove replica .* which is not present"
    }

    retryOpts := retry.Options{
        InitialBackoff: time.Millisecond,
        MaxBackoff:     50 * time.Millisecond,
    }
    var desc roachpb.RangeDescriptor
    for r := retry.Start(retryOpts); r.Next(); {

        // Perform a consistent read to get the updated range descriptor (as
        // opposed to just going to one of the stores), to make sure we have
        // the effects of any previous ChangeReplicas call. By the time
        // ChangeReplicas returns the raft leader is guaranteed to have the
        // updated version, but followers are not.
        if err := m.dbs[0].GetProto(ctx, keys.RangeDescriptorKey(startKey), &desc); err != nil {
            return 0, err
        }

        _, err := m.dbs[0].AdminChangeReplicas(
            ctx, startKey.AsRawKey(),
            desc,
            roachpb.MakeReplicationChanges(
                changeType,
                roachpb.ReplicationTarget{
                    NodeID:  m.idents[dest].NodeID,
                    StoreID: m.idents[dest].StoreID,
                }),
        )

        if err == nil || testutils.IsError(err, alreadyDoneErr) {
            break
        }

        if errors.HasType(err, (*roachpb.AmbiguousResultError)(nil)) {
            // Try again after an AmbiguousResultError. If the operation
            // succeeded, then the next attempt will return alreadyDoneErr;
            // if it failed then the next attempt should succeed.
            continue
        }

        // We can't use storage.IsSnapshotError() because the original error object
        // is lost. We could make this into a roachpb.Error but it seems overkill
        // for this one usage.
        if testutils.IsError(err, "snapshot failed: .*|descriptor changed") {
            log.Infof(ctx, "%v", err)
            continue
        }
        return 0, err
    }

    return desc.NextReplicaID, nil
}

// replicateRange replicates the given range onto the given stores.
func (m *multiTestContext) replicateRange(rangeID roachpb.RangeID, dests ...int) {
    m.t.Helper()
    if err := m.replicateRangeNonFatal(rangeID, dests...); err != nil {
        m.t.Fatal(err)
    }
}

// replicateRangeNonFatal replicates the given range onto the given stores.
// Returns an error rather than calling m.t.Fatal upon error.
func (m *multiTestContext) replicateRangeNonFatal(rangeID roachpb.RangeID, dests ...int) error {
    m.mu.RLock()
    startKey := m.findStartKeyLocked(rangeID)
    m.mu.RUnlock()

    expectedReplicaIDs := make([]roachpb.ReplicaID, len(dests))
    for i, dest := range dests {
        var err error
        expectedReplicaIDs[i], err = m.changeReplicas(startKey, dest, roachpb.ADD_REPLICA)
        if err != nil {
            return err
        }
    }

    // Wait for the replication to complete on all destination nodes.
    return retry.ForDuration(testutils.DefaultSucceedsSoonDuration, func() error {
        for i, dest := range dests {
            repl, err := m.stores[dest].GetReplica(rangeID)
            if err != nil {
                return err
            }
            repDesc, err := repl.GetReplicaDescriptor()
            if err != nil {
                return err
            }
            if e := expectedReplicaIDs[i]; repDesc.ReplicaID != e {
                return errors.Errorf("expected replica %s to have ID %d", repl, e)
            }
            if t := repDesc.GetType(); t != roachpb.VOTER_FULL {
                return errors.Errorf("expected replica %s to be a voter; was %s", repl, t)
            }
            if !repl.Desc().ContainsKey(startKey) {
                return errors.Errorf("expected replica %s to contain %s", repl, startKey)
            }
        }
        return nil
    })
}

// unreplicateRange removes a replica of the range from the dest store.
func (m *multiTestContext) unreplicateRange(rangeID roachpb.RangeID, dest int) {
    m.t.Helper()
    if err := m.unreplicateRangeNonFatal(rangeID, dest); err != nil {
        m.t.Fatal(err)
    }
}

// unreplicateRangeNonFatal removes a replica of the range from the dest store.
// Returns an error rather than calling m.t.Fatal upon error.
func (m *multiTestContext) unreplicateRangeNonFatal(rangeID roachpb.RangeID, dest int) error {
    m.mu.RLock()
    startKey := m.findStartKeyLocked(rangeID)
    m.mu.RUnlock()

    _, err := m.changeReplicas(startKey, dest, roachpb.REMOVE_REPLICA)
    return err
}

// waitForUnreplicated waits until no replica exists for the specified range
// on the dest store.
func (m *multiTestContext) waitForUnreplicated(rangeID roachpb.RangeID, dest int) error {
    // Wait for the unreplication to complete on the destination node.
    return retry.ForDuration(testutils.DefaultSucceedsSoonDuration, func() error {
        _, err := m.stores[dest].GetReplica(rangeID)
        if err == nil {
            return fmt.Errorf("replica still exists on dest %d", dest)
        } else if errors.HasType(err, (*roachpb.RangeNotFoundError)(nil)) {
            return nil
        }
        return err
    })
}
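// exampleReplicateAndUnreplicate is an illustrative sketch of pairing
// unreplicateRange with waitForUnreplicated: add replicas to stores 1 and 2,
// remove the one on store 2, and wait until the replica object is gone from
// that store. The three-store setup is an arbitrary choice for illustration.
func exampleReplicateAndUnreplicate(t *testing.T) {
    mtc := &multiTestContext{startWithSingleRange: true}
    defer mtc.Stop()
    mtc.Start(t, 3)

    const rangeID = roachpb.RangeID(1)
    mtc.replicateRange(rangeID, 1, 2)
    mtc.unreplicateRange(rangeID, 2)

    // The removed replica is eventually destroyed on store 2.
    if err := mtc.waitForUnreplicated(rangeID, 2); err != nil {
        t.Fatal(err)
    }
}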
// readIntFromEngines reads the current integer value at the given key
// from all configured engines, filling in zeros when the value is not
// found. Returns a slice of the same length as mtc.engines.
func (m *multiTestContext) readIntFromEngines(key roachpb.Key) []int64 {
    results := make([]int64, len(m.engines))
    for i, eng := range m.engines {
        val, _, err := storage.MVCCGet(context.Background(), eng, key, m.clocks[i].Now(),
            storage.MVCCGetOptions{})
        if err != nil {
            log.VEventf(context.Background(), 1, "engine %d: error reading from key %s: %s", i, key, err)
        } else if val == nil {
            log.VEventf(context.Background(), 1, "engine %d: missing key %s", i, key)
        } else {
            results[i], err = val.GetInt()
            if err != nil {
                log.Errorf(context.Background(), "engine %d: error decoding %s from key %s: %+v", i, val, key, err)
            }
        }
    }
    return results
}

// waitForValuesT is like waitForValues but allows the caller to provide a
// testing.T which may differ from m.t.
func (m *multiTestContext) waitForValuesT(t testing.TB, key roachpb.Key, expected []int64) {
    t.Helper()
    // Tests using this helper rely on concurrently waiting for a value to
    // change in the underlying engine(s). Since the teeing engine does not
    // respond well to value mismatches, whether transient or permanent, skip
    // the test if the teeing engine is being used. See
    // https://github.com/cockroachdb/cockroach/issues/42656 for more context.
    if storage.DefaultStorageEngine == enginepb.EngineTypeTeePebbleRocksDB {
        t.Skip("disabled on teeing engine")
    }
    testutils.SucceedsSoon(t, func() error {
        actual := m.readIntFromEngines(key)
        if !reflect.DeepEqual(expected, actual) {
            return errors.Errorf("expected %v, got %v", expected, actual)
        }
        return nil
    })
}

// waitForValues waits (via SucceedsSoon) for the integer values at the given
// key to match the expected slice (across all engines). Fails the test if
// they do not match.
func (m *multiTestContext) waitForValues(key roachpb.Key, expected []int64) {
    m.t.Helper()
    m.waitForValuesT(m.t, key, expected)
}
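// exampleTransferLease is an illustrative sketch of the lease-transfer helpers
// defined just below: move the lease for range 1 from store 0 (the initial
// leaseholder) to store 1 and wait until store 1 can serve a read directly.
// The key "lease-key" is an arbitrary choice for illustration.
func exampleTransferLease(t *testing.T) {
    ctx := context.Background()
    mtc := &multiTestContext{startWithSingleRange: true}
    defer mtc.Stop()
    mtc.Start(t, 2)

    const rangeID = roachpb.RangeID(1)
    key := roachpb.Key("lease-key")
    mtc.replicateRange(rangeID, 1)

    if _, pErr := kv.SendWrapped(ctx, mtc.stores[0].TestSender(), putArgs(key, []byte("v"))); pErr != nil {
        t.Fatal(pErr)
    }

    // Move the lease to store 1; afterwards it serves reads without redirecting.
    mtc.transferLease(ctx, rangeID, 0, 1)
    testutils.SucceedsSoon(t, func() error {
        _, pErr := kv.SendWrapped(ctx, mtc.stores[1].TestSender(), getArgs(key))
        return pErr.GoError()
    })
}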
// transferLease transfers the lease for the given range from the source
// replica to the target replica. Assumes that the caller knows who the
// current leaseholder is.
func (m *multiTestContext) transferLease(
    ctx context.Context, rangeID roachpb.RangeID, source int, dest int,
) {
    if err := m.transferLeaseNonFatal(ctx, rangeID, source, dest); err != nil {
        m.t.Fatal(err)
    }
}

// transferLeaseNonFatal is like transferLease but returns an error rather
// than calling m.t.Fatal upon error. It too assumes that the caller knows
// who the current leaseholder is.
func (m *multiTestContext) transferLeaseNonFatal(
    ctx context.Context, rangeID roachpb.RangeID, source int, dest int,
) error {
    live := m.stores[dest] != nil && !m.stores[dest].IsDraining()
    if !live {
        return errors.Errorf("can't transfer lease to down or draining node at index %d", dest)
    }

    // Heartbeat the liveness record of the destination node to make sure that the
    // lease we're about to transfer can be used afterwards. Otherwise, the
    // liveness record might be expired and the node is considered down, making
    // this transfer irrelevant. In particular, this can happen if the clock was
    // advanced recently, so all the liveness records (including the destination)
    // are expired. In that case, the simple fact that the transfer succeeded
    // doesn't mean that the destination now has a usable lease.
    if err := m.heartbeatLiveness(ctx, dest); err != nil {
        return err
    }

    sourceRepl, err := m.stores[source].GetReplica(rangeID)
    if err != nil {
        return err
    }
    if err := sourceRepl.AdminTransferLease(ctx, m.idents[dest].StoreID); err != nil {
        return err
    }

    return nil
}

func (m *multiTestContext) heartbeatLiveness(ctx context.Context, store int) error {
    m.mu.RLock()
    nl := m.nodeLivenesses[store]
    m.mu.RUnlock()
    l, err := nl.Self()
    if err != nil {
        return err
    }

    for r := retry.StartWithCtx(ctx, retry.Options{MaxRetries: 5}); r.Next(); {
        if err = nl.Heartbeat(ctx, l); !errors.Is(err, kvserver.ErrEpochIncremented) {
            break
        }
    }
    return err
}

// advanceClock advances the mtc's manual clock such that all
// expiration-based leases become expired. The liveness records of all the nodes
// will also become expired on the new clock value (and this will cause all the
// epoch-based leases to be considered expired until the liveness record is
// heartbeated).
//
// This method asserts that all the stores share the manual clock. Otherwise,
// the desired effect would be ambiguous.
func (m *multiTestContext) advanceClock(ctx context.Context) {
    for i, clock := range m.clocks {
        if clock != m.clock() {
            log.Fatalf(ctx, "clock at index %d is different from the shared clock", i)
        }
    }
    m.manualClock.Increment(m.storeConfig.LeaseExpiration())
    log.Infof(ctx, "test clock advanced to: %s", m.clock().Now())
}

// getRaftLeader returns the replica that is the current raft leader for the
// specified rangeID.
func (m *multiTestContext) getRaftLeader(rangeID roachpb.RangeID) *kvserver.Replica {
    m.t.Helper()
    var raftLeaderRepl *kvserver.Replica
    testutils.SucceedsSoon(m.t, func() error {
        m.mu.RLock()
        defer m.mu.RUnlock()
        var latestTerm uint64
        for _, store := range m.stores {
            raftStatus := store.RaftStatus(rangeID)
            if raftStatus == nil {
                // Replica does not exist on this store or there is no raft
                // status yet.
                continue
            }
            if raftStatus.Term > latestTerm || (raftLeaderRepl == nil && raftStatus.Term == latestTerm) {
                // If we find any newer term, it means any previous election is
                // invalid.
                raftLeaderRepl = nil
                latestTerm = raftStatus.Term
                if raftStatus.RaftState == raft.StateLeader {
                    var err error
                    raftLeaderRepl, err = store.GetReplica(rangeID)
                    if err != nil {
                        return err
                    }
                }
            }
        }
        if latestTerm == 0 || raftLeaderRepl == nil {
            return errors.Errorf("could not find a raft leader for range %s", rangeID)
        }
        return nil
    })
    return raftLeaderRepl
}

// getArgs returns a GetRequest addressed to the default replica for the
// specified key.
func getArgs(key roachpb.Key) *roachpb.GetRequest {
    return &roachpb.GetRequest{
        RequestHeader: roachpb.RequestHeader{
            Key: key,
        },
    }
}

// putArgs returns a PutRequest addressed to the default replica for the
// specified key / value.
1473 func putArgs(key roachpb.Key, value []byte) *roachpb.PutRequest { 1474 return &roachpb.PutRequest{ 1475 RequestHeader: roachpb.RequestHeader{ 1476 Key: key, 1477 }, 1478 Value: roachpb.MakeValueFromBytes(value), 1479 } 1480 } 1481 1482 // incrementArgs returns an IncrementRequest addressed to the default replica 1483 // for the specified key. 1484 func incrementArgs(key roachpb.Key, inc int64) *roachpb.IncrementRequest { 1485 return &roachpb.IncrementRequest{ 1486 RequestHeader: roachpb.RequestHeader{ 1487 Key: key, 1488 }, 1489 Increment: inc, 1490 } 1491 } 1492 1493 func truncateLogArgs(index uint64, rangeID roachpb.RangeID) *roachpb.TruncateLogRequest { 1494 return &roachpb.TruncateLogRequest{ 1495 Index: index, 1496 RangeID: rangeID, 1497 } 1498 } 1499 1500 func heartbeatArgs( 1501 txn *roachpb.Transaction, now hlc.Timestamp, 1502 ) (*roachpb.HeartbeatTxnRequest, roachpb.Header) { 1503 return &roachpb.HeartbeatTxnRequest{ 1504 RequestHeader: roachpb.RequestHeader{ 1505 Key: txn.Key, 1506 }, 1507 Now: now, 1508 }, roachpb.Header{Txn: txn} 1509 } 1510 1511 func pushTxnArgs( 1512 pusher, pushee *roachpb.Transaction, pushType roachpb.PushTxnType, 1513 ) *roachpb.PushTxnRequest { 1514 return &roachpb.PushTxnRequest{ 1515 RequestHeader: roachpb.RequestHeader{ 1516 Key: pushee.Key, 1517 }, 1518 PushTo: pusher.WriteTimestamp.Next(), 1519 PusherTxn: *pusher, 1520 PusheeTxn: pushee.TxnMeta, 1521 PushType: pushType, 1522 } 1523 } 1524 1525 func TestSortRangeDescByAge(t *testing.T) { 1526 defer leaktest.AfterTest(t)() 1527 var replicaDescs []roachpb.ReplicaDescriptor 1528 var rangeDescs []*roachpb.RangeDescriptor 1529 1530 expectedReplicas := 0 1531 nextReplID := 1 1532 1533 // Cut a new range version with the current replica set. 1534 newRangeVersion := func(marker string) { 1535 currentRepls := append([]roachpb.ReplicaDescriptor(nil), replicaDescs...) 1536 rangeDescs = append(rangeDescs, &roachpb.RangeDescriptor{ 1537 RangeID: roachpb.RangeID(1), 1538 InternalReplicas: currentRepls, 1539 NextReplicaID: roachpb.ReplicaID(nextReplID), 1540 EndKey: roachpb.RKey(marker), 1541 }) 1542 } 1543 1544 // function to add a replica. 1545 addReplica := func(marker string) { 1546 replicaDescs = append(replicaDescs, roachpb.ReplicaDescriptor{ 1547 NodeID: roachpb.NodeID(nextReplID), 1548 StoreID: roachpb.StoreID(nextReplID), 1549 ReplicaID: roachpb.ReplicaID(nextReplID), 1550 }) 1551 nextReplID++ 1552 newRangeVersion(marker) 1553 expectedReplicas++ 1554 } 1555 1556 // function to remove a replica. 1557 removeReplica := func(marker string) { 1558 remove := rand.Intn(len(replicaDescs)) 1559 replicaDescs = append(replicaDescs[:remove], replicaDescs[remove+1:]...) 1560 newRangeVersion(marker) 1561 expectedReplicas-- 1562 } 1563 1564 for i := 0; i < 10; i++ { 1565 addReplica(fmt.Sprint("added", i)) 1566 } 1567 for i := 0; i < 3; i++ { 1568 removeReplica(fmt.Sprint("removed", i)) 1569 } 1570 addReplica("final-add") 1571 1572 // randomize array 1573 sortedRangeDescs := make([]*roachpb.RangeDescriptor, len(rangeDescs)) 1574 for i, r := range rand.Perm(len(rangeDescs)) { 1575 sortedRangeDescs[i] = rangeDescs[r] 1576 } 1577 // Sort array by age. 1578 sort.Sort(rangeDescByAge(sortedRangeDescs)) 1579 // Make sure both arrays are equal. 1580 if !reflect.DeepEqual(sortedRangeDescs, rangeDescs) { 1581 t.Fatalf("RangeDescriptor sort by age was not correct. 
Diff: %s", pretty.Diff(sortedRangeDescs, rangeDescs)) 1582 } 1583 } 1584 1585 func verifyRangeStats( 1586 reader storage.Reader, rangeID roachpb.RangeID, expMS enginepb.MVCCStats, 1587 ) error { 1588 ms, err := stateloader.Make(rangeID).LoadMVCCStats(context.Background(), reader) 1589 if err != nil { 1590 return err 1591 } 1592 // Clear system counts as these are expected to vary. 1593 ms.SysBytes, ms.SysCount = 0, 0 1594 if ms != expMS { 1595 return errors.Errorf("expected and actual stats differ:\n%s", pretty.Diff(expMS, ms)) 1596 } 1597 return nil 1598 } 1599 1600 func verifyRecomputedStats( 1601 reader storage.Reader, d *roachpb.RangeDescriptor, expMS enginepb.MVCCStats, nowNanos int64, 1602 ) error { 1603 if ms, err := rditer.ComputeStatsForRange(d, reader, nowNanos); err != nil { 1604 return err 1605 } else if expMS != ms { 1606 return fmt.Errorf("expected range's stats to agree with recomputation: got\n%+v\nrecomputed\n%+v", expMS, ms) 1607 } 1608 return nil 1609 } 1610 1611 func waitForTombstone( 1612 t *testing.T, reader storage.Reader, rangeID roachpb.RangeID, 1613 ) (tombstone roachpb.RangeTombstone) { 1614 testutils.SucceedsSoon(t, func() error { 1615 tombstoneKey := keys.RangeTombstoneKey(rangeID) 1616 ok, err := storage.MVCCGetProto( 1617 context.Background(), reader, tombstoneKey, hlc.Timestamp{}, &tombstone, storage.MVCCGetOptions{}, 1618 ) 1619 if err != nil { 1620 t.Fatalf("failed to read tombstone: %v", err) 1621 } 1622 if !ok { 1623 return fmt.Errorf("tombstone not found for range %d", rangeID) 1624 } 1625 return nil 1626 }) 1627 return tombstone 1628 }