github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/client_replica_test.go

// Copyright 2015 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver_test

import (
	"bytes"
	"context"
	"fmt"
	"math"
	"math/rand"
	"reflect"
	"strconv"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/server"
	"github.com/cockroachdb/cockroach/pkg/sql"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/caller"
	"github.com/cockroachdb/cockroach/pkg/util/encoding"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/randutil"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
	"github.com/kr/pretty"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"go.etcd.io/etcd/raft/raftpb"
)

func strToValue(s string) *roachpb.Value {
	v := roachpb.MakeValueFromBytes([]byte(s))
	return &v
}

// TestRangeCommandClockUpdate verifies that followers update their
// clocks when executing a command, even if the lease holder's clock is far
// in the future.
func TestRangeCommandClockUpdate(t *testing.T) {
	defer leaktest.AfterTest(t)()

	const numNodes = 3
	var manuals []*hlc.ManualClock
	var clocks []*hlc.Clock
	for i := 0; i < numNodes; i++ {
		manuals = append(manuals, hlc.NewManualClock(1))
		clocks = append(clocks, hlc.NewClock(manuals[i].UnixNano, 100*time.Millisecond))
	}
	mtc := &multiTestContext{
		clocks: clocks,
		// This test was written before the multiTestContext started creating many
		// system ranges at startup, and hasn't been updated to take that into
		// account.
		startWithSingleRange: true,
	}
	defer mtc.Stop()
	mtc.Start(t, numNodes)
	mtc.replicateRange(1, 1, 2)

	// Advance the lease holder's clock ahead of the followers (by more than
	// MaxOffset but less than the range lease) and execute a command.
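	// Note: 500ms is well beyond the 100ms MaxOffset configured for these
	// clocks above, exercising the "outside the usual max offset" case.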
	manuals[0].Increment(int64(500 * time.Millisecond))
	incArgs := incrementArgs([]byte("a"), 5)
	ts := clocks[0].Now()
	if _, err := kv.SendWrappedWith(context.Background(), mtc.stores[0].TestSender(), roachpb.Header{Timestamp: ts}, incArgs); err != nil {
		t.Fatal(err)
	}

	// Wait for that command to execute on all the followers.
	testutils.SucceedsSoon(t, func() error {
		values := []int64{}
		for _, eng := range mtc.engines {
			val, _, err := storage.MVCCGet(context.Background(), eng, roachpb.Key("a"), clocks[0].Now(),
				storage.MVCCGetOptions{})
			if err != nil {
				return err
			}
			values = append(values, mustGetInt(val))
		}
		if !reflect.DeepEqual(values, []int64{5, 5, 5}) {
			return errors.Errorf("expected (5, 5, 5), got %v", values)
		}
		return nil
	})

	// Verify that all the followers have accepted the clock update from
	// node 0 even though it comes from outside the usual max offset.
	now := clocks[0].Now()
	for i, clock := range clocks {
		// Only compare the WallTimes: it's normal for clock 0 to be a few logical ticks ahead.
		if clock.Now().WallTime < now.WallTime {
			t.Errorf("clock %d is behind clock 0: %s vs %s", i, clock.Now(), now)
		}
	}
}

// TestRejectFutureCommand verifies that lease holders reject commands that
// would cause a large time jump.
func TestRejectFutureCommand(t *testing.T) {
	defer leaktest.AfterTest(t)()

	manual := hlc.NewManualClock(123)
	clock := hlc.NewClock(manual.UnixNano, 100*time.Millisecond)
	sc := kvserver.TestStoreConfig(clock)
	mtc := &multiTestContext{storeConfig: &sc}
	defer mtc.Stop()
	mtc.Start(t, 1)

	ts1 := clock.Now()

	key := roachpb.Key("a")
	incArgs := incrementArgs(key, 5)

	// Commands with a future timestamp that is within the MaxOffset
	// bound will be accepted and will cause the clock to advance.
	const numCmds = 3
	clockOffset := clock.MaxOffset() / numCmds
	for i := int64(1); i <= numCmds; i++ {
		ts := ts1.Add(i*clockOffset.Nanoseconds(), 0)
		if _, err := kv.SendWrappedWith(context.Background(), mtc.stores[0].TestSender(), roachpb.Header{Timestamp: ts}, incArgs); err != nil {
			t.Fatal(err)
		}
	}

	ts2 := clock.Now()
	if expAdvance, advance := ts2.GoTime().Sub(ts1.GoTime()), numCmds*clockOffset; advance != expAdvance {
		t.Fatalf("expected clock to advance %s; got %s", expAdvance, advance)
	}

	// Once the accumulated offset reaches MaxOffset, commands will be rejected.
	_, pErr := kv.SendWrappedWith(context.Background(), mtc.stores[0].TestSender(), roachpb.Header{Timestamp: ts1.Add(clock.MaxOffset().Nanoseconds()+1, 0)}, incArgs)
	if !testutils.IsPError(pErr, "remote wall time is too far ahead") {
		t.Fatalf("unexpected error %v", pErr)
	}

	// The clock did not advance and the final command was not executed.
	ts3 := clock.Now()
	if advance := ts3.GoTime().Sub(ts2.GoTime()); advance != 0 {
		t.Fatalf("expected clock not to advance, but it advanced by %s", advance)
	}
	val, _, err := storage.MVCCGet(context.Background(), mtc.engines[0], key, ts3,
		storage.MVCCGetOptions{})
	if err != nil {
		t.Fatal(err)
	}
	if a, e := mustGetInt(val), incArgs.Increment*numCmds; a != e {
		t.Errorf("expected %d, got %d", e, a)
	}
}

// TestTxnPutOutOfOrder tests a case where a put operation of an older
// timestamp comes after a put operation of a newer timestamp in a
// txn.
// The test ensures such an out-of-order put succeeds and
// overrides an old value. The test uses a "Writer" and a "Reader"
// to reproduce an out-of-order put.
//
// 1) The Writer executes a cput operation and writes a write intent with
// time T in a txn.
// 2) Before the Writer's txn is committed, the Reader sends a high priority
// get operation with time T+100. This pushes the Writer txn timestamp to
// T+100. The Reader also writes to the same key the Writer did a cput to
// in order to trigger the restart of the Writer's txn. The original
// write intent timestamp is also updated to T+100.
// 3) The Writer starts a new epoch of the txn, but before it writes, the
// Reader sends another high priority get operation with time T+200. This
// pushes the Writer txn timestamp to T+200 to trigger a restart of the
// Writer txn. The Writer will not actually restart until it tries to commit
// the current epoch of the transaction. The Reader updates the timestamp of
// the write intent to T+200. The test deliberately fails the Reader get
// operation, and cockroach doesn't update its timestamp cache.
// 4) The Writer executes the put operation again. This put operation comes
// out-of-order since its timestamp is T+100, while the intent timestamp
// updated at Step 3 is T+200.
// 5) The put operation overrides the old value using timestamp T+100.
// 6) When the Writer attempts to commit its txn, the txn will be restarted
// again at a new epoch timestamp T+200, which will finally succeed.
func TestTxnPutOutOfOrder(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// key is selected to fall within the meta range in order for the later
	// routing of requests to range 1 to work properly. Removing the routing
	// of all requests to range 1 would allow us to make the key more normal.
	const (
		key        = "key"
		restartKey = "restart"
	)
	// Set up a filter so that the get operation at Step 3 will return an error.
	var numGets int32

	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	manual := hlc.NewManualClock(123)
	cfg := kvserver.TestStoreConfig(hlc.NewClock(manual.UnixNano, time.Nanosecond))
	// Splits can cause our chosen key to end up on a range other than range 1,
	// and trying to handle that complicates the test without providing any
	// added benefit.
	cfg.TestingKnobs.DisableSplitQueue = true
	cfg.TestingKnobs.EvalKnobs.TestingEvalFilter =
		func(filterArgs kvserverbase.FilterArgs) *roachpb.Error {
			if _, ok := filterArgs.Req.(*roachpb.GetRequest); ok &&
				filterArgs.Req.Header().Key.Equal(roachpb.Key(key)) &&
				filterArgs.Hdr.Txn == nil {
				// The Reader executes two get operations, each of which triggers two get requests
				// (the first request fails and triggers txn push, and then the second request
				// succeeds). Return an error for the fourth get request to avoid a timestamp cache
				// update after the third get operation pushes the txn timestamp.
				if atomic.AddInt32(&numGets, 1) == 4 {
					return roachpb.NewErrorWithTxn(errors.Errorf("Test"), filterArgs.Hdr.Txn)
				}
			}
			return nil
		}
	eng := storage.NewDefaultInMem()
	stopper.AddCloser(eng)
	store := createTestStoreWithOpts(t,
		testStoreOpts{eng: eng, cfg: &cfg},
		stopper,
	)

	// Put an initial value.
	initVal := []byte("initVal")
	err := store.DB().Put(context.Background(), key, initVal)
	if err != nil {
		t.Fatalf("failed to put: %+v", err)
	}

	waitPut := make(chan struct{})
	waitFirstGet := make(chan struct{})
	waitTxnRestart := make(chan struct{})
	waitSecondGet := make(chan struct{})
	errChan := make(chan error)

	// Start the Writer.
	go func() {
		epoch := -1
		// Start a txn that does read-after-write.
		// The txn will be restarted twice, and the out-of-order put
		// will happen in the second epoch.
		errChan <- store.DB().Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error {
			epoch++

			if epoch == 1 {
				// Wait until the second get operation is issued.
				close(waitTxnRestart)
				<-waitSecondGet
			}

			// Get a key which we can write to from the Reader in order to force a restart.
			if _, err := txn.Get(ctx, restartKey); err != nil {
				return err
			}

			updatedVal := []byte("updatedVal")
			if err := txn.CPut(ctx, key, updatedVal, strToValue("initVal")); err != nil {
				log.Errorf(context.Background(), "failed put value: %+v", err)
				return err
			}

			// Make sure a get will return the value that was just written.
			actual, err := txn.Get(ctx, key)
			if err != nil {
				return err
			}
			if !bytes.Equal(actual.ValueBytes(), updatedVal) {
				return errors.Errorf("unexpected get result: %s", actual)
			}

			if epoch == 0 {
				// Wait until the first get operation will push the txn timestamp.
				close(waitPut)
				<-waitFirstGet
			}

			b := txn.NewBatch()
			return txn.CommitInBatch(ctx, b)
		})

		if epoch != 2 {
			file, line, _ := caller.Lookup(0)
			errChan <- errors.Errorf("%s:%d unexpected number of txn retries. "+
				"Expected epoch 2, got: %d.", file, line, epoch)
		} else {
			errChan <- nil
		}
	}()

	<-waitPut

	// Start the Reader.

	// Advance the clock and send a get operation with higher
	// priority to trigger the txn restart.
	manual.Increment(100)

	priority := roachpb.UserPriority(-math.MaxInt32)
	requestHeader := roachpb.RequestHeader{
		Key: roachpb.Key(key),
	}
	h := roachpb.Header{
		Timestamp:    cfg.Clock.Now(),
		UserPriority: priority,
	}
	if _, err := kv.SendWrappedWith(
		context.Background(), store.TestSender(), h, &roachpb.GetRequest{RequestHeader: requestHeader},
	); err != nil {
		t.Fatalf("failed to get: %+v", err)
	}
	// Write to the restart key so that the Writer's txn must restart.
	putReq := &roachpb.PutRequest{
		RequestHeader: roachpb.RequestHeader{Key: roachpb.Key(restartKey)},
		Value:         roachpb.MakeValueFromBytes([]byte("restart-value")),
	}
	if _, err := kv.SendWrappedWith(context.Background(), store.TestSender(), h, putReq); err != nil {
		t.Fatalf("failed to put: %+v", err)
	}

	// Wait until the writer restarts the txn.
	close(waitFirstGet)
	<-waitTxnRestart

	// Advance the clock and send a get operation again. This time
	// we use TestingCommandFilter so that a get operation is not
	// processed after the write intent is resolved (to prevent the
	// timestamp cache from being updated).
	manual.Increment(100)

	h.Timestamp = cfg.Clock.Now()
	if _, err := kv.SendWrappedWith(
		context.Background(), store.TestSender(), h, &roachpb.GetRequest{RequestHeader: requestHeader},
	); err == nil {
		t.Fatal("unexpected success of get")
	}
	if _, err := kv.SendWrappedWith(context.Background(), store.TestSender(), h, putReq); err != nil {
		t.Fatalf("failed to put: %+v", err)
	}

	close(waitSecondGet)
	for i := 0; i < 2; i++ {
		if err := <-errChan; err != nil {
			t.Fatal(err)
		}
	}
}

// TestRangeLookupUseReverse tests whether the results and the results count
// are correct when scanning in reverse order.
func TestRangeLookupUseReverse(t *testing.T) {
	defer leaktest.AfterTest(t)()
	storeCfg := kvserver.TestStoreConfig(nil)
	storeCfg.TestingKnobs.DisableSplitQueue = true
	storeCfg.TestingKnobs.DisableMergeQueue = true
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	store := createTestStoreWithOpts(
		t,
		testStoreOpts{
			// This test was written before the test stores were able to start with
			// more than one range and is not prepared to handle many ranges.
			dontCreateSystemRanges: true,
			cfg:                    &storeCfg,
		},
		stopper)

	// Init test ranges:
	// ["","a"), ["a","c"), ["c","e"), ["e","g") and ["g","\xff\xff").
	splits := []*roachpb.AdminSplitRequest{
		adminSplitArgs(roachpb.Key("g")),
		adminSplitArgs(roachpb.Key("e")),
		adminSplitArgs(roachpb.Key("c")),
		adminSplitArgs(roachpb.Key("a")),
	}

	for _, split := range splits {
		_, pErr := kv.SendWrapped(context.Background(), store.TestSender(), split)
		if pErr != nil {
			t.Fatalf("%q: split unexpected error: %s", split.SplitKey, pErr)
		}
	}

	// Resolve the intents.
	scanArgs := roachpb.ScanRequest{
		RequestHeader: roachpb.RequestHeader{
			Key:    keys.RangeMetaKey(roachpb.RKeyMin.Next()).AsRawKey(),
			EndKey: keys.RangeMetaKey(roachpb.RKeyMax).AsRawKey(),
		},
	}
	testutils.SucceedsSoon(t, func() error {
		_, pErr := kv.SendWrapped(context.Background(), store.TestSender(), &scanArgs)
		return pErr.GoError()
	})

	testCases := []struct {
		key         roachpb.RKey
		maxResults  int64
		expected    []roachpb.RangeDescriptor
		expectedPre []roachpb.RangeDescriptor
	}{
		// Test key in the middle of the range.
		{
			key:        roachpb.RKey("f"),
			maxResults: 2,
			// ["e","g") and ["c","e").
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")},
			},
			expectedPre: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")},
			},
		},
		// Test key in the end key of the range.
		{
			key:        roachpb.RKey("g"),
			maxResults: 3,
			// ["e","g"), ["c","e") and ["a","c").
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")},
			},
			expectedPre: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")},
				{StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c")},
			},
		},
		{
			key:        roachpb.RKey("e"),
			maxResults: 2,
			// ["c","e") and ["a","c").
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")},
			},
			expectedPre: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c")},
			},
		},
		// Test RKeyMax.
		{
			key:        roachpb.RKeyMax,
			maxResults: 2,
			// ["e","g") and ["g","\xff\xff")
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("g"), EndKey: roachpb.RKey("\xff\xff")},
			},
			expectedPre: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")},
			},
		},
		// Test Meta2KeyMax.
		{
			key:        roachpb.RKey(keys.Meta2KeyMax),
			maxResults: 1,
			// ["","a")
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKeyMin, EndKey: roachpb.RKey("a")},
			},
		},
	}

	for _, test := range testCases {
		t.Run(fmt.Sprintf("key=%s", test.key), func(t *testing.T) {
			rs, preRs, err := kv.RangeLookup(context.Background(), store.TestSender(),
				test.key.AsRawKey(), roachpb.READ_UNCOMMITTED, test.maxResults-1, true /* prefetchReverse */)
			if err != nil {
				t.Fatalf("LookupRange error: %+v", err)
			}

			// Checks the results count.
			if rsLen, preRsLen := len(rs), len(preRs); int64(rsLen+preRsLen) != test.maxResults {
				t.Fatalf("returned results count, expected %d, but got %d+%d", test.maxResults, rsLen, preRsLen)
			}
			// Checks the range descriptors.
			for _, rngSlice := range []struct {
				expect, reply []roachpb.RangeDescriptor
			}{
				{test.expected, rs},
				{test.expectedPre, preRs},
			} {
				for i, rng := range rngSlice.expect {
					if !(rng.StartKey.Equal(rngSlice.reply[i].StartKey) && rng.EndKey.Equal(rngSlice.reply[i].EndKey)) {
						t.Fatalf("returned range is not correct, expected %v, but got %v", rng, rngSlice.reply[i])
					}
				}
			}
		})
	}
}

type leaseTransferTest struct {
	mtc *multiTestContext
	// replicas of range covering key "a" on the first and the second stores.
	replica0, replica1         *kvserver.Replica
	replica0Desc, replica1Desc roachpb.ReplicaDescriptor
	leftKey                    roachpb.Key
	filterMu                   syncutil.Mutex
	filter                     func(filterArgs kvserverbase.FilterArgs) *roachpb.Error
	waitForTransferBlocked     atomic.Value
	transferBlocked            chan struct{}
}

func setupLeaseTransferTest(t *testing.T) *leaseTransferTest {
	l := &leaseTransferTest{
		leftKey: roachpb.Key("a"),
	}

	cfg := kvserver.TestStoreConfig(nil)
	cfg.Clock = nil // manual clock
	// Ensure the node liveness duration isn't too short. By default it is 900ms
	// for TestStoreConfig().
	cfg.RangeLeaseRaftElectionTimeoutMultiplier =
		float64((9 * time.Second) / cfg.RaftElectionTimeout())
	cfg.TestingKnobs.EvalKnobs.TestingEvalFilter =
		func(filterArgs kvserverbase.FilterArgs) *roachpb.Error {
			l.filterMu.Lock()
			filterCopy := l.filter
			l.filterMu.Unlock()
			if filterCopy != nil {
				return filterCopy(filterArgs)
			}
			return nil
		}

	l.waitForTransferBlocked.Store(false)
	l.transferBlocked = make(chan struct{})
	cfg.TestingKnobs.LeaseTransferBlockedOnExtensionEvent = func(
		_ roachpb.ReplicaDescriptor) {
		if l.waitForTransferBlocked.Load().(bool) {
			l.transferBlocked <- struct{}{}
			l.waitForTransferBlocked.Store(false)
		}
	}

	l.mtc = &multiTestContext{}
	// This test was written before the multiTestContext started creating many
	// system ranges at startup, and hasn't been updated to take that into account.
	l.mtc.startWithSingleRange = true
	l.mtc.storeConfig = &cfg
	l.mtc.Start(t, 2)
	l.mtc.initGossipNetwork()

	// First, do a write; we'll use it to determine when the dust has settled.
	l.leftKey = roachpb.Key("a")
	incArgs := incrementArgs(l.leftKey, 1)
	if _, pErr := kv.SendWrapped(context.Background(), l.mtc.distSenders[0], incArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// Get the left range's ID.
	rangeID := l.mtc.stores[0].LookupReplica(keys.MustAddr(l.leftKey)).RangeID

	// Replicate the left range onto node 1.
	l.mtc.replicateRange(rangeID, 1)

	l.replica0 = l.mtc.stores[0].LookupReplica(roachpb.RKey("a"))
	l.replica1 = l.mtc.stores[1].LookupReplica(roachpb.RKey("a"))
	{
		var err error
		if l.replica0Desc, err = l.replica0.GetReplicaDescriptor(); err != nil {
			t.Fatal(err)
		}
		if l.replica1Desc, err = l.replica1.GetReplicaDescriptor(); err != nil {
			t.Fatal(err)
		}
	}

	// Check that replica0 can serve reads OK.
	if pErr := l.sendRead(0); pErr != nil {
		t.Fatal(pErr)
	}
	return l
}

func (l *leaseTransferTest) sendRead(storeIdx int) *roachpb.Error {
	desc := l.mtc.stores[storeIdx].LookupReplica(keys.MustAddr(l.leftKey))
	replicaDesc, err := desc.GetReplicaDescriptor()
	if err != nil {
		return roachpb.NewError(err)
	}
	_, pErr := kv.SendWrappedWith(
		context.Background(),
		l.mtc.senders[storeIdx],
		roachpb.Header{RangeID: desc.RangeID, Replica: replicaDesc},
		getArgs(l.leftKey),
	)
	if pErr != nil {
		log.Warningf(context.Background(), "%v", pErr)
	}
	return pErr
}

// checkHasLease checks that a lease for the left range is owned by a
// replica. The check is executed in a retry loop because the lease may not
// have been applied yet.
func (l *leaseTransferTest) checkHasLease(t *testing.T, storeIdx int) {
	t.Helper()
	testutils.SucceedsSoon(t, func() error {
		return l.sendRead(storeIdx).GoError()
	})
}

// setFilter is a helper function to enable/disable the blocking of
// RequestLeaseRequests on replica1. This function will notify that an
// extension is blocked on the passed in channel and will wait on the same
// channel to unblock the extension. Note that once an extension is blocked,
// the filter is cleared.
func (l *leaseTransferTest) setFilter(setTo bool, extensionSem chan struct{}) {
	l.filterMu.Lock()
	defer l.filterMu.Unlock()
	if !setTo {
		l.filter = nil
		return
	}
	l.filter = func(filterArgs kvserverbase.FilterArgs) *roachpb.Error {
		if filterArgs.Sid != l.mtc.stores[1].Ident.StoreID {
			return nil
		}
		llReq, ok := filterArgs.Req.(*roachpb.RequestLeaseRequest)
		if !ok {
			return nil
		}
		if llReq.Lease.Replica == l.replica1Desc {
			// Notify the main thread that the extension is in progress and wait for
			// the signal to proceed.
			l.filterMu.Lock()
			l.filter = nil
			l.filterMu.Unlock()
			extensionSem <- struct{}{}
			log.Infof(filterArgs.Ctx, "filter blocking request: %s", llReq)
			<-extensionSem
			log.Infof(filterArgs.Ctx, "filter unblocking lease request")
		}
		return nil
	}
}

// forceLeaseExtension moves the clock forward close to the lease's expiration,
// and then performs a read on the range, which will force the lease to be
// renewed. This assumes the lease is not epoch-based.
func (l *leaseTransferTest) forceLeaseExtension(storeIdx int, lease roachpb.Lease) error {
	// Set the clock close to the lease's expiration.
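	// (The -10 keeps the clock just shy of the expiration, so the read below
	// should renew the existing lease rather than let it lapse first.)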
	l.mtc.manualClock.Set(lease.Expiration.WallTime - 10)
	err := l.sendRead(storeIdx).GoError()
	// We can sometimes receive an error from our renewal attempt because the
	// lease transfer ends up causing the renewal to re-propose and the second
	// attempt fails because it's already been renewed. This used to work
	// before we compared the proposer's lease with the actual lease because
	// the renewed lease still encompassed the previous request.
	if errors.HasType(err, (*roachpb.NotLeaseHolderError)(nil)) {
		err = nil
	}
	return err
}

// ensureLeaderAndRaftState is a helper function that blocks until leader is
// the raft leader and follower is up to date.
func (l *leaseTransferTest) ensureLeaderAndRaftState(
	t *testing.T, leader *kvserver.Replica, follower roachpb.ReplicaDescriptor,
) {
	t.Helper()
	leaderDesc, err := leader.GetReplicaDescriptor()
	if err != nil {
		t.Fatal(err)
	}
	testutils.SucceedsSoon(t, func() error {
		r := l.mtc.getRaftLeader(l.replica0.RangeID)
		if r == nil {
			return errors.Errorf("could not find raft leader replica for range %d", l.replica0.RangeID)
		}
		desc, err := r.GetReplicaDescriptor()
		if err != nil {
			return errors.Wrap(err, "could not get replica descriptor")
		}
		if desc != leaderDesc {
			return errors.Errorf(
				"expected replica with id %v to be raft leader, instead got id %v",
				leaderDesc.ReplicaID,
				desc.ReplicaID,
			)
		}
		return nil
	})

	testutils.SucceedsSoon(t, func() error {
		status := leader.RaftStatus()
		progress, ok := status.Progress[uint64(follower.ReplicaID)]
		if !ok {
			return errors.Errorf(
				"replica %v progress not found in progress map: %v",
				follower.ReplicaID,
				status.Progress,
			)
		}
		if progress.Match < status.Commit {
			return errors.Errorf("replica %v failed to catch up", follower.ReplicaID)
		}
		return nil
	})
}

func TestLeaseExpirationBasedRangeTransfer(t *testing.T) {
	defer leaktest.AfterTest(t)()

	l := setupLeaseTransferTest(t)
	defer l.mtc.Stop()
	origLease, _ := l.replica0.GetLease()
	{
		// Transferring the lease to ourself should be a no-op.
		if err := l.replica0.AdminTransferLease(context.Background(), l.replica0Desc.StoreID); err != nil {
			t.Fatal(err)
		}
		newLease, _ := l.replica0.GetLease()
		if !origLease.Equivalent(newLease) {
			t.Fatalf("original lease %v and new lease %v not equivalent", origLease, newLease)
		}
	}

	{
		// An invalid target should result in an error.
		const expected = "unable to find store .* in range"
		if err := l.replica0.AdminTransferLease(context.Background(), 1000); !testutils.IsError(err, expected) {
			t.Fatalf("expected %s, but found %v", expected, err)
		}
	}

	if err := l.replica0.AdminTransferLease(context.Background(), l.replica1Desc.StoreID); err != nil {
		t.Fatal(err)
	}

	// Check that replica0 doesn't serve reads any more.
	pErr := l.sendRead(0)
	nlhe, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError)
	if !ok {
		t.Fatalf("expected %T, got %s", &roachpb.NotLeaseHolderError{}, pErr)
	}
	if !nlhe.LeaseHolder.Equal(&l.replica1Desc) {
		t.Fatalf("expected lease holder %+v, got %+v",
			l.replica1Desc, nlhe.LeaseHolder)
	}

	// Check that replica1 now has the lease.
	l.checkHasLease(t, 1)

	replica1Lease, _ := l.replica1.GetLease()

	// We'd like to verify the timestamp cache's low water mark, but this is
	// impossible to determine precisely in all cases because it may have
	// been subsumed by future tscache accesses. So instead of checking the
	// low water mark, we make sure that the high water mark is equal to or
	// greater than the new lease start time, which is less than the
	// previous lease's expiration time.
	if highWater := l.replica1.GetTSCacheHighWater(); highWater.Less(replica1Lease.Start) {
		t.Fatalf("expected timestamp cache high water %s, but found %s",
			replica1Lease.Start, highWater)
	}

}

// TestLeaseExpirationBasedRangeTransferWithExtension makes replica1
// extend its lease and transfers the lease immediately after
// that. Test that the transfer still happens (it'll wait until the
// extension is done).
func TestLeaseExpirationBasedRangeTransferWithExtension(t *testing.T) {
	defer leaktest.AfterTest(t)()

	l := setupLeaseTransferTest(t)
	defer l.mtc.Stop()
	// Ensure that replica1 has the lease.
	if err := l.replica0.AdminTransferLease(context.Background(), l.replica1Desc.StoreID); err != nil {
		t.Fatal(err)
	}
	l.checkHasLease(t, 1)

	extensionSem := make(chan struct{})
	l.setFilter(true, extensionSem)

	// Initiate an extension.
	renewalErrCh := make(chan error)
	go func() {
		lease, _ := l.replica1.GetLease()
		renewalErrCh <- l.forceLeaseExtension(1, lease)
	}()

	// Wait for extension to be blocked.
	<-extensionSem
	l.waitForTransferBlocked.Store(true)
	// Initiate a transfer.
	transferErrCh := make(chan error)
	go func() {
		// Transfer back from replica1 to replica0.
		err := l.replica1.AdminTransferLease(context.Background(), l.replica0Desc.StoreID)
		// Ignore not leaseholder errors which can arise due to re-proposals.
		if errors.HasType(err, (*roachpb.NotLeaseHolderError)(nil)) {
			err = nil
		}
		transferErrCh <- err
	}()
	// Wait for the transfer to be blocked by the extension.
	<-l.transferBlocked
	// Now unblock the extension.
	extensionSem <- struct{}{}
	l.checkHasLease(t, 0)
	l.setFilter(false, nil)

	if err := <-renewalErrCh; err != nil {
		t.Errorf("unexpected error from lease renewal: %+v", err)
	}
	if err := <-transferErrCh; err != nil {
		t.Errorf("unexpected error from lease transfer: %+v", err)
	}
}

// TestLeaseExpirationBasedDrainTransfer verifies that a draining store attempts to transfer away
// range leases owned by its replicas.
func TestLeaseExpirationBasedDrainTransfer(t *testing.T) {
	defer leaktest.AfterTest(t)()

	l := setupLeaseTransferTest(t)
	defer l.mtc.Stop()
	// We have to ensure that replica0 is the raft leader and that replica1 has
	// caught up to replica0, as the draining code doesn't transfer leases to
	// replicas that are behind.
	l.ensureLeaderAndRaftState(t, l.replica0, l.replica1Desc)
	l.mtc.stores[0].SetDraining(true, nil /* reporter */)

	// Check that replica0 doesn't serve reads any more.
	pErr := l.sendRead(0)
	nlhe, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError)
	if !ok {
		t.Fatalf("expected %T, got %s", &roachpb.NotLeaseHolderError{}, pErr)
	}
	if nlhe.LeaseHolder == nil || !nlhe.LeaseHolder.Equal(&l.replica1Desc) {
		t.Fatalf("expected lease holder %+v, got %+v",
			l.replica1Desc, nlhe.LeaseHolder)
	}

	// Check that replica1 now has the lease.
	l.checkHasLease(t, 1)

	l.mtc.stores[0].SetDraining(false, nil /* reporter */)
}

// TestLeaseExpirationBasedDrainTransferWithExtension verifies that
// a draining store waits for any in-progress lease requests to
// complete before transferring away the new lease.
func TestLeaseExpirationBasedDrainTransferWithExtension(t *testing.T) {
	defer leaktest.AfterTest(t)()

	l := setupLeaseTransferTest(t)
	defer l.mtc.Stop()
	// Ensure that replica1 has the lease.
	if err := l.replica0.AdminTransferLease(context.Background(), l.replica1Desc.StoreID); err != nil {
		t.Fatal(err)
	}
	l.checkHasLease(t, 1)

	extensionSem := make(chan struct{})
	l.setFilter(true, extensionSem)

	// Initiate an extension.
	renewalErrCh := make(chan error)
	go func() {
		lease, _ := l.replica1.GetLease()
		renewalErrCh <- l.forceLeaseExtension(1, lease)
	}()

	// Wait for extension to be blocked.
	<-extensionSem

	// Make sure that replica 0 is up to date enough to receive the lease.
	l.ensureLeaderAndRaftState(t, l.replica1, l.replica0Desc)

	// Drain node 1 with an extension in progress.
	go func() {
		l.mtc.stores[1].SetDraining(true, nil /* reporter */)
	}()
	// Now unblock the extension.
	extensionSem <- struct{}{}

	l.checkHasLease(t, 0)
	l.setFilter(false, nil)

	if err := <-renewalErrCh; err != nil {
		t.Errorf("unexpected error from lease renewal: %+v", err)
	}
}

// TestRangeLimitTxnMaxTimestamp verifies that on lease transfer, the
// normal limiting of a txn's max timestamp to the first observed
// timestamp on a node is extended to include the lease start
// timestamp. This disallows the possibility that a write to another
// replica of the range (on node n1) happened at a later timestamp
// than the originally observed timestamp for the node which now owns
// the lease (n2). This can happen if the replication of the write
// doesn't make it from n1 to n2 before the transaction observes n2's
// clock time.
func TestRangeLimitTxnMaxTimestamp(t *testing.T) {
	defer leaktest.AfterTest(t)()
	cfg := kvserver.TestStoreConfig(nil)
	cfg.RangeLeaseRaftElectionTimeoutMultiplier =
		float64((9 * time.Second) / cfg.RaftElectionTimeout())
	cfg.Clock = nil // manual clock
	mtc := &multiTestContext{}
	mtc.storeConfig = &cfg
	keyA := roachpb.Key("a")
	// Create a new clock for node2 to allow drift between the two wall clocks.
	manual1 := hlc.NewManualClock(100) // node1 clock is @t=100
	clock1 := hlc.NewClock(manual1.UnixNano, 250*time.Nanosecond)
	manual2 := hlc.NewManualClock(98) // node2 clock is @t=98
	clock2 := hlc.NewClock(manual2.UnixNano, 250*time.Nanosecond)
	mtc.clocks = []*hlc.Clock{clock1, clock2}

	// Start a transaction using node2 as a gateway.
	txn := roachpb.MakeTransaction("test", keyA, 1, clock2.Now(), 250 /* maxOffsetNs */)
	// Simulate a read to another range on node2 by setting the observed timestamp.
	txn.UpdateObservedTimestamp(2, clock2.Now())

	defer mtc.Stop()
	mtc.Start(t, 2)

	// Do a write on node1 to establish a key with its timestamp @t=100.
	if _, pErr := kv.SendWrapped(
		context.Background(), mtc.distSenders[0], putArgs(keyA, []byte("value")),
	); pErr != nil {
		t.Fatal(pErr)
	}

	// Up-replicate the data in the range to node2.
	replica1 := mtc.stores[0].LookupReplica(roachpb.RKey(keyA))
	mtc.replicateRange(replica1.RangeID, 1)

	// Transfer the lease from node1 to node2.
	replica2 := mtc.stores[1].LookupReplica(roachpb.RKey(keyA))
	replica2Desc, err := replica2.GetReplicaDescriptor()
	if err != nil {
		t.Fatal(err)
	}
	testutils.SucceedsSoon(t, func() error {
		if err := replica1.AdminTransferLease(context.Background(), replica2Desc.StoreID); err != nil {
			t.Fatal(err)
		}
		lease, _ := replica2.GetLease()
		if lease.Replica.NodeID != replica2.NodeID() {
			return errors.Errorf("expected lease transfer to node2: %s", lease)
		}
		return nil
	})
	// Verify that after the lease transfer, node2's clock has advanced to at least 100.
	if now1, now2 := clock1.Now(), clock2.Now(); now2.WallTime < now1.WallTime {
		t.Fatalf("expected node2's clock walltime to be >= %d; got %d", now1.WallTime, now2.WallTime)
	}

	// Send a get request for keyA to node2, which is now the
	// leaseholder. If the max timestamp were not being properly limited,
	// we would end up incorrectly reading nothing for keyA. Instead we
	// expect to see an uncertainty interval error.
	h := roachpb.Header{Txn: &txn}
	if _, pErr := kv.SendWrappedWith(
		context.Background(), mtc.distSenders[0], h, getArgs(keyA),
	); !testutils.IsPError(pErr, "uncertainty") {
		t.Fatalf("expected an uncertainty interval error; got %v", pErr)
	}
}

// TestLeaseMetricsOnSplitAndTransfer verifies that lease-related metrics
// are updated after splitting a range and then initiating one successful
// and one failing lease transfer.
func TestLeaseMetricsOnSplitAndTransfer(t *testing.T) {
	defer leaktest.AfterTest(t)()
	var injectLeaseTransferError atomic.Value
	sc := kvserver.TestStoreConfig(nil)
	sc.TestingKnobs.DisableSplitQueue = true
	sc.TestingKnobs.DisableMergeQueue = true
	sc.TestingKnobs.EvalKnobs.TestingEvalFilter =
		func(filterArgs kvserverbase.FilterArgs) *roachpb.Error {
			if args, ok := filterArgs.Req.(*roachpb.TransferLeaseRequest); ok {
				if val := injectLeaseTransferError.Load(); val != nil && val.(bool) {
					// Note that we can't just return an error here as we only
					// end up counting failures in the metrics if the command
					// makes it through to being executed. So use a fake store ID.
					args.Lease.Replica.StoreID = roachpb.StoreID(1000)
				}
			}
			return nil
		}
	sc.Clock = nil // manual clock
	mtc := &multiTestContext{
		storeConfig: &sc,
		// This test was written before the multiTestContext started creating many
		// system ranges at startup, and hasn't been updated to take that into
		// account.
		startWithSingleRange: true,
	}
	defer mtc.Stop()
	mtc.Start(t, 2)

	// Up-replicate to two replicas.
	keyMinReplica0 := mtc.stores[0].LookupReplica(roachpb.RKeyMin)
	mtc.replicateRange(keyMinReplica0.RangeID, 1)

	// Split the key space at key "a".
	splitKey := roachpb.RKey("a")
	splitArgs := adminSplitArgs(splitKey.AsRawKey())
	if _, pErr := kv.SendWrapped(
		context.Background(), mtc.stores[0].TestSender(), splitArgs,
	); pErr != nil {
		t.Fatal(pErr)
	}

	// Now, a successful transfer from LHS replica 0 to replica 1.
	injectLeaseTransferError.Store(false)
	if err := mtc.dbs[0].AdminTransferLease(
		context.Background(), keyMinReplica0.Desc().StartKey.AsRawKey(), mtc.stores[1].StoreID(),
	); err != nil {
		t.Fatalf("unable to transfer lease to replica 1: %+v", err)
	}
	// Wait for all replicas to process.
	testutils.SucceedsSoon(t, func() error {
		for i := 0; i < 2; i++ {
			r := mtc.stores[i].LookupReplica(roachpb.RKeyMin)
			if l, _ := r.GetLease(); l.Replica.StoreID != mtc.stores[1].StoreID() {
				return errors.Errorf("expected lease to transfer to replica 2: got %s", l)
			}
		}
		return nil
	})

	// Next, a failed transfer from RHS replica 0 to replica 1.
	injectLeaseTransferError.Store(true)
	keyAReplica0 := mtc.stores[0].LookupReplica(splitKey)
	if err := mtc.dbs[0].AdminTransferLease(
		context.Background(), keyAReplica0.Desc().StartKey.AsRawKey(), mtc.stores[1].StoreID(),
	); err == nil {
		t.Fatal("expected an error transferring to an unknown store ID")
	}

	metrics := mtc.stores[0].Metrics()
	if a, e := metrics.LeaseTransferSuccessCount.Count(), int64(1); a != e {
		t.Errorf("expected %d lease transfer successes; got %d", e, a)
	}
	if a, e := metrics.LeaseTransferErrorCount.Count(), int64(1); a != e {
		t.Errorf("expected %d lease transfer errors; got %d", e, a)
	}

	// Expire current leases and put a key to the RHS of the split to request
	// an epoch-based lease.
	testutils.SucceedsSoon(t, func() error {
		mtc.advanceClock(context.Background())
		if err := mtc.stores[0].DB().Put(context.Background(), "a", "foo"); err != nil {
			return err
		}

		// Update replication gauges for all stores and verify we have 1 each of
		// expiration and epoch leases.
		var expirationLeases int64
		var epochLeases int64
		for i := range mtc.stores {
			if err := mtc.stores[i].ComputeMetrics(context.Background(), 0); err != nil {
				return err
			}
			metrics = mtc.stores[i].Metrics()
			expirationLeases += metrics.LeaseExpirationCount.Value()
			epochLeases += metrics.LeaseEpochCount.Value()
		}
		if a, e := expirationLeases, int64(1); a != e {
			return errors.Errorf("expected %d expiration lease count; got %d", e, a)
		}
		if a, e := epochLeases, int64(1); a != e {
			return errors.Errorf("expected %d epoch lease count; got %d", e, a)
		}
		return nil
	})
}

// Test that leases held before a restart are not used after the restart.
// See replica.mu.minLeaseProposedTS for the reasons why this isn't allowed.
func TestLeaseNotUsedAfterRestart(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()

	sc := kvserver.TestStoreConfig(nil)
	sc.Clock = nil // manual clock
	var leaseAcquisitionTrap atomic.Value
	// Disable the split queue so that no ranges are split. This makes it easy
	// below to trap any lease request and infer that it refers to the range we're
	// interested in.
	sc.TestingKnobs.DisableSplitQueue = true
	sc.TestingKnobs.LeaseRequestEvent = func(ts hlc.Timestamp) {
		val := leaseAcquisitionTrap.Load()
		if val == nil {
			return
		}
		trapCallback := val.(func(ts hlc.Timestamp))
		if trapCallback != nil {
			trapCallback(ts)
		}
	}
	mtc := &multiTestContext{storeConfig: &sc}
	defer mtc.Stop()
	mtc.Start(t, 1)

	key := []byte("a")
	// Send a read, to acquire a lease.
	getArgs := getArgs(key)
	if _, err := kv.SendWrapped(ctx, mtc.stores[0].TestSender(), getArgs); err != nil {
		t.Fatal(err)
	}

	preRestartLease, _ := mtc.stores[0].LookupReplica(key).GetLease()

	mtc.manualClock.Increment(1e9)

	// Restart the mtc. Before we do that, we're installing a callback used to
	// assert that a new lease has been requested. The callback is installed
	// before the restart, as the lease might be requested at any time and for
	// many reasons by background processes, even before we send the read below.
	leaseAcquisitionCh := make(chan error)
	var once sync.Once
	leaseAcquisitionTrap.Store(func(_ hlc.Timestamp) {
		once.Do(func() {
			close(leaseAcquisitionCh)
		})
	})

	log.Info(ctx, "restarting")
	mtc.restart()

	// Send another read and check that the pre-existing lease has not been used.
	// Concretely, we check that a new lease is requested.
	if _, err := kv.SendWrapped(ctx, mtc.stores[0].TestSender(), getArgs); err != nil {
		t.Fatal(err)
	}
	// Check that the Send above triggered a lease acquisition.
	select {
	case <-leaseAcquisitionCh:
	case <-time.After(time.Second):
		t.Fatalf("read did not acquire a new lease")
	}

	postRestartLease, _ := mtc.stores[0].LookupReplica(key).GetLease()

	// Verify that not only is a new lease requested, it also gets a new sequence
	// number. This makes sure that previously proposed commands actually fail at
	// apply time.
	if preRestartLease.Sequence == postRestartLease.Sequence {
		t.Fatalf("lease was not replaced:\nprev: %v\nnow: %v", preRestartLease, postRestartLease)
	}
}

// Test that a lease extension (a RequestLeaseRequest that doesn't change the
// lease holder) is not blocked by ongoing reads. The test relies on the fact
// that RequestLeaseRequest does not declare that it touches the whole key span
// of the range, and thus doesn't conflict through the command queue with other
// reads.
func TestLeaseExtensionNotBlockedByRead(t *testing.T) {
	defer leaktest.AfterTest(t)()
	readBlocked := make(chan struct{})
	cmdFilter := func(fArgs kvserverbase.FilterArgs) *roachpb.Error {
		if fArgs.Hdr.UserPriority == 42 {
			// Signal that the read is blocked.
			readBlocked <- struct{}{}
			// Wait for read to be unblocked.
			<-readBlocked
		}
		return nil
	}
	srv, _, _ := serverutils.StartServer(t,
		base.TestServerArgs{
			Knobs: base.TestingKnobs{
				Store: &kvserver.StoreTestingKnobs{
					EvalKnobs: kvserverbase.BatchEvalTestingKnobs{
						TestingEvalFilter: cmdFilter,
					},
				},
			},
		})
	s := srv.(*server.TestServer)
	defer s.Stopper().Stop(context.Background())

	store, err := s.GetStores().(*kvserver.Stores).GetStore(s.GetFirstStoreID())
	if err != nil {
		t.Fatal(err)
	}

	// Start a read and wait for it to block.
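	// The UserPriority of 42 used below matches the cmdFilter installed above,
	// so this get parks inside evaluation until the test writes to readBlocked.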
	key := roachpb.Key("a")
	errChan := make(chan error)
	go func() {
		getReq := roachpb.GetRequest{
			RequestHeader: roachpb.RequestHeader{
				Key: key,
			},
		}
		if _, pErr := kv.SendWrappedWith(context.Background(), s.DB().NonTransactionalSender(),
			roachpb.Header{UserPriority: 42},
			&getReq); pErr != nil {
			errChan <- pErr.GoError()
		}
	}()

	select {
	case err := <-errChan:
		t.Fatal(err)
	case <-readBlocked:
		// Send the lease request.
		rKey, err := keys.Addr(key)
		if err != nil {
			t.Fatal(err)
		}
		repl := store.LookupReplica(rKey)
		if repl == nil {
			t.Fatalf("replica for key %s not found", rKey)
		}
		replDesc, found := repl.Desc().GetReplicaDescriptor(store.StoreID())
		if !found {
			t.Fatalf("replica descriptor for key %s not found", rKey)
		}

		leaseReq := roachpb.RequestLeaseRequest{
			RequestHeader: roachpb.RequestHeader{
				Key: key,
			},
			Lease: roachpb.Lease{
				Start:      s.Clock().Now(),
				Expiration: s.Clock().Now().Add(time.Second.Nanoseconds(), 0).Clone(),
				Replica:    replDesc,
			},
		}

		for {
			curLease, _, err := s.GetRangeLease(context.Background(), key)
			if err != nil {
				t.Fatal(err)
			}
			leaseReq.PrevLease = curLease

			_, pErr := kv.SendWrapped(context.Background(), s.DB().NonTransactionalSender(), &leaseReq)
			if _, ok := pErr.GetDetail().(*roachpb.AmbiguousResultError); ok {
				log.Infof(context.Background(), "retrying lease after %s", pErr)
				continue
			}
			if _, ok := pErr.GetDetail().(*roachpb.LeaseRejectedError); ok {
				// Lease rejected? Try again. The extension should work because
				// extending is idempotent (assuming the PrevLease matches).
				log.Infof(context.Background(), "retrying lease after %s", pErr)
				continue
			}
			if pErr != nil {
				t.Errorf("%T %s", pErr.GetDetail(), pErr) // NB: don't fatal or shutdown hangs
			}
			break
		}
		// Unblock the read.
		readBlocked <- struct{}{}
	}
}

// LeaseInfo runs a LeaseInfoRequest using the specified server.
func LeaseInfo(
	t *testing.T,
	db *kv.DB,
	rangeDesc roachpb.RangeDescriptor,
	readConsistency roachpb.ReadConsistencyType,
) roachpb.LeaseInfoResponse {
	leaseInfoReq := &roachpb.LeaseInfoRequest{
		RequestHeader: roachpb.RequestHeader{
			Key: rangeDesc.StartKey.AsRawKey(),
		},
	}
	reply, pErr := kv.SendWrappedWith(context.Background(), db.NonTransactionalSender(), roachpb.Header{
		ReadConsistency: readConsistency,
	}, leaseInfoReq)
	if pErr != nil {
		t.Fatal(pErr)
	}
	return *(reply.(*roachpb.LeaseInfoResponse))
}

func TestLeaseInfoRequest(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{})
	defer tc.Stopper().Stop(context.Background())

	kvDB0 := tc.Servers[0].DB()
	kvDB1 := tc.Servers[1].DB()

	key := []byte("a")
	rangeDesc, err := tc.LookupRange(key)
	if err != nil {
		t.Fatal(err)
	}
	replicas := make([]roachpb.ReplicaDescriptor, 3)
	for i := 0; i < 3; i++ {
		var ok bool
		replicas[i], ok = rangeDesc.GetReplicaDescriptor(tc.Servers[i].GetFirstStoreID())
		if !ok {
			t.Fatalf("expected to find replica in server %d", i)
		}
	}

	// Transfer the lease to Servers[0] so we start in a known state.
	// Otherwise, there might already be a lease owned by a random node.
	err = tc.TransferRangeLease(rangeDesc, tc.Target(0))
	if err != nil {
		t.Fatal(err)
	}

	// Now test the LeaseInfo. We might need to loop until the node we query has
	// applied the lease.
	testutils.SucceedsSoon(t, func() error {
		leaseHolderReplica := LeaseInfo(t, kvDB0, rangeDesc, roachpb.INCONSISTENT).Lease.Replica
		if leaseHolderReplica != replicas[0] {
			return fmt.Errorf("lease holder should be replica %+v, but is: %+v",
				replicas[0], leaseHolderReplica)
		}
		return nil
	})

	// Transfer the lease to Server 1 and check that LeaseInfoRequest gets the
	// right answer.
	err = tc.TransferRangeLease(rangeDesc, tc.Target(1))
	if err != nil {
		t.Fatal(err)
	}
	// An inconsistent LeaseInfoRequest on the old lease holder should give us the
	// right answer immediately, since the old holder has definitely applied the
	// transfer before TransferRangeLease returned.
	leaseHolderReplica := LeaseInfo(t, kvDB0, rangeDesc, roachpb.INCONSISTENT).Lease.Replica
	if !leaseHolderReplica.Equal(replicas[1]) {
		t.Fatalf("lease holder should be replica %+v, but is: %+v",
			replicas[1], leaseHolderReplica)
	}

	// A read on the new lease holder does not necessarily succeed immediately,
	// since it might take a while for it to apply the transfer.
	testutils.SucceedsSoon(t, func() error {
		// We can't reliably do a CONSISTENT read here, even though we're reading
		// from the supposed lease holder, because this node might initially be
		// unaware of the new lease and so the request might bounce around for a
		// while (see #8816).
		leaseHolderReplica = LeaseInfo(t, kvDB1, rangeDesc, roachpb.INCONSISTENT).Lease.Replica
		if !leaseHolderReplica.Equal(replicas[1]) {
			return errors.Errorf("lease holder should be replica %+v, but is: %+v",
				replicas[1], leaseHolderReplica)
		}
		return nil
	})

	// Transfer the lease to Server 2 and check that LeaseInfoRequest gets the
	// right answer.
	err = tc.TransferRangeLease(rangeDesc, tc.Target(2))
	if err != nil {
		t.Fatal(err)
	}

	// We're now going to ask servers[1] for the lease info. We don't use kvDB1;
	// instead we go directly to the store because otherwise the DistSender might
	// use an old, cached, version of the range descriptor that doesn't have the
	// local replica in it (and so the request would be routed away).
	// TODO(andrei): Add a batch option to not use the range cache.
	s, err := tc.Servers[1].Stores().GetStore(tc.Servers[1].GetFirstStoreID())
	if err != nil {
		t.Fatal(err)
	}
	leaseInfoReq := &roachpb.LeaseInfoRequest{
		RequestHeader: roachpb.RequestHeader{
			Key: rangeDesc.StartKey.AsRawKey(),
		},
	}
	reply, pErr := kv.SendWrappedWith(
		context.Background(), s, roachpb.Header{
			RangeID:         rangeDesc.RangeID,
			ReadConsistency: roachpb.INCONSISTENT,
		}, leaseInfoReq)
	if pErr != nil {
		t.Fatal(pErr)
	}
	resp := *(reply.(*roachpb.LeaseInfoResponse))
	leaseHolderReplica = resp.Lease.Replica

	if !leaseHolderReplica.Equal(replicas[2]) {
		t.Fatalf("lease holder should be replica %s, but is: %s", replicas[2], leaseHolderReplica)
	}

	// TODO(andrei): test the side-effect of LeaseInfoRequest when there's no
	// active lease - the node getting the request is supposed to acquire the
	// lease. This requires a way to expire leases; the TestCluster probably needs
	// to use a mock clock.
}

// Test that an error encountered by a read-only "NonKV" command is not
// swallowed, and doesn't otherwise cause a panic.
// We had a bug caused by the fact that errors for these commands aren't passed
// through the epilogue returned by replica.beginCommands() and were getting
// swallowed.
func TestErrorHandlingForNonKVCommand(t *testing.T) {
	defer leaktest.AfterTest(t)()
	cmdFilter := func(fArgs kvserverbase.FilterArgs) *roachpb.Error {
		if fArgs.Hdr.UserPriority == 42 {
			return roachpb.NewErrorf("injected error")
		}
		return nil
	}
	srv, _, _ := serverutils.StartServer(t,
		base.TestServerArgs{
			Knobs: base.TestingKnobs{
				Store: &kvserver.StoreTestingKnobs{
					EvalKnobs: kvserverbase.BatchEvalTestingKnobs{
						TestingEvalFilter: cmdFilter,
					},
				},
			},
		})
	s := srv.(*server.TestServer)
	defer s.Stopper().Stop(context.Background())

	// Send the lease request.
	key := roachpb.Key("a")
	leaseReq := roachpb.LeaseInfoRequest{
		RequestHeader: roachpb.RequestHeader{
			Key: key,
		},
	}
	_, pErr := kv.SendWrappedWith(
		context.Background(),
		s.DB().NonTransactionalSender(),
		roachpb.Header{UserPriority: 42},
		&leaseReq,
	)
	if !testutils.IsPError(pErr, "injected error") {
		t.Fatalf("expected error %q, got: %s", "injected error", pErr)
	}
}

func TestRangeInfo(t *testing.T) {
	defer leaktest.AfterTest(t)()
	storeCfg := kvserver.TestStoreConfig(nil /* clock */)
	storeCfg.TestingKnobs.DisableMergeQueue = true
	storeCfg.Clock = nil // manual clock
	mtc := &multiTestContext{
		storeConfig: &storeCfg,
		// This test was written before the multiTestContext started creating many
		// system ranges at startup, and hasn't been updated to take that into
		// account.
		startWithSingleRange: true,
	}
	defer mtc.Stop()
	mtc.Start(t, 2)

	// Up-replicate to two replicas.
	mtc.replicateRange(mtc.stores[0].LookupReplica(roachpb.RKeyMin).RangeID, 1)

	// Split the key space at key "a".
	splitKey := roachpb.RKey("a")
	splitArgs := adminSplitArgs(splitKey.AsRawKey())
	if _, pErr := kv.SendWrapped(
		context.Background(), mtc.stores[0].TestSender(), splitArgs,
	); pErr != nil {
		t.Fatal(pErr)
	}

	// Get the replicas for each side of the split.
	// This is done within a SucceedsSoon loop to ensure the split completes.
	var lhsReplica0, lhsReplica1, rhsReplica0, rhsReplica1 *kvserver.Replica
	testutils.SucceedsSoon(t, func() error {
		lhsReplica0 = mtc.stores[0].LookupReplica(roachpb.RKeyMin)
		lhsReplica1 = mtc.stores[1].LookupReplica(roachpb.RKeyMin)
		rhsReplica0 = mtc.stores[0].LookupReplica(splitKey)
		rhsReplica1 = mtc.stores[1].LookupReplica(splitKey)
		if lhsReplica0 == rhsReplica0 || lhsReplica1 == rhsReplica1 {
			return errors.Errorf("replicas not post-split %v, %v, %v, %v",
				lhsReplica0, lhsReplica1, rhsReplica0, rhsReplica1)
		}
		return nil
	})
	lhsLease, _ := lhsReplica0.GetLease()
	rhsLease, _ := rhsReplica0.GetLease()

	// Verify range info is not set if unrequested.
	getArgs := getArgs(splitKey.AsRawKey())
	reply, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], getArgs)
	if pErr != nil {
		t.Fatal(pErr)
	}
	if len(reply.Header().RangeInfos) > 0 {
		t.Errorf("expected empty range infos if unrequested; got %v", reply.Header().RangeInfos)
	}

	// Verify range info on a get request.
	h := roachpb.Header{
		ReturnRangeInfo: true,
	}
	reply, pErr = kv.SendWrappedWith(context.Background(), mtc.distSenders[0], h, getArgs)
	if pErr != nil {
		t.Fatal(pErr)
	}
	expRangeInfos := []roachpb.RangeInfo{
		{
			Desc:  *rhsReplica0.Desc(),
			Lease: rhsLease,
		},
	}
	if !reflect.DeepEqual(reply.Header().RangeInfos, expRangeInfos) {
		t.Errorf("on get reply, expected %+v; got %+v", expRangeInfos, reply.Header().RangeInfos)
	}

	// Verify range info on a put request.
	putArgs := putArgs(splitKey.AsRawKey(), []byte("foo"))
	reply, pErr = kv.SendWrappedWith(context.Background(), mtc.distSenders[0], h, putArgs)
	if pErr != nil {
		t.Fatal(pErr)
	}
	if !reflect.DeepEqual(reply.Header().RangeInfos, expRangeInfos) {
		t.Errorf("on put reply, expected %+v; got %+v", expRangeInfos, reply.Header().RangeInfos)
	}

	// Verify range info on an admin request.
	adminArgs := &roachpb.AdminTransferLeaseRequest{
		RequestHeader: roachpb.RequestHeader{
			Key: splitKey.AsRawKey(),
		},
		Target: rhsLease.Replica.StoreID,
	}
	reply, pErr = kv.SendWrappedWith(context.Background(), mtc.distSenders[0], h, adminArgs)
	if pErr != nil {
		t.Fatal(pErr)
	}
	if !reflect.DeepEqual(reply.Header().RangeInfos, expRangeInfos) {
		t.Errorf("on admin reply, expected %+v; got %+v", expRangeInfos, reply.Header().RangeInfos)
	}

	// Verify multiple range infos on a scan request.
	scanArgs := roachpb.ScanRequest{
		RequestHeader: roachpb.RequestHeader{
			Key:    keys.SystemMax,
			EndKey: roachpb.KeyMax,
		},
	}
	txn := roachpb.MakeTransaction("test", roachpb.KeyMin, 1, mtc.clock().Now(), 0)
	h.Txn = &txn
	reply, pErr = kv.SendWrappedWith(context.Background(), mtc.distSenders[0], h, &scanArgs)
	if pErr != nil {
		t.Fatal(pErr)
	}
	expRangeInfos = []roachpb.RangeInfo{
		{
			Desc:  *lhsReplica0.Desc(),
			Lease: lhsLease,
		},
		{
			Desc:  *rhsReplica0.Desc(),
			Lease: rhsLease,
		},
	}
	if !reflect.DeepEqual(reply.Header().RangeInfos, expRangeInfos) {
		t.Errorf("on scan reply, expected %+v; got %+v", expRangeInfos, reply.Header().RangeInfos)
	}

	// Verify multiple range infos and order on a reverse scan request.
1586 revScanArgs := roachpb.ReverseScanRequest{ 1587 RequestHeader: roachpb.RequestHeader{ 1588 Key: keys.SystemMax, 1589 EndKey: roachpb.KeyMax, 1590 }, 1591 } 1592 reply, pErr = kv.SendWrappedWith(context.Background(), mtc.distSenders[0], h, &revScanArgs) 1593 if pErr != nil { 1594 t.Fatal(pErr) 1595 } 1596 expRangeInfos = []roachpb.RangeInfo{ 1597 { 1598 Desc: *rhsReplica0.Desc(), 1599 Lease: rhsLease, 1600 }, 1601 { 1602 Desc: *lhsReplica0.Desc(), 1603 Lease: lhsLease, 1604 }, 1605 } 1606 if !reflect.DeepEqual(reply.Header().RangeInfos, expRangeInfos) { 1607 t.Errorf("on reverse scan reply, expected %+v; got %+v", expRangeInfos, reply.Header().RangeInfos) 1608 } 1609 1610 // Change lease holders for both ranges and re-scan. 1611 for _, r := range []*kvserver.Replica{lhsReplica1, rhsReplica1} { 1612 replDesc, err := r.GetReplicaDescriptor() 1613 if err != nil { 1614 t.Fatal(err) 1615 } 1616 if err = mtc.dbs[0].AdminTransferLease(context.Background(), 1617 r.Desc().StartKey.AsRawKey(), replDesc.StoreID); err != nil { 1618 t.Fatalf("unable to transfer lease to replica %s: %+v", r, err) 1619 } 1620 } 1621 reply, pErr = kv.SendWrappedWith(context.Background(), mtc.distSenders[0], h, &scanArgs) 1622 if pErr != nil { 1623 t.Fatal(pErr) 1624 } 1625 // Read the expected lease from replica0 rather than replica1 as it may serve 1626 // a follower read which will contain the new lease information before 1627 // replica1 has applied the lease transfer. 1628 lhsLease, _ = lhsReplica0.GetLease() 1629 rhsLease, _ = rhsReplica0.GetLease() 1630 expRangeInfos = []roachpb.RangeInfo{ 1631 { 1632 Desc: *lhsReplica1.Desc(), 1633 Lease: lhsLease, 1634 }, 1635 { 1636 Desc: *rhsReplica1.Desc(), 1637 Lease: rhsLease, 1638 }, 1639 } 1640 if !reflect.DeepEqual(reply.Header().RangeInfos, expRangeInfos) { 1641 t.Errorf("on scan reply, expected %+v; got %+v", expRangeInfos, reply.Header().RangeInfos) 1642 } 1643 } 1644 1645 // TestDrainRangeRejection verifies that an attempt to transfer a range to a 1646 // draining store fails. 
1647 func TestDrainRangeRejection(t *testing.T) { 1648 defer leaktest.AfterTest(t)() 1649 mtc := &multiTestContext{} 1650 defer mtc.Stop() 1651 mtc.Start(t, 2) 1652 1653 repl, err := mtc.stores[0].GetReplica(1) 1654 if err != nil { 1655 t.Fatal(err) 1656 } 1657 1658 drainingIdx := 1 1659 mtc.stores[drainingIdx].SetDraining(true, nil /* reporter */) 1660 chgs := roachpb.MakeReplicationChanges(roachpb.ADD_REPLICA, 1661 roachpb.ReplicationTarget{ 1662 NodeID: mtc.idents[drainingIdx].NodeID, 1663 StoreID: mtc.idents[drainingIdx].StoreID, 1664 }) 1665 if _, err := repl.ChangeReplicas(context.Background(), repl.Desc(), kvserver.SnapshotRequest_REBALANCE, kvserverpb.ReasonRangeUnderReplicated, "", chgs); !testutils.IsError(err, "store is draining") { 1666 t.Fatalf("unexpected error: %+v", err) 1667 } 1668 } 1669 1670 func TestChangeReplicasGeneration(t *testing.T) { 1671 defer leaktest.AfterTest(t)() 1672 mtc := &multiTestContext{} 1673 defer mtc.Stop() 1674 mtc.Start(t, 2) 1675 1676 repl, err := mtc.stores[0].GetReplica(1) 1677 if err != nil { 1678 t.Fatal(err) 1679 } 1680 1681 oldGeneration := repl.Desc().Generation 1682 chgs := roachpb.MakeReplicationChanges(roachpb.ADD_REPLICA, roachpb.ReplicationTarget{ 1683 NodeID: mtc.idents[1].NodeID, 1684 StoreID: mtc.idents[1].StoreID, 1685 }) 1686 if _, err := repl.ChangeReplicas(context.Background(), repl.Desc(), kvserver.SnapshotRequest_REBALANCE, kvserverpb.ReasonRangeUnderReplicated, "", chgs); err != nil { 1687 t.Fatalf("unexpected error: %v", err) 1688 } 1689 assert.EqualValues(t, repl.Desc().Generation, oldGeneration+2) 1690 1691 oldGeneration = repl.Desc().Generation 1692 oldDesc := repl.Desc() 1693 chgs[0].ChangeType = roachpb.REMOVE_REPLICA 1694 newDesc, err := repl.ChangeReplicas(context.Background(), oldDesc, kvserver.SnapshotRequest_REBALANCE, kvserverpb.ReasonRangeOverReplicated, "", chgs) 1695 if err != nil { 1696 t.Fatalf("unexpected error: %v", err) 1697 } 1698 // Generation changes: 1699 // +1 for entering joint config due to demotion 1700 // +1 for transitioning out of joint config 1701 // +1 for removing learner 1702 assert.EqualValues(t, repl.Desc().Generation, oldGeneration+3, "\nold: %+v\nnew: %+v", oldDesc, newDesc) 1703 } 1704 1705 func TestSystemZoneConfigs(t *testing.T) { 1706 defer leaktest.AfterTest(t)() 1707 1708 // This test is relatively slow and resource intensive. When run under 1709 // stressrace on a loaded machine (as in the nightly tests), sometimes the 1710 // SucceedsSoon conditions below take longer than the allotted time (#25273). 1711 if testing.Short() || testutils.NightlyStress() || util.RaceEnabled { 1712 t.Skip() 1713 } 1714 1715 // This test relies on concurrently waiting for a value to change in the 1716 // underlying engine(s). Since the teeing engine does not respond well to 1717 // value mismatches, whether transient or permanent, skip this test if the 1718 // teeing engine is being used. See 1719 // https://github.com/cockroachdb/cockroach/issues/42656 for more context. 1720 if storage.DefaultStorageEngine == enginepb.EngineTypeTeePebbleRocksDB { 1721 t.Skip("disabled on teeing engine") 1722 } 1723 1724 ctx := context.Background() 1725 tc := testcluster.StartTestCluster(t, 7, base.TestClusterArgs{ 1726 ServerArgs: base.TestServerArgs{ 1727 Knobs: base.TestingKnobs{ 1728 Store: &kvserver.StoreTestingKnobs{ 1729 // Disable LBS because when the scan is happening at the rate it's happening 1730 // below, it's possible that one of the system ranges trigger a split. 
1731 DisableLoadBasedSplitting: true, 1732 }, 1733 }, 1734 // Scan like a bat out of hell to ensure replication and replica GC 1735 // happen in a timely manner. 1736 ScanInterval: 50 * time.Millisecond, 1737 }, 1738 }) 1739 defer tc.Stopper().Stop(ctx) 1740 log.Info(ctx, "TestSystemZoneConfig: test cluster started") 1741 1742 expectedSystemRanges, err := tc.Servers[0].ExpectedInitialRangeCount() 1743 if err != nil { 1744 t.Fatal(err) 1745 } 1746 expectedUserRanges := 1 1747 expectedSystemRanges -= expectedUserRanges 1748 systemNumReplicas := int(*zonepb.DefaultSystemZoneConfig().NumReplicas) 1749 userNumReplicas := int(*zonepb.DefaultZoneConfig().NumReplicas) 1750 expectedReplicas := expectedSystemRanges*systemNumReplicas + expectedUserRanges*userNumReplicas 1751 log.Infof(ctx, "TestSystemZoneConfig: expecting %d system ranges and %d user ranges", 1752 expectedSystemRanges, expectedUserRanges) 1753 log.Infof(ctx, "TestSystemZoneConfig: expected (%dx%d) + (%dx%d) = %d replicas total", 1754 expectedSystemRanges, systemNumReplicas, expectedUserRanges, userNumReplicas, expectedReplicas) 1755 1756 waitForReplicas := func() error { 1757 replicas := make(map[roachpb.RangeID]roachpb.RangeDescriptor) 1758 for _, s := range tc.Servers { 1759 if err := kvserver.IterateRangeDescriptors(ctx, s.Engines()[0], func(desc roachpb.RangeDescriptor) (bool, error) { 1760 if len(desc.Replicas().Learners()) > 0 { 1761 return false, fmt.Errorf("descriptor contains learners: %v", desc) 1762 } 1763 if existing, ok := replicas[desc.RangeID]; ok && !existing.Equal(desc) { 1764 return false, fmt.Errorf("mismatch between\n%s\n%s", &existing, &desc) 1765 } 1766 replicas[desc.RangeID] = desc 1767 return false, nil 1768 }); err != nil { 1769 return err 1770 } 1771 } 1772 var totalReplicas int 1773 for _, desc := range replicas { 1774 totalReplicas += len(desc.Replicas().Voters()) 1775 } 1776 if totalReplicas != expectedReplicas { 1777 return fmt.Errorf("got %d voters, want %d; details: %+v", totalReplicas, expectedReplicas, replicas) 1778 } 1779 return nil 1780 } 1781 1782 // Wait until we're down to the expected number of replicas. This is 1783 // effectively waiting on replica GC to kick in to destroy any replicas that 1784 // got removed during rebalancing of the initial ranges, since the testcluster 1785 // waits until nothing is underreplicated but not until all rebalancing has 1786 // settled down. 1787 testutils.SucceedsSoon(t, waitForReplicas) 1788 log.Info(ctx, "TestSystemZoneConfig: initial replication succeeded") 1789 1790 // Update the meta zone config to have more replicas and expect the number 1791 // of replicas to go up accordingly after running all replicas through the 1792 // replicate queue. 1793 sqlDB := sqlutils.MakeSQLRunner(tc.ServerConn(0)) 1794 sqlutils.SetZoneConfig(t, sqlDB, "RANGE meta", "num_replicas: 7") 1795 expectedReplicas += 2 1796 testutils.SucceedsSoon(t, waitForReplicas) 1797 log.Info(ctx, "TestSystemZoneConfig: up-replication of meta ranges succeeded") 1798 1799 // Do the same thing, but down-replicating the timeseries range. 1800 sqlutils.SetZoneConfig(t, sqlDB, "RANGE timeseries", "num_replicas: 1") 1801 expectedReplicas -= 2 1802 testutils.SucceedsSoon(t, waitForReplicas) 1803 log.Info(ctx, "TestSystemZoneConfig: down-replication of timeseries ranges succeeded") 1804 1805 // Up-replicate the system.jobs table to demonstrate that it is configured 1806 // independently from the system database. 
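// As with the other steps in this test, the bookkeeping follows a simple rule
// (assuming every affected range starts at its default replication factor;
// the default system zone is 5x at the time of writing):
//
//	expectedReplicas += (ranges covered by the zone) * (new - old num_replicas)
//
// e.g. one range moving from 5 to 7 replicas contributes +2, and the final
// RANGE system step below adds +4 because it covers two system ranges.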
1807 sqlutils.SetZoneConfig(t, sqlDB, "TABLE system.jobs", "num_replicas: 7") 1808 expectedReplicas += 2 1809 testutils.SucceedsSoon(t, waitForReplicas) 1810 log.Info(ctx, "TestSystemZoneConfig: up-replication of jobs table succeeded") 1811 1812 // Finally, verify the system ranges. Note that in a new cluster there are 1813 // two system ranges, which we have to take into account here. 1814 sqlutils.SetZoneConfig(t, sqlDB, "RANGE system", "num_replicas: 7") 1815 expectedReplicas += 4 1816 testutils.SucceedsSoon(t, waitForReplicas) 1817 log.Info(ctx, "TestSystemZoneConfig: up-replication of system ranges succeeded") 1818 } 1819 1820 func TestClearRange(t *testing.T) { 1821 defer leaktest.AfterTest(t)() 1822 1823 ctx := context.Background() 1824 stopper := stop.NewStopper() 1825 defer stopper.Stop(ctx) 1826 store := createTestStoreWithConfig(t, stopper, kvserver.TestStoreConfig(nil)) 1827 1828 clearRange := func(start, end roachpb.Key) { 1829 t.Helper() 1830 if _, err := kv.SendWrapped(ctx, store.DB().NonTransactionalSender(), &roachpb.ClearRangeRequest{ 1831 RequestHeader: roachpb.RequestHeader{ 1832 Key: start, 1833 EndKey: end, 1834 }, 1835 }); err != nil { 1836 t.Fatal(err) 1837 } 1838 } 1839 1840 verifyKeysWithPrefix := func(prefix roachpb.Key, expectedKeys []roachpb.Key) { 1841 t.Helper() 1842 start := prefix 1843 end := prefix.PrefixEnd() 1844 kvs, err := storage.Scan(store.Engine(), start, end, 0 /* maxRows */) 1845 if err != nil { 1846 t.Fatal(err) 1847 } 1848 var actualKeys []roachpb.Key 1849 for _, kv := range kvs { 1850 actualKeys = append(actualKeys, kv.Key.Key) 1851 } 1852 if !reflect.DeepEqual(expectedKeys, actualKeys) { 1853 t.Fatalf("expected %v, but got %v", expectedKeys, actualKeys) 1854 } 1855 } 1856 1857 rng, _ := randutil.NewPseudoRand() 1858 1859 // Write four keys with values small enough to use individual deletions 1860 // (sm1-sm4) and four keys with values large enough to require a range 1861 // deletion tombstone (lg1-lg4). 1862 sm, sm1, sm2, sm3 := roachpb.Key("sm"), roachpb.Key("sm1"), roachpb.Key("sm2"), roachpb.Key("sm3") 1863 lg, lg1, lg2, lg3 := roachpb.Key("lg"), roachpb.Key("lg1"), roachpb.Key("lg2"), roachpb.Key("lg3") 1864 for _, key := range []roachpb.Key{sm1, sm2, sm3} { 1865 if err := store.DB().Put(ctx, key, "sm-val"); err != nil { 1866 t.Fatal(err) 1867 } 1868 } 1869 for _, key := range []roachpb.Key{lg1, lg2, lg3} { 1870 if err := store.DB().Put( 1871 ctx, key, randutil.RandBytes(rng, batcheval.ClearRangeBytesThreshold), 1872 ); err != nil { 1873 t.Fatal(err) 1874 } 1875 } 1876 verifyKeysWithPrefix(sm, []roachpb.Key{sm1, sm2, sm3}) 1877 verifyKeysWithPrefix(lg, []roachpb.Key{lg1, lg2, lg3}) 1878 1879 // Verify that a ClearRange request from [sm1, sm3) removes sm1 and sm2. 1880 clearRange(sm1, sm3) 1881 verifyKeysWithPrefix(sm, []roachpb.Key{sm3}) 1882 1883 // Verify that a ClearRange request from [lg1, lg3) removes lg1 and lg2. 1884 clearRange(lg1, lg3) 1885 verifyKeysWithPrefix(lg, []roachpb.Key{lg3}) 1886 1887 // Verify that only the large ClearRange request used a range deletion 1888 // tombstone by checking for the presence of a suggested compaction. 1889 verifyKeysWithPrefix(keys.LocalStoreSuggestedCompactionsMin, 1890 []roachpb.Key{keys.StoreSuggestedCompactionKey(lg1, lg3)}) 1891 } 1892 1893 // TestLeaseTransferInSnapshotUpdatesTimestampCache prevents a regression of 1894 // #34025. A Replica is targeted for a lease transfer target when it needs a 1895 // Raft snapshot to catch up. 
Normally we try to prevent this case, but it is 1896 // possible and hard to prevent entirely. The Replica will only learn that it is 1897 // the new leaseholder when it applies the snapshot. When doing so, it should 1898 // make sure to apply the lease-related side-effects to its in-memory state. 1899 func TestLeaseTransferInSnapshotUpdatesTimestampCache(t *testing.T) { 1900 defer leaktest.AfterTest(t)() 1901 1902 ctx := context.Background() 1903 sc := kvserver.TestStoreConfig(nil) 1904 // We'll control replication by hand. 1905 sc.TestingKnobs.DisableReplicateQueue = true 1906 // Avoid fighting with the merge queue while trying to reproduce this race. 1907 sc.TestingKnobs.DisableMergeQueue = true 1908 mtc := &multiTestContext{storeConfig: &sc} 1909 defer mtc.Stop() 1910 mtc.Start(t, 3) 1911 store2 := mtc.Store(2) 1912 1913 keyA := roachpb.Key("a") 1914 keyB := roachpb.Key("b") 1915 keyC := roachpb.Key("c") 1916 1917 // First, do a couple of writes; we'll use these to determine when 1918 // the dust has settled. 1919 incA := incrementArgs(keyA, 1) 1920 if _, pErr := kv.SendWrapped(ctx, mtc.stores[0].TestSender(), incA); pErr != nil { 1921 t.Fatal(pErr) 1922 } 1923 incC := incrementArgs(keyC, 2) 1924 if _, pErr := kv.SendWrapped(ctx, mtc.stores[0].TestSender(), incC); pErr != nil { 1925 t.Fatal(pErr) 1926 } 1927 1928 // Split the system range from the rest of the keyspace. 1929 splitArgs := adminSplitArgs(keys.SystemMax) 1930 if _, pErr := kv.SendWrapped(ctx, mtc.stores[0].TestSender(), splitArgs); pErr != nil { 1931 t.Fatal(pErr) 1932 } 1933 1934 // Get the range's ID. 1935 repl0 := mtc.stores[0].LookupReplica(roachpb.RKey(keyA)) 1936 rangeID := repl0.RangeID 1937 1938 // Replicate the range onto nodes 1 and 2. 1939 // Wait for all replicas to be caught up. 1940 mtc.replicateRange(rangeID, 1, 2) 1941 mtc.waitForValues(keyA, []int64{1, 1, 1}) 1942 mtc.waitForValues(keyC, []int64{2, 2, 2}) 1943 1944 // Create a transaction that will try to write "under" a served read. 1945 // The read will have been served by the original leaseholder (node 0) 1946 // and the write will be attempted on the new leaseholder (node 2). 1947 // It should not succeed because it should run into the timestamp cache. 1948 db := mtc.dbs[0] 1949 txnOld := kv.NewTxn(ctx, db, 0 /* gatewayNodeID */) 1950 1951 // Perform a write with txnOld so that its timestamp gets set. 1952 if _, err := txnOld.Inc(ctx, keyB, 3); err != nil { 1953 t.Fatal(err) 1954 } 1955 1956 // Read keyC with txnOld, which is updated below. This prevents the 1957 // transaction from refreshing when it hits the serializable error. 1958 if _, err := txnOld.Get(ctx, keyC); err != nil { 1959 t.Fatal(err) 1960 } 1961 1962 // Ensure that the transaction sends its first hearbeat so that it creates 1963 // its transaction record and doesn't run into trouble with the low water 1964 // mark of the new leaseholder's timestamp cache. Amusingly, if the bug 1965 // we're regression testing against here still existed, we would not have 1966 // to do this. 1967 hb, hbH := heartbeatArgs(txnOld.TestingCloneTxn(), mtc.clock().Now()) 1968 if _, pErr := kv.SendWrappedWith(ctx, mtc.stores[0].TestSender(), hbH, hb); pErr != nil { 1969 t.Fatal(pErr) 1970 } 1971 1972 // Another client comes along at a higher timestamp and reads. We should 1973 // never be able to write under this time or we would be rewriting history. 1974 if _, err := db.Get(ctx, keyA); err != nil { 1975 t.Fatal(err) 1976 } 1977 1978 // Partition node 2 from the rest of its range. 
Once partitioned, perform
1979 // another write and truncate the Raft log on the two connected nodes. This
1980 // ensures that when node 2 comes back up it will require a snapshot
1981 // from Raft.
1982 mtc.transport.Listen(store2.Ident.StoreID, &unreliableRaftHandler{
1983 rangeID: rangeID,
1984 RaftMessageHandler: store2,
1985 })
1986 
1987 if _, pErr := kv.SendWrapped(ctx, mtc.stores[0].TestSender(), incC); pErr != nil {
1988 t.Fatal(pErr)
1989 }
1990 mtc.waitForValues(keyC, []int64{4, 4, 2})
1991 
1992 // Truncate the log at index+1 (log entries < N are removed, so this
1993 // includes the increment). This necessitates a snapshot when the
1994 // partitioned replica rejoins the rest of the range.
1995 index, err := repl0.GetLastIndex()
1996 if err != nil {
1997 t.Fatal(err)
1998 }
1999 truncArgs := truncateLogArgs(index+1, rangeID)
2000 truncArgs.Key = keyA
2001 if _, err := kv.SendWrapped(ctx, mtc.stores[0].TestSender(), truncArgs); err != nil {
2002 t.Fatal(err)
2003 }
2004 
2005 // Finally, transfer the lease to node 2 while it is still unavailable and
2006 // behind. We try to avoid this case when picking new leaseholders in practice,
2007 // but we're never 100% successful.
2008 if err := repl0.AdminTransferLease(ctx, store2.Ident.StoreID); err != nil {
2009 t.Fatal(err)
2010 }
2011 
2012 // Remove the partition. A snapshot to node 2 should follow. This snapshot
2013 // will inform node 2 that it is the new leaseholder for the range. Node 2
2014 // should act accordingly and update its internal state to reflect this.
2015 mtc.transport.Listen(store2.Ident.StoreID, store2)
2016 mtc.waitForValues(keyC, []int64{4, 4, 4})
2017 
2018 // Perform a write on the new leaseholder underneath the previously served
2019 // read. This write should hit the timestamp cache and flag the txn for a
2020 // restart when we try to commit it below. With the bug in #34025, the new
2021 // leaseholder who heard about the lease transfer from a snapshot had an
2022 // empty timestamp cache and would simply let us write under the previous
2023 // read.
2024 if _, err := txnOld.Inc(ctx, keyA, 4); err != nil {
2025 t.Fatal(err)
2026 }
2027 const exp = `TransactionRetryError: retry txn \(RETRY_SERIALIZABLE\)`
2028 if err := txnOld.Commit(ctx); !testutils.IsError(err, exp) {
2029 t.Fatalf("expected retry error, got: %v; did we write under a read?", err)
2030 }
2031 }
2032 
2033 // TestConcurrentAdminChangeReplicasRequests ensures that when two attempts to
2034 // change replicas for a range race, only one will succeed.
2035 func TestConcurrentAdminChangeReplicasRequests(t *testing.T) {
2036 defer leaktest.AfterTest(t)()
2037 // With 5 nodes the test is set up to have 2 actors trying to change the
2038 // replication concurrently. The first one attempts to change the replication
2039 // from [1] to [1, 2, 3, 4] and the second one starts by assuming that the
2040 // first actor succeeded on its first request, expects [1, 2], and tries
2041 // to move the replication to [1, 2, 4, 5]. One of these actors should
2042 // succeed.
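// What makes this hold is the expected-descriptor argument passed to
// AdminChangeReplicas below: the change commits only if the range's current
// descriptor still matches the descriptor the caller read, in the spirit of a
// compare-and-swap, so the loser of the race fails, typically with a
// "descriptor changed" error (or, as noted below, occasionally with a
// rejected-snapshot error).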
2043 const numNodes = 5
2044 tc := testcluster.StartTestCluster(t, numNodes, base.TestClusterArgs{
2045 ReplicationMode: base.ReplicationManual,
2046 })
2047 ctx := context.Background()
2048 defer tc.Stopper().Stop(ctx)
2049 key := roachpb.Key("a")
2050 db := tc.Servers[0].DB()
2051 rangeInfo, err := getRangeInfo(ctx, db, key)
2052 require.Nil(t, err)
2053 require.Len(t, rangeInfo.Desc.InternalReplicas, 1)
2054 targets1, targets2 := makeReplicationTargets(2, 3, 4), makeReplicationTargets(4, 5)
2055 expects1 := rangeInfo.Desc
2056 expects2 := rangeInfo.Desc
2057 expects2.InternalReplicas = append(expects2.InternalReplicas, roachpb.ReplicaDescriptor{
2058 NodeID: 2,
2059 StoreID: 2,
2060 ReplicaID: expects2.NextReplicaID,
2061 })
2062 expects2.NextReplicaID++
2063 var err1, err2 error
2064 var res1, res2 *roachpb.RangeDescriptor
2065 var wg sync.WaitGroup
2066 wg.Add(2)
2067 go func() {
2068 res1, err1 = db.AdminChangeReplicas(
2069 ctx, key, expects1, roachpb.MakeReplicationChanges(roachpb.ADD_REPLICA, targets1...))
2070 wg.Done()
2071 }()
2072 go func() {
2073 res2, err2 = db.AdminChangeReplicas(
2074 ctx, key, expects2, roachpb.MakeReplicationChanges(roachpb.ADD_REPLICA, targets2...))
2075 wg.Done()
2076 }()
2077 wg.Wait()
2078 
2079 infoAfter, err := getRangeInfo(ctx, db, key)
2080 require.Nil(t, err)
2081 
2082 assert.Falsef(t, err1 == nil && err2 == nil,
2083 "expected one of racing AdminChangeReplicasRequests to fail but neither did")
2084 // It is possible that an error can occur due to a rejected snapshot from the
2085 // target range. We don't want to fail the test if we got one of those.
2086 isSnapshotErr := func(err error) bool {
2087 return testutils.IsError(err, "snapshot failed:")
2088 }
2089 atLeastOneIsSnapshotErr := isSnapshotErr(err1) || isSnapshotErr(err2)
2090 assert.Falsef(t, err1 != nil && err2 != nil && !atLeastOneIsSnapshotErr,
2091 "expected only one of racing AdminChangeReplicasRequests to fail but both "+
2092 "had errors and neither were snapshot: %v %v", err1, err2)
2093 replicaNodeIDs := func(desc roachpb.RangeDescriptor) (ids []int) {
2094 for _, r := range desc.InternalReplicas {
2095 ids = append(ids, int(r.NodeID))
2096 }
2097 return ids
2098 }
2099 if err1 == nil {
2100 assert.ElementsMatch(t, replicaNodeIDs(infoAfter.Desc), []int{1, 2, 3, 4})
2101 assert.EqualValues(t, infoAfter.Desc, *res1)
2102 } else if err2 == nil {
2103 assert.ElementsMatch(t, replicaNodeIDs(infoAfter.Desc), []int{1, 2, 4, 5})
2104 assert.EqualValues(t, infoAfter.Desc, *res2)
2105 }
2106 }
2107 
2108 // TestRandomConcurrentAdminChangeReplicasRequests ensures that when multiple
2109 // AdminChangeReplicasRequests are issued concurrently, so long as requests
2110 // provide the expected value of the RangeDescriptor they will not accidentally
2111 // perform replication changes. In particular this test runs a number of
2112 // concurrent actors which all use the same expectations of the RangeDescriptor
2113 // and verifies that at most one actor succeeds in making all of its changes.
2114 func TestRandomConcurrentAdminChangeReplicasRequests(t *testing.T) { 2115 defer leaktest.AfterTest(t)() 2116 const numNodes = 6 2117 tc := testcluster.StartTestCluster(t, numNodes, base.TestClusterArgs{ 2118 ReplicationMode: base.ReplicationManual, 2119 }) 2120 ctx := context.Background() 2121 defer tc.Stopper().Stop(ctx) 2122 const actors = 10 2123 errors := make([]error, actors) 2124 var wg sync.WaitGroup 2125 key := roachpb.Key("a") 2126 db := tc.Servers[0].DB() 2127 require.Nil(t, db.AdminRelocateRange(ctx, key, makeReplicationTargets(1, 2, 3))) 2128 // Random targets consisting of a random number of nodes from the set of nodes 2129 // in the cluster which currently do not have a replica. 2130 pickTargets := func() []roachpb.ReplicationTarget { 2131 availableIDs := make([]int, 0, numNodes-3) 2132 for id := 4; id <= numNodes; id++ { 2133 availableIDs = append(availableIDs, id) 2134 } 2135 rand.Shuffle(len(availableIDs), func(i, j int) { 2136 availableIDs[i], availableIDs[j] = availableIDs[j], availableIDs[i] 2137 }) 2138 n := rand.Intn(len(availableIDs)) + 1 2139 return makeReplicationTargets(availableIDs[:n]...) 2140 } 2141 // TODO(ajwerner): consider doing this read inside the addReplicas function 2142 // and then allowing multiple writes to overlap and validate that the state 2143 // corresponds to a valid history of events. 2144 rangeInfo, err := getRangeInfo(ctx, db, key) 2145 require.Nil(t, err) 2146 addReplicas := func() error { 2147 _, err := db.AdminChangeReplicas( 2148 ctx, key, rangeInfo.Desc, roachpb.MakeReplicationChanges( 2149 roachpb.ADD_REPLICA, pickTargets()...)) 2150 return err 2151 } 2152 wg.Add(actors) 2153 for i := 0; i < actors; i++ { 2154 go func(i int) { errors[i] = addReplicas(); wg.Done() }(i) 2155 } 2156 wg.Wait() 2157 var gotSuccess bool 2158 for _, err := range errors { 2159 if err != nil { 2160 const exp = "change replicas of .* failed: descriptor changed" + 2161 "|snapshot failed:" 2162 assert.True(t, testutils.IsError(err, exp), err) 2163 } else if gotSuccess { 2164 t.Error("expected only one success") 2165 } else { 2166 gotSuccess = true 2167 } 2168 } 2169 } 2170 2171 // TestReplicaTombstone ensures that tombstones are written when we expect 2172 // them to be. Tombstones are laid down when replicas are removed. 2173 // Replicas are removed for several reasons: 2174 // 2175 // (1) In response to a ChangeReplicasTrigger which removes it. 2176 // (2) In response to a ReplicaTooOldError from a sent raft message. 2177 // (3) Due to the replica GC queue detecting a replica is not in the range. 2178 // (3.1) When the replica detects the range has been merged away. 2179 // (4) Due to a raft message addressed to a newer replica ID. 2180 // (4.1) When the older replica is not initialized. 2181 // (5) Due to a merge. 2182 // (6) Due to snapshot which subsumes a range. 2183 // 2184 // This test creates all of these scenarios and ensures that tombstones are 2185 // written at sane values. 2186 func TestReplicaTombstone(t *testing.T) { 2187 defer leaktest.AfterTest(t)() 2188 2189 // This test relies on concurrently waiting for a value to change in the 2190 // underlying engine(s). Since the teeing engine does not respond well to 2191 // value mismatches, whether transient or permanent, skip this test if the 2192 // teeing engine is being used. See 2193 // https://github.com/cockroachdb/cockroach/issues/42656 for more context. 
2194 if storage.DefaultStorageEngine == enginepb.EngineTypeTeePebbleRocksDB { 2195 t.Skip("disabled on teeing engine") 2196 } 2197 2198 t.Run("(1) ChangeReplicasTrigger", func(t *testing.T) { 2199 defer leaktest.AfterTest(t)() 2200 ctx := context.Background() 2201 tc := testcluster.StartTestCluster(t, 2, base.TestClusterArgs{ 2202 ServerArgs: base.TestServerArgs{ 2203 Knobs: base.TestingKnobs{Store: &kvserver.StoreTestingKnobs{ 2204 DisableReplicaGCQueue: true, 2205 }}, 2206 }, 2207 ReplicationMode: base.ReplicationManual, 2208 }) 2209 defer tc.Stopper().Stop(ctx) 2210 2211 key := tc.ScratchRange(t) 2212 require.NoError(t, tc.WaitForSplitAndInitialization(key)) 2213 desc, err := tc.LookupRange(key) 2214 require.NoError(t, err) 2215 rangeID := desc.RangeID 2216 tc.AddReplicasOrFatal(t, key, tc.Target(1)) 2217 // Partition node 2 from receiving responses but not requests. 2218 // This will lead to it applying the ChangeReplicasTrigger which removes 2219 // it rather than receiving a ReplicaTooOldError. 2220 store, _ := getFirstStoreReplica(t, tc.Server(1), key) 2221 funcs := noopRaftHandlerFuncs() 2222 funcs.dropResp = func(*kvserver.RaftMessageResponse) bool { 2223 return true 2224 } 2225 tc.Servers[1].RaftTransport().Listen(store.StoreID(), &unreliableRaftHandler{ 2226 rangeID: desc.RangeID, 2227 RaftMessageHandler: store, 2228 unreliableRaftHandlerFuncs: funcs, 2229 }) 2230 tc.RemoveReplicasOrFatal(t, key, tc.Target(1)) 2231 tombstone := waitForTombstone(t, store.Engine(), rangeID) 2232 require.Equal(t, roachpb.ReplicaID(3), tombstone.NextReplicaID) 2233 }) 2234 t.Run("(2) ReplicaTooOldError", func(t *testing.T) { 2235 defer leaktest.AfterTest(t)() 2236 ctx := context.Background() 2237 tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{ 2238 ServerArgs: base.TestServerArgs{ 2239 RaftConfig: base.RaftConfig{ 2240 // Make the tick interval short so we don't need to wait too long for 2241 // the partitioned node to time out but increase the lease timeout 2242 // so expiration-based leases still work. 2243 RaftTickInterval: time.Millisecond, 2244 RangeLeaseRaftElectionTimeoutMultiplier: 10000, 2245 }, 2246 Knobs: base.TestingKnobs{Store: &kvserver.StoreTestingKnobs{ 2247 DisableReplicaGCQueue: true, 2248 }}, 2249 }, 2250 ReplicationMode: base.ReplicationManual, 2251 }) 2252 defer tc.Stopper().Stop(ctx) 2253 2254 key := tc.ScratchRange(t) 2255 require.NoError(t, tc.WaitForSplitAndInitialization(key)) 2256 desc, err := tc.LookupRange(key) 2257 require.NoError(t, err) 2258 rangeID := desc.RangeID 2259 tc.AddReplicasOrFatal(t, key, tc.Target(1), tc.Target(2)) 2260 require.NoError(t, 2261 tc.WaitForVoters(key, tc.Target(1), tc.Target(2))) 2262 store, repl := getFirstStoreReplica(t, tc.Server(2), key) 2263 // Partition the range such that it hears responses but does not hear 2264 // requests. It should destroy the local replica due to a 2265 // ReplicaTooOldError. 
2266 sawTooOld := make(chan struct{}, 1) 2267 raftFuncs := noopRaftHandlerFuncs() 2268 raftFuncs.dropResp = func(resp *kvserver.RaftMessageResponse) bool { 2269 if pErr, ok := resp.Union.GetValue().(*roachpb.Error); ok { 2270 if _, isTooOld := pErr.GetDetail().(*roachpb.ReplicaTooOldError); isTooOld { 2271 select { 2272 case sawTooOld <- struct{}{}: 2273 default: 2274 } 2275 } 2276 } 2277 return false 2278 } 2279 raftFuncs.dropReq = func(req *kvserver.RaftMessageRequest) bool { 2280 return req.ToReplica.StoreID == store.StoreID() 2281 } 2282 tc.Servers[2].RaftTransport().Listen(store.StoreID(), &unreliableRaftHandler{ 2283 rangeID: desc.RangeID, 2284 RaftMessageHandler: store, 2285 unreliableRaftHandlerFuncs: raftFuncs, 2286 }) 2287 tc.RemoveReplicasOrFatal(t, key, tc.Target(2)) 2288 testutils.SucceedsSoon(t, func() error { 2289 repl.UnquiesceAndWakeLeader() 2290 if len(sawTooOld) == 0 { 2291 return errors.New("still haven't seen ReplicaTooOldError") 2292 } 2293 return nil 2294 }) 2295 // Wait until we're sure that the replica has seen ReplicaTooOld, 2296 // then go look for the tombstone. 2297 <-sawTooOld 2298 tombstone := waitForTombstone(t, store.Engine(), rangeID) 2299 require.Equal(t, roachpb.ReplicaID(4), tombstone.NextReplicaID) 2300 }) 2301 t.Run("(3) ReplicaGCQueue", func(t *testing.T) { 2302 defer leaktest.AfterTest(t)() 2303 2304 ctx := context.Background() 2305 tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{ 2306 ServerArgs: base.TestServerArgs{ 2307 Knobs: base.TestingKnobs{Store: &kvserver.StoreTestingKnobs{ 2308 DisableReplicaGCQueue: true, 2309 }}, 2310 }, 2311 ReplicationMode: base.ReplicationManual, 2312 }) 2313 defer tc.Stopper().Stop(ctx) 2314 2315 key := tc.ScratchRange(t) 2316 require.NoError(t, tc.WaitForSplitAndInitialization(key)) 2317 desc, err := tc.LookupRange(key) 2318 require.NoError(t, err) 2319 rangeID := desc.RangeID 2320 tc.AddReplicasOrFatal(t, key, tc.Target(1), tc.Target(2)) 2321 // Partition node 2 from receiving any raft messages. 2322 // It will never find out it has been removed. We'll remove it 2323 // with a manual replica GC. 2324 store, _ := getFirstStoreReplica(t, tc.Server(2), key) 2325 tc.Servers[2].RaftTransport().Listen(store.StoreID(), &unreliableRaftHandler{ 2326 rangeID: desc.RangeID, 2327 RaftMessageHandler: store, 2328 }) 2329 tc.RemoveReplicasOrFatal(t, key, tc.Target(2)) 2330 repl, err := store.GetReplica(desc.RangeID) 2331 require.NoError(t, err) 2332 require.NoError(t, store.ManualReplicaGC(repl)) 2333 tombstone := waitForTombstone(t, store.Engine(), rangeID) 2334 require.Equal(t, roachpb.ReplicaID(4), tombstone.NextReplicaID) 2335 }) 2336 // This case also detects the tombstone for nodes which processed the merge. 
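// Note that a range which has been merged away is never recreated, so the
// tombstones written in the merge cases use NextReplicaID = math.MaxInt32
// rather than a real next replica ID; that is what the assertions below
// check for.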
2337 t.Run("(3.1) (5) replica GC queue and merge", func(t *testing.T) { 2338 defer leaktest.AfterTest(t)() 2339 2340 ctx := context.Background() 2341 tc := testcluster.StartTestCluster(t, 4, base.TestClusterArgs{ 2342 ServerArgs: base.TestServerArgs{ 2343 Knobs: base.TestingKnobs{Store: &kvserver.StoreTestingKnobs{ 2344 DisableReplicaGCQueue: true, 2345 }}, 2346 }, 2347 ReplicationMode: base.ReplicationManual, 2348 }) 2349 defer tc.Stopper().Stop(ctx) 2350 2351 key := tc.ScratchRange(t) 2352 require.NoError(t, tc.WaitForSplitAndInitialization(key)) 2353 tc.AddReplicasOrFatal(t, key, tc.Target(1)) 2354 keyA := append(key[:len(key):len(key)], 'a') 2355 _, desc, err := tc.SplitRange(keyA) 2356 require.NoError(t, err) 2357 require.NoError(t, tc.WaitForSplitAndInitialization(keyA)) 2358 tc.AddReplicasOrFatal(t, key, tc.Target(3)) 2359 tc.AddReplicasOrFatal(t, keyA, tc.Target(2)) 2360 rangeID := desc.RangeID 2361 // Partition node 2 from all raft communication. 2362 store, _ := getFirstStoreReplica(t, tc.Server(2), keyA) 2363 tc.Servers[2].RaftTransport().Listen(store.StoreID(), &unreliableRaftHandler{ 2364 rangeID: desc.RangeID, 2365 RaftMessageHandler: store, 2366 }) 2367 2368 // We'll move the range from server 2 to 3 and merge key and keyA. 2369 // Server 2 won't hear about any of that. 2370 tc.RemoveReplicasOrFatal(t, keyA, tc.Target(2)) 2371 tc.AddReplicasOrFatal(t, keyA, tc.Target(3)) 2372 require.NoError(t, tc.WaitForSplitAndInitialization(keyA)) 2373 require.NoError(t, tc.Server(0).DB().AdminMerge(ctx, key)) 2374 // Run replica GC on server 2. 2375 repl, err := store.GetReplica(desc.RangeID) 2376 require.NoError(t, err) 2377 require.NoError(t, store.ManualReplicaGC(repl)) 2378 // Verify the tombstone generated from replica GC of a merged range. 2379 tombstone := waitForTombstone(t, store.Engine(), rangeID) 2380 require.Equal(t, roachpb.ReplicaID(math.MaxInt32), tombstone.NextReplicaID) 2381 // Verify the tombstone generated from processing a merge trigger. 2382 store3, _ := getFirstStoreReplica(t, tc.Server(0), key) 2383 tombstone = waitForTombstone(t, store3.Engine(), rangeID) 2384 require.Equal(t, roachpb.ReplicaID(math.MaxInt32), tombstone.NextReplicaID) 2385 }) 2386 t.Run("(4) (4.1) raft messages to newer replicaID ", func(t *testing.T) { 2387 defer leaktest.AfterTest(t)() 2388 ctx := context.Background() 2389 tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{ 2390 ServerArgs: base.TestServerArgs{ 2391 RaftConfig: base.RaftConfig{ 2392 // Make the tick interval short so we don't need to wait too long 2393 // for a heartbeat to be sent. Increase the election timeout so 2394 // expiration based leases still work. 2395 RaftTickInterval: time.Millisecond, 2396 RangeLeaseRaftElectionTimeoutMultiplier: 10000, 2397 }, 2398 Knobs: base.TestingKnobs{Store: &kvserver.StoreTestingKnobs{ 2399 DisableReplicaGCQueue: true, 2400 }}, 2401 }, 2402 ReplicationMode: base.ReplicationManual, 2403 }) 2404 defer tc.Stopper().Stop(ctx) 2405 2406 key := tc.ScratchRange(t) 2407 desc, err := tc.LookupRange(key) 2408 require.NoError(t, err) 2409 rangeID := desc.RangeID 2410 tc.AddReplicasOrFatal(t, key, tc.Target(1), tc.Target(2)) 2411 require.NoError(t, tc.WaitForSplitAndInitialization(key)) 2412 store, repl := getFirstStoreReplica(t, tc.Server(2), key) 2413 // Set up a partition for everything but heartbeats on store 2. 2414 // Make ourselves a tool to block snapshots until we've heard a 2415 // heartbeat above a certain replica ID. 
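// The idea: the handler installed below drops the range's raft requests and
// responses but lets heartbeats through, and it holds any snapshot until a
// heartbeat addressed to a replica ID at or above the configured minimum has
// been observed, at which point the snapshot is rejected with "boom". This
// guarantees the store hears about its newer replica ID before a snapshot
// could re-create the replica, which is what lays down the tombstone we
// assert on. waiter below is a plain mutex/condition-variable pair.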
2416 var waiter struct { 2417 syncutil.Mutex 2418 sync.Cond 2419 minHeartbeatReplicaID roachpb.ReplicaID 2420 blockSnapshot bool 2421 } 2422 waiter.L = &waiter.Mutex 2423 waitForSnapshot := func() { 2424 waiter.Lock() 2425 defer waiter.Unlock() 2426 for waiter.blockSnapshot { 2427 waiter.Wait() 2428 } 2429 } 2430 recordHeartbeat := func(replicaID roachpb.ReplicaID) { 2431 waiter.Lock() 2432 defer waiter.Unlock() 2433 if waiter.blockSnapshot && replicaID >= waiter.minHeartbeatReplicaID { 2434 waiter.blockSnapshot = false 2435 waiter.Broadcast() 2436 } 2437 } 2438 setMinHeartbeat := func(replicaID roachpb.ReplicaID) { 2439 waiter.Lock() 2440 defer waiter.Unlock() 2441 waiter.minHeartbeatReplicaID = replicaID 2442 waiter.blockSnapshot = true 2443 } 2444 setMinHeartbeat(repl.ReplicaID() + 1) 2445 tc.Servers[2].RaftTransport().Listen(store.StoreID(), &unreliableRaftHandler{ 2446 rangeID: desc.RangeID, 2447 RaftMessageHandler: store, 2448 unreliableRaftHandlerFuncs: unreliableRaftHandlerFuncs{ 2449 dropResp: func(*kvserver.RaftMessageResponse) bool { 2450 return true 2451 }, 2452 dropReq: func(*kvserver.RaftMessageRequest) bool { 2453 return true 2454 }, 2455 dropHB: func(hb *kvserver.RaftHeartbeat) bool { 2456 recordHeartbeat(hb.ToReplicaID) 2457 return false 2458 }, 2459 snapErr: func(*kvserver.SnapshotRequest_Header) error { 2460 waitForSnapshot() 2461 return errors.New("boom") 2462 }, 2463 }, 2464 }) 2465 // Remove the current replica from the node, it will not hear about this. 2466 tc.RemoveReplicasOrFatal(t, key, tc.Target(2)) 2467 // Try to add it back as a learner. We'll wait until it's heard about 2468 // this as a heartbeat. This demonstrates case (4) where a raft message 2469 // to a newer replica ID (in this case a heartbeat) removes an initialized 2470 // Replica. 2471 _, err = tc.AddReplicas(key, tc.Target(2)) 2472 require.Regexp(t, "boom", err) 2473 tombstone := waitForTombstone(t, store.Engine(), rangeID) 2474 require.Equal(t, roachpb.ReplicaID(4), tombstone.NextReplicaID) 2475 // Try adding it again and again block the snapshot until a heartbeat 2476 // at a higher ID has been sent. This is case (4.1) where a raft message 2477 // removes an uninitialized Replica. 2478 // 2479 // Note that this case represents a potential memory leak. If we hear about 2480 // a Replica and then either never receive a snapshot or for whatever reason 2481 // fail to receive a snapshot and then we never hear from the range again we 2482 // may leak in-memory state about this replica. 2483 // 2484 // We could replica GC these replicas without too much extra work but they 2485 // also should be rare. Note this is not new with learner replicas. 2486 setMinHeartbeat(5) 2487 _, err = tc.AddReplicas(key, tc.Target(2)) 2488 require.Regexp(t, "boom", err) 2489 // We will start out reading the old tombstone so keep retrying. 
2490 testutils.SucceedsSoon(t, func() error { 2491 tombstone = waitForTombstone(t, store.Engine(), rangeID) 2492 if tombstone.NextReplicaID != 5 { 2493 return errors.Errorf("read tombstone with NextReplicaID %d, want %d", 2494 tombstone.NextReplicaID, 5) 2495 } 2496 return nil 2497 }) 2498 }) 2499 t.Run("(6) subsumption via snapshot", func(t *testing.T) { 2500 defer leaktest.AfterTest(t)() 2501 2502 ctx := context.Background() 2503 var proposalFilter atomic.Value 2504 noopProposalFilter := func(kvserverbase.ProposalFilterArgs) *roachpb.Error { 2505 return nil 2506 } 2507 proposalFilter.Store(noopProposalFilter) 2508 tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{ 2509 ServerArgs: base.TestServerArgs{ 2510 Knobs: base.TestingKnobs{Store: &kvserver.StoreTestingKnobs{ 2511 DisableReplicaGCQueue: true, 2512 TestingProposalFilter: kvserverbase.ReplicaProposalFilter( 2513 func(args kvserverbase.ProposalFilterArgs) *roachpb.Error { 2514 return proposalFilter. 2515 Load().(func(kvserverbase.ProposalFilterArgs) *roachpb.Error)(args) 2516 }, 2517 ), 2518 }}, 2519 }, 2520 ReplicationMode: base.ReplicationManual, 2521 }) 2522 defer tc.Stopper().Stop(ctx) 2523 2524 key := tc.ScratchRange(t) 2525 require.NoError(t, tc.WaitForSplitAndInitialization(key)) 2526 tc.AddReplicasOrFatal(t, key, tc.Target(1), tc.Target(2)) 2527 keyA := append(key[:len(key):len(key)], 'a') 2528 lhsDesc, rhsDesc, err := tc.SplitRange(keyA) 2529 require.NoError(t, err) 2530 require.NoError(t, tc.WaitForSplitAndInitialization(key)) 2531 require.NoError(t, tc.WaitForSplitAndInitialization(keyA)) 2532 require.NoError(t, tc.WaitForVoters(key, tc.Target(1), tc.Target(2))) 2533 require.NoError(t, tc.WaitForVoters(keyA, tc.Target(1), tc.Target(2))) 2534 2535 // We're going to block the RHS and LHS of node 2 as soon as the merge 2536 // attempts to propose the command to commit the merge. This should prevent 2537 // the merge from being applied on node 2. Then we'll manually force a 2538 // snapshots to be sent to the LHS of store 2 after the merge commits. 2539 store, repl := getFirstStoreReplica(t, tc.Server(2), key) 2540 var partActive atomic.Value 2541 partActive.Store(false) 2542 raftFuncs := noopRaftHandlerFuncs() 2543 raftFuncs.dropReq = func(req *kvserver.RaftMessageRequest) bool { 2544 return partActive.Load().(bool) && req.Message.Type == raftpb.MsgApp 2545 } 2546 tc.Servers[2].RaftTransport().Listen(store.StoreID(), &unreliableRaftHandler{ 2547 rangeID: lhsDesc.RangeID, 2548 unreliableRaftHandlerFuncs: raftFuncs, 2549 RaftMessageHandler: &unreliableRaftHandler{ 2550 rangeID: rhsDesc.RangeID, 2551 RaftMessageHandler: store, 2552 unreliableRaftHandlerFuncs: raftFuncs, 2553 }, 2554 }) 2555 proposalFilter.Store(func(args kvserverbase.ProposalFilterArgs) *roachpb.Error { 2556 merge := args.Cmd.ReplicatedEvalResult.Merge 2557 if merge != nil && merge.LeftDesc.RangeID == lhsDesc.RangeID { 2558 partActive.Store(true) 2559 } 2560 return nil 2561 }) 2562 require.NoError(t, tc.Server(0).DB().AdminMerge(ctx, key)) 2563 var tombstone roachpb.RangeTombstone 2564 testutils.SucceedsSoon(t, func() (err error) { 2565 // One of the two other stores better be the raft leader eventually. 2566 // We keep trying to send snapshots until one takes. 
2567 for i := range []int{0, 1} {
2568 s, r := getFirstStoreReplica(t, tc.Server(i), key)
2569 err = s.ManualRaftSnapshot(r, repl.ReplicaID())
2570 if err == nil {
2571 break
2572 }
2573 }
2574 if err != nil {
2575 return err
2576 }
2577 tombstoneKey := keys.RangeTombstoneKey(rhsDesc.RangeID)
2578 ok, err := storage.MVCCGetProto(
2579 context.Background(), store.Engine(), tombstoneKey, hlc.Timestamp{}, &tombstone, storage.MVCCGetOptions{},
2580 )
2581 require.NoError(t, err)
2582 if !ok {
2583 return errors.New("no tombstone found")
2584 }
2585 return nil
2586 })
2587 require.Equal(t, roachpb.ReplicaID(math.MaxInt32), tombstone.NextReplicaID)
2588 })
2589 }
2590 
2591 // TestAdminRelocateRangeSafety exercises a situation where calls to
2592 // AdminRelocateRange can race with calls to ChangeReplicas and verifies
2593 // that such races do not leave the range in an under-replicated state.
2594 func TestAdminRelocateRangeSafety(t *testing.T) {
2595 defer leaktest.AfterTest(t)()
2596 
2597 // The test is going to verify that when a replica removal due to a
2598 // Replica.ChangeReplicas call coincides with the removal phase of an
2599 // AdminRelocateRangeRequest that one of the removals will fail.
2600 // In order to ensure that the AdminChangeReplicas command coincides with
2601 // the remove phase of the AdminRelocateRange call, the test injects a response
2602 // filter which, when useSeenAdd holds true, signals on seenAdd when it sees
2603 // an AdminChangeReplicasRequest which added a replica.
2604 const numNodes = 4
2605 var useSeenAdd atomic.Value
2606 useSeenAdd.Store(false)
2607 seenAdd := make(chan struct{}, 1)
2608 responseFilter := func(ctx context.Context, ba roachpb.BatchRequest, _ *roachpb.BatchResponse) *roachpb.Error {
2609 if ba.IsSingleRequest() {
2610 changeReplicas, ok := ba.Requests[0].GetInner().(*roachpb.AdminChangeReplicasRequest)
2611 if ok && changeReplicas.Changes()[0].ChangeType == roachpb.ADD_REPLICA && useSeenAdd.Load().(bool) {
2612 seenAdd <- struct{}{}
2613 }
2614 }
2615 return nil
2616 }
2617 tc := testcluster.StartTestCluster(t, numNodes, base.TestClusterArgs{
2618 ReplicationMode: base.ReplicationManual,
2619 ServerArgs: base.TestServerArgs{
2620 Knobs: base.TestingKnobs{
2621 Store: &kvserver.StoreTestingKnobs{
2622 TestingResponseFilter: responseFilter,
2623 },
2624 },
2625 },
2626 })
2627 ctx := context.Background()
2628 defer tc.Stopper().Stop(ctx)
2629 db := tc.Servers[rand.Intn(numNodes)].DB()
2630 
2631 // The test assumes from the way that the range gets set up that the lease
2632 // holder is node 1 and from the relocate call below that the range in
2633 // question has replicas on nodes 1-3. Make the call to AdminRelocateRange
2634 // to set up the replication and then verify the assumed state.
2635 
2636 key := roachpb.Key("a")
2637 assert.Nil(t, db.AdminRelocateRange(ctx, key, makeReplicationTargets(1, 2, 3)))
2638 rangeInfo, err := getRangeInfo(ctx, db, key)
2639 assert.Nil(t, err)
2640 assert.Len(t, rangeInfo.Desc.InternalReplicas, 3)
2641 assert.Equal(t, rangeInfo.Lease.Replica.NodeID, roachpb.NodeID(1))
2642 for id := roachpb.StoreID(1); id <= 3; id++ {
2643 _, hasReplica := rangeInfo.Desc.GetReplicaDescriptor(id)
2644 assert.Truef(t, hasReplica, "missing replica descriptor for store %d", id)
2645 }
2646 
2647 // The test now proceeds to use AdminRelocateRange to move a replica from node
2648 // 3 to node 4. The call will first add 4 and then
2649 // remove 3.
Concurrently, a separate goroutine will attempt to remove the
2650 // replica on node 2. The ResponseFilter passed in the TestingKnobs will
2651 // prevent the remove call from proceeding until after the Add of 4 has
2652 // completed.
2653 
2654 // Code above verified r1 is the leaseholder, so use it to ChangeReplicas.
2655 r1, _, err := tc.Servers[0].Stores().GetReplicaForRangeID(rangeInfo.Desc.RangeID)
2656 assert.Nil(t, err)
2657 expDescAfterAdd := rangeInfo.Desc // for use with ChangeReplicas
2658 expDescAfterAdd.NextReplicaID++
2659 expDescAfterAdd.InternalReplicas = append(expDescAfterAdd.InternalReplicas, roachpb.ReplicaDescriptor{
2660 NodeID: 4,
2661 StoreID: 4,
2662 ReplicaID: 4,
2663 })
2664 var relocateErr, changeErr error
2665 var changedDesc *roachpb.RangeDescriptor // only populated if changeErr == nil
2666 change := func() {
2667 <-seenAdd
2668 chgs := roachpb.MakeReplicationChanges(roachpb.REMOVE_REPLICA, makeReplicationTargets(2)...)
2669 changedDesc, changeErr = r1.ChangeReplicas(ctx, &expDescAfterAdd, kvserver.SnapshotRequest_REBALANCE, "replicate", "testing", chgs)
2670 }
2671 relocate := func() {
2672 relocateErr = db.AdminRelocateRange(ctx, key, makeReplicationTargets(1, 2, 4))
2673 }
2674 useSeenAdd.Store(true)
2675 var wg sync.WaitGroup
2676 wg.Add(2)
2677 go func() { relocate(); wg.Done() }()
2678 go func() { change(); wg.Done() }()
2679 wg.Wait()
2680 rangeInfo, err = getRangeInfo(ctx, db, key)
2681 assert.Nil(t, err)
2682 assert.True(t, len(rangeInfo.Desc.InternalReplicas) >= 3)
2683 assert.Falsef(t, relocateErr == nil && changeErr == nil,
2684 "expected one of racing AdminRelocateReplicas and ChangeReplicas "+
2685 "to fail but neither did")
2686 assert.Falsef(t, relocateErr != nil && changeErr != nil,
2687 "expected only one of racing AdminRelocateReplicas and ChangeReplicas "+
2688 "to fail but both did")
2689 if changeErr == nil {
2690 assert.EqualValues(t, *changedDesc, rangeInfo.Desc)
2691 }
2692 }
2693 
2694 // TestChangeReplicasLeaveAtomicRacesWithMerge exercises a hazardous case which
2695 // arises during concurrent AdminChangeReplicas requests. The code reads the
2696 // descriptor from its range-local descriptor key, checks to make sure that the read
2697 // descriptor matches the expectation, and then uses the raw bytes it
2698 // read in a CPut with the update. The code contains an optimization to
2699 // transition out of joint consensus even if the read descriptor does not match
2700 // the expectation. That optimization did not verify anything about the read
2701 // descriptor, not even if it was nil.
2702 //
2703 // This test wants to exercise this scenario. We need to get the replica in
2704 // a state where it has an outgoing voter and then we need to have two
2705 // different requests trying to make changes, where only the merge succeeds. The
2706 // race is that the second command will notice the outgoing voter and will
2707 // attempt to fix it. In order to do that it reads the range descriptor to
2708 // ensure that it has not changed (and to get the raw bytes of the range
2709 // descriptor for use in a CPut, as the current API only takes the in-memory
2710 // value and the encoding is not necessarily stable).
2711 //
2712 // The test also contains a variant whereby the range is re-split at the
2713 // same key, producing a range descriptor with a different range ID.
2714 //
2715 // See https://github.com/cockroachdb/cockroach/issues/40877.
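//
// For orientation, the descriptor update at the heart of this hazard looks
// roughly like the following sketch (made-up helper names, not the exact
// functions the code uses):
//
//	existing := read(descriptorKey)          // may be nil once the range has been merged away
//	verify(existing, expectedDesc)           // the optimization above skipped this check
//	cput(descriptorKey, newDesc, existing)   // expects exactly the bytes that were read
//
// so skipping the verification lets a nil read flow straight into the CPut.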
2716 func TestChangeReplicasLeaveAtomicRacesWithMerge(t *testing.T) { 2717 defer leaktest.AfterTest(t)() 2718 testutils.RunTrueAndFalse(t, "resplit", func(t *testing.T, resplit bool) { 2719 const numNodes = 4 2720 var stopAfterJointConfig atomic.Value 2721 stopAfterJointConfig.Store(false) 2722 var rangeToBlockRangeDescriptorRead atomic.Value 2723 rangeToBlockRangeDescriptorRead.Store(roachpb.RangeID(0)) 2724 blockRangeDescriptorReadChan := make(chan struct{}, 1) 2725 blockOnChangeReplicasRead := kvserverbase.ReplicaRequestFilter(func(ctx context.Context, ba roachpb.BatchRequest) *roachpb.Error { 2726 if req, isGet := ba.GetArg(roachpb.Get); !isGet || 2727 ba.RangeID != rangeToBlockRangeDescriptorRead.Load().(roachpb.RangeID) || 2728 !ba.IsSingleRequest() || 2729 !bytes.HasSuffix([]byte(req.(*roachpb.GetRequest).Key), 2730 []byte(keys.LocalRangeDescriptorSuffix)) { 2731 return nil 2732 } 2733 select { 2734 case <-blockRangeDescriptorReadChan: 2735 <-blockRangeDescriptorReadChan 2736 case <-ctx.Done(): 2737 default: 2738 } 2739 return nil 2740 }) 2741 tc := testcluster.StartTestCluster(t, numNodes, base.TestClusterArgs{ 2742 ServerArgs: base.TestServerArgs{ 2743 Knobs: base.TestingKnobs{ 2744 Store: &kvserver.StoreTestingKnobs{ 2745 TestingRequestFilter: blockOnChangeReplicasRead, 2746 ReplicaAddStopAfterJointConfig: func() bool { 2747 return stopAfterJointConfig.Load().(bool) 2748 }, 2749 }, 2750 }, 2751 }, 2752 ReplicationMode: base.ReplicationManual, 2753 }) 2754 ctx := context.Background() 2755 defer tc.Stopper().Stop(ctx) 2756 2757 // We want to first get into a joint consensus scenario. 2758 // Then we want to issue a ChangeReplicasRequest on a goroutine that will 2759 // block trying to read the RHS's range descriptor. Then we'll merge the RHS 2760 // away. 2761 2762 // Set up a userspace range to mess around with. 2763 lhs := tc.ScratchRange(t) 2764 _, err := tc.AddReplicas(lhs, tc.Targets(1, 2)...) 2765 require.NoError(t, err) 2766 2767 // Split it and then we're going to try to up-replicate. 2768 // We're going to have one goroutine trying to ADD the 4th node. 2769 // and another goroutine trying to move out of a joint config on both 2770 // sides and then merge the range. We ensure that the first goroutine 2771 // blocks and the second one succeeds. This will test that the first 2772 // goroutine detects reading the nil descriptor. 2773 rhs := append(lhs[:len(lhs):len(lhs)], 'a') 2774 lhsDesc, rhsDesc := &roachpb.RangeDescriptor{}, &roachpb.RangeDescriptor{} 2775 *lhsDesc, *rhsDesc, err = tc.SplitRange(rhs) 2776 require.NoError(t, err) 2777 2778 err = tc.WaitForSplitAndInitialization(rhs) 2779 require.NoError(t, err) 2780 2781 // Manually construct the batch because the (*DB).AdminChangeReplicas does 2782 // not yet support atomic replication changes. 2783 db := tc.Servers[0].DB() 2784 swapReplicas := func(key roachpb.Key, desc roachpb.RangeDescriptor, add, remove int) (*roachpb.RangeDescriptor, error) { 2785 return db.AdminChangeReplicas(ctx, key, desc, []roachpb.ReplicationChange{ 2786 {ChangeType: roachpb.ADD_REPLICA, Target: tc.Target(add)}, 2787 {ChangeType: roachpb.REMOVE_REPLICA, Target: tc.Target(remove)}, 2788 }) 2789 } 2790 2791 // Move the RHS and LHS to 3 from 2. 2792 _, err = swapReplicas(lhs, *lhsDesc, 3, 2) 2793 require.NoError(t, err) 2794 stopAfterJointConfig.Store(true) // keep the RHS in a joint config. 
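// With the knob set, the swap below stops once the RHS has entered the joint
// configuration (the replica being removed is demoted rather than dropped
// outright), which leaves the range with the outgoing voter that the scenario
// described above requires.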
2795 rhsDesc, err = swapReplicas(rhs, *rhsDesc, 3, 2)
2796 require.NoError(t, err)
2797 stopAfterJointConfig.Store(false)
2798 
2799 // Run a goroutine which sends an AdminChangeReplicasRequest which will try to
2800 // move the range out of joint config but will end up blocking on
2801 // blockRangeDescriptorReadChan until we close it later.
2802 rangeToBlockRangeDescriptorRead.Store(rhsDesc.RangeID)
2803 blockRangeDescriptorReadChan <- struct{}{}
2804 var wg sync.WaitGroup
2805 
2806 defer func() {
2807 // Unblock the original add on the separate goroutine to ensure that it
2808 // properly handles reading a nil range descriptor.
2809 close(blockRangeDescriptorReadChan)
2810 wg.Wait()
2811 }()
2812 wg.Add(1)
2813 
2814 go func() {
2815 defer wg.Done()
2816 _, err := db.AdminChangeReplicas(
2817 ctx, rhs, *rhsDesc, roachpb.MakeReplicationChanges(roachpb.ADD_REPLICA, tc.Target(2)),
2818 )
2819 // We'll ultimately fail because we're going to race with the work below.
2820 msg := "descriptor changed"
2821 if resplit {
2822 // We don't convert ConditionFailedError to the "descriptor changed"
2823 // error if the range ID changed.
2824 msg = "unexpected value"
2825 }
2826 require.True(t, testutils.IsError(err, msg), err)
2827 }()
2828 // Wait until our goroutine is blocked.
2829 testutils.SucceedsSoon(t, func() error {
2830 if len(blockRangeDescriptorReadChan) != 0 {
2831 return errors.New("not blocked yet")
2832 }
2833 return nil
2834 })
2835 // Remove the learner replica (left because the joint config was demoting
2836 // a voter) which as a side effect exits the joint config.
2837 _, err = tc.RemoveReplicas(rhs, tc.Target(2))
2838 require.NoError(t, err)
2839 // Merge the RHS away.
2840 err = db.AdminMerge(ctx, lhs)
2841 require.NoError(t, err)
2842 if resplit {
2843 require.NoError(t, db.AdminSplit(ctx, lhs, rhs, hlc.Timestamp{WallTime: math.MaxInt64}))
2844 err = tc.WaitForSplitAndInitialization(rhs)
2845 require.NoError(t, err)
2846 }
2847 })
2848 }
2849 
2850 // This test is designed to demonstrate that it is not possible to have pending
2851 // proposals concurrent with a TransferLeaseRequest. This property ensures that
2852 // we cannot possibly receive AmbiguousResultError due to an outgoing leaseholder
2853 // being removed while still having pending proposals for a lease which did not
2854 // expire (i.e. was transferred cooperatively using TransferLease rather than
2855 // being taken with a RequestLease).
2856 //
2857 // At the time of writing this test there were three hazardous cases which are now
2858 // avoided:
2859 //
2860 // (1) The outgoing leaseholder learns about its removal before applying the
2861 // lease transfer. This could happen if it has a lot left to apply but it
2862 // does indeed know in its log that it is either no longer the leaseholder
2863 // or that some of its commands will apply successfully.
2864 //
2865 // (2) The replica learns about its removal after applying the lease transfer
2866 // but it potentially still has pending commands which it thinks might
2867 // have been proposed. This can occur if there are commands which are
2868 // proposed after the lease transfer has been proposed but before the lease
2869 // transfer has applied. This can also occur if commands are re-ordered
2870 // by raft due to a leadership change.
2871 //
2872 // (3) The replica learns about its removal after applying the lease transfer
2873 // but proposed a command evaluated under the old lease after the lease
2874 // transfer has been applied.
This can occur if there are commands evaluated
2875 // before the lease transfer is proposed but not inserted into the
2876 // proposal buffer until after it has been applied.
2877 //
2878 // None of these cases are possible any longer as latches now prevent writes
2879 // from occurring concurrently with TransferLeaseRequests. (1) is prevented
2880 // because all proposals will need to apply before the TransferLeaseRequest
2881 // can be evaluated. (2) and (3) are not possible because either the commands
2882 // in question acquire their latches before the TransferLeaseRequest in which
2883 // case they'll apply before the TransferLease can be proposed or they acquire
2884 // their latches after the TransferLease applies in which case they will fail
2885 // due to NotLeaseHolderError prior to application.
2886 func TestTransferLeaseBlocksWrites(t *testing.T) {
2887 defer leaktest.AfterTest(t)()
2888 
2889 // We want to verify that we will not propose a TransferLeaseRequest while
2890 // there is an outstanding proposal.
2891 var scratchRangeID atomic.Value
2892 scratchRangeID.Store(roachpb.RangeID(0))
2893 blockInc := make(chan chan struct{})
2894 tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{
2895 ServerArgs: base.TestServerArgs{
2896 Knobs: base.TestingKnobs{Store: &kvserver.StoreTestingKnobs{
2897 TestingProposalFilter: kvserverbase.ReplicaProposalFilter(
2898 func(args kvserverbase.ProposalFilterArgs) *roachpb.Error {
2899 if args.Req.RangeID != scratchRangeID.Load().(roachpb.RangeID) {
2900 return nil
2901 }
2902 // Block increment requests on blockInc.
2903 if _, isInc := args.Req.GetArg(roachpb.Increment); isInc {
2904 unblock := make(chan struct{})
2905 blockInc <- unblock
2906 <-unblock
2907 }
2908 return nil
2909 },
2910 ),
2911 }},
2912 },
2913 ReplicationMode: base.ReplicationManual,
2914 })
2915 defer tc.Stopper().Stop(context.Background())
2916 
2917 scratch := tc.ScratchRange(t)
2918 makeKey := func() roachpb.Key {
2919 return append(scratch[:len(scratch):len(scratch)], uuid.MakeV4().String()...)
2920 }
2921 desc := tc.AddReplicasOrFatal(t, scratch, tc.Target(1), tc.Target(2))
2922 scratchRangeID.Store(desc.RangeID)
2923 require.NoError(t, tc.WaitForVoters(scratch, tc.Target(1), tc.Target(2)))
2924 
2925 // Launch a goroutine to increment a value; it will block in the proposal
2926 // filter.
2927 incErr := make(chan error)
2928 go func() {
2929 _, err := tc.Server(1).DB().Inc(context.Background(), makeKey(), 1)
2930 incErr <- err
2931 }()
2932 
2933 // Wait for the increment to be blocked on the proposal filter so we know
2934 // it holds a write latch.
2935 unblock := <-blockInc
2936 
2937 // Launch a goroutine to transfer the lease to store 1.
2938 transferErr := make(chan error)
2939 go func() {
2940 transferErr <- tc.TransferRangeLease(desc, tc.Target(1))
2941 }()
2942 
2943 // Ensure that the lease transfer doesn't succeed.
2944 // We don't wait that long because we don't want this test to take too long.
2945 // The theory is that if we weren't acquiring latches over the keyspace then
2946 // the lease transfer could succeed before we unblocked the increment request.
2947 	select {
2948 	case <-time.After(100 * time.Millisecond):
2949 	case err := <-transferErr:
2950 		t.Fatalf("did not expect transfer to complete, got %v", err)
2951 	}
2952 
2953 	close(unblock)
2954 	require.NoError(t, <-incErr)
2955 	require.NoError(t, <-transferErr)
2956 }
2957 
2958 // TestStrictGCEnforcement ensures that strict GC enforcement is respected and,
2959 // furthermore, is responsive to changes in protected timestamps and in the
2960 // zone configs.
2961 func TestStrictGCEnforcement(t *testing.T) {
2962 	defer leaktest.AfterTest(t)()
2963 
2964 	// The unfortunate thing about this test is that the GC TTL is in seconds and
2965 	// we need to wait for the replica's lease start time to be sufficiently old.
2966 	// It takes about two seconds. All of that time is in setup.
2967 	if testing.Short() {
2968 		return
2969 	}
2970 	ctx := context.Background()
2971 
2972 	tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{
2973 		ReplicationMode: base.ReplicationManual,
2974 	})
2975 	defer tc.Stopper().Stop(ctx)
2976 
2977 	sqlDB := sqlutils.MakeSQLRunner(tc.ServerConn(0))
2978 	sqlDB.Exec(t, `CREATE TABLE foo (i INT PRIMARY KEY)`)
2979 
2980 	var (
2981 		db         = tc.Server(0).DB()
2982 		getTableID = func() (tableID uint32) {
2983 			sqlDB.QueryRow(t, `SELECT table_id FROM crdb_internal.tables`+
2984 				` WHERE name = 'foo' AND database_name = current_database()`).Scan(&tableID)
2985 			return tableID
2986 		}
2987 		tableID       = getTableID()
2988 		tenSecondsAgo hlc.Timestamp // written in setup
2989 		tableKey      = keys.SystemSQLCodec.TablePrefix(tableID)
2990 		tableSpan     = roachpb.Span{Key: tableKey, EndKey: tableKey.PrefixEnd()}
2991 		mkRecord      = func() ptpb.Record {
2992 			return ptpb.Record{
2993 				ID:        uuid.MakeV4(),
2994 				Timestamp: tenSecondsAgo.Add(-10*time.Second.Nanoseconds(), 0),
2995 				Spans:     []roachpb.Span{tableSpan},
2996 			}
2997 		}
2998 		mkStaleTxn = func() *kv.Txn {
2999 			txn := db.NewTxn(ctx, "foo")
3000 			txn.SetFixedTimestamp(ctx, tenSecondsAgo)
3001 			return txn
3002 		}
3003 		getRejectedMsg = func() string {
3004 			return tenSecondsAgo.String() + " must be after replica GC threshold "
3005 		}
3006 		performScan = func() error {
3007 			txn := mkStaleTxn()
3008 			_, err := txn.Scan(ctx, tableKey, tableKey.PrefixEnd(), 1)
3009 			return err
3010 		}
3011 		assertScanRejected = func(t *testing.T) {
3012 			t.Helper()
3013 			require.Regexp(t, getRejectedMsg(), performScan())
3014 		}
3015 
3016 		assertScanOk = func(t *testing.T) {
3017 			t.Helper()
3018 			require.NoError(t, performScan())
3019 		}
3020 		// Make sure the cache has been updated. Once it has, we know it won't be
3021 		// updated again for minutes. It should read on startup.
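		// waitForCacheAfter blocks until every server's protected timestamp
		// cache reports that it has been read at a timestamp of at least min.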
3022 		waitForCacheAfter = func(t *testing.T, min hlc.Timestamp) {
3023 			t.Helper()
3024 			testutils.SucceedsSoon(t, func() error {
3025 				for i := 0; i < tc.NumServers(); i++ {
3026 					ptp := tc.Server(i).ExecutorConfig().(sql.ExecutorConfig).ProtectedTimestampProvider
3027 					if ptp.Iterate(ctx, tableKey, tableKey, func(record *ptpb.Record) (wantMore bool) {
3028 						return false
3029 					}).Less(min) {
3030 						return errors.Errorf("not yet read")
3031 					}
3032 				}
3033 				return nil
3034 			})
3035 		}
3036 		setGCTTL = func(t *testing.T, object string, exp int) {
3037 			t.Helper()
3038 			testutils.SucceedsSoon(t, func() error {
3039 				sqlDB.Exec(t, `ALTER `+object+` CONFIGURE ZONE USING gc.ttlseconds = `+strconv.Itoa(exp))
3040 				for i := 0; i < tc.NumServers(); i++ {
3041 					s := tc.Server(i)
3042 					_, r := getFirstStoreReplica(t, s, tableKey)
3043 					if _, z := r.DescAndZone(); z.GC.TTLSeconds != int32(exp) {
3044 						_, sysCfg := getFirstStoreReplica(t, tc.Server(i), keys.SystemConfigSpan.Key)
3045 						require.NoError(t, sysCfg.MaybeGossipSystemConfig(ctx))
3046 						return errors.Errorf("expected %d, got %d", exp, z.GC.TTLSeconds)
3047 					}
3048 				}
3049 				return nil
3050 			})
3051 		}
3052 		setStrictGC = func(t *testing.T, val bool) {
3053 			t.Helper()
3054 			sqlDB.Exec(t, `SET CLUSTER SETTING kv.gc_ttl.strict_enforcement.enabled = `+fmt.Sprint(val))
3055 			testutils.SucceedsSoon(t, func() error {
3056 				for i := 0; i < tc.NumServers(); i++ {
3057 					s, r := getFirstStoreReplica(t, tc.Server(i), keys.SystemConfigSpan.Key)
3058 					if kvserver.StrictGCEnforcement.Get(&s.ClusterSettings().SV) != val {
3059 						require.NoError(t, r.MaybeGossipSystemConfig(ctx))
3060 						return errors.Errorf("expected %v, got %v", val, !val)
3061 					}
3062 				}
3063 				return nil
3064 			})
3065 		}
3066 		setTableGCTTL = func(t *testing.T, exp int) {
3067 			t.Helper()
3068 			setGCTTL(t, "TABLE foo", exp)
3069 		}
3070 		setSystemGCTTL = func(t *testing.T, exp int) {
3071 			// TODO(ajwerner): adapt this to test that the system ranges are unaffected.
3072 			t.Helper()
3073 			setGCTTL(t, "RANGE system", exp)
3074 		}
3075 		refreshPastLeaseStart = func(t *testing.T) {
3076 			for i := 0; i < tc.NumServers(); i++ {
3077 				ptp := tc.Server(i).ExecutorConfig().(sql.ExecutorConfig).ProtectedTimestampProvider
3078 				_, r := getFirstStoreReplica(t, tc.Server(i), tableKey)
3079 				l, _ := r.GetLease()
3080 				require.NoError(t, ptp.Refresh(ctx, l.Start.Next()))
3081 				r.ReadProtectedTimestamps(ctx)
3082 			}
3083 		}
3084 	)
3085 
3086 	{
3087 		// Set up the initial state to be sure that we'll actually strictly enforce
3088 		// GC TTLs.
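		// Split off a dedicated range for the table and upreplicate both it and
		// the system config range so that every server has a replica to inspect.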
3089 		tc.SplitRangeOrFatal(t, tableKey)
3090 		_, err := tc.AddReplicas(tableKey, tc.Target(1), tc.Target(2))
3091 		require.NoError(t, err)
3092 		_, err = tc.AddReplicas(keys.SystemConfigSpan.Key, tc.Target(1), tc.Target(2))
3093 		require.NoError(t, err)
3094 
3095 		setTableGCTTL(t, 1)
3096 		waitForCacheAfter(t, hlc.Timestamp{})
3097 
3098 		defer sqlDB.Exec(t, `SET CLUSTER SETTING kv.gc_ttl.strict_enforcement.enabled = DEFAULT`)
3099 		setStrictGC(t, true)
3100 		tenSecondsAgo = tc.Server(0).Clock().Now().Add(-10*time.Second.Nanoseconds(), 0)
3101 	}
3102 
3103 	t.Run("strict enforcement", func(t *testing.T) {
3104 		refreshPastLeaseStart(t)
3105 		assertScanRejected(t)
3106 	})
3107 	t.Run("disable strict enforcement", func(t *testing.T) {
3108 		setStrictGC(t, false)
3109 		defer setStrictGC(t, true)
3110 		assertScanOk(t)
3111 	})
3112 	t.Run("zone config changes are respected", func(t *testing.T) {
3113 		setTableGCTTL(t, 60)
3114 		assertScanOk(t)
3115 		setTableGCTTL(t, 1)
3116 		assertScanRejected(t)
3117 	})
3118 	t.Run("system ranges are unaffected", func(t *testing.T) {
3119 		setSystemGCTTL(t, 1)
3120 		txn := mkStaleTxn()
3121 		descriptorTable := keys.SystemSQLCodec.TablePrefix(keys.DescriptorTableID)
3122 		_, err := txn.Scan(ctx, descriptorTable, descriptorTable.PrefixEnd(), 1)
3123 		require.NoError(t, err)
3124 	})
3125 	t.Run("protected timestamps are respected", func(t *testing.T) {
3126 		waitForCacheAfter(t, hlc.Timestamp{})
3127 		ptp := tc.Server(0).ExecutorConfig().(sql.ExecutorConfig).ProtectedTimestampProvider
3128 		assertScanRejected(t)
3129 		// Create a protected timestamp, don't verify it, and make sure it's not
3130 		// respected.
3131 		rec := mkRecord()
3132 		require.NoError(t, db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
3133 			return ptp.Protect(ctx, txn, &rec)
3134 		}))
3135 		assertScanRejected(t)
3136 
3137 		require.NoError(t, ptp.Verify(ctx, rec.ID))
3138 		assertScanOk(t)
3139 
3140 		// Transfer the lease and demonstrate that the query succeeds because we're
3141 		// cautious in the face of lease transfers.
3142 		desc, err := tc.LookupRange(tableKey)
3143 		require.NoError(t, err)
3144 		require.NoError(t, tc.TransferRangeLease(desc, tc.Target(1)))
3145 		assertScanOk(t)
3146 	})
3147 }
3148 
3149 // TestProposalOverhead ensures that the command overhead for put operations
3150 // is as expected. It exists to prevent changes which might increase the
3151 // byte overhead of replicating commands.
3152 //
3153 // Note that it intentionally avoids using a system range, which incurs
3154 // overhead due to the logical op log.
3155 func TestProposalOverhead(t *testing.T) {
3156 	defer leaktest.AfterTest(t)()
3157 
3158 	var overhead uint32
3159 	var key atomic.Value
3160 	key.Store(roachpb.Key{})
3161 	filter := func(args kvserverbase.ProposalFilterArgs) *roachpb.Error {
3162 		if len(args.Req.Requests) != 1 {
3163 			return nil
3164 		}
3165 		req, ok := args.Req.GetArg(roachpb.Put)
3166 		if !ok {
3167 			return nil
3168 		}
3169 		put := req.(*roachpb.PutRequest)
3170 		if !bytes.Equal(put.Key, key.Load().(roachpb.Key)) {
3171 			return nil
3172 		}
3173 		// Sometimes the logical portion of the timestamp can be non-zero, which
3174 		// makes the overhead non-deterministic.
3175 		args.Cmd.ReplicatedEvalResult.Timestamp.Logical = 0
3176 		atomic.StoreUint32(&overhead, uint32(args.Cmd.Size()-args.Cmd.WriteBatch.Size()))
3177 		// We don't want to print the WriteBatch because it's explicitly
3178 		// excluded from the size computation. Nil'ing it out does not
3179 		// affect the memory held by the caller because neither `args` nor
3180 		// `args.Cmd` are pointers.
3181 		args.Cmd.WriteBatch = nil
3182 		t.Logf(pretty.Sprint(args.Cmd))
3183 		return nil
3184 	}
3185 	tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{
3186 		ServerArgs: base.TestServerArgs{
3187 			Knobs: base.TestingKnobs{
3188 				Store: &kvserver.StoreTestingKnobs{TestingProposalFilter: filter},
3189 			},
3190 		},
3191 	})
3192 	ctx := context.Background()
3193 	defer tc.Stopper().Stop(ctx)
3194 
3195 	db := tc.Server(0).DB()
3196 	// NB: the expected overhead reflects the space overhead currently
3197 	// present in Raft commands. This test will fail if that overhead
3198 	// changes. Try to make this number go down and not up. It slightly
3199 	// undercounts because our proposal filter is called before
3200 	// maxLeaseIndex is filled in. The difference between the user and system
3201 	// overhead is that user ranges do not have rangefeeds on by default whereas
3202 	// system ranges do.
3203 	const (
3204 		expectedUserOverhead uint32 = 42
3205 	)
3206 	t.Run("user-key overhead", func(t *testing.T) {
3207 		userKey := tc.ScratchRange(t)
3208 		k := roachpb.Key(encoding.EncodeStringAscending(userKey, "foo"))
3209 		key.Store(k)
3210 		require.NoError(t, db.Put(ctx, k, "v"))
3211 		require.Equal(t, expectedUserOverhead, atomic.LoadUint32(&overhead))
3212 	})
3213 
3214 }
3215 
3216 // getRangeInfo retrieves range info by performing a get against the provided
3217 // key and setting the ReturnRangeInfo flag to true.
3218 func getRangeInfo(
3219 	ctx context.Context, db *kv.DB, key roachpb.Key,
3220 ) (ri *roachpb.RangeInfo, err error) {
3221 	err = db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
3222 		b := txn.NewBatch()
3223 		b.Header.ReturnRangeInfo = true
3224 		b.AddRawRequest(roachpb.NewGet(key))
3225 		if err = db.Run(ctx, b); err != nil {
3226 			return err
3227 		}
3228 		resp := b.RawResponse()
3229 		ri = &resp.Responses[0].GetInner().Header().RangeInfos[0]
3230 		return nil
3231 	})
3232 	return ri, err
3233 }
3234 
3235 // makeReplicationTargets creates a slice of replication targets where each
3236 // target has a NodeID and StoreID with a value corresponding to an id in ids.
3237 func makeReplicationTargets(ids ...int) (targets []roachpb.ReplicationTarget) {
3238 	for _, id := range ids {
3239 		targets = append(targets, roachpb.ReplicationTarget{
3240 			NodeID:  roachpb.NodeID(id),
3241 			StoreID: roachpb.StoreID(id),
3242 		})
3243 	}
3244 	return targets
3245 }
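
// The helpers above are small, so a usage sketch may help tie them together.
// The function below is an illustrative example only and is not part of the
// original test suite; its name and the particular ids passed to
// makeReplicationTargets are assumptions made for the example.
func exampleGetRangeInfoUsage(ctx context.Context, t *testing.T, db *kv.DB, key roachpb.Key) {
	// Look up the range containing key, including its descriptor and lease.
	ri, err := getRangeInfo(ctx, db, key)
	require.NoError(t, err)
	t.Logf("key %s lives on r%d with lease %s", key, ri.Desc.RangeID, ri.Lease)
	// Build replication targets for nodes/stores 1-3, as a test would when
	// passing targets to replication-change helpers.
	targets := makeReplicationTargets(1, 2, 3)
	require.Len(t, targets, 3)
}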