// Copyright 2014 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver_test

import (
	"bytes"
	"context"
	"fmt"
	"math"
	"math/rand"
	"reflect"
	"sort"
	"strconv"
	"sync/atomic"
	"testing"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/config"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/abortspan"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/rpc"
	"github.com/cockroachdb/cockroach/pkg/server"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
	"github.com/cockroachdb/cockroach/pkg/ts"
	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
	"github.com/cockroachdb/cockroach/pkg/util/encoding"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/cockroach/pkg/util/randutil"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
	"github.com/gogo/protobuf/proto"
	"github.com/stretchr/testify/require"
	"go.etcd.io/etcd/raft/raftpb"
)

// adminSplitArgs creates an AdminSplitRequest for the provided split key.
func adminSplitArgs(splitKey roachpb.Key) *roachpb.AdminSplitRequest {
	return &roachpb.AdminSplitRequest{
		RequestHeader: roachpb.RequestHeader{
			Key: splitKey,
		},
		SplitKey: splitKey,
	}
}

// TestStoreRangeSplitAtIllegalKeys verifies a range cannot be split
// at illegal keys.
func TestStoreRangeSplitAtIllegalKeys(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	cfg := kvserver.TestStoreConfig(nil)
	cfg.TestingKnobs.DisableSplitQueue = true
	cfg.TestingKnobs.DisableMergeQueue = true
	store := createTestStoreWithConfig(t, stopper, cfg)

	for _, key := range []roachpb.Key{
		keys.Meta1Prefix,
		testutils.MakeKey(keys.Meta1Prefix, []byte("a")),
		testutils.MakeKey(keys.Meta1Prefix, roachpb.RKeyMax),
		keys.Meta2KeyMax,
		testutils.MakeKey(keys.Meta2KeyMax, []byte("a")),
		keys.SystemSQLCodec.TablePrefix(10 /* system descriptor ID */),
	} {
		args := adminSplitArgs(key)
		_, pErr := kv.SendWrapped(context.Background(), store.TestSender(), args)
		if !testutils.IsPError(pErr, "cannot split") {
			t.Errorf("%q: unexpected split error %s", key, pErr)
		}
	}
}

// Verify that on a split, only the non-expired abort span records are copied
// into the right hand side of the split.
func TestStoreSplitAbortSpan(t *testing.T) {
	defer leaktest.AfterTest(t)()

	manualClock := hlc.NewManualClock(2400 * time.Hour.Nanoseconds())
	clock := hlc.NewClock(manualClock.UnixNano, time.Millisecond)
	storeCfg := kvserver.TestStoreConfig(clock)
	storeCfg.TestingKnobs.DisableSplitQueue = true
	storeCfg.TestingKnobs.DisableMergeQueue = true

	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	store := createTestStoreWithConfig(t, stopper, storeCfg)
	ctx := context.Background()

	left, middle, right := roachpb.Key("a"), roachpb.Key("b"), roachpb.Key("c")

	txn := func(key roachpb.Key, ts hlc.Timestamp) *roachpb.Transaction {
		txn := roachpb.MakeTransaction("test", key, 0, ts, 0)
		return &txn
	}

	var expAll []roachpb.AbortSpanEntry

	populateAbortSpan := func(key roachpb.Key, ts hlc.Timestamp) *roachpb.ResolveIntentRequest {
		pushee := txn(key, ts)

		// First write an intent on the key...
		incArgs := incrementArgs(key, 1)
		_, pErr := kv.SendWrappedWith(ctx, store.TestSender(), roachpb.Header{Txn: pushee}, incArgs)
		if pErr != nil {
			t.Fatalf("while sending +%v: %s", incArgs, pErr)
		}

		// Then resolve the intent and poison. Without the intent write, the
		// intent resolution would be a no-op and wouldn't leave an AbortSpan
		// entry.
		expAll = append(expAll, roachpb.AbortSpanEntry{
			Key:       key,
			Timestamp: ts,
		})
		return &roachpb.ResolveIntentRequest{
			RequestHeader: roachpb.RequestHeader{
				Key: key,
			},
			IntentTxn: pushee.TxnMeta,
			Status:    roachpb.ABORTED,
			Poison:    true,
		}
	}

	key := func(k roachpb.Key, i int) roachpb.Key {
		var r []byte
		r = append(r, k...)
		r = append(r, []byte(strconv.Itoa(i))...)
		return r
	}

	thresh := kvserverbase.TxnCleanupThreshold.Nanoseconds()
	// Pick a non-gcable and gcable timestamp, respectively. Avoid the clock's
	// exact timestamp because of unpredictable logical ticks.
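	// tsFresh falls within TxnCleanupThreshold of the current clock reading, so
	// its abort span entries are expected to be copied to the RHS; tsStale falls
	// just outside the threshold and is considered GC-able.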
	tsFresh := hlc.Timestamp{WallTime: manualClock.UnixNano() - thresh + 1}
	tsStale := hlc.Timestamp{WallTime: manualClock.UnixNano() - thresh - 1}

	args := []roachpb.Request{
		populateAbortSpan(key(left, 1), tsFresh),
		populateAbortSpan(key(left, 2), tsStale),
		populateAbortSpan(key(middle, 1), tsFresh),
		populateAbortSpan(key(middle, 2), tsStale),
		populateAbortSpan(key(right, 1), tsFresh),
		populateAbortSpan(key(right, 2), tsStale),
		adminSplitArgs(middle),
	}

	// Nothing gets removed from the LHS during the split. This could
	// be done but has to be done carefully to avoid large Raft proposals,
	// and the stats computation needs to be checked carefully.
	expL := []roachpb.AbortSpanEntry{
		{Key: key(left, 1), Timestamp: tsFresh},
		{Key: key(left, 2), Timestamp: tsStale},
		{Key: key(middle, 1), Timestamp: tsFresh},
		{Key: key(middle, 2), Timestamp: tsStale},
		{Key: key(right, 1), Timestamp: tsFresh},
		{Key: key(right, 2), Timestamp: tsStale},
	}

	// But we don't blindly copy everything over to the RHS. Only entries with
	// a recent timestamp are duplicated. This is important because otherwise the
	// Raft command size can blow up and splits fail.
	expR := []roachpb.AbortSpanEntry{
		{Key: key(left, 1), Timestamp: tsFresh},
		{Key: key(middle, 1), Timestamp: tsFresh},
		{Key: key(right, 1), Timestamp: tsFresh},
	}

	for _, arg := range args {
		_, pErr := kv.SendWrapped(ctx, store.TestSender(), arg)
		if pErr != nil {
			t.Fatalf("while sending +%v: %s", arg, pErr)
		}
	}

	collect := func(as *abortspan.AbortSpan) []roachpb.AbortSpanEntry {
		var results []roachpb.AbortSpanEntry
		if err := as.Iterate(ctx, store.Engine(), func(_ roachpb.Key, entry roachpb.AbortSpanEntry) error {
			entry.Priority = 0 // don't care about that
			results = append(results, entry)
			return nil
		}); err != nil {
			t.Fatal(err)
		}
		sort.Slice(results, func(i, j int) bool {
			c := bytes.Compare(results[i].Key, results[j].Key)
			if c == 0 {
				return results[i].Timestamp.Less(results[j].Timestamp)
			}
			return c < 0
		})
		return results
	}

	l := collect(store.LookupReplica(keys.MustAddr(left)).AbortSpan())
	r := collect(store.LookupReplica(keys.MustAddr(right)).AbortSpan())

	if !reflect.DeepEqual(expL, l) {
		t.Fatalf("left hand side: expected %+v, got %+v", expL, l)
	}
	if !reflect.DeepEqual(expR, r) {
		t.Fatalf("right hand side: expected %+v, got %+v", expR, r)
	}
}

// TestStoreRangeSplitAtTablePrefix verifies a range can be split at
// UserTableDataMin and still gossip the SystemConfig properly.
func TestStoreRangeSplitAtTablePrefix(t *testing.T) {
	defer leaktest.AfterTest(t)()
	storeCfg := kvserver.TestStoreConfig(nil)
	storeCfg.TestingKnobs.DisableSplitQueue = true
	storeCfg.TestingKnobs.DisableMergeQueue = true
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	store := createTestStoreWithConfig(t, stopper, storeCfg)

	key := keys.UserTableDataMin
	args := adminSplitArgs(key)
	if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), args); pErr != nil {
		t.Fatalf("%q: split unexpected error: %s", key, pErr)
	}

	var desc sqlbase.TableDescriptor
	descBytes, err := protoutil.Marshal(&desc)
	if err != nil {
		t.Fatal(err)
	}

	// Update SystemConfig to trigger gossip.
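	// Writing a descriptor key in a txn that sets the system config trigger
	// causes the SystemConfig span to be re-gossiped once the txn commits,
	// which is what the gossip callback registered below waits for.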
	if err := store.DB().Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error {
		if err := txn.SetSystemConfigTrigger(); err != nil {
			return err
		}
		// We don't care about the values, just the keys.
		k := sqlbase.MakeDescMetadataKey(keys.SystemSQLCodec, sqlbase.ID(keys.MinUserDescID))
		return txn.Put(ctx, k, &desc)
	}); err != nil {
		t.Fatal(err)
	}

	successChan := make(chan struct{}, 1)
	store.Gossip().RegisterCallback(gossip.KeySystemConfig, func(_ string, content roachpb.Value) {
		contentBytes, err := content.GetBytes()
		if err != nil {
			t.Fatal(err)
		}
		if bytes.Contains(contentBytes, descBytes) {
			select {
			case successChan <- struct{}{}:
			default:
			}
		}
	})

	select {
	case <-time.After(time.Second):
		t.Errorf("expected a schema gossip containing %q, but did not see one", descBytes)
	case <-successChan:
	}
}

// TestStoreRangeSplitInsideRow verifies an attempt to split a range inside of
// a table row will cause a split at a boundary between rows.
func TestStoreRangeSplitInsideRow(t *testing.T) {
	defer leaktest.AfterTest(t)()
	storeCfg := kvserver.TestStoreConfig(nil)
	storeCfg.TestingKnobs.DisableSplitQueue = true
	storeCfg.TestingKnobs.DisableMergeQueue = true
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	store := createTestStoreWithConfig(t, stopper, storeCfg)

	// Manually create some of the column keys corresponding to the table:
	//
	//   CREATE TABLE t (id STRING PRIMARY KEY, col1 INT, col2 INT)
	tableKey := roachpb.RKey(keys.SystemSQLCodec.TablePrefix(keys.MinUserDescID))
	rowKey := roachpb.Key(encoding.EncodeVarintAscending(append([]byte(nil), tableKey...), 1))
	rowKey = encoding.EncodeStringAscending(encoding.EncodeVarintAscending(rowKey, 1), "a")
	col1Key, err := keys.EnsureSafeSplitKey(keys.MakeFamilyKey(append([]byte(nil), rowKey...), 1))
	if err != nil {
		t.Fatal(err)
	}
	col2Key, err := keys.EnsureSafeSplitKey(keys.MakeFamilyKey(append([]byte(nil), rowKey...), 2))
	if err != nil {
		t.Fatal(err)
	}

	// We don't care about the value, so just store any old thing.
	if err := store.DB().Put(context.Background(), col1Key, "column 1"); err != nil {
		t.Fatal(err)
	}
	if err := store.DB().Put(context.Background(), col2Key, "column 2"); err != nil {
		t.Fatal(err)
	}

	// Split between col1Key and col2Key by splitting before col2Key.
	args := adminSplitArgs(col2Key)
	_, pErr := kv.SendWrapped(context.Background(), store.TestSender(), args)
	if pErr != nil {
		t.Fatalf("%s: split unexpected error: %s", col1Key, pErr)
	}

	repl1 := store.LookupReplica(roachpb.RKey(col1Key))
	repl2 := store.LookupReplica(roachpb.RKey(col2Key))

	// Verify the two columns are still on the same range.
	if !reflect.DeepEqual(repl1, repl2) {
		t.Fatalf("%s: ranges differ: %+v vs %+v", col1Key, repl1, repl2)
	}
	// Verify we split on a row key.
	if startKey := repl1.Desc().StartKey; !startKey.Equal(rowKey) {
		t.Fatalf("%s: expected split on %s, but found %s", col1Key, rowKey, startKey)
	}

	// Verify the previous range was split on a row key.
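	// The LHS of the split should end exactly at the row boundary (rowKey)
	// rather than at the column-family key that was passed to AdminSplit.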
	repl3 := store.LookupReplica(tableKey)
	if endKey := repl3.Desc().EndKey; !endKey.Equal(rowKey) {
		t.Fatalf("%s: expected split on %s, but found %s", col1Key, rowKey, endKey)
	}
}

// TestStoreRangeSplitIntents executes a split of a range and verifies
// that all intents are cleared and the transaction record cleaned up.
func TestStoreRangeSplitIntents(t *testing.T) {
	defer leaktest.AfterTest(t)()
	storeCfg := kvserver.TestStoreConfig(nil)
	storeCfg.TestingKnobs.DisableSplitQueue = true
	storeCfg.TestingKnobs.DisableMergeQueue = true
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	store := createTestStoreWithConfig(t, stopper, storeCfg)

	// First, write some values left and right of the proposed split key.
	pArgs := putArgs([]byte("c"), []byte("foo"))
	if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), pArgs); pErr != nil {
		t.Fatal(pErr)
	}
	pArgs = putArgs([]byte("x"), []byte("bar"))
	if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), pArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// Split the range.
	splitKey := roachpb.Key("m")
	args := adminSplitArgs(splitKey)
	if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), args); pErr != nil {
		t.Fatal(pErr)
	}

	// Verify no intents remain on range descriptor keys.
	splitKeyAddr, err := keys.Addr(splitKey)
	if err != nil {
		t.Fatal(err)
	}
	for _, key := range []roachpb.Key{keys.RangeDescriptorKey(roachpb.RKeyMin), keys.RangeDescriptorKey(splitKeyAddr)} {
		if _, _, err := storage.MVCCGet(
			context.Background(), store.Engine(), key, store.Clock().Now(), storage.MVCCGetOptions{},
		); err != nil {
			t.Errorf("failed to read consistent range descriptor for key %s: %+v", key, err)
		}
	}

	txnPrefix := func(key roachpb.Key) roachpb.Key {
		rk, err := keys.Addr(key)
		if err != nil {
			t.Fatal(err)
		}
		return keys.MakeRangeKey(rk, keys.LocalTransactionSuffix, nil)
	}
	// Verify the transaction record is gone.
	start := storage.MakeMVCCMetadataKey(keys.MakeRangeKeyPrefix(roachpb.RKeyMin))
	end := storage.MakeMVCCMetadataKey(keys.MakeRangeKeyPrefix(roachpb.RKeyMax))
	iter := store.Engine().NewIterator(storage.IterOptions{UpperBound: roachpb.KeyMax})

	defer iter.Close()
	for iter.SeekGE(start); ; iter.Next() {
		if ok, err := iter.Valid(); err != nil {
			t.Fatal(err)
		} else if !ok || !iter.UnsafeKey().Less(end) {
			break
		}

		if bytes.HasPrefix([]byte(iter.Key().Key), txnPrefix(roachpb.KeyMin)) ||
			bytes.HasPrefix([]byte(iter.Key().Key), txnPrefix(splitKey)) {
			t.Errorf("unexpected system key: %s; txn record should have been cleaned up", iter.Key())
		}
	}
}

// TestStoreRangeSplitAtRangeBounds verifies that attempting to
// split a range at its start key is a no-op and does not actually
// perform a split (would create zero-length range!). This sort
// of thing might happen in the wild if two split requests arrived for
// the same key. The first one succeeds and the second would try to split
// at the start of the newly split range.
func TestStoreRangeSplitAtRangeBounds(t *testing.T) {
	defer leaktest.AfterTest(t)()
	storeCfg := kvserver.TestStoreConfig(nil)
	storeCfg.TestingKnobs.DisableSplitQueue = true
	storeCfg.TestingKnobs.DisableMergeQueue = true
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	store := createTestStoreWithConfig(t, stopper, storeCfg)

	// Split range 1 at an arbitrary key.
	key := roachpb.Key("a")
	rngID := store.LookupReplica(roachpb.RKey(key)).RangeID
	h := roachpb.Header{RangeID: rngID}
	args := adminSplitArgs(key)
	if _, pErr := kv.SendWrappedWith(context.Background(), store, h, args); pErr != nil {
		t.Fatal(pErr)
	}
	replCount := store.ReplicaCount()

	// An AdminSplit request sent to the end of the old range
	// should fail with a RangeKeyMismatchError.
	_, pErr := kv.SendWrappedWith(context.Background(), store, h, args)
	if _, ok := pErr.GetDetail().(*roachpb.RangeKeyMismatchError); !ok {
		t.Fatalf("expected RangeKeyMismatchError, found: %v", pErr)
	}

	// An AdminSplit request sent to the start of the new range
	// should succeed but no new ranges should be created.
	newRng := store.LookupReplica(roachpb.RKey(key))
	h.RangeID = newRng.RangeID
	if _, pErr := kv.SendWrappedWith(context.Background(), store, h, args); pErr != nil {
		t.Fatal(pErr)
	}

	newReplCount := store.ReplicaCount()
	if replCount != newReplCount {
		t.Fatalf("splitting at a range boundary should not create a new range; before second split "+
			"found %d ranges, after second split found %d ranges", replCount, newReplCount)
	}
}

// TestSplitTriggerRaftSnapshotRace verifies that when an uninitialized Replica
// resulting from a split hasn't been initialized via the split trigger yet, a
// grace period prevents the replica from requesting an errant Raft snapshot.
// This is verified by running a number of splits and asserting that no Raft
// snapshots are observed. As a nice side effect, this also verifies that log
// truncations don't cause any Raft snapshots in this test.
func TestSplitTriggerRaftSnapshotRace(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()
	const numNodes = 3
	var args base.TestClusterArgs
	// NB: the merge queue is enabled for additional "chaos". Note that the test
	// uses three nodes and so there is no replica movement, which would
	// otherwise tickle Raft snapshots for unrelated reasons.
	tc := testcluster.StartTestCluster(t, numNodes, args)
	defer tc.Stopper().Stop(ctx)

	numSplits := 100
	if util.RaceEnabled {
		// Running 100 splits is overkill in race builds.
		numSplits = 10
	}
	perm := rand.Perm(numSplits)
	idx := int32(-1) // accessed atomically

	numRaftSnaps := func(when string) int {
		var totalSnaps int
		for i := 0; i < numNodes; i++ {
			var n int // num rows (sanity check against test rotting)
			var c int // num Raft snapshots
			if err := tc.ServerConn(i).QueryRow(`
SELECT count(*), sum(value) FROM crdb_internal.node_metrics WHERE
	name = 'range.snapshots.normal-applied'
`).Scan(&n, &c); err != nil {
				t.Fatal(err)
			}
			if expRows := 1; n != expRows {
				t.Fatalf("%s: expected %d rows, got %d", when, expRows, n)
			}
			totalSnaps += c
		}
		return totalSnaps
	}

	// There are usually no raft snaps before, but there is a race condition where
	// they can occasionally happen during upreplication.
	numSnapsBefore := numRaftSnaps("before")

	doSplit := func(ctx context.Context, _ int) error {
		_, _, err := tc.SplitRange(
			[]byte(fmt.Sprintf("key-%d", perm[atomic.AddInt32(&idx, 1)])))
		return err
	}

	if err := ctxgroup.GroupWorkers(ctx, numSplits, doSplit); err != nil {
		t.Fatal(err)
	}

	// Check that no snaps happened during the splits.
	require.Equal(t, numSnapsBefore, numRaftSnaps("after"))
}

// TestStoreRangeSplitIdempotency executes a split of a range and
// verifies that the resulting ranges respond to the right key ranges
// and that their stats have been properly accounted for and requests
// can't be replayed.
func TestStoreRangeSplitIdempotency(t *testing.T) {
	defer leaktest.AfterTest(t)()
	storeCfg := kvserver.TestStoreConfig(nil)
	storeCfg.TestingKnobs.DisableSplitQueue = true
	storeCfg.TestingKnobs.DisableMergeQueue = true
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	store := createTestStoreWithOpts(t,
		testStoreOpts{
			// This test was written before the test stores were able to start with
			// more than one range and is not prepared to handle many ranges.
			dontCreateSystemRanges: true,
			cfg:                    &storeCfg},
		stopper)
	rangeID := roachpb.RangeID(1)
	splitKey := roachpb.Key("m")
	content := roachpb.Key("asdvb")

	// First, write some values left and right of the proposed split key.
	pArgs := putArgs([]byte("c"), content)
	if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), pArgs); pErr != nil {
		t.Fatal(pErr)
	}
	pArgs = putArgs([]byte("x"), content)
	if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), pArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// Increments are a good way of testing idempotency. Up here, we
	// address them to the original range, then later to the one that
	// contains the key.
	txn := roachpb.MakeTransaction("test", []byte("c"), 10, store.Clock().Now(), 0)
	lIncArgs := incrementArgs([]byte("apoptosis"), 100)
	lTxn := txn
	lTxn.Sequence++
	lIncArgs.Sequence = lTxn.Sequence
	if _, pErr := kv.SendWrappedWith(context.Background(), store.TestSender(), roachpb.Header{
		Txn: &lTxn,
	}, lIncArgs); pErr != nil {
		t.Fatal(pErr)
	}
	rIncArgs := incrementArgs([]byte("wobble"), 10)
	rTxn := txn
	rTxn.Sequence++
	rIncArgs.Sequence = rTxn.Sequence
	if _, pErr := kv.SendWrappedWith(context.Background(), store.TestSender(), roachpb.Header{
		Txn: &rTxn,
	}, rIncArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// Get the original stats for key and value bytes.
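	// The baseline stats are loaded directly from the engine via the range's
	// state loader; at this point range 1 still covers the whole keyspace and
	// contains everything written above, since the split hasn't been issued yet.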
	ms, err := stateloader.Make(rangeID).LoadMVCCStats(context.Background(), store.Engine())
	if err != nil {
		t.Fatal(err)
	}
	keyBytes, valBytes := ms.KeyBytes, ms.ValBytes

	// Split the range.
	args := adminSplitArgs(splitKey)
	if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), args); pErr != nil {
		t.Fatal(pErr)
	}

	// Verify no intents remain on range descriptor keys.
	splitKeyAddr, err := keys.Addr(splitKey)
	if err != nil {
		t.Fatal(err)
	}
	for _, key := range []roachpb.Key{keys.RangeDescriptorKey(roachpb.RKeyMin), keys.RangeDescriptorKey(splitKeyAddr)} {
		if _, _, err := storage.MVCCGet(
			context.Background(), store.Engine(), key, store.Clock().Now(), storage.MVCCGetOptions{},
		); err != nil {
			t.Fatal(err)
		}
	}

	repl := store.LookupReplica(roachpb.RKeyMin)
	rngDesc := repl.Desc()
	newRng := store.LookupReplica([]byte("m"))
	newRngDesc := newRng.Desc()
	if !bytes.Equal(newRngDesc.StartKey, splitKey) || !bytes.Equal(splitKey, rngDesc.EndKey) {
		t.Errorf("ranges mismatched, wanted %q=%q=%q", newRngDesc.StartKey, splitKey, rngDesc.EndKey)
	}
	if !bytes.Equal(newRngDesc.EndKey, roachpb.RKeyMax) || !bytes.Equal(rngDesc.StartKey, roachpb.RKeyMin) {
		t.Errorf("new ranges do not cover KeyMin-KeyMax, but only %q-%q", rngDesc.StartKey, newRngDesc.EndKey)
	}

	// Try to get values from both left and right of where the split happened.
	gArgs := getArgs([]byte("c"))
	if reply, pErr := kv.SendWrapped(context.Background(), store.TestSender(), gArgs); pErr != nil {
		t.Fatal(pErr)
	} else if replyBytes, pErr := reply.(*roachpb.GetResponse).Value.GetBytes(); pErr != nil {
		t.Fatal(pErr)
	} else if !bytes.Equal(replyBytes, content) {
		t.Fatalf("actual value %q did not match expected value %q", replyBytes, content)
	}
	gArgs = getArgs([]byte("x"))
	if reply, pErr := kv.SendWrappedWith(context.Background(), store.TestSender(), roachpb.Header{
		RangeID: newRng.RangeID,
	}, gArgs); pErr != nil {
		t.Fatal(pErr)
	} else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil {
		t.Fatal(err)
	} else if !bytes.Equal(replyBytes, content) {
		t.Fatalf("actual value %q did not match expected value %q", replyBytes, content)
	}

	// Send out an increment request copied from above (same txn/sequence)
	// which remains in the old range.
	_, pErr := kv.SendWrappedWith(context.Background(), store.TestSender(), roachpb.Header{
		Txn: &lTxn,
	}, lIncArgs)
	if pErr != nil {
		t.Fatal(pErr)
	}

	// Send out the same increment copied from above (same txn/sequence), but
	// now to the newly created range (which should hold that key).
	_, pErr = kv.SendWrappedWith(context.Background(), store.TestSender(), roachpb.Header{
		RangeID: newRng.RangeID,
		Txn:     &rTxn,
	}, rIncArgs)
	if pErr != nil {
		t.Fatal(pErr)
	}

	// Compare stats of split ranges to ensure they are non-zero and
	// exceed the original range when summed.
	left, err := stateloader.Make(rangeID).LoadMVCCStats(context.Background(), store.Engine())
	if err != nil {
		t.Fatal(err)
	}
	lKeyBytes, lValBytes := left.KeyBytes, left.ValBytes
	right, err := stateloader.Make(newRng.RangeID).LoadMVCCStats(context.Background(), store.Engine())
	if err != nil {
		t.Fatal(err)
	}
	rKeyBytes, rValBytes := right.KeyBytes, right.ValBytes

	if lKeyBytes == 0 || rKeyBytes == 0 {
		t.Errorf("expected non-zero key bytes; got %d, %d", lKeyBytes, rKeyBytes)
	}
	if lValBytes == 0 || rValBytes == 0 {
		t.Errorf("expected non-zero val bytes; got %d, %d", lValBytes, rValBytes)
	}
	if lKeyBytes+rKeyBytes <= keyBytes {
		t.Errorf("left + right key bytes don't match; %d + %d <= %d", lKeyBytes, rKeyBytes, keyBytes)
	}
	if lValBytes+rValBytes <= valBytes {
		t.Errorf("left + right val bytes don't match; %d + %d <= %d", lValBytes, rValBytes, valBytes)
	}
}

// TestStoreRangeSplitStats starts by splitting the system keys from user-space
// keys and verifying that the user space side of the split (which is empty)
// has all zeros for stats. It then writes random data to the user space side,
// splits it halfway and verifies the two splits have stats exactly equaling
// the pre-split.
func TestStoreRangeSplitStats(t *testing.T) {
	defer leaktest.AfterTest(t)()
	manual := hlc.NewManualClock(123)
	storeCfg := kvserver.TestStoreConfig(hlc.NewClock(manual.UnixNano, time.Nanosecond))
	storeCfg.TestingKnobs.DisableSplitQueue = true
	storeCfg.TestingKnobs.DisableMergeQueue = true
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	store := createTestStoreWithConfig(t, stopper, storeCfg)
	ctx := context.Background()

	// Split the range after the last table data key.
	keyPrefix := keys.SystemSQLCodec.TablePrefix(keys.MinUserDescID)
	args := adminSplitArgs(keyPrefix)
	if _, pErr := kv.SendWrapped(ctx, store.TestSender(), args); pErr != nil {
		t.Fatal(pErr)
	}
	// Verify empty range has empty stats.
	repl := store.LookupReplica(roachpb.RKey(keyPrefix))
	// NOTE that this value is expected to change over time, depending on what
	// we store in the sys-local keyspace. Update it accordingly for this test.
	empty := enginepb.MVCCStats{LastUpdateNanos: manual.UnixNano()}
	if err := verifyRangeStats(store.Engine(), repl.RangeID, empty); err != nil {
		t.Fatal(err)
	}

	// Write random data.
	midKey := kvserver.WriteRandomDataToRange(t, store, repl.RangeID, keyPrefix)

	// Get the range stats now that we have data.
	snap := store.Engine().NewSnapshot()
	defer snap.Close()
	ms, err := stateloader.Make(repl.RangeID).LoadMVCCStats(ctx, snap)
	if err != nil {
		t.Fatal(err)
	}
	if err := verifyRecomputedStats(snap, repl.Desc(), ms, manual.UnixNano()); err != nil {
		t.Fatalf("failed to verify range's stats before split: %+v", err)
	}
	if inMemMS := repl.GetMVCCStats(); inMemMS != ms {
		t.Fatalf("in-memory and on-disk diverged:\n%+v\n!=\n%+v", inMemMS, ms)
	}

	manual.Increment(100)

	// Split the range at approximate halfway point.
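	// The split is addressed explicitly to the existing range via its RangeID
	// header; the AdminSplit then carves the RHS out at midKey.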
	args = adminSplitArgs(midKey)
	if _, pErr := kv.SendWrappedWith(ctx, store.TestSender(), roachpb.Header{
		RangeID: repl.RangeID,
	}, args); pErr != nil {
		t.Fatal(pErr)
	}

	snap = store.Engine().NewSnapshot()
	defer snap.Close()
	msLeft, err := stateloader.Make(repl.RangeID).LoadMVCCStats(ctx, snap)
	if err != nil {
		t.Fatal(err)
	}
	replRight := store.LookupReplica(midKey)
	msRight, err := stateloader.Make(replRight.RangeID).LoadMVCCStats(ctx, snap)
	if err != nil {
		t.Fatal(err)
	}

	// The stats should be exactly equal when added.
	expMS := enginepb.MVCCStats{
		LiveBytes:   msLeft.LiveBytes + msRight.LiveBytes,
		KeyBytes:    msLeft.KeyBytes + msRight.KeyBytes,
		ValBytes:    msLeft.ValBytes + msRight.ValBytes,
		IntentBytes: msLeft.IntentBytes + msRight.IntentBytes,
		LiveCount:   msLeft.LiveCount + msRight.LiveCount,
		KeyCount:    msLeft.KeyCount + msRight.KeyCount,
		ValCount:    msLeft.ValCount + msRight.ValCount,
		IntentCount: msLeft.IntentCount + msRight.IntentCount,
	}
	ms.SysBytes, ms.SysCount = 0, 0
	ms.LastUpdateNanos = 0
	if expMS != ms {
		t.Errorf("expected left plus right ranges to equal original, but\n %+v\n+\n %+v\n!=\n %+v", msLeft, msRight, ms)
	}

	// Stats should both have the new timestamp.
	now := manual.UnixNano()
	if lTs := msLeft.LastUpdateNanos; lTs != now {
		t.Errorf("expected left range stats to have new timestamp, want %d, got %d", now, lTs)
	}
	if rTs := msRight.LastUpdateNanos; rTs != now {
		t.Errorf("expected right range stats to have new timestamp, want %d, got %d", now, rTs)
	}

	// Stats should agree with recomputation.
	if err := verifyRecomputedStats(snap, repl.Desc(), msLeft, now); err != nil {
		t.Fatalf("failed to verify left range's stats after split: %+v", err)
	}
	if err := verifyRecomputedStats(snap, replRight.Desc(), msRight, now); err != nil {
		t.Fatalf("failed to verify right range's stats after split: %+v", err)
	}
}

// RaftMessageHandlerInterceptor wraps a kvserver.RaftMessageHandler. It
// delegates all methods to the underlying kvserver.RaftMessageHandler, except
// that HandleSnapshot calls handleSnapshotFilter with the snapshot request
// header before delegating to the underlying HandleSnapshot method.
type RaftMessageHandlerInterceptor struct {
	kvserver.RaftMessageHandler
	handleSnapshotFilter func(header *kvserver.SnapshotRequest_Header)
}

func (mh RaftMessageHandlerInterceptor) HandleSnapshot(
	header *kvserver.SnapshotRequest_Header, respStream kvserver.SnapshotResponseStream,
) error {
	mh.handleSnapshotFilter(header)
	return mh.RaftMessageHandler.HandleSnapshot(header, respStream)
}

// TestStoreEmptyRangeSnapshotSize tests that the snapshot request header for a
// range that contains no user data (an "empty" range) has RangeSize == 0. This
// is arguably a bug, because system data like the range descriptor and raft log
// should also count towards the size of the snapshot. Currently, though, this
// property conveniently allows us to optimize the rebalancing of empty ranges
// by throttling snapshots of empty ranges separately from non-empty snapshots.
//
// If you change the accounting of RangeSize such that this test breaks, please
// preserve the optimization by introducing an alternative means of identifying
// snapshot requests for empty or near-empty ranges, and then adjust this test
// accordingly.
func TestStoreEmptyRangeSnapshotSize(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()

	// Disable the replicate queue, the split queue, and the merge queue as we
	// want to control rebalancing, splits, and merges ourselves.
	sc := kvserver.TestStoreConfig(nil)
	sc.TestingKnobs.DisableReplicateQueue = true
	sc.TestingKnobs.DisableSplitQueue = true
	sc.TestingKnobs.DisableMergeQueue = true

	mtc := &multiTestContext{storeConfig: &sc}
	defer mtc.Stop()
	mtc.Start(t, 2)

	// Split the range after the last table data key to get a range that contains
	// no user data.
	splitKey := keys.SystemSQLCodec.TablePrefix(keys.MinUserDescID)
	splitArgs := adminSplitArgs(splitKey)
	if _, err := kv.SendWrapped(ctx, mtc.distSenders[0], splitArgs); err != nil {
		t.Fatal(err)
	}

	// Wrap store 1's message handler to intercept and record all incoming
	// snapshot request headers.
	messageRecorder := struct {
		syncutil.Mutex
		headers []*kvserver.SnapshotRequest_Header
	}{}
	messageHandler := RaftMessageHandlerInterceptor{
		RaftMessageHandler: mtc.stores[1],
		handleSnapshotFilter: func(header *kvserver.SnapshotRequest_Header) {
			// Each snapshot request is handled in a new goroutine, so we need
			// synchronization.
			messageRecorder.Lock()
			defer messageRecorder.Unlock()
			messageRecorder.headers = append(messageRecorder.headers, header)
		},
	}
	mtc.transport.Listen(mtc.stores[1].StoreID(), messageHandler)

	// Replicate the newly-split range to trigger a snapshot request from store 0
	// to store 1.
	rangeID := mtc.stores[0].LookupReplica(roachpb.RKey(splitKey)).RangeID
	mtc.replicateRange(rangeID, 1)

	// Verify that we saw at least one snapshot request.
	messageRecorder.Lock()
	defer messageRecorder.Unlock()
	if a := len(messageRecorder.headers); a < 1 {
		t.Fatalf("expected at least one snapshot header, but got %d", a)
	}
	for i, header := range messageRecorder.headers {
		if e, a := header.State.Desc.RangeID, rangeID; e != a {
			t.Errorf("%d: expected RangeID to be %d, but got %d", i, e, a)
		}
		if header.RangeSize != 0 {
			t.Errorf("%d: expected RangeSize to be 0, but got %d", i, header.RangeSize)
		}
	}
}

// TestStoreRangeSplitStatsWithMerges starts by splitting the system keys from
// user-space keys and verifying that the user space side of the split (which is
// empty) has all zeros for stats. It then issues a number of Merge requests to
// the user space side, simulating TimeSeries data. Finally, the test splits the
// user space side halfway and verifies the stats on either side of the split are
// equal to a recomputation.
//
// Note that unlike TestStoreRangeSplitStats, we do not check if the two halves of
// the split's stats are equal to the pre-split stats when added, because this will
// not be true of ranges populated with Merge requests. The reason for this is that
// Merge requests' impact on MVCCStats is only estimated. See updateStatsOnMerge.
func TestStoreRangeSplitStatsWithMerges(t *testing.T) {
	defer leaktest.AfterTest(t)()
	manual := hlc.NewManualClock(123)
	storeCfg := kvserver.TestStoreConfig(hlc.NewClock(manual.UnixNano, time.Nanosecond))
	storeCfg.TestingKnobs.DisableSplitQueue = true
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	store := createTestStoreWithConfig(t, stopper, storeCfg)
	ctx := context.Background()

	// Split the range after the last table data key.
	keyPrefix := keys.SystemSQLCodec.TablePrefix(keys.MinUserDescID)
	args := adminSplitArgs(keyPrefix)
	if _, pErr := kv.SendWrapped(ctx, store.TestSender(), args); pErr != nil {
		t.Fatal(pErr)
	}
	// Verify empty range has empty stats.
	repl := store.LookupReplica(roachpb.RKey(keyPrefix))
	// NOTE that this value is expected to change over time, depending on what
	// we store in the sys-local keyspace. Update it accordingly for this test.
	empty := enginepb.MVCCStats{LastUpdateNanos: manual.UnixNano()}
	if err := verifyRangeStats(store.Engine(), repl.RangeID, empty); err != nil {
		t.Fatal(err)
	}

	// Write random TimeSeries data.
	midKey := writeRandomTimeSeriesDataToRange(t, store, repl.RangeID, keyPrefix)
	manual.Increment(100)

	// Split the range at approximate halfway point.
	args = adminSplitArgs(midKey)
	if _, pErr := kv.SendWrappedWith(ctx, store.TestSender(), roachpb.Header{
		RangeID: repl.RangeID,
	}, args); pErr != nil {
		t.Fatal(pErr)
	}

	snap := store.Engine().NewSnapshot()
	defer snap.Close()
	msLeft, err := stateloader.Make(repl.RangeID).LoadMVCCStats(ctx, snap)
	if err != nil {
		t.Fatal(err)
	}
	replRight := store.LookupReplica(midKey)
	msRight, err := stateloader.Make(replRight.RangeID).LoadMVCCStats(ctx, snap)
	if err != nil {
		t.Fatal(err)
	}

	// Stats should both have the new timestamp.
	now := manual.UnixNano()
	if lTs := msLeft.LastUpdateNanos; lTs != now {
		t.Errorf("expected left range stats to have new timestamp, want %d, got %d", now, lTs)
	}
	if rTs := msRight.LastUpdateNanos; rTs != now {
		t.Errorf("expected right range stats to have new timestamp, want %d, got %d", now, rTs)
	}

	// Stats should agree with recomputation.
	if err := verifyRecomputedStats(snap, repl.Desc(), msLeft, now); err != nil {
		t.Fatalf("failed to verify left range's stats after split: %+v", err)
	}
	if err := verifyRecomputedStats(snap, replRight.Desc(), msRight, now); err != nil {
		t.Fatalf("failed to verify right range's stats after split: %+v", err)
	}
}

// fillRange writes keys with the given prefix and associated values
// until bytes bytes have been written or the given range has split.
func fillRange(
	t *testing.T,
	store *kvserver.Store,
	rangeID roachpb.RangeID,
	prefix roachpb.Key,
	bytes int64,
	singleKey bool,
) {
	src := rand.New(rand.NewSource(0))
	var key []byte
	for {
		ms, err := stateloader.Make(rangeID).LoadMVCCStats(context.Background(), store.Engine())
		if err != nil {
			t.Fatal(err)
		}
		keyBytes, valBytes := ms.KeyBytes, ms.ValBytes
		if keyBytes+valBytes >= bytes {
			return
		}
		if key == nil || !singleKey {
			key = append(append([]byte(nil), prefix...), randutil.RandBytes(src, 100)...)
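			// The column-family suffix makes each generated key look like a SQL
			// row key, which lets the split queue find safe split points between
			// rows; in singleKey mode the same key is written over and over, so
			// no valid split key exists at all.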
			key = keys.MakeFamilyKey(key, src.Uint32())
		}
		val := randutil.RandBytes(src, int(src.Int31n(1<<8)))
		pArgs := putArgs(key, val)
		_, pErr := kv.SendWrappedWith(context.Background(), store, roachpb.Header{
			RangeID: rangeID,
		}, pArgs)
		// When the split occurs in the background, our writes may start failing.
		// We know we can stop writing when this happens.
		if _, ok := pErr.GetDetail().(*roachpb.RangeKeyMismatchError); ok {
			return
		} else if pErr != nil {
			t.Fatal(pErr)
		}
	}
}

// TestStoreZoneUpdateAndRangeSplit verifies that modifying the zone
// configuration changes range max bytes and Range.maybeSplit() takes
// max bytes into account when deciding whether to enqueue a range for
// splitting. It further verifies that the range is in fact split on
// exceeding the zone's RangeMaxBytes.
func TestStoreZoneUpdateAndRangeSplit(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	storeCfg := kvserver.TestStoreConfig(nil /* clock */)
	storeCfg.TestingKnobs.DisableMergeQueue = true
	store := createTestStoreWithConfig(t, stopper, storeCfg)
	config.TestingSetupZoneConfigHook(stopper)

	const maxBytes = 1 << 16
	// Set max bytes.
	descID := uint32(keys.MinUserDescID)
	zoneConfig := zonepb.DefaultZoneConfig()
	zoneConfig.RangeMaxBytes = proto.Int64(maxBytes)
	config.TestingSetZoneConfig(descID, zoneConfig)

	// Trigger gossip callback.
	if err := store.Gossip().AddInfoProto(gossip.KeySystemConfig, &config.SystemConfigEntries{}, 0); err != nil {
		t.Fatal(err)
	}

	tableBoundary := keys.SystemSQLCodec.TablePrefix(descID)

	{
		var repl *kvserver.Replica

		// Wait for the range to be split along table boundaries.
		expectedRSpan := roachpb.RSpan{Key: roachpb.RKey(tableBoundary), EndKey: roachpb.RKeyMax}
		testutils.SucceedsSoon(t, func() error {
			repl = store.LookupReplica(roachpb.RKey(tableBoundary))
			if actualRSpan := repl.Desc().RSpan(); !actualRSpan.Equal(expectedRSpan) {
				return errors.Errorf("expected range %s to span %s", repl, expectedRSpan)
			}
			return nil
		})

		// Check range's max bytes settings.
		if actualMaxBytes := repl.GetMaxBytes(); actualMaxBytes != maxBytes {
			t.Fatalf("range %s max bytes mismatch, got: %d, expected: %d", repl, actualMaxBytes, maxBytes)
		}

		// Look in the range after the prefix we're writing to.
		fillRange(t, store, repl.RangeID, tableBoundary, maxBytes, false /* singleKey */)
	}

	// Verify that the range is in fact split.
	testutils.SucceedsSoon(t, func() error {
		repl := store.LookupReplica(roachpb.RKey(keys.SystemSQLCodec.TablePrefix(descID + 1)))
		rngDesc := repl.Desc()
		rngStart, rngEnd := rngDesc.StartKey, rngDesc.EndKey
		if rngStart.Equal(tableBoundary) || !rngEnd.Equal(roachpb.RKeyMax) {
			return errors.Errorf("range %s has not yet split", repl)
		}
		return nil
	})
}

// TestStoreRangeSplitWithMaxBytesUpdate tests a scenario where a new
// zone config that updates the max bytes is set and triggers a range
// split.
func TestStoreRangeSplitWithMaxBytesUpdate(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	storeCfg := kvserver.TestStoreConfig(nil /* clock */)
	storeCfg.TestingKnobs.DisableMergeQueue = true
	store := createTestStoreWithConfig(t, stopper, storeCfg)
	config.TestingSetupZoneConfigHook(stopper)

	origRng := store.LookupReplica(roachpb.RKeyMin)

	// Set max bytes.
	const maxBytes = 1 << 16
	descID := uint32(keys.MinUserDescID)
	zoneConfig := zonepb.DefaultZoneConfig()
	zoneConfig.RangeMaxBytes = proto.Int64(maxBytes)
	config.TestingSetZoneConfig(descID, zoneConfig)

	// Trigger gossip callback.
	if err := store.Gossip().AddInfoProto(gossip.KeySystemConfig, &config.SystemConfigEntries{}, 0); err != nil {
		t.Fatal(err)
	}

	// Verify that the range is split and the new range has the correct max bytes.
	testutils.SucceedsSoon(t, func() error {
		newRng := store.LookupReplica(roachpb.RKey(keys.SystemSQLCodec.TablePrefix(descID)))
		if newRng.RangeID == origRng.RangeID {
			return errors.Errorf("expected new range created by split")
		}
		if newRng.GetMaxBytes() != maxBytes {
			return errors.Errorf("expected %d max bytes for the new range, but got %d",
				maxBytes, newRng.GetMaxBytes())
		}
		return nil
	})
}

// TestStoreRangeSplitBackpressureWrites tests that ranges that grow too large
// begin enforcing backpressure on writes until the range is able to split. In
// the test, a range is filled past the point where it will begin applying
// backpressure. Splits are then blocked in-flight and we test that any future
// writes wait until the split succeeds and reduces the range size beneath the
// backpressure threshold.
func TestStoreRangeSplitBackpressureWrites(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// Backpressured writes react differently depending on whether there is an
	// ongoing split or not. If there is an ongoing split, then the writes wait
	// on the split and are only allowed to proceed if the split succeeds. If
	// there is not an ongoing split, or if the range is unsplittable and in the
	// split queue's purgatory, the write is rejected immediately.
	testCases := []struct {
		splitOngoing    bool
		splitErr        bool
		splitImpossible bool
		expErr          string
	}{
		{splitOngoing: true, splitErr: false, expErr: ""},
		{splitOngoing: true, splitErr: true, expErr: "split failed while applying backpressure.* boom"},
		{splitOngoing: false, expErr: ""},
		{splitImpossible: true, expErr: "split failed while applying backpressure.* could not find valid split key"},
	}
	for _, tc := range testCases {
		var name string
		if tc.splitImpossible {
			name = fmt.Sprintf("splitImpossible=%t", tc.splitImpossible)
		} else {
			name = fmt.Sprintf("splitOngoing=%t,splitErr=%t", tc.splitOngoing, tc.splitErr)
		}
		t.Run(name, func(t *testing.T) {
			var activateSplitFilter int32
			splitKey := roachpb.RKey(keys.UserTableDataMin)
			splitPending, blockSplits := make(chan struct{}), make(chan struct{})
			storeCfg := kvserver.TestStoreConfig(nil)
			// Set maxBytes to something small so we can exceed the maximum split
			// size without adding 2x64MB of data.
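			// 1 << 16 bytes is 64 KiB, so the range can be filled past the
			// backpressure threshold almost immediately.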
			const maxBytes = 1 << 16
			storeCfg.DefaultZoneConfig.RangeMaxBytes = proto.Int64(maxBytes)
			storeCfg.TestingKnobs.DisableGCQueue = true
			storeCfg.TestingKnobs.DisableMergeQueue = true
			storeCfg.TestingKnobs.DisableSplitQueue = true
			storeCfg.TestingKnobs.TestingRequestFilter =
				func(_ context.Context, ba roachpb.BatchRequest) *roachpb.Error {
					for _, req := range ba.Requests {
						if cPut, ok := req.GetInner().(*roachpb.ConditionalPutRequest); ok {
							if cPut.Key.Equal(keys.RangeDescriptorKey(splitKey)) {
								if atomic.CompareAndSwapInt32(&activateSplitFilter, 1, 0) {
									splitPending <- struct{}{}
									<-blockSplits
									if tc.splitErr {
										return roachpb.NewErrorf("boom")
									}
								}
							}
						}
					}
					return nil
				}

			ctx := context.Background()
			stopper := stop.NewStopper()
			defer stopper.Stop(ctx)
			store := createTestStoreWithConfig(t, stopper, storeCfg)

			// Split at the split key.
			sArgs := adminSplitArgs(splitKey.AsRawKey())
			repl := store.LookupReplica(splitKey)
			if _, pErr := kv.SendWrappedWith(ctx, store.TestSender(), roachpb.Header{
				RangeID: repl.RangeID,
			}, sArgs); pErr != nil {
				t.Fatal(pErr)
			}

			// Fill the new range past the point where writes should backpressure.
			repl = store.LookupReplica(splitKey)
			singleKey := tc.splitImpossible
			fillRange(t, store, repl.RangeID, splitKey.AsRawKey(), 2*maxBytes+1, singleKey)

			if !repl.ShouldBackpressureWrites() {
				t.Fatal("expected ShouldBackpressureWrites=true, found false")
			}

			// If necessary, allow the range to begin splitting and wait until
			// it gets blocked in the request filter.
			if tc.splitOngoing {
				atomic.StoreInt32(&activateSplitFilter, 1)
				if err := stopper.RunAsyncTask(ctx, "force split", func(_ context.Context) {
					store.SetSplitQueueActive(true)
					if err := store.ForceSplitScanAndProcess(); err != nil {
						log.Fatalf(ctx, "%v", err)
					}
				}); err != nil {
					t.Fatal(err)
				}
				<-splitPending
			} else if tc.splitImpossible {
				store.SetSplitQueueActive(true)
				if err := store.ForceSplitScanAndProcess(); err != nil {
					t.Fatal(err)
				}
				if l := store.SplitQueuePurgatoryLength(); l != 1 {
					t.Fatalf("expected split queue purgatory to contain 1 replica, found %d", l)
				}
			}

			// Send a Put request. This should be backpressured on the split, so it should
			// not be able to succeed until we allow the split to continue.
			putRes := make(chan error)
			go func() {
				// Write to the first key of the range to make sure that
				// we don't end up on the wrong side of the split.
				putRes <- store.DB().Put(ctx, splitKey, "test")
			}()

			// Send a Delete request in a transaction. Should also be backpressured on the split,
			// so it should not be able to succeed until we allow the split to continue.
			delRes := make(chan error)
			go func() {
				// Write to the first key of the range to make sure that
				// we don't end up on the wrong side of the split.
				delRes <- store.DB().Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
					b := txn.NewBatch()
					b.Del(splitKey)
					return txn.CommitInBatch(ctx, b)
				})
			}()

			// Make sure the write doesn't return while a split is ongoing. If no
			// split is ongoing, the write will return an error immediately.
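			// The 100ms timeout below is a best-effort check: if either write
			// returns before it fires, the write clearly was not blocked on the
			// ongoing split.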
			if tc.splitOngoing {
				select {
				case err := <-putRes:
					close(blockSplits)
					t.Fatalf("put was not blocked on split, returned err %v", err)
				case err := <-delRes:
					close(blockSplits)
					t.Fatalf("delete was not blocked on split, returned err %v", err)
				case <-time.After(100 * time.Millisecond):
				}

				// Let the split through. The writes should follow.
				close(blockSplits)
			}

			for op, resCh := range map[string]chan error{
				"put":    putRes,
				"delete": delRes,
			} {
				if err := <-resCh; tc.expErr == "" {
					if err != nil {
						t.Fatalf("%s returned err %v, expected success", op, err)
					}
				} else {
					if !testutils.IsError(err, tc.expErr) {
						t.Fatalf("%s returned err %s, expected pattern %q", op, err, tc.expErr)
					}
				}
			}

		})
	}
}

// TestStoreRangeSystemSplits verifies that splits are based on the contents of
// the system.descriptor table.
func TestStoreRangeSystemSplits(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	// Intentionally leave the merge queue enabled. This indirectly tests that the
	// merge queue respects these split points.
	store, _ := createTestStore(t, stopper)

	userTableMax := keys.MinUserDescID + 4
	var exceptions map[int]struct{}
	schema := sqlbase.MakeMetadataSchema(
		keys.SystemSQLCodec, zonepb.DefaultZoneConfigRef(), zonepb.DefaultSystemZoneConfigRef(),
	)
	// Write table descriptors for the tables in the metadata schema as well as
	// five dummy user tables. This does two things:
	// - descriptor IDs are used to determine split keys
	// - the write triggers a SystemConfig update and gossip
	// We should end up with splits at each user table prefix.
	if err := store.DB().Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error {
		if err := txn.SetSystemConfigTrigger(); err != nil {
			return err
		}
		descTablePrefix := keys.SystemSQLCodec.TablePrefix(keys.DescriptorTableID)
		kvs, _ /* splits */ := schema.GetInitialValues()
		for _, kv := range kvs {
			if !bytes.HasPrefix(kv.Key, descTablePrefix) {
				continue
			}
			if err := txn.Put(ctx, kv.Key, &kv.Value); err != nil {
				return err
			}
		}
		for i := keys.MinUserDescID; i <= userTableMax; i++ {
			// We don't care about the value, just the key.
			key := sqlbase.MakeDescMetadataKey(keys.SystemSQLCodec, sqlbase.ID(i))
			if err := txn.Put(ctx, key, sqlbase.WrapDescriptor(&sqlbase.TableDescriptor{})); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		t.Fatal(err)
	}

	verifySplitsAtTablePrefixes := func() {
		t.Helper()
		// We expect splits at each of the user tables and at a few fixed system
		// range boundaries, but not at system config table boundaries.
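		// The expected keys are the meta2 addressing records for each split
		// boundary, listed in key order to match the meta2 scan below.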
		expKeys := []roachpb.Key{
			testutils.MakeKey(keys.Meta2Prefix, keys.NodeLivenessPrefix),
			testutils.MakeKey(keys.Meta2Prefix, keys.NodeLivenessKeyMax),
			testutils.MakeKey(keys.Meta2Prefix, keys.TimeseriesPrefix),
			testutils.MakeKey(keys.Meta2Prefix, keys.TimeseriesPrefix.PrefixEnd()),
			testutils.MakeKey(keys.Meta2Prefix, keys.TableDataMin),
		}
		ids := schema.DescriptorIDs()
		maxID := uint32(ids[len(ids)-1])
		for i := uint32(keys.MaxSystemConfigDescID + 1); i <= maxID; i++ {
			expKeys = append(expKeys,
				testutils.MakeKey(keys.Meta2Prefix, keys.SystemSQLCodec.TablePrefix(i)),
			)
		}
		for i := keys.MinUserDescID; i <= userTableMax; i++ {
			if _, ok := exceptions[i]; !ok {
				expKeys = append(expKeys,
					testutils.MakeKey(keys.Meta2Prefix, keys.SystemSQLCodec.TablePrefix(uint32(i))),
				)
			}
		}
		expKeys = append(expKeys, testutils.MakeKey(keys.Meta2Prefix, roachpb.RKeyMax))

		testutils.SucceedsSoon(t, func() error {
			rows, err := store.DB().Scan(context.Background(), keys.Meta2Prefix, keys.MetaMax, 0)
			if err != nil {
				return err
			}
			keys := make([]roachpb.Key, 0, len(expKeys))
			for _, r := range rows {
				keys = append(keys, r.Key)
			}
			if !reflect.DeepEqual(keys, expKeys) {
				return errors.Errorf("expected split keys:\n%v\nbut found:\n%v", expKeys, keys)
			}
			return nil
		})
	}

	verifySplitsAtTablePrefixes()

	// Write another, disjoint (+3) descriptor for a user table.
	userTableMax += 3
	exceptions = map[int]struct{}{userTableMax - 1: {}, userTableMax - 2: {}}
	if err := store.DB().Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error {
		if err := txn.SetSystemConfigTrigger(); err != nil {
			return err
		}
		// This time, only write the last table descriptor. Splits only occur for
		// the descriptor we add. We don't care about the value, just the key.
		k := sqlbase.MakeDescMetadataKey(keys.SystemSQLCodec, sqlbase.ID(userTableMax))
		return txn.Put(ctx, k, sqlbase.WrapDescriptor(&sqlbase.TableDescriptor{}))
	}); err != nil {
		t.Fatal(err)
	}

	verifySplitsAtTablePrefixes()
}

// runSetupSplitSnapshotRace engineers a situation in which a range has
// been split but node 3 hasn't processed it yet. There is a race
// depending on whether node 3 learns of the split from its left or
// right side. When this function returns, most of the nodes will be
// stopped, and depending on the order in which they are restarted, we
// can arrange for both possible outcomes of the race.
//
// Range 1 is the system keyspace, located on node 0.
//
// The range containing leftKey is the left side of the split, located
// on nodes 1, 2, and 3.
//
// The range containing rightKey is the right side of the split,
// located on nodes 3, 4, and 5.
//
// Nodes 1-5 are stopped; only node 0 is running.
//
// See https://github.com/cockroachdb/cockroach/issues/1644.
func runSetupSplitSnapshotRace(
	t *testing.T, testFn func(*multiTestContext, roachpb.Key, roachpb.Key),
) {
	sc := kvserver.TestStoreConfig(nil)
	// We'll control replication by hand.
	sc.TestingKnobs.DisableReplicateQueue = true
	// Async intent resolution can sometimes lead to hangs when we stop
	// most of the stores at the end of this function.
	sc.TestingKnobs.IntentResolverKnobs.DisableAsyncIntentResolution = true
	// Avoid fighting with the merge queue while trying to reproduce this race.
	sc.TestingKnobs.DisableMergeQueue = true
	sc.TestingKnobs.DisableGCQueue = true
	// Disable the split delay mechanism, or it'll spend 10s going in circles.
	// (We can't set it to zero as otherwise the default overrides us).
	sc.RaftDelaySplitToSuppressSnapshotTicks = -1
	sc.Clock = nil // manual clock
	mtc := &multiTestContext{storeConfig: &sc}
	defer mtc.Stop()
	mtc.Start(t, 6)

	leftKey := roachpb.Key("a")
	rightKey := roachpb.Key("z")

	// First, do a couple of writes; we'll use these to determine when
	// the dust has settled.
	incArgs := incrementArgs(leftKey, 1)
	if _, pErr := kv.SendWrapped(context.Background(), mtc.stores[0].TestSender(), incArgs); pErr != nil {
		t.Fatal(pErr)
	}
	incArgs = incrementArgs(rightKey, 2)
	if _, pErr := kv.SendWrapped(context.Background(), mtc.stores[0].TestSender(), incArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// Split the system range from the rest of the keyspace.
	splitArgs := adminSplitArgs(keys.SystemMax)
	if _, pErr := kv.SendWrapped(context.Background(), mtc.stores[0].TestSender(), splitArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// Get the left range's ID. This is currently 2, but using
	// LookupReplica is more future-proof (and see below for
	// rightRangeID).
	leftRangeID := mtc.stores[0].LookupReplica(roachpb.RKey("a")).RangeID

	// Replicate the left range onto nodes 1-3 and remove it from node 0. We have
	// to transfer the lease before unreplicating from store 0 because it isn't
	// safe (or allowed) for a leaseholder to remove itself from a cluster
	// without first giving up its lease.
	mtc.replicateRange(leftRangeID, 1, 2, 3)
	mtc.transferLease(context.Background(), leftRangeID, 0, 1)
	mtc.unreplicateRange(leftRangeID, 0)

	mtc.waitForValues(leftKey, []int64{0, 1, 1, 1, 0, 0})
	mtc.waitForValues(rightKey, []int64{0, 2, 2, 2, 0, 0})

	// Stop node 3 so it doesn't hear about the split.
	mtc.stopStore(3)
	mtc.advanceClock(context.Background())

	// Split the data range.
	splitArgs = adminSplitArgs(roachpb.Key("m"))
	if _, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], splitArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// Get the right range's ID. Since the split was performed on node
	// 1, it is currently 11 and not 3 as might be expected.
	var rightRangeID roachpb.RangeID
	testutils.SucceedsSoon(t, func() error {
		rightRangeID = mtc.stores[1].LookupReplica(roachpb.RKey("z")).RangeID
		if rightRangeID == leftRangeID {
			return errors.Errorf("store 1 hasn't processed split yet")
		}
		return nil
	})

	// Relocate the right range onto nodes 3-5.
	mtc.replicateRange(rightRangeID, 4, 5)
	mtc.unreplicateRange(rightRangeID, 2)
	mtc.transferLease(context.Background(), rightRangeID, 1, 4)
	mtc.unreplicateRange(rightRangeID, 1)

	// Perform another increment after all the replication changes. This
	// lets us ensure that all the replication changes have been
	// processed and applied on all replicas. This is necessary because
	// the range is in an unstable state at the time of the last
It has four members which means it 1484 // can only tolerate one failure without losing quorum. That failure 1485 // is store 3 which we stopped earlier. Stopping store 1 too soon 1486 // (before it has committed the final config change *and* propagated 1487 // that commit to the followers 4 and 5) would constitute a second 1488 // failure and render the range unable to achieve quorum after 1489 // restart (in the SnapshotWins branch). 1490 incArgs = incrementArgs(rightKey, 3) 1491 if _, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], incArgs); pErr != nil { 1492 t.Fatal(pErr) 1493 } 1494 1495 // Store 3 still has the old value, but 4 and 5 are up to date. 1496 mtc.waitForValues(rightKey, []int64{0, 0, 0, 2, 5, 5}) 1497 1498 // Scan the meta ranges to resolve all intents 1499 if _, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], 1500 &roachpb.ScanRequest{ 1501 RequestHeader: roachpb.RequestHeader{ 1502 Key: keys.MetaMin, 1503 EndKey: keys.MetaMax, 1504 }, 1505 }); pErr != nil { 1506 t.Fatal(pErr) 1507 } 1508 1509 // Stop the remaining data stores. 1510 mtc.stopStore(1) 1511 mtc.stopStore(2) 1512 // 3 is already stopped. 1513 mtc.stopStore(4) 1514 mtc.stopStore(5) 1515 1516 testFn(mtc, leftKey, rightKey) 1517 } 1518 1519 // TestSplitSnapshotRace_SplitWins exercises one outcome of the 1520 // split/snapshot race: The left side of the split propagates first, 1521 // so the split completes before it sees a competing snapshot. This is 1522 // the more common outcome in practice. 1523 func TestSplitSnapshotRace_SplitWins(t *testing.T) { 1524 defer leaktest.AfterTest(t)() 1525 runSetupSplitSnapshotRace(t, func(mtc *multiTestContext, leftKey, rightKey roachpb.Key) { 1526 // Bring the left range up first so that the split happens before it sees a snapshot. 1527 for i := 1; i <= 3; i++ { 1528 mtc.restartStore(i) 1529 } 1530 1531 // Perform a write on the left range and wait for it to propagate. 1532 incArgs := incrementArgs(leftKey, 10) 1533 if _, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], incArgs); pErr != nil { 1534 t.Fatal(pErr) 1535 } 1536 mtc.waitForValues(leftKey, []int64{0, 11, 11, 11, 0, 0}) 1537 1538 // Now wake the other stores up. 1539 mtc.restartStore(4) 1540 mtc.restartStore(5) 1541 1542 // Write to the right range. 1543 incArgs = incrementArgs(rightKey, 20) 1544 if _, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], incArgs); pErr != nil { 1545 t.Fatal(pErr) 1546 } 1547 mtc.waitForValues(rightKey, []int64{0, 0, 0, 25, 25, 25}) 1548 }) 1549 } 1550 1551 // TestSplitSnapshotRace_SnapshotWins exercises one outcome of the 1552 // split/snapshot race: The right side of the split replicates first, 1553 // so the target node sees a raft snapshot before it has processed the 1554 // split, so it still has a conflicting range. 1555 func TestSplitSnapshotRace_SnapshotWins(t *testing.T) { 1556 defer leaktest.AfterTest(t)() 1557 runSetupSplitSnapshotRace(t, func(mtc *multiTestContext, leftKey, rightKey roachpb.Key) { 1558 // Bring the right range up first. 1559 for i := 3; i <= 5; i++ { 1560 mtc.restartStore(i) 1561 } 1562 1563 // Perform a write on the right range. 1564 incArgs := incrementArgs(rightKey, 20) 1565 if _, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], incArgs); pErr != nil { 1566 t.Fatal(pErr) 1567 } 1568 1569 // It immediately propagates between nodes 4 and 5, but node 3 1570 // remains at its old value. 
It can't accept the right-hand range 1571 // because it conflicts with its not-yet-split copy of the left-hand 1572 // range. This test is not completely deterministic: we want to make 1573 // sure that node 3 doesn't panic when it receives the snapshot, but 1574 // since it silently drops the message there is nothing we can wait 1575 // for. There is a high probability that the message will have been 1576 // received by the time that nodes 4 and 5 have processed their 1577 // update. 1578 mtc.waitForValues(rightKey, []int64{0, 0, 0, 2, 25, 25}) 1579 1580 // Wake up the left-hand range. This will allow the left-hand 1581 // range's split to complete and unblock the right-hand range. 1582 mtc.restartStore(1) 1583 mtc.restartStore(2) 1584 1585 // Perform writes on both sides. This is not strictly necessary but 1586 // it helps wake up dormant ranges that would otherwise have to wait 1587 // for retry timeouts. 1588 incArgs = incrementArgs(leftKey, 10) 1589 if _, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], incArgs); pErr != nil { 1590 t.Fatal(pErr) 1591 } 1592 mtc.waitForValues(leftKey, []int64{0, 11, 11, 11, 0, 0}) 1593 1594 incArgs = incrementArgs(rightKey, 200) 1595 if _, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], incArgs); pErr != nil { 1596 t.Fatal(pErr) 1597 } 1598 mtc.waitForValues(rightKey, []int64{0, 0, 0, 225, 225, 225}) 1599 }) 1600 } 1601 1602 // TestStoreSplitTimestampCacheDifferentLeaseHolder prevents regression of 1603 // #7899. When the first lease holder of the right-hand side of a Split was 1604 // not equal to the left-hand side lease holder (at the time of the split), 1605 // its timestamp cache would not be properly initialized, which would allow 1606 // for writes which invalidated reads previously served by the pre-split lease. 1607 func TestStoreSplitTimestampCacheDifferentLeaseHolder(t *testing.T) { 1608 defer leaktest.AfterTest(t)() 1609 1610 ctx := context.Background() 1611 1612 leftKey := roachpb.Key("a") 1613 splitKey := roachpb.Key("b") 1614 rightKey := roachpb.Key("c") 1615 1616 // This filter is better understood when reading the meat of the test 1617 // below first. 1618 var noLeaseForDesc atomic.Value 1619 filter := func(args kvserverbase.FilterArgs) *roachpb.Error { 1620 leaseReq, argOK := args.Req.(*roachpb.RequestLeaseRequest) 1621 forbiddenDesc, descOK := noLeaseForDesc.Load().(*roachpb.ReplicaDescriptor) 1622 if !argOK || !descOK || !bytes.Equal(leaseReq.Key, splitKey) { 1623 return nil 1624 } 1625 log.Infof(ctx, "received lease request (%s, %s)", 1626 leaseReq.Span(), leaseReq.Lease) 1627 if !reflect.DeepEqual(*forbiddenDesc, leaseReq.Lease.Replica) { 1628 return nil 1629 } 1630 log.Infof(ctx, 1631 "refusing lease request (%s, %s) because %+v held lease for LHS of split", 1632 leaseReq.Span(), leaseReq.Lease, forbiddenDesc) 1633 return roachpb.NewError(&roachpb.NotLeaseHolderError{RangeID: args.Hdr.RangeID}) 1634 } 1635 1636 var args base.TestClusterArgs 1637 args.ReplicationMode = base.ReplicationManual 1638 args.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 1639 EvalKnobs: kvserverbase.BatchEvalTestingKnobs{ 1640 TestingEvalFilter: filter, 1641 }, 1642 } 1643 1644 tc := testcluster.StartTestCluster(t, 2, args) 1645 defer tc.Stopper().Stop(context.Background()) 1646 1647 // Split the data range, mainly to avoid other splits getting in our way. 
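// The loop just below goes through the TestCluster helper tc.SplitRange. As an
// illustrative sketch only (not part of the original test), the store-level tests
// in this file achieve the same thing by sending the AdminSplitRequest built by
// the adminSplitArgs helper directly to a single store's sender, roughly:
//
//	args := adminSplitArgs(k)
//	if _, pErr := kv.SendWrapped(ctx, store.TestSender(), args); pErr != nil {
//		t.Fatal(pErr)
//	}
//
// Here "store" stands in for the *kvserver.Store used by the single-store setups
// elsewhere in this file; this test only has the TestCluster handle tc.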
1648 for _, k := range []roachpb.Key{leftKey, rightKey} { 1649 if _, _, err := tc.SplitRange(k); err != nil { 1650 t.Fatal(errors.Wrapf(err, "split at %s", k)) 1651 } 1652 } 1653 if _, err := tc.AddReplicas(leftKey, tc.Target(1)); err != nil { 1654 t.Fatal(err) 1655 } 1656 1657 db := tc.Servers[0].DB() // irrelevant which one we use 1658 1659 // Make a context tied to the Stopper. The test works without, but this 1660 // is cleaner since we won't properly terminate the transaction below. 1661 ctx, cancel := tc.Server(0).Stopper().WithCancelOnQuiesce(ctx) 1662 defer cancel() 1663 1664 // This transaction will try to write "under" a served read. 1665 txnOld := kv.NewTxn(ctx, db, 0 /* gatewayNodeID */) 1666 1667 // Do something with txnOld so that its timestamp gets set. 1668 if _, err := txnOld.Scan(ctx, "a", "b", 0); err != nil { 1669 t.Fatal(err) 1670 } 1671 1672 // Another client comes along at a higher timestamp, touching everything on 1673 // the right of the (soon-to-be) split key. 1674 if _, err := db.Scan(ctx, splitKey, rightKey, 0); err != nil { 1675 t.Fatal(err) 1676 } 1677 1678 // This block makes sure that from now on, we don't allow the current 1679 // lease holder of our range to extend. Any attempt of doing so will 1680 // catch a NotLeaseHolderError, which means a retry by DistSender (until 1681 // the other node gets to be the lease holder instead). 1682 // 1683 // This makes sure that once we split, we'll be in the situation described 1684 // in #7899 (before the fix): The first lease holder of the right hand side 1685 // of the Split will not be that of the pre-split Range. 1686 // With the fix, the right-hand lease is initialized from the left-hand 1687 // lease, so the lease holders are the same, and there will never be a 1688 // lease request for the right-hand side in this test. 1689 leaseHolder := func(k roachpb.Key) roachpb.ReplicaDescriptor { 1690 desc, err := tc.LookupRange(k) 1691 if err != nil { 1692 t.Fatal(err) 1693 } 1694 lease, _, err := tc.FindRangeLease(desc, nil) 1695 if err != nil { 1696 t.Fatal(err) 1697 } 1698 leaseHolder := lease.Replica 1699 replica, found := desc.GetReplicaDescriptor(leaseHolder.StoreID) 1700 if !found { 1701 t.Fatalf("no replica on store %d found in %+v", leaseHolder.StoreID, desc) 1702 } 1703 return replica 1704 } 1705 blacklistedLeaseHolder := leaseHolder(leftKey) 1706 log.Infof(ctx, "blacklisting replica %+v for leases", blacklistedLeaseHolder) 1707 noLeaseForDesc.Store(&blacklistedLeaseHolder) 1708 1709 // Pull the trigger. This actually also reads the RHS descriptor after the 1710 // split, so when this returns, we've got the leases set up already. 1711 // 1712 // There's a slight race here: Just above, we've settled on who must not 1713 // be the future lease holder. But between then and now, that lease could 1714 // have expired and the other Replica could have obtained it. This would 1715 // have given it a proper initialization of the timestamp cache, and so 1716 // the split trigger would populate the right hand side with a timestamp 1717 // cache which does not exhibit the anomaly. 1718 // 1719 // In practice, this should only be possible if second-long delays occur 1720 // just above this comment, and we assert against it below. 
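// To keep the moving parts straight, the sequence this test drives can be
// sketched as follows (timestamps are illustrative, not taken from the code):
//
//	ts1: txnOld scans [a, b)            -> fixes txnOld's read timestamp
//	ts2: db.Scan(splitKey, rightKey)    -> ts2 > ts1, recorded in the LHS timestamp cache
//	     split at "b"                   -> the RHS must inherit that timestamp cache state
//	     txnOld puts "bb" and commits   -> must be refreshed above ts2 rather than writing under it
//
// The split and the subsequent writes happen below.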
1721 log.Infof(ctx, "splitting at %s", splitKey) 1722 if _, _, err := tc.SplitRange(splitKey); err != nil { 1723 t.Fatal(err) 1724 } 1725 1726 if currentLHSLeaseHolder := leaseHolder(leftKey); !reflect.DeepEqual( 1727 currentLHSLeaseHolder, blacklistedLeaseHolder) { 1728 t.Fatalf("lease holder changed from %+v to %+v, should de-flake this test", 1729 blacklistedLeaseHolder, currentLHSLeaseHolder) 1730 } 1731 1732 // This write (to the right-hand side of the split) should hit the 1733 // timestamp cache and flag the txn for a restart when we try to commit it 1734 // below. With the bug in #7899, the RHS of the split had an empty 1735 // timestamp cache and would simply let us write behind the previous read. 1736 if err := txnOld.Put(ctx, "bb", "bump"); err != nil { 1737 t.Fatal(err) 1738 } 1739 1740 if err := txnOld.Commit(ctx); err != nil { 1741 t.Fatalf("unexpected txn commit err: %+v", err) 1742 } 1743 1744 // Verify that the txn's safe timestamp was set. 1745 if txnOld.TestingCloneTxn().ReadTimestamp == (hlc.Timestamp{}) { 1746 t.Fatal("expected non-zero refreshed timestamp") 1747 } 1748 1749 // As outlined above, the anomaly was fixed by giving the right-hand side 1750 // of the split the same lease as the left-hand side of the Split. Check 1751 // that that's what's happened (we actually test a little more, namely 1752 // that it's the same ReplicaID, which is not required but should always 1753 // hold). 1754 if rhsLease := leaseHolder(rightKey); !reflect.DeepEqual( 1755 rhsLease, blacklistedLeaseHolder, 1756 ) { 1757 t.Errorf("expected LHS and RHS to have same lease holder") 1758 } 1759 } 1760 1761 // TestStoreSplitOnRemovedReplica prevents regression of #23673. In that issue, 1762 // it was observed that the retry loop in AdminSplit could go into an infinite 1763 // loop if the replica it was being run on had been removed from the range. The 1764 // loop now checks that the replica performing the split is the leaseholder 1765 // before each iteration. 1766 func TestStoreSplitOnRemovedReplica(t *testing.T) { 1767 defer leaktest.AfterTest(t)() 1768 1769 leftKey := roachpb.Key("a") 1770 splitKey := roachpb.Key("b") 1771 rightKey := roachpb.Key("c") 1772 1773 var newDesc roachpb.RangeDescriptor 1774 inFilter := make(chan struct{}, 1) 1775 beginBlockingSplit := make(chan struct{}) 1776 finishBlockingSplit := make(chan struct{}) 1777 filter := func(_ context.Context, ba roachpb.BatchRequest) *roachpb.Error { 1778 // Block replica 1's attempt to perform the AdminSplit. We detect the 1779 // split's range descriptor update and block until the rest of the test 1780 // is ready. We then return a ConditionFailedError, simulating a 1781 // descriptor update race. 1782 if ba.Replica.NodeID == 1 { 1783 for _, req := range ba.Requests { 1784 if cput, ok := req.GetInner().(*roachpb.ConditionalPutRequest); ok { 1785 leftDescKey := keys.RangeDescriptorKey(roachpb.RKey(leftKey)) 1786 if cput.Key.Equal(leftDescKey) { 1787 var desc roachpb.RangeDescriptor 1788 if err := cput.Value.GetProto(&desc); err != nil { 1789 panic(err) 1790 } 1791 1792 if desc.EndKey.Equal(splitKey) { 1793 select { 1794 case <-beginBlockingSplit: 1795 select { 1796 case inFilter <- struct{}{}: 1797 // Let the test know we're in the filter. 
1798 default: 1799 } 1800 <-finishBlockingSplit 1801 1802 var val roachpb.Value 1803 if err := val.SetProto(&newDesc); err != nil { 1804 panic(err) 1805 } 1806 return roachpb.NewError(&roachpb.ConditionFailedError{ 1807 ActualValue: &val, 1808 }) 1809 default: 1810 } 1811 } 1812 } 1813 } 1814 } 1815 } 1816 return nil 1817 } 1818 1819 var args base.TestClusterArgs 1820 args.ReplicationMode = base.ReplicationManual 1821 args.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 1822 TestingRequestFilter: filter, 1823 } 1824 1825 tc := testcluster.StartTestCluster(t, 3, args) 1826 defer tc.Stopper().Stop(context.Background()) 1827 1828 // Split the data range, mainly to avoid other splits getting in our way. 1829 for _, k := range []roachpb.Key{leftKey, rightKey} { 1830 if _, _, err := tc.SplitRange(k); err != nil { 1831 t.Fatal(errors.Wrapf(err, "split at %s", k)) 1832 } 1833 } 1834 1835 // Send an AdminSplit request to the replica. In the filter above we'll 1836 // block the first cput in this split until we're ready to let it loose 1837 // again, which will be after we remove the replica from the range. 1838 splitRes := make(chan error) 1839 close(beginBlockingSplit) 1840 go func() { 1841 _, _, err := tc.SplitRange(splitKey) 1842 splitRes <- err 1843 }() 1844 <-inFilter 1845 1846 // Move the range from node 0 to node 1. Then add node 2 to the range. 1847 // node 0 will never hear about this range descriptor update. 1848 var err error 1849 if newDesc, err = tc.AddReplicas(leftKey, tc.Target(1)); err != nil { 1850 t.Fatal(err) 1851 } 1852 if err := tc.TransferRangeLease(newDesc, tc.Target(1)); err != nil { 1853 t.Fatal(err) 1854 } 1855 if _, err := tc.RemoveReplicas(leftKey, tc.Target(0)); err != nil { 1856 t.Fatal(err) 1857 } 1858 if newDesc, err = tc.AddReplicas(leftKey, tc.Target(2)); err != nil { 1859 t.Fatal(err) 1860 } 1861 1862 // Stop blocking the split request's cput. This will cause the cput to fail 1863 // with a ConditionFailedError. The error will warrant a retry in 1864 // AdminSplit's retry loop, but when the removed replica notices that it is 1865 // no longer the leaseholder, it will return a NotLeaseholderError. This in 1866 // turn will allow the AdminSplit to be re-routed to the new leaseholder, 1867 // where it will succeed. 
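// As an illustrative sketch only (not copied from the kvserver source), the
// guard this exercises amounts to a leaseholder check before each retry of the
// descriptor CPut inside AdminSplit, roughly:
//
//	if !stillLeaseholder(repl) { // stillLeaseholder is a hypothetical stand-in for the real lease check
//		return roachpb.NewError(&roachpb.NotLeaseHolderError{RangeID: desc.RangeID})
//	}
//
// The NotLeaseHolderError is what lets DistSender re-route the AdminSplit to the
// new leaseholder instead of retrying forever on the removed replica.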
1868 close(finishBlockingSplit) 1869 if err = <-splitRes; err != nil { 1870 t.Errorf("AdminSplit returned error: %+v", err) 1871 } 1872 } 1873 1874 func TestStoreSplitGCThreshold(t *testing.T) { 1875 defer leaktest.AfterTest(t)() 1876 storeCfg := kvserver.TestStoreConfig(nil) 1877 storeCfg.TestingKnobs.DisableSplitQueue = true 1878 storeCfg.TestingKnobs.DisableMergeQueue = true 1879 stopper := stop.NewStopper() 1880 defer stopper.Stop(context.Background()) 1881 store := createTestStoreWithConfig(t, stopper, storeCfg) 1882 1883 leftKey := roachpb.Key("a") 1884 splitKey := roachpb.Key("b") 1885 rightKey := roachpb.Key("c") 1886 content := []byte("test") 1887 1888 pArgs := putArgs(leftKey, content) 1889 if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), pArgs); pErr != nil { 1890 t.Fatal(pErr) 1891 } 1892 pArgs = putArgs(rightKey, content) 1893 if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), pArgs); pErr != nil { 1894 t.Fatal(pErr) 1895 } 1896 1897 specifiedGCThreshold := hlc.Timestamp{ 1898 WallTime: 2e9, 1899 } 1900 gcArgs := &roachpb.GCRequest{ 1901 RequestHeader: roachpb.RequestHeader{ 1902 Key: leftKey, 1903 EndKey: rightKey, 1904 }, 1905 Threshold: specifiedGCThreshold, 1906 } 1907 if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), gcArgs); pErr != nil { 1908 t.Fatal(pErr) 1909 } 1910 1911 args := adminSplitArgs(splitKey) 1912 if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), args); pErr != nil { 1913 t.Fatal(pErr) 1914 } 1915 1916 repl := store.LookupReplica(roachpb.RKey(splitKey)) 1917 gcThreshold := repl.GetGCThreshold() 1918 1919 if !reflect.DeepEqual(gcThreshold, specifiedGCThreshold) { 1920 t.Fatalf("expected RHS's GCThreshold is equal to %v, but got %v", specifiedGCThreshold, gcThreshold) 1921 } 1922 1923 repl.AssertState(context.Background(), store.Engine()) 1924 } 1925 1926 // TestStoreRangeSplitRaceUninitializedRHS reproduces #7600 (before it was 1927 // fixed). While splits are happening, we simulate incoming messages for the 1928 // right-hand side to trigger a race between the creation of the proper replica 1929 // and the uninitialized replica reacting to messages. 1930 func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) { 1931 defer leaktest.AfterTest(t)() 1932 mtc := &multiTestContext{} 1933 storeCfg := kvserver.TestStoreConfig(nil) 1934 storeCfg.TestingKnobs.DisableMergeQueue = true 1935 // An aggressive tick interval lets groups communicate more and thus 1936 // triggers test failures much more reliably. We can't go too aggressive 1937 // or race tests never make any progress. 1938 storeCfg.RaftTickInterval = 50 * time.Millisecond 1939 storeCfg.RaftElectionTimeoutTicks = 2 1940 currentTrigger := make(chan *roachpb.SplitTrigger, 1) 1941 var seen struct { 1942 syncutil.Mutex 1943 sids map[kvserverbase.CmdIDKey][2]bool 1944 } 1945 seen.sids = make(map[kvserverbase.CmdIDKey][2]bool) 1946 1947 storeCfg.TestingKnobs.EvalKnobs.TestingEvalFilter = func(args kvserverbase.FilterArgs) *roachpb.Error { 1948 et, ok := args.Req.(*roachpb.EndTxnRequest) 1949 if !ok || et.InternalCommitTrigger == nil { 1950 return nil 1951 } 1952 trigger := protoutil.Clone(et.InternalCommitTrigger.GetSplitTrigger()).(*roachpb.SplitTrigger) 1953 // The first time the trigger arrives (on each of the two stores), 1954 // return a transaction retry. This allows us to pass the trigger to 1955 // the goroutine creating faux incoming messages for the yet 1956 // nonexistent right-hand-side, giving it a head start. 
This code looks 1957 // fairly complicated since it wants to ensure that the two replicas 1958 // don't diverge. 1959 if trigger != nil && len(trigger.RightDesc.InternalReplicas) == 2 && args.Hdr.Txn.Epoch == 0 { 1960 seen.Lock() 1961 defer seen.Unlock() 1962 sid, sl := int(args.Sid)-1, seen.sids[args.CmdID] 1963 if !sl[sid] { 1964 sl[sid] = true 1965 seen.sids[args.CmdID] = sl 1966 } else { 1967 return nil 1968 } 1969 select { 1970 case currentTrigger <- trigger: 1971 default: 1972 } 1973 return roachpb.NewError( 1974 roachpb.NewReadWithinUncertaintyIntervalError( 1975 args.Hdr.Timestamp, args.Hdr.Timestamp, nil, 1976 )) 1977 } 1978 return nil 1979 } 1980 1981 mtc.storeConfig = &storeCfg 1982 defer mtc.Stop() 1983 mtc.Start(t, 2) 1984 1985 leftRange := mtc.stores[0].LookupReplica(roachpb.RKey("a")) 1986 1987 // Replicate the left range onto the second node. We don't wait since we 1988 // don't actually care what the second node does. All we want is that the 1989 // first node isn't surprised by messages from that node. 1990 mtc.replicateRange(leftRange.RangeID, 1) 1991 1992 for i := 0; i < 10; i++ { 1993 errChan := make(chan *roachpb.Error) 1994 1995 // Closed when the split goroutine is done. 1996 splitDone := make(chan struct{}) 1997 1998 go func() { 1999 defer close(splitDone) 2000 2001 // Split the data range. The split keys are chosen so that they move 2002 // towards "a" (so that the range being split is always the first 2003 // range). 2004 splitKey := roachpb.Key(encoding.EncodeVarintDescending([]byte("a"), int64(i))) 2005 splitArgs := adminSplitArgs(splitKey) 2006 _, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], splitArgs) 2007 errChan <- pErr 2008 }() 2009 go func() { 2010 defer func() { errChan <- nil }() 2011 2012 trigger := <-currentTrigger // our own copy 2013 // Make sure the first node is first for convenience. 2014 replicas := trigger.RightDesc.InternalReplicas 2015 if replicas[0].NodeID > replicas[1].NodeID { 2016 tmp := replicas[1] 2017 replicas[1] = replicas[0] 2018 replicas[0] = tmp 2019 } 2020 2021 // Send a few vote requests which look like they're from the other 2022 // node's right hand side of the split. This triggers a race which 2023 // is discussed in #7600 (briefly, the creation of the right hand 2024 // side in the split trigger was racing with the uninitialized 2025 // version for the same group, resulting in clobbered HardState). 2026 for term := uint64(1); ; term++ { 2027 if sent := mtc.transport.SendAsync(&kvserver.RaftMessageRequest{ 2028 RangeID: trigger.RightDesc.RangeID, 2029 ToReplica: replicas[0], 2030 FromReplica: replicas[1], 2031 Message: raftpb.Message{ 2032 Type: raftpb.MsgVote, 2033 To: uint64(replicas[0].ReplicaID), 2034 From: uint64(replicas[1].ReplicaID), 2035 Term: term, 2036 }, 2037 }, rpc.DefaultClass); !sent { 2038 t.Error("transport failed to send vote request") 2039 } 2040 select { 2041 case <-splitDone: 2042 return 2043 case <-time.After(time.Microsecond): 2044 // If we busy-loop here, we monopolize processRaftMu and the 2045 // split takes a long time to complete. Sleeping reduces the 2046 // chance that we hit the race, but it still shows up under 2047 // stress. 2048 } 2049 } 2050 }() 2051 for i := 0; i < 2; i++ { 2052 if pErr := <-errChan; pErr != nil { 2053 t.Fatal(pErr) 2054 } 2055 } 2056 } 2057 } 2058 2059 // TestLeaderAfterSplit verifies that a raft group created by a split 2060 // elects a leader without waiting for an election timeout. 
2061 func TestLeaderAfterSplit(t *testing.T) { 2062 defer leaktest.AfterTest(t)() 2063 storeConfig := kvserver.TestStoreConfig(nil) 2064 storeConfig.TestingKnobs.DisableMergeQueue = true 2065 storeConfig.RaftElectionTimeoutTicks = 1000000 2066 mtc := &multiTestContext{ 2067 storeConfig: &storeConfig, 2068 } 2069 defer mtc.Stop() 2070 mtc.Start(t, 3) 2071 2072 mtc.replicateRange(1, 1, 2) 2073 2074 leftKey := roachpb.Key("a") 2075 splitKey := roachpb.Key("m") 2076 rightKey := roachpb.Key("z") 2077 2078 splitArgs := adminSplitArgs(splitKey) 2079 if _, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], splitArgs); pErr != nil { 2080 t.Fatal(pErr) 2081 } 2082 2083 incArgs := incrementArgs(leftKey, 1) 2084 if _, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], incArgs); pErr != nil { 2085 t.Fatal(pErr) 2086 } 2087 2088 incArgs = incrementArgs(rightKey, 2) 2089 if _, pErr := kv.SendWrapped(context.Background(), mtc.distSenders[0], incArgs); pErr != nil { 2090 t.Fatal(pErr) 2091 } 2092 } 2093 2094 func BenchmarkStoreRangeSplit(b *testing.B) { 2095 var mtc multiTestContext 2096 mtc.Start(b, 1) 2097 defer mtc.Stop() 2098 store := mtc.Store(0) 2099 2100 // Perform initial split of ranges. 2101 sArgs := adminSplitArgs(roachpb.Key("b")) 2102 if _, err := kv.SendWrapped(context.Background(), store.TestSender(), sArgs); err != nil { 2103 b.Fatal(err) 2104 } 2105 2106 // Write some values left and right of the split key. 2107 aDesc := store.LookupReplica([]byte("a")).Desc() 2108 bDesc := store.LookupReplica([]byte("c")).Desc() 2109 kvserver.WriteRandomDataToRange(b, store, aDesc.RangeID, []byte("aaa")) 2110 kvserver.WriteRandomDataToRange(b, store, bDesc.RangeID, []byte("ccc")) 2111 2112 // Merge the b range back into the a range. 2113 mArgs := adminMergeArgs(roachpb.KeyMin) 2114 if _, err := kv.SendWrapped(context.Background(), store.TestSender(), mArgs); err != nil { 2115 b.Fatal(err) 2116 } 2117 2118 b.ResetTimer() 2119 for i := 0; i < b.N; i++ { 2120 // Split the range. 2121 b.StartTimer() 2122 if _, err := kv.SendWrapped(context.Background(), store.TestSender(), sArgs); err != nil { 2123 b.Fatal(err) 2124 } 2125 2126 // Merge the ranges. 
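// adminMergeArgs (used just below and at the top of this benchmark via mArgs) is
// defined alongside the merge tests in this package rather than in this file. As
// a sketch of its likely shape, mirroring adminSplitArgs above, it builds nothing
// more than an AdminMergeRequest keyed at the merge point:
//
//	func adminMergeArgs(key roachpb.Key) *roachpb.AdminMergeRequest {
//		return &roachpb.AdminMergeRequest{
//			RequestHeader: roachpb.RequestHeader{Key: key},
//		}
//	}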
2127 b.StopTimer() 2128 if _, err := kv.SendWrapped(context.Background(), store.TestSender(), mArgs); err != nil { 2129 b.Fatal(err) 2130 } 2131 } 2132 } 2133 2134 func writeRandomTimeSeriesDataToRange( 2135 t testing.TB, store *kvserver.Store, rangeID roachpb.RangeID, keyPrefix []byte, 2136 ) (midpoint []byte) { 2137 src := rand.New(rand.NewSource(0)) 2138 r := ts.Resolution10s 2139 for i := 0; i < 20; i++ { 2140 var data []tspb.TimeSeriesData 2141 for j := int64(0); j <= src.Int63n(5); j++ { 2142 d := tspb.TimeSeriesData{ 2143 Name: "test.random.metric", 2144 Source: "cpu01", 2145 } 2146 for k := int64(0); k <= src.Int63n(10); k++ { 2147 d.Datapoints = append(d.Datapoints, tspb.TimeSeriesDatapoint{ 2148 TimestampNanos: src.Int63n(200) * r.SlabDuration(), 2149 Value: src.Float64(), 2150 }) 2151 } 2152 data = append(data, d) 2153 } 2154 for _, d := range data { 2155 idatas, err := d.ToInternal(r.SlabDuration(), r.SampleDuration(), false) 2156 if err != nil { 2157 t.Fatal(err) 2158 } 2159 for _, idata := range idatas { 2160 var value roachpb.Value 2161 if err := value.SetProto(&idata); err != nil { 2162 t.Fatal(err) 2163 } 2164 mArgs := roachpb.MergeRequest{ 2165 RequestHeader: roachpb.RequestHeader{ 2166 Key: encoding.EncodeVarintAscending(keyPrefix, idata.StartTimestampNanos), 2167 }, 2168 Value: value, 2169 } 2170 if _, pErr := kv.SendWrappedWith(context.Background(), store.TestSender(), roachpb.Header{ 2171 RangeID: rangeID, 2172 }, &mArgs); pErr != nil { 2173 t.Fatal(pErr) 2174 } 2175 } 2176 } 2177 } 2178 // Return approximate midway point (100 is midway between random timestamps in range [0,200)). 2179 midKey := append([]byte(nil), keyPrefix...) 2180 midKey = encoding.EncodeVarintAscending(midKey, 100*r.SlabDuration()) 2181 return midKey 2182 } 2183 2184 // TestStoreRangeGossipOnSplits verifies that the store descriptor 2185 // is gossiped on splits up until the point where an additional 2186 // split range doesn't exceed GossipWhenCapacityDeltaExceedsFraction. 2187 func TestStoreRangeGossipOnSplits(t *testing.T) { 2188 defer leaktest.AfterTest(t)() 2189 storeCfg := kvserver.TestStoreConfig(nil) 2190 storeCfg.GossipWhenCapacityDeltaExceedsFraction = 0.5 // 50% for testing 2191 // We can't properly test how frequently changes in the number of ranges 2192 // trigger the store to gossip its capacities if we have to worry about 2193 // changes in the number of leases also triggering store gossip. 2194 storeCfg.TestingKnobs.DisableLeaseCapacityGossip = true 2195 storeCfg.TestingKnobs.DisableSplitQueue = true 2196 storeCfg.TestingKnobs.DisableMergeQueue = true 2197 storeCfg.TestingKnobs.DisableScanner = true 2198 stopper := stop.NewStopper() 2199 defer stopper.Stop(context.Background()) 2200 store := createTestStoreWithConfig(t, stopper, storeCfg) 2201 storeKey := gossip.MakeStoreKey(store.StoreID()) 2202 2203 // Avoid excessive logging on under-replicated ranges due to our many splits. 2204 config.TestingSetupZoneConfigHook(stopper) 2205 zoneConfig := zonepb.DefaultZoneConfig() 2206 zoneConfig.NumReplicas = proto.Int32(1) 2207 config.TestingSetZoneConfig(0, zoneConfig) 2208 2209 var lastSD roachpb.StoreDescriptor 2210 rangeCountCh := make(chan int32) 2211 unregister := store.Gossip().RegisterCallback(storeKey, func(_ string, val roachpb.Value) { 2212 var sd roachpb.StoreDescriptor 2213 if err := val.GetProto(&sd); err != nil { 2214 panic(err) 2215 } 2216 // Wait for range count to change as this callback is invoked 2217 // for lease count changes as well. 
2218 if sd.Capacity.RangeCount == lastSD.Capacity.RangeCount { 2219 return 2220 } 2221 lastSD = sd 2222 rangeCountCh <- sd.Capacity.RangeCount 2223 }) 2224 defer unregister() 2225 2226 // Pull the first gossiped range count. 2227 lastRangeCount := <-rangeCountCh 2228 2229 splitFunc := func(i int) *roachpb.Error { 2230 splitKey := roachpb.Key(fmt.Sprintf("%02d", i)) 2231 _, pErr := store.LookupReplica(roachpb.RKey(splitKey)).AdminSplit( 2232 context.Background(), 2233 roachpb.AdminSplitRequest{ 2234 RequestHeader: roachpb.RequestHeader{ 2235 Key: splitKey, 2236 }, 2237 SplitKey: splitKey, 2238 }, 2239 "test", 2240 ) 2241 return pErr 2242 } 2243 2244 // Split until we split at least 20 ranges. 2245 var rangeCount int32 2246 for i := 0; rangeCount < 20; i++ { 2247 if pErr := splitFunc(i); pErr != nil { 2248 // Avoid flakes caused by bad clocks. 2249 if testutils.IsPError(pErr, "rejecting command with timestamp in the future") { 2250 log.Warningf(context.Background(), "ignoring split error: %s", pErr) 2251 continue 2252 } 2253 t.Fatal(pErr) 2254 } 2255 select { 2256 case rangeCount = <-rangeCountCh: 2257 changeCount := int32(math.Ceil(math.Min(float64(lastRangeCount)*0.5, 3))) 2258 diff := rangeCount - (lastRangeCount + changeCount) 2259 if diff < -1 || diff > 1 { 2260 t.Errorf("gossiped range count %d more than 1 away from expected %d", rangeCount, lastRangeCount+changeCount) 2261 } 2262 lastRangeCount = rangeCount 2263 case <-time.After(10 * time.Millisecond): 2264 } 2265 } 2266 } 2267 2268 // TestStoreTxnWaitQueueEnabledOnSplit verifies that the TxnWaitQueue for 2269 // the right hand side of the split range is enabled after a split. 2270 func TestStoreTxnWaitQueueEnabledOnSplit(t *testing.T) { 2271 defer leaktest.AfterTest(t)() 2272 storeCfg := kvserver.TestStoreConfig(nil) 2273 storeCfg.TestingKnobs.DisableSplitQueue = true 2274 storeCfg.TestingKnobs.DisableMergeQueue = true 2275 stopper := stop.NewStopper() 2276 defer stopper.Stop(context.Background()) 2277 store := createTestStoreWithConfig(t, stopper, storeCfg) 2278 2279 key := keys.UserTableDataMin 2280 args := adminSplitArgs(key) 2281 if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), args); pErr != nil { 2282 t.Fatalf("%q: split unexpected error: %s", key, pErr) 2283 } 2284 2285 rhsRepl := store.LookupReplica(roachpb.RKey(keys.UserTableDataMin)) 2286 if !rhsRepl.GetConcurrencyManager().TxnWaitQueue().IsEnabled() { 2287 t.Errorf("expected RHS replica's push txn queue to be enabled post-split") 2288 } 2289 } 2290 2291 // TestDistributedTxnCleanup verifies that distributed transactions 2292 // cleanup their txn records after commit or abort. 2293 func TestDistributedTxnCleanup(t *testing.T) { 2294 defer leaktest.AfterTest(t)() 2295 storeCfg := kvserver.TestStoreConfig(nil) 2296 storeCfg.TestingKnobs.DisableSplitQueue = true 2297 storeCfg.TestingKnobs.DisableMergeQueue = true 2298 stopper := stop.NewStopper() 2299 defer stopper.Stop(context.Background()) 2300 store := createTestStoreWithConfig(t, stopper, storeCfg) 2301 2302 // Split at "a". 2303 lhsKey := roachpb.Key("a") 2304 args := adminSplitArgs(lhsKey) 2305 if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), args); pErr != nil { 2306 t.Fatalf("split at %q: %s", lhsKey, pErr) 2307 } 2308 lhs := store.LookupReplica(roachpb.RKey("a")) 2309 2310 // Split at "b". 
2311 rhsKey := roachpb.Key("b") 2312 args = adminSplitArgs(rhsKey) 2313 if _, pErr := kv.SendWrappedWith(context.Background(), store, roachpb.Header{ 2314 RangeID: lhs.RangeID, 2315 }, args); pErr != nil { 2316 t.Fatalf("split at %q: %s", rhsKey, pErr) 2317 } 2318 rhs := store.LookupReplica(roachpb.RKey("b")) 2319 2320 if lhs == rhs { 2321 t.Errorf("LHS == RHS after split: %s == %s", lhs, rhs) 2322 } 2323 2324 // Test both commit and abort cases. 2325 testutils.RunTrueAndFalse(t, "force", func(t *testing.T, force bool) { 2326 testutils.RunTrueAndFalse(t, "commit", func(t *testing.T, commit bool) { 2327 // Run a distributed transaction involving the lhsKey and rhsKey. 2328 var txnKey roachpb.Key 2329 ctx := context.Background() 2330 txn := kv.NewTxn(ctx, store.DB(), 0 /* gatewayNodeID */) 2331 txnFn := func(ctx context.Context, txn *kv.Txn) error { 2332 b := txn.NewBatch() 2333 b.Put(fmt.Sprintf("%s.force=%t,commit=%t", string(lhsKey), force, commit), "lhsValue") 2334 b.Put(fmt.Sprintf("%s.force=%t,commit=%t", string(rhsKey), force, commit), "rhsValue") 2335 if err := txn.Run(ctx, b); err != nil { 2336 return err 2337 } 2338 proto := txn.TestingCloneTxn() 2339 txnKey = keys.TransactionKey(proto.Key, proto.ID) 2340 // If force=true, we're force-aborting the txn out from underneath. 2341 // This simulates txn deadlock or a max priority txn aborting a 2342 // normal or min priority txn. 2343 if force { 2344 ba := roachpb.BatchRequest{} 2345 ba.Timestamp = store.Clock().Now() 2346 ba.RangeID = lhs.RangeID 2347 ba.Add(&roachpb.PushTxnRequest{ 2348 RequestHeader: roachpb.RequestHeader{ 2349 Key: proto.Key, 2350 }, 2351 PusheeTxn: proto.TxnMeta, 2352 PushType: roachpb.PUSH_ABORT, 2353 Force: true, 2354 }) 2355 _, pErr := store.Send(ctx, ba) 2356 if pErr != nil { 2357 t.Fatalf("failed to abort the txn: %s", pErr) 2358 } 2359 } 2360 if commit { 2361 return txn.Commit(ctx) 2362 } 2363 return errors.New("forced abort") 2364 } 2365 if err := txnFn(ctx, txn); err != nil { 2366 txn.CleanupOnError(ctx, err) 2367 if !force && commit { 2368 t.Fatalf("expected success with commit == true; got %v", err) 2369 } 2370 } 2371 2372 // Verify that the transaction record is cleaned up. 2373 testutils.SucceedsSoon(t, func() error { 2374 kv, err := store.DB().Get(ctx, txnKey) 2375 if err != nil { 2376 return err 2377 } 2378 if kv.Value != nil { 2379 return errors.Errorf("expected txn record %s to have been cleaned", txnKey) 2380 } 2381 return nil 2382 }) 2383 }) 2384 }) 2385 } 2386 2387 // TestUnsplittableRange creates an unsplittable range and tests that 2388 // it is handled correctly by the split queue's purgatory. The test: 2389 // 1. creates an unsplittable range that needs to be split 2390 // 2. makes sure that range enters purgatory 2391 // 3. makes sure a purgatory run still fails 2392 // 4. GCs part of the range so that it no longer needs to be split 2393 // 5. 
makes sure a purgatory run succeeds and the range leaves purgatory 2394 func TestUnsplittableRange(t *testing.T) { 2395 defer leaktest.AfterTest(t)() 2396 2397 ctx := context.Background() 2398 ttl := 1 * time.Hour 2399 const maxBytes = 1 << 16 2400 2401 stopper := stop.NewStopper() 2402 defer stopper.Stop(ctx) 2403 2404 manual := hlc.NewManualClock(123) 2405 splitQueuePurgatoryChan := make(chan time.Time, 1) 2406 cfg := kvserver.TestStoreConfig(hlc.NewClock(manual.UnixNano, time.Nanosecond)) 2407 cfg.DefaultZoneConfig.RangeMaxBytes = proto.Int64(maxBytes) 2408 cfg.DefaultZoneConfig.GC = &zonepb.GCPolicy{ 2409 TTLSeconds: int32(ttl.Seconds()), 2410 } 2411 cfg.DefaultSystemZoneConfig.RangeMaxBytes = proto.Int64(maxBytes) 2412 cfg.DefaultSystemZoneConfig.GC = &zonepb.GCPolicy{ 2413 TTLSeconds: int32(ttl.Seconds()), 2414 } 2415 cfg.TestingKnobs.SplitQueuePurgatoryChan = splitQueuePurgatoryChan 2416 cfg.TestingKnobs.DisableMergeQueue = true 2417 store := createTestStoreWithConfig(t, stopper, cfg) 2418 2419 // Add a single large row to /Table/14. 2420 tableKey := roachpb.RKey(keys.SystemSQLCodec.TablePrefix(keys.UITableID)) 2421 row1Key := roachpb.Key(encoding.EncodeVarintAscending(append([]byte(nil), tableKey...), 1)) 2422 col1Key := keys.MakeFamilyKey(append([]byte(nil), row1Key...), 0) 2423 valueLen := 0.9 * maxBytes 2424 value := bytes.Repeat([]byte("x"), int(valueLen)) 2425 if err := store.DB().Put(ctx, col1Key, value); err != nil { 2426 t.Fatal(err) 2427 } 2428 2429 // Wait for half of the ttl and add another large value in the same row. 2430 // Together, these two values bump the range over the max range size. 2431 manual.Increment(ttl.Nanoseconds() / 2) 2432 value2Len := 0.2 * maxBytes 2433 value2 := bytes.Repeat([]byte("y"), int(value2Len)) 2434 if err := store.DB().Put(ctx, col1Key, value2); err != nil { 2435 t.Fatal(err) 2436 } 2437 2438 // Ensure that an attempt to split the range will hit an 2439 // unsplittableRangeError and place the range in purgatory. 2440 if err := store.ForceSplitScanAndProcess(); err != nil { 2441 t.Fatal(err) 2442 } 2443 if purgLen := store.SplitQueuePurgatoryLength(); purgLen != 1 { 2444 t.Fatalf("expected split queue purgatory to contain 1 replica, found %d", purgLen) 2445 } 2446 2447 // Signal the split queue's purgatory channel and ensure that the purgatory 2448 // remains occupied because the range still needs to split but can't. 2449 splitQueuePurgatoryChan <- timeutil.Now() 2450 if purgLen := store.SplitQueuePurgatoryLength(); purgLen != 1 { 2451 t.Fatalf("expected split queue purgatory to contain 1 replica, found %d", purgLen) 2452 } 2453 2454 // Wait for much longer than the ttl to accumulate GCByteAge. 2455 manual.Increment(10 * ttl.Nanoseconds()) 2456 // Trigger the GC queue, which should clean up the earlier version of the 2457 // row. Once the first version of the row is cleaned up, the range should 2458 // exit the split queue purgatory. 2459 repl := store.LookupReplica(tableKey) 2460 if err := store.ManualGC(repl); err != nil { 2461 t.Fatal(err) 2462 } 2463 2464 // Signal the split queue's purgatory channel and ensure that the purgatory 2465 // removes its now well-sized replica. 
2466 splitQueuePurgatoryChan <- timeutil.Now() 2467 testutils.SucceedsSoon(t, func() error { 2468 purgLen := store.SplitQueuePurgatoryLength() 2469 if purgLen == 0 { 2470 return nil 2471 } 2472 return errors.Errorf("expected split queue purgatory to be empty, found %d", purgLen) 2473 }) 2474 } 2475 2476 // TestTxnWaitQueueDependencyCycleWithRangeSplit verifies that a range 2477 // split which occurs while a dependency cycle is partially underway 2478 // will cause the pending push txns to be retried such that they 2479 // relocate to the appropriate new range. 2480 func TestTxnWaitQueueDependencyCycleWithRangeSplit(t *testing.T) { 2481 defer leaktest.AfterTest(t)() 2482 2483 testutils.RunTrueAndFalse(t, "read2ndPass", func(t *testing.T, read2ndPass bool) { 2484 var pushCount int32 2485 firstPush := make(chan struct{}) 2486 2487 storeCfg := kvserver.TestStoreConfig(nil) 2488 storeCfg.TestingKnobs.DisableSplitQueue = true 2489 storeCfg.TestingKnobs.DisableMergeQueue = true 2490 storeCfg.TestingKnobs.EvalKnobs.TestingEvalFilter = 2491 func(filterArgs kvserverbase.FilterArgs) *roachpb.Error { 2492 if _, ok := filterArgs.Req.(*roachpb.PushTxnRequest); ok { 2493 if atomic.AddInt32(&pushCount, 1) == 1 { 2494 close(firstPush) 2495 } 2496 } 2497 return nil 2498 } 2499 stopper := stop.NewStopper() 2500 defer stopper.Stop(context.Background()) 2501 store := createTestStoreWithConfig(t, stopper, storeCfg) 2502 2503 lhsKey := roachpb.Key("a") 2504 rhsKey := roachpb.Key("b") 2505 2506 // Split at "a". 2507 args := adminSplitArgs(lhsKey) 2508 if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), args); pErr != nil { 2509 t.Fatalf("split at %q: %s", lhsKey, pErr) 2510 } 2511 lhs := store.LookupReplica(roachpb.RKey("a")) 2512 2513 var txnACount, txnBCount int32 2514 2515 txnAWritesA := make(chan struct{}) 2516 txnAProceeds := make(chan struct{}) 2517 txnBWritesB := make(chan struct{}) 2518 txnBProceeds := make(chan struct{}) 2519 2520 // Start txn to write key a. 2521 txnACh := make(chan error) 2522 go func() { 2523 txnACh <- store.DB().Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error { 2524 if err := txn.Put(ctx, lhsKey, "value"); err != nil { 2525 return err 2526 } 2527 if atomic.LoadInt32(&txnACount) == 0 { 2528 close(txnAWritesA) 2529 <-txnAProceeds 2530 } 2531 atomic.AddInt32(&txnACount, 1) 2532 return txn.Put(ctx, rhsKey, "value-from-A") 2533 }) 2534 }() 2535 <-txnAWritesA 2536 2537 // Start txn to write key b. 2538 txnBCh := make(chan error) 2539 go func() { 2540 txnBCh <- store.DB().Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error { 2541 if err := txn.Put(ctx, rhsKey, "value"); err != nil { 2542 return err 2543 } 2544 if atomic.LoadInt32(&txnBCount) == 0 { 2545 close(txnBWritesB) 2546 <-txnBProceeds 2547 } 2548 atomic.AddInt32(&txnBCount, 1) 2549 // Read instead of write key "a" if directed. This caused a 2550 // PUSH_TIMESTAMP to be issued from txn B instead of PUSH_ABORT. 2551 if read2ndPass { 2552 if _, err := txn.Get(ctx, lhsKey); err != nil { 2553 return err 2554 } 2555 } else { 2556 if err := txn.Put(ctx, lhsKey, "value-from-B"); err != nil { 2557 return err 2558 } 2559 } 2560 return nil 2561 }) 2562 }() 2563 <-txnBWritesB 2564 2565 // Now, let txnA proceed before splitting. 2566 close(txnAProceeds) 2567 // Wait for the push to occur. 2568 <-firstPush 2569 2570 // Split at "b". 
2571 args = adminSplitArgs(rhsKey) 2572 if _, pErr := kv.SendWrappedWith(context.Background(), store, roachpb.Header{ 2573 RangeID: lhs.RangeID, 2574 }, args); pErr != nil { 2575 t.Fatalf("split at %q: %s", rhsKey, pErr) 2576 } 2577 2578 // Now that we've split, allow txnB to proceed. 2579 close(txnBProceeds) 2580 2581 // Verify that both complete. 2582 for i, ch := range []chan error{txnACh, txnBCh} { 2583 if err := <-ch; err != nil { 2584 t.Fatalf("%d: txn failure: %+v", i, err) 2585 } 2586 } 2587 }) 2588 } 2589 2590 func TestStoreCapacityAfterSplit(t *testing.T) { 2591 defer leaktest.AfterTest(t)() 2592 stopper := stop.NewStopper() 2593 defer stopper.Stop(context.Background()) 2594 manualClock := hlc.NewManualClock(123) 2595 cfg := kvserver.TestStoreConfig(hlc.NewClock(manualClock.UnixNano, time.Nanosecond)) 2596 cfg.TestingKnobs.DisableSplitQueue = true 2597 cfg.TestingKnobs.DisableMergeQueue = true 2598 s := createTestStoreWithOpts( 2599 t, 2600 testStoreOpts{ 2601 // This test was written before the test stores were able to start with 2602 // more than one range and is not prepared to handle many ranges. 2603 dontCreateSystemRanges: true, 2604 cfg: &cfg}, 2605 stopper) 2606 2607 cap, err := s.Capacity(false /* useCached */) 2608 if err != nil { 2609 t.Fatal(err) 2610 } 2611 if e, a := int32(1), cap.RangeCount; e != a { 2612 t.Errorf("expected cap.RangeCount=%d, got %d", e, a) 2613 } 2614 bpr1 := cap.BytesPerReplica 2615 if bpr1.P10 <= 0 { 2616 t.Errorf("expected all bytes-per-replica to be positive, got %+v", bpr1) 2617 } 2618 if bpr1.P10 != bpr1.P25 || bpr1.P10 != bpr1.P50 || bpr1.P10 != bpr1.P75 || bpr1.P10 != bpr1.P90 { 2619 t.Errorf("expected all bytes-per-replica percentiles to be identical, got %+v", bpr1) 2620 } 2621 wpr1 := cap.WritesPerReplica 2622 if wpr1.P10 != wpr1.P25 || wpr1.P10 != wpr1.P50 || wpr1.P10 != wpr1.P75 || wpr1.P10 != wpr1.P90 { 2623 t.Errorf("expected all writes-per-replica percentiles to be identical, got %+v", wpr1) 2624 } 2625 2626 // Increment the manual clock and do a write to increase the qps above zero. 
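// The WritesPerSecond assertion further below uses the lower bound
// 1/float64(MinStatsDuration/time.Second): advancing the clock by exactly
// MinStatsDuration and then performing a single put amortizes to at least one
// write per MinStatsDuration. For example, if MinStatsDuration were 10s (a value
// assumed here purely for illustration), the check would be
// cap.WritesPerSecond >= 1.0/10 = 0.1.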
2627 manualClock.Increment(int64(kvserver.MinStatsDuration)) 2628 key := roachpb.Key("a") 2629 pArgs := putArgs(key, []byte("aaa")) 2630 if _, pErr := kv.SendWrapped(context.Background(), s.TestSender(), pArgs); pErr != nil { 2631 t.Fatal(pErr) 2632 } 2633 2634 cap, err = s.Capacity(false /* useCached */) 2635 if err != nil { 2636 t.Fatal(err) 2637 } 2638 if e, a := int32(1), cap.RangeCount; e != a { 2639 t.Errorf("expected cap.RangeCount=%d, got %d", e, a) 2640 } 2641 if e, a := int32(1), cap.LeaseCount; e != a { 2642 t.Errorf("expected cap.LeaseCount=%d, got %d", e, a) 2643 } 2644 if minExpected, a := 1/float64(kvserver.MinStatsDuration/time.Second), cap.WritesPerSecond; minExpected > a { 2645 t.Errorf("expected cap.WritesPerSecond >= %f, got %f", minExpected, a) 2646 } 2647 bpr2 := cap.BytesPerReplica 2648 if bpr2.P10 <= bpr1.P10 { 2649 t.Errorf("expected BytesPerReplica to have increased from %+v, but got %+v", bpr1, bpr2) 2650 } 2651 if bpr2.P10 != bpr2.P25 || bpr2.P10 != bpr2.P50 || bpr2.P10 != bpr2.P75 || bpr2.P10 != bpr2.P90 { 2652 t.Errorf("expected all bytes-per-replica percentiles to be identical, got %+v", bpr2) 2653 } 2654 wpr2 := cap.WritesPerReplica 2655 if wpr2.P10 <= wpr1.P10 { 2656 t.Errorf("expected WritesPerReplica to have increased from %+v, but got %+v", wpr1, wpr2) 2657 } 2658 if wpr2.P10 != wpr2.P25 || wpr2.P10 != wpr2.P50 || wpr2.P10 != wpr2.P75 || wpr2.P10 != wpr2.P90 { 2659 t.Errorf("expected all writes-per-replica percentiles to be identical, got %+v", wpr2) 2660 } 2661 if wpr2.P10 != cap.WritesPerSecond { 2662 t.Errorf("expected WritesPerReplica.percentiles to equal cap.WritesPerSecond, but got %f and %f", 2663 wpr2.P10, cap.WritesPerSecond) 2664 } 2665 2666 // Split the range to verify stats work properly with more than one range. 2667 sArgs := adminSplitArgs(key) 2668 if _, pErr := kv.SendWrapped(context.Background(), s.TestSender(), sArgs); pErr != nil { 2669 t.Fatal(pErr) 2670 } 2671 2672 cap, err = s.Capacity(false /* useCached */) 2673 if err != nil { 2674 t.Fatal(err) 2675 } 2676 if e, a := int32(2), cap.RangeCount; e != a { 2677 t.Errorf("expected cap.RangeCount=%d, got %d", e, a) 2678 } 2679 if e, a := int32(2), cap.LeaseCount; e != a { 2680 t.Errorf("expected cap.LeaseCount=%d, got %d", e, a) 2681 } 2682 { 2683 bpr := cap.BytesPerReplica 2684 if bpr.P10 != bpr.P25 { 2685 t.Errorf("expected BytesPerReplica p10 and p25 to be equal with 2 replicas, got %+v", bpr) 2686 } 2687 if bpr.P50 != bpr.P75 || bpr.P50 != bpr.P90 { 2688 t.Errorf("expected BytesPerReplica p50, p75, and p90 to be equal with 2 replicas, got %+v", bpr) 2689 } 2690 if bpr.P10 == bpr.P90 { 2691 t.Errorf("expected BytesPerReplica p10 and p90 to be different with 2 replicas, got %+v", bpr) 2692 } 2693 } 2694 } 2695 2696 // TestRangeLookupAfterMeta2Split verifies that RangeLookup scans succeed even 2697 // when user ranges span the boundary of two split meta2 ranges. We test this 2698 // with forward and reverse ScanRequests so that we test both forward and 2699 // reverse RangeLookups. In the case of both the RangeLookup scan directions, 2700 // the forward part of the scan will need to continue onto a second range to 2701 // find the desired RangeDescriptor (remember that a reverse RangeLookup 2702 // includes an initial forward scan). 
2703 func TestRangeLookupAfterMeta2Split(t *testing.T) { 2704 defer leaktest.AfterTest(t)() 2705 2706 ctx := context.Background() 2707 srv, _, _ := serverutils.StartServer(t, base.TestServerArgs{ 2708 Knobs: base.TestingKnobs{ 2709 Store: &kvserver.StoreTestingKnobs{ 2710 DisableMergeQueue: true, 2711 }, 2712 }, 2713 }) 2714 s := srv.(*server.TestServer) 2715 defer s.Stopper().Stop(ctx) 2716 2717 // Create a split at /Table/48 and /Meta2/Table/51. This creates: 2718 // meta ranges [/Min-/Meta2/Table/51) and [/Meta2/Table/51-/System) 2719 // user ranges [/Table/19-/Table/48) and [/Table/48-/Max) 2720 // 2721 // Note that the two boundaries are offset such that a lookup for key /Table/49 2722 // will first search for meta(/Table/49) which is on the left meta2 range. However, 2723 // the user range [/Table/48-/Max) is stored on the right meta2 range, so the lookup 2724 // will require a scan that continues into the next meta2 range. 2725 const tableID = keys.MinUserDescID + 1 // 51 2726 splitReq := adminSplitArgs(keys.SystemSQLCodec.TablePrefix(tableID - 3 /* 48 */)) 2727 if _, pErr := kv.SendWrapped(ctx, s.DB().NonTransactionalSender(), splitReq); pErr != nil { 2728 t.Fatal(pErr) 2729 } 2730 2731 metaKey := keys.RangeMetaKey(roachpb.RKey(keys.SystemSQLCodec.TablePrefix(tableID))).AsRawKey() 2732 splitReq = adminSplitArgs(metaKey) 2733 if _, pErr := kv.SendWrapped(ctx, s.DB().NonTransactionalSender(), splitReq); pErr != nil { 2734 t.Fatal(pErr) 2735 } 2736 2737 testutils.RunTrueAndFalse(t, "reverse", func(t *testing.T, rev bool) { 2738 // Clear the RangeDescriptorCache so that no cached descriptors are 2739 // available from previous lookups. 2740 s.DistSender().RangeDescriptorCache().Clear() 2741 2742 // Scan from [/Table/49-/Table/50) both forwards and backwards. 2743 // Either way, the resulting RangeLookup scan will be forced to 2744 // perform a continuation lookup. 2745 scanStart := keys.SystemSQLCodec.TablePrefix(tableID - 2) // 49 2746 scanEnd := scanStart.PrefixEnd() // 50 2747 header := roachpb.RequestHeader{ 2748 Key: scanStart, 2749 EndKey: scanEnd, 2750 } 2751 2752 var lookupReq roachpb.Request 2753 if rev { 2754 // A ReverseScanRequest will trigger a reverse RangeLookup scan. 2755 lookupReq = &roachpb.ReverseScanRequest{RequestHeader: header} 2756 } else { 2757 lookupReq = &roachpb.ScanRequest{RequestHeader: header} 2758 } 2759 if _, err := kv.SendWrapped(ctx, s.DB().NonTransactionalSender(), lookupReq); err != nil { 2760 t.Fatalf("%T %v", err.GoError(), err) 2761 } 2762 }) 2763 } 2764 2765 // TestStoreSplitRangeLookupRace verifies that a RangeLookup scanning across 2766 // multiple meta2 ranges that races with a split and misses all matching 2767 // descriptors will retry its scan until it succeeds. 2768 // 2769 // This test creates a series of events that result in the injected range 2770 // lookup scan response we see in TestRangeLookupRaceSplits/MissingDescriptor. 2771 // It demonstrates how it is possible for an inconsistent range lookup scan 2772 // that spans multiple ranges to completely miss its desired descriptor. 2773 func TestStoreSplitRangeLookupRace(t *testing.T) { 2774 defer leaktest.AfterTest(t)() 2775 2776 // The scenario is modeled after: 2777 // https://github.com/cockroachdb/cockroach/issues/19147#issuecomment-336741791 2778 // See that comment for a description of why a non-transactional scan 2779 // starting at "/meta2/k" may only see non-matching descriptors when racing 2780 // with a split. 
2781 // 2782 // To simulate this situation, we first perform splits at "/meta2/n", "j", 2783 // and "p". This creates the following structure, where the descriptor for 2784 // range [j, p) is stored on the second meta2 range: 2785 // 2786 // [/meta2/a,/meta2/n), [/meta2/n,/meta2/z) 2787 // -----^ 2788 // ... [j, p) ... 2789 // 2790 // We then initiate a range lookup for key "k". This lookup will begin 2791 // scanning on the first meta2 range but won't find its desired descriptor. Normally, 2792 // it would continue scanning onto the second meta2 range and find the descriptor 2793 // for range [j, p) at "/meta2/p" (see TestRangeLookupAfterMeta2Split). However, 2794 // because RangeLookup scans are non-transactional, this can race with a split. 2795 // Here, we split at key "m", which creates the structure: 2796 // 2797 // [/meta2/a,/meta2/n), [/meta2/n,/meta2/z) 2798 // ^-- ---^ 2799 // ... [j,m), [m,p) ... 2800 // 2801 // If the second half of the RangeLookup scan sees the second meta2 range after 2802 // this split, it will miss the old descriptor for [j, p) and the new descriptor 2803 // for [j, m). In this case, the RangeLookup should retry. 2804 lookupKey := roachpb.Key("k") 2805 bounds, err := keys.MetaScanBounds(keys.RangeMetaKey(roachpb.RKey(lookupKey))) 2806 if err != nil { 2807 t.Fatal(err) 2808 } 2809 2810 // The following filter and set of channels are used to block the RangeLookup 2811 // scan for key "k" after it has scanned over the first meta2 range but not 2812 // the second. 2813 blockRangeLookups := make(chan struct{}) 2814 blockedRangeLookups := int32(0) 2815 rangeLookupIsBlocked := make(chan struct{}, 1) 2816 unblockRangeLookups := make(chan struct{}) 2817 respFilter := func(ctx context.Context, ba roachpb.BatchRequest, _ *roachpb.BatchResponse) *roachpb.Error { 2818 select { 2819 case <-blockRangeLookups: 2820 if kv.TestingIsRangeLookup(ba) && 2821 ba.Requests[0].GetInner().(*roachpb.ScanRequest).Key.Equal(bounds.Key.AsRawKey()) { 2822 2823 select { 2824 case rangeLookupIsBlocked <- struct{}{}: 2825 atomic.AddInt32(&blockedRangeLookups, 1) 2826 default: 2827 } 2828 <-unblockRangeLookups 2829 } 2830 default: 2831 } 2832 return nil 2833 } 2834 2835 srv, _, _ := serverutils.StartServer(t, base.TestServerArgs{ 2836 Knobs: base.TestingKnobs{ 2837 Store: &kvserver.StoreTestingKnobs{ 2838 DisableSplitQueue: true, 2839 DisableMergeQueue: true, 2840 TestingResponseFilter: respFilter, 2841 IntentResolverKnobs: kvserverbase.IntentResolverTestingKnobs{ 2842 ForceSyncIntentResolution: true, 2843 }, 2844 }, 2845 }, 2846 }) 2847 s := srv.(*server.TestServer) 2848 defer s.Stopper().Stop(context.Background()) 2849 store, err := s.Stores().GetStore(s.GetFirstStoreID()) 2850 if err != nil { 2851 t.Fatal(err) 2852 } 2853 2854 mustSplit := func(splitKey roachpb.Key) { 2855 args := adminSplitArgs(splitKey) 2856 2857 // Don't use s.DistSender() so that we don't disturb the RangeDescriptorCache. 2858 rangeID := store.LookupReplica(roachpb.RKey(splitKey)).RangeID 2859 _, pErr := kv.SendWrappedWith(context.Background(), store, roachpb.Header{ 2860 RangeID: rangeID, 2861 }, args) 2862 if pErr != nil { 2863 t.Fatal(pErr) 2864 } 2865 } 2866 2867 // Perform the initial splits. See above. 2868 mustSplit(keys.SystemPrefix) 2869 mustSplit(keys.RangeMetaKey(roachpb.RKey("n")).AsRawKey()) 2870 mustSplit(roachpb.Key("j")) 2871 mustSplit(roachpb.Key("p")) 2872 2873 // Launch a goroutine to perform a range lookup for key "k" that will race 2874 // with a split at key "m".
2875 rangeLookupErr := make(chan error) 2876 go func() { 2877 close(blockRangeLookups) 2878 2879 // Loop until at least one range lookup is triggered and blocked. 2880 // This accounts for races with in-flight range lookups. 2881 var err error 2882 for atomic.LoadInt32(&blockedRangeLookups) == 0 && err == nil { 2883 // Clear the RangeDescriptorCache to trigger a range lookup when the 2884 // lookupKey is next accessed. Then immediately access lookupKey. 2885 s.DistSender().RangeDescriptorCache().Clear() 2886 _, err = s.DB().Get(context.Background(), lookupKey) 2887 } 2888 rangeLookupErr <- err 2889 }() 2890 2891 // Wait until the range lookup is blocked after performing a scan of the 2892 // first range [/meta2/a,/meta2/n) but before performing a scan of the 2893 // second range [/meta2/n,/meta2/z). Then split at key "m". Finally, let the 2894 // range lookup finish. The lookup will fail because it won't get consistent 2895 // results but will eventually succeed after retrying. 2896 select { 2897 case <-rangeLookupIsBlocked: 2898 case err := <-rangeLookupErr: 2899 // Unexpected early return. 2900 t.Fatalf("unexpected range lookup error %v", err) 2901 } 2902 mustSplit(roachpb.Key("m")) 2903 close(unblockRangeLookups) 2904 2905 if err := <-rangeLookupErr; err != nil { 2906 t.Fatalf("unexpected range lookup error %v", err) 2907 } 2908 } 2909 2910 // Verify that range lookup operations do not synchronously perform intent 2911 // resolution as doing so can deadlock with the RangeDescriptorCache. See 2912 // #17760. 2913 func TestRangeLookupAsyncResolveIntent(t *testing.T) { 2914 defer leaktest.AfterTest(t)() 2915 2916 blockPushTxn := make(chan struct{}) 2917 defer close(blockPushTxn) 2918 2919 // Disable async tasks in the intent resolver. All tasks will be synchronous. 2920 cfg := kvserver.TestStoreConfig(nil) 2921 cfg.TestingKnobs.IntentResolverKnobs.ForceSyncIntentResolution = true 2922 cfg.TestingKnobs.DisableSplitQueue = true 2923 cfg.TestingKnobs.DisableMergeQueue = true 2924 cfg.TestingKnobs.TestingProposalFilter = 2925 func(args kvserverbase.ProposalFilterArgs) *roachpb.Error { 2926 for _, union := range args.Req.Requests { 2927 if union.GetInner().Method() == roachpb.PushTxn { 2928 <-blockPushTxn 2929 break 2930 } 2931 } 2932 return nil 2933 } 2934 ctx := context.Background() 2935 stopper := stop.NewStopper() 2936 defer stopper.Stop(ctx) 2937 store := createTestStoreWithConfig(t, stopper, cfg) 2938 2939 // Split range 1 at an arbitrary key so that we're not dealing with the 2940 // first range for the rest of this test. The first range is handled 2941 // specially by the range descriptor cache. 2942 key := roachpb.Key("a") 2943 args := adminSplitArgs(key) 2944 if _, pErr := kv.SendWrapped(ctx, store.TestSender(), args); pErr != nil { 2945 t.Fatal(pErr) 2946 } 2947 2948 // Get original meta2 descriptor. 2949 rs, _, err := kv.RangeLookup(ctx, store.TestSender(), key, roachpb.READ_UNCOMMITTED, 0, false) 2950 if err != nil { 2951 t.Fatal(err) 2952 } 2953 origDesc := rs[0] 2954 2955 key2 := roachpb.Key("e") 2956 newDesc := origDesc 2957 newDesc.EndKey, err = keys.Addr(key2) 2958 if err != nil { 2959 t.Fatal(err) 2960 } 2961 2962 // Write the new descriptor as an intent. 2963 data, err := protoutil.Marshal(&newDesc) 2964 if err != nil { 2965 t.Fatal(err) 2966 } 2967 txn := roachpb.MakeTransaction("test", key2, 1, 2968 store.Clock().Now(), store.Clock().MaxOffset().Nanoseconds()) 2969 // Officially begin the transaction.
	// machinery would simply remove the intent we write below, see #3020.
	// We send directly to Replica throughout this test, so there's no danger
	// of the Store aborting this transaction (i.e. we don't have to set a high
	// priority).
	pArgs := putArgs(keys.RangeMetaKey(roachpb.RKey(key2)).AsRawKey(), data)
	txn.Sequence++
	pArgs.Sequence = txn.Sequence
	if _, pErr := kv.SendWrappedWith(ctx, store.TestSender(), roachpb.Header{Txn: &txn}, pArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// Clear the range descriptor cache so that any future requests will first
	// need to perform a RangeLookup.
	store.DB().NonTransactionalSender().(*kv.CrossRangeTxnWrapperSender).Wrapped().(*kvcoord.DistSender).RangeDescriptorCache().Clear()

	// Now send a request, forcing the RangeLookup. Since the lookup is
	// inconsistent, there's no WriteIntentError, but we'll try to resolve any
	// intents that are found. If the RangeLookup op attempts to resolve the
	// intents synchronously, the operation will block forever.
	//
	// Note that 'a' < 'e'.
	if _, err := store.DB().Get(ctx, key); err != nil {
		t.Fatal(err)
	}
}

// Verify that replicas don't temporarily disappear from the replicas map during
// splits. See #29144.
func TestStoreSplitDisappearingReplicas(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	store, _ := createTestStore(t, stopper)
	go kvserver.WatchForDisappearingReplicas(t, store)
	for i := 0; i < 100; i++ {
		key := roachpb.Key(fmt.Sprintf("a%d", i))
		args := adminSplitArgs(key)
		if _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), args); pErr != nil {
			t.Fatalf("%q: split unexpected error: %s", key, pErr)
		}
	}
}

// Regression test for #21146. This verifies the behavior when the application
// of some split command (part of the lhs's log) is delayed on some store and
// meanwhile the rhs has rebalanced away and back, ending up with a larger
// ReplicaID than the split thinks it will have. Additionally, we remove the
// LHS replica on the same store before the split and re-add it after, so that
// when connectivity is restored the LHS will apply a split trigger while it is
// not a part of the descriptor.
//
// Or, in pictures (s3 looks like s1 throughout and is omitted):
//
//   s1: [----r1@all-------------]
//   s2: [----r1@all-------------]
// Remove s2:
//   s1: [----r1@s1s3------------]
//   s2: [----r1@all-------------] (outdated)
// Split r1:
//   s1: [-r1@s1s3-|--r2@s1s3----]
//   s2: [----r1@all-------------] (outdated)
// Add s2:
//   s1: [-r1@all-|--r2@s1s3-----]
//   s2: [----r1@all-------------] (outdated)
// Add learner to s2 on r2 (remains uninitialized due to LHS state blocking it):
//   s1: [-r1@s1s3-|--r2@all-----]
//   s2: [----r1@all-------------] (outdated), uninitialized replica r2/3
// Remove and re-add learner multiple times: r2/3 becomes r2/100
//   (diagram looks the same except for replacing r2/3)
//
// When connectivity is restored, r1@s2 will start to catch up on the raft log
// after it learns of its new replicaID. It first processes the replication
// change that removes it and switches to a desc that doesn't contain itself as
// a replica. Next it sees the split trigger that once caused a crash because
// the store tried to look up itself and failed. Once this is handled correctly,
// the split trigger next has to look up the right hand side, which surprisingly
// has a higher replicaID than the one seen in the split trigger. This too needs
// to be tolerated.
func TestSplitTriggerMeetsUnexpectedReplicaID(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()

	blockPromoteCh := make(chan struct{})
	var skipLearnerSnaps int32
	withoutLearnerSnap := func(fn func()) {
		atomic.StoreInt32(&skipLearnerSnaps, 1)
		fn()
		atomic.StoreInt32(&skipLearnerSnaps, 0)
	}
	knobs := base.TestingKnobs{Store: &kvserver.StoreTestingKnobs{
		ReplicaSkipLearnerSnapshot: func() bool {
			return atomic.LoadInt32(&skipLearnerSnaps) != 0
		},
		ReplicaAddStopAfterLearnerSnapshot: func(targets []roachpb.ReplicationTarget) bool {
			if atomic.LoadInt32(&skipLearnerSnaps) != 0 {
				return false
			}
			if len(targets) > 0 && targets[0].StoreID == 2 {
				<-blockPromoteCh
			}
			return false
		},
		ReplicaAddSkipLearnerRollback: func() bool {
			return true
		},
		// We rely on replicas remaining where they are even when they are removed
		// from the range, as this lets us set up a split trigger that will apply
		// on a replica that is (at the time of the split trigger) not a member.
		DisableReplicaGCQueue: true,
	}}
	tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{
		ServerArgs:      base.TestServerArgs{Knobs: knobs},
		ReplicationMode: base.ReplicationManual,
	})
	defer tc.Stopper().Stop(ctx)

	k := tc.ScratchRange(t)
	desc := tc.LookupRangeOrFatal(t, k)

	// Add a replica on n3, which we'll need to achieve quorum while we cut off n2 below.
	tc.AddReplicasOrFatal(t, k, tc.Target(2))

	// First construct a range with a learner replica on the second node (index 1)
	// and split it, ending up with an orphaned learner on each side of the split.
	// After the learner is created, but before the split, block all incoming raft
	// traffic to the learner on the lhs of the split (which is still on the
	// second node).
	g := ctxgroup.WithContext(ctx)
	g.GoCtx(func(ctx context.Context) error {
		_, err := tc.AddReplicas(k, tc.Target(1))
		return err
	})

	store, _ := getFirstStoreReplica(t, tc.Server(1), k)
	tc.Servers[1].RaftTransport().Listen(store.StoreID(), &unreliableRaftHandler{
		rangeID:            desc.RangeID,
		RaftMessageHandler: store,
	})

	_, kRHS := k, k.Next()
	// Remove the LHS on the isolated store, split the range, and re-add it.
	tc.RemoveReplicasOrFatal(t, k, tc.Target(1))
	descLHS, descRHS := tc.SplitRangeOrFatal(t, kRHS)
	withoutLearnerSnap(func() {
		// NB: can't use AddReplicas since that waits for the target to be up
		// to date, which it won't be in this case.
		//
		// We avoid sending a snapshot because that snapshot would include the
		// split trigger and we want that to be processed via the log.
		d, err := tc.Servers[0].DB().AdminChangeReplicas(
			ctx, descLHS.StartKey.AsRawKey(), descLHS, roachpb.MakeReplicationChanges(roachpb.ADD_REPLICA, tc.Target(1)),
		)
		require.NoError(t, err)
		descLHS = *d
	})

	close(blockPromoteCh)
	if err := g.Wait(); !testutils.IsError(err, `descriptor changed`) {
		t.Fatalf(`expected "descriptor changed" error got: %+v`, err)
	}

	// Now repeatedly re-add the learner on the rhs, so it has a
	// different replicaID than the split trigger expects.
	add := func() {
		_, err := tc.AddReplicas(kRHS, tc.Target(1))
		// The "snapshot intersects existing range" error is expected if the store
		// has not heard a raft message addressed to a later replica ID while the
		// "was not found on" error is expected if the store has heard that it has
		// a newer replica ID before receiving the snapshot.
		if !testutils.IsError(err, `snapshot intersects existing range|r[0-9]+ was not found on s[0-9]+`) {
			t.Fatalf(`expected "snapshot intersects existing range|r[0-9]+ was not found on s[0-9]+" error got: %+v`, err)
		}
	}
	for i := 0; i < 5; i++ {
		add()
		tc.RemoveReplicasOrFatal(t, kRHS, tc.Target(1))
	}
	add()

	// Normally AddReplicas will return the latest version of the RangeDescriptor,
	// but because we're getting snapshot errors and using the
	// ReplicaAddSkipLearnerRollback hook, we have to look it up again ourselves
	// to find the current replicaID for the RHS learner.
	descRHS = tc.LookupRangeOrFatal(t, kRHS)
	learnerDescRHS, ok := descRHS.GetReplicaDescriptor(store.StoreID())
	require.True(t, ok)

	// Wait for there to be an in-memory, uninitialized learner replica with the
	// latest ReplicaID. Note: it cannot become initialized at this point because
	// it needs a snapshot to do that and (as can be seen in the error check
	// above) snapshots will intersect the lhs replica (which doesn't know about
	// the split because we've blocked its raft traffic, and so it still covers
	// the pre-split keyspace).
	testutils.SucceedsSoon(t, func() error {
		repl, err := store.GetReplica(descRHS.RangeID)
		if err != nil {
			return err
		}
		status := repl.RaftStatus()
		if status == nil {
			return errors.New("raft group not initialized")
		}
		if replicaID := roachpb.ReplicaID(status.ID); replicaID != learnerDescRHS.ReplicaID {
			return errors.Errorf("expected %d got %d", learnerDescRHS.ReplicaID, replicaID)
		}
		return nil
	})

	// Re-enable raft and wait for the lhs to catch up to the post-split
	// descriptor. This used to panic with "raft group deleted".
	tc.Servers[1].RaftTransport().Listen(store.StoreID(), store)
	testutils.SucceedsSoon(t, func() error {
		repl, err := store.GetReplica(descLHS.RangeID)
		if err != nil {
			return err
		}
		if desc := repl.Desc(); desc.IsInitialized() && !descLHS.Equal(desc) {
			require.NoError(t, store.ManualReplicaGC(repl))
			return errors.Errorf("expected %s got %s", &descLHS, desc)
		}
		return nil
	})
}

// TestSplitBlocksReadsToRHS tests that an ongoing range split does not
// interrupt reads to the LHS of the split but does interrupt reads for the RHS
// of the split. The test relies on the fact that EndTxn(SplitTrigger) declares
// read access to the LHS of the split but declares write access to the RHS of
// the split.
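//
// As an illustrative sketch only (the spans below are paraphrased from the
// statement above, not lifted from the actual latch declarations): while the
// split's EndTxn is in flight, it effectively holds
//
//   read  latches over [LHS start, "b")  -> reads of "a" proceed concurrently
//   write latches over ["b", LHS end)    -> reads of "c" queue behind the split
//
// so the LHS reads below succeed immediately, while the RHS reads only finish
// (with a RangeKeyMismatchError) once the split has committed.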
func TestSplitBlocksReadsToRHS(t *testing.T) {
	defer leaktest.AfterTest(t)()

	keyLHS, keySplit, keyRHS := roachpb.Key("a"), roachpb.Key("b"), roachpb.Key("c")
	splitBlocked := make(chan struct{})
	propFilter := func(args kvserverbase.ProposalFilterArgs) *roachpb.Error {
		if req, ok := args.Req.GetArg(roachpb.EndTxn); ok {
			et := req.(*roachpb.EndTxnRequest)
			if tr := et.InternalCommitTrigger.GetSplitTrigger(); tr != nil {
				if tr.RightDesc.StartKey.Equal(keySplit) {
					// Signal that the split is blocked.
					splitBlocked <- struct{}{}
					// Wait for the split to be unblocked.
					<-splitBlocked
				}
			}
		}
		return nil
	}

	storeCfg := kvserver.TestStoreConfig(nil)
	storeCfg.TestingKnobs.DisableSplitQueue = true
	storeCfg.TestingKnobs.DisableMergeQueue = true
	storeCfg.TestingKnobs.TestingProposalFilter = propFilter
	ctx := context.Background()
	stopper := stop.NewStopper()
	defer stopper.Stop(ctx)
	store := createTestStoreWithConfig(t, stopper, storeCfg)
	repl := store.LookupReplica(roachpb.RKey(keySplit))
	tsBefore := store.Clock().Now()

	// Begin splitting the range.
	g := ctxgroup.WithContext(ctx)
	g.GoCtx(func(ctx context.Context) error {
		args := adminSplitArgs(keySplit)
		_, pErr := kv.SendWrapped(ctx, store.TestSender(), args)
		return pErr.GoError()
	})

	// Wait until the split is underway.
	<-splitBlocked
	tsAfter := store.Clock().Now()

	// Read from the LHS and RHS, both below and above the split timestamp.
	lhsDone, rhsDone := make(chan error, 2), make(chan error, 2)
	for _, keyAndChan := range []struct {
		key   roachpb.Key
		errCh chan error
	}{
		{keyLHS, lhsDone},
		{keyRHS, rhsDone},
	} {
		for _, ts := range []hlc.Timestamp{tsBefore, tsAfter} {
			h := roachpb.Header{Timestamp: ts, RangeID: repl.RangeID}
			args := getArgs(keyAndChan.key)
			errCh := keyAndChan.errCh
			g.GoCtx(func(ctx context.Context) error {
				// Send directly to repl to avoid racing with the
				// split and routing requests to the post-split RHS.
				_, pErr := kv.SendWrappedWith(ctx, repl, h, args)
				errCh <- pErr.GoError()
				return nil
			})
		}
	}

	// Only the LHS reads should succeed. The RHS reads should get
	// blocked waiting to acquire latches.
	for i := 0; i < cap(lhsDone); i++ {
		require.NoError(t, <-lhsDone)
	}
	select {
	case err := <-rhsDone:
		require.NoError(t, err)
		t.Fatal("unexpected read on RHS during split")
	case <-time.After(2 * time.Millisecond):
	}

	// Unblock the split.
	splitBlocked <- struct{}{}

	// The RHS reads should now both hit a RangeKeyMismatchError.
	for i := 0; i < cap(rhsDone); i++ {
		require.Regexp(t, "outside of bounds of range", <-rhsDone)
	}
	require.Nil(t, g.Wait())
}
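
// sendToReplicaDirect is a minimal sketch of the pattern several tests above
// rely on: routing a request to a specific replica by RangeID via the Store,
// bypassing the DistSender so that the RangeDescriptorCache is neither
// consulted nor updated. It is illustrative only and not used by the tests in
// this file; the helper name is ours, while the calls mirror those already
// used above (Store.LookupReplica, kv.SendWrappedWith).
func sendToReplicaDirect(
	ctx context.Context, store *kvserver.Store, key roachpb.Key, req roachpb.Request,
) (roachpb.Response, *roachpb.Error) {
	repl := store.LookupReplica(roachpb.RKey(key))
	if repl == nil {
		return nil, roachpb.NewErrorf("no replica found for key %s", key)
	}
	// Addressing the Store with an explicit RangeID skips DistSender routing,
	// which is what lets helpers like mustSplit above avoid disturbing the
	// RangeDescriptorCache.
	return kv.SendWrappedWith(ctx, store, roachpb.Header{RangeID: repl.RangeID}, req)
}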