// Copyright 2016 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver_test

import (
	"context"
	"fmt"
	"reflect"
	"sort"
	"sync/atomic"
	"testing"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/config"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/rpc"
	"github.com/cockroachdb/cockroach/pkg/server"
	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/logtags"
	"github.com/gogo/protobuf/proto"
	"github.com/stretchr/testify/require"
)

// verifyLiveness waits until every node considers every other node live and
// each node's LiveNodes metric matches the cluster size.
func verifyLiveness(t *testing.T, mtc *multiTestContext) {
	testutils.SucceedsSoon(t, func() error {
		for i, nl := range mtc.nodeLivenesses {
			for _, g := range mtc.gossips {
				live, err := nl.IsLive(g.NodeID.Get())
				if err != nil {
					return err
				} else if !live {
					return errors.Errorf("node %d not live", g.NodeID.Get())
				}
			}
			if a, e := nl.Metrics().LiveNodes.Value(), int64(len(mtc.nodeLivenesses)); a != e {
				return errors.Errorf("expected node %d's LiveNodes metric to be %d; got %d",
					mtc.gossips[i].NodeID.Get(), e, a)
			}
		}
		return nil
	})
}

// pauseNodeLivenessHeartbeats toggles the background heartbeat loop on every
// node so that tests can control liveness expiration via the manual clock.
func pauseNodeLivenessHeartbeats(mtc *multiTestContext, pause bool) {
	for _, nl := range mtc.nodeLivenesses {
		nl.PauseHeartbeat(pause)
	}
}

func TestNodeLiveness(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 3)

	// Verify liveness of all nodes for all nodes.
	verifyLiveness(t, mtc)
	pauseNodeLivenessHeartbeats(mtc, true)

	// Advance clock past the liveness threshold to verify IsLive becomes false.
	mtc.manualClock.Increment(mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1)
	for idx, nl := range mtc.nodeLivenesses {
		nodeID := mtc.gossips[idx].NodeID.Get()
		live, err := nl.IsLive(nodeID)
		if err != nil {
			t.Error(err)
		} else if live {
			t.Errorf("expected node %d to be considered not-live after advancing node clock", nodeID)
		}
		testutils.SucceedsSoon(t, func() error {
			if a, e := nl.Metrics().LiveNodes.Value(), int64(0); a != e {
				return errors.Errorf("expected node %d's LiveNodes metric to be %d; got %d",
					nodeID, e, a)
			}
			return nil
		})
	}
	// Trigger a manual heartbeat and verify liveness is reestablished.
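	// A heartbeat can fail with ErrEpochIncremented if another node
	// incremented this node's epoch once the record expired; the loop below
	// simply retries with the same liveness proto in that case.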
	for _, nl := range mtc.nodeLivenesses {
		l, err := nl.Self()
		if err != nil {
			t.Fatal(err)
		}
		for {
			err := nl.Heartbeat(context.Background(), l)
			if err == nil {
				break
			}
			if errors.Is(err, kvserver.ErrEpochIncremented) {
				log.Warningf(context.Background(), "retrying after %s", err)
				continue
			}

			t.Fatal(err)
		}
	}
	verifyLiveness(t, mtc)

	// Verify metrics counts.
	for i, nl := range mtc.nodeLivenesses {
		if c := nl.Metrics().HeartbeatSuccesses.Count(); c < 2 {
			t.Errorf("node %d: expected metrics count >= 2; got %d", i+1, c)
		}
	}
}

// TestNodeLivenessInitialIncrement verifies that a node's liveness epoch is
// initialized to 1 and is incremented by the first heartbeat after a restart.
func TestNodeLivenessInitialIncrement(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 1)

	// Verify liveness of all nodes for all nodes.
	verifyLiveness(t, mtc)

	liveness, err := mtc.nodeLivenesses[0].GetLiveness(mtc.gossips[0].NodeID.Get())
	if err != nil {
		t.Fatal(err)
	}
	if liveness.Epoch != 1 {
		t.Errorf("expected epoch to be set to 1 initially; got %d", liveness.Epoch)
	}

	// Restart the node and verify the epoch is incremented with initial heartbeat.
	mtc.stopStore(0)
	mtc.restartStore(0)
	verifyEpochIncremented(t, mtc, 0)
}

// verifyEpochIncremented waits until the given node's liveness epoch has
// advanced past its initial value of 1.
func verifyEpochIncremented(t *testing.T, mtc *multiTestContext, nodeIdx int) {
	testutils.SucceedsSoon(t, func() error {
		liveness, err := mtc.nodeLivenesses[nodeIdx].GetLiveness(mtc.gossips[nodeIdx].NodeID.Get())
		if err != nil {
			return err
		}
		if liveness.Epoch < 2 {
			return errors.Errorf("expected epoch to be >=2 on restart but was %d", liveness.Epoch)
		}
		return nil
	})
}

// TestNodeIsLiveCallback verifies that the liveness callback for a
// node is invoked when it changes from state false to true.
func TestNodeIsLiveCallback(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 3)

	// Verify liveness of all nodes for all nodes.
	verifyLiveness(t, mtc)
	pauseNodeLivenessHeartbeats(mtc, true)

	var cbMu syncutil.Mutex
	cbs := map[roachpb.NodeID]struct{}{}
	mtc.nodeLivenesses[0].RegisterCallback(func(nodeID roachpb.NodeID) {
		cbMu.Lock()
		defer cbMu.Unlock()
		cbs[nodeID] = struct{}{}
	})

	// Advance clock past the liveness threshold.
	mtc.manualClock.Increment(mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1)

	// Trigger a manual heartbeat and verify callbacks for each node ID are invoked.
	for _, nl := range mtc.nodeLivenesses {
		l, err := nl.Self()
		if err != nil {
			t.Fatal(err)
		}
		if err := nl.Heartbeat(context.Background(), l); err != nil {
			t.Fatal(err)
		}
	}

	testutils.SucceedsSoon(t, func() error {
		cbMu.Lock()
		defer cbMu.Unlock()
		for _, g := range mtc.gossips {
			nodeID := g.NodeID.Get()
			if _, ok := cbs[nodeID]; !ok {
				return errors.Errorf("expected IsLive callback for node %d", nodeID)
			}
		}
		return nil
	})
}

// TestNodeHeartbeatCallback verifies that HeartbeatCallback is invoked whenever
// this node updates its own liveness status.
func TestNodeHeartbeatCallback(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 3)

	// Verify liveness of all nodes for all nodes.
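	// As in the tests above, establish liveness first and then pause the
	// background heartbeat loops so that the manually advanced clock, not a
	// background heartbeat, decides when liveness records expire.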
	verifyLiveness(t, mtc)
	pauseNodeLivenessHeartbeats(mtc, true)

	// Verify that last update time has been set for all nodes.
	verifyUptimes := func() error {
		expected := mtc.clock().Now()
		for i, s := range mtc.stores {
			uptm, err := s.ReadLastUpTimestamp(context.Background())
			if err != nil {
				return errors.Wrapf(err, "error reading last up time from store %d", i)
			}
			if a, e := uptm.WallTime, expected.WallTime; a != e {
				return errors.Errorf("store %d last uptime = %d; wanted %d", i, a, e)
			}
		}
		return nil
	}

	if err := verifyUptimes(); err != nil {
		t.Fatal(err)
	}

	// Advance clock past the liveness threshold and force a manual heartbeat on
	// all node liveness objects, which should update the last up time for each
	// store.
	mtc.manualClock.Increment(mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1)
	for _, nl := range mtc.nodeLivenesses {
		l, err := nl.Self()
		if err != nil {
			t.Fatal(err)
		}
		if err := nl.Heartbeat(context.Background(), l); err != nil {
			t.Fatal(err)
		}
	}
	// NB: since the heartbeat callback is invoked synchronously in
	// `Heartbeat()` which this goroutine invoked, we don't need to wrap this in
	// a retry.
	if err := verifyUptimes(); err != nil {
		t.Fatal(err)
	}
}

// TestNodeLivenessEpochIncrement verifies that incrementing the epoch
// of a node requires the node to be considered not-live and that on
// increment, no other nodes believe the epoch-incremented node to be
// live.
func TestNodeLivenessEpochIncrement(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 2)

	verifyLiveness(t, mtc)
	pauseNodeLivenessHeartbeats(mtc, true)

	// First try to increment the epoch of a known-live node.
	deadNodeID := mtc.gossips[1].NodeID.Get()
	oldLiveness, err := mtc.nodeLivenesses[0].GetLiveness(deadNodeID)
	if err != nil {
		t.Fatal(err)
	}
	if err := mtc.nodeLivenesses[0].IncrementEpoch(
		context.Background(), oldLiveness); !testutils.IsError(err, "cannot increment epoch on live node") {
		t.Fatalf("expected error incrementing a live node: %+v", err)
	}

	// Advance clock past liveness threshold & increment epoch.
	mtc.manualClock.Increment(mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1)
	if err := mtc.nodeLivenesses[0].IncrementEpoch(context.Background(), oldLiveness); err != nil {
		t.Fatalf("unexpected error incrementing a non-live node: %+v", err)
	}

	// Verify that the epoch has been advanced.
	testutils.SucceedsSoon(t, func() error {
		newLiveness, err := mtc.nodeLivenesses[0].GetLiveness(deadNodeID)
		if err != nil {
			return err
		}
		if newLiveness.Epoch != oldLiveness.Epoch+1 {
			return errors.Errorf("expected epoch to increment")
		}
		if newLiveness.Expiration != oldLiveness.Expiration {
			return errors.Errorf("expected expiration to remain unchanged")
		}
		if live, err := mtc.nodeLivenesses[0].IsLive(deadNodeID); live || err != nil {
			return errors.Errorf("expected dead node to remain dead after epoch increment %t: %v", live, err)
		}
		return nil
	})

	// Verify epoch increment metric count.
	if c := mtc.nodeLivenesses[0].Metrics().EpochIncrements.Count(); c != 1 {
		t.Errorf("expected epoch increment == 1; got %d", c)
	}

	// Verify error on incrementing an already-incremented epoch.
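	// The cached oldLiveness proto is now stale (its epoch was just
	// incremented above), so a second increment attempt should fail with
	// ErrEpochAlreadyIncremented rather than advancing the epoch again.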
	if err := mtc.nodeLivenesses[0].IncrementEpoch(context.Background(), oldLiveness); !errors.Is(err, kvserver.ErrEpochAlreadyIncremented) {
		t.Fatalf("unexpected error incrementing a non-live node: %+v", err)
	}

	// Verify error incrementing with a too-high expectation for liveness epoch.
	oldLiveness.Epoch = 3
	if err := mtc.nodeLivenesses[0].IncrementEpoch(
		context.Background(), oldLiveness); !testutils.IsError(err, "unexpected liveness epoch 2; expected >= 3") {
		t.Fatalf("expected error incrementing with a too-high expected epoch: %+v", err)
	}
}

// TestNodeLivenessRestart verifies that if nodes are shutdown and
// restarted, the node liveness records are re-gossiped immediately.
func TestNodeLivenessRestart(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 2)

	// After verifying node is in liveness table, stop store.
	verifyLiveness(t, mtc)
	mtc.stopStore(0)

	// Clear the liveness records in store 1's gossip to make sure we're
	// seeing the liveness record properly gossiped at store startup.
	var expKeys []string
	for _, g := range mtc.gossips {
		key := gossip.MakeNodeLivenessKey(g.NodeID.Get())
		expKeys = append(expKeys, key)
		if err := g.AddInfoProto(key, &kvserverpb.Liveness{}, 0); err != nil {
			t.Fatal(err)
		}
	}
	sort.Strings(expKeys)

	// Register a callback to gossip in order to verify liveness records
	// are re-gossiped.
	var keysMu struct {
		syncutil.Mutex
		keys []string
	}
	livenessRegex := gossip.MakePrefixPattern(gossip.KeyNodeLivenessPrefix)
	mtc.gossips[0].RegisterCallback(livenessRegex, func(key string, _ roachpb.Value) {
		keysMu.Lock()
		defer keysMu.Unlock()
		for _, k := range keysMu.keys {
			if k == key {
				return
			}
		}
		keysMu.keys = append(keysMu.keys, key)
	})

	// Restart store and verify gossip contains liveness record for nodes 1&2.
	mtc.restartStore(0)
	testutils.SucceedsSoon(t, func() error {
		keysMu.Lock()
		defer keysMu.Unlock()
		sort.Strings(keysMu.keys)
		if !reflect.DeepEqual(keysMu.keys, expKeys) {
			return errors.Errorf("expected keys %+v != keys %+v", expKeys, keysMu.keys)
		}
		return nil
	})
}

// TestNodeLivenessSelf verifies that a node keeps its own most recent liveness
// heartbeat info in preference to anything which might be received belatedly
// through gossip.
//
// Note that this test originally injected a Gossip update with a higher Epoch
// and semantics have since changed to make the "self" record less special. It
// is updated like any other node's record, with appropriate safeguards against
// clobbering in place.
func TestNodeLivenessSelf(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 1)
	g := mtc.gossips[0]

	pauseNodeLivenessHeartbeats(mtc, true)

	// Verify liveness is properly initialized. This needs to be wrapped in a
	// SucceedsSoon because node liveness gets initialized via an async gossip
	// callback.
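	// The record is captured into `liveness` inside the retry closure so that
	// the explicit heartbeat just below can reuse it.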
	var liveness kvserverpb.Liveness
	testutils.SucceedsSoon(t, func() error {
		var err error
		liveness, err = mtc.nodeLivenesses[0].GetLiveness(g.NodeID.Get())
		return err
	})
	if err := mtc.nodeLivenesses[0].Heartbeat(context.Background(), liveness); err != nil {
		t.Fatal(err)
	}

	// Gossip random nonsense for liveness and verify that asking for
	// the node's own node ID returns the "correct" value.
	key := gossip.MakeNodeLivenessKey(g.NodeID.Get())
	var count int32
	g.RegisterCallback(key, func(_ string, _ roachpb.Value) {
		atomic.AddInt32(&count, 1)
	})
	testutils.SucceedsSoon(t, func() error {
		fakeBehindLiveness := liveness
		fakeBehindLiveness.Epoch-- // almost certainly results in zero

		if err := g.AddInfoProto(key, &fakeBehindLiveness, 0); err != nil {
			t.Fatal(err)
		}
		if atomic.LoadInt32(&count) < 2 {
			return errors.New("expected count >= 2")
		}
		return nil
	})

	// Self should not see the fake liveness, but have kept the real one.
	l := mtc.nodeLivenesses[0]
	lGet, err := l.GetLiveness(g.NodeID.Get())
	if err != nil {
		t.Fatal(err)
	}
	lSelf, err := l.Self()
	if err != nil {
		t.Fatal(err)
	}
	if !reflect.DeepEqual(lGet, lSelf) {
		t.Errorf("expected GetLiveness() to return same value as Self(): %+v != %+v", lGet, lSelf)
	}
	if lGet.Epoch == 2 || lSelf.NodeID == 2 {
		t.Errorf("expected GetLiveness() and Self() not to return artificially gossiped liveness: %+v, %+v", lGet, lSelf)
	}
}

// TestNodeLivenessGetIsLiveMap verifies that GetIsLiveMap reports liveness
// and epoch for every node in the cluster.
func TestNodeLivenessGetIsLiveMap(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 3)

	verifyLiveness(t, mtc)
	pauseNodeLivenessHeartbeats(mtc, true)
	lMap := mtc.nodeLivenesses[0].GetIsLiveMap()
	expectedLMap := kvserver.IsLiveMap{
		1: {IsLive: true, Epoch: 1},
		2: {IsLive: true, Epoch: 1},
		3: {IsLive: true, Epoch: 1},
	}
	if !reflect.DeepEqual(expectedLMap, lMap) {
		t.Errorf("expected liveness map %+v; got %+v", expectedLMap, lMap)
	}

	// Advance the clock but only heartbeat node 0.
	mtc.manualClock.Increment(mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1)
	liveness, _ := mtc.nodeLivenesses[0].GetLiveness(mtc.gossips[0].NodeID.Get())

	testutils.SucceedsSoon(t, func() error {
		if err := mtc.nodeLivenesses[0].Heartbeat(context.Background(), liveness); err != nil {
			if errors.Is(err, kvserver.ErrEpochIncremented) {
				return err
			}
			t.Fatal(err)
		}
		return nil
	})

	// Now verify only node 0 is live.
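	// Nodes 2 and 3 did not heartbeat past the advanced clock, so their
	// entries should report IsLive: false while their epochs remain at 1
	// (nobody has incremented them).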
	lMap = mtc.nodeLivenesses[0].GetIsLiveMap()
	expectedLMap = kvserver.IsLiveMap{
		1: {IsLive: true, Epoch: 1},
		2: {IsLive: false, Epoch: 1},
		3: {IsLive: false, Epoch: 1},
	}
	if !reflect.DeepEqual(expectedLMap, lMap) {
		t.Errorf("expected liveness map %+v; got %+v", expectedLMap, lMap)
	}
}

// TestNodeLivenessGetLivenesses verifies that GetLivenesses returns a record
// for every node, with expirations advancing only for nodes that heartbeat.
func TestNodeLivenessGetLivenesses(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 3)

	verifyLiveness(t, mtc)
	pauseNodeLivenessHeartbeats(mtc, true)

	livenesses := mtc.nodeLivenesses[0].GetLivenesses()
	actualLMapNodes := make(map[roachpb.NodeID]struct{})
	originalExpiration := mtc.clock().PhysicalNow() + mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds()
	for _, l := range livenesses {
		if a, e := l.Epoch, int64(1); a != e {
			t.Errorf("liveness record had epoch %d, wanted %d", a, e)
		}
		if a, e := l.Expiration.WallTime, originalExpiration; a != e {
			t.Errorf("liveness record had expiration %d, wanted %d", a, e)
		}
		actualLMapNodes[l.NodeID] = struct{}{}
	}
	expectedLMapNodes := map[roachpb.NodeID]struct{}{1: {}, 2: {}, 3: {}}
	if !reflect.DeepEqual(actualLMapNodes, expectedLMapNodes) {
		t.Errorf("got liveness map nodes %+v; wanted %+v", actualLMapNodes, expectedLMapNodes)
	}

	// Advance the clock but only heartbeat node 0.
	mtc.manualClock.Increment(mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1)
	liveness, _ := mtc.nodeLivenesses[0].GetLiveness(mtc.gossips[0].NodeID.Get())
	if err := mtc.nodeLivenesses[0].Heartbeat(context.Background(), liveness); err != nil {
		t.Fatal(err)
	}

	// Verify that node liveness receives the change.
	livenesses = mtc.nodeLivenesses[0].GetLivenesses()
	actualLMapNodes = make(map[roachpb.NodeID]struct{})
	for _, l := range livenesses {
		if a, e := l.Epoch, int64(1); a != e {
			t.Errorf("liveness record had epoch %d, wanted %d", a, e)
		}
		expectedExpiration := originalExpiration
		if l.NodeID == 1 {
			expectedExpiration += mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1
		}
		if a, e := l.Expiration.WallTime, expectedExpiration; a != e {
			t.Errorf("liveness record had expiration %d, wanted %d", a, e)
		}
		actualLMapNodes[l.NodeID] = struct{}{}
	}
	if !reflect.DeepEqual(actualLMapNodes, expectedLMapNodes) {
		t.Errorf("got liveness map nodes %+v; wanted %+v", actualLMapNodes, expectedLMapNodes)
	}
}

// TestNodeLivenessConcurrentHeartbeats verifies that concurrent attempts
// to heartbeat all succeed.
func TestNodeLivenessConcurrentHeartbeats(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 1)

	verifyLiveness(t, mtc)
	pauseNodeLivenessHeartbeats(mtc, true)

	const concurrency = 10

	// Advance clock past the liveness threshold & concurrently heartbeat node.
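	// All goroutines heartbeat with the same (now-stale) liveness proto; the
	// test asserts only that every call returns nil, i.e. that duplicate
	// concurrent heartbeats are not surfaced as errors.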
	nl := mtc.nodeLivenesses[0]
	mtc.manualClock.Increment(nl.GetLivenessThreshold().Nanoseconds() + 1)
	l, err := nl.Self()
	if err != nil {
		t.Fatal(err)
	}
	errCh := make(chan error, concurrency)
	for i := 0; i < concurrency; i++ {
		go func() {
			errCh <- nl.Heartbeat(context.Background(), l)
		}()
	}
	for i := 0; i < concurrency; i++ {
		if err := <-errCh; err != nil {
			t.Fatalf("concurrent heartbeat %d failed: %+v", i, err)
		}
	}
}

// TestNodeLivenessConcurrentIncrementEpochs verifies concurrent
// attempts to increment liveness of another node all succeed.
func TestNodeLivenessConcurrentIncrementEpochs(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 2)

	verifyLiveness(t, mtc)
	pauseNodeLivenessHeartbeats(mtc, true)

	const concurrency = 10

	// Advance the clock and this time increment epoch concurrently for node 1.
	nl := mtc.nodeLivenesses[0]
	mtc.manualClock.Increment(nl.GetLivenessThreshold().Nanoseconds() + 1)
	l, err := nl.GetLiveness(mtc.gossips[1].NodeID.Get())
	if err != nil {
		t.Fatal(err)
	}
	errCh := make(chan error, concurrency)
	for i := 0; i < concurrency; i++ {
		go func() {
			errCh <- nl.IncrementEpoch(context.Background(), l)
		}()
	}
	for i := 0; i < concurrency; i++ {
		if err := <-errCh; err != nil && !errors.Is(err, kvserver.ErrEpochAlreadyIncremented) {
			t.Fatalf("concurrent increment epoch %d failed: %+v", i, err)
		}
	}
}

// TestNodeLivenessSetDraining verifies that when draining, a node's liveness
// record is updated and the node will not be present in the store list of other
// nodes once they are aware of its draining state.
func TestNodeLivenessSetDraining(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 3)
	mtc.initGossipNetwork()

	verifyLiveness(t, mtc)

	ctx := context.Background()
	drainingNodeIdx := 0
	drainingNodeID := mtc.gossips[drainingNodeIdx].NodeID.Get()

	nodeIDAppearsInStoreList := func(id roachpb.NodeID, sl kvserver.StoreList) bool {
		for _, store := range sl.Stores() {
			if store.Node.NodeID == id {
				return true
			}
		}
		return false
	}

	// Verify that updating a liveness record which already has the given
	// draining setting reports success (the update is a no-op).
	if err := mtc.nodeLivenesses[drainingNodeIdx].SetDrainingInternal(ctx, kvserverpb.Liveness{}, false); err != nil {
		t.Fatal(err)
	}

	mtc.nodeLivenesses[drainingNodeIdx].SetDraining(ctx, true /* drain */, nil /* reporter */)

	// Draining node disappears from store lists.
	{
		const expectedLive = 2
		// Executed in a retry loop to wait until the new liveness record has
		// been gossiped to the rest of the cluster.
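		// The draining node is expected both to vanish from every peer's
		// store list and to stop counting toward the number of alive stores.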
		testutils.SucceedsSoon(t, func() error {
			for i, sp := range mtc.storePools {
				curNodeID := mtc.gossips[i].NodeID.Get()
				sl, alive, _ := sp.GetStoreList()
				if alive != expectedLive {
					return errors.Errorf(
						"expected %d live stores but got %d from node %d",
						expectedLive,
						alive,
						curNodeID,
					)
				}
				if nodeIDAppearsInStoreList(drainingNodeID, sl) {
					return errors.Errorf(
						"expected node %d not to appear in node %d's store list",
						drainingNodeID,
						curNodeID,
					)
				}
			}
			return nil
		})
	}

	// Stop and restart the store to verify that a restarted server clears the
	// draining field on the liveness record.
	mtc.stopStore(drainingNodeIdx)
	mtc.restartStore(drainingNodeIdx)

	// Restarted node appears once again in the store list.
	{
		const expectedLive = 3
		// Executed in a retry loop to wait until the new liveness record has
		// been gossiped to the rest of the cluster.
		testutils.SucceedsSoon(t, func() error {
			for i, sp := range mtc.storePools {
				curNodeID := mtc.gossips[i].NodeID.Get()
				sl, alive, _ := sp.GetStoreList()
				if alive != expectedLive {
					return errors.Errorf(
						"expected %d live stores but got %d from node %d",
						expectedLive,
						alive,
						curNodeID,
					)
				}
				if !nodeIDAppearsInStoreList(drainingNodeID, sl) {
					return errors.Errorf(
						"expected node %d to appear in node %d's store list: %+v",
						drainingNodeID,
						curNodeID,
						sl.Stores(),
					)
				}
			}
			return nil
		})
	}
}

// TestNodeLivenessRetryAmbiguousResultError verifies that node liveness
// retries a heartbeat whose conditional put fails with an
// AmbiguousResultError.
func TestNodeLivenessRetryAmbiguousResultError(t *testing.T) {
	defer leaktest.AfterTest(t)()

	var injectError atomic.Value
	var injectedErrorCount int32

	injectError.Store(true)
	storeCfg := kvserver.TestStoreConfig(nil)
	storeCfg.TestingKnobs.EvalKnobs.TestingEvalFilter = func(args kvserverbase.FilterArgs) *roachpb.Error {
		if _, ok := args.Req.(*roachpb.ConditionalPutRequest); !ok {
			return nil
		}
		if val := injectError.Load(); val != nil && val.(bool) {
			atomic.AddInt32(&injectedErrorCount, 1)
			injectError.Store(false)
			return roachpb.NewError(roachpb.NewAmbiguousResultError("test"))
		}
		return nil
	}
	mtc := &multiTestContext{
		storeConfig: &storeCfg,
	}
	mtc.Start(t, 1)
	defer mtc.Stop()

	// Verify retry of the ambiguous result for heartbeat loop.
	verifyLiveness(t, mtc)

	nl := mtc.nodeLivenesses[0]
	l, err := nl.Self()
	if err != nil {
		t.Fatal(err)
	}

	// And again on manual heartbeat.
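	// Re-arm the injected AmbiguousResultError. Heartbeat is expected to
	// retry the conditional put internally and succeed, bringing the total
	// number of errors seen by the filter to two (one from the startup
	// heartbeat loop above, one from this manual heartbeat).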
	injectError.Store(true)
	if err := nl.Heartbeat(context.Background(), l); err != nil {
		t.Fatal(err)
	}
	if count := atomic.LoadInt32(&injectedErrorCount); count != 2 {
		t.Errorf("expected injected error count of 2; got %d", count)
	}
}

// verifyNodeIsDecommissioning waits until every node's view of the liveness
// records marks exactly the given node as decommissioning.
func verifyNodeIsDecommissioning(t *testing.T, mtc *multiTestContext, nodeID roachpb.NodeID) {
	testutils.SucceedsSoon(t, func() error {
		for _, nl := range mtc.nodeLivenesses {
			livenesses := nl.GetLivenesses()
			for _, liveness := range livenesses {
				if liveness.Decommissioning != (liveness.NodeID == nodeID) {
					return errors.Errorf("unexpected Decommissioning value of %v for node %v", liveness.Decommissioning, liveness.NodeID)
				}
			}
		}
		return nil
	})
}

// TestNodeLivenessStatusMap verifies that the admin Liveness endpoint reports
// the expected status (LIVE, DEAD, DECOMMISSIONING, DECOMMISSIONED) for nodes
// in the corresponding states.
func TestNodeLivenessStatusMap(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("short")
	}

	serverArgs := base.TestServerArgs{
		Knobs: base.TestingKnobs{
			Store: &kvserver.StoreTestingKnobs{
				// Disable replica rebalancing to ensure that the liveness range
				// does not get out of the first node (we'll be shutting down nodes).
				DisableReplicaRebalancing: true,
				// Disable LBS because when the scan is happening at the rate it's
				// happening below, it's possible that one of the system ranges
				// triggers a split.
				DisableLoadBasedSplitting: true,
			},
		},
		RaftConfig: base.RaftConfig{
			// Make everything tick faster to ensure dead nodes are
			// recognized dead faster.
			RaftTickInterval: 100 * time.Millisecond,
		},
		// Scan like a bat out of hell to ensure replication and replica GC
		// happen in a timely manner.
		ScanInterval: 50 * time.Millisecond,
	}
	tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{
		ServerArgs: serverArgs,
		// Disable full replication otherwise StartTestCluster with just 1
		// node will wait forever.
		ReplicationMode: base.ReplicationManual,
	})
	ctx := context.Background()
	defer tc.Stopper().Stop(ctx)

	ctx = logtags.AddTag(ctx, "in test", nil)

	log.Infof(ctx, "setting zone config to disable replication")
	// Allow for inserting zone configs without having to go through (or
	// duplicate the logic from) the CLI.
	config.TestingSetupZoneConfigHook(tc.Stopper())
	zoneConfig := zonepb.DefaultZoneConfig()
	// Force just one replica per range to ensure that we can shut down
	// nodes without endangering the liveness range.
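	// With a single replica per range and rebalancing disabled, every range
	// should stay on the first node, so stopping the nodes added below can
	// never lose quorum on the liveness range.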
	zoneConfig.NumReplicas = proto.Int32(1)
	config.TestingSetZoneConfig(keys.MetaRangesID, zoneConfig)

	log.Infof(ctx, "starting 3 more nodes")
	tc.AddServer(t, serverArgs)
	tc.AddServer(t, serverArgs)
	tc.AddServer(t, serverArgs)

	log.Infof(ctx, "waiting for node statuses")
	tc.WaitForNodeStatuses(t)
	tc.WaitForNodeLiveness(t)
	log.Infof(ctx, "waiting done")

	firstServer := tc.Server(0).(*server.TestServer)

	liveNodeID := firstServer.NodeID()

	deadNodeID := tc.Server(1).NodeID()
	log.Infof(ctx, "shutting down node %d", deadNodeID)
	tc.StopServer(1)
	log.Infof(ctx, "done shutting down node %d", deadNodeID)

	decommissioningNodeID := tc.Server(2).NodeID()
	log.Infof(ctx, "decommissioning node %d", decommissioningNodeID)
	if err := firstServer.Decommission(ctx, true, []roachpb.NodeID{decommissioningNodeID}); err != nil {
		t.Fatal(err)
	}
	log.Infof(ctx, "done decommissioning node %d", decommissioningNodeID)

	removedNodeID := tc.Server(3).NodeID()
	log.Infof(ctx, "decommissioning and shutting down node %d", removedNodeID)
	if err := firstServer.Decommission(ctx, true, []roachpb.NodeID{removedNodeID}); err != nil {
		t.Fatal(err)
	}
	tc.StopServer(3)
	log.Infof(ctx, "done removing node %d", removedNodeID)

	log.Infof(ctx, "checking status map")

	// See what comes up in the status.

	cc, err := tc.Server(0).RPCContext().GRPCDialNode(
		firstServer.RPCAddr(), firstServer.NodeID(), rpc.DefaultClass).Connect(ctx)
	require.NoError(t, err)
	admin := serverpb.NewAdminClient(cc)

	type testCase struct {
		nodeID         roachpb.NodeID
		expectedStatus kvserverpb.NodeLivenessStatus
	}

	// Below we're going to check that all statuses converge and stabilize
	// to a known situation.
	testData := []testCase{
		{liveNodeID, kvserverpb.NodeLivenessStatus_LIVE},
		{deadNodeID, kvserverpb.NodeLivenessStatus_DEAD},
		{decommissioningNodeID, kvserverpb.NodeLivenessStatus_DECOMMISSIONING},
		{removedNodeID, kvserverpb.NodeLivenessStatus_DECOMMISSIONED},
	}

	for _, test := range testData {
		t.Run(fmt.Sprintf("n%d->%s", test.nodeID, test.expectedStatus), func(t *testing.T) {
			nodeID, expectedStatus := test.nodeID, test.expectedStatus

			testutils.SucceedsSoon(t, func() error {
				// Ensure that dead nodes are quickly recognized as dead by
				// gossip. Overriding cluster settings is generally a really bad
				// idea as they are also populated via Gossip and so our update
				// is possibly going to be wiped out. But going through SQL
				// doesn't allow durations below 1m15s, which is much too long
				// for a test.
				// We do this in every SucceedsSoon attempt, so we'll be good.
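				// TestTimeUntilStoreDead is a short, test-only threshold (far
				// below the production default), so a stopped node's status
				// flips to DEAD soon after its liveness record expires.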
				kvserver.TimeUntilStoreDead.Override(&firstServer.ClusterSettings().SV,
					kvserver.TestTimeUntilStoreDead)

				log.Infof(ctx, "checking expected status (%s) for node %d", expectedStatus, nodeID)
				resp, err := admin.Liveness(ctx, &serverpb.LivenessRequest{})
				require.NoError(t, err)
				nodeStatuses := resp.Statuses

				st, ok := nodeStatuses[nodeID]
				if !ok {
					return errors.Errorf("node %d: not in statuses", nodeID)
				}
				if st != expectedStatus {
					return errors.Errorf("node %d: unexpected status: got %s, expected %s",
						nodeID, st, expectedStatus,
					)
				}
				return nil
			})
		})
	}
}

// testNodeLivenessSetDecommissioning runs the decommissioning round trip
// (set, restart, verify persistence) against the node at the given index.
func testNodeLivenessSetDecommissioning(t *testing.T, decommissionNodeIdx int) {
	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 3)
	mtc.initGossipNetwork()

	verifyLiveness(t, mtc)

	ctx := context.Background()
	callerNodeLiveness := mtc.nodeLivenesses[0]
	nodeID := mtc.gossips[decommissionNodeIdx].NodeID.Get()

	// Verify that updating a liveness record which already has the given
	// decommissioning setting reports success (the update is a no-op).
	if _, err := callerNodeLiveness.SetDecommissioningInternal(ctx, nodeID, kvserverpb.Liveness{}, false); err != nil {
		t.Fatal(err)
	}

	// Set a node to decommissioning state.
	if _, err := callerNodeLiveness.SetDecommissioning(ctx, nodeID, true); err != nil {
		t.Fatal(err)
	}
	verifyNodeIsDecommissioning(t, mtc, nodeID)

	// Stop and restart the store to verify that a restarted server retains the
	// decommissioning field on the liveness record.
	mtc.stopStore(decommissionNodeIdx)
	mtc.restartStore(decommissionNodeIdx)

	// Wait until the store has restarted and published a new heartbeat to
	// ensure we are not looking at pre-restart state. This makes the test fail
	// if the node were to wipe the decommission flag on restart.
	verifyEpochIncremented(t, mtc, decommissionNodeIdx)
	verifyNodeIsDecommissioning(t, mtc, nodeID)
}

// TestNodeLivenessSetDecommissioning verifies that when decommissioning, a
// node's liveness record is updated and remains after restart.
func TestNodeLivenessSetDecommissioning(t *testing.T) {
	defer leaktest.AfterTest(t)()
	// Sets itself to decommissioning.
	testNodeLivenessSetDecommissioning(t, 0)
	// Set another node to decommissioning.
	testNodeLivenessSetDecommissioning(t, 1)
}

// TestNodeLivenessDecommissionAbsent exercises a scenario in which a node is
// asked to decommission another node whose liveness record is not gossiped any
// more.
//
// See (*NodeLiveness).SetDecommissioning for details.
func TestNodeLivenessDecommissionAbsent(t *testing.T) {
	defer leaktest.AfterTest(t)()

	mtc := &multiTestContext{}
	defer mtc.Stop()
	mtc.Start(t, 3)
	mtc.initGossipNetwork()

	verifyLiveness(t, mtc)

	ctx := context.Background()
	const goneNodeID = roachpb.NodeID(10000)

	// When the node simply never existed, expect an error.
	if _, err := mtc.nodeLivenesses[0].SetDecommissioning(
		ctx, goneNodeID, true,
	); !errors.Is(err, kvserver.ErrNoLivenessRecord) {
		t.Fatal(err)
	}

	// Pretend the node was once there but isn't gossiped anywhere.
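	// Write a liveness record for the absent node directly into KV (the CPut
	// with a nil expected value asserts that none exists yet). Because nothing
	// gossips this record, SetDecommissioning must fall back to reading it
	// from KV.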
	if err := mtc.dbs[0].CPut(ctx, keys.NodeLivenessKey(goneNodeID), &kvserverpb.Liveness{
		NodeID:     goneNodeID,
		Epoch:      1,
		Expiration: hlc.LegacyTimestamp(mtc.clock().Now()),
	}, nil); err != nil {
		t.Fatal(err)
	}

	// Decommission from second node.
	if committed, err := mtc.nodeLivenesses[1].SetDecommissioning(ctx, goneNodeID, true); err != nil {
		t.Fatal(err)
	} else if !committed {
		t.Fatal("no change committed")
	}
	// Re-decommission from first node.
	if committed, err := mtc.nodeLivenesses[0].SetDecommissioning(ctx, goneNodeID, true); err != nil {
		t.Fatal(err)
	} else if committed {
		t.Fatal("spurious change committed")
	}
	// Recommission from first node.
	if committed, err := mtc.nodeLivenesses[0].SetDecommissioning(ctx, goneNodeID, false); err != nil {
		t.Fatal(err)
	} else if !committed {
		t.Fatal("no change committed")
	}
	// Decommission from second node (a second time).
	if committed, err := mtc.nodeLivenesses[1].SetDecommissioning(ctx, goneNodeID, true); err != nil {
		t.Fatal(err)
	} else if !committed {
		t.Fatal("no change committed")
	}
	// Recommission from third node.
	if committed, err := mtc.nodeLivenesses[2].SetDecommissioning(ctx, goneNodeID, false); err != nil {
		t.Fatal(err)
	} else if !committed {
		t.Fatal("no change committed")
	}
}