github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/node_liveness_test.go

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver_test
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"reflect"
    17  	"sort"
    18  	"sync/atomic"
    19  	"testing"
    20  	"time"
    21  
    22  	"github.com/cockroachdb/cockroach/pkg/base"
    23  	"github.com/cockroachdb/cockroach/pkg/config"
    24  	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
    25  	"github.com/cockroachdb/cockroach/pkg/gossip"
    26  	"github.com/cockroachdb/cockroach/pkg/keys"
    27  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
    28  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
    29  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
    30  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    31  	"github.com/cockroachdb/cockroach/pkg/rpc"
    32  	"github.com/cockroachdb/cockroach/pkg/server"
    33  	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
    34  	"github.com/cockroachdb/cockroach/pkg/testutils"
    35  	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
    36  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    37  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    38  	"github.com/cockroachdb/cockroach/pkg/util/log"
    39  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    40  	"github.com/cockroachdb/errors"
    41  	"github.com/cockroachdb/logtags"
    42  	"github.com/gogo/protobuf/proto"
    43  	"github.com/stretchr/testify/require"
    44  )
    45  
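         // verifyLiveness waits until every node in the cluster considers every
         // other node live and until each node's LiveNodes metric matches the
         // cluster size.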
    46  func verifyLiveness(t *testing.T, mtc *multiTestContext) {
    47  	testutils.SucceedsSoon(t, func() error {
    48  		for i, nl := range mtc.nodeLivenesses {
    49  			for _, g := range mtc.gossips {
    50  				live, err := nl.IsLive(g.NodeID.Get())
    51  				if err != nil {
    52  					return err
    53  				} else if !live {
    54  					return errors.Errorf("node %d not live", g.NodeID.Get())
    55  				}
    56  			}
    57  			if a, e := nl.Metrics().LiveNodes.Value(), int64(len(mtc.nodeLivenesses)); a != e {
    58  				return errors.Errorf("expected node %d's LiveNodes metric to be %d; got %d",
    59  					mtc.gossips[i].NodeID.Get(), e, a)
    60  			}
    61  		}
    62  		return nil
    63  	})
    64  }
    65  
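         // pauseNodeLivenessHeartbeats pauses (or resumes) the liveness heartbeat
         // loop on every node so that tests can expire liveness records simply by
         // advancing the manual clock.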
    66  func pauseNodeLivenessHeartbeats(mtc *multiTestContext, pause bool) {
    67  	for _, nl := range mtc.nodeLivenesses {
    68  		nl.PauseHeartbeat(pause)
    69  	}
    70  }
    71  
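         // TestNodeLiveness verifies that nodes are considered not-live once the
         // clock advances past the liveness threshold, and that liveness is
         // reestablished by a manual heartbeat.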
    72  func TestNodeLiveness(t *testing.T) {
    73  	defer leaktest.AfterTest(t)()
    74  	mtc := &multiTestContext{}
    75  	defer mtc.Stop()
    76  	mtc.Start(t, 3)
    77  
     78  	// Verify liveness of all nodes from each node's perspective.
    79  	verifyLiveness(t, mtc)
    80  	pauseNodeLivenessHeartbeats(mtc, true)
    81  
    82  	// Advance clock past the liveness threshold to verify IsLive becomes false.
    83  	mtc.manualClock.Increment(mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1)
    84  	for idx, nl := range mtc.nodeLivenesses {
    85  		nodeID := mtc.gossips[idx].NodeID.Get()
    86  		live, err := nl.IsLive(nodeID)
    87  		if err != nil {
    88  			t.Error(err)
    89  		} else if live {
    90  			t.Errorf("expected node %d to be considered not-live after advancing node clock", nodeID)
    91  		}
    92  		testutils.SucceedsSoon(t, func() error {
    93  			if a, e := nl.Metrics().LiveNodes.Value(), int64(0); a != e {
    94  				return errors.Errorf("expected node %d's LiveNodes metric to be %d; got %d",
    95  					nodeID, e, a)
    96  			}
    97  			return nil
    98  		})
    99  	}
   100  	// Trigger a manual heartbeat and verify liveness is reestablished.
   101  	for _, nl := range mtc.nodeLivenesses {
   102  		l, err := nl.Self()
   103  		if err != nil {
   104  			t.Fatal(err)
   105  		}
   106  		for {
   107  			err := nl.Heartbeat(context.Background(), l)
   108  			if err == nil {
   109  				break
   110  			}
   111  			if errors.Is(err, kvserver.ErrEpochIncremented) {
   112  				log.Warningf(context.Background(), "retrying after %s", err)
   113  				continue
   114  			}
   115  
   116  			t.Fatal(err)
   117  		}
   118  	}
   119  	verifyLiveness(t, mtc)
   120  
   121  	// Verify metrics counts.
   122  	for i, nl := range mtc.nodeLivenesses {
   123  		if c := nl.Metrics().HeartbeatSuccesses.Count(); c < 2 {
   124  			t.Errorf("node %d: expected metrics count >= 2; got %d", (i + 1), c)
   125  		}
   126  	}
   127  }
   128  
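         // TestNodeLivenessInitialIncrement verifies that a node's liveness epoch
         // starts at 1 and is incremented when the node restarts and heartbeats
         // again.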
   129  func TestNodeLivenessInitialIncrement(t *testing.T) {
   130  	defer leaktest.AfterTest(t)()
   131  	mtc := &multiTestContext{}
   132  	defer mtc.Stop()
   133  	mtc.Start(t, 1)
   134  
    135  	// Verify liveness of all nodes from each node's perspective.
   136  	verifyLiveness(t, mtc)
   137  
   138  	liveness, err := mtc.nodeLivenesses[0].GetLiveness(mtc.gossips[0].NodeID.Get())
   139  	if err != nil {
   140  		t.Fatal(err)
   141  	}
   142  	if liveness.Epoch != 1 {
   143  		t.Errorf("expected epoch to be set to 1 initially; got %d", liveness.Epoch)
   144  	}
   145  
   146  	// Restart the node and verify the epoch is incremented with initial heartbeat.
   147  	mtc.stopStore(0)
   148  	mtc.restartStore(0)
   149  	verifyEpochIncremented(t, mtc, 0)
   150  }
   151  
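         // verifyEpochIncremented waits until the given node's liveness epoch has
         // been incremented past its initial value of 1.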
   152  func verifyEpochIncremented(t *testing.T, mtc *multiTestContext, nodeIdx int) {
   153  	testutils.SucceedsSoon(t, func() error {
   154  		liveness, err := mtc.nodeLivenesses[nodeIdx].GetLiveness(mtc.gossips[nodeIdx].NodeID.Get())
   155  		if err != nil {
   156  			return err
   157  		}
   158  		if liveness.Epoch < 2 {
   159  			return errors.Errorf("expected epoch to be >=2 on restart but was %d", liveness.Epoch)
   160  		}
   161  		return nil
   162  	})
   163  
   164  }
   165  
    166  // TestNodeIsLiveCallback verifies that the liveness callback for a
    167  // node is invoked when the node transitions from not-live to live.
   168  func TestNodeIsLiveCallback(t *testing.T) {
   169  	defer leaktest.AfterTest(t)()
   170  	mtc := &multiTestContext{}
   171  	defer mtc.Stop()
   172  	mtc.Start(t, 3)
   173  
    174  	// Verify liveness of all nodes from each node's perspective.
   175  	verifyLiveness(t, mtc)
   176  	pauseNodeLivenessHeartbeats(mtc, true)
   177  
   178  	var cbMu syncutil.Mutex
   179  	cbs := map[roachpb.NodeID]struct{}{}
   180  	mtc.nodeLivenesses[0].RegisterCallback(func(nodeID roachpb.NodeID) {
   181  		cbMu.Lock()
   182  		defer cbMu.Unlock()
   183  		cbs[nodeID] = struct{}{}
   184  	})
   185  
   186  	// Advance clock past the liveness threshold.
   187  	mtc.manualClock.Increment(mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1)
   188  
   189  	// Trigger a manual heartbeat and verify callbacks for each node ID are invoked.
   190  	for _, nl := range mtc.nodeLivenesses {
   191  		l, err := nl.Self()
   192  		if err != nil {
   193  			t.Fatal(err)
   194  		}
   195  		if err := nl.Heartbeat(context.Background(), l); err != nil {
   196  			t.Fatal(err)
   197  		}
   198  	}
   199  
   200  	testutils.SucceedsSoon(t, func() error {
   201  		cbMu.Lock()
   202  		defer cbMu.Unlock()
   203  		for _, g := range mtc.gossips {
   204  			nodeID := g.NodeID.Get()
   205  			if _, ok := cbs[nodeID]; !ok {
   206  				return errors.Errorf("expected IsLive callback for node %d", nodeID)
   207  			}
   208  		}
   209  		return nil
   210  	})
   211  }
   212  
   213  // TestNodeHeartbeatCallback verifies that HeartbeatCallback is invoked whenever
   214  // this node updates its own liveness status.
   215  func TestNodeHeartbeatCallback(t *testing.T) {
   216  	defer leaktest.AfterTest(t)()
   217  	mtc := &multiTestContext{}
   218  	defer mtc.Stop()
   219  	mtc.Start(t, 3)
   220  
    221  	// Verify liveness of all nodes from each node's perspective.
   222  	verifyLiveness(t, mtc)
   223  	pauseNodeLivenessHeartbeats(mtc, true)
   224  
    225  	// Verify that the last-up timestamp has been set for all stores.
   226  	verifyUptimes := func() error {
   227  		expected := mtc.clock().Now()
   228  		for i, s := range mtc.stores {
   229  			uptm, err := s.ReadLastUpTimestamp(context.Background())
   230  			if err != nil {
   231  				return errors.Wrapf(err, "error reading last up time from store %d", i)
   232  			}
   233  			if a, e := uptm.WallTime, expected.WallTime; a != e {
   234  				return errors.Errorf("store %d last uptime = %d; wanted %d", i, a, e)
   235  			}
   236  		}
   237  		return nil
   238  	}
   239  
   240  	if err := verifyUptimes(); err != nil {
   241  		t.Fatal(err)
   242  	}
   243  
   244  	// Advance clock past the liveness threshold and force a manual heartbeat on
   245  	// all node liveness objects, which should update the last up time for each
   246  	// store.
   247  	mtc.manualClock.Increment(mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1)
   248  	for _, nl := range mtc.nodeLivenesses {
   249  		l, err := nl.Self()
   250  		if err != nil {
   251  			t.Fatal(err)
   252  		}
   253  		if err := nl.Heartbeat(context.Background(), l); err != nil {
   254  			t.Fatal(err)
   255  		}
   256  	}
    257  	// NB: since the heartbeat callback is invoked synchronously by
    258  	// `Heartbeat()`, which we just called from this goroutine, we don't need
    259  	// to wrap this in a retry.
   260  	if err := verifyUptimes(); err != nil {
   261  		t.Fatal(err)
   262  	}
   263  }
   264  
   265  // TestNodeLivenessEpochIncrement verifies that incrementing the epoch
   266  // of a node requires the node to be considered not-live and that on
   267  // increment, no other nodes believe the epoch-incremented node to be
   268  // live.
   269  func TestNodeLivenessEpochIncrement(t *testing.T) {
   270  	defer leaktest.AfterTest(t)()
   271  	mtc := &multiTestContext{}
   272  	defer mtc.Stop()
   273  	mtc.Start(t, 2)
   274  
   275  	verifyLiveness(t, mtc)
   276  	pauseNodeLivenessHeartbeats(mtc, true)
   277  
   278  	// First try to increment the epoch of a known-live node.
   279  	deadNodeID := mtc.gossips[1].NodeID.Get()
   280  	oldLiveness, err := mtc.nodeLivenesses[0].GetLiveness(deadNodeID)
   281  	if err != nil {
   282  		t.Fatal(err)
   283  	}
   284  	if err := mtc.nodeLivenesses[0].IncrementEpoch(
   285  		context.Background(), oldLiveness); !testutils.IsError(err, "cannot increment epoch on live node") {
   286  		t.Fatalf("expected error incrementing a live node: %+v", err)
   287  	}
   288  
   289  	// Advance clock past liveness threshold & increment epoch.
   290  	mtc.manualClock.Increment(mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1)
   291  	if err := mtc.nodeLivenesses[0].IncrementEpoch(context.Background(), oldLiveness); err != nil {
   292  		t.Fatalf("unexpected error incrementing a non-live node: %+v", err)
   293  	}
   294  
   295  	// Verify that the epoch has been advanced.
   296  	testutils.SucceedsSoon(t, func() error {
   297  		newLiveness, err := mtc.nodeLivenesses[0].GetLiveness(deadNodeID)
   298  		if err != nil {
   299  			return err
   300  		}
   301  		if newLiveness.Epoch != oldLiveness.Epoch+1 {
   302  			return errors.Errorf("expected epoch to increment")
   303  		}
   304  		if newLiveness.Expiration != oldLiveness.Expiration {
   305  			return errors.Errorf("expected expiration to remain unchanged")
   306  		}
   307  		if live, err := mtc.nodeLivenesses[0].IsLive(deadNodeID); live || err != nil {
   308  			return errors.Errorf("expected dead node to remain dead after epoch increment %t: %v", live, err)
   309  		}
   310  		return nil
   311  	})
   312  
   313  	// Verify epoch increment metric count.
   314  	if c := mtc.nodeLivenesses[0].Metrics().EpochIncrements.Count(); c != 1 {
   315  		t.Errorf("expected epoch increment == 1; got %d", c)
   316  	}
   317  
   318  	// Verify error on incrementing an already-incremented epoch.
   319  	if err := mtc.nodeLivenesses[0].IncrementEpoch(context.Background(), oldLiveness); !errors.Is(err, kvserver.ErrEpochAlreadyIncremented) {
   320  		t.Fatalf("unexpected error incrementing a non-live node: %+v", err)
   321  	}
   322  
   323  	// Verify error incrementing with a too-high expectation for liveness epoch.
   324  	oldLiveness.Epoch = 3
   325  	if err := mtc.nodeLivenesses[0].IncrementEpoch(
   326  		context.Background(), oldLiveness); !testutils.IsError(err, "unexpected liveness epoch 2; expected >= 3") {
   327  		t.Fatalf("expected error incrementing with a too-high expected epoch: %+v", err)
   328  	}
   329  }
   330  
    331  // TestNodeLivenessRestart verifies that if nodes are shut down and
   332  // restarted, the node liveness records are re-gossiped immediately.
   333  func TestNodeLivenessRestart(t *testing.T) {
   334  	defer leaktest.AfterTest(t)()
   335  	mtc := &multiTestContext{}
   336  	defer mtc.Stop()
   337  	mtc.Start(t, 2)
   338  
   339  	// After verifying node is in liveness table, stop store.
   340  	verifyLiveness(t, mtc)
   341  	mtc.stopStore(0)
   342  
    343  	// Clear the liveness records in each node's gossip to make sure we're
    344  	// seeing the liveness records properly re-gossiped at store startup.
   345  	var expKeys []string
   346  	for _, g := range mtc.gossips {
   347  		key := gossip.MakeNodeLivenessKey(g.NodeID.Get())
   348  		expKeys = append(expKeys, key)
   349  		if err := g.AddInfoProto(key, &kvserverpb.Liveness{}, 0); err != nil {
   350  			t.Fatal(err)
   351  		}
   352  	}
   353  	sort.Strings(expKeys)
   354  
   355  	// Register a callback to gossip in order to verify liveness records
   356  	// are re-gossiped.
   357  	var keysMu struct {
   358  		syncutil.Mutex
   359  		keys []string
   360  	}
   361  	livenessRegex := gossip.MakePrefixPattern(gossip.KeyNodeLivenessPrefix)
   362  	mtc.gossips[0].RegisterCallback(livenessRegex, func(key string, _ roachpb.Value) {
   363  		keysMu.Lock()
   364  		defer keysMu.Unlock()
   365  		for _, k := range keysMu.keys {
   366  			if k == key {
   367  				return
   368  			}
   369  		}
   370  		keysMu.keys = append(keysMu.keys, key)
   371  	})
   372  
   373  	// Restart store and verify gossip contains liveness record for nodes 1&2.
   374  	mtc.restartStore(0)
   375  	testutils.SucceedsSoon(t, func() error {
   376  		keysMu.Lock()
   377  		defer keysMu.Unlock()
   378  		sort.Strings(keysMu.keys)
   379  		if !reflect.DeepEqual(keysMu.keys, expKeys) {
   380  			return errors.Errorf("expected keys %+v != keys %+v", expKeys, keysMu.keys)
   381  		}
   382  		return nil
   383  	})
   384  }
   385  
   386  // TestNodeLivenessSelf verifies that a node keeps its own most recent liveness
   387  // heartbeat info in preference to anything which might be received belatedly
   388  // through gossip.
   389  //
   390  // Note that this test originally injected a Gossip update with a higher Epoch
   391  // and semantics have since changed to make the "self" record less special. It
   392  // is updated like any other node's record, with appropriate safeguards against
   393  // clobbering in place.
   394  func TestNodeLivenessSelf(t *testing.T) {
   395  	defer leaktest.AfterTest(t)()
   396  	mtc := &multiTestContext{}
   397  	defer mtc.Stop()
   398  	mtc.Start(t, 1)
   399  	g := mtc.gossips[0]
   400  
   401  	pauseNodeLivenessHeartbeats(mtc, true)
   402  
   403  	// Verify liveness is properly initialized. This needs to be wrapped in a
   404  	// SucceedsSoon because node liveness gets initialized via an async gossip
   405  	// callback.
   406  	var liveness kvserverpb.Liveness
   407  	testutils.SucceedsSoon(t, func() error {
   408  		var err error
   409  		liveness, err = mtc.nodeLivenesses[0].GetLiveness(g.NodeID.Get())
   410  		return err
   411  	})
   412  	if err := mtc.nodeLivenesses[0].Heartbeat(context.Background(), liveness); err != nil {
   413  		t.Fatal(err)
   414  	}
   415  
    416  	// Gossip a stale liveness record for the node and verify that asking for
    417  	// the node's own node ID still returns the locally-held, newer value.
   418  	key := gossip.MakeNodeLivenessKey(g.NodeID.Get())
   419  	var count int32
   420  	g.RegisterCallback(key, func(_ string, val roachpb.Value) {
   421  		atomic.AddInt32(&count, 1)
   422  	})
   423  	testutils.SucceedsSoon(t, func() error {
   424  		fakeBehindLiveness := liveness
   425  		fakeBehindLiveness.Epoch-- // almost certainly results in zero
   426  
   427  		if err := g.AddInfoProto(key, &fakeBehindLiveness, 0); err != nil {
   428  			t.Fatal(err)
   429  		}
   430  		if atomic.LoadInt32(&count) < 2 {
   431  			return errors.New("expected count >= 2")
   432  		}
   433  		return nil
   434  	})
   435  
   436  	// Self should not see the fake liveness, but have kept the real one.
   437  	l := mtc.nodeLivenesses[0]
   438  	lGet, err := l.GetLiveness(g.NodeID.Get())
   439  	if err != nil {
   440  		t.Fatal(err)
   441  	}
   442  	lSelf, err := l.Self()
   443  	if err != nil {
   444  		t.Fatal(err)
   445  	}
   446  	if !reflect.DeepEqual(lGet, lSelf) {
   447  		t.Errorf("expected GetLiveness() to return same value as Self(): %+v != %+v", lGet, lSelf)
   448  	}
   449  	if lGet.Epoch == 2 || lSelf.NodeID == 2 {
   450  		t.Errorf("expected GetLiveness() and Self() not to return artificially gossiped liveness: %+v, %+v", lGet, lSelf)
   451  	}
   452  }
   453  
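         // TestNodeLivenessGetIsLiveMap verifies that GetIsLiveMap reports all
         // nodes as live initially and, after the clock advances past the liveness
         // threshold, only the node that heartbeats.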
   454  func TestNodeLivenessGetIsLiveMap(t *testing.T) {
   455  	defer leaktest.AfterTest(t)()
   456  	mtc := &multiTestContext{}
   457  	defer mtc.Stop()
   458  	mtc.Start(t, 3)
   459  
   460  	verifyLiveness(t, mtc)
   461  	pauseNodeLivenessHeartbeats(mtc, true)
   462  	lMap := mtc.nodeLivenesses[0].GetIsLiveMap()
   463  	expectedLMap := kvserver.IsLiveMap{
   464  		1: {IsLive: true, Epoch: 1},
   465  		2: {IsLive: true, Epoch: 1},
   466  		3: {IsLive: true, Epoch: 1},
   467  	}
   468  	if !reflect.DeepEqual(expectedLMap, lMap) {
   469  		t.Errorf("expected liveness map %+v; got %+v", expectedLMap, lMap)
   470  	}
   471  
   472  	// Advance the clock but only heartbeat node 0.
   473  	mtc.manualClock.Increment(mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1)
   474  	liveness, _ := mtc.nodeLivenesses[0].GetLiveness(mtc.gossips[0].NodeID.Get())
   475  
   476  	testutils.SucceedsSoon(t, func() error {
   477  		if err := mtc.nodeLivenesses[0].Heartbeat(context.Background(), liveness); err != nil {
   478  			if errors.Is(err, kvserver.ErrEpochIncremented) {
   479  				return err
   480  			}
   481  			t.Fatal(err)
   482  		}
   483  		return nil
   484  	})
   485  
   486  	// Now verify only node 0 is live.
   487  	lMap = mtc.nodeLivenesses[0].GetIsLiveMap()
   488  	expectedLMap = kvserver.IsLiveMap{
   489  		1: {IsLive: true, Epoch: 1},
   490  		2: {IsLive: false, Epoch: 1},
   491  		3: {IsLive: false, Epoch: 1},
   492  	}
   493  	if !reflect.DeepEqual(expectedLMap, lMap) {
   494  		t.Errorf("expected liveness map %+v; got %+v", expectedLMap, lMap)
   495  	}
   496  }
   497  
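         // TestNodeLivenessGetLivenesses verifies that GetLivenesses returns a
         // record for every node with the expected epoch and expiration, and that
         // a heartbeat by one node extends only that node's expiration.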
   498  func TestNodeLivenessGetLivenesses(t *testing.T) {
   499  	defer leaktest.AfterTest(t)()
   500  	mtc := &multiTestContext{}
   501  	defer mtc.Stop()
   502  	mtc.Start(t, 3)
   503  
   504  	verifyLiveness(t, mtc)
   505  	pauseNodeLivenessHeartbeats(mtc, true)
   506  
   507  	livenesses := mtc.nodeLivenesses[0].GetLivenesses()
   508  	actualLMapNodes := make(map[roachpb.NodeID]struct{})
   509  	originalExpiration := mtc.clock().PhysicalNow() + mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds()
   510  	for _, l := range livenesses {
   511  		if a, e := l.Epoch, int64(1); a != e {
   512  			t.Errorf("liveness record had epoch %d, wanted %d", a, e)
   513  		}
   514  		if a, e := l.Expiration.WallTime, originalExpiration; a != e {
   515  			t.Errorf("liveness record had expiration %d, wanted %d", a, e)
   516  		}
   517  		actualLMapNodes[l.NodeID] = struct{}{}
   518  	}
   519  	expectedLMapNodes := map[roachpb.NodeID]struct{}{1: {}, 2: {}, 3: {}}
   520  	if !reflect.DeepEqual(actualLMapNodes, expectedLMapNodes) {
   521  		t.Errorf("got liveness map nodes %+v; wanted %+v", actualLMapNodes, expectedLMapNodes)
   522  	}
   523  
   524  	// Advance the clock but only heartbeat node 0.
   525  	mtc.manualClock.Increment(mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1)
   526  	liveness, _ := mtc.nodeLivenesses[0].GetLiveness(mtc.gossips[0].NodeID.Get())
   527  	if err := mtc.nodeLivenesses[0].Heartbeat(context.Background(), liveness); err != nil {
   528  		t.Fatal(err)
   529  	}
   530  
   531  	// Verify that node liveness receives the change.
   532  	livenesses = mtc.nodeLivenesses[0].GetLivenesses()
   533  	actualLMapNodes = make(map[roachpb.NodeID]struct{})
   534  	for _, l := range livenesses {
   535  		if a, e := l.Epoch, int64(1); a != e {
   536  			t.Errorf("liveness record had epoch %d, wanted %d", a, e)
   537  		}
   538  		expectedExpiration := originalExpiration
   539  		if l.NodeID == 1 {
   540  			expectedExpiration += mtc.nodeLivenesses[0].GetLivenessThreshold().Nanoseconds() + 1
   541  		}
   542  		if a, e := l.Expiration.WallTime, expectedExpiration; a != e {
   543  			t.Errorf("liveness record had expiration %d, wanted %d", a, e)
   544  		}
   545  		actualLMapNodes[l.NodeID] = struct{}{}
   546  	}
   547  	if !reflect.DeepEqual(actualLMapNodes, expectedLMapNodes) {
   548  		t.Errorf("got liveness map nodes %+v; wanted %+v", actualLMapNodes, expectedLMapNodes)
   549  	}
   550  }
   551  
    552  // TestNodeLivenessConcurrentHeartbeats verifies that concurrent heartbeat
    553  // attempts all succeed.
   554  func TestNodeLivenessConcurrentHeartbeats(t *testing.T) {
   555  	defer leaktest.AfterTest(t)()
   556  	mtc := &multiTestContext{}
   557  	defer mtc.Stop()
   558  	mtc.Start(t, 1)
   559  
   560  	verifyLiveness(t, mtc)
   561  	pauseNodeLivenessHeartbeats(mtc, true)
   562  
   563  	const concurrency = 10
   564  
   565  	// Advance clock past the liveness threshold & concurrently heartbeat node.
   566  	nl := mtc.nodeLivenesses[0]
   567  	mtc.manualClock.Increment(nl.GetLivenessThreshold().Nanoseconds() + 1)
   568  	l, err := nl.Self()
   569  	if err != nil {
   570  		t.Fatal(err)
   571  	}
   572  	errCh := make(chan error, concurrency)
   573  	for i := 0; i < concurrency; i++ {
   574  		go func() {
   575  			errCh <- nl.Heartbeat(context.Background(), l)
   576  		}()
   577  	}
   578  	for i := 0; i < concurrency; i++ {
   579  		if err := <-errCh; err != nil {
   580  			t.Fatalf("concurrent heartbeat %d failed: %+v", i, err)
   581  		}
   582  	}
   583  }
   584  
    585  // TestNodeLivenessConcurrentIncrementEpochs verifies that concurrent
    586  // attempts to increment another node's liveness epoch all succeed.
   587  func TestNodeLivenessConcurrentIncrementEpochs(t *testing.T) {
   588  	defer leaktest.AfterTest(t)()
   589  	mtc := &multiTestContext{}
   590  	defer mtc.Stop()
   591  	mtc.Start(t, 2)
   592  
   593  	verifyLiveness(t, mtc)
   594  	pauseNodeLivenessHeartbeats(mtc, true)
   595  
   596  	const concurrency = 10
   597  
   598  	// Advance the clock and this time increment epoch concurrently for node 1.
   599  	nl := mtc.nodeLivenesses[0]
   600  	mtc.manualClock.Increment(nl.GetLivenessThreshold().Nanoseconds() + 1)
   601  	l, err := nl.GetLiveness(mtc.gossips[1].NodeID.Get())
   602  	if err != nil {
   603  		t.Fatal(err)
   604  	}
   605  	errCh := make(chan error, concurrency)
   606  	for i := 0; i < concurrency; i++ {
   607  		go func() {
   608  			errCh <- nl.IncrementEpoch(context.Background(), l)
   609  		}()
   610  	}
   611  	for i := 0; i < concurrency; i++ {
   612  		if err := <-errCh; err != nil && !errors.Is(err, kvserver.ErrEpochAlreadyIncremented) {
   613  			t.Fatalf("concurrent increment epoch %d failed: %+v", i, err)
   614  		}
   615  	}
   616  }
   617  
   618  // TestNodeLivenessSetDraining verifies that when draining, a node's liveness
   619  // record is updated and the node will not be present in the store list of other
   620  // nodes once they are aware of its draining state.
   621  func TestNodeLivenessSetDraining(t *testing.T) {
   622  	defer leaktest.AfterTest(t)()
   623  	mtc := &multiTestContext{}
   624  	defer mtc.Stop()
   625  	mtc.Start(t, 3)
   626  	mtc.initGossipNetwork()
   627  
   628  	verifyLiveness(t, mtc)
   629  
   630  	ctx := context.Background()
   631  	drainingNodeIdx := 0
   632  	drainingNodeID := mtc.gossips[drainingNodeIdx].NodeID.Get()
   633  
   634  	nodeIDAppearsInStoreList := func(id roachpb.NodeID, sl kvserver.StoreList) bool {
   635  		for _, store := range sl.Stores() {
   636  			if store.Node.NodeID == id {
   637  				return true
   638  			}
   639  		}
   640  		return false
   641  	}
   642  
    643  	// Verify success even though the underlying update fails, because the
    644  	// liveness record already has the given draining setting.
   645  	if err := mtc.nodeLivenesses[drainingNodeIdx].SetDrainingInternal(ctx, kvserverpb.Liveness{}, false); err != nil {
   646  		t.Fatal(err)
   647  	}
   648  
   649  	mtc.nodeLivenesses[drainingNodeIdx].SetDraining(ctx, true /* drain */, nil /* reporter */)
   650  
   651  	// Draining node disappears from store lists.
   652  	{
   653  		const expectedLive = 2
   654  		// Executed in a retry loop to wait until the new liveness record has
   655  		// been gossiped to the rest of the cluster.
   656  		testutils.SucceedsSoon(t, func() error {
   657  			for i, sp := range mtc.storePools {
   658  				curNodeID := mtc.gossips[i].NodeID.Get()
   659  				sl, alive, _ := sp.GetStoreList()
   660  				if alive != expectedLive {
   661  					return errors.Errorf(
   662  						"expected %d live stores but got %d from node %d",
   663  						expectedLive,
   664  						alive,
   665  						curNodeID,
   666  					)
   667  				}
   668  				if nodeIDAppearsInStoreList(drainingNodeID, sl) {
   669  					return errors.Errorf(
   670  						"expected node %d not to appear in node %d's store list",
   671  						drainingNodeID,
   672  						curNodeID,
   673  					)
   674  				}
   675  			}
   676  			return nil
   677  		})
   678  	}
   679  
   680  	// Stop and restart the store to verify that a restarted server clears the
   681  	// draining field on the liveness record.
   682  	mtc.stopStore(drainingNodeIdx)
   683  	mtc.restartStore(drainingNodeIdx)
   684  
   685  	// Restarted node appears once again in the store list.
   686  	{
   687  		const expectedLive = 3
   688  		// Executed in a retry loop to wait until the new liveness record has
   689  		// been gossiped to the rest of the cluster.
   690  		testutils.SucceedsSoon(t, func() error {
   691  			for i, sp := range mtc.storePools {
   692  				curNodeID := mtc.gossips[i].NodeID.Get()
   693  				sl, alive, _ := sp.GetStoreList()
   694  				if alive != expectedLive {
   695  					return errors.Errorf(
   696  						"expected %d live stores but got %d from node %d",
   697  						expectedLive,
   698  						alive,
   699  						curNodeID,
   700  					)
   701  				}
   702  				if !nodeIDAppearsInStoreList(drainingNodeID, sl) {
   703  					return errors.Errorf(
   704  						"expected node %d to appear in node %d's store list: %+v",
   705  						drainingNodeID,
   706  						curNodeID,
   707  						sl.Stores(),
   708  					)
   709  				}
   710  			}
   711  			return nil
   712  		})
   713  	}
   714  }
   715  
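         // TestNodeLivenessRetryAmbiguousResultError verifies that liveness
         // heartbeats are retried when the underlying conditional put fails with
         // an ambiguous result error.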
   716  func TestNodeLivenessRetryAmbiguousResultError(t *testing.T) {
   717  	defer leaktest.AfterTest(t)()
   718  
   719  	var injectError atomic.Value
   720  	var injectedErrorCount int32
   721  
   722  	injectError.Store(true)
   723  	storeCfg := kvserver.TestStoreConfig(nil)
   724  	storeCfg.TestingKnobs.EvalKnobs.TestingEvalFilter = func(args kvserverbase.FilterArgs) *roachpb.Error {
   725  		if _, ok := args.Req.(*roachpb.ConditionalPutRequest); !ok {
   726  			return nil
   727  		}
   728  		if val := injectError.Load(); val != nil && val.(bool) {
   729  			atomic.AddInt32(&injectedErrorCount, 1)
   730  			injectError.Store(false)
   731  			return roachpb.NewError(roachpb.NewAmbiguousResultError("test"))
   732  		}
   733  		return nil
   734  	}
   735  	mtc := &multiTestContext{
   736  		storeConfig: &storeCfg,
   737  	}
   738  	mtc.Start(t, 1)
   739  	defer mtc.Stop()
   740  
    741  	// Verify that the heartbeat loop retries past the injected ambiguous result.
   742  	verifyLiveness(t, mtc)
   743  
   744  	nl := mtc.nodeLivenesses[0]
   745  	l, err := nl.Self()
   746  	if err != nil {
   747  		t.Fatal(err)
   748  	}
   749  
   750  	// And again on manual heartbeat.
   751  	injectError.Store(true)
   752  	if err := nl.Heartbeat(context.Background(), l); err != nil {
   753  		t.Fatal(err)
   754  	}
   755  	if count := atomic.LoadInt32(&injectedErrorCount); count != 2 {
   756  		t.Errorf("expected injected error count of 2; got %d", count)
   757  	}
   758  }
   759  
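         // verifyNodeIsDecommissioning waits until every node's view of the
         // liveness records shows the given node, and only that node, as
         // decommissioning.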
   760  func verifyNodeIsDecommissioning(t *testing.T, mtc *multiTestContext, nodeID roachpb.NodeID) {
   761  	testutils.SucceedsSoon(t, func() error {
   762  		for _, nl := range mtc.nodeLivenesses {
   763  			livenesses := nl.GetLivenesses()
   764  			for _, liveness := range livenesses {
   765  				if liveness.Decommissioning != (liveness.NodeID == nodeID) {
   766  					return errors.Errorf("unexpected Decommissioning value of %v for node %v", liveness.Decommissioning, liveness.NodeID)
   767  				}
   768  			}
   769  		}
   770  		return nil
   771  	})
   772  }
   773  
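         // TestNodeLivenessStatusMap verifies that the admin Liveness RPC reports
         // the expected status for live, dead, decommissioning, and decommissioned
         // (removed) nodes.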
   774  func TestNodeLivenessStatusMap(t *testing.T) {
   775  	defer leaktest.AfterTest(t)()
   776  	if testing.Short() {
   777  		t.Skip("short")
   778  	}
   779  
   780  	serverArgs := base.TestServerArgs{
   781  		Knobs: base.TestingKnobs{
   782  			Store: &kvserver.StoreTestingKnobs{
    783  			// Disable replica rebalancing to ensure that the liveness range
    784  			// does not move off the first node (we'll be shutting down nodes).
   785  				DisableReplicaRebalancing: true,
    786  			// Disable load-based splitting because, at the scan rate configured
    787  			// below, one of the system ranges might otherwise trigger a split.
   788  				DisableLoadBasedSplitting: true,
   789  			},
   790  		},
   791  		RaftConfig: base.RaftConfig{
   792  			// Make everything tick faster to ensure dead nodes are
   793  			// recognized dead faster.
   794  			RaftTickInterval: 100 * time.Millisecond,
   795  		},
   796  		// Scan like a bat out of hell to ensure replication and replica GC
   797  		// happen in a timely manner.
   798  		ScanInterval: 50 * time.Millisecond,
   799  	}
   800  	tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{
   801  		ServerArgs: serverArgs,
   802  		// Disable full replication otherwise StartTestCluster with just 1
   803  		// node will wait forever.
   804  		ReplicationMode: base.ReplicationManual,
   805  	})
   806  	ctx := context.Background()
   807  	defer tc.Stopper().Stop(ctx)
   808  
   809  	ctx = logtags.AddTag(ctx, "in test", nil)
   810  
   811  	log.Infof(ctx, "setting zone config to disable replication")
   812  	// Allow for inserting zone configs without having to go through (or
   813  	// duplicate the logic from) the CLI.
   814  	config.TestingSetupZoneConfigHook(tc.Stopper())
   815  	zoneConfig := zonepb.DefaultZoneConfig()
   816  	// Force just one replica per range to ensure that we can shut down
   817  	// nodes without endangering the liveness range.
   818  	zoneConfig.NumReplicas = proto.Int32(1)
   819  	config.TestingSetZoneConfig(keys.MetaRangesID, zoneConfig)
   820  
   821  	log.Infof(ctx, "starting 3 more nodes")
   822  	tc.AddServer(t, serverArgs)
   823  	tc.AddServer(t, serverArgs)
   824  	tc.AddServer(t, serverArgs)
   825  
   826  	log.Infof(ctx, "waiting for node statuses")
   827  	tc.WaitForNodeStatuses(t)
   828  	tc.WaitForNodeLiveness(t)
   829  	log.Infof(ctx, "waiting done")
   830  
   831  	firstServer := tc.Server(0).(*server.TestServer)
   832  
   833  	liveNodeID := firstServer.NodeID()
   834  
   835  	deadNodeID := tc.Server(1).NodeID()
   836  	log.Infof(ctx, "shutting down node %d", deadNodeID)
   837  	tc.StopServer(1)
   838  	log.Infof(ctx, "done shutting down node %d", deadNodeID)
   839  
   840  	decommissioningNodeID := tc.Server(2).NodeID()
   841  	log.Infof(ctx, "decommissioning node %d", decommissioningNodeID)
   842  	if err := firstServer.Decommission(ctx, true, []roachpb.NodeID{decommissioningNodeID}); err != nil {
   843  		t.Fatal(err)
   844  	}
   845  	log.Infof(ctx, "done decommissioning node %d", decommissioningNodeID)
   846  
   847  	removedNodeID := tc.Server(3).NodeID()
   848  	log.Infof(ctx, "decommissioning and shutting down node %d", removedNodeID)
   849  	if err := firstServer.Decommission(ctx, true, []roachpb.NodeID{removedNodeID}); err != nil {
   850  		t.Fatal(err)
   851  	}
   852  	tc.StopServer(3)
   853  	log.Infof(ctx, "done removing node %d", removedNodeID)
   854  
   855  	log.Infof(ctx, "checking status map")
   856  
   857  	// See what comes up in the status.
   858  
   859  	cc, err := tc.Server(0).RPCContext().GRPCDialNode(
   860  		firstServer.RPCAddr(), firstServer.NodeID(), rpc.DefaultClass).Connect(ctx)
   861  	require.NoError(t, err)
   862  	admin := serverpb.NewAdminClient(cc)
   863  
   864  	type testCase struct {
   865  		nodeID         roachpb.NodeID
   866  		expectedStatus kvserverpb.NodeLivenessStatus
   867  	}
   868  
   869  	// Below we're going to check that all statuses converge and stabilize
   870  	// to a known situation.
   871  	testData := []testCase{
   872  		{liveNodeID, kvserverpb.NodeLivenessStatus_LIVE},
   873  		{deadNodeID, kvserverpb.NodeLivenessStatus_DEAD},
   874  		{decommissioningNodeID, kvserverpb.NodeLivenessStatus_DECOMMISSIONING},
   875  		{removedNodeID, kvserverpb.NodeLivenessStatus_DECOMMISSIONED},
   876  	}
   877  
   878  	for _, test := range testData {
   879  		t.Run(fmt.Sprintf("n%d->%s", test.nodeID, test.expectedStatus), func(t *testing.T) {
   880  			nodeID, expectedStatus := test.nodeID, test.expectedStatus
   881  
   882  			testutils.SucceedsSoon(t, func() error {
   883  				// Ensure that dead nodes are quickly recognized as dead by
   884  				// gossip. Overriding cluster settings is generally a really bad
   885  				// idea as they are also populated via Gossip and so our update
   886  				// is possibly going to be wiped out. But going through SQL
   887  				// doesn't allow durations below 1m15s, which is much too long
   888  				// for a test.
   889  				// We do this in every SucceedsSoon attempt, so we'll be good.
   890  				kvserver.TimeUntilStoreDead.Override(&firstServer.ClusterSettings().SV,
   891  					kvserver.TestTimeUntilStoreDead)
   892  
   893  				log.Infof(ctx, "checking expected status (%s) for node %d", expectedStatus, nodeID)
   894  				resp, err := admin.Liveness(ctx, &serverpb.LivenessRequest{})
   895  				require.NoError(t, err)
   896  				nodeStatuses := resp.Statuses
   897  
   898  				st, ok := nodeStatuses[nodeID]
   899  				if !ok {
   900  					return errors.Errorf("node %d: not in statuses\n", nodeID)
   901  				}
   902  				if st != expectedStatus {
   903  					return errors.Errorf("node %d: unexpected status: got %s, expected %s\n",
   904  						nodeID, st, expectedStatus,
   905  					)
   906  				}
   907  				return nil
   908  			})
   909  		})
   910  	}
   911  }
   912  
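         // testNodeLivenessSetDecommissioning marks the node at decommissionNodeIdx
         // as decommissioning via node 0's liveness, verifies that all nodes
         // observe the change, and verifies that the flag survives a restart of
         // that node's store.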
   913  func testNodeLivenessSetDecommissioning(t *testing.T, decommissionNodeIdx int) {
   914  	mtc := &multiTestContext{}
   915  	defer mtc.Stop()
   916  	mtc.Start(t, 3)
   917  	mtc.initGossipNetwork()
   918  
   919  	verifyLiveness(t, mtc)
   920  
   921  	ctx := context.Background()
   922  	callerNodeLiveness := mtc.nodeLivenesses[0]
   923  	nodeID := mtc.gossips[decommissionNodeIdx].NodeID.Get()
   924  
    925  	// Verify success even though the underlying update fails, because the
    926  	// liveness record already has the given decommissioning setting.
   927  	if _, err := callerNodeLiveness.SetDecommissioningInternal(ctx, nodeID, kvserverpb.Liveness{}, false); err != nil {
   928  		t.Fatal(err)
   929  	}
   930  
   931  	// Set a node to decommissioning state.
   932  	if _, err := callerNodeLiveness.SetDecommissioning(ctx, nodeID, true); err != nil {
   933  		t.Fatal(err)
   934  	}
   935  	verifyNodeIsDecommissioning(t, mtc, nodeID)
   936  
   937  	// Stop and restart the store to verify that a restarted server retains the
   938  	// decommissioning field on the liveness record.
   939  	mtc.stopStore(decommissionNodeIdx)
   940  	mtc.restartStore(decommissionNodeIdx)
   941  
    942  	// Wait until the store has restarted and published a new heartbeat to ensure
    943  	// we're not looking at pre-restart state. We want the test to fail if the node
    944  	// wiped the decommission flag.
   945  	verifyEpochIncremented(t, mtc, decommissionNodeIdx)
   946  	verifyNodeIsDecommissioning(t, mtc, nodeID)
   947  }
   948  
   949  // TestNodeLivenessSetDecommissioning verifies that when decommissioning, a
   950  // node's liveness record is updated and remains after restart.
   951  func TestNodeLivenessSetDecommissioning(t *testing.T) {
   952  	defer leaktest.AfterTest(t)()
    953  	// The node sets itself to decommissioning.
   954  	testNodeLivenessSetDecommissioning(t, 0)
   955  	// Set another node to decommissioning.
   956  	testNodeLivenessSetDecommissioning(t, 1)
   957  }
   958  
   959  // TestNodeLivenessDecommissionAbsent exercises a scenario in which a node is
   960  // asked to decommission another node whose liveness record is not gossiped any
   961  // more.
   962  //
   963  // See (*NodeLiveness).SetDecommissioning for details.
   964  func TestNodeLivenessDecommissionAbsent(t *testing.T) {
   965  	defer leaktest.AfterTest(t)()
   966  
   967  	mtc := &multiTestContext{}
   968  	defer mtc.Stop()
   969  	mtc.Start(t, 3)
   970  	mtc.initGossipNetwork()
   971  
   972  	verifyLiveness(t, mtc)
   973  
   974  	ctx := context.Background()
   975  	const goneNodeID = roachpb.NodeID(10000)
   976  
   977  	// When the node simply never existed, expect an error.
   978  	if _, err := mtc.nodeLivenesses[0].SetDecommissioning(
   979  		ctx, goneNodeID, true,
   980  	); !errors.Is(err, kvserver.ErrNoLivenessRecord) {
   981  		t.Fatal(err)
   982  	}
   983  
   984  	// Pretend the node was once there but isn't gossiped anywhere.
   985  	if err := mtc.dbs[0].CPut(ctx, keys.NodeLivenessKey(goneNodeID), &kvserverpb.Liveness{
   986  		NodeID:     goneNodeID,
   987  		Epoch:      1,
   988  		Expiration: hlc.LegacyTimestamp(mtc.clock().Now()),
   989  	}, nil); err != nil {
   990  		t.Fatal(err)
   991  	}
   992  
   993  	// Decommission from second node.
   994  	if committed, err := mtc.nodeLivenesses[1].SetDecommissioning(ctx, goneNodeID, true); err != nil {
   995  		t.Fatal(err)
   996  	} else if !committed {
   997  		t.Fatal("no change committed")
   998  	}
   999  	// Re-decommission from first node.
  1000  	if committed, err := mtc.nodeLivenesses[0].SetDecommissioning(ctx, goneNodeID, true); err != nil {
  1001  		t.Fatal(err)
  1002  	} else if committed {
  1003  		t.Fatal("spurious change committed")
  1004  	}
  1005  	// Recommission from first node.
  1006  	if committed, err := mtc.nodeLivenesses[0].SetDecommissioning(ctx, goneNodeID, false); err != nil {
  1007  		t.Fatal(err)
  1008  	} else if !committed {
  1009  		t.Fatal("no change committed")
  1010  	}
  1011  	// Decommission from second node (a second time).
  1012  	if committed, err := mtc.nodeLivenesses[1].SetDecommissioning(ctx, goneNodeID, true); err != nil {
  1013  		t.Fatal(err)
  1014  	} else if !committed {
  1015  		t.Fatal("no change committed")
  1016  	}
  1017  	// Recommission from third node.
  1018  	if committed, err := mtc.nodeLivenesses[2].SetDecommissioning(ctx, goneNodeID, false); err != nil {
  1019  		t.Fatal(err)
  1020  	} else if !committed {
  1021  		t.Fatal("no change committed")
  1022  	}
  1023  }