github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/store_pool_test.go (about)

// Copyright 2015 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"context"
	"fmt"
	"math/rand"
	"reflect"
	"sort"
	"testing"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/rpc"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
	"github.com/cockroachdb/cockroach/pkg/testutils/gossiputil"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/metric"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
	"github.com/kr/pretty"
)

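// uniqueStore is a minimal single-store descriptor (store 2 on node 2),
// gossiped by several of the tests below to seed the StorePool with one
// known store.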
var uniqueStore = []*roachpb.StoreDescriptor{
	{
		StoreID: 2,
		Attrs:   roachpb.Attributes{Attrs: []string{"ssd"}},
		Node: roachpb.NodeDescriptor{
			NodeID: 2,
			Attrs:  roachpb.Attributes{Attrs: []string{"a"}},
		},
		Capacity: roachpb.StoreCapacity{
			Capacity:  100,
			Available: 200,
		},
	},
}

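// mockNodeLiveness is a test double for node liveness: nodeLivenessFunc
// returns the status recorded via setNodeStatus, falling back to
// defaultNodeStatus for nodes without an explicit entry.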
type mockNodeLiveness struct {
	syncutil.Mutex
	defaultNodeStatus kvserverpb.NodeLivenessStatus
	nodes             map[roachpb.NodeID]kvserverpb.NodeLivenessStatus
}

func newMockNodeLiveness(defaultNodeStatus kvserverpb.NodeLivenessStatus) *mockNodeLiveness {
	return &mockNodeLiveness{
		defaultNodeStatus: defaultNodeStatus,
		nodes:             map[roachpb.NodeID]kvserverpb.NodeLivenessStatus{},
	}
}

func (m *mockNodeLiveness) setNodeStatus(
	nodeID roachpb.NodeID, status kvserverpb.NodeLivenessStatus,
) {
	m.Lock()
	defer m.Unlock()
	m.nodes[nodeID] = status
}

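// nodeLivenessFunc satisfies the liveness callback expected by StorePool;
// the now and threshold arguments are ignored by this mock.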
func (m *mockNodeLiveness) nodeLivenessFunc(
	nodeID roachpb.NodeID, now time.Time, threshold time.Duration,
) kvserverpb.NodeLivenessStatus {
	m.Lock()
	defer m.Unlock()
	if status, ok := m.nodes[nodeID]; ok {
		return status
	}
	return m.defaultNodeStatus
}

// createTestStorePool creates a stopper, gossip, and storePool for use in
// tests. The stopper must be stopped by the caller.
func createTestStorePool(
	timeUntilStoreDeadValue time.Duration,
	deterministic bool,
	nodeCount NodeCountFunc,
	defaultNodeStatus kvserverpb.NodeLivenessStatus,
) (*stop.Stopper, *gossip.Gossip, *hlc.ManualClock, *StorePool, *mockNodeLiveness) {
	stopper := stop.NewStopper()
	mc := hlc.NewManualClock(123)
	clock := hlc.NewClock(mc.UnixNano, time.Nanosecond)
	st := cluster.MakeTestingClusterSettings()
	rpcContext := rpc.NewContext(
		log.AmbientContext{Tracer: st.Tracer}, &base.Config{Insecure: true}, clock, stopper, st)
	server := rpc.NewServer(rpcContext) // never started
	g := gossip.NewTest(1, rpcContext, server, stopper, metric.NewRegistry(), zonepb.DefaultZoneConfigRef())
	mnl := newMockNodeLiveness(defaultNodeStatus)

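	// Override the cluster setting that controls when a store is considered
	// dead with the caller-provided value, so tests decide the threshold.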
	TimeUntilStoreDead.Override(&st.SV, timeUntilStoreDeadValue)
	storePool := NewStorePool(
		log.AmbientContext{Tracer: st.Tracer},
		st,
		g,
		clock,
		nodeCount,
		mnl.nodeLivenessFunc,
		deterministic,
	)
	return stopper, g, mc, storePool, mnl
}

// TestStorePoolGossipUpdate ensures that the gossip callback in StorePool
// correctly updates a store's details.
func TestStorePoolGossipUpdate(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false, /* deterministic */
		func() int { return 0 }, /* nodeCount */
		kvserverpb.NodeLivenessStatus_DEAD)
	defer stopper.Stop(context.Background())
	sg := gossiputil.NewStoreGossiper(g)

	sp.detailsMu.RLock()
	if _, ok := sp.detailsMu.storeDetails[2]; ok {
		t.Fatalf("store 2 is already in the pool's store list")
	}
	sp.detailsMu.RUnlock()

	sg.GossipStores(uniqueStore, t)

	sp.detailsMu.RLock()
	if _, ok := sp.detailsMu.storeDetails[2]; !ok {
		t.Fatalf("store 2 isn't in the pool's store list")
	}
	sp.detailsMu.RUnlock()
}

// verifyStoreList ensures that the returned list of stores is correct.
func verifyStoreList(
	sp *StorePool,
	constraints []zonepb.ConstraintsConjunction,
	storeIDs roachpb.StoreIDSlice, // optional
	filter storeFilter,
	expected []int,
	expectedAliveStoreCount int,
	expectedThrottledStoreCount int,
) error {
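	// Fetch the store list either for every known store or only for the
	// requested IDs, apply the constraint filter, and compare the result
	// against the expected store IDs and counts.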
	var sl StoreList
	var aliveStoreCount int
	var throttled throttledStoreReasons
	if storeIDs == nil {
		sl, aliveStoreCount, throttled = sp.getStoreList(filter)
	} else {
		sl, aliveStoreCount, throttled = sp.getStoreListFromIDs(storeIDs, filter)
	}
	throttledStoreCount := len(throttled)
	sl = sl.filter(constraints)
	if aliveStoreCount != expectedAliveStoreCount {
		return errors.Errorf("expected AliveStoreCount %d does not match actual %d",
			expectedAliveStoreCount, aliveStoreCount)
	}
	if throttledStoreCount != expectedThrottledStoreCount {
		return errors.Errorf("expected ThrottledStoreCount %d does not match actual %d",
			expectedThrottledStoreCount, throttledStoreCount)
	}
	var actual []int
	for _, store := range sl.stores {
		actual = append(actual, int(store.StoreID))
	}
	sort.Ints(expected)
	sort.Ints(actual)
	if !reflect.DeepEqual(expected, actual) {
		return errors.Errorf("expected %+v stores, actual %+v", expected, actual)
	}
	return nil
}

// TestStorePoolGetStoreList ensures that the store list returns only stores
// that are live and match the attribute criteria.
func TestStorePoolGetStoreList(t *testing.T) {
	defer leaktest.AfterTest(t)()
	// We're going to manually mark stores dead in this test.
	stopper, g, _, sp, mnl := createTestStorePool(
		TestTimeUntilStoreDead, false, /* deterministic */
		func() int { return 10 }, /* nodeCount */
		kvserverpb.NodeLivenessStatus_DEAD)
	defer stopper.Stop(context.Background())
	sg := gossiputil.NewStoreGossiper(g)
	constraints := []zonepb.ConstraintsConjunction{
		{
			Constraints: []zonepb.Constraint{
				{Type: zonepb.Constraint_REQUIRED, Value: "ssd"},
				{Type: zonepb.Constraint_REQUIRED, Value: "dc"},
			},
		},
	}
	required := []string{"ssd", "dc"}
	// Nothing yet.
	sl, _, _ := sp.getStoreList(storeFilterNone)
	sl = sl.filter(constraints)
	if len(sl.stores) != 0 {
		t.Errorf("expected no stores, instead %+v", sl.stores)
	}

	matchingStore := roachpb.StoreDescriptor{
		StoreID: 1,
		Node:    roachpb.NodeDescriptor{NodeID: 1},
		Attrs:   roachpb.Attributes{Attrs: required},
	}
	supersetStore := roachpb.StoreDescriptor{
		StoreID: 2,
		Node:    roachpb.NodeDescriptor{NodeID: 2},
		Attrs:   roachpb.Attributes{Attrs: append(required, "db")},
	}
	unmatchingStore := roachpb.StoreDescriptor{
		StoreID: 3,
		Node:    roachpb.NodeDescriptor{NodeID: 3},
		Attrs:   roachpb.Attributes{Attrs: []string{"ssd", "otherdc"}},
	}
	emptyStore := roachpb.StoreDescriptor{
		StoreID: 4,
		Node:    roachpb.NodeDescriptor{NodeID: 4},
		Attrs:   roachpb.Attributes{},
	}
	deadStore := roachpb.StoreDescriptor{
		StoreID: 5,
		Node:    roachpb.NodeDescriptor{NodeID: 5},
		Attrs:   roachpb.Attributes{Attrs: required},
	}
	declinedStore := roachpb.StoreDescriptor{
		StoreID: 6,
		Node:    roachpb.NodeDescriptor{NodeID: 6},
		Attrs:   roachpb.Attributes{Attrs: required},
	}
	absentStore := roachpb.StoreDescriptor{
		StoreID: 7,
		Node:    roachpb.NodeDescriptor{NodeID: 7},
		Attrs:   roachpb.Attributes{Attrs: required},
	}

	// Gossip and mark all alive initially.
	sg.GossipStores([]*roachpb.StoreDescriptor{
		&matchingStore,
		&supersetStore,
		&unmatchingStore,
		&emptyStore,
		&deadStore,
		&declinedStore,
		// absentStore is purposefully not gossiped.
	}, t)
	for i := 1; i <= 7; i++ {
		mnl.setNodeStatus(roachpb.NodeID(i), kvserverpb.NodeLivenessStatus_LIVE)
	}

	// Set deadStore as dead.
	mnl.setNodeStatus(deadStore.Node.NodeID, kvserverpb.NodeLivenessStatus_DEAD)
	sp.detailsMu.Lock()
	// Set declinedStore as throttled.
	sp.detailsMu.storeDetails[declinedStore.StoreID].throttledUntil = sp.clock.Now().GoTime().Add(time.Hour)
	sp.detailsMu.Unlock()

	// No filter or limited set of store IDs.
	if err := verifyStoreList(
		sp,
		constraints,
		nil, /* storeIDs */
		storeFilterNone,
		[]int{
			int(matchingStore.StoreID),
			int(supersetStore.StoreID),
			int(declinedStore.StoreID),
		},
		/* expectedAliveStoreCount */ 5,
		/* expectedThrottledStoreCount */ 1,
	); err != nil {
		t.Error(err)
	}

	// Filter out throttled stores but don't limit the set of store IDs.
	if err := verifyStoreList(
		sp,
		constraints,
		nil, /* storeIDs */
		storeFilterThrottled,
		[]int{
			int(matchingStore.StoreID),
			int(supersetStore.StoreID),
		},
		/* expectedAliveStoreCount */ 5,
		/* expectedThrottledStoreCount */ 1,
	); err != nil {
		t.Error(err)
	}

	limitToStoreIDs := roachpb.StoreIDSlice{
		matchingStore.StoreID,
		declinedStore.StoreID,
		absentStore.StoreID,
	}

	// No filter but limited to limitToStoreIDs.
	// Note that supersetStore is not included.
	if err := verifyStoreList(
		sp,
		constraints,
		limitToStoreIDs,
		storeFilterNone,
		[]int{
			int(matchingStore.StoreID),
			int(declinedStore.StoreID),
		},
		/* expectedAliveStoreCount */ 2,
		/* expectedThrottledStoreCount */ 1,
	); err != nil {
		t.Error(err)
	}

	// Filter out throttled stores and limit to limitToStoreIDs.
	// Note that supersetStore is not included.
	if err := verifyStoreList(
		sp,
		constraints,
		limitToStoreIDs,
		storeFilterThrottled,
		[]int{
			int(matchingStore.StoreID),
		},
		/* expectedAliveStoreCount */ 2,
		/* expectedThrottledStoreCount */ 1,
	); err != nil {
		t.Error(err)
	}
}

// TestStoreListFilter ensures that the store list constraint filtering works
// properly.
func TestStoreListFilter(t *testing.T) {
	defer leaktest.AfterTest(t)()

	constraints := []zonepb.ConstraintsConjunction{
		{
			Constraints: []zonepb.Constraint{
				{Type: zonepb.Constraint_REQUIRED, Key: "region", Value: "us-west"},
				{Type: zonepb.Constraint_REQUIRED, Value: "MustMatch"},
				{Type: zonepb.Constraint_DEPRECATED_POSITIVE, Value: "MatchingOptional"},
				{Type: zonepb.Constraint_PROHIBITED, Value: "MustNotMatch"},
			},
		},
	}

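	// Each case below describes a store by its attributes and node locality
	// tiers; expected records whether that store should survive filtering by
	// the constraint conjunction above.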
	stores := []struct {
		attributes []string
		locality   []roachpb.Tier
		expected   bool
	}{
		{
			expected: false,
		},
		{
			attributes: []string{"MustMatch"},
			expected:   false,
		},
		{
			locality: []roachpb.Tier{{Key: "region", Value: "us-west"}},
			expected: false,
		},
		{
			attributes: []string{"MustMatch"},
			locality:   []roachpb.Tier{{Key: "region", Value: "us-west"}},
			expected:   true,
		},
		{
			attributes: []string{"a", "MustMatch"},
			locality:   []roachpb.Tier{{Key: "a", Value: "b"}, {Key: "region", Value: "us-west"}},
			expected:   true,
		},
		{
			attributes: []string{"a", "b", "MustMatch", "c"},
			locality:   []roachpb.Tier{{Key: "region", Value: "us-west"}, {Key: "c", Value: "d"}},
			expected:   true,
		},
		{
			attributes: []string{"MustMatch", "MustNotMatch"},
			locality:   []roachpb.Tier{{Key: "region", Value: "us-west"}},
			expected:   false,
		},
		{
			attributes: []string{"MustMatch"},
			locality:   []roachpb.Tier{{Key: "region", Value: "us-west"}, {Key: "MustNotMatch", Value: "b"}},
			expected:   true,
		},
		{
			attributes: []string{"MustMatch"},
			locality:   []roachpb.Tier{{Key: "region", Value: "us-west"}, {Key: "a", Value: "MustNotMatch"}},
			expected:   true,
		},
	}

	var sl StoreList
	var expected []roachpb.StoreDescriptor
	for i, s := range stores {
		storeDesc := roachpb.StoreDescriptor{
			StoreID: roachpb.StoreID(i + 1),
			Node: roachpb.NodeDescriptor{
				Locality: roachpb.Locality{
					Tiers: s.locality,
				},
			},
		}
		// Randomly stick the attributes in either the node or the store to get
		// code coverage of both locations.
		if rand.Intn(2) == 0 {
			storeDesc.Attrs.Attrs = s.attributes
		} else {
			storeDesc.Node.Attrs.Attrs = s.attributes
		}
		sl.stores = append(sl.stores, storeDesc)
		if s.expected {
			expected = append(expected, storeDesc)
		}
	}

	filtered := sl.filter(constraints)
	if !reflect.DeepEqual(expected, filtered.stores) {
		t.Errorf("did not get expected stores %s", pretty.Diff(expected, filtered.stores))
	}
}

func TestStorePoolUpdateLocalStore(t *testing.T) {
	defer leaktest.AfterTest(t)()
	manual := hlc.NewManualClock(123)
	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
	// Gossip two stores and verify that the locally cached capacity estimates
	// are adjusted after rebalances and lease transfers.
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false, /* deterministic */
		func() int { return 10 }, /* nodeCount */
		kvserverpb.NodeLivenessStatus_DEAD)
	defer stopper.Stop(context.Background())
	sg := gossiputil.NewStoreGossiper(g)
	stores := []*roachpb.StoreDescriptor{
		{
			StoreID: 1,
			Node:    roachpb.NodeDescriptor{NodeID: 1},
			Capacity: roachpb.StoreCapacity{
				Capacity:         100,
				Available:        50,
				RangeCount:       5,
				LeaseCount:       1,
				LogicalBytes:     30,
				QueriesPerSecond: 100,
				WritesPerSecond:  30,
			},
		},
		{
			StoreID: 2,
			Node:    roachpb.NodeDescriptor{NodeID: 2},
			Capacity: roachpb.StoreCapacity{
				Capacity:         100,
				Available:        55,
				RangeCount:       4,
				LeaseCount:       2,
				LogicalBytes:     25,
				QueriesPerSecond: 50,
				WritesPerSecond:  25,
			},
		},
	}
	sg.GossipStores(stores, t)

	replica := &Replica{RangeID: 1}
	replica.mu.Lock()
	replica.mu.state.Stats = &enginepb.MVCCStats{
		KeyBytes: 2,
		ValBytes: 4,
	}
	replica.mu.Unlock()
	rs := newReplicaStats(clock, nil)
	for _, store := range stores {
		rs.record(store.Node.NodeID)
	}
	manual.Increment(int64(MinStatsDuration + time.Second))
	replica.leaseholderStats = rs
	replica.writeStats = rs

	rangeUsageInfo := rangeUsageInfoForRepl(replica)

	sp.updateLocalStoreAfterRebalance(roachpb.StoreID(1), rangeUsageInfo, roachpb.ADD_REPLICA)
	desc, ok := sp.getStoreDescriptor(roachpb.StoreID(1))
	if !ok {
		t.Fatalf("couldn't find StoreDescriptor for Store ID %d", 1)
	}
	QPS, _ := replica.leaseholderStats.avgQPS()
	WPS, _ := replica.writeStats.avgQPS()
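	// After ADD_REPLICA, store 1's cached descriptor should gain one range
	// (5 -> 6) and the replica's 6 logical bytes (KeyBytes 2 + ValBytes 4,
	// 30 -> 36); its QPS is unchanged by a rebalance, while WPS grows by the
	// replica's measured write rate.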
	if expectedRangeCount := int32(6); desc.Capacity.RangeCount != expectedRangeCount {
		t.Errorf("expected RangeCount %d, but got %d", expectedRangeCount, desc.Capacity.RangeCount)
	}
	if expectedBytes := int64(36); desc.Capacity.LogicalBytes != expectedBytes {
		t.Errorf("expected logical bytes %d, but got %d", expectedBytes, desc.Capacity.LogicalBytes)
	}
	if expectedQPS := float64(100); desc.Capacity.QueriesPerSecond != expectedQPS {
		t.Errorf("expected QueriesPerSecond %f, but got %f", expectedQPS, desc.Capacity.QueriesPerSecond)
	}
	if expectedWPS := 30 + WPS; desc.Capacity.WritesPerSecond != expectedWPS {
		t.Errorf("expected WritesPerSecond %f, but got %f", expectedWPS, desc.Capacity.WritesPerSecond)
	}

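	// Removing the replica from store 2 should have the opposite effect:
	// range count drops by one (4 -> 3), logical bytes by 6 (25 -> 19), and
	// WPS by the replica's write rate, while QPS again stays put.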
	sp.updateLocalStoreAfterRebalance(roachpb.StoreID(2), rangeUsageInfo, roachpb.REMOVE_REPLICA)
	desc, ok = sp.getStoreDescriptor(roachpb.StoreID(2))
	if !ok {
		t.Fatalf("couldn't find StoreDescriptor for Store ID %d", 2)
	}
	if expectedRangeCount := int32(3); desc.Capacity.RangeCount != expectedRangeCount {
		t.Errorf("expected RangeCount %d, but got %d", expectedRangeCount, desc.Capacity.RangeCount)
	}
	if expectedBytes := int64(19); desc.Capacity.LogicalBytes != expectedBytes {
		t.Errorf("expected logical bytes %d, but got %d", expectedBytes, desc.Capacity.LogicalBytes)
	}
	if expectedQPS := float64(50); desc.Capacity.QueriesPerSecond != expectedQPS {
		t.Errorf("expected QueriesPerSecond %f, but got %f", expectedQPS, desc.Capacity.QueriesPerSecond)
	}
	if expectedWPS := 25 - WPS; desc.Capacity.WritesPerSecond != expectedWPS {
		t.Errorf("expected WritesPerSecond %f, but got %f", expectedWPS, desc.Capacity.WritesPerSecond)
	}

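	// A lease transfer from store 1 to store 2 should move one lease and the
	// range's QPS between the two cached descriptors, leaving range counts
	// untouched.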
	sp.updateLocalStoresAfterLeaseTransfer(roachpb.StoreID(1), roachpb.StoreID(2), rangeUsageInfo.QueriesPerSecond)
	desc, ok = sp.getStoreDescriptor(roachpb.StoreID(1))
	if !ok {
		t.Fatalf("couldn't find StoreDescriptor for Store ID %d", 1)
	}
	if expectedLeaseCount := int32(0); desc.Capacity.LeaseCount != expectedLeaseCount {
		t.Errorf("expected LeaseCount %d, but got %d", expectedLeaseCount, desc.Capacity.LeaseCount)
	}
	if expectedQPS := 100 - QPS; desc.Capacity.QueriesPerSecond != expectedQPS {
		t.Errorf("expected QueriesPerSecond %f, but got %f", expectedQPS, desc.Capacity.QueriesPerSecond)
	}
	desc, ok = sp.getStoreDescriptor(roachpb.StoreID(2))
	if !ok {
		t.Fatalf("couldn't find StoreDescriptor for Store ID %d", 2)
	}
	if expectedLeaseCount := int32(3); desc.Capacity.LeaseCount != expectedLeaseCount {
		t.Errorf("expected LeaseCount %d, but got %d", expectedLeaseCount, desc.Capacity.LeaseCount)
	}
	if expectedQPS := 50 + QPS; desc.Capacity.QueriesPerSecond != expectedQPS {
		t.Errorf("expected QueriesPerSecond %f, but got %f", expectedQPS, desc.Capacity.QueriesPerSecond)
	}
}

// TestStorePoolUpdateLocalStoreBeforeGossip verifies that an attempt to update
// the local copy of a store before that store has been gossiped is a no-op.
func TestStorePoolUpdateLocalStoreBeforeGossip(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()
	manual := hlc.NewManualClock(123)
	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
	stopper, _, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false, /* deterministic */
		func() int { return 10 }, /* nodeCount */
		kvserverpb.NodeLivenessStatus_DEAD)
	defer stopper.Stop(ctx)

	// Create store.
	node := roachpb.NodeDescriptor{NodeID: roachpb.NodeID(1)}
	eng := storage.NewDefaultInMem()
	stopper.AddCloser(eng)
	cfg := TestStoreConfig(clock)
	cfg.Transport = NewDummyRaftTransport(cfg.Settings)
	store := NewStore(ctx, cfg, eng, &node)
	// Fake an ident: this test doesn't want to start the store, but without
	// an Ident the code below would hit nil-pointer panics.
	store.Ident = &roachpb.StoreIdent{
		ClusterID: uuid.Nil,
		StoreID:   1,
		NodeID:    1,
	}

	// Create replica.
	rg := roachpb.RangeDescriptor{
		RangeID:       1,
		StartKey:      roachpb.RKey([]byte("a")),
		EndKey:        roachpb.RKey([]byte("b")),
		NextReplicaID: 1,
	}
	rg.AddReplica(1, 1, roachpb.VOTER_FULL)
	replica, err := newReplica(ctx, &rg, store, 1)
	if err != nil {
		t.Fatalf("make replica error: %+v", err)
	}
	replica.leaseholderStats = newReplicaStats(store.Clock(), nil)

	rangeUsageInfo := rangeUsageInfoForRepl(replica)

	// Update StorePool, which should be a no-op.
	storeID := roachpb.StoreID(1)
	if _, ok := sp.getStoreDescriptor(storeID); ok {
		t.Fatalf("StoreDescriptor not gossiped, should not be found")
	}
	sp.updateLocalStoreAfterRebalance(storeID, rangeUsageInfo, roachpb.ADD_REPLICA)
	if _, ok := sp.getStoreDescriptor(storeID); ok {
		t.Fatalf("StoreDescriptor still not gossiped, should not be found")
	}
}

func TestStorePoolGetStoreDetails(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false, /* deterministic */
		func() int { return 10 }, /* nodeCount */
		kvserverpb.NodeLivenessStatus_DEAD)
	defer stopper.Stop(context.Background())
	sg := gossiputil.NewStoreGossiper(g)
	sg.GossipStores(uniqueStore, t)

	sp.detailsMu.Lock()
	defer sp.detailsMu.Unlock()
	if detail := sp.getStoreDetailLocked(roachpb.StoreID(1)); detail.desc != nil {
		t.Errorf("unexpectedly found a descriptor for store ID 1: %+v", detail.desc)
	}
	if detail := sp.getStoreDetailLocked(roachpb.StoreID(2)); detail.desc == nil {
		t.Errorf("failed to fetch store ID 2")
	}
}

func TestStorePoolFindDeadReplicas(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, mnl := createTestStorePool(
		TestTimeUntilStoreDead, false, /* deterministic */
		func() int { return 10 }, /* nodeCount */
		kvserverpb.NodeLivenessStatus_DEAD)
	defer stopper.Stop(context.Background())
	sg := gossiputil.NewStoreGossiper(g)

	stores := []*roachpb.StoreDescriptor{
		{
			StoreID: 1,
			Node:    roachpb.NodeDescriptor{NodeID: 1},
		},
		{
			StoreID: 2,
			Node:    roachpb.NodeDescriptor{NodeID: 2},
		},
		{
			StoreID: 3,
			Node:    roachpb.NodeDescriptor{NodeID: 3},
		},
		{
			StoreID: 4,
			Node:    roachpb.NodeDescriptor{NodeID: 4},
		},
		{
			StoreID: 5,
			Node:    roachpb.NodeDescriptor{NodeID: 5},
		},
	}

	replicas := []roachpb.ReplicaDescriptor{
		{
			NodeID:    1,
			StoreID:   1,
			ReplicaID: 1,
		},
		{
			NodeID:    2,
			StoreID:   2,
			ReplicaID: 2,
		},
		{
			NodeID:    3,
			StoreID:   3,
			ReplicaID: 4,
		},
		{
			NodeID:    4,
			StoreID:   4,
			ReplicaID: 4,
		},
		{
			NodeID:    5,
			StoreID:   5,
			ReplicaID: 5,
		},
	}

	sg.GossipStores(stores, t)
	for i := 1; i <= 5; i++ {
		mnl.setNodeStatus(roachpb.NodeID(i), kvserverpb.NodeLivenessStatus_LIVE)
	}

	liveReplicas, deadReplicas := sp.liveAndDeadReplicas(replicas)
	if len(liveReplicas) != 5 {
		t.Fatalf("expected five live replicas, found %d (%v)", len(liveReplicas), liveReplicas)
	}
	if len(deadReplicas) > 0 {
		t.Fatalf("expected no dead replicas initially, found %d (%v)", len(deadReplicas), deadReplicas)
	}
	// Mark nodes 4 & 5 as dead.
	mnl.setNodeStatus(4, kvserverpb.NodeLivenessStatus_DEAD)
	mnl.setNodeStatus(5, kvserverpb.NodeLivenessStatus_DEAD)

	liveReplicas, deadReplicas = sp.liveAndDeadReplicas(replicas)
	if a, e := liveReplicas, replicas[:3]; !reflect.DeepEqual(a, e) {
		t.Fatalf("expected live replicas %+v; got %+v", e, a)
	}
	if a, e := deadReplicas, replicas[3:]; !reflect.DeepEqual(a, e) {
		t.Fatalf("expected dead replicas %+v; got %+v", e, a)
	}

	// Mark node 4 as merely unavailable.
	mnl.setNodeStatus(4, kvserverpb.NodeLivenessStatus_UNAVAILABLE)

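	// An unavailable node is neither live nor dead, so the replica on node 4
	// should now be missing from both lists.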
	liveReplicas, deadReplicas = sp.liveAndDeadReplicas(replicas)
	if a, e := liveReplicas, replicas[:3]; !reflect.DeepEqual(a, e) {
		t.Fatalf("expected live replicas %+v; got %+v", e, a)
	}
	if a, e := deadReplicas, replicas[4:]; !reflect.DeepEqual(a, e) {
		t.Fatalf("expected dead replicas %+v; got %+v", e, a)
	}
}

// TestStorePoolDefaultState verifies that the default state of a
// store is neither alive nor dead. This is a regression test for a
// bug in which a call to deadReplicas involving an unknown store
// would have the side effect of marking that store as alive and
// eligible for return by getStoreList. It is therefore significant
// that the two methods are tested in the same test, and in this
// order.
func TestStorePoolDefaultState(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, _, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false, /* deterministic */
		func() int { return 10 }, /* nodeCount */
		kvserverpb.NodeLivenessStatus_DEAD)
	defer stopper.Stop(context.Background())

	liveReplicas, deadReplicas := sp.liveAndDeadReplicas([]roachpb.ReplicaDescriptor{{StoreID: 1}})
	if len(liveReplicas) != 0 || len(deadReplicas) != 0 {
		t.Errorf("expected 0 live and 0 dead replicas; got %v and %v", liveReplicas, deadReplicas)
	}

	sl, alive, throttled := sp.getStoreList(storeFilterNone)
	if len(sl.stores) > 0 {
		t.Errorf("expected no live stores; got list of %v", sl)
	}
	if alive != 0 {
		t.Errorf("expected no live stores; got a live count of %d", alive)
	}
	if len(throttled) != 0 {
		t.Errorf("expected no live stores; got throttled %v", throttled)
	}
}

func TestStorePoolThrottle(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false, /* deterministic */
		func() int { return 10 }, /* nodeCount */
		kvserverpb.NodeLivenessStatus_DEAD)
	defer stopper.Stop(context.Background())

	sg := gossiputil.NewStoreGossiper(g)
	sg.GossipStores(uniqueStore, t)

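	// Throttling for a declined reservation should push the store's
	// throttledUntil out by DeclinedReservationsTimeout; the second block
	// below checks the analogous FailedReservationsTimeout case.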
	{
		expected := sp.clock.Now().GoTime().Add(DeclinedReservationsTimeout.Get(&sp.st.SV))
		sp.throttle(throttleDeclined, "", 1)

		sp.detailsMu.Lock()
		detail := sp.getStoreDetailLocked(1)
		sp.detailsMu.Unlock()
		if !detail.throttledUntil.Equal(expected) {
			t.Errorf("expected store to have been throttled to %v, found %v",
				expected, detail.throttledUntil)
		}
	}

	{
		expected := sp.clock.Now().GoTime().Add(FailedReservationsTimeout.Get(&sp.st.SV))
		sp.throttle(throttleFailed, "", 1)

		sp.detailsMu.Lock()
		detail := sp.getStoreDetailLocked(1)
		sp.detailsMu.Unlock()
		if !detail.throttledUntil.Equal(expected) {
			t.Errorf("expected store to have been throttled to %v, found %v",
				expected, detail.throttledUntil)
		}
	}
}

func TestGetLocalities(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false, /* deterministic */
		func() int { return 10 }, /* nodeCount */
		kvserverpb.NodeLivenessStatus_DEAD)
	defer stopper.Stop(context.Background())
	sg := gossiputil.NewStoreGossiper(g)

	// createLocality builds a locality with the given number of tiers;
	// createDescWithLocality wraps it in a node descriptor whose NodeID
	// equals the tier count.
	createLocality := func(tierCount int) roachpb.Locality {
		var locality roachpb.Locality
		for i := 1; i <= tierCount; i++ {
			value := fmt.Sprintf("%d", i)
			locality.Tiers = append(locality.Tiers, roachpb.Tier{
				Key:   value,
				Value: value,
			})
		}
		return locality
	}
	createDescWithLocality := func(tierCount int) roachpb.NodeDescriptor {
		return roachpb.NodeDescriptor{
			NodeID:   roachpb.NodeID(tierCount),
			Locality: createLocality(tierCount),
		}
	}

	stores := []*roachpb.StoreDescriptor{
		{
			StoreID: 1,
			Node:    createDescWithLocality(1),
		},
		{
			StoreID: 2,
			Node:    createDescWithLocality(2),
		},
		{
			StoreID: 3,
			Node:    createDescWithLocality(3),
		},
		{
			StoreID: 4,
			Node:    createDescWithLocality(2),
		},
	}

	sg.GossipStores(stores, t)

	var existingReplicas []roachpb.ReplicaDescriptor
	for _, store := range stores {
		existingReplicas = append(existingReplicas, roachpb.ReplicaDescriptor{NodeID: store.Node.NodeID})
	}

	localities := sp.getLocalities(existingReplicas)
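	// Every gossiped node should round-trip: the locality returned for a node
	// has as many tiers as its NodeID, and getNodeLocalityString matches the
	// string form of the locality constructed above.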
	for _, store := range stores {
		nodeID := store.Node.NodeID
		locality, ok := localities[nodeID]
		if !ok {
			t.Fatalf("could not find locality for node %d", nodeID)
		}
		if e, a := int(nodeID), len(locality.Tiers); e != a {
			t.Fatalf("for node %d, expected %d tiers, only got %d", nodeID, e, a)
		}
		if e, a := createLocality(int(nodeID)).String(), sp.getNodeLocalityString(nodeID); e != a {
			t.Fatalf("for getNodeLocalityString(%d), expected %q, got %q", nodeID, e, a)
		}
	}
}

func TestStorePoolDecommissioningReplicas(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, mnl := createTestStorePool(
		TestTimeUntilStoreDead, false, /* deterministic */
		func() int { return 10 }, /* nodeCount */
		kvserverpb.NodeLivenessStatus_DEAD)
	defer stopper.Stop(context.Background())
	sg := gossiputil.NewStoreGossiper(g)

	stores := []*roachpb.StoreDescriptor{
		{
			StoreID: 1,
			Node:    roachpb.NodeDescriptor{NodeID: 1},
		},
		{
			StoreID: 2,
			Node:    roachpb.NodeDescriptor{NodeID: 2},
		},
		{
			StoreID: 3,
			Node:    roachpb.NodeDescriptor{NodeID: 3},
		},
		{
			StoreID: 4,
			Node:    roachpb.NodeDescriptor{NodeID: 4},
		},
		{
			StoreID: 5,
			Node:    roachpb.NodeDescriptor{NodeID: 5},
		},
	}

	replicas := []roachpb.ReplicaDescriptor{
		{
			NodeID:    1,
			StoreID:   1,
			ReplicaID: 1,
		},
		{
			NodeID:    2,
			StoreID:   2,
			ReplicaID: 2,
		},
		{
			NodeID:    3,
			StoreID:   3,
			ReplicaID: 4,
		},
		{
			NodeID:    4,
			StoreID:   4,
			ReplicaID: 4,
		},
		{
			NodeID:    5,
			StoreID:   5,
			ReplicaID: 5,
		},
	}

	sg.GossipStores(stores, t)
	for i := 1; i <= 5; i++ {
		mnl.setNodeStatus(roachpb.NodeID(i), kvserverpb.NodeLivenessStatus_LIVE)
	}

	liveReplicas, deadReplicas := sp.liveAndDeadReplicas(replicas)
	if len(liveReplicas) != 5 {
		t.Fatalf("expected five live replicas, found %d (%v)", len(liveReplicas), liveReplicas)
	}
	if len(deadReplicas) > 0 {
		t.Fatalf("expected no dead replicas initially, found %d (%v)", len(deadReplicas), deadReplicas)
	}
	// Mark node 4 as decommissioning.
	mnl.setNodeStatus(4, kvserverpb.NodeLivenessStatus_DECOMMISSIONING)
	// Mark node 5 as dead.
	mnl.setNodeStatus(5, kvserverpb.NodeLivenessStatus_DEAD)

	liveReplicas, deadReplicas = sp.liveAndDeadReplicas(replicas)
	// Decommissioning replicas are considered live.
	if a, e := liveReplicas, replicas[:4]; !reflect.DeepEqual(a, e) {
		t.Fatalf("expected live replicas %+v; got %+v", e, a)
	}
	if a, e := deadReplicas, replicas[4:]; !reflect.DeepEqual(a, e) {
		t.Fatalf("expected dead replicas %+v; got %+v", e, a)
	}

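	// decommissioningReplicas should single out only the replica on node 4,
	// the one node marked DECOMMISSIONING above.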
	decommissioningReplicas := sp.decommissioningReplicas(replicas)
	if a, e := decommissioningReplicas, replicas[3:4]; !reflect.DeepEqual(a, e) {
		t.Fatalf("expected decommissioning replicas %+v; got %+v", e, a)
	}
}

func TestNodeLivenessLivenessStatus(t *testing.T) {
	defer leaktest.AfterTest(t)()
	now := timeutil.Now()
	threshold := 5 * time.Minute

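	// The cases below exercise LivenessStatus: an expiration in the future is
	// LIVE, an expiration at or before now but within the threshold is
	// UNAVAILABLE, and one at least threshold old is DEAD; the
	// Decommissioning and Draining flags override those results as noted on
	// the individual cases.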
	for _, tc := range []struct {
		liveness kvserverpb.Liveness
		expected kvserverpb.NodeLivenessStatus
	}{
		// Valid status.
		{
			liveness: kvserverpb.Liveness{
				NodeID: 1,
				Epoch:  1,
				Expiration: hlc.LegacyTimestamp{
					WallTime: now.Add(5 * time.Minute).UnixNano(),
				},
				Decommissioning: false,
				Draining:        false,
			},
			expected: kvserverpb.NodeLivenessStatus_LIVE,
		},
		{
			liveness: kvserverpb.Liveness{
				NodeID: 1,
				Epoch:  1,
				Expiration: hlc.LegacyTimestamp{
					// Expires just slightly in the future.
					WallTime: now.UnixNano() + 1,
				},
				Decommissioning: false,
				Draining:        false,
			},
			expected: kvserverpb.NodeLivenessStatus_LIVE,
		},
		// Expired status.
		{
			liveness: kvserverpb.Liveness{
				NodeID: 1,
				Epoch:  1,
				Expiration: hlc.LegacyTimestamp{
					// Just expired.
					WallTime: now.UnixNano(),
				},
				Decommissioning: false,
				Draining:        false,
			},
			expected: kvserverpb.NodeLivenessStatus_UNAVAILABLE,
		},
		// Expired status.
		{
			liveness: kvserverpb.Liveness{
				NodeID: 1,
				Epoch:  1,
				Expiration: hlc.LegacyTimestamp{
					WallTime: now.UnixNano(),
				},
				Decommissioning: false,
				Draining:        false,
			},
			expected: kvserverpb.NodeLivenessStatus_UNAVAILABLE,
		},
		// Max bound of expired.
		{
			liveness: kvserverpb.Liveness{
				NodeID: 1,
				Epoch:  1,
				Expiration: hlc.LegacyTimestamp{
					WallTime: now.Add(-threshold).UnixNano() + 1,
				},
				Decommissioning: false,
				Draining:        false,
			},
			expected: kvserverpb.NodeLivenessStatus_UNAVAILABLE,
		},
		// Dead status.
		{
			liveness: kvserverpb.Liveness{
				NodeID: 1,
				Epoch:  1,
				Expiration: hlc.LegacyTimestamp{
					WallTime: now.Add(-threshold).UnixNano(),
				},
				Decommissioning: false,
				Draining:        false,
			},
			expected: kvserverpb.NodeLivenessStatus_DEAD,
		},
		// Decommissioning.
		{
			liveness: kvserverpb.Liveness{
				NodeID: 1,
				Epoch:  1,
				Expiration: hlc.LegacyTimestamp{
					WallTime: now.Add(time.Second).UnixNano(),
				},
				Decommissioning: true,
				Draining:        false,
			},
			expected: kvserverpb.NodeLivenessStatus_DECOMMISSIONING,
		},
		// Decommissioned.
		{
			liveness: kvserverpb.Liveness{
				NodeID: 1,
				Epoch:  1,
				Expiration: hlc.LegacyTimestamp{
					WallTime: now.Add(-threshold).UnixNano(),
				},
				Decommissioning: true,
				Draining:        false,
			},
			expected: kvserverpb.NodeLivenessStatus_DECOMMISSIONED,
		},
		// Draining (reports as unavailable).
		{
			liveness: kvserverpb.Liveness{
				NodeID: 1,
				Epoch:  1,
				Expiration: hlc.LegacyTimestamp{
					WallTime: now.Add(5 * time.Minute).UnixNano(),
				},
				Decommissioning: false,
				Draining:        true,
			},
			expected: kvserverpb.NodeLivenessStatus_UNAVAILABLE,
		},
	} {
		t.Run("", func(t *testing.T) {
			if a, e := LivenessStatus(tc.liveness, now, threshold), tc.expected; a != e {
				t.Errorf("liveness status was %s, wanted %s", a.String(), e.String())
			}
		})
	}
}