github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_stats_test.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"math"
    15  	"reflect"
    16  	"testing"
    17  	"time"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    20  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    21  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    22  	"github.com/kr/pretty"
    23  )
    24  
    25  func floatsEqual(x, y float64) bool {
    26  	diff := math.Abs(y - x)
    27  	return diff < 0.00000001
    28  }
    29  
    30  func floatMapsEqual(expected, actual map[string]float64) bool {
    31  	if len(expected) != len(actual) {
    32  		return false
    33  	}
    34  	for k, v1 := range expected {
    35  		v2, ok := actual[k]
    36  		if !ok {
    37  			return false
    38  		}
    39  		if !floatsEqual(v1, v2) {
    40  			return false
    41  		}
    42  	}
    43  	return true
    44  }
    45  
    46  func TestReplicaStats(t *testing.T) {
    47  	defer leaktest.AfterTest(t)()
    48  
    49  	manual := hlc.NewManualClock(123)
    50  	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
    51  
    52  	gceLocalities := map[roachpb.NodeID]string{
    53  		1: "region=us-east1,zone=us-east1-a",
    54  		2: "region=us-east1,zone=us-east1-b",
    55  		3: "region=us-west1,zone=us-west1-a",
    56  		4: "",
    57  	}
    58  	mismatchedLocalities := map[roachpb.NodeID]string{
    59  		1: "region=us-east1,zone=a",
    60  		2: "region=us-east1,zone=b",
    61  		3: "region=us-west1,zone=a",
    62  		4: "zone=us-central1-a",
    63  	}
    64  	missingLocalities := map[roachpb.NodeID]string{}
    65  
    66  	testCases := []struct {
    67  		localities map[roachpb.NodeID]string
    68  		reqs       []roachpb.NodeID
    69  		expected   perLocalityCounts
    70  	}{
    71  		{
    72  			localities: gceLocalities,
    73  			reqs:       []roachpb.NodeID{},
    74  			expected:   perLocalityCounts{},
    75  		},
    76  		{
    77  			localities: gceLocalities,
    78  			reqs:       []roachpb.NodeID{1, 1, 1},
    79  			expected: perLocalityCounts{
    80  				"region=us-east1,zone=us-east1-a": 3,
    81  			},
    82  		},
    83  		{
    84  			localities: gceLocalities,
    85  			reqs:       []roachpb.NodeID{1, 2, 3},
    86  			expected: perLocalityCounts{
    87  				"region=us-east1,zone=us-east1-a": 1,
    88  				"region=us-east1,zone=us-east1-b": 1,
    89  				"region=us-west1,zone=us-west1-a": 1,
    90  			},
    91  		},
    92  		{
    93  			localities: gceLocalities,
    94  			reqs:       []roachpb.NodeID{4, 5, 6},
    95  			expected: perLocalityCounts{
    96  				"": 3,
    97  			},
    98  		},
    99  		{
   100  			localities: gceLocalities,
   101  			reqs:       []roachpb.NodeID{1, 4, 2, 5, 3, 6},
   102  			expected: perLocalityCounts{
   103  				"region=us-east1,zone=us-east1-a": 1,
   104  				"region=us-east1,zone=us-east1-b": 1,
   105  				"region=us-west1,zone=us-west1-a": 1,
   106  				"":                                3,
   107  			},
   108  		},
   109  		{
   110  			localities: mismatchedLocalities,
   111  			reqs:       []roachpb.NodeID{},
   112  			expected:   perLocalityCounts{},
   113  		},
   114  		{
   115  			localities: mismatchedLocalities,
   116  			reqs:       []roachpb.NodeID{1, 1, 1},
   117  			expected: perLocalityCounts{
   118  				"region=us-east1,zone=a": 3,
   119  			},
   120  		},
   121  		{
   122  			localities: mismatchedLocalities,
   123  			reqs:       []roachpb.NodeID{1, 2, 3, 4},
   124  			expected: perLocalityCounts{
   125  				"region=us-east1,zone=a": 1,
   126  				"region=us-east1,zone=b": 1,
   127  				"region=us-west1,zone=a": 1,
   128  				"zone=us-central1-a":     1,
   129  			},
   130  		},
   131  		{
   132  			localities: mismatchedLocalities,
   133  			reqs:       []roachpb.NodeID{4, 5, 6},
   134  			expected: perLocalityCounts{
   135  				"zone=us-central1-a": 1,
   136  				"":                   2,
   137  			},
   138  		},
   139  		{
   140  			localities: mismatchedLocalities,
   141  			reqs:       []roachpb.NodeID{1, 4, 2, 5, 3, 6},
   142  			expected: perLocalityCounts{
   143  				"region=us-east1,zone=a": 1,
   144  				"region=us-east1,zone=b": 1,
   145  				"region=us-west1,zone=a": 1,
   146  				"zone=us-central1-a":     1,
   147  				"":                       2,
   148  			},
   149  		},
   150  		{
   151  			localities: missingLocalities,
   152  			reqs:       []roachpb.NodeID{},
   153  			expected:   perLocalityCounts{},
   154  		},
   155  		{
   156  			localities: missingLocalities,
   157  			reqs:       []roachpb.NodeID{1, 1, 1},
   158  			expected: perLocalityCounts{
   159  				"": 3,
   160  			},
   161  		},
   162  		{
   163  			localities: missingLocalities,
   164  			reqs:       []roachpb.NodeID{1, 2, 3, 4, 5, 6},
   165  			expected: perLocalityCounts{
   166  				"": 6,
   167  			},
   168  		},
   169  	}
   170  	for i, tc := range testCases {
   171  		rs := newReplicaStats(clock, func(nodeID roachpb.NodeID) string {
   172  			return tc.localities[nodeID]
   173  		})
   174  		for _, req := range tc.reqs {
   175  			rs.record(req)
   176  		}
   177  		manual.Increment(int64(time.Second))
   178  		if actual, _ := rs.perLocalityDecayingQPS(); !floatMapsEqual(tc.expected, actual) {
   179  			t.Errorf("%d: incorrect per-locality QPS averages: %s", i, pretty.Diff(tc.expected, actual))
   180  		}
   181  		var expectedAvgQPS float64
   182  		for _, v := range tc.expected {
   183  			expectedAvgQPS += v
   184  		}
   185  		if actual, _ := rs.avgQPS(); actual != expectedAvgQPS {
   186  			t.Errorf("%d: avgQPS() got %f, want %f", i, actual, expectedAvgQPS)
   187  		}
   188  		// Verify that QPS numbers get cut in half after another second.
   189  		manual.Increment(int64(time.Second))
   190  		for k, v := range tc.expected {
   191  			tc.expected[k] = v / 2
   192  		}
   193  		if actual, _ := rs.perLocalityDecayingQPS(); !floatMapsEqual(tc.expected, actual) {
   194  			t.Errorf("%d: incorrect per-locality QPS averages: %s", i, pretty.Diff(tc.expected, actual))
   195  		}
   196  		expectedAvgQPS /= 2
   197  		if actual, _ := rs.avgQPS(); actual != expectedAvgQPS {
   198  			t.Errorf("%d: avgQPS() got %f, want %f", i, actual, expectedAvgQPS)
   199  		}
   200  		rs.resetRequestCounts()
   201  		if actual, _ := rs.perLocalityDecayingQPS(); len(actual) != 0 {
   202  			t.Errorf("%d: unexpected non-empty QPS averages after resetting: %+v", i, actual)
   203  		}
   204  	}
   205  }
   206  
   207  func TestReplicaStatsDecay(t *testing.T) {
   208  	defer leaktest.AfterTest(t)()
   209  
   210  	manual := hlc.NewManualClock(123)
   211  	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
   212  
   213  	awsLocalities := map[roachpb.NodeID]string{
   214  		1: "region=us-east-1,zone=us-east-1a",
   215  		2: "region=us-east-1,zone=us-east-1b",
   216  		3: "region=us-west-1,zone=us-west-1a",
   217  	}
   218  
   219  	rs := newReplicaStats(clock, func(nodeID roachpb.NodeID) string {
   220  		return awsLocalities[nodeID]
   221  	})
   222  
   223  	{
   224  		counts, dur := rs.perLocalityDecayingQPS()
   225  		if len(counts) != 0 {
   226  			t.Errorf("expected empty request counts, got %+v", counts)
   227  		}
   228  		if dur != 0 {
   229  			t.Errorf("expected duration = 0, got %v", dur)
   230  		}
   231  		manual.Increment(1)
   232  		if _, dur := rs.perLocalityDecayingQPS(); dur != 1 {
   233  			t.Errorf("expected duration = 1, got %v", dur)
   234  		}
   235  		rs.resetRequestCounts()
   236  	}
   237  
   238  	{
   239  		for _, req := range []roachpb.NodeID{1, 1, 2, 2, 3} {
   240  			rs.record(req)
   241  		}
   242  		counts := perLocalityCounts{
   243  			awsLocalities[1]: 2,
   244  			awsLocalities[2]: 2,
   245  			awsLocalities[3]: 1,
   246  		}
   247  		actual, dur := rs.perLocalityDecayingQPS()
   248  		if dur != 0 {
   249  			t.Errorf("expected duration = 0, got %v", dur)
   250  		}
   251  		if !reflect.DeepEqual(counts, actual) {
   252  			t.Errorf("incorrect per-locality request counts: %s", pretty.Diff(counts, actual))
   253  		}
   254  
   255  		var totalDuration time.Duration
   256  		for i := 0; i < len(rs.mu.requests)-1; i++ {
   257  			manual.Increment(int64(replStatsRotateInterval))
   258  			totalDuration = time.Duration(float64(replStatsRotateInterval+totalDuration) * decayFactor)
   259  			expected := make(perLocalityCounts)
   260  			for k, v := range counts {
   261  				counts[k] = v * decayFactor
   262  				expected[k] = counts[k] / totalDuration.Seconds()
   263  			}
   264  			actual, dur = rs.perLocalityDecayingQPS()
   265  			if expectedDur := replStatsRotateInterval * time.Duration(i+1); dur != expectedDur {
   266  				t.Errorf("expected duration = %v, got %v", expectedDur, dur)
   267  			}
   268  			// We can't just use DeepEqual to compare these due to the float
   269  			// multiplication inaccuracies.
   270  			if !floatMapsEqual(expected, actual) {
   271  				t.Errorf("%d: incorrect per-locality request counts: %s", i, pretty.Diff(expected, actual))
   272  			}
   273  		}
   274  
   275  		manual.Increment(int64(replStatsRotateInterval))
   276  		expected := make(perLocalityCounts)
   277  		if actual, _ := rs.perLocalityDecayingQPS(); !reflect.DeepEqual(expected, actual) {
   278  			t.Errorf("incorrect per-locality request counts: %s", pretty.Diff(expected, actual))
   279  		}
   280  		rs.resetRequestCounts()
   281  	}
   282  
   283  	{
   284  		for _, req := range []roachpb.NodeID{1, 1, 2, 2, 3} {
   285  			rs.record(req)
   286  		}
   287  		manual.Increment(int64(replStatsRotateInterval))
   288  		for _, req := range []roachpb.NodeID{2, 2, 3, 3, 3} {
   289  			rs.record(req)
   290  		}
   291  		durationDivisor := time.Duration(float64(replStatsRotateInterval) * decayFactor).Seconds()
   292  		expected := perLocalityCounts{
   293  			// We expect the first loop's requests to be decreased by decayFactor,
   294  			// but not the second loop's.
   295  			awsLocalities[1]: 2 * decayFactor / durationDivisor,
   296  			awsLocalities[2]: (2*decayFactor + 2) / durationDivisor,
   297  			awsLocalities[3]: (1*decayFactor + 3) / durationDivisor,
   298  		}
   299  		if actual, _ := rs.perLocalityDecayingQPS(); !reflect.DeepEqual(expected, actual) {
   300  			t.Errorf("incorrect per-locality request counts: %s", pretty.Diff(expected, actual))
   301  		}
   302  	}
   303  }
   304  
   305  // TestReplicaStatsDecaySmoothing verifies that there is a smooth decrease
   306  // in request counts over time rather than a massive drop when the count
   307  // windows get rotated.
   308  func TestReplicaStatsDecaySmoothing(t *testing.T) {
   309  	defer leaktest.AfterTest(t)()
   310  
   311  	manual := hlc.NewManualClock(123)
   312  	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
   313  	awsLocalities := map[roachpb.NodeID]string{
   314  		1: "region=us-east-1,zone=us-east-1a",
   315  		2: "region=us-east-1,zone=us-east-1b",
   316  		3: "region=us-west-1,zone=us-west-1a",
   317  	}
   318  	rs := newReplicaStats(clock, func(nodeID roachpb.NodeID) string {
   319  		return awsLocalities[nodeID]
   320  	})
   321  	rs.record(1)
   322  	rs.record(1)
   323  	rs.record(2)
   324  	rs.record(2)
   325  	rs.record(3)
   326  	expected := perLocalityCounts{
   327  		awsLocalities[1]: 2,
   328  		awsLocalities[2]: 2,
   329  		awsLocalities[3]: 1,
   330  	}
   331  	if actual, _ := rs.perLocalityDecayingQPS(); !reflect.DeepEqual(expected, actual) {
   332  		t.Errorf("incorrect per-locality request counts: %s", pretty.Diff(expected, actual))
   333  	}
   334  
   335  	increment := replStatsRotateInterval / 2
   336  	manual.Increment(int64(increment))
   337  	actual1, dur := rs.perLocalityDecayingQPS()
   338  	if dur != increment {
   339  		t.Errorf("expected duration = %v; got %v", increment, dur)
   340  	}
   341  	for k := range expected {
   342  		expected[k] /= increment.Seconds()
   343  	}
   344  	if !floatMapsEqual(expected, actual1) {
   345  		t.Errorf("incorrect per-locality request counts: %s", pretty.Diff(expected, actual1))
   346  	}
   347  
   348  	// Verify that all values decrease as time advances if no requests come in.
   349  	manual.Increment(1)
   350  	actual2, _ := rs.perLocalityDecayingQPS()
   351  	if len(actual1) != len(actual2) {
   352  		t.Fatalf("unexpected different results sizes (expected %d, got %d)", len(actual1), len(actual2))
   353  	}
   354  	for k := range actual1 {
   355  		if actual2[k] >= actual1[k] {
   356  			t.Errorf("expected newer count %f to be smaller than older count %f", actual2[k], actual2[k])
   357  		}
   358  	}
   359  
   360  	// Ditto for passing a window boundary.
   361  	manual.Increment(int64(increment))
   362  	actual3, _ := rs.perLocalityDecayingQPS()
   363  	if len(actual2) != len(actual3) {
   364  		t.Fatalf("unexpected different results sizes (expected %d, got %d)", len(actual2), len(actual3))
   365  	}
   366  	for k := range actual2 {
   367  		if actual3[k] >= actual2[k] {
   368  			t.Errorf("expected newer count %f to be smaller than older count %f", actual3[k], actual3[k])
   369  		}
   370  	}
   371  }