github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/reports/replication_stats_report_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package reports
    12  
    13  import (
    14  	"context"
    15  	"sort"
    16  	"strings"
    17  	"testing"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/base"
    21  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
    23  	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
    24  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    25  	"github.com/stretchr/testify/require"
    26  )
    27  
    28  func TestRangeReport(t *testing.T) {
    29  	defer leaktest.AfterTest(t)()
    30  
    31  	ctx := context.Background()
    32  	st := cluster.MakeTestingClusterSettings()
    33  	// This test uses the cluster as a recipient for a report saved from outside
    34  	// the cluster. We disable the cluster's own production of reports so that it
    35  	// doesn't interfere with the test.
    36  	ReporterInterval.Override(&st.SV, 0)
    37  	s, _, db := serverutils.StartServer(t, base.TestServerArgs{Settings: st})
    38  	con := s.InternalExecutor().(sqlutil.InternalExecutor)
    39  	defer s.Stopper().Stop(ctx)
    40  
    41  	// Verify that tables are empty.
    42  	require.ElementsMatch(t, TableData(ctx, "system.replication_stats", con), [][]string{})
    43  	require.ElementsMatch(t, TableData(ctx, "system.reports_meta", con), [][]string{})
    44  
    45  	// Add several localities and verify the result
    46  	stats := make(RangeReport)
    47  	stats.CountRange(MakeZoneKey(1, 3), true, true, true)
    48  	stats.CountRange(MakeZoneKey(1, 3), false, true, true)
    49  	stats.CountRange(MakeZoneKey(1, 3), false, false, true)
    50  	stats.CountRange(MakeZoneKey(1, 3), true, true, false)
    51  	stats.CountRange(MakeZoneKey(2, 3), false, false, false)
    52  	stats.CountRange(MakeZoneKey(2, 4), false, true, false)
    53  
    54  	r := makeReplicationStatsReportSaver()
    55  	time1 := time.Date(2001, 1, 1, 10, 0, 0, 0, time.UTC)
    56  	require.NoError(t, r.Save(ctx, stats, time1, db, con))
    57  	stats = make(RangeReport)
    58  
    59  	require.ElementsMatch(t, TableData(ctx, "system.replication_stats", con), [][]string{
    60  		{"1", "3", "3", "4", "2", "3", "3"},
    61  		{"2", "3", "3", "1", "0", "0", "0"},
    62  		{"2", "4", "3", "1", "0", "1", "0"},
    63  	})
    64  	require.ElementsMatch(t, TableData(ctx, "system.reports_meta", con), [][]string{
    65  		{"3", "'2001-01-01 10:00:00+00:00'"},
    66  	})
    67  	require.Equal(t, 3, r.LastUpdatedRowCount())
    68  
    69  	// Add new set of localities and verify the old ones are deleted
    70  	stats.CountRange(MakeZoneKey(1, 3), false, true, true)
    71  	stats.CountRange(MakeZoneKey(2, 3), false, false, false)
    72  	stats.CountRange(MakeZoneKey(4, 4), false, true, true)
    73  
    74  	time2 := time.Date(2001, 1, 1, 11, 0, 0, 0, time.UTC)
    75  	require.NoError(t, r.Save(ctx, stats, time2, db, con))
    76  	stats = make(RangeReport)
    77  
    78  	require.ElementsMatch(t, TableData(ctx, "system.replication_stats", con), [][]string{
    79  		{"1", "3", "3", "1", "0", "1", "1"},
    80  		{"2", "3", "3", "1", "0", "0", "0"},
    81  		{"4", "4", "3", "1", "0", "1", "1"},
    82  	})
    83  	require.ElementsMatch(t, TableData(ctx, "system.reports_meta", con), [][]string{
    84  		{"3", "'2001-01-01 11:00:00+00:00'"},
    85  	})
    86  	require.Equal(t, 3, r.LastUpdatedRowCount())
    87  
    88  	time3 := time.Date(2001, 1, 1, 11, 30, 0, 0, time.UTC)
    89  	// If some other server takes over and does an update.
    90  	rows, err := con.Exec(ctx, "another-updater", nil, "update system.reports_meta set generated=$1 where id=3", time3)
    91  	require.NoError(t, err)
    92  	require.Equal(t, 1, rows)
    93  	rows, err = con.Exec(ctx, "another-updater", nil, "update system.replication_stats "+
    94  		"set total_ranges=3 where zone_id=1 and subzone_id=3")
    95  	require.NoError(t, err)
    96  	require.Equal(t, 1, rows)
    97  	rows, err = con.Exec(ctx, "another-updater", nil, "delete from system.replication_stats "+
    98  		"where zone_id=2 and subzone_id=3")
    99  	require.NoError(t, err)
   100  	require.Equal(t, 1, rows)
   101  	rows, err = con.Exec(ctx, "another-updater", nil, "insert into system.replication_stats("+
   102  		"zone_id, subzone_id, report_id, total_ranges, unavailable_ranges, under_replicated_ranges, "+
   103  		"over_replicated_ranges) values(16, 16, 3, 6, 0, 1, 2)")
   104  	require.NoError(t, err)
   105  	require.Equal(t, 1, rows)
   106  
   107  	// Add new set of localities and verify the old ones are deleted
   108  	stats.CountRange(MakeZoneKey(1, 3), false, true, true)
   109  	stats.CountRange(MakeZoneKey(2, 3), false, false, false)
   110  	stats.CountRange(MakeZoneKey(4, 4), false, true, true)
   111  
   112  	time4 := time.Date(2001, 1, 1, 12, 0, 0, 0, time.UTC)
   113  	require.NoError(t, r.Save(ctx, stats, time4, db, con))
   114  	stats = make(RangeReport)
   115  
   116  	require.ElementsMatch(t, TableData(ctx, "system.replication_stats", con), [][]string{
   117  		{"1", "3", "3", "1", "0", "1", "1"},
   118  		{"2", "3", "3", "1", "0", "0", "0"},
   119  		{"4", "4", "3", "1", "0", "1", "1"},
   120  	})
   121  	require.ElementsMatch(t, TableData(ctx, "system.reports_meta", con), [][]string{
   122  		{"3", "'2001-01-01 12:00:00+00:00'"},
   123  	})
   124  	require.Equal(t, 3, r.LastUpdatedRowCount())
   125  
   126  	// A brand new report (after restart for example) - still works.
   127  	r = makeReplicationStatsReportSaver()
   128  	stats.CountRange(MakeZoneKey(1, 3), false, true, true)
   129  
   130  	time5 := time.Date(2001, 1, 1, 12, 30, 0, 0, time.UTC)
   131  	require.NoError(t, r.Save(ctx, stats, time5, db, con))
   132  
   133  	require.ElementsMatch(t, TableData(ctx, "system.replication_stats", con), [][]string{
   134  		{"1", "3", "3", "1", "0", "1", "1"},
   135  	})
   136  	require.ElementsMatch(t, TableData(ctx, "system.reports_meta", con), [][]string{
   137  		{"3", "'2001-01-01 12:30:00+00:00'"},
   138  	})
   139  	require.Equal(t, 2, r.LastUpdatedRowCount())
   140  }
   141  
// replicationStatsEntry is one row of a replication stats report in the form
// these tests compare: the range counts of a zone together with the name of the
// object the zone belongs to.
type replicationStatsEntry struct {
	// zoneRangeStatus holds the range counts reported for the zone.
	zoneRangeStatus
	// object is the name the compiled test case associates with the zone (e.g.
	// "default", "db1" or "t1.p1"). Rows are sorted by it before comparison.
	object string
}
   146  
// replicationStatsTestCase describes one scenario for runReplicationStatsTest.
type replicationStatsTestCase struct {
	// baseReportTestCase is the input that gets compiled with compileTestCase
	// before the report is computed.
	baseReportTestCase
	// name is used as the subtest name.
	name string
	// exp lists the expected report rows. Their order does not matter: the rows
	// are sorted by object before being compared.
	exp  []replicationStatsEntry
}
   152  
   153  // runReplicationStatsTest runs one test case. It processes the input schema,
   154  // runs the reports, and verifies that the report looks as expected.
   155  func runReplicationStatsTest(t *testing.T, tc replicationStatsTestCase) {
   156  	ctc, err := compileTestCase(tc.baseReportTestCase)
   157  	if err != nil {
   158  		t.Fatal(err)
   159  	}
   160  	rep, err := computeReplicationStatsReport(context.Background(), &ctc.iter, ctc.checker, ctc.cfg)
   161  	if err != nil {
   162  		t.Fatal(err)
   163  	}
   164  
   165  	// Sort the report's keys.
   166  	gotRows := make([]replicationStatsEntry, len(rep))
   167  	i := 0
   168  	for zone, stats := range rep {
   169  		object := ctc.zoneToObject[zone]
   170  		gotRows[i] = replicationStatsEntry{
   171  			zoneRangeStatus: stats,
   172  			object:          object,
   173  		}
   174  		i++
   175  	}
   176  	sort.Slice(gotRows, func(i, j int) bool {
   177  		return strings.Compare(gotRows[i].object, gotRows[j].object) < 0
   178  	})
   179  	sort.Slice(tc.exp, func(i, j int) bool {
   180  		return strings.Compare(tc.exp[i].object, tc.exp[j].object) < 0
   181  	})
   182  
   183  	require.Equal(t, tc.exp, gotRows)
   184  }
   185  
   186  func TestReplicationStatsReport(t *testing.T) {
   187  	defer leaktest.AfterTest(t)()
   188  	tests := []replicationStatsTestCase{
   189  		{
   190  			name: "simple no violations",
   191  			baseReportTestCase: baseReportTestCase{
   192  				defaultZone: zone{replicas: 3},
   193  				schema: []database{
   194  					{
   195  						name: "db1",
   196  						tables: []table{
   197  							{name: "t1",
   198  								partitions: []partition{{
   199  									name:  "p1",
   200  									start: []int{100},
   201  									end:   []int{200},
   202  									zone:  &zone{constraints: "[+p1]"},
   203  								}},
   204  							},
   205  							{name: "t2"},
   206  						},
   207  						zone: &zone{
   208  							// Change replication options so that db1 gets a report entry.
   209  							replicas: 3,
   210  						},
   211  					},
   212  					{
   213  						name:   "db2",
   214  						tables: []table{{name: "sentinel"}},
   215  					},
   216  				},
   217  				splits: []split{
   218  					{key: "/Table/t1", stores: []int{1, 2, 3}},
   219  					{key: "/Table/t1/pk", stores: []int{1, 2, 3}},
   220  					{key: "/Table/t1/pk/1", stores: []int{1, 2, 3}},
   221  					{key: "/Table/t1/pk/2", stores: []int{1, 2, 3}},
   222  					{key: "/Table/t1/pk/3", stores: []int{1, 2, 3}},
   223  					{key: "/Table/t1/pk/100", stores: []int{1, 2, 3}},
   224  					{key: "/Table/t1/pk/150", stores: []int{1, 2, 3}},
   225  					{key: "/Table/t1/pk/200", stores: []int{1, 2, 3}},
   226  					{key: "/Table/t2", stores: []int{1, 2, 3}},
   227  					{key: "/Table/t2/pk", stores: []int{1, 2, 3}},
   228  					{
   229  						// This range is not covered by the db1's zone config; it'll be
   230  						// counted for the default zone.
   231  						key: "/Table/sentinel", stores: []int{1, 2, 3},
   232  					},
   233  				},
   234  				nodes: []node{
   235  					{id: 1, stores: []store{{id: 1}}},
   236  					{id: 2, stores: []store{{id: 2}}},
   237  					{id: 3, stores: []store{{id: 3}}},
   238  				},
   239  			},
   240  			exp: []replicationStatsEntry{
   241  				{
   242  					object: "default",
   243  					zoneRangeStatus: zoneRangeStatus{
   244  						numRanges:       1,
   245  						unavailable:     0,
   246  						underReplicated: 0,
   247  						overReplicated:  0,
   248  					},
   249  				},
   250  				{
   251  					object: "db1",
   252  					zoneRangeStatus: zoneRangeStatus{
   253  						numRanges:       8,
   254  						unavailable:     0,
   255  						underReplicated: 0,
   256  						overReplicated:  0,
   257  					},
   258  				},
   259  				{
   260  					object: "t1.p1",
   261  					zoneRangeStatus: zoneRangeStatus{
   262  						numRanges:       2,
   263  						unavailable:     0,
   264  						underReplicated: 0,
   265  						overReplicated:  0,
   266  					},
   267  				},
   268  			},
   269  		},
   270  		{
   271  			name: "simple violations",
   272  			baseReportTestCase: baseReportTestCase{
   273  				defaultZone: zone{replicas: 3},
   274  				schema: []database{
   275  					{
   276  						name: "db1",
   277  						tables: []table{
   278  							{name: "t1",
   279  								partitions: []partition{{
   280  									name:  "p1",
   281  									start: []int{100},
   282  									end:   []int{200},
   283  									zone:  &zone{constraints: "[+p1]"},
   284  								}},
   285  							},
   286  						},
   287  					},
   288  				},
   289  				splits: []split{
   290  					// No problem.
   291  					{key: "/Table/t1/pk/100", stores: []int{1, 2, 3}},
   292  					// Under-replicated.
   293  					{key: "/Table/t1/pk/101", stores: []int{1}},
   294  					// Under-replicated.
   295  					{key: "/Table/t1/pk/102", stores: []int{1, 2}},
   296  					// Under-replicated because 4 is dead.
   297  					{key: "/Table/t1/pk/103", stores: []int{1, 2, 4}},
   298  					// Under-replicated and unavailable.
   299  					{key: "/Table/t1/pk/104", stores: []int{3}},
   300  					// Over-replicated.
   301  					{key: "/Table/t1/pk/105", stores: []int{1, 2, 3, 4}},
   302  					// Under-replicated and over-replicated.
   303  					{key: "/Table/t1/pk/106", stores: []int{1, 2, 4, 5}},
   304  				},
   305  				nodes: []node{
   306  					{id: 1, stores: []store{{id: 1}}},
   307  					{id: 2, stores: []store{{id: 2}}},
   308  					{id: 3, stores: []store{{id: 3}}},
   309  					{id: 4, stores: []store{{id: 4}}, dead: true},
   310  					{id: 5, stores: []store{{id: 3}}, dead: true},
   311  				},
   312  			},
   313  			exp: []replicationStatsEntry{
   314  				{
   315  					object: "t1.p1",
   316  					zoneRangeStatus: zoneRangeStatus{
   317  						numRanges:       7,
   318  						unavailable:     1,
   319  						underReplicated: 5,
   320  						overReplicated:  2,
   321  					},
   322  				},
   323  			},
   324  		},
   325  	}
   326  	for _, tc := range tests {
   327  		t.Run(tc.name, func(t *testing.T) {
   328  			runReplicationStatsTest(t, tc)
   329  		})
   330  	}
   331  }