github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/reports/replication_stats_report_test.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package reports 12 13 import ( 14 "context" 15 "sort" 16 "strings" 17 "testing" 18 "time" 19 20 "github.com/cockroachdb/cockroach/pkg/base" 21 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 22 "github.com/cockroachdb/cockroach/pkg/sql/sqlutil" 23 "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" 24 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 25 "github.com/stretchr/testify/require" 26 ) 27 28 func TestRangeReport(t *testing.T) { 29 defer leaktest.AfterTest(t)() 30 31 ctx := context.Background() 32 st := cluster.MakeTestingClusterSettings() 33 // This test uses the cluster as a recipient for a report saved from outside 34 // the cluster. We disable the cluster's own production of reports so that it 35 // doesn't interfere with the test. 36 ReporterInterval.Override(&st.SV, 0) 37 s, _, db := serverutils.StartServer(t, base.TestServerArgs{Settings: st}) 38 con := s.InternalExecutor().(sqlutil.InternalExecutor) 39 defer s.Stopper().Stop(ctx) 40 41 // Verify that tables are empty. 42 require.ElementsMatch(t, TableData(ctx, "system.replication_stats", con), [][]string{}) 43 require.ElementsMatch(t, TableData(ctx, "system.reports_meta", con), [][]string{}) 44 45 // Add several localities and verify the result 46 stats := make(RangeReport) 47 stats.CountRange(MakeZoneKey(1, 3), true, true, true) 48 stats.CountRange(MakeZoneKey(1, 3), false, true, true) 49 stats.CountRange(MakeZoneKey(1, 3), false, false, true) 50 stats.CountRange(MakeZoneKey(1, 3), true, true, false) 51 stats.CountRange(MakeZoneKey(2, 3), false, false, false) 52 stats.CountRange(MakeZoneKey(2, 4), false, true, false) 53 54 r := makeReplicationStatsReportSaver() 55 time1 := time.Date(2001, 1, 1, 10, 0, 0, 0, time.UTC) 56 require.NoError(t, r.Save(ctx, stats, time1, db, con)) 57 stats = make(RangeReport) 58 59 require.ElementsMatch(t, TableData(ctx, "system.replication_stats", con), [][]string{ 60 {"1", "3", "3", "4", "2", "3", "3"}, 61 {"2", "3", "3", "1", "0", "0", "0"}, 62 {"2", "4", "3", "1", "0", "1", "0"}, 63 }) 64 require.ElementsMatch(t, TableData(ctx, "system.reports_meta", con), [][]string{ 65 {"3", "'2001-01-01 10:00:00+00:00'"}, 66 }) 67 require.Equal(t, 3, r.LastUpdatedRowCount()) 68 69 // Add new set of localities and verify the old ones are deleted 70 stats.CountRange(MakeZoneKey(1, 3), false, true, true) 71 stats.CountRange(MakeZoneKey(2, 3), false, false, false) 72 stats.CountRange(MakeZoneKey(4, 4), false, true, true) 73 74 time2 := time.Date(2001, 1, 1, 11, 0, 0, 0, time.UTC) 75 require.NoError(t, r.Save(ctx, stats, time2, db, con)) 76 stats = make(RangeReport) 77 78 require.ElementsMatch(t, TableData(ctx, "system.replication_stats", con), [][]string{ 79 {"1", "3", "3", "1", "0", "1", "1"}, 80 {"2", "3", "3", "1", "0", "0", "0"}, 81 {"4", "4", "3", "1", "0", "1", "1"}, 82 }) 83 require.ElementsMatch(t, TableData(ctx, "system.reports_meta", con), [][]string{ 84 {"3", "'2001-01-01 11:00:00+00:00'"}, 85 }) 86 require.Equal(t, 3, r.LastUpdatedRowCount()) 87 88 time3 := time.Date(2001, 1, 1, 11, 30, 0, 0, time.UTC) 89 // If some other server takes over and does an update. 90 rows, err := con.Exec(ctx, "another-updater", nil, "update system.reports_meta set generated=$1 where id=3", time3) 91 require.NoError(t, err) 92 require.Equal(t, 1, rows) 93 rows, err = con.Exec(ctx, "another-updater", nil, "update system.replication_stats "+ 94 "set total_ranges=3 where zone_id=1 and subzone_id=3") 95 require.NoError(t, err) 96 require.Equal(t, 1, rows) 97 rows, err = con.Exec(ctx, "another-updater", nil, "delete from system.replication_stats "+ 98 "where zone_id=2 and subzone_id=3") 99 require.NoError(t, err) 100 require.Equal(t, 1, rows) 101 rows, err = con.Exec(ctx, "another-updater", nil, "insert into system.replication_stats("+ 102 "zone_id, subzone_id, report_id, total_ranges, unavailable_ranges, under_replicated_ranges, "+ 103 "over_replicated_ranges) values(16, 16, 3, 6, 0, 1, 2)") 104 require.NoError(t, err) 105 require.Equal(t, 1, rows) 106 107 // Add new set of localities and verify the old ones are deleted 108 stats.CountRange(MakeZoneKey(1, 3), false, true, true) 109 stats.CountRange(MakeZoneKey(2, 3), false, false, false) 110 stats.CountRange(MakeZoneKey(4, 4), false, true, true) 111 112 time4 := time.Date(2001, 1, 1, 12, 0, 0, 0, time.UTC) 113 require.NoError(t, r.Save(ctx, stats, time4, db, con)) 114 stats = make(RangeReport) 115 116 require.ElementsMatch(t, TableData(ctx, "system.replication_stats", con), [][]string{ 117 {"1", "3", "3", "1", "0", "1", "1"}, 118 {"2", "3", "3", "1", "0", "0", "0"}, 119 {"4", "4", "3", "1", "0", "1", "1"}, 120 }) 121 require.ElementsMatch(t, TableData(ctx, "system.reports_meta", con), [][]string{ 122 {"3", "'2001-01-01 12:00:00+00:00'"}, 123 }) 124 require.Equal(t, 3, r.LastUpdatedRowCount()) 125 126 // A brand new report (after restart for example) - still works. 127 r = makeReplicationStatsReportSaver() 128 stats.CountRange(MakeZoneKey(1, 3), false, true, true) 129 130 time5 := time.Date(2001, 1, 1, 12, 30, 0, 0, time.UTC) 131 require.NoError(t, r.Save(ctx, stats, time5, db, con)) 132 133 require.ElementsMatch(t, TableData(ctx, "system.replication_stats", con), [][]string{ 134 {"1", "3", "3", "1", "0", "1", "1"}, 135 }) 136 require.ElementsMatch(t, TableData(ctx, "system.reports_meta", con), [][]string{ 137 {"3", "'2001-01-01 12:30:00+00:00'"}, 138 }) 139 require.Equal(t, 2, r.LastUpdatedRowCount()) 140 } 141 142 type replicationStatsEntry struct { 143 zoneRangeStatus 144 object string 145 } 146 147 type replicationStatsTestCase struct { 148 baseReportTestCase 149 name string 150 exp []replicationStatsEntry 151 } 152 153 // runReplicationStatsTest runs one test case. It processes the input schema, 154 // runs the reports, and verifies that the report looks as expected. 155 func runReplicationStatsTest(t *testing.T, tc replicationStatsTestCase) { 156 ctc, err := compileTestCase(tc.baseReportTestCase) 157 if err != nil { 158 t.Fatal(err) 159 } 160 rep, err := computeReplicationStatsReport(context.Background(), &ctc.iter, ctc.checker, ctc.cfg) 161 if err != nil { 162 t.Fatal(err) 163 } 164 165 // Sort the report's keys. 166 gotRows := make([]replicationStatsEntry, len(rep)) 167 i := 0 168 for zone, stats := range rep { 169 object := ctc.zoneToObject[zone] 170 gotRows[i] = replicationStatsEntry{ 171 zoneRangeStatus: stats, 172 object: object, 173 } 174 i++ 175 } 176 sort.Slice(gotRows, func(i, j int) bool { 177 return strings.Compare(gotRows[i].object, gotRows[j].object) < 0 178 }) 179 sort.Slice(tc.exp, func(i, j int) bool { 180 return strings.Compare(tc.exp[i].object, tc.exp[j].object) < 0 181 }) 182 183 require.Equal(t, tc.exp, gotRows) 184 } 185 186 func TestReplicationStatsReport(t *testing.T) { 187 defer leaktest.AfterTest(t)() 188 tests := []replicationStatsTestCase{ 189 { 190 name: "simple no violations", 191 baseReportTestCase: baseReportTestCase{ 192 defaultZone: zone{replicas: 3}, 193 schema: []database{ 194 { 195 name: "db1", 196 tables: []table{ 197 {name: "t1", 198 partitions: []partition{{ 199 name: "p1", 200 start: []int{100}, 201 end: []int{200}, 202 zone: &zone{constraints: "[+p1]"}, 203 }}, 204 }, 205 {name: "t2"}, 206 }, 207 zone: &zone{ 208 // Change replication options so that db1 gets a report entry. 209 replicas: 3, 210 }, 211 }, 212 { 213 name: "db2", 214 tables: []table{{name: "sentinel"}}, 215 }, 216 }, 217 splits: []split{ 218 {key: "/Table/t1", stores: []int{1, 2, 3}}, 219 {key: "/Table/t1/pk", stores: []int{1, 2, 3}}, 220 {key: "/Table/t1/pk/1", stores: []int{1, 2, 3}}, 221 {key: "/Table/t1/pk/2", stores: []int{1, 2, 3}}, 222 {key: "/Table/t1/pk/3", stores: []int{1, 2, 3}}, 223 {key: "/Table/t1/pk/100", stores: []int{1, 2, 3}}, 224 {key: "/Table/t1/pk/150", stores: []int{1, 2, 3}}, 225 {key: "/Table/t1/pk/200", stores: []int{1, 2, 3}}, 226 {key: "/Table/t2", stores: []int{1, 2, 3}}, 227 {key: "/Table/t2/pk", stores: []int{1, 2, 3}}, 228 { 229 // This range is not covered by the db1's zone config; it'll be 230 // counted for the default zone. 231 key: "/Table/sentinel", stores: []int{1, 2, 3}, 232 }, 233 }, 234 nodes: []node{ 235 {id: 1, stores: []store{{id: 1}}}, 236 {id: 2, stores: []store{{id: 2}}}, 237 {id: 3, stores: []store{{id: 3}}}, 238 }, 239 }, 240 exp: []replicationStatsEntry{ 241 { 242 object: "default", 243 zoneRangeStatus: zoneRangeStatus{ 244 numRanges: 1, 245 unavailable: 0, 246 underReplicated: 0, 247 overReplicated: 0, 248 }, 249 }, 250 { 251 object: "db1", 252 zoneRangeStatus: zoneRangeStatus{ 253 numRanges: 8, 254 unavailable: 0, 255 underReplicated: 0, 256 overReplicated: 0, 257 }, 258 }, 259 { 260 object: "t1.p1", 261 zoneRangeStatus: zoneRangeStatus{ 262 numRanges: 2, 263 unavailable: 0, 264 underReplicated: 0, 265 overReplicated: 0, 266 }, 267 }, 268 }, 269 }, 270 { 271 name: "simple violations", 272 baseReportTestCase: baseReportTestCase{ 273 defaultZone: zone{replicas: 3}, 274 schema: []database{ 275 { 276 name: "db1", 277 tables: []table{ 278 {name: "t1", 279 partitions: []partition{{ 280 name: "p1", 281 start: []int{100}, 282 end: []int{200}, 283 zone: &zone{constraints: "[+p1]"}, 284 }}, 285 }, 286 }, 287 }, 288 }, 289 splits: []split{ 290 // No problem. 291 {key: "/Table/t1/pk/100", stores: []int{1, 2, 3}}, 292 // Under-replicated. 293 {key: "/Table/t1/pk/101", stores: []int{1}}, 294 // Under-replicated. 295 {key: "/Table/t1/pk/102", stores: []int{1, 2}}, 296 // Under-replicated because 4 is dead. 297 {key: "/Table/t1/pk/103", stores: []int{1, 2, 4}}, 298 // Under-replicated and unavailable. 299 {key: "/Table/t1/pk/104", stores: []int{3}}, 300 // Over-replicated. 301 {key: "/Table/t1/pk/105", stores: []int{1, 2, 3, 4}}, 302 // Under-replicated and over-replicated. 303 {key: "/Table/t1/pk/106", stores: []int{1, 2, 4, 5}}, 304 }, 305 nodes: []node{ 306 {id: 1, stores: []store{{id: 1}}}, 307 {id: 2, stores: []store{{id: 2}}}, 308 {id: 3, stores: []store{{id: 3}}}, 309 {id: 4, stores: []store{{id: 4}}, dead: true}, 310 {id: 5, stores: []store{{id: 3}}, dead: true}, 311 }, 312 }, 313 exp: []replicationStatsEntry{ 314 { 315 object: "t1.p1", 316 zoneRangeStatus: zoneRangeStatus{ 317 numRanges: 7, 318 unavailable: 1, 319 underReplicated: 5, 320 overReplicated: 2, 321 }, 322 }, 323 }, 324 }, 325 } 326 for _, tc := range tests { 327 t.Run(tc.name, func(t *testing.T) { 328 runReplicationStatsTest(t, tc) 329 }) 330 } 331 }