github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/scanner_test.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvserver 12 13 import ( 14 "context" 15 "fmt" 16 "testing" 17 "time" 18 19 "github.com/cockroachdb/cockroach/pkg/roachpb" 20 "github.com/cockroachdb/cockroach/pkg/storage/enginepb" 21 "github.com/cockroachdb/cockroach/pkg/testutils" 22 "github.com/cockroachdb/cockroach/pkg/util/hlc" 23 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 24 "github.com/cockroachdb/cockroach/pkg/util/log" 25 "github.com/cockroachdb/cockroach/pkg/util/stop" 26 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 27 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 28 "github.com/cockroachdb/cockroach/pkg/util/tracing" 29 "github.com/cockroachdb/errors" 30 "github.com/google/btree" 31 ) 32 33 func makeAmbCtx() log.AmbientContext { 34 return log.AmbientContext{Tracer: tracing.NewTracer()} 35 } 36 37 // Test implementation of a range set backed by btree.BTree. 38 type testRangeSet struct { 39 syncutil.Mutex 40 replicasByKey *btree.BTree 41 visited int 42 } 43 44 // newTestRangeSet creates a new range set that has the count number of ranges. 45 func newTestRangeSet(count int, t *testing.T) *testRangeSet { 46 rs := &testRangeSet{replicasByKey: btree.New(64 /* degree */)} 47 for i := 0; i < count; i++ { 48 desc := &roachpb.RangeDescriptor{ 49 RangeID: roachpb.RangeID(i), 50 StartKey: roachpb.RKey(fmt.Sprintf("%03d", i)), 51 EndKey: roachpb.RKey(fmt.Sprintf("%03d", i+1)), 52 } 53 // Initialize the range stat so the scanner can use it. 54 repl := &Replica{ 55 RangeID: desc.RangeID, 56 } 57 repl.mu.state.Stats = &enginepb.MVCCStats{ 58 KeyBytes: 1, 59 ValBytes: 2, 60 KeyCount: 1, 61 LiveCount: 1, 62 } 63 repl.mu.state.Desc = desc 64 if exRngItem := rs.replicasByKey.ReplaceOrInsert(repl); exRngItem != nil { 65 t.Fatalf("failed to insert range %s", repl) 66 } 67 } 68 return rs 69 } 70 71 func (rs *testRangeSet) Visit(visitor func(*Replica) bool) { 72 rs.Lock() 73 defer rs.Unlock() 74 rs.visited = 0 75 rs.replicasByKey.Ascend(func(i btree.Item) bool { 76 rs.visited++ 77 rs.Unlock() 78 defer rs.Lock() 79 return visitor(i.(*Replica)) 80 }) 81 } 82 83 func (rs *testRangeSet) EstimatedCount() int { 84 rs.Lock() 85 defer rs.Unlock() 86 count := rs.replicasByKey.Len() - rs.visited 87 if count < 1 { 88 count = 1 89 } 90 return count 91 } 92 93 // removeRange removes the i-th range from the range set. 94 func (rs *testRangeSet) remove(index int, t *testing.T) *Replica { 95 endKey := roachpb.Key(fmt.Sprintf("%03d", index+1)) 96 rs.Lock() 97 defer rs.Unlock() 98 repl := rs.replicasByKey.Delete((rangeBTreeKey)(endKey)) 99 if repl == nil { 100 t.Fatalf("failed to delete range of end key %s", endKey) 101 } 102 return repl.(*Replica) 103 } 104 105 // Test implementation of a range queue which adds range to an 106 // internal slice. 107 type testQueue struct { 108 syncutil.Mutex // Protects ranges, done & processed count 109 ranges []*Replica 110 done bool 111 processed int 112 disabled bool 113 } 114 115 // setDisabled suspends processing of items from the queue. 116 func (tq *testQueue) setDisabled(d bool) { 117 tq.Lock() 118 defer tq.Unlock() 119 tq.disabled = d 120 } 121 122 func (tq *testQueue) Start(stopper *stop.Stopper) { 123 stopper.RunWorker(context.Background(), func(context.Context) { 124 for { 125 select { 126 case <-time.After(1 * time.Millisecond): 127 tq.Lock() 128 if !tq.disabled && len(tq.ranges) > 0 { 129 tq.ranges = tq.ranges[1:] 130 tq.processed++ 131 } 132 tq.Unlock() 133 case <-stopper.ShouldStop(): 134 tq.Lock() 135 tq.done = true 136 tq.Unlock() 137 return 138 } 139 } 140 }) 141 } 142 143 // NB: MaybeAddAsync on a testQueue is actually synchronous. 144 func (tq *testQueue) MaybeAddAsync(ctx context.Context, replI replicaInQueue, now hlc.Timestamp) { 145 repl := replI.(*Replica) 146 147 tq.Lock() 148 defer tq.Unlock() 149 if index := tq.indexOf(repl.RangeID); index == -1 { 150 tq.ranges = append(tq.ranges, repl) 151 } 152 } 153 154 func (tq *testQueue) MaybeRemove(rangeID roachpb.RangeID) { 155 tq.Lock() 156 defer tq.Unlock() 157 if index := tq.indexOf(rangeID); index != -1 { 158 tq.ranges = append(tq.ranges[:index], tq.ranges[index+1:]...) 159 } 160 } 161 162 func (tq *testQueue) Name() string { 163 return "testQueue" 164 } 165 166 func (tq *testQueue) NeedsLease() bool { 167 return false 168 } 169 170 func (tq *testQueue) count() int { 171 tq.Lock() 172 defer tq.Unlock() 173 return len(tq.ranges) 174 } 175 176 func (tq *testQueue) indexOf(rangeID roachpb.RangeID) int { 177 for i, repl := range tq.ranges { 178 if repl.RangeID == rangeID { 179 return i 180 } 181 } 182 return -1 183 } 184 185 func (tq *testQueue) isDone() bool { 186 tq.Lock() 187 defer tq.Unlock() 188 return tq.done 189 } 190 191 // TestScannerAddToQueues verifies that ranges are added to and 192 // removed from multiple queues. 193 func TestScannerAddToQueues(t *testing.T) { 194 defer leaktest.AfterTest(t)() 195 const count = 3 196 ranges := newTestRangeSet(count, t) 197 q1, q2 := &testQueue{}, &testQueue{} 198 // We don't want to actually consume entries from the queues during this test. 199 q1.setDisabled(true) 200 q2.setDisabled(true) 201 mc := hlc.NewManualClock(123) 202 clock := hlc.NewClock(mc.UnixNano, time.Nanosecond) 203 s := newReplicaScanner(makeAmbCtx(), clock, 1*time.Millisecond, 0, 0, ranges) 204 s.AddQueues(q1, q2) 205 stopper := stop.NewStopper() 206 207 // Start scanner and verify that all ranges are added to both queues. 208 s.Start(stopper) 209 testutils.SucceedsSoon(t, func() error { 210 if q1.count() != count || q2.count() != count { 211 return errors.Errorf("q1 or q2 count != %d; got %d, %d", count, q1.count(), q2.count()) 212 } 213 return nil 214 }) 215 216 // Remove first range and verify it does not exist in either range. 217 rng := ranges.remove(0, t) 218 testutils.SucceedsSoon(t, func() error { 219 // This is intentionally inside the loop, otherwise this test races as 220 // our removal of the range may be processed before a stray re-queue. 221 // Removing on each attempt makes sure we clean this up as we retry. 222 s.RemoveReplica(rng) 223 c1 := q1.count() 224 c2 := q2.count() 225 if c1 != count-1 || c2 != count-1 { 226 return errors.Errorf("q1 or q2 count != %d; got %d, %d", count-1, c1, c2) 227 } 228 return nil 229 }) 230 231 // Stop scanner and verify both queues are stopped. 232 stopper.Stop(context.Background()) 233 if !q1.isDone() || !q2.isDone() { 234 t.Errorf("expected all queues to stop; got %t, %t", q1.isDone(), q2.isDone()) 235 } 236 } 237 238 // TestScannerTiming verifies that ranges are scanned, regardless 239 // of how many, to match scanInterval. 240 func TestScannerTiming(t *testing.T) { 241 defer leaktest.AfterTest(t)() 242 const count = 3 243 const runTime = 100 * time.Millisecond 244 const maxError = 7500 * time.Microsecond 245 durations := []time.Duration{ 246 15 * time.Millisecond, 247 25 * time.Millisecond, 248 } 249 for i, duration := range durations { 250 testutils.SucceedsSoon(t, func() error { 251 ranges := newTestRangeSet(count, t) 252 q := &testQueue{} 253 mc := hlc.NewManualClock(123) 254 clock := hlc.NewClock(mc.UnixNano, time.Nanosecond) 255 s := newReplicaScanner(makeAmbCtx(), clock, duration, 0, 0, ranges) 256 s.AddQueues(q) 257 stopper := stop.NewStopper() 258 s.Start(stopper) 259 time.Sleep(runTime) 260 stopper.Stop(context.Background()) 261 262 avg := s.avgScan() 263 log.Infof(context.Background(), "%d: average scan: %s", i, avg) 264 if avg.Nanoseconds()-duration.Nanoseconds() > maxError.Nanoseconds() || 265 duration.Nanoseconds()-avg.Nanoseconds() > maxError.Nanoseconds() { 266 return errors.Errorf("expected %s, got %s: exceeds max error of %s", duration, avg, maxError) 267 } 268 return nil 269 }) 270 } 271 } 272 273 // TestScannerPaceInterval tests that paceInterval returns the correct interval. 274 func TestScannerPaceInterval(t *testing.T) { 275 defer leaktest.AfterTest(t)() 276 const count = 3 277 durations := []time.Duration{ 278 30 * time.Millisecond, 279 60 * time.Millisecond, 280 500 * time.Millisecond, 281 } 282 // function logs an error when the actual value is not close 283 // to the expected value 284 logErrorWhenNotCloseTo := func(expected, actual time.Duration) { 285 delta := 1 * time.Millisecond 286 if actual < expected-delta || actual > expected+delta { 287 t.Errorf("Expected duration %s, got %s", expected, actual) 288 } 289 } 290 for _, duration := range durations { 291 startTime := timeutil.Now() 292 ranges := newTestRangeSet(count, t) 293 s := newReplicaScanner(makeAmbCtx(), nil, duration, 0, 0, ranges) 294 interval := s.paceInterval(startTime, startTime) 295 logErrorWhenNotCloseTo(duration/count, interval) 296 // The range set is empty 297 ranges = newTestRangeSet(0, t) 298 s = newReplicaScanner(makeAmbCtx(), nil, duration, 0, 0, ranges) 299 interval = s.paceInterval(startTime, startTime) 300 logErrorWhenNotCloseTo(duration, interval) 301 ranges = newTestRangeSet(count, t) 302 s = newReplicaScanner(makeAmbCtx(), nil, duration, 0, 0, ranges) 303 // Move the present to duration time into the future 304 interval = s.paceInterval(startTime, startTime.Add(duration)) 305 logErrorWhenNotCloseTo(0, interval) 306 } 307 } 308 309 // TestScannerMinMaxIdleTime verifies that the pace interval will not 310 // be less than the specified min idle time or greater than the 311 // specified max idle time. 312 func TestScannerMinMaxIdleTime(t *testing.T) { 313 defer leaktest.AfterTest(t)() 314 const targetInterval = 100 * time.Millisecond 315 const minIdleTime = 10 * time.Millisecond 316 const maxIdleTime = 15 * time.Millisecond 317 for count := range []int{1, 10, 20, 100} { 318 startTime := timeutil.Now() 319 ranges := newTestRangeSet(count, t) 320 s := newReplicaScanner(makeAmbCtx(), nil, targetInterval, minIdleTime, maxIdleTime, ranges) 321 if interval := s.paceInterval(startTime, startTime); interval < minIdleTime || interval > maxIdleTime { 322 t.Errorf("expected interval %s <= %s <= %s", minIdleTime, interval, maxIdleTime) 323 } 324 } 325 } 326 327 // TestScannerDisabled verifies that disabling a scanner prevents 328 // replicas from being added to queues. 329 func TestScannerDisabled(t *testing.T) { 330 defer leaktest.AfterTest(t)() 331 const count = 3 332 ranges := newTestRangeSet(count, t) 333 q := &testQueue{} 334 mc := hlc.NewManualClock(123) 335 clock := hlc.NewClock(mc.UnixNano, time.Nanosecond) 336 s := newReplicaScanner(makeAmbCtx(), clock, 1*time.Millisecond, 0, 0, ranges) 337 s.AddQueues(q) 338 stopper := stop.NewStopper() 339 defer stopper.Stop(context.Background()) 340 s.Start(stopper) 341 342 // Verify queue gets all ranges. 343 testutils.SucceedsSoon(t, func() error { 344 if q.count() != count { 345 return errors.Errorf("expected %d replicas; have %d", count, q.count()) 346 } 347 if s.scanCount() == 0 { 348 return errors.Errorf("expected scanner count to increment") 349 } 350 return nil 351 }) 352 353 lastWaitEnabledCount := s.waitEnabledCount() 354 355 // Now, disable the scanner. 356 s.SetDisabled(true) 357 testutils.SucceedsSoon(t, func() error { 358 if s.waitEnabledCount() == lastWaitEnabledCount { 359 return errors.Errorf("expected scanner to stop when disabled") 360 } 361 return nil 362 }) 363 364 lastScannerCount := s.scanCount() 365 366 // Remove the replicas and verify the scanner still removes them while disabled. 367 ranges.Visit(func(repl *Replica) bool { 368 s.RemoveReplica(repl) 369 return true 370 }) 371 372 testutils.SucceedsSoon(t, func() error { 373 if qc := q.count(); qc != 0 { 374 return errors.Errorf("expected queue to be empty after replicas removed from scanner; got %d", qc) 375 } 376 return nil 377 }) 378 if sc := s.scanCount(); sc != lastScannerCount { 379 t.Errorf("expected scanner count to not increment: %d != %d", sc, lastScannerCount) 380 } 381 } 382 383 func TestScannerDisabledWithZeroInterval(t *testing.T) { 384 defer leaktest.AfterTest(t)() 385 ranges := newTestRangeSet(1, t) 386 s := newReplicaScanner(makeAmbCtx(), nil, 0*time.Millisecond, 0, 0, ranges) 387 if !s.GetDisabled() { 388 t.Errorf("expected scanner to be disabled") 389 } 390 } 391 392 // TestScannerEmptyRangeSet verifies that an empty range set doesn't busy loop. 393 func TestScannerEmptyRangeSet(t *testing.T) { 394 defer leaktest.AfterTest(t)() 395 ranges := newTestRangeSet(0, t) 396 q := &testQueue{} 397 mc := hlc.NewManualClock(123) 398 clock := hlc.NewClock(mc.UnixNano, time.Nanosecond) 399 s := newReplicaScanner(makeAmbCtx(), clock, time.Hour, 0, 0, ranges) 400 s.AddQueues(q) 401 stopper := stop.NewStopper() 402 defer stopper.Stop(context.Background()) 403 s.Start(stopper) 404 time.Sleep(time.Millisecond) // give it some time to (not) busy loop 405 if count := s.scanCount(); count > 1 { 406 t.Errorf("expected at most one loop, but got %d", count) 407 } 408 }