github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/queue_test.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvserver 12 13 import ( 14 "container/heap" 15 "context" 16 "fmt" 17 "strconv" 18 "sync/atomic" 19 "testing" 20 "time" 21 22 "github.com/cockroachdb/cockroach/pkg/base" 23 "github.com/cockroachdb/cockroach/pkg/config" 24 "github.com/cockroachdb/cockroach/pkg/config/zonepb" 25 "github.com/cockroachdb/cockroach/pkg/gossip" 26 "github.com/cockroachdb/cockroach/pkg/keys" 27 "github.com/cockroachdb/cockroach/pkg/roachpb" 28 "github.com/cockroachdb/cockroach/pkg/rpc" 29 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 30 "github.com/cockroachdb/cockroach/pkg/storage/enginepb" 31 "github.com/cockroachdb/cockroach/pkg/testutils" 32 "github.com/cockroachdb/cockroach/pkg/util/hlc" 33 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 34 "github.com/cockroachdb/cockroach/pkg/util/metric" 35 "github.com/cockroachdb/cockroach/pkg/util/stop" 36 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 37 "github.com/cockroachdb/errors" 38 "github.com/gogo/protobuf/proto" 39 "github.com/stretchr/testify/require" 40 ) 41 42 // testQueueImpl implements queueImpl with a closure for shouldQueue. 43 type testQueueImpl struct { 44 shouldQueueFn func(hlc.Timestamp, *Replica) (bool, float64) 45 processed int32 // accessed atomically 46 duration time.Duration 47 blocker chan struct{} // timer() blocks on this if not nil 48 pChan chan time.Time 49 err error // always returns this error on process 50 } 51 52 func (tq *testQueueImpl) shouldQueue( 53 _ context.Context, now hlc.Timestamp, r *Replica, _ *config.SystemConfig, 54 ) (bool, float64) { 55 return tq.shouldQueueFn(now, r) 56 } 57 58 func (tq *testQueueImpl) process(_ context.Context, _ *Replica, _ *config.SystemConfig) error { 59 atomic.AddInt32(&tq.processed, 1) 60 return tq.err 61 } 62 63 func (tq *testQueueImpl) getProcessed() int { 64 return int(atomic.LoadInt32(&tq.processed)) 65 } 66 67 func (tq *testQueueImpl) timer(_ time.Duration) time.Duration { 68 if tq.blocker != nil { 69 <-tq.blocker 70 } 71 if tq.duration != 0 { 72 return tq.duration 73 } 74 return 0 75 } 76 77 func (tq *testQueueImpl) purgatoryChan() <-chan time.Time { 78 return tq.pChan 79 } 80 81 func makeTestBaseQueue( 82 name string, impl queueImpl, store *Store, gossip *gossip.Gossip, cfg queueConfig, 83 ) *baseQueue { 84 if !cfg.acceptsUnsplitRanges { 85 // Needed in order to pass the validation in newBaseQueue. 86 cfg.needsSystemConfig = true 87 } 88 cfg.successes = metric.NewCounter(metric.Metadata{Name: "processed"}) 89 cfg.failures = metric.NewCounter(metric.Metadata{Name: "failures"}) 90 cfg.pending = metric.NewGauge(metric.Metadata{Name: "pending"}) 91 cfg.processingNanos = metric.NewCounter(metric.Metadata{Name: "processingnanos"}) 92 cfg.purgatory = metric.NewGauge(metric.Metadata{Name: "purgatory"}) 93 return newBaseQueue(name, impl, store, gossip, cfg) 94 } 95 96 func createReplicas(t *testing.T, tc *testContext, num int) []*Replica { 97 t.Helper() 98 99 // Remove replica for range 1 since it encompasses the entire keyspace. 100 repl1, err := tc.store.GetReplica(1) 101 if err != nil { 102 t.Fatal(err) 103 } 104 if err := tc.store.RemoveReplica(context.Background(), repl1, repl1.Desc().NextReplicaID, RemoveOptions{ 105 DestroyData: true, 106 }); err != nil { 107 t.Fatal(err) 108 } 109 110 repls := make([]*Replica, num) 111 for i := 0; i < num; i++ { 112 id := roachpb.RangeID(1000 + i) 113 key := roachpb.RKey(strconv.Itoa(int(id))) 114 endKey := roachpb.RKey(string(key) + "/end") 115 r := createReplica(tc.store, id, key, endKey) 116 if err := tc.store.AddReplica(r); err != nil { 117 t.Fatal(err) 118 } 119 repls[i] = r 120 } 121 return repls 122 } 123 124 // TestQueuePriorityQueue verifies priority queue implementation. 125 func TestQueuePriorityQueue(t *testing.T) { 126 defer leaktest.AfterTest(t)() 127 // Create a priority queue, put the items in it, and 128 // establish the priority queue (heap) invariants. 129 const count = 3 130 expRanges := make([]roachpb.RangeID, count+1) 131 pq := priorityQueue{} 132 pq.sl = make([]*replicaItem, count) 133 for i := 0; i < count; { 134 pq.sl[i] = &replicaItem{ 135 rangeID: roachpb.RangeID(i), 136 priority: float64(i), 137 index: i, 138 } 139 expRanges[3-i] = pq.sl[i].rangeID 140 i++ 141 } 142 heap.Init(&pq) 143 144 // Insert a new item and then modify its priority. 145 priorityItem := &replicaItem{ 146 rangeID: -1, 147 priority: 1.0, 148 } 149 heap.Push(&pq, priorityItem) 150 pq.update(priorityItem, 4.0) 151 expRanges[0] = priorityItem.rangeID 152 153 // Take the items out; they should arrive in decreasing priority order. 154 for i := 0; pq.Len() > 0; i++ { 155 item := heap.Pop(&pq).(*replicaItem) 156 if item.rangeID != expRanges[i] { 157 t.Errorf("%d: unexpected range with priority %f", i, item.priority) 158 } 159 } 160 } 161 162 // TestBaseQueueAddUpdateAndRemove verifies basic operation with base 163 // queue including adding ranges which both should and shouldn't be 164 // queued, updating an existing range, and removing a range. 165 func TestBaseQueueAddUpdateAndRemove(t *testing.T) { 166 defer leaktest.AfterTest(t)() 167 tc := testContext{} 168 stopper := stop.NewStopper() 169 ctx := context.Background() 170 defer stopper.Stop(ctx) 171 tc.Start(t, stopper) 172 173 repls := createReplicas(t, &tc, 2) 174 r1, r2 := repls[0], repls[1] 175 176 shouldAddMap := map[*Replica]bool{ 177 r1: true, 178 r2: true, 179 } 180 priorityMap := map[*Replica]float64{ 181 r1: 1.0, 182 r2: 2.0, 183 } 184 testQueue := &testQueueImpl{ 185 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) { 186 return shouldAddMap[r], priorityMap[r] 187 }, 188 } 189 bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: 2}) 190 191 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 192 bq.maybeAdd(ctx, r2, hlc.Timestamp{}) 193 if bq.Length() != 2 { 194 t.Fatalf("expected length 2; got %d", bq.Length()) 195 } 196 if v := bq.pending.Value(); v != 2 { 197 t.Errorf("expected 2 pending replicas; got %d", v) 198 } 199 if bq.pop() != r2 { 200 t.Error("expected r2") 201 } else { 202 bq.finishProcessingReplica(ctx, stopper, r2, nil) 203 } 204 if v := bq.pending.Value(); v != 1 { 205 t.Errorf("expected 1 pending replicas; got %d", v) 206 } 207 if bq.pop() != r1 { 208 t.Error("expected r1") 209 } else { 210 bq.finishProcessingReplica(ctx, stopper, r1, nil) 211 } 212 if v := bq.pending.Value(); v != 0 { 213 t.Errorf("expected 0 pending replicas; got %d", v) 214 } 215 if r := bq.pop(); r != nil { 216 t.Errorf("expected empty queue; got %v", r) 217 } 218 219 // Add again, but this time r2 shouldn't add. 220 shouldAddMap[r2] = false 221 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 222 bq.maybeAdd(ctx, r2, hlc.Timestamp{}) 223 if bq.Length() != 1 { 224 t.Errorf("expected length 1; got %d", bq.Length()) 225 } 226 227 // Try adding same replica twice. 228 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 229 if bq.Length() != 1 { 230 t.Errorf("expected length 1; got %d", bq.Length()) 231 } 232 233 // Re-add r2 and update priority of r1. 234 shouldAddMap[r2] = true 235 priorityMap[r1] = 3.0 236 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 237 bq.maybeAdd(ctx, r2, hlc.Timestamp{}) 238 if bq.Length() != 2 { 239 t.Fatalf("expected length 2; got %d", bq.Length()) 240 } 241 if bq.pop() != r1 { 242 t.Error("expected r1") 243 } else { 244 bq.finishProcessingReplica(ctx, stopper, r1, nil) 245 } 246 if bq.pop() != r2 { 247 t.Error("expected r2") 248 } else { 249 bq.finishProcessingReplica(ctx, stopper, r2, nil) 250 } 251 if r := bq.pop(); r != nil { 252 t.Errorf("expected empty queue; got %v", r) 253 } 254 255 // Verify that priorities aren't lowered by a later MaybeAdd. 256 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 257 bq.maybeAdd(ctx, r2, hlc.Timestamp{}) 258 priorityMap[r1] = 1.0 259 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 260 if bq.Length() != 2 { 261 t.Fatalf("expected length 2; got %d", bq.Length()) 262 } 263 if bq.pop() != r1 { 264 t.Error("expected r1") 265 } else { 266 bq.finishProcessingReplica(ctx, stopper, r1, nil) 267 } 268 if bq.pop() != r2 { 269 t.Error("expected r2") 270 } else { 271 bq.finishProcessingReplica(ctx, stopper, r2, nil) 272 } 273 if r := bq.pop(); r != nil { 274 t.Errorf("expected empty queue; got %v", r) 275 } 276 277 // Try removing a replica. 278 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 279 bq.maybeAdd(ctx, r2, hlc.Timestamp{}) 280 bq.MaybeRemove(r2.RangeID) 281 if bq.Length() != 1 { 282 t.Fatalf("expected length 1; got %d", bq.Length()) 283 } 284 if v := bq.pending.Value(); v != 1 { 285 t.Errorf("expected 1 pending replicas; got %d", v) 286 } 287 if bq.pop() != r1 { 288 t.Errorf("expected r1") 289 } else { 290 bq.finishProcessingReplica(ctx, stopper, r1, nil) 291 } 292 if v := bq.pending.Value(); v != 0 { 293 t.Errorf("expected 0 pending replicas; got %d", v) 294 } 295 } 296 297 // TestBaseQueueSamePriorityFIFO verifies that if multiple items are queued at 298 // the same priority, they will be processes in first-in-first-out order. 299 // This avoids starvation scenarios, in particular in the Raft snapshot queue. 300 // 301 // See: 302 // https://github.com/cockroachdb/cockroach/issues/31947#issuecomment-434383267 303 func TestBaseQueueSamePriorityFIFO(t *testing.T) { 304 defer leaktest.AfterTest(t)() 305 tc := testContext{} 306 stopper := stop.NewStopper() 307 ctx := context.Background() 308 defer stopper.Stop(ctx) 309 tc.Start(t, stopper) 310 311 repls := createReplicas(t, &tc, 5) 312 313 testQueue := &testQueueImpl{ 314 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) { 315 t.Fatal("unexpected call to shouldQueue") 316 return false, 0.0 317 }, 318 } 319 320 bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: 100}) 321 322 for _, repl := range repls { 323 added, err := bq.testingAdd(ctx, repl, 0.0) 324 if err != nil { 325 t.Fatalf("%s: %v", repl, err) 326 } 327 if !added { 328 t.Fatalf("%v not added", repl) 329 } 330 } 331 for _, expRepl := range repls { 332 actRepl := bq.pop() 333 if actRepl != expRepl { 334 t.Fatalf("expected %v, got %v", expRepl, actRepl) 335 } 336 } 337 } 338 339 // TestBaseQueueAdd verifies that calling Add() directly overrides the 340 // ShouldQueue method. 341 func TestBaseQueueAdd(t *testing.T) { 342 defer leaktest.AfterTest(t)() 343 tc := testContext{} 344 stopper := stop.NewStopper() 345 ctx := context.Background() 346 defer stopper.Stop(ctx) 347 tc.Start(t, stopper) 348 349 r, err := tc.store.GetReplica(1) 350 if err != nil { 351 t.Fatal(err) 352 } 353 354 testQueue := &testQueueImpl{ 355 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) { 356 return false, 0.0 357 }, 358 } 359 bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: 1}) 360 bq.maybeAdd(context.Background(), r, hlc.Timestamp{}) 361 if bq.Length() != 0 { 362 t.Fatalf("expected length 0; got %d", bq.Length()) 363 } 364 if added, err := bq.testingAdd(ctx, r, 1.0); err != nil || !added { 365 t.Fatalf("expected Add to succeed: %t, %s", added, err) 366 } 367 // Add again and verify it's not actually added (it's already there). 368 if added, err := bq.testingAdd(ctx, r, 1.0); err != nil || added { 369 t.Fatalf("expected Add to succeed: %t, %s", added, err) 370 } 371 if bq.Length() != 1 { 372 t.Fatalf("expected length 1; got %d", bq.Length()) 373 } 374 } 375 376 // TestBaseQueueProcess verifies that items from the queue are 377 // processed according to the timer function. 378 func TestBaseQueueProcess(t *testing.T) { 379 defer leaktest.AfterTest(t)() 380 tsc := TestStoreConfig(nil) 381 tc := testContext{} 382 stopper := stop.NewStopper() 383 defer stopper.Stop(context.Background()) 384 tc.StartWithStoreConfig(t, stopper, tsc) 385 386 repls := createReplicas(t, &tc, 2) 387 r1, r2 := repls[0], repls[1] 388 389 testQueue := &testQueueImpl{ 390 blocker: make(chan struct{}, 1), 391 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) { 392 shouldQueue = true 393 priority = float64(r.RangeID) 394 return 395 }, 396 } 397 bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: 2}) 398 bq.Start(stopper) 399 400 ctx := context.Background() 401 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 402 bq.maybeAdd(ctx, r2, hlc.Timestamp{}) 403 if pc := testQueue.getProcessed(); pc != 0 { 404 t.Errorf("expected no processed ranges; got %d", pc) 405 } 406 if v := bq.successes.Count(); v != 0 { 407 t.Errorf("expected 0 processed replicas; got %d", v) 408 } 409 if v := bq.pending.Value(); v != 2 { 410 t.Errorf("expected 2 pending replicas; got %d", v) 411 } 412 413 testQueue.blocker <- struct{}{} 414 testutils.SucceedsSoon(t, func() error { 415 if pc := testQueue.getProcessed(); pc != 1 { 416 return errors.Errorf("expected 1 processed replicas; got %d", pc) 417 } 418 if v := bq.successes.Count(); v != 1 { 419 return errors.Errorf("expected 1 processed replicas; got %d", v) 420 } 421 if v := bq.pending.Value(); v != 1 { 422 return errors.Errorf("expected 1 pending replicas; got %d", v) 423 } 424 return nil 425 }) 426 427 testQueue.blocker <- struct{}{} 428 testutils.SucceedsSoon(t, func() error { 429 if pc := testQueue.getProcessed(); pc < 2 { 430 return errors.Errorf("expected >= %d processed replicas; got %d", 2, pc) 431 } 432 if v := bq.successes.Count(); v != 2 { 433 return errors.Errorf("expected 2 processed replicas; got %d", v) 434 } 435 if v := bq.pending.Value(); v != 0 { 436 return errors.Errorf("expected 0 pending replicas; got %d", v) 437 } 438 return nil 439 }) 440 441 // Ensure the test queue is not blocked on a stray call to 442 // testQueueImpl.timer(). 443 close(testQueue.blocker) 444 } 445 446 // TestBaseQueueAddRemove adds then removes a range; ensure range is 447 // not processed. 448 func TestBaseQueueAddRemove(t *testing.T) { 449 defer leaktest.AfterTest(t)() 450 tc := testContext{} 451 stopper := stop.NewStopper() 452 ctx := context.Background() 453 defer stopper.Stop(ctx) 454 tc.Start(t, stopper) 455 456 r, err := tc.store.GetReplica(1) 457 if err != nil { 458 t.Fatal(err) 459 } 460 461 testQueue := &testQueueImpl{ 462 blocker: make(chan struct{}, 1), 463 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) { 464 shouldQueue = true 465 priority = 1.0 466 return 467 }, 468 } 469 bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: 2}) 470 bq.Start(stopper) 471 472 bq.maybeAdd(ctx, r, hlc.Timestamp{}) 473 bq.MaybeRemove(r.RangeID) 474 475 // Wake the queue 476 close(testQueue.blocker) 477 478 // Make sure the queue has actually run through a few times 479 for i := 0; i < cap(bq.incoming)+1; i++ { 480 bq.incoming <- struct{}{} 481 } 482 483 if pc := testQueue.getProcessed(); pc > 0 { 484 t.Errorf("expected processed count of 0; got %d", pc) 485 } 486 } 487 488 // TestNeedsSystemConfig verifies that queues that don't need the system config 489 // are able to process replicas when the system config isn't available. 490 func TestNeedsSystemConfig(t *testing.T) { 491 defer leaktest.AfterTest(t)() 492 tc := testContext{} 493 stopper := stop.NewStopper() 494 ctx := context.Background() 495 defer stopper.Stop(ctx) 496 tc.Start(t, stopper) 497 498 r, err := tc.store.GetReplica(1) 499 if err != nil { 500 t.Fatal(err) 501 } 502 503 queueFnCalled := 0 504 testQueue := &testQueueImpl{ 505 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (bool, float64) { 506 queueFnCalled++ 507 return true, 1.0 508 }, 509 } 510 511 // Use a gossip instance that won't have the system config available in it. 512 // bqNeedsSysCfg will not add the replica or process it without a system config. 513 rpcContext := rpc.NewContext( 514 tc.store.cfg.AmbientCtx, &base.Config{Insecure: true}, tc.store.cfg.Clock, stopper, 515 cluster.MakeTestingClusterSettings()) 516 emptyGossip := gossip.NewTest( 517 tc.gossip.NodeID.Get(), rpcContext, rpc.NewServer(rpcContext), stopper, tc.store.Registry(), zonepb.DefaultZoneConfigRef()) 518 bqNeedsSysCfg := makeTestBaseQueue("test", testQueue, tc.store, emptyGossip, queueConfig{ 519 needsSystemConfig: true, 520 acceptsUnsplitRanges: true, 521 maxSize: 1, 522 }) 523 524 bqNeedsSysCfg.Start(stopper) 525 bqNeedsSysCfg.maybeAdd(ctx, r, hlc.Timestamp{}) 526 if queueFnCalled != 0 { 527 t.Fatalf("expected shouldQueueFn not to be called without valid system config, got %d calls", queueFnCalled) 528 } 529 530 // Manually add a replica and ensure that the process method doesn't get run. 531 if added, err := bqNeedsSysCfg.testingAdd(ctx, r, 1.0); err != nil || !added { 532 t.Fatalf("expected Add to succeed: %t, %s", added, err) 533 } 534 // Make sure the queue has actually run through a few times 535 for i := 0; i < cap(bqNeedsSysCfg.incoming)+1; i++ { 536 bqNeedsSysCfg.incoming <- struct{}{} 537 } 538 if pc := testQueue.getProcessed(); pc > 0 { 539 t.Errorf("expected processed count of 0 for queue that needs system config; got %d", pc) 540 } 541 542 // Now check that a queue which doesn't require the system config can 543 // successfully add and process a replica. 544 bqNoSysCfg := makeTestBaseQueue("test", testQueue, tc.store, emptyGossip, queueConfig{ 545 needsSystemConfig: false, 546 acceptsUnsplitRanges: true, 547 maxSize: 1, 548 }) 549 bqNoSysCfg.Start(stopper) 550 bqNoSysCfg.maybeAdd(context.Background(), r, hlc.Timestamp{}) 551 if queueFnCalled != 1 { 552 t.Fatalf("expected shouldQueueFn to be called even without valid system config, got %d calls", queueFnCalled) 553 } 554 testutils.SucceedsSoon(t, func() error { 555 if pc := testQueue.getProcessed(); pc != 1 { 556 return errors.Errorf("expected 1 processed replica even without system config; got %d", pc) 557 } 558 if v := bqNoSysCfg.successes.Count(); v != 1 { 559 return errors.Errorf("expected 1 processed replica even without system config; got %d", v) 560 } 561 return nil 562 }) 563 } 564 565 // TestAcceptsUnsplitRanges verifies that ranges that need to split are properly 566 // rejected when the queue has 'acceptsUnsplitRanges = false'. 567 func TestAcceptsUnsplitRanges(t *testing.T) { 568 defer leaktest.AfterTest(t)() 569 stopper := stop.NewStopper() 570 defer stopper.Stop(context.Background()) 571 s, _ := createTestStore(t, 572 testStoreOpts{ 573 // This test was written before test stores could start with more than one 574 // range and was not adapted. 575 createSystemRanges: false, 576 }, 577 stopper) 578 ctx := context.Background() 579 580 maxWontSplitAddr, err := keys.Addr(keys.SystemPrefix) 581 if err != nil { 582 t.Fatal(err) 583 } 584 minWillSplitAddr, err := keys.Addr(keys.TableDataMin) 585 if err != nil { 586 t.Fatal(err) 587 } 588 589 // Remove replica for range 1 since it encompasses the entire keyspace. 590 repl1, err := s.GetReplica(1) 591 if err != nil { 592 t.Error(err) 593 } 594 if err := s.RemoveReplica(context.Background(), repl1, repl1.Desc().NextReplicaID, RemoveOptions{ 595 DestroyData: true, 596 }); err != nil { 597 t.Error(err) 598 } 599 600 // This range can never be split due to zone configs boundaries. 601 neverSplits := createReplica(s, 2, roachpb.RKeyMin, maxWontSplitAddr) 602 if err := s.AddReplica(neverSplits); err != nil { 603 t.Fatal(err) 604 } 605 606 // This range will need to be split after user db/table entries are created. 607 willSplit := createReplica(s, 3, minWillSplitAddr, roachpb.RKeyMax) 608 if err := s.AddReplica(willSplit); err != nil { 609 t.Fatal(err) 610 } 611 612 testQueue := &testQueueImpl{ 613 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) { 614 // Always queue ranges if they make it past the base queue's logic. 615 return true, float64(r.RangeID) 616 }, 617 } 618 619 bq := makeTestBaseQueue("test", testQueue, s, s.cfg.Gossip, queueConfig{maxSize: 2}) 620 bq.Start(stopper) 621 622 // Check our config. 623 var sysCfg *config.SystemConfig 624 testutils.SucceedsSoon(t, func() error { 625 sysCfg = s.cfg.Gossip.GetSystemConfig() 626 if sysCfg == nil { 627 return errors.New("system config not yet present") 628 } 629 return nil 630 }) 631 neverSplitsDesc := neverSplits.Desc() 632 if sysCfg.NeedsSplit(neverSplitsDesc.StartKey, neverSplitsDesc.EndKey) { 633 t.Fatal("System config says range needs to be split") 634 } 635 willSplitDesc := willSplit.Desc() 636 if sysCfg.NeedsSplit(willSplitDesc.StartKey, willSplitDesc.EndKey) { 637 t.Fatal("System config says range needs to be split") 638 } 639 640 // There are no user db/table entries, everything should be added and 641 // processed as usual. 642 bq.maybeAdd(ctx, neverSplits, hlc.Timestamp{}) 643 bq.maybeAdd(ctx, willSplit, hlc.Timestamp{}) 644 645 testutils.SucceedsSoon(t, func() error { 646 if pc := testQueue.getProcessed(); pc != 2 { 647 return errors.Errorf("expected %d processed replicas; got %d", 2, pc) 648 } 649 // Check metrics. 650 if v := bq.successes.Count(); v != 2 { 651 return errors.Errorf("expected 2 processed replicas; got %d", v) 652 } 653 if v := bq.pending.Value(); v != 0 { 654 return errors.Errorf("expected 0 pending replicas; got %d", v) 655 } 656 return nil 657 }) 658 659 // Now add a user object, it will trigger a split. 660 // The range willSplit starts at the beginning of the user data range, 661 // which means keys.MaxReservedDescID+1. 662 zoneConfig := zonepb.DefaultZoneConfig() 663 zoneConfig.RangeMaxBytes = proto.Int64(1 << 20) 664 config.TestingSetZoneConfig(keys.MaxReservedDescID+2, zoneConfig) 665 666 // Check our config. 667 neverSplitsDesc = neverSplits.Desc() 668 if sysCfg.NeedsSplit(neverSplitsDesc.StartKey, neverSplitsDesc.EndKey) { 669 t.Fatal("System config says range needs to be split") 670 } 671 willSplitDesc = willSplit.Desc() 672 if !sysCfg.NeedsSplit(willSplitDesc.StartKey, willSplitDesc.EndKey) { 673 t.Fatal("System config says range does not need to be split") 674 } 675 676 bq.maybeAdd(ctx, neverSplits, hlc.Timestamp{}) 677 bq.maybeAdd(ctx, willSplit, hlc.Timestamp{}) 678 679 testutils.SucceedsSoon(t, func() error { 680 if pc := testQueue.getProcessed(); pc != 3 { 681 return errors.Errorf("expected %d processed replicas; got %d", 3, pc) 682 } 683 // Check metrics. 684 if v := bq.successes.Count(); v != 3 { 685 return errors.Errorf("expected 3 processed replicas; got %d", v) 686 } 687 if v := bq.pending.Value(); v != 0 { 688 return errors.Errorf("expected 0 pending replicas; got %d", v) 689 } 690 return nil 691 }) 692 } 693 694 type testPurgatoryError struct{} 695 696 func (*testPurgatoryError) Error() string { 697 return "test purgatory error" 698 } 699 700 func (*testPurgatoryError) purgatoryErrorMarker() { 701 } 702 703 // TestBaseQueuePurgatory verifies that if error is set on the test 704 // queue, items are added to the purgatory. Verifies that sending on 705 // the purgatory channel causes the replicas to be reprocessed. 706 func TestBaseQueuePurgatory(t *testing.T) { 707 defer leaktest.AfterTest(t)() 708 tsc := TestStoreConfig(nil) 709 tc := testContext{} 710 stopper := stop.NewStopper() 711 defer stopper.Stop(context.Background()) 712 tc.StartWithStoreConfig(t, stopper, tsc) 713 714 testQueue := &testQueueImpl{ 715 duration: time.Nanosecond, 716 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) { 717 shouldQueue = true 718 priority = float64(r.RangeID) 719 return 720 }, 721 pChan: make(chan time.Time, 1), 722 err: &testPurgatoryError{}, 723 } 724 725 const replicaCount = 10 726 repls := createReplicas(t, &tc, replicaCount) 727 728 bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: replicaCount}) 729 bq.Start(stopper) 730 731 for _, r := range repls { 732 bq.maybeAdd(context.Background(), r, hlc.Timestamp{}) 733 } 734 735 testutils.SucceedsSoon(t, func() error { 736 if pc := testQueue.getProcessed(); pc != replicaCount { 737 return errors.Errorf("expected %d processed replicas; got %d", replicaCount, pc) 738 } 739 // We have to loop checking the following conditions because the increment 740 // of testQueue.processed does not happen atomically with the replica being 741 // placed in purgatory. 742 // Verify that the size of the purgatory map is correct. 743 if l := bq.PurgatoryLength(); l != replicaCount { 744 return errors.Errorf("expected purgatory size of %d; got %d", replicaCount, l) 745 } 746 // ...and priorityQ should be empty. 747 if l := bq.Length(); l != 0 { 748 return errors.Errorf("expected empty priorityQ; got %d", l) 749 } 750 // Check metrics. 751 if v := bq.successes.Count(); v != 0 { 752 return errors.Errorf("expected 0 processed replicas; got %d", v) 753 } 754 if v := bq.failures.Count(); v != int64(replicaCount) { 755 return errors.Errorf("expected %d failed replicas; got %d", replicaCount, v) 756 } 757 if v := bq.pending.Value(); v != 0 { 758 return errors.Errorf("expected 0 pending replicas; got %d", v) 759 } 760 if v := bq.purgatory.Value(); v != int64(replicaCount) { 761 return errors.Errorf("expected %d purgatory replicas; got %d", replicaCount, v) 762 } 763 return nil 764 }) 765 766 // Now, signal that purgatoried replicas should retry. 767 testQueue.pChan <- timeutil.Now() 768 769 testutils.SucceedsSoon(t, func() error { 770 if pc := testQueue.getProcessed(); pc != replicaCount*2 { 771 return errors.Errorf("expected %d processed replicas; got %d", replicaCount*2, pc) 772 } 773 // We have to loop checking the following conditions because the increment 774 // of testQueue.processed does not happen atomically with the replica being 775 // placed in purgatory. 776 // Verify the replicas are still in purgatory. 777 if l := bq.PurgatoryLength(); l != replicaCount { 778 return errors.Errorf("expected purgatory size of %d; got %d", replicaCount, l) 779 } 780 // ...and priorityQ should be empty. 781 if l := bq.Length(); l != 0 { 782 return errors.Errorf("expected empty priorityQ; got %d", l) 783 } 784 // Check metrics. 785 if v := bq.successes.Count(); v != 0 { 786 return errors.Errorf("expected 0 processed replicas; got %d", v) 787 } 788 if v := bq.failures.Count(); v != int64(replicaCount*2) { 789 return errors.Errorf("expected %d failed replicas; got %d", replicaCount*2, v) 790 } 791 if v := bq.pending.Value(); v != 0 { 792 return errors.Errorf("expected 0 pending replicas; got %d", v) 793 } 794 if v := bq.purgatory.Value(); v != int64(replicaCount) { 795 return errors.Errorf("expected %d purgatory replicas; got %d", replicaCount, v) 796 } 797 return nil 798 }) 799 800 // Remove error and reprocess. 801 testQueue.err = nil 802 testQueue.pChan <- timeutil.Now() 803 804 testutils.SucceedsSoon(t, func() error { 805 if pc := testQueue.getProcessed(); pc != replicaCount*3 { 806 return errors.Errorf("expected %d processed replicas; got %d", replicaCount*3, pc) 807 } 808 // Check metrics. 809 if v := bq.successes.Count(); v != int64(replicaCount) { 810 return errors.Errorf("expected %d processed replicas; got %d", replicaCount, v) 811 } 812 if v := bq.failures.Count(); v != int64(replicaCount*2) { 813 return errors.Errorf("expected %d failed replicas; got %d", replicaCount*2, v) 814 } 815 if v := bq.pending.Value(); v != 0 { 816 return errors.Errorf("expected 0 pending replicas; got %d", v) 817 } 818 if v := bq.purgatory.Value(); v != 0 { 819 return errors.Errorf("expected 0 purgatory replicas; got %d", v) 820 } 821 return nil 822 }) 823 824 // Verify the replicas are no longer in purgatory. 825 if l := bq.PurgatoryLength(); l != 0 { 826 t.Errorf("expected purgatory size of 0; got %d", l) 827 } 828 // ...and priorityQ should be empty. 829 if l := bq.Length(); l != 0 { 830 t.Errorf("expected empty priorityQ; got %d", l) 831 } 832 } 833 834 type processTimeoutQueueImpl struct { 835 testQueueImpl 836 } 837 838 func (pq *processTimeoutQueueImpl) process( 839 ctx context.Context, r *Replica, _ *config.SystemConfig, 840 ) error { 841 <-ctx.Done() 842 atomic.AddInt32(&pq.processed, 1) 843 return ctx.Err() 844 } 845 846 func TestBaseQueueProcessTimeout(t *testing.T) { 847 defer leaktest.AfterTest(t)() 848 tc := testContext{} 849 stopper := stop.NewStopper() 850 defer stopper.Stop(context.Background()) 851 tc.Start(t, stopper) 852 853 r, err := tc.store.GetReplica(1) 854 if err != nil { 855 t.Fatal(err) 856 } 857 858 ptQueue := &processTimeoutQueueImpl{ 859 testQueueImpl: testQueueImpl{ 860 blocker: make(chan struct{}, 1), 861 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) { 862 return true, 1.0 863 }, 864 }, 865 } 866 bq := makeTestBaseQueue("test", ptQueue, tc.store, tc.gossip, 867 queueConfig{ 868 maxSize: 1, 869 processTimeoutFunc: constantTimeoutFunc(time.Millisecond), 870 acceptsUnsplitRanges: true, 871 }) 872 bq.Start(stopper) 873 bq.maybeAdd(context.Background(), r, hlc.Timestamp{}) 874 875 if l := bq.Length(); l != 1 { 876 t.Errorf("expected one queued replica; got %d", l) 877 } 878 879 ptQueue.blocker <- struct{}{} 880 testutils.SucceedsSoon(t, func() error { 881 if pc := ptQueue.getProcessed(); pc != 1 { 882 return errors.Errorf("expected 1 processed replicas; got %d", pc) 883 } 884 if v := bq.failures.Count(); v != 1 { 885 return errors.Errorf("expected 1 failed replicas; got %d", v) 886 } 887 return nil 888 }) 889 } 890 891 type mvccStatsReplicaInQueue struct { 892 replicaInQueue 893 size int64 894 } 895 896 func (r mvccStatsReplicaInQueue) GetMVCCStats() enginepb.MVCCStats { 897 return enginepb.MVCCStats{ValBytes: r.size} 898 } 899 900 func TestQueueSnapshotTimeoutFunc(t *testing.T) { 901 defer leaktest.AfterTest(t)() 902 type testCase struct { 903 guaranteedProcessingTime time.Duration 904 snapshotRate int64 // bytes/s 905 replicaSize int64 // bytes 906 expectedTimeout time.Duration 907 } 908 makeTest := func(tc testCase) (string, func(t *testing.T)) { 909 return fmt.Sprintf("%+v", tc), func(t *testing.T) { 910 st := cluster.MakeTestingClusterSettings() 911 queueGuaranteedProcessingTimeBudget.Override(&st.SV, tc.guaranteedProcessingTime) 912 recoverySnapshotRate.Override(&st.SV, tc.snapshotRate) 913 tf := makeQueueSnapshotTimeoutFunc(recoverySnapshotRate) 914 repl := mvccStatsReplicaInQueue{ 915 size: tc.replicaSize, 916 } 917 require.Equal(t, tc.expectedTimeout, tf(st, repl)) 918 } 919 } 920 for _, tc := range []testCase{ 921 { 922 guaranteedProcessingTime: time.Minute, 923 snapshotRate: 1 << 30, 924 replicaSize: 1 << 20, 925 expectedTimeout: time.Minute, 926 }, 927 { 928 guaranteedProcessingTime: time.Minute, 929 snapshotRate: 1 << 20, 930 replicaSize: 100 << 20, 931 expectedTimeout: 100 * time.Second * permittedSnapshotSlowdown, 932 }, 933 { 934 guaranteedProcessingTime: time.Hour, 935 snapshotRate: 1 << 20, 936 replicaSize: 100 << 20, 937 expectedTimeout: time.Hour, 938 }, 939 { 940 guaranteedProcessingTime: time.Minute, 941 snapshotRate: 1 << 10, 942 replicaSize: 100 << 20, 943 expectedTimeout: 100 * (1 << 10) * time.Second * permittedSnapshotSlowdown, 944 }, 945 } { 946 t.Run(makeTest(tc)) 947 } 948 } 949 950 // processTimeQueueImpl spends 5ms on each process request. 951 type processTimeQueueImpl struct { 952 testQueueImpl 953 } 954 955 func (pq *processTimeQueueImpl) process( 956 _ context.Context, _ *Replica, _ *config.SystemConfig, 957 ) error { 958 time.Sleep(5 * time.Millisecond) 959 return nil 960 } 961 962 func TestBaseQueueTimeMetric(t *testing.T) { 963 defer leaktest.AfterTest(t)() 964 tc := testContext{} 965 stopper := stop.NewStopper() 966 defer stopper.Stop(context.Background()) 967 tc.Start(t, stopper) 968 969 r, err := tc.store.GetReplica(1) 970 if err != nil { 971 t.Fatal(err) 972 } 973 974 ptQueue := &processTimeQueueImpl{ 975 testQueueImpl: testQueueImpl{ 976 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) { 977 return true, 1.0 978 }, 979 }, 980 } 981 bq := makeTestBaseQueue("test", ptQueue, tc.store, tc.gossip, 982 queueConfig{ 983 maxSize: 1, 984 processTimeoutFunc: constantTimeoutFunc(time.Millisecond), 985 acceptsUnsplitRanges: true, 986 }) 987 bq.Start(stopper) 988 bq.maybeAdd(context.Background(), r, hlc.Timestamp{}) 989 990 testutils.SucceedsSoon(t, func() error { 991 if v := bq.successes.Count(); v != 1 { 992 return errors.Errorf("expected 1 processed replicas; got %d", v) 993 } 994 if min, v := bq.queueConfig.processTimeoutFunc(nil, nil), bq.processingNanos.Count(); v < min.Nanoseconds() { 995 return errors.Errorf("expected >= %s in processing time; got %s", min, time.Duration(v)) 996 } 997 return nil 998 }) 999 } 1000 1001 func TestBaseQueueShouldQueueAgain(t *testing.T) { 1002 defer leaktest.AfterTest(t)() 1003 testCases := []struct { 1004 now, last hlc.Timestamp 1005 minInterval time.Duration 1006 expQueue bool 1007 expPriority float64 1008 }{ 1009 {makeTS(1, 0), makeTS(1, 0), 0, true, 0}, 1010 {makeTS(100, 0), makeTS(0, 0), 100, true, 0}, 1011 {makeTS(100, 0), makeTS(100, 0), 100, false, 0}, 1012 {makeTS(101, 0), makeTS(100, 0), 100, false, 0}, 1013 {makeTS(200, 0), makeTS(100, 0), 100, true, 1}, 1014 {makeTS(200, 1), makeTS(100, 0), 100, true, 1}, 1015 {makeTS(201, 0), makeTS(100, 0), 100, true, 1.01}, 1016 {makeTS(201, 0), makeTS(100, 1), 100, true, 1.01}, 1017 {makeTS(1100, 0), makeTS(100, 1), 100, true, 10}, 1018 } 1019 1020 for i, tc := range testCases { 1021 sq, pri := shouldQueueAgain(tc.now, tc.last, tc.minInterval) 1022 if sq != tc.expQueue { 1023 t.Errorf("case %d: expected shouldQueue %t; got %t", i, tc.expQueue, sq) 1024 } 1025 if pri != tc.expPriority { 1026 t.Errorf("case %d: expected priority %f; got %f", i, tc.expPriority, pri) 1027 } 1028 } 1029 } 1030 1031 // TestBaseQueueDisable verifies that disabling a queue prevents calls 1032 // to both shouldQueue and process. 1033 func TestBaseQueueDisable(t *testing.T) { 1034 defer leaktest.AfterTest(t)() 1035 tc := testContext{} 1036 stopper := stop.NewStopper() 1037 ctx := context.Background() 1038 defer stopper.Stop(ctx) 1039 tc.Start(t, stopper) 1040 1041 r, err := tc.store.GetReplica(1) 1042 if err != nil { 1043 t.Fatal(err) 1044 } 1045 1046 shouldQueueCalled := false 1047 testQueue := &testQueueImpl{ 1048 blocker: make(chan struct{}, 1), 1049 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (bool, float64) { 1050 shouldQueueCalled = true 1051 return true, 1.0 1052 }, 1053 } 1054 bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: 2}) 1055 bq.Start(stopper) 1056 1057 bq.SetDisabled(true) 1058 bq.maybeAdd(context.Background(), r, hlc.Timestamp{}) 1059 if shouldQueueCalled { 1060 t.Error("shouldQueue should not have been called") 1061 } 1062 1063 // Add the range directly, bypassing shouldQueue. 1064 if _, err := bq.testingAdd(ctx, r, 1.0); !errors.Is(err, errQueueDisabled) { 1065 t.Fatal(err) 1066 } 1067 1068 // Wake the queue. 1069 close(testQueue.blocker) 1070 1071 // Make sure the queue has actually run through a few times. 1072 for i := 0; i < cap(bq.incoming)+1; i++ { 1073 bq.incoming <- struct{}{} 1074 } 1075 1076 if pc := testQueue.getProcessed(); pc > 0 { 1077 t.Errorf("expected processed count of 0; got %d", pc) 1078 } 1079 } 1080 1081 type parallelQueueImpl struct { 1082 testQueueImpl 1083 processBlocker chan struct{} 1084 processing int32 // accessed atomically 1085 } 1086 1087 func (pq *parallelQueueImpl) process( 1088 ctx context.Context, repl *Replica, cfg *config.SystemConfig, 1089 ) error { 1090 atomic.AddInt32(&pq.processing, 1) 1091 if pq.processBlocker != nil { 1092 <-pq.processBlocker 1093 } 1094 err := pq.testQueueImpl.process(ctx, repl, cfg) 1095 atomic.AddInt32(&pq.processing, -1) 1096 return err 1097 } 1098 1099 func (pq *parallelQueueImpl) getProcessing() int { 1100 return int(atomic.LoadInt32(&pq.processing)) 1101 } 1102 1103 func TestBaseQueueProcessConcurrently(t *testing.T) { 1104 defer leaktest.AfterTest(t)() 1105 tc := testContext{} 1106 stopper := stop.NewStopper() 1107 defer stopper.Stop(context.Background()) 1108 tc.Start(t, stopper) 1109 1110 repls := createReplicas(t, &tc, 3) 1111 r1, r2, r3 := repls[0], repls[1], repls[2] 1112 1113 pQueue := ¶llelQueueImpl{ 1114 testQueueImpl: testQueueImpl{ 1115 blocker: make(chan struct{}, 1), 1116 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) { 1117 return true, 1 1118 }, 1119 }, 1120 processBlocker: make(chan struct{}, 1), 1121 } 1122 bq := makeTestBaseQueue("test", pQueue, tc.store, tc.gossip, 1123 queueConfig{ 1124 maxSize: 3, 1125 maxConcurrency: 2, 1126 }, 1127 ) 1128 bq.Start(stopper) 1129 1130 ctx := context.Background() 1131 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 1132 bq.maybeAdd(ctx, r2, hlc.Timestamp{}) 1133 bq.maybeAdd(ctx, r3, hlc.Timestamp{}) 1134 1135 if exp, l := 3, bq.Length(); l != exp { 1136 t.Errorf("expected %d queued replica; got %d", exp, l) 1137 } 1138 1139 assertProcessedAndProcessing := func(expProcessed, expProcessing int) { 1140 t.Helper() 1141 testutils.SucceedsSoon(t, func() error { 1142 if p := pQueue.getProcessed(); p != expProcessed { 1143 return errors.Errorf("expected %d processed replicas; got %d", expProcessed, p) 1144 } 1145 if p := pQueue.getProcessing(); p != expProcessing { 1146 return errors.Errorf("expected %d processing replicas; got %d", expProcessing, p) 1147 } 1148 return nil 1149 }) 1150 } 1151 1152 close(pQueue.blocker) 1153 assertProcessedAndProcessing(0, 2) 1154 1155 pQueue.processBlocker <- struct{}{} 1156 assertProcessedAndProcessing(1, 2) 1157 1158 pQueue.processBlocker <- struct{}{} 1159 assertProcessedAndProcessing(2, 1) 1160 1161 pQueue.processBlocker <- struct{}{} 1162 assertProcessedAndProcessing(3, 0) 1163 } 1164 1165 // TestBaseQueueReplicaChange ensures that if a replica is added to the queue 1166 // with a non-zero replica ID then it is only processed if the retrieved replica 1167 // from the getReplica() function has the same replica ID. 1168 func TestBaseQueueChangeReplicaID(t *testing.T) { 1169 defer leaktest.AfterTest(t)() 1170 // The testContext exists only to construct the baseQueue. 1171 tc := testContext{} 1172 stopper := stop.NewStopper() 1173 ctx := context.Background() 1174 defer stopper.Stop(ctx) 1175 tc.Start(t, stopper) 1176 testQueue := &testQueueImpl{ 1177 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) { 1178 return true, 1.0 1179 }, 1180 } 1181 bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{ 1182 maxSize: defaultQueueMaxSize, 1183 acceptsUnsplitRanges: true, 1184 }) 1185 r := &fakeReplica{rangeID: 1, replicaID: 1} 1186 bq.mu.Lock() 1187 bq.getReplica = func(rangeID roachpb.RangeID) (replicaInQueue, error) { 1188 if rangeID != 1 { 1189 panic(fmt.Errorf("expected range id 1, got %d", rangeID)) 1190 } 1191 return r, nil 1192 } 1193 bq.mu.Unlock() 1194 require.Equal(t, 0, testQueue.getProcessed()) 1195 bq.maybeAdd(ctx, r, tc.store.Clock().Now()) 1196 bq.DrainQueue(tc.store.Stopper()) 1197 require.Equal(t, 1, testQueue.getProcessed()) 1198 bq.maybeAdd(ctx, r, tc.store.Clock().Now()) 1199 r.replicaID = 2 1200 bq.DrainQueue(tc.store.Stopper()) 1201 require.Equal(t, 1, testQueue.getProcessed()) 1202 require.Equal(t, 0, bq.Length()) 1203 require.Equal(t, 0, bq.PurgatoryLength()) 1204 bq.mu.Lock() 1205 defer bq.mu.Unlock() 1206 _, exists := bq.mu.replicas[1] 1207 require.False(t, exists, bq.mu.replicas) 1208 } 1209 1210 func TestBaseQueueRequeue(t *testing.T) { 1211 defer leaktest.AfterTest(t)() 1212 tc := testContext{} 1213 stopper := stop.NewStopper() 1214 defer stopper.Stop(context.Background()) 1215 tc.Start(t, stopper) 1216 1217 repls := createReplicas(t, &tc, 1) 1218 r1 := repls[0] 1219 1220 var shouldQueueCount int64 // accessed atomically 1221 pQueue := ¶llelQueueImpl{ 1222 testQueueImpl: testQueueImpl{ 1223 blocker: make(chan struct{}, 1), 1224 shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) { 1225 if atomic.AddInt64(&shouldQueueCount, 1) <= 4 { 1226 return true, 1 1227 } 1228 return false, 1 1229 }, 1230 }, 1231 processBlocker: make(chan struct{}, 1), 1232 } 1233 bq := makeTestBaseQueue("test", pQueue, tc.store, tc.gossip, 1234 queueConfig{ 1235 maxSize: 3, 1236 maxConcurrency: 2, 1237 }, 1238 ) 1239 bq.Start(stopper) 1240 1241 assertShouldQueueCount := func(expShouldQueueCount int) { 1242 t.Helper() 1243 testutils.SucceedsSoon(t, func() error { 1244 if count := int(atomic.LoadInt64(&shouldQueueCount)); count != expShouldQueueCount { 1245 return errors.Errorf("expected %d calls to ShouldQueue; found %d", 1246 expShouldQueueCount, count) 1247 } 1248 return nil 1249 }) 1250 } 1251 assertProcessedAndProcessing := func(expProcessed, expProcessing int) { 1252 t.Helper() 1253 testutils.SucceedsSoon(t, func() error { 1254 if p := pQueue.getProcessed(); p != expProcessed { 1255 return errors.Errorf("expected %d processed replicas; got %d", expProcessed, p) 1256 } 1257 if p := pQueue.getProcessing(); p != expProcessing { 1258 return errors.Errorf("expected %d processing replicas; got %d", expProcessing, p) 1259 } 1260 return nil 1261 }) 1262 } 1263 ctx := context.Background() 1264 // MaybeAdd a replica. Should queue after checking ShouldQueue. 1265 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 1266 assertShouldQueueCount(1) 1267 if exp, l := 1, bq.Length(); l != exp { 1268 t.Errorf("expected %d queued replica; got %d", exp, l) 1269 } 1270 1271 // Let the first processing attempt run. 1272 close(pQueue.blocker) 1273 assertProcessedAndProcessing(0, 1) 1274 1275 // MaybeAdd the same replica. Should requeue after checking ShouldQueue. 1276 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 1277 assertShouldQueueCount(2) 1278 1279 // Let the first processing attempt finish. 1280 // Should begin processing second attempt after checking ShouldQueue again. 1281 pQueue.processBlocker <- struct{}{} 1282 assertShouldQueueCount(3) 1283 assertProcessedAndProcessing(1, 1) 1284 1285 // MaybeAdd the same replica. Should requeue after checking ShouldQueue. 1286 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 1287 assertShouldQueueCount(4) 1288 1289 // Let the second processing attempt finish. 1290 // Should NOT processing third attempt after checking ShouldQueue again. 1291 pQueue.processBlocker <- struct{}{} 1292 assertShouldQueueCount(5) 1293 assertProcessedAndProcessing(2, 0) 1294 1295 // MaybeAdd the same replica. Should NOT queue after checking ShouldQueue. 1296 bq.maybeAdd(ctx, r1, hlc.Timestamp{}) 1297 assertShouldQueueCount(6) 1298 assertProcessedAndProcessing(2, 0) 1299 }