github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/queue_test.go (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"container/heap"
    15  	"context"
    16  	"fmt"
    17  	"strconv"
    18  	"sync/atomic"
    19  	"testing"
    20  	"time"
    21  
    22  	"github.com/cockroachdb/cockroach/pkg/base"
    23  	"github.com/cockroachdb/cockroach/pkg/config"
    24  	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
    25  	"github.com/cockroachdb/cockroach/pkg/gossip"
    26  	"github.com/cockroachdb/cockroach/pkg/keys"
    27  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    28  	"github.com/cockroachdb/cockroach/pkg/rpc"
    29  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    30  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    31  	"github.com/cockroachdb/cockroach/pkg/testutils"
    32  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    33  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    34  	"github.com/cockroachdb/cockroach/pkg/util/metric"
    35  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    36  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    37  	"github.com/cockroachdb/errors"
    38  	"github.com/gogo/protobuf/proto"
    39  	"github.com/stretchr/testify/require"
    40  )
    41  
    42  // testQueueImpl implements queueImpl with a closure for shouldQueue.
    43  type testQueueImpl struct {
    44  	shouldQueueFn func(hlc.Timestamp, *Replica) (bool, float64)
    45  	processed     int32 // accessed atomically
    46  	duration      time.Duration
    47  	blocker       chan struct{} // timer() blocks on this if not nil
    48  	pChan         chan time.Time
    49  	err           error // always returns this error on process
    50  }
    51  
    52  func (tq *testQueueImpl) shouldQueue(
    53  	_ context.Context, now hlc.Timestamp, r *Replica, _ *config.SystemConfig,
    54  ) (bool, float64) {
    55  	return tq.shouldQueueFn(now, r)
    56  }
    57  
    58  func (tq *testQueueImpl) process(_ context.Context, _ *Replica, _ *config.SystemConfig) error {
    59  	atomic.AddInt32(&tq.processed, 1)
    60  	return tq.err
    61  }
    62  
    63  func (tq *testQueueImpl) getProcessed() int {
    64  	return int(atomic.LoadInt32(&tq.processed))
    65  }
    66  
    67  func (tq *testQueueImpl) timer(_ time.Duration) time.Duration {
    68  	if tq.blocker != nil {
    69  		<-tq.blocker
    70  	}
    71  	if tq.duration != 0 {
    72  		return tq.duration
    73  	}
    74  	return 0
    75  }
    76  
    77  func (tq *testQueueImpl) purgatoryChan() <-chan time.Time {
    78  	return tq.pChan
    79  }
    80  
    81  func makeTestBaseQueue(
    82  	name string, impl queueImpl, store *Store, gossip *gossip.Gossip, cfg queueConfig,
    83  ) *baseQueue {
    84  	if !cfg.acceptsUnsplitRanges {
    85  		// Needed in order to pass the validation in newBaseQueue.
    86  		cfg.needsSystemConfig = true
    87  	}
    88  	cfg.successes = metric.NewCounter(metric.Metadata{Name: "processed"})
    89  	cfg.failures = metric.NewCounter(metric.Metadata{Name: "failures"})
    90  	cfg.pending = metric.NewGauge(metric.Metadata{Name: "pending"})
    91  	cfg.processingNanos = metric.NewCounter(metric.Metadata{Name: "processingnanos"})
    92  	cfg.purgatory = metric.NewGauge(metric.Metadata{Name: "purgatory"})
    93  	return newBaseQueue(name, impl, store, gossip, cfg)
    94  }
    95  
    96  func createReplicas(t *testing.T, tc *testContext, num int) []*Replica {
    97  	t.Helper()
    98  
    99  	// Remove replica for range 1 since it encompasses the entire keyspace.
   100  	repl1, err := tc.store.GetReplica(1)
   101  	if err != nil {
   102  		t.Fatal(err)
   103  	}
   104  	if err := tc.store.RemoveReplica(context.Background(), repl1, repl1.Desc().NextReplicaID, RemoveOptions{
   105  		DestroyData: true,
   106  	}); err != nil {
   107  		t.Fatal(err)
   108  	}
   109  
   110  	repls := make([]*Replica, num)
   111  	for i := 0; i < num; i++ {
   112  		id := roachpb.RangeID(1000 + i)
   113  		key := roachpb.RKey(strconv.Itoa(int(id)))
   114  		endKey := roachpb.RKey(string(key) + "/end")
   115  		r := createReplica(tc.store, id, key, endKey)
   116  		if err := tc.store.AddReplica(r); err != nil {
   117  			t.Fatal(err)
   118  		}
   119  		repls[i] = r
   120  	}
   121  	return repls
   122  }
   123  
   124  // TestQueuePriorityQueue verifies priority queue implementation.
   125  func TestQueuePriorityQueue(t *testing.T) {
   126  	defer leaktest.AfterTest(t)()
   127  	// Create a priority queue, put the items in it, and
   128  	// establish the priority queue (heap) invariants.
   129  	const count = 3
   130  	expRanges := make([]roachpb.RangeID, count+1)
   131  	pq := priorityQueue{}
   132  	pq.sl = make([]*replicaItem, count)
   133  	for i := 0; i < count; {
   134  		pq.sl[i] = &replicaItem{
   135  			rangeID:  roachpb.RangeID(i),
   136  			priority: float64(i),
   137  			index:    i,
   138  		}
   139  		expRanges[3-i] = pq.sl[i].rangeID
   140  		i++
   141  	}
   142  	heap.Init(&pq)
   143  
   144  	// Insert a new item and then modify its priority.
   145  	priorityItem := &replicaItem{
   146  		rangeID:  -1,
   147  		priority: 1.0,
   148  	}
   149  	heap.Push(&pq, priorityItem)
   150  	pq.update(priorityItem, 4.0)
   151  	expRanges[0] = priorityItem.rangeID
   152  
   153  	// Take the items out; they should arrive in decreasing priority order.
   154  	for i := 0; pq.Len() > 0; i++ {
   155  		item := heap.Pop(&pq).(*replicaItem)
   156  		if item.rangeID != expRanges[i] {
   157  			t.Errorf("%d: unexpected range with priority %f", i, item.priority)
   158  		}
   159  	}
   160  }
   161  
   162  // TestBaseQueueAddUpdateAndRemove verifies basic operation with base
   163  // queue including adding ranges which both should and shouldn't be
   164  // queued, updating an existing range, and removing a range.
   165  func TestBaseQueueAddUpdateAndRemove(t *testing.T) {
   166  	defer leaktest.AfterTest(t)()
   167  	tc := testContext{}
   168  	stopper := stop.NewStopper()
   169  	ctx := context.Background()
   170  	defer stopper.Stop(ctx)
   171  	tc.Start(t, stopper)
   172  
   173  	repls := createReplicas(t, &tc, 2)
   174  	r1, r2 := repls[0], repls[1]
   175  
   176  	shouldAddMap := map[*Replica]bool{
   177  		r1: true,
   178  		r2: true,
   179  	}
   180  	priorityMap := map[*Replica]float64{
   181  		r1: 1.0,
   182  		r2: 2.0,
   183  	}
   184  	testQueue := &testQueueImpl{
   185  		shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
   186  			return shouldAddMap[r], priorityMap[r]
   187  		},
   188  	}
   189  	bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: 2})
   190  
   191  	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
   192  	bq.maybeAdd(ctx, r2, hlc.Timestamp{})
   193  	if bq.Length() != 2 {
   194  		t.Fatalf("expected length 2; got %d", bq.Length())
   195  	}
   196  	if v := bq.pending.Value(); v != 2 {
   197  		t.Errorf("expected 2 pending replicas; got %d", v)
   198  	}
   199  	if bq.pop() != r2 {
   200  		t.Error("expected r2")
   201  	} else {
   202  		bq.finishProcessingReplica(ctx, stopper, r2, nil)
   203  	}
   204  	if v := bq.pending.Value(); v != 1 {
   205  		t.Errorf("expected 1 pending replicas; got %d", v)
   206  	}
   207  	if bq.pop() != r1 {
   208  		t.Error("expected r1")
   209  	} else {
   210  		bq.finishProcessingReplica(ctx, stopper, r1, nil)
   211  	}
   212  	if v := bq.pending.Value(); v != 0 {
   213  		t.Errorf("expected 0 pending replicas; got %d", v)
   214  	}
   215  	if r := bq.pop(); r != nil {
   216  		t.Errorf("expected empty queue; got %v", r)
   217  	}
   218  
   219  	// Add again, but this time r2 shouldn't add.
   220  	shouldAddMap[r2] = false
   221  	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
   222  	bq.maybeAdd(ctx, r2, hlc.Timestamp{})
   223  	if bq.Length() != 1 {
   224  		t.Errorf("expected length 1; got %d", bq.Length())
   225  	}
   226  
   227  	// Try adding same replica twice.
   228  	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
   229  	if bq.Length() != 1 {
   230  		t.Errorf("expected length 1; got %d", bq.Length())
   231  	}
   232  
   233  	// Re-add r2 and update priority of r1.
   234  	shouldAddMap[r2] = true
   235  	priorityMap[r1] = 3.0
   236  	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
   237  	bq.maybeAdd(ctx, r2, hlc.Timestamp{})
   238  	if bq.Length() != 2 {
   239  		t.Fatalf("expected length 2; got %d", bq.Length())
   240  	}
   241  	if bq.pop() != r1 {
   242  		t.Error("expected r1")
   243  	} else {
   244  		bq.finishProcessingReplica(ctx, stopper, r1, nil)
   245  	}
   246  	if bq.pop() != r2 {
   247  		t.Error("expected r2")
   248  	} else {
   249  		bq.finishProcessingReplica(ctx, stopper, r2, nil)
   250  	}
   251  	if r := bq.pop(); r != nil {
   252  		t.Errorf("expected empty queue; got %v", r)
   253  	}
   254  
   255  	// Verify that priorities aren't lowered by a later MaybeAdd.
   256  	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
   257  	bq.maybeAdd(ctx, r2, hlc.Timestamp{})
   258  	priorityMap[r1] = 1.0
   259  	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
   260  	if bq.Length() != 2 {
   261  		t.Fatalf("expected length 2; got %d", bq.Length())
   262  	}
   263  	if bq.pop() != r1 {
   264  		t.Error("expected r1")
   265  	} else {
   266  		bq.finishProcessingReplica(ctx, stopper, r1, nil)
   267  	}
   268  	if bq.pop() != r2 {
   269  		t.Error("expected r2")
   270  	} else {
   271  		bq.finishProcessingReplica(ctx, stopper, r2, nil)
   272  	}
   273  	if r := bq.pop(); r != nil {
   274  		t.Errorf("expected empty queue; got %v", r)
   275  	}
   276  
   277  	// Try removing a replica.
   278  	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
   279  	bq.maybeAdd(ctx, r2, hlc.Timestamp{})
   280  	bq.MaybeRemove(r2.RangeID)
   281  	if bq.Length() != 1 {
   282  		t.Fatalf("expected length 1; got %d", bq.Length())
   283  	}
   284  	if v := bq.pending.Value(); v != 1 {
   285  		t.Errorf("expected 1 pending replicas; got %d", v)
   286  	}
   287  	if bq.pop() != r1 {
   288  		t.Errorf("expected r1")
   289  	} else {
   290  		bq.finishProcessingReplica(ctx, stopper, r1, nil)
   291  	}
   292  	if v := bq.pending.Value(); v != 0 {
   293  		t.Errorf("expected 0 pending replicas; got %d", v)
   294  	}
   295  }
   296  
   297  // TestBaseQueueSamePriorityFIFO verifies that if multiple items are queued at
   298  // the same priority, they will be processes in first-in-first-out order.
   299  // This avoids starvation scenarios, in particular in the Raft snapshot queue.
   300  //
   301  // See:
   302  // https://github.com/cockroachdb/cockroach/issues/31947#issuecomment-434383267
   303  func TestBaseQueueSamePriorityFIFO(t *testing.T) {
   304  	defer leaktest.AfterTest(t)()
   305  	tc := testContext{}
   306  	stopper := stop.NewStopper()
   307  	ctx := context.Background()
   308  	defer stopper.Stop(ctx)
   309  	tc.Start(t, stopper)
   310  
   311  	repls := createReplicas(t, &tc, 5)
   312  
   313  	testQueue := &testQueueImpl{
   314  		shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
   315  			t.Fatal("unexpected call to shouldQueue")
   316  			return false, 0.0
   317  		},
   318  	}
   319  
   320  	bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: 100})
   321  
   322  	for _, repl := range repls {
   323  		added, err := bq.testingAdd(ctx, repl, 0.0)
   324  		if err != nil {
   325  			t.Fatalf("%s: %v", repl, err)
   326  		}
   327  		if !added {
   328  			t.Fatalf("%v not added", repl)
   329  		}
   330  	}
   331  	for _, expRepl := range repls {
   332  		actRepl := bq.pop()
   333  		if actRepl != expRepl {
   334  			t.Fatalf("expected %v, got %v", expRepl, actRepl)
   335  		}
   336  	}
   337  }
   338  
   339  // TestBaseQueueAdd verifies that calling Add() directly overrides the
   340  // ShouldQueue method.
   341  func TestBaseQueueAdd(t *testing.T) {
   342  	defer leaktest.AfterTest(t)()
   343  	tc := testContext{}
   344  	stopper := stop.NewStopper()
   345  	ctx := context.Background()
   346  	defer stopper.Stop(ctx)
   347  	tc.Start(t, stopper)
   348  
   349  	r, err := tc.store.GetReplica(1)
   350  	if err != nil {
   351  		t.Fatal(err)
   352  	}
   353  
   354  	testQueue := &testQueueImpl{
   355  		shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
   356  			return false, 0.0
   357  		},
   358  	}
   359  	bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: 1})
   360  	bq.maybeAdd(context.Background(), r, hlc.Timestamp{})
   361  	if bq.Length() != 0 {
   362  		t.Fatalf("expected length 0; got %d", bq.Length())
   363  	}
   364  	if added, err := bq.testingAdd(ctx, r, 1.0); err != nil || !added {
   365  		t.Fatalf("expected Add to succeed: %t, %s", added, err)
   366  	}
   367  	// Add again and verify it's not actually added (it's already there).
   368  	if added, err := bq.testingAdd(ctx, r, 1.0); err != nil || added {
   369  		t.Fatalf("expected Add to succeed: %t, %s", added, err)
   370  	}
   371  	if bq.Length() != 1 {
   372  		t.Fatalf("expected length 1; got %d", bq.Length())
   373  	}
   374  }
   375  
// TestBaseQueueProcess verifies that items from the queue are
// processed according to the timer function.
//
// The test queue's timer() blocks on the blocker channel, so the test sends
// one value on blocker at a time and checks the processed count and the
// successes/pending metrics after each send.
func TestBaseQueueProcess(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tsc := TestStoreConfig(nil)
	tc := testContext{}
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.StartWithStoreConfig(t, stopper, tsc)

	repls := createReplicas(t, &tc, 2)
	r1, r2 := repls[0], repls[1]

	testQueue := &testQueueImpl{
		// A non-nil blocker makes timer() wait for a send (or close).
		blocker: make(chan struct{}, 1),
		// Always queue, using the range ID as the priority.
		shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
			shouldQueue = true
			priority = float64(r.RangeID)
			return
		},
	}
	bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: 2})
	bq.Start(stopper)

	ctx := context.Background()
	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
	bq.maybeAdd(ctx, r2, hlc.Timestamp{})
	// Nothing has been sent on blocker yet, so neither replica is expected to
	// have been processed.
	if pc := testQueue.getProcessed(); pc != 0 {
		t.Errorf("expected no processed ranges; got %d", pc)
	}
	if v := bq.successes.Count(); v != 0 {
		t.Errorf("expected 0 processed replicas; got %d", v)
	}
	if v := bq.pending.Value(); v != 2 {
		t.Errorf("expected 2 pending replicas; got %d", v)
	}

	// Unblock one timer() call and wait for exactly one replica to be
	// processed.
	testQueue.blocker <- struct{}{}
	testutils.SucceedsSoon(t, func() error {
		if pc := testQueue.getProcessed(); pc != 1 {
			return errors.Errorf("expected 1 processed replicas; got %d", pc)
		}
		if v := bq.successes.Count(); v != 1 {
			return errors.Errorf("expected 1 processed replicas; got %d", v)
		}
		if v := bq.pending.Value(); v != 1 {
			return errors.Errorf("expected 1 pending replicas; got %d", v)
		}
		return nil
	})

	// Unblock a second timer() call and wait for the remaining replica. Note
	// that the processed count is only checked as a lower bound here.
	testQueue.blocker <- struct{}{}
	testutils.SucceedsSoon(t, func() error {
		if pc := testQueue.getProcessed(); pc < 2 {
			return errors.Errorf("expected >= %d processed replicas; got %d", 2, pc)
		}
		if v := bq.successes.Count(); v != 2 {
			return errors.Errorf("expected 2 processed replicas; got %d", v)
		}
		if v := bq.pending.Value(); v != 0 {
			return errors.Errorf("expected 0 pending replicas; got %d", v)
		}
		return nil
	})

	// Ensure the test queue is not blocked on a stray call to
	// testQueueImpl.timer().
	close(testQueue.blocker)
}
   445  
   446  // TestBaseQueueAddRemove adds then removes a range; ensure range is
   447  // not processed.
   448  func TestBaseQueueAddRemove(t *testing.T) {
   449  	defer leaktest.AfterTest(t)()
   450  	tc := testContext{}
   451  	stopper := stop.NewStopper()
   452  	ctx := context.Background()
   453  	defer stopper.Stop(ctx)
   454  	tc.Start(t, stopper)
   455  
   456  	r, err := tc.store.GetReplica(1)
   457  	if err != nil {
   458  		t.Fatal(err)
   459  	}
   460  
   461  	testQueue := &testQueueImpl{
   462  		blocker: make(chan struct{}, 1),
   463  		shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
   464  			shouldQueue = true
   465  			priority = 1.0
   466  			return
   467  		},
   468  	}
   469  	bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: 2})
   470  	bq.Start(stopper)
   471  
   472  	bq.maybeAdd(ctx, r, hlc.Timestamp{})
   473  	bq.MaybeRemove(r.RangeID)
   474  
   475  	// Wake the queue
   476  	close(testQueue.blocker)
   477  
   478  	// Make sure the queue has actually run through a few times
   479  	for i := 0; i < cap(bq.incoming)+1; i++ {
   480  		bq.incoming <- struct{}{}
   481  	}
   482  
   483  	if pc := testQueue.getProcessed(); pc > 0 {
   484  		t.Errorf("expected processed count of 0; got %d", pc)
   485  	}
   486  }
   487  
// TestNeedsSystemConfig verifies that queues that don't need the system config
// are able to process replicas when the system config isn't available.
//
// Two queues are built on the same testQueueImpl and the same gossip instance
// (one without a system config): the first has needsSystemConfig set and must
// neither call shouldQueue nor process; the second has it unset and must do
// both.
func TestNeedsSystemConfig(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	stopper := stop.NewStopper()
	ctx := context.Background()
	defer stopper.Stop(ctx)
	tc.Start(t, stopper)

	r, err := tc.store.GetReplica(1)
	if err != nil {
		t.Fatal(err)
	}

	// queueFnCalled counts shouldQueueFn invocations across both queues.
	// NOTE(review): it is a plain int; this assumes shouldQueueFn only runs on
	// the test goroutine (inside maybeAdd) — confirm.
	queueFnCalled := 0
	testQueue := &testQueueImpl{
		shouldQueueFn: func(now hlc.Timestamp, r *Replica) (bool, float64) {
			queueFnCalled++
			return true, 1.0
		},
	}

	// Use a gossip instance that won't have the system config available in it.
	// bqNeedsSysCfg will not add the replica or process it without a system config.
	rpcContext := rpc.NewContext(
		tc.store.cfg.AmbientCtx, &base.Config{Insecure: true}, tc.store.cfg.Clock, stopper,
		cluster.MakeTestingClusterSettings())
	emptyGossip := gossip.NewTest(
		tc.gossip.NodeID.Get(), rpcContext, rpc.NewServer(rpcContext), stopper, tc.store.Registry(), zonepb.DefaultZoneConfigRef())
	bqNeedsSysCfg := makeTestBaseQueue("test", testQueue, tc.store, emptyGossip, queueConfig{
		needsSystemConfig:    true,
		acceptsUnsplitRanges: true,
		maxSize:              1,
	})

	bqNeedsSysCfg.Start(stopper)
	bqNeedsSysCfg.maybeAdd(ctx, r, hlc.Timestamp{})
	if queueFnCalled != 0 {
		t.Fatalf("expected shouldQueueFn not to be called without valid system config, got %d calls", queueFnCalled)
	}

	// Manually add a replica and ensure that the process method doesn't get run.
	if added, err := bqNeedsSysCfg.testingAdd(ctx, r, 1.0); err != nil || !added {
		t.Fatalf("expected Add to succeed: %t, %s", added, err)
	}
	// Make sure the queue has actually run through a few times
	for i := 0; i < cap(bqNeedsSysCfg.incoming)+1; i++ {
		bqNeedsSysCfg.incoming <- struct{}{}
	}
	if pc := testQueue.getProcessed(); pc > 0 {
		t.Errorf("expected processed count of 0 for queue that needs system config; got %d", pc)
	}

	// Now check that a queue which doesn't require the system config can
	// successfully add and process a replica.
	bqNoSysCfg := makeTestBaseQueue("test", testQueue, tc.store, emptyGossip, queueConfig{
		needsSystemConfig:    false,
		acceptsUnsplitRanges: true,
		maxSize:              1,
	})
	bqNoSysCfg.Start(stopper)
	bqNoSysCfg.maybeAdd(context.Background(), r, hlc.Timestamp{})
	if queueFnCalled != 1 {
		t.Fatalf("expected shouldQueueFn to be called even without valid system config, got %d calls", queueFnCalled)
	}
	// Processing happens asynchronously once the queue is started, so poll
	// for the processed count and the success metric.
	testutils.SucceedsSoon(t, func() error {
		if pc := testQueue.getProcessed(); pc != 1 {
			return errors.Errorf("expected 1 processed replica even without system config; got %d", pc)
		}
		if v := bqNoSysCfg.successes.Count(); v != 1 {
			return errors.Errorf("expected 1 processed replica even without system config; got %d", v)
		}
		return nil
	})
}
   564  
   565  // TestAcceptsUnsplitRanges verifies that ranges that need to split are properly
   566  // rejected when the queue has 'acceptsUnsplitRanges = false'.
   567  func TestAcceptsUnsplitRanges(t *testing.T) {
   568  	defer leaktest.AfterTest(t)()
   569  	stopper := stop.NewStopper()
   570  	defer stopper.Stop(context.Background())
   571  	s, _ := createTestStore(t,
   572  		testStoreOpts{
   573  			// This test was written before test stores could start with more than one
   574  			// range and was not adapted.
   575  			createSystemRanges: false,
   576  		},
   577  		stopper)
   578  	ctx := context.Background()
   579  
   580  	maxWontSplitAddr, err := keys.Addr(keys.SystemPrefix)
   581  	if err != nil {
   582  		t.Fatal(err)
   583  	}
   584  	minWillSplitAddr, err := keys.Addr(keys.TableDataMin)
   585  	if err != nil {
   586  		t.Fatal(err)
   587  	}
   588  
   589  	// Remove replica for range 1 since it encompasses the entire keyspace.
   590  	repl1, err := s.GetReplica(1)
   591  	if err != nil {
   592  		t.Error(err)
   593  	}
   594  	if err := s.RemoveReplica(context.Background(), repl1, repl1.Desc().NextReplicaID, RemoveOptions{
   595  		DestroyData: true,
   596  	}); err != nil {
   597  		t.Error(err)
   598  	}
   599  
   600  	// This range can never be split due to zone configs boundaries.
   601  	neverSplits := createReplica(s, 2, roachpb.RKeyMin, maxWontSplitAddr)
   602  	if err := s.AddReplica(neverSplits); err != nil {
   603  		t.Fatal(err)
   604  	}
   605  
   606  	// This range will need to be split after user db/table entries are created.
   607  	willSplit := createReplica(s, 3, minWillSplitAddr, roachpb.RKeyMax)
   608  	if err := s.AddReplica(willSplit); err != nil {
   609  		t.Fatal(err)
   610  	}
   611  
   612  	testQueue := &testQueueImpl{
   613  		shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
   614  			// Always queue ranges if they make it past the base queue's logic.
   615  			return true, float64(r.RangeID)
   616  		},
   617  	}
   618  
   619  	bq := makeTestBaseQueue("test", testQueue, s, s.cfg.Gossip, queueConfig{maxSize: 2})
   620  	bq.Start(stopper)
   621  
   622  	// Check our config.
   623  	var sysCfg *config.SystemConfig
   624  	testutils.SucceedsSoon(t, func() error {
   625  		sysCfg = s.cfg.Gossip.GetSystemConfig()
   626  		if sysCfg == nil {
   627  			return errors.New("system config not yet present")
   628  		}
   629  		return nil
   630  	})
   631  	neverSplitsDesc := neverSplits.Desc()
   632  	if sysCfg.NeedsSplit(neverSplitsDesc.StartKey, neverSplitsDesc.EndKey) {
   633  		t.Fatal("System config says range needs to be split")
   634  	}
   635  	willSplitDesc := willSplit.Desc()
   636  	if sysCfg.NeedsSplit(willSplitDesc.StartKey, willSplitDesc.EndKey) {
   637  		t.Fatal("System config says range needs to be split")
   638  	}
   639  
   640  	// There are no user db/table entries, everything should be added and
   641  	// processed as usual.
   642  	bq.maybeAdd(ctx, neverSplits, hlc.Timestamp{})
   643  	bq.maybeAdd(ctx, willSplit, hlc.Timestamp{})
   644  
   645  	testutils.SucceedsSoon(t, func() error {
   646  		if pc := testQueue.getProcessed(); pc != 2 {
   647  			return errors.Errorf("expected %d processed replicas; got %d", 2, pc)
   648  		}
   649  		// Check metrics.
   650  		if v := bq.successes.Count(); v != 2 {
   651  			return errors.Errorf("expected 2 processed replicas; got %d", v)
   652  		}
   653  		if v := bq.pending.Value(); v != 0 {
   654  			return errors.Errorf("expected 0 pending replicas; got %d", v)
   655  		}
   656  		return nil
   657  	})
   658  
   659  	// Now add a user object, it will trigger a split.
   660  	// The range willSplit starts at the beginning of the user data range,
   661  	// which means keys.MaxReservedDescID+1.
   662  	zoneConfig := zonepb.DefaultZoneConfig()
   663  	zoneConfig.RangeMaxBytes = proto.Int64(1 << 20)
   664  	config.TestingSetZoneConfig(keys.MaxReservedDescID+2, zoneConfig)
   665  
   666  	// Check our config.
   667  	neverSplitsDesc = neverSplits.Desc()
   668  	if sysCfg.NeedsSplit(neverSplitsDesc.StartKey, neverSplitsDesc.EndKey) {
   669  		t.Fatal("System config says range needs to be split")
   670  	}
   671  	willSplitDesc = willSplit.Desc()
   672  	if !sysCfg.NeedsSplit(willSplitDesc.StartKey, willSplitDesc.EndKey) {
   673  		t.Fatal("System config says range does not need to be split")
   674  	}
   675  
   676  	bq.maybeAdd(ctx, neverSplits, hlc.Timestamp{})
   677  	bq.maybeAdd(ctx, willSplit, hlc.Timestamp{})
   678  
   679  	testutils.SucceedsSoon(t, func() error {
   680  		if pc := testQueue.getProcessed(); pc != 3 {
   681  			return errors.Errorf("expected %d processed replicas; got %d", 3, pc)
   682  		}
   683  		// Check metrics.
   684  		if v := bq.successes.Count(); v != 3 {
   685  			return errors.Errorf("expected 3 processed replicas; got %d", v)
   686  		}
   687  		if v := bq.pending.Value(); v != 0 {
   688  			return errors.Errorf("expected 0 pending replicas; got %d", v)
   689  		}
   690  		return nil
   691  	})
   692  }
   693  
   694  type testPurgatoryError struct{}
   695  
   696  func (*testPurgatoryError) Error() string {
   697  	return "test purgatory error"
   698  }
   699  
   700  func (*testPurgatoryError) purgatoryErrorMarker() {
   701  }
   702  
// TestBaseQueuePurgatory verifies that if error is set on the test
// queue, items are added to the purgatory. Verifies that sending on
// the purgatory channel causes the replicas to be reprocessed.
//
// The test runs three rounds: an initial failing pass that moves every
// replica into purgatory, a retry triggered via pChan that fails again, and a
// final retry after the error is cleared, which empties the purgatory.
func TestBaseQueuePurgatory(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tsc := TestStoreConfig(nil)
	tc := testContext{}
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.StartWithStoreConfig(t, stopper, tsc)

	testQueue := &testQueueImpl{
		// timer() returns this value.
		duration: time.Nanosecond,
		shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
			shouldQueue = true
			priority = float64(r.RangeID)
			return
		},
		// pChan is what purgatoryChan() returns; the test sends on it to
		// request a retry of purgatoried replicas.
		pChan: make(chan time.Time, 1),
		// Every call to process fails with this error until it is cleared.
		err:   &testPurgatoryError{},
	}

	const replicaCount = 10
	repls := createReplicas(t, &tc, replicaCount)

	bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: replicaCount})
	bq.Start(stopper)

	for _, r := range repls {
		bq.maybeAdd(context.Background(), r, hlc.Timestamp{})
	}

	// Round 1: every replica is processed once, fails, and lands in purgatory.
	testutils.SucceedsSoon(t, func() error {
		if pc := testQueue.getProcessed(); pc != replicaCount {
			return errors.Errorf("expected %d processed replicas; got %d", replicaCount, pc)
		}
		// We have to loop checking the following conditions because the increment
		// of testQueue.processed does not happen atomically with the replica being
		// placed in purgatory.
		// Verify that the size of the purgatory map is correct.
		if l := bq.PurgatoryLength(); l != replicaCount {
			return errors.Errorf("expected purgatory size of %d; got %d", replicaCount, l)
		}
		// ...and priorityQ should be empty.
		if l := bq.Length(); l != 0 {
			return errors.Errorf("expected empty priorityQ; got %d", l)
		}
		// Check metrics.
		if v := bq.successes.Count(); v != 0 {
			return errors.Errorf("expected 0 processed replicas; got %d", v)
		}
		if v := bq.failures.Count(); v != int64(replicaCount) {
			return errors.Errorf("expected %d failed replicas; got %d", replicaCount, v)
		}
		if v := bq.pending.Value(); v != 0 {
			return errors.Errorf("expected 0 pending replicas; got %d", v)
		}
		if v := bq.purgatory.Value(); v != int64(replicaCount) {
			return errors.Errorf("expected %d purgatory replicas; got %d", replicaCount, v)
		}
		return nil
	})

	// Now, signal that purgatoried replicas should retry.
	testQueue.pChan <- timeutil.Now()

	// Round 2: the error is still set, so each replica fails a second time
	// and stays in purgatory.
	testutils.SucceedsSoon(t, func() error {
		if pc := testQueue.getProcessed(); pc != replicaCount*2 {
			return errors.Errorf("expected %d processed replicas; got %d", replicaCount*2, pc)
		}
		// We have to loop checking the following conditions because the increment
		// of testQueue.processed does not happen atomically with the replica being
		// placed in purgatory.
		// Verify the replicas are still in purgatory.
		if l := bq.PurgatoryLength(); l != replicaCount {
			return errors.Errorf("expected purgatory size of %d; got %d", replicaCount, l)
		}
		// ...and priorityQ should be empty.
		if l := bq.Length(); l != 0 {
			return errors.Errorf("expected empty priorityQ; got %d", l)
		}
		// Check metrics.
		if v := bq.successes.Count(); v != 0 {
			return errors.Errorf("expected 0 processed replicas; got %d", v)
		}
		if v := bq.failures.Count(); v != int64(replicaCount*2) {
			return errors.Errorf("expected %d failed replicas; got %d", replicaCount*2, v)
		}
		if v := bq.pending.Value(); v != 0 {
			return errors.Errorf("expected 0 pending replicas; got %d", v)
		}
		if v := bq.purgatory.Value(); v != int64(replicaCount) {
			return errors.Errorf("expected %d purgatory replicas; got %d", replicaCount, v)
		}
		return nil
	})

	// Remove error and reprocess.
	// NOTE(review): err is written here without synchronization; this assumes
	// the queue is idle until the send on pChan below — confirm.
	testQueue.err = nil
	testQueue.pChan <- timeutil.Now()

	// Round 3: each replica is processed a third time, now successfully, and
	// leaves purgatory.
	testutils.SucceedsSoon(t, func() error {
		if pc := testQueue.getProcessed(); pc != replicaCount*3 {
			return errors.Errorf("expected %d processed replicas; got %d", replicaCount*3, pc)
		}
		// Check metrics.
		if v := bq.successes.Count(); v != int64(replicaCount) {
			return errors.Errorf("expected %d processed replicas; got %d", replicaCount, v)
		}
		if v := bq.failures.Count(); v != int64(replicaCount*2) {
			return errors.Errorf("expected %d failed replicas; got %d", replicaCount*2, v)
		}
		if v := bq.pending.Value(); v != 0 {
			return errors.Errorf("expected 0 pending replicas; got %d", v)
		}
		if v := bq.purgatory.Value(); v != 0 {
			return errors.Errorf("expected 0 purgatory replicas; got %d", v)
		}
		return nil
	})

	// Verify the replicas are no longer in purgatory.
	if l := bq.PurgatoryLength(); l != 0 {
		t.Errorf("expected purgatory size of 0; got %d", l)
	}
	// ...and priorityQ should be empty.
	if l := bq.Length(); l != 0 {
		t.Errorf("expected empty priorityQ; got %d", l)
	}
}
   833  
   834  type processTimeoutQueueImpl struct {
   835  	testQueueImpl
   836  }
   837  
   838  func (pq *processTimeoutQueueImpl) process(
   839  	ctx context.Context, r *Replica, _ *config.SystemConfig,
   840  ) error {
   841  	<-ctx.Done()
   842  	atomic.AddInt32(&pq.processed, 1)
   843  	return ctx.Err()
   844  }
   845  
   846  func TestBaseQueueProcessTimeout(t *testing.T) {
   847  	defer leaktest.AfterTest(t)()
   848  	tc := testContext{}
   849  	stopper := stop.NewStopper()
   850  	defer stopper.Stop(context.Background())
   851  	tc.Start(t, stopper)
   852  
   853  	r, err := tc.store.GetReplica(1)
   854  	if err != nil {
   855  		t.Fatal(err)
   856  	}
   857  
   858  	ptQueue := &processTimeoutQueueImpl{
   859  		testQueueImpl: testQueueImpl{
   860  			blocker: make(chan struct{}, 1),
   861  			shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
   862  				return true, 1.0
   863  			},
   864  		},
   865  	}
   866  	bq := makeTestBaseQueue("test", ptQueue, tc.store, tc.gossip,
   867  		queueConfig{
   868  			maxSize:              1,
   869  			processTimeoutFunc:   constantTimeoutFunc(time.Millisecond),
   870  			acceptsUnsplitRanges: true,
   871  		})
   872  	bq.Start(stopper)
   873  	bq.maybeAdd(context.Background(), r, hlc.Timestamp{})
   874  
   875  	if l := bq.Length(); l != 1 {
   876  		t.Errorf("expected one queued replica; got %d", l)
   877  	}
   878  
   879  	ptQueue.blocker <- struct{}{}
   880  	testutils.SucceedsSoon(t, func() error {
   881  		if pc := ptQueue.getProcessed(); pc != 1 {
   882  			return errors.Errorf("expected 1 processed replicas; got %d", pc)
   883  		}
   884  		if v := bq.failures.Count(); v != 1 {
   885  			return errors.Errorf("expected 1 failed replicas; got %d", v)
   886  		}
   887  		return nil
   888  	})
   889  }
   890  
   891  type mvccStatsReplicaInQueue struct {
   892  	replicaInQueue
   893  	size int64
   894  }
   895  
   896  func (r mvccStatsReplicaInQueue) GetMVCCStats() enginepb.MVCCStats {
   897  	return enginepb.MVCCStats{ValBytes: r.size}
   898  }
   899  
   900  func TestQueueSnapshotTimeoutFunc(t *testing.T) {
   901  	defer leaktest.AfterTest(t)()
   902  	type testCase struct {
   903  		guaranteedProcessingTime time.Duration
   904  		snapshotRate             int64 // bytes/s
   905  		replicaSize              int64 // bytes
   906  		expectedTimeout          time.Duration
   907  	}
   908  	makeTest := func(tc testCase) (string, func(t *testing.T)) {
   909  		return fmt.Sprintf("%+v", tc), func(t *testing.T) {
   910  			st := cluster.MakeTestingClusterSettings()
   911  			queueGuaranteedProcessingTimeBudget.Override(&st.SV, tc.guaranteedProcessingTime)
   912  			recoverySnapshotRate.Override(&st.SV, tc.snapshotRate)
   913  			tf := makeQueueSnapshotTimeoutFunc(recoverySnapshotRate)
   914  			repl := mvccStatsReplicaInQueue{
   915  				size: tc.replicaSize,
   916  			}
   917  			require.Equal(t, tc.expectedTimeout, tf(st, repl))
   918  		}
   919  	}
   920  	for _, tc := range []testCase{
   921  		{
   922  			guaranteedProcessingTime: time.Minute,
   923  			snapshotRate:             1 << 30,
   924  			replicaSize:              1 << 20,
   925  			expectedTimeout:          time.Minute,
   926  		},
   927  		{
   928  			guaranteedProcessingTime: time.Minute,
   929  			snapshotRate:             1 << 20,
   930  			replicaSize:              100 << 20,
   931  			expectedTimeout:          100 * time.Second * permittedSnapshotSlowdown,
   932  		},
   933  		{
   934  			guaranteedProcessingTime: time.Hour,
   935  			snapshotRate:             1 << 20,
   936  			replicaSize:              100 << 20,
   937  			expectedTimeout:          time.Hour,
   938  		},
   939  		{
   940  			guaranteedProcessingTime: time.Minute,
   941  			snapshotRate:             1 << 10,
   942  			replicaSize:              100 << 20,
   943  			expectedTimeout:          100 * (1 << 10) * time.Second * permittedSnapshotSlowdown,
   944  		},
   945  	} {
   946  		t.Run(makeTest(tc))
   947  	}
   948  }
   949  
   950  // processTimeQueueImpl spends 5ms on each process request.
   951  type processTimeQueueImpl struct {
   952  	testQueueImpl
   953  }
   954  
   955  func (pq *processTimeQueueImpl) process(
   956  	_ context.Context, _ *Replica, _ *config.SystemConfig,
   957  ) error {
   958  	time.Sleep(5 * time.Millisecond)
   959  	return nil
   960  }
   961  
   962  func TestBaseQueueTimeMetric(t *testing.T) {
   963  	defer leaktest.AfterTest(t)()
   964  	tc := testContext{}
   965  	stopper := stop.NewStopper()
   966  	defer stopper.Stop(context.Background())
   967  	tc.Start(t, stopper)
   968  
   969  	r, err := tc.store.GetReplica(1)
   970  	if err != nil {
   971  		t.Fatal(err)
   972  	}
   973  
   974  	ptQueue := &processTimeQueueImpl{
   975  		testQueueImpl: testQueueImpl{
   976  			shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
   977  				return true, 1.0
   978  			},
   979  		},
   980  	}
   981  	bq := makeTestBaseQueue("test", ptQueue, tc.store, tc.gossip,
   982  		queueConfig{
   983  			maxSize:              1,
   984  			processTimeoutFunc:   constantTimeoutFunc(time.Millisecond),
   985  			acceptsUnsplitRanges: true,
   986  		})
   987  	bq.Start(stopper)
   988  	bq.maybeAdd(context.Background(), r, hlc.Timestamp{})
   989  
   990  	testutils.SucceedsSoon(t, func() error {
   991  		if v := bq.successes.Count(); v != 1 {
   992  			return errors.Errorf("expected 1 processed replicas; got %d", v)
   993  		}
   994  		if min, v := bq.queueConfig.processTimeoutFunc(nil, nil), bq.processingNanos.Count(); v < min.Nanoseconds() {
   995  			return errors.Errorf("expected >= %s in processing time; got %s", min, time.Duration(v))
   996  		}
   997  		return nil
   998  	})
   999  }
  1000  
  1001  func TestBaseQueueShouldQueueAgain(t *testing.T) {
  1002  	defer leaktest.AfterTest(t)()
  1003  	testCases := []struct {
  1004  		now, last   hlc.Timestamp
  1005  		minInterval time.Duration
  1006  		expQueue    bool
  1007  		expPriority float64
  1008  	}{
  1009  		{makeTS(1, 0), makeTS(1, 0), 0, true, 0},
  1010  		{makeTS(100, 0), makeTS(0, 0), 100, true, 0},
  1011  		{makeTS(100, 0), makeTS(100, 0), 100, false, 0},
  1012  		{makeTS(101, 0), makeTS(100, 0), 100, false, 0},
  1013  		{makeTS(200, 0), makeTS(100, 0), 100, true, 1},
  1014  		{makeTS(200, 1), makeTS(100, 0), 100, true, 1},
  1015  		{makeTS(201, 0), makeTS(100, 0), 100, true, 1.01},
  1016  		{makeTS(201, 0), makeTS(100, 1), 100, true, 1.01},
  1017  		{makeTS(1100, 0), makeTS(100, 1), 100, true, 10},
  1018  	}
  1019  
  1020  	for i, tc := range testCases {
  1021  		sq, pri := shouldQueueAgain(tc.now, tc.last, tc.minInterval)
  1022  		if sq != tc.expQueue {
  1023  			t.Errorf("case %d: expected shouldQueue %t; got %t", i, tc.expQueue, sq)
  1024  		}
  1025  		if pri != tc.expPriority {
  1026  			t.Errorf("case %d: expected priority %f; got %f", i, tc.expPriority, pri)
  1027  		}
  1028  	}
  1029  }
  1030  
  1031  // TestBaseQueueDisable verifies that disabling a queue prevents calls
  1032  // to both shouldQueue and process.
  1033  func TestBaseQueueDisable(t *testing.T) {
  1034  	defer leaktest.AfterTest(t)()
  1035  	tc := testContext{}
  1036  	stopper := stop.NewStopper()
  1037  	ctx := context.Background()
  1038  	defer stopper.Stop(ctx)
  1039  	tc.Start(t, stopper)
  1040  
  1041  	r, err := tc.store.GetReplica(1)
  1042  	if err != nil {
  1043  		t.Fatal(err)
  1044  	}
  1045  
  1046  	shouldQueueCalled := false
  1047  	testQueue := &testQueueImpl{
  1048  		blocker: make(chan struct{}, 1),
  1049  		shouldQueueFn: func(now hlc.Timestamp, r *Replica) (bool, float64) {
  1050  			shouldQueueCalled = true
  1051  			return true, 1.0
  1052  		},
  1053  	}
  1054  	bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{maxSize: 2})
  1055  	bq.Start(stopper)
  1056  
  1057  	bq.SetDisabled(true)
  1058  	bq.maybeAdd(context.Background(), r, hlc.Timestamp{})
  1059  	if shouldQueueCalled {
  1060  		t.Error("shouldQueue should not have been called")
  1061  	}
  1062  
  1063  	// Add the range directly, bypassing shouldQueue.
  1064  	if _, err := bq.testingAdd(ctx, r, 1.0); !errors.Is(err, errQueueDisabled) {
  1065  		t.Fatal(err)
  1066  	}
  1067  
  1068  	// Wake the queue.
  1069  	close(testQueue.blocker)
  1070  
  1071  	// Make sure the queue has actually run through a few times.
  1072  	for i := 0; i < cap(bq.incoming)+1; i++ {
  1073  		bq.incoming <- struct{}{}
  1074  	}
  1075  
  1076  	if pc := testQueue.getProcessed(); pc > 0 {
  1077  		t.Errorf("expected processed count of 0; got %d", pc)
  1078  	}
  1079  }
  1080  
  1081  type parallelQueueImpl struct {
  1082  	testQueueImpl
  1083  	processBlocker chan struct{}
  1084  	processing     int32 // accessed atomically
  1085  }
  1086  
  1087  func (pq *parallelQueueImpl) process(
  1088  	ctx context.Context, repl *Replica, cfg *config.SystemConfig,
  1089  ) error {
  1090  	atomic.AddInt32(&pq.processing, 1)
  1091  	if pq.processBlocker != nil {
  1092  		<-pq.processBlocker
  1093  	}
  1094  	err := pq.testQueueImpl.process(ctx, repl, cfg)
  1095  	atomic.AddInt32(&pq.processing, -1)
  1096  	return err
  1097  }
  1098  
  1099  func (pq *parallelQueueImpl) getProcessing() int {
  1100  	return int(atomic.LoadInt32(&pq.processing))
  1101  }
  1102  
  1103  func TestBaseQueueProcessConcurrently(t *testing.T) {
  1104  	defer leaktest.AfterTest(t)()
  1105  	tc := testContext{}
  1106  	stopper := stop.NewStopper()
  1107  	defer stopper.Stop(context.Background())
  1108  	tc.Start(t, stopper)
  1109  
  1110  	repls := createReplicas(t, &tc, 3)
  1111  	r1, r2, r3 := repls[0], repls[1], repls[2]
  1112  
  1113  	pQueue := &parallelQueueImpl{
  1114  		testQueueImpl: testQueueImpl{
  1115  			blocker: make(chan struct{}, 1),
  1116  			shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
  1117  				return true, 1
  1118  			},
  1119  		},
  1120  		processBlocker: make(chan struct{}, 1),
  1121  	}
  1122  	bq := makeTestBaseQueue("test", pQueue, tc.store, tc.gossip,
  1123  		queueConfig{
  1124  			maxSize:        3,
  1125  			maxConcurrency: 2,
  1126  		},
  1127  	)
  1128  	bq.Start(stopper)
  1129  
  1130  	ctx := context.Background()
  1131  	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
  1132  	bq.maybeAdd(ctx, r2, hlc.Timestamp{})
  1133  	bq.maybeAdd(ctx, r3, hlc.Timestamp{})
  1134  
  1135  	if exp, l := 3, bq.Length(); l != exp {
  1136  		t.Errorf("expected %d queued replica; got %d", exp, l)
  1137  	}
  1138  
  1139  	assertProcessedAndProcessing := func(expProcessed, expProcessing int) {
  1140  		t.Helper()
  1141  		testutils.SucceedsSoon(t, func() error {
  1142  			if p := pQueue.getProcessed(); p != expProcessed {
  1143  				return errors.Errorf("expected %d processed replicas; got %d", expProcessed, p)
  1144  			}
  1145  			if p := pQueue.getProcessing(); p != expProcessing {
  1146  				return errors.Errorf("expected %d processing replicas; got %d", expProcessing, p)
  1147  			}
  1148  			return nil
  1149  		})
  1150  	}
  1151  
  1152  	close(pQueue.blocker)
  1153  	assertProcessedAndProcessing(0, 2)
  1154  
  1155  	pQueue.processBlocker <- struct{}{}
  1156  	assertProcessedAndProcessing(1, 2)
  1157  
  1158  	pQueue.processBlocker <- struct{}{}
  1159  	assertProcessedAndProcessing(2, 1)
  1160  
  1161  	pQueue.processBlocker <- struct{}{}
  1162  	assertProcessedAndProcessing(3, 0)
  1163  }
  1164  
  1165  // TestBaseQueueReplicaChange ensures that if a replica is added to the queue
  1166  // with a non-zero replica ID then it is only processed if the retrieved replica
  1167  // from the getReplica() function has the same replica ID.
  1168  func TestBaseQueueChangeReplicaID(t *testing.T) {
  1169  	defer leaktest.AfterTest(t)()
  1170  	// The testContext exists only to construct the baseQueue.
  1171  	tc := testContext{}
  1172  	stopper := stop.NewStopper()
  1173  	ctx := context.Background()
  1174  	defer stopper.Stop(ctx)
  1175  	tc.Start(t, stopper)
  1176  	testQueue := &testQueueImpl{
  1177  		shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
  1178  			return true, 1.0
  1179  		},
  1180  	}
  1181  	bq := makeTestBaseQueue("test", testQueue, tc.store, tc.gossip, queueConfig{
  1182  		maxSize:              defaultQueueMaxSize,
  1183  		acceptsUnsplitRanges: true,
  1184  	})
  1185  	r := &fakeReplica{rangeID: 1, replicaID: 1}
  1186  	bq.mu.Lock()
  1187  	bq.getReplica = func(rangeID roachpb.RangeID) (replicaInQueue, error) {
  1188  		if rangeID != 1 {
  1189  			panic(fmt.Errorf("expected range id 1, got %d", rangeID))
  1190  		}
  1191  		return r, nil
  1192  	}
  1193  	bq.mu.Unlock()
  1194  	require.Equal(t, 0, testQueue.getProcessed())
  1195  	bq.maybeAdd(ctx, r, tc.store.Clock().Now())
  1196  	bq.DrainQueue(tc.store.Stopper())
  1197  	require.Equal(t, 1, testQueue.getProcessed())
  1198  	bq.maybeAdd(ctx, r, tc.store.Clock().Now())
  1199  	r.replicaID = 2
  1200  	bq.DrainQueue(tc.store.Stopper())
  1201  	require.Equal(t, 1, testQueue.getProcessed())
  1202  	require.Equal(t, 0, bq.Length())
  1203  	require.Equal(t, 0, bq.PurgatoryLength())
  1204  	bq.mu.Lock()
  1205  	defer bq.mu.Unlock()
  1206  	_, exists := bq.mu.replicas[1]
  1207  	require.False(t, exists, bq.mu.replicas)
  1208  }
  1209  
  1210  func TestBaseQueueRequeue(t *testing.T) {
  1211  	defer leaktest.AfterTest(t)()
  1212  	tc := testContext{}
  1213  	stopper := stop.NewStopper()
  1214  	defer stopper.Stop(context.Background())
  1215  	tc.Start(t, stopper)
  1216  
  1217  	repls := createReplicas(t, &tc, 1)
  1218  	r1 := repls[0]
  1219  
  1220  	var shouldQueueCount int64 // accessed atomically
  1221  	pQueue := &parallelQueueImpl{
  1222  		testQueueImpl: testQueueImpl{
  1223  			blocker: make(chan struct{}, 1),
  1224  			shouldQueueFn: func(now hlc.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
  1225  				if atomic.AddInt64(&shouldQueueCount, 1) <= 4 {
  1226  					return true, 1
  1227  				}
  1228  				return false, 1
  1229  			},
  1230  		},
  1231  		processBlocker: make(chan struct{}, 1),
  1232  	}
  1233  	bq := makeTestBaseQueue("test", pQueue, tc.store, tc.gossip,
  1234  		queueConfig{
  1235  			maxSize:        3,
  1236  			maxConcurrency: 2,
  1237  		},
  1238  	)
  1239  	bq.Start(stopper)
  1240  
  1241  	assertShouldQueueCount := func(expShouldQueueCount int) {
  1242  		t.Helper()
  1243  		testutils.SucceedsSoon(t, func() error {
  1244  			if count := int(atomic.LoadInt64(&shouldQueueCount)); count != expShouldQueueCount {
  1245  				return errors.Errorf("expected %d calls to ShouldQueue; found %d",
  1246  					expShouldQueueCount, count)
  1247  			}
  1248  			return nil
  1249  		})
  1250  	}
  1251  	assertProcessedAndProcessing := func(expProcessed, expProcessing int) {
  1252  		t.Helper()
  1253  		testutils.SucceedsSoon(t, func() error {
  1254  			if p := pQueue.getProcessed(); p != expProcessed {
  1255  				return errors.Errorf("expected %d processed replicas; got %d", expProcessed, p)
  1256  			}
  1257  			if p := pQueue.getProcessing(); p != expProcessing {
  1258  				return errors.Errorf("expected %d processing replicas; got %d", expProcessing, p)
  1259  			}
  1260  			return nil
  1261  		})
  1262  	}
  1263  	ctx := context.Background()
  1264  	// MaybeAdd a replica. Should queue after checking ShouldQueue.
  1265  	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
  1266  	assertShouldQueueCount(1)
  1267  	if exp, l := 1, bq.Length(); l != exp {
  1268  		t.Errorf("expected %d queued replica; got %d", exp, l)
  1269  	}
  1270  
  1271  	// Let the first processing attempt run.
  1272  	close(pQueue.blocker)
  1273  	assertProcessedAndProcessing(0, 1)
  1274  
  1275  	// MaybeAdd the same replica. Should requeue after checking ShouldQueue.
  1276  	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
  1277  	assertShouldQueueCount(2)
  1278  
  1279  	// Let the first processing attempt finish.
  1280  	// Should begin processing second attempt after checking ShouldQueue again.
  1281  	pQueue.processBlocker <- struct{}{}
  1282  	assertShouldQueueCount(3)
  1283  	assertProcessedAndProcessing(1, 1)
  1284  
  1285  	// MaybeAdd the same replica. Should requeue after checking ShouldQueue.
  1286  	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
  1287  	assertShouldQueueCount(4)
  1288  
  1289  	// Let the second processing attempt finish.
  1290  	// Should NOT processing third attempt after checking ShouldQueue again.
  1291  	pQueue.processBlocker <- struct{}{}
  1292  	assertShouldQueueCount(5)
  1293  	assertProcessedAndProcessing(2, 0)
  1294  
  1295  	// MaybeAdd the same replica. Should NOT queue after checking ShouldQueue.
  1296  	bq.maybeAdd(ctx, r1, hlc.Timestamp{})
  1297  	assertShouldQueueCount(6)
  1298  	assertProcessedAndProcessing(2, 0)
  1299  }