github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/queue_concurrency_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"context"
    15  	"math/rand"
    16  	"runtime"
    17  	"testing"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/config"
    21  	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
    22  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
    23  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    24  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    25  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    26  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    27  	"github.com/cockroachdb/cockroach/pkg/util/log"
    28  	"github.com/cockroachdb/cockroach/pkg/util/metric"
    29  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    30  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    31  	"github.com/cockroachdb/errors"
    32  	"golang.org/x/sync/errgroup"
    33  )
    34  
    35  func constantTimeoutFunc(d time.Duration) func(*cluster.Settings, replicaInQueue) time.Duration {
    36  	return func(*cluster.Settings, replicaInQueue) time.Duration { return d }
    37  }
    38  
    39  // TestBaseQueueConcurrent verifies that under concurrent adds/removes of ranges
    40  // to the queue including purgatory errors and regular errors, the queue
    41  // invariants are upheld. The test operates on fake ranges and a mock queue
    42  // impl, which are defined at the end of the file.
    43  func TestBaseQueueConcurrent(t *testing.T) {
    44  	defer leaktest.AfterTest(t)()
    45  
    46  	ctx := context.Background()
    47  	stopper := stop.NewStopper()
    48  	defer stopper.Stop(ctx)
    49  
    50  	// We'll use this many ranges, each of which is added a few times to the
    51  	// queue and maybe removed as well.
    52  	const num = 1000
    53  
    54  	cfg := queueConfig{
    55  		maxSize:              num / 2,
    56  		maxConcurrency:       4,
    57  		acceptsUnsplitRanges: true,
    58  		processTimeoutFunc:   constantTimeoutFunc(time.Millisecond),
    59  		// We don't care about these, but we don't want to crash.
    60  		successes:       metric.NewCounter(metric.Metadata{Name: "processed"}),
    61  		failures:        metric.NewCounter(metric.Metadata{Name: "failures"}),
    62  		pending:         metric.NewGauge(metric.Metadata{Name: "pending"}),
    63  		processingNanos: metric.NewCounter(metric.Metadata{Name: "processingnanos"}),
    64  		purgatory:       metric.NewGauge(metric.Metadata{Name: "purgatory"}),
    65  	}
    66  
    67  	// Set up a fake store with just exactly what the code calls into. Ideally
    68  	// we'd set up an interface against the *Store as well, similar to
    69  	// replicaInQueue, but this isn't an ideal world. Deal with it.
    70  	store := &Store{
    71  		cfg: StoreConfig{
    72  			Clock:             hlc.NewClock(hlc.UnixNano, time.Second),
    73  			AmbientCtx:        log.AmbientContext{Tracer: tracing.NewTracer()},
    74  			DefaultZoneConfig: zonepb.DefaultZoneConfigRef(),
    75  		},
    76  	}
    77  
    78  	// Set up a queue impl that will return random results from processing.
    79  	impl := fakeQueueImpl{
    80  		pr: func(context.Context, *Replica, *config.SystemConfig) error {
    81  			n := rand.Intn(4)
    82  			if n == 0 {
    83  				return nil
    84  			} else if n == 1 {
    85  				return errors.New("injected regular error")
    86  			} else if n == 2 {
    87  				return &benignError{errors.New("injected benign error")}
    88  			}
    89  			return &testPurgatoryError{}
    90  		},
    91  	}
    92  	bq := newBaseQueue("test", impl, store, nil /* Gossip */, cfg)
    93  	bq.getReplica = func(id roachpb.RangeID) (replicaInQueue, error) {
    94  		return &fakeReplica{rangeID: id}, nil
    95  	}
    96  	bq.Start(stopper)
    97  
    98  	var g errgroup.Group
    99  	for i := 1; i <= num; i++ {
   100  		r := &fakeReplica{rangeID: roachpb.RangeID(i)}
   101  		for j := 0; j < 5; j++ {
   102  			g.Go(func() error {
   103  				_, err := bq.testingAdd(ctx, r, 1.0)
   104  				return err
   105  			})
   106  		}
   107  		if rand.Intn(5) == 0 {
   108  			g.Go(func() error {
   109  				bq.MaybeRemove(r.rangeID)
   110  				return nil
   111  			})
   112  		}
   113  		g.Go(func() error {
   114  			bq.assertInvariants()
   115  			return nil
   116  		})
   117  	}
   118  	if err := g.Wait(); err != nil {
   119  		t.Fatal(err)
   120  	}
   121  	for done := false; !done; {
   122  		bq.mu.Lock()
   123  		done = len(bq.mu.replicas) == 0
   124  		bq.mu.Unlock()
   125  		runtime.Gosched()
   126  	}
   127  }
   128  
// fakeQueueImpl is a mock queue implementation for tests. Its processing step
// is supplied by the caller through the pr field.
type fakeQueueImpl struct {
	// pr is the callback that process forwards its arguments to; its return
	// value becomes the result of process.
	pr func(context.Context, *Replica, *config.SystemConfig) error
}
   132  
   133  func (fakeQueueImpl) shouldQueue(
   134  	context.Context, hlc.Timestamp, *Replica, *config.SystemConfig,
   135  ) (shouldQueue bool, priority float64) {
   136  	return rand.Intn(5) != 0, 1.0
   137  }
   138  
   139  func (fq fakeQueueImpl) process(
   140  	ctx context.Context, repl *Replica, cfg *config.SystemConfig,
   141  ) error {
   142  	return fq.pr(ctx, repl, cfg)
   143  }
   144  
   145  func (fakeQueueImpl) timer(time.Duration) time.Duration {
   146  	return time.Nanosecond
   147  }
   148  
   149  func (fakeQueueImpl) purgatoryChan() <-chan time.Time {
   150  	return time.After(time.Nanosecond)
   151  }
   152  
// fakeReplica is a lightweight stand-in for a replica, carrying nothing but
// a range ID and a replica ID. Its methods return fixed or trivially derived
// values.
type fakeReplica struct {
	// rangeID is reported by GetRangeID and embedded in the descriptor
	// returned by Desc.
	rangeID   roachpb.RangeID
	// replicaID is reported by ReplicaID.
	replicaID roachpb.ReplicaID
}
   157  
   158  func (fr *fakeReplica) AnnotateCtx(ctx context.Context) context.Context { return ctx }
   159  func (fr *fakeReplica) StoreID() roachpb.StoreID {
   160  	return 1
   161  }
   162  func (fr *fakeReplica) GetRangeID() roachpb.RangeID         { return fr.rangeID }
   163  func (fr *fakeReplica) ReplicaID() roachpb.ReplicaID        { return fr.replicaID }
   164  func (fr *fakeReplica) IsInitialized() bool                 { return true }
   165  func (fr *fakeReplica) IsDestroyed() (DestroyReason, error) { return destroyReasonAlive, nil }
   166  func (fr *fakeReplica) Desc() *roachpb.RangeDescriptor {
   167  	return &roachpb.RangeDescriptor{RangeID: fr.rangeID, EndKey: roachpb.RKey("z")}
   168  }
   169  func (fr *fakeReplica) maybeInitializeRaftGroup(context.Context) {}
   170  func (fr *fakeReplica) redirectOnOrAcquireLease(
   171  	context.Context,
   172  ) (kvserverpb.LeaseStatus, *roachpb.Error) {
   173  	// baseQueue only checks that the returned error is nil.
   174  	return kvserverpb.LeaseStatus{}, nil
   175  }
   176  func (fr *fakeReplica) IsLeaseValid(roachpb.Lease, hlc.Timestamp) bool { return true }
   177  func (fr *fakeReplica) GetLease() (roachpb.Lease, roachpb.Lease) {
   178  	return roachpb.Lease{}, roachpb.Lease{}
   179  }