github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/scanner_test.go (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"testing"
    17  	"time"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    20  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    21  	"github.com/cockroachdb/cockroach/pkg/testutils"
    22  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    23  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    24  	"github.com/cockroachdb/cockroach/pkg/util/log"
    25  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    26  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    27  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    28  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    29  	"github.com/cockroachdb/errors"
    30  	"github.com/google/btree"
    31  )
    32  
    33  func makeAmbCtx() log.AmbientContext {
    34  	return log.AmbientContext{Tracer: tracing.NewTracer()}
    35  }
    36  
// testRangeSet is a test implementation of a range set backed by
// btree.BTree.
//
// The embedded mutex is taken by Visit, EstimatedCount and remove around
// their accesses to replicasByKey and visited. visited counts the items
// handed to the visitor during the most recent Visit call; EstimatedCount
// subtracts it from the tree size.
type testRangeSet struct {
	syncutil.Mutex
	replicasByKey *btree.BTree
	visited       int
}
    43  
    44  // newTestRangeSet creates a new range set that has the count number of ranges.
    45  func newTestRangeSet(count int, t *testing.T) *testRangeSet {
    46  	rs := &testRangeSet{replicasByKey: btree.New(64 /* degree */)}
    47  	for i := 0; i < count; i++ {
    48  		desc := &roachpb.RangeDescriptor{
    49  			RangeID:  roachpb.RangeID(i),
    50  			StartKey: roachpb.RKey(fmt.Sprintf("%03d", i)),
    51  			EndKey:   roachpb.RKey(fmt.Sprintf("%03d", i+1)),
    52  		}
    53  		// Initialize the range stat so the scanner can use it.
    54  		repl := &Replica{
    55  			RangeID: desc.RangeID,
    56  		}
    57  		repl.mu.state.Stats = &enginepb.MVCCStats{
    58  			KeyBytes:  1,
    59  			ValBytes:  2,
    60  			KeyCount:  1,
    61  			LiveCount: 1,
    62  		}
    63  		repl.mu.state.Desc = desc
    64  		if exRngItem := rs.replicasByKey.ReplaceOrInsert(repl); exRngItem != nil {
    65  			t.Fatalf("failed to insert range %s", repl)
    66  		}
    67  	}
    68  	return rs
    69  }
    70  
// Visit resets the visited counter and then walks replicasByKey in
// ascending order, passing each item to visitor as a *Replica. The
// visitor's boolean result is handed straight back to Ascend as its
// continue/stop signal.
func (rs *testRangeSet) Visit(visitor func(*Replica) bool) {
	rs.Lock()
	defer rs.Unlock()
	rs.visited = 0
	rs.replicasByKey.Ascend(func(i btree.Item) bool {
		rs.visited++
		// The mutex is released while the visitor runs and reacquired by
		// the deferred Lock when this callback returns, so the visitor can
		// call back into methods that take the same mutex (for example
		// EstimatedCount) without deadlocking.
		rs.Unlock()
		defer rs.Lock()
		return visitor(i.(*Replica))
	})
}
    82  
    83  func (rs *testRangeSet) EstimatedCount() int {
    84  	rs.Lock()
    85  	defer rs.Unlock()
    86  	count := rs.replicasByKey.Len() - rs.visited
    87  	if count < 1 {
    88  		count = 1
    89  	}
    90  	return count
    91  }
    92  
// remove deletes the index-th range from the range set and returns its
// replica. The item is looked up by the end key that newTestRangeSet
// assigned to that range (the zero-padded rendering of index+1). The test
// is failed immediately if the tree holds no matching item.
func (rs *testRangeSet) remove(index int, t *testing.T) *Replica {
	endKey := roachpb.Key(fmt.Sprintf("%03d", index+1))
	rs.Lock()
	defer rs.Unlock()
	repl := rs.replicasByKey.Delete((rangeBTreeKey)(endKey))
	if repl == nil {
		t.Fatalf("failed to delete range of end key %s", endKey)
	}
	return repl.(*Replica)
}
   104  
// testQueue is a test implementation of a range queue which adds ranges to
// an internal slice. Once started, a background worker periodically drops
// the first queued range unless the queue has been disabled.
type testQueue struct {
	syncutil.Mutex // Protects ranges, done, processed count & disabled
	ranges         []*Replica
	done           bool
	processed      int
	disabled       bool
}
   114  
   115  // setDisabled suspends processing of items from the queue.
   116  func (tq *testQueue) setDisabled(d bool) {
   117  	tq.Lock()
   118  	defer tq.Unlock()
   119  	tq.disabled = d
   120  }
   121  
   122  func (tq *testQueue) Start(stopper *stop.Stopper) {
   123  	stopper.RunWorker(context.Background(), func(context.Context) {
   124  		for {
   125  			select {
   126  			case <-time.After(1 * time.Millisecond):
   127  				tq.Lock()
   128  				if !tq.disabled && len(tq.ranges) > 0 {
   129  					tq.ranges = tq.ranges[1:]
   130  					tq.processed++
   131  				}
   132  				tq.Unlock()
   133  			case <-stopper.ShouldStop():
   134  				tq.Lock()
   135  				tq.done = true
   136  				tq.Unlock()
   137  				return
   138  			}
   139  		}
   140  	})
   141  }
   142  
   143  // NB: MaybeAddAsync on a testQueue is actually synchronous.
   144  func (tq *testQueue) MaybeAddAsync(ctx context.Context, replI replicaInQueue, now hlc.Timestamp) {
   145  	repl := replI.(*Replica)
   146  
   147  	tq.Lock()
   148  	defer tq.Unlock()
   149  	if index := tq.indexOf(repl.RangeID); index == -1 {
   150  		tq.ranges = append(tq.ranges, repl)
   151  	}
   152  }
   153  
   154  func (tq *testQueue) MaybeRemove(rangeID roachpb.RangeID) {
   155  	tq.Lock()
   156  	defer tq.Unlock()
   157  	if index := tq.indexOf(rangeID); index != -1 {
   158  		tq.ranges = append(tq.ranges[:index], tq.ranges[index+1:]...)
   159  	}
   160  }
   161  
// Name returns the fixed name "testQueue".
func (tq *testQueue) Name() string {
	return "testQueue"
}
   165  
// NeedsLease always reports false for the test queue.
func (tq *testQueue) NeedsLease() bool {
	return false
}
   169  
   170  func (tq *testQueue) count() int {
   171  	tq.Lock()
   172  	defer tq.Unlock()
   173  	return len(tq.ranges)
   174  }
   175  
   176  func (tq *testQueue) indexOf(rangeID roachpb.RangeID) int {
   177  	for i, repl := range tq.ranges {
   178  		if repl.RangeID == rangeID {
   179  			return i
   180  		}
   181  	}
   182  	return -1
   183  }
   184  
   185  func (tq *testQueue) isDone() bool {
   186  	tq.Lock()
   187  	defer tq.Unlock()
   188  	return tq.done
   189  }
   190  
// TestScannerAddToQueues verifies that ranges are added to and
// removed from multiple queues, and that stopping the stopper stops
// every queue registered with the scanner.
func TestScannerAddToQueues(t *testing.T) {
	defer leaktest.AfterTest(t)()
	const count = 3
	ranges := newTestRangeSet(count, t)
	q1, q2 := &testQueue{}, &testQueue{}
	// We don't want to actually consume entries from the queues during this test.
	q1.setDisabled(true)
	q2.setDisabled(true)
	mc := hlc.NewManualClock(123)
	clock := hlc.NewClock(mc.UnixNano, time.Nanosecond)
	s := newReplicaScanner(makeAmbCtx(), clock, 1*time.Millisecond, 0, 0, ranges)
	s.AddQueues(q1, q2)
	stopper := stop.NewStopper()

	// Start scanner and verify that all ranges are added to both queues.
	s.Start(stopper)
	testutils.SucceedsSoon(t, func() error {
		if q1.count() != count || q2.count() != count {
			return errors.Errorf("q1 or q2 count != %d; got %d, %d", count, q1.count(), q2.count())
		}
		return nil
	})

	// Remove first range and verify it does not exist in either queue.
	rng := ranges.remove(0, t)
	testutils.SucceedsSoon(t, func() error {
		// This is intentionally inside the loop, otherwise this test races as
		// our removal of the range may be processed before a stray re-queue.
		// Removing on each attempt makes sure we clean this up as we retry.
		s.RemoveReplica(rng)
		c1 := q1.count()
		c2 := q2.count()
		if c1 != count-1 || c2 != count-1 {
			return errors.Errorf("q1 or q2 count != %d; got %d, %d", count-1, c1, c2)
		}
		return nil
	})

	// Stop scanner and verify both queues are stopped.
	stopper.Stop(context.Background())
	if !q1.isDone() || !q2.isDone() {
		t.Errorf("expected all queues to stop; got %t, %t", q1.isDone(), q2.isDone())
	}
}
   237  
   238  // TestScannerTiming verifies that ranges are scanned, regardless
   239  // of how many, to match scanInterval.
   240  func TestScannerTiming(t *testing.T) {
   241  	defer leaktest.AfterTest(t)()
   242  	const count = 3
   243  	const runTime = 100 * time.Millisecond
   244  	const maxError = 7500 * time.Microsecond
   245  	durations := []time.Duration{
   246  		15 * time.Millisecond,
   247  		25 * time.Millisecond,
   248  	}
   249  	for i, duration := range durations {
   250  		testutils.SucceedsSoon(t, func() error {
   251  			ranges := newTestRangeSet(count, t)
   252  			q := &testQueue{}
   253  			mc := hlc.NewManualClock(123)
   254  			clock := hlc.NewClock(mc.UnixNano, time.Nanosecond)
   255  			s := newReplicaScanner(makeAmbCtx(), clock, duration, 0, 0, ranges)
   256  			s.AddQueues(q)
   257  			stopper := stop.NewStopper()
   258  			s.Start(stopper)
   259  			time.Sleep(runTime)
   260  			stopper.Stop(context.Background())
   261  
   262  			avg := s.avgScan()
   263  			log.Infof(context.Background(), "%d: average scan: %s", i, avg)
   264  			if avg.Nanoseconds()-duration.Nanoseconds() > maxError.Nanoseconds() ||
   265  				duration.Nanoseconds()-avg.Nanoseconds() > maxError.Nanoseconds() {
   266  				return errors.Errorf("expected %s, got %s: exceeds max error of %s", duration, avg, maxError)
   267  			}
   268  			return nil
   269  		})
   270  	}
   271  }
   272  
   273  // TestScannerPaceInterval tests that paceInterval returns the correct interval.
   274  func TestScannerPaceInterval(t *testing.T) {
   275  	defer leaktest.AfterTest(t)()
   276  	const count = 3
   277  	durations := []time.Duration{
   278  		30 * time.Millisecond,
   279  		60 * time.Millisecond,
   280  		500 * time.Millisecond,
   281  	}
   282  	// function logs an error when the actual value is not close
   283  	// to the expected value
   284  	logErrorWhenNotCloseTo := func(expected, actual time.Duration) {
   285  		delta := 1 * time.Millisecond
   286  		if actual < expected-delta || actual > expected+delta {
   287  			t.Errorf("Expected duration %s, got %s", expected, actual)
   288  		}
   289  	}
   290  	for _, duration := range durations {
   291  		startTime := timeutil.Now()
   292  		ranges := newTestRangeSet(count, t)
   293  		s := newReplicaScanner(makeAmbCtx(), nil, duration, 0, 0, ranges)
   294  		interval := s.paceInterval(startTime, startTime)
   295  		logErrorWhenNotCloseTo(duration/count, interval)
   296  		// The range set is empty
   297  		ranges = newTestRangeSet(0, t)
   298  		s = newReplicaScanner(makeAmbCtx(), nil, duration, 0, 0, ranges)
   299  		interval = s.paceInterval(startTime, startTime)
   300  		logErrorWhenNotCloseTo(duration, interval)
   301  		ranges = newTestRangeSet(count, t)
   302  		s = newReplicaScanner(makeAmbCtx(), nil, duration, 0, 0, ranges)
   303  		// Move the present to duration time into the future
   304  		interval = s.paceInterval(startTime, startTime.Add(duration))
   305  		logErrorWhenNotCloseTo(0, interval)
   306  	}
   307  }
   308  
   309  // TestScannerMinMaxIdleTime verifies that the pace interval will not
   310  // be less than the specified min idle time or greater than the
   311  // specified max idle time.
   312  func TestScannerMinMaxIdleTime(t *testing.T) {
   313  	defer leaktest.AfterTest(t)()
   314  	const targetInterval = 100 * time.Millisecond
   315  	const minIdleTime = 10 * time.Millisecond
   316  	const maxIdleTime = 15 * time.Millisecond
   317  	for count := range []int{1, 10, 20, 100} {
   318  		startTime := timeutil.Now()
   319  		ranges := newTestRangeSet(count, t)
   320  		s := newReplicaScanner(makeAmbCtx(), nil, targetInterval, minIdleTime, maxIdleTime, ranges)
   321  		if interval := s.paceInterval(startTime, startTime); interval < minIdleTime || interval > maxIdleTime {
   322  			t.Errorf("expected interval %s <= %s <= %s", minIdleTime, interval, maxIdleTime)
   323  		}
   324  	}
   325  }
   326  
// TestScannerDisabled verifies that disabling a scanner prevents
// replicas from being added to queues. It also checks that replicas can
// still be removed from the queues while the scanner is disabled and that
// the scan count does not advance during that time.
func TestScannerDisabled(t *testing.T) {
	defer leaktest.AfterTest(t)()
	const count = 3
	ranges := newTestRangeSet(count, t)
	q := &testQueue{}
	mc := hlc.NewManualClock(123)
	clock := hlc.NewClock(mc.UnixNano, time.Nanosecond)
	s := newReplicaScanner(makeAmbCtx(), clock, 1*time.Millisecond, 0, 0, ranges)
	s.AddQueues(q)
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	s.Start(stopper)

	// Verify queue gets all ranges.
	testutils.SucceedsSoon(t, func() error {
		if q.count() != count {
			return errors.Errorf("expected %d replicas; have %d", count, q.count())
		}
		if s.scanCount() == 0 {
			return errors.Errorf("expected scanner count to increment")
		}
		return nil
	})

	// Snapshot the wait-enabled counter so a later change can be detected.
	lastWaitEnabledCount := s.waitEnabledCount()

	// Now, disable the scanner.
	s.SetDisabled(true)
	testutils.SucceedsSoon(t, func() error {
		if s.waitEnabledCount() == lastWaitEnabledCount {
			return errors.Errorf("expected scanner to stop when disabled")
		}
		return nil
	})

	// Snapshot the scan count; it must be unchanged at the end of the test.
	lastScannerCount := s.scanCount()

	// Remove the replicas and verify the scanner still removes them while disabled.
	ranges.Visit(func(repl *Replica) bool {
		s.RemoveReplica(repl)
		return true
	})

	testutils.SucceedsSoon(t, func() error {
		if qc := q.count(); qc != 0 {
			return errors.Errorf("expected queue to be empty after replicas removed from scanner; got %d", qc)
		}
		return nil
	})
	if sc := s.scanCount(); sc != lastScannerCount {
		t.Errorf("expected scanner count to not increment: %d != %d", sc, lastScannerCount)
	}
}
   382  
// TestScannerDisabledWithZeroInterval verifies that a scanner constructed
// with a zero target interval reports itself as disabled.
func TestScannerDisabledWithZeroInterval(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ranges := newTestRangeSet(1, t)
	s := newReplicaScanner(makeAmbCtx(), nil, 0*time.Millisecond, 0, 0, ranges)
	if !s.GetDisabled() {
		t.Errorf("expected scanner to be disabled")
	}
}
   391  
// TestScannerEmptyRangeSet verifies that an empty range set doesn't busy loop.
// The scanner is started with a one-hour target interval over zero ranges
// and, after a short sleep, must have completed at most one scan.
func TestScannerEmptyRangeSet(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ranges := newTestRangeSet(0, t)
	q := &testQueue{}
	mc := hlc.NewManualClock(123)
	clock := hlc.NewClock(mc.UnixNano, time.Nanosecond)
	s := newReplicaScanner(makeAmbCtx(), clock, time.Hour, 0, 0, ranges)
	s.AddQueues(q)
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	s.Start(stopper)
	time.Sleep(time.Millisecond) // give it some time to (not) busy loop
	if count := s.scanCount(); count > 1 {
		t.Errorf("expected at most one loop, but got %d", count)
	}
}
   408  }