github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/scanner.go (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"context"
    15  	"time"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    18  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    19  	"github.com/cockroachdb/cockroach/pkg/util/log"
    20  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    21  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    22  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    23  )
    24  
// A replicaQueue is a prioritized queue of replicas for which work is
// scheduled. For example, there's a GC queue for replicas which are due
// for garbage collection, a rebalance queue to move replicas from full
// or busy stores, a recovery queue for replicas of ranges with dead replicas,
// etc.
type replicaQueue interface {
	// Start launches a goroutine to process the contents of the queue.
	// The provided stopper is used to signal that the goroutine should exit.
	Start(*stop.Stopper)
	// MaybeAddAsync adds the replica to the queue if the replica meets
	// the queue's inclusion criteria and the queue is not already
	// too full, etc. The replica scanner passes its clock's current
	// time as the timestamp argument.
	// NOTE(review): the Async suffix suggests the inclusion check may run
	// in the background rather than before this call returns; confirm
	// against the queue implementations.
	MaybeAddAsync(context.Context, replicaInQueue, hlc.Timestamp)
	// MaybeRemove removes the replica with the given range ID from the
	// queue if it is present.
	MaybeRemove(roachpb.RangeID)
	// Name returns the name of the queue.
	Name() string
	// NeedsLease returns whether the queue requires a replica to be leaseholder.
	NeedsLease() bool
}
    45  
// A replicaSet provides access to a sequence of replicas to consider
// for inclusion in replica queues. There are no requirements for the
// ordering of the iteration.
type replicaSet interface {
	// Visit calls the given function for every replica in the set
	// until the function returns false.
	Visit(func(*Replica) bool)
	// EstimatedCount returns the number of replicas estimated to remain
	// in the iteration. This value does not need to be exact; the scanner
	// only uses it to divide the remaining scan time into per-replica
	// pauses and treats any value below 1 as 1.
	EstimatedCount() int
}
    57  
// A replicaScanner iterates over replicas at a measured pace in order to
// complete approximately one full scan per target interval in a large
// store (in small stores it may complete faster than the target
// interval). Each replica is tested for inclusion in a sequence of
// prioritized replica queues.
type replicaScanner struct {
	log.AmbientContext
	clock *hlc.Clock

	targetInterval time.Duration  // Target duration of one full pass over the replica set
	minIdleTime    time.Duration  // Lower bound on the pause between replicas; ignored unless > 0
	maxIdleTime    time.Duration  // Upper bound on the pause between replicas; ignored unless > 0
	waitTimer      timeutil.Timer // Shared timer to avoid allocations; reset for every pause
	replicas       replicaSet     // Replicas to be scanned
	queues         []replicaQueue // Replica queues managed by this scanner
	removed        chan *Replica  // Replicas to remove from queues; drained by the scan loop
	// Scan statistics and the disabled flag, all guarded by the embedded
	// mutex.
	mu struct {
		syncutil.Mutex
		// scanCount is the number of completed passes over the replica set.
		scanCount int64
		// waitEnabledCount is the number of times the scan loop started
		// waiting for the scanner to be re-enabled.
		waitEnabledCount int64
		// total is the accumulated duration of all completed passes.
		total time.Duration
		// Some tests in this package disable scanning.
		disabled bool
	}
	// Used to notify processing loop if the disabled state changes.
	setDisabledCh chan struct{}
}
    86  
    87  // newReplicaScanner creates a new replica scanner with the provided
    88  // loop intervals, replica set, and replica queues.  If scanFn is not
    89  // nil, after a complete loop that function will be called. If the
    90  // targetInterval is 0, the scanner is disabled.
    91  func newReplicaScanner(
    92  	ambient log.AmbientContext,
    93  	clock *hlc.Clock,
    94  	targetInterval, minIdleTime, maxIdleTime time.Duration,
    95  	replicas replicaSet,
    96  ) *replicaScanner {
    97  	if targetInterval < 0 {
    98  		panic("scanner interval must be greater than or equal to zero")
    99  	}
   100  	rs := &replicaScanner{
   101  		AmbientContext: ambient,
   102  		clock:          clock,
   103  		targetInterval: targetInterval,
   104  		minIdleTime:    minIdleTime,
   105  		maxIdleTime:    maxIdleTime,
   106  		replicas:       replicas,
   107  		removed:        make(chan *Replica, 10),
   108  		setDisabledCh:  make(chan struct{}, 1),
   109  	}
   110  	if targetInterval == 0 {
   111  		rs.SetDisabled(true)
   112  	}
   113  	return rs
   114  }
   115  
   116  // AddQueues adds a variable arg list of queues to the replica scanner.
   117  // This method may only be called before Start().
   118  func (rs *replicaScanner) AddQueues(queues ...replicaQueue) {
   119  	rs.queues = append(rs.queues, queues...)
   120  }
   121  
   122  // Start spins up the scanning loop.
   123  func (rs *replicaScanner) Start(stopper *stop.Stopper) {
   124  	for _, queue := range rs.queues {
   125  		queue.Start(stopper)
   126  	}
   127  	rs.scanLoop(stopper)
   128  }
   129  
   130  // scanCount returns the number of times the scanner has cycled through
   131  // all replicas.
   132  func (rs *replicaScanner) scanCount() int64 {
   133  	rs.mu.Lock()
   134  	defer rs.mu.Unlock()
   135  	return rs.mu.scanCount
   136  }
   137  
   138  // waitEnabledCount returns the number of times the scanner went in the mode of
   139  // waiting to be reenabled.
   140  func (rs *replicaScanner) waitEnabledCount() int64 {
   141  	rs.mu.Lock()
   142  	defer rs.mu.Unlock()
   143  	return rs.mu.waitEnabledCount
   144  }
   145  
   146  // SetDisabled turns replica scanning off or on as directed. Note that while
   147  // disabled, removals are still processed.
   148  func (rs *replicaScanner) SetDisabled(disabled bool) {
   149  	rs.mu.Lock()
   150  	defer rs.mu.Unlock()
   151  	rs.mu.disabled = disabled
   152  	// The select prevents blocking on the channel.
   153  	select {
   154  	case rs.setDisabledCh <- struct{}{}:
   155  	default:
   156  	}
   157  }
   158  
   159  func (rs *replicaScanner) GetDisabled() bool {
   160  	rs.mu.Lock()
   161  	defer rs.mu.Unlock()
   162  	return rs.mu.disabled
   163  }
   164  
   165  // avgScan returns the average scan time of each scan cycle. Used in unittests.
   166  func (rs *replicaScanner) avgScan() time.Duration {
   167  	rs.mu.Lock()
   168  	defer rs.mu.Unlock()
   169  	if rs.mu.scanCount == 0 {
   170  		return 0
   171  	}
   172  	return time.Duration(rs.mu.total.Nanoseconds() / rs.mu.scanCount)
   173  }
   174  
// RemoveReplica removes a replica from any replica queues the scanner may
// have placed it in. This method should be called by the Store
// when a replica is removed (e.g. rebalanced or merged).
//
// The replica is only handed off here: it is sent on the removed channel
// and the scan loop performs the actual removal from the queues when it
// receives it. The send blocks while the channel's buffer is full, until
// the scan loop drains an entry.
func (rs *replicaScanner) RemoveReplica(repl *Replica) {
	rs.removed <- repl
}
   181  
   182  // paceInterval returns a duration between iterations to allow us to pace
   183  // the scan.
   184  func (rs *replicaScanner) paceInterval(start, now time.Time) time.Duration {
   185  	elapsed := now.Sub(start)
   186  	remainingNanos := rs.targetInterval.Nanoseconds() - elapsed.Nanoseconds()
   187  	if remainingNanos < 0 {
   188  		remainingNanos = 0
   189  	}
   190  	count := rs.replicas.EstimatedCount()
   191  	if count < 1 {
   192  		count = 1
   193  	}
   194  	interval := time.Duration(remainingNanos / int64(count))
   195  	if rs.minIdleTime > 0 && interval < rs.minIdleTime {
   196  		interval = rs.minIdleTime
   197  	}
   198  	if rs.maxIdleTime > 0 && interval > rs.maxIdleTime {
   199  		interval = rs.maxIdleTime
   200  	}
   201  	return interval
   202  }
   203  
   204  // waitAndProcess waits for the pace interval and processes the replica
   205  // if repl is not nil. The method returns true when the scanner needs
   206  // to be stopped. The method also removes a replica from queues when it
   207  // is signaled via the removed channel.
   208  func (rs *replicaScanner) waitAndProcess(
   209  	ctx context.Context, stopper *stop.Stopper, start time.Time, repl *Replica,
   210  ) bool {
   211  	waitInterval := rs.paceInterval(start, timeutil.Now())
   212  	rs.waitTimer.Reset(waitInterval)
   213  	if log.V(6) {
   214  		log.Infof(ctx, "wait timer interval set to %s", waitInterval)
   215  	}
   216  	for {
   217  		select {
   218  		case <-rs.waitTimer.C:
   219  			if log.V(6) {
   220  				log.Infof(ctx, "wait timer fired")
   221  			}
   222  			rs.waitTimer.Read = true
   223  			if repl == nil {
   224  				return false
   225  			}
   226  
   227  			if log.V(2) {
   228  				log.Infof(ctx, "replica scanner processing %s", repl)
   229  			}
   230  			for _, q := range rs.queues {
   231  				q.MaybeAddAsync(ctx, repl, rs.clock.Now())
   232  			}
   233  			return false
   234  
   235  		case repl := <-rs.removed:
   236  			rs.removeReplica(repl)
   237  
   238  		case <-stopper.ShouldStop():
   239  			return true
   240  		}
   241  	}
   242  }
   243  
   244  func (rs *replicaScanner) removeReplica(repl *Replica) {
   245  	// Remove replica from all queues as applicable. Note that we still
   246  	// process removals while disabled.
   247  	rangeID := repl.RangeID
   248  	for _, q := range rs.queues {
   249  		q.MaybeRemove(rangeID)
   250  	}
   251  	if log.V(6) {
   252  		ctx := rs.AnnotateCtx(context.TODO())
   253  		log.Infof(ctx, "removed replica %s", repl)
   254  	}
   255  }
   256  
   257  // scanLoop loops endlessly, scanning through replicas available via
   258  // the replica set, or until the scanner is stopped. The iteration
   259  // is paced to complete a full scan in approximately the scan interval.
   260  func (rs *replicaScanner) scanLoop(stopper *stop.Stopper) {
   261  	ctx := rs.AnnotateCtx(context.Background())
   262  	stopper.RunWorker(ctx, func(ctx context.Context) {
   263  		start := timeutil.Now()
   264  
   265  		// waitTimer is reset in each call to waitAndProcess.
   266  		defer rs.waitTimer.Stop()
   267  
   268  		for {
   269  			if rs.GetDisabled() {
   270  				if done := rs.waitEnabled(stopper); done {
   271  					return
   272  				}
   273  				continue
   274  			}
   275  			var shouldStop bool
   276  			count := 0
   277  			rs.replicas.Visit(func(repl *Replica) bool {
   278  				count++
   279  				shouldStop = rs.waitAndProcess(ctx, stopper, start, repl)
   280  				return !shouldStop
   281  			})
   282  			if count == 0 {
   283  				// No replicas processed, just wait.
   284  				shouldStop = rs.waitAndProcess(ctx, stopper, start, nil)
   285  			}
   286  
   287  			// waitAndProcess returns true when the system is stopping. Note that this
   288  			// means we don't have to check the stopper as well.
   289  			if shouldStop {
   290  				return
   291  			}
   292  
   293  			// Increment iteration count.
   294  			func() {
   295  				rs.mu.Lock()
   296  				defer rs.mu.Unlock()
   297  				rs.mu.scanCount++
   298  				rs.mu.total += timeutil.Since(start)
   299  			}()
   300  			if log.V(6) {
   301  				log.Infof(ctx, "reset replica scan iteration")
   302  			}
   303  
   304  			// Reset iteration and start time.
   305  			start = timeutil.Now()
   306  		}
   307  	})
   308  }
   309  
   310  // waitEnabled loops, removing replicas from the scanner's queues,
   311  // until scanning is enabled or the stopper signals shutdown,
   312  func (rs *replicaScanner) waitEnabled(stopper *stop.Stopper) bool {
   313  	rs.mu.Lock()
   314  	rs.mu.waitEnabledCount++
   315  	rs.mu.Unlock()
   316  	for {
   317  		if !rs.GetDisabled() {
   318  			return false
   319  		}
   320  		select {
   321  		case <-rs.setDisabledCh:
   322  			continue
   323  
   324  		case repl := <-rs.removed:
   325  			rs.removeReplica(repl)
   326  
   327  		case <-stopper.ShouldStop():
   328  			return true
   329  		}
   330  	}
   331  }