github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_backpressure.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"context"
    15  	"time"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/keys"
    18  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    19  	"github.com/cockroachdb/cockroach/pkg/settings"
    20  	"github.com/cockroachdb/cockroach/pkg/util/log"
    21  	"github.com/cockroachdb/errors"
    22  )
    23  
    24  var backpressureLogLimiter = log.Every(500 * time.Millisecond)
    25  
    26  // backpressureRangeSizeMultiplier is the multiple of range_max_bytes that a
    27  // range's size must grow to before backpressure will be applied on writes. Set
    28  // to 0 to disable backpressure altogether.
    29  var backpressureRangeSizeMultiplier = settings.RegisterValidatedFloatSetting(
    30  	"kv.range.backpressure_range_size_multiplier",
    31  	"multiple of range_max_bytes that a range is allowed to grow to without "+
    32  		"splitting before writes to that range are blocked, or 0 to disable",
    33  	2.0,
    34  	func(v float64) error {
    35  		if v != 0 && v < 1 {
    36  			return errors.Errorf("backpressure multiplier cannot be smaller than 1: %f", v)
    37  		}
    38  		return nil
    39  	},
    40  )
    41  
    42  // backpressureByteTolerance exists to deal with the fact that lowering the
    43  // range size by anything larger than the backpressureRangeSizeMultiplier would
    44  // immediately mean that all ranges require backpressure. To mitigate this
    45  // unwanted backpressure we say that any range which is larger than the
    46  // size where backpressure would kick in by more than this quantity will
    47  // immediately avoid backpressure. This approach is a bit risky because a
    48  // command larger than this value would effectively disable backpressure
    49  // altogether. Another downside of this approach is that if the range size
    50  // is reduced by roughly exactly the multiplier then we'd potentially have
    51  // lots of ranges in this state.
    52  //
    53  // We additionally mitigate this situation further by doing the following:
    54  //
    55  //  1) We store in-memory on each replica the largest zone configuration range
    56  //     size (largestPreviousMaxRangeBytes) we've seen and we do not backpressure
    57  //     if the current range size is less than that. That value is cleared when
    58  //     a range splits or runs GC such that the range size becomes smaller than
    59  //     the current max range size. This mitigation alone is insufficient because
    60  //     a node may restart before the splitting has concluded, leaving the
    61  //     cluster in a state of backpressure.
    62  //
    63  //  2) We assign a higher priority in the snapshot queue to ranges which are
    64  //     currently backpressuring than ranges which are larger but are not
    65  //     applying backpressure.
    66  //
    67  var backpressureByteTolerance = settings.RegisterByteSizeSetting(
    68  	"kv.range.backpressure_byte_tolerance",
    69  	"defines the number of bytes above the product of "+
    70  		"backpressure_range_size_multiplier and the range_max_size at which "+
    71  		"backpressure will not apply",
    72  	32<<20 /* 32 MiB */)
    73  
    74  // backpressurableSpans contains spans of keys where write backpressuring
    75  // is permitted. Writes to any keys within these spans may cause a batch
    76  // to be backpressured.
    77  var backpressurableSpans = []roachpb.Span{
    78  	{Key: keys.TimeseriesPrefix, EndKey: keys.TimeseriesKeyMax},
    79  	// Backpressure from the end of the system config forward instead of
    80  	// over all table data to avoid backpressuring unsplittable ranges.
    81  	{Key: keys.SystemConfigTableDataMax, EndKey: keys.TableDataMax},
    82  }
    83  
    84  // canBackpressureBatch returns whether the provided BatchRequest is eligible
    85  // for backpressure.
    86  func canBackpressureBatch(ba *roachpb.BatchRequest) bool {
    87  	// Don't backpressure splits themselves.
    88  	if ba.Txn != nil && ba.Txn.Name == splitTxnName {
    89  		return false
    90  	}
    91  
    92  	// Only backpressure batches containing a "backpressurable"
    93  	// method that is within a "backpressurable" key span.
    94  	for _, ru := range ba.Requests {
    95  		req := ru.GetInner()
    96  		if !roachpb.CanBackpressure(req) {
    97  			continue
    98  		}
    99  
   100  		for _, s := range backpressurableSpans {
   101  			if s.Contains(req.Header().Span()) {
   102  				return true
   103  			}
   104  		}
   105  	}
   106  	return false
   107  }
   108  
   109  // shouldBackpressureWrites returns whether writes to the range should be
   110  // subject to backpressure. This is based on the size of the range in
   111  // relation to the split size. The method returns true if the range is more
   112  // than backpressureRangeSizeMultiplier times larger than the split size but not
   113  // larger than that by more than backpressureByteTolerance (see that comment for
   114  // further explanation).
   115  func (r *Replica) shouldBackpressureWrites() bool {
   116  	mult := backpressureRangeSizeMultiplier.Get(&r.store.cfg.Settings.SV)
   117  	if mult == 0 {
   118  		// Disabled.
   119  		return false
   120  	}
   121  
   122  	r.mu.RLock()
   123  	defer r.mu.RUnlock()
   124  	exceeded, bytesOver := r.exceedsMultipleOfSplitSizeRLocked(mult)
   125  	if !exceeded {
   126  		return false
   127  	}
   128  	if bytesOver > backpressureByteTolerance.Get(&r.store.cfg.Settings.SV) {
   129  		return false
   130  	}
   131  	return true
   132  }
   133  
   134  // maybeBackpressureBatch blocks to apply backpressure if the replica deems
   135  // that backpressure is necessary.
   136  func (r *Replica) maybeBackpressureBatch(ctx context.Context, ba *roachpb.BatchRequest) error {
   137  	if !canBackpressureBatch(ba) {
   138  		return nil
   139  	}
   140  
   141  	// If we need to apply backpressure, wait for an ongoing split to finish
   142  	// if one exists. This does not place a hard upper bound on the size of
   143  	// a range because we don't track all in-flight requests (like we do for
   144  	// the quota pool), but it does create an effective soft upper bound.
   145  	for first := true; r.shouldBackpressureWrites(); first = false {
   146  		if first {
   147  			r.store.metrics.BackpressuredOnSplitRequests.Inc(1)
   148  			defer r.store.metrics.BackpressuredOnSplitRequests.Dec(1)
   149  
   150  			if backpressureLogLimiter.ShouldLog() {
   151  				log.Warningf(ctx, "applying backpressure to limit range growth on batch %s", ba)
   152  			}
   153  		}
   154  
   155  		// Register a callback on an ongoing split for this range in the splitQueue.
   156  		splitC := make(chan error, 1)
   157  		if !r.store.splitQueue.MaybeAddCallback(r.RangeID, func(err error) {
   158  			splitC <- err
   159  		}) {
   160  			// No split ongoing. We may have raced with its completion. There's
   161  			// no good way to prevent this race, so we conservatively allow the
   162  			// request to proceed instead of throwing an error that would surface
   163  			// to the client.
   164  			return nil
   165  		}
   166  
   167  		// Wait for the callback to be called.
   168  		select {
   169  		case <-ctx.Done():
   170  			return errors.Wrapf(
   171  				ctx.Err(), "aborted while applying backpressure to %s on range %s", ba, r.Desc(),
   172  			)
   173  		case err := <-splitC:
   174  			if err != nil {
   175  				return errors.Wrapf(
   176  					err, "split failed while applying backpressure to %s on range %s", ba, r.Desc(),
   177  				)
   178  			}
   179  		}
   180  	}
   181  	return nil
   182  }