github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/split/finder.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package split
    12  
    13  import (
    14  	"bytes"
    15  	"math"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    19  )
    20  
    21  // Load-based splitting.
    22  //
    23  // - Engage split for ranges:
    24  //  - With size exceeding min-range-bytes
    25  //  - with reqs/s rate over a configurable threshold
    26  // - Disengage when a range no longer meets the criteria
    27  // - During split:
    28  //  - Record start time
//  - Keep a sample of 20 keys (splitKeySampleSize)
    30  //   - Each sample contains three counters: left, right and contained.
//   - On each span that does not overlap the key, increment the left or
//     the right counter, depending on whether the span falls entirely to
//     the left or to the right of the key. A span that starts exactly on
//     the key counts as being to the right.
    34  //   - If the span overlaps with the key, increment the contained counter.
    35  //   - When a sample is replaced, discard its counters.
    36  //  - If a range is on for more than a threshold interval:
    37  //   - Examine sample for the smallest diff between left and right counters,
    38  //     excluding any whose counters are not sufficiently advanced;
    39  //     If not less than some constant threshold, skip split.
    40  //   - Use the contained counters to give lower priority to potential split
    41  //     points that have more requests that span over it.
    42  //   - If a desired point is reached, add range to split queue with the chosen
    43  //     key as split key, and provide hint to scatter the replicas.
    44  
    45  const (
    46  	// RecordDurationThreshold is the minimum duration of time the split finder
    47  	// will record a range for, before being ready for a split.
    48  	RecordDurationThreshold    = 10 * time.Second // 10s
    49  	splitKeySampleSize         = 20               // size of split key sample
    50  	splitKeyMinCounter         = 100              // min aggregate counters before consideration
    51  	splitKeyThreshold          = 0.25             // 25% difference between left/right counters
    52  	splitKeyContainedThreshold = 0.50             // too many spanning queries over split point
    53  )
    54  
    55  type sample struct {
    56  	key                    roachpb.Key
    57  	left, right, contained int
    58  }
    59  
    60  // Finder is a structure that is used to determine the split point
    61  // using the Reservoir Sampling method.
    62  type Finder struct {
    63  	startTime time.Time
    64  	samples   [splitKeySampleSize]sample
    65  	count     int
    66  }
    67  
    68  // NewFinder initiates a Finder with the given time.
    69  func NewFinder(startTime time.Time) *Finder {
    70  	return &Finder{
    71  		startTime: startTime,
    72  	}
    73  }
    74  
    75  // Ready checks if the Finder has been initialized with a sufficient
    76  // sample duration.
    77  func (f *Finder) Ready(nowTime time.Time) bool {
    78  	return nowTime.Sub(f.startTime) > RecordDurationThreshold
    79  }
    80  
    81  // Record informs the Finder about where the span lies with
    82  // regard to the keys in the samples.
    83  func (f *Finder) Record(span roachpb.Span, intNFn func(int) int) {
    84  	if f == nil {
    85  		return
    86  	}
    87  
    88  	var idx int
    89  	count := f.count
    90  	f.count++
    91  	if count < splitKeySampleSize {
    92  		idx = count
    93  	} else if idx = intNFn(count); idx >= splitKeySampleSize {
    94  		// Increment all existing keys' counters.
    95  		for i := range f.samples {
    96  			if span.ProperlyContainsKey(f.samples[i].key) {
    97  				f.samples[i].contained++
    98  			} else {
    99  				// If the split is chosen to be here and the key is on or to the left
   100  				// of the start key of the span, we know that the request the span represents
   101  				// - would be isolated to the right of the split point.
   102  				// Similarly, if the split key is greater than the start key of the span
   103  				// (and given that it is not properly contained by the span) it must mean
   104  				// that the request the span represents would be on the left.
   105  				if comp := bytes.Compare(f.samples[i].key, span.Key); comp <= 0 {
   106  					f.samples[i].right++
   107  				} else if comp > 0 {
   108  					f.samples[i].left++
   109  				}
   110  			}
   111  		}
   112  		return
   113  	}
   114  
   115  	// Note we always use the start key of the span. We could
   116  	// take the average of the byte slices, but that seems
   117  	// unnecessarily complex for practical usage.
   118  	f.samples[idx] = sample{key: span.Key}
   119  }
   120  
   121  // Key finds an appropriate split point based on the Reservoir sampling method.
   122  // Returns a nil key if no appropriate key was found.
   123  func (f *Finder) Key() roachpb.Key {
   124  	if f == nil {
   125  		return nil
   126  	}
   127  
   128  	var bestIdx = -1
   129  	var bestScore float64 = 2
   130  	for i, s := range f.samples {
   131  		if s.left+s.right+s.contained < splitKeyMinCounter {
   132  			continue
   133  		}
   134  		balanceScore := math.Abs(float64(s.left-s.right)) / float64(s.left+s.right)
   135  		containedScore := (float64(s.contained) / float64(s.left+s.right+s.contained))
   136  		finalScore := balanceScore + containedScore
   137  		if balanceScore >= splitKeyThreshold ||
   138  			containedScore >= splitKeyContainedThreshold {
   139  			continue
   140  		}
   141  		if finalScore < bestScore {
   142  			bestIdx = i
   143  			bestScore = finalScore
   144  		}
   145  	}
   146  
   147  	if bestIdx == -1 {
   148  		return nil
   149  	}
   150  	return f.samples[bestIdx].key
   151  }