github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/split/finder.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package split 12 13 import ( 14 "bytes" 15 "math" 16 "time" 17 18 "github.com/cockroachdb/cockroach/pkg/roachpb" 19 ) 20 21 // Load-based splitting. 22 // 23 // - Engage split for ranges: 24 // - With size exceeding min-range-bytes 25 // - with reqs/s rate over a configurable threshold 26 // - Disengage when a range no longer meets the criteria 27 // - During split: 28 // - Record start time 29 // - Keep a sample of 20 keys (splitKeySampleSize) 30 // - Each sample contains three counters: left, right and contained. 31 // - On each span, increment the left or the right counter, depending 32 // on whether the span falls entirely to the left or to the right of the key. 33 // A span that starts exactly on the key counts as falling to the right. 34 // - If the span overlaps with the key, increment the contained counter. 35 // - When a sample is replaced, discard its counters. 36 // - If a range is on for more than a threshold interval: 37 // - Examine sample for the smallest diff between left and right counters, 38 // excluding any whose counters are not sufficiently advanced; 39 // If not less than some constant threshold, skip split. 40 // - Use the contained counters to give lower priority to potential split 41 // points that have more requests that span over it. 42 // - If a desired point is reached, add range to split queue with the chosen 43 // key as split key, and provide hint to scatter the replicas. 
44 45 const ( 46 // RecordDurationThreshold is the minimum duration of time the split finder 47 // will record a range for, before being ready for a split. 48 RecordDurationThreshold = 10 * time.Second // 10s 49 splitKeySampleSize = 20 // size of split key sample 50 splitKeyMinCounter = 100 // min aggregate counters before consideration 51 splitKeyThreshold = 0.25 // 25% difference between left/right counters 52 splitKeyContainedThreshold = 0.50 // too many spanning queries over split point 53 ) 54 55 type sample struct { 56 key roachpb.Key 57 left, right, contained int 58 } 59 60 // Finder is a structure that is used to determine the split point 61 // using the Reservoir Sampling method. 62 type Finder struct { 63 startTime time.Time 64 samples [splitKeySampleSize]sample 65 count int 66 } 67 68 // NewFinder initiates a Finder with the given time. 69 func NewFinder(startTime time.Time) *Finder { 70 return &Finder{ 71 startTime: startTime, 72 } 73 } 74 75 // Ready checks if the Finder has been initialized with a sufficient 76 // sample duration. 77 func (f *Finder) Ready(nowTime time.Time) bool { 78 return nowTime.Sub(f.startTime) > RecordDurationThreshold 79 } 80 81 // Record informs the Finder about where the span lies with 82 // regard to the keys in the samples. 83 func (f *Finder) Record(span roachpb.Span, intNFn func(int) int) { 84 if f == nil { 85 return 86 } 87 88 var idx int 89 count := f.count 90 f.count++ 91 if count < splitKeySampleSize { 92 idx = count 93 } else if idx = intNFn(count); idx >= splitKeySampleSize { 94 // Increment all existing keys' counters. 95 for i := range f.samples { 96 if span.ProperlyContainsKey(f.samples[i].key) { 97 f.samples[i].contained++ 98 } else { 99 // If the split is chosen to be here and the key is on or to the left 100 // of the start key of the span, we know that the request the span represents 101 // - would be isolated to the right of the split point. 
102 // Similarly, if the split key is greater than the start key of the span 103 // (and given that it is not properly contained by the span) it must mean 104 // that the request the span represents would be on the left. 105 if comp := bytes.Compare(f.samples[i].key, span.Key); comp <= 0 { 106 f.samples[i].right++ 107 } else if comp > 0 { 108 f.samples[i].left++ 109 } 110 } 111 } 112 return 113 } 114 115 // Note we always use the start key of the span. We could 116 // take the average of the byte slices, but that seems 117 // unnecessarily complex for practical usage. 118 f.samples[idx] = sample{key: span.Key} 119 } 120 121 // Key finds an appropriate split point based on the Reservoir sampling method. 122 // Returns a nil key if no appropriate key was found. 123 func (f *Finder) Key() roachpb.Key { 124 if f == nil { 125 return nil 126 } 127 128 var bestIdx = -1 129 var bestScore float64 = 2 130 for i, s := range f.samples { 131 if s.left+s.right+s.contained < splitKeyMinCounter { 132 continue 133 } 134 balanceScore := math.Abs(float64(s.left-s.right)) / float64(s.left+s.right) 135 containedScore := (float64(s.contained) / float64(s.left+s.right+s.contained)) 136 finalScore := balanceScore + containedScore 137 if balanceScore >= splitKeyThreshold || 138 containedScore >= splitKeyContainedThreshold { 139 continue 140 } 141 if finalScore < bestScore { 142 bestIdx = i 143 bestScore = finalScore 144 } 145 } 146 147 if bestIdx == -1 { 148 return nil 149 } 150 return f.samples[bestIdx].key 151 }