github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/split/decider.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 // TODO(tbg): rename this package. `lbsplit`? 12 13 package split 14 15 import ( 16 "time" 17 18 "github.com/cockroachdb/cockroach/pkg/keys" 19 "github.com/cockroachdb/cockroach/pkg/roachpb" 20 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 21 ) 22 23 const minSplitSuggestionInterval = time.Minute 24 25 // A Decider collects measurements about the activity (measured in qps) on a 26 // Replica and, assuming that qps thresholds are exceeded, tries to determine 27 // a split key that would approximately result in halving the load on each of 28 // the resultant ranges. 29 // 30 // Operations should call `Record` with a current timestamp. Operation counts 31 // are aggregated over a second and a qps computed. If the QPS is above threshold, 32 // a split finder is instantiated and the spans supplied to Record are sampled 33 // for a duration (on the order of ten seconds). Assuming that load consistently 34 // remains over threshold, and the workload touches a diverse enough set of keys 35 // to benefit from a split, sampling will eventually instruct a caller of Record 36 // to carry out a split. When the split is initiated, it can obtain the suggested 37 // split point from MaybeSplitKey (which may have disappeared either due to a drop 38 // in qps or a change in the workload). 39 type Decider struct { 40 intn func(n int) int // supplied to Init 41 qpsThreshold func() float64 // supplied to Init 42 43 mu struct { 44 syncutil.Mutex 45 lastQPSRollover time.Time // most recent time recorded by requests. 46 qps float64 // last reqs/s rate as of lastQPSRollover 47 48 count int64 // number of requests recorded since last rollover 49 splitFinder *Finder // populated when engaged or decided 50 lastSplitSuggestion time.Time // last stipulation to client to carry out split 51 } 52 } 53 54 // Init initializes a Decider (which is assumed to be zero). The signature allows 55 // embedding the Decider into a larger struct outside of the scope of this package 56 // without incurring a pointer reference. This is relevant since many Deciders 57 // may exist in the system at any given point in time. 58 func Init(lbs *Decider, intn func(n int) int, qpsThreshold func() float64) { 59 lbs.intn = intn 60 lbs.qpsThreshold = qpsThreshold 61 } 62 63 // Record notifies the Decider that 'n' operations are being carried out which 64 // operate on the span returned by the supplied method. The closure will only 65 // be called when necessary, that is, when the Decider is considering a split 66 // and is sampling key spans to determine a suitable split point. 67 // 68 // If the returned boolean is true, a split key is available (though it may 69 // disappear as more keys are sampled) and should be initiated by the caller, 70 // which can call MaybeSplitKey to retrieve the suggested key. 71 func (d *Decider) Record(now time.Time, n int, span func() roachpb.Span) bool { 72 d.mu.Lock() 73 defer d.mu.Unlock() 74 75 return d.recordLocked(now, n, span) 76 } 77 78 func (d *Decider) recordLocked(now time.Time, n int, span func() roachpb.Span) bool { 79 d.mu.count += int64(n) 80 81 // First compute requests per second since the last check. 82 elapsedSinceLastQPS := now.Sub(d.mu.lastQPSRollover) 83 if elapsedSinceLastQPS >= time.Second { 84 if elapsedSinceLastQPS > 2*time.Second { 85 // Force a QPS of zero; there wasn't any activity within the last 86 // second at all. 87 d.mu.count = 0 88 } 89 // Update the QPS and reset the time and request counter. 90 d.mu.qps = (float64(d.mu.count) / float64(elapsedSinceLastQPS)) * 1e9 91 d.mu.lastQPSRollover = now 92 d.mu.count = 0 93 94 // If the QPS for the range exceeds the threshold, start actively 95 // tracking potential for splitting this range based on load. 96 // This tracking will begin by initiating a splitFinder so it can 97 // begin to Record requests so it can find a split point. If a 98 // splitFinder already exists, we check if a split point is ready 99 // to be used. 100 if d.mu.qps >= d.qpsThreshold() { 101 if d.mu.splitFinder == nil { 102 d.mu.splitFinder = NewFinder(now) 103 } 104 } else { 105 d.mu.splitFinder = nil 106 } 107 } 108 109 if d.mu.splitFinder != nil && n != 0 { 110 s := span() 111 if s.Key != nil { 112 d.mu.splitFinder.Record(span(), d.intn) 113 } 114 if now.Sub(d.mu.lastSplitSuggestion) > minSplitSuggestionInterval && d.mu.splitFinder.Ready(now) && d.mu.splitFinder.Key() != nil { 115 d.mu.lastSplitSuggestion = now 116 return true 117 } 118 } 119 return false 120 } 121 122 // LastQPS returns the most recent QPS measurement. 123 func (d *Decider) LastQPS(now time.Time) float64 { 124 d.mu.Lock() 125 d.recordLocked(now, 0, nil) 126 qps := d.mu.qps 127 d.mu.Unlock() 128 129 return qps 130 } 131 132 // MaybeSplitKey returns a key to perform a split at. The return value will be 133 // nil if either the Decider hasn't decided that a split should be carried out 134 // or if it wasn't able to determine a suitable split key. 135 // 136 // It is legal to call MaybeSplitKey at any time. 137 func (d *Decider) MaybeSplitKey(now time.Time) roachpb.Key { 138 var key roachpb.Key 139 140 d.mu.Lock() 141 d.recordLocked(now, 0, nil) 142 if d.mu.splitFinder != nil && d.mu.splitFinder.Ready(now) { 143 // We've found a key to split at. This key might be in the middle of a 144 // SQL row. If we fail to rectify that, we'll cause SQL crashes: 145 // 146 // https://github.com/cockroachdb/cockroach/pull/42056 147 // 148 // While the behavior at the SQL level is arguably bad and should be 149 // fixed, splitting between column families is also never a good idea 150 // for performance in general. So, if the split key is, say 151 // 152 // /Table/51/52/53/54/55/9/1 153 // 154 // then we want to split instead at 155 // 156 // /Table/51/52/53/54/55 157 // 158 // (see TestDeciderCallsEnsureSafeSplitKey). 159 // 160 // The key found here isn't guaranteed to be a valid SQL column family 161 // key. This is because the keys are sampled from StartKey of requests 162 // hitting this replica. Ranged operations may well wish to exclude the 163 // start point by calling .Next() or may span multiple ranges, and so 164 // such a key may end up being passed to EnsureSafeSplitKey here. 165 // 166 // We take the risk that the result may sometimes not be a good split 167 // point (or even in this range). 168 // 169 // Note that we ignore EnsureSafeSplitKey when it returns an error since 170 // that error only tells us that this key couldn't possibly be a SQL 171 // key. This is more common than one might think since SQL issues plenty 172 // of scans over all column families, meaning that we'll frequently find 173 // a key that has no column family suffix and thus errors out in 174 // EnsureSafeSplitKey. 175 key = d.mu.splitFinder.Key() 176 if safeKey, err := keys.EnsureSafeSplitKey(key); err == nil { 177 key = safeKey 178 } 179 } 180 d.mu.Unlock() 181 182 return key 183 } 184 185 // Reset deactivates any current attempt at determining a split key. 186 func (d *Decider) Reset() { 187 d.mu.Lock() 188 d.mu.splitFinder = nil 189 d.mu.count = 0 190 d.mu.Unlock() 191 }