github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_backpressure.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"context"
	"time"

	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/errors"
)

var backpressureLogLimiter = log.Every(500 * time.Millisecond)

// backpressureRangeSizeMultiplier is the multiple of range_max_bytes that a
// range's size must grow to before backpressure will be applied on writes. Set
// to 0 to disable backpressure altogether.
var backpressureRangeSizeMultiplier = settings.RegisterValidatedFloatSetting(
	"kv.range.backpressure_range_size_multiplier",
	"multiple of range_max_bytes that a range is allowed to grow to without "+
		"splitting before writes to that range are blocked, or 0 to disable",
	2.0,
	func(v float64) error {
		if v != 0 && v < 1 {
			return errors.Errorf("backpressure multiplier cannot be smaller than 1: %f", v)
		}
		return nil
	},
)

// backpressureByteTolerance exists to deal with the fact that lowering the
// range size by anything larger than the backpressureRangeSizeMultiplier would
// immediately mean that all ranges require backpressure. To mitigate this
// unwanted backpressure we say that any range which is larger than the
// size where backpressure would kick in by more than this quantity will
// immediately avoid backpressure. This approach is a bit risky because a
// command larger than this value would effectively disable backpressure
// altogether. Another downside of this approach is that if the range size
// is reduced by roughly exactly the multiplier then we'd potentially have
// lots of ranges in this state.
//
// We mitigate this situation further by doing the following:
//
// 1) We store in-memory on each replica the largest zone configuration range
//    size (largestPreviousMaxRangeBytes) we've seen and we do not backpressure
//    if the current range size is less than that. That value is cleared when
//    a range splits or runs GC such that the range size becomes smaller than
//    the current max range size. This mitigation alone is insufficient because
//    a node may restart before the splitting has concluded, leaving the
//    cluster in a state of backpressure.
//
// 2) We assign a higher priority in the split queue to ranges which are
//    currently backpressuring than to ranges which are larger but are not
//    applying backpressure.
//
var backpressureByteTolerance = settings.RegisterByteSizeSetting(
	"kv.range.backpressure_byte_tolerance",
	"defines the number of bytes above the product of "+
		"backpressure_range_size_multiplier and the range_max_size at which "+
		"backpressure will not apply",
	32<<20 /* 32 MiB */)

// backpressurableSpans contains spans of keys where write backpressuring
// is permitted.
// Writes to any keys within these spans may cause a batch to be
// backpressured.
var backpressurableSpans = []roachpb.Span{
	{Key: keys.TimeseriesPrefix, EndKey: keys.TimeseriesKeyMax},
	// Backpressure from the end of the system config forward instead of
	// over all table data to avoid backpressuring unsplittable ranges.
	{Key: keys.SystemConfigTableDataMax, EndKey: keys.TableDataMax},
}

// canBackpressureBatch returns whether the provided BatchRequest is eligible
// for backpressure.
func canBackpressureBatch(ba *roachpb.BatchRequest) bool {
	// Don't backpressure splits themselves.
	if ba.Txn != nil && ba.Txn.Name == splitTxnName {
		return false
	}

	// Only backpressure batches containing a "backpressurable"
	// method that is within a "backpressurable" key span.
	for _, ru := range ba.Requests {
		req := ru.GetInner()
		if !roachpb.CanBackpressure(req) {
			continue
		}

		for _, s := range backpressurableSpans {
			if s.Contains(req.Header().Span()) {
				return true
			}
		}
	}
	return false
}

// shouldBackpressureWrites returns whether writes to the range should be
// subject to backpressure. This is based on the size of the range in
// relation to the split size. The method returns true if the range is more
// than backpressureRangeSizeMultiplier times larger than the split size but not
// larger than that by more than backpressureByteTolerance (see that comment for
// further explanation).
func (r *Replica) shouldBackpressureWrites() bool {
	mult := backpressureRangeSizeMultiplier.Get(&r.store.cfg.Settings.SV)
	if mult == 0 {
		// Disabled.
		return false
	}

	r.mu.RLock()
	defer r.mu.RUnlock()
	exceeded, bytesOver := r.exceedsMultipleOfSplitSizeRLocked(mult)
	if !exceeded {
		return false
	}
	if bytesOver > backpressureByteTolerance.Get(&r.store.cfg.Settings.SV) {
		return false
	}
	return true
}

// maybeBackpressureBatch blocks to apply backpressure if the replica deems
// that backpressure is necessary.
func (r *Replica) maybeBackpressureBatch(ctx context.Context, ba *roachpb.BatchRequest) error {
	if !canBackpressureBatch(ba) {
		return nil
	}

	// If we need to apply backpressure, wait for an ongoing split to finish
	// if one exists. This does not place a hard upper bound on the size of
	// a range because we don't track all in-flight requests (like we do for
	// the quota pool), but it does create an effective soft upper bound.
	for first := true; r.shouldBackpressureWrites(); first = false {
		if first {
			r.store.metrics.BackpressuredOnSplitRequests.Inc(1)
			defer r.store.metrics.BackpressuredOnSplitRequests.Dec(1)

			if backpressureLogLimiter.ShouldLog() {
				log.Warningf(ctx, "applying backpressure to limit range growth on batch %s", ba)
			}
		}

		// Register a callback on an ongoing split for this range in the splitQueue.
		splitC := make(chan error, 1)
		if !r.store.splitQueue.MaybeAddCallback(r.RangeID, func(err error) {
			splitC <- err
		}) {
			// No split ongoing. We may have raced with its completion. There's
			// no good way to prevent this race, so we conservatively allow the
			// request to proceed instead of throwing an error that would surface
			// to the client.
			return nil
		}

		// Wait for the callback to be called.
		select {
		case <-ctx.Done():
			return errors.Wrapf(
				ctx.Err(), "aborted while applying backpressure to %s on range %s", ba, r.Desc(),
			)
		case err := <-splitC:
			if err != nil {
				return errors.Wrapf(
					err, "split failed while applying backpressure to %s on range %s", ba, r.Desc(),
				)
			}
		}
	}
	return nil
}
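
// The following is an illustrative sketch, not part of the original file: it
// restates the size window that shouldBackpressureWrites implements, ignoring
// the largestPreviousMaxRangeBytes mitigation described above. The helper name
// and parameters (wouldBackpressure, size, maxBytes, mult, tolerance) are
// hypothetical. A range is backpressured only while its size lies in the
// interval (mult*maxBytes, mult*maxBytes + tolerance].
func wouldBackpressure(size, maxBytes int64, mult float64, tolerance int64) bool {
	if mult == 0 {
		// A multiplier of zero disables backpressure entirely.
		return false
	}
	threshold := int64(mult * float64(maxBytes))
	bytesOver := size - threshold
	if bytesOver <= 0 {
		// Still below the point where backpressure kicks in.
		return false
	}
	// Beyond the tolerance, the range is treated as having outgrown
	// backpressure (e.g. after the configured max range size was lowered
	// drastically), mirroring the byte-tolerance check above.
	return bytesOver <= tolerance
}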