k8s.io/apiserver@v0.31.1/pkg/util/flowcontrol/dropped_requests_tracker.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package flowcontrol 18 19 import ( 20 "sync" 21 "sync/atomic" 22 "time" 23 24 "k8s.io/utils/clock" 25 ) 26 27 const ( 28 // maxRetryAfter represents the maximum possible retryAfter. 29 maxRetryAfter = int64(32) 30 ) 31 32 // DroppedRequestsTracker is an interface that allows tracking 33 // a history od dropped requests in the system for the purpose 34 // of adjusting RetryAfter header to avoid system overload. 35 type DroppedRequestsTracker interface { 36 // RecordDroppedRequest records a request that was just 37 // dropped from processing. 38 RecordDroppedRequest(plName string) 39 40 // GetRetryAfter returns the current suggested value of 41 // RetryAfter value. 42 GetRetryAfter(plName string) int64 43 } 44 45 // unixStat keeps a statistic how many requests were dropped within 46 // a single second. 47 type unixStat struct { 48 unixTime int64 49 requests int64 50 } 51 52 type droppedRequestsStats struct { 53 lock sync.RWMutex 54 55 // history stores the history of dropped requests. 56 history []unixStat 57 58 // To reduce lock-contention, we store the information about 59 // the current second here, which we can then access under 60 // reader lock. 61 currentUnix int64 62 currentCount atomic.Int64 63 64 retryAfter atomic.Int64 65 retryAfterUpdateUnix int64 66 } 67 68 func newDroppedRequestsStats(nowUnix int64) *droppedRequestsStats { 69 result := &droppedRequestsStats{ 70 // We assume that we can bump at any time after first dropped request. 71 retryAfterUpdateUnix: 0, 72 } 73 result.retryAfter.Store(1) 74 return result 75 } 76 77 func (s *droppedRequestsStats) recordDroppedRequest(unixTime int64) { 78 // Short path - if the current second matches passed time, 79 // just update the stats. 80 if done := func() bool { 81 s.lock.RLock() 82 defer s.lock.RUnlock() 83 if s.currentUnix == unixTime { 84 s.currentCount.Add(1) 85 return true 86 } 87 return false 88 }(); done { 89 return 90 } 91 92 // We trigger the change of <currentUnix>. 93 s.lock.Lock() 94 defer s.lock.Unlock() 95 if s.currentUnix == unixTime { 96 s.currentCount.Add(1) 97 return 98 } 99 100 s.updateHistory(s.currentUnix, s.currentCount.Load()) 101 s.currentUnix = unixTime 102 s.currentCount.Store(1) 103 104 // We only consider updating retryAfter when bumping the current second. 105 // However, given that we didn't report anything for the current second, 106 // we recompute it based on statistics from the previous one. 107 s.updateRetryAfterIfNeededLocked(unixTime) 108 } 109 110 func (s *droppedRequestsStats) updateHistory(unixTime int64, count int64) { 111 s.history = append(s.history, unixStat{unixTime: unixTime, requests: count}) 112 113 startIndex := 0 114 // Entries that exceed 2*retryAfter or maxRetryAfter are never going to be needed. 115 maxHistory := 2 * s.retryAfter.Load() 116 if maxHistory > maxRetryAfter { 117 maxHistory = maxRetryAfter 118 } 119 for ; startIndex < len(s.history) && unixTime-s.history[startIndex].unixTime > maxHistory; startIndex++ { 120 } 121 if startIndex > 0 { 122 s.history = s.history[startIndex:] 123 } 124 } 125 126 // updateRetryAfterIfNeededLocked updates the retryAfter based on the number of 127 // dropped requests in the last `retryAfter` seconds: 128 // - if there were less than `retryAfter` dropped requests, it decreases 129 // retryAfter 130 // - if there were at least 3*`retryAfter` dropped requests, it increases 131 // retryAfter 132 // 133 // The rationale behind these numbers being fairly low is that APF is queuing 134 // requests and rejecting (dropping) them is a last resort, which is not expected 135 // unless a given priority level is actually overloaded. 136 // 137 // Additionally, we rate-limit the increases of retryAfter to wait at least 138 // `retryAfter' seconds after the previous increase to avoid multiple bumps 139 // on a single spike. 140 // 141 // We're working with the interval [unixTime-retryAfter, unixTime). 142 func (s *droppedRequestsStats) updateRetryAfterIfNeededLocked(unixTime int64) { 143 retryAfter := s.retryAfter.Load() 144 145 droppedRequests := int64(0) 146 for i := len(s.history) - 1; i >= 0; i-- { 147 if unixTime-s.history[i].unixTime > retryAfter { 148 break 149 } 150 if s.history[i].unixTime < unixTime { 151 droppedRequests += s.history[i].requests 152 } 153 } 154 155 if unixTime-s.retryAfterUpdateUnix >= retryAfter && droppedRequests >= 3*retryAfter { 156 // We try to mimic the TCP algorithm and thus are doubling 157 // the retryAfter here. 158 retryAfter *= 2 159 if retryAfter >= maxRetryAfter { 160 retryAfter = maxRetryAfter 161 } 162 s.retryAfter.Store(retryAfter) 163 s.retryAfterUpdateUnix = unixTime 164 return 165 } 166 167 if droppedRequests < retryAfter && retryAfter > 1 { 168 // We try to mimc the TCP algorithm and thus are linearly 169 // scaling down the retryAfter here. 170 retryAfter-- 171 s.retryAfter.Store(retryAfter) 172 return 173 } 174 } 175 176 // droppedRequestsTracker implement DroppedRequestsTracker interface 177 // for the purpose of adjusting RetryAfter header for newly dropped 178 // requests to avoid system overload. 179 type droppedRequestsTracker struct { 180 now func() time.Time 181 182 lock sync.RWMutex 183 plStats map[string]*droppedRequestsStats 184 } 185 186 // NewDroppedRequestsTracker is creating a new instance of 187 // DroppedRequestsTracker. 188 func NewDroppedRequestsTracker() DroppedRequestsTracker { 189 return newDroppedRequestsTracker(clock.RealClock{}.Now) 190 } 191 192 func newDroppedRequestsTracker(now func() time.Time) *droppedRequestsTracker { 193 return &droppedRequestsTracker{ 194 now: now, 195 plStats: make(map[string]*droppedRequestsStats), 196 } 197 } 198 199 func (t *droppedRequestsTracker) RecordDroppedRequest(plName string) { 200 unixTime := t.now().Unix() 201 202 stats := func() *droppedRequestsStats { 203 // The list of priority levels should change very infrequently, 204 // so in almost all cases, the fast path should be enough. 205 t.lock.RLock() 206 if plStats, ok := t.plStats[plName]; ok { 207 t.lock.RUnlock() 208 return plStats 209 } 210 t.lock.RUnlock() 211 212 // Slow path taking writer lock to update the map. 213 t.lock.Lock() 214 defer t.lock.Unlock() 215 if plStats, ok := t.plStats[plName]; ok { 216 return plStats 217 } 218 stats := newDroppedRequestsStats(unixTime) 219 t.plStats[plName] = stats 220 return stats 221 }() 222 223 stats.recordDroppedRequest(unixTime) 224 } 225 226 func (t *droppedRequestsTracker) GetRetryAfter(plName string) int64 { 227 t.lock.RLock() 228 defer t.lock.RUnlock() 229 230 if plStats, ok := t.plStats[plName]; ok { 231 return plStats.retryAfter.Load() 232 } 233 return 1 234 }