github.com/grafana/pyroscope@v1.18.0/pkg/scheduler/queue/user_queues.go (about) 1 // SPDX-License-Identifier: AGPL-3.0-only 2 // Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/scheduler/queue/user_queues.go 3 // Provenance-includes-license: Apache-2.0 4 // Provenance-includes-copyright: The Cortex Authors. 5 6 package queue 7 8 import ( 9 "math/rand" 10 "slices" 11 "sort" 12 "time" 13 14 "github.com/grafana/pyroscope/pkg/util" 15 ) 16 17 // querier holds information about a querier registered in the queue. 18 type querier struct { 19 // Number of active connections. 20 connections int 21 22 // True if the querier notified it's gracefully shutting down. 23 shuttingDown bool 24 25 // When the last connection has been unregistered. 26 disconnectedAt time.Time 27 } 28 29 // This struct holds user queues for pending requests. It also keeps track of connected queriers, 30 // and mapping between users and queriers. 31 type queues struct { 32 userQueues map[string]*userQueue 33 34 // List of all users with queues, used for iteration when searching for next queue to handle. 35 // Users removed from the middle are replaced with "". To avoid skipping users during iteration, we only shrink 36 // this list when there are ""'s at the end of it. 37 users []string 38 39 maxUserQueueSize int 40 41 // How long to wait before removing a querier which has got disconnected 42 // but hasn't notified about a graceful shutdown. 43 forgetDelay time.Duration 44 45 // Tracks queriers registered to the queue. 46 queriers map[string]*querier 47 48 // Sorted list of querier names, used when creating per-user shard. 49 sortedQueriers []string 50 } 51 52 type userQueue struct { 53 ch chan Request 54 55 // If not nil, only these queriers can handle user requests. If nil, all queriers can. 56 // We set this to nil if number of available queriers <= maxQueriers. 57 queriers map[string]struct{} 58 maxQueriers int 59 60 // Seed for shuffle sharding of queriers. This seed is based on userID only and is therefore consistent 61 // between different frontends. 62 seed int64 63 64 // Points back to 'users' field in queues. Enables quick cleanup. 65 index int 66 } 67 68 func newUserQueues(maxUserQueueSize int, forgetDelay time.Duration) *queues { 69 return &queues{ 70 userQueues: map[string]*userQueue{}, 71 users: nil, 72 maxUserQueueSize: maxUserQueueSize, 73 forgetDelay: forgetDelay, 74 queriers: map[string]*querier{}, 75 sortedQueriers: nil, 76 } 77 } 78 79 func (q *queues) len() int { 80 return len(q.userQueues) 81 } 82 83 func (q *queues) deleteQueue(userID string) { 84 uq := q.userQueues[userID] 85 if uq == nil { 86 return 87 } 88 89 delete(q.userQueues, userID) 90 q.users[uq.index] = "" 91 92 // Shrink users list size if possible. This is safe, and no users will be skipped during iteration. 93 for ix := len(q.users) - 1; ix >= 0 && q.users[ix] == ""; ix-- { 94 q.users = q.users[:ix] 95 } 96 } 97 98 // Returns existing or new queue for user. 99 // MaxQueriers is used to compute which queriers should handle requests for this user. 100 // If maxQueriers is <= 0, all queriers can handle this user's requests. 101 // If maxQueriers has changed since the last call, queriers for this are recomputed. 102 func (q *queues) getOrAddQueue(userID string, maxQueriers int) chan Request { 103 // Empty user is not allowed, as that would break our users list ("" is used for free spot). 104 if userID == "" { 105 return nil 106 } 107 108 if maxQueriers < 0 { 109 maxQueriers = 0 110 } 111 112 uq := q.userQueues[userID] 113 114 if uq == nil { 115 uq = &userQueue{ 116 ch: make(chan Request, q.maxUserQueueSize), 117 seed: util.ShuffleShardSeed(userID, ""), 118 index: -1, 119 } 120 q.userQueues[userID] = uq 121 122 // Add user to the list of users... find first free spot, and put it there. 123 for ix, u := range q.users { 124 if u == "" { 125 uq.index = ix 126 q.users[ix] = userID 127 break 128 } 129 } 130 131 // ... or add to the end. 132 if uq.index < 0 { 133 uq.index = len(q.users) 134 q.users = append(q.users, userID) 135 } 136 } 137 138 if uq.maxQueriers != maxQueriers { 139 uq.maxQueriers = maxQueriers 140 uq.queriers = shuffleQueriersForUser(uq.seed, maxQueriers, q.sortedQueriers, nil) 141 } 142 143 return uq.ch 144 } 145 146 // Finds next queue for the querier. To support fair scheduling between users, client is expected 147 // to pass last user index returned by this function as argument. Is there was no previous 148 // last user index, use -1. 149 func (q *queues) getNextQueueForQuerier(lastUserIndex int, querierID string) (chan Request, string, int) { 150 uid := lastUserIndex 151 152 // Ensure the querier is not shutting down. If the querier is shutting down, we shouldn't forward 153 // any more queries to it. 154 if info := q.queriers[querierID]; info == nil || info.shuttingDown { 155 return nil, "", uid 156 } 157 158 for iters := 0; iters < len(q.users); iters++ { 159 uid = uid + 1 160 161 // Don't use "mod len(q.users)", as that could skip users at the beginning of the list 162 // for example when q.users has shrunk since last call. 163 if uid >= len(q.users) { 164 uid = 0 165 } 166 167 u := q.users[uid] 168 if u == "" { 169 continue 170 } 171 172 q := q.userQueues[u] 173 174 if q.queriers != nil { 175 if _, ok := q.queriers[querierID]; !ok { 176 // This querier is not handling the user. 177 continue 178 } 179 } 180 181 return q.ch, u, uid 182 } 183 return nil, "", uid 184 } 185 186 func (q *queues) addQuerierConnection(querierID string) { 187 info := q.queriers[querierID] 188 if info != nil { 189 info.connections++ 190 191 // Reset in case the querier re-connected while it was in the forget waiting period. 192 info.shuttingDown = false 193 info.disconnectedAt = time.Time{} 194 195 return 196 } 197 198 // First connection from this querier. 199 q.queriers[querierID] = &querier{connections: 1} 200 q.sortedQueriers = append(q.sortedQueriers, querierID) 201 slices.Sort(q.sortedQueriers) 202 203 q.recomputeUserQueriers() 204 } 205 206 func (q *queues) removeQuerierConnection(querierID string, now time.Time) { 207 info := q.queriers[querierID] 208 if info == nil || info.connections <= 0 { 209 panic("unexpected number of connections for querier") 210 } 211 212 // Decrease the number of active connections. 213 info.connections-- 214 if info.connections > 0 { 215 return 216 } 217 218 // There no more active connections. If the forget delay is configured then 219 // we can remove it only if querier has announced a graceful shutdown. 220 if info.shuttingDown || q.forgetDelay == 0 { 221 q.removeQuerier(querierID) 222 return 223 } 224 225 // No graceful shutdown has been notified yet, so we should track the current time 226 // so that we'll remove the querier as soon as we receive the graceful shutdown 227 // notification (if any) or once the threshold expires. 228 info.disconnectedAt = now 229 } 230 231 func (q *queues) removeQuerier(querierID string) { 232 delete(q.queriers, querierID) 233 234 ix := sort.SearchStrings(q.sortedQueriers, querierID) 235 if ix >= len(q.sortedQueriers) || q.sortedQueriers[ix] != querierID { 236 panic("incorrect state of sorted queriers") 237 } 238 239 q.sortedQueriers = append(q.sortedQueriers[:ix], q.sortedQueriers[ix+1:]...) 240 241 q.recomputeUserQueriers() 242 } 243 244 // notifyQuerierShutdown records that a querier has sent notification about a graceful shutdown. 245 func (q *queues) notifyQuerierShutdown(querierID string) { 246 info := q.queriers[querierID] 247 if info == nil { 248 // The querier may have already been removed, so we just ignore it. 249 return 250 } 251 252 // If there are no more connections, we should remove the querier. 253 if info.connections == 0 { 254 q.removeQuerier(querierID) 255 return 256 } 257 258 // Otherwise we should annotate we received a graceful shutdown notification 259 // and the querier will be removed once all connections are unregistered. 260 info.shuttingDown = true 261 } 262 263 // forgetDisconnectedQueriers removes all disconnected queriers that have gone since at least 264 // the forget delay. Returns the number of forgotten queriers. 265 func (q *queues) forgetDisconnectedQueriers(now time.Time) int { 266 // Nothing to do if the forget delay is disabled. 267 if q.forgetDelay == 0 { 268 return 0 269 } 270 271 // Remove all queriers with no connections that have gone since at least the forget delay. 272 threshold := now.Add(-q.forgetDelay) 273 forgotten := 0 274 275 for querierID := range q.queriers { 276 if info := q.queriers[querierID]; info.connections == 0 && info.disconnectedAt.Before(threshold) { 277 q.removeQuerier(querierID) 278 forgotten++ 279 } 280 } 281 282 return forgotten 283 } 284 285 func (q *queues) recomputeUserQueriers() { 286 scratchpad := make([]string, 0, len(q.sortedQueriers)) 287 288 for _, uq := range q.userQueues { 289 uq.queriers = shuffleQueriersForUser(uq.seed, uq.maxQueriers, q.sortedQueriers, scratchpad) 290 } 291 } 292 293 // shuffleQueriersForUser returns nil if queriersToSelect is 0 or there are not enough queriers to select from. 294 // In that case *all* queriers should be used. 295 // Scratchpad is used for shuffling, to avoid new allocations. If nil, new slice is allocated. 296 func shuffleQueriersForUser(userSeed int64, queriersToSelect int, allSortedQueriers []string, scratchpad []string) map[string]struct{} { 297 if queriersToSelect == 0 || len(allSortedQueriers) <= queriersToSelect { 298 return nil 299 } 300 301 result := make(map[string]struct{}, queriersToSelect) 302 rnd := rand.New(rand.NewSource(userSeed)) 303 304 scratchpad = scratchpad[:0] 305 scratchpad = append(scratchpad, allSortedQueriers...) 306 307 last := len(scratchpad) - 1 308 for i := 0; i < queriersToSelect; i++ { 309 r := rnd.Intn(last + 1) 310 result[scratchpad[r]] = struct{}{} 311 // move selected item to the end, it won't be selected anymore. 312 scratchpad[r], scratchpad[last] = scratchpad[last], scratchpad[r] 313 last-- 314 } 315 316 return result 317 }