github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/scheduler/queue/user_queues.go (about) 1 package queue 2 3 import ( 4 "math/rand" 5 "sort" 6 "time" 7 8 "github.com/cortexproject/cortex/pkg/util" 9 ) 10 11 // querier holds information about a querier registered in the queue. 12 type querier struct { 13 // Number of active connections. 14 connections int 15 16 // True if the querier notified it's gracefully shutting down. 17 shuttingDown bool 18 19 // When the last connection has been unregistered. 20 disconnectedAt time.Time 21 } 22 23 // This struct holds user queues for pending requests. It also keeps track of connected queriers, 24 // and mapping between users and queriers. 25 type queues struct { 26 userQueues map[string]*userQueue 27 28 // List of all users with queues, used for iteration when searching for next queue to handle. 29 // Users removed from the middle are replaced with "". To avoid skipping users during iteration, we only shrink 30 // this list when there are ""'s at the end of it. 31 users []string 32 33 maxUserQueueSize int 34 35 // How long to wait before removing a querier which has got disconnected 36 // but hasn't notified about a graceful shutdown. 37 forgetDelay time.Duration 38 39 // Tracks queriers registered to the queue. 40 queriers map[string]*querier 41 42 // Sorted list of querier names, used when creating per-user shard. 43 sortedQueriers []string 44 } 45 46 type userQueue struct { 47 ch chan Request 48 49 // If not nil, only these queriers can handle user requests. If nil, all queriers can. 50 // We set this to nil if number of available queriers <= maxQueriers. 51 queriers map[string]struct{} 52 maxQueriers int 53 54 // Seed for shuffle sharding of queriers. This seed is based on userID only and is therefore consistent 55 // between different frontends. 56 seed int64 57 58 // Points back to 'users' field in queues. Enables quick cleanup. 59 index int 60 } 61 62 func newUserQueues(maxUserQueueSize int, forgetDelay time.Duration) *queues { 63 return &queues{ 64 userQueues: map[string]*userQueue{}, 65 users: nil, 66 maxUserQueueSize: maxUserQueueSize, 67 forgetDelay: forgetDelay, 68 queriers: map[string]*querier{}, 69 sortedQueriers: nil, 70 } 71 } 72 73 func (q *queues) len() int { 74 return len(q.userQueues) 75 } 76 77 func (q *queues) deleteQueue(userID string) { 78 uq := q.userQueues[userID] 79 if uq == nil { 80 return 81 } 82 83 delete(q.userQueues, userID) 84 q.users[uq.index] = "" 85 86 // Shrink users list size if possible. This is safe, and no users will be skipped during iteration. 87 for ix := len(q.users) - 1; ix >= 0 && q.users[ix] == ""; ix-- { 88 q.users = q.users[:ix] 89 } 90 } 91 92 // Returns existing or new queue for user. 93 // MaxQueriers is used to compute which queriers should handle requests for this user. 94 // If maxQueriers is <= 0, all queriers can handle this user's requests. 95 // If maxQueriers has changed since the last call, queriers for this are recomputed. 96 func (q *queues) getOrAddQueue(userID string, maxQueriers int) chan Request { 97 // Empty user is not allowed, as that would break our users list ("" is used for free spot). 98 if userID == "" { 99 return nil 100 } 101 102 if maxQueriers < 0 { 103 maxQueriers = 0 104 } 105 106 uq := q.userQueues[userID] 107 108 if uq == nil { 109 uq = &userQueue{ 110 ch: make(chan Request, q.maxUserQueueSize), 111 seed: util.ShuffleShardSeed(userID, ""), 112 index: -1, 113 } 114 q.userQueues[userID] = uq 115 116 // Add user to the list of users... find first free spot, and put it there. 117 for ix, u := range q.users { 118 if u == "" { 119 uq.index = ix 120 q.users[ix] = userID 121 break 122 } 123 } 124 125 // ... or add to the end. 126 if uq.index < 0 { 127 uq.index = len(q.users) 128 q.users = append(q.users, userID) 129 } 130 } 131 132 if uq.maxQueriers != maxQueriers { 133 uq.maxQueriers = maxQueriers 134 uq.queriers = shuffleQueriersForUser(uq.seed, maxQueriers, q.sortedQueriers, nil) 135 } 136 137 return uq.ch 138 } 139 140 // Finds next queue for the querier. To support fair scheduling between users, client is expected 141 // to pass last user index returned by this function as argument. Is there was no previous 142 // last user index, use -1. 143 func (q *queues) getNextQueueForQuerier(lastUserIndex int, querierID string) (chan Request, string, int) { 144 uid := lastUserIndex 145 146 for iters := 0; iters < len(q.users); iters++ { 147 uid = uid + 1 148 149 // Don't use "mod len(q.users)", as that could skip users at the beginning of the list 150 // for example when q.users has shrunk since last call. 151 if uid >= len(q.users) { 152 uid = 0 153 } 154 155 u := q.users[uid] 156 if u == "" { 157 continue 158 } 159 160 q := q.userQueues[u] 161 162 if q.queriers != nil { 163 if _, ok := q.queriers[querierID]; !ok { 164 // This querier is not handling the user. 165 continue 166 } 167 } 168 169 return q.ch, u, uid 170 } 171 return nil, "", uid 172 } 173 174 func (q *queues) addQuerierConnection(querierID string) { 175 info := q.queriers[querierID] 176 if info != nil { 177 info.connections++ 178 179 // Reset in case the querier re-connected while it was in the forget waiting period. 180 info.shuttingDown = false 181 info.disconnectedAt = time.Time{} 182 183 return 184 } 185 186 // First connection from this querier. 187 q.queriers[querierID] = &querier{connections: 1} 188 q.sortedQueriers = append(q.sortedQueriers, querierID) 189 sort.Strings(q.sortedQueriers) 190 191 q.recomputeUserQueriers() 192 } 193 194 func (q *queues) removeQuerierConnection(querierID string, now time.Time) { 195 info := q.queriers[querierID] 196 if info == nil || info.connections <= 0 { 197 panic("unexpected number of connections for querier") 198 } 199 200 // Decrease the number of active connections. 201 info.connections-- 202 if info.connections > 0 { 203 return 204 } 205 206 // There no more active connections. If the forget delay is configured then 207 // we can remove it only if querier has announced a graceful shutdown. 208 if info.shuttingDown || q.forgetDelay == 0 { 209 q.removeQuerier(querierID) 210 return 211 } 212 213 // No graceful shutdown has been notified yet, so we should track the current time 214 // so that we'll remove the querier as soon as we receive the graceful shutdown 215 // notification (if any) or once the threshold expires. 216 info.disconnectedAt = now 217 } 218 219 func (q *queues) removeQuerier(querierID string) { 220 delete(q.queriers, querierID) 221 222 ix := sort.SearchStrings(q.sortedQueriers, querierID) 223 if ix >= len(q.sortedQueriers) || q.sortedQueriers[ix] != querierID { 224 panic("incorrect state of sorted queriers") 225 } 226 227 q.sortedQueriers = append(q.sortedQueriers[:ix], q.sortedQueriers[ix+1:]...) 228 229 q.recomputeUserQueriers() 230 } 231 232 // notifyQuerierShutdown records that a querier has sent notification about a graceful shutdown. 233 func (q *queues) notifyQuerierShutdown(querierID string) { 234 info := q.queriers[querierID] 235 if info == nil { 236 // The querier may have already been removed, so we just ignore it. 237 return 238 } 239 240 // If there are no more connections, we should remove the querier. 241 if info.connections == 0 { 242 q.removeQuerier(querierID) 243 return 244 } 245 246 // Otherwise we should annotate we received a graceful shutdown notification 247 // and the querier will be removed once all connections are unregistered. 248 info.shuttingDown = true 249 } 250 251 // forgetDisconnectedQueriers removes all disconnected queriers that have gone since at least 252 // the forget delay. Returns the number of forgotten queriers. 253 func (q *queues) forgetDisconnectedQueriers(now time.Time) int { 254 // Nothing to do if the forget delay is disabled. 255 if q.forgetDelay == 0 { 256 return 0 257 } 258 259 // Remove all queriers with no connections that have gone since at least the forget delay. 260 threshold := now.Add(-q.forgetDelay) 261 forgotten := 0 262 263 for querierID := range q.queriers { 264 if info := q.queriers[querierID]; info.connections == 0 && info.disconnectedAt.Before(threshold) { 265 q.removeQuerier(querierID) 266 forgotten++ 267 } 268 } 269 270 return forgotten 271 } 272 273 func (q *queues) recomputeUserQueriers() { 274 scratchpad := make([]string, 0, len(q.sortedQueriers)) 275 276 for _, uq := range q.userQueues { 277 uq.queriers = shuffleQueriersForUser(uq.seed, uq.maxQueriers, q.sortedQueriers, scratchpad) 278 } 279 } 280 281 // shuffleQueriersForUser returns nil if queriersToSelect is 0 or there are not enough queriers to select from. 282 // In that case *all* queriers should be used. 283 // Scratchpad is used for shuffling, to avoid new allocations. If nil, new slice is allocated. 284 func shuffleQueriersForUser(userSeed int64, queriersToSelect int, allSortedQueriers []string, scratchpad []string) map[string]struct{} { 285 if queriersToSelect == 0 || len(allSortedQueriers) <= queriersToSelect { 286 return nil 287 } 288 289 result := make(map[string]struct{}, queriersToSelect) 290 rnd := rand.New(rand.NewSource(userSeed)) 291 292 scratchpad = scratchpad[:0] 293 scratchpad = append(scratchpad, allSortedQueriers...) 294 295 last := len(scratchpad) - 1 296 for i := 0; i < queriersToSelect; i++ { 297 r := rnd.Intn(last + 1) 298 result[scratchpad[r]] = struct{}{} 299 // move selected item to the end, it won't be selected anymore. 300 scratchpad[r], scratchpad[last] = scratchpad[last], scratchpad[r] 301 last-- 302 } 303 304 return result 305 }