github.com/grafana/pyroscope@v1.18.0/pkg/scheduler/queue/user_queues_test.go (about) 1 // SPDX-License-Identifier: AGPL-3.0-only 2 // Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/scheduler/queue/user_queues_test.go 3 // Provenance-includes-license: Apache-2.0 4 // Provenance-includes-copyright: The Cortex Authors. 5 6 package queue 7 8 import ( 9 "fmt" 10 "math" 11 "math/rand" 12 "slices" 13 "sort" 14 "testing" 15 "time" 16 17 "github.com/stretchr/testify/assert" 18 "github.com/stretchr/testify/require" 19 ) 20 21 func TestQueues(t *testing.T) { 22 uq := newUserQueues(0, 0) 23 assert.NotNil(t, uq) 24 assert.NoError(t, isConsistent(uq)) 25 26 uq.addQuerierConnection("querier-1") 27 uq.addQuerierConnection("querier-2") 28 29 q, u, lastUserIndex := uq.getNextQueueForQuerier(-1, "querier-1") 30 assert.Nil(t, q) 31 assert.Equal(t, "", u) 32 33 // Add queues: [one] 34 qOne := getOrAdd(t, uq, "one", 0) 35 lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qOne, qOne) 36 37 // [one two] 38 qTwo := getOrAdd(t, uq, "two", 0) 39 assert.NotEqual(t, qOne, qTwo) 40 41 lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qTwo, qOne, qTwo, qOne) 42 confirmOrderForQuerier(t, uq, "querier-2", -1, qOne, qTwo, qOne) 43 44 // [one two three] 45 // confirm fifo by adding a third queue and iterating to it 46 qThree := getOrAdd(t, uq, "three", 0) 47 48 lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qTwo, qThree, qOne) 49 50 // Remove one: ["" two three] 51 uq.deleteQueue("one") 52 assert.NoError(t, isConsistent(uq)) 53 54 lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qTwo, qThree, qTwo) 55 56 // "four" is added at the beginning of the list: [four two three] 57 qFour := getOrAdd(t, uq, "four", 0) 58 59 lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qThree, qFour, qTwo, qThree) 60 61 // Remove two: [four "" three] 62 uq.deleteQueue("two") 63 assert.NoError(t, isConsistent(uq)) 64 65 lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qFour, qThree, qFour) 66 67 // Remove three: [four] 68 uq.deleteQueue("three") 69 assert.NoError(t, isConsistent(uq)) 70 71 // Remove four: [] 72 uq.deleteQueue("four") 73 assert.NoError(t, isConsistent(uq)) 74 75 q, _, _ = uq.getNextQueueForQuerier(lastUserIndex, "querier-1") 76 assert.Nil(t, q) 77 } 78 79 func TestQueuesOnTerminatingQuerier(t *testing.T) { 80 uq := newUserQueues(0, 0) 81 assert.NotNil(t, uq) 82 assert.NoError(t, isConsistent(uq)) 83 84 uq.addQuerierConnection("querier-1") 85 uq.addQuerierConnection("querier-2") 86 87 // Add queues: [one, two] 88 qOne := getOrAdd(t, uq, "one", 0) 89 qTwo := getOrAdd(t, uq, "two", 0) 90 confirmOrderForQuerier(t, uq, "querier-1", -1, qOne, qTwo, qOne, qTwo) 91 confirmOrderForQuerier(t, uq, "querier-2", -1, qOne, qTwo, qOne, qTwo) 92 93 // After notify shutdown for querier-2, it's expected to own no queue. 94 uq.notifyQuerierShutdown("querier-2") 95 q, u, _ := uq.getNextQueueForQuerier(-1, "querier-2") 96 assert.Nil(t, q) 97 assert.Equal(t, "", u) 98 99 // However, querier-1 still get queues because it's still running. 100 confirmOrderForQuerier(t, uq, "querier-1", -1, qOne, qTwo, qOne, qTwo) 101 102 // After disconnecting querier-2, it's expected to own no queue. 103 uq.removeQuerier("querier-2") 104 q, u, _ = uq.getNextQueueForQuerier(-1, "querier-2") 105 assert.Nil(t, q) 106 assert.Equal(t, "", u) 107 } 108 109 func TestQueuesWithQueriers(t *testing.T) { 110 uq := newUserQueues(0, 0) 111 assert.NotNil(t, uq) 112 assert.NoError(t, isConsistent(uq)) 113 114 queriers := 30 115 users := 1000 116 maxQueriersPerUser := 5 117 118 // Add some queriers. 119 for ix := 0; ix < queriers; ix++ { 120 qid := fmt.Sprintf("querier-%d", ix) 121 uq.addQuerierConnection(qid) 122 123 // No querier has any queues yet. 124 q, u, _ := uq.getNextQueueForQuerier(-1, qid) 125 assert.Nil(t, q) 126 assert.Equal(t, "", u) 127 } 128 129 assert.NoError(t, isConsistent(uq)) 130 131 // Add user queues. 132 for u := 0; u < users; u++ { 133 uid := fmt.Sprintf("user-%d", u) 134 getOrAdd(t, uq, uid, maxQueriersPerUser) 135 136 // Verify it has maxQueriersPerUser queriers assigned now. 137 qs := uq.userQueues[uid].queriers 138 assert.Equal(t, maxQueriersPerUser, len(qs)) 139 } 140 141 // After adding all users, verify results. For each querier, find out how many different users it handles, 142 // and compute mean and stdDev. 143 queriersMap := make(map[string]int) 144 145 for q := 0; q < queriers; q++ { 146 qid := fmt.Sprintf("querier-%d", q) 147 148 lastUserIndex := -1 149 for { 150 _, _, newIx := uq.getNextQueueForQuerier(lastUserIndex, qid) 151 if newIx < lastUserIndex { 152 break 153 } 154 lastUserIndex = newIx 155 queriersMap[qid]++ 156 } 157 } 158 159 mean := float64(0) 160 for _, c := range queriersMap { 161 mean += float64(c) 162 } 163 mean = mean / float64(len(queriersMap)) 164 165 stdDev := float64(0) 166 for _, c := range queriersMap { 167 d := float64(c) - mean 168 stdDev += (d * d) 169 } 170 stdDev = math.Sqrt(stdDev / float64(len(queriersMap))) 171 t.Log("mean:", mean, "stddev:", stdDev) 172 173 assert.InDelta(t, users*maxQueriersPerUser/queriers, mean, 1) 174 assert.InDelta(t, stdDev, 0, mean*0.2) 175 } 176 177 func TestQueuesConsistency(t *testing.T) { 178 tests := map[string]struct { 179 forgetDelay time.Duration 180 }{ 181 "without forget delay": {}, 182 "with forget delay": {forgetDelay: time.Minute}, 183 } 184 185 for testName, testData := range tests { 186 t.Run(testName, func(t *testing.T) { 187 uq := newUserQueues(0, testData.forgetDelay) 188 assert.NotNil(t, uq) 189 assert.NoError(t, isConsistent(uq)) 190 191 r := rand.New(rand.NewSource(time.Now().Unix())) 192 193 lastUserIndexes := map[string]int{} 194 195 conns := map[string]int{} 196 197 for i := 0; i < 10000; i++ { 198 switch r.Int() % 6 { 199 case 0: 200 assert.NotNil(t, uq.getOrAddQueue(generateTenant(r), 3)) 201 case 1: 202 qid := generateQuerier(r) 203 _, _, luid := uq.getNextQueueForQuerier(lastUserIndexes[qid], qid) 204 lastUserIndexes[qid] = luid 205 case 2: 206 uq.deleteQueue(generateTenant(r)) 207 case 3: 208 q := generateQuerier(r) 209 uq.addQuerierConnection(q) 210 conns[q]++ 211 case 4: 212 q := generateQuerier(r) 213 if conns[q] > 0 { 214 uq.removeQuerierConnection(q, time.Now()) 215 conns[q]-- 216 } 217 case 5: 218 q := generateQuerier(r) 219 uq.notifyQuerierShutdown(q) 220 } 221 222 assert.NoErrorf(t, isConsistent(uq), "last action %d", i) 223 } 224 }) 225 } 226 } 227 228 func TestQueues_ForgetDelay(t *testing.T) { 229 const ( 230 forgetDelay = time.Minute 231 maxQueriersPerUser = 1 232 numUsers = 100 233 ) 234 235 now := time.Now() 236 uq := newUserQueues(0, forgetDelay) 237 assert.NotNil(t, uq) 238 assert.NoError(t, isConsistent(uq)) 239 240 // 3 queriers open 2 connections each. 241 for i := 1; i <= 3; i++ { 242 uq.addQuerierConnection(fmt.Sprintf("querier-%d", i)) 243 uq.addQuerierConnection(fmt.Sprintf("querier-%d", i)) 244 } 245 246 // Add user queues. 247 for i := 0; i < numUsers; i++ { 248 userID := fmt.Sprintf("user-%d", i) 249 getOrAdd(t, uq, userID, maxQueriersPerUser) 250 } 251 252 // We expect querier-1 to have some users. 253 querier1Users := getUsersByQuerier(uq, "querier-1") 254 require.NotEmpty(t, querier1Users) 255 256 // Gracefully shutdown querier-1. 257 uq.removeQuerierConnection("querier-1", now.Add(20*time.Second)) 258 uq.removeQuerierConnection("querier-1", now.Add(21*time.Second)) 259 uq.notifyQuerierShutdown("querier-1") 260 261 // We expect querier-1 has been removed. 262 assert.NotContains(t, uq.queriers, "querier-1") 263 assert.NoError(t, isConsistent(uq)) 264 265 // We expect querier-1 users have been shuffled to other queriers. 266 for _, userID := range querier1Users { 267 assert.Contains(t, append(getUsersByQuerier(uq, "querier-2"), getUsersByQuerier(uq, "querier-3")...), userID) 268 } 269 270 // Querier-1 reconnects. 271 uq.addQuerierConnection("querier-1") 272 uq.addQuerierConnection("querier-1") 273 274 // We expect the initial querier-1 users have got back to querier-1. 275 for _, userID := range querier1Users { 276 assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID) 277 assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID) 278 assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID) 279 } 280 281 // Querier-1 abruptly terminates (no shutdown notification received). 282 uq.removeQuerierConnection("querier-1", now.Add(40*time.Second)) 283 uq.removeQuerierConnection("querier-1", now.Add(41*time.Second)) 284 285 // We expect querier-1 has NOT been removed. 286 assert.Contains(t, uq.queriers, "querier-1") 287 assert.NoError(t, isConsistent(uq)) 288 289 // We expect the querier-1 users have not been shuffled to other queriers. 290 for _, userID := range querier1Users { 291 assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID) 292 assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID) 293 assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID) 294 } 295 296 // Try to forget disconnected queriers, but querier-1 forget delay hasn't passed yet. 297 uq.forgetDisconnectedQueriers(now.Add(90 * time.Second)) 298 299 assert.Contains(t, uq.queriers, "querier-1") 300 assert.NoError(t, isConsistent(uq)) 301 302 for _, userID := range querier1Users { 303 assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID) 304 assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID) 305 assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID) 306 } 307 308 // Try to forget disconnected queriers. This time querier-1 forget delay has passed. 309 uq.forgetDisconnectedQueriers(now.Add(105 * time.Second)) 310 311 assert.NotContains(t, uq.queriers, "querier-1") 312 assert.NoError(t, isConsistent(uq)) 313 314 // We expect querier-1 users have been shuffled to other queriers. 315 for _, userID := range querier1Users { 316 assert.Contains(t, append(getUsersByQuerier(uq, "querier-2"), getUsersByQuerier(uq, "querier-3")...), userID) 317 } 318 } 319 320 func TestQueues_ForgetDelay_ShouldCorrectlyHandleQuerierReconnectingBeforeForgetDelayIsPassed(t *testing.T) { 321 const ( 322 forgetDelay = time.Minute 323 maxQueriersPerUser = 1 324 numUsers = 100 325 ) 326 327 now := time.Now() 328 uq := newUserQueues(0, forgetDelay) 329 assert.NotNil(t, uq) 330 assert.NoError(t, isConsistent(uq)) 331 332 // 3 queriers open 2 connections each. 333 for i := 1; i <= 3; i++ { 334 uq.addQuerierConnection(fmt.Sprintf("querier-%d", i)) 335 uq.addQuerierConnection(fmt.Sprintf("querier-%d", i)) 336 } 337 338 // Add user queues. 339 for i := 0; i < numUsers; i++ { 340 userID := fmt.Sprintf("user-%d", i) 341 getOrAdd(t, uq, userID, maxQueriersPerUser) 342 } 343 344 // We expect querier-1 to have some users. 345 querier1Users := getUsersByQuerier(uq, "querier-1") 346 require.NotEmpty(t, querier1Users) 347 348 // Querier-1 abruptly terminates (no shutdown notification received). 349 uq.removeQuerierConnection("querier-1", now.Add(40*time.Second)) 350 uq.removeQuerierConnection("querier-1", now.Add(41*time.Second)) 351 352 // We expect querier-1 has NOT been removed. 353 assert.Contains(t, uq.queriers, "querier-1") 354 assert.NoError(t, isConsistent(uq)) 355 356 // We expect the querier-1 users have not been shuffled to other queriers. 357 for _, userID := range querier1Users { 358 assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID) 359 assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID) 360 assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID) 361 } 362 363 // Try to forget disconnected queriers, but querier-1 forget delay hasn't passed yet. 364 uq.forgetDisconnectedQueriers(now.Add(90 * time.Second)) 365 366 // Querier-1 reconnects. 367 uq.addQuerierConnection("querier-1") 368 uq.addQuerierConnection("querier-1") 369 370 assert.Contains(t, uq.queriers, "querier-1") 371 assert.NoError(t, isConsistent(uq)) 372 373 // We expect the querier-1 users have not been shuffled to other queriers. 374 for _, userID := range querier1Users { 375 assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID) 376 assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID) 377 assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID) 378 } 379 380 // Try to forget disconnected queriers far in the future, but there's no disconnected querier. 381 uq.forgetDisconnectedQueriers(now.Add(200 * time.Second)) 382 383 assert.Contains(t, uq.queriers, "querier-1") 384 assert.NoError(t, isConsistent(uq)) 385 386 for _, userID := range querier1Users { 387 assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID) 388 assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID) 389 assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID) 390 } 391 } 392 393 func generateTenant(r *rand.Rand) string { 394 return fmt.Sprint("tenant-", r.Int()%5) 395 } 396 397 func generateQuerier(r *rand.Rand) string { 398 return fmt.Sprint("querier-", r.Int()%5) 399 } 400 401 func getOrAdd(t *testing.T, uq *queues, tenant string, maxQueriers int) chan Request { 402 q := uq.getOrAddQueue(tenant, maxQueriers) 403 assert.NotNil(t, q) 404 assert.NoError(t, isConsistent(uq)) 405 assert.Equal(t, q, uq.getOrAddQueue(tenant, maxQueriers)) 406 return q 407 } 408 409 func confirmOrderForQuerier(t *testing.T, uq *queues, querier string, lastUserIndex int, qs ...chan Request) int { 410 var n chan Request 411 for _, q := range qs { 412 n, _, lastUserIndex = uq.getNextQueueForQuerier(lastUserIndex, querier) 413 assert.Equal(t, q, n) 414 assert.NoError(t, isConsistent(uq)) 415 } 416 return lastUserIndex 417 } 418 419 func isConsistent(uq *queues) error { 420 if len(uq.sortedQueriers) != len(uq.queriers) { 421 return fmt.Errorf("inconsistent number of sorted queriers and querier connections") 422 } 423 424 uc := 0 425 for ix, u := range uq.users { 426 q := uq.userQueues[u] 427 if u != "" && q == nil { 428 return fmt.Errorf("user %s doesn't have queue", u) 429 } 430 if u == "" && q != nil { 431 return fmt.Errorf("user %s shouldn't have queue", u) 432 } 433 if u == "" { 434 continue 435 } 436 437 uc++ 438 439 if q.index != ix { 440 return fmt.Errorf("invalid user's index, expected=%d, got=%d", ix, q.index) 441 } 442 443 if q.maxQueriers == 0 && q.queriers != nil { 444 return fmt.Errorf("user %s has queriers, but maxQueriers=0", u) 445 } 446 447 if q.maxQueriers > 0 && len(uq.sortedQueriers) <= q.maxQueriers && q.queriers != nil { 448 return fmt.Errorf("user %s has queriers set despite not enough queriers available", u) 449 } 450 451 if q.maxQueriers > 0 && len(uq.sortedQueriers) > q.maxQueriers && len(q.queriers) != q.maxQueriers { 452 return fmt.Errorf("user %s has incorrect number of queriers, expected=%d, got=%d", u, len(q.queriers), q.maxQueriers) 453 } 454 } 455 456 if uc != len(uq.userQueues) { 457 return fmt.Errorf("inconsistent number of users list and user queues") 458 } 459 460 return nil 461 } 462 463 // getUsersByQuerier returns the list of users handled by the provided querierID. 464 func getUsersByQuerier(queues *queues, querierID string) []string { 465 var userIDs []string 466 for userID, q := range queues.userQueues { 467 if q.queriers == nil { 468 // If it's nil then all queriers can handle this user. 469 userIDs = append(userIDs, userID) 470 continue 471 } 472 if _, ok := q.queriers[querierID]; ok { 473 userIDs = append(userIDs, userID) 474 } 475 } 476 return userIDs 477 } 478 479 func TestShuffleQueriers(t *testing.T) { 480 allQueriers := []string{"a", "b", "c", "d", "e"} 481 482 require.Nil(t, shuffleQueriersForUser(12345, 10, allQueriers, nil)) 483 require.Nil(t, shuffleQueriersForUser(12345, len(allQueriers), allQueriers, nil)) 484 485 r1 := shuffleQueriersForUser(12345, 3, allQueriers, nil) 486 require.Equal(t, 3, len(r1)) 487 488 // Same input produces same output. 489 r2 := shuffleQueriersForUser(12345, 3, allQueriers, nil) 490 require.Equal(t, 3, len(r2)) 491 require.Equal(t, r1, r2) 492 } 493 494 func TestShuffleQueriersCorrectness(t *testing.T) { 495 const queriersCount = 100 496 497 var allSortedQueriers []string 498 for i := 0; i < queriersCount; i++ { 499 allSortedQueriers = append(allSortedQueriers, fmt.Sprintf("%d", i)) 500 } 501 slices.Sort(allSortedQueriers) 502 503 r := rand.New(rand.NewSource(time.Now().UnixNano())) 504 const tests = 1000 505 for i := 0; i < tests; i++ { 506 toSelect := r.Intn(queriersCount) 507 if toSelect == 0 { 508 toSelect = 3 509 } 510 511 selected := shuffleQueriersForUser(r.Int63(), toSelect, allSortedQueriers, nil) 512 513 require.Equal(t, toSelect, len(selected)) 514 515 slices.Sort(allSortedQueriers) 516 prevQuerier := "" 517 for _, q := range allSortedQueriers { 518 require.True(t, prevQuerier < q, "non-unique querier") 519 prevQuerier = q 520 521 ix := sort.SearchStrings(allSortedQueriers, q) 522 require.True(t, ix < len(allSortedQueriers) && allSortedQueriers[ix] == q, "selected querier is not between all queriers") 523 } 524 } 525 }