github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/scheduler/queue/user_queues_test.go (about) 1 package queue 2 3 import ( 4 "fmt" 5 "math" 6 "math/rand" 7 "sort" 8 "testing" 9 "time" 10 11 "github.com/stretchr/testify/assert" 12 "github.com/stretchr/testify/require" 13 ) 14 15 func TestQueues(t *testing.T) { 16 uq := newUserQueues(0, 0) 17 assert.NotNil(t, uq) 18 assert.NoError(t, isConsistent(uq)) 19 20 q, u, lastUserIndex := uq.getNextQueueForQuerier(-1, "querier-1") 21 assert.Nil(t, q) 22 assert.Equal(t, "", u) 23 24 // Add queues: [one] 25 qOne := getOrAdd(t, uq, "one", 0) 26 lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qOne, qOne) 27 28 // [one two] 29 qTwo := getOrAdd(t, uq, "two", 0) 30 assert.NotEqual(t, qOne, qTwo) 31 32 lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qTwo, qOne, qTwo, qOne) 33 confirmOrderForQuerier(t, uq, "querier-2", -1, qOne, qTwo, qOne) 34 35 // [one two three] 36 // confirm fifo by adding a third queue and iterating to it 37 qThree := getOrAdd(t, uq, "three", 0) 38 39 lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qTwo, qThree, qOne) 40 41 // Remove one: ["" two three] 42 uq.deleteQueue("one") 43 assert.NoError(t, isConsistent(uq)) 44 45 lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qTwo, qThree, qTwo) 46 47 // "four" is added at the beginning of the list: [four two three] 48 qFour := getOrAdd(t, uq, "four", 0) 49 50 lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qThree, qFour, qTwo, qThree) 51 52 // Remove two: [four "" three] 53 uq.deleteQueue("two") 54 assert.NoError(t, isConsistent(uq)) 55 56 lastUserIndex = confirmOrderForQuerier(t, uq, "querier-1", lastUserIndex, qFour, qThree, qFour) 57 58 // Remove three: [four] 59 uq.deleteQueue("three") 60 assert.NoError(t, isConsistent(uq)) 61 62 // Remove four: [] 63 uq.deleteQueue("four") 64 assert.NoError(t, isConsistent(uq)) 65 66 q, _, _ = uq.getNextQueueForQuerier(lastUserIndex, "querier-1") 67 assert.Nil(t, q) 68 } 69 70 func TestQueuesWithQueriers(t *testing.T) { 71 uq := newUserQueues(0, 0) 72 assert.NotNil(t, uq) 73 assert.NoError(t, isConsistent(uq)) 74 75 queriers := 30 76 users := 1000 77 maxQueriersPerUser := 5 78 79 // Add some queriers. 80 for ix := 0; ix < queriers; ix++ { 81 qid := fmt.Sprintf("querier-%d", ix) 82 uq.addQuerierConnection(qid) 83 84 // No querier has any queues yet. 85 q, u, _ := uq.getNextQueueForQuerier(-1, qid) 86 assert.Nil(t, q) 87 assert.Equal(t, "", u) 88 } 89 90 assert.NoError(t, isConsistent(uq)) 91 92 // Add user queues. 93 for u := 0; u < users; u++ { 94 uid := fmt.Sprintf("user-%d", u) 95 getOrAdd(t, uq, uid, maxQueriersPerUser) 96 97 // Verify it has maxQueriersPerUser queriers assigned now. 98 qs := uq.userQueues[uid].queriers 99 assert.Equal(t, maxQueriersPerUser, len(qs)) 100 } 101 102 // After adding all users, verify results. For each querier, find out how many different users it handles, 103 // and compute mean and stdDev. 104 queriersMap := make(map[string]int) 105 106 for q := 0; q < queriers; q++ { 107 qid := fmt.Sprintf("querier-%d", q) 108 109 lastUserIndex := -1 110 for { 111 _, _, newIx := uq.getNextQueueForQuerier(lastUserIndex, qid) 112 if newIx < lastUserIndex { 113 break 114 } 115 lastUserIndex = newIx 116 queriersMap[qid]++ 117 } 118 } 119 120 mean := float64(0) 121 for _, c := range queriersMap { 122 mean += float64(c) 123 } 124 mean = mean / float64(len(queriersMap)) 125 126 stdDev := float64(0) 127 for _, c := range queriersMap { 128 d := float64(c) - mean 129 stdDev += (d * d) 130 } 131 stdDev = math.Sqrt(stdDev / float64(len(queriersMap))) 132 t.Log("mean:", mean, "stddev:", stdDev) 133 134 assert.InDelta(t, users*maxQueriersPerUser/queriers, mean, 1) 135 assert.InDelta(t, stdDev, 0, mean*0.2) 136 } 137 138 func TestQueuesConsistency(t *testing.T) { 139 tests := map[string]struct { 140 forgetDelay time.Duration 141 }{ 142 "without forget delay": {}, 143 "with forget delay": {forgetDelay: time.Minute}, 144 } 145 146 for testName, testData := range tests { 147 t.Run(testName, func(t *testing.T) { 148 uq := newUserQueues(0, testData.forgetDelay) 149 assert.NotNil(t, uq) 150 assert.NoError(t, isConsistent(uq)) 151 152 r := rand.New(rand.NewSource(time.Now().Unix())) 153 154 lastUserIndexes := map[string]int{} 155 156 conns := map[string]int{} 157 158 for i := 0; i < 10000; i++ { 159 switch r.Int() % 6 { 160 case 0: 161 assert.NotNil(t, uq.getOrAddQueue(generateTenant(r), 3)) 162 case 1: 163 qid := generateQuerier(r) 164 _, _, luid := uq.getNextQueueForQuerier(lastUserIndexes[qid], qid) 165 lastUserIndexes[qid] = luid 166 case 2: 167 uq.deleteQueue(generateTenant(r)) 168 case 3: 169 q := generateQuerier(r) 170 uq.addQuerierConnection(q) 171 conns[q]++ 172 case 4: 173 q := generateQuerier(r) 174 if conns[q] > 0 { 175 uq.removeQuerierConnection(q, time.Now()) 176 conns[q]-- 177 } 178 case 5: 179 q := generateQuerier(r) 180 uq.notifyQuerierShutdown(q) 181 } 182 183 assert.NoErrorf(t, isConsistent(uq), "last action %d", i) 184 } 185 }) 186 } 187 } 188 189 func TestQueues_ForgetDelay(t *testing.T) { 190 const ( 191 forgetDelay = time.Minute 192 maxQueriersPerUser = 1 193 numUsers = 100 194 ) 195 196 now := time.Now() 197 uq := newUserQueues(0, forgetDelay) 198 assert.NotNil(t, uq) 199 assert.NoError(t, isConsistent(uq)) 200 201 // 3 queriers open 2 connections each. 202 for i := 1; i <= 3; i++ { 203 uq.addQuerierConnection(fmt.Sprintf("querier-%d", i)) 204 uq.addQuerierConnection(fmt.Sprintf("querier-%d", i)) 205 } 206 207 // Add user queues. 208 for i := 0; i < numUsers; i++ { 209 userID := fmt.Sprintf("user-%d", i) 210 getOrAdd(t, uq, userID, maxQueriersPerUser) 211 } 212 213 // We expect querier-1 to have some users. 214 querier1Users := getUsersByQuerier(uq, "querier-1") 215 require.NotEmpty(t, querier1Users) 216 217 // Gracefully shutdown querier-1. 218 uq.removeQuerierConnection("querier-1", now.Add(20*time.Second)) 219 uq.removeQuerierConnection("querier-1", now.Add(21*time.Second)) 220 uq.notifyQuerierShutdown("querier-1") 221 222 // We expect querier-1 has been removed. 223 assert.NotContains(t, uq.queriers, "querier-1") 224 assert.NoError(t, isConsistent(uq)) 225 226 // We expect querier-1 users have been shuffled to other queriers. 227 for _, userID := range querier1Users { 228 assert.Contains(t, append(getUsersByQuerier(uq, "querier-2"), getUsersByQuerier(uq, "querier-3")...), userID) 229 } 230 231 // Querier-1 reconnects. 232 uq.addQuerierConnection("querier-1") 233 uq.addQuerierConnection("querier-1") 234 235 // We expect the initial querier-1 users have got back to querier-1. 236 for _, userID := range querier1Users { 237 assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID) 238 assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID) 239 assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID) 240 } 241 242 // Querier-1 abruptly terminates (no shutdown notification received). 243 uq.removeQuerierConnection("querier-1", now.Add(40*time.Second)) 244 uq.removeQuerierConnection("querier-1", now.Add(41*time.Second)) 245 246 // We expect querier-1 has NOT been removed. 247 assert.Contains(t, uq.queriers, "querier-1") 248 assert.NoError(t, isConsistent(uq)) 249 250 // We expect the querier-1 users have not been shuffled to other queriers. 251 for _, userID := range querier1Users { 252 assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID) 253 assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID) 254 assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID) 255 } 256 257 // Try to forget disconnected queriers, but querier-1 forget delay hasn't passed yet. 258 uq.forgetDisconnectedQueriers(now.Add(90 * time.Second)) 259 260 assert.Contains(t, uq.queriers, "querier-1") 261 assert.NoError(t, isConsistent(uq)) 262 263 for _, userID := range querier1Users { 264 assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID) 265 assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID) 266 assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID) 267 } 268 269 // Try to forget disconnected queriers. This time querier-1 forget delay has passed. 270 uq.forgetDisconnectedQueriers(now.Add(105 * time.Second)) 271 272 assert.NotContains(t, uq.queriers, "querier-1") 273 assert.NoError(t, isConsistent(uq)) 274 275 // We expect querier-1 users have been shuffled to other queriers. 276 for _, userID := range querier1Users { 277 assert.Contains(t, append(getUsersByQuerier(uq, "querier-2"), getUsersByQuerier(uq, "querier-3")...), userID) 278 } 279 } 280 281 func TestQueues_ForgetDelay_ShouldCorrectlyHandleQuerierReconnectingBeforeForgetDelayIsPassed(t *testing.T) { 282 const ( 283 forgetDelay = time.Minute 284 maxQueriersPerUser = 1 285 numUsers = 100 286 ) 287 288 now := time.Now() 289 uq := newUserQueues(0, forgetDelay) 290 assert.NotNil(t, uq) 291 assert.NoError(t, isConsistent(uq)) 292 293 // 3 queriers open 2 connections each. 294 for i := 1; i <= 3; i++ { 295 uq.addQuerierConnection(fmt.Sprintf("querier-%d", i)) 296 uq.addQuerierConnection(fmt.Sprintf("querier-%d", i)) 297 } 298 299 // Add user queues. 300 for i := 0; i < numUsers; i++ { 301 userID := fmt.Sprintf("user-%d", i) 302 getOrAdd(t, uq, userID, maxQueriersPerUser) 303 } 304 305 // We expect querier-1 to have some users. 306 querier1Users := getUsersByQuerier(uq, "querier-1") 307 require.NotEmpty(t, querier1Users) 308 309 // Querier-1 abruptly terminates (no shutdown notification received). 310 uq.removeQuerierConnection("querier-1", now.Add(40*time.Second)) 311 uq.removeQuerierConnection("querier-1", now.Add(41*time.Second)) 312 313 // We expect querier-1 has NOT been removed. 314 assert.Contains(t, uq.queriers, "querier-1") 315 assert.NoError(t, isConsistent(uq)) 316 317 // We expect the querier-1 users have not been shuffled to other queriers. 318 for _, userID := range querier1Users { 319 assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID) 320 assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID) 321 assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID) 322 } 323 324 // Try to forget disconnected queriers, but querier-1 forget delay hasn't passed yet. 325 uq.forgetDisconnectedQueriers(now.Add(90 * time.Second)) 326 327 // Querier-1 reconnects. 328 uq.addQuerierConnection("querier-1") 329 uq.addQuerierConnection("querier-1") 330 331 assert.Contains(t, uq.queriers, "querier-1") 332 assert.NoError(t, isConsistent(uq)) 333 334 // We expect the querier-1 users have not been shuffled to other queriers. 335 for _, userID := range querier1Users { 336 assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID) 337 assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID) 338 assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID) 339 } 340 341 // Try to forget disconnected queriers far in the future, but there's no disconnected querier. 342 uq.forgetDisconnectedQueriers(now.Add(200 * time.Second)) 343 344 assert.Contains(t, uq.queriers, "querier-1") 345 assert.NoError(t, isConsistent(uq)) 346 347 for _, userID := range querier1Users { 348 assert.Contains(t, getUsersByQuerier(uq, "querier-1"), userID) 349 assert.NotContains(t, getUsersByQuerier(uq, "querier-2"), userID) 350 assert.NotContains(t, getUsersByQuerier(uq, "querier-3"), userID) 351 } 352 } 353 354 func generateTenant(r *rand.Rand) string { 355 return fmt.Sprint("tenant-", r.Int()%5) 356 } 357 358 func generateQuerier(r *rand.Rand) string { 359 return fmt.Sprint("querier-", r.Int()%5) 360 } 361 362 func getOrAdd(t *testing.T, uq *queues, tenant string, maxQueriers int) chan Request { 363 q := uq.getOrAddQueue(tenant, maxQueriers) 364 assert.NotNil(t, q) 365 assert.NoError(t, isConsistent(uq)) 366 assert.Equal(t, q, uq.getOrAddQueue(tenant, maxQueriers)) 367 return q 368 } 369 370 func confirmOrderForQuerier(t *testing.T, uq *queues, querier string, lastUserIndex int, qs ...chan Request) int { 371 var n chan Request 372 for _, q := range qs { 373 n, _, lastUserIndex = uq.getNextQueueForQuerier(lastUserIndex, querier) 374 assert.Equal(t, q, n) 375 assert.NoError(t, isConsistent(uq)) 376 } 377 return lastUserIndex 378 } 379 380 func isConsistent(uq *queues) error { 381 if len(uq.sortedQueriers) != len(uq.queriers) { 382 return fmt.Errorf("inconsistent number of sorted queriers and querier connections") 383 } 384 385 uc := 0 386 for ix, u := range uq.users { 387 q := uq.userQueues[u] 388 if u != "" && q == nil { 389 return fmt.Errorf("user %s doesn't have queue", u) 390 } 391 if u == "" && q != nil { 392 return fmt.Errorf("user %s shouldn't have queue", u) 393 } 394 if u == "" { 395 continue 396 } 397 398 uc++ 399 400 if q.index != ix { 401 return fmt.Errorf("invalid user's index, expected=%d, got=%d", ix, q.index) 402 } 403 404 if q.maxQueriers == 0 && q.queriers != nil { 405 return fmt.Errorf("user %s has queriers, but maxQueriers=0", u) 406 } 407 408 if q.maxQueriers > 0 && len(uq.sortedQueriers) <= q.maxQueriers && q.queriers != nil { 409 return fmt.Errorf("user %s has queriers set despite not enough queriers available", u) 410 } 411 412 if q.maxQueriers > 0 && len(uq.sortedQueriers) > q.maxQueriers && len(q.queriers) != q.maxQueriers { 413 return fmt.Errorf("user %s has incorrect number of queriers, expected=%d, got=%d", u, len(q.queriers), q.maxQueriers) 414 } 415 } 416 417 if uc != len(uq.userQueues) { 418 return fmt.Errorf("inconsistent number of users list and user queues") 419 } 420 421 return nil 422 } 423 424 // getUsersByQuerier returns the list of users handled by the provided querierID. 425 func getUsersByQuerier(queues *queues, querierID string) []string { 426 var userIDs []string 427 for userID, q := range queues.userQueues { 428 if q.queriers == nil { 429 // If it's nil then all queriers can handle this user. 430 userIDs = append(userIDs, userID) 431 continue 432 } 433 if _, ok := q.queriers[querierID]; ok { 434 userIDs = append(userIDs, userID) 435 } 436 } 437 return userIDs 438 } 439 440 func TestShuffleQueriers(t *testing.T) { 441 allQueriers := []string{"a", "b", "c", "d", "e"} 442 443 require.Nil(t, shuffleQueriersForUser(12345, 10, allQueriers, nil)) 444 require.Nil(t, shuffleQueriersForUser(12345, len(allQueriers), allQueriers, nil)) 445 446 r1 := shuffleQueriersForUser(12345, 3, allQueriers, nil) 447 require.Equal(t, 3, len(r1)) 448 449 // Same input produces same output. 450 r2 := shuffleQueriersForUser(12345, 3, allQueriers, nil) 451 require.Equal(t, 3, len(r2)) 452 require.Equal(t, r1, r2) 453 } 454 455 func TestShuffleQueriersCorrectness(t *testing.T) { 456 const queriersCount = 100 457 458 var allSortedQueriers []string 459 for i := 0; i < queriersCount; i++ { 460 allSortedQueriers = append(allSortedQueriers, fmt.Sprintf("%d", i)) 461 } 462 sort.Strings(allSortedQueriers) 463 464 r := rand.New(rand.NewSource(time.Now().UnixNano())) 465 const tests = 1000 466 for i := 0; i < tests; i++ { 467 toSelect := r.Intn(queriersCount) 468 if toSelect == 0 { 469 toSelect = 3 470 } 471 472 selected := shuffleQueriersForUser(r.Int63(), toSelect, allSortedQueriers, nil) 473 474 require.Equal(t, toSelect, len(selected)) 475 476 sort.Strings(allSortedQueriers) 477 prevQuerier := "" 478 for _, q := range allSortedQueriers { 479 require.True(t, prevQuerier < q, "non-unique querier") 480 prevQuerier = q 481 482 ix := sort.SearchStrings(allSortedQueriers, q) 483 require.True(t, ix < len(allSortedQueriers) && allSortedQueriers[ix] == q, "selected querier is not between all queriers") 484 } 485 } 486 }