github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/scheduler/queue/queue_test.go (about) 1 package queue 2 3 import ( 4 "context" 5 "fmt" 6 "strconv" 7 "sync" 8 "testing" 9 "time" 10 11 "github.com/grafana/dskit/services" 12 "github.com/prometheus/client_golang/prometheus" 13 "github.com/stretchr/testify/assert" 14 "github.com/stretchr/testify/require" 15 ) 16 17 func BenchmarkGetNextRequest(b *testing.B) { 18 const maxOutstandingPerTenant = 2 19 const numTenants = 50 20 const queriers = 5 21 22 queues := make([]*RequestQueue, 0, b.N) 23 24 for n := 0; n < b.N; n++ { 25 queue := NewRequestQueue(maxOutstandingPerTenant, 0, 26 prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}), 27 prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}), 28 ) 29 queues = append(queues, queue) 30 31 for ix := 0; ix < queriers; ix++ { 32 queue.RegisterQuerierConnection(fmt.Sprintf("querier-%d", ix)) 33 } 34 35 for i := 0; i < maxOutstandingPerTenant; i++ { 36 for j := 0; j < numTenants; j++ { 37 userID := strconv.Itoa(j) 38 39 err := queue.EnqueueRequest(userID, "request", 0, nil) 40 if err != nil { 41 b.Fatal(err) 42 } 43 } 44 } 45 } 46 47 ctx := context.Background() 48 b.ResetTimer() 49 50 for i := 0; i < b.N; i++ { 51 idx := FirstUser() 52 for j := 0; j < maxOutstandingPerTenant*numTenants; j++ { 53 querier := "" 54 b: 55 // Find querier with at least one request to avoid blocking in getNextRequestForQuerier. 56 for _, q := range queues[i].queues.userQueues { 57 for qid := range q.queriers { 58 querier = qid 59 break b 60 } 61 } 62 63 _, nidx, err := queues[i].GetNextRequestForQuerier(ctx, idx, querier) 64 if err != nil { 65 b.Fatal(err) 66 } 67 idx = nidx 68 } 69 } 70 } 71 72 func BenchmarkQueueRequest(b *testing.B) { 73 const maxOutstandingPerTenant = 2 74 const numTenants = 50 75 const queriers = 5 76 77 queues := make([]*RequestQueue, 0, b.N) 78 users := make([]string, 0, numTenants) 79 requests := make([]string, 0, numTenants) 80 81 for n := 0; n < b.N; n++ { 82 q := NewRequestQueue(maxOutstandingPerTenant, 0, 83 prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}), 84 prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}), 85 ) 86 87 for ix := 0; ix < queriers; ix++ { 88 q.RegisterQuerierConnection(fmt.Sprintf("querier-%d", ix)) 89 } 90 91 queues = append(queues, q) 92 93 for j := 0; j < numTenants; j++ { 94 requests = append(requests, fmt.Sprintf("%d-%d", n, j)) 95 users = append(users, strconv.Itoa(j)) 96 } 97 } 98 99 b.ResetTimer() 100 for n := 0; n < b.N; n++ { 101 for i := 0; i < maxOutstandingPerTenant; i++ { 102 for j := 0; j < numTenants; j++ { 103 err := queues[n].EnqueueRequest(users[j], requests[j], 0, nil) 104 if err != nil { 105 b.Fatal(err) 106 } 107 } 108 } 109 } 110 } 111 112 func TestRequestQueue_GetNextRequestForQuerier_ShouldGetRequestAfterReshardingBecauseQuerierHasBeenForgotten(t *testing.T) { 113 const forgetDelay = 3 * time.Second 114 115 queue := NewRequestQueue(1, forgetDelay, 116 prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}), 117 prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"})) 118 119 // Start the queue service. 120 ctx := context.Background() 121 require.NoError(t, services.StartAndAwaitRunning(ctx, queue)) 122 t.Cleanup(func() { 123 require.NoError(t, services.StopAndAwaitTerminated(ctx, queue)) 124 }) 125 126 // Two queriers connect. 127 queue.RegisterQuerierConnection("querier-1") 128 queue.RegisterQuerierConnection("querier-2") 129 130 // Querier-2 waits for a new request. 131 querier2wg := sync.WaitGroup{} 132 querier2wg.Add(1) 133 go func() { 134 defer querier2wg.Done() 135 _, _, err := queue.GetNextRequestForQuerier(ctx, FirstUser(), "querier-2") 136 require.NoError(t, err) 137 }() 138 139 // Querier-1 crashes (no graceful shutdown notification). 140 queue.UnregisterQuerierConnection("querier-1") 141 142 // Enqueue a request from an user which would be assigned to querier-1. 143 // NOTE: "user-1" hash falls in the querier-1 shard. 144 require.NoError(t, queue.EnqueueRequest("user-1", "request", 1, nil)) 145 146 startTime := time.Now() 147 querier2wg.Wait() 148 waitTime := time.Since(startTime) 149 150 // We expect that querier-2 got the request only after querier-1 forget delay is passed. 151 assert.GreaterOrEqual(t, waitTime.Milliseconds(), forgetDelay.Milliseconds()) 152 } 153 154 func TestContextCond(t *testing.T) { 155 t.Run("wait until broadcast", func(t *testing.T) { 156 t.Parallel() 157 mtx := &sync.Mutex{} 158 cond := contextCond{Cond: sync.NewCond(mtx)} 159 160 doneWaiting := make(chan struct{}) 161 162 mtx.Lock() 163 go func() { 164 cond.Wait(context.Background()) 165 mtx.Unlock() 166 close(doneWaiting) 167 }() 168 169 assertChanNotReceived(t, doneWaiting, 100*time.Millisecond, "cond.Wait returned, but it should not because we did not broadcast yet") 170 171 cond.Broadcast() 172 assertChanReceived(t, doneWaiting, 250*time.Millisecond, "cond.Wait did not return after broadcast") 173 }) 174 175 t.Run("wait until context deadline", func(t *testing.T) { 176 t.Parallel() 177 mtx := &sync.Mutex{} 178 cond := contextCond{Cond: sync.NewCond(mtx)} 179 doneWaiting := make(chan struct{}) 180 181 ctx, cancel := context.WithCancel(context.Background()) 182 defer cancel() 183 184 mtx.Lock() 185 go func() { 186 cond.Wait(ctx) 187 mtx.Unlock() 188 close(doneWaiting) 189 }() 190 191 assertChanNotReceived(t, doneWaiting, 100*time.Millisecond, "cond.Wait returned, but it should not because we did not broadcast yet and didn't cancel the context") 192 193 cancel() 194 assertChanReceived(t, doneWaiting, 250*time.Millisecond, "cond.Wait did not return after cancelling the context") 195 }) 196 197 t.Run("wait on already canceled context", func(t *testing.T) { 198 // This test represents the racy real world scenario, 199 // we don't know whether it's going to wait before the broadcast triggered by the context cancellation. 200 t.Parallel() 201 mtx := &sync.Mutex{} 202 cond := contextCond{Cond: sync.NewCond(mtx)} 203 doneWaiting := make(chan struct{}) 204 205 alreadyCanceledContext, cancel := context.WithCancel(context.Background()) 206 cancel() 207 208 mtx.Lock() 209 go func() { 210 cond.Wait(alreadyCanceledContext) 211 mtx.Unlock() 212 close(doneWaiting) 213 }() 214 215 assertChanReceived(t, doneWaiting, 250*time.Millisecond, "cond.Wait did not return after cancelling the context") 216 }) 217 218 t.Run("wait on already canceled context, but it takes a while to wait", func(t *testing.T) { 219 t.Parallel() 220 mtx := &sync.Mutex{} 221 cond := contextCond{ 222 Cond: sync.NewCond(mtx), 223 testHookBeforeWaiting: func() { 224 // This makes the waiting goroutine so slow that out Wait(ctx) will need to broadcast once it sees it waiting. 225 time.Sleep(250 * time.Millisecond) 226 }, 227 } 228 doneWaiting := make(chan struct{}) 229 230 alreadyCanceledContext, cancel := context.WithCancel(context.Background()) 231 cancel() 232 233 mtx.Lock() 234 go func() { 235 cond.Wait(alreadyCanceledContext) 236 mtx.Unlock() 237 close(doneWaiting) 238 }() 239 240 assertChanReceived(t, doneWaiting, time.Second, "cond.Wait did not return after 500ms") 241 }) 242 243 t.Run("lots of goroutines waiting at the same time, none of them misses it's broadcast from cancel", func(t *testing.T) { 244 t.Parallel() 245 mtx := &sync.Mutex{} 246 cond := contextCond{ 247 Cond: sync.NewCond(mtx), 248 testHookBeforeWaiting: func() { 249 // Wait just a little bit to create every goroutine 250 time.Sleep(time.Millisecond) 251 }, 252 } 253 const goroutines = 100 254 255 doneWaiting := make(chan struct{}, goroutines) 256 release := make(chan struct{}) 257 258 ctx, cancel := context.WithCancel(context.Background()) 259 cancel() 260 261 for i := 0; i < goroutines; i++ { 262 go func() { 263 <-release 264 265 mtx.Lock() 266 cond.Wait(ctx) 267 mtx.Unlock() 268 269 doneWaiting <- struct{}{} 270 }() 271 } 272 go func() { 273 <-release 274 cancel() 275 }() 276 277 close(release) 278 279 assert.Eventually(t, func() bool { 280 return len(doneWaiting) == goroutines 281 }, time.Second, 10*time.Millisecond) 282 }) 283 } 284 285 func assertChanReceived(t *testing.T, c chan struct{}, timeout time.Duration, msg string) { 286 t.Helper() 287 288 select { 289 case <-c: 290 case <-time.After(timeout): 291 t.Fatalf(msg) 292 } 293 } 294 295 func assertChanNotReceived(t *testing.T, c chan struct{}, wait time.Duration, msg string, args ...interface{}) { 296 t.Helper() 297 298 select { 299 case <-c: 300 t.Fatalf(msg, args...) 301 case <-time.After(wait): 302 // OK! 303 } 304 }