github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/lokifrontend/frontend/v2/frontend_test.go

package v2

import (
	"context"
	"net"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/go-kit/log"
	"github.com/grafana/dskit/flagext"
	"github.com/grafana/dskit/services"
	"github.com/stretchr/testify/require"
	"github.com/weaveworks/common/httpgrpc"
	"github.com/weaveworks/common/user"
	"go.uber.org/atomic"
	"google.golang.org/grpc"

	"github.com/grafana/loki/pkg/lokifrontend/frontend/v2/frontendv2pb"
	"github.com/grafana/loki/pkg/querier/stats"
	"github.com/grafana/loki/pkg/scheduler/schedulerpb"
	"github.com/grafana/loki/pkg/util/test"
)

const testFrontendWorkerConcurrency = 5

func setupFrontend(t *testing.T, schedulerReplyFunc func(f *Frontend, msg *schedulerpb.FrontendToScheduler) *schedulerpb.SchedulerToFrontend) (*Frontend, *mockScheduler) {
	l, err := net.Listen("tcp", "")
	require.NoError(t, err)

	server := grpc.NewServer()

	h, p, err := net.SplitHostPort(l.Addr().String())
	require.NoError(t, err)

	grpcPort, err := strconv.Atoi(p)
	require.NoError(t, err)

	cfg := Config{}
	flagext.DefaultValues(&cfg)
	cfg.SchedulerAddress = l.Addr().String()
	cfg.WorkerConcurrency = testFrontendWorkerConcurrency
	cfg.Addr = h
	cfg.Port = grpcPort

	// logger := log.NewLogfmtLogger(os.Stdout)
	logger := log.NewNopLogger()
	f, err := NewFrontend(cfg, nil, logger, nil)
	require.NoError(t, err)

	frontendv2pb.RegisterFrontendForQuerierServer(server, f)

	ms := newMockScheduler(t, f, schedulerReplyFunc)
	schedulerpb.RegisterSchedulerForFrontendServer(server, ms)

	require.NoError(t, services.StartAndAwaitRunning(context.Background(), f))
	t.Cleanup(func() {
		_ = services.StopAndAwaitTerminated(context.Background(), f)
	})

	go func() {
		_ = server.Serve(l)
	}()

	t.Cleanup(func() {
		_ = l.Close()
	})

	// Wait for frontend to connect to scheduler.
	test.Poll(t, 1*time.Second, 1, func() interface{} {
		ms.mu.Lock()
		defer ms.mu.Unlock()

		return len(ms.frontendAddr)
	})

	return f, ms
}

func sendResponseWithDelay(f *Frontend, delay time.Duration, userID string, queryID uint64, resp *httpgrpc.HTTPResponse) {
	if delay > 0 {
		time.Sleep(delay)
	}

	ctx := user.InjectOrgID(context.Background(), userID)
	_, _ = f.QueryResult(ctx, &frontendv2pb.QueryResultRequest{
		QueryID:      queryID,
		HttpResponse: resp,
		Stats:        &stats.Stats{},
	})
}

func TestFrontendBasicWorkflow(t *testing.T) {
	const (
		body   = "all fine here"
		userID = "test"
	)

	f, _ := setupFrontend(t, func(f *Frontend, msg *schedulerpb.FrontendToScheduler) *schedulerpb.SchedulerToFrontend {
		// We cannot call QueryResult directly, as Frontend is not yet waiting for the response.
		// It first needs to be told that enqueuing has succeeded.
		go sendResponseWithDelay(f, 100*time.Millisecond, userID, msg.QueryID, &httpgrpc.HTTPResponse{
			Code: 200,
			Body: []byte(body),
		})

		return &schedulerpb.SchedulerToFrontend{Status: schedulerpb.OK}
	})

	resp, err := f.RoundTripGRPC(user.InjectOrgID(context.Background(), userID), &httpgrpc.HTTPRequest{})
	require.NoError(t, err)
	require.Equal(t, int32(200), resp.Code)
	require.Equal(t, []byte(body), resp.Body)
}

func TestFrontendRetryEnqueue(t *testing.T) {
	// Frontend uses worker concurrency to compute the number of retries. We use one failure fewer than that.
	failures := atomic.NewInt64(testFrontendWorkerConcurrency - 1)
	const (
		body   = "hello world"
		userID = "test"
	)

	f, _ := setupFrontend(t, func(f *Frontend, msg *schedulerpb.FrontendToScheduler) *schedulerpb.SchedulerToFrontend {
		fail := failures.Dec()
		if fail >= 0 {
			return &schedulerpb.SchedulerToFrontend{Status: schedulerpb.SHUTTING_DOWN}
		}

		go sendResponseWithDelay(f, 100*time.Millisecond, userID, msg.QueryID, &httpgrpc.HTTPResponse{
			Code: 200,
			Body: []byte(body),
		})

		return &schedulerpb.SchedulerToFrontend{Status: schedulerpb.OK}
	})
	_, err := f.RoundTripGRPC(user.InjectOrgID(context.Background(), userID), &httpgrpc.HTTPRequest{})
	require.NoError(t, err)
}

func TestFrontendEnqueueFailure(t *testing.T) {
	f, _ := setupFrontend(t, func(f *Frontend, msg *schedulerpb.FrontendToScheduler) *schedulerpb.SchedulerToFrontend {
		return &schedulerpb.SchedulerToFrontend{Status: schedulerpb.SHUTTING_DOWN}
	})

	_, err := f.RoundTripGRPC(user.InjectOrgID(context.Background(), "test"), &httpgrpc.HTTPRequest{})
	require.Error(t, err)
	require.True(t, strings.Contains(err.Error(), "failed to enqueue request"))
}

func TestFrontendCancellation(t *testing.T) {
	f, ms := setupFrontend(t, nil)

	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
	defer cancel()

	resp, err := f.RoundTripGRPC(user.InjectOrgID(ctx, "test"), &httpgrpc.HTTPRequest{})
	require.EqualError(t, err, context.DeadlineExceeded.Error())
	require.Nil(t, resp)

	// Wait a bit to make sure the scheduler receives the cancellation request.
	test.Poll(t, time.Second, 2, func() interface{} {
		ms.mu.Lock()
		defer ms.mu.Unlock()

		return len(ms.msgs)
	})

	ms.checkWithLock(func() {
		require.Equal(t, 2, len(ms.msgs))
		require.True(t, ms.msgs[0].Type == schedulerpb.ENQUEUE)
		require.True(t, ms.msgs[1].Type == schedulerpb.CANCEL)
		require.True(t, ms.msgs[0].QueryID == ms.msgs[1].QueryID)
	})
}

// If frontend workers are busy, a cancellation passed on by the query frontend may not reach
// all of the frontend workers, and therefore may never reach the scheduler.
// Issue: https://github.com/grafana/loki/issues/5132
func TestFrontendWorkerCancellation(t *testing.T) {
	f, ms := setupFrontend(t, nil)

	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
	defer cancel()

	// Send more requests than the frontend worker concurrency, so that all frontend workers stay busy serving requests.
	reqCount := testFrontendWorkerConcurrency + 5
	var wg sync.WaitGroup
	for i := 0; i < reqCount; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			resp, err := f.RoundTripGRPC(user.InjectOrgID(ctx, "test"), &httpgrpc.HTTPRequest{})
			require.EqualError(t, err, context.DeadlineExceeded.Error())
			require.Nil(t, resp)
		}()
	}

	wg.Wait()

	// Wait a bit to make sure the scheduler receives the cancellation requests.
	// We expect 2 * reqCount messages, because every request should also have a corresponding cancel request.
	test.Poll(t, 5*time.Second, 2*reqCount, func() interface{} {
		ms.mu.Lock()
		defer ms.mu.Unlock()

		return len(ms.msgs)
	})

	ms.checkWithLock(func() {
		require.Equal(t, 2*reqCount, len(ms.msgs))
	})
}

func TestFrontendFailedCancellation(t *testing.T) {
	f, ms := setupFrontend(t, nil)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	go func() {
		time.Sleep(100 * time.Millisecond)

		// Stop scheduler workers.
		addr := ""
		f.schedulerWorkers.mu.Lock()
		for k := range f.schedulerWorkers.workers {
			addr = k
			break
		}
		f.schedulerWorkers.mu.Unlock()

		f.schedulerWorkers.AddressRemoved(addr)

		// Wait for worker goroutines to stop.
		time.Sleep(100 * time.Millisecond)

		// Cancel the request. The frontend will try to send the cancellation to the scheduler, but that will fail (not visible to the user).
		// Everything else should still work fine.
		cancel()
	}()

	// Send the request.
	resp, err := f.RoundTripGRPC(user.InjectOrgID(ctx, "test"), &httpgrpc.HTTPRequest{})
	require.EqualError(t, err, context.Canceled.Error())
	require.Nil(t, resp)

	ms.checkWithLock(func() {
		require.Equal(t, 1, len(ms.msgs))
	})
}

type mockScheduler struct {
	t *testing.T
	f *Frontend

	replyFunc func(f *Frontend, msg *schedulerpb.FrontendToScheduler) *schedulerpb.SchedulerToFrontend

	mu           sync.Mutex
	frontendAddr map[string]int
	msgs         []*schedulerpb.FrontendToScheduler
}

func newMockScheduler(t *testing.T, f *Frontend, replyFunc func(f *Frontend, msg *schedulerpb.FrontendToScheduler) *schedulerpb.SchedulerToFrontend) *mockScheduler {
	return &mockScheduler{t: t, f: f, frontendAddr: map[string]int{}, replyFunc: replyFunc}
}

func (m *mockScheduler) checkWithLock(fn func()) {
	m.mu.Lock()
	defer m.mu.Unlock()

	fn()
}

func (m *mockScheduler) FrontendLoop(frontend schedulerpb.SchedulerForFrontend_FrontendLoopServer) error {
	init, err := frontend.Recv()
	if err != nil {
		return err
	}

	m.mu.Lock()
	m.frontendAddr[init.FrontendAddress]++
	m.mu.Unlock()

	// Ack INIT from frontend.
	if err := frontend.Send(&schedulerpb.SchedulerToFrontend{Status: schedulerpb.OK}); err != nil {
		return err
	}

	for {
		msg, err := frontend.Recv()
		if err != nil {
			return err
		}

		m.mu.Lock()
		m.msgs = append(m.msgs, msg)
		m.mu.Unlock()

		reply := &schedulerpb.SchedulerToFrontend{Status: schedulerpb.OK}
		if m.replyFunc != nil {
			reply = m.replyFunc(m.f, msg)
		}

		if err := frontend.Send(reply); err != nil {
			return err
		}
	}
}