github.com/grafana/pyroscope@v1.18.0/pkg/frontend/frontend_scheduler_worker.go (about) 1 // SPDX-License-Identifier: AGPL-3.0-only 2 // Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/frontend/v2/frontend_scheduler_worker.go 3 // Provenance-includes-license: Apache-2.0 4 // Provenance-includes-copyright: The Cortex Authors. 5 6 package frontend 7 8 import ( 9 "context" 10 "io" 11 "math/rand" 12 "net/http" 13 "sync" 14 "time" 15 16 "github.com/go-kit/log" 17 "github.com/go-kit/log/level" 18 "github.com/grafana/dskit/backoff" 19 "github.com/grafana/dskit/services" 20 "github.com/pkg/errors" 21 "github.com/prometheus/client_golang/prometheus" 22 "github.com/prometheus/client_golang/prometheus/promauto" 23 "google.golang.org/grpc" 24 25 "github.com/grafana/pyroscope/pkg/frontend/frontendpb" 26 "github.com/grafana/pyroscope/pkg/scheduler/schedulerdiscovery" 27 "github.com/grafana/pyroscope/pkg/scheduler/schedulerpb" 28 "github.com/grafana/pyroscope/pkg/util/httpgrpc" 29 "github.com/grafana/pyroscope/pkg/util/servicediscovery" 30 ) 31 32 const ( 33 schedulerAddressLabel = "scheduler_address" 34 // schedulerWorkerCancelChanCapacity should be at least as big as the number of sub-queries issued by a single query 35 // per scheduler (after splitting and sharding) in order to allow all of them being canceled while scheduler worker is busy. 36 schedulerWorkerCancelChanCapacity = 1000 37 ) 38 39 type frontendSchedulerWorkers struct { 40 services.Service 41 42 cfg Config 43 log log.Logger 44 frontendAddress string 45 46 // Channel with requests that should be forwarded to the scheduler. 47 requestsCh <-chan *frontendRequest 48 49 schedulerDiscovery services.Service 50 schedulerDiscoveryWatcher *services.FailureWatcher 51 52 mu sync.Mutex 53 // Set to nil when stop is called... no more workers are created afterwards. 54 workers map[string]*frontendSchedulerWorker 55 56 enqueuedRequests *prometheus.CounterVec 57 } 58 59 func newFrontendSchedulerWorkers(cfg Config, frontendAddress string, requestsCh <-chan *frontendRequest, log log.Logger, reg prometheus.Registerer) (*frontendSchedulerWorkers, error) { 60 f := &frontendSchedulerWorkers{ 61 cfg: cfg, 62 log: log, 63 frontendAddress: frontendAddress, 64 requestsCh: requestsCh, 65 workers: map[string]*frontendSchedulerWorker{}, 66 schedulerDiscoveryWatcher: services.NewFailureWatcher(), 67 enqueuedRequests: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ 68 Name: "pyroscope_query_frontend_workers_enqueued_requests_total", 69 Help: "Total number of requests enqueued by each query frontend worker (regardless of the result), labeled by scheduler address.", 70 }, []string{schedulerAddressLabel}), 71 } 72 73 var err error 74 f.schedulerDiscovery, err = schedulerdiscovery.New(cfg.QuerySchedulerDiscovery, cfg.SchedulerAddress, cfg.DNSLookupPeriod, "query-frontend", f, log, reg) 75 if err != nil { 76 return nil, err 77 } 78 79 f.Service = services.NewBasicService(f.starting, f.running, f.stopping) 80 return f, nil 81 } 82 83 func (f *frontendSchedulerWorkers) starting(ctx context.Context) error { 84 f.schedulerDiscoveryWatcher.WatchService(f.schedulerDiscovery) 85 86 return services.StartAndAwaitRunning(ctx, f.schedulerDiscovery) 87 } 88 89 func (f *frontendSchedulerWorkers) running(ctx context.Context) error { 90 select { 91 case <-ctx.Done(): 92 return nil 93 case err := <-f.schedulerDiscoveryWatcher.Chan(): 94 return errors.Wrap(err, "query-frontend workers subservice failed") 95 } 96 } 97 98 func (f *frontendSchedulerWorkers) stopping(_ error) error { 99 err := services.StopAndAwaitTerminated(context.Background(), f.schedulerDiscovery) 100 101 f.mu.Lock() 102 defer f.mu.Unlock() 103 104 for _, w := range f.workers { 105 w.stop() 106 } 107 f.workers = nil 108 109 return err 110 } 111 112 func (f *frontendSchedulerWorkers) InstanceAdded(instance servicediscovery.Instance) { 113 // Connect only to in-use query-scheduler instances. 114 if instance.InUse { 115 f.addScheduler(instance.Address) 116 } 117 } 118 119 func (f *frontendSchedulerWorkers) addScheduler(address string) { 120 f.mu.Lock() 121 ws := f.workers 122 w := f.workers[address] 123 124 // Already stopped or we already have worker for this address. 125 if ws == nil || w != nil { 126 f.mu.Unlock() 127 return 128 } 129 f.mu.Unlock() 130 131 level.Info(f.log).Log("msg", "adding connection to query-scheduler", "addr", address) 132 conn, err := f.connectToScheduler(context.Background(), address) 133 if err != nil { 134 level.Error(f.log).Log("msg", "error connecting to query-scheduler", "addr", address, "err", err) 135 return 136 } 137 138 // No worker for this address yet, start a new one. 139 w = newFrontendSchedulerWorker(conn, address, f.frontendAddress, f.requestsCh, f.cfg.WorkerConcurrency, f.enqueuedRequests.WithLabelValues(address), f.cfg.MaxLoopDuration, f.log) 140 141 f.mu.Lock() 142 defer f.mu.Unlock() 143 144 // Can be nil if stopping has been called already. 145 if f.workers == nil { 146 return 147 } 148 // We have to recheck for presence in case we got called again while we were 149 // connecting and that one finished first. 150 if f.workers[address] != nil { 151 return 152 } 153 f.workers[address] = w 154 w.start() 155 } 156 157 func (f *frontendSchedulerWorkers) InstanceRemoved(instance servicediscovery.Instance) { 158 f.removeScheduler(instance.Address) 159 } 160 161 func (f *frontendSchedulerWorkers) removeScheduler(address string) { 162 f.mu.Lock() 163 // This works fine if f.workers is nil already or the worker is missing 164 // because the query-scheduler instance was not in use. 165 w := f.workers[address] 166 delete(f.workers, address) 167 f.mu.Unlock() 168 169 if w != nil { 170 level.Info(f.log).Log("msg", "removing connection to query-scheduler", "addr", address) 171 w.stop() 172 } 173 f.enqueuedRequests.Delete(prometheus.Labels{schedulerAddressLabel: address}) 174 } 175 176 func (f *frontendSchedulerWorkers) InstanceChanged(instance servicediscovery.Instance) { 177 // Ensure the query-frontend connects to in-use query-scheduler instances and disconnect from ones no more in use. 178 // The called methods correctly handle the case the query-frontend is already connected/disconnected 179 // to/from the given query-scheduler instance. 180 if instance.InUse { 181 f.addScheduler(instance.Address) 182 } else { 183 f.removeScheduler(instance.Address) 184 } 185 } 186 187 // Get number of workers. 188 func (f *frontendSchedulerWorkers) getWorkersCount() int { 189 f.mu.Lock() 190 defer f.mu.Unlock() 191 192 return len(f.workers) 193 } 194 195 func (f *frontendSchedulerWorkers) connectToScheduler(ctx context.Context, address string) (*grpc.ClientConn, error) { 196 // Because we only use single long-running method, it doesn't make sense to inject user ID, send over tracing or add metrics. 197 opts, err := f.cfg.GRPCClientConfig.DialOption(nil, nil, nil) 198 if err != nil { 199 return nil, err 200 } 201 202 conn, err := grpc.DialContext(ctx, address, opts...) 203 if err != nil { 204 return nil, err 205 } 206 return conn, nil 207 } 208 209 // Worker managing single gRPC connection to Scheduler. Each worker starts multiple goroutines for forwarding 210 // requests and cancellations to scheduler. 211 type frontendSchedulerWorker struct { 212 log log.Logger 213 214 conn *grpc.ClientConn 215 concurrency int 216 schedulerAddr string 217 frontendAddr string 218 219 // Context and cancellation used by individual goroutines. 220 ctx context.Context 221 cancel context.CancelFunc 222 wg sync.WaitGroup 223 224 // Shared between all frontend workers. 225 requestCh <-chan *frontendRequest 226 227 // Cancellation requests for this scheduler are received via this channel. It is passed to frontend after 228 // query has been enqueued to scheduler. 229 cancelCh chan uint64 230 231 // Number of queries sent to this scheduler. 232 enqueuedRequests prometheus.Counter 233 234 maxLoopDuration time.Duration 235 } 236 237 func newFrontendSchedulerWorker(conn *grpc.ClientConn, schedulerAddr string, frontendAddr string, requestCh <-chan *frontendRequest, concurrency int, enqueuedRequests prometheus.Counter, maxLoopDuration time.Duration, log log.Logger) *frontendSchedulerWorker { 238 w := &frontendSchedulerWorker{ 239 log: log, 240 conn: conn, 241 concurrency: concurrency, 242 schedulerAddr: schedulerAddr, 243 frontendAddr: frontendAddr, 244 requestCh: requestCh, 245 cancelCh: make(chan uint64, schedulerWorkerCancelChanCapacity), 246 enqueuedRequests: enqueuedRequests, 247 maxLoopDuration: maxLoopDuration, 248 } 249 w.ctx, w.cancel = context.WithCancel(context.Background()) 250 251 return w 252 } 253 254 func (w *frontendSchedulerWorker) start() { 255 client := schedulerpb.NewSchedulerForFrontendClient(w.conn) 256 for i := 0; i < w.concurrency; i++ { 257 w.wg.Add(1) 258 go func() { 259 defer w.wg.Done() 260 w.runOne(w.ctx, client) 261 }() 262 } 263 } 264 265 func (w *frontendSchedulerWorker) stop() { 266 w.cancel() 267 w.wg.Wait() 268 if err := w.conn.Close(); err != nil { 269 level.Error(w.log).Log("msg", "error while closing connection to scheduler", "err", err) 270 } 271 } 272 273 func (w *frontendSchedulerWorker) runOne(ctx context.Context, client schedulerpb.SchedulerForFrontendClient) { 274 // attemptLoop returns false if there was any error with forwarding requests to scheduler. 275 attemptLoop := func() bool { 276 ctx, cancel := context.WithCancel(ctx) 277 defer cancel() // cancel the stream after we are done to release resources 278 279 loop, loopErr := client.FrontendLoop(ctx) 280 if loopErr != nil { 281 level.Error(w.log).Log("msg", "error contacting scheduler", "err", loopErr, "addr", w.schedulerAddr) 282 return false 283 } 284 285 loopErr = w.schedulerLoop(loop) 286 if loopErr == io.EOF { 287 level.Debug(w.log).Log("msg", "scheduler loop closed", "addr", w.schedulerAddr) 288 return true 289 } 290 if closeErr := loop.CloseSend(); closeErr != nil { 291 level.Debug(w.log).Log("msg", "failed to close frontend loop", "err", loopErr, "addr", w.schedulerAddr) 292 } 293 294 if loopErr != nil { 295 level.Error(w.log).Log("msg", "error sending requests to scheduler", "err", loopErr, "addr", w.schedulerAddr) 296 return false 297 } 298 return true 299 } 300 301 backoffConfig := backoff.Config{ 302 MinBackoff: 250 * time.Millisecond, 303 MaxBackoff: 2 * time.Second, 304 } 305 backoff := backoff.New(ctx, backoffConfig) 306 for backoff.Ongoing() { 307 if !attemptLoop() { 308 backoff.Wait() 309 } else { 310 backoff.Reset() 311 } 312 } 313 } 314 315 func jitter(d time.Duration, factor float64) time.Duration { 316 maxJitter := time.Duration(float64(d) * factor) 317 return d - time.Duration(rand.Int63n(int64(maxJitter))) 318 } 319 320 func (w *frontendSchedulerWorker) schedulerLoop(loop schedulerpb.SchedulerForFrontend_FrontendLoopClient) error { 321 if err := loop.Send(&schedulerpb.FrontendToScheduler{ 322 Type: schedulerpb.FrontendToSchedulerType_INIT, 323 FrontendAddress: w.frontendAddr, 324 }); err != nil { 325 return err 326 } 327 328 if resp, err := loop.Recv(); err != nil || resp.Status != schedulerpb.SchedulerToFrontendStatus_OK { 329 if err != nil { 330 return err 331 } 332 return errors.Errorf("unexpected status received for init: %v", resp.Status) 333 } 334 335 ctx, cancel := context.WithCancel(loop.Context()) 336 defer cancel() 337 if w.maxLoopDuration > 0 { 338 go func() { 339 timer := time.NewTimer(jitter(w.maxLoopDuration, 0.3)) 340 defer timer.Stop() 341 342 select { 343 case <-ctx.Done(): 344 return 345 case <-timer.C: 346 cancel() 347 return 348 } 349 }() 350 } 351 352 for { 353 select { 354 case <-ctx.Done(): 355 // No need to report error if our internal context is canceled. This can happen during shutdown, 356 // or when scheduler is no longer resolvable. (It would be nice if this context reported "done" also when 357 // connection scheduler stops the call, but that doesn't seem to be the case). 358 // 359 // Reporting error here would delay reopening the stream (if the worker context is not done yet). 360 level.Debug(w.log).Log("msg", "stream context finished", "err", ctx.Err()) 361 return nil 362 case req := <-w.requestCh: 363 err := loop.Send(&schedulerpb.FrontendToScheduler{ 364 Type: schedulerpb.FrontendToSchedulerType_ENQUEUE, 365 QueryID: req.queryID, 366 UserID: req.userID, 367 HttpRequest: req.request, 368 FrontendAddress: w.frontendAddr, 369 StatsEnabled: req.statsEnabled, 370 }) 371 w.enqueuedRequests.Inc() 372 373 if err != nil { 374 req.enqueue <- enqueueResult{status: failed} 375 return err 376 } 377 378 resp, err := loop.Recv() 379 if err != nil { 380 req.enqueue <- enqueueResult{status: failed} 381 return err 382 } 383 384 switch resp.Status { 385 case schedulerpb.SchedulerToFrontendStatus_OK: 386 req.enqueue <- enqueueResult{status: waitForResponse, cancelCh: w.cancelCh} 387 // Response will come from querier. 388 389 case schedulerpb.SchedulerToFrontendStatus_SHUTTING_DOWN: 390 // Scheduler is shutting down, report failure to enqueue and stop this loop. 391 req.enqueue <- enqueueResult{status: failed} 392 return errors.New("scheduler is shutting down") 393 394 case schedulerpb.SchedulerToFrontendStatus_ERROR: 395 req.enqueue <- enqueueResult{status: waitForResponse} 396 req.response <- &frontendpb.QueryResultRequest{ 397 HttpResponse: &httpgrpc.HTTPResponse{ 398 Code: http.StatusInternalServerError, 399 Body: []byte(err.Error()), 400 }, 401 } 402 403 case schedulerpb.SchedulerToFrontendStatus_TOO_MANY_REQUESTS_PER_TENANT: 404 req.enqueue <- enqueueResult{status: waitForResponse} 405 req.response <- &frontendpb.QueryResultRequest{ 406 HttpResponse: &httpgrpc.HTTPResponse{ 407 Code: http.StatusTooManyRequests, 408 Body: []byte("too many outstanding requests"), 409 }, 410 } 411 412 default: 413 level.Error(w.log).Log("msg", "unknown response status from the scheduler", "resp", resp, "queryID", req.queryID) 414 req.enqueue <- enqueueResult{status: failed} 415 } 416 417 case reqID := <-w.cancelCh: 418 err := loop.Send(&schedulerpb.FrontendToScheduler{ 419 Type: schedulerpb.FrontendToSchedulerType_CANCEL, 420 QueryID: reqID, 421 }) 422 if err != nil { 423 return err 424 } 425 426 resp, err := loop.Recv() 427 if err != nil { 428 return err 429 } 430 431 // Scheduler may be shutting down, report that. 432 if resp.Status != schedulerpb.SchedulerToFrontendStatus_OK { 433 return errors.Errorf("unexpected status received for cancellation: %v", resp.Status) 434 } 435 } 436 } 437 }