github.com/grafana/pyroscope@v1.18.0/pkg/frontend/frontend_scheduler_worker.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/frontend/v2/frontend_scheduler_worker.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package frontend
     7  
     8  import (
     9  	"context"
    10  	"io"
    11  	"math/rand"
    12  	"net/http"
    13  	"sync"
    14  	"time"
    15  
    16  	"github.com/go-kit/log"
    17  	"github.com/go-kit/log/level"
    18  	"github.com/grafana/dskit/backoff"
    19  	"github.com/grafana/dskit/services"
    20  	"github.com/pkg/errors"
    21  	"github.com/prometheus/client_golang/prometheus"
    22  	"github.com/prometheus/client_golang/prometheus/promauto"
    23  	"google.golang.org/grpc"
    24  
    25  	"github.com/grafana/pyroscope/pkg/frontend/frontendpb"
    26  	"github.com/grafana/pyroscope/pkg/scheduler/schedulerdiscovery"
    27  	"github.com/grafana/pyroscope/pkg/scheduler/schedulerpb"
    28  	"github.com/grafana/pyroscope/pkg/util/httpgrpc"
    29  	"github.com/grafana/pyroscope/pkg/util/servicediscovery"
    30  )
    31  
const (
	// schedulerAddressLabel is the metric label name under which the per-worker
	// enqueued-requests counter records the query-scheduler address.
	schedulerAddressLabel = "scheduler_address"
	// schedulerWorkerCancelChanCapacity should be at least as big as the number of sub-queries issued by a single query
	// per scheduler (after splitting and sharding) in order to allow all of them being canceled while scheduler worker is busy.
	schedulerWorkerCancelChanCapacity = 1000
)
    38  
// frontendSchedulerWorkers manages one frontendSchedulerWorker per discovered
// query-scheduler instance. It reacts to service-discovery callbacks
// (InstanceAdded / InstanceRemoved / InstanceChanged) by adding or removing
// worker connections.
type frontendSchedulerWorkers struct {
	services.Service

	cfg             Config
	log             log.Logger
	frontendAddress string

	// Channel with requests that should be forwarded to the scheduler.
	requestsCh <-chan *frontendRequest

	schedulerDiscovery        services.Service
	schedulerDiscoveryWatcher *services.FailureWatcher

	// mu guards workers.
	mu sync.Mutex
	// Set to nil when stop is called... no more workers are created afterwards.
	workers map[string]*frontendSchedulerWorker

	// Counter of enqueued requests, labeled by scheduler address.
	enqueuedRequests *prometheus.CounterVec
}
    58  
    59  func newFrontendSchedulerWorkers(cfg Config, frontendAddress string, requestsCh <-chan *frontendRequest, log log.Logger, reg prometheus.Registerer) (*frontendSchedulerWorkers, error) {
    60  	f := &frontendSchedulerWorkers{
    61  		cfg:                       cfg,
    62  		log:                       log,
    63  		frontendAddress:           frontendAddress,
    64  		requestsCh:                requestsCh,
    65  		workers:                   map[string]*frontendSchedulerWorker{},
    66  		schedulerDiscoveryWatcher: services.NewFailureWatcher(),
    67  		enqueuedRequests: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
    68  			Name: "pyroscope_query_frontend_workers_enqueued_requests_total",
    69  			Help: "Total number of requests enqueued by each query frontend worker (regardless of the result), labeled by scheduler address.",
    70  		}, []string{schedulerAddressLabel}),
    71  	}
    72  
    73  	var err error
    74  	f.schedulerDiscovery, err = schedulerdiscovery.New(cfg.QuerySchedulerDiscovery, cfg.SchedulerAddress, cfg.DNSLookupPeriod, "query-frontend", f, log, reg)
    75  	if err != nil {
    76  		return nil, err
    77  	}
    78  
    79  	f.Service = services.NewBasicService(f.starting, f.running, f.stopping)
    80  	return f, nil
    81  }
    82  
    83  func (f *frontendSchedulerWorkers) starting(ctx context.Context) error {
    84  	f.schedulerDiscoveryWatcher.WatchService(f.schedulerDiscovery)
    85  
    86  	return services.StartAndAwaitRunning(ctx, f.schedulerDiscovery)
    87  }
    88  
    89  func (f *frontendSchedulerWorkers) running(ctx context.Context) error {
    90  	select {
    91  	case <-ctx.Done():
    92  		return nil
    93  	case err := <-f.schedulerDiscoveryWatcher.Chan():
    94  		return errors.Wrap(err, "query-frontend workers subservice failed")
    95  	}
    96  }
    97  
    98  func (f *frontendSchedulerWorkers) stopping(_ error) error {
    99  	err := services.StopAndAwaitTerminated(context.Background(), f.schedulerDiscovery)
   100  
   101  	f.mu.Lock()
   102  	defer f.mu.Unlock()
   103  
   104  	for _, w := range f.workers {
   105  		w.stop()
   106  	}
   107  	f.workers = nil
   108  
   109  	return err
   110  }
   111  
   112  func (f *frontendSchedulerWorkers) InstanceAdded(instance servicediscovery.Instance) {
   113  	// Connect only to in-use query-scheduler instances.
   114  	if instance.InUse {
   115  		f.addScheduler(instance.Address)
   116  	}
   117  }
   118  
   119  func (f *frontendSchedulerWorkers) addScheduler(address string) {
   120  	f.mu.Lock()
   121  	ws := f.workers
   122  	w := f.workers[address]
   123  
   124  	// Already stopped or we already have worker for this address.
   125  	if ws == nil || w != nil {
   126  		f.mu.Unlock()
   127  		return
   128  	}
   129  	f.mu.Unlock()
   130  
   131  	level.Info(f.log).Log("msg", "adding connection to query-scheduler", "addr", address)
   132  	conn, err := f.connectToScheduler(context.Background(), address)
   133  	if err != nil {
   134  		level.Error(f.log).Log("msg", "error connecting to query-scheduler", "addr", address, "err", err)
   135  		return
   136  	}
   137  
   138  	// No worker for this address yet, start a new one.
   139  	w = newFrontendSchedulerWorker(conn, address, f.frontendAddress, f.requestsCh, f.cfg.WorkerConcurrency, f.enqueuedRequests.WithLabelValues(address), f.cfg.MaxLoopDuration, f.log)
   140  
   141  	f.mu.Lock()
   142  	defer f.mu.Unlock()
   143  
   144  	// Can be nil if stopping has been called already.
   145  	if f.workers == nil {
   146  		return
   147  	}
   148  	// We have to recheck for presence in case we got called again while we were
   149  	// connecting and that one finished first.
   150  	if f.workers[address] != nil {
   151  		return
   152  	}
   153  	f.workers[address] = w
   154  	w.start()
   155  }
   156  
   157  func (f *frontendSchedulerWorkers) InstanceRemoved(instance servicediscovery.Instance) {
   158  	f.removeScheduler(instance.Address)
   159  }
   160  
   161  func (f *frontendSchedulerWorkers) removeScheduler(address string) {
   162  	f.mu.Lock()
   163  	// This works fine if f.workers is nil already or the worker is missing
   164  	// because the query-scheduler instance was not in use.
   165  	w := f.workers[address]
   166  	delete(f.workers, address)
   167  	f.mu.Unlock()
   168  
   169  	if w != nil {
   170  		level.Info(f.log).Log("msg", "removing connection to query-scheduler", "addr", address)
   171  		w.stop()
   172  	}
   173  	f.enqueuedRequests.Delete(prometheus.Labels{schedulerAddressLabel: address})
   174  }
   175  
   176  func (f *frontendSchedulerWorkers) InstanceChanged(instance servicediscovery.Instance) {
   177  	// Ensure the query-frontend connects to in-use query-scheduler instances and disconnect from ones no more in use.
   178  	// The called methods correctly handle the case the query-frontend is already connected/disconnected
   179  	// to/from the given query-scheduler instance.
   180  	if instance.InUse {
   181  		f.addScheduler(instance.Address)
   182  	} else {
   183  		f.removeScheduler(instance.Address)
   184  	}
   185  }
   186  
   187  // Get number of workers.
   188  func (f *frontendSchedulerWorkers) getWorkersCount() int {
   189  	f.mu.Lock()
   190  	defer f.mu.Unlock()
   191  
   192  	return len(f.workers)
   193  }
   194  
   195  func (f *frontendSchedulerWorkers) connectToScheduler(ctx context.Context, address string) (*grpc.ClientConn, error) {
   196  	// Because we only use single long-running method, it doesn't make sense to inject user ID, send over tracing or add metrics.
   197  	opts, err := f.cfg.GRPCClientConfig.DialOption(nil, nil, nil)
   198  	if err != nil {
   199  		return nil, err
   200  	}
   201  
   202  	conn, err := grpc.DialContext(ctx, address, opts...)
   203  	if err != nil {
   204  		return nil, err
   205  	}
   206  	return conn, nil
   207  }
   208  
// Worker managing single gRPC connection to Scheduler. Each worker starts multiple goroutines for forwarding
// requests and cancellations to scheduler.
type frontendSchedulerWorker struct {
	log log.Logger

	// conn is the gRPC connection to the scheduler; it is closed by stop().
	conn          *grpc.ClientConn
	concurrency   int
	schedulerAddr string
	frontendAddr  string

	// Context and cancellation used by individual goroutines.
	ctx    context.Context
	cancel context.CancelFunc
	wg     sync.WaitGroup

	// Shared between all frontend workers.
	requestCh <-chan *frontendRequest

	// Cancellation requests for this scheduler are received via this channel. It is passed to frontend after
	// query has been enqueued to scheduler.
	cancelCh chan uint64

	// Number of queries sent to this scheduler.
	enqueuedRequests prometheus.Counter

	// When > 0, each scheduler loop is torn down and re-established after
	// roughly this duration (reduced by up to 30% jitter in schedulerLoop).
	maxLoopDuration time.Duration
}
   236  
   237  func newFrontendSchedulerWorker(conn *grpc.ClientConn, schedulerAddr string, frontendAddr string, requestCh <-chan *frontendRequest, concurrency int, enqueuedRequests prometheus.Counter, maxLoopDuration time.Duration, log log.Logger) *frontendSchedulerWorker {
   238  	w := &frontendSchedulerWorker{
   239  		log:              log,
   240  		conn:             conn,
   241  		concurrency:      concurrency,
   242  		schedulerAddr:    schedulerAddr,
   243  		frontendAddr:     frontendAddr,
   244  		requestCh:        requestCh,
   245  		cancelCh:         make(chan uint64, schedulerWorkerCancelChanCapacity),
   246  		enqueuedRequests: enqueuedRequests,
   247  		maxLoopDuration:  maxLoopDuration,
   248  	}
   249  	w.ctx, w.cancel = context.WithCancel(context.Background())
   250  
   251  	return w
   252  }
   253  
   254  func (w *frontendSchedulerWorker) start() {
   255  	client := schedulerpb.NewSchedulerForFrontendClient(w.conn)
   256  	for i := 0; i < w.concurrency; i++ {
   257  		w.wg.Add(1)
   258  		go func() {
   259  			defer w.wg.Done()
   260  			w.runOne(w.ctx, client)
   261  		}()
   262  	}
   263  }
   264  
   265  func (w *frontendSchedulerWorker) stop() {
   266  	w.cancel()
   267  	w.wg.Wait()
   268  	if err := w.conn.Close(); err != nil {
   269  		level.Error(w.log).Log("msg", "error while closing connection to scheduler", "err", err)
   270  	}
   271  }
   272  
   273  func (w *frontendSchedulerWorker) runOne(ctx context.Context, client schedulerpb.SchedulerForFrontendClient) {
   274  	// attemptLoop returns false if there was any error with forwarding requests to scheduler.
   275  	attemptLoop := func() bool {
   276  		ctx, cancel := context.WithCancel(ctx)
   277  		defer cancel() // cancel the stream after we are done to release resources
   278  
   279  		loop, loopErr := client.FrontendLoop(ctx)
   280  		if loopErr != nil {
   281  			level.Error(w.log).Log("msg", "error contacting scheduler", "err", loopErr, "addr", w.schedulerAddr)
   282  			return false
   283  		}
   284  
   285  		loopErr = w.schedulerLoop(loop)
   286  		if loopErr == io.EOF {
   287  			level.Debug(w.log).Log("msg", "scheduler loop closed", "addr", w.schedulerAddr)
   288  			return true
   289  		}
   290  		if closeErr := loop.CloseSend(); closeErr != nil {
   291  			level.Debug(w.log).Log("msg", "failed to close frontend loop", "err", loopErr, "addr", w.schedulerAddr)
   292  		}
   293  
   294  		if loopErr != nil {
   295  			level.Error(w.log).Log("msg", "error sending requests to scheduler", "err", loopErr, "addr", w.schedulerAddr)
   296  			return false
   297  		}
   298  		return true
   299  	}
   300  
   301  	backoffConfig := backoff.Config{
   302  		MinBackoff: 250 * time.Millisecond,
   303  		MaxBackoff: 2 * time.Second,
   304  	}
   305  	backoff := backoff.New(ctx, backoffConfig)
   306  	for backoff.Ongoing() {
   307  		if !attemptLoop() {
   308  			backoff.Wait()
   309  		} else {
   310  			backoff.Reset()
   311  		}
   312  	}
   313  }
   314  
   315  func jitter(d time.Duration, factor float64) time.Duration {
   316  	maxJitter := time.Duration(float64(d) * factor)
   317  	return d - time.Duration(rand.Int63n(int64(maxJitter)))
   318  }
   319  
   320  func (w *frontendSchedulerWorker) schedulerLoop(loop schedulerpb.SchedulerForFrontend_FrontendLoopClient) error {
   321  	if err := loop.Send(&schedulerpb.FrontendToScheduler{
   322  		Type:            schedulerpb.FrontendToSchedulerType_INIT,
   323  		FrontendAddress: w.frontendAddr,
   324  	}); err != nil {
   325  		return err
   326  	}
   327  
   328  	if resp, err := loop.Recv(); err != nil || resp.Status != schedulerpb.SchedulerToFrontendStatus_OK {
   329  		if err != nil {
   330  			return err
   331  		}
   332  		return errors.Errorf("unexpected status received for init: %v", resp.Status)
   333  	}
   334  
   335  	ctx, cancel := context.WithCancel(loop.Context())
   336  	defer cancel()
   337  	if w.maxLoopDuration > 0 {
   338  		go func() {
   339  			timer := time.NewTimer(jitter(w.maxLoopDuration, 0.3))
   340  			defer timer.Stop()
   341  
   342  			select {
   343  			case <-ctx.Done():
   344  				return
   345  			case <-timer.C:
   346  				cancel()
   347  				return
   348  			}
   349  		}()
   350  	}
   351  
   352  	for {
   353  		select {
   354  		case <-ctx.Done():
   355  			// No need to report error if our internal context is canceled. This can happen during shutdown,
   356  			// or when scheduler is no longer resolvable. (It would be nice if this context reported "done" also when
   357  			// connection scheduler stops the call, but that doesn't seem to be the case).
   358  			//
   359  			// Reporting error here would delay reopening the stream (if the worker context is not done yet).
   360  			level.Debug(w.log).Log("msg", "stream context finished", "err", ctx.Err())
   361  			return nil
   362  		case req := <-w.requestCh:
   363  			err := loop.Send(&schedulerpb.FrontendToScheduler{
   364  				Type:            schedulerpb.FrontendToSchedulerType_ENQUEUE,
   365  				QueryID:         req.queryID,
   366  				UserID:          req.userID,
   367  				HttpRequest:     req.request,
   368  				FrontendAddress: w.frontendAddr,
   369  				StatsEnabled:    req.statsEnabled,
   370  			})
   371  			w.enqueuedRequests.Inc()
   372  
   373  			if err != nil {
   374  				req.enqueue <- enqueueResult{status: failed}
   375  				return err
   376  			}
   377  
   378  			resp, err := loop.Recv()
   379  			if err != nil {
   380  				req.enqueue <- enqueueResult{status: failed}
   381  				return err
   382  			}
   383  
   384  			switch resp.Status {
   385  			case schedulerpb.SchedulerToFrontendStatus_OK:
   386  				req.enqueue <- enqueueResult{status: waitForResponse, cancelCh: w.cancelCh}
   387  				// Response will come from querier.
   388  
   389  			case schedulerpb.SchedulerToFrontendStatus_SHUTTING_DOWN:
   390  				// Scheduler is shutting down, report failure to enqueue and stop this loop.
   391  				req.enqueue <- enqueueResult{status: failed}
   392  				return errors.New("scheduler is shutting down")
   393  
   394  			case schedulerpb.SchedulerToFrontendStatus_ERROR:
   395  				req.enqueue <- enqueueResult{status: waitForResponse}
   396  				req.response <- &frontendpb.QueryResultRequest{
   397  					HttpResponse: &httpgrpc.HTTPResponse{
   398  						Code: http.StatusInternalServerError,
   399  						Body: []byte(err.Error()),
   400  					},
   401  				}
   402  
   403  			case schedulerpb.SchedulerToFrontendStatus_TOO_MANY_REQUESTS_PER_TENANT:
   404  				req.enqueue <- enqueueResult{status: waitForResponse}
   405  				req.response <- &frontendpb.QueryResultRequest{
   406  					HttpResponse: &httpgrpc.HTTPResponse{
   407  						Code: http.StatusTooManyRequests,
   408  						Body: []byte("too many outstanding requests"),
   409  					},
   410  				}
   411  
   412  			default:
   413  				level.Error(w.log).Log("msg", "unknown response status from the scheduler", "resp", resp, "queryID", req.queryID)
   414  				req.enqueue <- enqueueResult{status: failed}
   415  			}
   416  
   417  		case reqID := <-w.cancelCh:
   418  			err := loop.Send(&schedulerpb.FrontendToScheduler{
   419  				Type:    schedulerpb.FrontendToSchedulerType_CANCEL,
   420  				QueryID: reqID,
   421  			})
   422  			if err != nil {
   423  				return err
   424  			}
   425  
   426  			resp, err := loop.Recv()
   427  			if err != nil {
   428  				return err
   429  			}
   430  
   431  			// Scheduler may be shutting down, report that.
   432  			if resp.Status != schedulerpb.SchedulerToFrontendStatus_OK {
   433  				return errors.Errorf("unexpected status received for cancellation: %v", resp.Status)
   434  			}
   435  		}
   436  	}
   437  }