package worker

import (
	"context"
	"fmt"
	"net/http"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/backoff"
	"github.com/grafana/dskit/grpcclient"
	dsmiddleware "github.com/grafana/dskit/middleware"
	"github.com/grafana/dskit/ring/client"
	"github.com/grafana/dskit/services"
	otgrpc "github.com/opentracing-contrib/go-grpc"
	"github.com/opentracing/opentracing-go"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/weaveworks/common/httpgrpc"
	"github.com/weaveworks/common/middleware"
	"github.com/weaveworks/common/user"
	"google.golang.org/grpc"
	"google.golang.org/grpc/health/grpc_health_v1"

	"github.com/cortexproject/cortex/pkg/frontend/v2/frontendv2pb"
	querier_stats "github.com/cortexproject/cortex/pkg/querier/stats"
	"github.com/cortexproject/cortex/pkg/scheduler/schedulerpb"
	"github.com/cortexproject/cortex/pkg/util/httpgrpcutil"
	util_log "github.com/cortexproject/cortex/pkg/util/log"
)

// newSchedulerProcessor builds a schedulerProcessor and the set of
// sub-services (the query-frontend client pool) that the caller is
// responsible for starting and stopping alongside the processor.
func newSchedulerProcessor(cfg Config, handler RequestHandler, log log.Logger, reg prometheus.Registerer) (*schedulerProcessor, []services.Service) {
	p := &schedulerProcessor{
		log:     log,
		handler: handler,
		// Responses bigger than the client's max *send* size would be
		// rejected by gRPC anyway, so this is used as the cap when
		// deciding whether to replace an oversized response body.
		maxMessageSize: cfg.GRPCClientConfig.MaxSendMsgSize,
		querierID:      cfg.QuerierID,
		grpcConfig:     cfg.GRPCClientConfig,

		frontendClientRequestDuration: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
			Name:    "cortex_querier_query_frontend_request_duration_seconds",
			Help:    "Time spend doing requests to frontend.",
			Buckets: prometheus.ExponentialBuckets(0.001, 4, 6),
		}, []string{"operation", "status_code"}),
	}

	frontendClientsGauge := promauto.With(reg).NewGauge(prometheus.GaugeOpts{
		Name: "cortex_querier_query_frontend_clients",
		Help: "The current number of clients connected to query-frontend.",
	})

	// Pool health-checks cached frontend connections every 5s and drops
	// the ones that fail, so a dead frontend doesn't keep receiving results.
	poolConfig := client.PoolConfig{
		CheckInterval:      5 * time.Second,
		HealthCheckEnabled: true,
		HealthCheckTimeout: 1 * time.Second,
	}

	p.frontendPool = client.NewPool("frontend", poolConfig, nil, p.createFrontendClient, frontendClientsGauge, log)
	return p, []services.Service{p.frontendPool}
}

// Handles incoming queries from query-scheduler.
type schedulerProcessor struct {
	log            log.Logger
	handler        RequestHandler // executes the actual query (HTTP-over-gRPC).
	grpcConfig     grpcclient.Config
	maxMessageSize int    // max response body size forwarded to the frontend.
	querierID      string // identifies this querier to the scheduler.

	// Pool of connections to query-frontends, keyed by frontend address;
	// results are sent directly to the frontend that owns the query.
	frontendPool                  *client.Pool
	frontendClientRequestDuration *prometheus.HistogramVec
}

// notifyShutdown implements processor. It tells the query-scheduler at
// address that this querier is going away, so the scheduler can stop
// assigning queries to it. Best-effort: errors are only logged.
func (sp *schedulerProcessor) notifyShutdown(ctx context.Context, conn *grpc.ClientConn, address string) {
	client := schedulerpb.NewSchedulerForQuerierClient(conn)

	req := &schedulerpb.NotifyQuerierShutdownRequest{QuerierID: sp.querierID}
	if _, err := client.NotifyQuerierShutdown(ctx, req); err != nil {
		// Since we're shutting down there's nothing we can do except logging it.
		level.Warn(sp.log).Log("msg", "failed to notify querier shutdown to query-scheduler", "address", address, "err", err)
	}
}

// processQueriesOnSingleStream repeatedly (re-)establishes a QuerierLoop
// stream to the scheduler at address and serves queries on it, backing off
// between failed attempts. Returns when ctx is cancelled (backoff.Ongoing
// becomes false).
func (sp *schedulerProcessor) processQueriesOnSingleStream(ctx context.Context, conn *grpc.ClientConn, address string) {
	schedulerClient := schedulerpb.NewSchedulerForQuerierClient(conn)

	backoff := backoff.New(ctx, processorBackoffConfig)
	for backoff.Ongoing() {
		c, err := schedulerClient.QuerierLoop(ctx)
		if err == nil {
			// First message on the stream announces this querier's ID.
			err = c.Send(&schedulerpb.QuerierToScheduler{QuerierID: sp.querierID})
		}

		if err != nil {
			level.Error(sp.log).Log("msg", "error contacting scheduler", "err", err, "addr", address)
			backoff.Wait()
			continue
		}

		if err := sp.querierLoop(c, address); err != nil {
			level.Error(sp.log).Log("msg", "error processing requests from scheduler", "err", err, "addr", address)
			backoff.Wait()
			continue
		}

		backoff.Reset()
	}
}

// process loops processing requests on an established stream.
// Always returns a non-nil error (the Recv error that ended the stream).
func (sp *schedulerProcessor) querierLoop(c schedulerpb.SchedulerForQuerier_QuerierLoopClient, address string) error {
	// Build a child context so we can cancel a query when the stream is closed.
	ctx, cancel := context.WithCancel(c.Context())
	defer cancel()

	for {
		request, err := c.Recv()
		if err != nil {
			return err
		}

		// Handle the request on a "background" goroutine, so we go back to
		// blocking on c.Recv(). This allows us to detect the stream closing
		// and cancel the query. We don't actually handle queries in parallel
		// here, as we're running in lock step with the server - each Recv is
		// paired with a Send.
		go func() {
			// We need to inject user into context for sending response back.
			ctx := user.InjectOrgID(ctx, request.UserID)

			tracer := opentracing.GlobalTracer()
			// Ignore errors here. If we cannot get parent span, we just don't create new one.
			parentSpanContext, _ := httpgrpcutil.GetParentSpanForRequest(tracer, request.HttpRequest)
			if parentSpanContext != nil {
				queueSpan, spanCtx := opentracing.StartSpanFromContextWithTracer(ctx, tracer, "querier_processor_runRequest", opentracing.ChildOf(parentSpanContext))
				defer queueSpan.Finish()

				ctx = spanCtx
			}
			logger := util_log.WithContext(ctx, sp.log)

			sp.runRequest(ctx, logger, request.QueryID, request.FrontendAddress, request.StatsEnabled, request.HttpRequest)

			// Report back to scheduler that processing of the query has finished.
			if err := c.Send(&schedulerpb.QuerierToScheduler{}); err != nil {
				level.Error(logger).Log("msg", "error notifying scheduler about finished query", "err", err, "addr", address)
			}
		}()
	}
}

// runRequest executes a single query via sp.handler and sends the result
// directly to the query-frontend that originated it (frontendAddress),
// tagged with queryID. Errors from the handler are converted into HTTP
// responses; failures to deliver the result are only logged.
func (sp *schedulerProcessor) runRequest(ctx context.Context, logger log.Logger, queryID uint64, frontendAddress string, statsEnabled bool, request *httpgrpc.HTTPRequest) {
	var stats *querier_stats.Stats
	if statsEnabled {
		stats, ctx = querier_stats.ContextWithEmptyStats(ctx)
	}

	response, err := sp.handler.Handle(ctx, request)
	if err != nil {
		var ok bool
		// A handler error may already carry an HTTP response (httpgrpc
		// error); otherwise synthesize a 500 with the error text as body.
		response, ok = httpgrpc.HTTPResponseFromError(err)
		if !ok {
			response = &httpgrpc.HTTPResponse{
				Code: http.StatusInternalServerError,
				Body: []byte(err.Error()),
			}
		}
	}

	// Ensure responses that are too big are not retried.
	if len(response.Body) >= sp.maxMessageSize {
		level.Error(logger).Log("msg", "response larger than max message size", "size", len(response.Body), "maxMessageSize", sp.maxMessageSize)

		errMsg := fmt.Sprintf("response larger than the max message size (%d vs %d)", len(response.Body), sp.maxMessageSize)
		response = &httpgrpc.HTTPResponse{
			Code: http.StatusRequestEntityTooLarge,
			Body: []byte(errMsg),
		}
	}

	c, err := sp.frontendPool.GetClientFor(frontendAddress)
	if err == nil {
		// Response is empty and uninteresting.
		_, err = c.(frontendv2pb.FrontendForQuerierClient).QueryResult(ctx, &frontendv2pb.QueryResultRequest{
			QueryID:      queryID,
			HttpResponse: response,
			Stats:        stats,
		})
	}
	if err != nil {
		level.Error(logger).Log("msg", "error notifying frontend about finished query", "err", err, "frontend", frontendAddress)
	}
}

// createFrontendClient dials a query-frontend at addr and wraps the
// connection in a pool-compatible client (with health checking). Used as
// the factory for sp.frontendPool.
func (sp *schedulerProcessor) createFrontendClient(addr string) (client.PoolClient, error) {
	opts, err := sp.grpcConfig.DialOption([]grpc.UnaryClientInterceptor{
		otgrpc.OpenTracingClientInterceptor(opentracing.GlobalTracer()),
		middleware.ClientUserHeaderInterceptor,
		dsmiddleware.PrometheusGRPCUnaryInstrumentation(sp.frontendClientRequestDuration),
	}, nil)

	if err != nil {
		return nil, err
	}

	conn, err := grpc.Dial(addr, opts...)
	if err != nil {
		return nil, err
	}

	return &frontendClient{
		FrontendForQuerierClient: frontendv2pb.NewFrontendForQuerierClient(conn),
		HealthClient:             grpc_health_v1.NewHealthClient(conn),
		conn:                     conn,
	}, nil
}

// frontendClient bundles the query-result and health-check clients that
// share a single gRPC connection, satisfying client.PoolClient.
type frontendClient struct {
	frontendv2pb.FrontendForQuerierClient
	grpc_health_v1.HealthClient
	conn *grpc.ClientConn
}

// Close releases the underlying gRPC connection; called by the pool when
// the client is evicted.
func (fc *frontendClient) Close() error {
	return fc.conn.Close()
}