github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/querier/worker/scheduler_processor.go

package worker

import (
	"context"
	"fmt"
	"net/http"
	"strings"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/backoff"
	"github.com/grafana/dskit/grpcclient"
	"github.com/grafana/dskit/ring/client"
	"github.com/grafana/dskit/services"
	otgrpc "github.com/opentracing-contrib/go-grpc"
	"github.com/opentracing/opentracing-go"
	"github.com/weaveworks/common/httpgrpc"
	"github.com/weaveworks/common/middleware"
	"github.com/weaveworks/common/user"
	"google.golang.org/grpc"
	"google.golang.org/grpc/health/grpc_health_v1"

	"github.com/grafana/loki/pkg/lokifrontend/frontend/v2/frontendv2pb"
	querier_stats "github.com/grafana/loki/pkg/querier/stats"
	"github.com/grafana/loki/pkg/scheduler/schedulerpb"
	util_log "github.com/grafana/loki/pkg/util/log"
)

// newSchedulerProcessor creates a processor that pulls queries from a query-scheduler.
// It also returns the services (currently just the frontend client pool) that the caller
// is responsible for starting and stopping.
func newSchedulerProcessor(cfg Config, handler RequestHandler, log log.Logger, metrics *Metrics) (*schedulerProcessor, []services.Service) {
	p := &schedulerProcessor{
		log:            log,
		handler:        handler,
		maxMessageSize: cfg.GRPCClientConfig.MaxSendMsgSize,
		querierID:      cfg.QuerierID,
		grpcConfig:     cfg.GRPCClientConfig,

		metrics: metrics,
	}

	poolConfig := client.PoolConfig{
		CheckInterval:      5 * time.Second,
		HealthCheckEnabled: true,
		HealthCheckTimeout: 1 * time.Second,
	}

	p.frontendPool = client.NewPool("frontend", poolConfig, nil, p.createFrontendClient, p.metrics.frontendClientsGauge, log)
	return p, []services.Service{p.frontendPool}
}

// schedulerProcessor handles incoming queries from the query-scheduler.
type schedulerProcessor struct {
	log            log.Logger
	handler        RequestHandler
	grpcConfig     grpcclient.Config
	maxMessageSize int
	querierID      string

	frontendPool *client.Pool
	metrics      *Metrics
}

// notifyShutdown implements processor.
func (sp *schedulerProcessor) notifyShutdown(ctx context.Context, conn *grpc.ClientConn, address string) {
	client := schedulerpb.NewSchedulerForQuerierClient(conn)

	req := &schedulerpb.NotifyQuerierShutdownRequest{QuerierID: sp.querierID}
	if _, err := client.NotifyQuerierShutdown(ctx, req); err != nil {
		// Since we're shutting down, there's nothing we can do except log it.
		level.Warn(sp.log).Log("msg", "failed to notify querier shutdown to query-scheduler", "address", address, "err", err)
	}
}

// processQueriesOnSingleStream opens a QuerierLoop stream to the scheduler at the given
// address and processes queries from it, reconnecting with backoff until the context is canceled.
func (sp *schedulerProcessor) processQueriesOnSingleStream(ctx context.Context, conn *grpc.ClientConn, address string) {
	schedulerClient := schedulerpb.NewSchedulerForQuerierClient(conn)

	backoff := backoff.New(ctx, processorBackoffConfig)
	for backoff.Ongoing() {
		c, err := schedulerClient.QuerierLoop(ctx)
		if err == nil {
			err = c.Send(&schedulerpb.QuerierToScheduler{QuerierID: sp.querierID})
		}

		if err != nil {
			level.Error(sp.log).Log("msg", "error contacting scheduler", "err", err, "addr", address)
			backoff.Wait()
			continue
		}

		if err := sp.querierLoop(c, address); err != nil {
			// E.Welch: I don't know how to do this any better, but context cancellations seem common,
			// likely because an underlying connection was closed; they are noisy and I don't think
			// they communicate anything useful.
			if !strings.Contains(err.Error(), "context canceled") {
				level.Error(sp.log).Log("msg", "error processing requests from scheduler", "err", err, "addr", address)
			}
			backoff.Wait()
			continue
		}

		backoff.Reset()
	}
}

// querierLoop processes requests on an established stream.
func (sp *schedulerProcessor) querierLoop(c schedulerpb.SchedulerForQuerier_QuerierLoopClient, address string) error {
	// Build a child context so we can cancel a query when the stream is closed.
	ctx, cancel := context.WithCancel(c.Context())
	defer cancel()

	for {
		request, err := c.Recv()
		if err != nil {
			return err
		}

		// Handle the request on a "background" goroutine, so we go back to
		// blocking on c.Recv(). This allows us to detect the stream closing
		// and cancel the query. We don't actually handle queries in parallel
		// here, as we're running in lock step with the server - each Recv is
		// paired with a Send.
		go func() {
			// We need to inject the user (org ID) into the context for sending the response back.
			var (
				ctx    = user.InjectOrgID(ctx, request.UserID)
				logger = util_log.WithContext(ctx, sp.log)
			)

			sp.metrics.inflightRequests.Inc()

			sp.runRequest(ctx, logger, request.QueryID, request.FrontendAddress, request.StatsEnabled, request.HttpRequest)

			sp.metrics.inflightRequests.Dec()

			// Report back to the scheduler that processing of the query has finished.
			if err := c.Send(&schedulerpb.QuerierToScheduler{}); err != nil {
				level.Error(logger).Log("msg", "error notifying scheduler about finished query", "err", err, "addr", address)
			}
		}()
	}
}

// runRequest executes a single request and sends the result to the frontend that enqueued it.
func (sp *schedulerProcessor) runRequest(ctx context.Context, logger log.Logger, queryID uint64, frontendAddress string, statsEnabled bool, request *httpgrpc.HTTPRequest) {
	var stats *querier_stats.Stats
	if statsEnabled {
		stats, ctx = querier_stats.ContextWithEmptyStats(ctx)
	}

	response, err := sp.handler.Handle(ctx, request)
	if err != nil {
		var ok bool
		response, ok = httpgrpc.HTTPResponseFromError(err)
		if !ok {
			response = &httpgrpc.HTTPResponse{
				Code: http.StatusInternalServerError,
				Body: []byte(err.Error()),
			}
		}
	}

	// Ensure responses that are too big are not retried.
	if len(response.Body) >= sp.maxMessageSize {
		level.Error(logger).Log("msg", "response larger than max message size", "size", len(response.Body), "maxMessageSize", sp.maxMessageSize)

		errMsg := fmt.Sprintf("response larger than the max message size (%d vs %d)", len(response.Body), sp.maxMessageSize)
		response = &httpgrpc.HTTPResponse{
			Code: http.StatusRequestEntityTooLarge,
			Body: []byte(errMsg),
		}
	}

	c, err := sp.frontendPool.GetClientFor(frontendAddress)
	if err == nil {
		// The response is empty and uninteresting.
		_, err = c.(frontendv2pb.FrontendForQuerierClient).QueryResult(ctx, &frontendv2pb.QueryResultRequest{
			QueryID:      queryID,
			HttpResponse: response,
			Stats:        stats,
		})
	}
	if err != nil {
		level.Error(logger).Log("msg", "error notifying frontend about finished query", "err", err, "frontend", frontendAddress)
	}
}

func (sp *schedulerProcessor) createFrontendClient(addr string) (client.PoolClient, error) {
	opts, err := sp.grpcConfig.DialOption([]grpc.UnaryClientInterceptor{
		otgrpc.OpenTracingClientInterceptor(opentracing.GlobalTracer()),
		middleware.ClientUserHeaderInterceptor,
		middleware.UnaryClientInstrumentInterceptor(sp.metrics.frontendClientRequestDuration),
	}, nil)
	if err != nil {
		return nil, err
	}

	conn, err := grpc.Dial(addr, opts...)
	if err != nil {
		return nil, err
	}

	return &frontendClient{
		FrontendForQuerierClient: frontendv2pb.NewFrontendForQuerierClient(conn),
		HealthClient:             grpc_health_v1.NewHealthClient(conn),
		conn:                     conn,
	}, nil
}

type frontendClient struct {
	frontendv2pb.FrontendForQuerierClient
	grpc_health_v1.HealthClient
	conn *grpc.ClientConn
}

func (fc *frontendClient) Close() error {
	return fc.conn.Close()
}
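// The sketch below is not part of the original file; it is an illustrative, hedged example of
// how a caller might wire up the processor and the extra services returned by
// newSchedulerProcessor. services.NewManager and services.StartManagerAndAwaitHealthy are
// dskit helpers; the hypothetical startSchedulerProcessor function and where cfg, handler,
// logger and metrics come from are assumptions.
//
//	func startSchedulerProcessor(ctx context.Context, cfg Config, handler RequestHandler, logger log.Logger, metrics *Metrics) (*schedulerProcessor, *services.Manager, error) {
//		processor, extraServices := newSchedulerProcessor(cfg, handler, logger, metrics)
//
//		// The frontend client pool is returned as a service; it must be started
//		// (and eventually stopped) by the caller, for example via a service manager.
//		manager, err := services.NewManager(extraServices...)
//		if err != nil {
//			return nil, nil, err
//		}
//		if err := services.StartManagerAndAwaitHealthy(ctx, manager); err != nil {
//			return nil, nil, err
//		}
//
//		// processor.processQueriesOnSingleStream is then typically driven by the worker,
//		// once per scheduler connection.
//		return processor, manager, nil
//	}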