github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/querier/queryrange/querysharding.go

package queryrange

import (
	"context"
	"fmt"
	"net/http"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/pkg/errors"
	"github.com/prometheus/prometheus/promql/parser"
	"github.com/weaveworks/common/httpgrpc"

	"github.com/grafana/dskit/tenant"

	"github.com/grafana/loki/pkg/loghttp"
	"github.com/grafana/loki/pkg/logql"
	"github.com/grafana/loki/pkg/logqlmodel"
	"github.com/grafana/loki/pkg/querier/astmapper"
	"github.com/grafana/loki/pkg/querier/queryrange/queryrangebase"
	"github.com/grafana/loki/pkg/storage/config"
	"github.com/grafana/loki/pkg/util"
	util_log "github.com/grafana/loki/pkg/util/log"
	"github.com/grafana/loki/pkg/util/marshal"
	"github.com/grafana/loki/pkg/util/validation"
)

var errInvalidShardingRange = errors.New("query does not fit in a single sharding configuration")

// NewQueryShardMiddleware creates a middleware which downstreams queries after AST mapping and query encoding.
func NewQueryShardMiddleware(
	logger log.Logger,
	confs ShardingConfigs,
	middlewareMetrics *queryrangebase.InstrumentMiddlewareMetrics,
	shardingMetrics *logql.MapperMetrics,
	limits Limits,
) queryrangebase.Middleware {
	noshards := !hasShards(confs)

	if noshards {
		level.Warn(logger).Log(
			"middleware", "QueryShard",
			"msg", "no sharding configuration found",
			"confs", fmt.Sprintf("%+v", confs),
		)
		return queryrangebase.PassthroughMiddleware
	}

	mapperware := queryrangebase.MiddlewareFunc(func(next queryrangebase.Handler) queryrangebase.Handler {
		return newASTMapperware(confs, next, logger, shardingMetrics, limits)
	})

	return queryrangebase.MiddlewareFunc(func(next queryrangebase.Handler) queryrangebase.Handler {
		return &shardSplitter{
			limits: limits,
			shardingware: queryrangebase.MergeMiddlewares(
				queryrangebase.InstrumentMiddleware("shardingware", middlewareMetrics),
				mapperware,
			).Wrap(next),
			now:  time.Now,
			next: queryrangebase.InstrumentMiddleware("sharding-bypass", middlewareMetrics).Wrap(next),
		}
	})
}
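
// A minimal wiring sketch (the enclosing setup, schemaCfg, and the concrete
// limits value are assumptions, not defined in this file):
//
//	shardingware := NewQueryShardMiddleware(
//		logger,
//		ShardingConfigs(schemaCfg.Configs), // []config.PeriodConfig
//		instrumentMetrics,                  // *queryrangebase.InstrumentMiddlewareMetrics
//		shardingMetrics,                    // *logql.MapperMetrics
//		limits,                             // Limits
//	)
//	handler := shardingware.Wrap(next) // next is the downstream queryrangebase.Handler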

func newASTMapperware(
	confs ShardingConfigs,
	next queryrangebase.Handler,
	logger log.Logger,
	metrics *logql.MapperMetrics,
	limits Limits,
) *astMapperware {
	return &astMapperware{
		confs:   confs,
		logger:  log.With(logger, "middleware", "QueryShard.astMapperware"),
		limits:  limits,
		next:    next,
		ng:      logql.NewDownstreamEngine(logql.EngineOpts{}, DownstreamHandler{next: next, limits: limits}, limits, logger),
		metrics: metrics,
	}
}

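// astMapperware rewrites a request's LogQL AST into a sharded equivalent
// (when the active period config allows it) and executes the result through
// the downstream engine; requests it cannot map fall through to next.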
type astMapperware struct {
	confs   ShardingConfigs
	logger  log.Logger
	limits  Limits
	next    queryrangebase.Handler
	ng      *logql.DownstreamEngine
	metrics *logql.MapperMetrics
}

func (ast *astMapperware) Do(ctx context.Context, r queryrangebase.Request) (queryrangebase.Response, error) {
	conf, err := ast.confs.GetConf(r)
	logger := util_log.WithContext(ctx, ast.logger)
	// cannot shard with this time range
	if err != nil {
		level.Warn(logger).Log("err", err.Error(), "msg", "skipped AST mapper for request")
		return ast.next.Do(ctx, r)
	}

	userID, err := tenant.TenantID(ctx)
	if err != nil {
		return nil, err
	}

	resolver, ok := shardResolverForConf(
		ctx,
		conf,
		ast.ng.Opts().MaxLookBackPeriod,
		ast.logger,
		ast.limits.MaxQueryParallelism(userID),
		r,
		ast.next,
	)
	if !ok {
		return ast.next.Do(ctx, r)
	}

	mapper := logql.NewShardMapper(resolver, ast.metrics)

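	// A rough sketch of what the mapper produces (the exact rewritten form
	// is internal to logql): a shardable query such as
	//
	//	sum(rate({app="foo"}[1m]))
	//
	// is rewritten into a merge of per-shard downstream expressions, one per
	// shard annotation (e.g. "0_of_16"), which the downstream engine then
	// executes in parallel and combines.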
	noop, parsed, err := mapper.Parse(r.GetQuery())
	if err != nil {
		level.Warn(logger).Log("msg", "failed mapping AST", "err", err.Error(), "query", r.GetQuery())
		return nil, err
	}
	level.Debug(logger).Log("no-op", noop, "mapped", parsed.String())

	if noop {
		// the ast can't be mapped to a sharded equivalent
		// so we can bypass the sharding engine.
		return ast.next.Do(ctx, r)
	}

	params, err := paramsFromRequest(r)
	if err != nil {
		return nil, err
	}

	var path string
	switch r := r.(type) {
	case *LokiRequest:
		path = r.GetPath()
	case *LokiInstantRequest:
		path = r.GetPath()
	default:
		return nil, fmt.Errorf("expected *LokiRequest or *LokiInstantRequest, got (%T)", r)
	}
	query := ast.ng.Query(ctx, params, parsed)

	res, err := query.Exec(ctx)
	if err != nil {
		return nil, err
	}

	value, err := marshal.NewResultValue(res.Data)
	if err != nil {
		return nil, err
	}

	switch res.Data.Type() {
	case parser.ValueTypeMatrix:
		return &LokiPromResponse{
			Response: &queryrangebase.PrometheusResponse{
				Status: loghttp.QueryStatusSuccess,
				Data: queryrangebase.PrometheusData{
					ResultType: loghttp.ResultTypeMatrix,
					Result:     toProtoMatrix(value.(loghttp.Matrix)),
				},
			},
			Statistics: res.Statistics,
		}, nil
	case logqlmodel.ValueTypeStreams:
		return &LokiResponse{
			Status:     loghttp.QueryStatusSuccess,
			Direction:  params.Direction(),
			Limit:      params.Limit(),
			Version:    uint32(loghttp.GetVersion(path)),
			Statistics: res.Statistics,
			Data: LokiData{
				ResultType: loghttp.ResultTypeStream,
				Result:     value.(loghttp.Streams).ToProto(),
			},
		}, nil
	case parser.ValueTypeVector:
		return &LokiPromResponse{
			Statistics: res.Statistics,
			Response: &queryrangebase.PrometheusResponse{
				Status: loghttp.QueryStatusSuccess,
				Data: queryrangebase.PrometheusData{
					ResultType: loghttp.ResultTypeVector,
					Result:     toProtoVector(value.(loghttp.Vector)),
				},
			},
		}, nil
	default:
		return nil, fmt.Errorf("unexpected downstream response type (%s)", res.Data.Type())
	}
}

// shardSplitter middleware only shards requests that do not extend past the
// MinShardingLookback interval; more recent requests are sent unsharded so
// the ingesters (which also serve that window) are not overloaded.
// TODO(owen-d): export in cortex so we don't duplicate code
type shardSplitter struct {
	limits       Limits                 // delimiter for splitting sharded vs non-sharded queries
	shardingware queryrangebase.Handler // handler for sharded queries
	next         queryrangebase.Handler // handler for non-sharded queries
	now          func() time.Time       // injectable time.Now
}
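
// For example (a sketch with made-up numbers): with MinShardingLookback = 3h
// and now = 12:00, cutoff = 09:00. A query ending at 08:30 is handed to
// shardingware, while one ending at 11:00 is sent unsharded via next, since
// ingesters may still hold data for that recent window.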

func (splitter *shardSplitter) Do(ctx context.Context, r queryrangebase.Request) (queryrangebase.Response, error) {
	tenantIDs, err := tenant.TenantIDs(ctx)
	if err != nil {
		return nil, httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error())
	}
	minShardingLookback := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, splitter.limits.MinShardingLookback)
	if minShardingLookback == 0 {
		return splitter.shardingware.Do(ctx, r)
	}
	cutoff := splitter.now().Add(-minShardingLookback)
	// Only attempt to shard queries which are older than the sharding lookback
	// (the period for which ingesters are also queried); the disabled-lookback
	// case was handled by the early return above.
	if util.TimeFromMillis(r.GetEnd()).Before(cutoff) {
		return splitter.shardingware.Do(ctx, r)
	}
	return splitter.next.Do(ctx, r)
}

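// hasShards reports whether any period config is shardable: either it carries
// a nonzero row-shard factor or it uses the TSDB index type, which is
// shardable without a fixed factor.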
func hasShards(confs ShardingConfigs) bool {
	for _, conf := range confs {
		if conf.RowShards > 0 || conf.IndexType == config.TSDBType {
			return true
		}
	}
	return false
}

// ShardingConfigs is a slice of chunk shard configs
type ShardingConfigs []config.PeriodConfig

// ValidRange extracts a non-overlapping sharding configuration from a list of configs and a time range.
func (confs ShardingConfigs) ValidRange(start, end int64) (config.PeriodConfig, error) {
	for i, conf := range confs {
		if start < int64(conf.From.Time) {
			// the query starts before this config's range
			return config.PeriodConfig{}, errInvalidShardingRange
		} else if i == len(confs)-1 {
			// the last configuration has no upper bound
			return conf, nil
		} else if end < int64(confs[i+1].From.Time) {
			// the request is entirely scoped to this shard config
			return conf, nil
		}
	}

	return config.PeriodConfig{}, errInvalidShardingRange
}
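
// For example (a sketch): given period configs effective from t0 and t1, a
// query contained in [t0, t1) resolves to the first config and a query at or
// after t1 to the second (the last config has no upper bound), while a query
// spanning t1 returns errInvalidShardingRange.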

// GetConf will extract a shardable config corresponding to a request and the sharding configs
func (confs ShardingConfigs) GetConf(r queryrangebase.Request) (config.PeriodConfig, error) {
	conf, err := confs.ValidRange(r.GetStart(), r.GetEnd())
	// query exists across multiple sharding configs
	if err != nil {
		return conf, err
	}

	// query doesn't have shard factor, so don't try to do AST mapping.
	if conf.RowShards < 2 && conf.IndexType != config.TSDBType {
		return conf, errors.Errorf("shard factor not high enough: [%d]", conf.RowShards)
	}

	return conf, nil
}

// NewSeriesQueryShardMiddleware creates a middleware which shards series queries.
func NewSeriesQueryShardMiddleware(
	logger log.Logger,
	confs ShardingConfigs,
	middlewareMetrics *queryrangebase.InstrumentMiddlewareMetrics,
	shardingMetrics *logql.MapperMetrics,
	limits queryrangebase.Limits,
	merger queryrangebase.Merger,
) queryrangebase.Middleware {
	noshards := !hasShards(confs)

	if noshards {
		level.Warn(logger).Log(
			"middleware", "QueryShard",
			"msg", "no sharding configuration found",
			"confs", fmt.Sprintf("%+v", confs),
		)
		return queryrangebase.PassthroughMiddleware
	}
	return queryrangebase.MiddlewareFunc(func(next queryrangebase.Handler) queryrangebase.Handler {
		return queryrangebase.InstrumentMiddleware("sharding", middlewareMetrics).Wrap(
			&seriesShardingHandler{
				confs:   confs,
				logger:  logger,
				next:    next,
				metrics: shardingMetrics,
				limits:  limits,
				merger:  merger,
			},
		)
	})
}

type seriesShardingHandler struct {
	confs   ShardingConfigs
	logger  log.Logger
	next    queryrangebase.Handler
	metrics *logql.MapperMetrics
	limits  queryrangebase.Limits
	merger  queryrangebase.Merger
}

func (ss *seriesShardingHandler) Do(ctx context.Context, r queryrangebase.Request) (queryrangebase.Response, error) {
	conf, err := ss.confs.GetConf(r)
	// cannot shard with this time range
	if err != nil {
		level.Warn(ss.logger).Log("err", err.Error(), "msg", "skipped sharding for request")
		return ss.next.Do(ctx, r)
	}

	req, ok := r.(*LokiSeriesRequest)
	if !ok {
		return nil, fmt.Errorf("expected *LokiSeriesRequest, got (%T)", r)
	}

	ss.metrics.DownstreamQueries.WithLabelValues("series").Inc()
	ss.metrics.DownstreamFactor.Observe(float64(conf.RowShards))

	requests := make([]queryrangebase.Request, 0, conf.RowShards)
	for i := 0; i < int(conf.RowShards); i++ {
		shardedRequest := *req
		shardedRequest.Shards = []string{astmapper.ShardAnnotation{
			Shard: i,
			Of:    int(conf.RowShards),
		}.String()}
		requests = append(requests, &shardedRequest)
	}
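	// For a config with RowShards = 16, the loop above yields 16 copies of
	// the request annotated "0_of_16" through "15_of_16"; each downstream
	// query then covers a disjoint shard of the matching series.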
	requestResponses, err := queryrangebase.DoRequests(ctx, ss.next, requests, ss.limits)
	if err != nil {
		return nil, err
	}
	responses := make([]queryrangebase.Response, 0, len(requestResponses))
	for _, res := range requestResponses {
		responses = append(responses, res.Response)
	}
	return ss.merger.MergeResponse(responses...)
}