github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/querier/queryrange/querysharding.go

package queryrange

import (
	"context"
	"fmt"
	"net/http"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/pkg/errors"
	"github.com/prometheus/prometheus/promql/parser"
	"github.com/weaveworks/common/httpgrpc"

	"github.com/grafana/dskit/tenant"

	"github.com/grafana/loki/pkg/loghttp"
	"github.com/grafana/loki/pkg/logql"
	"github.com/grafana/loki/pkg/logqlmodel"
	"github.com/grafana/loki/pkg/querier/astmapper"
	"github.com/grafana/loki/pkg/querier/queryrange/queryrangebase"
	"github.com/grafana/loki/pkg/storage/config"
	"github.com/grafana/loki/pkg/util"
	util_log "github.com/grafana/loki/pkg/util/log"
	"github.com/grafana/loki/pkg/util/marshal"
	"github.com/grafana/loki/pkg/util/validation"
)

var errInvalidShardingRange = errors.New("Query does not fit in a single sharding configuration")

// NewQueryShardMiddleware creates a middleware which downstreams queries after AST mapping and query encoding.
func NewQueryShardMiddleware(
	logger log.Logger,
	confs ShardingConfigs,
	middlewareMetrics *queryrangebase.InstrumentMiddlewareMetrics,
	shardingMetrics *logql.MapperMetrics,
	limits Limits,
) queryrangebase.Middleware {
	noshards := !hasShards(confs)

	if noshards {
		level.Warn(logger).Log(
			"middleware", "QueryShard",
			"msg", "no configuration with shard found",
			"confs", fmt.Sprintf("%+v", confs),
		)
		return queryrangebase.PassthroughMiddleware
	}

	mapperware := queryrangebase.MiddlewareFunc(func(next queryrangebase.Handler) queryrangebase.Handler {
		return newASTMapperware(confs, next, logger, shardingMetrics, limits)
	})

	return queryrangebase.MiddlewareFunc(func(next queryrangebase.Handler) queryrangebase.Handler {
		return &shardSplitter{
			limits: limits,
			shardingware: queryrangebase.MergeMiddlewares(
				queryrangebase.InstrumentMiddleware("shardingware", middlewareMetrics),
				mapperware,
			).Wrap(next),
			now:  time.Now,
			next: queryrangebase.InstrumentMiddleware("sharding-bypass", middlewareMetrics).Wrap(next),
		}
	})
}
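// The sketch below is illustrative and not part of the original file: it shows
// one way the middleware above might be wired around a downstream handler.
// exampleWrapWithQueryShard and its parameter names are hypothetical; only
// NewQueryShardMiddleware and the queryrangebase types come from this package.
func exampleWrapWithQueryShard(
	logger log.Logger,
	confs ShardingConfigs,
	metrics *queryrangebase.InstrumentMiddlewareMetrics,
	mapperMetrics *logql.MapperMetrics,
	limits Limits,
	downstream queryrangebase.Handler,
) queryrangebase.Handler {
	mw := NewQueryShardMiddleware(logger, confs, metrics, mapperMetrics, limits)
	// If no period config defines a shard factor (and none uses the TSDB
	// index), mw is PassthroughMiddleware and this returns downstream
	// unchanged; otherwise requests flow through shardSplitter -> astMapperware.
	return mw.Wrap(downstream)
}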
func newASTMapperware(
	confs ShardingConfigs,
	next queryrangebase.Handler,
	logger log.Logger,
	metrics *logql.MapperMetrics,
	limits Limits,
) *astMapperware {
	return &astMapperware{
		confs:   confs,
		logger:  log.With(logger, "middleware", "QueryShard.astMapperware"),
		limits:  limits,
		next:    next,
		ng:      logql.NewDownstreamEngine(logql.EngineOpts{}, DownstreamHandler{next: next, limits: limits}, limits, logger),
		metrics: metrics,
	}
}

type astMapperware struct {
	confs   ShardingConfigs
	logger  log.Logger
	limits  Limits
	next    queryrangebase.Handler
	ng      *logql.DownstreamEngine
	metrics *logql.MapperMetrics
}

func (ast *astMapperware) Do(ctx context.Context, r queryrangebase.Request) (queryrangebase.Response, error) {
	conf, err := ast.confs.GetConf(r)
	logger := util_log.WithContext(ctx, ast.logger)
	// cannot shard with this timerange
	if err != nil {
		level.Warn(logger).Log("err", err.Error(), "msg", "skipped AST mapper for request")
		return ast.next.Do(ctx, r)
	}

	userID, err := tenant.TenantID(ctx)
	if err != nil {
		return nil, err
	}

	resolver, ok := shardResolverForConf(
		ctx,
		conf,
		ast.ng.Opts().MaxLookBackPeriod,
		ast.logger,
		ast.limits.MaxQueryParallelism(userID),
		r,
		ast.next,
	)
	if !ok {
		return ast.next.Do(ctx, r)
	}

	mapper := logql.NewShardMapper(resolver, ast.metrics)
	if err != nil {
		return nil, err
	}

	noop, parsed, err := mapper.Parse(r.GetQuery())
	if err != nil {
		level.Warn(logger).Log("msg", "failed mapping AST", "err", err.Error(), "query", r.GetQuery())
		return nil, err
	}
	level.Debug(logger).Log("no-op", noop, "mapped", parsed.String())

	if noop {
		// the ast can't be mapped to a sharded equivalent
		// so we can bypass the sharding engine.
		return ast.next.Do(ctx, r)
	}

	params, err := paramsFromRequest(r)
	if err != nil {
		return nil, err
	}

	var path string
	switch r := r.(type) {
	case *LokiRequest:
		path = r.GetPath()
	case *LokiInstantRequest:
		path = r.GetPath()
	default:
		return nil, fmt.Errorf("expected *LokiRequest or *LokiInstantRequest, got (%T)", r)
	}
	query := ast.ng.Query(ctx, params, parsed)

	res, err := query.Exec(ctx)
	if err != nil {
		return nil, err
	}

	value, err := marshal.NewResultValue(res.Data)
	if err != nil {
		return nil, err
	}

	switch res.Data.Type() {
	case parser.ValueTypeMatrix:
		return &LokiPromResponse{
			Response: &queryrangebase.PrometheusResponse{
				Status: loghttp.QueryStatusSuccess,
				Data: queryrangebase.PrometheusData{
					ResultType: loghttp.ResultTypeMatrix,
					Result:     toProtoMatrix(value.(loghttp.Matrix)),
				},
			},
			Statistics: res.Statistics,
		}, nil
	case logqlmodel.ValueTypeStreams:
		return &LokiResponse{
			Status:     loghttp.QueryStatusSuccess,
			Direction:  params.Direction(),
			Limit:      params.Limit(),
			Version:    uint32(loghttp.GetVersion(path)),
			Statistics: res.Statistics,
			Data: LokiData{
				ResultType: loghttp.ResultTypeStream,
				Result:     value.(loghttp.Streams).ToProto(),
			},
		}, nil
	case parser.ValueTypeVector:
		return &LokiPromResponse{
			Statistics: res.Statistics,
			Response: &queryrangebase.PrometheusResponse{
				Status: loghttp.QueryStatusSuccess,
				Data: queryrangebase.PrometheusData{
					ResultType: loghttp.ResultTypeVector,
					Result:     toProtoVector(value.(loghttp.Vector)),
				},
			},
		}, nil
	default:
		return nil, fmt.Errorf("unexpected downstream response type (%T)", res.Data.Type())
	}
}
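// Illustrative sketch, not part of the original file: the two response
// envelopes produced by astMapperware.Do above. Metric results (matrix or
// vector) are wrapped in *LokiPromResponse, while log results (streams) are
// wrapped in *LokiResponse. The helper name is hypothetical.
func exampleClassifyShardedResponse(resp queryrangebase.Response) string {
	switch resp.(type) {
	case *LokiPromResponse:
		return "metric result (matrix or vector)"
	case *LokiResponse:
		return "log result (streams)"
	default:
		return "unexpected response type"
	}
}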
// shardSplitter middleware will only shard appropriate requests that do not extend past the MinShardingLookback interval.
// This is used to send nonsharded requests to the ingesters in order to not overload them.
// TODO(owen-d): export in cortex so we don't duplicate code
type shardSplitter struct {
	limits       Limits                 // delimiter for splitting sharded vs non-sharded queries
	shardingware queryrangebase.Handler // handler for sharded queries
	next         queryrangebase.Handler // handler for non-sharded queries
	now          func() time.Time       // injectable time.Now
}

func (splitter *shardSplitter) Do(ctx context.Context, r queryrangebase.Request) (queryrangebase.Response, error) {
	tenantIDs, err := tenant.TenantIDs(ctx)
	if err != nil {
		return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
	}
	minShardingLookback := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, splitter.limits.MinShardingLookback)
	if minShardingLookback == 0 {
		return splitter.shardingware.Do(ctx, r)
	}
	cutoff := splitter.now().Add(-minShardingLookback)
	// Only attempt to shard queries which are older than the sharding lookback
	// (the period for which ingesters are also queried) or when the lookback is disabled.
	if minShardingLookback == 0 || util.TimeFromMillis(r.GetEnd()).Before(cutoff) {
		return splitter.shardingware.Do(ctx, r)
	}
	return splitter.next.Do(ctx, r)
}

func hasShards(confs ShardingConfigs) bool {
	for _, conf := range confs {
		if conf.RowShards > 0 || conf.IndexType == config.TSDBType {
			return true
		}
	}
	return false
}

// ShardingConfigs is a slice of chunk shard configs
type ShardingConfigs []config.PeriodConfig

// ValidRange extracts a non-overlapping sharding configuration from a list of configs and a time range.
func (confs ShardingConfigs) ValidRange(start, end int64) (config.PeriodConfig, error) {
	for i, conf := range confs {
		if start < int64(conf.From.Time) {
			// the query starts before this config's range
			return config.PeriodConfig{}, errInvalidShardingRange
		} else if i == len(confs)-1 {
			// the last configuration has no upper bound
			return conf, nil
		} else if end < int64(confs[i+1].From.Time) {
			// The request is entirely scoped into this shard config
			return conf, nil
		} else {
			continue
		}
	}

	return config.PeriodConfig{}, errInvalidShardingRange
}

// GetConf will extract a shardable config corresponding to a request and the shardingconfigs
func (confs ShardingConfigs) GetConf(r queryrangebase.Request) (config.PeriodConfig, error) {
	conf, err := confs.ValidRange(r.GetStart(), r.GetEnd())
	// query exists across multiple sharding configs
	if err != nil {
		return conf, err
	}

	// query doesn't have shard factor, so don't try to do AST mapping.
	if conf.RowShards < 2 && conf.IndexType != config.TSDBType {
		return conf, errors.Errorf("shard factor not high enough: [%d]", conf.RowShards)
	}

	return conf, nil
}
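// Illustrative sketch, not part of the original file: how a caller can decide
// whether a request is shardable under these configs. GetConf fails either
// when the start/end range spans more than one period config
// (errInvalidShardingRange) or when the matching config has no usable shard
// factor and does not use the TSDB index. The helper name is hypothetical.
func exampleIsShardable(confs ShardingConfigs, r queryrangebase.Request) bool {
	_, err := confs.GetConf(r)
	return err == nil
}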
// NewSeriesQueryShardMiddleware creates a middleware which shards series queries.
func NewSeriesQueryShardMiddleware(
	logger log.Logger,
	confs ShardingConfigs,
	middlewareMetrics *queryrangebase.InstrumentMiddlewareMetrics,
	shardingMetrics *logql.MapperMetrics,
	limits queryrangebase.Limits,
	merger queryrangebase.Merger,
) queryrangebase.Middleware {
	noshards := !hasShards(confs)

	if noshards {
		level.Warn(logger).Log(
			"middleware", "QueryShard",
			"msg", "no configuration with shard found",
			"confs", fmt.Sprintf("%+v", confs),
		)
		return queryrangebase.PassthroughMiddleware
	}
	return queryrangebase.MiddlewareFunc(func(next queryrangebase.Handler) queryrangebase.Handler {
		return queryrangebase.InstrumentMiddleware("sharding", middlewareMetrics).Wrap(
			&seriesShardingHandler{
				confs:   confs,
				logger:  logger,
				next:    next,
				metrics: shardingMetrics,
				limits:  limits,
				merger:  merger,
			},
		)
	})
}

type seriesShardingHandler struct {
	confs   ShardingConfigs
	logger  log.Logger
	next    queryrangebase.Handler
	metrics *logql.MapperMetrics
	limits  queryrangebase.Limits
	merger  queryrangebase.Merger
}

func (ss *seriesShardingHandler) Do(ctx context.Context, r queryrangebase.Request) (queryrangebase.Response, error) {
	conf, err := ss.confs.GetConf(r)
	// cannot shard with this timerange
	if err != nil {
		level.Warn(ss.logger).Log("err", err.Error(), "msg", "skipped sharding for request")
		return ss.next.Do(ctx, r)
	}

	req, ok := r.(*LokiSeriesRequest)
	if !ok {
		return nil, fmt.Errorf("expected *LokiSeriesRequest, got (%T)", r)
	}

	ss.metrics.DownstreamQueries.WithLabelValues("series").Inc()
	ss.metrics.DownstreamFactor.Observe(float64(conf.RowShards))

	requests := make([]queryrangebase.Request, 0, conf.RowShards)
	for i := 0; i < int(conf.RowShards); i++ {
		shardedRequest := *req
		shardedRequest.Shards = []string{astmapper.ShardAnnotation{
			Shard: i,
			Of:    int(conf.RowShards),
		}.String()}
		requests = append(requests, &shardedRequest)
	}
	requestResponses, err := queryrangebase.DoRequests(ctx, ss.next, requests, ss.limits)
	if err != nil {
		return nil, err
	}
	responses := make([]queryrangebase.Response, 0, len(requestResponses))
	for _, res := range requestResponses {
		responses = append(responses, res.Response)
	}
	return ss.merger.MergeResponse(responses...)
}
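// Illustrative sketch, not part of the original file: the shard annotations
// attached to the fanned-out series requests above. Assuming the usual
// "<shard>_of_<total>" rendering of astmapper.ShardAnnotation, a shard factor
// of 3 yields "0_of_3", "1_of_3", "2_of_3". The helper name is hypothetical.
func exampleSeriesShardAnnotations(rowShards int) []string {
	annotations := make([]string, 0, rowShards)
	for i := 0; i < rowShards; i++ {
		annotations = append(annotations, astmapper.ShardAnnotation{
			Shard: i,
			Of:    rowShards,
		}.String())
	}
	return annotations
}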