github.com/thanos-io/thanos@v0.32.5/pkg/query/querier.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package query

import (
	"context"
	"strings"
	"sync"
	"time"

	"github.com/go-kit/log"
	"github.com/opentracing/opentracing-go"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/storage"

	"github.com/thanos-io/thanos/pkg/dedup"
	"github.com/thanos-io/thanos/pkg/extprom"
	"github.com/thanos-io/thanos/pkg/gate"
	"github.com/thanos-io/thanos/pkg/store"
	"github.com/thanos-io/thanos/pkg/store/storepb"
	"github.com/thanos-io/thanos/pkg/tenancy"
	"github.com/thanos-io/thanos/pkg/tracing"
)

type seriesStatsReporter func(seriesStats storepb.SeriesStatsCounter)

var NoopSeriesStatsReporter seriesStatsReporter = func(_ storepb.SeriesStatsCounter) {}

func NewAggregateStatsReporter(stats *[]storepb.SeriesStatsCounter) seriesStatsReporter {
	var mutex sync.Mutex
	return func(s storepb.SeriesStatsCounter) {
		mutex.Lock()
		defer mutex.Unlock()
		*stats = append(*stats, s)
	}
}

// QueryableCreator returns an implementation of promql.Queryable that fetches data from the proxy store API endpoints.
// If deduplication is enabled, all data retrieved from it will be deduplicated along all replicaLabels by default.
// When the replicaLabels argument is not empty, it overwrites the global replicaLabels flag. This allows specifying
// replicaLabels at query time.
// maxResolutionMillis controls the downsampling resolution that is allowed (specified in milliseconds).
// partialResponse controls the `partialResponseDisabled` option of the StoreAPI and the partial response behavior of the proxy.
type QueryableCreator func(
	deduplicate bool,
	replicaLabels []string,
	storeDebugMatchers [][]*labels.Matcher,
	maxResolutionMillis int64,
	partialResponse,
	enableQueryPushdown,
	skipChunks bool,
	shardInfo *storepb.ShardInfo,
	seriesStatsReporter seriesStatsReporter,
) storage.Queryable
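// A minimal wiring sketch for the creator above (illustrative only; logger, reg, proxyStore,
// ctx, mint and maxt are hypothetical values assumed to exist at the call site):
//
//	queryableCreator := NewQueryableCreator(logger, reg, proxyStore, 20, 5*time.Minute)
//	queryable := queryableCreator(
//		true,                    // deduplicate
//		[]string{"replica"},     // replicaLabels
//		nil,                     // storeDebugMatchers
//		0,                       // maxResolutionMillis
//		true,                    // partialResponse
//		false,                   // enableQueryPushdown
//		false,                   // skipChunks
//		nil,                     // shardInfo
//		NoopSeriesStatsReporter, // seriesStatsReporter
//	)
//	q, err := queryable.Querier(ctx, mint, maxt)
//	if err != nil {
//		// handle error
//	}
//	defer func() { _ = q.Close() }()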
// NewQueryableCreator creates a QueryableCreator.
// NOTE(bwplotka): The proxy is assumed to be replica-aware, see the thanos.store.info.StoreInfo.replica_aware field.
func NewQueryableCreator(
	logger log.Logger,
	reg prometheus.Registerer,
	proxy storepb.StoreServer,
	maxConcurrentSelects int,
	selectTimeout time.Duration,
) QueryableCreator {
	gf := gate.NewGateFactory(extprom.WrapRegistererWithPrefix("concurrent_selects_", reg), maxConcurrentSelects, gate.Selects)

	return func(
		deduplicate bool,
		replicaLabels []string,
		storeDebugMatchers [][]*labels.Matcher,
		maxResolutionMillis int64,
		partialResponse,
		enableQueryPushdown,
		skipChunks bool,
		shardInfo *storepb.ShardInfo,
		seriesStatsReporter seriesStatsReporter,
	) storage.Queryable {
		return &queryable{
			logger:              logger,
			replicaLabels:       replicaLabels,
			storeDebugMatchers:  storeDebugMatchers,
			proxy:               proxy,
			deduplicate:         deduplicate,
			maxResolutionMillis: maxResolutionMillis,
			partialResponse:     partialResponse,
			skipChunks:          skipChunks,
			gateProviderFn: func() gate.Gate {
				return gf.New()
			},
			maxConcurrentSelects: maxConcurrentSelects,
			selectTimeout:        selectTimeout,
			enableQueryPushdown:  enableQueryPushdown,
			shardInfo:            shardInfo,
			seriesStatsReporter:  seriesStatsReporter,
		}
	}
}

type queryable struct {
	logger               log.Logger
	replicaLabels        []string
	storeDebugMatchers   [][]*labels.Matcher
	proxy                storepb.StoreServer
	deduplicate          bool
	maxResolutionMillis  int64
	partialResponse      bool
	skipChunks           bool
	gateProviderFn       func() gate.Gate
	maxConcurrentSelects int
	selectTimeout        time.Duration
	enableQueryPushdown  bool
	shardInfo            *storepb.ShardInfo
	seriesStatsReporter  seriesStatsReporter
}

// Querier returns a new storage querier against the underlying proxy store API.
func (q *queryable) Querier(ctx context.Context, mint, maxt int64) (storage.Querier, error) {
	return newQuerier(ctx, q.logger, mint, maxt, q.replicaLabels, q.storeDebugMatchers, q.proxy, q.deduplicate, q.maxResolutionMillis, q.partialResponse, q.enableQueryPushdown, q.skipChunks, q.gateProviderFn(), q.selectTimeout, q.shardInfo, q.seriesStatsReporter), nil
}

type querier struct {
	ctx                     context.Context
	logger                  log.Logger
	cancel                  func()
	mint, maxt              int64
	replicaLabels           []string
	storeDebugMatchers      [][]*labels.Matcher
	proxy                   storepb.StoreServer
	deduplicate             bool
	maxResolutionMillis     int64
	partialResponseStrategy storepb.PartialResponseStrategy
	enableQueryPushdown     bool
	skipChunks              bool
	selectGate              gate.Gate
	selectTimeout           time.Duration
	shardInfo               *storepb.ShardInfo
	seriesStatsReporter     seriesStatsReporter
}

// newQuerier creates an implementation of storage.Querier that fetches data from the proxy
// store API endpoints.
func newQuerier(
	ctx context.Context,
	logger log.Logger,
	mint,
	maxt int64,
	replicaLabels []string,
	storeDebugMatchers [][]*labels.Matcher,
	proxy storepb.StoreServer,
	deduplicate bool,
	maxResolutionMillis int64,
	partialResponse,
	enableQueryPushdown,
	skipChunks bool,
	selectGate gate.Gate,
	selectTimeout time.Duration,
	shardInfo *storepb.ShardInfo,
	seriesStatsReporter seriesStatsReporter,
) *querier {
	if logger == nil {
		logger = log.NewNopLogger()
	}
	ctx, cancel := context.WithCancel(ctx)

	rl := make(map[string]struct{})
	for _, replicaLabel := range replicaLabels {
		rl[replicaLabel] = struct{}{}
	}

	partialResponseStrategy := storepb.PartialResponseStrategy_ABORT
	if partialResponse {
		partialResponseStrategy = storepb.PartialResponseStrategy_WARN
	}
	return &querier{
		ctx:           ctx,
		logger:        logger,
		cancel:        cancel,
		selectGate:    selectGate,
		selectTimeout: selectTimeout,

		mint:                    mint,
		maxt:                    maxt,
		replicaLabels:           replicaLabels,
		storeDebugMatchers:      storeDebugMatchers,
		proxy:                   proxy,
		deduplicate:             deduplicate,
		maxResolutionMillis:     maxResolutionMillis,
		partialResponseStrategy: partialResponseStrategy,
		skipChunks:              skipChunks,
		enableQueryPushdown:     enableQueryPushdown,
		shardInfo:               shardInfo,
		seriesStatsReporter:     seriesStatsReporter,
	}
}

func (q *querier) isDedupEnabled() bool {
	return q.deduplicate && len(q.replicaLabels) > 0
}

type seriesServer struct {
	// This field exists only to pseudo-implement the unused methods of the interface.
	storepb.Store_SeriesServer
	ctx context.Context

	seriesSet      []storepb.Series
	seriesSetStats storepb.SeriesStatsCounter
	warnings       []string
}

func (s *seriesServer) Send(r *storepb.SeriesResponse) error {
	if r.GetWarning() != "" {
		s.warnings = append(s.warnings, r.GetWarning())
		return nil
	}

	if r.GetSeries() != nil {
		s.seriesSet = append(s.seriesSet, *r.GetSeries())
		s.seriesSetStats.Count(r.GetSeries())
		return nil
	}

	// Unsupported field, skip.
	return nil
}

func (s *seriesServer) Context() context.Context {
	return s.ctx
}

// aggrsFromFunc infers aggregates of the underlying data based on the wrapping
// function of a series selection.
func aggrsFromFunc(f string) []storepb.Aggr {
	if f == "min" || strings.HasPrefix(f, "min_") {
		return []storepb.Aggr{storepb.Aggr_MIN}
	}
	if f == "max" || strings.HasPrefix(f, "max_") {
		return []storepb.Aggr{storepb.Aggr_MAX}
	}
	if f == "count" || strings.HasPrefix(f, "count_") {
		return []storepb.Aggr{storepb.Aggr_COUNT}
	}
	// f == "sum" falls through here since we want the actual samples.
	if strings.HasPrefix(f, "sum_") {
		return []storepb.Aggr{storepb.Aggr_SUM}
	}
	if f == "increase" || f == "rate" || f == "irate" || f == "resets" {
		return []storepb.Aggr{storepb.Aggr_COUNTER}
	}
	// In the default case, we retrieve count and sum to compute an average.
	return []storepb.Aggr{storepb.Aggr_COUNT, storepb.Aggr_SUM}
}
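// For illustration, the inference above yields, for example:
//
//	aggrsFromFunc("max_over_time") // []storepb.Aggr{storepb.Aggr_MAX}
//	aggrsFromFunc("rate")          // []storepb.Aggr{storepb.Aggr_COUNTER}
//	aggrsFromFunc("avg_over_time") // []storepb.Aggr{storepb.Aggr_COUNT, storepb.Aggr_SUM} (default case)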
func storeHintsFromPromHints(hints *storage.SelectHints) *storepb.QueryHints {
	return &storepb.QueryHints{
		StepMillis: hints.Step,
		Func: &storepb.Func{
			Name: hints.Func,
		},
		Grouping: &storepb.Grouping{
			By:     hints.By,
			Labels: hints.Grouping,
		},
		Range: &storepb.Range{Millis: hints.Range},
	}
}

func (q *querier) Select(_ bool, hints *storage.SelectHints, ms ...*labels.Matcher) storage.SeriesSet {
	if hints == nil {
		hints = &storage.SelectHints{
			Start: q.mint,
			End:   q.maxt,
		}
	}

	matchers := make([]string, len(ms))
	for i, m := range ms {
		matchers[i] = m.String()
	}

	// The querier has a context, but it gets canceled as soon as query evaluation is completed by the engine.
	// We want to prevent this from happening for the async store API calls we make while preserving tracing context.
	// TODO(bwplotka): Is the above still true? It feels weird to leave unfinished calls behind the query API.
	ctx := tracing.CopyTraceContext(context.Background(), q.ctx)
	ctx, cancel := context.WithTimeout(ctx, q.selectTimeout)
	span, ctx := tracing.StartSpan(ctx, "querier_select", opentracing.Tags{
		"minTime":  hints.Start,
		"maxTime":  hints.End,
		"matchers": "{" + strings.Join(matchers, ",") + "}",
	})

	promise := make(chan storage.SeriesSet, 1)
	go func() {
		defer close(promise)

		var err error
		tracing.DoInSpan(ctx, "querier_select_gate_ismyturn", func(ctx context.Context) {
			err = q.selectGate.Start(ctx)
		})
		if err != nil {
			promise <- storage.ErrSeriesSet(errors.Wrap(err, "failed to wait for turn"))
			return
		}
		defer q.selectGate.Done()

		span, ctx := tracing.StartSpan(ctx, "querier_select_select_fn")
		defer span.Finish()

		set, stats, err := q.selectFn(ctx, hints, ms...)
		if err != nil {
			promise <- storage.ErrSeriesSet(err)
			return
		}
		q.seriesStatsReporter(stats)

		promise <- set
	}()

	return &lazySeriesSet{create: func() (storage.SeriesSet, bool) {
		defer cancel()
		defer span.Finish()

		// Only gets called once, for the first Next() call of the series set.
		set, ok := <-promise
		if !ok {
			return storage.ErrSeriesSet(errors.New("channel closed before a value received")), false
		}
		return set, set.Next()
	}}
}
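// Illustrative sketch of how a caller consumes the lazy set returned by Select above:
// Select returns immediately while the store call runs in the goroutine, and the first
// Next() call blocks on the promise channel. (hints and matchers are hypothetical values.)
//
//	set := q.Select(false, hints, matchers...)
//	for set.Next() { // first Next() waits for the async select to finish
//		series := set.At()
//		_ = series // consumed by the PromQL engine in practice
//	}
//	err := set.Err()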
func (q *querier) selectFn(ctx context.Context, hints *storage.SelectHints, ms ...*labels.Matcher) (storage.SeriesSet, storepb.SeriesStatsCounter, error) {
	sms, err := storepb.PromMatchersToMatchers(ms...)
	if err != nil {
		return nil, storepb.SeriesStatsCounter{}, errors.Wrap(err, "convert matchers")
	}

	aggrs := aggrsFromFunc(hints.Func)

	// TODO(bwplotka): Pass it using the SeriesRequest instead of relying on context.
	ctx = context.WithValue(ctx, store.StoreMatcherKey, q.storeDebugMatchers)
	ctx = context.WithValue(ctx, tenancy.TenantKey, q.ctx.Value(tenancy.TenantKey))

	// TODO(bwplotka): Use in-process gRPC when we want to stream responses.
	// Currently streaming won't help due to the nature of the PromQL engine, which
	// pulls all series before computation anyway.
	resp := &seriesServer{ctx: ctx}
	req := storepb.SeriesRequest{
		MinTime:                 hints.Start,
		MaxTime:                 hints.End,
		Matchers:                sms,
		MaxResolutionWindow:     q.maxResolutionMillis,
		Aggregates:              aggrs,
		ShardInfo:               q.shardInfo,
		PartialResponseStrategy: q.partialResponseStrategy,
		SkipChunks:              q.skipChunks,
	}
	if q.enableQueryPushdown {
		req.QueryHints = storeHintsFromPromHints(hints)
	}
	if q.isDedupEnabled() {
		// Soft request to sort without replica labels and push them to the end of the labelset.
		req.WithoutReplicaLabels = q.replicaLabels
	}

	if err := q.proxy.Series(&req, resp); err != nil {
		return nil, storepb.SeriesStatsCounter{}, errors.Wrap(err, "proxy Series()")
	}

	var warns storage.Warnings
	for _, w := range resp.warnings {
		warns = append(warns, errors.New(w))
	}

	if q.enableQueryPushdown && (hints.Func == "max_over_time" || hints.Func == "min_over_time") {
		// On query pushdown, delete the metric's name from the result because that is what
		// PromQL does anyway, and we want our iterator to work with data that was either
		// pushed down or not.
		for i := range resp.seriesSet {
			lbls := resp.seriesSet[i].Labels
			for j, lbl := range lbls {
				if lbl.Name != model.MetricNameLabel {
					continue
				}
				resp.seriesSet[i].Labels = append(resp.seriesSet[i].Labels[:j], resp.seriesSet[i].Labels[j+1:]...)
				break
			}
		}
	}

	if !q.isDedupEnabled() {
		return &promSeriesSet{
			mint:  q.mint,
			maxt:  q.maxt,
			set:   newStoreSeriesSet(resp.seriesSet),
			aggrs: aggrs,
			warns: warns,
		}, resp.seriesSetStats, nil
	}

	// TODO(bwplotka): Move to deduplication on the chunk level inside promSeriesSet, similar to what we have in dedup.NewDedupChunkMerger().
	// This, however, requires a big refactor, taking care of correct AggrChunk-to-iterator conversion, pushdown logic and counter reset handling.
	// For now we apply simple logic that splits potentially overlapping chunks into separate replica series, so we can split the work.
	set := &promSeriesSet{
		mint:  q.mint,
		maxt:  q.maxt,
		set:   dedup.NewOverlapSplit(newStoreSeriesSet(resp.seriesSet)),
		aggrs: aggrs,
		warns: warns,
	}

	return dedup.NewSeriesSet(set, hints.Func, q.enableQueryPushdown), resp.seriesSetStats, nil
}
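// Illustrative example of the dedup path above (label names and series are hypothetical):
// with deduplicate=true and replicaLabels=[]string{"replica"}, the proxy is asked to return
// series sorted without the replica label (pushed to the end of the labelset), so the two
// replica series
//
//	{__name__="up", job="node", replica="a"}
//	{__name__="up", job="node", replica="b"}
//
// arrive adjacent to each other, and dedup.NewSeriesSet merges them into a single
// {__name__="up", job="node"} series at query time.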
// LabelValues returns all potential values for a label name.
func (q *querier) LabelValues(name string, matchers ...*labels.Matcher) ([]string, storage.Warnings, error) {
	span, ctx := tracing.StartSpan(q.ctx, "querier_label_values")
	defer span.Finish()

	// TODO(bwplotka): Pass it using the SeriesRequest instead of relying on context.
	ctx = context.WithValue(ctx, store.StoreMatcherKey, q.storeDebugMatchers)
	ctx = context.WithValue(ctx, tenancy.TenantKey, q.ctx.Value(tenancy.TenantKey))

	pbMatchers, err := storepb.PromMatchersToMatchers(matchers...)
	if err != nil {
		return nil, nil, errors.Wrap(err, "converting prom matchers to storepb matchers")
	}

	resp, err := q.proxy.LabelValues(ctx, &storepb.LabelValuesRequest{
		Label:                   name,
		PartialResponseStrategy: q.partialResponseStrategy,
		Start:                   q.mint,
		End:                     q.maxt,
		Matchers:                pbMatchers,
	})
	if err != nil {
		return nil, nil, errors.Wrap(err, "proxy LabelValues()")
	}

	var warns storage.Warnings
	for _, w := range resp.Warnings {
		warns = append(warns, errors.New(w))
	}

	return resp.Values, warns, nil
}

// LabelNames returns all the unique label names present in the block in sorted order constrained
// by the given matchers.
func (q *querier) LabelNames(matchers ...*labels.Matcher) ([]string, storage.Warnings, error) {
	span, ctx := tracing.StartSpan(q.ctx, "querier_label_names")
	defer span.Finish()

	// TODO(bwplotka): Pass it using the SeriesRequest instead of relying on context.
	ctx = context.WithValue(ctx, store.StoreMatcherKey, q.storeDebugMatchers)
	ctx = context.WithValue(ctx, tenancy.TenantKey, q.ctx.Value(tenancy.TenantKey))

	pbMatchers, err := storepb.PromMatchersToMatchers(matchers...)
	if err != nil {
		return nil, nil, errors.Wrap(err, "converting prom matchers to storepb matchers")
	}

	resp, err := q.proxy.LabelNames(ctx, &storepb.LabelNamesRequest{
		PartialResponseStrategy: q.partialResponseStrategy,
		Start:                   q.mint,
		End:                     q.maxt,
		Matchers:                pbMatchers,
	})
	if err != nil {
		return nil, nil, errors.Wrap(err, "proxy LabelNames()")
	}

	var warns storage.Warnings
	for _, w := range resp.Warnings {
		warns = append(warns, errors.New(w))
	}

	return resp.Names, warns, nil
}

func (q *querier) Close() error {
	q.cancel()
	return nil
}