github.com/thanos-io/thanos@v0.32.5/pkg/query/querier.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package query

import (
	"context"
	"strings"
	"sync"
	"time"

	"github.com/go-kit/log"
	"github.com/opentracing/opentracing-go"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/storage"

	"github.com/thanos-io/thanos/pkg/dedup"
	"github.com/thanos-io/thanos/pkg/extprom"
	"github.com/thanos-io/thanos/pkg/gate"
	"github.com/thanos-io/thanos/pkg/store"
	"github.com/thanos-io/thanos/pkg/store/storepb"
	"github.com/thanos-io/thanos/pkg/tenancy"
	"github.com/thanos-io/thanos/pkg/tracing"
)

type seriesStatsReporter func(seriesStats storepb.SeriesStatsCounter)

var NoopSeriesStatsReporter seriesStatsReporter = func(_ storepb.SeriesStatsCounter) {}

func NewAggregateStatsReporter(stats *[]storepb.SeriesStatsCounter) seriesStatsReporter {
	var mutex sync.Mutex
	return func(s storepb.SeriesStatsCounter) {
		mutex.Lock()
		defer mutex.Unlock()
		*stats = append(*stats, s)
	}
}

// QueryableCreator returns an implementation of promql.Queryable that fetches data from the proxy store API endpoints.
// If deduplication is enabled, all data retrieved from it will be deduplicated along all replicaLabels by default.
// When the replicaLabels argument is not empty, it overwrites the global replicaLabels flag. This allows specifying
// replicaLabels at query time.
// maxResolutionMillis controls the downsampling resolution that is allowed (specified in milliseconds).
// partialResponse controls the `partialResponseDisabled` option of the StoreAPI and the partial response behavior of the proxy.
type QueryableCreator func(
	deduplicate bool,
	replicaLabels []string,
	storeDebugMatchers [][]*labels.Matcher,
	maxResolutionMillis int64,
	partialResponse,
	enableQueryPushdown,
	skipChunks bool,
	shardInfo *storepb.ShardInfo,
	seriesStatsReporter seriesStatsReporter,
) storage.Queryable
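// A minimal wiring sketch for the creator above (illustrative only; logger, reg, proxyStore,
// ctx, mint and maxt are hypothetical values assumed to exist at the call site):
//
//	queryableCreator := NewQueryableCreator(logger, reg, proxyStore, 20, 5*time.Minute)
//	queryable := queryableCreator(
//		true,                    // deduplicate
//		[]string{"replica"},     // replicaLabels
//		nil,                     // storeDebugMatchers
//		0,                       // maxResolutionMillis
//		true,                    // partialResponse
//		false,                   // enableQueryPushdown
//		false,                   // skipChunks
//		nil,                     // shardInfo
//		NoopSeriesStatsReporter, // seriesStatsReporter
//	)
//	q, err := queryable.Querier(ctx, mint, maxt)
//	if err != nil {
//		// handle error
//	}
//	defer func() { _ = q.Close() }()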
// NewQueryableCreator creates a QueryableCreator.
// NOTE(bwplotka): The proxy is assumed to be replica-aware, see the thanos.store.info.StoreInfo.replica_aware field.
func NewQueryableCreator(
	logger log.Logger,
	reg prometheus.Registerer,
	proxy storepb.StoreServer,
	maxConcurrentSelects int,
	selectTimeout time.Duration,
) QueryableCreator {
	gf := gate.NewGateFactory(extprom.WrapRegistererWithPrefix("concurrent_selects_", reg), maxConcurrentSelects, gate.Selects)

	return func(
		deduplicate bool,
		replicaLabels []string,
		storeDebugMatchers [][]*labels.Matcher,
		maxResolutionMillis int64,
		partialResponse,
		enableQueryPushdown,
		skipChunks bool,
		shardInfo *storepb.ShardInfo,
		seriesStatsReporter seriesStatsReporter,
	) storage.Queryable {
		return &queryable{
			logger:              logger,
			replicaLabels:       replicaLabels,
			storeDebugMatchers:  storeDebugMatchers,
			proxy:               proxy,
			deduplicate:         deduplicate,
			maxResolutionMillis: maxResolutionMillis,
			partialResponse:     partialResponse,
			skipChunks:          skipChunks,
			gateProviderFn: func() gate.Gate {
				return gf.New()
			},
			maxConcurrentSelects: maxConcurrentSelects,
			selectTimeout:        selectTimeout,
			enableQueryPushdown:  enableQueryPushdown,
			shardInfo:            shardInfo,
			seriesStatsReporter:  seriesStatsReporter,
		}
	}
}

type queryable struct {
	logger               log.Logger
	replicaLabels        []string
	storeDebugMatchers   [][]*labels.Matcher
	proxy                storepb.StoreServer
	deduplicate          bool
	maxResolutionMillis  int64
	partialResponse      bool
	skipChunks           bool
	gateProviderFn       func() gate.Gate
	maxConcurrentSelects int
	selectTimeout        time.Duration
	enableQueryPushdown  bool
	shardInfo            *storepb.ShardInfo
	seriesStatsReporter  seriesStatsReporter
}

// Querier returns a new storage querier against the underlying proxy store API.
func (q *queryable) Querier(ctx context.Context, mint, maxt int64) (storage.Querier, error) {
	return newQuerier(ctx, q.logger, mint, maxt, q.replicaLabels, q.storeDebugMatchers, q.proxy, q.deduplicate, q.maxResolutionMillis, q.partialResponse, q.enableQueryPushdown, q.skipChunks, q.gateProviderFn(), q.selectTimeout, q.shardInfo, q.seriesStatsReporter), nil
}

type querier struct {
	ctx                     context.Context
	logger                  log.Logger
	cancel                  func()
	mint, maxt              int64
	replicaLabels           []string
	storeDebugMatchers      [][]*labels.Matcher
	proxy                   storepb.StoreServer
	deduplicate             bool
	maxResolutionMillis     int64
	partialResponseStrategy storepb.PartialResponseStrategy
	enableQueryPushdown     bool
	skipChunks              bool
	selectGate              gate.Gate
	selectTimeout           time.Duration
	shardInfo               *storepb.ShardInfo
	seriesStatsReporter     seriesStatsReporter
}

// newQuerier creates an implementation of storage.Querier that fetches data from the proxy
// store API endpoints.
func newQuerier(
	ctx context.Context,
	logger log.Logger,
	mint,
	maxt int64,
	replicaLabels []string,
	storeDebugMatchers [][]*labels.Matcher,
	proxy storepb.StoreServer,
	deduplicate bool,
	maxResolutionMillis int64,
	partialResponse,
	enableQueryPushdown,
	skipChunks bool,
	selectGate gate.Gate,
	selectTimeout time.Duration,
	shardInfo *storepb.ShardInfo,
	seriesStatsReporter seriesStatsReporter,
) *querier {
	if logger == nil {
		logger = log.NewNopLogger()
	}
	ctx, cancel := context.WithCancel(ctx)

	rl := make(map[string]struct{})
	for _, replicaLabel := range replicaLabels {
		rl[replicaLabel] = struct{}{}
	}

	partialResponseStrategy := storepb.PartialResponseStrategy_ABORT
	if partialResponse {
		partialResponseStrategy = storepb.PartialResponseStrategy_WARN
	}
	return &querier{
		ctx:           ctx,
		logger:        logger,
		cancel:        cancel,
		selectGate:    selectGate,
		selectTimeout: selectTimeout,

		mint:                    mint,
		maxt:                    maxt,
		replicaLabels:           replicaLabels,
		storeDebugMatchers:      storeDebugMatchers,
		proxy:                   proxy,
		deduplicate:             deduplicate,
		maxResolutionMillis:     maxResolutionMillis,
		partialResponseStrategy: partialResponseStrategy,
		skipChunks:              skipChunks,
		enableQueryPushdown:     enableQueryPushdown,
		shardInfo:               shardInfo,
		seriesStatsReporter:     seriesStatsReporter,
	}
}

func (q *querier) isDedupEnabled() bool {
	return q.deduplicate && len(q.replicaLabels) > 0
}

type seriesServer struct {
	// This field exists only to pseudo-implement the unused methods of the interface.
	storepb.Store_SeriesServer
	ctx context.Context

	seriesSet      []storepb.Series
	seriesSetStats storepb.SeriesStatsCounter
	warnings       []string
}

func (s *seriesServer) Send(r *storepb.SeriesResponse) error {
	if r.GetWarning() != "" {
		s.warnings = append(s.warnings, r.GetWarning())
		return nil
	}

	if r.GetSeries() != nil {
		s.seriesSet = append(s.seriesSet, *r.GetSeries())
		s.seriesSetStats.Count(r.GetSeries())
		return nil
	}

	// Unsupported field, skip.
	return nil
}

func (s *seriesServer) Context() context.Context {
	return s.ctx
}

// aggrsFromFunc infers aggregates of the underlying data based on the wrapping
// function of a series selection.
func aggrsFromFunc(f string) []storepb.Aggr {
	if f == "min" || strings.HasPrefix(f, "min_") {
		return []storepb.Aggr{storepb.Aggr_MIN}
	}
	if f == "max" || strings.HasPrefix(f, "max_") {
		return []storepb.Aggr{storepb.Aggr_MAX}
	}
	if f == "count" || strings.HasPrefix(f, "count_") {
		return []storepb.Aggr{storepb.Aggr_COUNT}
	}
	// f == "sum" falls through here since we want the actual samples.
	if strings.HasPrefix(f, "sum_") {
		return []storepb.Aggr{storepb.Aggr_SUM}
	}
	if f == "increase" || f == "rate" || f == "irate" || f == "resets" {
		return []storepb.Aggr{storepb.Aggr_COUNTER}
	}
	// In the default case, we retrieve count and sum to compute an average.
	return []storepb.Aggr{storepb.Aggr_COUNT, storepb.Aggr_SUM}
}
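// For illustration, the inference above yields, for example:
//
//	aggrsFromFunc("max_over_time") // []storepb.Aggr{storepb.Aggr_MAX}
//	aggrsFromFunc("rate")          // []storepb.Aggr{storepb.Aggr_COUNTER}
//	aggrsFromFunc("avg_over_time") // []storepb.Aggr{storepb.Aggr_COUNT, storepb.Aggr_SUM} (default case)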
func storeHintsFromPromHints(hints *storage.SelectHints) *storepb.QueryHints {
	return &storepb.QueryHints{
		StepMillis: hints.Step,
		Func: &storepb.Func{
			Name: hints.Func,
		},
		Grouping: &storepb.Grouping{
			By:     hints.By,
			Labels: hints.Grouping,
		},
		Range: &storepb.Range{Millis: hints.Range},
	}
}

func (q *querier) Select(_ bool, hints *storage.SelectHints, ms ...*labels.Matcher) storage.SeriesSet {
	if hints == nil {
		hints = &storage.SelectHints{
			Start: q.mint,
			End:   q.maxt,
		}
	}

	matchers := make([]string, len(ms))
	for i, m := range ms {
		matchers[i] = m.String()
	}

	// The querier has a context, but it gets canceled as soon as query evaluation is completed by the engine.
	// We want to prevent this from happening for the async store API calls we make while preserving tracing context.
	// TODO(bwplotka): Is the above still true? It feels weird to leave unfinished calls behind the query API.
	ctx := tracing.CopyTraceContext(context.Background(), q.ctx)
	ctx, cancel := context.WithTimeout(ctx, q.selectTimeout)
	span, ctx := tracing.StartSpan(ctx, "querier_select", opentracing.Tags{
		"minTime":  hints.Start,
		"maxTime":  hints.End,
		"matchers": "{" + strings.Join(matchers, ",") + "}",
	})

	promise := make(chan storage.SeriesSet, 1)
	go func() {
		defer close(promise)

		var err error
		tracing.DoInSpan(ctx, "querier_select_gate_ismyturn", func(ctx context.Context) {
			err = q.selectGate.Start(ctx)
		})
		if err != nil {
			promise <- storage.ErrSeriesSet(errors.Wrap(err, "failed to wait for turn"))
			return
		}
		defer q.selectGate.Done()

		span, ctx := tracing.StartSpan(ctx, "querier_select_select_fn")
		defer span.Finish()

		set, stats, err := q.selectFn(ctx, hints, ms...)
		if err != nil {
			promise <- storage.ErrSeriesSet(err)
			return
		}
		q.seriesStatsReporter(stats)

		promise <- set
	}()

	return &lazySeriesSet{create: func() (storage.SeriesSet, bool) {
		defer cancel()
		defer span.Finish()

		// Only gets called once, for the first Next() call of the series set.
		set, ok := <-promise
		if !ok {
			return storage.ErrSeriesSet(errors.New("channel closed before a value received")), false
		}
		return set, set.Next()
	}}
}
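// Illustrative sketch of how a caller consumes the lazy set returned by Select above:
// Select returns immediately while the store call runs in the goroutine, and the first
// Next() call blocks on the promise channel. (hints and matchers are hypothetical values.)
//
//	set := q.Select(false, hints, matchers...)
//	for set.Next() { // first Next() waits for the async select to finish
//		series := set.At()
//		_ = series // consumed by the PromQL engine in practice
//	}
//	err := set.Err()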
func (q *querier) selectFn(ctx context.Context, hints *storage.SelectHints, ms ...*labels.Matcher) (storage.SeriesSet, storepb.SeriesStatsCounter, error) {
	sms, err := storepb.PromMatchersToMatchers(ms...)
	if err != nil {
		return nil, storepb.SeriesStatsCounter{}, errors.Wrap(err, "convert matchers")
	}

	aggrs := aggrsFromFunc(hints.Func)

	// TODO(bwplotka): Pass it using the SeriesRequest instead of relying on context.
	ctx = context.WithValue(ctx, store.StoreMatcherKey, q.storeDebugMatchers)
	ctx = context.WithValue(ctx, tenancy.TenantKey, q.ctx.Value(tenancy.TenantKey))

	// TODO(bwplotka): Use in-process gRPC when we want to stream responses.
	// Currently streaming won't help due to the nature of the PromQL engine, which
	// pulls all series before computation anyway.
	resp := &seriesServer{ctx: ctx}
	req := storepb.SeriesRequest{
		MinTime:                 hints.Start,
		MaxTime:                 hints.End,
		Matchers:                sms,
		MaxResolutionWindow:     q.maxResolutionMillis,
		Aggregates:              aggrs,
		ShardInfo:               q.shardInfo,
		PartialResponseStrategy: q.partialResponseStrategy,
		SkipChunks:              q.skipChunks,
	}
	if q.enableQueryPushdown {
		req.QueryHints = storeHintsFromPromHints(hints)
	}
	if q.isDedupEnabled() {
		// Soft request to sort without replica labels and push them to the end of the labelset.
		req.WithoutReplicaLabels = q.replicaLabels
	}

	if err := q.proxy.Series(&req, resp); err != nil {
		return nil, storepb.SeriesStatsCounter{}, errors.Wrap(err, "proxy Series()")
	}

	var warns storage.Warnings
	for _, w := range resp.warnings {
		warns = append(warns, errors.New(w))
	}

	if q.enableQueryPushdown && (hints.Func == "max_over_time" || hints.Func == "min_over_time") {
		// On query pushdown, delete the metric's name from the result because that is what
		// PromQL does anyway, and we want our iterator to work with data that was either
		// pushed down or not.
		for i := range resp.seriesSet {
			lbls := resp.seriesSet[i].Labels
			for j, lbl := range lbls {
				if lbl.Name != model.MetricNameLabel {
					continue
				}
				resp.seriesSet[i].Labels = append(resp.seriesSet[i].Labels[:j], resp.seriesSet[i].Labels[j+1:]...)
				break
			}
		}
	}

	if !q.isDedupEnabled() {
		return &promSeriesSet{
			mint:  q.mint,
			maxt:  q.maxt,
			set:   newStoreSeriesSet(resp.seriesSet),
			aggrs: aggrs,
			warns: warns,
		}, resp.seriesSetStats, nil
	}

	// TODO(bwplotka): Move to deduplication on the chunk level inside promSeriesSet, similar to what we have in dedup.NewDedupChunkMerger().
	// This, however, requires a big refactor, taking care of correct AggrChunk-to-iterator conversion, pushdown logic and counter reset handling.
	// For now we apply simple logic that splits potentially overlapping chunks into separate replica series, so we can split the work.
	set := &promSeriesSet{
		mint:  q.mint,
		maxt:  q.maxt,
		set:   dedup.NewOverlapSplit(newStoreSeriesSet(resp.seriesSet)),
		aggrs: aggrs,
		warns: warns,
	}

	return dedup.NewSeriesSet(set, hints.Func, q.enableQueryPushdown), resp.seriesSetStats, nil
}
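// Illustrative example of the dedup path above (label names and series are hypothetical):
// with deduplicate=true and replicaLabels=[]string{"replica"}, the proxy is asked to return
// series sorted without the replica label (pushed to the end of the labelset), so the two
// replica series
//
//	{__name__="up", job="node", replica="a"}
//	{__name__="up", job="node", replica="b"}
//
// arrive adjacent to each other, and dedup.NewSeriesSet merges them into a single
// {__name__="up", job="node"} series at query time.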
// LabelValues returns all potential values for a label name.
func (q *querier) LabelValues(name string, matchers ...*labels.Matcher) ([]string, storage.Warnings, error) {
	span, ctx := tracing.StartSpan(q.ctx, "querier_label_values")
	defer span.Finish()

	// TODO(bwplotka): Pass it using the SeriesRequest instead of relying on context.
	ctx = context.WithValue(ctx, store.StoreMatcherKey, q.storeDebugMatchers)
	ctx = context.WithValue(ctx, tenancy.TenantKey, q.ctx.Value(tenancy.TenantKey))

	pbMatchers, err := storepb.PromMatchersToMatchers(matchers...)
	if err != nil {
		return nil, nil, errors.Wrap(err, "converting prom matchers to storepb matchers")
	}

	resp, err := q.proxy.LabelValues(ctx, &storepb.LabelValuesRequest{
		Label:                   name,
		PartialResponseStrategy: q.partialResponseStrategy,
		Start:                   q.mint,
		End:                     q.maxt,
		Matchers:                pbMatchers,
	})
	if err != nil {
		return nil, nil, errors.Wrap(err, "proxy LabelValues()")
	}

	var warns storage.Warnings
	for _, w := range resp.Warnings {
		warns = append(warns, errors.New(w))
	}

	return resp.Values, warns, nil
}

// LabelNames returns all the unique label names present in the block in sorted order constrained
// by the given matchers.
func (q *querier) LabelNames(matchers ...*labels.Matcher) ([]string, storage.Warnings, error) {
	span, ctx := tracing.StartSpan(q.ctx, "querier_label_names")
	defer span.Finish()

	// TODO(bwplotka): Pass it using the SeriesRequest instead of relying on context.
	ctx = context.WithValue(ctx, store.StoreMatcherKey, q.storeDebugMatchers)
	ctx = context.WithValue(ctx, tenancy.TenantKey, q.ctx.Value(tenancy.TenantKey))

	pbMatchers, err := storepb.PromMatchersToMatchers(matchers...)
	if err != nil {
		return nil, nil, errors.Wrap(err, "converting prom matchers to storepb matchers")
	}

	resp, err := q.proxy.LabelNames(ctx, &storepb.LabelNamesRequest{
		PartialResponseStrategy: q.partialResponseStrategy,
		Start:                   q.mint,
		End:                     q.maxt,
		Matchers:                pbMatchers,
	})
	if err != nil {
		return nil, nil, errors.Wrap(err, "proxy LabelNames()")
	}

	var warns storage.Warnings
	for _, w := range resp.Warnings {
		warns = append(warns, errors.New(w))
	}

	return resp.Names, warns, nil
}

func (q *querier) Close() error {
	q.cancel()
	return nil
}