github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/configs/legacy_promql/engine.go (about)

     1  // Copyright 2013 The Prometheus Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  //nolint //Since this was copied from Prometheus leave it as is
    14  package promql
    15  
    16  import (
    17  	"container/heap"
    18  	"context"
    19  	"fmt"
    20  	"math"
    21  	"regexp"
    22  	"runtime"
    23  	"sort"
    24  	"strconv"
    25  	"sync"
    26  	"time"
    27  
    28  	"github.com/go-kit/log"
    29  	"github.com/go-kit/log/level"
    30  	opentracing "github.com/opentracing/opentracing-go"
    31  	"github.com/prometheus/client_golang/prometheus"
    32  	"github.com/prometheus/prometheus/pkg/labels"
    33  	"github.com/prometheus/prometheus/pkg/timestamp"
    34  	"github.com/prometheus/prometheus/pkg/value"
    35  	"github.com/prometheus/prometheus/storage"
    36  
    37  	"github.com/prometheus/prometheus/util/stats"
    38  )
    39  
    40  const (
    41  	namespace = "prometheus"
    42  	subsystem = "engine"
    43  	queryTag  = "query"
    44  
    45  	// The largest SampleValue that can be converted to an int64 without overflow.
    46  	maxInt64 = 9223372036854774784
    47  	// The smallest SampleValue that can be converted to an int64 without underflow.
    48  	minInt64 = -9223372036854775808
    49  )
    50  
// engineMetrics bundles the Prometheus collectors the Engine updates while
// handling queries.
type engineMetrics struct {
	// Gauge incremented when a query enters exec and decremented when it leaves.
	currentQueries prometheus.Gauge
	// Gauge set once, at engine construction, to the configured concurrency limit.
	maxConcurrentQueries prometheus.Gauge
	// Time a query spent waiting at the query gate.
	queryQueueTime prometheus.Summary
	// Time spent populating series before evaluation.
	queryPrepareTime prometheus.Summary
	// Time spent in the expression evaluator.
	queryInnerEval prometheus.Summary
	// Time spent sorting the result matrix of a range evaluation.
	queryResultSort prometheus.Summary
}
    59  
    60  // convertibleToInt64 returns true if v does not over-/underflow an int64.
    61  func convertibleToInt64(v float64) bool {
    62  	return v <= maxInt64 && v >= minInt64
    63  }
    64  
    65  type (
    66  	// ErrQueryTimeout is returned if a query timed out during processing.
    67  	ErrQueryTimeout string
    68  	// ErrQueryCanceled is returned if a query was canceled during processing.
    69  	ErrQueryCanceled string
    70  	// ErrStorage is returned if an error was encountered in the storage layer
    71  	// during query handling.
    72  	ErrStorage error
    73  )
    74  
    75  func (e ErrQueryTimeout) Error() string  { return fmt.Sprintf("query timed out in %s", string(e)) }
    76  func (e ErrQueryCanceled) Error() string { return fmt.Sprintf("query was canceled in %s", string(e)) }
    77  
// A Query is derived from a raw query string and can be run against the
// engine it is associated with.
type Query interface {
	// Exec processes the query. Can only be called once.
	Exec(ctx context.Context) *Result
	// Close recovers memory used by the query result.
	Close()
	// Statement returns the parsed statement of the query.
	Statement() Statement
	// Stats returns statistics about the lifetime of the query.
	Stats() *stats.TimerGroup
	// Cancel signals that a running query execution should be aborted.
	Cancel()
}
    92  
// query implements the Query interface.
type query struct {
	// Underlying data provider.
	queryable storage.Queryable
	// The original query string.
	q string
	// Statement of the parsed query.
	stmt Statement
	// Timer stats for the query execution.
	stats *stats.TimerGroup
	// Result matrix for reuse. Close hands its point slices to putPointSlice.
	matrix Matrix
	// Cancellation function for the query. It is nil until exec has installed
	// the timeout context's cancel function.
	cancel func()

	// The engine against which the query is executed.
	ng *Engine
}
   111  
// Statement implements the Query interface.
// It returns the statement stored on the query when it was created.
func (q *query) Statement() Statement {
	return q.stmt
}
   116  
// Stats implements the Query interface.
// It returns the timer group that exec and execEvalStmt record into.
func (q *query) Stats() *stats.TimerGroup {
	return q.stats
}
   121  
   122  // Cancel implements the Query interface.
   123  func (q *query) Cancel() {
   124  	if q.cancel != nil {
   125  		q.cancel()
   126  	}
   127  }
   128  
   129  // Close implements the Query interface.
   130  func (q *query) Close() {
   131  	for _, s := range q.matrix {
   132  		putPointSlice(s.Points)
   133  	}
   134  }
   135  
   136  // Exec implements the Query interface.
   137  func (q *query) Exec(ctx context.Context) *Result {
   138  	if span := opentracing.SpanFromContext(ctx); span != nil {
   139  		span.SetTag(queryTag, q.stmt.String())
   140  	}
   141  
   142  	res, err := q.ng.exec(ctx, q)
   143  	return &Result{Err: err, Value: res}
   144  }
   145  
   146  // contextDone returns an error if the context was canceled or timed out.
   147  func contextDone(ctx context.Context, env string) error {
   148  	select {
   149  	case <-ctx.Done():
   150  		err := ctx.Err()
   151  		switch err {
   152  		case context.Canceled:
   153  			return ErrQueryCanceled(env)
   154  		case context.DeadlineExceeded:
   155  			return ErrQueryTimeout(env)
   156  		default:
   157  			return err
   158  		}
   159  	default:
   160  		return nil
   161  	}
   162  }
   163  
// Engine handles the lifetime of queries from beginning to end.
// It is connected to a querier.
type Engine struct {
	// Logger handed to evaluators and used to report series expansion errors.
	logger log.Logger
	// Collectors updated during query execution.
	metrics *engineMetrics
	// Timeout applied to every query's context in exec.
	timeout time.Duration
	// Gate a query must pass (Start/Done) before it is executed; it is built
	// from the maxConcurrent argument of NewEngine.
	gate *queryGate
}
   172  
   173  // NewEngine returns a new engine.
   174  func NewEngine(logger log.Logger, reg prometheus.Registerer, maxConcurrent int, timeout time.Duration) *Engine {
   175  	if logger == nil {
   176  		logger = log.NewNopLogger()
   177  	}
   178  
   179  	metrics := &engineMetrics{
   180  		currentQueries: prometheus.NewGauge(prometheus.GaugeOpts{
   181  			Namespace: namespace,
   182  			Subsystem: subsystem,
   183  			Name:      "queries",
   184  			Help:      "The current number of queries being executed or waiting.",
   185  		}),
   186  		maxConcurrentQueries: prometheus.NewGauge(prometheus.GaugeOpts{
   187  			Namespace: namespace,
   188  			Subsystem: subsystem,
   189  			Name:      "queries_concurrent_max",
   190  			Help:      "The max number of concurrent queries.",
   191  		}),
   192  		queryQueueTime: prometheus.NewSummary(prometheus.SummaryOpts{
   193  			Namespace:   namespace,
   194  			Subsystem:   subsystem,
   195  			Name:        "query_duration_seconds",
   196  			Help:        "Query timings",
   197  			ConstLabels: prometheus.Labels{"slice": "queue_time"},
   198  		}),
   199  		queryPrepareTime: prometheus.NewSummary(prometheus.SummaryOpts{
   200  			Namespace:   namespace,
   201  			Subsystem:   subsystem,
   202  			Name:        "query_duration_seconds",
   203  			Help:        "Query timings",
   204  			ConstLabels: prometheus.Labels{"slice": "prepare_time"},
   205  		}),
   206  		queryInnerEval: prometheus.NewSummary(prometheus.SummaryOpts{
   207  			Namespace:   namespace,
   208  			Subsystem:   subsystem,
   209  			Name:        "query_duration_seconds",
   210  			Help:        "Query timings",
   211  			ConstLabels: prometheus.Labels{"slice": "inner_eval"},
   212  		}),
   213  		queryResultSort: prometheus.NewSummary(prometheus.SummaryOpts{
   214  			Namespace:   namespace,
   215  			Subsystem:   subsystem,
   216  			Name:        "query_duration_seconds",
   217  			Help:        "Query timings",
   218  			ConstLabels: prometheus.Labels{"slice": "result_sort"},
   219  		}),
   220  	}
   221  	metrics.maxConcurrentQueries.Set(float64(maxConcurrent))
   222  
   223  	if reg != nil {
   224  		reg.MustRegister(
   225  			metrics.currentQueries,
   226  			metrics.maxConcurrentQueries,
   227  			metrics.queryQueueTime,
   228  			metrics.queryPrepareTime,
   229  			metrics.queryInnerEval,
   230  			metrics.queryResultSort,
   231  		)
   232  	}
   233  	return &Engine{
   234  		gate:    newQueryGate(maxConcurrent),
   235  		timeout: timeout,
   236  		logger:  logger,
   237  		metrics: metrics,
   238  	}
   239  }
   240  
   241  // NewInstantQuery returns an evaluation query for the given expression at the given time.
   242  func (ng *Engine) NewInstantQuery(q storage.Queryable, qs string, ts time.Time) (Query, error) {
   243  	expr, err := ParseExpr(qs)
   244  	if err != nil {
   245  		return nil, err
   246  	}
   247  	qry := ng.newQuery(q, expr, ts, ts, 0)
   248  	qry.q = qs
   249  
   250  	return qry, nil
   251  }
   252  
   253  // NewRangeQuery returns an evaluation query for the given time range and with
   254  // the resolution set by the interval.
   255  func (ng *Engine) NewRangeQuery(q storage.Queryable, qs string, start, end time.Time, interval time.Duration) (Query, error) {
   256  	expr, err := ParseExpr(qs)
   257  	if err != nil {
   258  		return nil, err
   259  	}
   260  	if expr.Type() != ValueTypeVector && expr.Type() != ValueTypeScalar {
   261  		return nil, fmt.Errorf("invalid expression type %q for range query, must be Scalar or instant Vector", documentedType(expr.Type()))
   262  	}
   263  	qry := ng.newQuery(q, expr, start, end, interval)
   264  	qry.q = qs
   265  
   266  	return qry, nil
   267  }
   268  
   269  func (ng *Engine) newQuery(q storage.Queryable, expr Expr, start, end time.Time, interval time.Duration) *query {
   270  	es := &EvalStmt{
   271  		Expr:     expr,
   272  		Start:    start,
   273  		End:      end,
   274  		Interval: interval,
   275  	}
   276  	qry := &query{
   277  		stmt:      es,
   278  		ng:        ng,
   279  		stats:     stats.NewTimerGroup(),
   280  		queryable: q,
   281  	}
   282  	return qry
   283  }
   284  
// testStmt is an internal helper statement that allows execution
// of an arbitrary function during handling. It is used to test the Engine.
type testStmt func(context.Context) error

// String returns a fixed description of the test statement.
func (testStmt) String() string { return "test statement" }

// stmt is an empty marker method.
// NOTE(review): presumably required by the Statement interface, which is
// declared outside this chunk — confirm.
func (testStmt) stmt() {}
   291  
   292  func (ng *Engine) newTestQuery(f func(context.Context) error) Query {
   293  	qry := &query{
   294  		q:     "test statement",
   295  		stmt:  testStmt(f),
   296  		ng:    ng,
   297  		stats: stats.NewTimerGroup(),
   298  	}
   299  	return qry
   300  }
   301  
   302  // exec executes the query.
   303  //
   304  // At this point per query only one EvalStmt is evaluated. Alert and record
   305  // statements are not handled by the Engine.
   306  func (ng *Engine) exec(ctx context.Context, q *query) (Value, error) {
   307  	ng.metrics.currentQueries.Inc()
   308  	defer ng.metrics.currentQueries.Dec()
   309  
   310  	ctx, cancel := context.WithTimeout(ctx, ng.timeout)
   311  	q.cancel = cancel
   312  
   313  	execTimer := q.stats.GetTimer(stats.ExecTotalTime).Start()
   314  	defer execTimer.Stop()
   315  	queueTimer := q.stats.GetTimer(stats.ExecQueueTime).Start()
   316  
   317  	if err := ng.gate.Start(ctx); err != nil {
   318  		return nil, err
   319  	}
   320  	defer ng.gate.Done()
   321  
   322  	queueTimer.Stop()
   323  	ng.metrics.queryQueueTime.Observe(queueTimer.ElapsedTime().Seconds())
   324  
   325  	// Cancel when execution is done or an error was raised.
   326  	defer q.cancel()
   327  
   328  	const env = "query execution"
   329  
   330  	evalTimer := q.stats.GetTimer(stats.EvalTotalTime).Start()
   331  	defer evalTimer.Stop()
   332  
   333  	// The base context might already be canceled on the first iteration (e.g. during shutdown).
   334  	if err := contextDone(ctx, env); err != nil {
   335  		return nil, err
   336  	}
   337  
   338  	switch s := q.Statement().(type) {
   339  	case *EvalStmt:
   340  		return ng.execEvalStmt(ctx, q, s)
   341  	case testStmt:
   342  		return nil, s(ctx)
   343  	}
   344  
   345  	panic(fmt.Errorf("promql.Engine.exec: unhandled statement of type %T", q.Statement()))
   346  }
   347  
   348  func timeMilliseconds(t time.Time) int64 {
   349  	return t.UnixNano() / int64(time.Millisecond/time.Nanosecond)
   350  }
   351  
   352  func durationMilliseconds(d time.Duration) int64 {
   353  	return int64(d / (time.Millisecond / time.Nanosecond))
   354  }
   355  
   356  // execEvalStmt evaluates the expression of an evaluation statement for the given time range.
   357  func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *EvalStmt) (Value, error) {
   358  	prepareTimer := query.stats.GetTimer(stats.QueryPreparationTime).Start()
   359  	querier, err := ng.populateSeries(ctx, query.queryable, s)
   360  	prepareTimer.Stop()
   361  	ng.metrics.queryPrepareTime.Observe(prepareTimer.ElapsedTime().Seconds())
   362  
   363  	// XXX(fabxc): the querier returned by populateSeries might be instantiated
   364  	// we must not return without closing irrespective of the error.
   365  	// TODO: make this semantically saner.
   366  	if querier != nil {
   367  		defer querier.Close()
   368  	}
   369  
   370  	if err != nil {
   371  		return nil, err
   372  	}
   373  
   374  	evalTimer := query.stats.GetTimer(stats.InnerEvalTime).Start()
   375  	// Instant evaluation. This is executed as a range evaluation with one step.
   376  	if s.Start == s.End && s.Interval == 0 {
   377  		start := timeMilliseconds(s.Start)
   378  		evaluator := &evaluator{
   379  			startTimestamp: start,
   380  			endTimestamp:   start,
   381  			interval:       1,
   382  			ctx:            ctx,
   383  			logger:         ng.logger,
   384  		}
   385  		val, err := evaluator.Eval(s.Expr)
   386  		if err != nil {
   387  			return nil, err
   388  		}
   389  
   390  		evalTimer.Stop()
   391  		ng.metrics.queryInnerEval.Observe(evalTimer.ElapsedTime().Seconds())
   392  
   393  		mat, ok := val.(Matrix)
   394  		if !ok {
   395  			panic(fmt.Errorf("promql.Engine.exec: invalid expression type %q", val.Type()))
   396  		}
   397  		query.matrix = mat
   398  		switch s.Expr.Type() {
   399  		case ValueTypeVector:
   400  			// Convert matrix with one value per series into vector.
   401  			vector := make(Vector, len(mat))
   402  			for i, s := range mat {
   403  				// Point might have a different timestamp, force it to the evaluation
   404  				// timestamp as that is when we ran the evaluation.
   405  				vector[i] = Sample{Metric: s.Metric, Point: Point{V: s.Points[0].V, T: start}}
   406  			}
   407  			return vector, nil
   408  		case ValueTypeScalar:
   409  			return Scalar{V: mat[0].Points[0].V, T: start}, nil
   410  		case ValueTypeMatrix:
   411  			return mat, nil
   412  		default:
   413  			panic(fmt.Errorf("promql.Engine.exec: unexpected expression type %q", s.Expr.Type()))
   414  		}
   415  
   416  	}
   417  
   418  	// Range evaluation.
   419  	evaluator := &evaluator{
   420  		startTimestamp: timeMilliseconds(s.Start),
   421  		endTimestamp:   timeMilliseconds(s.End),
   422  		interval:       durationMilliseconds(s.Interval),
   423  		ctx:            ctx,
   424  		logger:         ng.logger,
   425  	}
   426  	val, err := evaluator.Eval(s.Expr)
   427  	if err != nil {
   428  		return nil, err
   429  	}
   430  	evalTimer.Stop()
   431  	ng.metrics.queryInnerEval.Observe(evalTimer.ElapsedTime().Seconds())
   432  
   433  	mat, ok := val.(Matrix)
   434  	if !ok {
   435  		panic(fmt.Errorf("promql.Engine.exec: invalid expression type %q", val.Type()))
   436  	}
   437  	query.matrix = mat
   438  
   439  	if err := contextDone(ctx, "expression evaluation"); err != nil {
   440  		return nil, err
   441  	}
   442  
   443  	// TODO(fabxc): order ensured by storage?
   444  	// TODO(fabxc): where to ensure metric labels are a copy from the storage internals.
   445  	sortTimer := query.stats.GetTimer(stats.ResultSortTime).Start()
   446  	sort.Sort(mat)
   447  	sortTimer.Stop()
   448  
   449  	ng.metrics.queryResultSort.Observe(sortTimer.ElapsedTime().Seconds())
   450  	return mat, nil
   451  }
   452  
   453  func (ng *Engine) populateSeries(ctx context.Context, q storage.Queryable, s *EvalStmt) (storage.Querier, error) {
   454  	var maxOffset time.Duration
   455  	Inspect(s.Expr, func(node Node, _ []Node) error {
   456  		switch n := node.(type) {
   457  		case *VectorSelector:
   458  			if maxOffset < LookbackDelta {
   459  				maxOffset = LookbackDelta
   460  			}
   461  			if n.Offset+LookbackDelta > maxOffset {
   462  				maxOffset = n.Offset + LookbackDelta
   463  			}
   464  		case *MatrixSelector:
   465  			if maxOffset < n.Range {
   466  				maxOffset = n.Range
   467  			}
   468  			if n.Offset+n.Range > maxOffset {
   469  				maxOffset = n.Offset + n.Range
   470  			}
   471  		}
   472  		return nil
   473  	})
   474  
   475  	mint := s.Start.Add(-maxOffset)
   476  
   477  	querier, err := q.Querier(ctx, timestamp.FromTime(mint), timestamp.FromTime(s.End))
   478  	if err != nil {
   479  		return nil, err
   480  	}
   481  
   482  	Inspect(s.Expr, func(node Node, path []Node) error {
   483  		var set storage.SeriesSet
   484  		params := &storage.SelectHints{
   485  			Start: timestamp.FromTime(s.Start),
   486  			End:   timestamp.FromTime(s.End),
   487  			Step:  int64(s.Interval / time.Millisecond),
   488  		}
   489  
   490  		switch n := node.(type) {
   491  		case *VectorSelector:
   492  			params.Start = params.Start - durationMilliseconds(LookbackDelta)
   493  			params.Func = extractFuncFromPath(path)
   494  			if n.Offset > 0 {
   495  				offsetMilliseconds := durationMilliseconds(n.Offset)
   496  				params.Start = params.Start - offsetMilliseconds
   497  				params.End = params.End - offsetMilliseconds
   498  			}
   499  
   500  			set = querier.Select(false, params, n.LabelMatchers...)
   501  			n.series, err = expandSeriesSet(ctx, set)
   502  			if err != nil {
   503  				// TODO(fabxc): use multi-error.
   504  				level.Error(ng.logger).Log("msg", "error expanding series set", "err", err)
   505  				return err
   506  			}
   507  
   508  		case *MatrixSelector:
   509  			params.Func = extractFuncFromPath(path)
   510  			// For all matrix queries we want to ensure that we have (end-start) + range selected
   511  			// this way we have `range` data before the start time
   512  			params.Start = params.Start - durationMilliseconds(n.Range)
   513  			if n.Offset > 0 {
   514  				offsetMilliseconds := durationMilliseconds(n.Offset)
   515  				params.Start = params.Start - offsetMilliseconds
   516  				params.End = params.End - offsetMilliseconds
   517  			}
   518  
   519  			set = querier.Select(false, params, n.LabelMatchers...)
   520  			n.series, err = expandSeriesSet(ctx, set)
   521  			if err != nil {
   522  				level.Error(ng.logger).Log("msg", "error expanding series set", "err", err)
   523  				return err
   524  			}
   525  		}
   526  		return nil
   527  	})
   528  	return querier, err
   529  }
   530  
   531  // extractFuncFromPath walks up the path and searches for the first instance of
   532  // a function or aggregation.
   533  func extractFuncFromPath(p []Node) string {
   534  	if len(p) == 0 {
   535  		return ""
   536  	}
   537  	switch n := p[len(p)-1].(type) {
   538  	case *AggregateExpr:
   539  		return n.Op.String()
   540  	case *Call:
   541  		return n.Func.Name
   542  	case *BinaryExpr:
   543  		// If we hit a binary expression we terminate since we only care about functions
   544  		// or aggregations over a single metric.
   545  		return ""
   546  	}
   547  	return extractFuncFromPath(p[:len(p)-1])
   548  }
   549  
   550  func expandSeriesSet(ctx context.Context, it storage.SeriesSet) (res []storage.Series, err error) {
   551  	for it.Next() {
   552  		select {
   553  		case <-ctx.Done():
   554  			return nil, ctx.Err()
   555  		default:
   556  		}
   557  		res = append(res, it.At())
   558  	}
   559  	return res, it.Err()
   560  }
   561  
// An evaluator evaluates given expressions over given fixed timestamps. It
// is attached to an engine through which it connects to a querier and reports
// errors. On timeout or cancellation of its context it terminates.
type evaluator struct {
	// Context checked for cancellation/timeout at the start of every eval call.
	ctx context.Context

	startTimestamp int64 // Start time in milliseconds.

	endTimestamp int64 // End time in milliseconds.
	interval     int64 // Interval in milliseconds.

	// Logger used to report runtime panics caught during evaluation.
	logger log.Logger
}
   575  
// errorf causes a panic with the input formatted into an error.
// The format and args are passed to fmt.Errorf and the result is handed to
// ev.error.
func (ev *evaluator) errorf(format string, args ...interface{}) {
	ev.error(fmt.Errorf(format, args...))
}
   580  
// error causes a panic with the given error.
// Eval defers ev.recover, which turns such a panic back into an error return.
func (ev *evaluator) error(err error) {
	panic(err)
}
   585  
   586  // recover is the handler that turns panics into returns from the top level of evaluation.
   587  func (ev *evaluator) recover(errp *error) {
   588  	e := recover()
   589  	if e == nil {
   590  		return
   591  	}
   592  	if err, ok := e.(runtime.Error); ok {
   593  		// Print the stack trace but do not inhibit the running application.
   594  		buf := make([]byte, 64<<10)
   595  		buf = buf[:runtime.Stack(buf, false)]
   596  
   597  		level.Error(ev.logger).Log("msg", "runtime panic in parser", "err", e, "stacktrace", string(buf))
   598  		*errp = fmt.Errorf("unexpected error: %s", err)
   599  	} else {
   600  		*errp = e.(error)
   601  	}
   602  }
   603  
// Eval evaluates expr and returns the resulting value. Errors raised during
// evaluation via panic are intercepted by the deferred ev.recover, which
// stores them in the named result err.
func (ev *evaluator) Eval(expr Expr) (v Value, err error) {
	defer ev.recover(&err)
	return ev.eval(expr), nil
}
   608  
// EvalNodeHelper stores extra information and caches for evaluating a single node across steps.
type EvalNodeHelper struct {
	// Evaluation timestamp.
	ts int64
	// Vector that can be used for output.
	out Vector

	// Caches.
	// dropMetricName and label_*. Keyed by the hash of the input label set.
	dmn map[uint64]labels.Labels
	// signatureFunc. Keyed by the hash of the input label set.
	sigf map[uint64]uint64
	// funcHistogramQuantile.
	signatureToMetricWithBuckets map[uint64]*metricWithBuckets
	// label_replace.
	regex *regexp.Regexp

	// For binary vector matching.
	rightSigs    map[uint64]Sample
	matchedSigs  map[uint64]map[uint64]struct{}
	resultMetric map[uint64]labels.Labels
}
   631  
   632  // dropMetricName is a cached version of dropMetricName.
   633  func (enh *EvalNodeHelper) dropMetricName(l labels.Labels) labels.Labels {
   634  	if enh.dmn == nil {
   635  		enh.dmn = make(map[uint64]labels.Labels, len(enh.out))
   636  	}
   637  	h := l.Hash()
   638  	ret, ok := enh.dmn[h]
   639  	if ok {
   640  		return ret
   641  	}
   642  	ret = dropMetricName(l)
   643  	enh.dmn[h] = ret
   644  	return ret
   645  }
   646  
   647  // signatureFunc is a cached version of signatureFunc.
   648  func (enh *EvalNodeHelper) signatureFunc(on bool, names ...string) func(labels.Labels) uint64 {
   649  	if enh.sigf == nil {
   650  		enh.sigf = make(map[uint64]uint64, len(enh.out))
   651  	}
   652  	f := signatureFunc(on, names...)
   653  	return func(l labels.Labels) uint64 {
   654  		h := l.Hash()
   655  		ret, ok := enh.sigf[h]
   656  		if ok {
   657  			return ret
   658  		}
   659  		ret = f(l)
   660  		enh.sigf[h] = ret
   661  		return ret
   662  	}
   663  }
   664  
// rangeEval evaluates the given expressions, and then for each step calls
// the given function with the values computed for each expression at that
// step. The return value is the combination into time series of all the
// function call results.
//
// Expressions that are nil or of string type are not evaluated; the function
// receives an empty vector for them. When start and end timestamp coincide,
// the result of the first (and only) step is returned directly, one
// single-point series per output sample, in the order f produced them.
func (ev *evaluator) rangeEval(f func([]Value, *EvalNodeHelper) Vector, exprs ...Expr) Matrix {
	numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1
	matrixes := make([]Matrix, len(exprs))
	origMatrixes := make([]Matrix, len(exprs))
	for i, e := range exprs {
		// Functions will take string arguments from the expressions, not the values.
		if e != nil && e.Type() != ValueTypeString {
			matrixes[i] = ev.eval(e).(Matrix)

			// Keep a copy of the original point slices so that they
			// can be returned to the pool.
			origMatrixes[i] = make(Matrix, len(matrixes[i]))
			copy(origMatrixes[i], matrixes[i])
		}
	}

	vectors := make([]Vector, len(exprs)) // Input vectors for the function.
	args := make([]Value, len(exprs))     // Argument to function.
	// Create an output vector that is as big as the input matrix with
	// the most time series.
	biggestLen := 1
	for i := range exprs {
		vectors[i] = make(Vector, 0, len(matrixes[i]))
		if len(matrixes[i]) > biggestLen {
			biggestLen = len(matrixes[i])
		}
	}
	enh := &EvalNodeHelper{out: make(Vector, 0, biggestLen)}
	seriess := make(map[uint64]Series, biggestLen) // Output series by series hash.
	for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval {
		// Gather input vectors for this timestamp.
		for i := range exprs {
			vectors[i] = vectors[i][:0]
			for si, series := range matrixes[i] {
				// Only the first remaining point of the series is inspected:
				// the loop body always ends in break.
				for _, point := range series.Points {
					if point.T == ts {
						vectors[i] = append(vectors[i], Sample{Metric: series.Metric, Point: point})
						// Move input vectors forward so we don't have to re-scan the same
						// past points at the next step.
						matrixes[i][si].Points = series.Points[1:]
					}
					break
				}
			}
			args[i] = vectors[i]
		}
		// Make the function call.
		enh.ts = ts
		result := f(args, enh)
		enh.out = result[:0] // Reuse result vector.
		// If this could be an instant query, shortcut so as not to change sort order.
		if ev.endTimestamp == ev.startTimestamp {
			mat := make(Matrix, len(result))
			for i, s := range result {
				s.Point.T = ts
				mat[i] = Series{Metric: s.Metric, Points: []Point{s.Point}}
			}
			return mat
		}
		// Add samples in output vector to output series.
		for _, sample := range result {
			h := sample.Metric.Hash()
			ss, ok := seriess[h]
			if !ok {
				// First sample for this metric: start a new output series.
				ss = Series{
					Metric: sample.Metric,
					Points: getPointSlice(numSteps),
				}
			}
			// The output point always carries the step timestamp.
			sample.Point.T = ts
			ss.Points = append(ss.Points, sample.Point)
			seriess[h] = ss
		}
	}
	// Reuse the original point slices.
	for _, m := range origMatrixes {
		for _, s := range m {
			putPointSlice(s.Points)
		}
	}
	// Assemble the output matrix.
	mat := make(Matrix, 0, len(seriess))
	for _, ss := range seriess {
		mat = append(mat, ss)
	}
	return mat
}
   756  
   757  // eval evaluates the given expression as the given AST expression node requires.
   758  func (ev *evaluator) eval(expr Expr) Value {
   759  	// This is the top-level evaluation method.
   760  	// Thus, we check for timeout/cancellation here.
   761  	if err := contextDone(ev.ctx, "expression evaluation"); err != nil {
   762  		ev.error(err)
   763  	}
   764  	numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1
   765  
   766  	switch e := expr.(type) {
   767  	case *AggregateExpr:
   768  		if s, ok := e.Param.(*StringLiteral); ok {
   769  			return ev.rangeEval(func(v []Value, enh *EvalNodeHelper) Vector {
   770  				return ev.aggregation(e.Op, e.Grouping, e.Without, s.Val, v[0].(Vector), enh)
   771  			}, e.Expr)
   772  		}
   773  		return ev.rangeEval(func(v []Value, enh *EvalNodeHelper) Vector {
   774  			var param float64
   775  			if e.Param != nil {
   776  				param = v[0].(Vector)[0].V
   777  			}
   778  			return ev.aggregation(e.Op, e.Grouping, e.Without, param, v[1].(Vector), enh)
   779  		}, e.Param, e.Expr)
   780  
   781  	case *Call:
   782  		if e.Func.Name == "timestamp" {
   783  			// Matrix evaluation always returns the evaluation time,
   784  			// so this function needs special handling when given
   785  			// a vector selector.
   786  			vs, ok := e.Args[0].(*VectorSelector)
   787  			if ok {
   788  				return ev.rangeEval(func(v []Value, enh *EvalNodeHelper) Vector {
   789  					return e.Func.Call([]Value{ev.vectorSelector(vs, enh.ts)}, e.Args, enh)
   790  				})
   791  			}
   792  		}
   793  		// Check if the function has a matrix argument.
   794  		var matrixArgIndex int
   795  		var matrixArg bool
   796  		for i, a := range e.Args {
   797  			_, ok := a.(*MatrixSelector)
   798  			if ok {
   799  				matrixArgIndex = i
   800  				matrixArg = true
   801  				break
   802  			}
   803  		}
   804  		if !matrixArg {
   805  			// Does not have a matrix argument.
   806  			return ev.rangeEval(func(v []Value, enh *EvalNodeHelper) Vector {
   807  				return e.Func.Call(v, e.Args, enh)
   808  			}, e.Args...)
   809  		}
   810  
   811  		inArgs := make([]Value, len(e.Args))
   812  		// Evaluate any non-matrix arguments.
   813  		otherArgs := make([]Matrix, len(e.Args))
   814  		otherInArgs := make([]Vector, len(e.Args))
   815  		for i, e := range e.Args {
   816  			if i != matrixArgIndex {
   817  				otherArgs[i] = ev.eval(e).(Matrix)
   818  				otherInArgs[i] = Vector{Sample{}}
   819  				inArgs[i] = otherInArgs[i]
   820  			}
   821  		}
   822  
   823  		sel := e.Args[matrixArgIndex].(*MatrixSelector)
   824  		mat := make(Matrix, 0, len(sel.series)) // Output matrix.
   825  		offset := durationMilliseconds(sel.Offset)
   826  		selRange := durationMilliseconds(sel.Range)
   827  		stepRange := selRange
   828  		if stepRange > ev.interval {
   829  			stepRange = ev.interval
   830  		}
   831  		// Reuse objects across steps to save memory allocations.
   832  		points := getPointSlice(16)
   833  		inMatrix := make(Matrix, 1)
   834  		inArgs[matrixArgIndex] = inMatrix
   835  		enh := &EvalNodeHelper{out: make(Vector, 0, 1)}
   836  		// Process all the calls for one time series at a time.
   837  		it := storage.NewBuffer(selRange)
   838  		for i, s := range sel.series {
   839  			points = points[:0]
   840  			it.Reset(s.Iterator())
   841  			ss := Series{
   842  				// For all range vector functions, the only change to the
   843  				// output labels is dropping the metric name so just do
   844  				// it once here.
   845  				Metric: dropMetricName(sel.series[i].Labels()),
   846  				Points: getPointSlice(numSteps),
   847  			}
   848  			inMatrix[0].Metric = sel.series[i].Labels()
   849  			for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval {
   850  				step++
   851  				// Set the non-matrix arguments.
   852  				// They are scalar, so it is safe to use the step number
   853  				// when looking up the argument, as there will be no gaps.
   854  				for j := range e.Args {
   855  					if j != matrixArgIndex {
   856  						otherInArgs[j][0].V = otherArgs[j][0].Points[step].V
   857  					}
   858  				}
   859  				maxt := ts - offset
   860  				mint := maxt - selRange
   861  				// Evaluate the matrix selector for this series for this step.
   862  				points = ev.matrixIterSlice(it, mint, maxt, points)
   863  				if len(points) == 0 {
   864  					continue
   865  				}
   866  				inMatrix[0].Points = points
   867  				enh.ts = ts
   868  				// Make the function call.
   869  				outVec := e.Func.Call(inArgs, e.Args, enh)
   870  				enh.out = outVec[:0]
   871  				if len(outVec) > 0 {
   872  					ss.Points = append(ss.Points, Point{V: outVec[0].Point.V, T: ts})
   873  				}
   874  				// Only buffer stepRange milliseconds from the second step on.
   875  				it.ReduceDelta(stepRange)
   876  			}
   877  			if len(ss.Points) > 0 {
   878  				mat = append(mat, ss)
   879  			}
   880  		}
   881  		putPointSlice(points)
   882  		return mat
   883  
   884  	case *ParenExpr:
   885  		return ev.eval(e.Expr)
   886  
   887  	case *UnaryExpr:
   888  		mat := ev.eval(e.Expr).(Matrix)
   889  		if e.Op == itemSUB {
   890  			for i := range mat {
   891  				mat[i].Metric = dropMetricName(mat[i].Metric)
   892  				for j := range mat[i].Points {
   893  					mat[i].Points[j].V = -mat[i].Points[j].V
   894  				}
   895  			}
   896  		}
   897  		return mat
   898  
   899  	case *BinaryExpr:
   900  		switch lt, rt := e.LHS.Type(), e.RHS.Type(); {
   901  		case lt == ValueTypeScalar && rt == ValueTypeScalar:
   902  			return ev.rangeEval(func(v []Value, enh *EvalNodeHelper) Vector {
   903  				val := scalarBinop(e.Op, v[0].(Vector)[0].Point.V, v[1].(Vector)[0].Point.V)
   904  				return append(enh.out, Sample{Point: Point{V: val}})
   905  			}, e.LHS, e.RHS)
   906  		case lt == ValueTypeVector && rt == ValueTypeVector:
   907  			switch e.Op {
   908  			case itemLAND:
   909  				return ev.rangeEval(func(v []Value, enh *EvalNodeHelper) Vector {
   910  					return ev.VectorAnd(v[0].(Vector), v[1].(Vector), e.VectorMatching, enh)
   911  				}, e.LHS, e.RHS)
   912  			case itemLOR:
   913  				return ev.rangeEval(func(v []Value, enh *EvalNodeHelper) Vector {
   914  					return ev.VectorOr(v[0].(Vector), v[1].(Vector), e.VectorMatching, enh)
   915  				}, e.LHS, e.RHS)
   916  			case itemLUnless:
   917  				return ev.rangeEval(func(v []Value, enh *EvalNodeHelper) Vector {
   918  					return ev.VectorUnless(v[0].(Vector), v[1].(Vector), e.VectorMatching, enh)
   919  				}, e.LHS, e.RHS)
   920  			default:
   921  				return ev.rangeEval(func(v []Value, enh *EvalNodeHelper) Vector {
   922  					return ev.VectorBinop(e.Op, v[0].(Vector), v[1].(Vector), e.VectorMatching, e.ReturnBool, enh)
   923  				}, e.LHS, e.RHS)
   924  			}
   925  
   926  		case lt == ValueTypeVector && rt == ValueTypeScalar:
   927  			return ev.rangeEval(func(v []Value, enh *EvalNodeHelper) Vector {
   928  				return ev.VectorscalarBinop(e.Op, v[0].(Vector), Scalar{V: v[1].(Vector)[0].Point.V}, false, e.ReturnBool, enh)
   929  			}, e.LHS, e.RHS)
   930  
   931  		case lt == ValueTypeScalar && rt == ValueTypeVector:
   932  			return ev.rangeEval(func(v []Value, enh *EvalNodeHelper) Vector {
   933  				return ev.VectorscalarBinop(e.Op, v[1].(Vector), Scalar{V: v[0].(Vector)[0].Point.V}, true, e.ReturnBool, enh)
   934  			}, e.LHS, e.RHS)
   935  		}
   936  
   937  	case *NumberLiteral:
   938  		return ev.rangeEval(func(v []Value, enh *EvalNodeHelper) Vector {
   939  			return append(enh.out, Sample{Point: Point{V: e.Val}})
   940  		})
   941  
   942  	case *VectorSelector:
   943  		mat := make(Matrix, 0, len(e.series))
   944  		it := storage.NewBuffer(durationMilliseconds(LookbackDelta))
   945  		for i, s := range e.series {
   946  			it.Reset(s.Iterator())
   947  			ss := Series{
   948  				Metric: e.series[i].Labels(),
   949  				Points: getPointSlice(numSteps),
   950  			}
   951  
   952  			for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval {
   953  				_, v, ok := ev.vectorSelectorSingle(it, e, ts)
   954  				if ok {
   955  					ss.Points = append(ss.Points, Point{V: v, T: ts})
   956  				}
   957  			}
   958  
   959  			if len(ss.Points) > 0 {
   960  				mat = append(mat, ss)
   961  			}
   962  		}
   963  		return mat
   964  
   965  	case *MatrixSelector:
   966  		if ev.startTimestamp != ev.endTimestamp {
   967  			panic(fmt.Errorf("cannot do range evaluation of matrix selector"))
   968  		}
   969  		return ev.matrixSelector(e)
   970  	}
   971  
   972  	panic(fmt.Errorf("unhandled expression of type: %T", expr))
   973  }
   974  
   975  // vectorSelector evaluates a *VectorSelector expression.
   976  func (ev *evaluator) vectorSelector(node *VectorSelector, ts int64) Vector {
   977  	var (
   978  		vec = make(Vector, 0, len(node.series))
   979  	)
   980  
   981  	it := storage.NewBuffer(durationMilliseconds(LookbackDelta))
   982  	for i, s := range node.series {
   983  		it.Reset(s.Iterator())
   984  
   985  		t, v, ok := ev.vectorSelectorSingle(it, node, ts)
   986  		if ok {
   987  			vec = append(vec, Sample{
   988  				Metric: node.series[i].Labels(),
   989  				Point:  Point{V: v, T: t},
   990  			})
   991  		}
   992  
   993  	}
   994  	return vec
   995  }
   996  
   997  // vectorSelectorSingle evaluates a instant vector for the iterator of one time series.
   998  func (ev *evaluator) vectorSelectorSingle(it *storage.BufferedSeriesIterator, node *VectorSelector, ts int64) (int64, float64, bool) {
   999  	refTime := ts - durationMilliseconds(node.Offset)
  1000  	var t int64
  1001  	var v float64
  1002  
  1003  	ok := it.Seek(refTime)
  1004  	if !ok {
  1005  		if it.Err() != nil {
  1006  			ev.error(it.Err())
  1007  		}
  1008  	}
  1009  
  1010  	if ok {
  1011  		t, v = it.Values()
  1012  	}
  1013  
  1014  	if !ok || t > refTime {
  1015  		t, v, ok = it.PeekBack(1)
  1016  		if !ok || t < refTime-durationMilliseconds(LookbackDelta) {
  1017  			return 0, 0, false
  1018  		}
  1019  	}
  1020  	if value.IsStaleNaN(v) {
  1021  		return 0, 0, false
  1022  	}
  1023  	return t, v, true
  1024  }
  1025  
  1026  var pointPool = sync.Pool{}
  1027  
  1028  func getPointSlice(sz int) []Point {
  1029  	p := pointPool.Get()
  1030  	if p != nil {
  1031  		return p.([]Point)
  1032  	}
  1033  	return make([]Point, 0, sz)
  1034  }
  1035  
  1036  func putPointSlice(p []Point) {
  1037  	pointPool.Put(p[:0])
  1038  }
  1039  
  1040  // matrixSelector evaluates a *MatrixSelector expression.
  1041  func (ev *evaluator) matrixSelector(node *MatrixSelector) Matrix {
  1042  	var (
  1043  		offset = durationMilliseconds(node.Offset)
  1044  		maxt   = ev.startTimestamp - offset
  1045  		mint   = maxt - durationMilliseconds(node.Range)
  1046  		matrix = make(Matrix, 0, len(node.series))
  1047  	)
  1048  
  1049  	it := storage.NewBuffer(durationMilliseconds(node.Range))
  1050  	for i, s := range node.series {
  1051  		if err := contextDone(ev.ctx, "expression evaluation"); err != nil {
  1052  			ev.error(err)
  1053  		}
  1054  		it.Reset(s.Iterator())
  1055  		ss := Series{
  1056  			Metric: node.series[i].Labels(),
  1057  		}
  1058  
  1059  		ss.Points = ev.matrixIterSlice(it, mint, maxt, getPointSlice(16))
  1060  
  1061  		if len(ss.Points) > 0 {
  1062  			matrix = append(matrix, ss)
  1063  		} else {
  1064  			putPointSlice(ss.Points)
  1065  		}
  1066  	}
  1067  	return matrix
  1068  }
  1069  
  1070  // matrixIterSlice populates a matrix vector covering the requested range for a
  1071  // single time series, with points retrieved from an iterator.
  1072  //
  1073  // As an optimization, the matrix vector may already contain points of the same
  1074  // time series from the evaluation of an earlier step (with lower mint and maxt
  1075  // values). Any such points falling before mint are discarded; points that fall
  1076  // into the [mint, maxt] range are retained; only points with later timestamps
  1077  // are populated from the iterator.
  1078  func (ev *evaluator) matrixIterSlice(it *storage.BufferedSeriesIterator, mint, maxt int64, out []Point) []Point {
  1079  	if len(out) > 0 && out[len(out)-1].T >= mint {
  1080  		// There is an overlap between previous and current ranges, retain common
  1081  		// points. In most such cases:
  1082  		//   (a) the overlap is significantly larger than the eval step; and/or
  1083  		//   (b) the number of samples is relatively small.
  1084  		// so a linear search will be as fast as a binary search.
  1085  		var drop int
  1086  		for drop = 0; out[drop].T < mint; drop++ {
  1087  		}
  1088  		copy(out, out[drop:])
  1089  		out = out[:len(out)-drop]
  1090  		// Only append points with timestamps after the last timestamp we have.
  1091  		mint = out[len(out)-1].T + 1
  1092  	} else {
  1093  		out = out[:0]
  1094  	}
  1095  
  1096  	ok := it.Seek(maxt)
  1097  	if !ok {
  1098  		if it.Err() != nil {
  1099  			ev.error(it.Err())
  1100  		}
  1101  	}
  1102  
  1103  	buf := it.Buffer()
  1104  	for buf.Next() {
  1105  		t, v := buf.At()
  1106  		if value.IsStaleNaN(v) {
  1107  			continue
  1108  		}
  1109  		// Values in the buffer are guaranteed to be smaller than maxt.
  1110  		if t >= mint {
  1111  			out = append(out, Point{T: t, V: v})
  1112  		}
  1113  	}
  1114  	// The seeked sample might also be in the range.
  1115  	if ok {
  1116  		t, v := it.Values()
  1117  		if t == maxt && !value.IsStaleNaN(v) {
  1118  			out = append(out, Point{T: t, V: v})
  1119  		}
  1120  	}
  1121  	return out
  1122  }
  1123  
  1124  func (ev *evaluator) VectorAnd(lhs, rhs Vector, matching *VectorMatching, enh *EvalNodeHelper) Vector {
  1125  	if matching.Card != CardManyToMany {
  1126  		panic("set operations must only use many-to-many matching")
  1127  	}
  1128  	sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...)
  1129  
  1130  	// The set of signatures for the right-hand side Vector.
  1131  	rightSigs := map[uint64]struct{}{}
  1132  	// Add all rhs samples to a map so we can easily find matches later.
  1133  	for _, rs := range rhs {
  1134  		rightSigs[sigf(rs.Metric)] = struct{}{}
  1135  	}
  1136  
  1137  	for _, ls := range lhs {
  1138  		// If there's a matching entry in the right-hand side Vector, add the sample.
  1139  		if _, ok := rightSigs[sigf(ls.Metric)]; ok {
  1140  			enh.out = append(enh.out, ls)
  1141  		}
  1142  	}
  1143  	return enh.out
  1144  }
  1145  
  1146  func (ev *evaluator) VectorOr(lhs, rhs Vector, matching *VectorMatching, enh *EvalNodeHelper) Vector {
  1147  	if matching.Card != CardManyToMany {
  1148  		panic("set operations must only use many-to-many matching")
  1149  	}
  1150  	sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...)
  1151  
  1152  	leftSigs := map[uint64]struct{}{}
  1153  	// Add everything from the left-hand-side Vector.
  1154  	for _, ls := range lhs {
  1155  		leftSigs[sigf(ls.Metric)] = struct{}{}
  1156  		enh.out = append(enh.out, ls)
  1157  	}
  1158  	// Add all right-hand side elements which have not been added from the left-hand side.
  1159  	for _, rs := range rhs {
  1160  		if _, ok := leftSigs[sigf(rs.Metric)]; !ok {
  1161  			enh.out = append(enh.out, rs)
  1162  		}
  1163  	}
  1164  	return enh.out
  1165  }
  1166  
  1167  func (ev *evaluator) VectorUnless(lhs, rhs Vector, matching *VectorMatching, enh *EvalNodeHelper) Vector {
  1168  	if matching.Card != CardManyToMany {
  1169  		panic("set operations must only use many-to-many matching")
  1170  	}
  1171  	sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...)
  1172  
  1173  	rightSigs := map[uint64]struct{}{}
  1174  	for _, rs := range rhs {
  1175  		rightSigs[sigf(rs.Metric)] = struct{}{}
  1176  	}
  1177  
  1178  	for _, ls := range lhs {
  1179  		if _, ok := rightSigs[sigf(ls.Metric)]; !ok {
  1180  			enh.out = append(enh.out, ls)
  1181  		}
  1182  	}
  1183  	return enh.out
  1184  }
  1185  
  1186  // VectorBinop evaluates a binary operation between two Vectors, excluding set operators.
  1187  func (ev *evaluator) VectorBinop(op ItemType, lhs, rhs Vector, matching *VectorMatching, returnBool bool, enh *EvalNodeHelper) Vector {
  1188  	if matching.Card == CardManyToMany {
  1189  		panic("many-to-many only allowed for set operators")
  1190  	}
  1191  	sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...)
  1192  
  1193  	// The control flow below handles one-to-one or many-to-one matching.
  1194  	// For one-to-many, swap sidedness and account for the swap when calculating
  1195  	// values.
  1196  	if matching.Card == CardOneToMany {
  1197  		lhs, rhs = rhs, lhs
  1198  	}
  1199  
  1200  	// All samples from the rhs hashed by the matching label/values.
  1201  	if enh.rightSigs == nil {
  1202  		enh.rightSigs = make(map[uint64]Sample, len(enh.out))
  1203  	} else {
  1204  		for k := range enh.rightSigs {
  1205  			delete(enh.rightSigs, k)
  1206  		}
  1207  	}
  1208  	rightSigs := enh.rightSigs
  1209  
  1210  	// Add all rhs samples to a map so we can easily find matches later.
  1211  	for _, rs := range rhs {
  1212  		sig := sigf(rs.Metric)
  1213  		// The rhs is guaranteed to be the 'one' side. Having multiple samples
  1214  		// with the same signature means that the matching is many-to-many.
  1215  		if _, found := rightSigs[sig]; found {
  1216  			// Many-to-many matching not allowed.
  1217  			ev.errorf("many-to-many matching not allowed: matching labels must be unique on one side")
  1218  		}
  1219  		rightSigs[sig] = rs
  1220  	}
  1221  
  1222  	// Tracks the match-signature. For one-to-one operations the value is nil. For many-to-one
  1223  	// the value is a set of signatures to detect duplicated result elements.
  1224  	if enh.matchedSigs == nil {
  1225  		enh.matchedSigs = make(map[uint64]map[uint64]struct{}, len(rightSigs))
  1226  	} else {
  1227  		for k := range enh.matchedSigs {
  1228  			delete(enh.matchedSigs, k)
  1229  		}
  1230  	}
  1231  	matchedSigs := enh.matchedSigs
  1232  
  1233  	// For all lhs samples find a respective rhs sample and perform
  1234  	// the binary operation.
  1235  	for _, ls := range lhs {
  1236  		sig := sigf(ls.Metric)
  1237  
  1238  		rs, found := rightSigs[sig] // Look for a match in the rhs Vector.
  1239  		if !found {
  1240  			continue
  1241  		}
  1242  
  1243  		// Account for potentially swapped sidedness.
  1244  		vl, vr := ls.V, rs.V
  1245  		if matching.Card == CardOneToMany {
  1246  			vl, vr = vr, vl
  1247  		}
  1248  		value, keep := vectorElemBinop(op, vl, vr)
  1249  		if returnBool {
  1250  			if keep {
  1251  				value = 1.0
  1252  			} else {
  1253  				value = 0.0
  1254  			}
  1255  		} else if !keep {
  1256  			continue
  1257  		}
  1258  		metric := resultMetric(ls.Metric, rs.Metric, op, matching, enh)
  1259  
  1260  		insertedSigs, exists := matchedSigs[sig]
  1261  		if matching.Card == CardOneToOne {
  1262  			if exists {
  1263  				ev.errorf("multiple matches for labels: many-to-one matching must be explicit (group_left/group_right)")
  1264  			}
  1265  			matchedSigs[sig] = nil // Set existence to true.
  1266  		} else {
  1267  			// In many-to-one matching the grouping labels have to ensure a unique metric
  1268  			// for the result Vector. Check whether those labels have already been added for
  1269  			// the same matching labels.
  1270  			insertSig := metric.Hash()
  1271  
  1272  			if !exists {
  1273  				insertedSigs = map[uint64]struct{}{}
  1274  				matchedSigs[sig] = insertedSigs
  1275  			} else if _, duplicate := insertedSigs[insertSig]; duplicate {
  1276  				ev.errorf("multiple matches for labels: grouping labels must ensure unique matches")
  1277  			}
  1278  			insertedSigs[insertSig] = struct{}{}
  1279  		}
  1280  
  1281  		enh.out = append(enh.out, Sample{
  1282  			Metric: metric,
  1283  			Point:  Point{V: value},
  1284  		})
  1285  	}
  1286  	return enh.out
  1287  }
  1288  
  1289  // signatureFunc returns a function that calculates the signature for a metric
  1290  // ignoring the provided labels. If on, then the given labels are only used instead.
  1291  func signatureFunc(on bool, names ...string) func(labels.Labels) uint64 {
  1292  	sort.Strings(names)
  1293  	if on {
  1294  		return func(lset labels.Labels) uint64 {
  1295  			h, _ := lset.HashForLabels(make([]byte, 0, 1024), names...)
  1296  			return h
  1297  		}
  1298  	}
  1299  	return func(lset labels.Labels) uint64 {
  1300  		h, _ := lset.HashWithoutLabels(make([]byte, 0, 1024), names...)
  1301  		return h
  1302  	}
  1303  }
  1304  
  1305  // resultMetric returns the metric for the given sample(s) based on the Vector
  1306  // binary operation and the matching options.
  1307  func resultMetric(lhs, rhs labels.Labels, op ItemType, matching *VectorMatching, enh *EvalNodeHelper) labels.Labels {
  1308  	if enh.resultMetric == nil {
  1309  		enh.resultMetric = make(map[uint64]labels.Labels, len(enh.out))
  1310  	}
  1311  	// op and matching are always the same for a given node, so
  1312  	// there's no need to include them in the hash key.
  1313  	// If the lhs and rhs are the same then the xor would be 0,
  1314  	// so add in one side to protect against that.
  1315  	lh := lhs.Hash()
  1316  	h := (lh ^ rhs.Hash()) + lh
  1317  	if ret, ok := enh.resultMetric[h]; ok {
  1318  		return ret
  1319  	}
  1320  
  1321  	lb := labels.NewBuilder(lhs)
  1322  
  1323  	if shouldDropMetricName(op) {
  1324  		lb.Del(labels.MetricName)
  1325  	}
  1326  
  1327  	if matching.Card == CardOneToOne {
  1328  		if matching.On {
  1329  		Outer:
  1330  			for _, l := range lhs {
  1331  				for _, n := range matching.MatchingLabels {
  1332  					if l.Name == n {
  1333  						continue Outer
  1334  					}
  1335  				}
  1336  				lb.Del(l.Name)
  1337  			}
  1338  		} else {
  1339  			lb.Del(matching.MatchingLabels...)
  1340  		}
  1341  	}
  1342  	for _, ln := range matching.Include {
  1343  		// Included labels from the `group_x` modifier are taken from the "one"-side.
  1344  		if v := rhs.Get(ln); v != "" {
  1345  			lb.Set(ln, v)
  1346  		} else {
  1347  			lb.Del(ln)
  1348  		}
  1349  	}
  1350  
  1351  	ret := lb.Labels()
  1352  	enh.resultMetric[h] = ret
  1353  	return ret
  1354  }
  1355  
  1356  // VectorscalarBinop evaluates a binary operation between a Vector and a Scalar.
  1357  func (ev *evaluator) VectorscalarBinop(op ItemType, lhs Vector, rhs Scalar, swap, returnBool bool, enh *EvalNodeHelper) Vector {
  1358  	for _, lhsSample := range lhs {
  1359  		lv, rv := lhsSample.V, rhs.V
  1360  		// lhs always contains the Vector. If the original position was different
  1361  		// swap for calculating the value.
  1362  		if swap {
  1363  			lv, rv = rv, lv
  1364  		}
  1365  		value, keep := vectorElemBinop(op, lv, rv)
  1366  		if returnBool {
  1367  			if keep {
  1368  				value = 1.0
  1369  			} else {
  1370  				value = 0.0
  1371  			}
  1372  			keep = true
  1373  		}
  1374  		if keep {
  1375  			lhsSample.V = value
  1376  			if shouldDropMetricName(op) || returnBool {
  1377  				lhsSample.Metric = enh.dropMetricName(lhsSample.Metric)
  1378  			}
  1379  			enh.out = append(enh.out, lhsSample)
  1380  		}
  1381  	}
  1382  	return enh.out
  1383  }
  1384  
  1385  func dropMetricName(l labels.Labels) labels.Labels {
  1386  	return labels.NewBuilder(l).Del(labels.MetricName).Labels()
  1387  }
  1388  
  1389  // scalarBinop evaluates a binary operation between two Scalars.
  1390  func scalarBinop(op ItemType, lhs, rhs float64) float64 {
  1391  	switch op {
  1392  	case itemADD:
  1393  		return lhs + rhs
  1394  	case itemSUB:
  1395  		return lhs - rhs
  1396  	case itemMUL:
  1397  		return lhs * rhs
  1398  	case itemDIV:
  1399  		return lhs / rhs
  1400  	case itemPOW:
  1401  		return math.Pow(lhs, rhs)
  1402  	case itemMOD:
  1403  		return math.Mod(lhs, rhs)
  1404  	case itemEQL:
  1405  		return btos(lhs == rhs)
  1406  	case itemNEQ:
  1407  		return btos(lhs != rhs)
  1408  	case itemGTR:
  1409  		return btos(lhs > rhs)
  1410  	case itemLSS:
  1411  		return btos(lhs < rhs)
  1412  	case itemGTE:
  1413  		return btos(lhs >= rhs)
  1414  	case itemLTE:
  1415  		return btos(lhs <= rhs)
  1416  	}
  1417  	panic(fmt.Errorf("operator %q not allowed for Scalar operations", op))
  1418  }
  1419  
  1420  // vectorElemBinop evaluates a binary operation between two Vector elements.
  1421  func vectorElemBinop(op ItemType, lhs, rhs float64) (float64, bool) {
  1422  	switch op {
  1423  	case itemADD:
  1424  		return lhs + rhs, true
  1425  	case itemSUB:
  1426  		return lhs - rhs, true
  1427  	case itemMUL:
  1428  		return lhs * rhs, true
  1429  	case itemDIV:
  1430  		return lhs / rhs, true
  1431  	case itemPOW:
  1432  		return math.Pow(lhs, rhs), true
  1433  	case itemMOD:
  1434  		return math.Mod(lhs, rhs), true
  1435  	case itemEQL:
  1436  		return lhs, lhs == rhs
  1437  	case itemNEQ:
  1438  		return lhs, lhs != rhs
  1439  	case itemGTR:
  1440  		return lhs, lhs > rhs
  1441  	case itemLSS:
  1442  		return lhs, lhs < rhs
  1443  	case itemGTE:
  1444  		return lhs, lhs >= rhs
  1445  	case itemLTE:
  1446  		return lhs, lhs <= rhs
  1447  	}
  1448  	panic(fmt.Errorf("operator %q not allowed for operations between Vectors", op))
  1449  }
  1450  
  1451  // intersection returns the metric of common label/value pairs of two input metrics.
  1452  func intersection(ls1, ls2 labels.Labels) labels.Labels {
  1453  	res := make(labels.Labels, 0, 5)
  1454  
  1455  	for _, l1 := range ls1 {
  1456  		for _, l2 := range ls2 {
  1457  			if l1.Name == l2.Name && l1.Value == l2.Value {
  1458  				res = append(res, l1)
  1459  				continue
  1460  			}
  1461  		}
  1462  	}
  1463  	return res
  1464  }
  1465  
  1466  type groupedAggregation struct {
  1467  	labels           labels.Labels
  1468  	value            float64
  1469  	valuesSquaredSum float64
  1470  	groupCount       int
  1471  	heap             vectorByValueHeap
  1472  	reverseHeap      vectorByReverseValueHeap
  1473  }
  1474  
  1475  // aggregation evaluates an aggregation operation on a Vector.
  1476  func (ev *evaluator) aggregation(op ItemType, grouping []string, without bool, param interface{}, vec Vector, enh *EvalNodeHelper) Vector {
  1477  
  1478  	result := map[uint64]*groupedAggregation{}
  1479  	var k int64
  1480  	if op == itemTopK || op == itemBottomK {
  1481  		f := param.(float64)
  1482  		if !convertibleToInt64(f) {
  1483  			ev.errorf("Scalar value %v overflows int64", f)
  1484  		}
  1485  		k = int64(f)
  1486  		if k < 1 {
  1487  			return Vector{}
  1488  		}
  1489  	}
  1490  	var q float64
  1491  	if op == itemQuantile {
  1492  		q = param.(float64)
  1493  	}
  1494  	var valueLabel string
  1495  	if op == itemCountValues {
  1496  		valueLabel = param.(string)
  1497  		if !without {
  1498  			grouping = append(grouping, valueLabel)
  1499  		}
  1500  	}
  1501  
  1502  	sort.Strings(grouping)
  1503  	buf := make([]byte, 0, 1024)
  1504  	for _, s := range vec {
  1505  		metric := s.Metric
  1506  
  1507  		if op == itemCountValues {
  1508  			lb := labels.NewBuilder(metric)
  1509  			lb.Set(valueLabel, strconv.FormatFloat(s.V, 'f', -1, 64))
  1510  			metric = lb.Labels()
  1511  		}
  1512  
  1513  		var (
  1514  			groupingKey uint64
  1515  		)
  1516  		if without {
  1517  			groupingKey, buf = metric.HashWithoutLabels(buf, grouping...)
  1518  		} else {
  1519  			groupingKey, buf = metric.HashForLabels(buf, grouping...)
  1520  		}
  1521  
  1522  		group, ok := result[groupingKey]
  1523  		// Add a new group if it doesn't exist.
  1524  		if !ok {
  1525  			var m labels.Labels
  1526  
  1527  			if without {
  1528  				lb := labels.NewBuilder(metric)
  1529  				lb.Del(grouping...)
  1530  				lb.Del(labels.MetricName)
  1531  				m = lb.Labels()
  1532  			} else {
  1533  				m = make(labels.Labels, 0, len(grouping))
  1534  				for _, l := range metric {
  1535  					for _, n := range grouping {
  1536  						if l.Name == n {
  1537  							m = append(m, l)
  1538  							break
  1539  						}
  1540  					}
  1541  				}
  1542  				sort.Sort(m)
  1543  			}
  1544  			result[groupingKey] = &groupedAggregation{
  1545  				labels:           m,
  1546  				value:            s.V,
  1547  				valuesSquaredSum: s.V * s.V,
  1548  				groupCount:       1,
  1549  			}
  1550  			inputVecLen := int64(len(vec))
  1551  			resultSize := k
  1552  			if k > inputVecLen {
  1553  				resultSize = inputVecLen
  1554  			}
  1555  			if op == itemTopK || op == itemQuantile {
  1556  				result[groupingKey].heap = make(vectorByValueHeap, 0, resultSize)
  1557  				heap.Push(&result[groupingKey].heap, &Sample{
  1558  					Point:  Point{V: s.V},
  1559  					Metric: s.Metric,
  1560  				})
  1561  			} else if op == itemBottomK {
  1562  				result[groupingKey].reverseHeap = make(vectorByReverseValueHeap, 0, resultSize)
  1563  				heap.Push(&result[groupingKey].reverseHeap, &Sample{
  1564  					Point:  Point{V: s.V},
  1565  					Metric: s.Metric,
  1566  				})
  1567  			}
  1568  			continue
  1569  		}
  1570  
  1571  		switch op {
  1572  		case itemSum:
  1573  			group.value += s.V
  1574  
  1575  		case itemAvg:
  1576  			group.value += s.V
  1577  			group.groupCount++
  1578  
  1579  		case itemMax:
  1580  			if group.value < s.V || math.IsNaN(group.value) {
  1581  				group.value = s.V
  1582  			}
  1583  
  1584  		case itemMin:
  1585  			if group.value > s.V || math.IsNaN(group.value) {
  1586  				group.value = s.V
  1587  			}
  1588  
  1589  		case itemCount, itemCountValues:
  1590  			group.groupCount++
  1591  
  1592  		case itemStdvar, itemStddev:
  1593  			group.value += s.V
  1594  			group.valuesSquaredSum += s.V * s.V
  1595  			group.groupCount++
  1596  
  1597  		case itemTopK:
  1598  			if int64(len(group.heap)) < k || group.heap[0].V < s.V || math.IsNaN(group.heap[0].V) {
  1599  				if int64(len(group.heap)) == k {
  1600  					heap.Pop(&group.heap)
  1601  				}
  1602  				heap.Push(&group.heap, &Sample{
  1603  					Point:  Point{V: s.V},
  1604  					Metric: s.Metric,
  1605  				})
  1606  			}
  1607  
  1608  		case itemBottomK:
  1609  			if int64(len(group.reverseHeap)) < k || group.reverseHeap[0].V > s.V || math.IsNaN(group.reverseHeap[0].V) {
  1610  				if int64(len(group.reverseHeap)) == k {
  1611  					heap.Pop(&group.reverseHeap)
  1612  				}
  1613  				heap.Push(&group.reverseHeap, &Sample{
  1614  					Point:  Point{V: s.V},
  1615  					Metric: s.Metric,
  1616  				})
  1617  			}
  1618  
  1619  		case itemQuantile:
  1620  			group.heap = append(group.heap, s)
  1621  
  1622  		default:
  1623  			panic(fmt.Errorf("expected aggregation operator but got %q", op))
  1624  		}
  1625  	}
  1626  
  1627  	// Construct the result Vector from the aggregated groups.
  1628  	for _, aggr := range result {
  1629  		switch op {
  1630  		case itemAvg:
  1631  			aggr.value = aggr.value / float64(aggr.groupCount)
  1632  
  1633  		case itemCount, itemCountValues:
  1634  			aggr.value = float64(aggr.groupCount)
  1635  
  1636  		case itemStdvar:
  1637  			avg := aggr.value / float64(aggr.groupCount)
  1638  			aggr.value = aggr.valuesSquaredSum/float64(aggr.groupCount) - avg*avg
  1639  
  1640  		case itemStddev:
  1641  			avg := aggr.value / float64(aggr.groupCount)
  1642  			aggr.value = math.Sqrt(aggr.valuesSquaredSum/float64(aggr.groupCount) - avg*avg)
  1643  
  1644  		case itemTopK:
  1645  			// The heap keeps the lowest value on top, so reverse it.
  1646  			sort.Sort(sort.Reverse(aggr.heap))
  1647  			for _, v := range aggr.heap {
  1648  				enh.out = append(enh.out, Sample{
  1649  					Metric: v.Metric,
  1650  					Point:  Point{V: v.V},
  1651  				})
  1652  			}
  1653  			continue // Bypass default append.
  1654  
  1655  		case itemBottomK:
  1656  			// The heap keeps the lowest value on top, so reverse it.
  1657  			sort.Sort(sort.Reverse(aggr.reverseHeap))
  1658  			for _, v := range aggr.reverseHeap {
  1659  				enh.out = append(enh.out, Sample{
  1660  					Metric: v.Metric,
  1661  					Point:  Point{V: v.V},
  1662  				})
  1663  			}
  1664  			continue // Bypass default append.
  1665  
  1666  		case itemQuantile:
  1667  			aggr.value = quantile(q, aggr.heap)
  1668  
  1669  		default:
  1670  			// For other aggregations, we already have the right value.
  1671  		}
  1672  
  1673  		enh.out = append(enh.out, Sample{
  1674  			Metric: aggr.labels,
  1675  			Point:  Point{V: aggr.value},
  1676  		})
  1677  	}
  1678  	return enh.out
  1679  }
  1680  
  1681  // btos returns 1 if b is true, 0 otherwise.
  1682  func btos(b bool) float64 {
  1683  	if b {
  1684  		return 1
  1685  	}
  1686  	return 0
  1687  }
  1688  
  1689  // shouldDropMetricName returns whether the metric name should be dropped in the
  1690  // result of the op operation.
  1691  func shouldDropMetricName(op ItemType) bool {
  1692  	switch op {
  1693  	case itemADD, itemSUB, itemDIV, itemMUL, itemMOD:
  1694  		return true
  1695  	default:
  1696  		return false
  1697  	}
  1698  }
  1699  
  1700  // LookbackDelta determines the time since the last sample after which a time
  1701  // series is considered stale.
  1702  var LookbackDelta = 5 * time.Minute
  1703  
  1704  // A queryGate controls the maximum number of concurrently running and waiting queries.
  1705  type queryGate struct {
  1706  	ch chan struct{}
  1707  }
  1708  
  1709  // newQueryGate returns a query gate that limits the number of queries
  1710  // being concurrently executed.
  1711  func newQueryGate(length int) *queryGate {
  1712  	return &queryGate{
  1713  		ch: make(chan struct{}, length),
  1714  	}
  1715  }
  1716  
  1717  // Start blocks until the gate has a free spot or the context is done.
  1718  func (g *queryGate) Start(ctx context.Context) error {
  1719  	select {
  1720  	case <-ctx.Done():
  1721  		return contextDone(ctx, "query queue")
  1722  	case g.ch <- struct{}{}:
  1723  		return nil
  1724  	}
  1725  }
  1726  
  1727  // Done releases a single spot in the gate.
  1728  func (g *queryGate) Done() {
  1729  	select {
  1730  	case <-g.ch:
  1731  	default:
  1732  		panic("engine.queryGate.Done: more operations done than started")
  1733  	}
  1734  }
  1735  
  1736  // documentedType returns the internal type to the equivalent
  1737  // user facing terminology as defined in the documentation.
  1738  func documentedType(t ValueType) string {
  1739  	switch t {
  1740  	case "vector":
  1741  		return "instant vector"
  1742  	case "matrix":
  1743  		return "range vector"
  1744  	default:
  1745  		return string(t)
  1746  	}
  1747  }