github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/query.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package ts
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"math"
    17  	"sort"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/kv"
    21  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    22  	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
    23  	"github.com/cockroachdb/cockroach/pkg/util/mon"
    24  	"github.com/cockroachdb/errors"
    25  )
    26  
    27  // timeSeriesSpan represents a queried time span for a single time series. This
    28  // is represented as an ordered slice of data slabs, where each slab contains
    29  // samples.
    30  type timeSeriesSpan []roachpb.InternalTimeSeriesData
    31  
    32  // timeSeriesSpanIterator is used to iterate over a timeSeriesSpan. An iterator
    33  // is helpful because a multi-level index is required to iterate over the structure.
    34  type timeSeriesSpanIterator struct {
    35  	span      timeSeriesSpan // slabs being iterated over
    36  	total     int            // index of the current sample across the entire span
    37  	outer     int            // index into span of the slab holding the current sample
    38  	inner     int            // index of the current sample within that slab
    39  	timestamp int64          // timestamp of the current sample; zero when the iterator is not valid
    40  	length    int            // total number of samples in span, as computed by computeLength
    41  }
    42  
    43  // makeTimeSeriesSpanIterator constructs a new iterator for the supplied
    44  // timeSeriesSpan, initialized at index 0.
    45  func makeTimeSeriesSpanIterator(span timeSeriesSpan) timeSeriesSpanIterator {
    46  	iterator := timeSeriesSpanIterator{
    47  		span: span,
    48  	}
    49  	iterator.computeLength()
    50  	iterator.computeTimestamp()
    51  	return iterator
    52  }
    53  
    54  // computeLength recomputes the total length of the span.
    55  func (tsi *timeSeriesSpanIterator) computeLength() {
    56  	tsi.length = 0
    57  	for _, data := range tsi.span {
    58  		tsi.length += data.SampleCount()
    59  	}
    60  }
    61  
    62  // computeTimestamp computes the timestamp of the sample at the current index.
    63  // It is automatically called internally whenever the iterator is moved.
    64  func (tsi *timeSeriesSpanIterator) computeTimestamp() {
    65  	if !tsi.isValid() {
    66  		tsi.timestamp = 0
    67  		return
    68  	}
    69  	data := tsi.span[tsi.outer]
    70  	tsi.timestamp = data.StartTimestampNanos + data.SampleDurationNanos*int64(tsi.offset())
    71  }
    72  
    73  // forward moves the iterator forward one sample. The maximum index is equal
    74  // to the length of the span, which is one index beyond the last sample.
    75  func (tsi *timeSeriesSpanIterator) forward() {
    76  	if !tsi.isValid() {
    77  		return
    78  	}
    79  	tsi.total++
    80  	tsi.inner++
    81  	if tsi.inner >= tsi.span[tsi.outer].SampleCount() {
    82  		tsi.inner = 0
    83  		tsi.outer++
    84  	}
    85  	tsi.computeTimestamp()
    86  }
    87  
    88  // backward moves the iterator back one sample. The iterator can not be moved
    89  // earlier than the first index.
    90  func (tsi *timeSeriesSpanIterator) backward() {
    91  	if tsi.outer == 0 && tsi.inner == 0 {
    92  		return
    93  	}
    94  	tsi.total--
    95  	if tsi.inner == 0 {
    96  		tsi.outer--
    97  		tsi.inner = tsi.span[tsi.outer].SampleCount() - 1
    98  	} else {
    99  		tsi.inner--
   100  	}
   101  	tsi.computeTimestamp()
   102  }
   103  
   104  // seekIndex sets the iterator to the supplied index in the span. The index
   105  // cannot be set greater than the length of the span or less than zero.
   106  func (tsi *timeSeriesSpanIterator) seekIndex(index int) {
   107  	if index >= tsi.length {
   108  		tsi.total = tsi.length
   109  		tsi.inner = 0
   110  		tsi.outer = len(tsi.span)
   111  		tsi.timestamp = 0
   112  		return
   113  	}
   114  
   115  	if index < 0 {
   116  		index = 0
   117  	}
   118  
   119  	remaining := index
   120  	newOuter := 0
   121  	for len(tsi.span) > newOuter && remaining >= tsi.span[newOuter].SampleCount() {
   122  		remaining -= tsi.span[newOuter].SampleCount()
   123  		newOuter++
   124  	}
   125  	tsi.inner = remaining
   126  	tsi.outer = newOuter
   127  	tsi.total = index
   128  	tsi.computeTimestamp()
   129  }
   130  
   131  // seekTimestamp sets the iterator to the earliest sample index with a timestamp
   132  // greater than or equal to the supplied timestamp.
   133  func (tsi *timeSeriesSpanIterator) seekTimestamp(timestamp int64) {
   134  	seeker := *tsi
   135  	index := sort.Search(tsi.length, func(i int) bool {
   136  		seeker.seekIndex(i)
   137  		return seeker.timestamp >= timestamp
   138  	})
   139  	tsi.seekIndex(index)
   140  }
   141  
   142  func (tsi *timeSeriesSpanIterator) isColumnar() bool {
   143  	return tsi.span[tsi.outer].IsColumnar()
   144  }
   145  
   146  func (tsi *timeSeriesSpanIterator) isRollup() bool {
   147  	return tsi.span[tsi.outer].IsRollup()
   148  }
   149  
   150  func (tsi *timeSeriesSpanIterator) offset() int32 {
   151  	data := tsi.span[tsi.outer]
   152  	if tsi.isColumnar() {
   153  		return data.Offset[tsi.inner]
   154  	}
   155  	return data.Samples[tsi.inner].Offset
   156  }
   157  
   158  func (tsi *timeSeriesSpanIterator) count() uint32 {
   159  	data := tsi.span[tsi.outer]
   160  	if tsi.isColumnar() {
   161  		if tsi.isRollup() {
   162  			return data.Count[tsi.inner]
   163  		}
   164  		return 1
   165  	}
   166  	return data.Samples[tsi.inner].Count
   167  }
   168  
   169  func (tsi *timeSeriesSpanIterator) sum() float64 {
   170  	data := tsi.span[tsi.outer]
   171  	if tsi.isColumnar() {
   172  		if tsi.isRollup() {
   173  			return data.Sum[tsi.inner]
   174  		}
   175  		return data.Last[tsi.inner]
   176  	}
   177  	return data.Samples[tsi.inner].Sum
   178  }
   179  
   180  func (tsi *timeSeriesSpanIterator) max() float64 {
   181  	data := tsi.span[tsi.outer]
   182  	if tsi.isColumnar() {
   183  		if tsi.isRollup() {
   184  			return data.Max[tsi.inner]
   185  		}
   186  		return data.Last[tsi.inner]
   187  	}
   188  	if max := data.Samples[tsi.inner].Max; max != nil {
   189  		return *max
   190  	}
   191  	return data.Samples[tsi.inner].Sum
   192  }
   193  
   194  func (tsi *timeSeriesSpanIterator) min() float64 {
   195  	data := tsi.span[tsi.outer]
   196  	if tsi.isColumnar() {
   197  		if tsi.isRollup() {
   198  			return data.Min[tsi.inner]
   199  		}
   200  		return data.Last[tsi.inner]
   201  	}
   202  	if min := data.Samples[tsi.inner].Min; min != nil {
   203  		return *min
   204  	}
   205  	return data.Samples[tsi.inner].Sum
   206  }
   207  
   208  func (tsi *timeSeriesSpanIterator) first() float64 {
   209  	data := tsi.span[tsi.outer]
   210  	if tsi.isColumnar() {
   211  		if tsi.isRollup() {
   212  			return data.First[tsi.inner]
   213  		}
   214  		return data.Last[tsi.inner]
   215  	}
   216  
   217  	// First was not recorded in the planned row-format rollups, but since these
   218  	// rollups were never actually generated we can safely use sum.
   219  	return data.Samples[tsi.inner].Sum
   220  }
   221  
   222  func (tsi *timeSeriesSpanIterator) last() float64 {
   223  	data := tsi.span[tsi.outer]
   224  	if tsi.isColumnar() {
   225  		return data.Last[tsi.inner]
   226  	}
   227  
   228  	// Last was not recorded in the planned row-format rollups, but since these
   229  	// rollups were never actually generated we can safely use sum.
   230  	return data.Samples[tsi.inner].Sum
   231  }
   232  
   233  func (tsi *timeSeriesSpanIterator) variance() float64 {
   234  	data := tsi.span[tsi.outer]
   235  	if tsi.isColumnar() {
   236  		if tsi.isRollup() {
   237  			return data.Variance[tsi.inner]
   238  		}
   239  		return 0
   240  	}
   241  
   242  	// Variance was not recorded in the planned row-format rollups, but since
   243  	// these rollups were never actually generated we can safely return 0.
   244  	return 0
   245  }
   246  
   247  func (tsi *timeSeriesSpanIterator) average() float64 {
   248  	return tsi.sum() / float64(tsi.count())
   249  }
   250  
   251  func (tsi *timeSeriesSpanIterator) setOffset(value int32) {
   252  	data := tsi.span[tsi.outer]
   253  	if tsi.isColumnar() {
   254  		data.Offset[tsi.inner] = value
   255  		return
   256  	}
   257  	data.Samples[tsi.inner].Offset = value
   258  }
   259  
   260  func (tsi *timeSeriesSpanIterator) setSingleValue(value float64) {
   261  	data := tsi.span[tsi.outer]
   262  	if tsi.isColumnar() {
   263  		data.Last[tsi.inner] = value
   264  		return
   265  	}
   266  	data.Samples[tsi.inner].Sum = value
   267  	data.Samples[tsi.inner].Count = 1
   268  	data.Samples[tsi.inner].Min = nil
   269  	data.Samples[tsi.inner].Max = nil
   270  }
   271  
   272  // truncateSpan truncates the span underlying this iterator to the current
   273  // iterator, *not including* the current position. That is, the logical
   274  // underlying span is truncated to [0, current).
   275  func (tsi *timeSeriesSpanIterator) truncateSpan() {
   276  	var outerExtent int
   277  	if tsi.inner == 0 {
   278  		outerExtent = tsi.outer
   279  	} else {
   280  		outerExtent = tsi.outer + 1
   281  	}
   282  
   283  	// Reclaim memory from unused slabs.
   284  	unused := tsi.span[outerExtent:]
   285  	tsi.span = tsi.span[:outerExtent]
   286  	for i := range unused {
   287  		unused[i] = roachpb.InternalTimeSeriesData{}
   288  	}
   289  
   290  	if tsi.inner != 0 {
   291  		data := tsi.span[tsi.outer]
   292  		size := tsi.inner
   293  		if data.IsColumnar() {
   294  			data.Offset = data.Offset[:size]
   295  			data.Last = data.Last[:size]
   296  			if data.IsRollup() {
   297  				data.First = data.First[:size]
   298  				data.Min = data.Min[:size]
   299  				data.Max = data.Max[:size]
   300  				data.Count = data.Count[:size]
   301  				data.Sum = data.Sum[:size]
   302  				data.Variance = data.Variance[:size]
   303  			}
   304  		} else {
   305  			data.Samples = data.Samples[:size]
   306  		}
   307  		tsi.span[tsi.outer] = data
   308  	}
   309  
   310  	tsi.computeLength()
   311  	tsi.computeTimestamp()
   312  }
   313  
   314  // Convert the underlying span to single-valued by removing all optional columns
   315  // from any columnar spans.
   316  func convertToSingleValue(span timeSeriesSpan) {
   317  	for i := range span {
   318  		if span[i].IsColumnar() {
   319  			span[i].Count = nil
   320  			span[i].Sum = nil
   321  			span[i].Min = nil
   322  			span[i].Max = nil
   323  			span[i].First = nil
   324  			span[i].Variance = nil
   325  		}
   326  	}
   327  }
   328  
   329  // value returns the value of the sample at the iterators index, according to
   330  // the provided downsampler operation.
   331  func (tsi *timeSeriesSpanIterator) value(downsampler tspb.TimeSeriesQueryAggregator) float64 {
   332  	if !tsi.isValid() {
   333  		return 0
   334  	}
   335  	switch downsampler {
   336  	case tspb.TimeSeriesQueryAggregator_AVG:
   337  		return tsi.sum() / float64(tsi.count())
   338  	case tspb.TimeSeriesQueryAggregator_MAX:
   339  		return tsi.max()
   340  	case tspb.TimeSeriesQueryAggregator_MIN:
   341  		return tsi.min()
   342  	case tspb.TimeSeriesQueryAggregator_SUM:
   343  		return tsi.sum()
   344  	}
   345  
   346  	panic(fmt.Sprintf("unknown downsampler option encountered: %v", downsampler))
   347  }
   348  
   349  // valueAtTimestamp returns the value of the span at the provided timestamp,
   350  // according to the current position of the iterator. If the provided timestamp
   351  // is not exactly equal to the iterator's current timestamp, but is in between
   352  // the iterator's timestamp and the previous timestamp, then the value is
   353  // interpolated using linear interpolation.
   354  //
   355  // However, a maximum interpolation limit is passed - if the distance between
   356  // the current timestamp and the previous timestamp is greater than this limit,
   357  // then interpolation will not be attempted.
   358  func (tsi *timeSeriesSpanIterator) valueAtTimestamp(
   359  	timestamp int64, interpolationLimitNanos int64, downsampler tspb.TimeSeriesQueryAggregator,
   360  ) (float64, bool) {
   361  	if !tsi.validAtTimestamp(timestamp, interpolationLimitNanos) {
   362  		return 0, false
   363  	}
   364  	if tsi.timestamp == timestamp {
   365  		return tsi.value(downsampler), true
   366  	}
   367  
   368  	deriv, valid := tsi.derivative(downsampler)
   369  	if !valid {
   370  		return 0, false
   371  	}
   372  	return tsi.value(downsampler) - deriv*float64((tsi.timestamp-timestamp)/tsi.samplePeriod()), true
   373  }
   374  
   375  // validAtTimestamp returns true if the iterator can return a valid value for
   376  // the provided timestamp. This is true either if the iterators current position
   377  // is the current timestamp, *or* if the provided timestamp is between the
   378  // iterators current and previous positions *and* the gap between the current
   379  // and previous positions is less than the provided interpolation limit.
   380  func (tsi *timeSeriesSpanIterator) validAtTimestamp(timestamp, interpolationLimitNanos int64) bool {
   381  	if !tsi.isValid() {
   382  		return false
   383  	}
   384  	if tsi.timestamp == timestamp {
   385  		return true
   386  	}
   387  	// Cannot interpolate before the first index.
   388  	if tsi.total == 0 {
   389  		return false
   390  	}
   391  	prev := *tsi
   392  	prev.backward()
   393  
   394  	// Only interpolate if the timestamp is in between this point and the previous.
   395  	if timestamp > tsi.timestamp || timestamp <= prev.timestamp {
   396  		return false
   397  	}
   398  	// Respect the interpolation limit. Note that an interpolation limit of zero
   399  	// is a special case still needed for legacy tests.
   400  	// TODO(mrtracy): remove test cases with interpolation limit zero.
   401  	if interpolationLimitNanos > 0 && tsi.timestamp-prev.timestamp > interpolationLimitNanos {
   402  		return false
   403  	}
   404  	return true
   405  }
   406  
   407  // derivative returns the current rate of change of the iterator, computed by
   408  // considering the value at the current position and the value at the previous
   409  // position of the iterator. The derivative is expressed per sample period.
   410  func (tsi *timeSeriesSpanIterator) derivative(
   411  	downsampler tspb.TimeSeriesQueryAggregator,
   412  ) (float64, bool) {
   413  	if !tsi.isValid() {
   414  		return 0, false
   415  	}
   416  
   417  	// Cannot compute rate of change for the first index.
   418  	if tsi.total == 0 {
   419  		return 0, false
   420  	}
   421  
   422  	prev := *tsi
   423  	prev.backward()
   424  	rateOfChange := (tsi.value(downsampler) - prev.value(downsampler)) / float64((tsi.timestamp-prev.timestamp)/tsi.samplePeriod())
   425  	return rateOfChange, true
   426  }
   427  
   428  // samplePeriod returns the sample period duration for this iterator.
   429  func (tsi *timeSeriesSpanIterator) samplePeriod() int64 {
   430  	return tsi.span[0].SampleDurationNanos
   431  }
   432  
   433  // isValid returns true if the iterator currently points to a valid sample.
   434  func (tsi *timeSeriesSpanIterator) isValid() bool {
   435  	return tsi.total < tsi.length
   436  }
   437  
   438  // Query processes the supplied query over the supplied timespan and on-disk
   439  // resolution, while respecting the provided limitations on memory usage.
   440  func (db *DB) Query(
   441  	ctx context.Context,
   442  	query tspb.Query,
   443  	diskResolution Resolution,
   444  	timespan QueryTimespan,
   445  	mem QueryMemoryContext,
   446  ) ([]tspb.TimeSeriesDatapoint, []string, error) {
   447  	timespan.normalize()
   448  
   449  	// Validate incoming parameters.
   450  	if err := timespan.verifyBounds(); err != nil {
   451  		return nil, nil, err
   452  	}
   453  	if err := timespan.verifyDiskResolution(diskResolution); err != nil {
   454  		return nil, nil, err
   455  	}
   456  	if err := verifySourceAggregator(query.GetSourceAggregator()); err != nil {
   457  		return nil, nil, err
   458  	}
   459  	if err := verifyDownsampler(query.GetDownsampler()); err != nil {
   460  		return nil, nil, err
   461  	}
   462  
   463  	// Adjust timespan based on the current time.
   464  	if err := timespan.adjustForCurrentTime(diskResolution); err != nil {
   465  		return nil, nil, err
   466  	}
   467  
   468  	var result []tspb.TimeSeriesDatapoint
   469  
   470  	// Create sourceSet, which tracks unique sources seen while querying.
   471  	sourceSet := make(map[string]struct{})
   472  
   473  	resolutions := []Resolution{diskResolution}
   474  	if rollupResolution, ok := diskResolution.TargetRollupResolution(); ok {
   475  		if timespan.verifyDiskResolution(rollupResolution) == nil {
   476  			resolutions = []Resolution{rollupResolution, diskResolution}
   477  		}
   478  	}
   479  
   480  	for _, resolution := range resolutions {
   481  		// Compute the maximum timespan width which can be queried for this resolution
   482  		// without exceeding the memory budget.
   483  		maxTimespanWidth, err := mem.GetMaxTimespan(resolution)
   484  		if err != nil {
   485  			return nil, nil, err
   486  		}
   487  
   488  		if maxTimespanWidth > timespan.width() {
   489  			if err := db.queryChunk(
   490  				ctx, query, resolution, timespan, mem, &result, sourceSet,
   491  			); err != nil {
   492  				return nil, nil, err
   493  			}
   494  		} else {
   495  			// Break up the timespan into "chunks" where each chunk will fit into the
   496  			// memory budget. Query and process each chunk individually, appending
   497  			// results to the same output collection.
   498  			chunkTime := timespan
   499  			chunkTime.EndNanos = chunkTime.StartNanos + maxTimespanWidth
   500  			for ; chunkTime.StartNanos < timespan.EndNanos; chunkTime.moveForward(maxTimespanWidth + timespan.SampleDurationNanos) {
   501  				if chunkTime.EndNanos > timespan.EndNanos {
   502  					// Final chunk may be a smaller window.
   503  					chunkTime.EndNanos = timespan.EndNanos
   504  				}
   505  				if err := db.queryChunk(
   506  					ctx, query, resolution, chunkTime, mem, &result, sourceSet,
   507  				); err != nil {
   508  					return nil, nil, err
   509  				}
   510  			}
   511  		}
   512  
   513  		// If results were returned and there are multiple resolutions, determine
   514  		// if we have satisfied the entire query. If not, determine where the query
   515  		// for the next resolution should begin.
   516  		if len(resolutions) > 1 && len(result) > 0 {
   517  			lastTime := result[len(result)-1].TimestampNanos
   518  			if lastTime >= timespan.EndNanos {
   519  				break
   520  			}
   521  			timespan.StartNanos = lastTime
   522  		}
   523  	}
   524  
   525  	// Convert the unique sources seen into a slice.
   526  	sources := make([]string, 0, len(sourceSet))
   527  	for source := range sourceSet {
   528  		sources = append(sources, source)
   529  	}
   530  
   531  	return result, sources, nil
   532  }
   533  
   534  // queryChunk processes a chunk of a query; this will read the necessary data
   535  // from disk and apply the desired processing operations to generate a result.
   536  func (db *DB) queryChunk(
   537  	ctx context.Context,
   538  	query tspb.Query,
   539  	diskResolution Resolution,
   540  	timespan QueryTimespan,
   541  	mem QueryMemoryContext,
   542  	dest *[]tspb.TimeSeriesDatapoint,
   543  	sourceSet map[string]struct{},
   544  ) error {
   545  	acc := mem.workerMonitor.MakeBoundAccount()
   546  	defer acc.Close(ctx)
   547  
   548  	// Actual queried data should include the interpolation limit on either side.
   549  	diskTimespan := timespan
   550  	diskTimespan.expand(mem.InterpolationLimitNanos)
   551  
   552  	var data []kv.KeyValue
   553  	var err error
   554  	if len(query.Sources) == 0 {
   555  		data, err = db.readAllSourcesFromDatabase(ctx, query.Name, diskResolution, diskTimespan)
   556  	} else {
   557  		data, err = db.readFromDatabase(ctx, query.Name, diskResolution, diskTimespan, query.Sources)
   558  	}
   559  
   560  	if err != nil {
   561  		return err
   562  	}
   563  
   564  	// Assemble data into an ordered timeSeriesSpan for each source.
   565  	sourceSpans, err := convertKeysToSpans(ctx, data, &acc)
   566  	if err != nil {
   567  		return err
   568  	}
   569  	if len(sourceSpans) == 0 {
   570  		return nil
   571  	}
   572  
   573  	if timespan.SampleDurationNanos != diskResolution.SampleDuration() {
   574  		downsampleSpans(sourceSpans, timespan.SampleDurationNanos, query.GetDownsampler())
   575  		// downsampleSpans always produces single-valued spans. At the time of
   576  		// writing, all downsamplers are the identity on single-valued spans, but
   577  		// that may not be true forever (consider for instance a variance
   578  		// downsampler). Therefore, before continuing to the aggregation step we
   579  		// convert the downsampler to SUM, which is equivalent to identify for a
   580  		// single-valued span.
   581  		query.Downsampler = tspb.TimeSeriesQueryAggregator_SUM.Enum()
   582  	}
   583  
   584  	// Aggregate spans, increasing our memory usage if the destination slice is
   585  	// expanded.
   586  	oldCap := cap(*dest)
   587  	aggregateSpansToDatapoints(sourceSpans, query, timespan, mem.InterpolationLimitNanos, dest)
   588  	if oldCap > cap(*dest) {
   589  		if err := mem.resultAccount.Grow(ctx, sizeOfDataPoint*int64(cap(*dest)-oldCap)); err != nil {
   590  			return err
   591  		}
   592  	}
   593  
   594  	// Add unique sources to the supplied source set.
   595  	for k := range sourceSpans {
   596  		sourceSet[k] = struct{}{}
   597  	}
   598  	return nil
   599  }
   600  
   601  // downsampleSpans downsamples the provided timeSeriesSpans in place, without
   602  // allocating additional memory. The output data from downsampleSpans is
   603  // single-valued, without rollups; unused rollup data will be discarded.
   604  func downsampleSpans(
   605  	spans map[string]timeSeriesSpan, duration int64, downsampler tspb.TimeSeriesQueryAggregator,
   606  ) {
   607  	// Downsample data in place.
   608  	for k, span := range spans {
   609  		nextInsert := makeTimeSeriesSpanIterator(span)
   610  		for start, end := nextInsert, nextInsert; start.isValid(); start = end {
   611  			sampleTimestamp := normalizeToPeriod(start.timestamp, duration)
   612  
   613  			switch downsampler {
   614  			case tspb.TimeSeriesQueryAggregator_MAX:
   615  				max := -math.MaxFloat64
   616  				for ; end.isValid() && normalizeToPeriod(end.timestamp, duration) == sampleTimestamp; end.forward() {
   617  					max = math.Max(max, end.max())
   618  				}
   619  				nextInsert.setSingleValue(max)
   620  			case tspb.TimeSeriesQueryAggregator_MIN:
   621  				min := math.MaxFloat64
   622  				for ; end.isValid() && normalizeToPeriod(end.timestamp, duration) == sampleTimestamp; end.forward() {
   623  					min = math.Min(min, end.min())
   624  				}
   625  				nextInsert.setSingleValue(min)
   626  			case tspb.TimeSeriesQueryAggregator_AVG:
   627  				count, sum := uint32(0), 0.0
   628  				for ; end.isValid() && normalizeToPeriod(end.timestamp, duration) == sampleTimestamp; end.forward() {
   629  					count += end.count()
   630  					sum += end.sum()
   631  				}
   632  				nextInsert.setSingleValue(sum / float64(count))
   633  			case tspb.TimeSeriesQueryAggregator_SUM:
   634  				sum := 0.0
   635  				for ; end.isValid() && normalizeToPeriod(end.timestamp, duration) == sampleTimestamp; end.forward() {
   636  					sum += end.sum()
   637  				}
   638  				nextInsert.setSingleValue(sum)
   639  			}
   640  
   641  			nextInsert.setOffset(span[nextInsert.outer].OffsetForTimestamp(sampleTimestamp))
   642  			nextInsert.forward()
   643  		}
   644  
   645  		// Trim span using nextInsert, which is where the next value would be
   646  		// inserted and is thus the first unneeded value.
   647  		nextInsert.truncateSpan()
   648  		span = nextInsert.span
   649  		convertToSingleValue(span)
   650  		spans[k] = span
   651  	}
   652  }
   653  
   654  // aggregateSpansToDatapoints aggregates the supplied set of data spans into
   655  // a single result time series, by aggregating data points from different spans
   656  // which share the same timestamp. For each timestamp in the query range, a
   657  // value is extracted from each span using the supplied downsampling function.
   658  // If a span is missing a value at a specific timestamp, the missing value will
   659  // be interpolated under certain circumstances. The values from the different
   660  // spans are then combined into a single value using the specified source
   661  // aggregator.
   662  func aggregateSpansToDatapoints(
   663  	spans map[string]timeSeriesSpan,
   664  	query tspb.Query,
   665  	timespan QueryTimespan,
   666  	interpolationLimitNanos int64,
   667  	dest *[]tspb.TimeSeriesDatapoint,
   668  ) {
   669  	// Aggregate into reserved result slice (filter points missing from component slices)
   670  	iterators := make([]timeSeriesSpanIterator, 0, len(spans))
   671  	for _, span := range spans {
   672  		iter := makeTimeSeriesSpanIterator(span)
   673  		iter.seekTimestamp(timespan.StartNanos)
   674  		iterators = append(iterators, iter)
   675  	}
   676  
   677  	var lowestTimestamp int64
   678  	computeLowest := func() {
   679  		lowestTimestamp = math.MaxInt64
   680  		for _, iter := range iterators {
   681  			if !iter.isValid() {
   682  				continue
   683  			}
   684  			if iter.timestamp < lowestTimestamp {
   685  				lowestTimestamp = iter.timestamp
   686  			}
   687  		}
   688  	}
   689  
   690  	aggregateValues := make([]float64, len(iterators))
   691  	for computeLowest(); lowestTimestamp <= timespan.EndNanos; computeLowest() {
   692  		aggregateValues = aggregateValues[:0]
   693  		for i, iter := range iterators {
   694  			var value float64
   695  			var valid bool
   696  			switch query.GetDerivative() {
   697  			case tspb.TimeSeriesQueryDerivative_DERIVATIVE:
   698  				valid = iter.validAtTimestamp(lowestTimestamp, interpolationLimitNanos)
   699  				if valid {
   700  					value, valid = iter.derivative(query.GetDownsampler())
   701  					// Convert derivative to seconds.
   702  					value *= float64(time.Second.Nanoseconds()) / float64(iter.samplePeriod())
   703  				}
   704  			case tspb.TimeSeriesQueryDerivative_NON_NEGATIVE_DERIVATIVE:
   705  				valid = iter.validAtTimestamp(lowestTimestamp, interpolationLimitNanos)
   706  				if valid {
   707  					value, valid = iter.derivative(query.GetDownsampler())
   708  					if value < 0 {
   709  						value = 0
   710  					} else {
   711  						// Convert derivative to seconds.
   712  						value *= float64(time.Second.Nanoseconds()) / float64(iter.samplePeriod())
   713  					}
   714  				}
   715  			default:
   716  				value, valid = iter.valueAtTimestamp(
   717  					lowestTimestamp, interpolationLimitNanos, query.GetDownsampler(),
   718  				)
   719  			}
   720  
   721  			if valid {
   722  				aggregateValues = append(aggregateValues, value)
   723  			}
   724  			if iter.timestamp == lowestTimestamp {
   725  				iterators[i].forward()
   726  			}
   727  		}
   728  		if len(aggregateValues) == 0 {
   729  			continue
   730  		}
   731  
   732  		// Filters data points near the current moment which are "incomplete". Any
   733  		// data point in the sufficiently-recent past is required to have a valid
   734  		// contribution from all sources being aggregated.
   735  		//
   736  		// A detailed explanation of why this is done: New time series data points
   737  		// are, in typical usage, always added at the current time; however, due to
   738  		// the curiosities of clock skew, it is a common occurrence for the most
   739  		// recent data point to be available for some sources, but not from others.
   740  		// For queries which aggregate from multiple sources, this can lead to a
   741  		// situation where a persistent and precipitous dip appears at the very end
   742  		// of data graphs. This happens because the most recent point only
   743  		// represents the aggregation of a subset of sources, even though the
   744  		// missing sources are not actually offline, they are simply slightly
   745  		// delayed in reporting.
   746  		//
   747  		// Linear interpolation can gaps in the middle of data, but it does not work
   748  		// in this case as the current time is later than any data available from
   749  		// the missing sources.
   750  		//
   751  		// In this case, we can assume that a missing data point will be added soon,
   752  		// and instead do *not* return the partially aggregated data point to the
   753  		// client.
   754  		if lowestTimestamp > timespan.NowNanos-timespan.SampleDurationNanos {
   755  			if len(aggregateValues) < len(iterators) {
   756  				continue
   757  			}
   758  		}
   759  
   760  		*dest = append(*dest, tspb.TimeSeriesDatapoint{
   761  			TimestampNanos: lowestTimestamp,
   762  			Value:          aggregate(query.GetSourceAggregator(), aggregateValues),
   763  		})
   764  	}
   765  }
   766  
   767  // aggSum returns the sum value of all points in the provided slice.
   768  func aggSum(data []float64) float64 {
   769  	total := 0.0
   770  	for _, dp := range data {
   771  		total += dp
   772  	}
   773  	return total
   774  }
   775  
   776  // aggAvg returns the average value of the points in the provided slice.
   777  func aggAvg(data []float64) float64 {
   778  	if len(data) == 0 {
   779  		return 0.0
   780  	}
   781  	return aggSum(data) / float64(len(data))
   782  }
   783  
   784  // aggMax returns the maximum value of any point in the provided slice.
   785  func aggMax(data []float64) float64 {
   786  	max := -math.MaxFloat64
   787  	for _, dp := range data {
   788  		if dp > max {
   789  			max = dp
   790  		}
   791  	}
   792  	return max
   793  }
   794  
   795  // aggMin returns the minimum value of any point in the provided slice.
   796  func aggMin(data []float64) float64 {
   797  	min := math.MaxFloat64
   798  	for _, dp := range data {
   799  		if dp < min {
   800  			min = dp
   801  		}
   802  	}
   803  	return min
   804  }
   805  
   806  // aggregate computes a single float64 value from the given slice of float64s
   807  // using the specified aggregation function.
   808  func aggregate(agg tspb.TimeSeriesQueryAggregator, values []float64) float64 {
   809  	switch agg {
   810  	case tspb.TimeSeriesQueryAggregator_AVG:
   811  		return aggAvg(values)
   812  	case tspb.TimeSeriesQueryAggregator_SUM:
   813  		return aggSum(values)
   814  	case tspb.TimeSeriesQueryAggregator_MAX:
   815  		return aggMax(values)
   816  	case tspb.TimeSeriesQueryAggregator_MIN:
   817  		return aggMin(values)
   818  	}
   819  
   820  	panic(fmt.Sprintf("unknown aggregator option encountered: %v", agg))
   821  }
   822  
   823  // readFromDatabase retrieves data for the given series name, at the given disk
   824  // resolution, across the supplied time span, for only the given list of
   825  // sources.
   826  func (db *DB) readFromDatabase(
   827  	ctx context.Context,
   828  	seriesName string,
   829  	diskResolution Resolution,
   830  	timespan QueryTimespan,
   831  	sources []string,
   832  ) ([]kv.KeyValue, error) {
   833  	// Iterate over all key timestamps which may contain data for the given
   834  	// sources, based on the given start/end time and the resolution.
   835  	b := &kv.Batch{}
   836  	startTimestamp := diskResolution.normalizeToSlab(timespan.StartNanos)
   837  	kd := diskResolution.SlabDuration()
   838  	for currentTimestamp := startTimestamp; currentTimestamp <= timespan.EndNanos; currentTimestamp += kd {
   839  		for _, source := range sources {
   840  			key := MakeDataKey(seriesName, source, diskResolution, currentTimestamp)
   841  			b.Get(key)
   842  		}
   843  	}
   844  	if err := db.db.Run(ctx, b); err != nil {
   845  		return nil, err
   846  	}
   847  	var rows []kv.KeyValue
   848  	for _, result := range b.Results {
   849  		row := result.Rows[0]
   850  		if row.Value == nil {
   851  			continue
   852  		}
   853  		rows = append(rows, row)
   854  	}
   855  	return rows, nil
   856  }
   857  
   858  // readAllSourcesFromDatabase retrieves data for the given series name, at the
   859  // given disk resolution, across the supplied time span, for all sources. The
   860  // optional limit is used when memory usage is being limited by the number of
   861  // keys, rather than by timespan.
   862  func (db *DB) readAllSourcesFromDatabase(
   863  	ctx context.Context, seriesName string, diskResolution Resolution, timespan QueryTimespan,
   864  ) ([]kv.KeyValue, error) {
   865  	// Based on the supplied timestamps and resolution, construct start and
   866  	// end keys for a scan that will return every key with data relevant to
   867  	// the query. Query slightly before and after the actual queried range
   868  	// to allow interpolation of points at the start and end of the range.
   869  	startKey := MakeDataKey(
   870  		seriesName, "" /* source */, diskResolution, timespan.StartNanos,
   871  	)
   872  	endKey := MakeDataKey(
   873  		seriesName, "" /* source */, diskResolution, timespan.EndNanos,
   874  	).PrefixEnd()
   875  	b := &kv.Batch{}
   876  	b.Scan(startKey, endKey)
   877  
   878  	if err := db.db.Run(ctx, b); err != nil {
   879  		return nil, err
   880  	}
   881  	return b.Results[0].Rows, nil
   882  }
   883  
   884  // convertKeysToSpans converts a batch of KeyValues queried from disk into a
   885  // map of data spans organized by source.
   886  func convertKeysToSpans(
   887  	ctx context.Context, data []kv.KeyValue, acc *mon.BoundAccount,
   888  ) (map[string]timeSeriesSpan, error) {
   889  	sourceSpans := make(map[string]timeSeriesSpan)
   890  	for _, row := range data {
   891  		var data roachpb.InternalTimeSeriesData
   892  		if err := row.ValueProto(&data); err != nil {
   893  			return nil, err
   894  		}
   895  		_, source, _, _, err := DecodeDataKey(row.Key)
   896  		if err != nil {
   897  			return nil, err
   898  		}
   899  		sampleSize := sizeOfSample
   900  		if data.IsColumnar() {
   901  			sampleSize = sizeOfInt32 + sizeOfFloat64
   902  		}
   903  		if err := acc.Grow(
   904  			ctx, sampleSize*int64(data.SampleCount())+sizeOfTimeSeriesData,
   905  		); err != nil {
   906  			return nil, err
   907  		}
   908  		sourceSpans[source] = append(sourceSpans[source], data)
   909  	}
   910  	return sourceSpans, nil
   911  }
   912  
   913  func verifySourceAggregator(agg tspb.TimeSeriesQueryAggregator) error {
   914  	switch agg {
   915  	case tspb.TimeSeriesQueryAggregator_AVG:
   916  		return nil
   917  	case tspb.TimeSeriesQueryAggregator_SUM:
   918  		return nil
   919  	case tspb.TimeSeriesQueryAggregator_MIN:
   920  		return nil
   921  	case tspb.TimeSeriesQueryAggregator_MAX:
   922  		return nil
   923  	case tspb.TimeSeriesQueryAggregator_FIRST,
   924  		tspb.TimeSeriesQueryAggregator_LAST,
   925  		tspb.TimeSeriesQueryAggregator_VARIANCE:
   926  		return errors.Errorf("aggregator %s is not yet supported", agg.String())
   927  	}
   928  	return errors.Errorf("query specified unknown time series aggregator %s", agg.String())
   929  }
   930  
   931  func verifyDownsampler(downsampler tspb.TimeSeriesQueryAggregator) error {
   932  	switch downsampler {
   933  	case tspb.TimeSeriesQueryAggregator_AVG:
   934  		return nil
   935  	case tspb.TimeSeriesQueryAggregator_SUM:
   936  		return nil
   937  	case tspb.TimeSeriesQueryAggregator_MIN:
   938  		return nil
   939  	case tspb.TimeSeriesQueryAggregator_MAX:
   940  		return nil
   941  	case tspb.TimeSeriesQueryAggregator_FIRST,
   942  		tspb.TimeSeriesQueryAggregator_LAST,
   943  		tspb.TimeSeriesQueryAggregator_VARIANCE:
   944  		return errors.Errorf("downsampler %s is not yet supported", downsampler.String())
   945  	}
   946  	return errors.Errorf("query specified unknown time series downsampler %s", downsampler.String())
   947  }