github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/iterator_test.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package ts
    12  
    13  import (
    14  	"fmt"
    15  	"math"
    16  	"reflect"
    17  	"testing"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    20  	"github.com/cockroachdb/cockroach/pkg/ts/testmodel"
    21  	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
    22  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    23  	"github.com/gogo/protobuf/proto"
    24  	"github.com/kr/pretty"
    25  )
    26  
    27  // makeInternalRowData makes an InternalTimeSeriesData object from a collection
    28  // of data samples. The result will be in the soon-deprecated row format. Input
    29  // is the start timestamp, the sample duration, and the set of samples. As
    30  // opposed to the ToInternal() method, there are two key differences:
    31  //
    32  // 1. This method always procueds a single InternalTimeSeriesData object with
    33  // the provided startTimestamp, rather than breaking up the datapoints into
    34  // several slabs based on a slab duration.
    35  //
    36  // 2. The provided data samples are downsampled according to the sampleDuration,
    37  // mimicking the process that would be used to create a data rollup. Therefore,
    38  // the resulting InternalTimeSeriesData will have one sample for each sample
    39  // period.
    40  //
    41  // Sample data must be provided ordered by timestamp or the output will be
    42  // unpredictable.
    43  func makeInternalRowData(
    44  	startTimestamp, sampleDuration int64, samples []tspb.TimeSeriesDatapoint,
    45  ) roachpb.InternalTimeSeriesData {
    46  	// Adjust startTimestamp to an exact multiple of sampleDuration.
    47  	startTimestamp -= startTimestamp % sampleDuration
    48  	result := roachpb.InternalTimeSeriesData{
    49  		StartTimestampNanos: startTimestamp,
    50  		SampleDurationNanos: sampleDuration,
    51  		Samples:             make([]roachpb.InternalTimeSeriesSample, 0),
    52  	}
    53  
    54  	// Run through all samples, merging any consecutive samples which correspond
    55  	// to the same sample interval.
    56  	for _, sample := range samples {
    57  		offset := int32((sample.TimestampNanos - startTimestamp) / sampleDuration)
    58  		value := sample.Value
    59  
    60  		// Merge into the previous sample if we have the same offset.
    61  		if count := len(result.Samples); count > 0 && result.Samples[count-1].Offset == offset {
    62  			// Initialize max and min if necessary.
    63  			var min, max float64
    64  			if result.Samples[count-1].Count > 1 {
    65  				min, max = *result.Samples[count-1].Min, *result.Samples[count-1].Max
    66  			} else {
    67  				min, max = result.Samples[count-1].Sum, result.Samples[count-1].Sum
    68  			}
    69  
    70  			result.Samples[count-1].Count++
    71  			result.Samples[count-1].Sum += value
    72  			result.Samples[count-1].Min = proto.Float64(math.Min(min, value))
    73  			result.Samples[count-1].Max = proto.Float64(math.Max(max, value))
    74  		} else if count > 0 && result.Samples[count-1].Offset > offset {
    75  			panic("sample data provided to generateData must be ordered by timestamp.")
    76  		} else {
    77  			result.Samples = append(result.Samples, roachpb.InternalTimeSeriesSample{
    78  				Offset: offset,
    79  				Sum:    value,
    80  				Count:  1,
    81  			})
    82  		}
    83  	}
    84  
    85  	return result
    86  }
    87  
    88  // makeInternalRowData makes an InternalTimeSeriesData object from a collection
    89  // of data samples. The result will be in columnar format. Input is the start
    90  // timestamp, the sample duration, and the set of samples. As opposed to the
    91  // ToInternal() method, there are two key differences:
    92  //
    93  // 1. This method always procueds a single InternalTimeSeriesData object with
    94  // the provided startTimestamp, rather than breaking up the datapoints into
    95  // several slabs based on a slab duration.
    96  //
    97  // 2. The provided data samples are downsampled according to the sampleDuration,
    98  // mimicking the process that would be used to create a data rollup. Therefore,
    99  // the resulting InternalTimeSeriesData will have one entry for each offset
   100  // period. Additionally, if there are multiple datapoints in any sample period,
   101  // then the desired result is assumed to be a rollup and every resulting sample
   102  // period will have values for all rollup columns.
   103  //
   104  // Sample data must be provided ordered by timestamp or the output will be
   105  // unpredictable.
   106  func makeInternalColumnData(
   107  	startTimestamp, sampleDuration int64, samples []tspb.TimeSeriesDatapoint,
   108  ) roachpb.InternalTimeSeriesData {
   109  	// Adjust startTimestamp to an exact multiple of sampleDuration.
   110  	startTimestamp -= startTimestamp % sampleDuration
   111  	result := roachpb.InternalTimeSeriesData{
   112  		StartTimestampNanos: startTimestamp,
   113  		SampleDurationNanos: sampleDuration,
   114  	}
   115  
   116  	// Run through all samples, merging any consecutive samples which correspond
   117  	// to the same sample interval. Assume that the data will contain relevant
   118  	// roll-ups, but discard the roll-up data if there is only one sample per
   119  	// sample period.
   120  	isRollup := false
   121  
   122  	// Variance computation must consider each value against the the average.
   123  	// Retain the component values of each column and compute a variance.
   124  	valuesForSample := make([]float64, 0, 1)
   125  	computeVariance := func() float64 {
   126  		variance := 0.0
   127  		if len(valuesForSample) > 1 {
   128  			// Compute average of values.
   129  			sum := 0.0
   130  			for _, value := range valuesForSample {
   131  				sum += value
   132  			}
   133  			avg := sum / float64(len(valuesForSample))
   134  
   135  			// Compute variance of values using the average.
   136  			totalSquaredDeviation := 0.0
   137  			for _, value := range valuesForSample {
   138  				totalSquaredDeviation += math.Pow(value-avg, 2)
   139  			}
   140  			variance = totalSquaredDeviation / float64(len(valuesForSample))
   141  		}
   142  		// Reset value collection.
   143  		valuesForSample = valuesForSample[:0]
   144  		return variance
   145  	}
   146  
   147  	for _, sample := range samples {
   148  		offset := result.OffsetForTimestamp(sample.TimestampNanos)
   149  		value := sample.Value
   150  
   151  		// Merge into the previous sample if we have the same offset.
   152  		if count := len(result.Offset); count > 0 && result.Offset[count-1] == offset {
   153  			isRollup = true
   154  			result.Last[count-1] = value
   155  			result.Count[count-1]++
   156  			result.Sum[count-1] += value
   157  			result.Max[count-1] = math.Max(result.Max[count-1], value)
   158  			result.Min[count-1] = math.Min(result.Min[count-1], value)
   159  			valuesForSample = append(valuesForSample, value)
   160  		} else if count > 0 && result.Offset[count-1] > offset {
   161  			panic("sample data provided to generateData must be ordered by timestamp.")
   162  		} else {
   163  			// Compute variance for previous sample if there was more than one
   164  			// value.
   165  			if len(valuesForSample) > 1 {
   166  				result.Variance[count-1] = computeVariance()
   167  			} else {
   168  				valuesForSample = valuesForSample[:0]
   169  			}
   170  
   171  			result.Offset = append(result.Offset, offset)
   172  			result.Last = append(result.Last, value)
   173  			result.First = append(result.First, value)
   174  			result.Count = append(result.Count, 1)
   175  			result.Sum = append(result.Sum, value)
   176  			result.Min = append(result.Min, value)
   177  			result.Max = append(result.Max, value)
   178  			result.Variance = append(result.Variance, 0)
   179  			valuesForSample = append(valuesForSample, value)
   180  		}
   181  	}
   182  
   183  	// Compute variance for last sample.
   184  	result.Variance[len(result.Variance)-1] = computeVariance()
   185  
   186  	if !isRollup {
   187  		result.First = nil
   188  		result.Count = nil
   189  		result.Sum = nil
   190  		result.Min = nil
   191  		result.Max = nil
   192  		result.Variance = nil
   193  	}
   194  
   195  	return result
   196  }
   197  
   198  func TestMakeInternalData(t *testing.T) {
   199  	defer leaktest.AfterTest(t)()
   200  	data := []tspb.TimeSeriesDatapoint{
   201  		tsdp(110, 20),
   202  		tsdp(120, 300),
   203  		tsdp(130, 400),
   204  		tsdp(140, 800),
   205  		tsdp(180, 200),
   206  		tsdp(190, 240),
   207  		tsdp(210, 500),
   208  		tsdp(230, 490),
   209  		tsdp(320, 590),
   210  		tsdp(350, 990),
   211  	}
   212  
   213  	// Confirm non-rollup case.
   214  	nonRollupRow := makeInternalRowData(50, 10, data)
   215  	nonRollupColumn := makeInternalColumnData(50, 10, data)
   216  	expectedNonRollupRow := roachpb.InternalTimeSeriesData{
   217  		StartTimestampNanos: 50,
   218  		SampleDurationNanos: 10,
   219  	}
   220  	expectedNonRollupColumn := expectedNonRollupRow
   221  	for _, val := range data {
   222  		offset := int32((val.TimestampNanos - 50) / 10)
   223  		expectedNonRollupRow.Samples = append(expectedNonRollupRow.Samples, roachpb.InternalTimeSeriesSample{
   224  			Offset: offset,
   225  			Count:  1,
   226  			Sum:    val.Value,
   227  		})
   228  		expectedNonRollupColumn.Offset = append(expectedNonRollupColumn.Offset, offset)
   229  		expectedNonRollupColumn.Last = append(expectedNonRollupColumn.Last, val.Value)
   230  	}
   231  	if a, e := nonRollupRow, expectedNonRollupRow; !reflect.DeepEqual(a, e) {
   232  		t.Errorf("nonRollupRow got %v, wanted %v", a, e)
   233  	}
   234  	if a, e := nonRollupColumn, expectedNonRollupColumn; !reflect.DeepEqual(a, e) {
   235  		t.Errorf("nonRollupColumn got %v, wanted %v", a, e)
   236  	}
   237  
   238  	// Confirm rollup-generating case. Values are checked against the
   239  	// independently-verified methods of the testmodel package.
   240  	rollupRow := makeInternalRowData(50, 50, data)
   241  	rollupColumn := makeInternalColumnData(50, 50, data)
   242  	expectedRollupRow := roachpb.InternalTimeSeriesData{
   243  		StartTimestampNanos: 50,
   244  		SampleDurationNanos: 50,
   245  	}
   246  	expectedRollupColumn := expectedRollupRow
   247  
   248  	dataSeries := testmodel.DataSeries(data)
   249  	// Last and Offset column.
   250  	for _, dp := range dataSeries.GroupByResolution(50, testmodel.AggregateLast) {
   251  		offset := int32((dp.TimestampNanos - 50) / 50)
   252  		expectedRollupRow.Samples = append(expectedRollupRow.Samples, roachpb.InternalTimeSeriesSample{
   253  			Offset: offset,
   254  		})
   255  		expectedRollupColumn.Offset = append(expectedRollupColumn.Offset, offset)
   256  		expectedRollupColumn.Last = append(expectedRollupColumn.Last, dp.Value)
   257  	}
   258  	// Sum column.
   259  	for i, dp := range dataSeries.GroupByResolution(50, testmodel.AggregateSum) {
   260  		expectedRollupRow.Samples[i].Sum = dp.Value
   261  		expectedRollupColumn.Sum = append(expectedRollupColumn.Sum, dp.Value)
   262  	}
   263  	// Max column.
   264  	for i, dp := range dataSeries.GroupByResolution(50, testmodel.AggregateMax) {
   265  		expectedRollupRow.Samples[i].Max = proto.Float64(dp.Value)
   266  		expectedRollupColumn.Max = append(expectedRollupColumn.Max, dp.Value)
   267  	}
   268  	// Min column.
   269  	for i, dp := range dataSeries.GroupByResolution(50, testmodel.AggregateMin) {
   270  		expectedRollupRow.Samples[i].Min = proto.Float64(dp.Value)
   271  		expectedRollupColumn.Min = append(expectedRollupColumn.Min, dp.Value)
   272  	}
   273  	// Count column.
   274  	for i, dp := range dataSeries.GroupByResolution(50, func(ds testmodel.DataSeries) float64 {
   275  		return float64(len(ds))
   276  	}) {
   277  		count := uint32(int32(dp.Value))
   278  		expectedRollupRow.Samples[i].Count = count
   279  		// Min and max are omitted from samples with a count of 1.
   280  		if count < 2 {
   281  			expectedRollupRow.Samples[i].Min = nil
   282  			expectedRollupRow.Samples[i].Max = nil
   283  		}
   284  		expectedRollupColumn.Count = append(expectedRollupColumn.Count, count)
   285  	}
   286  	// First column.
   287  	for _, dp := range dataSeries.GroupByResolution(50, testmodel.AggregateFirst) {
   288  		expectedRollupColumn.First = append(expectedRollupColumn.First, dp.Value)
   289  	}
   290  	// Variance column.
   291  	for _, dp := range dataSeries.GroupByResolution(50, testmodel.AggregateVariance) {
   292  		expectedRollupColumn.Variance = append(expectedRollupColumn.Variance, dp.Value)
   293  	}
   294  
   295  	if a, e := rollupRow, expectedRollupRow; !reflect.DeepEqual(a, e) {
   296  		t.Errorf("rollupRow got %v, wanted %v", a, e)
   297  		for _, diff := range pretty.Diff(a, e) {
   298  			t.Error(diff)
   299  		}
   300  	}
   301  	if a, e := rollupColumn, expectedRollupColumn; !reflect.DeepEqual(a, e) {
   302  		t.Errorf("rollupColumn got %v, wanted %v", a, e)
   303  		for _, diff := range pretty.Diff(a, e) {
   304  			t.Error(diff)
   305  		}
   306  	}
   307  }
   308  
   309  func verifySpanIteratorPosition(t *testing.T, actual, expected timeSeriesSpanIterator) {
   310  	t.Helper()
   311  	if a, e := actual.total, expected.total; a != e {
   312  		t.Errorf("iterator had total index of %d, wanted %d", a, e)
   313  	}
   314  	if a, e := actual.inner, expected.inner; a != e {
   315  		t.Errorf("iterator had inner index of %d, wanted %d", a, e)
   316  	}
   317  	if a, e := actual.outer, expected.outer; a != e {
   318  		t.Errorf("iterator had outer index of %d, wanted %d", a, e)
   319  	}
   320  	if a, e := actual.timestamp, expected.timestamp; a != e {
   321  		t.Errorf("iterator had timestamp of %d, wanted %d", a, e)
   322  	}
   323  	if a, e := actual.length, expected.length; a != e {
   324  		t.Errorf("iterator had length of %d, wanted %d", a, e)
   325  	}
   326  }
   327  
   328  func TestTimeSeriesSpanIteratorMovement(t *testing.T) {
   329  	defer leaktest.AfterTest(t)()
   330  
   331  	// initialize explicit iterator results for the entire span - this makes the
   332  	// movement tests easier to read, as we are often asserting that the same
   333  	// position.
   334  	explicitPositions := []timeSeriesSpanIterator{
   335  		{
   336  			timestamp: 10,
   337  			length:    6,
   338  		},
   339  		{
   340  			total:     1,
   341  			outer:     0,
   342  			inner:     1,
   343  			timestamp: 20,
   344  			length:    6,
   345  		},
   346  		{
   347  			total:     2,
   348  			outer:     1,
   349  			inner:     0,
   350  			timestamp: 30,
   351  			length:    6,
   352  		},
   353  		{
   354  			total:     3,
   355  			outer:     2,
   356  			inner:     0,
   357  			timestamp: 50,
   358  			length:    6,
   359  		},
   360  		{
   361  			total:     4,
   362  			outer:     2,
   363  			inner:     1,
   364  			timestamp: 70,
   365  			length:    6,
   366  		},
   367  		{
   368  			total:     5,
   369  			outer:     2,
   370  			inner:     2,
   371  			timestamp: 90,
   372  			length:    6,
   373  		},
   374  		{
   375  			total:     6,
   376  			outer:     3,
   377  			inner:     0,
   378  			timestamp: 0,
   379  			length:    6,
   380  		},
   381  	}
   382  
   383  	// Initial position.
   384  	verifyIterTest := func(t *testing.T, iter timeSeriesSpanIterator) {
   385  		verifySpanIteratorPosition(t, iter, explicitPositions[0])
   386  
   387  		// Forwarding.
   388  		iter.forward()
   389  		verifySpanIteratorPosition(t, iter, explicitPositions[1])
   390  		iter.forward()
   391  		verifySpanIteratorPosition(t, iter, explicitPositions[2])
   392  
   393  		iter.forward()
   394  		iter.forward()
   395  		iter.forward()
   396  		iter.forward()
   397  		verifySpanIteratorPosition(t, iter, explicitPositions[6])
   398  		iter.forward()
   399  		verifySpanIteratorPosition(t, iter, explicitPositions[6])
   400  
   401  		// Backwards.
   402  		iter.backward()
   403  		verifySpanIteratorPosition(t, iter, explicitPositions[5])
   404  		iter.backward()
   405  		iter.backward()
   406  		iter.backward()
   407  		iter.backward()
   408  		verifySpanIteratorPosition(t, iter, explicitPositions[1])
   409  		iter.backward()
   410  		iter.backward()
   411  		iter.backward()
   412  		verifySpanIteratorPosition(t, iter, explicitPositions[0])
   413  
   414  		// Seek index.
   415  		iter.seekIndex(2)
   416  		verifySpanIteratorPosition(t, iter, explicitPositions[2])
   417  		iter.seekIndex(4)
   418  		verifySpanIteratorPosition(t, iter, explicitPositions[4])
   419  		iter.seekIndex(0)
   420  		verifySpanIteratorPosition(t, iter, explicitPositions[0])
   421  		iter.seekIndex(1000)
   422  		verifySpanIteratorPosition(t, iter, explicitPositions[6])
   423  		iter.seekIndex(-1)
   424  		verifySpanIteratorPosition(t, iter, explicitPositions[0])
   425  
   426  		// Seek timestamp.
   427  		iter.seekTimestamp(0)
   428  		verifySpanIteratorPosition(t, iter, explicitPositions[0])
   429  		iter.seekTimestamp(15)
   430  		verifySpanIteratorPosition(t, iter, explicitPositions[1])
   431  		iter.seekTimestamp(50)
   432  		verifySpanIteratorPosition(t, iter, explicitPositions[3])
   433  		iter.seekTimestamp(80)
   434  		verifySpanIteratorPosition(t, iter, explicitPositions[5])
   435  		iter.seekTimestamp(10000)
   436  		verifySpanIteratorPosition(t, iter, explicitPositions[6])
   437  	}
   438  
   439  	// Row data only.
   440  	t.Run("row only", func(t *testing.T) {
   441  		verifyIterTest(t, makeTimeSeriesSpanIterator(timeSeriesSpan{
   442  			makeInternalRowData(0, 10, []tspb.TimeSeriesDatapoint{
   443  				tsdp(10, 1),
   444  				tsdp(20, 2),
   445  			}),
   446  			makeInternalRowData(30, 10, []tspb.TimeSeriesDatapoint{
   447  				tsdp(30, 3),
   448  			}),
   449  			makeInternalRowData(50, 10, []tspb.TimeSeriesDatapoint{
   450  				tsdp(50, 5),
   451  				tsdp(70, 7),
   452  				tsdp(90, 9),
   453  			}),
   454  		}))
   455  	})
   456  
   457  	t.Run("columns only", func(t *testing.T) {
   458  		verifyIterTest(t, makeTimeSeriesSpanIterator(timeSeriesSpan{
   459  			makeInternalColumnData(0, 10, []tspb.TimeSeriesDatapoint{
   460  				tsdp(10, 1),
   461  				tsdp(20, 2),
   462  			}),
   463  			makeInternalColumnData(30, 10, []tspb.TimeSeriesDatapoint{
   464  				tsdp(30, 3),
   465  			}),
   466  			makeInternalColumnData(50, 10, []tspb.TimeSeriesDatapoint{
   467  				tsdp(50, 5),
   468  				tsdp(70, 7),
   469  				tsdp(90, 9),
   470  			}),
   471  		}))
   472  	})
   473  
   474  	t.Run("mixed rows and columns", func(t *testing.T) {
   475  		verifyIterTest(t, makeTimeSeriesSpanIterator(timeSeriesSpan{
   476  			makeInternalRowData(0, 10, []tspb.TimeSeriesDatapoint{
   477  				tsdp(10, 1),
   478  				tsdp(20, 2),
   479  			}),
   480  			makeInternalColumnData(30, 10, []tspb.TimeSeriesDatapoint{
   481  				tsdp(30, 3),
   482  			}),
   483  			makeInternalRowData(50, 10, []tspb.TimeSeriesDatapoint{
   484  				tsdp(50, 5),
   485  				tsdp(70, 7),
   486  				tsdp(90, 9),
   487  			}),
   488  		}))
   489  	})
   490  }
   491  
   492  func TestTimeSeriesSpanIteratorValues(t *testing.T) {
   493  	defer leaktest.AfterTest(t)()
   494  	iter := makeTimeSeriesSpanIterator(timeSeriesSpan{
   495  		makeInternalRowData(0, 10, []tspb.TimeSeriesDatapoint{
   496  			tsdp(10, 1),
   497  			tsdp(20, 2),
   498  			tsdp(20, 4),
   499  		}),
   500  		makeInternalRowData(30, 10, []tspb.TimeSeriesDatapoint{
   501  			tsdp(30, 3),
   502  			tsdp(30, 6),
   503  			tsdp(30, 9),
   504  		}),
   505  		makeInternalRowData(50, 10, []tspb.TimeSeriesDatapoint{
   506  			tsdp(50, 12),
   507  			tsdp(70, 700),
   508  			tsdp(90, 9),
   509  		}),
   510  	})
   511  
   512  	iter.seekTimestamp(30)
   513  	for _, tc := range []struct {
   514  		agg           tspb.TimeSeriesQueryAggregator
   515  		expected      float64
   516  		expectedDeriv float64
   517  	}{
   518  		{
   519  			agg:           tspb.TimeSeriesQueryAggregator_AVG,
   520  			expected:      6,
   521  			expectedDeriv: 3,
   522  		},
   523  		{
   524  			agg:           tspb.TimeSeriesQueryAggregator_SUM,
   525  			expected:      18,
   526  			expectedDeriv: 12,
   527  		},
   528  		{
   529  			agg:           tspb.TimeSeriesQueryAggregator_MIN,
   530  			expected:      3,
   531  			expectedDeriv: 1,
   532  		},
   533  		{
   534  			agg:           tspb.TimeSeriesQueryAggregator_MAX,
   535  			expected:      9,
   536  			expectedDeriv: 5,
   537  		},
   538  	} {
   539  		t.Run("value", func(t *testing.T) {
   540  			if a, e := iter.value(tc.agg), tc.expected; a != e {
   541  				t.Errorf("value for %s of iter got %f, wanted %f", tc.agg.String(), a, e)
   542  			}
   543  			deriv, valid := iter.derivative(tc.agg)
   544  			if !valid {
   545  				t.Errorf("expected derivative to be valid, was invalid")
   546  			}
   547  			if a, e := deriv, tc.expectedDeriv; a != e {
   548  				t.Errorf("derivative for %s of iter got %f, wanted %f", tc.agg.String(), a, e)
   549  			}
   550  		})
   551  	}
   552  
   553  	// Test value interpolation.
   554  	iter.seekTimestamp(50)
   555  	for _, tc := range []struct {
   556  		timestamp          int64
   557  		interpolationLimit int64
   558  		expectedValid      bool
   559  		expectedValue      float64
   560  	}{
   561  		{50, 100, true, 12},
   562  		{50, 1, true, 12},
   563  		// Must interpolate in between points.
   564  		{30, 100, false, 0},
   565  		{60, 100, false, 0},
   566  		// Interpolation limit is respected
   567  		{40, 100, true, 9},
   568  		{40, 20, true, 9},
   569  		{40, 19, false, 0},
   570  		// Interpolation limit 0 is still a special case.
   571  		{40, 0, true, 9},
   572  	} {
   573  		interpValue, valid := iter.valueAtTimestamp(tc.timestamp, tc.interpolationLimit, tspb.TimeSeriesQueryAggregator_AVG)
   574  		if valid != tc.expectedValid {
   575  			t.Errorf("valueAtTimestamp valid was %t, wanted %t", valid, tc.expectedValid)
   576  			continue
   577  		}
   578  		if a, e := interpValue, tc.expectedValue; a != e {
   579  			t.Errorf("valueAtTimestamp %d got %f, wanted %f", tc.timestamp, a, e)
   580  		}
   581  	}
   582  
   583  	// Special case: no derivative available at index 0.
   584  	iter.seekIndex(0)
   585  	if _, valid := iter.valueAtTimestamp(20, 1000, tspb.TimeSeriesQueryAggregator_AVG); valid {
   586  		t.Errorf("expected valueAtTimestamp to be invalid at index 0, was valid")
   587  	}
   588  	if _, valid := iter.derivative(tspb.TimeSeriesQueryAggregator_AVG); valid {
   589  		t.Errorf("expected deriv to be invalid at index 0, was valid")
   590  	}
   591  }
   592  
   593  // dataDesc is used to describe an internal data structure independently of it
   594  // being formatted using rows or columns.
   595  type dataDesc struct {
   596  	startTimestamp int64
   597  	sampleDuration int64
   598  	samples        []tspb.TimeSeriesDatapoint
   599  }
   600  
   601  func TestDownsampleSpans(t *testing.T) {
   602  	defer leaktest.AfterTest(t)()
   603  
   604  	// Each test case is structured as such:
   605  	// + A description of an "input" span, which describes a list of
   606  	// InternalTimeSeriesData structures that will be assembled into a data span.
   607  	// Each structure has a start timestamp, a sample period (should be the same
   608  	// for all structure), and a set of data samples.
   609  	// + A sample period, which should be greater than or equal to the sample
   610  	// period of the input span structures.
   611  	// + A downsampler operation.
   612  	// + A description of an "expected" span, which describes the list of
   613  	// InternalTimeSeriesData structures that should result from running the input
   614  	// span through the downsampling operation.
   615  	//
   616  	// Importantly, both the "input" and "expected" spans are defined using the
   617  	// dataDesc structure, rather than explicitly creating InternalTimeSeriesData
   618  	// structures. This is because we want to test both the row format and the
   619  	// columnar format of InternalTimeSeriesData when downsampling - therefore,
   620  	// using the descriptors, each test case is run using first row-structured
   621  	// data, then column-structured data, and finally a mixed-format test which
   622  	// combines the two. This gives us a very broad test area while still
   623  	// maintaining a compact set of test cases.
   624  	for tcnum, tc := range []struct {
   625  		inputDesc    []dataDesc
   626  		samplePeriod int64
   627  		downsampler  tspb.TimeSeriesQueryAggregator
   628  		expectedDesc []dataDesc
   629  	}{
   630  		// Original sample period, average downsampler.
   631  		{
   632  			inputDesc: []dataDesc{
   633  				{0, 10, []tspb.TimeSeriesDatapoint{
   634  					tsdp(10, 1),
   635  					tsdp(20, 2),
   636  					tsdp(20, 4),
   637  					tsdp(30, 5),
   638  				}},
   639  				{50, 10, []tspb.TimeSeriesDatapoint{
   640  					tsdp(50, 5),
   641  					tsdp(60, 6),
   642  				}},
   643  			},
   644  			samplePeriod: 10,
   645  			downsampler:  tspb.TimeSeriesQueryAggregator_AVG,
   646  			expectedDesc: []dataDesc{
   647  				{0, 10, []tspb.TimeSeriesDatapoint{
   648  					tsdp(10, 1),
   649  					tsdp(20, 3),
   650  					tsdp(30, 5),
   651  				}},
   652  				{50, 10, []tspb.TimeSeriesDatapoint{
   653  					tsdp(50, 5),
   654  					tsdp(60, 6),
   655  				}},
   656  			},
   657  		},
   658  		// Original sample period, max downsampler. Should fill in max value.
   659  		{
   660  			inputDesc: []dataDesc{
   661  				{0, 10, []tspb.TimeSeriesDatapoint{
   662  					tsdp(10, 1),
   663  					tsdp(20, 2),
   664  					tsdp(20, 4),
   665  					tsdp(30, 5),
   666  				}},
   667  				{50, 10, []tspb.TimeSeriesDatapoint{
   668  					tsdp(50, 5),
   669  					tsdp(60, 6),
   670  				}},
   671  			},
   672  			samplePeriod: 10,
   673  			downsampler:  tspb.TimeSeriesQueryAggregator_MAX,
   674  			expectedDesc: []dataDesc{
   675  				{0, 10, []tspb.TimeSeriesDatapoint{
   676  					tsdp(10, 1),
   677  					tsdp(20, 4),
   678  					tsdp(30, 5),
   679  				}},
   680  				{50, 10, []tspb.TimeSeriesDatapoint{
   681  					tsdp(50, 5),
   682  					tsdp(60, 6),
   683  				}},
   684  			},
   685  		},
   686  		// Original sample period, min downsampler.
   687  		{
   688  			inputDesc: []dataDesc{
   689  				{0, 10, []tspb.TimeSeriesDatapoint{
   690  					tsdp(10, 1),
   691  					tsdp(20, 2),
   692  					tsdp(20, 4),
   693  					tsdp(30, 5),
   694  				}},
   695  				{50, 10, []tspb.TimeSeriesDatapoint{
   696  					tsdp(50, 5),
   697  					tsdp(60, 6),
   698  				}},
   699  			},
   700  			samplePeriod: 10,
   701  			downsampler:  tspb.TimeSeriesQueryAggregator_MIN,
   702  			expectedDesc: []dataDesc{
   703  				{0, 10, []tspb.TimeSeriesDatapoint{
   704  					tsdp(10, 1),
   705  					tsdp(20, 2),
   706  					tsdp(30, 5),
   707  				}},
   708  				{50, 10, []tspb.TimeSeriesDatapoint{
   709  					tsdp(50, 5),
   710  					tsdp(60, 6),
   711  				}},
   712  			},
   713  		},
   714  		// AVG downsamper. Should re-use original span data.
   715  		{
   716  			inputDesc: []dataDesc{
   717  				{0, 10, []tspb.TimeSeriesDatapoint{
   718  					tsdp(10, 1),
   719  					tsdp(20, 2),
   720  					tsdp(20, 4),
   721  					tsdp(30, 5),
   722  				}},
   723  				{50, 10, []tspb.TimeSeriesDatapoint{
   724  					tsdp(50, 5),
   725  					tsdp(60, 6),
   726  				}},
   727  				{70, 10, []tspb.TimeSeriesDatapoint{
   728  					tsdp(70, 7),
   729  					tsdp(90, 9),
   730  					tsdp(110, 8),
   731  				}},
   732  			},
   733  			samplePeriod: 50,
   734  			downsampler:  tspb.TimeSeriesQueryAggregator_AVG,
   735  			expectedDesc: []dataDesc{
   736  				{0, 10, []tspb.TimeSeriesDatapoint{
   737  					tsdp(0, 3),
   738  					tsdp(50, 6.75),
   739  					tsdp(100, 8),
   740  				}},
   741  			},
   742  		},
   743  		// MAX downsamper. Should re-use original span data; note that the sum and
   744  		// count values are NOT overwritten.
   745  		{
   746  			inputDesc: []dataDesc{
   747  				{0, 10, []tspb.TimeSeriesDatapoint{
   748  					tsdp(10, 1),
   749  					tsdp(20, 2),
   750  					tsdp(20, 4),
   751  					tsdp(30, 5),
   752  				}},
   753  				{50, 10, []tspb.TimeSeriesDatapoint{
   754  					tsdp(50, 5),
   755  					tsdp(60, 6),
   756  				}},
   757  				{70, 10, []tspb.TimeSeriesDatapoint{
   758  					tsdp(70, 7),
   759  					tsdp(90, 9),
   760  					tsdp(110, 8),
   761  				}},
   762  			},
   763  			samplePeriod: 50,
   764  			downsampler:  tspb.TimeSeriesQueryAggregator_MAX,
   765  			expectedDesc: []dataDesc{
   766  				{0, 10, []tspb.TimeSeriesDatapoint{
   767  					tsdp(0, 5),
   768  					tsdp(50, 9),
   769  					tsdp(100, 8),
   770  				}},
   771  			},
   772  		},
   773  		// MIN downsamper. Should re-use original span data; note that the sum and
   774  		// count values are NOT overwritten.
   775  		{
   776  			inputDesc: []dataDesc{
   777  				{0, 10, []tspb.TimeSeriesDatapoint{
   778  					tsdp(10, 1),
   779  					tsdp(20, 2),
   780  					tsdp(20, 4),
   781  					tsdp(30, 5),
   782  				}},
   783  				{50, 10, []tspb.TimeSeriesDatapoint{
   784  					tsdp(50, 5),
   785  					tsdp(60, 6),
   786  				}},
   787  				{70, 10, []tspb.TimeSeriesDatapoint{
   788  					tsdp(70, 7),
   789  					tsdp(90, 9),
   790  					tsdp(110, 8),
   791  				}},
   792  			},
   793  			samplePeriod: 50,
   794  			downsampler:  tspb.TimeSeriesQueryAggregator_MIN,
   795  			expectedDesc: []dataDesc{
   796  				{0, 10, []tspb.TimeSeriesDatapoint{
   797  					tsdp(0, 1),
   798  					tsdp(50, 5),
   799  					tsdp(100, 8),
   800  				}},
   801  			},
   802  		},
   803  		// AVG downsampler, downsampling while re-using multiple
   804  		// InternalTimeSeriesData structures.
   805  		{
   806  			inputDesc: []dataDesc{
   807  				{0, 10, []tspb.TimeSeriesDatapoint{
   808  					tsdp(10, 1),
   809  				}},
   810  				{50, 10, []tspb.TimeSeriesDatapoint{
   811  					tsdp(50, 5),
   812  					tsdp(60, 6),
   813  				}},
   814  				{70, 10, []tspb.TimeSeriesDatapoint{
   815  					tsdp(70, 7),
   816  					tsdp(90, 9),
   817  					tsdp(110, 8),
   818  				}},
   819  			},
   820  			samplePeriod: 50,
   821  			downsampler:  tspb.TimeSeriesQueryAggregator_AVG,
   822  			expectedDesc: []dataDesc{
   823  				{0, 10, []tspb.TimeSeriesDatapoint{
   824  					tsdp(0, 1),
   825  				}},
   826  				{50, 10, []tspb.TimeSeriesDatapoint{
   827  					tsdp(50, 6.75),
   828  					tsdp(100, 8),
   829  				}},
   830  			},
   831  		},
   832  		// MAX downsampler, downsampling while re-using multiple
   833  		// InternalTimeSeriesData structures.
   834  		{
   835  			inputDesc: []dataDesc{
   836  				{0, 10, []tspb.TimeSeriesDatapoint{
   837  					tsdp(10, 1),
   838  				}},
   839  				{50, 10, []tspb.TimeSeriesDatapoint{
   840  					tsdp(50, 5),
   841  					tsdp(60, 6),
   842  				}},
   843  				{70, 10, []tspb.TimeSeriesDatapoint{
   844  					tsdp(70, 7),
   845  					tsdp(90, 9),
   846  					tsdp(110, 8),
   847  				}},
   848  			},
   849  			samplePeriod: 50,
   850  			downsampler:  tspb.TimeSeriesQueryAggregator_MAX,
   851  			expectedDesc: []dataDesc{
   852  				{0, 10, []tspb.TimeSeriesDatapoint{
   853  					tsdp(0, 1),
   854  				}},
   855  				{50, 10, []tspb.TimeSeriesDatapoint{
   856  					tsdp(50, 9),
   857  					tsdp(100, 8),
   858  				}},
   859  			},
   860  		},
   861  	} {
   862  
   863  		// Run case in Row format.
   864  		t.Run(fmt.Sprintf("%d:Row", tcnum), func(t *testing.T) {
   865  			span := make(timeSeriesSpan, len(tc.inputDesc))
   866  			for i, desc := range tc.inputDesc {
   867  				span[i] = makeInternalRowData(desc.startTimestamp, desc.sampleDuration, desc.samples)
   868  			}
   869  			expectedSpan := make(timeSeriesSpan, len(tc.expectedDesc))
   870  			for i, desc := range tc.expectedDesc {
   871  				expectedSpan[i] = makeInternalRowData(desc.startTimestamp, desc.sampleDuration, desc.samples)
   872  			}
   873  			spans := map[string]timeSeriesSpan{
   874  				"test": span,
   875  			}
   876  			downsampleSpans(spans, tc.samplePeriod, tc.downsampler)
   877  			if a, e := spans["test"], expectedSpan; !reflect.DeepEqual(a, e) {
   878  				for _, diff := range pretty.Diff(a, e) {
   879  					t.Error(diff)
   880  				}
   881  			}
   882  		})
   883  
   884  		// Run case in Column format.
   885  		t.Run(fmt.Sprintf("%d:Column", tcnum), func(t *testing.T) {
   886  			span := make(timeSeriesSpan, len(tc.inputDesc))
   887  			for i, desc := range tc.inputDesc {
   888  				span[i] = makeInternalColumnData(desc.startTimestamp, desc.sampleDuration, desc.samples)
   889  			}
   890  			expectedSpan := make(timeSeriesSpan, len(tc.expectedDesc))
   891  			for i, desc := range tc.expectedDesc {
   892  				expectedSpan[i] = makeInternalColumnData(desc.startTimestamp, desc.sampleDuration, desc.samples)
   893  			}
   894  			spans := map[string]timeSeriesSpan{
   895  				"test": span,
   896  			}
   897  			downsampleSpans(spans, tc.samplePeriod, tc.downsampler)
   898  			if a, e := spans["test"], expectedSpan; !reflect.DeepEqual(a, e) {
   899  				for _, diff := range pretty.Diff(a, e) {
   900  					t.Error(diff)
   901  				}
   902  			}
   903  		})
   904  
   905  		// Run case in Mixed format.
   906  		t.Run(fmt.Sprintf("%d:Mixed", tcnum), func(t *testing.T) {
   907  			span := make(timeSeriesSpan, len(tc.inputDesc))
   908  			for i, desc := range tc.inputDesc {
   909  				if i%2 == 0 {
   910  					span[i] = makeInternalRowData(desc.startTimestamp, desc.sampleDuration, desc.samples)
   911  				} else {
   912  					span[i] = makeInternalColumnData(desc.startTimestamp, desc.sampleDuration, desc.samples)
   913  				}
   914  			}
   915  			expectedSpan := make(timeSeriesSpan, len(tc.expectedDesc))
   916  			for i, desc := range tc.expectedDesc {
   917  				if i%2 == 0 {
   918  					expectedSpan[i] = makeInternalRowData(desc.startTimestamp, desc.sampleDuration, desc.samples)
   919  				} else {
   920  					expectedSpan[i] = makeInternalColumnData(desc.startTimestamp, desc.sampleDuration, desc.samples)
   921  				}
   922  			}
   923  			spans := map[string]timeSeriesSpan{
   924  				"test": span,
   925  			}
   926  			downsampleSpans(spans, tc.samplePeriod, tc.downsampler)
   927  			if a, e := spans["test"], expectedSpan; !reflect.DeepEqual(a, e) {
   928  				for _, diff := range pretty.Diff(a, e) {
   929  					t.Error(diff)
   930  				}
   931  			}
   932  		})
   933  	}
   934  }