github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/testmodel/data.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package testmodel
    12  
    13  import (
    14  	"math"
    15  	"sort"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
    18  )
    19  
    20  // dp is a shorthand function for constructing a TimeSeriesDatapoint, used for
    21  // convenience in tests.
    22  func dp(timestamp int64, value float64) tspb.TimeSeriesDatapoint {
    23  	return tspb.TimeSeriesDatapoint{
    24  		TimestampNanos: timestamp,
    25  		Value:          value,
    26  	}
    27  }
    28  
    29  // DataSeries represents a series of data points ordered by timestamp.
    30  type DataSeries []tspb.TimeSeriesDatapoint
    31  
    32  func (data DataSeries) Len() int           { return len(data) }
    33  func (data DataSeries) Swap(i, j int)      { data[i], data[j] = data[j], data[i] }
    34  func (data DataSeries) Less(i, j int) bool { return data[i].TimestampNanos < data[j].TimestampNanos }
    35  
    36  func normalizeTime(time, resolution int64) int64 {
    37  	return time - time%resolution
    38  }
    39  
    40  // TimeSlice returns the set of the dataPoints from the supplied series with
    41  // timestamps that fall in the interval [start, end) (not inclusive of end
    42  // timestamp).
    43  func (data DataSeries) TimeSlice(start, end int64) DataSeries {
    44  	startIdx := sort.Search(len(data), func(i int) bool {
    45  		return data[i].TimestampNanos >= start
    46  	})
    47  	endIdx := sort.Search(len(data), func(i int) bool {
    48  		return end <= data[i].TimestampNanos
    49  	})
    50  
    51  	result := data[startIdx:endIdx]
    52  	if len(result) == 0 {
    53  		return nil
    54  	}
    55  	return result
    56  }
    57  
    58  // GroupByResolution aggregates data points in the given series into time
    59  // buckets based on the provided resolution.
    60  func (data DataSeries) GroupByResolution(resolution int64, aggFunc aggFunc) DataSeries {
    61  	if len(data) == 0 {
    62  		return nil
    63  	}
    64  
    65  	result := make(DataSeries, 0)
    66  
    67  	for len(data) > 0 {
    68  		bucketTime := normalizeTime(data[0].TimestampNanos, resolution)
    69  		// Grab the index of the first data point which does not belong to the same
    70  		// bucket as the start data point.
    71  		bucketEndIdx := sort.Search(len(data), func(idx int) bool {
    72  			return normalizeTime(data[idx].TimestampNanos, resolution) > bucketTime
    73  		})
    74  		// Compute the next point as an aggregate of all underlying points which
    75  		// go in the same bucket.
    76  		result = append(result, dp(bucketTime, aggFunc(data[:bucketEndIdx])))
    77  		data = data[bucketEndIdx:]
    78  	}
    79  
    80  	return result
    81  }
    82  
    83  // fillForResolution is used to fill in gaps in the provided data based on the
    84  // provided resolution and fill function; any gaps longer than the resolution
    85  // size will be eligible for fill. This is intended to be called on data sets
    86  // that have been generated using groupByResolution, and may have unexpected
    87  // results otherwise.
    88  func (data DataSeries) fillForResolution(resolution int64, fillFunc fillFunc) DataSeries {
    89  	if len(data) < 2 {
    90  		return data
    91  	}
    92  
    93  	result := make(DataSeries, 0, len(data))
    94  	result = append(result, data[0])
    95  	for i := 1; i < len(data); i++ {
    96  		if data[i].TimestampNanos-data[i-1].TimestampNanos > resolution {
    97  			result = append(result, fillFunc(data[:i], data[i:], resolution)...)
    98  		}
    99  		result = append(result, data[i])
   100  	}
   101  
   102  	return result
   103  }
   104  
   105  // rateOfChange returns the rate of change (over the supplied period) for each
   106  // point in the supplied series, which is defined as:
   107  //         (value - valuePrev) / ((time - timePrev) / period)
   108  // The returned series will be shorter than the original series by one, since
   109  // the rate of change for the first datapoint cannot be computed in this
   110  // fashion.
   111  func (data DataSeries) rateOfChange(period int64) DataSeries {
   112  	if len(data) < 2 {
   113  		return nil
   114  	}
   115  
   116  	result := make(DataSeries, len(data)-1)
   117  	for i := 1; i < len(data); i++ {
   118  		result[i-1] = dp(
   119  			data[i].TimestampNanos,
   120  			(data[i].Value-data[i-1].Value)/(float64(data[i].TimestampNanos-data[i-1].TimestampNanos)/float64(period)),
   121  		)
   122  	}
   123  	return result
   124  }
   125  
   126  // nonNegative replaces any values less than zero with a zero.
   127  func (data DataSeries) nonNegative() DataSeries {
   128  	result := make(DataSeries, len(data))
   129  	for i := range data {
   130  		if data[i].Value >= 0 {
   131  			result[i] = data[i]
   132  		} else {
   133  			result[i] = dp(data[i].TimestampNanos, 0)
   134  		}
   135  	}
   136  	return result
   137  }
   138  
   139  // adjustTimestamps adjusts all timestamps in the series by the provided offset.
   140  func (data DataSeries) adjustTimestamps(offset int64) DataSeries {
   141  	result := make(DataSeries, len(data))
   142  	for i := range data {
   143  		result[i] = data[i]
   144  		result[i].TimestampNanos += offset
   145  	}
   146  	return result
   147  }
   148  
   149  // removeDuplicates removes any duplicate timestamps from the given sorted
   150  // series, keeping the last duplicate datapoint as the only value.
   151  func (data DataSeries) removeDuplicates() DataSeries {
   152  	if len(data) < 2 {
   153  		return data
   154  	}
   155  
   156  	result := make(DataSeries, len(data))
   157  	result[0] = data[0]
   158  	for i, j := 1, 0; i < len(data); i++ {
   159  		if result[j].TimestampNanos == data[i].TimestampNanos {
   160  			// Duplicate timestamp, keep last value only and shrink result output.
   161  			result[j].Value = data[i].Value
   162  			result = result[:len(result)-1]
   163  		} else {
   164  			j++
   165  			result[j] = data[i]
   166  		}
   167  	}
   168  
   169  	return result
   170  }
   171  
   172  // groupSeriesByTimestamp returns a single DataSeries by aggregating DataPoints
   173  // with matching timestamps from the supplied set of series.
   174  func groupSeriesByTimestamp(datas []DataSeries, aggFunc aggFunc) DataSeries {
   175  	if len(datas) == 0 {
   176  		return nil
   177  	}
   178  
   179  	results := make(DataSeries, 0)
   180  	dataPointsToAggregate := make(DataSeries, 0, len(datas))
   181  	for {
   182  		// Filter empty data series.
   183  		origDatas := datas
   184  		datas = datas[:0]
   185  		for _, data := range origDatas {
   186  			if len(data) > 0 {
   187  				datas = append(datas, data)
   188  			}
   189  		}
   190  		if len(datas) == 0 {
   191  			break
   192  		}
   193  
   194  		// Create a slice of datapoints which share the earliest timestamp of any
   195  		// datapoint across all collections. If the data series are all perfectly
   196  		// aligned (same length and timestamps), then this will just be he first
   197  		// data point in each series.
   198  		earliestTime := int64(math.MaxInt64)
   199  		for _, data := range datas {
   200  			if data[0].TimestampNanos < earliestTime {
   201  				// New earliest timestamp found, discard any points which were
   202  				// previously in the collection.
   203  				dataPointsToAggregate = dataPointsToAggregate[:0]
   204  				earliestTime = data[0].TimestampNanos
   205  			}
   206  			if data[0].TimestampNanos == earliestTime {
   207  				// Data point matches earliest timestamp, add it to current datapoint
   208  				// collection.
   209  				dataPointsToAggregate = append(dataPointsToAggregate, data[0])
   210  			}
   211  		}
   212  		results = append(results, dp(earliestTime, aggFunc(dataPointsToAggregate)))
   213  		for i := range datas {
   214  			if datas[i][0].TimestampNanos == earliestTime {
   215  				datas[i] = datas[i][1:]
   216  			}
   217  		}
   218  	}
   219  
   220  	return results
   221  }
   222  
   223  // intersectTimestamps returns all data points for which a matching timestamp is
   224  // found in any of the supplied data series. This is used to emulate an existing
   225  // behavior of CockroachDB's Time Series system, where a data point is not
   226  // interpolated if no aggregated series has a real timestamp at that point.
   227  func (data DataSeries) intersectTimestamps(datas ...DataSeries) DataSeries {
   228  	seenTimestamps := make(map[int64]struct{})
   229  	for _, ds := range datas {
   230  		for _, dp := range ds {
   231  			seenTimestamps[dp.TimestampNanos] = struct{}{}
   232  		}
   233  	}
   234  
   235  	result := make(DataSeries, 0, len(data))
   236  	for _, dp := range data {
   237  		if _, ok := seenTimestamps[dp.TimestampNanos]; ok {
   238  			result = append(result, dp)
   239  		}
   240  	}
   241  	return result
   242  }
   243  
   244  const floatTolerance float64 = 0.0001
   245  
   246  func floatEquals(a, b float64) bool {
   247  	if (a-b) < floatTolerance && (b-a) < floatTolerance {
   248  		return true
   249  	}
   250  	return false
   251  }
   252  
   253  // DataSeriesEquivalent returns true if the provided data series are roughly
   254  // equivalent. This is useful primarily to work around floating point errors
   255  // which occur if the order of computation differs between the model and the
   256  // real system.
   257  func DataSeriesEquivalent(a, b DataSeries) bool {
   258  	if len(a) != len(b) {
   259  		return false
   260  	}
   261  	for i := range a {
   262  		if a[i].TimestampNanos != b[i].TimestampNanos {
   263  			return false
   264  		}
   265  		if !floatEquals(a[i].Value, b[i].Value) {
   266  			return false
   267  		}
   268  	}
   269  	return true
   270  }