github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/testmodel/data.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package testmodel

import (
	"math"
	"sort"

	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
)

// dp is a shorthand function for constructing a TimeSeriesDatapoint, used for
// convenience in tests.
func dp(timestamp int64, value float64) tspb.TimeSeriesDatapoint {
	return tspb.TimeSeriesDatapoint{
		TimestampNanos: timestamp,
		Value:          value,
	}
}

// DataSeries represents a series of data points ordered by timestamp.
type DataSeries []tspb.TimeSeriesDatapoint

func (data DataSeries) Len() int           { return len(data) }
func (data DataSeries) Swap(i, j int)      { data[i], data[j] = data[j], data[i] }
func (data DataSeries) Less(i, j int) bool { return data[i].TimestampNanos < data[j].TimestampNanos }

// normalizeTime rounds a timestamp down to the nearest multiple of the given
// resolution.
func normalizeTime(time, resolution int64) int64 {
	return time - time%resolution
}

// TimeSlice returns the set of data points from the supplied series with
// timestamps that fall in the interval [start, end) (exclusive of the end
// timestamp).
func (data DataSeries) TimeSlice(start, end int64) DataSeries {
	startIdx := sort.Search(len(data), func(i int) bool {
		return data[i].TimestampNanos >= start
	})
	endIdx := sort.Search(len(data), func(i int) bool {
		return end <= data[i].TimestampNanos
	})

	result := data[startIdx:endIdx]
	if len(result) == 0 {
		return nil
	}
	return result
}

// GroupByResolution aggregates data points in the given series into time
// buckets based on the provided resolution.
func (data DataSeries) GroupByResolution(resolution int64, aggFunc aggFunc) DataSeries {
	if len(data) == 0 {
		return nil
	}

	result := make(DataSeries, 0)

	for len(data) > 0 {
		bucketTime := normalizeTime(data[0].TimestampNanos, resolution)
		// Grab the index of the first data point which does not belong to the same
		// bucket as the start data point.
		bucketEndIdx := sort.Search(len(data), func(idx int) bool {
			return normalizeTime(data[idx].TimestampNanos, resolution) > bucketTime
		})
		// Compute the next point as an aggregate of all underlying points which
		// go in the same bucket.
		result = append(result, dp(bucketTime, aggFunc(data[:bucketEndIdx])))
		data = data[bucketEndIdx:]
	}

	return result
}

// fillForResolution is used to fill in gaps in the provided data based on the
// provided resolution and fill function; any gaps longer than the resolution
// size will be eligible for fill. This is intended to be called on data sets
// that have been generated using GroupByResolution, and may have unexpected
// results otherwise.
func (data DataSeries) fillForResolution(resolution int64, fillFunc fillFunc) DataSeries {
	if len(data) < 2 {
		return data
	}

	result := make(DataSeries, 0, len(data))
	result = append(result, data[0])
	for i := 1; i < len(data); i++ {
		if data[i].TimestampNanos-data[i-1].TimestampNanos > resolution {
			result = append(result, fillFunc(data[:i], data[i:], resolution)...)
		}
		result = append(result, data[i])
	}

	return result
}
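// exampleGroupByResolution is a hypothetical usage sketch (not part of the
// original file): it shows how GroupByResolution might be exercised with a
// locally defined sum aggregator, assuming the aggFunc type defined elsewhere
// in this package has the underlying type func(DataSeries) float64, as
// implied by its use in GroupByResolution above.
func exampleGroupByResolution() DataSeries {
	series := DataSeries{dp(1, 2.0), dp(5, 3.0), dp(12, 4.0)}
	sum := func(ds DataSeries) float64 {
		total := 0.0
		for _, pt := range ds {
			total += pt.Value
		}
		return total
	}
	// With a resolution of 10, timestamps 1 and 5 normalize to bucket 0 and
	// timestamp 12 normalizes to bucket 10, yielding (0, 5.0) and (10, 4.0).
	return series.GroupByResolution(10, sum)
}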
// rateOfChange returns the rate of change (over the supplied period) for each
// point in the supplied series, which is defined as:
// (value - valuePrev) / ((time - timePrev) / period)
// The returned series will be shorter than the original series by one, since
// the rate of change for the first datapoint cannot be computed in this
// fashion.
func (data DataSeries) rateOfChange(period int64) DataSeries {
	if len(data) < 2 {
		return nil
	}

	result := make(DataSeries, len(data)-1)
	for i := 1; i < len(data); i++ {
		result[i-1] = dp(
			data[i].TimestampNanos,
			(data[i].Value-data[i-1].Value)/(float64(data[i].TimestampNanos-data[i-1].TimestampNanos)/float64(period)),
		)
	}
	return result
}

// nonNegative replaces any value less than zero with zero.
func (data DataSeries) nonNegative() DataSeries {
	result := make(DataSeries, len(data))
	for i := range data {
		if data[i].Value >= 0 {
			result[i] = data[i]
		} else {
			result[i] = dp(data[i].TimestampNanos, 0)
		}
	}
	return result
}

// adjustTimestamps adjusts all timestamps in the series by the provided offset.
func (data DataSeries) adjustTimestamps(offset int64) DataSeries {
	result := make(DataSeries, len(data))
	for i := range data {
		result[i] = data[i]
		result[i].TimestampNanos += offset
	}
	return result
}

// removeDuplicates removes any duplicate timestamps from the given sorted
// series, keeping the last duplicate datapoint as the only value.
func (data DataSeries) removeDuplicates() DataSeries {
	if len(data) < 2 {
		return data
	}

	result := make(DataSeries, len(data))
	result[0] = data[0]
	for i, j := 1, 0; i < len(data); i++ {
		if result[j].TimestampNanos == data[i].TimestampNanos {
			// Duplicate timestamp: keep the last value only and shrink the result.
			result[j].Value = data[i].Value
			result = result[:len(result)-1]
		} else {
			j++
			result[j] = data[i]
		}
	}

	return result
}

// groupSeriesByTimestamp returns a single DataSeries by aggregating DataPoints
// with matching timestamps from the supplied set of series.
func groupSeriesByTimestamp(datas []DataSeries, aggFunc aggFunc) DataSeries {
	if len(datas) == 0 {
		return nil
	}

	results := make(DataSeries, 0)
	dataPointsToAggregate := make(DataSeries, 0, len(datas))
	for {
		// Filter empty data series.
		origDatas := datas
		datas = datas[:0]
		for _, data := range origDatas {
			if len(data) > 0 {
				datas = append(datas, data)
			}
		}
		if len(datas) == 0 {
			break
		}

		// Create a slice of datapoints which share the earliest timestamp of any
		// datapoint across all collections. If the data series are all perfectly
		// aligned (same length and timestamps), then this will just be the first
		// data point in each series.
		earliestTime := int64(math.MaxInt64)
		for _, data := range datas {
			if data[0].TimestampNanos < earliestTime {
				// New earliest timestamp found; discard any points which were
				// previously in the collection.
				dataPointsToAggregate = dataPointsToAggregate[:0]
				earliestTime = data[0].TimestampNanos
			}
			if data[0].TimestampNanos == earliestTime {
				// Data point matches the earliest timestamp; add it to the current
				// datapoint collection.
				dataPointsToAggregate = append(dataPointsToAggregate, data[0])
			}
		}
		results = append(results, dp(earliestTime, aggFunc(dataPointsToAggregate)))
		for i := range datas {
			if datas[i][0].TimestampNanos == earliestTime {
				datas[i] = datas[i][1:]
			}
		}
	}

	return results
}
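// exampleRateOfChange is a hypothetical sketch (not part of the original
// file) working through the rateOfChange formula above: with a period of 10
// and the points (10, 100) and (20, 150), the computed rate is
// (150 - 100) / ((20 - 10) / 10) = 50, reported at timestamp 20.
func exampleRateOfChange() DataSeries {
	series := DataSeries{dp(10, 100), dp(20, 150)}
	// Expected result: a single datapoint (20, 50).
	return series.rateOfChange(10)
}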
// intersectTimestamps returns all data points for which a matching timestamp is
// found in any of the supplied data series. This is used to emulate an existing
// behavior of CockroachDB's Time Series system, where a data point is not
// interpolated if no aggregated series has a real timestamp at that point.
func (data DataSeries) intersectTimestamps(datas ...DataSeries) DataSeries {
	seenTimestamps := make(map[int64]struct{})
	for _, ds := range datas {
		for _, dp := range ds {
			seenTimestamps[dp.TimestampNanos] = struct{}{}
		}
	}

	result := make(DataSeries, 0, len(data))
	for _, dp := range data {
		if _, ok := seenTimestamps[dp.TimestampNanos]; ok {
			result = append(result, dp)
		}
	}
	return result
}

const floatTolerance float64 = 0.0001

// floatEquals returns true if the two values are within floatTolerance of
// each other.
func floatEquals(a, b float64) bool {
	return (a-b) < floatTolerance && (b-a) < floatTolerance
}

// DataSeriesEquivalent returns true if the provided data series are roughly
// equivalent. This is useful primarily to work around floating point errors
// which occur if the order of computation differs between the model and the
// real system.
func DataSeriesEquivalent(a, b DataSeries) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if a[i].TimestampNanos != b[i].TimestampNanos {
			return false
		}
		if !floatEquals(a[i].Value, b[i].Value) {
			return false
		}
	}
	return true
}
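// exampleDataSeriesEquivalent is a hypothetical sketch (not part of the
// original file) showing the tolerance behavior of DataSeriesEquivalent:
// timestamps must match exactly, while values may differ by less than
// floatTolerance.
func exampleDataSeriesEquivalent() bool {
	a := DataSeries{dp(1, 1.0), dp(2, 2.0)}
	b := DataSeries{dp(1, 1.00005), dp(2, 2.0)}
	// The values at timestamp 1 differ by 0.00005, which is within the 0.0001
	// tolerance, so the series are considered equivalent and this returns true.
	return DataSeriesEquivalent(a, b)
}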