github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/rollup.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package ts

import (
	"context"
	"math"
	"sort"
	"unsafe"

	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

type rollupDatapoint struct {
	timestampNanos int64
	first          float64
	last           float64
	min            float64
	max            float64
	sum            float64
	count          uint32
	variance       float64
}

type rollupData struct {
	name       string
	source     string
	datapoints []rollupDatapoint
}

func (rd *rollupData) toInternal(
	keyDuration, sampleDuration int64,
) ([]roachpb.InternalTimeSeriesData, error) {
	if err := tspb.VerifySlabAndSampleDuration(keyDuration, sampleDuration); err != nil {
		return nil, err
	}

	// This slice must be preallocated to avoid reallocation on `append` because
	// we maintain pointers to its elements in the map below.
	result := make([]roachpb.InternalTimeSeriesData, 0, len(rd.datapoints))
	// Pointers because they need to mutate the stuff in the slice above.
	resultByKeyTime := make(map[int64]*roachpb.InternalTimeSeriesData)

	for _, dp := range rd.datapoints {
		// Determine which InternalTimeSeriesData this datapoint belongs to,
		// creating if it has not already been created for a previous sample.
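		// Illustrative note (assuming normalizeToPeriod truncates a timestamp
		// down to the nearest multiple of the given period): with a keyDuration
		// of one hour, datapoints stamped 1:05 and 1:55 both normalize to the
		// 1:00 key time and are appended to the same InternalTimeSeriesData slab.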
		keyTime := normalizeToPeriod(dp.timestampNanos, keyDuration)
		itsd, ok := resultByKeyTime[keyTime]
		if !ok {
			result = append(result, roachpb.InternalTimeSeriesData{
				StartTimestampNanos: keyTime,
				SampleDurationNanos: sampleDuration,
			})
			itsd = &result[len(result)-1]
			resultByKeyTime[keyTime] = itsd
		}

		itsd.Offset = append(itsd.Offset, itsd.OffsetForTimestamp(dp.timestampNanos))
		itsd.Last = append(itsd.Last, dp.last)
		itsd.First = append(itsd.First, dp.first)
		itsd.Min = append(itsd.Min, dp.min)
		itsd.Max = append(itsd.Max, dp.max)
		itsd.Count = append(itsd.Count, dp.count)
		itsd.Sum = append(itsd.Sum, dp.sum)
		itsd.Variance = append(itsd.Variance, dp.variance)
	}

	return result, nil
}

func computeRollupsFromData(data tspb.TimeSeriesData, rollupPeriodNanos int64) rollupData {
	rollup := rollupData{
		name:   data.Name,
		source: data.Source,
	}

	createRollupPoint := func(timestamp int64, dataSlice []tspb.TimeSeriesDatapoint) {
		result := rollupDatapoint{
			timestampNanos: timestamp,
			max:            -math.MaxFloat64,
			min:            math.MaxFloat64,
		}
		for i, dp := range dataSlice {
			if i == 0 {
				result.first = dp.Value
			}
			result.last = dp.Value
			result.max = math.Max(result.max, dp.Value)
			result.min = math.Min(result.min, dp.Value)

			if result.count > 0 {
				result.variance = computeParallelVariance(
					parallelVarianceArgs{
						count:    result.count,
						average:  result.sum / float64(result.count),
						variance: result.variance,
					},
					parallelVarianceArgs{
						count:    1,
						average:  dp.Value,
						variance: 0,
					},
				)
			}

			result.count++
			result.sum += dp.Value
		}

		rollup.datapoints = append(rollup.datapoints, result)
	}

	dps := data.Datapoints
	for len(dps) > 0 {
		rollupTimestamp := normalizeToPeriod(dps[0].TimestampNanos, rollupPeriodNanos)
		endIdx := sort.Search(len(dps), func(i int) bool {
			return normalizeToPeriod(dps[i].TimestampNanos, rollupPeriodNanos) > rollupTimestamp
		})
		createRollupPoint(rollupTimestamp, dps[:endIdx])
		dps = dps[endIdx:]
	}

	return rollup
}

func (db *DB) rollupTimeSeries(
	ctx context.Context,
	timeSeriesList []timeSeriesResolutionInfo,
	now hlc.Timestamp,
	qmc QueryMemoryContext,
) error {
	thresholds := db.computeThresholds(now.WallTime)
	for _, timeSeries := range timeSeriesList {
		// Only process rollup if this resolution has a target rollup resolution.
		targetResolution, hasRollup := timeSeries.Resolution.TargetRollupResolution()
		if !hasRollup {
			continue
		}

		// Query from beginning of time up to the threshold for this resolution.
		threshold := thresholds[timeSeries.Resolution]

		// Create an initial targetSpan to find data for this series, starting at
		// the beginning of time and ending with the threshold time. Queries use
		// MaxSpanRequestKeys to limit the number of rows in memory at one time,
		// and will use ResumeSpan to issue additional queries if necessary.
		targetSpan := roachpb.Span{
			Key: MakeDataKey(timeSeries.Name, "" /* source */, timeSeries.Resolution, 0),
			EndKey: MakeDataKey(
				timeSeries.Name, "" /* source */, timeSeries.Resolution, threshold,
			),
		}

		// For each row, generate a rollup datapoint and add it to the correct
		// rollupData object.
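		// The map is keyed by source (e.g. the string form of a node ID), so
		// rollups for different sources of the same series accumulate
		// independently as the target span is consumed in batches below.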
		rollupDataMap := make(map[string]rollupData)

		account := qmc.workerMonitor.MakeBoundAccount()
		defer account.Close(ctx)

		childQmc := QueryMemoryContext{
			workerMonitor:      qmc.workerMonitor,
			resultAccount:      &account,
			QueryMemoryOptions: qmc.QueryMemoryOptions,
		}
		for querySpan := targetSpan; querySpan.Valid(); {
			var err error
			querySpan, err = db.queryAndComputeRollupsForSpan(
				ctx, timeSeries, querySpan, targetResolution, rollupDataMap, childQmc,
			)
			if err != nil {
				return err
			}
		}

		// Write computed rollupDataMap to disk.
		var rollupDataSlice []rollupData
		for _, data := range rollupDataMap {
			rollupDataSlice = append(rollupDataSlice, data)
		}
		if err := db.storeRollup(ctx, targetResolution, rollupDataSlice); err != nil {
			return err
		}
	}
	return nil
}

// queryAndComputeRollupsForSpan queries time series data from the provided
// span, up to a maximum limit of rows based on memory limits.
func (db *DB) queryAndComputeRollupsForSpan(
	ctx context.Context,
	series timeSeriesResolutionInfo,
	span roachpb.Span,
	targetResolution Resolution,
	rollupDataMap map[string]rollupData,
	qmc QueryMemoryContext,
) (roachpb.Span, error) {
	b := &kv.Batch{}
	b.Header.MaxSpanRequestKeys = qmc.GetMaxRollupSlabs(series.Resolution)
	b.Scan(span.Key, span.EndKey)
	if err := db.db.Run(ctx, b); err != nil {
		return roachpb.Span{}, err
	}

	// Convert result data into a map of source strings to ordered spans of
	// time series data.
	diskAccount := qmc.workerMonitor.MakeBoundAccount()
	defer diskAccount.Close(ctx)
	sourceSpans, err := convertKeysToSpans(ctx, b.Results[0].Rows, &diskAccount)
	if err != nil {
		return roachpb.Span{}, err
	}

	// For each source, iterate over the data span and compute
	// rollupDatapoints.
	for source, span := range sourceSpans {
		rollup, ok := rollupDataMap[source]
		if !ok {
			rollup = rollupData{
				name:   series.Name,
				source: source,
			}
			if err := qmc.resultAccount.Grow(ctx, int64(unsafe.Sizeof(rollup))); err != nil {
				return roachpb.Span{}, err
			}
		}

		var end timeSeriesSpanIterator
		for start := makeTimeSeriesSpanIterator(span); start.isValid(); start = end {
			rollupPeriod := targetResolution.SampleDuration()
			sampleTimestamp := normalizeToPeriod(start.timestamp, rollupPeriod)
			datapoint := rollupDatapoint{
				timestampNanos: sampleTimestamp,
				max:            -math.MaxFloat64,
				min:            math.MaxFloat64,
				first:          start.first(),
			}
			if err := qmc.resultAccount.Grow(ctx, int64(unsafe.Sizeof(datapoint))); err != nil {
				return roachpb.Span{}, err
			}
			for end = start; end.isValid() && normalizeToPeriod(end.timestamp, rollupPeriod) == sampleTimestamp; end.forward() {
				datapoint.last = end.last()
				datapoint.max = math.Max(datapoint.max, end.max())
				datapoint.min = math.Min(datapoint.min, end.min())

				// Chan et al. algorithm for computing parallel variance. This allows
				// the combination of two previously computed sample variances into a
				// variance for the combined sample; this is needed when further
				// downsampling previously downsampled variance values.
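				// Here the sample under the iterator (end) supplies the left-hand
				// argument and the rollup accumulated so far in datapoint supplies
				// the right-hand argument; the count guard below skips the
				// combination until at least one sample has been folded in.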
				if datapoint.count > 0 {
					datapoint.variance = computeParallelVariance(
						parallelVarianceArgs{
							count:    end.count(),
							average:  end.average(),
							variance: end.variance(),
						},
						parallelVarianceArgs{
							count:    datapoint.count,
							average:  datapoint.sum / float64(datapoint.count),
							variance: datapoint.variance,
						},
					)
				}

				datapoint.count += end.count()
				datapoint.sum += end.sum()
			}
			rollup.datapoints = append(rollup.datapoints, datapoint)
		}
		rollupDataMap[source] = rollup
	}
	return b.Results[0].ResumeSpanAsValue(), nil
}

type parallelVarianceArgs struct {
	count    uint32
	average  float64
	variance float64
}

// computeParallelVariance combines two previously computed sample variances
// into the variance of the combined sample, using the Chan et al. algorithm
// for parallel variance. This is needed when further downsampling previously
// downsampled variance values. Note that it is exactly equivalent to the more
// widely used Welford's algorithm when either sample has a count of one.
func computeParallelVariance(left, right parallelVarianceArgs) float64 {
	leftCount := float64(left.count)
	rightCount := float64(right.count)
	totalCount := leftCount + rightCount
	averageDelta := left.average - right.average
	leftSumOfSquareDeviations := left.variance * leftCount
	rightSumOfSquareDeviations := right.variance * rightCount
	totalSumOfSquareDeviations := leftSumOfSquareDeviations + rightSumOfSquareDeviations +
		(averageDelta*averageDelta)*rightCount*leftCount/totalCount
	return totalSumOfSquareDeviations / totalCount
}
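
// As a worked example of the formula above (illustrative only): the values
// {1, 2, 3, 4} have a population variance of 1.25. Splitting them into
// {1, 2} (count 2, average 1.5, variance 0.25) and {3, 4} (count 2, average
// 3.5, variance 0.25) and combining the halves recovers that value:
//
//	left := parallelVarianceArgs{count: 2, average: 1.5, variance: 0.25}
//	right := parallelVarianceArgs{count: 2, average: 3.5, variance: 0.25}
//	combined := computeParallelVariance(left, right)
//	// combined == (0.5 + 0.5 + 4.0) / 4 == 1.25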