github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/query.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package ts 12 13 import ( 14 "context" 15 "fmt" 16 "math" 17 "sort" 18 "time" 19 20 "github.com/cockroachdb/cockroach/pkg/kv" 21 "github.com/cockroachdb/cockroach/pkg/roachpb" 22 "github.com/cockroachdb/cockroach/pkg/ts/tspb" 23 "github.com/cockroachdb/cockroach/pkg/util/mon" 24 "github.com/cockroachdb/errors" 25 ) 26 27 // timeSeriesSpan represents a queryed time span for a single time series. This 28 // is reprented as an ordered slice of data slabs, where each slab contains 29 // samples. 30 type timeSeriesSpan []roachpb.InternalTimeSeriesData 31 32 // timeSeriesSpanIterator is used to iterate over a timeSeriesSpan. An iterator 33 // is helpful because a multi-level index is required to iterate over the structure. 34 type timeSeriesSpanIterator struct { 35 span timeSeriesSpan 36 total int 37 outer int 38 inner int 39 timestamp int64 40 length int 41 } 42 43 // makeTimeSeriesSpanIterator constructs a new iterator for the supplied 44 // timeSeriesSpan, initialized at index 0. 45 func makeTimeSeriesSpanIterator(span timeSeriesSpan) timeSeriesSpanIterator { 46 iterator := timeSeriesSpanIterator{ 47 span: span, 48 } 49 iterator.computeLength() 50 iterator.computeTimestamp() 51 return iterator 52 } 53 54 // computeLength recomputes the total length of the span. 55 func (tsi *timeSeriesSpanIterator) computeLength() { 56 tsi.length = 0 57 for _, data := range tsi.span { 58 tsi.length += data.SampleCount() 59 } 60 } 61 62 // computeTimestamp computes the timestamp of the sample at the current index. 63 // It is automatically called internally whenever the iterator is moved. 64 func (tsi *timeSeriesSpanIterator) computeTimestamp() { 65 if !tsi.isValid() { 66 tsi.timestamp = 0 67 return 68 } 69 data := tsi.span[tsi.outer] 70 tsi.timestamp = data.StartTimestampNanos + data.SampleDurationNanos*int64(tsi.offset()) 71 } 72 73 // forward moves the iterator forward one sample. The maximum index is equal 74 // to the length of the span, which is one index beyond the last sample. 75 func (tsi *timeSeriesSpanIterator) forward() { 76 if !tsi.isValid() { 77 return 78 } 79 tsi.total++ 80 tsi.inner++ 81 if tsi.inner >= tsi.span[tsi.outer].SampleCount() { 82 tsi.inner = 0 83 tsi.outer++ 84 } 85 tsi.computeTimestamp() 86 } 87 88 // backward moves the iterator back one sample. The iterator can not be moved 89 // earlier than the first index. 90 func (tsi *timeSeriesSpanIterator) backward() { 91 if tsi.outer == 0 && tsi.inner == 0 { 92 return 93 } 94 tsi.total-- 95 if tsi.inner == 0 { 96 tsi.outer-- 97 tsi.inner = tsi.span[tsi.outer].SampleCount() - 1 98 } else { 99 tsi.inner-- 100 } 101 tsi.computeTimestamp() 102 } 103 104 // seekIndex sets the iterator to the supplied index in the span. The index 105 // cannot be set greater than the length of the span or less than zero. 106 func (tsi *timeSeriesSpanIterator) seekIndex(index int) { 107 if index >= tsi.length { 108 tsi.total = tsi.length 109 tsi.inner = 0 110 tsi.outer = len(tsi.span) 111 tsi.timestamp = 0 112 return 113 } 114 115 if index < 0 { 116 index = 0 117 } 118 119 remaining := index 120 newOuter := 0 121 for len(tsi.span) > newOuter && remaining >= tsi.span[newOuter].SampleCount() { 122 remaining -= tsi.span[newOuter].SampleCount() 123 newOuter++ 124 } 125 tsi.inner = remaining 126 tsi.outer = newOuter 127 tsi.total = index 128 tsi.computeTimestamp() 129 } 130 131 // seekTimestamp sets the iterator to the earliest sample index with a timestamp 132 // greater than or equal to the supplied timestamp. 133 func (tsi *timeSeriesSpanIterator) seekTimestamp(timestamp int64) { 134 seeker := *tsi 135 index := sort.Search(tsi.length, func(i int) bool { 136 seeker.seekIndex(i) 137 return seeker.timestamp >= timestamp 138 }) 139 tsi.seekIndex(index) 140 } 141 142 func (tsi *timeSeriesSpanIterator) isColumnar() bool { 143 return tsi.span[tsi.outer].IsColumnar() 144 } 145 146 func (tsi *timeSeriesSpanIterator) isRollup() bool { 147 return tsi.span[tsi.outer].IsRollup() 148 } 149 150 func (tsi *timeSeriesSpanIterator) offset() int32 { 151 data := tsi.span[tsi.outer] 152 if tsi.isColumnar() { 153 return data.Offset[tsi.inner] 154 } 155 return data.Samples[tsi.inner].Offset 156 } 157 158 func (tsi *timeSeriesSpanIterator) count() uint32 { 159 data := tsi.span[tsi.outer] 160 if tsi.isColumnar() { 161 if tsi.isRollup() { 162 return data.Count[tsi.inner] 163 } 164 return 1 165 } 166 return data.Samples[tsi.inner].Count 167 } 168 169 func (tsi *timeSeriesSpanIterator) sum() float64 { 170 data := tsi.span[tsi.outer] 171 if tsi.isColumnar() { 172 if tsi.isRollup() { 173 return data.Sum[tsi.inner] 174 } 175 return data.Last[tsi.inner] 176 } 177 return data.Samples[tsi.inner].Sum 178 } 179 180 func (tsi *timeSeriesSpanIterator) max() float64 { 181 data := tsi.span[tsi.outer] 182 if tsi.isColumnar() { 183 if tsi.isRollup() { 184 return data.Max[tsi.inner] 185 } 186 return data.Last[tsi.inner] 187 } 188 if max := data.Samples[tsi.inner].Max; max != nil { 189 return *max 190 } 191 return data.Samples[tsi.inner].Sum 192 } 193 194 func (tsi *timeSeriesSpanIterator) min() float64 { 195 data := tsi.span[tsi.outer] 196 if tsi.isColumnar() { 197 if tsi.isRollup() { 198 return data.Min[tsi.inner] 199 } 200 return data.Last[tsi.inner] 201 } 202 if min := data.Samples[tsi.inner].Min; min != nil { 203 return *min 204 } 205 return data.Samples[tsi.inner].Sum 206 } 207 208 func (tsi *timeSeriesSpanIterator) first() float64 { 209 data := tsi.span[tsi.outer] 210 if tsi.isColumnar() { 211 if tsi.isRollup() { 212 return data.First[tsi.inner] 213 } 214 return data.Last[tsi.inner] 215 } 216 217 // First was not recorded in the planned row-format rollups, but since these 218 // rollups were never actually generated we can safely use sum. 219 return data.Samples[tsi.inner].Sum 220 } 221 222 func (tsi *timeSeriesSpanIterator) last() float64 { 223 data := tsi.span[tsi.outer] 224 if tsi.isColumnar() { 225 return data.Last[tsi.inner] 226 } 227 228 // Last was not recorded in the planned row-format rollups, but since these 229 // rollups were never actually generated we can safely use sum. 230 return data.Samples[tsi.inner].Sum 231 } 232 233 func (tsi *timeSeriesSpanIterator) variance() float64 { 234 data := tsi.span[tsi.outer] 235 if tsi.isColumnar() { 236 if tsi.isRollup() { 237 return data.Variance[tsi.inner] 238 } 239 return 0 240 } 241 242 // Variance was not recorded in the planned row-format rollups, but since 243 // these rollups were never actually generated we can safely return 0. 244 return 0 245 } 246 247 func (tsi *timeSeriesSpanIterator) average() float64 { 248 return tsi.sum() / float64(tsi.count()) 249 } 250 251 func (tsi *timeSeriesSpanIterator) setOffset(value int32) { 252 data := tsi.span[tsi.outer] 253 if tsi.isColumnar() { 254 data.Offset[tsi.inner] = value 255 return 256 } 257 data.Samples[tsi.inner].Offset = value 258 } 259 260 func (tsi *timeSeriesSpanIterator) setSingleValue(value float64) { 261 data := tsi.span[tsi.outer] 262 if tsi.isColumnar() { 263 data.Last[tsi.inner] = value 264 return 265 } 266 data.Samples[tsi.inner].Sum = value 267 data.Samples[tsi.inner].Count = 1 268 data.Samples[tsi.inner].Min = nil 269 data.Samples[tsi.inner].Max = nil 270 } 271 272 // truncateSpan truncates the span underlying this iterator to the current 273 // iterator, *not including* the current position. That is, the logical 274 // underlying span is truncated to [0, current). 275 func (tsi *timeSeriesSpanIterator) truncateSpan() { 276 var outerExtent int 277 if tsi.inner == 0 { 278 outerExtent = tsi.outer 279 } else { 280 outerExtent = tsi.outer + 1 281 } 282 283 // Reclaim memory from unused slabs. 284 unused := tsi.span[outerExtent:] 285 tsi.span = tsi.span[:outerExtent] 286 for i := range unused { 287 unused[i] = roachpb.InternalTimeSeriesData{} 288 } 289 290 if tsi.inner != 0 { 291 data := tsi.span[tsi.outer] 292 size := tsi.inner 293 if data.IsColumnar() { 294 data.Offset = data.Offset[:size] 295 data.Last = data.Last[:size] 296 if data.IsRollup() { 297 data.First = data.First[:size] 298 data.Min = data.Min[:size] 299 data.Max = data.Max[:size] 300 data.Count = data.Count[:size] 301 data.Sum = data.Sum[:size] 302 data.Variance = data.Variance[:size] 303 } 304 } else { 305 data.Samples = data.Samples[:size] 306 } 307 tsi.span[tsi.outer] = data 308 } 309 310 tsi.computeLength() 311 tsi.computeTimestamp() 312 } 313 314 // Convert the underlying span to single-valued by removing all optional columns 315 // from any columnar spans. 316 func convertToSingleValue(span timeSeriesSpan) { 317 for i := range span { 318 if span[i].IsColumnar() { 319 span[i].Count = nil 320 span[i].Sum = nil 321 span[i].Min = nil 322 span[i].Max = nil 323 span[i].First = nil 324 span[i].Variance = nil 325 } 326 } 327 } 328 329 // value returns the value of the sample at the iterators index, according to 330 // the provided downsampler operation. 331 func (tsi *timeSeriesSpanIterator) value(downsampler tspb.TimeSeriesQueryAggregator) float64 { 332 if !tsi.isValid() { 333 return 0 334 } 335 switch downsampler { 336 case tspb.TimeSeriesQueryAggregator_AVG: 337 return tsi.sum() / float64(tsi.count()) 338 case tspb.TimeSeriesQueryAggregator_MAX: 339 return tsi.max() 340 case tspb.TimeSeriesQueryAggregator_MIN: 341 return tsi.min() 342 case tspb.TimeSeriesQueryAggregator_SUM: 343 return tsi.sum() 344 } 345 346 panic(fmt.Sprintf("unknown downsampler option encountered: %v", downsampler)) 347 } 348 349 // valueAtTimestamp returns the value of the span at the provided timestamp, 350 // according to the current position of the iterator. If the provided timestamp 351 // is not exactly equal to the iterator's current timestamp, but is in between 352 // the iterator's timestamp and the previous timestamp, then the value is 353 // interpolated using linear interpolation. 354 // 355 // However, a maximum interpolation limit is passed - if the distance between 356 // the current timestamp and the previous timestamp is greater than this limit, 357 // then interpolation will not be attempted. 358 func (tsi *timeSeriesSpanIterator) valueAtTimestamp( 359 timestamp int64, interpolationLimitNanos int64, downsampler tspb.TimeSeriesQueryAggregator, 360 ) (float64, bool) { 361 if !tsi.validAtTimestamp(timestamp, interpolationLimitNanos) { 362 return 0, false 363 } 364 if tsi.timestamp == timestamp { 365 return tsi.value(downsampler), true 366 } 367 368 deriv, valid := tsi.derivative(downsampler) 369 if !valid { 370 return 0, false 371 } 372 return tsi.value(downsampler) - deriv*float64((tsi.timestamp-timestamp)/tsi.samplePeriod()), true 373 } 374 375 // validAtTimestamp returns true if the iterator can return a valid value for 376 // the provided timestamp. This is true either if the iterators current position 377 // is the current timestamp, *or* if the provided timestamp is between the 378 // iterators current and previous positions *and* the gap between the current 379 // and previous positions is less than the provided interpolation limit. 380 func (tsi *timeSeriesSpanIterator) validAtTimestamp(timestamp, interpolationLimitNanos int64) bool { 381 if !tsi.isValid() { 382 return false 383 } 384 if tsi.timestamp == timestamp { 385 return true 386 } 387 // Cannot interpolate before the first index. 388 if tsi.total == 0 { 389 return false 390 } 391 prev := *tsi 392 prev.backward() 393 394 // Only interpolate if the timestamp is in between this point and the previous. 395 if timestamp > tsi.timestamp || timestamp <= prev.timestamp { 396 return false 397 } 398 // Respect the interpolation limit. Note that an interpolation limit of zero 399 // is a special case still needed for legacy tests. 400 // TODO(mrtracy): remove test cases with interpolation limit zero. 401 if interpolationLimitNanos > 0 && tsi.timestamp-prev.timestamp > interpolationLimitNanos { 402 return false 403 } 404 return true 405 } 406 407 // derivative returns the current rate of change of the iterator, computed by 408 // considering the value at the current position and the value at the previous 409 // position of the iterator. The derivative is expressed per sample period. 410 func (tsi *timeSeriesSpanIterator) derivative( 411 downsampler tspb.TimeSeriesQueryAggregator, 412 ) (float64, bool) { 413 if !tsi.isValid() { 414 return 0, false 415 } 416 417 // Cannot compute rate of change for the first index. 418 if tsi.total == 0 { 419 return 0, false 420 } 421 422 prev := *tsi 423 prev.backward() 424 rateOfChange := (tsi.value(downsampler) - prev.value(downsampler)) / float64((tsi.timestamp-prev.timestamp)/tsi.samplePeriod()) 425 return rateOfChange, true 426 } 427 428 // samplePeriod returns the sample period duration for this iterator. 429 func (tsi *timeSeriesSpanIterator) samplePeriod() int64 { 430 return tsi.span[0].SampleDurationNanos 431 } 432 433 // isValid returns true if the iterator currently points to a valid sample. 434 func (tsi *timeSeriesSpanIterator) isValid() bool { 435 return tsi.total < tsi.length 436 } 437 438 // Query processes the supplied query over the supplied timespan and on-disk 439 // resolution, while respecting the provided limitations on memory usage. 440 func (db *DB) Query( 441 ctx context.Context, 442 query tspb.Query, 443 diskResolution Resolution, 444 timespan QueryTimespan, 445 mem QueryMemoryContext, 446 ) ([]tspb.TimeSeriesDatapoint, []string, error) { 447 timespan.normalize() 448 449 // Validate incoming parameters. 450 if err := timespan.verifyBounds(); err != nil { 451 return nil, nil, err 452 } 453 if err := timespan.verifyDiskResolution(diskResolution); err != nil { 454 return nil, nil, err 455 } 456 if err := verifySourceAggregator(query.GetSourceAggregator()); err != nil { 457 return nil, nil, err 458 } 459 if err := verifyDownsampler(query.GetDownsampler()); err != nil { 460 return nil, nil, err 461 } 462 463 // Adjust timespan based on the current time. 464 if err := timespan.adjustForCurrentTime(diskResolution); err != nil { 465 return nil, nil, err 466 } 467 468 var result []tspb.TimeSeriesDatapoint 469 470 // Create sourceSet, which tracks unique sources seen while querying. 471 sourceSet := make(map[string]struct{}) 472 473 resolutions := []Resolution{diskResolution} 474 if rollupResolution, ok := diskResolution.TargetRollupResolution(); ok { 475 if timespan.verifyDiskResolution(rollupResolution) == nil { 476 resolutions = []Resolution{rollupResolution, diskResolution} 477 } 478 } 479 480 for _, resolution := range resolutions { 481 // Compute the maximum timespan width which can be queried for this resolution 482 // without exceeding the memory budget. 483 maxTimespanWidth, err := mem.GetMaxTimespan(resolution) 484 if err != nil { 485 return nil, nil, err 486 } 487 488 if maxTimespanWidth > timespan.width() { 489 if err := db.queryChunk( 490 ctx, query, resolution, timespan, mem, &result, sourceSet, 491 ); err != nil { 492 return nil, nil, err 493 } 494 } else { 495 // Break up the timespan into "chunks" where each chunk will fit into the 496 // memory budget. Query and process each chunk individually, appending 497 // results to the same output collection. 498 chunkTime := timespan 499 chunkTime.EndNanos = chunkTime.StartNanos + maxTimespanWidth 500 for ; chunkTime.StartNanos < timespan.EndNanos; chunkTime.moveForward(maxTimespanWidth + timespan.SampleDurationNanos) { 501 if chunkTime.EndNanos > timespan.EndNanos { 502 // Final chunk may be a smaller window. 503 chunkTime.EndNanos = timespan.EndNanos 504 } 505 if err := db.queryChunk( 506 ctx, query, resolution, chunkTime, mem, &result, sourceSet, 507 ); err != nil { 508 return nil, nil, err 509 } 510 } 511 } 512 513 // If results were returned and there are multiple resolutions, determine 514 // if we have satisfied the entire query. If not, determine where the query 515 // for the next resolution should begin. 516 if len(resolutions) > 1 && len(result) > 0 { 517 lastTime := result[len(result)-1].TimestampNanos 518 if lastTime >= timespan.EndNanos { 519 break 520 } 521 timespan.StartNanos = lastTime 522 } 523 } 524 525 // Convert the unique sources seen into a slice. 526 sources := make([]string, 0, len(sourceSet)) 527 for source := range sourceSet { 528 sources = append(sources, source) 529 } 530 531 return result, sources, nil 532 } 533 534 // queryChunk processes a chunk of a query; this will read the necessary data 535 // from disk and apply the desired processing operations to generate a result. 536 func (db *DB) queryChunk( 537 ctx context.Context, 538 query tspb.Query, 539 diskResolution Resolution, 540 timespan QueryTimespan, 541 mem QueryMemoryContext, 542 dest *[]tspb.TimeSeriesDatapoint, 543 sourceSet map[string]struct{}, 544 ) error { 545 acc := mem.workerMonitor.MakeBoundAccount() 546 defer acc.Close(ctx) 547 548 // Actual queried data should include the interpolation limit on either side. 549 diskTimespan := timespan 550 diskTimespan.expand(mem.InterpolationLimitNanos) 551 552 var data []kv.KeyValue 553 var err error 554 if len(query.Sources) == 0 { 555 data, err = db.readAllSourcesFromDatabase(ctx, query.Name, diskResolution, diskTimespan) 556 } else { 557 data, err = db.readFromDatabase(ctx, query.Name, diskResolution, diskTimespan, query.Sources) 558 } 559 560 if err != nil { 561 return err 562 } 563 564 // Assemble data into an ordered timeSeriesSpan for each source. 565 sourceSpans, err := convertKeysToSpans(ctx, data, &acc) 566 if err != nil { 567 return err 568 } 569 if len(sourceSpans) == 0 { 570 return nil 571 } 572 573 if timespan.SampleDurationNanos != diskResolution.SampleDuration() { 574 downsampleSpans(sourceSpans, timespan.SampleDurationNanos, query.GetDownsampler()) 575 // downsampleSpans always produces single-valued spans. At the time of 576 // writing, all downsamplers are the identity on single-valued spans, but 577 // that may not be true forever (consider for instance a variance 578 // downsampler). Therefore, before continuing to the aggregation step we 579 // convert the downsampler to SUM, which is equivalent to identify for a 580 // single-valued span. 581 query.Downsampler = tspb.TimeSeriesQueryAggregator_SUM.Enum() 582 } 583 584 // Aggregate spans, increasing our memory usage if the destination slice is 585 // expanded. 586 oldCap := cap(*dest) 587 aggregateSpansToDatapoints(sourceSpans, query, timespan, mem.InterpolationLimitNanos, dest) 588 if oldCap > cap(*dest) { 589 if err := mem.resultAccount.Grow(ctx, sizeOfDataPoint*int64(cap(*dest)-oldCap)); err != nil { 590 return err 591 } 592 } 593 594 // Add unique sources to the supplied source set. 595 for k := range sourceSpans { 596 sourceSet[k] = struct{}{} 597 } 598 return nil 599 } 600 601 // downsampleSpans downsamples the provided timeSeriesSpans in place, without 602 // allocating additional memory. The output data from downsampleSpans is 603 // single-valued, without rollups; unused rollup data will be discarded. 604 func downsampleSpans( 605 spans map[string]timeSeriesSpan, duration int64, downsampler tspb.TimeSeriesQueryAggregator, 606 ) { 607 // Downsample data in place. 608 for k, span := range spans { 609 nextInsert := makeTimeSeriesSpanIterator(span) 610 for start, end := nextInsert, nextInsert; start.isValid(); start = end { 611 sampleTimestamp := normalizeToPeriod(start.timestamp, duration) 612 613 switch downsampler { 614 case tspb.TimeSeriesQueryAggregator_MAX: 615 max := -math.MaxFloat64 616 for ; end.isValid() && normalizeToPeriod(end.timestamp, duration) == sampleTimestamp; end.forward() { 617 max = math.Max(max, end.max()) 618 } 619 nextInsert.setSingleValue(max) 620 case tspb.TimeSeriesQueryAggregator_MIN: 621 min := math.MaxFloat64 622 for ; end.isValid() && normalizeToPeriod(end.timestamp, duration) == sampleTimestamp; end.forward() { 623 min = math.Min(min, end.min()) 624 } 625 nextInsert.setSingleValue(min) 626 case tspb.TimeSeriesQueryAggregator_AVG: 627 count, sum := uint32(0), 0.0 628 for ; end.isValid() && normalizeToPeriod(end.timestamp, duration) == sampleTimestamp; end.forward() { 629 count += end.count() 630 sum += end.sum() 631 } 632 nextInsert.setSingleValue(sum / float64(count)) 633 case tspb.TimeSeriesQueryAggregator_SUM: 634 sum := 0.0 635 for ; end.isValid() && normalizeToPeriod(end.timestamp, duration) == sampleTimestamp; end.forward() { 636 sum += end.sum() 637 } 638 nextInsert.setSingleValue(sum) 639 } 640 641 nextInsert.setOffset(span[nextInsert.outer].OffsetForTimestamp(sampleTimestamp)) 642 nextInsert.forward() 643 } 644 645 // Trim span using nextInsert, which is where the next value would be 646 // inserted and is thus the first unneeded value. 647 nextInsert.truncateSpan() 648 span = nextInsert.span 649 convertToSingleValue(span) 650 spans[k] = span 651 } 652 } 653 654 // aggregateSpansToDatapoints aggregates the supplied set of data spans into 655 // a single result time series, by aggregating data points from different spans 656 // which share the same timestamp. For each timestamp in the query range, a 657 // value is extracted from each span using the supplied downsampling function. 658 // If a span is missing a value at a specific timestamp, the missing value will 659 // be interpolated under certain circumstances. The values from the different 660 // spans are then combined into a single value using the specified source 661 // aggregator. 662 func aggregateSpansToDatapoints( 663 spans map[string]timeSeriesSpan, 664 query tspb.Query, 665 timespan QueryTimespan, 666 interpolationLimitNanos int64, 667 dest *[]tspb.TimeSeriesDatapoint, 668 ) { 669 // Aggregate into reserved result slice (filter points missing from component slices) 670 iterators := make([]timeSeriesSpanIterator, 0, len(spans)) 671 for _, span := range spans { 672 iter := makeTimeSeriesSpanIterator(span) 673 iter.seekTimestamp(timespan.StartNanos) 674 iterators = append(iterators, iter) 675 } 676 677 var lowestTimestamp int64 678 computeLowest := func() { 679 lowestTimestamp = math.MaxInt64 680 for _, iter := range iterators { 681 if !iter.isValid() { 682 continue 683 } 684 if iter.timestamp < lowestTimestamp { 685 lowestTimestamp = iter.timestamp 686 } 687 } 688 } 689 690 aggregateValues := make([]float64, len(iterators)) 691 for computeLowest(); lowestTimestamp <= timespan.EndNanos; computeLowest() { 692 aggregateValues = aggregateValues[:0] 693 for i, iter := range iterators { 694 var value float64 695 var valid bool 696 switch query.GetDerivative() { 697 case tspb.TimeSeriesQueryDerivative_DERIVATIVE: 698 valid = iter.validAtTimestamp(lowestTimestamp, interpolationLimitNanos) 699 if valid { 700 value, valid = iter.derivative(query.GetDownsampler()) 701 // Convert derivative to seconds. 702 value *= float64(time.Second.Nanoseconds()) / float64(iter.samplePeriod()) 703 } 704 case tspb.TimeSeriesQueryDerivative_NON_NEGATIVE_DERIVATIVE: 705 valid = iter.validAtTimestamp(lowestTimestamp, interpolationLimitNanos) 706 if valid { 707 value, valid = iter.derivative(query.GetDownsampler()) 708 if value < 0 { 709 value = 0 710 } else { 711 // Convert derivative to seconds. 712 value *= float64(time.Second.Nanoseconds()) / float64(iter.samplePeriod()) 713 } 714 } 715 default: 716 value, valid = iter.valueAtTimestamp( 717 lowestTimestamp, interpolationLimitNanos, query.GetDownsampler(), 718 ) 719 } 720 721 if valid { 722 aggregateValues = append(aggregateValues, value) 723 } 724 if iter.timestamp == lowestTimestamp { 725 iterators[i].forward() 726 } 727 } 728 if len(aggregateValues) == 0 { 729 continue 730 } 731 732 // Filters data points near the current moment which are "incomplete". Any 733 // data point in the sufficiently-recent past is required to have a valid 734 // contribution from all sources being aggregated. 735 // 736 // A detailed explanation of why this is done: New time series data points 737 // are, in typical usage, always added at the current time; however, due to 738 // the curiosities of clock skew, it is a common occurrence for the most 739 // recent data point to be available for some sources, but not from others. 740 // For queries which aggregate from multiple sources, this can lead to a 741 // situation where a persistent and precipitous dip appears at the very end 742 // of data graphs. This happens because the most recent point only 743 // represents the aggregation of a subset of sources, even though the 744 // missing sources are not actually offline, they are simply slightly 745 // delayed in reporting. 746 // 747 // Linear interpolation can gaps in the middle of data, but it does not work 748 // in this case as the current time is later than any data available from 749 // the missing sources. 750 // 751 // In this case, we can assume that a missing data point will be added soon, 752 // and instead do *not* return the partially aggregated data point to the 753 // client. 754 if lowestTimestamp > timespan.NowNanos-timespan.SampleDurationNanos { 755 if len(aggregateValues) < len(iterators) { 756 continue 757 } 758 } 759 760 *dest = append(*dest, tspb.TimeSeriesDatapoint{ 761 TimestampNanos: lowestTimestamp, 762 Value: aggregate(query.GetSourceAggregator(), aggregateValues), 763 }) 764 } 765 } 766 767 // aggSum returns the sum value of all points in the provided slice. 768 func aggSum(data []float64) float64 { 769 total := 0.0 770 for _, dp := range data { 771 total += dp 772 } 773 return total 774 } 775 776 // aggAvg returns the average value of the points in the provided slice. 777 func aggAvg(data []float64) float64 { 778 if len(data) == 0 { 779 return 0.0 780 } 781 return aggSum(data) / float64(len(data)) 782 } 783 784 // aggMax returns the maximum value of any point in the provided slice. 785 func aggMax(data []float64) float64 { 786 max := -math.MaxFloat64 787 for _, dp := range data { 788 if dp > max { 789 max = dp 790 } 791 } 792 return max 793 } 794 795 // aggMin returns the minimum value of any point in the provided slice. 796 func aggMin(data []float64) float64 { 797 min := math.MaxFloat64 798 for _, dp := range data { 799 if dp < min { 800 min = dp 801 } 802 } 803 return min 804 } 805 806 // aggregate computes a single float64 value from the given slice of float64s 807 // using the specified aggregation function. 808 func aggregate(agg tspb.TimeSeriesQueryAggregator, values []float64) float64 { 809 switch agg { 810 case tspb.TimeSeriesQueryAggregator_AVG: 811 return aggAvg(values) 812 case tspb.TimeSeriesQueryAggregator_SUM: 813 return aggSum(values) 814 case tspb.TimeSeriesQueryAggregator_MAX: 815 return aggMax(values) 816 case tspb.TimeSeriesQueryAggregator_MIN: 817 return aggMin(values) 818 } 819 820 panic(fmt.Sprintf("unknown aggregator option encountered: %v", agg)) 821 } 822 823 // readFromDatabase retrieves data for the given series name, at the given disk 824 // resolution, across the supplied time span, for only the given list of 825 // sources. 826 func (db *DB) readFromDatabase( 827 ctx context.Context, 828 seriesName string, 829 diskResolution Resolution, 830 timespan QueryTimespan, 831 sources []string, 832 ) ([]kv.KeyValue, error) { 833 // Iterate over all key timestamps which may contain data for the given 834 // sources, based on the given start/end time and the resolution. 835 b := &kv.Batch{} 836 startTimestamp := diskResolution.normalizeToSlab(timespan.StartNanos) 837 kd := diskResolution.SlabDuration() 838 for currentTimestamp := startTimestamp; currentTimestamp <= timespan.EndNanos; currentTimestamp += kd { 839 for _, source := range sources { 840 key := MakeDataKey(seriesName, source, diskResolution, currentTimestamp) 841 b.Get(key) 842 } 843 } 844 if err := db.db.Run(ctx, b); err != nil { 845 return nil, err 846 } 847 var rows []kv.KeyValue 848 for _, result := range b.Results { 849 row := result.Rows[0] 850 if row.Value == nil { 851 continue 852 } 853 rows = append(rows, row) 854 } 855 return rows, nil 856 } 857 858 // readAllSourcesFromDatabase retrieves data for the given series name, at the 859 // given disk resolution, across the supplied time span, for all sources. The 860 // optional limit is used when memory usage is being limited by the number of 861 // keys, rather than by timespan. 862 func (db *DB) readAllSourcesFromDatabase( 863 ctx context.Context, seriesName string, diskResolution Resolution, timespan QueryTimespan, 864 ) ([]kv.KeyValue, error) { 865 // Based on the supplied timestamps and resolution, construct start and 866 // end keys for a scan that will return every key with data relevant to 867 // the query. Query slightly before and after the actual queried range 868 // to allow interpolation of points at the start and end of the range. 869 startKey := MakeDataKey( 870 seriesName, "" /* source */, diskResolution, timespan.StartNanos, 871 ) 872 endKey := MakeDataKey( 873 seriesName, "" /* source */, diskResolution, timespan.EndNanos, 874 ).PrefixEnd() 875 b := &kv.Batch{} 876 b.Scan(startKey, endKey) 877 878 if err := db.db.Run(ctx, b); err != nil { 879 return nil, err 880 } 881 return b.Results[0].Rows, nil 882 } 883 884 // convertKeysToSpans converts a batch of KeyValues queried from disk into a 885 // map of data spans organized by source. 886 func convertKeysToSpans( 887 ctx context.Context, data []kv.KeyValue, acc *mon.BoundAccount, 888 ) (map[string]timeSeriesSpan, error) { 889 sourceSpans := make(map[string]timeSeriesSpan) 890 for _, row := range data { 891 var data roachpb.InternalTimeSeriesData 892 if err := row.ValueProto(&data); err != nil { 893 return nil, err 894 } 895 _, source, _, _, err := DecodeDataKey(row.Key) 896 if err != nil { 897 return nil, err 898 } 899 sampleSize := sizeOfSample 900 if data.IsColumnar() { 901 sampleSize = sizeOfInt32 + sizeOfFloat64 902 } 903 if err := acc.Grow( 904 ctx, sampleSize*int64(data.SampleCount())+sizeOfTimeSeriesData, 905 ); err != nil { 906 return nil, err 907 } 908 sourceSpans[source] = append(sourceSpans[source], data) 909 } 910 return sourceSpans, nil 911 } 912 913 func verifySourceAggregator(agg tspb.TimeSeriesQueryAggregator) error { 914 switch agg { 915 case tspb.TimeSeriesQueryAggregator_AVG: 916 return nil 917 case tspb.TimeSeriesQueryAggregator_SUM: 918 return nil 919 case tspb.TimeSeriesQueryAggregator_MIN: 920 return nil 921 case tspb.TimeSeriesQueryAggregator_MAX: 922 return nil 923 case tspb.TimeSeriesQueryAggregator_FIRST, 924 tspb.TimeSeriesQueryAggregator_LAST, 925 tspb.TimeSeriesQueryAggregator_VARIANCE: 926 return errors.Errorf("aggregator %s is not yet supported", agg.String()) 927 } 928 return errors.Errorf("query specified unknown time series aggregator %s", agg.String()) 929 } 930 931 func verifyDownsampler(downsampler tspb.TimeSeriesQueryAggregator) error { 932 switch downsampler { 933 case tspb.TimeSeriesQueryAggregator_AVG: 934 return nil 935 case tspb.TimeSeriesQueryAggregator_SUM: 936 return nil 937 case tspb.TimeSeriesQueryAggregator_MIN: 938 return nil 939 case tspb.TimeSeriesQueryAggregator_MAX: 940 return nil 941 case tspb.TimeSeriesQueryAggregator_FIRST, 942 tspb.TimeSeriesQueryAggregator_LAST, 943 tspb.TimeSeriesQueryAggregator_VARIANCE: 944 return errors.Errorf("downsampler %s is not yet supported", downsampler.String()) 945 } 946 return errors.Errorf("query specified unknown time series downsampler %s", downsampler.String()) 947 }