github.com/m3db/m3@v1.5.0/src/query/graphite/ts/lttb.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package ts
    22  
    23  import (
    24  	"math"
    25  	"time"
    26  )
    27  
    28  // LTTB down-samples the data to contain only threshold number of points that
    29  // have the same visual shape as the original data. Inspired from
    30  // https://github.com/dgryski/go-lttb which is based on
    31  // https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf
    32  func LTTB(b *Series, start time.Time, end time.Time, millisPerStep int) *Series {
    33  	if end.After(b.EndTime()) {
    34  		end = b.EndTime()
    35  	}
    36  
    37  	seriesValuesPerStep := millisPerStep / b.MillisPerStep()
    38  	seriesStart, seriesEnd := b.StepAtTime(start), b.StepAtTime(end)
    39  
    40  	// This threshold is different than max datapoints since we ensure step size is an integer multiple of original series step
    41  	threshold := int(math.Ceil(float64(seriesEnd-seriesStart) / float64(seriesValuesPerStep)))
    42  	if threshold == 0 || threshold > b.Len() {
    43  		return b // Nothing to do
    44  	}
    45  
    46  	values := NewValues(b.ctx, millisPerStep, threshold)
    47  	// Bucket size. Leave room for start and end data points
    48  	every := float64(seriesValuesPerStep)
    49  	// Always add the first point
    50  	values.SetValueAt(0, b.ValueAt(seriesStart))
    51  	// Set a to be the first chosen point
    52  	a := seriesStart
    53  
    54  	bucketStart := seriesStart + 1
    55  	bucketCenter := bucketStart + int(math.Floor(every)) + 1
    56  
    57  	for i := 0; i < threshold-2; i++ {
    58  		bucketEnd := bucketCenter + int(math.Floor(every))
    59  
    60  		// Calculate point average for next bucket (containing c)
    61  		avgRangeStart := bucketCenter
    62  		avgRangeEnd := bucketEnd
    63  
    64  		if avgRangeEnd >= seriesEnd {
    65  			avgRangeEnd = seriesEnd
    66  		}
    67  
    68  		avgRangeLength := float64(avgRangeEnd - avgRangeStart)
    69  
    70  		var avgX, avgY float64
    71  		var valuesRead int
    72  		for ; avgRangeStart < avgRangeEnd; avgRangeStart++ {
    73  			yVal := b.ValueAt(avgRangeStart)
    74  			if math.IsNaN(yVal) {
    75  				continue
    76  			}
    77  			valuesRead++
    78  			avgX += float64(avgRangeStart)
    79  			avgY += yVal
    80  		}
    81  
    82  		if valuesRead > 0 {
    83  			avgX /= avgRangeLength
    84  			avgY /= avgRangeLength
    85  		} else {
    86  			// If all nulls then should not assign a value to average
    87  			avgX = math.NaN()
    88  			avgY = math.NaN()
    89  		}
    90  
    91  		// Get the range for this bucket
    92  		rangeOffs := bucketStart
    93  		rangeTo := bucketCenter
    94  
    95  		// Point a
    96  		pointAX := float64(a)
    97  		pointAY := b.ValueAt(a)
    98  
    99  		var nextA int
   100  
   101  		// If all points in left or right bucket are null, then fallback to average
   102  		if math.IsNaN(avgY) || math.IsNaN(pointAY) {
   103  			nextA = indexClosestToAverage(b, rangeOffs, rangeTo)
   104  		} else {
   105  			nextA = indexWithLargestTriangle(b, rangeOffs, rangeTo, pointAX, pointAY, avgX, avgY)
   106  		}
   107  
   108  		values.SetValueAt(i+1, b.ValueAt(nextA)) // Pick this point from the bucket
   109  		a = nextA                                // This a is the next a (chosen b)
   110  
   111  		bucketStart = bucketCenter
   112  		bucketCenter = bucketEnd
   113  	}
   114  
   115  	if values.Len() > 1 {
   116  		// Always add last if not just a single step
   117  		values.SetValueAt(values.Len()-1, b.ValueAt(seriesEnd-1))
   118  	}
   119  
   120  	// Derive a new series
   121  	sampledSeries := b.DerivedSeries(start, values)
   122  	return sampledSeries
   123  }
   124  
   125  func indexWithLargestTriangle(b *Series, start int, end int, leftX float64, leftY float64, rightX float64, rightY float64) int {
   126  	// The original algorithm implementation initializes the maxArea as 0 which is a bug!
   127  	maxArea := -1.0
   128  	var largestIndex int
   129  
   130  	xDifference := leftX - rightX
   131  	yDifference := rightY - leftY
   132  	for index := start; index < end; index++ {
   133  		// Calculate triangle area over three buckets
   134  		area := xDifference*(b.ValueAt(index)-leftY) - (leftX-float64(index))*yDifference
   135  		// We only care about the relative area here.
   136  		area = math.Abs(area)
   137  		// Handle nulls properly
   138  		if math.IsNaN(area) {
   139  			area = 0
   140  		}
   141  
   142  		if area > maxArea {
   143  			maxArea = area
   144  			largestIndex = index
   145  		}
   146  	}
   147  
   148  	return largestIndex
   149  }
   150  
   151  func indexClosestToAverage(b *Series, start int, end int) int {
   152  	var sum float64
   153  	var count int
   154  	for index := start; index < end; index++ {
   155  		if math.IsNaN(b.ValueAt(index)) {
   156  			continue
   157  		}
   158  
   159  		sum += b.ValueAt(index)
   160  		count++
   161  	}
   162  
   163  	if count == 0 {
   164  		return start
   165  	}
   166  
   167  	average := sum / float64(count)
   168  	minDifference := math.MaxFloat64
   169  	closestIndex := start
   170  	for index := start; index < end; index++ {
   171  		difference := math.Abs(average - b.ValueAt(index))
   172  		if !math.IsNaN(b.ValueAt(index)) && difference < minDifference {
   173  			closestIndex = index
   174  			minDifference = difference
   175  		}
   176  	}
   177  
   178  	return closestIndex
   179  }