github.com/m3db/m3@v1.5.0/src/query/graphite/ts/lttb.go (about) 1 // Copyright (c) 2019 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package ts 22 23 import ( 24 "math" 25 "time" 26 ) 27 28 // LTTB down-samples the data to contain only threshold number of points that 29 // have the same visual shape as the original data. Inspired from 30 // https://github.com/dgryski/go-lttb which is based on 31 // https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf 32 func LTTB(b *Series, start time.Time, end time.Time, millisPerStep int) *Series { 33 if end.After(b.EndTime()) { 34 end = b.EndTime() 35 } 36 37 seriesValuesPerStep := millisPerStep / b.MillisPerStep() 38 seriesStart, seriesEnd := b.StepAtTime(start), b.StepAtTime(end) 39 40 // This threshold is different than max datapoints since we ensure step size is an integer multiple of original series step 41 threshold := int(math.Ceil(float64(seriesEnd-seriesStart) / float64(seriesValuesPerStep))) 42 if threshold == 0 || threshold > b.Len() { 43 return b // Nothing to do 44 } 45 46 values := NewValues(b.ctx, millisPerStep, threshold) 47 // Bucket size. Leave room for start and end data points 48 every := float64(seriesValuesPerStep) 49 // Always add the first point 50 values.SetValueAt(0, b.ValueAt(seriesStart)) 51 // Set a to be the first chosen point 52 a := seriesStart 53 54 bucketStart := seriesStart + 1 55 bucketCenter := bucketStart + int(math.Floor(every)) + 1 56 57 for i := 0; i < threshold-2; i++ { 58 bucketEnd := bucketCenter + int(math.Floor(every)) 59 60 // Calculate point average for next bucket (containing c) 61 avgRangeStart := bucketCenter 62 avgRangeEnd := bucketEnd 63 64 if avgRangeEnd >= seriesEnd { 65 avgRangeEnd = seriesEnd 66 } 67 68 avgRangeLength := float64(avgRangeEnd - avgRangeStart) 69 70 var avgX, avgY float64 71 var valuesRead int 72 for ; avgRangeStart < avgRangeEnd; avgRangeStart++ { 73 yVal := b.ValueAt(avgRangeStart) 74 if math.IsNaN(yVal) { 75 continue 76 } 77 valuesRead++ 78 avgX += float64(avgRangeStart) 79 avgY += yVal 80 } 81 82 if valuesRead > 0 { 83 avgX /= avgRangeLength 84 avgY /= avgRangeLength 85 } else { 86 // If all nulls then should not assign a value to average 87 avgX = math.NaN() 88 avgY = math.NaN() 89 } 90 91 // Get the range for this bucket 92 rangeOffs := bucketStart 93 rangeTo := bucketCenter 94 95 // Point a 96 pointAX := float64(a) 97 pointAY := b.ValueAt(a) 98 99 var nextA int 100 101 // If all points in left or right bucket are null, then fallback to average 102 if math.IsNaN(avgY) || math.IsNaN(pointAY) { 103 nextA = indexClosestToAverage(b, rangeOffs, rangeTo) 104 } else { 105 nextA = indexWithLargestTriangle(b, rangeOffs, rangeTo, pointAX, pointAY, avgX, avgY) 106 } 107 108 values.SetValueAt(i+1, b.ValueAt(nextA)) // Pick this point from the bucket 109 a = nextA // This a is the next a (chosen b) 110 111 bucketStart = bucketCenter 112 bucketCenter = bucketEnd 113 } 114 115 if values.Len() > 1 { 116 // Always add last if not just a single step 117 values.SetValueAt(values.Len()-1, b.ValueAt(seriesEnd-1)) 118 } 119 120 // Derive a new series 121 sampledSeries := b.DerivedSeries(start, values) 122 return sampledSeries 123 } 124 125 func indexWithLargestTriangle(b *Series, start int, end int, leftX float64, leftY float64, rightX float64, rightY float64) int { 126 // The original algorithm implementation initializes the maxArea as 0 which is a bug! 127 maxArea := -1.0 128 var largestIndex int 129 130 xDifference := leftX - rightX 131 yDifference := rightY - leftY 132 for index := start; index < end; index++ { 133 // Calculate triangle area over three buckets 134 area := xDifference*(b.ValueAt(index)-leftY) - (leftX-float64(index))*yDifference 135 // We only care about the relative area here. 136 area = math.Abs(area) 137 // Handle nulls properly 138 if math.IsNaN(area) { 139 area = 0 140 } 141 142 if area > maxArea { 143 maxArea = area 144 largestIndex = index 145 } 146 } 147 148 return largestIndex 149 } 150 151 func indexClosestToAverage(b *Series, start int, end int) int { 152 var sum float64 153 var count int 154 for index := start; index < end; index++ { 155 if math.IsNaN(b.ValueAt(index)) { 156 continue 157 } 158 159 sum += b.ValueAt(index) 160 count++ 161 } 162 163 if count == 0 { 164 return start 165 } 166 167 average := sum / float64(count) 168 minDifference := math.MaxFloat64 169 closestIndex := start 170 for index := start; index < end; index++ { 171 difference := math.Abs(average - b.ValueAt(index)) 172 if !math.IsNaN(b.ValueAt(index)) && difference < minDifference { 173 closestIndex = index 174 minDifference = difference 175 } 176 } 177 178 return closestIndex 179 }