github.com/m3db/m3@v1.5.0/src/query/functions/temporal/aggregation.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package temporal 22 23 import ( 24 "fmt" 25 "math" 26 "sort" 27 "time" 28 29 "github.com/m3db/m3/src/query/executor/transform" 30 "github.com/m3db/m3/src/query/ts" 31 ) 32 33 const ( 34 // AvgType calculates the average of all values in the specified interval. 35 AvgType = "avg_over_time" 36 37 // CountType calculates count of all values in the specified interval. 38 CountType = "count_over_time" 39 40 // MinType calculates the minimum of all values in the specified interval. 41 MinType = "min_over_time" 42 43 // MaxType calculates the maximum of all values in the specified interval. 44 MaxType = "max_over_time" 45 46 // SumType calculates the sum of all values in the specified interval. 47 SumType = "sum_over_time" 48 49 // StdDevType calculates the standard deviation of all values in the specified interval. 50 StdDevType = "stddev_over_time" 51 52 // StdVarType calculates the standard variance of all values in the specified interval. 53 StdVarType = "stdvar_over_time" 54 55 // LastType returns the most recent value in the specified interval. 56 LastType = "last_over_time" 57 58 // QuantileType calculates the φ-quantile (0 ≤ φ ≤ 1) of the values in the specified interval. 59 QuantileType = "quantile_over_time" 60 ) 61 62 type aggFunc func([]float64) float64 63 64 var ( 65 aggFuncs = map[string]aggFunc{ 66 AvgType: avgOverTime, 67 CountType: countOverTime, 68 MinType: minOverTime, 69 MaxType: maxOverTime, 70 SumType: sumOverTime, 71 StdDevType: stddevOverTime, 72 StdVarType: stdvarOverTime, 73 LastType: lastOverTime, 74 } 75 ) 76 77 type aggProcessor struct { 78 aggFunc aggFunc 79 } 80 81 func (a aggProcessor) initialize( 82 _ time.Duration, 83 opts transform.Options, 84 ) processor { 85 return &aggNode{ 86 aggFunc: a.aggFunc, 87 } 88 } 89 90 // NewQuantileOp create a new base temporal transform for quantile_over_time func. 91 func NewQuantileOp(args []interface{}, optype string) (transform.Params, error) { 92 if optype != QuantileType { 93 return nil, fmt.Errorf("unknown aggregation type: %s", optype) 94 } 95 96 if len(args) != 2 { 97 return emptyOp, fmt.Errorf("invalid number of args for %s: %d", QuantileType, len(args)) 98 } 99 100 q, ok := args[0].(float64) 101 if !ok { 102 return emptyOp, fmt.Errorf("unable to cast to quantile argument: %v for %s", args[0], QuantileType) 103 } 104 105 duration, ok := args[1].(time.Duration) 106 if !ok { 107 return emptyOp, fmt.Errorf("unable to cast to scalar argument: %v for %s", args[1], QuantileType) 108 } 109 110 aggregationFunc := makeQuantileOverTimeFn(q) 111 112 a := aggProcessor{ 113 aggFunc: aggregationFunc, 114 } 115 116 return newBaseOp(duration, QuantileType, a) 117 } 118 119 // NewAggOp creates a new base temporal transform with a specified node. 120 func NewAggOp(args []interface{}, optype string) (transform.Params, error) { 121 if aggregationFunc, ok := aggFuncs[optype]; ok { 122 if len(args) != 1 { 123 return emptyOp, fmt.Errorf("invalid number of args for %s: %d", optype, len(args)) 124 } 125 126 duration, ok := args[0].(time.Duration) 127 if !ok { 128 return emptyOp, fmt.Errorf("unable to cast to scalar argument: %v for %s", args[0], optype) 129 } 130 131 a := aggProcessor{ 132 aggFunc: aggregationFunc, 133 } 134 135 return newBaseOp(duration, optype, a) 136 } 137 138 return nil, fmt.Errorf("unknown aggregation type: %s", optype) 139 } 140 141 type aggNode struct { 142 values []float64 143 aggFunc func([]float64) float64 144 } 145 146 func (a *aggNode) process(datapoints ts.Datapoints, _ iterationBounds) float64 { 147 a.values = datapoints.Reset(a.values) 148 return a.aggFunc(a.values) 149 } 150 151 func avgOverTime(values []float64) float64 { 152 sum, count := sumAndCount(values) 153 return sum / count 154 } 155 156 func countOverTime(values []float64) float64 { 157 _, count := sumAndCount(values) 158 if count == 0 { 159 return math.NaN() 160 } 161 162 return count 163 } 164 165 func minOverTime(values []float64) float64 { 166 var seenNotNaN bool 167 min := math.Inf(1) 168 for _, v := range values { 169 if !math.IsNaN(v) { 170 seenNotNaN = true 171 min = math.Min(min, v) 172 } 173 } 174 175 if !seenNotNaN { 176 return math.NaN() 177 } 178 179 return min 180 } 181 182 func maxOverTime(values []float64) float64 { 183 var seenNotNaN bool 184 max := math.Inf(-1) 185 for _, v := range values { 186 if !math.IsNaN(v) { 187 seenNotNaN = true 188 max = math.Max(max, v) 189 } 190 } 191 192 if !seenNotNaN { 193 return math.NaN() 194 } 195 196 return max 197 } 198 199 func sumOverTime(values []float64) float64 { 200 sum, _ := sumAndCount(values) 201 return sum 202 } 203 204 func stddevOverTime(values []float64) float64 { 205 return math.Sqrt(stdvarOverTime(values)) 206 } 207 208 func stdvarOverTime(values []float64) float64 { 209 var aux, count, mean float64 210 for _, v := range values { 211 if !math.IsNaN(v) { 212 count++ 213 delta := v - mean 214 mean += delta / count 215 aux += delta * (v - mean) 216 } 217 } 218 219 // NB: stdvar and stddev are undefined unless there are more than 2 points. 220 if count < 2 { 221 return math.NaN() 222 } 223 224 return aux / count 225 } 226 227 func lastOverTime(values []float64) float64 { 228 length := len(values) 229 if length == 0 { 230 return math.NaN() 231 } 232 233 return values[length-1] 234 } 235 236 func sumAndCount(values []float64) (float64, float64) { 237 sum := 0.0 238 count := 0.0 239 for _, v := range values { 240 if !math.IsNaN(v) { 241 sum += v 242 count++ 243 } 244 } 245 246 if count == 0 { 247 return math.NaN(), 0 248 } 249 250 return sum, count 251 } 252 253 func removeNaNs(vals []float64) []float64 { 254 b := vals[:0] 255 for _, val := range vals { 256 if !math.IsNaN(val) { 257 b = append(b, val) 258 } 259 } 260 261 return b 262 } 263 264 func makeQuantileOverTimeFn(q float64) aggFunc { 265 return func(values []float64) float64 { 266 return quantile(q, removeNaNs(values)) 267 } 268 } 269 270 // qauntile calculates the given quantile of a slice of values. 271 // 272 // This slice will be sorted. 273 // If 'values' has zero elements, NaN is returned. 274 // If q<0, -Inf is returned. 275 // If q>1, +Inf is returned. 276 func quantile(q float64, values []float64) float64 { 277 if len(values) == 0 { 278 return math.NaN() 279 } 280 281 if q < 0 { 282 return math.Inf(-1) 283 } 284 285 if q > 1 { 286 return math.Inf(+1) 287 } 288 289 sort.Float64s(values) 290 291 n := float64(len(values)) 292 // When the quantile lies between two values, 293 // we use a weighted average of the two values. 294 rank := q * (n - 1) 295 296 lowerIndex := math.Max(0, math.Floor(rank)) 297 upperIndex := math.Min(n-1, lowerIndex+1) 298 299 weight := rank - math.Floor(rank) 300 return values[int(lowerIndex)]*(1-weight) + values[int(upperIndex)]*weight 301 }