github.com/m3db/m3@v1.5.0/src/query/functions/temporal/aggregation.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package temporal
    22  
    23  import (
    24  	"fmt"
    25  	"math"
    26  	"sort"
    27  	"time"
    28  
    29  	"github.com/m3db/m3/src/query/executor/transform"
    30  	"github.com/m3db/m3/src/query/ts"
    31  )
    32  
// Names of the supported "*_over_time" temporal aggregation functions.
const (
	// AvgType calculates the average of all non-NaN values in the specified interval.
	AvgType = "avg_over_time"

	// CountType calculates the count of all non-NaN values in the specified interval.
	CountType = "count_over_time"

	// MinType calculates the minimum of all non-NaN values in the specified interval.
	MinType = "min_over_time"

	// MaxType calculates the maximum of all non-NaN values in the specified interval.
	MaxType = "max_over_time"

	// SumType calculates the sum of all non-NaN values in the specified interval.
	SumType = "sum_over_time"

	// StdDevType calculates the population standard deviation of all non-NaN
	// values in the specified interval.
	StdDevType = "stddev_over_time"

	// StdVarType calculates the population variance of all non-NaN values in
	// the specified interval.
	StdVarType = "stdvar_over_time"

	// LastType returns the most recent value in the specified interval.
	LastType = "last_over_time"

	// QuantileType calculates the φ-quantile (0 ≤ φ ≤ 1) of the values in the specified interval.
	QuantileType = "quantile_over_time"
)
    61  
// aggFunc reduces a slice of values to a single aggregated value.
type aggFunc func([]float64) float64
    63  
var (
	// aggFuncs maps each aggregation type accepted by NewAggOp to the function
	// that implements it. QuantileType is not listed here; its function is
	// built by NewQuantileOp from the requested quantile.
	aggFuncs = map[string]aggFunc{
		AvgType:    avgOverTime,
		CountType:  countOverTime,
		MinType:    minOverTime,
		MaxType:    maxOverTime,
		SumType:    sumOverTime,
		StdDevType: stddevOverTime,
		StdVarType: stdvarOverTime,
		LastType:   lastOverTime,
	}
)
    76  
// aggProcessor carries the aggregation function that is handed to every
// aggNode it initializes.
type aggProcessor struct {
	// aggFunc is the aggregation applied by nodes created from this processor.
	aggFunc aggFunc
}
    80  
    81  func (a aggProcessor) initialize(
    82  	_ time.Duration,
    83  	opts transform.Options,
    84  ) processor {
    85  	return &aggNode{
    86  		aggFunc: a.aggFunc,
    87  	}
    88  }
    89  
    90  // NewQuantileOp create a new base temporal transform for quantile_over_time func.
    91  func NewQuantileOp(args []interface{}, optype string) (transform.Params, error) {
    92  	if optype != QuantileType {
    93  		return nil, fmt.Errorf("unknown aggregation type: %s", optype)
    94  	}
    95  
    96  	if len(args) != 2 {
    97  		return emptyOp, fmt.Errorf("invalid number of args for %s: %d", QuantileType, len(args))
    98  	}
    99  
   100  	q, ok := args[0].(float64)
   101  	if !ok {
   102  		return emptyOp, fmt.Errorf("unable to cast to quantile argument: %v for %s", args[0], QuantileType)
   103  	}
   104  
   105  	duration, ok := args[1].(time.Duration)
   106  	if !ok {
   107  		return emptyOp, fmt.Errorf("unable to cast to scalar argument: %v for %s", args[1], QuantileType)
   108  	}
   109  
   110  	aggregationFunc := makeQuantileOverTimeFn(q)
   111  
   112  	a := aggProcessor{
   113  		aggFunc: aggregationFunc,
   114  	}
   115  
   116  	return newBaseOp(duration, QuantileType, a)
   117  }
   118  
   119  // NewAggOp creates a new base temporal transform with a specified node.
   120  func NewAggOp(args []interface{}, optype string) (transform.Params, error) {
   121  	if aggregationFunc, ok := aggFuncs[optype]; ok {
   122  		if len(args) != 1 {
   123  			return emptyOp, fmt.Errorf("invalid number of args for %s: %d", optype, len(args))
   124  		}
   125  
   126  		duration, ok := args[0].(time.Duration)
   127  		if !ok {
   128  			return emptyOp, fmt.Errorf("unable to cast to scalar argument: %v for %s", args[0], optype)
   129  		}
   130  
   131  		a := aggProcessor{
   132  			aggFunc: aggregationFunc,
   133  		}
   134  
   135  		return newBaseOp(duration, optype, a)
   136  	}
   137  
   138  	return nil, fmt.Errorf("unknown aggregation type: %s", optype)
   139  }
   140  
// aggNode is the processor returned by aggProcessor.initialize; it applies
// aggFunc to the values of the datapoints it is asked to process.
type aggNode struct {
	// values is passed to Datapoints.Reset on every process call and replaced
	// by its result.
	values  []float64
	// aggFunc is the aggregation applied to values.
	aggFunc func([]float64) float64
}
   145  
   146  func (a *aggNode) process(datapoints ts.Datapoints, _ iterationBounds) float64 {
   147  	a.values = datapoints.Reset(a.values)
   148  	return a.aggFunc(a.values)
   149  }
   150  
   151  func avgOverTime(values []float64) float64 {
   152  	sum, count := sumAndCount(values)
   153  	return sum / count
   154  }
   155  
   156  func countOverTime(values []float64) float64 {
   157  	_, count := sumAndCount(values)
   158  	if count == 0 {
   159  		return math.NaN()
   160  	}
   161  
   162  	return count
   163  }
   164  
   165  func minOverTime(values []float64) float64 {
   166  	var seenNotNaN bool
   167  	min := math.Inf(1)
   168  	for _, v := range values {
   169  		if !math.IsNaN(v) {
   170  			seenNotNaN = true
   171  			min = math.Min(min, v)
   172  		}
   173  	}
   174  
   175  	if !seenNotNaN {
   176  		return math.NaN()
   177  	}
   178  
   179  	return min
   180  }
   181  
   182  func maxOverTime(values []float64) float64 {
   183  	var seenNotNaN bool
   184  	max := math.Inf(-1)
   185  	for _, v := range values {
   186  		if !math.IsNaN(v) {
   187  			seenNotNaN = true
   188  			max = math.Max(max, v)
   189  		}
   190  	}
   191  
   192  	if !seenNotNaN {
   193  		return math.NaN()
   194  	}
   195  
   196  	return max
   197  }
   198  
   199  func sumOverTime(values []float64) float64 {
   200  	sum, _ := sumAndCount(values)
   201  	return sum
   202  }
   203  
   204  func stddevOverTime(values []float64) float64 {
   205  	return math.Sqrt(stdvarOverTime(values))
   206  }
   207  
   208  func stdvarOverTime(values []float64) float64 {
   209  	var aux, count, mean float64
   210  	for _, v := range values {
   211  		if !math.IsNaN(v) {
   212  			count++
   213  			delta := v - mean
   214  			mean += delta / count
   215  			aux += delta * (v - mean)
   216  		}
   217  	}
   218  
   219  	// NB: stdvar and stddev are undefined unless there are more than 2 points.
   220  	if count < 2 {
   221  		return math.NaN()
   222  	}
   223  
   224  	return aux / count
   225  }
   226  
   227  func lastOverTime(values []float64) float64 {
   228  	length := len(values)
   229  	if length == 0 {
   230  		return math.NaN()
   231  	}
   232  
   233  	return values[length-1]
   234  }
   235  
   236  func sumAndCount(values []float64) (float64, float64) {
   237  	sum := 0.0
   238  	count := 0.0
   239  	for _, v := range values {
   240  		if !math.IsNaN(v) {
   241  			sum += v
   242  			count++
   243  		}
   244  	}
   245  
   246  	if count == 0 {
   247  		return math.NaN(), 0
   248  	}
   249  
   250  	return sum, count
   251  }
   252  
   253  func removeNaNs(vals []float64) []float64 {
   254  	b := vals[:0]
   255  	for _, val := range vals {
   256  		if !math.IsNaN(val) {
   257  			b = append(b, val)
   258  		}
   259  	}
   260  
   261  	return b
   262  }
   263  
   264  func makeQuantileOverTimeFn(q float64) aggFunc {
   265  	return func(values []float64) float64 {
   266  		return quantile(q, removeNaNs(values))
   267  	}
   268  }
   269  
   270  // qauntile calculates the given quantile of a slice of values.
   271  //
   272  // This slice will be sorted.
   273  // If 'values' has zero elements, NaN is returned.
   274  // If q<0, -Inf is returned.
   275  // If q>1, +Inf is returned.
   276  func quantile(q float64, values []float64) float64 {
   277  	if len(values) == 0 {
   278  		return math.NaN()
   279  	}
   280  
   281  	if q < 0 {
   282  		return math.Inf(-1)
   283  	}
   284  
   285  	if q > 1 {
   286  		return math.Inf(+1)
   287  	}
   288  
   289  	sort.Float64s(values)
   290  
   291  	n := float64(len(values))
   292  	// When the quantile lies between two values,
   293  	// we use a weighted average of the two values.
   294  	rank := q * (n - 1)
   295  
   296  	lowerIndex := math.Max(0, math.Floor(rank))
   297  	upperIndex := math.Min(n-1, lowerIndex+1)
   298  
   299  	weight := rank - math.Floor(rank)
   300  	return values[int(lowerIndex)]*(1-weight) + values[int(upperIndex)]*weight
   301  }