github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/query/graphite/common/percentiles.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package common
    22  
    23  import (
    24  	"fmt"
    25  	"math"
    26  
    27  	"github.com/m3db/m3/src/query/graphite/ts"
    28  	"github.com/m3db/m3/src/x/errors"
    29  )
    30  
    31  const (
    32  	// FloatingPointFormat is the floating point format for naming
    33  	FloatingPointFormat = "%.3f"
    34  )
    35  
    36  // ErrInvalidPercentile is used when the percentile specified is incorrect
    37  func ErrInvalidPercentile(percentile float64) error {
    38  	return errors.NewInvalidParamsError(fmt.Errorf("invalid percentile, percentile="+FloatingPointFormat, percentile))
    39  }
    40  
    41  // PercentileNamer formats a string with a percentile
    42  type PercentileNamer func(name string, percentile float64) string
    43  
    44  // ThresholdComparator compares two floats for other comparison
    45  // functions such as Percentile checks.
    46  type ThresholdComparator func(v, threshold float64) bool
    47  
    48  // GreaterThan is a ThresholdComparator function for when
    49  // a value is greater than a threshold
    50  func GreaterThan(v, threshold float64) bool {
    51  	return v > threshold
    52  }
    53  
    54  // LessThan is a ThresholdComparator function for when
    55  // a value is less than a threshold
    56  func LessThan(v, threshold float64) bool {
    57  	return v < threshold
    58  }
    59  
    60  // GetPercentile computes the percentile cut off for an array of floats
    61  func GetPercentile(input []float64, percentile float64, interpolate bool) float64 {
    62  	nans := SafeSort(input)
    63  	series := input[nans:]
    64  	if len(series) == 0 {
    65  		return math.NaN()
    66  	}
    67  
    68  	fractionalRank := (percentile / 100.0) * (float64(len(series) + 1))
    69  	rank := int(fractionalRank)
    70  	rankFraction := fractionalRank - float64(rank)
    71  
    72  	if interpolate == false {
    73  		rank = rank + int(math.Ceil(rankFraction))
    74  	}
    75  
    76  	var percentileResult float64
    77  	if rank == 0 {
    78  		percentileResult = series[0]
    79  	} else if rank-1 == len(series) {
    80  		percentileResult = series[len(series)-1]
    81  	} else {
    82  		percentileResult = series[rank-1]
    83  	}
    84  
    85  	if interpolate && rank != len(series) {
    86  		nextValue := series[rank]
    87  		percentileResult = percentileResult + (rankFraction * (nextValue - percentileResult))
    88  	}
    89  
    90  	return percentileResult
    91  }
    92  
    93  // NPercentile returns percentile-percent of each series in the seriesList.
    94  func NPercentile(ctx *Context, in ts.SeriesList, percentile float64, pn PercentileNamer) (ts.SeriesList, error) {
    95  	if percentile < 0.0 || percentile > 100.0 {
    96  		return ts.NewSeriesList(), ErrInvalidPercentile(percentile)
    97  	}
    98  	results := make([]*ts.Series, 0, in.Len())
    99  	for _, s := range in.Values {
   100  		safeValues := s.SafeValues()
   101  		if len(safeValues) == 0 {
   102  			continue
   103  		}
   104  		percentileVal := GetPercentile(safeValues, percentile, false)
   105  		if !math.IsNaN(percentileVal) {
   106  			vals := ts.NewConstantValues(ctx, percentileVal, s.Len(), s.MillisPerStep())
   107  			percentileSeries := ts.NewSeries(ctx, pn(s.Name(), percentile), s.StartTime(), vals)
   108  			results = append(results, percentileSeries)
   109  		}
   110  	}
   111  	in.Values = results
   112  	return in, nil
   113  }
   114  
   115  // RemoveByPercentile removes all series above or below the given percentile, as
   116  // determined by the PercentileComparator
   117  func RemoveByPercentile(
   118  	ctx *Context,
   119  	in ts.SeriesList,
   120  	percentile float64,
   121  	pn PercentileNamer,
   122  	tc ThresholdComparator,
   123  ) (ts.SeriesList, error) {
   124  	results := make([]*ts.Series, 0, in.Len())
   125  	for _, series := range in.Values {
   126  		single := ts.SeriesList{
   127  			Values:   []*ts.Series{series},
   128  			Metadata: in.Metadata,
   129  		}
   130  		percentileSeries, err := NPercentile(ctx, single, percentile, pn)
   131  		if err != nil {
   132  			return ts.NewSeriesList(), err
   133  		}
   134  
   135  		numSteps := series.Len()
   136  		vals := ts.NewValues(ctx, series.MillisPerStep(), numSteps)
   137  		if percentileSeries.Len() == 1 {
   138  			percentile := percentileSeries.Values[0].ValueAt(0)
   139  			for i := 0; i < numSteps; i++ {
   140  				v := series.ValueAt(i)
   141  				if !tc(v, percentile) {
   142  					vals.SetValueAt(i, v)
   143  				}
   144  			}
   145  		}
   146  		name := pn(series.Name(), percentile)
   147  		newSeries := ts.NewSeries(ctx, name, series.StartTime(), vals)
   148  		results = append(results, newSeries)
   149  	}
   150  	in.Values = results
   151  	return in, nil
   152  }