github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/configs/legacy_promql/quantile.go (about)

     1  // Copyright 2015 The Prometheus Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package promql
    15  
    16  import (
    17  	"math"
    18  	"sort"
    19  
    20  	"github.com/prometheus/prometheus/pkg/labels"
    21  )
    22  
    23  // Helpers to calculate quantiles.
    24  
    25  // excludedLabels are the labels to exclude from signature calculation for
    26  // quantiles.
    27  var excludedLabels = []string{
    28  	labels.MetricName,
    29  	labels.BucketLabel,
    30  }
    31  
    32  type bucket struct {
    33  	upperBound float64
    34  	count      float64
    35  }
    36  
    37  // buckets implements sort.Interface.
    38  type buckets []bucket
    39  
    40  func (b buckets) Len() int           { return len(b) }
    41  func (b buckets) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
    42  func (b buckets) Less(i, j int) bool { return b[i].upperBound < b[j].upperBound }
    43  
    44  type metricWithBuckets struct {
    45  	metric  labels.Labels
    46  	buckets buckets
    47  }
    48  
    49  // bucketQuantile calculates the quantile 'q' based on the given buckets. The
    50  // buckets will be sorted by upperBound by this function (i.e. no sorting
    51  // needed before calling this function). The quantile value is interpolated
    52  // assuming a linear distribution within a bucket. However, if the quantile
    53  // falls into the highest bucket, the upper bound of the 2nd highest bucket is
    54  // returned. A natural lower bound of 0 is assumed if the upper bound of the
    55  // lowest bucket is greater 0. In that case, interpolation in the lowest bucket
    56  // happens linearly between 0 and the upper bound of the lowest bucket.
    57  // However, if the lowest bucket has an upper bound less or equal 0, this upper
    58  // bound is returned if the quantile falls into the lowest bucket.
    59  //
    60  // There are a number of special cases (once we have a way to report errors
    61  // happening during evaluations of AST functions, we should report those
    62  // explicitly):
    63  //
    64  // If 'buckets' has fewer than 2 elements, NaN is returned.
    65  //
    66  // If the highest bucket is not +Inf, NaN is returned.
    67  //
    68  // If q<0, -Inf is returned.
    69  //
    70  // If q>1, +Inf is returned.
    71  func bucketQuantile(q float64, buckets buckets) float64 {
    72  	if q < 0 {
    73  		return math.Inf(-1)
    74  	}
    75  	if q > 1 {
    76  		return math.Inf(+1)
    77  	}
    78  	if len(buckets) < 2 {
    79  		return math.NaN()
    80  	}
    81  	sort.Sort(buckets)
    82  	if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) {
    83  		return math.NaN()
    84  	}
    85  
    86  	ensureMonotonic(buckets)
    87  
    88  	rank := q * buckets[len(buckets)-1].count
    89  	b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank })
    90  
    91  	if b == len(buckets)-1 {
    92  		return buckets[len(buckets)-2].upperBound
    93  	}
    94  	if b == 0 && buckets[0].upperBound <= 0 {
    95  		return buckets[0].upperBound
    96  	}
    97  	var (
    98  		bucketStart float64
    99  		bucketEnd   = buckets[b].upperBound
   100  		count       = buckets[b].count
   101  	)
   102  	if b > 0 {
   103  		bucketStart = buckets[b-1].upperBound
   104  		count -= buckets[b-1].count
   105  		rank -= buckets[b-1].count
   106  	}
   107  	return bucketStart + (bucketEnd-bucketStart)*(rank/count)
   108  }
   109  
   110  // The assumption that bucket counts increase monotonically with increasing
   111  // upperBound may be violated during:
   112  //
   113  //   * Recording rule evaluation of histogram_quantile, especially when rate()
   114  //      has been applied to the underlying bucket timeseries.
   115  //   * Evaluation of histogram_quantile computed over federated bucket
   116  //      timeseries, especially when rate() has been applied.
   117  //
   118  // This is because scraped data is not made available to rule evaluation or
   119  // federation atomically, so some buckets are computed with data from the
   120  // most recent scrapes, but the other buckets are missing data from the most
   121  // recent scrape.
   122  //
   123  // Monotonicity is usually guaranteed because if a bucket with upper bound
   124  // u1 has count c1, then any bucket with a higher upper bound u > u1 must
   125  // have counted all c1 observations and perhaps more, so that c  >= c1.
   126  //
   127  // Randomly interspersed partial sampling breaks that guarantee, and rate()
   128  // exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from
   129  // 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The
   130  // monotonicity is broken. It is exacerbated by rate() because under normal
   131  // operation, cumulative counting of buckets will cause the bucket counts to
   132  // diverge such that small differences from missing samples are not a problem.
   133  // rate() removes this divergence.)
   134  //
   135  // bucketQuantile depends on that monotonicity to do a binary search for the
   136  // bucket with the φ-quantile count, so breaking the monotonicity
   137  // guarantee causes bucketQuantile() to return undefined (nonsense) results.
   138  //
   139  // As a somewhat hacky solution until ingestion is atomic per scrape, we
   140  // calculate the "envelope" of the histogram buckets, essentially removing
   141  // any decreases in the count between successive buckets.
   142  
   143  func ensureMonotonic(buckets buckets) {
   144  	max := buckets[0].count
   145  	for i := range buckets[1:] {
   146  		switch {
   147  		case buckets[i].count > max:
   148  			max = buckets[i].count
   149  		case buckets[i].count < max:
   150  			buckets[i].count = max
   151  		}
   152  	}
   153  }
   154  
   155  // qauntile calculates the given quantile of a vector of samples.
   156  //
   157  // The Vector will be sorted.
   158  // If 'values' has zero elements, NaN is returned.
   159  // If q<0, -Inf is returned.
   160  // If q>1, +Inf is returned.
   161  func quantile(q float64, values vectorByValueHeap) float64 {
   162  	if len(values) == 0 {
   163  		return math.NaN()
   164  	}
   165  	if q < 0 {
   166  		return math.Inf(-1)
   167  	}
   168  	if q > 1 {
   169  		return math.Inf(+1)
   170  	}
   171  	sort.Sort(values)
   172  
   173  	n := float64(len(values))
   174  	// When the quantile lies between two samples,
   175  	// we use a weighted average of the two samples.
   176  	rank := q * (n - 1)
   177  
   178  	lowerIndex := math.Max(0, math.Floor(rank))
   179  	upperIndex := math.Min(n-1, lowerIndex+1)
   180  
   181  	weight := rank - math.Floor(rank)
   182  	return values[int(lowerIndex)].V*(1-weight) + values[int(upperIndex)].V*weight
   183  }