github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/configs/legacy_promql/quantile.go (about) 1 // Copyright 2015 The Prometheus Authors 2 // Licensed under the Apache License, Version 2.0 (the "License"); 3 // you may not use this file except in compliance with the License. 4 // You may obtain a copy of the License at 5 // 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package promql 15 16 import ( 17 "math" 18 "sort" 19 20 "github.com/prometheus/prometheus/pkg/labels" 21 ) 22 23 // Helpers to calculate quantiles. 24 25 // excludedLabels are the labels to exclude from signature calculation for 26 // quantiles. 27 var excludedLabels = []string{ 28 labels.MetricName, 29 labels.BucketLabel, 30 } 31 32 type bucket struct { 33 upperBound float64 34 count float64 35 } 36 37 // buckets implements sort.Interface. 38 type buckets []bucket 39 40 func (b buckets) Len() int { return len(b) } 41 func (b buckets) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 42 func (b buckets) Less(i, j int) bool { return b[i].upperBound < b[j].upperBound } 43 44 type metricWithBuckets struct { 45 metric labels.Labels 46 buckets buckets 47 } 48 49 // bucketQuantile calculates the quantile 'q' based on the given buckets. The 50 // buckets will be sorted by upperBound by this function (i.e. no sorting 51 // needed before calling this function). The quantile value is interpolated 52 // assuming a linear distribution within a bucket. However, if the quantile 53 // falls into the highest bucket, the upper bound of the 2nd highest bucket is 54 // returned. A natural lower bound of 0 is assumed if the upper bound of the 55 // lowest bucket is greater 0. In that case, interpolation in the lowest bucket 56 // happens linearly between 0 and the upper bound of the lowest bucket. 57 // However, if the lowest bucket has an upper bound less or equal 0, this upper 58 // bound is returned if the quantile falls into the lowest bucket. 59 // 60 // There are a number of special cases (once we have a way to report errors 61 // happening during evaluations of AST functions, we should report those 62 // explicitly): 63 // 64 // If 'buckets' has fewer than 2 elements, NaN is returned. 65 // 66 // If the highest bucket is not +Inf, NaN is returned. 67 // 68 // If q<0, -Inf is returned. 69 // 70 // If q>1, +Inf is returned. 71 func bucketQuantile(q float64, buckets buckets) float64 { 72 if q < 0 { 73 return math.Inf(-1) 74 } 75 if q > 1 { 76 return math.Inf(+1) 77 } 78 if len(buckets) < 2 { 79 return math.NaN() 80 } 81 sort.Sort(buckets) 82 if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) { 83 return math.NaN() 84 } 85 86 ensureMonotonic(buckets) 87 88 rank := q * buckets[len(buckets)-1].count 89 b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank }) 90 91 if b == len(buckets)-1 { 92 return buckets[len(buckets)-2].upperBound 93 } 94 if b == 0 && buckets[0].upperBound <= 0 { 95 return buckets[0].upperBound 96 } 97 var ( 98 bucketStart float64 99 bucketEnd = buckets[b].upperBound 100 count = buckets[b].count 101 ) 102 if b > 0 { 103 bucketStart = buckets[b-1].upperBound 104 count -= buckets[b-1].count 105 rank -= buckets[b-1].count 106 } 107 return bucketStart + (bucketEnd-bucketStart)*(rank/count) 108 } 109 110 // The assumption that bucket counts increase monotonically with increasing 111 // upperBound may be violated during: 112 // 113 // * Recording rule evaluation of histogram_quantile, especially when rate() 114 // has been applied to the underlying bucket timeseries. 115 // * Evaluation of histogram_quantile computed over federated bucket 116 // timeseries, especially when rate() has been applied. 117 // 118 // This is because scraped data is not made available to rule evaluation or 119 // federation atomically, so some buckets are computed with data from the 120 // most recent scrapes, but the other buckets are missing data from the most 121 // recent scrape. 122 // 123 // Monotonicity is usually guaranteed because if a bucket with upper bound 124 // u1 has count c1, then any bucket with a higher upper bound u > u1 must 125 // have counted all c1 observations and perhaps more, so that c >= c1. 126 // 127 // Randomly interspersed partial sampling breaks that guarantee, and rate() 128 // exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from 129 // 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The 130 // monotonicity is broken. It is exacerbated by rate() because under normal 131 // operation, cumulative counting of buckets will cause the bucket counts to 132 // diverge such that small differences from missing samples are not a problem. 133 // rate() removes this divergence.) 134 // 135 // bucketQuantile depends on that monotonicity to do a binary search for the 136 // bucket with the φ-quantile count, so breaking the monotonicity 137 // guarantee causes bucketQuantile() to return undefined (nonsense) results. 138 // 139 // As a somewhat hacky solution until ingestion is atomic per scrape, we 140 // calculate the "envelope" of the histogram buckets, essentially removing 141 // any decreases in the count between successive buckets. 142 143 func ensureMonotonic(buckets buckets) { 144 max := buckets[0].count 145 for i := range buckets[1:] { 146 switch { 147 case buckets[i].count > max: 148 max = buckets[i].count 149 case buckets[i].count < max: 150 buckets[i].count = max 151 } 152 } 153 } 154 155 // qauntile calculates the given quantile of a vector of samples. 156 // 157 // The Vector will be sorted. 158 // If 'values' has zero elements, NaN is returned. 159 // If q<0, -Inf is returned. 160 // If q>1, +Inf is returned. 161 func quantile(q float64, values vectorByValueHeap) float64 { 162 if len(values) == 0 { 163 return math.NaN() 164 } 165 if q < 0 { 166 return math.Inf(-1) 167 } 168 if q > 1 { 169 return math.Inf(+1) 170 } 171 sort.Sort(values) 172 173 n := float64(len(values)) 174 // When the quantile lies between two samples, 175 // we use a weighted average of the two samples. 176 rank := q * (n - 1) 177 178 lowerIndex := math.Max(0, math.Floor(rank)) 179 upperIndex := math.Min(n-1, lowerIndex+1) 180 181 weight := rank - math.Floor(rank) 182 return values[int(lowerIndex)].V*(1-weight) + values[int(upperIndex)].V*weight 183 }