github.com/m3db/m3@v1.5.0/src/query/functions/linear/histogram_quantile.go (about) 1 // Copyright (c) 2019 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package linear 22 23 import ( 24 "fmt" 25 "math" 26 "sort" 27 "strconv" 28 29 "github.com/m3db/m3/src/query/block" 30 "github.com/m3db/m3/src/query/executor/transform" 31 "github.com/m3db/m3/src/query/functions/utils" 32 "github.com/m3db/m3/src/query/models" 33 "github.com/m3db/m3/src/query/parser" 34 "github.com/m3db/m3/src/query/util" 35 ) 36 37 const ( 38 // HistogramQuantileType calculates the quantile for histogram buckets. 39 // 40 // NB: each sample must contain a tag with a bucket name (given by tag 41 // options) that denotes the upper bound of that bucket; series without this 42 // tag are ignored. 43 HistogramQuantileType = "histogram_quantile" 44 initIndexBucketLength = 10 45 ) 46 47 // NewHistogramQuantileOp creates a new histogram quantile operation. 48 func NewHistogramQuantileOp( 49 args []interface{}, 50 opType string, 51 ) (parser.Params, error) { 52 if len(args) != 1 { 53 return nil, fmt.Errorf( 54 "invalid number of args for histogram_quantile: %d", len(args)) 55 } 56 57 if opType != HistogramQuantileType { 58 return nil, fmt.Errorf("operator not supported: %s", opType) 59 } 60 61 q, ok := args[0].(float64) 62 if !ok { 63 return nil, fmt.Errorf("unable to cast to scalar argument: %v", args[0]) 64 } 65 66 return newHistogramQuantileOp(q, opType), nil 67 } 68 69 // histogramQuantileOp stores required properties for histogram quantile ops. 70 type histogramQuantileOp struct { 71 q float64 72 opType string 73 } 74 75 // OpType for the operator. 76 func (o histogramQuantileOp) OpType() string { 77 return o.opType 78 } 79 80 // String representation. 81 func (o histogramQuantileOp) String() string { 82 return fmt.Sprintf("type: %s", o.OpType()) 83 } 84 85 // Node creates an execution node. 86 func (o histogramQuantileOp) Node( 87 controller *transform.Controller, 88 _ transform.Options, 89 ) transform.OpNode { 90 return &histogramQuantileNode{ 91 op: o, 92 controller: controller, 93 } 94 } 95 96 func newHistogramQuantileOp( 97 q float64, 98 opType string, 99 ) histogramQuantileOp { 100 return histogramQuantileOp{ 101 q: q, 102 opType: opType, 103 } 104 } 105 106 type histogramQuantileNode struct { 107 op histogramQuantileOp 108 controller *transform.Controller 109 } 110 111 type bucketValue struct { 112 upperBound float64 113 value float64 114 } 115 116 type indexedBucket struct { 117 upperBound float64 118 idx int 119 } 120 121 type indexedBuckets struct { 122 buckets []indexedBucket 123 tags models.Tags 124 } 125 126 func (b indexedBuckets) Len() int { return len(b.buckets) } 127 func (b indexedBuckets) Swap(i, j int) { 128 b.buckets[i], b.buckets[j] = b.buckets[j], b.buckets[i] 129 } 130 func (b indexedBuckets) Less(i, j int) bool { 131 return b.buckets[i].upperBound < b.buckets[j].upperBound 132 } 133 134 type bucketedSeries map[string]indexedBuckets 135 136 type validSeriesBuckets []indexedBuckets 137 138 func (b validSeriesBuckets) Len() int { return len(b) } 139 func (b validSeriesBuckets) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 140 func (b validSeriesBuckets) Less(i, j int) bool { 141 if len(b[i].buckets) == 0 { 142 return false 143 } 144 145 if len(b[j].buckets) == 0 { 146 return true 147 } 148 149 // An arbitrarily chosen sort that guarantees deterministic results. 150 return b[i].buckets[0].idx < b[j].buckets[0].idx 151 } 152 153 func gatherSeriesToBuckets(metas []block.SeriesMeta) validSeriesBuckets { 154 bucketsForID := make(bucketedSeries, initIndexBucketLength) 155 for i, meta := range metas { 156 tags := meta.Tags 157 value, found := tags.Bucket() 158 if !found { 159 // this series does not have a bucket tag; drop it from the output. 160 continue 161 } 162 163 bound, err := strconv.ParseFloat(string(value), 64) 164 if err != nil { 165 // invalid bounds value for the bucket; drop it from the output. 166 continue 167 } 168 169 excludeTags := [][]byte{tags.Opts.MetricName(), tags.Opts.BucketName()} 170 tagsWithoutKeys := tags.TagsWithoutKeys(excludeTags) 171 id := string(tagsWithoutKeys.ID()) 172 newBucket := indexedBucket{ 173 upperBound: bound, 174 idx: i, 175 } 176 177 if buckets, found := bucketsForID[id]; !found { 178 // add a single indexed bucket for this ID with the current index only. 179 newBuckets := make([]indexedBucket, 0, initIndexBucketLength) 180 newBuckets = append(newBuckets, newBucket) 181 bucketsForID[id] = indexedBuckets{ 182 buckets: newBuckets, 183 tags: tagsWithoutKeys, 184 } 185 } else { 186 buckets.buckets = append(buckets.buckets, newBucket) 187 bucketsForID[id] = buckets 188 } 189 } 190 191 return sanitizeBuckets(bucketsForID) 192 } 193 194 // sanitize sorts the bucket maps by upper bound, dropping any series which 195 // have less than two buckets, or any that do not have an upper bound of +Inf 196 func sanitizeBuckets(bucketMap bucketedSeries) validSeriesBuckets { 197 validSeriesBuckets := make(validSeriesBuckets, 0, len(bucketMap)) 198 for _, buckets := range bucketMap { 199 if len(buckets.buckets) < 2 { 200 continue 201 } 202 203 sort.Sort(buckets) 204 maxBound := buckets.buckets[len(buckets.buckets)-1].upperBound 205 if !math.IsInf(maxBound, 1) { 206 continue 207 } 208 209 validSeriesBuckets = append(validSeriesBuckets, buckets) 210 } 211 212 sort.Sort(validSeriesBuckets) 213 return validSeriesBuckets 214 } 215 216 func bucketQuantile(q float64, buckets []bucketValue) float64 { 217 // NB: some valid buckets may have been purged if the values at the current 218 // step for that series are not present. 219 if len(buckets) < 2 { 220 return math.NaN() 221 } 222 223 // NB: similar situation here if the max bound bucket does not have a value 224 // at this point, it is necessary to re-check. 225 if !math.IsInf(buckets[len(buckets)-1].upperBound, 1) { 226 return math.NaN() 227 } 228 229 rank := q * buckets[len(buckets)-1].value 230 231 bucketIndex := sort.Search(len(buckets)-1, func(i int) bool { 232 return buckets[i].value >= rank 233 }) 234 235 if bucketIndex == len(buckets)-1 { 236 return buckets[len(buckets)-2].upperBound 237 } 238 239 if bucketIndex == 0 && buckets[0].upperBound <= 0 { 240 return buckets[0].upperBound 241 } 242 243 var ( 244 bucketStart float64 245 bucketEnd = buckets[bucketIndex].upperBound 246 count = buckets[bucketIndex].value 247 ) 248 249 if bucketIndex > 0 { 250 bucketStart = buckets[bucketIndex-1].upperBound 251 count -= buckets[bucketIndex-1].value 252 rank -= buckets[bucketIndex-1].value 253 } 254 255 return bucketStart + (bucketEnd-bucketStart)*rank/count 256 } 257 258 func (n *histogramQuantileNode) Params() parser.Params { 259 return n.op 260 } 261 262 // Process the block 263 func (n *histogramQuantileNode) Process( 264 queryCtx *models.QueryContext, 265 ID parser.NodeID, 266 b block.Block, 267 ) error { 268 return transform.ProcessSimpleBlock(n, n.controller, queryCtx, ID, b) 269 } 270 271 func (n *histogramQuantileNode) ProcessBlock( 272 queryCtx *models.QueryContext, 273 ID parser.NodeID, 274 b block.Block, 275 ) (block.Block, error) { 276 stepIter, err := b.StepIter() 277 if err != nil { 278 return nil, err 279 } 280 281 meta := b.Meta() 282 seriesMetas := utils.FlattenMetadata(meta, stepIter.SeriesMeta()) 283 seriesBuckets := gatherSeriesToBuckets(seriesMetas) 284 285 q := n.op.q 286 if q < 0 || q > 1 { 287 return processInvalidQuantile(queryCtx, q, seriesBuckets, meta, stepIter, n.controller) 288 } 289 290 return processValidQuantile(queryCtx, q, seriesBuckets, meta, stepIter, n.controller) 291 } 292 293 func setupBuilder( 294 queryCtx *models.QueryContext, 295 seriesBuckets validSeriesBuckets, 296 meta block.Metadata, 297 stepIter block.StepIter, 298 controller *transform.Controller, 299 ) (block.Builder, error) { 300 metas := make([]block.SeriesMeta, 0, len(seriesBuckets)) 301 for _, v := range seriesBuckets { 302 metas = append(metas, block.SeriesMeta{ 303 Tags: v.tags, 304 }) 305 } 306 307 builder, err := controller.BlockBuilder(queryCtx, meta, metas) 308 if err != nil { 309 return nil, err 310 } 311 312 if err = builder.AddCols(stepIter.StepCount()); err != nil { 313 return nil, err 314 } 315 316 return builder, nil 317 } 318 319 // Enforce monotonicity for binary search to work. 320 // See https://github.com/prometheus/prometheus/commit/896f951e6846ce252d9d19fd4707a4110ceda5ee 321 func ensureMonotonic(bucketValues []bucketValue) { 322 max := math.Inf(-1) 323 for i := range bucketValues { 324 switch { 325 case bucketValues[i].value >= max: 326 max = bucketValues[i].value 327 case bucketValues[i].value < max: 328 bucketValues[i].value = max 329 } 330 } 331 } 332 333 func processValidQuantile( 334 queryCtx *models.QueryContext, 335 q float64, 336 seriesBuckets validSeriesBuckets, 337 meta block.Metadata, 338 stepIter block.StepIter, 339 controller *transform.Controller, 340 ) (block.Block, error) { 341 builder, err := setupBuilder(queryCtx, seriesBuckets, meta, stepIter, controller) 342 if err != nil { 343 return nil, err 344 } 345 346 for index := 0; stepIter.Next(); index++ { 347 step := stepIter.Current() 348 values := step.Values() 349 bucketValues := make([]bucketValue, 0, initIndexBucketLength) 350 351 aggregatedValues := make([]float64, 0, len(seriesBuckets)) 352 for _, b := range seriesBuckets { 353 buckets := b.buckets 354 // clear previous bucket values. 355 bucketValues = bucketValues[:0] 356 for _, bucket := range buckets { 357 // Only add non-NaN values to contention for the calculation. 358 val := values[bucket.idx] 359 if !math.IsNaN(val) { 360 bucketValues = append( 361 bucketValues, bucketValue{ 362 upperBound: bucket.upperBound, 363 value: val, 364 }, 365 ) 366 } 367 } 368 369 ensureMonotonic(bucketValues) 370 371 aggregatedValues = append(aggregatedValues, bucketQuantile(q, bucketValues)) 372 } 373 374 if err := builder.AppendValues(index, aggregatedValues); err != nil { 375 return nil, err 376 } 377 } 378 379 if err = stepIter.Err(); err != nil { 380 return nil, err 381 } 382 383 return builder.Build(), nil 384 } 385 386 func processInvalidQuantile( 387 queryCtx *models.QueryContext, 388 q float64, 389 seriesBuckets validSeriesBuckets, 390 meta block.Metadata, 391 stepIter block.StepIter, 392 controller *transform.Controller, 393 ) (block.Block, error) { 394 builder, err := setupBuilder(queryCtx, seriesBuckets, meta, stepIter, controller) 395 if err != nil { 396 return nil, err 397 } 398 399 // Set the values to an infinity of the appropriate sign; anything less than 0 400 // becomes -Inf, anything greather than one becomes +Inf. 401 sign := 1 402 if q < 0 { 403 sign = -1 404 } 405 406 setValue := math.Inf(sign) 407 outValues := make([]float64, len(seriesBuckets)) 408 util.Memset(outValues, setValue) 409 for index := 0; stepIter.Next(); index++ { 410 if err := builder.AppendValues(index, outValues); err != nil { 411 return nil, err 412 } 413 } 414 415 if err = stepIter.Err(); err != nil { 416 return nil, err 417 } 418 419 return builder.Build(), nil 420 }