github.com/m3db/m3@v1.5.0/src/query/functions/linear/histogram_quantile.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package linear
    22  
    23  import (
    24  	"fmt"
    25  	"math"
    26  	"sort"
    27  	"strconv"
    28  
    29  	"github.com/m3db/m3/src/query/block"
    30  	"github.com/m3db/m3/src/query/executor/transform"
    31  	"github.com/m3db/m3/src/query/functions/utils"
    32  	"github.com/m3db/m3/src/query/models"
    33  	"github.com/m3db/m3/src/query/parser"
    34  	"github.com/m3db/m3/src/query/util"
    35  )
    36  
const (
	// HistogramQuantileType calculates the quantile for histogram buckets.
	//
	// NB: each sample must contain a tag with a bucket name (given by tag
	// options) that denotes the upper bound of that bucket; series without this
	// tag are ignored.
	HistogramQuantileType = "histogram_quantile"
	// initIndexBucketLength is the initial capacity used when allocating the
	// series-to-buckets map and the per-series bucket slices in this file.
	initIndexBucketLength = 10
)
    46  
    47  // NewHistogramQuantileOp creates a new histogram quantile operation.
    48  func NewHistogramQuantileOp(
    49  	args []interface{},
    50  	opType string,
    51  ) (parser.Params, error) {
    52  	if len(args) != 1 {
    53  		return nil, fmt.Errorf(
    54  			"invalid number of args for histogram_quantile: %d", len(args))
    55  	}
    56  
    57  	if opType != HistogramQuantileType {
    58  		return nil, fmt.Errorf("operator not supported: %s", opType)
    59  	}
    60  
    61  	q, ok := args[0].(float64)
    62  	if !ok {
    63  		return nil, fmt.Errorf("unable to cast to scalar argument: %v", args[0])
    64  	}
    65  
    66  	return newHistogramQuantileOp(q, opType), nil
    67  }
    68  
// histogramQuantileOp stores required properties for histogram quantile ops.
type histogramQuantileOp struct {
	// q is the requested quantile; values outside [0, 1] are handled
	// separately when a block is processed.
	q float64
	// opType is the operator name this op was created with.
	opType string
}
    74  
// OpType returns the operator type string this op was constructed with.
func (o histogramQuantileOp) OpType() string {
	return o.opType
}
    79  
    80  // String representation.
    81  func (o histogramQuantileOp) String() string {
    82  	return fmt.Sprintf("type: %s", o.OpType())
    83  }
    84  
    85  // Node creates an execution node.
    86  func (o histogramQuantileOp) Node(
    87  	controller *transform.Controller,
    88  	_ transform.Options,
    89  ) transform.OpNode {
    90  	return &histogramQuantileNode{
    91  		op:         o,
    92  		controller: controller,
    93  	}
    94  }
    95  
    96  func newHistogramQuantileOp(
    97  	q float64,
    98  	opType string,
    99  ) histogramQuantileOp {
   100  	return histogramQuantileOp{
   101  		q:      q,
   102  		opType: opType,
   103  	}
   104  }
   105  
// histogramQuantileNode is the execution node for a histogram quantile op; it
// holds the op being executed and the controller used to build output blocks.
type histogramQuantileNode struct {
	op         histogramQuantileOp
	controller *transform.Controller
}
   110  
// bucketValue pairs a bucket's upper bound with the value read for that bucket
// at a single step.
type bucketValue struct {
	upperBound float64
	value      float64
}
   115  
// indexedBucket records a bucket's upper bound (parsed from the series' bucket
// tag) together with the index of that series in the input series metadata.
type indexedBucket struct {
	upperBound float64
	idx        int
}
   120  
// indexedBuckets groups the buckets that belong to one output series.
type indexedBuckets struct {
	// buckets lists the input series contributing to this output series.
	buckets []indexedBucket
	// tags are the series tags with the metric name and bucket tags removed.
	tags models.Tags
}
   125  
   126  func (b indexedBuckets) Len() int { return len(b.buckets) }
   127  func (b indexedBuckets) Swap(i, j int) {
   128  	b.buckets[i], b.buckets[j] = b.buckets[j], b.buckets[i]
   129  }
   130  func (b indexedBuckets) Less(i, j int) bool {
   131  	return b.buckets[i].upperBound < b.buckets[j].upperBound
   132  }
   133  
// bucketedSeries maps a series ID (computed from the tags with the metric name
// and bucket tags removed) to the buckets gathered for that series.
type bucketedSeries map[string]indexedBuckets
   135  
   136  type validSeriesBuckets []indexedBuckets
   137  
   138  func (b validSeriesBuckets) Len() int      { return len(b) }
   139  func (b validSeriesBuckets) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
   140  func (b validSeriesBuckets) Less(i, j int) bool {
   141  	if len(b[i].buckets) == 0 {
   142  		return false
   143  	}
   144  
   145  	if len(b[j].buckets) == 0 {
   146  		return true
   147  	}
   148  
   149  	// An arbitrarily chosen sort that guarantees deterministic results.
   150  	return b[i].buckets[0].idx < b[j].buckets[0].idx
   151  }
   152  
   153  func gatherSeriesToBuckets(metas []block.SeriesMeta) validSeriesBuckets {
   154  	bucketsForID := make(bucketedSeries, initIndexBucketLength)
   155  	for i, meta := range metas {
   156  		tags := meta.Tags
   157  		value, found := tags.Bucket()
   158  		if !found {
   159  			// this series does not have a bucket tag; drop it from the output.
   160  			continue
   161  		}
   162  
   163  		bound, err := strconv.ParseFloat(string(value), 64)
   164  		if err != nil {
   165  			// invalid bounds value for the bucket; drop it from the output.
   166  			continue
   167  		}
   168  
   169  		excludeTags := [][]byte{tags.Opts.MetricName(), tags.Opts.BucketName()}
   170  		tagsWithoutKeys := tags.TagsWithoutKeys(excludeTags)
   171  		id := string(tagsWithoutKeys.ID())
   172  		newBucket := indexedBucket{
   173  			upperBound: bound,
   174  			idx:        i,
   175  		}
   176  
   177  		if buckets, found := bucketsForID[id]; !found {
   178  			// add a single indexed bucket for this ID with the current index only.
   179  			newBuckets := make([]indexedBucket, 0, initIndexBucketLength)
   180  			newBuckets = append(newBuckets, newBucket)
   181  			bucketsForID[id] = indexedBuckets{
   182  				buckets: newBuckets,
   183  				tags:    tagsWithoutKeys,
   184  			}
   185  		} else {
   186  			buckets.buckets = append(buckets.buckets, newBucket)
   187  			bucketsForID[id] = buckets
   188  		}
   189  	}
   190  
   191  	return sanitizeBuckets(bucketsForID)
   192  }
   193  
   194  // sanitize sorts the bucket maps by upper bound, dropping any series which
   195  // have less than two buckets, or any that do not have an upper bound of +Inf
   196  func sanitizeBuckets(bucketMap bucketedSeries) validSeriesBuckets {
   197  	validSeriesBuckets := make(validSeriesBuckets, 0, len(bucketMap))
   198  	for _, buckets := range bucketMap {
   199  		if len(buckets.buckets) < 2 {
   200  			continue
   201  		}
   202  
   203  		sort.Sort(buckets)
   204  		maxBound := buckets.buckets[len(buckets.buckets)-1].upperBound
   205  		if !math.IsInf(maxBound, 1) {
   206  			continue
   207  		}
   208  
   209  		validSeriesBuckets = append(validSeriesBuckets, buckets)
   210  	}
   211  
   212  	sort.Sort(validSeriesBuckets)
   213  	return validSeriesBuckets
   214  }
   215  
   216  func bucketQuantile(q float64, buckets []bucketValue) float64 {
   217  	// NB: some valid buckets may have been purged if the values at the current
   218  	// step for that series are not present.
   219  	if len(buckets) < 2 {
   220  		return math.NaN()
   221  	}
   222  
   223  	// NB: similar situation here if the max bound bucket does not have a value
   224  	// at this point, it is necessary to re-check.
   225  	if !math.IsInf(buckets[len(buckets)-1].upperBound, 1) {
   226  		return math.NaN()
   227  	}
   228  
   229  	rank := q * buckets[len(buckets)-1].value
   230  
   231  	bucketIndex := sort.Search(len(buckets)-1, func(i int) bool {
   232  		return buckets[i].value >= rank
   233  	})
   234  
   235  	if bucketIndex == len(buckets)-1 {
   236  		return buckets[len(buckets)-2].upperBound
   237  	}
   238  
   239  	if bucketIndex == 0 && buckets[0].upperBound <= 0 {
   240  		return buckets[0].upperBound
   241  	}
   242  
   243  	var (
   244  		bucketStart float64
   245  		bucketEnd   = buckets[bucketIndex].upperBound
   246  		count       = buckets[bucketIndex].value
   247  	)
   248  
   249  	if bucketIndex > 0 {
   250  		bucketStart = buckets[bucketIndex-1].upperBound
   251  		count -= buckets[bucketIndex-1].value
   252  		rank -= buckets[bucketIndex-1].value
   253  	}
   254  
   255  	return bucketStart + (bucketEnd-bucketStart)*rank/count
   256  }
   257  
// Params returns the histogram quantile op this node executes.
func (n *histogramQuantileNode) Params() parser.Params {
	return n.op
}
   261  
// Process handles an incoming block by delegating to
// transform.ProcessSimpleBlock with this node and its controller, returning
// whatever error that call reports.
func (n *histogramQuantileNode) Process(
	queryCtx *models.QueryContext,
	ID parser.NodeID,
	b block.Block,
) error {
	return transform.ProcessSimpleBlock(n, n.controller, queryCtx, ID, b)
}
   270  
   271  func (n *histogramQuantileNode) ProcessBlock(
   272  	queryCtx *models.QueryContext,
   273  	ID parser.NodeID,
   274  	b block.Block,
   275  ) (block.Block, error) {
   276  	stepIter, err := b.StepIter()
   277  	if err != nil {
   278  		return nil, err
   279  	}
   280  
   281  	meta := b.Meta()
   282  	seriesMetas := utils.FlattenMetadata(meta, stepIter.SeriesMeta())
   283  	seriesBuckets := gatherSeriesToBuckets(seriesMetas)
   284  
   285  	q := n.op.q
   286  	if q < 0 || q > 1 {
   287  		return processInvalidQuantile(queryCtx, q, seriesBuckets, meta, stepIter, n.controller)
   288  	}
   289  
   290  	return processValidQuantile(queryCtx, q, seriesBuckets, meta, stepIter, n.controller)
   291  }
   292  
   293  func setupBuilder(
   294  	queryCtx *models.QueryContext,
   295  	seriesBuckets validSeriesBuckets,
   296  	meta block.Metadata,
   297  	stepIter block.StepIter,
   298  	controller *transform.Controller,
   299  ) (block.Builder, error) {
   300  	metas := make([]block.SeriesMeta, 0, len(seriesBuckets))
   301  	for _, v := range seriesBuckets {
   302  		metas = append(metas, block.SeriesMeta{
   303  			Tags: v.tags,
   304  		})
   305  	}
   306  
   307  	builder, err := controller.BlockBuilder(queryCtx, meta, metas)
   308  	if err != nil {
   309  		return nil, err
   310  	}
   311  
   312  	if err = builder.AddCols(stepIter.StepCount()); err != nil {
   313  		return nil, err
   314  	}
   315  
   316  	return builder, nil
   317  }
   318  
   319  // Enforce monotonicity for binary search to work.
   320  // See https://github.com/prometheus/prometheus/commit/896f951e6846ce252d9d19fd4707a4110ceda5ee
   321  func ensureMonotonic(bucketValues []bucketValue) {
   322  	max := math.Inf(-1)
   323  	for i := range bucketValues {
   324  		switch {
   325  		case bucketValues[i].value >= max:
   326  			max = bucketValues[i].value
   327  		case bucketValues[i].value < max:
   328  			bucketValues[i].value = max
   329  		}
   330  	}
   331  }
   332  
   333  func processValidQuantile(
   334  	queryCtx *models.QueryContext,
   335  	q float64,
   336  	seriesBuckets validSeriesBuckets,
   337  	meta block.Metadata,
   338  	stepIter block.StepIter,
   339  	controller *transform.Controller,
   340  ) (block.Block, error) {
   341  	builder, err := setupBuilder(queryCtx, seriesBuckets, meta, stepIter, controller)
   342  	if err != nil {
   343  		return nil, err
   344  	}
   345  
   346  	for index := 0; stepIter.Next(); index++ {
   347  		step := stepIter.Current()
   348  		values := step.Values()
   349  		bucketValues := make([]bucketValue, 0, initIndexBucketLength)
   350  
   351  		aggregatedValues := make([]float64, 0, len(seriesBuckets))
   352  		for _, b := range seriesBuckets {
   353  			buckets := b.buckets
   354  			// clear previous bucket values.
   355  			bucketValues = bucketValues[:0]
   356  			for _, bucket := range buckets {
   357  				// Only add non-NaN values to contention for the calculation.
   358  				val := values[bucket.idx]
   359  				if !math.IsNaN(val) {
   360  					bucketValues = append(
   361  						bucketValues, bucketValue{
   362  							upperBound: bucket.upperBound,
   363  							value:      val,
   364  						},
   365  					)
   366  				}
   367  			}
   368  
   369  			ensureMonotonic(bucketValues)
   370  
   371  			aggregatedValues = append(aggregatedValues, bucketQuantile(q, bucketValues))
   372  		}
   373  
   374  		if err := builder.AppendValues(index, aggregatedValues); err != nil {
   375  			return nil, err
   376  		}
   377  	}
   378  
   379  	if err = stepIter.Err(); err != nil {
   380  		return nil, err
   381  	}
   382  
   383  	return builder.Build(), nil
   384  }
   385  
   386  func processInvalidQuantile(
   387  	queryCtx *models.QueryContext,
   388  	q float64,
   389  	seriesBuckets validSeriesBuckets,
   390  	meta block.Metadata,
   391  	stepIter block.StepIter,
   392  	controller *transform.Controller,
   393  ) (block.Block, error) {
   394  	builder, err := setupBuilder(queryCtx, seriesBuckets, meta, stepIter, controller)
   395  	if err != nil {
   396  		return nil, err
   397  	}
   398  
   399  	// Set the values to an infinity of the appropriate sign; anything less than 0
   400  	// becomes -Inf, anything greather than one becomes +Inf.
   401  	sign := 1
   402  	if q < 0 {
   403  		sign = -1
   404  	}
   405  
   406  	setValue := math.Inf(sign)
   407  	outValues := make([]float64, len(seriesBuckets))
   408  	util.Memset(outValues, setValue)
   409  	for index := 0; stepIter.Next(); index++ {
   410  		if err := builder.AppendValues(index, outValues); err != nil {
   411  			return nil, err
   412  		}
   413  	}
   414  
   415  	if err = stepIter.Err(); err != nil {
   416  		return nil, err
   417  	}
   418  
   419  	return builder.Build(), nil
   420  }