github.com/m3db/m3@v1.5.0/src/query/functions/aggregation/take.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package aggregation
    22  
    23  import (
    24  	"fmt"
    25  	"math"
    26  
    27  	"github.com/m3db/m3/src/query/block"
    28  	"github.com/m3db/m3/src/query/executor/transform"
    29  	"github.com/m3db/m3/src/query/functions/utils"
    30  	"github.com/m3db/m3/src/query/models"
    31  	"github.com/m3db/m3/src/query/parser"
    32  	"github.com/m3db/m3/src/query/util"
    33  )
    34  
const (
	// BottomKType is the operator name for bottomk, which gathers the smallest
	// k non-NaN elements in a list of series.
	BottomKType = "bottomk"
	// TopKType is the operator name for topk, which gathers the largest k
	// non-NaN elements in a list of series.
	TopKType = "topk"
)
    41  
    42  type valueAndMeta struct {
    43  	val        float64
    44  	seriesMeta block.SeriesMeta
    45  }
    46  
    47  type takeFunc func(heap utils.FloatHeap, values []float64, buckets [][]int) []float64
    48  type takeInstantFunc func(heap utils.FloatHeap, values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta
    49  
    50  // NewTakeOp creates a new takeK operation
    51  func NewTakeOp(
    52  	opType string,
    53  	params NodeParams,
    54  ) (parser.Params, error) {
    55  	k := int(params.Parameter)
    56  	fn := func(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 {
    57  		return takeFn(heap, values, buckets)
    58  	}
    59  	fnInstant := func(heap utils.FloatHeap, values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta {
    60  		return takeInstantFn(heap, values, buckets, seriesMetas)
    61  	}
    62  	return newTakeOp(params, opType, k, fn, fnInstant), nil
    63  }
    64  
// takeOp stores required properties for take ops
type takeOp struct {
	// params holds the node parameters; ProcessBlock reads MatchingTags and
	// Without from it to group series into buckets.
	params NodeParams
	// opType is the operator name; ProcessBlock accepts only TopKType and
	// BottomKType.
	opType string
	// k is the number of elements to take; it is used as the heap capacity.
	k int
	// takeFunc is applied to every step of a non-instantaneous query.
	takeFunc takeFunc
	// takeInstantFunc is applied to the last step of an instantaneous query.
	takeInstantFunc takeInstantFunc
}
    73  
// OpType returns the operator type string this op was created with.
func (o takeOp) OpType() string {
	return o.opType
}
    78  
    79  // String representation
    80  func (o takeOp) String() string {
    81  	return fmt.Sprintf("type: %s", o.OpType())
    82  }
    83  
    84  // Node creates an execution node
    85  func (o takeOp) Node(
    86  	controller *transform.Controller,
    87  	_ transform.Options,
    88  ) transform.OpNode {
    89  	return &takeNode{
    90  		op:         o,
    91  		controller: controller,
    92  	}
    93  }
    94  
    95  func newTakeOp(params NodeParams, opType string, k int, takeFunc takeFunc, takeInstantFunc takeInstantFunc) takeOp {
    96  	return takeOp{
    97  		params:          params,
    98  		opType:          opType,
    99  		k:               k,
   100  		takeFunc:        takeFunc,
   101  		takeInstantFunc: takeInstantFunc,
   102  	}
   103  }
   104  
// takeNode is different from base node as it only uses grouping to determine
// groups from which to take values from, and does not necessarily compress the
// series set as regular aggregation functions do
type takeNode struct {
	// op is the take operation this node executes.
	op takeOp
	// controller supplies block builders and is handed to
	// transform.ProcessSimpleBlock by Process.
	controller *transform.Controller
}
   112  
// Params returns the take op backing this node as parser.Params.
func (n *takeNode) Params() parser.Params {
	return n.op
}
   116  
// Process the block by delegating to transform.ProcessSimpleBlock with this
// node and its controller.
// NOTE(review): ProcessSimpleBlock presumably calls back into ProcessBlock and
// forwards the result downstream — confirm against the transform package.
func (n *takeNode) Process(queryCtx *models.QueryContext, ID parser.NodeID, b block.Block) error {
	return transform.ProcessSimpleBlock(n, n.controller, queryCtx, ID, b)
}
   121  
   122  func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, b block.Block) (block.Block, error) {
   123  	stepIter, err := b.StepIter()
   124  	if err != nil {
   125  		return nil, err
   126  	}
   127  
   128  	instantaneous := queryCtx.Options.Instantaneous
   129  	takeTop := n.op.opType == TopKType
   130  	if !takeTop && n.op.opType != BottomKType {
   131  		return nil, fmt.Errorf("operator not supported: %s", n.op.opType)
   132  	}
   133  
   134  	params := n.op.params
   135  	meta := b.Meta()
   136  	seriesMetas := utils.FlattenMetadata(meta, stepIter.SeriesMeta())
   137  	buckets, _ := utils.GroupSeries(
   138  		params.MatchingTags,
   139  		params.Without,
   140  		[]byte(n.op.opType),
   141  		seriesMetas,
   142  	)
   143  
   144  	seriesCount := maxSeriesCount(buckets)
   145  	if instantaneous {
   146  		heapSize := seriesCount
   147  		if n.op.k < seriesCount {
   148  			heapSize = n.op.k
   149  		}
   150  
   151  		heap := utils.NewFloatHeap(takeTop, heapSize)
   152  		return n.processBlockInstantaneous(heap, queryCtx, meta, stepIter, seriesMetas, buckets)
   153  	}
   154  
   155  	if n.op.k >= seriesCount {
   156  		return b, nil
   157  	}
   158  
   159  	heap := utils.NewFloatHeap(takeTop, n.op.k)
   160  	builder, err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas)
   161  	if err != nil {
   162  		return nil, err
   163  	}
   164  
   165  	if err = builder.AddCols(stepIter.StepCount()); err != nil {
   166  		return nil, err
   167  	}
   168  
   169  	for index := 0; stepIter.Next(); index++ {
   170  		values := stepIter.Current().Values()
   171  		if err := builder.AppendValues(index, n.op.takeFunc(heap, values, buckets)); err != nil {
   172  			return nil, err
   173  		}
   174  	}
   175  	if err = stepIter.Err(); err != nil {
   176  		return nil, err
   177  	}
   178  	return builder.Build(), nil
   179  }
   180  
   181  func maxSeriesCount(buckets [][]int) int {
   182  	result := 0
   183  
   184  	for _, bucket := range buckets {
   185  		if len(bucket) > result {
   186  			result = len(bucket)
   187  		}
   188  	}
   189  
   190  	return result
   191  }
   192  
   193  func (n *takeNode) processBlockInstantaneous(
   194  	heap utils.FloatHeap,
   195  	queryCtx *models.QueryContext,
   196  	metadata block.Metadata,
   197  	stepIter block.StepIter,
   198  	seriesMetas []block.SeriesMeta,
   199  	buckets [][]int) (block.Block, error) {
   200  	ixLastStep := stepIter.StepCount() - 1 //we only care for the last step values for the instant query
   201  	for i := 0; i <= ixLastStep; i++ {
   202  		if !stepIter.Next() {
   203  			return nil, fmt.Errorf("invalid step count; expected %d got %d", stepIter.StepCount(), i+1)
   204  		}
   205  	}
   206  	metadata.ResultMetadata.KeepNaNs = true
   207  	values := stepIter.Current().Values()
   208  	takenSortedValues := n.op.takeInstantFunc(heap, values, buckets, seriesMetas)
   209  	blockValues, blockSeries := mapToValuesAndSeriesMetas(takenSortedValues)
   210  
   211  	//adjust bounds to contain single step
   212  	time, err := metadata.Bounds.TimeForIndex(ixLastStep)
   213  	if err != nil {
   214  		return nil, err
   215  	}
   216  	metadata.Bounds = models.Bounds{
   217  		Start:    time,
   218  		Duration: metadata.Bounds.StepSize,
   219  		StepSize: metadata.Bounds.StepSize,
   220  	}
   221  
   222  	blockBuilder, err := n.controller.BlockBuilder(queryCtx, metadata, blockSeries)
   223  	if err != nil {
   224  		return nil, err
   225  	}
   226  	if err = blockBuilder.AddCols(1); err != nil {
   227  		return nil, err
   228  	}
   229  	if err := blockBuilder.AppendValues(0, blockValues); err != nil {
   230  		return nil, err
   231  	}
   232  	if err = stepIter.Err(); err != nil {
   233  		return nil, err
   234  	}
   235  	return blockBuilder.Build(), nil
   236  }
   237  
   238  func mapToValuesAndSeriesMetas(takenSortedValues []valueAndMeta) ([]float64, []block.SeriesMeta) {
   239  	blockValues := make([]float64, 0, len(takenSortedValues))
   240  	blockSeries := make([]block.SeriesMeta, 0, len(takenSortedValues))
   241  	for _, sortedValue := range takenSortedValues {
   242  		blockValues = append(blockValues, sortedValue.val)
   243  		blockSeries = append(blockSeries, sortedValue.seriesMeta)
   244  	}
   245  	return blockValues, blockSeries
   246  }
   247  
   248  func takeFn(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 {
   249  	capacity := heap.Cap()
   250  	if capacity < 1 {
   251  		util.Memset(values, math.NaN())
   252  		return values
   253  	}
   254  	for _, bucket := range buckets {
   255  		// If this bucket's length is less than or equal to the heap's
   256  		// capacity do not need to clear any values from the input vector,
   257  		// as they are all included in the output.
   258  		if len(bucket) <= capacity {
   259  			continue
   260  		}
   261  
   262  		// Add values from this bucket to heap, clearing them from input vector
   263  		// after they are in the heap.
   264  		for _, idx := range bucket {
   265  			val := values[idx]
   266  			if !math.IsNaN(val) {
   267  				heap.Push(values[idx], idx)
   268  			}
   269  
   270  			values[idx] = math.NaN()
   271  		}
   272  
   273  		// Re-add the val/index pairs from the heap to the input vector
   274  		valIndexPairs := heap.Flush()
   275  		for _, pair := range valIndexPairs {
   276  			values[pair.Index] = pair.Val
   277  		}
   278  	}
   279  
   280  	return values
   281  }
   282  
   283  func takeInstantFn(heap utils.FloatHeap, values []float64, buckets [][]int, metas []block.SeriesMeta) []valueAndMeta {
   284  	var result = make([]valueAndMeta, 0, heap.Cap())
   285  	if heap.Cap() < 1 {
   286  		return result
   287  	}
   288  	for _, bucket := range buckets {
   289  		for _, idx := range bucket {
   290  			val := values[idx]
   291  			heap.Push(val, idx)
   292  		}
   293  
   294  		valIndexPairs := heap.OrderedFlush()
   295  		for _, pair := range valIndexPairs {
   296  			prevIndex := pair.Index
   297  			prevMeta := metas[prevIndex]
   298  
   299  			result = append(result, valueAndMeta{
   300  				val:        pair.Val,
   301  				seriesMeta: prevMeta,
   302  			})
   303  		}
   304  	}
   305  	return result
   306  }