// Source: github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/query/functions/aggregation/take.go
//
// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
20 21 package aggregation 22 23 import ( 24 "fmt" 25 "math" 26 27 "github.com/m3db/m3/src/query/block" 28 "github.com/m3db/m3/src/query/executor/transform" 29 "github.com/m3db/m3/src/query/functions/utils" 30 "github.com/m3db/m3/src/query/models" 31 "github.com/m3db/m3/src/query/parser" 32 "github.com/m3db/m3/src/query/util" 33 ) 34 35 const ( 36 // BottomKType gathers the smallest k non nan elements in a list of series 37 BottomKType = "bottomk" 38 // TopKType gathers the largest k non nan elements in a list of series 39 TopKType = "topk" 40 ) 41 42 type valueAndMeta struct { 43 val float64 44 seriesMeta block.SeriesMeta 45 } 46 47 type takeFunc func(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 48 type takeInstantFunc func(heap utils.FloatHeap, values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta 49 50 // NewTakeOp creates a new takeK operation 51 func NewTakeOp( 52 opType string, 53 params NodeParams, 54 ) (parser.Params, error) { 55 k := int(params.Parameter) 56 fn := func(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { 57 return takeFn(heap, values, buckets) 58 } 59 fnInstant := func(heap utils.FloatHeap, values []float64, buckets [][]int, seriesMetas []block.SeriesMeta) []valueAndMeta { 60 return takeInstantFn(heap, values, buckets, seriesMetas) 61 } 62 return newTakeOp(params, opType, k, fn, fnInstant), nil 63 } 64 65 // takeOp stores required properties for take ops 66 type takeOp struct { 67 params NodeParams 68 opType string 69 k int 70 takeFunc takeFunc 71 takeInstantFunc takeInstantFunc 72 } 73 74 // OpType for the operator 75 func (o takeOp) OpType() string { 76 return o.opType 77 } 78 79 // String representation 80 func (o takeOp) String() string { 81 return fmt.Sprintf("type: %s", o.OpType()) 82 } 83 84 // Node creates an execution node 85 func (o takeOp) Node( 86 controller *transform.Controller, 87 _ transform.Options, 88 ) transform.OpNode { 89 return &takeNode{ 90 op: o, 91 
controller: controller, 92 } 93 } 94 95 func newTakeOp(params NodeParams, opType string, k int, takeFunc takeFunc, takeInstantFunc takeInstantFunc) takeOp { 96 return takeOp{ 97 params: params, 98 opType: opType, 99 k: k, 100 takeFunc: takeFunc, 101 takeInstantFunc: takeInstantFunc, 102 } 103 } 104 105 // takeNode is different from base node as it only uses grouping to determine 106 // groups from which to take values from, and does not necessarily compress the 107 // series set as regular aggregation functions do 108 type takeNode struct { 109 op takeOp 110 controller *transform.Controller 111 } 112 113 func (n *takeNode) Params() parser.Params { 114 return n.op 115 } 116 117 // Process the block 118 func (n *takeNode) Process(queryCtx *models.QueryContext, ID parser.NodeID, b block.Block) error { 119 return transform.ProcessSimpleBlock(n, n.controller, queryCtx, ID, b) 120 } 121 122 func (n *takeNode) ProcessBlock(queryCtx *models.QueryContext, ID parser.NodeID, b block.Block) (block.Block, error) { 123 stepIter, err := b.StepIter() 124 if err != nil { 125 return nil, err 126 } 127 128 instantaneous := queryCtx.Options.Instantaneous 129 takeTop := n.op.opType == TopKType 130 if !takeTop && n.op.opType != BottomKType { 131 return nil, fmt.Errorf("operator not supported: %s", n.op.opType) 132 } 133 134 params := n.op.params 135 meta := b.Meta() 136 seriesMetas := utils.FlattenMetadata(meta, stepIter.SeriesMeta()) 137 buckets, _ := utils.GroupSeries( 138 params.MatchingTags, 139 params.Without, 140 []byte(n.op.opType), 141 seriesMetas, 142 ) 143 144 seriesCount := maxSeriesCount(buckets) 145 if instantaneous { 146 heapSize := seriesCount 147 if n.op.k < seriesCount { 148 heapSize = n.op.k 149 } 150 151 heap := utils.NewFloatHeap(takeTop, heapSize) 152 return n.processBlockInstantaneous(heap, queryCtx, meta, stepIter, seriesMetas, buckets) 153 } 154 155 if n.op.k >= seriesCount { 156 return b, nil 157 } 158 159 heap := utils.NewFloatHeap(takeTop, n.op.k) 160 builder, 
err := n.controller.BlockBuilder(queryCtx, meta, seriesMetas) 161 if err != nil { 162 return nil, err 163 } 164 165 if err = builder.AddCols(stepIter.StepCount()); err != nil { 166 return nil, err 167 } 168 169 for index := 0; stepIter.Next(); index++ { 170 values := stepIter.Current().Values() 171 if err := builder.AppendValues(index, n.op.takeFunc(heap, values, buckets)); err != nil { 172 return nil, err 173 } 174 } 175 if err = stepIter.Err(); err != nil { 176 return nil, err 177 } 178 return builder.Build(), nil 179 } 180 181 func maxSeriesCount(buckets [][]int) int { 182 result := 0 183 184 for _, bucket := range buckets { 185 if len(bucket) > result { 186 result = len(bucket) 187 } 188 } 189 190 return result 191 } 192 193 func (n *takeNode) processBlockInstantaneous( 194 heap utils.FloatHeap, 195 queryCtx *models.QueryContext, 196 metadata block.Metadata, 197 stepIter block.StepIter, 198 seriesMetas []block.SeriesMeta, 199 buckets [][]int) (block.Block, error) { 200 ixLastStep := stepIter.StepCount() - 1 //we only care for the last step values for the instant query 201 for i := 0; i <= ixLastStep; i++ { 202 if !stepIter.Next() { 203 return nil, fmt.Errorf("invalid step count; expected %d got %d", stepIter.StepCount(), i+1) 204 } 205 } 206 metadata.ResultMetadata.KeepNaNs = true 207 values := stepIter.Current().Values() 208 takenSortedValues := n.op.takeInstantFunc(heap, values, buckets, seriesMetas) 209 blockValues, blockSeries := mapToValuesAndSeriesMetas(takenSortedValues) 210 211 //adjust bounds to contain single step 212 time, err := metadata.Bounds.TimeForIndex(ixLastStep) 213 if err != nil { 214 return nil, err 215 } 216 metadata.Bounds = models.Bounds{ 217 Start: time, 218 Duration: metadata.Bounds.StepSize, 219 StepSize: metadata.Bounds.StepSize, 220 } 221 222 blockBuilder, err := n.controller.BlockBuilder(queryCtx, metadata, blockSeries) 223 if err != nil { 224 return nil, err 225 } 226 if err = blockBuilder.AddCols(1); err != nil { 227 return nil, 
err 228 } 229 if err := blockBuilder.AppendValues(0, blockValues); err != nil { 230 return nil, err 231 } 232 if err = stepIter.Err(); err != nil { 233 return nil, err 234 } 235 return blockBuilder.Build(), nil 236 } 237 238 func mapToValuesAndSeriesMetas(takenSortedValues []valueAndMeta) ([]float64, []block.SeriesMeta) { 239 blockValues := make([]float64, 0, len(takenSortedValues)) 240 blockSeries := make([]block.SeriesMeta, 0, len(takenSortedValues)) 241 for _, sortedValue := range takenSortedValues { 242 blockValues = append(blockValues, sortedValue.val) 243 blockSeries = append(blockSeries, sortedValue.seriesMeta) 244 } 245 return blockValues, blockSeries 246 } 247 248 func takeFn(heap utils.FloatHeap, values []float64, buckets [][]int) []float64 { 249 capacity := heap.Cap() 250 if capacity < 1 { 251 util.Memset(values, math.NaN()) 252 return values 253 } 254 for _, bucket := range buckets { 255 // If this bucket's length is less than or equal to the heap's 256 // capacity do not need to clear any values from the input vector, 257 // as they are all included in the output. 258 if len(bucket) <= capacity { 259 continue 260 } 261 262 // Add values from this bucket to heap, clearing them from input vector 263 // after they are in the heap. 
264 for _, idx := range bucket { 265 val := values[idx] 266 if !math.IsNaN(val) { 267 heap.Push(values[idx], idx) 268 } 269 270 values[idx] = math.NaN() 271 } 272 273 // Re-add the val/index pairs from the heap to the input vector 274 valIndexPairs := heap.Flush() 275 for _, pair := range valIndexPairs { 276 values[pair.Index] = pair.Val 277 } 278 } 279 280 return values 281 } 282 283 func takeInstantFn(heap utils.FloatHeap, values []float64, buckets [][]int, metas []block.SeriesMeta) []valueAndMeta { 284 var result = make([]valueAndMeta, 0, heap.Cap()) 285 if heap.Cap() < 1 { 286 return result 287 } 288 for _, bucket := range buckets { 289 for _, idx := range bucket { 290 val := values[idx] 291 heap.Push(val, idx) 292 } 293 294 valIndexPairs := heap.OrderedFlush() 295 for _, pair := range valIndexPairs { 296 prevIndex := pair.Index 297 prevMeta := metas[prevIndex] 298 299 result = append(result, valueAndMeta{ 300 val: pair.Val, 301 seriesMeta: prevMeta, 302 }) 303 } 304 } 305 return result 306 }