github.com/m3db/m3@v1.5.0/src/query/functions/aggregation/count_values.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package aggregation 22 23 import ( 24 "fmt" 25 "math" 26 27 "github.com/m3db/m3/src/query/block" 28 "github.com/m3db/m3/src/query/executor/transform" 29 "github.com/m3db/m3/src/query/functions/utils" 30 "github.com/m3db/m3/src/query/models" 31 "github.com/m3db/m3/src/query/parser" 32 "github.com/m3db/m3/src/query/util" 33 ) 34 35 const ( 36 // CountValuesType counts the number of non nan elements with the same value. 37 CountValuesType = "count_values" 38 ) 39 40 // NewCountValuesOp creates a new count values operation. 41 func NewCountValuesOp( 42 opType string, 43 params NodeParams, 44 ) (parser.Params, error) { 45 if opType != CountValuesType { 46 return baseOp{}, fmt.Errorf("operator not supported: %s", opType) 47 } 48 49 return newCountValuesOp(params, opType), nil 50 } 51 52 // countValuesOp stores required properties for count values ops. 53 type countValuesOp struct { 54 params NodeParams 55 opType string 56 } 57 58 func (o countValuesOp) OpType() string { 59 return o.opType 60 } 61 62 func (o countValuesOp) String() string { 63 return fmt.Sprintf("type: %s", o.OpType()) 64 } 65 66 func (o countValuesOp) Node( 67 controller *transform.Controller, 68 _ transform.Options, 69 ) transform.OpNode { 70 return &countValuesNode{ 71 op: o, 72 controller: controller, 73 } 74 } 75 76 func newCountValuesOp(params NodeParams, opType string) countValuesOp { 77 return countValuesOp{ 78 params: params, 79 opType: opType, 80 } 81 } 82 83 type countValuesNode struct { 84 op countValuesOp 85 controller *transform.Controller 86 } 87 88 func (n *countValuesNode) Params() parser.Params { 89 return n.op 90 } 91 92 // bucketColumn represents a column of times a particular value in a series has 93 // been seen. This may expand as more unique values are seen 94 type bucketColumn []float64 95 96 // bucketBlock is an abstraction for a set of series grouped by tags; count_values 97 // works on these groupings rather than the entire set of series. 98 type bucketBlock struct { 99 // columnLength can expand as further columns are processed; used to initialize 100 // the columns with empty values at each step 101 columnLength int 102 // columns indicates the number of times a value has been seen at a given step 103 columns []bucketColumn 104 // indexMapping maps any unique values seen to the appropriate column index 105 indexMapping map[float64]int 106 } 107 108 // Processes all series in this block bucket at the current column. 109 func processBlockBucketAtColumn( 110 currentBucketBlock *bucketBlock, 111 values []float64, 112 bucket []int, 113 columnIndex int, 114 ) { 115 // Generate appropriate number of rows full of -1s that will later map to NaNs 116 // unless updated with valid values 117 currentColumnLength := currentBucketBlock.columnLength 118 currentBucketBlock.columns[columnIndex] = make(bucketColumn, currentColumnLength) 119 for i := 0; i < currentColumnLength; i++ { 120 util.Memset(currentBucketBlock.columns[columnIndex], math.NaN()) 121 } 122 123 countedValues := countValuesFn(values, bucket) 124 for distinctValue, count := range countedValues { 125 currentBucketColumn := currentBucketBlock.columns[columnIndex] 126 if rowIndex, seen := currentBucketBlock.indexMapping[distinctValue]; seen { 127 // This value has already been seen at rowIndex in a previous column 128 // so add the current value to the appropriate row index. 129 currentBucketColumn[rowIndex] = count 130 } else { 131 // The column index needs to be created here already 132 // Add the count to the end of the bucket column 133 currentBucketBlock.columns[columnIndex] = append(currentBucketColumn, count) 134 135 // Add the distinctValue to the indexMapping 136 currentBucketBlock.indexMapping[distinctValue] = len(currentBucketColumn) 137 } 138 } 139 140 currentBucketBlock.columnLength = len(currentBucketBlock.columns[columnIndex]) 141 } 142 143 // Process the block 144 func (n *countValuesNode) Process( 145 queryCtx *models.QueryContext, 146 ID parser.NodeID, 147 b block.Block, 148 ) error { 149 return transform.ProcessSimpleBlock(n, n.controller, queryCtx, ID, b) 150 } 151 152 func (n *countValuesNode) ProcessBlock( 153 queryCtx *models.QueryContext, 154 ID parser.NodeID, 155 b block.Block, 156 ) (block.Block, error) { 157 meta := b.Meta() 158 stepIter, err := b.StepIter() 159 if err != nil { 160 return nil, err 161 } 162 163 params := n.op.params 164 labelName := params.StringParameter 165 if !models.IsValid(labelName) { 166 return nil, fmt.Errorf("invalid label name %q", labelName) 167 } 168 169 seriesMetas := utils.FlattenMetadata(meta, stepIter.SeriesMeta()) 170 buckets, metas := utils.GroupSeries( 171 params.MatchingTags, 172 params.Without, 173 []byte(n.op.opType), 174 seriesMetas, 175 ) 176 177 stepCount := stepIter.StepCount() 178 intermediateBlock := make([]bucketBlock, len(buckets)) 179 for i := range intermediateBlock { 180 intermediateBlock[i].columns = make([]bucketColumn, stepCount) 181 intermediateBlock[i].indexMapping = make(map[float64]int, len(buckets[i])) 182 } 183 184 for columnIndex := 0; stepIter.Next(); columnIndex++ { 185 step := stepIter.Current() 186 values := step.Values() 187 for bucketIndex, bucket := range buckets { 188 processBlockBucketAtColumn( 189 &intermediateBlock[bucketIndex], 190 values, 191 bucket, 192 columnIndex, 193 ) 194 } 195 } 196 197 if err = stepIter.Err(); err != nil { 198 return nil, err 199 } 200 201 numSeries := 0 202 for _, bucketBlock := range intermediateBlock { 203 numSeries += bucketBlock.columnLength 204 } 205 206 // Rebuild block metas in the expected order 207 blockMetas := make([]block.SeriesMeta, numSeries) 208 previousBucketBlockIndex := 0 209 for bucketIndex, bucketBlock := range intermediateBlock { 210 for k, v := range bucketBlock.indexMapping { 211 // Add the metas of this bucketBlock right after the previous block 212 blockMetas[v+previousBucketBlockIndex] = block.SeriesMeta{ 213 Name: []byte(n.op.opType), 214 Tags: metas[bucketIndex].Tags.Clone().AddTag(models.Tag{ 215 Name: []byte(labelName), 216 Value: utils.FormatFloatToBytes(k), 217 }), 218 } 219 } 220 221 // NB: All metadatas for the intermediate block for this bucket have 222 // been added to the combined block metas. The metadatas for the next 223 // intermediate block should be added after these to maintain order 224 previousBucketBlockIndex += bucketBlock.columnLength 225 } 226 227 // Dedupe common metadatas 228 metaTags, flattenedMeta := utils.DedupeMetadata(blockMetas, meta.Tags.Opts) 229 meta.Tags = metaTags 230 231 builder, err := n.controller.BlockBuilder(queryCtx, meta, flattenedMeta) 232 if err != nil { 233 return nil, err 234 } 235 236 if err := builder.AddCols(stepCount); err != nil { 237 return nil, err 238 } 239 240 for columnIndex := 0; columnIndex < stepCount; columnIndex++ { 241 for _, bucketBlock := range intermediateBlock { 242 valsToAdd := padValuesWithNaNs( 243 bucketBlock.columns[columnIndex], 244 len(bucketBlock.indexMapping), 245 ) 246 if err := builder.AppendValues(columnIndex, valsToAdd); err != nil { 247 return nil, err 248 } 249 } 250 } 251 252 return builder.Build(), nil 253 } 254 255 // pads vals with enough NaNs to match size 256 func padValuesWithNaNs(vals bucketColumn, size int) bucketColumn { 257 numToPad := size - len(vals) 258 for i := 0; i < numToPad; i++ { 259 vals = append(vals, math.NaN()) 260 } 261 262 return vals 263 } 264 265 // count values takes a value array and a bucket list, returns a map of 266 // distinct values to number of times the value was seen in this bucket. 267 // The distinct number returned here becomes the datapoint's value 268 func countValuesFn(values []float64, bucket []int) map[float64]float64 { 269 countedValues := make(map[float64]float64, len(bucket)) 270 for _, idx := range bucket { 271 val := values[idx] 272 if !math.IsNaN(val) { 273 countedValues[val]++ 274 } 275 } 276 277 return countedValues 278 }