github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/results/blockresults/runningstats.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package blockresults 18 19 import ( 20 "fmt" 21 22 "github.com/axiomhq/hyperloglog" 23 "github.com/siglens/siglens/pkg/segment/structs" 24 "github.com/siglens/siglens/pkg/segment/utils" 25 log "github.com/sirupsen/logrus" 26 bbp "github.com/valyala/bytebufferpool" 27 ) 28 29 type RunningBucketResults struct { 30 runningStats []runningStats // maps a stat name to running stats 31 currStats []*structs.MeasureAggregator // measure aggregators in result 32 groupedRunningStats map[string][]runningStats // maps timechart group by col's vals to corresponding running stats 33 count uint64 // total number of elements belonging to the bucket 34 } 35 36 type runningStats struct { 37 rawVal utils.CValueEnclosure // raw value 38 hll *hyperloglog.Sketch 39 } 40 41 func initRunningStats(internalMeasureFns []*structs.MeasureAggregator) []runningStats { 42 retVal := make([]runningStats, len(internalMeasureFns)) 43 for i := 0; i < len(internalMeasureFns); i++ { 44 if internalMeasureFns[i].MeasureFunc == utils.Cardinality { 45 retVal[i] = runningStats{hll: hyperloglog.New()} 46 } 47 } 48 return retVal 49 } 50 51 func initRunningGroupByBucket(internalMeasureFns []*structs.MeasureAggregator) *RunningBucketResults { 52 53 return &RunningBucketResults{ 54 count: 0, 55 runningStats: initRunningStats(internalMeasureFns), 56 currStats: internalMeasureFns, 57 groupedRunningStats: make(map[string][]runningStats), 58 } 59 } 60 61 func initRunningTimeBucket() *RunningBucketResults { 62 63 return &RunningBucketResults{ 64 count: 0, 65 } 66 } 67 68 func (rr *RunningBucketResults) AddTimeToBucketStats(count uint16) { 69 rr.count += uint64(count) 70 } 71 72 func (rr *RunningBucketResults) AddMeasureResults(runningStats *[]runningStats, measureResults []utils.CValueEnclosure, qid uint64, 73 cnt uint64, usedByTimechart bool) { 74 if runningStats == nil { 75 if rr.runningStats == nil { 76 return 77 } 78 runningStats = &rr.runningStats 79 } 80 81 for i := 0; i < len(*runningStats); i++ { 82 switch rr.currStats[i].MeasureFunc { 83 case utils.Sum: 84 fallthrough 85 case utils.Max: 86 fallthrough 87 case utils.Min: 88 err := rr.AddEvalResultsForMinOrMaxOrSum(runningStats, measureResults, i) 89 if err != nil { 90 log.Errorf("AddMeasureResults: %v", err) 91 } 92 case utils.Count: 93 step, err := rr.AddEvalResultsForCount(runningStats, measureResults, i, usedByTimechart, cnt) 94 if err != nil { 95 log.Errorf("AddMeasureResults: %v", err) 96 } 97 i += step 98 case utils.Cardinality: 99 if rr.currStats[i].ValueColRequest == nil { 100 rawVal, err := measureResults[i].GetString() 101 if err != nil { 102 log.Errorf("AddMeasureResults: failed to add measurement to running stats: %v", err) 103 continue 104 } 105 bb := bbp.Get() 106 defer bbp.Put(bb) 107 bb.Reset() 108 _, _ = bb.WriteString(rawVal) 109 (*runningStats)[i].hll.Insert(bb.B) 110 continue 111 } 112 fallthrough 113 case utils.Values: 114 step, err := rr.AddEvalResultsForValuesOrCardinality(runningStats, measureResults, i) 115 if err != nil { 116 log.Errorf("AddMeasureResults: %v", err) 117 } 118 i += step 119 default: 120 err := rr.ProcessReduce(runningStats, measureResults[i], i) 121 if err != nil { 122 log.Errorf("AddMeasureResults: %v", err) 123 } 124 } 125 } 126 rr.count += cnt 127 } 128 129 // This assumes the order of bucketResults.RunningStats are in the same order, referencing the same measure request 130 func (rr *RunningBucketResults) MergeRunningBuckets(toJoin *RunningBucketResults) { 131 132 if toJoin == nil { 133 return 134 } 135 136 // Merge group by bucket inside each time range bucket (For timechart) 137 if toJoin.groupedRunningStats != nil && rr.groupedRunningStats == nil { 138 rr.groupedRunningStats = toJoin.groupedRunningStats 139 } else if rr.groupedRunningStats != nil && len(rr.groupedRunningStats) > 0 { 140 for groupByColVal, runningStats := range rr.groupedRunningStats { 141 toJoinRunningStats, exists := toJoin.groupedRunningStats[groupByColVal] 142 if !exists { 143 continue 144 } 145 rr.mergeRunningStats(&runningStats, toJoinRunningStats) 146 } 147 148 for groupByColVal, toJoinRunningStats := range toJoin.groupedRunningStats { 149 _, exists := rr.groupedRunningStats[groupByColVal] 150 if !exists { 151 rr.groupedRunningStats[groupByColVal] = toJoinRunningStats 152 } 153 } 154 } 155 156 rr.mergeRunningStats(&rr.runningStats, toJoin.runningStats) 157 rr.count += toJoin.count 158 } 159 160 func (rr *RunningBucketResults) mergeRunningStats(runningStats *[]runningStats, toJoinRunningStats []runningStats) { 161 for i := 0; i < len(toJoinRunningStats); i++ { 162 switch rr.currStats[i].MeasureFunc { 163 case utils.Values: 164 if rr.currStats[i].ValueColRequest == nil { 165 err := rr.ProcessReduce(runningStats, toJoinRunningStats[i].rawVal, i) 166 if err != nil { 167 log.Errorf("mergeRunningStats: err: %v", err) 168 } 169 } else { 170 fields := rr.currStats[i].ValueColRequest.GetFields() 171 err := rr.ProcessReduce(runningStats, toJoinRunningStats[i].rawVal, i) 172 if err != nil { 173 log.Errorf("mergeRunningStats: err: %v", err) 174 } 175 i += (len(fields) - 1) 176 } 177 case utils.Cardinality: 178 if rr.currStats[i].ValueColRequest == nil { 179 err := (*runningStats)[i].hll.Merge(toJoinRunningStats[i].hll) 180 if err != nil { 181 log.Errorf("mergeRunningStats: failed merge HLL!: %v", err) 182 } 183 } else { 184 fields := rr.currStats[i].ValueColRequest.GetFields() 185 err := rr.ProcessReduce(runningStats, toJoinRunningStats[i].rawVal, i) 186 if err != nil { 187 log.Errorf("mergeRunningStats: err: %v", err) 188 } 189 i += (len(fields) - 1) 190 } 191 case utils.Count: 192 if rr.currStats[i].ValueColRequest == nil { 193 err := rr.ProcessReduce(runningStats, toJoinRunningStats[i].rawVal, i) 194 if err != nil { 195 log.Errorf("mergeRunningStats: err: %v", err) 196 } 197 } else { 198 fields := rr.currStats[i].ValueColRequest.GetFields() 199 err := rr.ProcessReduce(runningStats, toJoinRunningStats[i].rawVal, i) 200 if err != nil { 201 log.Errorf("mergeRunningStats: failed to add measurement to running stats: %v", err) 202 } 203 i += (len(fields) - 1) 204 } 205 default: 206 err := rr.ProcessReduce(runningStats, toJoinRunningStats[i].rawVal, i) 207 if err != nil { 208 log.Errorf("mergeRunningStats: err: %v", err) 209 } 210 } 211 } 212 } 213 214 func (rr *RunningBucketResults) ProcessReduce(runningStats *[]runningStats, e utils.CValueEnclosure, i int) error { 215 retVal, err := utils.Reduce((*runningStats)[i].rawVal, e, rr.currStats[i].MeasureFunc) 216 if err != nil { 217 return fmt.Errorf("ProcessReduce: failed to add measurement to running stats: %v", err) 218 } else { 219 (*runningStats)[i].rawVal = retVal 220 } 221 return nil 222 } 223 224 func (rr *RunningBucketResults) AddEvalResultsForMinOrMaxOrSum(runningStats *[]runningStats, measureResults []utils.CValueEnclosure, i int) error { 225 if rr.currStats[i].ValueColRequest == nil { 226 return rr.ProcessReduce(runningStats, measureResults[i], i) 227 } 228 229 fields := rr.currStats[i].ValueColRequest.GetFields() 230 if len(fields) != 1 { 231 return fmt.Errorf("AddEvalResultsForMinOrMaxOrSum: Incorrect number of fields for aggCol: %v", rr.currStats[i].String()) 232 } 233 fieldToValue := make(map[string]utils.CValueEnclosure) 234 fieldToValue[fields[0]] = measureResults[i] 235 boolResult, err := rr.currStats[i].ValueColRequest.BooleanExpr.Evaluate(fieldToValue) 236 if err != nil { 237 return fmt.Errorf("AddEvalResultsForMinOrMaxOrSum: there are some errors in the eval function that is inside the min/max function: %v", err) 238 } 239 if boolResult { 240 err := rr.ProcessReduce(runningStats, measureResults[i], i) 241 if err != nil { 242 return fmt.Errorf("AddEvalResultsForMinOrMaxOrSum: %v", err) 243 } 244 } 245 return nil 246 } 247 248 func (rr *RunningBucketResults) AddEvalResultsForCount(runningStats *[]runningStats, measureResults []utils.CValueEnclosure, i int, usedByTimechart bool, cnt uint64) (int, error) { 249 250 if rr.currStats[i].ValueColRequest == nil { 251 if usedByTimechart { 252 eVal := &utils.CValueEnclosure{ 253 Dtype: utils.SS_DT_UNSIGNED_NUM, 254 CVal: cnt, 255 } 256 return 0, rr.ProcessReduce(runningStats, *eVal, i) 257 } else { 258 return 0, rr.ProcessReduce(runningStats, measureResults[i], i) 259 } 260 } 261 262 fields := rr.currStats[i].ValueColRequest.GetFields() 263 fieldToValue := make(map[string]utils.CValueEnclosure) 264 265 index := i 266 for _, field := range fields { 267 fieldToValue[field] = measureResults[index] 268 index++ 269 } 270 271 boolResult, err := rr.currStats[i].ValueColRequest.BooleanExpr.Evaluate(fieldToValue) 272 if err != nil { 273 return 0, fmt.Errorf("AddEvalResultsForCount: there are some errors in the eval function that is inside the count function: %v", err) 274 } 275 if (*runningStats)[i].rawVal.CVal == nil { 276 (*runningStats)[i].rawVal = utils.CValueEnclosure{ 277 CVal: int64(0), 278 Dtype: utils.SS_DT_SIGNED_NUM, 279 } 280 } 281 if boolResult { 282 (*runningStats)[i].rawVal.CVal = (*runningStats)[i].rawVal.CVal.(int64) + 1 283 } 284 285 return len(fields) - 1, nil 286 } 287 288 func (rr *RunningBucketResults) AddEvalResultsForValuesOrCardinality(runningStats *[]runningStats, measureResults []utils.CValueEnclosure, i int) (int, error) { 289 if (*runningStats)[i].rawVal.CVal == nil { 290 (*runningStats)[i].rawVal = utils.CValueEnclosure{ 291 Dtype: utils.SS_DT_STRING_SET, 292 CVal: make(map[string]struct{}, 0), 293 } 294 } 295 strSet := (*runningStats)[i].rawVal.CVal.(map[string]struct{}) 296 297 if rr.currStats[i].ValueColRequest == nil { 298 strVal, err := measureResults[i].GetString() 299 if err != nil { 300 return 0, fmt.Errorf("AddEvalResultsForValuesOrCardinality: failed to add measurement to running stats: %v", err) 301 } 302 strSet[strVal] = struct{}{} 303 (*runningStats)[i].rawVal.CVal = strSet 304 return 0, nil 305 } 306 307 fields := rr.currStats[i].ValueColRequest.GetFields() 308 fieldToValue := make(map[string]utils.CValueEnclosure) 309 310 index := i 311 for _, field := range fields { 312 fieldToValue[field] = measureResults[index] 313 index++ 314 } 315 316 strVal, err := rr.currStats[i].ValueColRequest.EvaluateToString(fieldToValue) 317 if err != nil { 318 return 0, fmt.Errorf("AddEvalResultsForValuesOrCardinality: there are some errors in the eval function that is inside the count function: %v", err) 319 } 320 strSet[strVal] = struct{}{} 321 (*runningStats)[i].rawVal.CVal = strSet 322 323 return len(fields) - 1, nil 324 } 325 326 func (rr *RunningBucketResults) GetRunningStatsBucketValues() ([]utils.CValueEnclosure, uint64) { 327 retVal := make([]utils.CValueEnclosure, len(rr.runningStats)) 328 for i := 0; i < len(rr.runningStats); i++ { 329 retVal[i] = rr.runningStats[i].rawVal 330 } 331 return retVal, rr.count 332 }