github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/aggregations/timechartagg.go (about) 1 package aggregations 2 3 import ( 4 "sort" 5 "strconv" 6 "strings" 7 "time" 8 9 "github.com/axiomhq/hyperloglog" 10 "github.com/siglens/siglens/pkg/segment/structs" 11 "github.com/siglens/siglens/pkg/segment/utils" 12 log "github.com/sirupsen/logrus" 13 ) 14 15 type scorePair struct { 16 groupByColVal string 17 score float64 18 index int 19 } 20 21 func GenerateTimeRangeBuckets(timeHistogram *structs.TimeBucket) []uint64 { 22 timeRangeBuckets := make([]uint64, 0) 23 currentTime := timeHistogram.StartTime 24 for currentTime < timeHistogram.EndTime { 25 timeRangeBuckets = append(timeRangeBuckets, currentTime) 26 nextTime := currentTime + timeHistogram.IntervalMillis 27 if nextTime > timeHistogram.EndTime { 28 break 29 } 30 31 currentTime = nextTime 32 } 33 34 return timeRangeBuckets 35 } 36 37 // Find correct time range bucket for timestamp 38 func FindTimeRangeBucket(timePoints []uint64, timestamp uint64, intervalMillis uint64) uint64 { 39 index := ((timestamp - timePoints[0]) / intervalMillis) 40 if index >= uint64(len(timePoints)) { 41 index = uint64(len(timePoints) - 1) 42 } 43 return timePoints[index] 44 } 45 46 func GetIntervalInMillis(num int, timeUnit utils.TimeUnit) uint64 { 47 numD := time.Duration(num) 48 49 switch timeUnit { 50 case utils.TMMicrosecond: 51 // Might not has effect for 'us', because smallest time unit for timestamp in siglens is ms 52 case utils.TMMillisecond: 53 return uint64(numD) 54 case utils.TMCentisecond: 55 return uint64(numD * 10 * time.Millisecond) 56 case utils.TMDecisecond: 57 return uint64(numD * 100 * time.Millisecond) 58 case utils.TMSecond: 59 return uint64((numD * time.Second).Milliseconds()) 60 case utils.TMMinute: 61 return uint64((numD * time.Minute).Milliseconds()) 62 case utils.TMHour: 63 return uint64((numD * time.Hour).Milliseconds()) 64 case utils.TMDay: 65 return uint64((numD * 24 * time.Hour).Milliseconds()) 66 case utils.TMWeek: 67 return uint64((numD * 7 * 24 * time.Hour).Milliseconds()) 68 case utils.TMMonth: 69 return uint64((numD * 30 * 24 * time.Hour).Milliseconds()) 70 case utils.TMQuarter: 71 return uint64((numD * 120 * 24 * time.Hour).Milliseconds()) 72 } 73 return uint64((10 * time.Minute).Milliseconds()) // 10 Minutes 74 } 75 76 func InitTimeBucket(num int, timeUnit utils.TimeUnit, byField string, limitExpr *structs.LimitExpr, measureAggLength int) *structs.TimeBucket { 77 78 intervalMillis := GetIntervalInMillis(num, timeUnit) 79 80 timechartExpr := &structs.TimechartExpr{ 81 ByField: byField, 82 } 83 84 if len(byField) > 0 { 85 if limitExpr != nil { 86 timechartExpr.LimitExpr = limitExpr 87 } else { 88 timechartExpr.LimitExpr = &structs.LimitExpr{ 89 IsTop: true, 90 Num: 10, 91 LimitScoreMode: structs.LSMBySum, 92 } 93 if measureAggLength > 1 { 94 timechartExpr.LimitExpr.LimitScoreMode = structs.LSMByFreq 95 } 96 } 97 } 98 99 timeBucket := &structs.TimeBucket{ 100 IntervalMillis: intervalMillis, 101 Timechart: timechartExpr, 102 } 103 104 return timeBucket 105 } 106 107 func AddAggCountToTimechartRunningStats(m *structs.MeasureAggregator, allConvertedMeasureOps *[]*structs.MeasureAggregator, allReverseIndex *[]int, colToIdx map[string][]int, idx int) { 108 *allReverseIndex = append(*allReverseIndex, idx) 109 colToIdx[m.MeasureCol] = append(colToIdx[m.MeasureCol], idx) 110 *allConvertedMeasureOps = append(*allConvertedMeasureOps, &structs.MeasureAggregator{ 111 MeasureCol: m.MeasureCol, 112 MeasureFunc: utils.Count, 113 StrEnc: m.StrEnc, 114 }) 115 } 116 117 func AddAggAvgToTimechartRunningStats(m *structs.MeasureAggregator, allConvertedMeasureOps *[]*structs.MeasureAggregator, allReverseIndex *[]int, colToIdx map[string][]int, idx int) { 118 *allReverseIndex = append(*allReverseIndex, idx) 119 colToIdx[m.MeasureCol] = append(colToIdx[m.MeasureCol], idx) 120 *allConvertedMeasureOps = append(*allConvertedMeasureOps, &structs.MeasureAggregator{ 121 MeasureCol: m.MeasureCol, 122 MeasureFunc: utils.Sum, 123 StrEnc: m.StrEnc, 124 }) 125 idx++ 126 *allReverseIndex = append(*allReverseIndex, idx) 127 colToIdx[m.MeasureCol] = append(colToIdx[m.MeasureCol], idx) 128 *allConvertedMeasureOps = append(*allConvertedMeasureOps, &structs.MeasureAggregator{ 129 MeasureCol: m.MeasureCol, 130 MeasureFunc: utils.Count, 131 StrEnc: m.StrEnc, 132 }) 133 } 134 135 // Timechart will only display N highest/lowest scoring distinct values of the split-by field 136 // For Single agg, the score is based on the sum of the values in the aggregation. Therefore, we can only know groupByColVal's ranking after processing all the runningStats 137 // For multiple aggs, the score is based on the freq of the field. Which means we can rank groupByColVal at this time. 138 func CheckGroupByColValsAgainstLimit(timechart *structs.TimechartExpr, groupByColValCnt map[string]int, groupValScoreMap map[string]*utils.CValueEnclosure, measureOperations []*structs.MeasureAggregator) map[string]bool { 139 140 if timechart == nil || timechart.LimitExpr == nil { 141 return nil 142 } 143 144 // When there is only one agg and agg is values(), we can not score that based on the sum of the values in the aggregation 145 onlyUseByValuesFunc := false 146 if len(measureOperations) == 1 && measureOperations[0].MeasureFunc == utils.Values { 147 onlyUseByValuesFunc = true 148 } 149 150 index := 0 151 valIsInLimit := make(map[string]bool) 152 isRankBySum := IsRankBySum(timechart) 153 154 // When there is only one aggregator and aggregator is values(), we can not score that based on the sum of the values in the aggregation 155 if isRankBySum && !onlyUseByValuesFunc { 156 scorePairs := make([]scorePair, 0) 157 // []float64, 0: score; 1: index 158 for groupByColVal, cVal := range groupValScoreMap { 159 valIsInLimit[groupByColVal] = false 160 score, err := cVal.GetFloatValue() 161 if err != nil { 162 log.Errorf("CheckGroupByColValsAgainstLimit: %v does not have a score", groupByColVal) 163 continue 164 } 165 scorePairs = append(scorePairs, scorePair{ 166 groupByColVal: groupByColVal, 167 score: score, 168 index: index, 169 }) 170 index++ 171 } 172 173 if timechart.LimitExpr.IsTop { 174 sort.Slice(scorePairs, func(i, j int) bool { 175 return scorePairs[i].score > scorePairs[j].score 176 }) 177 } else { 178 sort.Slice(scorePairs, func(i, j int) bool { 179 return scorePairs[i].score < scorePairs[j].score 180 }) 181 } 182 183 limit := timechart.LimitExpr.Num 184 if limit > len(scorePairs) { 185 limit = len(scorePairs) 186 } 187 188 for i := 0; i < limit; i++ { 189 valIsInLimit[scorePairs[i].groupByColVal] = true 190 } 191 192 } else { // rank by freq 193 // []int, 0: cnt; 1: index 194 cnts := make([][]int, 0) 195 vals := make([]string, 0) 196 197 for groupByColVal, cnt := range groupByColValCnt { 198 vals = append(vals, groupByColVal) 199 cnts = append(cnts, []int{cnt, index}) 200 valIsInLimit[groupByColVal] = false 201 index++ 202 } 203 204 if timechart.LimitExpr.IsTop { 205 sort.Slice(cnts, func(i, j int) bool { 206 return cnts[i][0] > cnts[j][0] 207 }) 208 } else { 209 sort.Slice(cnts, func(i, j int) bool { 210 return cnts[i][0] < cnts[j][0] 211 }) 212 } 213 214 limit := timechart.LimitExpr.Num 215 if limit > len(vals) { 216 limit = len(vals) 217 } 218 219 for i := 0; i < limit; i++ { 220 valIndex := cnts[i][1] 221 valIsInLimit[vals[valIndex]] = true 222 } 223 } 224 225 return valIsInLimit 226 } 227 228 // Initial score map for single agg: the score is based on the sum of the values in the aggregation 229 func InitialScoreMap(timechart *structs.TimechartExpr, groupByColValCnt map[string]int) map[string]*utils.CValueEnclosure { 230 231 if timechart == nil || timechart.LimitExpr == nil || timechart.LimitExpr.LimitScoreMode == structs.LSMByFreq { 232 return nil 233 } 234 235 groupByColValScoreMap := make(map[string]*utils.CValueEnclosure, 0) 236 for groupByColVal := range groupByColValCnt { 237 groupByColValScoreMap[groupByColVal] = &utils.CValueEnclosure{CVal: nil, Dtype: utils.SS_INVALID} 238 } 239 240 return groupByColValScoreMap 241 } 242 243 func SortTimechartRes(timechart *structs.TimechartExpr, results *[]*structs.BucketResult) { 244 if timechart == nil || results == nil { 245 return 246 } 247 248 sort.Slice(*results, func(i, j int) bool { 249 bucketKey1, ok := (*results)[i].BucketKey.(string) 250 if !ok { 251 log.Errorf("SortTimechartRes: cannot convert bucketKey to string: %v", (*results)[i].BucketKey) 252 return false 253 } 254 255 bucketKey2, ok := (*results)[j].BucketKey.(string) 256 if !ok { 257 log.Errorf("SortTimechartRes: cannot convert bucketKey to string: %v", (*results)[j].BucketKey) 258 return true 259 } 260 261 timestamp1, err := strconv.ParseUint(bucketKey1, 10, 64) 262 if err != nil { 263 log.Errorf("SortTimechartRes: cannot convert bucketKey to timestamp: %v", bucketKey1) 264 return false 265 } 266 267 timestamp2, err := strconv.ParseUint(bucketKey2, 10, 64) 268 if err != nil { 269 log.Errorf("SortTimechartRes: cannot convert bucketKey to timestamp: %v", bucketKey2) 270 return true 271 } 272 273 return timestamp1 < timestamp2 274 }) 275 } 276 277 func IsOtherCol(valIsInLimit map[string]bool, groupByColVal string) bool { 278 isOtherCol := false 279 if valIsInLimit != nil { 280 inLimit, exists := valIsInLimit[groupByColVal] 281 if exists { 282 isOtherCol = !inLimit 283 } 284 } 285 return isOtherCol 286 } 287 288 // For numeric agg(not include dc), we can simply use addition to merge them 289 // For string values, it depends on the aggregation function 290 func MergeVal(eVal *utils.CValueEnclosure, eValToMerge utils.CValueEnclosure, hll *hyperloglog.Sketch, hllToMerge *hyperloglog.Sketch, 291 strSet map[string]struct{}, strSetToMerge map[string]struct{}, aggFunc utils.AggregateFunctions, useAdditionForMerge bool) { 292 293 tmp := utils.CValueEnclosure{ 294 Dtype: eVal.Dtype, 295 CVal: eVal.CVal, 296 } 297 298 switch aggFunc { 299 case utils.Count: 300 fallthrough 301 case utils.Avg: 302 fallthrough 303 case utils.Min: 304 fallthrough 305 case utils.Max: 306 fallthrough 307 case utils.Range: 308 fallthrough 309 case utils.Sum: 310 aggFunc = utils.Sum 311 case utils.Cardinality: 312 if useAdditionForMerge { 313 aggFunc = utils.Sum 314 } else { 315 err := hll.Merge(hllToMerge) 316 if err != nil { 317 log.Errorf("MergeVal: failed to merge hyperloglog stats: %v", err) 318 } 319 eVal.CVal = hll.Estimate() 320 eVal.Dtype = utils.SS_DT_UNSIGNED_NUM 321 return 322 } 323 case utils.Values: 324 // Can not do addition for values func 325 if useAdditionForMerge { 326 return 327 } 328 for str := range strSetToMerge { 329 strSet[str] = struct{}{} 330 } 331 uniqueStrings := make([]string, 0) 332 for str := range strSet { 333 uniqueStrings = append(uniqueStrings, str) 334 } 335 sort.Strings(uniqueStrings) 336 strVal := strings.Join(uniqueStrings, " ") 337 338 eVal.CVal = strVal 339 eVal.Dtype = utils.SS_DT_STRING 340 return 341 } 342 343 retVal, err := utils.Reduce(eValToMerge, tmp, aggFunc) 344 if err != nil { 345 log.Errorf("MergeVal: failed to merge eVal into otherCVal: %v", err) 346 return 347 } 348 eVal.CVal = retVal.CVal 349 eVal.Dtype = retVal.Dtype 350 } 351 352 func MergeMap(groupByColValCnt map[string]int, toMerge map[string]int) { 353 354 for key, cnt := range groupByColValCnt { 355 cntToMerge, exists := toMerge[key] 356 if exists { 357 groupByColValCnt[key] = cnt + cntToMerge 358 } 359 } 360 361 for key, cnt := range toMerge { 362 _, exists := groupByColValCnt[key] 363 if !exists { 364 groupByColValCnt[key] = cnt 365 } 366 } 367 } 368 369 func IsRankBySum(timechart *structs.TimechartExpr) bool { 370 if timechart != nil && timechart.LimitExpr != nil && timechart.LimitExpr.LimitScoreMode == structs.LSMBySum { 371 return true 372 } 373 return false 374 } 375 376 func ShouldAddRes(timechart *structs.TimechartExpr, tmLimitResult *structs.TMLimitResult, index int, eVal utils.CValueEnclosure, 377 hllToMerge *hyperloglog.Sketch, strSetToMerge map[string]struct{}, aggFunc utils.AggregateFunctions, groupByColVal string, isOtherCol bool) bool { 378 379 useAdditionForMerge := (tmLimitResult.OtherCValArr == nil) 380 isRankBySum := IsRankBySum(timechart) 381 382 // If true, current col's val will be added into 'other' col. So its val should not be added into res at this time 383 if isOtherCol { 384 otherCVal := tmLimitResult.OtherCValArr[index] 385 MergeVal(otherCVal, eVal, tmLimitResult.Hll, hllToMerge, tmLimitResult.StrSet, strSetToMerge, aggFunc, useAdditionForMerge) 386 return false 387 } else { 388 if isRankBySum && tmLimitResult.OtherCValArr == nil { 389 scoreVal := tmLimitResult.GroupValScoreMap[groupByColVal] 390 MergeVal(scoreVal, eVal, tmLimitResult.Hll, hllToMerge, tmLimitResult.StrSet, strSetToMerge, aggFunc, useAdditionForMerge) 391 return false 392 } 393 return true 394 } 395 }