github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/aggregations/timechartagg.go (about)

     1  package aggregations
     2  
     3  import (
     4  	"sort"
     5  	"strconv"
     6  	"strings"
     7  	"time"
     8  
     9  	"github.com/axiomhq/hyperloglog"
    10  	"github.com/siglens/siglens/pkg/segment/structs"
    11  	"github.com/siglens/siglens/pkg/segment/utils"
    12  	log "github.com/sirupsen/logrus"
    13  )
    14  
    15  type scorePair struct {
    16  	groupByColVal string
    17  	score         float64
    18  	index         int
    19  }
    20  
    21  func GenerateTimeRangeBuckets(timeHistogram *structs.TimeBucket) []uint64 {
    22  	timeRangeBuckets := make([]uint64, 0)
    23  	currentTime := timeHistogram.StartTime
    24  	for currentTime < timeHistogram.EndTime {
    25  		timeRangeBuckets = append(timeRangeBuckets, currentTime)
    26  		nextTime := currentTime + timeHistogram.IntervalMillis
    27  		if nextTime > timeHistogram.EndTime {
    28  			break
    29  		}
    30  
    31  		currentTime = nextTime
    32  	}
    33  
    34  	return timeRangeBuckets
    35  }
    36  
    37  // Find correct time range bucket for timestamp
    38  func FindTimeRangeBucket(timePoints []uint64, timestamp uint64, intervalMillis uint64) uint64 {
    39  	index := ((timestamp - timePoints[0]) / intervalMillis)
    40  	if index >= uint64(len(timePoints)) {
    41  		index = uint64(len(timePoints) - 1)
    42  	}
    43  	return timePoints[index]
    44  }
    45  
    46  func GetIntervalInMillis(num int, timeUnit utils.TimeUnit) uint64 {
    47  	numD := time.Duration(num)
    48  
    49  	switch timeUnit {
    50  	case utils.TMMicrosecond:
    51  		// Might not has effect for 'us', because smallest time unit for timestamp in siglens is ms
    52  	case utils.TMMillisecond:
    53  		return uint64(numD)
    54  	case utils.TMCentisecond:
    55  		return uint64(numD * 10 * time.Millisecond)
    56  	case utils.TMDecisecond:
    57  		return uint64(numD * 100 * time.Millisecond)
    58  	case utils.TMSecond:
    59  		return uint64((numD * time.Second).Milliseconds())
    60  	case utils.TMMinute:
    61  		return uint64((numD * time.Minute).Milliseconds())
    62  	case utils.TMHour:
    63  		return uint64((numD * time.Hour).Milliseconds())
    64  	case utils.TMDay:
    65  		return uint64((numD * 24 * time.Hour).Milliseconds())
    66  	case utils.TMWeek:
    67  		return uint64((numD * 7 * 24 * time.Hour).Milliseconds())
    68  	case utils.TMMonth:
    69  		return uint64((numD * 30 * 24 * time.Hour).Milliseconds())
    70  	case utils.TMQuarter:
    71  		return uint64((numD * 120 * 24 * time.Hour).Milliseconds())
    72  	}
    73  	return uint64((10 * time.Minute).Milliseconds()) // 10 Minutes
    74  }
    75  
    76  func InitTimeBucket(num int, timeUnit utils.TimeUnit, byField string, limitExpr *structs.LimitExpr, measureAggLength int) *structs.TimeBucket {
    77  
    78  	intervalMillis := GetIntervalInMillis(num, timeUnit)
    79  
    80  	timechartExpr := &structs.TimechartExpr{
    81  		ByField: byField,
    82  	}
    83  
    84  	if len(byField) > 0 {
    85  		if limitExpr != nil {
    86  			timechartExpr.LimitExpr = limitExpr
    87  		} else {
    88  			timechartExpr.LimitExpr = &structs.LimitExpr{
    89  				IsTop:          true,
    90  				Num:            10,
    91  				LimitScoreMode: structs.LSMBySum,
    92  			}
    93  			if measureAggLength > 1 {
    94  				timechartExpr.LimitExpr.LimitScoreMode = structs.LSMByFreq
    95  			}
    96  		}
    97  	}
    98  
    99  	timeBucket := &structs.TimeBucket{
   100  		IntervalMillis: intervalMillis,
   101  		Timechart:      timechartExpr,
   102  	}
   103  
   104  	return timeBucket
   105  }
   106  
   107  func AddAggCountToTimechartRunningStats(m *structs.MeasureAggregator, allConvertedMeasureOps *[]*structs.MeasureAggregator, allReverseIndex *[]int, colToIdx map[string][]int, idx int) {
   108  	*allReverseIndex = append(*allReverseIndex, idx)
   109  	colToIdx[m.MeasureCol] = append(colToIdx[m.MeasureCol], idx)
   110  	*allConvertedMeasureOps = append(*allConvertedMeasureOps, &structs.MeasureAggregator{
   111  		MeasureCol:  m.MeasureCol,
   112  		MeasureFunc: utils.Count,
   113  		StrEnc:      m.StrEnc,
   114  	})
   115  }
   116  
   117  func AddAggAvgToTimechartRunningStats(m *structs.MeasureAggregator, allConvertedMeasureOps *[]*structs.MeasureAggregator, allReverseIndex *[]int, colToIdx map[string][]int, idx int) {
   118  	*allReverseIndex = append(*allReverseIndex, idx)
   119  	colToIdx[m.MeasureCol] = append(colToIdx[m.MeasureCol], idx)
   120  	*allConvertedMeasureOps = append(*allConvertedMeasureOps, &structs.MeasureAggregator{
   121  		MeasureCol:  m.MeasureCol,
   122  		MeasureFunc: utils.Sum,
   123  		StrEnc:      m.StrEnc,
   124  	})
   125  	idx++
   126  	*allReverseIndex = append(*allReverseIndex, idx)
   127  	colToIdx[m.MeasureCol] = append(colToIdx[m.MeasureCol], idx)
   128  	*allConvertedMeasureOps = append(*allConvertedMeasureOps, &structs.MeasureAggregator{
   129  		MeasureCol:  m.MeasureCol,
   130  		MeasureFunc: utils.Count,
   131  		StrEnc:      m.StrEnc,
   132  	})
   133  }
   134  
   135  // Timechart will only display N highest/lowest scoring distinct values of the split-by field
   136  // For Single agg, the score is based on the sum of the values in the aggregation. Therefore, we can only know groupByColVal's ranking after processing all the runningStats
   137  // For multiple aggs, the score is based on the freq of the field. Which means we can rank groupByColVal at this time.
   138  func CheckGroupByColValsAgainstLimit(timechart *structs.TimechartExpr, groupByColValCnt map[string]int, groupValScoreMap map[string]*utils.CValueEnclosure, measureOperations []*structs.MeasureAggregator) map[string]bool {
   139  
   140  	if timechart == nil || timechart.LimitExpr == nil {
   141  		return nil
   142  	}
   143  
   144  	// When there is only one agg and agg is values(), we can not score that based on the sum of the values in the aggregation
   145  	onlyUseByValuesFunc := false
   146  	if len(measureOperations) == 1 && measureOperations[0].MeasureFunc == utils.Values {
   147  		onlyUseByValuesFunc = true
   148  	}
   149  
   150  	index := 0
   151  	valIsInLimit := make(map[string]bool)
   152  	isRankBySum := IsRankBySum(timechart)
   153  
   154  	// When there is only one aggregator and aggregator is values(), we can not score that based on the sum of the values in the aggregation
   155  	if isRankBySum && !onlyUseByValuesFunc {
   156  		scorePairs := make([]scorePair, 0)
   157  		// []float64, 0: score; 1: index
   158  		for groupByColVal, cVal := range groupValScoreMap {
   159  			valIsInLimit[groupByColVal] = false
   160  			score, err := cVal.GetFloatValue()
   161  			if err != nil {
   162  				log.Errorf("CheckGroupByColValsAgainstLimit: %v does not have a score", groupByColVal)
   163  				continue
   164  			}
   165  			scorePairs = append(scorePairs, scorePair{
   166  				groupByColVal: groupByColVal,
   167  				score:         score,
   168  				index:         index,
   169  			})
   170  			index++
   171  		}
   172  
   173  		if timechart.LimitExpr.IsTop {
   174  			sort.Slice(scorePairs, func(i, j int) bool {
   175  				return scorePairs[i].score > scorePairs[j].score
   176  			})
   177  		} else {
   178  			sort.Slice(scorePairs, func(i, j int) bool {
   179  				return scorePairs[i].score < scorePairs[j].score
   180  			})
   181  		}
   182  
   183  		limit := timechart.LimitExpr.Num
   184  		if limit > len(scorePairs) {
   185  			limit = len(scorePairs)
   186  		}
   187  
   188  		for i := 0; i < limit; i++ {
   189  			valIsInLimit[scorePairs[i].groupByColVal] = true
   190  		}
   191  
   192  	} else { // rank by freq
   193  		// []int, 0: cnt; 1: index
   194  		cnts := make([][]int, 0)
   195  		vals := make([]string, 0)
   196  
   197  		for groupByColVal, cnt := range groupByColValCnt {
   198  			vals = append(vals, groupByColVal)
   199  			cnts = append(cnts, []int{cnt, index})
   200  			valIsInLimit[groupByColVal] = false
   201  			index++
   202  		}
   203  
   204  		if timechart.LimitExpr.IsTop {
   205  			sort.Slice(cnts, func(i, j int) bool {
   206  				return cnts[i][0] > cnts[j][0]
   207  			})
   208  		} else {
   209  			sort.Slice(cnts, func(i, j int) bool {
   210  				return cnts[i][0] < cnts[j][0]
   211  			})
   212  		}
   213  
   214  		limit := timechart.LimitExpr.Num
   215  		if limit > len(vals) {
   216  			limit = len(vals)
   217  		}
   218  
   219  		for i := 0; i < limit; i++ {
   220  			valIndex := cnts[i][1]
   221  			valIsInLimit[vals[valIndex]] = true
   222  		}
   223  	}
   224  
   225  	return valIsInLimit
   226  }
   227  
   228  // Initial score map for single agg: the score is based on the sum of the values in the aggregation
   229  func InitialScoreMap(timechart *structs.TimechartExpr, groupByColValCnt map[string]int) map[string]*utils.CValueEnclosure {
   230  
   231  	if timechart == nil || timechart.LimitExpr == nil || timechart.LimitExpr.LimitScoreMode == structs.LSMByFreq {
   232  		return nil
   233  	}
   234  
   235  	groupByColValScoreMap := make(map[string]*utils.CValueEnclosure, 0)
   236  	for groupByColVal := range groupByColValCnt {
   237  		groupByColValScoreMap[groupByColVal] = &utils.CValueEnclosure{CVal: nil, Dtype: utils.SS_INVALID}
   238  	}
   239  
   240  	return groupByColValScoreMap
   241  }
   242  
   243  func SortTimechartRes(timechart *structs.TimechartExpr, results *[]*structs.BucketResult) {
   244  	if timechart == nil || results == nil {
   245  		return
   246  	}
   247  
   248  	sort.Slice(*results, func(i, j int) bool {
   249  		bucketKey1, ok := (*results)[i].BucketKey.(string)
   250  		if !ok {
   251  			log.Errorf("SortTimechartRes: cannot convert bucketKey to string: %v", (*results)[i].BucketKey)
   252  			return false
   253  		}
   254  
   255  		bucketKey2, ok := (*results)[j].BucketKey.(string)
   256  		if !ok {
   257  			log.Errorf("SortTimechartRes: cannot convert bucketKey to string: %v", (*results)[j].BucketKey)
   258  			return true
   259  		}
   260  
   261  		timestamp1, err := strconv.ParseUint(bucketKey1, 10, 64)
   262  		if err != nil {
   263  			log.Errorf("SortTimechartRes: cannot convert bucketKey to timestamp: %v", bucketKey1)
   264  			return false
   265  		}
   266  
   267  		timestamp2, err := strconv.ParseUint(bucketKey2, 10, 64)
   268  		if err != nil {
   269  			log.Errorf("SortTimechartRes: cannot convert bucketKey to timestamp: %v", bucketKey2)
   270  			return true
   271  		}
   272  
   273  		return timestamp1 < timestamp2
   274  	})
   275  }
   276  
   277  func IsOtherCol(valIsInLimit map[string]bool, groupByColVal string) bool {
   278  	isOtherCol := false
   279  	if valIsInLimit != nil {
   280  		inLimit, exists := valIsInLimit[groupByColVal]
   281  		if exists {
   282  			isOtherCol = !inLimit
   283  		}
   284  	}
   285  	return isOtherCol
   286  }
   287  
   288  // For numeric agg(not include dc), we can simply use addition to merge them
   289  // For string values, it depends on the aggregation function
   290  func MergeVal(eVal *utils.CValueEnclosure, eValToMerge utils.CValueEnclosure, hll *hyperloglog.Sketch, hllToMerge *hyperloglog.Sketch,
   291  	strSet map[string]struct{}, strSetToMerge map[string]struct{}, aggFunc utils.AggregateFunctions, useAdditionForMerge bool) {
   292  
   293  	tmp := utils.CValueEnclosure{
   294  		Dtype: eVal.Dtype,
   295  		CVal:  eVal.CVal,
   296  	}
   297  
   298  	switch aggFunc {
   299  	case utils.Count:
   300  		fallthrough
   301  	case utils.Avg:
   302  		fallthrough
   303  	case utils.Min:
   304  		fallthrough
   305  	case utils.Max:
   306  		fallthrough
   307  	case utils.Range:
   308  		fallthrough
   309  	case utils.Sum:
   310  		aggFunc = utils.Sum
   311  	case utils.Cardinality:
   312  		if useAdditionForMerge {
   313  			aggFunc = utils.Sum
   314  		} else {
   315  			err := hll.Merge(hllToMerge)
   316  			if err != nil {
   317  				log.Errorf("MergeVal: failed to merge hyperloglog stats: %v", err)
   318  			}
   319  			eVal.CVal = hll.Estimate()
   320  			eVal.Dtype = utils.SS_DT_UNSIGNED_NUM
   321  			return
   322  		}
   323  	case utils.Values:
   324  		// Can not do addition for values func
   325  		if useAdditionForMerge {
   326  			return
   327  		}
   328  		for str := range strSetToMerge {
   329  			strSet[str] = struct{}{}
   330  		}
   331  		uniqueStrings := make([]string, 0)
   332  		for str := range strSet {
   333  			uniqueStrings = append(uniqueStrings, str)
   334  		}
   335  		sort.Strings(uniqueStrings)
   336  		strVal := strings.Join(uniqueStrings, "&nbsp")
   337  
   338  		eVal.CVal = strVal
   339  		eVal.Dtype = utils.SS_DT_STRING
   340  		return
   341  	}
   342  
   343  	retVal, err := utils.Reduce(eValToMerge, tmp, aggFunc)
   344  	if err != nil {
   345  		log.Errorf("MergeVal: failed to merge eVal into otherCVal: %v", err)
   346  		return
   347  	}
   348  	eVal.CVal = retVal.CVal
   349  	eVal.Dtype = retVal.Dtype
   350  }
   351  
   352  func MergeMap(groupByColValCnt map[string]int, toMerge map[string]int) {
   353  
   354  	for key, cnt := range groupByColValCnt {
   355  		cntToMerge, exists := toMerge[key]
   356  		if exists {
   357  			groupByColValCnt[key] = cnt + cntToMerge
   358  		}
   359  	}
   360  
   361  	for key, cnt := range toMerge {
   362  		_, exists := groupByColValCnt[key]
   363  		if !exists {
   364  			groupByColValCnt[key] = cnt
   365  		}
   366  	}
   367  }
   368  
   369  func IsRankBySum(timechart *structs.TimechartExpr) bool {
   370  	if timechart != nil && timechart.LimitExpr != nil && timechart.LimitExpr.LimitScoreMode == structs.LSMBySum {
   371  		return true
   372  	}
   373  	return false
   374  }
   375  
   376  func ShouldAddRes(timechart *structs.TimechartExpr, tmLimitResult *structs.TMLimitResult, index int, eVal utils.CValueEnclosure,
   377  	hllToMerge *hyperloglog.Sketch, strSetToMerge map[string]struct{}, aggFunc utils.AggregateFunctions, groupByColVal string, isOtherCol bool) bool {
   378  
   379  	useAdditionForMerge := (tmLimitResult.OtherCValArr == nil)
   380  	isRankBySum := IsRankBySum(timechart)
   381  
   382  	// If true, current col's val will be added into 'other' col. So its val should not be added into res at this time
   383  	if isOtherCol {
   384  		otherCVal := tmLimitResult.OtherCValArr[index]
   385  		MergeVal(otherCVal, eVal, tmLimitResult.Hll, hllToMerge, tmLimitResult.StrSet, strSetToMerge, aggFunc, useAdditionForMerge)
   386  		return false
   387  	} else {
   388  		if isRankBySum && tmLimitResult.OtherCValArr == nil {
   389  			scoreVal := tmLimitResult.GroupValScoreMap[groupByColVal]
   390  			MergeVal(scoreVal, eVal, tmLimitResult.Hll, hllToMerge, tmLimitResult.StrSet, strSetToMerge, aggFunc, useAdditionForMerge)
   391  			return false
   392  		}
   393  		return true
   394  	}
   395  }