github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/results/blockresults/runningstats.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package blockresults
    18  
    19  import (
    20  	"fmt"
    21  
    22  	"github.com/axiomhq/hyperloglog"
    23  	"github.com/siglens/siglens/pkg/segment/structs"
    24  	"github.com/siglens/siglens/pkg/segment/utils"
    25  	log "github.com/sirupsen/logrus"
    26  	bbp "github.com/valyala/bytebufferpool"
    27  )
    28  
    29  type RunningBucketResults struct {
    30  	runningStats        []runningStats               // maps a stat name to running stats
    31  	currStats           []*structs.MeasureAggregator // measure aggregators in result
    32  	groupedRunningStats map[string][]runningStats    // maps timechart group by col's vals to corresponding running stats
    33  	count               uint64                       // total number of elements belonging to the bucket
    34  }
    35  
    36  type runningStats struct {
    37  	rawVal utils.CValueEnclosure // raw value
    38  	hll    *hyperloglog.Sketch
    39  }
    40  
    41  func initRunningStats(internalMeasureFns []*structs.MeasureAggregator) []runningStats {
    42  	retVal := make([]runningStats, len(internalMeasureFns))
    43  	for i := 0; i < len(internalMeasureFns); i++ {
    44  		if internalMeasureFns[i].MeasureFunc == utils.Cardinality {
    45  			retVal[i] = runningStats{hll: hyperloglog.New()}
    46  		}
    47  	}
    48  	return retVal
    49  }
    50  
    51  func initRunningGroupByBucket(internalMeasureFns []*structs.MeasureAggregator) *RunningBucketResults {
    52  
    53  	return &RunningBucketResults{
    54  		count:               0,
    55  		runningStats:        initRunningStats(internalMeasureFns),
    56  		currStats:           internalMeasureFns,
    57  		groupedRunningStats: make(map[string][]runningStats),
    58  	}
    59  }
    60  
    61  func initRunningTimeBucket() *RunningBucketResults {
    62  
    63  	return &RunningBucketResults{
    64  		count: 0,
    65  	}
    66  }
    67  
    68  func (rr *RunningBucketResults) AddTimeToBucketStats(count uint16) {
    69  	rr.count += uint64(count)
    70  }
    71  
    72  func (rr *RunningBucketResults) AddMeasureResults(runningStats *[]runningStats, measureResults []utils.CValueEnclosure, qid uint64,
    73  	cnt uint64, usedByTimechart bool) {
    74  	if runningStats == nil {
    75  		if rr.runningStats == nil {
    76  			return
    77  		}
    78  		runningStats = &rr.runningStats
    79  	}
    80  
    81  	for i := 0; i < len(*runningStats); i++ {
    82  		switch rr.currStats[i].MeasureFunc {
    83  		case utils.Sum:
    84  			fallthrough
    85  		case utils.Max:
    86  			fallthrough
    87  		case utils.Min:
    88  			err := rr.AddEvalResultsForMinOrMaxOrSum(runningStats, measureResults, i)
    89  			if err != nil {
    90  				log.Errorf("AddMeasureResults: %v", err)
    91  			}
    92  		case utils.Count:
    93  			step, err := rr.AddEvalResultsForCount(runningStats, measureResults, i, usedByTimechart, cnt)
    94  			if err != nil {
    95  				log.Errorf("AddMeasureResults: %v", err)
    96  			}
    97  			i += step
    98  		case utils.Cardinality:
    99  			if rr.currStats[i].ValueColRequest == nil {
   100  				rawVal, err := measureResults[i].GetString()
   101  				if err != nil {
   102  					log.Errorf("AddMeasureResults: failed to add measurement to running stats: %v", err)
   103  					continue
   104  				}
   105  				bb := bbp.Get()
   106  				defer bbp.Put(bb)
   107  				bb.Reset()
   108  				_, _ = bb.WriteString(rawVal)
   109  				(*runningStats)[i].hll.Insert(bb.B)
   110  				continue
   111  			}
   112  			fallthrough
   113  		case utils.Values:
   114  			step, err := rr.AddEvalResultsForValuesOrCardinality(runningStats, measureResults, i)
   115  			if err != nil {
   116  				log.Errorf("AddMeasureResults: %v", err)
   117  			}
   118  			i += step
   119  		default:
   120  			err := rr.ProcessReduce(runningStats, measureResults[i], i)
   121  			if err != nil {
   122  				log.Errorf("AddMeasureResults: %v", err)
   123  			}
   124  		}
   125  	}
   126  	rr.count += cnt
   127  }
   128  
   129  // This assumes the order of bucketResults.RunningStats are in the same order, referencing the same measure request
   130  func (rr *RunningBucketResults) MergeRunningBuckets(toJoin *RunningBucketResults) {
   131  
   132  	if toJoin == nil {
   133  		return
   134  	}
   135  
   136  	// Merge group by bucket inside each time range bucket (For timechart)
   137  	if toJoin.groupedRunningStats != nil && rr.groupedRunningStats == nil {
   138  		rr.groupedRunningStats = toJoin.groupedRunningStats
   139  	} else if rr.groupedRunningStats != nil && len(rr.groupedRunningStats) > 0 {
   140  		for groupByColVal, runningStats := range rr.groupedRunningStats {
   141  			toJoinRunningStats, exists := toJoin.groupedRunningStats[groupByColVal]
   142  			if !exists {
   143  				continue
   144  			}
   145  			rr.mergeRunningStats(&runningStats, toJoinRunningStats)
   146  		}
   147  
   148  		for groupByColVal, toJoinRunningStats := range toJoin.groupedRunningStats {
   149  			_, exists := rr.groupedRunningStats[groupByColVal]
   150  			if !exists {
   151  				rr.groupedRunningStats[groupByColVal] = toJoinRunningStats
   152  			}
   153  		}
   154  	}
   155  
   156  	rr.mergeRunningStats(&rr.runningStats, toJoin.runningStats)
   157  	rr.count += toJoin.count
   158  }
   159  
   160  func (rr *RunningBucketResults) mergeRunningStats(runningStats *[]runningStats, toJoinRunningStats []runningStats) {
   161  	for i := 0; i < len(toJoinRunningStats); i++ {
   162  		switch rr.currStats[i].MeasureFunc {
   163  		case utils.Values:
   164  			if rr.currStats[i].ValueColRequest == nil {
   165  				err := rr.ProcessReduce(runningStats, toJoinRunningStats[i].rawVal, i)
   166  				if err != nil {
   167  					log.Errorf("mergeRunningStats: err: %v", err)
   168  				}
   169  			} else {
   170  				fields := rr.currStats[i].ValueColRequest.GetFields()
   171  				err := rr.ProcessReduce(runningStats, toJoinRunningStats[i].rawVal, i)
   172  				if err != nil {
   173  					log.Errorf("mergeRunningStats: err: %v", err)
   174  				}
   175  				i += (len(fields) - 1)
   176  			}
   177  		case utils.Cardinality:
   178  			if rr.currStats[i].ValueColRequest == nil {
   179  				err := (*runningStats)[i].hll.Merge(toJoinRunningStats[i].hll)
   180  				if err != nil {
   181  					log.Errorf("mergeRunningStats: failed merge HLL!: %v", err)
   182  				}
   183  			} else {
   184  				fields := rr.currStats[i].ValueColRequest.GetFields()
   185  				err := rr.ProcessReduce(runningStats, toJoinRunningStats[i].rawVal, i)
   186  				if err != nil {
   187  					log.Errorf("mergeRunningStats: err: %v", err)
   188  				}
   189  				i += (len(fields) - 1)
   190  			}
   191  		case utils.Count:
   192  			if rr.currStats[i].ValueColRequest == nil {
   193  				err := rr.ProcessReduce(runningStats, toJoinRunningStats[i].rawVal, i)
   194  				if err != nil {
   195  					log.Errorf("mergeRunningStats: err: %v", err)
   196  				}
   197  			} else {
   198  				fields := rr.currStats[i].ValueColRequest.GetFields()
   199  				err := rr.ProcessReduce(runningStats, toJoinRunningStats[i].rawVal, i)
   200  				if err != nil {
   201  					log.Errorf("mergeRunningStats: failed to add measurement to running stats: %v", err)
   202  				}
   203  				i += (len(fields) - 1)
   204  			}
   205  		default:
   206  			err := rr.ProcessReduce(runningStats, toJoinRunningStats[i].rawVal, i)
   207  			if err != nil {
   208  				log.Errorf("mergeRunningStats: err: %v", err)
   209  			}
   210  		}
   211  	}
   212  }
   213  
   214  func (rr *RunningBucketResults) ProcessReduce(runningStats *[]runningStats, e utils.CValueEnclosure, i int) error {
   215  	retVal, err := utils.Reduce((*runningStats)[i].rawVal, e, rr.currStats[i].MeasureFunc)
   216  	if err != nil {
   217  		return fmt.Errorf("ProcessReduce: failed to add measurement to running stats: %v", err)
   218  	} else {
   219  		(*runningStats)[i].rawVal = retVal
   220  	}
   221  	return nil
   222  }
   223  
   224  func (rr *RunningBucketResults) AddEvalResultsForMinOrMaxOrSum(runningStats *[]runningStats, measureResults []utils.CValueEnclosure, i int) error {
   225  	if rr.currStats[i].ValueColRequest == nil {
   226  		return rr.ProcessReduce(runningStats, measureResults[i], i)
   227  	}
   228  
   229  	fields := rr.currStats[i].ValueColRequest.GetFields()
   230  	if len(fields) != 1 {
   231  		return fmt.Errorf("AddEvalResultsForMinOrMaxOrSum: Incorrect number of fields for aggCol: %v", rr.currStats[i].String())
   232  	}
   233  	fieldToValue := make(map[string]utils.CValueEnclosure)
   234  	fieldToValue[fields[0]] = measureResults[i]
   235  	boolResult, err := rr.currStats[i].ValueColRequest.BooleanExpr.Evaluate(fieldToValue)
   236  	if err != nil {
   237  		return fmt.Errorf("AddEvalResultsForMinOrMaxOrSum: there are some errors in the eval function that is inside the min/max function: %v", err)
   238  	}
   239  	if boolResult {
   240  		err := rr.ProcessReduce(runningStats, measureResults[i], i)
   241  		if err != nil {
   242  			return fmt.Errorf("AddEvalResultsForMinOrMaxOrSum: %v", err)
   243  		}
   244  	}
   245  	return nil
   246  }
   247  
   248  func (rr *RunningBucketResults) AddEvalResultsForCount(runningStats *[]runningStats, measureResults []utils.CValueEnclosure, i int, usedByTimechart bool, cnt uint64) (int, error) {
   249  
   250  	if rr.currStats[i].ValueColRequest == nil {
   251  		if usedByTimechart {
   252  			eVal := &utils.CValueEnclosure{
   253  				Dtype: utils.SS_DT_UNSIGNED_NUM,
   254  				CVal:  cnt,
   255  			}
   256  			return 0, rr.ProcessReduce(runningStats, *eVal, i)
   257  		} else {
   258  			return 0, rr.ProcessReduce(runningStats, measureResults[i], i)
   259  		}
   260  	}
   261  
   262  	fields := rr.currStats[i].ValueColRequest.GetFields()
   263  	fieldToValue := make(map[string]utils.CValueEnclosure)
   264  
   265  	index := i
   266  	for _, field := range fields {
   267  		fieldToValue[field] = measureResults[index]
   268  		index++
   269  	}
   270  
   271  	boolResult, err := rr.currStats[i].ValueColRequest.BooleanExpr.Evaluate(fieldToValue)
   272  	if err != nil {
   273  		return 0, fmt.Errorf("AddEvalResultsForCount: there are some errors in the eval function that is inside the count function: %v", err)
   274  	}
   275  	if (*runningStats)[i].rawVal.CVal == nil {
   276  		(*runningStats)[i].rawVal = utils.CValueEnclosure{
   277  			CVal:  int64(0),
   278  			Dtype: utils.SS_DT_SIGNED_NUM,
   279  		}
   280  	}
   281  	if boolResult {
   282  		(*runningStats)[i].rawVal.CVal = (*runningStats)[i].rawVal.CVal.(int64) + 1
   283  	}
   284  
   285  	return len(fields) - 1, nil
   286  }
   287  
   288  func (rr *RunningBucketResults) AddEvalResultsForValuesOrCardinality(runningStats *[]runningStats, measureResults []utils.CValueEnclosure, i int) (int, error) {
   289  	if (*runningStats)[i].rawVal.CVal == nil {
   290  		(*runningStats)[i].rawVal = utils.CValueEnclosure{
   291  			Dtype: utils.SS_DT_STRING_SET,
   292  			CVal:  make(map[string]struct{}, 0),
   293  		}
   294  	}
   295  	strSet := (*runningStats)[i].rawVal.CVal.(map[string]struct{})
   296  
   297  	if rr.currStats[i].ValueColRequest == nil {
   298  		strVal, err := measureResults[i].GetString()
   299  		if err != nil {
   300  			return 0, fmt.Errorf("AddEvalResultsForValuesOrCardinality: failed to add measurement to running stats: %v", err)
   301  		}
   302  		strSet[strVal] = struct{}{}
   303  		(*runningStats)[i].rawVal.CVal = strSet
   304  		return 0, nil
   305  	}
   306  
   307  	fields := rr.currStats[i].ValueColRequest.GetFields()
   308  	fieldToValue := make(map[string]utils.CValueEnclosure)
   309  
   310  	index := i
   311  	for _, field := range fields {
   312  		fieldToValue[field] = measureResults[index]
   313  		index++
   314  	}
   315  
   316  	strVal, err := rr.currStats[i].ValueColRequest.EvaluateToString(fieldToValue)
   317  	if err != nil {
   318  		return 0, fmt.Errorf("AddEvalResultsForValuesOrCardinality: there are some errors in the eval function that is inside the count function: %v", err)
   319  	}
   320  	strSet[strVal] = struct{}{}
   321  	(*runningStats)[i].rawVal.CVal = strSet
   322  
   323  	return len(fields) - 1, nil
   324  }
   325  
   326  func (rr *RunningBucketResults) GetRunningStatsBucketValues() ([]utils.CValueEnclosure, uint64) {
   327  	retVal := make([]utils.CValueEnclosure, len(rr.runningStats))
   328  	for i := 0; i < len(rr.runningStats); i++ {
   329  		retVal[i] = rr.runningStats[i].rawVal
   330  	}
   331  	return retVal, rr.count
   332  }