github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/search/searchaggs.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package search
    18  
    19  import (
    20  	"bytes"
    21  	"errors"
    22  	"fmt"
    23  	"sort"
    24  	"sync"
    25  
    26  	"github.com/axiomhq/hyperloglog"
    27  	"github.com/dustin/go-humanize"
    28  	dtu "github.com/siglens/siglens/pkg/common/dtypeutils"
    29  	"github.com/siglens/siglens/pkg/config"
    30  	"github.com/siglens/siglens/pkg/segment/aggregations"
    31  	"github.com/siglens/siglens/pkg/segment/reader/segread"
    32  	"github.com/siglens/siglens/pkg/segment/results/blockresults"
    33  	"github.com/siglens/siglens/pkg/segment/results/segresults"
    34  	"github.com/siglens/siglens/pkg/segment/structs"
    35  	"github.com/siglens/siglens/pkg/segment/utils"
    36  	"github.com/siglens/siglens/pkg/segment/writer"
    37  	"github.com/siglens/siglens/pkg/segment/writer/stats"
    38  	toputils "github.com/siglens/siglens/pkg/utils"
    39  	bbp "github.com/valyala/bytebufferpool"
    40  
    41  	log "github.com/sirupsen/logrus"
    42  )
    43  
    44  func applyAggregationsToResult(aggs *structs.QueryAggregators, segmentSearchRecords *SegmentSearchStatus,
    45  	searchReq *structs.SegmentSearchRequest, blockSummaries []*structs.BlockSummary, queryRange *dtu.TimeRange,
    46  	sizeLimit uint64, fileParallelism int64, queryMetrics *structs.QueryProcessingMetrics, qid uint64,
    47  	allSearchResults *segresults.SearchResults) error {
    48  	var blkWG sync.WaitGroup
    49  	allBlocksChan := make(chan *BlockSearchStatus, fileParallelism)
    50  	aggCols, _, _ := GetAggColsAndTimestamp(aggs)
    51  	sharedReader, err := segread.InitSharedMultiColumnReaders(searchReq.SegmentKey, aggCols, searchReq.AllBlocksToSearch,
    52  		blockSummaries, int(fileParallelism), qid)
    53  	if err != nil {
    54  		log.Errorf("applyAggregationsToResult: failed to load all column files reader for %s. Needed cols %+v. Err: %+v",
    55  			searchReq.SegmentKey, aggCols, err)
    56  		if sharedReader != nil {
    57  			sharedReader.Close()
    58  		}
    59  		return err
    60  	}
    61  	defer sharedReader.Close()
    62  
    63  	usedByTimechart := aggs.UsedByTimechart()
    64  	if (aggs != nil && aggs.GroupByRequest != nil) || usedByTimechart {
    65  		cname, ok := checkIfGrpColsPresent(aggs.GroupByRequest, sharedReader.MultiColReaders[0],
    66  			allSearchResults)
    67  		if !ok && !usedByTimechart {
    68  			log.Errorf("qid=%v, applyAggregationsToResult: cname: %v was not present", qid, cname)
    69  			return fmt.Errorf("qid=%v, applyAggregationsToResult: cname: %v was not present", qid,
    70  				cname)
    71  		}
    72  	}
    73  
    74  	rupReader, err := segread.InitNewRollupReader(searchReq.SegmentKey, config.GetTimeStampKey(), qid)
    75  	if err != nil {
    76  		log.Errorf("qid=%d, applyAggregationsToResult: failed initialize rollup reader segkey %s. Error: %v",
    77  			qid, searchReq.SegmentKey, err)
    78  	} else {
    79  		defer rupReader.Close()
    80  	}
    81  	allBlocksToXRollup, aggsHasTimeHt, aggsHasNonTimeHt := getRollupForAggregation(aggs, rupReader)
    82  	for i := int64(0); i < fileParallelism; i++ {
    83  		blkWG.Add(1)
    84  		go applyAggregationsToSingleBlock(sharedReader.MultiColReaders[i], aggs, allSearchResults, allBlocksChan,
    85  			searchReq, queryRange, sizeLimit, &blkWG, queryMetrics, qid, blockSummaries, aggsHasTimeHt,
    86  			aggsHasNonTimeHt, allBlocksToXRollup)
    87  	}
    88  	absKeys := make([]uint16, 0, len(segmentSearchRecords.AllBlockStatus))
    89  	for k := range segmentSearchRecords.AllBlockStatus {
    90  		absKeys = append(absKeys, k)
    91  	}
    92  	if aggs != nil && aggs.Sort != nil {
    93  		if aggs.Sort.Ascending {
    94  			sort.Slice(absKeys, func(i, j int) bool { return absKeys[i] < absKeys[j] })
    95  		} else {
    96  			sort.Slice(absKeys, func(i, j int) bool { return absKeys[i] > absKeys[j] })
    97  		}
    98  	}
    99  	for _, k := range absKeys {
   100  		blkResults := segmentSearchRecords.AllBlockStatus[k]
   101  		if blkResults.hasAnyMatched {
   102  			allBlocksChan <- blkResults
   103  		}
   104  	}
   105  	close(allBlocksChan)
   106  	blkWG.Wait()
   107  	return nil
   108  }
   109  
   110  func applyAggregationsToSingleBlock(multiReader *segread.MultiColSegmentReader, aggs *structs.QueryAggregators,
   111  	allSearchResults *segresults.SearchResults, blockChan chan *BlockSearchStatus, searchReq *structs.SegmentSearchRequest,
   112  	queryRange *dtu.TimeRange, sizeLimit uint64, wg *sync.WaitGroup, queryMetrics *structs.QueryProcessingMetrics,
   113  	qid uint64, blockSummaries []*structs.BlockSummary, aggsHasTimeHt bool, aggsHasNonTimeHt bool,
   114  	allBlocksToXRollup map[uint16]map[uint64]*writer.RolledRecs) {
   115  
   116  	blkResults, err := blockresults.InitBlockResults(sizeLimit, aggs, qid)
   117  	if err != nil {
   118  		log.Errorf("applyAggregationsToSingleBlock: failed to initialize block results reader for %s. Err: %v", searchReq.SegmentKey, err)
   119  		allSearchResults.AddError(err)
   120  	}
   121  	defer wg.Done()
   122  
   123  	for blockStatus := range blockChan {
   124  		if !blockStatus.hasAnyMatched {
   125  			continue
   126  		}
   127  		recIT, err := blockStatus.GetRecordIteratorCopyForBlock(utils.And)
   128  		if err != nil {
   129  			log.Errorf("qid=%d, applyAggregationsToSingleBlock: failed to initialize record iterator for block %+v. Err: %v",
   130  				qid, blockStatus.BlockNum, err)
   131  			continue
   132  		}
   133  
   134  		var toXRollup map[uint64]*writer.RolledRecs = nil
   135  		if allBlocksToXRollup != nil {
   136  			toXRollup = allBlocksToXRollup[blockStatus.BlockNum]
   137  		}
   138  
   139  		isBlkFullyEncosed := queryRange.AreTimesFullyEnclosed(blockSummaries[blockStatus.BlockNum].LowTs,
   140  			blockSummaries[blockStatus.BlockNum].HighTs)
   141  
   142  		var addedTimeHt = false
   143  		if aggs != nil && aggs.TimeHistogram != nil && aggs.TimeHistogram.Timechart == nil && aggsHasTimeHt && isBlkFullyEncosed &&
   144  			toXRollup != nil {
   145  			for rupTskey, rr := range toXRollup {
   146  				rr.MatchedRes.InPlaceIntersection(recIT.AllRecords)
   147  				matchedRrCount := uint16(rr.MatchedRes.GetNumberOfSetBits())
   148  				blkResults.AddKeyToTimeBucket(rupTskey, matchedRrCount)
   149  			}
   150  			addedTimeHt = true
   151  		}
   152  
   153  		if blkResults.ShouldIterateRecords(aggsHasTimeHt, isBlkFullyEncosed,
   154  			blockSummaries[blockStatus.BlockNum].LowTs,
   155  			blockSummaries[blockStatus.BlockNum].HighTs, addedTimeHt) {
   156  			iterRecsAddRrc(recIT, multiReader, blockStatus, queryRange, aggs, aggsHasTimeHt,
   157  				addedTimeHt, blkResults, queryMetrics, allSearchResults, searchReq, qid)
   158  		} else {
   159  			// we did not iterate the records so now we need to just update the counts, so that early-exit
   160  			// as well as hit.total has somewhat accurate value
   161  			rrMc := uint64(recIT.AllRecords.GetNumberOfSetBits())
   162  			if rrMc > 0 {
   163  				blkResults.AddMatchedCount(rrMc)
   164  				queryMetrics.IncrementNumBlocksWithMatch(1)
   165  			}
   166  		}
   167  		doAggs(aggs, multiReader, blockStatus, recIT, blkResults, isBlkFullyEncosed, qid)
   168  	}
   169  	allSearchResults.AddBlockResults(blkResults)
   170  }
   171  
// addRecordToAggregations folds each matched record of a single block into the
// running buckets held in blockRes. Two modes, chosen per call:
//   - timechart mode (timeHistogram.Timechart != nil): the bucket key is the
//     record's time-range bucket (encoded as VALTYPE_ENC_UINT64 + 8 bytes), and
//     the optional ByField value becomes a sub-bucket;
//   - group-by mode: the bucket key is the concatenated raw values of
//     grpReq.GroupByColumns.
//
// measureInfo maps a measure column name to the indices it occupies in the
// shared measureResults slice (len numMFuncs), which is reused across records.
// recIT marks which records in the block matched the search.
func addRecordToAggregations(grpReq *structs.GroupByRequest, timeHistogram *structs.TimeBucket, measureInfo map[string][]int, numMFuncs int, multiColReader *segread.MultiColSegmentReader,
	blockNum uint16, recIT *BlockRecordIterator, blockRes *blockresults.BlockResults, qid uint64) {
	measureResults := make([]utils.CValueEnclosure, numMFuncs)
	usedByTimechart := (timeHistogram != nil && timeHistogram.Timechart != nil)
	hasLimitOption := false
	// Per-call tally of how many records fell into each ByField value; merged
	// into blockRes at the end when a timechart limit option is present.
	groupByColValCnt := make(map[string]int, 0)
	var timeRangeBuckets []uint64
	if usedByTimechart {
		timeRangeBuckets = aggregations.GenerateTimeRangeBuckets(timeHistogram)
		hasLimitOption = timeHistogram.Timechart.LimitExpr != nil
	}
	for recNum := uint16(0); recNum < recIT.AllRecLen; recNum++ {
		if !recIT.ShouldProcessRecord(uint(recNum)) {
			continue
		}

		var currKey bytes.Buffer
		groupByColVal := ""

		if usedByTimechart {
			// Find out timePoint for current row
			ts, err := multiColReader.GetTimeStampForRecord(blockNum, recNum, qid)
			if err != nil {
				log.Errorf("addRecordToAggregations: Failed to extract value from timestamp: %v", err)
				continue
			}
			// Records outside the histogram's window are dropped entirely.
			if ts < timeHistogram.StartTime || ts > timeHistogram.EndTime {
				continue
			}
			timePoint := aggregations.FindTimeRangeBucket(timeRangeBuckets, ts, timeHistogram.IntervalMillis)

			// Encode the bucket key as 1 type byte + 8-byte little-endian ts.
			retVal := make([]byte, 9)
			copy(retVal[0:], utils.VALTYPE_ENC_UINT64[:])
			copy(retVal[1:], toputils.Uint64ToBytesLittleEndian(timePoint))
			currKey.Write(retVal)

			// Get timechart's group by col val, each different val will be a bucket inside each time range bucket
			byField := timeHistogram.Timechart.ByField
			if len(byField) > 0 {
				rawVal, err := multiColReader.ReadRawRecordFromColumnFile(byField, blockNum, recNum, qid)
				if err != nil {
					log.Errorf("addRecordToAggregations: Failed to get key for column %v: %v", byField, err)
				} else {
					strs, err := utils.ConvertGroupByKey(rawVal)
					if err != nil {
						log.Errorf("addRecordToAggregations: failed to extract raw key: %v", err)
					}
					if len(strs) == 1 {
						groupByColVal = strs[0]
					} else {
						log.Errorf("addRecordToAggregations: invalid length of groupByColVal")
					}
				}
				// NOTE: on read failure groupByColVal stays "", which is still
				// counted below when a limit option is active.
				if hasLimitOption {
					cnt, exists := groupByColValCnt[groupByColVal]
					if exists {
						groupByColValCnt[groupByColVal] = cnt + 1
					} else {
						groupByColValCnt[groupByColVal] = 1
					}
				}
			}
		} else {
			// Group-by mode: concatenate the raw encoded values of each
			// group-by column; unreadable columns are backfilled so the key
			// keeps a stable arity.
			for _, col := range grpReq.GroupByColumns {
				rawVal, err := multiColReader.ReadRawRecordFromColumnFile(col, blockNum, recNum, qid)
				if err != nil {
					log.Errorf("addRecordToAggregations: Failed to get key for column %v: %v", col, err)
					currKey.Write(utils.VALTYPE_ENC_BACKFILL)
				} else {
					currKey.Write(rawVal)
				}
			}
		}

		// Gather this record's measure inputs; a failed extraction becomes a
		// backfill value rather than aborting the record.
		for cName, indices := range measureInfo {
			rawVal, err := multiColReader.ExtractValueFromColumnFile(cName, blockNum, recNum, qid)
			if err != nil {
				log.Errorf("addRecordToAggregations: Failed to extract measure value from column %+v: %v", cName, err)
				rawVal = &utils.CValueEnclosure{Dtype: utils.SS_DT_BACKFILL}
			}
			for _, idx := range indices {
				measureResults[idx] = *rawVal
			}
		}
		blockRes.AddMeasureResultsToKey(currKey, measureResults, groupByColVal, usedByTimechart, qid)
	}

	// Merge (or install) the ByField counts collected for this block.
	if usedByTimechart && len(timeHistogram.Timechart.ByField) > 0 {
		if len(blockRes.GroupByAggregation.GroupByColValCnt) > 0 {
			aggregations.MergeMap(blockRes.GroupByAggregation.GroupByColValCnt, groupByColValCnt)
		} else {
			blockRes.GroupByAggregation.GroupByColValCnt = groupByColValCnt
		}
	}
}
   267  
   268  func PerformAggsOnRecs(nodeResult *structs.NodeResult, aggs *structs.QueryAggregators, recs map[string]map[string]interface{},
   269  	finalCols map[string]bool, numTotalSegments uint64, finishesSegment bool, qid uint64) map[string]bool {
   270  
   271  	if !nodeResult.PerformAggsOnRecs {
   272  		return nil
   273  	}
   274  
   275  	if finishesSegment {
   276  		nodeResult.RecsAggsProcessedSegments++
   277  	}
   278  
   279  	if nodeResult.RecsAggsType == structs.GroupByType {
   280  		return PerformGroupByRequestAggsOnRecs(nodeResult, recs, finalCols, qid, numTotalSegments)
   281  	} else if nodeResult.RecsAggsType == structs.MeasureAggsType {
   282  		return PerformMeasureAggsOnRecs(nodeResult, recs, finalCols, qid, numTotalSegments)
   283  	}
   284  
   285  	return nil
   286  }
   287  
   288  func PerformGroupByRequestAggsOnRecs(nodeResult *structs.NodeResult, recs map[string]map[string]interface{}, finalCols map[string]bool, qid uint64, numTotalSegments uint64) map[string]bool {
   289  
   290  	nodeResult.GroupByRequest.BucketCount = 3000
   291  
   292  	blockRes, err := blockresults.InitBlockResults(uint64(len(recs)), &structs.QueryAggregators{GroupByRequest: nodeResult.GroupByRequest}, qid)
   293  	if err != nil {
   294  		log.Errorf("PerformGroupByRequestAggsOnRecs: failed to initialize block results reader. Err: %v", err)
   295  		return nil
   296  	}
   297  
   298  	measureInfo, internalMops := blockRes.GetConvertedMeasureInfo()
   299  
   300  	if nodeResult.GroupByRequest != nil && nodeResult.GroupByRequest.MeasureOperations != nil {
   301  		for _, mOp := range nodeResult.GroupByRequest.MeasureOperations {
   302  			if mOp.MeasureFunc == utils.Count {
   303  				internalMops = append(internalMops, mOp)
   304  			}
   305  		}
   306  	}
   307  
   308  	measureResults := make([]utils.CValueEnclosure, len(internalMops))
   309  
   310  	columnKeys := make(map[string][]interface{})
   311  
   312  	finalRecInden := make(map[string]string)
   313  
   314  	for recInden, record := range recs {
   315  		colKeyValues := make([]interface{}, 0)
   316  		byteKey := make([]byte, 0) // bucket Key
   317  		for idx, colName := range nodeResult.GroupByCols {
   318  			value, exists := record[colName]
   319  			if !exists {
   320  				value = ""
   321  			}
   322  			if idx > 0 {
   323  				byteKey = append(byteKey, '_')
   324  			}
   325  			byteKey = append(byteKey, []byte(fmt.Sprintf("%v", value))...)
   326  			colKeyValues = append(colKeyValues, value)
   327  		}
   328  
   329  		var currKey bytes.Buffer
   330  		currKey.Write(byteKey)
   331  
   332  		keyStr := toputils.UnsafeByteSliceToString(currKey.Bytes())
   333  
   334  		if _, exists := columnKeys[keyStr]; !exists {
   335  			columnKeys[keyStr] = colKeyValues
   336  			finalRecInden[keyStr] = recInden
   337  		}
   338  
   339  		for cname, indices := range measureInfo {
   340  			var cVal utils.CValueEnclosure
   341  			value, exists := record[cname]
   342  			if !exists {
   343  				log.Errorf("qid=%d, PerformGroupByRequestAggsOnRecs: failed to find column %s in record", qid, cname)
   344  				cVal = utils.CValueEnclosure{Dtype: utils.SS_DT_BACKFILL}
   345  			} else {
   346  				dval, err := utils.CreateDtypeEnclosure(value, qid)
   347  				if dval.Dtype == utils.SS_DT_STRING {
   348  					floatFieldVal, _ := dtu.ConvertToFloat(value, 64)
   349  					if err == nil {
   350  						value = floatFieldVal
   351  						dval.Dtype = utils.SS_DT_FLOAT
   352  					}
   353  				}
   354  
   355  				if err != nil {
   356  					log.Errorf("qid=%d, PerformGroupByRequestAggsOnRecs: failed to create Dtype Value from rec: %v", qid, err)
   357  					cVal = utils.CValueEnclosure{Dtype: utils.SS_DT_BACKFILL}
   358  				} else {
   359  					cVal = utils.CValueEnclosure{Dtype: dval.Dtype, CVal: value}
   360  				}
   361  			}
   362  
   363  			for _, idx := range indices {
   364  				measureResults[idx] = cVal
   365  			}
   366  		}
   367  
   368  		blockRes.AddMeasureResultsToKey(currKey, measureResults, "", false, qid)
   369  	}
   370  
   371  	if nodeResult.RecsAggsBlockResults == nil {
   372  		nodeResult.RecsAggsBlockResults = blockRes
   373  	} else {
   374  		recAggsBlockresults := nodeResult.RecsAggsBlockResults.(*blockresults.BlockResults)
   375  		recAggsBlockresults.MergeBuckets(blockRes)
   376  	}
   377  
   378  	if nodeResult.RecsAggsProcessedSegments < numTotalSegments {
   379  		for k := range recs {
   380  			delete(recs, k)
   381  		}
   382  		return nil
   383  	} else {
   384  		blockRes = nodeResult.RecsAggsBlockResults.(*blockresults.BlockResults)
   385  	}
   386  
   387  	for k := range finalCols {
   388  		delete(finalCols, k)
   389  	}
   390  
   391  	validRecIndens := make(map[string]bool)
   392  
   393  	for bKey, index := range blockRes.GroupByAggregation.StringBucketIdx {
   394  		recInden, exists := finalRecInden[bKey]
   395  		if !exists {
   396  			continue
   397  		}
   398  		validRecIndens[recInden] = true
   399  		bucketValues, bucketCount := blockRes.GroupByAggregation.AllRunningBuckets[index].GetRunningStatsBucketValues()
   400  
   401  		for idx, colName := range nodeResult.GroupByCols {
   402  			if index == 0 {
   403  				finalCols[colName] = true
   404  			}
   405  			recs[recInden][colName] = columnKeys[bKey][idx]
   406  		}
   407  
   408  		for i, mOp := range internalMops {
   409  			if index == 0 {
   410  				finalCols[mOp.String()] = true
   411  			}
   412  
   413  			if mOp.MeasureFunc == utils.Count {
   414  				recs[recInden][mOp.String()] = bucketCount
   415  			} else {
   416  				if mOp.OverrodeMeasureAgg != nil && mOp.OverrodeMeasureAgg.MeasureFunc == utils.Avg {
   417  					floatVal, err := dtu.ConvertToFloat(bucketValues[i].CVal, 64)
   418  					if err != nil {
   419  						log.Errorf("PerformGroupByRequestAggsOnRecs: failed to convert to float: %v", err)
   420  						continue
   421  					}
   422  					recs[recInden][mOp.OverrodeMeasureAgg.String()] = (floatVal / float64(bucketCount))
   423  					finalCols[mOp.OverrodeMeasureAgg.String()] = true
   424  					if mOp.OverrodeMeasureAgg.String() != mOp.String() {
   425  						delete(finalCols, mOp.String())
   426  					}
   427  				} else {
   428  					recs[recInden][mOp.String()] = bucketValues[i].CVal
   429  				}
   430  			}
   431  		}
   432  	}
   433  
   434  	for k := range recs {
   435  		if _, exists := validRecIndens[k]; !exists {
   436  			delete(recs, k)
   437  		}
   438  	}
   439  
   440  	return map[string]bool{"CHECK_NEXT_AGG": true}
   441  }
   442  
// PerformMeasureAggsOnRecs folds each record's measure columns into per-column
// running segment stats kept on nodeResult.RecsRunningSegStats. It consumes
// (deletes) every record it processes. Until all segments are processed it
// returns nil; on the final segment it clears finalCols, writes a single
// summary row (humanized numbers) back into recs under the first record id
// seen, and returns {"CHECK_NEXT_AGG": true}.
func PerformMeasureAggsOnRecs(nodeResult *structs.NodeResult, recs map[string]map[string]interface{}, finalCols map[string]bool, qid uint64, numTotalSegments uint64) map[string]bool {

	searchResults, err := segresults.InitSearchResults(uint64(len(recs)), &structs.QueryAggregators{MeasureOperations: nodeResult.MeasureOperations}, structs.SegmentStatsCmd, qid)
	if err != nil {
		log.Errorf("PerformMeasureAggsOnRecs: failed to initialize search results. Err: %v", err)
		return nil
	}

	searchResults.InitSegmentStatsResults(nodeResult.MeasureOperations)

	// Index of a count(*) op (if any); its result is the total record count
	// rather than a column statistic.
	anyCountStat := -1
	lenRecords := len(recs)

	for idx, mOp := range nodeResult.MeasureOperations {
		if mOp.String() == "count(*)" {
			anyCountStat = idx
			break
		}
	}

	// Remember an arbitrary record id (map iteration order): the final summary
	// row is written back under this key.
	firstRecInden := ""

	for recInden := range recs {
		firstRecInden = recInden
		break
	}

	for recInden, record := range recs {
		// Single-record stats map fed into UpdateSegmentStats per record.
		sstMap := make(map[string]*structs.SegStats, 0)

		for _, mOp := range nodeResult.MeasureOperations {
			dtypeVal, err := utils.CreateDtypeEnclosure(record[mOp.MeasureCol], qid)
			if err != nil {
				log.Errorf("PerformMeasureAggsOnRecs: failed to create Dtype Value from rec: %v", err)
				continue
			}

			// Non-numeric values must parse as floats or the column is skipped
			// for this record.
			if !dtypeVal.IsNumeric() {
				floatVal, err := dtu.ConvertToFloat(record[mOp.MeasureCol], 64)
				if err != nil {
					log.Errorf("PerformMeasureAggsOnRecs: failed to convert to float: %v", err)
					continue
				}
				dtypeVal = &utils.DtypeEnclosure{Dtype: utils.SS_DT_FLOAT, FloatVal: floatVal}
			}

			nTypeEnclosure := &utils.NumTypeEnclosure{
				Ntype:    dtypeVal.Dtype,
				IntgrVal: int64(dtypeVal.FloatVal),
				FloatVal: dtypeVal.FloatVal,
			}

			// Seed a single-value SegStats (min == max == sum == value).
			sstMap[mOp.MeasureCol] = &structs.SegStats{
				IsNumeric:   dtypeVal.IsNumeric(),
				Count:       1,
				Hll:         nil,
				NumStats:    &structs.NumericStats{Min: *nTypeEnclosure, Max: *nTypeEnclosure, Sum: *nTypeEnclosure, Dtype: dtypeVal.Dtype},
				StringStats: nil,
				Records:     nil,
			}

		}

		err := searchResults.UpdateSegmentStats(sstMap, nodeResult.MeasureOperations, nil)
		if err != nil {
			log.Errorf("PerformMeasureAggsOnRecs: failed to update segment stats: %v", err)
		}

		// Deleting during range is safe in Go; records are consumed here.
		delete(recs, recInden)
	}

	// Merge this batch's stats with the running cross-segment stats.
	if nodeResult.RecsRunningSegStats == nil {
		nodeResult.RecsRunningSegStats = searchResults.GetSegmentRunningStats()
	} else {
		sstMap := make(map[string]*structs.SegStats, 0)

		for idx, mOp := range nodeResult.MeasureOperations {
			sstMap[mOp.MeasureCol] = nodeResult.RecsRunningSegStats[idx]
		}

		err := searchResults.UpdateSegmentStats(sstMap, nodeResult.MeasureOperations, nil)
		if err != nil {
			log.Errorf("PerformMeasureAggsOnRecs: failed to update segment stats: %v", err)
		}

		nodeResult.RecsRunningSegStats = searchResults.GetSegmentRunningStats()
	}

	if anyCountStat > -1 {
		nodeResult.TotalRRCCount += uint64(lenRecords)
	}

	if nodeResult.RecsAggsProcessedSegments < numTotalSegments {
		return nil
	} else {
		// Final segment: emit one summary row with humanized values.
		for k := range finalCols {
			delete(finalCols, k)
		}

		finalSegment := make(map[string]interface{}, 0)

		if anyCountStat > -1 {
			finalCols[nodeResult.MeasureOperations[anyCountStat].String()] = true
			finalSegment[nodeResult.MeasureOperations[anyCountStat].String()] = humanize.Comma(int64(nodeResult.TotalRRCCount))
		}

		for colName, value := range searchResults.GetSegmentStatsMeasureResults() {
			finalCols[colName] = true
			if value.Dtype == utils.SS_DT_FLOAT {
				value.CVal = humanize.CommafWithDigits(value.CVal.(float64), 3)
			} else {
				value.CVal = humanize.Comma(value.CVal.(int64))
			}
			finalSegment[colName] = value.CVal
		}

		recs[firstRecInden] = finalSegment
	}

	return map[string]bool{"CHECK_NEXT_AGG": true}
}
   564  
   565  // returns all columns in aggs and the timestamp column
   566  func GetAggColsAndTimestamp(aggs *structs.QueryAggregators) (map[string]bool, map[string]utils.AggColUsageMode, map[string]bool) {
   567  	aggCols := make(map[string]bool)
   568  	timestampKey := config.GetTimeStampKey()
   569  	aggCols[timestampKey] = true
   570  	if aggs == nil {
   571  		return aggCols, nil, nil
   572  	}
   573  
   574  	// Determine if current col used by eval statements
   575  	aggColUsage := make(map[string]utils.AggColUsageMode)
   576  	// Determine if current col used by agg values() func
   577  	valuesUsage := make(map[string]bool)
   578  	if aggs.Sort != nil {
   579  		aggCols[aggs.Sort.ColName] = true
   580  	}
   581  	if aggs.GroupByRequest != nil {
   582  		for _, cName := range aggs.GroupByRequest.GroupByColumns {
   583  			aggCols[cName] = true
   584  		}
   585  		for _, mOp := range aggs.GroupByRequest.MeasureOperations {
   586  			aggregations.DetermineAggColUsage(mOp, aggCols, aggColUsage, valuesUsage)
   587  		}
   588  	}
   589  	if aggs.TimeHistogram != nil && aggs.TimeHistogram.Timechart != nil && len(aggs.TimeHistogram.Timechart.ByField) > 0 {
   590  		aggCols[aggs.TimeHistogram.Timechart.ByField] = true
   591  	}
   592  	return aggCols, aggColUsage, valuesUsage
   593  }
   594  
   595  func applyAggregationsToResultFastPath(aggs *structs.QueryAggregators, segmentSearchRecords *SegmentSearchStatus,
   596  	searchReq *structs.SegmentSearchRequest, blockSummaries []*structs.BlockSummary, queryRange *dtu.TimeRange,
   597  	sizeLimit uint64, fileParallelism int64, queryMetrics *structs.QueryProcessingMetrics,
   598  	qid uint64, allSearchResults *segresults.SearchResults) error {
   599  
   600  	var blkWG sync.WaitGroup
   601  	allBlocksChan := make(chan *BlockSearchStatus, fileParallelism)
   602  
   603  	rupReader, err := segread.InitNewRollupReader(searchReq.SegmentKey, config.GetTimeStampKey(), qid)
   604  	if err != nil {
   605  		log.Errorf("qid=%d, applyAggregationsToResultFastPath: failed initialize rollup reader segkey %s. Error: %v",
   606  			qid, searchReq.SegmentKey, err)
   607  	} else {
   608  		defer rupReader.Close()
   609  	}
   610  
   611  	// we just call this func so that we load up the correct rollup files for the specified ht interval
   612  	allBlocksToXRollup, _, _ := getRollupForAggregation(aggs, rupReader)
   613  	for i := int64(0); i < fileParallelism; i++ {
   614  		blkWG.Add(1)
   615  		go applyAggregationsToSingleBlockFastPath(aggs, allSearchResults, allBlocksChan,
   616  			searchReq, queryRange, sizeLimit, &blkWG, queryMetrics, qid, blockSummaries,
   617  			allBlocksToXRollup)
   618  	}
   619  
   620  	for _, blkResults := range segmentSearchRecords.AllBlockStatus {
   621  		allBlocksChan <- blkResults
   622  	}
   623  	close(allBlocksChan)
   624  	blkWG.Wait()
   625  	return nil
   626  }
   627  
   628  func applyAggregationsToSingleBlockFastPath(aggs *structs.QueryAggregators,
   629  	allSearchResults *segresults.SearchResults, blockChan chan *BlockSearchStatus, searchReq *structs.SegmentSearchRequest,
   630  	queryRange *dtu.TimeRange, sizeLimit uint64, wg *sync.WaitGroup, queryMetrics *structs.QueryProcessingMetrics,
   631  	qid uint64, blockSummaries []*structs.BlockSummary,
   632  	allBlocksToXRollup map[uint16]map[uint64]*writer.RolledRecs) {
   633  
   634  	blkResults, err := blockresults.InitBlockResults(sizeLimit, aggs, qid)
   635  	if err != nil {
   636  		log.Errorf("applyAggregationsToSingleBlockFastPath: failed to initialize block results reader for %s. Err: %v", searchReq.SegmentKey, err)
   637  		allSearchResults.AddError(err)
   638  	}
   639  
   640  	defer wg.Done()
   641  
   642  	for blockStatus := range blockChan {
   643  
   644  		var toXRollup map[uint64]*writer.RolledRecs = nil
   645  		if allBlocksToXRollup != nil {
   646  			toXRollup = allBlocksToXRollup[blockStatus.BlockNum]
   647  		}
   648  
   649  		for rupTskey, rr := range toXRollup {
   650  			matchedRrCount := uint16(rr.MatchedRes.GetNumberOfSetBits())
   651  			blkResults.AddKeyToTimeBucket(rupTskey, matchedRrCount)
   652  		}
   653  
   654  		blkResults.AddMatchedCount(uint64(blockStatus.numRecords))
   655  		queryMetrics.IncrementNumBlocksWithMatch(1)
   656  	}
   657  	allSearchResults.AddBlockResults(blkResults)
   658  }
   659  
   660  func applySegStatsToMatchedRecords(ops []*structs.MeasureAggregator, segmentSearchRecords *SegmentSearchStatus,
   661  	searchReq *structs.SegmentSearchRequest, blockSummaries []*structs.BlockSummary, queryRange *dtu.TimeRange,
   662  	fileParallelism int64, queryMetrics *structs.QueryProcessingMetrics, qid uint64) (map[string]*structs.SegStats, error) {
   663  
   664  	var blkWG sync.WaitGroup
   665  	allBlocksChan := make(chan *BlockSearchStatus, fileParallelism)
   666  
   667  	measureColAndTS, aggColUsage, valuesUsage := getSegStatsMeasureCols(ops)
   668  	sharedReader, err := segread.InitSharedMultiColumnReaders(searchReq.SegmentKey, measureColAndTS, searchReq.AllBlocksToSearch,
   669  		blockSummaries, int(fileParallelism), qid)
   670  	if err != nil {
   671  		log.Errorf("applyAggregationsToResult: failed to load all column files reader for %s. Needed cols %+v. Err: %+v",
   672  			searchReq.SegmentKey, measureColAndTS, err)
   673  		return nil, errors.New("failed to init sharedmulticolreader")
   674  	}
   675  	defer sharedReader.Close()
   676  
   677  	statRes := segresults.InitStatsResults()
   678  	delete(measureColAndTS, config.GetTimeStampKey())
   679  	for i := int64(0); i < fileParallelism; i++ {
   680  		blkWG.Add(1)
   681  		go segmentStatsWorker(statRes, measureColAndTS, aggColUsage, valuesUsage, sharedReader.MultiColReaders[i], allBlocksChan,
   682  			searchReq, blockSummaries, queryRange, &blkWG, queryMetrics, qid)
   683  	}
   684  
   685  	absKeys := make([]uint16, 0, len(segmentSearchRecords.AllBlockStatus))
   686  	for k := range segmentSearchRecords.AllBlockStatus {
   687  		absKeys = append(absKeys, k)
   688  	}
   689  	for _, k := range absKeys {
   690  		blkResults := segmentSearchRecords.AllBlockStatus[k]
   691  		if blkResults.hasAnyMatched {
   692  			allBlocksChan <- blkResults
   693  		}
   694  	}
   695  	close(allBlocksChan)
   696  	blkWG.Wait()
   697  
   698  	return statRes.GetSegStats(), nil
   699  }
   700  
   701  // returns all columns (+timestamp) in the measure operations
   702  func getSegStatsMeasureCols(ops []*structs.MeasureAggregator) (map[string]bool, map[string]utils.AggColUsageMode, map[string]bool) {
   703  	// Determine if current col used by eval statements
   704  	aggColUsage := make(map[string]utils.AggColUsageMode)
   705  	// Determine if current col used by agg values() func
   706  	valuesUsage := make(map[string]bool)
   707  	aggCols := make(map[string]bool)
   708  	timestampKey := config.GetTimeStampKey()
   709  	aggCols[timestampKey] = true
   710  	for _, op := range ops {
   711  		aggregations.DetermineAggColUsage(op, aggCols, aggColUsage, valuesUsage)
   712  	}
   713  	return aggCols, aggColUsage, valuesUsage
   714  }
   715  
   716  func segmentStatsWorker(statRes *segresults.StatsResults, mCols map[string]bool, aggColUsage map[string]utils.AggColUsageMode, valuesUsage map[string]bool,
   717  	multiReader *segread.MultiColSegmentReader, blockChan chan *BlockSearchStatus, searchReq *structs.SegmentSearchRequest, blockSummaries []*structs.BlockSummary,
   718  	queryRange *dtu.TimeRange, wg *sync.WaitGroup, queryMetrics *structs.QueryProcessingMetrics, qid uint64) {
   719  
   720  	defer wg.Done()
   721  	bb := bbp.Get()
   722  	defer bbp.Put(bb)
   723  
   724  	localStats := make(map[string]*structs.SegStats)
   725  	for blockStatus := range blockChan {
   726  		isBlkFullyEncosed := queryRange.AreTimesFullyEnclosed(blockSummaries[blockStatus.BlockNum].LowTs,
   727  			blockSummaries[blockStatus.BlockNum].HighTs)
   728  		recIT, err := blockStatus.GetRecordIteratorForBlock(utils.And)
   729  		if err != nil {
   730  			log.Errorf("qid=%d, segmentStatsWorker: failed to initialize record iterator for block %+v. Err: %v",
   731  				qid, blockStatus.BlockNum, err)
   732  			continue
   733  		}
   734  
   735  		sortedMatchedRecs := make([]uint16, recIT.AllRecLen)
   736  		idx := 0
   737  		for i := uint(0); i < uint(recIT.AllRecLen); i++ {
   738  			if !recIT.ShouldProcessRecord(i) {
   739  				continue
   740  			}
   741  			recNum16 := uint16(i)
   742  			if !isBlkFullyEncosed {
   743  				recTs, err := multiReader.GetTimeStampForRecord(blockStatus.BlockNum, recNum16, qid)
   744  				if err != nil {
   745  					log.Errorf("qid=%d, segmentStatsWorker failed to initialize time reader for block %+v. Err: %v", qid,
   746  						blockStatus.BlockNum, err)
   747  					continue
   748  				}
   749  				if !queryRange.CheckInRange(recTs) {
   750  					continue
   751  				}
   752  			}
   753  			sortedMatchedRecs[idx] = uint16(i)
   754  			idx++
   755  		}
   756  		sortedMatchedRecs = sortedMatchedRecs[:idx]
   757  		nonDeCols := applySegmentStatsUsingDictEncoding(multiReader, sortedMatchedRecs, mCols, aggColUsage, valuesUsage, blockStatus.BlockNum, recIT, localStats, bb, qid)
   758  		for _, recNum := range sortedMatchedRecs {
   759  			for colName := range nonDeCols {
   760  				val, err := multiReader.ExtractValueFromColumnFile(colName, blockStatus.BlockNum, recNum, qid)
   761  				if err != nil {
   762  					log.Errorf("qid=%d, segmentStatsWorker failed to extract value for column %+v. Err: %v", qid, colName, err)
   763  					continue
   764  				}
   765  
   766  				hasValuesFunc, exists := valuesUsage[colName]
   767  				if !exists {
   768  					hasValuesFunc = false
   769  				}
   770  
   771  				if val.Dtype == utils.SS_DT_STRING {
   772  					str, err := val.GetString()
   773  					if err != nil {
   774  						log.Errorf("qid=%d, segmentStatsWorker failed to extract value for string although type check passed %+v. Err: %v", qid, colName, err)
   775  						continue
   776  					}
   777  					stats.AddSegStatsStr(localStats, colName, str, bb, aggColUsage, hasValuesFunc)
   778  				} else {
   779  					fVal, err := val.GetFloatValue()
   780  					if err != nil {
   781  						log.Errorf("qid=%d, segmentStatsWorker failed to extract numerical value for type %+v. Err: %v", qid, val.Dtype, err)
   782  						continue
   783  					}
   784  					stats.AddSegStatsNums(localStats, colName, utils.SS_FLOAT64, 0, 0, fVal, fmt.Sprintf("%v", fVal), bb, aggColUsage, hasValuesFunc)
   785  				}
   786  			}
   787  		}
   788  	}
   789  	statRes.MergeSegStats(localStats)
   790  }
   791  
   792  // returns all columns that are not dict encoded
   793  func applySegmentStatsUsingDictEncoding(mcr *segread.MultiColSegmentReader, filterdRecNums []uint16, mCols map[string]bool, aggColUsage map[string]utils.AggColUsageMode, valuesUsage map[string]bool,
   794  	blockNum uint16, bri *BlockRecordIterator, lStats map[string]*structs.SegStats, bb *bbp.ByteBuffer, qid uint64) map[string]bool {
   795  	retVal := make(map[string]bool)
   796  	for colName := range mCols {
   797  		if colName == "*" {
   798  			stats.AddSegStatsCount(lStats, colName, uint64(len(filterdRecNums)))
   799  			continue
   800  		}
   801  		isDict, err := mcr.IsBlkDictEncoded(colName, blockNum)
   802  		if err != nil {
   803  			log.Errorf("qid=%d, segmentStatsWorker failed to check if column is dict encoded %+v. Err: %v", qid, colName, err)
   804  			continue
   805  		}
   806  		if !isDict {
   807  			retVal[colName] = true
   808  			continue
   809  		}
   810  		results := make(map[uint16]map[string]interface{})
   811  		ok := mcr.GetDictEncCvalsFromColFile(results, colName, blockNum, filterdRecNums, qid)
   812  		if !ok {
   813  			log.Errorf("qid=%d, segmentStatsWorker failed to get dict cvals for col %s", qid, colName)
   814  			continue
   815  		}
   816  		for _, cMap := range results {
   817  			for colName, rawVal := range cMap {
   818  				colUsage, exists := aggColUsage[colName]
   819  				if !exists {
   820  					colUsage = utils.NoEvalUsage
   821  				}
   822  				// If current col will be used by eval funcs, we should store the raw data and process it
   823  				if colUsage == utils.WithEvalUsage || colUsage == utils.BothUsage {
   824  					e := utils.CValueEnclosure{}
   825  					err := e.ConvertValue(rawVal)
   826  					if err != nil {
   827  						log.Errorf("applySegmentStatsUsingDictEncoding: %v", err)
   828  						continue
   829  					}
   830  
   831  					if e.Dtype != utils.SS_DT_STRING {
   832  						retVal[colName] = true
   833  						continue
   834  					}
   835  
   836  					var stats *structs.SegStats
   837  					var ok bool
   838  					stats, ok = lStats[colName]
   839  					if !ok {
   840  						stats = &structs.SegStats{
   841  							IsNumeric: false,
   842  							Count:     0,
   843  							Hll:       hyperloglog.New16(),
   844  							Records:   make([]*utils.CValueEnclosure, 0),
   845  						}
   846  
   847  						lStats[colName] = stats
   848  					}
   849  					stats.Records = append(stats.Records, &e)
   850  
   851  					// Current col only used by eval statements
   852  					if colUsage == utils.WithEvalUsage {
   853  						continue
   854  					}
   855  				}
   856  
   857  				hasValuesFunc, exists := valuesUsage[colName]
   858  				if !exists {
   859  					hasValuesFunc = false
   860  				}
   861  
   862  				switch val := rawVal.(type) {
   863  				case string:
   864  					stats.AddSegStatsStr(lStats, colName, val, bb, aggColUsage, hasValuesFunc)
   865  				default:
   866  					// This should never occur as dict encoding is only supported for string fields.
   867  					log.Errorf("qid=%d, segmentStatsWorker found a non string in a dict encoded segment. CName %+s", qid, colName)
   868  				}
   869  			}
   870  		}
   871  	}
   872  	return retVal
   873  }
   874  
// iterRecsAddRrc walks every record of a block that passed the search filter,
// applies the query time-range check, optionally feeds the time histogram,
// and adds qualifying records (as RecordResultContainers) to blkResults,
// subject to the result-size and sort-value constraints. It also records the
// per-block matched count and block-match metric.
func iterRecsAddRrc(recIT *BlockRecordIterator, mcr *segread.MultiColSegmentReader,
	blockStatus *BlockSearchStatus, queryRange *dtu.TimeRange, aggs *structs.QueryAggregators,
	aggsHasTimeHt bool, addedTimeHt bool, blkResults *blockresults.BlockResults,
	queryMetrics *structs.QueryProcessingMetrics,
	allSearchResults *segresults.SearchResults, searchReq *structs.SegmentSearchRequest, qid uint64) {

	numRecsMatched := uint16(0)
	for recNum := uint(0); recNum < uint(recIT.AllRecLen); recNum++ {
		if !recIT.ShouldProcessRecord(recNum) {
			continue
		}
		recNumUint16 := uint16(recNum)
		recTs, err := mcr.GetTimeStampForRecord(blockStatus.BlockNum, recNumUint16, qid)
		if err != nil {
			// NOTE(review): a timestamp read failure silently abandons the
			// rest of the block (no log, remaining records skipped) — confirm
			// this is intended rather than `continue`.
			break
		}
		if !queryRange.CheckInRange(recTs) {
			// Out of range: clear the record so later passes skip it too.
			recIT.UnsetRecord(recNum)
			continue
		}
		// Bucket the timestamp only when a time histogram is requested and
		// was not already populated elsewhere (addedTimeHt).
		if aggs != nil && aggsHasTimeHt && !addedTimeHt {
			blkResults.AddKeyToTimeBucket(recTs, 1)
		}
		numRecsMatched++
		if blkResults.ShouldAddMore() {
			// Extract the sort value for this record and only materialize an
			// RRC when the value would actually make it into the result set.
			sortVal, invalidCol := extractSortVals(aggs, mcr, blockStatus.BlockNum, recNumUint16, recTs, qid)
			if !invalidCol && blkResults.WillValueBeAdded(sortVal) {
				rrc := &utils.RecordResultContainer{
					SegKeyInfo: utils.SegKeyInfo{
						SegKeyEnc: allSearchResults.GetAddSegEnc(searchReq.SegmentKey),
						IsRemote:  false,
					},
					BlockNum:         blockStatus.BlockNum,
					RecordNum:        recNumUint16,
					SortColumnValue:  sortVal,
					VirtualTableName: searchReq.VirtualTableName,
					TimeStamp:        recTs,
				}
				blkResults.Add(rrc)
			}
		}
	}
	if numRecsMatched > 0 {
		blkResults.AddMatchedCount(uint64(numRecsMatched))
		queryMetrics.IncrementNumBlocksWithMatch(1)
	}
}
   922  
   923  func doAggs(aggs *structs.QueryAggregators, mcr *segread.MultiColSegmentReader,
   924  	bss *BlockSearchStatus, recIT *BlockRecordIterator, blkResults *blockresults.BlockResults,
   925  	isBlkFullyEncosed bool, qid uint64) {
   926  
   927  	if aggs == nil || aggs.GroupByRequest == nil {
   928  		return // nothing to do
   929  	}
   930  
   931  	measureInfo, internalMops := blkResults.GetConvertedMeasureInfo()
   932  	addRecordToAggregations(aggs.GroupByRequest, aggs.TimeHistogram, measureInfo, len(internalMops), mcr,
   933  		bss.BlockNum, recIT, blkResults, qid)
   934  
   935  }
   936  
   937  func CanDoStarTree(segKey string, aggs *structs.QueryAggregators,
   938  	qid uint64) (bool, *segread.AgileTreeReader) {
   939  
   940  	// init agileTreeader
   941  	str, err := segread.InitNewAgileTreeReader(segKey, qid)
   942  	if err != nil {
   943  		log.Errorf("qid=%v, CanDoStarTree: failed to init agileTreereader, err: %v", qid, err)
   944  		return false, nil
   945  	}
   946  
   947  	ok, err := str.CanUseAgileTree(aggs.GroupByRequest)
   948  	if err != nil {
   949  		str.Close()
   950  		return false, nil
   951  	}
   952  
   953  	if !ok {
   954  		str.Close()
   955  		return false, nil
   956  	}
   957  	return true, str // caller responsible to close str if we can use agileTree
   958  }
   959  
   960  func ApplyAgileTree(str *segread.AgileTreeReader, aggs *structs.QueryAggregators,
   961  	allSearchResults *segresults.SearchResults, sizeLimit uint64, qid uint64,
   962  	agileTreeBuf []byte) {
   963  
   964  	_, internalMops := allSearchResults.BlockResults.GetConvertedMeasureInfo()
   965  
   966  	// Note we are using AllSearchResults's blockresult directly here to avoid creating
   967  	// blkRes for each seg and then merging it. This change has perf improvements
   968  	// but the side effect is other threads (async wsSearchHandler threads can't access the
   969  	// blkResuls, else will panic. ALSO this means we can only apply agileTree one seg at a time.
   970  	err := str.ApplyGroupByJit(aggs.GroupByRequest.GroupByColumns, internalMops,
   971  		allSearchResults.BlockResults, qid, agileTreeBuf)
   972  	if err != nil {
   973  		allSearchResults.AddError(err)
   974  		log.Errorf("qid=%v, ApplyAgileTree: failed to JIT agileTree aggs, err: %v", qid, err)
   975  		return
   976  	}
   977  }
   978  
   979  func checkIfGrpColsPresent(grpReq *structs.GroupByRequest,
   980  	mcsr *segread.MultiColSegmentReader, allSearchResults *segresults.SearchResults) (string, bool) {
   981  	measureInfo, _ := allSearchResults.BlockResults.GetConvertedMeasureInfo()
   982  	for _, cname := range grpReq.GroupByColumns {
   983  		if !mcsr.IsColPresent(cname) {
   984  			return cname, false
   985  		}
   986  	}
   987  
   988  	for cname := range measureInfo {
   989  		if !mcsr.IsColPresent(cname) {
   990  			return cname, false
   991  		}
   992  	}
   993  	return "", true
   994  }