github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/search/searchaggs.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package search 18 19 import ( 20 "bytes" 21 "errors" 22 "fmt" 23 "sort" 24 "sync" 25 26 "github.com/axiomhq/hyperloglog" 27 "github.com/dustin/go-humanize" 28 dtu "github.com/siglens/siglens/pkg/common/dtypeutils" 29 "github.com/siglens/siglens/pkg/config" 30 "github.com/siglens/siglens/pkg/segment/aggregations" 31 "github.com/siglens/siglens/pkg/segment/reader/segread" 32 "github.com/siglens/siglens/pkg/segment/results/blockresults" 33 "github.com/siglens/siglens/pkg/segment/results/segresults" 34 "github.com/siglens/siglens/pkg/segment/structs" 35 "github.com/siglens/siglens/pkg/segment/utils" 36 "github.com/siglens/siglens/pkg/segment/writer" 37 "github.com/siglens/siglens/pkg/segment/writer/stats" 38 toputils "github.com/siglens/siglens/pkg/utils" 39 bbp "github.com/valyala/bytebufferpool" 40 41 log "github.com/sirupsen/logrus" 42 ) 43 44 func applyAggregationsToResult(aggs *structs.QueryAggregators, segmentSearchRecords *SegmentSearchStatus, 45 searchReq *structs.SegmentSearchRequest, blockSummaries []*structs.BlockSummary, queryRange *dtu.TimeRange, 46 sizeLimit uint64, fileParallelism int64, queryMetrics *structs.QueryProcessingMetrics, qid uint64, 47 allSearchResults *segresults.SearchResults) error { 48 var blkWG sync.WaitGroup 49 allBlocksChan := make(chan *BlockSearchStatus, fileParallelism) 50 aggCols, _, _ := GetAggColsAndTimestamp(aggs) 51 sharedReader, err := segread.InitSharedMultiColumnReaders(searchReq.SegmentKey, aggCols, searchReq.AllBlocksToSearch, 52 blockSummaries, int(fileParallelism), qid) 53 if err != nil { 54 log.Errorf("applyAggregationsToResult: failed to load all column files reader for %s. Needed cols %+v. Err: %+v", 55 searchReq.SegmentKey, aggCols, err) 56 if sharedReader != nil { 57 sharedReader.Close() 58 } 59 return err 60 } 61 defer sharedReader.Close() 62 63 usedByTimechart := aggs.UsedByTimechart() 64 if (aggs != nil && aggs.GroupByRequest != nil) || usedByTimechart { 65 cname, ok := checkIfGrpColsPresent(aggs.GroupByRequest, sharedReader.MultiColReaders[0], 66 allSearchResults) 67 if !ok && !usedByTimechart { 68 log.Errorf("qid=%v, applyAggregationsToResult: cname: %v was not present", qid, cname) 69 return fmt.Errorf("qid=%v, applyAggregationsToResult: cname: %v was not present", qid, 70 cname) 71 } 72 } 73 74 rupReader, err := segread.InitNewRollupReader(searchReq.SegmentKey, config.GetTimeStampKey(), qid) 75 if err != nil { 76 log.Errorf("qid=%d, applyAggregationsToResult: failed initialize rollup reader segkey %s. Error: %v", 77 qid, searchReq.SegmentKey, err) 78 } else { 79 defer rupReader.Close() 80 } 81 allBlocksToXRollup, aggsHasTimeHt, aggsHasNonTimeHt := getRollupForAggregation(aggs, rupReader) 82 for i := int64(0); i < fileParallelism; i++ { 83 blkWG.Add(1) 84 go applyAggregationsToSingleBlock(sharedReader.MultiColReaders[i], aggs, allSearchResults, allBlocksChan, 85 searchReq, queryRange, sizeLimit, &blkWG, queryMetrics, qid, blockSummaries, aggsHasTimeHt, 86 aggsHasNonTimeHt, allBlocksToXRollup) 87 } 88 absKeys := make([]uint16, 0, len(segmentSearchRecords.AllBlockStatus)) 89 for k := range segmentSearchRecords.AllBlockStatus { 90 absKeys = append(absKeys, k) 91 } 92 if aggs != nil && aggs.Sort != nil { 93 if aggs.Sort.Ascending { 94 sort.Slice(absKeys, func(i, j int) bool { return absKeys[i] < absKeys[j] }) 95 } else { 96 sort.Slice(absKeys, func(i, j int) bool { return absKeys[i] > absKeys[j] }) 97 } 98 } 99 for _, k := range absKeys { 100 blkResults := segmentSearchRecords.AllBlockStatus[k] 101 if blkResults.hasAnyMatched { 102 allBlocksChan <- blkResults 103 } 104 } 105 close(allBlocksChan) 106 blkWG.Wait() 107 return nil 108 } 109 110 func applyAggregationsToSingleBlock(multiReader *segread.MultiColSegmentReader, aggs *structs.QueryAggregators, 111 allSearchResults *segresults.SearchResults, blockChan chan *BlockSearchStatus, searchReq *structs.SegmentSearchRequest, 112 queryRange *dtu.TimeRange, sizeLimit uint64, wg *sync.WaitGroup, queryMetrics *structs.QueryProcessingMetrics, 113 qid uint64, blockSummaries []*structs.BlockSummary, aggsHasTimeHt bool, aggsHasNonTimeHt bool, 114 allBlocksToXRollup map[uint16]map[uint64]*writer.RolledRecs) { 115 116 blkResults, err := blockresults.InitBlockResults(sizeLimit, aggs, qid) 117 if err != nil { 118 log.Errorf("applyAggregationsToSingleBlock: failed to initialize block results reader for %s. Err: %v", searchReq.SegmentKey, err) 119 allSearchResults.AddError(err) 120 } 121 defer wg.Done() 122 123 for blockStatus := range blockChan { 124 if !blockStatus.hasAnyMatched { 125 continue 126 } 127 recIT, err := blockStatus.GetRecordIteratorCopyForBlock(utils.And) 128 if err != nil { 129 log.Errorf("qid=%d, applyAggregationsToSingleBlock: failed to initialize record iterator for block %+v. Err: %v", 130 qid, blockStatus.BlockNum, err) 131 continue 132 } 133 134 var toXRollup map[uint64]*writer.RolledRecs = nil 135 if allBlocksToXRollup != nil { 136 toXRollup = allBlocksToXRollup[blockStatus.BlockNum] 137 } 138 139 isBlkFullyEncosed := queryRange.AreTimesFullyEnclosed(blockSummaries[blockStatus.BlockNum].LowTs, 140 blockSummaries[blockStatus.BlockNum].HighTs) 141 142 var addedTimeHt = false 143 if aggs != nil && aggs.TimeHistogram != nil && aggs.TimeHistogram.Timechart == nil && aggsHasTimeHt && isBlkFullyEncosed && 144 toXRollup != nil { 145 for rupTskey, rr := range toXRollup { 146 rr.MatchedRes.InPlaceIntersection(recIT.AllRecords) 147 matchedRrCount := uint16(rr.MatchedRes.GetNumberOfSetBits()) 148 blkResults.AddKeyToTimeBucket(rupTskey, matchedRrCount) 149 } 150 addedTimeHt = true 151 } 152 153 if blkResults.ShouldIterateRecords(aggsHasTimeHt, isBlkFullyEncosed, 154 blockSummaries[blockStatus.BlockNum].LowTs, 155 blockSummaries[blockStatus.BlockNum].HighTs, addedTimeHt) { 156 iterRecsAddRrc(recIT, multiReader, blockStatus, queryRange, aggs, aggsHasTimeHt, 157 addedTimeHt, blkResults, queryMetrics, allSearchResults, searchReq, qid) 158 } else { 159 // we did not iterate the records so now we need to just update the counts, so that early-exit 160 // as well as hit.total has somewhat accurate value 161 rrMc := uint64(recIT.AllRecords.GetNumberOfSetBits()) 162 if rrMc > 0 { 163 blkResults.AddMatchedCount(rrMc) 164 queryMetrics.IncrementNumBlocksWithMatch(1) 165 } 166 } 167 doAggs(aggs, multiReader, blockStatus, recIT, blkResults, isBlkFullyEncosed, qid) 168 } 169 allSearchResults.AddBlockResults(blkResults) 170 } 171 172 func addRecordToAggregations(grpReq *structs.GroupByRequest, timeHistogram *structs.TimeBucket, measureInfo map[string][]int, numMFuncs int, multiColReader *segread.MultiColSegmentReader, 173 blockNum uint16, recIT *BlockRecordIterator, blockRes *blockresults.BlockResults, qid uint64) { 174 measureResults := make([]utils.CValueEnclosure, numMFuncs) 175 usedByTimechart := (timeHistogram != nil && timeHistogram.Timechart != nil) 176 hasLimitOption := false 177 groupByColValCnt := make(map[string]int, 0) 178 var timeRangeBuckets []uint64 179 if usedByTimechart { 180 timeRangeBuckets = aggregations.GenerateTimeRangeBuckets(timeHistogram) 181 hasLimitOption = timeHistogram.Timechart.LimitExpr != nil 182 } 183 for recNum := uint16(0); recNum < recIT.AllRecLen; recNum++ { 184 if !recIT.ShouldProcessRecord(uint(recNum)) { 185 continue 186 } 187 188 var currKey bytes.Buffer 189 groupByColVal := "" 190 191 if usedByTimechart { 192 // Find out timePoint for current row 193 ts, err := multiColReader.GetTimeStampForRecord(blockNum, recNum, qid) 194 if err != nil { 195 log.Errorf("addRecordToAggregations: Failed to extract value from timestamp: %v", err) 196 continue 197 } 198 if ts < timeHistogram.StartTime || ts > timeHistogram.EndTime { 199 continue 200 } 201 timePoint := aggregations.FindTimeRangeBucket(timeRangeBuckets, ts, timeHistogram.IntervalMillis) 202 203 retVal := make([]byte, 9) 204 copy(retVal[0:], utils.VALTYPE_ENC_UINT64[:]) 205 copy(retVal[1:], toputils.Uint64ToBytesLittleEndian(timePoint)) 206 currKey.Write(retVal) 207 208 // Get timechart's group by col val, each different val will be a bucket inside each time range bucket 209 byField := timeHistogram.Timechart.ByField 210 if len(byField) > 0 { 211 rawVal, err := multiColReader.ReadRawRecordFromColumnFile(byField, blockNum, recNum, qid) 212 if err != nil { 213 log.Errorf("addRecordToAggregations: Failed to get key for column %v: %v", byField, err) 214 } else { 215 strs, err := utils.ConvertGroupByKey(rawVal) 216 if err != nil { 217 log.Errorf("addRecordToAggregations: failed to extract raw key: %v", err) 218 } 219 if len(strs) == 1 { 220 groupByColVal = strs[0] 221 } else { 222 log.Errorf("addRecordToAggregations: invalid length of groupByColVal") 223 } 224 } 225 if hasLimitOption { 226 cnt, exists := groupByColValCnt[groupByColVal] 227 if exists { 228 groupByColValCnt[groupByColVal] = cnt + 1 229 } else { 230 groupByColValCnt[groupByColVal] = 1 231 } 232 } 233 } 234 } else { 235 for _, col := range grpReq.GroupByColumns { 236 rawVal, err := multiColReader.ReadRawRecordFromColumnFile(col, blockNum, recNum, qid) 237 if err != nil { 238 log.Errorf("addRecordToAggregations: Failed to get key for column %v: %v", col, err) 239 currKey.Write(utils.VALTYPE_ENC_BACKFILL) 240 } else { 241 currKey.Write(rawVal) 242 } 243 } 244 } 245 246 for cName, indices := range measureInfo { 247 rawVal, err := multiColReader.ExtractValueFromColumnFile(cName, blockNum, recNum, qid) 248 if err != nil { 249 log.Errorf("addRecordToAggregations: Failed to extract measure value from column %+v: %v", cName, err) 250 rawVal = &utils.CValueEnclosure{Dtype: utils.SS_DT_BACKFILL} 251 } 252 for _, idx := range indices { 253 measureResults[idx] = *rawVal 254 } 255 } 256 blockRes.AddMeasureResultsToKey(currKey, measureResults, groupByColVal, usedByTimechart, qid) 257 } 258 259 if usedByTimechart && len(timeHistogram.Timechart.ByField) > 0 { 260 if len(blockRes.GroupByAggregation.GroupByColValCnt) > 0 { 261 aggregations.MergeMap(blockRes.GroupByAggregation.GroupByColValCnt, groupByColValCnt) 262 } else { 263 blockRes.GroupByAggregation.GroupByColValCnt = groupByColValCnt 264 } 265 } 266 } 267 268 func PerformAggsOnRecs(nodeResult *structs.NodeResult, aggs *structs.QueryAggregators, recs map[string]map[string]interface{}, 269 finalCols map[string]bool, numTotalSegments uint64, finishesSegment bool, qid uint64) map[string]bool { 270 271 if !nodeResult.PerformAggsOnRecs { 272 return nil 273 } 274 275 if finishesSegment { 276 nodeResult.RecsAggsProcessedSegments++ 277 } 278 279 if nodeResult.RecsAggsType == structs.GroupByType { 280 return PerformGroupByRequestAggsOnRecs(nodeResult, recs, finalCols, qid, numTotalSegments) 281 } else if nodeResult.RecsAggsType == structs.MeasureAggsType { 282 return PerformMeasureAggsOnRecs(nodeResult, recs, finalCols, qid, numTotalSegments) 283 } 284 285 return nil 286 } 287 288 func PerformGroupByRequestAggsOnRecs(nodeResult *structs.NodeResult, recs map[string]map[string]interface{}, finalCols map[string]bool, qid uint64, numTotalSegments uint64) map[string]bool { 289 290 nodeResult.GroupByRequest.BucketCount = 3000 291 292 blockRes, err := blockresults.InitBlockResults(uint64(len(recs)), &structs.QueryAggregators{GroupByRequest: nodeResult.GroupByRequest}, qid) 293 if err != nil { 294 log.Errorf("PerformGroupByRequestAggsOnRecs: failed to initialize block results reader. Err: %v", err) 295 return nil 296 } 297 298 measureInfo, internalMops := blockRes.GetConvertedMeasureInfo() 299 300 if nodeResult.GroupByRequest != nil && nodeResult.GroupByRequest.MeasureOperations != nil { 301 for _, mOp := range nodeResult.GroupByRequest.MeasureOperations { 302 if mOp.MeasureFunc == utils.Count { 303 internalMops = append(internalMops, mOp) 304 } 305 } 306 } 307 308 measureResults := make([]utils.CValueEnclosure, len(internalMops)) 309 310 columnKeys := make(map[string][]interface{}) 311 312 finalRecInden := make(map[string]string) 313 314 for recInden, record := range recs { 315 colKeyValues := make([]interface{}, 0) 316 byteKey := make([]byte, 0) // bucket Key 317 for idx, colName := range nodeResult.GroupByCols { 318 value, exists := record[colName] 319 if !exists { 320 value = "" 321 } 322 if idx > 0 { 323 byteKey = append(byteKey, '_') 324 } 325 byteKey = append(byteKey, []byte(fmt.Sprintf("%v", value))...) 326 colKeyValues = append(colKeyValues, value) 327 } 328 329 var currKey bytes.Buffer 330 currKey.Write(byteKey) 331 332 keyStr := toputils.UnsafeByteSliceToString(currKey.Bytes()) 333 334 if _, exists := columnKeys[keyStr]; !exists { 335 columnKeys[keyStr] = colKeyValues 336 finalRecInden[keyStr] = recInden 337 } 338 339 for cname, indices := range measureInfo { 340 var cVal utils.CValueEnclosure 341 value, exists := record[cname] 342 if !exists { 343 log.Errorf("qid=%d, PerformGroupByRequestAggsOnRecs: failed to find column %s in record", qid, cname) 344 cVal = utils.CValueEnclosure{Dtype: utils.SS_DT_BACKFILL} 345 } else { 346 dval, err := utils.CreateDtypeEnclosure(value, qid) 347 if dval.Dtype == utils.SS_DT_STRING { 348 floatFieldVal, _ := dtu.ConvertToFloat(value, 64) 349 if err == nil { 350 value = floatFieldVal 351 dval.Dtype = utils.SS_DT_FLOAT 352 } 353 } 354 355 if err != nil { 356 log.Errorf("qid=%d, PerformGroupByRequestAggsOnRecs: failed to create Dtype Value from rec: %v", qid, err) 357 cVal = utils.CValueEnclosure{Dtype: utils.SS_DT_BACKFILL} 358 } else { 359 cVal = utils.CValueEnclosure{Dtype: dval.Dtype, CVal: value} 360 } 361 } 362 363 for _, idx := range indices { 364 measureResults[idx] = cVal 365 } 366 } 367 368 blockRes.AddMeasureResultsToKey(currKey, measureResults, "", false, qid) 369 } 370 371 if nodeResult.RecsAggsBlockResults == nil { 372 nodeResult.RecsAggsBlockResults = blockRes 373 } else { 374 recAggsBlockresults := nodeResult.RecsAggsBlockResults.(*blockresults.BlockResults) 375 recAggsBlockresults.MergeBuckets(blockRes) 376 } 377 378 if nodeResult.RecsAggsProcessedSegments < numTotalSegments { 379 for k := range recs { 380 delete(recs, k) 381 } 382 return nil 383 } else { 384 blockRes = nodeResult.RecsAggsBlockResults.(*blockresults.BlockResults) 385 } 386 387 for k := range finalCols { 388 delete(finalCols, k) 389 } 390 391 validRecIndens := make(map[string]bool) 392 393 for bKey, index := range blockRes.GroupByAggregation.StringBucketIdx { 394 recInden, exists := finalRecInden[bKey] 395 if !exists { 396 continue 397 } 398 validRecIndens[recInden] = true 399 bucketValues, bucketCount := blockRes.GroupByAggregation.AllRunningBuckets[index].GetRunningStatsBucketValues() 400 401 for idx, colName := range nodeResult.GroupByCols { 402 if index == 0 { 403 finalCols[colName] = true 404 } 405 recs[recInden][colName] = columnKeys[bKey][idx] 406 } 407 408 for i, mOp := range internalMops { 409 if index == 0 { 410 finalCols[mOp.String()] = true 411 } 412 413 if mOp.MeasureFunc == utils.Count { 414 recs[recInden][mOp.String()] = bucketCount 415 } else { 416 if mOp.OverrodeMeasureAgg != nil && mOp.OverrodeMeasureAgg.MeasureFunc == utils.Avg { 417 floatVal, err := dtu.ConvertToFloat(bucketValues[i].CVal, 64) 418 if err != nil { 419 log.Errorf("PerformGroupByRequestAggsOnRecs: failed to convert to float: %v", err) 420 continue 421 } 422 recs[recInden][mOp.OverrodeMeasureAgg.String()] = (floatVal / float64(bucketCount)) 423 finalCols[mOp.OverrodeMeasureAgg.String()] = true 424 if mOp.OverrodeMeasureAgg.String() != mOp.String() { 425 delete(finalCols, mOp.String()) 426 } 427 } else { 428 recs[recInden][mOp.String()] = bucketValues[i].CVal 429 } 430 } 431 } 432 } 433 434 for k := range recs { 435 if _, exists := validRecIndens[k]; !exists { 436 delete(recs, k) 437 } 438 } 439 440 return map[string]bool{"CHECK_NEXT_AGG": true} 441 } 442 443 func PerformMeasureAggsOnRecs(nodeResult *structs.NodeResult, recs map[string]map[string]interface{}, finalCols map[string]bool, qid uint64, numTotalSegments uint64) map[string]bool { 444 445 searchResults, err := segresults.InitSearchResults(uint64(len(recs)), &structs.QueryAggregators{MeasureOperations: nodeResult.MeasureOperations}, structs.SegmentStatsCmd, qid) 446 if err != nil { 447 log.Errorf("PerformMeasureAggsOnRecs: failed to initialize search results. Err: %v", err) 448 return nil 449 } 450 451 searchResults.InitSegmentStatsResults(nodeResult.MeasureOperations) 452 453 anyCountStat := -1 454 lenRecords := len(recs) 455 456 for idx, mOp := range nodeResult.MeasureOperations { 457 if mOp.String() == "count(*)" { 458 anyCountStat = idx 459 break 460 } 461 } 462 463 firstRecInden := "" 464 465 for recInden := range recs { 466 firstRecInden = recInden 467 break 468 } 469 470 for recInden, record := range recs { 471 sstMap := make(map[string]*structs.SegStats, 0) 472 473 for _, mOp := range nodeResult.MeasureOperations { 474 dtypeVal, err := utils.CreateDtypeEnclosure(record[mOp.MeasureCol], qid) 475 if err != nil { 476 log.Errorf("PerformMeasureAggsOnRecs: failed to create Dtype Value from rec: %v", err) 477 continue 478 } 479 480 if !dtypeVal.IsNumeric() { 481 floatVal, err := dtu.ConvertToFloat(record[mOp.MeasureCol], 64) 482 if err != nil { 483 log.Errorf("PerformMeasureAggsOnRecs: failed to convert to float: %v", err) 484 continue 485 } 486 dtypeVal = &utils.DtypeEnclosure{Dtype: utils.SS_DT_FLOAT, FloatVal: floatVal} 487 } 488 489 nTypeEnclosure := &utils.NumTypeEnclosure{ 490 Ntype: dtypeVal.Dtype, 491 IntgrVal: int64(dtypeVal.FloatVal), 492 FloatVal: dtypeVal.FloatVal, 493 } 494 495 sstMap[mOp.MeasureCol] = &structs.SegStats{ 496 IsNumeric: dtypeVal.IsNumeric(), 497 Count: 1, 498 Hll: nil, 499 NumStats: &structs.NumericStats{Min: *nTypeEnclosure, Max: *nTypeEnclosure, Sum: *nTypeEnclosure, Dtype: dtypeVal.Dtype}, 500 StringStats: nil, 501 Records: nil, 502 } 503 504 } 505 506 err := searchResults.UpdateSegmentStats(sstMap, nodeResult.MeasureOperations, nil) 507 if err != nil { 508 log.Errorf("PerformMeasureAggsOnRecs: failed to update segment stats: %v", err) 509 } 510 511 delete(recs, recInden) 512 } 513 514 if nodeResult.RecsRunningSegStats == nil { 515 nodeResult.RecsRunningSegStats = searchResults.GetSegmentRunningStats() 516 } else { 517 sstMap := make(map[string]*structs.SegStats, 0) 518 519 for idx, mOp := range nodeResult.MeasureOperations { 520 sstMap[mOp.MeasureCol] = nodeResult.RecsRunningSegStats[idx] 521 } 522 523 err := searchResults.UpdateSegmentStats(sstMap, nodeResult.MeasureOperations, nil) 524 if err != nil { 525 log.Errorf("PerformMeasureAggsOnRecs: failed to update segment stats: %v", err) 526 } 527 528 nodeResult.RecsRunningSegStats = searchResults.GetSegmentRunningStats() 529 } 530 531 if anyCountStat > -1 { 532 nodeResult.TotalRRCCount += uint64(lenRecords) 533 } 534 535 if nodeResult.RecsAggsProcessedSegments < numTotalSegments { 536 return nil 537 } else { 538 for k := range finalCols { 539 delete(finalCols, k) 540 } 541 542 finalSegment := make(map[string]interface{}, 0) 543 544 if anyCountStat > -1 { 545 finalCols[nodeResult.MeasureOperations[anyCountStat].String()] = true 546 finalSegment[nodeResult.MeasureOperations[anyCountStat].String()] = humanize.Comma(int64(nodeResult.TotalRRCCount)) 547 } 548 549 for colName, value := range searchResults.GetSegmentStatsMeasureResults() { 550 finalCols[colName] = true 551 if value.Dtype == utils.SS_DT_FLOAT { 552 value.CVal = humanize.CommafWithDigits(value.CVal.(float64), 3) 553 } else { 554 value.CVal = humanize.Comma(value.CVal.(int64)) 555 } 556 finalSegment[colName] = value.CVal 557 } 558 559 recs[firstRecInden] = finalSegment 560 } 561 562 return map[string]bool{"CHECK_NEXT_AGG": true} 563 } 564 565 // returns all columns in aggs and the timestamp column 566 func GetAggColsAndTimestamp(aggs *structs.QueryAggregators) (map[string]bool, map[string]utils.AggColUsageMode, map[string]bool) { 567 aggCols := make(map[string]bool) 568 timestampKey := config.GetTimeStampKey() 569 aggCols[timestampKey] = true 570 if aggs == nil { 571 return aggCols, nil, nil 572 } 573 574 // Determine if current col used by eval statements 575 aggColUsage := make(map[string]utils.AggColUsageMode) 576 // Determine if current col used by agg values() func 577 valuesUsage := make(map[string]bool) 578 if aggs.Sort != nil { 579 aggCols[aggs.Sort.ColName] = true 580 } 581 if aggs.GroupByRequest != nil { 582 for _, cName := range aggs.GroupByRequest.GroupByColumns { 583 aggCols[cName] = true 584 } 585 for _, mOp := range aggs.GroupByRequest.MeasureOperations { 586 aggregations.DetermineAggColUsage(mOp, aggCols, aggColUsage, valuesUsage) 587 } 588 } 589 if aggs.TimeHistogram != nil && aggs.TimeHistogram.Timechart != nil && len(aggs.TimeHistogram.Timechart.ByField) > 0 { 590 aggCols[aggs.TimeHistogram.Timechart.ByField] = true 591 } 592 return aggCols, aggColUsage, valuesUsage 593 } 594 595 func applyAggregationsToResultFastPath(aggs *structs.QueryAggregators, segmentSearchRecords *SegmentSearchStatus, 596 searchReq *structs.SegmentSearchRequest, blockSummaries []*structs.BlockSummary, queryRange *dtu.TimeRange, 597 sizeLimit uint64, fileParallelism int64, queryMetrics *structs.QueryProcessingMetrics, 598 qid uint64, allSearchResults *segresults.SearchResults) error { 599 600 var blkWG sync.WaitGroup 601 allBlocksChan := make(chan *BlockSearchStatus, fileParallelism) 602 603 rupReader, err := segread.InitNewRollupReader(searchReq.SegmentKey, config.GetTimeStampKey(), qid) 604 if err != nil { 605 log.Errorf("qid=%d, applyAggregationsToResultFastPath: failed initialize rollup reader segkey %s. Error: %v", 606 qid, searchReq.SegmentKey, err) 607 } else { 608 defer rupReader.Close() 609 } 610 611 // we just call this func so that we load up the correct rollup files for the specified ht interval 612 allBlocksToXRollup, _, _ := getRollupForAggregation(aggs, rupReader) 613 for i := int64(0); i < fileParallelism; i++ { 614 blkWG.Add(1) 615 go applyAggregationsToSingleBlockFastPath(aggs, allSearchResults, allBlocksChan, 616 searchReq, queryRange, sizeLimit, &blkWG, queryMetrics, qid, blockSummaries, 617 allBlocksToXRollup) 618 } 619 620 for _, blkResults := range segmentSearchRecords.AllBlockStatus { 621 allBlocksChan <- blkResults 622 } 623 close(allBlocksChan) 624 blkWG.Wait() 625 return nil 626 } 627 628 func applyAggregationsToSingleBlockFastPath(aggs *structs.QueryAggregators, 629 allSearchResults *segresults.SearchResults, blockChan chan *BlockSearchStatus, searchReq *structs.SegmentSearchRequest, 630 queryRange *dtu.TimeRange, sizeLimit uint64, wg *sync.WaitGroup, queryMetrics *structs.QueryProcessingMetrics, 631 qid uint64, blockSummaries []*structs.BlockSummary, 632 allBlocksToXRollup map[uint16]map[uint64]*writer.RolledRecs) { 633 634 blkResults, err := blockresults.InitBlockResults(sizeLimit, aggs, qid) 635 if err != nil { 636 log.Errorf("applyAggregationsToSingleBlockFastPath: failed to initialize block results reader for %s. Err: %v", searchReq.SegmentKey, err) 637 allSearchResults.AddError(err) 638 } 639 640 defer wg.Done() 641 642 for blockStatus := range blockChan { 643 644 var toXRollup map[uint64]*writer.RolledRecs = nil 645 if allBlocksToXRollup != nil { 646 toXRollup = allBlocksToXRollup[blockStatus.BlockNum] 647 } 648 649 for rupTskey, rr := range toXRollup { 650 matchedRrCount := uint16(rr.MatchedRes.GetNumberOfSetBits()) 651 blkResults.AddKeyToTimeBucket(rupTskey, matchedRrCount) 652 } 653 654 blkResults.AddMatchedCount(uint64(blockStatus.numRecords)) 655 queryMetrics.IncrementNumBlocksWithMatch(1) 656 } 657 allSearchResults.AddBlockResults(blkResults) 658 } 659 660 func applySegStatsToMatchedRecords(ops []*structs.MeasureAggregator, segmentSearchRecords *SegmentSearchStatus, 661 searchReq *structs.SegmentSearchRequest, blockSummaries []*structs.BlockSummary, queryRange *dtu.TimeRange, 662 fileParallelism int64, queryMetrics *structs.QueryProcessingMetrics, qid uint64) (map[string]*structs.SegStats, error) { 663 664 var blkWG sync.WaitGroup 665 allBlocksChan := make(chan *BlockSearchStatus, fileParallelism) 666 667 measureColAndTS, aggColUsage, valuesUsage := getSegStatsMeasureCols(ops) 668 sharedReader, err := segread.InitSharedMultiColumnReaders(searchReq.SegmentKey, measureColAndTS, searchReq.AllBlocksToSearch, 669 blockSummaries, int(fileParallelism), qid) 670 if err != nil { 671 log.Errorf("applyAggregationsToResult: failed to load all column files reader for %s. Needed cols %+v. Err: %+v", 672 searchReq.SegmentKey, measureColAndTS, err) 673 return nil, errors.New("failed to init sharedmulticolreader") 674 } 675 defer sharedReader.Close() 676 677 statRes := segresults.InitStatsResults() 678 delete(measureColAndTS, config.GetTimeStampKey()) 679 for i := int64(0); i < fileParallelism; i++ { 680 blkWG.Add(1) 681 go segmentStatsWorker(statRes, measureColAndTS, aggColUsage, valuesUsage, sharedReader.MultiColReaders[i], allBlocksChan, 682 searchReq, blockSummaries, queryRange, &blkWG, queryMetrics, qid) 683 } 684 685 absKeys := make([]uint16, 0, len(segmentSearchRecords.AllBlockStatus)) 686 for k := range segmentSearchRecords.AllBlockStatus { 687 absKeys = append(absKeys, k) 688 } 689 for _, k := range absKeys { 690 blkResults := segmentSearchRecords.AllBlockStatus[k] 691 if blkResults.hasAnyMatched { 692 allBlocksChan <- blkResults 693 } 694 } 695 close(allBlocksChan) 696 blkWG.Wait() 697 698 return statRes.GetSegStats(), nil 699 } 700 701 // returns all columns (+timestamp) in the measure operations 702 func getSegStatsMeasureCols(ops []*structs.MeasureAggregator) (map[string]bool, map[string]utils.AggColUsageMode, map[string]bool) { 703 // Determine if current col used by eval statements 704 aggColUsage := make(map[string]utils.AggColUsageMode) 705 // Determine if current col used by agg values() func 706 valuesUsage := make(map[string]bool) 707 aggCols := make(map[string]bool) 708 timestampKey := config.GetTimeStampKey() 709 aggCols[timestampKey] = true 710 for _, op := range ops { 711 aggregations.DetermineAggColUsage(op, aggCols, aggColUsage, valuesUsage) 712 } 713 return aggCols, aggColUsage, valuesUsage 714 } 715 716 func segmentStatsWorker(statRes *segresults.StatsResults, mCols map[string]bool, aggColUsage map[string]utils.AggColUsageMode, valuesUsage map[string]bool, 717 multiReader *segread.MultiColSegmentReader, blockChan chan *BlockSearchStatus, searchReq *structs.SegmentSearchRequest, blockSummaries []*structs.BlockSummary, 718 queryRange *dtu.TimeRange, wg *sync.WaitGroup, queryMetrics *structs.QueryProcessingMetrics, qid uint64) { 719 720 defer wg.Done() 721 bb := bbp.Get() 722 defer bbp.Put(bb) 723 724 localStats := make(map[string]*structs.SegStats) 725 for blockStatus := range blockChan { 726 isBlkFullyEncosed := queryRange.AreTimesFullyEnclosed(blockSummaries[blockStatus.BlockNum].LowTs, 727 blockSummaries[blockStatus.BlockNum].HighTs) 728 recIT, err := blockStatus.GetRecordIteratorForBlock(utils.And) 729 if err != nil { 730 log.Errorf("qid=%d, segmentStatsWorker: failed to initialize record iterator for block %+v. Err: %v", 731 qid, blockStatus.BlockNum, err) 732 continue 733 } 734 735 sortedMatchedRecs := make([]uint16, recIT.AllRecLen) 736 idx := 0 737 for i := uint(0); i < uint(recIT.AllRecLen); i++ { 738 if !recIT.ShouldProcessRecord(i) { 739 continue 740 } 741 recNum16 := uint16(i) 742 if !isBlkFullyEncosed { 743 recTs, err := multiReader.GetTimeStampForRecord(blockStatus.BlockNum, recNum16, qid) 744 if err != nil { 745 log.Errorf("qid=%d, segmentStatsWorker failed to initialize time reader for block %+v. Err: %v", qid, 746 blockStatus.BlockNum, err) 747 continue 748 } 749 if !queryRange.CheckInRange(recTs) { 750 continue 751 } 752 } 753 sortedMatchedRecs[idx] = uint16(i) 754 idx++ 755 } 756 sortedMatchedRecs = sortedMatchedRecs[:idx] 757 nonDeCols := applySegmentStatsUsingDictEncoding(multiReader, sortedMatchedRecs, mCols, aggColUsage, valuesUsage, blockStatus.BlockNum, recIT, localStats, bb, qid) 758 for _, recNum := range sortedMatchedRecs { 759 for colName := range nonDeCols { 760 val, err := multiReader.ExtractValueFromColumnFile(colName, blockStatus.BlockNum, recNum, qid) 761 if err != nil { 762 log.Errorf("qid=%d, segmentStatsWorker failed to extract value for column %+v. Err: %v", qid, colName, err) 763 continue 764 } 765 766 hasValuesFunc, exists := valuesUsage[colName] 767 if !exists { 768 hasValuesFunc = false 769 } 770 771 if val.Dtype == utils.SS_DT_STRING { 772 str, err := val.GetString() 773 if err != nil { 774 log.Errorf("qid=%d, segmentStatsWorker failed to extract value for string although type check passed %+v. Err: %v", qid, colName, err) 775 continue 776 } 777 stats.AddSegStatsStr(localStats, colName, str, bb, aggColUsage, hasValuesFunc) 778 } else { 779 fVal, err := val.GetFloatValue() 780 if err != nil { 781 log.Errorf("qid=%d, segmentStatsWorker failed to extract numerical value for type %+v. Err: %v", qid, val.Dtype, err) 782 continue 783 } 784 stats.AddSegStatsNums(localStats, colName, utils.SS_FLOAT64, 0, 0, fVal, fmt.Sprintf("%v", fVal), bb, aggColUsage, hasValuesFunc) 785 } 786 } 787 } 788 } 789 statRes.MergeSegStats(localStats) 790 } 791 792 // returns all columns that are not dict encoded 793 func applySegmentStatsUsingDictEncoding(mcr *segread.MultiColSegmentReader, filterdRecNums []uint16, mCols map[string]bool, aggColUsage map[string]utils.AggColUsageMode, valuesUsage map[string]bool, 794 blockNum uint16, bri *BlockRecordIterator, lStats map[string]*structs.SegStats, bb *bbp.ByteBuffer, qid uint64) map[string]bool { 795 retVal := make(map[string]bool) 796 for colName := range mCols { 797 if colName == "*" { 798 stats.AddSegStatsCount(lStats, colName, uint64(len(filterdRecNums))) 799 continue 800 } 801 isDict, err := mcr.IsBlkDictEncoded(colName, blockNum) 802 if err != nil { 803 log.Errorf("qid=%d, segmentStatsWorker failed to check if column is dict encoded %+v. Err: %v", qid, colName, err) 804 continue 805 } 806 if !isDict { 807 retVal[colName] = true 808 continue 809 } 810 results := make(map[uint16]map[string]interface{}) 811 ok := mcr.GetDictEncCvalsFromColFile(results, colName, blockNum, filterdRecNums, qid) 812 if !ok { 813 log.Errorf("qid=%d, segmentStatsWorker failed to get dict cvals for col %s", qid, colName) 814 continue 815 } 816 for _, cMap := range results { 817 for colName, rawVal := range cMap { 818 colUsage, exists := aggColUsage[colName] 819 if !exists { 820 colUsage = utils.NoEvalUsage 821 } 822 // If current col will be used by eval funcs, we should store the raw data and process it 823 if colUsage == utils.WithEvalUsage || colUsage == utils.BothUsage { 824 e := utils.CValueEnclosure{} 825 err := e.ConvertValue(rawVal) 826 if err != nil { 827 log.Errorf("applySegmentStatsUsingDictEncoding: %v", err) 828 continue 829 } 830 831 if e.Dtype != utils.SS_DT_STRING { 832 retVal[colName] = true 833 continue 834 } 835 836 var stats *structs.SegStats 837 var ok bool 838 stats, ok = lStats[colName] 839 if !ok { 840 stats = &structs.SegStats{ 841 IsNumeric: false, 842 Count: 0, 843 Hll: hyperloglog.New16(), 844 Records: make([]*utils.CValueEnclosure, 0), 845 } 846 847 lStats[colName] = stats 848 } 849 stats.Records = append(stats.Records, &e) 850 851 // Current col only used by eval statements 852 if colUsage == utils.WithEvalUsage { 853 continue 854 } 855 } 856 857 hasValuesFunc, exists := valuesUsage[colName] 858 if !exists { 859 hasValuesFunc = false 860 } 861 862 switch val := rawVal.(type) { 863 case string: 864 stats.AddSegStatsStr(lStats, colName, val, bb, aggColUsage, hasValuesFunc) 865 default: 866 // This should never occur as dict encoding is only supported for string fields. 867 log.Errorf("qid=%d, segmentStatsWorker found a non string in a dict encoded segment. CName %+s", qid, colName) 868 } 869 } 870 } 871 } 872 return retVal 873 } 874 875 func iterRecsAddRrc(recIT *BlockRecordIterator, mcr *segread.MultiColSegmentReader, 876 blockStatus *BlockSearchStatus, queryRange *dtu.TimeRange, aggs *structs.QueryAggregators, 877 aggsHasTimeHt bool, addedTimeHt bool, blkResults *blockresults.BlockResults, 878 queryMetrics *structs.QueryProcessingMetrics, 879 allSearchResults *segresults.SearchResults, searchReq *structs.SegmentSearchRequest, qid uint64) { 880 881 numRecsMatched := uint16(0) 882 for recNum := uint(0); recNum < uint(recIT.AllRecLen); recNum++ { 883 if !recIT.ShouldProcessRecord(recNum) { 884 continue 885 } 886 recNumUint16 := uint16(recNum) 887 recTs, err := mcr.GetTimeStampForRecord(blockStatus.BlockNum, recNumUint16, qid) 888 if err != nil { 889 break 890 } 891 if !queryRange.CheckInRange(recTs) { 892 recIT.UnsetRecord(recNum) 893 continue 894 } 895 if aggs != nil && aggsHasTimeHt && !addedTimeHt { 896 blkResults.AddKeyToTimeBucket(recTs, 1) 897 } 898 numRecsMatched++ 899 if blkResults.ShouldAddMore() { 900 sortVal, invalidCol := extractSortVals(aggs, mcr, blockStatus.BlockNum, recNumUint16, recTs, qid) 901 if !invalidCol && blkResults.WillValueBeAdded(sortVal) { 902 rrc := &utils.RecordResultContainer{ 903 SegKeyInfo: utils.SegKeyInfo{ 904 SegKeyEnc: allSearchResults.GetAddSegEnc(searchReq.SegmentKey), 905 IsRemote: false, 906 }, 907 BlockNum: blockStatus.BlockNum, 908 RecordNum: recNumUint16, 909 SortColumnValue: sortVal, 910 VirtualTableName: searchReq.VirtualTableName, 911 TimeStamp: recTs, 912 } 913 blkResults.Add(rrc) 914 } 915 } 916 } 917 if numRecsMatched > 0 { 918 blkResults.AddMatchedCount(uint64(numRecsMatched)) 919 queryMetrics.IncrementNumBlocksWithMatch(1) 920 } 921 } 922 923 func doAggs(aggs *structs.QueryAggregators, mcr *segread.MultiColSegmentReader, 924 bss *BlockSearchStatus, recIT *BlockRecordIterator, blkResults *blockresults.BlockResults, 925 isBlkFullyEncosed bool, qid uint64) { 926 927 if aggs == nil || aggs.GroupByRequest == nil { 928 return // nothing to do 929 } 930 931 measureInfo, internalMops := blkResults.GetConvertedMeasureInfo() 932 addRecordToAggregations(aggs.GroupByRequest, aggs.TimeHistogram, measureInfo, len(internalMops), mcr, 933 bss.BlockNum, recIT, blkResults, qid) 934 935 } 936 937 func CanDoStarTree(segKey string, aggs *structs.QueryAggregators, 938 qid uint64) (bool, *segread.AgileTreeReader) { 939 940 // init agileTreeader 941 str, err := segread.InitNewAgileTreeReader(segKey, qid) 942 if err != nil { 943 log.Errorf("qid=%v, CanDoStarTree: failed to init agileTreereader, err: %v", qid, err) 944 return false, nil 945 } 946 947 ok, err := str.CanUseAgileTree(aggs.GroupByRequest) 948 if err != nil { 949 str.Close() 950 return false, nil 951 } 952 953 if !ok { 954 str.Close() 955 return false, nil 956 } 957 return true, str // caller responsible to close str if we can use agileTree 958 } 959 960 func ApplyAgileTree(str *segread.AgileTreeReader, aggs *structs.QueryAggregators, 961 allSearchResults *segresults.SearchResults, sizeLimit uint64, qid uint64, 962 agileTreeBuf []byte) { 963 964 _, internalMops := allSearchResults.BlockResults.GetConvertedMeasureInfo() 965 966 // Note we are using AllSearchResults's blockresult directly here to avoid creating 967 // blkRes for each seg and then merging it. This change has perf improvements 968 // but the side effect is other threads (async wsSearchHandler threads can't access the 969 // blkResuls, else will panic. ALSO this means we can only apply agileTree one seg at a time. 970 err := str.ApplyGroupByJit(aggs.GroupByRequest.GroupByColumns, internalMops, 971 allSearchResults.BlockResults, qid, agileTreeBuf) 972 if err != nil { 973 allSearchResults.AddError(err) 974 log.Errorf("qid=%v, ApplyAgileTree: failed to JIT agileTree aggs, err: %v", qid, err) 975 return 976 } 977 } 978 979 func checkIfGrpColsPresent(grpReq *structs.GroupByRequest, 980 mcsr *segread.MultiColSegmentReader, allSearchResults *segresults.SearchResults) (string, bool) { 981 measureInfo, _ := allSearchResults.BlockResults.GetConvertedMeasureInfo() 982 for _, cname := range grpReq.GroupByColumns { 983 if !mcsr.IsColPresent(cname) { 984 return cname, false 985 } 986 } 987 988 for cname := range measureInfo { 989 if !mcsr.IsColPresent(cname) { 990 return cname, false 991 } 992 } 993 return "", true 994 }