github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/structs/segstructs.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package structs 18 19 import ( 20 "encoding/json" 21 "fmt" 22 "math" 23 24 "github.com/axiomhq/hyperloglog" 25 "github.com/siglens/siglens/pkg/config" 26 "github.com/siglens/siglens/pkg/segment/utils" 27 sutils "github.com/siglens/siglens/pkg/utils" 28 log "github.com/sirupsen/logrus" 29 ) 30 31 type Numbers struct { 32 Min_uint64 uint64 `json:"Min_uint64"` 33 Max_uint64 uint64 `json:"Max_uint64"` 34 Min_int64 int64 `json:"Min_int64"` 35 Max_int64 int64 `json:"Max_int64"` 36 Min_float64 float64 `json:"Min_float64"` 37 Max_float64 float64 `json:"Max_float64"` 38 NumType utils.RangeNumType `json:"NumType"` 39 } 40 41 type OrderByAggregator struct { 42 ColumnName string 43 IncreaseOrder bool 44 } 45 46 type TimeBucket struct { 47 IntervalMillis uint64 // size of each histogram bucket in millis 48 StartTime uint64 // start time of histogram 49 EndTime uint64 // end time of histogram 50 AggName string // name of aggregation 51 Timechart *TimechartExpr 52 } 53 54 type RangeBucket struct { 55 BucketKey string // column name to group 56 Interval float64 // interval of request 57 MinBucketSize uint64 // minimum count for bucket to show up 58 Count uint64 // max number of unique histograms to return 59 } 60 61 type ValueOrder struct { 62 Ascending bool // should results be given in ascending order or descending. (true = ascending, false = descending) 63 } 64 65 type ValueBucket struct { 66 BucketKey string // columnName for which to create buckets on 67 MinBucketSize uint64 // min count per bucket 68 Count uint64 // limit number of resulting histograms 69 CatchAll bool 70 ValueOrder *ValueOrder // how to return filter buckets 71 } 72 73 type FilterCondition struct { 74 Criteria *FilterCriteria 75 Name string 76 } 77 78 type FilterBucket struct { 79 FilterCriteria []*FilterCondition 80 CatchAllBucket bool 81 Count uint64 // limit number of resulting histograms 82 } 83 84 // Defines the different types of bucketing aggregations for processing of results 85 type AggregationType uint8 86 87 const ( 88 TimeHistogram AggregationType = iota 89 GroupBy 90 ) 91 92 type PipeCommandType uint8 93 94 const ( 95 OutputTransformType PipeCommandType = iota + 1 96 MeasureAggsType 97 GroupByType 98 TransactionType 99 ) 100 101 type QueryType uint8 102 103 const ( 104 InvalidCmd = iota 105 SegmentStatsCmd 106 GroupByCmd 107 RRCCmd 108 ) 109 110 // How to sort results 111 type SortRequest struct { 112 ColName string // column name to sort on 113 Ascending bool // if true, result is in ascending order. Else, result is in descending order 114 } 115 116 type FilterStringExpr struct { 117 StringValue string 118 EvalBoolExpr *BoolExpr 119 SearchNode interface{} // type: *ast.Node while parsing, later evaluated to ASTNode 120 } 121 122 type TransactionArguments struct { 123 SortedRecordsSlice []map[string]interface{} 124 OpenTransactionsState map[string]*TransactionGroupState 125 OpenTransactionEvents map[string][]map[string]interface{} 126 Fields []string 127 StartsWith *FilterStringExpr 128 EndsWith *FilterStringExpr 129 } 130 131 type TransactionGroupState struct { 132 Key string 133 Open bool 134 RecInden string 135 Timestamp uint64 136 } 137 138 type QueryAggregators struct { 139 PipeCommandType PipeCommandType 140 OutputTransforms *OutputTransforms 141 MeasureOperations []*MeasureAggregator 142 MathOperations []*MathEvaluator 143 TimeHistogram *TimeBucket // Request for time histograms 144 GroupByRequest *GroupByRequest // groupby aggregation request 145 Sort *SortRequest // how to sort resulting data 146 EarlyExit bool // should query early exit 147 BucketLimit int 148 ShowRequest *ShowRequest 149 TableName string 150 TransactionArguments *TransactionArguments 151 Next *QueryAggregators 152 Limit int 153 } 154 155 type ShowRequest struct { 156 ShowTables bool 157 ShowFilter *ShowFilter 158 ShowTable string 159 ColumnsRequest *ShowColumns 160 } 161 162 type ShowColumns struct { 163 InTable string 164 } 165 166 type ShowFilter struct { 167 Like string 168 } 169 170 type OutputTransforms struct { 171 HarcodedCol []string 172 RenameHardcodedColumns map[string]string 173 OutputColumns *ColumnsRequest // post processing on output columns 174 LetColumns *LetColumnsRequest // let columns processing on output columns 175 FilterRows *BoolExpr // discard rows failing some condition 176 MaxRows uint64 // if 0, get all results; else, get at most this many 177 RowsAdded uint64 // number of rows added to the result. This is used in conjunction with MaxRows. 178 } 179 180 type GroupByRequest struct { 181 MeasureOperations []*MeasureAggregator 182 GroupByColumns []string 183 AggName string // name of aggregation 184 BucketCount int 185 } 186 187 type MeasureAggregator struct { 188 MeasureCol string `json:"measureCol,omitempty"` 189 MeasureFunc utils.AggregateFunctions `json:"measureFunc,omitempty"` 190 StrEnc string `json:"strEnc,omitempty"` 191 ValueColRequest *ValueExpr `json:"valueColRequest,omitempty"` 192 OverrodeMeasureAgg *MeasureAggregator `json:"overrideFunc,omitempty"` 193 } 194 195 type MathEvaluator struct { 196 MathCol string `json:"mathCol,omitempty"` 197 MathFunc utils.MathFunctions `json:"mathFunc,omitempty"` 198 StrEnc string `json:"strEnc,omitempty"` 199 ValueColRequest *ValueExpr `json:"valueCol,omitempty"` 200 } 201 202 type ColumnsRequest struct { 203 RenameColumns map[string]string // maps original column name to new column name 204 ExcludeColumns []string // names of columns to exclude 205 IncludeColumns []string // names of columns to include 206 IncludeValues []*IncludeValue // values of columns to include. Maps column name to index in column 207 RenameAggregationColumns map[string]string // values of aggregations to rename 208 Logfmt bool // true if logfmt request 209 } 210 211 type IncludeValue struct { 212 Index int //index of value in original column 213 ColName string //original column name 214 Label string //new label of value in record 215 } 216 217 // Only NewColName and one of the other fields should have a value 218 type LetColumnsRequest struct { 219 MultiColsRequest *MultiColLetRequest 220 SingleColRequest *SingleColLetRequest 221 ValueColRequest *ValueExpr 222 RexColRequest *RexExpr 223 StatisticColRequest *StatisticExpr 224 RenameColRequest *RenameExpr 225 DedupColRequest *DedupExpr 226 SortColRequest *SortExpr 227 NewColName string 228 } 229 230 type MultiColLetRequest struct { 231 LeftCName string 232 Oper utils.LogicalAndArithmeticOperator 233 RightCName string 234 } 235 236 type SingleColLetRequest struct { 237 CName string 238 Oper utils.LogicalAndArithmeticOperator 239 Value *utils.DtypeEnclosure 240 } 241 242 type BucketResult struct { 243 ElemCount uint64 // total number of elements in bucket 244 StatRes map[string]utils.CValueEnclosure // results of statistic functions 245 BucketKey interface{} // bucket key 246 GroupByKeys []string 247 } 248 249 type AggregationResult struct { 250 IsDateHistogram bool // is this a date histogram 251 Results []*BucketResult // histogram results 252 } 253 254 type BucketHolder struct { 255 GroupByValues []string 256 MeasureVal map[string]interface{} 257 } 258 259 type QueryCount struct { 260 TotalCount uint64 // total number of 261 Op utils.FilterOperator 262 EarlyExit bool // if early exit was requested or not 263 } 264 265 // A helper struct to keep track of errors and results together 266 // In cases of partial failures, both logLines and errList can be defined 267 type NodeResult struct { 268 AllRecords []*utils.RecordResultContainer 269 ErrList []error 270 Histogram map[string]*AggregationResult 271 TotalResults *QueryCount 272 RenameColumns map[string]string 273 SegEncToKey map[uint16]string 274 TotalRRCCount uint64 275 MeasureFunctions []string `json:"measureFunctions,omitempty"` 276 MeasureResults []*BucketHolder `json:"measure,omitempty"` 277 GroupByCols []string `json:"groupByCols,omitempty"` 278 Qtype string `json:"qtype,omitempty"` 279 BucketCount int `json:"bucketCount,omitempty"` 280 PerformAggsOnRecs bool // if true, perform aggregations on records that are returned from rrcreader.go 281 RecsAggsType PipeCommandType // To determine Whether it is GroupByType or MeasureAggsType 282 GroupByRequest *GroupByRequest 283 MeasureOperations []*MeasureAggregator 284 NextQueryAgg *QueryAggregators 285 RecsAggsBlockResults interface{} // Evaluates to *blockresults.BlockResults 286 RecsAggsProcessedSegments uint64 287 RecsRunningSegStats []*SegStats 288 TransactionEventRecords map[string]map[string]interface{} 289 TransactionsProcessed map[string]map[string]interface{} 290 } 291 292 type SegStats struct { 293 IsNumeric bool 294 Count uint64 295 Hll *hyperloglog.Sketch 296 NumStats *NumericStats 297 StringStats *StringStats 298 Records []*utils.CValueEnclosure 299 } 300 301 type NumericStats struct { 302 Min utils.NumTypeEnclosure `json:"min,omitempty"` 303 Max utils.NumTypeEnclosure `json:"max,omitempty"` 304 Sum utils.NumTypeEnclosure `json:"sum,omitempty"` 305 Dtype utils.SS_DTYPE `json:"Dtype,omitempty"` // Dtype shared across min,max, and sum 306 } 307 308 type StringStats struct { 309 StrSet map[string]struct{} 310 } 311 312 // json exportable struct for segstats 313 type SegStatsJSON struct { 314 IsNumeric bool 315 Count uint64 316 RawHll []byte 317 NumStats *NumericStats 318 } 319 320 type AllSegStatsJSON struct { 321 AllSegStats map[string]*SegStatsJSON 322 } 323 324 type RangeStat struct { 325 Min float64 326 Max float64 327 } 328 329 type AvgStat struct { 330 Count int64 331 Sum float64 332 } 333 334 // init SegStats from raw bytes of SegStatsJSON 335 func (ss *SegStats) Init(rawSegStatJson []byte) error { 336 var segStatJson *SegStatsJSON 337 err := json.Unmarshal(rawSegStatJson, &segStatJson) 338 if err != nil { 339 log.Errorf("SegStats.Init: Failed to unmarshal SegStatsJSON: %v", err) 340 return err 341 } 342 ss.IsNumeric = segStatJson.IsNumeric 343 ss.Count = segStatJson.Count 344 ss.Hll = hyperloglog.New() 345 err = ss.Hll.UnmarshalBinary(segStatJson.RawHll) 346 if err != nil { 347 log.Errorf("SegStats.Init: Failed to unmarshal hyperloglog: %v", err) 348 return err 349 } 350 ss.NumStats = segStatJson.NumStats 351 return nil 352 } 353 354 func (ssj *SegStatsJSON) ToStats() (*SegStats, error) { 355 ss := &SegStats{} 356 ss.IsNumeric = ssj.IsNumeric 357 ss.Count = ssj.Count 358 ss.Hll = hyperloglog.New() 359 err := ss.Hll.UnmarshalBinary(ssj.RawHll) 360 if err != nil { 361 log.Errorf("SegStatsJSON.ToStats: Failed to unmarshal hyperloglog: %v", err) 362 return nil, err 363 } 364 ss.NumStats = ssj.NumStats 365 return ss, nil 366 } 367 368 // convert SegStats to SegStatsJSON 369 func (ss *SegStats) ToJSON() (*SegStatsJSON, error) { 370 segStatJson := &SegStatsJSON{} 371 segStatJson.IsNumeric = ss.IsNumeric 372 segStatJson.Count = ss.Count 373 rawHll, err := ss.Hll.MarshalBinary() 374 if err != nil { 375 log.Errorf("SegStats.ToJSON: Failed to marshal hyperloglog: %v", err) 376 return nil, err 377 } 378 segStatJson.RawHll = rawHll 379 segStatJson.NumStats = ss.NumStats 380 return segStatJson, nil 381 } 382 383 func (ma *MeasureAggregator) String() string { 384 if ma.StrEnc != "" { 385 return ma.StrEnc 386 } 387 ma.StrEnc = fmt.Sprintf("%+v(%v)", ma.MeasureFunc.String(), ma.MeasureCol) 388 return ma.StrEnc 389 } 390 391 func (ss *SegStats) Merge(other *SegStats) { 392 ss.Count += other.Count 393 ss.Records = append(ss.Records, other.Records...) 394 err := ss.Hll.Merge(other.Hll) 395 if err != nil { 396 log.Errorf("Failed to merge hyperloglog stats: %v", err) 397 } 398 399 if ss.NumStats == nil { 400 ss.NumStats = other.NumStats 401 return 402 } 403 ss.NumStats.Merge(other.NumStats) 404 } 405 406 func (ss *NumericStats) Merge(other *NumericStats) { 407 switch ss.Min.Ntype { 408 case utils.SS_DT_FLOAT: 409 if other.Dtype == utils.SS_DT_FLOAT { 410 ss.Min.FloatVal = math.Min(ss.Min.FloatVal, other.Min.FloatVal) 411 ss.Max.FloatVal = math.Max(ss.Max.FloatVal, other.Max.FloatVal) 412 ss.Sum.FloatVal = ss.Sum.FloatVal + other.Sum.FloatVal 413 } else { 414 ss.Min.FloatVal = math.Min(ss.Min.FloatVal, float64(other.Min.IntgrVal)) 415 ss.Max.FloatVal = math.Max(ss.Max.FloatVal, float64(other.Max.IntgrVal)) 416 ss.Sum.FloatVal = ss.Sum.FloatVal + float64(other.Sum.IntgrVal) 417 } 418 default: 419 if other.Dtype == utils.SS_DT_FLOAT { 420 ss.Min.FloatVal = math.Min(float64(ss.Min.IntgrVal), other.Min.FloatVal) 421 ss.Max.FloatVal = math.Max(float64(ss.Max.IntgrVal), other.Max.FloatVal) 422 ss.Sum.FloatVal = float64(ss.Sum.IntgrVal) + other.Sum.FloatVal 423 ss.Dtype = utils.SS_DT_FLOAT 424 } else { 425 ss.Min.IntgrVal = sutils.MinInt64(ss.Min.IntgrVal, other.Min.IntgrVal) 426 ss.Max.IntgrVal = sutils.MaxInt64(ss.Max.IntgrVal, other.Max.IntgrVal) 427 ss.Sum.IntgrVal = ss.Sum.IntgrVal + other.Sum.IntgrVal 428 ss.Dtype = utils.SS_DT_SIGNED_NUM 429 } 430 } 431 } 432 433 func (nr *NodeResult) ApplyScroll(scroll int) { 434 435 if scroll == 0 { 436 return 437 } 438 439 if len(nr.AllRecords) <= scroll { 440 nr.AllRecords = make([]*utils.RecordResultContainer, 0) 441 return 442 } 443 444 nr.AllRecords = nr.AllRecords[scroll:] 445 } 446 447 func (n *Numbers) Copy() *Numbers { 448 449 retNum := &Numbers{ 450 NumType: n.NumType, 451 } 452 switch n.NumType { 453 case utils.RNT_UNSIGNED_INT: 454 retNum.Min_uint64 = n.Min_uint64 455 retNum.Max_uint64 = n.Max_uint64 456 case utils.RNT_SIGNED_INT: 457 retNum.Min_int64 = n.Min_int64 458 retNum.Max_int64 = n.Max_int64 459 case utils.RNT_FLOAT64: 460 retNum.Min_float64 = n.Min_float64 461 retNum.Max_float64 = n.Max_float64 462 } 463 return retNum 464 } 465 466 func (qa *QueryAggregators) IsAggsEmpty() bool { 467 if qa.TimeHistogram != nil { 468 return false 469 } 470 if qa.GroupByRequest != nil { 471 if qa.GroupByRequest.GroupByColumns != nil && len(qa.GroupByRequest.GroupByColumns) > 0 { 472 return false 473 } 474 if qa.GroupByRequest.MeasureOperations != nil && len(qa.GroupByRequest.MeasureOperations) > 0 { 475 return false 476 } 477 } 478 return true 479 } 480 481 func (qa *QueryAggregators) IsStatisticBlockEmpty() bool { 482 return (qa != nil && qa.OutputTransforms != nil && qa.OutputTransforms.LetColumns != nil && 483 qa.OutputTransforms.LetColumns.StatisticColRequest == nil) 484 } 485 486 // To determine whether it contains certain specific AggregatorBlocks, such as: Rename Block, Rex Block... 487 func (qa *QueryAggregators) hasLetColumnsRequest() bool { 488 return qa != nil && qa.OutputTransforms != nil && qa.OutputTransforms.LetColumns != nil && 489 (qa.OutputTransforms.LetColumns.RexColRequest != nil || qa.OutputTransforms.LetColumns.RenameColRequest != nil || qa.OutputTransforms.LetColumns.DedupColRequest != nil || 490 qa.OutputTransforms.LetColumns.ValueColRequest != nil || qa.OutputTransforms.LetColumns.SortColRequest != nil) 491 } 492 493 // To determine whether it contains certain specific AggregatorBlocks, such as: Rename Block, Rex Block, MaxRows... 494 func (qa *QueryAggregators) HasQueryAggergatorBlock() bool { 495 return qa != nil && qa.OutputTransforms != nil && (qa.hasLetColumnsRequest() || qa.OutputTransforms.MaxRows > qa.OutputTransforms.RowsAdded) 496 } 497 498 func (qa *QueryAggregators) HasQueryAggergatorBlockInChain() bool { 499 if qa.HasQueryAggergatorBlock() { 500 return true 501 } 502 if qa.Next != nil { 503 return qa.Next.HasQueryAggergatorBlockInChain() 504 } 505 return false 506 } 507 508 func (qa *QueryAggregators) HasDedupBlock() bool { 509 if qa != nil && qa.OutputTransforms != nil && qa.OutputTransforms.LetColumns != nil { 510 letColumns := qa.OutputTransforms.LetColumns 511 512 if letColumns.DedupColRequest != nil { 513 return true 514 } 515 } 516 517 return false 518 } 519 520 func (qa *QueryAggregators) HasDedupBlockInChain() bool { 521 if qa.HasDedupBlock() { 522 return true 523 } 524 if qa.Next != nil { 525 return qa.Next.HasDedupBlockInChain() 526 } 527 return false 528 } 529 530 func (qa *QueryAggregators) HasSortBlock() bool { 531 if qa != nil && qa.OutputTransforms != nil && qa.OutputTransforms.LetColumns != nil { 532 letColumns := qa.OutputTransforms.LetColumns 533 534 if letColumns.SortColRequest != nil { 535 return true 536 } 537 } 538 539 return false 540 } 541 542 func (qa *QueryAggregators) HasSortBlockInChain() bool { 543 if qa.HasSortBlock() { 544 return true 545 } 546 if qa.Next != nil { 547 return qa.Next.HasSortBlockInChain() 548 } 549 return false 550 } 551 552 func (qa *QueryAggregators) HasTransactionArguments() bool { 553 return qa != nil && qa.TransactionArguments != nil 554 } 555 556 func (qa *QueryAggregators) HasTransactionArgumentsInChain() bool { 557 if qa.HasTransactionArguments() { 558 return true 559 } 560 if qa.Next != nil { 561 return qa.Next.HasTransactionArgumentsInChain() 562 } 563 return false 564 } 565 566 func (qa *QueryAggregators) HasRexBlockInQA() bool { 567 return qa != nil && qa.OutputTransforms != nil && qa.OutputTransforms.LetColumns != nil && 568 (qa.OutputTransforms.LetColumns.RexColRequest != nil) 569 } 570 571 func (qa *QueryAggregators) HasGroupByOrMeasureAggsInBlock() bool { 572 return qa != nil && (qa.GroupByRequest != nil || qa.MeasureOperations != nil) 573 } 574 575 func (qa *QueryAggregators) HasGroupByOrMeasureAggsInChain() bool { 576 if qa.HasGroupByOrMeasureAggsInBlock() { 577 return true 578 } 579 if qa.Next != nil { 580 return qa.Next.HasGroupByOrMeasureAggsInChain() 581 } 582 return false 583 } 584 585 func (qa *QueryAggregators) HasRexBlockInChainWithStats() bool { 586 if qa.HasRexBlockInQA() { 587 return qa.Next != nil && qa.Next.HasGroupByOrMeasureAggsInChain() 588 } 589 if qa.Next != nil { 590 return qa.Next.HasRexBlockInChainWithStats() 591 } 592 return false 593 } 594 595 // To determine whether it contains ValueColRequest 596 func (qa *QueryAggregators) HasValueColRequest() bool { 597 for _, agg := range qa.MeasureOperations { 598 if agg.ValueColRequest != nil { 599 return true 600 } 601 } 602 return false 603 } 604 605 // To determine whether it contains Aggregate Func: Values() 606 func (qa *QueryAggregators) HasValuesFunc() bool { 607 for _, agg := range qa.MeasureOperations { 608 if agg.MeasureFunc == utils.Values { 609 return true 610 } 611 } 612 return false 613 } 614 615 func (qa *QueryAggregators) UsedByTimechart() bool { 616 return qa != nil && qa.TimeHistogram != nil && qa.TimeHistogram.Timechart != nil 617 } 618 619 func (qa *QueryAggregators) CanLimitBuckets() bool { 620 // We shouldn't limit the buckets if there's other things to do after the 621 // aggregation, like sorting, filtering, making new columns, etc. 622 return qa.Sort == nil && qa.Next == nil 623 } 624 625 // Init default query aggregators. 626 // By default, a descending sort is added 627 func InitDefaultQueryAggregations() *QueryAggregators { 628 qAggs := &QueryAggregators{ 629 EarlyExit: true, 630 Sort: &SortRequest{ 631 ColName: config.GetTimeStampKey(), 632 Ascending: false, 633 }, 634 } 635 return qAggs 636 } 637 638 func (qtype QueryType) String() string { 639 640 switch qtype { 641 case SegmentStatsCmd: 642 return "segstats-query" 643 case GroupByCmd: 644 return "aggs-query" 645 case RRCCmd: 646 return "logs-query" 647 default: 648 return "invalid" 649 } 650 }