github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/writer/stats/segstats.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package stats
    18  
    19  import (
    20  	"math"
    21  	"strconv"
    22  
    23  	. "github.com/siglens/siglens/pkg/segment/structs"
    24  	. "github.com/siglens/siglens/pkg/segment/utils"
    25  	"github.com/siglens/siglens/pkg/utils"
    26  
    27  	"github.com/axiomhq/hyperloglog"
    28  	bbp "github.com/valyala/bytebufferpool"
    29  )
    30  
    31  func AddSegStatsNums(segstats map[string]*SegStats, cname string,
    32  	inNumType SS_IntUintFloatTypes, intVal int64, uintVal uint64,
    33  	fltVal float64, numstr string, bb *bbp.ByteBuffer, aggColUsage map[string]AggColUsageMode, hasValuesFunc bool) {
    34  
    35  	var stats *SegStats
    36  	var ok bool
    37  	stats, ok = segstats[cname]
    38  	if !ok {
    39  		numStats := &NumericStats{
    40  			Min: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
    41  				IntgrVal: math.MaxInt64},
    42  			Max: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
    43  				IntgrVal: math.MinInt64},
    44  			Sum: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
    45  				IntgrVal: 0},
    46  			Dtype: SS_DT_SIGNED_NUM,
    47  		}
    48  		stats = &SegStats{
    49  			IsNumeric: true,
    50  			Count:     0,
    51  			Hll:       hyperloglog.New16(),
    52  			NumStats:  numStats,
    53  			Records:   make([]*CValueEnclosure, 0),
    54  		}
    55  		segstats[cname] = stats
    56  	}
    57  
    58  	colUsage := NoEvalUsage
    59  	if aggColUsage != nil {
    60  		colUsagVal, exists := aggColUsage[cname]
    61  		if exists {
    62  			colUsage = colUsagVal
    63  		}
    64  	}
    65  
    66  	bb.Reset()
    67  	_, _ = bb.WriteString(numstr)
    68  	stats.Hll.Insert(bb.B)
    69  	processStats(stats, inNumType, intVal, uintVal, fltVal, colUsage, hasValuesFunc)
    70  }
    71  
    72  func AddSegStatsCount(segstats map[string]*SegStats, cname string,
    73  	count uint64) {
    74  
    75  	var stats *SegStats
    76  	var ok bool
    77  	stats, ok = segstats[cname]
    78  	if !ok {
    79  		numStats := &NumericStats{
    80  			Min: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
    81  				IntgrVal: math.MaxInt64},
    82  			Max: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
    83  				IntgrVal: math.MinInt64},
    84  			Sum: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
    85  				IntgrVal: 0},
    86  			Dtype: SS_DT_SIGNED_NUM,
    87  		}
    88  		stats = &SegStats{
    89  			IsNumeric: true,
    90  			Count:     0,
    91  			Hll:       hyperloglog.New16(),
    92  			NumStats:  numStats,
    93  		}
    94  		segstats[cname] = stats
    95  	}
    96  	stats.Count += count
    97  }
    98  
    99  func processStats(stats *SegStats, inNumType SS_IntUintFloatTypes, intVal int64,
   100  	uintVal uint64, fltVal float64, colUsage AggColUsageMode, hasValuesFunc bool) {
   101  
   102  	stats.Count++
   103  
   104  	var inIntgrVal int64
   105  	switch inNumType {
   106  	case SS_UINT8, SS_UINT16, SS_UINT32, SS_UINT64:
   107  		inIntgrVal = int64(uintVal)
   108  	case SS_INT8, SS_INT16, SS_INT32, SS_INT64:
   109  		inIntgrVal = intVal
   110  	}
   111  
   112  	if hasValuesFunc {
   113  		if stats.StringStats == nil {
   114  			stats.StringStats = &StringStats{
   115  				StrSet: make(map[string]struct{}, 0),
   116  			}
   117  		}
   118  	}
   119  
   120  	// we just use the Min stats for stored val comparison but apply the same
   121  	// logic to max and sum
   122  	switch inNumType {
   123  	case SS_FLOAT64:
   124  		if stats.NumStats.Min.Ntype == SS_DT_FLOAT {
   125  			// incoming float, stored is float, simple min
   126  			stats.NumStats.Min.FloatVal = math.Min(stats.NumStats.Min.FloatVal, fltVal)
   127  			stats.NumStats.Max.FloatVal = math.Max(stats.NumStats.Max.FloatVal, fltVal)
   128  			stats.NumStats.Sum.FloatVal = stats.NumStats.Sum.FloatVal + fltVal
   129  			stats.NumStats.Dtype = SS_DT_FLOAT
   130  
   131  			if hasValuesFunc {
   132  				stats.StringStats.StrSet[strconv.FormatFloat(fltVal, 'f', -1, 64)] = struct{}{}
   133  			}
   134  
   135  			if colUsage == BothUsage || colUsage == WithEvalUsage {
   136  				stats.Records = append(stats.Records, &CValueEnclosure{
   137  					Dtype: SS_DT_FLOAT,
   138  					CVal:  fltVal,
   139  				})
   140  			}
   141  		} else {
   142  			// incoming float, stored is non-float, upgrade it
   143  			stats.NumStats.Min.FloatVal = math.Min(float64(stats.NumStats.Min.IntgrVal), fltVal)
   144  			stats.NumStats.Min.Ntype = SS_DT_FLOAT
   145  
   146  			stats.NumStats.Max.FloatVal = math.Max(float64(stats.NumStats.Max.IntgrVal), fltVal)
   147  			stats.NumStats.Max.Ntype = SS_DT_FLOAT
   148  
   149  			stats.NumStats.Sum.FloatVal = float64(stats.NumStats.Sum.IntgrVal) + fltVal
   150  			stats.NumStats.Sum.Ntype = SS_DT_FLOAT
   151  			stats.NumStats.Dtype = SS_DT_FLOAT
   152  
   153  			if hasValuesFunc {
   154  				stats.StringStats.StrSet[strconv.FormatFloat(fltVal, 'f', -1, 64)] = struct{}{}
   155  			}
   156  
   157  			if colUsage == BothUsage || colUsage == WithEvalUsage {
   158  				stats.Records = append(stats.Records, &CValueEnclosure{
   159  					Dtype: SS_DT_FLOAT,
   160  					CVal:  fltVal,
   161  				})
   162  			}
   163  		}
   164  	// incoming is NON-float
   165  	default:
   166  		if stats.NumStats.Min.Ntype == SS_DT_FLOAT {
   167  			// incoming non-float, stored is float, cast it
   168  			stats.NumStats.Min.FloatVal = math.Min(stats.NumStats.Min.FloatVal, float64(inIntgrVal))
   169  			stats.NumStats.Max.FloatVal = math.Max(stats.NumStats.Max.FloatVal, float64(inIntgrVal))
   170  			stats.NumStats.Sum.FloatVal = stats.NumStats.Sum.FloatVal + float64(inIntgrVal)
   171  			stats.NumStats.Dtype = SS_DT_FLOAT
   172  
   173  			if hasValuesFunc {
   174  				stats.StringStats.StrSet[strconv.FormatInt(inIntgrVal, 10)] = struct{}{}
   175  			}
   176  
   177  			if colUsage == BothUsage || colUsage == WithEvalUsage {
   178  				stats.Records = append(stats.Records, &CValueEnclosure{
   179  					Dtype: SS_DT_FLOAT,
   180  					CVal:  float64(inIntgrVal),
   181  				})
   182  			}
   183  		} else {
   184  			// incoming non-float, stored is non-float, simple min
   185  			stats.NumStats.Min.IntgrVal = utils.MinInt64(stats.NumStats.Min.IntgrVal, inIntgrVal)
   186  			stats.NumStats.Max.IntgrVal = utils.MaxInt64(stats.NumStats.Max.IntgrVal, inIntgrVal)
   187  			stats.NumStats.Sum.IntgrVal = stats.NumStats.Sum.IntgrVal + inIntgrVal
   188  			stats.NumStats.Dtype = SS_DT_SIGNED_NUM
   189  
   190  			if hasValuesFunc {
   191  				stats.StringStats.StrSet[strconv.FormatInt(inIntgrVal, 10)] = struct{}{}
   192  			}
   193  
   194  			if colUsage == BothUsage || colUsage == WithEvalUsage {
   195  				stats.Records = append(stats.Records, &CValueEnclosure{
   196  					Dtype: SS_DT_SIGNED_NUM,
   197  					CVal:  inIntgrVal,
   198  				})
   199  			}
   200  		}
   201  	}
   202  
   203  }
   204  
   205  func AddSegStatsStr(segstats map[string]*SegStats, cname string, strVal string,
   206  	bb *bbp.ByteBuffer, aggColUsage map[string]AggColUsageMode, hasValuesFunc bool) {
   207  
   208  	var stats *SegStats
   209  	var ok bool
   210  	stats, ok = segstats[cname]
   211  	if !ok {
   212  		stats = &SegStats{
   213  			IsNumeric: false,
   214  			Count:     0,
   215  			Hll:       hyperloglog.New16(),
   216  			Records:   make([]*CValueEnclosure, 0)}
   217  
   218  		segstats[cname] = stats
   219  	}
   220  	stats.Count++
   221  
   222  	colUsage := NoEvalUsage
   223  	if aggColUsage != nil {
   224  		colUsagVal, exists := aggColUsage[cname]
   225  		if exists {
   226  			colUsage = colUsagVal
   227  		}
   228  	}
   229  
   230  	if colUsage == BothUsage || colUsage == WithEvalUsage {
   231  		stats.Records = append(stats.Records, &CValueEnclosure{
   232  			Dtype: SS_DT_STRING,
   233  			CVal:  strVal,
   234  		})
   235  	}
   236  
   237  	if hasValuesFunc {
   238  		if stats.StringStats == nil {
   239  			stats.StringStats = &StringStats{
   240  				StrSet: make(map[string]struct{}, 0),
   241  			}
   242  		}
   243  
   244  		stats.StringStats.StrSet[strVal] = struct{}{}
   245  	}
   246  
   247  	bb.Reset()
   248  	_, _ = bb.WriteString(strVal)
   249  	stats.Hll.Insert(bb.B)
   250  }
   251  
   252  // adds all elements of m2 to m1 and returns m1
   253  func MergeSegStats(m1, m2 map[string]*SegStats) map[string]*SegStats {
   254  	for k, v := range m2 {
   255  		other, ok := m1[k]
   256  		if !ok {
   257  			m1[k] = v
   258  			continue
   259  		}
   260  		m1[k].Merge(other)
   261  	}
   262  	return m1
   263  }