github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/writer/stats/segstats.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package stats 18 19 import ( 20 "math" 21 "strconv" 22 23 . "github.com/siglens/siglens/pkg/segment/structs" 24 . "github.com/siglens/siglens/pkg/segment/utils" 25 "github.com/siglens/siglens/pkg/utils" 26 27 "github.com/axiomhq/hyperloglog" 28 bbp "github.com/valyala/bytebufferpool" 29 ) 30 31 func AddSegStatsNums(segstats map[string]*SegStats, cname string, 32 inNumType SS_IntUintFloatTypes, intVal int64, uintVal uint64, 33 fltVal float64, numstr string, bb *bbp.ByteBuffer, aggColUsage map[string]AggColUsageMode, hasValuesFunc bool) { 34 35 var stats *SegStats 36 var ok bool 37 stats, ok = segstats[cname] 38 if !ok { 39 numStats := &NumericStats{ 40 Min: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM, 41 IntgrVal: math.MaxInt64}, 42 Max: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM, 43 IntgrVal: math.MinInt64}, 44 Sum: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM, 45 IntgrVal: 0}, 46 Dtype: SS_DT_SIGNED_NUM, 47 } 48 stats = &SegStats{ 49 IsNumeric: true, 50 Count: 0, 51 Hll: hyperloglog.New16(), 52 NumStats: numStats, 53 Records: make([]*CValueEnclosure, 0), 54 } 55 segstats[cname] = stats 56 } 57 58 colUsage := NoEvalUsage 59 if aggColUsage != nil { 60 colUsagVal, exists := aggColUsage[cname] 61 if exists { 62 colUsage = colUsagVal 63 } 64 } 65 66 bb.Reset() 67 _, _ = bb.WriteString(numstr) 68 stats.Hll.Insert(bb.B) 69 processStats(stats, inNumType, intVal, uintVal, fltVal, colUsage, hasValuesFunc) 70 } 71 72 func AddSegStatsCount(segstats map[string]*SegStats, cname string, 73 count uint64) { 74 75 var stats *SegStats 76 var ok bool 77 stats, ok = segstats[cname] 78 if !ok { 79 numStats := &NumericStats{ 80 Min: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM, 81 IntgrVal: math.MaxInt64}, 82 Max: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM, 83 IntgrVal: math.MinInt64}, 84 Sum: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM, 85 IntgrVal: 0}, 86 Dtype: SS_DT_SIGNED_NUM, 87 } 88 stats = &SegStats{ 89 IsNumeric: true, 90 Count: 0, 91 Hll: hyperloglog.New16(), 92 NumStats: numStats, 93 } 94 segstats[cname] = stats 95 } 96 stats.Count += count 97 } 98 99 func processStats(stats *SegStats, inNumType SS_IntUintFloatTypes, intVal int64, 100 uintVal uint64, fltVal float64, colUsage AggColUsageMode, hasValuesFunc bool) { 101 102 stats.Count++ 103 104 var inIntgrVal int64 105 switch inNumType { 106 case SS_UINT8, SS_UINT16, SS_UINT32, SS_UINT64: 107 inIntgrVal = int64(uintVal) 108 case SS_INT8, SS_INT16, SS_INT32, SS_INT64: 109 inIntgrVal = intVal 110 } 111 112 if hasValuesFunc { 113 if stats.StringStats == nil { 114 stats.StringStats = &StringStats{ 115 StrSet: make(map[string]struct{}, 0), 116 } 117 } 118 } 119 120 // we just use the Min stats for stored val comparison but apply the same 121 // logic to max and sum 122 switch inNumType { 123 case SS_FLOAT64: 124 if stats.NumStats.Min.Ntype == SS_DT_FLOAT { 125 // incoming float, stored is float, simple min 126 stats.NumStats.Min.FloatVal = math.Min(stats.NumStats.Min.FloatVal, fltVal) 127 stats.NumStats.Max.FloatVal = math.Max(stats.NumStats.Max.FloatVal, fltVal) 128 stats.NumStats.Sum.FloatVal = stats.NumStats.Sum.FloatVal + fltVal 129 stats.NumStats.Dtype = SS_DT_FLOAT 130 131 if hasValuesFunc { 132 stats.StringStats.StrSet[strconv.FormatFloat(fltVal, 'f', -1, 64)] = struct{}{} 133 } 134 135 if colUsage == BothUsage || colUsage == WithEvalUsage { 136 stats.Records = append(stats.Records, &CValueEnclosure{ 137 Dtype: SS_DT_FLOAT, 138 CVal: fltVal, 139 }) 140 } 141 } else { 142 // incoming float, stored is non-float, upgrade it 143 stats.NumStats.Min.FloatVal = math.Min(float64(stats.NumStats.Min.IntgrVal), fltVal) 144 stats.NumStats.Min.Ntype = SS_DT_FLOAT 145 146 stats.NumStats.Max.FloatVal = math.Max(float64(stats.NumStats.Max.IntgrVal), fltVal) 147 stats.NumStats.Max.Ntype = SS_DT_FLOAT 148 149 stats.NumStats.Sum.FloatVal = float64(stats.NumStats.Sum.IntgrVal) + fltVal 150 stats.NumStats.Sum.Ntype = SS_DT_FLOAT 151 stats.NumStats.Dtype = SS_DT_FLOAT 152 153 if hasValuesFunc { 154 stats.StringStats.StrSet[strconv.FormatFloat(fltVal, 'f', -1, 64)] = struct{}{} 155 } 156 157 if colUsage == BothUsage || colUsage == WithEvalUsage { 158 stats.Records = append(stats.Records, &CValueEnclosure{ 159 Dtype: SS_DT_FLOAT, 160 CVal: fltVal, 161 }) 162 } 163 } 164 // incoming is NON-float 165 default: 166 if stats.NumStats.Min.Ntype == SS_DT_FLOAT { 167 // incoming non-float, stored is float, cast it 168 stats.NumStats.Min.FloatVal = math.Min(stats.NumStats.Min.FloatVal, float64(inIntgrVal)) 169 stats.NumStats.Max.FloatVal = math.Max(stats.NumStats.Max.FloatVal, float64(inIntgrVal)) 170 stats.NumStats.Sum.FloatVal = stats.NumStats.Sum.FloatVal + float64(inIntgrVal) 171 stats.NumStats.Dtype = SS_DT_FLOAT 172 173 if hasValuesFunc { 174 stats.StringStats.StrSet[strconv.FormatInt(inIntgrVal, 10)] = struct{}{} 175 } 176 177 if colUsage == BothUsage || colUsage == WithEvalUsage { 178 stats.Records = append(stats.Records, &CValueEnclosure{ 179 Dtype: SS_DT_FLOAT, 180 CVal: float64(inIntgrVal), 181 }) 182 } 183 } else { 184 // incoming non-float, stored is non-float, simple min 185 stats.NumStats.Min.IntgrVal = utils.MinInt64(stats.NumStats.Min.IntgrVal, inIntgrVal) 186 stats.NumStats.Max.IntgrVal = utils.MaxInt64(stats.NumStats.Max.IntgrVal, inIntgrVal) 187 stats.NumStats.Sum.IntgrVal = stats.NumStats.Sum.IntgrVal + inIntgrVal 188 stats.NumStats.Dtype = SS_DT_SIGNED_NUM 189 190 if hasValuesFunc { 191 stats.StringStats.StrSet[strconv.FormatInt(inIntgrVal, 10)] = struct{}{} 192 } 193 194 if colUsage == BothUsage || colUsage == WithEvalUsage { 195 stats.Records = append(stats.Records, &CValueEnclosure{ 196 Dtype: SS_DT_SIGNED_NUM, 197 CVal: inIntgrVal, 198 }) 199 } 200 } 201 } 202 203 } 204 205 func AddSegStatsStr(segstats map[string]*SegStats, cname string, strVal string, 206 bb *bbp.ByteBuffer, aggColUsage map[string]AggColUsageMode, hasValuesFunc bool) { 207 208 var stats *SegStats 209 var ok bool 210 stats, ok = segstats[cname] 211 if !ok { 212 stats = &SegStats{ 213 IsNumeric: false, 214 Count: 0, 215 Hll: hyperloglog.New16(), 216 Records: make([]*CValueEnclosure, 0)} 217 218 segstats[cname] = stats 219 } 220 stats.Count++ 221 222 colUsage := NoEvalUsage 223 if aggColUsage != nil { 224 colUsagVal, exists := aggColUsage[cname] 225 if exists { 226 colUsage = colUsagVal 227 } 228 } 229 230 if colUsage == BothUsage || colUsage == WithEvalUsage { 231 stats.Records = append(stats.Records, &CValueEnclosure{ 232 Dtype: SS_DT_STRING, 233 CVal: strVal, 234 }) 235 } 236 237 if hasValuesFunc { 238 if stats.StringStats == nil { 239 stats.StringStats = &StringStats{ 240 StrSet: make(map[string]struct{}, 0), 241 } 242 } 243 244 stats.StringStats.StrSet[strVal] = struct{}{} 245 } 246 247 bb.Reset() 248 _, _ = bb.WriteString(strVal) 249 stats.Hll.Insert(bb.B) 250 } 251 252 // adds all elements of m2 to m1 and returns m1 253 func MergeSegStats(m1, m2 map[string]*SegStats) map[string]*SegStats { 254 for k, v := range m2 { 255 other, ok := m1[k] 256 if !ok { 257 m1[k] = v 258 continue 259 } 260 m1[k].Merge(other) 261 } 262 return m1 263 }