github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/reader/segread/segstatsreader.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package segread 18 19 import ( 20 "errors" 21 "fmt" 22 "math" 23 "os" 24 25 "github.com/axiomhq/hyperloglog" 26 "github.com/siglens/siglens/pkg/blob" 27 "github.com/siglens/siglens/pkg/segment/structs" 28 "github.com/siglens/siglens/pkg/segment/utils" 29 30 toputils "github.com/siglens/siglens/pkg/utils" 31 log "github.com/sirupsen/logrus" 32 ) 33 34 func ReadSegStats(segkey string, qid uint64) (map[string]*structs.SegStats, error) { 35 36 retVal := make(map[string]*structs.SegStats) 37 fName := fmt.Sprintf("%v.sst", segkey) 38 err := blob.DownloadSegmentBlob(fName, true) 39 if err != nil { 40 log.Errorf("qid=%d, ReadSegStats failed to download sst file. %+v, err=%v", qid, fName, err) 41 return retVal, err 42 } 43 44 fdata, err := os.ReadFile(fName) 45 if err != nil { 46 log.Errorf("qid=%d, ReadSegStats failed to read sst file. %+v, err=%v", qid, fName, err) 47 return retVal, err 48 } 49 50 defer func() { 51 err := blob.SetSegSetFilesAsNotInUse([]string{fName}) 52 if err != nil { 53 log.Errorf("qid=%d, ReadSegStats failed to close blob. %+v, err=%v", qid, fName, err) 54 } 55 }() 56 57 rIdx := uint32(0) 58 59 // version 60 rIdx++ 61 62 for rIdx < uint32(len(fdata)) { 63 64 // cnamelen 65 cnamelen := toputils.BytesToUint16LittleEndian(fdata[rIdx : rIdx+2]) 66 rIdx += 2 67 // actual cname 68 cname := string(fdata[rIdx : rIdx+uint32(cnamelen)]) 69 rIdx += uint32(cnamelen) 70 // sst len 71 sstlen := toputils.BytesToUint16LittleEndian(fdata[rIdx : rIdx+2]) 72 rIdx += 2 73 74 // actual sst 75 sst, err := readSingleSst(fdata[rIdx:rIdx+uint32(sstlen)], qid) 76 if err != nil { 77 log.Errorf("qid=%d, ReadSegStats: error reading sst for cname=%v, err=%v", 78 qid, cname, err) 79 return retVal, err 80 } 81 rIdx += uint32(sstlen) 82 retVal[cname] = sst 83 } 84 return retVal, nil 85 } 86 87 func readSingleSst(fdata []byte, qid uint64) (*structs.SegStats, error) { 88 89 sst := structs.SegStats{} 90 91 idx := uint16(0) 92 93 // read version, currently ignored 94 idx++ 95 96 // read isNumeric 97 sst.IsNumeric = toputils.BytesToBoolLittleEndian(fdata[idx : idx+1]) 98 idx++ 99 100 // read Count 101 sst.Count = toputils.BytesToUint64LittleEndian(fdata[idx : idx+8]) 102 idx += 8 103 104 hllSize := toputils.BytesToUint16LittleEndian(fdata[idx : idx+2]) 105 idx += 2 106 107 sst.Hll = hyperloglog.New16() 108 err := sst.Hll.UnmarshalBinary(fdata[idx : idx+hllSize]) 109 if err != nil { 110 log.Errorf("qid=%d, readSingleSst unmarshal sst err=%v", qid, err) 111 return nil, err 112 } 113 idx += hllSize 114 115 if !sst.IsNumeric { 116 return &sst, nil 117 } 118 119 sst.NumStats = &structs.NumericStats{} 120 // read Min Ntype 121 min := utils.NumTypeEnclosure{} 122 min.Ntype = utils.SS_DTYPE(fdata[idx : idx+1][0]) 123 idx += 1 124 if min.Ntype == utils.SS_DT_FLOAT { 125 min.FloatVal = toputils.BytesToFloat64LittleEndian(fdata[idx : idx+8]) 126 } else { 127 min.IntgrVal = toputils.BytesToInt64LittleEndian(fdata[idx : idx+8]) 128 } 129 sst.NumStats.Min = min 130 idx += 8 131 132 // read Max Ntype 133 max := utils.NumTypeEnclosure{} 134 max.Ntype = utils.SS_DTYPE(fdata[idx : idx+1][0]) 135 idx += 1 136 if max.Ntype == utils.SS_DT_FLOAT { 137 max.FloatVal = toputils.BytesToFloat64LittleEndian(fdata[idx : idx+8]) 138 } else { 139 max.IntgrVal = toputils.BytesToInt64LittleEndian(fdata[idx : idx+8]) 140 } 141 sst.NumStats.Max = max 142 idx += 8 143 144 // read Sum Ntype 145 sum := utils.NumTypeEnclosure{} 146 sum.Ntype = utils.SS_DTYPE(fdata[idx : idx+1][0]) 147 idx += 1 148 if sum.Ntype == utils.SS_DT_FLOAT { 149 sum.FloatVal = toputils.BytesToFloat64LittleEndian(fdata[idx : idx+8]) 150 } else { 151 sum.IntgrVal = toputils.BytesToInt64LittleEndian(fdata[idx : idx+8]) 152 } 153 sst.NumStats.Sum = sum 154 155 return &sst, nil 156 } 157 158 func GetSegMin(runningSegStat *structs.SegStats, 159 currSegStat *structs.SegStats) (*utils.NumTypeEnclosure, error) { 160 161 rSst := utils.NumTypeEnclosure{ 162 Ntype: utils.SS_DT_SIGNED_NUM, 163 IntgrVal: math.MaxInt64, 164 } 165 166 if currSegStat == nil { 167 log.Errorf("GetSegMin: currSegStat was of nil") 168 return &rSst, errors.New("GetSegMin: currSegStat was of nil") 169 } 170 171 if !currSegStat.IsNumeric { 172 log.Errorf("GetSegMin: current segStats is non-numeric") 173 return &rSst, errors.New("GetSegMin: current segStat is non-numeric") 174 } 175 176 // if this is the first segment, then running will be nil, and we return the first seg's stats 177 if runningSegStat == nil { 178 switch currSegStat.NumStats.Min.Ntype { 179 case utils.SS_DT_FLOAT: 180 rSst.FloatVal = currSegStat.NumStats.Min.FloatVal 181 rSst.Ntype = utils.SS_DT_FLOAT 182 default: 183 rSst.IntgrVal = currSegStat.NumStats.Min.IntgrVal 184 } 185 return &rSst, nil 186 } 187 188 switch currSegStat.NumStats.Min.Ntype { 189 case utils.SS_DT_FLOAT: 190 if runningSegStat.NumStats.Min.Ntype == utils.SS_DT_FLOAT { 191 runningSegStat.NumStats.Min.FloatVal = math.Min(runningSegStat.NumStats.Min.FloatVal, currSegStat.NumStats.Min.FloatVal) 192 rSst.FloatVal = runningSegStat.NumStats.Min.FloatVal 193 rSst.Ntype = utils.SS_DT_FLOAT 194 } else { 195 runningSegStat.NumStats.Min.FloatVal = math.Min(float64(runningSegStat.NumStats.Min.IntgrVal), currSegStat.NumStats.Min.FloatVal) 196 runningSegStat.NumStats.Min.Ntype = utils.SS_DT_FLOAT 197 rSst.FloatVal = runningSegStat.NumStats.Min.FloatVal 198 rSst.Ntype = utils.SS_DT_FLOAT 199 } 200 default: 201 if runningSegStat.NumStats.Min.Ntype == utils.SS_DT_FLOAT { 202 runningSegStat.NumStats.Min.FloatVal = math.Min(runningSegStat.NumStats.Min.FloatVal, float64(currSegStat.NumStats.Min.IntgrVal)) 203 rSst.FloatVal = runningSegStat.NumStats.Min.FloatVal 204 rSst.Ntype = utils.SS_DT_FLOAT 205 } else { 206 runningSegStat.NumStats.Min.IntgrVal = toputils.MinInt64(runningSegStat.NumStats.Min.IntgrVal, currSegStat.NumStats.Min.IntgrVal) 207 rSst.IntgrVal = runningSegStat.NumStats.Min.IntgrVal 208 } 209 } 210 return &rSst, nil 211 } 212 213 func GetSegMax(runningSegStat *structs.SegStats, 214 currSegStat *structs.SegStats) (*utils.NumTypeEnclosure, error) { 215 216 // start with lower resolution and upgrade as necessary 217 rSst := utils.NumTypeEnclosure{ 218 Ntype: utils.SS_DT_SIGNED_NUM, 219 IntgrVal: math.MinInt64, 220 } 221 222 if currSegStat == nil { 223 log.Errorf("GetSegMax: currSegStat was of nil") 224 return &rSst, errors.New("GetSegMax: currSegStat was of nil") 225 } 226 227 if !currSegStat.IsNumeric { 228 log.Errorf("GetSegMax: current segStats is non-numeric") 229 return &rSst, errors.New("GetSegMax: current segStat is non-numeric") 230 } 231 232 // if this is the first segment, then running will be nil, and we return the first seg's stats 233 if runningSegStat == nil { 234 switch currSegStat.NumStats.Max.Ntype { 235 case utils.SS_DT_FLOAT: 236 rSst.FloatVal = currSegStat.NumStats.Max.FloatVal 237 rSst.Ntype = utils.SS_DT_FLOAT 238 default: 239 rSst.IntgrVal = currSegStat.NumStats.Max.IntgrVal 240 } 241 return &rSst, nil 242 } 243 244 switch currSegStat.NumStats.Max.Ntype { 245 case utils.SS_DT_FLOAT: 246 if runningSegStat.NumStats.Max.Ntype == utils.SS_DT_FLOAT { 247 runningSegStat.NumStats.Max.FloatVal = math.Max(runningSegStat.NumStats.Max.FloatVal, currSegStat.NumStats.Max.FloatVal) 248 rSst.FloatVal = runningSegStat.NumStats.Max.FloatVal 249 rSst.Ntype = utils.SS_DT_FLOAT 250 } else { 251 runningSegStat.NumStats.Max.FloatVal = math.Max(float64(runningSegStat.NumStats.Max.IntgrVal), currSegStat.NumStats.Max.FloatVal) 252 rSst.FloatVal = runningSegStat.NumStats.Max.FloatVal 253 rSst.Ntype = utils.SS_DT_FLOAT 254 } 255 default: 256 if runningSegStat.NumStats.Max.Ntype == utils.SS_DT_FLOAT { 257 runningSegStat.NumStats.Max.FloatVal = math.Max(runningSegStat.NumStats.Max.FloatVal, float64(currSegStat.NumStats.Max.IntgrVal)) 258 rSst.FloatVal = runningSegStat.NumStats.Max.FloatVal 259 rSst.Ntype = utils.SS_DT_FLOAT 260 } else { 261 runningSegStat.NumStats.Max.IntgrVal = toputils.MaxInt64(runningSegStat.NumStats.Max.IntgrVal, currSegStat.NumStats.Max.IntgrVal) 262 rSst.IntgrVal = runningSegStat.NumStats.Max.IntgrVal 263 } 264 } 265 return &rSst, nil 266 } 267 268 func GetSegRange(runningSegStat *structs.SegStats, 269 currSegStat *structs.SegStats) (*utils.NumTypeEnclosure, error) { 270 271 // start with lower resolution and upgrade as necessary 272 rSst := utils.NumTypeEnclosure{ 273 Ntype: utils.SS_DT_SIGNED_NUM, 274 IntgrVal: 0, 275 } 276 if currSegStat == nil { 277 log.Errorf("GetSegRange: currSegStat was of nil") 278 return &rSst, errors.New("GetSegRange: currSegStat was of nil") 279 } 280 281 if !currSegStat.IsNumeric { 282 log.Errorf("GetSegRange: current segStats is non-numeric") 283 return &rSst, errors.New("GetSegRange: current segStat is non-numeric") 284 } 285 286 if currSegStat.NumStats.Max.Ntype != currSegStat.NumStats.Min.Ntype { 287 return &rSst, nil 288 } 289 290 // if this is the first segment, then running will be nil, and we return the first seg's stats 291 if runningSegStat == nil { 292 switch currSegStat.NumStats.Max.Ntype { 293 case utils.SS_DT_FLOAT: 294 rSst.FloatVal = currSegStat.NumStats.Max.FloatVal - currSegStat.NumStats.Min.FloatVal 295 rSst.Ntype = utils.SS_DT_FLOAT 296 default: 297 rSst.IntgrVal = currSegStat.NumStats.Max.IntgrVal - currSegStat.NumStats.Min.IntgrVal 298 } 299 return &rSst, nil 300 } 301 302 switch currSegStat.NumStats.Max.Ntype { 303 case utils.SS_DT_FLOAT: 304 if runningSegStat.NumStats.Max.Ntype == utils.SS_DT_FLOAT && runningSegStat.NumStats.Min.Ntype == utils.SS_DT_FLOAT { 305 runningSegStat.NumStats.Max.FloatVal = math.Max(runningSegStat.NumStats.Max.FloatVal, currSegStat.NumStats.Max.FloatVal) 306 runningSegStat.NumStats.Min.FloatVal = math.Min(runningSegStat.NumStats.Min.FloatVal, currSegStat.NumStats.Min.FloatVal) 307 rSst.FloatVal = runningSegStat.NumStats.Max.FloatVal - runningSegStat.NumStats.Min.FloatVal 308 rSst.Ntype = utils.SS_DT_FLOAT 309 } else { 310 runningSegStat.NumStats.Max.FloatVal = math.Max(float64(runningSegStat.NumStats.Max.IntgrVal), currSegStat.NumStats.Max.FloatVal) 311 runningSegStat.NumStats.Min.FloatVal = math.Min(float64(runningSegStat.NumStats.Min.IntgrVal), currSegStat.NumStats.Min.FloatVal) 312 rSst.FloatVal = runningSegStat.NumStats.Max.FloatVal - runningSegStat.NumStats.Min.FloatVal 313 rSst.Ntype = utils.SS_DT_FLOAT 314 } 315 default: 316 if runningSegStat.NumStats.Max.Ntype == utils.SS_DT_FLOAT && runningSegStat.NumStats.Min.Ntype == utils.SS_DT_FLOAT { 317 runningSegStat.NumStats.Max.FloatVal = math.Max(runningSegStat.NumStats.Max.FloatVal, float64(currSegStat.NumStats.Max.IntgrVal)) 318 runningSegStat.NumStats.Min.FloatVal = math.Min(runningSegStat.NumStats.Min.FloatVal, float64(currSegStat.NumStats.Min.IntgrVal)) 319 rSst.FloatVal = runningSegStat.NumStats.Max.FloatVal - runningSegStat.NumStats.Min.FloatVal 320 rSst.Ntype = utils.SS_DT_FLOAT 321 } else { 322 runningSegStat.NumStats.Max.IntgrVal = toputils.MaxInt64(runningSegStat.NumStats.Max.IntgrVal, currSegStat.NumStats.Max.IntgrVal) 323 runningSegStat.NumStats.Min.IntgrVal = toputils.MinInt64(runningSegStat.NumStats.Min.IntgrVal, currSegStat.NumStats.Min.IntgrVal) 324 rSst.IntgrVal = runningSegStat.NumStats.Max.IntgrVal - runningSegStat.NumStats.Min.IntgrVal 325 } 326 } 327 328 return &rSst, nil 329 } 330 331 func GetSegSum(runningSegStat *structs.SegStats, 332 currSegStat *structs.SegStats) (*utils.NumTypeEnclosure, error) { 333 334 // start with lower resolution and upgrade as necessary 335 rSst := utils.NumTypeEnclosure{ 336 Ntype: utils.SS_DT_SIGNED_NUM, 337 IntgrVal: 0, 338 } 339 if currSegStat == nil { 340 log.Errorf("GetSegSum: currSegStat was of nil") 341 return &rSst, errors.New("GetSegSum: currSegStat was of nil") 342 } 343 344 if !currSegStat.IsNumeric { 345 log.Errorf("GetSegSum: current segStats is non-numeric") 346 return &rSst, errors.New("GetSegSum: current segStat is non-numeric") 347 } 348 349 // if this is the first segment, then running will be nil, and we return the first seg's stats 350 if runningSegStat == nil { 351 switch currSegStat.NumStats.Sum.Ntype { 352 case utils.SS_DT_FLOAT: 353 rSst.FloatVal = currSegStat.NumStats.Sum.FloatVal 354 rSst.Ntype = utils.SS_DT_FLOAT 355 default: 356 rSst.IntgrVal = currSegStat.NumStats.Sum.IntgrVal 357 } 358 return &rSst, nil 359 } 360 361 switch currSegStat.NumStats.Sum.Ntype { 362 case utils.SS_DT_FLOAT: 363 if runningSegStat.NumStats.Sum.Ntype == utils.SS_DT_FLOAT { 364 runningSegStat.NumStats.Sum.FloatVal = runningSegStat.NumStats.Sum.FloatVal + currSegStat.NumStats.Sum.FloatVal 365 rSst.FloatVal = runningSegStat.NumStats.Sum.FloatVal 366 rSst.Ntype = utils.SS_DT_FLOAT 367 } else { 368 runningSegStat.NumStats.Sum.FloatVal = float64(runningSegStat.NumStats.Sum.IntgrVal) + currSegStat.NumStats.Sum.FloatVal 369 rSst.FloatVal = runningSegStat.NumStats.Sum.FloatVal 370 rSst.Ntype = utils.SS_DT_FLOAT 371 } 372 default: 373 if runningSegStat.NumStats.Sum.Ntype == utils.SS_DT_FLOAT { 374 runningSegStat.NumStats.Sum.FloatVal = runningSegStat.NumStats.Sum.FloatVal + float64(currSegStat.NumStats.Sum.IntgrVal) 375 rSst.FloatVal = runningSegStat.NumStats.Sum.FloatVal 376 rSst.Ntype = utils.SS_DT_FLOAT 377 } else { 378 runningSegStat.NumStats.Sum.IntgrVal = runningSegStat.NumStats.Sum.IntgrVal + currSegStat.NumStats.Sum.IntgrVal 379 rSst.IntgrVal = runningSegStat.NumStats.Sum.IntgrVal 380 } 381 } 382 383 return &rSst, nil 384 } 385 386 func GetSegCardinality(runningSegStat *structs.SegStats, 387 currSegStat *structs.SegStats) (*utils.NumTypeEnclosure, error) { 388 389 res := utils.NumTypeEnclosure{ 390 Ntype: utils.SS_DT_SIGNED_NUM, 391 IntgrVal: 0, 392 } 393 394 if currSegStat == nil { 395 log.Errorf("GetSegCardinality: currSegStat was of nil") 396 return &res, errors.New("GetSegCardinality: currSegStat was of nil") 397 } 398 399 // if this is the first segment, then running will be nil, and we return the first seg's stats 400 if runningSegStat == nil { 401 res.IntgrVal = int64(currSegStat.Hll.Estimate()) 402 return &res, nil 403 } 404 405 err := runningSegStat.Hll.Merge(currSegStat.Hll) 406 if err != nil { 407 log.Errorf("GetSegCardinality: error in Hll.Merge %+v", err) 408 return nil, err 409 } 410 res.IntgrVal = int64(runningSegStat.Hll.Estimate()) 411 412 return &res, nil 413 } 414 415 func GetSegCount(runningSegStat *structs.SegStats, 416 currSegStat *structs.SegStats) (*utils.NumTypeEnclosure, error) { 417 418 rSst := utils.NumTypeEnclosure{ 419 Ntype: utils.SS_DT_SIGNED_NUM, 420 IntgrVal: int64(0), 421 } 422 if currSegStat == nil { 423 log.Errorf("GetSegCount: currSegStat was of nil") 424 return &rSst, errors.New("GetSegCount: currSegStat was of nil") 425 } 426 427 if runningSegStat == nil { 428 rSst.IntgrVal = int64(currSegStat.Count) 429 return &rSst, nil 430 } 431 432 runningSegStat.Count = runningSegStat.Count + currSegStat.Count 433 rSst.IntgrVal = int64(runningSegStat.Count) 434 435 return &rSst, nil 436 } 437 438 func GetSegAvg(runningSegStat *structs.SegStats, 439 currSegStat *structs.SegStats) (*utils.NumTypeEnclosure, error) { 440 441 // start with lower resolution and upgrade as necessary 442 rSst := utils.NumTypeEnclosure{ 443 Ntype: utils.SS_DT_SIGNED_NUM, 444 IntgrVal: 0, 445 } 446 if currSegStat == nil { 447 log.Errorf("GetSegAvg: currSegStat was of nil") 448 return &rSst, errors.New("GetSegAvg: currSegStat was of nil") 449 } 450 451 if !currSegStat.IsNumeric { 452 log.Errorf("GetSegAvg: current segStats is non-numeric") 453 return &rSst, errors.New("GetSegAvg: current segStat is non-numeric") 454 } 455 456 // if this is the first segment, then running will be nil, and we return the first seg's stats 457 if runningSegStat == nil { 458 switch currSegStat.NumStats.Sum.Ntype { 459 case utils.SS_DT_FLOAT: 460 rSst.FloatVal = currSegStat.NumStats.Sum.FloatVal / float64(currSegStat.Count) 461 rSst.Ntype = utils.SS_DT_FLOAT 462 default: 463 rSst.FloatVal = float64(currSegStat.NumStats.Sum.IntgrVal) / float64(currSegStat.Count) 464 rSst.Ntype = utils.SS_DT_FLOAT 465 } 466 return &rSst, nil 467 } 468 runningSegStat.Count = runningSegStat.Count + currSegStat.Count 469 470 switch currSegStat.NumStats.Sum.Ntype { 471 case utils.SS_DT_FLOAT: 472 if runningSegStat.NumStats.Sum.Ntype == utils.SS_DT_FLOAT { 473 runningSegStat.NumStats.Sum.FloatVal = runningSegStat.NumStats.Sum.FloatVal + currSegStat.NumStats.Sum.FloatVal 474 rSst.FloatVal = runningSegStat.NumStats.Sum.FloatVal / float64(runningSegStat.Count) 475 rSst.Ntype = utils.SS_DT_FLOAT 476 } else { 477 runningSegStat.NumStats.Sum.FloatVal = float64(runningSegStat.NumStats.Sum.IntgrVal) + currSegStat.NumStats.Sum.FloatVal 478 rSst.FloatVal = runningSegStat.NumStats.Sum.FloatVal / float64(runningSegStat.Count) 479 rSst.Ntype = utils.SS_DT_FLOAT 480 } 481 default: 482 if runningSegStat.NumStats.Sum.Ntype == utils.SS_DT_FLOAT { 483 runningSegStat.NumStats.Sum.FloatVal = runningSegStat.NumStats.Sum.FloatVal + float64(currSegStat.NumStats.Sum.IntgrVal) 484 rSst.FloatVal = runningSegStat.NumStats.Sum.FloatVal / float64(runningSegStat.Count) 485 rSst.Ntype = utils.SS_DT_FLOAT 486 } else { 487 runningSegStat.NumStats.Sum.FloatVal = float64(runningSegStat.NumStats.Sum.IntgrVal + currSegStat.NumStats.Sum.IntgrVal) 488 runningSegStat.NumStats.Sum.Ntype = utils.SS_DT_FLOAT 489 rSst.FloatVal = runningSegStat.NumStats.Sum.FloatVal / float64(runningSegStat.Count) 490 rSst.Ntype = utils.SS_DT_FLOAT 491 } 492 } 493 494 return &rSst, nil 495 }