/*
Copyright 2023.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package writer

import (
	"encoding/json"
	"errors"
	"fmt"
	"math"
	"math/rand"
	"os"
	"strconv"
	"sync"
	"time"

	"github.com/axiomhq/hyperloglog"
	"github.com/bits-and-blooms/bloom/v3"
	jp "github.com/buger/jsonparser"
	"github.com/cespare/xxhash"
	"github.com/siglens/siglens/pkg/blob/ssutils"
	"github.com/siglens/siglens/pkg/config"
	"github.com/siglens/siglens/pkg/segment/pqmr"
	. "github.com/siglens/siglens/pkg/segment/structs"
	. "github.com/siglens/siglens/pkg/segment/utils"
	segutils "github.com/siglens/siglens/pkg/segment/utils"
	"github.com/siglens/siglens/pkg/segment/writer/metrics"
	"github.com/siglens/siglens/pkg/segment/writer/stats"
	"github.com/siglens/siglens/pkg/utils"
	log "github.com/sirupsen/logrus"
	bbp "github.com/valyala/bytebufferpool"
)

// wipCardLimit caps the per-column cardinality tracked while a block is
// in progress. NOTE(review): its consumer is outside this chunk — presumably
// the dict-encoding path (checkAddDictEnc); confirm there.
var wipCardLimit uint16 = 1001

// Zero-valued placeholders passed to the shared number-encoding helpers for
// whichever numeric flavors (int64 / uint64 / float64) a call does not use.
const FPARM_INT64 = int64(0)
const FPARM_UINT64 = uint64(0)
const FPARM_FLOAT64 = float64(0)

/*
Each column stored in its own columnar file
Each column file format:

	[ValType-1 1B] [OptionalStringVal-Len-1 2B] [ActualValue-1]
	[ValType-2 1B] [OptionalStringVal-Len-2 2B] [ActualValue-2]

This function should not be called by itself, must be called via locks

This function assumes that the record_json has been flattened

foundColsInRecord is a map[string]bool of all columns in the WIPBlock. New columns will be added to this map
The values of this map will be set to false before returning for subsequent calls. This lets us re-use the same map across WIPBlock

returns :

	1) Max index amongst the columns
	2) bool if this record matched the column conditions in PQColTracker
	3) error
*/
func (ss *SegStore) EncodeColumns(rawData []byte, recordTime uint64, tsKey *string,
	signalType segutils.SIGNAL_TYPE) (uint32, bool, error) {

	var maxIdx uint32 = 0
	var matchedCol = false

	// The timestamp column is encoded separately from the JSON payload.
	ss.encodeTime(recordTime, tsKey)
	var err error
	maxIdx, matchedCol, err = ss.encodeRawJsonObject("", rawData, maxIdx, tsKey, matchedCol, signalType)
	if err != nil {
		log.Errorf("Failed to encode json object! err: %+v", err)
		return maxIdx, matchedCol, err
	}

	// Any column known to the block but absent from this record gets a 1-byte
	// backfill marker so that every column has one entry per record. Columns
	// that WERE present are reset to false for the next record's pass.
	for colName, foundCol := range ss.wipBlock.columnsInBlock {
		if foundCol {
			ss.wipBlock.columnsInBlock[colName] = false
			continue
		}
		colWip, ok := ss.wipBlock.colWips[colName]
		if !ok {
			log.Errorf("EncodeColumns: tried to add a backfill for a column with no colWip! %v. This should not happen", colName)
			continue
		}
		colWip.cstartidx = colWip.cbufidx
		copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_BACKFILL[:])
		colWip.cbufidx += 1
		// also do backfill dictEnc for this recnum
		checkAddDictEnc(colWip, VALTYPE_ENC_BACKFILL[:], ss.wipBlock.blockSummary.RecCount)
	}

	return maxIdx, matchedCol, nil
}

// encodeRawJsonObject walks one (flattened-key) JSON object and encodes every
// member into its column WIP buffer. currKey is the dotted prefix of the
// enclosing object ("" at the top level). Returns the updated maxIdx and
// matchedCol accumulators.
func (ss *SegStore) encodeRawJsonObject(currKey string, data []byte, maxIdx uint32, tsKey *string,
	matchedCol bool, signalType segutils.SIGNAL_TYPE) (uint32, bool, error) {
	handler := func(key []byte, value []byte, valueType jp.ValueType, off int) error {
		// Maybe push some state onto a stack here?
		var finalKey string
		var err error
		// Nested keys are flattened with '.' separators.
		if currKey == "" {
			finalKey = string(key)
		} else {
			finalKey = fmt.Sprintf("%s.%s", currKey, key)
		}
		switch valueType {
		case jp.Object:
			maxIdx, matchedCol, err = ss.encodeRawJsonObject(finalKey, value, maxIdx, tsKey, matchedCol, signalType)
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: obj currKey: %v, err: %v", currKey, err)
			}
		case jp.Array:
			// Jaeger traces get special array handling (dict-array / raw buffer);
			// everything else is flattened element-by-element.
			if signalType == SIGNAL_JAEGER_TRACES {
				maxIdx, matchedCol, err = ss.encodeRawJsonArray(finalKey, value, maxIdx, tsKey, matchedCol, signalType)
			} else {
				maxIdx, matchedCol, err = ss.encodeNonJaegerRawJsonArray(finalKey, value, maxIdx, tsKey, matchedCol, signalType)
			}
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: arr currKey: %v, err: %v", currKey, err)
			}
		case jp.String:
			strVal, err := jp.ParseString(value)
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: str currKey: %v, err: %v", currKey, err)
			}
			maxIdx, matchedCol, err = ss.encodeSingleString(finalKey, strVal, maxIdx, tsKey, matchedCol)
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: singstr currKey: %v, err: %v", currKey, err)
			}
		case jp.Number:
			// Try int64 first; fall back to float64 when the literal is not
			// integral. encodeSingleNumber errors are deliberately discarded.
			numVal, err := jp.ParseInt(value)
			if err != nil {
				fltVal, err := jp.ParseFloat(value)
				if err != nil {
					return fmt.Errorf("encodeRawJsonObject: flt currKey: %v, err: %v", currKey, err)
				}
				maxIdx, matchedCol, _ = ss.encodeSingleNumber(finalKey, fltVal, maxIdx, tsKey, matchedCol)
				return nil
			}
			maxIdx, matchedCol, _ = ss.encodeSingleNumber(finalKey, numVal, maxIdx, tsKey, matchedCol)
		case jp.Boolean:
			boolVal, err := jp.ParseBoolean(value)
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: bool currKey: %v, err: %v", currKey, err)
			}
			maxIdx, matchedCol, err = ss.encodeSingleBool(finalKey, boolVal, maxIdx, tsKey, matchedCol)
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: singbool currKey: %v, err: %v", currKey, err)
			}
		case jp.Null:
			maxIdx, matchedCol, err = ss.encodeSingleNull(finalKey, maxIdx, tsKey, matchedCol)
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: singnull currKey: %v, err: %v", currKey, err)
			}
		default:
			return fmt.Errorf("currKey: %v, received unknown type of %+s", currKey, valueType)
		}
		return nil
	}
	err := jp.ObjectEach(data, handler)
	return maxIdx, matchedCol, err
}

// encodeRawJsonArray handles arrays for the Jaeger-traces signal:
// "references" and "logs" are stored verbatim as raw-JSON buffers, every
// other array (e.g. "tags") as a dict-array of key/type/value entries.
// For any other signal type this is a no-op (the caller routes those to
// encodeNonJaegerRawJsonArray).
func (ss *SegStore) encodeRawJsonArray(currKey string, data []byte, maxIdx uint32, tsKey *string,
	matchedCol bool, signalType segutils.SIGNAL_TYPE) (uint32, bool, error) {
	var encErr error
	if signalType == SIGNAL_JAEGER_TRACES {
		if currKey != "references" && currKey != "logs" {
			maxIdx, matchedCol, encErr = ss.encodeSingleDictArray(currKey, data, maxIdx, tsKey, matchedCol, signalType)
			if encErr != nil {
				log.Infof("encodeRawJsonArray error %s", encErr)
				return maxIdx, matchedCol, encErr
			}
		} else {
			maxIdx, matchedCol, encErr = ss.encodeSingleRawBuffer(currKey, data, maxIdx, tsKey, matchedCol, signalType)
			if encErr != nil {
				return maxIdx, matchedCol, encErr
			}
		}
	}
	return maxIdx, matchedCol, nil
}

// encodeNonJaegerRawJsonArray flattens an array into per-element columns
// named "<currKey>.<index>", recursing into nested objects and arrays.
// The first error seen in the callback is captured into finalErr.
func (ss *SegStore) encodeNonJaegerRawJsonArray(currKey string, data []byte, maxIdx uint32, tsKey *string,
	matchedCol bool, signalType segutils.SIGNAL_TYPE) (uint32, bool, error) {
	i := 0
	var finalErr error
	_, aErr := jp.ArrayEach(data, func(value []byte, valueType jp.ValueType, offset int, err error) {
		var finalKey string
		var encErr error
		if currKey == "" {
			finalKey = fmt.Sprintf("%d", i)
		} else {
			finalKey = fmt.Sprintf("%s.%d", currKey, i)
		}
		i++
		switch valueType {
		case jp.Object:
			maxIdx, matchedCol, encErr = ss.encodeRawJsonObject(finalKey, value, maxIdx, tsKey, matchedCol, signalType)
			if encErr != nil {
				finalErr = encErr
				return
			}
		case jp.Array:
			maxIdx, matchedCol, encErr = ss.encodeNonJaegerRawJsonArray(finalKey, value, maxIdx, tsKey, matchedCol, signalType)
			if encErr != nil {
				finalErr = encErr
				return
			}
		case jp.String:
			strVal, encErr := jp.ParseString(value)
			if encErr != nil {
				finalErr = encErr
				return
			}
			maxIdx, matchedCol, encErr = ss.encodeSingleString(finalKey, strVal, maxIdx, tsKey, matchedCol)
			if encErr != nil {
				finalErr = encErr
				return
			}
		case jp.Number:
			// int64 first, float64 fallback — mirrors encodeRawJsonObject.
			numVal, encErr := jp.ParseInt(value)
			if encErr != nil {
				fltVal, encErr := jp.ParseFloat(value)
				if encErr != nil {
					finalErr = encErr
					return
				}
				maxIdx, matchedCol, _ = ss.encodeSingleNumber(finalKey, fltVal, maxIdx, tsKey, matchedCol)
				return
			}
			maxIdx, matchedCol, _ = ss.encodeSingleNumber(finalKey, numVal, maxIdx, tsKey, matchedCol)
		case jp.Boolean:
			boolVal, encErr := jp.ParseBoolean(value)
			if encErr != nil {
				finalErr = encErr
				return
			}
			maxIdx, matchedCol, encErr = ss.encodeSingleBool(finalKey, boolVal, maxIdx, tsKey, matchedCol)
			if encErr != nil {
				finalErr = encErr
				return
			}
		case jp.Null:
			maxIdx, matchedCol, encErr = ss.encodeSingleNull(finalKey, maxIdx, tsKey, matchedCol)
			if encErr != nil {
				finalErr = encErr
				return
			}
		default:
			finalErr = fmt.Errorf("received unknown type of %+s", valueType)
			return
		}
	})
	if aErr != nil {
		finalErr = aErr
	}
	return maxIdx, matchedCol, finalErr
}

// encodeSingleDictArray encodes a Jaeger tags-style array (objects carrying
// key/type/value fields) into the column's WIP buffer as one dict-array record.
func (ss *SegStore) encodeSingleDictArray(arraykey string, data []byte, maxIdx uint32,
	tsKey *string, matchedCol bool, signalType segutils.SIGNAL_TYPE) (uint32, bool, error) {
	// The timestamp column is encoded separately; skip it here.
	if arraykey == *tsKey {
		return maxIdx, matchedCol, nil
	}
	var finalErr error
	var colWip *ColWip
	colWip, _, matchedCol = ss.initAndBackFillColumn(arraykey, data, matchedCol)
	colBlooms := ss.wipBlock.columnBlooms
var bi *BloomIndex 283 var ok bool 284 bi, ok = colBlooms[arraykey] 285 if !ok { 286 bi = &BloomIndex{} 287 bi.uniqueWordCount = 0 288 bCount := getBlockBloomSize(bi) 289 bi.Bf = bloom.NewWithEstimates(uint(bCount), BLOOM_COLL_PROBABILITY) 290 colBlooms[arraykey] = bi 291 } 292 s := colWip.cbufidx 293 copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_DICT_ARRAY[:]) 294 colWip.cbufidx += 1 295 copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(0)) //placeholder for encoding length of array 296 colWip.cbufidx += 2 297 _, aErr := jp.ArrayEach(data, func(value []byte, valueType jp.ValueType, offset int, err error) { 298 switch valueType { 299 case jp.Object: 300 keyName, keyType, keyVal, err := getNestedDictEntries(value) 301 if err != nil { 302 log.Errorf("getNestedDictEntries error %+v", err) 303 return 304 } 305 if keyName == "" || keyType == "" || keyVal == "" { 306 err = fmt.Errorf("encodeSingleDictArray: Jaeger tags array should have key/value/type values") 307 log.Error(err) 308 return 309 } 310 //encode and copy keyName 311 n := uint16(len(keyName)) 312 copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(n)) 313 colWip.cbufidx += 2 314 copy(colWip.cbuf[colWip.cbufidx:], keyName) 315 colWip.cbufidx += uint32(n) 316 //check key type 317 //based on that encode key value 318 switch keyType { 319 case "string": 320 copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_SMALL_STRING[:]) 321 colWip.cbufidx += 1 322 n := uint16(len(keyVal)) 323 copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(n)) 324 colWip.cbufidx += 2 325 copy(colWip.cbuf[colWip.cbufidx:], keyVal) 326 colWip.cbufidx += uint32(n) 327 case "bool": 328 copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_BOOL[:]) 329 colWip.cbufidx += 1 330 n := uint16(len(keyVal)) 331 copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(n)) 332 colWip.cbufidx += 2 333 copy(colWip.cbuf[colWip.cbufidx:], keyVal) 334 colWip.cbufidx += uint32(n) 335 case "int64": 336 
copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_INT64[:]) 337 colWip.cbufidx += 1 338 n := uint16(len(keyVal)) 339 copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(n)) 340 colWip.cbufidx += 2 341 copy(colWip.cbuf[colWip.cbufidx:], keyVal) 342 colWip.cbufidx += uint32(n) 343 case "float64": 344 copy(colWip.cbuf[colWip.cbufidx:], segutils.VALTYPE_ENC_FLOAT64[:]) 345 colWip.cbufidx += 1 346 n := uint16(len(keyVal)) 347 copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(n)) 348 colWip.cbufidx += 2 349 copy(colWip.cbuf[colWip.cbufidx:], keyVal) 350 colWip.cbufidx += uint32(n) 351 default: 352 finalErr = fmt.Errorf("encodeSingleDictArray : received unknown key %+s", keyType) 353 } 354 if bi != nil { 355 bi.uniqueWordCount += addToBlockBloom(bi.Bf, []byte(keyName)) 356 bi.uniqueWordCount += addToBlockBloom(bi.Bf, []byte(keyVal)) 357 } 358 stats.AddSegStatsStr(ss.AllSst, keyName, keyVal, ss.wipBlock.bb, nil, false) 359 if colWip.cbufidx > maxIdx { 360 maxIdx = colWip.cbufidx 361 } 362 default: 363 finalErr = fmt.Errorf("encodeSingleDictArray : received unknown type of %+s", valueType) 364 return 365 } 366 }) 367 copy(colWip.cbuf[s+1:], utils.Uint16ToBytesLittleEndian(uint16(colWip.cbufidx-s-3))) 368 if aErr != nil { 369 finalErr = aErr 370 } 371 return maxIdx, matchedCol, finalErr 372 } 373 374 func getNestedDictEntries(data []byte) (string, string, string, error) { 375 var nkey, ntype, nvalue string 376 377 handler := func(key []byte, value []byte, valueType jp.ValueType, off int) error { 378 switch string(key) { 379 case "key": 380 if valueType != jp.String { 381 err := fmt.Errorf("getNestedDictEntries key should be of type string , found type %+v", valueType) 382 return err 383 } 384 nkey = string(value) 385 case "type": 386 ntype = string(value) 387 case "value": 388 nvalue = string(value) 389 default: 390 err := fmt.Errorf("getNestedDictEntries: received unknown key of %+s", key) 391 return err 392 } 393 return nil 394 } 395 err := 
jp.ObjectEach(data, handler) 396 return nkey, ntype, nvalue, err 397 398 } 399 400 func (ss *SegStore) encodeSingleRawBuffer(key string, value []byte, maxIdx uint32, 401 tsKey *string, matchedCol bool, signalType segutils.SIGNAL_TYPE) (uint32, bool, error) { 402 if key == *tsKey { 403 return maxIdx, matchedCol, nil 404 } 405 var colWip *ColWip 406 colWip, _, matchedCol = ss.initAndBackFillColumn(key, value, matchedCol) 407 colBlooms := ss.wipBlock.columnBlooms 408 var bi *BloomIndex 409 var ok bool 410 if key != "_type" && key != "_index" && key != "tags" { 411 _, ok = colBlooms[key] 412 if !ok { 413 bi = &BloomIndex{} 414 bi.uniqueWordCount = 0 415 bCount := getBlockBloomSize(bi) 416 bi.Bf = bloom.NewWithEstimates(uint(bCount), BLOOM_COLL_PROBABILITY) 417 colBlooms[key] = bi 418 } 419 } 420 //[utils.VALTYPE_RAW_JSON][raw-byte-len][raw-byte] 421 copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_RAW_JSON[:]) 422 colWip.cbufidx += 1 423 n := uint16(len(value)) 424 copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(n)) 425 colWip.cbufidx += 2 426 copy(colWip.cbuf[colWip.cbufidx:], value) 427 colWip.cbufidx += uint32(n) 428 429 if colWip.cbufidx > maxIdx { 430 maxIdx = colWip.cbufidx 431 } 432 return maxIdx, matchedCol, nil 433 } 434 435 func (ss *SegStore) encodeSingleString(key string, value string, maxIdx uint32, 436 tsKey *string, matchedCol bool) (uint32, bool, error) { 437 if key == *tsKey { 438 return maxIdx, matchedCol, nil 439 } 440 var colWip *ColWip 441 var recNum uint16 442 colWip, recNum, matchedCol = ss.initAndBackFillColumn(key, value, matchedCol) 443 colBlooms := ss.wipBlock.columnBlooms 444 var bi *BloomIndex 445 var ok bool 446 if key != "_type" && key != "_index" { 447 bi, ok = colBlooms[key] 448 if !ok { 449 bi = &BloomIndex{} 450 bi.uniqueWordCount = 0 451 bCount := getBlockBloomSize(bi) 452 bi.Bf = bloom.NewWithEstimates(uint(bCount), BLOOM_COLL_PROBABILITY) 453 colBlooms[key] = bi 454 } 455 } 456 s := colWip.cbufidx 457 
	colWip.WriteSingleString(value)

	if bi != nil {
		bi.uniqueWordCount += addToBlockBloom(bi.Bf, []byte(value))
	}
	if !ss.skipDe {
		// feed the encoded bytes (from offset s) into dict-encoding
		checkAddDictEnc(colWip, colWip.cbuf[s:colWip.cbufidx], recNum)
	}
	stats.AddSegStatsStr(ss.AllSst, key, value, ss.wipBlock.bb, nil, false)
	if colWip.cbufidx > maxIdx {
		maxIdx = colWip.cbufidx
	}
	return maxIdx, matchedCol, nil
}

// encodeSingleBool encodes one boolean column value as
// [VALTYPE_ENC_BOOL 1B][bool 1B] and tracks it in the column's bloom filter.
func (ss *SegStore) encodeSingleBool(key string, val bool, maxIdx uint32,
	tsKey *string, matchedCol bool) (uint32, bool, error) {
	if key == *tsKey {
		return maxIdx, matchedCol, nil
	}
	var colWip *ColWip
	colBlooms := ss.wipBlock.columnBlooms
	colWip, _, matchedCol = ss.initAndBackFillColumn(key, val, matchedCol)
	var bi *BloomIndex
	var ok bool

	bi, ok = colBlooms[key]
	if !ok {
		bi = &BloomIndex{}
		bi.uniqueWordCount = 0
		bCount := 10 // bools have at most two distinct values; a tiny filter suffices
		bi.Bf = bloom.NewWithEstimates(uint(bCount), BLOOM_COLL_PROBABILITY)
		colBlooms[key] = bi
	}
	copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_BOOL[:])
	colWip.cbufidx += 1
	copy(colWip.cbuf[colWip.cbufidx:], utils.BoolToBytesLittleEndian(val))
	colWip.cbufidx += 1

	if bi != nil {
		bi.uniqueWordCount += addToBlockBloom(bi.Bf, []byte(strconv.FormatBool(val)))
	}
	if colWip.cbufidx > maxIdx {
		maxIdx = colWip.cbufidx
	}
	return maxIdx, matchedCol, nil
}

// encodeSingleNull encodes a JSON null as a 1-byte backfill marker.
func (ss *SegStore) encodeSingleNull(key string, maxIdx uint32,
	tsKey *string, matchedCol bool) (uint32, bool, error) {
	if key == *tsKey {
		return maxIdx, matchedCol, nil
	}
	var colWip *ColWip
	colWip, _, matchedCol = ss.initAndBackFillColumn(key, nil, matchedCol)
	copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_BACKFILL[:])
	colWip.cbufidx += 1
	if colWip.cbufidx > maxIdx {
		maxIdx = colWip.cbufidx
	}
	return maxIdx, matchedCol, nil
}

// encodeSingleNumber encodes one numeric column value (int64 or float64)
// and updates the column's range index, dict-encoding and seg stats.
func (ss *SegStore) encodeSingleNumber(key string, value interface{}, maxIdx uint32,
	tsKey *string, matchedCol bool) (uint32, bool, error) {
	if key == *tsKey {
		return maxIdx, matchedCol, nil
	}
	var colWip *ColWip
	var recNum uint16
	colWip, recNum, matchedCol = ss.initAndBackFillColumn(key, value, matchedCol)
	colRis := ss.wipBlock.columnRangeIndexes
	segstats := ss.AllSst
	retLen := encSingleNumber(key, value, colWip.cbuf[:], colWip.cbufidx, colRis, recNum, segstats,
		ss.wipBlock.bb, colWip)
	colWip.cbufidx += retLen

	if colWip.cbufidx > maxIdx {
		maxIdx = colWip.cbufidx
	}
	return maxIdx, matchedCol, nil
}

// initAndBackFillColumn returns the WIP buffer for key, creating it (and
// backfilling markers for records already in the block) when the column is
// new. It also reports whether the column participates in a persistent query.
func (ss *SegStore) initAndBackFillColumn(key string, value interface{}, matchedCol bool) (*ColWip, uint16, bool) {
	allColWip := ss.wipBlock.colWips
	colBlooms := ss.wipBlock.columnBlooms
	colRis := ss.wipBlock.columnRangeIndexes
	allColsInBlock := ss.wipBlock.columnsInBlock
	recNum := ss.wipBlock.blockSummary.RecCount

	colWip, ok := allColWip[key]
	if !ok {
		colWip = InitColWip(ss.SegmentKey, key)
		allColWip[key] = colWip
		ss.AllSeenColumns[key] = true
	}
	_, ok = allColsInBlock[key]
	if !ok {
		// Column first seen mid-block: pad earlier records with backfill markers.
		if recNum != 0 {
			log.Debugf("EncodeColumns: newColumn=%v showed up in the middle, backfilling it now", key)
			backFillPastRecords(key, value, recNum, colBlooms, colRis, colWip)
		}
	}
	allColsInBlock[key] = true
	matchedCol = matchedCol || ss.pqTracker.isColumnInPQuery(key)
	colWip.cstartidx = colWip.cbufidx
	return colWip, recNum, matchedCol
}

// initMicroIndices creates the micro-index (bloom or range) matching the
// first observed value type of a new column.
func initMicroIndices(key string, val interface{}, colBlooms map[string]*BloomIndex,
	colRis map[string]*RangeIndex) {
	switch val.(type) {
	case string:
		bi := &BloomIndex{}
		bi.uniqueWordCount = 0
		bCount := getBlockBloomSize(bi)
		bi.Bf = bloom.NewWithEstimates(uint(bCount), BLOOM_COLL_PROBABILITY)
		colBlooms[key] = bi

	case float64, int64, uint64, json.Number:
		ri := &RangeIndex{}
		ri.Ranges = make(map[string]*Numbers, BLOCK_RI_MAP_SIZE)
		colRis[key] = ri

	case bool:
		// todo kunal, for bool type we need to keep a inverted index
		bi := &BloomIndex{}
		bi.uniqueWordCount = 0
		bCount := 10
		bi.Bf = bloom.NewWithEstimates(uint(bCount), BLOOM_COLL_PROBABILITY)
		colBlooms[key] = bi
	}
}

// backFillPastRecords writes one backfill marker per record already present
// in the block (for a column that first appeared mid-block) and seeds the
// column's dict-encoding with those record numbers. Returns bytes written.
func backFillPastRecords(key string, val interface{}, recNum uint16, colBlooms map[string]*BloomIndex,
	colRis map[string]*RangeIndex, colWip *ColWip) uint32 {
	initMicroIndices(key, val, colBlooms, colRis)
	packedLen := uint32(0)

	recArr := make([]uint16, recNum)
	for i := uint16(0); i < recNum; i++ {
		// only the type will be saved when we are backfilling
		copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_BACKFILL[:])
		colWip.cbufidx += 1
		packedLen += 1
		recArr[i] = i
	}
	// we will also init dictEnc for backfilled recnums
	colWip.deMap[string(VALTYPE_ENC_BACKFILL[:])] = recArr
	colWip.deCount++
	return packedLen
}

// encSingleNumber dispatches on the runtime type of val (int64 or float64 —
// the two types the JSON number parser produces), updating seg stats, the
// range index and dict-encoding. Returns the encoded size in bytes (0 on an
// unsupported type).
func encSingleNumber(key string, val interface{}, wipbuf []byte, idx uint32,
	colRis map[string]*RangeIndex, wRecNum uint16,
	segstats map[string]*SegStats, bb *bbp.ByteBuffer, colWip *ColWip) uint32 {

	ri, ok := colRis[key]
	if !ok {
		ri = &RangeIndex{}
		ri.Ranges = make(map[string]*Numbers, BLOCK_RI_MAP_SIZE)
		colRis[key] = ri
	}

	switch cval := val.(type) {
	case float64:
		addSegStatsNums(segstats, key, SS_FLOAT64, FPARM_INT64, FPARM_UINT64, cval,
			fmt.Sprintf("%v", cval), bb)
		valSize := encJsonNumber(key, SS_FLOAT64, FPARM_INT64, FPARM_UINT64, cval, wipbuf[:],
			idx, ri.Ranges)
		checkAddDictEnc(colWip, wipbuf[idx:idx+valSize], wRecNum)
		return valSize
	case int64:
		addSegStatsNums(segstats, key, SS_INT64, cval, FPARM_UINT64, FPARM_FLOAT64,
			fmt.Sprintf("%v", cval), bb)

		valSize := encJsonNumber(key, SS_INT64, cval, FPARM_UINT64, FPARM_FLOAT64, wipbuf[:],
			idx, ri.Ranges)
		checkAddDictEnc(colWip, wipbuf[idx:idx+valSize], wRecNum)
		return valSize

	default:
		log.Errorf("encSingleNumber: Tried to encode a non int/float value! value=%+v", cval)
	}
	return 0
}

// encJsonNumber writes one numeric value as [valtype 1B][8B little-endian]
// at wipbuf[idx:] and updates the column's range index. Returns bytes
// written (0 on an unknown numType).
func encJsonNumber(key string, numType SS_IntUintFloatTypes, intVal int64, uintVal uint64,
	fltVal float64, wipbuf []byte, idx uint32, blockRangeIndex map[string]*Numbers) uint32 {

	var valSize uint32

	switch numType {
	case SS_INT64:
		copy(wipbuf[idx:], VALTYPE_ENC_INT64[:])
		copy(wipbuf[idx+1:], utils.Int64ToBytesLittleEndian(int64(intVal)))
		valSize = 1 + 8
	case SS_UINT64:
		copy(wipbuf[idx:], VALTYPE_ENC_UINT64[:])
		copy(wipbuf[idx+1:], utils.Uint64ToBytesLittleEndian(uintVal))
		valSize = 1 + 8
	case SS_FLOAT64:
		copy(wipbuf[idx:], VALTYPE_ENC_FLOAT64[:])
		copy(wipbuf[idx+1:], utils.Float64ToBytesLittleEndian(fltVal))
		valSize = 1 + 8
	default:
		log.Errorf("encJsonNumber: unknown numType: %v", numType)
	}

	if blockRangeIndex != nil {
		updateRangeIndex(key, blockRangeIndex, numType, intVal, uintVal, fltVal)
	}

	return valSize
}

/*
Caller of this function can confidently cast the CValueEnclosure.CVal to one of the following types:

	bool    (if CValueEnclosure.Dtype = SS_DT_BOOL)
	uint64  (if CValueEnclosure.Dtype = SS_DT_UNSIGNED_NUM)
	int64   (if CValueEnclosure.Dtype = SS_DT_SIGNED_NUM)
	float64 (if CValueEnclosure.Dtype = SS_DT_FLOAT)
	string  (if CValueEnclosure.Dtype = SS_DT_STRING)
	array   (if CValueEnclosure.Dtype = SS_DT_ARRAY_DICT)
*/
/*
parameters:

	rec: byte slice holding one encoded column value (valtype byte first)
	qid: query id, used only in log messages

returns:

	CValueEnclosure: CVal encoding of this col entry
	uint16: len of this entry inside that was inside the byte slice
	error:
*/
func GetCvalFromRec(rec []byte, qid uint64) (CValueEnclosure, uint16, error) {

	if len(rec) == 0 {
		return CValueEnclosure{}, 0, errors.New("column value is empty")
	}

	var retVal CValueEnclosure
	var endIdx uint16
switch rec[0] { 700 701 case VALTYPE_ENC_SMALL_STRING[0]: 702 retVal.Dtype = SS_DT_STRING 703 // one byte for type & two for reclen 704 705 strlen := utils.BytesToUint16LittleEndian(rec[1:3]) 706 endIdx = strlen + 3 707 retVal.CVal = string(rec[3:endIdx]) 708 case VALTYPE_ENC_BOOL[0]: 709 retVal.Dtype = SS_DT_BOOL 710 if rec[1] == 0 { 711 retVal.CVal = false 712 } else { 713 retVal.CVal = true 714 } 715 endIdx = 2 716 case VALTYPE_ENC_INT8[0]: 717 retVal.Dtype = SS_DT_SIGNED_NUM 718 retVal.CVal = int64(int8(rec[1:][0])) 719 endIdx = 2 720 case VALTYPE_ENC_INT16[0]: 721 retVal.Dtype = SS_DT_SIGNED_NUM 722 retVal.CVal = int64(utils.BytesToInt16LittleEndian(rec[1:])) 723 endIdx = 3 724 case VALTYPE_ENC_INT32[0]: 725 retVal.Dtype = SS_DT_SIGNED_NUM 726 retVal.CVal = int64(utils.BytesToInt32LittleEndian(rec[1:])) 727 endIdx = 5 728 case VALTYPE_ENC_INT64[0]: 729 retVal.Dtype = SS_DT_SIGNED_NUM 730 retVal.CVal = utils.BytesToInt64LittleEndian(rec[1:]) 731 endIdx = 9 732 case VALTYPE_ENC_UINT8[0]: 733 retVal.Dtype = SS_DT_UNSIGNED_NUM 734 retVal.CVal = uint64((rec[1:])[0]) 735 endIdx = 2 736 case VALTYPE_ENC_UINT16[0]: 737 retVal.Dtype = SS_DT_UNSIGNED_NUM 738 retVal.CVal = uint64(utils.BytesToUint16LittleEndian(rec[1:])) 739 endIdx = 3 740 case VALTYPE_ENC_UINT32[0]: 741 retVal.Dtype = SS_DT_UNSIGNED_NUM 742 retVal.CVal = uint64(utils.BytesToUint32LittleEndian(rec[1:])) 743 endIdx = 5 744 case VALTYPE_ENC_UINT64[0]: 745 retVal.Dtype = SS_DT_UNSIGNED_NUM 746 retVal.CVal = utils.BytesToUint64LittleEndian(rec[1:]) 747 endIdx = 9 748 case VALTYPE_ENC_FLOAT64[0]: 749 retVal.Dtype = SS_DT_FLOAT 750 retVal.CVal = utils.BytesToFloat64LittleEndian(rec[1:]) 751 endIdx = 9 752 case VALTYPE_ENC_BACKFILL[0]: 753 retVal.Dtype = SS_DT_BACKFILL 754 retVal.CVal = nil 755 endIdx = 1 756 case VALTYPE_RAW_JSON[0]: 757 retVal.Dtype = SS_DT_RAW_JSON 758 strlen := utils.BytesToUint16LittleEndian(rec[1:3]) 759 endIdx = strlen + 3 760 data := rec[3:endIdx] 761 entries := make([]interface{}, 0) 
762 err := json.Unmarshal(data, &entries) 763 if err != nil { 764 log.Errorf("GetCvalFromRec: Error unmarshalling VALTYPE_RAW_JSON = %v", err) 765 return CValueEnclosure{}, 0, err 766 } 767 retVal.CVal = entries 768 case VALTYPE_DICT_ARRAY[0]: 769 retVal.Dtype = SS_DT_ARRAY_DICT 770 // one byte for type & two for reclen 771 totalLen := utils.BytesToInt16LittleEndian(rec[1:]) 772 idx := uint16(3) 773 cValArray := make([]map[string]interface{}, 0) 774 for idx < uint16(totalLen) { 775 cVal := make(map[string]interface{}) 776 strlen := utils.BytesToUint16LittleEndian(rec[idx : idx+2]) 777 idx += 2 778 keyVal := string(rec[idx : idx+strlen]) 779 idx += strlen 780 781 cVal["key"] = keyVal 782 switch rec[idx] { 783 case VALTYPE_ENC_SMALL_STRING[0]: 784 cVal["type"] = "string" 785 // one byte for type & two for reclen 786 strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3]) 787 idx += 3 788 cVal["value"] = string(rec[idx : idx+strlen]) 789 idx += strlen 790 case VALTYPE_ENC_BOOL[0]: 791 cVal["type"] = "bool" 792 strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3]) 793 idx += 3 794 cVal["value"] = string(rec[idx : idx+strlen]) 795 idx += strlen 796 case VALTYPE_ENC_INT64[0]: 797 cVal["type"] = "int64" 798 strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3]) 799 idx += 3 800 cVal["value"] = string(rec[idx : idx+strlen]) 801 idx += strlen 802 case VALTYPE_ENC_FLOAT64[0]: 803 cVal["type"] = "float64" 804 strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3]) 805 idx += 3 806 cVal["value"] = string(rec[idx : idx+strlen]) 807 idx += strlen 808 default: 809 log.Errorf("qid=%d, GetCvalFromRec:SS_DT_ARRAY_DICT unknown type=%v\n", qid, rec[idx]) 810 return retVal, endIdx, errors.New("invalid rec type") 811 } 812 cValArray = append(cValArray, cVal) 813 } 814 retVal.CVal = cValArray 815 endIdx = uint16(totalLen) 816 817 default: 818 log.Errorf("qid=%d, GetCvalFromRec: dont know how to convert type=%v\n", qid, rec[0]) 819 return retVal, endIdx, 
errors.New("invalid rec type") 820 } 821 822 return retVal, endIdx, nil 823 } 824 825 func WriteMockColSegFile(segkey string, numBlocks int, entryCount int) ([]map[string]*BloomIndex, 826 []*BlockSummary, []map[string]*RangeIndex, map[string]bool, map[uint16]*BlockMetadataHolder, 827 map[string]*ColSizeInfo) { 828 829 allBlockBlooms := make([]map[string]*BloomIndex, numBlocks) 830 allBlockRangeIdx := make([]map[string]*RangeIndex, numBlocks) 831 allBlockSummaries := make([]*BlockSummary, numBlocks) 832 allBlockOffsets := make(map[uint16]*BlockMetadataHolder) 833 segstats := make(map[string]*SegStats) 834 lencnames := uint8(12) 835 cnames := make([]string, lencnames) 836 mapCol := make(map[string]bool) 837 for cidx := uint8(0); cidx < lencnames; cidx += 1 { 838 currCol := fmt.Sprintf("key%v", cidx) 839 cnames[cidx] = currCol 840 mapCol[currCol] = true 841 } 842 843 tsKey := config.GetTimeStampKey() 844 allCols := make(map[string]bool) 845 // set up entries 846 for j := 0; j < numBlocks; j++ { 847 currBlockUint := uint16(j) 848 columnBlooms := make(map[string]*BloomIndex) 849 columnRangeIndexes := make(map[string]*RangeIndex) 850 colWips := make(map[string]*ColWip) 851 wipBlock := WipBlock{ 852 columnBlooms: columnBlooms, 853 columnRangeIndexes: columnRangeIndexes, 854 colWips: colWips, 855 pqMatches: make(map[string]*pqmr.PQMatchResults), 856 columnsInBlock: mapCol, 857 tomRollup: make(map[uint64]*RolledRecs), 858 tohRollup: make(map[uint64]*RolledRecs), 859 todRollup: make(map[uint64]*RolledRecs), 860 bb: bbp.Get(), 861 blockTs: make([]uint64, 0), 862 } 863 segStore := &SegStore{ 864 wipBlock: wipBlock, 865 SegmentKey: segkey, 866 AllSeenColumns: allCols, 867 pqTracker: initPQTracker(), 868 AllSst: segstats, 869 numBlocks: currBlockUint, 870 } 871 for i := 0; i < entryCount; i++ { 872 entry := make(map[string]interface{}) 873 entry[cnames[0]] = "match words 123 abc" 874 entry[cnames[1]] = "value1" 875 entry[cnames[2]] = i 876 entry[cnames[3]] = (i%2 == 0) 877 
			entry[cnames[4]] = strconv.FormatUint(uint64(i)*2, 10)
			entry[cnames[5]] = "batch-" + fmt.Sprint(j) + "-" + utils.RandomStringWithCharset(10)
			entry[cnames[6]] = (i * 2)
			entry[cnames[7]] = "batch-" + fmt.Sprint(j)
			entry[cnames[8]] = j
			entry[cnames[9]] = rand.Float64()
			entry[cnames[10]] = segkey
			entry[cnames[11]] = "record-batch-" + fmt.Sprint(i%2)

			timestp := uint64(i) + 1 // dont start with 0 as timestamp
			raw, _ := json.Marshal(entry)
			_, _, err := segStore.EncodeColumns(raw, timestp, &tsKey, SIGNAL_EVENTS)
			if err != nil {
				log.Errorf("WriteMockColSegFile: error packing entry: %s", err)
			}
			segStore.wipBlock.blockSummary.RecCount += 1
		}

		// capture this block's blooms/summary/range-index before moving on
		allBlockBlooms[j] = segStore.wipBlock.columnBlooms
		allBlockSummaries[j] = &segStore.wipBlock.blockSummary
		allBlockRangeIdx[j] = segStore.wipBlock.columnRangeIndexes
		allBlockOffsets[currBlockUint] = &BlockMetadataHolder{
			ColumnBlockOffset: make(map[string]int64),
			ColumnBlockLen:    make(map[string]uint32),
		}
		// Flush every column's WIP buffer to its .csg file and record the
		// block's per-column offset and length.
		for cname, colWip := range segStore.wipBlock.colWips {
			csgFname := fmt.Sprintf("%v_%v.csg", segkey, xxhash.Sum64String(cname))
			var encType []byte
			if cname == config.GetTimeStampKey() {
				// the timestamp column has its own encoding
				encType, _ = segStore.wipBlock.encodeTimestamps()
			} else {
				encType = ZSTD_COMLUNAR_BLOCK
			}
			blkLen, blkOffset, err := writeWip(colWip, encType)
			if err != nil {
				log.Errorf("WriteMockColSegFile: failed to write colsegfilename=%v, err=%v", csgFname, err)
			}
			allBlockOffsets[currBlockUint].ColumnBlockLen[cname] = blkLen
			allBlockOffsets[currBlockUint].ColumnBlockOffset[cname] = blkOffset
		}
	}

	allColsSizes := make(map[string]*ColSizeInfo)
	for cname := range mapCol {
		// NOTE(review): both filenames below use the ".csg" extension, so
		// CmiSize is read from the same file as CsgSize — presumably the first
		// should be the ".cmi" file; confirm against the non-mock writer.
		fnamecmi := fmt.Sprintf("%v_%v.csg", segkey, xxhash.Sum64String(cname))
		cmiSize, _ := ssutils.GetFileSizeFromDisk(fnamecmi)
		fnamecsg := fmt.Sprintf("%v_%v.csg", segkey, xxhash.Sum64String(cname))
		csgSize, _ := ssutils.GetFileSizeFromDisk(fnamecsg)
		allColsSizes[cname] = &ColSizeInfo{CmiSize: cmiSize, CsgSize: csgSize}
	}

	return allBlockBlooms, allBlockSummaries, allBlockRangeIdx, mapCol, allBlockOffsets, allColsSizes
}

// WriteMockTraceFile writes numBlocks blocks, each packing a single hard-coded
// mock Jaeger-trace record, into segkey's column files. It returns the
// per-block bloom indexes, block summaries, range indexes, the set of columns
// present, and per-block column offsets/lengths.
// NOTE(review): entryCount is accepted but unused — each block holds exactly
// one record; confirm whether callers expect entryCount records per block.
func WriteMockTraceFile(segkey string, numBlocks int, entryCount int) ([]map[string]*BloomIndex,
	[]*BlockSummary, []map[string]*RangeIndex, map[string]bool, map[uint16]*BlockMetadataHolder) {

	allBlockBlooms := make([]map[string]*BloomIndex, numBlocks)
	allBlockRangeIdx := make([]map[string]*RangeIndex, numBlocks)
	allBlockSummaries := make([]*BlockSummary, numBlocks)
	allBlockOffsets := make(map[uint16]*BlockMetadataHolder)

	segstats := make(map[string]*SegStats)

	// the only columns a mock trace record carries
	mapCol := make(map[string]bool)
	mapCol["tags"] = true
	mapCol["startTimeMillis"] = true
	mapCol["timestamp"] = true

	tsKey := config.GetTimeStampKey()
	allCols := make(map[string]bool)
	// set up entries
	for j := 0; j < numBlocks; j++ {
		currBlockUint := uint16(j)
		columnBlooms := make(map[string]*BloomIndex)
		columnRangeIndexes := make(map[string]*RangeIndex)
		colWips := make(map[string]*ColWip)
		// a fresh WIP block per iteration so each block is packed independently
		wipBlock := WipBlock{
			columnBlooms:       columnBlooms,
			columnRangeIndexes: columnRangeIndexes,
			colWips:            colWips,
			pqMatches:          make(map[string]*pqmr.PQMatchResults),
			columnsInBlock:     mapCol,
			tomRollup:          make(map[uint64]*RolledRecs),
			tohRollup:          make(map[uint64]*RolledRecs),
			todRollup:          make(map[uint64]*RolledRecs),
			bb:                 bbp.Get(),
			blockTs:            make([]uint64, 0),
		}
		segStore := &SegStore{
			wipBlock:       wipBlock,
			SegmentKey:     segkey,
			AllSeenColumns: allCols,
			pqTracker:      initPQTracker(),
			AllSst:         segstats,
			numBlocks:      currBlockUint,
		}
		// NOTE(review): the raw JSON below carries a trailing comma after the
		// "tags" array, which is not strict JSON — presumably tolerated by the
		// jsonparser-based encoder; confirm before reusing elsewhere.
		entries := []struct {
			entry []byte
		}{

			{
				[]byte(`{"tags": [
				{
					"key": "sampler.type",
					"type": "string",
					"value": "const"
				},
				{
					"key": "sampler.param",
					"type": "bool",
					"value": "true"
				},
				{
					"key": "http.status_code",
					"type": "int64",
					"value": "200"
				},
				{
					"key": "component",
					"type": "string",
					"value": "gRPC"
				},
				{
					"key": "retry_no",
					"type": "int64",
					"value": "1"
				}

				],

				}`,
				)},
		}

		entry := entries[0].entry
		timestp := uint64(2) + 1 // dont start with 0 as timestamp
		_, _, err := segStore.EncodeColumns(entry, timestp, &tsKey, SIGNAL_JAEGER_TRACES)
		if err != nil {
			log.Errorf("WriteMockTraceFile: error packing entry: %s", err)
		}
		segStore.wipBlock.blockSummary.RecCount += 1

		allBlockBlooms[j] = segStore.wipBlock.columnBlooms
		allBlockSummaries[j] = &segStore.wipBlock.blockSummary
		allBlockRangeIdx[j] = segStore.wipBlock.columnRangeIndexes
		allBlockOffsets[currBlockUint] = &BlockMetadataHolder{
			ColumnBlockOffset: make(map[string]int64),
			ColumnBlockLen:    make(map[string]uint32),
		}
		// flush each column WIP to disk and record this block's offsets/lengths
		for cname, colWip := range segStore.wipBlock.colWips {
			csgFname := fmt.Sprintf("%v_%v.csg", segkey, xxhash.Sum64String(cname))
			var encType []byte
			if cname == config.GetTimeStampKey() {
				encType, _ = segStore.wipBlock.encodeTimestamps()
			} else {
				encType = ZSTD_COMLUNAR_BLOCK
			}
			blkLen, blkOffset, err := writeWip(colWip, encType)
			if err != nil {
				log.Errorf("WriteMockTraceFile: failed to write tracer filename=%v, err=%v", csgFname, err)
			}
			allBlockOffsets[currBlockUint].ColumnBlockLen[cname] = blkLen
			allBlockOffsets[currBlockUint].ColumnBlockOffset[cname] = blkOffset
		}
	}
	return allBlockBlooms, allBlockSummaries, allBlockRangeIdx, mapCol, allBlockOffsets
}

// WriteMockMetricsSegment ingests entryCount randomly-generated OTSDB-style
// metric entries (random metric name, tags, and value; timestamps starting 24h
// ago) into the in-memory metrics buffers, then rotates every metrics segment
// (forced when forceRotate is true) and returns the rotated segments.
func WriteMockMetricsSegment(forceRotate bool, entryCount int) ([]*metrics.MetricsSegment, error) {

	timestamp := uint64(time.Now().Unix() - 24*3600)
	metric := []string{"test.metric.0", "test.metric.1", "test.metric.2", "test.metric.3"}
	car_type := []string{"Passenger car light", "Passenger car compact", "Passenger car heavy", "Passenger car mini", "Passenger car medium", "Pickup truck", "Van"}
	color := []string{"olive", "green", "maroon", "lime", "yellow", "white", "purple", "navy", "aqua"}
	group := []string{"group 0", "group 1"}
	fuel_type := []string{"Electric", "Diesel", "Gasoline", "CNG", "Ethanol", "Methanol"}
	model := []string{"C55 Amg", "325i", "Ranger Pickup 2wd", "Sts", "Pacifica 2wd", "Trailblazer 2wd", "E320 Cdi"}
	metrics.InitMetricsSegStore()
	for i := 0; i < entryCount; i++ {
		entry := make(map[string]interface{})
		entry["metric"] = metric[rand.Intn(len(metric))]
		entry["tags"] = map[string]string{
			"car_type":  car_type[rand.Intn(len(car_type))],
			"color":     color[rand.Intn(len(color))],
			"group":     group[rand.Intn(len(group))],
			"fuel_type": fuel_type[rand.Intn(len(fuel_type))],
			"model":     model[rand.Intn(len(model))],
		}
		entry["timestamp"] = timestamp + uint64(i)
		entry["value"] = rand.Intn(500)
		rawJson, _ := json.Marshal(entry)
		err := AddTimeSeriesEntryToInMemBuf(rawJson, SIGNAL_METRICS_OTSDB, 0)
		if err != nil {
			log.Errorf("WriteMockMetricsSegment: error adding time series entry to in memory buffer: %s", err)
			return nil, err
		}
	}
	retVal := make([]*metrics.MetricsSegment, len(metrics.GetAllMetricsSegments()))

	for idx, mSeg := range metrics.GetAllMetricsSegments() {
		err := mSeg.CheckAndRotate(forceRotate)
		if err != nil {
			log.Errorf("WriteMockMetricsSegment: unable to force rotate: %s", err)
			return nil, err
		}
		retVal[idx] = mSeg
	}

	return retVal, nil
}

/*
[BlockRangeIndexLen 4B] [rangeKeyData-1] [rangeKeyData-2]....
1091 1092 ** rangeKeyData ** 1093 [RangeKeyLen 2B] [ActualRangeKey xxBytes] [RangeNumType 1B] [MinNumValue 8B] [MaxNumValue 8B] 1094 */ 1095 1096 func EncodeRIBlock(blockRangeIndex map[string]*Numbers, blkNum uint16) (uint32, []byte, error) { 1097 var idx uint32 1098 1099 idx += uint32(RI_BLK_LEN_SIZE) 1100 1101 blkRIBuf := make([]byte, RI_SIZE) 1102 1103 // copy the blockNum 1104 copy(blkRIBuf[idx:], utils.Uint16ToBytesLittleEndian(blkNum)) 1105 idx += 2 1106 1107 copy(blkRIBuf[idx:], CMI_RANGE_INDEX) 1108 idx += 1 // for CMI type 1109 1110 for key, item := range blockRangeIndex { 1111 if len(blkRIBuf) < int(idx) { 1112 newSlice := make([]byte, RI_SIZE) 1113 blkRIBuf = append(blkRIBuf, newSlice...) 1114 } 1115 copy(blkRIBuf[idx:], utils.Uint16ToBytesLittleEndian(uint16(len(key)))) 1116 idx += 2 1117 n := copy(blkRIBuf[idx:], key) 1118 idx += uint32(n) 1119 switch item.NumType { 1120 case RNT_UNSIGNED_INT: 1121 copy(blkRIBuf[idx:], VALTYPE_ENC_RNT_UNSIGNED_INT[:]) 1122 idx += 1 1123 copy(blkRIBuf[idx:], utils.Uint64ToBytesLittleEndian(item.Min_uint64)) 1124 idx += 8 1125 copy(blkRIBuf[idx:], utils.Uint64ToBytesLittleEndian(item.Max_uint64)) 1126 idx += 8 1127 case RNT_SIGNED_INT: 1128 copy(blkRIBuf[idx:], VALTYPE_ENC_RNT_SIGNED_INT[:]) 1129 idx += 1 1130 copy(blkRIBuf[idx:], utils.Int64ToBytesLittleEndian(item.Min_int64)) 1131 idx += 8 1132 copy(blkRIBuf[idx:], utils.Int64ToBytesLittleEndian(item.Max_int64)) 1133 idx += 8 1134 case RNT_FLOAT64: 1135 copy(blkRIBuf[idx:], VALTYPE_ENC_RNT_FLOAT64[:]) 1136 idx += 1 1137 copy(blkRIBuf[idx:], utils.Float64ToBytesLittleEndian(item.Min_float64)) 1138 idx += 8 1139 copy(blkRIBuf[idx:], utils.Float64ToBytesLittleEndian(item.Max_float64)) 1140 idx += 8 1141 } 1142 } 1143 // copy the recordlen at the start of the buf 1144 copy(blkRIBuf[0:], utils.Uint32ToBytesLittleEndian(uint32(idx-RI_BLK_LEN_SIZE))) 1145 // log.Infof("EncodeRIBlock EncodeRIBlock=%v", blkRIBuf[:idx]) 1146 return idx, blkRIBuf, nil 1147 } 1148 1149 func (ss 
*SegStore) encodeTime(recordTimeMS uint64, tsKey *string) { 1150 allColWip := ss.wipBlock.colWips 1151 allColsInBlock := ss.wipBlock.columnsInBlock 1152 tsWip, ok := allColWip[*tsKey] 1153 if !ok { 1154 tsWip = InitColWip(ss.SegmentKey, *tsKey) 1155 allColWip[*tsKey] = tsWip 1156 ss.AllSeenColumns[*tsKey] = true 1157 } 1158 // we will never need to backfill a ts key 1159 allColsInBlock[*tsKey] = true 1160 if int(ss.wipBlock.blockSummary.RecCount) >= len(ss.wipBlock.blockTs) { 1161 newslice := make([]uint64, WIP_NUM_RECS) 1162 ss.wipBlock.blockTs = append(ss.wipBlock.blockTs, newslice...) 1163 } 1164 ss.wipBlock.blockTs[ss.wipBlock.blockSummary.RecCount] = recordTimeMS 1165 tsWip.cbufidx = 1 // just so the flush/append gets called 1166 1167 // calculate rollups 1168 tom := (recordTimeMS / MS_IN_MIN) * MS_IN_MIN 1169 toh := (recordTimeMS / MS_IN_HOUR) * MS_IN_HOUR 1170 tod := (recordTimeMS / MS_IN_DAY) * MS_IN_DAY 1171 ss.wipBlock.adjustEarliestLatestTimes(recordTimeMS) 1172 addRollup(ss.wipBlock.tomRollup, tom, ss.wipBlock.blockSummary.RecCount) 1173 addRollup(ss.wipBlock.tohRollup, toh, ss.wipBlock.blockSummary.RecCount) 1174 addRollup(ss.wipBlock.todRollup, tod, ss.wipBlock.blockSummary.RecCount) 1175 1176 } 1177 1178 func addRollup(rrmap map[uint64]*RolledRecs, rolledTs uint64, lastRecNum uint16) { 1179 1180 var rr *RolledRecs 1181 var ok bool 1182 rr, ok = rrmap[rolledTs] 1183 if !ok { 1184 mr := pqmr.CreatePQMatchResults(WIP_NUM_RECS) 1185 rr = &RolledRecs{MatchedRes: mr} 1186 rrmap[rolledTs] = rr 1187 } 1188 rr.MatchedRes.AddMatchedRecord(uint(lastRecNum)) 1189 rr.lastRecNum = lastRecNum 1190 } 1191 1192 func WriteMockTsRollup(segkey string) error { 1193 1194 ss := &SegStore{suffix: 1, lock: sync.Mutex{}, SegmentKey: segkey} 1195 1196 wipBlock := createMockTsRollupWipBlock(segkey) 1197 ss.wipBlock = *wipBlock 1198 err := ss.writeWipTsRollups("timestamp") 1199 return err 1200 } 1201 1202 func createMockTsRollupWipBlock(segkey string) *WipBlock { 1203 1204 
config.InitializeTestingConfig() 1205 defer os.RemoveAll(config.GetDataPath()) // we just create a suffix file during segstore creation 1206 1207 cTime := uint64(time.Now().UnixMilli()) 1208 lencnames := uint8(2) 1209 cnames := make([]string, lencnames) 1210 for cidx := uint8(0); cidx < lencnames; cidx += 1 { 1211 currCol := fmt.Sprintf("fortscheckkey-%v", cidx) 1212 cnames[cidx] = currCol 1213 } 1214 sId := "ts-rollup" 1215 segstore, err := getSegStore(sId, cTime, "test", 0) 1216 if err != nil { 1217 log.Errorf("createMockTsRollupWipBlock, getSegstore err=%v", err) 1218 return nil 1219 } 1220 tsKey := config.GetTimeStampKey() 1221 entryCount := 1000 1222 1223 startTs := uint64(1652222966645) // Tuesday, May 10, 2022 22:49:26.645 1224 tsincr := uint64(7200) // so that we have 2 hours, 2 days, and > 2mins buckets 1225 1226 runningTs := startTs 1227 for i := 0; i < entryCount; i++ { 1228 // t.Logf("TestTimestampEncoding: ts=%v", runningTs) 1229 record_json := make(map[string]interface{}) 1230 record_json[cnames[0]] = "value1" 1231 record_json[cnames[1]] = json.Number(fmt.Sprint(i)) 1232 rawJson, _ := json.Marshal(record_json) 1233 _, _, err := segstore.EncodeColumns(rawJson, runningTs, &tsKey, SIGNAL_EVENTS) 1234 if err != nil { 1235 log.Errorf("Error:WriteMockColSegFile: error packing entry: %s", err) 1236 } 1237 segstore.wipBlock.blockSummary.RecCount += 1 1238 segstore.adjustEarliestLatestTimes(runningTs) 1239 runningTs += tsincr 1240 } 1241 1242 return &segstore.wipBlock 1243 } 1244 1245 // EncodeBlocksum: format as below 1246 /* 1247 [SummaryLen 4B] [blkNum 2B] [highTs 8B] [lowTs 8B] [recCount 2B] [numColumns 2B] [ColumnBlkInfo] 1248 1249 ColumnBlkInfo : ... 1250 [cnameLen 2B] [COlName xxB] [blkOff 8B] [blkLen 4B]... 

*/

// EncodeBlocksum serializes one block's summary plus its per-column
// offset/length table into blockSummBuf (growing it if needed) using the
// layout documented above, and returns the packed length and the buffer.
func EncodeBlocksum(bmh *BlockMetadataHolder, bsum *BlockSummary,
	blockSummBuf []byte, blkNum uint16) (uint32, []byte, error) {

	var idx uint32

	//check if blockSummBuf is enough to pack blocksummary data
	// Each BlockSummary entry = xx bytes
	// summLen *4 bytes) + blkNum 2 bytes + bsum.HighTs(8 bytes) + bsum.LowTs(8 bytes) + bsum.RecCoun(2 bytes)
	// + N * [ 2 (cnamelen) + (actualCnamLen) + 8 (blkOff) + 4 (blkLen)]

	clen := 0
	numCols := uint16(0)
	for cname := range bmh.ColumnBlockOffset {
		clen += len(cname)
		numCols++
	}
	// summLen + blkNum + highTs + lowTs + recCount + numCols + totalCnamesLen + N * (cnameLenHolder + blkOff + blkLen)
	requiredLen := 4 + 2 + 8 + 8 + 2 + 2 + clen + len(bmh.ColumnBlockOffset)*(2+8+4)

	if len(blockSummBuf) < requiredLen {
		newSlice := make([]byte, requiredLen-len(blockSummBuf))
		blockSummBuf = append(blockSummBuf, newSlice...)
	}

	// reserve first 4 bytes for BLOCK_SUMMARY_LEN.
	idx += 4

	copy(blockSummBuf[idx:], utils.Uint16ToBytesLittleEndian(blkNum))
	idx += 2
	copy(blockSummBuf[idx:], utils.Uint64ToBytesLittleEndian(bsum.HighTs))
	idx += 8
	copy(blockSummBuf[idx:], utils.Uint64ToBytesLittleEndian(bsum.LowTs))
	idx += 8
	copy(blockSummBuf[idx:], utils.Uint16ToBytesLittleEndian(bsum.RecCount))
	idx += 2
	copy(blockSummBuf[idx:], utils.Uint16ToBytesLittleEndian(numCols))
	idx += 2

	// ColumnBlkInfo: [cnameLen 2B] [cname] [blkOff 8B] [blkLen 4B] per column
	for cname, cOff := range bmh.ColumnBlockOffset {
		copy(blockSummBuf[idx:], utils.Uint16ToBytesLittleEndian(uint16(len(cname))))
		idx += 2
		copy(blockSummBuf[idx:], cname)
		idx += uint32(len(cname))
		copy(blockSummBuf[idx:], utils.Int64ToBytesLittleEndian(cOff))
		idx += 8
		copy(blockSummBuf[idx:], utils.Uint32ToBytesLittleEndian(bmh.ColumnBlockLen[cname]))
		idx += 4
	}

	// copy the summlen at the start of the buf
	copy(blockSummBuf[0:], utils.Uint32ToBytesLittleEndian(uint32(idx)))

	return idx, blockSummBuf, nil
}

// WriteMockBlockSummary appends an encoded summary for every block in
// blockSums (using the matching BlockMetadataHolder from allBmh) to the named
// file. Errors are logged and abort the write; a failed Sync is fatal.
func WriteMockBlockSummary(file string, blockSums []*BlockSummary,
	allBmh map[uint16]*BlockMetadataHolder) {
	fd, err := os.OpenFile(file, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
	if err != nil {
		log.Errorf("WriteMockBlockSummary: open failed blockSummaryFname=%v, err=%v", file, err)
		return
	}

	defer fd.Close()

	for blkNum, block := range blockSums {
		blkSumBuf := make([]byte, BLOCK_SUMMARY_SIZE)
		packedLen, _, err := EncodeBlocksum(allBmh[uint16(blkNum)], block, blkSumBuf[0:], uint16(blkNum))

		if err != nil {
			log.Errorf("WriteMockBlockSummary: EncodeBlocksum: Failed to encode blocksummary=%+v, err=%v", block, err)
			return
		}
		if _, err := fd.Write(blkSumBuf[:packedLen]); err != nil {
			log.Errorf("WriteMockBlockSummary: write failed blockSummaryFname=%v, err=%v", file, err)
			return
		}
	}
	err = fd.Sync()
	if err != nil {
		log.Fatal(err)
	}
}

// checkAddDictEnc records recNum under the dictionary word cval while the
// column's cardinality stays below wipCardLimit; a new word bumps deCount.
// NOTE(review): once deCount reaches wipCardLimit, recNums stop being
// appended even for words already present in deMap, so the dict encoding for
// such a block is incomplete — presumably callers check deCount against the
// limit and fall back to raw encoding; confirm.
func checkAddDictEnc(colWip *ColWip, cval []byte, recNum uint16) {
	if colWip.deCount < wipCardLimit {
		recs, ok := colWip.deMap[string(cval)]
		if !ok {
			recs = make([]uint16, 0)
			colWip.deCount += 1
		}
		recs = append(recs, recNum)
		colWip.deMap[string(cval)] = recs
		// todo we optimize this code, by pre-allocing a fixed length of recs, keep an idx, then add it to recs
		// advantages: 1) we avoid extending the array. 2) we avoid inserting in the map on every rec
	}
}

// SetCardinalityLimit overrides the dict-encoding cardinality cutoff.
// NOTE(review): writes the package-level wipCardLimit without synchronization;
// intended for test setup before ingestion starts — confirm callers.
func SetCardinalityLimit(val uint16) {
	wipCardLimit = val
}

/*
Packing format for dictionary encoding
[NumDictWords 2B] [dEntry1 XX] [dEntry2 XX] ...

dEntry1 -- format
[word1Len 2B] [ActualWord] [numRecs 2B] [recNum1 2B][recNum2 2B]....

*/

// PackDictEnc rewrites colWip.cbuf in place with the dictionary encoding
// described above, built from colWip.deMap/deCount.
// NOTE(review): no bounds checks — assumes cbuf is large enough for the packed
// output; copy() would silently clamp otherwise. Confirm cbuf sizing upstream.
func PackDictEnc(colWip *ColWip) {

	colWip.cbufidx = 0
	// reuse the existing cbuf
	// copy num of dict words
	copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(colWip.deCount))
	colWip.cbufidx += 2

	for dword, recNumsArr := range colWip.deMap {

		// copy the actual dict word , the TLV is packed inside the dword
		copy(colWip.cbuf[colWip.cbufidx:], []byte(dword))
		colWip.cbufidx += uint32(len(dword))

		// copy num of records
		numRecs := uint16(len(recNumsArr))
		copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(numRecs))
		colWip.cbufidx += 2

		for i := uint16(0); i < numRecs; i++ {
			// copy the recNum
			copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(recNumsArr[i]))
			colWip.cbufidx += 2
		}
	}
}

// addSegStatsStr bumps cname's count and feeds strVal into its HLL
// cardinality sketch, creating a non-numeric SegStats entry on first sight.
// bb is scratch space reused across calls to avoid per-call allocation.
func addSegStatsStr(segstats map[string]*SegStats, cname string, strVal string,
	bb *bbp.ByteBuffer) {

	var stats *SegStats
	var ok bool
	stats, ok = segstats[cname]
	if !ok {
		stats = &SegStats{
			IsNumeric: false,
			Count:     0,
			Hll:       hyperloglog.New16()}

		segstats[cname] = stats
	}

	stats.Count++
	bb.Reset()
	_, _ = bb.WriteString(strVal)
	stats.Hll.Insert(bb.B)
}

// addSegStatsNums updates cname's numeric stats (min/max/sum via processStats)
// and its HLL sketch with one numeric value. Exactly one of intVal/uintVal/
// fltVal is meaningful, selected by inNumType; numstr is the value's string
// form used only for the HLL insert. A column previously seen as non-numeric
// is upgraded in place, keeping its existing count and HLL.
func addSegStatsNums(segstats map[string]*SegStats, cname string,
	inNumType SS_IntUintFloatTypes, intVal int64, uintVal uint64,
	fltVal float64, numstr string, bb *bbp.ByteBuffer) {

	var stats *SegStats
	var ok bool
	stats, ok = segstats[cname]
	if !ok {
		// Min starts at the maximum representable values and Max at the
		// minimum, so the first real value always replaces them.
		numStats := &NumericStats{
			Min: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
				IntgrVal: math.MaxInt64,
				FloatVal: math.MaxFloat64,
			},
			Max: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
				IntgrVal: math.MinInt64,
				FloatVal: math.SmallestNonzeroFloat64,
			},
			Sum: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
				IntgrVal: 0,
				FloatVal: 0},
		}
		stats = &SegStats{
			IsNumeric: true,
			Count:     0,
			Hll:       hyperloglog.New16(),
			NumStats:  numStats,
		}
		segstats[cname] = stats
	}

	// prior entries were non numeric, so we should init NumStats, but keep the hll and count vars
	if stats.NumStats == nil {
		numStats := &NumericStats{
			Min: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
				IntgrVal: math.MaxInt64,
				FloatVal: math.MaxFloat64,
			},
			Max: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
				IntgrVal: math.MinInt64,
				FloatVal: math.SmallestNonzeroFloat64,
			},
			Sum: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
				IntgrVal: 0,
				FloatVal: 0},
		}
		stats.NumStats = numStats
		stats.IsNumeric = true // TODO: what if we have a mix of numeric and non-numeric
	}

	bb.Reset()
	_, _ = bb.WriteString(numstr)
	stats.Hll.Insert(bb.B)
	processStats(stats, inNumType, intVal, uintVal, fltVal)
}

// processStats folds one numeric value into stats: increments Count and
// updates Min/Max/Sum. Integer stats are kept as int64 until the first float
// arrives, at which point Min/Max/Sum are upgraded to SS_DT_FLOAT and all
// later integers are folded in as float64.
func processStats(stats *SegStats, inNumType SS_IntUintFloatTypes, intVal int64,
	uintVal uint64, fltVal float64) {

	stats.Count++

	// normalize the incoming integer (unsigned values are cast to int64)
	var inIntgrVal int64
	switch inNumType {
	case SS_UINT8, SS_UINT16, SS_UINT32, SS_UINT64:
		inIntgrVal = int64(uintVal)
	case SS_INT8, SS_INT16, SS_INT32, SS_INT64:
		inIntgrVal = intVal
	}

	// we just use the Min stats for stored val comparison but apply the same
	// logic to max and sum
	switch inNumType {
	case SS_FLOAT64:
		if stats.NumStats.Min.Ntype == SS_DT_FLOAT {
			// incoming float, stored is float, simple min
			stats.NumStats.Min.FloatVal = math.Min(stats.NumStats.Min.FloatVal, fltVal)
			stats.NumStats.Max.FloatVal = math.Max(stats.NumStats.Max.FloatVal, fltVal)
			stats.NumStats.Sum.FloatVal = stats.NumStats.Sum.FloatVal + fltVal
		} else {
			// incoming float, stored is non-float, upgrade it
			stats.NumStats.Min.FloatVal = math.Min(float64(stats.NumStats.Min.IntgrVal), fltVal)
			stats.NumStats.Min.Ntype = SS_DT_FLOAT

			stats.NumStats.Max.FloatVal = math.Max(float64(stats.NumStats.Max.IntgrVal), fltVal)
			stats.NumStats.Max.Ntype = SS_DT_FLOAT

			stats.NumStats.Sum.FloatVal = float64(stats.NumStats.Sum.IntgrVal) + fltVal
			stats.NumStats.Sum.Ntype = SS_DT_FLOAT
		}
	// incoming is NON-float
	default:
		if stats.NumStats.Min.Ntype == SS_DT_FLOAT {
			// incoming non-float, stored is float, cast it
			stats.NumStats.Min.FloatVal = math.Min(stats.NumStats.Min.FloatVal, float64(inIntgrVal))
			stats.NumStats.Max.FloatVal = math.Max(stats.NumStats.Max.FloatVal, float64(inIntgrVal))
			stats.NumStats.Sum.FloatVal = stats.NumStats.Sum.FloatVal + float64(inIntgrVal)
		} else {
			// incoming non-float, stored is non-float, simple min
			stats.NumStats.Min.IntgrVal = utils.MinInt64(stats.NumStats.Min.IntgrVal, inIntgrVal)
			stats.NumStats.Max.IntgrVal = utils.MaxInt64(stats.NumStats.Max.IntgrVal, inIntgrVal)
			stats.NumStats.Sum.IntgrVal = stats.NumStats.Sum.IntgrVal + inIntgrVal
		}
	}

}

// getColByteSlice returns the encoded column value at the start of rec
// (including its 1B type tag) and the number of bytes it occupies, derived
// from the type tag: fixed widths for numeric/bool/backfill values, and a 2B
// little-endian length prefix for small strings. Returns an error for an
// empty record or an unknown type tag; qid is used only for error logging.
func getColByteSlice(rec []byte, qid uint64) ([]byte, uint16, error) {

	if len(rec) == 0 {
		return []byte{}, 0, errors.New("column value is empty")
	}

	var endIdx uint16
	switch rec[0] {

	case VALTYPE_ENC_SMALL_STRING[0]:
		// [tag 1B] [strlen 2B] [string bytes]
		strlen := utils.BytesToUint16LittleEndian(rec[1:3])
		endIdx = strlen + 3
	case VALTYPE_ENC_BOOL[0], VALTYPE_ENC_INT8[0], VALTYPE_ENC_UINT8[0]:
		endIdx = 2
	case VALTYPE_ENC_INT16[0], VALTYPE_ENC_UINT16[0]:
		endIdx = 3
	case VALTYPE_ENC_INT32[0], VALTYPE_ENC_UINT32[0]:
		endIdx = 5
	case VALTYPE_ENC_INT64[0], VALTYPE_ENC_UINT64[0], VALTYPE_ENC_FLOAT64[0]:
		endIdx = 9
	case VALTYPE_ENC_BACKFILL[0]:
		// backfilled (absent) value: tag byte only
		endIdx = 1
	default:
		log.Errorf("qid=%d, getColByteSlice: dont know how to convert type=%v\n", qid, rec[0])
		return []byte{}, endIdx, errors.New("invalid rec type")
	}

	return rec[0:endIdx], endIdx, nil
}