/*
Copyright 2023.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package writer

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"os"
	"path"
	"sort"
	"strconv"
	"sync"
	"sync/atomic"
	"time"

	"github.com/bits-and-blooms/bloom/v3"
	"github.com/cespare/xxhash"
	"github.com/siglens/siglens/pkg/blob"
	"github.com/siglens/siglens/pkg/blob/ssutils"
	"github.com/siglens/siglens/pkg/common/fileutils"
	"github.com/siglens/siglens/pkg/config"
	"github.com/siglens/siglens/pkg/instrumentation"
	"github.com/siglens/siglens/pkg/querytracker"
	pqsmeta "github.com/siglens/siglens/pkg/segment/query/pqs/meta"
	"github.com/siglens/siglens/pkg/segment/structs"
	"github.com/siglens/siglens/pkg/segment/utils"
	"github.com/siglens/siglens/pkg/segment/writer/suffix"
	"github.com/siglens/siglens/pkg/usageStats"
	toputils "github.com/siglens/siglens/pkg/utils"

	"github.com/siglens/siglens/pkg/segment/pqmr"
	bbp "github.com/valyala/bytebufferpool"

	log "github.com/sirupsen/logrus"
)

// MaxAgileTreeNodeCount is the star-tree node count above which
// checkAndRotateColFiles forces a segment rotation.
const MaxAgileTreeNodeCount = 8_000_000

// colWipsSizeLimit is the number of column WIPs above which resetWipBlock
// logs an error and replaces the colWips map instead of resetting its entries.
const colWipsSizeLimit = 2000 // We shouldn't exceed this during normal usage.
55 56 // SegStore Individual stream buffer 57 type SegStore struct { 58 lock sync.Mutex 59 earliest_millis uint64 // earliest timestamp of a logline here 60 latest_millis uint64 // latest timestamp of a logline here 61 wipBlock WipBlock 62 pqNonEmptyResults map[string]bool // map pqid => true if segstream matched > 0 records 63 // segment related data 64 SegmentKey string 65 segbaseDir string 66 suffix uint64 67 lastUpdated time.Time 68 VirtualTableName string 69 RecordCount int 70 AllSeenColumns map[string]bool 71 pqTracker *PQTracker 72 numBlocks uint16 73 BytesReceivedCount uint64 74 OnDiskBytes uint64 // running sum of cmi/csg/bsu file sizes 75 skipDe bool // kibana docs dont need dict enc, hence this flag 76 timeCreated time.Time 77 AllSst map[string]*structs.SegStats // map[colName] => SegStats_of_each_column 78 sbuilder StarTreeBuilder 79 usingSegTree bool 80 OrgId uint64 81 firstTime bool 82 } 83 84 // helper struct to keep track of persistent queries and columns that need to be searched 85 type PQTracker struct { 86 hasWildcard bool 87 colNames map[string]bool 88 PQNodes map[string]*structs.SearchNode // maps pqid to search node 89 } 90 91 func InitSegStore( 92 segmentKey string, 93 segbaseDir string, 94 suffix uint64, 95 virtualTableName string, 96 skipDe bool, 97 orgId uint64, 98 usingSegTree bool, 99 highTs uint64, 100 lowTs uint64, 101 ) *SegStore { 102 now := time.Now() 103 ss := SegStore{ 104 lock: sync.Mutex{}, 105 pqNonEmptyResults: make(map[string]bool), 106 SegmentKey: segmentKey, 107 segbaseDir: segbaseDir, 108 suffix: suffix, 109 lastUpdated: now, 110 VirtualTableName: virtualTableName, 111 AllSeenColumns: make(map[string]bool), 112 pqTracker: initPQTracker(), 113 skipDe: skipDe, 114 timeCreated: now, 115 AllSst: make(map[string]*structs.SegStats), 116 usingSegTree: usingSegTree, 117 OrgId: orgId, 118 firstTime: true, 119 } 120 121 ss.initWipBlock() 122 ss.wipBlock.blockSummary.HighTs = highTs 123 ss.wipBlock.blockSummary.LowTs = lowTs 124 125 
return &ss 126 } 127 128 func (segstore *SegStore) initWipBlock() { 129 130 segstore.wipBlock = WipBlock{ 131 columnBlooms: make(map[string]*BloomIndex), 132 columnRangeIndexes: make(map[string]*RangeIndex), 133 columnsInBlock: make(map[string]bool), 134 pqMatches: make(map[string]*pqmr.PQMatchResults), 135 colWips: make(map[string]*ColWip), 136 bb: bbp.Get(), 137 } 138 segstore.wipBlock.tomRollup = make(map[uint64]*RolledRecs) 139 segstore.wipBlock.tohRollup = make(map[uint64]*RolledRecs) 140 segstore.wipBlock.todRollup = make(map[uint64]*RolledRecs) 141 } 142 143 func (segstore *SegStore) resetWipBlock(forceRotate bool) error { 144 145 segstore.wipBlock.maxIdx = 0 146 147 if len(segstore.wipBlock.colWips) > colWipsSizeLimit { 148 log.Errorf("resetWipBlock: colWips size exceeds %v; current size is %v for segKey %v", 149 colWipsSizeLimit, len(segstore.wipBlock.colWips), segstore.SegmentKey) 150 151 segstore.wipBlock.colWips = make(map[string]*ColWip) 152 } else { 153 for _, cwip := range segstore.wipBlock.colWips { 154 cwip.cbufidx = 0 155 cwip.cstartidx = 0 156 157 cwip.deCount = 0 158 for dword := range cwip.deMap { 159 delete(cwip.deMap, dword) 160 } 161 } 162 } 163 164 for _, bi := range segstore.wipBlock.columnBlooms { 165 bi.uniqueWordCount = 0 166 blockBloomElementCount := getBlockBloomSize(bi) 167 bi.Bf = bloom.NewWithEstimates(uint(blockBloomElementCount), utils.BLOOM_COLL_PROBABILITY) 168 } 169 170 for k := range segstore.wipBlock.columnRangeIndexes { 171 delete(segstore.wipBlock.columnRangeIndexes, k) 172 } 173 174 segstore.wipBlock.blockSummary.HighTs = 0 175 segstore.wipBlock.blockSummary.LowTs = 0 176 numPrevRec := segstore.wipBlock.blockSummary.RecCount 177 segstore.wipBlock.blockSummary.RecCount = 0 178 179 // delete keys from map to keep underlying storage 180 for col := range segstore.wipBlock.columnsInBlock { 181 delete(segstore.wipBlock.columnsInBlock, col) 182 } 183 184 for pqid := range segstore.wipBlock.pqMatches { 185 
segstore.wipBlock.pqMatches[pqid].ResetAll() 186 } 187 188 // don't update pqids if no more blocks will be created 189 if forceRotate { 190 return nil 191 } 192 persistentQueries, err := querytracker.GetTopNPersistentSearches(segstore.VirtualTableName, segstore.OrgId) 193 if err != nil { 194 log.Errorf("resetWipBlock: error getting persistent queries: %v", err) 195 return err 196 } 197 for pqid, pNode := range persistentQueries { 198 if _, ok := segstore.wipBlock.pqMatches[pqid]; !ok { 199 mrSize := utils.PQMR_SIZE 200 if segstore.numBlocks > 0 || numPrevRec == 0 { 201 mrSize = uint(numPrevRec) 202 } 203 segstore.wipBlock.pqMatches[pqid] = pqmr.CreatePQMatchResults(mrSize) 204 } 205 segstore.pqTracker.addSearchNode(pqid, pNode) 206 } 207 clearTRollups(segstore.wipBlock.tomRollup) 208 clearTRollups(segstore.wipBlock.tohRollup) 209 clearTRollups(segstore.wipBlock.todRollup) 210 211 return nil 212 } 213 214 func clearTRollups(rrmap map[uint64]*RolledRecs) { 215 // delete keys from map to keep underlying storage 216 for k := range rrmap { 217 delete(rrmap, k) 218 } 219 } 220 221 // do not call this function on its own, since it may result in race condition. 
It should be called from 222 // the checkAndRotateColFiles func 223 224 func (segstore *SegStore) resetSegStore(streamid string, virtualTableName string) error { 225 226 basedir := getActiveBaseSegDir(streamid, virtualTableName, segstore.suffix) 227 err := os.MkdirAll(basedir, 0764) 228 if err != nil { 229 log.Errorf("resetSegStore : Could not mkdir basedir=%v, %v", basedir, err) 230 return err 231 } 232 233 basename := fmt.Sprintf("%s%d", basedir, segstore.suffix) 234 segstore.earliest_millis = 0 235 segstore.latest_millis = 0 236 segstore.SegmentKey = basename 237 segstore.segbaseDir = basedir 238 segstore.VirtualTableName = virtualTableName 239 segstore.RecordCount = 0 240 segstore.BytesReceivedCount = 0 241 segstore.OnDiskBytes = 0 242 243 segstore.AllSeenColumns = make(map[string]bool) 244 segstore.numBlocks = 0 245 segstore.timeCreated = time.Now() 246 segstore.usingSegTree = false 247 248 segstore.AllSst = make(map[string]*structs.SegStats) 249 segstore.pqNonEmptyResults = make(map[string]bool) 250 // on reset, clear pqs info but before reset block 251 segstore.pqTracker = initPQTracker() 252 segstore.wipBlock.colWips = make(map[string]*ColWip) 253 segstore.wipBlock.clearPQMatchInfo() 254 255 err = segstore.resetWipBlock(false) 256 if err != nil { 257 return err 258 } 259 260 nextidx, err := suffix.GetSuffix(streamid, virtualTableName) 261 if err != nil { 262 log.Errorf("reset segstore: failed to get next suffix idx for stream%+v table%+v. err: %v", streamid, virtualTableName, err) 263 return err 264 } 265 segstore.suffix = nextidx 266 267 return nil 268 } 269 270 // For some types we use a bloom index and for others we use range indices. If 271 // a column has both, we should convert all the values to one type. 272 func consolidateColumnTypes(wipBlock *WipBlock, segmentKey string) { 273 for colName := range wipBlock.columnsInBlock { 274 // Check if this column has both a bloom and a range index. 
275 _, ok1 := wipBlock.columnBlooms[colName] 276 _, ok2 := wipBlock.columnRangeIndexes[colName] 277 if !(ok1 && ok2) { 278 continue 279 } 280 281 // Try converting this column to numbers, but if that fails convert it to 282 // strings. 283 ok := convertColumnToNumbers(wipBlock, colName, segmentKey) 284 if !ok { 285 convertColumnToStrings(wipBlock, colName, segmentKey) 286 } 287 } 288 } 289 290 // Returns true if the conversion succeeds. 291 func convertColumnToNumbers(wipBlock *WipBlock, colName string, segmentKey string) bool { 292 // Try converting all values to numbers. 293 oldColWip := wipBlock.colWips[colName] 294 newColWip := InitColWip(segmentKey, colName) 295 rangeIndex := wipBlock.columnRangeIndexes[colName].Ranges 296 297 for i := uint32(0); i < oldColWip.cbufidx; { 298 valType := oldColWip.cbuf[i] 299 i++ 300 301 switch valType { 302 case utils.VALTYPE_ENC_SMALL_STRING[0]: 303 // Parse the string. 304 numBytes := uint32(toputils.BytesToUint16LittleEndian(oldColWip.cbuf[i : i+2])) 305 i += 2 306 numberAsString := string(oldColWip.cbuf[i : i+numBytes]) 307 i += numBytes 308 309 // Try converting to an integer. 310 intVal, err := strconv.ParseInt(numberAsString, 10, 64) 311 if err == nil { 312 // Conversion succeeded. 313 copy(newColWip.cbuf[newColWip.cbufidx:], utils.VALTYPE_ENC_INT64[:]) 314 copy(newColWip.cbuf[newColWip.cbufidx+1:], toputils.Int64ToBytesLittleEndian(intVal)) 315 newColWip.cbufidx += 1 + 8 316 addIntToRangeIndex(colName, intVal, rangeIndex) 317 continue 318 } 319 320 // Try converting to a float. 321 floatVal, err := strconv.ParseFloat(numberAsString, 64) 322 if err == nil { 323 // Conversion succeeded. 324 copy(newColWip.cbuf[newColWip.cbufidx:], utils.VALTYPE_ENC_FLOAT64[:]) 325 copy(newColWip.cbuf[newColWip.cbufidx+1:], toputils.Float64ToBytesLittleEndian(floatVal)) 326 newColWip.cbufidx += 1 + 8 327 addFloatToRangeIndex(colName, floatVal, rangeIndex) 328 continue 329 } 330 331 // Conversion failed. 
332 return false 333 334 case utils.VALTYPE_ENC_INT64[0], utils.VALTYPE_ENC_FLOAT64[0]: 335 // Already a number, so just copy it. 336 // It's alrady in the range index, so we don't need to add it again. 337 copy(newColWip.cbuf[newColWip.cbufidx:], oldColWip.cbuf[i-1:i+8]) 338 newColWip.cbufidx += 9 339 i += 8 340 341 case utils.VALTYPE_ENC_BACKFILL[0]: 342 // This is a null value. 343 copy(newColWip.cbuf[newColWip.cbufidx:], utils.VALTYPE_ENC_BACKFILL[:]) 344 newColWip.cbufidx += 1 345 346 case utils.VALTYPE_ENC_BOOL[0]: 347 // Cannot convert bool to number. 348 return false 349 350 default: 351 // Unknown type. 352 log.Errorf("convertColumnToNumbers: unknown type %v", valType) 353 return false 354 } 355 } 356 357 // Conversion succeeded, so replace the column with the new one. 358 wipBlock.colWips[colName] = newColWip 359 delete(wipBlock.columnBlooms, colName) 360 return true 361 } 362 363 func convertColumnToStrings(wipBlock *WipBlock, colName string, segmentKey string) { 364 oldColWip := wipBlock.colWips[colName] 365 newColWip := InitColWip(segmentKey, colName) 366 bloom := wipBlock.columnBlooms[colName] 367 368 for i := uint32(0); i < oldColWip.cbufidx; { 369 valType := oldColWip.cbuf[i] 370 i++ 371 372 switch valType { 373 case utils.VALTYPE_ENC_SMALL_STRING[0]: 374 // Already a string, so just copy it. 375 // This is already in the bloom, so we don't need to add it again. 376 numBytes := uint32(toputils.BytesToUint16LittleEndian(oldColWip.cbuf[i : i+2])) 377 i += 2 378 copy(newColWip.cbuf[newColWip.cbufidx:], oldColWip.cbuf[i-3:i+numBytes]) 379 newColWip.cbufidx += 3 + numBytes 380 i += numBytes 381 382 case utils.VALTYPE_ENC_INT64[0]: 383 // Parse the integer. 
384 intVal := toputils.BytesToInt64LittleEndian(oldColWip.cbuf[i : i+8]) 385 i += 8 386 387 stringVal := strconv.FormatInt(intVal, 10) 388 newColWip.WriteSingleString(stringVal) 389 bloom.uniqueWordCount += addToBlockBloom(bloom.Bf, []byte(stringVal)) 390 391 case utils.VALTYPE_ENC_FLOAT64[0]: 392 // Parse the float. 393 floatVal := toputils.BytesToFloat64LittleEndian(oldColWip.cbuf[i : i+8]) 394 i += 8 395 396 stringVal := strconv.FormatFloat(floatVal, 'f', -1, 64) 397 newColWip.WriteSingleString(stringVal) 398 bloom.uniqueWordCount += addToBlockBloom(bloom.Bf, []byte(stringVal)) 399 400 case utils.VALTYPE_ENC_BACKFILL[0]: 401 // This is a null value. 402 copy(newColWip.cbuf[newColWip.cbufidx:], utils.VALTYPE_ENC_BACKFILL[:]) 403 newColWip.cbufidx += 1 404 405 case utils.VALTYPE_ENC_BOOL[0]: 406 // Parse the bool. 407 boolVal := oldColWip.cbuf[i] 408 i++ 409 410 var stringVal string 411 if boolVal == 0 { 412 stringVal = "false" 413 } else { 414 stringVal = "true" 415 } 416 417 newColWip.WriteSingleString(stringVal) 418 bloom.uniqueWordCount += addToBlockBloom(bloom.Bf, []byte(stringVal)) 419 420 default: 421 // Unknown type. 422 log.Errorf("convertColumnsToStrings: unknown type %v when converting column %v", valType, colName) 423 } 424 } 425 426 // Replace the old column. 427 wipBlock.colWips[colName] = newColWip 428 delete(wipBlock.columnRangeIndexes, colName) 429 } 430 431 func (segstore *SegStore) AppendWipToSegfile(streamid string, forceRotate bool, isKibana bool, onTimeRotate bool) error { 432 // If there's columns that had both strings and numbers in them, we need to 433 // try converting them all to numbers, but if that doesn't work we'll 434 // convert them all to strings. 
435 consolidateColumnTypes(&segstore.wipBlock, segstore.SegmentKey) 436 437 if segstore.wipBlock.maxIdx > 0 { 438 var totalBytesWritten uint64 = 0 439 var totalMetadata uint64 = 0 440 allColsToFlush := &sync.WaitGroup{} 441 wipBlockLock := sync.Mutex{} 442 wipBlockMetadata := &structs.BlockMetadataHolder{ 443 BlkNum: segstore.numBlocks, 444 ColumnBlockOffset: make(map[string]int64), 445 ColumnBlockLen: make(map[string]uint32), 446 } 447 448 // worst case, each column opens 2 files (.cmi/.csg) and 2 files for segment info (.sid, .bsu) 449 numOpenFDs := int64(len(segstore.wipBlock.colWips)*2 + 2) 450 err := fileutils.GLOBAL_FD_LIMITER.TryAcquireWithBackoff(numOpenFDs, 10, segstore.SegmentKey) 451 if err != nil { 452 log.Errorf("AppendWipToSegfile failed to acquire lock for opening %+v file descriptors. err %+v", numOpenFDs, err) 453 return err 454 } 455 defer fileutils.GLOBAL_FD_LIMITER.Release(numOpenFDs) 456 for colName, colInfo := range segstore.wipBlock.colWips { 457 if colInfo.cbufidx > 0 { 458 allColsToFlush.Add(1) 459 go func(cname string, colWip *ColWip) { 460 defer allColsToFlush.Done() 461 var encType []byte 462 if cname == config.GetTimeStampKey() { 463 encType, err = segstore.wipBlock.encodeTimestamps() 464 if err != nil { 465 log.Errorf("AppendWipToSegfile: failed to encode timestamps err=%v", err) 466 return 467 } 468 _ = segstore.writeWipTsRollups(cname) 469 } else if colWip.deCount > 0 && colWip.deCount < wipCardLimit { 470 encType = utils.ZSTD_DICTIONARY_BLOCK 471 } else { 472 encType = utils.ZSTD_COMLUNAR_BLOCK 473 } 474 475 blkLen, blkOffset, err := writeWip(colWip, encType) 476 if err != nil { 477 log.Errorf("AppendWipToSegfile: failed to write colsegfilename=%v, err=%v", colWip.csgFname, err) 478 return 479 } 480 481 atomic.AddUint64(&totalBytesWritten, uint64(blkLen)) 482 wipBlockLock.Lock() 483 wipBlockMetadata.ColumnBlockOffset[cname] = blkOffset 484 wipBlockMetadata.ColumnBlockLen[cname] = blkLen 485 wipBlockLock.Unlock() 486 487 if !isKibana 
{ 488 // if bloomIndex present then flush it 489 bi, ok := segstore.wipBlock.columnBlooms[cname] 490 if ok { 491 writtenBytes := segstore.flushBloomIndex(cname, bi) 492 atomic.AddUint64(&totalBytesWritten, writtenBytes) 493 atomic.AddUint64(&totalMetadata, writtenBytes) 494 } 495 ri, ok := segstore.wipBlock.columnRangeIndexes[cname] 496 if ok { 497 writtenBytes := segstore.flushBlockRangeIndex(cname, ri) 498 atomic.AddUint64(&totalBytesWritten, writtenBytes) 499 atomic.AddUint64(&totalMetadata, writtenBytes) 500 } 501 } 502 }(colName, colInfo) 503 } 504 } 505 if config.IsAggregationsEnabled() { 506 segstore.computeStarTree() 507 } 508 509 allColsToFlush.Wait() 510 blkSumLen := segstore.flushBlockSummary(wipBlockMetadata, segstore.numBlocks) 511 if !isKibana { 512 // everytime we write compressedWip to segfile, we write a corresponding blockBloom 513 updateUnrotatedBlockInfo(segstore.SegmentKey, segstore.VirtualTableName, &segstore.wipBlock, 514 wipBlockMetadata, segstore.AllSeenColumns, segstore.numBlocks, totalMetadata, segstore.earliest_millis, 515 segstore.latest_millis, segstore.RecordCount, segstore.OrgId) 516 } 517 atomic.AddUint64(&totalBytesWritten, blkSumLen) 518 519 segstore.OnDiskBytes += totalBytesWritten 520 521 allPQIDs := make(map[string]bool) 522 for pqid := range segstore.wipBlock.pqMatches { 523 allPQIDs[pqid] = true 524 } 525 526 err = segstore.FlushSegStats() 527 if err != nil { 528 log.Errorf("AppendWipToSegfile: failed to flushsegstats, err=%v", err) 529 return err 530 } 531 532 allColsSizes := segstore.getAllColsSizes() 533 534 var segmeta = structs.SegMeta{SegmentKey: segstore.SegmentKey, EarliestEpochMS: segstore.earliest_millis, 535 LatestEpochMS: segstore.latest_millis, VirtualTableName: segstore.VirtualTableName, 536 RecordCount: segstore.RecordCount, SegbaseDir: segstore.segbaseDir, 537 BytesReceivedCount: segstore.BytesReceivedCount, OnDiskBytes: segstore.OnDiskBytes, 538 ColumnNames: allColsSizes, AllPQIDs: allPQIDs, NumBlocks: 
segstore.numBlocks, OrgId: segstore.OrgId} 539 540 sidFname := fmt.Sprintf("%v.sid", segstore.SegmentKey) 541 err = writeRunningSegMeta(sidFname, &segmeta) 542 if err != nil { 543 log.Errorf("AppendWipToSegfile: failed to write sidFname=%v, err=%v", sidFname, err) 544 return err 545 } 546 547 for pqid, pqResults := range segstore.wipBlock.pqMatches { 548 segstore.pqNonEmptyResults[pqid] = segstore.pqNonEmptyResults[pqid] || pqResults.Any() 549 pqidFname := fmt.Sprintf("%v/pqmr/%v.pqmr", segstore.SegmentKey, pqid) 550 err := pqResults.FlushPqmr(&pqidFname, segstore.numBlocks) 551 if err != nil { 552 log.Errorf("AppendWipToSegfile: failed to flush pqmr results to fname %s: %v", pqidFname, err) 553 return err 554 } 555 } 556 557 err = segstore.resetWipBlock(forceRotate) 558 if err != nil { 559 return err 560 } 561 usageStats.UpdateCompressedStats(int64(totalBytesWritten), segmeta.OrgId) 562 segstore.numBlocks += 1 563 } 564 if segstore.numBlocks > 0 && !isKibana { 565 err := segstore.checkAndRotateColFiles(streamid, forceRotate, onTimeRotate) 566 if err != nil { 567 return err 568 } 569 } 570 return nil 571 } 572 573 func removePqmrFilesAndDirectory(pqid string, segKey string) error { 574 workingDirectory, err := os.Getwd() 575 if err != nil { 576 log.Errorf("Error fetching current workingDirectory") 577 return err 578 } 579 pqFname := workingDirectory + "/" + fmt.Sprintf("%v/pqmr/%v.pqmr", segKey, pqid) 580 err = os.Remove(pqFname) 581 if err != nil { 582 log.Errorf("Cannot delete file at %v", err) 583 return err 584 } 585 pqmrDirectory := workingDirectory + "/" + fmt.Sprintf("%v/pqmr/", segKey) 586 files, err := os.ReadDir(pqmrDirectory) 587 if err != nil { 588 log.Errorf("Cannot PQMR directory at %v", pqmrDirectory) 589 return err 590 } 591 if len(files) == 0 { 592 err := os.Remove(pqmrDirectory) 593 if err != nil { 594 log.Errorf("Error deleting Pqmr directory at %v", pqmrDirectory) 595 return err 596 } 597 pqmrParentDirectory := workingDirectory + "/" + 
fmt.Sprintf("%v/", segKey) 598 files, err = os.ReadDir(pqmrParentDirectory) 599 if err != nil { 600 log.Errorf("Cannot PQMR parent directory at %v", pqmrParentDirectory) 601 return err 602 } 603 if len(files) == 0 { 604 err := os.Remove(pqmrParentDirectory) 605 if err != nil { 606 log.Errorf("Error deleting Pqmr directory at %v", pqmrParentDirectory) 607 return err 608 } 609 } 610 } 611 return nil 612 } 613 614 func (segstore *SegStore) checkAndRotateColFiles(streamid string, forceRotate bool, onTimeRotate bool) error { 615 616 onTreeRotate := false 617 if config.IsAggregationsEnabled() && segstore.usingSegTree { 618 nc := segstore.sbuilder.GetNodeCount() 619 if nc > MaxAgileTreeNodeCount { 620 onTreeRotate = true 621 } 622 } 623 624 if segstore.OnDiskBytes > maxSegFileSize || forceRotate || onTimeRotate || onTreeRotate { 625 626 instrumentation.IncrementInt64Counter(instrumentation.SEGFILE_ROTATE_COUNT, 1) 627 bytesWritten := segstore.flushStarTree() 628 segstore.OnDiskBytes += uint64(bytesWritten) 629 630 activeBasedir := getActiveBaseSegDir(streamid, segstore.VirtualTableName, segstore.suffix-1) 631 finalBasedir := getFinalBaseSegDir(streamid, segstore.VirtualTableName, segstore.suffix-1) 632 633 finalSegmentKey := fmt.Sprintf("%s%d", finalBasedir, segstore.suffix-1) 634 635 log.Infof("Rotating segId=%v RecCount: %v, OnDiskBytes=%v, numBlocks=%v, finalSegKey=%v orgId=%v", 636 segstore.SegmentKey, segstore.RecordCount, segstore.OnDiskBytes, segstore.numBlocks, 637 finalSegmentKey, segstore.OrgId) 638 639 // make sure the parent dir of final exists, the two path calls are because getFinal.. 
func 640 // returns a '/' at the end 641 err := os.MkdirAll(path.Dir(path.Dir(finalBasedir)), 0764) 642 if err != nil { 643 return err 644 } 645 // delete pqmr files if empty and add to empty PQS 646 for pqid, hasMatchedAnyRecordInWip := range segstore.pqNonEmptyResults { 647 if !hasMatchedAnyRecordInWip { 648 err := removePqmrFilesAndDirectory(pqid, segstore.SegmentKey) 649 if err != nil { 650 log.Errorf("Error deleting pqmr files and directory. Err: %v", err) 651 } 652 go pqsmeta.AddEmptyResults(pqid, segstore.SegmentKey, segstore.VirtualTableName) 653 } 654 } 655 656 allColsSizes := segstore.getAllColsSizes() 657 658 // move the whole dir in one shot 659 err = os.Rename(activeBasedir, finalBasedir) 660 if err != nil { 661 log.Errorf("checkAndRotateColFiles: failed to mv active to final, err=%v", err) 662 return err 663 } 664 // Upload segment files to s3 665 filesToUpload := fileutils.GetAllFilesInDirectory(finalBasedir) 666 667 err = blob.UploadSegmentFiles(filesToUpload) 668 if err != nil { 669 log.Errorf("checkAndRotateColFiles: failed to upload segment files , err=%v", err) 670 } 671 672 allPqids := make(map[string]bool, len(segstore.wipBlock.pqMatches)) 673 for pqid := range segstore.wipBlock.pqMatches { 674 allPqids[pqid] = true 675 } 676 677 var segmeta = structs.SegMeta{SegmentKey: finalSegmentKey, EarliestEpochMS: segstore.earliest_millis, 678 LatestEpochMS: segstore.latest_millis, VirtualTableName: segstore.VirtualTableName, 679 RecordCount: segstore.RecordCount, SegbaseDir: finalBasedir, 680 BytesReceivedCount: segstore.BytesReceivedCount, OnDiskBytes: segstore.OnDiskBytes, 681 ColumnNames: allColsSizes, AllPQIDs: allPqids, NumBlocks: segstore.numBlocks, OrgId: segstore.OrgId} 682 683 AddNewRotatedSegment(segmeta) 684 685 updateRecentlyRotatedSegmentFiles(segstore.SegmentKey, finalSegmentKey) 686 removeSegKeyFromUnrotatedInfo(segstore.SegmentKey) 687 688 // upload ingest node dir to s3 689 err = blob.UploadIngestNodeDir() 690 if err != nil { 691 
log.Errorf("checkAndRotateColFiles: failed to upload ingest node dir , err=%v", err) 692 } 693 694 if !forceRotate { 695 err = segstore.resetSegStore(streamid, segstore.VirtualTableName) 696 if err != nil { 697 return err 698 } 699 } 700 701 } 702 return nil 703 } 704 705 func (segstore *SegStore) initStarTreeCols() ([]string, []string) { 706 707 gcols, inMesCols := querytracker.GetTopPersistentAggs(segstore.VirtualTableName) 708 sortedGrpCols := make([]string, 0) 709 gcMap := make(map[string]uint32) // use it to sort based on cardinality 710 for _, cname := range gcols { 711 712 // verify if cname exist in wip 713 _, ok := segstore.wipBlock.colWips[cname] 714 if !ok { 715 continue 716 } 717 718 _, ok = segstore.AllSst[cname] 719 if !ok { 720 continue 721 } 722 723 cest := uint32(segstore.AllSst[cname].Hll.Estimate()) 724 gcMap[cname] = cest 725 sortedGrpCols = append(sortedGrpCols, cname) 726 } 727 728 sort.Slice(sortedGrpCols, func(i, j int) bool { 729 return gcMap[sortedGrpCols[i]] < gcMap[sortedGrpCols[j]] 730 }) 731 732 mCols := make([]string, 0) 733 // Check if measureCols are present in wip 734 for mCname := range inMesCols { 735 736 // verify if measure cname exist in wip 737 _, ok := segstore.wipBlock.colWips[mCname] 738 if !ok { 739 continue 740 } 741 mCols = append(mCols, mCname) 742 } 743 744 return sortedGrpCols, mCols 745 } 746 747 func (segstore *SegStore) computeStarTree() { 748 749 if segstore.numBlocks == 0 { 750 sortedGrpCols, mCols := segstore.initStarTreeCols() 751 if len(sortedGrpCols) == 0 || len(mCols) == 0 { 752 segstore.usingSegTree = false 753 return 754 } 755 segstore.usingSegTree = true 756 segstore.sbuilder.ResetSegTree(&segstore.wipBlock, sortedGrpCols, mCols) 757 } 758 759 if !segstore.usingSegTree { // if tree creation had failed on first block, then skip it 760 return 761 } 762 763 err := segstore.sbuilder.ComputeStarTree(&segstore.wipBlock) 764 if err != nil { 765 segstore.usingSegTree = false 766 log.Errorf("computeStarTree: 
Failed to compute star tree: %v", err) 767 return 768 } 769 } 770 771 func (segstore *SegStore) flushStarTree() uint32 { 772 773 if !config.IsAggregationsEnabled() { 774 return 0 775 } 776 777 if !segstore.usingSegTree { // if tree creation had failed on first block, then skip it 778 return 0 779 } 780 781 size, err := segstore.sbuilder.EncodeStarTree(segstore.SegmentKey) 782 if err != nil { 783 log.Errorf("flushStarTree: Failed to encode star tree: %v", err) 784 return 0 785 } 786 return size 787 } 788 789 func (segstore *SegStore) adjustEarliestLatestTimes(ts_millis uint64) { 790 791 if segstore.earliest_millis == 0 { 792 segstore.earliest_millis = ts_millis 793 } else { 794 if ts_millis < segstore.earliest_millis { 795 segstore.earliest_millis = ts_millis 796 } 797 } 798 799 if segstore.latest_millis == 0 { 800 segstore.latest_millis = ts_millis 801 } else { 802 if ts_millis > segstore.latest_millis { 803 segstore.latest_millis = ts_millis 804 } 805 } 806 } 807 808 func (wipBlock *WipBlock) adjustEarliestLatestTimes(ts_millis uint64) { 809 810 if wipBlock.blockSummary.LowTs == 0 { 811 wipBlock.blockSummary.LowTs = ts_millis 812 } else { 813 if ts_millis < wipBlock.blockSummary.LowTs { 814 wipBlock.blockSummary.LowTs = ts_millis 815 } 816 } 817 818 if wipBlock.blockSummary.HighTs == 0 { 819 wipBlock.blockSummary.HighTs = ts_millis 820 } else { 821 if ts_millis > wipBlock.blockSummary.HighTs { 822 wipBlock.blockSummary.HighTs = ts_millis 823 } 824 } 825 826 } 827 828 func (segstore *SegStore) WritePackedRecord(rawJson []byte, ts_millis uint64, signalType utils.SIGNAL_TYPE) error { 829 830 var maxIdx uint32 831 var err error 832 var matchedPCols bool 833 tsKey := config.GetTimeStampKey() 834 if signalType == utils.SIGNAL_EVENTS || signalType == utils.SIGNAL_JAEGER_TRACES { 835 maxIdx, matchedPCols, err = segstore.EncodeColumns(rawJson, ts_millis, &tsKey, signalType) 836 if err != nil { 837 log.Errorf("WritePackedRecord: Failed to encode record=%+v", 
string(rawJson)) 838 return err 839 } 840 } else { 841 log.Errorf("WritePackedRecord: Unknown SignalType=%+v", signalType) 842 return errors.New("unknown signal type") 843 } 844 845 if matchedPCols { 846 applyStreamingSearchToRecord(segstore.wipBlock, segstore.pqTracker.PQNodes, segstore.wipBlock.blockSummary.RecCount) 847 } 848 849 segstore.wipBlock.maxIdx = maxIdx 850 segstore.wipBlock.blockSummary.RecCount += 1 851 segstore.RecordCount++ 852 segstore.lastUpdated = time.Now() 853 return nil 854 } 855 856 // flushes bloom index and returns number of bytes written 857 func (ss *SegStore) flushBloomIndex(cname string, bi *BloomIndex) uint64 { 858 859 if bi == nil { 860 log.Errorf("flushBloomIndex: bi was nill for segkey=%v", ss.SegmentKey) 861 return 0 862 } 863 864 fname := fmt.Sprintf("%s_%v.cmi", ss.SegmentKey, xxhash.Sum64String(cname)) 865 866 bffd, err := os.OpenFile(fname, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644) 867 if err != nil { 868 log.Errorf("flushBloomIndex: open failed fname=%v, err=%v", fname, err) 869 return 0 870 } 871 872 defer bffd.Close() 873 874 var buf bytes.Buffer 875 bufWriter := bufio.NewWriter(&buf) 876 877 // there is no accurate way to find exactly how many bytes the write.to is going to write 878 // and we need that number , so that we write it first and then the actual bloom data 879 // hence this messiness to write it to some buffer, get the bytesWritten count and then do 880 // the actual write 881 bytesWritten, bferr := bi.Bf.WriteTo(bufWriter) 882 if bferr != nil { 883 log.Errorf("flushBloomIndex: write buf failed fname=%v, err=%v", fname, bferr) 884 return 0 885 } 886 887 bytesWritten += utils.LEN_BLKNUM_CMI_SIZE // for blkNum 888 bytesWritten += 1 // reserver for CMI_Type 889 890 // copy the size of blockBloom in uint32 891 if _, err = bffd.Write(toputils.Uint32ToBytesLittleEndian(uint32(bytesWritten))); err != nil { 892 log.Errorf("flushBloomIndex: bloomsize write failed fname=%v, err=%v", fname, err) 893 return 0 894 } 895 
896 // copy the blockNum 897 if _, err = bffd.Write(toputils.Uint16ToBytesLittleEndian(ss.numBlocks)); err != nil { 898 log.Errorf("flushBloomIndex: bloomsize write failed fname=%v, err=%v", fname, err) 899 return 0 900 } 901 902 // write CMI type 903 if _, err = bffd.Write(utils.CMI_BLOOM_INDEX); err != nil { 904 log.Errorf("flushBloomIndex: CMI Type write failed fname=%v, err=%v", fname, err) 905 return 0 906 } 907 908 // write the blockBloom 909 _, bferr = bi.Bf.WriteTo(bffd) 910 if bferr != nil { 911 log.Errorf("flushBloomIndex: write blockbloom failed fname=%v, err=%v", fname, bferr) 912 return 0 913 } 914 915 finalBytesWritten := bytesWritten + 4 // add 4 for size 916 if len(bi.HistoricalCount) == 0 { 917 bi.HistoricalCount = make([]uint32, 0) 918 } 919 //adding to block history list 920 bi.HistoricalCount = append(bi.HistoricalCount, bi.uniqueWordCount) 921 if streamIdHistory := len(bi.HistoricalCount); streamIdHistory > utils.BLOOM_SIZE_HISTORY { 922 bi.HistoricalCount = bi.HistoricalCount[streamIdHistory-utils.BLOOM_SIZE_HISTORY:] 923 924 } 925 return uint64(finalBytesWritten) 926 } 927 928 // returns the number of bytes written 929 func (segstore *SegStore) flushBlockSummary(bmh *structs.BlockMetadataHolder, blkNum uint16) uint64 { 930 931 fname := structs.GetBsuFnameFromSegKey(segstore.SegmentKey) 932 933 fd, err := os.OpenFile(fname, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644) 934 if err != nil { 935 log.Errorf("flushBlockSummary: open failed blockSummaryFname=%v, err=%v", fname, err) 936 return 0 937 } 938 939 defer fd.Close() 940 941 blkSumBuf := make([]byte, utils.BLOCK_SUMMARY_SIZE) 942 packedLen, blkSumBuf, err := EncodeBlocksum(bmh, &segstore.wipBlock.blockSummary, blkSumBuf[0:], blkNum) 943 if err != nil { 944 log.Errorf("flushBlockSummary: EncodeBlocksum: Failed to encode blocksummary=%+v, err=%v", 945 segstore.wipBlock.blockSummary, err) 946 return 0 947 } 948 if _, err := fd.Write(blkSumBuf[:packedLen]); err != nil { 949 
log.Errorf("flushBlockSummary: write failed blockSummaryFname=%v, err=%v", fname, err) 950 return 0 951 } 952 return uint64(packedLen) 953 } 954 955 func (segstore *SegStore) flushBlockRangeIndex(cname string, ri *RangeIndex) uint64 { 956 957 if ri == nil { 958 log.Errorf("flushBlockRangeIndex: ri was nill for segkey=%v", segstore.SegmentKey) 959 return 0 960 } 961 962 fname := fmt.Sprintf("%s_%v.cmi", segstore.SegmentKey, xxhash.Sum64String(cname)) 963 964 fr, err := os.OpenFile(fname, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644) 965 if err != nil { 966 log.Errorf("flushBlockRangeIndex: open failed fname=%v, err=%v", fname, err) 967 return 0 968 } 969 970 packedLen, blkRIBuf, err := EncodeRIBlock(ri.Ranges, segstore.numBlocks) 971 if err != nil { 972 log.Errorf("flushBlockRangeIndex: EncodeRIBlock: Failed to encode BlockRangeIndex=%+v, err=%v", ri.Ranges, err) 973 return 0 974 } 975 976 if _, err := fr.Write(blkRIBuf[0:packedLen]); err != nil { 977 log.Errorf("flushBlockRangeIndex: write failed blockRangeIndexFname=%v, err=%v", fname, err) 978 return 0 979 } 980 fr.Close() 981 return uint64(packedLen) 982 } 983 984 func initPQTracker() *PQTracker { 985 return &PQTracker{ 986 colNames: make(map[string]bool), 987 PQNodes: make(map[string]*structs.SearchNode), 988 hasWildcard: false, 989 } 990 } 991 992 func (pct *PQTracker) addSearchNode(pqid string, sNode *structs.SearchNode) { 993 pct.PQNodes[pqid] = sNode 994 995 if pct.hasWildcard { 996 return 997 } 998 cols, wildcard := sNode.GetAllColumnsToSearch() 999 for colName := range cols { 1000 pct.colNames[colName] = true 1001 } 1002 pct.hasWildcard = wildcard 1003 } 1004 1005 func (pct *PQTracker) isColumnInPQuery(col string) bool { 1006 if pct.hasWildcard { 1007 return true 1008 } 1009 if pct.colNames == nil { 1010 return false 1011 } 1012 _, ok := pct.colNames[col] 1013 return ok 1014 } 1015 1016 func (wip *WipBlock) clearPQMatchInfo() { 1017 for pqid := range wip.pqMatches { 1018 delete(wip.pqMatches, pqid) 1019 } 
1020 } 1021 1022 func (wipBlock *WipBlock) encodeTimestamps() ([]byte, error) { 1023 1024 encType := utils.TIMESTAMP_TOPDIFF_VARENC 1025 1026 tsWip := wipBlock.colWips[config.GetTimeStampKey()] 1027 tsWip.cbufidx = 0 // reset to zero since packer we set it to 1, so that the writeWip gets invoked 1028 1029 var tsType structs.TS_TYPE 1030 diff := wipBlock.blockSummary.HighTs - wipBlock.blockSummary.LowTs 1031 1032 if diff <= toputils.UINT8_MAX { 1033 tsType = structs.TS_Type8 1034 } else if diff <= toputils.UINT16_MAX { 1035 tsType = structs.TS_Type16 1036 } else if diff <= toputils.UINT32_MAX { 1037 tsType = structs.TS_Type32 1038 } else { 1039 tsType = structs.TS_Type64 1040 } 1041 1042 lowTs := wipBlock.blockSummary.LowTs 1043 1044 // store TS_TYPE and lowTs for reconstruction needs 1045 copy(tsWip.cbuf[tsWip.cbufidx:], []byte{uint8(tsType)}) 1046 tsWip.cbufidx += 1 1047 copy(tsWip.cbuf[tsWip.cbufidx:], toputils.Uint64ToBytesLittleEndian(lowTs)) 1048 tsWip.cbufidx += 8 1049 1050 switch tsType { 1051 case structs.TS_Type8: 1052 var tsVal uint8 1053 for i := uint16(0); i < wipBlock.blockSummary.RecCount; i++ { 1054 tsVal = uint8(wipBlock.blockTs[i] - lowTs) 1055 copy(tsWip.cbuf[tsWip.cbufidx:], []byte{tsVal}) 1056 tsWip.cbufidx += 1 1057 } 1058 case structs.TS_Type16: 1059 var tsVal uint16 1060 for i := uint16(0); i < wipBlock.blockSummary.RecCount; i++ { 1061 tsVal = uint16(wipBlock.blockTs[i] - lowTs) 1062 copy(tsWip.cbuf[tsWip.cbufidx:], toputils.Uint16ToBytesLittleEndian(tsVal)) 1063 tsWip.cbufidx += 2 1064 } 1065 case structs.TS_Type32: 1066 var tsVal uint32 1067 for i := uint16(0); i < wipBlock.blockSummary.RecCount; i++ { 1068 tsVal = uint32(wipBlock.blockTs[i] - lowTs) 1069 copy(tsWip.cbuf[tsWip.cbufidx:], toputils.Uint32ToBytesLittleEndian(tsVal)) 1070 tsWip.cbufidx += 4 1071 } 1072 case structs.TS_Type64: 1073 var tsVal uint64 1074 for i := uint16(0); i < wipBlock.blockSummary.RecCount; i++ { 1075 tsVal = wipBlock.blockTs[i] - lowTs 1076 
copy(tsWip.cbuf[tsWip.cbufidx:], toputils.Uint64ToBytesLittleEndian(tsVal)) 1077 tsWip.cbufidx += 8 1078 } 1079 } 1080 1081 return encType, nil 1082 } 1083 1084 /* 1085 1086 [blkNum 2B][numBlocks 2B][BuckData xxB]...... 1087 1088 BuckData ===> 1089 [bucketKey 8B][rrEncType 1B][mrDataSize 2B]{matchedRecordData ....} 1090 1091 */ 1092 1093 func (ss *SegStore) writeWipTsRollups(cname string) error { 1094 1095 // todo move this dir creation to initSegStore 1096 dirName := fmt.Sprintf("%v/rups/", path.Dir(ss.SegmentKey)) 1097 if _, err := os.Stat(dirName); os.IsNotExist(err) { 1098 err := os.MkdirAll(dirName, os.FileMode(0764)) 1099 if err != nil { 1100 log.Errorf("writeWipTsRollups: Failed to create directory %s: %v", dirName, err) 1101 return err 1102 } 1103 } 1104 1105 var reterr error = nil 1106 1107 fname := fmt.Sprintf("%v/rups/%v.crup", path.Dir(ss.SegmentKey), xxhash.Sum64String(cname+"m")) 1108 err := writeSingleRup(ss.numBlocks, fname, ss.wipBlock.tomRollup) 1109 if err != nil { 1110 log.Errorf("writeWipTsRollups: failed to write minutes rollup file, err=%v", err) 1111 reterr = err 1112 } 1113 1114 fname = fmt.Sprintf("%v/rups/%v.crup", path.Dir(ss.SegmentKey), xxhash.Sum64String(cname+"h")) 1115 err = writeSingleRup(ss.numBlocks, fname, ss.wipBlock.tohRollup) 1116 if err != nil { 1117 log.Errorf("writeWipTsRollups: failed to write hour rollup file, err=%v", err) 1118 reterr = err 1119 } 1120 fname = fmt.Sprintf("%v/rups/%v.crup", path.Dir(ss.SegmentKey), xxhash.Sum64String(cname+"d")) 1121 err = writeSingleRup(ss.numBlocks, fname, ss.wipBlock.todRollup) 1122 if err != nil { 1123 log.Errorf("writeWipTsRollups: failed to write day rollup file, err=%v", err) 1124 reterr = err 1125 } 1126 1127 return reterr 1128 } 1129 1130 func writeSingleRup(blkNum uint16, fname string, tRup map[uint64]*RolledRecs) error { 1131 fd, err := os.OpenFile(fname, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644) 1132 if err != nil { 1133 log.Errorf("writeSingleRup: open failed fname=%v, 
err=%v", fname, err) 1134 return err 1135 } 1136 1137 defer fd.Close() 1138 1139 _, err = fd.Seek(0, 2) // go to the end of the file 1140 if err != nil { 1141 log.Errorf("writeSingleRup: failed to get end offset %+v", err) 1142 return err 1143 } 1144 1145 // write blkNum 1146 _, err = fd.Write(toputils.Uint16ToBytesLittleEndian(blkNum)) 1147 if err != nil { 1148 log.Errorf("writeSingleRup: blkNum write failed fname=%v, err=%v", fname, err) 1149 return err 1150 } 1151 1152 // write num of bucketKeys 1153 _, err = fd.Write(toputils.Uint16ToBytesLittleEndian(uint16(len(tRup)))) 1154 if err != nil { 1155 log.Errorf("writeSingleRup: failed to write num of bucket keys %+v", err) 1156 return err 1157 } 1158 1159 for bkey, rr := range tRup { 1160 1161 // write bucketKey ts 1162 if _, err = fd.Write(toputils.Uint64ToBytesLittleEndian(bkey)); err != nil { 1163 log.Errorf("writeSingleRup: blkNum=%v bkey=%v write failed fname=%v, err=%v", 1164 blkNum, bkey, fname, err) 1165 return err 1166 } 1167 1168 // write encoding type 1169 if _, err = fd.Write([]byte{utils.RR_ENC_BITSET}); err != nil { 1170 log.Errorf("writeSingleRup: blkNum=%v bkey=%v enc type failed fname=%v, err=%v", 1171 blkNum, bkey, fname, err) 1172 return err 1173 } 1174 1175 // we could use a Compact here, but in past we saw compact loose data 1176 // once compact is fixed then we can use it here. 
1177 // pad an extra word (64 bits) so that shrink does not loose data 1178 cb := rr.MatchedRes.Shrink(uint(rr.lastRecNum + 64)) 1179 mrSize := uint16(cb.GetInMemSize()) 1180 if _, err = fd.Write(toputils.Uint16ToBytesLittleEndian(uint16(mrSize))); err != nil { 1181 log.Errorf("writeSingleRup: blkNum=%v bkey=%v mrsize write failed fname=%v, err=%v", 1182 blkNum, bkey, fname, err) 1183 return err 1184 } 1185 1186 // write actual bitset 1187 err = cb.WriteTo(fd) 1188 if err != nil { 1189 log.Errorf("writeSingleRup: blkNum=%v bkey=%v bitset write failed fname=%v, err=%v", 1190 blkNum, bkey, fname, err) 1191 return err 1192 } 1193 } 1194 1195 return nil 1196 } 1197 1198 /* 1199 Encoding Scheme for all columns single file 1200 1201 [Version 1B] [CnameLen 2B] [Cname xB] [ColSegEncodingLen 2B] [ColSegEncoding xB].... 1202 */ 1203 func (ss *SegStore) FlushSegStats() error { 1204 1205 if len(ss.AllSst) <= 0 { 1206 log.Errorf("FlushSegStats: no segstats to flush") 1207 return errors.New("FlushSegStats: no segstats to flush") 1208 } 1209 1210 fname := fmt.Sprintf("%v.sst", ss.SegmentKey) 1211 fd, err := os.OpenFile(fname, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) 1212 if err != nil { 1213 log.Errorf("FlushSegStats: Failed to open file=%v, err=%v", fname, err) 1214 return err 1215 } 1216 defer fd.Close() 1217 1218 // version 1219 _, err = fd.Write([]byte{1}) 1220 if err != nil { 1221 log.Errorf("FlushSegStats: failed to write version err=%v", err) 1222 return err 1223 } 1224 1225 buf := make([]byte, utils.WIP_SIZE) 1226 for cname, sst := range ss.AllSst { 1227 1228 // cname len 1229 _, err = fd.Write(toputils.Uint16ToBytesLittleEndian(uint16(len(cname)))) 1230 if err != nil { 1231 log.Errorf("FlushSegStats: failed to write cnamelen cname=%v err=%v", cname, err) 1232 return err 1233 } 1234 1235 // cname 1236 _, err = fd.WriteString(cname) 1237 if err != nil { 1238 log.Errorf("FlushSegStats: failed to write cname cname=%v err=%v", cname, err) 1239 return err 1240 } 1241 1242 
idx, err := writeSstToBuf(sst, buf) 1243 if err != nil { 1244 log.Errorf("FlushSegStats: error writing to buf err=%v", err) 1245 return err 1246 } 1247 1248 // colsegencodinglen 1249 _, err = fd.Write(toputils.Uint16ToBytesLittleEndian(idx)) 1250 if err != nil { 1251 log.Errorf("FlushSegStats: failed to write colsegencodlen cname=%v err=%v", cname, err) 1252 return err 1253 } 1254 1255 // colsegencoding 1256 _, err = fd.Write(buf[0:idx]) 1257 if err != nil { 1258 log.Errorf("FlushSegStats: failed to write colsegencoding cname=%v err=%v", cname, err) 1259 return err 1260 } 1261 } 1262 1263 return nil 1264 } 1265 1266 /* 1267 Encoding Schema for SegStats Single Column Data 1268 [Version 1B] [isNumeric 1B] [Count 8B] [HLL_Size 2B] [HLL_Data xB] 1269 [N_type 1B] [Min 8B] [N_type 1B] [Max 8B] [N_type 1B] [Sum 8B] 1270 */ 1271 func writeSstToBuf(sst *structs.SegStats, buf []byte) (uint16, error) { 1272 1273 idx := uint16(0) 1274 1275 // version 1276 copy(buf[idx:], []byte{1}) 1277 idx++ 1278 1279 // isNumeric 1280 copy(buf[idx:], toputils.BoolToBytesLittleEndian(sst.IsNumeric)) 1281 idx++ 1282 1283 // Count 1284 copy(buf[idx:], toputils.Uint64ToBytesLittleEndian(sst.Count)) 1285 idx += 8 1286 1287 hllData, err := sst.Hll.MarshalBinary() 1288 if err != nil { 1289 log.Errorf("writeSstToBuf: HLL marshal failed err=%v", err) 1290 return idx, err 1291 } 1292 1293 // HLL_Size 1294 copy(buf[idx:], toputils.Uint16ToBytesLittleEndian(uint16(len(hllData)))) 1295 idx += 2 1296 1297 // HLL_Data 1298 copy(buf[idx:], hllData) 1299 idx += uint16(len(hllData)) 1300 1301 if !sst.IsNumeric { 1302 return idx, nil // dont write numeric stuff if this column is not numeric 1303 } 1304 1305 // Min NumType 1306 copy(buf[idx:], []byte{byte(sst.NumStats.Min.Ntype)}) 1307 idx++ 1308 1309 // Min 1310 if sst.NumStats.Min.Ntype == utils.SS_DT_FLOAT { 1311 copy(buf[idx:], toputils.Float64ToBytesLittleEndian(sst.NumStats.Min.FloatVal)) 1312 } else { 1313 copy(buf[idx:], 
toputils.Int64ToBytesLittleEndian(sst.NumStats.Min.IntgrVal)) 1314 } 1315 idx += 8 1316 1317 // Max NumType 1318 copy(buf[idx:], []byte{byte(sst.NumStats.Max.Ntype)}) 1319 idx++ 1320 1321 // Max 1322 if sst.NumStats.Max.Ntype == utils.SS_DT_FLOAT { 1323 copy(buf[idx:], toputils.Float64ToBytesLittleEndian(sst.NumStats.Max.FloatVal)) 1324 } else { 1325 copy(buf[idx:], toputils.Int64ToBytesLittleEndian(sst.NumStats.Max.IntgrVal)) 1326 } 1327 idx += 8 1328 1329 // Sum NumType 1330 copy(buf[idx:], []byte{byte(sst.NumStats.Sum.Ntype)}) 1331 idx++ 1332 1333 // Sum 1334 if sst.NumStats.Sum.Ntype == utils.SS_DT_FLOAT { 1335 copy(buf[idx:], toputils.Float64ToBytesLittleEndian(sst.NumStats.Sum.FloatVal)) 1336 } else { 1337 copy(buf[idx:], toputils.Int64ToBytesLittleEndian(sst.NumStats.Sum.IntgrVal)) 1338 } 1339 idx += 8 1340 1341 return idx, nil 1342 } 1343 1344 func (ss *SegStore) getAllColsSizes() map[string]*structs.ColSizeInfo { 1345 1346 allColsSizes := make(map[string]*structs.ColSizeInfo) 1347 1348 for cname := range ss.AllSeenColumns { 1349 1350 if cname == config.GetTimeStampKey() { 1351 continue 1352 } 1353 1354 fname := ssutils.GetFileNameFromSegSetFile(structs.SegSetFile{ 1355 SegKey: ss.SegmentKey, 1356 Identifier: fmt.Sprintf("%v", xxhash.Sum64String(cname)), 1357 FileType: structs.Cmi, 1358 }) 1359 cmiSize, onlocal := ssutils.GetFileSizeFromDisk(fname) 1360 if !onlocal { 1361 log.Errorf("getAllColsSizes: cmi cname: %v, fname: %+v not on local disk", cname, fname) 1362 } 1363 1364 fname = ssutils.GetFileNameFromSegSetFile(structs.SegSetFile{ 1365 SegKey: ss.SegmentKey, 1366 Identifier: fmt.Sprintf("%v", xxhash.Sum64String(cname)), 1367 FileType: structs.Csg, 1368 }) 1369 csgSize, onlocal := ssutils.GetFileSizeFromDisk(fname) 1370 if !onlocal { 1371 log.Errorf("getAllColsSizes: csg cname: %v, fname: %+v not on local disk", cname, fname) 1372 } 1373 1374 csinfo := structs.ColSizeInfo{CmiSize: cmiSize, CsgSize: csgSize} 1375 allColsSizes[cname] = &csinfo 1376 } 
1377 return allColsSizes 1378 } 1379 1380 func (ss *SegStore) DestroyWipBlock() { 1381 bbp.Put(ss.wipBlock.bb) 1382 }