github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/writer/segstore.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package writer
    18  
    19  import (
    20  	"bufio"
    21  	"bytes"
    22  	"errors"
    23  	"fmt"
    24  	"os"
    25  	"path"
    26  	"sort"
    27  	"strconv"
    28  	"sync"
    29  	"sync/atomic"
    30  	"time"
    31  
    32  	"github.com/bits-and-blooms/bloom/v3"
    33  	"github.com/cespare/xxhash"
    34  	"github.com/siglens/siglens/pkg/blob"
    35  	"github.com/siglens/siglens/pkg/blob/ssutils"
    36  	"github.com/siglens/siglens/pkg/common/fileutils"
    37  	"github.com/siglens/siglens/pkg/config"
    38  	"github.com/siglens/siglens/pkg/instrumentation"
    39  	"github.com/siglens/siglens/pkg/querytracker"
    40  	pqsmeta "github.com/siglens/siglens/pkg/segment/query/pqs/meta"
    41  	"github.com/siglens/siglens/pkg/segment/structs"
    42  	"github.com/siglens/siglens/pkg/segment/utils"
    43  	"github.com/siglens/siglens/pkg/segment/writer/suffix"
    44  	"github.com/siglens/siglens/pkg/usageStats"
    45  	toputils "github.com/siglens/siglens/pkg/utils"
    46  
    47  	"github.com/siglens/siglens/pkg/segment/pqmr"
    48  	bbp "github.com/valyala/bytebufferpool"
    49  
    50  	log "github.com/sirupsen/logrus"
    51  )
    52  
    53  const MaxAgileTreeNodeCount = 8_000_000
    54  const colWipsSizeLimit = 2000 // We shouldn't exceed this during normal usage.
    55  
    56  // SegStore Individual stream buffer
    57  type SegStore struct {
    58  	lock              sync.Mutex
    59  	earliest_millis   uint64 // earliest timestamp of a logline here
    60  	latest_millis     uint64 // latest timestamp of a logline here
    61  	wipBlock          WipBlock
    62  	pqNonEmptyResults map[string]bool // map pqid => true if segstream matched > 0 records
    63  	// segment related data
    64  	SegmentKey         string
    65  	segbaseDir         string
    66  	suffix             uint64
    67  	lastUpdated        time.Time
    68  	VirtualTableName   string
    69  	RecordCount        int
    70  	AllSeenColumns     map[string]bool
    71  	pqTracker          *PQTracker
    72  	numBlocks          uint16
    73  	BytesReceivedCount uint64
    74  	OnDiskBytes        uint64 // running sum of cmi/csg/bsu file sizes
    75  	skipDe             bool   // kibana docs dont need dict enc, hence this flag
    76  	timeCreated        time.Time
    77  	AllSst             map[string]*structs.SegStats // map[colName] => SegStats_of_each_column
    78  	sbuilder           StarTreeBuilder
    79  	usingSegTree       bool
    80  	OrgId              uint64
    81  	firstTime          bool
    82  }
    83  
    84  // helper struct to keep track of persistent queries and columns that need to be searched
    85  type PQTracker struct {
    86  	hasWildcard bool
    87  	colNames    map[string]bool
    88  	PQNodes     map[string]*structs.SearchNode // maps pqid to search node
    89  }
    90  
    91  func InitSegStore(
    92  	segmentKey string,
    93  	segbaseDir string,
    94  	suffix uint64,
    95  	virtualTableName string,
    96  	skipDe bool,
    97  	orgId uint64,
    98  	usingSegTree bool,
    99  	highTs uint64,
   100  	lowTs uint64,
   101  ) *SegStore {
   102  	now := time.Now()
   103  	ss := SegStore{
   104  		lock:              sync.Mutex{},
   105  		pqNonEmptyResults: make(map[string]bool),
   106  		SegmentKey:        segmentKey,
   107  		segbaseDir:        segbaseDir,
   108  		suffix:            suffix,
   109  		lastUpdated:       now,
   110  		VirtualTableName:  virtualTableName,
   111  		AllSeenColumns:    make(map[string]bool),
   112  		pqTracker:         initPQTracker(),
   113  		skipDe:            skipDe,
   114  		timeCreated:       now,
   115  		AllSst:            make(map[string]*structs.SegStats),
   116  		usingSegTree:      usingSegTree,
   117  		OrgId:             orgId,
   118  		firstTime:         true,
   119  	}
   120  
   121  	ss.initWipBlock()
   122  	ss.wipBlock.blockSummary.HighTs = highTs
   123  	ss.wipBlock.blockSummary.LowTs = lowTs
   124  
   125  	return &ss
   126  }
   127  
   128  func (segstore *SegStore) initWipBlock() {
   129  
   130  	segstore.wipBlock = WipBlock{
   131  		columnBlooms:       make(map[string]*BloomIndex),
   132  		columnRangeIndexes: make(map[string]*RangeIndex),
   133  		columnsInBlock:     make(map[string]bool),
   134  		pqMatches:          make(map[string]*pqmr.PQMatchResults),
   135  		colWips:            make(map[string]*ColWip),
   136  		bb:                 bbp.Get(),
   137  	}
   138  	segstore.wipBlock.tomRollup = make(map[uint64]*RolledRecs)
   139  	segstore.wipBlock.tohRollup = make(map[uint64]*RolledRecs)
   140  	segstore.wipBlock.todRollup = make(map[uint64]*RolledRecs)
   141  }
   142  
   143  func (segstore *SegStore) resetWipBlock(forceRotate bool) error {
   144  
   145  	segstore.wipBlock.maxIdx = 0
   146  
   147  	if len(segstore.wipBlock.colWips) > colWipsSizeLimit {
   148  		log.Errorf("resetWipBlock: colWips size exceeds %v; current size is %v for segKey %v",
   149  			colWipsSizeLimit, len(segstore.wipBlock.colWips), segstore.SegmentKey)
   150  
   151  		segstore.wipBlock.colWips = make(map[string]*ColWip)
   152  	} else {
   153  		for _, cwip := range segstore.wipBlock.colWips {
   154  			cwip.cbufidx = 0
   155  			cwip.cstartidx = 0
   156  
   157  			cwip.deCount = 0
   158  			for dword := range cwip.deMap {
   159  				delete(cwip.deMap, dword)
   160  			}
   161  		}
   162  	}
   163  
   164  	for _, bi := range segstore.wipBlock.columnBlooms {
   165  		bi.uniqueWordCount = 0
   166  		blockBloomElementCount := getBlockBloomSize(bi)
   167  		bi.Bf = bloom.NewWithEstimates(uint(blockBloomElementCount), utils.BLOOM_COLL_PROBABILITY)
   168  	}
   169  
   170  	for k := range segstore.wipBlock.columnRangeIndexes {
   171  		delete(segstore.wipBlock.columnRangeIndexes, k)
   172  	}
   173  
   174  	segstore.wipBlock.blockSummary.HighTs = 0
   175  	segstore.wipBlock.blockSummary.LowTs = 0
   176  	numPrevRec := segstore.wipBlock.blockSummary.RecCount
   177  	segstore.wipBlock.blockSummary.RecCount = 0
   178  
   179  	// delete keys from map to keep underlying storage
   180  	for col := range segstore.wipBlock.columnsInBlock {
   181  		delete(segstore.wipBlock.columnsInBlock, col)
   182  	}
   183  
   184  	for pqid := range segstore.wipBlock.pqMatches {
   185  		segstore.wipBlock.pqMatches[pqid].ResetAll()
   186  	}
   187  
   188  	// don't update pqids if no more blocks will be created
   189  	if forceRotate {
   190  		return nil
   191  	}
   192  	persistentQueries, err := querytracker.GetTopNPersistentSearches(segstore.VirtualTableName, segstore.OrgId)
   193  	if err != nil {
   194  		log.Errorf("resetWipBlock: error getting persistent queries: %v", err)
   195  		return err
   196  	}
   197  	for pqid, pNode := range persistentQueries {
   198  		if _, ok := segstore.wipBlock.pqMatches[pqid]; !ok {
   199  			mrSize := utils.PQMR_SIZE
   200  			if segstore.numBlocks > 0 || numPrevRec == 0 {
   201  				mrSize = uint(numPrevRec)
   202  			}
   203  			segstore.wipBlock.pqMatches[pqid] = pqmr.CreatePQMatchResults(mrSize)
   204  		}
   205  		segstore.pqTracker.addSearchNode(pqid, pNode)
   206  	}
   207  	clearTRollups(segstore.wipBlock.tomRollup)
   208  	clearTRollups(segstore.wipBlock.tohRollup)
   209  	clearTRollups(segstore.wipBlock.todRollup)
   210  
   211  	return nil
   212  }
   213  
   214  func clearTRollups(rrmap map[uint64]*RolledRecs) {
   215  	// delete keys from map to keep underlying storage
   216  	for k := range rrmap {
   217  		delete(rrmap, k)
   218  	}
   219  }
   220  
   221  // do not call this function on its own, since it may result in race condition. It should be called from
   222  // the checkAndRotateColFiles func
   223  
   224  func (segstore *SegStore) resetSegStore(streamid string, virtualTableName string) error {
   225  
   226  	basedir := getActiveBaseSegDir(streamid, virtualTableName, segstore.suffix)
   227  	err := os.MkdirAll(basedir, 0764)
   228  	if err != nil {
   229  		log.Errorf("resetSegStore : Could not mkdir basedir=%v,  %v", basedir, err)
   230  		return err
   231  	}
   232  
   233  	basename := fmt.Sprintf("%s%d", basedir, segstore.suffix)
   234  	segstore.earliest_millis = 0
   235  	segstore.latest_millis = 0
   236  	segstore.SegmentKey = basename
   237  	segstore.segbaseDir = basedir
   238  	segstore.VirtualTableName = virtualTableName
   239  	segstore.RecordCount = 0
   240  	segstore.BytesReceivedCount = 0
   241  	segstore.OnDiskBytes = 0
   242  
   243  	segstore.AllSeenColumns = make(map[string]bool)
   244  	segstore.numBlocks = 0
   245  	segstore.timeCreated = time.Now()
   246  	segstore.usingSegTree = false
   247  
   248  	segstore.AllSst = make(map[string]*structs.SegStats)
   249  	segstore.pqNonEmptyResults = make(map[string]bool)
   250  	// on reset, clear pqs info but before reset block
   251  	segstore.pqTracker = initPQTracker()
   252  	segstore.wipBlock.colWips = make(map[string]*ColWip)
   253  	segstore.wipBlock.clearPQMatchInfo()
   254  
   255  	err = segstore.resetWipBlock(false)
   256  	if err != nil {
   257  		return err
   258  	}
   259  
   260  	nextidx, err := suffix.GetSuffix(streamid, virtualTableName)
   261  	if err != nil {
   262  		log.Errorf("reset segstore: failed to get next suffix idx for stream%+v table%+v. err: %v", streamid, virtualTableName, err)
   263  		return err
   264  	}
   265  	segstore.suffix = nextidx
   266  
   267  	return nil
   268  }
   269  
   270  // For some types we use a bloom index and for others we use range indices. If
   271  // a column has both, we should convert all the values to one type.
   272  func consolidateColumnTypes(wipBlock *WipBlock, segmentKey string) {
   273  	for colName := range wipBlock.columnsInBlock {
   274  		// Check if this column has both a bloom and a range index.
   275  		_, ok1 := wipBlock.columnBlooms[colName]
   276  		_, ok2 := wipBlock.columnRangeIndexes[colName]
   277  		if !(ok1 && ok2) {
   278  			continue
   279  		}
   280  
   281  		// Try converting this column to numbers, but if that fails convert it to
   282  		// strings.
   283  		ok := convertColumnToNumbers(wipBlock, colName, segmentKey)
   284  		if !ok {
   285  			convertColumnToStrings(wipBlock, colName, segmentKey)
   286  		}
   287  	}
   288  }
   289  
   290  // Returns true if the conversion succeeds.
   291  func convertColumnToNumbers(wipBlock *WipBlock, colName string, segmentKey string) bool {
   292  	// Try converting all values to numbers.
   293  	oldColWip := wipBlock.colWips[colName]
   294  	newColWip := InitColWip(segmentKey, colName)
   295  	rangeIndex := wipBlock.columnRangeIndexes[colName].Ranges
   296  
   297  	for i := uint32(0); i < oldColWip.cbufidx; {
   298  		valType := oldColWip.cbuf[i]
   299  		i++
   300  
   301  		switch valType {
   302  		case utils.VALTYPE_ENC_SMALL_STRING[0]:
   303  			// Parse the string.
   304  			numBytes := uint32(toputils.BytesToUint16LittleEndian(oldColWip.cbuf[i : i+2]))
   305  			i += 2
   306  			numberAsString := string(oldColWip.cbuf[i : i+numBytes])
   307  			i += numBytes
   308  
   309  			// Try converting to an integer.
   310  			intVal, err := strconv.ParseInt(numberAsString, 10, 64)
   311  			if err == nil {
   312  				// Conversion succeeded.
   313  				copy(newColWip.cbuf[newColWip.cbufidx:], utils.VALTYPE_ENC_INT64[:])
   314  				copy(newColWip.cbuf[newColWip.cbufidx+1:], toputils.Int64ToBytesLittleEndian(intVal))
   315  				newColWip.cbufidx += 1 + 8
   316  				addIntToRangeIndex(colName, intVal, rangeIndex)
   317  				continue
   318  			}
   319  
   320  			// Try converting to a float.
   321  			floatVal, err := strconv.ParseFloat(numberAsString, 64)
   322  			if err == nil {
   323  				// Conversion succeeded.
   324  				copy(newColWip.cbuf[newColWip.cbufidx:], utils.VALTYPE_ENC_FLOAT64[:])
   325  				copy(newColWip.cbuf[newColWip.cbufidx+1:], toputils.Float64ToBytesLittleEndian(floatVal))
   326  				newColWip.cbufidx += 1 + 8
   327  				addFloatToRangeIndex(colName, floatVal, rangeIndex)
   328  				continue
   329  			}
   330  
   331  			// Conversion failed.
   332  			return false
   333  
   334  		case utils.VALTYPE_ENC_INT64[0], utils.VALTYPE_ENC_FLOAT64[0]:
   335  			// Already a number, so just copy it.
   336  			// It's alrady in the range index, so we don't need to add it again.
   337  			copy(newColWip.cbuf[newColWip.cbufidx:], oldColWip.cbuf[i-1:i+8])
   338  			newColWip.cbufidx += 9
   339  			i += 8
   340  
   341  		case utils.VALTYPE_ENC_BACKFILL[0]:
   342  			// This is a null value.
   343  			copy(newColWip.cbuf[newColWip.cbufidx:], utils.VALTYPE_ENC_BACKFILL[:])
   344  			newColWip.cbufidx += 1
   345  
   346  		case utils.VALTYPE_ENC_BOOL[0]:
   347  			// Cannot convert bool to number.
   348  			return false
   349  
   350  		default:
   351  			// Unknown type.
   352  			log.Errorf("convertColumnToNumbers: unknown type %v", valType)
   353  			return false
   354  		}
   355  	}
   356  
   357  	// Conversion succeeded, so replace the column with the new one.
   358  	wipBlock.colWips[colName] = newColWip
   359  	delete(wipBlock.columnBlooms, colName)
   360  	return true
   361  }
   362  
   363  func convertColumnToStrings(wipBlock *WipBlock, colName string, segmentKey string) {
   364  	oldColWip := wipBlock.colWips[colName]
   365  	newColWip := InitColWip(segmentKey, colName)
   366  	bloom := wipBlock.columnBlooms[colName]
   367  
   368  	for i := uint32(0); i < oldColWip.cbufidx; {
   369  		valType := oldColWip.cbuf[i]
   370  		i++
   371  
   372  		switch valType {
   373  		case utils.VALTYPE_ENC_SMALL_STRING[0]:
   374  			// Already a string, so just copy it.
   375  			// This is already in the bloom, so we don't need to add it again.
   376  			numBytes := uint32(toputils.BytesToUint16LittleEndian(oldColWip.cbuf[i : i+2]))
   377  			i += 2
   378  			copy(newColWip.cbuf[newColWip.cbufidx:], oldColWip.cbuf[i-3:i+numBytes])
   379  			newColWip.cbufidx += 3 + numBytes
   380  			i += numBytes
   381  
   382  		case utils.VALTYPE_ENC_INT64[0]:
   383  			// Parse the integer.
   384  			intVal := toputils.BytesToInt64LittleEndian(oldColWip.cbuf[i : i+8])
   385  			i += 8
   386  
   387  			stringVal := strconv.FormatInt(intVal, 10)
   388  			newColWip.WriteSingleString(stringVal)
   389  			bloom.uniqueWordCount += addToBlockBloom(bloom.Bf, []byte(stringVal))
   390  
   391  		case utils.VALTYPE_ENC_FLOAT64[0]:
   392  			// Parse the float.
   393  			floatVal := toputils.BytesToFloat64LittleEndian(oldColWip.cbuf[i : i+8])
   394  			i += 8
   395  
   396  			stringVal := strconv.FormatFloat(floatVal, 'f', -1, 64)
   397  			newColWip.WriteSingleString(stringVal)
   398  			bloom.uniqueWordCount += addToBlockBloom(bloom.Bf, []byte(stringVal))
   399  
   400  		case utils.VALTYPE_ENC_BACKFILL[0]:
   401  			// This is a null value.
   402  			copy(newColWip.cbuf[newColWip.cbufidx:], utils.VALTYPE_ENC_BACKFILL[:])
   403  			newColWip.cbufidx += 1
   404  
   405  		case utils.VALTYPE_ENC_BOOL[0]:
   406  			// Parse the bool.
   407  			boolVal := oldColWip.cbuf[i]
   408  			i++
   409  
   410  			var stringVal string
   411  			if boolVal == 0 {
   412  				stringVal = "false"
   413  			} else {
   414  				stringVal = "true"
   415  			}
   416  
   417  			newColWip.WriteSingleString(stringVal)
   418  			bloom.uniqueWordCount += addToBlockBloom(bloom.Bf, []byte(stringVal))
   419  
   420  		default:
   421  			// Unknown type.
   422  			log.Errorf("convertColumnsToStrings: unknown type %v when converting column %v", valType, colName)
   423  		}
   424  	}
   425  
   426  	// Replace the old column.
   427  	wipBlock.colWips[colName] = newColWip
   428  	delete(wipBlock.columnRangeIndexes, colName)
   429  }
   430  
   431  func (segstore *SegStore) AppendWipToSegfile(streamid string, forceRotate bool, isKibana bool, onTimeRotate bool) error {
   432  	// If there's columns that had both strings and numbers in them, we need to
   433  	// try converting them all to numbers, but if that doesn't work we'll
   434  	// convert them all to strings.
   435  	consolidateColumnTypes(&segstore.wipBlock, segstore.SegmentKey)
   436  
   437  	if segstore.wipBlock.maxIdx > 0 {
   438  		var totalBytesWritten uint64 = 0
   439  		var totalMetadata uint64 = 0
   440  		allColsToFlush := &sync.WaitGroup{}
   441  		wipBlockLock := sync.Mutex{}
   442  		wipBlockMetadata := &structs.BlockMetadataHolder{
   443  			BlkNum:            segstore.numBlocks,
   444  			ColumnBlockOffset: make(map[string]int64),
   445  			ColumnBlockLen:    make(map[string]uint32),
   446  		}
   447  
   448  		// worst case, each column opens 2 files (.cmi/.csg) and 2 files for segment info (.sid, .bsu)
   449  		numOpenFDs := int64(len(segstore.wipBlock.colWips)*2 + 2)
   450  		err := fileutils.GLOBAL_FD_LIMITER.TryAcquireWithBackoff(numOpenFDs, 10, segstore.SegmentKey)
   451  		if err != nil {
   452  			log.Errorf("AppendWipToSegfile failed to acquire lock for opening %+v file descriptors. err %+v", numOpenFDs, err)
   453  			return err
   454  		}
   455  		defer fileutils.GLOBAL_FD_LIMITER.Release(numOpenFDs)
   456  		for colName, colInfo := range segstore.wipBlock.colWips {
   457  			if colInfo.cbufidx > 0 {
   458  				allColsToFlush.Add(1)
   459  				go func(cname string, colWip *ColWip) {
   460  					defer allColsToFlush.Done()
   461  					var encType []byte
   462  					if cname == config.GetTimeStampKey() {
   463  						encType, err = segstore.wipBlock.encodeTimestamps()
   464  						if err != nil {
   465  							log.Errorf("AppendWipToSegfile: failed to encode timestamps err=%v", err)
   466  							return
   467  						}
   468  						_ = segstore.writeWipTsRollups(cname)
   469  					} else if colWip.deCount > 0 && colWip.deCount < wipCardLimit {
   470  						encType = utils.ZSTD_DICTIONARY_BLOCK
   471  					} else {
   472  						encType = utils.ZSTD_COMLUNAR_BLOCK
   473  					}
   474  
   475  					blkLen, blkOffset, err := writeWip(colWip, encType)
   476  					if err != nil {
   477  						log.Errorf("AppendWipToSegfile: failed to write colsegfilename=%v, err=%v", colWip.csgFname, err)
   478  						return
   479  					}
   480  
   481  					atomic.AddUint64(&totalBytesWritten, uint64(blkLen))
   482  					wipBlockLock.Lock()
   483  					wipBlockMetadata.ColumnBlockOffset[cname] = blkOffset
   484  					wipBlockMetadata.ColumnBlockLen[cname] = blkLen
   485  					wipBlockLock.Unlock()
   486  
   487  					if !isKibana {
   488  						// if bloomIndex present then flush it
   489  						bi, ok := segstore.wipBlock.columnBlooms[cname]
   490  						if ok {
   491  							writtenBytes := segstore.flushBloomIndex(cname, bi)
   492  							atomic.AddUint64(&totalBytesWritten, writtenBytes)
   493  							atomic.AddUint64(&totalMetadata, writtenBytes)
   494  						}
   495  						ri, ok := segstore.wipBlock.columnRangeIndexes[cname]
   496  						if ok {
   497  							writtenBytes := segstore.flushBlockRangeIndex(cname, ri)
   498  							atomic.AddUint64(&totalBytesWritten, writtenBytes)
   499  							atomic.AddUint64(&totalMetadata, writtenBytes)
   500  						}
   501  					}
   502  				}(colName, colInfo)
   503  			}
   504  		}
   505  		if config.IsAggregationsEnabled() {
   506  			segstore.computeStarTree()
   507  		}
   508  
   509  		allColsToFlush.Wait()
   510  		blkSumLen := segstore.flushBlockSummary(wipBlockMetadata, segstore.numBlocks)
   511  		if !isKibana {
   512  			// everytime we write compressedWip to segfile, we write a corresponding blockBloom
   513  			updateUnrotatedBlockInfo(segstore.SegmentKey, segstore.VirtualTableName, &segstore.wipBlock,
   514  				wipBlockMetadata, segstore.AllSeenColumns, segstore.numBlocks, totalMetadata, segstore.earliest_millis,
   515  				segstore.latest_millis, segstore.RecordCount, segstore.OrgId)
   516  		}
   517  		atomic.AddUint64(&totalBytesWritten, blkSumLen)
   518  
   519  		segstore.OnDiskBytes += totalBytesWritten
   520  
   521  		allPQIDs := make(map[string]bool)
   522  		for pqid := range segstore.wipBlock.pqMatches {
   523  			allPQIDs[pqid] = true
   524  		}
   525  
   526  		err = segstore.FlushSegStats()
   527  		if err != nil {
   528  			log.Errorf("AppendWipToSegfile: failed to flushsegstats, err=%v", err)
   529  			return err
   530  		}
   531  
   532  		allColsSizes := segstore.getAllColsSizes()
   533  
   534  		var segmeta = structs.SegMeta{SegmentKey: segstore.SegmentKey, EarliestEpochMS: segstore.earliest_millis,
   535  			LatestEpochMS: segstore.latest_millis, VirtualTableName: segstore.VirtualTableName,
   536  			RecordCount: segstore.RecordCount, SegbaseDir: segstore.segbaseDir,
   537  			BytesReceivedCount: segstore.BytesReceivedCount, OnDiskBytes: segstore.OnDiskBytes,
   538  			ColumnNames: allColsSizes, AllPQIDs: allPQIDs, NumBlocks: segstore.numBlocks, OrgId: segstore.OrgId}
   539  
   540  		sidFname := fmt.Sprintf("%v.sid", segstore.SegmentKey)
   541  		err = writeRunningSegMeta(sidFname, &segmeta)
   542  		if err != nil {
   543  			log.Errorf("AppendWipToSegfile: failed to write sidFname=%v, err=%v", sidFname, err)
   544  			return err
   545  		}
   546  
   547  		for pqid, pqResults := range segstore.wipBlock.pqMatches {
   548  			segstore.pqNonEmptyResults[pqid] = segstore.pqNonEmptyResults[pqid] || pqResults.Any()
   549  			pqidFname := fmt.Sprintf("%v/pqmr/%v.pqmr", segstore.SegmentKey, pqid)
   550  			err := pqResults.FlushPqmr(&pqidFname, segstore.numBlocks)
   551  			if err != nil {
   552  				log.Errorf("AppendWipToSegfile: failed to flush pqmr results to fname %s: %v", pqidFname, err)
   553  				return err
   554  			}
   555  		}
   556  
   557  		err = segstore.resetWipBlock(forceRotate)
   558  		if err != nil {
   559  			return err
   560  		}
   561  		usageStats.UpdateCompressedStats(int64(totalBytesWritten), segmeta.OrgId)
   562  		segstore.numBlocks += 1
   563  	}
   564  	if segstore.numBlocks > 0 && !isKibana {
   565  		err := segstore.checkAndRotateColFiles(streamid, forceRotate, onTimeRotate)
   566  		if err != nil {
   567  			return err
   568  		}
   569  	}
   570  	return nil
   571  }
   572  
   573  func removePqmrFilesAndDirectory(pqid string, segKey string) error {
   574  	workingDirectory, err := os.Getwd()
   575  	if err != nil {
   576  		log.Errorf("Error fetching current workingDirectory")
   577  		return err
   578  	}
   579  	pqFname := workingDirectory + "/" + fmt.Sprintf("%v/pqmr/%v.pqmr", segKey, pqid)
   580  	err = os.Remove(pqFname)
   581  	if err != nil {
   582  		log.Errorf("Cannot delete file at %v", err)
   583  		return err
   584  	}
   585  	pqmrDirectory := workingDirectory + "/" + fmt.Sprintf("%v/pqmr/", segKey)
   586  	files, err := os.ReadDir(pqmrDirectory)
   587  	if err != nil {
   588  		log.Errorf("Cannot PQMR directory at %v", pqmrDirectory)
   589  		return err
   590  	}
   591  	if len(files) == 0 {
   592  		err := os.Remove(pqmrDirectory)
   593  		if err != nil {
   594  			log.Errorf("Error deleting Pqmr directory at %v", pqmrDirectory)
   595  			return err
   596  		}
   597  		pqmrParentDirectory := workingDirectory + "/" + fmt.Sprintf("%v/", segKey)
   598  		files, err = os.ReadDir(pqmrParentDirectory)
   599  		if err != nil {
   600  			log.Errorf("Cannot PQMR parent directory at %v", pqmrParentDirectory)
   601  			return err
   602  		}
   603  		if len(files) == 0 {
   604  			err := os.Remove(pqmrParentDirectory)
   605  			if err != nil {
   606  				log.Errorf("Error deleting Pqmr directory at %v", pqmrParentDirectory)
   607  				return err
   608  			}
   609  		}
   610  	}
   611  	return nil
   612  }
   613  
   614  func (segstore *SegStore) checkAndRotateColFiles(streamid string, forceRotate bool, onTimeRotate bool) error {
   615  
   616  	onTreeRotate := false
   617  	if config.IsAggregationsEnabled() && segstore.usingSegTree {
   618  		nc := segstore.sbuilder.GetNodeCount()
   619  		if nc > MaxAgileTreeNodeCount {
   620  			onTreeRotate = true
   621  		}
   622  	}
   623  
   624  	if segstore.OnDiskBytes > maxSegFileSize || forceRotate || onTimeRotate || onTreeRotate {
   625  
   626  		instrumentation.IncrementInt64Counter(instrumentation.SEGFILE_ROTATE_COUNT, 1)
   627  		bytesWritten := segstore.flushStarTree()
   628  		segstore.OnDiskBytes += uint64(bytesWritten)
   629  
   630  		activeBasedir := getActiveBaseSegDir(streamid, segstore.VirtualTableName, segstore.suffix-1)
   631  		finalBasedir := getFinalBaseSegDir(streamid, segstore.VirtualTableName, segstore.suffix-1)
   632  
   633  		finalSegmentKey := fmt.Sprintf("%s%d", finalBasedir, segstore.suffix-1)
   634  
   635  		log.Infof("Rotating segId=%v RecCount: %v, OnDiskBytes=%v, numBlocks=%v, finalSegKey=%v orgId=%v",
   636  			segstore.SegmentKey, segstore.RecordCount, segstore.OnDiskBytes, segstore.numBlocks,
   637  			finalSegmentKey, segstore.OrgId)
   638  
   639  		// make sure the parent dir of final exists, the two path calls are because getFinal.. func
   640  		// returns a '/' at the end
   641  		err := os.MkdirAll(path.Dir(path.Dir(finalBasedir)), 0764)
   642  		if err != nil {
   643  			return err
   644  		}
   645  		// delete pqmr files if empty and add to empty PQS
   646  		for pqid, hasMatchedAnyRecordInWip := range segstore.pqNonEmptyResults {
   647  			if !hasMatchedAnyRecordInWip {
   648  				err := removePqmrFilesAndDirectory(pqid, segstore.SegmentKey)
   649  				if err != nil {
   650  					log.Errorf("Error deleting pqmr files and directory. Err: %v", err)
   651  				}
   652  				go pqsmeta.AddEmptyResults(pqid, segstore.SegmentKey, segstore.VirtualTableName)
   653  			}
   654  		}
   655  
   656  		allColsSizes := segstore.getAllColsSizes()
   657  
   658  		// move the whole dir in one shot
   659  		err = os.Rename(activeBasedir, finalBasedir)
   660  		if err != nil {
   661  			log.Errorf("checkAndRotateColFiles: failed to mv active to final, err=%v", err)
   662  			return err
   663  		}
   664  		// Upload segment files to s3
   665  		filesToUpload := fileutils.GetAllFilesInDirectory(finalBasedir)
   666  
   667  		err = blob.UploadSegmentFiles(filesToUpload)
   668  		if err != nil {
   669  			log.Errorf("checkAndRotateColFiles: failed to upload segment files , err=%v", err)
   670  		}
   671  
   672  		allPqids := make(map[string]bool, len(segstore.wipBlock.pqMatches))
   673  		for pqid := range segstore.wipBlock.pqMatches {
   674  			allPqids[pqid] = true
   675  		}
   676  
   677  		var segmeta = structs.SegMeta{SegmentKey: finalSegmentKey, EarliestEpochMS: segstore.earliest_millis,
   678  			LatestEpochMS: segstore.latest_millis, VirtualTableName: segstore.VirtualTableName,
   679  			RecordCount: segstore.RecordCount, SegbaseDir: finalBasedir,
   680  			BytesReceivedCount: segstore.BytesReceivedCount, OnDiskBytes: segstore.OnDiskBytes,
   681  			ColumnNames: allColsSizes, AllPQIDs: allPqids, NumBlocks: segstore.numBlocks, OrgId: segstore.OrgId}
   682  
   683  		AddNewRotatedSegment(segmeta)
   684  
   685  		updateRecentlyRotatedSegmentFiles(segstore.SegmentKey, finalSegmentKey)
   686  		removeSegKeyFromUnrotatedInfo(segstore.SegmentKey)
   687  
   688  		// upload ingest node dir to s3
   689  		err = blob.UploadIngestNodeDir()
   690  		if err != nil {
   691  			log.Errorf("checkAndRotateColFiles: failed to upload ingest node dir , err=%v", err)
   692  		}
   693  
   694  		if !forceRotate {
   695  			err = segstore.resetSegStore(streamid, segstore.VirtualTableName)
   696  			if err != nil {
   697  				return err
   698  			}
   699  		}
   700  
   701  	}
   702  	return nil
   703  }
   704  
   705  func (segstore *SegStore) initStarTreeCols() ([]string, []string) {
   706  
   707  	gcols, inMesCols := querytracker.GetTopPersistentAggs(segstore.VirtualTableName)
   708  	sortedGrpCols := make([]string, 0)
   709  	gcMap := make(map[string]uint32) // use it to sort based on cardinality
   710  	for _, cname := range gcols {
   711  
   712  		// verify if cname exist in wip
   713  		_, ok := segstore.wipBlock.colWips[cname]
   714  		if !ok {
   715  			continue
   716  		}
   717  
   718  		_, ok = segstore.AllSst[cname]
   719  		if !ok {
   720  			continue
   721  		}
   722  
   723  		cest := uint32(segstore.AllSst[cname].Hll.Estimate())
   724  		gcMap[cname] = cest
   725  		sortedGrpCols = append(sortedGrpCols, cname)
   726  	}
   727  
   728  	sort.Slice(sortedGrpCols, func(i, j int) bool {
   729  		return gcMap[sortedGrpCols[i]] < gcMap[sortedGrpCols[j]]
   730  	})
   731  
   732  	mCols := make([]string, 0)
   733  	// Check if measureCols are present in wip
   734  	for mCname := range inMesCols {
   735  
   736  		// verify if measure cname exist in wip
   737  		_, ok := segstore.wipBlock.colWips[mCname]
   738  		if !ok {
   739  			continue
   740  		}
   741  		mCols = append(mCols, mCname)
   742  	}
   743  
   744  	return sortedGrpCols, mCols
   745  }
   746  
   747  func (segstore *SegStore) computeStarTree() {
   748  
   749  	if segstore.numBlocks == 0 {
   750  		sortedGrpCols, mCols := segstore.initStarTreeCols()
   751  		if len(sortedGrpCols) == 0 || len(mCols) == 0 {
   752  			segstore.usingSegTree = false
   753  			return
   754  		}
   755  		segstore.usingSegTree = true
   756  		segstore.sbuilder.ResetSegTree(&segstore.wipBlock, sortedGrpCols, mCols)
   757  	}
   758  
   759  	if !segstore.usingSegTree { // if tree creation had failed on first block, then skip it
   760  		return
   761  	}
   762  
   763  	err := segstore.sbuilder.ComputeStarTree(&segstore.wipBlock)
   764  	if err != nil {
   765  		segstore.usingSegTree = false
   766  		log.Errorf("computeStarTree: Failed to compute star tree: %v", err)
   767  		return
   768  	}
   769  }
   770  
   771  func (segstore *SegStore) flushStarTree() uint32 {
   772  
   773  	if !config.IsAggregationsEnabled() {
   774  		return 0
   775  	}
   776  
   777  	if !segstore.usingSegTree { // if tree creation had failed on first block, then skip it
   778  		return 0
   779  	}
   780  
   781  	size, err := segstore.sbuilder.EncodeStarTree(segstore.SegmentKey)
   782  	if err != nil {
   783  		log.Errorf("flushStarTree: Failed to encode star tree: %v", err)
   784  		return 0
   785  	}
   786  	return size
   787  }
   788  
   789  func (segstore *SegStore) adjustEarliestLatestTimes(ts_millis uint64) {
   790  
   791  	if segstore.earliest_millis == 0 {
   792  		segstore.earliest_millis = ts_millis
   793  	} else {
   794  		if ts_millis < segstore.earliest_millis {
   795  			segstore.earliest_millis = ts_millis
   796  		}
   797  	}
   798  
   799  	if segstore.latest_millis == 0 {
   800  		segstore.latest_millis = ts_millis
   801  	} else {
   802  		if ts_millis > segstore.latest_millis {
   803  			segstore.latest_millis = ts_millis
   804  		}
   805  	}
   806  }
   807  
   808  func (wipBlock *WipBlock) adjustEarliestLatestTimes(ts_millis uint64) {
   809  
   810  	if wipBlock.blockSummary.LowTs == 0 {
   811  		wipBlock.blockSummary.LowTs = ts_millis
   812  	} else {
   813  		if ts_millis < wipBlock.blockSummary.LowTs {
   814  			wipBlock.blockSummary.LowTs = ts_millis
   815  		}
   816  	}
   817  
   818  	if wipBlock.blockSummary.HighTs == 0 {
   819  		wipBlock.blockSummary.HighTs = ts_millis
   820  	} else {
   821  		if ts_millis > wipBlock.blockSummary.HighTs {
   822  			wipBlock.blockSummary.HighTs = ts_millis
   823  		}
   824  	}
   825  
   826  }
   827  
   828  func (segstore *SegStore) WritePackedRecord(rawJson []byte, ts_millis uint64, signalType utils.SIGNAL_TYPE) error {
   829  
   830  	var maxIdx uint32
   831  	var err error
   832  	var matchedPCols bool
   833  	tsKey := config.GetTimeStampKey()
   834  	if signalType == utils.SIGNAL_EVENTS || signalType == utils.SIGNAL_JAEGER_TRACES {
   835  		maxIdx, matchedPCols, err = segstore.EncodeColumns(rawJson, ts_millis, &tsKey, signalType)
   836  		if err != nil {
   837  			log.Errorf("WritePackedRecord: Failed to encode record=%+v", string(rawJson))
   838  			return err
   839  		}
   840  	} else {
   841  		log.Errorf("WritePackedRecord: Unknown SignalType=%+v", signalType)
   842  		return errors.New("unknown signal type")
   843  	}
   844  
   845  	if matchedPCols {
   846  		applyStreamingSearchToRecord(segstore.wipBlock, segstore.pqTracker.PQNodes, segstore.wipBlock.blockSummary.RecCount)
   847  	}
   848  
   849  	segstore.wipBlock.maxIdx = maxIdx
   850  	segstore.wipBlock.blockSummary.RecCount += 1
   851  	segstore.RecordCount++
   852  	segstore.lastUpdated = time.Now()
   853  	return nil
   854  }
   855  
   856  // flushes bloom index and returns number of bytes written
   857  func (ss *SegStore) flushBloomIndex(cname string, bi *BloomIndex) uint64 {
   858  
   859  	if bi == nil {
   860  		log.Errorf("flushBloomIndex: bi was nill for segkey=%v", ss.SegmentKey)
   861  		return 0
   862  	}
   863  
   864  	fname := fmt.Sprintf("%s_%v.cmi", ss.SegmentKey, xxhash.Sum64String(cname))
   865  
   866  	bffd, err := os.OpenFile(fname, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
   867  	if err != nil {
   868  		log.Errorf("flushBloomIndex: open failed fname=%v, err=%v", fname, err)
   869  		return 0
   870  	}
   871  
   872  	defer bffd.Close()
   873  
   874  	var buf bytes.Buffer
   875  	bufWriter := bufio.NewWriter(&buf)
   876  
   877  	// there is no accurate way to find exactly how many bytes the write.to is going to write
   878  	// and we need that number , so that we write it first and then the actual bloom data
   879  	// hence this messiness to write it to some buffer, get the bytesWritten count and then do
   880  	// the actual write
   881  	bytesWritten, bferr := bi.Bf.WriteTo(bufWriter)
   882  	if bferr != nil {
   883  		log.Errorf("flushBloomIndex: write buf failed fname=%v, err=%v", fname, bferr)
   884  		return 0
   885  	}
   886  
   887  	bytesWritten += utils.LEN_BLKNUM_CMI_SIZE // for blkNum
   888  	bytesWritten += 1                         // reserver for CMI_Type
   889  
   890  	// copy the size of blockBloom in uint32
   891  	if _, err = bffd.Write(toputils.Uint32ToBytesLittleEndian(uint32(bytesWritten))); err != nil {
   892  		log.Errorf("flushBloomIndex: bloomsize write failed fname=%v, err=%v", fname, err)
   893  		return 0
   894  	}
   895  
   896  	// copy the blockNum
   897  	if _, err = bffd.Write(toputils.Uint16ToBytesLittleEndian(ss.numBlocks)); err != nil {
   898  		log.Errorf("flushBloomIndex: bloomsize write failed fname=%v, err=%v", fname, err)
   899  		return 0
   900  	}
   901  
   902  	// write CMI type
   903  	if _, err = bffd.Write(utils.CMI_BLOOM_INDEX); err != nil {
   904  		log.Errorf("flushBloomIndex: CMI Type write failed fname=%v, err=%v", fname, err)
   905  		return 0
   906  	}
   907  
   908  	// write the blockBloom
   909  	_, bferr = bi.Bf.WriteTo(bffd)
   910  	if bferr != nil {
   911  		log.Errorf("flushBloomIndex: write blockbloom failed fname=%v, err=%v", fname, bferr)
   912  		return 0
   913  	}
   914  
   915  	finalBytesWritten := bytesWritten + 4 // add 4 for size
   916  	if len(bi.HistoricalCount) == 0 {
   917  		bi.HistoricalCount = make([]uint32, 0)
   918  	}
   919  	//adding to block history list
   920  	bi.HistoricalCount = append(bi.HistoricalCount, bi.uniqueWordCount)
   921  	if streamIdHistory := len(bi.HistoricalCount); streamIdHistory > utils.BLOOM_SIZE_HISTORY {
   922  		bi.HistoricalCount = bi.HistoricalCount[streamIdHistory-utils.BLOOM_SIZE_HISTORY:]
   923  
   924  	}
   925  	return uint64(finalBytesWritten)
   926  }
   927  
   928  // returns the number of bytes written
   929  func (segstore *SegStore) flushBlockSummary(bmh *structs.BlockMetadataHolder, blkNum uint16) uint64 {
   930  
   931  	fname := structs.GetBsuFnameFromSegKey(segstore.SegmentKey)
   932  
   933  	fd, err := os.OpenFile(fname, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
   934  	if err != nil {
   935  		log.Errorf("flushBlockSummary: open failed blockSummaryFname=%v, err=%v", fname, err)
   936  		return 0
   937  	}
   938  
   939  	defer fd.Close()
   940  
   941  	blkSumBuf := make([]byte, utils.BLOCK_SUMMARY_SIZE)
   942  	packedLen, blkSumBuf, err := EncodeBlocksum(bmh, &segstore.wipBlock.blockSummary, blkSumBuf[0:], blkNum)
   943  	if err != nil {
   944  		log.Errorf("flushBlockSummary: EncodeBlocksum: Failed to encode blocksummary=%+v, err=%v",
   945  			segstore.wipBlock.blockSummary, err)
   946  		return 0
   947  	}
   948  	if _, err := fd.Write(blkSumBuf[:packedLen]); err != nil {
   949  		log.Errorf("flushBlockSummary:  write failed blockSummaryFname=%v, err=%v", fname, err)
   950  		return 0
   951  	}
   952  	return uint64(packedLen)
   953  }
   954  
   955  func (segstore *SegStore) flushBlockRangeIndex(cname string, ri *RangeIndex) uint64 {
   956  
   957  	if ri == nil {
   958  		log.Errorf("flushBlockRangeIndex: ri was nill for segkey=%v", segstore.SegmentKey)
   959  		return 0
   960  	}
   961  
   962  	fname := fmt.Sprintf("%s_%v.cmi", segstore.SegmentKey, xxhash.Sum64String(cname))
   963  
   964  	fr, err := os.OpenFile(fname, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
   965  	if err != nil {
   966  		log.Errorf("flushBlockRangeIndex: open failed fname=%v, err=%v", fname, err)
   967  		return 0
   968  	}
   969  
   970  	packedLen, blkRIBuf, err := EncodeRIBlock(ri.Ranges, segstore.numBlocks)
   971  	if err != nil {
   972  		log.Errorf("flushBlockRangeIndex: EncodeRIBlock: Failed to encode BlockRangeIndex=%+v, err=%v", ri.Ranges, err)
   973  		return 0
   974  	}
   975  
   976  	if _, err := fr.Write(blkRIBuf[0:packedLen]); err != nil {
   977  		log.Errorf("flushBlockRangeIndex:  write failed blockRangeIndexFname=%v, err=%v", fname, err)
   978  		return 0
   979  	}
   980  	fr.Close()
   981  	return uint64(packedLen)
   982  }
   983  
   984  func initPQTracker() *PQTracker {
   985  	return &PQTracker{
   986  		colNames:    make(map[string]bool),
   987  		PQNodes:     make(map[string]*structs.SearchNode),
   988  		hasWildcard: false,
   989  	}
   990  }
   991  
   992  func (pct *PQTracker) addSearchNode(pqid string, sNode *structs.SearchNode) {
   993  	pct.PQNodes[pqid] = sNode
   994  
   995  	if pct.hasWildcard {
   996  		return
   997  	}
   998  	cols, wildcard := sNode.GetAllColumnsToSearch()
   999  	for colName := range cols {
  1000  		pct.colNames[colName] = true
  1001  	}
  1002  	pct.hasWildcard = wildcard
  1003  }
  1004  
  1005  func (pct *PQTracker) isColumnInPQuery(col string) bool {
  1006  	if pct.hasWildcard {
  1007  		return true
  1008  	}
  1009  	if pct.colNames == nil {
  1010  		return false
  1011  	}
  1012  	_, ok := pct.colNames[col]
  1013  	return ok
  1014  }
  1015  
  1016  func (wip *WipBlock) clearPQMatchInfo() {
  1017  	for pqid := range wip.pqMatches {
  1018  		delete(wip.pqMatches, pqid)
  1019  	}
  1020  }
  1021  
  1022  func (wipBlock *WipBlock) encodeTimestamps() ([]byte, error) {
  1023  
  1024  	encType := utils.TIMESTAMP_TOPDIFF_VARENC
  1025  
  1026  	tsWip := wipBlock.colWips[config.GetTimeStampKey()]
  1027  	tsWip.cbufidx = 0 // reset to zero since packer we set it to 1, so that the writeWip gets invoked
  1028  
  1029  	var tsType structs.TS_TYPE
  1030  	diff := wipBlock.blockSummary.HighTs - wipBlock.blockSummary.LowTs
  1031  
  1032  	if diff <= toputils.UINT8_MAX {
  1033  		tsType = structs.TS_Type8
  1034  	} else if diff <= toputils.UINT16_MAX {
  1035  		tsType = structs.TS_Type16
  1036  	} else if diff <= toputils.UINT32_MAX {
  1037  		tsType = structs.TS_Type32
  1038  	} else {
  1039  		tsType = structs.TS_Type64
  1040  	}
  1041  
  1042  	lowTs := wipBlock.blockSummary.LowTs
  1043  
  1044  	// store TS_TYPE and lowTs for reconstruction needs
  1045  	copy(tsWip.cbuf[tsWip.cbufidx:], []byte{uint8(tsType)})
  1046  	tsWip.cbufidx += 1
  1047  	copy(tsWip.cbuf[tsWip.cbufidx:], toputils.Uint64ToBytesLittleEndian(lowTs))
  1048  	tsWip.cbufidx += 8
  1049  
  1050  	switch tsType {
  1051  	case structs.TS_Type8:
  1052  		var tsVal uint8
  1053  		for i := uint16(0); i < wipBlock.blockSummary.RecCount; i++ {
  1054  			tsVal = uint8(wipBlock.blockTs[i] - lowTs)
  1055  			copy(tsWip.cbuf[tsWip.cbufidx:], []byte{tsVal})
  1056  			tsWip.cbufidx += 1
  1057  		}
  1058  	case structs.TS_Type16:
  1059  		var tsVal uint16
  1060  		for i := uint16(0); i < wipBlock.blockSummary.RecCount; i++ {
  1061  			tsVal = uint16(wipBlock.blockTs[i] - lowTs)
  1062  			copy(tsWip.cbuf[tsWip.cbufidx:], toputils.Uint16ToBytesLittleEndian(tsVal))
  1063  			tsWip.cbufidx += 2
  1064  		}
  1065  	case structs.TS_Type32:
  1066  		var tsVal uint32
  1067  		for i := uint16(0); i < wipBlock.blockSummary.RecCount; i++ {
  1068  			tsVal = uint32(wipBlock.blockTs[i] - lowTs)
  1069  			copy(tsWip.cbuf[tsWip.cbufidx:], toputils.Uint32ToBytesLittleEndian(tsVal))
  1070  			tsWip.cbufidx += 4
  1071  		}
  1072  	case structs.TS_Type64:
  1073  		var tsVal uint64
  1074  		for i := uint16(0); i < wipBlock.blockSummary.RecCount; i++ {
  1075  			tsVal = wipBlock.blockTs[i] - lowTs
  1076  			copy(tsWip.cbuf[tsWip.cbufidx:], toputils.Uint64ToBytesLittleEndian(tsVal))
  1077  			tsWip.cbufidx += 8
  1078  		}
  1079  	}
  1080  
  1081  	return encType, nil
  1082  }
  1083  
/*

   Rollup file layout, one entry appended per block (see writeSingleRup):

   [blkNum 2B][numBucketKeys 2B][BuckData xxB]......

   BuckData ===>
   [bucketKey 8B][rrEncType 1B][mrDataSize 2B]{matchedRecordData ....}

*/
  1092  
  1093  func (ss *SegStore) writeWipTsRollups(cname string) error {
  1094  
  1095  	// todo move this dir creation to initSegStore
  1096  	dirName := fmt.Sprintf("%v/rups/", path.Dir(ss.SegmentKey))
  1097  	if _, err := os.Stat(dirName); os.IsNotExist(err) {
  1098  		err := os.MkdirAll(dirName, os.FileMode(0764))
  1099  		if err != nil {
  1100  			log.Errorf("writeWipTsRollups: Failed to create directory %s: %v", dirName, err)
  1101  			return err
  1102  		}
  1103  	}
  1104  
  1105  	var reterr error = nil
  1106  
  1107  	fname := fmt.Sprintf("%v/rups/%v.crup", path.Dir(ss.SegmentKey), xxhash.Sum64String(cname+"m"))
  1108  	err := writeSingleRup(ss.numBlocks, fname, ss.wipBlock.tomRollup)
  1109  	if err != nil {
  1110  		log.Errorf("writeWipTsRollups: failed to write minutes rollup file, err=%v", err)
  1111  		reterr = err
  1112  	}
  1113  
  1114  	fname = fmt.Sprintf("%v/rups/%v.crup", path.Dir(ss.SegmentKey), xxhash.Sum64String(cname+"h"))
  1115  	err = writeSingleRup(ss.numBlocks, fname, ss.wipBlock.tohRollup)
  1116  	if err != nil {
  1117  		log.Errorf("writeWipTsRollups: failed to write hour rollup file, err=%v", err)
  1118  		reterr = err
  1119  	}
  1120  	fname = fmt.Sprintf("%v/rups/%v.crup", path.Dir(ss.SegmentKey), xxhash.Sum64String(cname+"d"))
  1121  	err = writeSingleRup(ss.numBlocks, fname, ss.wipBlock.todRollup)
  1122  	if err != nil {
  1123  		log.Errorf("writeWipTsRollups: failed to write day rollup file, err=%v", err)
  1124  		reterr = err
  1125  	}
  1126  
  1127  	return reterr
  1128  }
  1129  
  1130  func writeSingleRup(blkNum uint16, fname string, tRup map[uint64]*RolledRecs) error {
  1131  	fd, err := os.OpenFile(fname, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
  1132  	if err != nil {
  1133  		log.Errorf("writeSingleRup: open failed fname=%v, err=%v", fname, err)
  1134  		return err
  1135  	}
  1136  
  1137  	defer fd.Close()
  1138  
  1139  	_, err = fd.Seek(0, 2) // go to the end of the file
  1140  	if err != nil {
  1141  		log.Errorf("writeSingleRup: failed to get end offset %+v", err)
  1142  		return err
  1143  	}
  1144  
  1145  	// write blkNum
  1146  	_, err = fd.Write(toputils.Uint16ToBytesLittleEndian(blkNum))
  1147  	if err != nil {
  1148  		log.Errorf("writeSingleRup: blkNum write failed fname=%v, err=%v", fname, err)
  1149  		return err
  1150  	}
  1151  
  1152  	// write num of bucketKeys
  1153  	_, err = fd.Write(toputils.Uint16ToBytesLittleEndian(uint16(len(tRup))))
  1154  	if err != nil {
  1155  		log.Errorf("writeSingleRup: failed to write num of bucket keys %+v", err)
  1156  		return err
  1157  	}
  1158  
  1159  	for bkey, rr := range tRup {
  1160  
  1161  		// write bucketKey ts
  1162  		if _, err = fd.Write(toputils.Uint64ToBytesLittleEndian(bkey)); err != nil {
  1163  			log.Errorf("writeSingleRup: blkNum=%v bkey=%v write failed fname=%v, err=%v",
  1164  				blkNum, bkey, fname, err)
  1165  			return err
  1166  		}
  1167  
  1168  		// write encoding type
  1169  		if _, err = fd.Write([]byte{utils.RR_ENC_BITSET}); err != nil {
  1170  			log.Errorf("writeSingleRup: blkNum=%v bkey=%v enc type failed fname=%v, err=%v",
  1171  				blkNum, bkey, fname, err)
  1172  			return err
  1173  		}
  1174  
  1175  		// we could use a Compact here, but in past we saw compact loose data
  1176  		// once compact is fixed then we can use it here.
  1177  		// pad an extra word (64 bits) so that shrink does not loose data
  1178  		cb := rr.MatchedRes.Shrink(uint(rr.lastRecNum + 64))
  1179  		mrSize := uint16(cb.GetInMemSize())
  1180  		if _, err = fd.Write(toputils.Uint16ToBytesLittleEndian(uint16(mrSize))); err != nil {
  1181  			log.Errorf("writeSingleRup: blkNum=%v bkey=%v mrsize write failed fname=%v, err=%v",
  1182  				blkNum, bkey, fname, err)
  1183  			return err
  1184  		}
  1185  
  1186  		// write actual bitset
  1187  		err = cb.WriteTo(fd)
  1188  		if err != nil {
  1189  			log.Errorf("writeSingleRup: blkNum=%v bkey=%v bitset write failed fname=%v, err=%v",
  1190  				blkNum, bkey, fname, err)
  1191  			return err
  1192  		}
  1193  	}
  1194  
  1195  	return nil
  1196  }
  1197  
  1198  /*
  1199  Encoding Scheme for all columns single file
  1200  
  1201  [Version 1B] [CnameLen 2B] [Cname xB] [ColSegEncodingLen 2B] [ColSegEncoding xB]....
  1202  */
  1203  func (ss *SegStore) FlushSegStats() error {
  1204  
  1205  	if len(ss.AllSst) <= 0 {
  1206  		log.Errorf("FlushSegStats: no segstats to flush")
  1207  		return errors.New("FlushSegStats: no segstats to flush")
  1208  	}
  1209  
  1210  	fname := fmt.Sprintf("%v.sst", ss.SegmentKey)
  1211  	fd, err := os.OpenFile(fname, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
  1212  	if err != nil {
  1213  		log.Errorf("FlushSegStats: Failed to open file=%v, err=%v", fname, err)
  1214  		return err
  1215  	}
  1216  	defer fd.Close()
  1217  
  1218  	// version
  1219  	_, err = fd.Write([]byte{1})
  1220  	if err != nil {
  1221  		log.Errorf("FlushSegStats: failed to write version err=%v", err)
  1222  		return err
  1223  	}
  1224  
  1225  	buf := make([]byte, utils.WIP_SIZE)
  1226  	for cname, sst := range ss.AllSst {
  1227  
  1228  		// cname len
  1229  		_, err = fd.Write(toputils.Uint16ToBytesLittleEndian(uint16(len(cname))))
  1230  		if err != nil {
  1231  			log.Errorf("FlushSegStats: failed to write cnamelen cname=%v err=%v", cname, err)
  1232  			return err
  1233  		}
  1234  
  1235  		// cname
  1236  		_, err = fd.WriteString(cname)
  1237  		if err != nil {
  1238  			log.Errorf("FlushSegStats: failed to write cname cname=%v err=%v", cname, err)
  1239  			return err
  1240  		}
  1241  
  1242  		idx, err := writeSstToBuf(sst, buf)
  1243  		if err != nil {
  1244  			log.Errorf("FlushSegStats: error writing to buf err=%v", err)
  1245  			return err
  1246  		}
  1247  
  1248  		// colsegencodinglen
  1249  		_, err = fd.Write(toputils.Uint16ToBytesLittleEndian(idx))
  1250  		if err != nil {
  1251  			log.Errorf("FlushSegStats: failed to write colsegencodlen cname=%v err=%v", cname, err)
  1252  			return err
  1253  		}
  1254  
  1255  		// colsegencoding
  1256  		_, err = fd.Write(buf[0:idx])
  1257  		if err != nil {
  1258  			log.Errorf("FlushSegStats: failed to write colsegencoding cname=%v err=%v", cname, err)
  1259  			return err
  1260  		}
  1261  	}
  1262  
  1263  	return nil
  1264  }
  1265  
  1266  /*
  1267  Encoding Schema for SegStats Single Column Data
  1268  [Version 1B] [isNumeric 1B] [Count 8B] [HLL_Size 2B] [HLL_Data xB]
  1269  [N_type 1B] [Min 8B] [N_type 1B] [Max 8B] [N_type 1B] [Sum 8B]
  1270  */
  1271  func writeSstToBuf(sst *structs.SegStats, buf []byte) (uint16, error) {
  1272  
  1273  	idx := uint16(0)
  1274  
  1275  	// version
  1276  	copy(buf[idx:], []byte{1})
  1277  	idx++
  1278  
  1279  	// isNumeric
  1280  	copy(buf[idx:], toputils.BoolToBytesLittleEndian(sst.IsNumeric))
  1281  	idx++
  1282  
  1283  	// Count
  1284  	copy(buf[idx:], toputils.Uint64ToBytesLittleEndian(sst.Count))
  1285  	idx += 8
  1286  
  1287  	hllData, err := sst.Hll.MarshalBinary()
  1288  	if err != nil {
  1289  		log.Errorf("writeSstToBuf: HLL marshal failed err=%v", err)
  1290  		return idx, err
  1291  	}
  1292  
  1293  	// HLL_Size
  1294  	copy(buf[idx:], toputils.Uint16ToBytesLittleEndian(uint16(len(hllData))))
  1295  	idx += 2
  1296  
  1297  	// HLL_Data
  1298  	copy(buf[idx:], hllData)
  1299  	idx += uint16(len(hllData))
  1300  
  1301  	if !sst.IsNumeric {
  1302  		return idx, nil // dont write numeric stuff if this column is not numeric
  1303  	}
  1304  
  1305  	// Min NumType
  1306  	copy(buf[idx:], []byte{byte(sst.NumStats.Min.Ntype)})
  1307  	idx++
  1308  
  1309  	// Min
  1310  	if sst.NumStats.Min.Ntype == utils.SS_DT_FLOAT {
  1311  		copy(buf[idx:], toputils.Float64ToBytesLittleEndian(sst.NumStats.Min.FloatVal))
  1312  	} else {
  1313  		copy(buf[idx:], toputils.Int64ToBytesLittleEndian(sst.NumStats.Min.IntgrVal))
  1314  	}
  1315  	idx += 8
  1316  
  1317  	// Max NumType
  1318  	copy(buf[idx:], []byte{byte(sst.NumStats.Max.Ntype)})
  1319  	idx++
  1320  
  1321  	// Max
  1322  	if sst.NumStats.Max.Ntype == utils.SS_DT_FLOAT {
  1323  		copy(buf[idx:], toputils.Float64ToBytesLittleEndian(sst.NumStats.Max.FloatVal))
  1324  	} else {
  1325  		copy(buf[idx:], toputils.Int64ToBytesLittleEndian(sst.NumStats.Max.IntgrVal))
  1326  	}
  1327  	idx += 8
  1328  
  1329  	// Sum NumType
  1330  	copy(buf[idx:], []byte{byte(sst.NumStats.Sum.Ntype)})
  1331  	idx++
  1332  
  1333  	// Sum
  1334  	if sst.NumStats.Sum.Ntype == utils.SS_DT_FLOAT {
  1335  		copy(buf[idx:], toputils.Float64ToBytesLittleEndian(sst.NumStats.Sum.FloatVal))
  1336  	} else {
  1337  		copy(buf[idx:], toputils.Int64ToBytesLittleEndian(sst.NumStats.Sum.IntgrVal))
  1338  	}
  1339  	idx += 8
  1340  
  1341  	return idx, nil
  1342  }
  1343  
  1344  func (ss *SegStore) getAllColsSizes() map[string]*structs.ColSizeInfo {
  1345  
  1346  	allColsSizes := make(map[string]*structs.ColSizeInfo)
  1347  
  1348  	for cname := range ss.AllSeenColumns {
  1349  
  1350  		if cname == config.GetTimeStampKey() {
  1351  			continue
  1352  		}
  1353  
  1354  		fname := ssutils.GetFileNameFromSegSetFile(structs.SegSetFile{
  1355  			SegKey:     ss.SegmentKey,
  1356  			Identifier: fmt.Sprintf("%v", xxhash.Sum64String(cname)),
  1357  			FileType:   structs.Cmi,
  1358  		})
  1359  		cmiSize, onlocal := ssutils.GetFileSizeFromDisk(fname)
  1360  		if !onlocal {
  1361  			log.Errorf("getAllColsSizes: cmi cname: %v, fname: %+v not on local disk", cname, fname)
  1362  		}
  1363  
  1364  		fname = ssutils.GetFileNameFromSegSetFile(structs.SegSetFile{
  1365  			SegKey:     ss.SegmentKey,
  1366  			Identifier: fmt.Sprintf("%v", xxhash.Sum64String(cname)),
  1367  			FileType:   structs.Csg,
  1368  		})
  1369  		csgSize, onlocal := ssutils.GetFileSizeFromDisk(fname)
  1370  		if !onlocal {
  1371  			log.Errorf("getAllColsSizes: csg cname: %v, fname: %+v not on local disk", cname, fname)
  1372  		}
  1373  
  1374  		csinfo := structs.ColSizeInfo{CmiSize: cmiSize, CsgSize: csgSize}
  1375  		allColsSizes[cname] = &csinfo
  1376  	}
  1377  	return allColsSizes
  1378  }
  1379  
  1380  func (ss *SegStore) DestroyWipBlock() {
  1381  	bbp.Put(ss.wipBlock.bb)
  1382  }