github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/writer/packer.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package writer
    18  
    19  import (
    20  	"encoding/json"
    21  	"errors"
    22  	"fmt"
    23  	"math"
    24  	"math/rand"
    25  	"os"
    26  	"strconv"
    27  	"sync"
    28  	"time"
    29  
    30  	"github.com/axiomhq/hyperloglog"
    31  	"github.com/bits-and-blooms/bloom/v3"
    32  	jp "github.com/buger/jsonparser"
    33  	"github.com/cespare/xxhash"
    34  	"github.com/siglens/siglens/pkg/blob/ssutils"
    35  	"github.com/siglens/siglens/pkg/config"
    36  	"github.com/siglens/siglens/pkg/segment/pqmr"
    37  	. "github.com/siglens/siglens/pkg/segment/structs"
    38  	. "github.com/siglens/siglens/pkg/segment/utils"
    39  	segutils "github.com/siglens/siglens/pkg/segment/utils"
    40  	"github.com/siglens/siglens/pkg/segment/writer/metrics"
    41  	"github.com/siglens/siglens/pkg/segment/writer/stats"
    42  	"github.com/siglens/siglens/pkg/utils"
    43  	log "github.com/sirupsen/logrus"
    44  	bbp "github.com/valyala/bytebufferpool"
    45  )
    46  
// wipCardLimit caps how many distinct values are tracked per column in a
// WIP block (referenced outside this chunk; presumably the dict-encoding
// cardinality cutoff — TODO confirm at the use site).
var wipCardLimit uint16 = 1001

// Zero-value placeholders passed to the numeric helpers (addSegStatsNums,
// encJsonNumber) for the numeric kinds that are unused in a given call.
const FPARM_INT64 = int64(0)
const FPARM_UINT64 = uint64(0)
const FPARM_FLOAT64 = float64(0)
    52  
    53  /*
    54  	   Each column stored in its own columnar file
    55  	   Each column file format:
    56  		  [ValType-1 1B] [OptionalStringVal-Len-1 2B] [ActualValue-1]
    57  		  [ValType-2 1B] [OptionalStringVal-Len-2 2B] [ActualValue-2]
    58  
    59  
    60  	   This function should not be called by itself, must be called via locks
    61  
    62  	   This function assumes that the record_json has been flattened
    63  
    64  	   foundColsInRecord is a map[string]bool of all columns in the WIPBlock. New columns will be added to this map
    65  	   The values of this map will be set to false before returning for subsequent calls. This lets us re-use the same map across WIPBlock
    66  
    67  	   returns :
    68  		  1) Max index amongst the columns
    69  		  3) bool if this record matched the column conditions in PQColTracker
    70  		  3) error
    71  */
    72  func (ss *SegStore) EncodeColumns(rawData []byte, recordTime uint64, tsKey *string,
    73  	signalType segutils.SIGNAL_TYPE) (uint32, bool, error) {
    74  
    75  	var maxIdx uint32 = 0
    76  	var matchedCol = false
    77  
    78  	ss.encodeTime(recordTime, tsKey)
    79  	var err error
    80  	maxIdx, matchedCol, err = ss.encodeRawJsonObject("", rawData, maxIdx, tsKey, matchedCol, signalType)
    81  	if err != nil {
    82  		log.Errorf("Failed to encode json object! err: %+v", err)
    83  		return maxIdx, matchedCol, err
    84  	}
    85  
    86  	for colName, foundCol := range ss.wipBlock.columnsInBlock {
    87  		if foundCol {
    88  			ss.wipBlock.columnsInBlock[colName] = false
    89  			continue
    90  		}
    91  		colWip, ok := ss.wipBlock.colWips[colName]
    92  		if !ok {
    93  			log.Errorf("EncodeColumns: tried to add a backfill for a column with no colWip! %v. This should not happen", colName)
    94  			continue
    95  		}
    96  		colWip.cstartidx = colWip.cbufidx
    97  		copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_BACKFILL[:])
    98  		colWip.cbufidx += 1
    99  		// also do backfill dictEnc for this recnum
   100  		checkAddDictEnc(colWip, VALTYPE_ENC_BACKFILL[:], ss.wipBlock.blockSummary.RecCount)
   101  	}
   102  
   103  	return maxIdx, matchedCol, nil
   104  }
   105  
// encodeRawJsonObject walks one (possibly nested) JSON object and encodes
// every leaf value into the WIP buffer of its column. Nested object keys
// are flattened into dot-joined column names ("parent.child"). maxIdx and
// matchedCol are threaded through the per-type encoders and returned
// updated alongside any error.
func (ss *SegStore) encodeRawJsonObject(currKey string, data []byte, maxIdx uint32, tsKey *string,
	matchedCol bool, signalType segutils.SIGNAL_TYPE) (uint32, bool, error) {
	handler := func(key []byte, value []byte, valueType jp.ValueType, off int) error {
		// Maybe push some state onto a stack here?
		var finalKey string
		var err error
		if currKey == "" {
			finalKey = string(key)
		} else {
			finalKey = fmt.Sprintf("%s.%s", currKey, key)
		}
		switch valueType {
		case jp.Object:
			// recurse with the dot-joined prefix
			maxIdx, matchedCol, err = ss.encodeRawJsonObject(finalKey, value, maxIdx, tsKey, matchedCol, signalType)
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: obj currKey: %v, err: %v", currKey, err)
			}
		case jp.Array:
			// Jaeger traces get special array handling (dict arrays / raw
			// buffers); everything else encodes elements as indexed columns.
			if signalType == SIGNAL_JAEGER_TRACES {

				maxIdx, matchedCol, err = ss.encodeRawJsonArray(finalKey, value, maxIdx, tsKey, matchedCol, signalType)
			} else {
				maxIdx, matchedCol, err = ss.encodeNonJaegerRawJsonArray(finalKey, value, maxIdx, tsKey, matchedCol, signalType)
			}
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: arr currKey: %v, err: %v", currKey, err)
			}
		case jp.String:
			strVal, err := jp.ParseString(value)
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: str currKey: %v, err: %v", currKey, err)
			}
			maxIdx, matchedCol, err = ss.encodeSingleString(finalKey, strVal, maxIdx, tsKey, matchedCol)
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: singstr currKey: %v, err: %v", currKey, err)
			}
		case jp.Number:
			// Try int64 first; fall back to float64 on parse failure.
			numVal, err := jp.ParseInt(value)
			if err != nil {
				fltVal, err := jp.ParseFloat(value)
				if err != nil {
					return fmt.Errorf("encodeRawJsonObject: flt currKey: %v, err: %v", currKey, err)
				}
				// error discarded: encodeSingleNumber currently always returns nil
				maxIdx, matchedCol, _ = ss.encodeSingleNumber(finalKey, fltVal, maxIdx, tsKey, matchedCol)
				return nil
			}
			maxIdx, matchedCol, _ = ss.encodeSingleNumber(finalKey, numVal, maxIdx, tsKey, matchedCol)
		case jp.Boolean:
			boolVal, err := jp.ParseBoolean(value)
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: bool currKey: %v, err: %v", currKey, err)
			}
			maxIdx, matchedCol, err = ss.encodeSingleBool(finalKey, boolVal, maxIdx, tsKey, matchedCol)
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: singbool currKey: %v, err: %v", currKey, err)
			}
		case jp.Null:
			maxIdx, matchedCol, err = ss.encodeSingleNull(finalKey, maxIdx, tsKey, matchedCol)
			if err != nil {
				return fmt.Errorf("encodeRawJsonObject: singnull currKey: %v, err: %v", currKey, err)
			}
		default:
			return fmt.Errorf("currKey: %v, received unknown type of %+s", currKey, valueType)
		}
		return nil
	}
	err := jp.ObjectEach(data, handler)
	return maxIdx, matchedCol, err
}
   175  
   176  func (ss *SegStore) encodeRawJsonArray(currKey string, data []byte, maxIdx uint32, tsKey *string,
   177  	matchedCol bool, signalType segutils.SIGNAL_TYPE) (uint32, bool, error) {
   178  	var encErr error
   179  	if signalType == SIGNAL_JAEGER_TRACES {
   180  		if currKey != "references" && currKey != "logs" {
   181  			maxIdx, matchedCol, encErr = ss.encodeSingleDictArray(currKey, data, maxIdx, tsKey, matchedCol, signalType)
   182  			if encErr != nil {
   183  				log.Infof("encodeRawJsonArray error %s", encErr)
   184  				return maxIdx, matchedCol, encErr
   185  			}
   186  		} else {
   187  			maxIdx, matchedCol, encErr = ss.encodeSingleRawBuffer(currKey, data, maxIdx, tsKey, matchedCol, signalType)
   188  			if encErr != nil {
   189  				return maxIdx, matchedCol, encErr
   190  			}
   191  		}
   192  	}
   193  	return maxIdx, matchedCol, nil
   194  }
   195  
// encodeNonJaegerRawJsonArray encodes a JSON array (for non-Jaeger signals)
// by treating each element as its own column, suffixing the parent key with
// the element's index ("arr.0", "arr.1", ...). Note: jp.ArrayEach cannot be
// aborted from its callback, so an error is recorded in finalErr and the
// remaining elements are still visited (a later error overwrites an earlier
// one).
func (ss *SegStore) encodeNonJaegerRawJsonArray(currKey string, data []byte, maxIdx uint32, tsKey *string,
	matchedCol bool, signalType segutils.SIGNAL_TYPE) (uint32, bool, error) {
	i := 0
	var finalErr error
	_, aErr := jp.ArrayEach(data, func(value []byte, valueType jp.ValueType, offset int, err error) {
		var finalKey string
		var encErr error
		if currKey == "" {
			finalKey = fmt.Sprintf("%d", i)
		} else {
			finalKey = fmt.Sprintf("%s.%d", currKey, i)
		}
		i++
		switch valueType {
		case jp.Object:
			maxIdx, matchedCol, encErr = ss.encodeRawJsonObject(finalKey, value, maxIdx, tsKey, matchedCol, signalType)
			if encErr != nil {
				finalErr = encErr
				return
			}
		case jp.Array:
			maxIdx, matchedCol, encErr = ss.encodeNonJaegerRawJsonArray(finalKey, value, maxIdx, tsKey, matchedCol, signalType)
			if encErr != nil {
				finalErr = encErr
				return
			}
		case jp.String:
			strVal, encErr := jp.ParseString(value)
			if encErr != nil {
				finalErr = encErr
				return
			}
			maxIdx, matchedCol, encErr = ss.encodeSingleString(finalKey, strVal, maxIdx, tsKey, matchedCol)
			if encErr != nil {
				finalErr = encErr
				return
			}
		case jp.Number:
			// int64 first, float64 as the fallback; encodeSingleNumber's
			// error is discarded (it currently always returns nil).
			numVal, encErr := jp.ParseInt(value)
			if encErr != nil {
				fltVal, encErr := jp.ParseFloat(value)
				if encErr != nil {
					finalErr = encErr
					return
				}
				maxIdx, matchedCol, _ = ss.encodeSingleNumber(finalKey, fltVal, maxIdx, tsKey, matchedCol)
				return
			}
			maxIdx, matchedCol, _ = ss.encodeSingleNumber(finalKey, numVal, maxIdx, tsKey, matchedCol)
		case jp.Boolean:
			boolVal, encErr := jp.ParseBoolean(value)
			if encErr != nil {
				finalErr = encErr
				return
			}
			maxIdx, matchedCol, encErr = ss.encodeSingleBool(finalKey, boolVal, maxIdx, tsKey, matchedCol)
			if encErr != nil {
				finalErr = encErr
				return
			}
		case jp.Null:
			maxIdx, matchedCol, encErr = ss.encodeSingleNull(finalKey, maxIdx, tsKey, matchedCol)
			if encErr != nil {
				finalErr = encErr
				return
			}
		default:
			finalErr = fmt.Errorf("received unknown type of %+s", valueType)
			return
		}
	})
	if aErr != nil {
		finalErr = aErr
	}
	return maxIdx, matchedCol, finalErr
}
   272  
// encodeSingleDictArray encodes a Jaeger tags-style array (each element is a
// {key, type, value} object) into a single column entry with layout:
//
//	[VALTYPE_DICT_ARRAY 1B] [payload-len 2B]
//	  per element: [key-len 2B][key bytes]
//	               [val-type 1B][val-len 2B][val bytes]
//
// The 2-byte length is written as a placeholder first and patched after the
// walk; it counts only the payload, excluding the 3-byte header. Keys and
// values are also added to the column's bloom filter and to seg-stats.
func (ss *SegStore) encodeSingleDictArray(arraykey string, data []byte, maxIdx uint32,
	tsKey *string, matchedCol bool, signalType segutils.SIGNAL_TYPE) (uint32, bool, error) {
	// the timestamp column is encoded separately
	if arraykey == *tsKey {
		return maxIdx, matchedCol, nil
	}
	var finalErr error
	var colWip *ColWip
	colWip, _, matchedCol = ss.initAndBackFillColumn(arraykey, data, matchedCol)
	colBlooms := ss.wipBlock.columnBlooms
	var bi *BloomIndex
	var ok bool
	// lazily create the bloom filter for this column
	bi, ok = colBlooms[arraykey]
	if !ok {
		bi = &BloomIndex{}
		bi.uniqueWordCount = 0
		bCount := getBlockBloomSize(bi)
		bi.Bf = bloom.NewWithEstimates(uint(bCount), BLOOM_COLL_PROBABILITY)
		colBlooms[arraykey] = bi
	}
	// remember the entry start so the length placeholder can be patched below
	s := colWip.cbufidx
	copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_DICT_ARRAY[:])
	colWip.cbufidx += 1
	copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(0)) //placeholder for encoding length of array
	colWip.cbufidx += 2
	_, aErr := jp.ArrayEach(data, func(value []byte, valueType jp.ValueType, offset int, err error) {
		switch valueType {
		case jp.Object:
			keyName, keyType, keyVal, err := getNestedDictEntries(value)
			if err != nil {
				log.Errorf("getNestedDictEntries error %+v", err)
				return
			}
			// malformed entries are logged and skipped without setting finalErr
			if keyName == "" || keyType == "" || keyVal == "" {
				err = fmt.Errorf("encodeSingleDictArray: Jaeger tags array should have key/value/type values")
				log.Error(err)
				return
			}
			//encode and copy keyName
			n := uint16(len(keyName))
			copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(n))
			colWip.cbufidx += 2
			copy(colWip.cbuf[colWip.cbufidx:], keyName)
			colWip.cbufidx += uint32(n)
			//check key type
			//based on that encode key value
			// every value is written as [type 1B][len 2B][string bytes],
			// differing only in the type tag
			switch keyType {
			case "string":
				copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_SMALL_STRING[:])
				colWip.cbufidx += 1
				n := uint16(len(keyVal))
				copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(n))
				colWip.cbufidx += 2
				copy(colWip.cbuf[colWip.cbufidx:], keyVal)
				colWip.cbufidx += uint32(n)
			case "bool":
				copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_BOOL[:])
				colWip.cbufidx += 1
				n := uint16(len(keyVal))
				copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(n))
				colWip.cbufidx += 2
				copy(colWip.cbuf[colWip.cbufidx:], keyVal)
				colWip.cbufidx += uint32(n)
			case "int64":
				copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_INT64[:])
				colWip.cbufidx += 1
				n := uint16(len(keyVal))
				copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(n))
				colWip.cbufidx += 2
				copy(colWip.cbuf[colWip.cbufidx:], keyVal)
				colWip.cbufidx += uint32(n)
			case "float64":
				copy(colWip.cbuf[colWip.cbufidx:], segutils.VALTYPE_ENC_FLOAT64[:])
				colWip.cbufidx += 1
				n := uint16(len(keyVal))
				copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(n))
				colWip.cbufidx += 2
				copy(colWip.cbuf[colWip.cbufidx:], keyVal)
				colWip.cbufidx += uint32(n)
			default:
				finalErr = fmt.Errorf("encodeSingleDictArray : received unknown key  %+s", keyType)
			}
			if bi != nil {
				bi.uniqueWordCount += addToBlockBloom(bi.Bf, []byte(keyName))
				bi.uniqueWordCount += addToBlockBloom(bi.Bf, []byte(keyVal))
			}
			stats.AddSegStatsStr(ss.AllSst, keyName, keyVal, ss.wipBlock.bb, nil, false)
			if colWip.cbufidx > maxIdx {
				maxIdx = colWip.cbufidx
			}
		default:
			finalErr = fmt.Errorf("encodeSingleDictArray : received unknown type of %+s", valueType)
			return
		}
	})
	// patch the placeholder with the payload length (excludes the 3B header)
	copy(colWip.cbuf[s+1:], utils.Uint16ToBytesLittleEndian(uint16(colWip.cbufidx-s-3)))
	if aErr != nil {
		finalErr = aErr
	}
	return maxIdx, matchedCol, finalErr
}
   373  
   374  func getNestedDictEntries(data []byte) (string, string, string, error) {
   375  	var nkey, ntype, nvalue string
   376  
   377  	handler := func(key []byte, value []byte, valueType jp.ValueType, off int) error {
   378  		switch string(key) {
   379  		case "key":
   380  			if valueType != jp.String {
   381  				err := fmt.Errorf("getNestedDictEntries key should be of type string , found type %+v", valueType)
   382  				return err
   383  			}
   384  			nkey = string(value)
   385  		case "type":
   386  			ntype = string(value)
   387  		case "value":
   388  			nvalue = string(value)
   389  		default:
   390  			err := fmt.Errorf("getNestedDictEntries: received unknown key of %+s", key)
   391  			return err
   392  		}
   393  		return nil
   394  	}
   395  	err := jp.ObjectEach(data, handler)
   396  	return nkey, ntype, nvalue, err
   397  
   398  }
   399  
// encodeSingleRawBuffer stores the given JSON bytes verbatim for a column
// (used for Jaeger "references"/"logs" arrays) with layout:
//
//	[VALTYPE_RAW_JSON 1B][raw-byte-len 2B][raw bytes]
//
// A bloom filter is lazily created for the column (except for the "_type",
// "_index", and "tags" columns), though the raw value itself is not added
// to it here.
func (ss *SegStore) encodeSingleRawBuffer(key string, value []byte, maxIdx uint32,
	tsKey *string, matchedCol bool, signalType segutils.SIGNAL_TYPE) (uint32, bool, error) {
	// the timestamp column is encoded separately
	if key == *tsKey {
		return maxIdx, matchedCol, nil
	}
	var colWip *ColWip
	colWip, _, matchedCol = ss.initAndBackFillColumn(key, value, matchedCol)
	colBlooms := ss.wipBlock.columnBlooms
	var bi *BloomIndex
	var ok bool
	if key != "_type" && key != "_index" && key != "tags" {
		_, ok = colBlooms[key]
		if !ok {
			bi = &BloomIndex{}
			bi.uniqueWordCount = 0
			bCount := getBlockBloomSize(bi)
			bi.Bf = bloom.NewWithEstimates(uint(bCount), BLOOM_COLL_PROBABILITY)
			colBlooms[key] = bi
		}
	}
	//[utils.VALTYPE_RAW_JSON][raw-byte-len][raw-byte]
	copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_RAW_JSON[:])
	colWip.cbufidx += 1
	n := uint16(len(value))
	copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(n))
	colWip.cbufidx += 2
	copy(colWip.cbuf[colWip.cbufidx:], value)
	colWip.cbufidx += uint32(n)

	if colWip.cbufidx > maxIdx {
		maxIdx = colWip.cbufidx
	}
	return maxIdx, matchedCol, nil
}
   434  
// encodeSingleString encodes one string value into its column WIP buffer
// (via colWip.WriteSingleString). It also updates the column's bloom filter
// (except for the "_type"/"_index" columns), offers the encoded bytes as a
// dict-encoding candidate (unless ss.skipDe), and records string seg-stats.
func (ss *SegStore) encodeSingleString(key string, value string, maxIdx uint32,
	tsKey *string, matchedCol bool) (uint32, bool, error) {
	// the timestamp column is encoded separately
	if key == *tsKey {
		return maxIdx, matchedCol, nil
	}
	var colWip *ColWip
	var recNum uint16
	colWip, recNum, matchedCol = ss.initAndBackFillColumn(key, value, matchedCol)
	colBlooms := ss.wipBlock.columnBlooms
	var bi *BloomIndex
	var ok bool
	// lazily create the bloom filter for this column
	if key != "_type" && key != "_index" {
		bi, ok = colBlooms[key]
		if !ok {
			bi = &BloomIndex{}
			bi.uniqueWordCount = 0
			bCount := getBlockBloomSize(bi)
			bi.Bf = bloom.NewWithEstimates(uint(bCount), BLOOM_COLL_PROBABILITY)
			colBlooms[key] = bi
		}
	}
	// remember where this value starts so the encoded bytes can be handed
	// to the dict-encoding tracker below
	s := colWip.cbufidx
	colWip.WriteSingleString(value)

	if bi != nil {
		bi.uniqueWordCount += addToBlockBloom(bi.Bf, []byte(value))
	}
	if !ss.skipDe {
		checkAddDictEnc(colWip, colWip.cbuf[s:colWip.cbufidx], recNum)
	}
	stats.AddSegStatsStr(ss.AllSst, key, value, ss.wipBlock.bb, nil, false)
	if colWip.cbufidx > maxIdx {
		maxIdx = colWip.cbufidx
	}
	return maxIdx, matchedCol, nil
}
   471  
   472  func (ss *SegStore) encodeSingleBool(key string, val bool, maxIdx uint32,
   473  	tsKey *string, matchedCol bool) (uint32, bool, error) {
   474  	if key == *tsKey {
   475  		return maxIdx, matchedCol, nil
   476  	}
   477  	var colWip *ColWip
   478  	colBlooms := ss.wipBlock.columnBlooms
   479  	colWip, _, matchedCol = ss.initAndBackFillColumn(key, val, matchedCol)
   480  	var bi *BloomIndex
   481  	var ok bool
   482  
   483  	bi, ok = colBlooms[key]
   484  	if !ok {
   485  		bi = &BloomIndex{}
   486  		bi.uniqueWordCount = 0
   487  		bCount := 10
   488  		bi.Bf = bloom.NewWithEstimates(uint(bCount), BLOOM_COLL_PROBABILITY)
   489  		colBlooms[key] = bi
   490  	}
   491  	copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_BOOL[:])
   492  	colWip.cbufidx += 1
   493  	copy(colWip.cbuf[colWip.cbufidx:], utils.BoolToBytesLittleEndian(val))
   494  	colWip.cbufidx += 1
   495  
   496  	if bi != nil {
   497  		bi.uniqueWordCount += addToBlockBloom(bi.Bf, []byte(strconv.FormatBool(val)))
   498  	}
   499  	if colWip.cbufidx > maxIdx {
   500  		maxIdx = colWip.cbufidx
   501  	}
   502  	return maxIdx, matchedCol, nil
   503  }
   504  
   505  func (ss *SegStore) encodeSingleNull(key string, maxIdx uint32,
   506  	tsKey *string, matchedCol bool) (uint32, bool, error) {
   507  	if key == *tsKey {
   508  		return maxIdx, matchedCol, nil
   509  	}
   510  	var colWip *ColWip
   511  	colWip, _, matchedCol = ss.initAndBackFillColumn(key, nil, matchedCol)
   512  	copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_BACKFILL[:])
   513  	colWip.cbufidx += 1
   514  	if colWip.cbufidx > maxIdx {
   515  		maxIdx = colWip.cbufidx
   516  	}
   517  	return maxIdx, matchedCol, nil
   518  }
   519  
   520  func (ss *SegStore) encodeSingleNumber(key string, value interface{}, maxIdx uint32,
   521  	tsKey *string, matchedCol bool) (uint32, bool, error) {
   522  	if key == *tsKey {
   523  		return maxIdx, matchedCol, nil
   524  	}
   525  	var colWip *ColWip
   526  	var recNum uint16
   527  	colWip, recNum, matchedCol = ss.initAndBackFillColumn(key, value, matchedCol)
   528  	colRis := ss.wipBlock.columnRangeIndexes
   529  	segstats := ss.AllSst
   530  	retLen := encSingleNumber(key, value, colWip.cbuf[:], colWip.cbufidx, colRis, recNum, segstats,
   531  		ss.wipBlock.bb, colWip)
   532  	colWip.cbufidx += retLen
   533  
   534  	if colWip.cbufidx > maxIdx {
   535  		maxIdx = colWip.cbufidx
   536  	}
   537  	return maxIdx, matchedCol, nil
   538  }
   539  
// initAndBackFillColumn ensures a ColWip exists for key, creating it (and
// registering the column in AllSeenColumns) the first time the column is
// seen. If the column first appears mid-block (recNum != 0), every earlier
// record is backfilled with the backfill marker. The column is then marked
// present for the current record and matchedCol is OR-ed with whether any
// persistent query tracks this column. Returns the ColWip, the current
// record number, and the updated matchedCol.
func (ss *SegStore) initAndBackFillColumn(key string, value interface{}, matchedCol bool) (*ColWip, uint16, bool) {
	allColWip := ss.wipBlock.colWips
	colBlooms := ss.wipBlock.columnBlooms
	colRis := ss.wipBlock.columnRangeIndexes
	allColsInBlock := ss.wipBlock.columnsInBlock
	recNum := ss.wipBlock.blockSummary.RecCount

	colWip, ok := allColWip[key]
	if !ok {
		colWip = InitColWip(ss.SegmentKey, key)
		allColWip[key] = colWip
		ss.AllSeenColumns[key] = true
	}
	_, ok = allColsInBlock[key]
	if !ok {
		// new column mid-block: pad all prior records with backfill markers
		if recNum != 0 {
			log.Debugf("EncodeColumns: newColumn=%v showed up in the middle, backfilling it now", key)
			backFillPastRecords(key, value, recNum, colBlooms, colRis, colWip)
		}
	}
	// mark as seen for this record (reset by EncodeColumns after each record)
	allColsInBlock[key] = true
	matchedCol = matchedCol || ss.pqTracker.isColumnInPQuery(key)
	colWip.cstartidx = colWip.cbufidx
	return colWip, recNum, matchedCol
}
   565  
// initMicroIndices creates the per-column micro index appropriate for the
// value's dynamic type: a bloom filter for strings (sized by
// getBlockBloomSize), a range index for numeric types, and a small bloom
// filter for bools. Other types get no micro index.
func initMicroIndices(key string, val interface{}, colBlooms map[string]*BloomIndex,
	colRis map[string]*RangeIndex) {
	switch val.(type) {
	case string:
		bi := &BloomIndex{}
		bi.uniqueWordCount = 0
		bCount := getBlockBloomSize(bi)
		bi.Bf = bloom.NewWithEstimates(uint(bCount), BLOOM_COLL_PROBABILITY)
		colBlooms[key] = bi

	case float64, int64, uint64, json.Number:
		// numbers get min/max range tracking instead of a bloom
		ri := &RangeIndex{}
		ri.Ranges = make(map[string]*Numbers, BLOCK_RI_MAP_SIZE)
		colRis[key] = ri

	case bool:
		// todo kunal, for bool type we need to keep a inverted index
		bi := &BloomIndex{}
		bi.uniqueWordCount = 0
		bCount := 10
		bi.Bf = bloom.NewWithEstimates(uint(bCount), BLOOM_COLL_PROBABILITY)
		colBlooms[key] = bi
	}
}
   590  
// backFillPastRecords handles a column that first appears mid-block: it
// initializes the column's micro index, writes one backfill marker byte for
// each already-ingested record (0..recNum-1), and seeds the dict-encoding
// map so all of those records share the single backfill dictionary entry.
// Returns the number of bytes written into the WIP buffer.
func backFillPastRecords(key string, val interface{}, recNum uint16, colBlooms map[string]*BloomIndex,
	colRis map[string]*RangeIndex, colWip *ColWip) uint32 {
	initMicroIndices(key, val, colBlooms, colRis)
	packedLen := uint32(0)

	recArr := make([]uint16, recNum)
	for i := uint16(0); i < recNum; i++ {
		// only the type will be saved when we are backfilling
		copy(colWip.cbuf[colWip.cbufidx:], VALTYPE_ENC_BACKFILL[:])
		colWip.cbufidx += 1
		packedLen += 1
		recArr[i] = i
	}
	// we will also init dictEnc for backfilled recnums
	colWip.deMap[string(VALTYPE_ENC_BACKFILL[:])] = recArr
	colWip.deCount++
	return packedLen
}
   609  
// encSingleNumber encodes one numeric value at wipbuf[idx]: it updates the
// numeric seg-stats, packs the bytes via encJsonNumber (which also maintains
// the column's range index), and registers the encoded bytes as a
// dict-encoding candidate for record wRecNum. Returns the number of bytes
// written. Only int64 and float64 are handled — JSON numbers are parsed
// upstream with jp.ParseInt / jp.ParseFloat — and any other dynamic type is
// logged and encodes nothing (returns 0).
func encSingleNumber(key string, val interface{}, wipbuf []byte, idx uint32,
	colRis map[string]*RangeIndex, wRecNum uint16,
	segstats map[string]*SegStats, bb *bbp.ByteBuffer, colWip *ColWip) uint32 {

	// lazily create the range index for this column
	ri, ok := colRis[key]
	if !ok {
		ri = &RangeIndex{}
		ri.Ranges = make(map[string]*Numbers, BLOCK_RI_MAP_SIZE)
		colRis[key] = ri
	}

	switch cval := val.(type) {
	case float64:
		addSegStatsNums(segstats, key, SS_FLOAT64, FPARM_INT64, FPARM_UINT64, cval,
			fmt.Sprintf("%v", cval), bb)
		valSize := encJsonNumber(key, SS_FLOAT64, FPARM_INT64, FPARM_UINT64, cval, wipbuf[:],
			idx, ri.Ranges)
		checkAddDictEnc(colWip, wipbuf[idx:idx+valSize], wRecNum)
		return valSize
	case int64:
		addSegStatsNums(segstats, key, SS_INT64, cval, FPARM_UINT64, FPARM_FLOAT64,
			fmt.Sprintf("%v", cval), bb)

		valSize := encJsonNumber(key, SS_INT64, cval, FPARM_UINT64, FPARM_FLOAT64, wipbuf[:],
			idx, ri.Ranges)
		checkAddDictEnc(colWip, wipbuf[idx:idx+valSize], wRecNum)
		return valSize

	default:
		log.Errorf("encSingleNumber: Tried to encode a non int/float value! value=%+v", cval)
	}
	return 0
}
   643  
   644  func encJsonNumber(key string, numType SS_IntUintFloatTypes, intVal int64, uintVal uint64,
   645  	fltVal float64, wipbuf []byte, idx uint32, blockRangeIndex map[string]*Numbers) uint32 {
   646  
   647  	var valSize uint32
   648  
   649  	switch numType {
   650  	case SS_INT64:
   651  		copy(wipbuf[idx:], VALTYPE_ENC_INT64[:])
   652  		copy(wipbuf[idx+1:], utils.Int64ToBytesLittleEndian(int64(intVal)))
   653  		valSize = 1 + 8
   654  	case SS_UINT64:
   655  		copy(wipbuf[idx:], VALTYPE_ENC_UINT64[:])
   656  		copy(wipbuf[idx+1:], utils.Uint64ToBytesLittleEndian(uintVal))
   657  		valSize = 1 + 8
   658  	case SS_FLOAT64:
   659  		copy(wipbuf[idx:], VALTYPE_ENC_FLOAT64[:])
   660  		copy(wipbuf[idx+1:], utils.Float64ToBytesLittleEndian(fltVal))
   661  		valSize = 1 + 8
   662  	default:
   663  		log.Errorf("encJsonNumber: unknown numType: %v", numType)
   664  	}
   665  
   666  	if blockRangeIndex != nil {
   667  		updateRangeIndex(key, blockRangeIndex, numType, intVal, uintVal, fltVal)
   668  	}
   669  
   670  	return valSize
   671  }
   672  
   673  /*
   674     Caller of this function can confidently cast the CValEncoslure.CVal to one of the foll types:
   675  	 bool       (if CValEncoslure.Dtype = SS_DT_BOOL)
   676  	 uint64     (if CValEncoslure.Dtype = SS_DT_UNSIGNED_NUM)
   677  	 int64      (if CValEncoslure.Dtype = SS_DT_SIGNED_NUM)
   678  	 float64    (if CValEncoslure.Dtype = SS_DT_FLOAT)
   679  	 string     (if CValEncoslure.Dtype = SS_DT_STRING)
   680  	 array      (if CValEncoslure.Dtype = SS_DT_ARRAY_DICT)
   681  */
   682  /*
   683  parameters:
   684     rec: byte slice
   685     qid
   686  returns:
   687     CValEncoslure: Cval encoding of this col entry
   688     uint16: len of this entry inside that was inside the byte slice
   689     error:
   690  */
   691  func GetCvalFromRec(rec []byte, qid uint64) (CValueEnclosure, uint16, error) {
   692  
   693  	if len(rec) == 0 {
   694  		return CValueEnclosure{}, 0, errors.New("column value is empty")
   695  	}
   696  
   697  	var retVal CValueEnclosure
   698  	var endIdx uint16
   699  	switch rec[0] {
   700  
   701  	case VALTYPE_ENC_SMALL_STRING[0]:
   702  		retVal.Dtype = SS_DT_STRING
   703  		// one byte for type & two for reclen
   704  
   705  		strlen := utils.BytesToUint16LittleEndian(rec[1:3])
   706  		endIdx = strlen + 3
   707  		retVal.CVal = string(rec[3:endIdx])
   708  	case VALTYPE_ENC_BOOL[0]:
   709  		retVal.Dtype = SS_DT_BOOL
   710  		if rec[1] == 0 {
   711  			retVal.CVal = false
   712  		} else {
   713  			retVal.CVal = true
   714  		}
   715  		endIdx = 2
   716  	case VALTYPE_ENC_INT8[0]:
   717  		retVal.Dtype = SS_DT_SIGNED_NUM
   718  		retVal.CVal = int64(int8(rec[1:][0]))
   719  		endIdx = 2
   720  	case VALTYPE_ENC_INT16[0]:
   721  		retVal.Dtype = SS_DT_SIGNED_NUM
   722  		retVal.CVal = int64(utils.BytesToInt16LittleEndian(rec[1:]))
   723  		endIdx = 3
   724  	case VALTYPE_ENC_INT32[0]:
   725  		retVal.Dtype = SS_DT_SIGNED_NUM
   726  		retVal.CVal = int64(utils.BytesToInt32LittleEndian(rec[1:]))
   727  		endIdx = 5
   728  	case VALTYPE_ENC_INT64[0]:
   729  		retVal.Dtype = SS_DT_SIGNED_NUM
   730  		retVal.CVal = utils.BytesToInt64LittleEndian(rec[1:])
   731  		endIdx = 9
   732  	case VALTYPE_ENC_UINT8[0]:
   733  		retVal.Dtype = SS_DT_UNSIGNED_NUM
   734  		retVal.CVal = uint64((rec[1:])[0])
   735  		endIdx = 2
   736  	case VALTYPE_ENC_UINT16[0]:
   737  		retVal.Dtype = SS_DT_UNSIGNED_NUM
   738  		retVal.CVal = uint64(utils.BytesToUint16LittleEndian(rec[1:]))
   739  		endIdx = 3
   740  	case VALTYPE_ENC_UINT32[0]:
   741  		retVal.Dtype = SS_DT_UNSIGNED_NUM
   742  		retVal.CVal = uint64(utils.BytesToUint32LittleEndian(rec[1:]))
   743  		endIdx = 5
   744  	case VALTYPE_ENC_UINT64[0]:
   745  		retVal.Dtype = SS_DT_UNSIGNED_NUM
   746  		retVal.CVal = utils.BytesToUint64LittleEndian(rec[1:])
   747  		endIdx = 9
   748  	case VALTYPE_ENC_FLOAT64[0]:
   749  		retVal.Dtype = SS_DT_FLOAT
   750  		retVal.CVal = utils.BytesToFloat64LittleEndian(rec[1:])
   751  		endIdx = 9
   752  	case VALTYPE_ENC_BACKFILL[0]:
   753  		retVal.Dtype = SS_DT_BACKFILL
   754  		retVal.CVal = nil
   755  		endIdx = 1
   756  	case VALTYPE_RAW_JSON[0]:
   757  		retVal.Dtype = SS_DT_RAW_JSON
   758  		strlen := utils.BytesToUint16LittleEndian(rec[1:3])
   759  		endIdx = strlen + 3
   760  		data := rec[3:endIdx]
   761  		entries := make([]interface{}, 0)
   762  		err := json.Unmarshal(data, &entries)
   763  		if err != nil {
   764  			log.Errorf("GetCvalFromRec: Error unmarshalling VALTYPE_RAW_JSON = %v", err)
   765  			return CValueEnclosure{}, 0, err
   766  		}
   767  		retVal.CVal = entries
   768  	case VALTYPE_DICT_ARRAY[0]:
   769  		retVal.Dtype = SS_DT_ARRAY_DICT
   770  		// one byte for type & two for reclen
   771  		totalLen := utils.BytesToInt16LittleEndian(rec[1:])
   772  		idx := uint16(3)
   773  		cValArray := make([]map[string]interface{}, 0)
   774  		for idx < uint16(totalLen) {
   775  			cVal := make(map[string]interface{})
   776  			strlen := utils.BytesToUint16LittleEndian(rec[idx : idx+2])
   777  			idx += 2
   778  			keyVal := string(rec[idx : idx+strlen])
   779  			idx += strlen
   780  
   781  			cVal["key"] = keyVal
   782  			switch rec[idx] {
   783  			case VALTYPE_ENC_SMALL_STRING[0]:
   784  				cVal["type"] = "string"
   785  				// one byte for type & two for reclen
   786  				strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3])
   787  				idx += 3
   788  				cVal["value"] = string(rec[idx : idx+strlen])
   789  				idx += strlen
   790  			case VALTYPE_ENC_BOOL[0]:
   791  				cVal["type"] = "bool"
   792  				strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3])
   793  				idx += 3
   794  				cVal["value"] = string(rec[idx : idx+strlen])
   795  				idx += strlen
   796  			case VALTYPE_ENC_INT64[0]:
   797  				cVal["type"] = "int64"
   798  				strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3])
   799  				idx += 3
   800  				cVal["value"] = string(rec[idx : idx+strlen])
   801  				idx += strlen
   802  			case VALTYPE_ENC_FLOAT64[0]:
   803  				cVal["type"] = "float64"
   804  				strlen := utils.BytesToUint16LittleEndian(rec[idx+1 : idx+3])
   805  				idx += 3
   806  				cVal["value"] = string(rec[idx : idx+strlen])
   807  				idx += strlen
   808  			default:
   809  				log.Errorf("qid=%d, GetCvalFromRec:SS_DT_ARRAY_DICT unknown type=%v\n", qid, rec[idx])
   810  				return retVal, endIdx, errors.New("invalid rec type")
   811  			}
   812  			cValArray = append(cValArray, cVal)
   813  		}
   814  		retVal.CVal = cValArray
   815  		endIdx = uint16(totalLen)
   816  
   817  	default:
   818  		log.Errorf("qid=%d, GetCvalFromRec: dont know how to convert type=%v\n", qid, rec[0])
   819  		return retVal, endIdx, errors.New("invalid rec type")
   820  	}
   821  
   822  	return retVal, endIdx, nil
   823  }
   824  
// WriteMockColSegFile packs numBlocks blocks of synthetic records (entryCount
// per block, 12 columns named key0..key11 exercising string/bool/int/float
// values) through the regular SegStore.EncodeColumns path and flushes every
// column's WIP buffer to its on-disk .csg file.
//
// Returns per-block bloom indexes, block summaries, range indexes, the set of
// column names, per-block column offsets/lengths, and per-column file sizes.
// Intended for tests/mock-data generation; encode and write errors are only
// logged, not returned.
func WriteMockColSegFile(segkey string, numBlocks int, entryCount int) ([]map[string]*BloomIndex,
	[]*BlockSummary, []map[string]*RangeIndex, map[string]bool, map[uint16]*BlockMetadataHolder,
	map[string]*ColSizeInfo) {

	allBlockBlooms := make([]map[string]*BloomIndex, numBlocks)
	allBlockRangeIdx := make([]map[string]*RangeIndex, numBlocks)
	allBlockSummaries := make([]*BlockSummary, numBlocks)
	allBlockOffsets := make(map[uint16]*BlockMetadataHolder)
	segstats := make(map[string]*SegStats)
	lencnames := uint8(12)
	cnames := make([]string, lencnames)
	mapCol := make(map[string]bool)
	for cidx := uint8(0); cidx < lencnames; cidx += 1 {
		currCol := fmt.Sprintf("key%v", cidx)
		cnames[cidx] = currCol
		mapCol[currCol] = true
	}

	tsKey := config.GetTimeStampKey()
	allCols := make(map[string]bool)
	// set up entries
	for j := 0; j < numBlocks; j++ {
		currBlockUint := uint16(j)
		columnBlooms := make(map[string]*BloomIndex)
		columnRangeIndexes := make(map[string]*RangeIndex)
		colWips := make(map[string]*ColWip)
		// NOTE(review): mapCol is shared (not copied) as columnsInBlock by
		// every block's WipBlock — confirm no per-block mutation is expected.
		wipBlock := WipBlock{
			columnBlooms:       columnBlooms,
			columnRangeIndexes: columnRangeIndexes,
			colWips:            colWips,
			pqMatches:          make(map[string]*pqmr.PQMatchResults),
			columnsInBlock:     mapCol,
			tomRollup:          make(map[uint64]*RolledRecs),
			tohRollup:          make(map[uint64]*RolledRecs),
			todRollup:          make(map[uint64]*RolledRecs),
			bb:                 bbp.Get(),
			blockTs:            make([]uint64, 0),
		}
		segStore := &SegStore{
			wipBlock:       wipBlock,
			SegmentKey:     segkey,
			AllSeenColumns: allCols,
			pqTracker:      initPQTracker(),
			AllSst:         segstats,
			numBlocks:      currBlockUint,
		}
		for i := 0; i < entryCount; i++ {
			// one synthetic record mixing several value types per column
			entry := make(map[string]interface{})
			entry[cnames[0]] = "match words 123 abc"
			entry[cnames[1]] = "value1"
			entry[cnames[2]] = i
			entry[cnames[3]] = (i%2 == 0)
			entry[cnames[4]] = strconv.FormatUint(uint64(i)*2, 10)
			entry[cnames[5]] = "batch-" + fmt.Sprint(j) + "-" + utils.RandomStringWithCharset(10)
			entry[cnames[6]] = (i * 2)
			entry[cnames[7]] = "batch-" + fmt.Sprint(j)
			entry[cnames[8]] = j
			entry[cnames[9]] = rand.Float64()
			entry[cnames[10]] = segkey
			entry[cnames[11]] = "record-batch-" + fmt.Sprint(i%2)

			timestp := uint64(i) + 1 // dont start with 0 as timestamp
			raw, _ := json.Marshal(entry)
			_, _, err := segStore.EncodeColumns(raw, timestp, &tsKey, SIGNAL_EVENTS)
			if err != nil {
				log.Errorf("WriteMockColSegFile: error packing entry: %s", err)
			}
			segStore.wipBlock.blockSummary.RecCount += 1
		}

		// collect per-block artifacts and flush each column's WIP to disk
		allBlockBlooms[j] = segStore.wipBlock.columnBlooms
		allBlockSummaries[j] = &segStore.wipBlock.blockSummary
		allBlockRangeIdx[j] = segStore.wipBlock.columnRangeIndexes
		allBlockOffsets[currBlockUint] = &BlockMetadataHolder{
			ColumnBlockOffset: make(map[string]int64),
			ColumnBlockLen:    make(map[string]uint32),
		}
		for cname, colWip := range segStore.wipBlock.colWips {
			csgFname := fmt.Sprintf("%v_%v.csg", segkey, xxhash.Sum64String(cname))
			var encType []byte
			// the timestamp column has its own encoder; all other columns
			// use the ZSTD columnar block encoding
			if cname == config.GetTimeStampKey() {
				encType, _ = segStore.wipBlock.encodeTimestamps()
			} else {
				encType = ZSTD_COMLUNAR_BLOCK
			}
			blkLen, blkOffset, err := writeWip(colWip, encType)
			if err != nil {
				log.Errorf("WriteMockColSegFile: failed to write colsegfilename=%v, err=%v", csgFname, err)
			}
			allBlockOffsets[currBlockUint].ColumnBlockLen[cname] = blkLen
			allBlockOffsets[currBlockUint].ColumnBlockOffset[cname] = blkOffset
		}
	}

	// NOTE(review): fnamecmi uses the .csg suffix, so CmiSize and CsgSize are
	// read from the same file here — looks like it should be .cmi; confirm.
	allColsSizes := make(map[string]*ColSizeInfo)
	for cname := range mapCol {
		fnamecmi := fmt.Sprintf("%v_%v.csg", segkey, xxhash.Sum64String(cname))
		cmiSize, _ := ssutils.GetFileSizeFromDisk(fnamecmi)
		fnamecsg := fmt.Sprintf("%v_%v.csg", segkey, xxhash.Sum64String(cname))
		csgSize, _ := ssutils.GetFileSizeFromDisk(fnamecsg)
		allColsSizes[cname] = &ColSizeInfo{CmiSize: cmiSize, CsgSize: csgSize}
	}

	return allBlockBlooms, allBlockSummaries, allBlockRangeIdx, mapCol, allBlockOffsets, allColsSizes
}
   930  
// WriteMockTraceFile packs numBlocks blocks each containing a single
// hard-coded mock Jaeger trace record (a "tags" array of key/type/value
// dicts) through SegStore.EncodeColumns with SIGNAL_JAEGER_TRACES, then
// flushes each column's WIP buffer to its .csg file.
//
// Returns per-block bloom indexes, block summaries, range indexes, the set
// of column names, and per-block column offsets/lengths. For tests only;
// encode/write errors are only logged.
//
// NOTE(review): entryCount is never used — exactly one record is written per
// block. Confirm whether multiple records per block were intended.
func WriteMockTraceFile(segkey string, numBlocks int, entryCount int) ([]map[string]*BloomIndex,
	[]*BlockSummary, []map[string]*RangeIndex, map[string]bool, map[uint16]*BlockMetadataHolder) {

	allBlockBlooms := make([]map[string]*BloomIndex, numBlocks)
	allBlockRangeIdx := make([]map[string]*RangeIndex, numBlocks)
	allBlockSummaries := make([]*BlockSummary, numBlocks)
	allBlockOffsets := make(map[uint16]*BlockMetadataHolder)

	segstats := make(map[string]*SegStats)

	// fixed column set for trace records
	mapCol := make(map[string]bool)
	mapCol["tags"] = true
	mapCol["startTimeMillis"] = true
	mapCol["timestamp"] = true

	tsKey := config.GetTimeStampKey()
	allCols := make(map[string]bool)
	// set up entries
	for j := 0; j < numBlocks; j++ {
		currBlockUint := uint16(j)
		columnBlooms := make(map[string]*BloomIndex)
		columnRangeIndexes := make(map[string]*RangeIndex)
		colWips := make(map[string]*ColWip)
		wipBlock := WipBlock{
			columnBlooms:       columnBlooms,
			columnRangeIndexes: columnRangeIndexes,
			colWips:            colWips,
			pqMatches:          make(map[string]*pqmr.PQMatchResults),
			columnsInBlock:     mapCol,
			tomRollup:          make(map[uint64]*RolledRecs),
			tohRollup:          make(map[uint64]*RolledRecs),
			todRollup:          make(map[uint64]*RolledRecs),
			bb:                 bbp.Get(),
			blockTs:            make([]uint64, 0),
		}
		segStore := &SegStore{
			wipBlock:       wipBlock,
			SegmentKey:     segkey,
			AllSeenColumns: allCols,
			pqTracker:      initPQTracker(),
			AllSst:         segstats,
			numBlocks:      currBlockUint,
		}
		// NOTE(review): the literal below has a trailing comma before the
		// closing brace (invalid strict JSON); presumably tolerated by the
		// jsonparser-based encode path — confirm.
		entries := []struct {
			entry []byte
		}{

			{
				[]byte(`{"tags": [
				{
					"key": "sampler.type",
					"type": "string",
					"value": "const"
				},
				{
					"key": "sampler.param",
					"type": "bool",
					"value": "true"
				},
				{
					"key": "http.status_code",
					"type": "int64",
					"value": "200"
				},
				{
					"key": "component",
					"type": "string",
					"value": "gRPC"
				},
				{
					"key": "retry_no",
					"type": "int64",
					"value": "1"
				}

				],

			}`,
				)},
		}

		entry := entries[0].entry
		timestp := uint64(2) + 1 // dont start with 0 as timestamp
		_, _, err := segStore.EncodeColumns(entry, timestp, &tsKey, SIGNAL_JAEGER_TRACES)
		if err != nil {
			log.Errorf("WriteMockTraceFile: error packing entry: %s", err)
		}
		segStore.wipBlock.blockSummary.RecCount += 1

		// collect per-block artifacts and flush each column's WIP to disk
		allBlockBlooms[j] = segStore.wipBlock.columnBlooms
		allBlockSummaries[j] = &segStore.wipBlock.blockSummary
		allBlockRangeIdx[j] = segStore.wipBlock.columnRangeIndexes
		allBlockOffsets[currBlockUint] = &BlockMetadataHolder{
			ColumnBlockOffset: make(map[string]int64),
			ColumnBlockLen:    make(map[string]uint32),
		}
		for cname, colWip := range segStore.wipBlock.colWips {
			csgFname := fmt.Sprintf("%v_%v.csg", segkey, xxhash.Sum64String(cname))
			var encType []byte
			// the timestamp column has its own encoder; all others use ZSTD
			if cname == config.GetTimeStampKey() {
				encType, _ = segStore.wipBlock.encodeTimestamps()
			} else {
				encType = ZSTD_COMLUNAR_BLOCK
			}
			blkLen, blkOffset, err := writeWip(colWip, encType)
			if err != nil {
				log.Errorf("WriteMockTraceFile: failed to write tracer filename=%v, err=%v", csgFname, err)
			}
			allBlockOffsets[currBlockUint].ColumnBlockLen[cname] = blkLen
			allBlockOffsets[currBlockUint].ColumnBlockOffset[cname] = blkOffset
		}
	}
	return allBlockBlooms, allBlockSummaries, allBlockRangeIdx, mapCol, allBlockOffsets
}
  1045  
  1046  func WriteMockMetricsSegment(forceRotate bool, entryCount int) ([]*metrics.MetricsSegment, error) {
  1047  
  1048  	timestamp := uint64(time.Now().Unix() - 24*3600)
  1049  	metric := []string{"test.metric.0", "test.metric.1", "test.metric.2", "test.metric.3"}
  1050  	car_type := []string{"Passenger car light", "Passenger car compact", "Passenger car heavy", "Passenger car mini", "Passenger car medium", "Pickup truck", "Van"}
  1051  	color := []string{"olive", "green", "maroon", "lime", "yellow", "white", "purple", "navy", "aqua"}
  1052  	group := []string{"group 0", "group 1"}
  1053  	fuel_type := []string{"Electric", "Diesel", "Gasoline", "CNG", "Ethanol", "Methanol"}
  1054  	model := []string{"C55 Amg", "325i", "Ranger Pickup 2wd", "Sts", "Pacifica 2wd", "Trailblazer 2wd", "E320 Cdi"}
  1055  	metrics.InitMetricsSegStore()
  1056  	for i := 0; i < entryCount; i++ {
  1057  		entry := make(map[string]interface{})
  1058  		entry["metric"] = metric[rand.Intn(len(metric))]
  1059  		entry["tags"] = map[string]string{
  1060  			"car_type":  car_type[rand.Intn(len(car_type))],
  1061  			"color":     color[rand.Intn(len(color))],
  1062  			"group":     group[rand.Intn(len(group))],
  1063  			"fuel_type": fuel_type[rand.Intn(len(fuel_type))],
  1064  			"model":     model[rand.Intn(len(model))],
  1065  		}
  1066  		entry["timestamp"] = timestamp + uint64(i)
  1067  		entry["value"] = rand.Intn(500)
  1068  		rawJson, _ := json.Marshal(entry)
  1069  		err := AddTimeSeriesEntryToInMemBuf(rawJson, SIGNAL_METRICS_OTSDB, 0)
  1070  		if err != nil {
  1071  			log.Errorf("WriteMockMetricsSegment: error adding time series entry to in memory buffer: %s", err)
  1072  			return nil, err
  1073  		}
  1074  	}
  1075  	retVal := make([]*metrics.MetricsSegment, len(metrics.GetAllMetricsSegments()))
  1076  
  1077  	for idx, mSeg := range metrics.GetAllMetricsSegments() {
  1078  		err := mSeg.CheckAndRotate(forceRotate)
  1079  		if err != nil {
  1080  			log.Errorf("WriteMockMetricsSegment: unable to force rotate: %s", err)
  1081  			return nil, err
  1082  		}
  1083  		retVal[idx] = mSeg
  1084  	}
  1085  
  1086  	return retVal, nil
  1087  }
  1088  
  1089  /*
  1090  [BlockRangeIndexLen 4B]  [rangeKeyData-1] [rangeKeyData-2]....
  1091  
  1092  ** rangeKeyData **
  1093  [RangeKeyLen 2B] [ActualRangeKey xxBytes] [RangeNumType 1B] [MinNumValue 8B] [MaxNumValue 8B]
  1094  */
  1095  
  1096  func EncodeRIBlock(blockRangeIndex map[string]*Numbers, blkNum uint16) (uint32, []byte, error) {
  1097  	var idx uint32
  1098  
  1099  	idx += uint32(RI_BLK_LEN_SIZE)
  1100  
  1101  	blkRIBuf := make([]byte, RI_SIZE)
  1102  
  1103  	// copy the blockNum
  1104  	copy(blkRIBuf[idx:], utils.Uint16ToBytesLittleEndian(blkNum))
  1105  	idx += 2
  1106  
  1107  	copy(blkRIBuf[idx:], CMI_RANGE_INDEX)
  1108  	idx += 1 // for CMI type
  1109  
  1110  	for key, item := range blockRangeIndex {
  1111  		if len(blkRIBuf) < int(idx) {
  1112  			newSlice := make([]byte, RI_SIZE)
  1113  			blkRIBuf = append(blkRIBuf, newSlice...)
  1114  		}
  1115  		copy(blkRIBuf[idx:], utils.Uint16ToBytesLittleEndian(uint16(len(key))))
  1116  		idx += 2
  1117  		n := copy(blkRIBuf[idx:], key)
  1118  		idx += uint32(n)
  1119  		switch item.NumType {
  1120  		case RNT_UNSIGNED_INT:
  1121  			copy(blkRIBuf[idx:], VALTYPE_ENC_RNT_UNSIGNED_INT[:])
  1122  			idx += 1
  1123  			copy(blkRIBuf[idx:], utils.Uint64ToBytesLittleEndian(item.Min_uint64))
  1124  			idx += 8
  1125  			copy(blkRIBuf[idx:], utils.Uint64ToBytesLittleEndian(item.Max_uint64))
  1126  			idx += 8
  1127  		case RNT_SIGNED_INT:
  1128  			copy(blkRIBuf[idx:], VALTYPE_ENC_RNT_SIGNED_INT[:])
  1129  			idx += 1
  1130  			copy(blkRIBuf[idx:], utils.Int64ToBytesLittleEndian(item.Min_int64))
  1131  			idx += 8
  1132  			copy(blkRIBuf[idx:], utils.Int64ToBytesLittleEndian(item.Max_int64))
  1133  			idx += 8
  1134  		case RNT_FLOAT64:
  1135  			copy(blkRIBuf[idx:], VALTYPE_ENC_RNT_FLOAT64[:])
  1136  			idx += 1
  1137  			copy(blkRIBuf[idx:], utils.Float64ToBytesLittleEndian(item.Min_float64))
  1138  			idx += 8
  1139  			copy(blkRIBuf[idx:], utils.Float64ToBytesLittleEndian(item.Max_float64))
  1140  			idx += 8
  1141  		}
  1142  	}
  1143  	// copy the recordlen at the start of the buf
  1144  	copy(blkRIBuf[0:], utils.Uint32ToBytesLittleEndian(uint32(idx-RI_BLK_LEN_SIZE)))
  1145  	// log.Infof("EncodeRIBlock EncodeRIBlock=%v", blkRIBuf[:idx])
  1146  	return idx, blkRIBuf, nil
  1147  }
  1148  
  1149  func (ss *SegStore) encodeTime(recordTimeMS uint64, tsKey *string) {
  1150  	allColWip := ss.wipBlock.colWips
  1151  	allColsInBlock := ss.wipBlock.columnsInBlock
  1152  	tsWip, ok := allColWip[*tsKey]
  1153  	if !ok {
  1154  		tsWip = InitColWip(ss.SegmentKey, *tsKey)
  1155  		allColWip[*tsKey] = tsWip
  1156  		ss.AllSeenColumns[*tsKey] = true
  1157  	}
  1158  	// we will never need to backfill a ts key
  1159  	allColsInBlock[*tsKey] = true
  1160  	if int(ss.wipBlock.blockSummary.RecCount) >= len(ss.wipBlock.blockTs) {
  1161  		newslice := make([]uint64, WIP_NUM_RECS)
  1162  		ss.wipBlock.blockTs = append(ss.wipBlock.blockTs, newslice...)
  1163  	}
  1164  	ss.wipBlock.blockTs[ss.wipBlock.blockSummary.RecCount] = recordTimeMS
  1165  	tsWip.cbufidx = 1 // just so the flush/append gets called
  1166  
  1167  	// calculate rollups
  1168  	tom := (recordTimeMS / MS_IN_MIN) * MS_IN_MIN
  1169  	toh := (recordTimeMS / MS_IN_HOUR) * MS_IN_HOUR
  1170  	tod := (recordTimeMS / MS_IN_DAY) * MS_IN_DAY
  1171  	ss.wipBlock.adjustEarliestLatestTimes(recordTimeMS)
  1172  	addRollup(ss.wipBlock.tomRollup, tom, ss.wipBlock.blockSummary.RecCount)
  1173  	addRollup(ss.wipBlock.tohRollup, toh, ss.wipBlock.blockSummary.RecCount)
  1174  	addRollup(ss.wipBlock.todRollup, tod, ss.wipBlock.blockSummary.RecCount)
  1175  
  1176  }
  1177  
  1178  func addRollup(rrmap map[uint64]*RolledRecs, rolledTs uint64, lastRecNum uint16) {
  1179  
  1180  	var rr *RolledRecs
  1181  	var ok bool
  1182  	rr, ok = rrmap[rolledTs]
  1183  	if !ok {
  1184  		mr := pqmr.CreatePQMatchResults(WIP_NUM_RECS)
  1185  		rr = &RolledRecs{MatchedRes: mr}
  1186  		rrmap[rolledTs] = rr
  1187  	}
  1188  	rr.MatchedRes.AddMatchedRecord(uint(lastRecNum))
  1189  	rr.lastRecNum = lastRecNum
  1190  }
  1191  
  1192  func WriteMockTsRollup(segkey string) error {
  1193  
  1194  	ss := &SegStore{suffix: 1, lock: sync.Mutex{}, SegmentKey: segkey}
  1195  
  1196  	wipBlock := createMockTsRollupWipBlock(segkey)
  1197  	ss.wipBlock = *wipBlock
  1198  	err := ss.writeWipTsRollups("timestamp")
  1199  	return err
  1200  }
  1201  
// createMockTsRollupWipBlock builds a WipBlock holding 1000 records whose
// timestamps start at a fixed epoch and advance by a fixed increment, so the
// block spans multiple minute/hour/day rollup buckets. Returns nil if the
// underlying SegStore cannot be created.
//
// NOTE(review): this initializes the testing config and removes the data path
// when it returns — only safe to call from tests.
func createMockTsRollupWipBlock(segkey string) *WipBlock {

	config.InitializeTestingConfig()
	defer os.RemoveAll(config.GetDataPath()) // we just create a suffix file during segstore creation

	cTime := uint64(time.Now().UnixMilli())
	lencnames := uint8(2)
	cnames := make([]string, lencnames)
	for cidx := uint8(0); cidx < lencnames; cidx += 1 {
		currCol := fmt.Sprintf("fortscheckkey-%v", cidx)
		cnames[cidx] = currCol
	}
	sId := "ts-rollup"
	segstore, err := getSegStore(sId, cTime, "test", 0)
	if err != nil {
		log.Errorf("createMockTsRollupWipBlock, getSegstore err=%v", err)
		return nil
	}
	tsKey := config.GetTimeStampKey()
	entryCount := 1000

	startTs := uint64(1652222966645) // Tuesday, May 10, 2022 22:49:26.645
	tsincr := uint64(7200)           // so that we have 2 hours, 2 days, and > 2mins buckets

	runningTs := startTs
	for i := 0; i < entryCount; i++ {
		//		t.Logf("TestTimestampEncoding: ts=%v", runningTs)
		// each record has one string column and one numeric column
		record_json := make(map[string]interface{})
		record_json[cnames[0]] = "value1"
		record_json[cnames[1]] = json.Number(fmt.Sprint(i))
		rawJson, _ := json.Marshal(record_json)
		_, _, err := segstore.EncodeColumns(rawJson, runningTs, &tsKey, SIGNAL_EVENTS)
		if err != nil {
			log.Errorf("Error:WriteMockColSegFile: error packing entry: %s", err)
		}
		segstore.wipBlock.blockSummary.RecCount += 1
		segstore.adjustEarliestLatestTimes(runningTs)
		runningTs += tsincr
	}

	return &segstore.wipBlock
}
  1244  
// EncodeBlocksum: format as below
/*
   [SummaryLen 4B] [blkNum 2B] [highTs 8B] [lowTs 8B] [recCount 2B] [numColumns 2B] [ColumnBlkInfo]

   ColumnBlkInfo : ...
   [cnameLen 2B] [ColName xxB] [blkOff 8B] [blkLen 4B]...

*/
  1253  
  1254  func EncodeBlocksum(bmh *BlockMetadataHolder, bsum *BlockSummary,
  1255  	blockSummBuf []byte, blkNum uint16) (uint32, []byte, error) {
  1256  
  1257  	var idx uint32
  1258  
  1259  	//check if blockSummBuf is enough to pack blocksummary data
  1260  	// Each BlockSummary entry = xx bytes
  1261  	// summLen *4 bytes) + blkNum 2 bytes + bsum.HighTs(8 bytes) + bsum.LowTs(8 bytes) + bsum.RecCoun(2 bytes)
  1262  	// + N * [ 2 (cnamelen) +  (actualCnamLen) + 8 (blkOff) + 4 (blkLen)]
  1263  
  1264  	clen := 0
  1265  	numCols := uint16(0)
  1266  	for cname := range bmh.ColumnBlockOffset {
  1267  		clen += len(cname)
  1268  		numCols++
  1269  	}
  1270  	// summLen + blkNum + highTs + lowTs + recCount + numCols + totalCnamesLen + N * (cnameLenHolder + blkOff + blkLen)
  1271  	requiredLen := 4 + 2 + 8 + 8 + 2 + 2 + clen + len(bmh.ColumnBlockOffset)*(2+8+4)
  1272  
  1273  	if len(blockSummBuf) < requiredLen {
  1274  		newSlice := make([]byte, requiredLen-len(blockSummBuf))
  1275  		blockSummBuf = append(blockSummBuf, newSlice...)
  1276  	}
  1277  
  1278  	// reserve first 4 bytes for BLOCK_SUMMARY_LEN.
  1279  	idx += 4
  1280  
  1281  	copy(blockSummBuf[idx:], utils.Uint16ToBytesLittleEndian(blkNum))
  1282  	idx += 2
  1283  	copy(blockSummBuf[idx:], utils.Uint64ToBytesLittleEndian(bsum.HighTs))
  1284  	idx += 8
  1285  	copy(blockSummBuf[idx:], utils.Uint64ToBytesLittleEndian(bsum.LowTs))
  1286  	idx += 8
  1287  	copy(blockSummBuf[idx:], utils.Uint16ToBytesLittleEndian(bsum.RecCount))
  1288  	idx += 2
  1289  	copy(blockSummBuf[idx:], utils.Uint16ToBytesLittleEndian(numCols))
  1290  	idx += 2
  1291  
  1292  	for cname, cOff := range bmh.ColumnBlockOffset {
  1293  		copy(blockSummBuf[idx:], utils.Uint16ToBytesLittleEndian(uint16(len(cname))))
  1294  		idx += 2
  1295  		copy(blockSummBuf[idx:], cname)
  1296  		idx += uint32(len(cname))
  1297  		copy(blockSummBuf[idx:], utils.Int64ToBytesLittleEndian(cOff))
  1298  		idx += 8
  1299  		copy(blockSummBuf[idx:], utils.Uint32ToBytesLittleEndian(bmh.ColumnBlockLen[cname]))
  1300  		idx += 4
  1301  	}
  1302  
  1303  	// copy the summlen at the start of the buf
  1304  	copy(blockSummBuf[0:], utils.Uint32ToBytesLittleEndian(uint32(idx)))
  1305  
  1306  	return idx, blockSummBuf, nil
  1307  }
  1308  
  1309  func WriteMockBlockSummary(file string, blockSums []*BlockSummary,
  1310  	allBmh map[uint16]*BlockMetadataHolder) {
  1311  	fd, err := os.OpenFile(file, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
  1312  	if err != nil {
  1313  		log.Errorf("WriteMockBlockSummary: open failed blockSummaryFname=%v, err=%v", file, err)
  1314  		return
  1315  	}
  1316  
  1317  	defer fd.Close()
  1318  
  1319  	for blkNum, block := range blockSums {
  1320  		blkSumBuf := make([]byte, BLOCK_SUMMARY_SIZE)
  1321  		packedLen, _, err := EncodeBlocksum(allBmh[uint16(blkNum)], block, blkSumBuf[0:], uint16(blkNum))
  1322  
  1323  		if err != nil {
  1324  			log.Errorf("WriteMockBlockSummary: EncodeBlocksum: Failed to encode blocksummary=%+v, err=%v", block, err)
  1325  			return
  1326  		}
  1327  		if _, err := fd.Write(blkSumBuf[:packedLen]); err != nil {
  1328  			log.Errorf("WriteMockBlockSummary:  write failed blockSummaryFname=%v, err=%v", file, err)
  1329  			return
  1330  		}
  1331  	}
  1332  	err = fd.Sync()
  1333  	if err != nil {
  1334  		log.Fatal(err)
  1335  	}
  1336  }
  1337  
  1338  func checkAddDictEnc(colWip *ColWip, cval []byte, recNum uint16) {
  1339  	if colWip.deCount < wipCardLimit {
  1340  		recs, ok := colWip.deMap[string(cval)]
  1341  		if !ok {
  1342  			recs = make([]uint16, 0)
  1343  			colWip.deCount += 1
  1344  		}
  1345  		recs = append(recs, recNum)
  1346  		colWip.deMap[string(cval)] = recs
  1347  		// todo we optimize this code, by pre-allocing a fixed length of recs, keep an idx, then add it to recs
  1348  		// advantages: 1) we avoid extending the array. 2) we avoid inserting in the map on every rec
  1349  	}
  1350  }
  1351  
// SetCardinalityLimit overrides wipCardLimit, the maximum number of distinct
// values per column WIP that checkAddDictEnc will track for dictionary
// encoding. Primarily for tests.
func SetCardinalityLimit(val uint16) {
	wipCardLimit = val
}
  1355  
  1356  /*
  1357  	Packing format for dictionary encoding
  1358  	[NumDictWords 2B] [dEntry1 XX] [dEntry2 XX] ...
  1359  
  1360     dEntry1 -- format
  1361     [word1Len 2B] [ActualWord] [numRecs 2B] [recNum1 2B][recNum2 2B]....
  1362  
  1363  */
  1364  
  1365  func PackDictEnc(colWip *ColWip) {
  1366  
  1367  	colWip.cbufidx = 0
  1368  	// reuse the existing cbuf
  1369  	// copy num of dict words
  1370  	copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(colWip.deCount))
  1371  	colWip.cbufidx += 2
  1372  
  1373  	for dword, recNumsArr := range colWip.deMap {
  1374  
  1375  		// copy the actual dict word , the TLV is packed inside the dword
  1376  		copy(colWip.cbuf[colWip.cbufidx:], []byte(dword))
  1377  		colWip.cbufidx += uint32(len(dword))
  1378  
  1379  		// copy num of records
  1380  		numRecs := uint16(len(recNumsArr))
  1381  		copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(numRecs))
  1382  		colWip.cbufidx += 2
  1383  
  1384  		for i := uint16(0); i < numRecs; i++ {
  1385  			// copy the recNum
  1386  			copy(colWip.cbuf[colWip.cbufidx:], utils.Uint16ToBytesLittleEndian(recNumsArr[i]))
  1387  			colWip.cbufidx += 2
  1388  		}
  1389  	}
  1390  }
  1391  
  1392  func addSegStatsStr(segstats map[string]*SegStats, cname string, strVal string,
  1393  	bb *bbp.ByteBuffer) {
  1394  
  1395  	var stats *SegStats
  1396  	var ok bool
  1397  	stats, ok = segstats[cname]
  1398  	if !ok {
  1399  		stats = &SegStats{
  1400  			IsNumeric: false,
  1401  			Count:     0,
  1402  			Hll:       hyperloglog.New16()}
  1403  
  1404  		segstats[cname] = stats
  1405  	}
  1406  
  1407  	stats.Count++
  1408  	bb.Reset()
  1409  	_, _ = bb.WriteString(strVal)
  1410  	stats.Hll.Insert(bb.B)
  1411  }
  1412  
  1413  func addSegStatsNums(segstats map[string]*SegStats, cname string,
  1414  	inNumType SS_IntUintFloatTypes, intVal int64, uintVal uint64,
  1415  	fltVal float64, numstr string, bb *bbp.ByteBuffer) {
  1416  
  1417  	var stats *SegStats
  1418  	var ok bool
  1419  	stats, ok = segstats[cname]
  1420  	if !ok {
  1421  		numStats := &NumericStats{
  1422  			Min: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
  1423  				IntgrVal: math.MaxInt64,
  1424  				FloatVal: math.MaxFloat64,
  1425  			},
  1426  			Max: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
  1427  				IntgrVal: math.MinInt64,
  1428  				FloatVal: math.SmallestNonzeroFloat64,
  1429  			},
  1430  			Sum: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
  1431  				IntgrVal: 0,
  1432  				FloatVal: 0},
  1433  		}
  1434  		stats = &SegStats{
  1435  			IsNumeric: true,
  1436  			Count:     0,
  1437  			Hll:       hyperloglog.New16(),
  1438  			NumStats:  numStats,
  1439  		}
  1440  		segstats[cname] = stats
  1441  	}
  1442  
  1443  	// prior entries were non numeric, so we should init NumStats, but keep the hll and count vars
  1444  	if stats.NumStats == nil {
  1445  		numStats := &NumericStats{
  1446  			Min: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
  1447  				IntgrVal: math.MaxInt64,
  1448  				FloatVal: math.MaxFloat64,
  1449  			},
  1450  			Max: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
  1451  				IntgrVal: math.MinInt64,
  1452  				FloatVal: math.SmallestNonzeroFloat64,
  1453  			},
  1454  			Sum: NumTypeEnclosure{Ntype: SS_DT_SIGNED_NUM,
  1455  				IntgrVal: 0,
  1456  				FloatVal: 0},
  1457  		}
  1458  		stats.NumStats = numStats
  1459  		stats.IsNumeric = true // TODO: what if we have a mix of numeric and non-numeric
  1460  	}
  1461  
  1462  	bb.Reset()
  1463  	_, _ = bb.WriteString(numstr)
  1464  	stats.Hll.Insert(bb.B)
  1465  	processStats(stats, inNumType, intVal, uintVal, fltVal)
  1466  }
  1467  
  1468  func processStats(stats *SegStats, inNumType SS_IntUintFloatTypes, intVal int64,
  1469  	uintVal uint64, fltVal float64) {
  1470  
  1471  	stats.Count++
  1472  
  1473  	var inIntgrVal int64
  1474  	switch inNumType {
  1475  	case SS_UINT8, SS_UINT16, SS_UINT32, SS_UINT64:
  1476  		inIntgrVal = int64(uintVal)
  1477  	case SS_INT8, SS_INT16, SS_INT32, SS_INT64:
  1478  		inIntgrVal = intVal
  1479  	}
  1480  
  1481  	// we just use the Min stats for stored val comparison but apply the same
  1482  	// logic to max and sum
  1483  	switch inNumType {
  1484  	case SS_FLOAT64:
  1485  		if stats.NumStats.Min.Ntype == SS_DT_FLOAT {
  1486  			// incoming float, stored is float, simple min
  1487  			stats.NumStats.Min.FloatVal = math.Min(stats.NumStats.Min.FloatVal, fltVal)
  1488  			stats.NumStats.Max.FloatVal = math.Max(stats.NumStats.Max.FloatVal, fltVal)
  1489  			stats.NumStats.Sum.FloatVal = stats.NumStats.Sum.FloatVal + fltVal
  1490  		} else {
  1491  			// incoming float, stored is non-float, upgrade it
  1492  			stats.NumStats.Min.FloatVal = math.Min(float64(stats.NumStats.Min.IntgrVal), fltVal)
  1493  			stats.NumStats.Min.Ntype = SS_DT_FLOAT
  1494  
  1495  			stats.NumStats.Max.FloatVal = math.Max(float64(stats.NumStats.Max.IntgrVal), fltVal)
  1496  			stats.NumStats.Max.Ntype = SS_DT_FLOAT
  1497  
  1498  			stats.NumStats.Sum.FloatVal = float64(stats.NumStats.Sum.IntgrVal) + fltVal
  1499  			stats.NumStats.Sum.Ntype = SS_DT_FLOAT
  1500  		}
  1501  	// incoming is NON-float
  1502  	default:
  1503  		if stats.NumStats.Min.Ntype == SS_DT_FLOAT {
  1504  			// incoming non-float, stored is float, cast it
  1505  			stats.NumStats.Min.FloatVal = math.Min(stats.NumStats.Min.FloatVal, float64(inIntgrVal))
  1506  			stats.NumStats.Max.FloatVal = math.Max(stats.NumStats.Max.FloatVal, float64(inIntgrVal))
  1507  			stats.NumStats.Sum.FloatVal = stats.NumStats.Sum.FloatVal + float64(inIntgrVal)
  1508  		} else {
  1509  			// incoming non-float, stored is non-float, simple min
  1510  			stats.NumStats.Min.IntgrVal = utils.MinInt64(stats.NumStats.Min.IntgrVal, inIntgrVal)
  1511  			stats.NumStats.Max.IntgrVal = utils.MaxInt64(stats.NumStats.Max.IntgrVal, inIntgrVal)
  1512  			stats.NumStats.Sum.IntgrVal = stats.NumStats.Sum.IntgrVal + inIntgrVal
  1513  		}
  1514  	}
  1515  
  1516  }
  1517  
  1518  func getColByteSlice(rec []byte, qid uint64) ([]byte, uint16, error) {
  1519  
  1520  	if len(rec) == 0 {
  1521  		return []byte{}, 0, errors.New("column value is empty")
  1522  	}
  1523  
  1524  	var endIdx uint16
  1525  	switch rec[0] {
  1526  
  1527  	case VALTYPE_ENC_SMALL_STRING[0]:
  1528  		strlen := utils.BytesToUint16LittleEndian(rec[1:3])
  1529  		endIdx = strlen + 3
  1530  	case VALTYPE_ENC_BOOL[0], VALTYPE_ENC_INT8[0], VALTYPE_ENC_UINT8[0]:
  1531  		endIdx = 2
  1532  	case VALTYPE_ENC_INT16[0], VALTYPE_ENC_UINT16[0]:
  1533  		endIdx = 3
  1534  	case VALTYPE_ENC_INT32[0], VALTYPE_ENC_UINT32[0]:
  1535  		endIdx = 5
  1536  	case VALTYPE_ENC_INT64[0], VALTYPE_ENC_UINT64[0], VALTYPE_ENC_FLOAT64[0]:
  1537  		endIdx = 9
  1538  	case VALTYPE_ENC_BACKFILL[0]:
  1539  		endIdx = 1
  1540  	default:
  1541  		log.Errorf("qid=%d, getColByteSlice: dont know how to convert type=%v\n", qid, rec[0])
  1542  		return []byte{}, endIdx, errors.New("invalid rec type")
  1543  	}
  1544  
  1545  	return rec[0:endIdx], endIdx, nil
  1546  }