github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/reader/segread/segreader_test.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package segread
    18  
    19  import (
    20  	"fmt"
    21  	"os"
    22  	"testing"
    23  	"time"
    24  
    25  	"github.com/cespare/xxhash"
    26  	"github.com/siglens/siglens/pkg/config"
    27  	"github.com/siglens/siglens/pkg/segment/reader/microreader"
    28  	"github.com/siglens/siglens/pkg/segment/structs"
    29  	segutils "github.com/siglens/siglens/pkg/segment/utils"
    30  	"github.com/siglens/siglens/pkg/segment/writer"
    31  	"github.com/siglens/siglens/pkg/utils"
    32  	log "github.com/sirupsen/logrus"
    33  	"github.com/stretchr/testify/assert"
    34  )
    35  
    36  func Test_segReader(t *testing.T) {
    37  
    38  	segDir := "data/"
    39  	_ = os.MkdirAll(segDir, 0755)
    40  	segKey := segDir + "test"
    41  	numBlocks := 10
    42  	numEntriesInBlock := 10
    43  	_, bsm, _, cols, blockmeta, _ := writer.WriteMockColSegFile(segKey, numBlocks, numEntriesInBlock)
    44  
    45  	assert.Greater(t, len(cols), 1)
    46  	var queryCol string
    47  
    48  	// test across multiple columns types
    49  	for queryCol = range cols {
    50  		if queryCol == config.GetTimeStampKey() {
    51  			continue // ingore ts
    52  		}
    53  		fileName := fmt.Sprintf("%s_%v.csg", segKey, xxhash.Sum64String(queryCol))
    54  
    55  		log.Infof("testing with %s", fileName)
    56  		fd, err := os.Open(fileName)
    57  		assert.NoError(t, err)
    58  		sReader, err := InitNewSegFileReader(fd, queryCol, blockmeta, 0, bsm)
    59  		assert.Nil(t, err)
    60  
    61  		// invalid block
    62  		_, err = sReader.ReadRecordFromBlock(uint16(numBlocks), uint16(numEntriesInBlock))
    63  		assert.NotNil(t, err)
    64  		// correct block, incorrect recordNum
    65  		_, err = sReader.ReadRecordFromBlock(0, uint16(numEntriesInBlock))
    66  		assert.NotNil(t, err, "col %s should not have %+v entries", queryCol, numEntriesInBlock+1)
    67  
    68  		// correct block, correct recordNum
    69  		arr, err := sReader.ReadRecordFromBlock(0, uint16(numEntriesInBlock-3))
    70  		assert.Nil(t, err)
    71  		assert.NotNil(t, arr)
    72  		cVal, _, err := writer.GetCvalFromRec(arr, 23)
    73  		assert.Nil(t, err)
    74  		assert.NotNil(t, cVal)
    75  		log.Infof("GetCvalFromRec: %+v for column %s", cVal, queryCol)
    76  
    77  		err = sReader.Close()
    78  		assert.Nil(t, err)
    79  	}
    80  
    81  	os.RemoveAll(segDir)
    82  }
    83  
    84  func Test_timeReader(t *testing.T) {
    85  
    86  	config.InitializeTestingConfig()
    87  	segDir := "data/"
    88  	_ = os.MkdirAll(segDir, 0755)
    89  	segKey := segDir + "test-time"
    90  	numBlocks := 10
    91  	numEntriesInBlock := 10
    92  	_, bSum, _, cols, blockmeta, _ := writer.WriteMockColSegFile(segKey, numBlocks, numEntriesInBlock)
    93  
    94  	assert.Greater(t, len(cols), 1)
    95  	timeReader, err := InitNewTimeReaderFromBlockSummaries(segKey, config.GetTimeStampKey(), blockmeta, bSum, 0)
    96  	assert.Nil(t, err)
    97  
    98  	// test across multiple columns types
    99  	for blockNum := 0; blockNum < numBlocks; blockNum++ {
   100  		currRecs, err := timeReader.GetAllTimeStampsForBlock(uint16(blockNum))
   101  		assert.Nil(t, err)
   102  		assert.Len(t, currRecs, numEntriesInBlock)
   103  
   104  		startTs := uint64(1)
   105  		for _, readTs := range currRecs {
   106  			assert.Equal(t, startTs, readTs)
   107  			startTs++
   108  		}
   109  	}
   110  	os.RemoveAll(segDir)
   111  }
   112  
   113  func Benchmark_readColumnarFile(b *testing.B) {
   114  	segKey := "/Users/ssubramanian/Desktop/SigLens/siglens/data/Sris-MacBook-Pro.local/final/2022/02/21/01/valtix2/10005995996882630313/0"
   115  	sumFile := structs.GetBsuFnameFromSegKey(segKey)
   116  
   117  	numRecsPerBlock := make(map[uint16]uint16)
   118  	maxRecReadInBlock := make(map[uint16]uint16)
   119  	blockSums, allBlockInfo, _, err := microreader.ReadBlockSummaries(sumFile, []byte{})
   120  	assert.Nil(b, err)
   121  
   122  	for idx, bSum := range blockSums {
   123  		numRecsPerBlock[uint16(idx)] = bSum.RecCount
   124  	}
   125  
   126  	colName := "device_type"
   127  
   128  	colCSG := fmt.Sprintf("%s_%v.csg", segKey, xxhash.Sum64String(colName))
   129  	fd, err := os.Open(colCSG)
   130  	assert.NoError(b, err)
   131  	fileReader, err := InitNewSegFileReader(fd, colName, allBlockInfo, 0, blockSums)
   132  	assert.Nil(b, err)
   133  
   134  	b.ResetTimer()
   135  	failedBlocks := make(map[uint16]bool)
   136  
   137  	sTime := time.Now()
   138  	numRead := 0
   139  	for blkNum := range allBlockInfo {
   140  		for i := uint16(0); i < numRecsPerBlock[blkNum]; i++ {
   141  			rawRec, err := fileReader.ReadRecordFromBlock(blkNum, i)
   142  			numRead++
   143  			assert.Nil(b, err)
   144  			assert.NotNil(b, rawRec)
   145  			if err != nil {
   146  				log.Errorf("Failed to read rec %+d from block %d: %v", i, blkNum, err)
   147  				failedBlocks[blkNum] = true
   148  				break
   149  			}
   150  			maxRecReadInBlock[blkNum] = i
   151  		}
   152  	}
   153  
   154  	log.Infof("Read %+v records in %v", numRead, time.Since(sTime))
   155  	err = fileReader.Close()
   156  	assert.Nil(b, err)
   157  }
   158  
   159  func Test_packUnpackDictEnc(t *testing.T) {
   160  
   161  	colWip := &writer.ColWip{}
   162  	deCount := uint16(100)
   163  
   164  	deMap := make(map[string][]uint16)
   165  	recCounts := uint16(100)
   166  
   167  	allBlockSummaries := make([]*structs.BlockSummary, 1)
   168  	allBlockSummaries[0] = &structs.BlockSummary{RecCount: recCounts}
   169  
   170  	cname := "muycname"
   171  	sfr := &SegmentFileReader{
   172  		blockSummaries: allBlockSummaries,
   173  		deTlv:          make([][]byte, 0),
   174  		deRecToTlv:     make([]uint16, 0),
   175  		currBlockNum:   0,
   176  		ColName:        cname,
   177  	}
   178  
   179  	recNum := uint16(0)
   180  	for dwIdx := uint16(0); dwIdx < deCount; dwIdx++ {
   181  
   182  		cval := fmt.Sprintf("mycval-%v", dwIdx)
   183  		cvalBytes := make([]byte, 3+len(cval))
   184  		cvalBytes[0] = segutils.VALTYPE_ENC_SMALL_STRING[0]
   185  		copy(cvalBytes[1:], utils.Uint16ToBytesLittleEndian(uint16(len(cval))))
   186  		copy(cvalBytes[3:], cval)
   187  
   188  		arr := make([]uint16, recCounts/deCount)
   189  		deMap[string(cvalBytes)] = arr
   190  		for rn := uint16(0); rn < recCounts/deCount; rn++ {
   191  			arr[rn] = recNum + rn
   192  		}
   193  		recNum += recCounts / deCount
   194  	}
   195  
   196  	colWip.SetDeCount(deCount)
   197  	colWip.SetDeMap(deMap)
   198  
   199  	writer.PackDictEnc(colWip)
   200  	buf, idx := colWip.GetBufAndIdx()
   201  
   202  	err := sfr.readDictEnc(buf[0:idx], 0)
   203  	assert.Nil(t, err)
   204  
   205  	orderedRecNums := make([]uint16, recCounts)
   206  	for i := uint16(0); i < recCounts; i++ {
   207  		orderedRecNums[i] = i
   208  	}
   209  
   210  	results := make(map[uint16]map[string]interface{})
   211  	_ = sfr.deToResults(results, orderedRecNums)
   212  
   213  	for rn, val := range results {
   214  		dWord := val[cname]
   215  		expected := fmt.Sprintf("mycval-%v", rn)
   216  		assert.Equal(t, dWord, expected)
   217  	}
   218  }