github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/reader/segread/segreader_test.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package segread 18 19 import ( 20 "fmt" 21 "os" 22 "testing" 23 "time" 24 25 "github.com/cespare/xxhash" 26 "github.com/siglens/siglens/pkg/config" 27 "github.com/siglens/siglens/pkg/segment/reader/microreader" 28 "github.com/siglens/siglens/pkg/segment/structs" 29 segutils "github.com/siglens/siglens/pkg/segment/utils" 30 "github.com/siglens/siglens/pkg/segment/writer" 31 "github.com/siglens/siglens/pkg/utils" 32 log "github.com/sirupsen/logrus" 33 "github.com/stretchr/testify/assert" 34 ) 35 36 func Test_segReader(t *testing.T) { 37 38 segDir := "data/" 39 _ = os.MkdirAll(segDir, 0755) 40 segKey := segDir + "test" 41 numBlocks := 10 42 numEntriesInBlock := 10 43 _, bsm, _, cols, blockmeta, _ := writer.WriteMockColSegFile(segKey, numBlocks, numEntriesInBlock) 44 45 assert.Greater(t, len(cols), 1) 46 var queryCol string 47 48 // test across multiple columns types 49 for queryCol = range cols { 50 if queryCol == config.GetTimeStampKey() { 51 continue // ingore ts 52 } 53 fileName := fmt.Sprintf("%s_%v.csg", segKey, xxhash.Sum64String(queryCol)) 54 55 log.Infof("testing with %s", fileName) 56 fd, err := os.Open(fileName) 57 assert.NoError(t, err) 58 sReader, err := InitNewSegFileReader(fd, queryCol, blockmeta, 0, bsm) 59 assert.Nil(t, err) 60 61 // invalid block 62 _, err = sReader.ReadRecordFromBlock(uint16(numBlocks), uint16(numEntriesInBlock)) 63 assert.NotNil(t, err) 64 // correct block, incorrect recordNum 65 _, err = sReader.ReadRecordFromBlock(0, uint16(numEntriesInBlock)) 66 assert.NotNil(t, err, "col %s should not have %+v entries", queryCol, numEntriesInBlock+1) 67 68 // correct block, correct recordNum 69 arr, err := sReader.ReadRecordFromBlock(0, uint16(numEntriesInBlock-3)) 70 assert.Nil(t, err) 71 assert.NotNil(t, arr) 72 cVal, _, err := writer.GetCvalFromRec(arr, 23) 73 assert.Nil(t, err) 74 assert.NotNil(t, cVal) 75 log.Infof("GetCvalFromRec: %+v for column %s", cVal, queryCol) 76 77 err = sReader.Close() 78 assert.Nil(t, err) 79 } 80 81 os.RemoveAll(segDir) 82 } 83 84 func Test_timeReader(t *testing.T) { 85 86 config.InitializeTestingConfig() 87 segDir := "data/" 88 _ = os.MkdirAll(segDir, 0755) 89 segKey := segDir + "test-time" 90 numBlocks := 10 91 numEntriesInBlock := 10 92 _, bSum, _, cols, blockmeta, _ := writer.WriteMockColSegFile(segKey, numBlocks, numEntriesInBlock) 93 94 assert.Greater(t, len(cols), 1) 95 timeReader, err := InitNewTimeReaderFromBlockSummaries(segKey, config.GetTimeStampKey(), blockmeta, bSum, 0) 96 assert.Nil(t, err) 97 98 // test across multiple columns types 99 for blockNum := 0; blockNum < numBlocks; blockNum++ { 100 currRecs, err := timeReader.GetAllTimeStampsForBlock(uint16(blockNum)) 101 assert.Nil(t, err) 102 assert.Len(t, currRecs, numEntriesInBlock) 103 104 startTs := uint64(1) 105 for _, readTs := range currRecs { 106 assert.Equal(t, startTs, readTs) 107 startTs++ 108 } 109 } 110 os.RemoveAll(segDir) 111 } 112 113 func Benchmark_readColumnarFile(b *testing.B) { 114 segKey := "/Users/ssubramanian/Desktop/SigLens/siglens/data/Sris-MacBook-Pro.local/final/2022/02/21/01/valtix2/10005995996882630313/0" 115 sumFile := structs.GetBsuFnameFromSegKey(segKey) 116 117 numRecsPerBlock := make(map[uint16]uint16) 118 maxRecReadInBlock := make(map[uint16]uint16) 119 blockSums, allBlockInfo, _, err := microreader.ReadBlockSummaries(sumFile, []byte{}) 120 assert.Nil(b, err) 121 122 for idx, bSum := range blockSums { 123 numRecsPerBlock[uint16(idx)] = bSum.RecCount 124 } 125 126 colName := "device_type" 127 128 colCSG := fmt.Sprintf("%s_%v.csg", segKey, xxhash.Sum64String(colName)) 129 fd, err := os.Open(colCSG) 130 assert.NoError(b, err) 131 fileReader, err := InitNewSegFileReader(fd, colName, allBlockInfo, 0, blockSums) 132 assert.Nil(b, err) 133 134 b.ResetTimer() 135 failedBlocks := make(map[uint16]bool) 136 137 sTime := time.Now() 138 numRead := 0 139 for blkNum := range allBlockInfo { 140 for i := uint16(0); i < numRecsPerBlock[blkNum]; i++ { 141 rawRec, err := fileReader.ReadRecordFromBlock(blkNum, i) 142 numRead++ 143 assert.Nil(b, err) 144 assert.NotNil(b, rawRec) 145 if err != nil { 146 log.Errorf("Failed to read rec %+d from block %d: %v", i, blkNum, err) 147 failedBlocks[blkNum] = true 148 break 149 } 150 maxRecReadInBlock[blkNum] = i 151 } 152 } 153 154 log.Infof("Read %+v records in %v", numRead, time.Since(sTime)) 155 err = fileReader.Close() 156 assert.Nil(b, err) 157 } 158 159 func Test_packUnpackDictEnc(t *testing.T) { 160 161 colWip := &writer.ColWip{} 162 deCount := uint16(100) 163 164 deMap := make(map[string][]uint16) 165 recCounts := uint16(100) 166 167 allBlockSummaries := make([]*structs.BlockSummary, 1) 168 allBlockSummaries[0] = &structs.BlockSummary{RecCount: recCounts} 169 170 cname := "muycname" 171 sfr := &SegmentFileReader{ 172 blockSummaries: allBlockSummaries, 173 deTlv: make([][]byte, 0), 174 deRecToTlv: make([]uint16, 0), 175 currBlockNum: 0, 176 ColName: cname, 177 } 178 179 recNum := uint16(0) 180 for dwIdx := uint16(0); dwIdx < deCount; dwIdx++ { 181 182 cval := fmt.Sprintf("mycval-%v", dwIdx) 183 cvalBytes := make([]byte, 3+len(cval)) 184 cvalBytes[0] = segutils.VALTYPE_ENC_SMALL_STRING[0] 185 copy(cvalBytes[1:], utils.Uint16ToBytesLittleEndian(uint16(len(cval)))) 186 copy(cvalBytes[3:], cval) 187 188 arr := make([]uint16, recCounts/deCount) 189 deMap[string(cvalBytes)] = arr 190 for rn := uint16(0); rn < recCounts/deCount; rn++ { 191 arr[rn] = recNum + rn 192 } 193 recNum += recCounts / deCount 194 } 195 196 colWip.SetDeCount(deCount) 197 colWip.SetDeMap(deMap) 198 199 writer.PackDictEnc(colWip) 200 buf, idx := colWip.GetBufAndIdx() 201 202 err := sfr.readDictEnc(buf[0:idx], 0) 203 assert.Nil(t, err) 204 205 orderedRecNums := make([]uint16, recCounts) 206 for i := uint16(0); i < recCounts; i++ { 207 orderedRecNums[i] = i 208 } 209 210 results := make(map[uint16]map[string]interface{}) 211 _ = sfr.deToResults(results, orderedRecNums) 212 213 for rn, val := range results { 214 dWord := val[cname] 215 expected := fmt.Sprintf("mycval-%v", rn) 216 assert.Equal(t, dWord, expected) 217 } 218 }