github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/query/metadata/segmentmicroindex.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package metadata 18 19 import ( 20 "fmt" 21 "io" 22 "os" 23 "strings" 24 25 "github.com/cespare/xxhash" 26 blob "github.com/siglens/siglens/pkg/blob" 27 "github.com/siglens/siglens/pkg/config" 28 "github.com/siglens/siglens/pkg/segment/reader/microreader" 29 "github.com/siglens/siglens/pkg/segment/structs" 30 "github.com/siglens/siglens/pkg/segment/utils" 31 toputils "github.com/siglens/siglens/pkg/utils" 32 log "github.com/sirupsen/logrus" 33 ) 34 35 // Top level segment metadata for access of cmis/search metadata 36 type SegmentMicroIndex struct { 37 structs.SegMeta 38 SegmentMicroIndices 39 SegmentSearchMetadata 40 // Any time you add an element here, make sure you adjust mergeSegmentMicroIndex 41 } 42 43 // Holder structure for just the segment microindices 44 type SegmentMicroIndices struct { 45 blockCmis []map[string]*structs.CmiContainer 46 MicroIndexSize uint64 47 loadedMicroIndices bool 48 } 49 50 // Holder structure for just the segment search metadata (blk summaries & blockSearchInfo) 51 type SegmentSearchMetadata struct { 52 BlockSummaries []*structs.BlockSummary 53 BlockSearchInfo map[uint16]*structs.BlockMetadataHolder 54 SearchMetadataSize uint64 55 loadedSearchMetadata bool 56 } 57 58 func InitSegmentMicroIndex(segMetaInfo *structs.SegMeta) *SegmentMicroIndex { 59 60 sm := &SegmentMicroIndex{ 61 SegMeta: *segMetaInfo, 62 } 63 sm.loadedMicroIndices = false 64 sm.loadedSearchMetadata = false 65 sm.initMetadataSize() 66 return sm 67 } 68 69 // Initializes sm.searchMetadaSize and sm.microIndexSize values 70 func (sm *SegmentMicroIndex) initMetadataSize() { 71 searchMetadataSize := uint64(0) 72 searchMetadataSize += uint64(sm.NumBlocks * structs.SIZE_OF_BSUM) // block summaries 73 // for values of the BlockMetadataHolder 74 searchMetadataSize += uint64(sm.NumBlocks * uint16(len(sm.ColumnNames)) * structs.SIZE_OF_BlockInfo) 75 // for keys of BlockMetadataHolder 76 // 2 ==> two maps, 10 ==> avg colnamesize 77 searchMetadataSize += uint64(sm.NumBlocks) * 2 * 10 * uint64(len(sm.ColumnNames)) 78 79 sm.SearchMetadataSize = searchMetadataSize 80 81 microIndexSize := uint64(0) 82 for _, colSizeInfo := range sm.ColumnNames { 83 microIndexSize += colSizeInfo.CmiSize 84 } 85 sm.MicroIndexSize = microIndexSize 86 } 87 88 func (ssm *SegmentSearchMetadata) clearSearchMetadata() { 89 ssm.BlockSearchInfo = nil 90 ssm.BlockSummaries = nil 91 ssm.loadedSearchMetadata = false 92 } 93 94 func (smi *SegmentMicroIndices) clearMicroIndices() { 95 smi.blockCmis = nil 96 smi.loadedMicroIndices = false 97 } 98 99 // Returns all columnar cmis for a given block or any errors encountered 100 func (smi *SegmentMicroIndices) GetCMIsForBlock(blkNum uint16) (map[string]*structs.CmiContainer, error) { 101 if int(blkNum) >= len(smi.blockCmis) { 102 return nil, fmt.Errorf("blkNum %+v does not exist", blkNum) 103 } 104 cmis := smi.blockCmis[blkNum] 105 return cmis, nil 106 } 107 108 // Returns the cmi for a given block & column, or any errors encountered 109 func (smi *SegmentMicroIndices) GetCMIForBlockAndColumn(blkNum uint16, cname string) (*structs.CmiContainer, error) { 110 allCmis, err := smi.GetCMIsForBlock(blkNum) 111 if err != nil { 112 return nil, err 113 } 114 retVal, ok := allCmis[cname] 115 if !ok { 116 return nil, fmt.Errorf("Failed to find column %+v in cmis for block %+v", cname, blkNum) 117 } 118 return retVal, nil 119 } 120 121 func (sm *SegmentMicroIndex) LoadSearchMetadata(rbuf []byte) ([]byte, error) { 122 if sm.loadedSearchMetadata { 123 return rbuf, nil 124 } 125 retbuf, blockSum, allBmh, err := sm.readBlockSummaries(rbuf) 126 if err != nil { 127 sm.clearSearchMetadata() 128 return rbuf, err 129 } 130 sm.loadedSearchMetadata = true 131 sm.BlockSummaries = blockSum 132 sm.BlockSearchInfo = allBmh 133 return retbuf, nil 134 } 135 136 func (sm *SegmentMicroIndex) readBlockSummaries(rbuf []byte) ([]byte, []*structs.BlockSummary, 137 map[uint16]*structs.BlockMetadataHolder, error) { 138 139 bsfname := structs.GetBsuFnameFromSegKey(sm.SegmentKey) 140 blockSum, allBmh, retbuf, err := microreader.ReadBlockSummaries(bsfname, rbuf) 141 if err != nil { 142 log.Errorf("Failed to read block summary file: %v, err:%+v", bsfname, err) 143 return rbuf, blockSum, allBmh, err 144 } 145 return retbuf, blockSum, allBmh, nil 146 } 147 148 func (sm *SegmentMicroIndex) loadMicroIndices(blocksToLoad map[uint16]map[string]bool, allBlocks bool, colsToCheck map[string]bool, wildcardCol bool) error { 149 blkCmis, err := sm.readCmis(blocksToLoad, allBlocks, colsToCheck, wildcardCol) 150 if err != nil { 151 sm.clearMicroIndices() 152 return err 153 } 154 sm.loadedMicroIndices = true 155 sm.blockCmis = blkCmis 156 return nil 157 } 158 159 func (sm *SegmentMicroIndex) readCmis(blocksToLoad map[uint16]map[string]bool, allBlocks bool, 160 colsToCheck map[string]bool, wildcardCol bool) ([]map[string]*structs.CmiContainer, error) { 161 162 if strings.Contains(sm.VirtualTableName, ".kibana") { 163 // no error bc kibana does not generate any CMIs 164 return []map[string]*structs.CmiContainer{}, nil 165 } 166 var allCols map[string]bool 167 if wildcardCol { 168 allCols = sm.getColumns() 169 } else { 170 allCols = colsToCheck 171 } 172 173 blkCmis := make([]map[string]*structs.CmiContainer, INITIAL_NUM_BLOCKS) 174 for i := uint16(0); i < INITIAL_NUM_BLOCKS; i += 1 { 175 blkCmis[i] = make(map[string]*structs.CmiContainer) 176 } 177 bb := make([]byte, utils.LEN_BLOCK_CMI_SIZE+utils.LEN_BLKNUM_CMI_SIZE) // for cmilen (4) and blkNum (2) 178 cmbuf := make([]byte, 0) 179 180 bulkDownloadFiles := make(map[string]string) 181 var fName string 182 for cname := range allCols { 183 // timestamp, _type and _index col have no cmi 184 if cname == config.GetTimeStampKey() || cname == "_type" || cname == "_index" { 185 continue 186 } 187 if cname == "" { 188 return nil, fmt.Errorf("readCmis: unknown seg set col") 189 } else { 190 fName = fmt.Sprintf("%v_%v.cmi", sm.SegmentKey, xxhash.Sum64String(cname)) 191 } 192 bulkDownloadFiles[fName] = cname 193 } 194 err := blob.BulkDownloadSegmentBlob(bulkDownloadFiles, false) 195 if err != nil { 196 log.Errorf("readCmis: failed to bulk download seg files. err=%v", err) 197 return nil, err 198 } 199 200 for fName, cname := range bulkDownloadFiles { 201 fd, err := os.OpenFile(fName, os.O_RDONLY, 0644) 202 if err != nil { 203 log.Errorf("readCmis: open failed cname=%v, fname=%v, err=[%v], continuing with rest", cname, fName, err) 204 continue 205 } 206 defer fd.Close() 207 208 offset := int64(0) 209 for { 210 _, err = fd.ReadAt(bb, offset) 211 if err != nil { 212 if err != io.EOF { 213 log.Errorf("readCmis: failed to read cmilen err=[%+v], continuing with rest cmis", err) 214 break 215 } 216 break 217 } 218 offset += utils.LEN_BLOCK_CMI_SIZE + utils.LEN_BLKNUM_CMI_SIZE // for cmilenHolder (4) and blkNum (2) 219 cmilen := toputils.BytesToUint32LittleEndian(bb[0:utils.LEN_BLOCK_CMI_SIZE]) 220 cmilen -= utils.LEN_BLKNUM_CMI_SIZE // for the blkNum(2) 221 if bufflen := uint32(len(cmbuf)); bufflen < cmilen { 222 newSlice := make([]byte, cmilen-bufflen) 223 cmbuf = append(cmbuf, newSlice...) 224 } 225 226 blkNum := toputils.BytesToUint16LittleEndian(bb[utils.LEN_BLOCK_CMI_SIZE:]) 227 if _, shouldLoad := blocksToLoad[blkNum]; allBlocks || shouldLoad { 228 _, err = fd.ReadAt(cmbuf[:cmilen], offset) 229 if err != nil { 230 if err != io.EOF { 231 log.Errorf("readCmis: failed to read cmi err=[%+v], continuing with rest cmis", err) 232 break 233 } 234 break 235 } 236 237 cmic, err := getCmi(cmbuf[:cmilen]) 238 if err != nil { 239 log.Errorf("readCmis: failed to convert CMI, err=[%v], continuing with rest cmis", err) 240 break 241 } 242 if intBlkNum := int(blkNum); len(blkCmis) <= intBlkNum { 243 numToAdd := intBlkNum 244 newArrEntry := make([]map[string]*structs.CmiContainer, numToAdd) 245 for i := 0; i < numToAdd; i++ { 246 newArrEntry[i] = make(map[string]*structs.CmiContainer) 247 } 248 blkCmis = append(blkCmis, newArrEntry...) 249 } 250 blkCmis[blkNum][cname] = cmic 251 } 252 offset += int64(cmilen) 253 } 254 } 255 return blkCmis, nil 256 } 257 258 func (sm *SegmentMicroIndex) getColumns() map[string]bool { 259 retVal := make(map[string]bool, len(sm.ColumnNames)) 260 for k := range sm.ColumnNames { 261 retVal[k] = true 262 } 263 return retVal 264 }