github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/query/metadata/segmentmicroindex.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package metadata
    18  
    19  import (
    20  	"fmt"
    21  	"io"
    22  	"os"
    23  	"strings"
    24  
    25  	"github.com/cespare/xxhash"
    26  	blob "github.com/siglens/siglens/pkg/blob"
    27  	"github.com/siglens/siglens/pkg/config"
    28  	"github.com/siglens/siglens/pkg/segment/reader/microreader"
    29  	"github.com/siglens/siglens/pkg/segment/structs"
    30  	"github.com/siglens/siglens/pkg/segment/utils"
    31  	toputils "github.com/siglens/siglens/pkg/utils"
    32  	log "github.com/sirupsen/logrus"
    33  )
    34  
// SegmentMicroIndex is the top level segment metadata used to access the
// column micro indices (cmis) and the search metadata of a single segment.
// It embeds the segment's SegMeta together with the two lazily populated
// holders below, so their fields and methods are promoted onto this type.
type SegmentMicroIndex struct {
	structs.SegMeta
	SegmentMicroIndices
	SegmentSearchMetadata
	// Any time you add an element here, make sure you adjust mergeSegmentMicroIndex
}
    42  
// SegmentMicroIndices is the holder structure for just the segment microindices.
type SegmentMicroIndices struct {
	// blockCmis is indexed by block number; each entry maps a column name to
	// that column's CmiContainer for the block.
	blockCmis          []map[string]*structs.CmiContainer
	// MicroIndexSize is the sum of CmiSize over the segment's columns
	// (computed by initMetadataSize).
	MicroIndexSize     uint64
	// loadedMicroIndices is set by loadMicroIndices on success and reset by
	// clearMicroIndices.
	loadedMicroIndices bool
}
    49  
// SegmentSearchMetadata is the holder structure for just the segment search
// metadata (blk summaries & blockSearchInfo).
type SegmentSearchMetadata struct {
	// BlockSummaries and BlockSearchInfo are populated by LoadSearchMetadata
	// from the segment's block summary file and dropped by clearSearchMetadata.
	BlockSummaries       []*structs.BlockSummary
	BlockSearchInfo      map[uint16]*structs.BlockMetadataHolder
	// SearchMetadataSize is an estimate computed by initMetadataSize from the
	// number of blocks and columns; it is not measured from loaded data.
	SearchMetadataSize   uint64
	// loadedSearchMetadata is set by LoadSearchMetadata on success and reset
	// by clearSearchMetadata.
	loadedSearchMetadata bool
}
    57  
    58  func InitSegmentMicroIndex(segMetaInfo *structs.SegMeta) *SegmentMicroIndex {
    59  
    60  	sm := &SegmentMicroIndex{
    61  		SegMeta: *segMetaInfo,
    62  	}
    63  	sm.loadedMicroIndices = false
    64  	sm.loadedSearchMetadata = false
    65  	sm.initMetadataSize()
    66  	return sm
    67  }
    68  
    69  // Initializes sm.searchMetadaSize and sm.microIndexSize values
    70  func (sm *SegmentMicroIndex) initMetadataSize() {
    71  	searchMetadataSize := uint64(0)
    72  	searchMetadataSize += uint64(sm.NumBlocks * structs.SIZE_OF_BSUM) // block summaries
    73  	// for values of the BlockMetadataHolder
    74  	searchMetadataSize += uint64(sm.NumBlocks * uint16(len(sm.ColumnNames)) * structs.SIZE_OF_BlockInfo)
    75  	// for keys of BlockMetadataHolder
    76  	// 2 ==> two maps, 10 ==> avg colnamesize
    77  	searchMetadataSize += uint64(sm.NumBlocks) * 2 * 10 * uint64(len(sm.ColumnNames))
    78  
    79  	sm.SearchMetadataSize = searchMetadataSize
    80  
    81  	microIndexSize := uint64(0)
    82  	for _, colSizeInfo := range sm.ColumnNames {
    83  		microIndexSize += colSizeInfo.CmiSize
    84  	}
    85  	sm.MicroIndexSize = microIndexSize
    86  }
    87  
    88  func (ssm *SegmentSearchMetadata) clearSearchMetadata() {
    89  	ssm.BlockSearchInfo = nil
    90  	ssm.BlockSummaries = nil
    91  	ssm.loadedSearchMetadata = false
    92  }
    93  
    94  func (smi *SegmentMicroIndices) clearMicroIndices() {
    95  	smi.blockCmis = nil
    96  	smi.loadedMicroIndices = false
    97  }
    98  
    99  // Returns all columnar cmis for a given block or any errors encountered
   100  func (smi *SegmentMicroIndices) GetCMIsForBlock(blkNum uint16) (map[string]*structs.CmiContainer, error) {
   101  	if int(blkNum) >= len(smi.blockCmis) {
   102  		return nil, fmt.Errorf("blkNum %+v does not exist", blkNum)
   103  	}
   104  	cmis := smi.blockCmis[blkNum]
   105  	return cmis, nil
   106  }
   107  
   108  // Returns the cmi for a given block & column, or any errors encountered
   109  func (smi *SegmentMicroIndices) GetCMIForBlockAndColumn(blkNum uint16, cname string) (*structs.CmiContainer, error) {
   110  	allCmis, err := smi.GetCMIsForBlock(blkNum)
   111  	if err != nil {
   112  		return nil, err
   113  	}
   114  	retVal, ok := allCmis[cname]
   115  	if !ok {
   116  		return nil, fmt.Errorf("Failed to find column %+v in cmis for block %+v", cname, blkNum)
   117  	}
   118  	return retVal, nil
   119  }
   120  
   121  func (sm *SegmentMicroIndex) LoadSearchMetadata(rbuf []byte) ([]byte, error) {
   122  	if sm.loadedSearchMetadata {
   123  		return rbuf, nil
   124  	}
   125  	retbuf, blockSum, allBmh, err := sm.readBlockSummaries(rbuf)
   126  	if err != nil {
   127  		sm.clearSearchMetadata()
   128  		return rbuf, err
   129  	}
   130  	sm.loadedSearchMetadata = true
   131  	sm.BlockSummaries = blockSum
   132  	sm.BlockSearchInfo = allBmh
   133  	return retbuf, nil
   134  }
   135  
   136  func (sm *SegmentMicroIndex) readBlockSummaries(rbuf []byte) ([]byte, []*structs.BlockSummary,
   137  	map[uint16]*structs.BlockMetadataHolder, error) {
   138  
   139  	bsfname := structs.GetBsuFnameFromSegKey(sm.SegmentKey)
   140  	blockSum, allBmh, retbuf, err := microreader.ReadBlockSummaries(bsfname, rbuf)
   141  	if err != nil {
   142  		log.Errorf("Failed to read block summary file: %v, err:%+v", bsfname, err)
   143  		return rbuf, blockSum, allBmh, err
   144  	}
   145  	return retbuf, blockSum, allBmh, nil
   146  }
   147  
   148  func (sm *SegmentMicroIndex) loadMicroIndices(blocksToLoad map[uint16]map[string]bool, allBlocks bool, colsToCheck map[string]bool, wildcardCol bool) error {
   149  	blkCmis, err := sm.readCmis(blocksToLoad, allBlocks, colsToCheck, wildcardCol)
   150  	if err != nil {
   151  		sm.clearMicroIndices()
   152  		return err
   153  	}
   154  	sm.loadedMicroIndices = true
   155  	sm.blockCmis = blkCmis
   156  	return nil
   157  }
   158  
   159  func (sm *SegmentMicroIndex) readCmis(blocksToLoad map[uint16]map[string]bool, allBlocks bool,
   160  	colsToCheck map[string]bool, wildcardCol bool) ([]map[string]*structs.CmiContainer, error) {
   161  
   162  	if strings.Contains(sm.VirtualTableName, ".kibana") {
   163  		// no error bc kibana does not generate any CMIs
   164  		return []map[string]*structs.CmiContainer{}, nil
   165  	}
   166  	var allCols map[string]bool
   167  	if wildcardCol {
   168  		allCols = sm.getColumns()
   169  	} else {
   170  		allCols = colsToCheck
   171  	}
   172  
   173  	blkCmis := make([]map[string]*structs.CmiContainer, INITIAL_NUM_BLOCKS)
   174  	for i := uint16(0); i < INITIAL_NUM_BLOCKS; i += 1 {
   175  		blkCmis[i] = make(map[string]*structs.CmiContainer)
   176  	}
   177  	bb := make([]byte, utils.LEN_BLOCK_CMI_SIZE+utils.LEN_BLKNUM_CMI_SIZE) // for cmilen (4) and blkNum (2)
   178  	cmbuf := make([]byte, 0)
   179  
   180  	bulkDownloadFiles := make(map[string]string)
   181  	var fName string
   182  	for cname := range allCols {
   183  		// timestamp, _type and _index col have no cmi
   184  		if cname == config.GetTimeStampKey() || cname == "_type" || cname == "_index" {
   185  			continue
   186  		}
   187  		if cname == "" {
   188  			return nil, fmt.Errorf("readCmis: unknown seg set col")
   189  		} else {
   190  			fName = fmt.Sprintf("%v_%v.cmi", sm.SegmentKey, xxhash.Sum64String(cname))
   191  		}
   192  		bulkDownloadFiles[fName] = cname
   193  	}
   194  	err := blob.BulkDownloadSegmentBlob(bulkDownloadFiles, false)
   195  	if err != nil {
   196  		log.Errorf("readCmis: failed to bulk download seg files. err=%v", err)
   197  		return nil, err
   198  	}
   199  
   200  	for fName, cname := range bulkDownloadFiles {
   201  		fd, err := os.OpenFile(fName, os.O_RDONLY, 0644)
   202  		if err != nil {
   203  			log.Errorf("readCmis: open failed cname=%v, fname=%v, err=[%v], continuing with rest", cname, fName, err)
   204  			continue
   205  		}
   206  		defer fd.Close()
   207  
   208  		offset := int64(0)
   209  		for {
   210  			_, err = fd.ReadAt(bb, offset)
   211  			if err != nil {
   212  				if err != io.EOF {
   213  					log.Errorf("readCmis: failed to read cmilen err=[%+v], continuing with rest cmis", err)
   214  					break
   215  				}
   216  				break
   217  			}
   218  			offset += utils.LEN_BLOCK_CMI_SIZE + utils.LEN_BLKNUM_CMI_SIZE // for cmilenHolder (4) and blkNum (2)
   219  			cmilen := toputils.BytesToUint32LittleEndian(bb[0:utils.LEN_BLOCK_CMI_SIZE])
   220  			cmilen -= utils.LEN_BLKNUM_CMI_SIZE // for the blkNum(2)
   221  			if bufflen := uint32(len(cmbuf)); bufflen < cmilen {
   222  				newSlice := make([]byte, cmilen-bufflen)
   223  				cmbuf = append(cmbuf, newSlice...)
   224  			}
   225  
   226  			blkNum := toputils.BytesToUint16LittleEndian(bb[utils.LEN_BLOCK_CMI_SIZE:])
   227  			if _, shouldLoad := blocksToLoad[blkNum]; allBlocks || shouldLoad {
   228  				_, err = fd.ReadAt(cmbuf[:cmilen], offset)
   229  				if err != nil {
   230  					if err != io.EOF {
   231  						log.Errorf("readCmis: failed to read cmi err=[%+v], continuing with rest cmis", err)
   232  						break
   233  					}
   234  					break
   235  				}
   236  
   237  				cmic, err := getCmi(cmbuf[:cmilen])
   238  				if err != nil {
   239  					log.Errorf("readCmis: failed to convert CMI, err=[%v], continuing with rest cmis", err)
   240  					break
   241  				}
   242  				if intBlkNum := int(blkNum); len(blkCmis) <= intBlkNum {
   243  					numToAdd := intBlkNum
   244  					newArrEntry := make([]map[string]*structs.CmiContainer, numToAdd)
   245  					for i := 0; i < numToAdd; i++ {
   246  						newArrEntry[i] = make(map[string]*structs.CmiContainer)
   247  					}
   248  					blkCmis = append(blkCmis, newArrEntry...)
   249  				}
   250  				blkCmis[blkNum][cname] = cmic
   251  			}
   252  			offset += int64(cmilen)
   253  		}
   254  	}
   255  	return blkCmis, nil
   256  }
   257  
   258  func (sm *SegmentMicroIndex) getColumns() map[string]bool {
   259  	retVal := make(map[string]bool, len(sm.ColumnNames))
   260  	for k := range sm.ColumnNames {
   261  		retVal[k] = true
   262  	}
   263  	return retVal
   264  }