github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/query/metadata/blockmeta.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package metadata
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  
    23  	dtu "github.com/siglens/siglens/pkg/common/dtypeutils"
    24  	"github.com/siglens/siglens/pkg/segment/pqmr"
    25  	"github.com/siglens/siglens/pkg/segment/query/metadata/metautils"
    26  	pqsmeta "github.com/siglens/siglens/pkg/segment/query/pqs/meta"
    27  	"github.com/siglens/siglens/pkg/segment/structs"
    28  	"github.com/siglens/siglens/pkg/segment/utils"
    29  	segutils "github.com/siglens/siglens/pkg/segment/utils"
    30  	"github.com/siglens/siglens/pkg/segment/writer"
    31  	"github.com/siglens/siglens/pkg/utils/semaphore"
    32  	log "github.com/sirupsen/logrus"
    33  )
    34  
// INITIAL_NUM_BLOCKS is not referenced in this part of the file.
// NOTE(review): presumably an initial capacity hint for per-segment block collections — confirm at use sites.
const INITIAL_NUM_BLOCKS = 1000

// GlobalBlockMicroIndexCheckLimiter is the weighted semaphore RunCmiCheck acquires from before it
// temporarily loads search metadata or column micro indices, and releases to afterwards.
// It is assigned by InitBlockMetaCheckLimiter; no initial value is given here.
var GlobalBlockMicroIndexCheckLimiter *semaphore.WeightedSemaphore
    38  
    39  func InitBlockMetaCheckLimiter(unloadedBlockLimit int64) {
    40  	GlobalBlockMicroIndexCheckLimiter = semaphore.NewDefaultWeightedSemaphore(unloadedBlockLimit, "GlobalBlockMicroIndexCheckLimiter")
    41  }
    42  
    43  // converts blocks to a search request. block summaries & column meta are not guaranteed to be in memory
    44  // if the block summaries & column meta are not in memory, then load right before query
    45  func convertBlocksToSearchRequest(blocksForFile map[uint16]map[string]bool, file string, indexName string,
    46  	segMicroIdx *SegmentMicroIndex) (*structs.SegmentSearchRequest, error) {
    47  
    48  	if len(blocksForFile) == 0 {
    49  		return nil, errors.New("no matched blocks for search request")
    50  	}
    51  
    52  	searchMeta := &structs.SearchMetadataHolder{
    53  		BlockSummariesFile: structs.GetBsuFnameFromSegKey(segMicroIdx.SegmentKey),
    54  		SearchTotalMemory:  segMicroIdx.SearchMetadataSize,
    55  	}
    56  	if segMicroIdx.BlockSummaries != nil {
    57  		searchMeta.BlockSummaries = segMicroIdx.BlockSummaries
    58  	}
    59  
    60  	columnCopy := segMicroIdx.getColumns()
    61  	finalReq := &structs.SegmentSearchRequest{
    62  		SegmentKey:         file,
    63  		VirtualTableName:   indexName,
    64  		SearchMetadata:     searchMeta,
    65  		AllPossibleColumns: columnCopy,
    66  		LatestEpochMS:      segMicroIdx.LatestEpochMS,
    67  		CmiPassedCnames:    blocksForFile,
    68  	}
    69  	blockInfo := make(map[uint16]*structs.BlockMetadataHolder)
    70  	for blockNum := range blocksForFile {
    71  		blockInfo[blockNum] = segMicroIdx.BlockSearchInfo[blockNum]
    72  	}
    73  	finalReq.AllBlocksToSearch = blockInfo
    74  	return finalReq, nil
    75  }
    76  
    77  // TODO: function is getting to big and has many args, needs to be refactored
    78  // Returns all search requests,  number of blocks checked, number of blocks passed, error
    79  func RunCmiCheck(segkey string, tableName string, timeRange *dtu.TimeRange,
    80  	blockTracker *structs.BlockTracker, bloomKeys map[string]bool, bloomOp utils.LogicalOperator,
    81  	rangeFilter map[string]string, rangeOp utils.FilterOperator, isRange bool, wildCardValue bool,
    82  	currQuery *structs.SearchQuery, colsToCheck map[string]bool, wildcardCol bool,
    83  	qid uint64, isQueryPersistent bool, pqid string) (*structs.SegmentSearchRequest, uint64, uint64, error) {
    84  
    85  	isMatchAll := currQuery.IsMatchAll()
    86  
    87  	globalMetadata.updateLock.RLock()
    88  	defer globalMetadata.updateLock.RUnlock()
    89  	segMicroIndex, exists := globalMetadata.getMicroIndex(segkey)
    90  	if !exists {
    91  		log.Errorf("qid=%d, Segment file %+v for table %+v does not exist in block meta, but existed in time filtering. This should not happen", qid, segkey, tableName)
    92  		return nil, 0, 0, fmt.Errorf("segment file %+v for table %+v does not exist in block meta, but existed in time filtering. This should not happen", segkey, tableName)
    93  	}
    94  
    95  	totalRequestedMemory := int64(0)
    96  	if !segMicroIndex.loadedSearchMetadata {
    97  		currSearchMetaSize := int64(segMicroIndex.SearchMetadataSize)
    98  		totalRequestedMemory += currSearchMetaSize
    99  		err := GlobalBlockMicroIndexCheckLimiter.TryAcquireWithBackoff(currSearchMetaSize, 10, segkey)
   100  		if err != nil {
   101  			log.Errorf("qid=%d, Failed to acquire memory from global pool for search! Error: %v", qid, err)
   102  			return nil, 0, 0, fmt.Errorf("failed to acquire memory from global pool for search! Error: %v", err)
   103  		}
   104  		_, err = segMicroIndex.LoadSearchMetadata([]byte{})
   105  		if err != nil {
   106  			log.Errorf("qid=%d, Failed to load search metadata for segKey %+v! Error: %v", qid, segMicroIndex.SegmentKey, err)
   107  			return nil, 0, 0, fmt.Errorf("failed to acquire memory from global pool for search! Error: %v", err)
   108  		}
   109  	}
   110  
   111  	totalBlockCount := uint64(len(segMicroIndex.BlockSummaries))
   112  	timeFilteredBlocks := metautils.FilterBlocksByTime(segMicroIndex.BlockSummaries, blockTracker, timeRange)
   113  	numBlocks := uint16(len(segMicroIndex.BlockSummaries))
   114  	droppedBlocksDueToTime := false
   115  	if len(timeFilteredBlocks) < int(totalBlockCount) {
   116  		droppedBlocksDueToTime = true
   117  	}
   118  
   119  	var missingBlockCMI bool
   120  	if len(timeFilteredBlocks) > 0 && !isMatchAll && !segMicroIndex.loadedMicroIndices {
   121  		totalRequestedMemory += int64(segMicroIndex.MicroIndexSize)
   122  		err := GlobalBlockMicroIndexCheckLimiter.TryAcquireWithBackoff(int64(segMicroIndex.MicroIndexSize), 10, segkey)
   123  		if err != nil {
   124  			log.Errorf("qid=%d, Failed to acquire memory from global pool for search! Error: %v", qid, err)
   125  			return nil, 0, 0, fmt.Errorf("failed to acquire memory from global pool for search! Error: %v", err)
   126  		}
   127  		blkCmis, err := segMicroIndex.readCmis(timeFilteredBlocks, false, colsToCheck, wildcardCol)
   128  		if err != nil {
   129  			log.Errorf("qid=%d, Failed to cmi for blocks and columns. Num blocks %+v, Num columns %+v. Error: %+v",
   130  				qid, len(timeFilteredBlocks), len(colsToCheck), err)
   131  			missingBlockCMI = true
   132  		} else {
   133  			segMicroIndex.blockCmis = blkCmis
   134  		}
   135  	}
   136  
   137  	if !isMatchAll && !missingBlockCMI {
   138  		for blockToCheck := range timeFilteredBlocks {
   139  			if blockToCheck >= numBlocks {
   140  				log.Errorf("qid=%d, Time range passed for a block with no micro index!", qid)
   141  				continue
   142  			}
   143  			if isRange {
   144  				if wildcardCol {
   145  					doRangeCheckAllCol(segMicroIndex, blockToCheck, rangeFilter, rangeOp, timeFilteredBlocks, qid)
   146  				} else {
   147  					doRangeCheckForCol(segMicroIndex, blockToCheck, rangeFilter, rangeOp, timeFilteredBlocks, colsToCheck, qid)
   148  				}
   149  			} else {
   150  				negateMatch := false
   151  				if currQuery != nil && currQuery.MatchFilter != nil && currQuery.MatchFilter.NegateMatch {
   152  					negateMatch = true
   153  				}
   154  				if !wildCardValue && !negateMatch {
   155  					if wildcardCol {
   156  						doBloomCheckAllCol(segMicroIndex, blockToCheck, bloomKeys, bloomOp, timeFilteredBlocks)
   157  					} else {
   158  						doBloomCheckForCol(segMicroIndex, blockToCheck, bloomKeys, bloomOp, timeFilteredBlocks, colsToCheck)
   159  					}
   160  				}
   161  			}
   162  		}
   163  	}
   164  
   165  	filteredBlockCount := uint64(0)
   166  	var finalReq *structs.SegmentSearchRequest
   167  	var err error
   168  
   169  	if len(timeFilteredBlocks) == 0 && !droppedBlocksDueToTime {
   170  		if isQueryPersistent {
   171  			go pqsmeta.AddEmptyResults(pqid, segkey, tableName)
   172  			go writer.BackFillPQSSegmetaEntry(segkey, pqid)
   173  		}
   174  	}
   175  
   176  	if len(timeFilteredBlocks) > 0 {
   177  		finalReq, err = convertBlocksToSearchRequest(timeFilteredBlocks, segkey, tableName, segMicroIndex)
   178  		if err == nil {
   179  			filteredBlockCount = uint64(len(timeFilteredBlocks))
   180  		} else {
   181  			log.Errorf("qid=%v, runCmiCheck: failed to convert blocks, err=%v", qid, err)
   182  		}
   183  	}
   184  
   185  	if !segMicroIndex.loadedMicroIndices {
   186  		segMicroIndex.clearMicroIndices()
   187  	}
   188  	if !segMicroIndex.loadedSearchMetadata {
   189  		segMicroIndex.clearSearchMetadata()
   190  	}
   191  	if totalRequestedMemory > 0 {
   192  		GlobalBlockMicroIndexCheckLimiter.Release(totalRequestedMemory)
   193  	}
   194  	return finalReq, totalBlockCount, filteredBlockCount, err
   195  }
   196  
   197  func doRangeCheckAllCol(segMicroIndex *SegmentMicroIndex, blockToCheck uint16, rangeFilter map[string]string,
   198  	rangeOp utils.FilterOperator, timeFilteredBlocks map[uint16]map[string]bool, qid uint64) {
   199  
   200  	allCMIs, err := segMicroIndex.GetCMIsForBlock(blockToCheck)
   201  	if err != nil {
   202  		return
   203  	}
   204  	matchedAny := false
   205  	for cname, cmi := range allCMIs {
   206  		var matchedBlockRange bool
   207  		if cmi.CmiType != utils.CMI_RANGE_INDEX[0] {
   208  			continue
   209  		}
   210  		matchedBlockRange = metautils.CheckRangeIndex(rangeFilter, cmi.Ranges, rangeOp, qid)
   211  		if matchedBlockRange {
   212  			timeFilteredBlocks[blockToCheck][cname] = true
   213  			matchedAny = true
   214  		}
   215  	}
   216  	if !matchedAny {
   217  		delete(timeFilteredBlocks, blockToCheck)
   218  	}
   219  }
   220  
   221  func doRangeCheckForCol(segMicroIndex *SegmentMicroIndex, blockToCheck uint16, rangeFilter map[string]string,
   222  	rangeOp utils.FilterOperator, timeFilteredBlocks map[uint16]map[string]bool, colsToCheck map[string]bool, qid uint64) {
   223  
   224  	var matchedBlockRange bool
   225  	for colName := range colsToCheck {
   226  		colCMI, err := segMicroIndex.GetCMIForBlockAndColumn(blockToCheck, colName)
   227  		if err != nil {
   228  			continue
   229  		}
   230  		if colCMI.CmiType != utils.CMI_RANGE_INDEX[0] {
   231  			continue
   232  		}
   233  		matchedBlockRange = metautils.CheckRangeIndex(rangeFilter, colCMI.Ranges, rangeOp, qid)
   234  		if matchedBlockRange {
   235  			timeFilteredBlocks[blockToCheck][colName] = true
   236  		} else {
   237  			break
   238  		}
   239  	}
   240  	if !matchedBlockRange {
   241  		delete(timeFilteredBlocks, blockToCheck)
   242  	}
   243  }
   244  
   245  func doBloomCheckForCol(segMicroIndex *SegmentMicroIndex, blockToCheck uint16, bloomKeys map[string]bool,
   246  	bloomOp utils.LogicalOperator, timeFilteredBlocks map[uint16]map[string]bool, colsToCheck map[string]bool) {
   247  
   248  	var matchedNeedleInBlock = true
   249  	for entry := range bloomKeys {
   250  		var needleExists bool
   251  		for colName := range colsToCheck {
   252  			colCMI, err := segMicroIndex.GetCMIForBlockAndColumn(blockToCheck, colName)
   253  			if err != nil {
   254  				continue
   255  			}
   256  			if colCMI.CmiType != utils.CMI_BLOOM_INDEX[0] {
   257  				continue
   258  			}
   259  			needleExists = colCMI.Bf.TestString(entry)
   260  			if needleExists {
   261  				timeFilteredBlocks[blockToCheck][colName] = true
   262  				break
   263  			}
   264  		}
   265  		if !needleExists && bloomOp == utils.And {
   266  			matchedNeedleInBlock = false
   267  			break
   268  		} else if needleExists && bloomOp == utils.Or {
   269  			matchedNeedleInBlock = true
   270  			break
   271  		}
   272  	}
   273  	//If no match is found removing block from incoming blocksToCheck
   274  	if !matchedNeedleInBlock {
   275  		delete(timeFilteredBlocks, blockToCheck)
   276  	}
   277  }
   278  
   279  func doBloomCheckAllCol(segMicroIndex *SegmentMicroIndex, blockToCheck uint16, bloomKeys map[string]bool,
   280  	bloomOp utils.LogicalOperator, timeFilteredBlocks map[uint16]map[string]bool) {
   281  
   282  	var matchedNeedleInBlock = true
   283  	var allEntriesMissing bool = false
   284  	for entry := range bloomKeys {
   285  		var needleExists bool
   286  		allCMIs, err := segMicroIndex.GetCMIsForBlock(blockToCheck)
   287  		if err != nil {
   288  			needleExists = false
   289  		} else {
   290  			atleastOneFound := false
   291  			for cname, cmi := range allCMIs {
   292  				if cmi.CmiType != utils.CMI_BLOOM_INDEX[0] {
   293  					continue
   294  				}
   295  				if cmi.Bf.TestString(entry) {
   296  					timeFilteredBlocks[blockToCheck][cname] = true
   297  					atleastOneFound = true
   298  				}
   299  			}
   300  			if atleastOneFound {
   301  				needleExists = true
   302  			}
   303  		}
   304  		if !needleExists && bloomOp == utils.And {
   305  			matchedNeedleInBlock = false
   306  			break
   307  		} else if needleExists && bloomOp == utils.Or {
   308  			allEntriesMissing = false
   309  			matchedNeedleInBlock = true
   310  			break
   311  		} else if !needleExists && bloomOp == utils.Or {
   312  			allEntriesMissing = true
   313  			matchedNeedleInBlock = false
   314  		}
   315  	}
   316  
   317  	// Or only early exits when it sees true. If all entries are false, we need to handle it here
   318  	if bloomOp == segutils.Or && allEntriesMissing && !matchedNeedleInBlock {
   319  		matchedNeedleInBlock = false
   320  	}
   321  
   322  	//If no match is found, removing block from incoming blocksToCheck
   323  	if !matchedNeedleInBlock {
   324  		delete(timeFilteredBlocks, blockToCheck)
   325  	}
   326  }
   327  
   328  func GetBlockSearchInfoForKey(key string) (map[uint16]*structs.BlockMetadataHolder, error) {
   329  	globalMetadata.updateLock.RLock()
   330  	defer globalMetadata.updateLock.RUnlock()
   331  
   332  	segmentMeta, ok := globalMetadata.getMicroIndex(key)
   333  	if !ok {
   334  		return nil, errors.New("failed to find key in all block micro")
   335  	}
   336  
   337  	if segmentMeta.loadedSearchMetadata {
   338  		return segmentMeta.BlockSearchInfo, nil
   339  	}
   340  
   341  	_, _, allBmh, err := segmentMeta.readBlockSummaries([]byte{})
   342  	if err != nil {
   343  		log.Errorf("GetBlockSearchInfoForKey: failed to read column block sum infos for key %s: %v", key, err)
   344  		return nil, err
   345  	}
   346  
   347  	return allBmh, nil
   348  }
   349  
   350  func GetBlockSummariesForKey(key string) ([]*structs.BlockSummary, error) {
   351  	globalMetadata.updateLock.RLock()
   352  	defer globalMetadata.updateLock.RUnlock()
   353  
   354  	segmentMeta, ok := globalMetadata.getMicroIndex(key)
   355  	if !ok {
   356  		return nil, errors.New("failed to find key in all block micro")
   357  	}
   358  
   359  	if segmentMeta.loadedSearchMetadata {
   360  		return segmentMeta.BlockSummaries, nil
   361  	}
   362  
   363  	_, blockSum, _, err := segmentMeta.readBlockSummaries([]byte{})
   364  	if err != nil {
   365  		log.Errorf("GetBlockSearchInfoForKey: failed to read column block infos for key %s: %v", key, err)
   366  		return nil, err
   367  	}
   368  	return blockSum, nil
   369  }
   370  
   371  // returns block search info, block summaries, and any errors encountered
   372  // block search info will be loaded for all possible columns
   373  func GetSearchInfoForPQSQuery(key string, spqmr *pqmr.SegmentPQMRResults) (map[uint16]*structs.BlockMetadataHolder,
   374  	[]*structs.BlockSummary, error) {
   375  	globalMetadata.updateLock.RLock()
   376  	defer globalMetadata.updateLock.RUnlock()
   377  
   378  	segmentMeta, ok := globalMetadata.getMicroIndex(key)
   379  	if !ok {
   380  		return nil, nil, errors.New("failed to find key in all block micro")
   381  	}
   382  
   383  	if segmentMeta.loadedSearchMetadata {
   384  		return segmentMeta.BlockSearchInfo, segmentMeta.BlockSummaries, nil
   385  	}
   386  
   387  	// avoid caller having to clean up BlockSearchInfo
   388  	_, blockSum, allBmh, err := segmentMeta.readBlockSummaries([]byte{})
   389  	if err != nil {
   390  		log.Errorf("GetBlockSearchInfoForKey: failed to read block infos for segKey %+v: %v", key, err)
   391  		return nil, nil, err
   392  	}
   393  	retSearchInfo := make(map[uint16]*structs.BlockMetadataHolder)
   394  	setBlocks := spqmr.GetAllBlocks()
   395  	for _, blkNum := range setBlocks {
   396  		if blkMetadata, ok := allBmh[blkNum]; ok {
   397  			retSearchInfo[blkNum] = blkMetadata
   398  		}
   399  	}
   400  	return retSearchInfo, blockSum, nil
   401  }