github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/query/metadata/blockmeta.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package metadata 18 19 import ( 20 "errors" 21 "fmt" 22 23 dtu "github.com/siglens/siglens/pkg/common/dtypeutils" 24 "github.com/siglens/siglens/pkg/segment/pqmr" 25 "github.com/siglens/siglens/pkg/segment/query/metadata/metautils" 26 pqsmeta "github.com/siglens/siglens/pkg/segment/query/pqs/meta" 27 "github.com/siglens/siglens/pkg/segment/structs" 28 "github.com/siglens/siglens/pkg/segment/utils" 29 segutils "github.com/siglens/siglens/pkg/segment/utils" 30 "github.com/siglens/siglens/pkg/segment/writer" 31 "github.com/siglens/siglens/pkg/utils/semaphore" 32 log "github.com/sirupsen/logrus" 33 ) 34 35 const INITIAL_NUM_BLOCKS = 1000 36 37 var GlobalBlockMicroIndexCheckLimiter *semaphore.WeightedSemaphore 38 39 func InitBlockMetaCheckLimiter(unloadedBlockLimit int64) { 40 GlobalBlockMicroIndexCheckLimiter = semaphore.NewDefaultWeightedSemaphore(unloadedBlockLimit, "GlobalBlockMicroIndexCheckLimiter") 41 } 42 43 // converts blocks to a search request. block summaries & column meta are not guaranteed to be in memory 44 // if the block summaries & column meta are not in memory, then load right before query 45 func convertBlocksToSearchRequest(blocksForFile map[uint16]map[string]bool, file string, indexName string, 46 segMicroIdx *SegmentMicroIndex) (*structs.SegmentSearchRequest, error) { 47 48 if len(blocksForFile) == 0 { 49 return nil, errors.New("no matched blocks for search request") 50 } 51 52 searchMeta := &structs.SearchMetadataHolder{ 53 BlockSummariesFile: structs.GetBsuFnameFromSegKey(segMicroIdx.SegmentKey), 54 SearchTotalMemory: segMicroIdx.SearchMetadataSize, 55 } 56 if segMicroIdx.BlockSummaries != nil { 57 searchMeta.BlockSummaries = segMicroIdx.BlockSummaries 58 } 59 60 columnCopy := segMicroIdx.getColumns() 61 finalReq := &structs.SegmentSearchRequest{ 62 SegmentKey: file, 63 VirtualTableName: indexName, 64 SearchMetadata: searchMeta, 65 AllPossibleColumns: columnCopy, 66 LatestEpochMS: segMicroIdx.LatestEpochMS, 67 CmiPassedCnames: blocksForFile, 68 } 69 blockInfo := make(map[uint16]*structs.BlockMetadataHolder) 70 for blockNum := range blocksForFile { 71 blockInfo[blockNum] = segMicroIdx.BlockSearchInfo[blockNum] 72 } 73 finalReq.AllBlocksToSearch = blockInfo 74 return finalReq, nil 75 } 76 77 // TODO: function is getting to big and has many args, needs to be refactored 78 // Returns all search requests, number of blocks checked, number of blocks passed, error 79 func RunCmiCheck(segkey string, tableName string, timeRange *dtu.TimeRange, 80 blockTracker *structs.BlockTracker, bloomKeys map[string]bool, bloomOp utils.LogicalOperator, 81 rangeFilter map[string]string, rangeOp utils.FilterOperator, isRange bool, wildCardValue bool, 82 currQuery *structs.SearchQuery, colsToCheck map[string]bool, wildcardCol bool, 83 qid uint64, isQueryPersistent bool, pqid string) (*structs.SegmentSearchRequest, uint64, uint64, error) { 84 85 isMatchAll := currQuery.IsMatchAll() 86 87 globalMetadata.updateLock.RLock() 88 defer globalMetadata.updateLock.RUnlock() 89 segMicroIndex, exists := globalMetadata.getMicroIndex(segkey) 90 if !exists { 91 log.Errorf("qid=%d, Segment file %+v for table %+v does not exist in block meta, but existed in time filtering. This should not happen", qid, segkey, tableName) 92 return nil, 0, 0, fmt.Errorf("segment file %+v for table %+v does not exist in block meta, but existed in time filtering. This should not happen", segkey, tableName) 93 } 94 95 totalRequestedMemory := int64(0) 96 if !segMicroIndex.loadedSearchMetadata { 97 currSearchMetaSize := int64(segMicroIndex.SearchMetadataSize) 98 totalRequestedMemory += currSearchMetaSize 99 err := GlobalBlockMicroIndexCheckLimiter.TryAcquireWithBackoff(currSearchMetaSize, 10, segkey) 100 if err != nil { 101 log.Errorf("qid=%d, Failed to acquire memory from global pool for search! Error: %v", qid, err) 102 return nil, 0, 0, fmt.Errorf("failed to acquire memory from global pool for search! Error: %v", err) 103 } 104 _, err = segMicroIndex.LoadSearchMetadata([]byte{}) 105 if err != nil { 106 log.Errorf("qid=%d, Failed to load search metadata for segKey %+v! Error: %v", qid, segMicroIndex.SegmentKey, err) 107 return nil, 0, 0, fmt.Errorf("failed to acquire memory from global pool for search! Error: %v", err) 108 } 109 } 110 111 totalBlockCount := uint64(len(segMicroIndex.BlockSummaries)) 112 timeFilteredBlocks := metautils.FilterBlocksByTime(segMicroIndex.BlockSummaries, blockTracker, timeRange) 113 numBlocks := uint16(len(segMicroIndex.BlockSummaries)) 114 droppedBlocksDueToTime := false 115 if len(timeFilteredBlocks) < int(totalBlockCount) { 116 droppedBlocksDueToTime = true 117 } 118 119 var missingBlockCMI bool 120 if len(timeFilteredBlocks) > 0 && !isMatchAll && !segMicroIndex.loadedMicroIndices { 121 totalRequestedMemory += int64(segMicroIndex.MicroIndexSize) 122 err := GlobalBlockMicroIndexCheckLimiter.TryAcquireWithBackoff(int64(segMicroIndex.MicroIndexSize), 10, segkey) 123 if err != nil { 124 log.Errorf("qid=%d, Failed to acquire memory from global pool for search! Error: %v", qid, err) 125 return nil, 0, 0, fmt.Errorf("failed to acquire memory from global pool for search! Error: %v", err) 126 } 127 blkCmis, err := segMicroIndex.readCmis(timeFilteredBlocks, false, colsToCheck, wildcardCol) 128 if err != nil { 129 log.Errorf("qid=%d, Failed to cmi for blocks and columns. Num blocks %+v, Num columns %+v. Error: %+v", 130 qid, len(timeFilteredBlocks), len(colsToCheck), err) 131 missingBlockCMI = true 132 } else { 133 segMicroIndex.blockCmis = blkCmis 134 } 135 } 136 137 if !isMatchAll && !missingBlockCMI { 138 for blockToCheck := range timeFilteredBlocks { 139 if blockToCheck >= numBlocks { 140 log.Errorf("qid=%d, Time range passed for a block with no micro index!", qid) 141 continue 142 } 143 if isRange { 144 if wildcardCol { 145 doRangeCheckAllCol(segMicroIndex, blockToCheck, rangeFilter, rangeOp, timeFilteredBlocks, qid) 146 } else { 147 doRangeCheckForCol(segMicroIndex, blockToCheck, rangeFilter, rangeOp, timeFilteredBlocks, colsToCheck, qid) 148 } 149 } else { 150 negateMatch := false 151 if currQuery != nil && currQuery.MatchFilter != nil && currQuery.MatchFilter.NegateMatch { 152 negateMatch = true 153 } 154 if !wildCardValue && !negateMatch { 155 if wildcardCol { 156 doBloomCheckAllCol(segMicroIndex, blockToCheck, bloomKeys, bloomOp, timeFilteredBlocks) 157 } else { 158 doBloomCheckForCol(segMicroIndex, blockToCheck, bloomKeys, bloomOp, timeFilteredBlocks, colsToCheck) 159 } 160 } 161 } 162 } 163 } 164 165 filteredBlockCount := uint64(0) 166 var finalReq *structs.SegmentSearchRequest 167 var err error 168 169 if len(timeFilteredBlocks) == 0 && !droppedBlocksDueToTime { 170 if isQueryPersistent { 171 go pqsmeta.AddEmptyResults(pqid, segkey, tableName) 172 go writer.BackFillPQSSegmetaEntry(segkey, pqid) 173 } 174 } 175 176 if len(timeFilteredBlocks) > 0 { 177 finalReq, err = convertBlocksToSearchRequest(timeFilteredBlocks, segkey, tableName, segMicroIndex) 178 if err == nil { 179 filteredBlockCount = uint64(len(timeFilteredBlocks)) 180 } else { 181 log.Errorf("qid=%v, runCmiCheck: failed to convert blocks, err=%v", qid, err) 182 } 183 } 184 185 if !segMicroIndex.loadedMicroIndices { 186 segMicroIndex.clearMicroIndices() 187 } 188 if !segMicroIndex.loadedSearchMetadata { 189 segMicroIndex.clearSearchMetadata() 190 } 191 if totalRequestedMemory > 0 { 192 GlobalBlockMicroIndexCheckLimiter.Release(totalRequestedMemory) 193 } 194 return finalReq, totalBlockCount, filteredBlockCount, err 195 } 196 197 func doRangeCheckAllCol(segMicroIndex *SegmentMicroIndex, blockToCheck uint16, rangeFilter map[string]string, 198 rangeOp utils.FilterOperator, timeFilteredBlocks map[uint16]map[string]bool, qid uint64) { 199 200 allCMIs, err := segMicroIndex.GetCMIsForBlock(blockToCheck) 201 if err != nil { 202 return 203 } 204 matchedAny := false 205 for cname, cmi := range allCMIs { 206 var matchedBlockRange bool 207 if cmi.CmiType != utils.CMI_RANGE_INDEX[0] { 208 continue 209 } 210 matchedBlockRange = metautils.CheckRangeIndex(rangeFilter, cmi.Ranges, rangeOp, qid) 211 if matchedBlockRange { 212 timeFilteredBlocks[blockToCheck][cname] = true 213 matchedAny = true 214 } 215 } 216 if !matchedAny { 217 delete(timeFilteredBlocks, blockToCheck) 218 } 219 } 220 221 func doRangeCheckForCol(segMicroIndex *SegmentMicroIndex, blockToCheck uint16, rangeFilter map[string]string, 222 rangeOp utils.FilterOperator, timeFilteredBlocks map[uint16]map[string]bool, colsToCheck map[string]bool, qid uint64) { 223 224 var matchedBlockRange bool 225 for colName := range colsToCheck { 226 colCMI, err := segMicroIndex.GetCMIForBlockAndColumn(blockToCheck, colName) 227 if err != nil { 228 continue 229 } 230 if colCMI.CmiType != utils.CMI_RANGE_INDEX[0] { 231 continue 232 } 233 matchedBlockRange = metautils.CheckRangeIndex(rangeFilter, colCMI.Ranges, rangeOp, qid) 234 if matchedBlockRange { 235 timeFilteredBlocks[blockToCheck][colName] = true 236 } else { 237 break 238 } 239 } 240 if !matchedBlockRange { 241 delete(timeFilteredBlocks, blockToCheck) 242 } 243 } 244 245 func doBloomCheckForCol(segMicroIndex *SegmentMicroIndex, blockToCheck uint16, bloomKeys map[string]bool, 246 bloomOp utils.LogicalOperator, timeFilteredBlocks map[uint16]map[string]bool, colsToCheck map[string]bool) { 247 248 var matchedNeedleInBlock = true 249 for entry := range bloomKeys { 250 var needleExists bool 251 for colName := range colsToCheck { 252 colCMI, err := segMicroIndex.GetCMIForBlockAndColumn(blockToCheck, colName) 253 if err != nil { 254 continue 255 } 256 if colCMI.CmiType != utils.CMI_BLOOM_INDEX[0] { 257 continue 258 } 259 needleExists = colCMI.Bf.TestString(entry) 260 if needleExists { 261 timeFilteredBlocks[blockToCheck][colName] = true 262 break 263 } 264 } 265 if !needleExists && bloomOp == utils.And { 266 matchedNeedleInBlock = false 267 break 268 } else if needleExists && bloomOp == utils.Or { 269 matchedNeedleInBlock = true 270 break 271 } 272 } 273 //If no match is found removing block from incoming blocksToCheck 274 if !matchedNeedleInBlock { 275 delete(timeFilteredBlocks, blockToCheck) 276 } 277 } 278 279 func doBloomCheckAllCol(segMicroIndex *SegmentMicroIndex, blockToCheck uint16, bloomKeys map[string]bool, 280 bloomOp utils.LogicalOperator, timeFilteredBlocks map[uint16]map[string]bool) { 281 282 var matchedNeedleInBlock = true 283 var allEntriesMissing bool = false 284 for entry := range bloomKeys { 285 var needleExists bool 286 allCMIs, err := segMicroIndex.GetCMIsForBlock(blockToCheck) 287 if err != nil { 288 needleExists = false 289 } else { 290 atleastOneFound := false 291 for cname, cmi := range allCMIs { 292 if cmi.CmiType != utils.CMI_BLOOM_INDEX[0] { 293 continue 294 } 295 if cmi.Bf.TestString(entry) { 296 timeFilteredBlocks[blockToCheck][cname] = true 297 atleastOneFound = true 298 } 299 } 300 if atleastOneFound { 301 needleExists = true 302 } 303 } 304 if !needleExists && bloomOp == utils.And { 305 matchedNeedleInBlock = false 306 break 307 } else if needleExists && bloomOp == utils.Or { 308 allEntriesMissing = false 309 matchedNeedleInBlock = true 310 break 311 } else if !needleExists && bloomOp == utils.Or { 312 allEntriesMissing = true 313 matchedNeedleInBlock = false 314 } 315 } 316 317 // Or only early exits when it sees true. If all entries are false, we need to handle it here 318 if bloomOp == segutils.Or && allEntriesMissing && !matchedNeedleInBlock { 319 matchedNeedleInBlock = false 320 } 321 322 //If no match is found, removing block from incoming blocksToCheck 323 if !matchedNeedleInBlock { 324 delete(timeFilteredBlocks, blockToCheck) 325 } 326 } 327 328 func GetBlockSearchInfoForKey(key string) (map[uint16]*structs.BlockMetadataHolder, error) { 329 globalMetadata.updateLock.RLock() 330 defer globalMetadata.updateLock.RUnlock() 331 332 segmentMeta, ok := globalMetadata.getMicroIndex(key) 333 if !ok { 334 return nil, errors.New("failed to find key in all block micro") 335 } 336 337 if segmentMeta.loadedSearchMetadata { 338 return segmentMeta.BlockSearchInfo, nil 339 } 340 341 _, _, allBmh, err := segmentMeta.readBlockSummaries([]byte{}) 342 if err != nil { 343 log.Errorf("GetBlockSearchInfoForKey: failed to read column block sum infos for key %s: %v", key, err) 344 return nil, err 345 } 346 347 return allBmh, nil 348 } 349 350 func GetBlockSummariesForKey(key string) ([]*structs.BlockSummary, error) { 351 globalMetadata.updateLock.RLock() 352 defer globalMetadata.updateLock.RUnlock() 353 354 segmentMeta, ok := globalMetadata.getMicroIndex(key) 355 if !ok { 356 return nil, errors.New("failed to find key in all block micro") 357 } 358 359 if segmentMeta.loadedSearchMetadata { 360 return segmentMeta.BlockSummaries, nil 361 } 362 363 _, blockSum, _, err := segmentMeta.readBlockSummaries([]byte{}) 364 if err != nil { 365 log.Errorf("GetBlockSearchInfoForKey: failed to read column block infos for key %s: %v", key, err) 366 return nil, err 367 } 368 return blockSum, nil 369 } 370 371 // returns block search info, block summaries, and any errors encountered 372 // block search info will be loaded for all possible columns 373 func GetSearchInfoForPQSQuery(key string, spqmr *pqmr.SegmentPQMRResults) (map[uint16]*structs.BlockMetadataHolder, 374 []*structs.BlockSummary, error) { 375 globalMetadata.updateLock.RLock() 376 defer globalMetadata.updateLock.RUnlock() 377 378 segmentMeta, ok := globalMetadata.getMicroIndex(key) 379 if !ok { 380 return nil, nil, errors.New("failed to find key in all block micro") 381 } 382 383 if segmentMeta.loadedSearchMetadata { 384 return segmentMeta.BlockSearchInfo, segmentMeta.BlockSummaries, nil 385 } 386 387 // avoid caller having to clean up BlockSearchInfo 388 _, blockSum, allBmh, err := segmentMeta.readBlockSummaries([]byte{}) 389 if err != nil { 390 log.Errorf("GetBlockSearchInfoForKey: failed to read block infos for segKey %+v: %v", key, err) 391 return nil, nil, err 392 } 393 retSearchInfo := make(map[uint16]*structs.BlockMetadataHolder) 394 setBlocks := spqmr.GetAllBlocks() 395 for _, blkNum := range setBlocks { 396 if blkMetadata, ok := allBmh[blkNum]; ok { 397 retSearchInfo[blkNum] = blkMetadata 398 } 399 } 400 return retSearchInfo, blockSum, nil 401 }