github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/search/searchstatus.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package search
    18  
    19  import (
    20  	"errors"
    21  	"sync"
    22  
    23  	dtu "github.com/siglens/siglens/pkg/common/dtypeutils"
    24  	"github.com/siglens/siglens/pkg/segment/pqmr"
    25  	"github.com/siglens/siglens/pkg/segment/results/segresults"
    26  	"github.com/siglens/siglens/pkg/segment/structs"
    27  	"github.com/siglens/siglens/pkg/segment/utils"
    28  	log "github.com/sirupsen/logrus"
    29  )
    30  
// PQMR_INITIAL_SIZE is the number of bits that init() allocates and sets in
// pqmrAllMatchedConst.
const PQMR_INITIAL_SIZE = 15000

// pqmrAllMatchedConst is a template PQMR whose first PQMR_INITIAL_SIZE bits
// are all set by init(). Do not modify this variable; use it only as the
// source for cloning.
var pqmrAllMatchedConst *pqmr.PQMatchResults
    35  
// SegmentSearchStatus is a helper struct that keeps track of which records
// and blocks of a segment need to be searched.
type SegmentSearchStatus struct {
	// AllBlockStatus holds the per-block search state, keyed by block number.
	AllBlockStatus     map[uint16]*BlockSearchStatus
	numBlocksToSearch  uint16 // number of blocks to raw search (passed bloom & block time range check)
	numBlocksInSegFile uint16 // number of blocks in segment file
}
    42  
// BlockSearchStatus tracks the match state of the records of a single block
// while a search is in progress.
type BlockSearchStatus struct {
	allRecords    *pqmr.PQMatchResults // match bits for the records in this block
	BlockNum      uint16               // block number this status belongs to
	numRecords    uint16               // number of records in the block
	blockLock     *sync.RWMutex        // held by the intersect/union/exclude helpers while they edit allRecords
	firstSearch   bool                 // true until updateMatchedRecords has run for this block
	hasAnyMatched bool                 // whether any bit in allRecords is set
}
    51  
// BlockRecordIterator is a per-block view used to decide which record
// indexes should be processed for a given logical operator
// (see ShouldProcessRecord).
type BlockRecordIterator struct {
	firstSearch bool // true when no search result has been applied yet; set bits are then used regardless of op
	// op is the logical operator that ShouldProcessRecord evaluates against.
	op          utils.LogicalOperator
	AllRecords  *pqmr.PQMatchResults // match bits for the records in the block
	// AllRecLen is the number of records; indexes >= AllRecLen are never processed or unset.
	AllRecLen   uint16
}
    58  
    59  func init() {
    60  	pqmrAllMatchedConst = pqmr.CreatePQMatchResults(PQMR_INITIAL_SIZE)
    61  	for j := uint(0); j < PQMR_INITIAL_SIZE; j++ {
    62  		pqmrAllMatchedConst.AddMatchedRecord(j)
    63  	}
    64  }
    65  
    66  // Inits blocks & records to search based on input blkSum and tRange.
    67  // We will generously raw search all records in a block with a HighTS and LowTs inside tRange
    68  // It is up to the caller to call .Close()
    69  func InitBlocksToSearch(searchReq *structs.SegmentSearchRequest, blkSum []*structs.BlockSummary, allSearchResults *segresults.SearchResults, tRange *dtu.TimeRange) *SegmentSearchStatus {
    70  
    71  	allBlocks := make(map[uint16]*BlockSearchStatus, len(blkSum))
    72  
    73  	blocksToSearch := uint16(0)
    74  	for i, bSum := range blkSum {
    75  
    76  		if tRange.CheckRangeOverLap(bSum.LowTs, bSum.HighTs) {
    77  			currBlk := uint16(i)
    78  
    79  			if _, shouldSearch := searchReq.AllBlocksToSearch[currBlk]; !shouldSearch {
    80  				continue
    81  			}
    82  
    83  			if !allSearchResults.ShouldSearchRange(bSum.LowTs, bSum.HighTs) {
    84  				allSearchResults.SetEarlyExit(true)
    85  				continue
    86  			}
    87  
    88  			// Using clone method to set all the bits at once, instead of looping through to set each bit.
    89  			passedRecs := pqmr.Clone(pqmrAllMatchedConst)
    90  
    91  			// Resizing based on the recCount
    92  			if bSum.RecCount > PQMR_INITIAL_SIZE {
    93  				for j := uint(PQMR_INITIAL_SIZE); j < uint(bSum.RecCount); j++ {
    94  					passedRecs.AddMatchedRecord(j)
    95  				}
    96  			} else {
    97  				for j := uint(bSum.RecCount); j < PQMR_INITIAL_SIZE; j++ {
    98  					passedRecs.ClearBit(j)
    99  				}
   100  			}
   101  
   102  			allBlocks[currBlk] = &BlockSearchStatus{
   103  				BlockNum:      currBlk,
   104  				allRecords:    passedRecs,
   105  				numRecords:    bSum.RecCount,
   106  				blockLock:     &sync.RWMutex{},
   107  				firstSearch:   true,
   108  				hasAnyMatched: true,
   109  			}
   110  			blocksToSearch++
   111  		}
   112  	}
   113  
   114  	return &SegmentSearchStatus{
   115  		AllBlockStatus:     allBlocks,
   116  		numBlocksToSearch:  blocksToSearch,
   117  		numBlocksInSegFile: uint16(len(blkSum)),
   118  	}
   119  }
   120  
   121  func (sss *SegmentSearchStatus) getTotalCounts() (uint64, uint64) {
   122  
   123  	totalMatched := uint64(0)
   124  	totalUnmatched := uint64(0)
   125  	blkMatchedCount := uint64(0)
   126  	blkUnmatchedCount := uint64(0)
   127  
   128  	for _, blkStatus := range sss.AllBlockStatus {
   129  		blkMatchedCount = uint64(blkStatus.allRecords.GetNumberOfSetBits())
   130  		blkUnmatchedCount = uint64(blkStatus.numRecords) - blkMatchedCount
   131  		totalMatched += blkMatchedCount
   132  		totalUnmatched += blkUnmatchedCount
   133  	}
   134  
   135  	return totalMatched, totalUnmatched
   136  }
   137  
// Close is currently a no-op. The InitBlocksToSearch contract asks callers to
// invoke it once they are done with the status.
func (sss *SegmentSearchStatus) Close() {
}
   140  
   141  // if op == Or return allUnmatchedRecords
   142  // if op == And return allMatchedRecords
   143  // if op == Exclusion return allMatchedRecords
   144  // if this is the first call, then return allMatchedRecords regardless (will be time filtered)
   145  func (sss *SegmentSearchStatus) GetRecordIteratorForBlock(op utils.LogicalOperator, blkNum uint16) (*BlockRecordIterator, error) {
   146  
   147  	blkStatus, ok := sss.AllBlockStatus[blkNum]
   148  	if !ok {
   149  		log.Errorf("AddTimeFilteredRecordToBlock: tried to add a record to a block that does not exist %+v", blkNum)
   150  		return nil, errors.New("block does not exist in segment")
   151  	}
   152  
   153  	return blkStatus.GetRecordIteratorForBlock(op)
   154  }
   155  
   156  func (sss *SegmentSearchStatus) updateMatchedRecords(blkNum uint16, matchedRecs *pqmr.PQMatchResults, op utils.LogicalOperator) error {
   157  
   158  	blkStatus, ok := sss.AllBlockStatus[blkNum]
   159  	if !ok {
   160  		log.Warnf("updateAndMatchedRecords: block %d does not exist in allBlockStatus!", blkNum)
   161  		return errors.New("block does not exist in sss.allBlockStatus")
   162  	}
   163  	switch op {
   164  	case utils.And:
   165  		// new blkRecs.allMatchedRecords ==  intersection of matchedRecs and blkRecs.allMatchedRecords
   166  		// for elements removed from blkRecs.allMatchedRecords, add to blkRecs.allUnmatchedRecords
   167  		blkStatus.intersectMatchedRecords(matchedRecs)
   168  	case utils.Or:
   169  		// add all new recordNums to  sss.allBlockStatus.allMatchedRecords
   170  		// for newly added recordNums, remove it from sss.allBlockStatus.allUnmatchedRecords
   171  		if blkStatus.firstSearch {
   172  			blkStatus.intersectMatchedRecords(matchedRecs)
   173  		} else {
   174  			blkStatus.unionMatchedRecords(matchedRecs)
   175  		}
   176  	case utils.Exclusion:
   177  		// remove all recIdx from blkRecs.allMatchedRecords that exist in matchedRecs
   178  		// for removed elements from blkRecs.allMatchedRecords, add to blkRecs.allUnmatchedRecord
   179  		blkStatus.excludeMatchedRecords(matchedRecs)
   180  	}
   181  	blkStatus.firstSearch = false
   182  	return nil
   183  }
   184  
   185  func (bss *BlockSearchStatus) intersectMatchedRecords(matchedRecs *pqmr.PQMatchResults) {
   186  
   187  	bss.blockLock.Lock()
   188  	bss.allRecords.InPlaceIntersection(matchedRecs)
   189  
   190  	bss.hasAnyMatched = bss.allRecords.Any()
   191  
   192  	bss.blockLock.Unlock()
   193  }
   194  
   195  func (bss *BlockSearchStatus) unionMatchedRecords(matchedRecs *pqmr.PQMatchResults) {
   196  
   197  	bss.blockLock.Lock()
   198  	bss.allRecords.InPlaceUnion(matchedRecs)
   199  
   200  	bss.hasAnyMatched = bss.allRecords.Any()
   201  
   202  	bss.blockLock.Unlock()
   203  }
   204  
   205  func (bss *BlockSearchStatus) excludeMatchedRecords(matchedRecs *pqmr.PQMatchResults) {
   206  
   207  	bss.blockLock.Lock()
   208  	for i := uint(0); i < uint(int(matchedRecs.GetNumberOfBits())); i++ {
   209  		if matchedRecs.DoesRecordMatch(i) {
   210  			if bss.allRecords.DoesRecordMatch(i) {
   211  				bss.allRecords.ClearBit(i)
   212  			}
   213  		}
   214  	}
   215  	bss.hasAnyMatched = bss.allRecords.Any()
   216  	bss.blockLock.Unlock()
   217  }
   218  
   219  func (bss *BlockSearchStatus) GetRecordIteratorForBlock(op utils.LogicalOperator) (*BlockRecordIterator, error) {
   220  	return &BlockRecordIterator{
   221  		firstSearch: bss.firstSearch,
   222  		op:          op,
   223  		AllRecords:  bss.allRecords,
   224  		AllRecLen:   bss.numRecords,
   225  	}, nil
   226  }
   227  
   228  // returns a copy of the block iterator. This should be called in during time range filtering to avoid PQMR backfilling time filtered records
   229  func (bss *BlockSearchStatus) GetRecordIteratorCopyForBlock(op utils.LogicalOperator) (*BlockRecordIterator, error) {
   230  	return &BlockRecordIterator{
   231  		firstSearch: bss.firstSearch,
   232  		op:          op,
   233  		AllRecords:  bss.allRecords.Copy(),
   234  		AllRecLen:   bss.numRecords,
   235  	}, nil
   236  }
   237  
   238  func (bss *BlockRecordIterator) ShouldProcessRecord(idx uint) bool {
   239  	if idx >= uint(bss.AllRecLen) {
   240  		return false
   241  	}
   242  	if bss.firstSearch || bss.op == utils.And || bss.op == utils.Exclusion {
   243  		if bss.AllRecords.DoesRecordMatch(idx) {
   244  			return true
   245  		}
   246  	} else if bss.op == utils.Or {
   247  		if !bss.AllRecords.DoesRecordMatch(idx) {
   248  			return true
   249  		}
   250  	}
   251  	return false
   252  }
   253  
   254  // set idx bit to 0. This function can be used to remove records that dont match timestamps
   255  func (bss *BlockRecordIterator) UnsetRecord(idx uint) {
   256  	if idx >= uint(bss.AllRecLen) {
   257  		return
   258  	}
   259  	bss.AllRecords.ClearBit(idx)
   260  }
   261  
   262  // Inits blocks for aggs on input blkSum
   263  func InitBlocksForAggsFastPath(searchReq *structs.SegmentSearchRequest,
   264  	blkSum []*structs.BlockSummary) *SegmentSearchStatus {
   265  
   266  	allBlocks := make(map[uint16]*BlockSearchStatus, len(searchReq.AllBlocksToSearch))
   267  
   268  	for blkNum := range searchReq.AllBlocksToSearch {
   269  
   270  		bSum := blkSum[blkNum]
   271  
   272  		allBlocks[blkNum] = &BlockSearchStatus{
   273  			BlockNum:      blkNum,
   274  			numRecords:    bSum.RecCount,
   275  			blockLock:     &sync.RWMutex{},
   276  			firstSearch:   true,
   277  			hasAnyMatched: true,
   278  		}
   279  	}
   280  
   281  	return &SegmentSearchStatus{
   282  		AllBlockStatus:     allBlocks,
   283  		numBlocksToSearch:  uint16(len(searchReq.AllBlocksToSearch)),
   284  		numBlocksInSegFile: uint16(len(blkSum)),
   285  	}
   286  }
   287  
   288  // for fastpath, the matchedRec bitset is not used
   289  // matchedcount is always equal to numrecs in each of the blocksearchstatus
   290  func (sss *SegmentSearchStatus) getTotalCountsFastPath() uint64 {
   291  
   292  	totalMatched := uint64(0)
   293  
   294  	for _, blkStatus := range sss.AllBlockStatus {
   295  		totalMatched += uint64(blkStatus.numRecords)
   296  	}
   297  
   298  	return totalMatched
   299  }
   300  
   301  func InitIteratorFromPQMR(pqmr *pqmr.PQMatchResults, nRecs uint) *BlockRecordIterator {
   302  	return &BlockRecordIterator{
   303  		firstSearch: true,
   304  		op:          utils.And,
   305  		AllRecords:  pqmr,
   306  		AllRecLen:   uint16(nRecs),
   307  	}
   308  }