github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/search/filtersearch.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package search
    18  
    19  import (
    20  	"sync"
    21  
    22  	"github.com/siglens/siglens/pkg/config"
    23  	"github.com/siglens/siglens/pkg/segment/reader/segread"
    24  	"github.com/siglens/siglens/pkg/segment/results/segresults"
    25  	"github.com/siglens/siglens/pkg/segment/structs"
    26  	"github.com/siglens/siglens/pkg/segment/utils"
    27  	log "github.com/sirupsen/logrus"
    28  )
    29  
    30  // Search a single SearchQuery and returns which records passes the filter
    31  func RawSearchSingleQuery(query *structs.SearchQuery, searchReq *structs.SegmentSearchRequest, segmentSearch *SegmentSearchStatus,
    32  	allBlockSearchHelpers []*structs.BlockSearchHelper, op utils.LogicalOperator, queryMetrics *structs.QueryProcessingMetrics, qid uint64,
    33  	allSearchResults *segresults.SearchResults) *SegmentSearchStatus {
    34  
    35  	queryType := query.GetQueryType()
    36  	searchCols := getAllColumnsNeededForSearch(query, searchReq.AllPossibleColumns)
    37  	sharedMultiReader, err := segread.InitSharedMultiColumnReaders(searchReq.SegmentKey, searchCols, searchReq.AllBlocksToSearch,
    38  		searchReq.SearchMetadata.BlockSummaries, len(allBlockSearchHelpers), qid)
    39  
    40  	if err != nil {
    41  		// if we fail to read needed columns, we can convert it to a match none
    42  		// TODO: what would this look like in complex relations
    43  		queryType = structs.EditQueryTypeForInvalidColumn(queryType)
    44  		log.Warnf("qid=%d, filterBlockRequestFromQuery: Unable to read all columns in query new query type %+v",
    45  			qid, queryType)
    46  		log.Warnf("qid=%d, filterBlockRequestFromQuery: Tried to initialized a multi reader for %+v. Error: %v",
    47  			qid, searchCols, err)
    48  	}
    49  
    50  	defer sharedMultiReader.Close()
    51  	// call N parallel block managers, each with their own block
    52  	filterBlockRequestsChan := make(chan *BlockSearchStatus, len(segmentSearch.AllBlockStatus))
    53  	for _, filterReq := range segmentSearch.AllBlockStatus {
    54  		filterBlockRequestsChan <- filterReq
    55  	}
    56  	close(filterBlockRequestsChan)
    57  
    58  	var runningBlockManagers sync.WaitGroup
    59  	for i, blockHelper := range allBlockSearchHelpers {
    60  		runningBlockManagers.Add(1)
    61  		go filterBlockRequestFromQuery(sharedMultiReader.MultiColReaders[i], query, segmentSearch,
    62  			filterBlockRequestsChan, blockHelper, &runningBlockManagers, op, queryType, qid,
    63  			allSearchResults, searchReq)
    64  	}
    65  	runningBlockManagers.Wait()
    66  	logSingleQuerySummary(segmentSearch, op, qid)
    67  	return segmentSearch
    68  }
    69  
    70  func logSingleQuerySummary(segmentSearch *SegmentSearchStatus, op utils.LogicalOperator, qid uint64) {
    71  	if config.IsDebugMode() {
    72  		opStr := utils.ConvertOperatorToString(op)
    73  		sumMatched, sumUnmatched := segmentSearch.getTotalCounts()
    74  		log.Infof("qid=%d, After a %+v op, there are %+v total matched records and %+v total unmatched records",
    75  			qid, opStr, sumMatched, sumUnmatched)
    76  	}
    77  }
    78  
    79  func getAllColumnsNeededForSearch(query *structs.SearchQuery, allCols map[string]bool) map[string]bool {
    80  	searchCols, wildcard := query.GetAllColumnsInQuery()
    81  	if wildcard && query.SearchType != structs.MatchAll {
    82  		searchCols = allCols
    83  	}
    84  
    85  	return searchCols
    86  }
    87  
    88  func filterBlockRequestFromQuery(multiColReader *segread.MultiColSegmentReader, query *structs.SearchQuery,
    89  	segmentSearch *SegmentSearchStatus, resultsChan chan *BlockSearchStatus, blockHelper *structs.BlockSearchHelper,
    90  	runningBlockManagers *sync.WaitGroup, op utils.LogicalOperator, queryType structs.SearchNodeType,
    91  	qid uint64, allSearchResults *segresults.SearchResults, searchReq *structs.SegmentSearchRequest) {
    92  
    93  	defer runningBlockManagers.Done() // defer in case of panics
    94  
    95  	holderDte := &utils.DtypeEnclosure{}
    96  	for blockReq := range resultsChan {
    97  		blockHelper.ResetBlockHelper()
    98  		recIT, err := segmentSearch.GetRecordIteratorForBlock(op, blockReq.BlockNum)
    99  		if err != nil {
   100  			log.Errorf("qid=%d filterBlockRequestFromQuery failed to get next search set for block %d! Err %+v", qid, blockReq.BlockNum, err)
   101  			allSearchResults.AddError(err)
   102  			break
   103  		}
   104  		switch queryType {
   105  		case structs.MatchAllQuery:
   106  			// time should have been checked before, and recsToSearch
   107  			for i := uint(0); i < uint(recIT.AllRecLen); i++ {
   108  				if recIT.ShouldProcessRecord(i) {
   109  					blockHelper.AddMatchedRecord(i)
   110  				}
   111  			}
   112  		case structs.ColumnValueQuery:
   113  			filterRecordsFromSearchQuery(query, segmentSearch, blockHelper, multiColReader, recIT,
   114  				blockReq.BlockNum, holderDte, qid, allSearchResults, searchReq)
   115  		case structs.InvalidQuery:
   116  			// don't match any records
   117  		}
   118  		matchedRecords := blockHelper.GetAllMatchedRecords()
   119  		err = segmentSearch.updateMatchedRecords(blockReq.BlockNum, matchedRecords, op)
   120  		if err != nil {
   121  			log.Errorf("qid=%d, filterBlockRequestFromQuery failed to update segment search status with matched records %+v. Error %+v", qid, matchedRecords, err)
   122  			allSearchResults.AddError(err)
   123  			break
   124  		}
   125  	}
   126  }
   127  
   128  func filterRecordsFromSearchQuery(query *structs.SearchQuery, segmentSearch *SegmentSearchStatus,
   129  	blockHelper *structs.BlockSearchHelper,
   130  	multiColReader *segread.MultiColSegmentReader, recIT *BlockRecordIterator, blockNum uint16,
   131  	holderDte *utils.DtypeEnclosure, qid uint64, allSearchResults *segresults.SearchResults,
   132  	searchReq *structs.SegmentSearchRequest) {
   133  
   134  	// first we walk through the search checking if this query can be satisfied by looking at the
   135  	// dict encoding file for the column/s
   136  	cmiPassedCnames := make(map[string]bool)
   137  	checkAllCols := false
   138  	if query.SearchType == structs.MatchWordsAllColumns ||
   139  		query.SearchType == structs.RegexExpressionAllColumns ||
   140  		query.SearchType == structs.MatchDictArrayAllColumns {
   141  		checkAllCols = true
   142  	}
   143  
   144  	for _, colInfo := range multiColReader.AllColums {
   145  		if checkAllCols {
   146  			cmiPassedCnames[colInfo.ColumnName] = true
   147  		} else {
   148  			_, ok := searchReq.CmiPassedCnames[blockNum][colInfo.ColumnName]
   149  			if ok {
   150  				cmiPassedCnames[colInfo.ColumnName] = true
   151  			}
   152  		}
   153  	}
   154  
   155  	doRecLevelSearch, deCnames, err := applyColumnarSearchUsingDictEnc(query, multiColReader, blockNum, qid,
   156  		recIT, blockHelper, searchReq, cmiPassedCnames)
   157  	if err != nil {
   158  		allSearchResults.AddError(err)
   159  		// we still continue, since the reclevel may not yield an error
   160  	}
   161  
   162  	// we go through all of the cmi-passed-columnnames, if all of them have already been checked in
   163  	// the dict-enc func above, then we don't need to do rec-by-rec search
   164  	if doRecLevelSearch {
   165  		for cname := range cmiPassedCnames {
   166  			_, ok := deCnames[cname]
   167  			if !ok {
   168  				doRecLevelSearch = true
   169  				break
   170  			} else {
   171  				doRecLevelSearch = false
   172  			}
   173  		}
   174  	}
   175  
   176  	if doRecLevelSearch {
   177  		for i := uint(0); i < uint(recIT.AllRecLen); i++ {
   178  			if recIT.ShouldProcessRecord(i) {
   179  				matched, err := ApplyColumnarSearchQuery(query, multiColReader, blockNum, uint16(i), holderDte,
   180  					qid, deCnames, searchReq, cmiPassedCnames)
   181  				if err != nil {
   182  					allSearchResults.AddError(err)
   183  					break
   184  				}
   185  				if query.MatchFilter != nil && query.MatchFilter.NegateMatch {
   186  					if matched || blockHelper.DoesRecordMatch(i) {
   187  						blockHelper.ClearBit(i)
   188  					} else {
   189  						blockHelper.AddMatchedRecord(i)
   190  					}
   191  				} else {
   192  					if matched {
   193  						blockHelper.AddMatchedRecord(i)
   194  					}
   195  				}
   196  			}
   197  		}
   198  	}
   199  	multiColReader.ReorderColumnUsage()
   200  }