github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/search/filtersearch.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package search 18 19 import ( 20 "sync" 21 22 "github.com/siglens/siglens/pkg/config" 23 "github.com/siglens/siglens/pkg/segment/reader/segread" 24 "github.com/siglens/siglens/pkg/segment/results/segresults" 25 "github.com/siglens/siglens/pkg/segment/structs" 26 "github.com/siglens/siglens/pkg/segment/utils" 27 log "github.com/sirupsen/logrus" 28 ) 29 30 // Search a single SearchQuery and returns which records passes the filter 31 func RawSearchSingleQuery(query *structs.SearchQuery, searchReq *structs.SegmentSearchRequest, segmentSearch *SegmentSearchStatus, 32 allBlockSearchHelpers []*structs.BlockSearchHelper, op utils.LogicalOperator, queryMetrics *structs.QueryProcessingMetrics, qid uint64, 33 allSearchResults *segresults.SearchResults) *SegmentSearchStatus { 34 35 queryType := query.GetQueryType() 36 searchCols := getAllColumnsNeededForSearch(query, searchReq.AllPossibleColumns) 37 sharedMultiReader, err := segread.InitSharedMultiColumnReaders(searchReq.SegmentKey, searchCols, searchReq.AllBlocksToSearch, 38 searchReq.SearchMetadata.BlockSummaries, len(allBlockSearchHelpers), qid) 39 40 if err != nil { 41 // if we fail to read needed columns, we can convert it to a match none 42 // TODO: what would this look like in complex relations 43 queryType = structs.EditQueryTypeForInvalidColumn(queryType) 44 log.Warnf("qid=%d, filterBlockRequestFromQuery: Unable to read all columns in query new query type %+v", 45 qid, queryType) 46 log.Warnf("qid=%d, filterBlockRequestFromQuery: Tried to initialized a multi reader for %+v. Error: %v", 47 qid, searchCols, err) 48 } 49 50 defer sharedMultiReader.Close() 51 // call N parallel block managers, each with their own block 52 filterBlockRequestsChan := make(chan *BlockSearchStatus, len(segmentSearch.AllBlockStatus)) 53 for _, filterReq := range segmentSearch.AllBlockStatus { 54 filterBlockRequestsChan <- filterReq 55 } 56 close(filterBlockRequestsChan) 57 58 var runningBlockManagers sync.WaitGroup 59 for i, blockHelper := range allBlockSearchHelpers { 60 runningBlockManagers.Add(1) 61 go filterBlockRequestFromQuery(sharedMultiReader.MultiColReaders[i], query, segmentSearch, 62 filterBlockRequestsChan, blockHelper, &runningBlockManagers, op, queryType, qid, 63 allSearchResults, searchReq) 64 } 65 runningBlockManagers.Wait() 66 logSingleQuerySummary(segmentSearch, op, qid) 67 return segmentSearch 68 } 69 70 func logSingleQuerySummary(segmentSearch *SegmentSearchStatus, op utils.LogicalOperator, qid uint64) { 71 if config.IsDebugMode() { 72 opStr := utils.ConvertOperatorToString(op) 73 sumMatched, sumUnmatched := segmentSearch.getTotalCounts() 74 log.Infof("qid=%d, After a %+v op, there are %+v total matched records and %+v total unmatched records", 75 qid, opStr, sumMatched, sumUnmatched) 76 } 77 } 78 79 func getAllColumnsNeededForSearch(query *structs.SearchQuery, allCols map[string]bool) map[string]bool { 80 searchCols, wildcard := query.GetAllColumnsInQuery() 81 if wildcard && query.SearchType != structs.MatchAll { 82 searchCols = allCols 83 } 84 85 return searchCols 86 } 87 88 func filterBlockRequestFromQuery(multiColReader *segread.MultiColSegmentReader, query *structs.SearchQuery, 89 segmentSearch *SegmentSearchStatus, resultsChan chan *BlockSearchStatus, blockHelper *structs.BlockSearchHelper, 90 runningBlockManagers *sync.WaitGroup, op utils.LogicalOperator, queryType structs.SearchNodeType, 91 qid uint64, allSearchResults *segresults.SearchResults, searchReq *structs.SegmentSearchRequest) { 92 93 defer runningBlockManagers.Done() // defer in case of panics 94 95 holderDte := &utils.DtypeEnclosure{} 96 for blockReq := range resultsChan { 97 blockHelper.ResetBlockHelper() 98 recIT, err := segmentSearch.GetRecordIteratorForBlock(op, blockReq.BlockNum) 99 if err != nil { 100 log.Errorf("qid=%d filterBlockRequestFromQuery failed to get next search set for block %d! Err %+v", qid, blockReq.BlockNum, err) 101 allSearchResults.AddError(err) 102 break 103 } 104 switch queryType { 105 case structs.MatchAllQuery: 106 // time should have been checked before, and recsToSearch 107 for i := uint(0); i < uint(recIT.AllRecLen); i++ { 108 if recIT.ShouldProcessRecord(i) { 109 blockHelper.AddMatchedRecord(i) 110 } 111 } 112 case structs.ColumnValueQuery: 113 filterRecordsFromSearchQuery(query, segmentSearch, blockHelper, multiColReader, recIT, 114 blockReq.BlockNum, holderDte, qid, allSearchResults, searchReq) 115 case structs.InvalidQuery: 116 // don't match any records 117 } 118 matchedRecords := blockHelper.GetAllMatchedRecords() 119 err = segmentSearch.updateMatchedRecords(blockReq.BlockNum, matchedRecords, op) 120 if err != nil { 121 log.Errorf("qid=%d, filterBlockRequestFromQuery failed to update segment search status with matched records %+v. Error %+v", qid, matchedRecords, err) 122 allSearchResults.AddError(err) 123 break 124 } 125 } 126 } 127 128 func filterRecordsFromSearchQuery(query *structs.SearchQuery, segmentSearch *SegmentSearchStatus, 129 blockHelper *structs.BlockSearchHelper, 130 multiColReader *segread.MultiColSegmentReader, recIT *BlockRecordIterator, blockNum uint16, 131 holderDte *utils.DtypeEnclosure, qid uint64, allSearchResults *segresults.SearchResults, 132 searchReq *structs.SegmentSearchRequest) { 133 134 // first we walk through the search checking if this query can be satisfied by looking at the 135 // dict encoding file for the column/s 136 cmiPassedCnames := make(map[string]bool) 137 checkAllCols := false 138 if query.SearchType == structs.MatchWordsAllColumns || 139 query.SearchType == structs.RegexExpressionAllColumns || 140 query.SearchType == structs.MatchDictArrayAllColumns { 141 checkAllCols = true 142 } 143 144 for _, colInfo := range multiColReader.AllColums { 145 if checkAllCols { 146 cmiPassedCnames[colInfo.ColumnName] = true 147 } else { 148 _, ok := searchReq.CmiPassedCnames[blockNum][colInfo.ColumnName] 149 if ok { 150 cmiPassedCnames[colInfo.ColumnName] = true 151 } 152 } 153 } 154 155 doRecLevelSearch, deCnames, err := applyColumnarSearchUsingDictEnc(query, multiColReader, blockNum, qid, 156 recIT, blockHelper, searchReq, cmiPassedCnames) 157 if err != nil { 158 allSearchResults.AddError(err) 159 // we still continue, since the reclevel may not yield an error 160 } 161 162 // we go through all of the cmi-passed-columnnames, if all of them have already been checked in 163 // the dict-enc func above, then we don't need to do rec-by-rec search 164 if doRecLevelSearch { 165 for cname := range cmiPassedCnames { 166 _, ok := deCnames[cname] 167 if !ok { 168 doRecLevelSearch = true 169 break 170 } else { 171 doRecLevelSearch = false 172 } 173 } 174 } 175 176 if doRecLevelSearch { 177 for i := uint(0); i < uint(recIT.AllRecLen); i++ { 178 if recIT.ShouldProcessRecord(i) { 179 matched, err := ApplyColumnarSearchQuery(query, multiColReader, blockNum, uint16(i), holderDte, 180 qid, deCnames, searchReq, cmiPassedCnames) 181 if err != nil { 182 allSearchResults.AddError(err) 183 break 184 } 185 if query.MatchFilter != nil && query.MatchFilter.NegateMatch { 186 if matched || blockHelper.DoesRecordMatch(i) { 187 blockHelper.ClearBit(i) 188 } else { 189 blockHelper.AddMatchedRecord(i) 190 } 191 } else { 192 if matched { 193 blockHelper.AddMatchedRecord(i) 194 } 195 } 196 } 197 } 198 } 199 multiColReader.ReorderColumnUsage() 200 }