github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/query/metadatafilter_test.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package query 18 19 import ( 20 "os" 21 "testing" 22 23 localstorage "github.com/siglens/siglens/pkg/blob/local" 24 dtu "github.com/siglens/siglens/pkg/common/dtypeutils" 25 "github.com/siglens/siglens/pkg/config" 26 "github.com/siglens/siglens/pkg/segment/memory/limit" 27 "github.com/siglens/siglens/pkg/segment/query/metadata" 28 . "github.com/siglens/siglens/pkg/segment/structs" 29 "github.com/siglens/siglens/pkg/segment/utils" 30 serverutils "github.com/siglens/siglens/pkg/server/utils" 31 log "github.com/sirupsen/logrus" 32 "github.com/stretchr/testify/assert" 33 ) 34 35 func testTimeFilter(t *testing.T, numBlocks int, numEntriesInBlock int, fileCount int) { 36 37 tRange := &dtu.TimeRange{ 38 StartEpochMs: 0, 39 EndEpochMs: uint64(numEntriesInBlock), 40 } 41 42 timeFilteredFiles, totalChecked, passedCheck := metadata.FilterSegmentsByTime(tRange, []string{"evts"}, 0) 43 log.Infof("time filter: %v", timeFilteredFiles) 44 assert.Equal(t, passedCheck, uint64(fileCount), "all files passed") 45 assert.Equal(t, totalChecked, uint64(fileCount), "all files passed") 46 assert.Len(t, timeFilteredFiles, 1, "one table") 47 assert.Contains(t, timeFilteredFiles, "evts", "one table") 48 assert.Len(t, timeFilteredFiles["evts"], fileCount) 49 50 // adding extra tables that do not exist should not change results 51 extraTableFiles, totalChecked, passedCheck := metadata.FilterSegmentsByTime(tRange, []string{"evts", "extra-table"}, 0) 52 assert.Equal(t, passedCheck, uint64(fileCount), "all files passed") 53 assert.Equal(t, totalChecked, uint64(fileCount), "all files passed") 54 assert.Len(t, extraTableFiles, 1, "one table") 55 assert.Contains(t, extraTableFiles, "evts", "one table") 56 assert.Len(t, extraTableFiles["evts"], fileCount) 57 58 // no results when no tables are given 59 noTableFiles, totalChecked, passedCheck := metadata.FilterSegmentsByTime(tRange, []string{}, 0) 60 assert.Equal(t, passedCheck, uint64(0), "no tables") 61 assert.Equal(t, totalChecked, uint64(0), "no tables") 62 assert.Len(t, noTableFiles, 0) 63 assert.Len(t, noTableFiles["evts"], 0) 64 } 65 66 func testBloomFilter(t *testing.T, numBlocks int, numEntriesInBlock int, fileCount int) { 67 tRange := &dtu.TimeRange{ 68 StartEpochMs: 0, 69 EndEpochMs: uint64(numEntriesInBlock), 70 } 71 indexNames := []string{"evts"} 72 value1, _ := utils.CreateDtypeEnclosure("value1", 0) 73 baseQuery := &SearchQuery{ 74 ExpressionFilter: &SearchExpression{ 75 LeftSearchInput: &SearchExpressionInput{ColumnName: "key1"}, 76 FilterOp: utils.Equals, 77 RightSearchInput: &SearchExpressionInput{ColumnValue: value1}, 78 }, 79 SearchType: SimpleExpression, 80 } 81 allFiles, _, _ := metadata.FilterSegmentsByTime(tRange, indexNames, 0) 82 ti := InitTableInfo("evts", 0, false) 83 sn := &SearchNode{ 84 AndSearchConditions: &SearchCondition{ 85 SearchQueries: []*SearchQuery{baseQuery}, 86 }, 87 } 88 qInfo, err := InitQueryInformation(sn, nil, tRange, ti, uint64(numBlocks*numEntriesInBlock*fileCount), 5, 1, nil, 0) 89 assert.NoError(t, err) 90 qsrs := convertSegKeysToQueryRequests(qInfo, allFiles) 91 keysToRawSearch, _, _ := filterSegKeysToQueryResults(qInfo, qsrs) 92 93 _, _, isRange := baseQuery.ExtractRangeFilterFromQuery(1) 94 assert.False(t, isRange) 95 96 blockbloomKeywords, wildcard, blockOp := baseQuery.GetAllBlockBloomKeysToSearch() 97 assert.False(t, wildcard) 98 99 assert.Len(t, blockbloomKeywords, 1) 100 assert.Equal(t, blockOp, utils.And) 101 assert.Contains(t, blockbloomKeywords, "value1") 102 assert.Len(t, keysToRawSearch, fileCount, "raw search all keys but got %+v. expected %+v", keysToRawSearch, fileCount) 103 var rangeOp utils.FilterOperator = utils.Equals 104 for _, qsr := range keysToRawSearch { 105 assert.Equal(t, RAW_SEARCH, qsr.sType) 106 blkTracker, err := qsr.GetMicroIndexFilter() 107 assert.NoError(t, err, "no error should occur when getting block tracker") 108 searchRequests, checkedBlocks, matchedBlocks, errs := getAllSearchRequestsFromCmi(baseQuery, tRange, blkTracker, 109 blockbloomKeywords, blockOp, nil, rangeOp, false, wildcard, 0, true, qsr.pqid) 110 assert.Len(t, errs, 0) 111 assert.Len(t, searchRequests, 1, "one file at a time") 112 assert.Equal(t, uint64(numBlocks), checkedBlocks, "checkedBlocks blocks is not as expected") 113 assert.Equal(t, uint64(numBlocks), matchedBlocks, "matchedBlocks blocks is not as expected") 114 for _, sReq := range searchRequests { 115 assert.Len(t, sReq.AllBlocksToSearch, len(sReq.SearchMetadata.BlockSummaries)) 116 } 117 } 118 119 var randomFile string 120 for fileName := range allFiles["evts"] { 121 randomFile = fileName 122 break 123 } 124 log.Infof("Searching for file %s", randomFile) 125 randomFileDTE, _ := utils.CreateDtypeEnclosure(randomFile, 0) 126 fileNameQuery := &SearchQuery{ 127 ExpressionFilter: &SearchExpression{ 128 LeftSearchInput: &SearchExpressionInput{ColumnName: "key10"}, 129 FilterOp: utils.Equals, 130 RightSearchInput: &SearchExpressionInput{ColumnValue: randomFileDTE}, 131 }, 132 SearchType: SimpleExpression, 133 } 134 blockbloomKeywords, wildcard, blockOp = fileNameQuery.GetAllBlockBloomKeysToSearch() 135 assert.False(t, wildcard) 136 assert.Len(t, blockbloomKeywords, 1) 137 assert.Equal(t, blockOp, utils.And) 138 assert.Contains(t, blockbloomKeywords, randomFile) 139 140 assert.Len(t, keysToRawSearch, fileCount, "raw search all keys but got %+v. expected %+v", keysToRawSearch, fileCount) 141 for _, qsr := range keysToRawSearch { 142 assert.Equal(t, RAW_SEARCH, qsr.sType) 143 blkTracker, err := qsr.GetMicroIndexFilter() 144 assert.NoError(t, err, "no error should occur when getting block tracker") 145 searchRequests, checkedBlocks, matchedBlocks, errs := getAllSearchRequestsFromCmi(fileNameQuery, tRange, blkTracker, 146 blockbloomKeywords, blockOp, nil, rangeOp, false, wildcard, 0, true, qsr.pqid) 147 assert.Len(t, errs, 0) 148 assert.Equal(t, uint64(numBlocks), checkedBlocks, "all blocks will be checked") 149 if qsr.segKey == randomFile { 150 assert.Len(t, searchRequests, 1, "file with segKey == %+v should be the only match", qsr.segKey) 151 assert.Equal(t, uint64(numBlocks), matchedBlocks, "a single file with have the right value for key10") 152 for _, sReq := range searchRequests { 153 assert.Len(t, sReq.AllBlocksToSearch, len(sReq.SearchMetadata.BlockSummaries)) 154 } 155 } else { 156 assert.Len(t, searchRequests, 0, "should not generate an ssr with key %+v when looking for %+v", qsr.segKey, randomFile) 157 assert.Equal(t, uint64(0), matchedBlocks, "no matched blocks") 158 } 159 } 160 161 // key7 == batch-1 test 162 batchOne, _ := utils.CreateDtypeEnclosure("batch-1", 0) 163 batchQuery := &SearchQuery{ 164 ExpressionFilter: &SearchExpression{ 165 LeftSearchInput: &SearchExpressionInput{ColumnName: "key7"}, 166 FilterOp: utils.Equals, 167 RightSearchInput: &SearchExpressionInput{ColumnValue: batchOne}, 168 }, 169 SearchType: SimpleExpression, 170 } 171 allFiles, _, _ = metadata.FilterSegmentsByTime(tRange, []string{"evts"}, 0) 172 qsrs = convertSegKeysToQueryRequests(qInfo, allFiles) 173 keysToRawSearch, _, _ = filterSegKeysToQueryResults(qInfo, qsrs) 174 175 blockbloomKeywords, wildcard, blockOp = batchQuery.GetAllBlockBloomKeysToSearch() 176 assert.False(t, wildcard) 177 assert.Len(t, blockbloomKeywords, 1) 178 assert.Equal(t, blockOp, utils.And) 179 assert.Contains(t, blockbloomKeywords, "batch-1") 180 log.Infof("batch query block bloom keys : %v, block op %v", blockbloomKeywords, blockOp) 181 182 assert.Len(t, keysToRawSearch, fileCount, "raw search all keys but got %+v. expected %+v", keysToRawSearch, fileCount) 183 for _, qsr := range keysToRawSearch { 184 assert.Equal(t, RAW_SEARCH, qsr.sType) 185 blkTracker, err := qsr.GetMicroIndexFilter() 186 assert.NoError(t, err, "no error should occur when getting block tracker") 187 searchRequests, checkedBlocks, matchedBlocks, errs := getAllSearchRequestsFromCmi(batchQuery, tRange, blkTracker, 188 blockbloomKeywords, blockOp, nil, rangeOp, false, wildcard, 0, true, qsr.pqid) 189 assert.Len(t, errs, 0) 190 assert.Len(t, searchRequests, 1, "process single request at a time") 191 assert.Equal(t, uint64(numBlocks), checkedBlocks, "each file will should have a single matching block") 192 assert.Equal(t, uint64(1), matchedBlocks, "each file will should have a single matching block") 193 for _, sReq := range searchRequests { 194 assert.Len(t, sReq.AllBlocksToSearch, 1) 195 assert.Contains(t, sReq.AllBlocksToSearch, uint16(1)) 196 } 197 } 198 199 batchWildcardQuery := &SearchQuery{ 200 ExpressionFilter: &SearchExpression{ 201 LeftSearchInput: &SearchExpressionInput{ColumnName: "*"}, 202 FilterOp: utils.Equals, 203 RightSearchInput: &SearchExpressionInput{ColumnValue: batchOne}, 204 }, 205 SearchType: SimpleExpression, 206 } 207 208 // changing col name has no effect on block bloom keys 209 blockbloomKeywords, wildcardValue, blockOp := batchWildcardQuery.GetAllBlockBloomKeysToSearch() 210 assert.False(t, wildcardValue) 211 assert.Len(t, blockbloomKeywords, 1) 212 assert.Equal(t, blockOp, utils.And) 213 assert.Contains(t, blockbloomKeywords, "batch-1") 214 cols, wildcard := batchWildcardQuery.GetAllColumnsInQuery() 215 assert.True(t, wildcard) 216 assert.Len(t, cols, 0) 217 218 for _, qsr := range keysToRawSearch { 219 blkTracker, err := qsr.GetMicroIndexFilter() 220 assert.NoError(t, err, "no error should occur when getting block tracker") 221 searchRequests, checkedBlocks, matchedBlocks, errs := getAllSearchRequestsFromCmi(batchWildcardQuery, tRange, blkTracker, 222 blockbloomKeywords, blockOp, nil, rangeOp, false, wildcardValue, 0, true, qsr.pqid) 223 assert.Len(t, errs, 0) 224 assert.Len(t, searchRequests, 1, "one file at a time key7=batch-1") 225 assert.Equal(t, uint64(numBlocks), checkedBlocks, "each file will should have a single matching block") 226 assert.Equal(t, uint64(1), matchedBlocks, "each file will should have a single matching block") 227 for _, sReq := range searchRequests { 228 assert.Len(t, sReq.AllBlocksToSearch, 1) 229 assert.Contains(t, sReq.AllBlocksToSearch, uint16(1)) 230 } 231 } 232 233 } 234 235 func testRangeFilter(t *testing.T, numBlocks int, numEntriesInBlock int, fileCount int) { 236 tRange := &dtu.TimeRange{ 237 StartEpochMs: 0, 238 EndEpochMs: uint64(numEntriesInBlock), 239 } 240 rangeValue, _ := utils.CreateDtypeEnclosure(int64(0), 0) 241 rangeQuery := &SearchQuery{ 242 ExpressionFilter: &SearchExpression{ 243 LeftSearchInput: &SearchExpressionInput{ColumnName: "key8"}, 244 FilterOp: utils.Equals, 245 RightSearchInput: &SearchExpressionInput{ColumnValue: rangeValue}, 246 }, 247 SearchType: SimpleExpression, 248 } 249 allFiles, _, _ := metadata.FilterSegmentsByTime(tRange, []string{"evts"}, 0) 250 ti := InitTableInfo("evts", 0, false) 251 sn := &SearchNode{ 252 AndSearchConditions: &SearchCondition{ 253 SearchQueries: []*SearchQuery{rangeQuery}, 254 }, 255 } 256 qInfo, err := InitQueryInformation(sn, nil, tRange, ti, uint64(numBlocks*numEntriesInBlock*fileCount), 5, 1, nil, 0) 257 assert.NoError(t, err) 258 qsrs := convertSegKeysToQueryRequests(qInfo, allFiles) 259 keysToRawSearch, _, _ := filterSegKeysToQueryResults(qInfo, qsrs) 260 rangeFilter, rangeOp, isRange := rangeQuery.ExtractRangeFilterFromQuery(1) 261 log.Infof("Extracting range query. Filter %+v, RangeOp %+v", rangeFilter, rangeOp) 262 assert.True(t, isRange) 263 264 for _, qsr := range keysToRawSearch { 265 assert.Equal(t, RAW_SEARCH, qsr.sType) 266 blkTracker, err := qsr.GetMicroIndexFilter() 267 assert.NoError(t, err, "no error should occur when getting block tracker") 268 finalRangeRequests, totalChecked, passedBlocks, errs := getAllSearchRequestsFromCmi(rangeQuery, tRange, blkTracker, 269 nil, utils.And, rangeFilter, rangeOp, true, false, 0, true, qsr.pqid) 270 assert.Len(t, errs, 0) 271 assert.Equal(t, uint64(numBlocks), totalChecked) 272 assert.Equal(t, uint64(1), passedBlocks, "one block in each file matches") 273 for _, sReq := range finalRangeRequests { 274 assert.Len(t, sReq.AllBlocksToSearch, 1) 275 assert.Contains(t, sReq.AllBlocksToSearch, uint16(0)) 276 log.Infof("sReq %+v", sReq.AllBlocksToSearch) 277 } 278 } 279 } 280 281 func getMyIds() []uint64 { 282 myids := make([]uint64, 1) 283 myids[0] = 0 284 return myids 285 } 286 287 func Test_MetadataFilter(t *testing.T) { 288 numBlocks := 5 289 numEntriesInBlock := 10 290 fileCount := 5 291 config.InitializeDefaultConfig() 292 _ = localstorage.InitLocalStorage() 293 limit.InitMemoryLimiter() 294 err := InitQueryNode(getMyIds, serverutils.ExtractKibanaRequests) 295 if err != nil { 296 t.Fatalf("Failed to initialize query node: %v", err) 297 } 298 metadata.InitMockColumnarMetadataStore("data/", fileCount, numBlocks, numEntriesInBlock) 299 testTimeFilter(t, numBlocks, numEntriesInBlock, fileCount) 300 testBloomFilter(t, numBlocks, numEntriesInBlock, fileCount) 301 testRangeFilter(t, numBlocks, numEntriesInBlock, fileCount) 302 303 err = os.RemoveAll("data/") 304 if err != nil { 305 t.Fatalf("Failed to initialize query node: %v", err) 306 } 307 }