github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/query/segquery_test.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package query 18 19 import ( 20 "os" 21 "testing" 22 "time" 23 24 localstorage "github.com/siglens/siglens/pkg/blob/local" 25 dtu "github.com/siglens/siglens/pkg/common/dtypeutils" 26 "github.com/siglens/siglens/pkg/config" 27 "github.com/siglens/siglens/pkg/instrumentation" 28 "github.com/siglens/siglens/pkg/querytracker" 29 "github.com/siglens/siglens/pkg/segment/memory/limit" 30 "github.com/siglens/siglens/pkg/segment/pqmr" 31 "github.com/siglens/siglens/pkg/segment/query/metadata" 32 "github.com/siglens/siglens/pkg/segment/query/pqs" 33 "github.com/siglens/siglens/pkg/segment/query/summary" 34 "github.com/siglens/siglens/pkg/segment/results/segresults" 35 . "github.com/siglens/siglens/pkg/segment/structs" 36 . "github.com/siglens/siglens/pkg/segment/utils" 37 serverutils "github.com/siglens/siglens/pkg/server/utils" 38 "github.com/stretchr/testify/assert" 39 ) 40 41 func Test_extractRangeFilter(t *testing.T) { 42 43 // 1.0 > col1 44 leftLiteralEncoded, err := CreateDtypeEnclosure(1.0, 0) 45 if err != nil { 46 assert.Fail(t, "failed to encode 1.0", err) 47 } 48 leftInput := &SearchExpressionInput{ 49 ColumnValue: leftLiteralEncoded, 50 } 51 rightInput := &SearchExpressionInput{ 52 ColumnName: "col1", 53 } 54 55 rangeMap, newOp, isValid := ExtractRangeFilterFromSearch(leftInput, GreaterThan, rightInput, 0) 56 assert.True(t, isValid, "valid range as 1.0 can be converted to a float") 57 assert.Equal(t, newOp, LessThan, "Need to reflect to keep column on left: 1.0 > col1 --> col1 < 1.0") 58 assert.Contains(t, rangeMap, "col1") 59 60 _, _, isValid = ExtractRangeFilterFromSearch(leftInput, IsNull, rightInput, 0) 61 assert.False(t, isValid, "Range for isNull operation is unsupported") 62 63 _, _, isValid = ExtractRangeFilterFromSearch(leftInput, IsNotNull, rightInput, 0) 64 assert.False(t, isValid, "Range for isNotNull operation is unsupported") 65 66 abcdLiteralEncoded, err := CreateDtypeEnclosure("abcd", 0) 67 if err != nil { 68 assert.Fail(t, "failed to encode abcd", err) 69 } 70 leftInvalidInput := &SearchExpressionInput{ 71 ColumnValue: abcdLiteralEncoded, 72 } 73 _, _, isValid = ExtractRangeFilterFromSearch(leftInvalidInput, GreaterThan, rightInput, 0) 74 assert.False(t, isValid, "Invalid literal that is not a number") 75 } 76 77 func bloomMetadataFilter(t *testing.T, numBuffers int, numEntriesForBuffer int, fileCount int) { 78 value1, _ := CreateDtypeEnclosure("value1", 0) 79 valueFilter := FilterCriteria{ 80 ExpressionFilter: &ExpressionFilter{ 81 LeftInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnName: "key1"}}}, 82 FilterOperator: Equals, 83 RightInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnValue: value1}}}, 84 }, 85 } 86 timeRange := &dtu.TimeRange{ 87 StartEpochMs: 0, 88 EndEpochMs: uint64(numEntriesForBuffer), 89 } 90 simpleNode := &ASTNode{ 91 AndFilterCondition: &Condition{FilterCriteria: []*FilterCriteria{&valueFilter}}, 92 TimeRange: timeRange, 93 } 94 searchNode := ConvertASTNodeToSearchNode(simpleNode, 0) 95 ti := InitTableInfo("evts", 0, false) 96 queryInfo, err := InitQueryInformation(searchNode, nil, timeRange, ti, uint64(numEntriesForBuffer*numBuffers*fileCount), 97 4, 0, nil, 0) 98 assert.NoError(t, err) 99 allQuerySegKeys, rawCount, _, pqsCount, err := getAllSegmentsInQuery(queryInfo, false, time.Now(), 0) 100 assert.NoError(t, err) 101 assert.Len(t, allQuerySegKeys, fileCount) 102 assert.Equal(t, rawCount, uint64(fileCount)) 103 assert.Equal(t, pqsCount, uint64(0)) 104 105 summary := &summary.QuerySummary{} 106 for _, qsr := range allQuerySegKeys { 107 assert.Equal(t, RAW_SEARCH, qsr.sType) 108 toSearch, err := qsr.GetMicroIndexFilter() 109 assert.NoError(t, err) 110 allSearchReq := ExtractSSRFromSearchNode(searchNode, toSearch, timeRange, ti.GetQueryTables(), summary, 2, true, queryInfo.pqid) 111 assert.Len(t, allSearchReq, 1) 112 // all blocks have key1==value1 113 for key, value := range allSearchReq { 114 assert.Equal(t, value.SegmentKey, key) 115 assert.NotNil(t, value.SearchMetadata) 116 assert.NotNil(t, value.SearchMetadata.BlockSummaries) 117 } 118 } 119 120 batchOne, _ := CreateDtypeEnclosure("batch-1", 0) 121 batchFilter := FilterCriteria{ 122 ExpressionFilter: &ExpressionFilter{ 123 LeftInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnName: "key1"}}}, 124 FilterOperator: Equals, 125 RightInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnValue: batchOne}}}, 126 }, 127 } 128 simpleNode = &ASTNode{ 129 AndFilterCondition: &Condition{FilterCriteria: []*FilterCriteria{&batchFilter}}, 130 TimeRange: timeRange, 131 } 132 searchNode = ConvertASTNodeToSearchNode(simpleNode, 0) 133 queryInfo, err = InitQueryInformation(searchNode, nil, timeRange, ti, uint64(numEntriesForBuffer*numBuffers*fileCount), 134 4, 1, nil, 0) 135 assert.NoError(t, err) 136 allQuerySegKeys, rawCount, _, pqsCount, err = getAllSegmentsInQuery(queryInfo, false, time.Now(), 0) 137 assert.NoError(t, err) 138 assert.Len(t, allQuerySegKeys, fileCount) 139 assert.Equal(t, rawCount, uint64(fileCount)) 140 assert.Equal(t, pqsCount, uint64(0)) 141 for _, qsr := range allQuerySegKeys { 142 assert.Equal(t, RAW_SEARCH, qsr.sType) 143 toSearch, err := qsr.GetMicroIndexFilter() 144 assert.NoError(t, err) 145 allSearchReq := ExtractSSRFromSearchNode(searchNode, toSearch, timeRange, ti.GetQueryTables(), summary, 2, true, queryInfo.pqid) 146 assert.Len(t, allSearchReq, 0, "key1=batch-1 never exists, it only exists for key6") 147 148 } 149 150 batchFilter = FilterCriteria{ 151 ExpressionFilter: &ExpressionFilter{ 152 LeftInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnName: "key7"}}}, 153 FilterOperator: Equals, 154 RightInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnValue: batchOne}}}, 155 }, 156 } 157 simpleNode = &ASTNode{ 158 AndFilterCondition: &Condition{FilterCriteria: []*FilterCriteria{&batchFilter}}, 159 TimeRange: timeRange, 160 } 161 searchNode = ConvertASTNodeToSearchNode(simpleNode, 0) 162 queryInfo, err = InitQueryInformation(searchNode, nil, timeRange, ti, uint64(numEntriesForBuffer*numBuffers*fileCount), 163 4, 2, nil, 0) 164 assert.NoError(t, err) 165 allQuerySegKeys, rawCount, _, pqsCount, err = getAllSegmentsInQuery(queryInfo, false, time.Now(), 0) 166 assert.NoError(t, err) 167 assert.Len(t, allQuerySegKeys, fileCount) 168 assert.Equal(t, rawCount, uint64(fileCount)) 169 assert.Equal(t, pqsCount, uint64(0)) 170 for _, qsr := range allQuerySegKeys { 171 assert.Equal(t, RAW_SEARCH, qsr.sType) 172 toSearch, err := qsr.GetMicroIndexFilter() 173 assert.NoError(t, err) 174 allSearchReq := ExtractSSRFromSearchNode(searchNode, toSearch, timeRange, ti.GetQueryTables(), summary, 2, true, queryInfo.pqid) 175 assert.Len(t, allSearchReq, 1, "key7 will have batch-1 in only one block") 176 for key, value := range allSearchReq { 177 assert.Equal(t, value.SegmentKey, key) 178 assert.NotNil(t, value.SearchMetadata) 179 assert.Len(t, value.AllBlocksToSearch, 1, "key7 will have batch-1 in only one block") 180 assert.NotNil(t, value.SearchMetadata.BlockSummaries) 181 assert.Contains(t, value.AllBlocksToSearch, uint16(1)) 182 } 183 184 } 185 } 186 187 func rangeMetadataFilter(t *testing.T, numBuffers int, numEntriesForBuffer int, fileCount int) { 188 ti := InitTableInfo("evts", 0, false) 189 zeroValue, _ := CreateDtypeEnclosure(0, 0) 190 valueFilter := FilterCriteria{ 191 ExpressionFilter: &ExpressionFilter{ 192 LeftInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnName: "key8"}}}, 193 FilterOperator: GreaterThan, 194 RightInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnValue: zeroValue}}}, 195 }, 196 } 197 timeRange := &dtu.TimeRange{ 198 StartEpochMs: 0, 199 EndEpochMs: uint64(numEntriesForBuffer), 200 } 201 simpleNode := &ASTNode{ 202 AndFilterCondition: &Condition{FilterCriteria: []*FilterCriteria{&valueFilter}}, 203 TimeRange: timeRange, 204 } 205 searchNode := ConvertASTNodeToSearchNode(simpleNode, 0) 206 queryInfo, err := InitQueryInformation(searchNode, nil, timeRange, ti, uint64(numEntriesForBuffer*numBuffers*fileCount), 207 4, 2, nil, 0) 208 assert.NoError(t, err) 209 allQuerySegKeys, rawCount, _, pqsCount, err := getAllSegmentsInQuery(queryInfo, false, time.Now(), 0) 210 assert.NoError(t, err) 211 assert.Len(t, allQuerySegKeys, fileCount) 212 assert.Equal(t, rawCount, uint64(fileCount)) 213 assert.Equal(t, pqsCount, uint64(0)) 214 215 summary := &summary.QuerySummary{} 216 for _, qsr := range allQuerySegKeys { 217 assert.Equal(t, RAW_SEARCH, qsr.sType) 218 toSearch, err := qsr.GetMicroIndexFilter() 219 assert.NoError(t, err) 220 allSearchReq := ExtractSSRFromSearchNode(searchNode, toSearch, timeRange, ti.GetQueryTables(), summary, 2, true, queryInfo.pqid) 221 assert.Len(t, allSearchReq, 1, "shouldve generated 1 SSR") 222 for key, value := range allSearchReq { 223 assert.Equal(t, value.SegmentKey, key) 224 assert.NotNil(t, value.SearchMetadata) 225 assert.NotNil(t, value.SearchMetadata.BlockSummaries) 226 assert.Len(t, value.AllBlocksToSearch, numBuffers-1, "match all except for block 0") 227 assert.NotContains(t, value.AllBlocksToSearch, uint16(0)) 228 } 229 } 230 231 valueFilter = FilterCriteria{ 232 ExpressionFilter: &ExpressionFilter{ 233 LeftInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnName: "key8"}}}, 234 FilterOperator: Equals, 235 RightInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnValue: zeroValue}}}, 236 }, 237 } 238 simpleNode = &ASTNode{ 239 AndFilterCondition: &Condition{FilterCriteria: []*FilterCriteria{&valueFilter}}, 240 TimeRange: timeRange, 241 } 242 searchNode = ConvertASTNodeToSearchNode(simpleNode, 0) 243 queryInfo, err = InitQueryInformation(searchNode, nil, timeRange, ti, uint64(numEntriesForBuffer*numBuffers*fileCount), 244 4, 2, nil, 0) 245 assert.NoError(t, err) 246 allQuerySegKeys, rawCount, _, pqsCount, err = getAllSegmentsInQuery(queryInfo, false, time.Now(), 0) 247 assert.NoError(t, err) 248 assert.Len(t, allQuerySegKeys, fileCount) 249 assert.Equal(t, rawCount, uint64(fileCount)) 250 assert.Equal(t, pqsCount, uint64(0)) 251 252 for _, qsr := range allQuerySegKeys { 253 assert.Equal(t, RAW_SEARCH, qsr.sType) 254 toSearch, err := qsr.GetMicroIndexFilter() 255 assert.NoError(t, err) 256 allSearchReq := ExtractSSRFromSearchNode(searchNode, toSearch, timeRange, ti.GetQueryTables(), summary, 2, true, queryInfo.pqid) 257 assert.Len(t, allSearchReq, 1, "shouldve generated 1 SSR") 258 for key, value := range allSearchReq { 259 assert.Equal(t, value.SegmentKey, key) 260 // only block 0 should match, but bc blooms are random, there is a non-zero chance another block will pass. 261 // it is unlikely for >1 to pass, but technically it is possible, so this test is on the generous side 262 assert.Less(t, len(value.AllBlocksToSearch), numBuffers/2) 263 assert.Contains(t, value.AllBlocksToSearch, uint16(0)) 264 assert.NotNil(t, value.SearchMetadata) 265 assert.NotNil(t, value.SearchMetadata.BlockSummaries) 266 } 267 } 268 269 valueFilter = FilterCriteria{ 270 ExpressionFilter: &ExpressionFilter{ 271 LeftInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnName: "key8"}}}, 272 FilterOperator: LessThan, 273 RightInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnValue: zeroValue}}}, 274 }, 275 } 276 simpleNode = &ASTNode{ 277 AndFilterCondition: &Condition{FilterCriteria: []*FilterCriteria{&valueFilter}}, 278 TimeRange: timeRange, 279 } 280 searchNode = ConvertASTNodeToSearchNode(simpleNode, 0) 281 queryInfo, err = InitQueryInformation(searchNode, nil, timeRange, ti, uint64(numEntriesForBuffer*numBuffers*fileCount), 282 4, 2, nil, 0) 283 assert.NoError(t, err) 284 allQuerySegKeys, rawCount, _, pqsCount, err = getAllSegmentsInQuery(queryInfo, false, time.Now(), 0) 285 assert.NoError(t, err) 286 assert.Len(t, allQuerySegKeys, fileCount) 287 assert.Equal(t, rawCount, uint64(fileCount)) 288 assert.Equal(t, pqsCount, uint64(0)) 289 for _, qsr := range allQuerySegKeys { 290 assert.Equal(t, RAW_SEARCH, qsr.sType) 291 toSearch, err := qsr.GetMicroIndexFilter() 292 assert.NoError(t, err) 293 allSearchReq := ExtractSSRFromSearchNode(searchNode, toSearch, timeRange, ti.GetQueryTables(), summary, 2, true, queryInfo.pqid) 294 assert.Len(t, allSearchReq, 0, "no blocks have <0") 295 } 296 } 297 298 func pqsSegQuery(t *testing.T, numBuffers int, numEntriesForBuffer int, fileCount int) { 299 config.SetPQSEnabled(true) 300 301 // new generate mock rotated with pqs for subset of blocks 302 // make sure raw search actually does raw search for the blocks not in sqpmr 303 ti := InitTableInfo("evts", 0, false) 304 fullTimeRange := &dtu.TimeRange{ 305 StartEpochMs: 0, 306 EndEpochMs: uint64(numEntriesForBuffer), 307 } 308 zero, _ := CreateDtypeEnclosure("record-batch-0", 0) 309 valueFilter := FilterCriteria{ 310 ExpressionFilter: &ExpressionFilter{ 311 LeftInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnName: "key11"}}}, 312 FilterOperator: Equals, 313 RightInput: &FilterInput{Expression: &Expression{LeftInput: &ExpressionInput{ColumnValue: zero}}}, 314 }, 315 } 316 simpleNode := &ASTNode{ 317 AndFilterCondition: &Condition{FilterCriteria: []*FilterCriteria{&valueFilter}}, 318 TimeRange: fullTimeRange, 319 } 320 searchNode := ConvertASTNodeToSearchNode(simpleNode, 0) 321 322 allPossibleKeys, finalCount, totalCount := metadata.FilterSegmentsByTime(fullTimeRange, ti.GetQueryTables(), 0) 323 assert.Equal(t, len(allPossibleKeys), 1) 324 assert.Contains(t, allPossibleKeys, "evts") 325 assert.Len(t, allPossibleKeys["evts"], fileCount) 326 assert.Equal(t, finalCount, totalCount) 327 assert.Equal(t, finalCount, uint64(fileCount)) 328 329 pqid := querytracker.GetHashForQuery(searchNode) 330 for tName, segKeys := range allPossibleKeys { 331 for segKey := range segKeys { 332 spqmr := pqmr.InitSegmentPQMResults() 333 currSPQMRFile := segKey + "/pqmr/" + pqid + ".pqmr" 334 for blkNum := 0; blkNum < numBuffers; blkNum++ { 335 if blkNum%2 == 0 { 336 continue // force raw search of even blocks 337 } 338 currPQMR := pqmr.CreatePQMatchResults(uint(numEntriesForBuffer)) 339 for recNum := 0; recNum < numEntriesForBuffer; recNum++ { 340 if recNum%2 == 0 { 341 currPQMR.AddMatchedRecord(uint(recNum)) 342 } 343 } 344 spqmr.SetBlockResults(uint16(blkNum), currPQMR) 345 err := currPQMR.FlushPqmr(&currSPQMRFile, uint16(blkNum)) 346 assert.Nil(t, err, "no error on flush") 347 } 348 pqs.AddPersistentQueryResult(segKey, tName, pqid) 349 } 350 } 351 querySummary := summary.InitQuerySummary(summary.LOGS, 1) 352 sizeLimit := uint64(numBuffers * numEntriesForBuffer * fileCount) 353 allSegFileResults, err := segresults.InitSearchResults(sizeLimit, nil, RRCCmd, 4) 354 assert.Nil(t, err, "no error on init") 355 queryInfo, err := InitQueryInformation(searchNode, nil, fullTimeRange, ti, uint64(numEntriesForBuffer*numBuffers*fileCount), 356 4, 2, nil, 0) 357 assert.NoError(t, err) 358 querySegmentRequests, numRawSearchKeys, _, numPQSKeys, err := getAllSegmentsInQuery(queryInfo, false, time.Now(), 0) 359 assert.NoError(t, err) 360 assert.Len(t, querySegmentRequests, fileCount, "each file has a query segment request") 361 assert.Equal(t, uint64(0), numRawSearchKeys) 362 assert.Equal(t, uint64(fileCount), numPQSKeys) 363 364 for _, qsr := range querySegmentRequests { 365 assert.Equal(t, PQS, qsr.sType) 366 err := applyFilterOperatorSingleRequest(qsr, allSegFileResults, querySummary) 367 assert.NoError(t, err) 368 assert.Equal(t, RAW_SEARCH, qsr.sType, "changed type to raw search after pqs filtering") 369 assert.NotNil(t, qsr.blkTracker, "added blkTacker after pqs filtering") 370 fullBlkTracker, err := qsr.GetMicroIndexFilter() 371 assert.NoError(t, err) 372 assert.Contains(t, fullBlkTracker, "evts", "pqs raw search table") 373 assert.Len(t, fullBlkTracker["evts"], 1) 374 assert.Contains(t, fullBlkTracker["evts"], qsr.segKey, "resulting map should be map[tableName]->map[segKey]->blkTracker") 375 for _, blkTracker := range fullBlkTracker["evts"] { 376 for i := uint16(0); i < uint16(numBuffers); i++ { 377 if i%2 == 0 { 378 assert.True(t, blkTracker.ShouldProcessBlock(i), "Block %+v should be raw searched", i) 379 } else { 380 assert.False(t, blkTracker.ShouldProcessBlock(i), "Block %+v should not be raw searched", i) 381 } 382 } 383 } 384 385 ssrForMissingPQS := ExtractSSRFromSearchNode(searchNode, fullBlkTracker, fullTimeRange, ti.GetQueryTables(), querySummary, 1, true, pqid) 386 assert.Len(t, ssrForMissingPQS, 1, "generate SSR one file at a time") 387 for _, ssr := range ssrForMissingPQS { 388 for i := uint16(0); i < uint16(numBuffers); i++ { 389 if i%2 == 0 { 390 assert.Contains(t, ssr.AllBlocksToSearch, i) 391 } else { 392 assert.NotContains(t, ssr.AllBlocksToSearch, i) 393 } 394 } 395 assert.NotNil(t, ssr.SearchMetadata) 396 assert.NotNil(t, ssr.SearchMetadata.BlockSummaries) 397 assert.Len(t, ssr.SearchMetadata.BlockSummaries, numBuffers) 398 } 399 } 400 qc := InitQueryContextWithTableInfo(ti, sizeLimit, 0, 0, false) 401 // run a single query end to end 402 nodeRes := ApplyFilterOperator(simpleNode, fullTimeRange, nil, 5, qc) 403 assert.NotNil(t, nodeRes) 404 assert.Len(t, nodeRes.ErrList, 0, "no errors") 405 expectedCount := uint64((numBuffers*numEntriesForBuffer)/2) * uint64(fileCount) 406 assert.Equal(t, expectedCount, nodeRes.TotalResults.TotalCount, "match using pqmr & not") 407 assert.Equal(t, Equals, nodeRes.TotalResults.Op, "no early exit") 408 } 409 410 func Test_segQueryFilter(t *testing.T) { 411 numBuffers := 5 412 numEntriesForBuffer := 10 413 fileCount := 5 414 instrumentation.InitMetrics() 415 _ = localstorage.InitLocalStorage() 416 config.InitializeTestingConfig() 417 limit.InitMemoryLimiter() 418 err := InitQueryNode(getMyIds, serverutils.ExtractKibanaRequests) 419 if err != nil { 420 t.Fatalf("Failed to initialize query node: %v", err) 421 } 422 metadata.InitMockColumnarMetadataStore("data/", fileCount, numBuffers, numEntriesForBuffer) 423 424 bloomMetadataFilter(t, numBuffers, numEntriesForBuffer, fileCount) 425 rangeMetadataFilter(t, numBuffers, numEntriesForBuffer, fileCount) 426 pqsSegQuery(t, numBuffers, numEntriesForBuffer, fileCount) 427 // add more simple, complex, and nested metadata checking 428 time.Sleep(1 * time.Second) // sleep to give some time for background pqs threads to write out dirs 429 err = os.RemoveAll("data/") 430 assert.Nil(t, err) 431 }