github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/query/metadatafilter_test.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package query
    18  
    19  import (
    20  	"os"
    21  	"testing"
    22  
    23  	localstorage "github.com/siglens/siglens/pkg/blob/local"
    24  	dtu "github.com/siglens/siglens/pkg/common/dtypeutils"
    25  	"github.com/siglens/siglens/pkg/config"
    26  	"github.com/siglens/siglens/pkg/segment/memory/limit"
    27  	"github.com/siglens/siglens/pkg/segment/query/metadata"
    28  	. "github.com/siglens/siglens/pkg/segment/structs"
    29  	"github.com/siglens/siglens/pkg/segment/utils"
    30  	serverutils "github.com/siglens/siglens/pkg/server/utils"
    31  	log "github.com/sirupsen/logrus"
    32  	"github.com/stretchr/testify/assert"
    33  )
    34  
    35  func testTimeFilter(t *testing.T, numBlocks int, numEntriesInBlock int, fileCount int) {
    36  
    37  	tRange := &dtu.TimeRange{
    38  		StartEpochMs: 0,
    39  		EndEpochMs:   uint64(numEntriesInBlock),
    40  	}
    41  
    42  	timeFilteredFiles, totalChecked, passedCheck := metadata.FilterSegmentsByTime(tRange, []string{"evts"}, 0)
    43  	log.Infof("time filter: %v", timeFilteredFiles)
    44  	assert.Equal(t, passedCheck, uint64(fileCount), "all files passed")
    45  	assert.Equal(t, totalChecked, uint64(fileCount), "all files passed")
    46  	assert.Len(t, timeFilteredFiles, 1, "one table")
    47  	assert.Contains(t, timeFilteredFiles, "evts", "one table")
    48  	assert.Len(t, timeFilteredFiles["evts"], fileCount)
    49  
    50  	// adding extra tables that do not exist should not change results
    51  	extraTableFiles, totalChecked, passedCheck := metadata.FilterSegmentsByTime(tRange, []string{"evts", "extra-table"}, 0)
    52  	assert.Equal(t, passedCheck, uint64(fileCount), "all files passed")
    53  	assert.Equal(t, totalChecked, uint64(fileCount), "all files passed")
    54  	assert.Len(t, extraTableFiles, 1, "one table")
    55  	assert.Contains(t, extraTableFiles, "evts", "one table")
    56  	assert.Len(t, extraTableFiles["evts"], fileCount)
    57  
    58  	// no results when no tables are given
    59  	noTableFiles, totalChecked, passedCheck := metadata.FilterSegmentsByTime(tRange, []string{}, 0)
    60  	assert.Equal(t, passedCheck, uint64(0), "no tables")
    61  	assert.Equal(t, totalChecked, uint64(0), "no tables")
    62  	assert.Len(t, noTableFiles, 0)
    63  	assert.Len(t, noTableFiles["evts"], 0)
    64  }
    65  
    66  func testBloomFilter(t *testing.T, numBlocks int, numEntriesInBlock int, fileCount int) {
    67  	tRange := &dtu.TimeRange{
    68  		StartEpochMs: 0,
    69  		EndEpochMs:   uint64(numEntriesInBlock),
    70  	}
    71  	indexNames := []string{"evts"}
    72  	value1, _ := utils.CreateDtypeEnclosure("value1", 0)
    73  	baseQuery := &SearchQuery{
    74  		ExpressionFilter: &SearchExpression{
    75  			LeftSearchInput:  &SearchExpressionInput{ColumnName: "key1"},
    76  			FilterOp:         utils.Equals,
    77  			RightSearchInput: &SearchExpressionInput{ColumnValue: value1},
    78  		},
    79  		SearchType: SimpleExpression,
    80  	}
    81  	allFiles, _, _ := metadata.FilterSegmentsByTime(tRange, indexNames, 0)
    82  	ti := InitTableInfo("evts", 0, false)
    83  	sn := &SearchNode{
    84  		AndSearchConditions: &SearchCondition{
    85  			SearchQueries: []*SearchQuery{baseQuery},
    86  		},
    87  	}
    88  	qInfo, err := InitQueryInformation(sn, nil, tRange, ti, uint64(numBlocks*numEntriesInBlock*fileCount), 5, 1, nil, 0)
    89  	assert.NoError(t, err)
    90  	qsrs := convertSegKeysToQueryRequests(qInfo, allFiles)
    91  	keysToRawSearch, _, _ := filterSegKeysToQueryResults(qInfo, qsrs)
    92  
    93  	_, _, isRange := baseQuery.ExtractRangeFilterFromQuery(1)
    94  	assert.False(t, isRange)
    95  
    96  	blockbloomKeywords, wildcard, blockOp := baseQuery.GetAllBlockBloomKeysToSearch()
    97  	assert.False(t, wildcard)
    98  
    99  	assert.Len(t, blockbloomKeywords, 1)
   100  	assert.Equal(t, blockOp, utils.And)
   101  	assert.Contains(t, blockbloomKeywords, "value1")
   102  	assert.Len(t, keysToRawSearch, fileCount, "raw search all keys but got %+v. expected %+v", keysToRawSearch, fileCount)
   103  	var rangeOp utils.FilterOperator = utils.Equals
   104  	for _, qsr := range keysToRawSearch {
   105  		assert.Equal(t, RAW_SEARCH, qsr.sType)
   106  		blkTracker, err := qsr.GetMicroIndexFilter()
   107  		assert.NoError(t, err, "no error should occur when getting block tracker")
   108  		searchRequests, checkedBlocks, matchedBlocks, errs := getAllSearchRequestsFromCmi(baseQuery, tRange, blkTracker,
   109  			blockbloomKeywords, blockOp, nil, rangeOp, false, wildcard, 0, true, qsr.pqid)
   110  		assert.Len(t, errs, 0)
   111  		assert.Len(t, searchRequests, 1, "one file at a time")
   112  		assert.Equal(t, uint64(numBlocks), checkedBlocks, "checkedBlocks blocks is not as expected")
   113  		assert.Equal(t, uint64(numBlocks), matchedBlocks, "matchedBlocks blocks is not as expected")
   114  		for _, sReq := range searchRequests {
   115  			assert.Len(t, sReq.AllBlocksToSearch, len(sReq.SearchMetadata.BlockSummaries))
   116  		}
   117  	}
   118  
   119  	var randomFile string
   120  	for fileName := range allFiles["evts"] {
   121  		randomFile = fileName
   122  		break
   123  	}
   124  	log.Infof("Searching for file %s", randomFile)
   125  	randomFileDTE, _ := utils.CreateDtypeEnclosure(randomFile, 0)
   126  	fileNameQuery := &SearchQuery{
   127  		ExpressionFilter: &SearchExpression{
   128  			LeftSearchInput:  &SearchExpressionInput{ColumnName: "key10"},
   129  			FilterOp:         utils.Equals,
   130  			RightSearchInput: &SearchExpressionInput{ColumnValue: randomFileDTE},
   131  		},
   132  		SearchType: SimpleExpression,
   133  	}
   134  	blockbloomKeywords, wildcard, blockOp = fileNameQuery.GetAllBlockBloomKeysToSearch()
   135  	assert.False(t, wildcard)
   136  	assert.Len(t, blockbloomKeywords, 1)
   137  	assert.Equal(t, blockOp, utils.And)
   138  	assert.Contains(t, blockbloomKeywords, randomFile)
   139  
   140  	assert.Len(t, keysToRawSearch, fileCount, "raw search all keys but got %+v. expected %+v", keysToRawSearch, fileCount)
   141  	for _, qsr := range keysToRawSearch {
   142  		assert.Equal(t, RAW_SEARCH, qsr.sType)
   143  		blkTracker, err := qsr.GetMicroIndexFilter()
   144  		assert.NoError(t, err, "no error should occur when getting block tracker")
   145  		searchRequests, checkedBlocks, matchedBlocks, errs := getAllSearchRequestsFromCmi(fileNameQuery, tRange, blkTracker,
   146  			blockbloomKeywords, blockOp, nil, rangeOp, false, wildcard, 0, true, qsr.pqid)
   147  		assert.Len(t, errs, 0)
   148  		assert.Equal(t, uint64(numBlocks), checkedBlocks, "all blocks will be checked")
   149  		if qsr.segKey == randomFile {
   150  			assert.Len(t, searchRequests, 1, "file with segKey == %+v should be the only match", qsr.segKey)
   151  			assert.Equal(t, uint64(numBlocks), matchedBlocks, "a single file with have the right value for key10")
   152  			for _, sReq := range searchRequests {
   153  				assert.Len(t, sReq.AllBlocksToSearch, len(sReq.SearchMetadata.BlockSummaries))
   154  			}
   155  		} else {
   156  			assert.Len(t, searchRequests, 0, "should not generate an ssr with key %+v when looking for %+v", qsr.segKey, randomFile)
   157  			assert.Equal(t, uint64(0), matchedBlocks, "no matched blocks")
   158  		}
   159  	}
   160  
   161  	// key7 == batch-1 test
   162  	batchOne, _ := utils.CreateDtypeEnclosure("batch-1", 0)
   163  	batchQuery := &SearchQuery{
   164  		ExpressionFilter: &SearchExpression{
   165  			LeftSearchInput:  &SearchExpressionInput{ColumnName: "key7"},
   166  			FilterOp:         utils.Equals,
   167  			RightSearchInput: &SearchExpressionInput{ColumnValue: batchOne},
   168  		},
   169  		SearchType: SimpleExpression,
   170  	}
   171  	allFiles, _, _ = metadata.FilterSegmentsByTime(tRange, []string{"evts"}, 0)
   172  	qsrs = convertSegKeysToQueryRequests(qInfo, allFiles)
   173  	keysToRawSearch, _, _ = filterSegKeysToQueryResults(qInfo, qsrs)
   174  
   175  	blockbloomKeywords, wildcard, blockOp = batchQuery.GetAllBlockBloomKeysToSearch()
   176  	assert.False(t, wildcard)
   177  	assert.Len(t, blockbloomKeywords, 1)
   178  	assert.Equal(t, blockOp, utils.And)
   179  	assert.Contains(t, blockbloomKeywords, "batch-1")
   180  	log.Infof("batch query block bloom keys : %v, block op %v", blockbloomKeywords, blockOp)
   181  
   182  	assert.Len(t, keysToRawSearch, fileCount, "raw search all keys but got %+v. expected %+v", keysToRawSearch, fileCount)
   183  	for _, qsr := range keysToRawSearch {
   184  		assert.Equal(t, RAW_SEARCH, qsr.sType)
   185  		blkTracker, err := qsr.GetMicroIndexFilter()
   186  		assert.NoError(t, err, "no error should occur when getting block tracker")
   187  		searchRequests, checkedBlocks, matchedBlocks, errs := getAllSearchRequestsFromCmi(batchQuery, tRange, blkTracker,
   188  			blockbloomKeywords, blockOp, nil, rangeOp, false, wildcard, 0, true, qsr.pqid)
   189  		assert.Len(t, errs, 0)
   190  		assert.Len(t, searchRequests, 1, "process single request at a time")
   191  		assert.Equal(t, uint64(numBlocks), checkedBlocks, "each file will should have a single matching block")
   192  		assert.Equal(t, uint64(1), matchedBlocks, "each file will should have a single matching block")
   193  		for _, sReq := range searchRequests {
   194  			assert.Len(t, sReq.AllBlocksToSearch, 1)
   195  			assert.Contains(t, sReq.AllBlocksToSearch, uint16(1))
   196  		}
   197  	}
   198  
   199  	batchWildcardQuery := &SearchQuery{
   200  		ExpressionFilter: &SearchExpression{
   201  			LeftSearchInput:  &SearchExpressionInput{ColumnName: "*"},
   202  			FilterOp:         utils.Equals,
   203  			RightSearchInput: &SearchExpressionInput{ColumnValue: batchOne},
   204  		},
   205  		SearchType: SimpleExpression,
   206  	}
   207  
   208  	// changing col name has no effect on block bloom keys
   209  	blockbloomKeywords, wildcardValue, blockOp := batchWildcardQuery.GetAllBlockBloomKeysToSearch()
   210  	assert.False(t, wildcardValue)
   211  	assert.Len(t, blockbloomKeywords, 1)
   212  	assert.Equal(t, blockOp, utils.And)
   213  	assert.Contains(t, blockbloomKeywords, "batch-1")
   214  	cols, wildcard := batchWildcardQuery.GetAllColumnsInQuery()
   215  	assert.True(t, wildcard)
   216  	assert.Len(t, cols, 0)
   217  
   218  	for _, qsr := range keysToRawSearch {
   219  		blkTracker, err := qsr.GetMicroIndexFilter()
   220  		assert.NoError(t, err, "no error should occur when getting block tracker")
   221  		searchRequests, checkedBlocks, matchedBlocks, errs := getAllSearchRequestsFromCmi(batchWildcardQuery, tRange, blkTracker,
   222  			blockbloomKeywords, blockOp, nil, rangeOp, false, wildcardValue, 0, true, qsr.pqid)
   223  		assert.Len(t, errs, 0)
   224  		assert.Len(t, searchRequests, 1, "one file at a time key7=batch-1")
   225  		assert.Equal(t, uint64(numBlocks), checkedBlocks, "each file will should have a single matching block")
   226  		assert.Equal(t, uint64(1), matchedBlocks, "each file will should have a single matching block")
   227  		for _, sReq := range searchRequests {
   228  			assert.Len(t, sReq.AllBlocksToSearch, 1)
   229  			assert.Contains(t, sReq.AllBlocksToSearch, uint16(1))
   230  		}
   231  	}
   232  
   233  }
   234  
   235  func testRangeFilter(t *testing.T, numBlocks int, numEntriesInBlock int, fileCount int) {
   236  	tRange := &dtu.TimeRange{
   237  		StartEpochMs: 0,
   238  		EndEpochMs:   uint64(numEntriesInBlock),
   239  	}
   240  	rangeValue, _ := utils.CreateDtypeEnclosure(int64(0), 0)
   241  	rangeQuery := &SearchQuery{
   242  		ExpressionFilter: &SearchExpression{
   243  			LeftSearchInput:  &SearchExpressionInput{ColumnName: "key8"},
   244  			FilterOp:         utils.Equals,
   245  			RightSearchInput: &SearchExpressionInput{ColumnValue: rangeValue},
   246  		},
   247  		SearchType: SimpleExpression,
   248  	}
   249  	allFiles, _, _ := metadata.FilterSegmentsByTime(tRange, []string{"evts"}, 0)
   250  	ti := InitTableInfo("evts", 0, false)
   251  	sn := &SearchNode{
   252  		AndSearchConditions: &SearchCondition{
   253  			SearchQueries: []*SearchQuery{rangeQuery},
   254  		},
   255  	}
   256  	qInfo, err := InitQueryInformation(sn, nil, tRange, ti, uint64(numBlocks*numEntriesInBlock*fileCount), 5, 1, nil, 0)
   257  	assert.NoError(t, err)
   258  	qsrs := convertSegKeysToQueryRequests(qInfo, allFiles)
   259  	keysToRawSearch, _, _ := filterSegKeysToQueryResults(qInfo, qsrs)
   260  	rangeFilter, rangeOp, isRange := rangeQuery.ExtractRangeFilterFromQuery(1)
   261  	log.Infof("Extracting range query. Filter %+v, RangeOp %+v", rangeFilter, rangeOp)
   262  	assert.True(t, isRange)
   263  
   264  	for _, qsr := range keysToRawSearch {
   265  		assert.Equal(t, RAW_SEARCH, qsr.sType)
   266  		blkTracker, err := qsr.GetMicroIndexFilter()
   267  		assert.NoError(t, err, "no error should occur when getting block tracker")
   268  		finalRangeRequests, totalChecked, passedBlocks, errs := getAllSearchRequestsFromCmi(rangeQuery, tRange, blkTracker,
   269  			nil, utils.And, rangeFilter, rangeOp, true, false, 0, true, qsr.pqid)
   270  		assert.Len(t, errs, 0)
   271  		assert.Equal(t, uint64(numBlocks), totalChecked)
   272  		assert.Equal(t, uint64(1), passedBlocks, "one block in each file matches")
   273  		for _, sReq := range finalRangeRequests {
   274  			assert.Len(t, sReq.AllBlocksToSearch, 1)
   275  			assert.Contains(t, sReq.AllBlocksToSearch, uint16(0))
   276  			log.Infof("sReq %+v", sReq.AllBlocksToSearch)
   277  		}
   278  	}
   279  }
   280  
   281  func getMyIds() []uint64 {
   282  	myids := make([]uint64, 1)
   283  	myids[0] = 0
   284  	return myids
   285  }
   286  
   287  func Test_MetadataFilter(t *testing.T) {
   288  	numBlocks := 5
   289  	numEntriesInBlock := 10
   290  	fileCount := 5
   291  	config.InitializeDefaultConfig()
   292  	_ = localstorage.InitLocalStorage()
   293  	limit.InitMemoryLimiter()
   294  	err := InitQueryNode(getMyIds, serverutils.ExtractKibanaRequests)
   295  	if err != nil {
   296  		t.Fatalf("Failed to initialize query node: %v", err)
   297  	}
   298  	metadata.InitMockColumnarMetadataStore("data/", fileCount, numBlocks, numEntriesInBlock)
   299  	testTimeFilter(t, numBlocks, numEntriesInBlock, fileCount)
   300  	testBloomFilter(t, numBlocks, numEntriesInBlock, fileCount)
   301  	testRangeFilter(t, numBlocks, numEntriesInBlock, fileCount)
   302  
   303  	err = os.RemoveAll("data/")
   304  	if err != nil {
   305  		t.Fatalf("Failed to initialize query node: %v", err)
   306  	}
   307  }