github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/reader/segread/agiletreereader_test.go

/*
Copyright 2023.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package segread

// Imports required when the tests below are re-enabled:
/*
	"encoding/json"
	"fmt"
	"math/rand"
	"sort"
	"testing"

	"github.com/siglens/siglens/pkg/config"
	"github.com/siglens/siglens/pkg/segment/pqmr"
	"github.com/siglens/siglens/pkg/segment/structs"
	"github.com/siglens/siglens/pkg/segment/utils"
	"github.com/siglens/siglens/pkg/segment/writer"
	log "github.com/sirupsen/logrus"
	"github.com/stretchr/testify/assert"
	bbp "github.com/valyala/bytebufferpool"
*/

/*
// TODO: this test is disabled since we are not sure yet if we will support filters via agileTree.

func Test_StartTreeColumnFilter(t *testing.T) {
	allCols := make(map[string]bool)
	segstats := make(map[string]*structs.SegStats)

	wipBlock := writer.WipBlock{
		columnBlooms:       make(map[string]*writer.BloomIndex),
		columnRangeIndexes: make(map[string]*writer.RangeIndex),
		colWips:            make(map[string]*writer.ColWip),
		pqMatches:          make(map[string]*pqmr.PQMatchResults),
		columnsInBlock:     make(map[string]bool),
		tomRollup:          make(map[uint64]*RolledRecs),
		tohRollup:          make(map[uint64]*RolledRecs),
		todRollup:          make(map[uint64]*RolledRecs),
		bb:                 bbp.Get(),
		blockTs:            make([]uint64, 0),
	}
	segStore := &SegStore{
		wipBlock:       wipBlock,
		SegmentKey:     "test-segkey",
		AllSeenColumns: allCols,
		pqTracker:      initPQTracker(),
		AllSst:         segstats,
		numBlocks:      0,
	}

	entryCount := uint16(16_000)
	tsKey := config.GetTimeStampKey()
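	// Ingest entryCount synthetic records; key4 alternates between "even" and "odd", and key3 holds the record number.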
	for i := uint16(0); i < entryCount; i++ {
		entry := make(map[string]interface{})
		entry["key1"] = "match words 123 abc"
		entry["key2"] = "value1"
		entry["key3"] = i
		if i%2 == 0 {
			entry["key4"] = "even"
		} else {
			entry["key4"] = "odd"
		}
		entry["key5"] = fmt.Sprintf("batch-%v", rand.Intn(10))
		entry["key6"] = rand.Int()

		timestp := uint64(i) + 1 // don't start with 0 as the timestamp
		raw, _ := json.Marshal(entry)
		_, _, err := segStore.EncodeColumns(raw, timestp, &tsKey)
		assert.NoError(t, err)
		segStore.wipBlock.blockSummary.RecCount += 1
	}

	groupByCols := []string{"key2", "key4", "key5"}
	aggFunctions := make([]*structs.MeasureAggregator, 0)

	for _, col := range []string{"key3", "key6"} {
		for _, fun := range []utils.AggregateFunctions{utils.Sum, utils.Min, utils.Max} {
			aggFunctions = append(aggFunctions, &structs.MeasureAggregator{MeasureCol: col, MeasureFunc: fun})
		}
	}

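	// Build a filter query equivalent to key4 == "even".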
	even, _ := utils.CreateDtypeEnclosure("even", 0)
	evenQuery := &structs.SearchQuery{
		ExpressionFilter: &structs.SearchExpression{
			LeftSearchInput:  &structs.SearchExpressionInput{ColumnName: "key4"},
			FilterOp:         utils.Equals,
			RightSearchInput: &structs.SearchExpressionInput{ColumnValue: even},
		},
		SearchType: structs.SimpleExpression,
	}
	evenQuery.GetQueryInfo()
	var builder StarTreeBuilder

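	// The even record numbers (0, 2, 4, ...) are exactly the records the key4 == "even" filter should return.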
	expected := make([]uint16, entryCount/2)
	idx := 0
	for i := uint16(0); i < entryCount; i += 2 {
		expected[idx] = i
		idx++
	}

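	// Rebuild the star tree with the group-by columns shuffled into a random order each iteration;
	// the filter results must not depend on the column ordering.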
	for i := 0; i < 100; i++ {
		rand.Shuffle(len(groupByCols), func(i, j int) { groupByCols[i], groupByCols[j] = groupByCols[j], groupByCols[i] })
		log.Infof("iteration %+v using groupby cols %+v", i, groupByCols)
		builder.Reset(&segStore.wipBlock, groupByCols)
		result := builder.ComputeStarTree(&segStore.wipBlock, groupByCols, aggFunctions)
		data, err := builder.EncodeStarTree(&segStore.wipBlock, &result, groupByCols, aggFunctions)
		assert.Nil(t, err)
		decoded, err := DecodeStarTree(data)
		assert.Nil(t, err)
		check(t, *decoded, groupByCols, aggFunctions, &result)
		retVal, err := decoded.ApplyColumnFilter(evenQuery)
		log.Infof("iteration %+v has %+v results", i, len(retVal))
		assert.Equal(t, decoded.metadata.GroupByKeys, groupByCols)
		assert.Nil(t, err)
		assert.Equal(t, uint16(len(retVal)), entryCount/2)
		sort.Slice(retVal, func(i, j int) bool { return retVal[i] < retVal[j] })
		assert.Equal(t, retVal, expected)
	}
}

func Test_StartTreeGroupBy(t *testing.T) {
	allCols := make(map[string]bool)
	segstats := make(map[string]*structs.SegStats)

	wipBlock := writer.WipBlock{
		columnBlooms:       make(map[string]*writer.BloomIndex),
		columnRangeIndexes: make(map[string]*writer.RangeIndex),
		colWips:            make(map[string]*writer.ColWip),
		pqMatches:          make(map[string]*pqmr.PQMatchResults),
		columnsInBlock:     make(map[string]bool),
		tomRollup:          make(map[uint64]*RolledRecs),
		tohRollup:          make(map[uint64]*RolledRecs),
		todRollup:          make(map[uint64]*RolledRecs),
		bb:                 bbp.Get(),
		blockTs:            make([]uint64, 0),
	}
	segStore := &SegStore{
		wipBlock:       wipBlock,
		SegmentKey:     "test-segkey",
		AllSeenColumns: allCols,
		pqTracker:      initPQTracker(),
		AllSst:         segstats,
		numBlocks:      0,
	}

	entryCount := 16_000
	tsKey := config.GetTimeStampKey()
	for i := 0; i < entryCount; i++ {
		entry := make(map[string]interface{})
		entry["key1"] = "match words 123 abc"
		entry["key2"] = "value1"
		entry["key3"] = i
		if i%2 == 0 {
			entry["key4"] = "even"
		} else {
			entry["key4"] = "odd"
		}
		entry["key5"] = fmt.Sprintf("batch-%v", rand.Intn(10))
		entry["key6"] = rand.Int()

		timestp := uint64(i) + 1 // don't start with 0 as the timestamp
		raw, _ := json.Marshal(entry)
		_, _, err := segStore.EncodeColumns(raw, timestp, &tsKey)
		assert.NoError(t, err)
		segStore.wipBlock.blockSummary.RecCount += 1
	}

	groupByCols := []string{"key2", "key4", "key5"}
	aggFunctions := make([]*structs.MeasureAggregator, 0)

	for _, col := range []string{"key3", "key6"} {
		for _, fun := range []utils.AggregateFunctions{utils.Sum, utils.Min, utils.Max} {
			aggFunctions = append(aggFunctions, &structs.MeasureAggregator{MeasureCol: col, MeasureFunc: fun})
		}
	}

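	// Group by key4 and compute the min, max, and sum of key3.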
	grpByCols := []string{"key4"}
	measureOps := []*structs.MeasureAggregator{
		{MeasureCol: "key3", MeasureFunc: utils.Min},
		{MeasureCol: "key3", MeasureFunc: utils.Max},
		{MeasureCol: "key3", MeasureFunc: utils.Sum},
	}
	grpByRequest := &structs.GroupByRequest{MeasureOperations: measureOps, GroupByColumns: grpByCols}

	var builder StarTreeBuilder

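	// Precompute the expected sums of the even and odd values of key3.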
	oddSum := int64(0)
	evenSum := int64(0)
	for i := int64(0); i < int64(entryCount); i++ {
		if i%2 == 0 {
			evenSum += i
		} else {
			oddSum += i
		}
	}

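	// Rebuild the star tree with the aggregators shuffled into a random order each iteration;
	// the group-by results must not depend on the aggregator ordering.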
	for i := 0; i < 100; i++ {
		rand.Shuffle(len(aggFunctions), func(i, j int) { aggFunctions[i], aggFunctions[j] = aggFunctions[j], aggFunctions[i] })
		log.Infof("iteration %+v using agg fns cols %+v", i, aggFunctions)
		builder.Reset(&segStore.wipBlock, groupByCols)
		result := builder.ComputeStarTree(&segStore.wipBlock, groupByCols, aggFunctions)
		data, err := builder.EncodeStarTree(&segStore.wipBlock, &result, groupByCols, aggFunctions)
		assert.Nil(t, err)

		// TODO: write UTs that use str.ReadMeta and compare the decoded treeMeta.
		// TODO: write a just-in-time decoder to check that the returned agg values are accurate.
		// We first need to go through each block, write the block to the .str file,
		// and create a block summary as each tree is encoded, then pass it to str.InitNewAgileTreeReader.
		// Use WriteMockSegFile to create a segfile.

		decoded, err := DecodeStarTree(data)
		assert.Nil(t, err)
		check(t, *decoded, groupByCols, aggFunctions, &result)
		retVal, err := decoded.ApplyGroupBy(grpByRequest)
		assert.Equal(t, decoded.metadata.GroupByKeys, groupByCols)
		assert.Nil(t, err)
		assert.Len(t, retVal, 2, "key4 has 2 unique values")
		assert.Contains(t, retVal, "even")
		assert.Contains(t, retVal, "odd")

		evenAggs := retVal["even"]
		assert.Len(t, evenAggs, len(measureOps))
		assert.Equal(t, evenAggs[0].CVal.(int64), int64(0), "min is 0")
		assert.Equal(t, evenAggs[1].CVal.(int64), int64(entryCount-2))
		assert.Equal(t, evenAggs[2].CVal.(int64), evenSum, "sum of the even values")

		oddAggs := retVal["odd"]
		assert.Len(t, oddAggs, len(measureOps))
		assert.Equal(t, oddAggs[0].CVal.(int64), int64(1), "min is 1")
		assert.Equal(t, oddAggs[1].CVal.(int64), int64(entryCount-1))
		assert.Equal(t, oddAggs[2].CVal.(int64), oddSum, "sum of the odd values")
	}
}
*/