github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/writer/startree_test.go

/*
Copyright 2023.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package writer

import (
	"bytes"
	"fmt"
	"os"
	"testing"

	jsoniter "github.com/json-iterator/go"
	"github.com/siglens/siglens/pkg/config"
	"github.com/siglens/siglens/pkg/segment/pqmr"
	"github.com/siglens/siglens/pkg/segment/structs"
	. "github.com/siglens/siglens/pkg/segment/structs"
	"github.com/siglens/siglens/pkg/segment/utils"
	"github.com/stretchr/testify/assert"
	bbp "github.com/valyala/bytebufferpool"
)

var cases = []struct {
	input string
}{
	{
		`{
			"a": "val1",
			"b": "val1",
			"c": true,
			"d": "John",
			"e": 1,
			"f": 2
		}`,
	},
	{
		`{
			"a": "val1",
			"b": "val1",
			"c": true,
			"d": "John",
			"e": 1,
			"f": 2
		}`,
	},
	{
		`{
			"a": "val1",
			"b": "val1",
			"c": true,
			"d": "John",
			"e": 1,
			"f": 2
		}`,
	},
	{
		`{
			"a": "val1",
			"b": "val1",
			"c": true,
			"d": "John",
			"e": 1,
			"f": 2
		}`,
	},
	{
		`{
			"a": "val2",
			"b": "val3",
			"c": false,
			"d": "Paul",
			"e": 1,
			"f": 2
		}`,
	},
	{
		`{
			"a": "val1",
			"b": "val4",
			"c": true,
			"d": "John",
			"e": 1,
			"f": 2
		}`,
	},
	{
		`{
			"a": "val1",
			"b": "val2",
			"c": true,
			"d": "John",
			"e": 1,
			"f": 2
		}`,
	},
	{
		`{
			"a": "val1",
			"b": "val1",
			"c": true,
			"d": "John",
			"e": 1,
			"f": 2
		}`,
	},
	{
		`{
			"a": "wow",
			"b": "val1",
			"c": true,
			"d": "John",
			"e": 1,
			"f": 4
		}`,
	},
	{
		`{
			"a": "val1",
			"b": "val1",
			"c": true,
			"d": "John",
			"e": 1,
			"f": 2
		}`,
	},
	{
		`{
			"a": "val23",
			"b": "val1",
			"c": true,
			"d": "John",
			"f": 2
		}`,
	},
	{
		`{
			"a": "val1567",
			"b": "val1",
			"c": true,
			"d": "John",
			"e": 1,
			"f": 2
		}`,
	},
	{
		`{
			"a": "val1",
			"b": "val1",
			"c": true,
			"d": "John",
			"e": 1,
			"f": 2
		}`,
	},
	{
		`{
			"a": "",
			"b": "val1",
			"c": true,
			"d": "John",
			"e": 1,
			"f": 2
		}`,
	},
	{
		`{
			"a": "val1",
			"b": "val1",
			"c": true,
			"d": "John",
			"f": 2
		}`,
	},
	{
		`{
			"a": "val1",
			"b": "val1",
			"c": true,
			"d": "John",
			"f": 2
		}`,
	},
}

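// The fixture above exercises two group-by candidates ("a", "d") and two measure
// columns ("e", "f"): "f" is 2 in fifteen of the sixteen records and 4 in one
// (sum 34), while "e" is absent from three records.
//
// computeExpectedFStats is an illustrative sketch added alongside the fixture; it
// is not part of the original suite and is not called by the tests below. It shows
// how the hard-coded expectations for column "f" (sum, min, max, count) could be
// re-derived from `cases` instead of being spelled out by hand. It assumes "f" is
// always a JSON number, which ConfigCompatibleWithStandardLibrary decodes into a
// float64 when unmarshalling into an interface{}. For `cases` it returns
// (34, 2, 4, 16), matching the expectations asserted by TestStarTree.
func computeExpectedFStats(tcs []struct{ input string }) (sum, min, max int64, count int) {
	json := jsoniter.ConfigCompatibleWithStandardLibrary
	for _, tc := range tcs {
		var rec map[string]interface{}
		if err := json.Unmarshal([]byte(tc.input), &rec); err != nil {
			continue // malformed fixtures are reported by the tests themselves
		}
		f, ok := rec["f"].(float64)
		if !ok {
			continue
		}
		v := int64(f)
		if count == 0 || v < min {
			min = v
		}
		if v > max {
			max = v
		}
		sum += v
		count++
	}
	return sum, min, max, count
}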

/*
func checkTree(t *testing.T, node1 *Node, node2 *Node) {
	assert.Equal(t, node1.aggValues, node2.aggValues)

	for key, child := range node1.children {
		otherChild, ok := node2.children[key]

		assert.True(t, ok)
		assert.Equal(t, child.matchedRecordsStartIndex, otherChild.matchedRecordsStartIndex)
		assert.Equal(t, child.matchedRecordsEndIndex, otherChild.matchedRecordsEndIndex)

		checkTree(t, child, otherChild)
	}
}

func check(t *testing.T, decTree StarTreeQueryMaker, groupByKeys []string, aggFunctions []*structs.MeasureAggregator,
	origTree *StarTree) {
	assert.Equal(t, groupByKeys, decTree.metadata.GroupByKeys)
	assert.Equal(t, aggFunctions, decTree.metadata.AggFunctions)

	checkTree(t, origTree.Root, decTree.tree.Root)

	assert.Equal(t, origTree.matchedRecordsIndices, decTree.tree.matchedRecordsIndices)
}
*/

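// TestStarTree ingests the 16 fixture records into a fresh WIP block, then
// repeatedly builds and encodes a star tree grouped by columns "a" and "d" with
// measure columns "e" and "f", checking the rolled-up sum of "f" at the root.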
func TestStarTree(t *testing.T) {
	rangeIndex = map[string]*structs.Numbers{}

	var blockSummary structs.BlockSummary
	colWips := make(map[string]*ColWip)
	wipBlock := WipBlock{
		columnBlooms:       make(map[string]*BloomIndex),
		columnRangeIndexes: make(map[string]*RangeIndex),
		colWips:            colWips,
		pqMatches:          make(map[string]*pqmr.PQMatchResults),
		columnsInBlock:     make(map[string]bool),
		blockSummary:       blockSummary,
		tomRollup:          make(map[uint64]*RolledRecs),
		tohRollup:          make(map[uint64]*RolledRecs),
		todRollup:          make(map[uint64]*RolledRecs),
		bb:                 bbp.Get(),
	}
	segstats := make(map[string]*SegStats)
	allCols := make(map[string]bool)
	ss := &SegStore{
		wipBlock:       wipBlock,
		SegmentKey:     "test-segkey1",
		AllSeenColumns: allCols,
		pqTracker:      initPQTracker(),
		AllSst:         segstats,
		numBlocks:      0,
	}
	tsKey := config.GetTimeStampKey()
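	// Ingest every fixture record: decode its JSON (preserving number precision via
	// UseNumber), re-marshal it, and encode the columns into the WIP block as record i.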
	for i, test := range cases {

		var record_json map[string]interface{}
		var json = jsoniter.ConfigCompatibleWithStandardLibrary
		decoder := json.NewDecoder(bytes.NewReader([]byte(test.input)))
		decoder.UseNumber()
		err := decoder.Decode(&record_json)
		if err != nil {
			t.Errorf("testid: %d: Failed to parse json err:%v", i+1, err)
			continue
		}
		raw, err := json.Marshal(record_json)
		assert.NoError(t, err)

		maxIdx, _, err := ss.EncodeColumns(raw, uint64(i), &tsKey, utils.SIGNAL_EVENTS)
		assert.NoError(t, err)

		ss.wipBlock.maxIdx = maxIdx
		ss.wipBlock.blockSummary.RecCount += 1
	}

	groupByCols := []string{"a", "d"}
	mColNames := []string{"e", "f"}

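	// The same builder is reset and reused across ten trials; each trial rebuilds and
	// re-encodes the tree and must produce the same root-level aggregates.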
	var builder StarTreeBuilder
	for trial := 0; trial < 10; trial += 1 {
		builder.ResetSegTree(&ss.wipBlock, groupByCols, mColNames)
		err := builder.ComputeStarTree(&ss.wipBlock)
		assert.NoError(t, err)
		root := builder.tree.Root

		_, err = builder.EncodeStarTree(ss.SegmentKey)
		assert.NoError(t, err)

		// aggValues holds one block of TotalMeasFns entries per measure column, in
		// mColNames order: the first block is for col "e", the next one for col "f".
		agSumIdx := 1*(TotalMeasFns) + MeasFnSumIdx
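		// "f" is 2 in fifteen fixture records and 4 in one, so the root-level sum is 34.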
		assert.Equal(t, root.aggValues[agSumIdx].CVal.(int64),
			int64(34),
			fmt.Sprintf("expected sum of 34 for column f; got %d",
				root.aggValues[agSumIdx].CVal.(int64)))

	}
	fName := fmt.Sprintf("%v.strl", ss.SegmentKey)
	_ = os.RemoveAll(fName)
	fName = fmt.Sprintf("%v.strm", ss.SegmentKey)
	_ = os.RemoveAll(fName)
}

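// TestStarTreeMedium repeats the fixture 1000 times (16000 records) and checks
// that the root-level sum of column "f" scales accordingly.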
func TestStarTreeMedium(t *testing.T) {
	rangeIndex = map[string]*structs.Numbers{}

	var largeCases []struct {
		input string
	}

	for i := 0; i < 1000; i += 1 {
		largeCases = append(largeCases, cases...)
	}

	currCases := largeCases

	var blockSummary structs.BlockSummary
	colWips := make(map[string]*ColWip)
	wipBlock := WipBlock{
		columnBlooms:       make(map[string]*BloomIndex),
		columnRangeIndexes: make(map[string]*RangeIndex),
		colWips:            colWips,
		pqMatches:          make(map[string]*pqmr.PQMatchResults),
		columnsInBlock:     make(map[string]bool),
		blockSummary:       blockSummary,
		tomRollup:          make(map[uint64]*RolledRecs),
		tohRollup:          make(map[uint64]*RolledRecs),
		todRollup:          make(map[uint64]*RolledRecs),
		bb:                 bbp.Get(),
	}
	segstats := make(map[string]*SegStats)
	allCols := make(map[string]bool)
	ss := &SegStore{
		wipBlock:       wipBlock,
		SegmentKey:     "test-segkey2",
		AllSeenColumns: allCols,
		pqTracker:      initPQTracker(),
		AllSst:         segstats,
		numBlocks:      0,
	}
	tsKey := config.GetTimeStampKey()

	for i, test := range currCases {

		var record_json map[string]interface{}
		var json = jsoniter.ConfigCompatibleWithStandardLibrary
		decoder := json.NewDecoder(bytes.NewReader([]byte(test.input)))
		decoder.UseNumber()
		err := decoder.Decode(&record_json)
		if err != nil {
			t.Errorf("testid: %d: Failed to parse json err:%v", i+1, err)
			continue
		}
		raw, err := json.Marshal(record_json)
		assert.NoError(t, err)

		maxIdx, _, err := ss.EncodeColumns(raw, uint64(i), &tsKey, utils.SIGNAL_EVENTS)
		assert.NoError(t, err)

		ss.wipBlock.maxIdx = maxIdx
		ss.wipBlock.blockSummary.RecCount += 1
	}

	groupByCols := [...]string{"a", "d"}
	mColNames := []string{"e", "f"}

	var builder StarTreeBuilder

	for trial := 0; trial < 10; trial += 1 {
		builder.ResetSegTree(&ss.wipBlock, groupByCols[:], mColNames)
		err := builder.ComputeStarTree(&ss.wipBlock)
		assert.NoError(t, err)
		root := builder.tree.Root

		_, err = builder.EncodeStarTree(ss.SegmentKey)
		assert.NoError(t, err)

		// aggValues holds one block of TotalMeasFns entries per measure column, in
		// mColNames order: the first block is for col "e", the next one for col "f".
		agSumIdx := 1*(TotalMeasFns) + MeasFnSumIdx

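		// With 1000 copies of the fixture, "f" is 2 in 15000 records and 4 in 1000,
		// so the root-level sum is 34000.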
		assert.Equal(t, root.aggValues[agSumIdx].CVal.(int64),
			int64(34*1000),
			fmt.Sprintf("expected sum of 34000 for column f; got %d",
				root.aggValues[agSumIdx].CVal.(int64)))
	}
	fName := fmt.Sprintf("%v.strl", ss.SegmentKey)
	_ = os.RemoveAll(fName)
	fName = fmt.Sprintf("%v.strm", ss.SegmentKey)
	_ = os.RemoveAll(fName)
}

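// TestStarTreeMediumEncoding repeats the fixture 50 times (800 records) and checks
// the root-level sum of column "f" after encoding the tree.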
func TestStarTreeMediumEncoding(t *testing.T) {
	rangeIndex = map[string]*structs.Numbers{}

	var largeCases []struct {
		input string
	}

	for i := 0; i < 50; i += 1 {
		largeCases = append(largeCases, cases...)
	}

	currCases := largeCases

	var blockSummary structs.BlockSummary
	colWips := make(map[string]*ColWip)
	wipBlock := WipBlock{
		columnBlooms:       make(map[string]*BloomIndex),
		columnRangeIndexes: make(map[string]*RangeIndex),
		colWips:            colWips,
		pqMatches:          make(map[string]*pqmr.PQMatchResults),
		columnsInBlock:     make(map[string]bool),
		blockSummary:       blockSummary,
		tomRollup:          make(map[uint64]*RolledRecs),
		tohRollup:          make(map[uint64]*RolledRecs),
		todRollup:          make(map[uint64]*RolledRecs),
		bb:                 bbp.Get(),
	}

	allCols := make(map[string]bool)
	segstats := make(map[string]*SegStats)
	ss := &SegStore{
		wipBlock:       wipBlock,
		SegmentKey:     "test-segkey3",
		AllSeenColumns: allCols,
		pqTracker:      initPQTracker(),
		AllSst:         segstats,
		numBlocks:      0,
	}
	tsKey := config.GetTimeStampKey()

	for i, test := range currCases {

		var record_json map[string]interface{}
		var json = jsoniter.ConfigCompatibleWithStandardLibrary
		decoder := json.NewDecoder(bytes.NewReader([]byte(test.input)))
		decoder.UseNumber()
		err := decoder.Decode(&record_json)
		if err != nil {
			t.Errorf("testid: %d: Failed to parse json err:%v", i+1, err)
			continue
		}
		raw, err := json.Marshal(record_json)
		assert.NoError(t, err)

		maxIdx, _, err := ss.EncodeColumns(raw, uint64(i), &tsKey, utils.SIGNAL_EVENTS)
		assert.NoError(t, err)

		ss.wipBlock.maxIdx = maxIdx
		ss.wipBlock.blockSummary.RecCount += 1
		ss.RecordCount++
	}

	groupByCols := [...]string{"a", "d"}
	mColNames := []string{"e", "f"}

	var builder StarTreeBuilder
	for trial := 0; trial < 10; trial += 1 {
		builder.ResetSegTree(&ss.wipBlock, groupByCols[:], mColNames)
		err := builder.ComputeStarTree(&ss.wipBlock)
		assert.NoError(t, err)
		root := builder.tree.Root

		_, err = builder.EncodeStarTree(ss.SegmentKey)
		assert.NoError(t, err)

		// aggValues holds one block of TotalMeasFns entries per measure column, in
		// mColNames order: the first block is for col "e", the next one for col "f".
		agSumIdx := 1*(TotalMeasFns) + MeasFnSumIdx
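		// With 50 copies of the fixture, "f" is 2 in 750 records and 4 in 50,
		// so the root-level sum is 1700.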
		assert.Equal(t, root.aggValues[agSumIdx].CVal.(int64),
			int64(1700),
			fmt.Sprintf("expected sum of 1700 for column f; got %d",
				root.aggValues[agSumIdx].CVal.(int64)))

	}
	fName := fmt.Sprintf("%v.strl", ss.SegmentKey)
	_ = os.RemoveAll(fName)
	fName = fmt.Sprintf("%v.strm", ss.SegmentKey)
	_ = os.RemoveAll(fName)
}

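// TestStarTreeMediumEncodingDecoding repeats the fixture 50 times and verifies the
// sum, min, max, and count aggregates for column "f" at the root of the tree.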
func TestStarTreeMediumEncodingDecoding(t *testing.T) {
	rangeIndex = map[string]*structs.Numbers{}

	var largeCases []struct {
		input string
	}

	for i := 0; i < 50; i += 1 {
		largeCases = append(largeCases, cases...)
	}

	currCases := largeCases

	var blockSummary structs.BlockSummary
	colWips := make(map[string]*ColWip)
	wipBlock := WipBlock{
		columnBlooms:       make(map[string]*BloomIndex),
		columnRangeIndexes: make(map[string]*RangeIndex),
		colWips:            colWips,
		pqMatches:          make(map[string]*pqmr.PQMatchResults),
		columnsInBlock:     make(map[string]bool),
		blockSummary:       blockSummary,
		tomRollup:          make(map[uint64]*RolledRecs),
		tohRollup:          make(map[uint64]*RolledRecs),
		todRollup:          make(map[uint64]*RolledRecs),
		bb:                 bbp.Get(),
	}
	segstats := make(map[string]*SegStats)
	allCols := make(map[string]bool)
	ss := &SegStore{
		wipBlock:       wipBlock,
		SegmentKey:     "test-segkey4",
		AllSeenColumns: allCols,
		pqTracker:      initPQTracker(),
		AllSst:         segstats,
		numBlocks:      0,
	}
	tsKey := config.GetTimeStampKey()

	for i, test := range currCases {

		var record_json map[string]interface{}
		var json = jsoniter.ConfigCompatibleWithStandardLibrary
		decoder := json.NewDecoder(bytes.NewReader([]byte(test.input)))
		decoder.UseNumber()
		err := decoder.Decode(&record_json)
		if err != nil {
			t.Errorf("testid: %d: Failed to parse json err:%v", i+1, err)
			continue
		}
		raw, err := json.Marshal(record_json)
		assert.NoError(t, err)

		maxIdx, _, err := ss.EncodeColumns(raw, uint64(i), &tsKey, utils.SIGNAL_EVENTS)
		assert.NoError(t, err)

		ss.wipBlock.maxIdx = maxIdx
		ss.wipBlock.blockSummary.RecCount += 1
	}

	groupByCols := [...]string{"a", "d"}
	mColNames := []string{"e", "f"}

	var builder StarTreeBuilder

	for trial := 0; trial < 1; trial += 1 {
		builder.ResetSegTree(&ss.wipBlock, groupByCols[:], mColNames)
		err := builder.ComputeStarTree(&ss.wipBlock)
		assert.NoError(t, err)
		root := builder.tree.Root

		_, err = builder.EncodeStarTree(ss.SegmentKey)
		assert.NoError(t, err)

		// aggValues holds one block of TotalMeasFns entries per measure column, in
		// mColNames order: the first block is for col "e", the next one for col "f".
		agidx := 1*(TotalMeasFns) + MeasFnSumIdx
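		// 50 copies of the fixture: the root-level sum of "f" is 34 * 50 = 1700, and
		// its only values are 2 and 4, which fixes the min and max checked below.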
		assert.Equal(t, int64(17*100), root.aggValues[agidx].CVal.(int64),
			fmt.Sprintf("expected 1700 for sum of column f; got %d",
				root.aggValues[agidx].CVal.(int64)))

		agidx = 1*(TotalMeasFns) + MeasFnMinIdx
		assert.Equal(t, int64(2), root.aggValues[agidx].CVal.(int64),
			fmt.Sprintf("expected 2 for min of column f; got %d",
				root.aggValues[agidx].CVal.(int64)))

		agidx = 1*(TotalMeasFns) + MeasFnMaxIdx
		assert.Equal(t, int64(4), root.aggValues[agidx].CVal.(int64),
			fmt.Sprintf("expected 4 for max of column f; got %d",
				root.aggValues[agidx].CVal.(int64)))

		agidx = 1*(TotalMeasFns) + MeasFnCountIdx
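		// "f" is present in all 16 fixture records, so the root count is 16 * 50 = 800.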
		assert.Equal(t, uint64(800), root.aggValues[agidx].CVal.(uint64),
			fmt.Sprintf("expected 800 for count of column f; got %d",
				root.aggValues[agidx].CVal.(uint64)))

	}
	fName := fmt.Sprintf("%v.strl", ss.SegmentKey)
	_ = os.RemoveAll(fName)
	fName = fmt.Sprintf("%v.strm", ss.SegmentKey)
	_ = os.RemoveAll(fName)
}