github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/reader/segread/agiletreereader_test.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package segread 18 19 /* 20 "encoding/json" 21 "fmt" 22 "math/rand" 23 "testing" 24 25 "github.com/siglens/siglens/pkg/config" 26 "github.com/siglens/siglens/pkg/segment/pqmr" 27 "github.com/siglens/siglens/pkg/segment/structs" 28 "github.com/siglens/siglens/pkg/segment/utils" 29 "github.com/siglens/siglens/pkg/segment/writer" 30 log "github.com/sirupsen/logrus" 31 "github.com/stretchr/testify/assert" 32 bbp "github.com/valyala/bytebufferpool" 33 */ 34 35 /* 36 // TODO: revisit this disabled test; we are not sure yet if we will support filters via agileTree 37 38 func Test_StartTreeColumnFilter(t *testing.T) { 39 allCols := make(map[string]bool) 40 segstats := make(map[string]*structs.SegStats) 41 42 wipBlock := WipBlock{ 43 columnBlooms: make(map[string]*writer.BloomIndex), 44 columnRangeIndexes: make(map[string]*RangeIndex), 45 colWips: make(map[string]*ColWip), 46 pqMatches: make(map[string]*pqmr.PQMatchResults), 47 columnsInBlock: make(map[string]bool), 48 tomRollup: make(map[uint64]*RolledRecs), 49 tohRollup: make(map[uint64]*RolledRecs), 50 todRollup: make(map[uint64]*RolledRecs), 51 bb: bbp.Get(), 52 blockTs: make([]uint64, 0), 53 } 54 segStore := &SegStore{ 55 wipBlock: wipBlock, 56 SegmentKey: "test-segkey", 57 AllSeenColumns: allCols, 58 pqTracker: initPQTracker(), 59 
AllSst: segstats, 60 numBlocks: 0, 61 } 62 63 entryCount := uint16(16_000) 64 tsKey := config.GetTimeStampKey() 65 for i := uint16(0); i < entryCount; i++ { 66 entry := make(map[string]interface{}) 67 entry["key1"] = "match words 123 abc" 68 entry["key2"] = "value1" 69 entry["key3"] = i 70 if i%2 == 0 { 71 entry["key4"] = "even" 72 } else { 73 entry["key4"] = "odd" 74 } 75 entry["key5"] = fmt.Sprintf("batch-%v", rand.Intn(10)) 76 entry["key6"] = rand.Int() 77 78 timestp := uint64(i) + 1 // don't start with 0 as timestamp 79 raw, _ := json.Marshal(entry) 80 _, _, err := segStore.EncodeColumns(raw, timestp, &tsKey) 81 assert.NoError(t, err) 82 segStore.wipBlock.blockSummary.RecCount += 1 83 } 84 85 groupByCols := []string{"key2", "key4", "key5"} 86 aggFunctions := make([]*structs.MeasureAggregator, 0) 87 88 for _, col := range []string{"key3", "key6"} { 89 for _, fun := range []utils.AggregateFunctions{utils.Sum, utils.Min, utils.Max} { 90 aggFunctions = append(aggFunctions, &structs.MeasureAggregator{MeasureCol: col, MeasureFunc: fun}) 91 } 92 } 93 94 even, _ := utils.CreateDtypeEnclosure("even", 0) 95 evenQuery := &structs.SearchQuery{ 96 ExpressionFilter: &structs.SearchExpression{ 97 LeftSearchInput: &structs.SearchExpressionInput{ColumnName: "key4"}, 98 FilterOp: utils.Equals, 99 RightSearchInput: &structs.SearchExpressionInput{ColumnValue: even}, 100 }, 101 SearchType: structs.SimpleExpression, 102 } 103 evenQuery.GetQueryInfo() 104 var builder StarTreeBuilder 105 106 expected := make([]uint16, entryCount/2) 107 idx := 0 108 for i := uint16(0); i < entryCount; i += 2 { 109 expected[idx] = i 110 idx++ 111 } 112 113 for i := 0; i < 100; i++ { 114 rand.Shuffle(len(groupByCols), func(i, j int) { groupByCols[i], groupByCols[j] = groupByCols[j], groupByCols[i] }) 115 log.Infof("iteration %+v using groupby cols %+v", i, groupByCols) 116 builder.Reset(&segStore.wipBlock, groupByCols) 117 result := builder.ComputeStarTree(&segStore.wipBlock, groupByCols, aggFunctions) 
118 data, err := builder.EncodeStarTree(&segStore.wipBlock, &result, groupByCols, aggFunctions) 119 assert.Nil(t, err) 120 decoded, err := DecodeStarTree(data) 121 assert.Nil(t, err) 122 check(t, *decoded, groupByCols, aggFunctions, &result) 123 retVal, err := decoded.ApplyColumnFilter(evenQuery) 124 log.Infof("iteration %+v has %+v results", i, len(retVal)) 125 assert.Equal(t, decoded.metadata.GroupByKeys, groupByCols) 126 assert.Nil(t, err) 127 assert.Equal(t, uint16(len(retVal)), entryCount/2) 128 sort.Slice(retVal, func(i, j int) bool { return retVal[i] < retVal[j] }) 129 assert.Equal(t, retVal, expected) 130 } 131 } 132 133 134 func Test_StartTreeGroupBy(t *testing.T) { 135 allCols := make(map[string]bool) 136 segstats := make(map[string]*structs.SegStats) 137 138 wipBlock := writer.WipBlock{ 139 columnBlooms: make(map[string]*writer.BloomIndex), 140 columnRangeIndexes: make(map[string]*writer.RangeIndex), 141 colWips: make(map[string]*writer.ColWip), 142 pqMatches: make(map[string]*pqmr.PQMatchResults), 143 columnsInBlock: make(map[string]bool), 144 tomRollup: make(map[uint64]*RolledRecs), 145 tohRollup: make(map[uint64]*RolledRecs), 146 todRollup: make(map[uint64]*RolledRecs), 147 bb: bbp.Get(), 148 blockTs: make([]uint64, 0), 149 } 150 segStore := &SegStore{ 151 wipBlock: wipBlock, 152 SegmentKey: "test-segkey", 153 AllSeenColumns: allCols, 154 pqTracker: initPQTracker(), 155 AllSst: segstats, 156 numBlocks: 0, 157 } 158 159 entryCount := 16_000 160 tsKey := config.GetTimeStampKey() 161 for i := 0; i < entryCount; i++ { 162 entry := make(map[string]interface{}) 163 entry["key1"] = "match words 123 abc" 164 entry["key2"] = "value1" 165 entry["key3"] = i 166 if i%2 == 0 { 167 entry["key4"] = "even" 168 } else { 169 entry["key4"] = "odd" 170 } 171 entry["key5"] = fmt.Sprintf("batch-%v", rand.Intn(10)) 172 entry["key6"] = rand.Int() 173 174 timestp := uint64(i) + 1 // don't start with 0 as timestamp 175 raw, _ := json.Marshal(entry) 176 _, _, err := 
segStore.EncodeColumns(raw, timestp, &tsKey) 177 assert.NoError(t, err) 178 segStore.wipBlock.blockSummary.RecCount += 1 179 } 180 181 groupByCols := []string{"key2", "key4", "key5"} 182 aggFunctions := make([]*structs.MeasureAggregator, 0) 183 184 for _, col := range []string{"key3", "key6"} { 185 for _, fun := range []utils.AggregateFunctions{utils.Sum, utils.Min, utils.Max} { 186 aggFunctions = append(aggFunctions, &structs.MeasureAggregator{MeasureCol: col, MeasureFunc: fun}) 187 } 188 } 189 190 grpByCols := []string{"key4"} 191 measureOps := []*structs.MeasureAggregator{ 192 {MeasureCol: "key3", MeasureFunc: utils.Min}, 193 {MeasureCol: "key3", MeasureFunc: utils.Max}, 194 {MeasureCol: "key3", MeasureFunc: utils.Sum}, 195 } 196 grpByRequest := &structs.GroupByRequest{MeasureOperations: measureOps, GroupByColumns: grpByCols} 197 198 var builder StarTreeBuilder 199 200 oddSum := int64(0) 201 evenSum := int64(0) 202 for i := int64(0); i < int64(entryCount); i++ { 203 if i%2 == 0 { 204 evenSum += i 205 } else { 206 oddSum += i 207 } 208 } 209 210 for i := 0; i < 100; i++ { 211 rand.Shuffle(len(aggFunctions), func(i, j int) { aggFunctions[i], aggFunctions[j] = aggFunctions[j], aggFunctions[i] }) 212 log.Infof("iteration %+v using agg fns cols %+v", i, aggFunctions) 213 builder.Reset(&segStore.wipBlock, groupByCols) 214 result := builder.ComputeStarTree(&segStore.wipBlock, groupByCols, aggFunctions) 215 _, err := builder.EncodeStarTree(&segStore.wipBlock, &result, groupByCols, aggFunctions) 216 assert.Nil(t, err) 217 218 219 // todo write UTs to have a str.ReadMeta and compare the decoded treeMeta. 
220 // todo write a just-in-time decoder to see if the aggvalues that are returned are accurate 221 // we need to first go through each block and write this block to the .str file 222 // and create blocksummary as you encode each tree and then pass it to str.InitNewAgileTreeReader 223 // use the WriteMockSegFile to create a segfile 224 225 226 decoded, err := DecodeStarTree(data) 227 assert.Nil(t, err) 228 check(t, *decoded, groupByCols, aggFunctions, &result) 229 retVal, err := decoded.ApplyGroupBy(grpByRequest) 230 assert.Equal(t, decoded.metadata.GroupByKeys, groupByCols) 231 assert.Nil(t, err) 232 assert.Len(t, retVal, 2, "key4 has 2 unique values") 233 assert.Contains(t, retVal, "even") 234 assert.Contains(t, retVal, "odd") 235 236 evenAggs := retVal["even"] 237 assert.Len(t, evenAggs, len(measureOps)) 238 assert.Equal(t, evenAggs[0].CVal.(int64), int64(0), "min is 0") 239 assert.Equal(t, evenAggs[1].CVal.(int64), int64(entryCount-2)) 240 assert.Equal(t, evenAggs[2].CVal.(int64), evenSum, "sum must be greater than max") 241 242 oddAggs := retVal["odd"] 243 assert.Len(t, oddAggs, len(measureOps)) 244 assert.Equal(t, oddAggs[0].CVal.(int64), int64(1), "min is 1") 245 assert.Equal(t, oddAggs[1].CVal.(int64), int64(entryCount-1)) 246 assert.Equal(t, oddAggs[2].CVal.(int64), oddSum, "sum must be greater than max") 247 248 } 249 } 250 */