github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/writer/startree_test.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package writer 18 19 import ( 20 "bytes" 21 "fmt" 22 "os" 23 "testing" 24 25 jsoniter "github.com/json-iterator/go" 26 "github.com/siglens/siglens/pkg/config" 27 "github.com/siglens/siglens/pkg/segment/pqmr" 28 "github.com/siglens/siglens/pkg/segment/structs" 29 . "github.com/siglens/siglens/pkg/segment/structs" 30 "github.com/siglens/siglens/pkg/segment/utils" 31 "github.com/stretchr/testify/assert" 32 bbp "github.com/valyala/bytebufferpool" 33 ) 34 35 var cases = []struct { 36 input string 37 }{ 38 { 39 `{ 40 "a":"val1", 41 "b":"val1", 42 "c":true, 43 "d":"John", 44 "e": 1, 45 "f": 2 46 }`, 47 }, 48 { 49 `{ 50 "a":"val1", 51 "b":"val1", 52 "c":true, 53 "d":"John", 54 "e": 1, 55 "f": 2 56 }`, 57 }, 58 { 59 `{ 60 "a":"val1", 61 "b":"val1", 62 "c":true, 63 "d":"John", 64 "e": 1, 65 "f": 2 66 }`, 67 }, 68 { 69 `{ 70 "a":"val1", 71 "b":"val1", 72 "c":true, 73 "d":"John", 74 "e": 1, 75 "f": 2 76 }`, 77 }, 78 { 79 `{ 80 "a":"val2", 81 "b":"val3", 82 "c":false, 83 "d":"Paul", 84 "e": 1, 85 "f": 2 86 }`, 87 }, 88 { 89 `{ 90 "a":"val1", 91 "b":"val4", 92 "c":true, 93 "d":"John", 94 "e": 1, 95 "f": 2 96 }`, 97 }, 98 { 99 `{ 100 "a":"val1", 101 "b":"val2", 102 "c":true, 103 "d":"John", 104 "e": 1, 105 "f": 2 106 }`, 107 }, 108 { 109 `{ 110 "a":"val1", 111 "b":"val1", 112 "c":true, 113 "d":"John", 114 "e": 1, 115 "f": 2 116 }`, 117 }, 118 { 119 `{ 120 "a":"wow", 121 "b":"val1", 122 "c":true, 123 "d":"John", 124 "e": 1, 125 "f": 4 126 }`, 127 }, 128 { 129 `{ 130 "a":"val1", 131 "b":"val1", 132 "c":true, 133 "d":"John", 134 "e": 1, 135 "f": 2 136 }`, 137 }, 138 { 139 `{ 140 "a":"val23", 141 "b":"val1", 142 "c":true, 143 "d":"John", 144 "f": 2 145 }`, 146 }, 147 { 148 `{ 149 "a":"val1567", 150 "b":"val1", 151 "c":true, 152 "d":"John", 153 "e": 1, 154 "f": 2 155 }`, 156 }, 157 { 158 `{ 159 "a":"val1", 160 "b":"val1", 161 "c":true, 162 "d":"John", 163 "e": 1, 164 "f": 2 165 }`, 166 }, 167 { 168 `{ 169 "a":"", 170 "b":"val1", 171 "c":true, 172 "d":"John", 173 "e": 1, 174 "f": 2 175 }`, 176 }, 177 { 178 `{ 179 "a":"val1", 180 "b":"val1", 181 "c":true, 182 "d":"John", 183 "f": 2 184 }`, 185 }, 186 { 187 `{ 188 "a":"val1", 189 "b":"val1", 190 "c":true, 191 "d":"John", 192 "f": 2 193 }`, 194 }, 195 } 196 197 /* 198 func checkTree(t *testing.T, node1 *Node, node2 *Node) { 199 assert.Equal(t, node1.aggValues, node2.aggValues) 200 201 for key, child := range node1.children { 202 otherChild, ok := node2.children[key] 203 204 assert.True(t, ok) 205 assert.Equal(t, child.matchedRecordsStartIndex, otherChild.matchedRecordsStartIndex) 206 assert.Equal(t, child.matchedRecordsEndIndex, otherChild.matchedRecordsEndIndex) 207 208 checkTree(t, child, otherChild) 209 } 210 } 211 212 func check(t *testing.T, decTree StarTreeQueryMaker, groupByKeys []string, aggFunctions []*structs.MeasureAggregator, 213 origTree *StarTree) { 214 assert.Equal(t, groupByKeys, decTree.metadata.GroupByKeys) 215 assert.Equal(t, aggFunctions, decTree.metadata.AggFunctions) 216 217 checkTree(t, origTree.Root, decTree.tree.Root) 218 219 assert.Equal(t, origTree.matchedRecordsIndices, decTree.tree.matchedRecordsIndices) 220 } 221 */ 222 223 func TestStarTree(t *testing.T) { 224 rangeIndex = map[string]*structs.Numbers{} 225 226 var blockSummary structs.BlockSummary 227 colWips := make(map[string]*ColWip) 228 wipBlock := WipBlock{ 229 columnBlooms: make(map[string]*BloomIndex), 230 columnRangeIndexes: make(map[string]*RangeIndex), 231 colWips: colWips, 232 pqMatches: make(map[string]*pqmr.PQMatchResults), 233 columnsInBlock: make(map[string]bool), 234 blockSummary: blockSummary, 235 tomRollup: make(map[uint64]*RolledRecs), 236 tohRollup: make(map[uint64]*RolledRecs), 237 todRollup: make(map[uint64]*RolledRecs), 238 bb: bbp.Get(), 239 } 240 segstats := make(map[string]*SegStats) 241 allCols := make(map[string]bool) 242 ss := &SegStore{ 243 wipBlock: wipBlock, 244 SegmentKey: "test-segkey1", 245 AllSeenColumns: allCols, 246 pqTracker: initPQTracker(), 247 AllSst: segstats, 248 numBlocks: 0, 249 } 250 tsKey := config.GetTimeStampKey() 251 for i, test := range cases { 252 253 var record_json map[string]interface{} 254 var json = jsoniter.ConfigCompatibleWithStandardLibrary 255 decoder := json.NewDecoder(bytes.NewReader([]byte(test.input))) 256 decoder.UseNumber() 257 err := decoder.Decode(&record_json) 258 if err != nil { 259 t.Errorf("testid: %d: Failed to parse json err:%v", i+1, err) 260 continue 261 } 262 raw, err := json.Marshal(record_json) 263 assert.NoError(t, err) 264 265 maxIdx, _, err := ss.EncodeColumns(raw, uint64(i), &tsKey, utils.SIGNAL_EVENTS) 266 assert.NoError(t, err) 267 268 ss.wipBlock.maxIdx = maxIdx 269 ss.wipBlock.blockSummary.RecCount += 1 270 } 271 272 groupByCols := []string{"a", "d"} 273 mColNames := []string{"e", "f"} 274 275 var builder StarTreeBuilder 276 for trial := 0; trial < 10; trial += 1 { 277 builder.ResetSegTree(&ss.wipBlock, groupByCols, mColNames) 278 err := builder.ComputeStarTree(&ss.wipBlock) 279 assert.NoError(t, err) 280 root := builder.tree.Root 281 282 _, err = builder.EncodeStarTree(ss.SegmentKey) 283 assert.NoError(t, err) 284 285 // first TotalMeasFns will be for col "e" 286 agSumIdx := 1*(TotalMeasFns) + MeasFnSumIdx 287 assert.Equal(t, root.aggValues[agSumIdx].CVal.(int64), 288 int64(34), 289 fmt.Sprintf("expected sum of 34 for sum of column f; got %d", 290 root.aggValues[agSumIdx].CVal.(int64))) 291 292 } 293 fName := fmt.Sprintf("%v.strl", ss.SegmentKey) 294 _ = os.RemoveAll(fName) 295 fName = fmt.Sprintf("%v.strm", ss.SegmentKey) 296 _ = os.RemoveAll(fName) 297 } 298 299 func TestStarTreeMedium(t *testing.T) { 300 rangeIndex = map[string]*structs.Numbers{} 301 302 var largeCases []struct { 303 input string 304 } 305 306 for i := 0; i < 1000; i += 1 { 307 largeCases = append(largeCases, cases...) 308 } 309 310 currCases := largeCases 311 312 var blockSummary structs.BlockSummary 313 colWips := make(map[string]*ColWip) 314 wipBlock := WipBlock{ 315 columnBlooms: make(map[string]*BloomIndex), 316 columnRangeIndexes: make(map[string]*RangeIndex), 317 colWips: colWips, 318 pqMatches: make(map[string]*pqmr.PQMatchResults), 319 columnsInBlock: make(map[string]bool), 320 blockSummary: blockSummary, 321 tomRollup: make(map[uint64]*RolledRecs), 322 tohRollup: make(map[uint64]*RolledRecs), 323 todRollup: make(map[uint64]*RolledRecs), 324 bb: bbp.Get(), 325 } 326 segstats := make(map[string]*SegStats) 327 allCols := make(map[string]bool) 328 ss := &SegStore{ 329 wipBlock: wipBlock, 330 SegmentKey: "test-segkey2", 331 AllSeenColumns: allCols, 332 pqTracker: initPQTracker(), 333 AllSst: segstats, 334 numBlocks: 0, 335 } 336 tsKey := config.GetTimeStampKey() 337 338 for i, test := range currCases { 339 340 var record_json map[string]interface{} 341 var json = jsoniter.ConfigCompatibleWithStandardLibrary 342 decoder := json.NewDecoder(bytes.NewReader([]byte(test.input))) 343 decoder.UseNumber() 344 err := decoder.Decode(&record_json) 345 if err != nil { 346 t.Errorf("testid: %d: Failed to parse json err:%v", i+1, err) 347 continue 348 } 349 raw, err := json.Marshal(record_json) 350 assert.NoError(t, err) 351 352 maxIdx, _, err := ss.EncodeColumns(raw, uint64(i), &tsKey, utils.SIGNAL_EVENTS) 353 assert.NoError(t, err) 354 355 ss.wipBlock.maxIdx = maxIdx 356 ss.wipBlock.blockSummary.RecCount += 1 357 } 358 359 groupByCols := [...]string{"a", "d"} 360 mColNames := []string{"e", "f"} 361 362 var builder StarTreeBuilder 363 364 for trial := 0; trial < 10; trial += 1 { 365 builder.ResetSegTree(&ss.wipBlock, groupByCols[:], mColNames) 366 err := builder.ComputeStarTree(&ss.wipBlock) 367 assert.NoError(t, err) 368 root := builder.tree.Root 369 370 _, err = builder.EncodeStarTree(ss.SegmentKey) 371 assert.NoError(t, err) 372 373 // first TotalMeasFns will be for col "e" 374 agSumIdx := 1*(TotalMeasFns) + MeasFnSumIdx 375 376 assert.Equal(t, root.aggValues[agSumIdx].CVal.(int64), 377 int64(34*1000), 378 fmt.Sprintf("expected sum of 340000 for sum of column f; got %d", 379 root.aggValues[agSumIdx].CVal.(int64))) 380 } 381 fName := fmt.Sprintf("%v.strl", ss.SegmentKey) 382 _ = os.RemoveAll(fName) 383 fName = fmt.Sprintf("%v.strm", ss.SegmentKey) 384 _ = os.RemoveAll(fName) 385 } 386 387 func TestStarTreeMediumEncoding(t *testing.T) { 388 rangeIndex = map[string]*structs.Numbers{} 389 390 var largeCases []struct { 391 input string 392 } 393 394 for i := 0; i < 50; i += 1 { 395 largeCases = append(largeCases, cases...) 396 } 397 398 currCases := largeCases 399 400 var blockSummary structs.BlockSummary 401 colWips := make(map[string]*ColWip) 402 wipBlock := WipBlock{ 403 columnBlooms: make(map[string]*BloomIndex), 404 columnRangeIndexes: make(map[string]*RangeIndex), 405 colWips: colWips, 406 pqMatches: make(map[string]*pqmr.PQMatchResults), 407 columnsInBlock: make(map[string]bool), 408 blockSummary: blockSummary, 409 tomRollup: make(map[uint64]*RolledRecs), 410 tohRollup: make(map[uint64]*RolledRecs), 411 todRollup: make(map[uint64]*RolledRecs), 412 bb: bbp.Get(), 413 } 414 415 allCols := make(map[string]bool) 416 segstats := make(map[string]*SegStats) 417 ss := &SegStore{ 418 wipBlock: wipBlock, 419 SegmentKey: "test-segkey3", 420 AllSeenColumns: allCols, 421 pqTracker: initPQTracker(), 422 AllSst: segstats, 423 numBlocks: 0, 424 } 425 tsKey := config.GetTimeStampKey() 426 427 for i, test := range currCases { 428 429 var record_json map[string]interface{} 430 var json = jsoniter.ConfigCompatibleWithStandardLibrary 431 decoder := json.NewDecoder(bytes.NewReader([]byte(test.input))) 432 decoder.UseNumber() 433 err := decoder.Decode(&record_json) 434 if err != nil { 435 t.Errorf("testid: %d: Failed to parse json err:%v", i+1, err) 436 continue 437 } 438 raw, err := json.Marshal(record_json) 439 assert.NoError(t, err) 440 441 maxIdx, _, err := ss.EncodeColumns(raw, uint64(i), &tsKey, utils.SIGNAL_EVENTS) 442 assert.NoError(t, err) 443 444 ss.wipBlock.maxIdx = maxIdx 445 ss.wipBlock.blockSummary.RecCount += 1 446 ss.RecordCount++ 447 } 448 449 groupByCols := [...]string{"a", "d"} 450 mColNames := []string{"e", "f"} 451 452 var builder StarTreeBuilder 453 for trial := 0; trial < 10; trial += 1 { 454 builder.ResetSegTree(&ss.wipBlock, groupByCols[:], mColNames) 455 err := builder.ComputeStarTree(&ss.wipBlock) 456 assert.NoError(t, err) 457 root := builder.tree.Root 458 459 _, err = builder.EncodeStarTree(ss.SegmentKey) 460 assert.NoError(t, err) 461 462 // first TotalMeasFns will be for col "e" 463 agSumIdx := 1*(TotalMeasFns) + MeasFnSumIdx 464 assert.Equal(t, root.aggValues[agSumIdx].CVal.(int64), 465 int64(1700), 466 fmt.Sprintf("expected sum of 3400 for sum of column f; got %d", 467 root.aggValues[agSumIdx].CVal.(int64))) 468 469 } 470 fName := fmt.Sprintf("%v.strl", ss.SegmentKey) 471 _ = os.RemoveAll(fName) 472 fName = fmt.Sprintf("%v.strm", ss.SegmentKey) 473 _ = os.RemoveAll(fName) 474 } 475 476 func TestStarTreeMediumEncodingDecoding(t *testing.T) { 477 rangeIndex = map[string]*structs.Numbers{} 478 479 var largeCases []struct { 480 input string 481 } 482 483 for i := 0; i < 50; i += 1 { 484 largeCases = append(largeCases, cases...) 485 } 486 487 currCases := largeCases 488 489 var blockSummary structs.BlockSummary 490 colWips := make(map[string]*ColWip) 491 wipBlock := WipBlock{ 492 columnBlooms: make(map[string]*BloomIndex), 493 columnRangeIndexes: make(map[string]*RangeIndex), 494 colWips: colWips, 495 pqMatches: make(map[string]*pqmr.PQMatchResults), 496 columnsInBlock: make(map[string]bool), 497 blockSummary: blockSummary, 498 tomRollup: make(map[uint64]*RolledRecs), 499 tohRollup: make(map[uint64]*RolledRecs), 500 todRollup: make(map[uint64]*RolledRecs), 501 bb: bbp.Get(), 502 } 503 segstats := make(map[string]*SegStats) 504 allCols := make(map[string]bool) 505 ss := &SegStore{ 506 wipBlock: wipBlock, 507 SegmentKey: "test-segkey4", 508 AllSeenColumns: allCols, 509 pqTracker: initPQTracker(), 510 AllSst: segstats, 511 numBlocks: 0, 512 } 513 tsKey := config.GetTimeStampKey() 514 515 for i, test := range currCases { 516 517 var record_json map[string]interface{} 518 var json = jsoniter.ConfigCompatibleWithStandardLibrary 519 decoder := json.NewDecoder(bytes.NewReader([]byte(test.input))) 520 decoder.UseNumber() 521 err := decoder.Decode(&record_json) 522 if err != nil { 523 t.Errorf("testid: %d: Failed to parse json err:%v", i+1, err) 524 continue 525 } 526 raw, err := json.Marshal(record_json) 527 assert.NoError(t, err) 528 529 maxIdx, _, err := ss.EncodeColumns(raw, uint64(i), &tsKey, utils.SIGNAL_EVENTS) 530 assert.NoError(t, err) 531 532 ss.wipBlock.maxIdx = maxIdx 533 ss.wipBlock.blockSummary.RecCount += 1 534 } 535 536 groupByCols := [...]string{"a", "d"} 537 mColNames := []string{"e", "f"} 538 539 var builder StarTreeBuilder 540 541 for trial := 0; trial < 1; trial += 1 { 542 builder.ResetSegTree(&ss.wipBlock, groupByCols[:], mColNames) 543 err := builder.ComputeStarTree(&ss.wipBlock) 544 assert.NoError(t, err) 545 root := builder.tree.Root 546 547 _, err = builder.EncodeStarTree(ss.SegmentKey) 548 assert.NoError(t, err) 549 550 // first TotalMeasFns will be for col "e" 551 agidx := 1*(TotalMeasFns) + MeasFnSumIdx 552 assert.Equal(t, int64(17*100), root.aggValues[agidx].CVal.(int64), 553 fmt.Sprintf("expected 17000 for sum of column f; got %d", 554 root.aggValues[agidx].CVal.(int64))) 555 556 agidx = 1*(TotalMeasFns) + MeasFnMinIdx 557 assert.Equal(t, int64(2), root.aggValues[agidx].CVal.(int64), 558 fmt.Sprintf("expected 2 for min of column f; got %d", 559 root.aggValues[agidx].CVal.(int64))) 560 561 agidx = 1*(TotalMeasFns) + MeasFnMaxIdx 562 assert.Equal(t, int64(4), root.aggValues[agidx].CVal.(int64), 563 fmt.Sprintf("expected 4 for max of column f; got %d", 564 root.aggValues[agidx].CVal.(int64))) 565 566 agidx = 1*(TotalMeasFns) + MeasFnCountIdx 567 assert.Equal(t, uint64(800), root.aggValues[agidx].CVal.(uint64), 568 fmt.Sprintf("expected 800 for count of column f; got %d", 569 root.aggValues[agidx].CVal.(uint64))) 570 571 } 572 fName := fmt.Sprintf("%v.strl", ss.SegmentKey) 573 _ = os.RemoveAll(fName) 574 fName = fmt.Sprintf("%v.strm", ss.SegmentKey) 575 _ = os.RemoveAll(fName) 576 }