github.com/parquet-go/parquet-go@v0.20.0/encoding/encoding_test.go (about) 1 package encoding_test 2 3 import ( 4 "bytes" 5 "io" 6 "math" 7 "math/bits" 8 "math/rand" 9 "testing" 10 "time" 11 12 "github.com/parquet-go/parquet-go/deprecated" 13 "github.com/parquet-go/parquet-go/encoding" 14 "github.com/parquet-go/parquet-go/encoding/bitpacked" 15 "github.com/parquet-go/parquet-go/encoding/bytestreamsplit" 16 "github.com/parquet-go/parquet-go/encoding/delta" 17 "github.com/parquet-go/parquet-go/encoding/plain" 18 "github.com/parquet-go/parquet-go/encoding/rle" 19 "github.com/parquet-go/parquet-go/internal/unsafecast" 20 ) 21 22 func repeatInt64(seq []int64, n int) []int64 { 23 rep := make([]int64, len(seq)*n) 24 for i := 0; i < n; i++ { 25 copy(rep[i*len(seq):], seq) 26 } 27 return rep 28 } 29 30 var booleanTests = [...][]bool{ 31 {}, 32 {true}, 33 {false}, 34 {true, false, true, false, true, true, true, false, false, true}, 35 { // repeating 32x 36 true, true, true, true, true, true, true, true, 37 true, true, true, true, true, true, true, true, 38 true, true, true, true, true, true, true, true, 39 true, true, true, true, true, true, true, true, 40 }, 41 { // repeating 33x 42 true, true, true, true, true, true, true, true, 43 true, true, true, true, true, true, true, true, 44 true, true, true, true, true, true, true, true, 45 true, true, true, true, true, true, true, true, 46 true, 47 }, 48 { // alternating 15x 49 false, true, false, true, false, true, false, true, 50 false, true, false, true, false, true, false, 51 }, 52 { // alternating 16x 53 false, true, false, true, false, true, false, true, 54 false, true, false, true, false, true, false, true, 55 }, 56 } 57 58 var levelsTests = [...][]byte{ 59 {}, 60 {0}, 61 {1}, 62 {0, 1, 0, 2, 3, 4, 5, 6, math.MaxInt8, math.MaxInt8, 0}, 63 { // repeating 24x 64 42, 42, 42, 42, 42, 42, 42, 42, 65 42, 42, 42, 42, 42, 42, 42, 42, 66 42, 42, 42, 42, 42, 42, 42, 42, 67 }, 68 { // never repeating 69 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 70 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 71 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 72 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 73 }, 74 { // streaks of repeating values 75 0, 0, 0, 0, 1, 1, 1, 1, 76 2, 2, 2, 2, 3, 3, 3, 3, 77 4, 4, 4, 4, 5, 5, 5, 5, 78 6, 6, 6, 7, 7, 7, 8, 8, 79 8, 9, 9, 9, 80 }, 81 } 82 83 var int32Tests = [...][]int32{ 84 {}, 85 {0}, 86 {1}, 87 {-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxInt32, math.MaxInt32, 0}, 88 { // repeating 24x 89 42, 42, 42, 42, 42, 42, 42, 42, 90 42, 42, 42, 42, 42, 42, 42, 42, 91 42, 42, 42, 42, 42, 42, 42, 42, 92 }, 93 { // never repeating 94 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 95 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 96 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 97 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 98 }, 99 { // streaks of repeating values 100 0, 0, 0, 0, 1, 1, 1, 1, 101 2, 2, 2, 2, 3, 3, 3, 3, 102 4, 4, 4, 4, 5, 5, 5, 5, 103 6, 6, 6, 7, 7, 7, 8, 8, 104 8, 9, 9, 9, 105 }, 106 { // a sequence that triggered a bug in the delta binary packed encoding 107 24, 36, 47, 32, 29, 4, 9, 20, 2, 18, 108 }, 109 } 110 111 var int64Tests = [...][]int64{ 112 {}, 113 {0}, 114 {1}, 115 {-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxInt64, math.MaxInt64, 0}, 116 { // repeating 24x 117 42, 42, 42, 42, 42, 42, 42, 42, 118 42, 42, 42, 42, 42, 42, 42, 42, 119 42, 42, 42, 42, 42, 42, 42, 42, 120 }, 121 { // never repeating 122 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 123 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 124 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 125 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 126 }, 127 { // streaks of repeating values 128 0, 0, 0, 0, 1, 1, 1, 1, 129 2, 2, 2, 2, 3, 3, 3, 3, 130 4, 4, 4, 4, 5, 5, 5, 5, 131 6, 6, 6, 7, 7, 7, 8, 8, 132 8, 9, 9, 9, 133 }, 134 { // streaks of repeating values 135 0, 0, 0, 0, 1, 1, 1, 1, 136 2, 2, 2, 2, 3, 3, 3, 3, 137 4, 4, 4, 4, 5, 5, 5, 5, 138 6, 6, 6, 7, 7, 7, 8, 8, 139 8, 9, 9, 9, 140 }, 141 repeatInt64( // a sequence resulting in 64 bits words in the delta binary packed encoding 142 []int64{ 143 math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64, 144 math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64, 145 146 0, math.MaxInt64, math.MinInt64, math.MaxInt64, 147 math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64, 148 }, 149 5, 150 ), 151 } 152 153 var int96Tests = [...][]deprecated.Int96{ 154 {}, 155 {{0: 0}}, 156 {{0: 1}}, 157 } 158 159 var floatTests = [...][]float32{ 160 {}, 161 {0}, 162 {1}, 163 {0, 1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxFloat32, math.MaxFloat32, 0}, 164 {-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxFloat32, math.MaxFloat32, 0}, 165 } 166 167 var doubleTests = [...][]float64{ 168 {}, 169 {0}, 170 {1}, 171 {-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxFloat64, math.MaxFloat64, 0}, 172 } 173 174 var byteArrayTests = [...][][]byte{ 175 {}, 176 {[]byte("")}, 177 {[]byte("A"), []byte("B"), []byte("C")}, 178 {[]byte("hello world!"), bytes.Repeat([]byte("1234567890"), 100)}, 179 } 180 181 var fixedLenByteArrayTests = [...]struct { 182 size int 183 data []byte 184 }{ 185 {size: 1, data: []byte("")}, 186 {size: 1, data: []byte("ABCDEFGH")}, 187 {size: 2, data: []byte("ABCDEFGH")}, 188 {size: 4, data: []byte("ABCDEFGH")}, 189 {size: 8, data: []byte("ABCDEFGH")}, 190 {size: 10, data: bytes.Repeat([]byte("123456789"), 100)}, 191 {size: 16, data: bytes.Repeat([]byte("1234567890"), 160)}, 192 } 193 194 var encodings = [...]encoding.Encoding{ 195 new(plain.Encoding), 196 new(rle.Encoding), 197 new(bitpacked.Encoding), 198 new(plain.DictionaryEncoding), 199 new(rle.DictionaryEncoding), 200 new(delta.BinaryPackedEncoding), 201 new(delta.LengthByteArrayEncoding), 202 new(delta.ByteArrayEncoding), 203 new(bytestreamsplit.Encoding), 204 } 205 206 func TestEncoding(t *testing.T) { 207 for _, encoding := range encodings { 208 t.Run(encoding.String(), func(t *testing.T) { testEncoding(t, encoding) }) 209 } 210 } 211 212 func testEncoding(t *testing.T, e encoding.Encoding) { 213 for _, test := range [...]struct { 214 scenario string 215 function func(*testing.T, encoding.Encoding) 216 }{ 217 { 218 scenario: "boolean", 219 function: testBooleanEncoding, 220 }, 221 222 { 223 scenario: "levels", 224 function: testLevelsEncoding, 225 }, 226 227 { 228 scenario: "int32", 229 function: testInt32Encoding, 230 }, 231 232 { 233 scenario: "int64", 234 function: testInt64Encoding, 235 }, 236 237 { 238 scenario: "int96", 239 function: testInt96Encoding, 240 }, 241 242 { 243 scenario: "float", 244 function: testFloatEncoding, 245 }, 246 247 { 248 scenario: "double", 249 function: testDoubleEncoding, 250 }, 251 252 { 253 scenario: "byte array", 254 function: testByteArrayEncoding, 255 }, 256 257 { 258 scenario: "fixed length byte array", 259 function: testFixedLenByteArrayEncoding, 260 }, 261 } { 262 t.Run(test.scenario, func(t *testing.T) { test.function(t, e) }) 263 } 264 } 265 266 func setBitWidth(enc encoding.Encoding, bitWidth int) { 267 switch e := enc.(type) { 268 case *rle.Encoding: 269 e.BitWidth = bitWidth 270 case *bitpacked.Encoding: 271 e.BitWidth = bitWidth 272 } 273 } 274 275 type encodingFunc func(encoding.Encoding, []byte, []byte) ([]byte, error) 276 277 func testBooleanEncoding(t *testing.T, e encoding.Encoding) { 278 testCanEncodeBoolean(t, e) 279 buffer := []byte{} 280 values := []byte{} 281 input := []byte{} 282 setBitWidth(e, 1) 283 284 for _, test := range booleanTests { 285 t.Run("", func(t *testing.T) { 286 var err error 287 288 input = input[:0] 289 count := 0 290 for _, value := range test { 291 input = plain.AppendBoolean(input, count, value) 292 count++ 293 } 294 295 buffer, err = e.EncodeBoolean(buffer, input) 296 assertNoError(t, err) 297 values, err = e.DecodeBoolean(values, buffer) 298 assertNoError(t, err) 299 assertEqualBytes(t, input, values) 300 }) 301 } 302 } 303 304 func testLevelsEncoding(t *testing.T, e encoding.Encoding) { 305 testCanEncodeLevels(t, e) 306 buffer := []byte{} 307 values := []byte{} 308 309 for _, input := range levelsTests { 310 setBitWidth(e, maxLenInt8(unsafecast.BytesToInt8(input))) 311 312 t.Run("", func(t *testing.T) { 313 var err error 314 buffer, err = e.EncodeLevels(buffer, input) 315 assertNoError(t, err) 316 values, err = e.DecodeLevels(values, buffer) 317 assertNoError(t, err) 318 assertEqualBytes(t, input, values[:len(input)]) 319 }) 320 } 321 } 322 323 func testInt32Encoding(t *testing.T, e encoding.Encoding) { 324 testCanEncodeInt32(t, e) 325 buffer := []byte{} 326 values := []int32{} 327 328 for _, input := range int32Tests { 329 setBitWidth(e, maxLenInt32(input)) 330 331 t.Run("", func(t *testing.T) { 332 var err error 333 buffer, err = e.EncodeInt32(buffer, input) 334 assertNoError(t, err) 335 values, err = e.DecodeInt32(values, buffer) 336 assertNoError(t, err) 337 assertEqualInt32(t, input, values) 338 }) 339 } 340 } 341 342 func testInt64Encoding(t *testing.T, e encoding.Encoding) { 343 testCanEncodeInt64(t, e) 344 buffer := []byte{} 345 values := []int64{} 346 347 for _, input := range int64Tests { 348 setBitWidth(e, maxLenInt64(input)) 349 350 t.Run("", func(t *testing.T) { 351 var err error 352 buffer, err = e.EncodeInt64(buffer, input) 353 assertNoError(t, err) 354 values, err = e.DecodeInt64(values, buffer) 355 assertNoError(t, err) 356 assertEqualInt64(t, input, values) 357 }) 358 } 359 } 360 361 func testInt96Encoding(t *testing.T, e encoding.Encoding) { 362 testCanEncodeInt96(t, e) 363 buffer := []byte{} 364 values := []deprecated.Int96{} 365 366 for _, input := range int96Tests { 367 t.Run("", func(t *testing.T) { 368 var err error 369 buffer, err = e.EncodeInt96(buffer, input) 370 assertNoError(t, err) 371 values, err = e.DecodeInt96(values, buffer) 372 assertNoError(t, err) 373 assertEqualInt96(t, input, values) 374 }) 375 } 376 } 377 378 func testFloatEncoding(t *testing.T, e encoding.Encoding) { 379 testCanEncodeFloat(t, e) 380 buffer := []byte{} 381 values := []float32{} 382 383 for _, input := range floatTests { 384 t.Run("", func(t *testing.T) { 385 var err error 386 buffer, err = e.EncodeFloat(buffer, input) 387 assertNoError(t, err) 388 values, err = e.DecodeFloat(values, buffer) 389 assertNoError(t, err) 390 assertEqualFloat32(t, input, values) 391 }) 392 } 393 } 394 395 func testDoubleEncoding(t *testing.T, e encoding.Encoding) { 396 testCanEncodeDouble(t, e) 397 buffer := []byte{} 398 values := []float64{} 399 400 for _, input := range doubleTests { 401 t.Run("", func(t *testing.T) { 402 var err error 403 buffer, err = e.EncodeDouble(buffer, input) 404 assertNoError(t, err) 405 values, err = e.DecodeDouble(values, buffer) 406 assertNoError(t, err) 407 assertEqualFloat64(t, input, values) 408 }) 409 } 410 } 411 412 func testByteArrayEncoding(t *testing.T, e encoding.Encoding) { 413 testCanEncodeByteArray(t, e) 414 input := []byte{} 415 buffer := []byte{} 416 values := []byte{} 417 offsets := []uint32{} 418 419 for _, test := range byteArrayTests { 420 offsets, input = offsets[:0], input[:0] 421 lastOffset := uint32(0) 422 423 for _, value := range test { 424 offsets = append(offsets, lastOffset) 425 input = append(input, value...) 426 lastOffset += uint32(len(value)) 427 } 428 429 offsets = append(offsets, lastOffset) 430 431 t.Run("", func(t *testing.T) { 432 var err error 433 buffer, err = e.EncodeByteArray(buffer, input, offsets) 434 assertNoError(t, err) 435 estimatedOutputSize := e.EstimateDecodeByteArraySize(buffer) 436 values, _, err = e.DecodeByteArray(values, buffer, offsets) 437 assertNoError(t, err) 438 assertEqualBytes(t, input, values) 439 if len(values) > estimatedOutputSize { 440 t.Errorf("the decode output was larger than the estimate: %d>%d", len(values), estimatedOutputSize) 441 } 442 }) 443 } 444 } 445 446 func testFixedLenByteArrayEncoding(t *testing.T, e encoding.Encoding) { 447 testCanEncodeFixedLenByteArray(t, e) 448 buffer := []byte{} 449 values := []byte{} 450 451 for _, test := range fixedLenByteArrayTests { 452 t.Run("", func(t *testing.T) { 453 var err error 454 buffer, err = e.EncodeFixedLenByteArray(buffer, test.data, test.size) 455 assertNoError(t, err) 456 values, err = e.DecodeFixedLenByteArray(values, buffer, test.size) 457 assertNoError(t, err) 458 assertEqualBytes(t, test.data, values) 459 }) 460 } 461 } 462 463 func testCanEncodeBoolean(t testing.TB, e encoding.Encoding) { 464 testCanEncode(t, e, encoding.CanEncodeBoolean) 465 } 466 467 func testCanEncodeLevels(t testing.TB, e encoding.Encoding) { 468 testCanEncode(t, e, encoding.CanEncodeLevels) 469 } 470 471 func testCanEncodeInt32(t testing.TB, e encoding.Encoding) { 472 testCanEncode(t, e, encoding.CanEncodeInt32) 473 } 474 475 func testCanEncodeInt64(t testing.TB, e encoding.Encoding) { 476 testCanEncode(t, e, encoding.CanEncodeInt64) 477 } 478 479 func testCanEncodeInt96(t testing.TB, e encoding.Encoding) { 480 testCanEncode(t, e, encoding.CanEncodeInt96) 481 } 482 483 func testCanEncodeFloat(t testing.TB, e encoding.Encoding) { 484 testCanEncode(t, e, encoding.CanEncodeFloat) 485 } 486 487 func testCanEncodeDouble(t testing.TB, e encoding.Encoding) { 488 testCanEncode(t, e, encoding.CanEncodeDouble) 489 } 490 491 func testCanEncodeByteArray(t testing.TB, e encoding.Encoding) { 492 testCanEncode(t, e, encoding.CanEncodeByteArray) 493 } 494 495 func testCanEncodeFixedLenByteArray(t testing.TB, e encoding.Encoding) { 496 testCanEncode(t, e, encoding.CanEncodeFixedLenByteArray) 497 } 498 499 func testCanEncode(t testing.TB, e encoding.Encoding, test func(encoding.Encoding) bool) { 500 if !test(e) { 501 t.Skip("encoding not supported") 502 } 503 } 504 505 func assertNoError(t *testing.T, err error) { 506 t.Helper() 507 if err != nil { 508 t.Fatal(err) 509 } 510 } 511 512 func assertEqualBytes(t *testing.T, want, got []byte) { 513 t.Helper() 514 if !bytes.Equal(want, got) { 515 t.Fatalf("values mismatch:\nwant = %q\ngot = %q", want, got) 516 } 517 } 518 519 func assertEqualInt32(t *testing.T, want, got []int32) { 520 t.Helper() 521 assertEqualBytes(t, unsafecast.Int32ToBytes(want), unsafecast.Int32ToBytes(got)) 522 } 523 524 func assertEqualInt64(t *testing.T, want, got []int64) { 525 t.Helper() 526 assertEqualBytes(t, unsafecast.Int64ToBytes(want), unsafecast.Int64ToBytes(got)) 527 } 528 529 func assertEqualInt96(t *testing.T, want, got []deprecated.Int96) { 530 t.Helper() 531 assertEqualBytes(t, deprecated.Int96ToBytes(want), deprecated.Int96ToBytes(got)) 532 } 533 534 func assertEqualFloat32(t *testing.T, want, got []float32) { 535 t.Helper() 536 assertEqualBytes(t, unsafecast.Float32ToBytes(want), unsafecast.Float32ToBytes(got)) 537 } 538 539 func assertEqualFloat64(t *testing.T, want, got []float64) { 540 t.Helper() 541 assertEqualBytes(t, unsafecast.Float64ToBytes(want), unsafecast.Float64ToBytes(got)) 542 } 543 544 const ( 545 benchmarkNumValues = 10e3 546 ) 547 548 func newRand() *rand.Rand { 549 return rand.New(rand.NewSource(1)) 550 } 551 552 func BenchmarkEncode(b *testing.B) { 553 for _, encoding := range encodings { 554 b.Run(encoding.String(), func(b *testing.B) { benchmarkEncode(b, encoding) }) 555 } 556 } 557 558 func benchmarkEncode(b *testing.B, e encoding.Encoding) { 559 for _, test := range [...]struct { 560 scenario string 561 function func(*testing.B, encoding.Encoding) 562 }{ 563 { 564 scenario: "boolean", 565 function: benchmarkEncodeBoolean, 566 }, 567 { 568 scenario: "levels", 569 function: benchmarkEncodeLevels, 570 }, 571 { 572 scenario: "int32", 573 function: benchmarkEncodeInt32, 574 }, 575 { 576 scenario: "int64", 577 function: benchmarkEncodeInt64, 578 }, 579 { 580 scenario: "float", 581 function: benchmarkEncodeFloat, 582 }, 583 { 584 scenario: "double", 585 function: benchmarkEncodeDouble, 586 }, 587 { 588 scenario: "byte array", 589 function: benchmarkEncodeByteArray, 590 }, 591 { 592 scenario: "fixed length byte array", 593 function: benchmarkEncodeFixedLenByteArray, 594 }, 595 } { 596 b.Run(test.scenario, func(b *testing.B) { test.function(b, e) }) 597 } 598 } 599 600 func benchmarkEncodeBoolean(b *testing.B, e encoding.Encoding) { 601 testCanEncodeBoolean(b, e) 602 buffer := make([]byte, 0) 603 values := generateBooleanValues(benchmarkNumValues, newRand()) 604 setBitWidth(e, 1) 605 606 reportThroughput(b, benchmarkNumValues, len(values), func() { 607 benchmarkZeroAllocsPerRun(b, func() { 608 buffer, _ = e.EncodeBoolean(buffer, values) 609 }) 610 }) 611 } 612 613 func benchmarkEncodeLevels(b *testing.B, e encoding.Encoding) { 614 testCanEncodeLevels(b, e) 615 buffer := make([]byte, 0) 616 values := generateLevelValues(benchmarkNumValues, newRand()) 617 setBitWidth(e, maxLenInt8(unsafecast.BytesToInt8(values))) 618 619 reportThroughput(b, benchmarkNumValues, len(values), func() { 620 benchmarkZeroAllocsPerRun(b, func() { 621 buffer, _ = e.EncodeLevels(buffer, values) 622 }) 623 }) 624 } 625 626 func benchmarkEncodeInt32(b *testing.B, e encoding.Encoding) { 627 testCanEncodeInt32(b, e) 628 buffer := make([]byte, 0) 629 values := generateInt32Values(benchmarkNumValues, newRand()) 630 setBitWidth(e, maxLenInt32(values)) 631 632 reportThroughput(b, benchmarkNumValues, 4*len(values), func() { 633 benchmarkZeroAllocsPerRun(b, func() { 634 buffer, _ = e.EncodeInt32(buffer, values) 635 }) 636 }) 637 } 638 639 func benchmarkEncodeInt64(b *testing.B, e encoding.Encoding) { 640 testCanEncodeInt64(b, e) 641 buffer := make([]byte, 0) 642 values := generateInt64Values(benchmarkNumValues, newRand()) 643 setBitWidth(e, maxLenInt64(values)) 644 645 reportThroughput(b, benchmarkNumValues, 8*len(values), func() { 646 benchmarkZeroAllocsPerRun(b, func() { 647 buffer, _ = e.EncodeInt64(buffer, values) 648 }) 649 }) 650 } 651 652 func benchmarkEncodeFloat(b *testing.B, e encoding.Encoding) { 653 testCanEncodeFloat(b, e) 654 buffer := make([]byte, 0) 655 values := generateFloatValues(benchmarkNumValues, newRand()) 656 657 reportThroughput(b, benchmarkNumValues, 4*len(values), func() { 658 benchmarkZeroAllocsPerRun(b, func() { 659 buffer, _ = e.EncodeFloat(buffer, values) 660 }) 661 }) 662 } 663 664 func benchmarkEncodeDouble(b *testing.B, e encoding.Encoding) { 665 testCanEncodeDouble(b, e) 666 buffer := make([]byte, 0) 667 values := generateDoubleValues(benchmarkNumValues, newRand()) 668 669 reportThroughput(b, benchmarkNumValues, 8*len(values), func() { 670 benchmarkZeroAllocsPerRun(b, func() { 671 buffer, _ = e.EncodeDouble(buffer, values) 672 }) 673 }) 674 } 675 676 func benchmarkEncodeByteArray(b *testing.B, e encoding.Encoding) { 677 testCanEncodeByteArray(b, e) 678 buffer := make([]byte, 0) 679 values, offsets := generateByteArrayValues(benchmarkNumValues, newRand()) 680 681 numBytes := len(values) + 4*len(offsets) 682 reportThroughput(b, benchmarkNumValues, numBytes, func() { 683 benchmarkZeroAllocsPerRun(b, func() { 684 buffer, _ = e.EncodeByteArray(buffer, values, offsets) 685 }) 686 }) 687 } 688 689 func benchmarkEncodeFixedLenByteArray(b *testing.B, e encoding.Encoding) { 690 testCanEncodeFixedLenByteArray(b, e) 691 const size = 16 692 buffer := make([]byte, 0) 693 values := generateFixedLenByteArrayValues(benchmarkNumValues, newRand(), size) 694 695 reportThroughput(b, benchmarkNumValues, len(values), func() { 696 benchmarkZeroAllocsPerRun(b, func() { 697 buffer, _ = e.EncodeFixedLenByteArray(buffer, values, size) 698 }) 699 }) 700 } 701 702 func BenchmarkDecode(b *testing.B) { 703 for _, encoding := range encodings { 704 b.Run(encoding.String(), func(b *testing.B) { benchmarkDecode(b, encoding) }) 705 } 706 } 707 708 func benchmarkDecode(b *testing.B, e encoding.Encoding) { 709 for _, test := range [...]struct { 710 scenario string 711 function func(*testing.B, encoding.Encoding) 712 }{ 713 { 714 scenario: "boolean", 715 function: benchmarkDecodeBoolean, 716 }, 717 { 718 scenario: "levels", 719 function: benchmarkDecodeLevels, 720 }, 721 { 722 scenario: "int32", 723 function: benchmarkDecodeInt32, 724 }, 725 { 726 scenario: "int64", 727 function: benchmarkDecodeInt64, 728 }, 729 { 730 scenario: "float", 731 function: benchmarkDecodeFloat, 732 }, 733 { 734 scenario: "double", 735 function: benchmarkDecodeDouble, 736 }, 737 { 738 scenario: "byte array", 739 function: benchmarkDecodeByteArray, 740 }, 741 { 742 scenario: "fixed length byte array", 743 function: benchmarkDecodeFixedLenByteArray, 744 }, 745 } { 746 b.Run(test.scenario, func(b *testing.B) { test.function(b, e) }) 747 } 748 } 749 750 func benchmarkDecodeBoolean(b *testing.B, e encoding.Encoding) { 751 testCanEncodeBoolean(b, e) 752 values := generateBooleanValues(benchmarkNumValues, newRand()) 753 setBitWidth(e, 1) 754 buffer, _ := e.EncodeBoolean(nil, values) 755 756 reportThroughput(b, benchmarkNumValues, len(values), func() { 757 benchmarkZeroAllocsPerRun(b, func() { 758 values, _ = e.DecodeBoolean(values, buffer) 759 }) 760 }) 761 } 762 763 func benchmarkDecodeLevels(b *testing.B, e encoding.Encoding) { 764 testCanEncodeLevels(b, e) 765 values := generateLevelValues(benchmarkNumValues, newRand()) 766 setBitWidth(e, maxLenInt8(unsafecast.BytesToInt8(values))) 767 buffer, _ := e.EncodeLevels(nil, values) 768 769 reportThroughput(b, benchmarkNumValues, len(values), func() { 770 benchmarkZeroAllocsPerRun(b, func() { 771 values, _ = e.DecodeLevels(values, buffer) 772 }) 773 }) 774 } 775 776 func benchmarkDecodeInt32(b *testing.B, e encoding.Encoding) { 777 testCanEncodeInt32(b, e) 778 values := generateInt32Values(benchmarkNumValues, newRand()) 779 setBitWidth(e, maxLenInt32(values)) 780 buffer, _ := e.EncodeInt32(nil, values) 781 782 reportThroughput(b, benchmarkNumValues, 4*len(values), func() { 783 benchmarkZeroAllocsPerRun(b, func() { 784 values, _ = e.DecodeInt32(values, buffer) 785 }) 786 }) 787 } 788 789 func benchmarkDecodeInt64(b *testing.B, e encoding.Encoding) { 790 testCanEncodeInt64(b, e) 791 values := generateInt64Values(benchmarkNumValues, newRand()) 792 setBitWidth(e, maxLenInt64(values)) 793 buffer, _ := e.EncodeInt64(nil, values) 794 795 reportThroughput(b, benchmarkNumValues, 8*len(values), func() { 796 benchmarkZeroAllocsPerRun(b, func() { 797 values, _ = e.DecodeInt64(values, buffer) 798 }) 799 }) 800 } 801 802 func benchmarkDecodeFloat(b *testing.B, e encoding.Encoding) { 803 testCanEncodeFloat(b, e) 804 values := generateFloatValues(benchmarkNumValues, newRand()) 805 buffer, _ := e.EncodeFloat(nil, values) 806 807 reportThroughput(b, benchmarkNumValues, 4*len(values), func() { 808 benchmarkZeroAllocsPerRun(b, func() { 809 values, _ = e.DecodeFloat(values, buffer) 810 }) 811 }) 812 } 813 814 func benchmarkDecodeDouble(b *testing.B, e encoding.Encoding) { 815 testCanEncodeDouble(b, e) 816 values := generateDoubleValues(benchmarkNumValues, newRand()) 817 buffer, _ := e.EncodeDouble(nil, values) 818 819 reportThroughput(b, benchmarkNumValues, 8*len(values), func() { 820 benchmarkZeroAllocsPerRun(b, func() { 821 values, _ = e.DecodeDouble(values, buffer) 822 }) 823 }) 824 } 825 826 func benchmarkDecodeByteArray(b *testing.B, e encoding.Encoding) { 827 testCanEncodeByteArray(b, e) 828 values, offsets := generateByteArrayValues(benchmarkNumValues, newRand()) 829 buffer, _ := e.EncodeByteArray(nil, values, offsets) 830 831 numBytes := len(values) + 4*len(offsets) 832 reportThroughput(b, benchmarkNumValues, numBytes, func() { 833 benchmarkZeroAllocsPerRun(b, func() { 834 values, offsets, _ = e.DecodeByteArray(values, buffer, offsets) 835 }) 836 }) 837 } 838 839 func benchmarkDecodeFixedLenByteArray(b *testing.B, e encoding.Encoding) { 840 testCanEncodeFixedLenByteArray(b, e) 841 const size = 16 842 values := generateFixedLenByteArrayValues(benchmarkNumValues, newRand(), size) 843 buffer, _ := e.EncodeFixedLenByteArray(nil, values, size) 844 845 reportThroughput(b, benchmarkNumValues, len(values), func() { 846 benchmarkZeroAllocsPerRun(b, func() { 847 values, _ = e.DecodeFixedLenByteArray(values, buffer, size) 848 }) 849 }) 850 } 851 852 func benchmarkZeroAllocsPerRun(b *testing.B, f func()) { 853 if allocs := testing.AllocsPerRun(b.N, f); allocs != 0 && !testing.Short() { 854 b.Errorf("too many memory allocations: %g", allocs) 855 } 856 } 857 858 func reportThroughput(b *testing.B, numValues, numBytes int, do func()) { 859 start := time.Now() 860 do() 861 seconds := time.Since(start).Seconds() 862 b.SetBytes(int64(numBytes)) 863 b.ReportMetric(float64(b.N*numValues)/seconds, "value/s") 864 } 865 866 func generateLevelValues(n int, r *rand.Rand) []uint8 { 867 values := make([]uint8, n) 868 for i := range values { 869 values[i] = uint8(r.Intn(6)) 870 } 871 return values 872 } 873 874 func generateBooleanValues(n int, r *rand.Rand) []byte { 875 values := make([]byte, n/8+1) 876 io.ReadFull(r, values) 877 return values 878 } 879 880 func generateInt32Values(n int, r *rand.Rand) []int32 { 881 values := make([]int32, n) 882 for i := range values { 883 values[i] = r.Int31n(100) 884 } 885 return values 886 } 887 888 func generateInt64Values(n int, r *rand.Rand) []int64 { 889 values := make([]int64, n) 890 for i := range values { 891 values[i] = r.Int63n(100) 892 } 893 return values 894 } 895 896 func generateFloatValues(n int, r *rand.Rand) []float32 { 897 values := make([]float32, n) 898 for i := range values { 899 values[i] = r.Float32() 900 } 901 return values 902 } 903 904 func generateDoubleValues(n int, r *rand.Rand) []float64 { 905 values := make([]float64, n) 906 for i := range values { 907 values[i] = r.Float64() 908 } 909 return values 910 } 911 912 func generateByteArrayValues(n int, r *rand.Rand) ([]byte, []uint32) { 913 const maxLen = 21 914 offsets := make([]uint32, n+1) 915 values := make([]byte, n*maxLen) 916 length := 0 917 918 for i := 0; i < n; i++ { 919 k := r.Intn(maxLen) + 1 920 io.ReadFull(r, values[length:length+k]) 921 offsets[i] = uint32(length) 922 length += k 923 } 924 925 offsets[n] = uint32(length) 926 return values[:length], offsets 927 } 928 929 func generateFixedLenByteArrayValues(n int, r *rand.Rand, size int) []byte { 930 values := make([]byte, n*size) 931 io.ReadFull(r, values) 932 return values 933 } 934 935 func maxLenInt8(data []int8) int { 936 max := 0 937 for _, v := range data { 938 if n := bits.Len8(uint8(v)); n > max { 939 max = n 940 } 941 } 942 return max 943 } 944 945 func maxLenInt32(data []int32) int { 946 max := 0 947 for _, v := range data { 948 if n := bits.Len32(uint32(v)); n > max { 949 max = n 950 } 951 } 952 return max 953 } 954 955 func maxLenInt64(data []int64) int { 956 max := 0 957 for _, v := range data { 958 if n := bits.Len64(uint64(v)); n > max { 959 max = n 960 } 961 } 962 return max 963 }