github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/encoding/encoding_test.go (about) 1 package encoding_test 2 3 import ( 4 "bytes" 5 "encoding/binary" 6 "io" 7 "math" 8 "math/bits" 9 "math/rand" 10 "testing" 11 "time" 12 13 "github.com/vc42/parquet-go/deprecated" 14 "github.com/vc42/parquet-go/encoding" 15 "github.com/vc42/parquet-go/encoding/bitpacked" 16 "github.com/vc42/parquet-go/encoding/bytestreamsplit" 17 "github.com/vc42/parquet-go/encoding/delta" 18 "github.com/vc42/parquet-go/encoding/plain" 19 "github.com/vc42/parquet-go/encoding/rle" 20 "github.com/vc42/parquet-go/internal/unsafecast" 21 ) 22 23 func repeatInt64(seq []int64, n int) []int64 { 24 rep := make([]int64, len(seq)*n) 25 for i := 0; i < n; i++ { 26 copy(rep[i*len(seq):], seq) 27 } 28 return rep 29 } 30 31 var booleanTests = [...][]bool{ 32 {}, 33 {true}, 34 {false}, 35 {true, false, true, false, true, true, true, false, false, true}, 36 { // repeating 32x 37 true, true, true, true, true, true, true, true, 38 true, true, true, true, true, true, true, true, 39 true, true, true, true, true, true, true, true, 40 true, true, true, true, true, true, true, true, 41 }, 42 { // repeating 33x 43 true, true, true, true, true, true, true, true, 44 true, true, true, true, true, true, true, true, 45 true, true, true, true, true, true, true, true, 46 true, true, true, true, true, true, true, true, 47 true, 48 }, 49 { // alternating 15x 50 false, true, false, true, false, true, false, true, 51 false, true, false, true, false, true, false, 52 }, 53 { // alternating 16x 54 false, true, false, true, false, true, false, true, 55 false, true, false, true, false, true, false, true, 56 }, 57 } 58 59 var levelsTests = [...][]byte{ 60 {}, 61 {0}, 62 {1}, 63 {0, 1, 0, 2, 3, 4, 5, 6, math.MaxInt8, math.MaxInt8, 0}, 64 { // repeating 24x 65 42, 42, 42, 42, 42, 42, 42, 42, 66 42, 42, 42, 42, 42, 42, 42, 42, 67 42, 42, 42, 42, 42, 42, 42, 42, 68 }, 69 { // never repeating 70 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 71 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 72 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 73 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 74 }, 75 { // streaks of repeating values 76 0, 0, 0, 0, 1, 1, 1, 1, 77 2, 2, 2, 2, 3, 3, 3, 3, 78 4, 4, 4, 4, 5, 5, 5, 5, 79 6, 6, 6, 7, 7, 7, 8, 8, 80 8, 9, 9, 9, 81 }, 82 } 83 84 var int32Tests = [...][]int32{ 85 {}, 86 {0}, 87 {1}, 88 {-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxInt32, math.MaxInt32, 0}, 89 { // repeating 24x 90 42, 42, 42, 42, 42, 42, 42, 42, 91 42, 42, 42, 42, 42, 42, 42, 42, 92 42, 42, 42, 42, 42, 42, 42, 42, 93 }, 94 { // never repeating 95 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 96 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 97 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 98 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 99 }, 100 { // streaks of repeating values 101 0, 0, 0, 0, 1, 1, 1, 1, 102 2, 2, 2, 2, 3, 3, 3, 3, 103 4, 4, 4, 4, 5, 5, 5, 5, 104 6, 6, 6, 7, 7, 7, 8, 8, 105 8, 9, 9, 9, 106 }, 107 { // a sequence that triggered a bug in the delta binary packed encoding 108 24, 36, 47, 32, 29, 4, 9, 20, 2, 18, 109 }, 110 } 111 112 var int64Tests = [...][]int64{ 113 {}, 114 {0}, 115 {1}, 116 {-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxInt64, math.MaxInt64, 0}, 117 { // repeating 24x 118 42, 42, 42, 42, 42, 42, 42, 42, 119 42, 42, 42, 42, 42, 42, 42, 42, 120 42, 42, 42, 42, 42, 42, 42, 42, 121 }, 122 { // never repeating 123 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 124 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 125 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 126 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 127 }, 128 { // streaks of repeating values 129 0, 0, 0, 0, 1, 1, 1, 1, 130 2, 2, 2, 2, 3, 3, 3, 3, 131 4, 4, 4, 4, 5, 5, 5, 5, 132 6, 6, 6, 7, 7, 7, 8, 8, 133 8, 9, 9, 9, 134 }, 135 { // streaks of repeating values 136 0, 0, 0, 0, 1, 1, 1, 1, 137 2, 2, 2, 2, 3, 3, 3, 3, 138 4, 4, 4, 4, 5, 5, 5, 5, 139 6, 6, 6, 7, 7, 7, 8, 8, 140 8, 9, 9, 9, 141 }, 142 repeatInt64( // a sequence resulting in 64 bits words in the delta binary packed encoding 143 []int64{ 144 math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64, 145 math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64, 146 147 0, math.MaxInt64, math.MinInt64, math.MaxInt64, 148 math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64, 149 }, 150 5, 151 ), 152 } 153 154 var int96Tests = [...][]deprecated.Int96{ 155 {}, 156 {{0: 0}}, 157 {{0: 1}}, 158 } 159 160 var floatTests = [...][]float32{ 161 {}, 162 {0}, 163 {1}, 164 {0, 1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxFloat32, math.MaxFloat32, 0}, 165 {-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxFloat32, math.MaxFloat32, 0}, 166 } 167 168 var doubleTests = [...][]float64{ 169 {}, 170 {0}, 171 {1}, 172 {-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxFloat64, math.MaxFloat64, 0}, 173 } 174 175 var byteArrayTests = [...][][]byte{ 176 {}, 177 {[]byte("")}, 178 {[]byte("A"), []byte("B"), []byte("C")}, 179 {[]byte("hello world!"), bytes.Repeat([]byte("1234567890"), 100)}, 180 } 181 182 var fixedLenByteArrayTests = [...]struct { 183 size int 184 data []byte 185 }{ 186 {size: 1, data: []byte("")}, 187 {size: 1, data: []byte("ABCDEFGH")}, 188 {size: 2, data: []byte("ABCDEFGH")}, 189 {size: 4, data: []byte("ABCDEFGH")}, 190 {size: 8, data: []byte("ABCDEFGH")}, 191 {size: 10, data: bytes.Repeat([]byte("123456789"), 100)}, 192 {size: 16, data: bytes.Repeat([]byte("1234567890"), 160)}, 193 } 194 195 var encodings = [...]encoding.Encoding{ 196 new(plain.Encoding), 197 new(rle.Encoding), 198 new(bitpacked.Encoding), 199 new(plain.DictionaryEncoding), 200 new(rle.DictionaryEncoding), 201 new(delta.BinaryPackedEncoding), 202 new(delta.LengthByteArrayEncoding), 203 new(delta.ByteArrayEncoding), 204 new(bytestreamsplit.Encoding), 205 } 206 207 func TestEncoding(t *testing.T) { 208 for _, encoding := range encodings { 209 t.Run(encoding.String(), func(t *testing.T) { testEncoding(t, encoding) }) 210 } 211 } 212 213 func testEncoding(t *testing.T, e encoding.Encoding) { 214 for _, test := range [...]struct { 215 scenario string 216 function func(*testing.T, encoding.Encoding) 217 }{ 218 { 219 scenario: "boolean", 220 function: testBooleanEncoding, 221 }, 222 223 { 224 scenario: "levels", 225 function: testLevelsEncoding, 226 }, 227 228 { 229 scenario: "int32", 230 function: testInt32Encoding, 231 }, 232 233 { 234 scenario: "int64", 235 function: testInt64Encoding, 236 }, 237 238 { 239 scenario: "int96", 240 function: testInt96Encoding, 241 }, 242 243 { 244 scenario: "float", 245 function: testFloatEncoding, 246 }, 247 248 { 249 scenario: "double", 250 function: testDoubleEncoding, 251 }, 252 253 { 254 scenario: "byte array", 255 function: testByteArrayEncoding, 256 }, 257 258 { 259 scenario: "fixed length byte array", 260 function: testFixedLenByteArrayEncoding, 261 }, 262 } { 263 t.Run(test.scenario, func(t *testing.T) { test.function(t, e) }) 264 } 265 } 266 267 func setBitWidth(enc encoding.Encoding, bitWidth int) { 268 switch e := enc.(type) { 269 case *rle.Encoding: 270 e.BitWidth = bitWidth 271 case *bitpacked.Encoding: 272 e.BitWidth = bitWidth 273 } 274 } 275 276 type encodingFunc func(encoding.Encoding, []byte, []byte) ([]byte, error) 277 278 func testBooleanEncoding(t *testing.T, e encoding.Encoding) { 279 testCanEncodeBoolean(t, e) 280 buffer := []byte{} 281 values := []byte{} 282 input := []byte{} 283 setBitWidth(e, 1) 284 285 for _, test := range booleanTests { 286 t.Run("", func(t *testing.T) { 287 var err error 288 289 input = input[:0] 290 count := 0 291 for _, value := range test { 292 input = plain.AppendBoolean(input, count, value) 293 count++ 294 } 295 296 buffer, err = e.EncodeBoolean(buffer, input) 297 assertNoError(t, err) 298 values, err = e.DecodeBoolean(values, buffer) 299 assertNoError(t, err) 300 assertBytesEqual(t, input, values) 301 }) 302 } 303 } 304 305 func testLevelsEncoding(t *testing.T, e encoding.Encoding) { 306 testCanEncodeLevels(t, e) 307 buffer := []byte{} 308 values := []byte{} 309 310 for _, test := range levelsTests { 311 setBitWidth(e, maxLenInt8(unsafecast.BytesToInt8(test))) 312 313 t.Run("", func(t *testing.T) { 314 var err error 315 buffer, err = e.EncodeLevels(buffer, test) 316 assertNoError(t, err) 317 values, err = e.DecodeLevels(values, buffer) 318 assertNoError(t, err) 319 assertBytesEqual(t, test, values[:len(test)]) 320 }) 321 } 322 } 323 324 func testInt32Encoding(t *testing.T, e encoding.Encoding) { 325 testCanEncodeInt32(t, e) 326 buffer := []byte{} 327 values := []byte{} 328 329 for _, test := range int32Tests { 330 setBitWidth(e, maxLenInt32(test)) 331 332 t.Run("", func(t *testing.T) { 333 var err error 334 var input = unsafecast.Int32ToBytes(test) 335 buffer, err = e.EncodeInt32(buffer, input) 336 assertNoError(t, err) 337 values, err = e.DecodeInt32(values, buffer) 338 assertNoError(t, err) 339 assertBytesEqual(t, input, values) 340 }) 341 } 342 } 343 344 func testInt64Encoding(t *testing.T, e encoding.Encoding) { 345 testCanEncodeInt64(t, e) 346 buffer := []byte{} 347 values := []byte{} 348 349 for _, test := range int64Tests { 350 setBitWidth(e, maxLenInt64(test)) 351 352 t.Run("", func(t *testing.T) { 353 var err error 354 var input = unsafecast.Int64ToBytes(test) 355 buffer, err = e.EncodeInt64(buffer, input) 356 assertNoError(t, err) 357 values, err = e.DecodeInt64(values, buffer) 358 assertNoError(t, err) 359 assertBytesEqual(t, input, values) 360 }) 361 } 362 } 363 364 func testInt96Encoding(t *testing.T, e encoding.Encoding) { 365 testCanEncodeInt96(t, e) 366 buffer := []byte{} 367 values := []byte{} 368 369 for _, test := range int96Tests { 370 t.Run("", func(t *testing.T) { 371 var err error 372 var input = deprecated.Int96ToBytes(test) 373 buffer, err = e.EncodeInt96(buffer, input) 374 assertNoError(t, err) 375 values, err = e.DecodeInt96(values, buffer) 376 assertNoError(t, err) 377 assertBytesEqual(t, input, values) 378 }) 379 } 380 } 381 382 func testFloatEncoding(t *testing.T, e encoding.Encoding) { 383 testCanEncodeFloat(t, e) 384 buffer := []byte{} 385 values := []byte{} 386 387 for _, test := range floatTests { 388 t.Run("", func(t *testing.T) { 389 var err error 390 var input = unsafecast.Float32ToBytes(test) 391 buffer, err = e.EncodeFloat(buffer, input) 392 assertNoError(t, err) 393 values, err = e.DecodeFloat(values, buffer) 394 assertNoError(t, err) 395 assertBytesEqual(t, input, values) 396 }) 397 } 398 } 399 400 func testDoubleEncoding(t *testing.T, e encoding.Encoding) { 401 testCanEncodeDouble(t, e) 402 buffer := []byte{} 403 values := []byte{} 404 405 for _, test := range doubleTests { 406 t.Run("", func(t *testing.T) { 407 var err error 408 var input = unsafecast.Float64ToBytes(test) 409 buffer, err = e.EncodeDouble(buffer, input) 410 assertNoError(t, err) 411 values, err = e.DecodeDouble(values, buffer) 412 assertNoError(t, err) 413 assertBytesEqual(t, input, values) 414 }) 415 } 416 } 417 418 func testByteArrayEncoding(t *testing.T, e encoding.Encoding) { 419 testCanEncodeByteArray(t, e) 420 buffer := []byte{} 421 values := []byte{} 422 byteArrays := []byte{} 423 424 for _, test := range byteArrayTests { 425 byteArrays = byteArrays[:0] 426 427 for _, value := range test { 428 byteArrays = plain.AppendByteArray(byteArrays, value) 429 } 430 431 t.Run("", func(t *testing.T) { 432 var err error 433 buffer, err = e.EncodeByteArray(buffer, byteArrays) 434 assertNoError(t, err) 435 values, err = e.DecodeByteArray(values, buffer) 436 assertNoError(t, err) 437 assertBytesEqual(t, byteArrays, values) 438 }) 439 } 440 } 441 442 func testFixedLenByteArrayEncoding(t *testing.T, e encoding.Encoding) { 443 testCanEncodeFixedLenByteArray(t, e) 444 buffer := []byte{} 445 values := []byte{} 446 447 for _, test := range fixedLenByteArrayTests { 448 t.Run("", func(t *testing.T) { 449 var err error 450 buffer, err = e.EncodeFixedLenByteArray(buffer, test.data, test.size) 451 assertNoError(t, err) 452 values, err = e.DecodeFixedLenByteArray(values, buffer, test.size) 453 assertNoError(t, err) 454 assertBytesEqual(t, test.data, values) 455 }) 456 } 457 } 458 459 func testCanEncodeBoolean(t testing.TB, e encoding.Encoding) { 460 testCanEncode(t, e, encoding.CanEncodeBoolean) 461 } 462 463 func testCanEncodeLevels(t testing.TB, e encoding.Encoding) { 464 testCanEncode(t, e, encoding.CanEncodeLevels) 465 } 466 467 func testCanEncodeInt32(t testing.TB, e encoding.Encoding) { 468 testCanEncode(t, e, encoding.CanEncodeInt32) 469 } 470 471 func testCanEncodeInt64(t testing.TB, e encoding.Encoding) { 472 testCanEncode(t, e, encoding.CanEncodeInt64) 473 } 474 475 func testCanEncodeInt96(t testing.TB, e encoding.Encoding) { 476 testCanEncode(t, e, encoding.CanEncodeInt96) 477 } 478 479 func testCanEncodeFloat(t testing.TB, e encoding.Encoding) { 480 testCanEncode(t, e, encoding.CanEncodeFloat) 481 } 482 483 func testCanEncodeDouble(t testing.TB, e encoding.Encoding) { 484 testCanEncode(t, e, encoding.CanEncodeDouble) 485 } 486 487 func testCanEncodeByteArray(t testing.TB, e encoding.Encoding) { 488 testCanEncode(t, e, encoding.CanEncodeByteArray) 489 } 490 491 func testCanEncodeFixedLenByteArray(t testing.TB, e encoding.Encoding) { 492 testCanEncode(t, e, encoding.CanEncodeFixedLenByteArray) 493 } 494 495 func testCanEncode(t testing.TB, e encoding.Encoding, test func(encoding.Encoding) bool) { 496 if !test(e) { 497 t.Skip("encoding not supported") 498 } 499 } 500 501 func assertNoError(t *testing.T, err error) { 502 t.Helper() 503 if err != nil { 504 t.Fatal(err) 505 } 506 } 507 508 func assertBytesEqual(t *testing.T, want, got []byte) { 509 t.Helper() 510 if !bytes.Equal(want, got) { 511 t.Fatalf("values mismatch:\nwant = %q\ngot = %q", want, got) 512 } 513 } 514 515 const ( 516 benchmarkNumValues = 10e3 517 ) 518 519 func newRand() *rand.Rand { 520 return rand.New(rand.NewSource(1)) 521 } 522 523 func BenchmarkEncode(b *testing.B) { 524 for _, encoding := range encodings { 525 b.Run(encoding.String(), func(b *testing.B) { benchmarkEncode(b, encoding) }) 526 } 527 } 528 529 func benchmarkEncode(b *testing.B, e encoding.Encoding) { 530 for _, test := range [...]struct { 531 scenario string 532 function func(*testing.B, encoding.Encoding) 533 }{ 534 { 535 scenario: "boolean", 536 function: benchmarkEncodeBoolean, 537 }, 538 { 539 scenario: "levels", 540 function: benchmarkEncodeLevels, 541 }, 542 { 543 scenario: "int32", 544 function: benchmarkEncodeInt32, 545 }, 546 { 547 scenario: "int64", 548 function: benchmarkEncodeInt64, 549 }, 550 { 551 scenario: "float", 552 function: benchmarkEncodeFloat, 553 }, 554 { 555 scenario: "double", 556 function: benchmarkEncodeDouble, 557 }, 558 { 559 scenario: "byte array", 560 function: benchmarkEncodeByteArray, 561 }, 562 { 563 scenario: "fixed length byte array", 564 function: benchmarkEncodeFixedLenByteArray, 565 }, 566 } { 567 b.Run(test.scenario, func(b *testing.B) { test.function(b, e) }) 568 } 569 } 570 571 func benchmarkEncodeBoolean(b *testing.B, e encoding.Encoding) { 572 testCanEncodeBoolean(b, e) 573 buffer := make([]byte, 0) 574 values := generateBooleanValues(benchmarkNumValues, newRand()) 575 setBitWidth(e, 1) 576 577 reportThroughput(b, benchmarkNumValues, len(values), func() { 578 benchmarkZeroAllocsPerRun(b, func() { 579 buffer, _ = e.EncodeBoolean(buffer, values) 580 }) 581 }) 582 } 583 584 func benchmarkEncodeLevels(b *testing.B, e encoding.Encoding) { 585 testCanEncodeLevels(b, e) 586 buffer := make([]byte, 0) 587 values := generateLevelValues(benchmarkNumValues, newRand()) 588 setBitWidth(e, maxLenInt8(unsafecast.BytesToInt8(values))) 589 590 reportThroughput(b, benchmarkNumValues, len(values), func() { 591 benchmarkZeroAllocsPerRun(b, func() { 592 buffer, _ = e.EncodeLevels(buffer, values) 593 }) 594 }) 595 } 596 597 func benchmarkEncodeInt32(b *testing.B, e encoding.Encoding) { 598 testCanEncodeInt32(b, e) 599 buffer := make([]byte, 0) 600 values := generateInt32Values(benchmarkNumValues, newRand()) 601 setBitWidth(e, maxLenInt32(unsafecast.BytesToInt32(values))) 602 603 reportThroughput(b, benchmarkNumValues, len(values), func() { 604 benchmarkZeroAllocsPerRun(b, func() { 605 buffer, _ = e.EncodeInt32(buffer, values) 606 }) 607 }) 608 } 609 610 func benchmarkEncodeInt64(b *testing.B, e encoding.Encoding) { 611 testCanEncodeInt64(b, e) 612 buffer := make([]byte, 0) 613 values := generateInt64Values(benchmarkNumValues, newRand()) 614 setBitWidth(e, maxLenInt64(unsafecast.BytesToInt64(values))) 615 616 reportThroughput(b, benchmarkNumValues, len(values), func() { 617 benchmarkZeroAllocsPerRun(b, func() { 618 buffer, _ = e.EncodeInt64(buffer, values) 619 }) 620 }) 621 } 622 623 func benchmarkEncodeFloat(b *testing.B, e encoding.Encoding) { 624 testCanEncodeFloat(b, e) 625 buffer := make([]byte, 0) 626 values := generateFloatValues(benchmarkNumValues, newRand()) 627 628 reportThroughput(b, benchmarkNumValues, len(values), func() { 629 benchmarkZeroAllocsPerRun(b, func() { 630 buffer, _ = e.EncodeFloat(buffer, values) 631 }) 632 }) 633 } 634 635 func benchmarkEncodeDouble(b *testing.B, e encoding.Encoding) { 636 testCanEncodeDouble(b, e) 637 buffer := make([]byte, 0) 638 values := generateDoubleValues(benchmarkNumValues, newRand()) 639 640 reportThroughput(b, benchmarkNumValues, len(values), func() { 641 benchmarkZeroAllocsPerRun(b, func() { 642 buffer, _ = e.EncodeDouble(buffer, values) 643 }) 644 }) 645 } 646 647 func benchmarkEncodeByteArray(b *testing.B, e encoding.Encoding) { 648 testCanEncodeByteArray(b, e) 649 buffer := make([]byte, 0) 650 values := generateByteArrayValues(benchmarkNumValues, newRand()) 651 652 reportThroughput(b, benchmarkNumValues, len(values), func() { 653 benchmarkZeroAllocsPerRun(b, func() { 654 buffer, _ = e.EncodeByteArray(buffer, values) 655 }) 656 }) 657 } 658 659 func benchmarkEncodeFixedLenByteArray(b *testing.B, e encoding.Encoding) { 660 testCanEncodeFixedLenByteArray(b, e) 661 const size = 16 662 buffer := make([]byte, 0) 663 values := generateFixedLenByteArrayValues(benchmarkNumValues, newRand(), size) 664 665 reportThroughput(b, benchmarkNumValues, len(values), func() { 666 benchmarkZeroAllocsPerRun(b, func() { 667 buffer, _ = e.EncodeFixedLenByteArray(buffer, values, size) 668 }) 669 }) 670 } 671 672 func BenchmarkDecode(b *testing.B) { 673 for _, encoding := range encodings { 674 b.Run(encoding.String(), func(b *testing.B) { benchmarkDecode(b, encoding) }) 675 } 676 } 677 678 func benchmarkDecode(b *testing.B, e encoding.Encoding) { 679 for _, test := range [...]struct { 680 scenario string 681 function func(*testing.B, encoding.Encoding) 682 }{ 683 { 684 scenario: "boolean", 685 function: benchmarkDecodeBoolean, 686 }, 687 { 688 scenario: "levels", 689 function: benchmarkDecodeLevels, 690 }, 691 { 692 scenario: "int32", 693 function: benchmarkDecodeInt32, 694 }, 695 { 696 scenario: "int64", 697 function: benchmarkDecodeInt64, 698 }, 699 { 700 scenario: "float", 701 function: benchmarkDecodeFloat, 702 }, 703 { 704 scenario: "double", 705 function: benchmarkDecodeDouble, 706 }, 707 { 708 scenario: "byte array", 709 function: benchmarkDecodeByteArray, 710 }, 711 { 712 scenario: "fixed length byte array", 713 function: benchmarkDecodeFixedLenByteArray, 714 }, 715 } { 716 b.Run(test.scenario, func(b *testing.B) { test.function(b, e) }) 717 } 718 } 719 720 func benchmarkDecodeBoolean(b *testing.B, e encoding.Encoding) { 721 testCanEncodeBoolean(b, e) 722 values := generateBooleanValues(benchmarkNumValues, newRand()) 723 output := make([]byte, 0) 724 setBitWidth(e, 1) 725 buffer, _ := e.EncodeBoolean(nil, values) 726 727 reportThroughput(b, benchmarkNumValues, len(values), func() { 728 benchmarkZeroAllocsPerRun(b, func() { 729 output, _ = e.DecodeBoolean(output, buffer) 730 }) 731 }) 732 } 733 734 func benchmarkDecodeLevels(b *testing.B, e encoding.Encoding) { 735 testCanEncodeLevels(b, e) 736 values := generateLevelValues(benchmarkNumValues, newRand()) 737 output := make([]byte, 0) 738 setBitWidth(e, maxLenInt8(unsafecast.BytesToInt8(values))) 739 buffer, _ := e.EncodeLevels(nil, values) 740 741 reportThroughput(b, benchmarkNumValues, len(values), func() { 742 benchmarkZeroAllocsPerRun(b, func() { 743 output, _ = e.DecodeLevels(output, buffer) 744 }) 745 }) 746 } 747 748 func benchmarkDecodeInt32(b *testing.B, e encoding.Encoding) { 749 testCanEncodeInt32(b, e) 750 values := generateInt32Values(benchmarkNumValues, newRand()) 751 output := make([]byte, 0) 752 setBitWidth(e, maxLenInt32(unsafecast.BytesToInt32(values))) 753 buffer, _ := e.EncodeInt32(nil, values) 754 755 reportThroughput(b, benchmarkNumValues, len(values), func() { 756 benchmarkZeroAllocsPerRun(b, func() { 757 output, _ = e.DecodeInt32(output, buffer) 758 }) 759 }) 760 } 761 762 func benchmarkDecodeInt64(b *testing.B, e encoding.Encoding) { 763 testCanEncodeInt64(b, e) 764 values := generateInt64Values(benchmarkNumValues, newRand()) 765 output := make([]byte, 0) 766 setBitWidth(e, maxLenInt64(unsafecast.BytesToInt64(values))) 767 buffer, _ := e.EncodeInt64(nil, values) 768 769 reportThroughput(b, benchmarkNumValues, len(values), func() { 770 benchmarkZeroAllocsPerRun(b, func() { 771 output, _ = e.DecodeInt64(output, buffer) 772 }) 773 }) 774 } 775 776 func benchmarkDecodeFloat(b *testing.B, e encoding.Encoding) { 777 testCanEncodeFloat(b, e) 778 values := generateFloatValues(benchmarkNumValues, newRand()) 779 output := make([]byte, 0) 780 buffer, _ := e.EncodeFloat(nil, values) 781 782 reportThroughput(b, benchmarkNumValues, len(values), func() { 783 benchmarkZeroAllocsPerRun(b, func() { 784 output, _ = e.DecodeFloat(output, buffer) 785 }) 786 }) 787 } 788 789 func benchmarkDecodeDouble(b *testing.B, e encoding.Encoding) { 790 testCanEncodeDouble(b, e) 791 values := generateDoubleValues(benchmarkNumValues, newRand()) 792 output := make([]byte, 0) 793 buffer, _ := e.EncodeDouble(nil, values) 794 795 reportThroughput(b, benchmarkNumValues, len(values), func() { 796 benchmarkZeroAllocsPerRun(b, func() { 797 output, _ = e.DecodeDouble(output, buffer) 798 }) 799 }) 800 } 801 802 func benchmarkDecodeByteArray(b *testing.B, e encoding.Encoding) { 803 testCanEncodeByteArray(b, e) 804 values := generateByteArrayValues(benchmarkNumValues, newRand()) 805 output := make([]byte, 0) 806 buffer, _ := e.EncodeByteArray(nil, values) 807 808 reportThroughput(b, benchmarkNumValues, len(values), func() { 809 benchmarkZeroAllocsPerRun(b, func() { 810 output, _ = e.DecodeByteArray(output, buffer) 811 }) 812 }) 813 } 814 815 func benchmarkDecodeFixedLenByteArray(b *testing.B, e encoding.Encoding) { 816 testCanEncodeFixedLenByteArray(b, e) 817 const size = 16 818 values := generateFixedLenByteArrayValues(benchmarkNumValues, newRand(), size) 819 output := make([]byte, 0) 820 buffer, _ := e.EncodeFixedLenByteArray(nil, values, size) 821 822 reportThroughput(b, benchmarkNumValues, len(values), func() { 823 benchmarkZeroAllocsPerRun(b, func() { 824 output, _ = e.DecodeFixedLenByteArray(output, buffer, size) 825 }) 826 }) 827 } 828 829 func benchmarkZeroAllocsPerRun(b *testing.B, f func()) { 830 if allocs := testing.AllocsPerRun(b.N, f); allocs != 0 && !testing.Short() { 831 b.Errorf("too many memory allocations: %g", allocs) 832 } 833 } 834 835 func reportThroughput(b *testing.B, numValues, numBytes int, do func()) { 836 start := time.Now() 837 do() 838 seconds := time.Since(start).Seconds() 839 b.SetBytes(int64(numBytes)) 840 b.ReportMetric(float64(b.N*numValues)/seconds, "value/s") 841 } 842 843 func generateBooleanValues(n int, r *rand.Rand) []byte { 844 values := make([]byte, n/8+1) 845 io.ReadFull(r, values) 846 return values 847 } 848 849 func generateLevelValues(n int, r *rand.Rand) []byte { 850 values := make([]byte, n) 851 for i := range values { 852 values[i] = byte(r.Intn(6)) 853 } 854 return values 855 } 856 857 func generateInt32Values(n int, r *rand.Rand) []byte { 858 values := make([]byte, 4*n) 859 for i := 0; i < n; i++ { 860 binary.LittleEndian.PutUint32(values[i*4:], uint32(r.Int31n(100))) 861 } 862 return values 863 } 864 865 func generateInt64Values(n int, r *rand.Rand) []byte { 866 values := make([]byte, 8*n) 867 for i := 0; i < n; i++ { 868 binary.LittleEndian.PutUint64(values[i*8:], uint64(r.Int63n(100))) 869 } 870 return values 871 } 872 873 func generateFloatValues(n int, r *rand.Rand) []byte { 874 values := make([]byte, 4*n) 875 for i := 0; i < n; i++ { 876 binary.LittleEndian.PutUint32(values[i*4:], math.Float32bits(r.Float32())) 877 } 878 return values 879 } 880 881 func generateDoubleValues(n int, r *rand.Rand) []byte { 882 values := make([]byte, 8*n) 883 for i := 0; i < n; i++ { 884 binary.LittleEndian.PutUint64(values[i*8:], math.Float64bits(r.Float64())) 885 } 886 return values 887 } 888 889 func generateByteArrayValues(n int, r *rand.Rand) []byte { 890 const maxLen = 21 891 values := make([]byte, plain.ByteArrayLengthSize*n+n*maxLen) 892 length := 0 893 894 for i := 0; i < n; i++ { 895 k := r.Intn(maxLen) + 1 896 plain.PutByteArrayLength(values[length:], k) 897 length += plain.ByteArrayLengthSize 898 io.ReadFull(r, values[length:length+k]) 899 length += k 900 } 901 902 return values[:length] 903 } 904 905 func generateFixedLenByteArrayValues(n int, r *rand.Rand, size int) []byte { 906 values := make([]byte, n*size) 907 io.ReadFull(r, values) 908 return values 909 } 910 911 func maxLenInt8(data []int8) int { 912 max := 0 913 for _, v := range data { 914 if n := bits.Len8(uint8(v)); n > max { 915 max = n 916 } 917 } 918 return max 919 } 920 921 func maxLenInt32(data []int32) int { 922 max := 0 923 for _, v := range data { 924 if n := bits.Len32(uint32(v)); n > max { 925 max = n 926 } 927 } 928 return max 929 } 930 931 func maxLenInt64(data []int64) int { 932 max := 0 933 for _, v := range data { 934 if n := bits.Len64(uint64(v)); n > max { 935 max = n 936 } 937 } 938 return max 939 }