github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/encoding/encoding_test.go (about)

     1  package encoding_test
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  	"math"
     7  	"math/bits"
     8  	"math/rand"
     9  	"testing"
    10  	"time"
    11  
    12  	"github.com/segmentio/parquet-go/deprecated"
    13  	"github.com/segmentio/parquet-go/encoding"
    14  	"github.com/segmentio/parquet-go/encoding/bitpacked"
    15  	"github.com/segmentio/parquet-go/encoding/bytestreamsplit"
    16  	"github.com/segmentio/parquet-go/encoding/delta"
    17  	"github.com/segmentio/parquet-go/encoding/plain"
    18  	"github.com/segmentio/parquet-go/encoding/rle"
    19  	"github.com/segmentio/parquet-go/internal/unsafecast"
    20  )
    21  
    22  func repeatInt64(seq []int64, n int) []int64 {
    23  	rep := make([]int64, len(seq)*n)
    24  	for i := 0; i < n; i++ {
    25  		copy(rep[i*len(seq):], seq)
    26  	}
    27  	return rep
    28  }
    29  
// booleanTests holds the boolean sequences that testBooleanEncoding
// round-trips through each encoding: an empty input, single values, a mixed
// sequence, long runs of a repeated value, and alternating values.
var booleanTests = [...][]bool{
	{},
	{true},
	{false},
	{true, false, true, false, true, true, true, false, false, true},
	{ // repeating 32x
		true, true, true, true, true, true, true, true,
		true, true, true, true, true, true, true, true,
		true, true, true, true, true, true, true, true,
		true, true, true, true, true, true, true, true,
	},
	{ // repeating 33x
		true, true, true, true, true, true, true, true,
		true, true, true, true, true, true, true, true,
		true, true, true, true, true, true, true, true,
		true, true, true, true, true, true, true, true,
		true,
	},
	{ // alternating 15x
		false, true, false, true, false, true, false, true,
		false, true, false, true, false, true, false,
	},
	{ // alternating 16x
		false, true, false, true, false, true, false, true,
		false, true, false, true, false, true, false, true,
	},
}
    57  
// levelsTests holds the byte sequences that testLevelsEncoding round-trips
// through each encoding. The largest value used is math.MaxInt8.
var levelsTests = [...][]byte{
	{},
	{0},
	{1},
	{0, 1, 0, 2, 3, 4, 5, 6, math.MaxInt8, math.MaxInt8, 0},
	{ // repeating 24x
		42, 42, 42, 42, 42, 42, 42, 42,
		42, 42, 42, 42, 42, 42, 42, 42,
		42, 42, 42, 42, 42, 42, 42, 42,
	},
	{ // never repeating
		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
		0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
		0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
		0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
	},
	{ // streaks of repeating values
		0, 0, 0, 0, 1, 1, 1, 1,
		2, 2, 2, 2, 3, 3, 3, 3,
		4, 4, 4, 4, 5, 5, 5, 5,
		6, 6, 6, 7, 7, 7, 8, 8,
		8, 9, 9, 9,
	},
}
    82  
// int32Tests holds the int32 sequences that testInt32Encoding round-trips
// through each encoding, including a negative value and math.MaxInt32.
var int32Tests = [...][]int32{
	{},
	{0},
	{1},
	{-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxInt32, math.MaxInt32, 0},
	{ // repeating 24x
		42, 42, 42, 42, 42, 42, 42, 42,
		42, 42, 42, 42, 42, 42, 42, 42,
		42, 42, 42, 42, 42, 42, 42, 42,
	},
	{ // never repeating
		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
		0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
		0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
		0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
	},
	{ // streaks of repeating values
		0, 0, 0, 0, 1, 1, 1, 1,
		2, 2, 2, 2, 3, 3, 3, 3,
		4, 4, 4, 4, 5, 5, 5, 5,
		6, 6, 6, 7, 7, 7, 8, 8,
		8, 9, 9, 9,
	},
	{ // a sequence that triggered a bug in the delta binary packed encoding
		24, 36, 47, 32, 29, 4, 9, 20, 2, 18,
	},
}
   110  
// int64Tests holds the int64 sequences that testInt64Encoding round-trips
// through each encoding, including math.MinInt64 and math.MaxInt64.
var int64Tests = [...][]int64{
	{},
	{0},
	{1},
	{-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxInt64, math.MaxInt64, 0},
	{ // repeating 24x
		42, 42, 42, 42, 42, 42, 42, 42,
		42, 42, 42, 42, 42, 42, 42, 42,
		42, 42, 42, 42, 42, 42, 42, 42,
	},
	{ // never repeating
		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
		0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
		0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
		0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
	},
	{ // streaks of repeating values
		0, 0, 0, 0, 1, 1, 1, 1,
		2, 2, 2, 2, 3, 3, 3, 3,
		4, 4, 4, 4, 5, 5, 5, 5,
		6, 6, 6, 7, 7, 7, 8, 8,
		8, 9, 9, 9,
	},
	// NOTE(review): this entry is identical to the previous one; it looks
	// like an accidental duplicate, confirm before removing.
	{ // streaks of repeating values
		0, 0, 0, 0, 1, 1, 1, 1,
		2, 2, 2, 2, 3, 3, 3, 3,
		4, 4, 4, 4, 5, 5, 5, 5,
		6, 6, 6, 7, 7, 7, 8, 8,
		8, 9, 9, 9,
	},
	repeatInt64( // a sequence resulting in 64 bits words in the delta binary packed encoding
		[]int64{
			math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64,
			math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64,

			0, math.MaxInt64, math.MinInt64, math.MaxInt64,
			math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64,
		},
		5,
	),
}
   152  
// int96Tests holds the deprecated.Int96 sequences that testInt96Encoding
// round-trips through each encoding. The non-empty cases set only element 0
// of the value (to 0 and to 1 respectively).
var int96Tests = [...][]deprecated.Int96{
	{},
	{{0: 0}},
	{{0: 1}},
}
   158  
// floatTests holds the float32 sequences that testFloatEncoding round-trips
// through each encoding, including a negative value and math.MaxFloat32.
var floatTests = [...][]float32{
	{},
	{0},
	{1},
	{0, 1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxFloat32, math.MaxFloat32, 0},
	{-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxFloat32, math.MaxFloat32, 0},
}
   166  
// doubleTests holds the float64 sequences that testDoubleEncoding round-trips
// through each encoding, including a negative value and math.MaxFloat64.
var doubleTests = [...][]float64{
	{},
	{0},
	{1},
	{-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxFloat64, math.MaxFloat64, 0},
}
   173  
// byteArrayTests holds the lists of variable-length byte strings that
// testByteArrayEncoding concatenates and round-trips through each encoding:
// no values, one empty value, short values, and a mix of a short value with
// a 1000-byte one.
var byteArrayTests = [...][][]byte{
	{},
	{[]byte("")},
	{[]byte("A"), []byte("B"), []byte("C")},
	{[]byte("hello world!"), bytes.Repeat([]byte("1234567890"), 100)},
}
   180  
// fixedLenByteArrayTests holds the cases that testFixedLenByteArrayEncoding
// round-trips through each encoding. size is passed to the encoding as the
// fixed value length and data is the input buffer; in every case len(data) is
// a multiple of size.
var fixedLenByteArrayTests = [...]struct {
	size int
	data []byte
}{
	{size: 1, data: []byte("")},
	{size: 1, data: []byte("ABCDEFGH")},
	{size: 2, data: []byte("ABCDEFGH")},
	{size: 4, data: []byte("ABCDEFGH")},
	{size: 8, data: []byte("ABCDEFGH")},
	{size: 10, data: bytes.Repeat([]byte("123456789"), 100)},
	{size: 16, data: bytes.Repeat([]byte("1234567890"), 160)},
}
   193  
// encodings lists one instance of every encoding exercised by TestEncoding,
// BenchmarkEncode and BenchmarkDecode. The instances are shared across tests;
// setBitWidth mutates the rle and bitpacked ones in place.
var encodings = [...]encoding.Encoding{
	new(plain.Encoding),
	new(rle.Encoding),
	new(bitpacked.Encoding),
	new(plain.DictionaryEncoding),
	new(rle.DictionaryEncoding),
	new(delta.BinaryPackedEncoding),
	new(delta.LengthByteArrayEncoding),
	new(delta.ByteArrayEncoding),
	new(bytestreamsplit.Encoding),
}
   205  
   206  func TestEncoding(t *testing.T) {
   207  	for _, encoding := range encodings {
   208  		t.Run(encoding.String(), func(t *testing.T) { testEncoding(t, encoding) })
   209  	}
   210  }
   211  
   212  func testEncoding(t *testing.T, e encoding.Encoding) {
   213  	for _, test := range [...]struct {
   214  		scenario string
   215  		function func(*testing.T, encoding.Encoding)
   216  	}{
   217  		{
   218  			scenario: "boolean",
   219  			function: testBooleanEncoding,
   220  		},
   221  
   222  		{
   223  			scenario: "levels",
   224  			function: testLevelsEncoding,
   225  		},
   226  
   227  		{
   228  			scenario: "int32",
   229  			function: testInt32Encoding,
   230  		},
   231  
   232  		{
   233  			scenario: "int64",
   234  			function: testInt64Encoding,
   235  		},
   236  
   237  		{
   238  			scenario: "int96",
   239  			function: testInt96Encoding,
   240  		},
   241  
   242  		{
   243  			scenario: "float",
   244  			function: testFloatEncoding,
   245  		},
   246  
   247  		{
   248  			scenario: "double",
   249  			function: testDoubleEncoding,
   250  		},
   251  
   252  		{
   253  			scenario: "byte array",
   254  			function: testByteArrayEncoding,
   255  		},
   256  
   257  		{
   258  			scenario: "fixed length byte array",
   259  			function: testFixedLenByteArrayEncoding,
   260  		},
   261  	} {
   262  		t.Run(test.scenario, func(t *testing.T) { test.function(t, e) })
   263  	}
   264  }
   265  
   266  func setBitWidth(enc encoding.Encoding, bitWidth int) {
   267  	switch e := enc.(type) {
   268  	case *rle.Encoding:
   269  		e.BitWidth = bitWidth
   270  	case *bitpacked.Encoding:
   271  		e.BitWidth = bitWidth
   272  	}
   273  }
   274  
// encodingFunc is the signature of a function that takes an encoding and two
// byte slices and returns a byte slice and an error.
// NOTE(review): not referenced anywhere in the visible part of this file;
// confirm whether another file in the package still uses it.
type encodingFunc func(encoding.Encoding, []byte, []byte) ([]byte, error)
   276  
   277  func testBooleanEncoding(t *testing.T, e encoding.Encoding) {
   278  	testCanEncodeBoolean(t, e)
   279  	buffer := []byte{}
   280  	values := []byte{}
   281  	input := []byte{}
   282  	setBitWidth(e, 1)
   283  
   284  	for _, test := range booleanTests {
   285  		t.Run("", func(t *testing.T) {
   286  			var err error
   287  
   288  			input = input[:0]
   289  			count := 0
   290  			for _, value := range test {
   291  				input = plain.AppendBoolean(input, count, value)
   292  				count++
   293  			}
   294  
   295  			buffer, err = e.EncodeBoolean(buffer, input)
   296  			assertNoError(t, err)
   297  			values, err = e.DecodeBoolean(values, buffer)
   298  			assertNoError(t, err)
   299  			assertEqualBytes(t, input, values)
   300  		})
   301  	}
   302  }
   303  
   304  func testLevelsEncoding(t *testing.T, e encoding.Encoding) {
   305  	testCanEncodeLevels(t, e)
   306  	buffer := []byte{}
   307  	values := []byte{}
   308  
   309  	for _, input := range levelsTests {
   310  		setBitWidth(e, maxLenInt8(unsafecast.BytesToInt8(input)))
   311  
   312  		t.Run("", func(t *testing.T) {
   313  			var err error
   314  			buffer, err = e.EncodeLevels(buffer, input)
   315  			assertNoError(t, err)
   316  			values, err = e.DecodeLevels(values, buffer)
   317  			assertNoError(t, err)
   318  			assertEqualBytes(t, input, values[:len(input)])
   319  		})
   320  	}
   321  }
   322  
   323  func testInt32Encoding(t *testing.T, e encoding.Encoding) {
   324  	testCanEncodeInt32(t, e)
   325  	buffer := []byte{}
   326  	values := []int32{}
   327  
   328  	for _, input := range int32Tests {
   329  		setBitWidth(e, maxLenInt32(input))
   330  
   331  		t.Run("", func(t *testing.T) {
   332  			var err error
   333  			buffer, err = e.EncodeInt32(buffer, input)
   334  			assertNoError(t, err)
   335  			values, err = e.DecodeInt32(values, buffer)
   336  			assertNoError(t, err)
   337  			assertEqualInt32(t, input, values)
   338  		})
   339  	}
   340  }
   341  
   342  func testInt64Encoding(t *testing.T, e encoding.Encoding) {
   343  	testCanEncodeInt64(t, e)
   344  	buffer := []byte{}
   345  	values := []int64{}
   346  
   347  	for _, input := range int64Tests {
   348  		setBitWidth(e, maxLenInt64(input))
   349  
   350  		t.Run("", func(t *testing.T) {
   351  			var err error
   352  			buffer, err = e.EncodeInt64(buffer, input)
   353  			assertNoError(t, err)
   354  			values, err = e.DecodeInt64(values, buffer)
   355  			assertNoError(t, err)
   356  			assertEqualInt64(t, input, values)
   357  		})
   358  	}
   359  }
   360  
   361  func testInt96Encoding(t *testing.T, e encoding.Encoding) {
   362  	testCanEncodeInt96(t, e)
   363  	buffer := []byte{}
   364  	values := []deprecated.Int96{}
   365  
   366  	for _, input := range int96Tests {
   367  		t.Run("", func(t *testing.T) {
   368  			var err error
   369  			buffer, err = e.EncodeInt96(buffer, input)
   370  			assertNoError(t, err)
   371  			values, err = e.DecodeInt96(values, buffer)
   372  			assertNoError(t, err)
   373  			assertEqualInt96(t, input, values)
   374  		})
   375  	}
   376  }
   377  
   378  func testFloatEncoding(t *testing.T, e encoding.Encoding) {
   379  	testCanEncodeFloat(t, e)
   380  	buffer := []byte{}
   381  	values := []float32{}
   382  
   383  	for _, input := range floatTests {
   384  		t.Run("", func(t *testing.T) {
   385  			var err error
   386  			buffer, err = e.EncodeFloat(buffer, input)
   387  			assertNoError(t, err)
   388  			values, err = e.DecodeFloat(values, buffer)
   389  			assertNoError(t, err)
   390  			assertEqualFloat32(t, input, values)
   391  		})
   392  	}
   393  }
   394  
   395  func testDoubleEncoding(t *testing.T, e encoding.Encoding) {
   396  	testCanEncodeDouble(t, e)
   397  	buffer := []byte{}
   398  	values := []float64{}
   399  
   400  	for _, input := range doubleTests {
   401  		t.Run("", func(t *testing.T) {
   402  			var err error
   403  			buffer, err = e.EncodeDouble(buffer, input)
   404  			assertNoError(t, err)
   405  			values, err = e.DecodeDouble(values, buffer)
   406  			assertNoError(t, err)
   407  			assertEqualFloat64(t, input, values)
   408  		})
   409  	}
   410  }
   411  
   412  func testByteArrayEncoding(t *testing.T, e encoding.Encoding) {
   413  	testCanEncodeByteArray(t, e)
   414  	input := []byte{}
   415  	buffer := []byte{}
   416  	values := []byte{}
   417  	offsets := []uint32{}
   418  
   419  	for _, test := range byteArrayTests {
   420  		offsets, input = offsets[:0], input[:0]
   421  		lastOffset := uint32(0)
   422  
   423  		for _, value := range test {
   424  			offsets = append(offsets, lastOffset)
   425  			input = append(input, value...)
   426  			lastOffset += uint32(len(value))
   427  		}
   428  
   429  		offsets = append(offsets, lastOffset)
   430  
   431  		t.Run("", func(t *testing.T) {
   432  			var err error
   433  			buffer, err = e.EncodeByteArray(buffer, input, offsets)
   434  			assertNoError(t, err)
   435  			estimatedOutputSize := e.EstimateDecodeByteArraySize(buffer)
   436  			values, _, err = e.DecodeByteArray(values, buffer, offsets)
   437  			assertNoError(t, err)
   438  			assertEqualBytes(t, input, values)
   439  			if len(values) > estimatedOutputSize {
   440  				t.Errorf("the decode output was larger than the estimate: %d>%d", len(values), estimatedOutputSize)
   441  			}
   442  		})
   443  	}
   444  }
   445  
   446  func testFixedLenByteArrayEncoding(t *testing.T, e encoding.Encoding) {
   447  	testCanEncodeFixedLenByteArray(t, e)
   448  	buffer := []byte{}
   449  	values := []byte{}
   450  
   451  	for _, test := range fixedLenByteArrayTests {
   452  		t.Run("", func(t *testing.T) {
   453  			var err error
   454  			buffer, err = e.EncodeFixedLenByteArray(buffer, test.data, test.size)
   455  			assertNoError(t, err)
   456  			values, err = e.DecodeFixedLenByteArray(values, buffer, test.size)
   457  			assertNoError(t, err)
   458  			assertEqualBytes(t, test.data, values)
   459  		})
   460  	}
   461  }
   462  
// The testCanEncodeXxx helpers below skip the calling test or benchmark when
// e does not report support for the corresponding value type. Each one
// forwards to testCanEncode with the matching encoding.CanEncodeXxx predicate.

// testCanEncodeBoolean skips t unless encoding.CanEncodeBoolean(e) is true.
func testCanEncodeBoolean(t testing.TB, e encoding.Encoding) {
	testCanEncode(t, e, encoding.CanEncodeBoolean)
}

// testCanEncodeLevels skips t unless encoding.CanEncodeLevels(e) is true.
func testCanEncodeLevels(t testing.TB, e encoding.Encoding) {
	testCanEncode(t, e, encoding.CanEncodeLevels)
}

// testCanEncodeInt32 skips t unless encoding.CanEncodeInt32(e) is true.
func testCanEncodeInt32(t testing.TB, e encoding.Encoding) {
	testCanEncode(t, e, encoding.CanEncodeInt32)
}

// testCanEncodeInt64 skips t unless encoding.CanEncodeInt64(e) is true.
func testCanEncodeInt64(t testing.TB, e encoding.Encoding) {
	testCanEncode(t, e, encoding.CanEncodeInt64)
}

// testCanEncodeInt96 skips t unless encoding.CanEncodeInt96(e) is true.
func testCanEncodeInt96(t testing.TB, e encoding.Encoding) {
	testCanEncode(t, e, encoding.CanEncodeInt96)
}

// testCanEncodeFloat skips t unless encoding.CanEncodeFloat(e) is true.
func testCanEncodeFloat(t testing.TB, e encoding.Encoding) {
	testCanEncode(t, e, encoding.CanEncodeFloat)
}

// testCanEncodeDouble skips t unless encoding.CanEncodeDouble(e) is true.
func testCanEncodeDouble(t testing.TB, e encoding.Encoding) {
	testCanEncode(t, e, encoding.CanEncodeDouble)
}

// testCanEncodeByteArray skips t unless encoding.CanEncodeByteArray(e) is
// true.
func testCanEncodeByteArray(t testing.TB, e encoding.Encoding) {
	testCanEncode(t, e, encoding.CanEncodeByteArray)
}

// testCanEncodeFixedLenByteArray skips t unless
// encoding.CanEncodeFixedLenByteArray(e) is true.
func testCanEncodeFixedLenByteArray(t testing.TB, e encoding.Encoding) {
	testCanEncode(t, e, encoding.CanEncodeFixedLenByteArray)
}
   498  
   499  func testCanEncode(t testing.TB, e encoding.Encoding, test func(encoding.Encoding) bool) {
   500  	if !test(e) {
   501  		t.Skip("encoding not supported")
   502  	}
   503  }
   504  
   505  func assertNoError(t *testing.T, err error) {
   506  	t.Helper()
   507  	if err != nil {
   508  		t.Fatal(err)
   509  	}
   510  }
   511  
   512  func assertEqualBytes(t *testing.T, want, got []byte) {
   513  	t.Helper()
   514  	if !bytes.Equal(want, got) {
   515  		t.Fatalf("values mismatch:\nwant = %q\ngot  = %q", want, got)
   516  	}
   517  }
   518  
   519  func assertEqualInt32(t *testing.T, want, got []int32) {
   520  	t.Helper()
   521  	assertEqualBytes(t, unsafecast.Int32ToBytes(want), unsafecast.Int32ToBytes(got))
   522  }
   523  
   524  func assertEqualInt64(t *testing.T, want, got []int64) {
   525  	t.Helper()
   526  	assertEqualBytes(t, unsafecast.Int64ToBytes(want), unsafecast.Int64ToBytes(got))
   527  }
   528  
   529  func assertEqualInt96(t *testing.T, want, got []deprecated.Int96) {
   530  	t.Helper()
   531  	assertEqualBytes(t, deprecated.Int96ToBytes(want), deprecated.Int96ToBytes(got))
   532  }
   533  
   534  func assertEqualFloat32(t *testing.T, want, got []float32) {
   535  	t.Helper()
   536  	assertEqualBytes(t, unsafecast.Float32ToBytes(want), unsafecast.Float32ToBytes(got))
   537  }
   538  
   539  func assertEqualFloat64(t *testing.T, want, got []float64) {
   540  	t.Helper()
   541  	assertEqualBytes(t, unsafecast.Float64ToBytes(want), unsafecast.Float64ToBytes(got))
   542  }
   543  
const (
	// benchmarkNumValues is the number of values generated for each
	// benchmark input. It is an untyped constant equal to 10000 and is
	// converted to int at every call site in this file.
	benchmarkNumValues = 10e3
)
   547  
   548  func newRand() *rand.Rand {
   549  	return rand.New(rand.NewSource(1))
   550  }
   551  
   552  func BenchmarkEncode(b *testing.B) {
   553  	for _, encoding := range encodings {
   554  		b.Run(encoding.String(), func(b *testing.B) { benchmarkEncode(b, encoding) })
   555  	}
   556  }
   557  
   558  func benchmarkEncode(b *testing.B, e encoding.Encoding) {
   559  	for _, test := range [...]struct {
   560  		scenario string
   561  		function func(*testing.B, encoding.Encoding)
   562  	}{
   563  		{
   564  			scenario: "boolean",
   565  			function: benchmarkEncodeBoolean,
   566  		},
   567  		{
   568  			scenario: "levels",
   569  			function: benchmarkEncodeLevels,
   570  		},
   571  		{
   572  			scenario: "int32",
   573  			function: benchmarkEncodeInt32,
   574  		},
   575  		{
   576  			scenario: "int64",
   577  			function: benchmarkEncodeInt64,
   578  		},
   579  		{
   580  			scenario: "float",
   581  			function: benchmarkEncodeFloat,
   582  		},
   583  		{
   584  			scenario: "double",
   585  			function: benchmarkEncodeDouble,
   586  		},
   587  		{
   588  			scenario: "byte array",
   589  			function: benchmarkEncodeByteArray,
   590  		},
   591  		{
   592  			scenario: "fixed length byte array",
   593  			function: benchmarkEncodeFixedLenByteArray,
   594  		},
   595  	} {
   596  		b.Run(test.scenario, func(b *testing.B) { test.function(b, e) })
   597  	}
   598  }
   599  
   600  func benchmarkEncodeBoolean(b *testing.B, e encoding.Encoding) {
   601  	testCanEncodeBoolean(b, e)
   602  	buffer := make([]byte, 0)
   603  	values := generateBooleanValues(benchmarkNumValues, newRand())
   604  	setBitWidth(e, 1)
   605  
   606  	reportThroughput(b, benchmarkNumValues, len(values), func() {
   607  		benchmarkZeroAllocsPerRun(b, func() {
   608  			buffer, _ = e.EncodeBoolean(buffer, values)
   609  		})
   610  	})
   611  }
   612  
   613  func benchmarkEncodeLevels(b *testing.B, e encoding.Encoding) {
   614  	testCanEncodeLevels(b, e)
   615  	buffer := make([]byte, 0)
   616  	values := generateLevelValues(benchmarkNumValues, newRand())
   617  	setBitWidth(e, maxLenInt8(unsafecast.BytesToInt8(values)))
   618  
   619  	reportThroughput(b, benchmarkNumValues, len(values), func() {
   620  		benchmarkZeroAllocsPerRun(b, func() {
   621  			buffer, _ = e.EncodeLevels(buffer, values)
   622  		})
   623  	})
   624  }
   625  
   626  func benchmarkEncodeInt32(b *testing.B, e encoding.Encoding) {
   627  	testCanEncodeInt32(b, e)
   628  	buffer := make([]byte, 0)
   629  	values := generateInt32Values(benchmarkNumValues, newRand())
   630  	setBitWidth(e, maxLenInt32(values))
   631  
   632  	reportThroughput(b, benchmarkNumValues, 4*len(values), func() {
   633  		benchmarkZeroAllocsPerRun(b, func() {
   634  			buffer, _ = e.EncodeInt32(buffer, values)
   635  		})
   636  	})
   637  }
   638  
   639  func benchmarkEncodeInt64(b *testing.B, e encoding.Encoding) {
   640  	testCanEncodeInt64(b, e)
   641  	buffer := make([]byte, 0)
   642  	values := generateInt64Values(benchmarkNumValues, newRand())
   643  	setBitWidth(e, maxLenInt64(values))
   644  
   645  	reportThroughput(b, benchmarkNumValues, 8*len(values), func() {
   646  		benchmarkZeroAllocsPerRun(b, func() {
   647  			buffer, _ = e.EncodeInt64(buffer, values)
   648  		})
   649  	})
   650  }
   651  
   652  func benchmarkEncodeFloat(b *testing.B, e encoding.Encoding) {
   653  	testCanEncodeFloat(b, e)
   654  	buffer := make([]byte, 0)
   655  	values := generateFloatValues(benchmarkNumValues, newRand())
   656  
   657  	reportThroughput(b, benchmarkNumValues, 4*len(values), func() {
   658  		benchmarkZeroAllocsPerRun(b, func() {
   659  			buffer, _ = e.EncodeFloat(buffer, values)
   660  		})
   661  	})
   662  }
   663  
   664  func benchmarkEncodeDouble(b *testing.B, e encoding.Encoding) {
   665  	testCanEncodeDouble(b, e)
   666  	buffer := make([]byte, 0)
   667  	values := generateDoubleValues(benchmarkNumValues, newRand())
   668  
   669  	reportThroughput(b, benchmarkNumValues, 8*len(values), func() {
   670  		benchmarkZeroAllocsPerRun(b, func() {
   671  			buffer, _ = e.EncodeDouble(buffer, values)
   672  		})
   673  	})
   674  }
   675  
   676  func benchmarkEncodeByteArray(b *testing.B, e encoding.Encoding) {
   677  	testCanEncodeByteArray(b, e)
   678  	buffer := make([]byte, 0)
   679  	values, offsets := generateByteArrayValues(benchmarkNumValues, newRand())
   680  
   681  	numBytes := len(values) + 4*len(offsets)
   682  	reportThroughput(b, benchmarkNumValues, numBytes, func() {
   683  		benchmarkZeroAllocsPerRun(b, func() {
   684  			buffer, _ = e.EncodeByteArray(buffer, values, offsets)
   685  		})
   686  	})
   687  }
   688  
   689  func benchmarkEncodeFixedLenByteArray(b *testing.B, e encoding.Encoding) {
   690  	testCanEncodeFixedLenByteArray(b, e)
   691  	const size = 16
   692  	buffer := make([]byte, 0)
   693  	values := generateFixedLenByteArrayValues(benchmarkNumValues, newRand(), size)
   694  
   695  	reportThroughput(b, benchmarkNumValues, len(values), func() {
   696  		benchmarkZeroAllocsPerRun(b, func() {
   697  			buffer, _ = e.EncodeFixedLenByteArray(buffer, values, size)
   698  		})
   699  	})
   700  }
   701  
   702  func BenchmarkDecode(b *testing.B) {
   703  	for _, encoding := range encodings {
   704  		b.Run(encoding.String(), func(b *testing.B) { benchmarkDecode(b, encoding) })
   705  	}
   706  }
   707  
   708  func benchmarkDecode(b *testing.B, e encoding.Encoding) {
   709  	for _, test := range [...]struct {
   710  		scenario string
   711  		function func(*testing.B, encoding.Encoding)
   712  	}{
   713  		{
   714  			scenario: "boolean",
   715  			function: benchmarkDecodeBoolean,
   716  		},
   717  		{
   718  			scenario: "levels",
   719  			function: benchmarkDecodeLevels,
   720  		},
   721  		{
   722  			scenario: "int32",
   723  			function: benchmarkDecodeInt32,
   724  		},
   725  		{
   726  			scenario: "int64",
   727  			function: benchmarkDecodeInt64,
   728  		},
   729  		{
   730  			scenario: "float",
   731  			function: benchmarkDecodeFloat,
   732  		},
   733  		{
   734  			scenario: "double",
   735  			function: benchmarkDecodeDouble,
   736  		},
   737  		{
   738  			scenario: "byte array",
   739  			function: benchmarkDecodeByteArray,
   740  		},
   741  		{
   742  			scenario: "fixed length byte array",
   743  			function: benchmarkDecodeFixedLenByteArray,
   744  		},
   745  	} {
   746  		b.Run(test.scenario, func(b *testing.B) { test.function(b, e) })
   747  	}
   748  }
   749  
   750  func benchmarkDecodeBoolean(b *testing.B, e encoding.Encoding) {
   751  	testCanEncodeBoolean(b, e)
   752  	values := generateBooleanValues(benchmarkNumValues, newRand())
   753  	setBitWidth(e, 1)
   754  	buffer, _ := e.EncodeBoolean(nil, values)
   755  
   756  	reportThroughput(b, benchmarkNumValues, len(values), func() {
   757  		benchmarkZeroAllocsPerRun(b, func() {
   758  			values, _ = e.DecodeBoolean(values, buffer)
   759  		})
   760  	})
   761  }
   762  
   763  func benchmarkDecodeLevels(b *testing.B, e encoding.Encoding) {
   764  	testCanEncodeLevels(b, e)
   765  	values := generateLevelValues(benchmarkNumValues, newRand())
   766  	setBitWidth(e, maxLenInt8(unsafecast.BytesToInt8(values)))
   767  	buffer, _ := e.EncodeLevels(nil, values)
   768  
   769  	reportThroughput(b, benchmarkNumValues, len(values), func() {
   770  		benchmarkZeroAllocsPerRun(b, func() {
   771  			values, _ = e.DecodeLevels(values, buffer)
   772  		})
   773  	})
   774  }
   775  
   776  func benchmarkDecodeInt32(b *testing.B, e encoding.Encoding) {
   777  	testCanEncodeInt32(b, e)
   778  	values := generateInt32Values(benchmarkNumValues, newRand())
   779  	setBitWidth(e, maxLenInt32(values))
   780  	buffer, _ := e.EncodeInt32(nil, values)
   781  
   782  	reportThroughput(b, benchmarkNumValues, 4*len(values), func() {
   783  		benchmarkZeroAllocsPerRun(b, func() {
   784  			values, _ = e.DecodeInt32(values, buffer)
   785  		})
   786  	})
   787  }
   788  
   789  func benchmarkDecodeInt64(b *testing.B, e encoding.Encoding) {
   790  	testCanEncodeInt64(b, e)
   791  	values := generateInt64Values(benchmarkNumValues, newRand())
   792  	setBitWidth(e, maxLenInt64(values))
   793  	buffer, _ := e.EncodeInt64(nil, values)
   794  
   795  	reportThroughput(b, benchmarkNumValues, 8*len(values), func() {
   796  		benchmarkZeroAllocsPerRun(b, func() {
   797  			values, _ = e.DecodeInt64(values, buffer)
   798  		})
   799  	})
   800  }
   801  
   802  func benchmarkDecodeFloat(b *testing.B, e encoding.Encoding) {
   803  	testCanEncodeFloat(b, e)
   804  	values := generateFloatValues(benchmarkNumValues, newRand())
   805  	buffer, _ := e.EncodeFloat(nil, values)
   806  
   807  	reportThroughput(b, benchmarkNumValues, 4*len(values), func() {
   808  		benchmarkZeroAllocsPerRun(b, func() {
   809  			values, _ = e.DecodeFloat(values, buffer)
   810  		})
   811  	})
   812  }
   813  
   814  func benchmarkDecodeDouble(b *testing.B, e encoding.Encoding) {
   815  	testCanEncodeDouble(b, e)
   816  	values := generateDoubleValues(benchmarkNumValues, newRand())
   817  	buffer, _ := e.EncodeDouble(nil, values)
   818  
   819  	reportThroughput(b, benchmarkNumValues, 8*len(values), func() {
   820  		benchmarkZeroAllocsPerRun(b, func() {
   821  			values, _ = e.DecodeDouble(values, buffer)
   822  		})
   823  	})
   824  }
   825  
   826  func benchmarkDecodeByteArray(b *testing.B, e encoding.Encoding) {
   827  	testCanEncodeByteArray(b, e)
   828  	values, offsets := generateByteArrayValues(benchmarkNumValues, newRand())
   829  	buffer, _ := e.EncodeByteArray(nil, values, offsets)
   830  
   831  	numBytes := len(values) + 4*len(offsets)
   832  	reportThroughput(b, benchmarkNumValues, numBytes, func() {
   833  		benchmarkZeroAllocsPerRun(b, func() {
   834  			values, offsets, _ = e.DecodeByteArray(values, buffer, offsets)
   835  		})
   836  	})
   837  }
   838  
   839  func benchmarkDecodeFixedLenByteArray(b *testing.B, e encoding.Encoding) {
   840  	testCanEncodeFixedLenByteArray(b, e)
   841  	const size = 16
   842  	values := generateFixedLenByteArrayValues(benchmarkNumValues, newRand(), size)
   843  	buffer, _ := e.EncodeFixedLenByteArray(nil, values, size)
   844  
   845  	reportThroughput(b, benchmarkNumValues, len(values), func() {
   846  		benchmarkZeroAllocsPerRun(b, func() {
   847  			values, _ = e.DecodeFixedLenByteArray(values, buffer, size)
   848  		})
   849  	})
   850  }
   851  
   852  func benchmarkZeroAllocsPerRun(b *testing.B, f func()) {
   853  	if allocs := testing.AllocsPerRun(b.N, f); allocs != 0 && !testing.Short() {
   854  		b.Errorf("too many memory allocations: %g", allocs)
   855  	}
   856  }
   857  
   858  func reportThroughput(b *testing.B, numValues, numBytes int, do func()) {
   859  	start := time.Now()
   860  	do()
   861  	seconds := time.Since(start).Seconds()
   862  	b.SetBytes(int64(numBytes))
   863  	b.ReportMetric(float64(b.N*numValues)/seconds, "value/s")
   864  }
   865  
   866  func generateLevelValues(n int, r *rand.Rand) []uint8 {
   867  	values := make([]uint8, n)
   868  	for i := range values {
   869  		values[i] = uint8(r.Intn(6))
   870  	}
   871  	return values
   872  }
   873  
   874  func generateBooleanValues(n int, r *rand.Rand) []byte {
   875  	values := make([]byte, n/8+1)
   876  	io.ReadFull(r, values)
   877  	return values
   878  }
   879  
   880  func generateInt32Values(n int, r *rand.Rand) []int32 {
   881  	values := make([]int32, n)
   882  	for i := range values {
   883  		values[i] = r.Int31n(100)
   884  	}
   885  	return values
   886  }
   887  
   888  func generateInt64Values(n int, r *rand.Rand) []int64 {
   889  	values := make([]int64, n)
   890  	for i := range values {
   891  		values[i] = r.Int63n(100)
   892  	}
   893  	return values
   894  }
   895  
   896  func generateFloatValues(n int, r *rand.Rand) []float32 {
   897  	values := make([]float32, n)
   898  	for i := range values {
   899  		values[i] = r.Float32()
   900  	}
   901  	return values
   902  }
   903  
   904  func generateDoubleValues(n int, r *rand.Rand) []float64 {
   905  	values := make([]float64, n)
   906  	for i := range values {
   907  		values[i] = r.Float64()
   908  	}
   909  	return values
   910  }
   911  
   912  func generateByteArrayValues(n int, r *rand.Rand) ([]byte, []uint32) {
   913  	const maxLen = 21
   914  	offsets := make([]uint32, n+1)
   915  	values := make([]byte, n*maxLen)
   916  	length := 0
   917  
   918  	for i := 0; i < n; i++ {
   919  		k := r.Intn(maxLen) + 1
   920  		io.ReadFull(r, values[length:length+k])
   921  		offsets[i] = uint32(length)
   922  		length += k
   923  	}
   924  
   925  	offsets[n] = uint32(length)
   926  	return values[:length], offsets
   927  }
   928  
   929  func generateFixedLenByteArrayValues(n int, r *rand.Rand, size int) []byte {
   930  	values := make([]byte, n*size)
   931  	io.ReadFull(r, values)
   932  	return values
   933  }
   934  
   935  func maxLenInt8(data []int8) int {
   936  	max := 0
   937  	for _, v := range data {
   938  		if n := bits.Len8(uint8(v)); n > max {
   939  			max = n
   940  		}
   941  	}
   942  	return max
   943  }
   944  
   945  func maxLenInt32(data []int32) int {
   946  	max := 0
   947  	for _, v := range data {
   948  		if n := bits.Len32(uint32(v)); n > max {
   949  			max = n
   950  		}
   951  	}
   952  	return max
   953  }
   954  
   955  func maxLenInt64(data []int64) int {
   956  	max := 0
   957  	for _, v := range data {
   958  		if n := bits.Len64(uint64(v)); n > max {
   959  			max = n
   960  		}
   961  	}
   962  	return max
   963  }