github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/encoding/delta/binary_packed_test.go (about)

     1  package delta
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"math/bits"
     7  	"testing"
     8  )
     9  
    10  func maxLen32(miniBlock []int32) (maxLen int) {
    11  	for _, v := range miniBlock {
    12  		if n := bits.Len32(uint32(v)); n > maxLen {
    13  			maxLen = n
    14  		}
    15  	}
    16  	return maxLen
    17  }
    18  
    19  func maxLen64(miniBlock []int64) (maxLen int) {
    20  	for _, v := range miniBlock {
    21  		if n := bits.Len64(uint64(v)); n > maxLen {
    22  			maxLen = n
    23  		}
    24  	}
    25  	return maxLen
    26  }
    27  
    28  func TestBlockDeltaInt32(t *testing.T) {
    29  	testBlockDeltaInt32(t, blockDeltaInt32)
    30  }
    31  
    32  func testBlockDeltaInt32(t *testing.T, f func(*[blockSize]int32, int32) int32) {
    33  	t.Helper()
    34  	block := [blockSize]int32{}
    35  	for i := range block {
    36  		block[i] = int32(2 * (i + 1))
    37  	}
    38  	lastValue := f(&block, 0)
    39  	if lastValue != 2*blockSize {
    40  		t.Errorf("wrong last block value: want=%d got=%d", 2*blockSize, lastValue)
    41  	}
    42  	for i := range block {
    43  		j := int32(2 * (i + 0))
    44  		k := int32(2 * (i + 1))
    45  		if block[i] != (k - j) {
    46  			t.Errorf("wrong block delta at index %d: want=%d got=%d", i, k-j, block[i])
    47  		}
    48  	}
    49  }
    50  
    51  func TestBlockMinInt32(t *testing.T) {
    52  	testBlockMinInt32(t, blockMinInt32)
    53  }
    54  
    55  func testBlockMinInt32(t *testing.T, f func(*[blockSize]int32) int32) {
    56  	t.Helper()
    57  	block := [blockSize]int32{}
    58  	for i := range block {
    59  		block[i] = blockSize - int32(i)
    60  	}
    61  	if min := f(&block); min != 1 {
    62  		t.Errorf("wrong min block value: want=1 got=%d", min)
    63  	}
    64  }
    65  
    66  func TestBlockSubInt32(t *testing.T) {
    67  	testBlockSubInt32(t, blockSubInt32)
    68  }
    69  
    70  func testBlockSubInt32(t *testing.T, f func(*[blockSize]int32, int32)) {
    71  	t.Helper()
    72  	block := [blockSize]int32{}
    73  	for i := range block {
    74  		block[i] = int32(i)
    75  	}
    76  	f(&block, 1)
    77  	for i := range block {
    78  		if block[i] != int32(i-1) {
    79  			t.Errorf("wrong block value at index %d: want=%d got=%d", i, i-1, block[i])
    80  		}
    81  	}
    82  }
    83  
    84  func TestBlockBitWidthsInt32(t *testing.T) {
    85  	testBlockBitWidthsInt32(t, blockBitWidthsInt32)
    86  }
    87  
    88  func testBlockBitWidthsInt32(t *testing.T, f func(*[numMiniBlocks]byte, *[blockSize]int32)) {
    89  	t.Helper()
    90  	bitWidths := [numMiniBlocks]byte{}
    91  	block := [blockSize]int32{}
    92  	for i := range block {
    93  		block[i] = int32(i)
    94  	}
    95  	f(&bitWidths, &block)
    96  
    97  	want := [numMiniBlocks]byte{}
    98  	for i := range want {
    99  		j := (i + 0) * miniBlockSize
   100  		k := (i + 1) * miniBlockSize
   101  		want[i] = byte(maxLen32(block[j:k]))
   102  	}
   103  
   104  	if bitWidths != want {
   105  		t.Errorf("wrong bit widths: want=%d got=%d", want, bitWidths)
   106  	}
   107  }
   108  
   109  func TestEncodeMiniBlockInt32(t *testing.T) {
   110  	testEncodeMiniBlockInt32(t, encodeMiniBlockInt32)
   111  }
   112  
   113  func testEncodeMiniBlockInt32(t *testing.T, f func([]byte, *[miniBlockSize]int32, uint)) {
   114  	t.Helper()
   115  	for bitWidth := uint(1); bitWidth <= 32; bitWidth++ {
   116  		t.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(t *testing.T) {
   117  			got := [4*miniBlockSize + 32]byte{}
   118  			src := [miniBlockSize]int32{}
   119  			for i := range src {
   120  				src[i] = int32(i) & int32((1<<bitWidth)-1)
   121  			}
   122  
   123  			want := [4*miniBlockSize + 32]byte{}
   124  			bitOffset := uint(0)
   125  
   126  			for _, bits := range src {
   127  				for b := uint(0); b < bitWidth; b++ {
   128  					x := bitOffset / 8
   129  					y := bitOffset % 8
   130  					want[x] |= byte(((bits >> b) & 1) << y)
   131  					bitOffset++
   132  				}
   133  			}
   134  
   135  			f(got[:], &src, bitWidth)
   136  			n := (miniBlockSize * bitWidth) / 8
   137  
   138  			if !bytes.Equal(want[:n], got[:n]) {
   139  				t.Errorf("output mismatch: want=%08x got=%08x", want[:n], got[:n])
   140  			}
   141  		})
   142  	}
   143  }
   144  
   145  func BenchmarkBlockDeltaInt32(b *testing.B) {
   146  	benchmarkBlockDeltaInt32(b, blockDeltaInt32)
   147  }
   148  
   149  func benchmarkBlockDeltaInt32(b *testing.B, f func(*[blockSize]int32, int32) int32) {
   150  	b.SetBytes(4 * blockSize)
   151  	block := [blockSize]int32{}
   152  	for i := 0; i < b.N; i++ {
   153  		_ = f(&block, 0)
   154  	}
   155  }
   156  
   157  func BenchmarkBlockMinInt32(b *testing.B) {
   158  	benchmarkBlockMinInt32(b, blockMinInt32)
   159  }
   160  
   161  func benchmarkBlockMinInt32(b *testing.B, f func(*[blockSize]int32) int32) {
   162  	b.SetBytes(4 * blockSize)
   163  	block := [blockSize]int32{}
   164  	for i := 0; i < b.N; i++ {
   165  		_ = f(&block)
   166  	}
   167  }
   168  
   169  func BenchmarkBlockSubInt32(b *testing.B) {
   170  	benchmarkBlockSubInt32(b, blockSubInt32)
   171  }
   172  
   173  func benchmarkBlockSubInt32(b *testing.B, f func(*[blockSize]int32, int32)) {
   174  	b.SetBytes(4 * blockSize)
   175  	block := [blockSize]int32{}
   176  	for i := 0; i < b.N; i++ {
   177  		f(&block, 42)
   178  	}
   179  }
   180  
   181  func BenchmarkBlockBitWidthsInt32(b *testing.B) {
   182  	benchmarkBlockBitWidthsInt32(b, blockBitWidthsInt32)
   183  }
   184  
   185  func benchmarkBlockBitWidthsInt32(b *testing.B, f func(*[numMiniBlocks]byte, *[blockSize]int32)) {
   186  	b.SetBytes(4 * blockSize)
   187  	bitWidths := [numMiniBlocks]byte{}
   188  	block := [blockSize]int32{}
   189  	for i := 0; i < b.N; i++ {
   190  		f(&bitWidths, &block)
   191  	}
   192  }
   193  
   194  func BenchmarkEncodeMiniBlockInt32(b *testing.B) {
   195  	benchmarkEncodeMiniBlockInt32(b, encodeMiniBlockInt32)
   196  }
   197  
   198  func benchmarkEncodeMiniBlockInt32(b *testing.B, f func([]byte, *[miniBlockSize]int32, uint)) {
   199  	for bitWidth := uint(1); bitWidth <= 32; bitWidth++ {
   200  		b.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(b *testing.B) {
   201  			b.SetBytes(4 * miniBlockSize)
   202  			dst := [4*miniBlockSize + 32]byte{}
   203  			src := [miniBlockSize]int32{}
   204  			for i := 0; i < b.N; i++ {
   205  				f(dst[:], &src, bitWidth)
   206  			}
   207  		})
   208  	}
   209  }
   210  
   211  func TestBlockDeltaInt64(t *testing.T) {
   212  	testBlockDeltaInt64(t, blockDeltaInt64)
   213  }
   214  
   215  func testBlockDeltaInt64(t *testing.T, f func(*[blockSize]int64, int64) int64) {
   216  	t.Helper()
   217  	block := [blockSize]int64{}
   218  	for i := range block {
   219  		block[i] = int64(2 * (i + 1))
   220  	}
   221  	lastValue := f(&block, 0)
   222  	if lastValue != 2*blockSize {
   223  		t.Errorf("wrong last block value: want=%d got=%d", 2*blockSize, lastValue)
   224  	}
   225  	for i := range block {
   226  		j := int64(2 * (i + 0))
   227  		k := int64(2 * (i + 1))
   228  		if block[i] != (k - j) {
   229  			t.Errorf("wrong block delta at index %d: want=%d got=%d", i, k-j, block[i])
   230  		}
   231  	}
   232  }
   233  
   234  func TestBlockMinInt64(t *testing.T) {
   235  	testBlockMinInt64(t, blockMinInt64)
   236  }
   237  
   238  func testBlockMinInt64(t *testing.T, f func(*[blockSize]int64) int64) {
   239  	block := [blockSize]int64{}
   240  	for i := range block {
   241  		block[i] = blockSize - int64(i)
   242  	}
   243  	if min := f(&block); min != 1 {
   244  		t.Errorf("wrong min block value: want=1 got=%d", min)
   245  	}
   246  }
   247  
   248  func TestBlockSubInt64(t *testing.T) {
   249  	testBlockSubInt64(t, blockSubInt64)
   250  }
   251  
   252  func testBlockSubInt64(t *testing.T, f func(*[blockSize]int64, int64)) {
   253  	block := [blockSize]int64{}
   254  	for i := range block {
   255  		block[i] = int64(i)
   256  	}
   257  	f(&block, 1)
   258  	for i := range block {
   259  		if block[i] != int64(i-1) {
   260  			t.Errorf("wrong block value at index %d: want=%d got=%d", i, i-1, block[i])
   261  		}
   262  	}
   263  }
   264  
   265  func TestBlockBitWidthsInt64(t *testing.T) {
   266  	testBlockBitWidthsInt64(t, blockBitWidthsInt64)
   267  }
   268  
   269  func testBlockBitWidthsInt64(t *testing.T, f func(*[numMiniBlocks]byte, *[blockSize]int64)) {
   270  	bitWidths := [numMiniBlocks]byte{}
   271  	block := [blockSize]int64{}
   272  	for i := range block {
   273  		block[i] = int64(i)
   274  	}
   275  	f(&bitWidths, &block)
   276  
   277  	want := [numMiniBlocks]byte{}
   278  	for i := range want {
   279  		j := (i + 0) * miniBlockSize
   280  		k := (i + 1) * miniBlockSize
   281  		want[i] = byte(maxLen64(block[j:k]))
   282  	}
   283  
   284  	if bitWidths != want {
   285  		t.Errorf("wrong bit widths: want=%d got=%d", want, bitWidths)
   286  	}
   287  }
   288  
   289  func TestEncodeMiniBlockInt64(t *testing.T) {
   290  	testEncodeMiniBlockInt64(t, encodeMiniBlockInt64)
   291  }
   292  
   293  func testEncodeMiniBlockInt64(t *testing.T, f func([]byte, *[miniBlockSize]int64, uint)) {
   294  	for bitWidth := uint(1); bitWidth <= 64; bitWidth++ {
   295  		t.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(t *testing.T) {
   296  			got := [8*miniBlockSize + 64]byte{}
   297  			src := [miniBlockSize]int64{}
   298  			for i := range src {
   299  				src[i] = int64(i) & int64((1<<bitWidth)-1)
   300  			}
   301  
   302  			want := [8*miniBlockSize + 64]byte{}
   303  			bitOffset := uint(0)
   304  
   305  			for _, bits := range src {
   306  				for b := uint(0); b < bitWidth; b++ {
   307  					x := bitOffset / 8
   308  					y := bitOffset % 8
   309  					want[x] |= byte(((bits >> b) & 1) << y)
   310  					bitOffset++
   311  				}
   312  			}
   313  
   314  			f(got[:], &src, bitWidth)
   315  			n := (miniBlockSize * bitWidth) / 8
   316  
   317  			if !bytes.Equal(want[:n], got[:n]) {
   318  				t.Errorf("output mismatch: want=%08x got=%08x", want[:n], got[:n])
   319  			}
   320  		})
   321  	}
   322  }
   323  
   324  func BenchmarkBlockDeltaInt64(b *testing.B) {
   325  	benchmarkBlockDeltaInt64(b, blockDeltaInt64)
   326  }
   327  
   328  func benchmarkBlockDeltaInt64(b *testing.B, f func(*[blockSize]int64, int64) int64) {
   329  	b.SetBytes(8 * blockSize)
   330  	block := [blockSize]int64{}
   331  	for i := 0; i < b.N; i++ {
   332  		_ = f(&block, 0)
   333  	}
   334  }
   335  
   336  func BenchmarkBlockMinInt64(b *testing.B) {
   337  	benchmarkBlockMinInt64(b, blockMinInt64)
   338  }
   339  
   340  func benchmarkBlockMinInt64(b *testing.B, f func(*[blockSize]int64) int64) {
   341  	b.SetBytes(8 * blockSize)
   342  	block := [blockSize]int64{}
   343  	for i := 0; i < b.N; i++ {
   344  		_ = f(&block)
   345  	}
   346  }
   347  
   348  func BenchmarkBlockSubInt64(b *testing.B) {
   349  	benchmarkBlockSubInt64(b, blockSubInt64)
   350  }
   351  
   352  func benchmarkBlockSubInt64(b *testing.B, f func(*[blockSize]int64, int64)) {
   353  	b.SetBytes(8 * blockSize)
   354  	block := [blockSize]int64{}
   355  	for i := 0; i < b.N; i++ {
   356  		f(&block, 42)
   357  	}
   358  }
   359  
   360  func BenchmarkBlockBitWidthsInt64(b *testing.B) {
   361  	benchmarkBlockBitWidthsInt64(b, blockBitWidthsInt64)
   362  }
   363  
   364  func benchmarkBlockBitWidthsInt64(b *testing.B, f func(*[numMiniBlocks]byte, *[blockSize]int64)) {
   365  	b.SetBytes(8 * blockSize)
   366  	bitWidths := [numMiniBlocks]byte{}
   367  	block := [blockSize]int64{}
   368  	for i := 0; i < b.N; i++ {
   369  		f(&bitWidths, &block)
   370  	}
   371  }
   372  
   373  func BenchmarkEncodeMiniBlockInt64(b *testing.B) {
   374  	benchmarkEncodeMiniBlockInt64(b, encodeMiniBlockInt64)
   375  }
   376  
   377  func benchmarkEncodeMiniBlockInt64(b *testing.B, f func([]byte, *[miniBlockSize]int64, uint)) {
   378  	for bitWidth := uint(1); bitWidth <= 64; bitWidth++ {
   379  		b.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(b *testing.B) {
   380  			b.SetBytes(8 * miniBlockSize)
   381  			dst := [8*miniBlockSize + 64]byte{}
   382  			src := [miniBlockSize]int64{}
   383  			for i := 0; i < b.N; i++ {
   384  				f(dst[:], &src, bitWidth)
   385  			}
   386  		})
   387  	}
   388  }