github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/encoding/delta/binary_packed_test.go (about) 1 package delta 2 3 import ( 4 "bytes" 5 "fmt" 6 "math/bits" 7 "testing" 8 ) 9 10 func maxLen32(miniBlock []int32) (maxLen int) { 11 for _, v := range miniBlock { 12 if n := bits.Len32(uint32(v)); n > maxLen { 13 maxLen = n 14 } 15 } 16 return maxLen 17 } 18 19 func maxLen64(miniBlock []int64) (maxLen int) { 20 for _, v := range miniBlock { 21 if n := bits.Len64(uint64(v)); n > maxLen { 22 maxLen = n 23 } 24 } 25 return maxLen 26 } 27 28 func TestBlockDeltaInt32(t *testing.T) { 29 testBlockDeltaInt32(t, blockDeltaInt32) 30 } 31 32 func testBlockDeltaInt32(t *testing.T, f func(*[blockSize]int32, int32) int32) { 33 t.Helper() 34 block := [blockSize]int32{} 35 for i := range block { 36 block[i] = int32(2 * (i + 1)) 37 } 38 lastValue := f(&block, 0) 39 if lastValue != 2*blockSize { 40 t.Errorf("wrong last block value: want=%d got=%d", 2*blockSize, lastValue) 41 } 42 for i := range block { 43 j := int32(2 * (i + 0)) 44 k := int32(2 * (i + 1)) 45 if block[i] != (k - j) { 46 t.Errorf("wrong block delta at index %d: want=%d got=%d", i, k-j, block[i]) 47 } 48 } 49 } 50 51 func TestBlockMinInt32(t *testing.T) { 52 testBlockMinInt32(t, blockMinInt32) 53 } 54 55 func testBlockMinInt32(t *testing.T, f func(*[blockSize]int32) int32) { 56 t.Helper() 57 block := [blockSize]int32{} 58 for i := range block { 59 block[i] = blockSize - int32(i) 60 } 61 if min := f(&block); min != 1 { 62 t.Errorf("wrong min block value: want=1 got=%d", min) 63 } 64 } 65 66 func TestBlockSubInt32(t *testing.T) { 67 testBlockSubInt32(t, blockSubInt32) 68 } 69 70 func testBlockSubInt32(t *testing.T, f func(*[blockSize]int32, int32)) { 71 t.Helper() 72 block := [blockSize]int32{} 73 for i := range block { 74 block[i] = int32(i) 75 } 76 f(&block, 1) 77 for i := range block { 78 if block[i] != int32(i-1) { 79 t.Errorf("wrong block value at index %d: want=%d got=%d", i, i-1, block[i]) 80 } 81 } 82 } 83 84 func TestBlockBitWidthsInt32(t *testing.T) { 85 testBlockBitWidthsInt32(t, blockBitWidthsInt32) 86 } 87 88 func testBlockBitWidthsInt32(t *testing.T, f func(*[numMiniBlocks]byte, *[blockSize]int32)) { 89 t.Helper() 90 bitWidths := [numMiniBlocks]byte{} 91 block := [blockSize]int32{} 92 for i := range block { 93 block[i] = int32(i) 94 } 95 f(&bitWidths, &block) 96 97 want := [numMiniBlocks]byte{} 98 for i := range want { 99 j := (i + 0) * miniBlockSize 100 k := (i + 1) * miniBlockSize 101 want[i] = byte(maxLen32(block[j:k])) 102 } 103 104 if bitWidths != want { 105 t.Errorf("wrong bit widths: want=%d got=%d", want, bitWidths) 106 } 107 } 108 109 func TestEncodeMiniBlockInt32(t *testing.T) { 110 testEncodeMiniBlockInt32(t, encodeMiniBlockInt32) 111 } 112 113 func testEncodeMiniBlockInt32(t *testing.T, f func([]byte, *[miniBlockSize]int32, uint)) { 114 t.Helper() 115 for bitWidth := uint(1); bitWidth <= 32; bitWidth++ { 116 t.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(t *testing.T) { 117 got := [4*miniBlockSize + 32]byte{} 118 src := [miniBlockSize]int32{} 119 for i := range src { 120 src[i] = int32(i) & int32((1<<bitWidth)-1) 121 } 122 123 want := [4*miniBlockSize + 32]byte{} 124 bitOffset := uint(0) 125 126 for _, bits := range src { 127 for b := uint(0); b < bitWidth; b++ { 128 x := bitOffset / 8 129 y := bitOffset % 8 130 want[x] |= byte(((bits >> b) & 1) << y) 131 bitOffset++ 132 } 133 } 134 135 f(got[:], &src, bitWidth) 136 n := (miniBlockSize * bitWidth) / 8 137 138 if !bytes.Equal(want[:n], got[:n]) { 139 t.Errorf("output mismatch: want=%08x got=%08x", want[:n], got[:n]) 140 } 141 }) 142 } 143 } 144 145 func BenchmarkBlockDeltaInt32(b *testing.B) { 146 benchmarkBlockDeltaInt32(b, blockDeltaInt32) 147 } 148 149 func benchmarkBlockDeltaInt32(b *testing.B, f func(*[blockSize]int32, int32) int32) { 150 b.SetBytes(4 * blockSize) 151 block := [blockSize]int32{} 152 for i := 0; i < b.N; i++ { 153 _ = f(&block, 0) 154 } 155 } 156 157 func BenchmarkBlockMinInt32(b *testing.B) { 158 benchmarkBlockMinInt32(b, blockMinInt32) 159 } 160 161 func benchmarkBlockMinInt32(b *testing.B, f func(*[blockSize]int32) int32) { 162 b.SetBytes(4 * blockSize) 163 block := [blockSize]int32{} 164 for i := 0; i < b.N; i++ { 165 _ = f(&block) 166 } 167 } 168 169 func BenchmarkBlockSubInt32(b *testing.B) { 170 benchmarkBlockSubInt32(b, blockSubInt32) 171 } 172 173 func benchmarkBlockSubInt32(b *testing.B, f func(*[blockSize]int32, int32)) { 174 b.SetBytes(4 * blockSize) 175 block := [blockSize]int32{} 176 for i := 0; i < b.N; i++ { 177 f(&block, 42) 178 } 179 } 180 181 func BenchmarkBlockBitWidthsInt32(b *testing.B) { 182 benchmarkBlockBitWidthsInt32(b, blockBitWidthsInt32) 183 } 184 185 func benchmarkBlockBitWidthsInt32(b *testing.B, f func(*[numMiniBlocks]byte, *[blockSize]int32)) { 186 b.SetBytes(4 * blockSize) 187 bitWidths := [numMiniBlocks]byte{} 188 block := [blockSize]int32{} 189 for i := 0; i < b.N; i++ { 190 f(&bitWidths, &block) 191 } 192 } 193 194 func BenchmarkEncodeMiniBlockInt32(b *testing.B) { 195 benchmarkEncodeMiniBlockInt32(b, encodeMiniBlockInt32) 196 } 197 198 func benchmarkEncodeMiniBlockInt32(b *testing.B, f func([]byte, *[miniBlockSize]int32, uint)) { 199 for bitWidth := uint(1); bitWidth <= 32; bitWidth++ { 200 b.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(b *testing.B) { 201 b.SetBytes(4 * miniBlockSize) 202 dst := [4*miniBlockSize + 32]byte{} 203 src := [miniBlockSize]int32{} 204 for i := 0; i < b.N; i++ { 205 f(dst[:], &src, bitWidth) 206 } 207 }) 208 } 209 } 210 211 func TestBlockDeltaInt64(t *testing.T) { 212 testBlockDeltaInt64(t, blockDeltaInt64) 213 } 214 215 func testBlockDeltaInt64(t *testing.T, f func(*[blockSize]int64, int64) int64) { 216 t.Helper() 217 block := [blockSize]int64{} 218 for i := range block { 219 block[i] = int64(2 * (i + 1)) 220 } 221 lastValue := f(&block, 0) 222 if lastValue != 2*blockSize { 223 t.Errorf("wrong last block value: want=%d got=%d", 2*blockSize, lastValue) 224 } 225 for i := range block { 226 j := int64(2 * (i + 0)) 227 k := int64(2 * (i + 1)) 228 if block[i] != (k - j) { 229 t.Errorf("wrong block delta at index %d: want=%d got=%d", i, k-j, block[i]) 230 } 231 } 232 } 233 234 func TestBlockMinInt64(t *testing.T) { 235 testBlockMinInt64(t, blockMinInt64) 236 } 237 238 func testBlockMinInt64(t *testing.T, f func(*[blockSize]int64) int64) { 239 block := [blockSize]int64{} 240 for i := range block { 241 block[i] = blockSize - int64(i) 242 } 243 if min := f(&block); min != 1 { 244 t.Errorf("wrong min block value: want=1 got=%d", min) 245 } 246 } 247 248 func TestBlockSubInt64(t *testing.T) { 249 testBlockSubInt64(t, blockSubInt64) 250 } 251 252 func testBlockSubInt64(t *testing.T, f func(*[blockSize]int64, int64)) { 253 block := [blockSize]int64{} 254 for i := range block { 255 block[i] = int64(i) 256 } 257 f(&block, 1) 258 for i := range block { 259 if block[i] != int64(i-1) { 260 t.Errorf("wrong block value at index %d: want=%d got=%d", i, i-1, block[i]) 261 } 262 } 263 } 264 265 func TestBlockBitWidthsInt64(t *testing.T) { 266 testBlockBitWidthsInt64(t, blockBitWidthsInt64) 267 } 268 269 func testBlockBitWidthsInt64(t *testing.T, f func(*[numMiniBlocks]byte, *[blockSize]int64)) { 270 bitWidths := [numMiniBlocks]byte{} 271 block := [blockSize]int64{} 272 for i := range block { 273 block[i] = int64(i) 274 } 275 f(&bitWidths, &block) 276 277 want := [numMiniBlocks]byte{} 278 for i := range want { 279 j := (i + 0) * miniBlockSize 280 k := (i + 1) * miniBlockSize 281 want[i] = byte(maxLen64(block[j:k])) 282 } 283 284 if bitWidths != want { 285 t.Errorf("wrong bit widths: want=%d got=%d", want, bitWidths) 286 } 287 } 288 289 func TestEncodeMiniBlockInt64(t *testing.T) { 290 testEncodeMiniBlockInt64(t, encodeMiniBlockInt64) 291 } 292 293 func testEncodeMiniBlockInt64(t *testing.T, f func([]byte, *[miniBlockSize]int64, uint)) { 294 for bitWidth := uint(1); bitWidth <= 64; bitWidth++ { 295 t.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(t *testing.T) { 296 got := [8*miniBlockSize + 64]byte{} 297 src := [miniBlockSize]int64{} 298 for i := range src { 299 src[i] = int64(i) & int64((1<<bitWidth)-1) 300 } 301 302 want := [8*miniBlockSize + 64]byte{} 303 bitOffset := uint(0) 304 305 for _, bits := range src { 306 for b := uint(0); b < bitWidth; b++ { 307 x := bitOffset / 8 308 y := bitOffset % 8 309 want[x] |= byte(((bits >> b) & 1) << y) 310 bitOffset++ 311 } 312 } 313 314 f(got[:], &src, bitWidth) 315 n := (miniBlockSize * bitWidth) / 8 316 317 if !bytes.Equal(want[:n], got[:n]) { 318 t.Errorf("output mismatch: want=%08x got=%08x", want[:n], got[:n]) 319 } 320 }) 321 } 322 } 323 324 func BenchmarkBlockDeltaInt64(b *testing.B) { 325 benchmarkBlockDeltaInt64(b, blockDeltaInt64) 326 } 327 328 func benchmarkBlockDeltaInt64(b *testing.B, f func(*[blockSize]int64, int64) int64) { 329 b.SetBytes(8 * blockSize) 330 block := [blockSize]int64{} 331 for i := 0; i < b.N; i++ { 332 _ = f(&block, 0) 333 } 334 } 335 336 func BenchmarkBlockMinInt64(b *testing.B) { 337 benchmarkBlockMinInt64(b, blockMinInt64) 338 } 339 340 func benchmarkBlockMinInt64(b *testing.B, f func(*[blockSize]int64) int64) { 341 b.SetBytes(8 * blockSize) 342 block := [blockSize]int64{} 343 for i := 0; i < b.N; i++ { 344 _ = f(&block) 345 } 346 } 347 348 func BenchmarkBlockSubInt64(b *testing.B) { 349 benchmarkBlockSubInt64(b, blockSubInt64) 350 } 351 352 func benchmarkBlockSubInt64(b *testing.B, f func(*[blockSize]int64, int64)) { 353 b.SetBytes(8 * blockSize) 354 block := [blockSize]int64{} 355 for i := 0; i < b.N; i++ { 356 f(&block, 42) 357 } 358 } 359 360 func BenchmarkBlockBitWidthsInt64(b *testing.B) { 361 benchmarkBlockBitWidthsInt64(b, blockBitWidthsInt64) 362 } 363 364 func benchmarkBlockBitWidthsInt64(b *testing.B, f func(*[numMiniBlocks]byte, *[blockSize]int64)) { 365 b.SetBytes(8 * blockSize) 366 bitWidths := [numMiniBlocks]byte{} 367 block := [blockSize]int64{} 368 for i := 0; i < b.N; i++ { 369 f(&bitWidths, &block) 370 } 371 } 372 373 func BenchmarkEncodeMiniBlockInt64(b *testing.B) { 374 benchmarkEncodeMiniBlockInt64(b, encodeMiniBlockInt64) 375 } 376 377 func benchmarkEncodeMiniBlockInt64(b *testing.B, f func([]byte, *[miniBlockSize]int64, uint)) { 378 for bitWidth := uint(1); bitWidth <= 64; bitWidth++ { 379 b.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(b *testing.B) { 380 b.SetBytes(8 * miniBlockSize) 381 dst := [8*miniBlockSize + 64]byte{} 382 src := [miniBlockSize]int64{} 383 for i := 0; i < b.N; i++ { 384 f(dst[:], &src, bitWidth) 385 } 386 }) 387 } 388 }