github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/encoding/bitpacked/bitpacked.go (about) 1 package bitpacked 2 3 import ( 4 "github.com/segmentio/parquet-go/encoding" 5 "github.com/segmentio/parquet-go/format" 6 ) 7 8 type Encoding struct { 9 encoding.NotSupported 10 BitWidth int 11 } 12 13 func (e *Encoding) String() string { 14 return "BIT_PACKED" 15 } 16 17 func (e *Encoding) Encoding() format.Encoding { 18 return format.BitPacked 19 } 20 21 func (e *Encoding) EncodeLevels(dst []byte, src []uint8) ([]byte, error) { 22 dst, err := encodeLevels(dst[:0], src, uint(e.BitWidth)) 23 return dst, e.wrap(err) 24 } 25 26 func (e *Encoding) DecodeLevels(dst []uint8, src []byte) ([]uint8, error) { 27 dst, err := decodeLevels(dst[:0], src, uint(e.BitWidth)) 28 return dst, e.wrap(err) 29 } 30 31 func (e *Encoding) wrap(err error) error { 32 if err != nil { 33 err = encoding.Error(e, err) 34 } 35 return err 36 } 37 38 func encodeLevels(dst, src []byte, bitWidth uint) ([]byte, error) { 39 if bitWidth == 0 || len(src) == 0 { 40 return append(dst[:0], 0), nil 41 } 42 43 n := ((int(bitWidth) * len(src)) + 7) / 8 44 c := n + 1 45 46 if cap(dst) < c { 47 dst = make([]byte, c, 2*c) 48 } else { 49 dst = dst[:c] 50 for i := range dst { 51 dst[i] = 0 52 } 53 } 54 55 bitMask := byte(1<<bitWidth) - 1 56 bitShift := 8 - bitWidth 57 bitOffset := uint(0) 58 59 for _, value := range src { 60 v := bitFlip(value) >> bitShift 61 i := bitOffset / 8 62 j := bitOffset % 8 63 dst[i+0] |= (v & bitMask) << j 64 dst[i+1] |= (v >> (8 - j)) 65 bitOffset += bitWidth 66 } 67 68 return dst[:n], nil 69 } 70 71 func decodeLevels(dst, src []byte, bitWidth uint) ([]byte, error) { 72 if bitWidth == 0 || len(src) == 0 { 73 return append(dst[:0], 0), nil 74 } 75 76 numBits := 8 * uint(len(src)) 77 numValues := int(numBits / bitWidth) 78 if (numBits % bitWidth) != 0 { 79 numValues++ 80 } 81 82 if cap(dst) < numValues { 83 dst = make([]byte, numValues, 2*numValues) 84 } else { 85 dst = dst[:numValues] 86 for i := range dst { 87 dst[i] = 0 88 } 89 } 90 91 bitMask := byte(1<<bitWidth) - 1 92 bitShift := 8 - bitWidth 93 bitOffset := uint(0) 94 95 for k := range dst { 96 i := bitOffset / 8 97 j := bitOffset % 8 98 v := (src[i+0] >> j) 99 if int(i+1) < len(src) { 100 v |= (src[i+1] << (8 - j)) 101 } 102 v &= bitMask 103 dst[k] = bitFlip(v) >> bitShift 104 bitOffset += bitWidth 105 } 106 107 return dst, nil 108 } 109 110 func bitFlip(b byte) byte { 111 return (((b >> 0) & 1) << 7) | 112 (((b >> 1) & 1) << 6) | 113 (((b >> 2) & 1) << 5) | 114 (((b >> 3) & 1) << 4) | 115 (((b >> 4) & 1) << 3) | 116 (((b >> 5) & 1) << 2) | 117 (((b >> 6) & 1) << 1) | 118 (((b >> 7) & 1) << 0) 119 }