github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/encoding/bitpacked/bitpacked.go (about)

     1  package bitpacked
     2  
     3  import (
     4  	"github.com/segmentio/parquet-go/encoding"
     5  	"github.com/segmentio/parquet-go/format"
     6  )
     7  
// Encoding implements the BIT_PACKED parquet encoding (format.BitPacked) for
// repetition and definition levels.
type Encoding struct {
	// NOTE(review): presumably supplies the fallback implementations for the
	// encoding methods this type does not define itself — confirm in package
	// encoding.
	encoding.NotSupported
	// BitWidth is the number of bits used to represent each level. It is
	// converted to uint and handed to the level packing and unpacking
	// routines by EncodeLevels and DecodeLevels.
	BitWidth int
}
    12  
    13  func (e *Encoding) String() string {
    14  	return "BIT_PACKED"
    15  }
    16  
    17  func (e *Encoding) Encoding() format.Encoding {
    18  	return format.BitPacked
    19  }
    20  
    21  func (e *Encoding) EncodeLevels(dst []byte, src []uint8) ([]byte, error) {
    22  	dst, err := encodeLevels(dst[:0], src, uint(e.BitWidth))
    23  	return dst, e.wrap(err)
    24  }
    25  
    26  func (e *Encoding) DecodeLevels(dst []uint8, src []byte) ([]uint8, error) {
    27  	dst, err := decodeLevels(dst[:0], src, uint(e.BitWidth))
    28  	return dst, e.wrap(err)
    29  }
    30  
    31  func (e *Encoding) wrap(err error) error {
    32  	if err != nil {
    33  		err = encoding.Error(e, err)
    34  	}
    35  	return err
    36  }
    37  
    38  func encodeLevels(dst, src []byte, bitWidth uint) ([]byte, error) {
    39  	if bitWidth == 0 || len(src) == 0 {
    40  		return append(dst[:0], 0), nil
    41  	}
    42  
    43  	n := ((int(bitWidth) * len(src)) + 7) / 8
    44  	c := n + 1
    45  
    46  	if cap(dst) < c {
    47  		dst = make([]byte, c, 2*c)
    48  	} else {
    49  		dst = dst[:c]
    50  		for i := range dst {
    51  			dst[i] = 0
    52  		}
    53  	}
    54  
    55  	bitMask := byte(1<<bitWidth) - 1
    56  	bitShift := 8 - bitWidth
    57  	bitOffset := uint(0)
    58  
    59  	for _, value := range src {
    60  		v := bitFlip(value) >> bitShift
    61  		i := bitOffset / 8
    62  		j := bitOffset % 8
    63  		dst[i+0] |= (v & bitMask) << j
    64  		dst[i+1] |= (v >> (8 - j))
    65  		bitOffset += bitWidth
    66  	}
    67  
    68  	return dst[:n], nil
    69  }
    70  
    71  func decodeLevels(dst, src []byte, bitWidth uint) ([]byte, error) {
    72  	if bitWidth == 0 || len(src) == 0 {
    73  		return append(dst[:0], 0), nil
    74  	}
    75  
    76  	numBits := 8 * uint(len(src))
    77  	numValues := int(numBits / bitWidth)
    78  	if (numBits % bitWidth) != 0 {
    79  		numValues++
    80  	}
    81  
    82  	if cap(dst) < numValues {
    83  		dst = make([]byte, numValues, 2*numValues)
    84  	} else {
    85  		dst = dst[:numValues]
    86  		for i := range dst {
    87  			dst[i] = 0
    88  		}
    89  	}
    90  
    91  	bitMask := byte(1<<bitWidth) - 1
    92  	bitShift := 8 - bitWidth
    93  	bitOffset := uint(0)
    94  
    95  	for k := range dst {
    96  		i := bitOffset / 8
    97  		j := bitOffset % 8
    98  		v := (src[i+0] >> j)
    99  		if int(i+1) < len(src) {
   100  			v |= (src[i+1] << (8 - j))
   101  		}
   102  		v &= bitMask
   103  		dst[k] = bitFlip(v) >> bitShift
   104  		bitOffset += bitWidth
   105  	}
   106  
   107  	return dst, nil
   108  }
   109  
   110  func bitFlip(b byte) byte {
   111  	return (((b >> 0) & 1) << 7) |
   112  		(((b >> 1) & 1) << 6) |
   113  		(((b >> 2) & 1) << 5) |
   114  		(((b >> 3) & 1) << 4) |
   115  		(((b >> 4) & 1) << 3) |
   116  		(((b >> 5) & 1) << 2) |
   117  		(((b >> 6) & 1) << 1) |
   118  		(((b >> 7) & 1) << 0)
   119  }