github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/encoding/rle/dictionary.go (about)

     1  package rle
     2  
     3  import (
     4  	"math/bits"
     5  
     6  	"github.com/segmentio/parquet-go/encoding"
     7  	"github.com/segmentio/parquet-go/format"
     8  	"github.com/segmentio/parquet-go/internal/unsafecast"
     9  )
    10  
// DictionaryEncoding is the encoding reported as RLE_DICTIONARY
// (format.RLEDictionary). In this file it provides int32 encode/decode
// methods; encoding.NotSupported is embedded, presumably to supply fallback
// implementations for the value types this encoding does not handle
// (confirm against the encoding package).
type DictionaryEncoding struct {
	encoding.NotSupported
}
    14  
    15  func (e *DictionaryEncoding) String() string {
    16  	return "RLE_DICTIONARY"
    17  }
    18  
    19  func (e *DictionaryEncoding) Encoding() format.Encoding {
    20  	return format.RLEDictionary
    21  }
    22  
    23  func (e *DictionaryEncoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) {
    24  	bitWidth := maxLenInt32(src)
    25  	dst = append(dst[:0], byte(bitWidth))
    26  	dst, err := encodeInt32(dst, src, uint(bitWidth))
    27  	return dst, e.wrap(err)
    28  }
    29  
    30  func (e *DictionaryEncoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) {
    31  	if len(src) == 0 {
    32  		return dst[:0], nil
    33  	}
    34  	buf := unsafecast.Int32ToBytes(dst)
    35  	buf, err := decodeInt32(buf[:0], src[1:], uint(src[0]))
    36  	return unsafecast.BytesToInt32(buf), e.wrap(err)
    37  }
    38  
    39  func (e *DictionaryEncoding) wrap(err error) error {
    40  	if err != nil {
    41  		err = encoding.Error(e, err)
    42  	}
    43  	return err
    44  }
    45  
    46  func clearInt32(data []int32) {
    47  	for i := range data {
    48  		data[i] = 0
    49  	}
    50  }
    51  
    52  func maxLenInt32(data []int32) (max int) {
    53  	for _, v := range data {
    54  		if n := bits.Len32(uint32(v)); n > max {
    55  			max = n
    56  		}
    57  	}
    58  	return max
    59  }