github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/encoding/rle/dictionary.go (about)

     1  package rle
     2  
     3  import (
     4  	"math/bits"
     5  
     6  	"github.com/vc42/parquet-go/encoding"
     7  	"github.com/vc42/parquet-go/format"
     8  	"github.com/vc42/parquet-go/internal/unsafecast"
     9  )
    10  
    11  type DictionaryEncoding struct {
    12  	encoding.NotSupported
    13  }
    14  
    15  func (e *DictionaryEncoding) String() string {
    16  	return "RLE_DICTIONARY"
    17  }
    18  
    19  func (e *DictionaryEncoding) Encoding() format.Encoding {
    20  	return format.RLEDictionary
    21  }
    22  
    23  func (e *DictionaryEncoding) EncodeInt32(dst, src []byte) ([]byte, error) {
    24  	if (len(src) % 4) != 0 {
    25  		return dst[:0], encoding.ErrEncodeInvalidInputSize(e, "INT32", len(src))
    26  	}
    27  	src32 := unsafecast.BytesToInt32(src)
    28  	bitWidth := maxLenInt32(src32)
    29  	dst = append(dst[:0], byte(bitWidth))
    30  	dst, err := encodeInt32(dst, src32, uint(bitWidth))
    31  	return dst, e.wrap(err)
    32  }
    33  
    34  func (e *DictionaryEncoding) DecodeInt32(dst, src []byte) ([]byte, error) {
    35  	if len(src) == 0 {
    36  		return dst[:0], nil
    37  	}
    38  	dst, err := decodeInt32(dst[:0], src[1:], uint(src[0]))
    39  	return dst, e.wrap(err)
    40  }
    41  
    42  func (e *DictionaryEncoding) wrap(err error) error {
    43  	if err != nil {
    44  		err = encoding.Error(e, err)
    45  	}
    46  	return err
    47  }
    48  
    49  func clearInt32(data []int32) {
    50  	for i := range data {
    51  		data[i] = 0
    52  	}
    53  }
    54  
    55  func maxLenInt32(data []int32) (max int) {
    56  	for _, v := range data {
    57  		if n := bits.Len32(uint32(v)); n > max {
    58  			max = n
    59  		}
    60  	}
    61  	return max
    62  }