github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/encoding/plain/plain.go (about)

     1  // Package plain implements the PLAIN parquet encoding.
     2  //
     3  // https://github.com/apache/parquet-format/blob/master/Encodings.md#plain-plain--0
     4  package plain
     5  
     6  import (
     7  	"encoding/binary"
     8  	"fmt"
     9  	"io"
    10  	"math"
    11  
    12  	"github.com/vc42/parquet-go/deprecated"
    13  	"github.com/vc42/parquet-go/encoding"
    14  	"github.com/vc42/parquet-go/format"
    15  )
    16  
// Size limits of PLAIN encoded byte array values.
//
// ByteArrayLengthSize is the number of bytes occupied by the little-endian
// length prefix written in front of each byte array value.
//
// MaxByteArrayLength is the largest value length accepted when reading byte
// arrays; NextByteArray reports ErrTooLarge for lengths above it.
const (
	ByteArrayLengthSize = 4
	MaxByteArrayLength  = math.MaxInt32
)
    21  
// Encoding is the implementation of the PLAIN parquet encoding.
//
// It embeds encoding.NotSupported; presumably that type supplies fallback
// implementations for operations this encoding does not override (confirm
// against the encoding package).
type Encoding struct {
	encoding.NotSupported
}
    25  
    26  func (e *Encoding) String() string {
    27  	return "PLAIN"
    28  }
    29  
// Encoding returns format.Plain, the format-level identifier of this encoding.
func (e *Encoding) Encoding() format.Encoding {
	return format.Plain
}
    33  
    34  func (e *Encoding) EncodeBoolean(dst, src []byte) ([]byte, error) {
    35  	return append(dst[:0], src...), nil
    36  }
    37  
    38  func (e *Encoding) EncodeInt32(dst, src []byte) ([]byte, error) {
    39  	if (len(src) % 4) != 0 {
    40  		return dst[:0], encoding.ErrEncodeInvalidInputSize(e, "INT32", len(src))
    41  	}
    42  	return append(dst[:0], src...), nil
    43  }
    44  
    45  func (e *Encoding) EncodeInt64(dst, src []byte) ([]byte, error) {
    46  	if (len(src) % 8) != 0 {
    47  		return dst[:0], encoding.ErrEncodeInvalidInputSize(e, "INT64", len(src))
    48  	}
    49  	return append(dst[:0], src...), nil
    50  }
    51  
    52  func (e *Encoding) EncodeInt96(dst, src []byte) ([]byte, error) {
    53  	if (len(src) % 12) != 0 {
    54  		return dst[:0], encoding.ErrEncodeInvalidInputSize(e, "INT96", len(src))
    55  	}
    56  	return append(dst[:0], src...), nil
    57  }
    58  
    59  func (e *Encoding) EncodeFloat(dst, src []byte) ([]byte, error) {
    60  	if (len(src) % 4) != 0 {
    61  		return dst[:0], encoding.ErrEncodeInvalidInputSize(e, "FLOAT", len(src))
    62  	}
    63  	return append(dst[:0], src...), nil
    64  }
    65  
    66  func (e *Encoding) EncodeDouble(dst, src []byte) ([]byte, error) {
    67  	if (len(src) % 8) != 0 {
    68  		return dst[:0], encoding.ErrEncodeInvalidInputSize(e, "DOUBLE", len(src))
    69  	}
    70  	return append(dst[:0], src...), nil
    71  }
    72  
    73  func (e *Encoding) EncodeByteArray(dst []byte, src []byte) ([]byte, error) {
    74  	if err := ValidateByteArray(src); err != nil {
    75  		return dst[:0], encoding.Error(e, err)
    76  	}
    77  	return append(dst[:0], src...), nil
    78  }
    79  
    80  func (e *Encoding) EncodeFixedLenByteArray(dst, src []byte, size int) ([]byte, error) {
    81  	if size < 0 || size > encoding.MaxFixedLenByteArraySize {
    82  		return dst[:0], encoding.Error(e, encoding.ErrInvalidArgument)
    83  	}
    84  	return append(dst[:0], src...), nil
    85  }
    86  
    87  func (e *Encoding) DecodeBoolean(dst, src []byte) ([]byte, error) {
    88  	return append(dst[:0], src...), nil
    89  }
    90  
    91  func (e *Encoding) DecodeInt32(dst, src []byte) ([]byte, error) {
    92  	if (len(src) % 4) != 0 {
    93  		return dst[:0], encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src))
    94  	}
    95  	return append(dst[:0], src...), nil
    96  }
    97  
    98  func (e *Encoding) DecodeInt64(dst, src []byte) ([]byte, error) {
    99  	if (len(src) % 8) != 0 {
   100  		return dst[:0], encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src))
   101  	}
   102  	return append(dst[:0], src...), nil
   103  }
   104  
   105  func (e *Encoding) DecodeInt96(dst, src []byte) ([]byte, error) {
   106  	if (len(src) % 12) != 0 {
   107  		return dst[:0], encoding.ErrDecodeInvalidInputSize(e, "INT96", len(src))
   108  	}
   109  	return append(dst[:0], src...), nil
   110  }
   111  
   112  func (e *Encoding) DecodeFloat(dst, src []byte) ([]byte, error) {
   113  	if (len(src) % 4) != 0 {
   114  		return dst[:0], encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src))
   115  	}
   116  	return append(dst[:0], src...), nil
   117  }
   118  
   119  func (e *Encoding) DecodeDouble(dst, src []byte) ([]byte, error) {
   120  	if (len(src) % 8) != 0 {
   121  		return dst[:0], encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src))
   122  	}
   123  	return append(dst[:0], src...), nil
   124  }
   125  
   126  func (e *Encoding) DecodeByteArray(dst, src []byte) ([]byte, error) {
   127  	if err := ValidateByteArray(src); err != nil {
   128  		return dst[:0], encoding.Error(e, err)
   129  	}
   130  	return append(dst[:0], src...), nil
   131  }
   132  
   133  func (e *Encoding) DecodeFixedLenByteArray(dst, src []byte, size int) ([]byte, error) {
   134  	if size < 0 || size > encoding.MaxFixedLenByteArraySize {
   135  		return dst[:0], encoding.Error(e, encoding.ErrInvalidArgument)
   136  	}
   137  	if (len(src) % size) != 0 {
   138  		return dst[:0], encoding.ErrDecodeInvalidInputSize(e, "FIXED_LEN_BYTE_ARRAY", len(src))
   139  	}
   140  	return append(dst[:0], src...), nil
   141  }
   142  
   143  func Boolean(v bool) []byte { return AppendBoolean(nil, 0, v) }
   144  
   145  func Int32(v int32) []byte { return AppendInt32(nil, v) }
   146  
   147  func Int64(v int64) []byte { return AppendInt64(nil, v) }
   148  
   149  func Int96(v deprecated.Int96) []byte { return AppendInt96(nil, v) }
   150  
   151  func Float(v float32) []byte { return AppendFloat(nil, v) }
   152  
   153  func Double(v float64) []byte { return AppendDouble(nil, v) }
   154  
   155  func ByteArray(v []byte) []byte { return AppendByteArray(nil, v) }
   156  
   157  func AppendBoolean(b []byte, n int, v bool) []byte {
   158  	i := n / 8
   159  	j := n % 8
   160  
   161  	if cap(b) > i {
   162  		b = b[:i+1]
   163  	} else {
   164  		tmp := make([]byte, i+1, 2*(i+1))
   165  		copy(tmp, b)
   166  		b = tmp
   167  	}
   168  
   169  	k := uint(j)
   170  	x := byte(0)
   171  	if v {
   172  		x = 1
   173  	}
   174  
   175  	b[i] = (b[i] & ^(1 << k)) | (x << k)
   176  	return b
   177  }
   178  
   179  func AppendInt32(b []byte, v int32) []byte {
   180  	x := [4]byte{}
   181  	binary.LittleEndian.PutUint32(x[:], uint32(v))
   182  	return append(b, x[:]...)
   183  }
   184  
   185  func AppendInt64(b []byte, v int64) []byte {
   186  	x := [8]byte{}
   187  	binary.LittleEndian.PutUint64(x[:], uint64(v))
   188  	return append(b, x[:]...)
   189  }
   190  
   191  func AppendInt96(b []byte, v deprecated.Int96) []byte {
   192  	x := [12]byte{}
   193  	binary.LittleEndian.PutUint32(x[0:4], v[0])
   194  	binary.LittleEndian.PutUint32(x[4:8], v[1])
   195  	binary.LittleEndian.PutUint32(x[8:12], v[2])
   196  	return append(b, x[:]...)
   197  }
   198  
   199  func AppendFloat(b []byte, v float32) []byte {
   200  	x := [4]byte{}
   201  	binary.LittleEndian.PutUint32(x[:], math.Float32bits(v))
   202  	return append(b, x[:]...)
   203  }
   204  
   205  func AppendDouble(b []byte, v float64) []byte {
   206  	x := [8]byte{}
   207  	binary.LittleEndian.PutUint64(x[:], math.Float64bits(v))
   208  	return append(b, x[:]...)
   209  }
   210  
   211  func AppendByteArray(b, v []byte) []byte {
   212  	length := [ByteArrayLengthSize]byte{}
   213  	PutByteArrayLength(length[:], len(v))
   214  	b = append(b, length[:]...)
   215  	b = append(b, v...)
   216  	return b
   217  }
   218  
   219  func AppendByteArrayString(b []byte, v string) []byte {
   220  	length := [ByteArrayLengthSize]byte{}
   221  	PutByteArrayLength(length[:], len(v))
   222  	b = append(b, length[:]...)
   223  	b = append(b, v...)
   224  	return b
   225  }
   226  
   227  func AppendByteArrayLength(b []byte, n int) []byte {
   228  	length := [ByteArrayLengthSize]byte{}
   229  	PutByteArrayLength(length[:], n)
   230  	return append(b, length[:]...)
   231  }
   232  
   233  func ByteArrayLength(b []byte) int {
   234  	return int(binary.LittleEndian.Uint32(b))
   235  }
   236  
   237  func PutByteArrayLength(b []byte, n int) {
   238  	binary.LittleEndian.PutUint32(b, uint32(n))
   239  }
   240  
// status is the result code compared against the return value of
// validateByteArray; ValidateByteArray translates it into an error.
type status int

const (
	// ok is the zero status; ValidateByteArray maps it to a nil error.
	ok status = iota
	// errTooShort is mapped to ErrTooShort by ValidateByteArray.
	errTooShort
	// errTooLarge is mapped to ErrTooLarge by ValidateByteArray.
	errTooLarge
)
   248  
   249  func ValidateByteArray(b []byte) error {
   250  	switch validateByteArray(b) {
   251  	case errTooShort:
   252  		return ErrTooShort(len(b))
   253  	case errTooLarge:
   254  		return ErrTooLarge(len(b))
   255  	default: // ok
   256  		return nil
   257  	}
   258  }
   259  
   260  func RangeByteArray(b []byte, do func([]byte) error) (err error) {
   261  	for len(b) > 0 {
   262  		var v []byte
   263  		if v, b, err = NextByteArray(b); err != nil {
   264  			return err
   265  		}
   266  		if err = do(v); err != nil {
   267  			return err
   268  		}
   269  	}
   270  	return nil
   271  }
   272  
   273  func NextByteArray(b []byte) (v, r []byte, err error) {
   274  	if len(b) < ByteArrayLengthSize {
   275  		return nil, b, ErrTooShort(len(b))
   276  	}
   277  	n := ByteArrayLength(b)
   278  	if n > (len(b) - ByteArrayLengthSize) {
   279  		return nil, b, ErrTooShort(len(b))
   280  	}
   281  	if n > MaxByteArrayLength {
   282  		return nil, b, ErrTooLarge(n)
   283  	}
   284  	n += ByteArrayLengthSize
   285  	return b[ByteArrayLengthSize:n:n], b[n:len(b):len(b)], nil
   286  }
   287  
   288  func ErrTooShort(length int) error {
   289  	return fmt.Errorf("input of length %d is too short to contain a PLAIN encoded byte array value: %w", length, io.ErrUnexpectedEOF)
   290  }
   291  
   292  func ErrTooLarge(length int) error {
   293  	return fmt.Errorf("byte array of length %d is too large to be encoded", length)
   294  }