github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/encoding/plain/plain.go (about)

     1  // Package plain implements the PLAIN parquet encoding.
     2  //
     3  // https://github.com/apache/parquet-format/blob/master/Encodings.md#plain-plain--0
     4  package plain
     5  
     6  import (
     7  	"encoding/binary"
     8  	"fmt"
     9  	"io"
    10  	"math"
    11  
    12  	"github.com/segmentio/parquet-go/deprecated"
    13  	"github.com/segmentio/parquet-go/encoding"
    14  	"github.com/segmentio/parquet-go/format"
    15  	"github.com/segmentio/parquet-go/internal/unsafecast"
    16  )
    17  
// Sizes governing the PLAIN representation of byte array values.
//
// ByteArrayLengthSize is the number of bytes of the length prefix that
// precedes every encoded byte array value.
//
// MaxByteArrayLength is the largest value length accepted by NextByteArray,
// which reports longer values with ErrTooLarge.
const (
	ByteArrayLengthSize = 4
	MaxByteArrayLength  = math.MaxInt32
)
    22  
// Encoding is the implementation of the PLAIN parquet encoding.
//
// It embeds encoding.NotSupported, whose methods are promoted for every
// operation that is not defined on this type (presumably reporting the
// operation as unsupported — confirm against the encoding package).
type Encoding struct {
	encoding.NotSupported
}
    26  
// String returns the name of the encoding, "PLAIN".
func (e *Encoding) String() string {
	return "PLAIN"
}
    30  
// Encoding returns format.Plain, the format identifier of this encoding.
func (e *Encoding) Encoding() format.Encoding {
	return format.Plain
}
    34  
    35  func (e *Encoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) {
    36  	return append(dst[:0], src...), nil
    37  }
    38  
    39  func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) {
    40  	return append(dst[:0], unsafecast.Int32ToBytes(src)...), nil
    41  }
    42  
    43  func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) {
    44  	return append(dst[:0], unsafecast.Int64ToBytes(src)...), nil
    45  }
    46  
    47  func (e *Encoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) {
    48  	return append(dst[:0], deprecated.Int96ToBytes(src)...), nil
    49  }
    50  
    51  func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) {
    52  	return append(dst[:0], unsafecast.Float32ToBytes(src)...), nil
    53  }
    54  
    55  func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) {
    56  	return append(dst[:0], unsafecast.Float64ToBytes(src)...), nil
    57  }
    58  
    59  func (e *Encoding) EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error) {
    60  	dst = dst[:0]
    61  
    62  	if len(offsets) > 0 {
    63  		baseOffset := offsets[0]
    64  
    65  		for _, endOffset := range offsets[1:] {
    66  			dst = AppendByteArray(dst, src[baseOffset:endOffset:endOffset])
    67  			baseOffset = endOffset
    68  		}
    69  	}
    70  
    71  	return dst, nil
    72  }
    73  
    74  func (e *Encoding) EncodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) {
    75  	if size < 0 || size > encoding.MaxFixedLenByteArraySize {
    76  		return dst[:0], encoding.Error(e, encoding.ErrInvalidArgument)
    77  	}
    78  	return append(dst[:0], src...), nil
    79  }
    80  
    81  func (e *Encoding) DecodeBoolean(dst []byte, src []byte) ([]byte, error) {
    82  	return append(dst[:0], src...), nil
    83  }
    84  
    85  func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) {
    86  	if (len(src) % 4) != 0 {
    87  		return dst, encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src))
    88  	}
    89  	return append(dst[:0], unsafecast.BytesToInt32(src)...), nil
    90  }
    91  
    92  func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) {
    93  	if (len(src) % 8) != 0 {
    94  		return dst, encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src))
    95  	}
    96  	return append(dst[:0], unsafecast.BytesToInt64(src)...), nil
    97  }
    98  
    99  func (e *Encoding) DecodeInt96(dst []deprecated.Int96, src []byte) ([]deprecated.Int96, error) {
   100  	if (len(src) % 12) != 0 {
   101  		return dst, encoding.ErrDecodeInvalidInputSize(e, "INT96", len(src))
   102  	}
   103  	return append(dst[:0], deprecated.BytesToInt96(src)...), nil
   104  }
   105  
   106  func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) {
   107  	if (len(src) % 4) != 0 {
   108  		return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src))
   109  	}
   110  	return append(dst[:0], unsafecast.BytesToFloat32(src)...), nil
   111  }
   112  
   113  func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) {
   114  	if (len(src) % 8) != 0 {
   115  		return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src))
   116  	}
   117  	return append(dst[:0], unsafecast.BytesToFloat64(src)...), nil
   118  }
   119  
   120  func (e *Encoding) DecodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, []uint32, error) {
   121  	dst, offsets = dst[:0], offsets[:0]
   122  
   123  	for i := 0; i < len(src); {
   124  		if (len(src) - i) < ByteArrayLengthSize {
   125  			return dst, offsets, ErrTooShort(len(src))
   126  		}
   127  		n := ByteArrayLength(src[i:])
   128  		if n > (len(src) - ByteArrayLengthSize) {
   129  			return dst, offsets, ErrTooShort(len(src))
   130  		}
   131  		i += ByteArrayLengthSize
   132  		offsets = append(offsets, uint32(len(dst)))
   133  		dst = append(dst, src[i:i+n]...)
   134  		i += n
   135  	}
   136  
   137  	return dst, append(offsets, uint32(len(dst))), nil
   138  }
   139  
   140  func (e *Encoding) DecodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) {
   141  	if size < 0 || size > encoding.MaxFixedLenByteArraySize {
   142  		return dst, encoding.Error(e, encoding.ErrInvalidArgument)
   143  	}
   144  	if (len(src) % size) != 0 {
   145  		return dst, encoding.ErrDecodeInvalidInputSize(e, "FIXED_LEN_BYTE_ARRAY", len(src))
   146  	}
   147  	return append(dst[:0], src...), nil
   148  }
   149  
// EstimateDecodeByteArraySize returns len(src) as the estimated size of the
// values decoded from src. DecodeByteArray copies only the value bytes and
// drops the length prefixes, so the decoded values never exceed this size.
func (e *Encoding) EstimateDecodeByteArraySize(src []byte) int {
	return len(src)
}
   153  
// CanDecodeInPlace always reports true.
//
// NOTE(review): presumably this tells callers that the decoding methods may
// be given an output buffer sharing memory with the input — confirm against
// the documentation of the encoding.Encoding interface.
func (e *Encoding) CanDecodeInPlace() bool {
	return true
}
   157  
   158  func Boolean(v bool) []byte { return AppendBoolean(nil, 0, v) }
   159  
   160  func Int32(v int32) []byte { return AppendInt32(nil, v) }
   161  
   162  func Int64(v int64) []byte { return AppendInt64(nil, v) }
   163  
   164  func Int96(v deprecated.Int96) []byte { return AppendInt96(nil, v) }
   165  
   166  func Float(v float32) []byte { return AppendFloat(nil, v) }
   167  
   168  func Double(v float64) []byte { return AppendDouble(nil, v) }
   169  
   170  func ByteArray(v []byte) []byte { return AppendByteArray(nil, v) }
   171  
   172  func AppendBoolean(b []byte, n int, v bool) []byte {
   173  	i := n / 8
   174  	j := n % 8
   175  
   176  	if cap(b) > i {
   177  		b = b[:i+1]
   178  	} else {
   179  		tmp := make([]byte, i+1, 2*(i+1))
   180  		copy(tmp, b)
   181  		b = tmp
   182  	}
   183  
   184  	k := uint(j)
   185  	x := byte(0)
   186  	if v {
   187  		x = 1
   188  	}
   189  
   190  	b[i] = (b[i] & ^(1 << k)) | (x << k)
   191  	return b
   192  }
   193  
   194  func AppendInt32(b []byte, v int32) []byte {
   195  	x := [4]byte{}
   196  	binary.LittleEndian.PutUint32(x[:], uint32(v))
   197  	return append(b, x[:]...)
   198  }
   199  
   200  func AppendInt64(b []byte, v int64) []byte {
   201  	x := [8]byte{}
   202  	binary.LittleEndian.PutUint64(x[:], uint64(v))
   203  	return append(b, x[:]...)
   204  }
   205  
   206  func AppendInt96(b []byte, v deprecated.Int96) []byte {
   207  	x := [12]byte{}
   208  	binary.LittleEndian.PutUint32(x[0:4], v[0])
   209  	binary.LittleEndian.PutUint32(x[4:8], v[1])
   210  	binary.LittleEndian.PutUint32(x[8:12], v[2])
   211  	return append(b, x[:]...)
   212  }
   213  
   214  func AppendFloat(b []byte, v float32) []byte {
   215  	x := [4]byte{}
   216  	binary.LittleEndian.PutUint32(x[:], math.Float32bits(v))
   217  	return append(b, x[:]...)
   218  }
   219  
   220  func AppendDouble(b []byte, v float64) []byte {
   221  	x := [8]byte{}
   222  	binary.LittleEndian.PutUint64(x[:], math.Float64bits(v))
   223  	return append(b, x[:]...)
   224  }
   225  
   226  func AppendByteArray(b, v []byte) []byte {
   227  	length := [ByteArrayLengthSize]byte{}
   228  	PutByteArrayLength(length[:], len(v))
   229  	b = append(b, length[:]...)
   230  	b = append(b, v...)
   231  	return b
   232  }
   233  
   234  func AppendByteArrayString(b []byte, v string) []byte {
   235  	length := [ByteArrayLengthSize]byte{}
   236  	PutByteArrayLength(length[:], len(v))
   237  	b = append(b, length[:]...)
   238  	b = append(b, v...)
   239  	return b
   240  }
   241  
   242  func AppendByteArrayLength(b []byte, n int) []byte {
   243  	length := [ByteArrayLengthSize]byte{}
   244  	PutByteArrayLength(length[:], n)
   245  	return append(b, length[:]...)
   246  }
   247  
   248  func ByteArrayLength(b []byte) int {
   249  	return int(binary.LittleEndian.Uint32(b))
   250  }
   251  
   252  func PutByteArrayLength(b []byte, n int) {
   253  	binary.LittleEndian.PutUint32(b, uint32(n))
   254  }
   255  
   256  func RangeByteArray(b []byte, do func([]byte) error) (err error) {
   257  	for len(b) > 0 {
   258  		var v []byte
   259  		if v, b, err = NextByteArray(b); err != nil {
   260  			return err
   261  		}
   262  		if err = do(v); err != nil {
   263  			return err
   264  		}
   265  	}
   266  	return nil
   267  }
   268  
   269  func NextByteArray(b []byte) (v, r []byte, err error) {
   270  	if len(b) < ByteArrayLengthSize {
   271  		return nil, b, ErrTooShort(len(b))
   272  	}
   273  	n := ByteArrayLength(b)
   274  	if n > (len(b) - ByteArrayLengthSize) {
   275  		return nil, b, ErrTooShort(len(b))
   276  	}
   277  	if n > MaxByteArrayLength {
   278  		return nil, b, ErrTooLarge(n)
   279  	}
   280  	n += ByteArrayLengthSize
   281  	return b[ByteArrayLengthSize:n:n], b[n:len(b):len(b)], nil
   282  }
   283  
   284  func ErrTooShort(length int) error {
   285  	return fmt.Errorf("input of length %d is too short to contain a PLAIN encoded byte array value: %w", length, io.ErrUnexpectedEOF)
   286  }
   287  
   288  func ErrTooLarge(length int) error {
   289  	return fmt.Errorf("byte array of length %d is too large to be encoded", length)
   290  }