github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/encoding/plain/plain.go (about) 1 // Package plain implements the PLAIN parquet encoding. 2 // 3 // https://github.com/apache/parquet-format/blob/master/Encodings.md#plain-plain--0 4 package plain 5 6 import ( 7 "encoding/binary" 8 "fmt" 9 "io" 10 "math" 11 12 "github.com/segmentio/parquet-go/deprecated" 13 "github.com/segmentio/parquet-go/encoding" 14 "github.com/segmentio/parquet-go/format" 15 "github.com/segmentio/parquet-go/internal/unsafecast" 16 ) 17 18 const ( 19 ByteArrayLengthSize = 4 20 MaxByteArrayLength = math.MaxInt32 21 ) 22 23 type Encoding struct { 24 encoding.NotSupported 25 } 26 27 func (e *Encoding) String() string { 28 return "PLAIN" 29 } 30 31 func (e *Encoding) Encoding() format.Encoding { 32 return format.Plain 33 } 34 35 func (e *Encoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) { 36 return append(dst[:0], src...), nil 37 } 38 39 func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) { 40 return append(dst[:0], unsafecast.Int32ToBytes(src)...), nil 41 } 42 43 func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { 44 return append(dst[:0], unsafecast.Int64ToBytes(src)...), nil 45 } 46 47 func (e *Encoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) { 48 return append(dst[:0], deprecated.Int96ToBytes(src)...), nil 49 } 50 51 func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { 52 return append(dst[:0], unsafecast.Float32ToBytes(src)...), nil 53 } 54 55 func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { 56 return append(dst[:0], unsafecast.Float64ToBytes(src)...), nil 57 } 58 59 func (e *Encoding) EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error) { 60 dst = dst[:0] 61 62 if len(offsets) > 0 { 63 baseOffset := offsets[0] 64 65 for _, endOffset := range offsets[1:] { 66 dst = AppendByteArray(dst, src[baseOffset:endOffset:endOffset]) 67 baseOffset = endOffset 68 } 69 } 70 71 return dst, nil 72 } 73 74 func (e *Encoding) EncodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) { 75 if size < 0 || size > encoding.MaxFixedLenByteArraySize { 76 return dst[:0], encoding.Error(e, encoding.ErrInvalidArgument) 77 } 78 return append(dst[:0], src...), nil 79 } 80 81 func (e *Encoding) DecodeBoolean(dst []byte, src []byte) ([]byte, error) { 82 return append(dst[:0], src...), nil 83 } 84 85 func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { 86 if (len(src) % 4) != 0 { 87 return dst, encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src)) 88 } 89 return append(dst[:0], unsafecast.BytesToInt32(src)...), nil 90 } 91 92 func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) { 93 if (len(src) % 8) != 0 { 94 return dst, encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src)) 95 } 96 return append(dst[:0], unsafecast.BytesToInt64(src)...), nil 97 } 98 99 func (e *Encoding) DecodeInt96(dst []deprecated.Int96, src []byte) ([]deprecated.Int96, error) { 100 if (len(src) % 12) != 0 { 101 return dst, encoding.ErrDecodeInvalidInputSize(e, "INT96", len(src)) 102 } 103 return append(dst[:0], deprecated.BytesToInt96(src)...), nil 104 } 105 106 func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) { 107 if (len(src) % 4) != 0 { 108 return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) 109 } 110 return append(dst[:0], unsafecast.BytesToFloat32(src)...), nil 111 } 112 113 func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) { 114 if (len(src) % 8) != 0 { 115 return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) 116 } 117 return append(dst[:0], unsafecast.BytesToFloat64(src)...), nil 118 } 119 120 func (e *Encoding) DecodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, []uint32, error) { 121 dst, offsets = dst[:0], offsets[:0] 122 123 for i := 0; i < len(src); { 124 if (len(src) - i) < ByteArrayLengthSize { 125 return dst, offsets, ErrTooShort(len(src)) 126 } 127 n := ByteArrayLength(src[i:]) 128 if n > (len(src) - ByteArrayLengthSize) { 129 return dst, offsets, ErrTooShort(len(src)) 130 } 131 i += ByteArrayLengthSize 132 offsets = append(offsets, uint32(len(dst))) 133 dst = append(dst, src[i:i+n]...) 134 i += n 135 } 136 137 return dst, append(offsets, uint32(len(dst))), nil 138 } 139 140 func (e *Encoding) DecodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) { 141 if size < 0 || size > encoding.MaxFixedLenByteArraySize { 142 return dst, encoding.Error(e, encoding.ErrInvalidArgument) 143 } 144 if (len(src) % size) != 0 { 145 return dst, encoding.ErrDecodeInvalidInputSize(e, "FIXED_LEN_BYTE_ARRAY", len(src)) 146 } 147 return append(dst[:0], src...), nil 148 } 149 150 func (e *Encoding) EstimateDecodeByteArraySize(src []byte) int { 151 return len(src) 152 } 153 154 func (e *Encoding) CanDecodeInPlace() bool { 155 return true 156 } 157 158 func Boolean(v bool) []byte { return AppendBoolean(nil, 0, v) } 159 160 func Int32(v int32) []byte { return AppendInt32(nil, v) } 161 162 func Int64(v int64) []byte { return AppendInt64(nil, v) } 163 164 func Int96(v deprecated.Int96) []byte { return AppendInt96(nil, v) } 165 166 func Float(v float32) []byte { return AppendFloat(nil, v) } 167 168 func Double(v float64) []byte { return AppendDouble(nil, v) } 169 170 func ByteArray(v []byte) []byte { return AppendByteArray(nil, v) } 171 172 func AppendBoolean(b []byte, n int, v bool) []byte { 173 i := n / 8 174 j := n % 8 175 176 if cap(b) > i { 177 b = b[:i+1] 178 } else { 179 tmp := make([]byte, i+1, 2*(i+1)) 180 copy(tmp, b) 181 b = tmp 182 } 183 184 k := uint(j) 185 x := byte(0) 186 if v { 187 x = 1 188 } 189 190 b[i] = (b[i] & ^(1 << k)) | (x << k) 191 return b 192 } 193 194 func AppendInt32(b []byte, v int32) []byte { 195 x := [4]byte{} 196 binary.LittleEndian.PutUint32(x[:], uint32(v)) 197 return append(b, x[:]...) 198 } 199 200 func AppendInt64(b []byte, v int64) []byte { 201 x := [8]byte{} 202 binary.LittleEndian.PutUint64(x[:], uint64(v)) 203 return append(b, x[:]...) 204 } 205 206 func AppendInt96(b []byte, v deprecated.Int96) []byte { 207 x := [12]byte{} 208 binary.LittleEndian.PutUint32(x[0:4], v[0]) 209 binary.LittleEndian.PutUint32(x[4:8], v[1]) 210 binary.LittleEndian.PutUint32(x[8:12], v[2]) 211 return append(b, x[:]...) 212 } 213 214 func AppendFloat(b []byte, v float32) []byte { 215 x := [4]byte{} 216 binary.LittleEndian.PutUint32(x[:], math.Float32bits(v)) 217 return append(b, x[:]...) 218 } 219 220 func AppendDouble(b []byte, v float64) []byte { 221 x := [8]byte{} 222 binary.LittleEndian.PutUint64(x[:], math.Float64bits(v)) 223 return append(b, x[:]...) 224 } 225 226 func AppendByteArray(b, v []byte) []byte { 227 length := [ByteArrayLengthSize]byte{} 228 PutByteArrayLength(length[:], len(v)) 229 b = append(b, length[:]...) 230 b = append(b, v...) 231 return b 232 } 233 234 func AppendByteArrayString(b []byte, v string) []byte { 235 length := [ByteArrayLengthSize]byte{} 236 PutByteArrayLength(length[:], len(v)) 237 b = append(b, length[:]...) 238 b = append(b, v...) 239 return b 240 } 241 242 func AppendByteArrayLength(b []byte, n int) []byte { 243 length := [ByteArrayLengthSize]byte{} 244 PutByteArrayLength(length[:], n) 245 return append(b, length[:]...) 246 } 247 248 func ByteArrayLength(b []byte) int { 249 return int(binary.LittleEndian.Uint32(b)) 250 } 251 252 func PutByteArrayLength(b []byte, n int) { 253 binary.LittleEndian.PutUint32(b, uint32(n)) 254 } 255 256 func RangeByteArray(b []byte, do func([]byte) error) (err error) { 257 for len(b) > 0 { 258 var v []byte 259 if v, b, err = NextByteArray(b); err != nil { 260 return err 261 } 262 if err = do(v); err != nil { 263 return err 264 } 265 } 266 return nil 267 } 268 269 func NextByteArray(b []byte) (v, r []byte, err error) { 270 if len(b) < ByteArrayLengthSize { 271 return nil, b, ErrTooShort(len(b)) 272 } 273 n := ByteArrayLength(b) 274 if n > (len(b) - ByteArrayLengthSize) { 275 return nil, b, ErrTooShort(len(b)) 276 } 277 if n > MaxByteArrayLength { 278 return nil, b, ErrTooLarge(n) 279 } 280 n += ByteArrayLengthSize 281 return b[ByteArrayLengthSize:n:n], b[n:len(b):len(b)], nil 282 } 283 284 func ErrTooShort(length int) error { 285 return fmt.Errorf("input of length %d is too short to contain a PLAIN encoded byte array value: %w", length, io.ErrUnexpectedEOF) 286 } 287 288 func ErrTooLarge(length int) error { 289 return fmt.Errorf("byte array of length %d is too large to be encoded", length) 290 }