github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/encoding/plain/plain.go (about) 1 // Package plain implements the PLAIN parquet encoding. 2 // 3 // https://github.com/apache/parquet-format/blob/master/Encodings.md#plain-plain--0 4 package plain 5 6 import ( 7 "encoding/binary" 8 "fmt" 9 "io" 10 "math" 11 12 "github.com/vc42/parquet-go/deprecated" 13 "github.com/vc42/parquet-go/encoding" 14 "github.com/vc42/parquet-go/format" 15 ) 16 17 const ( 18 ByteArrayLengthSize = 4 19 MaxByteArrayLength = math.MaxInt32 20 ) 21 22 type Encoding struct { 23 encoding.NotSupported 24 } 25 26 func (e *Encoding) String() string { 27 return "PLAIN" 28 } 29 30 func (e *Encoding) Encoding() format.Encoding { 31 return format.Plain 32 } 33 34 func (e *Encoding) EncodeBoolean(dst, src []byte) ([]byte, error) { 35 return append(dst[:0], src...), nil 36 } 37 38 func (e *Encoding) EncodeInt32(dst, src []byte) ([]byte, error) { 39 if (len(src) % 4) != 0 { 40 return dst[:0], encoding.ErrEncodeInvalidInputSize(e, "INT32", len(src)) 41 } 42 return append(dst[:0], src...), nil 43 } 44 45 func (e *Encoding) EncodeInt64(dst, src []byte) ([]byte, error) { 46 if (len(src) % 8) != 0 { 47 return dst[:0], encoding.ErrEncodeInvalidInputSize(e, "INT64", len(src)) 48 } 49 return append(dst[:0], src...), nil 50 } 51 52 func (e *Encoding) EncodeInt96(dst, src []byte) ([]byte, error) { 53 if (len(src) % 12) != 0 { 54 return dst[:0], encoding.ErrEncodeInvalidInputSize(e, "INT96", len(src)) 55 } 56 return append(dst[:0], src...), nil 57 } 58 59 func (e *Encoding) EncodeFloat(dst, src []byte) ([]byte, error) { 60 if (len(src) % 4) != 0 { 61 return dst[:0], encoding.ErrEncodeInvalidInputSize(e, "FLOAT", len(src)) 62 } 63 return append(dst[:0], src...), nil 64 } 65 66 func (e *Encoding) EncodeDouble(dst, src []byte) ([]byte, error) { 67 if (len(src) % 8) != 0 { 68 return dst[:0], encoding.ErrEncodeInvalidInputSize(e, "DOUBLE", len(src)) 69 } 70 return append(dst[:0], src...), nil 71 } 72 73 func (e *Encoding) EncodeByteArray(dst []byte, src []byte) ([]byte, error) { 74 if err := ValidateByteArray(src); err != nil { 75 return dst[:0], encoding.Error(e, err) 76 } 77 return append(dst[:0], src...), nil 78 } 79 80 func (e *Encoding) EncodeFixedLenByteArray(dst, src []byte, size int) ([]byte, error) { 81 if size < 0 || size > encoding.MaxFixedLenByteArraySize { 82 return dst[:0], encoding.Error(e, encoding.ErrInvalidArgument) 83 } 84 return append(dst[:0], src...), nil 85 } 86 87 func (e *Encoding) DecodeBoolean(dst, src []byte) ([]byte, error) { 88 return append(dst[:0], src...), nil 89 } 90 91 func (e *Encoding) DecodeInt32(dst, src []byte) ([]byte, error) { 92 if (len(src) % 4) != 0 { 93 return dst[:0], encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src)) 94 } 95 return append(dst[:0], src...), nil 96 } 97 98 func (e *Encoding) DecodeInt64(dst, src []byte) ([]byte, error) { 99 if (len(src) % 8) != 0 { 100 return dst[:0], encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src)) 101 } 102 return append(dst[:0], src...), nil 103 } 104 105 func (e *Encoding) DecodeInt96(dst, src []byte) ([]byte, error) { 106 if (len(src) % 12) != 0 { 107 return dst[:0], encoding.ErrDecodeInvalidInputSize(e, "INT96", len(src)) 108 } 109 return append(dst[:0], src...), nil 110 } 111 112 func (e *Encoding) DecodeFloat(dst, src []byte) ([]byte, error) { 113 if (len(src) % 4) != 0 { 114 return dst[:0], encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) 115 } 116 return append(dst[:0], src...), nil 117 } 118 119 func (e *Encoding) DecodeDouble(dst, src []byte) ([]byte, error) { 120 if (len(src) % 8) != 0 { 121 return dst[:0], encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) 122 } 123 return append(dst[:0], src...), nil 124 } 125 126 func (e *Encoding) DecodeByteArray(dst, src []byte) ([]byte, error) { 127 if err := ValidateByteArray(src); err != nil { 128 return dst[:0], encoding.Error(e, err) 129 } 130 return append(dst[:0], src...), nil 131 } 132 133 func (e *Encoding) DecodeFixedLenByteArray(dst, src []byte, size int) ([]byte, error) { 134 if size < 0 || size > encoding.MaxFixedLenByteArraySize { 135 return dst[:0], encoding.Error(e, encoding.ErrInvalidArgument) 136 } 137 if (len(src) % size) != 0 { 138 return dst[:0], encoding.ErrDecodeInvalidInputSize(e, "FIXED_LEN_BYTE_ARRAY", len(src)) 139 } 140 return append(dst[:0], src...), nil 141 } 142 143 func Boolean(v bool) []byte { return AppendBoolean(nil, 0, v) } 144 145 func Int32(v int32) []byte { return AppendInt32(nil, v) } 146 147 func Int64(v int64) []byte { return AppendInt64(nil, v) } 148 149 func Int96(v deprecated.Int96) []byte { return AppendInt96(nil, v) } 150 151 func Float(v float32) []byte { return AppendFloat(nil, v) } 152 153 func Double(v float64) []byte { return AppendDouble(nil, v) } 154 155 func ByteArray(v []byte) []byte { return AppendByteArray(nil, v) } 156 157 func AppendBoolean(b []byte, n int, v bool) []byte { 158 i := n / 8 159 j := n % 8 160 161 if cap(b) > i { 162 b = b[:i+1] 163 } else { 164 tmp := make([]byte, i+1, 2*(i+1)) 165 copy(tmp, b) 166 b = tmp 167 } 168 169 k := uint(j) 170 x := byte(0) 171 if v { 172 x = 1 173 } 174 175 b[i] = (b[i] & ^(1 << k)) | (x << k) 176 return b 177 } 178 179 func AppendInt32(b []byte, v int32) []byte { 180 x := [4]byte{} 181 binary.LittleEndian.PutUint32(x[:], uint32(v)) 182 return append(b, x[:]...) 183 } 184 185 func AppendInt64(b []byte, v int64) []byte { 186 x := [8]byte{} 187 binary.LittleEndian.PutUint64(x[:], uint64(v)) 188 return append(b, x[:]...) 189 } 190 191 func AppendInt96(b []byte, v deprecated.Int96) []byte { 192 x := [12]byte{} 193 binary.LittleEndian.PutUint32(x[0:4], v[0]) 194 binary.LittleEndian.PutUint32(x[4:8], v[1]) 195 binary.LittleEndian.PutUint32(x[8:12], v[2]) 196 return append(b, x[:]...) 197 } 198 199 func AppendFloat(b []byte, v float32) []byte { 200 x := [4]byte{} 201 binary.LittleEndian.PutUint32(x[:], math.Float32bits(v)) 202 return append(b, x[:]...) 203 } 204 205 func AppendDouble(b []byte, v float64) []byte { 206 x := [8]byte{} 207 binary.LittleEndian.PutUint64(x[:], math.Float64bits(v)) 208 return append(b, x[:]...) 209 } 210 211 func AppendByteArray(b, v []byte) []byte { 212 length := [ByteArrayLengthSize]byte{} 213 PutByteArrayLength(length[:], len(v)) 214 b = append(b, length[:]...) 215 b = append(b, v...) 216 return b 217 } 218 219 func AppendByteArrayString(b []byte, v string) []byte { 220 length := [ByteArrayLengthSize]byte{} 221 PutByteArrayLength(length[:], len(v)) 222 b = append(b, length[:]...) 223 b = append(b, v...) 224 return b 225 } 226 227 func AppendByteArrayLength(b []byte, n int) []byte { 228 length := [ByteArrayLengthSize]byte{} 229 PutByteArrayLength(length[:], n) 230 return append(b, length[:]...) 231 } 232 233 func ByteArrayLength(b []byte) int { 234 return int(binary.LittleEndian.Uint32(b)) 235 } 236 237 func PutByteArrayLength(b []byte, n int) { 238 binary.LittleEndian.PutUint32(b, uint32(n)) 239 } 240 241 type status int 242 243 const ( 244 ok status = iota 245 errTooShort 246 errTooLarge 247 ) 248 249 func ValidateByteArray(b []byte) error { 250 switch validateByteArray(b) { 251 case errTooShort: 252 return ErrTooShort(len(b)) 253 case errTooLarge: 254 return ErrTooLarge(len(b)) 255 default: // ok 256 return nil 257 } 258 } 259 260 func RangeByteArray(b []byte, do func([]byte) error) (err error) { 261 for len(b) > 0 { 262 var v []byte 263 if v, b, err = NextByteArray(b); err != nil { 264 return err 265 } 266 if err = do(v); err != nil { 267 return err 268 } 269 } 270 return nil 271 } 272 273 func NextByteArray(b []byte) (v, r []byte, err error) { 274 if len(b) < ByteArrayLengthSize { 275 return nil, b, ErrTooShort(len(b)) 276 } 277 n := ByteArrayLength(b) 278 if n > (len(b) - ByteArrayLengthSize) { 279 return nil, b, ErrTooShort(len(b)) 280 } 281 if n > MaxByteArrayLength { 282 return nil, b, ErrTooLarge(n) 283 } 284 n += ByteArrayLengthSize 285 return b[ByteArrayLengthSize:n:n], b[n:len(b):len(b)], nil 286 } 287 288 func ErrTooShort(length int) error { 289 return fmt.Errorf("input of length %d is too short to contain a PLAIN encoded byte array value: %w", length, io.ErrUnexpectedEOF) 290 } 291 292 func ErrTooLarge(length int) error { 293 return fmt.Errorf("byte array of length %d is too large to be encoded", length) 294 }