storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/internal/parquet-go/encoding/rle-encode.go (about) 1 /* 2 * Minio Cloud Storage, (C) 2019 Minio, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package encoding 18 19 import ( 20 "fmt" 21 22 "storj.io/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" 23 ) 24 25 func rleEncodeInt32s(i32s []int32, bitWidth int32) (data []byte) { 26 j := 0 27 for i := 0; i < len(i32s); i = j { 28 for j = i + 1; j < len(i32s) && i32s[i] == i32s[j]; j++ { 29 } 30 31 headerBytes := varIntEncode(uint64((j - i) << 1)) 32 data = append(data, headerBytes...) 33 34 valBytes := plainEncodeInt32s([]int32{i32s[i]}) 35 byteCount := (bitWidth + 7) / 8 36 data = append(data, valBytes[:byteCount]...) 37 } 38 39 return data 40 } 41 42 func rleEncodeInt64s(i64s []int64, bitWidth int32) (data []byte) { 43 j := 0 44 for i := 0; i < len(i64s); i = j { 45 for j = i + 1; j < len(i64s) && i64s[i] == i64s[j]; j++ { 46 } 47 48 headerBytes := varIntEncode(uint64((j - i) << 1)) 49 data = append(data, headerBytes...) 50 51 valBytes := plainEncodeInt64s([]int64{i64s[i]}) 52 byteCount := (bitWidth + 7) / 8 53 data = append(data, valBytes[:byteCount]...) 54 } 55 56 return data 57 } 58 59 // RLEBitPackedHybridEncode encodes values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#run-length-encoding--bit-packing-hybrid-rle--3 60 // 61 // Supported Types: INT32, INT64 62 func RLEBitPackedHybridEncode(values interface{}, bitWidth int32, parquetType parquet.Type) []byte { 63 var rleBytes []byte 64 65 switch parquetType { 66 case parquet.Type_INT32: 67 i32s, ok := values.([]int32) 68 if !ok { 69 panic(fmt.Errorf("expected slice of int32")) 70 } 71 rleBytes = rleEncodeInt32s(i32s, bitWidth) 72 case parquet.Type_INT64: 73 i64s, ok := values.([]int64) 74 if !ok { 75 panic(fmt.Errorf("expected slice of int64")) 76 } 77 rleBytes = rleEncodeInt64s(i64s, bitWidth) 78 default: 79 panic(fmt.Errorf("%v parquet type unsupported", parquetType)) 80 } 81 82 lenBytes := plainEncodeInt32s([]int32{int32(len(rleBytes))}) 83 return append(lenBytes, rleBytes...) 84 }