storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/internal/parquet-go/encoding/rledict-encode.go (about) 1 /* 2 * Minio Cloud Storage, (C) 2019 Minio, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package encoding 18 19 import ( 20 "storj.io/minio/pkg/s3select/internal/parquet-go/common" 21 "storj.io/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" 22 ) 23 24 // RLEDictEncode encodes values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#dictionary-encoding-plain_dictionary--2-and-rle_dictionary--8 and returns dictionary page data and data page data. 25 // 26 // Dictionary page data contains PLAIN encoded slice of unique, fully defined non-nil values. 27 // Data page data contains RLE/Bit-Packed Hybrid encoded indices of fully defined non-nil values. 
28 // 29 // Supported Types: BOOLEAN, INT32, INT64, FLOAT, DOUBLE, BYTE_ARRAY 30 func RLEDictEncode(values []interface{}, parquetType parquet.Type, bitWidth int32) (dictPageData, dataPageData []byte, dictValueCount int32, indexBitWidth uint8) { 31 var definedValues []interface{} 32 var indices []int32 33 34 valueIndexMap := make(map[interface{}]int32) 35 j := 0 36 for i := 0; i < len(values); i = j { 37 for j = i; j < len(values); j++ { 38 value := values[j] 39 if value == nil { 40 continue 41 } 42 43 index, found := valueIndexMap[value] 44 if !found { 45 index = int32(len(definedValues)) 46 definedValues = append(definedValues, value) 47 valueIndexMap[value] = index 48 } 49 50 indices = append(indices, index) 51 } 52 } 53 54 indexBitWidth = uint8(common.BitWidth(uint64(indices[len(indices)-1]))) 55 56 dictPageData = PlainEncode(common.ToSliceValue(definedValues, parquetType), parquetType) 57 dataPageData = RLEBitPackedHybridEncode(indices, int32(indexBitWidth), parquet.Type_INT32) 58 59 return dictPageData, dataPageData, int32(len(definedValues)), indexBitWidth 60 }