storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/internal/parquet-go/encoding/rle-encode.go (about)

     1  /*
     2   * Minio Cloud Storage, (C) 2019 Minio, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package encoding
    18  
    19  import (
    20  	"fmt"
    21  
    22  	"storj.io/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
    23  )
    24  
    25  func rleEncodeInt32s(i32s []int32, bitWidth int32) (data []byte) {
    26  	j := 0
    27  	for i := 0; i < len(i32s); i = j {
    28  		for j = i + 1; j < len(i32s) && i32s[i] == i32s[j]; j++ {
    29  		}
    30  
    31  		headerBytes := varIntEncode(uint64((j - i) << 1))
    32  		data = append(data, headerBytes...)
    33  
    34  		valBytes := plainEncodeInt32s([]int32{i32s[i]})
    35  		byteCount := (bitWidth + 7) / 8
    36  		data = append(data, valBytes[:byteCount]...)
    37  	}
    38  
    39  	return data
    40  }
    41  
    42  func rleEncodeInt64s(i64s []int64, bitWidth int32) (data []byte) {
    43  	j := 0
    44  	for i := 0; i < len(i64s); i = j {
    45  		for j = i + 1; j < len(i64s) && i64s[i] == i64s[j]; j++ {
    46  		}
    47  
    48  		headerBytes := varIntEncode(uint64((j - i) << 1))
    49  		data = append(data, headerBytes...)
    50  
    51  		valBytes := plainEncodeInt64s([]int64{i64s[i]})
    52  		byteCount := (bitWidth + 7) / 8
    53  		data = append(data, valBytes[:byteCount]...)
    54  	}
    55  
    56  	return data
    57  }
    58  
    59  // RLEBitPackedHybridEncode encodes values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#run-length-encoding--bit-packing-hybrid-rle--3
    60  //
    61  // Supported Types: INT32, INT64
    62  func RLEBitPackedHybridEncode(values interface{}, bitWidth int32, parquetType parquet.Type) []byte {
    63  	var rleBytes []byte
    64  
    65  	switch parquetType {
    66  	case parquet.Type_INT32:
    67  		i32s, ok := values.([]int32)
    68  		if !ok {
    69  			panic(fmt.Errorf("expected slice of int32"))
    70  		}
    71  		rleBytes = rleEncodeInt32s(i32s, bitWidth)
    72  	case parquet.Type_INT64:
    73  		i64s, ok := values.([]int64)
    74  		if !ok {
    75  			panic(fmt.Errorf("expected slice of int64"))
    76  		}
    77  		rleBytes = rleEncodeInt64s(i64s, bitWidth)
    78  	default:
    79  		panic(fmt.Errorf("%v parquet type unsupported", parquetType))
    80  	}
    81  
    82  	lenBytes := plainEncodeInt32s([]int32{int32(len(rleBytes))})
    83  	return append(lenBytes, rleBytes...)
    84  }