storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/internal/parquet-go/compression.go (about)

     1  /*
     2   * Minio Cloud Storage, (C) 2018 Minio, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package parquet
    18  
    19  import (
    20  	"bytes"
    21  	"fmt"
    22  	"io/ioutil"
    23  	"sync"
    24  
    25  	"github.com/klauspost/compress/gzip"
    26  	"github.com/klauspost/compress/snappy"
    27  	"github.com/klauspost/compress/zstd"
    28  	"github.com/pierrec/lz4"
    29  
    30  	"storj.io/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
    31  )
    32  
    33  type compressionCodec parquet.CompressionCodec
    34  
    35  var zstdOnce sync.Once
    36  var zstdEnc *zstd.Encoder
    37  var zstdDec *zstd.Decoder
    38  
    39  func initZstd() {
    40  	zstdOnce.Do(func() {
    41  		zstdEnc, _ = zstd.NewWriter(nil, zstd.WithZeroFrames(true))
    42  		zstdDec, _ = zstd.NewReader(nil)
    43  	})
    44  }
    45  
    46  func (c compressionCodec) compress(buf []byte) ([]byte, error) {
    47  	switch parquet.CompressionCodec(c) {
    48  	case parquet.CompressionCodec_UNCOMPRESSED:
    49  		return buf, nil
    50  
    51  	case parquet.CompressionCodec_SNAPPY:
    52  		return snappy.Encode(nil, buf), nil
    53  
    54  	case parquet.CompressionCodec_GZIP:
    55  		byteBuf := new(bytes.Buffer)
    56  		writer := gzip.NewWriter(byteBuf)
    57  		n, err := writer.Write(buf)
    58  		if err != nil {
    59  			return nil, err
    60  		}
    61  		if n != len(buf) {
    62  			return nil, fmt.Errorf("short writes")
    63  		}
    64  
    65  		if err = writer.Flush(); err != nil {
    66  			return nil, err
    67  		}
    68  
    69  		if err = writer.Close(); err != nil {
    70  			return nil, err
    71  		}
    72  
    73  		return byteBuf.Bytes(), nil
    74  
    75  	case parquet.CompressionCodec_LZ4:
    76  		byteBuf := new(bytes.Buffer)
    77  		writer := lz4.NewWriter(byteBuf)
    78  		n, err := writer.Write(buf)
    79  		if err != nil {
    80  			return nil, err
    81  		}
    82  		if n != len(buf) {
    83  			return nil, fmt.Errorf("short writes")
    84  		}
    85  
    86  		if err = writer.Flush(); err != nil {
    87  			return nil, err
    88  		}
    89  
    90  		if err = writer.Close(); err != nil {
    91  			return nil, err
    92  		}
    93  
    94  		return byteBuf.Bytes(), nil
    95  	case parquet.CompressionCodec_ZSTD:
    96  		initZstd()
    97  		return zstdEnc.EncodeAll(buf, nil), nil
    98  	}
    99  
   100  	return nil, fmt.Errorf("invalid compression codec %v", c)
   101  }
   102  
   103  func (c compressionCodec) uncompress(buf []byte) ([]byte, error) {
   104  	switch parquet.CompressionCodec(c) {
   105  	case parquet.CompressionCodec_UNCOMPRESSED:
   106  		return buf, nil
   107  
   108  	case parquet.CompressionCodec_SNAPPY:
   109  		return snappy.Decode(nil, buf)
   110  
   111  	case parquet.CompressionCodec_GZIP:
   112  		reader, err := gzip.NewReader(bytes.NewReader(buf))
   113  		if err != nil {
   114  			return nil, err
   115  		}
   116  		defer reader.Close()
   117  		return ioutil.ReadAll(reader)
   118  
   119  	case parquet.CompressionCodec_LZ4:
   120  		return ioutil.ReadAll(lz4.NewReader(bytes.NewReader(buf)))
   121  
   122  	case parquet.CompressionCodec_ZSTD:
   123  		initZstd()
   124  		return zstdDec.DecodeAll(buf, nil)
   125  	}
   126  
   127  	return nil, fmt.Errorf("invalid compression codec %v", c)
   128  }