github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/compress/zstd/zstd.go (about)

     1  // Package zstd implements the ZSTD parquet compression codec.
     2  package zstd
     3  
     4  import (
     5  	"sync"
     6  
     7  	"github.com/klauspost/compress/zstd"
     8  	"github.com/parquet-go/parquet-go/format"
     9  )
    10  
// Level is the ZSTD compression level. It is an alias of zstd.EncoderLevel
// from github.com/klauspost/compress/zstd.
type Level = zstd.EncoderLevel
    12  
// Compression levels that can be assigned to Codec.Level. Each constant is
// set to the identically named level of github.com/klauspost/compress/zstd.
const (
	// SpeedFastest will choose the fastest reasonable compression.
	// This is roughly equivalent to the fastest Zstandard mode.
	SpeedFastest = zstd.SpeedFastest

	// SpeedDefault is the default "pretty fast" compression option.
	// This is roughly equivalent to the default Zstandard mode (level 3).
	SpeedDefault = zstd.SpeedDefault

	// SpeedBetterCompression will yield better compression than the default.
	// Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage.
	// By using this, notice that CPU usage may go up in the future.
	SpeedBetterCompression = zstd.SpeedBetterCompression

	// SpeedBestCompression will choose the best available compression option.
	// This will offer the best compression no matter the CPU cost.
	SpeedBestCompression = zstd.SpeedBestCompression
)
    31  
const (
	// DefaultLevel is the compression level used when Codec.Level is left at
	// its zero value.
	DefaultLevel = SpeedDefault

	// DefaultConcurrency is the concurrency used when Codec.Concurrency is
	// left at its zero value.
	DefaultConcurrency = 1
)
    37  
// Codec is the ZSTD compression codec. Its zero value is ready to use: unset
// fields fall back to DefaultLevel and DefaultConcurrency, and encoders and
// decoders are created on demand.
//
// Level and Concurrency are only read when a new encoder or decoder has to be
// created; instances already held in the pools keep the settings they were
// built with. A Codec contains sync.Pool values, so it must not be copied
// after first use.
type Codec struct {
	// Level is the compression level used when encoding.
	// If Level is 0, it will use DefaultLevel.
	Level Level

	// Concurrency is the number of CPU cores to use for encoding and decoding.
	// If Concurrency is 0, it will use DefaultConcurrency.
	Concurrency uint

	// Pools of reusable encoders and decoders, so that Encode and Decode do
	// not have to construct a new one on every call.
	encoders sync.Pool // *zstd.Encoder
	decoders sync.Pool // *zstd.Decoder
}
    48  
    49  func (c *Codec) String() string {
    50  	return "ZSTD"
    51  }
    52  
// CompressionCodec returns format.Zstd, the parquet format identifier for
// this codec.
func (c *Codec) CompressionCodec() format.CompressionCodec {
	return format.Zstd
}
    56  
    57  func (c *Codec) Encode(dst, src []byte) ([]byte, error) {
    58  	e, _ := c.encoders.Get().(*zstd.Encoder)
    59  	if e == nil {
    60  		var err error
    61  		e, err = zstd.NewWriter(nil,
    62  			zstd.WithEncoderConcurrency(c.concurrency()),
    63  			zstd.WithEncoderLevel(c.level()),
    64  			zstd.WithZeroFrames(true),
    65  			zstd.WithEncoderCRC(false),
    66  		)
    67  		if err != nil {
    68  			return dst[:0], err
    69  		}
    70  	}
    71  	defer c.encoders.Put(e)
    72  	return e.EncodeAll(src, dst[:0]), nil
    73  }
    74  
    75  func (c *Codec) Decode(dst, src []byte) ([]byte, error) {
    76  	d, _ := c.decoders.Get().(*zstd.Decoder)
    77  	if d == nil {
    78  		var err error
    79  		d, err = zstd.NewReader(nil,
    80  			zstd.WithDecoderConcurrency(c.concurrency()),
    81  		)
    82  		if err != nil {
    83  			return dst[:0], err
    84  		}
    85  	}
    86  	defer c.decoders.Put(d)
    87  	return d.DecodeAll(src, dst[:0])
    88  }
    89  
    90  func (c *Codec) level() Level {
    91  	if c.Level != 0 {
    92  		return c.Level
    93  	}
    94  	return DefaultLevel
    95  }
    96  
    97  func (c *Codec) concurrency() int {
    98  	if c.Concurrency != 0 {
    99  		return int(c.Concurrency)
   100  	}
   101  	return DefaultConcurrency
   102  }