github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/compress/lz4/lz4.go (about)

     1  // Package lz4 implements the LZ4_RAW parquet compression codec.
     2  package lz4
     3  
     4  import (
     5  	"github.com/parquet-go/parquet-go/format"
     6  	"github.com/pierrec/lz4/v4"
     7  )
     8  
     9  type Level = lz4.CompressionLevel
    10  
    11  const (
    12  	Fastest = lz4.CompressionLevel(99)
    13  	Fast    = lz4.Fast
    14  	Level1  = lz4.Level1
    15  	Level2  = lz4.Level2
    16  	Level3  = lz4.Level3
    17  	Level4  = lz4.Level4
    18  	Level5  = lz4.Level5
    19  	Level6  = lz4.Level6
    20  	Level7  = lz4.Level7
    21  	Level8  = lz4.Level8
    22  	Level9  = lz4.Level9
    23  )
    24  
    25  const (
    26  	DefaultLevel = Fast
    27  )
    28  
// Codec is the LZ4_RAW compression codec. Its methods report the codec's
// identity and compress or decompress raw LZ4 blocks.
type Codec struct {
	// Level is the compression level used by Encode. Decode does not read it.
	Level Level
}
    32  
    33  func (c *Codec) String() string {
    34  	return "LZ4_RAW"
    35  }
    36  
    37  func (c *Codec) CompressionCodec() format.CompressionCodec {
    38  	return format.Lz4Raw
    39  }
    40  
    41  func (c *Codec) Encode(dst, src []byte) ([]byte, error) {
    42  	dst = reserveAtLeast(dst, lz4.CompressBlockBound(len(src)))
    43  
    44  	var (
    45  		n   int
    46  		err error
    47  	)
    48  	if c.Level == Fastest {
    49  		compressor := lz4.Compressor{}
    50  		n, err = compressor.CompressBlock(src, dst)
    51  	} else {
    52  		compressor := lz4.CompressorHC{Level: c.Level}
    53  		n, err = compressor.CompressBlock(src, dst)
    54  	}
    55  	return dst[:n], err
    56  }
    57  
    58  func (c *Codec) Decode(dst, src []byte) ([]byte, error) {
    59  	// 3x seems like a common compression ratio, so we optimistically size the
    60  	// output buffer to that size. Feel free to change the value if you observe
    61  	// different behaviors.
    62  	dst = reserveAtLeast(dst, 3*len(src))
    63  
    64  	for {
    65  		n, err := lz4.UncompressBlock(src, dst)
    66  		// The lz4 package does not expose the error values, they are declared
    67  		// in internal/lz4errors. Based on what I read of the implementation,
    68  		// the only condition where this function errors is if the output buffer
    69  		// was too short.
    70  		//
    71  		// https://github.com/pierrec/lz4/blob/a5532e5996ee86d17f8ce2694c08fb5bf3c6b471/internal/lz4block/block.go#L45-L53
    72  		if err != nil {
    73  			dst = make([]byte, 2*len(dst))
    74  		} else {
    75  			return dst[:n], nil
    76  		}
    77  	}
    78  }
    79  
    80  func reserveAtLeast(b []byte, n int) []byte {
    81  	if cap(b) < n {
    82  		b = make([]byte, n)
    83  	} else {
    84  		b = b[:cap(b)]
    85  	}
    86  	return b
    87  }