github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/compress/lz4/lz4.go (about)

     1  // Package lz4 implements the LZ4_RAW parquet compression codec.
     2  package lz4
     3  
     4  import (
     5  	"github.com/pierrec/lz4/v4"
     6  	"github.com/vc42/parquet-go/format"
     7  )
     8  
     9  type Level = lz4.CompressionLevel
    10  
    11  const (
    12  	Fast   = lz4.Fast
    13  	Level1 = lz4.Level1
    14  	Level2 = lz4.Level2
    15  	Level3 = lz4.Level3
    16  	Level4 = lz4.Level4
    17  	Level5 = lz4.Level5
    18  	Level6 = lz4.Level6
    19  	Level7 = lz4.Level7
    20  	Level8 = lz4.Level8
    21  	Level9 = lz4.Level9
    22  )
    23  
    24  const (
    25  	DefaultLevel = Fast
    26  )
    27  
    28  type Codec struct {
    29  	Level Level
    30  }
    31  
    32  func (c *Codec) String() string {
    33  	return "LZ4_RAW"
    34  }
    35  
    36  func (c *Codec) CompressionCodec() format.CompressionCodec {
    37  	return format.Lz4Raw
    38  }
    39  
    40  func (c *Codec) Encode(dst, src []byte) ([]byte, error) {
    41  	dst = reserveAtLeast(dst, len(src)/4)
    42  
    43  	compressor := lz4.CompressorHC{Level: c.Level}
    44  	for {
    45  		n, err := compressor.CompressBlock(src, dst)
    46  		if err != nil { // see Decode for details about error handling
    47  			dst = make([]byte, 2*len(dst))
    48  		} else {
    49  			return dst[:n], nil
    50  		}
    51  	}
    52  }
    53  
    54  func (c *Codec) Decode(dst, src []byte) ([]byte, error) {
    55  	// 3x seems like a common compression ratio, so we optimistically size the
    56  	// output buffer to that size. Feel free to change the value if you observe
    57  	// different behaviors.
    58  	dst = reserveAtLeast(dst, 3*len(src))
    59  
    60  	for {
    61  		n, err := lz4.UncompressBlock(src, dst)
    62  		// The lz4 package does not expose the error values, they are declared
    63  		// in internal/lz4errors. Based on what I read of the implementation,
    64  		// the only condition where this function errors is if the output buffer
    65  		// was too short.
    66  		//
    67  		// https://github.com/pierrec/lz4/blob/a5532e5996ee86d17f8ce2694c08fb5bf3c6b471/internal/lz4block/block.go#L45-L53
    68  		if err != nil {
    69  			dst = make([]byte, 2*len(dst))
    70  		} else {
    71  			return dst[:n], nil
    72  		}
    73  	}
    74  }
    75  
    76  func reserveAtLeast(b []byte, n int) []byte {
    77  	if cap(b) < n {
    78  		b = make([]byte, n)
    79  	} else {
    80  		b = b[:cap(b)]
    81  	}
    82  	return b
    83  }