github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/compress/lz4/lz4.go (about)

     1  // Package lz4 implements the LZ4_RAW parquet compression codec.
     2  package lz4
     3  
     4  import (
     5  	"github.com/pierrec/lz4/v4"
     6  	"github.com/segmentio/parquet-go/format"
     7  )
     8  
     9  type Level = lz4.CompressionLevel
    10  
    11  const (
    12  	Fast   = lz4.Fast
    13  	Level1 = lz4.Level1
    14  	Level2 = lz4.Level2
    15  	Level3 = lz4.Level3
    16  	Level4 = lz4.Level4
    17  	Level5 = lz4.Level5
    18  	Level6 = lz4.Level6
    19  	Level7 = lz4.Level7
    20  	Level8 = lz4.Level8
    21  	Level9 = lz4.Level9
    22  )
    23  
    24  const (
    25  	DefaultLevel = Fast
    26  )
    27  
    28  type Codec struct {
    29  	Level Level
    30  }
    31  
    32  func (c *Codec) String() string {
    33  	return "LZ4_RAW"
    34  }
    35  
    36  func (c *Codec) CompressionCodec() format.CompressionCodec {
    37  	return format.Lz4Raw
    38  }
    39  
    40  func (c *Codec) Encode(dst, src []byte) ([]byte, error) {
    41  	dst = reserveAtLeast(dst, len(src)/4)
    42  
    43  	compressor := lz4.CompressorHC{Level: c.Level}
    44  	for {
    45  		n, err := compressor.CompressBlock(src, dst)
    46  		if err != nil { // see Decode for details about error handling
    47  			dst = make([]byte, 2*len(dst))
    48  		} else if n == 0 {
    49  			dst = reserveAtLeast(dst, lz4.CompressBlockBound(len(src)))
    50  		} else {
    51  			return dst[:n], nil
    52  		}
    53  	}
    54  }
    55  
    56  func (c *Codec) Decode(dst, src []byte) ([]byte, error) {
    57  	// 3x seems like a common compression ratio, so we optimistically size the
    58  	// output buffer to that size. Feel free to change the value if you observe
    59  	// different behaviors.
    60  	dst = reserveAtLeast(dst, 3*len(src))
    61  
    62  	for {
    63  		n, err := lz4.UncompressBlock(src, dst)
    64  		// The lz4 package does not expose the error values, they are declared
    65  		// in internal/lz4errors. Based on what I read of the implementation,
    66  		// the only condition where this function errors is if the output buffer
    67  		// was too short.
    68  		//
    69  		// https://github.com/pierrec/lz4/blob/a5532e5996ee86d17f8ce2694c08fb5bf3c6b471/internal/lz4block/block.go#L45-L53
    70  		if err != nil {
    71  			dst = make([]byte, 2*len(dst))
    72  		} else {
    73  			return dst[:n], nil
    74  		}
    75  	}
    76  }
    77  
    78  func reserveAtLeast(b []byte, n int) []byte {
    79  	if cap(b) < n {
    80  		b = make([]byte, n)
    81  	} else {
    82  		b = b[:cap(b)]
    83  	}
    84  	return b
    85  }