github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/compress/lz4/lz4.go (about) 1 // Package lz4 implements the LZ4_RAW parquet compression codec. 2 package lz4 3 4 import ( 5 "github.com/pierrec/lz4/v4" 6 "github.com/vc42/parquet-go/format" 7 ) 8 9 type Level = lz4.CompressionLevel 10 11 const ( 12 Fast = lz4.Fast 13 Level1 = lz4.Level1 14 Level2 = lz4.Level2 15 Level3 = lz4.Level3 16 Level4 = lz4.Level4 17 Level5 = lz4.Level5 18 Level6 = lz4.Level6 19 Level7 = lz4.Level7 20 Level8 = lz4.Level8 21 Level9 = lz4.Level9 22 ) 23 24 const ( 25 DefaultLevel = Fast 26 ) 27 28 type Codec struct { 29 Level Level 30 } 31 32 func (c *Codec) String() string { 33 return "LZ4_RAW" 34 } 35 36 func (c *Codec) CompressionCodec() format.CompressionCodec { 37 return format.Lz4Raw 38 } 39 40 func (c *Codec) Encode(dst, src []byte) ([]byte, error) { 41 dst = reserveAtLeast(dst, len(src)/4) 42 43 compressor := lz4.CompressorHC{Level: c.Level} 44 for { 45 n, err := compressor.CompressBlock(src, dst) 46 if err != nil { // see Decode for details about error handling 47 dst = make([]byte, 2*len(dst)) 48 } else { 49 return dst[:n], nil 50 } 51 } 52 } 53 54 func (c *Codec) Decode(dst, src []byte) ([]byte, error) { 55 // 3x seems like a common compression ratio, so we optimistically size the 56 // output buffer to that size. Feel free to change the value if you observe 57 // different behaviors. 58 dst = reserveAtLeast(dst, 3*len(src)) 59 60 for { 61 n, err := lz4.UncompressBlock(src, dst) 62 // The lz4 package does not expose the error values, they are declared 63 // in internal/lz4errors. Based on what I read of the implementation, 64 // the only condition where this function errors is if the output buffer 65 // was too short. 66 // 67 // https://github.com/pierrec/lz4/blob/a5532e5996ee86d17f8ce2694c08fb5bf3c6b471/internal/lz4block/block.go#L45-L53 68 if err != nil { 69 dst = make([]byte, 2*len(dst)) 70 } else { 71 return dst[:n], nil 72 } 73 } 74 } 75 76 func reserveAtLeast(b []byte, n int) []byte { 77 if cap(b) < n { 78 b = make([]byte, n) 79 } else { 80 b = b[:cap(b)] 81 } 82 return b 83 }