github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/compress/lz4/lz4.go (about) 1 // Package lz4 implements the LZ4_RAW parquet compression codec. 2 package lz4 3 4 import ( 5 "github.com/pierrec/lz4/v4" 6 "github.com/segmentio/parquet-go/format" 7 ) 8 9 type Level = lz4.CompressionLevel 10 11 const ( 12 Fast = lz4.Fast 13 Level1 = lz4.Level1 14 Level2 = lz4.Level2 15 Level3 = lz4.Level3 16 Level4 = lz4.Level4 17 Level5 = lz4.Level5 18 Level6 = lz4.Level6 19 Level7 = lz4.Level7 20 Level8 = lz4.Level8 21 Level9 = lz4.Level9 22 ) 23 24 const ( 25 DefaultLevel = Fast 26 ) 27 28 type Codec struct { 29 Level Level 30 } 31 32 func (c *Codec) String() string { 33 return "LZ4_RAW" 34 } 35 36 func (c *Codec) CompressionCodec() format.CompressionCodec { 37 return format.Lz4Raw 38 } 39 40 func (c *Codec) Encode(dst, src []byte) ([]byte, error) { 41 dst = reserveAtLeast(dst, len(src)/4) 42 43 compressor := lz4.CompressorHC{Level: c.Level} 44 for { 45 n, err := compressor.CompressBlock(src, dst) 46 if err != nil { // see Decode for details about error handling 47 dst = make([]byte, 2*len(dst)) 48 } else if n == 0 { 49 dst = reserveAtLeast(dst, lz4.CompressBlockBound(len(src))) 50 } else { 51 return dst[:n], nil 52 } 53 } 54 } 55 56 func (c *Codec) Decode(dst, src []byte) ([]byte, error) { 57 // 3x seems like a common compression ratio, so we optimistically size the 58 // output buffer to that size. Feel free to change the value if you observe 59 // different behaviors. 60 dst = reserveAtLeast(dst, 3*len(src)) 61 62 for { 63 n, err := lz4.UncompressBlock(src, dst) 64 // The lz4 package does not expose the error values, they are declared 65 // in internal/lz4errors. Based on what I read of the implementation, 66 // the only condition where this function errors is if the output buffer 67 // was too short. 68 // 69 // https://github.com/pierrec/lz4/blob/a5532e5996ee86d17f8ce2694c08fb5bf3c6b471/internal/lz4block/block.go#L45-L53 70 if err != nil { 71 dst = make([]byte, 2*len(dst)) 72 } else { 73 return dst[:n], nil 74 } 75 } 76 } 77 78 func reserveAtLeast(b []byte, n int) []byte { 79 if cap(b) < n { 80 b = make([]byte, n) 81 } else { 82 b = b[:cap(b)] 83 } 84 return b 85 }