github.com/datastax/go-cassandra-native-protocol@v0.0.0-20220706104457-5e8aad05cf90/compression/lz4/lz4.go (about) 1 // Copyright 2020 DataStax 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package lz4 16 17 import ( 18 "bytes" 19 "encoding/binary" 20 "fmt" 21 "io" 22 "io/ioutil" 23 24 "github.com/pierrec/lz4/v4" 25 ) 26 27 // Compressor satisfies frame.BodyCompressor and segment.PayloadCompressor for the LZ4 algorithm. 28 // Note: Cassandra expects lz4-compressed bodies to start with a 4-byte integer holding the decompressed message length. 29 // The Go implementation of lz4 used here does not include that, so we need to do it manually when encoding and 30 // decoding. 31 type Compressor struct{} 32 33 func (c Compressor) Compress(source io.Reader, dest io.Writer) error { 34 if uncompressedMessage, err := bufferFromReader(source); err != nil { 35 return fmt.Errorf("cannot read uncompressed message: %w", err) 36 } else { 37 maxCompressedSize := lz4.CompressBlockBound(len(uncompressedMessage)) 38 // allocate enough space for the max compressed size 39 compressedMessage := make([]byte, maxCompressedSize) 40 // compress the message and write the result to the destination buffer; 41 // note that for empty messages, this results in a single byte being written and written = 1; 42 // this is normal and is what Cassandra expects for empty compressed messages. 43 if written, err := lz4.CompressBlock(uncompressedMessage, compressedMessage, nil); err != nil { 44 return fmt.Errorf("cannot compress message: %w", err) 45 } else if _, err := dest.Write(compressedMessage[:written]); err != nil { 46 return fmt.Errorf("cannot write compressed message: %w", err) 47 } 48 return nil 49 } 50 } 51 52 func (c Compressor) CompressWithLength(source io.Reader, dest io.Writer) error { 53 if uncompressedMessage, err := bufferFromReader(source); err != nil { 54 return err 55 } else { 56 maxCompressedSize := lz4.CompressBlockBound(len(uncompressedMessage)) 57 // allocate enough space for the max compressed size + 4 bytes for the decompressed length 58 const SizeOfLength = 4 59 compressedMessage := make([]byte, maxCompressedSize+SizeOfLength) 60 // write the decompressed length in the 4 first bytes 61 binary.BigEndian.PutUint32(compressedMessage, uint32(len(uncompressedMessage))) 62 // compress the message and write the result to the destination buffer starting at offset 4; 63 // note that for empty messages, this results in a single byte being written and written = 1; 64 // this is normal and is what Cassandra expects for empty compressed messages. 65 if written, err := lz4.CompressBlock(uncompressedMessage, compressedMessage[SizeOfLength:], nil); err != nil { 66 return fmt.Errorf("cannot compress message: %w", err) 67 } else if _, err := dest.Write(compressedMessage[:written+SizeOfLength]); err != nil { 68 return fmt.Errorf("cannot write compressed message: %w", err) 69 } 70 return nil 71 } 72 } 73 74 func (c Compressor) Decompress(source io.Reader, dest io.Writer) error { 75 if compressedMessage, err := bufferFromReader(source); err != nil { 76 return fmt.Errorf("cannot read compressed message: %w", err) 77 } else if decompressedMessage, err := decompress(compressedMessage); err != nil { 78 return fmt.Errorf("cannot decompress message: %w", err) 79 } else if _, err := dest.Write(decompressedMessage); err != nil { 80 return fmt.Errorf("cannot write decompressed message: %w", err) 81 } 82 return nil 83 } 84 85 func (c Compressor) DecompressWithLength(source io.Reader, dest io.Writer) error { 86 // read the decompressed length first 87 var decompressedLength uint32 88 if err := binary.Read(source, binary.BigEndian, &decompressedLength); err != nil { 89 return fmt.Errorf("cannot read compressed length: %w", err) 90 } else if decompressedLength == 0 { 91 // if decompressed length is zero, the remaining buffer will contain a single byte that should be discarded 92 if _, err = io.CopyN(ioutil.Discard, source, 1); err != nil { 93 return fmt.Errorf("cannot read empty message: %w", err) 94 } 95 return nil 96 } 97 return c.Decompress(source, dest) 98 } 99 100 func decompress(source []byte) (dest []byte, err error) { 101 // try destination buffers of increased length to avoid allocating too much space, starting with twice the 102 // compressed length and up to eight times the compressed length 103 compressedLength := len(source) 104 var written int 105 for i := compressedLength * 2; i <= compressedLength*8; i *= 2 { 106 dest = make([]byte, i) 107 if written, err = lz4.UncompressBlock(source, dest); err == nil { 108 break 109 } 110 } 111 return dest[:written], err 112 } 113 114 func bufferFromReader(source io.Reader) ([]byte, error) { 115 var buf *bytes.Buffer 116 switch s := source.(type) { 117 case *bytes.Buffer: 118 buf = s 119 default: 120 buf = &bytes.Buffer{} 121 if _, err := buf.ReadFrom(s); err != nil { 122 return nil, err 123 } 124 } 125 return buf.Bytes(), nil 126 }