github.com/hashicorp/vault/sdk@v0.13.0/helper/compressutil/compress.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package compressutil 5 6 import ( 7 "bytes" 8 "compress/gzip" 9 "compress/lzw" 10 "fmt" 11 "io" 12 13 "github.com/golang/snappy" 14 "github.com/pierrec/lz4" 15 ) 16 17 const ( 18 // A byte value used as a canary prefix for the compressed information 19 // which is used to distinguish if a JSON input is compressed or not. 20 // The value of this constant should not be a first character of any 21 // valid JSON string. 22 23 CompressionTypeGzip = "gzip" 24 CompressionCanaryGzip byte = 'G' 25 26 CompressionTypeLZW = "lzw" 27 CompressionCanaryLZW byte = 'L' 28 29 CompressionTypeSnappy = "snappy" 30 CompressionCanarySnappy byte = 'S' 31 32 CompressionTypeLZ4 = "lz4" 33 CompressionCanaryLZ4 byte = '4' 34 ) 35 36 // CompressUtilReadCloser embeds the snappy reader which implements the io.Reader 37 // interface. The decompress procedure in this utility expects an 38 // io.ReadCloser. This type implements the io.Closer interface to retain the 39 // generic way of decompression. 40 type CompressUtilReadCloser struct { 41 io.Reader 42 } 43 44 // Close is a noop method implemented only to satisfy the io.Closer interface 45 func (c *CompressUtilReadCloser) Close() error { 46 return nil 47 } 48 49 // CompressionConfig is used to select a compression type to be performed by 50 // Compress and Decompress utilities. 51 // Supported types are: 52 // * CompressionTypeLZW 53 // * CompressionTypeGzip 54 // * CompressionTypeSnappy 55 // * CompressionTypeLZ4 56 // 57 // When using CompressionTypeGzip, the compression levels can also be chosen: 58 // * gzip.DefaultCompression 59 // * gzip.BestSpeed 60 // * gzip.BestCompression 61 type CompressionConfig struct { 62 // Type of the compression algorithm to be used 63 Type string 64 65 // When using Gzip format, the compression level to employ 66 GzipCompressionLevel int 67 } 68 69 // Compress places the canary byte in a buffer and uses the same buffer to fill 70 // in the compressed information of the given input. The configuration supports 71 // two type of compression: LZW and Gzip. When using Gzip compression format, 72 // if GzipCompressionLevel is not specified, the 'gzip.DefaultCompression' will 73 // be assumed. 74 func Compress(data []byte, config *CompressionConfig) ([]byte, error) { 75 var buf bytes.Buffer 76 var writer io.WriteCloser 77 var err error 78 79 if config == nil { 80 return nil, fmt.Errorf("config is nil") 81 } 82 83 // Write the canary into the buffer and create writer to compress the 84 // input data based on the configured type 85 switch config.Type { 86 case CompressionTypeLZW: 87 buf.Write([]byte{CompressionCanaryLZW}) 88 writer = lzw.NewWriter(&buf, lzw.LSB, 8) 89 90 case CompressionTypeGzip: 91 buf.Write([]byte{CompressionCanaryGzip}) 92 93 switch { 94 case config.GzipCompressionLevel == gzip.BestCompression, 95 config.GzipCompressionLevel == gzip.BestSpeed, 96 config.GzipCompressionLevel == gzip.DefaultCompression: 97 // These are valid compression levels 98 default: 99 // If compression level is set to NoCompression or to 100 // any invalid value, fallback to DefaultCompression 101 config.GzipCompressionLevel = gzip.DefaultCompression 102 } 103 writer, err = gzip.NewWriterLevel(&buf, config.GzipCompressionLevel) 104 105 case CompressionTypeSnappy: 106 buf.Write([]byte{CompressionCanarySnappy}) 107 writer = snappy.NewBufferedWriter(&buf) 108 109 case CompressionTypeLZ4: 110 buf.Write([]byte{CompressionCanaryLZ4}) 111 writer = lz4.NewWriter(&buf) 112 113 default: 114 return nil, fmt.Errorf("unsupported compression type") 115 } 116 117 if err != nil { 118 return nil, fmt.Errorf("failed to create a compression writer: %w", err) 119 } 120 121 if writer == nil { 122 return nil, fmt.Errorf("failed to create a compression writer") 123 } 124 125 // Compress the input and place it in the same buffer containing the 126 // canary byte. 127 if _, err = writer.Write(data); err != nil { 128 return nil, fmt.Errorf("failed to compress input data: err: %w", err) 129 } 130 131 // Close the io.WriteCloser 132 if err = writer.Close(); err != nil { 133 return nil, err 134 } 135 136 // Return the compressed bytes with canary byte at the start 137 return buf.Bytes(), nil 138 } 139 140 // Decompress checks if the first byte in the input matches the canary byte. 141 // If the first byte is a canary byte, then the input past the canary byte 142 // will be decompressed using the method specified in the given configuration. 143 // If the first byte isn't a canary byte, then the utility returns a boolean 144 // value indicating that the input was not compressed. 145 func Decompress(data []byte) ([]byte, bool, error) { 146 bytes, _, notCompressed, err := DecompressWithCanary(data) 147 return bytes, notCompressed, err 148 } 149 150 // DecompressWithCanary checks if the first byte in the input matches the canary byte. 151 // If the first byte is a canary byte, then the input past the canary byte 152 // will be decompressed using the method specified in the given configuration. The type of compression used is also 153 // returned. If the first byte isn't a canary byte, then the utility returns a boolean 154 // value indicating that the input was not compressed. 155 func DecompressWithCanary(data []byte) ([]byte, string, bool, error) { 156 var err error 157 var reader io.ReadCloser 158 var compressionType string 159 if data == nil || len(data) == 0 { 160 return nil, "", false, fmt.Errorf("'data' being decompressed is empty") 161 } 162 163 canary := data[0] 164 cData := data[1:] 165 166 switch canary { 167 // If the first byte matches the canary byte, remove the canary 168 // byte and try to decompress the data that is after the canary. 169 case CompressionCanaryGzip: 170 if len(data) < 2 { 171 return nil, "", false, fmt.Errorf("invalid 'data' after the canary") 172 } 173 reader, err = gzip.NewReader(bytes.NewReader(cData)) 174 compressionType = CompressionTypeGzip 175 176 case CompressionCanaryLZW: 177 if len(data) < 2 { 178 return nil, "", false, fmt.Errorf("invalid 'data' after the canary") 179 } 180 reader = lzw.NewReader(bytes.NewReader(cData), lzw.LSB, 8) 181 compressionType = CompressionTypeLZW 182 183 case CompressionCanarySnappy: 184 if len(data) < 2 { 185 return nil, "", false, fmt.Errorf("invalid 'data' after the canary") 186 } 187 reader = &CompressUtilReadCloser{ 188 Reader: snappy.NewReader(bytes.NewReader(cData)), 189 } 190 compressionType = CompressionTypeSnappy 191 192 case CompressionCanaryLZ4: 193 if len(data) < 2 { 194 return nil, "", false, fmt.Errorf("invalid 'data' after the canary") 195 } 196 reader = &CompressUtilReadCloser{ 197 Reader: lz4.NewReader(bytes.NewReader(cData)), 198 } 199 compressionType = CompressionTypeLZ4 200 201 default: 202 // If the first byte doesn't match the canary byte, it means 203 // that the content was not compressed at all. Indicate the 204 // caller that the input was not compressed. 205 return nil, "", true, nil 206 } 207 if err != nil { 208 return nil, "", false, fmt.Errorf("failed to create a compression reader: %w", err) 209 } 210 if reader == nil { 211 return nil, "", false, fmt.Errorf("failed to create a compression reader") 212 } 213 214 // Close the io.ReadCloser 215 defer reader.Close() 216 217 // Read all the compressed data into a buffer 218 var buf bytes.Buffer 219 220 // Read the compressed data into a buffer, but do so 221 // slowly to prevent reading all the data into memory 222 // at once (protecting against e.g. zip bombs). 223 for { 224 _, err := io.CopyN(&buf, reader, 1024) 225 if err != nil { 226 if err == io.EOF { 227 break 228 } 229 return nil, "", false, err 230 } 231 } 232 233 return buf.Bytes(), compressionType, false, nil 234 }