github.com/hashicorp/vault/sdk@v0.13.0/helper/compressutil/compress.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package compressutil
     5  
     6  import (
     7  	"bytes"
     8  	"compress/gzip"
     9  	"compress/lzw"
    10  	"fmt"
    11  	"io"
    12  
    13  	"github.com/golang/snappy"
    14  	"github.com/pierrec/lz4"
    15  )
    16  
const (
	// Each supported algorithm is described by two constants: a type name,
	// which is the value Compress expects in CompressionConfig.Type, and a
	// one-byte canary. Compress writes the canary as the first byte of its
	// output, and DecompressWithCanary inspects that byte to decide whether
	// an input is compressed and, if so, with which algorithm.
	//
	// The canary values are meant to be bytes that cannot be the first
	// character of any valid JSON string, so that uncompressed JSON is never
	// mistaken for compressed data.
	//
	// NOTE(review): '4' can be the first character of a bare JSON number
	// (e.g. 42). This presumably relies on callers only storing JSON
	// objects/arrays/strings at the top level -- confirm. The values are part
	// of the stored format and must not be changed.

	CompressionTypeGzip        = "gzip"
	CompressionCanaryGzip byte = 'G'

	CompressionTypeLZW        = "lzw"
	CompressionCanaryLZW byte = 'L'

	CompressionTypeSnappy        = "snappy"
	CompressionCanarySnappy byte = 'S'

	CompressionTypeLZ4        = "lz4"
	CompressionCanaryLZ4 byte = '4'
)
    35  
// CompressUtilReadCloser adapts a plain io.Reader to the io.ReadCloser
// interface by adding a no-op Close method. The decompress procedure in this
// utility works with an io.ReadCloser; it wraps the snappy and lz4 readers in
// this type so that every algorithm can be handled in the same generic way.
type CompressUtilReadCloser struct {
	io.Reader
}

// Close is a no-op that always returns nil. It exists only to satisfy the
// io.Closer interface; the embedded reader is not closed or otherwise touched.
func (c *CompressUtilReadCloser) Close() error {
	return nil
}
    48  
// CompressionConfig selects the compression algorithm that Compress applies.
// Decompression does not take a configuration: the algorithm is detected from
// the canary byte at the start of the compressed data.
//
// Supported values for Type are:
//   - CompressionTypeLZW
//   - CompressionTypeGzip
//   - CompressionTypeSnappy
//   - CompressionTypeLZ4
//
// When using CompressionTypeGzip, the compression level can also be chosen:
//   - gzip.DefaultCompression
//   - gzip.BestSpeed
//   - gzip.BestCompression
//
// Any other level (including gzip.NoCompression and the zero value) is
// treated by Compress as gzip.DefaultCompression.
type CompressionConfig struct {
	// Type is the name of the compression algorithm to be used; it must be
	// one of the CompressionType* constants.
	Type string

	// GzipCompressionLevel is the compression level to employ. It is only
	// consulted when Type is CompressionTypeGzip.
	GzipCompressionLevel int
}
    68  
    69  // Compress places the canary byte in a buffer and uses the same buffer to fill
    70  // in the compressed information of the given input. The configuration supports
    71  // two type of compression: LZW and Gzip. When using Gzip compression format,
    72  // if GzipCompressionLevel is not specified, the 'gzip.DefaultCompression' will
    73  // be assumed.
    74  func Compress(data []byte, config *CompressionConfig) ([]byte, error) {
    75  	var buf bytes.Buffer
    76  	var writer io.WriteCloser
    77  	var err error
    78  
    79  	if config == nil {
    80  		return nil, fmt.Errorf("config is nil")
    81  	}
    82  
    83  	// Write the canary into the buffer and create writer to compress the
    84  	// input data based on the configured type
    85  	switch config.Type {
    86  	case CompressionTypeLZW:
    87  		buf.Write([]byte{CompressionCanaryLZW})
    88  		writer = lzw.NewWriter(&buf, lzw.LSB, 8)
    89  
    90  	case CompressionTypeGzip:
    91  		buf.Write([]byte{CompressionCanaryGzip})
    92  
    93  		switch {
    94  		case config.GzipCompressionLevel == gzip.BestCompression,
    95  			config.GzipCompressionLevel == gzip.BestSpeed,
    96  			config.GzipCompressionLevel == gzip.DefaultCompression:
    97  			// These are valid compression levels
    98  		default:
    99  			// If compression level is set to NoCompression or to
   100  			// any invalid value, fallback to DefaultCompression
   101  			config.GzipCompressionLevel = gzip.DefaultCompression
   102  		}
   103  		writer, err = gzip.NewWriterLevel(&buf, config.GzipCompressionLevel)
   104  
   105  	case CompressionTypeSnappy:
   106  		buf.Write([]byte{CompressionCanarySnappy})
   107  		writer = snappy.NewBufferedWriter(&buf)
   108  
   109  	case CompressionTypeLZ4:
   110  		buf.Write([]byte{CompressionCanaryLZ4})
   111  		writer = lz4.NewWriter(&buf)
   112  
   113  	default:
   114  		return nil, fmt.Errorf("unsupported compression type")
   115  	}
   116  
   117  	if err != nil {
   118  		return nil, fmt.Errorf("failed to create a compression writer: %w", err)
   119  	}
   120  
   121  	if writer == nil {
   122  		return nil, fmt.Errorf("failed to create a compression writer")
   123  	}
   124  
   125  	// Compress the input and place it in the same buffer containing the
   126  	// canary byte.
   127  	if _, err = writer.Write(data); err != nil {
   128  		return nil, fmt.Errorf("failed to compress input data: err: %w", err)
   129  	}
   130  
   131  	// Close the io.WriteCloser
   132  	if err = writer.Close(); err != nil {
   133  		return nil, err
   134  	}
   135  
   136  	// Return the compressed bytes with canary byte at the start
   137  	return buf.Bytes(), nil
   138  }
   139  
   140  // Decompress checks if the first byte in the input matches the canary byte.
   141  // If the first byte is a canary byte, then the input past the canary byte
   142  // will be decompressed using the method specified in the given configuration.
   143  // If the first byte isn't a canary byte, then the utility returns a boolean
   144  // value indicating that the input was not compressed.
   145  func Decompress(data []byte) ([]byte, bool, error) {
   146  	bytes, _, notCompressed, err := DecompressWithCanary(data)
   147  	return bytes, notCompressed, err
   148  }
   149  
   150  // DecompressWithCanary checks if the first byte in the input matches the canary byte.
   151  // If the first byte is a canary byte, then the input past the canary byte
   152  // will be decompressed using the method specified in the given configuration. The type of compression used is also
   153  // returned. If the first byte isn't a canary byte, then the utility returns a boolean
   154  // value indicating that the input was not compressed.
   155  func DecompressWithCanary(data []byte) ([]byte, string, bool, error) {
   156  	var err error
   157  	var reader io.ReadCloser
   158  	var compressionType string
   159  	if data == nil || len(data) == 0 {
   160  		return nil, "", false, fmt.Errorf("'data' being decompressed is empty")
   161  	}
   162  
   163  	canary := data[0]
   164  	cData := data[1:]
   165  
   166  	switch canary {
   167  	// If the first byte matches the canary byte, remove the canary
   168  	// byte and try to decompress the data that is after the canary.
   169  	case CompressionCanaryGzip:
   170  		if len(data) < 2 {
   171  			return nil, "", false, fmt.Errorf("invalid 'data' after the canary")
   172  		}
   173  		reader, err = gzip.NewReader(bytes.NewReader(cData))
   174  		compressionType = CompressionTypeGzip
   175  
   176  	case CompressionCanaryLZW:
   177  		if len(data) < 2 {
   178  			return nil, "", false, fmt.Errorf("invalid 'data' after the canary")
   179  		}
   180  		reader = lzw.NewReader(bytes.NewReader(cData), lzw.LSB, 8)
   181  		compressionType = CompressionTypeLZW
   182  
   183  	case CompressionCanarySnappy:
   184  		if len(data) < 2 {
   185  			return nil, "", false, fmt.Errorf("invalid 'data' after the canary")
   186  		}
   187  		reader = &CompressUtilReadCloser{
   188  			Reader: snappy.NewReader(bytes.NewReader(cData)),
   189  		}
   190  		compressionType = CompressionTypeSnappy
   191  
   192  	case CompressionCanaryLZ4:
   193  		if len(data) < 2 {
   194  			return nil, "", false, fmt.Errorf("invalid 'data' after the canary")
   195  		}
   196  		reader = &CompressUtilReadCloser{
   197  			Reader: lz4.NewReader(bytes.NewReader(cData)),
   198  		}
   199  		compressionType = CompressionTypeLZ4
   200  
   201  	default:
   202  		// If the first byte doesn't match the canary byte, it means
   203  		// that the content was not compressed at all. Indicate the
   204  		// caller that the input was not compressed.
   205  		return nil, "", true, nil
   206  	}
   207  	if err != nil {
   208  		return nil, "", false, fmt.Errorf("failed to create a compression reader: %w", err)
   209  	}
   210  	if reader == nil {
   211  		return nil, "", false, fmt.Errorf("failed to create a compression reader")
   212  	}
   213  
   214  	// Close the io.ReadCloser
   215  	defer reader.Close()
   216  
   217  	// Read all the compressed data into a buffer
   218  	var buf bytes.Buffer
   219  
   220  	// Read the compressed data into a buffer, but do so
   221  	// slowly to prevent reading all the data into memory
   222  	// at once (protecting against e.g. zip bombs).
   223  	for {
   224  		_, err := io.CopyN(&buf, reader, 1024)
   225  		if err != nil {
   226  			if err == io.EOF {
   227  				break
   228  			}
   229  			return nil, "", false, err
   230  		}
   231  	}
   232  
   233  	return buf.Bytes(), compressionType, false, nil
   234  }