github.com/influxdata/telegraf@v1.30.3/internal/content_coding.go (about)

     1  package internal
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  
    10  	"github.com/klauspost/compress/gzip"
    11  	"github.com/klauspost/compress/zlib"
    12  	"github.com/klauspost/compress/zstd"
    13  	"github.com/klauspost/pgzip"
    14  )
    15  
// defaultMaxDecompressionSize caps how many bytes the decoders will emit to
// guard against decompression bombs.
const defaultMaxDecompressionSize int64 = 500 * 1024 * 1024 //500MB
    17  
// DecodingOption provide methods to change the decoding from the standard
// configuration.
type DecodingOption func(*decoderConfig)

// decoderConfig collects the tunable settings shared by the decoders.
type decoderConfig struct {
	// maxDecompressionSize limits the number of decompressed bytes a
	// decoder will produce to protect against decompression bombs.
	maxDecompressionSize int64
}
    25  
    26  func WithMaxDecompressionSize(maxDecompressionSize int64) DecodingOption {
    27  	return func(cfg *decoderConfig) {
    28  		cfg.maxDecompressionSize = maxDecompressionSize
    29  	}
    30  }
    31  
// encoderConfig collects the tunable settings shared by the encoders.
type encoderConfig struct {
	// level is the compression level handed to the underlying algorithm.
	level int
}

// EncodingOption provide methods to change the encoding from the standard
// configuration.
type EncodingOption func(*encoderConfig)
    39  
    40  func WithCompressionLevel(level int) EncodingOption {
    41  	return func(cfg *encoderConfig) {
    42  		cfg.level = level
    43  	}
    44  }
    45  
    46  // NewStreamContentDecoder returns a reader that will decode the stream
    47  // according to the encoding type.
    48  func NewStreamContentDecoder(encoding string, r io.Reader) (io.Reader, error) {
    49  	switch encoding {
    50  	case "gzip":
    51  		return NewGzipReader(r)
    52  	case "identity", "":
    53  		return r, nil
    54  	default:
    55  		return nil, errors.New("invalid value for content_encoding")
    56  	}
    57  }
    58  
// GzipReader is similar to gzip.Reader but reads only a single gzip stream per read.
type GzipReader struct {
	r           io.Reader     // buffered source containing the concatenated gzip streams
	z           *pgzip.Reader // decompressor for the current stream
	endOfStream bool          // set when the current stream ended; the next Read resets z
}
    65  
    66  func NewGzipReader(r io.Reader) (io.Reader, error) {
    67  	// We need a read that implements ByteReader in order to line up the next
    68  	// stream.
    69  	br := bufio.NewReader(r)
    70  
    71  	// Reads the first gzip stream header.
    72  	z, err := pgzip.NewReader(br)
    73  	if err != nil {
    74  		return nil, err
    75  	}
    76  
    77  	// Prevent future calls to Read from reading the following gzip header.
    78  	z.Multistream(false)
    79  
    80  	return &GzipReader{r: br, z: z}, nil
    81  }
    82  
// Read decompresses bytes from the current gzip stream into b. When a
// stream ends, the final chunk is returned without an error and the next
// call transparently prepares the following stream.
func (r *GzipReader) Read(b []byte) (int, error) {
	if r.endOfStream {
		// Reads the next gzip header and prepares for the next stream.
		err := r.z.Reset(r.r)
		if err != nil {
			return 0, err
		}
		r.z.Multistream(false)
		r.endOfStream = false
	}

	n, err := r.z.Read(b)

	// Since multistream is disabled, io.EOF indicates the end of the gzip
	// sequence.  On the next read we must read the next gzip header.
	if errors.Is(err, io.EOF) {
		r.endOfStream = true
		return n, nil
	}
	return n, err
}
   104  
   105  // NewContentEncoder returns a ContentEncoder for the encoding type.
   106  func NewContentEncoder(encoding string, options ...EncodingOption) (ContentEncoder, error) {
   107  	switch encoding {
   108  	case "gzip":
   109  		return NewGzipEncoder(options...)
   110  	case "identity", "":
   111  		return NewIdentityEncoder(options...)
   112  	case "zlib":
   113  		return NewZlibEncoder(options...)
   114  	case "zstd":
   115  		return NewZstdEncoder(options...)
   116  	default:
   117  		return nil, errors.New("invalid value for content_encoding")
   118  	}
   119  }
   120  
   121  type AutoDecoder struct {
   122  	encoding string
   123  	gzip     *GzipDecoder
   124  	identity *IdentityDecoder
   125  }
   126  
   127  func (a *AutoDecoder) SetEncoding(encoding string) {
   128  	a.encoding = encoding
   129  }
   130  
   131  func (a *AutoDecoder) Decode(data []byte) ([]byte, error) {
   132  	if a.encoding == "gzip" {
   133  		return a.gzip.Decode(data)
   134  	}
   135  	return a.identity.Decode(data)
   136  }
   137  
   138  func NewAutoContentDecoder(options ...DecodingOption) *AutoDecoder {
   139  	var a AutoDecoder
   140  
   141  	a.identity = NewIdentityDecoder(options...)
   142  	a.gzip = NewGzipDecoder(options...)
   143  	return &a
   144  }
   145  
   146  // NewContentDecoder returns a ContentDecoder for the encoding type.
   147  func NewContentDecoder(encoding string, options ...DecodingOption) (ContentDecoder, error) {
   148  	switch encoding {
   149  	case "auto":
   150  		return NewAutoContentDecoder(options...), nil
   151  	case "gzip":
   152  		return NewGzipDecoder(options...), nil
   153  	case "identity", "":
   154  		return NewIdentityDecoder(options...), nil
   155  	case "zlib":
   156  		return NewZlibDecoder(options...), nil
   157  	case "zstd":
   158  		return NewZstdDecoder(options...)
   159  	default:
   160  		return nil, errors.New("invalid value for content_encoding")
   161  	}
   162  }
   163  
// ContentEncoder applies a wrapper encoding to byte buffers.
type ContentEncoder interface {
	// Encode returns the encoded form of data. Implementations may
	// return a slice backed by an internal buffer that is reused on the
	// next call.
	Encode([]byte) ([]byte, error)
}
   168  
   169  // GzipEncoder compresses the buffer using gzip at the default level.
   170  type GzipEncoder struct {
   171  	pwriter *pgzip.Writer
   172  	writer  *gzip.Writer
   173  	buf     *bytes.Buffer
   174  }
   175  
   176  func NewGzipEncoder(options ...EncodingOption) (*GzipEncoder, error) {
   177  	cfg := encoderConfig{level: gzip.DefaultCompression}
   178  	for _, o := range options {
   179  		o(&cfg)
   180  	}
   181  
   182  	// Check if the compression level is supported
   183  	switch cfg.level {
   184  	case gzip.NoCompression, gzip.DefaultCompression, gzip.BestSpeed, gzip.BestCompression:
   185  		// Do nothing as those are valid levels
   186  	default:
   187  		return nil, errors.New("invalid compression level, only 0, 1 and 9 are supported")
   188  	}
   189  
   190  	var buf bytes.Buffer
   191  	pw, err := pgzip.NewWriterLevel(&buf, cfg.level)
   192  	if err != nil {
   193  		return nil, err
   194  	}
   195  
   196  	w, err := gzip.NewWriterLevel(&buf, cfg.level)
   197  	return &GzipEncoder{
   198  		pwriter: pw,
   199  		writer:  w,
   200  		buf:     &buf,
   201  	}, err
   202  }
   203  
   204  func (e *GzipEncoder) Encode(data []byte) ([]byte, error) {
   205  	// Parallel Gzip is only faster for larger data chunks. According to the
   206  	// project's documentation the trade-off size is at about 1MB, so we switch
   207  	// to parallel Gzip if the data is larger and run the built-in version
   208  	// otherwise.
   209  	if len(data) > 1024*1024 {
   210  		return e.encodeBig(data)
   211  	}
   212  	return e.encodeSmall(data)
   213  }
   214  
   215  func (e *GzipEncoder) encodeSmall(data []byte) ([]byte, error) {
   216  	e.buf.Reset()
   217  	e.writer.Reset(e.buf)
   218  
   219  	_, err := e.writer.Write(data)
   220  	if err != nil {
   221  		return nil, err
   222  	}
   223  	err = e.writer.Close()
   224  	if err != nil {
   225  		return nil, err
   226  	}
   227  	return e.buf.Bytes(), nil
   228  }
   229  
   230  func (e *GzipEncoder) encodeBig(data []byte) ([]byte, error) {
   231  	e.buf.Reset()
   232  	e.pwriter.Reset(e.buf)
   233  
   234  	_, err := e.pwriter.Write(data)
   235  	if err != nil {
   236  		return nil, err
   237  	}
   238  	err = e.pwriter.Close()
   239  	if err != nil {
   240  		return nil, err
   241  	}
   242  	return e.buf.Bytes(), nil
   243  }
   244  
   245  type ZlibEncoder struct {
   246  	writer *zlib.Writer
   247  	buf    *bytes.Buffer
   248  }
   249  
   250  func NewZlibEncoder(options ...EncodingOption) (*ZlibEncoder, error) {
   251  	cfg := encoderConfig{level: zlib.DefaultCompression}
   252  	for _, o := range options {
   253  		o(&cfg)
   254  	}
   255  
   256  	switch cfg.level {
   257  	case zlib.NoCompression, zlib.DefaultCompression, zlib.BestSpeed, zlib.BestCompression:
   258  		// Do nothing as those are valid levels
   259  	default:
   260  		return nil, errors.New("invalid compression level, only 0, 1 and 9 are supported")
   261  	}
   262  
   263  	var buf bytes.Buffer
   264  	w, err := zlib.NewWriterLevel(&buf, cfg.level)
   265  	return &ZlibEncoder{
   266  		writer: w,
   267  		buf:    &buf,
   268  	}, err
   269  }
   270  
   271  func (e *ZlibEncoder) Encode(data []byte) ([]byte, error) {
   272  	e.buf.Reset()
   273  	e.writer.Reset(e.buf)
   274  
   275  	_, err := e.writer.Write(data)
   276  	if err != nil {
   277  		return nil, err
   278  	}
   279  	err = e.writer.Close()
   280  	if err != nil {
   281  		return nil, err
   282  	}
   283  	return e.buf.Bytes(), nil
   284  }
   285  
   286  type ZstdEncoder struct {
   287  	encoder *zstd.Encoder
   288  }
   289  
   290  func NewZstdEncoder(options ...EncodingOption) (*ZstdEncoder, error) {
   291  	cfg := encoderConfig{level: 3}
   292  	for _, o := range options {
   293  		o(&cfg)
   294  	}
   295  
   296  	// Map the levels
   297  	var level zstd.EncoderLevel
   298  	switch cfg.level {
   299  	case 1:
   300  		level = zstd.SpeedFastest
   301  	case 3:
   302  		level = zstd.SpeedDefault
   303  	case 7:
   304  		level = zstd.SpeedBetterCompression
   305  	case 11:
   306  		level = zstd.SpeedBestCompression
   307  	default:
   308  		return nil, errors.New("invalid compression level, only 1, 3, 7 and 11 are supported")
   309  	}
   310  
   311  	e, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(level))
   312  	return &ZstdEncoder{
   313  		encoder: e,
   314  	}, err
   315  }
   316  
   317  func (e *ZstdEncoder) Encode(data []byte) ([]byte, error) {
   318  	return e.encoder.EncodeAll(data, make([]byte, 0, len(data))), nil
   319  }
   320  
   321  // IdentityEncoder is a null encoder that applies no transformation.
   322  type IdentityEncoder struct{}
   323  
   324  func NewIdentityEncoder(options ...EncodingOption) (*IdentityEncoder, error) {
   325  	if len(options) > 0 {
   326  		return nil, errors.New("identity encoder does not support options")
   327  	}
   328  
   329  	return &IdentityEncoder{}, nil
   330  }
   331  
   332  func (*IdentityEncoder) Encode(data []byte) ([]byte, error) {
   333  	return data, nil
   334  }
   335  
// ContentDecoder removes a wrapper encoding from byte buffers.
type ContentDecoder interface {
	// SetEncoding tells the decoder which encoding the payloads use;
	// most implementations ignore it.
	SetEncoding(string)
	// Decode returns the decoded form of data.
	Decode([]byte) ([]byte, error)
}
   341  
// GzipDecoder decompresses buffers with gzip compression.
type GzipDecoder struct {
	preader              *pgzip.Reader // parallel reader used for payloads larger than 1MB
	reader               *gzip.Reader  // built-in reader used for small payloads
	buf                  *bytes.Buffer // reused output buffer
	maxDecompressionSize int64         // cap on decompressed bytes (decompression-bomb guard)
}
   349  
   350  func NewGzipDecoder(options ...DecodingOption) *GzipDecoder {
   351  	cfg := decoderConfig{maxDecompressionSize: defaultMaxDecompressionSize}
   352  	for _, o := range options {
   353  		o(&cfg)
   354  	}
   355  
   356  	return &GzipDecoder{
   357  		preader:              new(pgzip.Reader),
   358  		reader:               new(gzip.Reader),
   359  		buf:                  new(bytes.Buffer),
   360  		maxDecompressionSize: cfg.maxDecompressionSize,
   361  	}
   362  }
   363  
// SetEncoding is a no-op; the decoder always expects gzip data.
func (*GzipDecoder) SetEncoding(string) {}
   365  
   366  func (d *GzipDecoder) Decode(data []byte) ([]byte, error) {
   367  	// Parallel Gzip is only faster for larger data chunks. According to the
   368  	// project's documentation the trade-off size is at about 1MB, so we switch
   369  	// to parallel Gzip if the data is larger and run the built-in version
   370  	// otherwise.
   371  	if len(data) > 1024*1024 {
   372  		return d.decodeBig(data)
   373  	}
   374  	return d.decodeSmall(data)
   375  }
   376  
// decodeSmall decompresses data with the built-in gzip reader.
//
// At most maxDecompressionSize bytes are copied out. Reaching exactly the
// limit is reported as an oversize payload (even if the stream happens to
// end there) to guard against decompression bombs.
func (d *GzipDecoder) decodeSmall(data []byte) ([]byte, error) {
	err := d.reader.Reset(bytes.NewBuffer(data))
	if err != nil {
		return nil, err
	}
	d.buf.Reset()

	n, err := io.CopyN(d.buf, d.reader, d.maxDecompressionSize)
	if err != nil && !errors.Is(err, io.EOF) {
		return nil, err
	} else if n == d.maxDecompressionSize {
		return nil, fmt.Errorf("size of decoded data exceeds allowed size %d", d.maxDecompressionSize)
	}

	// Close verifies the gzip checksum of the consumed stream.
	err = d.reader.Close()
	if err != nil {
		return nil, err
	}
	return d.buf.Bytes(), nil
}
   397  
// decodeBig decompresses data with the parallel pgzip reader.
//
// At most maxDecompressionSize bytes are copied out. Reaching exactly the
// limit is reported as an oversize payload (even if the stream happens to
// end there) to guard against decompression bombs.
func (d *GzipDecoder) decodeBig(data []byte) ([]byte, error) {
	err := d.preader.Reset(bytes.NewBuffer(data))
	if err != nil {
		return nil, err
	}
	d.buf.Reset()

	n, err := io.CopyN(d.buf, d.preader, d.maxDecompressionSize)
	if err != nil && !errors.Is(err, io.EOF) {
		return nil, err
	} else if n == d.maxDecompressionSize {
		return nil, fmt.Errorf("size of decoded data exceeds allowed size %d", d.maxDecompressionSize)
	}

	// Close verifies the gzip checksum of the consumed stream.
	err = d.preader.Close()
	if err != nil {
		return nil, err
	}
	return d.buf.Bytes(), nil
}
   418  
   419  type ZlibDecoder struct {
   420  	buf                  *bytes.Buffer
   421  	maxDecompressionSize int64
   422  }
   423  
   424  func NewZlibDecoder(options ...DecodingOption) *ZlibDecoder {
   425  	cfg := decoderConfig{maxDecompressionSize: defaultMaxDecompressionSize}
   426  	for _, o := range options {
   427  		o(&cfg)
   428  	}
   429  
   430  	return &ZlibDecoder{
   431  		buf:                  new(bytes.Buffer),
   432  		maxDecompressionSize: cfg.maxDecompressionSize,
   433  	}
   434  }
   435  
   436  func (*ZlibDecoder) SetEncoding(string) {}
   437  
   438  func (d *ZlibDecoder) Decode(data []byte) ([]byte, error) {
   439  	d.buf.Reset()
   440  
   441  	b := bytes.NewBuffer(data)
   442  	r, err := zlib.NewReader(b)
   443  	if err != nil {
   444  		return nil, err
   445  	}
   446  
   447  	n, err := io.CopyN(d.buf, r, d.maxDecompressionSize)
   448  	if err != nil && !errors.Is(err, io.EOF) {
   449  		return nil, err
   450  	} else if n == d.maxDecompressionSize {
   451  		return nil, fmt.Errorf("size of decoded data exceeds allowed size %d", d.maxDecompressionSize)
   452  	}
   453  
   454  	err = r.Close()
   455  	if err != nil {
   456  		return nil, err
   457  	}
   458  	return d.buf.Bytes(), nil
   459  }
   460  
   461  type ZstdDecoder struct {
   462  	decoder *zstd.Decoder
   463  }
   464  
   465  func NewZstdDecoder(options ...DecodingOption) (*ZstdDecoder, error) {
   466  	cfg := decoderConfig{maxDecompressionSize: defaultMaxDecompressionSize}
   467  	for _, o := range options {
   468  		o(&cfg)
   469  	}
   470  
   471  	d, err := zstd.NewReader(nil, zstd.WithDecoderConcurrency(0), zstd.WithDecoderMaxWindow(uint64(cfg.maxDecompressionSize)))
   472  	return &ZstdDecoder{
   473  		decoder: d,
   474  	}, err
   475  }
   476  
   477  func (*ZstdDecoder) SetEncoding(string) {}
   478  
   479  func (d *ZstdDecoder) Decode(data []byte) ([]byte, error) {
   480  	return d.decoder.DecodeAll(data, nil)
   481  }
   482  
   483  // IdentityDecoder is a null decoder that returns the input.
   484  type IdentityDecoder struct {
   485  }
   486  
   487  func NewIdentityDecoder(_ ...DecodingOption) *IdentityDecoder {
   488  	return &IdentityDecoder{}
   489  }
   490  
   491  func (*IdentityDecoder) SetEncoding(string) {}
   492  
   493  func (*IdentityDecoder) Decode(data []byte) ([]byte, error) {
   494  	return data, nil
   495  }