github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/decompress.go (about)

     1  package processor
     2  
     3  import (
     4  	"bytes"
     5  	"compress/bzip2"
     6  	"compress/flate"
     7  	"compress/gzip"
     8  	"compress/zlib"
     9  	"fmt"
    10  	"io"
    11  	"time"
    12  
    13  	"github.com/Jeffail/benthos/v3/internal/docs"
    14  	"github.com/Jeffail/benthos/v3/internal/tracing"
    15  	"github.com/Jeffail/benthos/v3/lib/log"
    16  	"github.com/Jeffail/benthos/v3/lib/metrics"
    17  	"github.com/Jeffail/benthos/v3/lib/response"
    18  	"github.com/Jeffail/benthos/v3/lib/types"
    19  	"github.com/golang/snappy"
    20  	"github.com/pierrec/lz4/v4"
    21  )
    22  
    23  //------------------------------------------------------------------------------
    24  
    25  func init() {
    26  	Constructors[TypeDecompress] = TypeSpec{
    27  		constructor: NewDecompress,
    28  		Categories: []Category{
    29  			CategoryParsing,
    30  		},
    31  		Summary: `
    32  Decompresses messages according to the selected algorithm. Supported
    33  decompression types are: gzip, zlib, bzip2, flate, snappy, lz4.`,
    34  		FieldSpecs: docs.FieldSpecs{
    35  			docs.FieldCommon("algorithm", "The decompression algorithm to use.").HasOptions("gzip", "zlib", "bzip2", "flate", "snappy", "lz4"),
    36  			PartsFieldSpec,
    37  		},
    38  	}
    39  }
    40  
    41  //------------------------------------------------------------------------------
    42  
// DecompressConfig contains configuration fields for the Decompress processor.
type DecompressConfig struct {
	// Algorithm names the decompression algorithm. NewDecompress resolves it
	// through strToDecompressor, which recognises "gzip", "zlib", "flate",
	// "bzip2", "snappy" and "lz4" and rejects anything else.
	Algorithm string `json:"algorithm" yaml:"algorithm"`
	// Parts is handed to IteratePartsWithSpanV2 by ProcessMessage.
	// NOTE(review): presumably the indexes of the message parts to
	// decompress, as described by PartsFieldSpec - confirm against that
	// helper.
	Parts     []int  `json:"parts" yaml:"parts"`
}
    48  
    49  // NewDecompressConfig returns a DecompressConfig with default values.
    50  func NewDecompressConfig() DecompressConfig {
    51  	return DecompressConfig{
    52  		Algorithm: "gzip",
    53  		Parts:     []int{},
    54  	}
    55  }
    56  
    57  //------------------------------------------------------------------------------
    58  
    59  type decompressFunc func(bytes []byte) ([]byte, error)
    60  
    61  func gzipDecompress(b []byte) ([]byte, error) {
    62  	r, err := gzip.NewReader(bytes.NewBuffer(b))
    63  	if err != nil {
    64  		return nil, err
    65  	}
    66  
    67  	outBuf := bytes.Buffer{}
    68  	if _, err = io.Copy(&outBuf, r); err != nil {
    69  		r.Close()
    70  		return nil, err
    71  	}
    72  	r.Close()
    73  	return outBuf.Bytes(), nil
    74  }
    75  
    76  func snappyDecompress(b []byte) ([]byte, error) {
    77  	return snappy.Decode(nil, b)
    78  }
    79  
    80  func zlibDecompress(b []byte) ([]byte, error) {
    81  	r, err := zlib.NewReader(bytes.NewBuffer(b))
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  
    86  	outBuf := bytes.Buffer{}
    87  	if _, err = io.Copy(&outBuf, r); err != nil {
    88  		r.Close()
    89  		return nil, err
    90  	}
    91  	r.Close()
    92  	return outBuf.Bytes(), nil
    93  }
    94  
    95  func flateDecompress(b []byte) ([]byte, error) {
    96  	r := flate.NewReader(bytes.NewBuffer(b))
    97  
    98  	outBuf := bytes.Buffer{}
    99  	if _, err := io.Copy(&outBuf, r); err != nil {
   100  		r.Close()
   101  		return nil, err
   102  	}
   103  	r.Close()
   104  	return outBuf.Bytes(), nil
   105  }
   106  
   107  func bzip2Decompress(b []byte) ([]byte, error) {
   108  	r := bzip2.NewReader(bytes.NewBuffer(b))
   109  
   110  	outBuf := bytes.Buffer{}
   111  	if _, err := io.Copy(&outBuf, r); err != nil {
   112  		return nil, err
   113  	}
   114  	return outBuf.Bytes(), nil
   115  }
   116  
   117  func lz4Decompress(b []byte) ([]byte, error) {
   118  	buf := bytes.NewBuffer(b)
   119  	r := lz4.NewReader(buf)
   120  
   121  	outBuf := bytes.Buffer{}
   122  	if _, err := outBuf.ReadFrom(r); err != nil && err != io.EOF {
   123  		return nil, err
   124  	}
   125  
   126  	return outBuf.Bytes(), nil
   127  }
   128  
   129  func strToDecompressor(str string) (decompressFunc, error) {
   130  	switch str {
   131  	case "gzip":
   132  		return gzipDecompress, nil
   133  	case "zlib":
   134  		return zlibDecompress, nil
   135  	case "flate":
   136  		return flateDecompress, nil
   137  	case "bzip2":
   138  		return bzip2Decompress, nil
   139  	case "snappy":
   140  		return snappyDecompress, nil
   141  	case "lz4":
   142  		return lz4Decompress, nil
   143  	}
   144  	return nil, fmt.Errorf("decompression type not recognised: %v", str)
   145  }
   146  
   147  //------------------------------------------------------------------------------
   148  
// Decompress is a processor that can decompress parts of a message following a
// chosen compression algorithm.
type Decompress struct {
	// conf is the processor configuration; ProcessMessage passes conf.Parts
	// to IteratePartsWithSpanV2.
	conf   DecompressConfig
	// decomp is the routine resolved from conf.Algorithm by NewDecompress and
	// applied to each part's bytes.
	decomp decompressFunc

	// log receives an error entry whenever a part fails to decompress.
	log   log.Modular
	// stats is the metrics source the counters below were obtained from.
	stats metrics.Type

	// mCount is incremented once per ProcessMessage call.
	mCount     metrics.StatCounter
	// mErr is incremented once per part that fails to decompress.
	mErr       metrics.StatCounter
	// mSent is incremented by the number of parts in each forwarded message.
	mSent      metrics.StatCounter
	// mBatchSent is incremented once per forwarded message.
	mBatchSent metrics.StatCounter
}
   163  
   164  // NewDecompress returns a Decompress processor.
   165  func NewDecompress(
   166  	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
   167  ) (Type, error) {
   168  	dcor, err := strToDecompressor(conf.Decompress.Algorithm)
   169  	if err != nil {
   170  		return nil, err
   171  	}
   172  	return &Decompress{
   173  		conf:   conf.Decompress,
   174  		decomp: dcor,
   175  		log:    log,
   176  		stats:  stats,
   177  
   178  		mCount:     stats.GetCounter("count"),
   179  		mErr:       stats.GetCounter("error"),
   180  		mSent:      stats.GetCounter("sent"),
   181  		mBatchSent: stats.GetCounter("batch.sent"),
   182  	}, nil
   183  }
   184  
   185  //------------------------------------------------------------------------------
   186  
   187  // ProcessMessage applies the processor to a message, either creating >0
   188  // resulting messages or a response to be sent back to the message source.
   189  func (d *Decompress) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
   190  	d.mCount.Incr(1)
   191  	newMsg := msg.Copy()
   192  
   193  	proc := func(i int, span *tracing.Span, part types.Part) error {
   194  		newBytes, err := d.decomp(part.Get())
   195  		if err != nil {
   196  			d.mErr.Incr(1)
   197  			d.log.Errorf("Failed to decompress message part: %v\n", err)
   198  			return err
   199  		}
   200  		part.Set(newBytes)
   201  		return nil
   202  	}
   203  
   204  	if newMsg.Len() == 0 {
   205  		return nil, response.NewAck()
   206  	}
   207  
   208  	IteratePartsWithSpanV2(TypeDecompress, d.conf.Parts, newMsg, proc)
   209  
   210  	d.mBatchSent.Incr(1)
   211  	d.mSent.Incr(int64(newMsg.Len()))
   212  	msgs := [1]types.Message{newMsg}
   213  	return msgs[:], nil
   214  }
   215  
// CloseAsync shuts down the processor and stops processing requests. For
// this processor the method is a no-op.
func (d *Decompress) CloseAsync() {
	// Intentionally empty: there is nothing to shut down here.
}
   219  
// WaitForClose blocks until the processor has closed down. For this
// processor it returns nil immediately; timeout is not consulted.
func (d *Decompress) WaitForClose(timeout time.Duration) error {
	return nil
}
   224  
   225  //------------------------------------------------------------------------------