github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/subprocess.go (about)

     1  package processor
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"context"
     7  	"encoding/binary"
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"math/bits"
    12  	"os/exec"
    13  	"strconv"
    14  	"sync"
    15  	"sync/atomic"
    16  	"time"
    17  
    18  	"github.com/Jeffail/benthos/v3/internal/docs"
    19  	"github.com/Jeffail/benthos/v3/internal/tracing"
    20  	"github.com/Jeffail/benthos/v3/lib/log"
    21  	"github.com/Jeffail/benthos/v3/lib/metrics"
    22  	"github.com/Jeffail/benthos/v3/lib/types"
    23  )
    24  
    25  //------------------------------------------------------------------------------
    26  
    27  func init() {
    28  	Constructors[TypeSubprocess] = TypeSpec{
    29  		constructor: NewSubprocess,
    30  		Categories: []Category{
    31  			CategoryIntegration,
    32  		},
    33  		Summary: `
    34  Executes a command as a subprocess and, for each message, will pipe its contents to the stdin stream of the process followed by a newline.`,
    35  		Description: `
    36  The subprocess must then either return a line over stdout or stderr. If a response is returned over stdout then its contents will replace the message. If a response is instead returned from stderr it will be logged and the message will continue unchanged and will be [marked as failed](/docs/configuration/error_handling).
    37  
    38  Rather than separating data by a newline it's possible to specify alternative ` + "[`codec_send`](#codec_send) and [`codec_recv`](#codec_recv)" + ` values, which allow binary messages to be encoded for logical separation.
    39  
    40  The execution environment of the subprocess is the same as the Benthos instance, including environment variables and the current working directory.
    41  
    42  The field ` + "`max_buffer`" + ` defines the maximum response size able to be read from the subprocess. This value should be set significantly above the real expected maximum response size.
    43  
    44  ## Subprocess requirements
    45  
    46  It is required that subprocesses flush their stdout and stderr pipes for each line. Benthos will attempt to keep the process alive for as long as the pipeline is running. If the process exits early it will be restarted.
    47  
    48  ## Messages containing line breaks
    49  
    50  If a message contains line breaks each line of the message is piped to the subprocess and flushed, and a response is expected from the subprocess before another line is fed in.`,
    51  		FieldSpecs: docs.FieldSpecs{
    52  			docs.FieldCommon("name", "The command to execute as a subprocess.", "cat", "sed", "awk"),
    53  			docs.FieldString("args", "A list of arguments to provide the command.").Array(),
    54  			docs.FieldAdvanced("max_buffer", "The maximum expected response size."),
    55  			docs.FieldAdvanced(
    56  				"codec_send", "Determines how messages written to the subprocess are encoded, which allows them to be logically separated.",
    57  			).HasOptions("lines", "length_prefixed_uint32_be", "netstring").AtVersion("3.37.0"),
    58  			docs.FieldAdvanced(
    59  				"codec_recv", "Determines how messages read from the subprocess are decoded, which allows them to be logically separated.",
    60  			).HasOptions("lines", "length_prefixed_uint32_be", "netstring").AtVersion("3.37.0"),
    61  			PartsFieldSpec,
    62  		},
    63  	}
    64  }
    65  
    66  //------------------------------------------------------------------------------
    67  
// SubprocessConfig contains configuration fields for the Subprocess processor.
//
//   - Parts is handed to IteratePartsWithSpanV2 when a message is processed
//     (presumably the indexes of the message parts to act on; confirm against
//     that helper).
//   - Name and Args are the command and arguments passed to
//     exec.CommandContext.
//   - MaxBuffer is the maximum token size given to the scanners that read the
//     subprocess stdout and stderr.
//   - CodecSend selects how each payload is framed when written to the
//     subprocess stdin ("lines", "length_prefixed_uint32_be" or "netstring").
//   - CodecRecv selects the split function used to frame data read from the
//     subprocess stdout (same set of options).
type SubprocessConfig struct {
	Parts     []int    `json:"parts" yaml:"parts"`
	Name      string   `json:"name" yaml:"name"`
	Args      []string `json:"args" yaml:"args"`
	MaxBuffer int      `json:"max_buffer" yaml:"max_buffer"`
	CodecSend string   `json:"codec_send" yaml:"codec_send"`
	CodecRecv string   `json:"codec_recv" yaml:"codec_recv"`
}
    77  
    78  // NewSubprocessConfig returns a SubprocessConfig with default values.
    79  func NewSubprocessConfig() SubprocessConfig {
    80  	return SubprocessConfig{
    81  		Parts:     []int{},
    82  		Name:      "cat",
    83  		Args:      []string{},
    84  		MaxBuffer: bufio.MaxScanTokenSize,
    85  		CodecSend: "lines",
    86  		CodecRecv: "lines",
    87  	}
    88  }
    89  
    90  //------------------------------------------------------------------------------
    91  
// Subprocess is a processor that executes a command.
//
// Field overview:
//   - subprocClosed is flipped from 0 to 1 with an atomic compare-and-swap in
//     CloseAsync so that the wrapper's close channel is closed only once.
//   - log and stats are the logger and metrics aggregator supplied at
//     construction.
//   - conf is the configuration the processor was built from.
//   - subproc wraps the running command and its pipes.
//   - procFunc is the per-part handler chosen from conf.CodecSend.
//   - mut is held for the whole of ProcessMessage, so only one message is
//     exchanged with the subprocess at a time.
//   - mCount, mErr, mSent and mBatchSent are the "count", "error", "sent" and
//     "batch.sent" counters.
type Subprocess struct {
	subprocClosed int32

	log   log.Modular
	stats metrics.Type

	conf     SubprocessConfig
	subproc  *subprocWrapper
	procFunc func(index int, span *tracing.Span, part types.Part) error
	mut      sync.Mutex

	mCount     metrics.StatCounter
	mErr       metrics.StatCounter
	mSent      metrics.StatCounter
	mBatchSent metrics.StatCounter
}
   109  
   110  // NewSubprocess returns a Subprocess processor.
   111  func NewSubprocess(
   112  	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
   113  ) (Type, error) {
   114  	return newSubprocess(conf.Subprocess, mgr, log, stats)
   115  }
   116  
   117  func newSubprocess(
   118  	conf SubprocessConfig, mgr types.Manager, log log.Modular, stats metrics.Type,
   119  ) (Type, error) {
   120  	e := &Subprocess{
   121  		log:        log,
   122  		stats:      stats,
   123  		conf:       conf,
   124  		mCount:     stats.GetCounter("count"),
   125  		mErr:       stats.GetCounter("error"),
   126  		mSent:      stats.GetCounter("sent"),
   127  		mBatchSent: stats.GetCounter("batch.sent"),
   128  	}
   129  	var err error
   130  	if e.subproc, err = newSubprocWrapper(conf.Name, conf.Args, e.conf.MaxBuffer, conf.CodecRecv, log); err != nil {
   131  		return nil, err
   132  	}
   133  	if e.procFunc, err = e.getSendSubprocessorFunc(conf.CodecSend); err != nil {
   134  		return nil, err
   135  	}
   136  	return e, nil
   137  }
   138  
   139  //------------------------------------------------------------------------------
   140  
   141  func (e *Subprocess) getSendSubprocessorFunc(codec string) (func(index int, span *tracing.Span, part types.Part) error, error) {
   142  	switch codec {
   143  	case "length_prefixed_uint32_be":
   144  		return func(_ int, _ *tracing.Span, part types.Part) error {
   145  			const prefixBytes int = 4
   146  
   147  			lenBuf := make([]byte, prefixBytes)
   148  			m := part.Get()
   149  			binary.BigEndian.PutUint32(lenBuf, uint32(len(m)))
   150  
   151  			res, err := e.subproc.Send(lenBuf, m, nil)
   152  			if err != nil {
   153  				e.log.Errorf("Failed to send message to subprocess: %v\n", err)
   154  				e.mErr.Incr(1)
   155  				return err
   156  			}
   157  			res2 := make([]byte, len(res))
   158  			copy(res2, res)
   159  			part.Set(res2)
   160  			return nil
   161  		}, nil
   162  	case "netstring":
   163  		return func(_ int, _ *tracing.Span, part types.Part) error {
   164  			lenBuf := make([]byte, 0)
   165  			m := part.Get()
   166  			lenBuf = append(strconv.AppendUint(lenBuf, uint64(len(m)), 10), ':')
   167  			res, err := e.subproc.Send(lenBuf, m, commaBytes)
   168  			if err != nil {
   169  				e.log.Errorf("Failed to send message to subprocess: %v\n", err)
   170  				e.mErr.Incr(1)
   171  				return err
   172  			}
   173  			res2 := make([]byte, len(res))
   174  			copy(res2, res)
   175  			part.Set(res2)
   176  			return nil
   177  		}, nil
   178  	case "lines":
   179  		return func(_ int, _ *tracing.Span, part types.Part) error {
   180  			results := [][]byte{}
   181  			splitMsg := bytes.Split(part.Get(), newLineBytes)
   182  			for j, p := range splitMsg {
   183  				if len(p) == 0 && len(splitMsg) > 1 && j == (len(splitMsg)-1) {
   184  					results = append(results, []byte(""))
   185  					continue
   186  				}
   187  				res, err := e.subproc.Send(nil, p, newLineBytes)
   188  				if err != nil {
   189  					e.log.Errorf("Failed to send message to subprocess: %v\n", err)
   190  					e.mErr.Incr(1)
   191  					return err
   192  				}
   193  				results = append(results, res)
   194  			}
   195  			part.Set(bytes.Join(results, newLineBytes))
   196  			return nil
   197  		}, nil
   198  	}
   199  	return nil, fmt.Errorf("unrecognized codec_send value: %v", codec)
   200  }
   201  
// subprocWrapper owns a running command together with the pipes, channels
// and goroutines used to talk to it.
//
// Field overview:
//   - name and args are the command line passed to exec.CommandContext on
//     every (re)start.
//   - maxBuf is the maximum scanner token size for stdout and stderr.
//   - splitFunc frames data read from stdout; stderr uses the scanner default.
//   - logger receives lifecycle and read-error messages.
//   - cmdMut is locked by start, stop and Send, and by the reader goroutines
//     when they close their channels.
//   - cmdExitChan is closed by whichever reader goroutine finishes first.
//   - stdoutChan and stderrChan carry a copy of each scanned token and are
//     closed when the corresponding reader goroutine ends.
//   - cmd, cmdStdin and cmdCancelFn describe the current process; they are
//     set by start and cleared by stop.
//   - closeChan is closed to request shutdown of the supervising goroutine.
//   - closedChan is closed by the supervising goroutine once it has stopped
//     the command and exited.
type subprocWrapper struct {
	name   string
	args   []string
	maxBuf int

	splitFunc bufio.SplitFunc
	logger    log.Modular

	cmdMut      sync.Mutex
	cmdExitChan chan struct{}
	stdoutChan  chan []byte
	stderrChan  chan []byte

	cmd         *exec.Cmd
	cmdStdin    io.WriteCloser
	cmdCancelFn func()

	closeChan  chan struct{}
	closedChan chan struct{}
}
   222  
   223  func newSubprocWrapper(name string, args []string, maxBuf int, codecRecv string, log log.Modular) (*subprocWrapper, error) {
   224  	s := &subprocWrapper{
   225  		name:       name,
   226  		args:       args,
   227  		maxBuf:     maxBuf,
   228  		logger:     log,
   229  		closeChan:  make(chan struct{}),
   230  		closedChan: make(chan struct{}),
   231  	}
   232  	switch codecRecv {
   233  	case "lines":
   234  		s.splitFunc = bufio.ScanLines
   235  	case "length_prefixed_uint32_be":
   236  		s.splitFunc = lengthPrefixedUInt32BESplitFunc
   237  	case "netstring":
   238  		s.splitFunc = netstringSplitFunc
   239  	default:
   240  		return nil, fmt.Errorf("invalid codec_recv option: %v", codecRecv)
   241  	}
   242  	if err := s.start(); err != nil {
   243  		return nil, err
   244  	}
   245  	go func() {
   246  		defer func() {
   247  			s.stop()
   248  			close(s.closedChan)
   249  		}()
   250  		for {
   251  			select {
   252  			case <-s.cmdExitChan:
   253  				log.Warnln("Subprocess exited")
   254  				s.stop()
   255  
   256  				// Flush channels
   257  				var msgBytes []byte
   258  				for stdoutMsg := range s.stdoutChan {
   259  					msgBytes = append(msgBytes, stdoutMsg...)
   260  				}
   261  				if len(msgBytes) > 0 {
   262  					log.Infoln(string(msgBytes))
   263  				}
   264  				msgBytes = nil
   265  				for stderrMsg := range s.stderrChan {
   266  					msgBytes = append(msgBytes, stderrMsg...)
   267  				}
   268  				if len(msgBytes) > 0 {
   269  					log.Errorln(string(msgBytes))
   270  				}
   271  
   272  				s.start()
   273  			case <-s.closeChan:
   274  				return
   275  			}
   276  		}
   277  	}()
   278  	return s, nil
   279  }
   280  
   281  var maxInt = (1<<bits.UintSize)/2 - 1
   282  
   283  func lengthPrefixedUInt32BESplitFunc(data []byte, atEOF bool) (advance int, token []byte, err error) {
   284  	const prefixBytes int = 4
   285  	if atEOF {
   286  		return 0, nil, nil
   287  	}
   288  	if len(data) < prefixBytes {
   289  		// request more data
   290  		return 0, nil, nil
   291  	}
   292  	l := binary.BigEndian.Uint32(data)
   293  	if l > (uint32(maxInt) - uint32(prefixBytes)) {
   294  		return 0, nil, errors.New("number of bytes to read exceeds representable range of go int datatype")
   295  	}
   296  	bytesToRead := int(l)
   297  
   298  	if len(data)-prefixBytes >= bytesToRead {
   299  		return prefixBytes + bytesToRead, data[prefixBytes : prefixBytes+bytesToRead], nil
   300  	}
   301  	return 0, nil, nil
   302  }
   303  
   304  func netstringSplitFunc(data []byte, atEOF bool) (advance int, token []byte, err error) {
   305  	if atEOF {
   306  		return 0, nil, nil
   307  	}
   308  
   309  	if i := bytes.IndexByte(data, ':'); i >= 0 {
   310  		if i == 0 {
   311  			return 0, nil, errors.New("encountered invalid netstring: netstring starts with colon (':')")
   312  		}
   313  		l, err := strconv.ParseUint(string(data[0:i]), 10, bits.UintSize-1)
   314  		if err != nil {
   315  			return 0, nil, fmt.Errorf("encountered invalid netstring: unable to decode length '%v'", string(data[0:i]))
   316  		}
   317  		bytesToRead := int(l)
   318  
   319  		if len(data) > i+1+bytesToRead {
   320  			if data[i+1+bytesToRead] != ',' {
   321  				return 0, nil, errors.New("encountered invalid netstring: trailing comma-character is missing")
   322  			}
   323  			return i + 1 + bytesToRead + 1, data[i+1 : i+1+bytesToRead], nil
   324  		}
   325  	}
   326  	// request more data
   327  	return 0, nil, nil
   328  }
   329  
   330  func (s *subprocWrapper) start() error {
   331  	s.cmdMut.Lock()
   332  	defer s.cmdMut.Unlock()
   333  
   334  	var err error
   335  	cmdCtx, cmdCancelFn := context.WithCancel(context.Background())
   336  	defer func() {
   337  		if err != nil {
   338  			cmdCancelFn()
   339  		}
   340  	}()
   341  
   342  	cmd := exec.CommandContext(cmdCtx, s.name, s.args...)
   343  	var cmdStdin io.WriteCloser
   344  	if cmdStdin, err = cmd.StdinPipe(); err != nil {
   345  		return err
   346  	}
   347  	var cmdStdout, cmdStderr io.ReadCloser
   348  	if cmdStdout, err = cmd.StdoutPipe(); err != nil {
   349  		return err
   350  	}
   351  	if cmdStderr, err = cmd.StderrPipe(); err != nil {
   352  		return err
   353  	}
   354  	if err := cmd.Start(); err != nil {
   355  		return err
   356  	}
   357  
   358  	s.cmd = cmd
   359  	s.cmdStdin = cmdStdin
   360  	s.cmdCancelFn = cmdCancelFn
   361  
   362  	cmdExitChan := make(chan struct{})
   363  	stdoutChan := make(chan []byte)
   364  	stderrChan := make(chan []byte)
   365  
   366  	go func() {
   367  		defer func() {
   368  			s.cmdMut.Lock()
   369  			if cmdExitChan != nil {
   370  				close(cmdExitChan)
   371  				cmdExitChan = nil
   372  			}
   373  			close(stdoutChan)
   374  			s.cmdMut.Unlock()
   375  		}()
   376  
   377  		scanner := bufio.NewScanner(cmdStdout)
   378  		scanner.Split(s.splitFunc)
   379  		if s.maxBuf != bufio.MaxScanTokenSize {
   380  			scanner.Buffer(nil, s.maxBuf)
   381  		}
   382  		for scanner.Scan() {
   383  			data := scanner.Bytes()
   384  			dataCopy := make([]byte, len(data))
   385  			copy(dataCopy, data)
   386  
   387  			stdoutChan <- dataCopy
   388  		}
   389  		if err := scanner.Err(); err != nil {
   390  			s.logger.Errorf("Failed to read subprocess output: %v\n", err)
   391  		}
   392  	}()
   393  	go func() {
   394  		defer func() {
   395  			s.cmdMut.Lock()
   396  			if cmdExitChan != nil {
   397  				close(cmdExitChan)
   398  				cmdExitChan = nil
   399  			}
   400  			close(stderrChan)
   401  			s.cmdMut.Unlock()
   402  		}()
   403  
   404  		scanner := bufio.NewScanner(cmdStderr)
   405  		if s.maxBuf != bufio.MaxScanTokenSize {
   406  			scanner.Buffer(nil, s.maxBuf)
   407  		}
   408  		for scanner.Scan() {
   409  			data := scanner.Bytes()
   410  			dataCopy := make([]byte, len(data))
   411  			copy(dataCopy, data)
   412  
   413  			stderrChan <- dataCopy
   414  		}
   415  		if err := scanner.Err(); err != nil {
   416  			s.logger.Errorf("Failed to read subprocess error output: %v\n", err)
   417  		}
   418  	}()
   419  
   420  	s.cmdExitChan = cmdExitChan
   421  	s.stdoutChan = stdoutChan
   422  	s.stderrChan = stderrChan
   423  	s.logger.Infoln("Subprocess started")
   424  	return nil
   425  }
   426  
   427  func (s *subprocWrapper) stop() error {
   428  	s.cmdMut.Lock()
   429  	var err error
   430  	if s.cmd != nil {
   431  		s.cmdCancelFn()
   432  		err = s.cmd.Wait()
   433  		s.cmd = nil
   434  		s.cmdStdin = nil
   435  		s.cmdCancelFn = func() {}
   436  	}
   437  	s.cmdMut.Unlock()
   438  	return err
   439  }
   440  
   441  func (s *subprocWrapper) Send(prolog, payload, epilog []byte) ([]byte, error) {
   442  	s.cmdMut.Lock()
   443  	stdin := s.cmdStdin
   444  	outChan := s.stdoutChan
   445  	errChan := s.stderrChan
   446  	s.cmdMut.Unlock()
   447  
   448  	if stdin == nil {
   449  		return nil, types.ErrTypeClosed
   450  	}
   451  	if prolog != nil {
   452  		if _, err := stdin.Write(prolog); err != nil {
   453  			return nil, err
   454  		}
   455  	}
   456  	if _, err := stdin.Write(payload); err != nil {
   457  		return nil, err
   458  	}
   459  	if epilog != nil {
   460  		if _, err := stdin.Write(epilog); err != nil {
   461  			return nil, err
   462  		}
   463  	}
   464  
   465  	var outBytes, errBytes []byte
   466  	var open bool
   467  	select {
   468  	case outBytes, open = <-outChan:
   469  	case errBytes, open = <-errChan:
   470  		tout := time.After(time.Second)
   471  		var errBuf bytes.Buffer
   472  		errBuf.Write(errBytes)
   473  	flushErrLoop:
   474  		for open {
   475  			select {
   476  			case errBytes, open = <-errChan:
   477  				errBuf.Write(errBytes)
   478  			case <-tout:
   479  				break flushErrLoop
   480  			}
   481  		}
   482  		errBytes = errBuf.Bytes()
   483  	}
   484  
   485  	if !open {
   486  		return nil, types.ErrTypeClosed
   487  	}
   488  	if len(errBytes) > 0 {
   489  		return nil, errors.New(string(errBytes))
   490  	}
   491  	return outBytes, nil
   492  }
   493  
   494  //------------------------------------------------------------------------------
   495  
   496  var newLineBytes = []byte("\n")
   497  var commaBytes = []byte(",")
   498  
   499  // ProcessMessage logs an event and returns the message unchanged.
   500  func (e *Subprocess) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
   501  	e.mCount.Incr(1)
   502  	e.mut.Lock()
   503  	defer e.mut.Unlock()
   504  
   505  	result := msg.Copy()
   506  
   507  	IteratePartsWithSpanV2(TypeSubprocess, e.conf.Parts, result, e.procFunc)
   508  
   509  	e.mSent.Incr(int64(result.Len()))
   510  	e.mBatchSent.Incr(1)
   511  
   512  	msgs := [1]types.Message{result}
   513  	return msgs[:], nil
   514  }
   515  
   516  // CloseAsync shuts down the processor and stops processing requests.
   517  func (e *Subprocess) CloseAsync() {
   518  	if atomic.CompareAndSwapInt32(&e.subprocClosed, 0, 1) {
   519  		close(e.subproc.closeChan)
   520  	}
   521  }
   522  
   523  // WaitForClose blocks until the processor has closed down.
   524  func (e *Subprocess) WaitForClose(timeout time.Duration) error {
   525  	select {
   526  	case <-time.After(timeout):
   527  		return fmt.Errorf("subprocess failed to close in allotted time: %w", types.ErrTimeout)
   528  	case <-e.subproc.closedChan:
   529  	}
   530  	return nil
   531  }
   532  
   533  //------------------------------------------------------------------------------