github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/subprocess.go (about) 1 package processor 2 3 import ( 4 "bufio" 5 "bytes" 6 "context" 7 "encoding/binary" 8 "errors" 9 "fmt" 10 "io" 11 "math/bits" 12 "os/exec" 13 "strconv" 14 "sync" 15 "sync/atomic" 16 "time" 17 18 "github.com/Jeffail/benthos/v3/internal/docs" 19 "github.com/Jeffail/benthos/v3/internal/tracing" 20 "github.com/Jeffail/benthos/v3/lib/log" 21 "github.com/Jeffail/benthos/v3/lib/metrics" 22 "github.com/Jeffail/benthos/v3/lib/types" 23 ) 24 25 //------------------------------------------------------------------------------ 26 27 func init() { 28 Constructors[TypeSubprocess] = TypeSpec{ 29 constructor: NewSubprocess, 30 Categories: []Category{ 31 CategoryIntegration, 32 }, 33 Summary: ` 34 Executes a command as a subprocess and, for each message, will pipe its contents to the stdin stream of the process followed by a newline.`, 35 Description: ` 36 The subprocess must then either return a line over stdout or stderr. If a response is returned over stdout then its contents will replace the message. If a response is instead returned from stderr it will be logged and the message will continue unchanged and will be [marked as failed](/docs/configuration/error_handling). 37 38 Rather than separating data by a newline it's possible to specify alternative ` + "[`codec_send`](#codec_send) and [`codec_recv`](#codec_recv)" + ` values, which allow binary messages to be encoded for logical separation. 39 40 The execution environment of the subprocess is the same as the Benthos instance, including environment variables and the current working directory. 41 42 The field ` + "`max_buffer`" + ` defines the maximum response size able to be read from the subprocess. This value should be set significantly above the real expected maximum response size. 43 44 ## Subprocess requirements 45 46 It is required that subprocesses flush their stdout and stderr pipes for each line. Benthos will attempt to keep the process alive for as long as the pipeline is running. If the process exits early it will be restarted. 47 48 ## Messages containing line breaks 49 50 If a message contains line breaks each line of the message is piped to the subprocess and flushed, and a response is expected from the subprocess before another line is fed in.`, 51 FieldSpecs: docs.FieldSpecs{ 52 docs.FieldCommon("name", "The command to execute as a subprocess.", "cat", "sed", "awk"), 53 docs.FieldString("args", "A list of arguments to provide the command.").Array(), 54 docs.FieldAdvanced("max_buffer", "The maximum expected response size."), 55 docs.FieldAdvanced( 56 "codec_send", "Determines how messages written to the subprocess are encoded, which allows them to be logically separated.", 57 ).HasOptions("lines", "length_prefixed_uint32_be", "netstring").AtVersion("3.37.0"), 58 docs.FieldAdvanced( 59 "codec_recv", "Determines how messages read from the subprocess are decoded, which allows them to be logically separated.", 60 ).HasOptions("lines", "length_prefixed_uint32_be", "netstring").AtVersion("3.37.0"), 61 PartsFieldSpec, 62 }, 63 } 64 } 65 66 //------------------------------------------------------------------------------ 67 68 // SubprocessConfig contains configuration fields for the Subprocess processor. 69 type SubprocessConfig struct { 70 Parts []int `json:"parts" yaml:"parts"` 71 Name string `json:"name" yaml:"name"` 72 Args []string `json:"args" yaml:"args"` 73 MaxBuffer int `json:"max_buffer" yaml:"max_buffer"` 74 CodecSend string `json:"codec_send" yaml:"codec_send"` 75 CodecRecv string `json:"codec_recv" yaml:"codec_recv"` 76 } 77 78 // NewSubprocessConfig returns a SubprocessConfig with default values. 79 func NewSubprocessConfig() SubprocessConfig { 80 return SubprocessConfig{ 81 Parts: []int{}, 82 Name: "cat", 83 Args: []string{}, 84 MaxBuffer: bufio.MaxScanTokenSize, 85 CodecSend: "lines", 86 CodecRecv: "lines", 87 } 88 } 89 90 //------------------------------------------------------------------------------ 91 92 // Subprocess is a processor that executes a command. 93 type Subprocess struct { 94 subprocClosed int32 95 96 log log.Modular 97 stats metrics.Type 98 99 conf SubprocessConfig 100 subproc *subprocWrapper 101 procFunc func(index int, span *tracing.Span, part types.Part) error 102 mut sync.Mutex 103 104 mCount metrics.StatCounter 105 mErr metrics.StatCounter 106 mSent metrics.StatCounter 107 mBatchSent metrics.StatCounter 108 } 109 110 // NewSubprocess returns a Subprocess processor. 111 func NewSubprocess( 112 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 113 ) (Type, error) { 114 return newSubprocess(conf.Subprocess, mgr, log, stats) 115 } 116 117 func newSubprocess( 118 conf SubprocessConfig, mgr types.Manager, log log.Modular, stats metrics.Type, 119 ) (Type, error) { 120 e := &Subprocess{ 121 log: log, 122 stats: stats, 123 conf: conf, 124 mCount: stats.GetCounter("count"), 125 mErr: stats.GetCounter("error"), 126 mSent: stats.GetCounter("sent"), 127 mBatchSent: stats.GetCounter("batch.sent"), 128 } 129 var err error 130 if e.subproc, err = newSubprocWrapper(conf.Name, conf.Args, e.conf.MaxBuffer, conf.CodecRecv, log); err != nil { 131 return nil, err 132 } 133 if e.procFunc, err = e.getSendSubprocessorFunc(conf.CodecSend); err != nil { 134 return nil, err 135 } 136 return e, nil 137 } 138 139 //------------------------------------------------------------------------------ 140 141 func (e *Subprocess) getSendSubprocessorFunc(codec string) (func(index int, span *tracing.Span, part types.Part) error, error) { 142 switch codec { 143 case "length_prefixed_uint32_be": 144 return func(_ int, _ *tracing.Span, part types.Part) error { 145 const prefixBytes int = 4 146 147 lenBuf := make([]byte, prefixBytes) 148 m := part.Get() 149 binary.BigEndian.PutUint32(lenBuf, uint32(len(m))) 150 151 res, err := e.subproc.Send(lenBuf, m, nil) 152 if err != nil { 153 e.log.Errorf("Failed to send message to subprocess: %v\n", err) 154 e.mErr.Incr(1) 155 return err 156 } 157 res2 := make([]byte, len(res)) 158 copy(res2, res) 159 part.Set(res2) 160 return nil 161 }, nil 162 case "netstring": 163 return func(_ int, _ *tracing.Span, part types.Part) error { 164 lenBuf := make([]byte, 0) 165 m := part.Get() 166 lenBuf = append(strconv.AppendUint(lenBuf, uint64(len(m)), 10), ':') 167 res, err := e.subproc.Send(lenBuf, m, commaBytes) 168 if err != nil { 169 e.log.Errorf("Failed to send message to subprocess: %v\n", err) 170 e.mErr.Incr(1) 171 return err 172 } 173 res2 := make([]byte, len(res)) 174 copy(res2, res) 175 part.Set(res2) 176 return nil 177 }, nil 178 case "lines": 179 return func(_ int, _ *tracing.Span, part types.Part) error { 180 results := [][]byte{} 181 splitMsg := bytes.Split(part.Get(), newLineBytes) 182 for j, p := range splitMsg { 183 if len(p) == 0 && len(splitMsg) > 1 && j == (len(splitMsg)-1) { 184 results = append(results, []byte("")) 185 continue 186 } 187 res, err := e.subproc.Send(nil, p, newLineBytes) 188 if err != nil { 189 e.log.Errorf("Failed to send message to subprocess: %v\n", err) 190 e.mErr.Incr(1) 191 return err 192 } 193 results = append(results, res) 194 } 195 part.Set(bytes.Join(results, newLineBytes)) 196 return nil 197 }, nil 198 } 199 return nil, fmt.Errorf("unrecognized codec_send value: %v", codec) 200 } 201 202 type subprocWrapper struct { 203 name string 204 args []string 205 maxBuf int 206 207 splitFunc bufio.SplitFunc 208 logger log.Modular 209 210 cmdMut sync.Mutex 211 cmdExitChan chan struct{} 212 stdoutChan chan []byte 213 stderrChan chan []byte 214 215 cmd *exec.Cmd 216 cmdStdin io.WriteCloser 217 cmdCancelFn func() 218 219 closeChan chan struct{} 220 closedChan chan struct{} 221 } 222 223 func newSubprocWrapper(name string, args []string, maxBuf int, codecRecv string, log log.Modular) (*subprocWrapper, error) { 224 s := &subprocWrapper{ 225 name: name, 226 args: args, 227 maxBuf: maxBuf, 228 logger: log, 229 closeChan: make(chan struct{}), 230 closedChan: make(chan struct{}), 231 } 232 switch codecRecv { 233 case "lines": 234 s.splitFunc = bufio.ScanLines 235 case "length_prefixed_uint32_be": 236 s.splitFunc = lengthPrefixedUInt32BESplitFunc 237 case "netstring": 238 s.splitFunc = netstringSplitFunc 239 default: 240 return nil, fmt.Errorf("invalid codec_recv option: %v", codecRecv) 241 } 242 if err := s.start(); err != nil { 243 return nil, err 244 } 245 go func() { 246 defer func() { 247 s.stop() 248 close(s.closedChan) 249 }() 250 for { 251 select { 252 case <-s.cmdExitChan: 253 log.Warnln("Subprocess exited") 254 s.stop() 255 256 // Flush channels 257 var msgBytes []byte 258 for stdoutMsg := range s.stdoutChan { 259 msgBytes = append(msgBytes, stdoutMsg...) 260 } 261 if len(msgBytes) > 0 { 262 log.Infoln(string(msgBytes)) 263 } 264 msgBytes = nil 265 for stderrMsg := range s.stderrChan { 266 msgBytes = append(msgBytes, stderrMsg...) 267 } 268 if len(msgBytes) > 0 { 269 log.Errorln(string(msgBytes)) 270 } 271 272 s.start() 273 case <-s.closeChan: 274 return 275 } 276 } 277 }() 278 return s, nil 279 } 280 281 var maxInt = (1<<bits.UintSize)/2 - 1 282 283 func lengthPrefixedUInt32BESplitFunc(data []byte, atEOF bool) (advance int, token []byte, err error) { 284 const prefixBytes int = 4 285 if atEOF { 286 return 0, nil, nil 287 } 288 if len(data) < prefixBytes { 289 // request more data 290 return 0, nil, nil 291 } 292 l := binary.BigEndian.Uint32(data) 293 if l > (uint32(maxInt) - uint32(prefixBytes)) { 294 return 0, nil, errors.New("number of bytes to read exceeds representable range of go int datatype") 295 } 296 bytesToRead := int(l) 297 298 if len(data)-prefixBytes >= bytesToRead { 299 return prefixBytes + bytesToRead, data[prefixBytes : prefixBytes+bytesToRead], nil 300 } 301 return 0, nil, nil 302 } 303 304 func netstringSplitFunc(data []byte, atEOF bool) (advance int, token []byte, err error) { 305 if atEOF { 306 return 0, nil, nil 307 } 308 309 if i := bytes.IndexByte(data, ':'); i >= 0 { 310 if i == 0 { 311 return 0, nil, errors.New("encountered invalid netstring: netstring starts with colon (':')") 312 } 313 l, err := strconv.ParseUint(string(data[0:i]), 10, bits.UintSize-1) 314 if err != nil { 315 return 0, nil, fmt.Errorf("encountered invalid netstring: unable to decode length '%v'", string(data[0:i])) 316 } 317 bytesToRead := int(l) 318 319 if len(data) > i+1+bytesToRead { 320 if data[i+1+bytesToRead] != ',' { 321 return 0, nil, errors.New("encountered invalid netstring: trailing comma-character is missing") 322 } 323 return i + 1 + bytesToRead + 1, data[i+1 : i+1+bytesToRead], nil 324 } 325 } 326 // request more data 327 return 0, nil, nil 328 } 329 330 func (s *subprocWrapper) start() error { 331 s.cmdMut.Lock() 332 defer s.cmdMut.Unlock() 333 334 var err error 335 cmdCtx, cmdCancelFn := context.WithCancel(context.Background()) 336 defer func() { 337 if err != nil { 338 cmdCancelFn() 339 } 340 }() 341 342 cmd := exec.CommandContext(cmdCtx, s.name, s.args...) 343 var cmdStdin io.WriteCloser 344 if cmdStdin, err = cmd.StdinPipe(); err != nil { 345 return err 346 } 347 var cmdStdout, cmdStderr io.ReadCloser 348 if cmdStdout, err = cmd.StdoutPipe(); err != nil { 349 return err 350 } 351 if cmdStderr, err = cmd.StderrPipe(); err != nil { 352 return err 353 } 354 if err := cmd.Start(); err != nil { 355 return err 356 } 357 358 s.cmd = cmd 359 s.cmdStdin = cmdStdin 360 s.cmdCancelFn = cmdCancelFn 361 362 cmdExitChan := make(chan struct{}) 363 stdoutChan := make(chan []byte) 364 stderrChan := make(chan []byte) 365 366 go func() { 367 defer func() { 368 s.cmdMut.Lock() 369 if cmdExitChan != nil { 370 close(cmdExitChan) 371 cmdExitChan = nil 372 } 373 close(stdoutChan) 374 s.cmdMut.Unlock() 375 }() 376 377 scanner := bufio.NewScanner(cmdStdout) 378 scanner.Split(s.splitFunc) 379 if s.maxBuf != bufio.MaxScanTokenSize { 380 scanner.Buffer(nil, s.maxBuf) 381 } 382 for scanner.Scan() { 383 data := scanner.Bytes() 384 dataCopy := make([]byte, len(data)) 385 copy(dataCopy, data) 386 387 stdoutChan <- dataCopy 388 } 389 if err := scanner.Err(); err != nil { 390 s.logger.Errorf("Failed to read subprocess output: %v\n", err) 391 } 392 }() 393 go func() { 394 defer func() { 395 s.cmdMut.Lock() 396 if cmdExitChan != nil { 397 close(cmdExitChan) 398 cmdExitChan = nil 399 } 400 close(stderrChan) 401 s.cmdMut.Unlock() 402 }() 403 404 scanner := bufio.NewScanner(cmdStderr) 405 if s.maxBuf != bufio.MaxScanTokenSize { 406 scanner.Buffer(nil, s.maxBuf) 407 } 408 for scanner.Scan() { 409 data := scanner.Bytes() 410 dataCopy := make([]byte, len(data)) 411 copy(dataCopy, data) 412 413 stderrChan <- dataCopy 414 } 415 if err := scanner.Err(); err != nil { 416 s.logger.Errorf("Failed to read subprocess error output: %v\n", err) 417 } 418 }() 419 420 s.cmdExitChan = cmdExitChan 421 s.stdoutChan = stdoutChan 422 s.stderrChan = stderrChan 423 s.logger.Infoln("Subprocess started") 424 return nil 425 } 426 427 func (s *subprocWrapper) stop() error { 428 s.cmdMut.Lock() 429 var err error 430 if s.cmd != nil { 431 s.cmdCancelFn() 432 err = s.cmd.Wait() 433 s.cmd = nil 434 s.cmdStdin = nil 435 s.cmdCancelFn = func() {} 436 } 437 s.cmdMut.Unlock() 438 return err 439 } 440 441 func (s *subprocWrapper) Send(prolog, payload, epilog []byte) ([]byte, error) { 442 s.cmdMut.Lock() 443 stdin := s.cmdStdin 444 outChan := s.stdoutChan 445 errChan := s.stderrChan 446 s.cmdMut.Unlock() 447 448 if stdin == nil { 449 return nil, types.ErrTypeClosed 450 } 451 if prolog != nil { 452 if _, err := stdin.Write(prolog); err != nil { 453 return nil, err 454 } 455 } 456 if _, err := stdin.Write(payload); err != nil { 457 return nil, err 458 } 459 if epilog != nil { 460 if _, err := stdin.Write(epilog); err != nil { 461 return nil, err 462 } 463 } 464 465 var outBytes, errBytes []byte 466 var open bool 467 select { 468 case outBytes, open = <-outChan: 469 case errBytes, open = <-errChan: 470 tout := time.After(time.Second) 471 var errBuf bytes.Buffer 472 errBuf.Write(errBytes) 473 flushErrLoop: 474 for open { 475 select { 476 case errBytes, open = <-errChan: 477 errBuf.Write(errBytes) 478 case <-tout: 479 break flushErrLoop 480 } 481 } 482 errBytes = errBuf.Bytes() 483 } 484 485 if !open { 486 return nil, types.ErrTypeClosed 487 } 488 if len(errBytes) > 0 { 489 return nil, errors.New(string(errBytes)) 490 } 491 return outBytes, nil 492 } 493 494 //------------------------------------------------------------------------------ 495 496 var newLineBytes = []byte("\n") 497 var commaBytes = []byte(",") 498 499 // ProcessMessage logs an event and returns the message unchanged. 500 func (e *Subprocess) ProcessMessage(msg types.Message) ([]types.Message, types.Response) { 501 e.mCount.Incr(1) 502 e.mut.Lock() 503 defer e.mut.Unlock() 504 505 result := msg.Copy() 506 507 IteratePartsWithSpanV2(TypeSubprocess, e.conf.Parts, result, e.procFunc) 508 509 e.mSent.Incr(int64(result.Len())) 510 e.mBatchSent.Incr(1) 511 512 msgs := [1]types.Message{result} 513 return msgs[:], nil 514 } 515 516 // CloseAsync shuts down the processor and stops processing requests. 517 func (e *Subprocess) CloseAsync() { 518 if atomic.CompareAndSwapInt32(&e.subprocClosed, 0, 1) { 519 close(e.subproc.closeChan) 520 } 521 } 522 523 // WaitForClose blocks until the processor has closed down. 524 func (e *Subprocess) WaitForClose(timeout time.Duration) error { 525 select { 526 case <-time.After(timeout): 527 return fmt.Errorf("subprocess failed to close in allotted time: %w", types.ErrTimeout) 528 case <-e.subproc.closedChan: 529 } 530 return nil 531 } 532 533 //------------------------------------------------------------------------------