github.com/Jeffail/benthos/v3@v3.65.0/internal/codec/reader.go (about)

     1  package codec
     2  
     3  import (
     4  	"archive/tar"
     5  	"bufio"
     6  	"bytes"
     7  	"compress/gzip"
     8  	"context"
     9  	"encoding/csv"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"path/filepath"
    14  	"regexp"
    15  	"strconv"
    16  	"strings"
    17  	"sync"
    18  
    19  	"github.com/Jeffail/benthos/v3/internal/docs"
    20  	"github.com/Jeffail/benthos/v3/lib/message"
    21  	"github.com/Jeffail/benthos/v3/lib/types"
    22  )
    23  
    24  // ReaderDocs is a static field documentation for input codecs.
    25  var ReaderDocs = docs.FieldCommon(
    26  	"codec", "The way in which the bytes of a data source should be converted into discrete messages, codecs are useful for specifying how large files or contiunous streams of data might be processed in small chunks rather than loading it all in memory. It's possible to consume lines using a custom delimiter with the `delim:x` codec, where x is the character sequence custom delimiter. Codecs can be chained with `/`, for example a gzip compressed CSV file can be consumed with the codec `gzip/csv`.", "lines", "delim:\t", "delim:foobar", "gzip/csv",
    27  ).HasAnnotatedOptions(
    28  	"auto", "EXPERIMENTAL: Attempts to derive a codec for each file based on information such as the extension. For example, a .tar.gz file would be consumed with the `gzip/tar` codec. Defaults to all-bytes.",
    29  	"all-bytes", "Consume the entire file as a single binary message.",
    30  	"chunker:x", "Consume the file in chunks of a given number of bytes.",
    31  	"csv", "Consume structured rows as comma separated values, the first row must be a header row.",
    32  	"csv:x", "Consume structured rows as values separated by a custom delimiter, the first row must be a header row. The custom delimiter must be a single character, e.g. the codec `\"csv:\\t\"` would consume a tab delimited file.",
    33  	"delim:x", "Consume the file in segments divided by a custom delimiter.",
    34  	"gzip", "Decompress a gzip file, this codec should precede another codec, e.g. `gzip/all-bytes`, `gzip/tar`, `gzip/csv`, etc.",
    35  	"lines", "Consume the file in segments divided by linebreaks.",
    36  	"multipart", "Consumes the output of another codec and batches messages together. A batch ends when an empty message is consumed. For example, the codec `lines/multipart` could be used to consume multipart messages where an empty line indicates the end of each batch.",
    37  	"regex:(?m)^\\d\\d:\\d\\d:\\d\\d", "Consume the file in segments divided by regular expression.",
    38  	"tar", "Parse the file as a tar archive, and consume each file of the archive as a message.",
    39  )
    40  
    41  //------------------------------------------------------------------------------
    42  
    43  // ReaderConfig is a general configuration struct that covers all reader codecs.
    44  type ReaderConfig struct {
    45  	MaxScanTokenSize int
    46  }
    47  
    48  // NewReaderConfig creates a reader configuration with default values.
    49  func NewReaderConfig() ReaderConfig {
    50  	return ReaderConfig{
    51  		MaxScanTokenSize: bufio.MaxScanTokenSize,
    52  	}
    53  }
    54  
    55  //------------------------------------------------------------------------------
    56  
    57  // ReaderAckFn is a function provided to a reader codec that it should call once
    58  // the underlying io.ReadCloser is fully consumed.
    59  type ReaderAckFn func(context.Context, error) error
    60  
    61  func ackOnce(fn ReaderAckFn) ReaderAckFn {
    62  	var once sync.Once
    63  	return func(ctx context.Context, err error) error {
    64  		var ackErr error
    65  		once.Do(func() {
    66  			ackErr = fn(ctx, err)
    67  		})
    68  		return ackErr
    69  	}
    70  }
    71  
// Reader is a codec type that reads message parts from a source.
type Reader interface {
	// Next returns the next parts read from the source along with the ack
	// function for those parts. The implementations in this file return
	// io.EOF once the source has been exhausted.
	Next(context.Context) ([]types.Part, ReaderAckFn, error)
	// Close releases the reader. The implementations in this file close the
	// underlying source (or child reader) here.
	Close(context.Context) error
}
    77  
// ioReaderConstructor is a private constructor for codecs that transform one
// byte stream into another (such as gzip decompression). It receives the same
// string that is passed to the ReaderConstructor it is chained into.
type ioReaderConstructor func(string, io.ReadCloser) (io.ReadCloser, error)

// ReaderConstructor creates a reader from a filename, an io.ReadCloser and an
// ack func which is called by the reader once the io.ReadCloser is finished
// with. The filename can be empty and is usually ignored, but might be
// necessary for certain codecs.
type ReaderConstructor func(string, io.ReadCloser, ReaderAckFn) (Reader, error)

// readerReaderConstructor is a private constructor for readers that _must_
// consume from other readers.
type readerReaderConstructor func(string, Reader) (Reader, error)
    89  
    90  func chainIOCtors(first, second ioReaderConstructor) ioReaderConstructor {
    91  	return func(s string, rc io.ReadCloser) (io.ReadCloser, error) {
    92  		r1, err := first(s, rc)
    93  		if err != nil {
    94  			return nil, err
    95  		}
    96  		r2, err := second(s, r1)
    97  		if err != nil {
    98  			r1.Close()
    99  			return nil, err
   100  		}
   101  		return r2, nil
   102  	}
   103  }
   104  
   105  func chainIOIntoPartCtor(first ioReaderConstructor, second ReaderConstructor) ReaderConstructor {
   106  	return func(s string, rc io.ReadCloser, aFn ReaderAckFn) (Reader, error) {
   107  		r1, err := first(s, rc)
   108  		if err != nil {
   109  			return nil, err
   110  		}
   111  		r2, err := second(s, r1, aFn)
   112  		if err != nil {
   113  			r1.Close()
   114  			return nil, err
   115  		}
   116  		return r2, nil
   117  	}
   118  }
   119  
   120  func chainPartIntoReaderCtor(first ReaderConstructor, second readerReaderConstructor) ReaderConstructor {
   121  	return func(s string, rc io.ReadCloser, aFn ReaderAckFn) (Reader, error) {
   122  		r1, err := first(s, rc, aFn)
   123  		if err != nil {
   124  			return nil, err
   125  		}
   126  		r2, err := second(s, r1)
   127  		if err != nil {
   128  			r1.Close(context.Background())
   129  			return nil, err
   130  		}
   131  		return r2, nil
   132  	}
   133  }
   134  
   135  func chainedReader(codec string, conf ReaderConfig) (ReaderConstructor, error) {
   136  	codecs := strings.Split(codec, "/")
   137  
   138  	var ioCtor ioReaderConstructor
   139  	var partCtor ReaderConstructor
   140  
   141  	for i, codec := range codecs {
   142  		if tmpIOCtor, ok := ioReader(codec, conf); ok {
   143  			if partCtor != nil {
   144  				return nil, fmt.Errorf("unable to follow codec '%v' with '%v'", codecs[i-1], codec)
   145  			}
   146  			if ioCtor != nil {
   147  				ioCtor = chainIOCtors(ioCtor, tmpIOCtor)
   148  			} else {
   149  				ioCtor = tmpIOCtor
   150  			}
   151  			continue
   152  		}
   153  		tmpPartCtor, ok, err := partReader(codec, conf)
   154  		if err != nil {
   155  			return nil, err
   156  		}
   157  		if ok {
   158  			if partCtor != nil {
   159  				return nil, fmt.Errorf("unable to follow codec '%v' with '%v'", codecs[i-1], codec)
   160  			}
   161  			if ioCtor != nil {
   162  				tmpPartCtor = chainIOIntoPartCtor(ioCtor, tmpPartCtor)
   163  				ioCtor = nil
   164  			}
   165  			partCtor = tmpPartCtor
   166  			continue
   167  		}
   168  		tmpReaderCtor, ok := readerReader(codec, conf)
   169  		if !ok {
   170  			return nil, fmt.Errorf("codec was not recognised: %v", codec)
   171  		}
   172  		if partCtor == nil {
   173  			return nil, fmt.Errorf("unable to codec '%v' must be preceded by a structured codec", codec)
   174  		}
   175  		partCtor = chainPartIntoReaderCtor(partCtor, tmpReaderCtor)
   176  	}
   177  	if partCtor == nil {
   178  		return nil, fmt.Errorf("codec was not recognised: %v", codecs)
   179  	}
   180  	return partCtor, nil
   181  }
   182  
   183  func ioReader(codec string, conf ReaderConfig) (ioReaderConstructor, bool) {
   184  	if codec == "gzip" {
   185  		return func(_ string, r io.ReadCloser) (io.ReadCloser, error) {
   186  			g, err := gzip.NewReader(r)
   187  			if err != nil {
   188  				r.Close()
   189  				return nil, err
   190  			}
   191  			return g, nil
   192  		}, true
   193  	}
   194  	return nil, false
   195  }
   196  
   197  func readerReader(codec string, conf ReaderConfig) (readerReaderConstructor, bool) {
   198  	if codec == "multipart" {
   199  		return func(_ string, r Reader) (Reader, error) {
   200  			return newMultipartReader(r)
   201  		}, true
   202  	}
   203  	return nil, false
   204  }
   205  
   206  func partReader(codec string, conf ReaderConfig) (ReaderConstructor, bool, error) {
   207  	switch codec {
   208  	case "all-bytes":
   209  		return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) {
   210  			return &allBytesReader{r, fn, false}, nil
   211  		}, true, nil
   212  	case "lines":
   213  		return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) {
   214  			return newLinesReader(conf, r, fn)
   215  		}, true, nil
   216  	case "csv":
   217  		return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) {
   218  			return newCSVReader(r, fn, nil)
   219  		}, true, nil
   220  	case "tar":
   221  		return newTarReader, true, nil
   222  	}
   223  	if strings.HasPrefix(codec, "delim:") {
   224  		by := strings.TrimPrefix(codec, "delim:")
   225  		if by == "" {
   226  			return nil, false, errors.New("custom delimiter codec requires a non-empty delimiter")
   227  		}
   228  		return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) {
   229  			return newCustomDelimReader(conf, r, by, fn)
   230  		}, true, nil
   231  	}
   232  	if strings.HasPrefix(codec, "csv:") {
   233  		by := strings.TrimPrefix(codec, "csv:")
   234  		if by == "" {
   235  			return nil, false, errors.New("csv codec requires a non-empty delimiter")
   236  		}
   237  		byRunes := []rune(by)
   238  		if len(byRunes) != 1 {
   239  			return nil, false, errors.New("csv codec requires a single character delimiter")
   240  		}
   241  		byRune := byRunes[0]
   242  		return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) {
   243  			return newCSVReader(r, fn, &byRune)
   244  		}, true, nil
   245  	}
   246  	if strings.HasPrefix(codec, "chunker:") {
   247  		chunkSize, err := strconv.ParseInt(strings.TrimPrefix(codec, "chunker:"), 10, 64)
   248  		if err != nil {
   249  			return nil, false, fmt.Errorf("invalid chunk size for chunker codec: %w", err)
   250  		}
   251  		return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) {
   252  			return newChunkerReader(conf, r, chunkSize, fn)
   253  		}, true, nil
   254  	}
   255  	if strings.HasPrefix(codec, "regex:") {
   256  		by := strings.TrimPrefix(codec, "regex:")
   257  		if by == "" {
   258  			return nil, false, errors.New("regex codec requires a non-empty delimiter")
   259  		}
   260  		return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) {
   261  			return newRexExpSplitReader(conf, r, by, fn)
   262  		}, true, nil
   263  	}
   264  	return nil, false, nil
   265  }
   266  
   267  func convertDeprecatedCodec(codec string) string {
   268  	switch codec {
   269  	case "csv-gzip":
   270  		return "gzip/csv"
   271  	case "tar-gzip":
   272  		return "gzip/tar"
   273  	}
   274  	return codec
   275  }
   276  
   277  // GetReader returns a constructor that creates reader codecs.
   278  func GetReader(codec string, conf ReaderConfig) (ReaderConstructor, error) {
   279  	codec = convertDeprecatedCodec(codec)
   280  	if codec == "auto" {
   281  		return autoCodec(conf), nil
   282  	}
   283  	return chainedReader(codec, conf)
   284  }
   285  
   286  func autoCodec(conf ReaderConfig) ReaderConstructor {
   287  	return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) {
   288  		codec := "all-bytes"
   289  		switch filepath.Ext(path) {
   290  		case ".csv":
   291  			codec = "csv"
   292  		case ".csv.gz", ".csv.gzip":
   293  			codec = "gzip/csv"
   294  		case ".tar":
   295  			codec = "tar"
   296  		case ".tgz":
   297  			codec = "gzip/tar"
   298  		}
   299  		if strings.HasSuffix(path, ".tar.gzip") {
   300  			codec = "gzip/tar"
   301  		} else if strings.HasSuffix(path, ".tar.gz") {
   302  			codec = "gzip/tar"
   303  		}
   304  
   305  		ctor, err := GetReader(codec, conf)
   306  		if err != nil {
   307  			return nil, fmt.Errorf("failed to infer codec: %v", err)
   308  		}
   309  		return ctor(path, r, fn)
   310  	}
   311  }
   312  
   313  //------------------------------------------------------------------------------
   314  
   315  type allBytesReader struct {
   316  	i        io.ReadCloser
   317  	ack      ReaderAckFn
   318  	consumed bool
   319  }
   320  
   321  func (a *allBytesReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) {
   322  	if a.consumed {
   323  		return nil, nil, io.EOF
   324  	}
   325  	a.consumed = true
   326  	b, err := io.ReadAll(a.i)
   327  	if err != nil {
   328  		_ = a.ack(ctx, err)
   329  		return nil, nil, err
   330  	}
   331  	p := message.NewPart(b)
   332  	return []types.Part{p}, a.ack, nil
   333  }
   334  
   335  func (a *allBytesReader) Close(ctx context.Context) error {
   336  	if !a.consumed {
   337  		_ = a.ack(ctx, errors.New("service shutting down"))
   338  	}
   339  	return a.i.Close()
   340  }
   341  
   342  //------------------------------------------------------------------------------
   343  
   344  type linesReader struct {
   345  	buf       *bufio.Scanner
   346  	r         io.ReadCloser
   347  	sourceAck ReaderAckFn
   348  
   349  	mut      sync.Mutex
   350  	finished bool
   351  	pending  int32
   352  }
   353  
   354  func newLinesReader(conf ReaderConfig, r io.ReadCloser, ackFn ReaderAckFn) (Reader, error) {
   355  	scanner := bufio.NewScanner(r)
   356  	if conf.MaxScanTokenSize != bufio.MaxScanTokenSize {
   357  		scanner.Buffer([]byte{}, conf.MaxScanTokenSize)
   358  	}
   359  	return &linesReader{
   360  		buf:       scanner,
   361  		r:         r,
   362  		sourceAck: ackOnce(ackFn),
   363  	}, nil
   364  }
   365  
   366  func (a *linesReader) ack(ctx context.Context, err error) error {
   367  	a.mut.Lock()
   368  	a.pending--
   369  	doAck := a.pending == 0 && a.finished
   370  	a.mut.Unlock()
   371  
   372  	if err != nil {
   373  		return a.sourceAck(ctx, err)
   374  	}
   375  	if doAck {
   376  		return a.sourceAck(ctx, nil)
   377  	}
   378  	return nil
   379  }
   380  
   381  func (a *linesReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) {
   382  	scanned := a.buf.Scan()
   383  	a.mut.Lock()
   384  	defer a.mut.Unlock()
   385  
   386  	if scanned {
   387  		a.pending++
   388  		bytesCopy := make([]byte, len(a.buf.Bytes()))
   389  		copy(bytesCopy, a.buf.Bytes())
   390  		return []types.Part{message.NewPart(bytesCopy)}, a.ack, nil
   391  	}
   392  
   393  	err := a.buf.Err()
   394  	if err == nil {
   395  		err = io.EOF
   396  		a.finished = true
   397  	} else {
   398  		_ = a.sourceAck(ctx, err)
   399  	}
   400  	return nil, nil, err
   401  }
   402  
   403  func (a *linesReader) Close(ctx context.Context) error {
   404  	a.mut.Lock()
   405  	defer a.mut.Unlock()
   406  
   407  	if !a.finished {
   408  		_ = a.sourceAck(ctx, errors.New("service shutting down"))
   409  	}
   410  	if a.pending == 0 {
   411  		_ = a.sourceAck(ctx, nil)
   412  	}
   413  	return a.r.Close()
   414  }
   415  
   416  //------------------------------------------------------------------------------
   417  
   418  type csvReader struct {
   419  	scanner   *csv.Reader
   420  	r         io.ReadCloser
   421  	sourceAck ReaderAckFn
   422  
   423  	headers []string
   424  
   425  	mut      sync.Mutex
   426  	finished bool
   427  	pending  int32
   428  }
   429  
   430  func newCSVReader(r io.ReadCloser, ackFn ReaderAckFn, customComma *rune) (Reader, error) {
   431  	scanner := csv.NewReader(r)
   432  	scanner.ReuseRecord = true
   433  	if customComma != nil {
   434  		scanner.Comma = *customComma
   435  	}
   436  
   437  	headers, err := scanner.Read()
   438  	if err != nil {
   439  		return nil, err
   440  	}
   441  
   442  	headersCopy := make([]string, len(headers))
   443  	copy(headersCopy, headers)
   444  
   445  	return &csvReader{
   446  		scanner:   scanner,
   447  		r:         r,
   448  		sourceAck: ackOnce(ackFn),
   449  		headers:   headersCopy,
   450  	}, nil
   451  }
   452  
   453  func (a *csvReader) ack(ctx context.Context, err error) error {
   454  	a.mut.Lock()
   455  	a.pending--
   456  	doAck := a.pending == 0 && a.finished
   457  	a.mut.Unlock()
   458  
   459  	if err != nil {
   460  		return a.sourceAck(ctx, err)
   461  	}
   462  	if doAck {
   463  		return a.sourceAck(ctx, nil)
   464  	}
   465  	return nil
   466  }
   467  
   468  func (a *csvReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) {
   469  	records, err := a.scanner.Read()
   470  
   471  	a.mut.Lock()
   472  	defer a.mut.Unlock()
   473  
   474  	if err != nil {
   475  		if err == io.EOF {
   476  			a.finished = true
   477  		} else {
   478  			_ = a.sourceAck(ctx, err)
   479  		}
   480  		return nil, nil, err
   481  	}
   482  
   483  	a.pending++
   484  
   485  	obj := make(map[string]interface{}, len(records))
   486  	for i, r := range records {
   487  		obj[a.headers[i]] = r
   488  	}
   489  
   490  	part := message.NewPart(nil)
   491  	part.SetJSON(obj)
   492  
   493  	return []types.Part{part}, a.ack, nil
   494  }
   495  
   496  func (a *csvReader) Close(ctx context.Context) error {
   497  	a.mut.Lock()
   498  	defer a.mut.Unlock()
   499  
   500  	if !a.finished {
   501  		_ = a.sourceAck(ctx, errors.New("service shutting down"))
   502  	}
   503  	if a.pending == 0 {
   504  		_ = a.sourceAck(ctx, nil)
   505  	}
   506  	return a.r.Close()
   507  }
   508  
   509  //------------------------------------------------------------------------------
   510  
   511  type customDelimReader struct {
   512  	buf       *bufio.Scanner
   513  	r         io.ReadCloser
   514  	sourceAck ReaderAckFn
   515  
   516  	mut      sync.Mutex
   517  	finished bool
   518  	pending  int32
   519  }
   520  
   521  func newCustomDelimReader(conf ReaderConfig, r io.ReadCloser, delim string, ackFn ReaderAckFn) (Reader, error) {
   522  	scanner := bufio.NewScanner(r)
   523  	if conf.MaxScanTokenSize != bufio.MaxScanTokenSize {
   524  		scanner.Buffer([]byte{}, conf.MaxScanTokenSize)
   525  	}
   526  
   527  	delimBytes := []byte(delim)
   528  
   529  	scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
   530  		if atEOF && len(data) == 0 {
   531  			return 0, nil, nil
   532  		}
   533  
   534  		if i := bytes.Index(data, delimBytes); i >= 0 {
   535  			// We have a full terminated line.
   536  			return i + len(delimBytes), data[0:i], nil
   537  		}
   538  
   539  		// If we're at EOF, we have a final, non-terminated line. Return it.
   540  		if atEOF {
   541  			return len(data), data, nil
   542  		}
   543  
   544  		// Request more data.
   545  		return 0, nil, nil
   546  	})
   547  
   548  	return &customDelimReader{
   549  		buf:       scanner,
   550  		r:         r,
   551  		sourceAck: ackOnce(ackFn),
   552  	}, nil
   553  }
   554  
   555  func (a *customDelimReader) ack(ctx context.Context, err error) error {
   556  	a.mut.Lock()
   557  	a.pending--
   558  	doAck := a.pending == 0 && a.finished
   559  	a.mut.Unlock()
   560  
   561  	if err != nil {
   562  		return a.sourceAck(ctx, err)
   563  	}
   564  	if doAck {
   565  		return a.sourceAck(ctx, nil)
   566  	}
   567  	return nil
   568  }
   569  
   570  func (a *customDelimReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) {
   571  	scanned := a.buf.Scan()
   572  
   573  	a.mut.Lock()
   574  	defer a.mut.Unlock()
   575  
   576  	if scanned {
   577  		a.pending++
   578  
   579  		bytesCopy := make([]byte, len(a.buf.Bytes()))
   580  		copy(bytesCopy, a.buf.Bytes())
   581  		return []types.Part{message.NewPart(bytesCopy)}, a.ack, nil
   582  	}
   583  	err := a.buf.Err()
   584  	if err == nil {
   585  		err = io.EOF
   586  		a.finished = true
   587  	} else {
   588  		_ = a.sourceAck(ctx, err)
   589  	}
   590  	return nil, nil, err
   591  }
   592  
   593  func (a *customDelimReader) Close(ctx context.Context) error {
   594  	a.mut.Lock()
   595  	defer a.mut.Unlock()
   596  
   597  	if !a.finished {
   598  		_ = a.sourceAck(ctx, errors.New("service shutting down"))
   599  	}
   600  	if a.pending == 0 {
   601  		_ = a.sourceAck(ctx, nil)
   602  	}
   603  	return a.r.Close()
   604  }
   605  
   606  //------------------------------------------------------------------------------
   607  
   608  type chunkerReader struct {
   609  	chunkSize int64
   610  	buf       *bytes.Buffer
   611  	r         io.ReadCloser
   612  	sourceAck ReaderAckFn
   613  
   614  	mut      sync.Mutex
   615  	finished bool
   616  	pending  int32
   617  }
   618  
   619  func newChunkerReader(conf ReaderConfig, r io.ReadCloser, chunkSize int64, ackFn ReaderAckFn) (Reader, error) {
   620  	return &chunkerReader{
   621  		chunkSize: chunkSize,
   622  		buf:       bytes.NewBuffer(make([]byte, 0, chunkSize)),
   623  		r:         r,
   624  		sourceAck: ackOnce(ackFn),
   625  	}, nil
   626  }
   627  
   628  func (a *chunkerReader) ack(ctx context.Context, err error) error {
   629  	a.mut.Lock()
   630  	a.pending--
   631  	doAck := a.pending == 0 && a.finished
   632  	a.mut.Unlock()
   633  
   634  	if err != nil {
   635  		return a.sourceAck(ctx, err)
   636  	}
   637  	if doAck {
   638  		return a.sourceAck(ctx, nil)
   639  	}
   640  	return nil
   641  }
   642  
   643  func (a *chunkerReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) {
   644  	if a.finished {
   645  		return nil, nil, io.EOF
   646  	}
   647  
   648  	_, err := io.CopyN(a.buf, a.r, a.chunkSize)
   649  
   650  	a.mut.Lock()
   651  	defer a.mut.Unlock()
   652  
   653  	if err != nil {
   654  		if err == io.EOF {
   655  			a.finished = true
   656  		} else {
   657  			_ = a.sourceAck(ctx, err)
   658  			return nil, nil, err
   659  		}
   660  	}
   661  
   662  	if a.buf.Len() > 0 {
   663  		a.pending++
   664  
   665  		bytesCopy := make([]byte, a.buf.Len())
   666  		copy(bytesCopy, a.buf.Bytes())
   667  
   668  		a.buf.Reset()
   669  		return []types.Part{message.NewPart(bytesCopy)}, a.ack, nil
   670  	}
   671  
   672  	return nil, nil, err
   673  }
   674  
   675  func (a *chunkerReader) Close(ctx context.Context) error {
   676  	a.mut.Lock()
   677  	defer a.mut.Unlock()
   678  
   679  	if !a.finished {
   680  		_ = a.sourceAck(ctx, errors.New("service shutting down"))
   681  	}
   682  	if a.pending == 0 {
   683  		_ = a.sourceAck(ctx, nil)
   684  	}
   685  	return a.r.Close()
   686  }
   687  
   688  //------------------------------------------------------------------------------
   689  
   690  type tarReader struct {
   691  	buf       *tar.Reader
   692  	r         io.ReadCloser
   693  	sourceAck ReaderAckFn
   694  
   695  	mut      sync.Mutex
   696  	finished bool
   697  	pending  int32
   698  }
   699  
   700  func newTarReader(path string, r io.ReadCloser, ackFn ReaderAckFn) (Reader, error) {
   701  	return &tarReader{
   702  		buf:       tar.NewReader(r),
   703  		r:         r,
   704  		sourceAck: ackOnce(ackFn),
   705  	}, nil
   706  }
   707  
   708  func (a *tarReader) ack(ctx context.Context, err error) error {
   709  	a.mut.Lock()
   710  	a.pending--
   711  	doAck := a.pending == 0 && a.finished
   712  	a.mut.Unlock()
   713  
   714  	if err != nil {
   715  		return a.sourceAck(ctx, err)
   716  	}
   717  	if doAck {
   718  		return a.sourceAck(ctx, nil)
   719  	}
   720  	return nil
   721  }
   722  
   723  func (a *tarReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) {
   724  	_, err := a.buf.Next()
   725  
   726  	a.mut.Lock()
   727  	defer a.mut.Unlock()
   728  
   729  	if err == nil {
   730  		fileBuf := bytes.Buffer{}
   731  		if _, err = fileBuf.ReadFrom(a.buf); err != nil {
   732  			_ = a.sourceAck(ctx, err)
   733  			return nil, nil, err
   734  		}
   735  		a.pending++
   736  		return []types.Part{message.NewPart(fileBuf.Bytes())}, a.ack, nil
   737  	}
   738  
   739  	if err == io.EOF {
   740  		a.finished = true
   741  	} else {
   742  		_ = a.sourceAck(ctx, err)
   743  	}
   744  	return nil, nil, err
   745  }
   746  
   747  func (a *tarReader) Close(ctx context.Context) error {
   748  	a.mut.Lock()
   749  	defer a.mut.Unlock()
   750  
   751  	if !a.finished {
   752  		_ = a.sourceAck(ctx, errors.New("service shutting down"))
   753  	}
   754  	if a.pending == 0 {
   755  		_ = a.sourceAck(ctx, nil)
   756  	}
   757  	return a.r.Close()
   758  }
   759  
   760  //------------------------------------------------------------------------------
   761  
   762  type multipartReader struct {
   763  	child Reader
   764  }
   765  
   766  func newMultipartReader(r Reader) (Reader, error) {
   767  	return &multipartReader{
   768  		child: r,
   769  	}, nil
   770  }
   771  
   772  func isEmpty(p []types.Part) bool {
   773  	if len(p) == 0 {
   774  		return true
   775  	}
   776  	if len(p) == 1 && len(p[0].Get()) == 0 {
   777  		return true
   778  	}
   779  	return false
   780  }
   781  
   782  func (m *multipartReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) {
   783  	var parts []types.Part
   784  	var acks []ReaderAckFn
   785  
   786  	ackFn := func(ctx context.Context, err error) error {
   787  		for _, fn := range acks {
   788  			_ = fn(ctx, err)
   789  		}
   790  		return nil
   791  	}
   792  
   793  	for {
   794  		newParts, ack, err := m.child.Next(ctx)
   795  		if err != nil {
   796  			if errors.Is(err, io.EOF) && len(parts) > 0 {
   797  				return parts, ackFn, nil
   798  			}
   799  			return nil, nil, err
   800  		}
   801  		if isEmpty(newParts) {
   802  			_ = ack(ctx, nil)
   803  			if len(parts) > 0 {
   804  				// Empty message signals batch end.
   805  				return parts, ackFn, nil
   806  			}
   807  		} else {
   808  			parts = append(parts, newParts...)
   809  			acks = append(acks, ack)
   810  		}
   811  	}
   812  }
   813  
   814  func (m *multipartReader) Close(ctx context.Context) error {
   815  	return m.child.Close(ctx)
   816  }
   817  
   818  //------------------------------------------------------------------------------
   819  
   820  type regexReader struct {
   821  	buf       *bufio.Scanner
   822  	r         io.ReadCloser
   823  	sourceAck ReaderAckFn
   824  
   825  	mut      sync.Mutex
   826  	finished bool
   827  	pending  int32
   828  }
   829  
   830  func newRexExpSplitReader(conf ReaderConfig, r io.ReadCloser, regex string, ackFn ReaderAckFn) (Reader, error) {
   831  	scanner := bufio.NewScanner(r)
   832  	if conf.MaxScanTokenSize != bufio.MaxScanTokenSize {
   833  		scanner.Buffer([]byte{}, conf.MaxScanTokenSize)
   834  	}
   835  
   836  	compiled, err := regexp.Compile(regex)
   837  
   838  	if err != nil {
   839  		return nil, err
   840  	}
   841  
   842  	scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
   843  		if atEOF && len(data) == 0 {
   844  			return 0, nil, nil
   845  		}
   846  
   847  		loc := compiled.FindAllIndex(data, 2)
   848  		if loc == nil {
   849  			if atEOF {
   850  				return len(data), data, nil
   851  			}
   852  			return 0, nil, nil
   853  		}
   854  
   855  		if len(loc) == 1 {
   856  			if atEOF {
   857  				if loc[0][0] == 0 {
   858  					return len(data), data, nil
   859  				}
   860  				return loc[0][0], data[0:loc[0][0]], nil
   861  			}
   862  			return 0, nil, nil
   863  		}
   864  		if loc[0][0] == 0 {
   865  			return loc[1][0], data[0:loc[1][0]], nil
   866  		}
   867  		return loc[0][0], data[0:loc[0][0]], nil
   868  	})
   869  
   870  	return &regexReader{
   871  		buf:       scanner,
   872  		r:         r,
   873  		sourceAck: ackOnce(ackFn),
   874  	}, nil
   875  }
   876  
   877  func (a *regexReader) ack(ctx context.Context, err error) error {
   878  	a.mut.Lock()
   879  	a.pending--
   880  	doAck := a.pending == 0 && a.finished
   881  	a.mut.Unlock()
   882  
   883  	if err != nil {
   884  		return a.sourceAck(ctx, err)
   885  	}
   886  	if doAck {
   887  		return a.sourceAck(ctx, nil)
   888  	}
   889  	return nil
   890  }
   891  
   892  func (a *regexReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) {
   893  	scanned := a.buf.Scan()
   894  
   895  	a.mut.Lock()
   896  	defer a.mut.Unlock()
   897  
   898  	if scanned {
   899  		a.pending++
   900  
   901  		bytesCopy := make([]byte, len(a.buf.Bytes()))
   902  		copy(bytesCopy, a.buf.Bytes())
   903  		return []types.Part{message.NewPart(bytesCopy)}, a.ack, nil
   904  	}
   905  	err := a.buf.Err()
   906  	if err == nil {
   907  		err = io.EOF
   908  		a.finished = true
   909  	} else {
   910  		_ = a.sourceAck(ctx, err)
   911  	}
   912  	return nil, nil, err
   913  }
   914  
   915  func (a *regexReader) Close(ctx context.Context) error {
   916  	a.mut.Lock()
   917  	defer a.mut.Unlock()
   918  
   919  	if !a.finished {
   920  		_ = a.sourceAck(ctx, errors.New("service shutting down"))
   921  	}
   922  	if a.pending == 0 {
   923  		_ = a.sourceAck(ctx, nil)
   924  	}
   925  	return a.r.Close()
   926  }