github.com/Jeffail/benthos/v3@v3.65.0/lib/input/reader/lines.go (about)

     1  package reader
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"context"
     7  	"io"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/Jeffail/benthos/v3/lib/message"
    12  	"github.com/Jeffail/benthos/v3/lib/types"
    13  )
    14  
    15  //------------------------------------------------------------------------------
    16  
    17  // Lines is a reader implementation that continuously reads line delimited
    18  // messages from an io.Reader type.
    19  type Lines struct {
    20  	handleCtor func(ctx context.Context) (io.Reader, error)
    21  	onClose    func(ctx context.Context)
    22  
    23  	mut        sync.Mutex
    24  	handle     io.Reader
    25  	shutdownFn func()
    26  	errChan    chan error
    27  	msgChan    chan types.Message
    28  
    29  	maxBuffer int
    30  	multipart bool
    31  	delimiter []byte
    32  }
    33  
    34  // NewLines creates a new reader input type able to create a feed of line
    35  // delimited messages from an io.Reader.
    36  //
    37  // Callers must provide a constructor function for the target io.Reader, which
    38  // is called on start up and again each time a reader is exhausted. If the
    39  // constructor is called but there is no more content to create a Reader for
    40  // then the error `io.EOF` should be returned and the Lines will close.
    41  //
    42  // Callers must also provide an onClose function, which will be called if the
    43  // Lines has been instructed to shut down. This function should unblock any
    44  // blocked Read calls.
    45  func NewLines(
    46  	handleCtor func() (io.Reader, error),
    47  	onClose func(),
    48  	options ...func(r *Lines),
    49  ) (*Lines, error) {
    50  	r := Lines{
    51  		handleCtor: func(ctx context.Context) (io.Reader, error) {
    52  			return handleCtor()
    53  		},
    54  		onClose: func(ctx context.Context) {
    55  			onClose()
    56  		},
    57  		maxBuffer: bufio.MaxScanTokenSize,
    58  		multipart: false,
    59  		delimiter: []byte("\n"),
    60  	}
    61  
    62  	for _, opt := range options {
    63  		opt(&r)
    64  	}
    65  
    66  	r.shutdownFn = func() {}
    67  	return &r, nil
    68  }
    69  
    70  // NewLinesWithContext expands NewLines by requiring context.Context arguments
    71  // in the provided closures.
    72  func NewLinesWithContext(
    73  	handleCtor func(ctx context.Context) (io.Reader, error),
    74  	onClose func(ctx context.Context),
    75  	options ...func(r *Lines),
    76  ) (*Lines, error) {
    77  	r := Lines{
    78  		handleCtor: handleCtor,
    79  		onClose:    onClose,
    80  		maxBuffer:  bufio.MaxScanTokenSize,
    81  		multipart:  false,
    82  		delimiter:  []byte("\n"),
    83  	}
    84  
    85  	for _, opt := range options {
    86  		opt(&r)
    87  	}
    88  
    89  	r.shutdownFn = func() {}
    90  	return &r, nil
    91  }
    92  
    93  //------------------------------------------------------------------------------
    94  
    95  // OptLinesSetMaxBuffer is a option func that sets the maximum size of the
    96  // line parsing buffers.
    97  func OptLinesSetMaxBuffer(maxBuffer int) func(r *Lines) {
    98  	return func(r *Lines) {
    99  		r.maxBuffer = maxBuffer
   100  	}
   101  }
   102  
   103  // OptLinesSetMultipart is a option func that sets the boolean flag
   104  // indicating whether lines should be parsed as multipart or not.
   105  func OptLinesSetMultipart(multipart bool) func(r *Lines) {
   106  	return func(r *Lines) {
   107  		r.multipart = multipart
   108  	}
   109  }
   110  
   111  // OptLinesSetDelimiter is a option func that sets the delimiter (default
   112  // '\n') used to divide lines (message parts) in the stream of data.
   113  func OptLinesSetDelimiter(delimiter string) func(r *Lines) {
   114  	return func(r *Lines) {
   115  		r.delimiter = []byte(delimiter)
   116  	}
   117  }
   118  
   119  //------------------------------------------------------------------------------
   120  
   121  func (r *Lines) closeHandle() {
   122  	if r.handle != nil {
   123  		if closer, ok := r.handle.(io.ReadCloser); ok {
   124  			closer.Close()
   125  		}
   126  		r.handle = nil
   127  	}
   128  	r.shutdownFn()
   129  }
   130  
   131  //------------------------------------------------------------------------------
   132  
   133  // Connect attempts to establish a new scanner for an io.Reader.
   134  func (r *Lines) Connect() error {
   135  	return r.ConnectWithContext(context.Background())
   136  }
   137  
   138  // ConnectWithContext attempts to establish a new scanner for an io.Reader.
   139  func (r *Lines) ConnectWithContext(ctx context.Context) error {
   140  	r.mut.Lock()
   141  	defer r.mut.Unlock()
   142  	r.closeHandle()
   143  
   144  	handle, err := r.handleCtor(ctx)
   145  	if err != nil {
   146  		if err == io.EOF {
   147  			return types.ErrTypeClosed
   148  		}
   149  		return err
   150  	}
   151  
   152  	scanner := bufio.NewScanner(handle)
   153  	if r.maxBuffer != bufio.MaxScanTokenSize {
   154  		scanner.Buffer([]byte{}, r.maxBuffer)
   155  	}
   156  
   157  	scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
   158  		if atEOF && len(data) == 0 {
   159  			return 0, nil, nil
   160  		}
   161  
   162  		if i := bytes.Index(data, r.delimiter); i >= 0 {
   163  			// We have a full terminated line.
   164  			return i + len(r.delimiter), data[0:i], nil
   165  		}
   166  
   167  		// If we're at EOF, we have a final, non-terminated line. Return it.
   168  		if atEOF {
   169  			return len(data), data, nil
   170  		}
   171  
   172  		// Request more data.
   173  		return 0, nil, nil
   174  	})
   175  
   176  	scannerCtx, shutdownFn := context.WithCancel(context.Background())
   177  	msgChan := make(chan types.Message)
   178  	errChan := make(chan error)
   179  
   180  	go func() {
   181  		defer func() {
   182  			shutdownFn()
   183  			close(errChan)
   184  			close(msgChan)
   185  		}()
   186  
   187  		msg := message.New(nil)
   188  		for scanner.Scan() {
   189  			partBytes := make([]byte, len(scanner.Bytes()))
   190  			partSize := copy(partBytes, scanner.Bytes())
   191  
   192  			if partSize > 0 {
   193  				msg.Append(message.NewPart(partBytes))
   194  				if !r.multipart {
   195  					select {
   196  					case msgChan <- msg:
   197  					case <-scannerCtx.Done():
   198  						return
   199  					}
   200  					msg = message.New(nil)
   201  				}
   202  			} else if r.multipart && msg.Len() > 0 {
   203  				// Empty line means we're finished reading parts for this
   204  				// message.
   205  				select {
   206  				case msgChan <- msg:
   207  				case <-scannerCtx.Done():
   208  					return
   209  				}
   210  				msg = message.New(nil)
   211  			}
   212  		}
   213  		if msg.Len() > 0 {
   214  			select {
   215  			case msgChan <- msg:
   216  			case <-scannerCtx.Done():
   217  				return
   218  			}
   219  		}
   220  		if serr := scanner.Err(); serr != nil {
   221  			select {
   222  			case errChan <- serr:
   223  			case <-scannerCtx.Done():
   224  				return
   225  			}
   226  		}
   227  	}()
   228  
   229  	r.handle = handle
   230  	r.msgChan = msgChan
   231  	r.errChan = errChan
   232  	r.shutdownFn = shutdownFn
   233  	return nil
   234  }
   235  
   236  // ReadWithContext attempts to read a new line from the io.Reader.
   237  func (r *Lines) ReadWithContext(ctx context.Context) (types.Message, AsyncAckFn, error) {
   238  	r.mut.Lock()
   239  	msgChan := r.msgChan
   240  	errChan := r.errChan
   241  	r.mut.Unlock()
   242  
   243  	select {
   244  	case msg, open := <-msgChan:
   245  		if !open {
   246  			return nil, nil, types.ErrNotConnected
   247  		}
   248  		return msg, noopAsyncAckFn, nil
   249  	case err, open := <-errChan:
   250  		if !open {
   251  			return nil, nil, types.ErrNotConnected
   252  		}
   253  		return nil, nil, err
   254  	case <-ctx.Done():
   255  	}
   256  	return nil, nil, types.ErrTimeout
   257  }
   258  
   259  // Read attempts to read a new line from the io.Reader.
   260  func (r *Lines) Read() (types.Message, error) {
   261  	r.mut.Lock()
   262  	msgChan := r.msgChan
   263  	errChan := r.errChan
   264  	r.mut.Unlock()
   265  
   266  	select {
   267  	case msg, open := <-msgChan:
   268  		if !open {
   269  			return nil, types.ErrNotConnected
   270  		}
   271  		return msg, nil
   272  	case err, open := <-errChan:
   273  		if !open {
   274  			return nil, types.ErrNotConnected
   275  		}
   276  		return nil, err
   277  	}
   278  }
   279  
   280  // Acknowledge confirms whether or not our unacknowledged messages have been
   281  // successfully propagated or not.
   282  func (r *Lines) Acknowledge(err error) error {
   283  	return nil
   284  }
   285  
   286  // CloseAsync shuts down the reader input and stops processing requests.
   287  func (r *Lines) CloseAsync() {
   288  	go func() {
   289  		r.mut.Lock()
   290  		r.onClose(context.Background())
   291  		r.closeHandle()
   292  		r.mut.Unlock()
   293  	}()
   294  }
   295  
   296  // WaitForClose blocks until the reader input has closed down.
   297  func (r *Lines) WaitForClose(timeout time.Duration) error {
   298  	return nil
   299  }
   300  
   301  //------------------------------------------------------------------------------