// Origin: github.com/Jeffail/benthos/v3@v3.65.0/lib/input/reader/lines.go

package reader

import (
	"bufio"
	"bytes"
	"context"
	"io"
	"sync"
	"time"

	"github.com/Jeffail/benthos/v3/lib/message"
	"github.com/Jeffail/benthos/v3/lib/types"
)

//------------------------------------------------------------------------------

// Lines is a reader implementation that continuously reads line delimited
// messages from an io.Reader type.
//
// A background goroutine started by ConnectWithContext scans the current
// reader and publishes messages and scanner errors on per-connection channels,
// which Read and ReadWithContext consume.
type Lines struct {
	// handleCtor is called on every connect to obtain the io.Reader to scan.
	// An io.EOF error from it is translated into types.ErrTypeClosed.
	handleCtor func(ctx context.Context) (io.Reader, error)
	// onClose is called by CloseAsync before the current handle is closed.
	onClose func(ctx context.Context)

	// mut is held while connecting and closing, and while the read methods
	// take their copies of msgChan and errChan.
	mut sync.Mutex
	// handle is the reader currently being scanned, or nil when there is none.
	// It is closed on reconnect/shutdown if it also implements io.ReadCloser.
	handle io.Reader
	// shutdownFn cancels the context watched by the scanning goroutine. It is
	// initialised to a no-op by the constructors so it is always callable.
	shutdownFn func()
	// errChan carries a scanner error (if any) from the scanning goroutine.
	// It is created per connection and closed when that goroutine exits.
	errChan chan error
	// msgChan carries parsed messages from the scanning goroutine. It is
	// created per connection and closed when that goroutine exits.
	msgChan chan types.Message

	// maxBuffer is the maximum scanner buffer size; the constructors default
	// it to bufio.MaxScanTokenSize.
	maxBuffer int
	// multipart, when true, groups consecutive non-empty lines into a single
	// message that is emitted when an empty line (or end of input) is reached.
	multipart bool
	// delimiter separates lines in the stream; the constructors default it to
	// "\n".
	delimiter []byte
}

// NewLines creates a new reader input type able to create a feed of line
// delimited messages from an io.Reader.
//
// Callers must provide a constructor function for the target io.Reader, which
// is called on start up and again each time a reader is exhausted. If the
// constructor is called but there is no more content to create a Reader for
// then the error `io.EOF` should be returned and the Lines will close.
//
// Callers must also provide an onClose function, which will be called if the
// Lines has been instructed to shut down. This function should unblock any
// blocked Read calls.
45 func NewLines( 46 handleCtor func() (io.Reader, error), 47 onClose func(), 48 options ...func(r *Lines), 49 ) (*Lines, error) { 50 r := Lines{ 51 handleCtor: func(ctx context.Context) (io.Reader, error) { 52 return handleCtor() 53 }, 54 onClose: func(ctx context.Context) { 55 onClose() 56 }, 57 maxBuffer: bufio.MaxScanTokenSize, 58 multipart: false, 59 delimiter: []byte("\n"), 60 } 61 62 for _, opt := range options { 63 opt(&r) 64 } 65 66 r.shutdownFn = func() {} 67 return &r, nil 68 } 69 70 // NewLinesWithContext expands NewLines by requiring context.Context arguments 71 // in the provided closures. 72 func NewLinesWithContext( 73 handleCtor func(ctx context.Context) (io.Reader, error), 74 onClose func(ctx context.Context), 75 options ...func(r *Lines), 76 ) (*Lines, error) { 77 r := Lines{ 78 handleCtor: handleCtor, 79 onClose: onClose, 80 maxBuffer: bufio.MaxScanTokenSize, 81 multipart: false, 82 delimiter: []byte("\n"), 83 } 84 85 for _, opt := range options { 86 opt(&r) 87 } 88 89 r.shutdownFn = func() {} 90 return &r, nil 91 } 92 93 //------------------------------------------------------------------------------ 94 95 // OptLinesSetMaxBuffer is a option func that sets the maximum size of the 96 // line parsing buffers. 97 func OptLinesSetMaxBuffer(maxBuffer int) func(r *Lines) { 98 return func(r *Lines) { 99 r.maxBuffer = maxBuffer 100 } 101 } 102 103 // OptLinesSetMultipart is a option func that sets the boolean flag 104 // indicating whether lines should be parsed as multipart or not. 105 func OptLinesSetMultipart(multipart bool) func(r *Lines) { 106 return func(r *Lines) { 107 r.multipart = multipart 108 } 109 } 110 111 // OptLinesSetDelimiter is a option func that sets the delimiter (default 112 // '\n') used to divide lines (message parts) in the stream of data. 
113 func OptLinesSetDelimiter(delimiter string) func(r *Lines) { 114 return func(r *Lines) { 115 r.delimiter = []byte(delimiter) 116 } 117 } 118 119 //------------------------------------------------------------------------------ 120 121 func (r *Lines) closeHandle() { 122 if r.handle != nil { 123 if closer, ok := r.handle.(io.ReadCloser); ok { 124 closer.Close() 125 } 126 r.handle = nil 127 } 128 r.shutdownFn() 129 } 130 131 //------------------------------------------------------------------------------ 132 133 // Connect attempts to establish a new scanner for an io.Reader. 134 func (r *Lines) Connect() error { 135 return r.ConnectWithContext(context.Background()) 136 } 137 138 // ConnectWithContext attempts to establish a new scanner for an io.Reader. 139 func (r *Lines) ConnectWithContext(ctx context.Context) error { 140 r.mut.Lock() 141 defer r.mut.Unlock() 142 r.closeHandle() 143 144 handle, err := r.handleCtor(ctx) 145 if err != nil { 146 if err == io.EOF { 147 return types.ErrTypeClosed 148 } 149 return err 150 } 151 152 scanner := bufio.NewScanner(handle) 153 if r.maxBuffer != bufio.MaxScanTokenSize { 154 scanner.Buffer([]byte{}, r.maxBuffer) 155 } 156 157 scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) { 158 if atEOF && len(data) == 0 { 159 return 0, nil, nil 160 } 161 162 if i := bytes.Index(data, r.delimiter); i >= 0 { 163 // We have a full terminated line. 164 return i + len(r.delimiter), data[0:i], nil 165 } 166 167 // If we're at EOF, we have a final, non-terminated line. Return it. 168 if atEOF { 169 return len(data), data, nil 170 } 171 172 // Request more data. 
173 return 0, nil, nil 174 }) 175 176 scannerCtx, shutdownFn := context.WithCancel(context.Background()) 177 msgChan := make(chan types.Message) 178 errChan := make(chan error) 179 180 go func() { 181 defer func() { 182 shutdownFn() 183 close(errChan) 184 close(msgChan) 185 }() 186 187 msg := message.New(nil) 188 for scanner.Scan() { 189 partBytes := make([]byte, len(scanner.Bytes())) 190 partSize := copy(partBytes, scanner.Bytes()) 191 192 if partSize > 0 { 193 msg.Append(message.NewPart(partBytes)) 194 if !r.multipart { 195 select { 196 case msgChan <- msg: 197 case <-scannerCtx.Done(): 198 return 199 } 200 msg = message.New(nil) 201 } 202 } else if r.multipart && msg.Len() > 0 { 203 // Empty line means we're finished reading parts for this 204 // message. 205 select { 206 case msgChan <- msg: 207 case <-scannerCtx.Done(): 208 return 209 } 210 msg = message.New(nil) 211 } 212 } 213 if msg.Len() > 0 { 214 select { 215 case msgChan <- msg: 216 case <-scannerCtx.Done(): 217 return 218 } 219 } 220 if serr := scanner.Err(); serr != nil { 221 select { 222 case errChan <- serr: 223 case <-scannerCtx.Done(): 224 return 225 } 226 } 227 }() 228 229 r.handle = handle 230 r.msgChan = msgChan 231 r.errChan = errChan 232 r.shutdownFn = shutdownFn 233 return nil 234 } 235 236 // ReadWithContext attempts to read a new line from the io.Reader. 237 func (r *Lines) ReadWithContext(ctx context.Context) (types.Message, AsyncAckFn, error) { 238 r.mut.Lock() 239 msgChan := r.msgChan 240 errChan := r.errChan 241 r.mut.Unlock() 242 243 select { 244 case msg, open := <-msgChan: 245 if !open { 246 return nil, nil, types.ErrNotConnected 247 } 248 return msg, noopAsyncAckFn, nil 249 case err, open := <-errChan: 250 if !open { 251 return nil, nil, types.ErrNotConnected 252 } 253 return nil, nil, err 254 case <-ctx.Done(): 255 } 256 return nil, nil, types.ErrTimeout 257 } 258 259 // Read attempts to read a new line from the io.Reader. 
260 func (r *Lines) Read() (types.Message, error) { 261 r.mut.Lock() 262 msgChan := r.msgChan 263 errChan := r.errChan 264 r.mut.Unlock() 265 266 select { 267 case msg, open := <-msgChan: 268 if !open { 269 return nil, types.ErrNotConnected 270 } 271 return msg, nil 272 case err, open := <-errChan: 273 if !open { 274 return nil, types.ErrNotConnected 275 } 276 return nil, err 277 } 278 } 279 280 // Acknowledge confirms whether or not our unacknowledged messages have been 281 // successfully propagated or not. 282 func (r *Lines) Acknowledge(err error) error { 283 return nil 284 } 285 286 // CloseAsync shuts down the reader input and stops processing requests. 287 func (r *Lines) CloseAsync() { 288 go func() { 289 r.mut.Lock() 290 r.onClose(context.Background()) 291 r.closeHandle() 292 r.mut.Unlock() 293 }() 294 } 295 296 // WaitForClose blocks until the reader input has closed down. 297 func (r *Lines) WaitForClose(timeout time.Duration) error { 298 return nil 299 } 300 301 //------------------------------------------------------------------------------