github.com/Jeffail/benthos/v3@v3.65.0/internal/codec/reader.go (about) 1 package codec 2 3 import ( 4 "archive/tar" 5 "bufio" 6 "bytes" 7 "compress/gzip" 8 "context" 9 "encoding/csv" 10 "errors" 11 "fmt" 12 "io" 13 "path/filepath" 14 "regexp" 15 "strconv" 16 "strings" 17 "sync" 18 19 "github.com/Jeffail/benthos/v3/internal/docs" 20 "github.com/Jeffail/benthos/v3/lib/message" 21 "github.com/Jeffail/benthos/v3/lib/types" 22 ) 23 24 // ReaderDocs is a static field documentation for input codecs. 25 var ReaderDocs = docs.FieldCommon( 26 "codec", "The way in which the bytes of a data source should be converted into discrete messages, codecs are useful for specifying how large files or contiunous streams of data might be processed in small chunks rather than loading it all in memory. It's possible to consume lines using a custom delimiter with the `delim:x` codec, where x is the character sequence custom delimiter. Codecs can be chained with `/`, for example a gzip compressed CSV file can be consumed with the codec `gzip/csv`.", "lines", "delim:\t", "delim:foobar", "gzip/csv", 27 ).HasAnnotatedOptions( 28 "auto", "EXPERIMENTAL: Attempts to derive a codec for each file based on information such as the extension. For example, a .tar.gz file would be consumed with the `gzip/tar` codec. Defaults to all-bytes.", 29 "all-bytes", "Consume the entire file as a single binary message.", 30 "chunker:x", "Consume the file in chunks of a given number of bytes.", 31 "csv", "Consume structured rows as comma separated values, the first row must be a header row.", 32 "csv:x", "Consume structured rows as values separated by a custom delimiter, the first row must be a header row. The custom delimiter must be a single character, e.g. the codec `\"csv:\\t\"` would consume a tab delimited file.", 33 "delim:x", "Consume the file in segments divided by a custom delimiter.", 34 "gzip", "Decompress a gzip file, this codec should precede another codec, e.g. `gzip/all-bytes`, `gzip/tar`, `gzip/csv`, etc.", 35 "lines", "Consume the file in segments divided by linebreaks.", 36 "multipart", "Consumes the output of another codec and batches messages together. A batch ends when an empty message is consumed. For example, the codec `lines/multipart` could be used to consume multipart messages where an empty line indicates the end of each batch.", 37 "regex:(?m)^\\d\\d:\\d\\d:\\d\\d", "Consume the file in segments divided by regular expression.", 38 "tar", "Parse the file as a tar archive, and consume each file of the archive as a message.", 39 ) 40 41 //------------------------------------------------------------------------------ 42 43 // ReaderConfig is a general configuration struct that covers all reader codecs. 44 type ReaderConfig struct { 45 MaxScanTokenSize int 46 } 47 48 // NewReaderConfig creates a reader configuration with default values. 49 func NewReaderConfig() ReaderConfig { 50 return ReaderConfig{ 51 MaxScanTokenSize: bufio.MaxScanTokenSize, 52 } 53 } 54 55 //------------------------------------------------------------------------------ 56 57 // ReaderAckFn is a function provided to a reader codec that it should call once 58 // the underlying io.ReadCloser is fully consumed. 59 type ReaderAckFn func(context.Context, error) error 60 61 func ackOnce(fn ReaderAckFn) ReaderAckFn { 62 var once sync.Once 63 return func(ctx context.Context, err error) error { 64 var ackErr error 65 once.Do(func() { 66 ackErr = fn(ctx, err) 67 }) 68 return ackErr 69 } 70 } 71 72 // Reader is a codec type that reads message parts from a source. 73 type Reader interface { 74 Next(context.Context) ([]types.Part, ReaderAckFn, error) 75 Close(context.Context) error 76 } 77 78 type ioReaderConstructor func(string, io.ReadCloser) (io.ReadCloser, error) 79 80 // ReaderConstructor creates a reader from a filename, an io.ReadCloser and an 81 // ack func which is called by the reader once the io.ReadCloser is finished 82 // with. The filename can be empty and is usually ignored, but might be 83 // necessary for certain codecs. 84 type ReaderConstructor func(string, io.ReadCloser, ReaderAckFn) (Reader, error) 85 86 // readerReaderConstructor is a private constructor for readers that _must_ 87 // consume from other readers. 88 type readerReaderConstructor func(string, Reader) (Reader, error) 89 90 func chainIOCtors(first, second ioReaderConstructor) ioReaderConstructor { 91 return func(s string, rc io.ReadCloser) (io.ReadCloser, error) { 92 r1, err := first(s, rc) 93 if err != nil { 94 return nil, err 95 } 96 r2, err := second(s, r1) 97 if err != nil { 98 r1.Close() 99 return nil, err 100 } 101 return r2, nil 102 } 103 } 104 105 func chainIOIntoPartCtor(first ioReaderConstructor, second ReaderConstructor) ReaderConstructor { 106 return func(s string, rc io.ReadCloser, aFn ReaderAckFn) (Reader, error) { 107 r1, err := first(s, rc) 108 if err != nil { 109 return nil, err 110 } 111 r2, err := second(s, r1, aFn) 112 if err != nil { 113 r1.Close() 114 return nil, err 115 } 116 return r2, nil 117 } 118 } 119 120 func chainPartIntoReaderCtor(first ReaderConstructor, second readerReaderConstructor) ReaderConstructor { 121 return func(s string, rc io.ReadCloser, aFn ReaderAckFn) (Reader, error) { 122 r1, err := first(s, rc, aFn) 123 if err != nil { 124 return nil, err 125 } 126 r2, err := second(s, r1) 127 if err != nil { 128 r1.Close(context.Background()) 129 return nil, err 130 } 131 return r2, nil 132 } 133 } 134 135 func chainedReader(codec string, conf ReaderConfig) (ReaderConstructor, error) { 136 codecs := strings.Split(codec, "/") 137 138 var ioCtor ioReaderConstructor 139 var partCtor ReaderConstructor 140 141 for i, codec := range codecs { 142 if tmpIOCtor, ok := ioReader(codec, conf); ok { 143 if partCtor != nil { 144 return nil, fmt.Errorf("unable to follow codec '%v' with '%v'", codecs[i-1], codec) 145 } 146 if ioCtor != nil { 147 ioCtor = chainIOCtors(ioCtor, tmpIOCtor) 148 } else { 149 ioCtor = tmpIOCtor 150 } 151 continue 152 } 153 tmpPartCtor, ok, err := partReader(codec, conf) 154 if err != nil { 155 return nil, err 156 } 157 if ok { 158 if partCtor != nil { 159 return nil, fmt.Errorf("unable to follow codec '%v' with '%v'", codecs[i-1], codec) 160 } 161 if ioCtor != nil { 162 tmpPartCtor = chainIOIntoPartCtor(ioCtor, tmpPartCtor) 163 ioCtor = nil 164 } 165 partCtor = tmpPartCtor 166 continue 167 } 168 tmpReaderCtor, ok := readerReader(codec, conf) 169 if !ok { 170 return nil, fmt.Errorf("codec was not recognised: %v", codec) 171 } 172 if partCtor == nil { 173 return nil, fmt.Errorf("unable to codec '%v' must be preceded by a structured codec", codec) 174 } 175 partCtor = chainPartIntoReaderCtor(partCtor, tmpReaderCtor) 176 } 177 if partCtor == nil { 178 return nil, fmt.Errorf("codec was not recognised: %v", codecs) 179 } 180 return partCtor, nil 181 } 182 183 func ioReader(codec string, conf ReaderConfig) (ioReaderConstructor, bool) { 184 if codec == "gzip" { 185 return func(_ string, r io.ReadCloser) (io.ReadCloser, error) { 186 g, err := gzip.NewReader(r) 187 if err != nil { 188 r.Close() 189 return nil, err 190 } 191 return g, nil 192 }, true 193 } 194 return nil, false 195 } 196 197 func readerReader(codec string, conf ReaderConfig) (readerReaderConstructor, bool) { 198 if codec == "multipart" { 199 return func(_ string, r Reader) (Reader, error) { 200 return newMultipartReader(r) 201 }, true 202 } 203 return nil, false 204 } 205 206 func partReader(codec string, conf ReaderConfig) (ReaderConstructor, bool, error) { 207 switch codec { 208 case "all-bytes": 209 return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) { 210 return &allBytesReader{r, fn, false}, nil 211 }, true, nil 212 case "lines": 213 return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) { 214 return newLinesReader(conf, r, fn) 215 }, true, nil 216 case "csv": 217 return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) { 218 return newCSVReader(r, fn, nil) 219 }, true, nil 220 case "tar": 221 return newTarReader, true, nil 222 } 223 if strings.HasPrefix(codec, "delim:") { 224 by := strings.TrimPrefix(codec, "delim:") 225 if by == "" { 226 return nil, false, errors.New("custom delimiter codec requires a non-empty delimiter") 227 } 228 return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) { 229 return newCustomDelimReader(conf, r, by, fn) 230 }, true, nil 231 } 232 if strings.HasPrefix(codec, "csv:") { 233 by := strings.TrimPrefix(codec, "csv:") 234 if by == "" { 235 return nil, false, errors.New("csv codec requires a non-empty delimiter") 236 } 237 byRunes := []rune(by) 238 if len(byRunes) != 1 { 239 return nil, false, errors.New("csv codec requires a single character delimiter") 240 } 241 byRune := byRunes[0] 242 return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) { 243 return newCSVReader(r, fn, &byRune) 244 }, true, nil 245 } 246 if strings.HasPrefix(codec, "chunker:") { 247 chunkSize, err := strconv.ParseInt(strings.TrimPrefix(codec, "chunker:"), 10, 64) 248 if err != nil { 249 return nil, false, fmt.Errorf("invalid chunk size for chunker codec: %w", err) 250 } 251 return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) { 252 return newChunkerReader(conf, r, chunkSize, fn) 253 }, true, nil 254 } 255 if strings.HasPrefix(codec, "regex:") { 256 by := strings.TrimPrefix(codec, "regex:") 257 if by == "" { 258 return nil, false, errors.New("regex codec requires a non-empty delimiter") 259 } 260 return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) { 261 return newRexExpSplitReader(conf, r, by, fn) 262 }, true, nil 263 } 264 return nil, false, nil 265 } 266 267 func convertDeprecatedCodec(codec string) string { 268 switch codec { 269 case "csv-gzip": 270 return "gzip/csv" 271 case "tar-gzip": 272 return "gzip/tar" 273 } 274 return codec 275 } 276 277 // GetReader returns a constructor that creates reader codecs. 278 func GetReader(codec string, conf ReaderConfig) (ReaderConstructor, error) { 279 codec = convertDeprecatedCodec(codec) 280 if codec == "auto" { 281 return autoCodec(conf), nil 282 } 283 return chainedReader(codec, conf) 284 } 285 286 func autoCodec(conf ReaderConfig) ReaderConstructor { 287 return func(path string, r io.ReadCloser, fn ReaderAckFn) (Reader, error) { 288 codec := "all-bytes" 289 switch filepath.Ext(path) { 290 case ".csv": 291 codec = "csv" 292 case ".csv.gz", ".csv.gzip": 293 codec = "gzip/csv" 294 case ".tar": 295 codec = "tar" 296 case ".tgz": 297 codec = "gzip/tar" 298 } 299 if strings.HasSuffix(path, ".tar.gzip") { 300 codec = "gzip/tar" 301 } else if strings.HasSuffix(path, ".tar.gz") { 302 codec = "gzip/tar" 303 } 304 305 ctor, err := GetReader(codec, conf) 306 if err != nil { 307 return nil, fmt.Errorf("failed to infer codec: %v", err) 308 } 309 return ctor(path, r, fn) 310 } 311 } 312 313 //------------------------------------------------------------------------------ 314 315 type allBytesReader struct { 316 i io.ReadCloser 317 ack ReaderAckFn 318 consumed bool 319 } 320 321 func (a *allBytesReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) { 322 if a.consumed { 323 return nil, nil, io.EOF 324 } 325 a.consumed = true 326 b, err := io.ReadAll(a.i) 327 if err != nil { 328 _ = a.ack(ctx, err) 329 return nil, nil, err 330 } 331 p := message.NewPart(b) 332 return []types.Part{p}, a.ack, nil 333 } 334 335 func (a *allBytesReader) Close(ctx context.Context) error { 336 if !a.consumed { 337 _ = a.ack(ctx, errors.New("service shutting down")) 338 } 339 return a.i.Close() 340 } 341 342 //------------------------------------------------------------------------------ 343 344 type linesReader struct { 345 buf *bufio.Scanner 346 r io.ReadCloser 347 sourceAck ReaderAckFn 348 349 mut sync.Mutex 350 finished bool 351 pending int32 352 } 353 354 func newLinesReader(conf ReaderConfig, r io.ReadCloser, ackFn ReaderAckFn) (Reader, error) { 355 scanner := bufio.NewScanner(r) 356 if conf.MaxScanTokenSize != bufio.MaxScanTokenSize { 357 scanner.Buffer([]byte{}, conf.MaxScanTokenSize) 358 } 359 return &linesReader{ 360 buf: scanner, 361 r: r, 362 sourceAck: ackOnce(ackFn), 363 }, nil 364 } 365 366 func (a *linesReader) ack(ctx context.Context, err error) error { 367 a.mut.Lock() 368 a.pending-- 369 doAck := a.pending == 0 && a.finished 370 a.mut.Unlock() 371 372 if err != nil { 373 return a.sourceAck(ctx, err) 374 } 375 if doAck { 376 return a.sourceAck(ctx, nil) 377 } 378 return nil 379 } 380 381 func (a *linesReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) { 382 scanned := a.buf.Scan() 383 a.mut.Lock() 384 defer a.mut.Unlock() 385 386 if scanned { 387 a.pending++ 388 bytesCopy := make([]byte, len(a.buf.Bytes())) 389 copy(bytesCopy, a.buf.Bytes()) 390 return []types.Part{message.NewPart(bytesCopy)}, a.ack, nil 391 } 392 393 err := a.buf.Err() 394 if err == nil { 395 err = io.EOF 396 a.finished = true 397 } else { 398 _ = a.sourceAck(ctx, err) 399 } 400 return nil, nil, err 401 } 402 403 func (a *linesReader) Close(ctx context.Context) error { 404 a.mut.Lock() 405 defer a.mut.Unlock() 406 407 if !a.finished { 408 _ = a.sourceAck(ctx, errors.New("service shutting down")) 409 } 410 if a.pending == 0 { 411 _ = a.sourceAck(ctx, nil) 412 } 413 return a.r.Close() 414 } 415 416 //------------------------------------------------------------------------------ 417 418 type csvReader struct { 419 scanner *csv.Reader 420 r io.ReadCloser 421 sourceAck ReaderAckFn 422 423 headers []string 424 425 mut sync.Mutex 426 finished bool 427 pending int32 428 } 429 430 func newCSVReader(r io.ReadCloser, ackFn ReaderAckFn, customComma *rune) (Reader, error) { 431 scanner := csv.NewReader(r) 432 scanner.ReuseRecord = true 433 if customComma != nil { 434 scanner.Comma = *customComma 435 } 436 437 headers, err := scanner.Read() 438 if err != nil { 439 return nil, err 440 } 441 442 headersCopy := make([]string, len(headers)) 443 copy(headersCopy, headers) 444 445 return &csvReader{ 446 scanner: scanner, 447 r: r, 448 sourceAck: ackOnce(ackFn), 449 headers: headersCopy, 450 }, nil 451 } 452 453 func (a *csvReader) ack(ctx context.Context, err error) error { 454 a.mut.Lock() 455 a.pending-- 456 doAck := a.pending == 0 && a.finished 457 a.mut.Unlock() 458 459 if err != nil { 460 return a.sourceAck(ctx, err) 461 } 462 if doAck { 463 return a.sourceAck(ctx, nil) 464 } 465 return nil 466 } 467 468 func (a *csvReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) { 469 records, err := a.scanner.Read() 470 471 a.mut.Lock() 472 defer a.mut.Unlock() 473 474 if err != nil { 475 if err == io.EOF { 476 a.finished = true 477 } else { 478 _ = a.sourceAck(ctx, err) 479 } 480 return nil, nil, err 481 } 482 483 a.pending++ 484 485 obj := make(map[string]interface{}, len(records)) 486 for i, r := range records { 487 obj[a.headers[i]] = r 488 } 489 490 part := message.NewPart(nil) 491 part.SetJSON(obj) 492 493 return []types.Part{part}, a.ack, nil 494 } 495 496 func (a *csvReader) Close(ctx context.Context) error { 497 a.mut.Lock() 498 defer a.mut.Unlock() 499 500 if !a.finished { 501 _ = a.sourceAck(ctx, errors.New("service shutting down")) 502 } 503 if a.pending == 0 { 504 _ = a.sourceAck(ctx, nil) 505 } 506 return a.r.Close() 507 } 508 509 //------------------------------------------------------------------------------ 510 511 type customDelimReader struct { 512 buf *bufio.Scanner 513 r io.ReadCloser 514 sourceAck ReaderAckFn 515 516 mut sync.Mutex 517 finished bool 518 pending int32 519 } 520 521 func newCustomDelimReader(conf ReaderConfig, r io.ReadCloser, delim string, ackFn ReaderAckFn) (Reader, error) { 522 scanner := bufio.NewScanner(r) 523 if conf.MaxScanTokenSize != bufio.MaxScanTokenSize { 524 scanner.Buffer([]byte{}, conf.MaxScanTokenSize) 525 } 526 527 delimBytes := []byte(delim) 528 529 scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) { 530 if atEOF && len(data) == 0 { 531 return 0, nil, nil 532 } 533 534 if i := bytes.Index(data, delimBytes); i >= 0 { 535 // We have a full terminated line. 536 return i + len(delimBytes), data[0:i], nil 537 } 538 539 // If we're at EOF, we have a final, non-terminated line. Return it. 540 if atEOF { 541 return len(data), data, nil 542 } 543 544 // Request more data. 545 return 0, nil, nil 546 }) 547 548 return &customDelimReader{ 549 buf: scanner, 550 r: r, 551 sourceAck: ackOnce(ackFn), 552 }, nil 553 } 554 555 func (a *customDelimReader) ack(ctx context.Context, err error) error { 556 a.mut.Lock() 557 a.pending-- 558 doAck := a.pending == 0 && a.finished 559 a.mut.Unlock() 560 561 if err != nil { 562 return a.sourceAck(ctx, err) 563 } 564 if doAck { 565 return a.sourceAck(ctx, nil) 566 } 567 return nil 568 } 569 570 func (a *customDelimReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) { 571 scanned := a.buf.Scan() 572 573 a.mut.Lock() 574 defer a.mut.Unlock() 575 576 if scanned { 577 a.pending++ 578 579 bytesCopy := make([]byte, len(a.buf.Bytes())) 580 copy(bytesCopy, a.buf.Bytes()) 581 return []types.Part{message.NewPart(bytesCopy)}, a.ack, nil 582 } 583 err := a.buf.Err() 584 if err == nil { 585 err = io.EOF 586 a.finished = true 587 } else { 588 _ = a.sourceAck(ctx, err) 589 } 590 return nil, nil, err 591 } 592 593 func (a *customDelimReader) Close(ctx context.Context) error { 594 a.mut.Lock() 595 defer a.mut.Unlock() 596 597 if !a.finished { 598 _ = a.sourceAck(ctx, errors.New("service shutting down")) 599 } 600 if a.pending == 0 { 601 _ = a.sourceAck(ctx, nil) 602 } 603 return a.r.Close() 604 } 605 606 //------------------------------------------------------------------------------ 607 608 type chunkerReader struct { 609 chunkSize int64 610 buf *bytes.Buffer 611 r io.ReadCloser 612 sourceAck ReaderAckFn 613 614 mut sync.Mutex 615 finished bool 616 pending int32 617 } 618 619 func newChunkerReader(conf ReaderConfig, r io.ReadCloser, chunkSize int64, ackFn ReaderAckFn) (Reader, error) { 620 return &chunkerReader{ 621 chunkSize: chunkSize, 622 buf: bytes.NewBuffer(make([]byte, 0, chunkSize)), 623 r: r, 624 sourceAck: ackOnce(ackFn), 625 }, nil 626 } 627 628 func (a *chunkerReader) ack(ctx context.Context, err error) error { 629 a.mut.Lock() 630 a.pending-- 631 doAck := a.pending == 0 && a.finished 632 a.mut.Unlock() 633 634 if err != nil { 635 return a.sourceAck(ctx, err) 636 } 637 if doAck { 638 return a.sourceAck(ctx, nil) 639 } 640 return nil 641 } 642 643 func (a *chunkerReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) { 644 if a.finished { 645 return nil, nil, io.EOF 646 } 647 648 _, err := io.CopyN(a.buf, a.r, a.chunkSize) 649 650 a.mut.Lock() 651 defer a.mut.Unlock() 652 653 if err != nil { 654 if err == io.EOF { 655 a.finished = true 656 } else { 657 _ = a.sourceAck(ctx, err) 658 return nil, nil, err 659 } 660 } 661 662 if a.buf.Len() > 0 { 663 a.pending++ 664 665 bytesCopy := make([]byte, a.buf.Len()) 666 copy(bytesCopy, a.buf.Bytes()) 667 668 a.buf.Reset() 669 return []types.Part{message.NewPart(bytesCopy)}, a.ack, nil 670 } 671 672 return nil, nil, err 673 } 674 675 func (a *chunkerReader) Close(ctx context.Context) error { 676 a.mut.Lock() 677 defer a.mut.Unlock() 678 679 if !a.finished { 680 _ = a.sourceAck(ctx, errors.New("service shutting down")) 681 } 682 if a.pending == 0 { 683 _ = a.sourceAck(ctx, nil) 684 } 685 return a.r.Close() 686 } 687 688 //------------------------------------------------------------------------------ 689 690 type tarReader struct { 691 buf *tar.Reader 692 r io.ReadCloser 693 sourceAck ReaderAckFn 694 695 mut sync.Mutex 696 finished bool 697 pending int32 698 } 699 700 func newTarReader(path string, r io.ReadCloser, ackFn ReaderAckFn) (Reader, error) { 701 return &tarReader{ 702 buf: tar.NewReader(r), 703 r: r, 704 sourceAck: ackOnce(ackFn), 705 }, nil 706 } 707 708 func (a *tarReader) ack(ctx context.Context, err error) error { 709 a.mut.Lock() 710 a.pending-- 711 doAck := a.pending == 0 && a.finished 712 a.mut.Unlock() 713 714 if err != nil { 715 return a.sourceAck(ctx, err) 716 } 717 if doAck { 718 return a.sourceAck(ctx, nil) 719 } 720 return nil 721 } 722 723 func (a *tarReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) { 724 _, err := a.buf.Next() 725 726 a.mut.Lock() 727 defer a.mut.Unlock() 728 729 if err == nil { 730 fileBuf := bytes.Buffer{} 731 if _, err = fileBuf.ReadFrom(a.buf); err != nil { 732 _ = a.sourceAck(ctx, err) 733 return nil, nil, err 734 } 735 a.pending++ 736 return []types.Part{message.NewPart(fileBuf.Bytes())}, a.ack, nil 737 } 738 739 if err == io.EOF { 740 a.finished = true 741 } else { 742 _ = a.sourceAck(ctx, err) 743 } 744 return nil, nil, err 745 } 746 747 func (a *tarReader) Close(ctx context.Context) error { 748 a.mut.Lock() 749 defer a.mut.Unlock() 750 751 if !a.finished { 752 _ = a.sourceAck(ctx, errors.New("service shutting down")) 753 } 754 if a.pending == 0 { 755 _ = a.sourceAck(ctx, nil) 756 } 757 return a.r.Close() 758 } 759 760 //------------------------------------------------------------------------------ 761 762 type multipartReader struct { 763 child Reader 764 } 765 766 func newMultipartReader(r Reader) (Reader, error) { 767 return &multipartReader{ 768 child: r, 769 }, nil 770 } 771 772 func isEmpty(p []types.Part) bool { 773 if len(p) == 0 { 774 return true 775 } 776 if len(p) == 1 && len(p[0].Get()) == 0 { 777 return true 778 } 779 return false 780 } 781 782 func (m *multipartReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) { 783 var parts []types.Part 784 var acks []ReaderAckFn 785 786 ackFn := func(ctx context.Context, err error) error { 787 for _, fn := range acks { 788 _ = fn(ctx, err) 789 } 790 return nil 791 } 792 793 for { 794 newParts, ack, err := m.child.Next(ctx) 795 if err != nil { 796 if errors.Is(err, io.EOF) && len(parts) > 0 { 797 return parts, ackFn, nil 798 } 799 return nil, nil, err 800 } 801 if isEmpty(newParts) { 802 _ = ack(ctx, nil) 803 if len(parts) > 0 { 804 // Empty message signals batch end. 805 return parts, ackFn, nil 806 } 807 } else { 808 parts = append(parts, newParts...) 809 acks = append(acks, ack) 810 } 811 } 812 } 813 814 func (m *multipartReader) Close(ctx context.Context) error { 815 return m.child.Close(ctx) 816 } 817 818 //------------------------------------------------------------------------------ 819 820 type regexReader struct { 821 buf *bufio.Scanner 822 r io.ReadCloser 823 sourceAck ReaderAckFn 824 825 mut sync.Mutex 826 finished bool 827 pending int32 828 } 829 830 func newRexExpSplitReader(conf ReaderConfig, r io.ReadCloser, regex string, ackFn ReaderAckFn) (Reader, error) { 831 scanner := bufio.NewScanner(r) 832 if conf.MaxScanTokenSize != bufio.MaxScanTokenSize { 833 scanner.Buffer([]byte{}, conf.MaxScanTokenSize) 834 } 835 836 compiled, err := regexp.Compile(regex) 837 838 if err != nil { 839 return nil, err 840 } 841 842 scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) { 843 if atEOF && len(data) == 0 { 844 return 0, nil, nil 845 } 846 847 loc := compiled.FindAllIndex(data, 2) 848 if loc == nil { 849 if atEOF { 850 return len(data), data, nil 851 } 852 return 0, nil, nil 853 } 854 855 if len(loc) == 1 { 856 if atEOF { 857 if loc[0][0] == 0 { 858 return len(data), data, nil 859 } 860 return loc[0][0], data[0:loc[0][0]], nil 861 } 862 return 0, nil, nil 863 } 864 if loc[0][0] == 0 { 865 return loc[1][0], data[0:loc[1][0]], nil 866 } 867 return loc[0][0], data[0:loc[0][0]], nil 868 }) 869 870 return ®exReader{ 871 buf: scanner, 872 r: r, 873 sourceAck: ackOnce(ackFn), 874 }, nil 875 } 876 877 func (a *regexReader) ack(ctx context.Context, err error) error { 878 a.mut.Lock() 879 a.pending-- 880 doAck := a.pending == 0 && a.finished 881 a.mut.Unlock() 882 883 if err != nil { 884 return a.sourceAck(ctx, err) 885 } 886 if doAck { 887 return a.sourceAck(ctx, nil) 888 } 889 return nil 890 } 891 892 func (a *regexReader) Next(ctx context.Context) ([]types.Part, ReaderAckFn, error) { 893 scanned := a.buf.Scan() 894 895 a.mut.Lock() 896 defer a.mut.Unlock() 897 898 if scanned { 899 a.pending++ 900 901 bytesCopy := make([]byte, len(a.buf.Bytes())) 902 copy(bytesCopy, a.buf.Bytes()) 903 return []types.Part{message.NewPart(bytesCopy)}, a.ack, nil 904 } 905 err := a.buf.Err() 906 if err == nil { 907 err = io.EOF 908 a.finished = true 909 } else { 910 _ = a.sourceAck(ctx, err) 911 } 912 return nil, nil, err 913 } 914 915 func (a *regexReader) Close(ctx context.Context) error { 916 a.mut.Lock() 917 defer a.mut.Unlock() 918 919 if !a.finished { 920 _ = a.sourceAck(ctx, errors.New("service shutting down")) 921 } 922 if a.pending == 0 { 923 _ = a.sourceAck(ctx, nil) 924 } 925 return a.r.Close() 926 }