github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/split.go (about)

     1  package processor
     2  
     3  import (
     4  	"time"
     5  
     6  	"github.com/Jeffail/benthos/v3/internal/docs"
     7  	"github.com/Jeffail/benthos/v3/lib/log"
     8  	"github.com/Jeffail/benthos/v3/lib/message"
     9  	"github.com/Jeffail/benthos/v3/lib/metrics"
    10  	"github.com/Jeffail/benthos/v3/lib/response"
    11  	"github.com/Jeffail/benthos/v3/lib/types"
    12  )
    13  
    14  //------------------------------------------------------------------------------
    15  
    16  func init() {
    17  	Constructors[TypeSplit] = TypeSpec{
    18  		constructor: NewSplit,
    19  		Categories: []Category{
    20  			CategoryUtility,
    21  		},
    22  		Summary: `
    23  Breaks message batches (synonymous with multiple part messages) into smaller batches. The size of the resulting batches are determined either by a discrete size or, if the field ` + "`byte_size`" + ` is non-zero, then by total size in bytes (which ever limit is reached first).`,
    24  		Description: `
    25  This processor is for breaking batches down into smaller ones. In order to break a single message out into multiple messages use the ` + "[`unarchive` processor](/docs/components/processors/unarchive)" + `.
    26  
    27  If there is a remainder of messages after splitting a batch the remainder is also sent as a single batch. For example, if your target size was 10, and the processor received a batch of 95 message parts, the result would be 9 batches of 10 messages followed by a batch of 5 messages.`,
    28  		UsesBatches: true,
    29  		FieldSpecs: docs.FieldSpecs{
    30  			docs.FieldCommon("size", "The target number of messages."),
    31  			docs.FieldCommon("byte_size", "An optional target of total message bytes."),
    32  		},
    33  	}
    34  }
    35  
    36  //------------------------------------------------------------------------------
    37  
    38  // SplitConfig is a configuration struct containing fields for the Split
    39  // processor, which breaks message batches down into batches of a smaller size.
    40  type SplitConfig struct {
    41  	Size     int `json:"size" yaml:"size"`
    42  	ByteSize int `json:"byte_size" yaml:"byte_size"`
    43  }
    44  
    45  // NewSplitConfig returns a SplitConfig with default values.
    46  func NewSplitConfig() SplitConfig {
    47  	return SplitConfig{
    48  		Size:     1,
    49  		ByteSize: 0,
    50  	}
    51  }
    52  
    53  //------------------------------------------------------------------------------
    54  
    55  // Split is a processor that splits messages into a message per part.
    56  type Split struct {
    57  	log   log.Modular
    58  	stats metrics.Type
    59  
    60  	size     int
    61  	byteSize int
    62  
    63  	mCount     metrics.StatCounter
    64  	mDropped   metrics.StatCounter
    65  	mSent      metrics.StatCounter
    66  	mBatchSent metrics.StatCounter
    67  }
    68  
    69  // NewSplit returns a Split processor.
    70  func NewSplit(
    71  	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
    72  ) (Type, error) {
    73  	return &Split{
    74  		log:   log,
    75  		stats: stats,
    76  
    77  		size:     conf.Split.Size,
    78  		byteSize: conf.Split.ByteSize,
    79  
    80  		mCount:     stats.GetCounter("count"),
    81  		mDropped:   stats.GetCounter("dropped"),
    82  		mSent:      stats.GetCounter("sent"),
    83  		mBatchSent: stats.GetCounter("batch.sent"),
    84  	}, nil
    85  }
    86  
    87  //------------------------------------------------------------------------------
    88  
    89  // ProcessMessage applies the processor to a message, either creating >0
    90  // resulting messages or a response to be sent back to the message source.
    91  func (s *Split) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
    92  	s.mCount.Incr(1)
    93  
    94  	if msg.Len() == 0 {
    95  		s.mDropped.Incr(1)
    96  		return nil, response.NewAck()
    97  	}
    98  
    99  	msgs := []types.Message{}
   100  
   101  	nextMsg := message.New(nil)
   102  	byteSize := 0
   103  
   104  	msg.Iter(func(i int, p types.Part) error {
   105  		if (s.size > 0 && nextMsg.Len() >= s.size) ||
   106  			(s.byteSize > 0 && (byteSize+len(p.Get())) > s.byteSize) {
   107  			if nextMsg.Len() > 0 {
   108  				msgs = append(msgs, nextMsg)
   109  				nextMsg = message.New(nil)
   110  				byteSize = 0
   111  			} else {
   112  				s.log.Warnf("A single message exceeds the target batch byte size of '%v', actual size: '%v'", s.byteSize, len(p.Get()))
   113  			}
   114  		}
   115  		nextMsg.Append(p)
   116  		byteSize += len(p.Get())
   117  		return nil
   118  	})
   119  
   120  	if nextMsg.Len() > 0 {
   121  		msgs = append(msgs, nextMsg)
   122  	}
   123  
   124  	s.mBatchSent.Incr(int64(len(msgs)))
   125  	s.mSent.Incr(int64(msg.Len()))
   126  	return msgs, nil
   127  }
   128  
   129  // CloseAsync shuts down the processor and stops processing requests.
   130  func (s *Split) CloseAsync() {
   131  }
   132  
   133  // WaitForClose blocks until the processor has closed down.
   134  func (s *Split) WaitForClose(timeout time.Duration) error {
   135  	return nil
   136  }
   137  
   138  //------------------------------------------------------------------------------