github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/batch.go (about)

     1  package processor
     2  
     3  import (
     4  	"fmt"
     5  	"sync"
     6  	"time"
     7  
     8  	"github.com/Jeffail/benthos/v3/internal/docs"
     9  	"github.com/Jeffail/benthos/v3/internal/interop"
    10  	"github.com/Jeffail/benthos/v3/lib/condition"
    11  	"github.com/Jeffail/benthos/v3/lib/log"
    12  	"github.com/Jeffail/benthos/v3/lib/message"
    13  	"github.com/Jeffail/benthos/v3/lib/metrics"
    14  	"github.com/Jeffail/benthos/v3/lib/response"
    15  	"github.com/Jeffail/benthos/v3/lib/types"
    16  )
    17  
    18  //------------------------------------------------------------------------------
    19  
    20  func init() {
    21  	Constructors[TypeBatch] = TypeSpec{
    22  		constructor: NewBatch,
    23  		Description: `
    24  DEPRECATED: This processor is no longer supported and has been replaced with
    25  improved batching mechanisms. For more information about batching in Benthos
    26  please check out [this document](/docs/configuration/batching).
    27  
    28  This processor is scheduled to be removed in Benthos V4`,
    29  		Status: docs.StatusDeprecated,
    30  		config: docs.FieldComponent().WithChildren(
    31  			docs.FieldDeprecated("byte_size"),
    32  			docs.FieldDeprecated("count"),
    33  			docs.FieldDeprecated("condition").HasType(docs.FieldTypeCondition),
    34  			docs.FieldDeprecated("period"),
    35  		),
    36  	}
    37  }
    38  
    39  //------------------------------------------------------------------------------
    40  
    41  // BatchConfig contains configuration fields for the Batch processor.
    42  type BatchConfig struct {
    43  	ByteSize  int              `json:"byte_size" yaml:"byte_size"`
    44  	Count     int              `json:"count" yaml:"count"`
    45  	Condition condition.Config `json:"condition" yaml:"condition"`
    46  	Period    string           `json:"period" yaml:"period"`
    47  }
    48  
    49  // NewBatchConfig returns a BatchConfig with default values.
    50  func NewBatchConfig() BatchConfig {
    51  	cond := condition.NewConfig()
    52  	cond.Type = "static"
    53  	cond.Static = false
    54  	return BatchConfig{
    55  		ByteSize:  0,
    56  		Count:     0,
    57  		Condition: cond,
    58  		Period:    "",
    59  	}
    60  }
    61  
    62  //------------------------------------------------------------------------------
    63  
    64  // Batch is a processor that combines messages into a batch until a size limit
    65  // or other condition is reached, at which point the batch is sent out. When a
    66  // message is combined without yet producing a batch a NoAck response is
    67  // returned, which is interpretted as source types as an instruction to send
    68  // another message through but hold off on acknowledging this one.
    69  //
    70  // Eventually, when the batch reaches its target size, the batch is sent through
    71  // the pipeline as a single message and an acknowledgement for that message
    72  // determines whether the whole batch of messages are acknowledged.
    73  //
    74  // TODO: V4 Remove me.
    75  type Batch struct {
    76  	byteSize  int
    77  	count     int
    78  	period    time.Duration
    79  	cond      condition.Type
    80  	sizeTally int
    81  	parts     []types.Part
    82  
    83  	triggered bool
    84  	lastBatch time.Time
    85  	mut       sync.Mutex
    86  
    87  	mSizeBatch   metrics.StatCounter
    88  	mCountBatch  metrics.StatCounter
    89  	mPeriodBatch metrics.StatCounter
    90  	mCondBatch   metrics.StatCounter
    91  
    92  	log   log.Modular
    93  	stats metrics.Type
    94  
    95  	mCount     metrics.StatCounter
    96  	mSent      metrics.StatCounter
    97  	mBatchSent metrics.StatCounter
    98  	mDropped   metrics.StatCounter
    99  }
   100  
   101  // NewBatch returns a Batch processor.
   102  func NewBatch(
   103  	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
   104  ) (Type, error) {
   105  	log.Warnln("The batch processor is deprecated and is scheduled for removal in Benthos V4. For more information about batching in Benthos check out https://benthos.dev/docs/configuration/batching")
   106  
   107  	cMgr, cLog, cStats := interop.LabelChild("condition", mgr, log, stats)
   108  	cond, err := condition.New(conf.Batch.Condition, cMgr, cLog, cStats)
   109  	if err != nil {
   110  		return nil, fmt.Errorf("failed to create condition: %v", err)
   111  	}
   112  	var period time.Duration
   113  	if len(conf.Batch.Period) > 0 {
   114  		if period, err = time.ParseDuration(conf.Batch.Period); err != nil {
   115  			return nil, fmt.Errorf("failed to parse duration string: %v", err)
   116  		}
   117  	}
   118  	return &Batch{
   119  		log:   log,
   120  		stats: stats,
   121  
   122  		byteSize: conf.Batch.ByteSize,
   123  		count:    conf.Batch.Count,
   124  		period:   period,
   125  		cond:     cond,
   126  
   127  		lastBatch: time.Now(),
   128  
   129  		mSizeBatch:   stats.GetCounter("on_size"),
   130  		mCountBatch:  stats.GetCounter("on_count"),
   131  		mPeriodBatch: stats.GetCounter("on_period"),
   132  		mCondBatch:   stats.GetCounter("on_condition"),
   133  
   134  		mCount:     stats.GetCounter("count"),
   135  		mSent:      stats.GetCounter("sent"),
   136  		mBatchSent: stats.GetCounter("batch.sent"),
   137  		mDropped:   stats.GetCounter("dropped"),
   138  	}, nil
   139  }
   140  
   141  //------------------------------------------------------------------------------
   142  
   143  // ProcessMessage applies the processor to a message, either creating >0
   144  // resulting messages or a response to be sent back to the message source.
   145  func (b *Batch) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
   146  	b.mCount.Incr(1)
   147  	b.mut.Lock()
   148  	defer b.mut.Unlock()
   149  
   150  	var batch bool
   151  
   152  	// Add new parts to the buffer.
   153  	msg.Iter(func(i int, p types.Part) error {
   154  		if b.add(p.Copy()) {
   155  			batch = true
   156  		}
   157  		return nil
   158  	})
   159  
   160  	// If we have reached our target count of parts in the buffer.
   161  	if batch {
   162  		if newMsg := b.flush(); newMsg != nil {
   163  			b.mSent.Incr(int64(newMsg.Len()))
   164  			b.mBatchSent.Incr(1)
   165  			return []types.Message{newMsg}, nil
   166  		}
   167  	}
   168  
   169  	b.log.Traceln("Added message to pending batch")
   170  	b.mDropped.Incr(1)
   171  	return nil, response.NewUnack()
   172  }
   173  
   174  // CloseAsync shuts down the processor and stops processing requests.
   175  func (b *Batch) CloseAsync() {
   176  }
   177  
   178  // WaitForClose blocks until the processor has closed down.
   179  func (b *Batch) WaitForClose(timeout time.Duration) error {
   180  	return nil
   181  }
   182  
   183  //------------------------------------------------------------------------------
   184  
   185  func (b *Batch) add(part types.Part) bool {
   186  	b.sizeTally += len(part.Get())
   187  	b.parts = append(b.parts, part)
   188  
   189  	if !b.triggered && b.count > 0 && len(b.parts) >= b.count {
   190  		b.triggered = true
   191  		b.mCountBatch.Incr(1)
   192  		b.log.Traceln("Batching based on count")
   193  	}
   194  	if !b.triggered && b.byteSize > 0 && b.sizeTally >= b.byteSize {
   195  		b.triggered = true
   196  		b.mSizeBatch.Incr(1)
   197  		b.log.Traceln("Batching based on byte_size")
   198  	}
   199  	tmpMsg := message.New(nil)
   200  	tmpMsg.Append(part)
   201  	if !b.triggered && b.cond.Check(tmpMsg) {
   202  		b.triggered = true
   203  		b.mCondBatch.Incr(1)
   204  		b.log.Traceln("Batching based on condition")
   205  	}
   206  
   207  	return b.triggered || (b.period > 0 && time.Since(b.lastBatch) > b.period)
   208  }
   209  
   210  func (b *Batch) flush() types.Message {
   211  	var newMsg types.Message
   212  	if len(b.parts) > 0 {
   213  		if !b.triggered && b.period > 0 && time.Since(b.lastBatch) > b.period {
   214  			b.mPeriodBatch.Incr(1)
   215  			b.log.Traceln("Batching based on period")
   216  		}
   217  		newMsg = message.New(nil)
   218  		newMsg.Append(b.parts...)
   219  	}
   220  	b.parts = nil
   221  	b.sizeTally = 0
   222  	b.lastBatch = time.Now()
   223  	b.triggered = false
   224  
   225  	return newMsg
   226  }
   227  
   228  //------------------------------------------------------------------------------