github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/batch.go (about) 1 package processor 2 3 import ( 4 "fmt" 5 "sync" 6 "time" 7 8 "github.com/Jeffail/benthos/v3/internal/docs" 9 "github.com/Jeffail/benthos/v3/internal/interop" 10 "github.com/Jeffail/benthos/v3/lib/condition" 11 "github.com/Jeffail/benthos/v3/lib/log" 12 "github.com/Jeffail/benthos/v3/lib/message" 13 "github.com/Jeffail/benthos/v3/lib/metrics" 14 "github.com/Jeffail/benthos/v3/lib/response" 15 "github.com/Jeffail/benthos/v3/lib/types" 16 ) 17 18 //------------------------------------------------------------------------------ 19 20 func init() { 21 Constructors[TypeBatch] = TypeSpec{ 22 constructor: NewBatch, 23 Description: ` 24 DEPRECATED: This processor is no longer supported and has been replaced with 25 improved batching mechanisms. For more information about batching in Benthos 26 please check out [this document](/docs/configuration/batching). 27 28 This processor is scheduled to be removed in Benthos V4`, 29 Status: docs.StatusDeprecated, 30 config: docs.FieldComponent().WithChildren( 31 docs.FieldDeprecated("byte_size"), 32 docs.FieldDeprecated("count"), 33 docs.FieldDeprecated("condition").HasType(docs.FieldTypeCondition), 34 docs.FieldDeprecated("period"), 35 ), 36 } 37 } 38 39 //------------------------------------------------------------------------------ 40 41 // BatchConfig contains configuration fields for the Batch processor. 42 type BatchConfig struct { 43 ByteSize int `json:"byte_size" yaml:"byte_size"` 44 Count int `json:"count" yaml:"count"` 45 Condition condition.Config `json:"condition" yaml:"condition"` 46 Period string `json:"period" yaml:"period"` 47 } 48 49 // NewBatchConfig returns a BatchConfig with default values. 50 func NewBatchConfig() BatchConfig { 51 cond := condition.NewConfig() 52 cond.Type = "static" 53 cond.Static = false 54 return BatchConfig{ 55 ByteSize: 0, 56 Count: 0, 57 Condition: cond, 58 Period: "", 59 } 60 } 61 62 //------------------------------------------------------------------------------ 63 64 // Batch is a processor that combines messages into a batch until a size limit 65 // or other condition is reached, at which point the batch is sent out. When a 66 // message is combined without yet producing a batch a NoAck response is 67 // returned, which is interpretted as source types as an instruction to send 68 // another message through but hold off on acknowledging this one. 69 // 70 // Eventually, when the batch reaches its target size, the batch is sent through 71 // the pipeline as a single message and an acknowledgement for that message 72 // determines whether the whole batch of messages are acknowledged. 73 // 74 // TODO: V4 Remove me. 75 type Batch struct { 76 byteSize int 77 count int 78 period time.Duration 79 cond condition.Type 80 sizeTally int 81 parts []types.Part 82 83 triggered bool 84 lastBatch time.Time 85 mut sync.Mutex 86 87 mSizeBatch metrics.StatCounter 88 mCountBatch metrics.StatCounter 89 mPeriodBatch metrics.StatCounter 90 mCondBatch metrics.StatCounter 91 92 log log.Modular 93 stats metrics.Type 94 95 mCount metrics.StatCounter 96 mSent metrics.StatCounter 97 mBatchSent metrics.StatCounter 98 mDropped metrics.StatCounter 99 } 100 101 // NewBatch returns a Batch processor. 102 func NewBatch( 103 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 104 ) (Type, error) { 105 log.Warnln("The batch processor is deprecated and is scheduled for removal in Benthos V4. For more information about batching in Benthos check out https://benthos.dev/docs/configuration/batching") 106 107 cMgr, cLog, cStats := interop.LabelChild("condition", mgr, log, stats) 108 cond, err := condition.New(conf.Batch.Condition, cMgr, cLog, cStats) 109 if err != nil { 110 return nil, fmt.Errorf("failed to create condition: %v", err) 111 } 112 var period time.Duration 113 if len(conf.Batch.Period) > 0 { 114 if period, err = time.ParseDuration(conf.Batch.Period); err != nil { 115 return nil, fmt.Errorf("failed to parse duration string: %v", err) 116 } 117 } 118 return &Batch{ 119 log: log, 120 stats: stats, 121 122 byteSize: conf.Batch.ByteSize, 123 count: conf.Batch.Count, 124 period: period, 125 cond: cond, 126 127 lastBatch: time.Now(), 128 129 mSizeBatch: stats.GetCounter("on_size"), 130 mCountBatch: stats.GetCounter("on_count"), 131 mPeriodBatch: stats.GetCounter("on_period"), 132 mCondBatch: stats.GetCounter("on_condition"), 133 134 mCount: stats.GetCounter("count"), 135 mSent: stats.GetCounter("sent"), 136 mBatchSent: stats.GetCounter("batch.sent"), 137 mDropped: stats.GetCounter("dropped"), 138 }, nil 139 } 140 141 //------------------------------------------------------------------------------ 142 143 // ProcessMessage applies the processor to a message, either creating >0 144 // resulting messages or a response to be sent back to the message source. 145 func (b *Batch) ProcessMessage(msg types.Message) ([]types.Message, types.Response) { 146 b.mCount.Incr(1) 147 b.mut.Lock() 148 defer b.mut.Unlock() 149 150 var batch bool 151 152 // Add new parts to the buffer. 153 msg.Iter(func(i int, p types.Part) error { 154 if b.add(p.Copy()) { 155 batch = true 156 } 157 return nil 158 }) 159 160 // If we have reached our target count of parts in the buffer. 161 if batch { 162 if newMsg := b.flush(); newMsg != nil { 163 b.mSent.Incr(int64(newMsg.Len())) 164 b.mBatchSent.Incr(1) 165 return []types.Message{newMsg}, nil 166 } 167 } 168 169 b.log.Traceln("Added message to pending batch") 170 b.mDropped.Incr(1) 171 return nil, response.NewUnack() 172 } 173 174 // CloseAsync shuts down the processor and stops processing requests. 175 func (b *Batch) CloseAsync() { 176 } 177 178 // WaitForClose blocks until the processor has closed down. 179 func (b *Batch) WaitForClose(timeout time.Duration) error { 180 return nil 181 } 182 183 //------------------------------------------------------------------------------ 184 185 func (b *Batch) add(part types.Part) bool { 186 b.sizeTally += len(part.Get()) 187 b.parts = append(b.parts, part) 188 189 if !b.triggered && b.count > 0 && len(b.parts) >= b.count { 190 b.triggered = true 191 b.mCountBatch.Incr(1) 192 b.log.Traceln("Batching based on count") 193 } 194 if !b.triggered && b.byteSize > 0 && b.sizeTally >= b.byteSize { 195 b.triggered = true 196 b.mSizeBatch.Incr(1) 197 b.log.Traceln("Batching based on byte_size") 198 } 199 tmpMsg := message.New(nil) 200 tmpMsg.Append(part) 201 if !b.triggered && b.cond.Check(tmpMsg) { 202 b.triggered = true 203 b.mCondBatch.Incr(1) 204 b.log.Traceln("Batching based on condition") 205 } 206 207 return b.triggered || (b.period > 0 && time.Since(b.lastBatch) > b.period) 208 } 209 210 func (b *Batch) flush() types.Message { 211 var newMsg types.Message 212 if len(b.parts) > 0 { 213 if !b.triggered && b.period > 0 && time.Since(b.lastBatch) > b.period { 214 b.mPeriodBatch.Incr(1) 215 b.log.Traceln("Batching based on period") 216 } 217 newMsg = message.New(nil) 218 newMsg.Append(b.parts...) 219 } 220 b.parts = nil 221 b.sizeTally = 0 222 b.lastBatch = time.Now() 223 b.triggered = false 224 225 return newMsg 226 } 227 228 //------------------------------------------------------------------------------