// Source: github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/group_by_value.go

package processor

import (
	"fmt"
	"time"

	"github.com/Jeffail/benthos/v3/internal/bloblang/field"
	"github.com/Jeffail/benthos/v3/internal/docs"
	"github.com/Jeffail/benthos/v3/internal/interop"
	"github.com/Jeffail/benthos/v3/internal/tracing"
	"github.com/Jeffail/benthos/v3/lib/log"
	"github.com/Jeffail/benthos/v3/lib/message"
	"github.com/Jeffail/benthos/v3/lib/metrics"
	"github.com/Jeffail/benthos/v3/lib/response"
	"github.com/Jeffail/benthos/v3/lib/types"
)

//------------------------------------------------------------------------------

// init stores the TypeSpec for this processor in Constructors under the
// TypeGroupByValue key. The spec carries the constructor (NewGroupByValue),
// the category, the user-facing documentation strings and the description of
// the single "value" config field.
func init() {
	Constructors[TypeGroupByValue] = TypeSpec{
		constructor: NewGroupByValue,
		Categories: []Category{
			CategoryComposition,
		},
		Summary: `
Splits a batch of messages into N batches, where each resulting batch contains a
group of messages determined by a
[function interpolated string](/docs/configuration/interpolation#bloblang-queries) evaluated
per message.`,
		Description: `
This allows you to group messages using arbitrary fields within their content or
metadata, process them individually, and send them to unique locations as per
their group.`,
		// The fenced YAML block is assembled by concatenation because a Go raw
		// string literal cannot itself contain backticks.
		Footnotes: `
## Examples

If we were consuming Kafka messages and needed to group them by their key,
archive the groups, and send them to S3 with the key as part of the path we
could achieve that with the following:

` + "```yaml" + `
pipeline:
  processors:
    - group_by_value:
        value: ${! meta("kafka_key") }
    - archive:
        format: tar
    - compress:
        algorithm: gzip
output:
  aws_s3:
    bucket: TODO
    path: docs/${! meta("kafka_key") }/${! count("files") }-${! timestamp_unix_nano() }.tar.gz
` + "```" + ``,
		FieldSpecs: docs.FieldSpecs{
			// "value" is documented with two example expressions and flagged as
			// an interpolated field.
			docs.FieldCommon(
				"value", "The interpolated string to group based on.",
				"${! meta(\"kafka_key\") }", "${! json(\"foo.bar\") }-${! meta(\"baz\") }",
			).IsInterpolated(),
		},
		UsesBatches: true,
	}
}

//------------------------------------------------------------------------------

// GroupByValueConfig is a configuration struct containing fields for the
// GroupByValue processor, which breaks message batches down into N batches of a
// smaller size according to a function interpolated string evaluated per
// message part.
type GroupByValueConfig struct {
	// Value is the interpolated string whose per-part result is used as the
	// group key. It is (de)serialised under the "value" key in JSON and YAML.
	Value string `json:"value" yaml:"value"`
}

// NewGroupByValueConfig returns a GroupByValueConfig with default values. The
// default Value is the interpolation ${! meta("example") }.
func NewGroupByValueConfig() GroupByValueConfig {
	return GroupByValueConfig{
		Value: "${! meta(\"example\") }",
	}
}

//------------------------------------------------------------------------------

// GroupByValue is a processor that breaks message batches down into N batches
// of a smaller size according to a function interpolated string evaluated per
// message part.
type GroupByValue struct {
	log   log.Modular
	stats metrics.Type

	// value is the parsed grouping expression; ProcessMessage evaluates it
	// once per message part to obtain that part's group key.
	value *field.Expression

	// mCount is incremented once per ProcessMessage call.
	mCount metrics.StatCounter
	// mGroups is set to the number of groups produced by the latest call.
	mGroups metrics.StatGauge
	// mSent is incremented by the number of parts in each emitted batch.
	mSent metrics.StatCounter
	// mBatchSent is incremented by the number of batches emitted.
	mBatchSent metrics.StatCounter
}

// NewGroupByValue returns a GroupByValue processor.
101 func NewGroupByValue( 102 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 103 ) (Type, error) { 104 value, err := interop.NewBloblangField(mgr, conf.GroupByValue.Value) 105 if err != nil { 106 return nil, fmt.Errorf("failed to parse value expression: %v", err) 107 } 108 return &GroupByValue{ 109 log: log, 110 stats: stats, 111 112 value: value, 113 114 mCount: stats.GetCounter("count"), 115 mGroups: stats.GetGauge("groups"), 116 mSent: stats.GetCounter("sent"), 117 mBatchSent: stats.GetCounter("batch.sent"), 118 }, nil 119 } 120 121 //------------------------------------------------------------------------------ 122 123 // ProcessMessage applies the processor to a message, either creating >0 124 // resulting messages or a response to be sent back to the message source. 125 func (g *GroupByValue) ProcessMessage(msg types.Message) ([]types.Message, types.Response) { 126 g.mCount.Incr(1) 127 128 if msg.Len() == 0 { 129 return nil, response.NewAck() 130 } 131 132 groupKeys := []string{} 133 groupMap := map[string]types.Message{} 134 135 spans := tracing.CreateChildSpans(TypeGroupByValue, msg) 136 137 msg.Iter(func(i int, p types.Part) error { 138 v := g.value.String(i, msg) 139 spans[i].LogKV( 140 "event", "grouped", 141 "type", v, 142 ) 143 spans[i].SetTag("group", v) 144 if group, exists := groupMap[v]; exists { 145 group.Append(p) 146 } else { 147 g.log.Tracef("New group formed: %v\n", v) 148 groupKeys = append(groupKeys, v) 149 newMsg := message.New(nil) 150 newMsg.Append(p) 151 groupMap[v] = newMsg 152 } 153 return nil 154 }) 155 156 for _, s := range spans { 157 s.Finish() 158 } 159 160 msgs := []types.Message{} 161 for _, key := range groupKeys { 162 msgs = append(msgs, groupMap[key]) 163 } 164 165 g.mGroups.Set(int64(len(groupKeys))) 166 167 if len(msgs) == 0 { 168 return nil, response.NewAck() 169 } 170 171 g.mBatchSent.Incr(int64(len(msgs))) 172 for _, m := range msgs { 173 g.mSent.Incr(int64(m.Len())) 174 } 175 return msgs, nil 176 } 
177 178 // CloseAsync shuts down the processor and stops processing requests. 179 func (g *GroupByValue) CloseAsync() { 180 } 181 182 // WaitForClose blocks until the processor has closed down. 183 func (g *GroupByValue) WaitForClose(timeout time.Duration) error { 184 return nil 185 } 186 187 //------------------------------------------------------------------------------