github.com/Jeffail/benthos/v3@v3.65.0/lib/input/reader/kafka_cg.go

package reader

import (
	"context"
	"crypto/tls"
	"fmt"
	"io"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/Jeffail/benthos/v3/lib/log"
	"github.com/Jeffail/benthos/v3/lib/message"
	"github.com/Jeffail/benthos/v3/lib/message/batch"
	"github.com/Jeffail/benthos/v3/lib/metrics"
	"github.com/Jeffail/benthos/v3/lib/types"
	"github.com/Shopify/sarama"
)

//------------------------------------------------------------------------------

type asyncMessage struct {
	msg   types.Message
	ackFn AsyncAckFn
}

// KafkaCG is an input type that reads from a Kafka cluster by balancing
// partitions across other consumers of the same consumer group.
type KafkaCG struct {
	version   sarama.KafkaVersion
	tlsConf   *tls.Config
	addresses []string
	topics    []string

	commitPeriod      time.Duration
	sessionTimeout    time.Duration
	heartbeatInterval time.Duration
	rebalanceTimeout  time.Duration
	maxProcPeriod     time.Duration

	cMut          sync.Mutex
	groupCancelFn context.CancelFunc
	session       sarama.ConsumerGroupSession
	msgChan       chan asyncMessage

	mRebalanced metrics.StatCounter

	conf  KafkaBalancedConfig
	stats metrics.Type
	log   log.Modular
	mgr   types.Manager

	closeOnce  sync.Once
	closedChan chan struct{}
}

// NewKafkaCG creates a new KafkaCG input type.
func NewKafkaCG(
	conf KafkaBalancedConfig, mgr types.Manager, log log.Modular, stats metrics.Type,
) (*KafkaCG, error) {
	if conf.Batching.IsNoop() {
		conf.Batching.Count = 1
	}
	k := KafkaCG{
		conf:          conf,
		stats:         stats,
		groupCancelFn: func() {},
		log:           log,
		mgr:           mgr,
		mRebalanced:   stats.GetCounter("rebalanced"),
		closedChan:    make(chan struct{}),
	}
	if conf.TLS.Enabled {
		var err error
		if k.tlsConf, err = conf.TLS.Get(); err != nil {
			return nil, err
		}
	}
	for _, addr := range conf.Addresses {
		for _, splitAddr := range strings.Split(addr, ",") {
			if trimmed := strings.TrimSpace(splitAddr); len(trimmed) > 0 {
				k.addresses = append(k.addresses, trimmed)
			}
		}
	}
	for _, t := range conf.Topics {
		for _, splitTopics := range strings.Split(t, ",") {
			if trimmed := strings.TrimSpace(splitTopics); len(trimmed) > 0 {
				k.topics = append(k.topics, trimmed)
			}
		}
	}
	if tout := conf.CommitPeriod; len(tout) > 0 {
		var err error
		if k.commitPeriod, err = time.ParseDuration(tout); err != nil {
			return nil, fmt.Errorf("failed to parse commit period string: %v", err)
		}
	}
	if tout := conf.Group.SessionTimeout; len(tout) > 0 {
		var err error
		if k.sessionTimeout, err = time.ParseDuration(tout); err != nil {
			return nil, fmt.Errorf("failed to parse session timeout string: %v", err)
		}
	}
	if tout := conf.Group.HeartbeatInterval; len(tout) > 0 {
		var err error
		if k.heartbeatInterval, err = time.ParseDuration(tout); err != nil {
			return nil, fmt.Errorf("failed to parse heartbeat interval string: %v", err)
		}
	}
	if tout := conf.Group.RebalanceTimeout; len(tout) > 0 {
		var err error
		if k.rebalanceTimeout, err = time.ParseDuration(tout); err != nil {
			return nil, fmt.Errorf("failed to parse rebalance timeout string: %v", err)
		}
	}
	if tout := conf.MaxProcessingPeriod; len(tout) > 0 {
		var err error
		if k.maxProcPeriod, err = time.ParseDuration(tout); err != nil {
			return nil, fmt.Errorf("failed to parse max processing period string: %v", err)
		}
	}

	var err error
	if k.version, err = sarama.ParseKafkaVersion(conf.TargetVersion); err != nil {
		return nil, err
	}
	return &k, nil
}

//------------------------------------------------------------------------------

// Setup is run at the beginning of a new session, before ConsumeClaim.
func (k *KafkaCG) Setup(sesh sarama.ConsumerGroupSession) error {
	k.cMut.Lock()
	k.session = sesh
	k.cMut.Unlock()
	k.mRebalanced.Incr(1)
	return nil
}

// Cleanup is run at the end of a session, once all ConsumeClaim goroutines have
// exited but before the offsets are committed for the very last time.
func (k *KafkaCG) Cleanup(sesh sarama.ConsumerGroupSession) error {
	k.cMut.Lock()
	k.session = nil
	k.cMut.Unlock()
	return nil
}

// ConsumeClaim must start a consumer loop of ConsumerGroupClaim's Messages().
// Once the Messages() channel is closed, the Handler must finish its processing
// loop and exit.
func (k *KafkaCG) ConsumeClaim(sess sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
	topic, partition := claim.Topic(), claim.Partition()
	k.log.Debugf("Consuming messages from topic '%v' partition '%v'\n", topic, partition)
	defer k.log.Debugf("Stopped consuming messages from topic '%v' partition '%v'\n", topic, partition)

	ackedChan := make(chan error)

	latestOffset := claim.InitialOffset()
	batchPolicy, err := batch.NewPolicy(k.conf.Batching, k.mgr, k.log, k.stats)
	if err != nil {
		k.log.Errorf("Failed to initialise batch policy: %v, falling back to single messages.\n", err)
		fallBackConf := batch.NewPolicyConfig()
		fallBackConf.Count = 1
		if batchPolicy, err = batch.NewPolicy(fallBackConf, k.mgr, k.log, k.stats); err != nil {
			k.log.Errorf("Failed to initialise fallback batch policy: %v.\n", err)
			// The consume claim gets reopened immediately so let's try and
			// avoid a busy loop (this should never happen anyway).
			<-time.After(time.Second)
			return err
		}
	}
	defer batchPolicy.CloseAsync()

	var nextTimedBatchChan <-chan time.Time
	flushBatch := func(topic string, partition int32, offset int64) bool {
		nextTimedBatchChan = nil
		msg := batchPolicy.Flush()
		if msg == nil {
			return true
		}
		select {
		case k.msgChan <- asyncMessage{
			msg: msg,
			ackFn: func(ctx context.Context, res types.Response) error {
				resErr := res.Error()
				if resErr == nil {
					k.cMut.Lock()
					if k.session != nil {
						k.log.Debugf("Marking offset for topic '%v' partition '%v'.\n", topic, partition)
						k.session.MarkOffset(topic, partition, offset, "")
					} else {
						k.log.Debugf("Unable to mark offset for topic '%v' partition '%v'.\n", topic, partition)
					}
					k.cMut.Unlock()
				}
				select {
				case ackedChan <- resErr:
				case <-sess.Context().Done():
				}
				return nil
			},
		}:
			select {
			case resErr := <-ackedChan:
				if resErr != nil {
					k.log.Errorf("Received error from message batch: %v, shutting down consumer.\n", resErr)
					return false
				}
			case <-sess.Context().Done():
				return false
			}
		case <-sess.Context().Done():
			return false
		}
		return true
	}

	for {
		if nextTimedBatchChan == nil {
			if tNext := batchPolicy.UntilNext(); tNext >= 0 {
				nextTimedBatchChan = time.After(tNext)
			}
		}
		select {
		case <-nextTimedBatchChan:
			if !flushBatch(claim.Topic(), claim.Partition(), latestOffset+1) {
				return nil
			}
		case data, open := <-claim.Messages():
			if !open {
				return nil
			}
			latestOffset = data.Offset
			part := message.NewPart(data.Value)

			meta := part.Metadata()
			for _, hdr := range data.Headers {
				meta.Set(string(hdr.Key), string(hdr.Value))
			}

			lag := claim.HighWaterMarkOffset() - data.Offset - 1
			if lag < 0 {
				lag = 0
			}

			meta.Set("kafka_key", string(data.Key))
			meta.Set("kafka_partition", strconv.Itoa(int(data.Partition)))
			meta.Set("kafka_topic", data.Topic)
			meta.Set("kafka_offset", strconv.Itoa(int(data.Offset)))
			meta.Set("kafka_lag", strconv.FormatInt(lag, 10))
			meta.Set("kafka_timestamp_unix", strconv.FormatInt(data.Timestamp.Unix(), 10))

			if batchPolicy.Add(part) {
				if !flushBatch(claim.Topic(), claim.Partition(), latestOffset+1) {
					return nil
				}
			}
		case <-sess.Context().Done():
			return nil
		}
	}
}

//------------------------------------------------------------------------------

func (k *KafkaCG) closeGroup() {
	k.cMut.Lock()
	cancelFn := k.groupCancelFn
	k.cMut.Unlock()

	if cancelFn != nil {
		k.log.Debugln("Closing group consumers.")
		cancelFn()
	}

	k.closeOnce.Do(func() {
		close(k.closedChan)
	})
}

//------------------------------------------------------------------------------

// ConnectWithContext establishes a KafkaCG connection.
func (k *KafkaCG) ConnectWithContext(ctx context.Context) error {
	k.cMut.Lock()
	defer k.cMut.Unlock()
	if k.msgChan != nil {
		return nil
	}

	config := sarama.NewConfig()
	config.ClientID = k.conf.ClientID
	config.RackID = k.conf.RackID
	config.Net.DialTimeout = time.Second
	config.Version = k.version
	config.Consumer.Return.Errors = true
	config.Consumer.MaxProcessingTime = k.maxProcPeriod
	config.Consumer.Offsets.AutoCommit.Enable = true
	config.Consumer.Offsets.AutoCommit.Interval = k.commitPeriod
	config.Consumer.Group.Session.Timeout = k.sessionTimeout
	config.Consumer.Group.Heartbeat.Interval = k.heartbeatInterval
	config.Consumer.Group.Rebalance.Timeout = k.rebalanceTimeout
	config.ChannelBufferSize = k.conf.FetchBufferCap

	if config.Net.ReadTimeout <= k.sessionTimeout {
		config.Net.ReadTimeout = k.sessionTimeout * 2
	}
	if config.Net.ReadTimeout <= k.rebalanceTimeout {
		config.Net.ReadTimeout = k.rebalanceTimeout * 2
	}

	config.Net.TLS.Enable = k.conf.TLS.Enabled
	if k.conf.TLS.Enabled {
		config.Net.TLS.Config = k.tlsConf
	}
	if k.conf.StartFromOldest {
		config.Consumer.Offsets.Initial = sarama.OffsetOldest
	}

	if err := k.conf.SASL.Apply(k.mgr, config); err != nil {
		return err
	}

	// Start a new consumer group
	group, err := sarama.NewConsumerGroup(k.addresses, k.conf.ConsumerGroup, config)
	if err != nil {
		return err
	}

	// Handle errors
	go func() {
		for {
			gerr, open := <-group.Errors()
			if !open {
				return
			}
			if gerr != nil {
				k.log.Errorf("KafkaCG message recv error: %v\n", gerr)
				if cerr, ok := gerr.(*sarama.ConsumerError); ok {
					if cerr.Err == sarama.ErrUnknownMemberId {
						// Sarama doesn't seem to recover from this error.
						go k.closeGroup()
					}
				}
			}
		}
	}()

	// Handle session
	go func() {
	groupLoop:
		for {
			ctx, doneFn := context.WithCancel(context.Background())

			k.cMut.Lock()
			k.groupCancelFn = doneFn
			k.cMut.Unlock()

			k.log.Debugln("Starting consumer group")
			gerr := group.Consume(ctx, k.topics, k)
			select {
			case <-ctx.Done():
				break groupLoop
			default:
			}
			doneFn()
			if gerr != nil {
				if gerr != io.EOF {
					k.log.Errorf("KafkaCG group session error: %v\n", gerr)
				}
				break groupLoop
			}
		}
		k.log.Debugln("Closing consumer group")

		group.Close()

		k.cMut.Lock()
		if k.msgChan != nil {
			close(k.msgChan)
			k.msgChan = nil
		}
		k.cMut.Unlock()
	}()

	k.msgChan = make(chan asyncMessage)

	k.log.Infof("Receiving kafka messages from brokers %s as group '%v'\n", k.addresses, k.conf.ConsumerGroup)
	return nil
}

// ReadWithContext attempts to read a message from a KafkaCG topic.
func (k *KafkaCG) ReadWithContext(ctx context.Context) (types.Message, AsyncAckFn, error) {
	k.cMut.Lock()
	msgChan := k.msgChan
	k.cMut.Unlock()

	if msgChan == nil {
		return nil, nil, types.ErrNotConnected
	}

	select {
	case m, open := <-msgChan:
		if !open {
			return nil, nil, types.ErrNotConnected
		}
		return m.msg, m.ackFn, nil
	case <-ctx.Done():
	}
	return nil, nil, types.ErrTimeout
}

// CloseAsync shuts down the KafkaCG input and stops processing requests.
func (k *KafkaCG) CloseAsync() {
	go k.closeGroup()
}

// WaitForClose blocks until the KafkaCG input has closed down.
func (k *KafkaCG) WaitForClose(timeout time.Duration) error {
	select {
	case <-k.closedChan:
	case <-time.After(timeout):
		return types.ErrTimeout
	}
	return nil
}

//------------------------------------------------------------------------------
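
A minimal usage sketch follows, showing how a caller in another package might drive this reader: construct a KafkaCG, connect, read one batch, and acknowledge it via the returned AsyncAckFn so its offsets get marked for the auto-committer. The wrapper function consumeOnce, the example package name, and the choice of response.NewAck() as the acknowledgement response are illustrative assumptions, not part of this file; the manager, logger and metrics aggregator are assumed to be supplied by the caller.

// Package example is a hypothetical caller of the reader package above.
package example

import (
	"context"
	"time"

	"github.com/Jeffail/benthos/v3/lib/input/reader"
	"github.com/Jeffail/benthos/v3/lib/log"
	"github.com/Jeffail/benthos/v3/lib/metrics"
	"github.com/Jeffail/benthos/v3/lib/response"
	"github.com/Jeffail/benthos/v3/lib/types"
)

// consumeOnce connects a KafkaCG reader, reads a single batch, acknowledges it
// (which marks the batch offsets on the consumer group session), then shuts
// the reader down again.
func consumeOnce(ctx context.Context, conf reader.KafkaBalancedConfig, mgr types.Manager, logger log.Modular, stats metrics.Type) error {
	k, err := reader.NewKafkaCG(conf, mgr, logger, stats)
	if err != nil {
		return err
	}
	defer func() {
		k.CloseAsync()
		_ = k.WaitForClose(time.Second * 5)
	}()

	if err := k.ConnectWithContext(ctx); err != nil {
		return err
	}

	msg, ackFn, err := k.ReadWithContext(ctx)
	if err != nil {
		// types.ErrTimeout means the context expired before a batch arrived;
		// a real caller would normally retry in a loop.
		return err
	}
	logger.Infof("Read batch of %v messages\n", msg.Len())

	// Acknowledging with a successful response marks the batch offsets, which
	// the periodic auto-committer then commits to the broker.
	return ackFn(ctx, response.NewAck())
}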