github.com/argoproj/argo-events@v1.9.1/eventbus/kafka/sensor/kafka_handler.go (about)

     1  package kafka
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/IBM/sarama"
    10  	"github.com/argoproj/argo-events/eventbus/kafka/base"
    11  	"go.uber.org/zap"
    12  )
    13  
    14  type KafkaHandler struct {
    15  	*sync.Mutex
    16  	Logger *zap.SugaredLogger
    17  
    18  	// kafka details
    19  	GroupName     string
    20  	Producer      sarama.AsyncProducer
    21  	OffsetManager sarama.OffsetManager
    22  	TriggerTopic  string
    23  
    24  	// handler functions
    25  	// one function for each consumed topic, return messages, an
    26  	// offset and an optional function that will in a transaction
    27  	Handlers map[string]func(*sarama.ConsumerMessage) ([]*sarama.ProducerMessage, int64, func())
    28  
    29  	// cleanup function
    30  	// used to clear state when consumer group is rebalanced
    31  	Reset func() error
    32  
    33  	// maintains a mapping of keys (which correspond to triggers)
    34  	// to offsets, used to ensure triggers aren't invoked twice
    35  	checkpoints Checkpoints
    36  }
    37  
    38  type Checkpoints map[string]map[int32]*Checkpoint
    39  
    40  type Checkpoint struct {
    41  	Logger  *zap.SugaredLogger
    42  	Init    bool
    43  	Offsets map[string]int64
    44  }
    45  
    46  func (c *Checkpoint) Skip(key string, offset int64) bool {
    47  	if c.Offsets == nil {
    48  		return false
    49  	}
    50  	return offset < c.Offsets[key]
    51  }
    52  
    53  func (c *Checkpoint) Set(key string, offset int64) {
    54  	if c.Offsets == nil {
    55  		c.Offsets = map[string]int64{}
    56  	}
    57  	c.Offsets[key] = offset
    58  }
    59  
    60  func (c *Checkpoint) Metadata() string {
    61  	if c.Offsets == nil {
    62  		return ""
    63  	}
    64  
    65  	metadata, err := json.Marshal(c.Offsets)
    66  	if err != nil {
    67  		c.Logger.Errorw("Failed to serialize metadata", err)
    68  		return ""
    69  	}
    70  
    71  	return string(metadata)
    72  }
    73  
    74  func (h *KafkaHandler) Setup(session sarama.ConsumerGroupSession) error {
    75  	h.Logger.Infow("Kafka setup", zap.Any("claims", session.Claims()))
    76  
    77  	// instantiates checkpoints for all topic/partitions managed by
    78  	// this claim
    79  	h.checkpoints = Checkpoints{}
    80  
    81  	for topic, partitions := range session.Claims() {
    82  		h.checkpoints[topic] = map[int32]*Checkpoint{}
    83  
    84  		for _, partition := range partitions {
    85  			partitionOffsetManager, err := h.OffsetManager.ManagePartition(topic, partition)
    86  			if err != nil {
    87  				return err
    88  			}
    89  
    90  			func() {
    91  				var offsets map[string]int64
    92  
    93  				defer partitionOffsetManager.AsyncClose()
    94  				offset, metadata := partitionOffsetManager.NextOffset()
    95  
    96  				// only need to manage the offsets for each trigger
    97  				// with respect to the trigger topic
    98  				if topic == h.TriggerTopic && metadata != "" {
    99  					if err := json.Unmarshal([]byte(metadata), &offsets); err != nil {
   100  						// if metadata is invalid json, it will be
   101  						// reset to an empty map
   102  						h.Logger.Errorw("Failed to deserialize metadata, resetting", err)
   103  					}
   104  				}
   105  
   106  				h.checkpoints[topic][partition] = &Checkpoint{
   107  					Logger:  h.Logger,
   108  					Init:    offset == -1, // mark offset when first message consumed
   109  					Offsets: offsets,
   110  				}
   111  			}()
   112  
   113  			h.OffsetManager.Commit()
   114  			if err := partitionOffsetManager.Close(); err != nil {
   115  				return err
   116  			}
   117  		}
   118  	}
   119  
   120  	return nil
   121  }
   122  
   123  func (h *KafkaHandler) Cleanup(session sarama.ConsumerGroupSession) error {
   124  	h.Logger.Infow("Kafka cleanup", zap.Any("claims", session.Claims()))
   125  	return h.Reset()
   126  }
   127  
   128  func (h *KafkaHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
   129  	handler, ok := h.Handlers[claim.Topic()]
   130  	if !ok {
   131  		return fmt.Errorf("unrecognized topic %s", claim.Topic())
   132  	}
   133  
   134  	checkpoint, ok := h.checkpoints[claim.Topic()][claim.Partition()]
   135  	if !ok {
   136  		return fmt.Errorf("unrecognized topic %s or partition %d", claim.Topic(), claim.Partition())
   137  	}
   138  
   139  	// Batch messsages from the claim message channel. A message will
   140  	// be produced to the batched channel if the max batch size is
   141  	// reached or the time limit has elapsed, whichever happens
   142  	// first. Batching helps optimize kafka transactions.
   143  	batch := base.Batch(100, 1*time.Second, claim.Messages())
   144  
   145  	for {
   146  		select {
   147  		case msgs := <-batch:
   148  			if len(msgs) == 0 {
   149  				h.Logger.Warn("Kafka batch contains no messages")
   150  				continue
   151  			}
   152  
   153  			transaction := &KafkaTransaction{
   154  				Logger:        h.Logger,
   155  				Producer:      h.Producer,
   156  				GroupName:     h.GroupName,
   157  				Topic:         claim.Topic(),
   158  				Partition:     claim.Partition(),
   159  				ResetOffset:   msgs[0].Offset,
   160  				ResetMetadata: checkpoint.Metadata(),
   161  			}
   162  
   163  			var messages []*sarama.ProducerMessage
   164  			var offset int64
   165  			var fns []func()
   166  
   167  			for _, msg := range msgs {
   168  				key := string(msg.Key)
   169  
   170  				h.Logger.Infow("Received message",
   171  					zap.String("topic", msg.Topic),
   172  					zap.String("key", key),
   173  					zap.Int32("partition", msg.Partition),
   174  					zap.Int64("offset", msg.Offset))
   175  
   176  				if checkpoint.Init {
   177  					// mark offset in order to reconsume from this
   178  					// offset if a restart occurs
   179  					session.MarkOffset(msg.Topic, msg.Partition, msg.Offset, "")
   180  					session.Commit()
   181  					checkpoint.Init = false
   182  				}
   183  
   184  				if checkpoint.Skip(key, msg.Offset) {
   185  					h.Logger.Infof("Skipping trigger '%s' (%d<%d)", key, msg.Offset, checkpoint.Offsets[key])
   186  					continue
   187  				}
   188  
   189  				m, o, f := handler(msg)
   190  				if msg.Topic == h.TriggerTopic && len(m) > 0 {
   191  					// when a trigger is invoked (there is a message)
   192  					// update the checkpoint to ensure the trigger
   193  					// is not re-invoked in the case of a restart
   194  					checkpoint.Set(key, msg.Offset+1)
   195  				}
   196  
   197  				// update transacation information
   198  				messages = append(messages, m...)
   199  				offset = o
   200  				if f != nil {
   201  					fns = append(fns, f)
   202  				}
   203  			}
   204  
   205  			func() {
   206  				h.Lock()
   207  				defer h.Unlock()
   208  				if err := transaction.Commit(session, messages, offset, checkpoint.Metadata()); err != nil {
   209  					h.Logger.Errorw("Transaction error", zap.Error(err))
   210  				}
   211  			}()
   212  
   213  			// invoke (action) functions asynchronously
   214  			for _, fn := range fns {
   215  				go fn()
   216  			}
   217  		case <-session.Context().Done():
   218  			return nil
   219  		}
   220  	}
   221  }
   222  
   223  func (h *KafkaHandler) Close() error {
   224  	h.Lock()
   225  	defer h.Unlock()
   226  
   227  	if err := h.OffsetManager.Close(); err != nil {
   228  		return err
   229  	}
   230  
   231  	return h.Producer.Close()
   232  }