github.com/argoproj/argo-events@v1.9.1/eventsources/sources/kafka/start.go (about)

     1  /*
     2  Copyright 2018 BlackRock, Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package kafka
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"errors"
    23  	"fmt"
    24  	"strconv"
    25  	"strings"
    26  	"sync"
    27  	"time"
    28  
    29  	"github.com/IBM/sarama"
    30  	"go.uber.org/zap"
    31  
    32  	"github.com/argoproj/argo-events/common"
    33  	"github.com/argoproj/argo-events/common/logging"
    34  	eventbuscommon "github.com/argoproj/argo-events/eventbus/common"
    35  	eventsourcecommon "github.com/argoproj/argo-events/eventsources/common"
    36  	"github.com/argoproj/argo-events/eventsources/sources"
    37  	metrics "github.com/argoproj/argo-events/metrics"
    38  	apicommon "github.com/argoproj/argo-events/pkg/apis/common"
    39  	"github.com/argoproj/argo-events/pkg/apis/events"
    40  	"github.com/argoproj/argo-events/pkg/apis/eventsource/v1alpha1"
    41  )
    42  
    43  // EventListener implements Eventing kafka event source
    44  type EventListener struct {
    45  	EventSourceName  string
    46  	EventName        string
    47  	KafkaEventSource v1alpha1.KafkaEventSource
    48  	Metrics          *metrics.Metrics
    49  }
    50  
    51  // GetEventSourceName returns name of event source
    52  func (el *EventListener) GetEventSourceName() string {
    53  	return el.EventSourceName
    54  }
    55  
    56  // GetEventName returns name of event
    57  func (el *EventListener) GetEventName() string {
    58  	return el.EventName
    59  }
    60  
    61  // GetEventSourceType return type of event server
    62  func (el *EventListener) GetEventSourceType() apicommon.EventSourceType {
    63  	return apicommon.KafkaEvent
    64  }
    65  
    66  func verifyPartitionAvailable(part int32, partitions []int32) bool {
    67  	for _, p := range partitions {
    68  		if part == p {
    69  			return true
    70  		}
    71  	}
    72  	return false
    73  }
    74  
    75  // StartListening starts listening events
    76  func (el *EventListener) StartListening(ctx context.Context, dispatch func([]byte, ...eventsourcecommon.Option) error) error {
    77  	log := logging.FromContext(ctx).
    78  		With(logging.LabelEventSourceType, el.GetEventSourceType(), logging.LabelEventName, el.GetEventName())
    79  	defer sources.Recover(el.GetEventName())
    80  
    81  	log.Info("start kafka event source...")
    82  	kafkaEventSource := &el.KafkaEventSource
    83  
    84  	if kafkaEventSource.ConsumerGroup == nil {
    85  		return el.partitionConsumer(ctx, log, kafkaEventSource, dispatch)
    86  	} else {
    87  		return el.consumerGroupConsumer(ctx, log, kafkaEventSource, dispatch)
    88  	}
    89  }
    90  
    91  func (el *EventListener) consumerGroupConsumer(ctx context.Context, log *zap.SugaredLogger, kafkaEventSource *v1alpha1.KafkaEventSource, dispatch func([]byte, ...eventsourcecommon.Option) error) error {
    92  	config, err := getSaramaConfig(kafkaEventSource, log)
    93  	if err != nil {
    94  		return err
    95  	}
    96  
    97  	switch kafkaEventSource.ConsumerGroup.RebalanceStrategy {
    98  	case "sticky":
    99  		config.Consumer.Group.Rebalance.GroupStrategies = []sarama.BalanceStrategy{sarama.NewBalanceStrategySticky()}
   100  	case "roundrobin":
   101  		config.Consumer.Group.Rebalance.GroupStrategies = []sarama.BalanceStrategy{sarama.NewBalanceStrategyRoundRobin()}
   102  	case "range":
   103  		config.Consumer.Group.Rebalance.GroupStrategies = []sarama.BalanceStrategy{sarama.NewBalanceStrategyRange()}
   104  	default:
   105  		log.Info("Invalid rebalance strategy, using default: range")
   106  		config.Consumer.Group.Rebalance.GroupStrategies = []sarama.BalanceStrategy{sarama.NewBalanceStrategyRange()}
   107  	}
   108  
   109  	consumer := Consumer{
   110  		ready:            make(chan bool),
   111  		dispatch:         dispatch,
   112  		logger:           log,
   113  		kafkaEventSource: kafkaEventSource,
   114  		eventSourceName:  el.EventSourceName,
   115  		eventName:        el.EventName,
   116  		metrics:          el.Metrics,
   117  	}
   118  
   119  	urls := strings.Split(kafkaEventSource.URL, ",")
   120  	client, err := sarama.NewConsumerGroup(urls, kafkaEventSource.ConsumerGroup.GroupName, config)
   121  	if err != nil {
   122  		log.Errorf("Error creating consumer group client: %v", err)
   123  		return err
   124  	}
   125  
   126  	wg := &sync.WaitGroup{}
   127  	wg.Add(1)
   128  	go func() {
   129  		defer wg.Done()
   130  		for {
   131  			// `Consume` should be called inside an infinite loop, when a
   132  			// server-side rebalance happens, the consumer session will need to be
   133  			// recreated to get the new claims
   134  			if err := client.Consume(ctx, []string{kafkaEventSource.Topic}, &consumer); err != nil {
   135  				log.Errorf("Error from consumer: %v", err)
   136  			}
   137  			// check if context was cancelled, signaling that the consumer should stop
   138  			if ctx.Err() != nil {
   139  				log.Infof("Error from context: %v", ctx.Err())
   140  				return
   141  			}
   142  			consumer.ready = make(chan bool)
   143  		}
   144  	}()
   145  
   146  	<-consumer.ready // Await till the consumer has been set up
   147  	log.Info("Sarama consumer group up and running!...")
   148  
   149  	<-ctx.Done()
   150  	log.Info("terminating: context cancelled")
   151  	wg.Wait()
   152  
   153  	if err = client.Close(); err != nil {
   154  		log.Errorf("Error closing client: %v", err)
   155  		return err
   156  	}
   157  
   158  	return nil
   159  }
   160  
   161  func (el *EventListener) partitionConsumer(ctx context.Context, log *zap.SugaredLogger, kafkaEventSource *v1alpha1.KafkaEventSource, dispatch func([]byte, ...eventsourcecommon.Option) error) error {
   162  	defer sources.Recover(el.GetEventName())
   163  
   164  	log.Info("start kafka event source...")
   165  
   166  	var consumer sarama.Consumer
   167  
   168  	log.Info("connecting to Kafka cluster...")
   169  	if err := common.DoWithRetry(kafkaEventSource.ConnectionBackoff, func() error {
   170  		var err error
   171  
   172  		config, err := getSaramaConfig(kafkaEventSource, log)
   173  		if err != nil {
   174  			return err
   175  		}
   176  
   177  		urls := strings.Split(kafkaEventSource.URL, ",")
   178  		consumer, err = sarama.NewConsumer(urls, config)
   179  		if err != nil {
   180  			return err
   181  		}
   182  		return nil
   183  	}); err != nil {
   184  		return fmt.Errorf("failed to connect to Kafka broker for event source %s, %w", el.GetEventName(), err)
   185  	}
   186  
   187  	log = log.With("partition-id", kafkaEventSource.Partition)
   188  
   189  	log.Info("parsing the partition value...")
   190  	pInt, err := strconv.ParseInt(kafkaEventSource.Partition, 10, 32)
   191  	if err != nil {
   192  		return fmt.Errorf("failed to parse Kafka partition %s for event source %s, %w", kafkaEventSource.Partition, el.GetEventName(), err)
   193  	}
   194  	partition := int32(pInt)
   195  
   196  	log.Info("getting available partitions...")
   197  	availablePartitions, err := consumer.Partitions(kafkaEventSource.Topic)
   198  	if err != nil {
   199  		return fmt.Errorf("failed to get the available partitions for topic %s and event source %s, %w", kafkaEventSource.Topic, el.GetEventName(), err)
   200  	}
   201  
   202  	log.Info("verifying the partition exists within available partitions...")
   203  	if ok := verifyPartitionAvailable(partition, availablePartitions); !ok {
   204  		return fmt.Errorf("partition %d is not available. event source %s, %w", partition, el.GetEventName(), err)
   205  	}
   206  
   207  	log.Info("getting partition consumer...")
   208  	partitionConsumer, err := consumer.ConsumePartition(kafkaEventSource.Topic, partition, sarama.OffsetNewest)
   209  	if err != nil {
   210  		return fmt.Errorf("failed to create consumer partition for event source %s, %w", el.GetEventName(), err)
   211  	}
   212  
   213  	processOne := func(msg *sarama.ConsumerMessage) error {
   214  		defer func(start time.Time) {
   215  			el.Metrics.EventProcessingDuration(el.GetEventSourceName(), el.GetEventName(), float64(time.Since(start)/time.Millisecond))
   216  		}(time.Now())
   217  
   218  		log.Info("dispatching event on the data channel...")
   219  		eventData := &events.KafkaEventData{
   220  			Topic:     msg.Topic,
   221  			Key:       string(msg.Key),
   222  			Partition: int(msg.Partition),
   223  			Timestamp: msg.Timestamp.String(),
   224  			Metadata:  kafkaEventSource.Metadata,
   225  		}
   226  
   227  		headers := make(map[string]string)
   228  
   229  		for _, recordHeader := range msg.Headers {
   230  			headers[string(recordHeader.Key)] = string(recordHeader.Value)
   231  		}
   232  
   233  		eventData.Headers = headers
   234  
   235  		if kafkaEventSource.JSONBody {
   236  			eventData.Body = (*json.RawMessage)(&msg.Value)
   237  		} else {
   238  			eventData.Body = msg.Value
   239  		}
   240  		eventBody, err := json.Marshal(eventData)
   241  		if err != nil {
   242  			return fmt.Errorf("failed to marshal the event data, rejecting the event, %w", err)
   243  		}
   244  
   245  		kafkaID := genUniqueID(el.GetEventSourceName(), el.GetEventName(), kafkaEventSource.URL, msg.Topic, msg.Partition, msg.Offset)
   246  
   247  		if err = dispatch(eventBody, eventsourcecommon.WithID(kafkaID)); err != nil {
   248  			return fmt.Errorf("failed to dispatch a Kafka event, %w", err)
   249  		}
   250  		return nil
   251  	}
   252  
   253  	log.Info("listening to messages on the partition...")
   254  	for {
   255  		select {
   256  		case msg := <-partitionConsumer.Messages():
   257  			if err := processOne(msg); err != nil {
   258  				log.Errorw("failed to process a Kafka message", zap.Error(err))
   259  				el.Metrics.EventProcessingFailed(el.GetEventSourceName(), el.GetEventName())
   260  			}
   261  		case err := <-partitionConsumer.Errors():
   262  			return fmt.Errorf("failed to consume messages for event source %s, %w", el.GetEventName(), err)
   263  
   264  		case <-ctx.Done():
   265  			log.Info("event source is stopped, closing partition consumer")
   266  			err = partitionConsumer.Close()
   267  			if err != nil {
   268  				log.Errorw("failed to close consumer", zap.Error(err))
   269  			}
   270  			return nil
   271  		}
   272  	}
   273  }
   274  
   275  func getSaramaConfig(kafkaEventSource *v1alpha1.KafkaEventSource, log *zap.SugaredLogger) (*sarama.Config, error) {
   276  	config, err := common.GetSaramaConfigFromYAMLString(kafkaEventSource.Config)
   277  	if err != nil {
   278  		return nil, err
   279  	}
   280  
   281  	if kafkaEventSource.Version == "" {
   282  		config.Version = sarama.V1_0_0_0
   283  	} else {
   284  		version, err := sarama.ParseKafkaVersion(kafkaEventSource.Version)
   285  		if err != nil {
   286  			log.Errorf("Error parsing Kafka version: %v", err)
   287  			return nil, err
   288  		}
   289  		config.Version = version
   290  	}
   291  
   292  	if kafkaEventSource.SASL != nil {
   293  		config.Net.SASL.Enable = true
   294  
   295  		config.Net.SASL.Mechanism = sarama.SASLMechanism(kafkaEventSource.SASL.GetMechanism())
   296  		if config.Net.SASL.Mechanism == "SCRAM-SHA-512" {
   297  			config.Net.SASL.SCRAMClientGeneratorFunc = func() sarama.SCRAMClient { return &common.XDGSCRAMClient{HashGeneratorFcn: common.SHA512New} }
   298  		} else if config.Net.SASL.Mechanism == "SCRAM-SHA-256" {
   299  			config.Net.SASL.SCRAMClientGeneratorFunc = func() sarama.SCRAMClient { return &common.XDGSCRAMClient{HashGeneratorFcn: common.SHA256New} }
   300  		}
   301  
   302  		user, err := common.GetSecretFromVolume(kafkaEventSource.SASL.UserSecret)
   303  		if err != nil {
   304  			log.Errorf("Error getting user value from secret: %v", err)
   305  			return nil, err
   306  		}
   307  		config.Net.SASL.User = user
   308  
   309  		password, err := common.GetSecretFromVolume(kafkaEventSource.SASL.PasswordSecret)
   310  		if err != nil {
   311  			log.Errorf("Error getting password value from secret: %v", err)
   312  			return nil, err
   313  		}
   314  		config.Net.SASL.Password = password
   315  	}
   316  
   317  	if kafkaEventSource.TLS != nil {
   318  		tlsConfig, err := common.GetTLSConfig(kafkaEventSource.TLS)
   319  		if err != nil {
   320  			return nil, fmt.Errorf("failed to get the tls configuration, %w", err)
   321  		}
   322  		config.Net.TLS.Config = tlsConfig
   323  		config.Net.TLS.Enable = true
   324  	}
   325  
   326  	if kafkaEventSource.ConsumerGroup != nil {
   327  		if kafkaEventSource.ConsumerGroup.Oldest {
   328  			config.Consumer.Offsets.Initial = sarama.OffsetOldest
   329  		}
   330  	}
   331  	return config, nil
   332  }
   333  
   334  // Consumer represents a Sarama consumer group consumer
   335  type Consumer struct {
   336  	ready            chan bool
   337  	dispatch         func([]byte, ...eventsourcecommon.Option) error
   338  	logger           *zap.SugaredLogger
   339  	kafkaEventSource *v1alpha1.KafkaEventSource
   340  	eventSourceName  string
   341  	eventName        string
   342  	metrics          *metrics.Metrics
   343  }
   344  
   345  // Setup is run at the beginning of a new session, before ConsumeClaim
   346  func (consumer *Consumer) Setup(sarama.ConsumerGroupSession) error {
   347  	// Mark the consumer as ready
   348  	close(consumer.ready)
   349  	return nil
   350  }
   351  
   352  // Cleanup is run at the end of a session, once all ConsumeClaim goroutines have exited
   353  func (consumer *Consumer) Cleanup(sarama.ConsumerGroupSession) error {
   354  	return nil
   355  }
   356  
   357  var eventBusErr *eventbuscommon.EventBusError
   358  
   359  // ConsumeClaim must start a consumer loop of ConsumerGroupClaim's Messages().
   360  func (consumer *Consumer) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
   361  	// NOTE:
   362  	// Do not move the code below to a goroutine.
   363  	// The `ConsumeClaim` itself is called within a goroutine, see:
   364  	// https://github.com/IBM/sarama/blob/master/consumer_group.go#L27-L29
   365  	for message := range claim.Messages() {
   366  		if err := consumer.processOne(session, message); err != nil {
   367  			consumer.metrics.EventProcessingFailed(consumer.eventSourceName, consumer.eventName)
   368  			if errors.As(err, &eventBusErr) { // EventBus error, do not continue.
   369  				consumer.logger.Errorw("failed to process a Kafka message due to event bus issue", zap.Error(err))
   370  				break
   371  			} else {
   372  				consumer.logger.Errorw("failed to process a Kafka message, skip it", zap.Error(err))
   373  				continue
   374  			}
   375  		}
   376  		if consumer.kafkaEventSource.LimitEventsPerSecond > 0 {
   377  			// 1000000000 is 1 second in nanoseconds
   378  			d := (1000000000 / time.Duration(consumer.kafkaEventSource.LimitEventsPerSecond) * time.Nanosecond) * time.Nanosecond
   379  			consumer.logger.Debugf("Sleeping for: %v.", d)
   380  			time.Sleep(d)
   381  		}
   382  	}
   383  
   384  	return nil
   385  }
   386  
   387  func (consumer *Consumer) processOne(session sarama.ConsumerGroupSession, message *sarama.ConsumerMessage) error {
   388  	defer func(start time.Time) {
   389  		consumer.metrics.EventProcessingDuration(consumer.eventSourceName, consumer.eventName, float64(time.Since(start)/time.Millisecond))
   390  	}(time.Now())
   391  
   392  	consumer.logger.Info("dispatching event on the data channel...")
   393  	eventData := &events.KafkaEventData{
   394  		Topic:     message.Topic,
   395  		Key:       string(message.Key),
   396  		Partition: int(message.Partition),
   397  		Timestamp: message.Timestamp.String(),
   398  		Metadata:  consumer.kafkaEventSource.Metadata,
   399  	}
   400  
   401  	headers := make(map[string]string)
   402  
   403  	for _, recordHeader := range message.Headers {
   404  		headers[string(recordHeader.Key)] = string(recordHeader.Value)
   405  	}
   406  
   407  	eventData.Headers = headers
   408  
   409  	if consumer.kafkaEventSource.JSONBody {
   410  		eventData.Body = (*json.RawMessage)(&message.Value)
   411  	} else {
   412  		eventData.Body = message.Value
   413  	}
   414  	eventBody, err := json.Marshal(eventData)
   415  	if err != nil {
   416  		return fmt.Errorf("failed to marshal the event data, rejecting the event, %w", err)
   417  	}
   418  
   419  	messageID := genUniqueID(consumer.eventSourceName, consumer.eventName, consumer.kafkaEventSource.URL, message.Topic, message.Partition, message.Offset)
   420  
   421  	if err = consumer.dispatch(eventBody, eventsourcecommon.WithID(messageID)); err != nil {
   422  		return fmt.Errorf("failed to dispatch a kafka event, %w", err)
   423  	}
   424  	session.MarkMessage(message, "")
   425  	return nil
   426  }
   427  
   428  // Function can be passed as Option to generate unique id for kafka event
   429  // eventSourceName:eventName:kafka-url:topic:partition:offset
   430  func genUniqueID(eventSourceName, eventName, kafkaURL, topic string, partition int32, offset int64) string {
   431  	kafkaID := fmt.Sprintf("%s:%s:%s:%s:%d:%d", eventSourceName, eventName, strings.Split(kafkaURL, ",")[0], topic, partition, offset)
   432  
   433  	return kafkaID
   434  }