github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/event/target/kafka.go

// Copyright (c) 2015-2023 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

package target

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"encoding/json"
	"errors"
	"fmt"
	"net/url"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/minio/minio/internal/event"
	"github.com/minio/minio/internal/logger"
	"github.com/minio/minio/internal/once"
	"github.com/minio/minio/internal/store"
	xnet "github.com/minio/pkg/v2/net"

	"github.com/IBM/sarama"
	saramatls "github.com/IBM/sarama/tools/tls"
)

// Kafka input constants
const (
	KafkaBrokers          = "brokers"
	KafkaTopic            = "topic"
	KafkaQueueDir         = "queue_dir"
	KafkaQueueLimit       = "queue_limit"
	KafkaTLS              = "tls"
	KafkaTLSSkipVerify    = "tls_skip_verify"
	KafkaTLSClientAuth    = "tls_client_auth"
	KafkaSASL             = "sasl"
	KafkaSASLUsername     = "sasl_username"
	KafkaSASLPassword     = "sasl_password"
	KafkaSASLMechanism    = "sasl_mechanism"
	KafkaClientTLSCert    = "client_tls_cert"
	KafkaClientTLSKey     = "client_tls_key"
	KafkaVersion          = "version"
	KafkaBatchSize        = "batch_size"
	KafkaCompressionCodec = "compression_codec"
	KafkaCompressionLevel = "compression_level"

	EnvKafkaEnable                   = "MINIO_NOTIFY_KAFKA_ENABLE"
	EnvKafkaBrokers                  = "MINIO_NOTIFY_KAFKA_BROKERS"
	EnvKafkaTopic                    = "MINIO_NOTIFY_KAFKA_TOPIC"
	EnvKafkaQueueDir                 = "MINIO_NOTIFY_KAFKA_QUEUE_DIR"
	EnvKafkaQueueLimit               = "MINIO_NOTIFY_KAFKA_QUEUE_LIMIT"
	EnvKafkaTLS                      = "MINIO_NOTIFY_KAFKA_TLS"
	EnvKafkaTLSSkipVerify            = "MINIO_NOTIFY_KAFKA_TLS_SKIP_VERIFY"
	EnvKafkaTLSClientAuth            = "MINIO_NOTIFY_KAFKA_TLS_CLIENT_AUTH"
	EnvKafkaSASLEnable               = "MINIO_NOTIFY_KAFKA_SASL"
	EnvKafkaSASLUsername             = "MINIO_NOTIFY_KAFKA_SASL_USERNAME"
	EnvKafkaSASLPassword             = "MINIO_NOTIFY_KAFKA_SASL_PASSWORD"
	EnvKafkaSASLMechanism            = "MINIO_NOTIFY_KAFKA_SASL_MECHANISM"
	EnvKafkaClientTLSCert            = "MINIO_NOTIFY_KAFKA_CLIENT_TLS_CERT"
	EnvKafkaClientTLSKey             = "MINIO_NOTIFY_KAFKA_CLIENT_TLS_KEY"
	EnvKafkaVersion                  = "MINIO_NOTIFY_KAFKA_VERSION"
	EnvKafkaBatchSize                = "MINIO_NOTIFY_KAFKA_BATCH_SIZE"
	EnvKafkaProducerCompressionCodec = "MINIO_NOTIFY_KAFKA_PRODUCER_COMPRESSION_CODEC"
	EnvKafkaProducerCompressionLevel = "MINIO_NOTIFY_KAFKA_PRODUCER_COMPRESSION_LEVEL"
)

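// codecs maps the supported compression codec names to their sarama constants.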
var codecs = map[string]sarama.CompressionCodec{
	"none":   sarama.CompressionNone,
	"gzip":   sarama.CompressionGZIP,
	"snappy": sarama.CompressionSnappy,
	"lz4":    sarama.CompressionLZ4,
	"zstd":   sarama.CompressionZSTD,
}

// KafkaArgs - Kafka target arguments.
type KafkaArgs struct {
	Enable     bool        `json:"enable"`
	Brokers    []xnet.Host `json:"brokers"`
	Topic      string      `json:"topic"`
	QueueDir   string      `json:"queueDir"`
	QueueLimit uint64      `json:"queueLimit"`
	Version    string      `json:"version"`
	BatchSize  uint32      `json:"batchSize"`
	TLS        struct {
		Enable        bool               `json:"enable"`
		RootCAs       *x509.CertPool     `json:"-"`
		SkipVerify    bool               `json:"skipVerify"`
		ClientAuth    tls.ClientAuthType `json:"clientAuth"`
		ClientTLSCert string             `json:"clientTLSCert"`
		ClientTLSKey  string             `json:"clientTLSKey"`
	} `json:"tls"`
	SASL struct {
		Enable    bool   `json:"enable"`
		User      string `json:"username"`
		Password  string `json:"password"`
		Mechanism string `json:"mechanism"`
	} `json:"sasl"`
	Producer struct {
		Compression      string `json:"compression"`
		CompressionLevel int    `json:"compressionLevel"`
	} `json:"producer"`
}

// Validate KafkaArgs fields
func (k KafkaArgs) Validate() error {
	if !k.Enable {
		return nil
	}
	if len(k.Brokers) == 0 {
		return errors.New("no broker address found")
	}
	for _, b := range k.Brokers {
		if _, err := xnet.ParseHost(b.String()); err != nil {
			return err
		}
	}
	if k.QueueDir != "" {
		if !filepath.IsAbs(k.QueueDir) {
			return errors.New("queueDir path should be absolute")
		}
	}
	if k.Version != "" {
		if _, err := sarama.ParseKafkaVersion(k.Version); err != nil {
			return err
		}
	}
	if k.BatchSize > 1 {
		if k.QueueDir == "" {
			return errors.New("batch should be enabled only if queue dir is enabled")
		}
	}
	return nil
}

// KafkaTarget - Kafka target.
type KafkaTarget struct {
	initOnce once.Init

	id         event.TargetID
	args       KafkaArgs
	client     sarama.Client
	producer   sarama.SyncProducer
	config     *sarama.Config
	store      store.Store[event.Event]
	batch      *store.Batch[string, *sarama.ProducerMessage]
	loggerOnce logger.LogOnce
	quitCh     chan struct{}
}

// ID - returns target ID.
func (target *KafkaTarget) ID() event.TargetID {
	return target.id
}

// Name - returns the Name of the target.
func (target *KafkaTarget) Name() string {
	return target.ID().String()
}

// Store returns any underlying store if set.
func (target *KafkaTarget) Store() event.TargetStore {
	return target.store
}

// IsActive - returns true if the target is up and active.
func (target *KafkaTarget) IsActive() (bool, error) {
	if err := target.init(); err != nil {
		return false, err
	}
	return target.isActive()
}

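// isActive reports whether the client still sees at least one broker.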
func (target *KafkaTarget) isActive() (bool, error) {
	// Refer https://github.com/IBM/sarama/issues/1341
	brokers := target.client.Brokers()
	if len(brokers) == 0 {
		return false, store.ErrNotConnected
	}
	return true, nil
}

// Save - saves the events to the store which will be replayed when the Kafka connection is active.
func (target *KafkaTarget) Save(eventData event.Event) error {
	if target.store != nil {
		return target.store.Put(eventData)
	}
	if err := target.init(); err != nil {
		return err
	}
	return target.send(eventData)
}

// send - sends an event to Kafka.
func (target *KafkaTarget) send(eventData event.Event) error {
	if target.producer == nil {
		return store.ErrNotConnected
	}
	msg, err := target.toProducerMessage(eventData)
	if err != nil {
		return err
	}
	_, _, err = target.producer.SendMessage(msg)
	return err
}

// SendFromStore - reads an event from the store and sends it to Kafka.
func (target *KafkaTarget) SendFromStore(key store.Key) error {
	if err := target.init(); err != nil {
		return err
	}

	// If batch is enabled, the event will be batched in memory
	// and will be committed once the batch is full.
	if target.batch != nil {
		return target.addToBatch(key)
	}

	eventData, eErr := target.store.Get(key.Name)
	if eErr != nil {
		// The last event key in a successful batch will be sent in the channel at most once by replayEvents().
		// Such events will not exist and would already have been sent successfully.
		if os.IsNotExist(eErr) {
			return nil
		}
		return eErr
	}

	if err := target.send(eventData); err != nil {
		if isKafkaConnErr(err) {
			return store.ErrNotConnected
		}
		return err
	}

	// Delete the event from the store.
	return target.store.Del(key.Name)
}

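// addToBatch stages the event under the given key and commits the batch
// once it is full or the key is the last one present in the store.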
func (target *KafkaTarget) addToBatch(key store.Key) error {
	if target.batch.IsFull() {
		if err := target.commitBatch(); err != nil {
			return err
		}
	}
	if _, ok := target.batch.GetByKey(key.Name); !ok {
		eventData, err := target.store.Get(key.Name)
		if err != nil {
			if os.IsNotExist(err) {
				return nil
			}
			return err
		}
		msg, err := target.toProducerMessage(eventData)
		if err != nil {
			return err
		}
		if err = target.batch.Add(key.Name, msg); err != nil {
			return err
		}
	}
	// Commit the batch if the key is the last one present in the store.
	if key.IsLast || target.batch.IsFull() {
		return target.commitBatch()
	}
	return nil
}

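// commitBatch sends all batched messages to Kafka in a single call and,
// on success, deletes the corresponding events from the store.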
func (target *KafkaTarget) commitBatch() error {
	keys, msgs, err := target.batch.GetAll()
	if err != nil {
		return err
	}
	if err = target.producer.SendMessages(msgs); err != nil {
		if isKafkaConnErr(err) {
			return store.ErrNotConnected
		}
		return err
	}
	return target.store.DelList(keys)
}

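// toProducerMessage converts an event into a sarama.ProducerMessage, keyed by
// "<bucket>/<object>" and carrying the JSON-encoded event log as its value.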
func (target *KafkaTarget) toProducerMessage(eventData event.Event) (*sarama.ProducerMessage, error) {
	objectName, err := url.QueryUnescape(eventData.S3.Object.Key)
	if err != nil {
		return nil, err
	}

	key := eventData.S3.Bucket.Name + "/" + objectName
	data, err := json.Marshal(event.Log{EventName: eventData.EventName, Key: key, Records: []event.Event{eventData}})
	if err != nil {
		return nil, err
	}

	return &sarama.ProducerMessage{
		Topic: target.args.Topic,
		Key:   sarama.StringEncoder(key),
		Value: sarama.ByteEncoder(data),
	}, nil
}

// Close - closes the underlying Kafka connection.
func (target *KafkaTarget) Close() error {
	close(target.quitCh)

	if target.producer != nil {
		target.producer.Close()
		return target.client.Close()
	}

	return nil
}

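// init initializes the Kafka client and producer exactly once.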
func (target *KafkaTarget) init() error {
	return target.initOnce.Do(target.initKafka)
}

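// initKafka builds the sarama configuration from the target arguments and
// connects both the client and the synchronous producer.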
func (target *KafkaTarget) initKafka() error {
	args := target.args

	config := sarama.NewConfig()
	if args.Version != "" {
		kafkaVersion, err := sarama.ParseKafkaVersion(args.Version)
		if err != nil {
			target.loggerOnce(context.Background(), err, target.ID().String())
			return err
		}
		config.Version = kafkaVersion
	}

	config.Net.KeepAlive = 60 * time.Second
	config.Net.SASL.User = args.SASL.User
	config.Net.SASL.Password = args.SASL.Password
	initScramClient(args, config) // initializes the configured SCRAM client.
	config.Net.SASL.Enable = args.SASL.Enable

	tlsConfig, err := saramatls.NewConfig(args.TLS.ClientTLSCert, args.TLS.ClientTLSKey)
	if err != nil {
		target.loggerOnce(context.Background(), err, target.ID().String())
		return err
	}

	config.Net.TLS.Enable = args.TLS.Enable
	config.Net.TLS.Config = tlsConfig
	config.Net.TLS.Config.InsecureSkipVerify = args.TLS.SkipVerify
	config.Net.TLS.Config.ClientAuth = args.TLS.ClientAuth
	config.Net.TLS.Config.RootCAs = args.TLS.RootCAs

	// These settings are needed to ensure that the Kafka client doesn't hang on brokers;
	// refer https://github.com/IBM/sarama/issues/765#issuecomment-254333355
	config.Producer.Retry.Max = 2
	config.Producer.Retry.Backoff = 1 * time.Second
	config.Producer.Return.Successes = true
	config.Producer.Return.Errors = true
	config.Producer.RequiredAcks = sarama.WaitForLocal
	config.Producer.Timeout = 5 * time.Second
	// Set producer compression.
	if cc, ok := codecs[strings.ToLower(args.Producer.Compression)]; ok {
		config.Producer.Compression = cc
		config.Producer.CompressionLevel = args.Producer.CompressionLevel
	}

	config.Net.ReadTimeout = 5 * time.Second
	config.Net.DialTimeout = 5 * time.Second
	config.Net.WriteTimeout = 5 * time.Second
	config.Metadata.Retry.Max = 1
	config.Metadata.Retry.Backoff = 1 * time.Second
	config.Metadata.RefreshFrequency = 15 * time.Minute

	target.config = config

	brokers := []string{}
	for _, broker := range args.Brokers {
		brokers = append(brokers, broker.String())
	}

	client, err := sarama.NewClient(brokers, config)
	if err != nil {
		if !errors.Is(err, sarama.ErrOutOfBrokers) {
			target.loggerOnce(context.Background(), err, target.ID().String())
		}
		return err
	}

	producer, err := sarama.NewSyncProducerFromClient(client)
	if err != nil {
		if !errors.Is(err, sarama.ErrOutOfBrokers) {
			target.loggerOnce(context.Background(), err, target.ID().String())
		}
		return err
	}
	target.client = client
	target.producer = producer

	yes, err := target.isActive()
	if err != nil {
		return err
	}
	if !yes {
		return store.ErrNotConnected
	}

	return nil
}

// NewKafkaTarget - creates new Kafka target with auth credentials.
func NewKafkaTarget(id string, args KafkaArgs, loggerOnce logger.LogOnce) (*KafkaTarget, error) {
	var queueStore store.Store[event.Event]
	if args.QueueDir != "" {
		queueDir := filepath.Join(args.QueueDir, storePrefix+"-kafka-"+id)
		queueStore = store.NewQueueStore[event.Event](queueDir, args.QueueLimit, event.StoreExtension)
		if err := queueStore.Open(); err != nil {
			return nil, fmt.Errorf("unable to initialize the queue store of Kafka `%s`: %w", id, err)
		}
	}

	target := &KafkaTarget{
		id:         event.TargetID{ID: id, Name: "kafka"},
		args:       args,
		store:      queueStore,
		loggerOnce: loggerOnce,
		quitCh:     make(chan struct{}),
	}

	if target.store != nil {
		if args.BatchSize > 1 {
			target.batch = store.NewBatch[string, *sarama.ProducerMessage](args.BatchSize)
		}
		store.StreamItems(target.store, target, target.quitCh, target.loggerOnce)
	}

	return target, nil
}

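// A minimal construction sketch (illustrative only; the broker address, the
// topic, and the no-op logOnce stand-in below are assumptions, not defaults
// of this package):
//
//	host, err := xnet.ParseHost("localhost:9092")
//	if err != nil {
//		panic(err) // handle the error properly in real code
//	}
//	args := KafkaArgs{Enable: true, Brokers: []xnet.Host{*host}, Topic: "bucketevents"}
//	logOnce := func(ctx context.Context, err error, id string, errKind ...interface{}) {}
//	target, err := NewKafkaTarget("1", args, logOnce)

// isKafkaConnErr reports whether err indicates a lost connection to the Kafka cluster.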
func isKafkaConnErr(err error) bool {
	// Sarama opens the circuit breaker after 3 consecutive connection failures.
	return errors.Is(err, sarama.ErrLeaderNotAvailable) || err.Error() == "circuit breaker is open"
}