github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/event/target/kafka.go

// Copyright (c) 2015-2023 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package target

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"encoding/json"
	"errors"
	"fmt"
	"net/url"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/minio/minio/internal/event"
	"github.com/minio/minio/internal/logger"
	"github.com/minio/minio/internal/once"
	"github.com/minio/minio/internal/store"
	xnet "github.com/minio/pkg/v2/net"

	"github.com/IBM/sarama"
	saramatls "github.com/IBM/sarama/tools/tls"
)

// Kafka input constants
const (
	KafkaBrokers          = "brokers"
	KafkaTopic            = "topic"
	KafkaQueueDir         = "queue_dir"
	KafkaQueueLimit       = "queue_limit"
	KafkaTLS              = "tls"
	KafkaTLSSkipVerify    = "tls_skip_verify"
	KafkaTLSClientAuth    = "tls_client_auth"
	KafkaSASL             = "sasl"
	KafkaSASLUsername     = "sasl_username"
	KafkaSASLPassword     = "sasl_password"
	KafkaSASLMechanism    = "sasl_mechanism"
	KafkaClientTLSCert    = "client_tls_cert"
	KafkaClientTLSKey     = "client_tls_key"
	KafkaVersion          = "version"
	KafkaBatchSize        = "batch_size"
	KafkaCompressionCodec = "compression_codec"
	KafkaCompressionLevel = "compression_level"

	EnvKafkaEnable                   = "MINIO_NOTIFY_KAFKA_ENABLE"
	EnvKafkaBrokers                  = "MINIO_NOTIFY_KAFKA_BROKERS"
	EnvKafkaTopic                    = "MINIO_NOTIFY_KAFKA_TOPIC"
	EnvKafkaQueueDir                 = "MINIO_NOTIFY_KAFKA_QUEUE_DIR"
	EnvKafkaQueueLimit               = "MINIO_NOTIFY_KAFKA_QUEUE_LIMIT"
	EnvKafkaTLS                      = "MINIO_NOTIFY_KAFKA_TLS"
	EnvKafkaTLSSkipVerify            = "MINIO_NOTIFY_KAFKA_TLS_SKIP_VERIFY"
	EnvKafkaTLSClientAuth            = "MINIO_NOTIFY_KAFKA_TLS_CLIENT_AUTH"
	EnvKafkaSASLEnable               = "MINIO_NOTIFY_KAFKA_SASL"
	EnvKafkaSASLUsername             = "MINIO_NOTIFY_KAFKA_SASL_USERNAME"
	EnvKafkaSASLPassword             = "MINIO_NOTIFY_KAFKA_SASL_PASSWORD"
	EnvKafkaSASLMechanism            = "MINIO_NOTIFY_KAFKA_SASL_MECHANISM"
	EnvKafkaClientTLSCert            = "MINIO_NOTIFY_KAFKA_CLIENT_TLS_CERT"
	EnvKafkaClientTLSKey             = "MINIO_NOTIFY_KAFKA_CLIENT_TLS_KEY"
	EnvKafkaVersion                  = "MINIO_NOTIFY_KAFKA_VERSION"
	EnvKafkaBatchSize                = "MINIO_NOTIFY_KAFKA_BATCH_SIZE"
	EnvKafkaProducerCompressionCodec = "MINIO_NOTIFY_KAFKA_PRODUCER_COMPRESSION_CODEC"
	EnvKafkaProducerCompressionLevel = "MINIO_NOTIFY_KAFKA_PRODUCER_COMPRESSION_LEVEL"
)

var codecs = map[string]sarama.CompressionCodec{
	"none":   sarama.CompressionNone,
	"gzip":   sarama.CompressionGZIP,
	"snappy": sarama.CompressionSnappy,
	"lz4":    sarama.CompressionLZ4,
	"zstd":   sarama.CompressionZSTD,
}
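// The following is an illustrative sketch only: the config keys and env vars
// above are parsed elsewhere in the notify subsystem, and the exact values
// shown here are assumptions. With them, producer compression for the Kafka
// target could be selected roughly like this:
//
//	MINIO_NOTIFY_KAFKA_PRODUCER_COMPRESSION_CODEC=snappy
//	MINIO_NOTIFY_KAFKA_PRODUCER_COMPRESSION_LEVEL=0
//
// A codec name that is not a key of the codecs map above is ignored by
// initKafka, leaving sarama's default (no compression) in place.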
// KafkaArgs - Kafka target arguments.
type KafkaArgs struct {
	Enable     bool        `json:"enable"`
	Brokers    []xnet.Host `json:"brokers"`
	Topic      string      `json:"topic"`
	QueueDir   string      `json:"queueDir"`
	QueueLimit uint64      `json:"queueLimit"`
	Version    string      `json:"version"`
	BatchSize  uint32      `json:"batchSize"`
	TLS        struct {
		Enable        bool               `json:"enable"`
		RootCAs       *x509.CertPool     `json:"-"`
		SkipVerify    bool               `json:"skipVerify"`
		ClientAuth    tls.ClientAuthType `json:"clientAuth"`
		ClientTLSCert string             `json:"clientTLSCert"`
		ClientTLSKey  string             `json:"clientTLSKey"`
	} `json:"tls"`
	SASL struct {
		Enable    bool   `json:"enable"`
		User      string `json:"username"`
		Password  string `json:"password"`
		Mechanism string `json:"mechanism"`
	} `json:"sasl"`
	Producer struct {
		Compression      string `json:"compression"`
		CompressionLevel int    `json:"compressionLevel"`
	} `json:"producer"`
}

// Validate KafkaArgs fields
func (k KafkaArgs) Validate() error {
	if !k.Enable {
		return nil
	}
	if len(k.Brokers) == 0 {
		return errors.New("no broker address found")
	}
	for _, b := range k.Brokers {
		if _, err := xnet.ParseHost(b.String()); err != nil {
			return err
		}
	}
	if k.QueueDir != "" {
		if !filepath.IsAbs(k.QueueDir) {
			return errors.New("queueDir path should be absolute")
		}
	}
	if k.Version != "" {
		if _, err := sarama.ParseKafkaVersion(k.Version); err != nil {
			return err
		}
	}
	if k.BatchSize > 1 {
		if k.QueueDir == "" {
			return errors.New("batch should be enabled only if queue dir is enabled")
		}
	}
	return nil
}

// KafkaTarget - Kafka target.
type KafkaTarget struct {
	initOnce once.Init

	id         event.TargetID
	args       KafkaArgs
	client     sarama.Client
	producer   sarama.SyncProducer
	config     *sarama.Config
	store      store.Store[event.Event]
	batch      *store.Batch[string, *sarama.ProducerMessage]
	loggerOnce logger.LogOnce
	quitCh     chan struct{}
}

// ID - returns target ID.
func (target *KafkaTarget) ID() event.TargetID {
	return target.id
}

// Name - returns the name of the target.
func (target *KafkaTarget) Name() string {
	return target.ID().String()
}

// Store returns any underlying store if set.
func (target *KafkaTarget) Store() event.TargetStore {
	return target.store
}

// IsActive - returns true if the target is up and active.
func (target *KafkaTarget) IsActive() (bool, error) {
	if err := target.init(); err != nil {
		return false, err
	}
	return target.isActive()
}

func (target *KafkaTarget) isActive() (bool, error) {
	// Refer https://github.com/IBM/sarama/issues/1341
	brokers := target.client.Brokers()
	if len(brokers) == 0 {
		return false, store.ErrNotConnected
	}
	return true, nil
}

// Save - saves the event to the store, which will be replayed when the Kafka connection is active.
func (target *KafkaTarget) Save(eventData event.Event) error {
	if target.store != nil {
		return target.store.Put(eventData)
	}
	if err := target.init(); err != nil {
		return err
	}
	return target.send(eventData)
}
// send - sends an event to Kafka.
func (target *KafkaTarget) send(eventData event.Event) error {
	if target.producer == nil {
		return store.ErrNotConnected
	}
	msg, err := target.toProducerMessage(eventData)
	if err != nil {
		return err
	}
	_, _, err = target.producer.SendMessage(msg)
	return err
}

// SendFromStore - reads an event from store and sends it to Kafka.
func (target *KafkaTarget) SendFromStore(key store.Key) error {
	if err := target.init(); err != nil {
		return err
	}

	// If batch is enabled, the event will be batched in memory
	// and will be committed once the batch is full.
	if target.batch != nil {
		return target.addToBatch(key)
	}

	eventData, eErr := target.store.Get(key.Name)
	if eErr != nil {
		// The last event key in a successful batch will be sent in the channel at most once by replayEvents().
		// Such events will not exist and would already have been sent successfully.
		if os.IsNotExist(eErr) {
			return nil
		}
		return eErr
	}

	if err := target.send(eventData); err != nil {
		if isKafkaConnErr(err) {
			return store.ErrNotConnected
		}
		return err
	}

	// Delete the event from store.
	return target.store.Del(key.Name)
}

// addToBatch adds the event keyed by key.Name to the in-memory batch and
// commits the batch when it is full or when key is the last one in the store.
func (target *KafkaTarget) addToBatch(key store.Key) error {
	if target.batch.IsFull() {
		if err := target.commitBatch(); err != nil {
			return err
		}
	}
	if _, ok := target.batch.GetByKey(key.Name); !ok {
		eventData, err := target.store.Get(key.Name)
		if err != nil {
			if os.IsNotExist(err) {
				return nil
			}
			return err
		}
		msg, err := target.toProducerMessage(eventData)
		if err != nil {
			return err
		}
		if err = target.batch.Add(key.Name, msg); err != nil {
			return err
		}
	}
	// Commit the batch if the key is the last one present in the store.
	if key.IsLast || target.batch.IsFull() {
		return target.commitBatch()
	}
	return nil
}

// commitBatch sends all batched messages to Kafka and deletes the
// corresponding events from the store.
func (target *KafkaTarget) commitBatch() error {
	keys, msgs, err := target.batch.GetAll()
	if err != nil {
		return err
	}
	if err = target.producer.SendMessages(msgs); err != nil {
		if isKafkaConnErr(err) {
			return store.ErrNotConnected
		}
		return err
	}
	return target.store.DelList(keys)
}

// toProducerMessage converts an event into a sarama producer message keyed by "<bucket>/<object>".
func (target *KafkaTarget) toProducerMessage(eventData event.Event) (*sarama.ProducerMessage, error) {
	objectName, err := url.QueryUnescape(eventData.S3.Object.Key)
	if err != nil {
		return nil, err
	}

	key := eventData.S3.Bucket.Name + "/" + objectName
	data, err := json.Marshal(event.Log{EventName: eventData.EventName, Key: key, Records: []event.Event{eventData}})
	if err != nil {
		return nil, err
	}

	return &sarama.ProducerMessage{
		Topic: target.args.Topic,
		Key:   sarama.StringEncoder(key),
		Value: sarama.ByteEncoder(data),
	}, nil
}
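// For illustration: toProducerMessage keys each message by "<bucket>/<object>"
// and wraps the event in an event.Log envelope, so a PUT notification for
// object "photo.jpg" in bucket "images" would be produced roughly as shown
// below. The JSON field names and the event name are assumptions here, not
// taken from this file:
//
//	Key:   images/photo.jpg
//	Value: {"EventName":"s3:ObjectCreated:Put","Key":"images/photo.jpg","Records":[ ... ]}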
// Close - closes the underlying Kafka connection.
func (target *KafkaTarget) Close() error {
	close(target.quitCh)

	if target.producer != nil {
		target.producer.Close()
		return target.client.Close()
	}

	return nil
}

// init initializes the Kafka client and producer exactly once.
func (target *KafkaTarget) init() error {
	return target.initOnce.Do(target.initKafka)
}

// initKafka creates the sarama client and sync producer from the target arguments.
func (target *KafkaTarget) initKafka() error {
	args := target.args

	config := sarama.NewConfig()
	if args.Version != "" {
		kafkaVersion, err := sarama.ParseKafkaVersion(args.Version)
		if err != nil {
			target.loggerOnce(context.Background(), err, target.ID().String())
			return err
		}
		config.Version = kafkaVersion
	}

	config.Net.KeepAlive = 60 * time.Second
	config.Net.SASL.User = args.SASL.User
	config.Net.SASL.Password = args.SASL.Password
	initScramClient(args, config) // initializes configured scram client.
	config.Net.SASL.Enable = args.SASL.Enable

	tlsConfig, err := saramatls.NewConfig(args.TLS.ClientTLSCert, args.TLS.ClientTLSKey)
	if err != nil {
		target.loggerOnce(context.Background(), err, target.ID().String())
		return err
	}

	config.Net.TLS.Enable = args.TLS.Enable
	config.Net.TLS.Config = tlsConfig
	config.Net.TLS.Config.InsecureSkipVerify = args.TLS.SkipVerify
	config.Net.TLS.Config.ClientAuth = args.TLS.ClientAuth
	config.Net.TLS.Config.RootCAs = args.TLS.RootCAs

	// These settings are needed to ensure that the Kafka client doesn't hang on brokers,
	// refer https://github.com/IBM/sarama/issues/765#issuecomment-254333355
	config.Producer.Retry.Max = 2
	config.Producer.Retry.Backoff = (1 * time.Second)
	config.Producer.Return.Successes = true
	config.Producer.Return.Errors = true
	config.Producer.RequiredAcks = 1
	config.Producer.Timeout = (5 * time.Second)
	// Set Producer Compression
	cc, ok := codecs[strings.ToLower(args.Producer.Compression)]
	if ok {
		config.Producer.Compression = cc
		config.Producer.CompressionLevel = args.Producer.CompressionLevel
	}

	config.Net.ReadTimeout = (5 * time.Second)
	config.Net.DialTimeout = (5 * time.Second)
	config.Net.WriteTimeout = (5 * time.Second)
	config.Metadata.Retry.Max = 1
	config.Metadata.Retry.Backoff = (1 * time.Second)
	config.Metadata.RefreshFrequency = (15 * time.Minute)

	target.config = config

	brokers := []string{}
	for _, broker := range args.Brokers {
		brokers = append(brokers, broker.String())
	}

	client, err := sarama.NewClient(brokers, config)
	if err != nil {
		if !errors.Is(err, sarama.ErrOutOfBrokers) {
			target.loggerOnce(context.Background(), err, target.ID().String())
		}
		return err
	}

	producer, err := sarama.NewSyncProducerFromClient(client)
	if err != nil {
		if !errors.Is(err, sarama.ErrOutOfBrokers) {
			target.loggerOnce(context.Background(), err, target.ID().String())
		}
		return err
	}
	target.client = client
	target.producer = producer

	yes, err := target.isActive()
	if err != nil {
		return err
	}
	if !yes {
		return store.ErrNotConnected
	}

	return nil
}
// NewKafkaTarget - creates new Kafka target with auth credentials.
func NewKafkaTarget(id string, args KafkaArgs, loggerOnce logger.LogOnce) (*KafkaTarget, error) {
	var queueStore store.Store[event.Event]
	if args.QueueDir != "" {
		queueDir := filepath.Join(args.QueueDir, storePrefix+"-kafka-"+id)
		queueStore = store.NewQueueStore[event.Event](queueDir, args.QueueLimit, event.StoreExtension)
		if err := queueStore.Open(); err != nil {
			return nil, fmt.Errorf("unable to initialize the queue store of Kafka `%s`: %w", id, err)
		}
	}

	target := &KafkaTarget{
		id:         event.TargetID{ID: id, Name: "kafka"},
		args:       args,
		store:      queueStore,
		loggerOnce: loggerOnce,
		quitCh:     make(chan struct{}),
	}

	if target.store != nil {
		if args.BatchSize > 1 {
			target.batch = store.NewBatch[string, *sarama.ProducerMessage](args.BatchSize)
		}
		store.StreamItems(target.store, target, target.quitCh, target.loggerOnce)
	}

	return target, nil
}

// isKafkaConnErr returns true if the error indicates a broken connection to Kafka.
func isKafkaConnErr(err error) bool {
	// Sarama opens the circuit breaker after 3 consecutive connection failures.
	return err == sarama.ErrLeaderNotAvailable || err.Error() == "circuit breaker is open"
}
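// A minimal construction sketch, not taken from MinIO itself: the broker
// address, topic, target ID, loggerOnce callback, and eventData value are all
// assumptions, and error handling is elided.
//
//	host, err := xnet.ParseHost("localhost:9092")
//	args := KafkaArgs{Enable: true, Topic: "minio-events", Brokers: []xnet.Host{*host}}
//	target, err := NewKafkaTarget("1", args, loggerOnce) // loggerOnce is a logger.LogOnce callback
//	defer target.Close()
//	err = target.Save(eventData) // eventData is an event.Event to publish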