github.com/johnnyeven/libtools@v0.0.0-20191126065708-61829c1adf46/kafka/consumergroup/consumer_group.go (about) 1 package consumergroup 2 3 import ( 4 "errors" 5 "fmt" 6 "sync" 7 "time" 8 9 "github.com/wvanbergen/kazoo-go" 10 "gopkg.in/Shopify/sarama.v1" 11 ) 12 13 var ( 14 AlreadyClosing = errors.New("The consumer group is already shutting down.") 15 ) 16 17 type OffsetConfig struct { 18 Initial int64 // The initial offset method to use if the consumer has no previously stored offset. Must be either sarama.OffsetOldest (default) or sarama.OffsetNewest. 19 ProcessingTimeout time.Duration // Time to wait for all the offsets for a partition to be processed after stopping to consume from it. Defaults to 1 minute. 20 CommitInterval time.Duration // The interval between which the processed offsets are commited. 21 ResetOffsets bool // Resets the offsets for the consumergroup so that it won't resume from where it left off previously. 22 } 23 24 type Config struct { 25 *sarama.Config 26 27 Zookeeper *kazoo.Config 28 29 Offsets OffsetConfig 30 } 31 32 func NewConfig() *Config { 33 config := &Config{} 34 config.Config = sarama.NewConfig() 35 config.Zookeeper = kazoo.NewConfig() 36 config.Offsets.Initial = sarama.OffsetOldest 37 config.Offsets.ProcessingTimeout = 60 * time.Second 38 config.Offsets.CommitInterval = 10 * time.Second 39 40 return config 41 } 42 43 func (cgc *Config) Validate() error { 44 if cgc.Zookeeper.Timeout <= 0 { 45 return sarama.ConfigurationError("ZookeeperTimeout should have a duration > 0") 46 } 47 48 if cgc.Offsets.CommitInterval < 0 { 49 return sarama.ConfigurationError("CommitInterval should have a duration >= 0") 50 } 51 52 if cgc.Offsets.Initial != sarama.OffsetOldest && cgc.Offsets.Initial != sarama.OffsetNewest { 53 return errors.New("Offsets.Initial should be sarama.OffsetOldest or sarama.OffsetNewest.") 54 } 55 56 if cgc.Config != nil { 57 if err := cgc.Config.Validate(); err != nil { 58 return err 59 } 60 } 61 62 return nil 63 } 64 65 // The ConsumerGroup type holds all the information for a consumer that is part 66 // of a consumer group. Call JoinConsumerGroup to start a consumer. 67 type ConsumerGroup struct { 68 config *Config 69 70 consumer sarama.Consumer 71 kazoo *kazoo.Kazoo 72 group *kazoo.Consumergroup 73 instance *kazoo.ConsumergroupInstance 74 75 wg sync.WaitGroup 76 singleShutdown sync.Once 77 78 messages chan *sarama.ConsumerMessage 79 errors chan error 80 stopper chan struct{} 81 82 consumers kazoo.ConsumergroupInstanceList 83 84 offsetManager OffsetManager 85 } 86 87 // Connects to a consumer group, using Zookeeper for auto-discovery 88 func JoinConsumerGroup(name string, topics []string, zookeeper []string, config *Config) (cg *ConsumerGroup, err error) { 89 90 if name == "" { 91 return nil, sarama.ConfigurationError("Empty consumergroup name") 92 } 93 94 if len(topics) == 0 { 95 return nil, sarama.ConfigurationError("No topics provided") 96 } 97 98 if len(zookeeper) == 0 { 99 return nil, errors.New("You need to provide at least one zookeeper node address!") 100 } 101 102 if config == nil { 103 config = NewConfig() 104 } 105 config.ClientID = name 106 107 // Validate configuration 108 if err = config.Validate(); err != nil { 109 return 110 } 111 112 var kz *kazoo.Kazoo 113 if kz, err = kazoo.NewKazoo(zookeeper, config.Zookeeper); err != nil { 114 return 115 } 116 117 brokers, err := kz.BrokerList() 118 if err != nil { 119 kz.Close() 120 return 121 } 122 123 group := kz.Consumergroup(name) 124 125 if config.Offsets.ResetOffsets { 126 err = group.ResetOffsets() 127 if err != nil { 128 kz.Close() 129 return 130 } 131 } 132 133 instance := group.NewInstance() 134 135 var consumer sarama.Consumer 136 if consumer, err = sarama.NewConsumer(brokers, config.Config); err != nil { 137 kz.Close() 138 return 139 } 140 141 cg = &ConsumerGroup{ 142 config: config, 143 consumer: consumer, 144 145 kazoo: kz, 146 group: group, 147 instance: instance, 148 149 messages: make(chan *sarama.ConsumerMessage, config.ChannelBufferSize), 150 errors: make(chan error, config.ChannelBufferSize), 151 stopper: make(chan struct{}), 152 } 153 154 // Register consumer group 155 if exists, err := cg.group.Exists(); err != nil { 156 cg.Logf("FAILED to check for existence of consumergroup: %s!\n", err) 157 _ = consumer.Close() 158 _ = kz.Close() 159 return nil, err 160 } else if !exists { 161 cg.Logf("Consumergroup `%s` does not yet exists, creating...\n", cg.group.Name) 162 if err := cg.group.Create(); err != nil { 163 cg.Logf("FAILED to create consumergroup in Zookeeper: %s!\n", err) 164 _ = consumer.Close() 165 _ = kz.Close() 166 return nil, err 167 } 168 } 169 170 // Register itself with zookeeper 171 if err := cg.instance.Register(topics); err != nil { 172 cg.Logf("FAILED to register consumer instance: %s!\n", err) 173 return nil, err 174 } else { 175 cg.Logf("Consumer instance registered (%s).", cg.instance.ID) 176 } 177 178 offsetConfig := OffsetManagerConfig{CommitInterval: config.Offsets.CommitInterval} 179 cg.offsetManager = NewZookeeperOffsetManager(cg, &offsetConfig) 180 181 go cg.topicListConsumer(topics) 182 183 return 184 } 185 186 // Returns a channel that you can read to obtain events from Kafka to process. 187 func (cg *ConsumerGroup) Messages() <-chan *sarama.ConsumerMessage { 188 return cg.messages 189 } 190 191 // Returns a channel that you can read to obtain events from Kafka to process. 192 func (cg *ConsumerGroup) Errors() <-chan error { 193 return cg.errors 194 } 195 196 func (cg *ConsumerGroup) Closed() bool { 197 return cg.instance == nil 198 } 199 200 func (cg *ConsumerGroup) Close() error { 201 shutdownError := AlreadyClosing 202 cg.singleShutdown.Do(func() { 203 defer cg.kazoo.Close() 204 205 shutdownError = nil 206 207 close(cg.stopper) 208 cg.wg.Wait() 209 210 if err := cg.offsetManager.Close(); err != nil { 211 cg.Logf("FAILED closing the offset manager: %s!\n", err) 212 } 213 214 if shutdownError = cg.instance.Deregister(); shutdownError != nil { 215 cg.Logf("FAILED deregistering consumer instance: %s!\n", shutdownError) 216 } else { 217 cg.Logf("Deregistered consumer instance %s.\n", cg.instance.ID) 218 } 219 220 if shutdownError = cg.consumer.Close(); shutdownError != nil { 221 cg.Logf("FAILED closing the Sarama client: %s\n", shutdownError) 222 } 223 224 close(cg.messages) 225 close(cg.errors) 226 cg.instance = nil 227 }) 228 229 return shutdownError 230 } 231 232 func (cg *ConsumerGroup) Logf(format string, args ...interface{}) { 233 var identifier string 234 if cg.instance == nil { 235 identifier = "(defunct)" 236 } else { 237 identifier = cg.instance.ID[len(cg.instance.ID)-12:] 238 } 239 sarama.Logger.Printf("[%s/%s] %s", cg.group.Name, identifier, fmt.Sprintf(format, args...)) 240 } 241 242 func (cg *ConsumerGroup) InstanceRegistered() (bool, error) { 243 return cg.instance.Registered() 244 } 245 246 func (cg *ConsumerGroup) CommitUpto(message *sarama.ConsumerMessage) error { 247 cg.offsetManager.MarkAsProcessed(message.Topic, message.Partition, message.Offset) 248 return nil 249 } 250 251 func (cg *ConsumerGroup) FlushOffsets() error { 252 return cg.offsetManager.Flush() 253 } 254 255 func (cg *ConsumerGroup) topicListConsumer(topics []string) { 256 for { 257 select { 258 case <-cg.stopper: 259 return 260 default: 261 } 262 263 consumers, consumerChanges, err := cg.group.WatchInstances() 264 if err != nil { 265 cg.Logf("FAILED to get list of registered consumer instances: %s\n", err) 266 return 267 } 268 269 cg.consumers = consumers 270 cg.Logf("Currently registered consumers: %d\n", len(cg.consumers)) 271 272 stopper := make(chan struct{}) 273 274 for _, topic := range topics { 275 cg.wg.Add(1) 276 go cg.topicConsumer(topic, cg.messages, cg.errors, stopper) 277 } 278 279 select { 280 case <-cg.stopper: 281 close(stopper) 282 return 283 284 case <-consumerChanges: 285 registered, err := cg.instance.Registered() 286 if err != nil { 287 cg.Logf("FAILED to get register status: %s\n", err) 288 } else if !registered { 289 err = cg.instance.Register(topics) 290 if err != nil { 291 cg.Logf("FAILED to register consumer instance: %s!\n", err) 292 } else { 293 cg.Logf("Consumer instance registered (%s).", cg.instance.ID) 294 } 295 } 296 297 cg.Logf("Triggering rebalance due to consumer list change\n") 298 close(stopper) 299 cg.wg.Wait() 300 } 301 } 302 } 303 304 func (cg *ConsumerGroup) topicConsumer(topic string, messages chan<- *sarama.ConsumerMessage, errors chan<- error, stopper <-chan struct{}) { 305 defer cg.wg.Done() 306 307 select { 308 case <-stopper: 309 return 310 default: 311 } 312 313 cg.Logf("%s :: Started topic consumer\n", topic) 314 315 // Fetch a list of partition IDs 316 partitions, err := cg.kazoo.Topic(topic).Partitions() 317 if err != nil { 318 cg.Logf("%s :: FAILED to get list of partitions: %s\n", topic, err) 319 cg.errors <- &sarama.ConsumerError{ 320 Topic: topic, 321 Partition: -1, 322 Err: err, 323 } 324 return 325 } 326 327 partitionLeaders, err := retrievePartitionLeaders(partitions) 328 if err != nil { 329 cg.Logf("%s :: FAILED to get leaders of partitions: %s\n", topic, err) 330 cg.errors <- &sarama.ConsumerError{ 331 Topic: topic, 332 Partition: -1, 333 Err: err, 334 } 335 return 336 } 337 338 dividedPartitions := dividePartitionsBetweenConsumers(cg.consumers, partitionLeaders) 339 myPartitions := dividedPartitions[cg.instance.ID] 340 cg.Logf("%s :: Claiming %d of %d partitions", topic, len(myPartitions), len(partitionLeaders)) 341 342 // Consume all the assigned partitions 343 var wg sync.WaitGroup 344 for _, pid := range myPartitions { 345 346 wg.Add(1) 347 go cg.partitionConsumer(topic, pid.ID, messages, errors, &wg, stopper) 348 } 349 350 wg.Wait() 351 cg.Logf("%s :: Stopped topic consumer\n", topic) 352 } 353 354 func (cg *ConsumerGroup) consumePartition(topic string, partition int32, nextOffset int64) (sarama.PartitionConsumer, error) { 355 consumer, err := cg.consumer.ConsumePartition(topic, partition, nextOffset) 356 if err == sarama.ErrOffsetOutOfRange { 357 cg.Logf("%s/%d :: Partition consumer offset out of Range.\n", topic, partition) 358 // if the offset is out of range, simplistically decide whether to use OffsetNewest or OffsetOldest 359 // if the configuration specified offsetOldest, then switch to the oldest available offset, else 360 // switch to the newest available offset. 361 if cg.config.Offsets.Initial == sarama.OffsetOldest { 362 nextOffset = sarama.OffsetOldest 363 cg.Logf("%s/%d :: Partition consumer offset reset to oldest available offset.\n", topic, partition) 364 } else { 365 nextOffset = sarama.OffsetNewest 366 cg.Logf("%s/%d :: Partition consumer offset reset to newest available offset.\n", topic, partition) 367 } 368 // retry the consumePartition with the adjusted offset 369 consumer, err = cg.consumer.ConsumePartition(topic, partition, nextOffset) 370 } 371 if err != nil { 372 cg.Logf("%s/%d :: FAILED to start partition consumer: %s\n", topic, partition, err) 373 return nil, err 374 } 375 return consumer, err 376 } 377 378 // Consumes a partition 379 func (cg *ConsumerGroup) partitionConsumer(topic string, partition int32, messages chan<- *sarama.ConsumerMessage, errors chan<- error, wg *sync.WaitGroup, stopper <-chan struct{}) { 380 defer wg.Done() 381 382 select { 383 case <-stopper: 384 return 385 default: 386 } 387 388 // Since ProcessingTimeout is the amount of time we'll wait for the final batch 389 // of messages to be processed before releasing a partition, we need to wait slightly 390 // longer than that before timing out here to ensure that another consumer has had 391 // enough time to release the partition. Hence, +2 seconds. 392 maxRetries := int(cg.config.Offsets.ProcessingTimeout/time.Second) + 2 393 for tries := 0; tries < maxRetries; tries++ { 394 if err := cg.instance.ClaimPartition(topic, partition); err == nil { 395 break 396 } else if tries+1 < maxRetries { 397 if err == kazoo.ErrPartitionClaimedByOther { 398 // Another consumer still owns this partition. We should wait longer for it to release it. 399 time.Sleep(1 * time.Second) 400 } else { 401 // An unexpected error occurred. Log it and continue trying until we hit the timeout. 402 cg.Logf("%s/%d :: FAILED to claim partition on attempt %v of %v; retrying in 1 second. Error: %v", topic, partition, tries+1, maxRetries, err) 403 time.Sleep(1 * time.Second) 404 } 405 } else { 406 cg.Logf("%s/%d :: FAILED to claim the partition: %s\n", topic, partition, err) 407 cg.errors <- &sarama.ConsumerError{ 408 Topic: topic, 409 Partition: partition, 410 Err: err, 411 } 412 return 413 } 414 } 415 416 defer func() { 417 err := cg.instance.ReleasePartition(topic, partition) 418 if err != nil { 419 cg.Logf("%s/%d :: FAILED to release partition: %s\n", topic, partition, err) 420 cg.errors <- &sarama.ConsumerError{ 421 Topic: topic, 422 Partition: partition, 423 Err: err, 424 } 425 } 426 }() 427 428 nextOffset, err := cg.offsetManager.InitializePartition(topic, partition) 429 if err != nil { 430 cg.Logf("%s/%d :: FAILED to determine initial offset: %s\n", topic, partition, err) 431 return 432 } 433 434 if nextOffset >= 0 { 435 cg.Logf("%s/%d :: Partition consumer starting at offset %d.\n", topic, partition, nextOffset) 436 } else { 437 nextOffset = cg.config.Offsets.Initial 438 if nextOffset == sarama.OffsetOldest { 439 cg.Logf("%s/%d :: Partition consumer starting at the oldest available offset.\n", topic, partition) 440 } else if nextOffset == sarama.OffsetNewest { 441 cg.Logf("%s/%d :: Partition consumer listening for new messages only.\n", topic, partition) 442 } 443 } 444 445 consumer, err := cg.consumePartition(topic, partition, nextOffset) 446 447 if err != nil { 448 cg.Logf("%s/%d :: FAILED to start partition consumer: %s\n", topic, partition, err) 449 return 450 } 451 452 defer consumer.Close() 453 454 err = nil 455 var lastOffset int64 = -1 // aka unknown 456 partitionConsumerLoop: 457 for { 458 select { 459 case <-stopper: 460 break partitionConsumerLoop 461 462 case err := <-consumer.Errors(): 463 if err == nil { 464 cg.Logf("%s/%d :: Consumer encountered an invalid state: re-establishing consumption of partition.\n", topic, partition) 465 466 // Errors encountered (if any) are logged in the consumerPartition function 467 var cErr error 468 consumer, cErr = cg.consumePartition(topic, partition, lastOffset) 469 if cErr != nil { 470 break partitionConsumerLoop 471 } 472 continue partitionConsumerLoop 473 } 474 475 for { 476 select { 477 case errors <- err: 478 continue partitionConsumerLoop 479 480 case <-stopper: 481 break partitionConsumerLoop 482 } 483 } 484 485 case message := <-consumer.Messages(): 486 if message == nil { 487 cg.Logf("%s/%d :: Consumer encountered an invalid state: re-establishing consumption of partition.\n", topic, partition) 488 489 // Errors encountered (if any) are logged in the consumerPartition function 490 var cErr error 491 consumer, cErr = cg.consumePartition(topic, partition, lastOffset) 492 if cErr != nil { 493 break partitionConsumerLoop 494 } 495 continue partitionConsumerLoop 496 497 } 498 499 for { 500 select { 501 case <-stopper: 502 break partitionConsumerLoop 503 504 case messages <- message: 505 lastOffset = message.Offset 506 continue partitionConsumerLoop 507 } 508 } 509 } 510 } 511 512 cg.Logf("%s/%d :: Stopping partition consumer at offset %d\n", topic, partition, lastOffset) 513 if err := cg.offsetManager.FinalizePartition(topic, partition, lastOffset, cg.config.Offsets.ProcessingTimeout); err != nil { 514 cg.Logf("%s/%d :: %s\n", topic, partition, err) 515 } 516 }