github.com/johnnyeven/libtools@v0.0.0-20191126065708-61829c1adf46/kafka/consumergroup/consumer_group.go

package consumergroup

import (
	"errors"
	"fmt"
	"sync"
	"time"

	"github.com/wvanbergen/kazoo-go"
	"gopkg.in/Shopify/sarama.v1"
)

var (
	AlreadyClosing = errors.New("The consumer group is already shutting down.")
)

type OffsetConfig struct {
	Initial           int64         // The initial offset method to use if the consumer has no previously stored offset. Must be either sarama.OffsetOldest (default) or sarama.OffsetNewest.
	ProcessingTimeout time.Duration // Time to wait for all the offsets for a partition to be processed after stopping to consume from it. Defaults to 1 minute.
	CommitInterval    time.Duration // The interval at which processed offsets are committed.
	ResetOffsets      bool          // Resets the offsets for the consumergroup so that it won't resume from where it left off previously.
}

type Config struct {
	*sarama.Config

	Zookeeper *kazoo.Config

	Offsets OffsetConfig
}

func NewConfig() *Config {
	config := &Config{}
	config.Config = sarama.NewConfig()
	config.Zookeeper = kazoo.NewConfig()
	config.Offsets.Initial = sarama.OffsetOldest
	config.Offsets.ProcessingTimeout = 60 * time.Second
	config.Offsets.CommitInterval = 10 * time.Second

	return config
}
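
// A minimal sketch of tweaking these defaults before joining a group; the
// values below are illustrative assumptions, not recommendations:
//
//	config := consumergroup.NewConfig()
//	config.Offsets.Initial = sarama.OffsetNewest        // only consume messages produced from now on
//	config.Offsets.ProcessingTimeout = 10 * time.Second // give up on pending offsets sooner
//	if err := config.Validate(); err != nil {
//		log.Fatalln(err)
//	}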

func (cgc *Config) Validate() error {
	if cgc.Zookeeper.Timeout <= 0 {
		return sarama.ConfigurationError("ZookeeperTimeout should have a duration > 0")
	}

	if cgc.Offsets.CommitInterval < 0 {
		return sarama.ConfigurationError("CommitInterval should have a duration >= 0")
	}

	if cgc.Offsets.Initial != sarama.OffsetOldest && cgc.Offsets.Initial != sarama.OffsetNewest {
		return errors.New("Offsets.Initial should be sarama.OffsetOldest or sarama.OffsetNewest.")
	}

	if cgc.Config != nil {
		if err := cgc.Config.Validate(); err != nil {
			return err
		}
	}

	return nil
}

// The ConsumerGroup type holds all the information for a consumer that is part
// of a consumer group. Call JoinConsumerGroup to start a consumer.
type ConsumerGroup struct {
	config *Config

	consumer sarama.Consumer
	kazoo    *kazoo.Kazoo
	group    *kazoo.Consumergroup
	instance *kazoo.ConsumergroupInstance

	wg             sync.WaitGroup
	singleShutdown sync.Once

	messages chan *sarama.ConsumerMessage
	errors   chan error
	stopper  chan struct{}

	consumers kazoo.ConsumergroupInstanceList

	offsetManager OffsetManager
}

// JoinConsumerGroup connects to a consumer group, using Zookeeper for auto-discovery.
func JoinConsumerGroup(name string, topics []string, zookeeper []string, config *Config) (cg *ConsumerGroup, err error) {

	if name == "" {
		return nil, sarama.ConfigurationError("Empty consumergroup name")
	}

	if len(topics) == 0 {
		return nil, sarama.ConfigurationError("No topics provided")
	}

	if len(zookeeper) == 0 {
		return nil, errors.New("You need to provide at least one zookeeper node address!")
	}

	if config == nil {
		config = NewConfig()
	}
	config.ClientID = name

	// Validate configuration
	if err = config.Validate(); err != nil {
		return
	}

	var kz *kazoo.Kazoo
	if kz, err = kazoo.NewKazoo(zookeeper, config.Zookeeper); err != nil {
		return
	}

	brokers, err := kz.BrokerList()
	if err != nil {
		kz.Close()
		return
	}

	group := kz.Consumergroup(name)

	if config.Offsets.ResetOffsets {
		err = group.ResetOffsets()
		if err != nil {
			kz.Close()
			return
		}
	}

	instance := group.NewInstance()

	var consumer sarama.Consumer
	if consumer, err = sarama.NewConsumer(brokers, config.Config); err != nil {
		kz.Close()
		return
	}

	cg = &ConsumerGroup{
		config:   config,
		consumer: consumer,

		kazoo:    kz,
		group:    group,
		instance: instance,

		messages: make(chan *sarama.ConsumerMessage, config.ChannelBufferSize),
		errors:   make(chan error, config.ChannelBufferSize),
		stopper:  make(chan struct{}),
	}

	// Register the consumer group in Zookeeper if it does not exist yet
	if exists, err := cg.group.Exists(); err != nil {
		cg.Logf("FAILED to check for existence of consumergroup: %s!\n", err)
		_ = consumer.Close()
		_ = kz.Close()
		return nil, err
	} else if !exists {
		cg.Logf("Consumergroup `%s` does not yet exist, creating...\n", cg.group.Name)
		if err := cg.group.Create(); err != nil {
			cg.Logf("FAILED to create consumergroup in Zookeeper: %s!\n", err)
			_ = consumer.Close()
			_ = kz.Close()
			return nil, err
		}
	}

	// Register this instance with Zookeeper
	if err := cg.instance.Register(topics); err != nil {
		cg.Logf("FAILED to register consumer instance: %s!\n", err)
		return nil, err
	} else {
		cg.Logf("Consumer instance registered (%s).\n", cg.instance.ID)
	}

	offsetConfig := OffsetManagerConfig{CommitInterval: config.Offsets.CommitInterval}
	cg.offsetManager = NewZookeeperOffsetManager(cg, &offsetConfig)

	go cg.topicListConsumer(topics)

	return
}
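
// An end-to-end usage sketch; the group name, topic, and Zookeeper address
// below are hypothetical placeholders:
//
//	cg, err := consumergroup.JoinConsumerGroup("example-group", []string{"example-topic"}, []string{"localhost:2181"}, nil)
//	if err != nil {
//		log.Fatalln(err)
//	}
//	defer cg.Close()
//
//	for message := range cg.Messages() {
//		// process the message, then mark it as processed so its offset
//		// is committed on the next CommitInterval tick
//		cg.CommitUpto(message)
//	}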

// Messages returns a channel that you can read to obtain the messages consumed from Kafka.
func (cg *ConsumerGroup) Messages() <-chan *sarama.ConsumerMessage {
	return cg.messages
}

// Errors returns a channel that you can read to obtain errors that occur during consumption.
func (cg *ConsumerGroup) Errors() <-chan error {
	return cg.errors
}

// Closed returns true if this consumer group instance has been shut down.
func (cg *ConsumerGroup) Closed() bool {
	return cg.instance == nil
}

// Close stops all consumers, commits outstanding offsets, deregisters the
// instance from Zookeeper, and closes the underlying Sarama consumer. Calling
// Close more than once returns AlreadyClosing.
func (cg *ConsumerGroup) Close() error {
	shutdownError := AlreadyClosing
	cg.singleShutdown.Do(func() {
		defer cg.kazoo.Close()

		shutdownError = nil

		close(cg.stopper)
		cg.wg.Wait()

		if err := cg.offsetManager.Close(); err != nil {
			cg.Logf("FAILED closing the offset manager: %s!\n", err)
		}

		if shutdownError = cg.instance.Deregister(); shutdownError != nil {
			cg.Logf("FAILED deregistering consumer instance: %s!\n", shutdownError)
		} else {
			cg.Logf("Deregistered consumer instance %s.\n", cg.instance.ID)
		}

		if shutdownError = cg.consumer.Close(); shutdownError != nil {
			cg.Logf("FAILED closing the Sarama client: %s\n", shutdownError)
		}

		close(cg.messages)
		close(cg.errors)
		cg.instance = nil
	})

	return shutdownError
}

// Logf writes a log line via sarama.Logger, prefixed with the consumergroup
// name and the last 12 characters of this instance's ID.
func (cg *ConsumerGroup) Logf(format string, args ...interface{}) {
	var identifier string
	if cg.instance == nil {
		identifier = "(defunct)"
	} else {
		identifier = cg.instance.ID[len(cg.instance.ID)-12:]
	}
	sarama.Logger.Printf("[%s/%s] %s", cg.group.Name, identifier, fmt.Sprintf(format, args...))
}

// InstanceRegistered checks in Zookeeper whether this consumer instance is
// still registered with the consumer group.
func (cg *ConsumerGroup) InstanceRegistered() (bool, error) {
	return cg.instance.Registered()
}

// CommitUpto marks this message as processed; its offset will be committed to
// Zookeeper by the offset manager on the next commit interval.
func (cg *ConsumerGroup) CommitUpto(message *sarama.ConsumerMessage) error {
	cg.offsetManager.MarkAsProcessed(message.Topic, message.Partition, message.Offset)
	return nil
}

// FlushOffsets commits processed offsets immediately, instead of waiting for
// the next commit interval.
func (cg *ConsumerGroup) FlushOffsets() error {
	return cg.offsetManager.Flush()
}

func (cg *ConsumerGroup) topicListConsumer(topics []string) {
	for {
		select {
		case <-cg.stopper:
			return
		default:
		}

		consumers, consumerChanges, err := cg.group.WatchInstances()
		if err != nil {
			cg.Logf("FAILED to get list of registered consumer instances: %s\n", err)
			return
		}

		cg.consumers = consumers
		cg.Logf("Currently registered consumers: %d\n", len(cg.consumers))

		stopper := make(chan struct{})

		for _, topic := range topics {
			cg.wg.Add(1)
			go cg.topicConsumer(topic, cg.messages, cg.errors, stopper)
		}

		select {
		case <-cg.stopper:
			close(stopper)
			return

		case <-consumerChanges:
			registered, err := cg.instance.Registered()
			if err != nil {
				cg.Logf("FAILED to get registration status: %s\n", err)
			} else if !registered {
				err = cg.instance.Register(topics)
				if err != nil {
					cg.Logf("FAILED to register consumer instance: %s!\n", err)
				} else {
					cg.Logf("Consumer instance registered (%s).\n", cg.instance.ID)
				}
			}

			cg.Logf("Triggering rebalance due to consumer list change\n")
			close(stopper)
			cg.wg.Wait()
		}
	}
}

func (cg *ConsumerGroup) topicConsumer(topic string, messages chan<- *sarama.ConsumerMessage, errors chan<- error, stopper <-chan struct{}) {
	defer cg.wg.Done()

	select {
	case <-stopper:
		return
	default:
	}

	cg.Logf("%s :: Started topic consumer\n", topic)

	// Fetch a list of partition IDs
	partitions, err := cg.kazoo.Topic(topic).Partitions()
	if err != nil {
		cg.Logf("%s :: FAILED to get list of partitions: %s\n", topic, err)
		cg.errors <- &sarama.ConsumerError{
			Topic:     topic,
			Partition: -1,
			Err:       err,
		}
		return
	}

	partitionLeaders, err := retrievePartitionLeaders(partitions)
	if err != nil {
		cg.Logf("%s :: FAILED to get leaders of partitions: %s\n", topic, err)
		cg.errors <- &sarama.ConsumerError{
			Topic:     topic,
			Partition: -1,
			Err:       err,
		}
		return
	}

	dividedPartitions := dividePartitionsBetweenConsumers(cg.consumers, partitionLeaders)
	myPartitions := dividedPartitions[cg.instance.ID]
	cg.Logf("%s :: Claiming %d of %d partitions\n", topic, len(myPartitions), len(partitionLeaders))

	// Consume all the assigned partitions
	var wg sync.WaitGroup
	for _, pid := range myPartitions {
		wg.Add(1)
		go cg.partitionConsumer(topic, pid.ID, messages, errors, &wg, stopper)
	}

	wg.Wait()
	cg.Logf("%s :: Stopped topic consumer\n", topic)
}

func (cg *ConsumerGroup) consumePartition(topic string, partition int32, nextOffset int64) (sarama.PartitionConsumer, error) {
	consumer, err := cg.consumer.ConsumePartition(topic, partition, nextOffset)
	if err == sarama.ErrOffsetOutOfRange {
		cg.Logf("%s/%d :: Partition consumer offset out of range.\n", topic, partition)
		// If the offset is out of range, simplistically decide whether to use OffsetNewest or OffsetOldest:
		// if the configuration specified OffsetOldest, switch to the oldest available offset; otherwise
		// switch to the newest available offset.
		if cg.config.Offsets.Initial == sarama.OffsetOldest {
			nextOffset = sarama.OffsetOldest
			cg.Logf("%s/%d :: Partition consumer offset reset to oldest available offset.\n", topic, partition)
		} else {
			nextOffset = sarama.OffsetNewest
			cg.Logf("%s/%d :: Partition consumer offset reset to newest available offset.\n", topic, partition)
		}
		// Retry ConsumePartition with the adjusted offset
		consumer, err = cg.consumer.ConsumePartition(topic, partition, nextOffset)
	}
	if err != nil {
		cg.Logf("%s/%d :: FAILED to start partition consumer: %s\n", topic, partition, err)
		return nil, err
	}
	return consumer, err
}

// Consumes a partition
func (cg *ConsumerGroup) partitionConsumer(topic string, partition int32, messages chan<- *sarama.ConsumerMessage, errors chan<- error, wg *sync.WaitGroup, stopper <-chan struct{}) {
	defer wg.Done()

	select {
	case <-stopper:
		return
	default:
	}

	// Since ProcessingTimeout is the amount of time we'll wait for the final batch
	// of messages to be processed before releasing a partition, we need to wait slightly
	// longer than that before timing out here to ensure that another consumer has had
	// enough time to release the partition. Hence, +2 seconds.
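	// For example, with the default ProcessingTimeout of 60 seconds this gives
	// maxRetries = 60 + 2 = 62, i.e. up to 62 one-second claim attempts.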
	maxRetries := int(cg.config.Offsets.ProcessingTimeout/time.Second) + 2
	for tries := 0; tries < maxRetries; tries++ {
		if err := cg.instance.ClaimPartition(topic, partition); err == nil {
			break
		} else if tries+1 < maxRetries {
			if err == kazoo.ErrPartitionClaimedByOther {
				// Another consumer still owns this partition. We should wait longer for it to release it.
				time.Sleep(1 * time.Second)
			} else {
				// An unexpected error occurred. Log it and continue trying until we hit the timeout.
				cg.Logf("%s/%d :: FAILED to claim partition on attempt %v of %v; retrying in 1 second. Error: %v", topic, partition, tries+1, maxRetries, err)
				time.Sleep(1 * time.Second)
			}
		} else {
			cg.Logf("%s/%d :: FAILED to claim the partition: %s\n", topic, partition, err)
			cg.errors <- &sarama.ConsumerError{
				Topic:     topic,
				Partition: partition,
				Err:       err,
			}
			return
		}
	}

	defer func() {
		err := cg.instance.ReleasePartition(topic, partition)
		if err != nil {
			cg.Logf("%s/%d :: FAILED to release partition: %s\n", topic, partition, err)
			cg.errors <- &sarama.ConsumerError{
				Topic:     topic,
				Partition: partition,
				Err:       err,
			}
		}
	}()

	nextOffset, err := cg.offsetManager.InitializePartition(topic, partition)
	if err != nil {
		cg.Logf("%s/%d :: FAILED to determine initial offset: %s\n", topic, partition, err)
		return
	}

	if nextOffset >= 0 {
		cg.Logf("%s/%d :: Partition consumer starting at offset %d.\n", topic, partition, nextOffset)
	} else {
		nextOffset = cg.config.Offsets.Initial
		if nextOffset == sarama.OffsetOldest {
			cg.Logf("%s/%d :: Partition consumer starting at the oldest available offset.\n", topic, partition)
		} else if nextOffset == sarama.OffsetNewest {
			cg.Logf("%s/%d :: Partition consumer listening for new messages only.\n", topic, partition)
		}
	}

	consumer, err := cg.consumePartition(topic, partition, nextOffset)
	if err != nil {
		cg.Logf("%s/%d :: FAILED to start partition consumer: %s\n", topic, partition, err)
		return
	}

	// Close whichever partition consumer is current when this function returns;
	// the consumer variable may be reassigned below if consumption is re-established.
	defer func() { consumer.Close() }()

	var lastOffset int64 = -1 // aka unknown
partitionConsumerLoop:
	for {
		select {
		case <-stopper:
			break partitionConsumerLoop

		case err := <-consumer.Errors():
			if err == nil {
				cg.Logf("%s/%d :: Consumer encountered an invalid state: re-establishing consumption of partition.\n", topic, partition)

				// Errors encountered (if any) are logged in the consumePartition function
				var cErr error
				consumer, cErr = cg.consumePartition(topic, partition, lastOffset)
				if cErr != nil {
					break partitionConsumerLoop
				}
				continue partitionConsumerLoop
			}

			for {
				select {
				case errors <- err:
					continue partitionConsumerLoop

				case <-stopper:
					break partitionConsumerLoop
				}
			}

		case message := <-consumer.Messages():
			if message == nil {
				cg.Logf("%s/%d :: Consumer encountered an invalid state: re-establishing consumption of partition.\n", topic, partition)

				// Errors encountered (if any) are logged in the consumePartition function
				var cErr error
				consumer, cErr = cg.consumePartition(topic, partition, lastOffset)
				if cErr != nil {
					break partitionConsumerLoop
				}
				continue partitionConsumerLoop
			}

			for {
				select {
				case <-stopper:
					break partitionConsumerLoop

				case messages <- message:
					lastOffset = message.Offset
					continue partitionConsumerLoop
				}
			}
		}
	}

	cg.Logf("%s/%d :: Stopping partition consumer at offset %d\n", topic, partition, lastOffset)
	if err := cg.offsetManager.FinalizePartition(topic, partition, lastOffset, cg.config.Offsets.ProcessingTimeout); err != nil {
		cg.Logf("%s/%d :: %s\n", topic, partition, err)
	}
}