github.com/deanMdreon/kafka-go@v0.4.32/reader.go

     1  package kafka
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"math"
     9  	"sort"
    10  	"strconv"
    11  	"sync"
    12  	"sync/atomic"
    13  	"time"
    14  )
    15  
    16  const (
    17  	LastOffset  int64 = -1 // The most recent offset available for a partition.
    18  	FirstOffset int64 = -2 // The least recent offset available for a partition.
    19  )
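
        // Both values may be used as a Reader's StartOffset (for consumer groups) or
        // passed to SetOffset (for single-partition readers).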
    20  
    21  const (
    22  	// defaultCommitRetries holds the number of commit attempts to make
    23  	// before giving up
    24  	defaultCommitRetries = 3
    25  )
    26  
    27  const (
    28  	// defaultFetchMinBytes of 1 byte means that fetch requests are answered as
    29  	// soon as a single byte of data is available or the fetch request times out
    30  	// waiting for data to arrive.
    31  	defaultFetchMinBytes = 1
    32  )
    33  
    34  var (
    35  	errOnlyAvailableWithGroup = errors.New("unavailable when GroupID is not set")
    36  	errNotAvailableWithGroup  = errors.New("unavailable when GroupID is set")
    37  )
    38  
    39  const (
    40  	// defaultReadBackoffMin/Max set the boundaries for how long the reader
    41  	// waits before polling for new messages
    42  	defaultReadBackoffMin = 100 * time.Millisecond
    43  	defaultReadBackoffMax = 1 * time.Second
    44  )
    45  
    46  // Reader provides a high-level API for consuming messages from kafka.
    47  //
    48  // A Reader automatically manages reconnections to a kafka server, and
    49  // blocking methods have context support for asynchronous cancellations.
    50  //
    51  // Note that it is important to call `Close()` on a `Reader` when a process exits.
    52  // The kafka server needs a graceful disconnect to stop it from continuing to
    53  // attempt to send messages to the connected clients. A program will not
    54  // call `Close()` if the process is terminated with SIGINT (ctrl-c at the shell) or
    55  // SIGTERM (as docker stop or a kubernetes restart does). This can result in a
    56  // delay when a new reader on the same topic connects (e.g. new process started
    57  // or new container running). Use a `signal.Notify` handler to close the reader on
    58  // process shutdown.
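        //
        // A minimal shutdown hook might look like this (a sketch; the os, os/signal,
        // and syscall imports are assumed, and error handling is omitted):
        //
        //	r := NewReader(ReaderConfig{ /* ... */ })
        //	sigs := make(chan os.Signal, 1)
        //	signal.Notify(sigs, os.Interrupt, syscall.SIGTERM)
        //	go func() {
        //		<-sigs
        //		r.Close()
        //	}()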
    59  type Reader struct {
    60  	// immutable fields of the reader
    61  	config ReaderConfig
    62  
    63  	// communication channels between the parent reader and its subreaders
    64  	msgs chan readerMessage
    65  
    66  	// mutable fields of the reader (synchronized on the mutex)
    67  	mutex   sync.Mutex
    68  	join    sync.WaitGroup
    69  	cancel  context.CancelFunc
    70  	stop    context.CancelFunc
    71  	done    chan struct{}
    72  	commits chan commitRequest
    73  	version int64 // version holds the generation of the spawned readers
    74  	offset  int64
    75  	lag     int64
    76  	closed  bool
    77  
    78  	// Without a group subscription (when Reader.config.GroupID == ""),
    79  	// when errors occur, the Reader gets a synthetic readerMessage with
    80  	// a non-nil err set. With group subscriptions however, when an error
    81  	// occurs in Reader.run, there's no reader running (sic, cf. reader vs.
    82  	// Reader) and there's no way to let the high-level methods like
    83  	// FetchMessage know that an error indeed occurred. If an error in run
    84  // FetchMessage know that an error indeed occurred. If an error occurs in
    85  // run, it is sent to this unbuffered channel without blocking, where
    86  	runError chan error
    87  
    88  	// once ensures the read lag goroutine is started at most once.
    89  	once  uint32
    90  	stctx context.Context
    91  	// reader stats are all made of atomic values, no need for synchronization.
    92  	// Use a pointer to ensure 64-bit alignment of the values.
    93  	stats *readerStats
    94  }
    95  
    96  // useConsumerGroup indicates whether the Reader is part of a consumer group.
    97  func (r *Reader) useConsumerGroup() bool { return r.config.GroupID != "" }
    98  
    99  func (r *Reader) getTopics() []string {
   100  	if len(r.config.GroupTopics) > 0 {
   101  		return r.config.GroupTopics[:]
   102  	}
   103  
   104  	return []string{r.config.Topic}
   105  }
   106  
   107  // useSyncCommits indicates whether the Reader is configured to perform sync or
   108  // async commits.
   109  func (r *Reader) useSyncCommits() bool { return r.config.CommitInterval == 0 }
   110  
   111  func (r *Reader) unsubscribe() {
   112  	r.cancel()
   113  	r.join.Wait()
   114  	// it would be interesting to drain the r.msgs channel at this point since
   115  	// it will contain buffered messages for partitions that may not be
   116  	// re-assigned to this reader in the next consumer group generation.
   117  	// however, draining the channel could race with the client calling
   118  	// ReadMessage, which could result in messages delivered and/or committed
   119  	// with gaps in the offset.  for now, we will err on the side of caution and
   120  	// potentially have those messages be reprocessed in the next generation by
   121  	// another consumer to avoid such a race.
   122  }
   123  
   124  func (r *Reader) subscribe(allAssignments map[string][]PartitionAssignment) {
   125  	offsets := make(map[topicPartition]int64)
   126  	for topic, assignments := range allAssignments {
   127  		for _, assignment := range assignments {
   128  			key := topicPartition{
   129  				topic:     topic,
   130  				partition: int32(assignment.ID),
   131  			}
   132  			offsets[key] = assignment.Offset
   133  		}
   134  	}
   135  
   136  	r.mutex.Lock()
   137  	r.start(offsets)
   138  	r.mutex.Unlock()
   139  
   140  	r.withLogger(func(l Logger) {
   141  		l.Printf("subscribed to topics and partitions: %+v", offsets)
   142  	})
   143  }
   144  
   145  func (r *Reader) waitThrottleTime(throttleTimeMS int32) {
   146  	if throttleTimeMS == 0 {
   147  		return
   148  	}
   149  
   150  	t := time.NewTimer(time.Duration(throttleTimeMS) * time.Millisecond)
   151  	defer t.Stop()
   152  
   153  	select {
   154  	case <-r.stctx.Done():
   155  		return
   156  	case <-t.C:
   157  	}
   158  }
   159  
   160  // commitOffsetsWithRetry attempts to commit the specified offsets and retries
   161  // up to the specified number of times
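        //
        // Assuming the shared backoff helper grows the delay with each attempt, a
        // failing commit is retried after waits starting near backoffDelayMin and
        // capped at backoffDelayMax.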
   162  func (r *Reader) commitOffsetsWithRetry(gen *Generation, offsetStash offsetStash, retries int) (err error) {
   163  	const (
   164  		backoffDelayMin = 100 * time.Millisecond
   165  		backoffDelayMax = 5 * time.Second
   166  	)
   167  
   168  	for attempt := 0; attempt < retries; attempt++ {
   169  		if attempt != 0 {
   170  			if !sleep(r.stctx, backoff(attempt, backoffDelayMin, backoffDelayMax)) {
   171  				return
   172  			}
   173  		}
   174  
   175  		if err = gen.CommitOffsets(offsetStash); err == nil {
   176  			return
   177  		}
   178  	}
   179  
   180  	return // err will not be nil
   181  }
   182  
   183  // offsetStash holds offsets by topic => partition => offset
   184  type offsetStash map[string]map[int]int64
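
        // For example, after merging commits for partitions 0 and 3 of a topic
        // named "events" (an illustrative name), the stash contains:
        //
        //	offsetStash{"events": {0: 101, 3: 57}}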
   185  
   186  // merge folds the provided commits into the offsetStash, keeping the highest offset per partition
   187  func (o offsetStash) merge(commits []commit) {
   188  	for _, c := range commits {
   189  		offsetsByPartition, ok := o[c.topic]
   190  		if !ok {
   191  			offsetsByPartition = map[int]int64{}
   192  			o[c.topic] = offsetsByPartition
   193  		}
   194  
   195  		if offset, ok := offsetsByPartition[c.partition]; !ok || c.offset > offset {
   196  			offsetsByPartition[c.partition] = c.offset
   197  		}
   198  	}
   199  }
   200  
   201  // reset clears the contents of the offsetStash
   202  func (o offsetStash) reset() {
   203  	for key := range o {
   204  		delete(o, key)
   205  	}
   206  }
   207  
   208  // commitLoopImmediate handles each commit synchronously
   209  func (r *Reader) commitLoopImmediate(ctx context.Context, gen *Generation) {
   210  	offsets := offsetStash{}
   211  
   212  	for {
   213  		select {
   214  		case <-ctx.Done():
   215  			// drain the commit channel and prepare a single, final commit.
   216  			// the commit will combine any outstanding requests and the result
   217  			// will be sent back to all the callers of CommitMessages so that
   218  			// they can return.
   219  			var errchs []chan<- error
   220  			for hasCommits := true; hasCommits; {
   221  				select {
   222  				case req := <-r.commits:
   223  					offsets.merge(req.commits)
   224  					errchs = append(errchs, req.errch)
   225  				default:
   226  					hasCommits = false
   227  				}
   228  			}
   229  			err := r.commitOffsetsWithRetry(gen, offsets, defaultCommitRetries)
   230  			for _, errch := range errchs {
   231  				// NOTE: errch is a buffered channel, so this send will not block.
   232  				errch <- err
   233  			}
   234  			return
   235  
   236  		case req := <-r.commits:
   237  			offsets.merge(req.commits)
   238  			req.errch <- r.commitOffsetsWithRetry(gen, offsets, defaultCommitRetries)
   239  			offsets.reset()
   240  		}
   241  	}
   242  }
   243  
   244  // commitLoopInterval handles each commit asynchronously with a period defined
   245  // by ReaderConfig.CommitInterval
   246  func (r *Reader) commitLoopInterval(ctx context.Context, gen *Generation) {
   247  	ticker := time.NewTicker(r.config.CommitInterval)
   248  	defer ticker.Stop()
   249  
   250  	// the offset stash should not survive rebalances b/c the consumer may
   251  	// receive new assignments.
   252  	offsets := offsetStash{}
   253  
   254  	commit := func() {
   255  		if err := r.commitOffsetsWithRetry(gen, offsets, defaultCommitRetries); err != nil {
   256  			r.withErrorLogger(func(l Logger) { l.Printf("%v", err) })
   257  		} else {
   258  			offsets.reset()
   259  		}
   260  	}
   261  
   262  	for {
   263  		select {
   264  		case <-ctx.Done():
   265  			// drain the commit channel in order to prepare the final commit.
   266  			for hasCommits := true; hasCommits; {
   267  				select {
   268  				case req := <-r.commits:
   269  					offsets.merge(req.commits)
   270  				default:
   271  					hasCommits = false
   272  				}
   273  			}
   274  			commit()
   275  			return
   276  
   277  		case <-ticker.C:
   278  			commit()
   279  
   280  		case req := <-r.commits:
   281  			offsets.merge(req.commits)
   282  		}
   283  	}
   284  }
   285  
   286  // commitLoop processes commits off the commit chan
   287  func (r *Reader) commitLoop(ctx context.Context, gen *Generation) {
   288  	r.withLogger(func(l Logger) {
   289  		l.Printf("started commit for group %s\n", r.config.GroupID)
   290  	})
   291  	defer r.withLogger(func(l Logger) {
   292  		l.Printf("stopped commit for group %s\n", r.config.GroupID)
   293  	})
   294  
   295  	if r.config.CommitInterval == 0 {
   296  		r.commitLoopImmediate(ctx, gen)
   297  	} else {
   298  		r.commitLoopInterval(ctx, gen)
   299  	}
   300  }
   301  
   302  // run provides the main consumer group management loop.  Each iteration performs the
   303  // handshake to join the Reader to the consumer group.
   304  //
   305  // This function is responsible for closing the consumer group upon exit.
   306  func (r *Reader) run(cg *ConsumerGroup) {
   307  	defer close(r.done)
   308  	defer cg.Close()
   309  
   310  	r.withLogger(func(l Logger) {
   311  		l.Printf("entering loop for consumer group, %v\n", r.config.GroupID)
   312  	})
   313  
   314  	for {
   315  		// Limit the number of attempts at waiting for the next
   316  		// consumer generation.
   317  		var err error
   318  		var gen *Generation
   319  		for attempt := 1; attempt <= r.config.MaxAttempts; attempt++ {
   320  			gen, err = cg.Next(r.stctx)
   321  			if err == nil {
   322  				break
   323  			}
   324  			if err == r.stctx.Err() {
   325  				return
   326  			}
   327  			r.stats.errors.observe(1)
   328  			r.withErrorLogger(func(l Logger) {
   329  				l.Printf("%v", err)
   330  			})
   331  			// Continue with next attempt...
   332  		}
   333  		if err != nil {
   334  			// All attempts have failed.
   335  			select {
   336  			case r.runError <- err:
   337  				// If somebody's receiving on the runError, let
   338  				// them know the error occurred.
   339  			default:
   340  				// Otherwise, don't block to allow healing.
   341  			}
   342  			continue
   343  		}
   344  
   345  		r.stats.rebalances.observe(1)
   346  
   347  		r.subscribe(gen.Assignments)
   348  
   349  		gen.Start(func(ctx context.Context) {
   350  			r.commitLoop(ctx, gen)
   351  		})
   352  		gen.Start(func(ctx context.Context) {
   353  			// wait for the generation to end and then unsubscribe.
   354  			select {
   355  			case <-ctx.Done():
   356  				// continue to next generation
   357  			case <-r.stctx.Done():
   358  				// this will be the last loop because the reader is closed.
   359  			}
   360  			r.unsubscribe()
   361  		})
   362  	}
   363  }
   364  
   365  // ReaderConfig is a configuration object used to create new instances of
   366  // Reader.
   367  type ReaderConfig struct {
   368  	// The list of broker addresses used to connect to the kafka cluster.
   369  	Brokers []string
   370  
   371  	// GroupID holds the optional consumer group id.  If GroupID is specified,
   372  	// then Partition must NOT be specified.
   373  	GroupID string
   374  
   375  	// GroupTopics allows specifying multiple topics, but can only be used in
   376  	// combination with GroupID, as it is a consumer-group feature. As such, if
   377  	// GroupID is set, then either Topic or GroupTopics must be defined.
   378  	GroupTopics []string
   379  
   380  	// The topic to read messages from.
   381  	Topic string
   382  
   383  	// Partition to read messages from.  Either Partition or GroupID may
   384  	// be assigned, but not both
   385  	Partition int
   386  
   387  	// A dialer used to open connections to the kafka server. This field is
   388  	// optional; if nil, the default dialer is used instead.
   389  	Dialer *Dialer
   390  
   391  	// The capacity of the internal message queue, defaults to 100 if none is
   392  	// set.
   393  	QueueCapacity int
   394  
   395  	// MinBytes indicates to the broker the minimum batch size that the consumer
   396  	// will accept. Setting a high minimum when consuming from a low-volume topic
   397  	// may result in delayed delivery when the broker does not have enough data to
   398  	// satisfy the defined minimum.
   399  	//
   400  	// Default: 1
   401  	MinBytes int
   402  
   403  	// MaxBytes indicates to the broker the maximum batch size that the consumer
   404  	// will accept. The broker will truncate a message to satisfy this maximum, so
   405  	// choose a value that is high enough for your largest message size.
   406  	//
   407  	// Default: 1MB
   408  	MaxBytes int
   409  
   410  	// Maximum amount of time to wait for new data to come when fetching batches
   411  	// of messages from kafka.
   412  	//
   413  	// Default: 10s
   414  	MaxWait time.Duration
   415  
   416  	// ReadLagInterval sets the frequency at which the reader lag is updated.
   417  	// Setting this field to a negative value disables lag reporting.
   418  	ReadLagInterval time.Duration
   419  
   420  	// GroupBalancers is the priority-ordered list of client-side consumer group
   421  	// balancing strategies that will be offered to the coordinator.  The first
   422  	// strategy that all group members support will be chosen by the leader.
   423  	//
   424  	// Default: [Range, RoundRobin]
   425  	//
   426  	// Only used when GroupID is set
   427  	GroupBalancers []GroupBalancer
   428  
   429  	// HeartbeatInterval sets the optional frequency at which the reader sends the consumer
   430  	// group heartbeat update.
   431  	//
   432  	// Default: 3s
   433  	//
   434  	// Only used when GroupID is set
   435  	HeartbeatInterval time.Duration
   436  
   437  	// CommitInterval indicates the interval at which offsets are committed to
   438  	// the broker.  If 0, commits will be handled synchronously.
   439  	//
   440  	// Default: 0
   441  	//
   442  	// Only used when GroupID is set
   443  	CommitInterval time.Duration
   444  
   445  	// PartitionWatchInterval indicates how often a reader checks for partition changes.
   446  	// If a reader sees a partition change (such as a partition add) it will rebalance
   447  	// the group, picking up new partitions.
   448  	//
   449  	// Default: 5s
   450  	//
   451  	// Only used when GroupID is set and WatchPartitionChanges is set.
   452  	PartitionWatchInterval time.Duration
   453  
   454  	// WatchPartitionChanges is used to inform kafka-go that a consumer group should be
   455  	// polling the brokers and rebalancing if any partition changes happen to the topic.
   456  	WatchPartitionChanges bool
   457  
   458  	// SessionTimeout optionally sets the length of time that may pass without a heartbeat
   459  	// before the coordinator considers the consumer dead and initiates a rebalance.
   460  	//
   461  	// Default: 30s
   462  	//
   463  	// Only used when GroupID is set
   464  	SessionTimeout time.Duration
   465  
   466  	// RebalanceTimeout optionally sets the length of time the coordinator will wait
   467  	// for members to join as part of a rebalance.  For kafka servers under higher
   468  	// load, it may be useful to set this value higher.
   469  	//
   470  	// Default: 30s
   471  	//
   472  	// Only used when GroupID is set
   473  	RebalanceTimeout time.Duration
   474  
   475  	// JoinGroupBackoff optionally sets the length of time to wait between re-joining
   476  	// the consumer group after an error.
   477  	//
   478  	// Default: 5s
   479  	JoinGroupBackoff time.Duration
   480  
   481  	// RetentionTime optionally sets the length of time the consumer group will be saved
   482  	// by the broker
   483  	//
   484  	// Default: 24h
   485  	//
   486  	// Only used when GroupID is set
   487  	RetentionTime time.Duration
   488  
   489  	// StartOffset determines from where the consumer group should begin
   490  	// consuming when it finds a partition without a committed offset.  If
   491  	// non-zero, it must be set to one of FirstOffset or LastOffset.
   492  	//
   493  	// Default: FirstOffset
   494  	//
   495  	// Only used when GroupID is set
   496  	StartOffset int64
   497  
   498  	// ReadBackoffMin optionally sets the smallest amount of time the reader will wait before
   499  	// polling for new messages
   500  	//
   501  	// Default: 100ms
   502  	ReadBackoffMin time.Duration
   503  
   504  	// ReadBackoffMax optionally sets the maximum amount of time the reader will wait before
   505  	// polling for new messages
   506  	//
   507  	// Default: 1s
   508  	ReadBackoffMax time.Duration
   509  
   510  	// If not nil, specifies a logger used to report internal changes within the
   511  	// reader.
   512  	Logger Logger
   513  
   514  	// ErrorLogger is the logger used to report errors. If nil, the reader falls
   515  	// back to using Logger instead.
   516  	ErrorLogger Logger
   517  
   518  	// IsolationLevel controls the visibility of transactional records.
   519  	// ReadUncommitted makes all records visible. With ReadCommitted only
   520  	// non-transactional and committed records are visible.
   521  	IsolationLevel IsolationLevel
   522  
   523  	// Limit of how many attempts will be made before delivering the error.
   524  	//
   525  	// The default is to try 3 times.
   526  	MaxAttempts int
   527  }
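
        // A minimal consumer-group configuration might look like this (a sketch; the
        // broker address, group id, and topic name are placeholders):
        //
        //	r := NewReader(ReaderConfig{
        //		Brokers:        []string{"localhost:9092"},
        //		GroupID:        "example-group",
        //		Topic:          "example-topic",
        //		MinBytes:       1,   // answer fetches as soon as any data is available
        //		MaxBytes:       1e6, // cap fetches at ~1MB
        //		CommitInterval: time.Second, // async commits; 0 means synchronous
        //	})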
   528  
   529  // Validate validates the ReaderConfig properties.
   530  func (config *ReaderConfig) Validate() error {
   531  	if len(config.Brokers) == 0 {
   532  		return errors.New("cannot create a new kafka reader with an empty list of broker addresses")
   533  	}
   534  
   535  	if config.Partition < 0 || config.Partition >= math.MaxInt32 {
   536  		return fmt.Errorf("partition number out of bounds: %d", config.Partition)
   537  	}
   538  
   539  	if config.MinBytes < 0 {
   540  		return fmt.Errorf("invalid negative minimum batch size (min = %d)", config.MinBytes)
   541  	}
   542  
   543  	if config.MaxBytes < 0 {
   544  		return fmt.Errorf("invalid negative maximum batch size (max = %d)", config.MaxBytes)
   545  	}
   546  
   547  	if config.GroupID != "" {
   548  		if config.Partition != 0 {
   549  			return errors.New("either Partition or GroupID may be specified, but not both")
   550  		}
   551  
   552  		if len(config.Topic) == 0 && len(config.GroupTopics) == 0 {
   553  			return errors.New("either Topic or GroupTopics must be specified with GroupID")
   554  		}
   555  	} else if len(config.Topic) == 0 {
   556  		return errors.New("cannot create a new kafka reader with an empty topic")
   557  	}
   558  
   559  	if config.MinBytes > config.MaxBytes {
   560  		return fmt.Errorf("minimum batch size greater than the maximum (min = %d, max = %d)", config.MinBytes, config.MaxBytes)
   561  	}
   562  
   563  	if config.ReadBackoffMax < 0 {
   564  		return fmt.Errorf("ReadBackoffMax out of bounds: %d", config.ReadBackoffMax)
   565  	}
   566  
   567  	if config.ReadBackoffMin < 0 {
   568  		return fmt.Errorf("ReadBackoffMin out of bounds: %d", config.ReadBackoffMin)
   569  	}
   570  
   571  	return nil
   572  }
   573  
   574  // ReaderStats is a data structure returned by a call to Reader.Stats that exposes
   575  // details about the behavior of the reader.
   576  type ReaderStats struct {
   577  	Dials      int64 `metric:"kafka.reader.dial.count"      type:"counter"`
   578  	Fetches    int64 `metric:"kafka.reader.fetch.count"     type:"counter"`
   579  	Messages   int64 `metric:"kafka.reader.message.count"   type:"counter"`
   580  	Bytes      int64 `metric:"kafka.reader.message.bytes"   type:"counter"`
   581  	Rebalances int64 `metric:"kafka.reader.rebalance.count" type:"counter"`
   582  	Timeouts   int64 `metric:"kafka.reader.timeout.count"   type:"counter"`
   583  	Errors     int64 `metric:"kafka.reader.error.count"     type:"counter"`
   584  
   585  	DialTime   DurationStats `metric:"kafka.reader.dial.seconds"`
   586  	ReadTime   DurationStats `metric:"kafka.reader.read.seconds"`
   587  	WaitTime   DurationStats `metric:"kafka.reader.wait.seconds"`
   588  	FetchSize  SummaryStats  `metric:"kafka.reader.fetch.size"`
   589  	FetchBytes SummaryStats  `metric:"kafka.reader.fetch.bytes"`
   590  
   591  	Offset        int64         `metric:"kafka.reader.offset"          type:"gauge"`
   592  	Lag           int64         `metric:"kafka.reader.lag"             type:"gauge"`
   593  	MinBytes      int64         `metric:"kafka.reader.fetch_bytes.min" type:"gauge"`
   594  	MaxBytes      int64         `metric:"kafka.reader.fetch_bytes.max" type:"gauge"`
   595  	MaxWait       time.Duration `metric:"kafka.reader.fetch_wait.max"  type:"gauge"`
   596  	QueueLength   int64         `metric:"kafka.reader.queue.length"    type:"gauge"`
   597  	QueueCapacity int64         `metric:"kafka.reader.queue.capacity"  type:"gauge"`
   598  
   599  	ClientID  string `tag:"client_id"`
   600  	Topic     string `tag:"topic"`
   601  	Partition string `tag:"partition"`
   602  
   603  	// The original `Fetches` field had a typo where the metric name was called
   604  	// "kafak..." instead of "kafka...". In order to offer time to fix monitors
   605  	// that may be relying on this mistake, we are temporarily introducing this
   606  	// field.
   607  	DeprecatedFetchesWithTypo int64 `metric:"kafak.reader.fetch.count" type:"counter"`
   608  }
   609  
   610  // readerStats is a struct that contains statistics on a reader.
   611  type readerStats struct {
   612  	dials      counter
   613  	fetches    counter
   614  	messages   counter
   615  	bytes      counter
   616  	rebalances counter
   617  	timeouts   counter
   618  	errors     counter
   619  	dialTime   summary
   620  	readTime   summary
   621  	waitTime   summary
   622  	fetchSize  summary
   623  	fetchBytes summary
   624  	offset     gauge
   625  	lag        gauge
   626  	partition  string
   627  }
   628  
   629  // NewReader creates and returns a new Reader configured with config.
   630  // The offset is initialized to FirstOffset.
   631  func NewReader(config ReaderConfig) *Reader {
   632  	if err := config.Validate(); err != nil {
   633  		panic(err)
   634  	}
   635  
   636  	if config.GroupID != "" {
   637  		if len(config.GroupBalancers) == 0 {
   638  			config.GroupBalancers = []GroupBalancer{
   639  				RangeGroupBalancer{},
   640  				RoundRobinGroupBalancer{},
   641  			}
   642  		}
   643  	}
   644  
   645  	if config.Dialer == nil {
   646  		config.Dialer = DefaultDialer
   647  	}
   648  
   649  	if config.MaxBytes == 0 {
   650  		config.MaxBytes = 1e6 // 1 MB
   651  	}
   652  
   653  	if config.MinBytes == 0 {
   654  		config.MinBytes = defaultFetchMinBytes
   655  	}
   656  
   657  	if config.MaxWait == 0 {
   658  		config.MaxWait = 10 * time.Second
   659  	}
   660  
   661  	if config.ReadLagInterval == 0 {
   662  		config.ReadLagInterval = 1 * time.Minute
   663  	}
   664  
   665  	if config.ReadBackoffMin == 0 {
   666  		config.ReadBackoffMin = defaultReadBackoffMin
   667  	}
   668  
   669  	if config.ReadBackoffMax == 0 {
   670  		config.ReadBackoffMax = defaultReadBackoffMax
   671  	}
   672  
   673  	if config.ReadBackoffMax < config.ReadBackoffMin {
   674  		panic(fmt.Errorf("ReadBackoffMax %d smaller than ReadBackoffMin %d", config.ReadBackoffMax, config.ReadBackoffMin))
   675  	}
   676  
   677  	if config.QueueCapacity == 0 {
   678  		config.QueueCapacity = 100
   679  	}
   680  
   681  	if config.MaxAttempts == 0 {
   682  		config.MaxAttempts = 3
   683  	}
   684  
   685  	// when configured as a consumer group, stats should report a partition of -1
   686  	readerStatsPartition := config.Partition
   687  	if config.GroupID != "" {
   688  		readerStatsPartition = -1
   689  	}
   690  
   691  	// when configured as a consumer group, start version as 1 to ensure that only
   692  	// the rebalance function will start readers
   693  	version := int64(0)
   694  	if config.GroupID != "" {
   695  		version = 1
   696  	}
   697  
   698  	stctx, stop := context.WithCancel(context.Background())
   699  	r := &Reader{
   700  		config:  config,
   701  		msgs:    make(chan readerMessage, config.QueueCapacity),
   702  		cancel:  func() {},
   703  		commits: make(chan commitRequest, config.QueueCapacity),
   704  		stop:    stop,
   705  		offset:  FirstOffset,
   706  		stctx:   stctx,
   707  		stats: &readerStats{
   708  			dialTime:   makeSummary(),
   709  			readTime:   makeSummary(),
   710  			waitTime:   makeSummary(),
   711  			fetchSize:  makeSummary(),
   712  			fetchBytes: makeSummary(),
   713  			// Generate the string representation of the partition number only
   714  			// once when the reader is created.
   715  			partition: strconv.Itoa(readerStatsPartition),
   716  		},
   717  		version: version,
   718  	}
   719  	if r.useConsumerGroup() {
   720  		r.done = make(chan struct{})
   721  		r.runError = make(chan error)
   722  		cg, err := NewConsumerGroup(ConsumerGroupConfig{
   723  			ID:                     r.config.GroupID,
   724  			Brokers:                r.config.Brokers,
   725  			Dialer:                 r.config.Dialer,
   726  			Topics:                 r.getTopics(),
   727  			GroupBalancers:         r.config.GroupBalancers,
   728  			HeartbeatInterval:      r.config.HeartbeatInterval,
   729  			PartitionWatchInterval: r.config.PartitionWatchInterval,
   730  			WatchPartitionChanges:  r.config.WatchPartitionChanges,
   731  			SessionTimeout:         r.config.SessionTimeout,
   732  			RebalanceTimeout:       r.config.RebalanceTimeout,
   733  			JoinGroupBackoff:       r.config.JoinGroupBackoff,
   734  			RetentionTime:          r.config.RetentionTime,
   735  			StartOffset:            r.config.StartOffset,
   736  			Logger:                 r.config.Logger,
   737  			ErrorLogger:            r.config.ErrorLogger,
   738  		})
   739  		if err != nil {
   740  			panic(err)
   741  		}
   742  		go r.run(cg)
   743  	}
   744  
   745  	return r
   746  }
   747  
   748  // Config returns the reader's configuration.
   749  func (r *Reader) Config() ReaderConfig {
   750  	return r.config
   751  }
   752  
   753  // Close closes the stream, preventing the program from reading any more
   754  // messages from it.
   755  func (r *Reader) Close() error {
   756  	atomic.StoreUint32(&r.once, 1)
   757  
   758  	r.mutex.Lock()
   759  	closed := r.closed
   760  	r.closed = true
   761  	r.mutex.Unlock()
   762  
   763  	r.cancel()
   764  	r.stop()
   765  	r.join.Wait()
   766  
   767  	if r.done != nil {
   768  		<-r.done
   769  	}
   770  
   771  	if !closed {
   772  		close(r.msgs)
   773  	}
   774  
   775  	return nil
   776  }
   777  
   778  // ReadMessage reads and returns the next message from r. The method call
   779  // blocks until a message becomes available, or an error occurs. The program
   780  // may also specify a context to asynchronously cancel the blocking operation.
   781  //
   782  // The method returns io.EOF to indicate that the reader has been closed.
   783  //
   784  // If consumer groups are used, ReadMessage will automatically commit the
   785  // offset when called. Note that this could result in an offset being committed
   786  // before the message is fully processed.
   787  //
   788  // If finer-grained control over when offsets are committed is required, it
   789  // is recommended to use FetchMessage with CommitMessages instead.
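        //
        // A typical read loop might look like this (a sketch; errors simply end the
        // loop, and io.EOF means the reader was closed):
        //
        //	for {
        //		m, err := r.ReadMessage(ctx)
        //		if err != nil {
        //			break
        //		}
        //		fmt.Printf("offset %d: %s\n", m.Offset, string(m.Value))
        //	}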
   790  func (r *Reader) ReadMessage(ctx context.Context) (Message, error) {
   791  	m, err := r.FetchMessage(ctx)
   792  	if err != nil {
   793  		return Message{}, err
   794  	}
   795  
   796  	if r.useConsumerGroup() {
   797  		if err := r.CommitMessages(ctx, m); err != nil {
   798  			return Message{}, err
   799  		}
   800  	}
   801  
   802  	return m, nil
   803  }
   804  
   805  // FetchMessage reads and returns the next message from r. The method call
   806  // blocks until a message becomes available, or an error occurs. The program
   807  // may also specify a context to asynchronously cancel the blocking operation.
   808  //
   809  // The method returns io.EOF to indicate that the reader has been closed.
   810  //
   811  // FetchMessage does not commit offsets automatically when using consumer groups.
   812  // Use CommitMessages to commit the offset.
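        //
        // A fetch/commit loop that commits only after a message has been processed
        // might look like this (a sketch; process is a hypothetical application
        // callback):
        //
        //	for {
        //		m, err := r.FetchMessage(ctx)
        //		if err != nil {
        //			break
        //		}
        //		process(m)
        //		if err := r.CommitMessages(ctx, m); err != nil {
        //			break
        //		}
        //	}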
   813  func (r *Reader) FetchMessage(ctx context.Context) (Message, error) {
   814  	r.activateReadLag()
   815  
   816  	for {
   817  		r.mutex.Lock()
   818  
   819  		if !r.closed && r.version == 0 {
   820  			r.start(r.getTopicPartitionOffset())
   821  		}
   822  
   823  		version := r.version
   824  		r.mutex.Unlock()
   825  
   826  		select {
   827  		case <-ctx.Done():
   828  			return Message{}, ctx.Err()
   829  
   830  		case err := <-r.runError:
   831  			return Message{}, err
   832  
   833  		case m, ok := <-r.msgs:
   834  			if !ok {
   835  				return Message{}, io.EOF
   836  			}
   837  
   838  			if m.version >= version {
   839  				r.mutex.Lock()
   840  
   841  				switch {
   842  				case m.error != nil:
   843  				case version == r.version:
   844  					r.offset = m.message.Offset + 1
   845  					r.lag = m.watermark - r.offset
   846  				}
   847  
   848  				r.mutex.Unlock()
   849  
   850  				switch m.error {
   851  				case nil:
   852  				case io.EOF:
   853  					// io.EOF is used as a marker to indicate that the stream
   854  					// has been closed, in case it was received from the inner
   855  					// reader we don't want to confuse the program and replace
   856  					// the error with io.ErrUnexpectedEOF.
   857  					m.error = io.ErrUnexpectedEOF
   858  				}
   859  
   860  				return m.message, m.error
   861  			}
   862  		}
   863  	}
   864  }
   865  
   866  // CommitMessages commits the list of messages passed as argument. The program
   867  // may pass a context to asynchronously cancel the commit operation when it was
   868  // configured to be blocking.
   869  //
   870  // Because kafka consumer groups track a single offset per partition, the
   871  // highest message offset passed to CommitMessages will cause all previous
   872  // messages to be committed. Applications need to account for these Kafka
   873  // limitations when committing messages, and maintain message ordering if they
   874  // need strong delivery guarantees. This property makes it valid to pass only
   875  // the last message seen to CommitMessages in order to move the offset of the
   876  // topic/partition it belonged to forward, effectively committing all previous
   877  // messages in the partition.
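        //
        // For example, committing only the last message of an offset-ordered batch
        // from a single partition commits the whole batch (a sketch; msgs is a
        // hypothetical []Message of already-processed messages):
        //
        //	if err := r.CommitMessages(ctx, msgs[len(msgs)-1]); err != nil {
        //		// handle the commit error
        //	}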
   878  func (r *Reader) CommitMessages(ctx context.Context, msgs ...Message) error {
   879  	if !r.useConsumerGroup() {
   880  		return errOnlyAvailableWithGroup
   881  	}
   882  
   883  	var errch <-chan error
   884  	creq := commitRequest{
   885  		commits: makeCommits(msgs...),
   886  	}
   887  
   888  	if r.useSyncCommits() {
   889  		ch := make(chan error, 1)
   890  		errch, creq.errch = ch, ch
   891  	}
   892  
   893  	select {
   894  	case r.commits <- creq:
   895  	case <-ctx.Done():
   896  		return ctx.Err()
   897  	case <-r.stctx.Done():
   898  		// This context is used to ensure we don't allow commits after the
   899  		// reader was closed.
   900  		return io.ErrClosedPipe
   901  	}
   902  
   903  	if !r.useSyncCommits() {
   904  		return nil
   905  	}
   906  
   907  	select {
   908  	case <-ctx.Done():
   909  		return ctx.Err()
   910  	case err := <-errch:
   911  		return err
   912  	}
   913  }
   914  
   915  // ReadLag returns the current lag of the reader by fetching the last offset of
   916  // the topic and partition and computing the difference between that value and
   917  // the offset of the last message returned by ReadMessage.
   918  //
   919  // This method is intended to be used in cases where a program may be unable to
   920  // call ReadMessage to update the value returned by Lag, but still needs to get
   921  // an up to date estimation of how far behind the reader is. For example when
   922  // the consumer is not ready to process the next message.
   923  //
   924  // The function returns a lag of zero when the reader's current offset is
   925  // negative.
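        //
        // For example (a sketch; the threshold is arbitrary):
        //
        //	if lag, err := r.ReadLag(ctx); err == nil && lag > 10000 {
        //		// the reader is falling behind; alert or add consumers
        //	}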
   926  func (r *Reader) ReadLag(ctx context.Context) (lag int64, err error) {
   927  	if r.useConsumerGroup() {
   928  		return 0, errNotAvailableWithGroup
   929  	}
   930  
   931  	type offsets struct {
   932  		first int64
   933  		last  int64
   934  	}
   935  
   936  	offch := make(chan offsets, 1)
   937  	errch := make(chan error, 1)
   938  
   939  	go func() {
   940  		var off offsets
   941  		var err error
   942  
   943  		for _, broker := range r.config.Brokers {
   944  			var conn *Conn
   945  
   946  			if conn, err = r.config.Dialer.DialLeader(ctx, "tcp", broker, r.config.Topic, r.config.Partition); err != nil {
   947  				continue
   948  			}
   949  
   950  			deadline, _ := ctx.Deadline()
   951  			conn.SetDeadline(deadline)
   952  
   953  			off.first, off.last, err = conn.ReadOffsets()
   954  			conn.Close()
   955  
   956  			if err == nil {
   957  				break
   958  			}
   959  		}
   960  
   961  		if err != nil {
   962  			errch <- err
   963  		} else {
   964  			offch <- off
   965  		}
   966  	}()
   967  
   968  	select {
   969  	case off := <-offch:
   970  		switch cur := r.Offset(); {
   971  		case cur == FirstOffset:
   972  			lag = off.last - off.first
   973  
   974  		case cur == LastOffset:
   975  			lag = 0
   976  
   977  		default:
   978  			lag = off.last - cur
   979  		}
   980  	case err = <-errch:
   981  	case <-ctx.Done():
   982  		err = ctx.Err()
   983  	}
   984  
   985  	return
   986  }
   987  
   988  // Offset returns the current absolute offset of the reader, or -1
   989  // if r is backed by a consumer group.
   990  func (r *Reader) Offset() int64 {
   991  	if r.useConsumerGroup() {
   992  		return -1
   993  	}
   994  
   995  	r.mutex.Lock()
   996  	offset := r.offset
   997  	r.mutex.Unlock()
   998  	r.withLogger(func(log Logger) {
   999  		log.Printf("looking up offset of kafka reader for partition %d of %s: %d", r.config.Partition, r.config.Topic, offset)
  1000  	})
  1001  	return offset
  1002  }
  1003  
  1004  // Lag returns the lag of the last message returned by ReadMessage, or -1
  1005  // if r is backed by a consumer group.
  1006  func (r *Reader) Lag() int64 {
  1007  	if r.useConsumerGroup() {
  1008  		return -1
  1009  	}
  1010  
  1011  	r.mutex.Lock()
  1012  	lag := r.lag
  1013  	r.mutex.Unlock()
  1014  	return lag
  1015  }
  1016  
  1017  // SetOffset changes the offset from which the next batch of messages will be
  1018  // read. The method fails with io.ErrClosedPipe if the reader has already been closed.
  1019  //
  1020  // From version 0.2.0, FirstOffset and LastOffset can be used to indicate the first
  1021  // or last available offset in the partition. Please note while -1 and -2 were accepted
  1022  // to indicate the first or last offset in previous versions, the meanings of the numbers
  1023  // were swapped in 0.2.0 to match the meanings in other libraries and the Kafka protocol
  1024  // specification.
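        //
        // For example, to skip directly to the newest available message (a sketch):
        //
        //	if err := r.SetOffset(LastOffset); err != nil {
        //		// handle the error
        //	}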
  1025  func (r *Reader) SetOffset(offset int64) error {
  1026  	if r.useConsumerGroup() {
  1027  		return errNotAvailableWithGroup
  1028  	}
  1029  
  1030  	var err error
  1031  	r.mutex.Lock()
  1032  
  1033  	if r.closed {
  1034  		err = io.ErrClosedPipe
  1035  	} else if offset != r.offset {
  1036  		r.withLogger(func(log Logger) {
  1037  			log.Printf("setting the offset of the kafka reader for partition %d of %s from %d to %d",
  1038  				r.config.Partition, r.config.Topic, r.offset, offset)
  1039  		})
  1040  		r.offset = offset
  1041  
  1042  		if r.version != 0 {
  1043  			r.start(r.getTopicPartitionOffset())
  1044  		}
  1045  
  1046  		r.activateReadLag()
  1047  	}
  1048  
  1049  	r.mutex.Unlock()
  1050  	return err
  1051  }
  1052  
  1053  // SetOffsetAt changes the offset from which the next batch of messages will be
  1054  // read given the timestamp t.
  1055  //
  1056  // The method fails if it is unable to connect to the partition leader, unable
  1057  // to read the offset for the given timestamp, or if the reader has been closed.
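        //
        // For example, to resume from messages produced in the last hour (a sketch):
        //
        //	if err := r.SetOffsetAt(ctx, time.Now().Add(-time.Hour)); err != nil {
        //		// handle the error
        //	}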
  1058  func (r *Reader) SetOffsetAt(ctx context.Context, t time.Time) error {
  1059  	r.mutex.Lock()
  1060  	if r.closed {
  1061  		r.mutex.Unlock()
  1062  		return io.ErrClosedPipe
  1063  	}
  1064  	r.mutex.Unlock()
  1065  
  1066  	for _, broker := range r.config.Brokers {
  1067  		conn, err := r.config.Dialer.DialLeader(ctx, "tcp", broker, r.config.Topic, r.config.Partition)
  1068  		if err != nil {
  1069  			continue
  1070  		}
  1071  
  1072  		deadline, _ := ctx.Deadline()
  1073  		conn.SetDeadline(deadline)
  1074  		offset, err := conn.ReadOffset(t)
  1075  		conn.Close()
  1076  		if err != nil {
  1077  			return err
  1078  		}
  1079  
  1080  		return r.SetOffset(offset)
  1081  	}
  1082  	return fmt.Errorf("error setting offset for timestamp %+v", t)
  1083  }
  1084  
  1085  // Stats returns a snapshot of the reader stats since the last time the method
  1086  // was called, or since the reader was created if it is called for the first
  1087  // time.
  1088  //
  1089  // A typical use of this method is to spawn a goroutine that will periodically
  1090  // call Stats on a kafka reader and report the metrics to a stats collection
  1091  // system.
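        //
        // For example (a sketch; report is a hypothetical metrics sink):
        //
        //	go func() {
        //		for range time.Tick(time.Minute) {
        //			report(r.Stats())
        //		}
        //	}()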
  1092  func (r *Reader) Stats() ReaderStats {
  1093  	stats := ReaderStats{
  1094  		Dials:         r.stats.dials.snapshot(),
  1095  		Fetches:       r.stats.fetches.snapshot(),
  1096  		Messages:      r.stats.messages.snapshot(),
  1097  		Bytes:         r.stats.bytes.snapshot(),
  1098  		Rebalances:    r.stats.rebalances.snapshot(),
  1099  		Timeouts:      r.stats.timeouts.snapshot(),
  1100  		Errors:        r.stats.errors.snapshot(),
  1101  		DialTime:      r.stats.dialTime.snapshotDuration(),
  1102  		ReadTime:      r.stats.readTime.snapshotDuration(),
  1103  		WaitTime:      r.stats.waitTime.snapshotDuration(),
  1104  		FetchSize:     r.stats.fetchSize.snapshot(),
  1105  		FetchBytes:    r.stats.fetchBytes.snapshot(),
  1106  		Offset:        r.stats.offset.snapshot(),
  1107  		Lag:           r.stats.lag.snapshot(),
  1108  		MinBytes:      int64(r.config.MinBytes),
  1109  		MaxBytes:      int64(r.config.MaxBytes),
  1110  		MaxWait:       r.config.MaxWait,
  1111  		QueueLength:   int64(len(r.msgs)),
  1112  		QueueCapacity: int64(cap(r.msgs)),
  1113  		ClientID:      r.config.Dialer.ClientID,
  1114  		Topic:         r.config.Topic,
  1115  		Partition:     r.stats.partition,
  1116  	}
  1117  	// TODO: remove when we get rid of the deprecated field.
  1118  	stats.DeprecatedFetchesWithTypo = stats.Fetches
  1119  	return stats
  1120  }
  1121  
  1122  func (r *Reader) getTopicPartitionOffset() map[topicPartition]int64 {
  1123  	key := topicPartition{topic: r.config.Topic, partition: int32(r.config.Partition)}
  1124  	return map[topicPartition]int64{key: r.offset}
  1125  }
  1126  
  1127  func (r *Reader) withLogger(do func(Logger)) {
  1128  	if r.config.Logger != nil {
  1129  		do(r.config.Logger)
  1130  	}
  1131  }
  1132  
  1133  func (r *Reader) withErrorLogger(do func(Logger)) {
  1134  	if r.config.ErrorLogger != nil {
  1135  		do(r.config.ErrorLogger)
  1136  	} else {
  1137  		r.withLogger(do)
  1138  	}
  1139  }
  1140  
  1141  func (r *Reader) activateReadLag() {
  1142  	if r.config.ReadLagInterval > 0 && atomic.CompareAndSwapUint32(&r.once, 0, 1) {
  1143  		// read lag will only be calculated when not using consumer groups
  1144  		// todo discuss how capturing read lag should interact with rebalancing
  1145  		if !r.useConsumerGroup() {
  1146  			go r.readLag(r.stctx)
  1147  		}
  1148  	}
  1149  }
  1150  
  1151  func (r *Reader) readLag(ctx context.Context) {
  1152  	ticker := time.NewTicker(r.config.ReadLagInterval)
  1153  	defer ticker.Stop()
  1154  
  1155  	for {
  1156  		timeout, cancel := context.WithTimeout(ctx, r.config.ReadLagInterval/2)
  1157  		lag, err := r.ReadLag(timeout)
  1158  		cancel()
  1159  
  1160  		if err != nil {
  1161  			r.stats.errors.observe(1)
  1162  			r.withErrorLogger(func(log Logger) {
  1163  				log.Printf("kafka reader failed to read lag of partition %d of %s: %s", r.config.Partition, r.config.Topic, err)
  1164  			})
  1165  		} else {
  1166  			r.stats.lag.observe(lag)
  1167  		}
  1168  
  1169  		select {
  1170  		case <-ticker.C:
  1171  		case <-ctx.Done():
  1172  			return
  1173  		}
  1174  	}
  1175  }
  1176  
  1177  func (r *Reader) start(offsetsByPartition map[topicPartition]int64) {
  1178  	if r.closed {
  1179  		// don't start child reader if parent Reader is closed
  1180  		return
  1181  	}
  1182  
  1183  	ctx, cancel := context.WithCancel(context.Background())
  1184  
  1185  	r.cancel() // always cancel the previous reader
  1186  	r.cancel = cancel
  1187  	r.version++
  1188  
  1189  	r.join.Add(len(offsetsByPartition))
  1190  	for key, offset := range offsetsByPartition {
  1191  		go func(ctx context.Context, key topicPartition, offset int64, join *sync.WaitGroup) {
  1192  			defer join.Done()
  1193  
  1194  			(&reader{
  1195  				dialer:          r.config.Dialer,
  1196  				logger:          r.config.Logger,
  1197  				errorLogger:     r.config.ErrorLogger,
  1198  				brokers:         r.config.Brokers,
  1199  				topic:           key.topic,
  1200  				partition:       int(key.partition),
  1201  				minBytes:        r.config.MinBytes,
  1202  				maxBytes:        r.config.MaxBytes,
  1203  				maxWait:         r.config.MaxWait,
  1204  				backoffDelayMin: r.config.ReadBackoffMin,
  1205  				backoffDelayMax: r.config.ReadBackoffMax,
  1206  				version:         r.version,
  1207  				msgs:            r.msgs,
  1208  				stats:           r.stats,
  1209  				isolationLevel:  r.config.IsolationLevel,
  1210  				maxAttempts:     r.config.MaxAttempts,
  1211  			}).run(ctx, offset)
  1212  		}(ctx, key, offset, &r.join)
  1213  	}
  1214  }
  1215  
  1216  // A reader reads messages from kafka and produces them on its channels. It's
  1217  // used as a way to asynchronously fetch messages while the main program reads
  1218  // them using the high level reader API.
  1219  type reader struct {
  1220  	dialer          *Dialer
  1221  	logger          Logger
  1222  	errorLogger     Logger
  1223  	brokers         []string
  1224  	topic           string
  1225  	partition       int
  1226  	minBytes        int
  1227  	maxBytes        int
  1228  	maxWait         time.Duration
  1229  	backoffDelayMin time.Duration
  1230  	backoffDelayMax time.Duration
  1231  	version         int64
  1232  	msgs            chan<- readerMessage
  1233  	stats           *readerStats
  1234  	isolationLevel  IsolationLevel
  1235  	maxAttempts     int
  1236  }
  1237  
  1238  type readerMessage struct {
  1239  	version   int64
  1240  	message   Message
  1241  	watermark int64
  1242  	error     error
  1243  }
  1244  
  1245  func (r *reader) run(ctx context.Context, offset int64) {
  1246  	// This is the reader's main loop, it only ends if the context is canceled
  1247  	// and will keep attempting to read messages otherwise.
  1248  	//
  1249  	// Retrying indefinitely has the nice side effect of preventing Read calls
  1250  	// on the parent reader from blocking if the connection to the kafka server
  1251  	// fails: the reader keeps reporting errors on the error channel, which are
  1252  	// then surfaced to the program.
  1253  	// If the reader wasn't retrying then the program would block indefinitely
  1254  	// on a Read call after reading the first error.
  1255  	for attempt := 0; true; attempt++ {
  1256  		if attempt != 0 {
  1257  			if !sleep(ctx, backoff(attempt, r.backoffDelayMin, r.backoffDelayMax)) {
  1258  				return
  1259  			}
  1260  		}
  1261  
  1262  		r.withLogger(func(log Logger) {
  1263  			log.Printf("initializing kafka reader for partition %d of %s starting at offset %d", r.partition, r.topic, offset)
  1264  		})
  1265  
  1266  		conn, start, err := r.initialize(ctx, offset)
  1267  		switch err {
  1268  		case nil:
  1269  		case OffsetOutOfRange:
  1270  			// This would happen if the requested offset is past the last
  1271  			// offset on the partition leader. In that case we're just going
  1272  			// to retry later hoping that enough data has been produced.
  1273  			r.withErrorLogger(func(log Logger) {
  1274  				log.Printf("error initializing the kafka reader for partition %d of %s: %s", r.partition, r.topic, OffsetOutOfRange)
  1275  			})
  1276  			continue
  1277  		default:
  1278  			// Perform a configured number of attempts before
  1279  			// reporting first errors; this helps mitigate
  1280  			// situations where the kafka server is temporarily
  1281  			// unavailable.
  1282  			if attempt >= r.maxAttempts {
  1283  				r.sendError(ctx, err)
  1284  			} else {
  1285  				r.stats.errors.observe(1)
  1286  				r.withErrorLogger(func(log Logger) {
  1287  					log.Printf("error initializing the kafka reader for partition %d of %s: %s", r.partition, r.topic, err)
  1288  				})
  1289  			}
  1290  			continue
  1291  		}
  1292  
  1293  		// Resetting the attempt counter ensures that if a failure occurs after
  1294  		// a successful initialization we don't keep increasing the backoff
  1295  		// timeout.
  1296  		attempt = 0
  1297  
  1298  		// Now we're sure to have an absolute offset number; should anything happen
  1299  		// to the connection, we know we'll want to restart from this offset.
  1300  		offset = start
  1301  
  1302  		errcount := 0
  1303  	readLoop:
  1304  		for {
  1305  			if !sleep(ctx, backoff(errcount, r.backoffDelayMin, r.backoffDelayMax)) {
  1306  				conn.Close()
  1307  				return
  1308  			}
  1309  
  1310  			switch offset, err = r.read(ctx, offset, conn); err {
  1311  			case nil:
  1312  				errcount = 0
  1313  				continue
  1314  			case io.EOF:
  1315  				// done with this batch of messages...carry on.  note that this
  1316  				// block relies on the batch repackaging real io.EOF errors as
  1317  				// io.ErrUnexpectedEOF.  otherwise, we would end up swallowing real
  1318  				// errors here.
  1319  				errcount = 0
  1320  				continue
  1321  			case UnknownTopicOrPartition:
  1322  				r.withErrorLogger(func(log Logger) {
  1323  					log.Printf("failed to read from current broker for partition %d of %s at offset %d, topic or partition not found on this broker, %v", r.partition, r.topic, offset, r.brokers)
  1324  				})
  1325  
  1326  				conn.Close()
  1327  
  1328  				// The next call to .initialize will re-establish a connection to the proper
  1329  				// topic/partition broker combo.
  1330  				r.stats.rebalances.observe(1)
  1331  				break readLoop
  1332  			case NotLeaderForPartition:
  1333  				r.withErrorLogger(func(log Logger) {
  1334  					log.Printf("failed to read from current broker for partition %d of %s at offset %d, not the leader", r.partition, r.topic, offset)
  1335  				})
  1336  
  1337  				conn.Close()
  1338  
  1339  				// The next call to .initialize will re-establish a connection to the proper
  1340  				// partition leader.
  1341  				r.stats.rebalances.observe(1)
  1342  				break readLoop
  1343  
  1344  			case RequestTimedOut:
  1345  				// Timeout on the kafka side, this can be safely retried.
  1346  				errcount = 0
  1347  				r.withLogger(func(log Logger) {
  1348  					log.Printf("no messages received from kafka within the allocated time for partition %d of %s at offset %d", r.partition, r.topic, offset)
  1349  				})
  1350  				r.stats.timeouts.observe(1)
  1351  				continue
  1352  
  1353  			case OffsetOutOfRange:
  1354  				first, last, err := r.readOffsets(conn)
  1355  				if err != nil {
  1356  					r.withErrorLogger(func(log Logger) {
  1357  						log.Printf("the kafka reader got an error while attempting to determine whether it was reading before the first offset or after the last offset of partition %d of %s: %s", r.partition, r.topic, err)
  1358  					})
  1359  					conn.Close()
  1360  					break readLoop
  1361  				}
  1362  
  1363  				switch {
  1364  				case offset < first:
  1365  					r.withErrorLogger(func(log Logger) {
  1366  						log.Printf("the kafka reader is reading before the first offset for partition %d of %s, skipping from offset %d to %d (%d messages)", r.partition, r.topic, offset, first, first-offset)
  1367  					})
  1368  					offset, errcount = first, 0
  1369  					continue // retry immediately so we don't keep falling behind due to the backoff
  1370  
  1371  				case offset < last:
  1372  					errcount = 0
  1373  					continue // more messages have already become available, retry immediately
  1374  
  1375  				default:
  1376  					// We may be reading past the last offset, will retry later.
  1377  					r.withErrorLogger(func(log Logger) {
  1378  						log.Printf("the kafka reader is reading past the last offset for partition %d of %s at offset %d", r.partition, r.topic, offset)
  1379  					})
  1380  				}
  1381  
  1382  			case context.Canceled:
  1383  				// Another reader has taken over, we can safely quit.
  1384  				conn.Close()
  1385  				return
  1386  
  1387  			case errUnknownCodec:
  1388  				// The compression codec is either unsupported or has not been
  1389  				// imported.  This is a fatal error b/c the reader cannot
  1390  				// proceed.
  1391  				r.sendError(ctx, err)
  1392  				break readLoop
  1393  
  1394  			default:
  1395  				if _, ok := err.(Error); ok {
  1396  					r.sendError(ctx, err)
  1397  				} else {
  1398  					r.withErrorLogger(func(log Logger) {
  1399  						log.Printf("the kafka reader got an unknown error reading partition %d of %s at offset %d: %s", r.partition, r.topic, offset, err)
  1400  					})
  1401  					r.stats.errors.observe(1)
  1402  					conn.Close()
  1403  					break readLoop
  1404  				}
  1405  			}
  1406  
  1407  			errcount++
  1408  		}
  1409  	}
  1410  }
  1411  
  1412  func (r *reader) initialize(ctx context.Context, offset int64) (conn *Conn, start int64, err error) {
  1413  	for i := 0; i != len(r.brokers) && conn == nil; i++ {
  1414  		broker := r.brokers[i]
  1415  		var first, last int64
  1416  
  1417  		t0 := time.Now()
  1418  		conn, err = r.dialer.DialLeader(ctx, "tcp", broker, r.topic, r.partition)
  1419  		t1 := time.Now()
  1420  		r.stats.dials.observe(1)
  1421  		r.stats.dialTime.observeDuration(t1.Sub(t0))
  1422  
  1423  		if err != nil {
  1424  			continue
  1425  		}
  1426  
  1427  		if first, last, err = r.readOffsets(conn); err != nil {
  1428  			conn.Close()
  1429  			conn = nil
  1430  			break
  1431  		}
  1432  
  1433  		switch {
  1434  		case offset == FirstOffset:
  1435  			offset = first
  1436  
  1437  		case offset == LastOffset:
  1438  			offset = last
  1439  
  1440  		case offset < first:
  1441  			offset = first
  1442  		}
  1443  
  1444  		r.withLogger(func(log Logger) {
  1445  			log.Printf("the kafka reader for partition %d of %s is seeking to offset %d", r.partition, r.topic, offset)
  1446  		})
  1447  
  1448  		if start, err = conn.Seek(offset, SeekAbsolute); err != nil {
  1449  			conn.Close()
  1450  			conn = nil
  1451  			break
  1452  		}
  1453  
  1454  		conn.SetDeadline(time.Time{})
  1455  	}
  1456  
  1457  	return
  1458  }
  1459  
  1460  func (r *reader) read(ctx context.Context, offset int64, conn *Conn) (int64, error) {
  1461  	r.stats.fetches.observe(1)
  1462  	r.stats.offset.observe(offset)
  1463  
  1464  	t0 := time.Now()
  1465  	conn.SetReadDeadline(t0.Add(r.maxWait))
  1466  
  1467  	batch := conn.ReadBatchWith(ReadBatchConfig{
  1468  		MinBytes:       r.minBytes,
  1469  		MaxBytes:       r.maxBytes,
  1470  		IsolationLevel: r.isolationLevel,
  1471  	})
  1472  	highWaterMark := batch.HighWaterMark()
  1473  
  1474  	t1 := time.Now()
  1475  	r.stats.waitTime.observeDuration(t1.Sub(t0))
  1476  
  1477  	var msg Message
  1478  	var err error
  1479  	var size int64
  1480  	var bytes int64
  1481  
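        	// The read deadline is periodically extended below so that a batch that
        	// keeps making progress is not interrupted, while a stalled connection
        	// still times out within safetyTimeout.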
  1482  	const safetyTimeout = 10 * time.Second
  1483  	deadline := time.Now().Add(safetyTimeout)
  1484  	conn.SetReadDeadline(deadline)
  1485  
  1486  	for {
  1487  		if now := time.Now(); deadline.Sub(now) < (safetyTimeout / 2) {
  1488  			deadline = now.Add(safetyTimeout)
  1489  			conn.SetReadDeadline(deadline)
  1490  		}
  1491  
  1492  		if msg, err = batch.ReadMessage(); err != nil {
  1493  			batch.Close()
  1494  			break
  1495  		}
  1496  
  1497  		n := int64(len(msg.Key) + len(msg.Value))
  1498  		r.stats.messages.observe(1)
  1499  		r.stats.bytes.observe(n)
  1500  
  1501  		if err = r.sendMessage(ctx, msg, highWaterMark); err != nil {
  1502  			batch.Close()
  1503  			break
  1504  		}
  1505  
  1506  		offset = msg.Offset + 1
  1507  		r.stats.offset.observe(offset)
  1508  		r.stats.lag.observe(highWaterMark - offset)
  1509  
  1510  		size++
  1511  		bytes += n
  1512  	}
  1513  
  1514  	conn.SetReadDeadline(time.Time{})
  1515  
  1516  	t2 := time.Now()
  1517  	r.stats.readTime.observeDuration(t2.Sub(t1))
  1518  	r.stats.fetchSize.observe(size)
  1519  	r.stats.fetchBytes.observe(bytes)
  1520  	return offset, err
  1521  }
  1522  
  1523  func (r *reader) readOffsets(conn *Conn) (first, last int64, err error) {
  1524  	conn.SetDeadline(time.Now().Add(10 * time.Second))
  1525  	return conn.ReadOffsets()
  1526  }
  1527  
  1528  func (r *reader) sendMessage(ctx context.Context, msg Message, watermark int64) error {
  1529  	select {
  1530  	case r.msgs <- readerMessage{version: r.version, message: msg, watermark: watermark}:
  1531  		return nil
  1532  	case <-ctx.Done():
  1533  		return ctx.Err()
  1534  	}
  1535  }
  1536  
  1537  func (r *reader) sendError(ctx context.Context, err error) error {
  1538  	select {
  1539  	case r.msgs <- readerMessage{version: r.version, error: err}:
  1540  		return nil
  1541  	case <-ctx.Done():
  1542  		return ctx.Err()
  1543  	}
  1544  }
  1545  
  1546  func (r *reader) withLogger(do func(Logger)) {
  1547  	if r.logger != nil {
  1548  		do(r.logger)
  1549  	}
  1550  }
  1551  
  1552  func (r *reader) withErrorLogger(do func(Logger)) {
  1553  	if r.errorLogger != nil {
  1554  		do(r.errorLogger)
  1555  	} else {
  1556  		r.withLogger(do)
  1557  	}
  1558  }
  1559  
  1560  // extractTopics returns the unique list of topics represented by the set of
  1561  // provided members
  1562  func extractTopics(members []GroupMember) []string {
  1563  	visited := map[string]struct{}{}
  1564  	var topics []string
  1565  
  1566  	for _, member := range members {
  1567  		for _, topic := range member.Topics {
  1568  			if _, seen := visited[topic]; seen {
  1569  				continue
  1570  			}
  1571  
  1572  			topics = append(topics, topic)
  1573  			visited[topic] = struct{}{}
  1574  		}
  1575  	}
  1576  
  1577  	sort.Strings(topics)
  1578  
  1579  	return topics
  1580  }