github.com/hoveychen/kafka-go@v0.4.42/reader.go (about)

     1  package kafka
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"math"
     9  	"sort"
    10  	"strconv"
    11  	"sync"
    12  	"sync/atomic"
    13  	"time"
    14  )
    15  
    16  const (
    17  	LastOffset  int64 = -1 // The most recent offset available for a partition.
    18  	FirstOffset int64 = -2 // The least recent offset available for a partition.
    19  )
    20  
    21  const (
    22  	// defaultCommitRetries holds the number of commit attempts to make
    23  	// before giving up.
    24  	defaultCommitRetries = 3
    25  )
    26  
    27  const (
    28  	// defaultFetchMinBytes of 1 byte means that fetch requests are answered as
    29  	// soon as a single byte of data is available or the fetch request times out
    30  	// waiting for data to arrive.
    31  	defaultFetchMinBytes = 1
    32  )
    33  
    34  var (
    35  	errOnlyAvailableWithGroup = errors.New("unavailable when GroupID is not set")
    36  	errNotAvailableWithGroup  = errors.New("unavailable when GroupID is set")
    37  )
    38  
    39  const (
     40  	// defaultReadBackoffMax/Min set the boundaries for how long the reader waits before
     41  	// polling for new messages.
    42  	defaultReadBackoffMin = 100 * time.Millisecond
    43  	defaultReadBackoffMax = 1 * time.Second
    44  )
    45  
    46  // Reader provides a high-level API for consuming messages from kafka.
    47  //
    48  // A Reader automatically manages reconnections to a kafka server, and
    49  // blocking methods have context support for asynchronous cancellations.
    50  //
    51  // Note that it is important to call `Close()` on a `Reader` when a process exits.
    52  // The kafka server needs a graceful disconnect to stop it from continuing to
     53  // attempt to send messages to the connected clients. A program will not call
     54  // `Close()` automatically if the process is terminated with SIGINT (ctrl-c at the shell) or
    55  // SIGTERM (as docker stop or a kubernetes restart does). This can result in a
    56  // delay when a new reader on the same topic connects (e.g. new process started
    57  // or new container running). Use a `signal.Notify` handler to close the reader on
    58  // process shutdown.
    59  type Reader struct {
    60  	// immutable fields of the reader
    61  	config ReaderConfig
    62  
    63  	// communication channels between the parent reader and its subreaders
    64  	msgs chan readerMessage
    65  
    66  	// mutable fields of the reader (synchronized on the mutex)
    67  	mutex   sync.Mutex
    68  	join    sync.WaitGroup
    69  	cancel  context.CancelFunc
    70  	stop    context.CancelFunc
    71  	done    chan struct{}
    72  	commits chan commitRequest
    73  	version int64 // version holds the generation of the spawned readers
    74  	offset  int64
    75  	lag     int64
    76  	closed  bool
    77  
    78  	// Without a group subscription (when Reader.config.GroupID == ""),
    79  	// when errors occur, the Reader gets a synthetic readerMessage with
    80  	// a non-nil err set. With group subscriptions however, when an error
    81  	// occurs in Reader.run, there's no reader running (sic, cf. reader vs.
    82  	// Reader) and there's no way to let the high-level methods like
     83  // FetchMessage know that an error indeed occurred. If an error occurs in
     84  // run, it is sent to this unbuffered channel with a non-blocking send, so
     85  // that the high-level methods can select{} on it and notify the caller.
    86  	runError chan error
    87  
     88  	// once guards the lazy start of the read-lag goroutine; stctx is canceled by Close.
    89  	once  uint32
    90  	stctx context.Context
    91  	// reader stats are all made of atomic values, no need for synchronization.
    92  	// Use a pointer to ensure 64-bit alignment of the values.
    93  	stats *readerStats
    94  }
    95  
    96  // useConsumerGroup indicates whether the Reader is part of a consumer group.
    97  func (r *Reader) useConsumerGroup() bool { return r.config.GroupID != "" }
    98  
    99  func (r *Reader) getTopics() []string {
   100  	if len(r.config.GroupTopics) > 0 {
   101  		return r.config.GroupTopics[:]
   102  	}
   103  
   104  	return []string{r.config.Topic}
   105  }
   106  
   107  // useSyncCommits indicates whether the Reader is configured to perform sync or
   108  // async commits.
   109  func (r *Reader) useSyncCommits() bool { return r.config.CommitInterval == 0 }
   110  
   111  func (r *Reader) unsubscribe() {
   112  	r.cancel()
   113  	r.join.Wait()
   114  	// it would be interesting to drain the r.msgs channel at this point since
   115  	// it will contain buffered messages for partitions that may not be
   116  	// re-assigned to this reader in the next consumer group generation.
   117  	// however, draining the channel could race with the client calling
   118  	// ReadMessage, which could result in messages delivered and/or committed
   119  	// with gaps in the offset.  for now, we will err on the side of caution and
   120  	// potentially have those messages be reprocessed in the next generation by
   121  	// another consumer to avoid such a race.
   122  }
   123  
   124  func (r *Reader) subscribe(allAssignments map[string][]PartitionAssignment) {
   125  	offsets := make(map[topicPartition]int64)
   126  	for topic, assignments := range allAssignments {
   127  		for _, assignment := range assignments {
   128  			key := topicPartition{
   129  				topic:     topic,
   130  				partition: int32(assignment.ID),
   131  			}
   132  			offsets[key] = assignment.Offset
   133  		}
   134  	}
   135  
   136  	r.mutex.Lock()
   137  	r.start(offsets)
   138  	r.mutex.Unlock()
   139  
   140  	r.withLogger(func(l Logger) {
   141  		l.Printf("subscribed to topics and partitions: %+v", offsets)
   142  	})
   143  }
   144  
   145  // commitOffsetsWithRetry attempts to commit the specified offsets and retries
   146  // up to the specified number of times.
   147  func (r *Reader) commitOffsetsWithRetry(gen *Generation, offsetStash offsetStash, retries int) (err error) {
   148  	const (
   149  		backoffDelayMin = 100 * time.Millisecond
   150  		backoffDelayMax = 5 * time.Second
   151  	)
   152  
   153  	for attempt := 0; attempt < retries; attempt++ {
   154  		if attempt != 0 {
   155  			if !sleep(r.stctx, backoff(attempt, backoffDelayMin, backoffDelayMax)) {
   156  				return
   157  			}
   158  		}
   159  
   160  		if err = gen.CommitOffsets(offsetStash); err == nil {
   161  			return
   162  		}
   163  	}
   164  
   165  	return // err will not be nil
   166  }
   167  
   168  // offsetStash holds offsets by topic => partition => offset.
   169  type offsetStash map[string]map[int]int64
   170  
    171  // merge updates the offsetStash with the offsets from the provided commits,
         // keeping only the highest offset seen for each topic/partition.
   172  func (o offsetStash) merge(commits []commit) {
   173  	for _, c := range commits {
   174  		offsetsByPartition, ok := o[c.topic]
   175  		if !ok {
   176  			offsetsByPartition = map[int]int64{}
   177  			o[c.topic] = offsetsByPartition
   178  		}
   179  
   180  		if offset, ok := offsetsByPartition[c.partition]; !ok || c.offset > offset {
   181  			offsetsByPartition[c.partition] = c.offset
   182  		}
   183  	}
   184  }
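
// A minimal sketch of how merge behaves, assuming hypothetical topic names and
// offsets: for each topic/partition pair only the highest offset seen is kept.
func exampleOffsetStashMerge() {
	stash := offsetStash{}
	stash.merge([]commit{
		{topic: "events", partition: 0, offset: 42},
		{topic: "events", partition: 0, offset: 17}, // lower offset for the same partition, ignored
		{topic: "events", partition: 1, offset: 5},
	})
	// stash now holds map[events:map[0:42 1:5]]
	fmt.Println(stash)
}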
   185  
   186  // reset clears the contents of the offsetStash.
   187  func (o offsetStash) reset() {
   188  	for key := range o {
   189  		delete(o, key)
   190  	}
   191  }
   192  
   193  // commitLoopImmediate handles each commit synchronously.
   194  func (r *Reader) commitLoopImmediate(ctx context.Context, gen *Generation) {
   195  	offsets := offsetStash{}
   196  
   197  	for {
   198  		select {
   199  		case <-ctx.Done():
   200  			// drain the commit channel and prepare a single, final commit.
   201  			// the commit will combine any outstanding requests and the result
   202  			// will be sent back to all the callers of CommitMessages so that
   203  			// they can return.
   204  			var errchs []chan<- error
   205  			for hasCommits := true; hasCommits; {
   206  				select {
   207  				case req := <-r.commits:
   208  					offsets.merge(req.commits)
   209  					errchs = append(errchs, req.errch)
   210  				default:
   211  					hasCommits = false
   212  				}
   213  			}
   214  			err := r.commitOffsetsWithRetry(gen, offsets, defaultCommitRetries)
   215  			for _, errch := range errchs {
   216  				// NOTE : this will be a buffered channel and will not block.
   217  				errch <- err
   218  			}
   219  			return
   220  
   221  		case req := <-r.commits:
   222  			offsets.merge(req.commits)
   223  			req.errch <- r.commitOffsetsWithRetry(gen, offsets, defaultCommitRetries)
   224  			offsets.reset()
   225  		}
   226  	}
   227  }
   228  
   229  // commitLoopInterval handles each commit asynchronously with a period defined
   230  // by ReaderConfig.CommitInterval.
   231  func (r *Reader) commitLoopInterval(ctx context.Context, gen *Generation) {
   232  	ticker := time.NewTicker(r.config.CommitInterval)
   233  	defer ticker.Stop()
   234  
   235  	// the offset stash should not survive rebalances b/c the consumer may
   236  	// receive new assignments.
   237  	offsets := offsetStash{}
   238  
   239  	commit := func() {
   240  		if err := r.commitOffsetsWithRetry(gen, offsets, defaultCommitRetries); err != nil {
   241  			r.withErrorLogger(func(l Logger) { l.Printf("%v", err) })
   242  		} else {
   243  			offsets.reset()
   244  		}
   245  	}
   246  
   247  	for {
   248  		select {
   249  		case <-ctx.Done():
   250  			// drain the commit channel in order to prepare the final commit.
   251  			for hasCommits := true; hasCommits; {
   252  				select {
   253  				case req := <-r.commits:
   254  					offsets.merge(req.commits)
   255  				default:
   256  					hasCommits = false
   257  				}
   258  			}
   259  			commit()
   260  			return
   261  
   262  		case <-ticker.C:
   263  			commit()
   264  
   265  		case req := <-r.commits:
   266  			offsets.merge(req.commits)
   267  		}
   268  	}
   269  }
   270  
   271  // commitLoop processes commits off the commit chan.
   272  func (r *Reader) commitLoop(ctx context.Context, gen *Generation) {
   273  	r.withLogger(func(l Logger) {
   274  		l.Printf("started commit for group %s\n", r.config.GroupID)
   275  	})
   276  	defer r.withLogger(func(l Logger) {
   277  		l.Printf("stopped commit for group %s\n", r.config.GroupID)
   278  	})
   279  
   280  	if r.useSyncCommits() {
   281  		r.commitLoopImmediate(ctx, gen)
   282  	} else {
   283  		r.commitLoopInterval(ctx, gen)
   284  	}
   285  }
   286  
   287  // run provides the main consumer group management loop.  Each iteration performs the
   288  // handshake to join the Reader to the consumer group.
   289  //
   290  // This function is responsible for closing the consumer group upon exit.
   291  func (r *Reader) run(cg *ConsumerGroup) {
   292  	defer close(r.done)
   293  	defer cg.Close()
   294  
   295  	r.withLogger(func(l Logger) {
   296  		l.Printf("entering loop for consumer group, %v\n", r.config.GroupID)
   297  	})
   298  
   299  	for {
   300  		// Limit the number of attempts at waiting for the next
   301  		// consumer generation.
   302  		var err error
   303  		var gen *Generation
   304  		for attempt := 1; attempt <= r.config.MaxAttempts; attempt++ {
   305  			gen, err = cg.Next(r.stctx)
   306  			if err == nil {
   307  				break
   308  			}
   309  			if errors.Is(err, r.stctx.Err()) {
   310  				return
   311  			}
   312  			r.stats.errors.observe(1)
   313  			r.withErrorLogger(func(l Logger) {
   314  				l.Printf("%v", err)
   315  			})
   316  			// Continue with next attempt...
   317  		}
   318  		if err != nil {
   319  			// All attempts have failed.
   320  			select {
   321  			case r.runError <- err:
   322  				// If somebody's receiving on the runError, let
   323  				// them know the error occurred.
   324  			default:
   325  				// Otherwise, don't block to allow healing.
   326  			}
   327  			continue
   328  		}
   329  
   330  		r.stats.rebalances.observe(1)
   331  
   332  		r.subscribe(gen.Assignments)
   333  
   334  		gen.Start(func(ctx context.Context) {
   335  			r.commitLoop(ctx, gen)
   336  		})
   337  		gen.Start(func(ctx context.Context) {
   338  			// wait for the generation to end and then unsubscribe.
   339  			select {
   340  			case <-ctx.Done():
   341  				// continue to next generation
   342  			case <-r.stctx.Done():
   343  				// this will be the last loop because the reader is closed.
   344  			}
   345  			r.unsubscribe()
   346  		})
   347  	}
   348  }
   349  
   350  // ReaderConfig is a configuration object used to create new instances of
   351  // Reader.
   352  type ReaderConfig struct {
   353  	// The list of broker addresses used to connect to the kafka cluster.
   354  	Brokers []string
   355  
   356  	// GroupID holds the optional consumer group id.  If GroupID is specified, then
    357  	// Partition should NOT be specified.
   358  	GroupID string
   359  
   360  	// GroupTopics allows specifying multiple topics, but can only be used in
   361  	// combination with GroupID, as it is a consumer-group feature. As such, if
   362  	// GroupID is set, then either Topic or GroupTopics must be defined.
   363  	GroupTopics []string
   364  
   365  	// The topic to read messages from.
   366  	Topic string
   367  
   368  	// Partition to read messages from.  Either Partition or GroupID may
    369  	// be assigned, but not both.
   370  	Partition int
   371  
    372  	// A dialer used to open connections to the kafka server. This field is
   373  	// optional, if nil, the default dialer is used instead.
   374  	Dialer *Dialer
   375  
   376  	// The capacity of the internal message queue, defaults to 100 if none is
   377  	// set.
   378  	QueueCapacity int
   379  
   380  	// MinBytes indicates to the broker the minimum batch size that the consumer
   381  	// will accept. Setting a high minimum when consuming from a low-volume topic
   382  	// may result in delayed delivery when the broker does not have enough data to
   383  	// satisfy the defined minimum.
   384  	//
   385  	// Default: 1
   386  	MinBytes int
   387  
   388  	// MaxBytes indicates to the broker the maximum batch size that the consumer
   389  	// will accept. The broker will truncate a message to satisfy this maximum, so
   390  	// choose a value that is high enough for your largest message size.
   391  	//
   392  	// Default: 1MB
   393  	MaxBytes int
   394  
   395  	// Maximum amount of time to wait for new data to come when fetching batches
   396  	// of messages from kafka.
   397  	//
   398  	// Default: 10s
   399  	MaxWait time.Duration
   400  
   401  	// ReadBatchTimeout amount of time to wait to fetch message from kafka messages batch.
   402  	//
   403  	// Default: 10s
   404  	ReadBatchTimeout time.Duration
   405  
   406  	// ReadLagInterval sets the frequency at which the reader lag is updated.
   407  	// Setting this field to a negative value disables lag reporting.
   408  	ReadLagInterval time.Duration
   409  
   410  	// GroupBalancers is the priority-ordered list of client-side consumer group
   411  	// balancing strategies that will be offered to the coordinator.  The first
   412  	// strategy that all group members support will be chosen by the leader.
   413  	//
   414  	// Default: [Range, RoundRobin]
   415  	//
   416  	// Only used when GroupID is set
   417  	GroupBalancers []GroupBalancer
   418  
   419  	// HeartbeatInterval sets the optional frequency at which the reader sends the consumer
   420  	// group heartbeat update.
   421  	//
   422  	// Default: 3s
   423  	//
   424  	// Only used when GroupID is set
   425  	HeartbeatInterval time.Duration
   426  
   427  	// CommitInterval indicates the interval at which offsets are committed to
   428  	// the broker.  If 0, commits will be handled synchronously.
   429  	//
   430  	// Default: 0
   431  	//
   432  	// Only used when GroupID is set
   433  	CommitInterval time.Duration
   434  
   435  	// PartitionWatchInterval indicates how often a reader checks for partition changes.
   436  	// If a reader sees a partition change (such as a partition add) it will rebalance the group
   437  	// picking up new partitions.
   438  	//
   439  	// Default: 5s
   440  	//
   441  	// Only used when GroupID is set and WatchPartitionChanges is set.
   442  	PartitionWatchInterval time.Duration
   443  
    444  	// WatchPartitionChanges is used to inform kafka-go that a consumer group should be
   445  	// polling the brokers and rebalancing if any partition changes happen to the topic.
   446  	WatchPartitionChanges bool
   447  
   448  	// SessionTimeout optionally sets the length of time that may pass without a heartbeat
   449  	// before the coordinator considers the consumer dead and initiates a rebalance.
   450  	//
   451  	// Default: 30s
   452  	//
   453  	// Only used when GroupID is set
   454  	SessionTimeout time.Duration
   455  
   456  	// RebalanceTimeout optionally sets the length of time the coordinator will wait
   457  	// for members to join as part of a rebalance.  For kafka servers under higher
   458  	// load, it may be useful to set this value higher.
   459  	//
   460  	// Default: 30s
   461  	//
   462  	// Only used when GroupID is set
   463  	RebalanceTimeout time.Duration
   464  
   465  	// JoinGroupBackoff optionally sets the length of time to wait between re-joining
   466  	// the consumer group after an error.
   467  	//
   468  	// Default: 5s
   469  	JoinGroupBackoff time.Duration
   470  
   471  	// RetentionTime optionally sets the length of time the consumer group will be saved
    472  	// by the broker.
   473  	//
   474  	// Default: 24h
   475  	//
   476  	// Only used when GroupID is set
   477  	RetentionTime time.Duration
   478  
    479  	// StartOffset determines from where the consumer group should begin
   480  	// consuming when it finds a partition without a committed offset.  If
   481  	// non-zero, it must be set to one of FirstOffset or LastOffset.
   482  	//
   483  	// Default: FirstOffset
   484  	//
   485  	// Only used when GroupID is set
   486  	StartOffset int64
   487  
    488  	// ReadBackoffMin optionally sets the smallest amount of time the reader will wait before
    489  	// polling for new messages.
   490  	//
   491  	// Default: 100ms
   492  	ReadBackoffMin time.Duration
   493  
    494  	// ReadBackoffMax optionally sets the maximum amount of time the reader will wait before
    495  	// polling for new messages.
   496  	//
   497  	// Default: 1s
   498  	ReadBackoffMax time.Duration
   499  
   500  	// If not nil, specifies a logger used to report internal changes within the
   501  	// reader.
   502  	Logger Logger
   503  
   504  	// ErrorLogger is the logger used to report errors. If nil, the reader falls
   505  	// back to using Logger instead.
   506  	ErrorLogger Logger
   507  
   508  	// IsolationLevel controls the visibility of transactional records.
   509  	// ReadUncommitted makes all records visible. With ReadCommitted only
   510  	// non-transactional and committed records are visible.
   511  	IsolationLevel IsolationLevel
   512  
   513  	// Limit of how many attempts to connect will be made before returning the error.
   514  	//
   515  	// The default is to try 3 times.
   516  	MaxAttempts int
   517  
   518  	// OffsetOutOfRangeError indicates that the reader should return an error in
   519  	// the event of an OffsetOutOfRange error, rather than retrying indefinitely.
   520  	// This flag is being added to retain backwards-compatibility, so it will be
   521  	// removed in a future version of kafka-go.
   522  	OffsetOutOfRangeError bool
   523  }
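
// Two minimal configuration sketches for the struct above, using hypothetical
// broker addresses and topic names: a consumer-group reader (GroupID set,
// Partition left at its zero value) and an explicit-partition reader
// (Partition set, no GroupID). Only one of the two modes may be used at a time.
func exampleReaderConfigs() (group, partition ReaderConfig) {
	group = ReaderConfig{
		Brokers:        []string{"localhost:9092"},
		GroupID:        "example-consumer-group",
		Topic:          "example-topic",
		MinBytes:       1,
		MaxBytes:       10e6,        // 10MB
		CommitInterval: time.Second, // flush offset commits to the broker once per second
	}
	partition = ReaderConfig{
		Brokers:   []string{"localhost:9092"},
		Topic:     "example-topic",
		Partition: 3,
		MaxWait:   500 * time.Millisecond,
	}
	return group, partition
}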
   524  
   525  // Validate method validates ReaderConfig properties.
   526  func (config *ReaderConfig) Validate() error {
   527  	if len(config.Brokers) == 0 {
   528  		return errors.New("cannot create a new kafka reader with an empty list of broker addresses")
   529  	}
   530  
   531  	if config.Partition < 0 || config.Partition >= math.MaxInt32 {
   532  		return fmt.Errorf("partition number out of bounds: %d", config.Partition)
   533  	}
   534  
   535  	if config.MinBytes < 0 {
   536  		return fmt.Errorf("invalid negative minimum batch size (min = %d)", config.MinBytes)
   537  	}
   538  
   539  	if config.MaxBytes < 0 {
   540  		return fmt.Errorf("invalid negative maximum batch size (max = %d)", config.MaxBytes)
   541  	}
   542  
   543  	if config.GroupID != "" {
   544  		if config.Partition != 0 {
   545  			return errors.New("either Partition or GroupID may be specified, but not both")
   546  		}
   547  
   548  		if len(config.Topic) == 0 && len(config.GroupTopics) == 0 {
   549  			return errors.New("either Topic or GroupTopics must be specified with GroupID")
   550  		}
   551  	} else if len(config.Topic) == 0 {
   552  		return errors.New("cannot create a new kafka reader with an empty topic")
   553  	}
   554  
   555  	if config.MinBytes > config.MaxBytes {
   556  		return fmt.Errorf("minimum batch size greater than the maximum (min = %d, max = %d)", config.MinBytes, config.MaxBytes)
   557  	}
   558  
   559  	if config.ReadBackoffMax < 0 {
   560  		return fmt.Errorf("ReadBackoffMax out of bounds: %d", config.ReadBackoffMax)
   561  	}
   562  
   563  	if config.ReadBackoffMin < 0 {
   564  		return fmt.Errorf("ReadBackoffMin out of bounds: %d", config.ReadBackoffMin)
   565  	}
   566  
   567  	return nil
   568  }
   569  
   570  // ReaderStats is a data structure returned by a call to Reader.Stats that exposes
   571  // details about the behavior of the reader.
   572  type ReaderStats struct {
   573  	Dials      int64 `metric:"kafka.reader.dial.count"      type:"counter"`
   574  	Fetches    int64 `metric:"kafka.reader.fetch.count"     type:"counter"`
   575  	Messages   int64 `metric:"kafka.reader.message.count"   type:"counter"`
   576  	Bytes      int64 `metric:"kafka.reader.message.bytes"   type:"counter"`
   577  	Rebalances int64 `metric:"kafka.reader.rebalance.count" type:"counter"`
   578  	Timeouts   int64 `metric:"kafka.reader.timeout.count"   type:"counter"`
   579  	Errors     int64 `metric:"kafka.reader.error.count"     type:"counter"`
   580  
   581  	DialTime   DurationStats `metric:"kafka.reader.dial.seconds"`
   582  	ReadTime   DurationStats `metric:"kafka.reader.read.seconds"`
   583  	WaitTime   DurationStats `metric:"kafka.reader.wait.seconds"`
   584  	FetchSize  SummaryStats  `metric:"kafka.reader.fetch.size"`
   585  	FetchBytes SummaryStats  `metric:"kafka.reader.fetch.bytes"`
   586  
   587  	Offset        int64         `metric:"kafka.reader.offset"          type:"gauge"`
   588  	Lag           int64         `metric:"kafka.reader.lag"             type:"gauge"`
   589  	MinBytes      int64         `metric:"kafka.reader.fetch_bytes.min" type:"gauge"`
   590  	MaxBytes      int64         `metric:"kafka.reader.fetch_bytes.max" type:"gauge"`
   591  	MaxWait       time.Duration `metric:"kafka.reader.fetch_wait.max"  type:"gauge"`
   592  	QueueLength   int64         `metric:"kafka.reader.queue.length"    type:"gauge"`
   593  	QueueCapacity int64         `metric:"kafka.reader.queue.capacity"  type:"gauge"`
   594  
   595  	ClientID  string `tag:"client_id"`
   596  	Topic     string `tag:"topic"`
   597  	Partition string `tag:"partition"`
   598  
   599  	// The original `Fetches` field had a typo where the metric name was called
   600  	// "kafak..." instead of "kafka...", in order to offer time to fix monitors
   601  	// that may be relying on this mistake we are temporarily introducing this
   602  	// field.
   603  	DeprecatedFetchesWithTypo int64 `metric:"kafak.reader.fetch.count" type:"counter"`
   604  }
   605  
   606  // readerStats is a struct that contains statistics on a reader.
   607  type readerStats struct {
   608  	dials      counter
   609  	fetches    counter
   610  	messages   counter
   611  	bytes      counter
   612  	rebalances counter
   613  	timeouts   counter
   614  	errors     counter
   615  	dialTime   summary
   616  	readTime   summary
   617  	waitTime   summary
   618  	fetchSize  summary
   619  	fetchBytes summary
   620  	offset     gauge
   621  	lag        gauge
   622  	partition  string
   623  }
   624  
   625  // NewReader creates and returns a new Reader configured with config.
   626  // The offset is initialized to FirstOffset.
   627  func NewReader(config ReaderConfig) *Reader {
   628  	if err := config.Validate(); err != nil {
   629  		panic(err)
   630  	}
   631  
   632  	if config.GroupID != "" {
   633  		if len(config.GroupBalancers) == 0 {
   634  			config.GroupBalancers = []GroupBalancer{
   635  				RangeGroupBalancer{},
   636  				RoundRobinGroupBalancer{},
   637  			}
   638  		}
   639  	}
   640  
   641  	if config.Dialer == nil {
   642  		config.Dialer = DefaultDialer
   643  	}
   644  
   645  	if config.MaxBytes == 0 {
   646  		config.MaxBytes = 1e6 // 1 MB
   647  	}
   648  
   649  	if config.MinBytes == 0 {
   650  		config.MinBytes = defaultFetchMinBytes
   651  	}
   652  
   653  	if config.MaxWait == 0 {
   654  		config.MaxWait = 10 * time.Second
   655  	}
   656  
   657  	if config.ReadBatchTimeout == 0 {
   658  		config.ReadBatchTimeout = 10 * time.Second
   659  	}
   660  
   661  	if config.ReadLagInterval == 0 {
   662  		config.ReadLagInterval = 1 * time.Minute
   663  	}
   664  
   665  	if config.ReadBackoffMin == 0 {
   666  		config.ReadBackoffMin = defaultReadBackoffMin
   667  	}
   668  
   669  	if config.ReadBackoffMax == 0 {
   670  		config.ReadBackoffMax = defaultReadBackoffMax
   671  	}
   672  
   673  	if config.ReadBackoffMax < config.ReadBackoffMin {
   674  		panic(fmt.Errorf("ReadBackoffMax %d smaller than ReadBackoffMin %d", config.ReadBackoffMax, config.ReadBackoffMin))
   675  	}
   676  
   677  	if config.QueueCapacity == 0 {
   678  		config.QueueCapacity = 100
   679  	}
   680  
   681  	if config.MaxAttempts == 0 {
   682  		config.MaxAttempts = 3
   683  	}
   684  
    685  	// when configured as a consumer group, stats should report a partition of -1
   686  	readerStatsPartition := config.Partition
   687  	if config.GroupID != "" {
   688  		readerStatsPartition = -1
   689  	}
   690  
    691  	// when configured as a consumer group, start version as 1 to ensure that only
   692  	// the rebalance function will start readers
   693  	version := int64(0)
   694  	if config.GroupID != "" {
   695  		version = 1
   696  	}
   697  
   698  	stctx, stop := context.WithCancel(context.Background())
   699  	r := &Reader{
   700  		config:  config,
   701  		msgs:    make(chan readerMessage, config.QueueCapacity),
   702  		cancel:  func() {},
   703  		commits: make(chan commitRequest, config.QueueCapacity),
   704  		stop:    stop,
   705  		offset:  FirstOffset,
   706  		stctx:   stctx,
   707  		stats: &readerStats{
   708  			dialTime:   makeSummary(),
   709  			readTime:   makeSummary(),
   710  			waitTime:   makeSummary(),
   711  			fetchSize:  makeSummary(),
   712  			fetchBytes: makeSummary(),
   713  			// Generate the string representation of the partition number only
   714  			// once when the reader is created.
   715  			partition: strconv.Itoa(readerStatsPartition),
   716  		},
   717  		version: version,
   718  	}
   719  	if r.useConsumerGroup() {
   720  		r.done = make(chan struct{})
   721  		r.runError = make(chan error)
   722  		cg, err := NewConsumerGroup(ConsumerGroupConfig{
   723  			ID:                     r.config.GroupID,
   724  			Brokers:                r.config.Brokers,
   725  			Dialer:                 r.config.Dialer,
   726  			Topics:                 r.getTopics(),
   727  			GroupBalancers:         r.config.GroupBalancers,
   728  			HeartbeatInterval:      r.config.HeartbeatInterval,
   729  			PartitionWatchInterval: r.config.PartitionWatchInterval,
   730  			WatchPartitionChanges:  r.config.WatchPartitionChanges,
   731  			SessionTimeout:         r.config.SessionTimeout,
   732  			RebalanceTimeout:       r.config.RebalanceTimeout,
   733  			JoinGroupBackoff:       r.config.JoinGroupBackoff,
   734  			RetentionTime:          r.config.RetentionTime,
   735  			StartOffset:            r.config.StartOffset,
   736  			Logger:                 r.config.Logger,
   737  			ErrorLogger:            r.config.ErrorLogger,
   738  		})
   739  		if err != nil {
   740  			panic(err)
   741  		}
   742  		go r.run(cg)
   743  	}
   744  
   745  	return r
   746  }
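
// A minimal construction sketch for NewReader; the broker address, group id,
// and topic are hypothetical placeholders. As noted in the Reader
// documentation, Close should always be called when the program is done with
// the reader.
func exampleNewReader() {
	r := NewReader(ReaderConfig{
		Brokers: []string{"localhost:9092"},
		GroupID: "example-consumer-group",
		Topic:   "example-topic",
	})
	defer r.Close()

	// ... consume messages with r.ReadMessage or r.FetchMessage ...
}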
   747  
   748  // Config returns the reader's configuration.
   749  func (r *Reader) Config() ReaderConfig {
   750  	return r.config
   751  }
   752  
   753  // Close closes the stream, preventing the program from reading any more
   754  // messages from it.
   755  func (r *Reader) Close() error {
   756  	atomic.StoreUint32(&r.once, 1)
   757  
   758  	r.mutex.Lock()
   759  	closed := r.closed
   760  	r.closed = true
   761  	r.mutex.Unlock()
   762  
   763  	r.cancel()
   764  	r.stop()
   765  	r.join.Wait()
   766  
   767  	if r.done != nil {
   768  		<-r.done
   769  	}
   770  
   771  	if !closed {
   772  		close(r.msgs)
   773  	}
   774  
   775  	return nil
   776  }
   777  
    778  // ReadMessage reads and returns the next message from r. The method call
   779  // blocks until a message becomes available, or an error occurs. The program
   780  // may also specify a context to asynchronously cancel the blocking operation.
   781  //
   782  // The method returns io.EOF to indicate that the reader has been closed.
   783  //
   784  // If consumer groups are used, ReadMessage will automatically commit the
   785  // offset when called. Note that this could result in an offset being committed
   786  // before the message is fully processed.
   787  //
   788  // If more fine-grained control of when offsets are committed is required, it
   789  // is recommended to use FetchMessage with CommitMessages instead.
   790  func (r *Reader) ReadMessage(ctx context.Context) (Message, error) {
   791  	m, err := r.FetchMessage(ctx)
   792  	if err != nil {
   793  		return Message{}, err
   794  	}
   795  
   796  	if r.useConsumerGroup() {
   797  		if err := r.CommitMessages(ctx, m); err != nil {
   798  			return Message{}, err
   799  		}
   800  	}
   801  
   802  	return m, nil
   803  }
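
// A typical consumption loop built on ReadMessage, sketched under the
// assumption that r was created by NewReader: it stops when the context is
// canceled or when the reader is closed (surfaced as io.EOF).
func exampleReadLoop(ctx context.Context, r *Reader) error {
	for {
		m, err := r.ReadMessage(ctx)
		if errors.Is(err, io.EOF) {
			return nil // the reader was closed
		}
		if err != nil {
			return err
		}
		fmt.Printf("message at %s/%d/%d: %s = %s\n",
			m.Topic, m.Partition, m.Offset, string(m.Key), string(m.Value))
	}
}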
   804  
    805  // FetchMessage reads and returns the next message from r. The method call
   806  // blocks until a message becomes available, or an error occurs. The program
   807  // may also specify a context to asynchronously cancel the blocking operation.
   808  //
   809  // The method returns io.EOF to indicate that the reader has been closed.
   810  //
   811  // FetchMessage does not commit offsets automatically when using consumer groups.
   812  // Use CommitMessages to commit the offset.
   813  func (r *Reader) FetchMessage(ctx context.Context) (Message, error) {
   814  	r.activateReadLag()
   815  
   816  	for {
   817  		r.mutex.Lock()
   818  
   819  		if !r.closed && r.version == 0 {
   820  			r.start(r.getTopicPartitionOffset())
   821  		}
   822  
   823  		version := r.version
   824  		r.mutex.Unlock()
   825  
   826  		select {
   827  		case <-ctx.Done():
   828  			return Message{}, ctx.Err()
   829  
   830  		case err := <-r.runError:
   831  			return Message{}, err
   832  
   833  		case m, ok := <-r.msgs:
   834  			if !ok {
   835  				return Message{}, io.EOF
   836  			}
   837  
   838  			if m.version >= version {
   839  				r.mutex.Lock()
   840  
   841  				switch {
   842  				case m.error != nil:
   843  				case version == r.version:
   844  					r.offset = m.message.Offset + 1
   845  					r.lag = m.watermark - r.offset
   846  				}
   847  
   848  				r.mutex.Unlock()
   849  
   850  				if errors.Is(m.error, io.EOF) {
   851  					// io.EOF is used as a marker to indicate that the stream
    852  					// has been closed. If it was received from the inner
    853  					// reader, we don't want to confuse the program, so we replace
    854  					// the error with io.ErrUnexpectedEOF.
   855  					m.error = io.ErrUnexpectedEOF
   856  				}
   857  
   858  				return m.message, m.error
   859  			}
   860  		}
   861  	}
   862  }
   863  
   864  // CommitMessages commits the list of messages passed as argument. The program
    865  // may pass a context to asynchronously cancel the commit operation when it is
   866  // configured to be blocking.
   867  //
   868  // Because kafka consumer groups track a single offset per partition, the
   869  // highest message offset passed to CommitMessages will cause all previous
   870  // messages to be committed. Applications need to account for these Kafka
   871  // limitations when committing messages, and maintain message ordering if they
   872  // need strong delivery guarantees. This property makes it valid to pass only
   873  // the last message seen to CommitMessages in order to move the offset of the
   874  // topic/partition it belonged to forward, effectively committing all previous
   875  // messages in the partition.
   876  func (r *Reader) CommitMessages(ctx context.Context, msgs ...Message) error {
   877  	if !r.useConsumerGroup() {
   878  		return errOnlyAvailableWithGroup
   879  	}
   880  
   881  	var errch <-chan error
   882  	creq := commitRequest{
   883  		commits: makeCommits(msgs...),
   884  	}
   885  
   886  	if r.useSyncCommits() {
   887  		ch := make(chan error, 1)
   888  		errch, creq.errch = ch, ch
   889  	}
   890  
   891  	select {
   892  	case r.commits <- creq:
   893  	case <-ctx.Done():
   894  		return ctx.Err()
   895  	case <-r.stctx.Done():
   896  		// This context is used to ensure we don't allow commits after the
   897  		// reader was closed.
   898  		return io.ErrClosedPipe
   899  	}
   900  
   901  	if !r.useSyncCommits() {
   902  		return nil
   903  	}
   904  
   905  	select {
   906  	case <-ctx.Done():
   907  		return ctx.Err()
   908  	case err := <-errch:
   909  		return err
   910  	}
   911  }
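
// An at-least-once processing sketch combining FetchMessage and CommitMessages:
// the offset is only committed after the message has been processed. The
// process callback is a hypothetical placeholder supplied by the caller.
func exampleFetchAndCommit(ctx context.Context, r *Reader, process func(Message) error) error {
	for {
		m, err := r.FetchMessage(ctx)
		if err != nil {
			return err
		}
		if err := process(m); err != nil {
			return err
		}
		// Committing the last processed message moves the group offset past it,
		// implicitly committing every earlier message in the same partition.
		if err := r.CommitMessages(ctx, m); err != nil {
			return err
		}
	}
}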
   912  
   913  // ReadLag returns the current lag of the reader by fetching the last offset of
   914  // the topic and partition and computing the difference between that value and
   915  // the offset of the last message returned by ReadMessage.
   916  //
   917  // This method is intended to be used in cases where a program may be unable to
   918  // call ReadMessage to update the value returned by Lag, but still needs to get
   919  // an up to date estimation of how far behind the reader is. For example when
    920  // an up-to-date estimate of how far behind the reader is, for example when
   921  //
   922  // The function returns a lag of zero when the reader's current offset is
   923  // negative.
   924  func (r *Reader) ReadLag(ctx context.Context) (lag int64, err error) {
   925  	if r.useConsumerGroup() {
   926  		return 0, errNotAvailableWithGroup
   927  	}
   928  
   929  	type offsets struct {
   930  		first int64
   931  		last  int64
   932  	}
   933  
   934  	offch := make(chan offsets, 1)
   935  	errch := make(chan error, 1)
   936  
   937  	go func() {
   938  		var off offsets
   939  		var err error
   940  
   941  		for _, broker := range r.config.Brokers {
   942  			var conn *Conn
   943  
   944  			if conn, err = r.config.Dialer.DialLeader(ctx, "tcp", broker, r.config.Topic, r.config.Partition); err != nil {
   945  				continue
   946  			}
   947  
   948  			deadline, _ := ctx.Deadline()
   949  			conn.SetDeadline(deadline)
   950  
   951  			off.first, off.last, err = conn.ReadOffsets()
   952  			conn.Close()
   953  
   954  			if err == nil {
   955  				break
   956  			}
   957  		}
   958  
   959  		if err != nil {
   960  			errch <- err
   961  		} else {
   962  			offch <- off
   963  		}
   964  	}()
   965  
   966  	select {
   967  	case off := <-offch:
   968  		switch cur := r.Offset(); {
   969  		case cur == FirstOffset:
   970  			lag = off.last - off.first
   971  
   972  		case cur == LastOffset:
   973  			lag = 0
   974  
   975  		default:
   976  			lag = off.last - cur
   977  		}
   978  	case err = <-errch:
   979  	case <-ctx.Done():
   980  		err = ctx.Err()
   981  	}
   982  
   983  	return
   984  }
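
// A lag-polling sketch for a reader without a consumer group; the one second
// timeout is an arbitrary value chosen for illustration.
func exampleReadLag(ctx context.Context, r *Reader) {
	ctx, cancel := context.WithTimeout(ctx, time.Second)
	defer cancel()

	lag, err := r.ReadLag(ctx)
	if err != nil {
		fmt.Println("failed to read lag:", err)
		return
	}
	fmt.Println("current lag:", lag)
}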
   985  
   986  // Offset returns the current absolute offset of the reader, or -1
   987  // if r is backed by a consumer group.
   988  func (r *Reader) Offset() int64 {
   989  	if r.useConsumerGroup() {
   990  		return -1
   991  	}
   992  
   993  	r.mutex.Lock()
   994  	offset := r.offset
   995  	r.mutex.Unlock()
   996  	r.withLogger(func(log Logger) {
   997  		log.Printf("looking up offset of kafka reader for partition %d of %s: %s", r.config.Partition, r.config.Topic, toHumanOffset(offset))
   998  	})
   999  	return offset
  1000  }
  1001  
  1002  // Lag returns the lag of the last message returned by ReadMessage, or -1
  1003  // if r is backed by a consumer group.
  1004  func (r *Reader) Lag() int64 {
  1005  	if r.useConsumerGroup() {
  1006  		return -1
  1007  	}
  1008  
  1009  	r.mutex.Lock()
  1010  	lag := r.lag
  1011  	r.mutex.Unlock()
  1012  	return lag
  1013  }
  1014  
  1015  // SetOffset changes the offset from which the next batch of messages will be
  1016  // read. The method fails with io.ErrClosedPipe if the reader has already been closed.
  1017  //
  1018  // From version 0.2.0, FirstOffset and LastOffset can be used to indicate the first
  1019  // or last available offset in the partition. Please note while -1 and -2 were accepted
  1020  // to indicate the first or last offset in previous versions, the meanings of the numbers
  1021  // were swapped in 0.2.0 to match the meanings in other libraries and the Kafka protocol
  1022  // specification.
  1023  func (r *Reader) SetOffset(offset int64) error {
  1024  	if r.useConsumerGroup() {
  1025  		return errNotAvailableWithGroup
  1026  	}
  1027  
  1028  	var err error
  1029  	r.mutex.Lock()
  1030  
  1031  	if r.closed {
  1032  		err = io.ErrClosedPipe
  1033  	} else if offset != r.offset {
  1034  		r.withLogger(func(log Logger) {
  1035  			log.Printf("setting the offset of the kafka reader for partition %d of %s from %s to %s",
  1036  				r.config.Partition, r.config.Topic, toHumanOffset(r.offset), toHumanOffset(offset))
  1037  		})
  1038  		r.offset = offset
  1039  
  1040  		if r.version != 0 {
  1041  			r.start(r.getTopicPartitionOffset())
  1042  		}
  1043  
  1044  		r.activateReadLag()
  1045  	}
  1046  
  1047  	r.mutex.Unlock()
  1048  	return err
  1049  }
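
// A seeking sketch for a reader without a consumer group: either of the
// sentinel values defined at the top of this file, or an absolute offset, may
// be passed. The absolute offset used here is a hypothetical value.
func exampleSetOffset(r *Reader) error {
	if err := r.SetOffset(FirstOffset); err != nil { // seek to the oldest available message
		return err
	}
	return r.SetOffset(42) // seek to a specific absolute offset
}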
  1050  
  1051  // SetOffsetAt changes the offset from which the next batch of messages will be
  1052  // read given the timestamp t.
  1053  //
   1054  // The method fails if it is unable to connect to the partition leader, unable to read
   1055  // the offset for the given timestamp, or if the reader has been closed.
  1056  func (r *Reader) SetOffsetAt(ctx context.Context, t time.Time) error {
  1057  	r.mutex.Lock()
  1058  	if r.closed {
  1059  		r.mutex.Unlock()
  1060  		return io.ErrClosedPipe
  1061  	}
  1062  	r.mutex.Unlock()
  1063  
  1064  	if len(r.config.Brokers) < 1 {
  1065  		return errors.New("no brokers in config")
  1066  	}
  1067  	var conn *Conn
  1068  	var err error
  1069  	for _, broker := range r.config.Brokers {
  1070  		conn, err = r.config.Dialer.DialLeader(ctx, "tcp", broker, r.config.Topic, r.config.Partition)
  1071  		if err != nil {
  1072  			continue
  1073  		}
  1074  		deadline, _ := ctx.Deadline()
  1075  		conn.SetDeadline(deadline)
  1076  		offset, err := conn.ReadOffset(t)
  1077  		conn.Close()
  1078  		if err != nil {
  1079  			return err
  1080  		}
  1081  
  1082  		return r.SetOffset(offset)
  1083  	}
  1084  	return fmt.Errorf("error dialing all brokers, one of the errors: %w", err)
  1085  }
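
// A timestamp-seeking sketch: resume reading from roughly one hour ago. The
// lookback duration is a hypothetical value chosen for illustration.
func exampleSetOffsetAt(ctx context.Context, r *Reader) error {
	return r.SetOffsetAt(ctx, time.Now().Add(-1*time.Hour))
}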
  1086  
  1087  // Stats returns a snapshot of the reader stats since the last time the method
  1088  // was called, or since the reader was created if it is called for the first
  1089  // time.
  1090  //
  1091  // A typical use of this method is to spawn a goroutine that will periodically
  1092  // call Stats on a kafka reader and report the metrics to a stats collection
  1093  // system.
  1094  func (r *Reader) Stats() ReaderStats {
  1095  	stats := ReaderStats{
  1096  		Dials:         r.stats.dials.snapshot(),
  1097  		Fetches:       r.stats.fetches.snapshot(),
  1098  		Messages:      r.stats.messages.snapshot(),
  1099  		Bytes:         r.stats.bytes.snapshot(),
  1100  		Rebalances:    r.stats.rebalances.snapshot(),
  1101  		Timeouts:      r.stats.timeouts.snapshot(),
  1102  		Errors:        r.stats.errors.snapshot(),
  1103  		DialTime:      r.stats.dialTime.snapshotDuration(),
  1104  		ReadTime:      r.stats.readTime.snapshotDuration(),
  1105  		WaitTime:      r.stats.waitTime.snapshotDuration(),
  1106  		FetchSize:     r.stats.fetchSize.snapshot(),
  1107  		FetchBytes:    r.stats.fetchBytes.snapshot(),
  1108  		Offset:        r.stats.offset.snapshot(),
  1109  		Lag:           r.stats.lag.snapshot(),
  1110  		MinBytes:      int64(r.config.MinBytes),
  1111  		MaxBytes:      int64(r.config.MaxBytes),
  1112  		MaxWait:       r.config.MaxWait,
  1113  		QueueLength:   int64(len(r.msgs)),
  1114  		QueueCapacity: int64(cap(r.msgs)),
  1115  		ClientID:      r.config.Dialer.ClientID,
  1116  		Topic:         r.config.Topic,
  1117  		Partition:     r.stats.partition,
  1118  	}
  1119  	// TODO: remove when we get rid of the deprecated field.
  1120  	stats.DeprecatedFetchesWithTypo = stats.Fetches
  1121  	return stats
  1122  }
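
// A reporting sketch of the pattern described above: a goroutine periodically
// snapshots the reader stats. The interval and the use of fmt as the sink are
// placeholders for a real metrics pipeline.
func exampleReportStats(ctx context.Context, r *Reader) {
	ticker := time.NewTicker(30 * time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			s := r.Stats()
			fmt.Printf("topic=%s partition=%s messages=%d lag=%d errors=%d\n",
				s.Topic, s.Partition, s.Messages, s.Lag, s.Errors)
		}
	}
}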
  1123  
  1124  func (r *Reader) getTopicPartitionOffset() map[topicPartition]int64 {
  1125  	key := topicPartition{topic: r.config.Topic, partition: int32(r.config.Partition)}
  1126  	return map[topicPartition]int64{key: r.offset}
  1127  }
  1128  
  1129  func (r *Reader) withLogger(do func(Logger)) {
  1130  	if r.config.Logger != nil {
  1131  		do(r.config.Logger)
  1132  	}
  1133  }
  1134  
  1135  func (r *Reader) withErrorLogger(do func(Logger)) {
  1136  	if r.config.ErrorLogger != nil {
  1137  		do(r.config.ErrorLogger)
  1138  	} else {
  1139  		r.withLogger(do)
  1140  	}
  1141  }
  1142  
  1143  func (r *Reader) activateReadLag() {
  1144  	if r.config.ReadLagInterval > 0 && atomic.CompareAndSwapUint32(&r.once, 0, 1) {
  1145  		// read lag will only be calculated when not using consumer groups
  1146  		// todo discuss how capturing read lag should interact with rebalancing
  1147  		if !r.useConsumerGroup() {
  1148  			go r.readLag(r.stctx)
  1149  		}
  1150  	}
  1151  }
  1152  
  1153  func (r *Reader) readLag(ctx context.Context) {
  1154  	ticker := time.NewTicker(r.config.ReadLagInterval)
  1155  	defer ticker.Stop()
  1156  
  1157  	for {
  1158  		timeout, cancel := context.WithTimeout(ctx, r.config.ReadLagInterval/2)
  1159  		lag, err := r.ReadLag(timeout)
  1160  		cancel()
  1161  
  1162  		if err != nil {
  1163  			r.stats.errors.observe(1)
  1164  			r.withErrorLogger(func(log Logger) {
  1165  				log.Printf("kafka reader failed to read lag of partition %d of %s: %s", r.config.Partition, r.config.Topic, err)
  1166  			})
  1167  		} else {
  1168  			r.stats.lag.observe(lag)
  1169  		}
  1170  
  1171  		select {
  1172  		case <-ticker.C:
  1173  		case <-ctx.Done():
  1174  			return
  1175  		}
  1176  	}
  1177  }
  1178  
  1179  func (r *Reader) start(offsetsByPartition map[topicPartition]int64) {
  1180  	if r.closed {
  1181  		// don't start child reader if parent Reader is closed
  1182  		return
  1183  	}
  1184  
  1185  	ctx, cancel := context.WithCancel(context.Background())
  1186  
  1187  	r.cancel() // always cancel the previous reader
  1188  	r.cancel = cancel
  1189  	r.version++
  1190  
  1191  	r.join.Add(len(offsetsByPartition))
  1192  	for key, offset := range offsetsByPartition {
  1193  		go func(ctx context.Context, key topicPartition, offset int64, join *sync.WaitGroup) {
  1194  			defer join.Done()
  1195  
  1196  			(&reader{
  1197  				dialer:           r.config.Dialer,
  1198  				logger:           r.config.Logger,
  1199  				errorLogger:      r.config.ErrorLogger,
  1200  				brokers:          r.config.Brokers,
  1201  				topic:            key.topic,
  1202  				partition:        int(key.partition),
  1203  				minBytes:         r.config.MinBytes,
  1204  				maxBytes:         r.config.MaxBytes,
  1205  				maxWait:          r.config.MaxWait,
  1206  				readBatchTimeout: r.config.ReadBatchTimeout,
  1207  				backoffDelayMin:  r.config.ReadBackoffMin,
  1208  				backoffDelayMax:  r.config.ReadBackoffMax,
  1209  				version:          r.version,
  1210  				msgs:             r.msgs,
  1211  				stats:            r.stats,
  1212  				isolationLevel:   r.config.IsolationLevel,
  1213  				maxAttempts:      r.config.MaxAttempts,
  1214  
  1215  				// backwards-compatibility flags
  1216  				offsetOutOfRangeError: r.config.OffsetOutOfRangeError,
  1217  			}).run(ctx, offset)
  1218  		}(ctx, key, offset, &r.join)
  1219  	}
  1220  }
  1221  
  1222  // A reader reads messages from kafka and produces them on its channels, it's
  1223  // used as a way to asynchronously fetch messages while the main program reads
  1224  // them using the high level reader API.
  1225  type reader struct {
  1226  	dialer           *Dialer
  1227  	logger           Logger
  1228  	errorLogger      Logger
  1229  	brokers          []string
  1230  	topic            string
  1231  	partition        int
  1232  	minBytes         int
  1233  	maxBytes         int
  1234  	maxWait          time.Duration
  1235  	readBatchTimeout time.Duration
  1236  	backoffDelayMin  time.Duration
  1237  	backoffDelayMax  time.Duration
  1238  	version          int64
  1239  	msgs             chan<- readerMessage
  1240  	stats            *readerStats
  1241  	isolationLevel   IsolationLevel
  1242  	maxAttempts      int
  1243  
  1244  	offsetOutOfRangeError bool
  1245  }
  1246  
  1247  type readerMessage struct {
  1248  	version   int64
  1249  	message   Message
  1250  	watermark int64
  1251  	error     error
  1252  }
  1253  
  1254  func (r *reader) run(ctx context.Context, offset int64) {
   1255  	// This is the reader's main loop: it only ends if the context is canceled
   1256  	// and will keep attempting to read messages otherwise.
   1257  	//
   1258  	// Retrying indefinitely has the nice side effect of preventing Read calls
   1259  	// on the parent reader from blocking if the connection to the kafka server fails:
  1260  	// the reader keeps reporting errors on the error channel which will then
  1261  	// be surfaced to the program.
  1262  	// If the reader wasn't retrying then the program would block indefinitely
  1263  	// on a Read call after reading the first error.
  1264  	for attempt := 0; true; attempt++ {
  1265  		if attempt != 0 {
  1266  			if !sleep(ctx, backoff(attempt, r.backoffDelayMin, r.backoffDelayMax)) {
  1267  				return
  1268  			}
  1269  		}
  1270  
  1271  		r.withLogger(func(log Logger) {
  1272  			log.Printf("initializing kafka reader for partition %d of %s starting at offset %d", r.partition, r.topic, toHumanOffset(offset))
  1273  		})
  1274  
  1275  		conn, start, err := r.initialize(ctx, offset)
  1276  		if err != nil {
  1277  			if errors.Is(err, OffsetOutOfRange) {
  1278  				if r.offsetOutOfRangeError {
  1279  					r.sendError(ctx, err)
  1280  					return
  1281  				}
  1282  
   1283  				// This would happen if the requested offset is past the last
  1284  				// offset on the partition leader. In that case we're just going
  1285  				// to retry later hoping that enough data has been produced.
  1286  				r.withErrorLogger(func(log Logger) {
  1287  					log.Printf("error initializing the kafka reader for partition %d of %s: %s", r.partition, r.topic, err)
  1288  				})
  1289  
  1290  				continue
  1291  			}
  1292  
  1293  			// Perform a configured number of attempts before
  1294  			// reporting first errors, this helps mitigate
  1295  			// situations where the kafka server is temporarily
  1296  			// unavailable.
  1297  			if attempt >= r.maxAttempts {
  1298  				r.sendError(ctx, err)
  1299  			} else {
  1300  				r.stats.errors.observe(1)
  1301  				r.withErrorLogger(func(log Logger) {
  1302  					log.Printf("error initializing the kafka reader for partition %d of %s: %s", r.partition, r.topic, err)
  1303  				})
  1304  			}
  1305  			continue
  1306  		}
  1307  
  1308  		// Resetting the attempt counter ensures that if a failure occurs after
  1309  		// a successful initialization we don't keep increasing the backoff
  1310  		// timeout.
  1311  		attempt = 0
  1312  
   1313  		// Now we're sure to have an absolute offset number; should anything happen
   1314  		// to the connection, we know we'll want to restart from this offset.
  1315  		offset = start
  1316  
  1317  		errcount := 0
  1318  	readLoop:
  1319  		for {
  1320  			if !sleep(ctx, backoff(errcount, r.backoffDelayMin, r.backoffDelayMax)) {
  1321  				conn.Close()
  1322  				return
  1323  			}
  1324  
  1325  			offset, err = r.read(ctx, offset, conn)
  1326  			switch {
  1327  			case err == nil:
  1328  				errcount = 0
  1329  				continue
  1330  
  1331  			case errors.Is(err, io.EOF):
  1332  				// done with this batch of messages...carry on.  note that this
  1333  				// block relies on the batch repackaging real io.EOF errors as
   1334  				// io.ErrUnexpectedEOF.  otherwise, we would end up swallowing real
  1335  				// errors here.
  1336  				errcount = 0
  1337  				continue
  1338  
  1339  			case errors.Is(err, io.ErrNoProgress):
  1340  				// This error is returned by the Conn when it believes the connection
  1341  				// has been corrupted, so we need to explicitly close it. Since we are
  1342  				// explicitly handling it and a retry will pick up, we can suppress the
  1343  				// error metrics and logs for this case.
  1344  				conn.Close()
  1345  				break readLoop
  1346  
  1347  			case errors.Is(err, UnknownTopicOrPartition):
  1348  				r.withErrorLogger(func(log Logger) {
  1349  					log.Printf("failed to read from current broker %v for partition %d of %s at offset %d: %v", r.brokers, r.partition, r.topic, toHumanOffset(offset), err)
  1350  				})
  1351  
  1352  				conn.Close()
  1353  
  1354  				// The next call to .initialize will re-establish a connection to the proper
  1355  				// topic/partition broker combo.
  1356  				r.stats.rebalances.observe(1)
  1357  				break readLoop
  1358  
  1359  			case errors.Is(err, NotLeaderForPartition):
  1360  				r.withErrorLogger(func(log Logger) {
  1361  					log.Printf("failed to read from current broker for partition %d of %s at offset %d: %v", r.partition, r.topic, toHumanOffset(offset), err)
  1362  				})
  1363  
  1364  				conn.Close()
  1365  
  1366  				// The next call to .initialize will re-establish a connection to the proper
  1367  				// partition leader.
  1368  				r.stats.rebalances.observe(1)
  1369  				break readLoop
  1370  
  1371  			case errors.Is(err, RequestTimedOut):
  1372  				// Timeout on the kafka side, this can be safely retried.
  1373  				errcount = 0
  1374  				r.withLogger(func(log Logger) {
  1375  					log.Printf("no messages received from kafka within the allocated time for partition %d of %s at offset %d: %v", r.partition, r.topic, toHumanOffset(offset), err)
  1376  				})
  1377  				r.stats.timeouts.observe(1)
  1378  				continue
  1379  
  1380  			case errors.Is(err, OffsetOutOfRange):
  1381  				first, last, err := r.readOffsets(conn)
  1382  				if err != nil {
  1383  					r.withErrorLogger(func(log Logger) {
  1384  						log.Printf("the kafka reader got an error while attempting to determine whether it was reading before the first offset or after the last offset of partition %d of %s: %s", r.partition, r.topic, err)
  1385  					})
  1386  					conn.Close()
  1387  					break readLoop
  1388  				}
  1389  
  1390  				switch {
  1391  				case offset < first:
  1392  					r.withErrorLogger(func(log Logger) {
  1393  						log.Printf("the kafka reader is reading before the first offset for partition %d of %s, skipping from offset %d to %d (%d messages)", r.partition, r.topic, toHumanOffset(offset), first, first-offset)
  1394  					})
  1395  					offset, errcount = first, 0
  1396  					continue // retry immediately so we don't keep falling behind due to the backoff
  1397  
  1398  				case offset < last:
  1399  					errcount = 0
  1400  					continue // more messages have already become available, retry immediately
  1401  
  1402  				default:
  1403  					// We may be reading past the last offset, will retry later.
  1404  					r.withErrorLogger(func(log Logger) {
   1405  						log.Printf("the kafka reader is reading past the last offset for partition %d of %s at offset %d", r.partition, r.topic, toHumanOffset(offset))
  1406  					})
  1407  				}
  1408  
  1409  			case errors.Is(err, context.Canceled):
  1410  				// Another reader has taken over, we can safely quit.
  1411  				conn.Close()
  1412  				return
  1413  
  1414  			case errors.Is(err, errUnknownCodec):
  1415  				// The compression codec is either unsupported or has not been
  1416  				// imported.  This is a fatal error b/c the reader cannot
  1417  				// proceed.
  1418  				r.sendError(ctx, err)
  1419  				break readLoop
  1420  
  1421  			default:
  1422  				var kafkaError Error
  1423  				if errors.As(err, &kafkaError) {
  1424  					r.sendError(ctx, err)
  1425  				} else {
  1426  					r.withErrorLogger(func(log Logger) {
  1427  						log.Printf("the kafka reader got an unknown error reading partition %d of %s at offset %d: %s", r.partition, r.topic, toHumanOffset(offset), err)
  1428  					})
  1429  					r.stats.errors.observe(1)
  1430  					conn.Close()
  1431  					break readLoop
  1432  				}
  1433  			}
  1434  
  1435  			errcount++
  1436  		}
  1437  	}
  1438  }
  1439  
  1440  func (r *reader) initialize(ctx context.Context, offset int64) (conn *Conn, start int64, err error) {
  1441  	for i := 0; i != len(r.brokers) && conn == nil; i++ {
  1442  		broker := r.brokers[i]
  1443  		var first, last int64
  1444  
  1445  		t0 := time.Now()
  1446  		conn, err = r.dialer.DialLeader(ctx, "tcp", broker, r.topic, r.partition)
  1447  		t1 := time.Now()
  1448  		r.stats.dials.observe(1)
  1449  		r.stats.dialTime.observeDuration(t1.Sub(t0))
  1450  
  1451  		if err != nil {
  1452  			continue
  1453  		}
  1454  
  1455  		if first, last, err = r.readOffsets(conn); err != nil {
  1456  			conn.Close()
  1457  			conn = nil
  1458  			break
  1459  		}
  1460  
  1461  		switch {
  1462  		case offset == FirstOffset:
  1463  			offset = first
  1464  
  1465  		case offset == LastOffset:
  1466  			offset = last
  1467  
  1468  		case offset < first:
  1469  			offset = first
  1470  		}
  1471  
  1472  		r.withLogger(func(log Logger) {
  1473  			log.Printf("the kafka reader for partition %d of %s is seeking to offset %d", r.partition, r.topic, toHumanOffset(offset))
  1474  		})
  1475  
  1476  		if start, err = conn.Seek(offset, SeekAbsolute); err != nil {
  1477  			conn.Close()
  1478  			conn = nil
  1479  			break
  1480  		}
  1481  
  1482  		conn.SetDeadline(time.Time{})
  1483  	}
  1484  
  1485  	return
  1486  }
  1487  
  1488  func (r *reader) read(ctx context.Context, offset int64, conn *Conn) (int64, error) {
  1489  	r.stats.fetches.observe(1)
  1490  	r.stats.offset.observe(offset)
  1491  
  1492  	t0 := time.Now()
  1493  	conn.SetReadDeadline(t0.Add(r.maxWait))
  1494  
  1495  	batch := conn.ReadBatchWith(ReadBatchConfig{
  1496  		MinBytes:       r.minBytes,
  1497  		MaxBytes:       r.maxBytes,
  1498  		IsolationLevel: r.isolationLevel,
  1499  	})
  1500  	highWaterMark := batch.HighWaterMark()
  1501  
  1502  	t1 := time.Now()
  1503  	r.stats.waitTime.observeDuration(t1.Sub(t0))
  1504  
  1505  	var msg Message
  1506  	var err error
  1507  	var size int64
  1508  	var bytes int64
  1509  
  1510  	for {
  1511  		conn.SetReadDeadline(time.Now().Add(r.readBatchTimeout))
  1512  
  1513  		if msg, err = batch.ReadMessage(); err != nil {
  1514  			batch.Close()
  1515  			break
  1516  		}
  1517  
  1518  		n := int64(len(msg.Key) + len(msg.Value))
  1519  		r.stats.messages.observe(1)
  1520  		r.stats.bytes.observe(n)
  1521  
  1522  		if err = r.sendMessage(ctx, msg, highWaterMark); err != nil {
  1523  			batch.Close()
  1524  			break
  1525  		}
  1526  
  1527  		offset = msg.Offset + 1
  1528  		r.stats.offset.observe(offset)
  1529  		r.stats.lag.observe(highWaterMark - offset)
  1530  
  1531  		size++
  1532  		bytes += n
  1533  	}
  1534  
  1535  	conn.SetReadDeadline(time.Time{})
  1536  
  1537  	t2 := time.Now()
  1538  	r.stats.readTime.observeDuration(t2.Sub(t1))
  1539  	r.stats.fetchSize.observe(size)
  1540  	r.stats.fetchBytes.observe(bytes)
  1541  	return offset, err
  1542  }
  1543  
  1544  func (r *reader) readOffsets(conn *Conn) (first, last int64, err error) {
  1545  	conn.SetDeadline(time.Now().Add(10 * time.Second))
  1546  	return conn.ReadOffsets()
  1547  }
  1548  
  1549  func (r *reader) sendMessage(ctx context.Context, msg Message, watermark int64) error {
  1550  	select {
  1551  	case r.msgs <- readerMessage{version: r.version, message: msg, watermark: watermark}:
  1552  		return nil
  1553  	case <-ctx.Done():
  1554  		return ctx.Err()
  1555  	}
  1556  }
  1557  
  1558  func (r *reader) sendError(ctx context.Context, err error) error {
  1559  	select {
  1560  	case r.msgs <- readerMessage{version: r.version, error: err}:
  1561  		return nil
  1562  	case <-ctx.Done():
  1563  		return ctx.Err()
  1564  	}
  1565  }
  1566  
  1567  func (r *reader) withLogger(do func(Logger)) {
  1568  	if r.logger != nil {
  1569  		do(r.logger)
  1570  	}
  1571  }
  1572  
  1573  func (r *reader) withErrorLogger(do func(Logger)) {
  1574  	if r.errorLogger != nil {
  1575  		do(r.errorLogger)
  1576  	} else {
  1577  		r.withLogger(do)
  1578  	}
  1579  }
  1580  
  1581  // extractTopics returns the unique list of topics represented by the set of
  1582  // provided members.
  1583  func extractTopics(members []GroupMember) []string {
  1584  	visited := map[string]struct{}{}
  1585  	var topics []string
  1586  
  1587  	for _, member := range members {
  1588  		for _, topic := range member.Topics {
  1589  			if _, seen := visited[topic]; seen {
  1590  				continue
  1591  			}
  1592  
  1593  			topics = append(topics, topic)
  1594  			visited[topic] = struct{}{}
  1595  		}
  1596  	}
  1597  
  1598  	sort.Strings(topics)
  1599  
  1600  	return topics
  1601  }
  1602  
  1603  type humanOffset int64
  1604  
  1605  func toHumanOffset(v int64) humanOffset {
  1606  	return humanOffset(v)
  1607  }
  1608  
  1609  func (offset humanOffset) Format(w fmt.State, _ rune) {
  1610  	v := int64(offset)
  1611  	switch v {
  1612  	case FirstOffset:
  1613  		fmt.Fprint(w, "first offset")
  1614  	case LastOffset:
  1615  		fmt.Fprint(w, "last offset")
  1616  	default:
  1617  		fmt.Fprint(w, strconv.FormatInt(v, 10))
  1618  	}
  1619  }
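
// A small formatting sketch: humanOffset implements fmt.Formatter, so the
// sentinel offsets print as readable labels while ordinary offsets print as
// plain numbers.
func exampleHumanOffset() {
	fmt.Println(toHumanOffset(FirstOffset)) // prints "first offset"
	fmt.Println(toHumanOffset(LastOffset))  // prints "last offset"
	fmt.Println(toHumanOffset(42))          // prints "42"
}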