github.com/deanMdreon/kafka-go@v0.4.32/batch.go

github.com/deanMdreon/kafka-go@v0.4.32/batch.go (about)

     1  package kafka
     2  
     3  import (
     4  	"bufio"
     5  	"io"
     6  	"sync"
     7  	"time"
     8  )
     9  
    10  // A Batch is an iterator over a sequence of messages fetched from a kafka
    11  // server.
    12  //
    13  // Batches are created by calling (*Conn).ReadBatch. They hold a internal lock
    14  // on the connection, which is released when the batch is closed. Failing to
    15  // call a batch's Close method will likely result in a dead-lock when trying to
    16  // use the connection.
    17  //
    18  // Batches are safe to use concurrently from multiple goroutines.
    19  type Batch struct {
    20  	mutex         sync.Mutex
    21  	conn          *Conn
    22  	lock          *sync.Mutex
    23  	msgs          *messageSetReader
    24  	deadline      time.Time
    25  	throttle      time.Duration
    26  	topic         string
    27  	partition     int
    28  	offset        int64
    29  	highWaterMark int64
    30  	err           error
    31  	// The last offset in the batch.
    32  	//
    33  	// We use lastOffset to skip offsets that have been compacted away.
    34  	//
    35  	// We store lastOffset because we get lastOffset when we read a new message
    36  	// but only try to handle compaction when we receive an EOF. However, when
    37  	// we get an EOF we do not get the lastOffset. So there is a mismatch
    38  	// between when we receive it and need to use it.
    39  	lastOffset int64
    40  }
    41  
    42  // Throttle gives the throttling duration applied by the kafka server on the
    43  // connection.
    44  func (batch *Batch) Throttle() time.Duration {
    45  	return batch.throttle
    46  }
    47  
    48  // Watermark returns the current highest watermark in a partition.
    49  func (batch *Batch) HighWaterMark() int64 {
    50  	return batch.highWaterMark
    51  }
    52  
    53  // Partition returns the batch partition.
    54  func (batch *Batch) Partition() int {
    55  	return batch.partition
    56  }
    57  
    58  // Offset returns the offset of the next message in the batch.
    59  func (batch *Batch) Offset() int64 {
    60  	batch.mutex.Lock()
    61  	offset := batch.offset
    62  	batch.mutex.Unlock()
    63  	return offset
    64  }
    65  
    66  // Close closes the batch, releasing the connection lock and returning an error
    67  // if reading the batch failed for any reason.
    68  func (batch *Batch) Close() error {
    69  	batch.mutex.Lock()
    70  	err := batch.close()
    71  	batch.mutex.Unlock()
    72  	return err
    73  }
    74  
    75  func (batch *Batch) close() (err error) {
    76  	conn := batch.conn
    77  	lock := batch.lock
    78  
    79  	batch.conn = nil
    80  	batch.lock = nil
    81  	if batch.msgs != nil {
    82  		batch.msgs.discard()
    83  	}
    84  
    85  	if err = batch.err; err == io.EOF {
    86  		err = nil
    87  	}
    88  
    89  	if conn != nil {
    90  		conn.rdeadline.unsetConnReadDeadline()
    91  		conn.mutex.Lock()
    92  		conn.offset = batch.offset
    93  		conn.mutex.Unlock()
    94  
    95  		if err != nil {
    96  			if _, ok := err.(Error); !ok && err != io.ErrShortBuffer {
    97  				conn.Close()
    98  			}
    99  		}
   100  	}
   101  
   102  	if lock != nil {
   103  		lock.Unlock()
   104  	}
   105  
   106  	return
   107  }
   108  
   109  // Err returns a non-nil error if the batch is broken. This is the same error
   110  // that would be returned by Read, ReadMessage or Close (except in the case of
   111  // io.EOF which is never returned by Close).
   112  //
   113  // This method is useful when building retry mechanisms for (*Conn).ReadBatch,
   114  // the program can check whether the batch carried a error before attempting to
   115  // read the first message.
   116  //
   117  // Note that checking errors on a batch is optional, calling Read or ReadMessage
   118  // is always valid and can be used to either read a message or an error in cases
   119  // where that's convenient.
   120  func (batch *Batch) Err() error { return batch.err }
   121  
   122  // Read reads the value of the next message from the batch into b, returning the
   123  // number of bytes read, or an error if the next message couldn't be read.
   124  //
   125  // If an error is returned the batch cannot be used anymore and calling Read
   126  // again will keep returning that error. All errors except io.EOF (indicating
   127  // that the program consumed all messages from the batch) are also returned by
   128  // Close.
   129  //
   130  // The method fails with io.ErrShortBuffer if the buffer passed as argument is
   131  // too small to hold the message value.
   132  func (batch *Batch) Read(b []byte) (int, error) {
   133  	n := 0
   134  
   135  	batch.mutex.Lock()
   136  	offset := batch.offset
   137  
   138  	_, _, _, err := batch.readMessage(
   139  		func(r *bufio.Reader, size int, nbytes int) (int, error) {
   140  			if nbytes < 0 {
   141  				return size, nil
   142  			}
   143  			return discardN(r, size, nbytes)
   144  		},
   145  		func(r *bufio.Reader, size int, nbytes int) (int, error) {
   146  			if nbytes < 0 {
   147  				return size, nil
   148  			}
   149  			// make sure there are enough bytes for the message value.  return
   150  			// errShortRead if the message is truncated.
   151  			if nbytes > size {
   152  				return size, errShortRead
   153  			}
   154  			n = nbytes // return value
   155  			if nbytes > cap(b) {
   156  				nbytes = cap(b)
   157  			}
   158  			if nbytes > len(b) {
   159  				b = b[:nbytes]
   160  			}
   161  			nbytes, err := io.ReadFull(r, b[:nbytes])
   162  			if err != nil {
   163  				return size - nbytes, err
   164  			}
   165  			return discardN(r, size-nbytes, n-nbytes)
   166  		},
   167  	)
   168  
   169  	if err == nil && n > len(b) {
   170  		n, err = len(b), io.ErrShortBuffer
   171  		batch.err = io.ErrShortBuffer
   172  		batch.offset = offset // rollback
   173  	}
   174  
   175  	batch.mutex.Unlock()
   176  	return n, err
   177  }
   178  
   179  // ReadMessage reads and return the next message from the batch.
   180  //
   181  // Because this method allocate memory buffers for the message key and value
   182  // it is less memory-efficient than Read, but has the advantage of never
   183  // failing with io.ErrShortBuffer.
   184  func (batch *Batch) ReadMessage() (Message, error) {
   185  	msg := Message{}
   186  	batch.mutex.Lock()
   187  
   188  	var offset, timestamp int64
   189  	var headers []Header
   190  	var err error
   191  
   192  	offset, timestamp, headers, err = batch.readMessage(
   193  		func(r *bufio.Reader, size int, nbytes int) (remain int, err error) {
   194  			msg.Key, remain, err = readNewBytes(r, size, nbytes)
   195  			return
   196  		},
   197  		func(r *bufio.Reader, size int, nbytes int) (remain int, err error) {
   198  			msg.Value, remain, err = readNewBytes(r, size, nbytes)
   199  			return
   200  		},
   201  	)
   202  	// A batch may start before the requested offset so skip messages
   203  	// until the requested offset is reached.
   204  	for batch.conn != nil && offset < batch.conn.offset {
   205  		if err != nil {
   206  			break
   207  		}
   208  		offset, timestamp, headers, err = batch.readMessage(
   209  			func(r *bufio.Reader, size int, nbytes int) (remain int, err error) {
   210  				msg.Key, remain, err = readNewBytes(r, size, nbytes)
   211  				return
   212  			},
   213  			func(r *bufio.Reader, size int, nbytes int) (remain int, err error) {
   214  				msg.Value, remain, err = readNewBytes(r, size, nbytes)
   215  				return
   216  			},
   217  		)
   218  	}
   219  
   220  	batch.mutex.Unlock()
   221  	msg.Topic = batch.topic
   222  	msg.Partition = batch.partition
   223  	msg.Offset = offset
   224  	msg.HighWaterMark = batch.highWaterMark
   225  	msg.Time = makeTime(timestamp)
   226  	msg.Headers = headers
   227  
   228  	return msg, err
   229  }
   230  
   231  func (batch *Batch) readMessage(
   232  	key func(*bufio.Reader, int, int) (int, error),
   233  	val func(*bufio.Reader, int, int) (int, error),
   234  ) (offset int64, timestamp int64, headers []Header, err error) {
   235  	if err = batch.err; err != nil {
   236  		return
   237  	}
   238  
   239  	var lastOffset int64
   240  	offset, lastOffset, timestamp, headers, err = batch.msgs.readMessage(batch.offset, key, val)
   241  	switch err {
   242  	case nil:
   243  		batch.offset = offset + 1
   244  		batch.lastOffset = lastOffset
   245  	case errShortRead:
   246  		// As an "optimization" kafka truncates the returned response after
   247  		// producing MaxBytes, which could then cause the code to return
   248  		// errShortRead.
   249  		err = batch.msgs.discard()
   250  		switch {
   251  		case err != nil:
   252  			// Since io.EOF is used by the batch to indicate that there is are
   253  			// no more messages to consume, it is crucial that any io.EOF errors
   254  			// on the underlying connection are repackaged.  Otherwise, the
   255  			// caller can't tell the difference between a batch that was fully
   256  			// consumed or a batch whose connection is in an error state.
   257  			batch.err = dontExpectEOF(err)
   258  		case batch.msgs.remaining() == 0:
   259  			// Because we use the adjusted deadline we could end up returning
   260  			// before the actual deadline occurred. This is necessary otherwise
   261  			// timing out the connection for real could end up leaving it in an
   262  			// unpredictable state, which would require closing it.
   263  			// This design decision was made to maximize the chances of keeping
   264  			// the connection open, the trade off being to lose precision on the
   265  			// read deadline management.
   266  			err = checkTimeoutErr(batch.deadline)
   267  			batch.err = err
   268  
   269  			// Checks the following:
   270  			// - `batch.err` for a "success" from the previous timeout check
   271  			// - `batch.msgs.lengthRemain` to ensure that this EOF is not due
   272  			//   to MaxBytes truncation
   273  			// - `batch.lastOffset` to ensure that the message format contains
   274  			//   `lastOffset`
   275  			if batch.err == io.EOF && batch.msgs.lengthRemain <= 0 && batch.lastOffset != -1 {
   276  				// Log compaction can create batches that end with compacted
   277  				// records so the normal strategy that increments the "next"
   278  				// offset as records are read doesn't work as the compacted
   279  				// records are "missing" and never get "read".
   280  				//
   281  				// In order to reliably reach the next non-compacted offset we
   282  				// jump past the saved lastOffset.
   283  				batch.offset = batch.lastOffset + 1
   284  			}
   285  		}
   286  	default:
   287  		// Since io.EOF is used by the batch to indicate that there is are
   288  		// no more messages to consume, it is crucial that any io.EOF errors
   289  		// on the underlying connection are repackaged.  Otherwise, the
   290  		// caller can't tell the difference between a batch that was fully
   291  		// consumed or a batch whose connection is in an error state.
   292  		batch.err = dontExpectEOF(err)
   293  	}
   294  
   295  	return
   296  }
   297  
   298  func checkTimeoutErr(deadline time.Time) (err error) {
   299  	if !deadline.IsZero() && time.Now().After(deadline) {
   300  		err = RequestTimedOut
   301  	} else {
   302  		err = io.EOF
   303  	}
   304  	return
   305  }