github.com/rbisecke/kafka-go@v0.4.27/batch.go

package kafka

import (
	"bufio"
	"io"
	"sync"
	"time"
)

// A Batch is an iterator over a sequence of messages fetched from a kafka
// server.
//
// Batches are created by calling (*Conn).ReadBatch. They hold an internal lock
// on the connection, which is released when the batch is closed. Failing to
// call a batch's Close method will likely result in a deadlock when trying to
// use the connection.
//
// Batches are safe to use concurrently from multiple goroutines.
type Batch struct {
	mutex         sync.Mutex
	conn          *Conn
	lock          *sync.Mutex
	msgs          *messageSetReader
	deadline      time.Time
	throttle      time.Duration
	topic         string
	partition     int
	offset        int64
	highWaterMark int64
	err           error
}

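// A minimal usage sketch (not part of this file's original documentation; the
// address, topic, partition and the process function below are placeholders):
// the batch holds the connection lock, so Close is typically deferred right
// after ReadBatch, and io.EOF from ReadMessage marks a fully consumed batch.
//
//	conn, _ := DialLeader(context.Background(), "tcp", "localhost:9092", "my-topic", 0)
//	batch := conn.ReadBatch(1e3, 1e6) // fetch between 1KB and 1MB per batch
//	defer batch.Close()
//
//	for {
//		msg, err := batch.ReadMessage()
//		if err != nil {
//			break // io.EOF once the batch is fully consumed
//		}
//		process(msg) // placeholder for application code
//	}
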
// Throttle gives the throttling duration applied by the kafka server on the
// connection.
func (batch *Batch) Throttle() time.Duration {
	return batch.throttle
}

// HighWaterMark returns the current highest watermark in a partition.
func (batch *Batch) HighWaterMark() int64 {
	return batch.highWaterMark
}

// Partition returns the batch partition.
func (batch *Batch) Partition() int {
	return batch.partition
}

// Offset returns the offset of the next message in the batch.
func (batch *Batch) Offset() int64 {
	batch.mutex.Lock()
	offset := batch.offset
	batch.mutex.Unlock()
	return offset
}

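// As an illustration of how these two values are commonly combined (an
// assumption about usage, not something stated in this file): the high
// watermark is the offset past the last message available in the partition,
// and Offset is the next message to be read, so their difference estimates the
// lag remaining for this batch's partition.
//
//	lag := batch.HighWaterMark() - batch.Offset()
//	if lag == 0 {
//		// the batch has caught up with the end of the partition
//	}
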
// Close closes the batch, releasing the connection lock and returning an error
// if reading the batch failed for any reason.
func (batch *Batch) Close() error {
	batch.mutex.Lock()
	err := batch.close()
	batch.mutex.Unlock()
	return err
}

func (batch *Batch) close() (err error) {
	conn := batch.conn
	lock := batch.lock

	batch.conn = nil
	batch.lock = nil
	if batch.msgs != nil {
		batch.msgs.discard()
	}

	if err = batch.err; err == io.EOF {
		err = nil
	}

	if conn != nil {
		conn.rdeadline.unsetConnReadDeadline()
		conn.mutex.Lock()
		conn.offset = batch.offset
		conn.mutex.Unlock()

		if err != nil {
			if _, ok := err.(Error); !ok && err != io.ErrShortBuffer {
				conn.Close()
			}
		}
	}

	if lock != nil {
		lock.Unlock()
	}

	return
}

// Err returns a non-nil error if the batch is broken. This is the same error
// that would be returned by Read, ReadMessage or Close (except in the case of
// io.EOF which is never returned by Close).
//
// This method is useful when building retry mechanisms for (*Conn).ReadBatch:
// the program can check whether the batch carried an error before attempting
// to read the first message.
//
// Note that checking errors on a batch is optional; calling Read or ReadMessage
// is always valid and can be used to either read a message or an error in cases
// where that's convenient.
func (batch *Batch) Err() error { return batch.err }

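// A sketch of the retry pattern mentioned above (the backoff value and the
// surrounding retry loop are hypothetical, not part of this package): Err can
// be inspected right after ReadBatch, before the first Read or ReadMessage
// call, to decide whether to back off and fetch a new batch.
//
//	batch := conn.ReadBatch(1e3, 1e6)
//	if err := batch.Err(); err != nil {
//		batch.Close()
//		time.Sleep(backoff) // backoff chosen by the caller
//		// ...retry the ReadBatch call...
//	}
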
// Read reads the value of the next message from the batch into b, returning the
// number of bytes read, or an error if the next message couldn't be read.
//
// If an error is returned, the batch cannot be used anymore and calling Read
// again will keep returning that error. All errors except io.EOF (indicating
// that the program consumed all messages from the batch) are also returned by
// Close.
//
// The method fails with io.ErrShortBuffer if the buffer passed as argument is
// too small to hold the message value.
func (batch *Batch) Read(b []byte) (int, error) {
	n := 0

	batch.mutex.Lock()
	offset := batch.offset

	_, _, _, err := batch.readMessage(
		func(r *bufio.Reader, size int, nbytes int) (int, error) {
			if nbytes < 0 {
				return size, nil
			}
			return discardN(r, size, nbytes)
		},
		func(r *bufio.Reader, size int, nbytes int) (int, error) {
			if nbytes < 0 {
				return size, nil
			}
			// Make sure there are enough bytes for the message value. Return
			// errShortRead if the message is truncated.
			if nbytes > size {
				return size, errShortRead
			}
			n = nbytes // return value
			if nbytes > cap(b) {
				nbytes = cap(b)
			}
			if nbytes > len(b) {
				b = b[:nbytes]
			}
			nbytes, err := io.ReadFull(r, b[:nbytes])
			if err != nil {
				return size - nbytes, err
			}
			return discardN(r, size-nbytes, n-nbytes)
		},
	)

	if err == nil && n > len(b) {
		n, err = len(b), io.ErrShortBuffer
		batch.err = io.ErrShortBuffer
		batch.offset = offset // rollback
	}

	batch.mutex.Unlock()
	return n, err
}

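// A short sketch of reading raw values into a reusable buffer (the 10KB size is
// an arbitrary choice for illustration). io.EOF means the batch was fully
// consumed, while io.ErrShortBuffer means the value did not fit in buf; since
// the offset is rolled back in that case, the message can be fetched again in a
// new batch with a larger buffer, or read with ReadMessage instead.
//
//	buf := make([]byte, 10e3) // 10KB scratch buffer
//	for {
//		n, err := batch.Read(buf)
//		if err != nil {
//			break // io.EOF, io.ErrShortBuffer, or a broken batch
//		}
//		_ = buf[:n] // value of the next message
//	}
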
// ReadMessage reads and returns the next message from the batch.
//
// Because this method allocates memory buffers for the message key and value
// it is less memory-efficient than Read, but has the advantage of never
// failing with io.ErrShortBuffer.
func (batch *Batch) ReadMessage() (Message, error) {
	msg := Message{}
	batch.mutex.Lock()

	var offset, timestamp int64
	var headers []Header
	var err error

	offset, timestamp, headers, err = batch.readMessage(
		func(r *bufio.Reader, size int, nbytes int) (remain int, err error) {
			msg.Key, remain, err = readNewBytes(r, size, nbytes)
			return
		},
		func(r *bufio.Reader, size int, nbytes int) (remain int, err error) {
			msg.Value, remain, err = readNewBytes(r, size, nbytes)
			return
		},
	)
	// Skip messages whose offset precedes the connection's current offset;
	// this can happen when the broker returns a compressed record batch that
	// starts before the requested offset.
	for batch.conn != nil && offset < batch.conn.offset {
		if err != nil {
			break
		}
		offset, timestamp, headers, err = batch.readMessage(
			func(r *bufio.Reader, size int, nbytes int) (remain int, err error) {
				msg.Key, remain, err = readNewBytes(r, size, nbytes)
				return
			},
			func(r *bufio.Reader, size int, nbytes int) (remain int, err error) {
				msg.Value, remain, err = readNewBytes(r, size, nbytes)
				return
			},
		)
	}

	batch.mutex.Unlock()
	msg.Topic = batch.topic
	msg.Partition = batch.partition
	msg.Offset = offset
	msg.HighWaterMark = batch.highWaterMark
	msg.Time = makeTime(timestamp)
	msg.Headers = headers

	return msg, err
}

func (batch *Batch) readMessage(
	key func(*bufio.Reader, int, int) (int, error),
	val func(*bufio.Reader, int, int) (int, error),
) (offset int64, timestamp int64, headers []Header, err error) {
	if err = batch.err; err != nil {
		return
	}

	offset, timestamp, headers, err = batch.msgs.readMessage(batch.offset, key, val)
	switch err {
	case nil:
		batch.offset = offset + 1
	case errShortRead:
		// As an "optimization" kafka truncates the returned response after
		// producing MaxBytes, which could then cause the code to return
		// errShortRead.
		err = batch.msgs.discard()
		switch {
		case err != nil:
			// Since io.EOF is used by the batch to indicate that there are
			// no more messages to consume, it is crucial that any io.EOF errors
			// on the underlying connection are repackaged. Otherwise, the
			// caller can't tell the difference between a batch that was fully
			// consumed and a batch whose connection is in an error state.
			batch.err = dontExpectEOF(err)
		case batch.msgs.remaining() == 0:
			// Because we use the adjusted deadline we could end up returning
			// before the actual deadline occurred. This is necessary; otherwise,
			// timing out the connection for real could end up leaving it in an
			// unpredictable state, which would require closing it.
			// This design decision was made to maximize the chances of keeping
			// the connection open, the trade-off being to lose precision on the
			// read deadline management.
			err = checkTimeoutErr(batch.deadline)
			batch.err = err
		}
	default:
		// Since io.EOF is used by the batch to indicate that there are
		// no more messages to consume, it is crucial that any io.EOF errors
		// on the underlying connection are repackaged. Otherwise, the
		// caller can't tell the difference between a batch that was fully
		// consumed and a batch whose connection is in an error state.
		batch.err = dontExpectEOF(err)
	}

	return
}

func checkTimeoutErr(deadline time.Time) (err error) {
	if !deadline.IsZero() && time.Now().After(deadline) {
		err = RequestTimedOut
	} else {
		err = io.EOF
	}
	return
}