github.com/rbisecke/kafka-go@v0.4.27/batch.go (about) 1 package kafka 2 3 import ( 4 "bufio" 5 "io" 6 "sync" 7 "time" 8 ) 9 10 // A Batch is an iterator over a sequence of messages fetched from a kafka 11 // server. 12 // 13 // Batches are created by calling (*Conn).ReadBatch. They hold a internal lock 14 // on the connection, which is released when the batch is closed. Failing to 15 // call a batch's Close method will likely result in a dead-lock when trying to 16 // use the connection. 17 // 18 // Batches are safe to use concurrently from multiple goroutines. 19 type Batch struct { 20 mutex sync.Mutex 21 conn *Conn 22 lock *sync.Mutex 23 msgs *messageSetReader 24 deadline time.Time 25 throttle time.Duration 26 topic string 27 partition int 28 offset int64 29 highWaterMark int64 30 err error 31 } 32 33 // Throttle gives the throttling duration applied by the kafka server on the 34 // connection. 35 func (batch *Batch) Throttle() time.Duration { 36 return batch.throttle 37 } 38 39 // Watermark returns the current highest watermark in a partition. 40 func (batch *Batch) HighWaterMark() int64 { 41 return batch.highWaterMark 42 } 43 44 // Partition returns the batch partition. 45 func (batch *Batch) Partition() int { 46 return batch.partition 47 } 48 49 // Offset returns the offset of the next message in the batch. 50 func (batch *Batch) Offset() int64 { 51 batch.mutex.Lock() 52 offset := batch.offset 53 batch.mutex.Unlock() 54 return offset 55 } 56 57 // Close closes the batch, releasing the connection lock and returning an error 58 // if reading the batch failed for any reason. 59 func (batch *Batch) Close() error { 60 batch.mutex.Lock() 61 err := batch.close() 62 batch.mutex.Unlock() 63 return err 64 } 65 66 func (batch *Batch) close() (err error) { 67 conn := batch.conn 68 lock := batch.lock 69 70 batch.conn = nil 71 batch.lock = nil 72 if batch.msgs != nil { 73 batch.msgs.discard() 74 } 75 76 if err = batch.err; err == io.EOF { 77 err = nil 78 } 79 80 if conn != nil { 81 conn.rdeadline.unsetConnReadDeadline() 82 conn.mutex.Lock() 83 conn.offset = batch.offset 84 conn.mutex.Unlock() 85 86 if err != nil { 87 if _, ok := err.(Error); !ok && err != io.ErrShortBuffer { 88 conn.Close() 89 } 90 } 91 } 92 93 if lock != nil { 94 lock.Unlock() 95 } 96 97 return 98 } 99 100 // Err returns a non-nil error if the batch is broken. This is the same error 101 // that would be returned by Read, ReadMessage or Close (except in the case of 102 // io.EOF which is never returned by Close). 103 // 104 // This method is useful when building retry mechanisms for (*Conn).ReadBatch, 105 // the program can check whether the batch carried a error before attempting to 106 // read the first message. 107 // 108 // Note that checking errors on a batch is optional, calling Read or ReadMessage 109 // is always valid and can be used to either read a message or an error in cases 110 // where that's convenient. 111 func (batch *Batch) Err() error { return batch.err } 112 113 // Read reads the value of the next message from the batch into b, returning the 114 // number of bytes read, or an error if the next message couldn't be read. 115 // 116 // If an error is returned the batch cannot be used anymore and calling Read 117 // again will keep returning that error. All errors except io.EOF (indicating 118 // that the program consumed all messages from the batch) are also returned by 119 // Close. 120 // 121 // The method fails with io.ErrShortBuffer if the buffer passed as argument is 122 // too small to hold the message value. 123 func (batch *Batch) Read(b []byte) (int, error) { 124 n := 0 125 126 batch.mutex.Lock() 127 offset := batch.offset 128 129 _, _, _, err := batch.readMessage( 130 func(r *bufio.Reader, size int, nbytes int) (int, error) { 131 if nbytes < 0 { 132 return size, nil 133 } 134 return discardN(r, size, nbytes) 135 }, 136 func(r *bufio.Reader, size int, nbytes int) (int, error) { 137 if nbytes < 0 { 138 return size, nil 139 } 140 // make sure there are enough bytes for the message value. return 141 // errShortRead if the message is truncated. 142 if nbytes > size { 143 return size, errShortRead 144 } 145 n = nbytes // return value 146 if nbytes > cap(b) { 147 nbytes = cap(b) 148 } 149 if nbytes > len(b) { 150 b = b[:nbytes] 151 } 152 nbytes, err := io.ReadFull(r, b[:nbytes]) 153 if err != nil { 154 return size - nbytes, err 155 } 156 return discardN(r, size-nbytes, n-nbytes) 157 }, 158 ) 159 160 if err == nil && n > len(b) { 161 n, err = len(b), io.ErrShortBuffer 162 batch.err = io.ErrShortBuffer 163 batch.offset = offset // rollback 164 } 165 166 batch.mutex.Unlock() 167 return n, err 168 } 169 170 // ReadMessage reads and return the next message from the batch. 171 // 172 // Because this method allocate memory buffers for the message key and value 173 // it is less memory-efficient than Read, but has the advantage of never 174 // failing with io.ErrShortBuffer. 175 func (batch *Batch) ReadMessage() (Message, error) { 176 msg := Message{} 177 batch.mutex.Lock() 178 179 var offset, timestamp int64 180 var headers []Header 181 var err error 182 183 offset, timestamp, headers, err = batch.readMessage( 184 func(r *bufio.Reader, size int, nbytes int) (remain int, err error) { 185 msg.Key, remain, err = readNewBytes(r, size, nbytes) 186 return 187 }, 188 func(r *bufio.Reader, size int, nbytes int) (remain int, err error) { 189 msg.Value, remain, err = readNewBytes(r, size, nbytes) 190 return 191 }, 192 ) 193 for batch.conn != nil && offset < batch.conn.offset { 194 if err != nil { 195 break 196 } 197 offset, timestamp, headers, err = batch.readMessage( 198 func(r *bufio.Reader, size int, nbytes int) (remain int, err error) { 199 msg.Key, remain, err = readNewBytes(r, size, nbytes) 200 return 201 }, 202 func(r *bufio.Reader, size int, nbytes int) (remain int, err error) { 203 msg.Value, remain, err = readNewBytes(r, size, nbytes) 204 return 205 }, 206 ) 207 } 208 209 batch.mutex.Unlock() 210 msg.Topic = batch.topic 211 msg.Partition = batch.partition 212 msg.Offset = offset 213 msg.HighWaterMark = batch.highWaterMark 214 msg.Time = makeTime(timestamp) 215 msg.Headers = headers 216 217 return msg, err 218 } 219 220 func (batch *Batch) readMessage( 221 key func(*bufio.Reader, int, int) (int, error), 222 val func(*bufio.Reader, int, int) (int, error), 223 ) (offset int64, timestamp int64, headers []Header, err error) { 224 if err = batch.err; err != nil { 225 return 226 } 227 228 offset, timestamp, headers, err = batch.msgs.readMessage(batch.offset, key, val) 229 switch err { 230 case nil: 231 batch.offset = offset + 1 232 case errShortRead: 233 // As an "optimization" kafka truncates the returned response after 234 // producing MaxBytes, which could then cause the code to return 235 // errShortRead. 236 err = batch.msgs.discard() 237 switch { 238 case err != nil: 239 // Since io.EOF is used by the batch to indicate that there is are 240 // no more messages to consume, it is crucial that any io.EOF errors 241 // on the underlying connection are repackaged. Otherwise, the 242 // caller can't tell the difference between a batch that was fully 243 // consumed or a batch whose connection is in an error state. 244 batch.err = dontExpectEOF(err) 245 case batch.msgs.remaining() == 0: 246 // Because we use the adjusted deadline we could end up returning 247 // before the actual deadline occurred. This is necessary otherwise 248 // timing out the connection for real could end up leaving it in an 249 // unpredictable state, which would require closing it. 250 // This design decision was made to maximize the chances of keeping 251 // the connection open, the trade off being to lose precision on the 252 // read deadline management. 253 err = checkTimeoutErr(batch.deadline) 254 batch.err = err 255 } 256 default: 257 // Since io.EOF is used by the batch to indicate that there is are 258 // no more messages to consume, it is crucial that any io.EOF errors 259 // on the underlying connection are repackaged. Otherwise, the 260 // caller can't tell the difference between a batch that was fully 261 // consumed or a batch whose connection is in an error state. 262 batch.err = dontExpectEOF(err) 263 } 264 265 return 266 } 267 268 func checkTimeoutErr(deadline time.Time) (err error) { 269 if !deadline.IsZero() && time.Now().After(deadline) { 270 err = RequestTimedOut 271 } else { 272 err = io.EOF 273 } 274 return 275 }