github.com/segmentio/kafka-go@v0.4.48-0.20240318174348-3f6244eb34fd/protocol/record.go

package protocol

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"time"

	"github.com/segmentio/kafka-go/compress"
)

// Attributes is a bitset representing special attributes set on records.
type Attributes int16

const (
	Gzip          Attributes = Attributes(compress.Gzip)   // 1
	Snappy        Attributes = Attributes(compress.Snappy) // 2
	Lz4           Attributes = Attributes(compress.Lz4)    // 3
	Zstd          Attributes = Attributes(compress.Zstd)   // 4
	Transactional Attributes = 1 << 4
	Control       Attributes = 1 << 5
)

func (a Attributes) Compression() compress.Compression {
	return compress.Compression(a & 7)
}

func (a Attributes) Transactional() bool {
	return (a & Transactional) != 0
}

func (a Attributes) Control() bool {
	return (a & Control) != 0
}

func (a Attributes) String() string {
	s := a.Compression().String()
	if a.Transactional() {
		s += "+transactional"
	}
	if a.Control() {
		s += "+control"
	}
	return s
}

// Header represents a single entry in a list of record headers.
type Header struct {
	Key   string
	Value []byte
}

// Record represents a single kafka record.
//
// Record values are not safe to use concurrently from multiple goroutines.
type Record struct {
	// The offset at which the record exists in a topic partition. This value
	// is ignored in produce requests.
	Offset int64

	// The time of the record. This value may be omitted in produce requests
	// to let kafka set the time when it saves the record.
	Time time.Time

	// A byte sequence containing the key of this record. The sequence may be
	// nil to indicate that the record has no key. If the record is part of a
	// RecordSet, the content of the key must remain valid at least until the
	// record set is closed (or until the key is closed).
	Key Bytes

	// A byte sequence containing the value of this record. The sequence may
	// be nil to indicate that the record has no value. If the record is part
	// of a RecordSet, the content of the value must remain valid at least
	// until the record set is closed (or until the value is closed).
	Value Bytes

	// The list of headers associated with this record. The slice may be
	// reused across calls, so the program should use it as an immutable
	// value.
	Headers []Header
}
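// Editorial sketch, not part of the original file: it shows how the
// Attributes bitset packs the compression codec into the low 3 bits
// (masked by Compression) while the transactional and control flags
// occupy dedicated bits, so a single int16 carries all three.
func exampleAttributes() {
	a := Zstd | Transactional // hypothetical attribute combination

	fmt.Println(a.Compression())   // codec extracted from bits 0-2
	fmt.Println(a.Transactional()) // true, bit 4 is set
	fmt.Println(a.Control())       // false, bit 5 is clear
	fmt.Println(a)                 // "zstd+transactional" via Attributes.String
}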
// RecordSet represents a sequence of records in Produce requests and Fetch
// responses. All v0, v1, and v2 formats are supported.
type RecordSet struct {
	// The message version that this record set will be represented as, valid
	// values are 1 or 2.
	//
	// When reading, this is the value of the highest version used in the
	// batches that compose the record set.
	//
	// When writing, this value dictates the format that the records will be
	// encoded in.
	Version int8

	// Attributes set on the record set.
	//
	// When reading, the attributes are the combination of all attributes in
	// the batches that compose the record set.
	//
	// When writing, the attributes apply to the whole sequence of records in
	// the set.
	Attributes Attributes

	// A reader exposing the sequence of records.
	//
	// When reading a RecordSet from an io.Reader, the Records field will be a
	// *RecordStream. If the program needs to access the details of each batch
	// that composes the stream, it may use type assertions to access the
	// underlying types of each batch.
	Records RecordReader
}

// bufferedReader is an interface implemented by types like bufio.Reader, which
// we use to optimize prefix reads by accessing the internal buffer directly
// through calls to Peek.
type bufferedReader interface {
	Discard(int) (int, error)
	Peek(int) ([]byte, error)
}

// bytesBuffer is an interface implemented by types like bytes.Buffer, which we
// use to optimize prefix reads by accessing the internal buffer directly
// through calls to Bytes.
type bytesBuffer interface {
	Bytes() []byte
}

// magicByteOffset is the position of the magic byte in all versions of record
// sets in the kafka protocol.
const magicByteOffset = 16
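// Editorial sketch, not part of the original file: after ReadFrom (below)
// has decoded a record set, the Records field holds the *RecordStream it
// built, and a type assertion recovers it so the program can inspect the
// per-batch readers individually.
func inspectStream(rs *RecordSet) {
	if stream, ok := rs.Records.(*RecordStream); ok {
		// Each element of stream.Records is the reader decoded from one
		// batch; its concrete type depends on the batch's wire version.
		fmt.Println("batches in stream:", len(stream.Records))
	}
}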
// ReadFrom reads the representation of a record set from r into rs, returning
// the number of bytes consumed from r, and a non-nil error if the record set
// could not be read.
func (rs *RecordSet) ReadFrom(r io.Reader) (int64, error) {
	d, _ := r.(*decoder)
	if d == nil {
		d = &decoder{
			reader: r,
			remain: 4,
		}
	}

	*rs = RecordSet{}
	limit := d.remain
	size := d.readInt32()

	if d.err != nil {
		return int64(limit - d.remain), d.err
	}

	if size <= 0 {
		return 4, nil
	}

	stream := &RecordStream{
		Records: make([]RecordReader, 0, 4),
	}

	var err error
	d.remain = int(size)

	for d.remain > 0 && err == nil {
		var version byte

		if d.remain < (magicByteOffset + 1) {
			if len(stream.Records) != 0 {
				break
			}
			return 4, fmt.Errorf("impossible record set shorter than %d bytes", magicByteOffset+1)
		}

		switch r := d.reader.(type) {
		case bufferedReader:
			b, err := r.Peek(magicByteOffset + 1)
			if err != nil {
				n, _ := r.Discard(len(b))
				return 4 + int64(n), dontExpectEOF(err)
			}
			version = b[magicByteOffset]
		case bytesBuffer:
			version = r.Bytes()[magicByteOffset]
		default:
			b := make([]byte, magicByteOffset+1)
			if n, err := io.ReadFull(d.reader, b); err != nil {
				return 4 + int64(n), dontExpectEOF(err)
			}
			version = b[magicByteOffset]
			// Reconstruct the prefix that we had to read to determine the
			// version of the record set from the magic byte.
			//
			// Technically this may recursively stack readers when consuming
			// all items of the batch, which could hurt performance. In
			// practice this path should not be taken though, since the decoder
			// would read from a *bufio.Reader which implements the
			// bufferedReader interface.
			d.reader = io.MultiReader(bytes.NewReader(b), d.reader)
		}

		var tmp RecordSet
		switch version {
		case 0, 1:
			err = tmp.readFromVersion1(d)
		case 2:
			err = tmp.readFromVersion2(d)
		default:
			err = fmt.Errorf("unsupported message version %d for message of size %d", version, size)
		}

		if tmp.Version > rs.Version {
			rs.Version = tmp.Version
		}

		rs.Attributes |= tmp.Attributes

		if tmp.Records != nil {
			stream.Records = append(stream.Records, tmp.Records)
		}
	}

	if len(stream.Records) != 0 {
		rs.Records = stream
		// Ignore errors if we've successfully read records, so the
		// program can keep making progress.
		err = nil
	}

	d.discardAll()
	rn := 4 + (int(size) - d.remain)
	d.remain = limit - rn
	return int64(rn), err
}

// WriteTo writes the representation of rs into w. The value of rs.Version
// dictates which format the record set will be represented in.
//
// The error will be ErrNoRecord if rs contained no records.
//
// Note: since this package is only compatible with kafka 0.10 and above, the
// method never produces messages in version 0. If rs.Version is zero, the
// method defaults to producing messages in version 1.
func (rs *RecordSet) WriteTo(w io.Writer) (int64, error) {
	if rs.Records == nil {
		return 0, ErrNoRecord
	}

	// This optimization avoids rendering the record set in an intermediary
	// buffer when the writer is already a pageBuffer, which is a common case
	// due to the way WriteRequest and WriteResponse are implemented.
	buffer, _ := w.(*pageBuffer)
	bufferOffset := int64(0)

	if buffer != nil {
		bufferOffset = buffer.Size()
	} else {
		buffer = newPageBuffer()
		defer buffer.unref()
	}

	size := packUint32(0)
	buffer.Write(size[:]) // size placeholder

	var err error
	switch rs.Version {
	case 0, 1:
		err = rs.writeToVersion1(buffer, bufferOffset+4)
	case 2:
		err = rs.writeToVersion2(buffer, bufferOffset+4)
	default:
		err = fmt.Errorf("unsupported record set version %d", rs.Version)
	}
	if err != nil {
		return 0, err
	}

	n := buffer.Size() - bufferOffset
	if n == 0 {
		size = packUint32(^uint32(0))
	} else {
		size = packUint32(uint32(n) - 4)
	}
	buffer.WriteAt(size[:], bufferOffset)

	// This condition indicates that the output writer received by `WriteTo` was
	// not a *pageBuffer, in which case we need to flush the buffered records
	// data into it.
	if buffer != w {
		return buffer.WriteTo(w)
	}

	return n, nil
}
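// Editorial sketch, not part of the original file: a minimal round trip
// through WriteTo and ReadFrom. It assumes the package's NewRecordReader
// and NewBytes helpers to build the input record set.
func roundTrip() error {
	in := &RecordSet{
		Version: 2,
		Records: NewRecordReader(Record{
			Time:  time.Now(),
			Value: NewBytes([]byte("hello")),
		}),
	}

	// Encode: WriteTo reserves a 4-byte size prefix up front, then patches
	// it once the total size of the rendered records is known.
	buf := new(bytes.Buffer)
	if _, err := in.WriteTo(buf); err != nil {
		return err
	}

	// Decode: ReadFrom peeks the magic byte at offset 16 to select the
	// v0/v1 or v2 parser for each batch.
	out := new(RecordSet)
	if _, err := out.ReadFrom(buf); err != nil {
		return err
	}
	fmt.Println("decoded version:", out.Version)
	return nil
}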
// RawRecordSet represents a record set for a RawProduce request. The record set
// is represented as a raw sequence of pre-encoded record set bytes.
type RawRecordSet struct {
	// Reader exposes the raw sequence of record set bytes.
	Reader io.Reader
}

// ReadFrom reads the representation of a record set from r into rrs. It reuses
// the existing RecordSet.ReadFrom implementation to first read/decode data into
// a RecordSet, then writes/encodes the RecordSet to a buffer referenced by the
// RawRecordSet.
//
// Note: reusing the RecordSet.ReadFrom implementation makes this suboptimal
// from a performance standpoint as it requires an extra copy of the record
// bytes. Holding off on optimizing, as this code path is only invoked in tests.
func (rrs *RawRecordSet) ReadFrom(r io.Reader) (int64, error) {
	rs := &RecordSet{}
	n, err := rs.ReadFrom(r)
	if err != nil {
		return 0, err
	}

	buf := &bytes.Buffer{}
	rs.WriteTo(buf)
	*rrs = RawRecordSet{
		Reader: buf,
	}

	return n, nil
}

// WriteTo writes the RawRecordSet to an io.Writer. Since this is a raw record
// set representation, all that is done here is copying bytes from the
// underlying reader to the specified writer.
func (rrs *RawRecordSet) WriteTo(w io.Writer) (int64, error) {
	if rrs.Reader == nil {
		return 0, ErrNoRecord
	}

	return io.Copy(w, rrs.Reader)
}

func makeTime(t int64) time.Time {
	return time.Unix(t/1000, (t%1000)*int64(time.Millisecond))
}

func timestamp(t time.Time) int64 {
	if t.IsZero() {
		return 0
	}
	return t.UnixNano() / int64(time.Millisecond)
}

func packUint32(u uint32) (b [4]byte) {
	binary.BigEndian.PutUint32(b[:], u)
	return
}

func packUint64(u uint64) (b [8]byte) {
	binary.BigEndian.PutUint64(b[:], u)
	return
}
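// Editorial sketch, not part of the original file: timestamp and makeTime
// convert between time.Time and the millisecond precision used on the wire,
// so sub-millisecond detail is dropped in a round trip.
func exampleTimestamps() {
	t := time.Date(2024, time.March, 18, 17, 43, 48, 123456789, time.UTC)

	ms := timestamp(t) // milliseconds since the Unix epoch, truncated
	u := makeTime(ms)  // back to time.Time, now at millisecond precision

	fmt.Println(t.Sub(u)) // 456.789µs lost to truncation
}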