github.com/segmentio/kafka-go@v0.4.48-0.20240318174348-3f6244eb34fd/protocol/record_v2.go

package protocol

import (
	"fmt"
	"hash/crc32"
	"io"
	"time"
)

func (rs *RecordSet) readFromVersion2(d *decoder) error {
	baseOffset := d.readInt64()
	batchLength := d.readInt32()

	// Truncated record batches are discarded and produce an empty record set
	// rather than an error.
	if int(batchLength) > d.remain || d.err != nil {
		d.discardAll()
		return nil
	}

	dec := &decoder{
		reader: d,
		remain: int(batchLength),
	}

	partitionLeaderEpoch := dec.readInt32()
	magicByte := dec.readInt8()
	crc := dec.readInt32()

	// From this point on, every byte read through dec is folded into a CRC-32C
	// checksum so it can be compared against the checksum read above.
	dec.setCRC(crc32.MakeTable(crc32.Castagnoli))

	attributes := dec.readInt16()
	lastOffsetDelta := dec.readInt32()
	firstTimestamp := dec.readInt64()
	maxTimestamp := dec.readInt64()
	producerID := dec.readInt64()
	producerEpoch := dec.readInt16()
	baseSequence := dec.readInt32()
	numRecords := dec.readInt32()
	reader := io.Reader(dec)

	// unused
	_ = lastOffsetDelta
	_ = maxTimestamp

	if compression := Attributes(attributes).Compression(); compression != 0 {
		codec := compression.Codec()
		if codec == nil {
			return fmt.Errorf("unsupported compression codec (%d)", compression)
		}
		decompressor := codec.NewReader(reader)
		defer decompressor.Close()
		reader = decompressor
	}

	buffer := newPageBuffer()
	defer buffer.unref()

	_, err := buffer.ReadFrom(reader)
	if err != nil {
		return err
	}
	if dec.crc32 != uint32(crc) {
		return fmt.Errorf("crc32 checksum mismatch (computed=%d found=%d)", dec.crc32, uint32(crc))
	}

	recordsLength := buffer.Len()
	dec.reader = buffer
	dec.remain = recordsLength

	records := make([]optimizedRecord, numRecords)
	// This is a lazy allocator that will be used to optimize allocation of
	// page references for keys and values.
	//
	// By default, no memory is allocated and on first use, numRecords page refs
	// are allocated in a contiguous memory space, and the allocator returns
	// pointers into that array for each page ref that gets requested.
	//
	// The reasoning is that kafka partitions typically have records of a single
	// form, which either have no keys, no values, or both keys and values.
	// Lazy allocation adapts nicely to these patterns to only allocate the
	// memory that is needed by the program, while still reducing the number
	// of malloc calls made by the program.
	//
	// Using a single allocator for both keys and values keeps related values
	// close by in memory, making access to the records more friendly to CPU
	// caches. (A simplified sketch of this pattern is given below.)
	alloc := pageRefAllocator{size: int(numRecords)}
	// Following the same reasoning that kafka partitions will typically have
	// records with repeating formats, we expect to either find records with
	// no headers, or records which always contain headers.
	//
	// To reduce the memory footprint when records have no headers, the Header
	// slices are lazily allocated in a separate array.
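	//
	// As a rough illustration of the lazy allocation pattern described in the
	// two comments above (the type and field names in this sketch are
	// illustrative, not the exact pageRefAllocator implementation), the
	// allocator only creates its contiguous backing array on first use and then
	// hands out pointers into it:
	//
	//	type lazyPageRefs struct {
	//		refs []pageRef
	//		next int
	//		size int
	//	}
	//
	//	func (a *lazyPageRefs) newRef() *pageRef {
	//		if a.next == len(a.refs) {
	//			a.refs = make([]pageRef, a.size) // one contiguous, numRecords-sized allocation
	//			a.next = 0
	//		}
	//		r := &a.refs[a.next]
	//		a.next++
	//		return r
	//	}
	//
	// The header slices below follow the same idea in its simplest form: the
	// [][]Header array is only allocated once the first record carrying headers
	// is seen.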
	headers := ([][]Header)(nil)

	for i := range records {
		r := &records[i]
		_ = dec.readVarInt() // record length (unused)
		_ = dec.readInt8()   // record attributes (unused)
		timestampDelta := dec.readVarInt()
		offsetDelta := dec.readVarInt()

		r.offset = baseOffset + offsetDelta
		r.timestamp = firstTimestamp + timestampDelta

		keyLength := dec.readVarInt()
		keyOffset := int64(recordsLength - dec.remain)
		if keyLength > 0 {
			dec.discard(int(keyLength))
		}

		valueLength := dec.readVarInt()
		valueOffset := int64(recordsLength - dec.remain)
		if valueLength > 0 {
			dec.discard(int(valueLength))
		}

		if numHeaders := dec.readVarInt(); numHeaders > 0 {
			if headers == nil {
				headers = make([][]Header, numRecords)
			}

			h := make([]Header, numHeaders)

			for i := range h {
				h[i] = Header{
					Key:   dec.readVarString(),
					Value: dec.readVarBytes(),
				}
			}

			headers[i] = h
		}

		if dec.err != nil {
			records = records[:i]
			break
		}

		if keyLength >= 0 {
			r.keyRef = alloc.newPageRef()
			buffer.refTo(r.keyRef, keyOffset, keyOffset+keyLength)
		}

		if valueLength >= 0 {
			r.valueRef = alloc.newPageRef()
			buffer.refTo(r.valueRef, valueOffset, valueOffset+valueLength)
		}
	}

	// Note: it's unclear whether kafka 0.11+ still truncates the responses;
	// all attempts I made at constructing a test to trigger a truncation have
	// failed. I kept this code here as a safeguard, but it may never execute.
	if dec.err != nil && len(records) == 0 {
		return dec.err
	}

	*rs = RecordSet{
		Version:    magicByte,
		Attributes: Attributes(attributes),
		Records: &optimizedRecordReader{
			records: records,
			headers: headers,
		},
	}

	if rs.Attributes.Control() {
		rs.Records = &ControlBatch{
			Attributes:           rs.Attributes,
			PartitionLeaderEpoch: partitionLeaderEpoch,
			BaseOffset:           baseOffset,
			ProducerID:           producerID,
			ProducerEpoch:        producerEpoch,
			BaseSequence:         baseSequence,
			Records:              rs.Records,
		}
	} else {
		rs.Records = &RecordBatch{
			Attributes:           rs.Attributes,
			PartitionLeaderEpoch: partitionLeaderEpoch,
			BaseOffset:           baseOffset,
			ProducerID:           producerID,
			ProducerEpoch:        producerEpoch,
			BaseSequence:         baseSequence,
			Records:              rs.Records,
		}
	}

	return nil
}

func (rs *RecordSet) writeToVersion2(buffer *pageBuffer, bufferOffset int64) error {
	records := rs.Records
	numRecords := int32(0)

	e := &encoder{writer: buffer}
	e.writeInt64(0)                    // base offset                         |  0 +8
	e.writeInt32(0)                    // placeholder for record batch length |  8 +4
	e.writeInt32(-1)                   // partition leader epoch              | 12 +4
	e.writeInt8(2)                     // magic byte                          | 16 +1
	e.writeInt32(0)                    // placeholder for crc32 checksum      | 17 +4
	e.writeInt16(int16(rs.Attributes)) // attributes                          | 21 +2
	e.writeInt32(0)                    // placeholder for lastOffsetDelta     | 23 +4
	e.writeInt64(0)                    // placeholder for firstTimestamp      | 27 +8
	e.writeInt64(0)                    // placeholder for maxTimestamp        | 35 +8
	e.writeInt64(-1)                   // producer id                         | 43 +8
	e.writeInt16(-1)                   // producer epoch                      | 51 +2
	e.writeInt32(-1)                   // base sequence                       | 53 +4
	e.writeInt32(0)                    // placeholder for numRecords          | 57 +4

	var compressor io.WriteCloser
	if compression := rs.Attributes.Compression(); compression != 0 {
		if codec := compression.Codec(); codec != nil {
			compressor = codec.NewWriter(buffer)
			e.writer = compressor
		}
	}
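
	// Note that the fixed 61-byte batch header above is written directly to the
	// page buffer, while the record bytes that follow may flow through the
	// compressor. The placeholder fields are patched in place near the end of
	// this function, once the final record count, timestamps, batch length and
	// checksum are known.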

	currentTimestamp := timestamp(time.Now())
	lastOffsetDelta := int32(0)
	firstTimestamp := int64(0)
	maxTimestamp := int64(0)

	err := forEachRecord(records, func(i int, r *Record) error {
		t := timestamp(r.Time)
		if t == 0 {
			t = currentTimestamp
		}
		if i == 0 {
			firstTimestamp = t
		}
		if t > maxTimestamp {
			maxTimestamp = t
		}

		timestampDelta := t - firstTimestamp
		offsetDelta := int64(i)
		lastOffsetDelta = int32(offsetDelta)

		length := 1 + // attributes
			sizeOfVarInt(timestampDelta) +
			sizeOfVarInt(offsetDelta) +
			sizeOfVarNullBytesIface(r.Key) +
			sizeOfVarNullBytesIface(r.Value) +
			sizeOfVarInt(int64(len(r.Headers)))

		for _, h := range r.Headers {
			length += sizeOfVarString(h.Key) + sizeOfVarNullBytes(h.Value)
		}

		e.writeVarInt(int64(length))
		e.writeInt8(0) // record attributes (unused)
		e.writeVarInt(timestampDelta)
		e.writeVarInt(offsetDelta)

		if err := e.writeVarNullBytesFrom(r.Key); err != nil {
			return err
		}

		if err := e.writeVarNullBytesFrom(r.Value); err != nil {
			return err
		}

		e.writeVarInt(int64(len(r.Headers)))

		for _, h := range r.Headers {
			e.writeVarString(h.Key)
			e.writeVarNullBytes(h.Value)
		}

		numRecords++
		return nil
	})

	if err != nil {
		return err
	}

	if compressor != nil {
		if err := compressor.Close(); err != nil {
			return err
		}
	}

	if numRecords == 0 {
		return ErrNoRecord
	}

	// Patch the placeholders written above now that the final values are known.
	b2 := packUint32(uint32(lastOffsetDelta))
	b3 := packUint64(uint64(firstTimestamp))
	b4 := packUint64(uint64(maxTimestamp))
	b5 := packUint32(uint32(numRecords))

	buffer.WriteAt(b2[:], bufferOffset+23)
	buffer.WriteAt(b3[:], bufferOffset+27)
	buffer.WriteAt(b4[:], bufferOffset+35)
	buffer.WriteAt(b5[:], bufferOffset+57)

	totalLength := buffer.Size() - bufferOffset
	// The batch length field counts neither the 8-byte base offset nor the
	// 4-byte batch length field itself.
	batchLength := totalLength - 12

	checksum := uint32(0)
	crcTable := crc32.MakeTable(crc32.Castagnoli)

	// The checksum covers everything after the crc field, from the attributes
	// (offset 21) to the end of the batch.
	buffer.pages.scan(bufferOffset+21, bufferOffset+totalLength, func(chunk []byte) bool {
		checksum = crc32.Update(checksum, crcTable, chunk)
		return true
	})

	b0 := packUint32(uint32(batchLength))
	b1 := packUint32(checksum)

	buffer.WriteAt(b0[:], bufferOffset+8)
	buffer.WriteAt(b1[:], bufferOffset+17)
	return nil
}
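
// A minimal usage sketch (illustrative only, not part of this file): these
// functions are normally reached through RecordSet.ReadFrom and
// RecordSet.WriteTo rather than called directly. The helpers NewRecordReader
// and NewBytes are assumed to come from the surrounding protocol package.
//
//	rs := RecordSet{
//		Version: 2,
//		Records: NewRecordReader(
//			Record{Value: NewBytes([]byte("hello"))},
//		),
//	}
//
//	buf := new(bytes.Buffer)
//	if _, err := rs.WriteTo(buf); err != nil {
//		// handle the encoding error
//	}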