github.com/hack0072008/kafka-go@v1.0.1/protocol/record_v2.go

package protocol

import (
	"fmt"
	"hash/crc32"
	"io"
	"time"
)

func (rs *RecordSet) readFromVersion2(d *decoder) error {
	baseOffset := d.readInt64()
	batchLength := d.readInt32()

	if int(batchLength) > d.remain || d.err != nil {
		d.discardAll()
		return nil
	}

	dec := &decoder{
		reader: d,
		remain: int(batchLength),
	}

	partitionLeaderEpoch := dec.readInt32()
	magicByte := dec.readInt8()
	crc := dec.readInt32()

	dec.setCRC(crc32.MakeTable(crc32.Castagnoli))

	attributes := dec.readInt16()
	lastOffsetDelta := dec.readInt32()
	firstTimestamp := dec.readInt64()
	maxTimestamp := dec.readInt64()
	producerID := dec.readInt64()
	producerEpoch := dec.readInt16()
	baseSequence := dec.readInt32()
	numRecords := dec.readInt32()
	reader := io.Reader(dec)

	// unused
	_ = lastOffsetDelta
	_ = maxTimestamp

	if compression := Attributes(attributes).Compression(); compression != 0 {
		codec := compression.Codec()
		if codec == nil {
			return fmt.Errorf("unsupported compression codec (%d)", compression)
		}
		decompressor := codec.NewReader(reader)
		defer decompressor.Close()
		reader = decompressor
	}

	buffer := newPageBuffer()
	defer buffer.unref()

	_, err := buffer.ReadFrom(reader)
	if err != nil {
		return err
	}
	if dec.crc32 != uint32(crc) {
		return fmt.Errorf("crc32 checksum mismatch (computed=%d found=%d)", dec.crc32, uint32(crc))
	}

	recordsLength := buffer.Len()
	dec.reader = buffer
	dec.remain = recordsLength

	records := make([]optimizedRecord, numRecords)
	// This is a lazy allocator used to optimize allocation of page references
	// for keys and values.
	//
	// By default, no memory is allocated and on first use, numRecords page refs
	// are allocated in a contiguous memory space, and the allocator returns
	// pointers into that array for each page ref that gets requested.
	//
	// The reasoning is that kafka partitions typically have records of a single
	// form, which either have no keys, no values, or both keys and values.
	// A lazy allocator adapts nicely to these patterns to only allocate the
	// memory that is needed by the program, while still reducing the number
	// of malloc calls made by the program.
	//
	// Using a single allocator for both keys and values keeps related values
	// close by in memory, making access to the records more friendly to CPU
	// caches.
	alloc := pageRefAllocator{size: int(numRecords)}
	// Following the same reasoning that kafka partitions will typically have
	// records with repeating formats, we expect to either find records with
	// no headers, or records which always contain headers.
	//
	// To reduce the memory footprint when records have no headers, the Header
	// slices are lazily allocated in a separate array.
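	//
	// As an illustration of the lazy allocation pattern described above, the
	// allocator behaves roughly like the sketch below. This is an assumption
	// for readability only; pageRefAllocator is defined elsewhere in this
	// package and its actual implementation may differ:
	//
	//	func (a *pageRefAllocator) newPageRef() *pageRef {
	//		if len(a.refs) == 0 {
	//			a.refs = make([]pageRef, a.size) // one contiguous block, sized upfront
	//		}
	//		ref := &a.refs[0]
	//		a.refs = a.refs[1:]
	//		return ref
	//	}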
	headers := ([][]Header)(nil)

	for i := range records {
		r := &records[i]
		_ = dec.readVarInt() // record length (unused)
		_ = dec.readInt8()   // record attributes (unused)
		timestampDelta := dec.readVarInt()
		offsetDelta := dec.readVarInt()

		r.offset = baseOffset + offsetDelta
		r.timestamp = firstTimestamp + timestampDelta

		keyLength := dec.readVarInt()
		keyOffset := int64(recordsLength - dec.remain)
		if keyLength > 0 {
			dec.discard(int(keyLength))
		}

		valueLength := dec.readVarInt()
		valueOffset := int64(recordsLength - dec.remain)
		if valueLength > 0 {
			dec.discard(int(valueLength))
		}

		if numHeaders := dec.readVarInt(); numHeaders > 0 {
			if headers == nil {
				headers = make([][]Header, numRecords)
			}

			h := make([]Header, numHeaders)

			for i := range h {
				h[i] = Header{
					Key:   dec.readVarString(),
					Value: dec.readVarBytes(),
				}
			}

			headers[i] = h
		}

		if dec.err != nil {
			records = records[:i]
			break
		}

		if keyLength >= 0 {
			r.keyRef = alloc.newPageRef()
			buffer.refTo(r.keyRef, keyOffset, keyOffset+keyLength)
		}

		if valueLength >= 0 {
			r.valueRef = alloc.newPageRef()
			buffer.refTo(r.valueRef, valueOffset, valueOffset+valueLength)
		}
	}

	// Note: it's unclear whether kafka 0.11+ still truncates the responses;
	// all attempts I made at constructing a test to trigger a truncation have
	// failed. I kept this code here as a safeguard, but it may never execute.
	if dec.err != nil && len(records) == 0 {
		return dec.err
	}

	*rs = RecordSet{
		Version:    magicByte,
		Attributes: Attributes(attributes),
		Records: &optimizedRecordReader{
			records: records,
			headers: headers,
		},
	}

	if rs.Attributes.Control() {
		rs.Records = &ControlBatch{
			Attributes:           rs.Attributes,
			PartitionLeaderEpoch: partitionLeaderEpoch,
			BaseOffset:           baseOffset,
			ProducerID:           producerID,
			ProducerEpoch:        producerEpoch,
			BaseSequence:         baseSequence,
			Records:              rs.Records,
		}
	} else {
		rs.Records = &RecordBatch{
			Attributes:           rs.Attributes,
			PartitionLeaderEpoch: partitionLeaderEpoch,
			BaseOffset:           baseOffset,
			ProducerID:           producerID,
			ProducerEpoch:        producerEpoch,
			BaseSequence:         baseSequence,
			Records:              rs.Records,
		}
	}

	return nil
}

func (rs *RecordSet) writeToVersion2(buffer *pageBuffer, bufferOffset int64) error {
	records := rs.Records
	numRecords := int32(0)

	e := &encoder{writer: buffer}
	e.writeInt64(0)                    // base offset                         |  0 +8
	e.writeInt32(0)                    // placeholder for record batch length |  8 +4
	e.writeInt32(-1)                   // partition leader epoch              | 12 +4
	e.writeInt8(2)                     // magic byte                          | 16 +1
	e.writeInt32(0)                    // placeholder for crc32 checksum      | 17 +4
	e.writeInt16(int16(rs.Attributes)) // attributes                          | 21 +2
	e.writeInt32(0)                    // placeholder for lastOffsetDelta     | 23 +4
	e.writeInt64(0)                    // placeholder for firstTimestamp      | 27 +8
	e.writeInt64(0)                    // placeholder for maxTimestamp        | 35 +8
	e.writeInt64(-1)                   // producer id                         | 43 +8
	e.writeInt16(-1)                   // producer epoch                      | 51 +2
	e.writeInt32(-1)                   // base sequence                       | 53 +4
	e.writeInt32(0)                    // placeholder for numRecords          | 57 +4

	var compressor io.WriteCloser
	if compression := rs.Attributes.Compression(); compression != 0 {
		if codec := compression.Codec(); codec != nil {
			compressor = codec.NewWriter(buffer)
			e.writer = compressor
		}
	}
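	// From this point on, everything written through e flows through the
	// compressor when one is configured: the 61-byte batch header above was
	// written directly to the page buffer (mostly as placeholders), while the
	// records encoded below are compressed. The placeholders are patched in
	// place once the batch has been fully encoded, which is also why the crc32
	// checksum is computed last, over the compressed form of the records.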
	currentTimestamp := timestamp(time.Now())
	lastOffsetDelta := int32(0)
	firstTimestamp := int64(0)
	maxTimestamp := int64(0)

	err := forEachRecord(records, func(i int, r *Record) error {
		t := timestamp(r.Time)
		if t == 0 {
			t = currentTimestamp
		}
		if i == 0 {
			firstTimestamp = t
		}
		if t > maxTimestamp {
			maxTimestamp = t
		}

		timestampDelta := t - firstTimestamp
		offsetDelta := int64(i)
		lastOffsetDelta = int32(offsetDelta)

		length := 1 + // attributes
			sizeOfVarInt(timestampDelta) +
			sizeOfVarInt(offsetDelta) +
			sizeOfVarNullBytesIface(r.Key) +
			sizeOfVarNullBytesIface(r.Value) +
			sizeOfVarInt(int64(len(r.Headers)))

		for _, h := range r.Headers {
			length += sizeOfVarString(h.Key) + sizeOfVarNullBytes(h.Value)
		}

		e.writeVarInt(int64(length))
		e.writeInt8(0) // record attributes (unused)
		e.writeVarInt(timestampDelta)
		e.writeVarInt(offsetDelta)

		if err := e.writeVarNullBytesFrom(r.Key); err != nil {
			return err
		}

		if err := e.writeVarNullBytesFrom(r.Value); err != nil {
			return err
		}

		e.writeVarInt(int64(len(r.Headers)))

		for _, h := range r.Headers {
			e.writeVarString(h.Key)
			e.writeVarNullBytes(h.Value)
		}

		numRecords++
		return nil
	})

	if err != nil {
		return err
	}

	if compressor != nil {
		if err := compressor.Close(); err != nil {
			return err
		}
	}

	if numRecords == 0 {
		return ErrNoRecord
	}

	b2 := packUint32(uint32(lastOffsetDelta))
	b3 := packUint64(uint64(firstTimestamp))
	b4 := packUint64(uint64(maxTimestamp))
	b5 := packUint32(uint32(numRecords))

	buffer.WriteAt(b2[:], bufferOffset+23)
	buffer.WriteAt(b3[:], bufferOffset+27)
	buffer.WriteAt(b4[:], bufferOffset+35)
	buffer.WriteAt(b5[:], bufferOffset+57)

	totalLength := buffer.Size() - bufferOffset
	batchLength := totalLength - 12

	checksum := uint32(0)
	crcTable := crc32.MakeTable(crc32.Castagnoli)

	buffer.pages.scan(bufferOffset+21, bufferOffset+totalLength, func(chunk []byte) bool {
		checksum = crc32.Update(checksum, crcTable, chunk)
		return true
	})

	b0 := packUint32(uint32(batchLength))
	b1 := packUint32(checksum)

	buffer.WriteAt(b0[:], bufferOffset+8)
	buffer.WriteAt(b1[:], bufferOffset+17)
	return nil
}
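
// The function below is an illustrative sketch, not part of the original file:
// it shows how writeToVersion2 and readFromVersion2 are expected to round-trip
// a batch through the version 2 wire format. It uses only identifiers that
// appear in this file, plus NewRecordReader and NewBytes, which are assumed to
// exist elsewhere in this package (building a RecordReader from Records, and a
// Bytes value from a byte slice, respectively).
func exampleRoundTripVersion2() (RecordSet, error) {
	in := RecordSet{
		Version: 2,
		Records: NewRecordReader(
			Record{Time: time.Now(), Value: NewBytes([]byte("hello"))},
		),
	}

	// Encode the batch at offset 0 of a fresh page buffer.
	buffer := newPageBuffer()
	defer buffer.unref()
	if err := in.writeToVersion2(buffer, 0); err != nil {
		return RecordSet{}, err
	}

	// Decode it back; readFromVersion2 consumes the base offset and batch
	// length itself, then reads the remainder of the batch through a
	// length-limited decoder.
	var out RecordSet
	err := out.readFromVersion2(&decoder{reader: buffer, remain: buffer.Len()})
	return out, err
}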