github.com/segmentio/kafka-go@v0.4.48-0.20240318174348-3f6244eb34fd/protocol/record_v2.go

package protocol

import (
	"fmt"
	"hash/crc32"
	"io"
	"time"
)

func (rs *RecordSet) readFromVersion2(d *decoder) error {
	baseOffset := d.readInt64()
	batchLength := d.readInt32()

	// Truncated record batches are discarded and produce an empty record set
	// rather than an error.
	if int(batchLength) > d.remain || d.err != nil {
		d.discardAll()
		return nil
	}

	dec := &decoder{
		reader: d,
		remain: int(batchLength),
	}

	partitionLeaderEpoch := dec.readInt32()
	magicByte := dec.readInt8()
	crc := dec.readInt32()

	// From this point on, every byte read through dec is folded into a CRC-32C
	// checksum so it can be compared against the checksum read above.
	dec.setCRC(crc32.MakeTable(crc32.Castagnoli))

	attributes := dec.readInt16()
	lastOffsetDelta := dec.readInt32()
	firstTimestamp := dec.readInt64()
	maxTimestamp := dec.readInt64()
	producerID := dec.readInt64()
	producerEpoch := dec.readInt16()
	baseSequence := dec.readInt32()
	numRecords := dec.readInt32()
	reader := io.Reader(dec)

	// unused
	_ = lastOffsetDelta
	_ = maxTimestamp

	if compression := Attributes(attributes).Compression(); compression != 0 {
		codec := compression.Codec()
		if codec == nil {
			return fmt.Errorf("unsupported compression codec (%d)", compression)
		}
		decompressor := codec.NewReader(reader)
		defer decompressor.Close()
		reader = decompressor
	}

	buffer := newPageBuffer()
	defer buffer.unref()

	_, err := buffer.ReadFrom(reader)
	if err != nil {
		return err
	}
	if dec.crc32 != uint32(crc) {
		return fmt.Errorf("crc32 checksum mismatch (computed=%d found=%d)", dec.crc32, uint32(crc))
	}

	recordsLength := buffer.Len()
	dec.reader = buffer
	dec.remain = recordsLength

	records := make([]optimizedRecord, numRecords)
	// This is a lazy allocator that will be used to optimize allocation of
	// page references for keys and values.
	//
	// By default, no memory is allocated and on first use, numRecords page refs
	// are allocated in a contiguous memory space, and the allocator returns
	// pointers into that array for each page ref that gets requested.
	//
	// The reasoning is that kafka partitions typically have records of a single
	// form, which either have no keys, no values, or both keys and values.
	// Lazy allocation adapts nicely to these patterns to only allocate the
	// memory that is needed by the program, while still reducing the number
	// of malloc calls made by the program.
	//
	// Using a single allocator for both keys and values keeps related values
	// close by in memory, making access to the records more friendly to CPU
	// caches. (A simplified sketch of this pattern is given below.)
	alloc := pageRefAllocator{size: int(numRecords)}
	// Following the same reasoning that kafka partitions will typically have
	// records with repeating formats, we expect to either find records with
	// no headers, or records which always contain headers.
	//
	// To reduce the memory footprint when records have no headers, the Header
	// slices are lazily allocated in a separate array.
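	//
	// As a rough illustration of the lazy allocation pattern described in the
	// two comments above (the type and field names in this sketch are
	// illustrative, not the exact pageRefAllocator implementation), the
	// allocator only creates its contiguous backing array on first use and then
	// hands out pointers into it:
	//
	//	type lazyPageRefs struct {
	//		refs []pageRef
	//		next int
	//		size int
	//	}
	//
	//	func (a *lazyPageRefs) newRef() *pageRef {
	//		if a.next == len(a.refs) {
	//			a.refs = make([]pageRef, a.size) // one contiguous, numRecords-sized allocation
	//			a.next = 0
	//		}
	//		r := &a.refs[a.next]
	//		a.next++
	//		return r
	//	}
	//
	// The header slices below follow the same idea in its simplest form: the
	// [][]Header array is only allocated once the first record carrying headers
	// is seen.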
	headers := ([][]Header)(nil)

	for i := range records {
		r := &records[i]
		_ = dec.readVarInt() // record length (unused)
		_ = dec.readInt8()   // record attributes (unused)
		timestampDelta := dec.readVarInt()
		offsetDelta := dec.readVarInt()

		r.offset = baseOffset + offsetDelta
		r.timestamp = firstTimestamp + timestampDelta

		keyLength := dec.readVarInt()
		keyOffset := int64(recordsLength - dec.remain)
		if keyLength > 0 {
			dec.discard(int(keyLength))
		}

		valueLength := dec.readVarInt()
		valueOffset := int64(recordsLength - dec.remain)
		if valueLength > 0 {
			dec.discard(int(valueLength))
		}

		if numHeaders := dec.readVarInt(); numHeaders > 0 {
			if headers == nil {
				headers = make([][]Header, numRecords)
			}

			h := make([]Header, numHeaders)

			for i := range h {
				h[i] = Header{
					Key:   dec.readVarString(),
					Value: dec.readVarBytes(),
				}
			}

			headers[i] = h
		}

		if dec.err != nil {
			records = records[:i]
			break
		}

		if keyLength >= 0 {
			r.keyRef = alloc.newPageRef()
			buffer.refTo(r.keyRef, keyOffset, keyOffset+keyLength)
		}

		if valueLength >= 0 {
			r.valueRef = alloc.newPageRef()
			buffer.refTo(r.valueRef, valueOffset, valueOffset+valueLength)
		}
	}

	// Note: it's unclear whether kafka 0.11+ still truncates the responses;
	// all attempts I made at constructing a test to trigger a truncation have
	// failed. I kept this code here as a safeguard, but it may never execute.
	if dec.err != nil && len(records) == 0 {
		return dec.err
	}

	*rs = RecordSet{
		Version:    magicByte,
		Attributes: Attributes(attributes),
		Records: &optimizedRecordReader{
			records: records,
			headers: headers,
		},
	}

	if rs.Attributes.Control() {
		rs.Records = &ControlBatch{
			Attributes:           rs.Attributes,
			PartitionLeaderEpoch: partitionLeaderEpoch,
			BaseOffset:           baseOffset,
			ProducerID:           producerID,
			ProducerEpoch:        producerEpoch,
			BaseSequence:         baseSequence,
			Records:              rs.Records,
		}
	} else {
		rs.Records = &RecordBatch{
			Attributes:           rs.Attributes,
			PartitionLeaderEpoch: partitionLeaderEpoch,
			BaseOffset:           baseOffset,
			ProducerID:           producerID,
			ProducerEpoch:        producerEpoch,
			BaseSequence:         baseSequence,
			Records:              rs.Records,
		}
	}

	return nil
}

func (rs *RecordSet) writeToVersion2(buffer *pageBuffer, bufferOffset int64) error {
	records := rs.Records
	numRecords := int32(0)

	e := &encoder{writer: buffer}
	e.writeInt64(0)                    // base offset                         |  0 +8
	e.writeInt32(0)                    // placeholder for record batch length |  8 +4
	e.writeInt32(-1)                   // partition leader epoch              | 12 +4
	e.writeInt8(2)                     // magic byte                          | 16 +1
	e.writeInt32(0)                    // placeholder for crc32 checksum      | 17 +4
	e.writeInt16(int16(rs.Attributes)) // attributes                          | 21 +2
	e.writeInt32(0)                    // placeholder for lastOffsetDelta     | 23 +4
	e.writeInt64(0)                    // placeholder for firstTimestamp      | 27 +8
	e.writeInt64(0)                    // placeholder for maxTimestamp        | 35 +8
	e.writeInt64(-1)                   // producer id                         | 43 +8
	e.writeInt16(-1)                   // producer epoch                      | 51 +2
	e.writeInt32(-1)                   // base sequence                       | 53 +4
	e.writeInt32(0)                    // placeholder for numRecords          | 57 +4

	var compressor io.WriteCloser
	if compression := rs.Attributes.Compression(); compression != 0 {
		if codec := compression.Codec(); codec != nil {
			compressor = codec.NewWriter(buffer)
			e.writer = compressor
		}
	}
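
	// Note that the fixed 61-byte batch header above is written directly to the
	// page buffer, while the record bytes that follow may flow through the
	// compressor. The placeholder fields are patched in place near the end of
	// this function, once the final record count, timestamps, batch length and
	// checksum are known.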

	currentTimestamp := timestamp(time.Now())
	lastOffsetDelta := int32(0)
	firstTimestamp := int64(0)
	maxTimestamp := int64(0)

	err := forEachRecord(records, func(i int, r *Record) error {
		t := timestamp(r.Time)
		if t == 0 {
			t = currentTimestamp
		}
		if i == 0 {
			firstTimestamp = t
		}
		if t > maxTimestamp {
			maxTimestamp = t
		}

		timestampDelta := t - firstTimestamp
		offsetDelta := int64(i)
		lastOffsetDelta = int32(offsetDelta)

		length := 1 + // attributes
			sizeOfVarInt(timestampDelta) +
			sizeOfVarInt(offsetDelta) +
			sizeOfVarNullBytesIface(r.Key) +
			sizeOfVarNullBytesIface(r.Value) +
			sizeOfVarInt(int64(len(r.Headers)))

		for _, h := range r.Headers {
			length += sizeOfVarString(h.Key) + sizeOfVarNullBytes(h.Value)
		}

		e.writeVarInt(int64(length))
		e.writeInt8(0) // record attributes (unused)
		e.writeVarInt(timestampDelta)
		e.writeVarInt(offsetDelta)

		if err := e.writeVarNullBytesFrom(r.Key); err != nil {
			return err
		}

		if err := e.writeVarNullBytesFrom(r.Value); err != nil {
			return err
		}

		e.writeVarInt(int64(len(r.Headers)))

		for _, h := range r.Headers {
			e.writeVarString(h.Key)
			e.writeVarNullBytes(h.Value)
		}

		numRecords++
		return nil
	})

	if err != nil {
		return err
	}

	if compressor != nil {
		if err := compressor.Close(); err != nil {
			return err
		}
	}

	if numRecords == 0 {
		return ErrNoRecord
	}

	// Patch the placeholders written above now that the final values are known.
	b2 := packUint32(uint32(lastOffsetDelta))
	b3 := packUint64(uint64(firstTimestamp))
	b4 := packUint64(uint64(maxTimestamp))
	b5 := packUint32(uint32(numRecords))

	buffer.WriteAt(b2[:], bufferOffset+23)
	buffer.WriteAt(b3[:], bufferOffset+27)
	buffer.WriteAt(b4[:], bufferOffset+35)
	buffer.WriteAt(b5[:], bufferOffset+57)

	totalLength := buffer.Size() - bufferOffset
	// The batch length field counts neither the 8-byte base offset nor the
	// 4-byte batch length field itself.
	batchLength := totalLength - 12

	checksum := uint32(0)
	crcTable := crc32.MakeTable(crc32.Castagnoli)

	// The checksum covers everything after the crc field, from the attributes
	// (offset 21) to the end of the batch.
	buffer.pages.scan(bufferOffset+21, bufferOffset+totalLength, func(chunk []byte) bool {
		checksum = crc32.Update(checksum, crcTable, chunk)
		return true
	})

	b0 := packUint32(uint32(batchLength))
	b1 := packUint32(checksum)

	buffer.WriteAt(b0[:], bufferOffset+8)
	buffer.WriteAt(b1[:], bufferOffset+17)
	return nil
}
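
// A minimal usage sketch (illustrative only, not part of this file): these
// functions are normally reached through RecordSet.ReadFrom and
// RecordSet.WriteTo rather than called directly. The helpers NewRecordReader
// and NewBytes are assumed to come from the surrounding protocol package.
//
//	rs := RecordSet{
//		Version: 2,
//		Records: NewRecordReader(
//			Record{Value: NewBytes([]byte("hello"))},
//		),
//	}
//
//	buf := new(bytes.Buffer)
//	if _, err := rs.WriteTo(buf); err != nil {
//		// handle the encoding error
//	}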