github.com/streamdal/segmentio-kafka-go@v0.4.47-streamdal/protocol/record_v2.go

package protocol

import (
	"fmt"
	"hash/crc32"
	"io"
	"time"
)

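// readFromVersion2 decodes a version 2 (magic byte 2) record batch: a fixed
// batch header (base offset, batch length, partition leader epoch, magic, crc,
// attributes, offset/timestamp bounds, producer metadata, record count)
// followed by the individual records, which may be compressed as a unit.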
func (rs *RecordSet) readFromVersion2(d *decoder) error {
	baseOffset := d.readInt64()
	batchLength := d.readInt32()

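	// If the advertised batch length exceeds what remains in the response, the
	// batch was cut short (likely truncated by the broker to honor a size
	// limit); drop the remaining bytes and return an empty record set instead
	// of an error.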
	if int(batchLength) > d.remain || d.err != nil {
		d.discardAll()
		return nil
	}

	dec := &decoder{
		reader: d,
		remain: int(batchLength),
	}

	partitionLeaderEpoch := dec.readInt32()
	magicByte := dec.readInt8()
	crc := dec.readInt32()

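	// The batch CRC-32C covers everything from the attributes field to the end
	// of the batch, so checksum accumulation starts only after the crc field
	// itself has been consumed.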
	dec.setCRC(crc32.MakeTable(crc32.Castagnoli))

	attributes := dec.readInt16()
	lastOffsetDelta := dec.readInt32()
	firstTimestamp := dec.readInt64()
	maxTimestamp := dec.readInt64()
	producerID := dec.readInt64()
	producerEpoch := dec.readInt16()
	baseSequence := dec.readInt32()
	numRecords := dec.readInt32()
	reader := io.Reader(dec)

	// unused
	_ = lastOffsetDelta
	_ = maxTimestamp

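	// Compression applies to the records section as a whole: the rest of the
	// batch is wrapped in the codec's reader and decompressed as a stream.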
	if compression := Attributes(attributes).Compression(); compression != 0 {
		codec := compression.Codec()
		if codec == nil {
			return fmt.Errorf("unsupported compression codec (%d)", compression)
		}
		decompressor := codec.NewReader(reader)
		defer decompressor.Close()
		reader = decompressor
	}

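	// The (possibly decompressed) records section is spooled into a
	// ref-counted page buffer; record keys and values are later exposed as
	// references into this buffer rather than copied out.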
	buffer := newPageBuffer()
	defer buffer.unref()

	_, err := buffer.ReadFrom(reader)
	if err != nil {
		return err
	}
	if dec.crc32 != uint32(crc) {
		return fmt.Errorf("crc32 checksum mismatch (computed=%d found=%d)", dec.crc32, uint32(crc))
	}

	recordsLength := buffer.Len()
	dec.reader = buffer
	dec.remain = recordsLength

	records := make([]optimizedRecord, numRecords)
	// This is a lazy allocator used to optimize allocation of page references
	// for keys and values.
	//
	// By default, no memory is allocated; on first use, numRecords page refs
	// are allocated in a contiguous memory space, and the allocator returns
	// pointers into that array for each page ref that gets requested.
	//
	// The reasoning is that kafka partitions typically have records of a single
	// form, which either have no keys, no values, or both keys and values.
	// Lazy allocation adapts nicely to these patterns, allocating only the
	// memory the program needs while still reducing the number of malloc calls
	// it makes.
	//
	// Using a single allocator for both keys and values keeps related values
	// close by in memory, making access to the records more friendly to CPU
	// caches.
	alloc := pageRefAllocator{size: int(numRecords)}
	// Following the same reasoning that kafka partitions will typically have
	// records with repeating formats, we expect to find either records with
	// no headers or records which always contain headers.
	//
	// To reduce the memory footprint when records have no headers, the Header
	// slices are lazily allocated in a separate array.
	headers := ([][]Header)(nil)

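	// Decode each record in place: the record length and attributes are read
	// and ignored, the varint deltas are applied to the batch base offset and
	// timestamp, and key/value bytes are left in the page buffer to be
	// referenced by offset.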
	for i := range records {
		r := &records[i]
		_ = dec.readVarInt() // record length (unused)
		_ = dec.readInt8()   // record attributes (unused)
		timestampDelta := dec.readVarInt()
		offsetDelta := dec.readVarInt()

		r.offset = baseOffset + offsetDelta
		r.timestamp = firstTimestamp + timestampDelta

		keyLength := dec.readVarInt()
		keyOffset := int64(recordsLength - dec.remain)
		if keyLength > 0 {
			dec.discard(int(keyLength))
		}

		valueLength := dec.readVarInt()
		valueOffset := int64(recordsLength - dec.remain)
		if valueLength > 0 {
			dec.discard(int(valueLength))
		}

		if numHeaders := dec.readVarInt(); numHeaders > 0 {
			if headers == nil {
				headers = make([][]Header, numRecords)
			}

			h := make([]Header, numHeaders)

			for i := range h {
				h[i] = Header{
					Key:   dec.readVarString(),
					Value: dec.readVarBytes(),
				}
			}

			headers[i] = h
		}

		if dec.err != nil {
			records = records[:i]
			break
		}

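		// A varint length of -1 encodes a null key or value, so page refs
		// are only taken for non-negative lengths.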
		if keyLength >= 0 {
			r.keyRef = alloc.newPageRef()
			buffer.refTo(r.keyRef, keyOffset, keyOffset+keyLength)
		}

		if valueLength >= 0 {
			r.valueRef = alloc.newPageRef()
			buffer.refTo(r.valueRef, valueOffset, valueOffset+valueLength)
		}
	}

	// Note: it's unclear whether kafka 0.11+ still truncates responses; all
	// attempts I made at constructing a test to trigger a truncation have
	// failed. I kept this code here as a safeguard, but it may never execute.
	if dec.err != nil && len(records) == 0 {
		return dec.err
	}

	*rs = RecordSet{
		Version:    magicByte,
		Attributes: Attributes(attributes),
		Records: &optimizedRecordReader{
			records: records,
			headers: headers,
		},
	}

	if rs.Attributes.Control() {
		rs.Records = &ControlBatch{
			Attributes:           rs.Attributes,
			PartitionLeaderEpoch: partitionLeaderEpoch,
			BaseOffset:           baseOffset,
			ProducerID:           producerID,
			ProducerEpoch:        producerEpoch,
			BaseSequence:         baseSequence,
			Records:              rs.Records,
		}
	} else {
		rs.Records = &RecordBatch{
			Attributes:           rs.Attributes,
			PartitionLeaderEpoch: partitionLeaderEpoch,
			BaseOffset:           baseOffset,
			ProducerID:           producerID,
			ProducerEpoch:        producerEpoch,
			BaseSequence:         baseSequence,
			Records:              rs.Records,
		}
	}

	return nil
}

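// writeToVersion2 writes rs as a version 2 record batch starting at
// bufferOffset. Header fields that depend on the records (batch length, crc,
// last offset delta, timestamps, record count) are written as placeholders
// and backfilled once all records have been encoded.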
func (rs *RecordSet) writeToVersion2(buffer *pageBuffer, bufferOffset int64) error {
	records := rs.Records
	numRecords := int32(0)

	e := &encoder{writer: buffer}
	e.writeInt64(0)                    // base offset                         |  0 +8
	e.writeInt32(0)                    // placeholder for record batch length |  8 +4
	e.writeInt32(-1)                   // partition leader epoch              | 12 +4
	e.writeInt8(2)                     // magic byte                          | 16 +1
	e.writeInt32(0)                    // placeholder for crc32 checksum      | 17 +4
	e.writeInt16(int16(rs.Attributes)) // attributes                          | 21 +2
	e.writeInt32(0)                    // placeholder for lastOffsetDelta     | 23 +4
	e.writeInt64(0)                    // placeholder for firstTimestamp      | 27 +8
	e.writeInt64(0)                    // placeholder for maxTimestamp        | 35 +8
	e.writeInt64(-1)                   // producer id                         | 43 +8
	e.writeInt16(-1)                   // producer epoch                      | 51 +2
	e.writeInt32(-1)                   // base sequence                       | 53 +4
	e.writeInt32(0)                    // placeholder for numRecords          | 57 +4

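	// When compression is enabled, record data is streamed through the codec's
	// writer while the fixed 61-byte header above stays uncompressed in the
	// page buffer.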
	var compressor io.WriteCloser
	if compression := rs.Attributes.Compression(); compression != 0 {
		if codec := compression.Codec(); codec != nil {
			compressor = codec.NewWriter(buffer)
			e.writer = compressor
		}
	}

	currentTimestamp := timestamp(time.Now())
	lastOffsetDelta := int32(0)
	firstTimestamp := int64(0)
	maxTimestamp := int64(0)

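	// Each record is encoded as a varint length prefix followed by attributes,
	// timestamp and offset deltas, key, value, and headers; the length is
	// computed up front so the record can be written in a single pass.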
	err := forEachRecord(records, func(i int, r *Record) error {
		t := timestamp(r.Time)
		if t == 0 {
			t = currentTimestamp
		}
		if i == 0 {
			firstTimestamp = t
		}
		if t > maxTimestamp {
			maxTimestamp = t
		}

		timestampDelta := t - firstTimestamp
		offsetDelta := int64(i)
		lastOffsetDelta = int32(offsetDelta)

		length := 1 + // attributes
			sizeOfVarInt(timestampDelta) +
			sizeOfVarInt(offsetDelta) +
			sizeOfVarNullBytesIface(r.Key) +
			sizeOfVarNullBytesIface(r.Value) +
			sizeOfVarInt(int64(len(r.Headers)))

		for _, h := range r.Headers {
			length += sizeOfVarString(h.Key) + sizeOfVarNullBytes(h.Value)
		}

		e.writeVarInt(int64(length))
		e.writeInt8(0) // record attributes (unused)
		e.writeVarInt(timestampDelta)
		e.writeVarInt(offsetDelta)

		if err := e.writeVarNullBytesFrom(r.Key); err != nil {
			return err
		}

		if err := e.writeVarNullBytesFrom(r.Value); err != nil {
			return err
		}

		e.writeVarInt(int64(len(r.Headers)))

		for _, h := range r.Headers {
			e.writeVarString(h.Key)
			e.writeVarNullBytes(h.Value)
		}

		numRecords++
		return nil
	})

	if err != nil {
		return err
	}

	if compressor != nil {
		if err := compressor.Close(); err != nil {
			return err
		}
	}

	if numRecords == 0 {
		return ErrNoRecord
	}

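	// Backfill the placeholders at their fixed offsets within the batch
	// header: 23 = last offset delta, 27 = first timestamp, 35 = max
	// timestamp, 57 = record count.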
	b2 := packUint32(uint32(lastOffsetDelta))
	b3 := packUint64(uint64(firstTimestamp))
	b4 := packUint64(uint64(maxTimestamp))
	b5 := packUint32(uint32(numRecords))

	buffer.WriteAt(b2[:], bufferOffset+23)
	buffer.WriteAt(b3[:], bufferOffset+27)
	buffer.WriteAt(b4[:], bufferOffset+35)
	buffer.WriteAt(b5[:], bufferOffset+57)

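	// batchLength excludes the first 12 bytes of the batch (the base offset
	// and the length field itself), matching the length decoded on the read
	// path.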
	totalLength := buffer.Size() - bufferOffset
	batchLength := totalLength - 12

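	// The checksum is CRC-32C over everything from the attributes field
	// (offset 21 within the batch) to the end, the same range validated on
	// the read path.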
	checksum := uint32(0)
	crcTable := crc32.MakeTable(crc32.Castagnoli)

	buffer.pages.scan(bufferOffset+21, bufferOffset+totalLength, func(chunk []byte) bool {
		checksum = crc32.Update(checksum, crcTable, chunk)
		return true
	})

	b0 := packUint32(uint32(batchLength))
	b1 := packUint32(checksum)

	buffer.WriteAt(b0[:], bufferOffset+8)
	buffer.WriteAt(b1[:], bufferOffset+17)
	return nil
}