github.com/segmentio/kafka-go@v0.4.48-0.20240318174348-3f6244eb34fd/protocol/record_v1.go (about)

     1  package protocol
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"hash/crc32"
     7  	"io"
     8  	"math"
     9  	"time"
    10  )
    11  
    12  func readMessage(b *pageBuffer, d *decoder) (attributes int8, baseOffset, timestamp int64, key, value Bytes, err error) {
    13  	md := decoder{
    14  		reader: d,
    15  		remain: 12,
    16  	}
    17  
    18  	baseOffset = md.readInt64()
    19  	md.remain = int(md.readInt32())
    20  
    21  	crc := uint32(md.readInt32())
    22  	md.setCRC(crc32.IEEETable)
    23  	magicByte := md.readInt8()
    24  	attributes = md.readInt8()
    25  	timestamp = int64(0)
    26  
    27  	if magicByte != 0 {
    28  		timestamp = md.readInt64()
    29  	}
    30  
    31  	keyOffset := b.Size()
    32  	keyLength := int(md.readInt32())
    33  	hasKey := keyLength >= 0
    34  	if hasKey {
    35  		md.writeTo(b, keyLength)
    36  		key = b.ref(keyOffset, b.Size())
    37  	}
    38  
    39  	valueOffset := b.Size()
    40  	valueLength := int(md.readInt32())
    41  	hasValue := valueLength >= 0
    42  	if hasValue {
    43  		md.writeTo(b, valueLength)
    44  		value = b.ref(valueOffset, b.Size())
    45  	}
    46  
    47  	if md.crc32 != crc {
    48  		err = Errorf("crc32 checksum mismatch (computed=%d found=%d)", md.crc32, crc)
    49  	} else {
    50  		err = dontExpectEOF(md.err)
    51  	}
    52  
    53  	return
    54  }
    55  
    56  func (rs *RecordSet) readFromVersion1(d *decoder) error {
    57  	var records RecordReader
    58  
    59  	b := newPageBuffer()
    60  	defer b.unref()
    61  
    62  	attributes, baseOffset, timestamp, key, value, err := readMessage(b, d)
    63  	if err != nil {
    64  		return err
    65  	}
    66  
    67  	if compression := Attributes(attributes).Compression(); compression == 0 {
    68  		records = &message{
    69  			Record: Record{
    70  				Offset: baseOffset,
    71  				Time:   makeTime(timestamp),
    72  				Key:    key,
    73  				Value:  value,
    74  			},
    75  		}
    76  	} else {
    77  		// Can we have a non-nil key when reading a compressed message?
    78  		if key != nil {
    79  			key.Close()
    80  		}
    81  		if value == nil {
    82  			records = emptyRecordReader{}
    83  		} else {
    84  			defer value.Close()
    85  
    86  			codec := compression.Codec()
    87  			fmt.Printf("codec: %v\n", codec)
    88  			if codec == nil {
    89  				return Errorf("unsupported compression codec: %d", compression)
    90  			}
    91  			decompressor := codec.NewReader(value)
    92  			defer decompressor.Close()
    93  
    94  			b := newPageBuffer()
    95  			defer b.unref()
    96  
    97  			d := &decoder{
    98  				reader: decompressor,
    99  				remain: math.MaxInt32,
   100  			}
   101  
   102  			r := &recordReader{
   103  				records: make([]Record, 0, 32),
   104  			}
   105  
   106  			for !d.done() {
   107  				_, offset, timestamp, key, value, err := readMessage(b, d)
   108  				if err != nil {
   109  					if errors.Is(err, io.ErrUnexpectedEOF) {
   110  						break
   111  					}
   112  					for _, rec := range r.records {
   113  						closeBytes(rec.Key)
   114  						closeBytes(rec.Value)
   115  					}
   116  					return err
   117  				}
   118  				r.records = append(r.records, Record{
   119  					Offset: offset,
   120  					Time:   makeTime(timestamp),
   121  					Key:    key,
   122  					Value:  value,
   123  				})
   124  			}
   125  
   126  			if baseOffset != 0 {
   127  				// https://kafka.apache.org/documentation/#messageset
   128  				//
   129  				// In version 1, to avoid server side re-compression, only the
   130  				// wrapper message will be assigned an offset. The inner messages
   131  				// will have relative offsets. The absolute offset can be computed
   132  				// using the offset from the outer message, which corresponds to the
   133  				// offset assigned to the last inner message.
   134  				lastRelativeOffset := int64(len(r.records)) - 1
   135  
   136  				for i := range r.records {
   137  					r.records[i].Offset = baseOffset - (lastRelativeOffset - r.records[i].Offset)
   138  				}
   139  			}
   140  
   141  			records = r
   142  		}
   143  	}
   144  
   145  	*rs = RecordSet{
   146  		Version:    1,
   147  		Attributes: Attributes(attributes),
   148  		Records:    records,
   149  	}
   150  
   151  	return nil
   152  }
   153  
// writeToVersion1 appends rs to buffer, encoded as a message set in version 1
// of the Kafka record format. bufferOffset is the absolute position in buffer
// where this record set begins; it is used to locate the bytes to compress
// and to truncate the uncompressed encoding when compression is enabled.
func (rs *RecordSet) writeToVersion1(buffer *pageBuffer, bufferOffset int64) error {
	attributes := rs.Attributes
	records := rs.Records

	if compression := attributes.Compression(); compression != 0 {
		if codec := compression.Codec(); codec != nil {
			// In the message format version 1, compression is achieved by
			// compressing the value of a message which recursively contains
			// the representation of the compressed message set.
			subset := *rs
			subset.Attributes &= ^7 // erase compression

			// First write the uncompressed encoding into buffer, then stream
			// those bytes through the compressor into a temporary buffer.
			if err := subset.writeToVersion1(buffer, bufferOffset); err != nil {
				return err
			}

			compressed := newPageBuffer()
			defer compressed.unref()

			compressor := codec.NewWriter(compressed)
			defer compressor.Close()

			// scan aborts early (returns false) on the first failed write;
			// err carries the failure out of the callback.
			var err error
			buffer.pages.scan(bufferOffset, buffer.Size(), func(b []byte) bool {
				_, err = compressor.Write(b)
				return err == nil
			})
			if err != nil {
				return err
			}
			// Close flushes the compressor so all compressed bytes land in
			// the page buffer before it is referenced below.
			if err := compressor.Close(); err != nil {
				return err
			}

			// Discard the uncompressed encoding and replace the record set
			// with a single message whose value is the compressed payload.
			buffer.Truncate(int(bufferOffset))

			records = &message{
				Record: Record{
					Value: compressed,
				},
			}
		}
	}

	e := encoder{writer: buffer}
	currentTimestamp := timestamp(time.Now())

	return forEachRecord(records, func(i int, r *Record) error {
		// Records without an explicit time are stamped with the time at
		// which the record set started being written.
		t := timestamp(r.Time)
		if t == 0 {
			t = currentTimestamp
		}

		// v1 message layout: offset(8) size(4) crc(4) magic(1) attributes(1)
		// timestamp(8) key value. Size and crc are back-filled below once the
		// message has been fully written.
		messageOffset := buffer.Size()
		e.writeInt64(int64(i))
		e.writeInt32(0) // message size placeholder
		e.writeInt32(0) // crc32 placeholder
		// The checksum covers everything written after the crc field.
		e.setCRC(crc32.IEEETable)
		e.writeInt8(1) // magic byte: version 1
		e.writeInt8(int8(attributes))
		e.writeInt64(t)

		if err := e.writeNullBytesFrom(r.Key); err != nil {
			return err
		}

		if err := e.writeNullBytesFrom(r.Value); err != nil {
			return err
		}

		// Message size excludes the 12-byte offset+size prefix; the size
		// field sits at +8 and the crc field at +12 from the message start.
		b0 := packUint32(uint32(buffer.Size() - (messageOffset + 12)))
		b1 := packUint32(e.crc32)

		buffer.WriteAt(b0[:], messageOffset+8)
		buffer.WriteAt(b1[:], messageOffset+12)
		e.setCRC(nil)
		return nil
	})
}
   233  
// message is a RecordReader that yields its single wrapped record exactly
// once; it represents a v1 message set containing one message.
type message struct {
	Record Record
	read   bool // set after the record has been returned once
}
   238  
   239  func (m *message) ReadRecord() (*Record, error) {
   240  	if m.read {
   241  		return nil, io.EOF
   242  	}
   243  	m.read = true
   244  	return &m.Record, nil
   245  }