github.com/streamdal/segmentio-kafka-go@v0.4.47-streamdal/protocol/record_v1.go (about)

     1  package protocol
     2  
     3  import (
     4  	"errors"
     5  	"hash/crc32"
     6  	"io"
     7  	"math"
     8  	"time"
     9  )
    10  
// readMessage decodes one message (legacy v0/v1 message-set entry) from d,
// copying the key and value payloads into b and returning Bytes references
// into that buffer.
//
// Wire layout: offset (int64), message size (int32), crc (int32, IEEE,
// covering everything after the crc field), magic byte (int8), attributes
// (int8), timestamp (int64, v1 only), key (nullable bytes), value
// (nullable bytes).
//
// key/value are left nil when their length prefix is negative (null). err
// is non-nil on a crc mismatch or on a short read; a clean EOF at a
// message boundary is normalized by dontExpectEOF (presumably mapped to
// io.ErrUnexpectedEOF — see the decoder helpers).
func readMessage(b *pageBuffer, d *decoder) (attributes int8, baseOffset, timestamp int64, key, value Bytes, err error) {
	// Bound the nested decoder to the 12-byte header first (offset +
	// message size); it is re-bounded to the message body just below.
	md := decoder{
		reader: d,
		remain: 12,
	}

	baseOffset = md.readInt64()
	md.remain = int(md.readInt32())

	// Read the crc before arming checksum computation: the checksum
	// covers only the bytes that follow the crc field.
	crc := uint32(md.readInt32())
	md.setCRC(crc32.IEEETable)
	magicByte := md.readInt8()
	attributes = md.readInt8()
	timestamp = int64(0)

	// Only v1 messages (magic byte != 0) carry a timestamp field.
	if magicByte != 0 {
		timestamp = md.readInt64()
	}

	keyOffset := b.Size()
	keyLength := int(md.readInt32())
	hasKey := keyLength >= 0 // a negative length encodes a null key
	if hasKey {
		md.writeTo(b, keyLength)
		key = b.ref(keyOffset, b.Size())
	}

	valueOffset := b.Size()
	valueLength := int(md.readInt32())
	hasValue := valueLength >= 0 // a negative length encodes a null value
	if hasValue {
		md.writeTo(b, valueLength)
		value = b.ref(valueOffset, b.Size())
	}

	if md.crc32 != crc {
		err = Errorf("crc32 checksum mismatch (computed=%d found=%d)", md.crc32, crc)
	} else {
		err = dontExpectEOF(md.err)
	}

	return
}
    54  
// readFromVersion1 reads a message set in the legacy v0/v1 wire format
// from d and stores the result in rs.
//
// Uncompressed sets are exposed directly as a single-record reader. When
// the outer message carries a compression attribute, its value holds a
// nested compressed message set: it is decompressed, each inner message is
// decoded, and the inner (relative) offsets are rebased onto the outer
// message's absolute offset.
func (rs *RecordSet) readFromVersion1(d *decoder) error {
	var records RecordReader

	b := newPageBuffer()
	defer b.unref()

	attributes, baseOffset, timestamp, key, value, err := readMessage(b, d)
	if err != nil {
		return err
	}

	if compression := Attributes(attributes).Compression(); compression == 0 {
		records = &message{
			Record: Record{
				Offset: baseOffset,
				Time:   makeTime(timestamp),
				Key:    key,
				Value:  value,
			},
		}
	} else {
		// Can we have a non-nil key when reading a compressed message?
		if key != nil {
			key.Close()
		}
		if value == nil {
			// A null value means there is nothing to decompress.
			records = emptyRecordReader{}
		} else {
			defer value.Close()

			codec := compression.Codec()
			if codec == nil {
				return Errorf("unsupported compression codec: %d", compression)
			}
			decompressor := codec.NewReader(value)
			defer decompressor.Close()

			// Fresh buffer and decoder for the nested (decompressed)
			// message set; these intentionally shadow the outer b and d.
			b := newPageBuffer()
			defer b.unref()

			d := &decoder{
				reader: decompressor,
				// The decompressed size is unknown up front, so the
				// decoder is unbounded and reads until EOF.
				remain: math.MaxInt32,
			}

			r := &recordReader{
				records: make([]Record, 0, 32),
			}

			for !d.done() {
				_, offset, timestamp, key, value, err := readMessage(b, d)
				if err != nil {
					if errors.Is(err, io.ErrUnexpectedEOF) {
						// Stop at the first truncated message and keep
						// what was decoded so far.
						break
					}
					// Hard error: release the buffer references held by
					// the records decoded before the failure.
					for _, rec := range r.records {
						closeBytes(rec.Key)
						closeBytes(rec.Value)
					}
					return err
				}
				r.records = append(r.records, Record{
					Offset: offset,
					Time:   makeTime(timestamp),
					Key:    key,
					Value:  value,
				})
			}

			if baseOffset != 0 {
				// https://kafka.apache.org/documentation/#messageset
				//
				// In version 1, to avoid server side re-compression, only the
				// wrapper message will be assigned an offset. The inner messages
				// will have relative offsets. The absolute offset can be computed
				// using the offset from the outer message, which corresponds to the
				// offset assigned to the last inner message.
				lastRelativeOffset := int64(len(r.records)) - 1

				for i := range r.records {
					r.records[i].Offset = baseOffset - (lastRelativeOffset - r.records[i].Offset)
				}
			}

			records = r
		}
	}

	*rs = RecordSet{
		Version:    1,
		Attributes: Attributes(attributes),
		Records:    records,
	}

	return nil
}
   151  
// writeToVersion1 appends rs to buffer in the legacy v0/v1 message set
// format, treating bufferOffset as the start of the record set within
// buffer.
//
// When rs carries a compression attribute, the set is first serialized
// uncompressed into buffer (via a recursive call), that region is then
// compressed, and the compressed bytes are re-emitted as the value of a
// single wrapper message — the v1 compression scheme.
//
// Each message is framed as: offset (int64), size (int32), crc (int32),
// magic byte = 1, attributes (int8), timestamp (int64), key (nullable
// bytes), value (nullable bytes).
func (rs *RecordSet) writeToVersion1(buffer *pageBuffer, bufferOffset int64) error {
	attributes := rs.Attributes
	records := rs.Records

	if compression := attributes.Compression(); compression != 0 {
		if codec := compression.Codec(); codec != nil {
			// In the message format version 1, compression is achieved by
			// compressing the value of a message which recursively contains
			// the representation of the compressed message set.
			subset := *rs
			subset.Attributes &= ^7 // erase compression

			if err := subset.writeToVersion1(buffer, bufferOffset); err != nil {
				return err
			}

			compressed := newPageBuffer()
			defer compressed.unref()

			compressor := codec.NewWriter(compressed)
			defer compressor.Close()

			// Stream the uncompressed serialization through the
			// compressor page by page; scan stops early on write failure.
			var err error
			buffer.pages.scan(bufferOffset, buffer.Size(), func(b []byte) bool {
				_, err = compressor.Write(b)
				return err == nil
			})
			if err != nil {
				return err
			}
			// Explicit Close (in addition to the deferred one) flushes
			// the codec's final block before the bytes are read back.
			if err := compressor.Close(); err != nil {
				return err
			}

			// Drop the uncompressed serialization; the wrapper message
			// written below replaces it.
			buffer.Truncate(int(bufferOffset))

			records = &message{
				Record: Record{
					Value: compressed,
				},
			}
		}
	}

	e := encoder{writer: buffer}
	currentTimestamp := timestamp(time.Now())

	return forEachRecord(records, func(i int, r *Record) error {
		t := timestamp(r.Time)
		if t == 0 {
			// A zero time means "unset": stamp with the current time.
			t = currentTimestamp
		}

		messageOffset := buffer.Size()
		e.writeInt64(int64(i))
		e.writeInt32(0) // message size placeholder
		e.writeInt32(0) // crc32 placeholder
		// The crc covers everything after the crc field (magic byte
		// onward), so checksumming is armed only from this point.
		e.setCRC(crc32.IEEETable)
		e.writeInt8(1) // magic byte: version 1
		e.writeInt8(int8(attributes))
		e.writeInt64(t)

		if err := e.writeNullBytesFrom(r.Key); err != nil {
			return err
		}

		if err := e.writeNullBytesFrom(r.Value); err != nil {
			return err
		}

		// Back-patch the placeholders: size counts the bytes after the
		// size field itself (i.e. from the crc onward).
		b0 := packUint32(uint32(buffer.Size() - (messageOffset + 12)))
		b1 := packUint32(e.crc32)

		buffer.WriteAt(b0[:], messageOffset+8)
		buffer.WriteAt(b1[:], messageOffset+12)
		e.setCRC(nil)
		return nil
	})
}
   231  
// message is a RecordReader that yields exactly one record. It is used
// for single-message v0/v1 sets: the uncompressed read path and the
// compressed wrapper on the write path.
type message struct {
	Record Record // the single record to expose
	read   bool   // set once the record has been returned
}
   236  
   237  func (m *message) ReadRecord() (*Record, error) {
   238  	if m.read {
   239  		return nil, io.EOF
   240  	}
   241  	m.read = true
   242  	return &m.Record, nil
   243  }