github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/symdb/functions.go (about)

     1  //nolint:unused
     2  package symdb
     3  
     4  import (
     5  	"bytes"
     6  	"encoding/binary"
     7  	"fmt"
     8  	"hash/crc32"
     9  	"io"
    10  	"unsafe"
    11  
    12  	"github.com/parquet-go/parquet-go/encoding/delta"
    13  
    14  	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
    15  	"github.com/grafana/pyroscope/pkg/slices"
    16  )
    17  
    18  var (
    19  	_ symbolsBlockEncoder[v1.InMemoryFunction] = (*functionsBlockEncoder)(nil)
    20  	_ symbolsBlockDecoder[v1.InMemoryFunction] = (*functionsBlockDecoder)(nil)
    21  )
    22  
    23  type functionsBlockHeader struct {
    24  	FunctionsLen   uint32
    25  	NameSize       uint32
    26  	SystemNameSize uint32
    27  	FileNameSize   uint32
    28  	StartLineSize  uint32
    29  	CRC            uint32
    30  }
    31  
    32  func (h *functionsBlockHeader) marshal(b []byte) {
    33  	binary.BigEndian.PutUint32(b[0:4], h.FunctionsLen)
    34  	binary.BigEndian.PutUint32(b[4:8], h.NameSize)
    35  	binary.BigEndian.PutUint32(b[8:12], h.SystemNameSize)
    36  	binary.BigEndian.PutUint32(b[12:16], h.FileNameSize)
    37  	binary.BigEndian.PutUint32(b[16:20], h.StartLineSize)
    38  	// Fields can be added here in the future.
    39  	// CRC must be the last four bytes.
    40  	h.CRC = crc32.Checksum(b[0:20], castagnoli)
    41  	binary.BigEndian.PutUint32(b[20:24], h.CRC)
    42  }
    43  
    44  func (h *functionsBlockHeader) unmarshal(b []byte) {
    45  	h.FunctionsLen = binary.BigEndian.Uint32(b[0:4])
    46  	h.NameSize = binary.BigEndian.Uint32(b[4:8])
    47  	h.SystemNameSize = binary.BigEndian.Uint32(b[8:12])
    48  	h.FileNameSize = binary.BigEndian.Uint32(b[12:16])
    49  	h.StartLineSize = binary.BigEndian.Uint32(b[16:20])
    50  	// In future versions, new fields are decoded here;
    51  	// if pos < len(b)-checksumSize, then there are more fields.
    52  	h.CRC = binary.BigEndian.Uint32(b[len(b)-checksumSize:])
    53  }
    54  
    55  func (h *functionsBlockHeader) checksum() uint32 { return h.CRC }
    56  
    57  type functionsBlockEncoder struct {
    58  	header functionsBlockHeader
    59  
    60  	tmp  []byte
    61  	buf  bytes.Buffer
    62  	ints []int32
    63  }
    64  
    65  func newFunctionsEncoder() *symbolsEncoder[v1.InMemoryFunction] {
    66  	return newSymbolsEncoder[v1.InMemoryFunction](new(functionsBlockEncoder))
    67  }
    68  
    69  func (e *functionsBlockEncoder) format() SymbolsBlockFormat { return BlockFunctionsV1 }
    70  
    71  func (e *functionsBlockEncoder) headerSize() uintptr { return unsafe.Sizeof(functionsBlockHeader{}) }
    72  
    73  func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunction) error {
    74  	e.initWrite(len(functions))
    75  	var enc delta.BinaryPackedEncoding
    76  
    77  	for i, f := range functions {
    78  		e.ints[i] = int32(f.Name)
    79  	}
    80  	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
    81  	e.header.NameSize = uint32(len(e.tmp))
    82  	e.buf.Write(e.tmp)
    83  
    84  	for i, f := range functions {
    85  		e.ints[i] = int32(f.SystemName)
    86  	}
    87  	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
    88  	e.header.SystemNameSize = uint32(len(e.tmp))
    89  	e.buf.Write(e.tmp)
    90  
    91  	for i, f := range functions {
    92  		e.ints[i] = int32(f.Filename)
    93  	}
    94  	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
    95  	e.header.FileNameSize = uint32(len(e.tmp))
    96  	e.buf.Write(e.tmp)
    97  
    98  	for i, f := range functions {
    99  		e.ints[i] = int32(f.StartLine)
   100  	}
   101  	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
   102  	e.header.StartLineSize = uint32(len(e.tmp))
   103  	e.buf.Write(e.tmp)
   104  
   105  	e.tmp = slices.GrowLen(e.tmp, int(e.headerSize()))
   106  	e.header.marshal(e.tmp)
   107  	if _, err := w.Write(e.tmp); err != nil {
   108  		return err
   109  	}
   110  	_, err := e.buf.WriteTo(w)
   111  	return err
   112  }
   113  
   114  func (e *functionsBlockEncoder) initWrite(functions int) {
   115  	e.buf.Reset()
   116  	// Actual estimate is ~7 bytes per function.
   117  	e.buf.Grow(functions * 8)
   118  	*e = functionsBlockEncoder{
   119  		header: functionsBlockHeader{FunctionsLen: uint32(functions)},
   120  
   121  		tmp:  slices.GrowLen(e.tmp, functions*2),
   122  		ints: slices.GrowLen(e.ints, functions),
   123  		buf:  e.buf,
   124  	}
   125  }
   126  
   127  type functionsBlockDecoder struct {
   128  	headerSize uint16
   129  	header     functionsBlockHeader
   130  
   131  	ints []int32
   132  	buf  []byte
   133  }
   134  
   135  func newFunctionsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryFunction], error) {
   136  	if h.Format == BlockFunctionsV1 {
   137  		headerSize := max(functionsBlockHeaderMinSize, h.BlockHeaderSize)
   138  		return newSymbolsDecoder[v1.InMemoryFunction](h, &functionsBlockDecoder{headerSize: headerSize}), nil
   139  	}
   140  	return nil, fmt.Errorf("%w: unknown functions format: %d", ErrUnknownVersion, h.Format)
   141  }
   142  
   143  // In early versions, block header size is not specified. Must not change.
   144  const functionsBlockHeaderMinSize = 24
   145  
   146  func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunction) (err error) {
   147  	d.buf = slices.GrowLen(d.buf, int(d.headerSize))
   148  	if err = readSymbolsBlockHeader(d.buf, r, &d.header); err != nil {
   149  		return err
   150  	}
   151  	if d.header.FunctionsLen > uint32(len(functions)) {
   152  		return fmt.Errorf("functions buffer is too short")
   153  	}
   154  
   155  	d.ints = slices.GrowLen(d.ints, int(d.header.FunctionsLen))
   156  	d.buf = slices.GrowLen(d.buf, int(d.header.NameSize))
   157  	if _, err = io.ReadFull(r, d.buf); err != nil {
   158  		return err
   159  	}
   160  	d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.FunctionsLen))
   161  	if err != nil {
   162  		return err
   163  	}
   164  	for i, v := range d.ints {
   165  		functions[i].Name = uint32(v)
   166  	}
   167  
   168  	d.buf = slices.GrowLen(d.buf, int(d.header.SystemNameSize))
   169  	if _, err = io.ReadFull(r, d.buf); err != nil {
   170  		return err
   171  	}
   172  	d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.FunctionsLen))
   173  	if err != nil {
   174  		return err
   175  	}
   176  	for i, v := range d.ints {
   177  		functions[i].SystemName = uint32(v)
   178  	}
   179  
   180  	d.buf = slices.GrowLen(d.buf, int(d.header.FileNameSize))
   181  	if _, err = io.ReadFull(r, d.buf); err != nil {
   182  		return err
   183  	}
   184  	d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.FunctionsLen))
   185  	if err != nil {
   186  		return err
   187  	}
   188  	for i, v := range d.ints {
   189  		functions[i].Filename = uint32(v)
   190  	}
   191  
   192  	d.buf = slices.GrowLen(d.buf, int(d.header.StartLineSize))
   193  	if _, err = io.ReadFull(r, d.buf); err != nil {
   194  		return err
   195  	}
   196  	d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.FunctionsLen))
   197  	if err != nil {
   198  		return err
   199  	}
   200  	for i, v := range d.ints {
   201  		functions[i].StartLine = uint32(v)
   202  	}
   203  
   204  	return nil
   205  }