github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/symdb/strings.go (about)

     1  //nolint:unused
     2  package symdb
     3  
     4  import (
     5  	"bytes"
     6  	"encoding/binary"
     7  	"fmt"
     8  	"hash/crc32"
     9  	"io"
    10  	"unsafe"
    11  
    12  	"github.com/grafana/pyroscope/pkg/slices"
    13  )
    14  
// maxStringLen is the longest string, in bytes, that the encoder writes;
// it is the largest value a 16-bit length prefix can hold. Longer strings
// are truncated to this length on encoding.
const maxStringLen = 1<<16 - 1

// Compile-time checks that the strings block encoder and decoder
// satisfy the generic symbols block interfaces.
var (
	_ symbolsBlockEncoder[string] = (*stringsBlockEncoder)(nil)
	_ symbolsBlockDecoder[string] = (*stringsBlockDecoder)(nil)
)
    21  
// stringsBlockHeader is the header written in front of every strings block.
// Its in-memory size (12 bytes) is used as the serialized header size.
type stringsBlockHeader struct {
	// StringsLen is the number of strings stored in the block.
	StringsLen uint32
	// BlockEncoding is the width, in bits, of each string length prefix:
	// either 8 or 16. It is serialized at byte offset 5 of the header.
	BlockEncoding byte
	// Padding that keeps the struct size at 12 bytes.
	_ [3]byte
	// CRC is the checksum of the first 8 serialized header bytes;
	// it is stored in the last four bytes of the header.
	CRC uint32
}
    28  
    29  func (h *stringsBlockHeader) marshal(b []byte) {
    30  	binary.BigEndian.PutUint32(b[0:4], h.StringsLen)
    31  	b[5], b[6], b[7], b[8] = h.BlockEncoding, 0, 0, 0
    32  	// Fields can be added here in the future.
    33  	// CRC must be the last four bytes.
    34  	h.CRC = crc32.Checksum(b[0:8], castagnoli)
    35  	binary.BigEndian.PutUint32(b[8:12], h.CRC)
    36  }
    37  
    38  func (h *stringsBlockHeader) unmarshal(b []byte) {
    39  	h.StringsLen = binary.BigEndian.Uint32(b[0:4])
    40  	h.BlockEncoding = b[5]
    41  	// In future versions, new fields are decoded here;
    42  	// if pos < len(b)-checksumSize, then there are more fields.
    43  	h.CRC = binary.BigEndian.Uint32(b[8:12])
    44  }
    45  
    46  func (h *stringsBlockHeader) checksum() uint32 { return h.CRC }
    47  
// stringsBlockEncoder encodes a slice of strings as a single block.
// Its buffers are reused across encode calls.
type stringsBlockEncoder struct {
	// header describes the block currently being encoded.
	header stringsBlockHeader
	// buf accumulates the block body (length prefixes followed by
	// string bytes) before it is written out after the header.
	buf bytes.Buffer
	// tmp is scratch space: first for the length prefixes,
	// then for the serialized header.
	tmp []byte
}
    53  
    54  func newStringsEncoder() *symbolsEncoder[string] {
    55  	return newSymbolsEncoder[string](new(stringsBlockEncoder))
    56  }
    57  
    58  func (e *stringsBlockEncoder) format() SymbolsBlockFormat { return BlockStringsV1 }
    59  
    60  func (e *stringsBlockEncoder) headerSize() uintptr { return unsafe.Sizeof(stringsBlockHeader{}) }
    61  
    62  func (e *stringsBlockEncoder) encode(w io.Writer, strings []string) error {
    63  	e.initWrite(len(strings))
    64  	e.header.BlockEncoding = e.blockEncoding(strings)
    65  	switch e.header.BlockEncoding {
    66  	case 8:
    67  		for j, s := range strings {
    68  			e.tmp[j] = byte(len(s))
    69  		}
    70  	case 16:
    71  		for j, s := range strings {
    72  			binary.BigEndian.PutUint16(e.tmp[j*2:], uint16(len(s)))
    73  		}
    74  	}
    75  	if _, err := e.buf.Write(e.tmp[:len(strings)*int(e.header.BlockEncoding)/8]); err != nil {
    76  		return err
    77  	}
    78  	for _, s := range strings {
    79  		if len(s) > maxStringLen {
    80  			s = s[:maxStringLen]
    81  		}
    82  		if _, err := e.buf.Write(*((*[]byte)(unsafe.Pointer(&s)))); err != nil {
    83  			return err
    84  		}
    85  	}
    86  	e.tmp = slices.GrowLen(e.tmp, int(e.headerSize()))
    87  	e.header.marshal(e.tmp)
    88  	if _, err := w.Write(e.tmp); err != nil {
    89  		return err
    90  	}
    91  	_, err := e.buf.WriteTo(w)
    92  	return err
    93  }
    94  
    95  func (e *stringsBlockEncoder) blockEncoding(b []string) byte {
    96  	for _, s := range b {
    97  		if len(s) > 255 {
    98  			return 16
    99  		}
   100  	}
   101  	return 8
   102  }
   103  
   104  func (e *stringsBlockEncoder) initWrite(strings int) {
   105  	e.buf.Reset()
   106  	e.buf.Grow(strings * 16)
   107  	*e = stringsBlockEncoder{
   108  		header: stringsBlockHeader{StringsLen: uint32(strings)},
   109  		tmp:    slices.GrowLen(e.tmp, strings*2),
   110  		buf:    e.buf,
   111  	}
   112  }
   113  
// stringsBlockDecoder decodes a single strings block.
type stringsBlockDecoder struct {
	// headerSize is the number of bytes to read for the block header.
	headerSize uint16
	// header is the header of the block most recently read by decode.
	header stringsBlockHeader
	// buf is scratch space, used for the raw header bytes
	// and then for the length prefixes.
	buf []byte
}
   119  
   120  func newStringsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[string], error) {
   121  	if h.Format == BlockStringsV1 {
   122  		headerSize := max(stringsBlockHeaderMinSize, h.BlockHeaderSize)
   123  		return newSymbolsDecoder[string](h, &stringsBlockDecoder{headerSize: headerSize}), nil
   124  	}
   125  	return nil, fmt.Errorf("%w: unknown strings format: %d", ErrUnknownVersion, h.Format)
   126  }
   127  
// stringsBlockHeaderMinSize is the strings block header size, in bytes,
// assumed as the lower bound when decoding: early versions of the format
// do not specify the block header size. Must not change.
const stringsBlockHeaderMinSize = 12
   130  
   131  func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) {
   132  	d.buf = slices.GrowLen(d.buf, int(d.headerSize))
   133  	if err = readSymbolsBlockHeader(d.buf, r, &d.header); err != nil {
   134  		return err
   135  	}
   136  	if d.header.BlockEncoding != 8 && d.header.BlockEncoding != 16 {
   137  		return fmt.Errorf("invalid string block encoding: %d", d.header.BlockEncoding)
   138  	}
   139  	if d.header.StringsLen != uint32(len(strings)) {
   140  		return fmt.Errorf("invalid string buffer size")
   141  	}
   142  	if d.header.BlockEncoding == 8 {
   143  		return d.decodeStrings8(r, strings)
   144  	}
   145  	return d.decodeStrings16(r, strings)
   146  }
   147  
   148  func (d *stringsBlockDecoder) decodeStrings8(r io.Reader, dst []string) (err error) {
   149  	d.buf = slices.GrowLen(d.buf, len(dst)) // 1 byte per string.
   150  	if _, err = io.ReadFull(r, d.buf); err != nil {
   151  		return err
   152  	}
   153  	for i := 0; i < len(dst); i++ {
   154  		s := make([]byte, d.buf[i])
   155  		if _, err = io.ReadFull(r, s); err != nil {
   156  			return err
   157  		}
   158  		dst[i] = *(*string)(unsafe.Pointer(&s))
   159  	}
   160  	return err
   161  }
   162  
   163  func (d *stringsBlockDecoder) decodeStrings16(r io.Reader, dst []string) (err error) {
   164  	d.buf = slices.GrowLen(d.buf, len(dst)*2) // 2 bytes per string.
   165  	if _, err = io.ReadFull(r, d.buf); err != nil {
   166  		return err
   167  	}
   168  	for i := 0; i < len(dst); i++ {
   169  		l := binary.BigEndian.Uint16(d.buf[i*2:])
   170  		s := make([]byte, l)
   171  		if _, err = io.ReadFull(r, s); err != nil {
   172  			return err
   173  		}
   174  		dst[i] = *(*string)(unsafe.Pointer(&s))
   175  	}
   176  	return err
   177  }