github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/symdb/strings.go (about) 1 //nolint:unused 2 package symdb 3 4 import ( 5 "bytes" 6 "encoding/binary" 7 "fmt" 8 "hash/crc32" 9 "io" 10 "unsafe" 11 12 "github.com/grafana/pyroscope/pkg/slices" 13 ) 14 15 const maxStringLen = 1<<16 - 1 16 17 var ( 18 _ symbolsBlockEncoder[string] = (*stringsBlockEncoder)(nil) 19 _ symbolsBlockDecoder[string] = (*stringsBlockDecoder)(nil) 20 ) 21 22 type stringsBlockHeader struct { 23 StringsLen uint32 24 BlockEncoding byte 25 _ [3]byte 26 CRC uint32 27 } 28 29 func (h *stringsBlockHeader) marshal(b []byte) { 30 binary.BigEndian.PutUint32(b[0:4], h.StringsLen) 31 b[5], b[6], b[7], b[8] = h.BlockEncoding, 0, 0, 0 32 // Fields can be added here in the future. 33 // CRC must be the last four bytes. 34 h.CRC = crc32.Checksum(b[0:8], castagnoli) 35 binary.BigEndian.PutUint32(b[8:12], h.CRC) 36 } 37 38 func (h *stringsBlockHeader) unmarshal(b []byte) { 39 h.StringsLen = binary.BigEndian.Uint32(b[0:4]) 40 h.BlockEncoding = b[5] 41 // In future versions, new fields are decoded here; 42 // if pos < len(b)-checksumSize, then there are more fields. 43 h.CRC = binary.BigEndian.Uint32(b[8:12]) 44 } 45 46 func (h *stringsBlockHeader) checksum() uint32 { return h.CRC } 47 48 type stringsBlockEncoder struct { 49 header stringsBlockHeader 50 buf bytes.Buffer 51 tmp []byte 52 } 53 54 func newStringsEncoder() *symbolsEncoder[string] { 55 return newSymbolsEncoder[string](new(stringsBlockEncoder)) 56 } 57 58 func (e *stringsBlockEncoder) format() SymbolsBlockFormat { return BlockStringsV1 } 59 60 func (e *stringsBlockEncoder) headerSize() uintptr { return unsafe.Sizeof(stringsBlockHeader{}) } 61 62 func (e *stringsBlockEncoder) encode(w io.Writer, strings []string) error { 63 e.initWrite(len(strings)) 64 e.header.BlockEncoding = e.blockEncoding(strings) 65 switch e.header.BlockEncoding { 66 case 8: 67 for j, s := range strings { 68 e.tmp[j] = byte(len(s)) 69 } 70 case 16: 71 for j, s := range strings { 72 binary.BigEndian.PutUint16(e.tmp[j*2:], uint16(len(s))) 73 } 74 } 75 if _, err := e.buf.Write(e.tmp[:len(strings)*int(e.header.BlockEncoding)/8]); err != nil { 76 return err 77 } 78 for _, s := range strings { 79 if len(s) > maxStringLen { 80 s = s[:maxStringLen] 81 } 82 if _, err := e.buf.Write(*((*[]byte)(unsafe.Pointer(&s)))); err != nil { 83 return err 84 } 85 } 86 e.tmp = slices.GrowLen(e.tmp, int(e.headerSize())) 87 e.header.marshal(e.tmp) 88 if _, err := w.Write(e.tmp); err != nil { 89 return err 90 } 91 _, err := e.buf.WriteTo(w) 92 return err 93 } 94 95 func (e *stringsBlockEncoder) blockEncoding(b []string) byte { 96 for _, s := range b { 97 if len(s) > 255 { 98 return 16 99 } 100 } 101 return 8 102 } 103 104 func (e *stringsBlockEncoder) initWrite(strings int) { 105 e.buf.Reset() 106 e.buf.Grow(strings * 16) 107 *e = stringsBlockEncoder{ 108 header: stringsBlockHeader{StringsLen: uint32(strings)}, 109 tmp: slices.GrowLen(e.tmp, strings*2), 110 buf: e.buf, 111 } 112 } 113 114 type stringsBlockDecoder struct { 115 headerSize uint16 116 header stringsBlockHeader 117 buf []byte 118 } 119 120 func newStringsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[string], error) { 121 if h.Format == BlockStringsV1 { 122 headerSize := max(stringsBlockHeaderMinSize, h.BlockHeaderSize) 123 return newSymbolsDecoder[string](h, &stringsBlockDecoder{headerSize: headerSize}), nil 124 } 125 return nil, fmt.Errorf("%w: unknown strings format: %d", ErrUnknownVersion, h.Format) 126 } 127 128 // In early versions, block header size is not specified. Must not change. 129 const stringsBlockHeaderMinSize = 12 130 131 func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) { 132 d.buf = slices.GrowLen(d.buf, int(d.headerSize)) 133 if err = readSymbolsBlockHeader(d.buf, r, &d.header); err != nil { 134 return err 135 } 136 if d.header.BlockEncoding != 8 && d.header.BlockEncoding != 16 { 137 return fmt.Errorf("invalid string block encoding: %d", d.header.BlockEncoding) 138 } 139 if d.header.StringsLen != uint32(len(strings)) { 140 return fmt.Errorf("invalid string buffer size") 141 } 142 if d.header.BlockEncoding == 8 { 143 return d.decodeStrings8(r, strings) 144 } 145 return d.decodeStrings16(r, strings) 146 } 147 148 func (d *stringsBlockDecoder) decodeStrings8(r io.Reader, dst []string) (err error) { 149 d.buf = slices.GrowLen(d.buf, len(dst)) // 1 byte per string. 150 if _, err = io.ReadFull(r, d.buf); err != nil { 151 return err 152 } 153 for i := 0; i < len(dst); i++ { 154 s := make([]byte, d.buf[i]) 155 if _, err = io.ReadFull(r, s); err != nil { 156 return err 157 } 158 dst[i] = *(*string)(unsafe.Pointer(&s)) 159 } 160 return err 161 } 162 163 func (d *stringsBlockDecoder) decodeStrings16(r io.Reader, dst []string) (err error) { 164 d.buf = slices.GrowLen(d.buf, len(dst)*2) // 2 bytes per string. 165 if _, err = io.ReadFull(r, d.buf); err != nil { 166 return err 167 } 168 for i := 0; i < len(dst); i++ { 169 l := binary.BigEndian.Uint16(d.buf[i*2:]) 170 s := make([]byte, l) 171 if _, err = io.ReadFull(r, s); err != nil { 172 return err 173 } 174 dst[i] = *(*string)(unsafe.Pointer(&s)) 175 } 176 return err 177 }