github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/symdb/locations.go (about) 1 //nolint:unused 2 package symdb 3 4 import ( 5 "bytes" 6 "encoding/binary" 7 "fmt" 8 "hash/crc32" 9 "io" 10 "unsafe" 11 12 "github.com/parquet-go/parquet-go/encoding/delta" 13 14 v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" 15 "github.com/grafana/pyroscope/pkg/slices" 16 ) 17 18 const maxLocationLines = 255 19 20 var ( 21 _ symbolsBlockEncoder[v1.InMemoryLocation] = (*locationsBlockEncoder)(nil) 22 _ symbolsBlockDecoder[v1.InMemoryLocation] = (*locationsBlockDecoder)(nil) 23 ) 24 25 type locationsBlockHeader struct { 26 LocationsLen uint32 // Number of locations 27 MappingSize uint32 // Size of the encoded slice of mapping_ids 28 LinesLen uint32 // Number of lines per location 29 LinesSize uint32 // Size of the encoded lines 30 // Optional, might be empty. 31 AddrSize uint32 // Size of the encoded slice of addresses 32 IsFoldedSize uint32 // Size of the encoded slice of is_folded 33 CRC uint32 // Header CRC. 34 } 35 36 func (h *locationsBlockHeader) marshal(b []byte) { 37 binary.BigEndian.PutUint32(b[0:4], h.LocationsLen) 38 binary.BigEndian.PutUint32(b[4:8], h.MappingSize) 39 binary.BigEndian.PutUint32(b[8:12], h.LinesLen) 40 binary.BigEndian.PutUint32(b[12:16], h.LinesSize) 41 binary.BigEndian.PutUint32(b[16:20], h.AddrSize) 42 binary.BigEndian.PutUint32(b[20:24], h.IsFoldedSize) 43 // Fields can be added here in the future. 44 // CRC must be the last four bytes. 45 h.CRC = crc32.Checksum(b[0:24], castagnoli) 46 binary.BigEndian.PutUint32(b[24:28], h.CRC) 47 } 48 49 func (h *locationsBlockHeader) unmarshal(b []byte) { 50 h.LocationsLen = binary.BigEndian.Uint32(b[0:4]) 51 h.MappingSize = binary.BigEndian.Uint32(b[4:8]) 52 h.LinesLen = binary.BigEndian.Uint32(b[8:12]) 53 h.LinesSize = binary.BigEndian.Uint32(b[12:16]) 54 h.AddrSize = binary.BigEndian.Uint32(b[16:20]) 55 h.IsFoldedSize = binary.BigEndian.Uint32(b[20:24]) 56 // In future versions, new fields are decoded here; 57 // if pos < len(b)-checksumSize, then there are more fields. 58 h.CRC = binary.BigEndian.Uint32(b[24:28]) 59 } 60 61 func (h *locationsBlockHeader) checksum() uint32 { return h.CRC } 62 63 type locationsBlockEncoder struct { 64 header locationsBlockHeader 65 66 mapping []int32 67 // Assuming there are no locations with more than 255 lines. 68 // We could even use a nibble (4 bits), but there are locations 69 // with 10 and more functions, therefore there is a change that 70 // capacity of 2^4 is not enough in all cases. 71 lineCount []byte 72 lines []int32 73 // Optional. 74 addr []int64 75 folded []bool 76 77 tmp []byte 78 buf bytes.Buffer 79 } 80 81 func newLocationsEncoder() *symbolsEncoder[v1.InMemoryLocation] { 82 return newSymbolsEncoder[v1.InMemoryLocation](new(locationsBlockEncoder)) 83 } 84 85 func (e *locationsBlockEncoder) format() SymbolsBlockFormat { return BlockLocationsV1 } 86 87 func (e *locationsBlockEncoder) headerSize() uintptr { return unsafe.Sizeof(locationsBlockHeader{}) } 88 89 func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) error { 90 e.initWrite(len(locations)) 91 var addr uint64 92 var folded bool 93 for i, loc := range locations { 94 e.mapping[i] = int32(loc.MappingId) 95 e.lineCount[i] = byte(len(loc.Line)) 96 for j := 0; j < len(loc.Line) && j < maxLocationLines; j++ { 97 e.lines = append(e.lines, 98 int32(loc.Line[j].FunctionId), 99 loc.Line[j].Line) 100 } 101 addr |= loc.Address 102 e.addr[i] = int64(loc.Address) 103 folded = folded || loc.IsFolded 104 e.folded[i] = loc.IsFolded 105 } 106 107 // Mapping and line count per location. 108 var enc delta.BinaryPackedEncoding 109 e.tmp, _ = enc.EncodeInt32(e.tmp, e.mapping) 110 e.header.MappingSize = uint32(len(e.tmp)) 111 e.buf.Write(e.tmp) 112 // Line count size and length is deterministic. 113 e.buf.Write(e.lineCount) // Without any encoding. 114 115 // Lines slice size and length (in lines, not int32s). 116 e.tmp, _ = enc.EncodeInt32(e.tmp, e.lines) 117 e.header.LinesLen = uint32(len(e.lines) / 2) 118 e.header.LinesSize = uint32(len(e.tmp)) 119 e.buf.Write(e.tmp) 120 121 if addr > 0 { 122 e.tmp, _ = enc.EncodeInt64(e.tmp, e.addr) 123 e.header.AddrSize = uint32(len(e.tmp)) 124 e.buf.Write(e.tmp) 125 } 126 127 if folded { 128 e.tmp = slices.GrowLen(e.tmp, len(e.folded)/8+1) 129 encodeBoolean(e.tmp, e.folded) 130 e.header.IsFoldedSize = uint32(len(e.tmp)) 131 e.buf.Write(e.tmp) 132 } 133 134 e.tmp = slices.GrowLen(e.tmp, int(e.headerSize())) 135 e.header.marshal(e.tmp) 136 if _, err := w.Write(e.tmp); err != nil { 137 return err 138 } 139 _, err := e.buf.WriteTo(w) 140 return err 141 } 142 143 func (e *locationsBlockEncoder) initWrite(locations int) { 144 // Actual estimate is ~6 bytes per location. 145 // In a large data set, the most expensive member 146 // is FunctionID, and it's about 2 bytes per location. 147 e.buf.Reset() 148 e.buf.Grow(locations * 8) 149 *e = locationsBlockEncoder{ 150 header: locationsBlockHeader{LocationsLen: uint32(locations)}, 151 152 mapping: slices.GrowLen(e.mapping, locations), 153 lineCount: slices.GrowLen(e.lineCount, locations), 154 lines: e.lines[:0], // Appendable. 155 addr: slices.GrowLen(e.addr, locations), 156 folded: slices.GrowLen(e.folded, locations), 157 158 buf: e.buf, 159 tmp: slices.GrowLen(e.tmp, 2*locations), 160 } 161 } 162 163 type locationsBlockDecoder struct { 164 headerSize uint16 165 header locationsBlockHeader 166 167 mappings []int32 168 lineCount []byte 169 lines []int32 170 171 address []int64 172 folded []bool 173 174 buf []byte 175 } 176 177 func newLocationsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryLocation], error) { 178 if h.Format == BlockLocationsV1 { 179 headerSize := max(locationsBlockHeaderMinSize, h.BlockHeaderSize) 180 return newSymbolsDecoder[v1.InMemoryLocation](h, &locationsBlockDecoder{headerSize: headerSize}), nil 181 } 182 return nil, fmt.Errorf("%w: unknown locations format: %d", ErrUnknownVersion, h.Format) 183 } 184 185 // In early versions, block header size is not specified. Must not change. 186 const locationsBlockHeaderMinSize = 28 187 188 func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocation) (err error) { 189 d.buf = slices.GrowLen(d.buf, int(d.headerSize)) 190 if err = readSymbolsBlockHeader(d.buf, r, &d.header); err != nil { 191 return err 192 } 193 if d.header.LocationsLen != uint32(len(locations)) { 194 return fmt.Errorf("locations buffer: %w", ErrInvalidSize) 195 } 196 197 // First we decode mapping_id and assign them to locations. 198 d.buf = slices.GrowLen(d.buf, int(d.header.MappingSize)) 199 if _, err = io.ReadFull(r, d.buf); err != nil { 200 return err 201 } 202 d.mappings, err = decodeBinaryPackedInt32(d.mappings, d.buf, int(d.header.LocationsLen)) 203 if err != nil { 204 return err 205 } 206 207 // Line count per location. 208 // One byte per location. 209 d.lineCount = slices.GrowLen(d.lineCount, int(d.header.LocationsLen)) 210 if _, err = io.ReadFull(r, d.lineCount); err != nil { 211 return err 212 } 213 214 // Lines. A single slice backs all the location line 215 // sub-slices. But it has to be allocated as we can't 216 // reference d.lines, which is reusable. 217 lines := make([]v1.InMemoryLine, d.header.LinesLen) 218 d.buf = slices.GrowLen(d.buf, int(d.header.LinesSize)) 219 if _, err = io.ReadFull(r, d.buf); err != nil { 220 return err 221 } 222 // Lines are encoded as pairs of uint32 (function_id and line number). 223 d.lines, err = decodeBinaryPackedInt32(d.lines, d.buf, int(d.header.LinesLen)*2) 224 if err != nil { 225 return err 226 } 227 copy(lines, *(*[]v1.InMemoryLine)(unsafe.Pointer(&d.lines))) 228 229 // In most cases we end up here. 230 if d.header.AddrSize == 0 && d.header.IsFoldedSize == 0 { 231 var o int // Offset within the lines slice. 232 // In case if the block is malformed, an invalid 233 // line count may cause an out-of-bounds panic. 234 maxLines := len(lines) 235 for i := 0; i < len(locations); i++ { 236 locations[i].MappingId = uint32(d.mappings[i]) 237 n := o + int(d.lineCount[i]) 238 if n > maxLines { 239 return fmt.Errorf("%w: location lines out of bounds", ErrInvalidSize) 240 } 241 locations[i].Line = lines[o:n] 242 o = n 243 } 244 return nil 245 } 246 247 // Otherwise, inspect all the optional fields. 248 d.address = slices.GrowLen(d.address, int(d.header.LocationsLen)) 249 d.folded = slices.GrowLen(d.folded, int(d.header.LocationsLen)) 250 if int(d.header.AddrSize) > 0 { 251 d.buf = slices.GrowLen(d.buf, int(d.header.AddrSize)) 252 if _, err = io.ReadFull(r, d.buf); err != nil { 253 return err 254 } 255 d.address, err = decodeBinaryPackedInt64(d.address, d.buf, int(d.header.LocationsLen)) 256 if err != nil { 257 return err 258 } 259 } 260 if int(d.header.IsFoldedSize) > 0 { 261 d.buf = slices.GrowLen(d.buf, int(d.header.IsFoldedSize)) 262 if _, err = io.ReadFull(r, d.buf); err != nil { 263 return err 264 } 265 decodeBoolean(d.folded, d.buf) 266 } 267 268 var o int // Offset within the lines slice. 269 for i := uint32(0); i < d.header.LocationsLen; i++ { 270 locations[i].MappingId = uint32(d.mappings[i]) 271 n := o + int(d.lineCount[i]) 272 locations[i].Line = lines[o:n] 273 o = n 274 locations[i].Address = uint64(d.address[i]) 275 locations[i].IsFolded = d.folded[i] 276 } 277 278 return nil 279 } 280 281 func encodeBoolean(dst []byte, src []bool) { 282 for i := range dst { 283 dst[i] = 0 284 } 285 for i, b := range src { 286 if b { 287 dst[i>>3] |= 1 << i & 7 288 } 289 } 290 } 291 292 func decodeBoolean(dst []bool, src []byte) { 293 for i := range dst { 294 dst[i] = false 295 } 296 for i := range dst { 297 dst[i] = src[i>>3]&(1<<i&7) != 0 298 } 299 }