github.com/grafana/pyroscope@v1.18.0/pkg/pprof/merge.go (about) 1 package pprof 2 3 import ( 4 "fmt" 5 "hash/maphash" 6 "sort" 7 "sync" 8 9 "github.com/dolthub/swiss" 10 11 profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" 12 "github.com/grafana/pyroscope/pkg/slices" 13 ) 14 15 // TODO(kolesnikovae): 16 // Add a function that incorporates Merge and Normalize. 17 // Both functions perform some sanity checks but none of them 18 // is enough to "vet" the profile completely. 19 // Specifically: 20 // - it's possible that unreferenced objects will remain in the 21 // profile, and therefore will be written to the storage. 22 // - Normalize does not remove duplicates and unreferenced objects 23 // except samples. 24 // - Merge does not remove unreferenced objects at all. 25 // - Merge is fairly expensive: allocated capacities should be 26 // reused and the number of allocs decreased. 27 28 type ProfileMerge struct { 29 mu sync.Mutex 30 31 profile *profilev1.Profile 32 tmp []uint32 33 34 stringTable RewriteTable[string, string, string] 35 functionTable RewriteTable[FunctionKey, *profilev1.Function, *profilev1.Function] 36 mappingTable RewriteTable[MappingKey, *profilev1.Mapping, *profilev1.Mapping] 37 locationTable RewriteTable[LocationKey, *profilev1.Location, *profilev1.Location] 38 sampleTable RewriteTable[SampleKey, *profilev1.Sample, *profilev1.Sample] 39 } 40 41 // Merge adds p to the profile merge, cloning new objects. 42 // Profile p is modified in place but not retained by the function. 43 func (m *ProfileMerge) Merge(p *profilev1.Profile, sanitize bool) error { 44 m.mu.Lock() 45 defer m.mu.Unlock() 46 47 if p == nil || len(p.Sample) == 0 || len(p.StringTable) < 2 { 48 return nil 49 } 50 51 if sanitize { 52 var stats sanitizeStats 53 sanitizeProfile(p, &stats) 54 } 55 var initial bool 56 if m.profile == nil { 57 m.init(p) 58 initial = true 59 } 60 61 // We rewrite strings first in order to compare 62 // sample types and period type. 63 m.tmp = slices.GrowLen(m.tmp, len(p.StringTable)) 64 m.stringTable.Index(m.tmp, p.StringTable) 65 RewriteStrings(p, m.tmp) 66 if initial { 67 // Right after initialisation we need to make 68 // sure that the string identifiers are normalized 69 // among profiles. 70 RewriteStrings(m.profile, m.tmp) 71 } 72 73 if err := combineHeaders(m.profile, p); err != nil { 74 return err 75 } 76 77 m.tmp = slices.GrowLen(m.tmp, len(p.Function)) 78 m.functionTable.Index(m.tmp, p.Function) 79 RewriteFunctions(p, m.tmp) 80 81 m.tmp = slices.GrowLen(m.tmp, len(p.Mapping)) 82 m.mappingTable.Index(m.tmp, p.Mapping) 83 RewriteMappings(p, m.tmp) 84 85 m.tmp = slices.GrowLen(m.tmp, len(p.Location)) 86 m.locationTable.Index(m.tmp, p.Location) 87 RewriteLocations(p, m.tmp) 88 89 m.tmp = slices.GrowLen(m.tmp, len(p.Sample)) 90 m.sampleTable.Index(m.tmp, p.Sample) 91 92 for i, idx := range m.tmp { 93 dst := m.sampleTable.s[idx].Value 94 src := p.Sample[i].Value 95 for j, v := range src { 96 dst[j] += v 97 } 98 } 99 100 return nil 101 } 102 103 func (m *ProfileMerge) MergeBytes(b []byte, sanitize bool) error { 104 var p profilev1.Profile 105 if err := Unmarshal(b, &p); err != nil { 106 return err 107 } 108 return m.Merge(&p, sanitize) 109 } 110 111 func (m *ProfileMerge) Profile() *profilev1.Profile { 112 if m.profile == nil { 113 return &profilev1.Profile{ 114 SampleType: []*profilev1.ValueType{new(profilev1.ValueType)}, 115 PeriodType: new(profilev1.ValueType), 116 StringTable: []string{""}, 117 } 118 } 119 m.profile.Sample = m.sampleTable.Values() 120 m.profile.Location = m.locationTable.Values() 121 m.profile.Function = m.functionTable.Values() 122 m.profile.Mapping = m.mappingTable.Values() 123 m.profile.StringTable = m.stringTable.Values() 124 for i := range m.profile.Location { 125 m.profile.Location[i].Id = uint64(i + 1) 126 } 127 for i := range m.profile.Function { 128 m.profile.Function[i].Id = uint64(i + 1) 129 } 130 for i := range m.profile.Mapping { 131 m.profile.Mapping[i].Id = uint64(i + 1) 132 } 133 return m.profile 134 } 135 136 func (m *ProfileMerge) init(x *profilev1.Profile) { 137 factor := 2 138 m.stringTable = NewRewriteTable( 139 factor*len(x.StringTable), 140 func(s string) string { return s }, 141 func(s string) string { return s }, 142 ) 143 144 m.functionTable = NewRewriteTable[FunctionKey, *profilev1.Function, *profilev1.Function]( 145 factor*len(x.Function), GetFunctionKey, cloneVT[*profilev1.Function]) 146 147 m.mappingTable = NewRewriteTable[MappingKey, *profilev1.Mapping, *profilev1.Mapping]( 148 factor*len(x.Mapping), GetMappingKey, cloneVT[*profilev1.Mapping]) 149 150 m.locationTable = NewRewriteTable[LocationKey, *profilev1.Location, *profilev1.Location]( 151 factor*len(x.Location), GetLocationKey, cloneVT[*profilev1.Location]) 152 153 m.sampleTable = NewRewriteTable[SampleKey, *profilev1.Sample, *profilev1.Sample]( 154 factor*len(x.Sample), GetSampleKey, func(sample *profilev1.Sample) *profilev1.Sample { 155 c := sample.CloneVT() 156 slices.Clear(c.Value) 157 return c 158 }) 159 160 m.profile = &profilev1.Profile{ 161 SampleType: make([]*profilev1.ValueType, len(x.SampleType)), 162 DropFrames: x.DropFrames, 163 KeepFrames: x.KeepFrames, 164 TimeNanos: x.TimeNanos, 165 // Profile durations are summed up, therefore 166 // we skip the field at initialization. 167 // DurationNanos: x.DurationNanos, 168 PeriodType: x.PeriodType.CloneVT(), 169 Period: x.Period, 170 DefaultSampleType: x.DefaultSampleType, 171 } 172 for i, st := range x.SampleType { 173 m.profile.SampleType[i] = st.CloneVT() 174 } 175 } 176 177 func cloneVT[T interface{ CloneVT() T }](t T) T { return t.CloneVT() } 178 179 // combineHeaders checks that all profiles can be merged and returns 180 // their combined profile. 181 // NOTE(kolesnikovae): Copied from pprof. 182 func combineHeaders(a, b *profilev1.Profile) error { 183 if err := compatible(a, b); err != nil { 184 return err 185 } 186 // Smallest timestamp. 187 if a.TimeNanos == 0 || b.TimeNanos < a.TimeNanos { 188 a.TimeNanos = b.TimeNanos 189 } 190 // Summed up duration. 191 a.DurationNanos += b.DurationNanos 192 // Largest period. 193 if a.Period == 0 || a.Period < b.Period { 194 a.Period = b.Period 195 } 196 if a.DefaultSampleType == 0 { 197 a.DefaultSampleType = b.DefaultSampleType 198 } 199 return nil 200 } 201 202 // compatible determines if two profiles can be compared/merged. 203 // returns nil if the profiles are compatible; otherwise an error with 204 // details on the incompatibility. 205 func compatible(a, b *profilev1.Profile) error { 206 if !equalValueType(a.PeriodType, b.PeriodType) { 207 return fmt.Errorf("incompatible period types %v and %v", a.PeriodType, b.PeriodType) 208 } 209 if len(b.SampleType) != len(a.SampleType) { 210 return fmt.Errorf("incompatible sample types %v and %v", a.SampleType, b.SampleType) 211 } 212 for i := range a.SampleType { 213 if !equalValueType(a.SampleType[i], b.SampleType[i]) { 214 return fmt.Errorf("incompatible sample types %v and %v", a.SampleType, b.SampleType) 215 } 216 } 217 return nil 218 } 219 220 // equalValueType returns true if the two value types are semantically 221 // equal. It ignores the internal fields used during encode/decode. 222 func equalValueType(st1, st2 *profilev1.ValueType) bool { 223 if st1 == nil || st2 == nil { 224 return false 225 } 226 return st1.Type == st2.Type && st1.Unit == st2.Unit 227 } 228 229 func RewriteStrings(p *profilev1.Profile, n []uint32) { 230 for _, t := range p.SampleType { 231 if t.Unit != 0 { 232 t.Unit = int64(n[t.Unit]) 233 } 234 if t.Type != 0 { 235 t.Type = int64(n[t.Type]) 236 } 237 } 238 for _, s := range p.Sample { 239 for _, l := range s.Label { 240 l.Key = int64(n[l.Key]) 241 l.Str = int64(n[l.Str]) 242 } 243 } 244 for _, m := range p.Mapping { 245 m.Filename = int64(n[m.Filename]) 246 m.BuildId = int64(n[m.BuildId]) 247 } 248 for _, f := range p.Function { 249 f.Name = int64(n[f.Name]) 250 f.Filename = int64(n[f.Filename]) 251 f.SystemName = int64(n[f.SystemName]) 252 } 253 p.DropFrames = int64(n[p.DropFrames]) 254 p.KeepFrames = int64(n[p.KeepFrames]) 255 if p.PeriodType != nil { 256 if p.PeriodType.Type != 0 { 257 p.PeriodType.Type = int64(n[p.PeriodType.Type]) 258 } 259 if p.PeriodType.Unit != 0 { 260 p.PeriodType.Unit = int64(n[p.PeriodType.Unit]) 261 } 262 } 263 for i, x := range p.Comment { 264 p.Comment[i] = int64(n[x]) 265 } 266 p.DefaultSampleType = int64(n[p.DefaultSampleType]) 267 } 268 269 func RewriteFunctions(p *profilev1.Profile, n []uint32) { 270 for _, loc := range p.Location { 271 for _, line := range loc.Line { 272 if line.FunctionId > 0 { 273 line.FunctionId = uint64(n[line.FunctionId-1]) + 1 274 } 275 } 276 } 277 } 278 279 func RewriteMappings(p *profilev1.Profile, n []uint32) { 280 for _, loc := range p.Location { 281 if loc.MappingId > 0 { 282 loc.MappingId = uint64(n[loc.MappingId-1]) + 1 283 } 284 } 285 } 286 287 func RewriteLocations(p *profilev1.Profile, n []uint32) { 288 for _, s := range p.Sample { 289 for i, loc := range s.LocationId { 290 if loc > 0 { 291 s.LocationId[i] = uint64(n[loc-1]) + 1 292 } 293 } 294 } 295 } 296 297 type FunctionKey struct { 298 startLine uint32 299 name uint32 300 systemName uint32 301 fileName uint32 302 } 303 304 func GetFunctionKey(fn *profilev1.Function) FunctionKey { 305 return FunctionKey{ 306 startLine: uint32(fn.StartLine), 307 name: uint32(fn.Name), 308 systemName: uint32(fn.SystemName), 309 fileName: uint32(fn.Filename), 310 } 311 } 312 313 type MappingKey struct { 314 size uint64 315 offset uint64 316 buildIDOrFile int64 317 } 318 319 func GetMappingKey(m *profilev1.Mapping) MappingKey { 320 // NOTE(kolesnikovae): Copied from pprof. 321 // Normalize addresses to handle address space randomization. 322 // Round up to next 4K boundary to avoid minor discrepancies. 323 const mapsizeRounding = 0x1000 324 size := m.MemoryLimit - m.MemoryStart 325 size = size + mapsizeRounding - 1 326 size = size - (size % mapsizeRounding) 327 k := MappingKey{ 328 size: size, 329 offset: m.FileOffset, 330 } 331 switch { 332 case m.BuildId != 0: 333 k.buildIDOrFile = m.BuildId 334 case m.Filename != 0: 335 k.buildIDOrFile = m.Filename 336 default: 337 // A mapping containing neither build ID nor file name is a fake mapping. A 338 // key with empty buildIDOrFile is used for fake mappings so that they are 339 // treated as the same mapping during merging. 340 } 341 return k 342 } 343 344 type LocationKey struct { 345 addr uint64 346 lines uint64 347 mappingID uint64 348 } 349 350 func GetLocationKey(loc *profilev1.Location) LocationKey { 351 return LocationKey{ 352 addr: loc.Address, 353 mappingID: loc.MappingId, 354 lines: hashLines(loc.Line), 355 } 356 } 357 358 type SampleKey struct { 359 locations uint64 360 labels uint64 361 } 362 363 func GetSampleKey(s *profilev1.Sample) SampleKey { 364 return SampleKey{ 365 locations: hashLocations(s.LocationId), 366 labels: hashLabels(s.Label), 367 } 368 } 369 370 var mapHashSeed = maphash.MakeSeed() 371 372 // NOTE(kolesnikovae): 373 // Probably we should use strings instead of hashes 374 // to eliminate collisions. 375 376 func hashLocations(s []uint64) uint64 { 377 return maphash.Bytes(mapHashSeed, uint64Bytes(s)) 378 } 379 380 func hashLines(s []*profilev1.Line) uint64 { 381 x := make([]uint64, len(s)) 382 for i, l := range s { 383 x[i] = l.FunctionId | uint64(l.Line)<<32 384 } 385 return maphash.Bytes(mapHashSeed, uint64Bytes(x)) 386 } 387 388 func hashLabels(s []*profilev1.Label) uint64 { 389 if len(s) == 0 { 390 return 0 391 } 392 sort.Sort(LabelsByKeyValue(s)) 393 x := make([]uint64, len(s)) 394 for i, l := range s { 395 // Num and Unit ignored. 396 x[i] = uint64(l.Key | l.Str<<32) 397 } 398 return maphash.Bytes(mapHashSeed, uint64Bytes(x)) 399 } 400 401 // RewriteTable maintains unique values V and their indices. 402 // V is never modified nor retained, K and M are kept in memory. 403 type RewriteTable[K comparable, V, M any] struct { 404 k func(V) K 405 v func(V) M 406 t *swiss.Map[K, uint32] 407 s []M 408 } 409 410 func NewRewriteTable[K comparable, V, M any]( 411 size int, 412 k func(V) K, 413 v func(V) M, 414 ) RewriteTable[K, V, M] { 415 return RewriteTable[K, V, M]{ 416 k: k, 417 v: v, 418 t: swiss.NewMap[K, uint32](uint32(size)), 419 s: make([]M, 0, size), 420 } 421 } 422 423 func (t *RewriteTable[K, V, M]) Index(dst []uint32, values []V) { 424 for i, value := range values { 425 k := t.k(value) 426 n, found := t.t.Get(k) 427 if !found { 428 n = uint32(len(t.s)) 429 t.s = append(t.s, t.v(value)) 430 t.t.Put(k, n) 431 } 432 dst[i] = n 433 } 434 } 435 436 func (t *RewriteTable[K, V, M]) Append(values []V) { 437 for _, value := range values { 438 k := t.k(value) 439 n := uint32(len(t.s)) 440 t.s = append(t.s, t.v(value)) 441 t.t.Put(k, n) 442 } 443 } 444 445 func (t *RewriteTable[K, V, M]) Values() []M { return t.s }