github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/symdb/dedup_slice.go (about) 1 //nolint:unused,unparam 2 package symdb 3 4 import ( 5 "fmt" 6 "hash/maphash" 7 stdslices "slices" 8 "sort" 9 "sync" 10 "unsafe" 11 12 "github.com/colega/zeropool" 13 "go.uber.org/atomic" 14 15 profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" 16 schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" 17 "github.com/grafana/pyroscope/pkg/pprof" 18 "github.com/grafana/pyroscope/pkg/slices" 19 ) 20 21 // Refactored as is from the phlaredb package. 22 23 var ( 24 int64SlicePool zeropool.Pool[[]int64] 25 uint32SlicePool zeropool.Pool[[]uint32] 26 ) 27 28 // TODO(kolesnikovae): 29 // - PartitionWriter should only rewrite profile symbol indices; 30 // - InMemoryProfile should be created somewhere else on the call side. 31 32 func (p *PartitionWriter) WriteProfileSymbols(profile *profilev1.Profile) []schemav1.InMemoryProfile { 33 // create a rewriter state 34 rewrites := &rewriter{} 35 36 spans := pprof.ProfileSpans(profile) 37 pprof.ZeroLabelStrings(profile) 38 39 p.strings.ingest(profile.StringTable, rewrites) 40 mappings := make([]schemav1.InMemoryMapping, len(profile.Mapping)) 41 for i, v := range profile.Mapping { 42 mappings[i] = schemav1.InMemoryMapping{ 43 Id: v.Id, 44 MemoryStart: v.MemoryStart, 45 MemoryLimit: v.MemoryLimit, 46 FileOffset: v.FileOffset, 47 Filename: uint32(v.Filename), 48 BuildId: uint32(v.BuildId), 49 HasFunctions: v.HasFunctions, 50 HasFilenames: v.HasFilenames, 51 HasLineNumbers: v.HasLineNumbers, 52 HasInlineFrames: v.HasInlineFrames, 53 } 54 } 55 56 p.mappings.ingest(mappings, rewrites) 57 funcs := make([]schemav1.InMemoryFunction, len(profile.Function)) 58 for i, v := range profile.Function { 59 funcs[i] = schemav1.InMemoryFunction{ 60 Id: v.Id, 61 Name: uint32(v.Name), 62 SystemName: uint32(v.SystemName), 63 Filename: uint32(v.Filename), 64 StartLine: uint32(v.StartLine), 65 } 66 } 67 68 p.functions.ingest(funcs, rewrites) 69 locs := make([]schemav1.InMemoryLocation, len(profile.Location)) 70 for i, v := range profile.Location { 71 x := schemav1.InMemoryLocation{ 72 Id: v.Id, 73 Address: v.Address, 74 MappingId: uint32(v.MappingId), 75 IsFolded: v.IsFolded, 76 } 77 x.Line = make([]schemav1.InMemoryLine, len(v.Line)) 78 for j, line := range v.Line { 79 x.Line[j] = schemav1.InMemoryLine{ 80 FunctionId: uint32(line.FunctionId), 81 Line: int32(line.Line), 82 } 83 } 84 locs[i] = x 85 } 86 87 p.locations.ingest(locs, rewrites) 88 samplesPerType := p.convertSamples(rewrites, profile.Sample, spans) 89 90 profiles := make([]schemav1.InMemoryProfile, len(samplesPerType)) 91 for idxType := range samplesPerType { 92 profiles[idxType] = schemav1.InMemoryProfile{ 93 StacktracePartition: p.header.Partition, 94 Samples: samplesPerType[idxType], 95 DropFrames: profile.DropFrames, 96 KeepFrames: profile.KeepFrames, 97 TimeNanos: profile.TimeNanos, 98 DurationNanos: profile.DurationNanos, 99 Comments: copySlice(profile.Comment), 100 DefaultSampleType: profile.DefaultSampleType, 101 } 102 } 103 104 return profiles 105 } 106 107 func (p *PartitionWriter) convertSamples(r *rewriter, in []*profilev1.Sample, spans []uint64) []schemav1.Samples { 108 if len(in) == 0 { 109 return nil 110 } 111 112 // populate output 113 var ( 114 samplesByType = make([]schemav1.Samples, len(in[0].Value)) 115 stacktraces = make([]*schemav1.Stacktrace, len(in)) 116 ) 117 118 for i := range samplesByType { 119 s := schemav1.Samples{ 120 Values: make([]uint64, len(in)), 121 StacktraceIDs: make([]uint32, len(in)), 122 } 123 if len(spans) > 0 { 124 s.Spans = make([]uint64, len(spans)) 125 copy(s.Spans, spans) 126 } 127 samplesByType[i] = s 128 } 129 130 for idxSample := range in { 131 // populate samples 132 src := in[idxSample] 133 for idxType := range samplesByType { 134 samplesByType[idxType].Values[idxSample] = uint64(src.Value[idxType]) 135 } 136 stacktraces[idxSample] = &schemav1.Stacktrace{LocationIDs: src.LocationId} 137 for i := range stacktraces[idxSample].LocationIDs { 138 r.locations.rewriteUint64(&stacktraces[idxSample].LocationIDs[i]) 139 } 140 } 141 142 stacktracesIds := slices.GrowLen(uint32SlicePool.Get(), len(stacktraces)) 143 p.stacktraces.append(stacktracesIds, stacktraces) 144 145 // Rewrite stacktraces 146 for idxType := range samplesByType { 147 samples := samplesByType[idxType] 148 for i := range samples.StacktraceIDs { 149 samples.StacktraceIDs[i] = stacktracesIds[i] 150 } 151 samples = samples.Compact(false) 152 sort.Sort(samples) 153 samplesByType[idxType] = samples 154 } 155 156 uint32SlicePool.Put(stacktracesIds) 157 return samplesByType 158 } 159 160 func copySlice[T any](in []T) []T { 161 out := make([]T, len(in)) 162 copy(out, in) 163 return out 164 } 165 166 type idConversionTable map[int64]int64 167 168 // nolint unused 169 func (t idConversionTable) rewrite(idx *int64) { 170 pos := *idx 171 var ok bool 172 *idx, ok = t[pos] 173 if !ok { 174 panic(fmt.Sprintf("unable to rewrite index %d", pos)) 175 } 176 } 177 178 // nolint unused 179 func (t idConversionTable) rewriteUint64(idx *uint64) { 180 pos := *idx 181 v, ok := t[int64(pos)] 182 if !ok { 183 panic(fmt.Sprintf("unable to rewrite index %d", pos)) 184 } 185 *idx = uint64(v) 186 } 187 188 // nolint unused 189 func (t idConversionTable) rewriteUint32(idx *uint32) { 190 pos := *idx 191 v, ok := t[int64(pos)] 192 if !ok { 193 panic(fmt.Sprintf("unable to rewrite index %d", pos)) 194 } 195 *idx = uint32(v) 196 } 197 198 func emptyRewriter() *rewriter { 199 return &rewriter{ 200 strings: []int64{0}, 201 } 202 } 203 204 // rewriter contains slices to rewrite the per profile reference into per head references. 205 type rewriter struct { 206 strings stringConversionTable 207 // nolint unused 208 functions idConversionTable 209 // nolint unused 210 mappings idConversionTable 211 // nolint unused 212 locations idConversionTable 213 } 214 215 type storeHelper[M schemav1.Models] interface { 216 // some Models contain their own IDs within the struct, this allows to set them and keep track of the preexisting ID. It should return the oldID that is supposed to be rewritten. 217 setID(existingSliceID uint64, newID uint64, element *M) uint64 218 219 // size returns a (rough estimation) of the size of a single element M 220 size(M) uint64 221 222 // clone copies parts that are not optimally sized from protobuf parsing 223 clone(M) M 224 225 rewrite(*rewriter, *M) error 226 } 227 228 type Helper[M schemav1.Models, K comparable] interface { 229 storeHelper[M] 230 key(M) K 231 addToRewriter(*rewriter, idConversionTable) 232 } 233 234 type deduplicatingSlice[M schemav1.Models, K comparable, H Helper[M, K]] struct { 235 lock sync.RWMutex 236 slice []M 237 size atomic.Uint64 238 lookup map[K]int64 239 240 helper H 241 } 242 243 func (s *deduplicatingSlice[M, K, H]) init() { 244 s.lookup = make(map[K]int64) 245 } 246 247 func (s *deduplicatingSlice[M, K, H]) MemorySize() uint64 { 248 // FIXME(kolesnikovae): Map footprint + slice capacity. 249 return s.size.Load() 250 } 251 252 func (s *deduplicatingSlice[M, K, H]) Size() uint64 { 253 return s.size.Load() 254 } 255 256 func (s *deduplicatingSlice[M, K, H]) ingest(elems []M, rewriter *rewriter) { 257 var ( 258 rewritingMap = make(map[int64]int64, len(elems)) 259 missing = int64SlicePool.Get() 260 ) 261 missing = missing[:0] 262 // rewrite elements 263 for pos := range elems { 264 _ = s.helper.rewrite(rewriter, &elems[pos]) 265 } 266 267 // try to find if element already exists in slice, when supposed to deduplicate 268 s.lock.RLock() 269 for pos := range elems { 270 k := s.helper.key(elems[pos]) 271 if posSlice, exists := s.lookup[k]; exists { 272 rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice 273 } else { 274 missing = append(missing, int64(pos)) 275 } 276 } 277 s.lock.RUnlock() 278 279 // if there are missing elements, acquire write lock 280 if len(missing) > 0 { 281 s.lock.Lock() 282 posSlice := int64(len(s.slice)) 283 s.slice = stdslices.Grow(s.slice, len(missing)) 284 for _, pos := range missing { 285 // check again if element exists 286 k := s.helper.key(elems[pos]) 287 if posSlice, exists := s.lookup[k]; exists { 288 rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice 289 continue 290 } 291 292 // add element to slice/map 293 s.slice = append(s.slice, s.helper.clone(elems[pos])) 294 s.lookup[k] = posSlice 295 rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice 296 posSlice++ 297 s.size.Add(s.helper.size(elems[pos])) 298 } 299 s.lock.Unlock() 300 } 301 302 // nolint staticcheck 303 int64SlicePool.Put(missing) 304 305 // add rewrite information to struct 306 s.helper.addToRewriter(rewriter, rewritingMap) 307 } 308 309 func (s *deduplicatingSlice[M, K, H]) append(dst []uint32, elems []M) { 310 missing := int64SlicePool.Get()[:0] 311 s.lock.RLock() 312 for i, v := range elems { 313 k := s.helper.key(v) 314 if x, ok := s.lookup[k]; ok { 315 dst[i] = uint32(x) 316 } else { 317 missing = append(missing, int64(i)) 318 } 319 } 320 s.lock.RUnlock() 321 if len(missing) > 0 { 322 s.lock.RLock() 323 p := uint32(len(s.slice)) 324 for _, i := range missing { 325 e := elems[i] 326 k := s.helper.key(e) 327 x, ok := s.lookup[k] 328 if ok { 329 dst[i] = uint32(x) 330 continue 331 } 332 s.size.Add(s.helper.size(e)) 333 s.slice = append(s.slice, s.helper.clone(e)) 334 s.lookup[k] = int64(p) 335 dst[i] = p 336 p++ 337 } 338 s.lock.RUnlock() 339 } 340 int64SlicePool.Put(missing) 341 } 342 343 func (s *deduplicatingSlice[M, K, H]) sliceHeaderCopy() []M { 344 s.lock.RLock() 345 h := s.slice 346 s.lock.RUnlock() 347 return h 348 } 349 350 type stringConversionTable []int64 351 352 func (t stringConversionTable) rewrite(idx *int64) { 353 originalValue := int(*idx) 354 newValue := t[originalValue] 355 *idx = newValue 356 } 357 358 func (t stringConversionTable) rewriteUint32(idx *uint32) { 359 originalValue := int(*idx) 360 newValue := t[originalValue] 361 *idx = uint32(newValue) 362 } 363 364 type stringsHelper struct{} 365 366 func (*stringsHelper) key(s string) string { 367 return s 368 } 369 370 func (*stringsHelper) addToRewriter(r *rewriter, m idConversionTable) { 371 var maxID int64 372 for id := range m { 373 if id > maxID { 374 maxID = id 375 } 376 } 377 r.strings = make(stringConversionTable, maxID+1) 378 379 for x, y := range m { 380 r.strings[x] = y 381 } 382 } 383 384 // nolint unused 385 func (*stringsHelper) rewrite(*rewriter, *string) error { 386 return nil 387 } 388 389 func (*stringsHelper) size(s string) uint64 { 390 return uint64(len(s)) 391 } 392 393 func (*stringsHelper) setID(oldID, newID uint64, s *string) uint64 { 394 return oldID 395 } 396 397 func (*stringsHelper) clone(s string) string { 398 return s 399 } 400 401 type locationsKey struct { 402 MappingId uint32 //nolint 403 Address uint64 404 LinesHash uint64 405 } 406 407 const ( 408 lineSize = uint64(unsafe.Sizeof(schemav1.InMemoryLine{})) 409 locationSize = uint64(unsafe.Sizeof(schemav1.InMemoryLocation{})) 410 ) 411 412 type locationsHelper struct{} 413 414 func (*locationsHelper) key(l schemav1.InMemoryLocation) locationsKey { 415 return locationsKey{ 416 Address: l.Address, 417 MappingId: l.MappingId, 418 LinesHash: hashLines(l.Line), 419 } 420 } 421 422 var mapHashSeed = maphash.MakeSeed() 423 424 func hashLines(s []schemav1.InMemoryLine) uint64 { 425 if len(s) == 0 { 426 return 0 427 } 428 p := (*byte)(unsafe.Pointer(&s[0])) 429 b := unsafe.Slice(p, len(s)*int(lineSize)) 430 return maphash.Bytes(mapHashSeed, b) 431 } 432 433 func hashLocations(s []uint64) uint64 { 434 if len(s) == 0 { 435 return 0 436 } 437 p := (*byte)(unsafe.Pointer(&s[0])) 438 b := unsafe.Slice(p, len(s)*8) 439 return maphash.Bytes(mapHashSeed, b) 440 } 441 442 func (*locationsHelper) addToRewriter(r *rewriter, elemRewriter idConversionTable) { 443 r.locations = elemRewriter 444 } 445 446 func (*locationsHelper) rewrite(r *rewriter, l *schemav1.InMemoryLocation) error { 447 // when mapping id is not 0, rewrite it 448 if l.MappingId != 0 { 449 r.mappings.rewriteUint32(&l.MappingId) 450 } 451 for pos := range l.Line { 452 r.functions.rewriteUint32(&l.Line[pos].FunctionId) 453 } 454 return nil 455 } 456 457 func (*locationsHelper) setID(_, newID uint64, l *schemav1.InMemoryLocation) uint64 { 458 oldID := l.Id 459 l.Id = newID 460 return oldID 461 } 462 463 func (*locationsHelper) size(l schemav1.InMemoryLocation) uint64 { 464 return uint64(len(l.Line))*lineSize + locationSize 465 } 466 467 func (*locationsHelper) clone(l schemav1.InMemoryLocation) schemav1.InMemoryLocation { 468 x := l 469 x.Line = make([]schemav1.InMemoryLine, len(l.Line)) 470 copy(x.Line, l.Line) 471 return x 472 } 473 474 type mappingsHelper struct{} 475 476 const mappingSize = uint64(unsafe.Sizeof(schemav1.InMemoryMapping{})) 477 478 type mappingsKey struct { 479 MemoryStart uint64 480 MemoryLimit uint64 481 FileOffset uint64 482 Filename uint32 // Index into string table 483 BuildId uint32 // Index into string table 484 HasFunctions bool 485 HasFilenames bool 486 HasLineNumbers bool 487 HasInlineFrames bool 488 } 489 490 func (*mappingsHelper) key(m schemav1.InMemoryMapping) mappingsKey { 491 return mappingsKey{ 492 MemoryStart: m.MemoryStart, 493 MemoryLimit: m.MemoryLimit, 494 FileOffset: m.FileOffset, 495 Filename: m.Filename, 496 BuildId: m.BuildId, 497 HasFunctions: m.HasFunctions, 498 HasFilenames: m.HasFilenames, 499 HasLineNumbers: m.HasLineNumbers, 500 HasInlineFrames: m.HasInlineFrames, 501 } 502 } 503 504 func (*mappingsHelper) addToRewriter(r *rewriter, elemRewriter idConversionTable) { 505 r.mappings = elemRewriter 506 } 507 508 // nolint unparam 509 func (*mappingsHelper) rewrite(r *rewriter, m *schemav1.InMemoryMapping) error { 510 r.strings.rewriteUint32(&m.Filename) 511 r.strings.rewriteUint32(&m.BuildId) 512 return nil 513 } 514 515 func (*mappingsHelper) setID(_, newID uint64, m *schemav1.InMemoryMapping) uint64 { 516 oldID := m.Id 517 m.Id = newID 518 return oldID 519 } 520 521 func (*mappingsHelper) size(_ schemav1.InMemoryMapping) uint64 { 522 return mappingSize 523 } 524 525 func (*mappingsHelper) clone(m schemav1.InMemoryMapping) schemav1.InMemoryMapping { 526 return m 527 } 528 529 type functionsKey struct { 530 Name uint32 531 SystemName uint32 532 Filename uint32 533 StartLine uint32 534 } 535 536 type functionsHelper struct{} 537 538 const functionSize = uint64(unsafe.Sizeof(schemav1.InMemoryFunction{})) 539 540 func (*functionsHelper) key(f schemav1.InMemoryFunction) functionsKey { 541 return functionsKey{ 542 Name: f.Name, 543 SystemName: f.SystemName, 544 Filename: f.Filename, 545 StartLine: f.StartLine, 546 } 547 } 548 549 func (*functionsHelper) addToRewriter(r *rewriter, elemRewriter idConversionTable) { 550 r.functions = elemRewriter 551 } 552 553 func (*functionsHelper) rewrite(r *rewriter, f *schemav1.InMemoryFunction) error { 554 r.strings.rewriteUint32(&f.Filename) 555 r.strings.rewriteUint32(&f.Name) 556 r.strings.rewriteUint32(&f.SystemName) 557 return nil 558 } 559 560 func (*functionsHelper) setID(_, newID uint64, f *schemav1.InMemoryFunction) uint64 { 561 oldID := f.Id 562 f.Id = newID 563 return oldID 564 } 565 566 func (*functionsHelper) size(_ schemav1.InMemoryFunction) uint64 { 567 return functionSize 568 } 569 570 func (*functionsHelper) clone(f schemav1.InMemoryFunction) schemav1.InMemoryFunction { 571 return f 572 }