github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/symdb/rewriter.go (about) 1 package symdb 2 3 import ( 4 "context" 5 "math" 6 "sort" 7 8 lru "github.com/hashicorp/golang-lru/v2" 9 10 schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" 11 "github.com/grafana/pyroscope/pkg/slices" 12 ) 13 14 type Rewriter struct { 15 symdb *SymDB 16 source SymbolsReader 17 partitions *lru.Cache[uint64, *partitionRewriter] 18 observer SymbolsObserver 19 } 20 21 type SymbolsObserver interface { 22 // ObserveSymbols is called once new symbols have been rewritten. This method must not modify the symbols. 23 // When using within a SampleObserver, Evaluate should be called first 24 ObserveSymbols(strings []string, functions []schemav1.InMemoryFunction, locations []schemav1.InMemoryLocation, 25 stacktraceValues [][]int32, stacktraceIds []uint32) 26 } 27 28 func NewRewriter(w *SymDB, r SymbolsReader, o SymbolsObserver) *Rewriter { 29 return &Rewriter{ 30 source: r, 31 symdb: w, 32 observer: o, 33 } 34 } 35 36 func (r *Rewriter) Rewrite(partition uint64, stacktraces []uint32) error { 37 p, err := r.init(partition) 38 if err != nil { 39 return err 40 } 41 if err = p.populateUnresolved(stacktraces); err != nil { 42 return err 43 } 44 if p.hasUnresolved() { 45 return p.appendRewrite(stacktraces) 46 } 47 return nil 48 } 49 50 func (r *Rewriter) init(partition uint64) (p *partitionRewriter, err error) { 51 if r.partitions == nil { 52 r.partitions, _ = lru.NewWithEvict(2, func(_ uint64, p *partitionRewriter) { 53 p.reader.Release() 54 }) 55 } 56 return r.getOrCreatePartitionRewriter(partition) 57 } 58 59 func (r *Rewriter) getOrCreatePartitionRewriter(partition uint64) (_ *partitionRewriter, err error) { 60 p, ok := r.partitions.Get(partition) 61 if ok { 62 p.reset() 63 return p, nil 64 } 65 pr, err := r.newRewriter(partition) 66 if err != nil { 67 return nil, err 68 } 69 r.partitions.Add(partition, pr) 70 return pr, nil 71 } 72 73 func (r *Rewriter) newRewriter(p uint64) (*partitionRewriter, error) { 74 n := &partitionRewriter{name: p} 75 reader, err := r.source.Partition(context.TODO(), p) 76 if err != nil { 77 return nil, err 78 } 79 n.reader = reader 80 n.dst = r.symdb.PartitionWriter(p) 81 // We clone locations, functions, and mappings, 82 // because these object will be modified. 83 n.src = cloneSymbolsPartially(reader.Symbols()) 84 var stats PartitionStats 85 reader.WriteStats(&stats) 86 n.stacktraces = newLookupTable[[]int32](stats.MaxStacktraceID) 87 n.locations = newLookupTable[schemav1.InMemoryLocation](stats.LocationsTotal) 88 n.mappings = newLookupTable[schemav1.InMemoryMapping](stats.MappingsTotal) 89 n.functions = newLookupTable[schemav1.InMemoryFunction](stats.FunctionsTotal) 90 n.strings = newLookupTable[string](stats.StringsTotal) 91 n.observer = r.observer 92 return n, nil 93 } 94 95 type partitionRewriter struct { 96 name uint64 97 src *Symbols 98 dst *PartitionWriter 99 reader PartitionReader 100 101 stacktraces *lookupTable[[]int32] 102 locations *lookupTable[schemav1.InMemoryLocation] 103 mappings *lookupTable[schemav1.InMemoryMapping] 104 functions *lookupTable[schemav1.InMemoryFunction] 105 strings *lookupTable[string] 106 current []*schemav1.Stacktrace 107 108 observer SymbolsObserver 109 } 110 111 func (p *partitionRewriter) reset() { 112 p.stacktraces.reset() 113 p.locations.reset() 114 p.mappings.reset() 115 p.functions.reset() 116 p.strings.reset() 117 p.current = p.current[:0] 118 } 119 120 func (p *partitionRewriter) hasUnresolved() bool { 121 return len(p.stacktraces.unresolved)+ 122 len(p.locations.unresolved)+ 123 len(p.mappings.unresolved)+ 124 len(p.functions.unresolved)+ 125 len(p.strings.unresolved) > 0 126 } 127 128 func (p *partitionRewriter) populateUnresolved(stacktraceIDs []uint32) error { 129 // Filter out all stack traces that have been already 130 // resolved and populate locations lookup table. 131 if err := p.resolveStacktraces(stacktraceIDs); err != nil { 132 return err 133 } 134 if len(p.locations.unresolved) == 0 { 135 return nil 136 } 137 138 // Resolve functions and mappings for new locations. 139 unresolvedLocs := p.locations.iter() 140 for unresolvedLocs.Next() { 141 location := p.src.Locations[unresolvedLocs.At()] 142 location.MappingId = p.mappings.tryLookup(location.MappingId) 143 if len(p.src.Functions) == 0 { 144 location.Line = nil 145 continue 146 } 147 for j, line := range location.Line { 148 location.Line[j].FunctionId = p.functions.tryLookup(line.FunctionId) 149 } 150 unresolvedLocs.setValue(location) 151 } 152 153 // Resolve strings. 154 unresolvedMappings := p.mappings.iter() 155 for unresolvedMappings.Next() { 156 mapping := p.src.Mappings[unresolvedMappings.At()] 157 mapping.BuildId = p.strings.tryLookup(mapping.BuildId) 158 mapping.Filename = p.strings.tryLookup(mapping.Filename) 159 unresolvedMappings.setValue(mapping) 160 } 161 162 unresolvedFunctions := p.functions.iter() 163 for unresolvedFunctions.Next() { 164 function := p.src.Functions[unresolvedFunctions.At()] 165 function.Name = p.strings.tryLookup(function.Name) 166 function.Filename = p.strings.tryLookup(function.Filename) 167 function.SystemName = p.strings.tryLookup(function.SystemName) 168 unresolvedFunctions.setValue(function) 169 } 170 171 unresolvedStrings := p.strings.iter() 172 for unresolvedStrings.Next() { 173 unresolvedStrings.setValue(p.src.Strings[unresolvedStrings.At()]) 174 } 175 176 return nil 177 } 178 179 func (p *partitionRewriter) appendRewrite(stacktraces []uint32) error { 180 p.dst.AppendStrings(p.strings.buf, p.strings.values) 181 p.strings.updateResolved() 182 183 for i := range p.functions.values { 184 p.functions.values[i].Name = p.strings.lookupResolved(p.functions.values[i].Name) 185 p.functions.values[i].Filename = p.strings.lookupResolved(p.functions.values[i].Filename) 186 p.functions.values[i].SystemName = p.strings.lookupResolved(p.functions.values[i].SystemName) 187 } 188 p.dst.AppendFunctions(p.functions.buf, p.functions.values) 189 p.functions.updateResolved() 190 191 for i := range p.mappings.values { 192 p.mappings.values[i].BuildId = p.strings.lookupResolved(p.mappings.values[i].BuildId) 193 p.mappings.values[i].Filename = p.strings.lookupResolved(p.mappings.values[i].Filename) 194 } 195 p.dst.AppendMappings(p.mappings.buf, p.mappings.values) 196 p.mappings.updateResolved() 197 198 for i := range p.locations.values { 199 p.locations.values[i].MappingId = p.mappings.lookupResolved(p.locations.values[i].MappingId) 200 for j, line := range p.locations.values[i].Line { 201 p.locations.values[i].Line[j].FunctionId = p.functions.lookupResolved(line.FunctionId) 202 } 203 } 204 p.dst.AppendLocations(p.locations.buf, p.locations.values) 205 p.locations.updateResolved() 206 207 for _, v := range p.stacktraces.values { 208 for j, location := range v { 209 v[j] = int32(p.locations.lookupResolved(uint32(location))) 210 } 211 } 212 p.dst.AppendStacktraces(p.stacktraces.buf, p.stacktracesFromResolvedValues()) 213 p.stacktraces.updateResolved() 214 215 for i, v := range stacktraces { 216 stacktraces[i] = p.stacktraces.lookupResolved(v) 217 } 218 219 if p.observer != nil { 220 p.observer.ObserveSymbols(p.dst.strings.slice, p.dst.functions.slice, p.dst.locations.slice, p.stacktraces.values, p.stacktraces.buf) 221 } 222 223 return nil 224 } 225 226 func (p *partitionRewriter) resolveStacktraces(stacktraceIDs []uint32) error { 227 for i, v := range stacktraceIDs { 228 stacktraceIDs[i] = p.stacktraces.tryLookup(v) 229 } 230 if len(p.stacktraces.unresolved) == 0 { 231 return nil 232 } 233 p.stacktraces.initSorted() 234 return p.src.Stacktraces.ResolveStacktraceLocations( 235 context.Background(), p, p.stacktraces.buf) 236 } 237 238 func (p *partitionRewriter) stacktracesFromResolvedValues() []*schemav1.Stacktrace { 239 p.current = slices.GrowLen(p.current, len(p.stacktraces.values)) 240 for i, v := range p.stacktraces.values { 241 s := p.current[i] 242 if s == nil { 243 s = &schemav1.Stacktrace{LocationIDs: make([]uint64, len(v))} 244 p.current[i] = s 245 } 246 s.LocationIDs = slices.GrowLen(s.LocationIDs, len(v)) 247 for j, m := range v { 248 s.LocationIDs[j] = uint64(m) 249 } 250 } 251 return p.current 252 } 253 254 func (p *partitionRewriter) InsertStacktrace(stacktrace uint32, locations []int32) { 255 // Resolve locations for new stack traces. 256 for j, loc := range locations { 257 locations[j] = int32(p.locations.tryLookup(uint32(loc))) 258 } 259 // stacktrace points to resolved which should 260 // be a marked pointer to unresolved value. 261 idx := p.stacktraces.resolved[stacktrace] & markerMask 262 v := &p.stacktraces.values[idx] 263 n := slices.GrowLen(*v, len(locations)) 264 copy(n, locations) 265 // Preserve allocated capacity. 266 p.stacktraces.values[idx] = n 267 } 268 269 func cloneSymbolsPartially(x *Symbols) *Symbols { 270 n := Symbols{ 271 Stacktraces: x.Stacktraces, 272 Locations: make([]schemav1.InMemoryLocation, len(x.Locations)), 273 Mappings: make([]schemav1.InMemoryMapping, len(x.Mappings)), 274 Functions: make([]schemav1.InMemoryFunction, len(x.Functions)), 275 Strings: x.Strings, 276 } 277 for i, l := range x.Locations { 278 n.Locations[i] = l.Clone() 279 } 280 for i, m := range x.Mappings { 281 n.Mappings[i] = m.Clone() 282 } 283 for i, f := range x.Functions { 284 n.Functions[i] = f.Clone() 285 } 286 return &n 287 } 288 289 const ( 290 marker = 1 << 31 291 markerMask = math.MaxUint32 >> 1 292 ) 293 294 type lookupTable[T any] struct { 295 // Index is source ID, and the value is the destination ID. 296 // If destination ID is not known, the element is index to 'unresolved' (marked). 297 resolved []uint32 298 unresolved []uint32 // Points to resolved. Index matches values. 299 values []T // Values are populated for unresolved items. 300 buf []uint32 // Sorted unresolved values. 301 } 302 303 func newLookupTable[T any](size int) *lookupTable[T] { 304 var t lookupTable[T] 305 t.grow(size) 306 return &t 307 } 308 309 func (t *lookupTable[T]) grow(size int) { 310 if cap(t.resolved) < size { 311 t.resolved = make([]uint32, size) 312 return 313 } 314 t.resolved = t.resolved[:size] 315 for i := range t.resolved { 316 t.resolved[i] = 0 317 } 318 } 319 320 func (t *lookupTable[T]) reset() { 321 t.unresolved = t.unresolved[:0] 322 t.values = t.values[:0] 323 t.buf = t.buf[:0] 324 } 325 326 // tryLookup looks up the value at x in resolved. 327 // If x is has not been resolved yet, the x is memorized 328 // for future resolve, and returned values is the marked 329 // index to unresolved. 330 func (t *lookupTable[T]) tryLookup(x uint32) uint32 { 331 // todo(ctovena): this is a hack to make sure we don't have any out of bounds errors 332 // see https://github.com/grafana/pyroscope/issues/2488 333 if x >= uint32(len(t.resolved)) { 334 t.grow(int(x + 1)) 335 } 336 if v := t.resolved[x]; v != 0 { 337 if v&marker > 0 { 338 return v // Already marked for resolve. 339 } 340 return v - 1 // Already resolved. 341 } 342 u := t.newUnresolved(x) | marker 343 t.resolved[x] = u 344 return u 345 } 346 347 func (t *lookupTable[T]) newUnresolved(rid uint32) uint32 { 348 t.unresolved = append(t.unresolved, rid) 349 x := len(t.values) 350 if x < cap(t.values) { 351 // Try to reuse previously allocated value. 352 t.values = t.values[:x+1] 353 } else { 354 var v T 355 t.values = append(t.values, v) 356 } 357 return uint32(x) 358 } 359 360 func (t *lookupTable[T]) storeResolved(i int, rid uint32) { 361 // The index is incremented to avoid 0 because it is 362 // used as sentinel and indicates absence (resolved is 363 // a sparse slice initialized with the maximal expected 364 // size). Correspondingly, lookupResolved should 365 // decrement the index on read. 366 t.resolved[t.unresolved[i]] = rid + 1 367 } 368 369 func (t *lookupTable[T]) lookupResolved(x uint32) uint32 { 370 if x&marker > 0 { 371 return t.resolved[t.unresolved[x&markerMask]] - 1 372 } 373 return x // Already resolved. 374 } 375 376 // updateResolved loads indices from buf to resolved. 377 // It is expected that the order matches values. 378 func (t *lookupTable[T]) updateResolved() { 379 for i, rid := range t.unresolved { 380 t.resolved[rid] = t.buf[i] + 1 381 } 382 } 383 384 func (t *lookupTable[T]) initSorted() { 385 // Gather and sort references to unresolved values. 386 t.buf = slices.GrowLen(t.buf, len(t.unresolved)) 387 copy(t.buf, t.unresolved) 388 sort.Slice(t.buf, func(i, j int) bool { 389 return t.buf[i] < t.buf[j] 390 }) 391 } 392 393 func (t *lookupTable[T]) iter() *lookupTableIterator[T] { 394 t.initSorted() 395 return &lookupTableIterator[T]{table: t} 396 } 397 398 type lookupTableIterator[T any] struct { 399 table *lookupTable[T] 400 cur uint32 401 } 402 403 func (t *lookupTableIterator[T]) Next() bool { 404 return t.cur < uint32(len(t.table.buf)) 405 } 406 407 func (t *lookupTableIterator[T]) At() uint32 { 408 x := t.table.buf[t.cur] 409 t.cur++ 410 return x 411 } 412 413 func (t *lookupTableIterator[T]) setValue(v T) { 414 u := t.table.resolved[t.table.buf[t.cur-1]] 415 t.table.values[u&markerMask] = v 416 } 417 418 func (t *lookupTableIterator[T]) Close() error { return nil } 419 420 func (t *lookupTableIterator[T]) Err() error { return nil }