github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/symdb/rewriter.go (about)

     1  package symdb
     2  
     3  import (
     4  	"context"
     5  	"math"
     6  	"sort"
     7  
     8  	lru "github.com/hashicorp/golang-lru/v2"
     9  
    10  	schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
    11  	"github.com/grafana/pyroscope/pkg/slices"
    12  )
    13  
// Rewriter copies the symbols referenced by stack traces from a source
// SymbolsReader into a destination SymDB, translating source IDs into
// the IDs assigned by the destination.
//
// Fields:
//   - symdb provides the partition writers the symbols are appended to.
//   - source provides the partition readers the symbols are read from.
//   - partitions caches per-partition rewriter state; it is created
//     lazily on the first Rewrite call.
//   - observer, if not nil, is notified after symbols have been appended.
type Rewriter struct {
	symdb      *SymDB
	source     SymbolsReader
	partitions *lru.Cache[uint64, *partitionRewriter]
	observer   SymbolsObserver
}
    20  
// SymbolsObserver receives a notification each time a rewriter has appended
// new symbols to a destination partition.
type SymbolsObserver interface {
	// ObserveSymbols is called once new symbols have been rewritten.
	// Implementations must not modify the symbols.
	// When used within a SampleObserver, Evaluate should be called first.
	//
	// In this file it is invoked at the end of appendRewrite with the
	// destination partition's strings, functions and locations, the stack
	// traces appended by that call (stacktraceValues, as location IDs) and
	// the stack trace lookup buffer (stacktraceIds; presumably the IDs the
	// destination assigned to those stack traces — confirm against the
	// partition writer).
	ObserveSymbols(strings []string, functions []schemav1.InMemoryFunction, locations []schemav1.InMemoryLocation,
		stacktraceValues [][]int32, stacktraceIds []uint32)
}
    27  
    28  func NewRewriter(w *SymDB, r SymbolsReader, o SymbolsObserver) *Rewriter {
    29  	return &Rewriter{
    30  		source:   r,
    31  		symdb:    w,
    32  		observer: o,
    33  	}
    34  }
    35  
    36  func (r *Rewriter) Rewrite(partition uint64, stacktraces []uint32) error {
    37  	p, err := r.init(partition)
    38  	if err != nil {
    39  		return err
    40  	}
    41  	if err = p.populateUnresolved(stacktraces); err != nil {
    42  		return err
    43  	}
    44  	if p.hasUnresolved() {
    45  		return p.appendRewrite(stacktraces)
    46  	}
    47  	return nil
    48  }
    49  
    50  func (r *Rewriter) init(partition uint64) (p *partitionRewriter, err error) {
    51  	if r.partitions == nil {
    52  		r.partitions, _ = lru.NewWithEvict(2, func(_ uint64, p *partitionRewriter) {
    53  			p.reader.Release()
    54  		})
    55  	}
    56  	return r.getOrCreatePartitionRewriter(partition)
    57  }
    58  
    59  func (r *Rewriter) getOrCreatePartitionRewriter(partition uint64) (_ *partitionRewriter, err error) {
    60  	p, ok := r.partitions.Get(partition)
    61  	if ok {
    62  		p.reset()
    63  		return p, nil
    64  	}
    65  	pr, err := r.newRewriter(partition)
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  	r.partitions.Add(partition, pr)
    70  	return pr, nil
    71  }
    72  
    73  func (r *Rewriter) newRewriter(p uint64) (*partitionRewriter, error) {
    74  	n := &partitionRewriter{name: p}
    75  	reader, err := r.source.Partition(context.TODO(), p)
    76  	if err != nil {
    77  		return nil, err
    78  	}
    79  	n.reader = reader
    80  	n.dst = r.symdb.PartitionWriter(p)
    81  	// We clone locations, functions, and mappings,
    82  	// because these object will be modified.
    83  	n.src = cloneSymbolsPartially(reader.Symbols())
    84  	var stats PartitionStats
    85  	reader.WriteStats(&stats)
    86  	n.stacktraces = newLookupTable[[]int32](stats.MaxStacktraceID)
    87  	n.locations = newLookupTable[schemav1.InMemoryLocation](stats.LocationsTotal)
    88  	n.mappings = newLookupTable[schemav1.InMemoryMapping](stats.MappingsTotal)
    89  	n.functions = newLookupTable[schemav1.InMemoryFunction](stats.FunctionsTotal)
    90  	n.strings = newLookupTable[string](stats.StringsTotal)
    91  	n.observer = r.observer
    92  	return n, nil
    93  }
    94  
// partitionRewriter holds the state required to rewrite symbols of a single
// partition.
//
// Fields:
//   - name is the partition identifier.
//   - src is a partial copy of the source symbols (see cloneSymbolsPartially).
//   - dst is the destination partition writer.
//   - reader is the source partition reader; it is released when the
//     rewriter is evicted from the Rewriter cache.
//   - stacktraces, locations, mappings, functions and strings map source
//     IDs to destination IDs and collect the values to be appended.
//   - current is a scratch buffer reused by stacktracesFromResolvedValues.
//   - observer, if not nil, is notified at the end of appendRewrite.
type partitionRewriter struct {
	name   uint64
	src    *Symbols
	dst    *PartitionWriter
	reader PartitionReader

	stacktraces *lookupTable[[]int32]
	locations   *lookupTable[schemav1.InMemoryLocation]
	mappings    *lookupTable[schemav1.InMemoryMapping]
	functions   *lookupTable[schemav1.InMemoryFunction]
	strings     *lookupTable[string]
	current     []*schemav1.Stacktrace

	observer SymbolsObserver
}
   110  
   111  func (p *partitionRewriter) reset() {
   112  	p.stacktraces.reset()
   113  	p.locations.reset()
   114  	p.mappings.reset()
   115  	p.functions.reset()
   116  	p.strings.reset()
   117  	p.current = p.current[:0]
   118  }
   119  
   120  func (p *partitionRewriter) hasUnresolved() bool {
   121  	return len(p.stacktraces.unresolved)+
   122  		len(p.locations.unresolved)+
   123  		len(p.mappings.unresolved)+
   124  		len(p.functions.unresolved)+
   125  		len(p.strings.unresolved) > 0
   126  }
   127  
   128  func (p *partitionRewriter) populateUnresolved(stacktraceIDs []uint32) error {
   129  	// Filter out all stack traces that have been already
   130  	// resolved and populate locations lookup table.
   131  	if err := p.resolveStacktraces(stacktraceIDs); err != nil {
   132  		return err
   133  	}
   134  	if len(p.locations.unresolved) == 0 {
   135  		return nil
   136  	}
   137  
   138  	// Resolve functions and mappings for new locations.
   139  	unresolvedLocs := p.locations.iter()
   140  	for unresolvedLocs.Next() {
   141  		location := p.src.Locations[unresolvedLocs.At()]
   142  		location.MappingId = p.mappings.tryLookup(location.MappingId)
   143  		if len(p.src.Functions) == 0 {
   144  			location.Line = nil
   145  			continue
   146  		}
   147  		for j, line := range location.Line {
   148  			location.Line[j].FunctionId = p.functions.tryLookup(line.FunctionId)
   149  		}
   150  		unresolvedLocs.setValue(location)
   151  	}
   152  
   153  	// Resolve strings.
   154  	unresolvedMappings := p.mappings.iter()
   155  	for unresolvedMappings.Next() {
   156  		mapping := p.src.Mappings[unresolvedMappings.At()]
   157  		mapping.BuildId = p.strings.tryLookup(mapping.BuildId)
   158  		mapping.Filename = p.strings.tryLookup(mapping.Filename)
   159  		unresolvedMappings.setValue(mapping)
   160  	}
   161  
   162  	unresolvedFunctions := p.functions.iter()
   163  	for unresolvedFunctions.Next() {
   164  		function := p.src.Functions[unresolvedFunctions.At()]
   165  		function.Name = p.strings.tryLookup(function.Name)
   166  		function.Filename = p.strings.tryLookup(function.Filename)
   167  		function.SystemName = p.strings.tryLookup(function.SystemName)
   168  		unresolvedFunctions.setValue(function)
   169  	}
   170  
   171  	unresolvedStrings := p.strings.iter()
   172  	for unresolvedStrings.Next() {
   173  		unresolvedStrings.setValue(p.src.Strings[unresolvedStrings.At()])
   174  	}
   175  
   176  	return nil
   177  }
   178  
   179  func (p *partitionRewriter) appendRewrite(stacktraces []uint32) error {
   180  	p.dst.AppendStrings(p.strings.buf, p.strings.values)
   181  	p.strings.updateResolved()
   182  
   183  	for i := range p.functions.values {
   184  		p.functions.values[i].Name = p.strings.lookupResolved(p.functions.values[i].Name)
   185  		p.functions.values[i].Filename = p.strings.lookupResolved(p.functions.values[i].Filename)
   186  		p.functions.values[i].SystemName = p.strings.lookupResolved(p.functions.values[i].SystemName)
   187  	}
   188  	p.dst.AppendFunctions(p.functions.buf, p.functions.values)
   189  	p.functions.updateResolved()
   190  
   191  	for i := range p.mappings.values {
   192  		p.mappings.values[i].BuildId = p.strings.lookupResolved(p.mappings.values[i].BuildId)
   193  		p.mappings.values[i].Filename = p.strings.lookupResolved(p.mappings.values[i].Filename)
   194  	}
   195  	p.dst.AppendMappings(p.mappings.buf, p.mappings.values)
   196  	p.mappings.updateResolved()
   197  
   198  	for i := range p.locations.values {
   199  		p.locations.values[i].MappingId = p.mappings.lookupResolved(p.locations.values[i].MappingId)
   200  		for j, line := range p.locations.values[i].Line {
   201  			p.locations.values[i].Line[j].FunctionId = p.functions.lookupResolved(line.FunctionId)
   202  		}
   203  	}
   204  	p.dst.AppendLocations(p.locations.buf, p.locations.values)
   205  	p.locations.updateResolved()
   206  
   207  	for _, v := range p.stacktraces.values {
   208  		for j, location := range v {
   209  			v[j] = int32(p.locations.lookupResolved(uint32(location)))
   210  		}
   211  	}
   212  	p.dst.AppendStacktraces(p.stacktraces.buf, p.stacktracesFromResolvedValues())
   213  	p.stacktraces.updateResolved()
   214  
   215  	for i, v := range stacktraces {
   216  		stacktraces[i] = p.stacktraces.lookupResolved(v)
   217  	}
   218  
   219  	if p.observer != nil {
   220  		p.observer.ObserveSymbols(p.dst.strings.slice, p.dst.functions.slice, p.dst.locations.slice, p.stacktraces.values, p.stacktraces.buf)
   221  	}
   222  
   223  	return nil
   224  }
   225  
   226  func (p *partitionRewriter) resolveStacktraces(stacktraceIDs []uint32) error {
   227  	for i, v := range stacktraceIDs {
   228  		stacktraceIDs[i] = p.stacktraces.tryLookup(v)
   229  	}
   230  	if len(p.stacktraces.unresolved) == 0 {
   231  		return nil
   232  	}
   233  	p.stacktraces.initSorted()
   234  	return p.src.Stacktraces.ResolveStacktraceLocations(
   235  		context.Background(), p, p.stacktraces.buf)
   236  }
   237  
   238  func (p *partitionRewriter) stacktracesFromResolvedValues() []*schemav1.Stacktrace {
   239  	p.current = slices.GrowLen(p.current, len(p.stacktraces.values))
   240  	for i, v := range p.stacktraces.values {
   241  		s := p.current[i]
   242  		if s == nil {
   243  			s = &schemav1.Stacktrace{LocationIDs: make([]uint64, len(v))}
   244  			p.current[i] = s
   245  		}
   246  		s.LocationIDs = slices.GrowLen(s.LocationIDs, len(v))
   247  		for j, m := range v {
   248  			s.LocationIDs[j] = uint64(m)
   249  		}
   250  	}
   251  	return p.current
   252  }
   253  
   254  func (p *partitionRewriter) InsertStacktrace(stacktrace uint32, locations []int32) {
   255  	// Resolve locations for new stack traces.
   256  	for j, loc := range locations {
   257  		locations[j] = int32(p.locations.tryLookup(uint32(loc)))
   258  	}
   259  	// stacktrace points to resolved which should
   260  	// be a marked pointer to unresolved value.
   261  	idx := p.stacktraces.resolved[stacktrace] & markerMask
   262  	v := &p.stacktraces.values[idx]
   263  	n := slices.GrowLen(*v, len(locations))
   264  	copy(n, locations)
   265  	// Preserve allocated capacity.
   266  	p.stacktraces.values[idx] = n
   267  }
   268  
   269  func cloneSymbolsPartially(x *Symbols) *Symbols {
   270  	n := Symbols{
   271  		Stacktraces: x.Stacktraces,
   272  		Locations:   make([]schemav1.InMemoryLocation, len(x.Locations)),
   273  		Mappings:    make([]schemav1.InMemoryMapping, len(x.Mappings)),
   274  		Functions:   make([]schemav1.InMemoryFunction, len(x.Functions)),
   275  		Strings:     x.Strings,
   276  	}
   277  	for i, l := range x.Locations {
   278  		n.Locations[i] = l.Clone()
   279  	}
   280  	for i, m := range x.Mappings {
   281  		n.Mappings[i] = m.Clone()
   282  	}
   283  	for i, f := range x.Functions {
   284  		n.Functions[i] = f.Clone()
   285  	}
   286  	return &n
   287  }
   288  
// References produced by lookupTable use the most significant bit to
// distinguish pending entries from known destination IDs:
//   - marker is the flag bit; when it is set, the remaining bits are an
//     index into lookupTable.unresolved and lookupTable.values.
//   - markerMask clears the flag bit and leaves the index.
const (
	marker     = 1 << 31
	markerMask = math.MaxUint32 >> 1
)
   293  
// lookupTable maps source IDs to destination IDs and collects the values
// that are not present in the destination yet.
//
// An element of resolved is one of:
//   - 0: the source ID has not been seen;
//   - a value with the marker bit set: the source ID is pending, and the
//     remaining bits are the index into unresolved and values;
//   - otherwise: the destination ID plus one.
type lookupTable[T any] struct {
	// Index is source ID, and the value is the destination ID.
	// If destination ID is not known, the element is index to 'unresolved' (marked).
	resolved   []uint32
	unresolved []uint32 // Source IDs of pending entries (indices of resolved). Index matches values.
	values     []T      // Values are populated for unresolved items.
	buf        []uint32 // Sorted copy of unresolved (see initSorted); updateResolved reads destination IDs from it.
}
   302  
   303  func newLookupTable[T any](size int) *lookupTable[T] {
   304  	var t lookupTable[T]
   305  	t.grow(size)
   306  	return &t
   307  }
   308  
   309  func (t *lookupTable[T]) grow(size int) {
   310  	if cap(t.resolved) < size {
   311  		t.resolved = make([]uint32, size)
   312  		return
   313  	}
   314  	t.resolved = t.resolved[:size]
   315  	for i := range t.resolved {
   316  		t.resolved[i] = 0
   317  	}
   318  }
   319  
   320  func (t *lookupTable[T]) reset() {
   321  	t.unresolved = t.unresolved[:0]
   322  	t.values = t.values[:0]
   323  	t.buf = t.buf[:0]
   324  }
   325  
   326  // tryLookup looks up the value at x in resolved.
   327  // If x is has not been resolved yet, the x is memorized
   328  // for future resolve, and returned values is the marked
   329  // index to unresolved.
   330  func (t *lookupTable[T]) tryLookup(x uint32) uint32 {
   331  	// todo(ctovena): this is a hack to make sure we don't have any out of bounds errors
   332  	// see https://github.com/grafana/pyroscope/issues/2488
   333  	if x >= uint32(len(t.resolved)) {
   334  		t.grow(int(x + 1))
   335  	}
   336  	if v := t.resolved[x]; v != 0 {
   337  		if v&marker > 0 {
   338  			return v // Already marked for resolve.
   339  		}
   340  		return v - 1 // Already resolved.
   341  	}
   342  	u := t.newUnresolved(x) | marker
   343  	t.resolved[x] = u
   344  	return u
   345  }
   346  
   347  func (t *lookupTable[T]) newUnresolved(rid uint32) uint32 {
   348  	t.unresolved = append(t.unresolved, rid)
   349  	x := len(t.values)
   350  	if x < cap(t.values) {
   351  		// Try to reuse previously allocated value.
   352  		t.values = t.values[:x+1]
   353  	} else {
   354  		var v T
   355  		t.values = append(t.values, v)
   356  	}
   357  	return uint32(x)
   358  }
   359  
   360  func (t *lookupTable[T]) storeResolved(i int, rid uint32) {
   361  	// The index is incremented to avoid 0 because it is
   362  	// used as sentinel and indicates absence (resolved is
   363  	// a sparse slice initialized with the maximal expected
   364  	// size). Correspondingly, lookupResolved should
   365  	// decrement the index on read.
   366  	t.resolved[t.unresolved[i]] = rid + 1
   367  }
   368  
   369  func (t *lookupTable[T]) lookupResolved(x uint32) uint32 {
   370  	if x&marker > 0 {
   371  		return t.resolved[t.unresolved[x&markerMask]] - 1
   372  	}
   373  	return x // Already resolved.
   374  }
   375  
   376  // updateResolved loads indices from buf to resolved.
   377  // It is expected that the order matches values.
   378  func (t *lookupTable[T]) updateResolved() {
   379  	for i, rid := range t.unresolved {
   380  		t.resolved[rid] = t.buf[i] + 1
   381  	}
   382  }
   383  
   384  func (t *lookupTable[T]) initSorted() {
   385  	// Gather and sort references to unresolved values.
   386  	t.buf = slices.GrowLen(t.buf, len(t.unresolved))
   387  	copy(t.buf, t.unresolved)
   388  	sort.Slice(t.buf, func(i, j int) bool {
   389  		return t.buf[i] < t.buf[j]
   390  	})
   391  }
   392  
   393  func (t *lookupTable[T]) iter() *lookupTableIterator[T] {
   394  	t.initSorted()
   395  	return &lookupTableIterator[T]{table: t}
   396  }
   397  
// lookupTableIterator iterates over the sorted source IDs of the pending
// entries of a lookup table (table.buf).
//
// Unlike conventional iterators, the position is advanced by At rather
// than by Next: Next only reports whether there is an element to read.
// cur is the index in table.buf of the element the next At call returns.
type lookupTableIterator[T any] struct {
	table *lookupTable[T]
	cur   uint32
}
   402  
   403  func (t *lookupTableIterator[T]) Next() bool {
   404  	return t.cur < uint32(len(t.table.buf))
   405  }
   406  
   407  func (t *lookupTableIterator[T]) At() uint32 {
   408  	x := t.table.buf[t.cur]
   409  	t.cur++
   410  	return x
   411  }
   412  
   413  func (t *lookupTableIterator[T]) setValue(v T) {
   414  	u := t.table.resolved[t.table.buf[t.cur-1]]
   415  	t.table.values[u&markerMask] = v
   416  }
   417  
   418  func (t *lookupTableIterator[T]) Close() error { return nil }
   419  
   420  func (t *lookupTableIterator[T]) Err() error { return nil }