github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/symdb/symdb.go (about)

     1  package symdb
     2  
     3  import (
     4  	"context"
     5  	"io"
     6  	"math"
     7  	"sort"
     8  	"sync"
     9  	"time"
    10  
    11  	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    12  	"github.com/grafana/pyroscope/pkg/iter"
    13  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    14  	schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
    15  )
    16  
// SymbolsReader provides access to a symdb partition.
type SymbolsReader interface {
	// Partition returns a reader for the given partition,
	// or an error if it cannot be obtained.
	Partition(ctx context.Context, partition uint64) (PartitionReader, error)
}
    21  
// PartitionReader provides access to the symbols of a single partition.
type PartitionReader interface {
	// WriteStats writes the partition statistics to s.
	WriteStats(s *PartitionStats)
	// Symbols returns the symbols of the partition.
	Symbols() *Symbols
	// Release releases resources associated with the reader.
	Release()
}
    27  
// Symbols is the in-memory representation of a partition's symbols:
// the stack trace resolver plus the location, mapping, function, and
// string tables that stack traces refer to.
type Symbols struct {
	Stacktraces StacktraceResolver
	Locations   []schemav1.InMemoryLocation
	Mappings    []schemav1.InMemoryMapping
	Functions   []schemav1.InMemoryFunction
	Strings     []string
}
    35  
// PartitionStats describes the cardinality of a partition's symbol tables.
type PartitionStats struct {
	StacktracesTotal int
	MaxStacktraceID  int
	LocationsTotal   int
	MappingsTotal    int
	FunctionsTotal   int
	StringsTotal     int
}
    44  
// StacktraceResolver resolves stack trace IDs to their locations.
type StacktraceResolver interface {
	// ResolveStacktraceLocations resolves locations for each stack
	// trace and inserts it to the StacktraceInserter provided.
	//
	// The stacktraces must be ordered in the ascending order.
	// If a stacktrace can't be resolved, dst receives an empty
	// array of locations.
	//
	// Stacktraces slice might be modified during the call.
	ResolveStacktraceLocations(ctx context.Context, dst StacktraceInserter, stacktraces []uint32) error
	// LookupLocations resolves locations of a single stack trace,
	// reusing dst as scratch space; presumably dst contents are
	// replaced rather than appended to — verify against implementations.
	LookupLocations(dst []uint64, stacktraceID uint32) []uint64

	// Optional:
	// StacktraceIDRangeIterator
}
    60  
// StacktraceIDRangeIterator provides low level access
// to stack traces, stored in parent pointer trees.
type StacktraceIDRangeIterator interface {
	SplitStacktraceIDRanges(*SampleAppender) iter.Iterator[*StacktraceIDRange]
}
    66  
// ParentPointerTree exposes a stack trace tree as a flat list of
// nodes, where each node references its parent (see Node).
type ParentPointerTree interface {
	Nodes() []Node
}
    70  
// Node is a single entry of a parent pointer tree.
type Node struct {
	// Parent references the parent node; presumably an index
	// into the Nodes slice — verify against implementations.
	Parent   int32
	// Location identifies the location of this node.
	Location int32
	// Value is the value associated with the node.
	Value    int64
}
    76  
// StacktraceInserter accepts resolved locations for a given stack
// trace. The leaf is at locations[0].
//
// Locations slice must not be retained by implementation.
// It is guaranteed, that for a given stacktrace ID
// InsertStacktrace is called not more than once.
type StacktraceInserter interface {
	InsertStacktrace(stacktraceID uint32, locations []int32)
}
    86  
// SymDB is an in-memory symbol database. It accumulates profile
// symbols per partition and writes them out via the configured
// block writer on Flush.
type SymDB struct {
	config Config
	writer blockWriter
	stats  MemoryStats // guarded by m; refreshed by updateStatsLoop

	m          sync.RWMutex
	partitions map[uint64]*PartitionWriter // guarded by m

	// wg and stop control the lifetime of the background
	// stats-update goroutine started by NewSymDB.
	wg   sync.WaitGroup
	stop chan struct{}
}
    98  
// Config controls the SymDB storage format and output.
type Config struct {
	// Version selects the storage format; unknown versions
	// fall back to FormatV2 (see NewSymDB).
	Version FormatVersion
	// Output writer. Optional, V3 only.
	Writer io.WriteCloser

	// DEPRECATED: the parameter is not used and
	// will be removed in the future versions.
	Dir string
	// DEPRECATED: the parameter is not used and
	// will be removed in the future versions.
	Stacktraces StacktracesConfig
	// DEPRECATED: the parameter is not used and
	// will be removed in the future versions.
	Parquet ParquetConfig
}
   114  
// StacktracesConfig is retained for compatibility; its only field is
// forcibly pinned to the maximum value in NewSymDB.
type StacktracesConfig struct {
	MaxNodesPerChunk uint32
}
   118  
// ParquetConfig is retained for compatibility; its only field is
// forcibly pinned to the maximum value in NewSymDB.
type ParquetConfig struct {
	// DEPRECATED: the parameter is not used and
	// will be removed in the future versions.
	MaxBufferRowCount int
}
   124  
   125  type MemoryStats struct {
   126  	StacktracesSize uint64
   127  	LocationsSize   uint64
   128  	MappingsSize    uint64
   129  	FunctionsSize   uint64
   130  	StringsSize     uint64
   131  }
   132  
   133  func (m *MemoryStats) MemorySize() uint64 {
   134  	return m.StacktracesSize +
   135  		m.LocationsSize +
   136  		m.MappingsSize +
   137  		m.FunctionsSize +
   138  		m.StringsSize
   139  }
   140  
   141  const statsUpdateInterval = 5 * time.Second
   142  
   143  func DefaultConfig() *Config {
   144  	return &Config{
   145  		Version: FormatV2,
   146  	}
   147  }
   148  
// WithDirectory sets the output directory and returns the config
// to allow call chaining.
//
// Deprecated: Dir is not used and will be removed in future versions.
func (c *Config) WithDirectory(dir string) *Config {
	c.Dir = dir
	return c
}
   153  
// WithVersion sets the storage format version and returns the
// config to allow call chaining.
func (c *Config) WithVersion(v FormatVersion) *Config {
	c.Version = v
	return c
}
   158  
// NewSymDB creates a symbol database with the given configuration;
// a nil configuration is replaced with DefaultConfig. Unrecognized
// format versions fall back to FormatV2. NewSymDB also starts a
// background goroutine that periodically refreshes memory statistics;
// it is stopped by Flush.
//
// Note that the caller's Config is mutated: the deprecated limits
// are pinned to their maximum values before the config is copied.
func NewSymDB(c *Config) *SymDB {
	if c == nil {
		c = DefaultConfig()
	}
	c.Parquet.MaxBufferRowCount = math.MaxInt
	c.Stacktraces.MaxNodesPerChunk = math.MaxUint32
	db := &SymDB{
		config:     *c,
		partitions: make(map[uint64]*PartitionWriter),
		stop:       make(chan struct{}),
	}
	switch c.Version {
	case FormatV3:
		db.writer = newWriterV3(c)
	default:
		// Anything other than V3 is treated as V2.
		db.config.Version = FormatV2
		db.writer = newWriterV2(c)
	}
	db.wg.Add(1)
	go db.updateStatsLoop()
	return db
}
   181  
   182  func (s *SymDB) PartitionWriter(partition uint64) *PartitionWriter {
   183  	p, ok := s.lookupPartition(partition)
   184  	if ok {
   185  		return p
   186  	}
   187  	s.m.Lock()
   188  	if p, ok = s.partitions[partition]; ok {
   189  		s.m.Unlock()
   190  		return p
   191  	}
   192  	p = NewPartitionWriter(partition, &s.config)
   193  	s.partitions[partition] = p
   194  	s.m.Unlock()
   195  	return p
   196  }
   197  
   198  func NewPartitionWriter(partition uint64, config *Config) *PartitionWriter {
   199  	p := PartitionWriter{
   200  		header:      PartitionHeader{Partition: partition},
   201  		stacktraces: newStacktraces(),
   202  	}
   203  	switch config.Version {
   204  	case FormatV2:
   205  		p.header.V2 = new(PartitionHeaderV2)
   206  	case FormatV3:
   207  		p.header.V3 = new(PartitionHeaderV3)
   208  	}
   209  	p.strings.init()
   210  	p.mappings.init()
   211  	p.functions.init()
   212  	p.locations.init()
   213  	// To ensure that the first string is always "".
   214  	p.strings.slice = append(p.strings.slice, "")
   215  	p.strings.lookup[""] = 0
   216  	return &p
   217  }
   218  
   219  func (s *SymDB) WriteProfileSymbols(partition uint64, profile *profilev1.Profile) []schemav1.InMemoryProfile {
   220  	return s.PartitionWriter(partition).WriteProfileSymbols(profile)
   221  }
   222  
   223  func (s *SymDB) Partition(_ context.Context, partition uint64) (PartitionReader, error) {
   224  	if p, ok := s.lookupPartition(partition); ok {
   225  		return p, nil
   226  	}
   227  	return nil, ErrPartitionNotFound
   228  }
   229  
   230  func (s *SymDB) lookupPartition(partition uint64) (*PartitionWriter, bool) {
   231  	s.m.RLock()
   232  	p, ok := s.partitions[partition]
   233  	if ok {
   234  		s.m.RUnlock()
   235  		return p, true
   236  	}
   237  	s.m.RUnlock()
   238  	return nil, false
   239  }
   240  
   241  func (s *SymDB) MemorySize() uint64 {
   242  	s.m.RLock()
   243  	m := s.stats
   244  	s.m.RUnlock()
   245  	return m.MemorySize()
   246  }
   247  
   248  var emptyMemoryStats MemoryStats
   249  
   250  func (s *SymDB) WriteMemoryStats(m *MemoryStats) {
   251  	s.m.Lock()
   252  	c := s.stats
   253  	if c == emptyMemoryStats {
   254  		s.updateStats()
   255  		c = s.stats
   256  	}
   257  	s.m.Unlock()
   258  	*m = c
   259  }
   260  
   261  func (s *SymDB) updateStatsLoop() {
   262  	t := time.NewTicker(statsUpdateInterval)
   263  	defer func() {
   264  		t.Stop()
   265  		s.wg.Done()
   266  	}()
   267  	for {
   268  		select {
   269  		case <-s.stop:
   270  			return
   271  		case <-t.C:
   272  			s.m.Lock()
   273  			s.updateStats()
   274  			s.m.Unlock()
   275  		}
   276  	}
   277  }
   278  
   279  func (s *SymDB) updateStats() {
   280  	s.stats = MemoryStats{}
   281  	for _, m := range s.partitions {
   282  		s.stats.StacktracesSize += m.stacktraces.size()
   283  		s.stats.MappingsSize += m.mappings.Size()
   284  		s.stats.FunctionsSize += m.functions.Size()
   285  		s.stats.LocationsSize += m.locations.Size()
   286  		s.stats.StringsSize += m.strings.Size()
   287  	}
   288  }
   289  
   290  func (s *SymDB) Flush() error {
   291  	close(s.stop)
   292  	s.wg.Wait()
   293  	s.updateStats()
   294  	partitions := make([]*PartitionWriter, len(s.partitions))
   295  	var i int
   296  	for _, v := range s.partitions {
   297  		partitions[i] = v
   298  		i++
   299  	}
   300  	sort.Slice(partitions, func(i, j int) bool {
   301  		return partitions[i].header.Partition < partitions[j].header.Partition
   302  	})
   303  	return s.writer.writePartitions(partitions)
   304  }
   305  
// Files returns the metadata of the files produced by the block writer.
func (s *SymDB) Files() []block.File {
	return s.writer.meta()
}
   309  
// FormatVersion returns the storage format version the SymDB writes.
func (s *SymDB) FormatVersion() FormatVersion {
	return s.config.Version
}