github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/symdb/sample_appender.go (about)

     1  package symdb
     2  
     3  import (
     4  	"slices"
     5  
     6  	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
     7  )
     8  
     9  // SampleAppender is a dynamic data structure that accumulates
    10  // samples, by summing them up by stack trace ID.
    11  //
    12  // It has two underlying implementations:
    13  //   - map: a hash table is used for small sparse data sets (16k by default).
    14  //     This representation is optimal for small profiles, like span profile,
    15  //     or a short time range profile of a specific service/series.
    16  //   - chunked sparse set: stack trace IDs serve as indices in a sparse set.
    17  //     Provided that the stack trace IDs are dense (as they point to the node
    18  //     index in the parent pointer tree), this representation is significantly
    19  //     more performant, but may require more space, if the stack trace IDs set
    20  //     is very sparse. In order to reduce memory consumption, the set is split
    21  //     into chunks (16k by default), that are allocated once at least one ID
    22  //     matches the chunk range. In addition, values are ordered by stack trace
    23  //     ID without being sorted explicitly.
    24  type SampleAppender struct {
    25  	// Max number of elements in the map.
    26  	// Once the limit is exceeded, values
    27  	// are migrated to the chunked set.
    28  	maxMapSize uint32
    29  	hashmap    map[uint32]uint64
    30  	chunkSize  uint32 // Must be a power of 2.
    31  	chunks     [][]uint64
    32  	size       int
    33  
    34  	Append     func(stacktrace uint32, value uint64)
    35  	AppendMany func(stacktraces []uint32, values []uint64)
    36  }
    37  
    38  // Hashmap is used for small data sets (<= 16k elements, be default).
    39  // Once the limit is exceeded, the data is migrated to the chunked set.
    40  // Chunk size is 16k (128KiB) by default.
    41  const (
    42  	defaultSampleAppenderSize = 16 << 10
    43  	defaultChunkSize          = 16 << 10
    44  )
    45  
    46  func NewSampleAppender() *SampleAppender {
    47  	return NewSampleAppenderSize(defaultSampleAppenderSize, defaultChunkSize)
    48  }
    49  
    50  func NewSampleAppenderSize(maxMapSize, chunkSize uint32) *SampleAppender {
    51  	if chunkSize == 0 || (chunkSize&(chunkSize-1)) != 0 {
    52  		panic("chunk size must be a power of 2")
    53  	}
    54  	s := &SampleAppender{
    55  		chunkSize:  chunkSize,
    56  		maxMapSize: maxMapSize,
    57  		hashmap:    make(map[uint32]uint64),
    58  	}
    59  	s.Append = s.mapAppend
    60  	s.AppendMany = s.mapAppendMany
    61  	return s
    62  }
    63  
    64  func (s *SampleAppender) mapAppend(stacktrace uint32, value uint64) {
    65  	if len(s.hashmap) < int(s.maxMapSize) {
    66  		s.hashmap[stacktrace] += value
    67  		return
    68  	}
    69  	s.migrate()
    70  	s.Append(stacktrace, value)
    71  }
    72  
    73  func (s *SampleAppender) mapAppendMany(stacktraces []uint32, values []uint64) {
    74  	if len(s.hashmap)+len(stacktraces) < int(s.maxMapSize) {
    75  		for i, stacktrace := range stacktraces {
    76  			if v := values[i]; v != 0 && stacktrace != 0 {
    77  				s.hashmap[stacktrace] += v
    78  			}
    79  		}
    80  		return
    81  	}
    82  	s.migrate()
    83  	s.AppendMany(stacktraces, values)
    84  }
    85  
    86  func (s *SampleAppender) migrate() {
    87  	s.Append = s.setAppend
    88  	s.AppendMany = s.setAppendMany
    89  	for k, v := range s.hashmap {
    90  		s.Append(k, v)
    91  	}
    92  	s.hashmap = nil
    93  }
    94  
    95  func (s *SampleAppender) setAppend(stacktrace uint32, value uint64) {
    96  	if value == 0 || stacktrace == 0 {
    97  		return
    98  	}
    99  	ci := stacktrace / s.chunkSize
   100  	vi := stacktrace & (s.chunkSize - 1) // stacktrace % s.chunkSize
   101  	if x := int(ci) + 1; x > len(s.chunks) {
   102  		s.chunks = slices.Grow(s.chunks, x)
   103  		s.chunks = s.chunks[:x]
   104  	}
   105  	c := s.chunks[ci]
   106  	if cap(c) == 0 {
   107  		c = make([]uint64, s.chunkSize)
   108  		s.chunks[ci] = c
   109  	}
   110  	v := c[vi]
   111  	c[vi] += value
   112  	if v == 0 {
   113  		s.size++
   114  	}
   115  }
   116  
   117  func (s *SampleAppender) setAppendMany(stacktraces []uint32, values []uint64) {
   118  	// Inlined Append.
   119  	for i, stacktrace := range stacktraces {
   120  		value := values[i]
   121  		if value == 0 || stacktrace == 0 {
   122  			continue
   123  		}
   124  		ci := stacktrace / s.chunkSize
   125  		vi := stacktrace & (s.chunkSize - 1) // stacktrace % s.chunkSize
   126  		if x := int(ci) + 1; x > len(s.chunks) {
   127  			s.chunks = slices.Grow(s.chunks, x)
   128  			s.chunks = s.chunks[:x]
   129  		}
   130  		c := s.chunks[ci]
   131  		if cap(c) == 0 {
   132  			c = make([]uint64, s.chunkSize)
   133  			s.chunks[ci] = c
   134  		}
   135  		v := c[vi]
   136  		c[vi] += value
   137  		if v == 0 {
   138  			s.size++
   139  		}
   140  	}
   141  }
   142  
   143  func (s *SampleAppender) Len() int { return s.size + len(s.hashmap) }
   144  
   145  func (s *SampleAppender) Samples() v1.Samples {
   146  	if len(s.hashmap) > 0 {
   147  		return v1.NewSamplesFromMap(s.hashmap)
   148  	}
   149  	samples := v1.NewSamples(s.Len())
   150  	chunks := uint32(len(s.chunks))
   151  	x := 0
   152  	for i := uint32(0); i < chunks; i++ {
   153  		values := uint32(len(s.chunks[i]))
   154  		for j := uint32(0); j < values; j++ {
   155  			if v := s.chunks[i][j]; v != 0 {
   156  				if sid := i*s.chunkSize + j; sid > 0 {
   157  					samples.StacktraceIDs[x] = sid
   158  					samples.Values[x] = v
   159  				}
   160  				x++
   161  			}
   162  		}
   163  	}
   164  	return samples
   165  }