github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/downsample/downsample.go (about)

     1  package downsample
     2  
import (
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"

	"github.com/dolthub/swiss"
	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/parquet-go/parquet-go"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/common/model"

	"github.com/grafana/pyroscope/pkg/phlaredb/block"
	schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
	"github.com/grafana/pyroscope/pkg/util/build"
)
    21  
    22  type interval struct {
    23  	durationSeconds int64
    24  	shortName       string
    25  }
    26  
    27  type aggregationType struct {
    28  	fn   func(a, b int64) int64
    29  	name string
    30  }
    31  
    32  type state struct {
    33  	currentRow          parquet.Row
    34  	currentTime         int64
    35  	currentFp           model.Fingerprint
    36  	currentPartition    uint64
    37  	totalValue          int64
    38  	profileCount        int64
    39  	stackTraceIds       []uint64
    40  	values              []int64
    41  	annotations         schemav1.Annotations
    42  	stackTraceIdToIndex *swiss.Map[uint64, int]
    43  }
    44  
    45  type downsampleConfig struct {
    46  	interval    interval
    47  	aggregation aggregationType
    48  }
    49  
    50  var (
    51  	intervals = []interval{
    52  		{
    53  			durationSeconds: 5 * 60,
    54  			shortName:       "5m",
    55  		},
    56  		{
    57  			durationSeconds: 60 * 60,
    58  			shortName:       "1h",
    59  		},
    60  	}
    61  	aggregations = []aggregationType{
    62  		{
    63  			name: "sum",
    64  			fn: func(a, b int64) int64 {
    65  				return a + b
    66  			},
    67  		},
    68  	}
    69  	configs               = initConfigs()
    70  	inputSamplesHistogram = promauto.NewHistogram(
    71  		prometheus.HistogramOpts{
    72  			Name:    "pyroscope_downsampler_input_profile_samples",
    73  			Help:    "The number of samples per profile before downsampling",
    74  			Buckets: prometheus.ExponentialBuckets(32, 2, 15),
    75  		})
    76  	outputSamplesHistogram = promauto.NewHistogramVec(
    77  		prometheus.HistogramOpts{
    78  			Name:    "pyroscope_downsampler_output_profile_samples",
    79  			Help:    "The number of samples per profile after downsampling",
    80  			Buckets: prometheus.ExponentialBuckets(32, 2, 15),
    81  		}, []string{"interval"})
    82  )
    83  
    84  func initConfigs() []downsampleConfig {
    85  	configs := make([]downsampleConfig, 0)
    86  	for _, i := range intervals {
    87  		for _, a := range aggregations {
    88  			configs = append(configs, downsampleConfig{
    89  				interval:    i,
    90  				aggregation: a,
    91  			})
    92  		}
    93  	}
    94  	return configs
    95  }
    96  
    97  type profilesWriter struct {
    98  	*parquet.GenericWriter[*schemav1.Profile]
    99  	file *os.File
   100  
   101  	buf []parquet.Row
   102  }
   103  
   104  func (p *profilesWriter) WriteRow(r parquet.Row) error {
   105  	p.buf[0] = r
   106  	_, err := p.WriteRows(p.buf)
   107  	if err != nil {
   108  		return err
   109  	}
   110  
   111  	return nil
   112  }
   113  
   114  func newProfilesWriter(path string, i interval, aggregation string) (*profilesWriter, error) {
   115  	profilePath := filepath.Join(path, fmt.Sprintf("profiles_%s_%s", i.shortName, aggregation)+block.ParquetSuffix)
   116  	profileFile, err := os.OpenFile(profilePath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o644)
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  	return &profilesWriter{
   121  		GenericWriter: newParquetProfileWriter(profileFile, parquet.MaxRowsPerRowGroup(100_000)),
   122  		file:          profileFile,
   123  		buf:           make([]parquet.Row, 1),
   124  	}, nil
   125  }
   126  
   127  func newParquetProfileWriter(writer io.Writer, options ...parquet.WriterOption) *parquet.GenericWriter[*schemav1.Profile] {
   128  	options = append(options, parquet.PageBufferSize(3*1024*1024))
   129  	options = append(options, parquet.CreatedBy("github.com/grafana/pyroscope/", build.Version, build.Revision))
   130  	options = append(options, schemav1.DownsampledProfilesSchema)
   131  	return parquet.NewGenericWriter[*schemav1.Profile](
   132  		writer, options...,
   133  	)
   134  }
   135  
   136  type Downsampler struct {
   137  	path           string
   138  	profileWriters []*profilesWriter
   139  	states         []*state
   140  	logger         log.Logger
   141  }
   142  
   143  func NewDownsampler(path string, logger log.Logger) (*Downsampler, error) {
   144  	writers := make([]*profilesWriter, 0)
   145  	states := make([]*state, 0)
   146  	for _, c := range configs {
   147  		writer, err := newProfilesWriter(path, c.interval, c.aggregation.name)
   148  		if err != nil {
   149  			return nil, err
   150  		}
   151  		writers = append(writers, writer)
   152  		states = append(states, &state{})
   153  	}
   154  
   155  	return &Downsampler{
   156  		path:           path,
   157  		profileWriters: writers,
   158  		states:         states,
   159  		logger:         logger,
   160  	}, nil
   161  }
   162  
   163  func (d *Downsampler) flush(s *state, w *profilesWriter, c downsampleConfig) error {
   164  	level.Debug(d.logger).Log(
   165  		"msg", "flushing downsampled profile",
   166  		"interval", c.interval.shortName,
   167  		"aggregation", c.aggregation.name,
   168  		"sourceProfileCount", s.profileCount,
   169  		"sampleCount", len(s.values))
   170  	outputSamplesHistogram.WithLabelValues(c.interval.shortName).Observe(float64(len(s.values)))
   171  	var (
   172  		col    = len(s.currentRow) - 1
   173  		newCol = func() int {
   174  			col++
   175  			return col
   176  		}
   177  	)
   178  	s.currentRow = append(s.currentRow, parquet.Int64Value(s.totalValue).Level(0, 0, newCol()))
   179  
   180  	newCol()
   181  	repetition := -1
   182  	for _, stacktraceId := range s.stackTraceIds {
   183  		if repetition < 1 {
   184  			repetition++
   185  		}
   186  		s.currentRow = append(s.currentRow, parquet.Int64Value(int64(stacktraceId)).Level(repetition, 1, col))
   187  	}
   188  	newCol()
   189  	repetition = -1
   190  	for _, value := range s.values {
   191  		if repetition < 1 {
   192  			repetition++
   193  		}
   194  		s.currentRow = append(s.currentRow, parquet.Int64Value(value).Level(repetition, 1, col))
   195  	}
   196  
   197  	s.currentRow = append(s.currentRow, parquet.Int64Value(s.currentTime*1000*1000*1000).Level(0, 0, newCol()))
   198  
   199  	newCol()
   200  	if len(s.annotations.Keys) == 0 {
   201  		s.currentRow = append(s.currentRow, parquet.Value{}.Level(0, 0, col))
   202  	} else {
   203  		repetition = -1
   204  		for _, key := range s.annotations.Keys {
   205  			if repetition < 1 {
   206  				repetition++
   207  			}
   208  			s.currentRow = append(s.currentRow, parquet.ByteArrayValue([]byte(key)).Level(repetition, 1, col))
   209  		}
   210  	}
   211  
   212  	newCol()
   213  	if len(s.annotations.Values) == 0 {
   214  		s.currentRow = append(s.currentRow, parquet.Value{}.Level(0, 0, col))
   215  	} else {
   216  		repetition = -1
   217  		for _, value := range s.annotations.Values {
   218  			if repetition < 1 {
   219  				repetition++
   220  			}
   221  			s.currentRow = append(s.currentRow, parquet.ByteArrayValue([]byte(value)).Level(repetition, 1, col))
   222  		}
   223  	}
   224  
   225  	err := w.WriteRow(s.currentRow)
   226  	if err != nil {
   227  		return err
   228  	}
   229  	return nil
   230  }
   231  
   232  func (d *Downsampler) AddRow(row schemav1.ProfileRow, fp model.Fingerprint) error {
   233  	rowTimeSeconds := row.TimeNanos() / 1000 / 1000 / 1000
   234  	sourceSampleCount := 0
   235  	for i, c := range configs {
   236  		s := d.states[i]
   237  		aggregationTime := rowTimeSeconds / c.interval.durationSeconds * c.interval.durationSeconds
   238  		if len(d.states[i].currentRow) == 0 {
   239  			s.init(row, aggregationTime, fp)
   240  		}
   241  		if !s.matches(aggregationTime, fp, row.StacktracePartitionID()) {
   242  			err := d.flush(s, d.profileWriters[i], c)
   243  			if err != nil {
   244  				return err
   245  			}
   246  			s.init(row, aggregationTime, fp)
   247  		}
   248  		s.profileCount++
   249  		row.ForStacktraceIdsAndValues(func(stacktraceIds []parquet.Value, values []parquet.Value) {
   250  			for i := 0; i < len(stacktraceIds); i++ {
   251  				stacktraceId := stacktraceIds[i].Uint64()
   252  				value := values[i].Int64()
   253  				index, ok := s.stackTraceIdToIndex.Get(stacktraceId)
   254  				if ok {
   255  					s.values[index] = c.aggregation.fn(s.values[index], value)
   256  				} else {
   257  					s.stackTraceIds = append(s.stackTraceIds, stacktraceId)
   258  					s.values = append(s.values, value)
   259  					s.stackTraceIdToIndex.Put(stacktraceId, len(s.values)-1)
   260  				}
   261  				s.totalValue = c.aggregation.fn(s.totalValue, value)
   262  			}
   263  			sourceSampleCount = len(values)
   264  		})
   265  		row.ForAnnotations(func(keys []parquet.Value, values []parquet.Value) {
   266  			for i := 0; i < len(keys); i++ {
   267  				key := keys[i].String()
   268  				value := values[i].String()
   269  				s.annotations.Keys = append(s.annotations.Keys, key)
   270  				s.annotations.Values = append(s.annotations.Values, value)
   271  			}
   272  		})
   273  	}
   274  	inputSamplesHistogram.Observe(float64(sourceSampleCount))
   275  	return nil
   276  }
   277  
   278  func (d *Downsampler) Close() error {
   279  	for i, c := range configs {
   280  		if len(d.states[i].currentRow) > 0 {
   281  			err := d.flush(d.states[i], d.profileWriters[i], c)
   282  			if err != nil {
   283  				return err
   284  			}
   285  		}
   286  		err := d.profileWriters[i].Close()
   287  		if err != nil {
   288  			return err
   289  		}
   290  	}
   291  	return nil
   292  }
   293  
   294  func (s *state) init(row schemav1.ProfileRow, aggregationTime int64, fp model.Fingerprint) {
   295  	s.currentTime = aggregationTime
   296  	s.currentFp = fp
   297  	s.currentPartition = row.StacktracePartitionID()
   298  	s.totalValue = 0
   299  	s.profileCount = 0
   300  	if s.values == nil {
   301  		s.values = make([]int64, 0, len(row))
   302  	} else {
   303  		s.values = s.values[:0]
   304  	}
   305  	if s.stackTraceIds == nil {
   306  		s.stackTraceIds = make([]uint64, 0, len(row))
   307  	} else {
   308  		s.stackTraceIds = s.stackTraceIds[:0]
   309  	}
   310  	if s.stackTraceIdToIndex == nil {
   311  		s.stackTraceIdToIndex = swiss.NewMap[uint64, int](uint32(len(row)))
   312  	} else {
   313  		s.stackTraceIdToIndex.Clear()
   314  	}
   315  	if s.annotations.Keys == nil {
   316  		s.annotations.Keys = make([]string, 0)
   317  		s.annotations.Values = make([]string, 0)
   318  	} else {
   319  		s.annotations.Keys = s.annotations.Keys[:0]
   320  		s.annotations.Values = s.annotations.Values[:0]
   321  	}
   322  	var (
   323  		col    = -1
   324  		newCol = func() int {
   325  			col++
   326  			return col
   327  		}
   328  	)
   329  	if s.currentRow == nil {
   330  		s.currentRow = make(parquet.Row, 0, len(row)) // we might need to make this bigger
   331  	} else {
   332  		s.currentRow = s.currentRow[:0]
   333  	}
   334  	s.currentRow = append(s.currentRow, parquet.Int32Value(int32(row.SeriesIndex())).Level(0, 0, newCol()))
   335  	s.currentRow = append(s.currentRow, parquet.Int64Value(int64(row.StacktracePartitionID())).Level(0, 0, newCol()))
   336  }
   337  
   338  func (s *state) matches(t int64, fp model.Fingerprint, sp uint64) bool {
   339  	return s.currentTime == t && s.currentFp == fp && s.currentPartition == sp
   340  }