github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/block_querier_symbols.go (about)

     1  package phlaredb
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"strings"
     9  
    10  	"github.com/grafana/dskit/multierror"
    11  	"github.com/grafana/dskit/runutil"
    12  	"github.com/parquet-go/parquet-go"
    13  	"golang.org/x/sync/errgroup"
    14  
    15  	"github.com/grafana/pyroscope/pkg/iter"
    16  	phlareobj "github.com/grafana/pyroscope/pkg/objstore"
    17  	parquetobj "github.com/grafana/pyroscope/pkg/objstore/parquet"
    18  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    19  	"github.com/grafana/pyroscope/pkg/phlaredb/query"
    20  	schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
    21  	"github.com/grafana/pyroscope/pkg/phlaredb/symdb"
    22  	"github.com/grafana/pyroscope/pkg/util"
    23  )
    24  
    25  // TODO(kolesnikovae) Decouple from phlaredb and refactor to symdb/compat.
    26  
// symbolsResolver provides read access to a block's symbols via the
// symdb reader interface and must be closed to release its resources.
type symbolsResolver interface {
	symdb.SymbolsReader
	io.Closer
}
    31  
// symbolsResolverV1 resolves symbols for the v1 block layout: stack
// traces live in their own parquet table, while locations, mappings,
// functions and strings are loaded fully into memory.
type symbolsResolverV1 struct {
	stacktraces  parquetReader[*schemav1.StacktracePersister]
	bucketReader phlareobj.Bucket
	*inMemoryParquetTables
}
    37  
    38  func newSymbolsResolverV1(ctx context.Context, bucketReader phlareobj.Bucket, meta *block.Meta) (*symbolsResolverV1, error) {
    39  	r := &symbolsResolverV1{bucketReader: bucketReader}
    40  	p := r.stacktraces.relPath()
    41  	for _, f := range meta.Files {
    42  		if f.RelPath == p {
    43  			r.stacktraces.meta = f
    44  			break
    45  		}
    46  	}
    47  	var err error
    48  	if err = r.stacktraces.open(ctx, r.bucketReader); err != nil {
    49  		return nil, err
    50  	}
    51  	r.inMemoryParquetTables, err = openInMemoryParquetTables(ctx, bucketReader, meta)
    52  	return r, err
    53  }
    54  
    55  func (r *symbolsResolverV1) Close() error {
    56  	return multierror.New(
    57  		r.stacktraces.Close(),
    58  		r.inMemoryParquetTables.Close()).
    59  		Err()
    60  }
    61  
    62  func (r *symbolsResolverV1) Partition(_ context.Context, _ uint64) (symdb.PartitionReader, error) {
    63  	p := symbolsPartition{
    64  		stats: symdb.PartitionStats{
    65  			StacktracesTotal: int(r.stacktraces.file.NumRows()),
    66  			MaxStacktraceID:  int(r.stacktraces.file.NumRows()),
    67  			LocationsTotal:   len(r.locations.cache),
    68  			MappingsTotal:    len(r.mappings.cache),
    69  			FunctionsTotal:   len(r.functions.cache),
    70  			StringsTotal:     len(r.strings.cache),
    71  		},
    72  		symbols: &symdb.Symbols{
    73  			Stacktraces: stacktraceResolverV1{r: r},
    74  			Locations:   r.locations.cache,
    75  			Mappings:    r.mappings.cache,
    76  			Functions:   r.functions.cache,
    77  			Strings:     r.strings.cache,
    78  		},
    79  	}
    80  	return &p, nil
    81  }
    82  
// stacktraceResolverV1 adapts the v1 stack traces parquet table to the
// symdb stack trace resolution interface.
type stacktraceResolverV1 struct{ r *symbolsResolverV1 }
    84  
    85  func (r stacktraceResolverV1) ResolveStacktraceLocations(ctx context.Context, dst symdb.StacktraceInserter, stacktraces []uint32) error {
    86  	column, err := schemav1.ResolveColumnByPath(r.r.stacktraces.file.Schema(), strings.Split("LocationIDs.list.element", "."))
    87  	if err != nil {
    88  		return err
    89  	}
    90  	it := query.NewRepeatedRowIterator(ctx, iter.NewSliceIterator(stacktraces), r.r.stacktraces.file.RowGroups(), column.ColumnIndex)
    91  	defer runutil.CloseWithErrCapture(&err, it, "failed to close stack trace stream")
    92  	t := make([]int32, 0, 64)
    93  	for it.Next() {
    94  		s := it.At()
    95  		t = grow(t, len(s.Values))
    96  		for i, v := range s.Values {
    97  			t[i] = v[0].Int32()
    98  		}
    99  		dst.InsertStacktrace(s.Row, t)
   100  	}
   101  	return it.Err()
   102  }
   103  
// LookupLocations always returns nil; see the note below.
func (r stacktraceResolverV1) LookupLocations(_ []uint64, _ uint32) []uint64 {
	// NOTE(kolesnikovae): This API is not supported.
	// Despite the fact that this could be implemented,
	// practically this is not viable.
	//
	// The method is only implemented to satisfy the
	// StacktraceResolver interface and must not be used.
	return nil
}
   113  
   114  func grow[T any](s []T, n int) []T {
   115  	if cap(s) < n {
   116  		return make([]T, n, 2*n)
   117  	}
   118  	return s[:n]
   119  }
   120  
// symbolsResolverV2 resolves symbols for blocks stored in the symdb
// format; the parquet symbol tables are still loaded fully into memory
// via the embedded inMemoryParquetTables.
type symbolsResolverV2 struct {
	symbols *symdb.Reader
	bucket  phlareobj.Bucket
	*inMemoryParquetTables
}
   126  
   127  func newSymbolsResolverV2(ctx context.Context, b phlareobj.Bucket, meta *block.Meta) (*symbolsResolverV2, error) {
   128  	r := symbolsResolverV2{bucket: b}
   129  	var err error
   130  	if r.symbols, err = symdb.Open(ctx, b, meta); err != nil {
   131  		return nil, err
   132  	}
   133  	r.inMemoryParquetTables, err = openInMemoryParquetTables(ctx, b, meta)
   134  	return &r, err
   135  }
   136  
   137  func (r *symbolsResolverV2) Close() error {
   138  	err := multierror.New()
   139  	if r.symbols != nil {
   140  		err.Add(r.symbols.Close())
   141  	}
   142  	if r.inMemoryParquetTables != nil {
   143  		err.Add(r.inMemoryParquetTables.Close())
   144  	}
   145  	return err.Err()
   146  }
   147  
   148  func (r *symbolsResolverV2) Partition(ctx context.Context, partition uint64) (symdb.PartitionReader, error) {
   149  	sr, err := r.symbols.Partition(ctx, partition)
   150  	if err != nil {
   151  		return nil, err
   152  	}
   153  	var t symdb.PartitionStats
   154  	sr.WriteStats(&t)
   155  	s := sr.Symbols()
   156  	p := symbolsPartition{
   157  		stats: symdb.PartitionStats{
   158  			StacktracesTotal: t.StacktracesTotal,
   159  			MaxStacktraceID:  t.MaxStacktraceID,
   160  			LocationsTotal:   len(s.Locations),
   161  			MappingsTotal:    len(s.Mappings),
   162  			FunctionsTotal:   len(s.Functions),
   163  			StringsTotal:     len(s.Strings),
   164  		},
   165  		symbols: &symdb.Symbols{
   166  			Stacktraces: s.Stacktraces,
   167  			Locations:   r.locations.cache,
   168  			Mappings:    r.mappings.cache,
   169  			Functions:   r.functions.cache,
   170  			Strings:     r.strings.cache,
   171  		},
   172  		release: sr.Release,
   173  	}
   174  	return &p, nil
   175  }
   176  
// symbolsPartition is a symdb.PartitionReader backed by pre-computed
// statistics and symbols, with an optional release callback.
type symbolsPartition struct {
	stats   symdb.PartitionStats
	symbols *symdb.Symbols
	release func() // optional; invoked by Release
}
   182  
// Symbols returns the partition's symbol tables.
func (p *symbolsPartition) Symbols() *symdb.Symbols { return p.symbols }
   184  
// WriteStats copies the partition statistics into stats.
func (p *symbolsPartition) WriteStats(stats *symdb.PartitionStats) { *stats = p.stats }
   186  
   187  func (p *symbolsPartition) Release() {
   188  	if p.release != nil {
   189  		p.release()
   190  	}
   191  }
   192  
// inMemoryParquetTables holds the symbol tables shared by both block
// layouts, each read entirely into memory from its parquet file.
type inMemoryParquetTables struct {
	strings   inMemoryparquetReader[string, schemav1.StringPersister]
	functions inMemoryparquetReader[schemav1.InMemoryFunction, schemav1.FunctionPersister]
	locations inMemoryparquetReader[schemav1.InMemoryLocation, schemav1.LocationPersister]
	mappings  inMemoryparquetReader[schemav1.InMemoryMapping, schemav1.MappingPersister]
}
   199  
   200  func openInMemoryParquetTables(ctx context.Context, r phlareobj.BucketReader, meta *block.Meta) (*inMemoryParquetTables, error) {
   201  	var t inMemoryParquetTables
   202  	for _, f := range meta.Files {
   203  		switch f.RelPath {
   204  		case t.locations.relPath():
   205  			t.locations.meta = f
   206  		case t.functions.relPath():
   207  			t.functions.meta = f
   208  		case t.mappings.relPath():
   209  			t.mappings.meta = f
   210  		case t.strings.relPath():
   211  			t.strings.meta = f
   212  		}
   213  	}
   214  	g, ctx := errgroup.WithContext(ctx)
   215  	g.Go(func() error { return t.locations.open(ctx, r) })
   216  	g.Go(func() error { return t.mappings.open(ctx, r) })
   217  	g.Go(func() error { return t.functions.open(ctx, r) })
   218  	g.Go(func() error { return t.strings.open(ctx, r) })
   219  	return &t, g.Wait()
   220  }
   221  
   222  func (t *inMemoryParquetTables) Close() error {
   223  	return multierror.New(
   224  		t.strings.Close(),
   225  		t.functions.Close(),
   226  		t.locations.Close(),
   227  		t.mappings.Close()).
   228  		Err()
   229  }
   230  
// ResultWithRowNum pairs a decoded model with the parquet row number it
// was read from.
type ResultWithRowNum[M any] struct {
	Result M
	RowNum int64
}
   235  
// inMemoryparquetReader reads a parquet table of model M, decoded by
// persister P, fully into an in-memory row cache.
type inMemoryparquetReader[M schemav1.Models, P schemav1.Persister[M]] struct {
	persister P
	meta      block.File // table file entry, matched via relPath against the block meta
	cache     []M        // all rows, indexed by row number; nil after Close
}
   241  
   242  func (r *inMemoryparquetReader[M, P]) open(ctx context.Context, bucketReader phlareobj.BucketReader) error {
   243  	var file parquetobj.File
   244  	if err := file.Open(
   245  		ctx,
   246  		bucketReader,
   247  		r.meta,
   248  		parquet.SkipBloomFilters(true), // we don't use bloom filters
   249  		parquet.FileReadMode(parquet.ReadModeAsync),
   250  		parquet.ReadBufferSize(parquetReadBufferSize),
   251  	); err != nil {
   252  		return err
   253  	}
   254  
   255  	// read all rows into memory
   256  	r.cache = make([]M, file.NumRows())
   257  	var offset int64
   258  	for _, rg := range file.RowGroups() {
   259  		rows := rg.NumRows()
   260  		dst := r.cache[offset : offset+rows]
   261  		offset += rows
   262  		if err := r.readRG(dst, rg); err != nil {
   263  			return fmt.Errorf("reading row group from parquet file '%s': %w", file.Path(), err)
   264  		}
   265  	}
   266  	return file.Close()
   267  }
   268  
// Batch size for bulk row reads. parquet.CopyRows uses a hardcoded
// buffer size (defaultRowBufferSize = 42 rows), which is too small for
// loading whole tables, so readRG uses its own, larger batch.
const inMemoryReaderRowsBufSize = 1 << 10
   272  
// readRG decodes every row of rg into dst via the persister.
// dst must be pre-sized to rg.NumRows() elements; the loop stops once
// dst is full.
func (r *inMemoryparquetReader[M, P]) readRG(dst []M, rg parquet.RowGroup) (err error) {
	rr := parquet.NewRowGroupReader(rg)
	defer runutil.CloseWithLogOnErr(util.Logger, rr, "closing parquet row group reader")
	buf := make([]parquet.Row, inMemoryReaderRowsBufSize)
	for i := 0; i < len(dst); {
		// ReadRows may return rows together with io.EOF: consume the
		// returned rows first, then inspect the error.
		n, err := rr.ReadRows(buf)
		if n > 0 {
			for _, row := range buf[:n] {
				v, err := r.persister.Reconstruct(row)
				if err != nil {
					return err
				}
				dst[i] = v
				i++
			}
		}
		if err != nil {
			// io.EOF marks a clean end of the row group.
			if errors.Is(err, io.EOF) {
				return nil
			}
			return err
		}
	}
	return nil
}
   298  
// Close drops the cached rows so they can be garbage-collected. The
// parquet file itself is already closed by open.
func (r *inMemoryparquetReader[M, P]) Close() error {
	r.cache = nil
	return nil
}
   303  
   304  func (r *inMemoryparquetReader[M, P]) relPath() string {
   305  	return r.persister.Name() + block.ParquetSuffix
   306  }