github.com/grafana/pyroscope@v1.18.0/pkg/symbolizer/symbolizer.go (about)

     1  package symbolizer
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"debug/elf"
     7  	"errors"
     8  	"flag"
     9  	"fmt"
    10  	"io"
    11  	"net/http"
    12  	"path/filepath"
    13  	"time"
    14  
    15  	"github.com/go-kit/log"
    16  	"github.com/go-kit/log/level"
    17  	"github.com/grafana/dskit/tenant"
    18  	"github.com/prometheus/client_golang/prometheus"
    19  	"golang.org/x/sync/errgroup"
    20  
    21  	googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    22  	"github.com/grafana/pyroscope/lidia"
    23  	"github.com/grafana/pyroscope/pkg/objstore"
    24  )
    25  
// DebuginfodClient fetches debug information for a binary identified by its
// build ID.
type DebuginfodClient interface {
	// FetchDebuginfo returns a reader over the debug info for buildID. The
	// Symbolizer reads the stream fully and closes it afterwards.
	FetchDebuginfo(ctx context.Context, buildID string) (io.ReadCloser, error)
}
    29  
    30  type Config struct {
    31  	DebuginfodURL            string `yaml:"debuginfod_url"`
    32  	MaxDebuginfodConcurrency int    `yaml:"max_debuginfod_concurrency" category:"advanced"`
    33  }
    34  
    35  func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
    36  	f.StringVar(&cfg.DebuginfodURL, "symbolizer.debuginfod-url", "https://debuginfod.elfutils.org", "URL of the debuginfod server")
    37  	f.IntVar(&cfg.MaxDebuginfodConcurrency, "symbolizer.max-debuginfod-concurrency", 10, "Maximum number of concurrent symbolization requests to debuginfod server.")
    38  }
    39  
    40  func (cfg *Config) Validate() error {
    41  	if cfg.MaxDebuginfodConcurrency < 1 {
    42  		return fmt.Errorf("invalid max-debuginfod-concurrency value, must be positive")
    43  	}
    44  	return nil
    45  }
    46  
// Symbolizer resolves addresses of unsymbolized pprof locations into function
// names and line numbers. It reads Lidia symbol tables from the object store
// bucket and, when they are missing there, builds them from debug info
// fetched through the debuginfod client and uploads the result to the bucket.
type Symbolizer struct {
	logger  log.Logger
	client  DebuginfodClient
	bucket  objstore.Bucket
	metrics *metrics
	cfg     Config
	limits  Limits
}
    55  
    56  type ErrSymbolSizeBytesExceedsLimit struct {
    57  	Limit int64
    58  }
    59  
    60  func (e *ErrSymbolSizeBytesExceedsLimit) Error() string {
    61  	return fmt.Sprintf("symbol size exceeds maximum allowed size of %d bytes", e.Limit)
    62  }
    63  
// Limits exposes the per-tenant limits the symbolizer consults.
type Limits interface {
	// SymbolizerMaxSymbolSizeBytes returns the size limit, in bytes, for the
	// given tenant. The symbolizer passes it as the maximum size when
	// processing ELF data downloaded from debuginfod.
	SymbolizerMaxSymbolSizeBytes(tenantID string) int
}
    67  
    68  func New(logger log.Logger, cfg Config, reg prometheus.Registerer, bucket objstore.Bucket, limits Limits) (*Symbolizer, error) {
    69  	if err := cfg.Validate(); err != nil {
    70  		return nil, err
    71  	}
    72  
    73  	metrics := newMetrics(reg)
    74  
    75  	client, err := NewDebuginfodClient(logger, cfg.DebuginfodURL, metrics, limits)
    76  	if err != nil {
    77  		return nil, err
    78  	}
    79  
    80  	return &Symbolizer{
    81  		logger:  logger,
    82  		client:  client,
    83  		bucket:  bucket,
    84  		metrics: metrics,
    85  		cfg:     cfg,
    86  		limits:  limits,
    87  	}, nil
    88  }
    89  
    90  func (s *Symbolizer) SymbolizePprof(ctx context.Context, profile *googlev1.Profile) error {
    91  	start := time.Now()
    92  	status := statusSuccess
    93  	defer func() {
    94  		s.metrics.profileSymbolization.WithLabelValues(status).Observe(time.Since(start).Seconds())
    95  	}()
    96  
    97  	mappingsToSymbolize := make(map[uint64]bool)
    98  	for i, mapping := range profile.Mapping {
    99  		if mapping.HasFunctions {
   100  			continue
   101  		}
   102  		mappingsToSymbolize[uint64(i+1)] = true
   103  	}
   104  	if len(mappingsToSymbolize) == 0 {
   105  		return nil
   106  	}
   107  
   108  	locationsByMapping, err := s.groupLocationsByMapping(profile, mappingsToSymbolize)
   109  	if err != nil {
   110  		return fmt.Errorf("grouping locations by mapping: %w", err)
   111  	}
   112  
   113  	stringMap := make(map[string]int64, len(profile.StringTable))
   114  	for i, str := range profile.StringTable {
   115  		stringMap[str] = int64(i)
   116  	}
   117  
   118  	allSymbolizedLocs, err := s.symbolizeMappingsConcurrently(ctx, profile, locationsByMapping)
   119  	if err != nil {
   120  		return fmt.Errorf("symbolizing mappings: %w", err)
   121  	}
   122  
   123  	s.updateAllSymbolsInProfile(profile, allSymbolizedLocs, stringMap)
   124  
   125  	return nil
   126  }
   127  
   128  // symbolizeMappingsConcurrently symbolizes multiple mappings concurrently with a concurrency limit.
   129  func (s *Symbolizer) symbolizeMappingsConcurrently(
   130  	ctx context.Context,
   131  	profile *googlev1.Profile,
   132  	locationsByMapping map[uint64][]*googlev1.Location,
   133  ) ([]symbolizedLocation, error) {
   134  	maxConcurrency := s.cfg.MaxDebuginfodConcurrency
   135  	if maxConcurrency <= 0 {
   136  		maxConcurrency = 10
   137  	}
   138  
   139  	type mappingJob struct {
   140  		mappingID uint64
   141  		locations []*googlev1.Location
   142  	}
   143  
   144  	type mappingResult struct {
   145  		mappingID uint64
   146  		locations []symbolizedLocation
   147  	}
   148  
   149  	totalLocs := 0
   150  	jobs := make(chan mappingJob, len(locationsByMapping))
   151  	for mappingID, locations := range locationsByMapping {
   152  		totalLocs += len(locations)
   153  		jobs <- mappingJob{mappingID: mappingID, locations: locations}
   154  	}
   155  	close(jobs)
   156  
   157  	// Process jobs concurrently with errgroup for proper error handling
   158  	g, ctx := errgroup.WithContext(ctx)
   159  	g.SetLimit(maxConcurrency)
   160  
   161  	// Results channel with buffer to avoid blocking jobs
   162  	results := make(chan mappingResult, len(locationsByMapping))
   163  
   164  	for job := range jobs {
   165  		job := job
   166  		g.Go(func() error {
   167  			mapping := profile.Mapping[job.mappingID-1]
   168  
   169  			binaryName, err := s.extractBinaryName(profile, mapping)
   170  			if err != nil {
   171  				return fmt.Errorf("extract binary name for mapping %d: %w", job.mappingID, err)
   172  			}
   173  
   174  			buildID, err := s.extractBuildID(profile, mapping)
   175  			if err != nil {
   176  				return fmt.Errorf("extract build ID for mapping %d: %w", job.mappingID, err)
   177  			}
   178  
   179  			req := s.createSymbolizationRequest(binaryName, buildID, job.locations)
   180  			s.symbolize(ctx, &req)
   181  
   182  			// Collect symbolized locations for this mapping
   183  			symbolizedLocs := make([]symbolizedLocation, len(job.locations))
   184  			for i, loc := range job.locations {
   185  				symbolizedLocs[i] = symbolizedLocation{
   186  					loc:     loc,
   187  					symLoc:  req.locations[i],
   188  					mapping: mapping,
   189  				}
   190  			}
   191  
   192  			select {
   193  			case results <- mappingResult{mappingID: job.mappingID, locations: symbolizedLocs}:
   194  			case <-ctx.Done():
   195  				return ctx.Err()
   196  			}
   197  
   198  			return nil
   199  		})
   200  	}
   201  
   202  	err := g.Wait()
   203  	close(results)
   204  
   205  	if err != nil {
   206  		return nil, err
   207  	}
   208  
   209  	allSymbolizedLocs := make([]symbolizedLocation, 0, totalLocs)
   210  	for result := range results {
   211  		allSymbolizedLocs = append(allSymbolizedLocs, result.locations...)
   212  	}
   213  
   214  	return allSymbolizedLocs, nil
   215  }
   216  
   217  // groupLocationsByMapping groups locations by their mapping ID
   218  func (s *Symbolizer) groupLocationsByMapping(profile *googlev1.Profile, mappingsToSymbolize map[uint64]bool) (map[uint64][]*googlev1.Location, error) {
   219  	locsByMapping := make(map[uint64][]*googlev1.Location)
   220  
   221  	for i, loc := range profile.Location {
   222  		if loc.MappingId == 0 {
   223  			return nil, fmt.Errorf("invalid profile: location at index %d has MappingId 0", i)
   224  		}
   225  
   226  		mappingIdx := loc.MappingId - 1
   227  		if int(mappingIdx) >= len(profile.Mapping) {
   228  			return nil, fmt.Errorf("invalid profile: location at index %d references non-existent mapping %d", i, loc.MappingId)
   229  		}
   230  
   231  		if !mappingsToSymbolize[loc.MappingId] {
   232  			continue
   233  		}
   234  
   235  		// Skip locations that already have symbols
   236  		if len(loc.Line) > 0 {
   237  			continue
   238  		}
   239  
   240  		locsByMapping[loc.MappingId] = append(locsByMapping[loc.MappingId], loc)
   241  	}
   242  
   243  	return locsByMapping, nil
   244  }
   245  
   246  // extractBinaryName extracts the binary name from the mapping
   247  func (s *Symbolizer) extractBinaryName(profile *googlev1.Profile, mapping *googlev1.Mapping) (string, error) {
   248  	if mapping.Filename < 0 || int(mapping.Filename) >= len(profile.StringTable) {
   249  		return "", fmt.Errorf("invalid mapping: filename index %d out of range (string table length: %d)",
   250  			mapping.Filename, len(profile.StringTable))
   251  	}
   252  
   253  	fullPath := profile.StringTable[mapping.Filename]
   254  	return filepath.Base(fullPath), nil
   255  }
   256  
   257  // extractBuildID extracts and sanitizes the build ID from the mapping
   258  func (s *Symbolizer) extractBuildID(profile *googlev1.Profile, mapping *googlev1.Mapping) (string, error) {
   259  	buildID := profile.StringTable[mapping.BuildId]
   260  	sanitizedBuildID, err := sanitizeBuildID(buildID)
   261  	if err != nil {
   262  		level.Error(s.logger).Log("msg", "Invalid buildID", "buildID", buildID)
   263  		return "", err
   264  	}
   265  
   266  	return sanitizedBuildID, nil
   267  }
   268  
   269  // createSymbolizationRequest creates a symbolization request for a mapping group
   270  func (s *Symbolizer) createSymbolizationRequest(binaryName, buildID string, locs []*googlev1.Location) request {
   271  	req := request{
   272  		buildID:    buildID,
   273  		binaryName: binaryName,
   274  		locations:  make([]*location, len(locs)),
   275  	}
   276  
   277  	for i, loc := range locs {
   278  		req.locations[i] = &location{
   279  			address: loc.Address,
   280  		}
   281  	}
   282  
   283  	return req
   284  }
   285  
   286  func (s *Symbolizer) updateAllSymbolsInProfile(
   287  	profile *googlev1.Profile,
   288  	symbolizedLocs []symbolizedLocation,
   289  	stringMap map[string]int64,
   290  ) {
   291  	funcMap := make(map[funcKey]uint64)
   292  	maxFuncID := uint64(len(profile.Function))
   293  	funcPtrMap := make(map[uint64]*googlev1.Function)
   294  
   295  	for _, item := range symbolizedLocs {
   296  		loc := item.loc
   297  		symLoc := item.symLoc
   298  		mapping := item.mapping
   299  
   300  		locIdx := loc.Id - 1
   301  		if loc.Id <= 0 || locIdx >= uint64(len(profile.Location)) {
   302  			continue
   303  		}
   304  
   305  		profile.Location[locIdx].Line = make([]*googlev1.Line, len(symLoc.lines))
   306  
   307  		for j, line := range symLoc.lines {
   308  			nameIdx, ok := stringMap[line.FunctionName]
   309  			if !ok {
   310  				nameIdx = int64(len(profile.StringTable))
   311  				profile.StringTable = append(profile.StringTable, line.FunctionName)
   312  				stringMap[line.FunctionName] = nameIdx
   313  			}
   314  
   315  			filenameIdx, ok := stringMap[line.FilePath]
   316  			if !ok {
   317  				filenameIdx = int64(len(profile.StringTable))
   318  				profile.StringTable = append(profile.StringTable, line.FilePath)
   319  				stringMap[line.FilePath] = filenameIdx
   320  			}
   321  
   322  			key := funcKey{nameIdx, filenameIdx}
   323  			funcID, ok := funcMap[key]
   324  			if !ok {
   325  				maxFuncID++
   326  				funcID = maxFuncID
   327  				fn := &googlev1.Function{
   328  					Id:        funcID,
   329  					Name:      nameIdx,
   330  					Filename:  filenameIdx,
   331  					StartLine: int64(line.LineNumber),
   332  				}
   333  				profile.Function = append(profile.Function, fn)
   334  				funcMap[key] = funcID
   335  				funcPtrMap[funcID] = fn
   336  			} else {
   337  				// Update StartLine to be the minimum line number seen for this function
   338  				if line.LineNumber > 0 {
   339  					if fn, ok := funcPtrMap[funcID]; ok {
   340  						currentStartLine := fn.StartLine
   341  						// 0 means "not set" in proto
   342  						if currentStartLine == 0 || int64(line.LineNumber) < currentStartLine {
   343  							fn.StartLine = int64(line.LineNumber)
   344  						}
   345  					}
   346  				}
   347  			}
   348  
   349  			profile.Location[locIdx].Line[j] = &googlev1.Line{
   350  				FunctionId: funcID,
   351  				Line:       int64(line.LineNumber),
   352  			}
   353  		}
   354  
   355  		mapping.HasFunctions = true
   356  	}
   357  }
   358  
   359  func (s *Symbolizer) symbolize(ctx context.Context, req *request) {
   360  	if req.buildID == "" {
   361  		s.metrics.debugSymbolResolutionErrors.WithLabelValues("empty_build_id").Inc()
   362  		s.setFallbackSymbols(req)
   363  		return
   364  	}
   365  
   366  	lidiaBytes, err := s.getLidiaBytes(ctx, req.buildID)
   367  	if err != nil {
   368  		level.Warn(s.logger).Log("msg", "Failed to get debug info", "buildID", req.buildID, "err", err)
   369  		s.setFallbackSymbols(req)
   370  		return
   371  	}
   372  
   373  	lidiaReader := NewReaderAtCloser(lidiaBytes)
   374  	table, err := lidia.OpenReader(lidiaReader, lidia.WithCRC())
   375  	if err != nil {
   376  		s.metrics.debugSymbolResolutionErrors.WithLabelValues("lidia_error").Inc()
   377  		level.Warn(s.logger).Log("msg", "Failed to open Lidia file", "err", err)
   378  		s.setFallbackSymbols(req)
   379  		return
   380  	}
   381  	defer table.Close()
   382  
   383  	s.symbolizeWithTable(table, req)
   384  }
   385  
   386  // setFallbackSymbols sets fallback symbols for all locations in the request
   387  func (s *Symbolizer) setFallbackSymbols(req *request) {
   388  	for _, loc := range req.locations {
   389  		loc.lines = s.createFallbackSymbol(req.binaryName, loc)
   390  	}
   391  }
   392  
   393  func (s *Symbolizer) symbolizeWithTable(table *lidia.Table, req *request) {
   394  	var framesBuf []lidia.SourceInfoFrame
   395  
   396  	resolveStart := time.Now()
   397  	defer func() {
   398  		s.metrics.debugSymbolResolution.WithLabelValues(statusSuccess).Observe(time.Since(resolveStart).Seconds())
   399  	}()
   400  
   401  	for _, loc := range req.locations {
   402  		frames, err := table.Lookup(framesBuf, loc.address)
   403  		if err != nil {
   404  			loc.lines = s.createFallbackSymbol(req.binaryName, loc)
   405  			continue
   406  		}
   407  
   408  		if len(frames) == 0 {
   409  			loc.lines = s.createFallbackSymbol(req.binaryName, loc)
   410  			continue
   411  		}
   412  
   413  		loc.lines = frames
   414  	}
   415  }
   416  
   417  func (s *Symbolizer) getLidiaBytes(ctx context.Context, buildID string) ([]byte, error) {
   418  	if client, ok := s.client.(*DebuginfodHTTPClient); ok {
   419  		if sanitizedBuildID, err := sanitizeBuildID(buildID); err == nil {
   420  			if found, _ := client.notFoundCache.Get(sanitizedBuildID); found {
   421  				s.metrics.cacheOperations.WithLabelValues("not_found", "get", statusSuccess).Inc()
   422  				return nil, buildIDNotFoundError{buildID: buildID}
   423  			}
   424  		}
   425  	}
   426  
   427  	lidiaBytes, err := s.fetchLidiaFromObjectStore(ctx, buildID)
   428  	if err == nil {
   429  		s.metrics.cacheOperations.WithLabelValues("object_storage", "get", statusSuccess).Inc()
   430  		return lidiaBytes, nil
   431  	}
   432  	s.metrics.cacheOperations.WithLabelValues("object_storage", "get", "miss").Inc()
   433  
   434  	lidiaBytes, err = s.fetchLidiaFromDebuginfod(ctx, buildID)
   435  	if err != nil {
   436  		return nil, err
   437  	}
   438  
   439  	if err := s.bucket.Upload(ctx, buildID, bytes.NewReader(lidiaBytes)); err != nil {
   440  		level.Warn(s.logger).Log("msg", "Failed to store debug info in objstore", "buildID", buildID, "err", err)
   441  		s.metrics.cacheOperations.WithLabelValues("object_storage", "set", "error").Inc()
   442  	} else {
   443  		s.metrics.cacheOperations.WithLabelValues("object_storage", "set", statusSuccess).Inc()
   444  	}
   445  
   446  	return lidiaBytes, nil
   447  }
   448  
   449  // fetchLidiaFromObjectStore retrieves Lidia data from the object store
   450  func (s *Symbolizer) fetchLidiaFromObjectStore(ctx context.Context, buildID string) ([]byte, error) {
   451  	objstoreReader, err := s.bucket.Get(ctx, buildID)
   452  	if err != nil {
   453  		return nil, err
   454  	}
   455  	defer objstoreReader.Close()
   456  
   457  	data, err := io.ReadAll(objstoreReader)
   458  	if err != nil {
   459  		return nil, fmt.Errorf("read content: %w", err)
   460  	}
   461  
   462  	return data, nil
   463  }
   464  
   465  // fetchLidiaFromDebuginfod fetches debug info from debuginfod and converts to Lidia format
   466  func (s *Symbolizer) fetchLidiaFromDebuginfod(ctx context.Context, buildID string) ([]byte, error) {
   467  	debugReader, err := s.fetchFromDebuginfod(ctx, buildID)
   468  	if err != nil {
   469  		var bnfErr buildIDNotFoundError
   470  		if errors.As(err, &bnfErr) {
   471  			return nil, err
   472  		}
   473  		return nil, err
   474  	}
   475  	defer debugReader.Close()
   476  
   477  	elfData, err := io.ReadAll(debugReader)
   478  	if err != nil {
   479  		return nil, fmt.Errorf("read debuginfod data: %w", err)
   480  	}
   481  
   482  	tenantID, err := tenant.TenantID(ctx)
   483  	if err != nil {
   484  		return nil, err
   485  	}
   486  
   487  	lidiaData, err := s.processELFData(elfData, int64(s.limits.SymbolizerMaxSymbolSizeBytes(tenantID)))
   488  	if err != nil {
   489  		return nil, err
   490  	}
   491  
   492  	return lidiaData, nil
   493  }
   494  
   495  func (s *Symbolizer) fetchFromDebuginfod(ctx context.Context, buildID string) (io.ReadCloser, error) {
   496  	debugReader, err := s.client.FetchDebuginfo(ctx, buildID)
   497  	if err != nil {
   498  		var bnfErr buildIDNotFoundError
   499  		statusCode, isHTTPError := isHTTPStatusError(err)
   500  
   501  		if errors.As(err, &bnfErr) || (isHTTPError && statusCode == http.StatusNotFound) {
   502  			return nil, buildIDNotFoundError{buildID: buildID}
   503  		}
   504  
   505  		return nil, fmt.Errorf("fetch debuginfo: %w", err)
   506  	}
   507  
   508  	return debugReader, nil
   509  }
   510  
   511  func (s *Symbolizer) processELFData(data []byte, maxSize int64) (lidiaData []byte, err error) {
   512  	decompressedData, err := detectCompression(data, maxSize)
   513  	if err != nil {
   514  		s.metrics.debugSymbolResolutionErrors.WithLabelValues("compression_error").Inc()
   515  		return nil, fmt.Errorf("detect compression: %w", err)
   516  	}
   517  
   518  	reader := bytes.NewReader(decompressedData)
   519  
   520  	elfFile, err := elf.NewFile(reader)
   521  	if err != nil {
   522  		s.metrics.debugSymbolResolutionErrors.WithLabelValues("elf_parsing_error").Inc()
   523  		return nil, fmt.Errorf("parse ELF file: %w", err)
   524  	}
   525  	defer elfFile.Close()
   526  
   527  	initialSize := len(data) * 2 // A simple heuristic: twice the compressed size
   528  	memBuffer := newMemoryBuffer(initialSize)
   529  
   530  	err = lidia.CreateLidiaFromELF(elfFile, memBuffer, lidia.WithCRC(), lidia.WithFiles(), lidia.WithLines())
   531  	if err != nil {
   532  		return nil, fmt.Errorf("create lidia file: %w", err)
   533  	}
   534  
   535  	return memBuffer.Bytes(), nil
   536  }
   537  
   538  func (s *Symbolizer) createFallbackSymbol(binaryName string, loc *location) []lidia.SourceInfoFrame {
   539  	prefix := "unknown"
   540  	if binaryName != "" {
   541  		prefix = binaryName
   542  	}
   543  
   544  	return []lidia.SourceInfoFrame{{
   545  		FunctionName: fmt.Sprintf("%s!0x%x", prefix, loc.address),
   546  		LineNumber:   0,
   547  	}}
   548  }