github.com/grafana/pyroscope@v1.18.0/pkg/symbolizer/symbolizer.go (about) 1 package symbolizer 2 3 import ( 4 "bytes" 5 "context" 6 "debug/elf" 7 "errors" 8 "flag" 9 "fmt" 10 "io" 11 "net/http" 12 "path/filepath" 13 "time" 14 15 "github.com/go-kit/log" 16 "github.com/go-kit/log/level" 17 "github.com/grafana/dskit/tenant" 18 "github.com/prometheus/client_golang/prometheus" 19 "golang.org/x/sync/errgroup" 20 21 googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" 22 "github.com/grafana/pyroscope/lidia" 23 "github.com/grafana/pyroscope/pkg/objstore" 24 ) 25 26 type DebuginfodClient interface { 27 FetchDebuginfo(ctx context.Context, buildID string) (io.ReadCloser, error) 28 } 29 30 type Config struct { 31 DebuginfodURL string `yaml:"debuginfod_url"` 32 MaxDebuginfodConcurrency int `yaml:"max_debuginfod_concurrency" category:"advanced"` 33 } 34 35 func (cfg *Config) RegisterFlags(f *flag.FlagSet) { 36 f.StringVar(&cfg.DebuginfodURL, "symbolizer.debuginfod-url", "https://debuginfod.elfutils.org", "URL of the debuginfod server") 37 f.IntVar(&cfg.MaxDebuginfodConcurrency, "symbolizer.max-debuginfod-concurrency", 10, "Maximum number of concurrent symbolization requests to debuginfod server.") 38 } 39 40 func (cfg *Config) Validate() error { 41 if cfg.MaxDebuginfodConcurrency < 1 { 42 return fmt.Errorf("invalid max-debuginfod-concurrency value, must be positive") 43 } 44 return nil 45 } 46 47 type Symbolizer struct { 48 logger log.Logger 49 client DebuginfodClient 50 bucket objstore.Bucket 51 metrics *metrics 52 cfg Config 53 limits Limits 54 } 55 56 type ErrSymbolSizeBytesExceedsLimit struct { 57 Limit int64 58 } 59 60 func (e *ErrSymbolSizeBytesExceedsLimit) Error() string { 61 return fmt.Sprintf("symbol size exceeds maximum allowed size of %d bytes", e.Limit) 62 } 63 64 type Limits interface { 65 SymbolizerMaxSymbolSizeBytes(tenantID string) int 66 } 67 68 func New(logger log.Logger, cfg Config, reg prometheus.Registerer, bucket objstore.Bucket, limits Limits) (*Symbolizer, error) { 69 if err := cfg.Validate(); err != nil { 70 return nil, err 71 } 72 73 metrics := newMetrics(reg) 74 75 client, err := NewDebuginfodClient(logger, cfg.DebuginfodURL, metrics, limits) 76 if err != nil { 77 return nil, err 78 } 79 80 return &Symbolizer{ 81 logger: logger, 82 client: client, 83 bucket: bucket, 84 metrics: metrics, 85 cfg: cfg, 86 limits: limits, 87 }, nil 88 } 89 90 func (s *Symbolizer) SymbolizePprof(ctx context.Context, profile *googlev1.Profile) error { 91 start := time.Now() 92 status := statusSuccess 93 defer func() { 94 s.metrics.profileSymbolization.WithLabelValues(status).Observe(time.Since(start).Seconds()) 95 }() 96 97 mappingsToSymbolize := make(map[uint64]bool) 98 for i, mapping := range profile.Mapping { 99 if mapping.HasFunctions { 100 continue 101 } 102 mappingsToSymbolize[uint64(i+1)] = true 103 } 104 if len(mappingsToSymbolize) == 0 { 105 return nil 106 } 107 108 locationsByMapping, err := s.groupLocationsByMapping(profile, mappingsToSymbolize) 109 if err != nil { 110 return fmt.Errorf("grouping locations by mapping: %w", err) 111 } 112 113 stringMap := make(map[string]int64, len(profile.StringTable)) 114 for i, str := range profile.StringTable { 115 stringMap[str] = int64(i) 116 } 117 118 allSymbolizedLocs, err := s.symbolizeMappingsConcurrently(ctx, profile, locationsByMapping) 119 if err != nil { 120 return fmt.Errorf("symbolizing mappings: %w", err) 121 } 122 123 s.updateAllSymbolsInProfile(profile, allSymbolizedLocs, stringMap) 124 125 return nil 126 } 127 128 // symbolizeMappingsConcurrently symbolizes multiple mappings concurrently with a concurrency limit. 129 func (s *Symbolizer) symbolizeMappingsConcurrently( 130 ctx context.Context, 131 profile *googlev1.Profile, 132 locationsByMapping map[uint64][]*googlev1.Location, 133 ) ([]symbolizedLocation, error) { 134 maxConcurrency := s.cfg.MaxDebuginfodConcurrency 135 if maxConcurrency <= 0 { 136 maxConcurrency = 10 137 } 138 139 type mappingJob struct { 140 mappingID uint64 141 locations []*googlev1.Location 142 } 143 144 type mappingResult struct { 145 mappingID uint64 146 locations []symbolizedLocation 147 } 148 149 totalLocs := 0 150 jobs := make(chan mappingJob, len(locationsByMapping)) 151 for mappingID, locations := range locationsByMapping { 152 totalLocs += len(locations) 153 jobs <- mappingJob{mappingID: mappingID, locations: locations} 154 } 155 close(jobs) 156 157 // Process jobs concurrently with errgroup for proper error handling 158 g, ctx := errgroup.WithContext(ctx) 159 g.SetLimit(maxConcurrency) 160 161 // Results channel with buffer to avoid blocking jobs 162 results := make(chan mappingResult, len(locationsByMapping)) 163 164 for job := range jobs { 165 job := job 166 g.Go(func() error { 167 mapping := profile.Mapping[job.mappingID-1] 168 169 binaryName, err := s.extractBinaryName(profile, mapping) 170 if err != nil { 171 return fmt.Errorf("extract binary name for mapping %d: %w", job.mappingID, err) 172 } 173 174 buildID, err := s.extractBuildID(profile, mapping) 175 if err != nil { 176 return fmt.Errorf("extract build ID for mapping %d: %w", job.mappingID, err) 177 } 178 179 req := s.createSymbolizationRequest(binaryName, buildID, job.locations) 180 s.symbolize(ctx, &req) 181 182 // Collect symbolized locations for this mapping 183 symbolizedLocs := make([]symbolizedLocation, len(job.locations)) 184 for i, loc := range job.locations { 185 symbolizedLocs[i] = symbolizedLocation{ 186 loc: loc, 187 symLoc: req.locations[i], 188 mapping: mapping, 189 } 190 } 191 192 select { 193 case results <- mappingResult{mappingID: job.mappingID, locations: symbolizedLocs}: 194 case <-ctx.Done(): 195 return ctx.Err() 196 } 197 198 return nil 199 }) 200 } 201 202 err := g.Wait() 203 close(results) 204 205 if err != nil { 206 return nil, err 207 } 208 209 allSymbolizedLocs := make([]symbolizedLocation, 0, totalLocs) 210 for result := range results { 211 allSymbolizedLocs = append(allSymbolizedLocs, result.locations...) 212 } 213 214 return allSymbolizedLocs, nil 215 } 216 217 // groupLocationsByMapping groups locations by their mapping ID 218 func (s *Symbolizer) groupLocationsByMapping(profile *googlev1.Profile, mappingsToSymbolize map[uint64]bool) (map[uint64][]*googlev1.Location, error) { 219 locsByMapping := make(map[uint64][]*googlev1.Location) 220 221 for i, loc := range profile.Location { 222 if loc.MappingId == 0 { 223 return nil, fmt.Errorf("invalid profile: location at index %d has MappingId 0", i) 224 } 225 226 mappingIdx := loc.MappingId - 1 227 if int(mappingIdx) >= len(profile.Mapping) { 228 return nil, fmt.Errorf("invalid profile: location at index %d references non-existent mapping %d", i, loc.MappingId) 229 } 230 231 if !mappingsToSymbolize[loc.MappingId] { 232 continue 233 } 234 235 // Skip locations that already have symbols 236 if len(loc.Line) > 0 { 237 continue 238 } 239 240 locsByMapping[loc.MappingId] = append(locsByMapping[loc.MappingId], loc) 241 } 242 243 return locsByMapping, nil 244 } 245 246 // extractBinaryName extracts the binary name from the mapping 247 func (s *Symbolizer) extractBinaryName(profile *googlev1.Profile, mapping *googlev1.Mapping) (string, error) { 248 if mapping.Filename < 0 || int(mapping.Filename) >= len(profile.StringTable) { 249 return "", fmt.Errorf("invalid mapping: filename index %d out of range (string table length: %d)", 250 mapping.Filename, len(profile.StringTable)) 251 } 252 253 fullPath := profile.StringTable[mapping.Filename] 254 return filepath.Base(fullPath), nil 255 } 256 257 // extractBuildID extracts and sanitizes the build ID from the mapping 258 func (s *Symbolizer) extractBuildID(profile *googlev1.Profile, mapping *googlev1.Mapping) (string, error) { 259 buildID := profile.StringTable[mapping.BuildId] 260 sanitizedBuildID, err := sanitizeBuildID(buildID) 261 if err != nil { 262 level.Error(s.logger).Log("msg", "Invalid buildID", "buildID", buildID) 263 return "", err 264 } 265 266 return sanitizedBuildID, nil 267 } 268 269 // createSymbolizationRequest creates a symbolization request for a mapping group 270 func (s *Symbolizer) createSymbolizationRequest(binaryName, buildID string, locs []*googlev1.Location) request { 271 req := request{ 272 buildID: buildID, 273 binaryName: binaryName, 274 locations: make([]*location, len(locs)), 275 } 276 277 for i, loc := range locs { 278 req.locations[i] = &location{ 279 address: loc.Address, 280 } 281 } 282 283 return req 284 } 285 286 func (s *Symbolizer) updateAllSymbolsInProfile( 287 profile *googlev1.Profile, 288 symbolizedLocs []symbolizedLocation, 289 stringMap map[string]int64, 290 ) { 291 funcMap := make(map[funcKey]uint64) 292 maxFuncID := uint64(len(profile.Function)) 293 funcPtrMap := make(map[uint64]*googlev1.Function) 294 295 for _, item := range symbolizedLocs { 296 loc := item.loc 297 symLoc := item.symLoc 298 mapping := item.mapping 299 300 locIdx := loc.Id - 1 301 if loc.Id <= 0 || locIdx >= uint64(len(profile.Location)) { 302 continue 303 } 304 305 profile.Location[locIdx].Line = make([]*googlev1.Line, len(symLoc.lines)) 306 307 for j, line := range symLoc.lines { 308 nameIdx, ok := stringMap[line.FunctionName] 309 if !ok { 310 nameIdx = int64(len(profile.StringTable)) 311 profile.StringTable = append(profile.StringTable, line.FunctionName) 312 stringMap[line.FunctionName] = nameIdx 313 } 314 315 filenameIdx, ok := stringMap[line.FilePath] 316 if !ok { 317 filenameIdx = int64(len(profile.StringTable)) 318 profile.StringTable = append(profile.StringTable, line.FilePath) 319 stringMap[line.FilePath] = filenameIdx 320 } 321 322 key := funcKey{nameIdx, filenameIdx} 323 funcID, ok := funcMap[key] 324 if !ok { 325 maxFuncID++ 326 funcID = maxFuncID 327 fn := &googlev1.Function{ 328 Id: funcID, 329 Name: nameIdx, 330 Filename: filenameIdx, 331 StartLine: int64(line.LineNumber), 332 } 333 profile.Function = append(profile.Function, fn) 334 funcMap[key] = funcID 335 funcPtrMap[funcID] = fn 336 } else { 337 // Update StartLine to be the minimum line number seen for this function 338 if line.LineNumber > 0 { 339 if fn, ok := funcPtrMap[funcID]; ok { 340 currentStartLine := fn.StartLine 341 // 0 means "not set" in proto 342 if currentStartLine == 0 || int64(line.LineNumber) < currentStartLine { 343 fn.StartLine = int64(line.LineNumber) 344 } 345 } 346 } 347 } 348 349 profile.Location[locIdx].Line[j] = &googlev1.Line{ 350 FunctionId: funcID, 351 Line: int64(line.LineNumber), 352 } 353 } 354 355 mapping.HasFunctions = true 356 } 357 } 358 359 func (s *Symbolizer) symbolize(ctx context.Context, req *request) { 360 if req.buildID == "" { 361 s.metrics.debugSymbolResolutionErrors.WithLabelValues("empty_build_id").Inc() 362 s.setFallbackSymbols(req) 363 return 364 } 365 366 lidiaBytes, err := s.getLidiaBytes(ctx, req.buildID) 367 if err != nil { 368 level.Warn(s.logger).Log("msg", "Failed to get debug info", "buildID", req.buildID, "err", err) 369 s.setFallbackSymbols(req) 370 return 371 } 372 373 lidiaReader := NewReaderAtCloser(lidiaBytes) 374 table, err := lidia.OpenReader(lidiaReader, lidia.WithCRC()) 375 if err != nil { 376 s.metrics.debugSymbolResolutionErrors.WithLabelValues("lidia_error").Inc() 377 level.Warn(s.logger).Log("msg", "Failed to open Lidia file", "err", err) 378 s.setFallbackSymbols(req) 379 return 380 } 381 defer table.Close() 382 383 s.symbolizeWithTable(table, req) 384 } 385 386 // setFallbackSymbols sets fallback symbols for all locations in the request 387 func (s *Symbolizer) setFallbackSymbols(req *request) { 388 for _, loc := range req.locations { 389 loc.lines = s.createFallbackSymbol(req.binaryName, loc) 390 } 391 } 392 393 func (s *Symbolizer) symbolizeWithTable(table *lidia.Table, req *request) { 394 var framesBuf []lidia.SourceInfoFrame 395 396 resolveStart := time.Now() 397 defer func() { 398 s.metrics.debugSymbolResolution.WithLabelValues(statusSuccess).Observe(time.Since(resolveStart).Seconds()) 399 }() 400 401 for _, loc := range req.locations { 402 frames, err := table.Lookup(framesBuf, loc.address) 403 if err != nil { 404 loc.lines = s.createFallbackSymbol(req.binaryName, loc) 405 continue 406 } 407 408 if len(frames) == 0 { 409 loc.lines = s.createFallbackSymbol(req.binaryName, loc) 410 continue 411 } 412 413 loc.lines = frames 414 } 415 } 416 417 func (s *Symbolizer) getLidiaBytes(ctx context.Context, buildID string) ([]byte, error) { 418 if client, ok := s.client.(*DebuginfodHTTPClient); ok { 419 if sanitizedBuildID, err := sanitizeBuildID(buildID); err == nil { 420 if found, _ := client.notFoundCache.Get(sanitizedBuildID); found { 421 s.metrics.cacheOperations.WithLabelValues("not_found", "get", statusSuccess).Inc() 422 return nil, buildIDNotFoundError{buildID: buildID} 423 } 424 } 425 } 426 427 lidiaBytes, err := s.fetchLidiaFromObjectStore(ctx, buildID) 428 if err == nil { 429 s.metrics.cacheOperations.WithLabelValues("object_storage", "get", statusSuccess).Inc() 430 return lidiaBytes, nil 431 } 432 s.metrics.cacheOperations.WithLabelValues("object_storage", "get", "miss").Inc() 433 434 lidiaBytes, err = s.fetchLidiaFromDebuginfod(ctx, buildID) 435 if err != nil { 436 return nil, err 437 } 438 439 if err := s.bucket.Upload(ctx, buildID, bytes.NewReader(lidiaBytes)); err != nil { 440 level.Warn(s.logger).Log("msg", "Failed to store debug info in objstore", "buildID", buildID, "err", err) 441 s.metrics.cacheOperations.WithLabelValues("object_storage", "set", "error").Inc() 442 } else { 443 s.metrics.cacheOperations.WithLabelValues("object_storage", "set", statusSuccess).Inc() 444 } 445 446 return lidiaBytes, nil 447 } 448 449 // fetchLidiaFromObjectStore retrieves Lidia data from the object store 450 func (s *Symbolizer) fetchLidiaFromObjectStore(ctx context.Context, buildID string) ([]byte, error) { 451 objstoreReader, err := s.bucket.Get(ctx, buildID) 452 if err != nil { 453 return nil, err 454 } 455 defer objstoreReader.Close() 456 457 data, err := io.ReadAll(objstoreReader) 458 if err != nil { 459 return nil, fmt.Errorf("read content: %w", err) 460 } 461 462 return data, nil 463 } 464 465 // fetchLidiaFromDebuginfod fetches debug info from debuginfod and converts to Lidia format 466 func (s *Symbolizer) fetchLidiaFromDebuginfod(ctx context.Context, buildID string) ([]byte, error) { 467 debugReader, err := s.fetchFromDebuginfod(ctx, buildID) 468 if err != nil { 469 var bnfErr buildIDNotFoundError 470 if errors.As(err, &bnfErr) { 471 return nil, err 472 } 473 return nil, err 474 } 475 defer debugReader.Close() 476 477 elfData, err := io.ReadAll(debugReader) 478 if err != nil { 479 return nil, fmt.Errorf("read debuginfod data: %w", err) 480 } 481 482 tenantID, err := tenant.TenantID(ctx) 483 if err != nil { 484 return nil, err 485 } 486 487 lidiaData, err := s.processELFData(elfData, int64(s.limits.SymbolizerMaxSymbolSizeBytes(tenantID))) 488 if err != nil { 489 return nil, err 490 } 491 492 return lidiaData, nil 493 } 494 495 func (s *Symbolizer) fetchFromDebuginfod(ctx context.Context, buildID string) (io.ReadCloser, error) { 496 debugReader, err := s.client.FetchDebuginfo(ctx, buildID) 497 if err != nil { 498 var bnfErr buildIDNotFoundError 499 statusCode, isHTTPError := isHTTPStatusError(err) 500 501 if errors.As(err, &bnfErr) || (isHTTPError && statusCode == http.StatusNotFound) { 502 return nil, buildIDNotFoundError{buildID: buildID} 503 } 504 505 return nil, fmt.Errorf("fetch debuginfo: %w", err) 506 } 507 508 return debugReader, nil 509 } 510 511 func (s *Symbolizer) processELFData(data []byte, maxSize int64) (lidiaData []byte, err error) { 512 decompressedData, err := detectCompression(data, maxSize) 513 if err != nil { 514 s.metrics.debugSymbolResolutionErrors.WithLabelValues("compression_error").Inc() 515 return nil, fmt.Errorf("detect compression: %w", err) 516 } 517 518 reader := bytes.NewReader(decompressedData) 519 520 elfFile, err := elf.NewFile(reader) 521 if err != nil { 522 s.metrics.debugSymbolResolutionErrors.WithLabelValues("elf_parsing_error").Inc() 523 return nil, fmt.Errorf("parse ELF file: %w", err) 524 } 525 defer elfFile.Close() 526 527 initialSize := len(data) * 2 // A simple heuristic: twice the compressed size 528 memBuffer := newMemoryBuffer(initialSize) 529 530 err = lidia.CreateLidiaFromELF(elfFile, memBuffer, lidia.WithCRC(), lidia.WithFiles(), lidia.WithLines()) 531 if err != nil { 532 return nil, fmt.Errorf("create lidia file: %w", err) 533 } 534 535 return memBuffer.Bytes(), nil 536 } 537 538 func (s *Symbolizer) createFallbackSymbol(binaryName string, loc *location) []lidia.SourceInfoFrame { 539 prefix := "unknown" 540 if binaryName != "" { 541 prefix = binaryName 542 } 543 544 return []lidia.SourceInfoFrame{{ 545 FunctionName: fmt.Sprintf("%s!0x%x", prefix, loc.address), 546 LineNumber: 0, 547 }} 548 }