github.com/grafana/pyroscope@v1.18.0/pkg/symbolizer/debuginfod_client.go (about) 1 package symbolizer 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "fmt" 8 "io" 9 "net" 10 "net/http" 11 "os" 12 "regexp" 13 "time" 14 15 "github.com/go-kit/log" 16 "github.com/grafana/dskit/backoff" 17 "github.com/grafana/dskit/tenant" 18 "golang.org/x/sync/singleflight" 19 20 "github.com/dgraph-io/ristretto/v2" 21 ) 22 23 // DebuginfodClientConfig holds configuration for the debuginfod client. 24 type DebuginfodClientConfig struct { 25 BaseURL string 26 HTTPClient *http.Client 27 BackoffConfig backoff.Config 28 UserAgent string 29 30 NotFoundCacheMaxItems int64 31 NotFoundCacheTTL time.Duration 32 } 33 34 // DebuginfodHTTPClient implements the DebuginfodClient interface using HTTP. 35 type DebuginfodHTTPClient struct { 36 cfg DebuginfodClientConfig 37 metrics *metrics 38 logger log.Logger 39 limits Limits 40 41 // Used to deduplicate concurrent requests for the same build ID 42 group singleflight.Group 43 44 notFoundCache *ristretto.Cache[string, bool] 45 } 46 47 // NewDebuginfodClient creates a new client for fetching debug information from a debuginfod server. 48 func NewDebuginfodClient(logger log.Logger, baseURL string, metrics *metrics, limits Limits) (*DebuginfodHTTPClient, error) { 49 return NewDebuginfodClientWithConfig(logger, DebuginfodClientConfig{ 50 BaseURL: baseURL, 51 //UserAgent: "Pyroscope-Symbolizer/1.0", 52 BackoffConfig: backoff.Config{ 53 MinBackoff: 1 * time.Second, 54 MaxBackoff: 10 * time.Second, 55 MaxRetries: 3, 56 }, 57 NotFoundCacheMaxItems: 100000, 58 NotFoundCacheTTL: 7 * 24 * time.Hour, 59 }, metrics, limits) 60 } 61 62 // NewDebuginfodClientWithConfig creates a new client with the specified configuration. 63 func NewDebuginfodClientWithConfig(logger log.Logger, cfg DebuginfodClientConfig, metrics *metrics, limits Limits) (*DebuginfodHTTPClient, error) { 64 httpClient := cfg.HTTPClient 65 if httpClient == nil { 66 transport := &http.Transport{ 67 MaxIdleConnsPerHost: 10, 68 IdleConnTimeout: 90 * time.Second, 69 TLSHandshakeTimeout: 10 * time.Second, 70 } 71 72 httpClient = &http.Client{ 73 Transport: transport, 74 Timeout: 120 * time.Second, 75 CheckRedirect: func(req *http.Request, via []*http.Request) error { 76 if len(via) >= 3 { 77 return fmt.Errorf("stopped after 3 redirects") 78 } 79 return nil 80 }, 81 } 82 } 83 84 cache, err := ristretto.NewCache(&ristretto.Config[string, bool]{ 85 NumCounters: cfg.NotFoundCacheMaxItems * 10, 86 MaxCost: cfg.NotFoundCacheMaxItems, 87 BufferItems: 64, 88 }) 89 if err != nil { 90 return nil, fmt.Errorf("failed to create not-found cache: %w", err) 91 } 92 93 client := &DebuginfodHTTPClient{ 94 cfg: DebuginfodClientConfig{ 95 BaseURL: cfg.BaseURL, 96 UserAgent: cfg.UserAgent, 97 HTTPClient: httpClient, 98 BackoffConfig: cfg.BackoffConfig, 99 }, 100 metrics: metrics, 101 logger: logger, 102 notFoundCache: cache, 103 limits: limits, 104 } 105 106 return client, nil 107 } 108 109 // FetchDebuginfo fetches the debuginfo file for a specific build ID. 110 func (c *DebuginfodHTTPClient) FetchDebuginfo(ctx context.Context, buildID string) (io.ReadCloser, error) { 111 start := time.Now() 112 status := statusSuccess 113 defer func() { 114 c.metrics.debuginfodRequestDuration.WithLabelValues(status).Observe(time.Since(start).Seconds()) 115 }() 116 117 sanitizedBuildID, err := sanitizeBuildID(buildID) 118 if err != nil { 119 status = statusErrorInvalidID 120 return nil, err 121 } 122 123 if found, _ := c.notFoundCache.Get(sanitizedBuildID); found { 124 status = statusErrorNotFound 125 c.metrics.cacheOperations.WithLabelValues("not_found", "get", statusSuccess).Inc() 126 return nil, buildIDNotFoundError{buildID: sanitizedBuildID} 127 } 128 c.metrics.cacheOperations.WithLabelValues("not_found", "get", "miss").Inc() 129 130 v, err, _ := c.group.Do(sanitizedBuildID, func() (interface{}, error) { 131 return c.fetchDebugInfoWithRetries(ctx, sanitizedBuildID) 132 }) 133 134 if err != nil { 135 var bnfErr buildIDNotFoundError 136 switch { 137 case errors.As(err, &bnfErr): 138 status = statusErrorNotFound 139 case errors.Is(err, context.Canceled): 140 status = statusErrorCanceled 141 case errors.Is(err, context.DeadlineExceeded): 142 status = statusErrorTimeout 143 case isInvalidBuildIDError(err): 144 status = statusErrorInvalidID 145 default: 146 if statusCode, ok := isHTTPStatusError(err); ok { 147 status = categorizeHTTPStatusCode(statusCode) 148 } else { 149 status = statusErrorOther 150 } 151 } 152 return nil, err 153 } 154 155 data := v.([]byte) 156 return io.NopCloser(bytes.NewReader(data)), nil 157 } 158 159 // doRequest performs an HTTP request to the specified URL and returns the response body. 160 func (c *DebuginfodHTTPClient) doRequest(ctx context.Context, url string) ([]byte, error) { 161 tenantID, err := tenant.TenantID(ctx) 162 if err != nil { 163 return nil, err 164 } 165 166 maxBytes := int64(c.limits.SymbolizerMaxSymbolSizeBytes(tenantID)) 167 168 req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) 169 if err != nil { 170 return nil, fmt.Errorf("failed to create request: %w", err) 171 } 172 173 req.Header.Set("Accept-Encoding", "gzip, deflate") 174 if c.cfg.UserAgent != "" { 175 req.Header.Set("User-Agent", c.cfg.UserAgent) 176 } 177 178 resp, err := c.cfg.HTTPClient.Do(req) 179 if err != nil { 180 return nil, fmt.Errorf("failed to execute request: %w", err) 181 } 182 defer resp.Body.Close() 183 184 var r io.Reader = resp.Body 185 // maxBytes == 0 means unlimited body size 186 if maxBytes > 0 { 187 r = io.LimitReader(r, maxBytes+1) // +1 to detect if limit is exceeded 188 } 189 190 data, err := io.ReadAll(r) 191 if err != nil { 192 return nil, fmt.Errorf("failed to read response body: %w", err) 193 } 194 195 // Check if we hit the size limit 196 if maxBytes > 0 && int64(len(data)) > maxBytes { 197 return nil, &ErrSymbolSizeBytesExceedsLimit{Limit: maxBytes} 198 } 199 200 if resp.StatusCode != http.StatusOK { 201 errorBody := string(data) 202 203 // Truncate large error responses 204 if len(errorBody) > 1000 { 205 errorBody = errorBody[:1000] + "... [truncated]" 206 } 207 return nil, httpStatusError{ 208 statusCode: resp.StatusCode, 209 body: errorBody, 210 } 211 } 212 213 c.metrics.debuginfodFileSize.Observe(float64(len(data))) 214 215 return data, nil 216 } 217 218 // fetchDebugInfoWithRetries attempts to fetch debug info with retries on transient errors. 219 func (c *DebuginfodHTTPClient) fetchDebugInfoWithRetries(ctx context.Context, sanitizedBuildID string) ([]byte, error) { 220 url := fmt.Sprintf("%s/buildid/%s/debuginfo", c.cfg.BaseURL, sanitizedBuildID) 221 var data []byte 222 223 // Use dskit backoff for retries with exponential backoff 224 backOff := backoff.New(ctx, c.cfg.BackoffConfig) 225 226 attempt := func() ([]byte, error) { 227 return c.doRequest(ctx, url) 228 } 229 230 var lastErr error 231 for backOff.Ongoing() { 232 data, err := attempt() 233 if err == nil { 234 return data, nil 235 } 236 237 // Don't retry on 404 errors 238 if statusCode, isHTTPErr := isHTTPStatusError(err); isHTTPErr && statusCode == http.StatusNotFound { 239 c.notFoundCache.SetWithTTL(sanitizedBuildID, true, 1, c.cfg.NotFoundCacheTTL) 240 c.notFoundCache.Wait() 241 c.metrics.cacheOperations.WithLabelValues("not_found", "set", statusSuccess).Inc() 242 c.metrics.cacheSizeBytes.WithLabelValues("not_found").Set(float64(c.notFoundCache.Metrics.CostAdded())) 243 return nil, buildIDNotFoundError{buildID: sanitizedBuildID} 244 } 245 246 lastErr = err 247 248 if !isRetryableError(err) { 249 break 250 } 251 252 backOff.Wait() 253 } 254 255 if lastErr != nil { 256 return nil, fmt.Errorf("failed to fetch debuginfo after %d attempts: %w", backOff.NumRetries(), lastErr) 257 } 258 259 return data, nil 260 } 261 262 // categorizeHTTPStatusCode maps HTTP status codes to metric status strings. 263 func categorizeHTTPStatusCode(statusCode int) string { 264 switch { 265 case statusCode == http.StatusNotFound: 266 return statusErrorNotFound 267 case statusCode == http.StatusUnauthorized || statusCode == http.StatusForbidden: 268 return statusErrorUnauthorized 269 case statusCode == http.StatusTooManyRequests: 270 return statusErrorRateLimited 271 case statusCode >= 400 && statusCode < 500: 272 return statusErrorClientError 273 case statusCode >= 500: 274 return statusErrorServerError 275 default: 276 return statusErrorHTTPOther 277 } 278 } 279 280 // isRetryableError determines if an error should trigger a retry attempt. 281 func isRetryableError(err error) bool { 282 if err == nil { 283 return false 284 } 285 286 if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) { 287 return false 288 } 289 290 if isInvalidBuildIDError(err) { 291 return false 292 } 293 294 var bnfErr buildIDNotFoundError 295 if errors.As(err, &bnfErr) { 296 return false 297 } 298 299 if statusCode, ok := isHTTPStatusError(err); ok { 300 // Don't retry 4xx client errors except for 429 (too many requests) 301 if statusCode == http.StatusTooManyRequests { 302 return true 303 } 304 if statusCode >= 400 && statusCode < 500 { 305 return false 306 } 307 // Retry on 5xx server errors 308 return statusCode >= 500 309 } 310 311 if os.IsTimeout(err) { 312 return true 313 } 314 315 var netErr net.Error 316 if errors.As(err, &netErr) { 317 return netErr.Timeout() 318 } 319 320 return false 321 } 322 323 // sanitizeBuildID ensures that the buildID is a safe and valid string for use in file paths. 324 // It validates that the build ID contains only alphanumeric characters, underscores, and hyphens. 325 // This prevents potential security issues like path traversal attacks. 326 func sanitizeBuildID(buildID string) (string, error) { 327 if buildID == "" { 328 return "", nil 329 } 330 331 validBuildID := regexp.MustCompile(`^[a-zA-Z0-9_-]+$`) 332 if !validBuildID.MatchString(buildID) { 333 return "", invalidBuildIDError{buildID: buildID} 334 } 335 return buildID, nil 336 }