github.com/grafana/pyroscope@v1.18.0/pkg/symbolizer/debuginfod_client.go (about)

     1  package symbolizer
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"net"
    10  	"net/http"
    11  	"os"
    12  	"regexp"
    13  	"time"
    14  
    15  	"github.com/go-kit/log"
    16  	"github.com/grafana/dskit/backoff"
    17  	"github.com/grafana/dskit/tenant"
    18  	"golang.org/x/sync/singleflight"
    19  
    20  	"github.com/dgraph-io/ristretto/v2"
    21  )
    22  
// DebuginfodClientConfig holds configuration for the debuginfod client.
type DebuginfodClientConfig struct {
	BaseURL       string         // server root; requests go to <BaseURL>/buildid/<id>/debuginfo
	HTTPClient    *http.Client   // optional; NewDebuginfodClientWithConfig builds a default when nil
	BackoffConfig backoff.Config // retry policy handed to the backoff loop around each fetch
	UserAgent     string         // sent as the User-Agent request header when non-empty

	NotFoundCacheMaxItems int64         // sizes the cache of build IDs for which the server answered 404
	NotFoundCacheTTL      time.Duration // TTL passed when a 404 build ID is stored in that cache
}
    33  
// DebuginfodHTTPClient implements the DebuginfodClient interface using HTTP.
//
// Instances are created with NewDebuginfodClient or
// NewDebuginfodClientWithConfig, which also set up the not-found cache.
type DebuginfodHTTPClient struct {
	cfg     DebuginfodClientConfig
	metrics *metrics
	logger  log.Logger
	limits  Limits // queried per tenant for the maximum response body size

	// Used to deduplicate concurrent requests for the same build ID
	group singleflight.Group

	// notFoundCache remembers build IDs for which the server answered 404,
	// letting FetchDebuginfo fail fast without issuing another request.
	notFoundCache *ristretto.Cache[string, bool]
}
    46  
    47  // NewDebuginfodClient creates a new client for fetching debug information from a debuginfod server.
    48  func NewDebuginfodClient(logger log.Logger, baseURL string, metrics *metrics, limits Limits) (*DebuginfodHTTPClient, error) {
    49  	return NewDebuginfodClientWithConfig(logger, DebuginfodClientConfig{
    50  		BaseURL: baseURL,
    51  		//UserAgent:  "Pyroscope-Symbolizer/1.0",
    52  		BackoffConfig: backoff.Config{
    53  			MinBackoff: 1 * time.Second,
    54  			MaxBackoff: 10 * time.Second,
    55  			MaxRetries: 3,
    56  		},
    57  		NotFoundCacheMaxItems: 100000,
    58  		NotFoundCacheTTL:      7 * 24 * time.Hour,
    59  	}, metrics, limits)
    60  }
    61  
    62  // NewDebuginfodClientWithConfig creates a new client with the specified configuration.
    63  func NewDebuginfodClientWithConfig(logger log.Logger, cfg DebuginfodClientConfig, metrics *metrics, limits Limits) (*DebuginfodHTTPClient, error) {
    64  	httpClient := cfg.HTTPClient
    65  	if httpClient == nil {
    66  		transport := &http.Transport{
    67  			MaxIdleConnsPerHost: 10,
    68  			IdleConnTimeout:     90 * time.Second,
    69  			TLSHandshakeTimeout: 10 * time.Second,
    70  		}
    71  
    72  		httpClient = &http.Client{
    73  			Transport: transport,
    74  			Timeout:   120 * time.Second,
    75  			CheckRedirect: func(req *http.Request, via []*http.Request) error {
    76  				if len(via) >= 3 {
    77  					return fmt.Errorf("stopped after 3 redirects")
    78  				}
    79  				return nil
    80  			},
    81  		}
    82  	}
    83  
    84  	cache, err := ristretto.NewCache(&ristretto.Config[string, bool]{
    85  		NumCounters: cfg.NotFoundCacheMaxItems * 10,
    86  		MaxCost:     cfg.NotFoundCacheMaxItems,
    87  		BufferItems: 64,
    88  	})
    89  	if err != nil {
    90  		return nil, fmt.Errorf("failed to create not-found cache: %w", err)
    91  	}
    92  
    93  	client := &DebuginfodHTTPClient{
    94  		cfg: DebuginfodClientConfig{
    95  			BaseURL:       cfg.BaseURL,
    96  			UserAgent:     cfg.UserAgent,
    97  			HTTPClient:    httpClient,
    98  			BackoffConfig: cfg.BackoffConfig,
    99  		},
   100  		metrics:       metrics,
   101  		logger:        logger,
   102  		notFoundCache: cache,
   103  		limits:        limits,
   104  	}
   105  
   106  	return client, nil
   107  }
   108  
   109  // FetchDebuginfo fetches the debuginfo file for a specific build ID.
   110  func (c *DebuginfodHTTPClient) FetchDebuginfo(ctx context.Context, buildID string) (io.ReadCloser, error) {
   111  	start := time.Now()
   112  	status := statusSuccess
   113  	defer func() {
   114  		c.metrics.debuginfodRequestDuration.WithLabelValues(status).Observe(time.Since(start).Seconds())
   115  	}()
   116  
   117  	sanitizedBuildID, err := sanitizeBuildID(buildID)
   118  	if err != nil {
   119  		status = statusErrorInvalidID
   120  		return nil, err
   121  	}
   122  
   123  	if found, _ := c.notFoundCache.Get(sanitizedBuildID); found {
   124  		status = statusErrorNotFound
   125  		c.metrics.cacheOperations.WithLabelValues("not_found", "get", statusSuccess).Inc()
   126  		return nil, buildIDNotFoundError{buildID: sanitizedBuildID}
   127  	}
   128  	c.metrics.cacheOperations.WithLabelValues("not_found", "get", "miss").Inc()
   129  
   130  	v, err, _ := c.group.Do(sanitizedBuildID, func() (interface{}, error) {
   131  		return c.fetchDebugInfoWithRetries(ctx, sanitizedBuildID)
   132  	})
   133  
   134  	if err != nil {
   135  		var bnfErr buildIDNotFoundError
   136  		switch {
   137  		case errors.As(err, &bnfErr):
   138  			status = statusErrorNotFound
   139  		case errors.Is(err, context.Canceled):
   140  			status = statusErrorCanceled
   141  		case errors.Is(err, context.DeadlineExceeded):
   142  			status = statusErrorTimeout
   143  		case isInvalidBuildIDError(err):
   144  			status = statusErrorInvalidID
   145  		default:
   146  			if statusCode, ok := isHTTPStatusError(err); ok {
   147  				status = categorizeHTTPStatusCode(statusCode)
   148  			} else {
   149  				status = statusErrorOther
   150  			}
   151  		}
   152  		return nil, err
   153  	}
   154  
   155  	data := v.([]byte)
   156  	return io.NopCloser(bytes.NewReader(data)), nil
   157  }
   158  
   159  // doRequest performs an HTTP request to the specified URL and returns the response body.
   160  func (c *DebuginfodHTTPClient) doRequest(ctx context.Context, url string) ([]byte, error) {
   161  	tenantID, err := tenant.TenantID(ctx)
   162  	if err != nil {
   163  		return nil, err
   164  	}
   165  
   166  	maxBytes := int64(c.limits.SymbolizerMaxSymbolSizeBytes(tenantID))
   167  
   168  	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
   169  	if err != nil {
   170  		return nil, fmt.Errorf("failed to create request: %w", err)
   171  	}
   172  
   173  	req.Header.Set("Accept-Encoding", "gzip, deflate")
   174  	if c.cfg.UserAgent != "" {
   175  		req.Header.Set("User-Agent", c.cfg.UserAgent)
   176  	}
   177  
   178  	resp, err := c.cfg.HTTPClient.Do(req)
   179  	if err != nil {
   180  		return nil, fmt.Errorf("failed to execute request: %w", err)
   181  	}
   182  	defer resp.Body.Close()
   183  
   184  	var r io.Reader = resp.Body
   185  	// maxBytes == 0 means unlimited body size
   186  	if maxBytes > 0 {
   187  		r = io.LimitReader(r, maxBytes+1) // +1 to detect if limit is exceeded
   188  	}
   189  
   190  	data, err := io.ReadAll(r)
   191  	if err != nil {
   192  		return nil, fmt.Errorf("failed to read response body: %w", err)
   193  	}
   194  
   195  	// Check if we hit the size limit
   196  	if maxBytes > 0 && int64(len(data)) > maxBytes {
   197  		return nil, &ErrSymbolSizeBytesExceedsLimit{Limit: maxBytes}
   198  	}
   199  
   200  	if resp.StatusCode != http.StatusOK {
   201  		errorBody := string(data)
   202  
   203  		// Truncate large error responses
   204  		if len(errorBody) > 1000 {
   205  			errorBody = errorBody[:1000] + "... [truncated]"
   206  		}
   207  		return nil, httpStatusError{
   208  			statusCode: resp.StatusCode,
   209  			body:       errorBody,
   210  		}
   211  	}
   212  
   213  	c.metrics.debuginfodFileSize.Observe(float64(len(data)))
   214  
   215  	return data, nil
   216  }
   217  
   218  // fetchDebugInfoWithRetries attempts to fetch debug info with retries on transient errors.
   219  func (c *DebuginfodHTTPClient) fetchDebugInfoWithRetries(ctx context.Context, sanitizedBuildID string) ([]byte, error) {
   220  	url := fmt.Sprintf("%s/buildid/%s/debuginfo", c.cfg.BaseURL, sanitizedBuildID)
   221  	var data []byte
   222  
   223  	// Use dskit backoff for retries with exponential backoff
   224  	backOff := backoff.New(ctx, c.cfg.BackoffConfig)
   225  
   226  	attempt := func() ([]byte, error) {
   227  		return c.doRequest(ctx, url)
   228  	}
   229  
   230  	var lastErr error
   231  	for backOff.Ongoing() {
   232  		data, err := attempt()
   233  		if err == nil {
   234  			return data, nil
   235  		}
   236  
   237  		// Don't retry on 404 errors
   238  		if statusCode, isHTTPErr := isHTTPStatusError(err); isHTTPErr && statusCode == http.StatusNotFound {
   239  			c.notFoundCache.SetWithTTL(sanitizedBuildID, true, 1, c.cfg.NotFoundCacheTTL)
   240  			c.notFoundCache.Wait()
   241  			c.metrics.cacheOperations.WithLabelValues("not_found", "set", statusSuccess).Inc()
   242  			c.metrics.cacheSizeBytes.WithLabelValues("not_found").Set(float64(c.notFoundCache.Metrics.CostAdded()))
   243  			return nil, buildIDNotFoundError{buildID: sanitizedBuildID}
   244  		}
   245  
   246  		lastErr = err
   247  
   248  		if !isRetryableError(err) {
   249  			break
   250  		}
   251  
   252  		backOff.Wait()
   253  	}
   254  
   255  	if lastErr != nil {
   256  		return nil, fmt.Errorf("failed to fetch debuginfo after %d attempts: %w", backOff.NumRetries(), lastErr)
   257  	}
   258  
   259  	return data, nil
   260  }
   261  
   262  // categorizeHTTPStatusCode maps HTTP status codes to metric status strings.
   263  func categorizeHTTPStatusCode(statusCode int) string {
   264  	switch {
   265  	case statusCode == http.StatusNotFound:
   266  		return statusErrorNotFound
   267  	case statusCode == http.StatusUnauthorized || statusCode == http.StatusForbidden:
   268  		return statusErrorUnauthorized
   269  	case statusCode == http.StatusTooManyRequests:
   270  		return statusErrorRateLimited
   271  	case statusCode >= 400 && statusCode < 500:
   272  		return statusErrorClientError
   273  	case statusCode >= 500:
   274  		return statusErrorServerError
   275  	default:
   276  		return statusErrorHTTPOther
   277  	}
   278  }
   279  
   280  // isRetryableError determines if an error should trigger a retry attempt.
   281  func isRetryableError(err error) bool {
   282  	if err == nil {
   283  		return false
   284  	}
   285  
   286  	if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
   287  		return false
   288  	}
   289  
   290  	if isInvalidBuildIDError(err) {
   291  		return false
   292  	}
   293  
   294  	var bnfErr buildIDNotFoundError
   295  	if errors.As(err, &bnfErr) {
   296  		return false
   297  	}
   298  
   299  	if statusCode, ok := isHTTPStatusError(err); ok {
   300  		// Don't retry 4xx client errors except for 429 (too many requests)
   301  		if statusCode == http.StatusTooManyRequests {
   302  			return true
   303  		}
   304  		if statusCode >= 400 && statusCode < 500 {
   305  			return false
   306  		}
   307  		// Retry on 5xx server errors
   308  		return statusCode >= 500
   309  	}
   310  
   311  	if os.IsTimeout(err) {
   312  		return true
   313  	}
   314  
   315  	var netErr net.Error
   316  	if errors.As(err, &netErr) {
   317  		return netErr.Timeout()
   318  	}
   319  
   320  	return false
   321  }
   322  
   323  // sanitizeBuildID ensures that the buildID is a safe and valid string for use in file paths.
   324  // It validates that the build ID contains only alphanumeric characters, underscores, and hyphens.
   325  // This prevents potential security issues like path traversal attacks.
   326  func sanitizeBuildID(buildID string) (string, error) {
   327  	if buildID == "" {
   328  		return "", nil
   329  	}
   330  
   331  	validBuildID := regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
   332  	if !validBuildID.MatchString(buildID) {
   333  		return "", invalidBuildIDError{buildID: buildID}
   334  	}
   335  	return buildID, nil
   336  }