github.com/thanos-io/thanos@v0.32.5/internal/cortex/querier/queryrange/retry.go (about)

     1  // Copyright (c) The Cortex Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package queryrange
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  
    10  	"github.com/go-kit/log"
    11  	"github.com/go-kit/log/level"
    12  	"github.com/prometheus/client_golang/prometheus"
    13  	"github.com/prometheus/client_golang/prometheus/promauto"
    14  	"github.com/weaveworks/common/httpgrpc"
    15  
    16  	util_log "github.com/thanos-io/thanos/internal/cortex/util/log"
    17  )
    18  
    19  type RetryMiddlewareMetrics struct {
    20  	retriesCount prometheus.Histogram
    21  }
    22  
    23  func NewRetryMiddlewareMetrics(registerer prometheus.Registerer) *RetryMiddlewareMetrics {
    24  	return &RetryMiddlewareMetrics{
    25  		retriesCount: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{
    26  			Namespace: "cortex",
    27  			Name:      "query_frontend_retries",
    28  			Help:      "Number of times a request is retried.",
    29  			Buckets:   []float64{0, 1, 2, 3, 4, 5},
    30  		}),
    31  	}
    32  }
    33  
    34  type retry struct {
    35  	log        log.Logger
    36  	next       Handler
    37  	maxRetries int
    38  
    39  	metrics *RetryMiddlewareMetrics
    40  }
    41  
    42  // NewRetryMiddleware returns a middleware that retries requests if they
    43  // fail with 500 or a non-HTTP error.
    44  func NewRetryMiddleware(log log.Logger, maxRetries int, metrics *RetryMiddlewareMetrics) Middleware {
    45  	if metrics == nil {
    46  		metrics = NewRetryMiddlewareMetrics(nil)
    47  	}
    48  
    49  	return MiddlewareFunc(func(next Handler) Handler {
    50  		return retry{
    51  			log:        log,
    52  			next:       next,
    53  			maxRetries: maxRetries,
    54  			metrics:    metrics,
    55  		}
    56  	})
    57  }
    58  
    59  func (r retry) Do(ctx context.Context, req Request) (Response, error) {
    60  	tries := 0
    61  	defer func() { r.metrics.retriesCount.Observe(float64(tries)) }()
    62  
    63  	var lastErr error
    64  	for ; tries < r.maxRetries; tries++ {
    65  		if ctx.Err() != nil {
    66  			return nil, ctx.Err()
    67  		}
    68  		resp, err := r.next.Do(ctx, req)
    69  		if err == nil {
    70  			return resp, nil
    71  		}
    72  
    73  		if errors.Is(err, context.Canceled) {
    74  			return nil, err
    75  		}
    76  
    77  		// Retry if we get a HTTP 500 or a non-HTTP error.
    78  		httpResp, ok := httpgrpc.HTTPResponseFromError(err)
    79  		if !ok || httpResp.Code/100 == 5 {
    80  			lastErr = err
    81  			level.Error(util_log.WithContext(ctx, r.log)).Log("msg", "error processing request", "try", tries, "err", err)
    82  			continue
    83  		}
    84  
    85  		return nil, err
    86  	}
    87  	return nil, lastErr
    88  }