github.com/kyma-incubator/compass/components/director@v0.0.0-20230623144113-d764f56ff805/internal/metrics/pusher.go (about)

     1  package metrics
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net"
     7  	"net/http"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/sirupsen/logrus"
    12  
    13  	"github.com/google/uuid"
    14  
    15  	"github.com/kyma-incubator/compass/components/director/pkg/log"
    16  	"github.com/pkg/errors"
    17  	"github.com/prometheus/client_golang/prometheus"
    18  	"github.com/prometheus/client_golang/prometheus/push"
    19  )
    20  
const (
	// maxErrMessageLength is the error message length (in bytes) above which
	// errorDescription stops reporting the full message and keeps only the text
	// that follows the last ":" separator.
	maxErrMessageLength = 50
)
    24  
// PusherConfig is used to provide configuration options for AggregationFailurePusher.
type PusherConfig struct {
	// Enabled toggles the pusher; when false NewAggregationFailurePusher returns
	// a zero-value pusher whose report methods only log and skip.
	Enabled bool
	// Endpoint is the URL handed to push.New as the push target.
	Endpoint string
	// MetricName is the Name of the failures counter.
	MetricName string
	// Timeout is the timeout of the HTTP client used for pushing.
	Timeout time.Duration
	// Subsystem is the counter's Subsystem, is embedded in its Help text, and is
	// also passed to push.New as the job name.
	Subsystem string
	// Labels are the label names of the failures counter. The report methods
	// must supply exactly one value per label.
	Labels []string
}
    34  
// AggregationFailurePusher is used for pushing metrics to Prometheus related to failed aggregation.
// The zero value has a nil pusher; its report methods log an error and return without pushing.
type AggregationFailurePusher struct {
	// aggregationFailuresCounter counts reported failures, partitioned by the configured labels.
	aggregationFailuresCounter *prometheus.CounterVec
	// pusher sends the registered collectors to the configured endpoint; nil when disabled.
	pusher *push.Pusher
	// instanceID is generated once per pusher and attached to its log entries.
	instanceID uuid.UUID
}
    41  
    42  // NewAggregationFailurePusher returns a new Prometheus metrics pusher that can be used to report aggregation failures.
    43  func NewAggregationFailurePusher(cfg PusherConfig) AggregationFailurePusher {
    44  	if !cfg.Enabled {
    45  		return AggregationFailurePusher{}
    46  	}
    47  	instanceID := uuid.New()
    48  	log.D().WithField(InstanceIDKeyName, instanceID).Infof("Initializing Metrics Pusher...")
    49  
    50  	aggregationFailuresCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
    51  		Namespace: Namespace,
    52  		Subsystem: cfg.Subsystem,
    53  		Name:      cfg.MetricName,
    54  		Help:      fmt.Sprintf("Aggregation status for %s", cfg.Subsystem),
    55  	}, cfg.Labels)
    56  
    57  	pusher := newPusher(cfg, aggregationFailuresCounter)
    58  
    59  	return AggregationFailurePusher{
    60  		aggregationFailuresCounter: aggregationFailuresCounter,
    61  		pusher:                     pusher,
    62  		instanceID:                 instanceID,
    63  	}
    64  }
    65  
    66  // ReportAggregationFailure reports failed aggregation with the provided error.
    67  func (p AggregationFailurePusher) ReportAggregationFailure(ctx context.Context, err error) {
    68  	if p.pusher == nil {
    69  		log.C(ctx).Error("Metrics pusher is not configured, skipping report...")
    70  		return
    71  	}
    72  
    73  	log.C(ctx).WithFields(logrus.Fields{InstanceIDKeyName: p.instanceID}).Info("Reporting failed aggregation...")
    74  
    75  	p.aggregationFailuresCounter.WithLabelValues(errorDescription(err)).Inc()
    76  
    77  	p.push(ctx)
    78  }
    79  
    80  // ReportAggregationFailureORD reports failed aggregation with the provided error. copy
    81  func (p AggregationFailurePusher) ReportAggregationFailureORD(ctx context.Context, err string) {
    82  	if p.pusher == nil {
    83  		log.C(ctx).Error("Metrics pusher is not configured, skipping report...")
    84  		return
    85  	}
    86  
    87  	log.C(ctx).WithFields(logrus.Fields{InstanceIDKeyName: p.instanceID}).Info("Reporting failed aggregation...")
    88  
    89  	currentResourceID := log.C(ctx).Data["resource_id"]
    90  	currentResourceType := log.C(ctx).Data["resource_type"]
    91  	currentCorrelationID := log.C(ctx).Data["x-request-id"]
    92  
    93  	p.aggregationFailuresCounter.WithLabelValues(err, currentResourceType.(string), currentResourceID.(string), currentCorrelationID.(string)).Inc()
    94  
    95  	p.push(ctx)
    96  }
    97  
    98  func (p AggregationFailurePusher) push(ctx context.Context) {
    99  	if err := p.pusher.Add(); err != nil {
   100  		wrappedErr := errors.Wrap(err, "while pushing metrics to Pushgateway")
   101  		log.C(ctx).WithField(InstanceIDKeyName, p.instanceID).Error(wrappedErr)
   102  	}
   103  }
   104  
   105  func newPusher(cfg PusherConfig, collectors ...prometheus.Collector) *push.Pusher {
   106  	registry := prometheus.NewRegistry()
   107  	for _, c := range collectors {
   108  		registry.MustRegister(c)
   109  	}
   110  
   111  	return push.New(cfg.Endpoint, cfg.Subsystem).Gatherer(registry).Client(&http.Client{
   112  		Timeout: cfg.Timeout,
   113  	})
   114  }
   115  
   116  func errorDescription(err error) string {
   117  	var e *net.OpError
   118  	if errors.As(err, &e) && e.Err != nil {
   119  		return e.Err.Error()
   120  	}
   121  
   122  	if len(err.Error()) > maxErrMessageLength {
   123  		// not all errors are actually wrapped, sometimes the error message is just concatenated with ":"
   124  		errParts := strings.Split(err.Error(), ":")
   125  		return errParts[len(errParts)-1]
   126  	}
   127  
   128  	return err.Error()
   129  }