github.com/cilium/cilium@v1.16.2/pkg/hubble/metrics/metrics.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Hubble
     3  
     4  package metrics
     5  
     6  import (
     7  	"context"
     8  	"crypto/tls"
     9  	"errors"
    10  	"fmt"
    11  	"net/http"
    12  
    13  	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
    14  	"github.com/prometheus/client_golang/prometheus"
    15  	"github.com/prometheus/client_golang/prometheus/promhttp"
    16  	"github.com/sirupsen/logrus"
    17  	"k8s.io/client-go/util/workqueue"
    18  
    19  	pb "github.com/cilium/cilium/api/v1/flow"
    20  	"github.com/cilium/cilium/pkg/crypto/certloader"
    21  	"github.com/cilium/cilium/pkg/hubble/metrics/api"
    22  	_ "github.com/cilium/cilium/pkg/hubble/metrics/dns"               // invoke init
    23  	_ "github.com/cilium/cilium/pkg/hubble/metrics/drop"              // invoke init
    24  	_ "github.com/cilium/cilium/pkg/hubble/metrics/flow"              // invoke init
    25  	_ "github.com/cilium/cilium/pkg/hubble/metrics/flows-to-world"    // invoke init
    26  	_ "github.com/cilium/cilium/pkg/hubble/metrics/http"              // invoke init
    27  	_ "github.com/cilium/cilium/pkg/hubble/metrics/icmp"              // invoke init
    28  	_ "github.com/cilium/cilium/pkg/hubble/metrics/kafka"             // invoke init
    29  	_ "github.com/cilium/cilium/pkg/hubble/metrics/policy"            // invoke init
    30  	_ "github.com/cilium/cilium/pkg/hubble/metrics/port-distribution" // invoke init
    31  	_ "github.com/cilium/cilium/pkg/hubble/metrics/tcp"               // invoke init
    32  	"github.com/cilium/cilium/pkg/hubble/server/serveroption"
    33  	"github.com/cilium/cilium/pkg/k8s/types"
    34  	"github.com/cilium/cilium/pkg/time"
    35  )
    36  
    37  type CiliumEndpointDeletionHandler struct {
    38  	gracefulPeriod time.Duration
    39  	queue          workqueue.DelayingInterface
    40  }
    41  
    42  var (
    43  	enabledMetrics          *api.Handlers
    44  	registry                = prometheus.NewPedanticRegistry()
    45  	endpointDeletionHandler *CiliumEndpointDeletionHandler
    46  )
    47  
    48  // Additional metrics - they're not counting flows, so are not served via
    49  // Hubble metrics API, but belong to the same Prometheus namespace.
    50  var (
    51  	labelSource = "source"
    52  	LostEvents  = prometheus.NewCounterVec(prometheus.CounterOpts{
    53  		Namespace: api.DefaultPrometheusNamespace,
    54  		Name:      "lost_events_total",
    55  		Help:      "Number of lost events",
    56  	}, []string{labelSource})
    57  )
    58  
    59  // Metrics related to Hubble metrics HTTP requests handling
    60  var (
    61  	RequestsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
    62  		Namespace: api.DefaultPrometheusNamespace,
    63  		Name:      "metrics_http_handler_requests_total",
    64  		Help:      "A counter for requests to Hubble metrics handler.",
    65  	}, []string{"code"})
    66  	RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
    67  		Namespace: api.DefaultPrometheusNamespace,
    68  		Name:      "metrics_http_handler_request_duration_seconds",
    69  		Help:      "A histogram of latencies of Hubble metrics handler.",
    70  	}, []string{"code"})
    71  )
    72  
    73  // ProcessFlow processes a flow and updates metrics
    74  func ProcessFlow(ctx context.Context, flow *pb.Flow) error {
    75  	if enabledMetrics != nil {
    76  		return enabledMetrics.ProcessFlow(ctx, flow)
    77  	}
    78  	return nil
    79  }
    80  
    81  func ProcessCiliumEndpointDeletion(pod *types.CiliumEndpoint) error {
    82  	if endpointDeletionHandler != nil && enabledMetrics != nil {
    83  		endpointDeletionHandler.queue.AddAfter(pod, endpointDeletionHandler.gracefulPeriod)
    84  	}
    85  	return nil
    86  }
    87  
    88  func initMetricHandlers(enabled api.Map) (*api.Handlers, error) {
    89  	return api.DefaultRegistry().ConfigureHandlers(registry, enabled)
    90  }
    91  
    92  func initMetricsServer(address string, metricsTLSConfig *certloader.WatchedServerConfig, enableOpenMetrics bool, errChan chan error) {
    93  	go func() {
    94  		mux := http.NewServeMux()
    95  		handler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{
    96  			EnableOpenMetrics: enableOpenMetrics,
    97  		})
    98  		handler = promhttp.InstrumentHandlerCounter(RequestsTotal, handler)
    99  		handler = promhttp.InstrumentHandlerDuration(RequestDuration, handler)
   100  		mux.Handle("/metrics", handler)
   101  		srv := http.Server{
   102  			Addr:    address,
   103  			Handler: mux,
   104  		}
   105  		if metricsTLSConfig != nil {
   106  			srv.TLSConfig = metricsTLSConfig.ServerConfig(&tls.Config{ //nolint:gosec
   107  				MinVersion: serveroption.MinTLSVersion,
   108  			})
   109  			errChan <- srv.ListenAndServeTLS("", "")
   110  		} else {
   111  			errChan <- srv.ListenAndServe()
   112  		}
   113  	}()
   114  
   115  }
   116  
   117  func initEndpointDeletionHandler() {
   118  	endpointDeletionHandler = &CiliumEndpointDeletionHandler{
   119  		gracefulPeriod: time.Minute,
   120  		queue:          workqueue.NewDelayingQueue(),
   121  	}
   122  
   123  	go func() {
   124  		for {
   125  			endpoint, quit := endpointDeletionHandler.queue.Get()
   126  			if quit {
   127  				return
   128  			}
   129  			enabledMetrics.ProcessCiliumEndpointDeletion(endpoint.(*types.CiliumEndpoint))
   130  			endpointDeletionHandler.queue.Done(endpoint)
   131  		}
   132  	}()
   133  }
   134  
   135  // initMetrics initializes the metrics system
   136  func initMetrics(address string, metricsTLSConfig *certloader.WatchedServerConfig, enabled api.Map, grpcMetrics *grpc_prometheus.ServerMetrics, enableOpenMetrics bool) (<-chan error, error) {
   137  	e, err := initMetricHandlers(enabled)
   138  	if err != nil {
   139  		return nil, err
   140  	}
   141  	enabledMetrics = e
   142  
   143  	registry.MustRegister(grpcMetrics)
   144  	registry.MustRegister(LostEvents)
   145  	registry.MustRegister(RequestsTotal)
   146  	registry.MustRegister(RequestDuration)
   147  
   148  	errChan := make(chan error, 1)
   149  
   150  	initMetricsServer(address, metricsTLSConfig, enableOpenMetrics, errChan)
   151  	initEndpointDeletionHandler()
   152  
   153  	return errChan, nil
   154  }
   155  
   156  // EnableMetrics starts the metrics server with a given list of metrics. This is the
   157  // function Cilium uses to configure Hubble metrics in embedded mode.
   158  func EnableMetrics(log logrus.FieldLogger, metricsServer string, metricsTLSConfig *certloader.WatchedServerConfig, m []string, grpcMetrics *grpc_prometheus.ServerMetrics, enableOpenMetrics bool) error {
   159  	errChan, err := initMetrics(metricsServer, metricsTLSConfig, api.ParseMetricList(m), grpcMetrics, enableOpenMetrics)
   160  	if err != nil {
   161  		return fmt.Errorf("unable to setup metrics: %w", err)
   162  	}
   163  	go func() {
   164  		err := <-errChan
   165  		if err != nil && !errors.Is(err, http.ErrServerClosed) {
   166  			log.WithError(err).Error("Unable to initialize Hubble metrics server")
   167  		}
   168  	}()
   169  	return nil
   170  }
   171  
   172  // Register registers additional metrics collectors within hubble metrics registry.
   173  func Register(cs ...prometheus.Collector) {
   174  	registry.MustRegister(cs...)
   175  }