github.com/cilium/cilium@v1.16.2/pkg/datapath/linux/ipsec/xfrm_collector.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package ipsec
     5  
     6  import (
     7  	"log/slog"
     8  
     9  	"github.com/prometheus/client_golang/prometheus"
    10  	"github.com/prometheus/procfs"
    11  	"github.com/vishvananda/netlink"
    12  
    13  	"github.com/cilium/cilium/pkg/common/ipsec"
    14  	"github.com/cilium/cilium/pkg/logging/logfields"
    15  	"github.com/cilium/cilium/pkg/metrics"
    16  )
    17  
    18  const (
    19  	labelErrorType         = "type"
    20  	labelErrorTypeInbound  = "inbound"
    21  	labelErrorTypeOutbound = "outbound"
    22  
    23  	labelErrorOther            = "other"
    24  	labelErrorNoBuffer         = "no_buffer"
    25  	labelErrorHeader           = "header"
    26  	labelErrorNoState          = "no_state"
    27  	labelErrorStateProtocol    = "state_protocol"
    28  	labelErrorStateMode        = "state_mode"
    29  	labelErrorStateSequence    = "state_sequence"
    30  	labelErrorStateExpired     = "state_expired"
    31  	labelErrorStateMismatched  = "state_mismatched"
    32  	labelErrorStateInvalid     = "state_invalid"
    33  	labelErrorTemplateMismatch = "template_mismatched"
    34  	labelErrorNoPolicy         = "no_policy"
    35  	labelErrorPolicyBlocked    = "policy_blocked"
    36  	labelErrorPolicyDead       = "policy_dead"
    37  	labelErrorPolicy           = "policy"
    38  	labelErrorForwardHeader    = "forward_header"
    39  	labelErrorAcquire          = "acquire"
    40  	labelErrorBundleGeneration = "bundle_generation"
    41  	labelErrorBundleCheck      = "bundle_check"
    42  
    43  	labelDir = "direction"
    44  
    45  	labelDirIn  = "in"
    46  	labelDirOut = "out"
    47  	labelDirFwd = "fwd"
    48  )
    49  
    50  type xfrmCollector struct {
    51  	log              *slog.Logger
    52  	xfrmErrorDesc    *prometheus.Desc
    53  	nbKeysDesc       *prometheus.Desc
    54  	nbXFRMStatesDesc *prometheus.Desc
    55  	nbXFRMPolsDesc   *prometheus.Desc
    56  }
    57  
    58  func NewXFRMCollector(log *slog.Logger) prometheus.Collector {
    59  	return &xfrmCollector{
    60  		log: log,
    61  		xfrmErrorDesc: prometheus.NewDesc(
    62  			prometheus.BuildFQName(metrics.Namespace, subsystem, "xfrm_error"),
    63  			"Total number of xfrm errors",
    64  			[]string{labelErrorType, metrics.LabelError}, nil,
    65  		),
    66  		nbKeysDesc: prometheus.NewDesc(
    67  			prometheus.BuildFQName(metrics.Namespace, subsystem, "keys"),
    68  			"Number of IPsec keys in use",
    69  			[]string{}, nil,
    70  		),
    71  		nbXFRMStatesDesc: prometheus.NewDesc(
    72  			prometheus.BuildFQName(metrics.Namespace, subsystem, "xfrm_states"),
    73  			"Number of XFRM states",
    74  			[]string{labelDir}, nil,
    75  		),
    76  		nbXFRMPolsDesc: prometheus.NewDesc(
    77  			prometheus.BuildFQName(metrics.Namespace, subsystem, "xfrm_policies"),
    78  			"Number of XFRM policies",
    79  			[]string{labelDir}, nil,
    80  		),
    81  	}
    82  }
    83  
    84  func (x *xfrmCollector) Describe(ch chan<- *prometheus.Desc) {
    85  	ch <- x.xfrmErrorDesc
    86  	ch <- x.nbKeysDesc
    87  	ch <- x.nbXFRMStatesDesc
    88  	ch <- x.nbXFRMPolsDesc
    89  }
    90  
    91  func (x *xfrmCollector) collectErrors(ch chan<- prometheus.Metric) {
    92  	stats, err := procfs.NewXfrmStat()
    93  	if err != nil {
    94  		x.log.Error("Error while getting xfrm stats", logfields.Error, err)
    95  		return
    96  	}
    97  
    98  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInError), labelErrorTypeInbound, labelErrorOther)
    99  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInBufferError), labelErrorTypeInbound, labelErrorNoBuffer)
   100  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInHdrError), labelErrorTypeInbound, labelErrorHeader)
   101  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInNoStates), labelErrorTypeInbound, labelErrorNoState)
   102  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInStateProtoError), labelErrorTypeInbound, labelErrorStateProtocol)
   103  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInStateModeError), labelErrorTypeInbound, labelErrorStateMode)
   104  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInStateSeqError), labelErrorTypeInbound, labelErrorStateSequence)
   105  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInStateExpired), labelErrorTypeInbound, labelErrorStateExpired)
   106  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInStateMismatch), labelErrorTypeInbound, labelErrorStateMismatched)
   107  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInStateInvalid), labelErrorTypeInbound, labelErrorStateInvalid)
   108  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInTmplMismatch), labelErrorTypeInbound, labelErrorTemplateMismatch)
   109  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInNoPols), labelErrorTypeInbound, labelErrorNoPolicy)
   110  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInPolBlock), labelErrorTypeInbound, labelErrorPolicyBlocked)
   111  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmInPolError), labelErrorTypeInbound, labelErrorPolicy)
   112  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmFwdHdrError), labelErrorTypeInbound, labelErrorForwardHeader)
   113  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmAcquireError), labelErrorTypeInbound, labelErrorAcquire)
   114  
   115  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmOutError), labelErrorTypeOutbound, labelErrorOther)
   116  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmOutBundleGenError), labelErrorTypeOutbound, labelErrorBundleGeneration)
   117  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmOutBundleCheckError), labelErrorTypeOutbound, labelErrorBundleCheck)
   118  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmOutNoStates), labelErrorTypeOutbound, labelErrorNoState)
   119  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmOutStateProtoError), labelErrorTypeOutbound, labelErrorStateProtocol)
   120  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmOutStateModeError), labelErrorTypeOutbound, labelErrorStateMode)
   121  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmOutStateSeqError), labelErrorTypeOutbound, labelErrorStateSequence)
   122  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmOutStateExpired), labelErrorTypeOutbound, labelErrorStateExpired)
   123  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmOutPolBlock), labelErrorTypeOutbound, labelErrorPolicyBlocked)
   124  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmOutPolDead), labelErrorTypeOutbound, labelErrorPolicyDead)
   125  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmOutPolError), labelErrorTypeOutbound, labelErrorPolicy)
   126  	ch <- prometheus.MustNewConstMetric(x.xfrmErrorDesc, prometheus.GaugeValue, float64(stats.XfrmOutStateInvalid), labelErrorTypeOutbound, labelErrorStateInvalid)
   127  }
   128  
   129  func (x *xfrmCollector) collectConfigStats(ch chan<- prometheus.Metric) {
   130  	states, err := netlink.XfrmStateList(netlink.FAMILY_ALL)
   131  	if err != nil {
   132  		x.log.Error("Failed to retrieve XFRM states to compute Prometheus metrics", logfields.Error, err)
   133  		return
   134  	}
   135  	nbKeys, err := ipsec.CountUniqueIPsecKeys(states)
   136  	if err != nil {
   137  		x.log.Error("Error counting IPsec keys", logfields.Error, err)
   138  	}
   139  	ch <- prometheus.MustNewConstMetric(x.nbKeysDesc, prometheus.GaugeValue, float64(nbKeys))
   140  
   141  	nbStatesIn, nbStatesOut := ipsec.CountXfrmStatesByDir(states)
   142  	ch <- prometheus.MustNewConstMetric(x.nbXFRMStatesDesc, prometheus.GaugeValue, float64(nbStatesIn), labelDirIn)
   143  	ch <- prometheus.MustNewConstMetric(x.nbXFRMStatesDesc, prometheus.GaugeValue, float64(nbStatesOut), labelDirOut)
   144  
   145  	policies, err := netlink.XfrmPolicyList(netlink.FAMILY_ALL)
   146  	if err != nil {
   147  		x.log.Error("Failed to retrieve XFRM policies to compute Prometheus metrics", logfields.Error, err)
   148  		return
   149  	}
   150  	nbPolIn, nbPolOut, nbPolFwd := ipsec.CountXfrmPoliciesByDir(policies)
   151  	ch <- prometheus.MustNewConstMetric(x.nbXFRMPolsDesc, prometheus.GaugeValue, float64(nbPolIn), labelDirIn)
   152  	ch <- prometheus.MustNewConstMetric(x.nbXFRMPolsDesc, prometheus.GaugeValue, float64(nbPolOut), labelDirOut)
   153  	ch <- prometheus.MustNewConstMetric(x.nbXFRMPolsDesc, prometheus.GaugeValue, float64(nbPolFwd), labelDirFwd)
   154  }
   155  
   156  func (x *xfrmCollector) Collect(ch chan<- prometheus.Metric) {
   157  	x.collectErrors(ch)
   158  	x.collectConfigStats(ch)
   159  }