// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

// Page header of the listing this file was taken from:
// github.com/cilium/cilium@v1.16.2/pkg/bgpv1/metrics/metrics.go

package metrics

import (
	"context"
	"net/netip"
	"strconv"

	"github.com/cilium/hive/cell"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"

	"github.com/cilium/cilium/pkg/bgpv1/agent"
	"github.com/cilium/cilium/pkg/bgpv1/types"
	"github.com/cilium/cilium/pkg/metrics"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/time"
)

const (
	// Label names attached to the metrics emitted by the collector:
	//   - LabelVRouter carries the local ASN of the peer entry, formatted in
	//     base 10.
	//   - LabelNeighbor carries the peer's "address:port" string.
	//   - LabelAfi and LabelSafi carry the Afi and Safi values of an address
	//     family entry of the peer.
	LabelVRouter  = "vrouter"
	LabelNeighbor = "neighbor"
	LabelAfi      = "afi"
	LabelSafi     = "safi"

	// metricsSubsystem is the subsystem part passed to prometheus.BuildFQName
	// for every metric name built in this file.
	metricsSubsystem = "bgp_control_plane"
)

// collector is the Prometheus collector for the BGP Control Plane. Its
// Describe and Collect methods emit one session-state sample per peer and one
// advertised/received route-count sample per peer and address family.
type collector struct {
	// Descriptors of the three metric families emitted by Collect.
	SessionState          *prometheus.Desc
	TotalAdvertisedRoutes *prometheus.Desc
	TotalReceivedRoutes   *prometheus.Desc

	// in holds the dependencies handed to RegisterCollector; Collect reads
	// the router manager and the logger from it.
	in collectorIn
}

// collectorIn bundles the dependencies of RegisterCollector:
//   - Logger reports collection failures.
//   - DaemonConfig.EnableBGPControlPlane decides whether the collector is
//     registered at all.
//   - Registry is where the collector gets registered.
//   - RouterManager is queried for peer information on every collection.
//
// NOTE(review): the embedded cell.In presumably marks this struct as a hive
// parameter struct whose fields are filled in by dependency injection —
// confirm against the hive documentation.
type collectorIn struct {
	cell.In

	Logger        logrus.FieldLogger
	DaemonConfig  *option.DaemonConfig
	Registry      *metrics.Registry
	RouterManager agent.BGPRouterManager
}

// RegisterCollector registers the BGP Control Plane metrics collector to the
// global prometheus registry. We don't rely on metrics.Metric because the
// collectors we can provide through metrics.Metric need to implement
// prometheus.Collector per metric, which is not optimal in our case. We can
// retrieve multiple metrics from a single call to RouterManager.GetPeers()
// and it is wasteful to call the same function multiple times, once for each
// metric. Thus, we provide a raw Collector through the MustRegister
// interface. We may want to revisit this in the future.
56 func RegisterCollector(in collectorIn) { 57 // Don't provide the collector if BGP control plane is disabled 58 if !in.DaemonConfig.EnableBGPControlPlane { 59 return 60 } 61 in.Registry.MustRegister(&collector{ 62 SessionState: prometheus.NewDesc( 63 prometheus.BuildFQName(metrics.Namespace, metricsSubsystem, "session_state"), 64 "Current state of the BGP session with the peer, Up = 1 or Down = 0", 65 []string{LabelVRouter, LabelNeighbor}, nil, 66 ), 67 TotalAdvertisedRoutes: prometheus.NewDesc( 68 prometheus.BuildFQName(metrics.Namespace, metricsSubsystem, "advertised_routes"), 69 "Number of routes advertised to the peer", 70 []string{LabelVRouter, LabelNeighbor, LabelAfi, LabelSafi}, nil, 71 ), 72 TotalReceivedRoutes: prometheus.NewDesc( 73 prometheus.BuildFQName(metrics.Namespace, metricsSubsystem, "received_routes"), 74 "Number of routes received from the peer", 75 []string{LabelVRouter, LabelNeighbor, LabelAfi, LabelSafi}, nil, 76 ), 77 in: in, 78 }) 79 } 80 81 func (c *collector) Describe(ch chan<- *prometheus.Desc) { 82 ch <- c.SessionState 83 ch <- c.TotalAdvertisedRoutes 84 ch <- c.TotalReceivedRoutes 85 } 86 87 func (c *collector) Collect(ch chan<- prometheus.Metric) { 88 // We defensively set a 5 sec timeout here. When the underlying router 89 // is not responsive, we cannot make a progress. 5 sec is chosen to be 90 // a too long time that we should never hit for normal cases. We should 91 // revisit this timeout when the metrics collection starts to involve a 92 // network communication. 93 ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) 94 peers, err := c.in.RouterManager.GetPeers(ctx) 95 cancel() 96 if err != nil { 97 c.in.Logger.WithError(err).Error("Failed to retrieve BGP peer information. 
Metrics is not collected.") 98 return 99 } 100 101 for _, peer := range peers { 102 if peer == nil { 103 continue 104 } 105 106 vrouterLabel := strconv.FormatInt(peer.LocalAsn, 10) 107 108 addr, err := netip.ParseAddr(peer.PeerAddress) 109 if err != nil { 110 continue 111 } 112 113 neighborLabel := netip.AddrPortFrom(addr, uint16(peer.PeerPort)).String() 114 115 // Collect session state metrics 116 var up float64 117 if peer.SessionState == types.SessionEstablished.String() { 118 up = 1 119 } else { 120 up = 0 121 } 122 ch <- prometheus.MustNewConstMetric( 123 c.SessionState, 124 prometheus.GaugeValue, 125 up, 126 vrouterLabel, 127 neighborLabel, 128 ) 129 130 // Collect route metrics per address family 131 for _, family := range peer.Families { 132 if family == nil { 133 continue 134 } 135 ch <- prometheus.MustNewConstMetric( 136 c.TotalAdvertisedRoutes, 137 prometheus.GaugeValue, 138 float64(family.Advertised), 139 vrouterLabel, 140 neighborLabel, 141 family.Afi, 142 family.Safi, 143 ) 144 ch <- prometheus.MustNewConstMetric( 145 c.TotalReceivedRoutes, 146 prometheus.GaugeValue, 147 float64(family.Received), 148 vrouterLabel, 149 neighborLabel, 150 family.Afi, 151 family.Safi, 152 ) 153 } 154 } 155 }