github.com/looshlee/beatles@v0.0.0-20220727174639-742810ab631c/pkg/metrics/status.go (about) 1 // Copyright 2018 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package metrics 16 17 import ( 18 "time" 19 20 clientPkg "github.com/cilium/cilium/pkg/client" 21 healthClientPkg "github.com/cilium/cilium/pkg/health/client" 22 23 "github.com/prometheus/client_golang/prometheus" 24 log "github.com/sirupsen/logrus" 25 ) 26 27 const ( 28 updateLatencyMetricsInterval = 30 * time.Second 29 ) 30 31 type statusCollector struct { 32 ciliumClient *clientPkg.Client 33 healthClient *healthClientPkg.Client 34 35 controllersFailingDesc *prometheus.Desc 36 ipAddressesDesc *prometheus.Desc 37 unreachableNodesDesc *prometheus.Desc 38 unreachableHealthEndpointsDesc *prometheus.Desc 39 } 40 41 func newStatusCollector() *statusCollector { 42 ciliumClient, err := clientPkg.NewClient("") 43 if err != nil { 44 log.WithError(err).Fatal("Error while creating Cilium API client") 45 } 46 47 healthClient, err := healthClientPkg.NewClient("") 48 if err != nil { 49 log.WithError(err).Fatal("Error while creating cilium-health API client") 50 } 51 52 return &statusCollector{ 53 ciliumClient: ciliumClient, 54 healthClient: healthClient, 55 controllersFailingDesc: prometheus.NewDesc( 56 prometheus.BuildFQName(Namespace, "", "controllers_failing"), 57 "Number of failing controllers", 58 nil, nil, 59 ), 60 ipAddressesDesc: prometheus.NewDesc( 61 prometheus.BuildFQName(Namespace, "", "ip_addresses"), 62 "Number of allocated IP addresses", 63 []string{"family"}, nil, 64 ), 65 unreachableNodesDesc: prometheus.NewDesc( 66 prometheus.BuildFQName(Namespace, "", "unreachable_nodes"), 67 "Number of nodes that cannot be reached", 68 nil, nil, 69 ), 70 unreachableHealthEndpointsDesc: prometheus.NewDesc( 71 prometheus.BuildFQName(Namespace, "", "unreachable_health_endpoints"), 72 "Number of health endpoints that cannot be reached", 73 nil, nil, 74 ), 75 } 76 } 77 78 func (s *statusCollector) Describe(ch chan<- *prometheus.Desc) { 79 ch <- s.controllersFailingDesc 80 ch <- s.ipAddressesDesc 81 ch <- s.unreachableNodesDesc 82 ch <- s.unreachableHealthEndpointsDesc 83 } 84 85 func (s *statusCollector) Collect(ch chan<- prometheus.Metric) { 86 statusResponse, err := s.ciliumClient.Daemon.GetHealthz(nil) 87 if err != nil { 88 log.WithError(err).Error("Error while getting Cilium status") 89 return 90 } 91 92 if statusResponse.Payload == nil { 93 return 94 } 95 96 // Controllers failing 97 controllersFailing := 0 98 99 for _, ctrl := range statusResponse.Payload.Controllers { 100 if ctrl.Status == nil { 101 continue 102 } 103 if ctrl.Status.ConsecutiveFailureCount > 0 { 104 controllersFailing++ 105 } 106 } 107 108 ch <- prometheus.MustNewConstMetric( 109 s.controllersFailingDesc, 110 prometheus.GaugeValue, 111 float64(controllersFailing), 112 ) 113 114 if statusResponse.Payload.IPAM != nil { 115 // Address count 116 ch <- prometheus.MustNewConstMetric( 117 s.ipAddressesDesc, 118 prometheus.GaugeValue, 119 float64(len(statusResponse.Payload.IPAM.IPV4)), 120 "ipv4", 121 ) 122 123 ch <- prometheus.MustNewConstMetric( 124 s.ipAddressesDesc, 125 prometheus.GaugeValue, 126 float64(len(statusResponse.Payload.IPAM.IPV6)), 127 "ipv6", 128 ) 129 } 130 131 healthStatusResponse, err := s.healthClient.Connectivity.GetStatus(nil) 132 if err != nil { 133 log.WithError(err).Error("Error while getting cilium-health status") 134 return 135 } 136 137 if healthStatusResponse.Payload == nil { 138 return 139 } 140 141 // Nodes and endpoints healthStatusResponse 142 var ( 143 unreachableNodes int 144 unreachableEndpoints int 145 ) 146 147 for _, nodeStatus := range healthStatusResponse.Payload.Nodes { 148 if !healthClientPkg.PathIsHealthy(healthClientPkg.GetHostPrimaryAddress(nodeStatus)) { 149 unreachableNodes++ 150 } 151 if nodeStatus.Endpoint != nil && !healthClientPkg.PathIsHealthy(nodeStatus.Endpoint) { 152 unreachableEndpoints++ 153 } 154 } 155 156 ch <- prometheus.MustNewConstMetric( 157 s.unreachableNodesDesc, 158 prometheus.GaugeValue, 159 float64(unreachableNodes), 160 ) 161 162 ch <- prometheus.MustNewConstMetric( 163 s.unreachableHealthEndpointsDesc, 164 prometheus.GaugeValue, 165 float64(unreachableEndpoints), 166 ) 167 }