github.com/looshlee/beatles@v0.0.0-20220727174639-742810ab631c/pkg/metrics/status.go (about)

     1  // Copyright 2018 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package metrics
    16  
    17  import (
    18  	"time"
    19  
    20  	clientPkg "github.com/cilium/cilium/pkg/client"
    21  	healthClientPkg "github.com/cilium/cilium/pkg/health/client"
    22  
    23  	"github.com/prometheus/client_golang/prometheus"
    24  	log "github.com/sirupsen/logrus"
    25  )
    26  
    27  const (
    28  	updateLatencyMetricsInterval = 30 * time.Second
    29  )
    30  
    31  type statusCollector struct {
    32  	ciliumClient *clientPkg.Client
    33  	healthClient *healthClientPkg.Client
    34  
    35  	controllersFailingDesc         *prometheus.Desc
    36  	ipAddressesDesc                *prometheus.Desc
    37  	unreachableNodesDesc           *prometheus.Desc
    38  	unreachableHealthEndpointsDesc *prometheus.Desc
    39  }
    40  
    41  func newStatusCollector() *statusCollector {
    42  	ciliumClient, err := clientPkg.NewClient("")
    43  	if err != nil {
    44  		log.WithError(err).Fatal("Error while creating Cilium API client")
    45  	}
    46  
    47  	healthClient, err := healthClientPkg.NewClient("")
    48  	if err != nil {
    49  		log.WithError(err).Fatal("Error while creating cilium-health API client")
    50  	}
    51  
    52  	return &statusCollector{
    53  		ciliumClient: ciliumClient,
    54  		healthClient: healthClient,
    55  		controllersFailingDesc: prometheus.NewDesc(
    56  			prometheus.BuildFQName(Namespace, "", "controllers_failing"),
    57  			"Number of failing controllers",
    58  			nil, nil,
    59  		),
    60  		ipAddressesDesc: prometheus.NewDesc(
    61  			prometheus.BuildFQName(Namespace, "", "ip_addresses"),
    62  			"Number of allocated IP addresses",
    63  			[]string{"family"}, nil,
    64  		),
    65  		unreachableNodesDesc: prometheus.NewDesc(
    66  			prometheus.BuildFQName(Namespace, "", "unreachable_nodes"),
    67  			"Number of nodes that cannot be reached",
    68  			nil, nil,
    69  		),
    70  		unreachableHealthEndpointsDesc: prometheus.NewDesc(
    71  			prometheus.BuildFQName(Namespace, "", "unreachable_health_endpoints"),
    72  			"Number of health endpoints that cannot be reached",
    73  			nil, nil,
    74  		),
    75  	}
    76  }
    77  
    78  func (s *statusCollector) Describe(ch chan<- *prometheus.Desc) {
    79  	ch <- s.controllersFailingDesc
    80  	ch <- s.ipAddressesDesc
    81  	ch <- s.unreachableNodesDesc
    82  	ch <- s.unreachableHealthEndpointsDesc
    83  }
    84  
    85  func (s *statusCollector) Collect(ch chan<- prometheus.Metric) {
    86  	statusResponse, err := s.ciliumClient.Daemon.GetHealthz(nil)
    87  	if err != nil {
    88  		log.WithError(err).Error("Error while getting Cilium status")
    89  		return
    90  	}
    91  
    92  	if statusResponse.Payload == nil {
    93  		return
    94  	}
    95  
    96  	// Controllers failing
    97  	controllersFailing := 0
    98  
    99  	for _, ctrl := range statusResponse.Payload.Controllers {
   100  		if ctrl.Status == nil {
   101  			continue
   102  		}
   103  		if ctrl.Status.ConsecutiveFailureCount > 0 {
   104  			controllersFailing++
   105  		}
   106  	}
   107  
   108  	ch <- prometheus.MustNewConstMetric(
   109  		s.controllersFailingDesc,
   110  		prometheus.GaugeValue,
   111  		float64(controllersFailing),
   112  	)
   113  
   114  	if statusResponse.Payload.IPAM != nil {
   115  		// Address count
   116  		ch <- prometheus.MustNewConstMetric(
   117  			s.ipAddressesDesc,
   118  			prometheus.GaugeValue,
   119  			float64(len(statusResponse.Payload.IPAM.IPV4)),
   120  			"ipv4",
   121  		)
   122  
   123  		ch <- prometheus.MustNewConstMetric(
   124  			s.ipAddressesDesc,
   125  			prometheus.GaugeValue,
   126  			float64(len(statusResponse.Payload.IPAM.IPV6)),
   127  			"ipv6",
   128  		)
   129  	}
   130  
   131  	healthStatusResponse, err := s.healthClient.Connectivity.GetStatus(nil)
   132  	if err != nil {
   133  		log.WithError(err).Error("Error while getting cilium-health status")
   134  		return
   135  	}
   136  
   137  	if healthStatusResponse.Payload == nil {
   138  		return
   139  	}
   140  
   141  	// Nodes and endpoints healthStatusResponse
   142  	var (
   143  		unreachableNodes     int
   144  		unreachableEndpoints int
   145  	)
   146  
   147  	for _, nodeStatus := range healthStatusResponse.Payload.Nodes {
   148  		if !healthClientPkg.PathIsHealthy(healthClientPkg.GetHostPrimaryAddress(nodeStatus)) {
   149  			unreachableNodes++
   150  		}
   151  		if nodeStatus.Endpoint != nil && !healthClientPkg.PathIsHealthy(nodeStatus.Endpoint) {
   152  			unreachableEndpoints++
   153  		}
   154  	}
   155  
   156  	ch <- prometheus.MustNewConstMetric(
   157  		s.unreachableNodesDesc,
   158  		prometheus.GaugeValue,
   159  		float64(unreachableNodes),
   160  	)
   161  
   162  	ch <- prometheus.MustNewConstMetric(
   163  		s.unreachableHealthEndpointsDesc,
   164  		prometheus.GaugeValue,
   165  		float64(unreachableEndpoints),
   166  	)
   167  }