github.com/noironetworks/cilium-net@v1.6.12/pkg/endpoint/metrics.go (about)

     1  // Copyright 2018-2019 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package endpoint
    16  
    17  import (
    18  	"time"
    19  
    20  	"github.com/cilium/cilium/api/v1/models"
    21  	"github.com/cilium/cilium/pkg/datapath/loader"
    22  	"github.com/cilium/cilium/pkg/lock"
    23  	"github.com/cilium/cilium/pkg/logging/logfields"
    24  	"github.com/cilium/cilium/pkg/metrics"
    25  	"github.com/cilium/cilium/pkg/spanstat"
    26  
    27  	"github.com/prometheus/client_golang/prometheus"
    28  )
    29  
    30  var endpointPolicyStatus endpointPolicyStatusMap
    31  
    32  func init() {
    33  	endpointPolicyStatus = newEndpointPolicyStatusMap()
    34  }
    35  
// statistics is implemented by types that can expose their span statistics
// as a map keyed by scope name, which is the form sendMetrics consumes.
type statistics interface {
	// GetMap returns the span statistics keyed by the scope name under which
	// each one is reported.
	GetMap() map[string]*spanstat.SpanStat
}
    39  
    40  func sendMetrics(stats statistics, metric prometheus.ObserverVec) {
    41  	for scope, stat := range stats.GetMap() {
    42  		// Skip scopes that have not been hit (zero duration), so the count in
    43  		// the histogram accurately reflects the number of times each scope is
    44  		// hit, and the distribution is not incorrectly skewed towards zero.
    45  		if stat.SuccessTotal() != time.Duration(0) {
    46  			metric.WithLabelValues(scope, "success").Observe(stat.SuccessTotal().Seconds())
    47  		}
    48  		if stat.FailureTotal() != time.Duration(0) {
    49  			metric.WithLabelValues(scope, "failure").Observe(stat.FailureTotal().Seconds())
    50  		}
    51  	}
    52  }
    53  
// regenerationStatistics carries the outcome and the span statistics of an
// endpoint regeneration, in the form reported by its SendMetrics method.
//
// success selects the outcome label of the regeneration counter and decides
// whether the timings are exported at all. endpointID and policyStatus are
// forwarded to the package-level endpoint policy status map. totalTime is
// exported under the logfields.BuildDuration scope, while the remaining
// spanstat.SpanStat fields are exported under a scope named after the field.
// The entries returned by datapathRealization.GetMap() are merged into the
// exported set as well.
type regenerationStatistics struct {
	success                bool
	endpointID             uint16
	policyStatus           models.EndpointPolicyEnabled
	totalTime              spanstat.SpanStat
	waitingForLock         spanstat.SpanStat
	waitingForCTClean      spanstat.SpanStat
	policyCalculation      spanstat.SpanStat
	proxyConfiguration     spanstat.SpanStat
	proxyPolicyCalculation spanstat.SpanStat
	proxyWaitForAck        spanstat.SpanStat
	datapathRealization    loader.SpanStat
	mapSync                spanstat.SpanStat
	prepareBuild           spanstat.SpanStat
}
    69  
    70  // SendMetrics sends the regeneration statistics for this endpoint to
    71  // Prometheus.
    72  func (s *regenerationStatistics) SendMetrics() {
    73  	endpointPolicyStatus.Update(s.endpointID, s.policyStatus)
    74  
    75  	if !s.success {
    76  		// Endpoint regeneration failed, increase on failed metrics
    77  		metrics.EndpointRegenerationCount.WithLabelValues(metrics.LabelValueOutcomeFail).Inc()
    78  		return
    79  	}
    80  
    81  	metrics.EndpointRegenerationCount.WithLabelValues(metrics.LabelValueOutcomeSuccess).Inc()
    82  
    83  	sendMetrics(s, metrics.EndpointRegenerationTimeStats)
    84  }
    85  
    86  // GetMap returns a map which key is the stat name and the value is the stat
    87  func (s *regenerationStatistics) GetMap() map[string]*spanstat.SpanStat {
    88  	result := map[string]*spanstat.SpanStat{
    89  		"waitingForLock":         &s.waitingForLock,
    90  		"waitingForCTClean":      &s.waitingForCTClean,
    91  		"policyCalculation":      &s.policyCalculation,
    92  		"proxyConfiguration":     &s.proxyConfiguration,
    93  		"proxyPolicyCalculation": &s.proxyPolicyCalculation,
    94  		"proxyWaitForAck":        &s.proxyWaitForAck,
    95  		"mapSync":                &s.mapSync,
    96  		"prepareBuild":           &s.prepareBuild,
    97  		logfields.BuildDuration:  &s.totalTime,
    98  	}
    99  	for k, v := range s.datapathRealization.GetMap() {
   100  		result[k] = v
   101  	}
   102  	return result
   103  }
   104  
// policyRegenerationStatistics carries the span statistics of a policy
// regeneration, in the form reported by its SendMetrics method.
//
// totalTime is exported under the logfields.BuildDuration scope; the other
// spanstat.SpanStat fields are exported under a scope named after the field.
// success is not consulted by the SendMetrics or GetMap methods of this type.
type policyRegenerationStatistics struct {
	success                    bool
	totalTime                  spanstat.SpanStat
	waitingForIdentityCache    spanstat.SpanStat
	waitingForPolicyRepository spanstat.SpanStat
	policyCalculation          spanstat.SpanStat
}
   112  
// SendMetrics increments the policy regeneration counter and reports the
// policy regeneration span statistics to Prometheus. Both happen on every
// call: unlike regenerationStatistics.SendMetrics, the success field is not
// consulted.
func (ps *policyRegenerationStatistics) SendMetrics() {
	metrics.PolicyRegenerationCount.Inc()

	sendMetrics(ps, metrics.PolicyRegenerationTimeStats)
}
   118  
   119  func (ps *policyRegenerationStatistics) GetMap() map[string]*spanstat.SpanStat {
   120  	return map[string]*spanstat.SpanStat{
   121  		"waitingForIdentityCache":    &ps.waitingForIdentityCache,
   122  		"waitingForPolicyRepository": &ps.waitingForPolicyRepository,
   123  		"policyCalculation":          &ps.policyCalculation,
   124  		logfields.BuildDuration:      &ps.totalTime,
   125  	}
   126  }
   127  
// endpointPolicyStatusMap maps an endpoint ID to the policy enforcement
// status of that endpoint. It is used only to send metrics to Prometheus.
//
// The methods of this type take mutex around every access to m. Because the
// struct embeds a mutex by value, it should be used through a pointer once it
// is in use.
type endpointPolicyStatusMap struct {
	mutex lock.Mutex
	m     map[uint16]models.EndpointPolicyEnabled
}
   134  
   135  func newEndpointPolicyStatusMap() endpointPolicyStatusMap {
   136  	return endpointPolicyStatusMap{m: make(map[uint16]models.EndpointPolicyEnabled)}
   137  }
   138  
   139  // Update adds or updates a new endpoint to the map and update the metrics
   140  // related
   141  func (epPolicyMaps *endpointPolicyStatusMap) Update(endpointID uint16, policyStatus models.EndpointPolicyEnabled) {
   142  	epPolicyMaps.mutex.Lock()
   143  	epPolicyMaps.m[endpointID] = policyStatus
   144  	epPolicyMaps.mutex.Unlock()
   145  	endpointPolicyStatus.UpdateMetrics()
   146  }
   147  
   148  // Remove deletes the given endpoint from the map and update the metrics
   149  func (epPolicyMaps *endpointPolicyStatusMap) Remove(endpointID uint16) {
   150  	epPolicyMaps.mutex.Lock()
   151  	delete(epPolicyMaps.m, endpointID)
   152  	epPolicyMaps.mutex.Unlock()
   153  	epPolicyMaps.UpdateMetrics()
   154  }
   155  
   156  // UpdateMetrics update the policy enforcement metrics statistics for the endpoints.
   157  func (epPolicyMaps *endpointPolicyStatusMap) UpdateMetrics() {
   158  	policyStatus := map[models.EndpointPolicyEnabled]float64{
   159  		models.EndpointPolicyEnabledNone:    0,
   160  		models.EndpointPolicyEnabledEgress:  0,
   161  		models.EndpointPolicyEnabledIngress: 0,
   162  		models.EndpointPolicyEnabledBoth:    0,
   163  	}
   164  
   165  	epPolicyMaps.mutex.Lock()
   166  	for _, value := range epPolicyMaps.m {
   167  		policyStatus[value]++
   168  	}
   169  	epPolicyMaps.mutex.Unlock()
   170  
   171  	for k, v := range policyStatus {
   172  		metrics.PolicyEndpointStatus.WithLabelValues(string(k)).Set(v)
   173  	}
   174  }