github.com/looshlee/beatles@v0.0.0-20220727174639-742810ab631c/pkg/metrics/metrics.go (about)

     1  // Copyright 2017-2019 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package metrics holds prometheus metrics objects and related utility functions. It
    16  // does not abstract away the prometheus client but the caller rarely needs to
    17  // refer to prometheus directly.
    18  package metrics
    19  
    20  // Adding a metric
    21  // - Add a metric object of the appropriate type as an exported variable
    22  // - Register the new object in the init function
    23  
    24  import (
    25  	"net/http"
    26  	"syscall"
    27  
    28  	"github.com/cilium/cilium/api/v1/models"
    29  
    30  	"github.com/prometheus/client_golang/prometheus"
    31  	"github.com/prometheus/client_golang/prometheus/promhttp"
    32  	dto "github.com/prometheus/client_model/go"
    33  )
    34  
// Constants shared across the metrics package: error values, subsystem names,
// the metric namespace, and the label names/values used when declaring and
// updating metrics.
const (
	// ErrorTimeout is the value used to notify timeout errors.
	ErrorTimeout = "timeout"

	// ErrorProxy is the value used to notify errors on Proxy.
	ErrorProxy = "proxy"

	// L7DNS is the value used to report the DNS label on metrics.
	L7DNS = "dns"

	// SubsystemBPF is the subsystem to scope metrics related to the bpf syscalls.
	SubsystemBPF = "bpf"

	// SubsystemDatapath is the subsystem to scope metrics related to management of
	// the datapath. It is prepended to metric names and separated with a '_'.
	SubsystemDatapath = "datapath"

	// SubsystemAgent is the subsystem to scope metrics related to the cilium agent itself.
	SubsystemAgent = "agent"

	// SubsystemK8s is the subsystem to scope metrics related to Kubernetes.
	SubsystemK8s = "k8s"

	// SubsystemK8sClient is the subsystem to scope metrics related to the kubernetes client.
	SubsystemK8sClient = "k8s_client"

	// SubsystemKVStore is the subsystem to scope metrics related to the kvstore.
	SubsystemKVStore = "kvstore"

	// SubsystemNodes is the subsystem to scope metrics related to the node manager.
	SubsystemNodes = "nodes"

	// SubsystemTriggers is the subsystem to scope metrics related to the trigger package.
	SubsystemTriggers = "triggers"

	// Namespace is used to scope metrics from cilium. It is prepended to metric
	// names and separated with a '_'.
	Namespace = "cilium"

	// LabelOutcome indicates whether the outcome of the operation was successful or not.
	LabelOutcome = "outcome"

	// LabelAttempts is the number of attempts it took to complete the operation.
	LabelAttempts = "attempts"

	// Label values and further label names.

	// LabelValueOutcomeSuccess is used as a successful outcome of an operation.
	LabelValueOutcomeSuccess = "success"

	// LabelValueOutcomeFail is used as an unsuccessful outcome of an operation.
	LabelValueOutcomeFail = "fail"

	// LabelEventSourceAPI marks event-related metrics that come from the API.
	LabelEventSourceAPI = "api"

	// LabelEventSourceK8s marks event-related metrics that come from k8s.
	LabelEventSourceK8s = "k8s"

	// LabelEventSourceFQDN marks event-related metrics that come from pkg/fqdn.
	LabelEventSourceFQDN = "fqdn"

	// LabelEventSourceContainerd marks event-related metrics that come from
	// docker. Note that, despite the identifier, the emitted value is "docker".
	LabelEventSourceContainerd = "docker"

	// LabelDatapathArea marks which area the metrics are related to (eg, which BPF map).
	LabelDatapathArea = "area"

	// LabelDatapathName marks a unique identifier for this metric.
	// The name should be defined once for a given type of error.
	// It shares the label name "name" with LabelBuildQueueName.
	LabelDatapathName = "name"

	// LabelDatapathFamily marks which protocol family (IPv4, IPv6) the metric is related to.
	LabelDatapathFamily = "family"

	// LabelProtocol marks the L4 protocol (TCP, ANY) for the metric.
	LabelProtocol = "protocol"

	// LabelSignalType marks the signal name.
	LabelSignalType = "signal"

	// LabelSignalData marks the signal data.
	LabelSignalData = "data"

	// LabelStatus is the label for the status of a completed task.
	LabelStatus = "status"

	// LabelPolicyEnforcement is the label used to see the enforcement status.
	LabelPolicyEnforcement = "enforcement"

	// LabelPolicySource is the label used by the policy implementation delay
	// metric (see CreateConfiguration).
	// NOTE(review): presumably it names where a policy change originated —
	// confirm against the callers that set it.
	LabelPolicySource = "source"

	// LabelScope is the label used to define multiple scopes in the same
	// metric. For example, one counter may measure a metric over the scope of
	// the entire event (scope=global), or just part of an event
	// (scope=slow_path).
	LabelScope = "scope"

	// LabelProtocolL7 is the label used when working with layer 7 protocols.
	LabelProtocolL7 = "protocol_l7"

	// LabelBuildState is the state a build queue entry is in.
	LabelBuildState = "state"

	// LabelBuildQueueName is the name of the build queue.
	// It shares the label name "name" with LabelDatapathName.
	LabelBuildQueueName = "name"

	// LabelAction is the label used to define what kind of action was performed in a metric.
	LabelAction = "action"

	// LabelSubsystem is the label used to refer to any of the child processes
	// started by cilium (Envoy, monitor, etc..).
	LabelSubsystem = "subsystem"

	// LabelKind is the label carrying a "kind" value.
	// NOTE(review): what the kind describes is not visible in this file —
	// confirm against callers.
	LabelKind = "kind"

	// LabelPath is the label for the API path.
	LabelPath = "path"
	// LabelMethod is the label for the HTTP method.
	LabelMethod = "method"

	// LabelAPIReturnCode is the HTTP code returned for that API path.
	LabelAPIReturnCode = "return_code"

	// LabelOperation is the label for BPF maps operations.
	LabelOperation = "operation"

	// LabelMapName is the label for the BPF map name.
	// NOTE(review): this is the only camelCase label name in the block;
	// changing it would alter the emitted metrics, so it is left as is.
	LabelMapName = "mapName"
)
   167  
// Package-level metric objects. Most of them start out as one of the NoOp*
// placeholders (defined elsewhere in this package) and are replaced with real
// prometheus collectors by CreateConfiguration when the corresponding metric
// name is enabled.
var (
	// registry is a prometheus registry created with NewPedanticRegistry.
	// NOTE(review): presumably this is where the package's collectors get
	// registered — the registration code is not visible here.
	registry = prometheus.NewPedanticRegistry()

	// APIInteractions is the total time taken to process an API call made
	// to the cilium-agent
	APIInteractions = NoOpObserverVec

	// Endpoint

	// EndpointCount is a function used to collect this metric.
	// It must be thread-safe.
	EndpointCount prometheus.GaugeFunc

	// EndpointRegenerationCount is a count of the number of times any endpoint
	// has been regenerated and success/fail outcome
	EndpointRegenerationCount = NoOpCounterVec

	// EndpointStateCount is the total count of the endpoints in various states.
	EndpointStateCount = NoOpGaugeVec

	// EndpointRegenerationTimeStats is the total time taken to regenerate
	// endpoints, labeled by span name and status ("success" or "failure")
	EndpointRegenerationTimeStats = NoOpObserverVec

	// Policy

	// PolicyCount is the number of policies loaded into the agent
	PolicyCount = NoOpGauge

	// PolicyRegenerationCount is the total number of successful policy
	// regenerations.
	PolicyRegenerationCount = NoOpCounter

	// PolicyRegenerationTimeStats is the total time taken to generate policies
	PolicyRegenerationTimeStats = NoOpObserverVec

	// PolicyRevision is the current policy revision number for this agent
	PolicyRevision = NoOpGauge

	// PolicyImportErrors is a count of failed policy imports
	PolicyImportErrors = NoOpCounter

	// PolicyEndpointStatus is the number of endpoints with policy labeled by enforcement type
	PolicyEndpointStatus = NoOpGaugeVec

	// PolicyImplementationDelay is a distribution of times taken from adding a
	// policy (and incrementing the policy revision) to seeing it in the datapath
	// per Endpoint. This reflects the actual delay perceived by traffic flowing
	// through the datapath. The longest times will roughly correlate with the
	// time taken to fully deploy an endpoint.
	PolicyImplementationDelay = NoOpObserverVec

	// Identity

	// IdentityCount is the number of identities currently in use on the node
	IdentityCount = NoOpGauge

	// Events

	// EventTS* is the time in seconds since epoch that we last received an
	// event that we will handle.
	// source is one of k8s, docker or api

	// EventTSK8s is the timestamp of k8s events
	EventTSK8s = NoOpGauge

	// EventTSContainerd is the timestamp of docker events
	EventTSContainerd = NoOpGauge

	// EventTSAPI is the timestamp of API events
	EventTSAPI = NoOpGauge

	// L7 statistics

	// ProxyRedirects is the number of redirects labeled by protocol
	ProxyRedirects = NoOpGaugeVec

	// ProxyPolicyL7Total is a count of all l7 requests handled by proxy
	ProxyPolicyL7Total = NoOpCounterVec

	// ProxyParseErrors is a count of failed parse errors on proxy
	// Deprecated: in favor of ProxyPolicyL7Total
	ProxyParseErrors = NoOpCounter

	// ProxyForwarded is a count of all forwarded requests by proxy
	// Deprecated: in favor of ProxyPolicyL7Total
	ProxyForwarded = NoOpCounter

	// ProxyDenied is a count of all denied requests by policy by the proxy
	// Deprecated: in favor of ProxyPolicyL7Total
	ProxyDenied = NoOpCounter

	// ProxyReceived is a count of all received requests by the proxy
	// Deprecated: in favor of ProxyPolicyL7Total
	ProxyReceived = NoOpCounter

	// ProxyUpstreamTime is how long the upstream server took to reply labeled
	// by error, protocol and span time
	ProxyUpstreamTime = NoOpObserverVec

	// L3-L4 statistics

	// DropCount is the total drop requests,
	// tagged by drop reason and direction(ingress/egress)
	DropCount = NoOpCounterVec

	// DropBytes is the total dropped bytes,
	// tagged by drop reason and direction(ingress/egress)
	DropBytes = NoOpCounterVec

	// ForwardCount is the total forwarded packets,
	// tagged by ingress/egress direction
	ForwardCount = NoOpCounterVec

	// ForwardBytes is the total forwarded bytes,
	// tagged by ingress/egress direction
	ForwardBytes = NoOpCounterVec

	// Datapath statistics

	// DatapathErrors is the number of errors managing datapath components
	// such as BPF maps.
	DatapathErrors = NoOpCounterVec

	// ConntrackGCRuns is the number of times that the conntrack GC
	// process was run.
	ConntrackGCRuns = NoOpCounterVec

	// ConntrackGCKeyFallbacks is the number of times that the conntrack key
	// fallback was invalid.
	ConntrackGCKeyFallbacks = NoOpCounterVec

	// ConntrackGCSize is the number of entries in the conntrack table
	ConntrackGCSize = NoOpGaugeVec

	// ConntrackGCDuration is the duration of the conntrack GC process in
	// milliseconds.
	// NOTE(review): the default metric list contains a
	// conntrack_gc_duration_seconds name, which suggests seconds rather than
	// milliseconds — confirm the unit against the observing code.
	ConntrackGCDuration = NoOpObserverVec

	// Signals

	// SignalsHandled is the number of signals received.
	SignalsHandled = NoOpCounterVec

	// Services

	// ServicesCount is the number of services
	ServicesCount = NoOpCounterVec

	// Errors and warnings

	// ErrorsWarnings is the number of errors and warnings in cilium-agent instances
	ErrorsWarnings = NoOpCounterVec

	// ControllerRuns is the number of times that a controller process runs.
	ControllerRuns = NoOpCounterVec

	// ControllerRunsDuration is the duration of the controller process in seconds
	ControllerRunsDuration = NoOpObserverVec

	// SubprocessStart counts subprocess starts, labeled by Subsystem
	SubprocessStart = NoOpCounterVec

	// Kubernetes Events

	// KubernetesEventProcessed is the number of Kubernetes events
	// processed labeled by scope, action and execution result
	KubernetesEventProcessed = NoOpCounterVec

	// KubernetesEventReceived is the number of Kubernetes events received
	// labeled by scope, action, valid data and equalness.
	KubernetesEventReceived = NoOpCounterVec

	// Kubernetes interactions

	// KubernetesAPIInteractions is the total time taken to process an API call made
	// to the kube-apiserver
	KubernetesAPIInteractions = NoOpObserverVec

	// KubernetesAPICalls is the counter for all API calls made to
	// kube-apiserver.
	KubernetesAPICalls = NoOpCounterVec

	// KubernetesCNPStatusCompletion is the number of seconds it takes to
	// complete a CNP status update
	KubernetesCNPStatusCompletion = NoOpObserverVec

	// IPAM events

	// IpamEvent is the number of IPAM events received labeled by action and
	// datapath family type
	IpamEvent = NoOpCounterVec

	// KVstore events

	// KVStoreOperationsDuration records the duration of kvstore operations
	KVStoreOperationsDuration = NoOpObserverVec

	// KVStoreEventsQueueDuration records the duration in seconds of time
	// received event was blocked before it could be queued
	KVStoreEventsQueueDuration = NoOpObserverVec

	// FQDNGarbageCollectorCleanedTotal is the number of domains cleaned by the
	// GC job.
	FQDNGarbageCollectorCleanedTotal = NoOpCounter

	// BPFSyscallDuration is the metric for bpf syscalls duration.
	BPFSyscallDuration = NoOpObserverVec

	// BPFMapOps is the metric to measure the number of operations done to a
	// bpf map.
	BPFMapOps = NoOpCounterVec

	// TriggerPolicyUpdateTotal is the metric to count total number of
	// policy update triggers
	TriggerPolicyUpdateTotal = NoOpCounterVec

	// TriggerPolicyUpdateFolds is the current level folding that is
	// happening when running policy update triggers
	TriggerPolicyUpdateFolds = NoOpGauge

	// TriggerPolicyUpdateCallDuration measures the latency and call
	// duration of policy update triggers
	TriggerPolicyUpdateCallDuration = NoOpObserverVec
)
   391  
// Configuration records, one boolean per metric, which metrics have been
// enabled. CreateConfiguration returns a Configuration in which the flag of
// every metric it instantiated is set to true.
//
// NOTE(review): several field names look like naming slips but are part of
// the exported interface, so they are left untouched here:
//   - NoOpObserverVecEnabled is the flag CreateConfiguration sets for the
//     proxy_upstream_reply_seconds metric (ProxyUpstreamTime).
//   - NoOpCounterVecEnabled sits between DropBytesEnabled and
//     ForwardBytesEnabled; presumably it is the flag for ForwardCount —
//     confirm against CreateConfiguration.
//   - BPFMapOps, TriggerPolicyUpdateTotal, TriggerPolicyUpdateFolds and
//     TriggerPolicyUpdateCallDuration lack the "Enabled" suffix used by every
//     other field.
//   - PolicyRevisionEnabled: the policy_max_revision case in
//     CreateConfiguration sets PolicyRegenerationTimeStatsEnabled instead of
//     this field — verify whether that is intended.
type Configuration struct {
	APIInteractionsEnabled                  bool
	EndpointRegenerationCountEnabled        bool
	EndpointStateCountEnabled               bool
	EndpointRegenerationTimeStatsEnabled    bool
	PolicyCountEnabled                      bool
	PolicyRegenerationCountEnabled          bool
	PolicyRegenerationTimeStatsEnabled      bool
	PolicyRevisionEnabled                   bool
	PolicyImportErrorsEnabled               bool
	PolicyEndpointStatusEnabled             bool
	PolicyImplementationDelayEnabled        bool
	IdentityCountEnabled                    bool
	EventTSK8sEnabled                       bool
	EventTSContainerdEnabled                bool
	EventTSAPIEnabled                       bool
	ProxyRedirectsEnabled                   bool
	ProxyPolicyL7Enabled                    bool
	ProxyParseErrorsEnabled                 bool
	ProxyForwardedEnabled                   bool
	ProxyDeniedEnabled                      bool
	ProxyReceivedEnabled                    bool
	NoOpObserverVecEnabled                  bool
	DropCountEnabled                        bool
	DropBytesEnabled                        bool
	NoOpCounterVecEnabled                   bool
	ForwardBytesEnabled                     bool
	DatapathErrorsEnabled                   bool
	ConntrackGCRunsEnabled                  bool
	ConntrackGCKeyFallbacksEnabled          bool
	ConntrackGCSizeEnabled                  bool
	ConntrackGCDurationEnabled              bool
	SignalsHandledEnabled                   bool
	ServicesCountEnabled                    bool
	ErrorsWarningsEnabled                   bool
	ControllerRunsEnabled                   bool
	ControllerRunsDurationEnabled           bool
	SubprocessStartEnabled                  bool
	KubernetesEventProcessedEnabled         bool
	KubernetesEventReceivedEnabled          bool
	KubernetesAPIInteractionsEnabled        bool
	KubernetesAPICallsEnabled               bool
	KubernetesCNPStatusCompletionEnabled    bool
	IpamEventEnabled                        bool
	KVStoreOperationsDurationEnabled        bool
	KVStoreEventsQueueDurationEnabled       bool
	FQDNGarbageCollectorCleanedTotalEnabled bool
	BPFSyscallDurationEnabled               bool
	BPFMapOps                               bool
	TriggerPolicyUpdateTotal                bool
	TriggerPolicyUpdateFolds                bool
	TriggerPolicyUpdateCallDuration         bool
}
   445  
   446  func DefaultMetrics() map[string]struct{} {
   447  	return map[string]struct{}{
   448  		Namespace + "_" + SubsystemAgent + "_api_process_time_seconds":               {},
   449  		Namespace + "_endpoint_regenerations":                                        {},
   450  		Namespace + "_endpoint_state":                                                {},
   451  		Namespace + "_endpoint_regeneration_time_stats_seconds":                      {},
   452  		Namespace + "_policy_count":                                                  {},
   453  		Namespace + "_policy_regeneration_total":                                     {},
   454  		Namespace + "_policy_regeneration_time_stats_seconds":                        {},
   455  		Namespace + "_policy_max_revision":                                           {},
   456  		Namespace + "_policy_import_errors":                                          {},
   457  		Namespace + "_policy_endpoint_enforcement_status":                            {},
   458  		Namespace + "_policy_implementation_delay":                                   {},
   459  		Namespace + "_identity_count":                                                {},
   460  		Namespace + "_event_ts":                                                      {},
   461  		Namespace + "_proxy_redirects":                                               {},
   462  		Namespace + "_policy_l7_total":                                               {},
   463  		Namespace + "_policy_l7_parse_errors_total":                                  {},
   464  		Namespace + "_policy_l7_forwarded_total":                                     {},
   465  		Namespace + "_policy_l7_denied_total":                                        {},
   466  		Namespace + "_policy_l7_received_total":                                      {},
   467  		Namespace + "_proxy_upstream_reply_seconds":                                  {},
   468  		Namespace + "_drop_count_total":                                              {},
   469  		Namespace + "_drop_bytes_total":                                              {},
   470  		Namespace + "_forward_count_total":                                           {},
   471  		Namespace + "_forward_bytes_total":                                           {},
   472  		Namespace + "_" + SubsystemDatapath + "_errors_total":                        {},
   473  		Namespace + "_" + SubsystemDatapath + "_conntrack_gc_runs_total":             {},
   474  		Namespace + "_" + SubsystemDatapath + "_conntrack_gc_key_fallbacks_total":    {},
   475  		Namespace + "_" + SubsystemDatapath + "_conntrack_gc_entries":                {},
   476  		Namespace + "_" + SubsystemDatapath + "_conntrack_gc_duration_seconds":       {},
   477  		Namespace + "_" + SubsystemDatapath + "_signals_handled_total":               {},
   478  		Namespace + "_services_events_total":                                         {},
   479  		Namespace + "_errors_warnings_total":                                         {},
   480  		Namespace + "_controllers_runs_total":                                        {},
   481  		Namespace + "_controllers_runs_duration_seconds":                             {},
   482  		Namespace + "_subprocess_start_total":                                        {},
   483  		Namespace + "_kubernetes_events_total":                                       {},
   484  		Namespace + "_kubernetes_events_received_total":                              {},
   485  		Namespace + "_" + SubsystemK8sClient + "_api_latency_time_seconds":           {},
   486  		Namespace + "_" + SubsystemK8sClient + "_api_calls_counter":                  {},
   487  		Namespace + "_" + SubsystemK8s + "_cnp_status_completion_seconds":            {},
   488  		Namespace + "_ipam_events_total":                                             {},
   489  		Namespace + "_" + SubsystemKVStore + "_operations_duration_seconds":          {},
   490  		Namespace + "_" + SubsystemKVStore + "_events_queue_seconds":                 {},
   491  		Namespace + "_fqdn_gc_deletions_total":                                       {},
   492  		Namespace + "_" + SubsystemBPF + "_map_ops_total":                            {},
   493  		Namespace + "_" + SubsystemTriggers + "_policy_update_total":                 {},
   494  		Namespace + "_" + SubsystemTriggers + "_policy_update_folds":                 {},
   495  		Namespace + "_" + SubsystemTriggers + "_policy_update_call_duration_seconds": {},
   496  	}
   497  }
   498  
   499  // CreateConfiguration returns a Configuration with all metrics that are
   500  // considered enabled from the given slice of metricsEnabled as well as a slice
   501  // of prometheus.Collectors that must be registered in the prometheus default
   502  // register.
   503  func CreateConfiguration(metricsEnabled []string) (Configuration, []prometheus.Collector) {
   504  	var collectors []prometheus.Collector
   505  	c := Configuration{}
   506  
   507  	for _, metricName := range metricsEnabled {
   508  		switch metricName {
   509  		case Namespace + "_" + SubsystemAgent + "_api_process_time_seconds":
   510  			APIInteractions = prometheus.NewHistogramVec(prometheus.HistogramOpts{
   511  				Namespace: Namespace,
   512  				Subsystem: SubsystemAgent,
   513  				Name:      "api_process_time_seconds",
   514  				Help:      "Duration of processed API calls labeled by path, method and return code.",
   515  			}, []string{LabelPath, LabelMethod, LabelAPIReturnCode})
   516  
   517  			collectors = append(collectors, APIInteractions)
   518  			c.APIInteractionsEnabled = true
   519  
   520  		case Namespace + "_endpoint_regenerations":
   521  			EndpointRegenerationCount = prometheus.NewCounterVec(prometheus.CounterOpts{
   522  				Namespace: Namespace,
   523  				Name:      "endpoint_regenerations",
   524  				Help:      "Count of all endpoint regenerations that have completed, tagged by outcome",
   525  			}, []string{"outcome"})
   526  
   527  			collectors = append(collectors, EndpointRegenerationCount)
   528  			c.EndpointRegenerationCountEnabled = true
   529  
   530  		case Namespace + "_endpoint_state":
   531  			EndpointStateCount = prometheus.NewGaugeVec(
   532  				prometheus.GaugeOpts{
   533  					Namespace: Namespace,
   534  					Name:      "endpoint_state",
   535  					Help:      "Count of all endpoints, tagged by different endpoint states",
   536  				},
   537  				[]string{"endpoint_state"},
   538  			)
   539  
   540  			collectors = append(collectors, EndpointStateCount)
   541  			c.EndpointStateCountEnabled = true
   542  
   543  		case Namespace + "_endpoint_regeneration_time_stats_seconds":
   544  			EndpointRegenerationTimeStats = prometheus.NewHistogramVec(prometheus.HistogramOpts{
   545  				Namespace: Namespace,
   546  				Name:      "endpoint_regeneration_time_stats_seconds",
   547  				Help:      "Endpoint regeneration time stats labeled by the scope",
   548  			}, []string{LabelScope, LabelStatus})
   549  
   550  			collectors = append(collectors, EndpointRegenerationTimeStats)
   551  			c.EndpointRegenerationTimeStatsEnabled = true
   552  
   553  		case Namespace + "_policy_count":
   554  			PolicyCount = prometheus.NewGauge(prometheus.GaugeOpts{
   555  				Namespace: Namespace,
   556  				Name:      "policy_count",
   557  				Help:      "Number of policies currently loaded",
   558  			})
   559  
   560  			collectors = append(collectors, PolicyCount)
   561  			c.PolicyCountEnabled = true
   562  
   563  		case Namespace + "_policy_regeneration_total":
   564  			PolicyRegenerationCount = prometheus.NewCounter(prometheus.CounterOpts{
   565  				Namespace: Namespace,
   566  				Name:      "policy_regeneration_total",
   567  				Help:      "Total number of successful policy regenerations",
   568  			})
   569  
   570  			collectors = append(collectors, PolicyRegenerationCount)
   571  			c.PolicyRegenerationCountEnabled = true
   572  
   573  		case Namespace + "_policy_regeneration_time_stats_seconds":
   574  			PolicyRegenerationTimeStats = prometheus.NewHistogramVec(prometheus.HistogramOpts{
   575  				Namespace: Namespace,
   576  				Name:      "policy_regeneration_time_stats_seconds",
   577  				Help:      "Policy regeneration time stats labeled by the scope",
   578  			}, []string{LabelScope, LabelStatus})
   579  
   580  			collectors = append(collectors, PolicyRegenerationTimeStats)
   581  			c.PolicyRegenerationTimeStatsEnabled = true
   582  
   583  		case Namespace + "_policy_max_revision":
   584  			PolicyRevision = prometheus.NewGauge(prometheus.GaugeOpts{
   585  				Namespace: Namespace,
   586  				Name:      "policy_max_revision",
   587  				Help:      "Highest policy revision number in the agent",
   588  			})
   589  
   590  			collectors = append(collectors, PolicyRevision)
   591  			c.PolicyRegenerationTimeStatsEnabled = true
   592  
   593  		case Namespace + "_policy_import_errors":
   594  			PolicyImportErrors = prometheus.NewCounter(prometheus.CounterOpts{
   595  				Namespace: Namespace,
   596  				Name:      "policy_import_errors",
   597  				Help:      "Number of times a policy import has failed",
   598  			})
   599  
   600  			collectors = append(collectors, PolicyImportErrors)
   601  			c.PolicyImportErrorsEnabled = true
   602  
   603  		case Namespace + "_policy_endpoint_enforcement_status":
   604  			PolicyEndpointStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{
   605  				Namespace: Namespace,
   606  				Name:      "policy_endpoint_enforcement_status",
   607  				Help:      "Number of endpoints labeled by policy enforcement status",
   608  			}, []string{LabelPolicyEnforcement})
   609  
   610  			collectors = append(collectors, PolicyEndpointStatus)
   611  			c.PolicyEndpointStatusEnabled = true
   612  
   613  		case Namespace + "_policy_implementation_delay":
   614  			PolicyImplementationDelay = prometheus.NewHistogramVec(prometheus.HistogramOpts{
   615  				Namespace: Namespace,
   616  				Name:      "policy_implementation_delay",
   617  				Help:      "Time between a policy change and it being fully deployed into the datapath",
   618  			}, []string{LabelPolicySource})
   619  
   620  			collectors = append(collectors, PolicyImplementationDelay)
   621  			c.PolicyImplementationDelayEnabled = true
   622  
   623  		case Namespace + "_identity_count":
   624  			IdentityCount = prometheus.NewGauge(prometheus.GaugeOpts{
   625  				Namespace: Namespace,
   626  				Name:      "identity_count",
   627  				Help:      "Number of identities currently allocated",
   628  			})
   629  
   630  			collectors = append(collectors, IdentityCount)
   631  			c.IdentityCountEnabled = true
   632  
   633  		case Namespace + "_event_ts":
   634  			EventTSK8s = prometheus.NewGauge(prometheus.GaugeOpts{
   635  				Namespace:   Namespace,
   636  				Name:        "event_ts",
   637  				Help:        "Last timestamp when we received an event",
   638  				ConstLabels: prometheus.Labels{"source": LabelEventSourceK8s},
   639  			})
   640  
   641  			collectors = append(collectors, EventTSK8s)
   642  			c.EventTSK8sEnabled = true
   643  
   644  			EventTSContainerd = prometheus.NewGauge(prometheus.GaugeOpts{
   645  				Namespace:   Namespace,
   646  				Name:        "event_ts",
   647  				Help:        "Last timestamp when we received an event",
   648  				ConstLabels: prometheus.Labels{"source": LabelEventSourceContainerd},
   649  			})
   650  
   651  			collectors = append(collectors, EventTSContainerd)
   652  			c.EventTSContainerdEnabled = true
   653  
   654  			EventTSAPI = prometheus.NewGauge(prometheus.GaugeOpts{
   655  				Namespace:   Namespace,
   656  				Name:        "event_ts",
   657  				Help:        "Last timestamp when we received an event",
   658  				ConstLabels: prometheus.Labels{"source": LabelEventSourceAPI},
   659  			})
   660  
   661  			collectors = append(collectors, EventTSAPI)
   662  			c.EventTSAPIEnabled = true
   663  
   664  		case Namespace + "_proxy_redirects":
   665  			ProxyRedirects = prometheus.NewGaugeVec(prometheus.GaugeOpts{
   666  				Namespace: Namespace,
   667  				Name:      "proxy_redirects",
   668  				Help:      "Number of redirects installed for endpoints, labeled by protocol",
   669  			}, []string{LabelProtocolL7})
   670  
   671  			collectors = append(collectors, ProxyRedirects)
   672  			c.ProxyRedirectsEnabled = true
   673  
   674  		case Namespace + "_policy_l7_total":
   675  			ProxyPolicyL7Total = prometheus.NewCounterVec(prometheus.CounterOpts{
   676  				Namespace: Namespace,
   677  				Name:      "policy_l7_total",
   678  				Help:      "Number of total proxy requests handled",
   679  			}, []string{"rule"})
   680  
   681  			collectors = append(collectors, ProxyPolicyL7Total)
   682  			c.ProxyPolicyL7Enabled = true
   683  
   684  		case Namespace + "_policy_l7_parse_errors_total":
   685  			ProxyParseErrors = prometheus.NewCounter(prometheus.CounterOpts{
   686  				Namespace: Namespace,
   687  				Name:      "policy_l7_parse_errors_total",
   688  				Help:      "Number of total L7 parse errors",
   689  			})
   690  
   691  			collectors = append(collectors, ProxyParseErrors)
   692  			c.ProxyParseErrorsEnabled = true
   693  
   694  		case Namespace + "_policy_l7_forwarded_total":
   695  			ProxyForwarded = prometheus.NewCounter(prometheus.CounterOpts{
   696  				Namespace: Namespace,
   697  				Name:      "policy_l7_forwarded_total",
   698  				Help:      "Number of total L7 forwarded requests/responses",
   699  			})
   700  
   701  			collectors = append(collectors, ProxyForwarded)
   702  			c.ProxyForwardedEnabled = true
   703  
   704  		case Namespace + "_policy_l7_denied_total":
   705  			ProxyDenied = prometheus.NewCounter(prometheus.CounterOpts{
   706  				Namespace: Namespace,
   707  				Name:      "policy_l7_denied_total",
   708  				Help:      "Number of total L7 denied requests/responses due to policy",
   709  			})
   710  
   711  			collectors = append(collectors, ProxyDenied)
   712  			c.ProxyDeniedEnabled = true
   713  
   714  		case Namespace + "_policy_l7_received_total":
   715  			ProxyReceived = prometheus.NewCounter(prometheus.CounterOpts{
   716  				Namespace: Namespace,
   717  				Name:      "policy_l7_received_total",
   718  				Help:      "Number of total L7 received requests/responses",
   719  			})
   720  
   721  			collectors = append(collectors, ProxyReceived)
   722  			c.ProxyReceivedEnabled = true
   723  
   724  		case Namespace + "_proxy_upstream_reply_seconds":
   725  			ProxyUpstreamTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
   726  				Namespace: Namespace,
   727  				Name:      "proxy_upstream_reply_seconds",
   728  				Help:      "Seconds waited to get a reply from a upstream server",
   729  			}, []string{"error", LabelProtocolL7, LabelScope})
   730  
   731  			collectors = append(collectors, ProxyUpstreamTime)
   732  			c.NoOpObserverVecEnabled = true
   733  
   734  		case Namespace + "_drop_count_total":
   735  			DropCount = prometheus.NewCounterVec(prometheus.CounterOpts{
   736  				Namespace: Namespace,
   737  				Name:      "drop_count_total",
   738  				Help:      "Total dropped packets, tagged by drop reason and ingress/egress direction",
   739  			},
   740  				[]string{"reason", "direction"})
   741  
   742  			collectors = append(collectors, DropCount)
   743  			c.DropCountEnabled = true
   744  
   745  		case Namespace + "_drop_bytes_total":
   746  			DropBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
   747  				Namespace: Namespace,
   748  				Name:      "drop_bytes_total",
   749  				Help:      "Total dropped bytes, tagged by drop reason and ingress/egress direction",
   750  			},
   751  				[]string{"reason", "direction"})
   752  
   753  			collectors = append(collectors, DropBytes)
   754  			c.DropBytesEnabled = true
   755  
   756  		case Namespace + "_forward_count_total":
   757  			ForwardCount = prometheus.NewCounterVec(prometheus.CounterOpts{
   758  				Namespace: Namespace,
   759  				Name:      "forward_count_total",
   760  				Help:      "Total forwarded packets, tagged by ingress/egress direction",
   761  			},
   762  				[]string{"direction"})
   763  
   764  			collectors = append(collectors, ForwardCount)
   765  			c.NoOpCounterVecEnabled = true
   766  
   767  		case Namespace + "_forward_bytes_total":
   768  			ForwardBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
   769  				Namespace: Namespace,
   770  				Name:      "forward_bytes_total",
   771  				Help:      "Total forwarded bytes, tagged by ingress/egress direction",
   772  			},
   773  				[]string{"direction"})
   774  
   775  			collectors = append(collectors, ForwardBytes)
   776  			c.ForwardBytesEnabled = true
   777  
   778  		case Namespace + "_" + SubsystemDatapath + "_errors_total":
   779  			DatapathErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
   780  				Namespace: Namespace,
   781  				Subsystem: SubsystemDatapath,
   782  				Name:      "errors_total",
   783  				Help:      "Number of errors that occurred in the datapath or datapath management",
   784  			}, []string{LabelDatapathArea, LabelDatapathName, LabelDatapathFamily})
   785  
   786  			collectors = append(collectors, DatapathErrors)
   787  			c.DatapathErrorsEnabled = true
   788  
   789  		case Namespace + "_" + SubsystemDatapath + "_conntrack_gc_runs_total":
   790  			ConntrackGCRuns = prometheus.NewCounterVec(prometheus.CounterOpts{
   791  				Namespace: Namespace,
   792  				Subsystem: SubsystemDatapath,
   793  				Name:      "conntrack_gc_runs_total",
   794  				Help: "Number of times that the conntrack garbage collector process was run " +
   795  					"labeled by completion status",
   796  			}, []string{LabelDatapathFamily, LabelProtocol, LabelStatus})
   797  
   798  			collectors = append(collectors, ConntrackGCRuns)
   799  			c.ConntrackGCRunsEnabled = true
   800  
   801  		case Namespace + "_" + SubsystemDatapath + "_conntrack_gc_key_fallbacks_total":
   802  			ConntrackGCKeyFallbacks = prometheus.NewCounterVec(prometheus.CounterOpts{
   803  				Namespace: Namespace,
   804  				Subsystem: SubsystemDatapath,
   805  				Name:      "conntrack_gc_key_fallbacks_total",
   806  				Help:      "Number of times a key fallback was needed when iterating over the BPF map",
   807  			}, []string{LabelDatapathFamily, LabelProtocol})
   808  
   809  			collectors = append(collectors, ConntrackGCKeyFallbacks)
   810  			c.ConntrackGCKeyFallbacksEnabled = true
   811  
   812  		case Namespace + "_" + SubsystemDatapath + "_conntrack_gc_entries":
   813  			ConntrackGCSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
   814  				Namespace: Namespace,
   815  				Subsystem: SubsystemDatapath,
   816  				Name:      "conntrack_gc_entries",
   817  				Help: "The number of alive and deleted conntrack entries at the end " +
   818  					"of a garbage collector run labeled by datapath family.",
   819  			}, []string{LabelDatapathFamily, LabelProtocol, LabelStatus})
   820  
   821  			collectors = append(collectors, ConntrackGCSize)
   822  			c.ConntrackGCSizeEnabled = true
   823  
   824  		case Namespace + "_" + SubsystemDatapath + "_conntrack_gc_duration_seconds":
   825  			ConntrackGCDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
   826  				Namespace: Namespace,
   827  				Subsystem: SubsystemDatapath,
   828  				Name:      "conntrack_gc_duration_seconds",
   829  				Help: "Duration in seconds of the garbage collector process " +
   830  					"labeled by datapath family and completion status",
   831  			}, []string{LabelDatapathFamily, LabelProtocol, LabelStatus})
   832  
   833  			collectors = append(collectors, ConntrackGCDuration)
   834  			c.ConntrackGCDurationEnabled = true
   835  
   836  		case Namespace + "_" + SubsystemDatapath + "_signals_handled_total":
   837  			SignalsHandled = prometheus.NewCounterVec(prometheus.CounterOpts{
   838  				Namespace: Namespace,
   839  				Subsystem: SubsystemDatapath,
   840  				Name:      "signals_handled_total",
   841  				Help: "Number of times that the datapath signal handler process was run " +
   842  					"labeled by signal type, data and completion status",
   843  			}, []string{LabelSignalType, LabelSignalData, LabelStatus})
   844  
   845  			collectors = append(collectors, SignalsHandled)
   846  			c.SignalsHandledEnabled = true
   847  
   848  		case Namespace + "_services_events_total":
   849  			ServicesCount = prometheus.NewCounterVec(prometheus.CounterOpts{
   850  				Namespace: Namespace,
   851  				Name:      "services_events_total",
   852  				Help:      "Number of services events labeled by action type",
   853  			}, []string{LabelAction})
   854  
   855  			collectors = append(collectors, ServicesCount)
   856  			c.ServicesCountEnabled = true
   857  
   858  		case Namespace + "_errors_warnings_total":
   859  			ErrorsWarnings = prometheus.NewCounterVec(prometheus.CounterOpts{
   860  				Namespace: Namespace,
   861  				Name:      "errors_warnings_total",
   862  				Help:      "Number of total errors in cilium-agent instances",
   863  			}, []string{"level", "subsystem"})
   864  
   865  			collectors = append(collectors, ErrorsWarnings)
   866  			c.ErrorsWarningsEnabled = true
   867  
   868  		case Namespace + "_controllers_runs_total":
   869  			ControllerRuns = prometheus.NewCounterVec(prometheus.CounterOpts{
   870  				Namespace: Namespace,
   871  				Name:      "controllers_runs_total",
   872  				Help:      "Number of times that a controller process was run labeled by completion status",
   873  			}, []string{LabelStatus})
   874  
   875  			collectors = append(collectors, ControllerRuns)
   876  			c.ControllerRunsEnabled = true
   877  
   878  		case Namespace + "_controllers_runs_duration_seconds":
   879  			ControllerRunsDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
   880  				Namespace: Namespace,
   881  				Name:      "controllers_runs_duration_seconds",
   882  				Help:      "Duration in seconds of the controller process labeled by completion status",
   883  			}, []string{LabelStatus})
   884  
   885  			collectors = append(collectors, ControllerRunsDuration)
   886  			c.ControllerRunsDurationEnabled = true
   887  
   888  		case Namespace + "_subprocess_start_total":
   889  			SubprocessStart = prometheus.NewCounterVec(prometheus.CounterOpts{
   890  				Namespace: Namespace,
   891  				Name:      "subprocess_start_total",
   892  				Help:      "Number of times that Cilium has started a subprocess, labeled by subsystem",
   893  			}, []string{LabelSubsystem})
   894  
   895  			collectors = append(collectors, SubprocessStart)
   896  			c.SubprocessStartEnabled = true
   897  
   898  		case Namespace + "_kubernetes_events_total":
   899  			KubernetesEventProcessed = prometheus.NewCounterVec(prometheus.CounterOpts{
   900  				Namespace: Namespace,
   901  				Name:      "kubernetes_events_total",
   902  				Help:      "Number of Kubernetes events processed labeled by scope, action and execution result",
   903  			}, []string{LabelScope, LabelAction, LabelStatus})
   904  
   905  			collectors = append(collectors, KubernetesEventProcessed)
   906  			c.KubernetesEventProcessedEnabled = true
   907  
   908  		case Namespace + "_kubernetes_events_received_total":
   909  			KubernetesEventReceived = prometheus.NewCounterVec(prometheus.CounterOpts{
   910  				Namespace: Namespace,
   911  				Name:      "kubernetes_events_received_total",
   912  				Help:      "Number of Kubernetes events processed labeled by scope, action and execution result",
   913  			}, []string{LabelScope, LabelAction, "valid", "equal"})
   914  
   915  			collectors = append(collectors, KubernetesEventReceived)
   916  			c.KubernetesEventReceivedEnabled = true
   917  
   918  		case Namespace + "_" + SubsystemK8sClient + "_api_latency_time_seconds":
   919  			KubernetesAPIInteractions = prometheus.NewHistogramVec(prometheus.HistogramOpts{
   920  				Namespace: Namespace,
   921  				Subsystem: SubsystemK8sClient,
   922  				Name:      "api_latency_time_seconds",
   923  				Help:      "Duration of processed API calls labeled by path and method.",
   924  			}, []string{LabelPath, LabelMethod})
   925  
   926  			collectors = append(collectors, KubernetesAPIInteractions)
   927  			c.KubernetesAPIInteractionsEnabled = true
   928  
   929  		case Namespace + "_" + SubsystemK8sClient + "_api_calls_counter":
   930  			KubernetesAPICalls = prometheus.NewCounterVec(prometheus.CounterOpts{
   931  				Namespace: Namespace,
   932  				Subsystem: SubsystemK8sClient,
   933  				Name:      "api_calls_counter",
   934  				Help:      "Number of API calls made to kube-apiserver labeled by host, method and return code.",
   935  			}, []string{"host", LabelMethod, LabelAPIReturnCode})
   936  
   937  			collectors = append(collectors, KubernetesAPICalls)
   938  			c.KubernetesAPICallsEnabled = true
   939  
   940  		case Namespace + "_" + SubsystemK8s + "_cnp_status_completion_seconds":
   941  			KubernetesCNPStatusCompletion = prometheus.NewHistogramVec(prometheus.HistogramOpts{
   942  				Namespace: Namespace,
   943  				Subsystem: SubsystemK8s,
   944  				Name:      "cnp_status_completion_seconds",
   945  				Help:      "Duration in seconds in how long it took to complete a CNP status update",
   946  			}, []string{LabelAttempts, LabelOutcome})
   947  
   948  			collectors = append(collectors, KubernetesCNPStatusCompletion)
   949  			c.KubernetesCNPStatusCompletionEnabled = true
   950  
   951  		case Namespace + "_ipam_events_total":
   952  			IpamEvent = prometheus.NewCounterVec(prometheus.CounterOpts{
   953  				Namespace: Namespace,
   954  				Name:      "ipam_events_total",
   955  				Help:      "Number of IPAM events received labeled by action and datapath family type",
   956  			}, []string{LabelAction, LabelDatapathFamily})
   957  
   958  			collectors = append(collectors, IpamEvent)
   959  			c.IpamEventEnabled = true
   960  
   961  		case Namespace + "_" + SubsystemKVStore + "_operations_duration_seconds":
   962  			KVStoreOperationsDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
   963  				Namespace: Namespace,
   964  				Subsystem: SubsystemKVStore,
   965  				Name:      "operations_duration_seconds",
   966  				Help:      "Duration in seconds of kvstore operations",
   967  			}, []string{LabelScope, LabelKind, LabelAction, LabelOutcome})
   968  
   969  			collectors = append(collectors, KVStoreOperationsDuration)
   970  			c.KVStoreOperationsDurationEnabled = true
   971  
   972  		case Namespace + "_" + SubsystemKVStore + "_events_queue_seconds":
   973  			KVStoreEventsQueueDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
   974  				Namespace: Namespace,
   975  				Subsystem: SubsystemKVStore,
   976  				Name:      "events_queue_seconds",
   977  				Help:      "Duration in seconds of time received event was blocked before it could be queued",
   978  				Buckets:   []float64{.002, .005, .01, .015, .025, .05, .1, .25, .5, .75, 1},
   979  			}, []string{LabelScope, LabelAction})
   980  
   981  			collectors = append(collectors, KVStoreEventsQueueDuration)
   982  			c.KVStoreEventsQueueDurationEnabled = true
   983  
   984  		case Namespace + "_fqdn_gc_deletions_total":
   985  			FQDNGarbageCollectorCleanedTotal = prometheus.NewCounter(prometheus.CounterOpts{
   986  				Namespace: Namespace,
   987  				Name:      "fqdn_gc_deletions_total",
   988  				Help:      "Number of FQDNs that have been cleaned on FQDN Garbage collector job",
   989  			})
   990  
   991  			collectors = append(collectors, FQDNGarbageCollectorCleanedTotal)
   992  			c.FQDNGarbageCollectorCleanedTotalEnabled = true
   993  
   994  		case Namespace + "_" + SubsystemBPF + "_syscall_duration_seconds":
   995  			BPFSyscallDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
   996  				Namespace: Namespace,
   997  				Subsystem: SubsystemBPF,
   998  				Name:      "syscall_duration_seconds",
   999  				Help:      "Duration of BPF system calls",
  1000  			}, []string{LabelOperation, LabelOutcome})
  1001  
  1002  			collectors = append(collectors, BPFSyscallDuration)
  1003  			c.BPFSyscallDurationEnabled = true
  1004  
  1005  		case Namespace + "_" + SubsystemBPF + "_map_ops_total":
  1006  			BPFMapOps = prometheus.NewCounterVec(prometheus.CounterOpts{
  1007  				Namespace: Namespace,
  1008  				Subsystem: SubsystemBPF,
  1009  				Name:      "map_ops_total",
  1010  				Help:      "Total operations on map, tagged by map name",
  1011  			}, []string{LabelMapName, LabelOperation, LabelOutcome})
  1012  
  1013  			collectors = append(collectors, BPFMapOps)
  1014  			c.BPFMapOps = true
  1015  
  1016  		case Namespace + "_" + SubsystemTriggers + "_policy_update_total":
  1017  			TriggerPolicyUpdateTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
  1018  				Namespace: Namespace,
  1019  				Subsystem: SubsystemTriggers,
  1020  				Name:      "policy_update_total",
  1021  				Help:      "Total number of policy update trigger invocations labeled by reason",
  1022  			}, []string{"reason"})
  1023  
  1024  			collectors = append(collectors, TriggerPolicyUpdateTotal)
  1025  			c.TriggerPolicyUpdateTotal = true
  1026  
  1027  		case Namespace + "_" + SubsystemTriggers + "_policy_update_folds":
  1028  			TriggerPolicyUpdateFolds = prometheus.NewGauge(prometheus.GaugeOpts{
  1029  				Namespace: Namespace,
  1030  				Subsystem: SubsystemTriggers,
  1031  				Name:      "policy_update_folds",
  1032  				Help:      "Current number of folds",
  1033  			})
  1034  
  1035  			collectors = append(collectors, TriggerPolicyUpdateFolds)
  1036  			c.TriggerPolicyUpdateFolds = true
  1037  
  1038  		case Namespace + "_" + SubsystemTriggers + "_policy_update_call_duration_seconds":
  1039  			TriggerPolicyUpdateCallDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
  1040  				Namespace: Namespace,
  1041  				Subsystem: SubsystemTriggers,
  1042  				Name:      "policy_update_call_duration_seconds",
  1043  				Help:      "Duration of policy update trigger",
  1044  			}, []string{"type"})
  1045  
  1046  			collectors = append(collectors, TriggerPolicyUpdateCallDuration)
  1047  			c.TriggerPolicyUpdateCallDuration = true
  1048  		}
  1049  	}
  1050  
  1051  	return c, collectors
  1052  }
  1053  
  1054  func init() {
  1055  	MustRegister(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{Namespace: Namespace}))
  1056  	// TODO: Figure out how to put this into a Namespace
  1057  	// MustRegister(prometheus.NewGoCollector())
  1058  	MustRegister(newStatusCollector())
  1059  }
  1060  
  1061  // MustRegister adds the collector to the registry, exposing this metric to
  1062  // prometheus scrapes.
  1063  // It will panic on error.
  1064  func MustRegister(c ...prometheus.Collector) {
  1065  	registry.MustRegister(c...)
  1066  }
  1067  
  1068  // Register registers a collector
  1069  func Register(c prometheus.Collector) error {
  1070  	return registry.Register(c)
  1071  }
  1072  
  1073  // RegisterList registers a list of collectors. If registration of one
  1074  // collector fails, no collector is registered.
  1075  func RegisterList(list []prometheus.Collector) error {
  1076  	registered := []prometheus.Collector{}
  1077  
  1078  	for _, c := range list {
  1079  		if err := Register(c); err != nil {
  1080  			for _, c := range registered {
  1081  				Unregister(c)
  1082  			}
  1083  			return err
  1084  		}
  1085  
  1086  		registered = append(registered, c)
  1087  	}
  1088  
  1089  	return nil
  1090  }
  1091  
  1092  // Unregister unregisters a collector
  1093  func Unregister(c prometheus.Collector) bool {
  1094  	return registry.Unregister(c)
  1095  }
  1096  
  1097  // Enable begins serving prometheus metrics on the address passed in. Addresses
  1098  // of the form ":8080" will bind the port on all interfaces.
  1099  func Enable(addr string) <-chan error {
  1100  	errs := make(chan error, 1)
  1101  
  1102  	go func() {
  1103  		// The Handler function provides a default handler to expose metrics
  1104  		// via an HTTP server. "/metrics" is the usual endpoint for that.
  1105  		http.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{}))
  1106  		errs <- http.ListenAndServe(addr, nil)
  1107  	}()
  1108  
  1109  	return errs
  1110  }
  1111  
  1112  // GetCounterValue returns the current value
  1113  // stored for the counter
  1114  func GetCounterValue(m prometheus.Counter) float64 {
  1115  	var pm dto.Metric
  1116  	err := m.Write(&pm)
  1117  	if err == nil {
  1118  		return *pm.Counter.Value
  1119  	}
  1120  	return 0
  1121  }
  1122  
  1123  // GetGaugeValue returns the current value stored for the gauge. This function
  1124  // is useful in tests.
  1125  func GetGaugeValue(m prometheus.Gauge) float64 {
  1126  	var pm dto.Metric
  1127  	err := m.Write(&pm)
  1128  	if err == nil {
  1129  		return *pm.Gauge.Value
  1130  	}
  1131  	return 0
  1132  }
  1133  
  1134  // DumpMetrics gets the current Cilium metrics and dumps all into a
  1135  // models.Metrics structure.If metrics cannot be retrieved, returns an error
  1136  func DumpMetrics() ([]*models.Metric, error) {
  1137  	result := []*models.Metric{}
  1138  	currentMetrics, err := registry.Gather()
  1139  	if err != nil {
  1140  		return result, err
  1141  	}
  1142  
  1143  	for _, val := range currentMetrics {
  1144  
  1145  		metricName := val.GetName()
  1146  		metricType := val.GetType()
  1147  
  1148  		for _, metricLabel := range val.Metric {
  1149  			labels := map[string]string{}
  1150  			for _, label := range metricLabel.GetLabel() {
  1151  				labels[label.GetName()] = label.GetValue()
  1152  			}
  1153  
  1154  			var value float64
  1155  			switch metricType {
  1156  			case dto.MetricType_COUNTER:
  1157  				value = metricLabel.Counter.GetValue()
  1158  			case dto.MetricType_GAUGE:
  1159  				value = metricLabel.GetGauge().GetValue()
  1160  			case dto.MetricType_UNTYPED:
  1161  				value = metricLabel.GetUntyped().GetValue()
  1162  			case dto.MetricType_SUMMARY:
  1163  				value = metricLabel.GetSummary().GetSampleSum()
  1164  			case dto.MetricType_HISTOGRAM:
  1165  				value = metricLabel.GetHistogram().GetSampleSum()
  1166  			default:
  1167  				continue
  1168  			}
  1169  
  1170  			metric := &models.Metric{
  1171  				Name:   metricName,
  1172  				Labels: labels,
  1173  				Value:  value,
  1174  			}
  1175  			result = append(result, metric)
  1176  		}
  1177  	}
  1178  	return result, nil
  1179  }
  1180  
  1181  // Error2Outcome converts an error to LabelOutcome
  1182  func Error2Outcome(err error) string {
  1183  	if err != nil {
  1184  		return LabelValueOutcomeFail
  1185  	}
  1186  
  1187  	return LabelValueOutcomeSuccess
  1188  }
  1189  
  1190  // Errno2Outcome converts a syscall.Errno to LabelOutcome
  1191  func Errno2Outcome(errno syscall.Errno) string {
  1192  	if errno != 0 {
  1193  		return LabelValueOutcomeFail
  1194  	}
  1195  
  1196  	return LabelValueOutcomeSuccess
  1197  }