github.phpd.cn/cilium/cilium@v1.6.12/pkg/aws/eni/metrics/metrics.go (about) 1 // Copyright 2019 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package metrics 16 17 import ( 18 "time" 19 20 "github.com/cilium/cilium/pkg/trigger" 21 22 "github.com/prometheus/client_golang/prometheus" 23 ) 24 25 const eniSubsystem = "eni" 26 27 type prometheusMetrics struct { 28 registry *prometheus.Registry 29 AllocateEniOps *prometheus.CounterVec 30 AllocateIpOps *prometheus.CounterVec 31 ReleaseIpOps *prometheus.CounterVec 32 IPsAllocated *prometheus.GaugeVec 33 AvailableENIs prometheus.Gauge 34 AvailableIPsPerSubnet *prometheus.GaugeVec 35 Nodes *prometheus.GaugeVec 36 Resync prometheus.Counter 37 EC2ApiDuration *prometheus.HistogramVec 38 EC2RateLimit *prometheus.HistogramVec 39 poolMaintainer *triggerMetrics 40 k8sSync *triggerMetrics 41 resync *triggerMetrics 42 } 43 44 // NewPrometheusMetrics returns a new ENI metrics implementation backed by 45 // Prometheus metrics. 46 func NewPrometheusMetrics(namespace string, registry *prometheus.Registry) *prometheusMetrics { 47 m := &prometheusMetrics{ 48 registry: registry, 49 } 50 51 m.IPsAllocated = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 52 Namespace: namespace, 53 Subsystem: eniSubsystem, 54 Name: "ips", 55 Help: "Number of IPs allocated", 56 }, []string{"type"}) 57 58 m.AllocateIpOps = prometheus.NewCounterVec(prometheus.CounterOpts{ 59 Namespace: namespace, 60 Subsystem: eniSubsystem, 61 Name: "allocation_ops", 62 Help: "Number of IP allocation operations", 63 }, []string{"subnetId"}) 64 65 m.ReleaseIpOps = prometheus.NewCounterVec(prometheus.CounterOpts{ 66 Namespace: namespace, 67 Subsystem: eniSubsystem, 68 Name: "release_ops", 69 Help: "Number of IP release operations", 70 }, []string{"subnetId"}) 71 72 m.AllocateEniOps = prometheus.NewCounterVec(prometheus.CounterOpts{ 73 Namespace: namespace, 74 Subsystem: eniSubsystem, 75 Name: "interface_creation_ops", 76 Help: "Number of ENIs allocated", 77 }, []string{"subnetId", "status"}) 78 79 m.AvailableENIs = prometheus.NewGauge(prometheus.GaugeOpts{ 80 Namespace: namespace, 81 Subsystem: eniSubsystem, 82 Name: "available", 83 Help: "Number of ENIs with addresses available", 84 }) 85 86 m.AvailableIPsPerSubnet = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 87 Namespace: namespace, 88 Subsystem: eniSubsystem, 89 Name: "available_ips_per_subnet", 90 Help: "Number of available IPs per subnet ID", 91 }, []string{"subnetId", "availabilityZone"}) 92 93 m.Nodes = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 94 Namespace: namespace, 95 Subsystem: eniSubsystem, 96 Name: "nodes", 97 Help: "Number of nodes by category { total | in-deficit | at-capacity }", 98 }, []string{"category"}) 99 100 m.EC2ApiDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ 101 Namespace: namespace, 102 Subsystem: eniSubsystem, 103 Name: "aws_api_duration_seconds", 104 Help: "Duration of interactions with AWS API", 105 }, []string{"operation", "responseCode"}) 106 107 m.Resync = prometheus.NewCounter(prometheus.CounterOpts{ 108 Namespace: namespace, 109 Subsystem: eniSubsystem, 110 Name: "resync_total", 111 Help: "Number of resync operations to synchronize AWS EC2 metadata", 112 }) 113 114 m.EC2RateLimit = prometheus.NewHistogramVec(prometheus.HistogramOpts{ 115 Namespace: namespace, 116 Subsystem: eniSubsystem, 117 Name: "ec2_rate_limit_duration_seconds", 118 Help: "Duration of EC2 client-side rate limiter blocking", 119 }, []string{"operation"}) 120 121 // pool_maintainer is a more generic name, but for backward compatibility 122 // of dashboard, keep the metric name deficit_resolver unchanged 123 m.poolMaintainer = newTriggerMetrics(namespace, "deficit_resolver") 124 m.k8sSync = newTriggerMetrics(namespace, "k8s_sync") 125 m.resync = newTriggerMetrics(namespace, "ec2_resync") 126 127 registry.MustRegister(m.IPsAllocated) 128 registry.MustRegister(m.AllocateIpOps) 129 registry.MustRegister(m.ReleaseIpOps) 130 registry.MustRegister(m.AllocateEniOps) 131 registry.MustRegister(m.AvailableENIs) 132 registry.MustRegister(m.AvailableIPsPerSubnet) 133 registry.MustRegister(m.Nodes) 134 registry.MustRegister(m.Resync) 135 registry.MustRegister(m.EC2ApiDuration) 136 registry.MustRegister(m.EC2RateLimit) 137 m.poolMaintainer.register(registry) 138 m.k8sSync.register(registry) 139 m.resync.register(registry) 140 141 return m 142 } 143 144 func (p *prometheusMetrics) PoolMaintainerTrigger() trigger.MetricsObserver { 145 return p.poolMaintainer 146 } 147 148 func (p *prometheusMetrics) K8sSyncTrigger() trigger.MetricsObserver { 149 return p.k8sSync 150 } 151 152 func (p *prometheusMetrics) ResyncTrigger() trigger.MetricsObserver { 153 return p.resync 154 } 155 156 func (p *prometheusMetrics) IncENIAllocationAttempt(status, subnetID string) { 157 p.AllocateEniOps.WithLabelValues(subnetID, status).Inc() 158 } 159 160 func (p *prometheusMetrics) AddIPAllocation(subnetID string, allocated int64) { 161 p.AllocateIpOps.WithLabelValues(subnetID).Add(float64(allocated)) 162 } 163 164 func (p *prometheusMetrics) AddIPRelease(subnetID string, released int64) { 165 p.ReleaseIpOps.WithLabelValues(subnetID).Add(float64(released)) 166 } 167 168 func (p *prometheusMetrics) SetAllocatedIPs(typ string, allocated int) { 169 p.IPsAllocated.WithLabelValues(typ).Set(float64(allocated)) 170 } 171 172 func (p *prometheusMetrics) SetAvailableENIs(available int) { 173 p.AvailableENIs.Set(float64(available)) 174 } 175 176 func (p *prometheusMetrics) SetAvailableIPsPerSubnet(subnetID string, availabilityZone string, available int) { 177 p.AvailableIPsPerSubnet.WithLabelValues(subnetID, availabilityZone).Set(float64(available)) 178 } 179 180 func (p *prometheusMetrics) SetNodes(label string, nodes int) { 181 p.Nodes.WithLabelValues(label).Set(float64(nodes)) 182 } 183 184 func (p *prometheusMetrics) ObserveEC2APICall(operation, status string, duration float64) { 185 p.EC2ApiDuration.WithLabelValues(operation, status).Observe(duration) 186 } 187 188 func (p *prometheusMetrics) ObserveEC2RateLimit(operation string, delay time.Duration) { 189 p.EC2RateLimit.WithLabelValues(operation).Observe(delay.Seconds()) 190 } 191 192 func (p *prometheusMetrics) IncResyncCount() { 193 p.Resync.Inc() 194 } 195 196 type triggerMetrics struct { 197 total prometheus.Counter 198 folds prometheus.Gauge 199 callDuration prometheus.Histogram 200 latency prometheus.Histogram 201 } 202 203 func newTriggerMetrics(namespace, name string) *triggerMetrics { 204 return &triggerMetrics{ 205 total: prometheus.NewCounter(prometheus.CounterOpts{ 206 Namespace: namespace, 207 Subsystem: eniSubsystem, 208 Name: name + "_queued_total", 209 Help: "Number of queued triggers", 210 }), 211 folds: prometheus.NewGauge(prometheus.GaugeOpts{ 212 Namespace: namespace, 213 Subsystem: eniSubsystem, 214 Name: name + "_folds", 215 Help: "Current level of folding", 216 }), 217 callDuration: prometheus.NewHistogram(prometheus.HistogramOpts{ 218 Namespace: namespace, 219 Subsystem: eniSubsystem, 220 Name: name + "_duration_seconds", 221 Help: "Duration of trigger runs", 222 }), 223 latency: prometheus.NewHistogram(prometheus.HistogramOpts{ 224 Namespace: namespace, 225 Subsystem: eniSubsystem, 226 Name: name + "_latency_seconds", 227 Help: "Latency between queue and trigger run", 228 }), 229 } 230 } 231 232 func (t *triggerMetrics) register(registry *prometheus.Registry) { 233 registry.MustRegister(t.total) 234 registry.MustRegister(t.folds) 235 registry.MustRegister(t.callDuration) 236 registry.MustRegister(t.latency) 237 } 238 239 func (t *triggerMetrics) QueueEvent(reason string) { 240 t.total.Inc() 241 } 242 243 func (t *triggerMetrics) PostRun(duration, latency time.Duration, folds int) { 244 t.callDuration.Observe(duration.Seconds()) 245 t.latency.Observe(latency.Seconds()) 246 t.folds.Set(float64(folds)) 247 }