go.ligato.io/vpp-agent/v3@v3.5.0/plugins/telemetry/prometheus.go (about) 1 // Copyright (c) 2019 Cisco and/or its affiliates. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at: 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package telemetry 16 17 import ( 18 "context" 19 "fmt" 20 "strconv" 21 22 "github.com/prometheus/client_golang/prometheus" 23 ) 24 25 const ( 26 // Registry path for telemetry metrics 27 registryPath = "/metrics" 28 29 vppMetricsNamespace = "vpp" 30 31 // Metrics label used for agent label 32 agentLabel = "agent" 33 ) 34 35 // Runtime metrics 36 const ( 37 runtimeMetricsNamespace = "runtime" 38 39 runtimeThreadLabel = "thread" 40 runtimeThreadIDLabel = "threadID" 41 runtimeItemLabel = "item" 42 43 runtimeCallsMetric = "calls" 44 runtimeVectorsMetric = "vectors" 45 runtimeSuspendsMetric = "suspends" 46 runtimeClocksMetric = "clocks" 47 runtimeVectorsPerCallMetric = "vectors_per_call" 48 ) 49 50 // Memory metrics 51 const ( 52 memoryMetricsNamespace = "memory" 53 54 memoryThreadLabel = "thread" 55 memoryThreadIDLabel = "threadID" 56 57 memoryObjectsMetric = "objects" 58 memoryUsedMetric = "used" 59 memoryTotalMetric = "total" 60 memoryFreeMetric = "free" 61 memoryTrimmableMetric = "trimmable" 62 memoryFreeChunksMetric = "free_chunks" 63 memoryFreeFastbinBlksMetric = "free_fastbin_blks" 64 memoryMaxTotalAlloc = "max_total_allocated" 65 memorySizeMetric = "size" 66 memoryPagesMetric = "pages" 67 ) 68 69 // Buffers metrics 70 const ( 71 buffersMetricsNamespace = "buffers" 72 73 buffersThreadIDLabel = "threadID" 74 buffersItemLabel = "item" 75 buffersIndexLabel = "index" 76 77 buffersSizeMetric = "size" 78 buffersAllocMetric = "alloc" 79 buffersFreeMetric = "free" 80 buffersNumAllocMetric = "num_alloc" 81 buffersNumFreeMetric = "num_free" 82 ) 83 84 // Node metrics 85 const ( 86 nodeMetricsNamespace = "nodes" 87 88 nodeCounterItemLabel = "item" 89 nodeCounterReasonLabel = "reason" 90 91 nodeCounterCounterMetric = "counter" 92 ) 93 94 // Interface metrics 95 const ( 96 ifMetricsNamespace = "interfaces" 97 98 ifCounterNameLabel = "name" 99 ifCounterIndexLabel = "index" 100 101 ifCounterRxPackets = "rx_packets" 102 ifCounterRxBytes = "rx_bytes" 103 ifCounterRxErrors = "rx_errors" 104 ifCounterTxPackets = "tx_packets" 105 ifCounterTxBytes = "tx_bytes" 106 ifCounterTxErrors = "tx_errors" 107 ifCounterDrops = "drops" 108 ifCounterPunts = "punts" 109 ifCounterIP4 = "ip4" 110 ifCounterIP6 = "ip6" 111 ifCounterRxNoBuf = "rx_no_buf" 112 ifCounterRxMiss = "rx_miss" 113 ) 114 115 type prometheusMetrics struct { 116 runtimeGaugeVecs map[string]*prometheus.GaugeVec 117 runtimeStats map[string]*runtimeStats 118 119 memoryGaugeVecs map[string]*prometheus.GaugeVec 120 memoryStats map[string]*memoryStats 121 122 buffersGaugeVecs map[string]*prometheus.GaugeVec 123 buffersStats map[string]*buffersStats 124 125 nodeCounterGaugeVecs map[string]*prometheus.GaugeVec 126 nodeCounterStats map[string]*nodeCounterStats 127 128 ifCounterGaugeVecs map[string]*prometheus.GaugeVec 129 ifCounterStats map[string]*ifCounterStats 130 } 131 132 type runtimeStats struct { 133 threadName string 134 threadID uint 135 itemName string 136 metrics map[string]prometheus.Gauge 137 } 138 139 type memoryStats struct { 140 threadName string 141 threadID uint 142 metrics map[string]prometheus.Gauge 143 } 144 145 type buffersStats struct { 146 threadID uint 147 itemName string 148 itemIndex uint 149 metrics map[string]prometheus.Gauge 150 } 151 152 type nodeCounterStats struct { 153 itemName string 154 metrics map[string]prometheus.Gauge 155 } 156 157 type ifCounterStats struct { 158 name string 159 metrics map[string]prometheus.Gauge 160 } 161 162 func (p *Plugin) registerPrometheus() error { 163 p.Log.Debugf("registering prometheus registry path: %v", registryPath) 164 165 // Runtime metrics 166 p.runtimeGaugeVecs = make(map[string]*prometheus.GaugeVec) 167 p.runtimeStats = make(map[string]*runtimeStats) 168 169 for _, metric := range [][2]string{ 170 {runtimeCallsMetric, "Number of calls"}, 171 {runtimeVectorsMetric, "Number of vectors"}, 172 {runtimeSuspendsMetric, "Number of suspends"}, 173 {runtimeClocksMetric, "Number of clocks"}, 174 {runtimeVectorsPerCallMetric, "Number of vectors per call"}, 175 } { 176 name := metric[0] 177 p.runtimeGaugeVecs[name] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 178 Namespace: vppMetricsNamespace, 179 Subsystem: runtimeMetricsNamespace, 180 Name: name, 181 Help: metric[1], 182 ConstLabels: prometheus.Labels{ 183 agentLabel: p.ServiceLabel.GetAgentLabel(), 184 }, 185 }, []string{runtimeItemLabel, runtimeThreadLabel, runtimeThreadIDLabel}) 186 } 187 188 // register created vectors to prometheus 189 for name, metric := range p.runtimeGaugeVecs { 190 if err := p.Prometheus.Register(registryPath, metric); err != nil { 191 p.Log.Errorf("failed to register %v metric: %v", name, err) 192 return err 193 } 194 } 195 196 // Memory metrics 197 p.memoryGaugeVecs = make(map[string]*prometheus.GaugeVec) 198 p.memoryStats = make(map[string]*memoryStats) 199 200 for _, metric := range [][2]string{ 201 {memoryObjectsMetric, "Number of objects"}, 202 {memoryUsedMetric, "Used memory"}, 203 {memoryTotalMetric, "Total memory"}, 204 {memoryFreeMetric, "Free memory"}, 205 {memorySizeMetric, "Size"}, 206 {memoryPagesMetric, "Pages"}, 207 {memoryTrimmableMetric, "Trimmable"}, 208 {memoryFreeChunksMetric, "Free Chunks"}, 209 {memoryFreeFastbinBlksMetric, "Free Fastbin Bulks"}, 210 {memoryMaxTotalAlloc, "Max Total Allocations"}, 211 } { 212 name := metric[0] 213 p.memoryGaugeVecs[name] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 214 Namespace: vppMetricsNamespace, 215 Subsystem: memoryMetricsNamespace, 216 Name: name, 217 Help: metric[1], 218 ConstLabels: prometheus.Labels{ 219 agentLabel: p.ServiceLabel.GetAgentLabel(), 220 }, 221 }, []string{memoryThreadLabel, memoryThreadIDLabel}) 222 223 } 224 225 // register created vectors to prometheus 226 for name, metric := range p.memoryGaugeVecs { 227 if err := p.Prometheus.Register(registryPath, metric); err != nil { 228 p.Log.Errorf("failed to register %v metric: %v", name, err) 229 return err 230 } 231 } 232 233 // Buffers metrics 234 p.buffersGaugeVecs = make(map[string]*prometheus.GaugeVec) 235 p.buffersStats = make(map[string]*buffersStats) 236 237 for _, metric := range [][2]string{ 238 {buffersSizeMetric, "Size of buffer"}, 239 {buffersAllocMetric, "Allocated"}, 240 {buffersFreeMetric, "Free"}, 241 {buffersNumAllocMetric, "Number of allocated"}, 242 {buffersNumFreeMetric, "Number of free"}, 243 } { 244 name := metric[0] 245 p.buffersGaugeVecs[name] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 246 Namespace: vppMetricsNamespace, 247 Subsystem: buffersMetricsNamespace, 248 Name: name, 249 Help: metric[1], 250 ConstLabels: prometheus.Labels{ 251 agentLabel: p.ServiceLabel.GetAgentLabel(), 252 }, 253 }, []string{buffersThreadIDLabel, buffersItemLabel, buffersIndexLabel}) 254 255 } 256 257 // register created vectors to prometheus 258 for name, metric := range p.buffersGaugeVecs { 259 if err := p.Prometheus.Register(registryPath, metric); err != nil { 260 p.Log.Errorf("failed to register %v metric: %v", name, err) 261 return err 262 } 263 } 264 265 // Node counters metrics 266 p.nodeCounterGaugeVecs = make(map[string]*prometheus.GaugeVec) 267 p.nodeCounterStats = make(map[string]*nodeCounterStats) 268 269 for _, metric := range [][2]string{ 270 {nodeCounterCounterMetric, "Counter"}, 271 } { 272 name := metric[0] 273 p.nodeCounterGaugeVecs[name] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 274 Namespace: vppMetricsNamespace, 275 Subsystem: nodeMetricsNamespace, 276 Name: name, 277 Help: metric[1], 278 ConstLabels: prometheus.Labels{ 279 agentLabel: p.ServiceLabel.GetAgentLabel(), 280 }, 281 }, []string{nodeCounterItemLabel, nodeCounterReasonLabel}) 282 283 } 284 285 // register created vectors to prometheus 286 for name, metric := range p.nodeCounterGaugeVecs { 287 if err := p.Prometheus.Register(registryPath, metric); err != nil { 288 p.Log.Errorf("failed to register %v metric: %v", name, err) 289 return err 290 } 291 } 292 293 // Interface counter metrics 294 p.ifCounterGaugeVecs = make(map[string]*prometheus.GaugeVec) 295 p.ifCounterStats = make(map[string]*ifCounterStats) 296 297 for _, metric := range [][2]string{ 298 {ifCounterRxPackets, "RX packets"}, 299 {ifCounterRxBytes, "RX bytes"}, 300 {ifCounterRxErrors, "RX errors"}, 301 {ifCounterTxPackets, "TX packets"}, 302 {ifCounterTxBytes, "TX bytes"}, 303 {ifCounterTxErrors, "TX errors"}, 304 {ifCounterDrops, "Drops"}, 305 {ifCounterPunts, "Punts"}, 306 {ifCounterIP4, "IP4"}, 307 {ifCounterIP6, "IP6"}, 308 {ifCounterRxNoBuf, "RX nobuf"}, 309 {ifCounterRxMiss, "RX miss"}, 310 } { 311 name := metric[0] 312 p.ifCounterGaugeVecs[name] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 313 Namespace: vppMetricsNamespace, 314 Subsystem: ifMetricsNamespace, 315 Name: name, 316 Help: metric[1], 317 ConstLabels: prometheus.Labels{ 318 agentLabel: p.ServiceLabel.GetAgentLabel(), 319 }, 320 }, []string{ifCounterNameLabel, ifCounterIndexLabel}) 321 322 } 323 324 // register created vectors to prometheus 325 for name, metric := range p.ifCounterGaugeVecs { 326 if err := p.Prometheus.Register(registryPath, metric); err != nil { 327 p.Log.Errorf("failed to register %v metric: %v", name, err) 328 return err 329 } 330 } 331 332 return nil 333 } 334 335 func (p *Plugin) updatePrometheus(ctx context.Context) { 336 p.tracef("running update") 337 338 if !p.skipped[runtimeMetricsNamespace] { 339 // Update runtime 340 runtimeInfo, err := p.handler.GetRuntimeInfo(ctx) 341 if err != nil { 342 p.Log.Errorf("GetRuntimeInfo failed: %v", err) 343 } else { 344 p.tracef("runtime info: %+v", runtimeInfo) 345 for _, thread := range runtimeInfo.GetThreads() { 346 for _, item := range thread.Items { 347 stats, ok := p.runtimeStats[item.Name] 348 if !ok { 349 stats = &runtimeStats{ 350 threadID: thread.ID, 351 threadName: thread.Name, 352 itemName: item.Name, 353 metrics: map[string]prometheus.Gauge{}, 354 } 355 p.runtimeStats[item.Name] = stats 356 357 // add gauges with corresponding labels into vectors 358 for k, vec := range p.runtimeGaugeVecs { 359 stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{ 360 runtimeItemLabel: item.Name, 361 runtimeThreadLabel: thread.Name, 362 runtimeThreadIDLabel: strconv.Itoa(int(thread.ID)), 363 }) 364 if err != nil { 365 p.Log.Error(err) 366 } 367 } 368 } 369 370 stats.metrics[runtimeCallsMetric].Set(float64(item.Calls)) 371 stats.metrics[runtimeVectorsMetric].Set(float64(item.Vectors)) 372 stats.metrics[runtimeSuspendsMetric].Set(float64(item.Suspends)) 373 stats.metrics[runtimeClocksMetric].Set(item.Clocks) 374 stats.metrics[runtimeVectorsPerCallMetric].Set(item.VectorsPerCall) 375 } 376 } 377 } 378 } 379 380 if !p.skipped[buffersMetricsNamespace] { 381 // Update buffers 382 buffersInfo, err := p.handler.GetBuffersInfo(ctx) 383 if err != nil { 384 p.Log.Errorf("GetBuffersInfo failed: %v", err) 385 } else { 386 p.tracef("buffers info: %+v", buffersInfo) 387 for _, item := range buffersInfo.GetItems() { 388 stats, ok := p.buffersStats[item.Name] 389 if !ok { 390 stats = &buffersStats{ 391 threadID: item.ThreadID, 392 itemName: item.Name, 393 itemIndex: item.Index, 394 metrics: map[string]prometheus.Gauge{}, 395 } 396 p.buffersStats[item.Name] = stats 397 398 // add gauges with corresponding labels into vectors 399 for k, vec := range p.buffersGaugeVecs { 400 stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{ 401 buffersThreadIDLabel: strconv.Itoa(int(item.ThreadID)), 402 buffersItemLabel: item.Name, 403 buffersIndexLabel: strconv.Itoa(int(item.Index)), 404 }) 405 if err != nil { 406 p.Log.Error(err) 407 } 408 } 409 } 410 411 stats.metrics[buffersSizeMetric].Set(float64(item.Size)) 412 stats.metrics[buffersAllocMetric].Set(float64(item.Alloc)) 413 stats.metrics[buffersFreeMetric].Set(float64(item.Free)) 414 stats.metrics[buffersNumAllocMetric].Set(float64(item.NumAlloc)) 415 stats.metrics[buffersNumFreeMetric].Set(float64(item.NumFree)) 416 } 417 } 418 } 419 420 if !p.skipped[memoryMetricsNamespace] { 421 // Update memory 422 memoryInfo, err := p.handler.GetMemory(ctx) 423 if err != nil { 424 p.Log.Errorf("GetMemory failed: %v", err) 425 } else { 426 p.tracef("memory info: %+v", memoryInfo) 427 for _, thread := range memoryInfo.GetThreads() { 428 stats, ok := p.memoryStats[thread.Name] 429 if !ok { 430 stats = &memoryStats{ 431 threadName: thread.Name, 432 threadID: thread.ID, 433 metrics: map[string]prometheus.Gauge{}, 434 } 435 p.memoryStats[thread.Name] = stats 436 437 // add gauges with corresponding labels into vectors 438 for k, vec := range p.memoryGaugeVecs { 439 stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{ 440 memoryThreadLabel: thread.Name, 441 memoryThreadIDLabel: strconv.Itoa(int(thread.ID)), 442 }) 443 if err != nil { 444 p.Log.Error(err) 445 } 446 } 447 } 448 449 stats.metrics[memoryUsedMetric].Set(float64(thread.Used)) 450 stats.metrics[memoryTotalMetric].Set(float64(thread.Total)) 451 stats.metrics[memoryFreeMetric].Set(float64(thread.Free)) 452 stats.metrics[memorySizeMetric].Set(float64(thread.Size)) 453 stats.metrics[memoryPagesMetric].Set(float64(thread.Pages)) 454 stats.metrics[memoryTrimmableMetric].Set(float64(thread.Trimmable)) 455 stats.metrics[memoryFreeChunksMetric].Set(float64(thread.FreeChunks)) 456 stats.metrics[memoryFreeFastbinBlksMetric].Set(float64(thread.FreeFastbinBlks)) 457 stats.metrics[memoryMaxTotalAlloc].Set(float64(thread.MaxTotalAlloc)) 458 } 459 } 460 } 461 462 if !p.skipped[nodeMetricsNamespace] { 463 // Update node counters 464 nodeCountersInfo, err := p.handler.GetNodeCounters(ctx) 465 if err != nil { 466 p.Log.Errorf("GetNodeCounters failed: %v", err) 467 } else { 468 p.tracef("node counters info: %+v", nodeCountersInfo) 469 for _, item := range nodeCountersInfo.GetCounters() { 470 stats, ok := p.nodeCounterStats[item.Name] 471 if !ok { 472 stats = &nodeCounterStats{ 473 itemName: item.Name, 474 metrics: map[string]prometheus.Gauge{}, 475 } 476 p.nodeCounterStats[item.Name] = stats 477 478 // add gauges with corresponding labels into vectors 479 for k, vec := range p.nodeCounterGaugeVecs { 480 stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{ 481 nodeCounterItemLabel: item.Node, 482 nodeCounterReasonLabel: item.Name, 483 }) 484 if err != nil { 485 p.Log.Error(err) 486 } 487 } 488 } 489 490 stats.metrics[nodeCounterCounterMetric].Set(float64(item.Value)) 491 } 492 } 493 } 494 495 if !p.skipped[ifMetricsNamespace] { 496 // Update interface counters 497 ifStats, err := p.handler.GetInterfaceStats(ctx) 498 if err != nil { 499 p.Log.Errorf("GetInterfaceStats failed: %v", err) 500 return 501 } else { 502 p.tracef("interface stats: %+v", ifStats) 503 if ifStats == nil { 504 return 505 } 506 for _, item := range ifStats.Interfaces { 507 stats, ok := p.ifCounterStats[item.InterfaceName] 508 if !ok { 509 stats = &ifCounterStats{ 510 name: item.InterfaceName, 511 metrics: map[string]prometheus.Gauge{}, 512 } 513 p.ifCounterStats[item.InterfaceName] = stats 514 515 // add gauges with corresponding labels into vectors 516 for k, vec := range p.ifCounterGaugeVecs { 517 stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{ 518 ifCounterNameLabel: item.InterfaceName, 519 ifCounterIndexLabel: fmt.Sprint(item.InterfaceIndex), 520 }) 521 if err != nil { 522 p.Log.Error(err) 523 } 524 } 525 } 526 527 stats.metrics[ifCounterRxPackets].Set(float64(item.Rx.Packets)) 528 stats.metrics[ifCounterRxBytes].Set(float64(item.Rx.Bytes)) 529 stats.metrics[ifCounterRxErrors].Set(float64(item.RxErrors)) 530 stats.metrics[ifCounterTxPackets].Set(float64(item.Tx.Packets)) 531 stats.metrics[ifCounterTxBytes].Set(float64(item.Tx.Bytes)) 532 stats.metrics[ifCounterTxErrors].Set(float64(item.TxErrors)) 533 stats.metrics[ifCounterDrops].Set(float64(item.Drops)) 534 stats.metrics[ifCounterPunts].Set(float64(item.Punts)) 535 stats.metrics[ifCounterIP4].Set(float64(item.IP4)) 536 stats.metrics[ifCounterIP6].Set(float64(item.IP6)) 537 stats.metrics[ifCounterRxNoBuf].Set(float64(item.RxNoBuf)) 538 stats.metrics[ifCounterRxMiss].Set(float64(item.RxMiss)) 539 } 540 } 541 } 542 543 p.tracef("update complete") 544 }