github.com/google/cadvisor@v0.49.1/metrics/prometheus.go

// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package metrics

import (
	"fmt"
	"regexp"
	"strconv"
	"time"

	"github.com/google/cadvisor/container"
	info "github.com/google/cadvisor/info/v1"
	v2 "github.com/google/cadvisor/info/v2"

	"github.com/prometheus/client_golang/prometheus"

	"k8s.io/klog/v2"
	"k8s.io/utils/clock"
)

// asFloat64 converts a uint64 into a float64.
func asFloat64(v uint64) float64 { return float64(v) }

// asNanosecondsToSeconds converts nanoseconds into a float64 representing seconds.
func asNanosecondsToSeconds(v uint64) float64 {
	return float64(v) / float64(time.Second)
}

// fsValues is a helper function for assembling per-filesystem stats.
func fsValues(fsStats []info.FsStats, valueFn func(*info.FsStats) float64, timestamp time.Time) metricValues {
	values := make(metricValues, 0, len(fsStats))
	for _, stat := range fsStats {
		values = append(values, metricValue{
			value:     valueFn(&stat),
			labels:    []string{stat.Device},
			timestamp: timestamp,
		})
	}
	return values
}

// ioValues is a helper function for assembling per-disk and per-filesystem stats.
func ioValues(ioStats []info.PerDiskStats, ioType string, ioValueFn func(uint64) float64,
	fsStats []info.FsStats, valueFn func(*info.FsStats) float64, timestamp time.Time) metricValues {

	values := make(metricValues, 0, len(ioStats)+len(fsStats))
	for _, stat := range ioStats {
		values = append(values, metricValue{
			value:     ioValueFn(stat.Stats[ioType]),
			labels:    []string{stat.Device},
			timestamp: timestamp,
		})
	}
	for _, stat := range fsStats {
		values = append(values, metricValue{
			value:     valueFn(&stat),
			labels:    []string{stat.Device},
			timestamp: timestamp,
		})
	}
	return values
}

// containerMetric describes a multi-dimensional metric used for exposing a
// certain type of container statistic.
type containerMetric struct {
	name        string
	help        string
	valueType   prometheus.ValueType
	extraLabels []string
	condition   func(s info.ContainerSpec) bool
	getValues   func(s *info.ContainerStats) metricValues
}

// desc builds the Prometheus descriptor for the metric; the metric's extra
// labels are appended after the base labels supplied by the caller.
func (cm *containerMetric) desc(baseLabels []string) *prometheus.Desc {
	return prometheus.NewDesc(cm.name, cm.help, append(baseLabels, cm.extraLabels...), nil)
}

// ContainerLabelsFunc defines all base labels and their values attached to
// each metric exported by cAdvisor.
type ContainerLabelsFunc func(*info.ContainerInfo) map[string]string

// PrometheusCollector implements prometheus.Collector.
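//
// A minimal registration sketch (illustrative only; "provider" stands for any
// implementation of this package's infoProvider interface, and the metric set
// shown is an arbitrary example):
//
//	collector := NewPrometheusCollector(provider, nil, container.MetricSet{
//		container.CpuUsageMetrics:    struct{}{},
//		container.MemoryUsageMetrics: struct{}{},
//	}, clock.RealClock{}, v2.RequestOptions{})
//	registry := prometheus.NewRegistry()
//	registry.MustRegister(collector)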
96 type PrometheusCollector struct { 97 infoProvider infoProvider 98 errors prometheus.Gauge 99 containerMetrics []containerMetric 100 containerLabelsFunc ContainerLabelsFunc 101 includedMetrics container.MetricSet 102 opts v2.RequestOptions 103 } 104 105 // NewPrometheusCollector returns a new PrometheusCollector. The passed 106 // ContainerLabelsFunc specifies which base labels will be attached to all 107 // exported metrics. If left to nil, the DefaultContainerLabels function 108 // will be used instead. 109 func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetrics container.MetricSet, now clock.Clock, opts v2.RequestOptions) *PrometheusCollector { 110 if f == nil { 111 f = DefaultContainerLabels 112 } 113 c := &PrometheusCollector{ 114 infoProvider: i, 115 containerLabelsFunc: f, 116 errors: prometheus.NewGauge(prometheus.GaugeOpts{ 117 Namespace: "container", 118 Name: "scrape_error", 119 Help: "1 if there was an error while getting container metrics, 0 otherwise", 120 }), 121 containerMetrics: []containerMetric{ 122 { 123 name: "container_last_seen", 124 help: "Last time a container was seen by the exporter", 125 valueType: prometheus.GaugeValue, 126 getValues: func(s *info.ContainerStats) metricValues { 127 return metricValues{{ 128 value: float64(now.Now().Unix()), 129 timestamp: now.Now(), 130 }} 131 }, 132 }, 133 }, 134 includedMetrics: includedMetrics, 135 opts: opts, 136 } 137 if includedMetrics.Has(container.CpuUsageMetrics) { 138 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 139 { 140 name: "container_cpu_user_seconds_total", 141 help: "Cumulative user cpu time consumed in seconds.", 142 valueType: prometheus.CounterValue, 143 getValues: func(s *info.ContainerStats) metricValues { 144 return metricValues{ 145 { 146 value: float64(s.Cpu.Usage.User) / float64(time.Second), 147 timestamp: s.Timestamp, 148 }, 149 } 150 }, 151 }, { 152 name: "container_cpu_system_seconds_total", 153 help: "Cumulative system cpu time consumed in seconds.", 154 valueType: prometheus.CounterValue, 155 getValues: func(s *info.ContainerStats) metricValues { 156 return metricValues{ 157 { 158 value: float64(s.Cpu.Usage.System) / float64(time.Second), 159 timestamp: s.Timestamp, 160 }, 161 } 162 }, 163 }, { 164 name: "container_cpu_usage_seconds_total", 165 help: "Cumulative cpu time consumed in seconds.", 166 valueType: prometheus.CounterValue, 167 extraLabels: []string{"cpu"}, 168 getValues: func(s *info.ContainerStats) metricValues { 169 if len(s.Cpu.Usage.PerCpu) == 0 { 170 if s.Cpu.Usage.Total > 0 { 171 return metricValues{{ 172 value: float64(s.Cpu.Usage.Total) / float64(time.Second), 173 labels: []string{"total"}, 174 timestamp: s.Timestamp, 175 }} 176 } 177 } 178 values := make(metricValues, 0, len(s.Cpu.Usage.PerCpu)) 179 for i, value := range s.Cpu.Usage.PerCpu { 180 if value > 0 { 181 values = append(values, metricValue{ 182 value: float64(value) / float64(time.Second), 183 labels: []string{fmt.Sprintf("cpu%02d", i)}, 184 timestamp: s.Timestamp, 185 }) 186 } 187 } 188 return values 189 }, 190 }, { 191 name: "container_cpu_cfs_periods_total", 192 help: "Number of elapsed enforcement period intervals.", 193 valueType: prometheus.CounterValue, 194 condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 }, 195 getValues: func(s *info.ContainerStats) metricValues { 196 return metricValues{ 197 { 198 value: float64(s.Cpu.CFS.Periods), 199 timestamp: s.Timestamp, 200 }} 201 }, 202 }, { 203 name: "container_cpu_cfs_throttled_periods_total", 
204 help: "Number of throttled period intervals.", 205 valueType: prometheus.CounterValue, 206 condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 }, 207 getValues: func(s *info.ContainerStats) metricValues { 208 return metricValues{ 209 { 210 value: float64(s.Cpu.CFS.ThrottledPeriods), 211 timestamp: s.Timestamp, 212 }} 213 }, 214 }, { 215 name: "container_cpu_cfs_throttled_seconds_total", 216 help: "Total time duration the container has been throttled.", 217 valueType: prometheus.CounterValue, 218 condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 }, 219 getValues: func(s *info.ContainerStats) metricValues { 220 return metricValues{ 221 { 222 value: float64(s.Cpu.CFS.ThrottledTime) / float64(time.Second), 223 timestamp: s.Timestamp, 224 }} 225 }, 226 }, 227 }...) 228 } 229 if includedMetrics.Has(container.ProcessSchedulerMetrics) { 230 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 231 { 232 name: "container_cpu_schedstat_run_seconds_total", 233 help: "Time duration the processes of the container have run on the CPU.", 234 valueType: prometheus.CounterValue, 235 getValues: func(s *info.ContainerStats) metricValues { 236 return metricValues{{ 237 value: float64(s.Cpu.Schedstat.RunTime) / float64(time.Second), 238 timestamp: s.Timestamp, 239 }} 240 }, 241 }, { 242 name: "container_cpu_schedstat_runqueue_seconds_total", 243 help: "Time duration processes of the container have been waiting on a runqueue.", 244 valueType: prometheus.CounterValue, 245 getValues: func(s *info.ContainerStats) metricValues { 246 return metricValues{{ 247 value: float64(s.Cpu.Schedstat.RunqueueTime) / float64(time.Second), 248 timestamp: s.Timestamp, 249 }} 250 }, 251 }, { 252 name: "container_cpu_schedstat_run_periods_total", 253 help: "Number of times processes of the cgroup have run on the cpu", 254 valueType: prometheus.CounterValue, 255 getValues: func(s *info.ContainerStats) metricValues { 256 return metricValues{{ 257 value: float64(s.Cpu.Schedstat.RunPeriods), 258 timestamp: s.Timestamp, 259 }} 260 }, 261 }, 262 }...) 263 } 264 if includedMetrics.Has(container.CpuLoadMetrics) { 265 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 266 { 267 name: "container_cpu_load_average_10s", 268 help: "Value of container cpu load average over the last 10 seconds.", 269 valueType: prometheus.GaugeValue, 270 getValues: func(s *info.ContainerStats) metricValues { 271 return metricValues{{value: float64(s.Cpu.LoadAverage), timestamp: s.Timestamp}} 272 }, 273 }, { 274 name: "container_tasks_state", 275 help: "Number of tasks in given state", 276 extraLabels: []string{"state"}, 277 valueType: prometheus.GaugeValue, 278 getValues: func(s *info.ContainerStats) metricValues { 279 return metricValues{ 280 { 281 value: float64(s.TaskStats.NrSleeping), 282 labels: []string{"sleeping"}, 283 timestamp: s.Timestamp, 284 }, 285 { 286 value: float64(s.TaskStats.NrRunning), 287 labels: []string{"running"}, 288 timestamp: s.Timestamp, 289 }, 290 { 291 value: float64(s.TaskStats.NrStopped), 292 labels: []string{"stopped"}, 293 timestamp: s.Timestamp, 294 }, 295 { 296 value: float64(s.TaskStats.NrUninterruptible), 297 labels: []string{"uninterruptible"}, 298 timestamp: s.Timestamp, 299 }, 300 { 301 value: float64(s.TaskStats.NrIoWait), 302 labels: []string{"iowaiting"}, 303 timestamp: s.Timestamp, 304 }, 305 } 306 }, 307 }, 308 }...) 
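// Each metricValue above carries its label values positionally, matching the
// metric's extraLabels (here "state"); desc() combines those extra labels with
// the base labels produced by the ContainerLabelsFunc. A scraped series might
// therefore look like the following (value and base labels purely illustrative):
//
//	container_tasks_state{state="running",id="/docker/...",name="...",image="..."} 2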
309 } 310 if includedMetrics.Has(container.HugetlbUsageMetrics) { 311 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 312 { 313 name: "container_hugetlb_failcnt", 314 help: "Number of hugepage usage hits limits", 315 valueType: prometheus.CounterValue, 316 extraLabels: []string{"pagesize"}, 317 getValues: func(s *info.ContainerStats) metricValues { 318 values := make(metricValues, 0, len(s.Hugetlb)) 319 for k, v := range s.Hugetlb { 320 values = append(values, metricValue{ 321 value: float64(v.Failcnt), 322 labels: []string{k}, 323 timestamp: s.Timestamp, 324 }) 325 } 326 return values 327 }, 328 }, { 329 name: "container_hugetlb_usage_bytes", 330 help: "Current hugepage usage in bytes", 331 valueType: prometheus.GaugeValue, 332 extraLabels: []string{"pagesize"}, 333 getValues: func(s *info.ContainerStats) metricValues { 334 values := make(metricValues, 0, len(s.Hugetlb)) 335 for k, v := range s.Hugetlb { 336 values = append(values, metricValue{ 337 value: float64(v.Usage), 338 labels: []string{k}, 339 timestamp: s.Timestamp, 340 }) 341 } 342 return values 343 }, 344 }, 345 { 346 name: "container_hugetlb_max_usage_bytes", 347 help: "Maximum hugepage usage recorded in bytes", 348 valueType: prometheus.GaugeValue, 349 extraLabels: []string{"pagesize"}, 350 getValues: func(s *info.ContainerStats) metricValues { 351 values := make(metricValues, 0, len(s.Hugetlb)) 352 for k, v := range s.Hugetlb { 353 values = append(values, metricValue{ 354 value: float64(v.MaxUsage), 355 labels: []string{k}, 356 timestamp: s.Timestamp, 357 }) 358 } 359 return values 360 }, 361 }, 362 }...) 363 } 364 if includedMetrics.Has(container.MemoryUsageMetrics) { 365 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 366 { 367 name: "container_memory_cache", 368 help: "Number of bytes of page cache memory.", 369 valueType: prometheus.GaugeValue, 370 getValues: func(s *info.ContainerStats) metricValues { 371 return metricValues{{value: float64(s.Memory.Cache), timestamp: s.Timestamp}} 372 }, 373 }, { 374 name: "container_memory_rss", 375 help: "Size of RSS in bytes.", 376 valueType: prometheus.GaugeValue, 377 getValues: func(s *info.ContainerStats) metricValues { 378 return metricValues{{value: float64(s.Memory.RSS), timestamp: s.Timestamp}} 379 }, 380 }, { 381 name: "container_memory_kernel_usage", 382 help: "Size of kernel memory allocated in bytes.", 383 valueType: prometheus.GaugeValue, 384 getValues: func(s *info.ContainerStats) metricValues { 385 return metricValues{{value: float64(s.Memory.KernelUsage), timestamp: s.Timestamp}} 386 }, 387 }, { 388 name: "container_memory_mapped_file", 389 help: "Size of memory mapped files in bytes.", 390 valueType: prometheus.GaugeValue, 391 getValues: func(s *info.ContainerStats) metricValues { 392 return metricValues{{value: float64(s.Memory.MappedFile), timestamp: s.Timestamp}} 393 }, 394 }, { 395 name: "container_memory_swap", 396 help: "Container swap usage in bytes.", 397 valueType: prometheus.GaugeValue, 398 getValues: func(s *info.ContainerStats) metricValues { 399 return metricValues{{value: float64(s.Memory.Swap), timestamp: s.Timestamp}} 400 }, 401 }, { 402 name: "container_memory_failcnt", 403 help: "Number of memory usage hits limits", 404 valueType: prometheus.CounterValue, 405 getValues: func(s *info.ContainerStats) metricValues { 406 return metricValues{{ 407 value: float64(s.Memory.Failcnt), 408 timestamp: s.Timestamp, 409 }} 410 }, 411 }, { 412 name: "container_memory_usage_bytes", 413 help: "Current memory usage in bytes, 
including all memory regardless of when it was accessed", 414 valueType: prometheus.GaugeValue, 415 getValues: func(s *info.ContainerStats) metricValues { 416 return metricValues{{value: float64(s.Memory.Usage), timestamp: s.Timestamp}} 417 }, 418 }, 419 { 420 name: "container_memory_max_usage_bytes", 421 help: "Maximum memory usage recorded in bytes", 422 valueType: prometheus.GaugeValue, 423 getValues: func(s *info.ContainerStats) metricValues { 424 return metricValues{{value: float64(s.Memory.MaxUsage), timestamp: s.Timestamp}} 425 }, 426 }, { 427 name: "container_memory_working_set_bytes", 428 help: "Current working set in bytes.", 429 valueType: prometheus.GaugeValue, 430 getValues: func(s *info.ContainerStats) metricValues { 431 return metricValues{{value: float64(s.Memory.WorkingSet), timestamp: s.Timestamp}} 432 }, 433 }, 434 { 435 name: "container_memory_failures_total", 436 help: "Cumulative count of memory allocation failures.", 437 valueType: prometheus.CounterValue, 438 extraLabels: []string{"failure_type", "scope"}, 439 getValues: func(s *info.ContainerStats) metricValues { 440 return metricValues{ 441 { 442 value: float64(s.Memory.ContainerData.Pgfault), 443 labels: []string{"pgfault", "container"}, 444 timestamp: s.Timestamp, 445 }, 446 { 447 value: float64(s.Memory.ContainerData.Pgmajfault), 448 labels: []string{"pgmajfault", "container"}, 449 timestamp: s.Timestamp, 450 }, 451 { 452 value: float64(s.Memory.HierarchicalData.Pgfault), 453 labels: []string{"pgfault", "hierarchy"}, 454 timestamp: s.Timestamp, 455 }, 456 { 457 value: float64(s.Memory.HierarchicalData.Pgmajfault), 458 labels: []string{"pgmajfault", "hierarchy"}, 459 timestamp: s.Timestamp, 460 }, 461 } 462 }, 463 }, 464 }...) 465 } 466 if includedMetrics.Has(container.CPUSetMetrics) { 467 c.containerMetrics = append(c.containerMetrics, containerMetric{ 468 name: "container_memory_migrate", 469 help: "Memory migrate status.", 470 valueType: prometheus.GaugeValue, 471 getValues: func(s *info.ContainerStats) metricValues { 472 return metricValues{{value: float64(s.CpuSet.MemoryMigrate), timestamp: s.Timestamp}} 473 }, 474 }) 475 } 476 if includedMetrics.Has(container.MemoryNumaMetrics) { 477 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 478 { 479 name: "container_memory_numa_pages", 480 help: "Number of used pages per NUMA node", 481 valueType: prometheus.GaugeValue, 482 extraLabels: []string{"type", "scope", "node"}, 483 getValues: func(s *info.ContainerStats) metricValues { 484 values := make(metricValues, 0) 485 values = append(values, getNumaStatsPerNode(s.Memory.ContainerData.NumaStats.File, 486 []string{"file", "container"}, s.Timestamp)...) 487 values = append(values, getNumaStatsPerNode(s.Memory.ContainerData.NumaStats.Anon, 488 []string{"anon", "container"}, s.Timestamp)...) 489 values = append(values, getNumaStatsPerNode(s.Memory.ContainerData.NumaStats.Unevictable, 490 []string{"unevictable", "container"}, s.Timestamp)...) 491 492 values = append(values, getNumaStatsPerNode(s.Memory.HierarchicalData.NumaStats.File, 493 []string{"file", "hierarchy"}, s.Timestamp)...) 494 values = append(values, getNumaStatsPerNode(s.Memory.HierarchicalData.NumaStats.Anon, 495 []string{"anon", "hierarchy"}, s.Timestamp)...) 496 values = append(values, getNumaStatsPerNode(s.Memory.HierarchicalData.NumaStats.Unevictable, 497 []string{"unevictable", "hierarchy"}, s.Timestamp)...) 498 return values 499 }, 500 }, 501 }...) 
502 } 503 if includedMetrics.Has(container.DiskUsageMetrics) { 504 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 505 { 506 name: "container_fs_inodes_free", 507 help: "Number of available Inodes", 508 valueType: prometheus.GaugeValue, 509 extraLabels: []string{"device"}, 510 getValues: func(s *info.ContainerStats) metricValues { 511 return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { 512 return float64(fs.InodesFree) 513 }, s.Timestamp) 514 }, 515 }, { 516 name: "container_fs_inodes_total", 517 help: "Number of Inodes", 518 valueType: prometheus.GaugeValue, 519 extraLabels: []string{"device"}, 520 getValues: func(s *info.ContainerStats) metricValues { 521 return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { 522 return float64(fs.Inodes) 523 }, s.Timestamp) 524 }, 525 }, { 526 name: "container_fs_limit_bytes", 527 help: "Number of bytes that can be consumed by the container on this filesystem.", 528 valueType: prometheus.GaugeValue, 529 extraLabels: []string{"device"}, 530 getValues: func(s *info.ContainerStats) metricValues { 531 return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { 532 return float64(fs.Limit) 533 }, s.Timestamp) 534 }, 535 }, { 536 name: "container_fs_usage_bytes", 537 help: "Number of bytes that are consumed by the container on this filesystem.", 538 valueType: prometheus.GaugeValue, 539 extraLabels: []string{"device"}, 540 getValues: func(s *info.ContainerStats) metricValues { 541 return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { 542 return float64(fs.Usage) 543 }, s.Timestamp) 544 }, 545 }, 546 }...) 547 } 548 if includedMetrics.Has(container.DiskIOMetrics) { 549 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 550 { 551 name: "container_fs_reads_bytes_total", 552 help: "Cumulative count of bytes read", 553 valueType: prometheus.CounterValue, 554 extraLabels: []string{"device"}, 555 getValues: func(s *info.ContainerStats) metricValues { 556 return ioValues( 557 s.DiskIo.IoServiceBytes, "Read", asFloat64, 558 nil, nil, 559 s.Timestamp, 560 ) 561 }, 562 }, { 563 name: "container_fs_reads_total", 564 help: "Cumulative count of reads completed", 565 valueType: prometheus.CounterValue, 566 extraLabels: []string{"device"}, 567 getValues: func(s *info.ContainerStats) metricValues { 568 return ioValues( 569 s.DiskIo.IoServiced, "Read", asFloat64, 570 s.Filesystem, func(fs *info.FsStats) float64 { 571 return float64(fs.ReadsCompleted) 572 }, 573 s.Timestamp, 574 ) 575 }, 576 }, { 577 name: "container_fs_sector_reads_total", 578 help: "Cumulative count of sector reads completed", 579 valueType: prometheus.CounterValue, 580 extraLabels: []string{"device"}, 581 getValues: func(s *info.ContainerStats) metricValues { 582 return ioValues( 583 s.DiskIo.Sectors, "Read", asFloat64, 584 s.Filesystem, func(fs *info.FsStats) float64 { 585 return float64(fs.SectorsRead) 586 }, 587 s.Timestamp, 588 ) 589 }, 590 }, { 591 name: "container_fs_reads_merged_total", 592 help: "Cumulative count of reads merged", 593 valueType: prometheus.CounterValue, 594 extraLabels: []string{"device"}, 595 getValues: func(s *info.ContainerStats) metricValues { 596 return ioValues( 597 s.DiskIo.IoMerged, "Read", asFloat64, 598 s.Filesystem, func(fs *info.FsStats) float64 { 599 return float64(fs.ReadsMerged) 600 }, 601 s.Timestamp, 602 ) 603 }, 604 }, { 605 name: "container_fs_read_seconds_total", 606 help: "Cumulative count of seconds spent reading", 607 valueType: prometheus.CounterValue, 608 extraLabels: []string{"device"}, 609 
getValues: func(s *info.ContainerStats) metricValues { 610 return ioValues( 611 s.DiskIo.IoServiceTime, "Read", asNanosecondsToSeconds, 612 s.Filesystem, func(fs *info.FsStats) float64 { 613 return float64(fs.ReadTime) / float64(time.Second) 614 }, 615 s.Timestamp, 616 ) 617 }, 618 }, { 619 name: "container_fs_writes_bytes_total", 620 help: "Cumulative count of bytes written", 621 valueType: prometheus.CounterValue, 622 extraLabels: []string{"device"}, 623 getValues: func(s *info.ContainerStats) metricValues { 624 return ioValues( 625 s.DiskIo.IoServiceBytes, "Write", asFloat64, 626 nil, nil, 627 s.Timestamp, 628 ) 629 }, 630 }, { 631 name: "container_fs_writes_total", 632 help: "Cumulative count of writes completed", 633 valueType: prometheus.CounterValue, 634 extraLabels: []string{"device"}, 635 getValues: func(s *info.ContainerStats) metricValues { 636 return ioValues( 637 s.DiskIo.IoServiced, "Write", asFloat64, 638 s.Filesystem, func(fs *info.FsStats) float64 { 639 return float64(fs.WritesCompleted) 640 }, 641 s.Timestamp, 642 ) 643 }, 644 }, { 645 name: "container_fs_sector_writes_total", 646 help: "Cumulative count of sector writes completed", 647 valueType: prometheus.CounterValue, 648 extraLabels: []string{"device"}, 649 getValues: func(s *info.ContainerStats) metricValues { 650 return ioValues( 651 s.DiskIo.Sectors, "Write", asFloat64, 652 s.Filesystem, func(fs *info.FsStats) float64 { 653 return float64(fs.SectorsWritten) 654 }, 655 s.Timestamp, 656 ) 657 }, 658 }, { 659 name: "container_fs_writes_merged_total", 660 help: "Cumulative count of writes merged", 661 valueType: prometheus.CounterValue, 662 extraLabels: []string{"device"}, 663 getValues: func(s *info.ContainerStats) metricValues { 664 return ioValues( 665 s.DiskIo.IoMerged, "Write", asFloat64, 666 s.Filesystem, func(fs *info.FsStats) float64 { 667 return float64(fs.WritesMerged) 668 }, 669 s.Timestamp, 670 ) 671 }, 672 }, { 673 name: "container_fs_write_seconds_total", 674 help: "Cumulative count of seconds spent writing", 675 valueType: prometheus.CounterValue, 676 extraLabels: []string{"device"}, 677 getValues: func(s *info.ContainerStats) metricValues { 678 return ioValues( 679 s.DiskIo.IoServiceTime, "Write", asNanosecondsToSeconds, 680 s.Filesystem, func(fs *info.FsStats) float64 { 681 return float64(fs.WriteTime) / float64(time.Second) 682 }, 683 s.Timestamp, 684 ) 685 }, 686 }, { 687 name: "container_fs_io_current", 688 help: "Number of I/Os currently in progress", 689 valueType: prometheus.GaugeValue, 690 extraLabels: []string{"device"}, 691 getValues: func(s *info.ContainerStats) metricValues { 692 return ioValues( 693 s.DiskIo.IoQueued, "Total", asFloat64, 694 s.Filesystem, func(fs *info.FsStats) float64 { 695 return float64(fs.IoInProgress) 696 }, 697 s.Timestamp, 698 ) 699 }, 700 }, { 701 name: "container_fs_io_time_seconds_total", 702 help: "Cumulative count of seconds spent doing I/Os", 703 valueType: prometheus.CounterValue, 704 extraLabels: []string{"device"}, 705 getValues: func(s *info.ContainerStats) metricValues { 706 return ioValues( 707 s.DiskIo.IoServiceTime, "Total", asNanosecondsToSeconds, 708 s.Filesystem, func(fs *info.FsStats) float64 { 709 return float64(float64(fs.IoTime) / float64(time.Second)) 710 }, 711 s.Timestamp, 712 ) 713 }, 714 }, { 715 name: "container_fs_io_time_weighted_seconds_total", 716 help: "Cumulative weighted I/O time in seconds", 717 valueType: prometheus.CounterValue, 718 extraLabels: []string{"device"}, 719 getValues: func(s *info.ContainerStats) metricValues { 720 
return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { 721 return float64(fs.WeightedIoTime) / float64(time.Second) 722 }, s.Timestamp) 723 }, 724 }, 725 { 726 name: "container_blkio_device_usage_total", 727 help: "Blkio Device bytes usage", 728 valueType: prometheus.CounterValue, 729 extraLabels: []string{"device", "major", "minor", "operation"}, 730 getValues: func(s *info.ContainerStats) metricValues { 731 var values metricValues 732 for _, diskStat := range s.DiskIo.IoServiceBytes { 733 for operation, value := range diskStat.Stats { 734 values = append(values, metricValue{ 735 value: float64(value), 736 labels: []string{diskStat.Device, 737 strconv.Itoa(int(diskStat.Major)), 738 strconv.Itoa(int(diskStat.Minor)), 739 operation}, 740 timestamp: s.Timestamp, 741 }) 742 } 743 } 744 return values 745 }, 746 }, 747 }...) 748 } 749 if includedMetrics.Has(container.NetworkUsageMetrics) { 750 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 751 { 752 name: "container_network_receive_bytes_total", 753 help: "Cumulative count of bytes received", 754 valueType: prometheus.CounterValue, 755 extraLabels: []string{"interface"}, 756 getValues: func(s *info.ContainerStats) metricValues { 757 values := make(metricValues, 0, len(s.Network.Interfaces)) 758 for _, value := range s.Network.Interfaces { 759 values = append(values, metricValue{ 760 value: float64(value.RxBytes), 761 labels: []string{value.Name}, 762 timestamp: s.Timestamp, 763 }) 764 } 765 return values 766 }, 767 }, { 768 name: "container_network_receive_packets_total", 769 help: "Cumulative count of packets received", 770 valueType: prometheus.CounterValue, 771 extraLabels: []string{"interface"}, 772 getValues: func(s *info.ContainerStats) metricValues { 773 values := make(metricValues, 0, len(s.Network.Interfaces)) 774 for _, value := range s.Network.Interfaces { 775 values = append(values, metricValue{ 776 value: float64(value.RxPackets), 777 labels: []string{value.Name}, 778 timestamp: s.Timestamp, 779 }) 780 } 781 return values 782 }, 783 }, { 784 name: "container_network_receive_packets_dropped_total", 785 help: "Cumulative count of packets dropped while receiving", 786 valueType: prometheus.CounterValue, 787 extraLabels: []string{"interface"}, 788 getValues: func(s *info.ContainerStats) metricValues { 789 values := make(metricValues, 0, len(s.Network.Interfaces)) 790 for _, value := range s.Network.Interfaces { 791 values = append(values, metricValue{ 792 value: float64(value.RxDropped), 793 labels: []string{value.Name}, 794 timestamp: s.Timestamp, 795 }) 796 } 797 return values 798 }, 799 }, { 800 name: "container_network_receive_errors_total", 801 help: "Cumulative count of errors encountered while receiving", 802 valueType: prometheus.CounterValue, 803 extraLabels: []string{"interface"}, 804 getValues: func(s *info.ContainerStats) metricValues { 805 values := make(metricValues, 0, len(s.Network.Interfaces)) 806 for _, value := range s.Network.Interfaces { 807 values = append(values, metricValue{ 808 value: float64(value.RxErrors), 809 labels: []string{value.Name}, 810 timestamp: s.Timestamp, 811 }) 812 } 813 return values 814 }, 815 }, { 816 name: "container_network_transmit_bytes_total", 817 help: "Cumulative count of bytes transmitted", 818 valueType: prometheus.CounterValue, 819 extraLabels: []string{"interface"}, 820 getValues: func(s *info.ContainerStats) metricValues { 821 values := make(metricValues, 0, len(s.Network.Interfaces)) 822 for _, value := range s.Network.Interfaces { 823 values = 
append(values, metricValue{ 824 value: float64(value.TxBytes), 825 labels: []string{value.Name}, 826 timestamp: s.Timestamp, 827 }) 828 } 829 return values 830 }, 831 }, { 832 name: "container_network_transmit_packets_total", 833 help: "Cumulative count of packets transmitted", 834 valueType: prometheus.CounterValue, 835 extraLabels: []string{"interface"}, 836 getValues: func(s *info.ContainerStats) metricValues { 837 values := make(metricValues, 0, len(s.Network.Interfaces)) 838 for _, value := range s.Network.Interfaces { 839 values = append(values, metricValue{ 840 value: float64(value.TxPackets), 841 labels: []string{value.Name}, 842 timestamp: s.Timestamp, 843 }) 844 } 845 return values 846 }, 847 }, { 848 name: "container_network_transmit_packets_dropped_total", 849 help: "Cumulative count of packets dropped while transmitting", 850 valueType: prometheus.CounterValue, 851 extraLabels: []string{"interface"}, 852 getValues: func(s *info.ContainerStats) metricValues { 853 values := make(metricValues, 0, len(s.Network.Interfaces)) 854 for _, value := range s.Network.Interfaces { 855 values = append(values, metricValue{ 856 value: float64(value.TxDropped), 857 labels: []string{value.Name}, 858 timestamp: s.Timestamp, 859 }) 860 } 861 return values 862 }, 863 }, { 864 name: "container_network_transmit_errors_total", 865 help: "Cumulative count of errors encountered while transmitting", 866 valueType: prometheus.CounterValue, 867 extraLabels: []string{"interface"}, 868 getValues: func(s *info.ContainerStats) metricValues { 869 values := make(metricValues, 0, len(s.Network.Interfaces)) 870 for _, value := range s.Network.Interfaces { 871 values = append(values, metricValue{ 872 value: float64(value.TxErrors), 873 labels: []string{value.Name}, 874 timestamp: s.Timestamp, 875 }) 876 } 877 return values 878 }, 879 }, 880 }...) 
881 } 882 if includedMetrics.Has(container.NetworkTcpUsageMetrics) { 883 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 884 { 885 name: "container_network_tcp_usage_total", 886 help: "tcp connection usage statistic for container", 887 valueType: prometheus.GaugeValue, 888 extraLabels: []string{"tcp_state"}, 889 getValues: func(s *info.ContainerStats) metricValues { 890 return metricValues{ 891 { 892 value: float64(s.Network.Tcp.Established), 893 labels: []string{"established"}, 894 timestamp: s.Timestamp, 895 }, 896 { 897 value: float64(s.Network.Tcp.SynSent), 898 labels: []string{"synsent"}, 899 timestamp: s.Timestamp, 900 }, 901 { 902 value: float64(s.Network.Tcp.SynRecv), 903 labels: []string{"synrecv"}, 904 timestamp: s.Timestamp, 905 }, 906 { 907 value: float64(s.Network.Tcp.FinWait1), 908 labels: []string{"finwait1"}, 909 timestamp: s.Timestamp, 910 }, 911 { 912 value: float64(s.Network.Tcp.FinWait2), 913 labels: []string{"finwait2"}, 914 timestamp: s.Timestamp, 915 }, 916 { 917 value: float64(s.Network.Tcp.TimeWait), 918 labels: []string{"timewait"}, 919 timestamp: s.Timestamp, 920 }, 921 { 922 value: float64(s.Network.Tcp.Close), 923 labels: []string{"close"}, 924 timestamp: s.Timestamp, 925 }, 926 { 927 value: float64(s.Network.Tcp.CloseWait), 928 labels: []string{"closewait"}, 929 timestamp: s.Timestamp, 930 }, 931 { 932 value: float64(s.Network.Tcp.LastAck), 933 labels: []string{"lastack"}, 934 timestamp: s.Timestamp, 935 }, 936 { 937 value: float64(s.Network.Tcp.Listen), 938 labels: []string{"listen"}, 939 timestamp: s.Timestamp, 940 }, 941 { 942 value: float64(s.Network.Tcp.Closing), 943 labels: []string{"closing"}, 944 timestamp: s.Timestamp, 945 }, 946 } 947 }, 948 }, 949 }...) 950 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 951 { 952 name: "container_network_tcp6_usage_total", 953 help: "tcp6 connection usage statistic for container", 954 valueType: prometheus.GaugeValue, 955 extraLabels: []string{"tcp_state"}, 956 getValues: func(s *info.ContainerStats) metricValues { 957 return metricValues{ 958 { 959 value: float64(s.Network.Tcp6.Established), 960 labels: []string{"established"}, 961 timestamp: s.Timestamp, 962 }, 963 { 964 value: float64(s.Network.Tcp6.SynSent), 965 labels: []string{"synsent"}, 966 timestamp: s.Timestamp, 967 }, 968 { 969 value: float64(s.Network.Tcp6.SynRecv), 970 labels: []string{"synrecv"}, 971 timestamp: s.Timestamp, 972 }, 973 { 974 value: float64(s.Network.Tcp6.FinWait1), 975 labels: []string{"finwait1"}, 976 timestamp: s.Timestamp, 977 }, 978 { 979 value: float64(s.Network.Tcp6.FinWait2), 980 labels: []string{"finwait2"}, 981 timestamp: s.Timestamp, 982 }, 983 { 984 value: float64(s.Network.Tcp6.TimeWait), 985 labels: []string{"timewait"}, 986 timestamp: s.Timestamp, 987 }, 988 { 989 value: float64(s.Network.Tcp6.Close), 990 labels: []string{"close"}, 991 timestamp: s.Timestamp, 992 }, 993 { 994 value: float64(s.Network.Tcp6.CloseWait), 995 labels: []string{"closewait"}, 996 timestamp: s.Timestamp, 997 }, 998 { 999 value: float64(s.Network.Tcp6.LastAck), 1000 labels: []string{"lastack"}, 1001 timestamp: s.Timestamp, 1002 }, 1003 { 1004 value: float64(s.Network.Tcp6.Listen), 1005 labels: []string{"listen"}, 1006 timestamp: s.Timestamp, 1007 }, 1008 { 1009 value: float64(s.Network.Tcp6.Closing), 1010 labels: []string{"closing"}, 1011 timestamp: s.Timestamp, 1012 }, 1013 } 1014 }, 1015 }, 1016 }...) 
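// Note: despite the _total suffix, container_network_tcp_usage_total and
// container_network_tcp6_usage_total are gauges; each scrape reports the
// current number of connections in each TCP state rather than a cumulative
// counter.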
1017 } 1018 if includedMetrics.Has(container.NetworkAdvancedTcpUsageMetrics) { 1019 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 1020 { 1021 name: "container_network_advance_tcp_stats_total", 1022 help: "advance tcp connections statistic for container", 1023 valueType: prometheus.GaugeValue, 1024 extraLabels: []string{"tcp_state"}, 1025 getValues: func(s *info.ContainerStats) metricValues { 1026 return metricValues{ 1027 { 1028 value: float64(s.Network.TcpAdvanced.RtoAlgorithm), 1029 labels: []string{"rtoalgorithm"}, 1030 timestamp: s.Timestamp, 1031 }, { 1032 value: float64(s.Network.TcpAdvanced.RtoMin), 1033 labels: []string{"rtomin"}, 1034 timestamp: s.Timestamp, 1035 }, { 1036 value: float64(s.Network.TcpAdvanced.RtoMax), 1037 labels: []string{"rtomax"}, 1038 timestamp: s.Timestamp, 1039 }, { 1040 value: float64(s.Network.TcpAdvanced.MaxConn), 1041 labels: []string{"maxconn"}, 1042 timestamp: s.Timestamp, 1043 }, { 1044 value: float64(s.Network.TcpAdvanced.ActiveOpens), 1045 labels: []string{"activeopens"}, 1046 timestamp: s.Timestamp, 1047 }, { 1048 value: float64(s.Network.TcpAdvanced.PassiveOpens), 1049 labels: []string{"passiveopens"}, 1050 timestamp: s.Timestamp, 1051 }, { 1052 value: float64(s.Network.TcpAdvanced.AttemptFails), 1053 labels: []string{"attemptfails"}, 1054 timestamp: s.Timestamp, 1055 }, { 1056 value: float64(s.Network.TcpAdvanced.EstabResets), 1057 labels: []string{"estabresets"}, 1058 timestamp: s.Timestamp, 1059 }, { 1060 value: float64(s.Network.TcpAdvanced.CurrEstab), 1061 labels: []string{"currestab"}, 1062 timestamp: s.Timestamp, 1063 }, { 1064 value: float64(s.Network.TcpAdvanced.InSegs), 1065 labels: []string{"insegs"}, 1066 timestamp: s.Timestamp, 1067 }, { 1068 value: float64(s.Network.TcpAdvanced.OutSegs), 1069 labels: []string{"outsegs"}, 1070 timestamp: s.Timestamp, 1071 }, { 1072 value: float64(s.Network.TcpAdvanced.RetransSegs), 1073 labels: []string{"retranssegs"}, 1074 timestamp: s.Timestamp, 1075 }, { 1076 value: float64(s.Network.TcpAdvanced.InErrs), 1077 labels: []string{"inerrs"}, 1078 timestamp: s.Timestamp, 1079 }, { 1080 value: float64(s.Network.TcpAdvanced.OutRsts), 1081 labels: []string{"outrsts"}, 1082 timestamp: s.Timestamp, 1083 }, { 1084 value: float64(s.Network.TcpAdvanced.InCsumErrors), 1085 labels: []string{"incsumerrors"}, 1086 timestamp: s.Timestamp, 1087 }, { 1088 value: float64(s.Network.TcpAdvanced.EmbryonicRsts), 1089 labels: []string{"embryonicrsts"}, 1090 timestamp: s.Timestamp, 1091 }, { 1092 value: float64(s.Network.TcpAdvanced.SyncookiesSent), 1093 labels: []string{"syncookiessent"}, 1094 timestamp: s.Timestamp, 1095 }, { 1096 value: float64(s.Network.TcpAdvanced.SyncookiesRecv), 1097 labels: []string{"syncookiesrecv"}, 1098 timestamp: s.Timestamp, 1099 }, { 1100 value: float64(s.Network.TcpAdvanced.SyncookiesFailed), 1101 labels: []string{"syncookiesfailed"}, 1102 timestamp: s.Timestamp, 1103 }, { 1104 value: float64(s.Network.TcpAdvanced.PruneCalled), 1105 labels: []string{"prunecalled"}, 1106 timestamp: s.Timestamp, 1107 }, { 1108 value: float64(s.Network.TcpAdvanced.RcvPruned), 1109 labels: []string{"rcvpruned"}, 1110 timestamp: s.Timestamp, 1111 }, { 1112 value: float64(s.Network.TcpAdvanced.OfoPruned), 1113 labels: []string{"ofopruned"}, 1114 timestamp: s.Timestamp, 1115 }, { 1116 value: float64(s.Network.TcpAdvanced.OutOfWindowIcmps), 1117 labels: []string{"outofwindowicmps"}, 1118 timestamp: s.Timestamp, 1119 }, { 1120 value: float64(s.Network.TcpAdvanced.LockDroppedIcmps), 1121 labels: 
[]string{"lockdroppedicmps"}, 1122 timestamp: s.Timestamp, 1123 }, { 1124 value: float64(s.Network.TcpAdvanced.TW), 1125 labels: []string{"tw"}, 1126 timestamp: s.Timestamp, 1127 }, { 1128 value: float64(s.Network.TcpAdvanced.TWRecycled), 1129 labels: []string{"twrecycled"}, 1130 timestamp: s.Timestamp, 1131 }, { 1132 value: float64(s.Network.TcpAdvanced.TWKilled), 1133 labels: []string{"twkilled"}, 1134 timestamp: s.Timestamp, 1135 }, { 1136 value: float64(s.Network.TcpAdvanced.TCPTimeWaitOverflow), 1137 labels: []string{"tcptimewaitoverflow"}, 1138 timestamp: s.Timestamp, 1139 }, { 1140 value: float64(s.Network.TcpAdvanced.TCPTimeouts), 1141 labels: []string{"tcptimeouts"}, 1142 timestamp: s.Timestamp, 1143 }, { 1144 value: float64(s.Network.TcpAdvanced.TCPSpuriousRTOs), 1145 labels: []string{"tcpspuriousrtos"}, 1146 timestamp: s.Timestamp, 1147 }, { 1148 value: float64(s.Network.TcpAdvanced.TCPLossProbes), 1149 labels: []string{"tcplossprobes"}, 1150 timestamp: s.Timestamp, 1151 }, { 1152 value: float64(s.Network.TcpAdvanced.TCPLossProbeRecovery), 1153 labels: []string{"tcplossproberecovery"}, 1154 timestamp: s.Timestamp, 1155 }, { 1156 value: float64(s.Network.TcpAdvanced.TCPRenoRecoveryFail), 1157 labels: []string{"tcprenorecoveryfail"}, 1158 timestamp: s.Timestamp, 1159 }, { 1160 value: float64(s.Network.TcpAdvanced.TCPSackRecoveryFail), 1161 labels: []string{"tcpsackrecoveryfail"}, 1162 timestamp: s.Timestamp, 1163 }, { 1164 value: float64(s.Network.TcpAdvanced.TCPRenoFailures), 1165 labels: []string{"tcprenofailures"}, 1166 timestamp: s.Timestamp, 1167 }, { 1168 value: float64(s.Network.TcpAdvanced.TCPSackFailures), 1169 labels: []string{"tcpsackfailures"}, 1170 timestamp: s.Timestamp, 1171 }, { 1172 value: float64(s.Network.TcpAdvanced.TCPLossFailures), 1173 labels: []string{"tcplossfailures"}, 1174 timestamp: s.Timestamp, 1175 }, { 1176 value: float64(s.Network.TcpAdvanced.DelayedACKs), 1177 labels: []string{"delayedacks"}, 1178 timestamp: s.Timestamp, 1179 }, { 1180 value: float64(s.Network.TcpAdvanced.DelayedACKLocked), 1181 labels: []string{"delayedacklocked"}, 1182 timestamp: s.Timestamp, 1183 }, { 1184 value: float64(s.Network.TcpAdvanced.DelayedACKLost), 1185 labels: []string{"delayedacklost"}, 1186 timestamp: s.Timestamp, 1187 }, { 1188 value: float64(s.Network.TcpAdvanced.ListenOverflows), 1189 labels: []string{"listenoverflows"}, 1190 timestamp: s.Timestamp, 1191 }, { 1192 value: float64(s.Network.TcpAdvanced.ListenDrops), 1193 labels: []string{"listendrops"}, 1194 timestamp: s.Timestamp, 1195 }, { 1196 value: float64(s.Network.TcpAdvanced.TCPHPHits), 1197 labels: []string{"tcphphits"}, 1198 timestamp: s.Timestamp, 1199 }, { 1200 value: float64(s.Network.TcpAdvanced.TCPPureAcks), 1201 labels: []string{"tcppureacks"}, 1202 timestamp: s.Timestamp, 1203 }, { 1204 value: float64(s.Network.TcpAdvanced.TCPHPAcks), 1205 labels: []string{"tcphpacks"}, 1206 timestamp: s.Timestamp, 1207 }, { 1208 value: float64(s.Network.TcpAdvanced.TCPRenoRecovery), 1209 labels: []string{"tcprenorecovery"}, 1210 timestamp: s.Timestamp, 1211 }, { 1212 value: float64(s.Network.TcpAdvanced.TCPSackRecovery), 1213 labels: []string{"tcpsackrecovery"}, 1214 timestamp: s.Timestamp, 1215 }, { 1216 value: float64(s.Network.TcpAdvanced.TCPSACKReneging), 1217 labels: []string{"tcpsackreneging"}, 1218 timestamp: s.Timestamp, 1219 }, { 1220 value: float64(s.Network.TcpAdvanced.TCPFACKReorder), 1221 labels: []string{"tcpfackreorder"}, 1222 timestamp: s.Timestamp, 1223 }, { 1224 value: 
float64(s.Network.TcpAdvanced.TCPSACKReorder), 1225 labels: []string{"tcpsackreorder"}, 1226 timestamp: s.Timestamp, 1227 }, { 1228 value: float64(s.Network.TcpAdvanced.TCPRenoReorder), 1229 labels: []string{"tcprenoreorder"}, 1230 timestamp: s.Timestamp, 1231 }, { 1232 value: float64(s.Network.TcpAdvanced.TCPTSReorder), 1233 labels: []string{"tcptsreorder"}, 1234 timestamp: s.Timestamp, 1235 }, { 1236 value: float64(s.Network.TcpAdvanced.TCPFullUndo), 1237 labels: []string{"tcpfullundo"}, 1238 timestamp: s.Timestamp, 1239 }, { 1240 value: float64(s.Network.TcpAdvanced.TCPPartialUndo), 1241 labels: []string{"tcppartialundo"}, 1242 timestamp: s.Timestamp, 1243 }, { 1244 value: float64(s.Network.TcpAdvanced.TCPDSACKUndo), 1245 labels: []string{"tcpdsackundo"}, 1246 timestamp: s.Timestamp, 1247 }, { 1248 value: float64(s.Network.TcpAdvanced.TCPLossUndo), 1249 labels: []string{"tcplossundo"}, 1250 timestamp: s.Timestamp, 1251 }, { 1252 value: float64(s.Network.TcpAdvanced.TCPFastRetrans), 1253 labels: []string{"tcpfastretrans"}, 1254 timestamp: s.Timestamp, 1255 }, { 1256 value: float64(s.Network.TcpAdvanced.TCPSlowStartRetrans), 1257 labels: []string{"tcpslowstartretrans"}, 1258 timestamp: s.Timestamp, 1259 }, { 1260 value: float64(s.Network.TcpAdvanced.TCPLostRetransmit), 1261 labels: []string{"tcplostretransmit"}, 1262 timestamp: s.Timestamp, 1263 }, { 1264 value: float64(s.Network.TcpAdvanced.TCPRetransFail), 1265 labels: []string{"tcpretransfail"}, 1266 timestamp: s.Timestamp, 1267 }, { 1268 value: float64(s.Network.TcpAdvanced.TCPRcvCollapsed), 1269 labels: []string{"tcprcvcollapsed"}, 1270 timestamp: s.Timestamp, 1271 }, { 1272 value: float64(s.Network.TcpAdvanced.TCPDSACKOldSent), 1273 labels: []string{"tcpdsackoldsent"}, 1274 timestamp: s.Timestamp, 1275 }, { 1276 value: float64(s.Network.TcpAdvanced.TCPDSACKOfoSent), 1277 labels: []string{"tcpdsackofosent"}, 1278 timestamp: s.Timestamp, 1279 }, { 1280 value: float64(s.Network.TcpAdvanced.TCPDSACKRecv), 1281 labels: []string{"tcpdsackrecv"}, 1282 timestamp: s.Timestamp, 1283 }, { 1284 value: float64(s.Network.TcpAdvanced.TCPDSACKOfoRecv), 1285 labels: []string{"tcpdsackoforecv"}, 1286 timestamp: s.Timestamp, 1287 }, { 1288 value: float64(s.Network.TcpAdvanced.TCPAbortOnData), 1289 labels: []string{"tcpabortondata"}, 1290 timestamp: s.Timestamp, 1291 }, { 1292 value: float64(s.Network.TcpAdvanced.TCPAbortOnClose), 1293 labels: []string{"tcpabortonclose"}, 1294 timestamp: s.Timestamp, 1295 }, { 1296 value: float64(s.Network.TcpAdvanced.TCPAbortOnMemory), 1297 labels: []string{"tcpabortonmemory"}, 1298 timestamp: s.Timestamp, 1299 }, { 1300 value: float64(s.Network.TcpAdvanced.TCPAbortOnTimeout), 1301 labels: []string{"tcpabortontimeout"}, 1302 timestamp: s.Timestamp, 1303 }, { 1304 value: float64(s.Network.TcpAdvanced.TCPAbortOnLinger), 1305 labels: []string{"tcpabortonlinger"}, 1306 timestamp: s.Timestamp, 1307 }, { 1308 value: float64(s.Network.TcpAdvanced.TCPAbortFailed), 1309 labels: []string{"tcpabortfailed"}, 1310 timestamp: s.Timestamp, 1311 }, { 1312 value: float64(s.Network.TcpAdvanced.TCPMemoryPressures), 1313 labels: []string{"tcpmemorypressures"}, 1314 timestamp: s.Timestamp, 1315 }, { 1316 value: float64(s.Network.TcpAdvanced.TCPMemoryPressuresChrono), 1317 labels: []string{"tcpmemorypressureschrono"}, 1318 timestamp: s.Timestamp, 1319 }, { 1320 value: float64(s.Network.TcpAdvanced.TCPSACKDiscard), 1321 labels: []string{"tcpsackdiscard"}, 1322 timestamp: s.Timestamp, 1323 }, { 1324 value: 
float64(s.Network.TcpAdvanced.TCPDSACKIgnoredOld), 1325 labels: []string{"tcpdsackignoredold"}, 1326 timestamp: s.Timestamp, 1327 }, { 1328 value: float64(s.Network.TcpAdvanced.TCPDSACKIgnoredNoUndo), 1329 labels: []string{"tcpdsackignorednoundo"}, 1330 timestamp: s.Timestamp, 1331 }, { 1332 value: float64(s.Network.TcpAdvanced.TCPMD5NotFound), 1333 labels: []string{"tcpmd5notfound"}, 1334 timestamp: s.Timestamp, 1335 }, { 1336 value: float64(s.Network.TcpAdvanced.TCPMD5Unexpected), 1337 labels: []string{"tcpmd5unexpected"}, 1338 timestamp: s.Timestamp, 1339 }, { 1340 value: float64(s.Network.TcpAdvanced.TCPMD5Failure), 1341 labels: []string{"tcpmd5failure"}, 1342 timestamp: s.Timestamp, 1343 }, { 1344 value: float64(s.Network.TcpAdvanced.TCPSackShifted), 1345 labels: []string{"tcpsackshifted"}, 1346 timestamp: s.Timestamp, 1347 }, { 1348 value: float64(s.Network.TcpAdvanced.TCPSackMerged), 1349 labels: []string{"tcpsackmerged"}, 1350 timestamp: s.Timestamp, 1351 }, { 1352 value: float64(s.Network.TcpAdvanced.TCPSackShiftFallback), 1353 labels: []string{"tcpsackshiftfallback"}, 1354 timestamp: s.Timestamp, 1355 }, { 1356 value: float64(s.Network.TcpAdvanced.TCPBacklogDrop), 1357 labels: []string{"tcpbacklogdrop"}, 1358 timestamp: s.Timestamp, 1359 }, { 1360 value: float64(s.Network.TcpAdvanced.PFMemallocDrop), 1361 labels: []string{"pfmemallocdrop"}, 1362 timestamp: s.Timestamp, 1363 }, { 1364 value: float64(s.Network.TcpAdvanced.TCPMinTTLDrop), 1365 labels: []string{"tcpminttldrop"}, 1366 timestamp: s.Timestamp, 1367 }, { 1368 value: float64(s.Network.TcpAdvanced.TCPDeferAcceptDrop), 1369 labels: []string{"tcpdeferacceptdrop"}, 1370 timestamp: s.Timestamp, 1371 }, { 1372 value: float64(s.Network.TcpAdvanced.IPReversePathFilter), 1373 labels: []string{"ipreversepathfilter"}, 1374 timestamp: s.Timestamp, 1375 }, { 1376 value: float64(s.Network.TcpAdvanced.TCPReqQFullDoCookies), 1377 labels: []string{"tcpreqqfulldocookies"}, 1378 timestamp: s.Timestamp, 1379 }, { 1380 value: float64(s.Network.TcpAdvanced.TCPReqQFullDrop), 1381 labels: []string{"tcpreqqfulldrop"}, 1382 timestamp: s.Timestamp, 1383 }, { 1384 value: float64(s.Network.TcpAdvanced.TCPFastOpenActive), 1385 labels: []string{"tcpfastopenactive"}, 1386 timestamp: s.Timestamp, 1387 }, { 1388 value: float64(s.Network.TcpAdvanced.TCPFastOpenActiveFail), 1389 labels: []string{"tcpfastopenactivefail"}, 1390 timestamp: s.Timestamp, 1391 }, { 1392 value: float64(s.Network.TcpAdvanced.TCPFastOpenPassive), 1393 labels: []string{"tcpfastopenpassive"}, 1394 timestamp: s.Timestamp, 1395 }, { 1396 value: float64(s.Network.TcpAdvanced.TCPFastOpenPassiveFail), 1397 labels: []string{"tcpfastopenpassivefail"}, 1398 timestamp: s.Timestamp, 1399 }, { 1400 value: float64(s.Network.TcpAdvanced.TCPFastOpenListenOverflow), 1401 labels: []string{"tcpfastopenlistenoverflow"}, 1402 timestamp: s.Timestamp, 1403 }, { 1404 value: float64(s.Network.TcpAdvanced.TCPFastOpenCookieReqd), 1405 labels: []string{"tcpfastopencookiereqd"}, 1406 timestamp: s.Timestamp, 1407 }, { 1408 value: float64(s.Network.TcpAdvanced.TCPSynRetrans), 1409 labels: []string{"tcpsynretrans"}, 1410 timestamp: s.Timestamp, 1411 }, { 1412 value: float64(s.Network.TcpAdvanced.TCPOrigDataSent), 1413 labels: []string{"tcporigdatasent"}, 1414 timestamp: s.Timestamp, 1415 }, { 1416 value: float64(s.Network.TcpAdvanced.PAWSActive), 1417 labels: []string{"pawsactive"}, 1418 timestamp: s.Timestamp, 1419 }, { 1420 value: float64(s.Network.TcpAdvanced.PAWSEstab), 1421 labels: []string{"pawsestab"}, 1422 
timestamp: s.Timestamp, 1423 }, 1424 } 1425 }, 1426 }, 1427 }...) 1428 } 1429 if includedMetrics.Has(container.NetworkUdpUsageMetrics) { 1430 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 1431 { 1432 name: "container_network_udp6_usage_total", 1433 help: "udp6 connection usage statistic for container", 1434 valueType: prometheus.GaugeValue, 1435 extraLabels: []string{"udp_state"}, 1436 getValues: func(s *info.ContainerStats) metricValues { 1437 return metricValues{ 1438 { 1439 value: float64(s.Network.Udp6.Listen), 1440 labels: []string{"listen"}, 1441 timestamp: s.Timestamp, 1442 }, 1443 { 1444 value: float64(s.Network.Udp6.Dropped), 1445 labels: []string{"dropped"}, 1446 timestamp: s.Timestamp, 1447 }, 1448 { 1449 value: float64(s.Network.Udp6.RxQueued), 1450 labels: []string{"rxqueued"}, 1451 timestamp: s.Timestamp, 1452 }, 1453 { 1454 value: float64(s.Network.Udp6.TxQueued), 1455 labels: []string{"txqueued"}, 1456 timestamp: s.Timestamp, 1457 }, 1458 } 1459 }, 1460 }, 1461 }...) 1462 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 1463 { 1464 name: "container_network_udp_usage_total", 1465 help: "udp connection usage statistic for container", 1466 valueType: prometheus.GaugeValue, 1467 extraLabels: []string{"udp_state"}, 1468 getValues: func(s *info.ContainerStats) metricValues { 1469 return metricValues{ 1470 { 1471 value: float64(s.Network.Udp.Listen), 1472 labels: []string{"listen"}, 1473 timestamp: s.Timestamp, 1474 }, 1475 { 1476 value: float64(s.Network.Udp.Dropped), 1477 labels: []string{"dropped"}, 1478 timestamp: s.Timestamp, 1479 }, 1480 { 1481 value: float64(s.Network.Udp.RxQueued), 1482 labels: []string{"rxqueued"}, 1483 timestamp: s.Timestamp, 1484 }, 1485 { 1486 value: float64(s.Network.Udp.TxQueued), 1487 labels: []string{"txqueued"}, 1488 timestamp: s.Timestamp, 1489 }, 1490 } 1491 }, 1492 }, 1493 }...) 
1494 } 1495 if includedMetrics.Has(container.ProcessMetrics) { 1496 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 1497 { 1498 name: "container_processes", 1499 help: "Number of processes running inside the container.", 1500 valueType: prometheus.GaugeValue, 1501 getValues: func(s *info.ContainerStats) metricValues { 1502 return metricValues{{value: float64(s.Processes.ProcessCount), timestamp: s.Timestamp}} 1503 }, 1504 }, 1505 { 1506 name: "container_file_descriptors", 1507 help: "Number of open file descriptors for the container.", 1508 valueType: prometheus.GaugeValue, 1509 getValues: func(s *info.ContainerStats) metricValues { 1510 return metricValues{{value: float64(s.Processes.FdCount), timestamp: s.Timestamp}} 1511 }, 1512 }, 1513 { 1514 name: "container_sockets", 1515 help: "Number of open sockets for the container.", 1516 valueType: prometheus.GaugeValue, 1517 getValues: func(s *info.ContainerStats) metricValues { 1518 return metricValues{{value: float64(s.Processes.SocketCount), timestamp: s.Timestamp}} 1519 }, 1520 }, 1521 { 1522 name: "container_threads_max", 1523 help: "Maximum number of threads allowed inside the container, infinity if value is zero", 1524 valueType: prometheus.GaugeValue, 1525 getValues: func(s *info.ContainerStats) metricValues { 1526 return metricValues{ 1527 { 1528 value: float64(s.Processes.ThreadsMax), 1529 timestamp: s.Timestamp, 1530 }, 1531 } 1532 }, 1533 }, 1534 { 1535 name: "container_threads", 1536 help: "Number of threads running inside the container", 1537 valueType: prometheus.GaugeValue, 1538 getValues: func(s *info.ContainerStats) metricValues { 1539 return metricValues{ 1540 { 1541 value: float64(s.Processes.ThreadsCurrent), 1542 timestamp: s.Timestamp, 1543 }, 1544 } 1545 }, 1546 }, 1547 { 1548 name: "container_ulimits_soft", 1549 help: "Soft ulimit values for the container root process. Unlimited if -1, except priority and nice", 1550 valueType: prometheus.GaugeValue, 1551 extraLabels: []string{"ulimit"}, 1552 getValues: func(s *info.ContainerStats) metricValues { 1553 values := make(metricValues, 0, len(s.Processes.Ulimits)) 1554 for _, ulimit := range s.Processes.Ulimits { 1555 values = append(values, metricValue{ 1556 value: float64(ulimit.SoftLimit), 1557 labels: []string{ulimit.Name}, 1558 timestamp: s.Timestamp, 1559 }) 1560 } 1561 return values 1562 }, 1563 }, 1564 }...) 1565 } 1566 if includedMetrics.Has(container.PerfMetrics) { 1567 if includedMetrics.Has(container.PerCpuUsageMetrics) { 1568 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 1569 { 1570 name: "container_perf_events_total", 1571 help: "Perf event metric.", 1572 valueType: prometheus.CounterValue, 1573 extraLabels: []string{"cpu", "event"}, 1574 getValues: func(s *info.ContainerStats) metricValues { 1575 return getPerCPUCorePerfEvents(s) 1576 }, 1577 }, 1578 { 1579 name: "container_perf_events_scaling_ratio", 1580 help: "Perf event metric scaling ratio.", 1581 valueType: prometheus.GaugeValue, 1582 extraLabels: []string{"cpu", "event"}, 1583 getValues: func(s *info.ContainerStats) metricValues { 1584 return getPerCPUCoreScalingRatio(s) 1585 }, 1586 }}...) 
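// Both branches of this if/else register the same metric names
// (container_perf_events_total and container_perf_events_scaling_ratio). This
// branch emits one series per core; the else branch below emits values
// aggregated across cores, as the helper names getAggregatedCorePerfEvents and
// getMinCoreScalingRatio (defined later in this package) suggest.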
1587 } else { 1588 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 1589 { 1590 name: "container_perf_events_total", 1591 help: "Perf event metric.", 1592 valueType: prometheus.CounterValue, 1593 extraLabels: []string{"cpu", "event"}, 1594 getValues: func(s *info.ContainerStats) metricValues { 1595 return getAggregatedCorePerfEvents(s) 1596 }, 1597 }, 1598 { 1599 name: "container_perf_events_scaling_ratio", 1600 help: "Perf event metric scaling ratio.", 1601 valueType: prometheus.GaugeValue, 1602 extraLabels: []string{"cpu", "event"}, 1603 getValues: func(s *info.ContainerStats) metricValues { 1604 return getMinCoreScalingRatio(s) 1605 }, 1606 }}...) 1607 } 1608 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 1609 { 1610 name: "container_perf_uncore_events_total", 1611 help: "Perf uncore event metric.", 1612 valueType: prometheus.CounterValue, 1613 extraLabels: []string{"socket", "event", "pmu"}, 1614 getValues: func(s *info.ContainerStats) metricValues { 1615 values := make(metricValues, 0, len(s.PerfUncoreStats)) 1616 for _, metric := range s.PerfUncoreStats { 1617 values = append(values, metricValue{ 1618 value: float64(metric.Value), 1619 labels: []string{strconv.Itoa(metric.Socket), metric.Name, metric.PMU}, 1620 timestamp: s.Timestamp, 1621 }) 1622 } 1623 return values 1624 }, 1625 }, 1626 { 1627 name: "container_perf_uncore_events_scaling_ratio", 1628 help: "Perf uncore event metric scaling ratio.", 1629 valueType: prometheus.GaugeValue, 1630 extraLabels: []string{"socket", "event", "pmu"}, 1631 getValues: func(s *info.ContainerStats) metricValues { 1632 values := make(metricValues, 0, len(s.PerfUncoreStats)) 1633 for _, metric := range s.PerfUncoreStats { 1634 values = append(values, metricValue{ 1635 value: metric.ScalingRatio, 1636 labels: []string{strconv.Itoa(metric.Socket), metric.Name, metric.PMU}, 1637 timestamp: s.Timestamp, 1638 }) 1639 } 1640 return values 1641 }, 1642 }, 1643 }...) 1644 } 1645 if includedMetrics.Has(container.ReferencedMemoryMetrics) { 1646 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 1647 { 1648 name: "container_referenced_bytes", 1649 help: "Container referenced bytes during last measurements cycle", 1650 valueType: prometheus.GaugeValue, 1651 getValues: func(s *info.ContainerStats) metricValues { 1652 return metricValues{{value: float64(s.ReferencedMemory), timestamp: s.Timestamp}} 1653 }, 1654 }, 1655 }...) 
1656 } 1657 if includedMetrics.Has(container.ResctrlMetrics) { 1658 c.containerMetrics = append(c.containerMetrics, []containerMetric{ 1659 { 1660 name: "container_memory_bandwidth_bytes", 1661 help: "Total memory bandwidth usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM).", 1662 valueType: prometheus.GaugeValue, 1663 extraLabels: []string{prometheusNodeLabelName}, 1664 getValues: func(s *info.ContainerStats) metricValues { 1665 numberOfNUMANodes := len(s.Resctrl.MemoryBandwidth) 1666 metrics := make(metricValues, numberOfNUMANodes) 1667 for numaNode, stats := range s.Resctrl.MemoryBandwidth { 1668 metrics[numaNode] = metricValue{ 1669 value: float64(stats.TotalBytes), 1670 timestamp: s.Timestamp, 1671 labels: []string{strconv.Itoa(numaNode)}, 1672 } 1673 } 1674 return metrics 1675 }, 1676 }, 1677 { 1678 name: "container_memory_bandwidth_local_bytes", 1679 help: "Local memory bandwidth usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM).", 1680 valueType: prometheus.GaugeValue, 1681 extraLabels: []string{prometheusNodeLabelName}, 1682 getValues: func(s *info.ContainerStats) metricValues { 1683 numberOfNUMANodes := len(s.Resctrl.MemoryBandwidth) 1684 metrics := make(metricValues, numberOfNUMANodes) 1685 for numaNode, stats := range s.Resctrl.MemoryBandwidth { 1686 metrics[numaNode] = metricValue{ 1687 value: float64(stats.LocalBytes), 1688 timestamp: s.Timestamp, 1689 labels: []string{strconv.Itoa(numaNode)}, 1690 } 1691 } 1692 return metrics 1693 }, 1694 }, 1695 { 1696 name: "container_llc_occupancy_bytes", 1697 help: "Last level cache usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM).", 1698 valueType: prometheus.GaugeValue, 1699 extraLabels: []string{prometheusNodeLabelName}, 1700 getValues: func(s *info.ContainerStats) metricValues { 1701 numberOfNUMANodes := len(s.Resctrl.Cache) 1702 metrics := make(metricValues, numberOfNUMANodes) 1703 for numaNode, stats := range s.Resctrl.Cache { 1704 metrics[numaNode] = metricValue{ 1705 value: float64(stats.LLCOccupancy), 1706 timestamp: s.Timestamp, 1707 labels: []string{strconv.Itoa(numaNode)}, 1708 } 1709 } 1710 return metrics 1711 }, 1712 }, 1713 }...) 
1729 var (
1730 	versionInfoDesc = prometheus.NewDesc("cadvisor_version_info", "A metric with a constant '1' value labeled by kernel version, OS version, docker version, cadvisor version & cadvisor revision.", []string{"kernelVersion", "osVersion", "dockerVersion", "cadvisorVersion", "cadvisorRevision"}, nil)
1731 	startTimeDesc   = prometheus.NewDesc("container_start_time_seconds", "Start time of the container since unix epoch in seconds.", nil, nil)
1732 	cpuPeriodDesc   = prometheus.NewDesc("container_spec_cpu_period", "CPU period of the container.", nil, nil)
1733 	cpuQuotaDesc    = prometheus.NewDesc("container_spec_cpu_quota", "CPU quota of the container.", nil, nil)
1734 	cpuSharesDesc   = prometheus.NewDesc("container_spec_cpu_shares", "CPU share of the container.", nil, nil)
1735 )
1736 
1737 // Describe describes all the metrics ever exported by cadvisor. It
1738 // implements prometheus.Collector.
1739 func (c *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) {
1740 	c.errors.Describe(ch)
1741 	for _, cm := range c.containerMetrics {
1742 		ch <- cm.desc([]string{})
1743 	}
1744 	ch <- startTimeDesc
1745 	ch <- cpuPeriodDesc
1746 	ch <- cpuQuotaDesc
1747 	ch <- cpuSharesDesc
1748 	ch <- versionInfoDesc
1749 }
1750 
1751 // Collect fetches the stats from all containers and delivers them as
1752 // Prometheus metrics. It implements prometheus.Collector.
1753 func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
1754 	c.errors.Set(0)
1755 	c.collectVersionInfo(ch)
1756 	c.collectContainersInfo(ch)
1757 	c.errors.Collect(ch)
1758 }
1759 
1760 const (
1761 	// ContainerLabelPrefix is the prefix added to all container labels.
1762 	ContainerLabelPrefix = "container_label_"
1763 	// ContainerEnvPrefix is the prefix added to all env variable labels.
1764 	ContainerEnvPrefix = "container_env_"
1765 	// LabelID is the name of the id label.
1766 	LabelID = "id"
1767 	// LabelName is the name of the name label.
1768 	LabelName = "name"
1769 	// LabelImage is the name of the image label.
1770 	LabelImage = "image"
1771 )
1772 
1773 // DefaultContainerLabels implements ContainerLabelsFunc. It exports the
1774 // container name, first alias, image name as well as all its env and label
1775 // values.
1776 func DefaultContainerLabels(container *info.ContainerInfo) map[string]string {
1777 	set := map[string]string{LabelID: container.Name}
1778 	if len(container.Aliases) > 0 {
1779 		set[LabelName] = container.Aliases[0]
1780 	}
1781 	if image := container.Spec.Image; len(image) > 0 {
1782 		set[LabelImage] = image
1783 	}
1784 	for k, v := range container.Spec.Labels {
1785 		set[ContainerLabelPrefix+k] = v
1786 	}
1787 	for k, v := range container.Spec.Envs {
1788 		set[ContainerEnvPrefix+k] = v
1789 	}
1790 	return set
1791 }
1792 
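// For illustration (hypothetical container values): DefaultContainerLabels for a
// container named "/docker/0123abcd" with alias "redis", image "redis:7" and a
// container label "com.example.tier=cache" returns
//
//	map[string]string{
//		"id":    "/docker/0123abcd",
//		"name":  "redis",
//		"image": "redis:7",
//		"container_label_com.example.tier": "cache",
//	}
//
// Label keys are sanitized later, in collectContainersInfo, so the last entry is
// ultimately exported as container_label_com_example_tier.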
1793 // BaseContainerLabels returns a ContainerLabelsFunc that exports the container
1794 // name, first alias, image name, all whitelisted label values, and all env values.
1795 func BaseContainerLabels(whiteList []string) func(container *info.ContainerInfo) map[string]string {
1796 	whiteListMap := make(map[string]struct{}, len(whiteList))
1797 	for _, k := range whiteList {
1798 		whiteListMap[k] = struct{}{}
1799 	}
1800 
1801 	return func(container *info.ContainerInfo) map[string]string {
1802 		set := map[string]string{LabelID: container.Name}
1803 		if len(container.Aliases) > 0 {
1804 			set[LabelName] = container.Aliases[0]
1805 		}
1806 		if image := container.Spec.Image; len(image) > 0 {
1807 			set[LabelImage] = image
1808 		}
1809 		for k, v := range container.Spec.Labels {
1810 			if _, ok := whiteListMap[k]; ok {
1811 				set[ContainerLabelPrefix+k] = v
1812 			}
1813 		}
1814 		for k, v := range container.Spec.Envs {
1815 			set[ContainerEnvPrefix+k] = v
1816 		}
1817 		return set
1818 	}
1819 }
1820 
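// For illustration (hypothetical values, same container as the example above):
//
//	labelsFn := BaseContainerLabels([]string{"com.example.tier"})
//	labels := labelsFn(cont) // keeps container_label_com.example.tier, drops all other container labels
//
// Note that only container labels are filtered against the whitelist here; every
// entry present in Spec.Envs is still exported as a container_env_* label.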
1821 func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric) {
1822 	containers, err := c.infoProvider.GetRequestedContainersInfo("/", c.opts)
1823 	if err != nil {
1824 		c.errors.Set(1)
1825 		klog.Warningf("Couldn't get containers: %s", err)
1826 		return
1827 	}
1828 	rawLabels := map[string]struct{}{}
1829 	for _, container := range containers {
1830 		for l := range c.containerLabelsFunc(container) {
1831 			rawLabels[l] = struct{}{}
1832 		}
1833 	}
1834 
1835 	for _, cont := range containers {
1836 		values := make([]string, 0, len(rawLabels))
1837 		labels := make([]string, 0, len(rawLabels))
1838 		containerLabels := c.containerLabelsFunc(cont)
1839 		for l := range rawLabels {
1840 			duplicate := false
1841 			sl := sanitizeLabelName(l)
1842 			for _, x := range labels {
1843 				if sl == x {
1844 					duplicate = true
1845 					break
1846 				}
1847 			}
1848 			if !duplicate {
1849 				labels = append(labels, sl)
1850 				values = append(values, containerLabels[l])
1851 			}
1852 		}
1853 
1854 		// Container spec
1855 		desc := prometheus.NewDesc("container_start_time_seconds", "Start time of the container since unix epoch in seconds.", labels, nil)
1856 		ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(cont.Spec.CreationTime.Unix()), values...)
1857 
1858 		if cont.Spec.HasCpu {
1859 			desc = prometheus.NewDesc("container_spec_cpu_period", "CPU period of the container.", labels, nil)
1860 			ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(cont.Spec.Cpu.Period), values...)
1861 			if cont.Spec.Cpu.Quota != 0 {
1862 				desc = prometheus.NewDesc("container_spec_cpu_quota", "CPU quota of the container.", labels, nil)
1863 				ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(cont.Spec.Cpu.Quota), values...)
1864 			}
1865 			desc := prometheus.NewDesc("container_spec_cpu_shares", "CPU share of the container.", labels, nil)
1866 			ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(cont.Spec.Cpu.Limit), values...)
1867 
1868 		}
1869 		if cont.Spec.HasMemory {
1870 			desc := prometheus.NewDesc("container_spec_memory_limit_bytes", "Memory limit for the container.", labels, nil)
1871 			ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(cont.Spec.Memory.Limit), values...)
1872 			desc = prometheus.NewDesc("container_spec_memory_swap_limit_bytes", "Memory swap limit for the container.", labels, nil)
1873 			ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(cont.Spec.Memory.SwapLimit), values...)
1874 			desc = prometheus.NewDesc("container_spec_memory_reservation_limit_bytes", "Memory reservation limit for the container.", labels, nil)
1875 			ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(cont.Spec.Memory.Reservation), values...)
1876 		}
1877 
1878 		// Now for the actual metrics
1879 		if len(cont.Stats) == 0 {
1880 			continue
1881 		}
1882 		stats := cont.Stats[0]
1883 		for _, cm := range c.containerMetrics {
1884 			if cm.condition != nil && !cm.condition(cont.Spec) {
1885 				continue
1886 			}
1887 			desc := cm.desc(labels)
1888 			for _, metricValue := range cm.getValues(stats) {
1889 				ch <- prometheus.NewMetricWithTimestamp(
1890 					metricValue.timestamp,
1891 					prometheus.MustNewConstMetric(desc, cm.valueType, float64(metricValue.value), append(values, metricValue.labels...)...),
1892 				)
1893 			}
1894 		}
1895 		if c.includedMetrics.Has(container.AppMetrics) {
1896 			for metricLabel, v := range stats.CustomMetrics {
1897 				for _, metric := range v {
1898 					clabels := make([]string, len(rawLabels), len(rawLabels)+len(metric.Labels))
1899 					cvalues := make([]string, len(rawLabels), len(rawLabels)+len(metric.Labels))
1900 					copy(clabels, labels)
1901 					copy(cvalues, values)
1902 					for label, value := range metric.Labels {
1903 						clabels = append(clabels, sanitizeLabelName("app_"+label))
1904 						cvalues = append(cvalues, value)
1905 					}
1906 					desc := prometheus.NewDesc(metricLabel, "Custom application metric.", clabels, nil)
1907 					ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(metric.FloatValue), cvalues...)
1908 				}
1909 			}
1910 		}
1911 	}
1912 }
1913 
1914 func (c *PrometheusCollector) collectVersionInfo(ch chan<- prometheus.Metric) {
1915 	versionInfo, err := c.infoProvider.GetVersionInfo()
1916 	if err != nil {
1917 		c.errors.Set(1)
1918 		klog.Warningf("Couldn't get version info: %s", err)
1919 		return
1920 	}
1921 	ch <- prometheus.MustNewConstMetric(versionInfoDesc, prometheus.GaugeValue, 1, []string{versionInfo.KernelVersion, versionInfo.ContainerOsVersion, versionInfo.DockerVersion, versionInfo.CadvisorVersion, versionInfo.CadvisorRevision}...)
1922 }
1923 
1924 // Size after which we consider memory to be "unlimited". This is not
1925 // MaxInt64 due to rounding by the kernel.
1926 const maxMemorySize = uint64(1 << 62)
1927 
1928 func specMemoryValue(v uint64) float64 {
1929 	if v > maxMemorySize {
1930 		return 0
1931 	}
1932 	return float64(v)
1933 }
1934 
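// For example (illustrative): a cgroup v1 "unlimited" memory limit is commonly
// reported as 9223372036854771712 (MaxInt64 rounded down to the 4 KiB page size),
// which exceeds maxMemorySize (1<<62), so specMemoryValue maps it to 0 and
// container_spec_memory_limit_bytes is exported as 0 for such containers.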
1935 var invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
1936 
1937 // sanitizeLabelName replaces any character that is not a valid Prometheus
1938 // label name character ([a-zA-Z0-9_]) with an underscore.
1939 func sanitizeLabelName(name string) string {
1940 	return invalidNameCharRE.ReplaceAllString(name, "_")
1941 }
1942 
1943 func getNumaStatsPerNode(nodeStats map[uint8]uint64, labels []string, timestamp time.Time) metricValues {
1944 	mValues := make(metricValues, 0, len(nodeStats))
1945 	for node, stat := range nodeStats {
1946 		nodeLabels := append(labels, strconv.FormatUint(uint64(node), 10))
1947 		mValues = append(mValues, metricValue{value: float64(stat), labels: nodeLabels, timestamp: timestamp})
1948 	}
1949 	return mValues
1950 }
1951 
1952 func getPerCPUCorePerfEvents(s *info.ContainerStats) metricValues {
1953 	values := make(metricValues, 0, len(s.PerfStats))
1954 	for _, metric := range s.PerfStats {
1955 		values = append(values, metricValue{
1956 			value:     float64(metric.Value),
1957 			labels:    []string{strconv.Itoa(metric.Cpu), metric.Name},
1958 			timestamp: s.Timestamp,
1959 		})
1960 	}
1961 	return values
1962 }
1963 
1964 func getPerCPUCoreScalingRatio(s *info.ContainerStats) metricValues {
1965 	values := make(metricValues, 0, len(s.PerfStats))
1966 	for _, metric := range s.PerfStats {
1967 		values = append(values, metricValue{
1968 			value:     metric.ScalingRatio,
1969 			labels:    []string{strconv.Itoa(metric.Cpu), metric.Name},
1970 			timestamp: s.Timestamp,
1971 		})
1972 	}
1973 	return values
1974 }
1975 
1976 func getAggregatedCorePerfEvents(s *info.ContainerStats) metricValues {
1977 	values := make(metricValues, 0)
1978 
1979 	perfEventStatAgg := make(map[string]uint64)
1980 	// aggregate by event
1981 	for _, perfStat := range s.PerfStats {
1982 		perfEventStatAgg[perfStat.Name] += perfStat.Value
1983 	}
1984 	// create aggregated metrics
1985 	for perfEvent, perfValue := range perfEventStatAgg {
1986 		values = append(values, metricValue{
1987 			value:     float64(perfValue),
1988 			labels:    []string{"", perfEvent},
1989 			timestamp: s.Timestamp,
1990 		})
1991 	}
1992 	return values
1993 }
1994 
1995 func getMinCoreScalingRatio(s *info.ContainerStats) metricValues {
1996 	values := make(metricValues, 0)
1997 	perfEventStatMin := make(map[string]float64)
1998 	// search for the minimal scaling ratio for each event
1999 	for _, perfStat := range s.PerfStats {
2000 		if _, ok := perfEventStatMin[perfStat.Name]; !ok {
2001 			// found a new event
2002 			perfEventStatMin[perfStat.Name] = perfStat.ScalingRatio
2003 		} else if perfStat.ScalingRatio < perfEventStatMin[perfStat.Name] {
2004 			// found a lower scaling ratio, so replace the minimal value
2005 			perfEventStatMin[perfStat.Name] = perfStat.ScalingRatio
2006 		}
2007 	}
2008 
2009 	for perfEvent, perfScalingRatio := range perfEventStatMin {
2010 		values = append(values, metricValue{
2011 			value:     perfScalingRatio,
2012 			labels:    []string{"", perfEvent},
2013 			timestamp: s.Timestamp,
2014 		})
2015 	}
2016 	return values
2017 }
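// Worked example (hypothetical values): given two PerfStats samples for the event
// "instructions", one from cpu 0 with Value 100 and ScalingRatio 1.0 and one from
// cpu 1 with Value 200 and ScalingRatio 0.5,
//
//	getAggregatedCorePerfEvents(s) // one metricValue: value 300, labels {"", "instructions"}
//	getMinCoreScalingRatio(s)      // one metricValue: value 0.5, labels {"", "instructions"}
//
// i.e. counts are summed per event, while the scaling ratio reported is the most
// pessimistic (lowest) one observed across CPUs.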