k8s.io/kubernetes@v1.29.3/pkg/kubelet/stats/helper.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package stats 18 19 import ( 20 "fmt" 21 "time" 22 23 cadvisorapiv1 "github.com/google/cadvisor/info/v1" 24 cadvisorapiv2 "github.com/google/cadvisor/info/v2" 25 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 26 "k8s.io/apimachinery/pkg/types" 27 "k8s.io/klog/v2" 28 statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" 29 "k8s.io/kubernetes/pkg/kubelet/cadvisor" 30 "k8s.io/kubernetes/pkg/kubelet/server/stats" 31 ) 32 33 // defaultNetworkInterfaceName is used for collecting network stats. 34 // This logic relies on knowledge of the container runtime implementation and 35 // is not reliable. 
36 const defaultNetworkInterfaceName = "eth0" 37 38 func cadvisorInfoToCPUandMemoryStats(info *cadvisorapiv2.ContainerInfo) (*statsapi.CPUStats, *statsapi.MemoryStats) { 39 cstat, found := latestContainerStats(info) 40 if !found { 41 return nil, nil 42 } 43 var cpuStats *statsapi.CPUStats 44 var memoryStats *statsapi.MemoryStats 45 cpuStats = &statsapi.CPUStats{ 46 Time: metav1.NewTime(cstat.Timestamp), 47 UsageNanoCores: uint64Ptr(0), 48 UsageCoreNanoSeconds: uint64Ptr(0), 49 } 50 if info.Spec.HasCpu { 51 if cstat.CpuInst != nil { 52 cpuStats.UsageNanoCores = &cstat.CpuInst.Usage.Total 53 } 54 if cstat.Cpu != nil { 55 cpuStats.UsageCoreNanoSeconds = &cstat.Cpu.Usage.Total 56 } 57 } 58 if info.Spec.HasMemory && cstat.Memory != nil { 59 pageFaults := cstat.Memory.ContainerData.Pgfault 60 majorPageFaults := cstat.Memory.ContainerData.Pgmajfault 61 memoryStats = &statsapi.MemoryStats{ 62 Time: metav1.NewTime(cstat.Timestamp), 63 UsageBytes: &cstat.Memory.Usage, 64 WorkingSetBytes: &cstat.Memory.WorkingSet, 65 RSSBytes: &cstat.Memory.RSS, 66 PageFaults: &pageFaults, 67 MajorPageFaults: &majorPageFaults, 68 } 69 // availableBytes = memory limit (if known) - workingset 70 if !isMemoryUnlimited(info.Spec.Memory.Limit) { 71 availableBytes := info.Spec.Memory.Limit - cstat.Memory.WorkingSet 72 memoryStats.AvailableBytes = &availableBytes 73 } 74 } else { 75 memoryStats = &statsapi.MemoryStats{ 76 Time: metav1.NewTime(cstat.Timestamp), 77 WorkingSetBytes: uint64Ptr(0), 78 } 79 } 80 return cpuStats, memoryStats 81 } 82 83 // cadvisorInfoToContainerStats returns the statsapi.ContainerStats converted 84 // from the container and filesystem info. 
85 func cadvisorInfoToContainerStats(name string, info *cadvisorapiv2.ContainerInfo, rootFs, imageFs *cadvisorapiv2.FsInfo) *statsapi.ContainerStats { 86 result := &statsapi.ContainerStats{ 87 StartTime: metav1.NewTime(info.Spec.CreationTime), 88 Name: name, 89 } 90 cstat, found := latestContainerStats(info) 91 if !found { 92 return result 93 } 94 95 cpu, memory := cadvisorInfoToCPUandMemoryStats(info) 96 result.CPU = cpu 97 result.Memory = memory 98 result.Swap = cadvisorInfoToSwapStats(info) 99 100 // NOTE: if they can be found, log stats will be overwritten 101 // by the caller, as it knows more information about the pod, 102 // which is needed to determine log size. 103 if rootFs != nil { 104 // The container logs live on the node rootfs device 105 result.Logs = buildLogsStats(cstat, rootFs) 106 } 107 108 if imageFs != nil { 109 // The container rootFs lives on the imageFs devices (which may not be the node root fs) 110 result.Rootfs = buildRootfsStats(cstat, imageFs) 111 } 112 113 cfs := cstat.Filesystem 114 if cfs != nil { 115 if cfs.BaseUsageBytes != nil { 116 if result.Rootfs != nil { 117 rootfsUsage := *cfs.BaseUsageBytes 118 result.Rootfs.UsedBytes = &rootfsUsage 119 } 120 if cfs.TotalUsageBytes != nil && result.Logs != nil { 121 logsUsage := *cfs.TotalUsageBytes - *cfs.BaseUsageBytes 122 result.Logs.UsedBytes = &logsUsage 123 } 124 } 125 if cfs.InodeUsage != nil && result.Rootfs != nil { 126 rootInodes := *cfs.InodeUsage 127 result.Rootfs.InodesUsed = &rootInodes 128 } 129 } 130 131 for _, acc := range cstat.Accelerators { 132 result.Accelerators = append(result.Accelerators, statsapi.AcceleratorStats{ 133 Make: acc.Make, 134 Model: acc.Model, 135 ID: acc.ID, 136 MemoryTotal: acc.MemoryTotal, 137 MemoryUsed: acc.MemoryUsed, 138 DutyCycle: acc.DutyCycle, 139 }) 140 } 141 142 result.UserDefinedMetrics = cadvisorInfoToUserDefinedMetrics(info) 143 144 return result 145 } 146 147 // cadvisorInfoToContainerCPUAndMemoryStats returns the statsapi.ContainerStats 
converted 148 // from the container and filesystem info. 149 func cadvisorInfoToContainerCPUAndMemoryStats(name string, info *cadvisorapiv2.ContainerInfo) *statsapi.ContainerStats { 150 result := &statsapi.ContainerStats{ 151 StartTime: metav1.NewTime(info.Spec.CreationTime), 152 Name: name, 153 } 154 155 cpu, memory := cadvisorInfoToCPUandMemoryStats(info) 156 result.CPU = cpu 157 result.Memory = memory 158 159 return result 160 } 161 162 func cadvisorInfoToProcessStats(info *cadvisorapiv2.ContainerInfo) *statsapi.ProcessStats { 163 cstat, found := latestContainerStats(info) 164 if !found || cstat.Processes == nil { 165 return nil 166 } 167 num := cstat.Processes.ProcessCount 168 return &statsapi.ProcessStats{ProcessCount: uint64Ptr(num)} 169 } 170 171 // cadvisorInfoToNetworkStats returns the statsapi.NetworkStats converted from 172 // the container info from cadvisor. 173 func cadvisorInfoToNetworkStats(info *cadvisorapiv2.ContainerInfo) *statsapi.NetworkStats { 174 if !info.Spec.HasNetwork { 175 return nil 176 } 177 cstat, found := latestContainerStats(info) 178 if !found { 179 return nil 180 } 181 182 if cstat.Network == nil { 183 return nil 184 } 185 186 iStats := statsapi.NetworkStats{ 187 Time: metav1.NewTime(cstat.Timestamp), 188 } 189 190 for i := range cstat.Network.Interfaces { 191 inter := cstat.Network.Interfaces[i] 192 iStat := statsapi.InterfaceStats{ 193 Name: inter.Name, 194 RxBytes: &inter.RxBytes, 195 RxErrors: &inter.RxErrors, 196 TxBytes: &inter.TxBytes, 197 TxErrors: &inter.TxErrors, 198 } 199 200 if inter.Name == defaultNetworkInterfaceName { 201 iStats.InterfaceStats = iStat 202 } 203 204 iStats.Interfaces = append(iStats.Interfaces, iStat) 205 } 206 207 return &iStats 208 } 209 210 // cadvisorInfoToUserDefinedMetrics returns the statsapi.UserDefinedMetric 211 // converted from the container info from cadvisor. 
212 func cadvisorInfoToUserDefinedMetrics(info *cadvisorapiv2.ContainerInfo) []statsapi.UserDefinedMetric { 213 type specVal struct { 214 ref statsapi.UserDefinedMetricDescriptor 215 valType cadvisorapiv1.DataType 216 time time.Time 217 value float64 218 } 219 udmMap := map[string]*specVal{} 220 for _, spec := range info.Spec.CustomMetrics { 221 udmMap[spec.Name] = &specVal{ 222 ref: statsapi.UserDefinedMetricDescriptor{ 223 Name: spec.Name, 224 Type: statsapi.UserDefinedMetricType(spec.Type), 225 Units: spec.Units, 226 }, 227 valType: spec.Format, 228 } 229 } 230 for _, stat := range info.Stats { 231 for name, values := range stat.CustomMetrics { 232 specVal, ok := udmMap[name] 233 if !ok { 234 klog.InfoS("Spec for custom metric is missing from cAdvisor output", "metric", name, "spec", info.Spec, "metrics", stat.CustomMetrics) 235 continue 236 } 237 for _, value := range values { 238 // Pick the most recent value 239 if value.Timestamp.Before(specVal.time) { 240 continue 241 } 242 specVal.time = value.Timestamp 243 specVal.value = value.FloatValue 244 if specVal.valType == cadvisorapiv1.IntType { 245 specVal.value = float64(value.IntValue) 246 } 247 } 248 } 249 } 250 var udm []statsapi.UserDefinedMetric 251 for _, specVal := range udmMap { 252 udm = append(udm, statsapi.UserDefinedMetric{ 253 UserDefinedMetricDescriptor: specVal.ref, 254 Time: metav1.NewTime(specVal.time), 255 Value: specVal.value, 256 }) 257 } 258 return udm 259 } 260 261 func cadvisorInfoToSwapStats(info *cadvisorapiv2.ContainerInfo) *statsapi.SwapStats { 262 cstat, found := latestContainerStats(info) 263 if !found { 264 return nil 265 } 266 267 var swapStats *statsapi.SwapStats 268 269 if info.Spec.HasMemory && cstat.Memory != nil { 270 swapStats = &statsapi.SwapStats{ 271 Time: metav1.NewTime(cstat.Timestamp), 272 SwapUsageBytes: &cstat.Memory.Swap, 273 } 274 275 if !isMemoryUnlimited(info.Spec.Memory.SwapLimit) { 276 swapAvailableBytes := info.Spec.Memory.SwapLimit - cstat.Memory.Swap 277 
swapStats.SwapAvailableBytes = &swapAvailableBytes 278 } 279 } 280 281 return swapStats 282 } 283 284 // latestContainerStats returns the latest container stats from cadvisor, or nil if none exist 285 func latestContainerStats(info *cadvisorapiv2.ContainerInfo) (*cadvisorapiv2.ContainerStats, bool) { 286 stats := info.Stats 287 if len(stats) < 1 { 288 return nil, false 289 } 290 latest := stats[len(stats)-1] 291 if latest == nil { 292 return nil, false 293 } 294 return latest, true 295 } 296 297 func isMemoryUnlimited(v uint64) bool { 298 // Size after which we consider memory to be "unlimited". This is not 299 // MaxInt64 due to rounding by the kernel. 300 // TODO: cadvisor should export this https://github.com/google/cadvisor/blob/master/metrics/prometheus.go#L596 301 const maxMemorySize = uint64(1 << 62) 302 303 return v > maxMemorySize 304 } 305 306 // getCgroupInfo returns the information of the container with the specified 307 // containerName from cadvisor. 308 func getCgroupInfo(cadvisor cadvisor.Interface, containerName string, updateStats bool) (*cadvisorapiv2.ContainerInfo, error) { 309 var maxAge *time.Duration 310 if updateStats { 311 age := 0 * time.Second 312 maxAge = &age 313 } 314 infoMap, err := cadvisor.ContainerInfoV2(containerName, cadvisorapiv2.RequestOptions{ 315 IdType: cadvisorapiv2.TypeName, 316 Count: 2, // 2 samples are needed to compute "instantaneous" CPU 317 Recursive: false, 318 MaxAge: maxAge, 319 }) 320 if err != nil { 321 return nil, fmt.Errorf("failed to get container info for %q: %v", containerName, err) 322 } 323 if len(infoMap) != 1 { 324 return nil, fmt.Errorf("unexpected number of containers: %v", len(infoMap)) 325 } 326 info := infoMap[containerName] 327 return &info, nil 328 } 329 330 // getCgroupStats returns the latest stats of the container having the 331 // specified containerName from cadvisor. 
332 func getCgroupStats(cadvisor cadvisor.Interface, containerName string, updateStats bool) (*cadvisorapiv2.ContainerStats, error) { 333 info, err := getCgroupInfo(cadvisor, containerName, updateStats) 334 if err != nil { 335 return nil, err 336 } 337 stats, found := latestContainerStats(info) 338 if !found { 339 return nil, fmt.Errorf("failed to get latest stats from container info for %q", containerName) 340 } 341 return stats, nil 342 } 343 344 func buildLogsStats(cstat *cadvisorapiv2.ContainerStats, rootFs *cadvisorapiv2.FsInfo) *statsapi.FsStats { 345 fsStats := &statsapi.FsStats{ 346 Time: metav1.NewTime(cstat.Timestamp), 347 AvailableBytes: &rootFs.Available, 348 CapacityBytes: &rootFs.Capacity, 349 InodesFree: rootFs.InodesFree, 350 Inodes: rootFs.Inodes, 351 } 352 353 if rootFs.Inodes != nil && rootFs.InodesFree != nil { 354 logsInodesUsed := *rootFs.Inodes - *rootFs.InodesFree 355 fsStats.InodesUsed = &logsInodesUsed 356 } 357 return fsStats 358 } 359 360 func buildRootfsStats(cstat *cadvisorapiv2.ContainerStats, imageFs *cadvisorapiv2.FsInfo) *statsapi.FsStats { 361 return &statsapi.FsStats{ 362 Time: metav1.NewTime(cstat.Timestamp), 363 AvailableBytes: &imageFs.Available, 364 CapacityBytes: &imageFs.Capacity, 365 InodesFree: imageFs.InodesFree, 366 Inodes: imageFs.Inodes, 367 } 368 } 369 370 func getUint64Value(value *uint64) uint64 { 371 if value == nil { 372 return 0 373 } 374 375 return *value 376 } 377 378 func uint64Ptr(i uint64) *uint64 { 379 return &i 380 } 381 382 func calcEphemeralStorage(containers []statsapi.ContainerStats, volumes []statsapi.VolumeStats, rootFsInfo *cadvisorapiv2.FsInfo, 383 podLogStats *statsapi.FsStats, etcHostsStats *statsapi.FsStats, isCRIStatsProvider bool) *statsapi.FsStats { 384 result := &statsapi.FsStats{ 385 Time: metav1.NewTime(rootFsInfo.Timestamp), 386 AvailableBytes: &rootFsInfo.Available, 387 CapacityBytes: &rootFsInfo.Capacity, 388 InodesFree: rootFsInfo.InodesFree, 389 Inodes: rootFsInfo.Inodes, 390 } 391 
for _, container := range containers { 392 addContainerUsage(result, &container, isCRIStatsProvider) 393 } 394 for _, volume := range volumes { 395 result.UsedBytes = addUsage(result.UsedBytes, volume.FsStats.UsedBytes) 396 result.InodesUsed = addUsage(result.InodesUsed, volume.InodesUsed) 397 result.Time = maxUpdateTime(&result.Time, &volume.FsStats.Time) 398 } 399 if podLogStats != nil { 400 result.UsedBytes = addUsage(result.UsedBytes, podLogStats.UsedBytes) 401 result.InodesUsed = addUsage(result.InodesUsed, podLogStats.InodesUsed) 402 result.Time = maxUpdateTime(&result.Time, &podLogStats.Time) 403 } 404 if etcHostsStats != nil { 405 result.UsedBytes = addUsage(result.UsedBytes, etcHostsStats.UsedBytes) 406 result.InodesUsed = addUsage(result.InodesUsed, etcHostsStats.InodesUsed) 407 result.Time = maxUpdateTime(&result.Time, &etcHostsStats.Time) 408 } 409 return result 410 } 411 412 func addContainerUsage(stat *statsapi.FsStats, container *statsapi.ContainerStats, isCRIStatsProvider bool) { 413 if rootFs := container.Rootfs; rootFs != nil { 414 stat.Time = maxUpdateTime(&stat.Time, &rootFs.Time) 415 stat.InodesUsed = addUsage(stat.InodesUsed, rootFs.InodesUsed) 416 stat.UsedBytes = addUsage(stat.UsedBytes, rootFs.UsedBytes) 417 if logs := container.Logs; logs != nil { 418 stat.UsedBytes = addUsage(stat.UsedBytes, logs.UsedBytes) 419 // We have accurate container log inode usage for CRI stats provider. 
420 if isCRIStatsProvider { 421 stat.InodesUsed = addUsage(stat.InodesUsed, logs.InodesUsed) 422 } 423 stat.Time = maxUpdateTime(&stat.Time, &logs.Time) 424 } 425 } 426 } 427 428 func maxUpdateTime(first, second *metav1.Time) metav1.Time { 429 if first.Before(second) { 430 return *second 431 } 432 return *first 433 } 434 435 func addUsage(first, second *uint64) *uint64 { 436 if first == nil { 437 return second 438 } else if second == nil { 439 return first 440 } 441 total := *first + *second 442 return &total 443 } 444 445 func makePodStorageStats(s *statsapi.PodStats, rootFsInfo *cadvisorapiv2.FsInfo, resourceAnalyzer stats.ResourceAnalyzer, hostStatsProvider HostStatsProvider, isCRIStatsProvider bool) { 446 podNs := s.PodRef.Namespace 447 podName := s.PodRef.Name 448 podUID := types.UID(s.PodRef.UID) 449 var ephemeralStats []statsapi.VolumeStats 450 if vstats, found := resourceAnalyzer.GetPodVolumeStats(podUID); found { 451 ephemeralStats = make([]statsapi.VolumeStats, len(vstats.EphemeralVolumes)) 452 copy(ephemeralStats, vstats.EphemeralVolumes) 453 s.VolumeStats = append(append([]statsapi.VolumeStats{}, vstats.EphemeralVolumes...), vstats.PersistentVolumes...) 454 455 } 456 logStats, err := hostStatsProvider.getPodLogStats(podNs, podName, podUID, rootFsInfo) 457 if err != nil { 458 klog.V(6).ErrorS(err, "Unable to fetch pod log stats", "pod", klog.KRef(podNs, podName)) 459 // If people do in-place upgrade, there might be pods still using 460 // the old log path. For those pods, no pod log stats is returned. 461 // We should continue generating other stats in that case. 462 // calcEphemeralStorage tolerants logStats == nil. 
463 } 464 etcHostsStats, err := hostStatsProvider.getPodEtcHostsStats(podUID, rootFsInfo) 465 if err != nil { 466 klog.V(6).ErrorS(err, "Unable to fetch pod etc hosts stats", "pod", klog.KRef(podNs, podName)) 467 } 468 s.EphemeralStorage = calcEphemeralStorage(s.Containers, ephemeralStats, rootFsInfo, logStats, etcHostsStats, isCRIStatsProvider) 469 }