k8s.io/kubernetes@v1.29.3/pkg/kubelet/winstats/perfcounter_nodestats.go (about) 1 //go:build windows 2 // +build windows 3 4 /* 5 Copyright 2017 The Kubernetes Authors. 6 7 Licensed under the Apache License, Version 2.0 (the "License"); 8 you may not use this file except in compliance with the License. 9 You may obtain a copy of the License at 10 11 http://www.apache.org/licenses/LICENSE-2.0 12 13 Unless required by applicable law or agreed to in writing, software 14 distributed under the License is distributed on an "AS IS" BASIS, 15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 See the License for the specific language governing permissions and 17 limitations under the License. 18 */ 19 20 package winstats 21 22 import ( 23 "errors" 24 "fmt" 25 "os" 26 "os/exec" 27 "runtime" 28 "strconv" 29 "strings" 30 "sync" 31 "syscall" 32 "time" 33 "unsafe" 34 35 cadvisorapi "github.com/google/cadvisor/info/v1" 36 "golang.org/x/sys/windows" 37 "golang.org/x/sys/windows/registry" 38 "k8s.io/apimachinery/pkg/util/wait" 39 "k8s.io/klog/v2" 40 ) 41 42 const ( 43 bootIdRegistry = `SYSTEM\CurrentControlSet\Control\Session Manager\Memory Management\PrefetchParameters` 44 bootIdKey = `BootId` 45 ) 46 47 // MemoryStatusEx is the same as Windows structure MEMORYSTATUSEX 48 // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366770(v=vs.85).aspx 49 type MemoryStatusEx struct { 50 Length uint32 51 MemoryLoad uint32 52 TotalPhys uint64 53 AvailPhys uint64 54 TotalPageFile uint64 55 AvailPageFile uint64 56 TotalVirtual uint64 57 AvailVirtual uint64 58 AvailExtendedVirtual uint64 59 } 60 61 var ( 62 modkernel32 = windows.NewLazySystemDLL("kernel32.dll") 63 procGlobalMemoryStatusEx = modkernel32.NewProc("GlobalMemoryStatusEx") 64 procGetActiveProcessorCount = modkernel32.NewProc("GetActiveProcessorCount") 65 ) 66 67 const allProcessorGroups = 0xFFFF 68 69 // NewPerfCounterClient creates a client using perf counters 70 func NewPerfCounterClient() (Client, error) { 71 // Initialize the cache 72 initCache := cpuUsageCoreNanoSecondsCache{0, 0} 73 return newClient(&perfCounterNodeStatsClient{ 74 cpuUsageCoreNanoSecondsCache: initCache, 75 }) 76 } 77 78 // perfCounterNodeStatsClient is a client that provides Windows Stats via PerfCounters 79 type perfCounterNodeStatsClient struct { 80 nodeMetrics 81 mu sync.RWMutex // mu protects nodeMetrics 82 nodeInfo 83 // cpuUsageCoreNanoSecondsCache caches the cpu usage for nodes. 84 cpuUsageCoreNanoSecondsCache 85 } 86 87 func (p *perfCounterNodeStatsClient) startMonitoring() error { 88 memory, err := getPhysicallyInstalledSystemMemoryBytes() 89 if err != nil { 90 return err 91 } 92 93 osInfo, err := GetOSInfo() 94 if err != nil { 95 return err 96 } 97 98 p.nodeInfo = nodeInfo{ 99 kernelVersion: osInfo.GetPatchVersion(), 100 osImageVersion: osInfo.ProductName, 101 memoryPhysicalCapacityBytes: memory, 102 startTime: time.Now(), 103 } 104 105 cpuCounter, err := newPerfCounter(cpuQuery) 106 if err != nil { 107 return err 108 } 109 110 memWorkingSetCounter, err := newPerfCounter(memoryPrivWorkingSetQuery) 111 if err != nil { 112 return err 113 } 114 115 memCommittedBytesCounter, err := newPerfCounter(memoryCommittedBytesQuery) 116 if err != nil { 117 return err 118 } 119 120 networkAdapterCounter, err := newNetworkCounters() 121 if err != nil { 122 return err 123 } 124 125 go wait.Forever(func() { 126 p.collectMetricsData(cpuCounter, memWorkingSetCounter, memCommittedBytesCounter, networkAdapterCounter) 127 }, perfCounterUpdatePeriod) 128 129 // Cache the CPU usage every defaultCachePeriod 130 go wait.Forever(func() { 131 newValue := p.nodeMetrics.cpuUsageCoreNanoSeconds 132 p.mu.Lock() 133 defer p.mu.Unlock() 134 p.cpuUsageCoreNanoSecondsCache = cpuUsageCoreNanoSecondsCache{ 135 previousValue: p.cpuUsageCoreNanoSecondsCache.latestValue, 136 latestValue: newValue, 137 } 138 }, defaultCachePeriod) 139 140 return nil 141 } 142 143 func (p *perfCounterNodeStatsClient) getMachineInfo() (*cadvisorapi.MachineInfo, error) { 144 hostname, err := os.Hostname() 145 if err != nil { 146 return nil, err 147 } 148 149 systemUUID, err := getSystemUUID() 150 if err != nil { 151 return nil, err 152 } 153 154 bootId, err := getBootID() 155 if err != nil { 156 return nil, err 157 } 158 159 return &cadvisorapi.MachineInfo{ 160 NumCores: ProcessorCount(), 161 MemoryCapacity: p.nodeInfo.memoryPhysicalCapacityBytes, 162 MachineID: hostname, 163 SystemUUID: systemUUID, 164 BootID: bootId, 165 }, nil 166 } 167 168 // runtime.NumCPU() will only return the information for a single Processor Group. 169 // Since a single group can only hold 64 logical processors, this 170 // means when there are more they will be divided into multiple groups. 171 // For the above reason, procGetActiveProcessorCount is used to get the 172 // cpu count for all processor groups of the windows node. 173 // more notes for this issue: 174 // same issue in moby: https://github.com/moby/moby/issues/38935#issuecomment-744638345 175 // solution in hcsshim: https://github.com/microsoft/hcsshim/blob/master/internal/processorinfo/processor_count.go 176 func ProcessorCount() int { 177 if amount := getActiveProcessorCount(allProcessorGroups); amount != 0 { 178 return int(amount) 179 } 180 return runtime.NumCPU() 181 } 182 183 func getActiveProcessorCount(groupNumber uint16) int { 184 r0, _, _ := syscall.Syscall(procGetActiveProcessorCount.Addr(), 1, uintptr(groupNumber), 0, 0) 185 return int(r0) 186 } 187 188 func (p *perfCounterNodeStatsClient) getVersionInfo() (*cadvisorapi.VersionInfo, error) { 189 return &cadvisorapi.VersionInfo{ 190 KernelVersion: p.nodeInfo.kernelVersion, 191 ContainerOsVersion: p.nodeInfo.osImageVersion, 192 }, nil 193 } 194 195 func (p *perfCounterNodeStatsClient) getNodeMetrics() (nodeMetrics, error) { 196 p.mu.RLock() 197 defer p.mu.RUnlock() 198 return p.nodeMetrics, nil 199 } 200 201 func (p *perfCounterNodeStatsClient) getNodeInfo() nodeInfo { 202 return p.nodeInfo 203 } 204 205 func (p *perfCounterNodeStatsClient) collectMetricsData(cpuCounter, memWorkingSetCounter, memCommittedBytesCounter perfCounter, networkAdapterCounter *networkCounter) { 206 cpuValue, err := cpuCounter.getData() 207 cpuCores := ProcessorCount() 208 if err != nil { 209 klog.ErrorS(err, "Unable to get cpu perf counter data") 210 return 211 } 212 213 memWorkingSetValue, err := memWorkingSetCounter.getData() 214 if err != nil { 215 klog.ErrorS(err, "Unable to get memWorkingSet perf counter data") 216 return 217 } 218 219 memCommittedBytesValue, err := memCommittedBytesCounter.getData() 220 if err != nil { 221 klog.ErrorS(err, "Unable to get memCommittedBytes perf counter data") 222 return 223 } 224 225 networkAdapterStats, err := networkAdapterCounter.getData() 226 if err != nil { 227 klog.ErrorS(err, "Unable to get network adapter perf counter data") 228 return 229 } 230 231 p.mu.Lock() 232 defer p.mu.Unlock() 233 p.nodeMetrics = nodeMetrics{ 234 cpuUsageCoreNanoSeconds: p.convertCPUValue(cpuCores, cpuValue), 235 cpuUsageNanoCores: p.getCPUUsageNanoCores(), 236 memoryPrivWorkingSetBytes: memWorkingSetValue, 237 memoryCommittedBytes: memCommittedBytesValue, 238 interfaceStats: networkAdapterStats, 239 timeStamp: time.Now(), 240 } 241 } 242 243 func (p *perfCounterNodeStatsClient) convertCPUValue(cpuCores int, cpuValue uint64) uint64 { 244 // This converts perf counter data which is cpu percentage for all cores into nanoseconds. 245 // The formula is (cpuPercentage / 100.0) * #cores * 1e+9 (nano seconds). More info here: 246 // https://github.com/kubernetes/heapster/issues/650 247 newValue := p.nodeMetrics.cpuUsageCoreNanoSeconds + uint64((float64(cpuValue)/100.0)*float64(cpuCores)*1e9) 248 return newValue 249 } 250 251 func (p *perfCounterNodeStatsClient) getCPUUsageNanoCores() uint64 { 252 cachePeriodSeconds := uint64(defaultCachePeriod / time.Second) 253 perfCounterUpdatePeriodSeconds := uint64(perfCounterUpdatePeriod / time.Second) 254 cpuUsageNanoCores := ((p.cpuUsageCoreNanoSecondsCache.latestValue - p.cpuUsageCoreNanoSecondsCache.previousValue) * perfCounterUpdatePeriodSeconds) / cachePeriodSeconds 255 return cpuUsageNanoCores 256 } 257 258 func getSystemUUID() (string, error) { 259 result, err := exec.Command("wmic", "csproduct", "get", "UUID").Output() 260 if err != nil { 261 return "", err 262 } 263 fields := strings.Fields(string(result)) 264 if len(fields) != 2 { 265 return "", fmt.Errorf("received unexpected value retrieving vm uuid: %q", string(result)) 266 } 267 return fields[1], nil 268 } 269 270 func getPhysicallyInstalledSystemMemoryBytes() (uint64, error) { 271 // We use GlobalMemoryStatusEx instead of GetPhysicallyInstalledSystemMemory 272 // on Windows node for the following reasons: 273 // 1. GetPhysicallyInstalledSystemMemory retrieves the amount of physically 274 // installed RAM from the computer's SMBIOS firmware tables. 275 // https://msdn.microsoft.com/en-us/library/windows/desktop/cc300158(v=vs.85).aspx 276 // On some VM, it is unable to read data from SMBIOS and fails with ERROR_INVALID_DATA. 277 // 2. On Linux node, total physical memory is read from MemTotal in /proc/meminfo. 278 // GlobalMemoryStatusEx returns the amount of physical memory that is available 279 // for the operating system to use. The amount returned by GlobalMemoryStatusEx 280 // is closer in parity with Linux 281 // https://www.kernel.org/doc/Documentation/filesystems/proc.txt 282 var statex MemoryStatusEx 283 statex.Length = uint32(unsafe.Sizeof(statex)) 284 ret, _, _ := procGlobalMemoryStatusEx.Call(uintptr(unsafe.Pointer(&statex))) 285 286 if ret == 0 { 287 return 0, errors.New("unable to read physical memory") 288 } 289 290 return statex.TotalPhys, nil 291 } 292 293 func getBootID() (string, error) { 294 regKey, err := registry.OpenKey(registry.LOCAL_MACHINE, bootIdRegistry, registry.READ) 295 if err != nil { 296 return "", err 297 } 298 defer regKey.Close() 299 regValue, _, err := regKey.GetIntegerValue(bootIdKey) 300 if err != nil { 301 return "", err 302 } 303 return strconv.FormatUint(regValue, 10), nil 304 }