github.com/kubewharf/katalyst-core@v0.5.3/pkg/metaserver/agent/metric/provisioner/malachite/provisioner.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package malachite 18 19 import ( 20 "context" 21 "strconv" 22 "strings" 23 "sync" 24 "time" 25 26 "k8s.io/apimachinery/pkg/util/errors" 27 "k8s.io/klog/v2" 28 29 "github.com/kubewharf/katalyst-core/pkg/config/agent/global" 30 "github.com/kubewharf/katalyst-core/pkg/config/agent/metaserver" 31 "github.com/kubewharf/katalyst-core/pkg/consts" 32 "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/provisioner/malachite/client" 33 malachitetypes "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/provisioner/malachite/types" 34 "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/types" 35 "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" 36 "github.com/kubewharf/katalyst-core/pkg/metrics" 37 "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" 38 "github.com/kubewharf/katalyst-core/pkg/util/general" 39 utilmetric "github.com/kubewharf/katalyst-core/pkg/util/metric" 40 ) 41 42 const ( 43 metricsNamMalachiteUnHealthy = "malachite_unhealthy" 44 metricsNameMalachiteGetSystemStatusFailed = "malachite_get_system_status_failed" 45 metricsNameMalachiteGetPodStatusFailed = "malachite_get_pod_status_failed" 46 47 // Typically, katalyst's metric component does sampling per 10s. 
48 defaultMetricUpdateInterval = 10.0 49 50 pageShift = 12 51 52 malachiteProvisionerHealthCheckName = "malachite_provisioner_sample" 53 malachiteProvisionTolerationTime = 15 * time.Second 54 ) 55 56 // NewMalachiteMetricsProvisioner returns the default implementation of MetricsFetcher. 57 func NewMalachiteMetricsProvisioner(baseConf *global.BaseConfiguration, _ *metaserver.MetricConfiguration, 58 emitter metrics.MetricEmitter, fetcher pod.PodFetcher, metricStore *utilmetric.MetricStore, 59 ) types.MetricsProvisioner { 60 return &MalachiteMetricsProvisioner{ 61 malachiteClient: client.NewMalachiteClient(fetcher), 62 metricStore: metricStore, 63 emitter: emitter, 64 baseConf: baseConf, 65 } 66 } 67 68 type MalachiteMetricsProvisioner struct { 69 metricStore *utilmetric.MetricStore 70 malachiteClient *client.MalachiteClient 71 baseConf *global.BaseConfiguration 72 emitter metrics.MetricEmitter 73 startOnce sync.Once 74 } 75 76 func (m *MalachiteMetricsProvisioner) Run(ctx context.Context) { 77 m.startOnce.Do(func() { 78 general.RegisterHeartbeatCheck(malachiteProvisionerHealthCheckName, malachiteProvisionTolerationTime, 79 general.HealthzCheckStateNotReady, malachiteProvisionTolerationTime) 80 }) 81 m.sample(ctx) 82 } 83 84 func (m *MalachiteMetricsProvisioner) sample(ctx context.Context) { 85 klog.V(4).Infof("[malachite] heartbeat") 86 87 if !m.checkMalachiteHealthy() { 88 _ = general.UpdateHealthzState(malachiteProvisionerHealthCheckName, general.HealthzCheckStateNotReady, "malachite is not healthy") 89 return 90 } 91 errList := make([]error, 0) 92 93 // Update system data 94 if err := m.updateSystemStats(); err != nil { 95 errList = append(errList, err) 96 } 97 // Update pod data 98 if err := m.updatePodsCgroupData(ctx); err != nil { 99 errList = append(errList, err) 100 } 101 // Update top level cgroup of kubepods 102 if err := m.updateCgroupData(); err != nil { 103 errList = append(errList, err) 104 } 105 _ = 
general.UpdateHealthzStateByError(malachiteProvisionerHealthCheckName, errors.NewAggregate(errList)) 106 } 107 108 // checkMalachiteHealthy is to check whether malachite is healthy 109 func (m *MalachiteMetricsProvisioner) checkMalachiteHealthy() bool { 110 _, err := m.malachiteClient.GetSystemComputeStats() 111 if err != nil { 112 klog.Errorf("[malachite] malachite is unhealthy: %v", err) 113 _ = m.emitter.StoreInt64(metricsNamMalachiteUnHealthy, 1, metrics.MetricTypeNameRaw) 114 return false 115 } 116 117 return true 118 } 119 120 // Get raw system stats by malachite sdk and set to metricStore 121 func (m *MalachiteMetricsProvisioner) updateSystemStats() error { 122 errList := make([]error, 0) 123 systemComputeData, err := m.malachiteClient.GetSystemComputeStats() 124 if err != nil { 125 errList = append(errList, err) 126 klog.Errorf("[malachite] get system compute stats failed, err %v", err) 127 _ = m.emitter.StoreInt64(metricsNameMalachiteGetSystemStatusFailed, 1, metrics.MetricTypeNameCount, 128 metrics.MetricTag{Key: "kind", Val: "compute"}) 129 } else { 130 m.processSystemComputeData(systemComputeData) 131 m.processSystemCPUComputeData(systemComputeData) 132 } 133 134 systemMemoryData, err := m.malachiteClient.GetSystemMemoryStats() 135 if err != nil { 136 errList = append(errList, err) 137 klog.Errorf("[malachite] get system memory stats failed, err %v", err) 138 _ = m.emitter.StoreInt64(metricsNameMalachiteGetSystemStatusFailed, 1, metrics.MetricTypeNameCount, 139 metrics.MetricTag{Key: "kind", Val: "memory"}) 140 } else { 141 m.processSystemMemoryData(systemMemoryData) 142 m.processSystemNumaData(systemMemoryData) 143 } 144 145 systemIOData, err := m.malachiteClient.GetSystemIOStats() 146 if err != nil { 147 errList = append(errList, err) 148 klog.Errorf("[malachite] get system io stats failed, err %v", err) 149 _ = m.emitter.StoreInt64(metricsNameMalachiteGetSystemStatusFailed, 1, metrics.MetricTypeNameCount, 150 metrics.MetricTag{Key: "kind", Val: 
"io"}) 151 } else { 152 m.processSystemIOData(systemIOData) 153 } 154 155 systemNetData, err := m.malachiteClient.GetSystemNetStats() 156 if err != nil { 157 errList = append(errList, err) 158 klog.Errorf("[malachite] get system net stats failed, err %v", err) 159 _ = m.emitter.StoreInt64(metricsNameMalachiteGetSystemStatusFailed, 1, metrics.MetricTypeNameCount, 160 metrics.MetricTag{Key: "kind", Val: "net"}) 161 } else { 162 m.processSystemNetData(systemNetData) 163 } 164 165 return errors.NewAggregate(errList) 166 } 167 168 func (m *MalachiteMetricsProvisioner) getCgroupPaths() []string { 169 cgroupPaths := []string{m.baseConf.ReclaimRelativeRootCgroupPath, common.CgroupFsRootPathBurstable, common.CgroupFsRootPathBestEffort} 170 for _, path := range m.baseConf.OptionalRelativeCgroupPaths { 171 absPath := common.GetAbsCgroupPath(common.DefaultSelectedSubsys, path) 172 if !general.IsPathExists(absPath) { 173 general.Infof("cgroup path %v not existed, ignore it", path) 174 continue 175 } 176 cgroupPaths = append(cgroupPaths, path) 177 } 178 for _, path := range m.baseConf.GeneralRelativeCgroupPaths { 179 cgroupPaths = append(cgroupPaths, path) 180 } 181 182 dedupCgroupPaths := general.DedupStringSlice(cgroupPaths) 183 return dedupCgroupPaths 184 } 185 186 func (m *MalachiteMetricsProvisioner) updateCgroupData() error { 187 cgroupPaths := m.getCgroupPaths() 188 errList := make([]error, 0) 189 for _, path := range cgroupPaths { 190 stats, err := m.malachiteClient.GetCgroupStats(path) 191 if err != nil { 192 errList = append(errList, err) 193 general.Errorf("GetCgroupStats %v err %v", path, err) 194 continue 195 } 196 m.processCgroupCPUData(path, stats) 197 m.processCgroupMemoryData(path, stats) 198 m.processCgroupBlkIOData(path, stats) 199 m.processCgroupNetData(path, stats) 200 m.processCgroupPerNumaMemoryData(path, stats) 201 } 202 203 return errors.NewAggregate(errList) 204 } 205 206 // Get raw cgroup data by malachite sdk and set container metrics to metricStore, 
GC not existed pod metrics 207 func (m *MalachiteMetricsProvisioner) updatePodsCgroupData(ctx context.Context) error { 208 podsContainersStats, err := m.malachiteClient.GetAllPodContainersStats(ctx) 209 if err != nil { 210 klog.Errorf("[malachite] GetAllPodsContainersStats failed, error %v", err) 211 _ = m.emitter.StoreInt64(metricsNameMalachiteGetPodStatusFailed, 1, metrics.MetricTypeNameCount) 212 } 213 214 podUIDSet := make(map[string]bool) 215 for podUID, containerStats := range podsContainersStats { 216 podUIDSet[podUID] = true 217 for containerName, cgStats := range containerStats { 218 m.processContainerCPUData(podUID, containerName, cgStats) 219 m.processContainerMemoryData(podUID, containerName, cgStats) 220 m.processContainerBlkIOData(podUID, containerName, cgStats) 221 m.processContainerNetData(podUID, containerName, cgStats) 222 m.processContainerPerfData(podUID, containerName, cgStats) 223 m.processContainerPerNumaMemoryData(podUID, containerName, cgStats) 224 } 225 } 226 m.metricStore.GCPodsMetric(podUIDSet) 227 return err 228 } 229 230 func (m *MalachiteMetricsProvisioner) processSystemComputeData(systemComputeData *malachitetypes.SystemComputeData) { 231 if systemComputeData == nil { 232 return 233 } 234 // todo, currently we only get a unified data for the whole system compute data 235 updateTime := time.Unix(systemComputeData.UpdateTime, 0) 236 237 load := systemComputeData.Load 238 m.metricStore.SetNodeMetric(consts.MetricLoad1MinSystem, 239 utilmetric.MetricData{Value: load.One, Time: &updateTime}) 240 m.metricStore.SetNodeMetric(consts.MetricLoad5MinSystem, 241 utilmetric.MetricData{Value: load.Five, Time: &updateTime}) 242 m.metricStore.SetNodeMetric(consts.MetricLoad15MinSystem, 243 utilmetric.MetricData{Value: load.Fifteen, Time: &updateTime}) 244 } 245 246 func (m *MalachiteMetricsProvisioner) processSystemMemoryData(systemMemoryData *malachitetypes.SystemMemoryData) { 247 if systemMemoryData == nil { 248 return 249 } 250 // todo, currently 
we only get a unified data for the whole system memory data 251 updateTime := time.Unix(systemMemoryData.UpdateTime, 0) 252 253 mem := systemMemoryData.System 254 255 // updating on previous status 256 // TODO delta func 257 prevMemKswapdStealMetric, _ := m.metricStore.GetNodeMetric(consts.MetricMemKswapdstealSystem) 258 m.metricStore.SetNodeMetric(consts.MetricMemKswapdstealDeltaSystem, 259 utilmetric.MetricData{Value: float64(mem.VmstatPgstealKswapd) - prevMemKswapdStealMetric.Value, Time: &updateTime}) 260 261 // updating current status 262 m.metricStore.SetNodeMetric(consts.MetricMemTotalSystem, 263 utilmetric.MetricData{Value: float64(mem.MemTotal << 10), Time: &updateTime}) 264 m.metricStore.SetNodeMetric(consts.MetricMemUsedSystem, 265 utilmetric.MetricData{Value: float64(mem.MemUsed << 10), Time: &updateTime}) 266 m.metricStore.SetNodeMetric(consts.MetricMemFreeSystem, 267 utilmetric.MetricData{Value: float64(mem.MemFree << 10), Time: &updateTime}) 268 m.metricStore.SetNodeMetric(consts.MetricMemShmemSystem, 269 utilmetric.MetricData{Value: float64(mem.MemShm << 10), Time: &updateTime}) 270 m.metricStore.SetNodeMetric(consts.MetricMemBufferSystem, 271 utilmetric.MetricData{Value: float64(mem.MemBuffers << 10), Time: &updateTime}) 272 m.metricStore.SetNodeMetric(consts.MetricMemPageCacheSystem, 273 utilmetric.MetricData{Value: float64(mem.MemPageCache << 10), Time: &updateTime}) 274 m.metricStore.SetNodeMetric(consts.MetricMemAvailableSystem, 275 utilmetric.MetricData{Value: float64(mem.MemAvailable << 10), Time: &updateTime}) 276 277 m.metricStore.SetNodeMetric(consts.MetricMemDirtySystem, 278 utilmetric.MetricData{Value: float64(mem.MemDirtyPageCache << 10), Time: &updateTime}) 279 m.metricStore.SetNodeMetric(consts.MetricMemWritebackSystem, 280 utilmetric.MetricData{Value: float64(mem.MemWriteBackPageCache << 10), Time: &updateTime}) 281 m.metricStore.SetNodeMetric(consts.MetricMemKswapdstealSystem, 282 utilmetric.MetricData{Value: 
float64(mem.VmstatPgstealKswapd), Time: &updateTime}) 283 284 m.metricStore.SetNodeMetric(consts.MetricMemSwapTotalSystem, 285 utilmetric.MetricData{Value: float64(mem.MemSwapTotal << 10), Time: &updateTime}) 286 m.metricStore.SetNodeMetric(consts.MetricMemSwapFreeSystem, 287 utilmetric.MetricData{Value: float64(mem.MemSwapFree << 10), Time: &updateTime}) 288 m.metricStore.SetNodeMetric(consts.MetricMemSlabReclaimableSystem, 289 utilmetric.MetricData{Value: float64(mem.MemSlabReclaimable << 10), Time: &updateTime}) 290 291 m.metricStore.SetNodeMetric(consts.MetricMemScaleFactorSystem, 292 utilmetric.MetricData{Value: float64(mem.VMWatermarkScaleFactor), Time: &updateTime}) 293 294 // timestamp 295 m.metricStore.SetNodeMetric(consts.MetricMemUpdateTimeSystem, 296 utilmetric.MetricData{Value: float64(systemMemoryData.UpdateTime), Time: &updateTime}) 297 } 298 299 func (m *MalachiteMetricsProvisioner) processSystemIOData(systemIOData *malachitetypes.SystemDiskIoData) { 300 if systemIOData == nil { 301 return 302 } 303 // todo, currently we only get a unified data for the whole system io data 304 updateTime := time.Unix(systemIOData.UpdateTime, 0) 305 306 // calculate rate of the metric, and tell the caller if it's a valid value. 
307 ioStatFunc := func(deviceName, metricName string, value float64) (float64, bool) { 308 prevData, err := m.metricStore.GetDeviceMetric(deviceName, metricName) 309 if err != nil || prevData.Time == nil { 310 return 0, false 311 } 312 313 timestampDeltaInMill := updateTime.UnixMilli() - prevData.Time.UnixMilli() 314 if timestampDeltaInMill == 0 { 315 return prevData.Value, false 316 } 317 318 return (value - prevData.Value) / float64(timestampDeltaInMill), true 319 } 320 321 setStatMetricIfValid := func(deviceName, rawMetricName, metricName string, value, scale float64) { 322 ioStatData, isValid := ioStatFunc(deviceName, rawMetricName, value) 323 if !isValid { 324 return 325 } 326 m.metricStore.SetDeviceMetric(deviceName, metricName, 327 utilmetric.MetricData{ 328 Value: ioStatData * scale, 329 Time: &updateTime, 330 }) 331 } 332 333 for _, device := range systemIOData.DiskIo { 334 setStatMetricIfValid(device.DeviceName, consts.MetricIOReadSystem, consts.MetricIOReadOpsSystem, float64(device.IoRead), 1000.0) 335 setStatMetricIfValid(device.DeviceName, consts.MetricIOWriteSystem, consts.MetricIOWriteOpsSystem, float64(device.IoWrite), 1000.0) 336 setStatMetricIfValid(device.DeviceName, consts.MetricIOBusySystem, consts.MetricIOBusyRateSystem, float64(device.IoBusy), 1.0) 337 338 m.metricStore.SetDeviceMetric(device.DeviceName, consts.MetricIOReadSystem, 339 utilmetric.MetricData{Value: float64(device.IoRead), Time: &updateTime}) 340 m.metricStore.SetDeviceMetric(device.DeviceName, consts.MetricIOWriteSystem, 341 utilmetric.MetricData{Value: float64(device.IoWrite), Time: &updateTime}) 342 m.metricStore.SetDeviceMetric(device.DeviceName, consts.MetricIOBusySystem, 343 utilmetric.MetricData{Value: float64(device.IoBusy), Time: &updateTime}) 344 345 diskType := consts.DiskTypeUnknown 346 if device.DiskType == "HDD" { 347 diskType = consts.DiskTypeHDD 348 } else if device.DiskType == "SSD" { 349 diskType = consts.DiskTypeSSD 350 } else if device.DiskType == "NVME" { 
351 diskType = consts.DiskTypeNVME 352 } 353 m.metricStore.SetDeviceMetric(device.DeviceName, consts.MetricIODiskType, 354 utilmetric.MetricData{Value: float64(diskType), Time: &updateTime}) 355 m.metricStore.SetDeviceMetric(device.DeviceName, consts.MetricIODiskWBTValue, 356 utilmetric.MetricData{Value: float64(device.WBTValue), Time: &updateTime}) 357 } 358 } 359 360 func (m *MalachiteMetricsProvisioner) processSystemNetData(systemNetData *malachitetypes.SystemNetworkData) { 361 if systemNetData == nil { 362 return 363 } 364 // todo, currently we only get a unified data for the whole system io data 365 updateTime := time.Unix(systemNetData.UpdateTime, 0) 366 367 m.metricStore.SetNodeMetric(consts.MetricNetTcpDelayedAcks, 368 utilmetric.MetricData{Value: float64(systemNetData.TCP.TCPDelayAcks), Time: &updateTime}) 369 m.metricStore.SetNodeMetric(consts.MetricNetTcpOverflows, 370 utilmetric.MetricData{Value: float64(systemNetData.TCP.TCPListenOverflows), Time: &updateTime}) 371 m.metricStore.SetNodeMetric(consts.MetricNetTcpDrops, 372 utilmetric.MetricData{Value: float64(systemNetData.TCP.TCPListenDrops), Time: &updateTime}) 373 m.metricStore.SetNodeMetric(consts.MetricNetTcpAbort, 374 utilmetric.MetricData{Value: float64(systemNetData.TCP.TCPAbortOnMemory), Time: &updateTime}) 375 m.metricStore.SetNodeMetric(consts.MetricNetTcpDrop, 376 utilmetric.MetricData{Value: float64(systemNetData.TCP.TCPReqQFullDrop), Time: &updateTime}) 377 m.metricStore.SetNodeMetric(consts.MetricNetTcpRetran, 378 utilmetric.MetricData{Value: systemNetData.TCP.TCPRetran, Time: &updateTime}) 379 m.metricStore.SetNodeMetric(consts.MetricNetTcpRetranSegs, 380 utilmetric.MetricData{Value: float64(systemNetData.TCP.TCPRetransSegs), Time: &updateTime}) 381 m.metricStore.SetNodeMetric(consts.MetricNetTcpRecvPackets, 382 utilmetric.MetricData{Value: float64(systemNetData.TCP.TCPOutSegs), Time: &updateTime}) 383 m.metricStore.SetNodeMetric(consts.MetricNetTcpCloseWait, 384 
utilmetric.MetricData{Value: float64(systemNetData.TCP.TCPCloseWait), Time: &updateTime}) 385 386 for _, device := range systemNetData.NetworkCard { 387 // for now, we will only consider standard network interface 388 // todo, may need to use configurations in the future to filter 389 if !strings.HasPrefix(device.Name, "eth") { 390 continue 391 } 392 393 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetReceiveBytes, 394 utilmetric.MetricData{Value: float64(device.ReceiveBytes), Time: &updateTime}) 395 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetReceivePackets, 396 utilmetric.MetricData{Value: float64(device.ReceivePackets), Time: &updateTime}) 397 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetReceiveErrs, 398 utilmetric.MetricData{Value: float64(device.ReceiveErrs), Time: &updateTime}) 399 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetReceiveDrops, 400 utilmetric.MetricData{Value: float64(device.ReceiveDrop), Time: &updateTime}) 401 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetReceiveFIFO, 402 utilmetric.MetricData{Value: float64(device.ReceiveFifo), Time: &updateTime}) 403 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetReceiveFrame, 404 utilmetric.MetricData{Value: float64(device.ReceiveFrame), Time: &updateTime}) 405 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetReceiveCompressed, 406 utilmetric.MetricData{Value: float64(device.ReceiveCompressed), Time: &updateTime}) 407 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetTransmitMulticast, 408 utilmetric.MetricData{Value: float64(device.ReceiveMulticast), Time: &updateTime}) 409 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetTransmitBytes, 410 utilmetric.MetricData{Value: float64(device.TransmitBytes), Time: &updateTime}) 411 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetTransmitPackets, 412 utilmetric.MetricData{Value: float64(device.TransmitPackets), Time: &updateTime}) 413 
m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetTransmitErrs, 414 utilmetric.MetricData{Value: float64(device.TransmitErrs), Time: &updateTime}) 415 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetTransmitDrops, 416 utilmetric.MetricData{Value: float64(device.TransmitDrop), Time: &updateTime}) 417 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetTransmitFIFO, 418 utilmetric.MetricData{Value: float64(device.TransmitFifo), Time: &updateTime}) 419 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetTransmitColls, 420 utilmetric.MetricData{Value: float64(device.TransmitColls), Time: &updateTime}) 421 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetTransmitCarrier, 422 utilmetric.MetricData{Value: float64(device.TransmitCarrier), Time: &updateTime}) 423 m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetTransmitCompressed, 424 utilmetric.MetricData{Value: float64(device.TransmitCompressed), Time: &updateTime}) 425 426 } 427 } 428 429 func (m *MalachiteMetricsProvisioner) processSystemNumaData(systemMemoryData *malachitetypes.SystemMemoryData) { 430 // todo, currently we only get a unified data for the whole system memory data 431 updateTime := time.Unix(systemMemoryData.UpdateTime, 0) 432 433 for _, numa := range systemMemoryData.Numa { 434 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemTotalNuma, 435 utilmetric.MetricData{Value: float64(numa.MemTotal << 10), Time: &updateTime}) 436 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemUsedNuma, 437 utilmetric.MetricData{Value: float64(numa.MemUsed << 10), Time: &updateTime}) 438 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemFreeNuma, 439 utilmetric.MetricData{Value: float64(numa.MemFree << 10), Time: &updateTime}) 440 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemShmemNuma, 441 utilmetric.MetricData{Value: float64(numa.MemShmem << 10), Time: &updateTime}) 442 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemAvailableNuma, 443 
utilmetric.MetricData{Value: float64(numa.MemAvailable << 10), Time: &updateTime}) 444 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemFilepageNuma, 445 utilmetric.MetricData{Value: float64(numa.MemFilePages << 10), Time: &updateTime}) 446 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemInactiveFileNuma, 447 utilmetric.MetricData{Value: float64(numa.MemInactiveFile << 10), Time: &updateTime}) 448 449 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemBandwidthNuma, 450 utilmetric.MetricData{Value: numa.MemReadBandwidthMB/1024.0 + numa.MemWriteBandwidthMB/1024.0, Time: &updateTime}) 451 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemBandwidthMaxNuma, 452 utilmetric.MetricData{Value: numa.MemTheoryMaxBandwidthMB * 0.8 / 1024.0, Time: &updateTime}) 453 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemBandwidthTheoryNuma, 454 utilmetric.MetricData{Value: numa.MemTheoryMaxBandwidthMB / 1024.0, Time: &updateTime}) 455 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemBandwidthReadNuma, 456 utilmetric.MetricData{Value: numa.MemReadBandwidthMB / 1024.0, Time: &updateTime}) 457 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemBandwidthWriteNuma, 458 utilmetric.MetricData{Value: numa.MemWriteBandwidthMB / 1024.0, Time: &updateTime}) 459 460 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemLatencyReadNuma, 461 utilmetric.MetricData{Value: numa.MemReadLatency, Time: &updateTime}) 462 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemLatencyWriteNuma, 463 utilmetric.MetricData{Value: numa.MemWriteLatency, Time: &updateTime}) 464 m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemAMDL3MissLatencyNuma, 465 utilmetric.MetricData{Value: numa.AMDL3MissLatencyMax, Time: &updateTime}) 466 } 467 } 468 469 func (m *MalachiteMetricsProvisioner) processSystemCPUComputeData(systemComputeData *malachitetypes.SystemComputeData) { 470 // todo, currently we only get a unified data for the whole system compute data 471 updateTime := 
time.Unix(systemComputeData.UpdateTime, 0) 472 473 for _, cpu := range systemComputeData.CPU { 474 cpuID, err := strconv.Atoi(cpu.Name[3:]) 475 if err != nil { 476 klog.Errorf("[malachite] parse cpu name %v with err: %v", cpu.Name, err) 477 continue 478 } 479 480 // todo it's kind of confusing but the `cpu-usage` in `system-level` actually represents `ratio`, 481 // we will always rename metric in local store to replenish `ratio` to avoid ambiguity. 482 m.metricStore.SetCPUMetric(cpuID, consts.MetricCPUUsageRatio, 483 utilmetric.MetricData{Value: cpu.CPUUsage / 100.0, Time: &updateTime}) 484 m.metricStore.SetCPUMetric(cpuID, consts.MetricCPUSchedwait, 485 utilmetric.MetricData{Value: cpu.CPUSchedWait * 1000, Time: &updateTime}) 486 m.metricStore.SetCPUMetric(cpuID, consts.MetricCPUIOWaitRatio, 487 utilmetric.MetricData{Value: cpu.CPUIowaitRatio, Time: &updateTime}) 488 } 489 m.metricStore.SetNodeMetric(consts.MetricCPUUsageRatio, 490 utilmetric.MetricData{Value: systemComputeData.GlobalCPU.CPUUsage / 100.0, Time: &updateTime}) 491 } 492 493 func (m *MalachiteMetricsProvisioner) processCgroupCPUData(cgroupPath string, cgStats *malachitetypes.MalachiteCgroupInfo) { 494 if cgStats == nil { 495 return 496 } 497 498 if cgStats.CgroupType == "V1" && cgStats.V1 != nil { 499 cpu := cgStats.V1.Cpu 500 updateTime := time.Unix(cgStats.V1.Cpu.UpdateTime, 0) 501 502 // todo it's kind of confusing but the `cpu-usage-ratio` in `cgroup-level` actually represents `actual cores`, 503 // we will always rename metric in local store to eliminate `ratio` to avoid ambiguity. 
504 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPULimitCgroup, utilmetric.MetricData{Value: float64(cpu.CfsQuotaUs) / float64(cpu.CfsPeriodUs), Time: &updateTime}) 505 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUUsageCgroup, utilmetric.MetricData{Value: cpu.CPUUsageRatio, Time: &updateTime}) 506 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUUsageUserCgroup, utilmetric.MetricData{Value: cpu.CPUUserUsageRatio, Time: &updateTime}) 507 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUUsageSysCgroup, utilmetric.MetricData{Value: cpu.CPUSysUsageRatio, Time: &updateTime}) 508 509 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUShareCgroup, utilmetric.MetricData{Value: float64(cpu.CPUShares), Time: &updateTime}) 510 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUQuotaCgroup, utilmetric.MetricData{Value: float64(cpu.CfsQuotaUs), Time: &updateTime}) 511 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUPeriodCgroup, utilmetric.MetricData{Value: float64(cpu.CfsPeriodUs), Time: &updateTime}) 512 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUNrThrottledCgroup, utilmetric.MetricData{Value: float64(cpu.CPUNrThrottled), Time: &updateTime}) 513 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUThrottledPeriodCgroup, utilmetric.MetricData{Value: float64(cpu.CPUNrPeriods), Time: &updateTime}) 514 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUThrottledTimeCgroup, utilmetric.MetricData{Value: float64(cpu.CPUThrottledTime / 1000), Time: &updateTime}) 515 516 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUNrRunnableCgroup, utilmetric.MetricData{Value: float64(cpu.TaskNrRunning), Time: &updateTime}) 517 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUNrUninterruptibleCgroup, utilmetric.MetricData{Value: float64(cpu.TaskNrUninterruptible), Time: &updateTime}) 518 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUNrIOWaitCgroup, 
utilmetric.MetricData{Value: float64(cpu.TaskNrIoWait), Time: &updateTime}) 519 520 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricLoad1MinCgroup, utilmetric.MetricData{Value: cpu.Load.One, Time: &updateTime}) 521 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricLoad5MinCgroup, utilmetric.MetricData{Value: cpu.Load.Five, Time: &updateTime}) 522 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricLoad15MinCgroup, utilmetric.MetricData{Value: cpu.Load.Fifteen, Time: &updateTime}) 523 524 } else if cgStats.CgroupType == "V2" && cgStats.V2 != nil { 525 cpu := cgStats.V2.Cpu 526 updateTime := time.Unix(cgStats.V2.Cpu.UpdateTime, 0) 527 528 // todo it's kind of confusing but the `cpu-usage-ratio` in `cgroup-level` actually represents `actual cores`, 529 // we will always rename metric in local store to eliminate `ratio` to avoid ambiguity. 530 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUUsageCgroup, utilmetric.MetricData{Value: cpu.CPUUsageRatio, Time: &updateTime}) 531 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUUsageUserCgroup, utilmetric.MetricData{Value: cpu.CPUUserUsageRatio, Time: &updateTime}) 532 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUUsageSysCgroup, utilmetric.MetricData{Value: cpu.CPUSysUsageRatio, Time: &updateTime}) 533 534 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUNrRunnableCgroup, utilmetric.MetricData{Value: float64(cpu.TaskNrRunning), Time: &updateTime}) 535 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUNrUninterruptibleCgroup, utilmetric.MetricData{Value: float64(cpu.TaskNrUninterruptible), Time: &updateTime}) 536 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricCPUNrIOWaitCgroup, utilmetric.MetricData{Value: float64(cpu.TaskNrIoWait), Time: &updateTime}) 537 538 m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricLoad1MinCgroup, utilmetric.MetricData{Value: cpu.Load.One, Time: &updateTime}) 539 m.metricStore.SetCgroupMetric(cgroupPath, 
consts.MetricLoad5MinCgroup, utilmetric.MetricData{Value: cpu.Load.Five, Time: &updateTime})
		m.metricStore.SetCgroupMetric(cgroupPath, consts.MetricLoad15MinCgroup, utilmetric.MetricData{Value: cpu.Load.Fifteen, Time: &updateTime})
	}
}

// processCgroupMemoryData copies the memory statistics that malachite reported
// for the cgroup at cgroupPath into the metric store.
//
// Nothing is written when cgStats is nil, or when the data matching
// cgStats.CgroupType ("V1"/"V2") is missing. Every metric written by one call
// shares the timestamp taken from the memory section's UpdateTime.
func (m *MalachiteMetricsProvisioner) processCgroupMemoryData(cgroupPath string, cgStats *malachitetypes.MalachiteCgroupInfo) {
	if cgStats == nil {
		return
	}

	var updateTime time.Time
	set := func(metricName string, value float64) {
		m.metricStore.SetCgroupMetric(cgroupPath, metricName, utilmetric.MetricData{Time: &updateTime, Value: value})
	}

	switch {
	case cgStats.CgroupType == "V1" && cgStats.V1 != nil:
		mem := cgStats.V1.Memory
		updateTime = time.Unix(mem.UpdateTime, 0)

		set(consts.MetricMemLimitCgroup, float64(mem.MemoryLimitInBytes))
		set(consts.MetricMemUsageCgroup, float64(mem.MemoryUsageInBytes))
		// User usage is total usage minus kernel usage. This used to subtract
		// from the memory *limit*, which reports headroom rather than usage.
		// The subtraction is done in float64 so that it cannot wrap around if
		// the kernel counter momentarily exceeds the usage counter.
		set(consts.MetricMemUsageUserCgroup, float64(mem.MemoryUsageInBytes)-float64(mem.KernMemoryUsageInBytes))
		set(consts.MetricMemUsageSysCgroup, float64(mem.KernMemoryUsageInBytes))
		set(consts.MetricMemRssCgroup, float64(mem.TotalRss))
		set(consts.MetricMemCacheCgroup, float64(mem.TotalCache))
		set(consts.MetricMemShmemCgroup, float64(mem.TotalShmem))

		set(consts.MetricMemDirtyCgroup, float64(mem.TotalDirty))
		set(consts.MetricMemWritebackCgroup, float64(mem.TotalWriteback))
		set(consts.MetricMemPgfaultCgroup, float64(mem.TotalPgfault))
		set(consts.MetricMemPgmajfaultCgroup, float64(mem.TotalPgmajfault))
		set(consts.MetricMemAllocstallCgroup, float64(mem.TotalAllocstall))
		set(consts.MetricMemKswapdstealCgroup, float64(mem.KswapdSteal))

		set(consts.MetricMemOomCgroup, float64(mem.BpfMemStat.OomCnt))
		// set(consts.MetricMemScaleFactorCgroup, general.UIntPointerToFloat64(mem.WatermarkScaleFactor))
	case cgStats.CgroupType == "V2" && cgStats.V2 != nil:
		mem := cgStats.V2.Memory
		updateTime = time.Unix(mem.UpdateTime, 0)

		set(consts.MetricMemUsageCgroup, float64(mem.MemoryUsageInBytes))
		set(consts.MetricMemRssCgroup, float64(mem.MemStats.Anon))
		set(consts.MetricMemCacheCgroup, float64(mem.MemStats.File))

		set(consts.MetricMemShmemCgroup, float64(mem.MemStats.Shmem))
		set(consts.MetricMemPgfaultCgroup, float64(mem.MemStats.Pgfault))
		set(consts.MetricMemPgmajfaultCgroup, float64(mem.MemStats.Pgmajfault))

		set(consts.MetricMemOomCgroup, float64(mem.BpfMemStat.OomCnt))
		// set(consts.MetricMemScaleFactorCgroup, general.UInt64PointerToFloat64(mem.WatermarkScaleFactor))
		set(consts.MetricMemPgstealCgroup, float64(mem.MemStats.Pgsteal))
		set(consts.MetricMemPgscanCgroup, float64(mem.MemStats.Pgscan))
		set(consts.MetricMemWorkingsetRefaultCgroup, float64(mem.MemStats.WorkingsetRefault))
		set(consts.MetricMemWorkingsetActivateCgroup, float64(mem.MemStats.WorkingsetActivate))
		// The PSI metric is sourced from the pressure data, exactly as
		// processContainerMemoryData does; it used to be fed with the
		// unrelated BpfMemStat.MemReclaimSettingSum counter.
		set(consts.MetricMemPsiAvg60Cgroup, mem.MemPressure.Some.Avg60)
		set(consts.MetricMemInactiveAnonCgroup, float64(mem.MemStats.InactiveAnon))
		set(consts.MetricMemInactiveFileCgroup, float64(mem.MemStats.InactiveFile))
	}
}

// processCgroupBlkIOData stores, for the cgroup at cgroupPath, the difference
// between the current and the previous BPF filesystem counters (read/write
// operations and bytes) reported by malachite.
//
// The differences go through uint64CounterDelta, the same helper
// processContainerBlkIOData uses, instead of a bare unsigned subtraction that
// wraps to a huge value whenever the current counter is below the old one.
func (m *MalachiteMetricsProvisioner) processCgroupBlkIOData(cgroupPath string, cgStats *malachitetypes.MalachiteCgroupInfo) {
	if cgStats == nil {
		return
	}

	var updateTime time.Time
	set := func(metricName string, value float64) {
		m.metricStore.SetCgroupMetric(cgroupPath, metricName, utilmetric.MetricData{Time: &updateTime, Value: value})
	}

	switch {
	case cgStats.CgroupType == "V1" && cgStats.V1 != nil:
		blkio := cgStats.V1.Blkio
		updateTime = time.Unix(blkio.UpdateTime, 0)

		set(consts.MetricBlkioReadIopsCgroup, float64(uint64CounterDelta(blkio.OldBpfFsData.FsRead, blkio.BpfFsData.FsRead)))
		set(consts.MetricBlkioWriteIopsCgroup, float64(uint64CounterDelta(blkio.OldBpfFsData.FsWrite, blkio.BpfFsData.FsWrite)))
		set(consts.MetricBlkioReadBpsCgroup, float64(uint64CounterDelta(blkio.OldBpfFsData.FsReadBytes, blkio.BpfFsData.FsReadBytes)))
		set(consts.MetricBlkioWriteBpsCgroup, float64(uint64CounterDelta(blkio.OldBpfFsData.FsWriteBytes, blkio.BpfFsData.FsWriteBytes)))
	case cgStats.CgroupType == "V2" && cgStats.V2 != nil:
		blkio := cgStats.V2.Blkio
		updateTime = time.Unix(blkio.UpdateTime, 0)

		set(consts.MetricBlkioReadIopsCgroup, float64(uint64CounterDelta(blkio.OldBpfFsData.FsRead, blkio.BpfFsData.FsRead)))
		set(consts.MetricBlkioWriteIopsCgroup, float64(uint64CounterDelta(blkio.OldBpfFsData.FsWrite, blkio.BpfFsData.FsWrite)))
		set(consts.MetricBlkioReadBpsCgroup, float64(uint64CounterDelta(blkio.OldBpfFsData.FsReadBytes, blkio.BpfFsData.FsReadBytes)))
		set(consts.MetricBlkioWriteBpsCgroup, float64(uint64CounterDelta(blkio.OldBpfFsData.FsWriteBytes, blkio.BpfFsData.FsWriteBytes)))
	}
}

// processCgroupNetData stores, for the cgroup at cgroupPath, the difference
// between the current and the previous BPF TCP counters (bytes and packets,
// both directions) reported by malachite.
//
// Nothing is written when cgStats is nil, when no data matches
// cgStats.CgroupType, or when the selected NetCls pointer is nil.
func (m *MalachiteMetricsProvisioner) processCgroupNetData(cgroupPath string, cgStats *malachitetypes.MalachiteCgroupInfo) {
	if cgStats == nil {
		return
	}

	var netCls *malachitetypes.NetClsCgData
	switch {
	case cgStats.CgroupType == "V1" && cgStats.V1 != nil:
		netCls = cgStats.V1.NetCls
	case cgStats.CgroupType == "V2" && cgStats.V2 != nil:
		netCls = cgStats.V2.NetCls
	}
	// The nil check must precede any field access: reading UpdateTime through
	// a nil NetCls pointer would panic.
	if netCls == nil {
		return
	}
	updateTime := time.Unix(netCls.UpdateTime, 0)

	set := func(metricName string, value float64) {
		m.metricStore.SetCgroupMetric(cgroupPath, metricName, utilmetric.MetricData{Time: &updateTime, Value: value})
	}
	// NOTE(review): these are bare subtractions; if the counters are unsigned
	// they wrap when a counter goes backwards. Consider uint64CounterDelta
	// here as well once the field types are confirmed.
	set(consts.MetricNetTcpSendByteCgroup, float64(netCls.BpfNetData.NetTCPTxBytes-netCls.OldBpfNetData.NetTCPTxBytes))
	set(consts.MetricNetTcpSendPpsCgroup, float64(netCls.BpfNetData.NetTCPTx-netCls.OldBpfNetData.NetTCPTx))
	set(consts.MetricNetTcpRecvByteCgroup, float64(netCls.BpfNetData.NetTCPRxBytes-netCls.OldBpfNetData.NetTCPRxBytes))
	set(consts.MetricNetTcpRecvPpsCgroup, float64(netCls.BpfNetData.NetTCPRx-netCls.OldBpfNetData.NetTCPRx))
}

// processCgroupPerNumaMemoryData stores the per-NUMA-node total/file/anon
// memory of the cgroup at cgroupPath.
//
// NUMA names are expected in the form "N<id>"; entries whose id does not parse
// as an integer are logged and skipped. V1 values are shifted left by
// pageShift before being stored; V2 values are stored as reported, with the
// total computed as anon + file + unevictable.
func (m *MalachiteMetricsProvisioner) processCgroupPerNumaMemoryData(cgroupPath string, cgStats *malachitetypes.MalachiteCgroupInfo) {
	if cgStats == nil {
		return
	}

	var updateTime time.Time
	set := func(numaID int, metricName string, value float64) {
		m.metricStore.SetCgroupNumaMetric(cgroupPath, numaID, metricName, utilmetric.MetricData{Time: &updateTime, Value: value})
	}

	switch {
	case cgStats.CgroupType == "V1" && cgStats.V1 != nil:
		updateTime = time.Unix(cgStats.V1.Memory.UpdateTime, 0)

		for _, data := range cgStats.V1.Memory.NumaStats {
			numaIDStr := strings.TrimPrefix(data.NumaName, "N")
			numaID, err := strconv.Atoi(numaIDStr)
			if err != nil {
				klog.ErrorS(err, "failed to parse numa", "str", numaIDStr)
				continue
			}
			set(numaID, consts.MetricsMemTotalPerNumaCgroup, float64(data.HierarchicalTotal<<pageShift))
			set(numaID, consts.MetricsMemFilePerNumaCgroup, float64(data.HierarchicalFile<<pageShift))
			set(numaID, consts.MetricsMemAnonPerNumaCgroup, float64(data.HierarchicalAnon<<pageShift))
		}
	case cgStats.CgroupType == "V2" && cgStats.V2 != nil:
		updateTime = time.Unix(cgStats.V2.Memory.UpdateTime, 0)

		for numa, data := range cgStats.V2.Memory.MemNumaStats {
			numaIDStr := strings.TrimPrefix(numa, "N")
			numaID, err := strconv.Atoi(numaIDStr)
			if err != nil {
				klog.ErrorS(err, "failed to parse numaIDStr", "str", numaIDStr)
				continue
			}
			set(numaID, consts.MetricsMemTotalPerNumaCgroup, float64(data.Anon+data.File+data.Unevictable))
			set(numaID, consts.MetricsMemFilePerNumaCgroup, float64(data.File))
			set(numaID, consts.MetricsMemAnonPerNumaCgroup, float64(data.Anon))
		}
	}
}

// processContainerCPUData stores the CPU statistics malachite reported for the
// given container.
//
// The previously stored update time, cycle count and instruction count are
// read first, because this call overwrites them. The old update time is
// handed to processContainerMemBandwidth and processContainerCPURelevantRate;
// the old cycle/instruction counts are used to derive CPI for this sample.
func (m *MalachiteMetricsProvisioner) processContainerCPUData(podUID, containerName string, cgStats *malachitetypes.MalachiteCgroupInfo) {
	if cgStats == nil {
		return
	}

	// Lookup errors are deliberately ignored: a missing metric leaves a zero
	// Value, which the CPI guard below treats as "no previous sample".
	var (
		metricLastUpdateTime, _ = m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricCPUUpdateTimeContainer)
		cyclesOld, _            = m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricCPUCyclesContainer)
		instructionsOld, _      = m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricCPUInstructionsContainer)
	)

	m.processContainerMemBandwidth(podUID, containerName, cgStats, metricLastUpdateTime.Value)
	m.processContainerCPURelevantRate(podUID, containerName, cgStats, metricLastUpdateTime.Value)

	var updateTime time.Time
	set := func(metricName string, value float64) {
		m.metricStore.SetContainerMetric(podUID, containerName, metricName, utilmetric.MetricData{Value: value, Time: &updateTime})
	}
	// setCPI records cycles-per-instruction over the interval since the
	// previous sample. It is skipped when there is no previous sample or the
	// instruction counter did not advance.
	setCPI := func(cycles, instructions float64) {
		if cyclesOld.Value <= 0 || instructionsOld.Value <= 0 {
			return
		}
		instructionDiff := instructions - instructionsOld.Value
		if instructionDiff > 0 {
			set(consts.MetricCPUCPIContainer, (cycles-cyclesOld.Value)/instructionDiff)
		}
	}

	switch {
	case cgStats.CgroupType == "V1" && cgStats.V1 != nil:
		cpu := cgStats.V1.Cpu
		updateTime = time.Unix(cpu.UpdateTime, 0)

		// todo it's kind of confusing but the `cpu-usage-ratio` in `cgroup-level` actually represents `actual cores`,
		//  we will always rename metric in local store to eliminate `ratio` to avoid ambiguity.
		// Float division: a zero period yields Inf/NaN instead of panicking.
		set(consts.MetricCPULimitContainer, float64(cpu.CfsQuotaUs)/float64(cpu.CfsPeriodUs))
		set(consts.MetricCPUUsageContainer, cpu.CPUUsageRatio)
		set(consts.MetricCPUUsageUserContainer, cpu.CPUUserUsageRatio)
		set(consts.MetricCPUUsageSysContainer, cpu.CPUSysUsageRatio)
		set(consts.MetricCPUShareContainer, float64(cpu.CPUShares))
		set(consts.MetricCPUQuotaContainer, float64(cpu.CfsQuotaUs))
		set(consts.MetricCPUPeriodContainer, float64(cpu.CfsPeriodUs))
		set(consts.MetricCPUNrThrottledContainer, float64(cpu.CPUNrThrottled))
		set(consts.MetricCPUNrPeriodContainer, float64(cpu.CPUNrPeriods))
		set(consts.MetricCPUThrottledTimeContainer, float64(cpu.CPUThrottledTime/1000))
		set(consts.MetricCPUNrRunnableContainer, float64(cpu.TaskNrRunning))
		set(consts.MetricCPUNrUninterruptibleContainer, float64(cpu.TaskNrUninterruptible))
		set(consts.MetricCPUNrIOWaitContainer, float64(cpu.TaskNrIoWait))

		set(consts.MetricLoad1MinContainer, cpu.Load.One)
		set(consts.MetricLoad5MinContainer, cpu.Load.Five)
		set(consts.MetricLoad15MinContainer, cpu.Load.Fifteen)

		set(consts.MetricOCRReadDRAMsContainer, float64(cpu.OcrReadDrams))
		set(consts.MetricIMCWriteContainer, float64(cpu.ImcWrites))
		set(consts.MetricStoreAllInsContainer, float64(cpu.StoreAllIns))
		set(consts.MetricStoreInsContainer, float64(cpu.StoreIns))
		set(consts.MetricCPUUpdateTimeContainer, float64(cpu.UpdateTime))
		set(consts.MetricCPUCyclesContainer, float64(cpu.Cycles))
		set(consts.MetricCPUInstructionsContainer, float64(cpu.Instructions))
		// L3Misses is similar to OcrReadDrams
		if cpu.L3Misses > 0 {
			set(consts.MetricCPUL3CacheMissContainer, float64(cpu.L3Misses))
		} else if cpu.OcrReadDrams > 0 {
			set(consts.MetricCPUL3CacheMissContainer, float64(cpu.OcrReadDrams))
		}

		setCPI(float64(cpu.Cycles), float64(cpu.Instructions))
	case cgStats.CgroupType == "V2" && cgStats.V2 != nil:
		cpu := cgStats.V2.Cpu
		updateTime = time.Unix(cpu.UpdateTime, 0)

		// todo it's kind of confusing but the `cpu-usage-ratio` in `cgroup-level` actually represents `actual cores`,
		//  we will always rename metric in local store to eliminate `ratio` to avoid ambiguity.
		set(consts.MetricCPUUsageContainer, cpu.CPUUsageRatio)
		set(consts.MetricCPUUsageUserContainer, cpu.CPUUserUsageRatio)
		set(consts.MetricCPUUsageSysContainer, cpu.CPUSysUsageRatio)

		set(consts.MetricCPUNrRunnableContainer, float64(cpu.TaskNrRunning))
		set(consts.MetricCPUNrUninterruptibleContainer, float64(cpu.TaskNrUninterruptible))
		set(consts.MetricCPUNrIOWaitContainer, float64(cpu.TaskNrIoWait))
		set(consts.MetricCPUThrottledTimeContainer, float64(cpu.CPUStats.ThrottledUsec))
		set(consts.MetricCPUNrThrottledContainer, float64(cpu.CPUStats.NrThrottled))
		set(consts.MetricCPUNrPeriodContainer, float64(cpu.CPUStats.NrPeriods))

		set(consts.MetricLoad1MinContainer, cpu.Load.One)
		set(consts.MetricLoad5MinContainer, cpu.Load.Five)
		set(consts.MetricLoad15MinContainer, cpu.Load.Fifteen)

		set(consts.MetricOCRReadDRAMsContainer, float64(cpu.OcrReadDrams))
		set(consts.MetricIMCWriteContainer, float64(cpu.ImcWrites))
		set(consts.MetricStoreAllInsContainer, float64(cpu.StoreAllIns))
		set(consts.MetricStoreInsContainer, float64(cpu.StoreIns))
		set(consts.MetricCPUUpdateTimeContainer, float64(cpu.UpdateTime))
		set(consts.MetricCPUCyclesContainer, float64(cpu.Cycles))
		set(consts.MetricCPUInstructionsContainer, float64(cpu.Instructions))
		// L3Misses is similar to OcrReadDrams
		if cpu.L3Misses > 0 {
			set(consts.MetricCPUL3CacheMissContainer, float64(cpu.L3Misses))
		} else if cpu.OcrReadDrams > 0 {
			set(consts.MetricCPUL3CacheMissContainer, float64(cpu.OcrReadDrams))
		}

		setCPI(float64(cpu.Cycles), float64(cpu.Instructions))
	}
}

// processContainerMemoryData stores the memory statistics malachite reported
// for the given container.
//
// The previously stored memory update time is read first and handed to
// processContainerMemRelevantRate, because this call overwrites it with the
// UpdateTime of the current sample.
func (m *MalachiteMetricsProvisioner) processContainerMemoryData(podUID, containerName string, cgStats *malachitetypes.MalachiteCgroupInfo) {
	if cgStats == nil {
		return
	}

	lastUpdateTimeMetric, _ := m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricMemUpdateTimeContainer)

	m.processContainerMemRelevantRate(podUID, containerName, cgStats, lastUpdateTimeMetric.Value)

	var updateTime time.Time
	set := func(metricName string, value float64) {
		m.metricStore.SetContainerMetric(podUID, containerName, metricName, utilmetric.MetricData{Value: value, Time: &updateTime})
	}

	switch {
	case cgStats.CgroupType == "V1" && cgStats.V1 != nil:
		mem := cgStats.V1.Memory
		updateTime = time.Unix(mem.UpdateTime, 0)

		set(consts.MetricMemLimitContainer, float64(mem.MemoryLimitInBytes))
		set(consts.MetricMemTCPLimitContainer, float64(mem.KernMemoryTcpLimitInBytes))
		set(consts.MetricMemUsageContainer, float64(mem.MemoryUsageInBytes))
		// User usage is total usage minus kernel usage. This used to subtract
		// from the memory *limit*, which reports headroom rather than usage.
		// The subtraction is done in float64 so that it cannot wrap around if
		// the kernel counter momentarily exceeds the usage counter.
		set(consts.MetricMemUsageUserContainer, float64(mem.MemoryUsageInBytes)-float64(mem.KernMemoryUsageInBytes))
		set(consts.MetricMemUsageKernContainer, float64(mem.KernMemoryUsageInBytes))
		set(consts.MetricMemRssContainer, float64(mem.TotalRss))
		set(consts.MetricMemCacheContainer, float64(mem.TotalCache))
		set(consts.MetricMemShmemContainer, float64(mem.TotalShmem))

		set(consts.MetricMemDirtyContainer, float64(mem.Dirty))
		set(consts.MetricMemWritebackContainer, float64(mem.Writeback))
		set(consts.MetricMemPgfaultContainer, float64(mem.Pgfault))
		set(consts.MetricMemPgmajfaultContainer, float64(mem.Pgmajfault))
		set(consts.MetricMemAllocstallContainer, float64(mem.TotalAllocstall))
		set(consts.MetricMemKswapdstealContainer, float64(mem.KswapdSteal))

		set(consts.MetricMemOomContainer, float64(mem.BpfMemStat.OomCnt))
		// set(consts.MetricMemScaleFactorContainer, general.UIntPointerToFloat64(mem.WatermarkScaleFactor))
		set(consts.MetricMemUpdateTimeContainer, float64(mem.UpdateTime))
	case cgStats.CgroupType == "V2" && cgStats.V2 != nil:
		mem := cgStats.V2.Memory
		updateTime = time.Unix(mem.UpdateTime, 0)

		set(consts.MetricMemUsageContainer, float64(mem.MemoryUsageInBytes))
		set(consts.MetricMemUsageKernContainer, float64(mem.MemStats.Kernel))
		set(consts.MetricMemRssContainer, float64(mem.MemStats.Anon))
		set(consts.MetricMemCacheContainer, float64(mem.MemStats.File))

		set(consts.MetricMemShmemContainer, float64(mem.MemStats.Shmem))
		set(consts.MetricMemPgfaultContainer, float64(mem.MemStats.Pgfault))
		set(consts.MetricMemPgmajfaultContainer, float64(mem.MemStats.Pgmajfault))

		set(consts.MetricMemDirtyContainer, float64(mem.MemStats.FileDirty))
		set(consts.MetricMemOomContainer, float64(mem.BpfMemStat.OomCnt))
		set(consts.MetricMemWritebackContainer, float64(mem.MemStats.FileWriteback))
		// set(consts.MetricMemScaleFactorContainer, general.UInt64PointerToFloat64(mem.WatermarkScaleFactor))
		set(consts.MetricMemUpdateTimeContainer, float64(mem.UpdateTime))
		set(consts.MetricMemPgstealContainer, float64(mem.MemStats.Pgsteal))
		set(consts.MetricMemPgscanContainer, float64(mem.MemStats.Pgscan))
		set(consts.MetricMemWorkingsetRefaultContainer, float64(mem.MemStats.WorkingsetRefault))
		set(consts.MetricMemWorkingsetActivateContainer, float64(mem.MemStats.WorkingsetActivate))
		set(consts.MetricMemPsiAvg60Container, mem.MemPressure.Some.Avg60)
		set(consts.MetricMemInactiveAnonContainer, float64(mem.MemStats.InactiveAnon))
		set(consts.MetricMemInactiveFileContainer, float64(mem.MemStats.InactiveFile))
	}
}

// processContainerBlkIOData feeds the container's BPF filesystem counter
// deltas (read/write operations and bytes) to setContainerRateMetric together
// with the previously stored and the current update timestamps, then stores
// the current timestamp for the next round.
func (m *MalachiteMetricsProvisioner) processContainerBlkIOData(podUID, containerName string, cgStats *malachitetypes.MalachiteCgroupInfo) {
	if cgStats == nil {
		return
	}

	lastUpdateTime, _ := m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricBlkioUpdateTimeContainer)
	lastTimestampInSec := int64(lastUpdateTime.Value)

	var (
		updateTime           time.Time
		updateTimestampInSec int64
	)
	setRate := func(metricName string, delta func() float64) {
		m.setContainerRateMetric(podUID, containerName, metricName, delta, lastTimestampInSec, updateTimestampInSec)
	}
	setUpdateTime := func() {
		m.metricStore.SetContainerMetric(podUID, containerName, consts.MetricBlkioUpdateTimeContainer,
			utilmetric.MetricData{Value: float64(updateTimestampInSec), Time: &updateTime})
	}

	switch {
	case cgStats.CgroupType == "V1" && cgStats.V1 != nil:
		blkio := cgStats.V1.Blkio
		updateTime = time.Unix(blkio.UpdateTime, 0)
		updateTimestampInSec = updateTime.Unix()

		setRate(consts.MetricBlkioReadIopsContainer, func() float64 {
			return float64(uint64CounterDelta(blkio.OldBpfFsData.FsRead, blkio.BpfFsData.FsRead))
		})
		setRate(consts.MetricBlkioWriteIopsContainer, func() float64 {
			return float64(uint64CounterDelta(blkio.OldBpfFsData.FsWrite, blkio.BpfFsData.FsWrite))
		})
		setRate(consts.MetricBlkioReadBpsContainer, func() float64 {
			return float64(uint64CounterDelta(blkio.OldBpfFsData.FsReadBytes, blkio.BpfFsData.FsReadBytes))
		})
		setRate(consts.MetricBlkioWriteBpsContainer, func() float64 {
			return float64(uint64CounterDelta(blkio.OldBpfFsData.FsWriteBytes, blkio.BpfFsData.FsWriteBytes))
		})

		setUpdateTime()
	case cgStats.CgroupType == "V2" && cgStats.V2 != nil:
		blkio := cgStats.V2.Blkio
		updateTime = time.Unix(blkio.UpdateTime, 0)
		updateTimestampInSec = updateTime.Unix()

		setRate(consts.MetricBlkioReadIopsContainer, func() float64 {
			return float64(uint64CounterDelta(blkio.OldBpfFsData.FsRead, blkio.BpfFsData.FsRead))
		})
		setRate(consts.MetricBlkioWriteIopsContainer, func() float64 {
			return float64(uint64CounterDelta(blkio.OldBpfFsData.FsWrite, blkio.BpfFsData.FsWrite))
		})
		setRate(consts.MetricBlkioReadBpsContainer, func() float64 {
			return float64(uint64CounterDelta(blkio.OldBpfFsData.FsReadBytes, blkio.BpfFsData.FsReadBytes))
		})
		setRate(consts.MetricBlkioWriteBpsContainer, func() float64 {
			return float64(uint64CounterDelta(blkio.OldBpfFsData.FsWriteBytes, blkio.BpfFsData.FsWriteBytes))
		})

		setUpdateTime()
	}
}

// processContainerNetData stores the container's current BPF TCP counters
// (packets and bytes, both directions) and the network update time.
//
// Before the stored update time is overwritten, its previous value is handed
// to processContainerNetRelevantRate. Nothing is done when cgStats is nil,
// when no data matches cgStats.CgroupType, or when the selected NetCls pointer
// is nil.
func (m *MalachiteMetricsProvisioner) processContainerNetData(podUID, containerName string, cgStats *malachitetypes.MalachiteCgroupInfo) {
	if cgStats == nil {
		return
	}

	var netCls *malachitetypes.NetClsCgData
	switch {
	case cgStats.CgroupType == "V1" && cgStats.V1 != nil:
		netCls = cgStats.V1.NetCls
	case cgStats.CgroupType == "V2" && cgStats.V2 != nil:
		netCls = cgStats.V2.NetCls
	}
	// The nil check must precede any field access: reading UpdateTime through
	// a nil NetCls pointer would panic.
	if netCls == nil {
		return
	}
	updateTime := time.Unix(netCls.UpdateTime, 0)

	lastUpdateTimeMetric, _ := m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricNetworkUpdateTimeContainer)
	m.processContainerNetRelevantRate(podUID, containerName, cgStats, lastUpdateTimeMetric.Value)

	set := func(metricName string, value float64) {
		m.metricStore.SetContainerMetric(podUID, containerName, metricName, utilmetric.MetricData{
			Value: value,
			Time:  &updateTime,
		})
	}
	set(consts.MetricNetTcpRecvPacketsContainer, float64(netCls.BpfNetData.NetTCPRx))
	set(consts.MetricNetTcpSendPacketsContainer, float64(netCls.BpfNetData.NetTCPTx))
	set(consts.MetricNetTcpRecvBytesContainer, float64(netCls.BpfNetData.NetTCPRxBytes))
	set(consts.MetricNetTcpSendBytesContainer, float64(netCls.BpfNetData.NetTCPTxBytes))

	set(consts.MetricNetworkUpdateTimeContainer, float64(updateTime.Unix()))
}

// processContainerPerfData is intentionally a no-op.
//
// Currently, the valid perf event data is provided by malachite through
// types.MalachiteCgroupInfo.V1/V2.CPU. The empty func is kept in case malachite
// provides more detailed perf events someday.
func (m *MalachiteMetricsProvisioner) processContainerPerfData(podUID, containerName string, cgStats *malachitetypes.MalachiteCgroupInfo) {
}

// processContainerPerNumaMemoryData stores the per-NUMA-node total/file/anon
// memory of the given container.
//
// The NUMA id is the reported name with its "N" prefix stripped and is passed
// on as a string without further validation. V1 values are shifted left by
// pageShift before being stored; V2 values are stored as reported, with the
// total computed as anon + file + unevictable.
func (m *MalachiteMetricsProvisioner) processContainerPerNumaMemoryData(podUID, containerName string, cgStats *malachitetypes.MalachiteCgroupInfo) {
	if cgStats == nil {
		return
	}

	var updateTime time.Time
	set := func(numaID, metricName string, value float64) {
		m.metricStore.SetContainerNumaMetric(podUID, containerName, numaID, metricName,
			utilmetric.MetricData{Value: value, Time: &updateTime})
	}

	switch {
	case cgStats.CgroupType == "V1" && cgStats.V1 != nil:
		updateTime = time.Unix(cgStats.V1.Memory.UpdateTime, 0)

		for _, data := range cgStats.V1.Memory.NumaStats {
			numaID := strings.TrimPrefix(data.NumaName, "N")
			set(numaID, consts.MetricsMemTotalPerNumaContainer, float64(data.Total<<pageShift))
			set(numaID, consts.MetricsMemFilePerNumaContainer, float64(data.File<<pageShift))
			set(numaID, consts.MetricsMemAnonPerNumaContainer, float64(data.Anon<<pageShift))
		}
	case cgStats.CgroupType == "V2" && cgStats.V2 != nil:
		updateTime = time.Unix(cgStats.V2.Memory.UpdateTime, 0)

		for numa, data := range cgStats.V2.Memory.MemNumaStats {
			numaID := strings.TrimPrefix(numa, "N")
			set(numaID, consts.MetricsMemTotalPerNumaContainer, float64(data.Anon+data.File+data.Unevictable))
			set(numaID, consts.MetricsMemFilePerNumaContainer, float64(data.File))
			set(numaID, consts.MetricsMemAnonPerNumaContainer, float64(data.Anon))
		}
	}
}