github.com/kubewharf/katalyst-core@v0.5.3/pkg/metaserver/agent/metric/provisioner/malachite/provisioner_calculate.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package malachite 18 19 // for those metrics need extra calculation logic, 20 // we will put them in a separate file here 21 import ( 22 "time" 23 24 "github.com/kubewharf/katalyst-core/pkg/consts" 25 "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/provisioner/malachite/types" 26 "github.com/kubewharf/katalyst-core/pkg/util/metric" 27 ) 28 29 // processContainerMemBandwidth handles memory bandwidth (read/write) rate in a period while, 30 // and it will need the previously collected data to do this 31 func (m *MalachiteMetricsProvisioner) processContainerMemBandwidth(podUID, containerName string, cgStats *types.MalachiteCgroupInfo, lastUpdateTimeInSec float64) { 32 var ( 33 lastOCRReadDRAMsMetric, _ = m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricOCRReadDRAMsContainer) 34 lastIMCWritesMetric, _ = m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricIMCWriteContainer) 35 lastStoreAllInsMetric, _ = m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricStoreAllInsContainer) 36 lastStoreInsMetric, _ = m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricStoreInsContainer) 37 38 // those value are uint64 type from source 39 lastOCRReadDRAMs = uint64(lastOCRReadDRAMsMetric.Value) 40 lastIMCWrites = uint64(lastIMCWritesMetric.Value) 41 lastStoreAllIns = uint64(lastStoreAllInsMetric.Value) 42 lastStoreIns = uint64(lastStoreInsMetric.Value) 43 ) 44 45 var ( 46 curOCRReadDRAMs, curIMCWrites, curStoreAllIns, curStoreIns uint64 47 curUpdateTimeInSec float64 48 ) 49 50 if cgStats.CgroupType == "V1" { 51 curOCRReadDRAMs = cgStats.V1.Cpu.OcrReadDrams 52 curIMCWrites = cgStats.V1.Cpu.ImcWrites 53 curStoreAllIns = cgStats.V1.Cpu.StoreAllIns 54 curStoreIns = cgStats.V1.Cpu.StoreIns 55 curUpdateTimeInSec = float64(cgStats.V1.Cpu.UpdateTime) 56 } else if cgStats.CgroupType == "V2" { 57 curOCRReadDRAMs = cgStats.V2.Cpu.OcrReadDrams 58 curIMCWrites = cgStats.V2.Cpu.ImcWrites 59 curStoreAllIns = cgStats.V2.Cpu.StoreAllIns 60 curStoreIns = cgStats.V2.Cpu.StoreIns 61 curUpdateTimeInSec = float64(cgStats.V2.Cpu.UpdateTime) 62 } else { 63 return 64 } 65 66 // read bandwidth 67 m.setContainerRateMetric(podUID, containerName, consts.MetricMemBandwidthReadContainer, 68 func() float64 { 69 // read megabyte 70 return float64(uint64CounterDelta(lastOCRReadDRAMs, curOCRReadDRAMs)) * 64 / (1024 * 1024) 71 }, 72 int64(lastUpdateTimeInSec), int64(curUpdateTimeInSec)) 73 74 // write bandwidth 75 m.setContainerRateMetric(podUID, containerName, consts.MetricMemBandwidthWriteContainer, 76 func() float64 { 77 storeAllInsInc := uint64CounterDelta(lastStoreAllIns, curStoreAllIns) 78 if storeAllInsInc == 0 { 79 return 0 80 } 81 82 storeInsInc := uint64CounterDelta(lastStoreIns, curStoreIns) 83 imcWritesInc := uint64CounterDelta(lastIMCWrites, curIMCWrites) 84 85 // write megabyte 86 return float64(storeInsInc) / float64(storeAllInsInc) / (1024 * 1024) * float64(imcWritesInc) * 64 87 }, 88 int64(lastUpdateTimeInSec), int64(curUpdateTimeInSec)) 89 } 90 91 // processContainerCPURelevantRate is used to calculate some container cpu-relevant rates. 92 // this would be executed before setting the latest values into metricStore. 93 func (m *MalachiteMetricsProvisioner) processContainerCPURelevantRate(podUID, containerName string, cgStats *types.MalachiteCgroupInfo, lastUpdateTimeInSec float64) { 94 lastMetricValueFn := func(metricName string) float64 { 95 lastMetric, _ := m.metricStore.GetContainerMetric(podUID, containerName, metricName) 96 return lastMetric.Value 97 } 98 99 var ( 100 lastCPUIns = uint64(lastMetricValueFn(consts.MetricCPUInstructionsContainer)) 101 lastCPUCycles = uint64(lastMetricValueFn(consts.MetricCPUCyclesContainer)) 102 lastCPUNRTht = uint64(lastMetricValueFn(consts.MetricCPUNrThrottledContainer)) 103 lastCPUNRPeriod = uint64(lastMetricValueFn(consts.MetricCPUNrPeriodContainer)) 104 lastThrottleTime = uint64(lastMetricValueFn(consts.MetricCPUThrottledTimeContainer)) 105 lastL3CacheMiss = uint64(lastMetricValueFn(consts.MetricCPUL3CacheMissContainer)) 106 107 curCPUIns, curCPUCycles, curCPUNRTht, curCPUNRPeriod, curCPUThrottleTime, curL3CacheMiss uint64 108 109 curUpdateTime int64 110 ) 111 112 if cgStats.CgroupType == "V1" { 113 curCPUIns = cgStats.V1.Cpu.Instructions 114 curCPUCycles = cgStats.V1.Cpu.Cycles 115 curCPUNRTht = cgStats.V1.Cpu.CPUNrThrottled 116 curCPUNRPeriod = cgStats.V1.Cpu.CPUNrPeriods 117 curCPUThrottleTime = cgStats.V1.Cpu.CPUThrottledTime / 1000 118 if cgStats.V1.Cpu.L3Misses > 0 { 119 curL3CacheMiss = cgStats.V1.Cpu.L3Misses 120 } else if cgStats.V1.Cpu.OcrReadDrams > 0 { 121 curL3CacheMiss = cgStats.V1.Cpu.OcrReadDrams 122 } 123 curUpdateTime = cgStats.V1.Cpu.UpdateTime 124 } else if cgStats.CgroupType == "V2" { 125 curCPUIns = cgStats.V2.Cpu.Instructions 126 curCPUCycles = cgStats.V2.Cpu.Cycles 127 curCPUNRTht = cgStats.V2.Cpu.CPUStats.NrThrottled 128 curCPUNRPeriod = cgStats.V2.Cpu.CPUStats.NrPeriods 129 curCPUThrottleTime = cgStats.V2.Cpu.CPUStats.ThrottledUsec 130 if cgStats.V2.Cpu.L3Misses > 0 { 131 curL3CacheMiss = cgStats.V2.Cpu.L3Misses 132 } else if cgStats.V2.Cpu.OcrReadDrams > 0 { 133 curL3CacheMiss = cgStats.V2.Cpu.OcrReadDrams 134 } 135 curUpdateTime = cgStats.V2.Cpu.UpdateTime 136 } else { 137 return 138 } 139 m.setContainerRateMetric(podUID, containerName, consts.MetricCPUInstructionsRateContainer, func() float64 { 140 return float64(uint64CounterDelta(lastCPUIns, curCPUIns)) 141 }, int64(lastUpdateTimeInSec), curUpdateTime) 142 m.setContainerRateMetric(podUID, containerName, consts.MetricCPUCyclesRateContainer, func() float64 { 143 return float64(uint64CounterDelta(lastCPUCycles, curCPUCycles)) 144 }, int64(lastUpdateTimeInSec), curUpdateTime) 145 m.setContainerRateMetric(podUID, containerName, consts.MetricCPUNrThrottledRateContainer, func() float64 { 146 return float64(uint64CounterDelta(lastCPUNRTht, curCPUNRTht)) 147 }, int64(lastUpdateTimeInSec), curUpdateTime) 148 m.setContainerRateMetric(podUID, containerName, consts.MetricCPUNrPeriodRateContainer, func() float64 { 149 return float64(uint64CounterDelta(lastCPUNRPeriod, curCPUNRPeriod)) 150 }, int64(lastUpdateTimeInSec), curUpdateTime) 151 m.setContainerRateMetric(podUID, containerName, consts.MetricCPUThrottledTimeRateContainer, func() float64 { 152 return float64(uint64CounterDelta(lastThrottleTime, curCPUThrottleTime)) 153 }, int64(lastUpdateTimeInSec), curUpdateTime) 154 m.setContainerRateMetric(podUID, containerName, consts.MetricCPUL3CacheMissRateContainer, func() float64 { 155 return float64(uint64CounterDelta(lastL3CacheMiss, curL3CacheMiss)) 156 }, int64(lastUpdateTimeInSec), curUpdateTime) 157 } 158 159 func (m *MalachiteMetricsProvisioner) processContainerMemRelevantRate(podUID, containerName string, cgStats *types.MalachiteCgroupInfo, lastUpdateTimeInSec float64) { 160 lastMetricValueFn := func(metricName string) float64 { 161 lastMetric, _ := m.metricStore.GetContainerMetric(podUID, containerName, metricName) 162 return lastMetric.Value 163 } 164 165 var ( 166 lastPGFault = uint64(lastMetricValueFn(consts.MetricMemPgfaultContainer)) 167 lastPGMajFault = uint64(lastMetricValueFn(consts.MetricMemPgmajfaultContainer)) 168 lastOOMCnt = uint64(lastMetricValueFn(consts.MetricMemOomContainer)) 169 170 curPGFault, curPGMajFault, curOOMCnt uint64 171 172 curUpdateTime int64 173 ) 174 175 if cgStats.CgroupType == "V1" { 176 curPGFault = cgStats.V1.Memory.Pgfault 177 curPGMajFault = cgStats.V1.Memory.Pgmajfault 178 curOOMCnt = cgStats.V1.Memory.BpfMemStat.OomCnt 179 curUpdateTime = cgStats.V1.Memory.UpdateTime 180 } else if cgStats.CgroupType == "V2" { 181 curPGFault = cgStats.V2.Memory.MemStats.Pgmajfault 182 curPGMajFault = cgStats.V2.Memory.MemStats.Pgmajfault 183 curOOMCnt = cgStats.V2.Memory.BpfMemStat.OomCnt 184 curUpdateTime = cgStats.V2.Memory.UpdateTime 185 } else { 186 return 187 } 188 189 m.setContainerRateMetric(podUID, containerName, consts.MetricMemPgfaultRateContainer, func() float64 { 190 return float64(uint64CounterDelta(lastPGFault, curPGFault)) 191 }, int64(lastUpdateTimeInSec), curUpdateTime) 192 m.setContainerRateMetric(podUID, containerName, consts.MetricMemPgmajfaultRateContainer, func() float64 { 193 return float64(uint64CounterDelta(lastPGMajFault, curPGMajFault)) 194 }, int64(lastUpdateTimeInSec), curUpdateTime) 195 m.setContainerRateMetric(podUID, containerName, consts.MetricMemOomRateContainer, func() float64 { 196 return float64(uint64CounterDelta(lastOOMCnt, curOOMCnt)) 197 }, int64(lastUpdateTimeInSec), curUpdateTime) 198 } 199 200 func (m *MalachiteMetricsProvisioner) processContainerNetRelevantRate(podUID, containerName string, cgStats *types.MalachiteCgroupInfo, lastUpdateTimeInSec float64) { 201 lastMetricValueFn := func(metricName string) float64 { 202 lastMetric, _ := m.metricStore.GetContainerMetric(podUID, containerName, metricName) 203 return lastMetric.Value 204 } 205 206 var ( 207 lastNetTCPRx = uint64(lastMetricValueFn(consts.MetricNetTcpRecvPacketsContainer)) 208 lastNetTCPTx = uint64(lastMetricValueFn(consts.MetricNetTcpSendPacketsContainer)) 209 lastNetTCPRxBytes = uint64(lastMetricValueFn(consts.MetricNetTcpRecvBytesContainer)) 210 lastNetTCPTxBytes = uint64(lastMetricValueFn(consts.MetricNetTcpSendBytesContainer)) 211 212 netData *types.NetClsCgData 213 ) 214 215 if cgStats.V1 != nil { 216 netData = cgStats.V1.NetCls 217 } else if cgStats.V2 != nil { 218 netData = cgStats.V2.NetCls 219 } else { 220 return 221 } 222 223 curUpdateTime := netData.UpdateTime 224 _curUpdateTime := time.Unix(curUpdateTime, 0) 225 updateTimeDiff := float64(curUpdateTime) - lastUpdateTimeInSec 226 if updateTimeDiff > 0 { 227 m.setContainerRateMetric(podUID, containerName, consts.MetricNetTcpSendBPSContainer, func() float64 { 228 return float64(uint64CounterDelta(lastNetTCPTxBytes, netData.BpfNetData.NetTCPTxBytes)) 229 }, int64(lastUpdateTimeInSec), curUpdateTime) 230 m.setContainerRateMetric(podUID, containerName, consts.MetricNetTcpRecvBPSContainer, func() float64 { 231 return float64(uint64CounterDelta(lastNetTCPRxBytes, netData.BpfNetData.NetTCPRxBytes)) 232 }, int64(lastUpdateTimeInSec), curUpdateTime) 233 m.setContainerRateMetric(podUID, containerName, consts.MetricNetTcpSendPpsContainer, func() float64 { 234 return float64(uint64CounterDelta(lastNetTCPTx, netData.BpfNetData.NetTCPTx)) 235 }, int64(lastUpdateTimeInSec), curUpdateTime) 236 m.setContainerRateMetric(podUID, containerName, consts.MetricNetTcpRecvPpsContainer, func() float64 { 237 return float64(uint64CounterDelta(lastNetTCPRx, netData.BpfNetData.NetTCPRx)) 238 }, int64(lastUpdateTimeInSec), curUpdateTime) 239 } else { 240 m.metricStore.SetContainerMetric(podUID, containerName, consts.MetricNetTcpSendBPSContainer, metric.MetricData{ 241 Value: float64(uint64CounterDelta(netData.OldBpfNetData.NetTCPTxBytes, netData.BpfNetData.NetTCPTxBytes)) / defaultMetricUpdateInterval, 242 Time: &_curUpdateTime, 243 }) 244 m.metricStore.SetContainerMetric(podUID, containerName, consts.MetricNetTcpRecvBPSContainer, metric.MetricData{ 245 Value: float64(uint64CounterDelta(netData.OldBpfNetData.NetTCPRxBytes, netData.BpfNetData.NetTCPRxBytes)) / defaultMetricUpdateInterval, 246 Time: &_curUpdateTime, 247 }) 248 m.metricStore.SetContainerMetric(podUID, containerName, consts.MetricNetTcpSendPpsContainer, metric.MetricData{ 249 Value: float64(uint64CounterDelta(netData.OldBpfNetData.NetTCPTx, netData.BpfNetData.NetTCPTx)) / defaultMetricUpdateInterval, 250 Time: &_curUpdateTime, 251 }) 252 m.metricStore.SetContainerMetric(podUID, containerName, consts.MetricNetTcpRecvPpsContainer, metric.MetricData{ 253 Value: float64(uint64CounterDelta(netData.OldBpfNetData.NetTCPRx, netData.BpfNetData.NetTCPRx)) / defaultMetricUpdateInterval, 254 Time: &_curUpdateTime, 255 }) 256 } 257 } 258 259 // setContainerRateMetric is used to set rate metric in container level. 260 // This method will check if the metric is really updated, and decide weather to update metric in metricStore. 261 // The method could help avoid lots of meaningless "zero" value. 262 func (m *MalachiteMetricsProvisioner) setContainerRateMetric(podUID, containerName, targetMetricName string, deltaValueFunc func() float64, lastUpdateTime, curUpdateTime int64) { 263 timeDeltaInSec := curUpdateTime - lastUpdateTime 264 if lastUpdateTime == 0 || timeDeltaInSec <= 0 { 265 // Return directly when the following situations happen: 266 // 1. lastUpdateTime == 0, which means no previous data. 267 // 2. timeDeltaInSec == 0, which means the metric is not updated, 268 // this is originated from the sampling lag between katalyst-core and malachite(data source) 269 // 3. timeDeltaInSec < 0, this is illegal and unlikely to happen. 270 return 271 } 272 273 // TODO this will duplicate "updateTime" a lot. 274 // But to my knowledge, the cost could be acceptable. 275 updateTime := time.Unix(curUpdateTime, 0) 276 m.metricStore.SetContainerMetric(podUID, containerName, targetMetricName, 277 metric.MetricData{Value: deltaValueFunc() / float64(timeDeltaInSec), Time: &updateTime}) 278 } 279 280 // uint64CounterDelta calculate the delta between two uint64 counters 281 // Sometimes the counter value would go beyond the MaxUint64. In that case, 282 // negative counter delta would happen, and the data is not incorrect. 283 func uint64CounterDelta(previous, current uint64) uint64 { 284 if current >= previous { 285 return current - previous 286 } 287 288 // Return 0 when previous > current, because we may not be able to make sure 289 // the upper bound for each counter. 290 return 0 291 }