github.com/kubewharf/katalyst-core@v0.5.3/pkg/metaserver/agent/metric/provisioner/malachite/provisioner_calculate.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package malachite
    18  
// Metrics that need extra calculation logic (e.g. rates derived from
// counters) are handled in this separate file.
    21  import (
    22  	"time"
    23  
    24  	"github.com/kubewharf/katalyst-core/pkg/consts"
    25  	"github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/provisioner/malachite/types"
    26  	"github.com/kubewharf/katalyst-core/pkg/util/metric"
    27  )
    28  
    29  // processContainerMemBandwidth handles memory bandwidth (read/write) rate in a period while,
    30  // and it will need the previously collected data to do this
    31  func (m *MalachiteMetricsProvisioner) processContainerMemBandwidth(podUID, containerName string, cgStats *types.MalachiteCgroupInfo, lastUpdateTimeInSec float64) {
    32  	var (
    33  		lastOCRReadDRAMsMetric, _ = m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricOCRReadDRAMsContainer)
    34  		lastIMCWritesMetric, _    = m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricIMCWriteContainer)
    35  		lastStoreAllInsMetric, _  = m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricStoreAllInsContainer)
    36  		lastStoreInsMetric, _     = m.metricStore.GetContainerMetric(podUID, containerName, consts.MetricStoreInsContainer)
    37  
    38  		// those value are uint64 type from source
    39  		lastOCRReadDRAMs = uint64(lastOCRReadDRAMsMetric.Value)
    40  		lastIMCWrites    = uint64(lastIMCWritesMetric.Value)
    41  		lastStoreAllIns  = uint64(lastStoreAllInsMetric.Value)
    42  		lastStoreIns     = uint64(lastStoreInsMetric.Value)
    43  	)
    44  
    45  	var (
    46  		curOCRReadDRAMs, curIMCWrites, curStoreAllIns, curStoreIns uint64
    47  		curUpdateTimeInSec                                         float64
    48  	)
    49  
    50  	if cgStats.CgroupType == "V1" {
    51  		curOCRReadDRAMs = cgStats.V1.Cpu.OcrReadDrams
    52  		curIMCWrites = cgStats.V1.Cpu.ImcWrites
    53  		curStoreAllIns = cgStats.V1.Cpu.StoreAllIns
    54  		curStoreIns = cgStats.V1.Cpu.StoreIns
    55  		curUpdateTimeInSec = float64(cgStats.V1.Cpu.UpdateTime)
    56  	} else if cgStats.CgroupType == "V2" {
    57  		curOCRReadDRAMs = cgStats.V2.Cpu.OcrReadDrams
    58  		curIMCWrites = cgStats.V2.Cpu.ImcWrites
    59  		curStoreAllIns = cgStats.V2.Cpu.StoreAllIns
    60  		curStoreIns = cgStats.V2.Cpu.StoreIns
    61  		curUpdateTimeInSec = float64(cgStats.V2.Cpu.UpdateTime)
    62  	} else {
    63  		return
    64  	}
    65  
    66  	// read bandwidth
    67  	m.setContainerRateMetric(podUID, containerName, consts.MetricMemBandwidthReadContainer,
    68  		func() float64 {
    69  			// read megabyte
    70  			return float64(uint64CounterDelta(lastOCRReadDRAMs, curOCRReadDRAMs)) * 64 / (1024 * 1024)
    71  		},
    72  		int64(lastUpdateTimeInSec), int64(curUpdateTimeInSec))
    73  
    74  	// write bandwidth
    75  	m.setContainerRateMetric(podUID, containerName, consts.MetricMemBandwidthWriteContainer,
    76  		func() float64 {
    77  			storeAllInsInc := uint64CounterDelta(lastStoreAllIns, curStoreAllIns)
    78  			if storeAllInsInc == 0 {
    79  				return 0
    80  			}
    81  
    82  			storeInsInc := uint64CounterDelta(lastStoreIns, curStoreIns)
    83  			imcWritesInc := uint64CounterDelta(lastIMCWrites, curIMCWrites)
    84  
    85  			// write megabyte
    86  			return float64(storeInsInc) / float64(storeAllInsInc) / (1024 * 1024) * float64(imcWritesInc) * 64
    87  		},
    88  		int64(lastUpdateTimeInSec), int64(curUpdateTimeInSec))
    89  }
    90  
    91  // processContainerCPURelevantRate is used to calculate some container cpu-relevant rates.
    92  // this would be executed before setting the latest values into metricStore.
    93  func (m *MalachiteMetricsProvisioner) processContainerCPURelevantRate(podUID, containerName string, cgStats *types.MalachiteCgroupInfo, lastUpdateTimeInSec float64) {
    94  	lastMetricValueFn := func(metricName string) float64 {
    95  		lastMetric, _ := m.metricStore.GetContainerMetric(podUID, containerName, metricName)
    96  		return lastMetric.Value
    97  	}
    98  
    99  	var (
   100  		lastCPUIns       = uint64(lastMetricValueFn(consts.MetricCPUInstructionsContainer))
   101  		lastCPUCycles    = uint64(lastMetricValueFn(consts.MetricCPUCyclesContainer))
   102  		lastCPUNRTht     = uint64(lastMetricValueFn(consts.MetricCPUNrThrottledContainer))
   103  		lastCPUNRPeriod  = uint64(lastMetricValueFn(consts.MetricCPUNrPeriodContainer))
   104  		lastThrottleTime = uint64(lastMetricValueFn(consts.MetricCPUThrottledTimeContainer))
   105  		lastL3CacheMiss  = uint64(lastMetricValueFn(consts.MetricCPUL3CacheMissContainer))
   106  
   107  		curCPUIns, curCPUCycles, curCPUNRTht, curCPUNRPeriod, curCPUThrottleTime, curL3CacheMiss uint64
   108  
   109  		curUpdateTime int64
   110  	)
   111  
   112  	if cgStats.CgroupType == "V1" {
   113  		curCPUIns = cgStats.V1.Cpu.Instructions
   114  		curCPUCycles = cgStats.V1.Cpu.Cycles
   115  		curCPUNRTht = cgStats.V1.Cpu.CPUNrThrottled
   116  		curCPUNRPeriod = cgStats.V1.Cpu.CPUNrPeriods
   117  		curCPUThrottleTime = cgStats.V1.Cpu.CPUThrottledTime / 1000
   118  		if cgStats.V1.Cpu.L3Misses > 0 {
   119  			curL3CacheMiss = cgStats.V1.Cpu.L3Misses
   120  		} else if cgStats.V1.Cpu.OcrReadDrams > 0 {
   121  			curL3CacheMiss = cgStats.V1.Cpu.OcrReadDrams
   122  		}
   123  		curUpdateTime = cgStats.V1.Cpu.UpdateTime
   124  	} else if cgStats.CgroupType == "V2" {
   125  		curCPUIns = cgStats.V2.Cpu.Instructions
   126  		curCPUCycles = cgStats.V2.Cpu.Cycles
   127  		curCPUNRTht = cgStats.V2.Cpu.CPUStats.NrThrottled
   128  		curCPUNRPeriod = cgStats.V2.Cpu.CPUStats.NrPeriods
   129  		curCPUThrottleTime = cgStats.V2.Cpu.CPUStats.ThrottledUsec
   130  		if cgStats.V2.Cpu.L3Misses > 0 {
   131  			curL3CacheMiss = cgStats.V2.Cpu.L3Misses
   132  		} else if cgStats.V2.Cpu.OcrReadDrams > 0 {
   133  			curL3CacheMiss = cgStats.V2.Cpu.OcrReadDrams
   134  		}
   135  		curUpdateTime = cgStats.V2.Cpu.UpdateTime
   136  	} else {
   137  		return
   138  	}
   139  	m.setContainerRateMetric(podUID, containerName, consts.MetricCPUInstructionsRateContainer, func() float64 {
   140  		return float64(uint64CounterDelta(lastCPUIns, curCPUIns))
   141  	}, int64(lastUpdateTimeInSec), curUpdateTime)
   142  	m.setContainerRateMetric(podUID, containerName, consts.MetricCPUCyclesRateContainer, func() float64 {
   143  		return float64(uint64CounterDelta(lastCPUCycles, curCPUCycles))
   144  	}, int64(lastUpdateTimeInSec), curUpdateTime)
   145  	m.setContainerRateMetric(podUID, containerName, consts.MetricCPUNrThrottledRateContainer, func() float64 {
   146  		return float64(uint64CounterDelta(lastCPUNRTht, curCPUNRTht))
   147  	}, int64(lastUpdateTimeInSec), curUpdateTime)
   148  	m.setContainerRateMetric(podUID, containerName, consts.MetricCPUNrPeriodRateContainer, func() float64 {
   149  		return float64(uint64CounterDelta(lastCPUNRPeriod, curCPUNRPeriod))
   150  	}, int64(lastUpdateTimeInSec), curUpdateTime)
   151  	m.setContainerRateMetric(podUID, containerName, consts.MetricCPUThrottledTimeRateContainer, func() float64 {
   152  		return float64(uint64CounterDelta(lastThrottleTime, curCPUThrottleTime))
   153  	}, int64(lastUpdateTimeInSec), curUpdateTime)
   154  	m.setContainerRateMetric(podUID, containerName, consts.MetricCPUL3CacheMissRateContainer, func() float64 {
   155  		return float64(uint64CounterDelta(lastL3CacheMiss, curL3CacheMiss))
   156  	}, int64(lastUpdateTimeInSec), curUpdateTime)
   157  }
   158  
   159  func (m *MalachiteMetricsProvisioner) processContainerMemRelevantRate(podUID, containerName string, cgStats *types.MalachiteCgroupInfo, lastUpdateTimeInSec float64) {
   160  	lastMetricValueFn := func(metricName string) float64 {
   161  		lastMetric, _ := m.metricStore.GetContainerMetric(podUID, containerName, metricName)
   162  		return lastMetric.Value
   163  	}
   164  
   165  	var (
   166  		lastPGFault    = uint64(lastMetricValueFn(consts.MetricMemPgfaultContainer))
   167  		lastPGMajFault = uint64(lastMetricValueFn(consts.MetricMemPgmajfaultContainer))
   168  		lastOOMCnt     = uint64(lastMetricValueFn(consts.MetricMemOomContainer))
   169  
   170  		curPGFault, curPGMajFault, curOOMCnt uint64
   171  
   172  		curUpdateTime int64
   173  	)
   174  
   175  	if cgStats.CgroupType == "V1" {
   176  		curPGFault = cgStats.V1.Memory.Pgfault
   177  		curPGMajFault = cgStats.V1.Memory.Pgmajfault
   178  		curOOMCnt = cgStats.V1.Memory.BpfMemStat.OomCnt
   179  		curUpdateTime = cgStats.V1.Memory.UpdateTime
   180  	} else if cgStats.CgroupType == "V2" {
   181  		curPGFault = cgStats.V2.Memory.MemStats.Pgmajfault
   182  		curPGMajFault = cgStats.V2.Memory.MemStats.Pgmajfault
   183  		curOOMCnt = cgStats.V2.Memory.BpfMemStat.OomCnt
   184  		curUpdateTime = cgStats.V2.Memory.UpdateTime
   185  	} else {
   186  		return
   187  	}
   188  
   189  	m.setContainerRateMetric(podUID, containerName, consts.MetricMemPgfaultRateContainer, func() float64 {
   190  		return float64(uint64CounterDelta(lastPGFault, curPGFault))
   191  	}, int64(lastUpdateTimeInSec), curUpdateTime)
   192  	m.setContainerRateMetric(podUID, containerName, consts.MetricMemPgmajfaultRateContainer, func() float64 {
   193  		return float64(uint64CounterDelta(lastPGMajFault, curPGMajFault))
   194  	}, int64(lastUpdateTimeInSec), curUpdateTime)
   195  	m.setContainerRateMetric(podUID, containerName, consts.MetricMemOomRateContainer, func() float64 {
   196  		return float64(uint64CounterDelta(lastOOMCnt, curOOMCnt))
   197  	}, int64(lastUpdateTimeInSec), curUpdateTime)
   198  }
   199  
   200  func (m *MalachiteMetricsProvisioner) processContainerNetRelevantRate(podUID, containerName string, cgStats *types.MalachiteCgroupInfo, lastUpdateTimeInSec float64) {
   201  	lastMetricValueFn := func(metricName string) float64 {
   202  		lastMetric, _ := m.metricStore.GetContainerMetric(podUID, containerName, metricName)
   203  		return lastMetric.Value
   204  	}
   205  
   206  	var (
   207  		lastNetTCPRx      = uint64(lastMetricValueFn(consts.MetricNetTcpRecvPacketsContainer))
   208  		lastNetTCPTx      = uint64(lastMetricValueFn(consts.MetricNetTcpSendPacketsContainer))
   209  		lastNetTCPRxBytes = uint64(lastMetricValueFn(consts.MetricNetTcpRecvBytesContainer))
   210  		lastNetTCPTxBytes = uint64(lastMetricValueFn(consts.MetricNetTcpSendBytesContainer))
   211  
   212  		netData *types.NetClsCgData
   213  	)
   214  
   215  	if cgStats.V1 != nil {
   216  		netData = cgStats.V1.NetCls
   217  	} else if cgStats.V2 != nil {
   218  		netData = cgStats.V2.NetCls
   219  	} else {
   220  		return
   221  	}
   222  
   223  	curUpdateTime := netData.UpdateTime
   224  	_curUpdateTime := time.Unix(curUpdateTime, 0)
   225  	updateTimeDiff := float64(curUpdateTime) - lastUpdateTimeInSec
   226  	if updateTimeDiff > 0 {
   227  		m.setContainerRateMetric(podUID, containerName, consts.MetricNetTcpSendBPSContainer, func() float64 {
   228  			return float64(uint64CounterDelta(lastNetTCPTxBytes, netData.BpfNetData.NetTCPTxBytes))
   229  		}, int64(lastUpdateTimeInSec), curUpdateTime)
   230  		m.setContainerRateMetric(podUID, containerName, consts.MetricNetTcpRecvBPSContainer, func() float64 {
   231  			return float64(uint64CounterDelta(lastNetTCPRxBytes, netData.BpfNetData.NetTCPRxBytes))
   232  		}, int64(lastUpdateTimeInSec), curUpdateTime)
   233  		m.setContainerRateMetric(podUID, containerName, consts.MetricNetTcpSendPpsContainer, func() float64 {
   234  			return float64(uint64CounterDelta(lastNetTCPTx, netData.BpfNetData.NetTCPTx))
   235  		}, int64(lastUpdateTimeInSec), curUpdateTime)
   236  		m.setContainerRateMetric(podUID, containerName, consts.MetricNetTcpRecvPpsContainer, func() float64 {
   237  			return float64(uint64CounterDelta(lastNetTCPRx, netData.BpfNetData.NetTCPRx))
   238  		}, int64(lastUpdateTimeInSec), curUpdateTime)
   239  	} else {
   240  		m.metricStore.SetContainerMetric(podUID, containerName, consts.MetricNetTcpSendBPSContainer, metric.MetricData{
   241  			Value: float64(uint64CounterDelta(netData.OldBpfNetData.NetTCPTxBytes, netData.BpfNetData.NetTCPTxBytes)) / defaultMetricUpdateInterval,
   242  			Time:  &_curUpdateTime,
   243  		})
   244  		m.metricStore.SetContainerMetric(podUID, containerName, consts.MetricNetTcpRecvBPSContainer, metric.MetricData{
   245  			Value: float64(uint64CounterDelta(netData.OldBpfNetData.NetTCPRxBytes, netData.BpfNetData.NetTCPRxBytes)) / defaultMetricUpdateInterval,
   246  			Time:  &_curUpdateTime,
   247  		})
   248  		m.metricStore.SetContainerMetric(podUID, containerName, consts.MetricNetTcpSendPpsContainer, metric.MetricData{
   249  			Value: float64(uint64CounterDelta(netData.OldBpfNetData.NetTCPTx, netData.BpfNetData.NetTCPTx)) / defaultMetricUpdateInterval,
   250  			Time:  &_curUpdateTime,
   251  		})
   252  		m.metricStore.SetContainerMetric(podUID, containerName, consts.MetricNetTcpRecvPpsContainer, metric.MetricData{
   253  			Value: float64(uint64CounterDelta(netData.OldBpfNetData.NetTCPRx, netData.BpfNetData.NetTCPRx)) / defaultMetricUpdateInterval,
   254  			Time:  &_curUpdateTime,
   255  		})
   256  	}
   257  }
   258  
   259  // setContainerRateMetric is used to set rate metric in container level.
   260  // This method will check if the metric is really updated, and decide weather to update metric in metricStore.
   261  // The method could help avoid lots of meaningless "zero" value.
   262  func (m *MalachiteMetricsProvisioner) setContainerRateMetric(podUID, containerName, targetMetricName string, deltaValueFunc func() float64, lastUpdateTime, curUpdateTime int64) {
   263  	timeDeltaInSec := curUpdateTime - lastUpdateTime
   264  	if lastUpdateTime == 0 || timeDeltaInSec <= 0 {
   265  		// Return directly when the following situations happen:
   266  		// 1. lastUpdateTime == 0, which means no previous data.
   267  		// 2. timeDeltaInSec == 0, which means the metric is not updated,
   268  		//	this is originated from the sampling lag between katalyst-core and malachite(data source)
   269  		// 3. timeDeltaInSec < 0, this is illegal and unlikely to happen.
   270  		return
   271  	}
   272  
   273  	// TODO this will duplicate "updateTime" a lot.
   274  	// But to my knowledge, the cost could be acceptable.
   275  	updateTime := time.Unix(curUpdateTime, 0)
   276  	m.metricStore.SetContainerMetric(podUID, containerName, targetMetricName,
   277  		metric.MetricData{Value: deltaValueFunc() / float64(timeDeltaInSec), Time: &updateTime})
   278  }
   279  
   280  // uint64CounterDelta calculate the delta between two uint64 counters
   281  // Sometimes the counter value would go beyond the MaxUint64. In that case,
   282  // negative counter delta would happen, and the data is not incorrect.
   283  func uint64CounterDelta(previous, current uint64) uint64 {
   284  	if current >= previous {
   285  		return current - previous
   286  	}
   287  
   288  	// Return 0 when previous > current, because we may not be able to make sure
   289  	// the upper bound for each counter.
   290  	return 0
   291  }