github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package state
    18  
    19  import (
    20  	"encoding/json"
    21  	"fmt"
    22  
    23  	info "github.com/google/cadvisor/info/v1"
    24  	v1 "k8s.io/api/core/v1"
    25  	"k8s.io/klog/v2"
    26  	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
    27  
    28  	"github.com/kubewharf/katalyst-api/pkg/consts"
    29  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate"
    30  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
    31  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    32  	"github.com/kubewharf/katalyst-core/pkg/util/machine"
    33  )
    34  
    // AllocationInfo is the allocation record kept for a single container of a pod.
    // Besides the pod/container identity it carries the aggregated quantity, the
    // NUMA allocation result and a per-NUMA-node breakdown of the allocation.
    35  type AllocationInfo struct {
    36  	PodUid               string         `json:"pod_uid,omitempty"`
    37  	PodNamespace         string         `json:"pod_namespace,omitempty"`
    38  	PodName              string         `json:"pod_name,omitempty"`
    39  	ContainerName        string         `json:"container_name,omitempty"`
    40  	ContainerType        string         `json:"container_type,omitempty"` // compared with pluginapi.ContainerType_MAIN / ContainerType_SIDECAR names by CheckMainContainer / CheckSideCar
    41  	ContainerIndex       uint64         `json:"container_index,omitempty"`
    42  	RampUp               bool           `json:"ramp_up,omitempty"`
    43  	PodRole              string         `json:"pod_role,omitempty"`
    44  	PodType              string         `json:"pod_type,omitempty"`
    45  	AggregatedQuantity   uint64         `json:"aggregated_quantity"` // reported as AllocatedQuantity of the memory resource by GetResourceAllocation
    46  	NumaAllocationResult machine.CPUSet `json:"numa_allocation_result,omitempty"` // its String() form is reported as the memory AllocationResult by GetResourceAllocation
    47  
    48  	// keyed by numa node id, value is assignment for the pod in corresponding NUMA node
    49  	TopologyAwareAllocations map[int]uint64 `json:"topology_aware_allocations"`
    50  
    51  	// keyed by control knob names referred in memoryadvisor package
    52  	ExtraControlKnobInfo map[string]commonstate.ControlKnobInfo `json:"extra_control_knob_info"` // entries with a non-empty OciPropertyName are exported by GetResourceAllocation
    53  	Labels               map[string]string                      `json:"labels"`
    54  	Annotations          map[string]string                      `json:"annotations"` // consulted by CheckNumaBinding for the numa-binding enhancement key
    55  	QoSLevel             string                                 `json:"qosLevel"` // compared with consts.PodAnnotationQoSLevelDedicatedCores by CheckNumaBinding
    56  }
    57  
    // ContainerEntries, PodEntries and PodResourceEntries nest allocation records
    // by container name, pod UID and resource name respectively.
    58  type (
    59  	ContainerEntries   map[string]*AllocationInfo     // Keyed by container name
    60  	PodEntries         map[string]ContainerEntries    // Keyed by pod UID
    61  	PodResourceEntries map[v1.ResourceName]PodEntries // Keyed by resource name
    62  )
    63  
    64  // NUMANodeState records the amount of memory per numa node (in bytes)
    65  type NUMANodeState struct {
    66  	TotalMemSize   uint64     `json:"total"`
    67  	SystemReserved uint64     `json:"systemReserved"`
    68  	Allocatable    uint64     `json:"allocatable"` // the value BytesPerNUMA takes the maximum of across NUMA nodes
    69  	Allocated      uint64     `json:"Allocated"` // NOTE(review): JSON key is capitalized unlike the sibling fields; renaming it would change the serialized form
    70  	Free           uint64     `json:"free"` // NOTE(review): presumably Allocatable minus Allocated — not established in this file, confirm against callers
    71  	PodEntries     PodEntries `json:"pod_entries"` // allocation records stored through SetAllocationInfo, keyed by pod UID then container name
    72  }
    73  
    // NUMANodeMap and NUMANodeResourcesMap index NUMANodeState values by numa
    // node id and, one level up, by resource name.
    74  type (
    75  	NUMANodeMap          map[int]*NUMANodeState          // keyed by numa node id
    76  	NUMANodeResourcesMap map[v1.ResourceName]NUMANodeMap // keyed by resource name
    77  )
    78  
    79  func (ai *AllocationInfo) String() string {
    80  	if ai == nil {
    81  		return ""
    82  	}
    83  
    84  	contentBytes, err := json.Marshal(ai)
    85  	if err != nil {
    86  		klog.Errorf("[AllocationInfo.String] marshal AllocationInfo failed with error: %v", err)
    87  		return ""
    88  	}
    89  	return string(contentBytes)
    90  }
    91  
    92  func (ai *AllocationInfo) Clone() *AllocationInfo {
    93  	if ai == nil {
    94  		return nil
    95  	}
    96  
    97  	clone := &AllocationInfo{
    98  		PodUid:               ai.PodUid,
    99  		PodNamespace:         ai.PodNamespace,
   100  		PodName:              ai.PodName,
   101  		ContainerName:        ai.ContainerName,
   102  		ContainerType:        ai.ContainerType,
   103  		ContainerIndex:       ai.ContainerIndex,
   104  		RampUp:               ai.RampUp,
   105  		PodRole:              ai.PodRole,
   106  		PodType:              ai.PodType,
   107  		AggregatedQuantity:   ai.AggregatedQuantity,
   108  		NumaAllocationResult: ai.NumaAllocationResult.Clone(),
   109  		QoSLevel:             ai.QoSLevel,
   110  		Labels:               general.DeepCopyMap(ai.Labels),
   111  		Annotations:          general.DeepCopyMap(ai.Annotations),
   112  	}
   113  
   114  	if ai.TopologyAwareAllocations != nil {
   115  		clone.TopologyAwareAllocations = make(map[int]uint64)
   116  
   117  		for node, quantity := range ai.TopologyAwareAllocations {
   118  			clone.TopologyAwareAllocations[node] = quantity
   119  		}
   120  	}
   121  
   122  	if ai.ExtraControlKnobInfo != nil {
   123  		clone.ExtraControlKnobInfo = make(map[string]commonstate.ControlKnobInfo)
   124  
   125  		for name := range ai.ExtraControlKnobInfo {
   126  			clone.ExtraControlKnobInfo[name] = ai.ExtraControlKnobInfo[name]
   127  		}
   128  	}
   129  
   130  	return clone
   131  }
   132  
   133  // CheckNumaBinding returns true if the AllocationInfo is for pod with
   134  // dedicated-qos and numa-binding enhancement
   135  func (ai *AllocationInfo) CheckNumaBinding() bool {
   136  	return ai.QoSLevel == consts.PodAnnotationQoSLevelDedicatedCores &&
   137  		ai.Annotations[consts.PodAnnotationMemoryEnhancementNumaBinding] == consts.PodAnnotationMemoryEnhancementNumaBindingEnable
   138  }
   139  
   140  // CheckMainContainer returns true if the AllocationInfo is for main container
   141  func (ai *AllocationInfo) CheckMainContainer() bool {
   142  	return ai.ContainerType == pluginapi.ContainerType_MAIN.String()
   143  }
   144  
   145  // CheckSideCar returns true if the AllocationInfo is for side-car container
   146  func (ai *AllocationInfo) CheckSideCar() bool {
   147  	return ai.ContainerType == pluginapi.ContainerType_SIDECAR.String()
   148  }
   149  
   150  // GetResourceAllocation transforms resource allocation information into *pluginapi.ResourceAllocation
   151  func (ai *AllocationInfo) GetResourceAllocation() (*pluginapi.ResourceAllocation, error) {
   152  	if ai == nil {
   153  		return nil, fmt.Errorf("GetResourceAllocation of nil AllocationInfo")
   154  	}
   155  
   156  	// deal with main resource
   157  	resourceAllocation := &pluginapi.ResourceAllocation{
   158  		ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{
   159  			string(v1.ResourceMemory): {
   160  				OciPropertyName:   util.OCIPropertyNameCPUSetMems,
   161  				IsNodeResource:    false,
   162  				IsScalarResource:  true,
   163  				AllocatedQuantity: float64(ai.AggregatedQuantity),
   164  				AllocationResult:  ai.NumaAllocationResult.String(),
   165  			},
   166  		},
   167  	}
   168  
   169  	// deal with accompanying resources
   170  	for name, entry := range ai.ExtraControlKnobInfo {
   171  		if entry.OciPropertyName == "" {
   172  			continue
   173  		}
   174  
   175  		if resourceAllocation.ResourceAllocation[name] != nil {
   176  			return nil, fmt.Errorf("name: %s meets conflict", name)
   177  		}
   178  
   179  		resourceAllocation.ResourceAllocation[name] = &pluginapi.ResourceAllocationInfo{
   180  			OciPropertyName:  entry.OciPropertyName,
   181  			AllocationResult: entry.ControlKnobValue,
   182  		}
   183  	}
   184  
   185  	return resourceAllocation, nil
   186  }
   187  
   188  func (pe PodEntries) Clone() PodEntries {
   189  	if pe == nil {
   190  		return nil
   191  	}
   192  
   193  	clone := make(PodEntries)
   194  	for podUID, containerEntries := range pe {
   195  		if containerEntries == nil {
   196  			continue
   197  		}
   198  
   199  		clone[podUID] = make(ContainerEntries)
   200  		for containerName, allocationInfo := range containerEntries {
   201  			clone[podUID][containerName] = allocationInfo.Clone()
   202  		}
   203  	}
   204  	return clone
   205  }
   206  
   207  // GetMainContainerAllocation returns AllocationInfo that belongs
   208  // the main container for this pod
   209  func (pe PodEntries) GetMainContainerAllocation(podUID string) (*AllocationInfo, bool) {
   210  	for _, allocationInfo := range pe[podUID] {
   211  		if allocationInfo.CheckMainContainer() {
   212  			return allocationInfo, true
   213  		}
   214  	}
   215  	return nil, false
   216  }
   217  
   218  func (pre PodResourceEntries) String() string {
   219  	if pre == nil {
   220  		return ""
   221  	}
   222  
   223  	contentBytes, err := json.Marshal(pre)
   224  	if err != nil {
   225  		klog.Errorf("[PodResourceEntries.String] marshal PodResourceEntries failed with error: %v", err)
   226  		return ""
   227  	}
   228  	return string(contentBytes)
   229  }
   230  
   231  func (pre PodResourceEntries) Clone() PodResourceEntries {
   232  	if pre == nil {
   233  		return nil
   234  	}
   235  
   236  	clone := make(PodResourceEntries)
   237  	for resourceName, podEntries := range pre {
   238  		clone[resourceName] = podEntries.Clone()
   239  	}
   240  	return clone
   241  }
   242  
   243  func (ns *NUMANodeState) String() string {
   244  	if ns == nil {
   245  		return ""
   246  	}
   247  
   248  	contentBytes, err := json.Marshal(ns)
   249  	if err != nil {
   250  		klog.Errorf("[NUMANodeState.String] marshal NUMANodeState failed with error: %v", err)
   251  		return ""
   252  	}
   253  	return string(contentBytes)
   254  }
   255  
   256  func (ns *NUMANodeState) Clone() *NUMANodeState {
   257  	if ns == nil {
   258  		return nil
   259  	}
   260  
   261  	return &NUMANodeState{
   262  		TotalMemSize:   ns.TotalMemSize,
   263  		SystemReserved: ns.SystemReserved,
   264  		Allocatable:    ns.Allocatable,
   265  		Allocated:      ns.Allocated,
   266  		Free:           ns.Free,
   267  		PodEntries:     ns.PodEntries.Clone(),
   268  	}
   269  }
   270  
   271  // HasNUMABindingPods returns true if any AllocationInfo in this NUMANodeState is for numa-binding
   272  func (ns *NUMANodeState) HasNUMABindingPods() bool {
   273  	if ns == nil {
   274  		return false
   275  	}
   276  
   277  	for _, containerEntries := range ns.PodEntries {
   278  		for _, allocationInfo := range containerEntries {
   279  			if allocationInfo != nil && allocationInfo.CheckNumaBinding() {
   280  				return true
   281  			}
   282  		}
   283  	}
   284  	return false
   285  }
   286  
   287  // SetAllocationInfo adds a new AllocationInfo (for pod/container pairs) into the given NUMANodeState
   288  func (ns *NUMANodeState) SetAllocationInfo(podUID string, containerName string, allocationInfo *AllocationInfo) {
   289  	if ns == nil {
   290  		return
   291  	}
   292  
   293  	if ns.PodEntries == nil {
   294  		ns.PodEntries = make(PodEntries)
   295  	}
   296  
   297  	if _, ok := ns.PodEntries[podUID]; !ok {
   298  		ns.PodEntries[podUID] = make(ContainerEntries)
   299  	}
   300  
   301  	ns.PodEntries[podUID][containerName] = allocationInfo.Clone()
   302  }
   303  
   304  func (nm NUMANodeMap) Clone() NUMANodeMap {
   305  	clone := make(NUMANodeMap)
   306  	for node, ns := range nm {
   307  		clone[node] = ns.Clone()
   308  	}
   309  	return clone
   310  }
   311  
   312  // BytesPerNUMA is a helper function to parse memory capacity at per numa level
   313  func (nm NUMANodeMap) BytesPerNUMA() (uint64, error) {
   314  	if len(nm) == 0 {
   315  		return 0, fmt.Errorf("getBytesPerNUMAFromMachineState got nil numaMap")
   316  	}
   317  
   318  	var maxNUMAAllocatable uint64
   319  	for _, numaState := range nm {
   320  		if numaState != nil {
   321  			maxNUMAAllocatable = general.MaxUInt64(maxNUMAAllocatable, numaState.Allocatable)
   322  		}
   323  	}
   324  
   325  	if maxNUMAAllocatable > 0 {
   326  		return maxNUMAAllocatable, nil
   327  	}
   328  
   329  	return 0, fmt.Errorf("getBytesPerNUMAFromMachineState doesn't get valid numaState")
   330  }
   331  
   332  // GetNUMANodesWithoutNUMABindingPods returns a set of numa nodes; for
   333  // those numa nodes, they all don't contain numa-binding pods
   334  func (nm NUMANodeMap) GetNUMANodesWithoutNUMABindingPods() machine.CPUSet {
   335  	res := machine.NewCPUSet()
   336  	for numaId, numaNodeState := range nm {
   337  		if numaNodeState != nil && !numaNodeState.HasNUMABindingPods() {
   338  			res = res.Union(machine.NewCPUSet(numaId))
   339  		}
   340  	}
   341  	return res
   342  }
   343  
   344  func (nrm NUMANodeResourcesMap) String() string {
   345  	if nrm == nil {
   346  		return ""
   347  	}
   348  
   349  	contentBytes, err := json.Marshal(nrm)
   350  	if err != nil {
   351  		klog.Errorf("[NUMANodeResourcesMap.String] marshal NUMANodeResourcesMap failed with error: %v", err)
   352  		return ""
   353  	}
   354  	return string(contentBytes)
   355  }
   356  
   357  func (nrm NUMANodeResourcesMap) Clone() NUMANodeResourcesMap {
   358  	clone := make(NUMANodeResourcesMap)
   359  	for resourceName, nm := range nrm {
   360  		clone[resourceName] = nm.Clone()
   361  	}
   362  	return clone
   363  }
   364  
   365  // reader is used to get information from local states
   366  type reader interface {
   367  	GetMachineState() NUMANodeResourcesMap // state indexed by resource name, then numa node id
   368  	GetPodResourceEntries() PodResourceEntries // allocations indexed by resource name, pod UID, then container name
   369  	GetAllocationInfo(resourceName v1.ResourceName, podUID, containerName string) *AllocationInfo // NOTE(review): presumably nil when no record exists — confirm against implementations
   370  }
   371  
   372  // writer is used to store information into local states,
   373  // and it also provides functionality to maintain the local files
   374  type writer interface {
   375  	SetMachineState(numaNodeResourcesMap NUMANodeResourcesMap)
   376  	SetPodResourceEntries(podResourceEntries PodResourceEntries)
   377  	SetAllocationInfo(resourceName v1.ResourceName, podUID, containerName string, allocationInfo *AllocationInfo)
   378  
   379  	Delete(resourceName v1.ResourceName, podUID, containerName string) // NOTE(review): presumably drops the record for this resource/pod/container — confirm against implementations
   380  	ClearState() // NOTE(review): presumably resets all stored state — confirm against implementations
   381  }
   382  
   383  // ReadonlyState interface only provides methods for tracking pod assignments
   384  type ReadonlyState interface {
   385  	reader // embeds the read accessors declared by reader
   386  
   387  	GetMachineInfo() *info.MachineInfo // cadvisor machine information
   388  	GetReservedMemory() map[v1.ResourceName]map[int]uint64 // keyed by resource name; NOTE(review): inner key is presumably the numa node id — confirm
   389  }
   390  
   391  // State interface provides methods for tracking and setting pod assignments
   392  type State interface {
   393  	writer // mutating operations
   394  	ReadonlyState // read-only accessors
   395  }