github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/util.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package state
    18  
    19  import (
    20  	"fmt"
    21  
    22  	info "github.com/google/cadvisor/info/v1"
    23  	v1 "k8s.io/api/core/v1"
    24  	"k8s.io/klog/v2"
    25  
    26  	"github.com/kubewharf/katalyst-core/pkg/util/machine"
    27  )
    28  
    29  // GenerateMachineState returns NUMANodeResourcesMap based on
    30  // machine info and reserved resources
    31  func GenerateMachineState(machineInfo *info.MachineInfo, reserved map[v1.ResourceName]map[int]uint64) (NUMANodeResourcesMap, error) {
    32  	if machineInfo == nil {
    33  		return nil, fmt.Errorf("GenerateMachineState got nil machineInfo")
    34  	}
    35  
    36  	// todo: currently only support memory, we will support huge page later.
    37  	defaultResourcesMachineState := make(NUMANodeResourcesMap)
    38  	for _, resourceName := range []v1.ResourceName{v1.ResourceMemory} {
    39  		machineState, err := GenerateResourceState(machineInfo, reserved, resourceName)
    40  		if err != nil {
    41  			return nil, fmt.Errorf("GenerateResourceState for resource: %s failed with error: %v", resourceName, err)
    42  		}
    43  
    44  		defaultResourcesMachineState[resourceName] = machineState
    45  	}
    46  	return defaultResourcesMachineState, nil
    47  }
    48  
    49  // GenerateResourceState returns NUMANodeMap for given resource based on
    50  // machine info and reserved resources
    51  func GenerateResourceState(machineInfo *info.MachineInfo, reserved map[v1.ResourceName]map[int]uint64, resourceName v1.ResourceName) (NUMANodeMap, error) {
    52  	defaultMachineState := make(NUMANodeMap)
    53  
    54  	switch resourceName {
    55  	case v1.ResourceMemory:
    56  		for _, node := range machineInfo.Topology {
    57  			totalMemSizeQuantity := node.Memory
    58  			numaReservedMemQuantity := reserved[resourceName][node.Id]
    59  
    60  			if totalMemSizeQuantity < numaReservedMemQuantity {
    61  				return nil, fmt.Errorf("invalid reserved memory: %d in NUMA: %d with total memory size: %d", numaReservedMemQuantity, node.Id, totalMemSizeQuantity)
    62  			}
    63  
    64  			allocatableQuantity := totalMemSizeQuantity - numaReservedMemQuantity
    65  			freeQuantity := allocatableQuantity
    66  
    67  			defaultMachineState[node.Id] = &NUMANodeState{
    68  				TotalMemSize:   totalMemSizeQuantity,
    69  				SystemReserved: numaReservedMemQuantity,
    70  				Allocatable:    allocatableQuantity,
    71  				Allocated:      0,
    72  				Free:           freeQuantity,
    73  				PodEntries:     make(PodEntries),
    74  			}
    75  		}
    76  	default:
    77  		return nil, fmt.Errorf("unsupported resource name: %s", resourceName)
    78  	}
    79  
    80  	return defaultMachineState, nil
    81  }
    82  
    83  // GenerateMachineStateFromPodEntries returns NUMANodeResourcesMap based on
    84  // machine info and reserved resources (along with existed pod entries)
    85  func GenerateMachineStateFromPodEntries(machineInfo *info.MachineInfo,
    86  	podResourceEntries PodResourceEntries, reserved map[v1.ResourceName]map[int]uint64,
    87  ) (NUMANodeResourcesMap, error) {
    88  	if machineInfo == nil {
    89  		return nil, fmt.Errorf("GenerateMachineStateFromPodEntries got nil machineInfo")
    90  	}
    91  
    92  	// todo: currently only support memory, we will support huge page later.
    93  	defaultResourcesMachineState := make(NUMANodeResourcesMap)
    94  	for _, resourceName := range []v1.ResourceName{v1.ResourceMemory} {
    95  		machineState, err := GenerateResourceStateFromPodEntries(machineInfo, podResourceEntries[resourceName], reserved, resourceName)
    96  		if err != nil {
    97  			return nil, fmt.Errorf("GenerateResourceState for resource: %s failed with error: %v", resourceName, err)
    98  		}
    99  
   100  		defaultResourcesMachineState[resourceName] = machineState
   101  	}
   102  	return defaultResourcesMachineState, nil
   103  }
   104  
   105  // GenerateResourceStateFromPodEntries returns NUMANodeMap for given resource based on
   106  // machine info and reserved resources along with existed pod entries
   107  func GenerateResourceStateFromPodEntries(machineInfo *info.MachineInfo,
   108  	podEntries PodEntries, reserved map[v1.ResourceName]map[int]uint64, resourceName v1.ResourceName,
   109  ) (NUMANodeMap, error) {
   110  	switch resourceName {
   111  	case v1.ResourceMemory:
   112  		return GenerateMemoryStateFromPodEntries(machineInfo, podEntries, reserved)
   113  	default:
   114  		return nil, fmt.Errorf("unsupported resource name: %s", resourceName)
   115  	}
   116  }
   117  
   118  // GenerateMemoryStateFromPodEntries returns NUMANodeMap for memory based on
   119  // machine info and reserved resources along with existed pod entries
   120  func GenerateMemoryStateFromPodEntries(machineInfo *info.MachineInfo,
   121  	podEntries PodEntries, reserved map[v1.ResourceName]map[int]uint64,
   122  ) (NUMANodeMap, error) {
   123  	machineState, err := GenerateResourceState(machineInfo, reserved, v1.ResourceMemory)
   124  	if err != nil {
   125  		return nil, fmt.Errorf("GenerateResourceState failed with error: %v", err)
   126  	}
   127  
   128  	for numaId, numaNodeState := range machineState {
   129  		var allocatedMemQuantityInNumaNode uint64 = 0
   130  
   131  		for podUID, containerEntries := range podEntries {
   132  			for containerName, allocationInfo := range containerEntries {
   133  				if containerName != "" && allocationInfo != nil {
   134  					curContainerAllocatedQuantityInNumaNode := allocationInfo.TopologyAwareAllocations[numaId]
   135  					if curContainerAllocatedQuantityInNumaNode == 0 &&
   136  						allocationInfo.NumaAllocationResult.Intersection(machine.NewCPUSet(numaId)).IsEmpty() {
   137  						continue
   138  					}
   139  
   140  					allocatedMemQuantityInNumaNode += curContainerAllocatedQuantityInNumaNode
   141  					numaNodeAllocationInfo := allocationInfo.Clone()
   142  					numaNodeAllocationInfo.NumaAllocationResult = machine.NewCPUSet(numaId)
   143  
   144  					if curContainerAllocatedQuantityInNumaNode != 0 {
   145  						numaNodeAllocationInfo.AggregatedQuantity = curContainerAllocatedQuantityInNumaNode
   146  						numaNodeAllocationInfo.TopologyAwareAllocations = map[int]uint64{
   147  							numaId: curContainerAllocatedQuantityInNumaNode,
   148  						}
   149  					}
   150  					numaNodeState.SetAllocationInfo(podUID, containerName, numaNodeAllocationInfo)
   151  				}
   152  			}
   153  		}
   154  
   155  		numaNodeState.Allocated = allocatedMemQuantityInNumaNode
   156  		if numaNodeState.Allocatable < numaNodeState.Allocated {
   157  			klog.Warningf("[GenerateMemoryStateFromPodEntries] invalid allocated memory: %d in NUMA: %d"+
   158  				" with allocatable memory size: %d, total memory size: %d, reserved memory size: %d",
   159  				numaNodeState.Allocated, numaId, numaNodeState.Allocatable, numaNodeState.TotalMemSize, numaNodeState.SystemReserved)
   160  			numaNodeState.Allocatable = numaNodeState.Allocated
   161  		}
   162  		numaNodeState.Free = numaNodeState.Allocatable - numaNodeState.Allocated
   163  
   164  		machineState[numaId] = numaNodeState
   165  	}
   166  
   167  	return machineState, nil
   168  }