github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go

/*
Copyright 2022 The Katalyst Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package state

import (
	"fmt"
	"math"
	"strings"
	"sync"

	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/klog/v2"

	apiconsts "github.com/kubewharf/katalyst-api/pkg/consts"
	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts"
	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor"
	advisorapi "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor"
	"github.com/kubewharf/katalyst-core/pkg/util/general"
	"github.com/kubewharf/katalyst-core/pkg/util/machine"
)

// Notice that pool names may not map directly to qos levels; for instance:
// - both isolated_shared_cores and dedicated_cores fall into PoolNameDedicated
const (
	PoolNameShare           = "share"
	PoolNameReclaim         = "reclaim"
	PoolNameDedicated       = "dedicated"
	PoolNameReserve         = "reserve"
	PoolNamePrefixIsolation = "isolation"

	// PoolNameFallback is not a real pool; it is the union of all
	// non-reclaimed pools, used to place pods that should have been isolated.
	PoolNameFallback = "fallback"
)

var (
	// StaticPools are generated statically by the cpu plugin,
	// and they are ignored when reading the cpu advisor list-and-watch response.
	StaticPools = sets.NewString(
		PoolNameReserve,
	)

	// ResidentPools are guaranteed to exist in state,
	// and they are usually used to ensure stability.
	ResidentPools = sets.NewString(
		PoolNameReclaim,
	).Union(StaticPools)
)

var (
	containerRequestedCoresLock sync.RWMutex
	containerRequestedCores     func(allocationInfo *AllocationInfo) float64
)

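// GetContainerRequestedCores returns the currently registered function used to
// compute the requested CPU cores of a container from its AllocationInfo.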
func GetContainerRequestedCores() func(allocationInfo *AllocationInfo) float64 {
	containerRequestedCoresLock.RLock()
	defer containerRequestedCoresLock.RUnlock()
	return containerRequestedCores
}

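// SetContainerRequestedCores registers the function used to compute the
// requested CPU cores of a container; access is guarded by a lock so the
// function can be set and read concurrently.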
func SetContainerRequestedCores(f func(allocationInfo *AllocationInfo) float64) {
	containerRequestedCoresLock.Lock()
	defer containerRequestedCoresLock.Unlock()
	containerRequestedCores = f
}
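
// A minimal usage sketch (illustrative only, not part of this file): a policy
// implementation could register its own requested-cores accessor at startup and
// read it back later. The RequestQuantity field below is an assumption used
// purely for illustration.
//
//	SetContainerRequestedCores(func(allocationInfo *AllocationInfo) float64 {
//		return allocationInfo.RequestQuantity // hypothetical field
//	})
//	requested := GetContainerRequestedCores()(allocationInfo)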

// GetIsolatedQuantityMapFromPodEntries returns a map that indicates isolation info,
// formatted as pod -> container -> isolated-quantity.
func GetIsolatedQuantityMapFromPodEntries(podEntries PodEntries, ignoreAllocationInfos []*AllocationInfo) map[string]map[string]int {
	ret := make(map[string]map[string]int)
	for podUID, entries := range podEntries {
		if entries.IsPoolEntry() {
			continue
		}

	containerLoop:
		for containerName, allocationInfo := range entries {
			// only keep dedicated_cores without numa_binding
			if allocationInfo == nil || CheckDedicatedNUMABinding(allocationInfo) || !CheckDedicated(allocationInfo) {
				continue
			}

			for _, ignoreAllocationInfo := range ignoreAllocationInfos {
				if allocationInfo.PodUid == ignoreAllocationInfo.PodUid && allocationInfo.ContainerName == ignoreAllocationInfo.ContainerName {
					continue containerLoop
				}
			}

			// if there are no more cores to allocate, dedicated_cores without numa_binding are put
			// into a pool rather than isolated. calling this function means we are about to adjust
			// the allocation and will try to isolate those containers, so treat them as containers to be isolated.
			var quantity int
			if allocationInfo.OwnerPoolName != PoolNameDedicated {
				quantity = int(math.Ceil(GetContainerRequestedCores()(allocationInfo)))
			} else {
				quantity = allocationInfo.AllocationResult.Size()
			}
			if quantity == 0 {
				klog.Warningf("[GetIsolatedQuantityMapFromPodEntries] isolated pod: %s/%s container: %s get zero quantity",
					allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName)
				continue
			}

			if ret[podUID] == nil {
				ret[podUID] = make(map[string]int)
			}
			ret[podUID][containerName] = quantity
		}
	}
	return ret
}
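
// For illustration (hypothetical values): a dedicated_cores container without
// numa_binding named "main" in pod "pod-uid-1" requesting 2.5 cores, and not yet
// placed in the dedicated pool, would show up as {"pod-uid-1": {"main": 3}},
// since the precise request is rounded up.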

// GetSharedQuantityMapFromPodEntries returns a map that indicates the quantity info
// for each shared pool, formatted as pool -> quantity.
func GetSharedQuantityMapFromPodEntries(podEntries PodEntries, ignoreAllocationInfos []*AllocationInfo) map[string]int {
	ret := make(map[string]int)
	preciseQuantityMap := make(map[string]float64)
	for _, entries := range podEntries {
		if entries.IsPoolEntry() {
			continue
		}

	containerLoop:
		for _, allocationInfo := range entries {
			// only count shared_cores that are not isolated.
			// if there are no more cores to allocate, dedicated_cores without numa_binding are put
			// into a pool rather than isolated. calling this function means we are about to adjust
			// the allocation and will try to isolate those containers, so treat them as containers to be isolated.
			if allocationInfo == nil || !CheckShared(allocationInfo) {
				continue
			}

			for _, ignoreAllocationInfo := range ignoreAllocationInfos {
				if allocationInfo.PodUid == ignoreAllocationInfo.PodUid && allocationInfo.ContainerName == ignoreAllocationInfo.ContainerName {
					continue containerLoop
				}
			}

			if poolName := allocationInfo.GetOwnerPoolName(); poolName != advisorapi.EmptyOwnerPoolName {
				preciseQuantityMap[poolName] += GetContainerRequestedCores()(allocationInfo)
			}
		}
	}

	for poolName, preciseQuantity := range preciseQuantityMap {
		ret[poolName] = int(math.Ceil(preciseQuantity))
		general.Infof("ceil pool: %s precise quantity: %.3f to %d",
			poolName, preciseQuantity, ret[poolName])
	}
	return ret
}
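
// For illustration (hypothetical values): if the "share" pool holds two containers
// requesting 1.2 and 2.5 cores, the precise sum 3.7 is rounded up, so the returned
// map is {"share": 4}.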

// GenerateMachineStateFromPodEntries returns a NUMANodeMap for the given resource based on
// machine info and reserved resources, along with the existing pod entries.
func GenerateMachineStateFromPodEntries(topology *machine.CPUTopology, podEntries PodEntries, policyName string) (NUMANodeMap, error) {
	if topology == nil {
		return nil, fmt.Errorf("GenerateMachineStateFromPodEntries got nil topology")
	}

	machineState := make(NUMANodeMap)
	for _, numaNode := range topology.CPUDetails.NUMANodes().ToSliceInt64() {
		numaNodeState := &NUMANodeState{}
		numaNodeAllCPUs := topology.CPUDetails.CPUsInNUMANodes(int(numaNode)).Clone()
		allocatedCPUsInNumaNode := machine.NewCPUSet()

		for podUID, containerEntries := range podEntries {
			for containerName, allocationInfo := range containerEntries {
				if containerName != advisorapi.FakedContainerName && allocationInfo != nil {

					// the container has no cpuset assignment on the current NUMA node
					if allocationInfo.OriginalTopologyAwareAssignments[int(numaNode)].Size() == 0 &&
						allocationInfo.TopologyAwareAssignments[int(numaNode)].Size() == 0 {
						continue
					}

					switch policyName {
					case consts.CPUResourcePluginPolicyNameDynamic:
						// only modify the allocated and default properties in the NUMA node state
						// if the policy is dynamic and the QoS class is dedicated_cores with NUMA binding
						if CheckDedicatedNUMABinding(allocationInfo) {
							allocatedCPUsInNumaNode = allocatedCPUsInNumaNode.Union(allocationInfo.OriginalTopologyAwareAssignments[int(numaNode)])
						}
					case consts.CPUResourcePluginPolicyNameNative:
						// only modify the allocated and default properties in the NUMA node state
						// if the policy is native and the QoS class is Guaranteed
						if CheckDedicatedPool(allocationInfo) {
							allocatedCPUsInNumaNode = allocatedCPUsInNumaNode.Union(allocationInfo.OriginalTopologyAwareAssignments[int(numaNode)])
						}
					}

					topologyAwareAssignments, _ := machine.GetNumaAwareAssignments(topology, allocationInfo.AllocationResult.Intersection(numaNodeAllCPUs))
					originalTopologyAwareAssignments, _ := machine.GetNumaAwareAssignments(topology, allocationInfo.OriginalAllocationResult.Intersection(numaNodeAllCPUs))

					numaNodeAllocationInfo := allocationInfo.Clone()
					numaNodeAllocationInfo.AllocationResult = allocationInfo.AllocationResult.Intersection(numaNodeAllCPUs)
					numaNodeAllocationInfo.OriginalAllocationResult = allocationInfo.OriginalAllocationResult.Intersection(numaNodeAllCPUs)
					numaNodeAllocationInfo.TopologyAwareAssignments = topologyAwareAssignments
					numaNodeAllocationInfo.OriginalTopologyAwareAssignments = originalTopologyAwareAssignments

					numaNodeState.SetAllocationInfo(podUID, containerName, numaNodeAllocationInfo)
				}
			}
		}

		numaNodeState.AllocatedCPUSet = allocatedCPUsInNumaNode.Clone()
		numaNodeState.DefaultCPUSet = numaNodeAllCPUs.Difference(numaNodeState.AllocatedCPUSet)
		machineState[int(numaNode)] = numaNodeState
	}
	return machineState, nil
}

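// IsIsolationPool returns true if the given pool name refers to an isolation pool,
// i.e. it starts with PoolNamePrefixIsolation.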
func IsIsolationPool(poolName string) bool {
	return strings.HasPrefix(poolName, PoolNamePrefixIsolation)
}

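// GetPoolType maps a concrete pool name to its pool type: isolation pools map to
// PoolNamePrefixIsolation, the reclaim/dedicated/reserve/fallback pools map to
// themselves, and every other pool name is treated as a share pool.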
func GetPoolType(poolName string) string {
	if IsIsolationPool(poolName) {
		return PoolNamePrefixIsolation
	}
	switch poolName {
	case PoolNameReclaim, PoolNameDedicated, PoolNameReserve, PoolNameFallback:
		return poolName
	default:
		return PoolNameShare
	}
}

// GetSpecifiedPoolName returns the pool name specified by the given qos level and
// cpuset enhancement value.
// TODO: this function (along with the pool-name consts) should be moved to generic qos conf.
func GetSpecifiedPoolName(qosLevel, cpusetEnhancementValue string) string {
	switch qosLevel {
	case apiconsts.PodAnnotationQoSLevelSharedCores:
		if cpusetEnhancementValue != cpuadvisor.EmptyOwnerPoolName {
			return cpusetEnhancementValue
		}
		return PoolNameShare
	case apiconsts.PodAnnotationQoSLevelReclaimedCores:
		return PoolNameReclaim
	case apiconsts.PodAnnotationQoSLevelDedicatedCores:
		return PoolNameDedicated
	default:
		return cpuadvisor.EmptyOwnerPoolName
	}
}
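
// For illustration (hypothetical enhancement value): a shared_cores pod with cpuset
// enhancement "batch" resolves to pool "batch", a shared_cores pod without an
// enhancement value resolves to PoolNameShare, and a reclaimed_cores pod always
// resolves to PoolNameReclaim.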