github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package state
    18  
    19  import (
    20  	"encoding/json"
    21  	"fmt"
    22  
    23  	"k8s.io/apimachinery/pkg/util/sets"
    24  	"k8s.io/klog/v2"
    25  	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
    26  
    27  	"github.com/kubewharf/katalyst-api/pkg/consts"
    28  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor"
    29  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    30  	"github.com/kubewharf/katalyst-core/pkg/util/machine"
    31  )
    32  
// To stay compatible with the checkpoint checksum calculation,
// the guarantees below must hold when assigning checkpoint properties:
// 1. initialize resource.Quantity with resource.MustParse("0"), never with resource.Quantity{}
// 2. initialize CPUSet with NewCPUSet(...), never with CPUSet{}
// 3. do not use omitempty on map properties, and always initialize them with a newly made map
    38  
// AllocationInfo is the allocation record stored per entry in ContainerEntries
// (and therefore in PodEntries). It is serialized with encoding/json by String,
// so the field order and json tags below are part of the serialized form.
type AllocationInfo struct {
	PodUid                   string         `json:"pod_uid,omitempty"`
	PodNamespace             string         `json:"pod_namespace,omitempty"`
	PodName                  string         `json:"pod_name,omitempty"`
	ContainerName            string         `json:"container_name,omitempty"`
	ContainerType            string         `json:"container_type,omitempty"`
	ContainerIndex           uint64         `json:"container_index,omitempty"`
	RampUp                   bool           `json:"ramp_up,omitempty"`
	OwnerPoolName            string         `json:"owner_pool_name,omitempty"`
	PodRole                  string         `json:"pod_role,omitempty"`
	PodType                  string         `json:"pod_type,omitempty"`
	AllocationResult         machine.CPUSet `json:"allocation_result,omitempty"`
	OriginalAllocationResult machine.CPUSet `json:"original_allocation_result,omitempty"`

	// keyed by NUMA node id, value is the assignment for the pod in the corresponding NUMA node
	TopologyAwareAssignments map[int]machine.CPUSet `json:"topology_aware_assignments"`
	// keyed by NUMA node id, value is the original assignment for the pod in the corresponding NUMA node
	OriginalTopologyAwareAssignments map[int]machine.CPUSet `json:"original_topology_aware_assignments"`
	// for ramp up calculation. notice we don't use the time.Time type here to avoid checksum corruption.
	InitTimestamp string `json:"init_timestamp"`

	Labels          map[string]string `json:"labels"`
	Annotations     map[string]string `json:"annotations"`
	QoSLevel        string            `json:"qosLevel"`
	RequestQuantity float64           `json:"request_quantity,omitempty"`
}
    65  
// ContainerEntries and PodEntries form the two-level index of allocation records:
// pod UID -> container name -> *AllocationInfo. A pool is stored in the same
// structure, as an entry holding a single record under
// cpuadvisor.FakedContainerName (see IsPoolEntry).
type (
	ContainerEntries map[string]*AllocationInfo  // Keyed by containerName.
	PodEntries       map[string]ContainerEntries // Keyed by podUID.
)
    70  
// NUMANodeState holds the cpusets and the pod entries tracked for one NUMA node.
type NUMANodeState struct {
	// equals to allocatable cpuset subtracting original allocation result of dedicated_cores with NUMA binding
	DefaultCPUSet machine.CPUSet `json:"default_cpuset,omitempty"`
	// equals to original allocation result of dedicated_cores with NUMA binding
	AllocatedCPUSet machine.CPUSet `json:"allocated_cpuset,omitempty"`

	PodEntries PodEntries `json:"pod_entries"`
}
    79  
// NUMANodeMap maps a NUMA node id to the state tracked for that node.
type NUMANodeMap map[int]*NUMANodeState // keyed by numa node id
    81  
    82  func (ai *AllocationInfo) Clone() *AllocationInfo {
    83  	if ai == nil {
    84  		return nil
    85  	}
    86  
    87  	clone := &AllocationInfo{
    88  		PodUid:                   ai.PodUid,
    89  		PodNamespace:             ai.PodNamespace,
    90  		PodName:                  ai.PodName,
    91  		ContainerName:            ai.ContainerName,
    92  		ContainerType:            ai.ContainerType,
    93  		ContainerIndex:           ai.ContainerIndex,
    94  		RampUp:                   ai.RampUp,
    95  		OwnerPoolName:            ai.OwnerPoolName,
    96  		PodRole:                  ai.PodRole,
    97  		PodType:                  ai.PodType,
    98  		AllocationResult:         ai.AllocationResult.Clone(),
    99  		OriginalAllocationResult: ai.OriginalAllocationResult.Clone(),
   100  		InitTimestamp:            ai.InitTimestamp,
   101  		QoSLevel:                 ai.QoSLevel,
   102  		Labels:                   general.DeepCopyMap(ai.Labels),
   103  		Annotations:              general.DeepCopyMap(ai.Annotations),
   104  		RequestQuantity:          ai.RequestQuantity,
   105  	}
   106  
   107  	if ai.TopologyAwareAssignments != nil {
   108  		clone.TopologyAwareAssignments = make(map[int]machine.CPUSet)
   109  
   110  		for node, cpus := range ai.TopologyAwareAssignments {
   111  			clone.TopologyAwareAssignments[node] = cpus.Clone()
   112  		}
   113  	}
   114  
   115  	if ai.OriginalTopologyAwareAssignments != nil {
   116  		clone.OriginalTopologyAwareAssignments = make(map[int]machine.CPUSet)
   117  
   118  		for node, cpus := range ai.OriginalTopologyAwareAssignments {
   119  			clone.OriginalTopologyAwareAssignments[node] = cpus.Clone()
   120  		}
   121  	}
   122  
   123  	return clone
   124  }
   125  
   126  func (ai *AllocationInfo) String() string {
   127  	if ai == nil {
   128  		return ""
   129  	}
   130  
   131  	contentBytes, err := json.Marshal(ai)
   132  	if err != nil {
   133  		klog.Errorf("[AllocationInfo.String] marshal AllocationInfo failed with error: %v", err)
   134  		return ""
   135  	}
   136  	return string(contentBytes)
   137  }
   138  
   139  // GetPoolName parses the owner pool name for AllocationInfo
   140  // if owner exists, just return; otherwise, parse from qos-level
   141  func (ai *AllocationInfo) GetPoolName() string {
   142  	if ai == nil {
   143  		return cpuadvisor.EmptyOwnerPoolName
   144  	}
   145  
   146  	if ownerPoolName := ai.GetOwnerPoolName(); ownerPoolName != cpuadvisor.EmptyOwnerPoolName {
   147  		return ownerPoolName
   148  	}
   149  	return ai.GetSpecifiedPoolName()
   150  }
   151  
   152  // GetOwnerPoolName parses the owner pool name for AllocationInfo
   153  func (ai *AllocationInfo) GetOwnerPoolName() string {
   154  	if ai == nil {
   155  		return cpuadvisor.EmptyOwnerPoolName
   156  	}
   157  	return ai.OwnerPoolName
   158  }
   159  
   160  // GetSpecifiedPoolName parses the owner pool name for AllocationInfo from qos-level
   161  func (ai *AllocationInfo) GetSpecifiedPoolName() string {
   162  	if ai == nil {
   163  		return cpuadvisor.EmptyOwnerPoolName
   164  	}
   165  
   166  	return GetSpecifiedPoolName(ai.QoSLevel, ai.Annotations[consts.PodAnnotationCPUEnhancementCPUSet])
   167  }
   168  
   169  // CheckMainContainer returns true if the AllocationInfo is for main container
   170  func (ai *AllocationInfo) CheckMainContainer() bool {
   171  	return ai.ContainerType == pluginapi.ContainerType_MAIN.String()
   172  }
   173  
   174  // CheckSideCar returns true if the AllocationInfo is for side-car container
   175  func (ai *AllocationInfo) CheckSideCar() bool {
   176  	return ai.ContainerType == pluginapi.ContainerType_SIDECAR.String()
   177  }
   178  
   179  // CheckDedicated returns true if the AllocationInfo is for pod with dedicated-qos
   180  func CheckDedicated(ai *AllocationInfo) bool {
   181  	return ai.QoSLevel == consts.PodAnnotationQoSLevelDedicatedCores
   182  }
   183  
   184  // CheckShared returns true if the AllocationInfo is for pod with shared-qos
   185  func CheckShared(ai *AllocationInfo) bool {
   186  	return ai.QoSLevel == consts.PodAnnotationQoSLevelSharedCores
   187  }
   188  
   189  // CheckReclaimed returns true if the AllocationInfo is for pod with reclaimed-qos
   190  func CheckReclaimed(ai *AllocationInfo) bool {
   191  	return ai.QoSLevel == consts.PodAnnotationQoSLevelReclaimedCores
   192  }
   193  
   194  // CheckNUMABinding returns true if the AllocationInfo is for pod with numa-binding enhancement
   195  func CheckNUMABinding(ai *AllocationInfo) bool {
   196  	return ai.Annotations[consts.PodAnnotationMemoryEnhancementNumaBinding] == consts.PodAnnotationMemoryEnhancementNumaBindingEnable
   197  }
   198  
   199  // CheckDedicatedNUMABinding returns true if the AllocationInfo is for pod with
   200  // dedicated-qos and numa-binding enhancement
   201  func CheckDedicatedNUMABinding(ai *AllocationInfo) bool {
   202  	return CheckDedicated(ai) && CheckNUMABinding(ai)
   203  }
   204  
   205  // CheckDedicatedPool returns true if the AllocationInfo is for a container in the dedicated pool
   206  func CheckDedicatedPool(ai *AllocationInfo) bool {
   207  	return ai.OwnerPoolName == PoolNameDedicated
   208  }
   209  
   210  // IsPoolEntry returns true if this entry is for a pool;
   211  // otherwise, this entry is for a container entity.
   212  func (ce ContainerEntries) IsPoolEntry() bool {
   213  	return len(ce) == 1 && ce[cpuadvisor.FakedContainerName] != nil
   214  }
   215  
   216  func (ce ContainerEntries) GetPoolEntry() *AllocationInfo {
   217  	if !ce.IsPoolEntry() {
   218  		return nil
   219  	}
   220  	return ce[cpuadvisor.FakedContainerName]
   221  }
   222  
   223  // GetMainContainerEntry returns the main container entry in pod container entries
   224  func (ce ContainerEntries) GetMainContainerEntry() *AllocationInfo {
   225  	var mainContainerEntry *AllocationInfo
   226  
   227  	for _, siblingEntry := range ce {
   228  		if siblingEntry != nil && siblingEntry.CheckMainContainer() {
   229  			mainContainerEntry = siblingEntry
   230  			break
   231  		}
   232  	}
   233  
   234  	return mainContainerEntry
   235  }
   236  
   237  // GetMainContainerPoolName returns the main container owner pool name in pod container entries
   238  func (ce ContainerEntries) GetMainContainerPoolName() string {
   239  	return ce.GetMainContainerEntry().GetOwnerPoolName()
   240  }
   241  
   242  func (pe PodEntries) Clone() PodEntries {
   243  	if pe == nil {
   244  		return nil
   245  	}
   246  
   247  	clone := make(PodEntries)
   248  	for podUID, containerEntries := range pe {
   249  		if containerEntries == nil {
   250  			continue
   251  		}
   252  
   253  		clone[podUID] = make(ContainerEntries)
   254  		for containerName, allocationInfo := range containerEntries {
   255  			clone[podUID][containerName] = allocationInfo.Clone()
   256  		}
   257  	}
   258  	return clone
   259  }
   260  
   261  func (pe PodEntries) String() string {
   262  	if pe == nil {
   263  		return ""
   264  	}
   265  
   266  	contentBytes, err := json.Marshal(pe)
   267  	if err != nil {
   268  		klog.Errorf("[PodEntries.String] marshal PodEntries failed with error: %v", err)
   269  		return ""
   270  	}
   271  	return string(contentBytes)
   272  }
   273  
   274  // CheckPoolEmpty returns true if the given pool doesn't exist
   275  func (pe PodEntries) CheckPoolEmpty(poolName string) bool {
   276  	return pe[poolName][cpuadvisor.FakedContainerName] == nil ||
   277  		pe[poolName][cpuadvisor.FakedContainerName].AllocationResult.IsEmpty()
   278  }
   279  
   280  // GetCPUSetForPool returns cpuset that belongs to the given pool
   281  func (pe PodEntries) GetCPUSetForPool(poolName string) (machine.CPUSet, error) {
   282  	if pe == nil {
   283  		return machine.NewCPUSet(), fmt.Errorf("GetCPUSetForPool from nil podEntries")
   284  	}
   285  
   286  	if !pe[poolName].IsPoolEntry() {
   287  		return machine.NewCPUSet(), fmt.Errorf("pool not found")
   288  	}
   289  	return pe[poolName][cpuadvisor.FakedContainerName].AllocationResult.Clone(), nil
   290  }
   291  
   292  // GetFilteredPoolsCPUSet returns a mapping of pools for all of them (except for those skipped ones)
   293  func (pe PodEntries) GetFilteredPoolsCPUSet(ignorePools sets.String) machine.CPUSet {
   294  	ret := machine.NewCPUSet()
   295  	if pe == nil {
   296  		return ret
   297  	}
   298  
   299  	for poolName, entries := range pe {
   300  		allocationInfo := entries.GetPoolEntry()
   301  		if allocationInfo != nil && !ignorePools.Has(poolName) {
   302  			ret = ret.Union(allocationInfo.AllocationResult.Clone())
   303  		}
   304  	}
   305  	return ret
   306  }
   307  
   308  // GetFilteredPoolsCPUSetMap returns a mapping of pools for all of them (except for those skipped ones)
   309  func (pe PodEntries) GetFilteredPoolsCPUSetMap(ignorePools sets.String) map[string]machine.CPUSet {
   310  	ret := make(map[string]machine.CPUSet)
   311  	if pe == nil {
   312  		return ret
   313  	}
   314  
   315  	for poolName, entries := range pe {
   316  		allocationInfo := entries.GetPoolEntry()
   317  		if allocationInfo != nil && !ignorePools.Has(poolName) {
   318  			ret[poolName] = allocationInfo.AllocationResult.Clone()
   319  		}
   320  	}
   321  	return ret
   322  }
   323  
   324  // GetFilteredPodEntries filter out PodEntries according to the given filter logic
   325  func (pe PodEntries) GetFilteredPodEntries(filter func(ai *AllocationInfo) bool) PodEntries {
   326  	numaBindingEntries := make(PodEntries)
   327  	for podUID, containerEntries := range pe {
   328  		if containerEntries.IsPoolEntry() {
   329  			continue
   330  		}
   331  
   332  		for containerName, allocationInfo := range containerEntries {
   333  			if allocationInfo != nil && filter(allocationInfo) {
   334  				if numaBindingEntries[podUID] == nil {
   335  					numaBindingEntries[podUID] = make(ContainerEntries)
   336  				}
   337  				numaBindingEntries[podUID][containerName] = allocationInfo.Clone()
   338  			}
   339  		}
   340  	}
   341  	return numaBindingEntries
   342  }
   343  
   344  func (ns *NUMANodeState) Clone() *NUMANodeState {
   345  	if ns == nil {
   346  		return nil
   347  	}
   348  	return &NUMANodeState{
   349  		DefaultCPUSet:   ns.DefaultCPUSet.Clone(),
   350  		AllocatedCPUSet: ns.AllocatedCPUSet.Clone(),
   351  		PodEntries:      ns.PodEntries.Clone(),
   352  	}
   353  }
   354  
   355  // GetAvailableCPUSet returns available cpuset in this numa
   356  func (ns *NUMANodeState) GetAvailableCPUSet(reservedCPUs machine.CPUSet) machine.CPUSet {
   357  	if ns == nil {
   358  		return machine.NewCPUSet()
   359  	}
   360  	return ns.DefaultCPUSet.Difference(reservedCPUs)
   361  }
   362  
   363  // GetFilteredDefaultCPUSet returns default cpuset in this numa, along with the filter functions
   364  func (ns *NUMANodeState) GetFilteredDefaultCPUSet(excludeEntry, excludeWholeNUMA func(ai *AllocationInfo) bool) machine.CPUSet {
   365  	if ns == nil {
   366  		return machine.NewCPUSet()
   367  	}
   368  
   369  	res := ns.DefaultCPUSet.Clone()
   370  	res = res.Union(ns.AllocatedCPUSet)
   371  	for _, containerEntries := range ns.PodEntries {
   372  		for _, allocationInfo := range containerEntries {
   373  			if excludeWholeNUMA != nil && excludeWholeNUMA(allocationInfo) {
   374  				return machine.NewCPUSet()
   375  			} else if excludeEntry != nil && excludeEntry(allocationInfo) {
   376  				res = res.Difference(allocationInfo.AllocationResult)
   377  			}
   378  		}
   379  	}
   380  	return res
   381  }
   382  
   383  // ExistMatchedAllocationInfo returns true if the stated predicate holds true for some pods of this numa else it returns false.
   384  func (ns *NUMANodeState) ExistMatchedAllocationInfo(f func(ai *AllocationInfo) bool) bool {
   385  	for _, containerEntries := range ns.PodEntries {
   386  		for _, allocationInfo := range containerEntries {
   387  			if f(allocationInfo) {
   388  				return true
   389  			}
   390  		}
   391  	}
   392  
   393  	return false
   394  }
   395  
   396  func (ns *NUMANodeState) SetAllocationInfo(podUID string, containerName string, allocationInfo *AllocationInfo) {
   397  	if ns == nil {
   398  		return
   399  	}
   400  
   401  	if ns.PodEntries == nil {
   402  		ns.PodEntries = make(PodEntries)
   403  	}
   404  	if _, ok := ns.PodEntries[podUID]; !ok {
   405  		ns.PodEntries[podUID] = make(ContainerEntries)
   406  	}
   407  
   408  	ns.PodEntries[podUID][containerName] = allocationInfo.Clone()
   409  }
   410  
   411  // GetDefaultCPUSet returns default cpuset in this node
   412  func (nm NUMANodeMap) GetDefaultCPUSet() machine.CPUSet {
   413  	res := machine.NewCPUSet()
   414  	for _, numaNodeState := range nm {
   415  		res = res.Union(numaNodeState.DefaultCPUSet)
   416  	}
   417  	return res
   418  }
   419  
   420  // GetAvailableCPUSet returns available cpuset in this node
   421  func (nm NUMANodeMap) GetAvailableCPUSet(reservedCPUs machine.CPUSet) machine.CPUSet {
   422  	return nm.GetDefaultCPUSet().Difference(reservedCPUs)
   423  }
   424  
   425  // GetFilteredDefaultCPUSet returns default cpuset in this node, along with the filter functions
   426  func (nm NUMANodeMap) GetFilteredDefaultCPUSet(excludeEntry, excludeWholeNUMA func(ai *AllocationInfo) bool) machine.CPUSet {
   427  	res := machine.NewCPUSet()
   428  	for _, numaNodeState := range nm {
   429  		res = res.Union(numaNodeState.GetFilteredDefaultCPUSet(excludeEntry, excludeWholeNUMA))
   430  	}
   431  	return res
   432  }
   433  
   434  // GetFilteredAvailableCPUSet returns available cpuset in this node, along with the filter functions
   435  func (nm NUMANodeMap) GetFilteredAvailableCPUSet(reservedCPUs machine.CPUSet,
   436  	excludeEntry, excludeWholeNUMA func(ai *AllocationInfo) bool,
   437  ) machine.CPUSet {
   438  	return nm.GetFilteredDefaultCPUSet(excludeEntry, excludeWholeNUMA).Difference(reservedCPUs)
   439  }
   440  
   441  // GetFilteredNUMASet return numa set except the numa which are excluded by the predicate.
   442  func (nm NUMANodeMap) GetFilteredNUMASet(excludeNUMAPredicate func(ai *AllocationInfo) bool) machine.CPUSet {
   443  	res := machine.NewCPUSet()
   444  	for numaID, numaNodeState := range nm {
   445  		if numaNodeState.ExistMatchedAllocationInfo(excludeNUMAPredicate) {
   446  			continue
   447  		}
   448  		res.Add(numaID)
   449  	}
   450  	return res
   451  }
   452  
   453  func (nm NUMANodeMap) Clone() NUMANodeMap {
   454  	if nm == nil {
   455  		return nil
   456  	}
   457  
   458  	clone := make(NUMANodeMap)
   459  	for node, ns := range nm {
   460  		clone[node] = ns.Clone()
   461  	}
   462  	return clone
   463  }
   464  
   465  func (nm NUMANodeMap) String() string {
   466  	if nm == nil {
   467  		return ""
   468  	}
   469  
   470  	contentBytes, err := json.Marshal(nm)
   471  	if err != nil {
   472  		klog.Errorf("[NUMANodeMap.String] marshal NUMANodeMap failed with error: %v", err)
   473  		return ""
   474  	}
   475  	return string(contentBytes)
   476  }
   477  
// reader is used to get information from local states
type reader interface {
	// GetMachineState returns the machine state as a per-NUMA-node map.
	GetMachineState() NUMANodeMap
	// GetPodEntries returns the pod entries held in the state.
	GetPodEntries() PodEntries
	// GetAllocationInfo looks up the allocation info by pod UID and container name.
	// NOTE(review): the result for an unknown pod/container is implementation-defined — presumably nil; confirm.
	GetAllocationInfo(podUID string, containerName string) *AllocationInfo
}
   484  
// writer is used to store information into local states,
// and it also provides functionality to maintain the local files
type writer interface {
	// SetMachineState stores the given per-NUMA-node machine state.
	SetMachineState(numaNodeMap NUMANodeMap)
	// SetPodEntries stores the given pod entries.
	SetPodEntries(podEntries PodEntries)
	// SetAllocationInfo stores the allocation info for the given pod UID and container name.
	SetAllocationInfo(podUID string, containerName string, allocationInfo *AllocationInfo)

	// Delete takes a pod UID and a container name; presumably it removes the matching entry.
	Delete(podUID string, containerName string)
	// ClearState takes no arguments; presumably it resets the stored state (see the implementation for details).
	ClearState()
}
   495  
// State interface provides methods for tracking and setting pod assignments;
// it is the combination of the reader and writer interfaces.
type State interface {
	reader
	writer
}
   501  
// ReadonlyState interface only provides methods for tracking pod assignments;
// it exposes the reader methods and none of the writer methods.
type ReadonlyState interface {
	reader
}