github.com/kubewharf/katalyst-core@v0.5.3/pkg/scheduler/cache/nodeinfo.go

/*
Copyright 2022 The Katalyst Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cache

import (
	"sync"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"

	apis "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1"
	"github.com/kubewharf/katalyst-api/pkg/consts"
	"github.com/kubewharf/katalyst-core/pkg/util/native"
)

// PodInfo is pod level aggregated information.
type PodInfo struct {
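	// QoS resources requested by this pod (reclaimed milli-CPU and reclaimed memory).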
	QoSResourcesRequested        *native.QoSResource
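	// Requested QoS resources with a minimum value applied to zero CPU and memory requests.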
	QoSResourcesNonZeroRequested *native.QoSResource
}

// NodeInfo is node level aggregated information.
type NodeInfo struct {
	// Mutex guards all fields within this NodeInfo struct.
	Mutex sync.RWMutex

	// Total requested qos resources of this node. This includes assumed
	// pods, which the scheduler has sent for binding but which may not be scheduled yet.
	QoSResourcesRequested *native.QoSResource
	// Total requested qos resources of this node with a minimum value
	// applied to each container's CPU and memory requests. This does not reflect
	// the actual resource requests for this node, but is used to avoid scheduling
	// many zero-request pods onto one node.
	QoSResourcesNonZeroRequested *native.QoSResource
	// We store the qos allocatable resources (parsed from CNR.Status.Resources.Allocatable)
	// explicitly as int64 to avoid conversions and map lookups.
	QoSResourcesAllocatable *native.QoSResource

	// record PodInfo here since we may later support changing pod resources in place.
	Pods map[string]*PodInfo

	// node TopologyPolicy and TopologyZones from the CNR status.
	// TODO: is the full CNR data necessary in the extendedCache?
	ResourceTopology *ResourceTopology

	// record assumed pod resources until the pod is observed in CNR update events.
	AssumedPodResources native.PodResource
}

// NewNodeInfo returns a ready-to-use empty NodeInfo object.
func NewNodeInfo() *NodeInfo {
	ni := &NodeInfo{
		QoSResourcesRequested:        &native.QoSResource{},
		QoSResourcesNonZeroRequested: &native.QoSResource{},
		QoSResourcesAllocatable:      &native.QoSResource{},
		Pods:                         make(map[string]*PodInfo),
		ResourceTopology:             new(ResourceTopology),
		AssumedPodResources:          native.PodResource{},
	}
	return ni
}
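
// A minimal usage sketch (illustrative only; the real call sites live in the
// scheduler cache and its event handlers, and the pod key format is assumed here):
//
//	ni := NewNodeInfo()
//	ni.AddPod(podKey, pod)        // aggregate the pod's reclaimed resource requests
//	ni.UpdateNodeInfo(cnr)        // refresh reclaimed allocatable and topology from the CNR
//	_ = ni.QoSResourcesRequested  // total reclaimed milli-CPU / memory requested on this node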

// UpdateNodeInfo updates the NodeInfo with the latest CNR status: reclaimed
// allocatable resources and resource topology.
func (n *NodeInfo) UpdateNodeInfo(cnr *apis.CustomNodeResource) {
	n.Mutex.Lock()
	defer n.Mutex.Unlock()

	n.updateReclaimed(cnr)

	n.updateTopology(cnr)
}

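// updateReclaimed refreshes QoSResourcesAllocatable from the reclaimed resources
// (reclaimed milli-CPU and reclaimed memory) reported in the CNR status.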
func (n *NodeInfo) updateReclaimed(cnr *apis.CustomNodeResource) {
	if cnr.Status.Resources.Allocatable != nil {
		beResourceList := *cnr.Status.Resources.Allocatable
		if reclaimedMilliCPU, ok := beResourceList[consts.ReclaimedResourceMilliCPU]; ok {
			n.QoSResourcesAllocatable.ReclaimedMilliCPU = reclaimedMilliCPU.Value()
		} else {
			n.QoSResourcesAllocatable.ReclaimedMilliCPU = 0
		}

		if reclaimedMemory, ok := beResourceList[consts.ReclaimedResourceMemory]; ok {
			n.QoSResourcesAllocatable.ReclaimedMemory = reclaimedMemory.Value()
		} else {
			n.QoSResourcesAllocatable.ReclaimedMemory = 0
		}
	}
}

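// updateTopology drops consumers that already appear in the CNR NUMA allocations
// from AssumedPodResources, and then refreshes the cached ResourceTopology.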
func (n *NodeInfo) updateTopology(cnr *apis.CustomNodeResource) {
	for _, topologyZone := range cnr.Status.TopologyZone {
		if topologyZone.Type != apis.TopologyTypeSocket {
			continue
		}
		for _, child := range topologyZone.Children {
			if child.Type != apis.TopologyTypeNuma {
				continue
			}

			for _, alloc := range child.Allocations {
				namespace, name, _, err := native.ParseNamespaceNameUIDKey(alloc.Consumer)
				if err != nil {
					klog.Errorf("unexpected CNR numa consumer: %v", err)
					continue
				}
				// the consumer pod is now reflected in the CNR allocations, so drop it from AssumedPodResources
				n.AssumedPodResources.DeletePod(&v1.Pod{
					ObjectMeta: metav1.ObjectMeta{
						Name:      name,
						Namespace: namespace,
					},
				})
			}
		}
	}

	n.ResourceTopology.Update(cnr)
}

// AddPod adds pod information to this NodeInfo.
func (n *NodeInfo) AddPod(key string, pod *v1.Pod) {
	// always remove any previous entry for this pod first, then insert it again
	n.RemovePod(key, pod)

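	// compute the pod's reclaimed resource requests; non0CPU/non0Mem carry the
	// minimum-value-adjusted requests that feed QoSResourcesNonZeroRequested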
	res, non0CPU, non0Mem := native.CalculateQoSResource(pod)

	n.Mutex.Lock()
	defer n.Mutex.Unlock()

	n.Pods[key] = &PodInfo{
		QoSResourcesRequested: &res,
		QoSResourcesNonZeroRequested: &native.QoSResource{
			ReclaimedMilliCPU: non0CPU,
			ReclaimedMemory:   non0Mem,
		},
	}

	n.QoSResourcesRequested.ReclaimedMilliCPU += res.ReclaimedMilliCPU
	n.QoSResourcesRequested.ReclaimedMemory += res.ReclaimedMemory

	n.QoSResourcesNonZeroRequested.ReclaimedMilliCPU += non0CPU
	n.QoSResourcesNonZeroRequested.ReclaimedMemory += non0Mem
}

// RemovePod subtracts pod information from this NodeInfo.
func (n *NodeInfo) RemovePod(key string, pod *v1.Pod) {
	n.Mutex.Lock()
	defer n.Mutex.Unlock()

	podInfo, ok := n.Pods[key]
	if !ok {
		return
	}

	n.QoSResourcesRequested.ReclaimedMilliCPU -= podInfo.QoSResourcesRequested.ReclaimedMilliCPU
	n.QoSResourcesRequested.ReclaimedMemory -= podInfo.QoSResourcesRequested.ReclaimedMemory

	n.QoSResourcesNonZeroRequested.ReclaimedMilliCPU -= podInfo.QoSResourcesNonZeroRequested.ReclaimedMilliCPU
	n.QoSResourcesNonZeroRequested.ReclaimedMemory -= podInfo.QoSResourcesNonZeroRequested.ReclaimedMemory
	delete(n.Pods, key)
}

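// AddAssumedPod records the resources of a pod that the scheduler has assumed onto
// this node but that has not yet been observed in CNR update events.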
func (n *NodeInfo) AddAssumedPod(pod *v1.Pod) {
	n.Mutex.Lock()
	defer n.Mutex.Unlock()
	n.AssumedPodResources.AddPod(pod)
}

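// DeleteAssumedPod drops a previously assumed pod from AssumedPodResources.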
func (n *NodeInfo) DeleteAssumedPod(pod *v1.Pod) {
	n.Mutex.Lock()
	defer n.Mutex.Unlock()

	n.AssumedPodResources.DeletePod(pod)
}

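// GetResourceTopologyCopy returns a copy of the node's ResourceTopology with the
// currently assumed pod resources applied, filtered by filterFn.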
func (n *NodeInfo) GetResourceTopologyCopy(filterFn podFilter) *ResourceTopology {
	n.Mutex.RLock()
	defer n.Mutex.RUnlock()

	if n.ResourceTopology == nil {
		return nil
	}

	return n.ResourceTopology.WithPodReousrce(n.AssumedPodResources, filterFn)
}