github.com/kubewharf/katalyst-core@v0.5.3/pkg/scheduler/plugins/nodeovercommitment/cache/cache.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package cache 18 19 import ( 20 "fmt" 21 "sync" 22 23 v1 "k8s.io/api/core/v1" 24 "k8s.io/klog/v2" 25 "k8s.io/kubernetes/pkg/features" 26 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" 27 "k8s.io/kubernetes/pkg/scheduler/framework" 28 29 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" 30 "github.com/kubewharf/katalyst-api/pkg/consts" 31 "github.com/kubewharf/katalyst-core/pkg/util/native" 32 ) 33 34 var cache *overcommitCache 35 36 func init() { 37 cache = &overcommitCache{ 38 nodeCaches: map[string]*NodeCache{}, 39 } 40 } 41 42 // cache stored node native topology providers and guaranteed requested resource. 43 // only used in overcommit scenario when kubelet uses native topology strategy. 44 type overcommitCache struct { 45 sync.RWMutex 46 nodeCaches map[string]*NodeCache 47 } 48 49 func GetCache() *overcommitCache { 50 return cache 51 } 52 53 func (c *overcommitCache) GetNode(name string) (*NodeCache, error) { 54 c.RLock() 55 defer c.RUnlock() 56 57 node, ok := c.nodeCaches[name] 58 if !ok { 59 return nil, fmt.Errorf("node %v not found", name) 60 } 61 62 return node, nil 63 } 64 65 func (c *overcommitCache) AddPod(pod *v1.Pod) error { 66 key, err := framework.GetPodKey(pod) 67 if err != nil { 68 return err 69 } 70 71 c.Lock() 72 defer c.Unlock() 73 74 n, ok := c.nodeCaches[pod.Spec.NodeName] 75 if !ok { 76 n = New() 77 c.nodeCaches[pod.Spec.NodeName] = n 78 } 79 n.AddPod(key, pod) 80 81 return nil 82 } 83 84 func (c *overcommitCache) RemovePod(pod *v1.Pod) error { 85 key, err := framework.GetPodKey(pod) 86 if err != nil { 87 return err 88 } 89 90 c.Lock() 91 defer c.Unlock() 92 93 n, ok := c.nodeCaches[pod.Spec.NodeName] 94 if !ok { 95 klog.ErrorS(nil, "Node not found when trying to remove pod", "node", klog.KRef("", pod.Spec.NodeName), "pod", klog.KObj(pod)) 96 } else { 97 n.RemovePod(key, pod) 98 } 99 100 return nil 101 } 102 103 func (c *overcommitCache) AddOrUpdateCNR(cnr *v1alpha1.CustomNodeResource) { 104 c.Lock() 105 defer c.Unlock() 106 107 n, ok := c.nodeCaches[cnr.Name] 108 if !ok { 109 n = New() 110 c.nodeCaches[cnr.Name] = n 111 } 112 113 n.updateTopologyProvider(cnr) 114 } 115 116 func (c *overcommitCache) RemoveCNR(cnr *v1alpha1.CustomNodeResource) { 117 c.Lock() 118 defer c.Unlock() 119 120 delete(c.nodeCaches, cnr.Name) 121 } 122 123 type NodeCache struct { 124 sync.RWMutex 125 126 PodResources map[string]int 127 128 // kubelet topology hint providers from CNR annotation. 129 // provider will be cached only if provider policy is available. 130 // only used for node resource overcommitment. 131 HintProviders map[string]struct{} 132 133 // total guaranteed cpus on node 134 GuaranteedCPUs int 135 } 136 137 func New() *NodeCache { 138 return &NodeCache{ 139 PodResources: map[string]int{}, 140 HintProviders: map[string]struct{}{}, 141 } 142 } 143 144 func (n *NodeCache) AddPod(key string, pod *v1.Pod) { 145 n.RemovePod(key, pod) 146 guaranteedCPUs := native.PodGuaranteedCPUs(pod) 147 148 n.Lock() 149 defer n.Unlock() 150 151 n.PodResources[key] = guaranteedCPUs 152 n.GuaranteedCPUs += guaranteedCPUs 153 } 154 155 func (n *NodeCache) RemovePod(key string, pod *v1.Pod) { 156 n.Lock() 157 defer n.Unlock() 158 podResource, ok := n.PodResources[key] 159 if !ok { 160 return 161 } 162 163 n.GuaranteedCPUs -= podResource 164 delete(n.PodResources, key) 165 } 166 167 func (n *NodeCache) updateTopologyProvider(cnr *v1alpha1.CustomNodeResource) { 168 if len(cnr.Annotations) <= 0 { 169 return 170 } 171 172 if CPUManagerPolicy, ok := cnr.Annotations[consts.KCNRAnnotationCPUManager]; ok { 173 if CPUManagerPolicy == string(cpumanager.PolicyStatic) { 174 n.HintProviders[string(features.CPUManager)] = struct{}{} 175 } 176 } 177 178 if memoryManagerPolicy, ok := cnr.Annotations[consts.KCNRAnnotationMemoryManager]; ok { 179 if memoryManagerPolicy == "Static" { 180 n.HintProviders[string(features.MemoryManager)] = struct{}{} 181 } 182 } 183 } 184 185 func (n *NodeCache) HintProvidersAvailable() (CPUManager, MemoryManager bool) { 186 n.RLock() 187 defer n.RUnlock() 188 189 _, ok := n.HintProviders[string(features.CPUManager)] 190 if ok { 191 CPUManager = true 192 } 193 194 _, ok = n.HintProviders[string(features.MemoryManager)] 195 if ok { 196 MemoryManager = true 197 } 198 199 return 200 } 201 202 func (n *NodeCache) GetGuaranteedCPUs() int { 203 n.RLock() 204 defer n.RUnlock() 205 206 return n.GuaranteedCPUs 207 }