github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/orm/topology/manager.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package topology 18 19 import ( 20 "fmt" 21 "sync" 22 23 cadvisorapi "github.com/google/cadvisor/info/v1" 24 v1 "k8s.io/api/core/v1" 25 "k8s.io/klog/v2" 26 ) 27 28 const ( 29 // maxAllowableNUMANodes specifies the maximum number of NUMA Nodes that 30 // the TopologyManager supports on the underlying machine. 31 // 32 // At present, having more than this number of NUMA Nodes will result in a 33 // state explosion when trying to enumerate possible NUMAAffinity masks and 34 // generate hints for them. As such, if more NUMA Nodes than this are 35 // present on a machine and the TopologyManager is enabled, an error will 36 // be returned and the TopologyManager will not be loaded. 37 maxAllowableNUMANodes = 8 38 // defaultResourceKey is the key to store the default hint for those resourceNames 39 // which don't specify hint. 40 defaultResourceKey = "*" 41 ) 42 43 type Manager interface { 44 Admit(pod *v1.Pod) error 45 46 AddHintProvider(provider HintProvider) 47 48 GetAffinity(podUID string, containerName string, resourceName string) TopologyHint 49 50 RemovePod(podUID string) 51 } 52 53 // HintProvider is an interface for components that want to collaborate to 54 // achieve globally optimal concrete resource alignment with respect to 55 // NUMA locality. 56 type HintProvider interface { 57 // GetTopologyHints returns a map of resource names to a list of possible 58 // concrete resource allocations in terms of NUMA locality hints. Each hint 59 // is optionally marked "preferred" and indicates the set of NUMA nodes 60 // involved in the hypothetical allocation. The topology manager calls 61 // this function for each hint provider, and merges the hints to produce 62 // a consensus "best" hint. The hint providers may subsequently query the 63 // topology manager to influence actual resource assignment. 64 GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]TopologyHint 65 // GetPodTopologyHints returns a map of resource names to a list of possible 66 // concrete resource allocations per Pod in terms of NUMA locality hints. 67 GetPodTopologyHints(pod *v1.Pod) map[string][]TopologyHint 68 // Allocate triggers resource allocation to occur on the HintProvider after 69 // all hints have been gathered and the aggregated Hint is available via a 70 // call to GetAffinity(). 71 Allocate(pod *v1.Pod, container *v1.Container) error 72 } 73 74 type manager struct { 75 mutex sync.Mutex 76 // Mapping of a Pods mapping of Containers and their TopologyHints 77 // Indexed by PodUID to ContainerName 78 podTopologyHints map[string]podTopologyHints 79 // The list of components registered with the Manager 80 hintProviders []HintProvider 81 // Topology Manager Policy 82 policy Policy 83 } 84 85 func NewManager(topology []cadvisorapi.Node, topologyPolicyName string, alignResources []string) (Manager, error) { 86 klog.InfoS("Creating topology manager with policy per scope", "topologyPolicyName", topologyPolicyName) 87 88 var numaNodes []int 89 for _, node := range topology { 90 numaNodes = append(numaNodes, node.Id) 91 } 92 93 if topologyPolicyName != PolicyNone && len(numaNodes) > maxAllowableNUMANodes { 94 return nil, fmt.Errorf("unsupported on machines with more than %v NUMA Nodes", maxAllowableNUMANodes) 95 } 96 97 var policy Policy 98 switch topologyPolicyName { 99 case PolicyNone: 100 policy = NewNonePolicy() 101 102 case PolicyBestEffort: 103 policy = NewBestEffortPolicy(numaNodes) 104 105 case PolicyRestricted: 106 policy = NewRestrictedPolicy(numaNodes) 107 108 case PolicySingleNumaNode: 109 policy = NewSingleNumaNodePolicy(numaNodes) 110 111 case PolicyNumeric: 112 policy = NewNumericPolicy(alignResources) 113 114 default: 115 return nil, fmt.Errorf("unknown policy: \"%s\"", topologyPolicyName) 116 } 117 118 m := &manager{ 119 podTopologyHints: map[string]podTopologyHints{}, 120 hintProviders: make([]HintProvider, 0), 121 policy: policy, 122 } 123 return m, nil 124 } 125 126 func (m *manager) Admit(pod *v1.Pod) error { 127 if m.policy.Name() == PolicyNone { 128 return m.admitPolicyNone(pod) 129 } 130 131 for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { 132 bestHint, admit := m.calculateAffinity(pod, &container) 133 klog.V(3).Infof("Best TopologyHint, bestHint: %v, pod: %v, containerName: %v", bestHint, klog.KObj(pod), container.Name) 134 135 if !admit { 136 err := fmt.Errorf("pod: %v, containerName: %v not admit", pod.Name, container.Name) 137 return err 138 } 139 klog.V(3).Infof("Topology Affinity, bestHint: %v, pod: %v, containerName: %v", bestHint, klog.KObj(pod), container.Name) 140 m.setTopologyHints(string(pod.UID), container.Name, bestHint) 141 142 err := m.allocateAlignedResources(pod, &container) 143 if err != nil { 144 klog.Errorf("allocateAlignedResources fail, pod: %v, containerName: %v, err: %v", klog.KObj(pod), container.Name, err) 145 return err 146 } 147 } 148 149 return nil 150 } 151 152 func (m *manager) admitPolicyNone(pod *v1.Pod) error { 153 for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { 154 err := m.allocateAlignedResources(pod, &container) 155 if err != nil { 156 klog.Errorf("allocateAlignedResources fail, pod: %v, containerName: %v, err: %v", klog.KObj(pod), container.Name, err) 157 return err 158 } 159 } 160 161 return nil 162 } 163 164 func (m *manager) AddHintProvider(provider HintProvider) { 165 m.hintProviders = append(m.hintProviders, provider) 166 } 167 168 func (m *manager) GetAffinity(podUID string, containerName string, resourceName string) TopologyHint { 169 return m.getTopologyHints(podUID, containerName, resourceName) 170 } 171 172 func (m *manager) calculateAffinity(pod *v1.Pod, container *v1.Container) (map[string]TopologyHint, bool) { 173 providersHints := m.accumulateProvidersHints(pod, container) 174 bestHint, admit := m.policy.Merge(providersHints) 175 klog.V(3).Infof("ContainerTopologyHint, bestHint: %v", bestHint) 176 return bestHint, admit 177 } 178 179 func (m *manager) accumulateProvidersHints(pod *v1.Pod, container *v1.Container) []map[string][]TopologyHint { 180 var providersHints []map[string][]TopologyHint 181 182 for _, provider := range m.hintProviders { 183 // Get the TopologyHints for a Container from a provider. 184 hints := provider.GetTopologyHints(pod, container) 185 providersHints = append(providersHints, hints) 186 klog.V(3).Infof("TopologyHints, hints: %v, pod: %v, containerName: %v", hints, klog.KObj(pod), container.Name) 187 } 188 return providersHints 189 } 190 191 func (m *manager) allocateAlignedResources(pod *v1.Pod, container *v1.Container) error { 192 for _, provider := range m.hintProviders { 193 err := provider.Allocate(pod, container) 194 if err != nil { 195 return err 196 } 197 } 198 return nil 199 } 200 201 func (m *manager) setTopologyHints(podUID string, containerName string, th map[string]TopologyHint) { 202 m.mutex.Lock() 203 defer m.mutex.Unlock() 204 205 if m.podTopologyHints[podUID] == nil { 206 m.podTopologyHints[podUID] = make(map[string]map[string]TopologyHint) 207 } 208 m.podTopologyHints[podUID][containerName] = th 209 } 210 211 func (m *manager) getTopologyHints(podUID string, containerName string, resourceName string) TopologyHint { 212 m.mutex.Lock() 213 defer m.mutex.Unlock() 214 hint, ok := m.podTopologyHints[podUID][containerName][resourceName] 215 if ok { 216 return hint 217 } 218 return m.podTopologyHints[podUID][containerName][defaultResourceKey] 219 } 220 221 func (m *manager) RemovePod(podUID string) { 222 m.mutex.Lock() 223 defer m.mutex.Unlock() 224 225 klog.V(3).Infof("RemovePod, podUID: %v", podUID) 226 delete(m.podTopologyHints, podUID) 227 }