github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/orm/resourceprovider.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package orm 18 19 import ( 20 "fmt" 21 22 //nolint 23 "github.com/golang/protobuf/proto" 24 v1 "k8s.io/api/core/v1" 25 "k8s.io/klog/v2" 26 podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1" 27 pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" 28 resourcepluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" 29 maputil "k8s.io/kubernetes/pkg/util/maps" 30 31 "github.com/kubewharf/katalyst-core/pkg/metrics" 32 "github.com/kubewharf/katalyst-core/pkg/util/native" 33 ) 34 35 func (m *ManagerImpl) GetTopologyAwareResources(pod *v1.Pod, container *v1.Container) []*podresourcesapi.TopologyAwareResource { 36 resp, err := m.getTopologyAwareResources(pod, container) 37 if err != nil { 38 klog.Error(err) 39 _ = m.emitter.StoreInt64(MetricGetTopologyAwareResourcesFail, 1, metrics.MetricTypeNameCount) 40 return nil 41 } 42 43 if resp == nil || resp.ContainerTopologyAwareResources == nil { 44 return nil 45 } 46 47 topologyAwareResources := make([]*podresourcesapi.TopologyAwareResource, 0, len(resp.ContainerTopologyAwareResources.AllocatedResources)) 48 49 for resourceName, resource := range resp.ContainerTopologyAwareResources.AllocatedResources { 50 if resource == nil { 51 continue 52 } 53 54 topologyAwareResources = append(topologyAwareResources, &podresourcesapi.TopologyAwareResource{ 55 ResourceName: resourceName, 56 IsNodeResource: resource.IsNodeResource, 57 IsScalarResource: resource.IsScalarResource, 58 AggregatedQuantity: resource.AggregatedQuantity, 59 OriginalAggregatedQuantity: resource.OriginalAggregatedQuantity, 60 TopologyAwareQuantityList: transformTopologyAwareQuantity(resource.TopologyAwareQuantityList), 61 OriginalTopologyAwareQuantityList: transformTopologyAwareQuantity(resource.OriginalTopologyAwareQuantityList), 62 }) 63 } 64 65 return topologyAwareResources 66 } 67 68 func (m *ManagerImpl) GetTopologyAwareAllocatableResources() []*podresourcesapi.AllocatableTopologyAwareResource { 69 resp, err := m.getTopologyAwareAllocatableResources() 70 if err != nil { 71 klog.Error(err) 72 _ = m.emitter.StoreInt64(MetricGetTopologyAwareAllocatableResourcesFail, 1, metrics.MetricTypeNameCount) 73 return nil 74 } 75 76 if resp == nil { 77 return nil 78 } 79 80 allocatableTopologyAwareResources := make([]*podresourcesapi.AllocatableTopologyAwareResource, 0, len(resp.AllocatableResources)) 81 for resourceName, resource := range resp.AllocatableResources { 82 if resource == nil { 83 continue 84 } 85 86 allocatableTopologyAwareResources = append(allocatableTopologyAwareResources, &podresourcesapi.AllocatableTopologyAwareResource{ 87 ResourceName: resourceName, 88 IsNodeResource: resource.IsNodeResource, 89 IsScalarResource: resource.IsScalarResource, 90 AggregatedAllocatableQuantity: resource.AggregatedAllocatableQuantity, 91 TopologyAwareAllocatableQuantityList: transformTopologyAwareQuantity(resource.TopologyAwareAllocatableQuantityList), 92 AggregatedCapacityQuantity: resource.AggregatedCapacityQuantity, 93 TopologyAwareCapacityQuantityList: transformTopologyAwareQuantity(resource.TopologyAwareCapacityQuantityList), 94 }) 95 } 96 97 return allocatableTopologyAwareResources 98 } 99 100 // UpdateAllocatedResources process add pods and delete pods synchronously. 101 func (m *ManagerImpl) UpdateAllocatedResources() { 102 podsToBeAdded, podsToBeRemoved, err := m.metaManager.ReconcilePods() 103 if err != nil { 104 klog.Errorf("ReconcilePods fail: %v", err) 105 _ = m.emitter.StoreInt64(MetricUpdateAllocatedResourcesFail, 1, metrics.MetricTypeNameCount) 106 return 107 } 108 109 for _, podUID := range podsToBeAdded { 110 err = m.processAddPod(podUID) 111 if err != nil { 112 klog.Errorf("ReconcilePods fail: %v", err) 113 _ = m.emitter.StoreInt64(MetricUpdateAllocatedResourcesFail, 1, metrics.MetricTypeNameCount) 114 } 115 } 116 117 for podUID := range podsToBeRemoved { 118 err = m.processDeletePod(podUID) 119 if err != nil { 120 klog.Errorf("ReconcilePods fail: %v", err) 121 _ = m.emitter.StoreInt64(MetricUpdateAllocatedResourcesFail, 1, metrics.MetricTypeNameCount) 122 } 123 } 124 return 125 } 126 127 func (m *ManagerImpl) getTopologyAwareResources(pod *v1.Pod, container *v1.Container) (*pluginapi.GetTopologyAwareResourcesResponse, error) { 128 var resp *pluginapi.GetTopologyAwareResourcesResponse 129 130 if pod == nil || container == nil { 131 err := fmt.Errorf("GetTopologyAwareResources got nil pod: %v or container: %v", pod, container) 132 return nil, err 133 } 134 systemCores, err := isPodKatalystQoSLevelSystemCores(m.qosConfig, pod) 135 if err != nil { 136 err = fmt.Errorf("[ORM] check pod %s qos level fail: %v", pod.Name, err) 137 return nil, err 138 } 139 if native.CheckDaemonPod(pod) && !systemCores { 140 klog.V(5).Infof("[ORM] skip pod: %s, container: %v", pod.Name, container.Name) 141 return nil, nil 142 } 143 144 m.mutex.RLock() 145 defer m.mutex.RUnlock() 146 for resourceName, eI := range m.endpoints { 147 if eI.E.IsStopped() { 148 klog.Warningf("[ORM] resource %s endpoints %s stopped, pod: %s, container: %s", resourceName, pod.Name, container.Name) 149 continue 150 } 151 152 curResp, err := eI.E.GetTopologyAwareResources(m.ctx, &pluginapi.GetTopologyAwareResourcesRequest{ 153 PodUid: string(pod.UID), 154 ContainerName: container.Name, 155 }) 156 if err != nil { 157 return nil, fmt.Errorf("[ORM] getTopologyAwareResources for resource: %s failed with error: %v", resourceName, err) 158 } else if curResp == nil { 159 klog.Warningf("[ORM] getTopologyAwareResources of resource: %s for pod: %s container: %s, got nil response but without error", resourceName, pod.Name, container.Name) 160 continue 161 } 162 163 if resp == nil { 164 resp = curResp 165 166 if resp.ContainerTopologyAwareResources == nil { 167 resp.ContainerTopologyAwareResources = &pluginapi.ContainerTopologyAwareResources{ 168 ContainerName: container.Name, 169 } 170 } 171 172 if resp.ContainerTopologyAwareResources.AllocatedResources == nil { 173 resp.ContainerTopologyAwareResources.AllocatedResources = make(map[string]*pluginapi.TopologyAwareResource) 174 } 175 } else if curResp.ContainerTopologyAwareResources != nil && curResp.ContainerTopologyAwareResources.AllocatedResources != nil { 176 for resourceName, topologyAwareResource := range curResp.ContainerTopologyAwareResources.AllocatedResources { 177 if topologyAwareResource != nil { 178 resp.ContainerTopologyAwareResources.AllocatedResources[resourceName] = proto.Clone(topologyAwareResource).(*pluginapi.TopologyAwareResource) 179 } 180 } 181 } else { 182 klog.Warningf("[ORM] getTopologyAwareResources of resource: %s for pod: %s container: %s, get nil resp or nil topologyAwareResources in resp", 183 resourceName, pod.UID, container.Name) 184 } 185 } 186 187 return resp, nil 188 } 189 190 func (m *ManagerImpl) getTopologyAwareAllocatableResources() (*pluginapi.GetTopologyAwareAllocatableResourcesResponse, error) { 191 var resp *pluginapi.GetTopologyAwareAllocatableResourcesResponse 192 193 m.mutex.RLock() 194 defer m.mutex.RUnlock() 195 for resourceName, eI := range m.endpoints { 196 if eI.E.IsStopped() { 197 klog.Warningf("[ORM] resource %s endpoints %s stopped", resourceName) 198 continue 199 } 200 201 curResp, err := eI.E.GetTopologyAwareAllocatableResources(m.ctx, &pluginapi.GetTopologyAwareAllocatableResourcesRequest{}) 202 if err != nil { 203 return nil, fmt.Errorf("[ORM] getTopologyAwareAllocatableResources for resource: %s failed with error: %v", resourceName, err) 204 } else if curResp == nil { 205 klog.Warningf("[ORM] getTopologyAwareAllocatableResources of resource: %s, got nil response but without error", resourceName) 206 continue 207 } 208 209 if resp == nil { 210 resp = curResp 211 212 if resp.AllocatableResources == nil { 213 resp.AllocatableResources = make(map[string]*pluginapi.AllocatableTopologyAwareResource) 214 } 215 } else if curResp.AllocatableResources != nil { 216 for resourceName, topologyAwareResource := range curResp.AllocatableResources { 217 if topologyAwareResource != nil { 218 resp.AllocatableResources[resourceName] = proto.Clone(topologyAwareResource).(*pluginapi.AllocatableTopologyAwareResource) 219 } 220 } 221 } else { 222 klog.Warningf("[ORM] getTopologyAwareAllocatableResources of resource: %s, get nil resp or nil topologyAwareResources in resp", resourceName) 223 } 224 } 225 226 return resp, nil 227 } 228 229 func transformTopologyAwareQuantity(pluginAPITopologyAwareQuantityList []*resourcepluginapi.TopologyAwareQuantity) []*podresourcesapi.TopologyAwareQuantity { 230 if pluginAPITopologyAwareQuantityList == nil { 231 return nil 232 } 233 234 topologyAwareQuantityList := make([]*podresourcesapi.TopologyAwareQuantity, 0, len(pluginAPITopologyAwareQuantityList)) 235 236 for _, topologyAwareQuantity := range pluginAPITopologyAwareQuantityList { 237 if topologyAwareQuantity != nil { 238 topologyAwareQuantityList = append(topologyAwareQuantityList, &podresourcesapi.TopologyAwareQuantity{ 239 ResourceValue: topologyAwareQuantity.ResourceValue, 240 Node: topologyAwareQuantity.Node, 241 Name: topologyAwareQuantity.Name, 242 Type: topologyAwareQuantity.Type, 243 TopologyLevel: transformTopologyLevel(topologyAwareQuantity.TopologyLevel), 244 Annotations: maputil.CopySS(topologyAwareQuantity.Annotations), 245 }) 246 } 247 } 248 249 return topologyAwareQuantityList 250 } 251 252 func transformTopologyLevel(pluginAPITopologyLevel resourcepluginapi.TopologyLevel) podresourcesapi.TopologyLevel { 253 switch pluginAPITopologyLevel { 254 case resourcepluginapi.TopologyLevel_NUMA: 255 return podresourcesapi.TopologyLevel_NUMA 256 case resourcepluginapi.TopologyLevel_SOCKET: 257 return podresourcesapi.TopologyLevel_SOCKET 258 } 259 260 klog.Warningf("[transformTopologyLevel] unrecognized pluginAPITopologyLevel %s:%v, set podResouresAPITopologyLevel to default value: %s:%v", 261 pluginAPITopologyLevel.String(), pluginAPITopologyLevel, podresourcesapi.TopologyLevel_NUMA.String(), podresourcesapi.TopologyLevel_NUMA) 262 return podresourcesapi.TopologyLevel_NUMA 263 }