github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/orm/resourceprovider.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package orm
    18  
    19  import (
    20  	"fmt"
    21  
    22  	//nolint
    23  	"github.com/golang/protobuf/proto"
    24  	v1 "k8s.io/api/core/v1"
    25  	"k8s.io/klog/v2"
    26  	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
    27  	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
    28  	resourcepluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
    29  	maputil "k8s.io/kubernetes/pkg/util/maps"
    30  
    31  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    32  	"github.com/kubewharf/katalyst-core/pkg/util/native"
    33  )
    34  
    35  func (m *ManagerImpl) GetTopologyAwareResources(pod *v1.Pod, container *v1.Container) []*podresourcesapi.TopologyAwareResource {
    36  	resp, err := m.getTopologyAwareResources(pod, container)
    37  	if err != nil {
    38  		klog.Error(err)
    39  		_ = m.emitter.StoreInt64(MetricGetTopologyAwareResourcesFail, 1, metrics.MetricTypeNameCount)
    40  		return nil
    41  	}
    42  
    43  	if resp == nil || resp.ContainerTopologyAwareResources == nil {
    44  		return nil
    45  	}
    46  
    47  	topologyAwareResources := make([]*podresourcesapi.TopologyAwareResource, 0, len(resp.ContainerTopologyAwareResources.AllocatedResources))
    48  
    49  	for resourceName, resource := range resp.ContainerTopologyAwareResources.AllocatedResources {
    50  		if resource == nil {
    51  			continue
    52  		}
    53  
    54  		topologyAwareResources = append(topologyAwareResources, &podresourcesapi.TopologyAwareResource{
    55  			ResourceName:                      resourceName,
    56  			IsNodeResource:                    resource.IsNodeResource,
    57  			IsScalarResource:                  resource.IsScalarResource,
    58  			AggregatedQuantity:                resource.AggregatedQuantity,
    59  			OriginalAggregatedQuantity:        resource.OriginalAggregatedQuantity,
    60  			TopologyAwareQuantityList:         transformTopologyAwareQuantity(resource.TopologyAwareQuantityList),
    61  			OriginalTopologyAwareQuantityList: transformTopologyAwareQuantity(resource.OriginalTopologyAwareQuantityList),
    62  		})
    63  	}
    64  
    65  	return topologyAwareResources
    66  }
    67  
    68  func (m *ManagerImpl) GetTopologyAwareAllocatableResources() []*podresourcesapi.AllocatableTopologyAwareResource {
    69  	resp, err := m.getTopologyAwareAllocatableResources()
    70  	if err != nil {
    71  		klog.Error(err)
    72  		_ = m.emitter.StoreInt64(MetricGetTopologyAwareAllocatableResourcesFail, 1, metrics.MetricTypeNameCount)
    73  		return nil
    74  	}
    75  
    76  	if resp == nil {
    77  		return nil
    78  	}
    79  
    80  	allocatableTopologyAwareResources := make([]*podresourcesapi.AllocatableTopologyAwareResource, 0, len(resp.AllocatableResources))
    81  	for resourceName, resource := range resp.AllocatableResources {
    82  		if resource == nil {
    83  			continue
    84  		}
    85  
    86  		allocatableTopologyAwareResources = append(allocatableTopologyAwareResources, &podresourcesapi.AllocatableTopologyAwareResource{
    87  			ResourceName:                         resourceName,
    88  			IsNodeResource:                       resource.IsNodeResource,
    89  			IsScalarResource:                     resource.IsScalarResource,
    90  			AggregatedAllocatableQuantity:        resource.AggregatedAllocatableQuantity,
    91  			TopologyAwareAllocatableQuantityList: transformTopologyAwareQuantity(resource.TopologyAwareAllocatableQuantityList),
    92  			AggregatedCapacityQuantity:           resource.AggregatedCapacityQuantity,
    93  			TopologyAwareCapacityQuantityList:    transformTopologyAwareQuantity(resource.TopologyAwareCapacityQuantityList),
    94  		})
    95  	}
    96  
    97  	return allocatableTopologyAwareResources
    98  }
    99  
   100  // UpdateAllocatedResources process add pods and delete pods synchronously.
   101  func (m *ManagerImpl) UpdateAllocatedResources() {
   102  	podsToBeAdded, podsToBeRemoved, err := m.metaManager.ReconcilePods()
   103  	if err != nil {
   104  		klog.Errorf("ReconcilePods fail: %v", err)
   105  		_ = m.emitter.StoreInt64(MetricUpdateAllocatedResourcesFail, 1, metrics.MetricTypeNameCount)
   106  		return
   107  	}
   108  
   109  	for _, podUID := range podsToBeAdded {
   110  		err = m.processAddPod(podUID)
   111  		if err != nil {
   112  			klog.Errorf("ReconcilePods fail: %v", err)
   113  			_ = m.emitter.StoreInt64(MetricUpdateAllocatedResourcesFail, 1, metrics.MetricTypeNameCount)
   114  		}
   115  	}
   116  
   117  	for podUID := range podsToBeRemoved {
   118  		err = m.processDeletePod(podUID)
   119  		if err != nil {
   120  			klog.Errorf("ReconcilePods fail: %v", err)
   121  			_ = m.emitter.StoreInt64(MetricUpdateAllocatedResourcesFail, 1, metrics.MetricTypeNameCount)
   122  		}
   123  	}
   124  	return
   125  }
   126  
   127  func (m *ManagerImpl) getTopologyAwareResources(pod *v1.Pod, container *v1.Container) (*pluginapi.GetTopologyAwareResourcesResponse, error) {
   128  	var resp *pluginapi.GetTopologyAwareResourcesResponse
   129  
   130  	if pod == nil || container == nil {
   131  		err := fmt.Errorf("GetTopologyAwareResources got nil pod: %v or container: %v", pod, container)
   132  		return nil, err
   133  	}
   134  	systemCores, err := isPodKatalystQoSLevelSystemCores(m.qosConfig, pod)
   135  	if err != nil {
   136  		err = fmt.Errorf("[ORM] check pod %s qos level fail: %v", pod.Name, err)
   137  		return nil, err
   138  	}
   139  	if native.CheckDaemonPod(pod) && !systemCores {
   140  		klog.V(5).Infof("[ORM] skip pod: %s, container: %v", pod.Name, container.Name)
   141  		return nil, nil
   142  	}
   143  
   144  	m.mutex.RLock()
   145  	defer m.mutex.RUnlock()
   146  	for resourceName, eI := range m.endpoints {
   147  		if eI.E.IsStopped() {
   148  			klog.Warningf("[ORM] resource %s endpoints %s stopped, pod: %s, container: %s", resourceName, pod.Name, container.Name)
   149  			continue
   150  		}
   151  
   152  		curResp, err := eI.E.GetTopologyAwareResources(m.ctx, &pluginapi.GetTopologyAwareResourcesRequest{
   153  			PodUid:        string(pod.UID),
   154  			ContainerName: container.Name,
   155  		})
   156  		if err != nil {
   157  			return nil, fmt.Errorf("[ORM] getTopologyAwareResources for resource: %s failed with error: %v", resourceName, err)
   158  		} else if curResp == nil {
   159  			klog.Warningf("[ORM] getTopologyAwareResources of resource: %s for pod: %s container: %s, got nil response but without error", resourceName, pod.Name, container.Name)
   160  			continue
   161  		}
   162  
   163  		if resp == nil {
   164  			resp = curResp
   165  
   166  			if resp.ContainerTopologyAwareResources == nil {
   167  				resp.ContainerTopologyAwareResources = &pluginapi.ContainerTopologyAwareResources{
   168  					ContainerName: container.Name,
   169  				}
   170  			}
   171  
   172  			if resp.ContainerTopologyAwareResources.AllocatedResources == nil {
   173  				resp.ContainerTopologyAwareResources.AllocatedResources = make(map[string]*pluginapi.TopologyAwareResource)
   174  			}
   175  		} else if curResp.ContainerTopologyAwareResources != nil && curResp.ContainerTopologyAwareResources.AllocatedResources != nil {
   176  			for resourceName, topologyAwareResource := range curResp.ContainerTopologyAwareResources.AllocatedResources {
   177  				if topologyAwareResource != nil {
   178  					resp.ContainerTopologyAwareResources.AllocatedResources[resourceName] = proto.Clone(topologyAwareResource).(*pluginapi.TopologyAwareResource)
   179  				}
   180  			}
   181  		} else {
   182  			klog.Warningf("[ORM] getTopologyAwareResources of resource: %s for pod: %s container: %s, get nil resp or nil topologyAwareResources in resp",
   183  				resourceName, pod.UID, container.Name)
   184  		}
   185  	}
   186  
   187  	return resp, nil
   188  }
   189  
   190  func (m *ManagerImpl) getTopologyAwareAllocatableResources() (*pluginapi.GetTopologyAwareAllocatableResourcesResponse, error) {
   191  	var resp *pluginapi.GetTopologyAwareAllocatableResourcesResponse
   192  
   193  	m.mutex.RLock()
   194  	defer m.mutex.RUnlock()
   195  	for resourceName, eI := range m.endpoints {
   196  		if eI.E.IsStopped() {
   197  			klog.Warningf("[ORM] resource %s endpoints %s stopped", resourceName)
   198  			continue
   199  		}
   200  
   201  		curResp, err := eI.E.GetTopologyAwareAllocatableResources(m.ctx, &pluginapi.GetTopologyAwareAllocatableResourcesRequest{})
   202  		if err != nil {
   203  			return nil, fmt.Errorf("[ORM] getTopologyAwareAllocatableResources for resource: %s failed with error: %v", resourceName, err)
   204  		} else if curResp == nil {
   205  			klog.Warningf("[ORM] getTopologyAwareAllocatableResources of resource: %s, got nil response but without error", resourceName)
   206  			continue
   207  		}
   208  
   209  		if resp == nil {
   210  			resp = curResp
   211  
   212  			if resp.AllocatableResources == nil {
   213  				resp.AllocatableResources = make(map[string]*pluginapi.AllocatableTopologyAwareResource)
   214  			}
   215  		} else if curResp.AllocatableResources != nil {
   216  			for resourceName, topologyAwareResource := range curResp.AllocatableResources {
   217  				if topologyAwareResource != nil {
   218  					resp.AllocatableResources[resourceName] = proto.Clone(topologyAwareResource).(*pluginapi.AllocatableTopologyAwareResource)
   219  				}
   220  			}
   221  		} else {
   222  			klog.Warningf("[ORM] getTopologyAwareAllocatableResources of resource: %s, get nil resp or nil topologyAwareResources in resp", resourceName)
   223  		}
   224  	}
   225  
   226  	return resp, nil
   227  }
   228  
   229  func transformTopologyAwareQuantity(pluginAPITopologyAwareQuantityList []*resourcepluginapi.TopologyAwareQuantity) []*podresourcesapi.TopologyAwareQuantity {
   230  	if pluginAPITopologyAwareQuantityList == nil {
   231  		return nil
   232  	}
   233  
   234  	topologyAwareQuantityList := make([]*podresourcesapi.TopologyAwareQuantity, 0, len(pluginAPITopologyAwareQuantityList))
   235  
   236  	for _, topologyAwareQuantity := range pluginAPITopologyAwareQuantityList {
   237  		if topologyAwareQuantity != nil {
   238  			topologyAwareQuantityList = append(topologyAwareQuantityList, &podresourcesapi.TopologyAwareQuantity{
   239  				ResourceValue: topologyAwareQuantity.ResourceValue,
   240  				Node:          topologyAwareQuantity.Node,
   241  				Name:          topologyAwareQuantity.Name,
   242  				Type:          topologyAwareQuantity.Type,
   243  				TopologyLevel: transformTopologyLevel(topologyAwareQuantity.TopologyLevel),
   244  				Annotations:   maputil.CopySS(topologyAwareQuantity.Annotations),
   245  			})
   246  		}
   247  	}
   248  
   249  	return topologyAwareQuantityList
   250  }
   251  
   252  func transformTopologyLevel(pluginAPITopologyLevel resourcepluginapi.TopologyLevel) podresourcesapi.TopologyLevel {
   253  	switch pluginAPITopologyLevel {
   254  	case resourcepluginapi.TopologyLevel_NUMA:
   255  		return podresourcesapi.TopologyLevel_NUMA
   256  	case resourcepluginapi.TopologyLevel_SOCKET:
   257  		return podresourcesapi.TopologyLevel_SOCKET
   258  	}
   259  
   260  	klog.Warningf("[transformTopologyLevel] unrecognized pluginAPITopologyLevel %s:%v, set podResouresAPITopologyLevel to default value: %s:%v",
   261  		pluginAPITopologyLevel.String(), pluginAPITopologyLevel, podresourcesapi.TopologyLevel_NUMA.String(), podresourcesapi.TopologyLevel_NUMA)
   262  	return podresourcesapi.TopologyLevel_NUMA
   263  }