github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/network/staticpolicy/policy.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package staticpolicy
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sort"
    23  	"strconv"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	"k8s.io/apimachinery/pkg/util/wait"
    29  	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
    30  	maputil "k8s.io/kubernetes/pkg/util/maps"
    31  
    32  	apinode "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1"
    33  	apiconsts "github.com/kubewharf/katalyst-api/pkg/consts"
    34  	"github.com/kubewharf/katalyst-api/pkg/plugins/skeleton"
    35  	"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent"
    36  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/network/state"
    37  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
    38  	"github.com/kubewharf/katalyst-core/pkg/config"
    39  	agentconfig "github.com/kubewharf/katalyst-core/pkg/config/agent"
    40  	"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm"
    41  	"github.com/kubewharf/katalyst-core/pkg/config/generic"
    42  	"github.com/kubewharf/katalyst-core/pkg/metaserver"
    43  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    44  	"github.com/kubewharf/katalyst-core/pkg/util/cgroup/common"
    45  	cgroupcmutils "github.com/kubewharf/katalyst-core/pkg/util/cgroup/manager"
    46  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    47  	"github.com/kubewharf/katalyst-core/pkg/util/machine"
    48  	"github.com/kubewharf/katalyst-core/pkg/util/native"
    49  	"github.com/kubewharf/katalyst-core/pkg/util/qos"
    50  )
    51  
    52  const (
    53  	// NetworkResourcePluginPolicyNameStatic is the policy name of static network resource plugin
    54  	NetworkResourcePluginPolicyNameStatic = string(apiconsts.ResourcePluginPolicyNameStatic)
    55  
    56  	NetworkPluginStateFileName = "network_plugin_state"
    57  
    58  	// IPsSeparator is used to split merged IPs string
    59  	IPsSeparator = ","
    60  )
    61  
    62  // StaticPolicy is the static network policy
    63  type StaticPolicy struct {
    64  	sync.Mutex
    65  
    66  	name       string
    67  	stopCh     chan struct{}
    68  	started    bool
    69  	qosConfig  *generic.QoSConfiguration
    70  	qrmConfig  *qrm.QRMPluginsConfiguration
    71  	emitter    metrics.MetricEmitter
    72  	metaServer *metaserver.MetaServer
    73  	agentCtx   *agent.GenericContext
    74  	nics       []machine.InterfaceInfo
    75  	state      state.State
    76  
    77  	CgroupV2Env                                     bool
    78  	qosLevelToNetClassMap                           map[string]uint32
    79  	applyNetClassFunc                               func(podUID, containerID string, data *common.NetClsData) error
    80  	podLevelNetClassAnnoKey                         string
    81  	podLevelNetAttributesAnnoKeys                   []string
    82  	ipv4ResourceAllocationAnnotationKey             string
    83  	ipv6ResourceAllocationAnnotationKey             string
    84  	netNSPathResourceAllocationAnnotationKey        string
    85  	netInterfaceNameResourceAllocationAnnotationKey string
    86  	netClassIDResourceAllocationAnnotationKey       string
    87  	netBandwidthResourceAllocationAnnotationKey     string
    88  }
    89  
    90  // NewStaticPolicy returns a static network policy
    91  func NewStaticPolicy(agentCtx *agent.GenericContext, conf *config.Configuration,
    92  	_ interface{}, agentName string,
    93  ) (bool, agent.Component, error) {
    94  	wrappedEmitter := agentCtx.EmitterPool.GetDefaultMetricsEmitter().WithTags(agentName, metrics.MetricTag{
    95  		Key: util.QRMPluginPolicyTagName,
    96  		Val: NetworkResourcePluginPolicyNameStatic,
    97  	})
    98  
    99  	// it is incorrect to reserve bandwidth on those diabled NICs.
   100  	// we only count active NICs as available network devices and allocate bandwidth on them
   101  	enabledNICs := filterNICsByAvailability(agentCtx.KatalystMachineInfo.ExtraNetworkInfo.Interface, nil, nil)
   102  	if len(enabledNICs) != 0 {
   103  		// the NICs should be in order by interface name so that we can adopt specific policies for bandwidth reservation or allocation
   104  		// e.g. reserve bandwidth for high-priority tasks on the first NIC
   105  		sort.SliceStable(enabledNICs, func(i, j int) bool {
   106  			return enabledNICs[i].Iface < enabledNICs[j].Iface
   107  		})
   108  	} else {
   109  		general.Infof("no valid nics on this node")
   110  	}
   111  
   112  	// we only support one spreading policy for now: reserve the bandwidth on the first NIC.
   113  	// TODO: make the reservation policy configurable
   114  	reservation, err := getReservedBandwidth(enabledNICs, conf.ReservedBandwidth, FirstNIC)
   115  	if err != nil {
   116  		return false, agent.ComponentStub{}, fmt.Errorf("getReservedBandwidth failed with error: %v", err)
   117  	}
   118  
   119  	stateImpl, err := state.NewCheckpointState(conf.QRMPluginsConfiguration, conf.GenericQRMPluginConfiguration.StateFileDirectory, NetworkPluginStateFileName,
   120  		NetworkResourcePluginPolicyNameStatic, agentCtx.MachineInfo, enabledNICs, reservation, conf.SkipNetworkStateCorruption)
   121  	if err != nil {
   122  		return false, agent.ComponentStub{}, fmt.Errorf("NewCheckpointState failed with error: %v", err)
   123  	}
   124  
   125  	policyImplement := &StaticPolicy{
   126  		nics:                  enabledNICs,
   127  		qosConfig:             conf.QoSConfiguration,
   128  		qrmConfig:             conf.QRMPluginsConfiguration,
   129  		emitter:               wrappedEmitter,
   130  		metaServer:            agentCtx.MetaServer,
   131  		agentCtx:              agentCtx,
   132  		state:                 stateImpl,
   133  		stopCh:                make(chan struct{}),
   134  		name:                  fmt.Sprintf("%s_%s", agentName, NetworkResourcePluginPolicyNameStatic),
   135  		qosLevelToNetClassMap: make(map[string]uint32),
   136  	}
   137  
   138  	if common.CheckCgroup2UnifiedMode() {
   139  		policyImplement.CgroupV2Env = true
   140  		policyImplement.applyNetClassFunc = agentCtx.MetaServer.ExternalManager.ApplyNetClass
   141  	} else {
   142  		policyImplement.CgroupV2Env = false
   143  		policyImplement.applyNetClassFunc = cgroupcmutils.ApplyNetClsForContainer
   144  	}
   145  
   146  	policyImplement.ApplyConfig(conf.StaticAgentConfiguration)
   147  
   148  	pluginWrapper, err := skeleton.NewRegistrationPluginWrapper(policyImplement, conf.QRMPluginSocketDirs,
   149  		func(key string, value int64) {
   150  			_ = wrappedEmitter.StoreInt64(key, value, metrics.MetricTypeNameRaw)
   151  		})
   152  	if err != nil {
   153  		return false, agent.ComponentStub{}, fmt.Errorf("static policy new plugin wrapper failed with error: %v", err)
   154  	}
   155  
   156  	return true, &agent.PluginWrapper{GenericPlugin: pluginWrapper}, nil
   157  }
   158  
   159  // ApplyConfig applies config to StaticPolicy
   160  func (p *StaticPolicy) ApplyConfig(conf *agentconfig.StaticAgentConfiguration) {
   161  	p.Lock()
   162  	defer p.Unlock()
   163  
   164  	p.qosLevelToNetClassMap[apiconsts.PodAnnotationQoSLevelReclaimedCores] = conf.NetClass.ReclaimedCores
   165  	p.qosLevelToNetClassMap[apiconsts.PodAnnotationQoSLevelSharedCores] = conf.NetClass.SharedCores
   166  	p.qosLevelToNetClassMap[apiconsts.PodAnnotationQoSLevelDedicatedCores] = conf.NetClass.DedicatedCores
   167  	p.qosLevelToNetClassMap[apiconsts.PodAnnotationQoSLevelSystemCores] = conf.NetClass.SystemCores
   168  
   169  	p.podLevelNetClassAnnoKey = conf.PodLevelNetClassAnnoKey
   170  	p.podLevelNetAttributesAnnoKeys = strings.Split(conf.PodLevelNetAttributesAnnoKeys, ",")
   171  	p.ipv4ResourceAllocationAnnotationKey = conf.IPv4ResourceAllocationAnnotationKey
   172  	p.ipv6ResourceAllocationAnnotationKey = conf.IPv6ResourceAllocationAnnotationKey
   173  	p.netNSPathResourceAllocationAnnotationKey = conf.NetNSPathResourceAllocationAnnotationKey
   174  	p.netInterfaceNameResourceAllocationAnnotationKey = conf.NetInterfaceNameResourceAllocationAnnotationKey
   175  	p.netClassIDResourceAllocationAnnotationKey = conf.NetClassIDResourceAllocationAnnotationKey
   176  	p.netBandwidthResourceAllocationAnnotationKey = conf.NetBandwidthResourceAllocationAnnotationKey
   177  
   178  	general.Infof("apply configs, "+
   179  		"qosLevelToNetClassMap: %+v, "+
   180  		"podLevelNetClassAnnoKey: %s, "+
   181  		"podLevelNetAttributesAnnoKeys: %+v",
   182  		p.qosLevelToNetClassMap,
   183  		p.podLevelNetClassAnnoKey,
   184  		p.podLevelNetAttributesAnnoKeys)
   185  }
   186  
   187  // Start starts this plugin
   188  func (p *StaticPolicy) Start() (err error) {
   189  	general.Infof("called")
   190  
   191  	p.Lock()
   192  	defer func() {
   193  		if !p.started {
   194  			if err == nil {
   195  				p.started = true
   196  			} else {
   197  				close(p.stopCh)
   198  			}
   199  		}
   200  		p.Unlock()
   201  	}()
   202  
   203  	if p.started {
   204  		general.Infof("already started")
   205  		return nil
   206  	}
   207  
   208  	p.stopCh = make(chan struct{})
   209  
   210  	go wait.Until(func() {
   211  		_ = p.emitter.StoreInt64(util.MetricNameHeartBeat, 1, metrics.MetricTypeNameRaw)
   212  	}, time.Second*30, p.stopCh)
   213  	go wait.Until(p.applyNetClass, 5*time.Second, p.stopCh)
   214  
   215  	return nil
   216  }
   217  
   218  // Stop stops this plugin
   219  func (p *StaticPolicy) Stop() error {
   220  	p.Lock()
   221  	defer func() {
   222  		p.started = false
   223  		p.Unlock()
   224  		general.Infof("stopped")
   225  	}()
   226  
   227  	if !p.started {
   228  		general.Warningf("already stopped")
   229  		return nil
   230  	}
   231  	close(p.stopCh)
   232  	return nil
   233  }
   234  
   235  // Name returns the name of this plugin
   236  func (p *StaticPolicy) Name() string {
   237  	return p.name
   238  }
   239  
   240  // ResourceName returns resource names managed by this plugin
   241  func (p *StaticPolicy) ResourceName() string {
   242  	return string(apiconsts.ResourceNetBandwidth)
   243  }
   244  
   245  // GetTopologyHints returns hints of corresponding resources
   246  func (p *StaticPolicy) GetTopologyHints(_ context.Context,
   247  	req *pluginapi.ResourceRequest,
   248  ) (resp *pluginapi.ResourceHintsResponse, err error) {
   249  	if req == nil {
   250  		return nil, fmt.Errorf("GetTopologyHints got nil req")
   251  	}
   252  
   253  	qosLevel, err := util.GetKatalystQoSLevelFromResourceReq(p.qosConfig, req)
   254  	if err != nil {
   255  		err = fmt.Errorf("GetKatalystQoSLevelFromResourceReq for pod: %s/%s, container: %s failed with error: %v",
   256  			req.PodNamespace, req.PodName, req.ContainerName, err)
   257  		general.Errorf("%s", err.Error())
   258  		return nil, err
   259  	}
   260  
   261  	reqInt, _, err := util.GetQuantityFromResourceReq(req)
   262  	if err != nil {
   263  		return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err)
   264  	}
   265  
   266  	general.InfoS("called",
   267  		"podNamespace", req.PodNamespace,
   268  		"podName", req.PodName,
   269  		"containerName", req.ContainerName,
   270  		"qosLevel", qosLevel,
   271  		"resourceRequests", req.ResourceRequests,
   272  		"reqAnnotations", req.Annotations,
   273  		"netBandwidthReq(Mbps)", reqInt)
   274  
   275  	p.Lock()
   276  	defer func() {
   277  		p.Unlock()
   278  		if err != nil {
   279  			_ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw)
   280  		}
   281  	}()
   282  
   283  	if req.ContainerType == pluginapi.ContainerType_INIT ||
   284  		req.ContainerType == pluginapi.ContainerType_SIDECAR {
   285  		return util.PackResourceHintsResponse(req, p.ResourceName(), map[string]*pluginapi.ListOfTopologyHints{
   286  			p.ResourceName(): nil, // indicates that there is no numa preference
   287  		})
   288  	}
   289  
   290  	hints, err := p.calculateHints(req)
   291  	if err != nil {
   292  		err = fmt.Errorf("calculateHints for pod: %s/%s, container: %s failed with error: %v",
   293  			req.PodNamespace, req.PodName, req.ContainerName, err)
   294  		general.Errorf("%s", err.Error())
   295  		return nil, err
   296  	}
   297  
   298  	return util.PackResourceHintsResponse(req, p.ResourceName(), hints)
   299  }
   300  
   301  func (p *StaticPolicy) RemovePod(_ context.Context,
   302  	req *pluginapi.RemovePodRequest,
   303  ) (*pluginapi.RemovePodResponse, error) {
   304  	if req == nil {
   305  		return nil, fmt.Errorf("RemovePod got nil req")
   306  	}
   307  
   308  	p.Lock()
   309  	defer p.Unlock()
   310  
   311  	if err := p.removePod(req.PodUid); err != nil {
   312  		general.ErrorS(err, "remove pod failed with error", "podUID", req.PodUid)
   313  		return nil, err
   314  	}
   315  
   316  	return &pluginapi.RemovePodResponse{}, nil
   317  }
   318  
   319  // GetResourcesAllocation returns allocation results of corresponding resources
   320  func (p *StaticPolicy) GetResourcesAllocation(_ context.Context,
   321  	_ *pluginapi.GetResourcesAllocationRequest,
   322  ) (*pluginapi.GetResourcesAllocationResponse, error) {
   323  	// no need to implement this function, because NeedReconcile is false
   324  	return &pluginapi.GetResourcesAllocationResponse{}, nil
   325  }
   326  
   327  // GetTopologyAwareResources returns allocation results of corresponding resources as topology aware format
   328  func (p *StaticPolicy) GetTopologyAwareResources(_ context.Context,
   329  	req *pluginapi.GetTopologyAwareResourcesRequest,
   330  ) (*pluginapi.GetTopologyAwareResourcesResponse, error) {
   331  	if req == nil {
   332  		return nil, fmt.Errorf("GetTopologyAwareResources got nil req")
   333  	}
   334  
   335  	p.Lock()
   336  	defer p.Unlock()
   337  
   338  	allocationInfo := p.state.GetAllocationInfo(req.PodUid, req.ContainerName)
   339  	if allocationInfo == nil {
   340  		return &pluginapi.GetTopologyAwareResourcesResponse{}, nil
   341  	}
   342  
   343  	socket, err := p.getSocketIDByNIC(allocationInfo.IfName)
   344  	if err != nil {
   345  		return nil, fmt.Errorf("failed to find topologyNode for pod %s, container %s : %v", req.PodUid, req.ContainerName, err)
   346  	}
   347  
   348  	nic := p.getNICByName(allocationInfo.IfName)
   349  	topologyAwareQuantityList := []*pluginapi.TopologyAwareQuantity{
   350  		{
   351  			ResourceValue: float64(allocationInfo.Egress),
   352  			Node:          uint64(socket),
   353  			Name:          allocationInfo.IfName,
   354  			Type:          string(apinode.TopologyTypeNIC),
   355  			TopologyLevel: pluginapi.TopologyLevel_SOCKET,
   356  			Annotations: map[string]string{
   357  				apiconsts.ResourceAnnotationKeyResourceIdentifier: getResourceIdentifier(nic.NSName, allocationInfo.IfName),
   358  				apiconsts.ResourceAnnotationKeyNICNetNSName:       nic.NSName,
   359  			},
   360  		},
   361  	}
   362  	resp := &pluginapi.GetTopologyAwareResourcesResponse{
   363  		PodUid:       allocationInfo.PodUid,
   364  		PodName:      allocationInfo.PodName,
   365  		PodNamespace: allocationInfo.PodNamespace,
   366  		ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{
   367  			ContainerName: allocationInfo.ContainerName,
   368  		},
   369  	}
   370  
   371  	if allocationInfo.CheckSideCar() {
   372  		resp.ContainerTopologyAwareResources.AllocatedResources = map[string]*pluginapi.TopologyAwareResource{
   373  			string(apiconsts.ResourceNetBandwidth): {
   374  				IsNodeResource:                    true,
   375  				IsScalarResource:                  true,
   376  				AggregatedQuantity:                0,
   377  				OriginalAggregatedQuantity:        0,
   378  				TopologyAwareQuantityList:         nil,
   379  				OriginalTopologyAwareQuantityList: nil,
   380  			},
   381  		}
   382  	} else {
   383  		resp.ContainerTopologyAwareResources.AllocatedResources = map[string]*pluginapi.TopologyAwareResource{
   384  			string(apiconsts.ResourceNetBandwidth): {
   385  				IsNodeResource:                    true,
   386  				IsScalarResource:                  true,
   387  				AggregatedQuantity:                float64(allocationInfo.Egress),
   388  				OriginalAggregatedQuantity:        float64(allocationInfo.Egress),
   389  				TopologyAwareQuantityList:         topologyAwareQuantityList,
   390  				OriginalTopologyAwareQuantityList: topologyAwareQuantityList,
   391  			},
   392  		}
   393  	}
   394  
   395  	return resp, nil
   396  }
   397  
   398  // GetTopologyAwareAllocatableResources returns corresponding allocatable resources as topology aware format
   399  func (p *StaticPolicy) GetTopologyAwareAllocatableResources(_ context.Context,
   400  	_ *pluginapi.GetTopologyAwareAllocatableResourcesRequest,
   401  ) (*pluginapi.GetTopologyAwareAllocatableResourcesResponse, error) {
   402  	p.Lock()
   403  	defer p.Unlock()
   404  
   405  	machineState := p.state.GetMachineState()
   406  
   407  	topologyAwareAllocatableQuantityList := make([]*pluginapi.TopologyAwareQuantity, 0, len(machineState))
   408  	topologyAwareCapacityQuantityList := make([]*pluginapi.TopologyAwareQuantity, 0, len(machineState))
   409  
   410  	var aggregatedAllocatableQuantity, aggregatedCapacityQuantity uint32 = 0, 0
   411  	for _, iface := range p.nics {
   412  		nicState := machineState[iface.Iface]
   413  		if nicState == nil {
   414  			return nil, fmt.Errorf("nil nicState for NIC: %s", iface.Iface)
   415  		}
   416  
   417  		topologyNode, err := p.getSocketIDByNIC(iface.Iface)
   418  		if err != nil {
   419  			return nil, fmt.Errorf("failed to find topologyNode: %v", err)
   420  		}
   421  
   422  		resourceIdentifier := getResourceIdentifier(iface.NSName, iface.Iface)
   423  		topologyAwareAllocatableQuantityList = append(topologyAwareAllocatableQuantityList, &pluginapi.TopologyAwareQuantity{
   424  			ResourceValue: float64(general.MinUInt32(nicState.EgressState.Allocatable, nicState.IngressState.Allocatable)),
   425  			Node:          uint64(topologyNode),
   426  			Name:          iface.Iface,
   427  			Type:          string(apinode.TopologyTypeNIC),
   428  			TopologyLevel: pluginapi.TopologyLevel_SOCKET,
   429  			Annotations: map[string]string{
   430  				apiconsts.ResourceAnnotationKeyResourceIdentifier: resourceIdentifier,
   431  				apiconsts.ResourceAnnotationKeyNICNetNSName:       iface.NSName,
   432  			},
   433  		})
   434  		topologyAwareCapacityQuantityList = append(topologyAwareCapacityQuantityList, &pluginapi.TopologyAwareQuantity{
   435  			ResourceValue: float64(general.MinUInt32(nicState.EgressState.Capacity, nicState.IngressState.Capacity)),
   436  			Node:          uint64(topologyNode),
   437  			Name:          iface.Iface,
   438  			Type:          string(apinode.TopologyTypeNIC),
   439  			TopologyLevel: pluginapi.TopologyLevel_SOCKET,
   440  			Annotations: map[string]string{
   441  				apiconsts.ResourceAnnotationKeyResourceIdentifier: resourceIdentifier,
   442  				apiconsts.ResourceAnnotationKeyNICNetNSName:       iface.NSName,
   443  			},
   444  		})
   445  		aggregatedAllocatableQuantity += general.MinUInt32(nicState.EgressState.Allocatable, nicState.IngressState.Allocatable)
   446  		aggregatedCapacityQuantity += general.MinUInt32(nicState.EgressState.Capacity, nicState.IngressState.Capacity)
   447  	}
   448  
   449  	return &pluginapi.GetTopologyAwareAllocatableResourcesResponse{
   450  		AllocatableResources: map[string]*pluginapi.AllocatableTopologyAwareResource{
   451  			string(apiconsts.ResourceNetBandwidth): {
   452  				IsNodeResource:                       true,
   453  				IsScalarResource:                     true,
   454  				AggregatedAllocatableQuantity:        float64(aggregatedAllocatableQuantity),
   455  				TopologyAwareAllocatableQuantityList: topologyAwareAllocatableQuantityList,
   456  				AggregatedCapacityQuantity:           float64(aggregatedCapacityQuantity),
   457  				TopologyAwareCapacityQuantityList:    topologyAwareCapacityQuantityList,
   458  			},
   459  		},
   460  	}, nil
   461  }
   462  
   463  // GetResourcePluginOptions returns options to be communicated with Resource Manager
   464  func (p *StaticPolicy) GetResourcePluginOptions(context.Context,
   465  	*pluginapi.Empty,
   466  ) (*pluginapi.ResourcePluginOptions, error) {
   467  	return &pluginapi.ResourcePluginOptions{
   468  		PreStartRequired:      false,
   469  		WithTopologyAlignment: true,
   470  		NeedReconcile:         false,
   471  	}, nil
   472  }
   473  
   474  // Allocate is called during pod admit so that the resource
   475  // plugin can allocate corresponding resource for the container
   476  // according to resource request
   477  func (p *StaticPolicy) Allocate(_ context.Context,
   478  	req *pluginapi.ResourceRequest,
   479  ) (resp *pluginapi.ResourceAllocationResponse, err error) {
   480  	if req == nil {
   481  		return nil, fmt.Errorf("GetTopologyHints got nil req")
   482  	}
   483  
   484  	// since qos config util will filter out annotation keys not related to katalyst QoS,
   485  	// we copy original pod annotations here to use them later
   486  	podAnnotations := maputil.CopySS(req.Annotations)
   487  
   488  	qosLevel, err := util.GetKatalystQoSLevelFromResourceReq(p.qosConfig, req)
   489  	if err != nil {
   490  		err = fmt.Errorf("GetKatalystQoSLevelFromResourceReq for pod: %s/%s, container: %s failed with error: %v",
   491  			req.PodNamespace, req.PodName, req.ContainerName, err)
   492  		general.Errorf("%s", err.Error())
   493  		return nil, err
   494  	}
   495  
   496  	reqInt, _, err := util.GetQuantityFromResourceReq(req)
   497  	if err != nil {
   498  		return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err)
   499  	}
   500  
   501  	general.InfoS("called",
   502  		"podNamespace", req.PodNamespace,
   503  		"podName", req.PodName,
   504  		"containerName", req.ContainerName,
   505  		"qosLevel", qosLevel,
   506  		"reqAnnotations", req.Annotations,
   507  		"netBandwidthReq(Mbps)", reqInt)
   508  
   509  	p.Lock()
   510  	defer func() {
   511  		p.Unlock()
   512  		if err != nil {
   513  			_ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw)
   514  		}
   515  	}()
   516  
   517  	emptyResponse := &pluginapi.ResourceAllocationResponse{
   518  		PodUid:         req.PodUid,
   519  		PodNamespace:   req.PodNamespace,
   520  		PodName:        req.PodName,
   521  		ContainerName:  req.ContainerName,
   522  		ContainerType:  req.ContainerType,
   523  		ContainerIndex: req.ContainerIndex,
   524  		PodRole:        req.PodRole,
   525  		PodType:        req.PodType,
   526  		ResourceName:   p.ResourceName(),
   527  		Labels:         general.DeepCopyMap(req.Labels),
   528  		Annotations:    general.DeepCopyMap(req.Annotations),
   529  	}
   530  
   531  	// currently, not to deal with init containers
   532  	if req.ContainerType == pluginapi.ContainerType_INIT {
   533  		return emptyResponse, nil
   534  	} else if req.ContainerType == pluginapi.ContainerType_SIDECAR {
   535  		// not to deal with sidecars, and return a trivial allocationResult to avoid re-allocating
   536  		return packAllocationResponse(req, &state.AllocationInfo{}, nil, nil)
   537  	}
   538  
   539  	// check allocationInfo is nil or not
   540  	podEntries := p.state.GetPodEntries()
   541  	allocationInfo := p.state.GetAllocationInfo(req.PodUid, req.ContainerName)
   542  
   543  	if allocationInfo != nil {
   544  		if allocationInfo.Egress >= uint32(reqInt) && allocationInfo.Ingress >= uint32(reqInt) {
   545  			general.InfoS("already allocated and meet requirement",
   546  				"podNamespace", req.PodNamespace,
   547  				"podName", req.PodName,
   548  				"containerName", req.ContainerName,
   549  				"bandwidthReq(Mbps)", reqInt,
   550  				"currentResult(Mbps)", allocationInfo.Egress)
   551  
   552  			resourceAllocationAnnotations, err := p.getResourceAllocationAnnotations(podAnnotations, allocationInfo)
   553  			if err != nil {
   554  				err = fmt.Errorf("getResourceAllocationAnnotations for pod: %s/%s, container: %s failed with error: %v",
   555  					req.PodNamespace, req.PodName, req.ContainerName, err)
   556  				general.Errorf("%s", err.Error())
   557  				return nil, err
   558  			}
   559  
   560  			resp, packErr := packAllocationResponse(req, allocationInfo, req.Hint, resourceAllocationAnnotations)
   561  			if packErr != nil {
   562  				general.Errorf("pod: %s/%s, container: %s packAllocationResponse failed with error: %v",
   563  					req.PodNamespace, req.PodName, req.ContainerName, packErr)
   564  				return nil, fmt.Errorf("packAllocationResponse failed with error: %v", packErr)
   565  			}
   566  			return resp, nil
   567  		} else {
   568  			general.InfoS("not meet requirement, clear record and re-allocate",
   569  				"podNamespace", req.PodNamespace,
   570  				"podName", req.PodName,
   571  				"containerName", req.ContainerName,
   572  				"bandwidthReq(Mbps)", reqInt,
   573  				"currentResult(Mbps)", allocationInfo.Egress)
   574  			delete(podEntries, req.PodUid)
   575  
   576  			_, stateErr := state.GenerateMachineStateFromPodEntries(p.qrmConfig, p.nics, podEntries, p.state.GetReservedBandwidth())
   577  			if stateErr != nil {
   578  				general.ErrorS(stateErr, "generateNetworkMachineStateByPodEntries failed",
   579  					"podNamespace", req.PodNamespace,
   580  					"podName", req.PodName,
   581  					"containerName", req.ContainerName,
   582  					"bandwidthReq(Mbps)", reqInt,
   583  					"currentResult(Mbps)", allocationInfo.Egress)
   584  				return nil, fmt.Errorf("generateNetworkMachineStateByPodEntries failed with error: %v", stateErr)
   585  			}
   586  		}
   587  	}
   588  
   589  	candidateNICs, err := p.selectNICsByReq(req)
   590  	if err != nil {
   591  		err = fmt.Errorf("selectNICsByReq for pod: %s/%s, container: %s, reqInt: %d, failed with error: %v",
   592  			req.PodNamespace, req.PodName, req.ContainerName, reqInt, err)
   593  		general.Errorf("%s", err.Error())
   594  		return nil, err
   595  	}
   596  
   597  	if len(candidateNICs) == 0 {
   598  		general.ErrorS(err, "insufficient bandwidth on this node to satisfy the request",
   599  			"podNamespace", req.PodNamespace,
   600  			"podName", req.PodName,
   601  			"containerName", req.ContainerName,
   602  			"netBandwidthReq(Mbps)", reqInt,
   603  			"nicState", p.state.GetMachineState().String())
   604  		return nil, fmt.Errorf("failed to meet the bandwidth requirement of %d Mbps", reqInt)
   605  	}
   606  
   607  	// we only support one policy and hard code it for now
   608  	// TODO: make the policy configurable
   609  	selectedNIC := selectOneNIC(candidateNICs, RandomOne)
   610  	general.Infof("select NIC %s to allocate bandwidth (%dMbps)", selectedNIC.Iface, reqInt)
   611  
   612  	siblingNUMAs, err := machine.GetSiblingNUMAs(selectedNIC.NumaNode, p.agentCtx.CPUTopology)
   613  	if err != nil {
   614  		general.Errorf("get siblingNUMAs for nic: %s failed with error: %v. Incorrect NumaNodes in machineState allocationInfo", selectedNIC.Iface, err)
   615  	}
   616  
   617  	// generate the response hint
   618  	// it could be different from the req.Hint if the affinitive NIC does not have sufficient bandwidth
   619  	nicPreference, err := checkNICPreferenceOfReq(selectedNIC, req.Annotations)
   620  	if err != nil {
   621  		return nil, fmt.Errorf("checkNICPreferenceOfReq for nic: %s failed with error: %v", selectedNIC.Iface, err)
   622  	}
   623  
   624  	respHint := &pluginapi.TopologyHint{
   625  		Nodes:     siblingNUMAs.ToSliceUInt64(),
   626  		Preferred: nicPreference,
   627  	}
   628  
   629  	// generate allocationInfo and update the checkpoint accordingly
   630  	newAllocation := &state.AllocationInfo{
   631  		PodUid:         req.PodUid,
   632  		PodNamespace:   req.PodNamespace,
   633  		PodName:        req.PodName,
   634  		ContainerName:  req.ContainerName,
   635  		ContainerType:  req.ContainerType.String(),
   636  		ContainerIndex: req.ContainerIndex,
   637  		PodRole:        req.PodRole,
   638  		PodType:        req.PodType,
   639  		Egress:         uint32(reqInt),
   640  		Ingress:        uint32(reqInt),
   641  		IfName:         selectedNIC.Iface,
   642  		NumaNodes:      siblingNUMAs,
   643  		Labels:         general.DeepCopyMap(req.Labels),
   644  		Annotations:    general.DeepCopyMap(req.Annotations),
   645  	}
   646  
   647  	resourceAllocationAnnotations, err := p.getResourceAllocationAnnotations(podAnnotations, newAllocation)
   648  	if err != nil {
   649  		err = fmt.Errorf("getResourceAllocationAnnotations for pod: %s/%s, container: %s failed with error: %v",
   650  			req.PodNamespace, req.PodName, req.ContainerName, err)
   651  		general.Errorf("%s", err.Error())
   652  		return nil, err
   653  	}
   654  
   655  	// update PodEntries
   656  	p.state.SetAllocationInfo(req.PodUid, req.ContainerName, newAllocation)
   657  
   658  	machineState, stateErr := state.GenerateMachineStateFromPodEntries(p.qrmConfig, p.nics, p.state.GetPodEntries(), p.state.GetReservedBandwidth())
   659  	if stateErr != nil {
   660  		general.ErrorS(stateErr, "generateNetworkMachineStateByPodEntries failed",
   661  			"podNamespace", req.PodNamespace,
   662  			"podName", req.PodName,
   663  			"containerName", req.ContainerName,
   664  			"bandwidthReq(Mbps)", reqInt,
   665  			"currentResult(Mbps)", allocationInfo.Egress)
   666  		return nil, fmt.Errorf("generateNetworkMachineStateByPodEntries failed with error: %v", stateErr)
   667  	}
   668  
   669  	// update state cache
   670  	p.state.SetMachineState(machineState)
   671  
   672  	return packAllocationResponse(req, newAllocation, respHint, resourceAllocationAnnotations)
   673  }
   674  
   675  // PreStartContainer is called, if indicated by resource plugin during registration phase,
   676  // before each container start. Resource plugin can run resource specific operations
   677  // such as resetting the resource before making resources available to the container
   678  func (p *StaticPolicy) PreStartContainer(context.Context,
   679  	*pluginapi.PreStartContainerRequest,
   680  ) (*pluginapi.PreStartContainerResponse, error) {
   681  	return &pluginapi.PreStartContainerResponse{}, nil
   682  }
   683  
   684  func (p *StaticPolicy) applyNetClass() {
   685  	if p.metaServer == nil {
   686  		general.Errorf("nil metaServer")
   687  		return
   688  	}
   689  
   690  	podList, err := p.metaServer.GetPodList(context.Background(), nil)
   691  	if err != nil {
   692  		general.Errorf("get pod list failed, err: %v", err)
   693  		return
   694  	}
   695  
   696  	for _, pod := range podList {
   697  		if pod == nil {
   698  			general.Errorf("get nil pod from metaServer")
   699  			continue
   700  		}
   701  
   702  		classID, err := p.getNetClassID(pod.GetAnnotations(), p.podLevelNetClassAnnoKey)
   703  		if err != nil {
   704  			general.Errorf("get net class id failed, pod: %s, err: %s", native.GenerateUniqObjectNameKey(pod), err)
   705  			continue
   706  		}
   707  		netClsData := &common.NetClsData{
   708  			ClassID:    classID,
   709  			Attributes: native.FilterPodAnnotations(p.podLevelNetAttributesAnnoKeys, pod),
   710  		}
   711  
   712  		for _, container := range pod.Spec.Containers {
   713  			go func(podUID, containerName string, netClsData *common.NetClsData) {
   714  				containerID, err := p.metaServer.GetContainerID(podUID, containerName)
   715  				if err != nil {
   716  					general.Errorf("get container id failed, pod: %s, container: %s(%s), err: %v",
   717  						podUID, containerName, containerID, err)
   718  					return
   719  				}
   720  
   721  				if exist, err := common.IsContainerCgroupExist(podUID, containerID); err != nil {
   722  					general.Errorf("check if container cgroup exists failed, pod: %s, container: %s(%s), err: %v",
   723  						podUID, containerName, containerID, err)
   724  					return
   725  				} else if !exist {
   726  					general.Infof("container cgroup does not exist, pod: %s, container: %s(%s)", podUID, containerName, containerID)
   727  					return
   728  				}
   729  
   730  				if p.CgroupV2Env {
   731  					cgID, err := p.metaServer.ExternalManager.GetCgroupIDForContainer(podUID, containerID)
   732  					if err != nil {
   733  						general.Errorf("get cgroup id failed, pod: %s, container: %s(%s), err: %v",
   734  							podUID, containerName, containerID, err)
   735  						return
   736  					}
   737  					netClsData.CgroupID = cgID
   738  				}
   739  
   740  				if err = p.applyNetClassFunc(podUID, containerID, netClsData); err != nil {
   741  					general.Errorf("apply net class failed, pod: %s, container: %s(%s), netClsData: %+v, err: %v",
   742  						podUID, containerName, containerID, *netClsData, err)
   743  					return
   744  				}
   745  
   746  				general.Infof("apply net class successfully, pod: %s, container: %s(%s), netClsData: %+v",
   747  					podUID, containerName, containerID, *netClsData)
   748  			}(string(pod.UID), container.Name, netClsData)
   749  		}
   750  	}
   751  }
   752  
   753  func (p *StaticPolicy) filterAvailableNICsByBandwidth(nics []machine.InterfaceInfo, req *pluginapi.ResourceRequest, _ *agent.GenericContext) []machine.InterfaceInfo {
   754  	filteredNICs := make([]machine.InterfaceInfo, 0, len(nics))
   755  
   756  	if req == nil {
   757  		general.Infof("filterNICsByBandwidth got nil req")
   758  		return nil
   759  	}
   760  
   761  	reqInt, _, err := util.GetQuantityFromResourceReq(req)
   762  	if err != nil {
   763  		general.Errorf("getReqQuantityFromResourceReq failed with error: %v", err)
   764  		return nil
   765  	}
   766  
   767  	machineState := p.state.GetMachineState()
   768  	if len(machineState) == 0 || len(nics) == 0 {
   769  		general.Errorf("filterNICsByBandwidth with 0 NIC")
   770  		return nil
   771  	}
   772  
   773  	// filter NICs by available bandwidth
   774  	for _, iface := range nics {
   775  		if machineState[iface.Iface].EgressState.Free >= uint32(reqInt) && machineState[iface.Iface].IngressState.Free >= uint32(reqInt) {
   776  			filteredNICs = append(filteredNICs, iface)
   777  		}
   778  	}
   779  
   780  	// no nic meets the bandwidth request
   781  	if len(filteredNICs) == 0 {
   782  		general.InfoS("nic list returned by filtereNICsByBandwidth is empty",
   783  			"podNamespace", req.PodNamespace,
   784  			"podName", req.PodName,
   785  			"containerName", req.ContainerName)
   786  	}
   787  
   788  	return filteredNICs
   789  }
   790  
   791  func (p *StaticPolicy) calculateHints(req *pluginapi.ResourceRequest) (map[string]*pluginapi.ListOfTopologyHints, error) {
   792  	// resp.hints: 1) empty, means no resource (i.e. NIC) meeting requirements found; 2) nil, does not care about the hints
   793  	// since NIC is a kind of topology-aware resource, it is incorrect to return nil
   794  	hints := map[string]*pluginapi.ListOfTopologyHints{
   795  		p.ResourceName(): {
   796  			Hints: []*pluginapi.TopologyHint{},
   797  		},
   798  	}
   799  
   800  	// return empty hints immediately if no valid nics on this node
   801  	if len(p.nics) == 0 {
   802  		return hints, nil
   803  	}
   804  
   805  	candidateNICs, err := p.selectNICsByReq(req)
   806  	if err != nil {
   807  		return hints, fmt.Errorf("failed to select available NICs: %v", err)
   808  	}
   809  
   810  	if len(candidateNICs) == 0 {
   811  		general.InfoS("candidateNICs is empty",
   812  			"podNamespace", req.PodNamespace,
   813  			"podName", req.PodName,
   814  			"containerName", req.ContainerName)
   815  		// if the req.NS asks to allocate on the 1st NIC which does not have sufficient bandwidth, candidateNICs is empty.
   816  		// however, we should not return directly here. To indicate the option of the 2nd NIC if no restricted affinity or ns requested, we return [0,1,2,3] instead.
   817  	}
   818  
   819  	numasToHintMap := make(map[string]*pluginapi.TopologyHint)
   820  	for _, nic := range candidateNICs {
   821  		siblingNUMAs, err := machine.GetSiblingNUMAs(nic.NumaNode, p.agentCtx.CPUTopology)
   822  		if err != nil {
   823  			return nil, fmt.Errorf("get siblingNUMAs for nic: %s failed with error: %v", nic.Iface, err)
   824  		}
   825  
   826  		nicPreference, err := checkNICPreferenceOfReq(nic, req.Annotations)
   827  		if err != nil {
   828  			return nil, fmt.Errorf("checkNICPreferenceOfReq for nic: %s failed with error: %v", nic.Iface, err)
   829  		}
   830  
   831  		siblingNUMAsStr := siblingNUMAs.String()
   832  		if numasToHintMap[siblingNUMAsStr] == nil {
   833  			numasToHintMap[siblingNUMAsStr] = &pluginapi.TopologyHint{
   834  				Nodes: siblingNUMAs.ToSliceUInt64(),
   835  			}
   836  		}
   837  
   838  		if nicPreference {
   839  			general.InfoS("set nic preferred to true",
   840  				"podNamespace", req.PodNamespace,
   841  				"podName", req.PodName,
   842  				"containerName", req.ContainerName,
   843  				"nic", nic.Iface)
   844  			numasToHintMap[siblingNUMAsStr].Preferred = nicPreference
   845  		}
   846  	}
   847  
   848  	for _, hint := range numasToHintMap {
   849  		hints[p.ResourceName()].Hints = append(hints[p.ResourceName()].Hints, hint)
   850  	}
   851  
   852  	// check if restricted affinity or ns requested
   853  	if !isReqAffinityRestricted(req.Annotations) && !isReqNamespaceRestricted(req.Annotations) {
   854  		general.InfoS("add all NUMAs to hint to avoid affinity error",
   855  			"podNamespace", req.PodNamespace,
   856  			"podName", req.PodName,
   857  			"containerName", req.ContainerName,
   858  			req.Annotations[apiconsts.PodAnnotationNetworkEnhancementAffinityRestricted],
   859  			apiconsts.PodAnnotationNetworkEnhancementAffinityRestrictedTrue)
   860  
   861  		hints[p.ResourceName()].Hints = append(hints[p.ResourceName()].Hints, &pluginapi.TopologyHint{
   862  			Nodes: p.agentCtx.CPUDetails.NUMANodes().ToSliceUInt64(),
   863  		})
   864  	}
   865  
   866  	return hints, nil
   867  }
   868  
   869  /*
   870  The NIC selection depends on the following three aspects: available Bandwidth on each NIC, Namespace parameter in request, and req.Hints.
   871  1) The availability of sufficient bandwidth on the NIC is a prerequisite for determining whether the card can be selected.
   872  If there is insufficient bandwidth on a NIC, it cannot be included in the candidate list.
   873  
   874  2) We may put NICs into separate net namespaces in order to use both NICs simultaneously (Host network mode).
   875  If a container wants to request a specific NIC through the namespace parameter, this requirement must also be met.
   876  If the specified NIC has insufficient bandwidth, it cannot be included in the candidate list.
   877  
   878  3) The req.Hints parameter represents the affinity of a NIC. For example, a socket container running on a specific socket
   879  may use req.Hints to prioritize the selection of a NIC connected to that socket. However, this requirement is only satisfied as much as possible.
   880  If the NIC connected to the socket has sufficient bandwidth, only this NIC is returned. Otherwise, other cards with sufficient bandwidth will be returned.
   881  */
   882  func (p *StaticPolicy) selectNICsByReq(req *pluginapi.ResourceRequest) ([]machine.InterfaceInfo, error) {
   883  	nicFilters := []NICFilter{
   884  		p.filterAvailableNICsByBandwidth,
   885  		filterNICsByNamespaceType,
   886  		filterNICsByHint,
   887  	}
   888  
   889  	if len(p.nics) == 0 {
   890  		return []machine.InterfaceInfo{}, nil
   891  	}
   892  
   893  	candidateNICs, err := filterAvailableNICsByReq(p.nics, req, p.agentCtx, nicFilters)
   894  	if err != nil {
   895  		return nil, fmt.Errorf("filterAvailableNICsByReq failed with error: %v", err)
   896  	}
   897  
   898  	// this node can not meet the combined requests
   899  	if len(candidateNICs) == 0 {
   900  		general.InfoS("nic list returned by filterAvailableNICsByReq is empty",
   901  			"podNamespace", req.PodNamespace,
   902  			"podName", req.PodName,
   903  			"containerName", req.ContainerName)
   904  	}
   905  
   906  	return candidateNICs, nil
   907  }
   908  
   909  func (p *StaticPolicy) getResourceAllocationAnnotations(podAnnotations map[string]string, allocation *state.AllocationInfo) (map[string]string, error) {
   910  	netClsID, err := p.getNetClassID(podAnnotations, p.podLevelNetClassAnnoKey)
   911  	if err != nil {
   912  		return nil, fmt.Errorf("getNetClassID failed with error: %v", err)
   913  	}
   914  
   915  	selectedNIC := p.getNICByName(allocation.IfName)
   916  
   917  	resourceAllocationAnnotations := map[string]string{
   918  		p.ipv4ResourceAllocationAnnotationKey:             strings.Join(selectedNIC.GetNICIPs(machine.IPVersionV4), IPsSeparator),
   919  		p.ipv6ResourceAllocationAnnotationKey:             strings.Join(selectedNIC.GetNICIPs(machine.IPVersionV6), IPsSeparator),
   920  		p.netInterfaceNameResourceAllocationAnnotationKey: selectedNIC.Iface,
   921  		p.netClassIDResourceAllocationAnnotationKey:       fmt.Sprintf("%d", netClsID),
   922  		// TODO: support differentiated Egress/Ingress bandwidth later
   923  		p.netBandwidthResourceAllocationAnnotationKey: strconv.Itoa(int(allocation.Egress)),
   924  	}
   925  
   926  	if len(selectedNIC.NSAbsolutePath) > 0 {
   927  		resourceAllocationAnnotations[p.netNSPathResourceAllocationAnnotationKey] = selectedNIC.NSAbsolutePath
   928  	}
   929  
   930  	return resourceAllocationAnnotations, nil
   931  }
   932  
   933  func (p *StaticPolicy) removePod(podUID string) error {
   934  	if p.CgroupV2Env {
   935  		cgIDList, err := p.metaServer.ExternalManager.ListCgroupIDsForPod(podUID)
   936  		if err != nil {
   937  			if general.IsErrNotFound(err) {
   938  				general.Warningf("cgroup ids for pod not found")
   939  				return nil
   940  			}
   941  			return fmt.Errorf("[NetworkStaticPolicy.removePod] list cgroup ids of pod: %s failed with error: %v", podUID, err)
   942  		}
   943  
   944  		for _, cgID := range cgIDList {
   945  			go func(cgID uint64) {
   946  				if err := p.metaServer.ExternalManager.ClearNetClass(cgID); err != nil {
   947  					general.Errorf("delete net class failed, cgID: %v, err: %v", cgID, err)
   948  					return
   949  				}
   950  			}(cgID)
   951  		}
   952  	}
   953  
   954  	// update state cache
   955  	podEntries := p.state.GetPodEntries()
   956  	delete(podEntries, podUID)
   957  
   958  	machineState, err := state.GenerateMachineStateFromPodEntries(p.qrmConfig, p.nics, podEntries, p.state.GetReservedBandwidth())
   959  	if err != nil {
   960  		general.Errorf("pod: %s, GenerateMachineStateFromPodEntries failed with error: %v", podUID, err)
   961  		return fmt.Errorf("calculate machineState by updated pod entries failed with error: %v", err)
   962  	}
   963  
   964  	p.state.SetPodEntries(podEntries)
   965  	p.state.SetMachineState(machineState)
   966  
   967  	return nil
   968  }
   969  
   970  func (p *StaticPolicy) getNetClassID(podAnnotations map[string]string, podLevelNetClassAnnoKey string) (uint32, error) {
   971  	isPodLevelNetClassExist, classID, err := qos.GetPodNetClassID(podAnnotations, podLevelNetClassAnnoKey)
   972  	if err != nil {
   973  		return 0, err
   974  	}
   975  	if isPodLevelNetClassExist {
   976  		return classID, nil
   977  	}
   978  
   979  	qosLevel, err := p.qosConfig.GetQoSLevel(nil, podAnnotations)
   980  	if err != nil {
   981  		return 0, err
   982  	}
   983  	return p.getNetClassIDByQoSLevel(qosLevel)
   984  }
   985  
   986  func (p *StaticPolicy) getNetClassIDByQoSLevel(qosLevel string) (uint32, error) {
   987  	if netClsID, found := p.qosLevelToNetClassMap[qosLevel]; found {
   988  		return netClsID, nil
   989  	} else {
   990  		return 0, fmt.Errorf("netClsID for qosLevel: %s isn't found", qosLevel)
   991  	}
   992  }
   993  
   994  func (p *StaticPolicy) getNICByName(ifName string) machine.InterfaceInfo {
   995  	for idx := range p.nics {
   996  		if p.nics[idx].Iface == ifName {
   997  			return p.nics[idx]
   998  		}
   999  	}
  1000  
  1001  	return machine.InterfaceInfo{}
  1002  }
  1003  
  1004  // return the Socket id/index that the specified NIC attached to
  1005  func (p *StaticPolicy) getSocketIDByNIC(ifName string) (int, error) {
  1006  	for _, iface := range p.nics {
  1007  		if iface.Iface == ifName {
  1008  			socketIDs := p.agentCtx.KatalystMachineInfo.CPUDetails.SocketsInNUMANodes(iface.NumaNode)
  1009  			if socketIDs.Size() == 0 {
  1010  				return -1, fmt.Errorf("failed to find the associated socket ID for the specified NIC %s - numanode: %d, cpuDetails: %v", ifName, iface.NumaNode, p.agentCtx.KatalystMachineInfo.CPUDetails)
  1011  			}
  1012  
  1013  			return socketIDs.ToSliceInt()[0], nil
  1014  		}
  1015  	}
  1016  
  1017  	return -1, fmt.Errorf("invalid NIC name - failed to find a matched NIC")
  1018  }