github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/cpu/nativepolicy/policy_hint_handlers.go

/*
Copyright 2022 The Katalyst Authors.
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package nativepolicy

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"

	nativepolicyutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/nativepolicy/util"
	cpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/util"
	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
	"github.com/kubewharf/katalyst-core/pkg/util/general"
	"github.com/kubewharf/katalyst-core/pkg/util/machine"
)

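// dedicatedCoresHintHandler generates NUMA topology hints for a container
// requesting dedicated cores. If the container already has an allocation
// recorded in state, hints are regenerated from that allocation; otherwise
// (or if regeneration fails) they are calculated from the CPUs currently
// available on the machine plus any CPUs reusable for this pod.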
func (p *NativePolicy) dedicatedCoresHintHandler(ctx context.Context,
	req *pluginapi.ResourceRequest,
) (*pluginapi.ResourceHintsResponse, error) {
	if req == nil {
		return nil, fmt.Errorf("HintHandler got nil req")
	}

	reqInt, _, err := util.GetQuantityFromResourceReq(req)
	if err != nil {
		return nil, fmt.Errorf("GetQuantityFromResourceReq failed with error: %v", err)
	}

	machineState := p.state.GetMachineState()
	var hints map[string]*pluginapi.ListOfTopologyHints

	allocationInfo := p.state.GetAllocationInfo(req.PodUid, req.ContainerName)
	if allocationInfo != nil {
		hints = cpuutil.RegenerateHints(allocationInfo, reqInt)

		// RegenerateHints failed; clear the stale container record and rebuild the
		// machine state so that hints can be recalculated from scratch below.
		if hints == nil {
			podEntries := p.state.GetPodEntries()
			delete(podEntries[req.PodUid], req.ContainerName)
			if len(podEntries[req.PodUid]) == 0 {
				delete(podEntries, req.PodUid)
			}

			var err error
			machineState, err = nativepolicyutil.GenerateMachineStateFromPodEntries(p.machineInfo.CPUTopology, podEntries)
			if err != nil {
				general.Errorf("pod: %s/%s, container: %s GenerateMachineStateFromPodEntries failed with error: %v",
					req.PodNamespace, req.PodName, req.ContainerName, err)
				return nil, fmt.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err)
			}
		}
	}

	// otherwise, calculate hints for a container without allocated CPUs
	if hints == nil {
		// Get a list of available CPUs.
		available := machineState.GetAvailableCPUSet(p.reservedCPUs)

		// Get a list of reusable CPUs (e.g. CPUs reused from initContainers).
		// It should be an empty CPUSet for a newly created pod.
		reusable := p.cpusToReuse[req.PodUid]

		// calculate hints for the container from the available and reusable CPUs
		hints = p.generateCPUTopologyHints(available, reusable, reqInt)
	}

	general.InfoS("TopologyHints generated", "pod", fmt.Sprintf("%s/%s", req.PodNamespace, req.PodName), "containerName", req.ContainerName, "cpuHints", hints)

	return util.PackResourceHintsResponse(req, string(v1.ResourceCPU), hints)
}

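// sharedPoolHintHandler handles topology-hint requests for shared-pool
// containers. The native policy does not pin shared containers to NUMA nodes,
// so it returns a nil hint list to indicate that there is no NUMA preference.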
func (p *NativePolicy) sharedPoolHintHandler(_ context.Context,
	req *pluginapi.ResourceRequest,
) (*pluginapi.ResourceHintsResponse, error) {
	return util.PackResourceHintsResponse(req, string(v1.ResourceCPU),
		map[string]*pluginapi.ListOfTopologyHints{
			string(v1.ResourceCPU): nil, // indicates that there is no numa preference
		})
}

// generateCPUTopologyHints generates a set of TopologyHints given the set of
// available CPUs and the number of CPUs being requested.
//
// It follows the convention of marking all hints that have the same number of
// bits set as the narrowest matching NUMANodeAffinity with 'Preferred: true', and
// marking all others with 'Preferred: false'.
func (p *NativePolicy) generateCPUTopologyHints(availableCPUs machine.CPUSet, reusableCPUs machine.CPUSet, request int) map[string]*pluginapi.ListOfTopologyHints {
	// Initialize minAffinitySize to include all NUMA Nodes.
	minAffinitySize := p.machineInfo.CPUDetails.NUMANodes().Size()

	hints := map[string]*pluginapi.ListOfTopologyHints{
		string(v1.ResourceCPU): {
			Hints: []*pluginapi.TopologyHint{},
		},
	}

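	// Note: masks are visited in order of increasing bit count, so
	// minAffinitySize already holds its final value whenever a hint is
	// appended below; assigning Preferred inline is therefore equivalent to
	// a second pass over the completed hint list.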
	// Iterate through all combinations of NUMA nodes as bitmasks and build hints from them.
	bitmask.IterateBitMasks(p.machineInfo.CPUDetails.NUMANodes().ToSliceInt(), func(mask bitmask.BitMask) {
		// First, update minAffinitySize for the current request size.
		cpusInMask := p.machineInfo.CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size()
		if cpusInMask >= request && mask.Count() < minAffinitySize {
			minAffinitySize = mask.Count()
		}

		// Then check to see if we have enough CPUs available on the current
		// NUMA node bitmask to satisfy the CPU request, starting with the
		// reusable CPUs (e.g. those released by initContainers).
		numMatching := 0
		for _, c := range reusableCPUs.ToSliceInt() {
			// Disregard this mask if a reusable CPU's NUMA node is not part of it.
			if !mask.IsSet(p.machineInfo.CPUDetails[c].NUMANodeID) {
				return
			}
			numMatching++
		}

		// Finally, check to see if enough available CPUs remain on the current
		// NUMA node combination to satisfy the CPU request.
		for _, c := range availableCPUs.ToSliceInt() {
			if mask.IsSet(p.machineInfo.CPUDetails[c].NUMANodeID) {
				numMatching++
			}
		}

		// If they don't, then move on to the next combination.
		if numMatching < request {
			return
		}

		// Otherwise, create a new hint from the NUMA node bitmask and add it to the
		// list of hints. A hint is preferred when its mask is as narrow as the
		// narrowest mask that can satisfy the request (minAffinitySize).
		hints[string(v1.ResourceCPU)].Hints = append(hints[string(v1.ResourceCPU)].Hints, &pluginapi.TopologyHint{
			Nodes:     machine.MaskToUInt64Array(mask),
			Preferred: mask.Count() == minAffinitySize,
		})
	})

	return hints
}