k8s.io/kubernetes@v1.29.3/pkg/scheduler/extender.go (about)

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package scheduler
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/json"
    22  	"fmt"
    23  	"net/http"
    24  	"strings"
    25  	"time"
    26  
    27  	v1 "k8s.io/api/core/v1"
    28  	utilnet "k8s.io/apimachinery/pkg/util/net"
    29  	"k8s.io/apimachinery/pkg/util/sets"
    30  	restclient "k8s.io/client-go/rest"
    31  	extenderv1 "k8s.io/kube-scheduler/extender/v1"
    32  	schedulerapi "k8s.io/kubernetes/pkg/scheduler/apis/config"
    33  	"k8s.io/kubernetes/pkg/scheduler/framework"
    34  )
    35  
const (
	// DefaultExtenderTimeout is the HTTP client timeout applied to an extender
	// whose configured HTTPTimeout is zero.
	DefaultExtenderTimeout = 5 * time.Second
)
    40  
// HTTPExtender implements the Extender interface by sending JSON requests to
// an external HTTP(S) endpoint.
type HTTPExtender struct {
	extenderURL      string           // URL prefix the verbs are appended to; also returned by Name
	preemptVerb      string           // path suffix for preemption calls; empty means preemption is unsupported
	filterVerb       string           // path suffix for filter calls; empty makes Filter return its input unchanged
	prioritizeVerb   string           // path suffix for prioritize calls; empty makes Prioritize return zero scores
	bindVerb         string           // path suffix for bind calls; empty means the extender is not a binder
	weight           int64            // weight returned by Prioritize alongside the extender's scores
	client           *http.Client     // HTTP client used for every request to the extender
	nodeCacheCapable bool             // if true, node names and pod UIDs are sent instead of full objects
	managedResources sets.Set[string] // resource names checked by IsInterested; empty means every pod is of interest
	ignorable        bool             // value reported by IsIgnorable
}
    54  
    55  func makeTransport(config *schedulerapi.Extender) (http.RoundTripper, error) {
    56  	var cfg restclient.Config
    57  	if config.TLSConfig != nil {
    58  		cfg.TLSClientConfig.Insecure = config.TLSConfig.Insecure
    59  		cfg.TLSClientConfig.ServerName = config.TLSConfig.ServerName
    60  		cfg.TLSClientConfig.CertFile = config.TLSConfig.CertFile
    61  		cfg.TLSClientConfig.KeyFile = config.TLSConfig.KeyFile
    62  		cfg.TLSClientConfig.CAFile = config.TLSConfig.CAFile
    63  		cfg.TLSClientConfig.CertData = config.TLSConfig.CertData
    64  		cfg.TLSClientConfig.KeyData = config.TLSConfig.KeyData
    65  		cfg.TLSClientConfig.CAData = config.TLSConfig.CAData
    66  	}
    67  	if config.EnableHTTPS {
    68  		hasCA := len(cfg.CAFile) > 0 || len(cfg.CAData) > 0
    69  		if !hasCA {
    70  			cfg.Insecure = true
    71  		}
    72  	}
    73  	tlsConfig, err := restclient.TLSConfigFor(&cfg)
    74  	if err != nil {
    75  		return nil, err
    76  	}
    77  	if tlsConfig != nil {
    78  		return utilnet.SetTransportDefaults(&http.Transport{
    79  			TLSClientConfig: tlsConfig,
    80  		}), nil
    81  	}
    82  	return utilnet.SetTransportDefaults(&http.Transport{}), nil
    83  }
    84  
    85  // NewHTTPExtender creates an HTTPExtender object.
    86  func NewHTTPExtender(config *schedulerapi.Extender) (framework.Extender, error) {
    87  	if config.HTTPTimeout.Duration.Nanoseconds() == 0 {
    88  		config.HTTPTimeout.Duration = time.Duration(DefaultExtenderTimeout)
    89  	}
    90  
    91  	transport, err := makeTransport(config)
    92  	if err != nil {
    93  		return nil, err
    94  	}
    95  	client := &http.Client{
    96  		Transport: transport,
    97  		Timeout:   config.HTTPTimeout.Duration,
    98  	}
    99  	managedResources := sets.New[string]()
   100  	for _, r := range config.ManagedResources {
   101  		managedResources.Insert(string(r.Name))
   102  	}
   103  	return &HTTPExtender{
   104  		extenderURL:      config.URLPrefix,
   105  		preemptVerb:      config.PreemptVerb,
   106  		filterVerb:       config.FilterVerb,
   107  		prioritizeVerb:   config.PrioritizeVerb,
   108  		bindVerb:         config.BindVerb,
   109  		weight:           config.Weight,
   110  		client:           client,
   111  		nodeCacheCapable: config.NodeCacheCapable,
   112  		managedResources: managedResources,
   113  		ignorable:        config.Ignorable,
   114  	}, nil
   115  }
   116  
// Name returns the extender's URL prefix, which serves as its identifier.
func (h *HTTPExtender) Name() string {
	return h.extenderURL
}
   121  
// IsIgnorable reports whether the extender was configured as ignorable, which
// indicates that scheduling should not fail when this extender is unavailable.
func (h *HTTPExtender) IsIgnorable() bool {
	return h.ignorable
}
   127  
   128  // SupportsPreemption returns true if an extender supports preemption.
   129  // An extender should have preempt verb defined and enabled its own node cache.
   130  func (h *HTTPExtender) SupportsPreemption() bool {
   131  	return len(h.preemptVerb) > 0
   132  }
   133  
   134  // ProcessPreemption returns filtered candidate nodes and victims after running preemption logic in extender.
   135  func (h *HTTPExtender) ProcessPreemption(
   136  	pod *v1.Pod,
   137  	nodeNameToVictims map[string]*extenderv1.Victims,
   138  	nodeInfos framework.NodeInfoLister,
   139  ) (map[string]*extenderv1.Victims, error) {
   140  	var (
   141  		result extenderv1.ExtenderPreemptionResult
   142  		args   *extenderv1.ExtenderPreemptionArgs
   143  	)
   144  
   145  	if !h.SupportsPreemption() {
   146  		return nil, fmt.Errorf("preempt verb is not defined for extender %v but run into ProcessPreemption", h.extenderURL)
   147  	}
   148  
   149  	if h.nodeCacheCapable {
   150  		// If extender has cached node info, pass NodeNameToMetaVictims in args.
   151  		nodeNameToMetaVictims := convertToMetaVictims(nodeNameToVictims)
   152  		args = &extenderv1.ExtenderPreemptionArgs{
   153  			Pod:                   pod,
   154  			NodeNameToMetaVictims: nodeNameToMetaVictims,
   155  		}
   156  	} else {
   157  		args = &extenderv1.ExtenderPreemptionArgs{
   158  			Pod:               pod,
   159  			NodeNameToVictims: nodeNameToVictims,
   160  		}
   161  	}
   162  
   163  	if err := h.send(h.preemptVerb, args, &result); err != nil {
   164  		return nil, err
   165  	}
   166  
   167  	// Extender will always return NodeNameToMetaVictims.
   168  	// So let's convert it to NodeNameToVictims by using <nodeInfos>.
   169  	newNodeNameToVictims, err := h.convertToVictims(result.NodeNameToMetaVictims, nodeInfos)
   170  	if err != nil {
   171  		return nil, err
   172  	}
   173  	// Do not override <nodeNameToVictims>.
   174  	return newNodeNameToVictims, nil
   175  }
   176  
   177  // convertToVictims converts "nodeNameToMetaVictims" from object identifiers,
   178  // such as UIDs and names, to object pointers.
   179  func (h *HTTPExtender) convertToVictims(
   180  	nodeNameToMetaVictims map[string]*extenderv1.MetaVictims,
   181  	nodeInfos framework.NodeInfoLister,
   182  ) (map[string]*extenderv1.Victims, error) {
   183  	nodeNameToVictims := map[string]*extenderv1.Victims{}
   184  	for nodeName, metaVictims := range nodeNameToMetaVictims {
   185  		nodeInfo, err := nodeInfos.Get(nodeName)
   186  		if err != nil {
   187  			return nil, err
   188  		}
   189  		victims := &extenderv1.Victims{
   190  			Pods:             []*v1.Pod{},
   191  			NumPDBViolations: metaVictims.NumPDBViolations,
   192  		}
   193  		for _, metaPod := range metaVictims.Pods {
   194  			pod, err := h.convertPodUIDToPod(metaPod, nodeInfo)
   195  			if err != nil {
   196  				return nil, err
   197  			}
   198  			victims.Pods = append(victims.Pods, pod)
   199  		}
   200  		nodeNameToVictims[nodeName] = victims
   201  	}
   202  	return nodeNameToVictims, nil
   203  }
   204  
   205  // convertPodUIDToPod returns v1.Pod object for given MetaPod and node info.
   206  // The v1.Pod object is restored by nodeInfo.Pods().
   207  // It returns an error if there's cache inconsistency between default scheduler
   208  // and extender, i.e. when the pod is not found in nodeInfo.Pods.
   209  func (h *HTTPExtender) convertPodUIDToPod(
   210  	metaPod *extenderv1.MetaPod,
   211  	nodeInfo *framework.NodeInfo) (*v1.Pod, error) {
   212  	for _, p := range nodeInfo.Pods {
   213  		if string(p.Pod.UID) == metaPod.UID {
   214  			return p.Pod, nil
   215  		}
   216  	}
   217  	return nil, fmt.Errorf("extender: %v claims to preempt pod (UID: %v) on node: %v, but the pod is not found on that node",
   218  		h.extenderURL, metaPod, nodeInfo.Node().Name)
   219  }
   220  
   221  // convertToMetaVictims converts from struct type to meta types.
   222  func convertToMetaVictims(
   223  	nodeNameToVictims map[string]*extenderv1.Victims,
   224  ) map[string]*extenderv1.MetaVictims {
   225  	nodeNameToMetaVictims := map[string]*extenderv1.MetaVictims{}
   226  	for node, victims := range nodeNameToVictims {
   227  		metaVictims := &extenderv1.MetaVictims{
   228  			Pods:             []*extenderv1.MetaPod{},
   229  			NumPDBViolations: victims.NumPDBViolations,
   230  		}
   231  		for _, pod := range victims.Pods {
   232  			metaPod := &extenderv1.MetaPod{
   233  				UID: string(pod.UID),
   234  			}
   235  			metaVictims.Pods = append(metaVictims.Pods, metaPod)
   236  		}
   237  		nodeNameToMetaVictims[node] = metaVictims
   238  	}
   239  	return nodeNameToMetaVictims
   240  }
   241  
   242  // Filter based on extender implemented predicate functions. The filtered list is
   243  // expected to be a subset of the supplied list; otherwise the function returns an error.
   244  // The failedNodes and failedAndUnresolvableNodes optionally contains the list
   245  // of failed nodes and failure reasons, except nodes in the latter are
   246  // unresolvable.
   247  func (h *HTTPExtender) Filter(
   248  	pod *v1.Pod,
   249  	nodes []*v1.Node,
   250  ) (filteredList []*v1.Node, failedNodes, failedAndUnresolvableNodes extenderv1.FailedNodesMap, err error) {
   251  	var (
   252  		result     extenderv1.ExtenderFilterResult
   253  		nodeList   *v1.NodeList
   254  		nodeNames  *[]string
   255  		nodeResult []*v1.Node
   256  		args       *extenderv1.ExtenderArgs
   257  	)
   258  	fromNodeName := make(map[string]*v1.Node)
   259  	for _, n := range nodes {
   260  		fromNodeName[n.Name] = n
   261  	}
   262  
   263  	if h.filterVerb == "" {
   264  		return nodes, extenderv1.FailedNodesMap{}, extenderv1.FailedNodesMap{}, nil
   265  	}
   266  
   267  	if h.nodeCacheCapable {
   268  		nodeNameSlice := make([]string, 0, len(nodes))
   269  		for _, node := range nodes {
   270  			nodeNameSlice = append(nodeNameSlice, node.Name)
   271  		}
   272  		nodeNames = &nodeNameSlice
   273  	} else {
   274  		nodeList = &v1.NodeList{}
   275  		for _, node := range nodes {
   276  			nodeList.Items = append(nodeList.Items, *node)
   277  		}
   278  	}
   279  
   280  	args = &extenderv1.ExtenderArgs{
   281  		Pod:       pod,
   282  		Nodes:     nodeList,
   283  		NodeNames: nodeNames,
   284  	}
   285  
   286  	if err := h.send(h.filterVerb, args, &result); err != nil {
   287  		return nil, nil, nil, err
   288  	}
   289  	if result.Error != "" {
   290  		return nil, nil, nil, fmt.Errorf(result.Error)
   291  	}
   292  
   293  	if h.nodeCacheCapable && result.NodeNames != nil {
   294  		nodeResult = make([]*v1.Node, len(*result.NodeNames))
   295  		for i, nodeName := range *result.NodeNames {
   296  			if n, ok := fromNodeName[nodeName]; ok {
   297  				nodeResult[i] = n
   298  			} else {
   299  				return nil, nil, nil, fmt.Errorf(
   300  					"extender %q claims a filtered node %q which is not found in the input node list",
   301  					h.extenderURL, nodeName)
   302  			}
   303  		}
   304  	} else if result.Nodes != nil {
   305  		nodeResult = make([]*v1.Node, len(result.Nodes.Items))
   306  		for i := range result.Nodes.Items {
   307  			nodeResult[i] = &result.Nodes.Items[i]
   308  		}
   309  	}
   310  
   311  	return nodeResult, result.FailedNodes, result.FailedAndUnresolvableNodes, nil
   312  }
   313  
   314  // Prioritize based on extender implemented priority functions. Weight*priority is added
   315  // up for each such priority function. The returned score is added to the score computed
   316  // by Kubernetes scheduler. The total score is used to do the host selection.
   317  func (h *HTTPExtender) Prioritize(pod *v1.Pod, nodes []*v1.Node) (*extenderv1.HostPriorityList, int64, error) {
   318  	var (
   319  		result    extenderv1.HostPriorityList
   320  		nodeList  *v1.NodeList
   321  		nodeNames *[]string
   322  		args      *extenderv1.ExtenderArgs
   323  	)
   324  
   325  	if h.prioritizeVerb == "" {
   326  		result := extenderv1.HostPriorityList{}
   327  		for _, node := range nodes {
   328  			result = append(result, extenderv1.HostPriority{Host: node.Name, Score: 0})
   329  		}
   330  		return &result, 0, nil
   331  	}
   332  
   333  	if h.nodeCacheCapable {
   334  		nodeNameSlice := make([]string, 0, len(nodes))
   335  		for _, node := range nodes {
   336  			nodeNameSlice = append(nodeNameSlice, node.Name)
   337  		}
   338  		nodeNames = &nodeNameSlice
   339  	} else {
   340  		nodeList = &v1.NodeList{}
   341  		for _, node := range nodes {
   342  			nodeList.Items = append(nodeList.Items, *node)
   343  		}
   344  	}
   345  
   346  	args = &extenderv1.ExtenderArgs{
   347  		Pod:       pod,
   348  		Nodes:     nodeList,
   349  		NodeNames: nodeNames,
   350  	}
   351  
   352  	if err := h.send(h.prioritizeVerb, args, &result); err != nil {
   353  		return nil, 0, err
   354  	}
   355  	return &result, h.weight, nil
   356  }
   357  
   358  // Bind delegates the action of binding a pod to a node to the extender.
   359  func (h *HTTPExtender) Bind(binding *v1.Binding) error {
   360  	var result extenderv1.ExtenderBindingResult
   361  	if !h.IsBinder() {
   362  		// This shouldn't happen as this extender wouldn't have become a Binder.
   363  		return fmt.Errorf("unexpected empty bindVerb in extender")
   364  	}
   365  	req := &extenderv1.ExtenderBindingArgs{
   366  		PodName:      binding.Name,
   367  		PodNamespace: binding.Namespace,
   368  		PodUID:       binding.UID,
   369  		Node:         binding.Target.Name,
   370  	}
   371  	if err := h.send(h.bindVerb, req, &result); err != nil {
   372  		return err
   373  	}
   374  	if result.Error != "" {
   375  		return fmt.Errorf(result.Error)
   376  	}
   377  	return nil
   378  }
   379  
   380  // IsBinder returns whether this extender is configured for the Bind method.
   381  func (h *HTTPExtender) IsBinder() bool {
   382  	return h.bindVerb != ""
   383  }
   384  
   385  // Helper function to send messages to the extender
   386  func (h *HTTPExtender) send(action string, args interface{}, result interface{}) error {
   387  	out, err := json.Marshal(args)
   388  	if err != nil {
   389  		return err
   390  	}
   391  
   392  	url := strings.TrimRight(h.extenderURL, "/") + "/" + action
   393  
   394  	req, err := http.NewRequest("POST", url, bytes.NewReader(out))
   395  	if err != nil {
   396  		return err
   397  	}
   398  
   399  	req.Header.Set("Content-Type", "application/json")
   400  
   401  	resp, err := h.client.Do(req)
   402  	if err != nil {
   403  		return err
   404  	}
   405  	defer resp.Body.Close()
   406  
   407  	if resp.StatusCode != http.StatusOK {
   408  		return fmt.Errorf("failed %v with extender at URL %v, code %v", action, url, resp.StatusCode)
   409  	}
   410  
   411  	return json.NewDecoder(resp.Body).Decode(result)
   412  }
   413  
   414  // IsInterested returns true if at least one extended resource requested by
   415  // this pod is managed by this extender.
   416  func (h *HTTPExtender) IsInterested(pod *v1.Pod) bool {
   417  	if h.managedResources.Len() == 0 {
   418  		return true
   419  	}
   420  	if h.hasManagedResources(pod.Spec.Containers) {
   421  		return true
   422  	}
   423  	if h.hasManagedResources(pod.Spec.InitContainers) {
   424  		return true
   425  	}
   426  	return false
   427  }
   428  
   429  func (h *HTTPExtender) hasManagedResources(containers []v1.Container) bool {
   430  	for i := range containers {
   431  		container := &containers[i]
   432  		for resourceName := range container.Resources.Requests {
   433  			if h.managedResources.Has(string(resourceName)) {
   434  				return true
   435  			}
   436  		}
   437  		for resourceName := range container.Resources.Limits {
   438  			if h.managedResources.Has(string(resourceName)) {
   439  				return true
   440  			}
   441  		}
   442  	}
   443  	return false
   444  }