volcano.sh/volcano@v1.9.0/pkg/controllers/podgroup/pg_controller_handler.go (about)

     1  /*
     2  Copyright 2019 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package podgroup
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"strings"
    23  
    24  	appsv1 "k8s.io/api/apps/v1"
    25  	v1 "k8s.io/api/core/v1"
    26  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    27  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  	"k8s.io/apimachinery/pkg/runtime/schema"
    29  	"k8s.io/apimachinery/pkg/types"
    30  	"k8s.io/klog/v2"
    31  
    32  	batchv1alpha1 "volcano.sh/apis/pkg/apis/batch/v1alpha1"
    33  	"volcano.sh/apis/pkg/apis/helpers"
    34  	scheduling "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
    35  	"volcano.sh/volcano/pkg/controllers/util"
    36  )
    37  
    38  type podRequest struct {
    39  	podName      string
    40  	podNamespace string
    41  }
    42  
    43  type metadataForMergePatch struct {
    44  	Metadata annotationForMergePatch `json:"metadata"`
    45  }
    46  
    47  type annotationForMergePatch struct {
    48  	Annotations map[string]string `json:"annotations"`
    49  }
    50  
    51  func (pg *pgcontroller) addPod(obj interface{}) {
    52  	pod, ok := obj.(*v1.Pod)
    53  	if !ok {
    54  		klog.Errorf("Failed to convert %v to v1.Pod", obj)
    55  		return
    56  	}
    57  
    58  	req := podRequest{
    59  		podName:      pod.Name,
    60  		podNamespace: pod.Namespace,
    61  	}
    62  
    63  	pg.queue.Add(req)
    64  }
    65  
    66  func (pg *pgcontroller) addReplicaSet(obj interface{}) {
    67  	rs, ok := obj.(*appsv1.ReplicaSet)
    68  	if !ok {
    69  		klog.Errorf("Failed to convert %v to appsv1.ReplicaSet", obj)
    70  		return
    71  	}
    72  
    73  	if *rs.Spec.Replicas == 0 {
    74  		pgName := batchv1alpha1.PodgroupNamePrefix + string(rs.UID)
    75  		err := pg.vcClient.SchedulingV1beta1().PodGroups(rs.Namespace).Delete(context.TODO(), pgName, metav1.DeleteOptions{})
    76  		if err != nil && !apierrors.IsNotFound(err) {
    77  			klog.Errorf("Failed to delete PodGroup <%s/%s>: %v", rs.Namespace, pgName, err)
    78  		}
    79  	}
    80  }
    81  
    82  func (pg *pgcontroller) updateReplicaSet(oldObj, newObj interface{}) {
    83  	pg.addReplicaSet(newObj)
    84  }
    85  
    86  func (pg *pgcontroller) updatePodAnnotations(pod *v1.Pod, pgName string) error {
    87  	if pod.Annotations == nil {
    88  		pod.Annotations = make(map[string]string)
    89  	}
    90  	if pod.Annotations[scheduling.KubeGroupNameAnnotationKey] == "" {
    91  		patch := metadataForMergePatch{
    92  			Metadata: annotationForMergePatch{
    93  				Annotations: map[string]string{
    94  					scheduling.KubeGroupNameAnnotationKey: pgName,
    95  				},
    96  			},
    97  		}
    98  
    99  		patchBytes, err := json.Marshal(&patch)
   100  		if err != nil {
   101  			klog.Errorf("Failed to json.Marshal pod annotation: %v", err)
   102  			return err
   103  		}
   104  
   105  		if _, err := pg.kubeClient.CoreV1().Pods(pod.Namespace).Patch(context.TODO(), pod.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}); err != nil {
   106  			klog.Errorf("Failed to update pod <%s/%s>: %v", pod.Namespace, pod.Name, err)
   107  			return err
   108  		}
   109  	} else {
   110  		if pod.Annotations[scheduling.KubeGroupNameAnnotationKey] != pgName {
   111  			klog.Errorf("normal pod %s/%s annotations %s value is not %s, but %s", pod.Namespace, pod.Name,
   112  				scheduling.KubeGroupNameAnnotationKey, pgName, pod.Annotations[scheduling.KubeGroupNameAnnotationKey])
   113  		}
   114  	}
   115  	return nil
   116  }
   117  
   118  func (pg *pgcontroller) getAnnotationsFromUpperRes(kind string, name string, namespace string) map[string]string {
   119  	switch kind {
   120  	case "ReplicaSet":
   121  		rs, err := pg.kubeClient.AppsV1().ReplicaSets(namespace).Get(context.TODO(), name, metav1.GetOptions{})
   122  		if err != nil {
   123  			klog.Errorf("Failed to get upper %s for Pod <%s/%s>: %v", kind, namespace, name, err)
   124  			return map[string]string{}
   125  		}
   126  		return rs.Annotations
   127  	case "DaemonSet":
   128  		ds, err := pg.kubeClient.AppsV1().DaemonSets(namespace).Get(context.TODO(), name, metav1.GetOptions{})
   129  		if err != nil {
   130  			klog.Errorf("Failed to get upper %s for Pod <%s/%s>: %v", kind, namespace, name, err)
   131  			return map[string]string{}
   132  		}
   133  		return ds.Annotations
   134  	case "StatefulSet":
   135  		ss, err := pg.kubeClient.AppsV1().StatefulSets(namespace).Get(context.TODO(), name, metav1.GetOptions{})
   136  		if err != nil {
   137  			klog.Errorf("Failed to get upper %s for Pod <%s/%s>: %v", kind, namespace, name, err)
   138  			return map[string]string{}
   139  		}
   140  		return ss.Annotations
   141  	case "Job":
   142  		job, err := pg.kubeClient.BatchV1().Jobs(namespace).Get(context.TODO(), name, metav1.GetOptions{})
   143  		if err != nil {
   144  			klog.Errorf("Failed to get upper %s for Pod <%s/%s>: %v", kind, namespace, name, err)
   145  			return map[string]string{}
   146  		}
   147  		return job.Annotations
   148  	default:
   149  		return map[string]string{}
   150  	}
   151  }
   152  
   153  // Inherit annotations from upper resources.
   154  func (pg *pgcontroller) inheritUpperAnnotations(pod *v1.Pod, obj *scheduling.PodGroup) {
   155  	if pg.inheritOwnerAnnotations {
   156  		for _, reference := range pod.OwnerReferences {
   157  			if reference.Kind != "" && reference.Name != "" {
   158  				var upperAnnotations = pg.getAnnotationsFromUpperRes(reference.Kind, reference.Name, pod.Namespace)
   159  				for k, v := range upperAnnotations {
   160  					if strings.HasPrefix(k, scheduling.AnnotationPrefix) {
   161  						obj.Annotations[k] = v
   162  					}
   163  				}
   164  			}
   165  		}
   166  	}
   167  }
   168  
   169  func (pg *pgcontroller) createNormalPodPGIfNotExist(pod *v1.Pod) error {
   170  	pgName := helpers.GeneratePodgroupName(pod)
   171  
   172  	if _, err := pg.pgLister.PodGroups(pod.Namespace).Get(pgName); err != nil {
   173  		if !apierrors.IsNotFound(err) {
   174  			klog.Errorf("Failed to get normal PodGroup for Pod <%s/%s>: %v",
   175  				pod.Namespace, pod.Name, err)
   176  			return err
   177  		}
   178  
   179  		obj := &scheduling.PodGroup{
   180  			ObjectMeta: metav1.ObjectMeta{
   181  				Namespace:       pod.Namespace,
   182  				Name:            pgName,
   183  				OwnerReferences: newPGOwnerReferences(pod),
   184  				Annotations:     map[string]string{},
   185  				Labels:          map[string]string{},
   186  			},
   187  			Spec: scheduling.PodGroupSpec{
   188  				MinMember:         1,
   189  				PriorityClassName: pod.Spec.PriorityClassName,
   190  				MinResources:      util.GetPodQuotaUsage(pod),
   191  			},
   192  			Status: scheduling.PodGroupStatus{
   193  				Phase: scheduling.PodGroupPending,
   194  			},
   195  		}
   196  
   197  		pg.inheritUpperAnnotations(pod, obj)
   198  		// Individual annotations on pods would overwrite annotations inherited from upper resources.
   199  		if queueName, ok := pod.Annotations[scheduling.QueueNameAnnotationKey]; ok {
   200  			obj.Spec.Queue = queueName
   201  		}
   202  
   203  		if value, ok := pod.Annotations[scheduling.PodPreemptable]; ok {
   204  			obj.Annotations[scheduling.PodPreemptable] = value
   205  		}
   206  		if value, ok := pod.Annotations[scheduling.CooldownTime]; ok {
   207  			obj.Annotations[scheduling.CooldownTime] = value
   208  		}
   209  		if value, ok := pod.Annotations[scheduling.RevocableZone]; ok {
   210  			obj.Annotations[scheduling.RevocableZone] = value
   211  		}
   212  		if value, ok := pod.Labels[scheduling.PodPreemptable]; ok {
   213  			obj.Labels[scheduling.PodPreemptable] = value
   214  		}
   215  		if value, ok := pod.Labels[scheduling.CooldownTime]; ok {
   216  			obj.Labels[scheduling.CooldownTime] = value
   217  		}
   218  
   219  		if value, found := pod.Annotations[scheduling.JDBMinAvailable]; found {
   220  			obj.Annotations[scheduling.JDBMinAvailable] = value
   221  		} else if value, found := pod.Annotations[scheduling.JDBMaxUnavailable]; found {
   222  			obj.Annotations[scheduling.JDBMaxUnavailable] = value
   223  		}
   224  
   225  		if _, err := pg.vcClient.SchedulingV1beta1().PodGroups(pod.Namespace).Create(context.TODO(), obj, metav1.CreateOptions{}); err != nil {
   226  			if !apierrors.IsAlreadyExists(err) {
   227  				klog.Errorf("Failed to create normal PodGroup for Pod <%s/%s>: %v",
   228  					pod.Namespace, pod.Name, err)
   229  				return err
   230  			}
   231  		}
   232  	}
   233  
   234  	return pg.updatePodAnnotations(pod, pgName)
   235  }
   236  
   237  func newPGOwnerReferences(pod *v1.Pod) []metav1.OwnerReference {
   238  	if len(pod.OwnerReferences) != 0 {
   239  		for _, ownerReference := range pod.OwnerReferences {
   240  			if ownerReference.Controller != nil && *ownerReference.Controller {
   241  				return pod.OwnerReferences
   242  			}
   243  		}
   244  	}
   245  
   246  	gvk := schema.GroupVersionKind{
   247  		Group:   v1.SchemeGroupVersion.Group,
   248  		Version: v1.SchemeGroupVersion.Version,
   249  		Kind:    "Pod",
   250  	}
   251  	ref := metav1.NewControllerRef(pod, gvk)
   252  	return []metav1.OwnerReference{*ref}
   253  }