github.com/kubeflow/training-operator@v1.7.0/pkg/common/util/reconciler.go (about)

     1  // Copyright 2021 The Kubeflow Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License
    14  
    15  package util
    16  
    17  import (
    18  	"fmt"
    19  	"reflect"
    20  
    21  	corev1 "k8s.io/api/core/v1"
    22  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    23  	"sigs.k8s.io/controller-runtime/pkg/event"
    24  
    25  	kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
    26  	"github.com/kubeflow/training-operator/pkg/controller.v1/common"
    27  	"github.com/kubeflow/training-operator/pkg/controller.v1/expectation"
    28  	commonutil "github.com/kubeflow/training-operator/pkg/util"
    29  )
    30  
    31  // SatisfiedExpectations returns true if the required adds/dels for the given mxjob have been observed.
    32  // Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller
    33  // manager.
    34  func SatisfiedExpectations(exp expectation.ControllerExpectationsInterface, jobKey string, replicaTypes []kubeflowv1.ReplicaType) bool {
    35  	satisfied := false
    36  	for _, rtype := range replicaTypes {
    37  		// Check the expectations of the pods.
    38  		expectationPodsKey := expectation.GenExpectationPodsKey(jobKey, string(rtype))
    39  		satisfied = satisfied || exp.SatisfiedExpectations(expectationPodsKey)
    40  		// Check the expectations of the services.
    41  		expectationServicesKey := expectation.GenExpectationServicesKey(jobKey, string(rtype))
    42  		satisfied = satisfied || exp.SatisfiedExpectations(expectationServicesKey)
    43  	}
    44  
    45  	return satisfied
    46  }
    47  
    48  // OnDependentCreateFunc modify expectations when dependent (pod/service) creation observed.
    49  func OnDependentCreateFunc(exp expectation.ControllerExpectationsInterface) func(event.CreateEvent) bool {
    50  	return func(e event.CreateEvent) bool {
    51  		rtype := e.Object.GetLabels()[kubeflowv1.ReplicaTypeLabel]
    52  		if len(rtype) == 0 {
    53  			return false
    54  		}
    55  
    56  		//logrus.Info("Update on create function ", ptjr.ControllerName(), " create object ", e.Object.GetName())
    57  		if controllerRef := metav1.GetControllerOf(e.Object); controllerRef != nil {
    58  			jobKey := fmt.Sprintf("%s/%s", e.Object.GetNamespace(), controllerRef.Name)
    59  			var expectKey string
    60  			switch e.Object.(type) {
    61  			case *corev1.Pod:
    62  				expectKey = expectation.GenExpectationPodsKey(jobKey, rtype)
    63  			case *corev1.Service:
    64  				expectKey = expectation.GenExpectationServicesKey(jobKey, rtype)
    65  			default:
    66  				return false
    67  			}
    68  			exp.CreationObserved(expectKey)
    69  			return true
    70  		}
    71  
    72  		return true
    73  	}
    74  }
    75  
    76  // OnDependentUpdateFunc modify expectations when dependent (pod/service) update observed.
    77  func OnDependentUpdateFunc(jc *common.JobController) func(updateEvent event.UpdateEvent) bool {
    78  	return func(e event.UpdateEvent) bool {
    79  		newObj := e.ObjectNew
    80  		oldObj := e.ObjectOld
    81  		if newObj.GetResourceVersion() == oldObj.GetResourceVersion() {
    82  			// Periodic resync will send update events for all known pods.
    83  			// Two different versions of the same pod will always have different RVs.
    84  			return false
    85  		}
    86  
    87  		kind := jc.Controller.GetAPIGroupVersionKind().Kind
    88  		var logger = LoggerForGenericKind(newObj, kind)
    89  
    90  		switch obj := newObj.(type) {
    91  		case *corev1.Pod:
    92  			logger = commonutil.LoggerForPod(obj, jc.Controller.GetAPIGroupVersionKind().Kind)
    93  		case *corev1.Service:
    94  			logger = commonutil.LoggerForService(newObj.(*corev1.Service), jc.Controller.GetAPIGroupVersionKind().Kind)
    95  		default:
    96  			return false
    97  		}
    98  
    99  		newControllerRef := metav1.GetControllerOf(newObj)
   100  		oldControllerRef := metav1.GetControllerOf(oldObj)
   101  		controllerRefChanged := !reflect.DeepEqual(newControllerRef, oldControllerRef)
   102  
   103  		if controllerRefChanged && oldControllerRef != nil {
   104  			// The ControllerRef was changed. Sync the old controller, if any.
   105  			if job := resolveControllerRef(jc, oldObj.GetNamespace(), oldControllerRef); job != nil {
   106  				logger.Infof("pod/service controller ref updated: %v, %v", newObj, oldObj)
   107  				return true
   108  			}
   109  		}
   110  
   111  		// If it has a controller ref, that's all that matters.
   112  		if newControllerRef != nil {
   113  			job := resolveControllerRef(jc, newObj.GetNamespace(), newControllerRef)
   114  			if job == nil {
   115  				return false
   116  			}
   117  			logger.Debugf("pod/service has a controller ref: %v, %v", newObj, oldObj)
   118  			return true
   119  		}
   120  		return false
   121  	}
   122  }
   123  
   124  // resolveControllerRef returns the job referenced by a ControllerRef,
   125  // or nil if the ControllerRef could not be resolved to a matching job
   126  // of the correct Kind.
   127  func resolveControllerRef(jc *common.JobController, namespace string, controllerRef *metav1.OwnerReference) metav1.Object {
   128  	// We can't look up by UID, so look up by Name and then verify UID.
   129  	// Don't even try to look up by Name if it's the wrong Kind.
   130  	if controllerRef.Kind != jc.Controller.GetAPIGroupVersionKind().Kind {
   131  		return nil
   132  	}
   133  	job, err := jc.Controller.GetJobFromInformerCache(namespace, controllerRef.Name)
   134  	if err != nil {
   135  		return nil
   136  	}
   137  	if job.GetUID() != controllerRef.UID {
   138  		// The controller we found with this Name is not the same one that the
   139  		// ControllerRef points to.
   140  		return nil
   141  	}
   142  	return job
   143  }
   144  
   145  // OnDependentDeleteFunc modify expectations when dependent (pod/service) deletion observed.
   146  func OnDependentDeleteFunc(exp expectation.ControllerExpectationsInterface) func(event.DeleteEvent) bool {
   147  	return func(e event.DeleteEvent) bool {
   148  
   149  		rtype := e.Object.GetLabels()[kubeflowv1.ReplicaTypeLabel]
   150  		if len(rtype) == 0 {
   151  			return false
   152  		}
   153  
   154  		// logrus.Info("Update on deleting function ", xgbr.ControllerName(), " delete object ", e.Object.GetName())
   155  		if controllerRef := metav1.GetControllerOf(e.Object); controllerRef != nil {
   156  			jobKey := fmt.Sprintf("%s/%s", e.Object.GetNamespace(), controllerRef.Name)
   157  			var expectKey string
   158  			switch e.Object.(type) {
   159  			case *corev1.Pod:
   160  				expectKey = expectation.GenExpectationPodsKey(jobKey, rtype)
   161  			case *corev1.Service:
   162  				expectKey = expectation.GenExpectationServicesKey(jobKey, rtype)
   163  			default:
   164  				return false
   165  			}
   166  			exp.DeletionObserved(expectKey)
   167  			return true
   168  		}
   169  
   170  		return true
   171  	}
   172  }