volcano.sh/volcano@v1.9.0/pkg/controllers/jobtemplate/jobtemplate_controller.go (about)

     1  /*
     2  Copyright 2022 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package jobtemplate
    18  
    19  import (
    20  	"fmt"
    21  	"time"
    22  
    23  	v1 "k8s.io/api/core/v1"
    24  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    25  	"k8s.io/apimachinery/pkg/util/wait"
    26  	"k8s.io/client-go/kubernetes"
    27  	corev1 "k8s.io/client-go/kubernetes/typed/core/v1"
    28  	"k8s.io/client-go/tools/cache"
    29  	"k8s.io/client-go/tools/record"
    30  	"k8s.io/client-go/util/workqueue"
    31  	"k8s.io/klog"
    32  
    33  	vcclientset "volcano.sh/apis/pkg/client/clientset/versioned"
    34  	versionedscheme "volcano.sh/apis/pkg/client/clientset/versioned/scheme"
    35  	informerfactory "volcano.sh/apis/pkg/client/informers/externalversions"
    36  	batchinformer "volcano.sh/apis/pkg/client/informers/externalversions/batch/v1alpha1"
    37  	flowinformer "volcano.sh/apis/pkg/client/informers/externalversions/flow/v1alpha1"
    38  	batchlister "volcano.sh/apis/pkg/client/listers/batch/v1alpha1"
    39  	flowlister "volcano.sh/apis/pkg/client/listers/flow/v1alpha1"
    40  	"volcano.sh/volcano/pkg/controllers/apis"
    41  	"volcano.sh/volcano/pkg/controllers/framework"
    42  )
    43  
    44  func init() {
    45  	framework.RegisterController(&jobtemplatecontroller{})
    46  }
    47  
    48  // jobtemplatecontroller the JobTemplate jobtemplatecontroller type.
    49  type jobtemplatecontroller struct {
    50  	kubeClient kubernetes.Interface
    51  	vcClient   vcclientset.Interface
    52  
    53  	//informer
    54  	jobTemplateInformer flowinformer.JobTemplateInformer
    55  	jobInformer         batchinformer.JobInformer
    56  
    57  	//jobTemplateLister
    58  	jobTemplateLister flowlister.JobTemplateLister
    59  	jobTemplateSynced cache.InformerSynced
    60  
    61  	//jobLister
    62  	jobLister batchlister.JobLister
    63  	jobSynced cache.InformerSynced
    64  
    65  	// JobTemplate Event recorder
    66  	recorder record.EventRecorder
    67  
    68  	queue              workqueue.RateLimitingInterface
    69  	enqueueJobTemplate func(req apis.FlowRequest)
    70  
    71  	syncHandler func(req *apis.FlowRequest) error
    72  
    73  	maxRequeueNum int
    74  }
    75  
    76  func (jt *jobtemplatecontroller) Name() string {
    77  	return "jobtemplate-controller"
    78  }
    79  
    80  func (jt *jobtemplatecontroller) Initialize(opt *framework.ControllerOption) error {
    81  	jt.kubeClient = opt.KubeClient
    82  	jt.vcClient = opt.VolcanoClient
    83  
    84  	jt.jobTemplateInformer = informerfactory.NewSharedInformerFactory(jt.vcClient, 0).Flow().V1alpha1().JobTemplates()
    85  	jt.jobTemplateSynced = jt.jobTemplateInformer.Informer().HasSynced
    86  	jt.jobTemplateLister = jt.jobTemplateInformer.Lister()
    87  	jt.jobTemplateInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
    88  		AddFunc: jt.addJobTemplate,
    89  	})
    90  
    91  	jt.jobInformer = informerfactory.NewSharedInformerFactory(jt.vcClient, 0).Batch().V1alpha1().Jobs()
    92  	jt.jobSynced = jt.jobInformer.Informer().HasSynced
    93  	jt.jobLister = jt.jobInformer.Lister()
    94  	jt.jobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
    95  		AddFunc: jt.addJob,
    96  	})
    97  
    98  	jt.maxRequeueNum = opt.MaxRequeueNum
    99  	if jt.maxRequeueNum < 0 {
   100  		jt.maxRequeueNum = -1
   101  	}
   102  
   103  	eventBroadcaster := record.NewBroadcaster()
   104  	eventBroadcaster.StartLogging(klog.Infof)
   105  	eventBroadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: jt.kubeClient.CoreV1().Events("")})
   106  
   107  	jt.recorder = eventBroadcaster.NewRecorder(versionedscheme.Scheme, v1.EventSource{Component: "vc-controller-manager"})
   108  	jt.queue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
   109  
   110  	jt.enqueueJobTemplate = jt.enqueue
   111  
   112  	jt.syncHandler = jt.handleJobTemplate
   113  
   114  	return nil
   115  }
   116  
   117  func (jt *jobtemplatecontroller) Run(stopCh <-chan struct{}) {
   118  	defer jt.queue.ShutDown()
   119  
   120  	go jt.jobTemplateInformer.Informer().Run(stopCh)
   121  	go jt.jobInformer.Informer().Run(stopCh)
   122  
   123  	cache.WaitForCacheSync(stopCh, jt.jobSynced, jt.jobTemplateSynced)
   124  
   125  	go wait.Until(jt.worker, time.Second, stopCh)
   126  
   127  	klog.Infof("JobTemplateController is running ...... ")
   128  
   129  	<-stopCh
   130  }
   131  
   132  func (jt *jobtemplatecontroller) worker() {
   133  	for jt.processNextWorkItem() {
   134  	}
   135  }
   136  
   137  func (jt *jobtemplatecontroller) processNextWorkItem() bool {
   138  	obj, shutdown := jt.queue.Get()
   139  	if shutdown {
   140  		// Stop working
   141  		return false
   142  	}
   143  
   144  	// We call Done here so the workqueue knows we have finished
   145  	// processing this item. We also must remember to call Forget if we
   146  	// do not want this work item being re-queued. For example, we do
   147  	// not call Forget if a transient error occurs, instead the item is
   148  	// put back on the workqueue and attempted again after a back-off
   149  	// period.
   150  	defer jt.queue.Done(obj)
   151  
   152  	req, ok := obj.(apis.FlowRequest)
   153  	if !ok {
   154  		klog.Errorf("%v is not a valid queue request struct.", obj)
   155  		return true
   156  	}
   157  
   158  	err := jt.syncHandler(&req)
   159  	jt.handleJobTemplateErr(err, obj)
   160  
   161  	return true
   162  }
   163  
   164  func (jt *jobtemplatecontroller) handleJobTemplate(req *apis.FlowRequest) error {
   165  	startTime := time.Now()
   166  	defer func() {
   167  		klog.V(4).Infof("Finished syncing jobTemplate %s (%v).", req.JobTemplateName, time.Since(startTime))
   168  	}()
   169  
   170  	jobTemplate, err := jt.jobTemplateLister.JobTemplates(req.Namespace).Get(req.JobTemplateName)
   171  	if err != nil {
   172  		if apierrors.IsNotFound(err) {
   173  			klog.V(4).Infof("JobTemplate %s has been deleted.", req.JobTemplateName)
   174  			return nil
   175  		}
   176  
   177  		return fmt.Errorf("get jobTemplate %s failed for %v", req.JobFlowName, err)
   178  	}
   179  
   180  	klog.V(4).Infof("Begin syncJobTemplate for jobTemplate %s", req.JobFlowName)
   181  	if err := jt.syncJobTemplate(jobTemplate); err != nil {
   182  		return fmt.Errorf("sync jobTemplate %s failed for %v, event is %v, action is %s",
   183  			req.JobFlowName, err, req.Event, req.Action)
   184  	}
   185  
   186  	return nil
   187  }
   188  
   189  func (jt *jobtemplatecontroller) handleJobTemplateErr(err error, obj interface{}) {
   190  	if err == nil {
   191  		jt.queue.Forget(obj)
   192  		return
   193  	}
   194  
   195  	if jt.maxRequeueNum == -1 || jt.queue.NumRequeues(obj) < jt.maxRequeueNum {
   196  		klog.V(4).Infof("Error syncing jobTemplate request %v for %v.", obj, err)
   197  		jt.queue.AddRateLimited(obj)
   198  		return
   199  	}
   200  
   201  	req, _ := obj.(*apis.FlowRequest)
   202  	jt.recordEventsForJobTemplate(req.Namespace, req.JobTemplateName, v1.EventTypeWarning, string(req.Action),
   203  		fmt.Sprintf("%v JobTemplate failed for %v", req.Action, err))
   204  	klog.V(2).Infof("Dropping JobTemplate request %v out of the queue for %v.", obj, err)
   205  	jt.queue.Forget(obj)
   206  }
   207  
   208  func (jt *jobtemplatecontroller) recordEventsForJobTemplate(namespace, name, eventType, reason, message string) {
   209  	jobTemplate, err := jt.jobTemplateLister.JobTemplates(namespace).Get(name)
   210  	if err != nil {
   211  		klog.Errorf("Get JobTemplate %s failed for %v.", name, err)
   212  		return
   213  	}
   214  
   215  	jt.recorder.Event(jobTemplate, eventType, reason, message)
   216  }