volcano.sh/volcano@v1.9.0/pkg/controllers/jobtemplate/jobtemplate_controller.go (about) 1 /* 2 Copyright 2022 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package jobtemplate 18 19 import ( 20 "fmt" 21 "time" 22 23 v1 "k8s.io/api/core/v1" 24 apierrors "k8s.io/apimachinery/pkg/api/errors" 25 "k8s.io/apimachinery/pkg/util/wait" 26 "k8s.io/client-go/kubernetes" 27 corev1 "k8s.io/client-go/kubernetes/typed/core/v1" 28 "k8s.io/client-go/tools/cache" 29 "k8s.io/client-go/tools/record" 30 "k8s.io/client-go/util/workqueue" 31 "k8s.io/klog" 32 33 vcclientset "volcano.sh/apis/pkg/client/clientset/versioned" 34 versionedscheme "volcano.sh/apis/pkg/client/clientset/versioned/scheme" 35 informerfactory "volcano.sh/apis/pkg/client/informers/externalversions" 36 batchinformer "volcano.sh/apis/pkg/client/informers/externalversions/batch/v1alpha1" 37 flowinformer "volcano.sh/apis/pkg/client/informers/externalversions/flow/v1alpha1" 38 batchlister "volcano.sh/apis/pkg/client/listers/batch/v1alpha1" 39 flowlister "volcano.sh/apis/pkg/client/listers/flow/v1alpha1" 40 "volcano.sh/volcano/pkg/controllers/apis" 41 "volcano.sh/volcano/pkg/controllers/framework" 42 ) 43 44 func init() { 45 framework.RegisterController(&jobtemplatecontroller{}) 46 } 47 48 // jobtemplatecontroller the JobTemplate jobtemplatecontroller type. 49 type jobtemplatecontroller struct { 50 kubeClient kubernetes.Interface 51 vcClient vcclientset.Interface 52 53 //informer 54 jobTemplateInformer flowinformer.JobTemplateInformer 55 jobInformer batchinformer.JobInformer 56 57 //jobTemplateLister 58 jobTemplateLister flowlister.JobTemplateLister 59 jobTemplateSynced cache.InformerSynced 60 61 //jobLister 62 jobLister batchlister.JobLister 63 jobSynced cache.InformerSynced 64 65 // JobTemplate Event recorder 66 recorder record.EventRecorder 67 68 queue workqueue.RateLimitingInterface 69 enqueueJobTemplate func(req apis.FlowRequest) 70 71 syncHandler func(req *apis.FlowRequest) error 72 73 maxRequeueNum int 74 } 75 76 func (jt *jobtemplatecontroller) Name() string { 77 return "jobtemplate-controller" 78 } 79 80 func (jt *jobtemplatecontroller) Initialize(opt *framework.ControllerOption) error { 81 jt.kubeClient = opt.KubeClient 82 jt.vcClient = opt.VolcanoClient 83 84 jt.jobTemplateInformer = informerfactory.NewSharedInformerFactory(jt.vcClient, 0).Flow().V1alpha1().JobTemplates() 85 jt.jobTemplateSynced = jt.jobTemplateInformer.Informer().HasSynced 86 jt.jobTemplateLister = jt.jobTemplateInformer.Lister() 87 jt.jobTemplateInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 88 AddFunc: jt.addJobTemplate, 89 }) 90 91 jt.jobInformer = informerfactory.NewSharedInformerFactory(jt.vcClient, 0).Batch().V1alpha1().Jobs() 92 jt.jobSynced = jt.jobInformer.Informer().HasSynced 93 jt.jobLister = jt.jobInformer.Lister() 94 jt.jobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 95 AddFunc: jt.addJob, 96 }) 97 98 jt.maxRequeueNum = opt.MaxRequeueNum 99 if jt.maxRequeueNum < 0 { 100 jt.maxRequeueNum = -1 101 } 102 103 eventBroadcaster := record.NewBroadcaster() 104 eventBroadcaster.StartLogging(klog.Infof) 105 eventBroadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: jt.kubeClient.CoreV1().Events("")}) 106 107 jt.recorder = eventBroadcaster.NewRecorder(versionedscheme.Scheme, v1.EventSource{Component: "vc-controller-manager"}) 108 jt.queue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 109 110 jt.enqueueJobTemplate = jt.enqueue 111 112 jt.syncHandler = jt.handleJobTemplate 113 114 return nil 115 } 116 117 func (jt *jobtemplatecontroller) Run(stopCh <-chan struct{}) { 118 defer jt.queue.ShutDown() 119 120 go jt.jobTemplateInformer.Informer().Run(stopCh) 121 go jt.jobInformer.Informer().Run(stopCh) 122 123 cache.WaitForCacheSync(stopCh, jt.jobSynced, jt.jobTemplateSynced) 124 125 go wait.Until(jt.worker, time.Second, stopCh) 126 127 klog.Infof("JobTemplateController is running ...... ") 128 129 <-stopCh 130 } 131 132 func (jt *jobtemplatecontroller) worker() { 133 for jt.processNextWorkItem() { 134 } 135 } 136 137 func (jt *jobtemplatecontroller) processNextWorkItem() bool { 138 obj, shutdown := jt.queue.Get() 139 if shutdown { 140 // Stop working 141 return false 142 } 143 144 // We call Done here so the workqueue knows we have finished 145 // processing this item. We also must remember to call Forget if we 146 // do not want this work item being re-queued. For example, we do 147 // not call Forget if a transient error occurs, instead the item is 148 // put back on the workqueue and attempted again after a back-off 149 // period. 150 defer jt.queue.Done(obj) 151 152 req, ok := obj.(apis.FlowRequest) 153 if !ok { 154 klog.Errorf("%v is not a valid queue request struct.", obj) 155 return true 156 } 157 158 err := jt.syncHandler(&req) 159 jt.handleJobTemplateErr(err, obj) 160 161 return true 162 } 163 164 func (jt *jobtemplatecontroller) handleJobTemplate(req *apis.FlowRequest) error { 165 startTime := time.Now() 166 defer func() { 167 klog.V(4).Infof("Finished syncing jobTemplate %s (%v).", req.JobTemplateName, time.Since(startTime)) 168 }() 169 170 jobTemplate, err := jt.jobTemplateLister.JobTemplates(req.Namespace).Get(req.JobTemplateName) 171 if err != nil { 172 if apierrors.IsNotFound(err) { 173 klog.V(4).Infof("JobTemplate %s has been deleted.", req.JobTemplateName) 174 return nil 175 } 176 177 return fmt.Errorf("get jobTemplate %s failed for %v", req.JobFlowName, err) 178 } 179 180 klog.V(4).Infof("Begin syncJobTemplate for jobTemplate %s", req.JobFlowName) 181 if err := jt.syncJobTemplate(jobTemplate); err != nil { 182 return fmt.Errorf("sync jobTemplate %s failed for %v, event is %v, action is %s", 183 req.JobFlowName, err, req.Event, req.Action) 184 } 185 186 return nil 187 } 188 189 func (jt *jobtemplatecontroller) handleJobTemplateErr(err error, obj interface{}) { 190 if err == nil { 191 jt.queue.Forget(obj) 192 return 193 } 194 195 if jt.maxRequeueNum == -1 || jt.queue.NumRequeues(obj) < jt.maxRequeueNum { 196 klog.V(4).Infof("Error syncing jobTemplate request %v for %v.", obj, err) 197 jt.queue.AddRateLimited(obj) 198 return 199 } 200 201 req, _ := obj.(*apis.FlowRequest) 202 jt.recordEventsForJobTemplate(req.Namespace, req.JobTemplateName, v1.EventTypeWarning, string(req.Action), 203 fmt.Sprintf("%v JobTemplate failed for %v", req.Action, err)) 204 klog.V(2).Infof("Dropping JobTemplate request %v out of the queue for %v.", obj, err) 205 jt.queue.Forget(obj) 206 } 207 208 func (jt *jobtemplatecontroller) recordEventsForJobTemplate(namespace, name, eventType, reason, message string) { 209 jobTemplate, err := jt.jobTemplateLister.JobTemplates(namespace).Get(name) 210 if err != nil { 211 klog.Errorf("Get JobTemplate %s failed for %v.", name, err) 212 return 213 } 214 215 jt.recorder.Event(jobTemplate, eventType, reason, message) 216 }