volcano.sh/volcano@v1.9.0/pkg/controllers/queue/queue_controller.go (about) 1 /* 2 Copyright 2019 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package queue 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 v1 "k8s.io/api/core/v1" 26 apierrors "k8s.io/apimachinery/pkg/api/errors" 27 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 29 "k8s.io/apimachinery/pkg/util/wait" 30 utilfeature "k8s.io/apiserver/pkg/util/feature" 31 "k8s.io/client-go/kubernetes" 32 corev1 "k8s.io/client-go/kubernetes/typed/core/v1" 33 "k8s.io/client-go/tools/cache" 34 "k8s.io/client-go/tools/record" 35 "k8s.io/client-go/util/workqueue" 36 "k8s.io/klog/v2" 37 38 busv1alpha1 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 39 vcclientset "volcano.sh/apis/pkg/client/clientset/versioned" 40 versionedscheme "volcano.sh/apis/pkg/client/clientset/versioned/scheme" 41 informerfactory "volcano.sh/apis/pkg/client/informers/externalversions" 42 vcinformer "volcano.sh/apis/pkg/client/informers/externalversions" 43 busv1alpha1informer "volcano.sh/apis/pkg/client/informers/externalversions/bus/v1alpha1" 44 schedulinginformer "volcano.sh/apis/pkg/client/informers/externalversions/scheduling/v1beta1" 45 busv1alpha1lister "volcano.sh/apis/pkg/client/listers/bus/v1alpha1" 46 schedulinglister "volcano.sh/apis/pkg/client/listers/scheduling/v1beta1" 47 "volcano.sh/volcano/pkg/controllers/apis" 48 "volcano.sh/volcano/pkg/controllers/framework" 49 queuestate "volcano.sh/volcano/pkg/controllers/queue/state" 50 "volcano.sh/volcano/pkg/features" 51 ) 52 53 func init() { 54 framework.RegisterController(&queuecontroller{}) 55 } 56 57 // queuecontroller manages queue status. 58 type queuecontroller struct { 59 kubeClient kubernetes.Interface 60 vcClient vcclientset.Interface 61 62 // informer 63 queueInformer schedulinginformer.QueueInformer 64 pgInformer schedulinginformer.PodGroupInformer 65 66 // queueLister 67 queueLister schedulinglister.QueueLister 68 queueSynced cache.InformerSynced 69 70 // podGroup lister 71 pgLister schedulinglister.PodGroupLister 72 pgSynced cache.InformerSynced 73 74 cmdInformer busv1alpha1informer.CommandInformer 75 cmdLister busv1alpha1lister.CommandLister 76 cmdSynced cache.InformerSynced 77 78 vcInformerFactory vcinformer.SharedInformerFactory 79 80 // queues that need to be updated. 81 queue workqueue.RateLimitingInterface 82 commandQueue workqueue.RateLimitingInterface 83 84 pgMutex sync.RWMutex 85 // queue name -> podgroup namespace/name 86 podGroups map[string]map[string]struct{} 87 88 syncHandler func(req *apis.Request) error 89 syncCommandHandler func(cmd *busv1alpha1.Command) error 90 91 enqueueQueue func(req *apis.Request) 92 93 recorder record.EventRecorder 94 maxRequeueNum int 95 } 96 97 func (c *queuecontroller) Name() string { 98 return "queue-controller" 99 } 100 101 // NewQueueController creates a QueueController. 102 func (c *queuecontroller) Initialize(opt *framework.ControllerOption) error { 103 c.vcClient = opt.VolcanoClient 104 c.kubeClient = opt.KubeClient 105 106 factory := informerfactory.NewSharedInformerFactory(c.vcClient, 0) 107 queueInformer := factory.Scheduling().V1beta1().Queues() 108 pgInformer := factory.Scheduling().V1beta1().PodGroups() 109 110 eventBroadcaster := record.NewBroadcaster() 111 eventBroadcaster.StartLogging(klog.Infof) 112 eventBroadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: c.kubeClient.CoreV1().Events("")}) 113 114 c.vcInformerFactory = factory 115 c.queueInformer = queueInformer 116 c.pgInformer = pgInformer 117 c.queueLister = queueInformer.Lister() 118 c.queueSynced = queueInformer.Informer().HasSynced 119 c.pgLister = pgInformer.Lister() 120 c.pgSynced = pgInformer.Informer().HasSynced 121 c.queue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 122 c.commandQueue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 123 c.podGroups = make(map[string]map[string]struct{}) 124 c.recorder = eventBroadcaster.NewRecorder(versionedscheme.Scheme, v1.EventSource{Component: "vc-controller-manager"}) 125 c.maxRequeueNum = opt.MaxRequeueNum 126 if c.maxRequeueNum < 0 { 127 c.maxRequeueNum = -1 128 } 129 130 queueInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 131 AddFunc: c.addQueue, 132 UpdateFunc: c.updateQueue, 133 DeleteFunc: c.deleteQueue, 134 }) 135 136 pgInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 137 AddFunc: c.addPodGroup, 138 UpdateFunc: c.updatePodGroup, 139 DeleteFunc: c.deletePodGroup, 140 }) 141 142 if utilfeature.DefaultFeatureGate.Enabled(features.QueueCommandSync) { 143 c.cmdInformer = factory.Bus().V1alpha1().Commands() 144 c.cmdInformer.Informer().AddEventHandler(cache.FilteringResourceEventHandler{ 145 FilterFunc: func(obj interface{}) bool { 146 switch v := obj.(type) { 147 case *busv1alpha1.Command: 148 return IsQueueReference(v.TargetObject) 149 default: 150 return false 151 } 152 }, 153 Handler: cache.ResourceEventHandlerFuncs{ 154 AddFunc: c.addCommand, 155 }, 156 }) 157 c.cmdLister = c.cmdInformer.Lister() 158 c.cmdSynced = c.cmdInformer.Informer().HasSynced 159 } 160 161 queuestate.SyncQueue = c.syncQueue 162 queuestate.OpenQueue = c.openQueue 163 queuestate.CloseQueue = c.closeQueue 164 165 c.syncHandler = c.handleQueue 166 c.syncCommandHandler = c.handleCommand 167 168 c.enqueueQueue = c.enqueue 169 170 return nil 171 } 172 173 // Run starts QueueController. 174 func (c *queuecontroller) Run(stopCh <-chan struct{}) { 175 defer utilruntime.HandleCrash() 176 defer c.queue.ShutDown() 177 defer c.commandQueue.ShutDown() 178 179 klog.Infof("Starting queue controller.") 180 defer klog.Infof("Shutting down queue controller.") 181 182 c.vcInformerFactory.Start(stopCh) 183 184 for informerType, ok := range c.vcInformerFactory.WaitForCacheSync(stopCh) { 185 if !ok { 186 klog.Errorf("caches failed to sync: %v", informerType) 187 return 188 } 189 } 190 191 go wait.Until(c.worker, 0, stopCh) 192 go wait.Until(c.commandWorker, 0, stopCh) 193 194 <-stopCh 195 } 196 197 // worker runs a worker thread that just dequeues items, processes them, and 198 // marks them done. You may run as many of these in parallel as you wish; the 199 // workqueue guarantees that they will not end up processing the same `queue` 200 // at the same time. 201 func (c *queuecontroller) worker() { 202 for c.processNextWorkItem() { 203 } 204 } 205 206 func (c *queuecontroller) processNextWorkItem() bool { 207 obj, shutdown := c.queue.Get() 208 if shutdown { 209 return false 210 } 211 defer c.queue.Done(obj) 212 213 req, ok := obj.(*apis.Request) 214 if !ok { 215 klog.Errorf("%v is not a valid queue request struct.", obj) 216 return true 217 } 218 219 err := c.syncHandler(req) 220 c.handleQueueErr(err, obj) 221 222 return true 223 } 224 225 func (c *queuecontroller) handleQueue(req *apis.Request) error { 226 startTime := time.Now() 227 defer func() { 228 klog.V(4).Infof("Finished syncing queue %s (%v).", req.QueueName, time.Since(startTime)) 229 }() 230 231 queue, err := c.queueLister.Get(req.QueueName) 232 if err != nil { 233 if apierrors.IsNotFound(err) { 234 klog.V(4).Infof("Queue %s has been deleted.", req.QueueName) 235 return nil 236 } 237 238 return fmt.Errorf("get queue %s failed for %v", req.QueueName, err) 239 } 240 241 queueState := queuestate.NewState(queue) 242 if queueState == nil { 243 return fmt.Errorf("queue %s state %s is invalid", queue.Name, queue.Status.State) 244 } 245 246 klog.V(4).Infof("Begin execute %s action for queue %s, current status %s", req.Action, req.QueueName, queue.Status.State) 247 if err := queueState.Execute(req.Action); err != nil { 248 return fmt.Errorf("sync queue %s failed for %v, event is %v, action is %s", 249 req.QueueName, err, req.Event, req.Action) 250 } 251 252 return nil 253 } 254 255 func (c *queuecontroller) handleQueueErr(err error, obj interface{}) { 256 if err == nil { 257 c.queue.Forget(obj) 258 return 259 } 260 261 if c.maxRequeueNum == -1 || c.queue.NumRequeues(obj) < c.maxRequeueNum { 262 klog.V(4).Infof("Error syncing queue request %v for %v.", obj, err) 263 c.queue.AddRateLimited(obj) 264 return 265 } 266 267 req, _ := obj.(*apis.Request) 268 c.recordEventsForQueue(req.QueueName, v1.EventTypeWarning, string(req.Action), 269 fmt.Sprintf("%v queue failed for %v", req.Action, err)) 270 klog.V(2).Infof("Dropping queue request %v out of the queue for %v.", obj, err) 271 c.queue.Forget(obj) 272 } 273 274 func (c *queuecontroller) commandWorker() { 275 for c.processNextCommand() { 276 } 277 } 278 279 func (c *queuecontroller) processNextCommand() bool { 280 obj, shutdown := c.commandQueue.Get() 281 if shutdown { 282 return false 283 } 284 defer c.commandQueue.Done(obj) 285 286 cmd, ok := obj.(*busv1alpha1.Command) 287 if !ok { 288 klog.Errorf("%v is not a valid Command struct.", obj) 289 return true 290 } 291 292 err := c.syncCommandHandler(cmd) 293 c.handleCommandErr(err, obj) 294 295 return true 296 } 297 298 func (c *queuecontroller) handleCommand(cmd *busv1alpha1.Command) error { 299 startTime := time.Now() 300 defer func() { 301 klog.V(4).Infof("Finished syncing command %s/%s (%v).", cmd.Namespace, cmd.Name, time.Since(startTime)) 302 }() 303 304 err := c.vcClient.BusV1alpha1().Commands(cmd.Namespace).Delete(context.TODO(), cmd.Name, metav1.DeleteOptions{}) 305 if err != nil { 306 if apierrors.IsNotFound(err) { 307 return nil 308 } 309 310 return fmt.Errorf("failed to delete command <%s/%s> for %v", cmd.Namespace, cmd.Name, err) 311 } 312 313 req := &apis.Request{ 314 QueueName: cmd.TargetObject.Name, 315 Event: busv1alpha1.CommandIssuedEvent, 316 Action: busv1alpha1.Action(cmd.Action), 317 } 318 319 c.enqueueQueue(req) 320 321 return nil 322 } 323 324 func (c *queuecontroller) handleCommandErr(err error, obj interface{}) { 325 if err == nil { 326 c.commandQueue.Forget(obj) 327 return 328 } 329 330 if c.maxRequeueNum == -1 || c.commandQueue.NumRequeues(obj) < c.maxRequeueNum { 331 klog.V(4).Infof("Error syncing command %v for %v.", obj, err) 332 c.commandQueue.AddRateLimited(obj) 333 return 334 } 335 336 klog.V(2).Infof("Dropping command %v out of the queue for %v.", obj, err) 337 c.commandQueue.Forget(obj) 338 }