volcano.sh/volcano@v1.9.0/cmd/scheduler/app/server.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package app 18 19 import ( 20 "context" 21 "fmt" 22 "net/http" 23 "os" 24 "time" 25 26 "volcano.sh/apis/pkg/apis/helpers" 27 28 "volcano.sh/volcano/cmd/scheduler/app/options" 29 "volcano.sh/volcano/pkg/kube" 30 "volcano.sh/volcano/pkg/scheduler" 31 "volcano.sh/volcano/pkg/scheduler/framework" 32 "volcano.sh/volcano/pkg/signals" 33 commonutil "volcano.sh/volcano/pkg/util" 34 "volcano.sh/volcano/pkg/version" 35 36 "github.com/prometheus/client_golang/prometheus" 37 "github.com/prometheus/client_golang/prometheus/collectors" 38 "github.com/prometheus/client_golang/prometheus/promhttp" 39 v1 "k8s.io/api/core/v1" 40 "k8s.io/apimachinery/pkg/util/uuid" 41 clientset "k8s.io/client-go/kubernetes" 42 "k8s.io/client-go/kubernetes/scheme" 43 corev1 "k8s.io/client-go/kubernetes/typed/core/v1" 44 "k8s.io/component-base/metrics/legacyregistry" 45 "k8s.io/klog/v2" 46 47 // Register gcp auth 48 _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" 49 restclient "k8s.io/client-go/rest" 50 "k8s.io/client-go/tools/leaderelection" 51 "k8s.io/client-go/tools/leaderelection/resourcelock" 52 "k8s.io/client-go/tools/record" 53 54 // Register rest client metrics 55 _ "k8s.io/component-base/metrics/prometheus/restclient" 56 ) 57 58 const ( 59 leaseDuration = 15 * time.Second 60 renewDeadline = 10 * time.Second 61 retryPeriod = 5 * time.Second 62 ) 63 64 // Run the volcano scheduler. 65 func Run(opt *options.ServerOption) error { 66 if opt.PrintVersion { 67 version.PrintVersionAndExit() 68 } 69 70 config, err := kube.BuildConfig(opt.KubeClientOptions) 71 if err != nil { 72 return err 73 } 74 75 if opt.PluginsDir != "" { 76 err := framework.LoadCustomPlugins(opt.PluginsDir) 77 if err != nil { 78 klog.Errorf("Fail to load custom plugins: %v", err) 79 return err 80 } 81 } 82 83 sched, err := scheduler.NewScheduler(config, opt) 84 if err != nil { 85 panic(err) 86 } 87 88 if opt.EnableMetrics { 89 go func() { 90 http.Handle("/metrics", promHandler()) 91 klog.Fatalf("Prometheus Http Server failed %s", http.ListenAndServe(opt.ListenAddress, nil)) 92 }() 93 } 94 95 if opt.EnableHealthz { 96 if err := helpers.StartHealthz(opt.HealthzBindAddress, "volcano-scheduler", opt.CaCertData, opt.CertData, opt.KeyData); err != nil { 97 return err 98 } 99 } 100 101 ctx := signals.SetupSignalContext() 102 run := func(ctx context.Context) { 103 sched.Run(ctx.Done()) 104 <-ctx.Done() 105 } 106 107 if !opt.EnableLeaderElection { 108 run(ctx) 109 return fmt.Errorf("finished without leader elect") 110 } 111 112 leaderElectionClient, err := clientset.NewForConfig(restclient.AddUserAgent(config, "leader-election")) 113 if err != nil { 114 return err 115 } 116 117 // Prepare event clients. 118 broadcaster := record.NewBroadcaster() 119 broadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: leaderElectionClient.CoreV1().Events(opt.LockObjectNamespace)}) 120 eventRecorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: commonutil.GenerateComponentName(opt.SchedulerNames)}) 121 122 hostname, err := os.Hostname() 123 if err != nil { 124 return fmt.Errorf("unable to get hostname: %v", err) 125 } 126 // add a uniquifier so that two processes on the same host don't accidentally both become active 127 id := hostname + "_" + string(uuid.NewUUID()) 128 129 rl, err := resourcelock.New(resourcelock.LeasesResourceLock, 130 opt.LockObjectNamespace, 131 commonutil.GenerateComponentName(opt.SchedulerNames), 132 leaderElectionClient.CoreV1(), 133 leaderElectionClient.CoordinationV1(), 134 resourcelock.ResourceLockConfig{ 135 Identity: id, 136 EventRecorder: eventRecorder, 137 }) 138 if err != nil { 139 return fmt.Errorf("couldn't create resource lock: %v", err) 140 } 141 142 leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ 143 Lock: rl, 144 LeaseDuration: leaseDuration, 145 RenewDeadline: renewDeadline, 146 RetryPeriod: retryPeriod, 147 Callbacks: leaderelection.LeaderCallbacks{ 148 OnStartedLeading: run, 149 OnStoppedLeading: func() { 150 klog.Fatalf("leaderelection lost") 151 }, 152 }, 153 }) 154 return fmt.Errorf("lost lease") 155 } 156 157 func promHandler() http.Handler { 158 // Unregister go and process related collector because it's duplicated and `legacyregistry.DefaultGatherer` also has registered them. 159 prometheus.DefaultRegisterer.Unregister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) 160 prometheus.DefaultRegisterer.Unregister(collectors.NewGoCollector()) 161 return promhttp.InstrumentMetricHandler(prometheus.DefaultRegisterer, promhttp.HandlerFor(prometheus.Gatherers{prometheus.DefaultGatherer, legacyregistry.DefaultGatherer}, promhttp.HandlerOpts{})) 162 }