github.com/oam-dev/kubevela@v1.9.11/cmd/core/app/server.go (about) 1 /* 2 Copyright 2022 The KubeVela Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package app 18 19 import ( 20 "context" 21 "fmt" 22 "io" 23 "os" 24 "path/filepath" 25 "strconv" 26 "time" 27 28 velaclient "github.com/kubevela/pkg/controller/client" 29 "github.com/kubevela/pkg/controller/sharding" 30 "github.com/kubevela/pkg/meta" 31 "github.com/kubevela/pkg/util/profiling" 32 "github.com/kubevela/workflow/pkg/cue/packages" 33 "github.com/pkg/errors" 34 "github.com/spf13/cobra" 35 utilfeature "k8s.io/apiserver/pkg/util/feature" 36 "k8s.io/klog/v2" 37 "k8s.io/klog/v2/klogr" 38 ctrl "sigs.k8s.io/controller-runtime" 39 "sigs.k8s.io/controller-runtime/pkg/healthz" 40 "sigs.k8s.io/controller-runtime/pkg/manager" 41 "sigs.k8s.io/controller-runtime/pkg/manager/signals" 42 43 "github.com/oam-dev/kubevela/apis/core.oam.dev/v1beta1" 44 "github.com/oam-dev/kubevela/apis/types" 45 "github.com/oam-dev/kubevela/cmd/core/app/hooks" 46 "github.com/oam-dev/kubevela/cmd/core/app/options" 47 "github.com/oam-dev/kubevela/pkg/auth" 48 "github.com/oam-dev/kubevela/pkg/cache" 49 commonconfig "github.com/oam-dev/kubevela/pkg/controller/common" 50 oamv1beta1 "github.com/oam-dev/kubevela/pkg/controller/core.oam.dev/v1beta1" 51 "github.com/oam-dev/kubevela/pkg/controller/core.oam.dev/v1beta1/application" 52 "github.com/oam-dev/kubevela/pkg/features" 53 "github.com/oam-dev/kubevela/pkg/monitor/watcher" 54 "github.com/oam-dev/kubevela/pkg/multicluster" 55 "github.com/oam-dev/kubevela/pkg/oam" 56 "github.com/oam-dev/kubevela/pkg/utils/common" 57 "github.com/oam-dev/kubevela/pkg/utils/util" 58 oamwebhook "github.com/oam-dev/kubevela/pkg/webhook/core.oam.dev" 59 "github.com/oam-dev/kubevela/version" 60 ) 61 62 var ( 63 scheme = common.Scheme 64 waitSecretTimeout = 90 * time.Second 65 waitSecretInterval = 2 * time.Second 66 ) 67 68 // NewCoreCommand creates a *cobra.Command object with default parameters 69 func NewCoreCommand() *cobra.Command { 70 s := options.NewCoreOptions() 71 cmd := &cobra.Command{ 72 Use: "vela-core", 73 Long: `The KubeVela controller manager is a daemon that embeds the core control loops shipped with KubeVela`, 74 RunE: func(cmd *cobra.Command, args []string) error { 75 return run(signals.SetupSignalHandler(), s) 76 }, 77 SilenceUsage: true, 78 FParseErrWhitelist: cobra.FParseErrWhitelist{ 79 // Allow unknown flags for backward-compatibility. 80 UnknownFlags: true, 81 }, 82 } 83 84 fs := cmd.Flags() 85 namedFlagSets := s.Flags() 86 for _, set := range namedFlagSets.FlagSets { 87 fs.AddFlagSet(set) 88 } 89 meta.Name = types.VelaCoreName 90 91 klog.InfoS("KubeVela information", "version", version.VelaVersion, "revision", version.GitRevision) 92 klog.InfoS("Vela-Core init", "definition namespace", oam.SystemDefinitionNamespace) 93 94 return cmd 95 } 96 97 func run(ctx context.Context, s *options.CoreOptions) error { 98 restConfig := ctrl.GetConfigOrDie() 99 restConfig.UserAgent = types.KubeVelaName + "/" + version.GitRevision 100 restConfig.QPS = float32(s.QPS) 101 restConfig.Burst = s.Burst 102 restConfig.Wrap(auth.NewImpersonatingRoundTripper) 103 klog.InfoS("Kubernetes Config Loaded", 104 "UserAgent", restConfig.UserAgent, 105 "QPS", restConfig.QPS, 106 "Burst", restConfig.Burst, 107 ) 108 go profiling.StartProfilingServer(nil) 109 110 // wrapper the round tripper by multi cluster rewriter 111 if s.EnableClusterGateway { 112 client, err := multicluster.Initialize(restConfig, true) 113 if err != nil { 114 klog.ErrorS(err, "failed to enable multi-cluster capability") 115 return err 116 } 117 118 if s.EnableClusterMetrics { 119 _, err := multicluster.NewClusterMetricsMgr(context.Background(), client, s.ClusterMetricsInterval) 120 if err != nil { 121 klog.ErrorS(err, "failed to enable multi-cluster-metrics capability") 122 return err 123 } 124 } 125 } 126 127 ctrl.SetLogger(klogr.New()) 128 129 if utilfeature.DefaultMutableFeatureGate.Enabled(features.ApplyOnce) { 130 commonconfig.ApplicationReSyncPeriod = s.InformerSyncPeriod 131 } 132 133 leaderElectionID := util.GenerateLeaderElectionID(types.KubeVelaName, s.ControllerArgs.IgnoreAppWithoutControllerRequirement) 134 leaderElectionID += sharding.GetShardIDSuffix() 135 mgr, err := ctrl.NewManager(restConfig, ctrl.Options{ 136 Scheme: scheme, 137 MetricsBindAddress: s.MetricsAddr, 138 LeaderElection: s.EnableLeaderElection, 139 LeaderElectionNamespace: s.LeaderElectionNamespace, 140 LeaderElectionID: leaderElectionID, 141 Port: s.WebhookPort, 142 CertDir: s.CertDir, 143 HealthProbeBindAddress: s.HealthAddr, 144 LeaderElectionResourceLock: s.LeaderElectionResourceLock, 145 LeaseDuration: &s.LeaseDuration, 146 RenewDeadline: &s.RenewDeadLine, 147 RetryPeriod: &s.RetryPeriod, 148 SyncPeriod: &s.InformerSyncPeriod, 149 // SyncPeriod is configured with default value, aka. 10h. First, controller-runtime does not 150 // recommend use it as a time trigger, instead, it is expected to work for failure tolerance 151 // of controller-runtime. Additionally, set this value will affect not only application 152 // controller but also all other controllers like definition controller. Therefore, for 153 // functionalities like state-keep, they should be invented in other ways. 154 NewClient: velaclient.DefaultNewControllerClient, 155 NewCache: cache.BuildCache(ctx, scheme, &v1beta1.Application{}, &v1beta1.ApplicationRevision{}, &v1beta1.ResourceTracker{}), 156 ClientDisableCacheFor: cache.NewResourcesToDisableCache(), 157 }) 158 if err != nil { 159 klog.ErrorS(err, "Unable to create a controller manager") 160 return err 161 } 162 163 if err := registerHealthChecks(mgr); err != nil { 164 klog.ErrorS(err, "Unable to register ready/health checks") 165 return err 166 } 167 168 pd, err := packages.NewPackageDiscover(mgr.GetConfig()) 169 if err != nil { 170 klog.Error(err, "Failed to create CRD discovery for CUE package client") 171 if !packages.IsCUEParseErr(err) { 172 return err 173 } 174 } 175 s.ControllerArgs.PackageDiscover = pd 176 177 if !sharding.EnableSharding { 178 if err = prepareRun(ctx, mgr, s); err != nil { 179 return err 180 } 181 } else { 182 if err = prepareRunInShardingMode(ctx, mgr, s); err != nil { 183 return err 184 } 185 } 186 187 klog.Info("Start the vela application monitor") 188 informer, err := mgr.GetCache().GetInformer(ctx, &v1beta1.Application{}) 189 if err != nil { 190 klog.ErrorS(err, "Unable to get informer for application") 191 } 192 watcher.StartApplicationMetricsWatcher(informer) 193 194 if err := mgr.Start(ctx); err != nil { 195 klog.ErrorS(err, "Failed to run manager") 196 return err 197 } 198 if s.LogFilePath != "" { 199 klog.Flush() 200 } 201 klog.Info("Safely stops Program...") 202 return nil 203 } 204 205 func prepareRunInShardingMode(ctx context.Context, mgr manager.Manager, s *options.CoreOptions) error { 206 if sharding.IsMaster() { 207 klog.Infof("controller running in sharding mode, current shard is master") 208 if !utilfeature.DefaultMutableFeatureGate.Enabled(features.DisableWebhookAutoSchedule) { 209 go sharding.DefaultScheduler.Get().Start(ctx) 210 } 211 if err := prepareRun(ctx, mgr, s); err != nil { 212 return err 213 } 214 } else { 215 klog.Infof("controller running in sharding mode, current shard id: %s", sharding.ShardID) 216 if err := application.Setup(mgr, *s.ControllerArgs); err != nil { 217 return err 218 } 219 } 220 221 return nil 222 } 223 224 func prepareRun(ctx context.Context, mgr manager.Manager, s *options.CoreOptions) error { 225 if s.UseWebhook { 226 klog.InfoS("Enable webhook", "server port", strconv.Itoa(s.WebhookPort)) 227 oamwebhook.Register(mgr, *s.ControllerArgs) 228 if err := waitWebhookSecretVolume(s.CertDir, waitSecretTimeout, waitSecretInterval); err != nil { 229 klog.ErrorS(err, "Unable to get webhook secret") 230 return err 231 } 232 } 233 234 if err := oamv1beta1.Setup(mgr, *s.ControllerArgs); err != nil { 235 klog.ErrorS(err, "Unable to setup the oam controller") 236 return err 237 } 238 239 if err := multicluster.InitClusterInfo(mgr.GetConfig()); err != nil { 240 klog.ErrorS(err, "Init control plane cluster info") 241 return err 242 } 243 244 klog.Info("Start the vela controller manager") 245 for _, hook := range []hooks.PreStartHook{hooks.NewSystemCRDValidationHook()} { 246 if err := hook.Run(ctx); err != nil { 247 return fmt.Errorf("failed to run hook %T: %w", hook, err) 248 } 249 } 250 251 return nil 252 } 253 254 // registerHealthChecks is used to create readiness&liveness probes 255 func registerHealthChecks(mgr ctrl.Manager) error { 256 klog.Info("Create readiness/health check") 257 if err := mgr.AddReadyzCheck("ping", healthz.Ping); err != nil { 258 return err 259 } 260 // TODO: change the health check to be different from readiness check 261 if err := mgr.AddHealthzCheck("ping", healthz.Ping); err != nil { 262 return err 263 } 264 return nil 265 } 266 267 // waitWebhookSecretVolume waits for webhook secret ready to avoid mgr running crash 268 func waitWebhookSecretVolume(certDir string, timeout, interval time.Duration) error { 269 start := time.Now() 270 for { 271 time.Sleep(interval) 272 if time.Since(start) > timeout { 273 return fmt.Errorf("getting webhook secret timeout after %s", timeout.String()) 274 } 275 klog.InfoS("Wait webhook secret", "time consumed(second)", int64(time.Since(start).Seconds()), 276 "timeout(second)", int64(timeout.Seconds())) 277 if _, err := os.Stat(certDir); !os.IsNotExist(err) { 278 ready := func() bool { 279 f, err := os.Open(filepath.Clean(certDir)) 280 if err != nil { 281 return false 282 } 283 defer func() { 284 if err := f.Close(); err != nil { 285 klog.Error(err, "Failed to close file") 286 } 287 }() 288 // check if dir is empty 289 if _, err := f.Readdir(1); errors.Is(err, io.EOF) { 290 return false 291 } 292 // check if secret files are empty 293 err = filepath.Walk(certDir, func(path string, info os.FileInfo, err error) error { 294 // even Cert dir is created, cert files are still empty for a while 295 if info.Size() == 0 { 296 return errors.New("secret is not ready") 297 } 298 return nil 299 }) 300 if err == nil { 301 klog.InfoS("Webhook secret is ready", "time consumed(second)", 302 int64(time.Since(start).Seconds())) 303 return true 304 } 305 return false 306 }() 307 if ready { 308 return nil 309 } 310 } 311 } 312 }