github.com/oam-dev/kubevela@v1.9.11/cmd/core/app/server.go (about)

     1  /*
     2  Copyright 2022 The KubeVela Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8     http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package app
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"io"
    23  	"os"
    24  	"path/filepath"
    25  	"strconv"
    26  	"time"
    27  
    28  	velaclient "github.com/kubevela/pkg/controller/client"
    29  	"github.com/kubevela/pkg/controller/sharding"
    30  	"github.com/kubevela/pkg/meta"
    31  	"github.com/kubevela/pkg/util/profiling"
    32  	"github.com/kubevela/workflow/pkg/cue/packages"
    33  	"github.com/pkg/errors"
    34  	"github.com/spf13/cobra"
    35  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    36  	"k8s.io/klog/v2"
    37  	"k8s.io/klog/v2/klogr"
    38  	ctrl "sigs.k8s.io/controller-runtime"
    39  	"sigs.k8s.io/controller-runtime/pkg/healthz"
    40  	"sigs.k8s.io/controller-runtime/pkg/manager"
    41  	"sigs.k8s.io/controller-runtime/pkg/manager/signals"
    42  
    43  	"github.com/oam-dev/kubevela/apis/core.oam.dev/v1beta1"
    44  	"github.com/oam-dev/kubevela/apis/types"
    45  	"github.com/oam-dev/kubevela/cmd/core/app/hooks"
    46  	"github.com/oam-dev/kubevela/cmd/core/app/options"
    47  	"github.com/oam-dev/kubevela/pkg/auth"
    48  	"github.com/oam-dev/kubevela/pkg/cache"
    49  	commonconfig "github.com/oam-dev/kubevela/pkg/controller/common"
    50  	oamv1beta1 "github.com/oam-dev/kubevela/pkg/controller/core.oam.dev/v1beta1"
    51  	"github.com/oam-dev/kubevela/pkg/controller/core.oam.dev/v1beta1/application"
    52  	"github.com/oam-dev/kubevela/pkg/features"
    53  	"github.com/oam-dev/kubevela/pkg/monitor/watcher"
    54  	"github.com/oam-dev/kubevela/pkg/multicluster"
    55  	"github.com/oam-dev/kubevela/pkg/oam"
    56  	"github.com/oam-dev/kubevela/pkg/utils/common"
    57  	"github.com/oam-dev/kubevela/pkg/utils/util"
    58  	oamwebhook "github.com/oam-dev/kubevela/pkg/webhook/core.oam.dev"
    59  	"github.com/oam-dev/kubevela/version"
    60  )
    61  
var (
	// scheme is the runtime scheme shared by the controller manager and its
	// cache; it aliases common.Scheme.
	scheme = common.Scheme
	// waitSecretTimeout is the maximum time prepareRun waits for the webhook
	// certificate directory to be populated.
	waitSecretTimeout = 90 * time.Second
	// waitSecretInterval is the polling interval used while waiting for the
	// webhook certificate directory.
	waitSecretInterval = 2 * time.Second
)
    67  
    68  // NewCoreCommand creates a *cobra.Command object with default parameters
    69  func NewCoreCommand() *cobra.Command {
    70  	s := options.NewCoreOptions()
    71  	cmd := &cobra.Command{
    72  		Use:  "vela-core",
    73  		Long: `The KubeVela controller manager is a daemon that embeds the core control loops shipped with KubeVela`,
    74  		RunE: func(cmd *cobra.Command, args []string) error {
    75  			return run(signals.SetupSignalHandler(), s)
    76  		},
    77  		SilenceUsage: true,
    78  		FParseErrWhitelist: cobra.FParseErrWhitelist{
    79  			// Allow unknown flags for backward-compatibility.
    80  			UnknownFlags: true,
    81  		},
    82  	}
    83  
    84  	fs := cmd.Flags()
    85  	namedFlagSets := s.Flags()
    86  	for _, set := range namedFlagSets.FlagSets {
    87  		fs.AddFlagSet(set)
    88  	}
    89  	meta.Name = types.VelaCoreName
    90  
    91  	klog.InfoS("KubeVela information", "version", version.VelaVersion, "revision", version.GitRevision)
    92  	klog.InfoS("Vela-Core init", "definition namespace", oam.SystemDefinitionNamespace)
    93  
    94  	return cmd
    95  }
    96  
    97  func run(ctx context.Context, s *options.CoreOptions) error {
    98  	restConfig := ctrl.GetConfigOrDie()
    99  	restConfig.UserAgent = types.KubeVelaName + "/" + version.GitRevision
   100  	restConfig.QPS = float32(s.QPS)
   101  	restConfig.Burst = s.Burst
   102  	restConfig.Wrap(auth.NewImpersonatingRoundTripper)
   103  	klog.InfoS("Kubernetes Config Loaded",
   104  		"UserAgent", restConfig.UserAgent,
   105  		"QPS", restConfig.QPS,
   106  		"Burst", restConfig.Burst,
   107  	)
   108  	go profiling.StartProfilingServer(nil)
   109  
   110  	// wrapper the round tripper by multi cluster rewriter
   111  	if s.EnableClusterGateway {
   112  		client, err := multicluster.Initialize(restConfig, true)
   113  		if err != nil {
   114  			klog.ErrorS(err, "failed to enable multi-cluster capability")
   115  			return err
   116  		}
   117  
   118  		if s.EnableClusterMetrics {
   119  			_, err := multicluster.NewClusterMetricsMgr(context.Background(), client, s.ClusterMetricsInterval)
   120  			if err != nil {
   121  				klog.ErrorS(err, "failed to enable multi-cluster-metrics capability")
   122  				return err
   123  			}
   124  		}
   125  	}
   126  
   127  	ctrl.SetLogger(klogr.New())
   128  
   129  	if utilfeature.DefaultMutableFeatureGate.Enabled(features.ApplyOnce) {
   130  		commonconfig.ApplicationReSyncPeriod = s.InformerSyncPeriod
   131  	}
   132  
   133  	leaderElectionID := util.GenerateLeaderElectionID(types.KubeVelaName, s.ControllerArgs.IgnoreAppWithoutControllerRequirement)
   134  	leaderElectionID += sharding.GetShardIDSuffix()
   135  	mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
   136  		Scheme:                     scheme,
   137  		MetricsBindAddress:         s.MetricsAddr,
   138  		LeaderElection:             s.EnableLeaderElection,
   139  		LeaderElectionNamespace:    s.LeaderElectionNamespace,
   140  		LeaderElectionID:           leaderElectionID,
   141  		Port:                       s.WebhookPort,
   142  		CertDir:                    s.CertDir,
   143  		HealthProbeBindAddress:     s.HealthAddr,
   144  		LeaderElectionResourceLock: s.LeaderElectionResourceLock,
   145  		LeaseDuration:              &s.LeaseDuration,
   146  		RenewDeadline:              &s.RenewDeadLine,
   147  		RetryPeriod:                &s.RetryPeriod,
   148  		SyncPeriod:                 &s.InformerSyncPeriod,
   149  		// SyncPeriod is configured with default value, aka. 10h. First, controller-runtime does not
   150  		// recommend use it as a time trigger, instead, it is expected to work for failure tolerance
   151  		// of controller-runtime. Additionally, set this value will affect not only application
   152  		// controller but also all other controllers like definition controller. Therefore, for
   153  		// functionalities like state-keep, they should be invented in other ways.
   154  		NewClient:             velaclient.DefaultNewControllerClient,
   155  		NewCache:              cache.BuildCache(ctx, scheme, &v1beta1.Application{}, &v1beta1.ApplicationRevision{}, &v1beta1.ResourceTracker{}),
   156  		ClientDisableCacheFor: cache.NewResourcesToDisableCache(),
   157  	})
   158  	if err != nil {
   159  		klog.ErrorS(err, "Unable to create a controller manager")
   160  		return err
   161  	}
   162  
   163  	if err := registerHealthChecks(mgr); err != nil {
   164  		klog.ErrorS(err, "Unable to register ready/health checks")
   165  		return err
   166  	}
   167  
   168  	pd, err := packages.NewPackageDiscover(mgr.GetConfig())
   169  	if err != nil {
   170  		klog.Error(err, "Failed to create CRD discovery for CUE package client")
   171  		if !packages.IsCUEParseErr(err) {
   172  			return err
   173  		}
   174  	}
   175  	s.ControllerArgs.PackageDiscover = pd
   176  
   177  	if !sharding.EnableSharding {
   178  		if err = prepareRun(ctx, mgr, s); err != nil {
   179  			return err
   180  		}
   181  	} else {
   182  		if err = prepareRunInShardingMode(ctx, mgr, s); err != nil {
   183  			return err
   184  		}
   185  	}
   186  
   187  	klog.Info("Start the vela application monitor")
   188  	informer, err := mgr.GetCache().GetInformer(ctx, &v1beta1.Application{})
   189  	if err != nil {
   190  		klog.ErrorS(err, "Unable to get informer for application")
   191  	}
   192  	watcher.StartApplicationMetricsWatcher(informer)
   193  
   194  	if err := mgr.Start(ctx); err != nil {
   195  		klog.ErrorS(err, "Failed to run manager")
   196  		return err
   197  	}
   198  	if s.LogFilePath != "" {
   199  		klog.Flush()
   200  	}
   201  	klog.Info("Safely stops Program...")
   202  	return nil
   203  }
   204  
   205  func prepareRunInShardingMode(ctx context.Context, mgr manager.Manager, s *options.CoreOptions) error {
   206  	if sharding.IsMaster() {
   207  		klog.Infof("controller running in sharding mode, current shard is master")
   208  		if !utilfeature.DefaultMutableFeatureGate.Enabled(features.DisableWebhookAutoSchedule) {
   209  			go sharding.DefaultScheduler.Get().Start(ctx)
   210  		}
   211  		if err := prepareRun(ctx, mgr, s); err != nil {
   212  			return err
   213  		}
   214  	} else {
   215  		klog.Infof("controller running in sharding mode, current shard id: %s", sharding.ShardID)
   216  		if err := application.Setup(mgr, *s.ControllerArgs); err != nil {
   217  			return err
   218  		}
   219  	}
   220  
   221  	return nil
   222  }
   223  
   224  func prepareRun(ctx context.Context, mgr manager.Manager, s *options.CoreOptions) error {
   225  	if s.UseWebhook {
   226  		klog.InfoS("Enable webhook", "server port", strconv.Itoa(s.WebhookPort))
   227  		oamwebhook.Register(mgr, *s.ControllerArgs)
   228  		if err := waitWebhookSecretVolume(s.CertDir, waitSecretTimeout, waitSecretInterval); err != nil {
   229  			klog.ErrorS(err, "Unable to get webhook secret")
   230  			return err
   231  		}
   232  	}
   233  
   234  	if err := oamv1beta1.Setup(mgr, *s.ControllerArgs); err != nil {
   235  		klog.ErrorS(err, "Unable to setup the oam controller")
   236  		return err
   237  	}
   238  
   239  	if err := multicluster.InitClusterInfo(mgr.GetConfig()); err != nil {
   240  		klog.ErrorS(err, "Init control plane cluster info")
   241  		return err
   242  	}
   243  
   244  	klog.Info("Start the vela controller manager")
   245  	for _, hook := range []hooks.PreStartHook{hooks.NewSystemCRDValidationHook()} {
   246  		if err := hook.Run(ctx); err != nil {
   247  			return fmt.Errorf("failed to run hook %T: %w", hook, err)
   248  		}
   249  	}
   250  
   251  	return nil
   252  }
   253  
   254  // registerHealthChecks is used to create readiness&liveness probes
   255  func registerHealthChecks(mgr ctrl.Manager) error {
   256  	klog.Info("Create readiness/health check")
   257  	if err := mgr.AddReadyzCheck("ping", healthz.Ping); err != nil {
   258  		return err
   259  	}
   260  	// TODO: change the health check to be different from readiness check
   261  	if err := mgr.AddHealthzCheck("ping", healthz.Ping); err != nil {
   262  		return err
   263  	}
   264  	return nil
   265  }
   266  
   267  // waitWebhookSecretVolume waits for webhook secret ready to avoid mgr running crash
   268  func waitWebhookSecretVolume(certDir string, timeout, interval time.Duration) error {
   269  	start := time.Now()
   270  	for {
   271  		time.Sleep(interval)
   272  		if time.Since(start) > timeout {
   273  			return fmt.Errorf("getting webhook secret timeout after %s", timeout.String())
   274  		}
   275  		klog.InfoS("Wait webhook secret", "time consumed(second)", int64(time.Since(start).Seconds()),
   276  			"timeout(second)", int64(timeout.Seconds()))
   277  		if _, err := os.Stat(certDir); !os.IsNotExist(err) {
   278  			ready := func() bool {
   279  				f, err := os.Open(filepath.Clean(certDir))
   280  				if err != nil {
   281  					return false
   282  				}
   283  				defer func() {
   284  					if err := f.Close(); err != nil {
   285  						klog.Error(err, "Failed to close file")
   286  					}
   287  				}()
   288  				// check if dir is empty
   289  				if _, err := f.Readdir(1); errors.Is(err, io.EOF) {
   290  					return false
   291  				}
   292  				// check if secret files are empty
   293  				err = filepath.Walk(certDir, func(path string, info os.FileInfo, err error) error {
   294  					// even Cert dir is created, cert files are still empty for a while
   295  					if info.Size() == 0 {
   296  						return errors.New("secret is not ready")
   297  					}
   298  					return nil
   299  				})
   300  				if err == nil {
   301  					klog.InfoS("Webhook secret is ready", "time consumed(second)",
   302  						int64(time.Since(start).Seconds()))
   303  					return true
   304  				}
   305  				return false
   306  			}()
   307  			if ready {
   308  				return nil
   309  			}
   310  		}
   311  	}
   312  }