github.com/verrazzano/verrazzano@v1.7.0/cluster-operator/internal/operatorinit/run_operator.go (about)

     1  // Copyright (c) 2022, 2023, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  
     4  package operatorinit
     5  
     6  import (
     7  	"context"
     8  	"github.com/pkg/errors"
     9  	"github.com/prometheus/client_golang/prometheus/promhttp"
    10  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/capi"
    11  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/quickcreate/controller"
    12  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/quickcreate/controller/oci"
    13  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/quickcreate/ociocne"
    14  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/quickcreate/oke"
    15  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/rancher"
    16  	"github.com/verrazzano/verrazzano/cluster-operator/controllers/vmc"
    17  	"github.com/verrazzano/verrazzano/pkg/k8sutil"
    18  	"github.com/verrazzano/verrazzano/pkg/log/vzlog"
    19  	"github.com/verrazzano/verrazzano/pkg/nginxutil"
    20  	"github.com/verrazzano/verrazzano/pkg/rancherutil"
    21  	"go.uber.org/zap"
    22  	apiextv1 "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1"
    23  	k8serrors "k8s.io/apimachinery/pkg/api/errors"
    24  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    25  	"k8s.io/apimachinery/pkg/runtime"
    26  	"net/http"
    27  	"os"
    28  	ctrl "sigs.k8s.io/controller-runtime"
    29  	"sigs.k8s.io/controller-runtime/pkg/healthz"
    30  	"sigs.k8s.io/yaml"
    31  	"strings"
    32  	"time"
    33  )
    34  
const (
	// clusterSelectorFilePath is the file passed to shouldSyncClusters; when present and
	// non-empty it is parsed as a YAML label selector used to filter Rancher clusters.
	clusterSelectorFilePath = "/var/syncClusters/selector.yaml"
	// syncClustersEnvVarName is the environment variable that turns Rancher cluster
	// synchronization on when its value is "true" (compared case-insensitively).
	syncClustersEnvVarName  = "CLUSTER_SYNC_ENABLED"
	// cattleClustersCRDName is the CRD whose presence gates the Rancher cluster sync
	// controller and which watchCattleClustersCRD polls for install/uninstall changes.
	cattleClustersCRDName   = "clusters.management.cattle.io"
	// capiClustersCRDName is the CRD whose presence gates the CAPI cluster controller.
	capiClustersCRDName     = "clusters.cluster.x-k8s.io"
)
    41  
// Properties carries the startup configuration consumed by StartClusterOperator.
type Properties struct {
	// Scheme is the runtime scheme handed to the controller manager.
	Scheme                         *runtime.Scheme
	// CertificateDir is not read by StartClusterOperator.
	// NOTE(review): presumably consumed by webhook startup elsewhere in the package — confirm.
	CertificateDir                 string
	// MetricsAddress is used as the controller manager's metrics bind address.
	MetricsAddress                 string
	// ProbeAddress is used as the controller manager's health probe bind address.
	ProbeAddress                   string
	// IngressHost is the Rancher ingress host given to the CAPI and VMC reconcilers. When
	// empty, StartClusterOperator derives it from the default Rancher ingress host prefix
	// and the ingress NGINX namespace.
	IngressHost                    string
	// EnableLeaderElection turns on leader election in the controller manager.
	EnableLeaderElection           bool
	// EnableQuickCreate registers the OCNE-on-OCI and OKE quick-create reconcilers.
	EnableQuickCreate              bool
	// DisableCAPIRancherRegistration prevents the CAPI cluster controller from being
	// registered even when the CAPI clusters CRD is installed.
	DisableCAPIRancherRegistration bool
}
    52  
    53  // StartClusterOperator Cluster operator execution entry point
    54  func StartClusterOperator(log *zap.SugaredLogger, props Properties) error {
    55  	options := ctrl.Options{
    56  		Scheme:                 props.Scheme,
    57  		MetricsBindAddress:     props.MetricsAddress,
    58  		Port:                   9443,
    59  		HealthProbeBindAddress: props.ProbeAddress,
    60  		LeaderElection:         props.EnableLeaderElection,
    61  		LeaderElectionID:       "42d5ea87.verrazzano.io",
    62  	}
    63  
    64  	ingressNGINXNamespace, err := nginxutil.DetermineNamespaceForIngressNGINX(vzlog.DefaultLogger())
    65  	if err != nil {
    66  		return err
    67  	}
    68  	nginxutil.SetIngressNGINXNamespace(ingressNGINXNamespace)
    69  
    70  	ctrlConfig := k8sutil.GetConfigOrDieFromController()
    71  	mgr, err := ctrl.NewManager(ctrlConfig, options)
    72  	if err != nil {
    73  		return errors.Wrapf(err, "Failed to setup controller manager")
    74  	}
    75  
    76  	apiextv1Client := apiextv1.NewForConfigOrDie(ctrlConfig)
    77  	crdInstalled, err := isCRDInstalled(apiextv1Client, cattleClustersCRDName)
    78  	if err != nil {
    79  		log.Error(err, "unable to determine if cattle CRD is installed")
    80  		os.Exit(1)
    81  	}
    82  
    83  	// only start the Rancher cluster sync controller if the cattle clusters CRD is installed
    84  	if crdInstalled {
    85  		syncEnabled, clusterSelector, err := shouldSyncClusters(clusterSelectorFilePath)
    86  		if err != nil {
    87  			log.Error(err, "error processing cluster sync config")
    88  			os.Exit(1)
    89  		}
    90  
    91  		if err = (&rancher.RancherClusterReconciler{
    92  			Client:             mgr.GetClient(),
    93  			ClusterSyncEnabled: syncEnabled,
    94  			ClusterSelector:    clusterSelector,
    95  			Log:                log,
    96  			Scheme:             mgr.GetScheme(),
    97  		}).SetupWithManager(mgr); err != nil {
    98  			log.Errorf("Failed to create Rancher cluster controller: %v", err)
    99  			os.Exit(1)
   100  		}
   101  	}
   102  
   103  	capiCrdInstalled, err := isCRDInstalled(apiextv1Client, capiClustersCRDName)
   104  	if err != nil {
   105  		log.Error(err, "unable to determine if CAPI CRD is installed")
   106  		os.Exit(1)
   107  	}
   108  
   109  	if props.IngressHost == "" {
   110  		props.IngressHost = rancherutil.DefaultRancherIngressHostPrefix + nginxutil.IngressNGINXNamespace()
   111  	}
   112  
   113  	// only start the CAPI cluster controller if the clusters CRD is installed and the controller is enabled
   114  	if capiCrdInstalled && !props.DisableCAPIRancherRegistration {
   115  		rancherRegistration := &capi.RancherRegistration{
   116  			Client:             mgr.GetClient(),
   117  			Log:                log,
   118  			RancherIngressHost: props.IngressHost,
   119  		}
   120  		vzRegistration := &capi.VerrazzanoRegistration{
   121  			Client: mgr.GetClient(),
   122  			Log:    log,
   123  		}
   124  		log.Infof("Starting CAPI Cluster controller")
   125  		if err = (&capi.CAPIClusterReconciler{
   126  			Client:              mgr.GetClient(),
   127  			Log:                 log,
   128  			Scheme:              mgr.GetScheme(),
   129  			RancherRegistrar:    rancherRegistration,
   130  			RancherIngressHost:  props.IngressHost,
   131  			RancherEnabled:      crdInstalled,
   132  			VerrazzanoRegistrar: vzRegistration,
   133  		}).SetupWithManager(mgr); err != nil {
   134  			log.Errorf("Failed to create CAPI cluster controller: %v", err)
   135  			os.Exit(1)
   136  		}
   137  	}
   138  
   139  	// Set up the reconciler for VerrazzanoManagedCluster objects
   140  	if err = (&vmc.VerrazzanoManagedClusterReconciler{
   141  		Client:             mgr.GetClient(),
   142  		Scheme:             mgr.GetScheme(),
   143  		RancherIngressHost: props.IngressHost,
   144  	}).SetupWithManager(mgr); err != nil {
   145  		log.Error(err, "Failed to setup controller VerrazzanoManagedCluster")
   146  		os.Exit(1)
   147  	}
   148  	if props.EnableQuickCreate {
   149  		if err = (&ociocne.ClusterReconciler{
   150  			Base: &controller.Base{
   151  				Client: mgr.GetClient(),
   152  			},
   153  			Scheme:            mgr.GetScheme(),
   154  			CredentialsLoader: oci.CredentialsLoaderImpl{},
   155  			OCIClientGetter: func(credentials *oci.Credentials) (oci.Client, error) {
   156  				return oci.NewClient(credentials)
   157  			},
   158  		}).SetupWithManager(mgr); err != nil {
   159  			log.Error(err, "Failed to setup controller OCNEOCIQuickCreate")
   160  			os.Exit(1)
   161  		}
   162  		if err = (&oke.ClusterReconciler{
   163  			Base: &controller.Base{
   164  				Client: mgr.GetClient(),
   165  			},
   166  			CredentialsLoader: oci.CredentialsLoaderImpl{},
   167  			OCIClientGetter: func(credentials *oci.Credentials) (oci.Client, error) {
   168  				return oci.NewClient(credentials)
   169  			},
   170  			Scheme: mgr.GetScheme(),
   171  		}).SetupWithManager(mgr); err != nil {
   172  			log.Error(err, "Failed to setup controller OKEQuickCreate")
   173  			os.Exit(1)
   174  		}
   175  	}
   176  
   177  	if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
   178  		log.Error(err, "unable to set up health check")
   179  		os.Exit(1)
   180  	}
   181  	if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
   182  		log.Error(err, "unable to set up ready check")
   183  		os.Exit(1)
   184  	}
   185  
   186  	// wrap the controller context with a new context so we can cancel the context if we detect
   187  	// a change in the clusters.management.cattle.io CRD installation
   188  	ctx, cancel := context.WithCancel(ctrl.SetupSignalHandler())
   189  	go watchCattleClustersCRD(cancel, apiextv1Client, crdInstalled, log)
   190  
   191  	go startMetricsServer(log)
   192  
   193  	log.Info("starting manager")
   194  	if err := mgr.Start(ctx); err != nil {
   195  		log.Error(err, "problem running manager")
   196  		os.Exit(1)
   197  	}
   198  	return nil
   199  }
   200  
   201  // shouldSyncClusters returns true if Rancher cluster synchronization is enabled. An optional
   202  // user-specified label selector can be used to filter the Rancher clusters. If sync is enabled and
   203  // the label selector is nil, we will sync all Rancher clusters.
   204  func shouldSyncClusters(clusterSelectorFile string) (bool, *metav1.LabelSelector, error) {
   205  	enabled := os.Getenv(syncClustersEnvVarName)
   206  	if enabled == "" || strings.ToLower(enabled) != "true" {
   207  		return false, nil, nil
   208  	}
   209  
   210  	f, err := os.Stat(clusterSelectorFile)
   211  	if err != nil || f.Size() == 0 {
   212  		return true, nil, nil
   213  	}
   214  
   215  	b, err := os.ReadFile(clusterSelectorFile)
   216  	if err != nil {
   217  		return true, nil, err
   218  	}
   219  
   220  	selector := &metav1.LabelSelector{}
   221  	err = yaml.Unmarshal(b, selector)
   222  	if err != nil {
   223  		return true, nil, err
   224  	}
   225  
   226  	return true, selector, err
   227  }
   228  
   229  // isCRDInstalled returns true if the clusters.management.cattle.io CRD is installed
   230  func isCRDInstalled(client apiextv1.ApiextensionsV1Interface, crdName string) (bool, error) {
   231  	_, err := client.CustomResourceDefinitions().Get(context.TODO(), crdName, metav1.GetOptions{})
   232  	if k8serrors.IsNotFound(err) {
   233  		return false, nil
   234  	}
   235  	if err != nil {
   236  		return false, err
   237  	}
   238  
   239  	return true, nil
   240  }
   241  
   242  // watchCattleClustersCRD periodically checks to see if the clusters.management.cattle.io CRD is installed. If it detects a change
   243  // it will call the context cancel function which will cause the operator to gracefully shut down. The operator will then be
   244  // restarted by Kubernetes and it will start the cattle clusters sync controller if the CRD is installed.
   245  func watchCattleClustersCRD(cancel context.CancelFunc, client apiextv1.ApiextensionsV1Interface, crdInstalled bool, log *zap.SugaredLogger) {
   246  	log.Infof("Watching for CRD %s to be installed or uninstalled", cattleClustersCRDName)
   247  	for {
   248  		installed, err := isCRDInstalled(client, cattleClustersCRDName)
   249  		if err != nil {
   250  			log.Debugf("Unable to determine if CRD %s is installed: %v", cattleClustersCRDName, err)
   251  			continue
   252  		}
   253  		if installed != crdInstalled {
   254  			log.Infof("Detected CRD %s was installed or uninstalled, shutting down operator", cattleClustersCRDName)
   255  			cancel()
   256  			return
   257  		}
   258  		time.Sleep(10 * time.Second)
   259  	}
   260  }
   261  
   262  // startMetricsServer initializes the HTTP listener for the metrics server
   263  func startMetricsServer(log *zap.SugaredLogger) {
   264  	// Start up the Prometheus Metrics Exporter server to emit operator metrics
   265  	http.Handle("/metrics", promhttp.Handler())
   266  	server := &http.Server{
   267  		ReadTimeout:  10 * time.Second,
   268  		WriteTimeout: 10 * time.Second,
   269  		Addr:         ":9100",
   270  	}
   271  	for err := server.ListenAndServe(); err != nil; err = server.ListenAndServe() {
   272  		log.Debugf("Failed to start the metrics server on port 9100: %v", err)
   273  		time.Sleep(10 * time.Second)
   274  	}
   275  }